aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.mailmap8
-rw-r--r--CREDITS4
-rw-r--r--Documentation/ABI/testing/procfs-diskstats5
-rw-r--r--Documentation/ABI/testing/sysfs-block6
-rw-r--r--Documentation/ABI/testing/sysfs-devices-system-cpu2
-rw-r--r--Documentation/admin-guide/cgroup-v2.rst20
-rw-r--r--Documentation/admin-guide/device-mapper/dm-integrity.rst5
-rw-r--r--Documentation/admin-guide/device-mapper/dm-raid.rst2
-rw-r--r--Documentation/admin-guide/hw-vuln/index.rst2
-rw-r--r--Documentation/admin-guide/hw-vuln/multihit.rst163
-rw-r--r--Documentation/admin-guide/hw-vuln/tsx_async_abort.rst276
-rw-r--r--Documentation/admin-guide/iostats.rst9
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt96
-rw-r--r--Documentation/admin-guide/perf/imx-ddr.rst15
-rw-r--r--Documentation/admin-guide/perf/thunderx2-pmu.rst20
-rw-r--r--Documentation/arm64/booting.rst3
-rw-r--r--Documentation/arm64/cpu-feature-registers.rst19
-rw-r--r--Documentation/arm64/elf_hwcaps.rst67
-rw-r--r--Documentation/arm64/memory.rst9
-rw-r--r--Documentation/arm64/silicon-errata.rst15
-rw-r--r--Documentation/block/stat.rst14
-rw-r--r--Documentation/core-api/index.rst1
-rw-r--r--Documentation/core-api/memory-allocation.rst4
-rw-r--r--Documentation/core-api/symbol-namespaces.rst (renamed from Documentation/kbuild/namespaces.rst)0
-rw-r--r--Documentation/dev-tools/index.rst1
-rw-r--r--Documentation/dev-tools/kasan.rst3
-rw-r--r--Documentation/dev-tools/kselftest.rst16
-rw-r--r--Documentation/dev-tools/kunit/api/index.rst16
-rw-r--r--Documentation/dev-tools/kunit/api/test.rst11
-rw-r--r--Documentation/dev-tools/kunit/faq.rst62
-rw-r--r--Documentation/dev-tools/kunit/index.rst79
-rw-r--r--Documentation/dev-tools/kunit/start.rst180
-rw-r--r--Documentation/dev-tools/kunit/usage.rst576
-rw-r--r--Documentation/devicetree/bindings/arm/rockchip.yaml4
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/atmel,aic.txt7
-rw-r--r--Documentation/devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml9
-rw-r--r--Documentation/devicetree/bindings/mtd/cadence-nand-controller.txt53
-rw-r--r--Documentation/devicetree/bindings/mtd/intel,ixp4xx-flash.txt22
-rw-r--r--Documentation/devicetree/bindings/perf/arm-ccn.txt1
-rw-r--r--Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt1
-rw-r--r--Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml86
-rw-r--r--Documentation/devicetree/bindings/regulator/fixed-regulator.yaml4
-rw-r--r--Documentation/devicetree/bindings/riscv/cpus.yaml29
-rw-r--r--Documentation/devicetree/bindings/security/tpm/google,cr50.txt19
-rw-r--r--Documentation/devicetree/bindings/serial/renesas,sci-serial.txt2
-rw-r--r--Documentation/devicetree/bindings/usb/amlogic,dwc3.txt4
-rw-r--r--Documentation/devicetree/bindings/usb/generic-ehci.yaml7
-rw-r--r--Documentation/devicetree/bindings/usb/generic-ohci.yaml7
-rw-r--r--Documentation/devicetree/bindings/usb/mediatek,mtk-xhci.txt4
-rw-r--r--Documentation/devicetree/bindings/usb/mediatek,mtu3.txt4
-rw-r--r--Documentation/devicetree/bindings/usb/usb-hcd.yaml5
-rw-r--r--Documentation/devicetree/bindings/usb/usb-uhci.txt2
-rw-r--r--Documentation/devicetree/bindings/usb/usb-xhci.txt4
-rw-r--r--Documentation/driver-api/libata.rst14
-rw-r--r--Documentation/filesystems/fscrypt.rst68
-rw-r--r--Documentation/filesystems/fsverity.rst12
-rw-r--r--Documentation/hwmon/index.rst1
-rw-r--r--Documentation/hwmon/inspur-ipsps1.rst2
-rw-r--r--Documentation/hwmon/k10temp.rst18
-rw-r--r--Documentation/ioctl/ioctl-number.rst1
-rw-r--r--Documentation/networking/device_drivers/intel/e100.rst14
-rw-r--r--Documentation/networking/device_drivers/intel/e1000.rst12
-rw-r--r--Documentation/networking/device_drivers/intel/e1000e.rst14
-rw-r--r--Documentation/networking/device_drivers/intel/fm10k.rst10
-rw-r--r--Documentation/networking/device_drivers/intel/i40e.rst8
-rw-r--r--Documentation/networking/device_drivers/intel/iavf.rst8
-rw-r--r--Documentation/networking/device_drivers/intel/ice.rst6
-rw-r--r--Documentation/networking/device_drivers/intel/igb.rst12
-rw-r--r--Documentation/networking/device_drivers/intel/igbvf.rst6
-rw-r--r--Documentation/networking/device_drivers/intel/ixgbe.rst10
-rw-r--r--Documentation/networking/device_drivers/intel/ixgbevf.rst6
-rw-r--r--Documentation/networking/device_drivers/pensando/ionic.rst10
-rw-r--r--Documentation/networking/ip-sysctl.txt11
-rw-r--r--Documentation/networking/net_dim.txt36
-rw-r--r--Documentation/networking/tls-offload.rst4
-rw-r--r--Documentation/process/coding-style.rst2
-rw-r--r--Documentation/process/deprecated.rst33
-rw-r--r--Documentation/usb/rio.rst109
-rw-r--r--Documentation/x86/index.rst1
-rw-r--r--Documentation/x86/tsx_async_abort.rst117
-rw-r--r--MAINTAINERS116
-rw-r--r--Makefile16
-rw-r--r--arch/arc/boot/dts/hsdk.dts23
-rw-r--r--arch/arc/configs/hsdk_defconfig6
-rw-r--r--arch/arc/kernel/perf_event.c4
-rw-r--r--arch/arm/boot/dts/am3874-iceboard.dts9
-rw-r--r--arch/arm/boot/dts/bcm2835-rpi-zero-w.dts1
-rw-r--r--arch/arm/boot/dts/bcm2837-rpi-cm3.dtsi8
-rw-r--r--arch/arm/boot/dts/imx6-logicpd-baseboard.dtsi4
-rw-r--r--arch/arm/boot/dts/imx6-logicpd-som.dtsi4
-rw-r--r--arch/arm/boot/dts/imx6qdl-sabreauto.dtsi8
-rw-r--r--arch/arm/boot/dts/imx7s.dtsi8
-rw-r--r--arch/arm/boot/dts/logicpd-torpedo-som.dtsi4
-rw-r--r--arch/arm/boot/dts/mt7629-rfb.dts13
-rw-r--r--arch/arm/boot/dts/mt7629.dtsi2
-rw-r--r--arch/arm/boot/dts/omap4-droid4-xt894.dts2
-rw-r--r--arch/arm/boot/dts/omap4-panda-common.dtsi2
-rw-r--r--arch/arm/boot/dts/omap4-sdp.dts2
-rw-r--r--arch/arm/boot/dts/omap4-var-som-om44-wlan.dtsi2
-rw-r--r--arch/arm/boot/dts/omap5-board-common.dtsi2
-rw-r--r--arch/arm/boot/dts/omap54xx-clocks.dtsi2
-rw-r--r--arch/arm/boot/dts/stm32mp157-pinctrl.dtsi8
-rw-r--r--arch/arm/boot/dts/stm32mp157c-ev1.dts13
-rw-r--r--arch/arm/boot/dts/stm32mp157c.dtsi4
-rw-r--r--arch/arm/boot/dts/sun4i-a10.dtsi4
-rw-r--r--arch/arm/boot/dts/sun5i.dtsi2
-rw-r--r--arch/arm/boot/dts/sun6i-a31.dtsi4
-rw-r--r--arch/arm/boot/dts/sun7i-a20.dtsi9
-rw-r--r--arch/arm/boot/dts/sun8i-a23-a33.dtsi2
-rw-r--r--arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts1
-rw-r--r--arch/arm/boot/dts/sun8i-a83t.dtsi3
-rw-r--r--arch/arm/boot/dts/sun8i-r40.dtsi4
-rw-r--r--arch/arm/boot/dts/sun9i-a80.dtsi5
-rw-r--r--arch/arm/boot/dts/sunxi-h3-h5.dtsi6
-rw-r--r--arch/arm/boot/dts/vf610-zii-scu4-aib.dts2
-rw-r--r--arch/arm/configs/badge4_defconfig1
-rw-r--r--arch/arm/configs/corgi_defconfig1
-rw-r--r--arch/arm/configs/davinci_all_defconfig1
-rw-r--r--arch/arm/configs/imx_v6_v7_defconfig1
-rw-r--r--arch/arm/configs/omap2plus_defconfig12
-rw-r--r--arch/arm/configs/pxa_defconfig1
-rw-r--r--arch/arm/configs/s3c2410_defconfig1
-rw-r--r--arch/arm/configs/spitz_defconfig1
-rw-r--r--arch/arm/crypto/Kconfig1
-rw-r--r--arch/arm/crypto/aes-ce-core.S1
-rw-r--r--arch/arm/include/asm/domain.h8
-rw-r--r--arch/arm/include/asm/uaccess.h4
-rw-r--r--arch/arm/kernel/head-common.S5
-rw-r--r--arch/arm/kernel/head-nommu.S2
-rw-r--r--arch/arm/mach-davinci/dm365.c4
-rw-r--r--arch/arm/mach-omap2/pdata-quirks.c11
-rw-r--r--arch/arm/mach-sunxi/mc_smp.c6
-rw-r--r--arch/arm/mm/alignment.c70
-rw-r--r--arch/arm/mm/proc-v7-bugs.c10
-rw-r--r--arch/arm/mm/proc-v7m.S6
-rw-r--r--arch/arm64/Kconfig83
-rw-r--r--arch/arm64/Makefile21
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts9
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-a64-pinebook.dts2
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts6
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi11
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi2
-rw-r--r--arch/arm64/boot/dts/broadcom/stingray/stingray-pinctrl.dtsi5
-rw-r--r--arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi3
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts2
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi36
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm.dtsi12
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mn.dtsi12
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi6
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mq.dtsi4
-rw-r--r--arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts13
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-hugsun-x99.dts4
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts12
-rw-r--r--arch/arm64/include/asm/asm-uaccess.h26
-rw-r--r--arch/arm64/include/asm/atomic_lse.h6
-rw-r--r--arch/arm64/include/asm/barrier.h12
-rw-r--r--arch/arm64/include/asm/cache.h3
-rw-r--r--arch/arm64/include/asm/cpucaps.h6
-rw-r--r--arch/arm64/include/asm/cpufeature.h14
-rw-r--r--arch/arm64/include/asm/cputype.h2
-rw-r--r--arch/arm64/include/asm/daifflags.h19
-rw-r--r--arch/arm64/include/asm/exception.h22
-rw-r--r--arch/arm64/include/asm/ftrace.h23
-rw-r--r--arch/arm64/include/asm/insn.h3
-rw-r--r--arch/arm64/include/asm/irqflags.h19
-rw-r--r--arch/arm64/include/asm/kvm_host.h3
-rw-r--r--arch/arm64/include/asm/memory.h16
-rw-r--r--arch/arm64/include/asm/module.h2
-rw-r--r--arch/arm64/include/asm/pgtable-hwdef.h2
-rw-r--r--arch/arm64/include/asm/pgtable-prot.h15
-rw-r--r--arch/arm64/include/asm/pgtable.h36
-rw-r--r--arch/arm64/include/asm/processor.h16
-rw-r--r--arch/arm64/include/asm/syscall_wrapper.h6
-rw-r--r--arch/arm64/include/asm/sysreg.h2
-rw-r--r--arch/arm64/include/asm/traps.h10
-rw-r--r--arch/arm64/include/asm/uaccess.h27
-rw-r--r--arch/arm64/include/asm/vdso/compat_barrier.h2
-rw-r--r--arch/arm64/include/asm/vdso/vsyscall.h7
-rw-r--r--arch/arm64/include/asm/vdso_datapage.h33
-rw-r--r--arch/arm64/kernel/Makefile6
-rw-r--r--arch/arm64/kernel/armv8_deprecated.c5
-rw-r--r--arch/arm64/kernel/asm-offsets.c1
-rw-r--r--arch/arm64/kernel/cpu_errata.c183
-rw-r--r--arch/arm64/kernel/cpufeature.c17
-rw-r--r--arch/arm64/kernel/cpuinfo.c2
-rw-r--r--arch/arm64/kernel/entry-common.c332
-rw-r--r--arch/arm64/kernel/entry-ftrace.S140
-rw-r--r--arch/arm64/kernel/entry.S284
-rw-r--r--arch/arm64/kernel/fpsimd.c6
-rw-r--r--arch/arm64/kernel/ftrace.c117
-rw-r--r--arch/arm64/kernel/hibernate.c9
-rw-r--r--arch/arm64/kernel/insn.c13
-rw-r--r--arch/arm64/kernel/kaslr.c44
-rw-r--r--arch/arm64/kernel/module-plts.c3
-rw-r--r--arch/arm64/kernel/module.c57
-rw-r--r--arch/arm64/kernel/perf_event.c191
-rw-r--r--arch/arm64/kernel/probes/kprobes.c4
-rw-r--r--arch/arm64/kernel/process.c50
-rw-r--r--arch/arm64/kernel/psci.c15
-rw-r--r--arch/arm64/kernel/sdei.c3
-rw-r--r--arch/arm64/kernel/sys_compat.c11
-rw-r--r--arch/arm64/kernel/syscall.c4
-rw-r--r--arch/arm64/kernel/traps.c21
-rw-r--r--arch/arm64/kernel/vdso32/Makefile44
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S3
-rw-r--r--arch/arm64/kvm/hyp/switch.c121
-rw-r--r--arch/arm64/kvm/hyp/sysreg-sr.c35
-rw-r--r--arch/arm64/kvm/hyp/tlb.c23
-rw-r--r--arch/arm64/kvm/sys_regs.c4
-rw-r--r--arch/arm64/lib/clear_user.S2
-rw-r--r--arch/arm64/lib/copy_from_user.S2
-rw-r--r--arch/arm64/lib/copy_in_user.S2
-rw-r--r--arch/arm64/lib/copy_to_user.S2
-rw-r--r--arch/arm64/lib/uaccess_flushcache.c6
-rw-r--r--arch/arm64/mm/fault.c75
-rw-r--r--arch/arm64/mm/init.c91
-rw-r--r--arch/arm64/mm/mmu.c5
-rw-r--r--arch/mips/bcm63xx/prom.c2
-rw-r--r--arch/mips/configs/mtx1_defconfig1
-rw-r--r--arch/mips/configs/rm200_defconfig1
-rw-r--r--arch/mips/fw/sni/sniprom.c2
-rw-r--r--arch/mips/include/asm/bmips.h10
-rw-r--r--arch/mips/include/asm/cmpxchg.h9
-rw-r--r--arch/mips/include/asm/vdso/gettimeofday.h4
-rw-r--r--arch/mips/include/asm/vdso/vsyscall.h7
-rw-r--r--arch/mips/include/uapi/asm/hwcap.h11
-rw-r--r--arch/mips/kernel/cpu-probe.c33
-rw-r--r--arch/mips/kernel/smp-bmips.c8
-rw-r--r--arch/mips/loongson64/Platform4
-rw-r--r--arch/mips/mm/tlbex.c23
-rw-r--r--arch/mips/sgi-ip27/Kconfig7
-rw-r--r--arch/mips/sgi-ip27/ip27-init.c21
-rw-r--r--arch/mips/sgi-ip27/ip27-memory.c4
-rw-r--r--arch/mips/vdso/Makefile1
-rw-r--r--arch/parisc/Makefile1
-rw-r--r--arch/parisc/include/asm/cache.h2
-rw-r--r--arch/parisc/include/asm/ldcw.h2
-rw-r--r--arch/parisc/kernel/entry.S2
-rw-r--r--arch/parisc/kernel/module.c10
-rw-r--r--arch/parisc/kernel/module.lds7
-rw-r--r--arch/parisc/mm/ioremap.c12
-rw-r--r--arch/powerpc/include/asm/book3s/32/kup.h1
-rw-r--r--arch/powerpc/include/asm/book3s/64/tlbflush-radix.h4
-rw-r--r--arch/powerpc/include/asm/elf.h3
-rw-r--r--arch/powerpc/include/asm/page.h9
-rw-r--r--arch/powerpc/kernel/prom_init.c13
-rw-r--r--arch/powerpc/kernel/prom_init_check.sh3
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S1
-rw-r--r--arch/powerpc/kvm/book3s_xive.c24
-rw-r--r--arch/powerpc/kvm/book3s_xive.h12
-rw-r--r--arch/powerpc/kvm/book3s_xive_native.c6
-rw-r--r--arch/powerpc/mm/mem.c20
-rw-r--r--arch/powerpc/net/bpf_jit_comp64.c13
-rw-r--r--arch/powerpc/platforms/cell/spufs/inode.c1
-rw-r--r--arch/powerpc/platforms/powernv/eeh-powernv.c2
-rw-r--r--arch/powerpc/platforms/powernv/smp.c53
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c3
-rw-r--r--arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts1
-rw-r--r--arch/riscv/include/asm/bug.h16
-rw-r--r--arch/riscv/include/asm/io.h7
-rw-r--r--arch/riscv/include/asm/irq.h3
-rw-r--r--arch/riscv/include/asm/pgtable.h28
-rw-r--r--arch/riscv/include/asm/switch_to.h1
-rw-r--r--arch/riscv/include/asm/tlbflush.h4
-rw-r--r--arch/riscv/kernel/cpufeature.c1
-rw-r--r--arch/riscv/kernel/entry.S3
-rw-r--r--arch/riscv/kernel/head.h21
-rw-r--r--arch/riscv/kernel/irq.c2
-rw-r--r--arch/riscv/kernel/module-sections.c1
-rw-r--r--arch/riscv/kernel/process.c2
-rw-r--r--arch/riscv/kernel/ptrace.c4
-rw-r--r--arch/riscv/kernel/reset.c1
-rw-r--r--arch/riscv/kernel/setup.c2
-rw-r--r--arch/riscv/kernel/signal.c8
-rw-r--r--arch/riscv/kernel/smp.c2
-rw-r--r--arch/riscv/kernel/smpboot.c5
-rw-r--r--arch/riscv/kernel/syscall_table.c1
-rw-r--r--arch/riscv/kernel/time.c1
-rw-r--r--arch/riscv/kernel/traps.c31
-rw-r--r--arch/riscv/kernel/vdso.c3
-rw-r--r--arch/riscv/mm/context.c1
-rw-r--r--arch/riscv/mm/fault.c2
-rw-r--r--arch/riscv/mm/init.c7
-rw-r--r--arch/riscv/mm/sifive_l2_cache.c2
-rw-r--r--arch/s390/boot/startup.c14
-rw-r--r--arch/s390/include/asm/page.h2
-rw-r--r--arch/s390/include/asm/uaccess.h4
-rw-r--r--arch/s390/include/asm/unwind.h1
-rw-r--r--arch/s390/kernel/idle.c29
-rw-r--r--arch/s390/kernel/machine_kexec_reloc.c1
-rw-r--r--arch/s390/kernel/unwind_bc.c18
-rw-r--r--arch/s390/mm/cmm.c12
-rw-r--r--arch/s390/mm/init.c1
-rw-r--r--arch/sparc/Kconfig1
-rw-r--r--arch/sparc/vdso/Makefile4
-rw-r--r--arch/um/configs/kunit_defconfig3
-rw-r--r--arch/um/drivers/ubd_kern.c8
-rw-r--r--arch/x86/Kconfig45
-rw-r--r--arch/x86/boot/compressed/acpi.c48
-rw-r--r--arch/x86/boot/compressed/eboot.c4
-rw-r--r--arch/x86/boot/compressed/misc.c25
-rw-r--r--arch/x86/events/amd/core.c30
-rw-r--r--arch/x86/events/amd/ibs.c8
-rw-r--r--arch/x86/events/intel/core.c4
-rw-r--r--arch/x86/events/intel/cstate.c44
-rw-r--r--arch/x86/events/intel/pt.c2
-rw-r--r--arch/x86/events/intel/uncore.c44
-rw-r--r--arch/x86/events/intel/uncore.h12
-rw-r--r--arch/x86/events/msr.c7
-rw-r--r--arch/x86/hyperv/hv_apic.c20
-rw-r--r--arch/x86/include/asm/cpu_entry_area.h2
-rw-r--r--arch/x86/include/asm/cpufeatures.h2
-rw-r--r--arch/x86/include/asm/intel-family.h3
-rw-r--r--arch/x86/include/asm/kvm_host.h8
-rw-r--r--arch/x86/include/asm/msr-index.h16
-rw-r--r--arch/x86/include/asm/mwait.h2
-rw-r--r--arch/x86/include/asm/nospec-branch.h4
-rw-r--r--arch/x86/include/asm/pgtable.h6
-rw-r--r--arch/x86/include/asm/processor.h7
-rw-r--r--arch/x86/include/asm/pti.h2
-rw-r--r--arch/x86/include/asm/uaccess.h23
-rw-r--r--arch/x86/include/asm/vmware.h14
-rw-r--r--arch/x86/kernel/apic/apic.c28
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c3
-rw-r--r--arch/x86/kernel/cpu/Makefile2
-rw-r--r--arch/x86/kernel/cpu/bugs.c159
-rw-r--r--arch/x86/kernel/cpu/common.c99
-rw-r--r--arch/x86/kernel/cpu/cpu.h18
-rw-r--r--arch/x86/kernel/cpu/intel.c5
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c4
-rw-r--r--arch/x86/kernel/cpu/resctrl/ctrlmondata.c4
-rw-r--r--arch/x86/kernel/cpu/resctrl/rdtgroup.c4
-rw-r--r--arch/x86/kernel/cpu/tsx.c140
-rw-r--r--arch/x86/kernel/cpu/vmware.c2
-rw-r--r--arch/x86/kernel/dumpstack_64.c7
-rw-r--r--arch/x86/kernel/early-quirks.c2
-rw-r--r--arch/x86/kernel/head64.c22
-rw-r--r--arch/x86/kernel/process.h2
-rw-r--r--arch/x86/kernel/tsc.c3
-rw-r--r--arch/x86/kvm/cpuid.c2
-rw-r--r--arch/x86/kvm/lapic.c5
-rw-r--r--arch/x86/kvm/lapic.h5
-rw-r--r--arch/x86/kvm/mmu.c282
-rw-r--r--arch/x86/kvm/mmu.h4
-rw-r--r--arch/x86/kvm/paging_tmpl.h29
-rw-r--r--arch/x86/kvm/svm.c16
-rw-r--r--arch/x86/kvm/vmx/nested.c64
-rw-r--r--arch/x86/kvm/vmx/nested.h13
-rw-r--r--arch/x86/kvm/vmx/vmx.c49
-rw-r--r--arch/x86/kvm/vmx/vmx.h11
-rw-r--r--arch/x86/kvm/x86.c118
-rw-r--r--arch/x86/lib/delay.c4
-rw-r--r--arch/x86/platform/efi/efi.c3
-rw-r--r--arch/x86/xen/enlighten.c28
-rw-r--r--arch/x86/xen/enlighten_pv.c8
-rw-r--r--arch/xtensa/boot/dts/virt.dts2
-rw-r--r--arch/xtensa/include/asm/bitops.h2
-rw-r--r--arch/xtensa/include/asm/uaccess.h94
-rw-r--r--arch/xtensa/kernel/xtensa_ksyms.c7
-rw-r--r--block/Kconfig4
-rw-r--r--block/Kconfig.iosched1
-rw-r--r--block/Makefile1
-rw-r--r--block/bfq-cgroup.c85
-rw-r--r--block/bfq-iosched.c36
-rw-r--r--block/bfq-iosched.h10
-rw-r--r--block/bio.c2
-rw-r--r--block/blk-cgroup-rwstat.c129
-rw-r--r--block/blk-cgroup-rwstat.h149
-rw-r--r--block/blk-cgroup.c386
-rw-r--r--block/blk-core.c16
-rw-r--r--block/blk-exec.c2
-rw-r--r--block/blk-flush.c15
-rw-r--r--block/blk-iocost.c12
-rw-r--r--block/blk-merge.c17
-rw-r--r--block/blk-mq-sysfs.c31
-rw-r--r--block/blk-mq-tag.c8
-rw-r--r--block/blk-mq-tag.h1
-rw-r--r--block/blk-mq.c136
-rw-r--r--block/blk-mq.h9
-rw-r--r--block/blk-rq-qos.c14
-rw-r--r--block/blk-rq-qos.h17
-rw-r--r--block/blk-softirq.c4
-rw-r--r--block/blk-stat.c7
-rw-r--r--block/blk-sysfs.c8
-rw-r--r--block/blk-throttle.c71
-rw-r--r--block/blk-wbt.c6
-rw-r--r--block/blk-zoned.c453
-rw-r--r--block/blk.h7
-rw-r--r--block/elevator.c12
-rw-r--r--block/genhd.c8
-rw-r--r--block/ioctl.c42
-rw-r--r--block/opal_proto.h6
-rw-r--r--block/partition-generic.c231
-rw-r--r--block/sed-opal.c318
-rw-r--r--block/t10-pi.c8
-rw-r--r--crypto/asymmetric_keys/asym_tpm.c101
-rw-r--r--drivers/acpi/cppc_acpi.c2
-rw-r--r--drivers/acpi/hmat/hmat.c2
-rw-r--r--drivers/acpi/nfit/core.c2
-rw-r--r--drivers/acpi/processor_driver.c9
-rw-r--r--drivers/acpi/processor_perflib.c40
-rw-r--r--drivers/acpi/processor_thermal.c42
-rw-r--r--drivers/acpi/sleep.c13
-rw-r--r--drivers/amba/bus.c14
-rw-r--r--drivers/android/binder.c11
-rw-r--r--drivers/android/binder_alloc.c8
-rw-r--r--drivers/android/binder_internal.h2
-rw-r--r--drivers/ata/acard-ahci.c6
-rw-r--r--drivers/ata/ahci.c17
-rw-r--r--drivers/ata/ahci_tegra.c6
-rw-r--r--drivers/ata/ata_piix.c14
-rw-r--r--drivers/ata/libahci.c6
-rw-r--r--drivers/ata/libahci_platform.c38
-rw-r--r--drivers/ata/libata-core.c12
-rw-r--r--drivers/ata/libata-scsi.c21
-rw-r--r--drivers/ata/libata-sff.c12
-rw-r--r--drivers/ata/pata_artop.c4
-rw-r--r--drivers/ata/pata_macio.c6
-rw-r--r--drivers/ata/pata_pxa.c8
-rw-r--r--drivers/ata/pdc_adma.c7
-rw-r--r--drivers/ata/sata_fsl.c4
-rw-r--r--drivers/ata/sata_inic162x.c4
-rw-r--r--drivers/ata/sata_mv.c34
-rw-r--r--drivers/ata/sata_nv.c18
-rw-r--r--drivers/ata/sata_promise.c6
-rw-r--r--drivers/ata/sata_qstor.c8
-rw-r--r--drivers/ata/sata_rcar.c6
-rw-r--r--drivers/ata/sata_sil.c8
-rw-r--r--drivers/ata/sata_sil24.c6
-rw-r--r--drivers/ata/sata_sx4.c6
-rw-r--r--drivers/base/core.c3
-rw-r--r--drivers/base/cpu.c17
-rw-r--r--drivers/base/memory.c39
-rw-r--r--drivers/base/platform.c46
-rw-r--r--drivers/base/power/qos.c70
-rw-r--r--drivers/block/drbd/drbd_main.c1
-rw-r--r--drivers/block/loop.c39
-rw-r--r--drivers/block/mtip32xx/mtip32xx.c2
-rw-r--r--drivers/block/nbd.c49
-rw-r--r--drivers/block/null_blk.h19
-rw-r--r--drivers/block/null_blk_main.c125
-rw-r--r--drivers/block/null_blk_zoned.c90
-rw-r--r--drivers/block/rbd.c11
-rw-r--r--drivers/block/rsxx/core.c2
-rw-r--r--drivers/block/zram/zram_drv.c5
-rw-r--r--drivers/bus/ti-sysc.c18
-rw-r--r--drivers/char/hw_random/core.c5
-rw-r--r--drivers/char/random.c4
-rw-r--r--drivers/char/tpm/Kconfig7
-rw-r--r--drivers/char/tpm/Makefile4
-rw-r--r--drivers/char/tpm/tpm-interface.c64
-rw-r--r--drivers/char/tpm/tpm-sysfs.c45
-rw-r--r--drivers/char/tpm/tpm.h248
-rw-r--r--drivers/char/tpm/tpm1-cmd.c15
-rw-r--r--drivers/char/tpm/tpm2-cmd.c311
-rw-r--r--drivers/char/tpm/tpm_crb.c123
-rw-r--r--drivers/char/tpm/tpm_tis.c2
-rw-r--r--drivers/char/tpm/tpm_tis_core.c79
-rw-r--r--drivers/char/tpm/tpm_tis_spi.c143
-rw-r--r--drivers/char/tpm/tpm_tis_spi.h53
-rw-r--r--drivers/char/tpm/tpm_tis_spi_cr50.c322
-rw-r--r--drivers/char/virtio_console.c28
-rw-r--r--drivers/clk/at91/clk-main.c5
-rw-r--r--drivers/clk/at91/sam9x60.c1
-rw-r--r--drivers/clk/at91/sckc.c20
-rw-r--r--drivers/clk/clk-ast2600.c7
-rw-r--r--drivers/clk/imx/clk-imx8mm.c2
-rw-r--r--drivers/clk/imx/clk-imx8mn.c2
-rw-r--r--drivers/clk/meson/g12a.c13
-rw-r--r--drivers/clk/meson/gxbb.c1
-rw-r--r--drivers/clk/samsung/clk-exynos5420.c27
-rw-r--r--drivers/clk/samsung/clk-exynos5433.c14
-rw-r--r--drivers/clk/sunxi-ng/ccu-sun9i-a80.c2
-rw-r--r--drivers/clk/sunxi/clk-sunxi.c4
-rw-r--r--drivers/clk/ti/clk-dra7-atl.c6
-rw-r--r--drivers/clk/ti/clkctrl.c5
-rw-r--r--drivers/clocksource/sh_mtu2.c16
-rw-r--r--drivers/clocksource/timer-mediatek.c10
-rw-r--r--drivers/cpufreq/cpufreq.c72
-rw-r--r--drivers/cpufreq/intel_pstate.c34
-rw-r--r--drivers/cpufreq/ppc_cbe_cpufreq_pmi.c15
-rw-r--r--drivers/cpuidle/cpuidle-haltpoll.c4
-rw-r--r--drivers/crypto/chelsio/chtls/chtls_cm.c2
-rw-r--r--drivers/crypto/chelsio/chtls/chtls_io.c2
-rw-r--r--drivers/dma-buf/dma-resv.c2
-rw-r--r--drivers/dma/imx-sdma.c8
-rw-r--r--drivers/dma/qcom/bam_dma.c19
-rw-r--r--drivers/dma/sprd-dma.c27
-rw-r--r--drivers/dma/tegra210-adma.c7
-rw-r--r--drivers/dma/ti/cppi41.c21
-rw-r--r--drivers/dma/xilinx/xilinx_dma.c10
-rw-r--r--drivers/edac/ghes_edac.c4
-rw-r--r--drivers/firmware/arm_sdei.c12
-rw-r--r--drivers/firmware/dmi_scan.c2
-rw-r--r--drivers/firmware/efi/Kconfig1
-rw-r--r--drivers/firmware/efi/cper.c2
-rw-r--r--drivers/firmware/efi/efi.c5
-rw-r--r--drivers/firmware/efi/libstub/Makefile1
-rw-r--r--drivers/firmware/efi/libstub/arm32-stub.c16
-rw-r--r--drivers/firmware/efi/libstub/efi-stub-helper.c24
-rw-r--r--drivers/firmware/efi/rci2-table.c2
-rw-r--r--drivers/firmware/efi/test/efi_test.c8
-rw-r--r--drivers/firmware/efi/tpm.c27
-rw-r--r--drivers/firmware/google/vpd_decode.c2
-rw-r--r--drivers/firmware/psci/psci.c24
-rw-r--r--drivers/gpio/gpio-bd70528.c6
-rw-r--r--drivers/gpio/gpio-eic-sprd.c7
-rw-r--r--drivers/gpio/gpio-intel-mid.c9
-rw-r--r--drivers/gpio/gpio-lynxpoint.c10
-rw-r--r--drivers/gpio/gpio-max77620.c6
-rw-r--r--drivers/gpio/gpio-merrifield.c28
-rw-r--r--drivers/gpio/gpiolib-acpi.c17
-rw-r--r--drivers/gpio/gpiolib-of.c2
-rw-r--r--drivers/gpio/gpiolib.c49
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c42
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c42
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c38
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c54
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c22
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c33
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/calcs/Makefile19
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c24
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_resource.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c22
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/Makefile19
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn21/Makefile19
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/Makefile19
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/Makefile19
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c23
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c4
-rw-r--r--drivers/gpu/drm/amd/powerplay/navi10_ppt.c8
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c2
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c2
-rw-r--r--drivers/gpu/drm/amd/powerplay/vega20_ppt.c2
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_kms.c3
-rw-r--r--drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c4
-rw-r--r--drivers/gpu/drm/bridge/tc358767.c7
-rw-r--r--drivers/gpu/drm/drm_atomic_helper.c15
-rw-r--r--drivers/gpu/drm/drm_edid.c3
-rw-r--r--drivers/gpu/drm/drm_self_refresh_helper.c18
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_dump.c4
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c6
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_mmu.c17
-rw-r--r--drivers/gpu/drm/i915/display/intel_atomic.c1
-rw-r--r--drivers/gpu/drm/i915/display/intel_bios.c22
-rw-r--r--drivers/gpu/drm/i915/display/intel_color.c61
-rw-r--r--drivers/gpu/drm/i915/display/intel_crt.c7
-rw-r--r--drivers/gpu/drm/i915/display/intel_display.c35
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_power.c3
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_types.h1
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp.c12
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpll_mgr.c15
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpll_mgr.h4
-rw-r--r--drivers/gpu/drm/i915/display/intel_fbdev.c9
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdmi.c6
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_context.c5
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_context_types.h7
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c111
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_mman.c19
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object.h6
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object_types.h3
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_pm.c3
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_userptr.c23
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine.h14
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_cs.c16
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pool.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_types.h13
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm.c8
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c164
-rw-r--r--drivers/gpu/drm/i915/gt/intel_mocs.c10
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.c12
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.h2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ringbuffer.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds.c3
-rw-r--r--drivers/gpu/drm/i915/gvt/dmabuf.c4
-rw-r--r--drivers/gpu/drm/i915/i915_cmd_parser.c435
-rw-r--r--drivers/gpu/drm/i915/i915_drv.c12
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h33
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c19
-rw-r--r--drivers/gpu/drm/i915/i915_gem.h6
-rw-r--r--drivers/gpu/drm/i915/i915_getparam.c2
-rw-r--r--drivers/gpu/drm/i915/i915_pmu.c4
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h10
-rw-r--r--drivers/gpu/drm/i915/i915_request.c69
-rw-r--r--drivers/gpu/drm/i915/i915_request.h2
-rw-r--r--drivers/gpu/drm/i915/i915_scheduler.c50
-rw-r--r--drivers/gpu/drm/i915/intel_pch.c1
-rw-r--r--drivers/gpu/drm/i915/intel_pch.h1
-rw-r--r--drivers/gpu/drm/i915/intel_pm.c122
-rw-r--r--drivers/gpu/drm/i915/intel_pm.h3
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_gem.c6
-rw-r--r--drivers/gpu/drm/msm/dsi/dsi_host.c6
-rw-r--r--drivers/gpu/drm/panel/panel-lg-lb035q02.c9
-rw-r--r--drivers/gpu/drm/panel/panel-nec-nl8048hl11.c9
-rw-r--r--drivers/gpu/drm/panel/panel-sony-acx565akm.c9
-rw-r--r--drivers/gpu/drm/panel/panel-tpo-td028ttec1.c3
-rw-r--r--drivers/gpu/drm/panel/panel-tpo-td043mtea1.c9
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_drv.c2
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_gpu.c3
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_job.c16
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_mmu.c15
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_perfcnt.c1
-rw-r--r--drivers/gpu/drm/radeon/radeon_drv.c12
-rw-r--r--drivers/gpu/drm/radeon/si_dpm.c1
-rw-r--r--drivers/gpu/drm/scheduler/sched_main.c19
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_tcon.c2
-rw-r--r--drivers/gpu/drm/tiny/Kconfig1
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo.c9
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo_vm.c16
-rw-r--r--drivers/gpu/drm/v3d/v3d_gem.c5
-rw-r--r--drivers/gpu/drm/xen/xen_drm_front.c12
-rw-r--r--drivers/hid/hid-axff.c11
-rw-r--r--drivers/hid/hid-core.c7
-rw-r--r--drivers/hid/hid-dr.c12
-rw-r--r--drivers/hid/hid-emsff.c12
-rw-r--r--drivers/hid/hid-gaff.c12
-rw-r--r--drivers/hid/hid-google-hammer.c4
-rw-r--r--drivers/hid/hid-holtekff.c12
-rw-r--r--drivers/hid/hid-hyperv.c56
-rw-r--r--drivers/hid/hid-ids.h2
-rw-r--r--drivers/hid/hid-lg2ff.c12
-rw-r--r--drivers/hid/hid-lg3ff.c11
-rw-r--r--drivers/hid/hid-lg4ff.c11
-rw-r--r--drivers/hid/hid-lgff.c11
-rw-r--r--drivers/hid/hid-logitech-hidpp.c248
-rw-r--r--drivers/hid/hid-microsoft.c12
-rw-r--r--drivers/hid/hid-prodikeys.c4
-rw-r--r--drivers/hid/hid-sony.c12
-rw-r--r--drivers/hid/hid-tmff.c12
-rw-r--r--drivers/hid/hid-zpff.c12
-rw-r--r--drivers/hid/i2c-hid/i2c-hid-core.c122
-rw-r--r--drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c19
-rw-r--r--drivers/hid/intel-ish-hid/ishtp/client-buffers.c2
-rw-r--r--drivers/hid/wacom.h15
-rw-r--r--drivers/hid/wacom_wac.c10
-rw-r--r--drivers/hv/vmbus_drv.c6
-rw-r--r--drivers/hwmon/ina3221.c2
-rw-r--r--drivers/hwmon/nct7904.c40
-rw-r--r--drivers/hwtracing/intel_th/gth.c3
-rw-r--r--drivers/hwtracing/intel_th/msu.c11
-rw-r--r--drivers/hwtracing/intel_th/pci.c10
-rw-r--r--drivers/i2c/busses/i2c-aspeed.c54
-rw-r--r--drivers/i2c/busses/i2c-mt65xx.c2
-rw-r--r--drivers/i2c/busses/i2c-stm32f7.c21
-rw-r--r--drivers/i2c/i2c-core-acpi.c28
-rw-r--r--drivers/i2c/i2c-core-of.c4
-rw-r--r--drivers/iio/accel/adxl372.c22
-rw-r--r--drivers/iio/accel/bmc150-accel-core.c2
-rw-r--r--drivers/iio/adc/ad799x.c4
-rw-r--r--drivers/iio/adc/axp288_adc.c32
-rw-r--r--drivers/iio/adc/hx711.c10
-rw-r--r--drivers/iio/adc/meson_saradc.c10
-rw-r--r--drivers/iio/adc/stm32-adc-core.c70
-rw-r--r--drivers/iio/adc/stm32-adc-core.h137
-rw-r--r--drivers/iio/adc/stm32-adc.c113
-rw-r--r--drivers/iio/imu/adis16480.c5
-rw-r--r--drivers/iio/imu/adis_buffer.c10
-rw-r--r--drivers/iio/imu/inv_mpu6050/inv_mpu_core.c9
-rw-r--r--drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h2
-rw-r--r--drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c15
-rw-r--r--drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h2
-rw-r--r--drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c28
-rw-r--r--drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_shub.c15
-rw-r--r--drivers/iio/light/Kconfig1
-rw-r--r--drivers/iio/light/opt3001.c6
-rw-r--r--drivers/iio/light/vcnl4000.c14
-rw-r--r--drivers/iio/proximity/srf04.c29
-rw-r--r--drivers/infiniband/core/cm.c3
-rw-r--r--drivers/infiniband/core/cma.c3
-rw-r--r--drivers/infiniband/core/core_priv.h1
-rw-r--r--drivers/infiniband/core/device.c11
-rw-r--r--drivers/infiniband/core/iwcm.c52
-rw-r--r--drivers/infiniband/core/netlink.c107
-rw-r--r--drivers/infiniband/core/nldev.c14
-rw-r--r--drivers/infiniband/core/security.c2
-rw-r--r--drivers/infiniband/core/umem_odp.c6
-rw-r--r--drivers/infiniband/core/uverbs.h2
-rw-r--r--drivers/infiniband/core/verbs.c9
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c30
-rw-r--r--drivers/infiniband/hw/cxgb4/device.c7
-rw-r--r--drivers/infiniband/hw/cxgb4/mem.c28
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c10
-rw-r--r--drivers/infiniband/hw/hfi1/init.c1
-rw-r--r--drivers/infiniband/hw/hfi1/pcie.c4
-rw-r--r--drivers/infiniband/hw/hfi1/rc.c16
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c10
-rw-r--r--drivers/infiniband/hw/hfi1/tid_rdma.c62
-rw-r--r--drivers/infiniband/hw/hfi1/tid_rdma.h3
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.c10
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hem.h2
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.c6
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_srq.c2
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_verbs.c4
-rw-r--r--drivers/infiniband/hw/mlx5/devx.c58
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h3
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c68
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c58
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c8
-rw-r--r--drivers/infiniband/hw/qedr/main.c2
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c2
-rw-r--r--drivers/infiniband/sw/siw/siw_qp.c17
-rw-r--r--drivers/infiniband/sw/siw/siw_verbs.c2
-rw-r--r--drivers/input/ff-memless.c9
-rw-r--r--drivers/input/misc/da9063_onkey.c5
-rw-r--r--drivers/input/misc/soc_button_array.c17
-rw-r--r--drivers/input/mouse/elantech.c55
-rw-r--r--drivers/input/rmi4/rmi_driver.c6
-rw-r--r--drivers/input/rmi4/rmi_f11.c9
-rw-r--r--drivers/input/rmi4/rmi_f12.c32
-rw-r--r--drivers/input/rmi4/rmi_f54.c5
-rw-r--r--drivers/input/touchscreen/cyttsp4_core.c7
-rw-r--r--drivers/input/touchscreen/goodix.c58
-rw-r--r--drivers/input/touchscreen/st1232.c6
-rw-r--r--drivers/interconnect/core.c4
-rw-r--r--drivers/interconnect/qcom/qcs404.c3
-rw-r--r--drivers/interconnect/qcom/sdm845.c3
-rw-r--r--drivers/iommu/amd_iommu.c12
-rw-r--r--drivers/iommu/amd_iommu_quirks.c13
-rw-r--r--drivers/iommu/amd_iommu_types.h4
-rw-r--r--drivers/iommu/arm-smmu.c1
-rw-r--r--drivers/iommu/intel-iommu.c12
-rw-r--r--drivers/iommu/io-pgtable-arm.c58
-rw-r--r--drivers/iommu/ipmmu-vmsa.c7
-rw-r--r--drivers/iommu/rockchip-iommu.c19
-rw-r--r--drivers/irqchip/irq-al-fic.c12
-rw-r--r--drivers/irqchip/irq-atmel-aic5.c10
-rw-r--r--drivers/irqchip/irq-gic-v3-its.c21
-rw-r--r--drivers/irqchip/irq-gic-v3.c22
-rw-r--r--drivers/irqchip/irq-sifive-plic.c33
-rw-r--r--drivers/isdn/capi/capi.c2
-rw-r--r--drivers/macintosh/windfarm_cpufreq_clamp.c38
-rw-r--r--drivers/md/Kconfig54
-rw-r--r--drivers/md/bcache/Makefile2
-rw-r--r--drivers/md/bcache/alloc.c5
-rw-r--r--drivers/md/bcache/bcache.h4
-rw-r--r--drivers/md/bcache/bset.c17
-rw-r--r--drivers/md/bcache/btree.c19
-rw-r--r--drivers/md/bcache/closure.c7
-rw-r--r--drivers/md/bcache/request.c12
-rw-r--r--drivers/md/bcache/super.c56
-rw-r--r--drivers/md/bcache/sysfs.c7
-rw-r--r--drivers/md/bcache/writeback.c4
-rw-r--r--drivers/md/dm-bio-prison-v1.c27
-rw-r--r--drivers/md/dm-bio-prison-v2.c26
-rw-r--r--drivers/md/dm-cache-target.c105
-rw-r--r--drivers/md/dm-clone-metadata.c29
-rw-r--r--drivers/md/dm-clone-metadata.h4
-rw-r--r--drivers/md/dm-clone-target.c66
-rw-r--r--drivers/md/dm-crypt.c9
-rw-r--r--drivers/md/dm-dust.c97
-rw-r--r--drivers/md/dm-flakey.c25
-rw-r--r--drivers/md/dm-integrity.c28
-rw-r--r--drivers/md/dm-linear.c22
-rw-r--r--drivers/md/dm-raid.c164
-rw-r--r--drivers/md/dm-snap.c94
-rw-r--r--drivers/md/dm-stripe.c15
-rw-r--r--drivers/md/dm-table.c27
-rw-r--r--drivers/md/dm-thin.c118
-rw-r--r--drivers/md/dm-writecache.c5
-rw-r--r--drivers/md/dm-zoned-metadata.c166
-rw-r--r--drivers/md/dm-zoned-reclaim.c8
-rw-r--r--drivers/md/dm-zoned-target.c54
-rw-r--r--drivers/md/dm-zoned.h2
-rw-r--r--drivers/md/dm.c135
-rw-r--r--drivers/md/md-bitmap.c2
-rw-r--r--drivers/md/md-linear.c5
-rw-r--r--drivers/md/md-multipath.c5
-rw-r--r--drivers/md/md.c57
-rw-r--r--drivers/md/md.h4
-rw-r--r--drivers/md/raid0.c9
-rw-r--r--drivers/md/raid1.c6
-rw-r--r--drivers/md/raid10.c7
-rw-r--r--drivers/md/raid5-ppl.c2
-rw-r--r--drivers/md/raid5.c8
-rw-r--r--drivers/media/usb/stkwebcam/stk-webcam.c3
-rw-r--r--drivers/memstick/host/jmb38x_ms.c2
-rw-r--r--drivers/mfd/mt6397-core.c64
-rw-r--r--drivers/misc/fastrpc.c1
-rw-r--r--drivers/misc/mei/bus-fixup.c14
-rw-r--r--drivers/misc/mei/hw-me-regs.h3
-rw-r--r--drivers/misc/mei/hw-me.c21
-rw-r--r--drivers/misc/mei/hw-me.h8
-rw-r--r--drivers/misc/mei/mei_dev.h4
-rw-r--r--drivers/misc/mei/pci-me.c13
-rw-r--r--drivers/mmc/host/cqhci.c3
-rw-r--r--drivers/mmc/host/mxs-mmc.c7
-rw-r--r--drivers/mmc/host/renesas_sdhi_core.c31
-rw-r--r--drivers/mmc/host/sdhci-iproc.c1
-rw-r--r--drivers/mmc/host/sdhci-of-at91.c2
-rw-r--r--drivers/mmc/host/sdhci-omap.c2
-rw-r--r--drivers/mmc/host/sh_mmcif.c6
-rw-r--r--drivers/mtd/chips/cfi_cmdset_0001.c10
-rw-r--r--drivers/mtd/chips/cfi_cmdset_0002.c79
-rw-r--r--drivers/mtd/chips/cfi_cmdset_0020.c8
-rw-r--r--drivers/mtd/chips/cfi_util.c2
-rw-r--r--drivers/mtd/devices/mchp23k256.c20
-rw-r--r--drivers/mtd/devices/spear_smi.c42
-rw-r--r--drivers/mtd/devices/st_spi_fsm.c1
-rw-r--r--drivers/mtd/maps/Kconfig11
-rw-r--r--drivers/mtd/maps/Makefile1
-rw-r--r--drivers/mtd/maps/l440gx.c2
-rw-r--r--drivers/mtd/maps/physmap-core.c5
-rw-r--r--drivers/mtd/maps/physmap-ixp4xx.c132
-rw-r--r--drivers/mtd/maps/physmap-ixp4xx.h17
-rw-r--r--drivers/mtd/mtdchar.c10
-rw-r--r--drivers/mtd/mtdcore.c26
-rw-r--r--drivers/mtd/mtdswap.c8
-rw-r--r--drivers/mtd/nand/raw/Kconfig7
-rw-r--r--drivers/mtd/nand/raw/Makefile1
-rw-r--r--drivers/mtd/nand/raw/au1550nd.c5
-rw-r--r--drivers/mtd/nand/raw/brcmnand/brcmnand.c23
-rw-r--r--drivers/mtd/nand/raw/cadence-nand-controller.c3030
-rw-r--r--drivers/mtd/nand/raw/denali_dt.c59
-rw-r--r--drivers/mtd/nand/raw/hisi504_nand.c4
-rw-r--r--drivers/mtd/nand/raw/lpc32xx_mlc.c1
-rw-r--r--drivers/mtd/nand/raw/marvell_nand.c4
-rw-r--r--drivers/mtd/nand/raw/meson_nand.c4
-rw-r--r--drivers/mtd/nand/raw/mtk_ecc.c4
-rw-r--r--drivers/mtd/nand/raw/mtk_nand.c1
-rw-r--r--drivers/mtd/nand/raw/mxic_nand.c4
-rw-r--r--drivers/mtd/nand/raw/nand_base.c8
-rw-r--r--drivers/mtd/nand/raw/nand_micron.c4
-rw-r--r--drivers/mtd/nand/raw/omap2.c8
-rw-r--r--drivers/mtd/nand/raw/sh_flctl.c4
-rw-r--r--drivers/mtd/nand/raw/stm32_fmc2_nand.c5
-rw-r--r--drivers/mtd/nand/raw/sunxi_nand.c4
-rw-r--r--drivers/mtd/spi-nor/aspeed-smc.c23
-rw-r--r--drivers/mtd/spi-nor/cadence-quadspi.c58
-rw-r--r--drivers/mtd/spi-nor/hisi-sfc.c23
-rw-r--r--drivers/mtd/spi-nor/intel-spi-pci.c6
-rw-r--r--drivers/mtd/spi-nor/intel-spi.c58
-rw-r--r--drivers/mtd/spi-nor/mtk-quadspi.c25
-rw-r--r--drivers/mtd/spi-nor/nxp-spifi.c23
-rw-r--r--drivers/mtd/spi-nor/spi-nor.c1491
-rw-r--r--drivers/mtd/ubi/debug.c131
-rw-r--r--drivers/net/bonding/bond_alb.c2
-rw-r--r--drivers/net/bonding/bond_main.c74
-rw-r--r--drivers/net/can/c_can/c_can.c71
-rw-r--r--drivers/net/can/c_can/c_can.h1
-rw-r--r--drivers/net/can/dev.c1
-rw-r--r--drivers/net/can/flexcan.c11
-rw-r--r--drivers/net/can/m_can/m_can_platform.c4
-rw-r--r--drivers/net/can/rx-offload.c102
-rw-r--r--drivers/net/can/slcan.c1
-rw-r--r--drivers/net/can/spi/mcp251x.c2
-rw-r--r--drivers/net/can/ti_hecc.c232
-rw-r--r--drivers/net/can/usb/gs_usb.c1
-rw-r--r--drivers/net/can/usb/mcba_usb.c3
-rw-r--r--drivers/net/can/usb/peak_usb/pcan_usb.c32
-rw-r--r--drivers/net/can/usb/peak_usb/pcan_usb_core.c2
-rw-r--r--drivers/net/can/usb/usb_8dev.c3
-rw-r--r--drivers/net/can/xilinx_can.c1
-rw-r--r--drivers/net/dsa/b53/b53_common.c1
-rw-r--r--drivers/net/dsa/bcm_sf2.c40
-rw-r--r--drivers/net/dsa/microchip/ksz8795.c4
-rw-r--r--drivers/net/dsa/microchip/ksz8795_spi.c7
-rw-r--r--drivers/net/dsa/microchip/ksz9477_i2c.c6
-rw-r--r--drivers/net/dsa/microchip/ksz9477_reg.h4
-rw-r--r--drivers/net/dsa/microchip/ksz9477_spi.c6
-rw-r--r--drivers/net/dsa/microchip/ksz_common.c2
-rw-r--r--drivers/net/dsa/microchip/ksz_common.h20
-rw-r--r--drivers/net/dsa/mv88e6xxx/ptp.c13
-rw-r--r--drivers/net/dsa/sja1105/Kconfig4
-rw-r--r--drivers/net/dsa/sja1105/sja1105.h4
-rw-r--r--drivers/net/dsa/sja1105/sja1105_dynamic_config.h4
-rw-r--r--drivers/net/dsa/sja1105/sja1105_ptp.h4
-rw-r--r--drivers/net/dsa/sja1105/sja1105_static_config.h4
-rw-r--r--drivers/net/dsa/sja1105/sja1105_tas.h4
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_main.c4
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_nic.c34
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_ring.c3
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c23
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c17
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h7
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h19
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c2
-rw-r--r--drivers/net/ethernet/arc/emac_rockchip.c3
-rw-r--r--drivers/net/ethernet/broadcom/Kconfig4
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c10
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c112
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h3
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet.c18
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet.h1
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmmii.c44
-rw-r--r--drivers/net/ethernet/broadcom/tg3.c4
-rw-r--r--drivers/net/ethernet/cadence/macb_main.c12
-rw-r--r--drivers/net/ethernet/cavium/common/cavium_ptp.h2
-rw-r--r--drivers/net/ethernet/cavium/octeon/octeon_mgmt.c2
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c28
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/sge.c8
-rw-r--r--drivers/net/ethernet/cirrus/ep93xx_eth.c5
-rw-r--r--drivers/net/ethernet/cortina/gemini.c1
-rw-r--r--drivers/net/ethernet/cortina/gemini.h2
-rw-r--r--drivers/net/ethernet/faraday/ftgmac100.c25
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c60
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.h2
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpni.h5
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h2
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dprtc.h2
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c15
-rw-r--r--drivers/net/ethernet/freescale/fec_ptp.c4
-rw-r--r--drivers/net/ethernet/google/gve/gve_rx.c2
-rw-r--r--drivers/net/ethernet/google/gve/gve_tx.c25
-rw-r--r--drivers/net/ethernet/hisilicon/hip04_eth.c16
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hnae.c1
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hnae.h3
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hns_enet.c22
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hnae3.h4
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.h2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c5
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c19
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c55
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h5
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c11
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c28
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h1
-rw-r--r--drivers/net/ethernet/i825xx/lasi_82596.c4
-rw-r--r--drivers/net/ethernet/i825xx/lib82596.c4
-rw-r--r--drivers/net/ethernet/i825xx/sni_82596.c4
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.c5
-rw-r--r--drivers/net/ethernet/intel/e1000/e1000_ethtool.c7
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_common.c3
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_xsk.c15
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_main.c4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_sched.c2
-rw-r--r--drivers/net/ethernet/intel/igb/e1000_82575.c2
-rw-r--r--drivers/net/ethernet/intel/igb/igb_main.c12
-rw-r--r--drivers/net/ethernet/intel/igb/igb_ptp.c17
-rw-r--r--drivers/net/ethernet/intel/igc/igc_main.c7
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c1
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c10
-rw-r--r--drivers/net/ethernet/marvell/mvneta_bm.h32
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/cgx.h4
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/cgx_fw_if.h4
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/common.h4
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/mbox.h4
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/npc.h4
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h4
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu.h4
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h4
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h4
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_ethtool.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_netdev.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/main.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/resource_tracker.c42
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c30
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h13
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h29
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c190
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c14
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c18
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c15
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.c16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.h10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c61
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tx.c35
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c23
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/health.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c17
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mr.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c27
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c22
-rw-r--r--drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c19
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c1
-rw-r--r--drivers/net/ethernet/microchip/lan743x_ptp.c4
-rw-r--r--drivers/net/ethernet/mscc/ocelot.c20
-rw-r--r--drivers/net/ethernet/mscc/ocelot.h2
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_repr.c18
-rw-r--r--drivers/net/ethernet/nxp/lpc_eth.c3
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_lif.c2
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_lif.h2
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_main.c2
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_stats.c29
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_main.c27
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_sriov.c2
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_main.c12
-rw-r--r--drivers/net/ethernet/qualcomm/qca_spi.c11
-rw-r--r--drivers/net/ethernet/qualcomm/qca_spi.h1
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c4
-rw-r--r--drivers/net/ethernet/realtek/r8169_main.c49
-rw-r--r--drivers/net/ethernet/renesas/ravb.h3
-rw-r--r--drivers/net/ethernet/renesas/ravb_main.c26
-rw-r--r--drivers/net/ethernet/renesas/ravb_ptp.c11
-rw-r--r--drivers/net/ethernet/sfc/ptp.c3
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c5
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac5.c1
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac5.h2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c3
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c3
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/hwif.h2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/mmc_core.c6
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c85
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c6
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c137
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c2
-rw-r--r--drivers/net/ethernet/ti/davinci_cpdma.c2
-rw-r--r--drivers/net/fjes/fjes_main.c15
-rw-r--r--drivers/net/hamradio/bpqether.c22
-rw-r--r--drivers/net/hyperv/hyperv_net.h3
-rw-r--r--drivers/net/hyperv/netvsc.c38
-rw-r--r--drivers/net/hyperv/netvsc_drv.c15
-rw-r--r--drivers/net/ipvlan/ipvlan_main.c2
-rw-r--r--drivers/net/macsec.c18
-rw-r--r--drivers/net/macvlan.c19
-rw-r--r--drivers/net/netdevsim/dev.c7
-rw-r--r--drivers/net/netdevsim/fib.c3
-rw-r--r--drivers/net/phy/bcm7xxx.c1
-rw-r--r--drivers/net/phy/dp83640.c16
-rw-r--r--drivers/net/phy/mdio-sun4i.c3
-rw-r--r--drivers/net/phy/mdio_bus.c11
-rw-r--r--drivers/net/phy/micrel.c42
-rw-r--r--drivers/net/phy/phy-c45.c2
-rw-r--r--drivers/net/phy/phy.c3
-rw-r--r--drivers/net/phy/phy_device.c11
-rw-r--r--drivers/net/phy/phylink.c47
-rw-r--r--drivers/net/phy/smsc.c1
-rw-r--r--drivers/net/ppp/ppp_generic.c2
-rw-r--r--drivers/net/slip/slip.c1
-rw-r--r--drivers/net/team/team.c16
-rw-r--r--drivers/net/tun.c4
-rw-r--r--drivers/net/usb/ax88172a.c2
-rw-r--r--drivers/net/usb/cdc_ether.c7
-rw-r--r--drivers/net/usb/cdc_ncm.c6
-rw-r--r--drivers/net/usb/hso.c13
-rw-r--r--drivers/net/usb/lan78xx.c17
-rw-r--r--drivers/net/usb/qmi_wwan.c4
-rw-r--r--drivers/net/usb/r8152.c31
-rw-r--r--drivers/net/usb/sr9800.c2
-rw-r--r--drivers/net/vrf.c1
-rw-r--r--drivers/net/vxlan.c62
-rw-r--r--drivers/net/wimax/i2400m/op-rfkill.c2
-rw-r--r--drivers/net/wireless/ath/ath10k/core.c15
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/acpi.c10
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/scan.h22
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/dbg.c1
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/file.h3
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-csr.h1
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-io.h12
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-prph.h5
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/fw.c43
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c9
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mvm.h6
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/scan.c40
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/sta.c140
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c36
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/drv.c295
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c25
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/trans.c25
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c20
-rw-r--r--drivers/net/wireless/intersil/hostap/hostap_hw.c25
-rw-r--r--drivers/net/wireless/mac80211_hwsim.c2
-rw-r--r--drivers/net/wireless/mediatek/mt76/Makefile2
-rw-r--r--drivers/net/wireless/mediatek/mt76/dma.c6
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76.h6
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x2/pci.c2
-rw-r--r--drivers/net/wireless/mediatek/mt76/pci.c46
-rw-r--r--drivers/net/wireless/ralink/rt2x00/rt2x00.h1
-rw-r--r--drivers/net/wireless/ralink/rt2x00/rt2x00debug.c2
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/pci.c3
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/ps.c6
-rw-r--r--drivers/net/wireless/virt_wifi.c54
-rw-r--r--drivers/net/xen-netback/interface.c1
-rw-r--r--drivers/nfc/fdp/i2c.c2
-rw-r--r--drivers/nfc/nxp-nci/i2c.c6
-rw-r--r--drivers/nfc/pn533/usb.c9
-rw-r--r--drivers/nfc/port100.c2
-rw-r--r--drivers/nfc/st21nfca/core.c1
-rw-r--r--drivers/nvme/host/Kconfig10
-rw-r--r--drivers/nvme/host/Makefile1
-rw-r--r--drivers/nvme/host/core.c136
-rw-r--r--drivers/nvme/host/fabrics.h3
-rw-r--r--drivers/nvme/host/fc.c49
-rw-r--r--drivers/nvme/host/hwmon.c259
-rw-r--r--drivers/nvme/host/multipath.c24
-rw-r--r--drivers/nvme/host/nvme.h38
-rw-r--r--drivers/nvme/host/pci.c92
-rw-r--r--drivers/nvme/host/rdma.c32
-rw-r--r--drivers/nvme/host/tcp.c17
-rw-r--r--drivers/nvme/target/admin-cmd.c133
-rw-r--r--drivers/nvme/target/core.c20
-rw-r--r--drivers/nvme/target/discovery.c70
-rw-r--r--drivers/nvme/target/fabrics-cmd.c15
-rw-r--r--drivers/nvme/target/fc.c31
-rw-r--r--drivers/nvme/target/io-cmd-bdev.c43
-rw-r--r--drivers/nvme/target/io-cmd-file.c20
-rw-r--r--drivers/nvme/target/loop.c11
-rw-r--r--drivers/nvme/target/nvmet.h10
-rw-r--r--drivers/nvme/target/rdma.c8
-rw-r--r--drivers/nvme/target/tcp.c14
-rw-r--r--drivers/of/of_reserved_mem.c4
-rw-r--r--drivers/of/unittest.c1
-rw-r--r--drivers/opp/core.c16
-rw-r--r--drivers/opp/of.c9
-rw-r--r--drivers/parisc/sba_iommu.c8
-rw-r--r--drivers/pci/pci.c24
-rw-r--r--drivers/perf/arm-cci.c4
-rw-r--r--drivers/perf/arm-ccn.c4
-rw-r--r--drivers/perf/arm_smmuv3_pmu.c5
-rw-r--r--drivers/perf/fsl_imx8_ddr_perf.c124
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c5
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_hha_pmu.c4
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c4
-rw-r--r--drivers/perf/hisilicon/hisi_uncore_pmu.c26
-rw-r--r--drivers/perf/thunderx2_pmu.c267
-rw-r--r--drivers/perf/xgene_pmu.c14
-rw-r--r--drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c124
-rw-r--r--drivers/pinctrl/aspeed/pinmux-aspeed.h3
-rw-r--r--drivers/pinctrl/bcm/pinctrl-iproc-gpio.c12
-rw-r--r--drivers/pinctrl/bcm/pinctrl-ns2-mux.c4
-rw-r--r--drivers/pinctrl/berlin/pinctrl-as370.c2
-rw-r--r--drivers/pinctrl/intel/pinctrl-cherryview.c30
-rw-r--r--drivers/pinctrl/intel/pinctrl-intel.c48
-rw-r--r--drivers/pinctrl/mvebu/pinctrl-armada-37xx.c26
-rw-r--r--drivers/pinctrl/pinctrl-stmfx.c16
-rw-r--r--drivers/platform/x86/classmate-laptop.c12
-rw-r--r--drivers/platform/x86/i2c-multi-instantiate.c1
-rw-r--r--drivers/platform/x86/intel_punit_ipc.c3
-rw-r--r--drivers/ptp/Kconfig4
-rw-r--r--drivers/ptp/ptp_chardev.c20
-rw-r--r--drivers/pwm/core.c9
-rw-r--r--drivers/pwm/pwm-bcm-iproc.c1
-rw-r--r--drivers/regulator/core.c13
-rw-r--r--drivers/regulator/da9062-regulator.c118
-rw-r--r--drivers/regulator/fixed.c5
-rw-r--r--drivers/regulator/lochnagar-regulator.c1
-rw-r--r--drivers/regulator/of_regulator.c27
-rw-r--r--drivers/regulator/pfuze100-regulator.c8
-rw-r--r--drivers/regulator/qcom-rpmh-regulator.c4
-rw-r--r--drivers/regulator/ti-abb-regulator.c26
-rw-r--r--drivers/reset/core.c5
-rw-r--r--drivers/s390/block/dasd_genhd.c4
-rw-r--r--drivers/s390/cio/cio.h1
-rw-r--r--drivers/s390/cio/css.c7
-rw-r--r--drivers/s390/cio/device.c2
-rw-r--r--drivers/s390/crypto/zcrypt_api.c3
-rw-r--r--drivers/s390/net/qeth_core.h1
-rw-r--r--drivers/s390/net/qeth_core_main.c10
-rw-r--r--drivers/s390/net/qeth_l2_main.c44
-rw-r--r--drivers/s390/net/qeth_l2_sys.c14
-rw-r--r--drivers/s390/scsi/zfcp_fsf.c16
-rw-r--r--drivers/scsi/Kconfig2
-rw-r--r--drivers/scsi/ch.c1
-rw-r--r--drivers/scsi/device_handler/scsi_dh_alua.c21
-rw-r--r--drivers/scsi/hpsa.c4
-rw-r--r--drivers/scsi/lpfc/lpfc_init.c2
-rw-r--r--drivers/scsi/lpfc/lpfc_nportdisc.c4
-rw-r--r--drivers/scsi/lpfc/lpfc_scsi.c2
-rw-r--r--drivers/scsi/lpfc/lpfc_sli.c2
-rw-r--r--drivers/scsi/qla2xxx/qla_attr.c7
-rw-r--r--drivers/scsi/qla2xxx/qla_bsg.c6
-rw-r--r--drivers/scsi/qla2xxx/qla_isr.c2
-rw-r--r--drivers/scsi/qla2xxx/qla_mbx.c3
-rw-r--r--drivers/scsi/qla2xxx/qla_mid.c8
-rw-r--r--drivers/scsi/qla2xxx/qla_os.c16
-rw-r--r--drivers/scsi/scsi_error.c3
-rw-r--r--drivers/scsi/scsi_lib.c7
-rw-r--r--drivers/scsi/scsi_sysfs.c11
-rw-r--r--drivers/scsi/sd.c21
-rw-r--r--drivers/scsi/sd.h12
-rw-r--r--drivers/scsi/sd_zbc.c278
-rw-r--r--drivers/scsi/sni_53c710.c4
-rw-r--r--drivers/scsi/ufs/ufs_bsg.c4
-rw-r--r--drivers/soc/imx/gpc.c8
-rw-r--r--drivers/soc/imx/soc-imx-scu.c2
-rw-r--r--drivers/soundwire/Kconfig1
-rw-r--r--drivers/soundwire/intel.c4
-rw-r--r--drivers/soundwire/slave.c3
-rw-r--r--drivers/staging/exfat/Kconfig3
-rw-r--r--drivers/staging/exfat/Makefile2
-rw-r--r--drivers/staging/exfat/exfat.h2
-rw-r--r--drivers/staging/exfat/exfat_blkdev.c2
-rw-r--r--drivers/staging/exfat/exfat_cache.c2
-rw-r--r--drivers/staging/exfat/exfat_core.c2
-rw-r--r--drivers/staging/exfat/exfat_nls.c2
-rw-r--r--drivers/staging/exfat/exfat_super.c7
-rw-r--r--drivers/staging/exfat/exfat_upcase.c2
-rw-r--r--drivers/staging/fbtft/Kconfig12
-rw-r--r--drivers/staging/fbtft/Makefile4
-rw-r--r--drivers/staging/fbtft/fbtft-core.c7
-rw-r--r--drivers/staging/fbtft/fbtft_device.c1261
-rw-r--r--drivers/staging/fbtft/flexfb.c851
-rw-r--r--drivers/staging/octeon/ethernet-tx.c9
-rw-r--r--drivers/staging/octeon/octeon-stubs.h2
-rw-r--r--drivers/staging/rtl8188eu/hal/hal8188e_rate_adaptive.c2
-rw-r--r--drivers/staging/rtl8188eu/os_dep/usb_intf.c6
-rw-r--r--drivers/staging/speakup/sysfs-driver-speakup369
-rw-r--r--drivers/staging/vc04_services/bcm2835-audio/bcm2835-pcm.c4
-rw-r--r--drivers/staging/vc04_services/bcm2835-audio/bcm2835-vchiq.c1
-rw-r--r--drivers/staging/vt6655/device_main.c4
-rw-r--r--drivers/staging/wlan-ng/cfg80211.c6
-rw-r--r--drivers/target/iscsi/cxgbit/cxgbit_cm.c3
-rw-r--r--drivers/target/target_core_device.c21
-rw-r--r--drivers/thermal/cpu_cooling.c14
-rw-r--r--drivers/thunderbolt/nhi_ops.c1
-rw-r--r--drivers/thunderbolt/switch.c28
-rw-r--r--drivers/tty/n_hdlc.c5
-rw-r--r--drivers/tty/serial/8250/8250_men_mcb.c8
-rw-r--r--drivers/tty/serial/8250/8250_omap.c5
-rw-r--r--drivers/tty/serial/Kconfig1
-rw-r--r--drivers/tty/serial/fsl_linflexuart.c21
-rw-r--r--drivers/tty/serial/fsl_lpuart.c2
-rw-r--r--drivers/tty/serial/imx.c4
-rw-r--r--drivers/tty/serial/owl-uart.c2
-rw-r--r--drivers/tty/serial/rda-uart.c2
-rw-r--r--drivers/tty/serial/serial_core.c2
-rw-r--r--drivers/tty/serial/serial_mctrl_gpio.c3
-rw-r--r--drivers/tty/serial/sh-sci.c8
-rw-r--r--drivers/tty/serial/uartlite.c3
-rw-r--r--drivers/tty/serial/xilinx_uartps.c8
-rw-r--r--drivers/usb/cdns3/cdns3-pci-wrap.c3
-rw-r--r--drivers/usb/cdns3/core.c22
-rw-r--r--drivers/usb/cdns3/ep0.c12
-rw-r--r--drivers/usb/cdns3/gadget.c78
-rw-r--r--drivers/usb/cdns3/host-export.h1
-rw-r--r--drivers/usb/cdns3/host.c1
-rw-r--r--drivers/usb/class/usblp.c12
-rw-r--r--drivers/usb/core/config.c5
-rw-r--r--drivers/usb/dwc3/Kconfig1
-rw-r--r--drivers/usb/dwc3/core.c3
-rw-r--r--drivers/usb/dwc3/drd.c7
-rw-r--r--drivers/usb/dwc3/dwc3-pci.c2
-rw-r--r--drivers/usb/dwc3/gadget.c13
-rw-r--r--drivers/usb/dwc3/host.c7
-rw-r--r--drivers/usb/gadget/composite.c4
-rw-r--r--drivers/usb/gadget/configfs.c110
-rw-r--r--drivers/usb/gadget/udc/Kconfig2
-rw-r--r--drivers/usb/gadget/udc/atmel_usba_udc.c6
-rw-r--r--drivers/usb/gadget/udc/core.c11
-rw-r--r--drivers/usb/gadget/udc/dummy_hcd.c3
-rw-r--r--drivers/usb/gadget/udc/fsl_udc_core.c2
-rw-r--r--drivers/usb/gadget/udc/lpc32xx_udc.c10
-rw-r--r--drivers/usb/gadget/udc/renesas_usb3.c11
-rw-r--r--drivers/usb/host/xhci-debugfs.c24
-rw-r--r--drivers/usb/host/xhci-ext-caps.c1
-rw-r--r--drivers/usb/host/xhci-ring.c6
-rw-r--r--drivers/usb/host/xhci.c116
-rw-r--r--drivers/usb/image/microtek.c4
-rw-r--r--drivers/usb/misc/Kconfig10
-rw-r--r--drivers/usb/misc/Makefile1
-rw-r--r--drivers/usb/misc/adutux.c24
-rw-r--r--drivers/usb/misc/chaoskey.c5
-rw-r--r--drivers/usb/misc/iowarrior.c48
-rw-r--r--drivers/usb/misc/ldusb.c60
-rw-r--r--drivers/usb/misc/legousbtower.c63
-rw-r--r--drivers/usb/misc/rio500.c554
-rw-r--r--drivers/usb/misc/rio500_usb.h20
-rw-r--r--drivers/usb/misc/usblcd.c60
-rw-r--r--drivers/usb/misc/yurex.c18
-rw-r--r--drivers/usb/mtu3/mtu3_core.c1
-rw-r--r--drivers/usb/renesas_usbhs/common.c12
-rw-r--r--drivers/usb/renesas_usbhs/common.h1
-rw-r--r--drivers/usb/renesas_usbhs/fifo.c2
-rw-r--r--drivers/usb/renesas_usbhs/fifo.h1
-rw-r--r--drivers/usb/renesas_usbhs/mod_gadget.c22
-rw-r--r--drivers/usb/renesas_usbhs/pipe.c15
-rw-r--r--drivers/usb/renesas_usbhs/pipe.h1
-rw-r--r--drivers/usb/serial/ftdi_sio.c3
-rw-r--r--drivers/usb/serial/ftdi_sio_ids.h9
-rw-r--r--drivers/usb/serial/keyspan.c4
-rw-r--r--drivers/usb/serial/option.c11
-rw-r--r--drivers/usb/serial/ti_usb_3410_5052.c12
-rw-r--r--drivers/usb/serial/usb-serial.c5
-rw-r--r--drivers/usb/serial/whiteheat.c13
-rw-r--r--drivers/usb/serial/whiteheat.h2
-rw-r--r--drivers/usb/storage/scsiglue.c10
-rw-r--r--drivers/usb/storage/uas.c20
-rw-r--r--drivers/usb/typec/tcpm/tcpm.c14
-rw-r--r--drivers/usb/typec/ucsi/displayport.c2
-rw-r--r--drivers/usb/typec/ucsi/ucsi_ccg.c42
-rw-r--r--drivers/usb/usb-skeleton.c19
-rw-r--r--drivers/usb/usbip/vhci_hcd.c4
-rw-r--r--drivers/usb/usbip/vhci_tx.c3
-rw-r--r--drivers/vfio/vfio_iommu_type1.c2
-rw-r--r--drivers/vhost/test.c2
-rw-r--r--drivers/vhost/vringh.c8
-rw-r--r--drivers/video/fbdev/c2p_core.h8
-rw-r--r--drivers/virt/vboxguest/vboxguest_utils.c3
-rw-r--r--drivers/virtio/virtio_balloon.c20
-rw-r--r--drivers/virtio/virtio_ring.c11
-rw-r--r--drivers/w1/slaves/Kconfig1
-rw-r--r--drivers/watchdog/bd70528_wdt.c1
-rw-r--r--drivers/watchdog/cpwd.c8
-rw-r--r--drivers/watchdog/imx_sc_wdt.c8
-rw-r--r--drivers/watchdog/meson_gxbb_wdt.c4
-rw-r--r--drivers/watchdog/pm8916_wdt.c15
-rw-r--r--drivers/xen/gntdev.c13
-rw-r--r--drivers/xen/grant-table.c3
-rw-r--r--drivers/xen/pvcalls-back.c2
-rw-r--r--fs/Kconfig3
-rw-r--r--fs/Makefile1
-rw-r--r--fs/affs/affs.h16
-rw-r--r--fs/affs/super.c10
-rw-r--r--fs/afs/callback.c1
-rw-r--r--fs/afs/dir.c7
-rw-r--r--fs/afs/rxrpc.c1
-rw-r--r--fs/afs/super.c1
-rw-r--r--fs/aio.c10
-rw-r--r--fs/autofs/expire.c5
-rw-r--r--fs/block_dev.c69
-rw-r--r--fs/btrfs/Kconfig2
-rw-r--r--fs/btrfs/async-thread.c113
-rw-r--r--fs/btrfs/async-thread.h37
-rw-r--r--fs/btrfs/block-group.c588
-rw-r--r--fs/btrfs/block-group.h51
-rw-r--r--fs/btrfs/btrfs_inode.h3
-rw-r--r--fs/btrfs/compression.c269
-rw-r--r--fs/btrfs/compression.h46
-rw-r--r--fs/btrfs/ctree.c287
-rw-r--r--fs/btrfs/ctree.h56
-rw-r--r--fs/btrfs/delalloc-space.c28
-rw-r--r--fs/btrfs/delayed-inode.c18
-rw-r--r--fs/btrfs/dev-replace.c2
-rw-r--r--fs/btrfs/dev-replace.h2
-rw-r--r--fs/btrfs/disk-io.c371
-rw-r--r--fs/btrfs/disk-io.h4
-rw-r--r--fs/btrfs/export.c4
-rw-r--r--fs/btrfs/extent-io-tree.h248
-rw-r--r--fs/btrfs/extent-tree.c146
-rw-r--r--fs/btrfs/extent_io.c120
-rw-r--r--fs/btrfs/extent_io.h231
-rw-r--r--fs/btrfs/extent_map.c6
-rw-r--r--fs/btrfs/extent_map.h11
-rw-r--r--fs/btrfs/file-item.c1
-rw-r--r--fs/btrfs/file.c130
-rw-r--r--fs/btrfs/free-space-cache.c118
-rw-r--r--fs/btrfs/free-space-cache.h39
-rw-r--r--fs/btrfs/free-space-tree.c133
-rw-r--r--fs/btrfs/free-space-tree.h18
-rw-r--r--fs/btrfs/inode-map.c4
-rw-r--r--fs/btrfs/inode.c215
-rw-r--r--fs/btrfs/ioctl.c61
-rw-r--r--fs/btrfs/locking.c309
-rw-r--r--fs/btrfs/locking.h13
-rw-r--r--fs/btrfs/lzo.c53
-rw-r--r--fs/btrfs/misc.h11
-rw-r--r--fs/btrfs/ordered-data.c7
-rw-r--r--fs/btrfs/ordered-data.h2
-rw-r--r--fs/btrfs/print-tree.c6
-rw-r--r--fs/btrfs/props.c6
-rw-r--r--fs/btrfs/qgroup.c15
-rw-r--r--fs/btrfs/qgroup.h2
-rw-r--r--fs/btrfs/raid56.c101
-rw-r--r--fs/btrfs/reada.c19
-rw-r--r--fs/btrfs/ref-verify.c2
-rw-r--r--fs/btrfs/relocation.c52
-rw-r--r--fs/btrfs/scrub.c100
-rw-r--r--fs/btrfs/send.c47
-rw-r--r--fs/btrfs/space-info.c29
-rw-r--r--fs/btrfs/space-info.h3
-rw-r--r--fs/btrfs/super.c26
-rw-r--r--fs/btrfs/sysfs.c47
-rw-r--r--fs/btrfs/sysfs.h2
-rw-r--r--fs/btrfs/tests/btrfs-tests.c11
-rw-r--r--fs/btrfs/tests/btrfs-tests.h4
-rw-r--r--fs/btrfs/tests/free-space-tests.c15
-rw-r--r--fs/btrfs/tests/free-space-tree-tests.c101
-rw-r--r--fs/btrfs/transaction.c98
-rw-r--r--fs/btrfs/transaction.h5
-rw-r--r--fs/btrfs/tree-checker.c219
-rw-r--r--fs/btrfs/tree-log.c172
-rw-r--r--fs/btrfs/volumes.c491
-rw-r--r--fs/btrfs/volumes.h24
-rw-r--r--fs/btrfs/zlib.c52
-rw-r--r--fs/btrfs/zstd.c47
-rw-r--r--fs/ceph/caps.c10
-rw-r--r--fs/ceph/dir.c15
-rw-r--r--fs/ceph/file.c44
-rw-r--r--fs/ceph/inode.c1
-rw-r--r--fs/ceph/mds_client.c21
-rw-r--r--fs/ceph/super.c11
-rw-r--r--fs/cifs/cifsfs.c30
-rw-r--r--fs/cifs/cifsglob.h7
-rw-r--r--fs/cifs/cifsproto.h1
-rw-r--r--fs/cifs/connect.c22
-rw-r--r--fs/cifs/dir.c8
-rw-r--r--fs/cifs/file.c62
-rw-r--r--fs/cifs/inode.c8
-rw-r--r--fs/cifs/netmisc.c4
-rw-r--r--fs/cifs/smb1ops.c3
-rw-r--r--fs/cifs/smb2file.c2
-rw-r--r--fs/cifs/smb2ops.c3
-rw-r--r--fs/cifs/smb2pdu.c14
-rw-r--r--fs/cifs/smb2pdu.h1
-rw-r--r--fs/cifs/smb2proto.h4
-rw-r--r--fs/cifs/transport.c42
-rw-r--r--fs/configfs/symlink.c2
-rw-r--r--fs/cramfs/inode.c4
-rw-r--r--fs/crypto/bio.c29
-rw-r--r--fs/crypto/crypto.c124
-rw-r--r--fs/crypto/fscrypt_private.h25
-rw-r--r--fs/crypto/keyring.c6
-rw-r--r--fs/crypto/keysetup.c158
-rw-r--r--fs/crypto/keysetup_v1.c4
-rw-r--r--fs/crypto/policy.c41
-rw-r--r--fs/dax.c5
-rw-r--r--fs/direct-io.c3
-rw-r--r--fs/ecryptfs/inode.c84
-rw-r--r--fs/exportfs/expfs.c31
-rw-r--r--fs/ext4/Kconfig17
-rw-r--r--fs/ext4/Makefile1
-rw-r--r--fs/ext4/ext4.h2
-rw-r--r--fs/ext4/inode-test.c272
-rw-r--r--fs/ext4/inode.c5
-rw-r--r--fs/ext4/super.c14
-rw-r--r--fs/f2fs/file.c5
-rw-r--r--fs/f2fs/segment.c3
-rw-r--r--fs/f2fs/super.c77
-rw-r--r--fs/fcntl.c2
-rw-r--r--fs/file.c2
-rw-r--r--fs/fs-writeback.c20
-rw-r--r--fs/fuse/Makefile3
-rw-r--r--fs/fuse/dev.c4
-rw-r--r--fs/fuse/dir.c16
-rw-r--r--fs/fuse/file.c14
-rw-r--r--fs/fuse/fuse_i.h4
-rw-r--r--fs/fuse/inode.c4
-rw-r--r--fs/fuse/virtio_fs.c169
-rw-r--r--fs/gfs2/ops_fstype.c21
-rw-r--r--fs/io-wq.c1065
-rw-r--r--fs/io-wq.h74
-rw-r--r--fs/io_uring.c2455
-rw-r--r--fs/libfs.c140
-rw-r--r--fs/namespace.c15
-rw-r--r--fs/nfs/delegation.c12
-rw-r--r--fs/nfs/delegation.h1
-rw-r--r--fs/nfs/direct.c106
-rw-r--r--fs/nfs/nfs4proc.c8
-rw-r--r--fs/nfs/write.c5
-rw-r--r--fs/ocfs2/aops.c25
-rw-r--r--fs/ocfs2/file.c136
-rw-r--r--fs/ocfs2/ioctl.c2
-rw-r--r--fs/ocfs2/journal.c3
-rw-r--r--fs/ocfs2/localalloc.c3
-rw-r--r--fs/open.c6
-rw-r--r--fs/pipe.c6
-rw-r--r--fs/proc/meminfo.c4
-rw-r--r--fs/proc/page.c28
-rw-r--r--fs/readdir.c48
-rw-r--r--fs/super.c5
-rw-r--r--fs/tracefs/inode.c46
-rw-r--r--fs/xfs/libxfs/xfs_ag.c5
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c21
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c6
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h3
-rw-r--r--fs/xfs/libxfs/xfs_dir2_block.c2
-rw-r--r--fs/xfs/libxfs/xfs_fs.h8
-rw-r--r--fs/xfs/scrub/refcount.c3
-rw-r--r--fs/xfs/xfs_bmap_util.c4
-rw-r--r--fs/xfs/xfs_buf.c12
-rw-r--r--fs/xfs/xfs_log.c2
-rw-r--r--fs/xfs/xfs_log_recover.c2
-rw-r--r--include/Kbuild2
-rw-r--r--include/acpi/processor.h20
-rw-r--r--include/asm-generic/vdso/vsyscall.h7
-rw-r--r--include/asm-generic/vmlinux.lds.h14
-rw-r--r--include/drm/drm_gem_shmem_helper.h13
-rw-r--r--include/drm/drm_self_refresh_helper.h3
-rw-r--r--include/keys/trusted_tpm.h (renamed from include/keys/trusted.h)49
-rw-r--r--include/kunit/assert.h356
-rw-r--r--include/kunit/string-stream.h51
-rw-r--r--include/kunit/test.h1490
-rw-r--r--include/kunit/try-catch.h75
-rw-r--r--include/linux/arm-smccc.h16
-rw-r--r--include/linux/arm_sdei.h6
-rw-r--r--include/linux/bitmap.h3
-rw-r--r--include/linux/blk-cgroup.h199
-rw-r--r--include/linux/blk-mq.h300
-rw-r--r--include/linux/blk_types.h28
-rw-r--r--include/linux/blkdev.h31
-rw-r--r--include/linux/bpf.h4
-rw-r--r--include/linux/can/core.h1
-rw-r--r--include/linux/compiler_attributes.h17
-rw-r--r--include/linux/cpu.h30
-rw-r--r--include/linux/cpufreq.h7
-rw-r--r--include/linux/device-mapper.h27
-rw-r--r--include/linux/dma-direct.h2
-rw-r--r--include/linux/dynamic_debug.h6
-rw-r--r--include/linux/efi.h18
-rw-r--r--include/linux/export.h20
-rw-r--r--include/linux/filter.h1
-rw-r--r--include/linux/fs.h6
-rw-r--r--include/linux/fscrypt.h35
-rw-r--r--include/linux/ftrace.h40
-rw-r--r--include/linux/genhd.h5
-rw-r--r--include/linux/gfp.h23
-rw-r--r--include/linux/gpio/driver.h8
-rw-r--r--include/linux/hwmon.h2
-rw-r--r--include/linux/idr.h2
-rw-r--r--include/linux/if_macvlan.h1
-rw-r--r--include/linux/if_team.h1
-rw-r--r--include/linux/if_vlan.h11
-rw-r--r--include/linux/intel-iommu.h6
-rw-r--r--include/linux/irqchip/arm-gic-v3.h2
-rw-r--r--include/linux/kvm_host.h7
-rw-r--r--include/linux/leds.h5
-rw-r--r--include/linux/libata.h13
-rw-r--r--include/linux/memcontrol.h29
-rw-r--r--include/linux/memory.h1
-rw-r--r--include/linux/micrel_phy.h2
-rw-r--r--include/linux/mlx5/mlx5_ifc.h3
-rw-r--r--include/linux/mm.h5
-rw-r--r--include/linux/mm_types.h5
-rw-r--r--include/linux/mmzone.h45
-rw-r--r--include/linux/mtd/spi-nor.h64
-rw-r--r--include/linux/netdevice.h61
-rw-r--r--include/linux/nvme-fc.h182
-rw-r--r--include/linux/nvme.h60
-rw-r--r--include/linux/page-flags.h20
-rw-r--r--include/linux/page_ext.h10
-rw-r--r--include/linux/perf_event.h2
-rw-r--r--include/linux/platform_data/dma-imx-sdma.h3
-rw-r--r--include/linux/platform_data/intel-spi.h1
-rw-r--r--include/linux/platform_device.h2
-rw-r--r--include/linux/pm_qos.h56
-rw-r--r--include/linux/psci.h9
-rw-r--r--include/linux/radix-tree.h18
-rw-r--r--include/linux/reset-controller.h4
-rw-r--r--include/linux/reset.h2
-rw-r--r--include/linux/sbitmap.h9
-rw-r--r--include/linux/sched.h2
-rw-r--r--include/linux/security.h1
-rw-r--r--include/linux/sed-opal.h1
-rw-r--r--include/linux/skbuff.h47
-rw-r--r--include/linux/skmsg.h9
-rw-r--r--include/linux/slab.h4
-rw-r--r--include/linux/socket.h5
-rw-r--r--include/linux/stat.h3
-rw-r--r--include/linux/string.h21
-rw-r--r--include/linux/sunrpc/bc_xprt.h5
-rw-r--r--include/linux/sunrpc/xprtsock.h1
-rw-r--r--include/linux/sysfs.h4
-rw-r--r--include/linux/tcp.h6
-rw-r--r--include/linux/tpm.h250
-rw-r--r--include/linux/tpm_eventlog.h16
-rw-r--r--include/linux/uaccess.h6
-rw-r--r--include/linux/virtio_vsock.h1
-rw-r--r--include/linux/xarray.h4
-rw-r--r--include/net/bonding.h5
-rw-r--r--include/net/busy_poll.h6
-rw-r--r--include/net/cfg80211.h8
-rw-r--r--include/net/devlink.h5
-rw-r--r--include/net/flow_dissector.h3
-rw-r--r--include/net/fq.h2
-rw-r--r--include/net/fq_impl.h8
-rw-r--r--include/net/hwbm.h10
-rw-r--r--include/net/ip.h4
-rw-r--r--include/net/ip_vs.h1
-rw-r--r--include/net/llc_conn.h2
-rw-r--r--include/net/neighbour.h4
-rw-r--r--include/net/net_namespace.h27
-rw-r--r--include/net/netfilter/nf_tables.h3
-rw-r--r--include/net/request_sock.h4
-rw-r--r--include/net/sch_generic.h4
-rw-r--r--include/net/sctp/sctp.h5
-rw-r--r--include/net/sock.h52
-rw-r--r--include/net/tcp.h10
-rw-r--r--include/net/tls.h7
-rw-r--r--include/net/vxlan.h1
-rw-r--r--include/rdma/ib_verbs.h2
-rw-r--r--include/scsi/scsi_eh.h1
-rw-r--r--include/sound/hda_register.h3
-rw-r--r--include/sound/simple_card_utils.h8
-rw-r--r--include/trace/events/btrfs.h134
-rw-r--r--include/trace/events/io_uring.h358
-rw-r--r--include/trace/events/rxrpc.h18
-rw-r--r--include/trace/events/sock.h4
-rw-r--r--include/trace/events/tcp.h2
-rw-r--r--include/trace/events/wbt.h12
-rw-r--r--include/uapi/linux/blkzoned.h17
-rw-r--r--include/uapi/linux/btrfs.h5
-rw-r--r--include/uapi/linux/btrfs_tree.h23
-rw-r--r--include/uapi/linux/can.h2
-rw-r--r--include/uapi/linux/can/bcm.h2
-rw-r--r--include/uapi/linux/can/error.h2
-rw-r--r--include/uapi/linux/can/gw.h2
-rw-r--r--include/uapi/linux/can/j1939.h2
-rw-r--r--include/uapi/linux/can/netlink.h2
-rw-r--r--include/uapi/linux/can/raw.h2
-rw-r--r--include/uapi/linux/can/vxcan.h2
-rw-r--r--include/uapi/linux/devlink.h1
-rw-r--r--include/uapi/linux/fcntl.h9
-rw-r--r--include/uapi/linux/fscrypt.h3
-rw-r--r--include/uapi/linux/fuse.h37
-rw-r--r--include/uapi/linux/io_uring.h24
-rw-r--r--include/uapi/linux/nvme_ioctl.h1
-rw-r--r--include/uapi/linux/ptp_clock.h5
-rw-r--r--include/uapi/linux/sched.h4
-rw-r--r--include/uapi/linux/sed-opal.h20
-rw-r--r--include/uapi/linux/serial_core.h2
-rw-r--r--include/uapi/linux/stat.h2
-rw-r--r--init/Kconfig1
-rw-r--r--init/initramfs.c8
-rw-r--r--kernel/Makefile2
-rw-r--r--kernel/audit_watch.c2
-rw-r--r--kernel/bpf/cgroup.c4
-rw-r--r--kernel/bpf/core.c2
-rw-r--r--kernel/bpf/devmap.c33
-rw-r--r--kernel/bpf/offload.c4
-rw-r--r--kernel/bpf/syscall.c38
-rw-r--r--kernel/cgroup/cgroup.c5
-rw-r--r--kernel/cgroup/cpuset.c3
-rw-r--r--kernel/cpu.c27
-rw-r--r--kernel/dma/direct.c13
-rw-r--r--kernel/events/core.c74
-rw-r--r--kernel/events/uprobes.c13
-rw-r--r--kernel/fork.c43
-rw-r--r--kernel/freezer.c6
-rwxr-xr-xkernel/gen_kheaders.sh11
-rw-r--r--kernel/irq/irqdomain.c2
-rw-r--r--kernel/kthread.c6
-rw-r--r--kernel/module.c2
-rw-r--r--kernel/panic.c1
-rw-r--r--kernel/power/main.c1
-rw-r--r--kernel/power/qos.c246
-rw-r--r--kernel/sched/core.c44
-rw-r--r--kernel/sched/cputime.c6
-rw-r--r--kernel/sched/deadline.c40
-rw-r--r--kernel/sched/fair.c80
-rw-r--r--kernel/sched/idle.c9
-rw-r--r--kernel/sched/rt.c37
-rw-r--r--kernel/sched/sched.h30
-rw-r--r--kernel/sched/stop_task.c18
-rw-r--r--kernel/sched/topology.c11
-rw-r--r--kernel/signal.c2
-rw-r--r--kernel/stacktrace.c6
-rw-r--r--kernel/stop_machine.c10
-rw-r--r--kernel/sysctl-test.c392
-rw-r--r--kernel/sysctl.c4
-rw-r--r--kernel/time/hrtimer.c8
-rw-r--r--kernel/time/ntp.c2
-rw-r--r--kernel/time/posix-cpu-timers.c6
-rw-r--r--kernel/time/sched_clock.c2
-rw-r--r--kernel/time/vsyscall.c9
-rw-r--r--kernel/trace/ftrace.c61
-rw-r--r--kernel/trace/trace.c139
-rw-r--r--kernel/trace/trace.h2
-rw-r--r--kernel/trace/trace_dynevent.c4
-rw-r--r--kernel/trace/trace_event_perf.c4
-rw-r--r--kernel/trace/trace_events.c35
-rw-r--r--kernel/trace/trace_events_hist.c15
-rw-r--r--kernel/trace/trace_events_trigger.c8
-rw-r--r--kernel/trace/trace_hwlat.c4
-rw-r--r--kernel/trace/trace_kprobe.c12
-rw-r--r--kernel/trace/trace_printk.c7
-rw-r--r--kernel/trace/trace_stack.c8
-rw-r--r--kernel/trace/trace_stat.c6
-rw-r--r--kernel/trace/trace_uprobe.c11
-rw-r--r--lib/Kconfig1
-rw-r--r--lib/Kconfig.debug31
-rw-r--r--lib/Makefile5
-rw-r--r--lib/dump_stack.c7
-rw-r--r--lib/generic-radix-tree.c32
-rw-r--r--lib/idr.c31
-rw-r--r--lib/kunit/Kconfig36
-rw-r--r--lib/kunit/Makefile9
-rw-r--r--lib/kunit/assert.c141
-rw-r--r--lib/kunit/example-test.c88
-rw-r--r--lib/kunit/string-stream-test.c52
-rw-r--r--lib/kunit/string-stream.c217
-rw-r--r--lib/kunit/test-test.c331
-rw-r--r--lib/kunit/test.c478
-rw-r--r--lib/kunit/try-catch.c118
-rw-r--r--lib/list-test.c746
-rw-r--r--lib/radix-tree.c2
-rw-r--r--lib/sbitmap.c17
-rw-r--r--lib/string.c21
-rw-r--r--lib/test_meminit.c27
-rw-r--r--lib/test_user_copy.c37
-rw-r--r--lib/test_xarray.c24
-rw-r--r--lib/vdso/Kconfig9
-rw-r--r--lib/vdso/gettimeofday.c9
-rw-r--r--lib/xarray.c4
-rw-r--r--lib/xz/xz_dec_lzma2.c1
-rw-r--r--mm/backing-dev.c4
-rw-r--r--mm/compaction.c7
-rw-r--r--mm/debug.c31
-rw-r--r--mm/filemap.c1
-rw-r--r--mm/gup.c14
-rw-r--r--mm/huge_memory.c9
-rw-r--r--mm/hugetlb.c5
-rw-r--r--mm/hugetlb_cgroup.c2
-rw-r--r--mm/init-mm.c1
-rw-r--r--mm/khugepaged.c35
-rw-r--r--mm/kmemleak.c30
-rw-r--r--mm/ksm.c14
-rw-r--r--mm/madvise.c16
-rw-r--r--mm/memblock.c6
-rw-r--r--mm/memcontrol.c48
-rw-r--r--mm/memory-failure.c36
-rw-r--r--mm/memory.c104
-rw-r--r--mm/memory_hotplug.c133
-rw-r--r--mm/mempolicy.c14
-rw-r--r--mm/memremap.c13
-rw-r--r--mm/mmu_notifier.c2
-rw-r--r--mm/page_alloc.c31
-rw-r--r--mm/page_ext.c23
-rw-r--r--mm/page_io.c6
-rw-r--r--mm/page_owner.c60
-rw-r--r--mm/rmap.c1
-rw-r--r--mm/shmem.c6
-rw-r--r--mm/shuffle.c2
-rw-r--r--mm/slab.c3
-rw-r--r--mm/slab.h4
-rw-r--r--mm/slab_common.c28
-rw-r--r--mm/slob.c62
-rw-r--r--mm/slub.c88
-rw-r--r--mm/sparse.c2
-rw-r--r--mm/truncate.c12
-rw-r--r--mm/vmpressure.c20
-rw-r--r--mm/vmscan.c86
-rw-r--r--mm/vmstat.c25
-rw-r--r--mm/z3fold.c10
-rw-r--r--net/8021q/vlan.c1
-rw-r--r--net/8021q/vlan_dev.c33
-rw-r--r--net/atm/clip.c6
-rw-r--r--net/atm/common.c2
-rw-r--r--net/batman-adv/bat_iv_ogm.c61
-rw-r--r--net/batman-adv/bat_v_ogm.c41
-rw-r--r--net/batman-adv/hard-interface.c2
-rw-r--r--net/batman-adv/soft-interface.c32
-rw-r--r--net/batman-adv/types.h7
-rw-r--r--net/bluetooth/6lowpan.c8
-rw-r--r--net/bluetooth/af_bluetooth.c4
-rw-r--r--net/bridge/br_device.c8
-rw-r--r--net/bridge/netfilter/ebt_dnat.c19
-rw-r--r--net/bridge/netfilter/nf_conntrack_bridge.c5
-rw-r--r--net/caif/caif_socket.c2
-rw-r--r--net/can/af_can.c3
-rw-r--r--net/can/j1939/main.c9
-rw-r--r--net/can/j1939/socket.c103
-rw-r--r--net/can/j1939/transport.c56
-rw-r--r--net/core/datagram.c10
-rw-r--r--net/core/dev.c623
-rw-r--r--net/core/dev_addr_lists.c12
-rw-r--r--net/core/devlink.c45
-rw-r--r--net/core/ethtool.c4
-rw-r--r--net/core/filter.c8
-rw-r--r--net/core/flow_dissector.c38
-rw-r--r--net/core/lwt_bpf.c7
-rw-r--r--net/core/net-sysfs.c25
-rw-r--r--net/core/net_namespace.c23
-rw-r--r--net/core/request_sock.c2
-rw-r--r--net/core/rtnetlink.c40
-rw-r--r--net/core/skbuff.c23
-rw-r--r--net/core/skmsg.c20
-rw-r--r--net/core/sock.c38
-rw-r--r--net/dccp/ipv4.c4
-rw-r--r--net/decnet/af_decnet.c2
-rw-r--r--net/dsa/dsa2.c2
-rw-r--r--net/dsa/master.c5
-rw-r--r--net/dsa/slave.c12
-rw-r--r--net/dsa/tag_8021q.c2
-rw-r--r--net/ieee802154/6lowpan/core.c8
-rw-r--r--net/ipv4/datagram.c2
-rw-r--r--net/ipv4/fib_frontend.c2
-rw-r--r--net/ipv4/fib_semantics.c2
-rw-r--r--net/ipv4/inet_connection_sock.c6
-rw-r--r--net/ipv4/inet_diag.c2
-rw-r--r--net/ipv4/inet_hashtables.c2
-rw-r--r--net/ipv4/ip_gre.c4
-rw-r--r--net/ipv4/ip_output.c14
-rw-r--r--net/ipv4/ipmr.c3
-rw-r--r--net/ipv4/route.c11
-rw-r--r--net/ipv4/sysctl_net_ipv4.c2
-rw-r--r--net/ipv4/tcp.c79
-rw-r--r--net/ipv4/tcp_diag.c5
-rw-r--r--net/ipv4/tcp_fastopen.c2
-rw-r--r--net/ipv4/tcp_input.c37
-rw-r--r--net/ipv4/tcp_ipv4.c36
-rw-r--r--net/ipv4/tcp_minisocks.c17
-rw-r--r--net/ipv4/tcp_output.c32
-rw-r--r--net/ipv4/tcp_timer.c11
-rw-r--r--net/ipv4/udp.c56
-rw-r--r--net/ipv6/addrconf_core.c1
-rw-r--r--net/ipv6/inet6_hashtables.c2
-rw-r--r--net/ipv6/ip6_gre.c5
-rw-r--r--net/ipv6/ip6_output.c3
-rw-r--r--net/ipv6/ipv6_sockglue.c4
-rw-r--r--net/ipv6/netfilter.c3
-rw-r--r--net/ipv6/route.c15
-rw-r--r--net/ipv6/seg6_local.c11
-rw-r--r--net/ipv6/tcp_ipv6.c18
-rw-r--r--net/ipv6/udp.c2
-rw-r--r--net/l2tp/l2tp_eth.c1
-rw-r--r--net/llc/af_llc.c34
-rw-r--r--net/llc/llc_c_ac.c8
-rw-r--r--net/llc/llc_conn.c69
-rw-r--r--net/llc/llc_if.c12
-rw-r--r--net/llc/llc_s_ac.c12
-rw-r--r--net/llc/llc_sap.c23
-rw-r--r--net/mac80211/main.c2
-rw-r--r--net/mac80211/mlme.c5
-rw-r--r--net/mac80211/rx.c11
-rw-r--r--net/mac80211/scan.c30
-rw-r--r--net/mac80211/sta_info.c3
-rw-r--r--net/netfilter/ipset/ip_set_core.c49
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipmac.c2
-rw-r--r--net/netfilter/ipset/ip_set_hash_net.c1
-rw-r--r--net/netfilter/ipset/ip_set_hash_netnet.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_app.c12
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c29
-rw-r--r--net/netfilter/ipvs/ip_vs_pe.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_sched.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c13
-rw-r--r--net/netfilter/nf_conntrack_core.c4
-rw-r--r--net/netfilter/nf_flow_table_core.c3
-rw-r--r--net/netfilter/nf_tables_api.c7
-rw-r--r--net/netfilter/nf_tables_offload.c5
-rw-r--r--net/netfilter/nft_bitwise.c5
-rw-r--r--net/netfilter/nft_cmp.c2
-rw-r--r--net/netfilter/nft_payload.c38
-rw-r--r--net/netrom/af_netrom.c23
-rw-r--r--net/nfc/llcp_sock.c4
-rw-r--r--net/nfc/netlink.c2
-rw-r--r--net/openvswitch/actions.c5
-rw-r--r--net/openvswitch/datapath.c20
-rw-r--r--net/openvswitch/vport-internal_dev.c11
-rw-r--r--net/phonet/socket.c4
-rw-r--r--net/rds/ib_cm.c23
-rw-r--r--net/rose/af_rose.c23
-rw-r--r--net/rxrpc/ar-internal.h2
-rw-r--r--net/rxrpc/call_accept.c5
-rw-r--r--net/rxrpc/call_object.c34
-rw-r--r--net/rxrpc/conn_client.c9
-rw-r--r--net/rxrpc/conn_object.c13
-rw-r--r--net/rxrpc/conn_service.c2
-rw-r--r--net/rxrpc/peer_event.c11
-rw-r--r--net/rxrpc/peer_object.c16
-rw-r--r--net/rxrpc/recvmsg.c24
-rw-r--r--net/rxrpc/sendmsg.c3
-rw-r--r--net/sched/act_api.c23
-rw-r--r--net/sched/act_mirred.c6
-rw-r--r--net/sched/act_mpls.c12
-rw-r--r--net/sched/act_pedit.c12
-rw-r--r--net/sched/act_tunnel_key.c4
-rw-r--r--net/sched/cls_api.c119
-rw-r--r--net/sched/cls_bpf.c8
-rw-r--r--net/sched/em_meta.c4
-rw-r--r--net/sched/sch_api.c3
-rw-r--r--net/sched/sch_etf.c2
-rw-r--r--net/sched/sch_generic.c19
-rw-r--r--net/sched/sch_hhf.c8
-rw-r--r--net/sched/sch_netem.c11
-rw-r--r--net/sched/sch_sfb.c13
-rw-r--r--net/sched/sch_sfq.c14
-rw-r--r--net/sched/sch_taprio.c39
-rw-r--r--net/sctp/diag.c2
-rw-r--r--net/sctp/input.c16
-rw-r--r--net/sctp/sm_make_chunk.c12
-rw-r--r--net/sctp/socket.c12
-rw-r--r--net/smc/af_smc.c16
-rw-r--r--net/smc/smc_core.c7
-rw-r--r--net/smc/smc_pnet.c4
-rw-r--r--net/smc/smc_rx.c29
-rw-r--r--net/socket.c66
-rw-r--r--net/sunrpc/backchannel_rqst.c7
-rw-r--r--net/sunrpc/xprt.c5
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c2
-rw-r--r--net/sunrpc/xprtsock.c17
-rw-r--r--net/tipc/core.c2
-rw-r--r--net/tipc/core.h6
-rw-r--r--net/tipc/socket.c12
-rw-r--r--net/tls/tls_device.c10
-rw-r--r--net/tls/tls_main.c3
-rw-r--r--net/tls/tls_sw.c41
-rw-r--r--net/unix/af_unix.c6
-rw-r--r--net/vmw_vsock/af_vsock.c2
-rw-r--r--net/vmw_vsock/virtio_transport_common.c25
-rw-r--r--net/wireless/chan.c5
-rw-r--r--net/wireless/nl80211.c4
-rw-r--r--net/wireless/reg.c1
-rw-r--r--net/wireless/reg.h8
-rw-r--r--net/wireless/scan.c23
-rw-r--r--net/wireless/util.c3
-rw-r--r--net/wireless/wext-sme.c8
-rw-r--r--net/x25/x25_dev.c2
-rw-r--r--net/xdp/xdp_umem.c6
-rw-r--r--net/xdp/xsk.c42
-rw-r--r--net/xfrm/xfrm_input.c3
-rw-r--r--net/xfrm/xfrm_state.c2
-rw-r--r--samples/bpf/Makefile1
-rw-r--r--samples/bpf/asm_goto_workaround.h13
-rw-r--r--samples/bpf/task_fd_query_user.c1
-rw-r--r--scripts/coccinelle/api/devm_platform_ioremap_resource.cocci60
-rw-r--r--scripts/coccinelle/misc/add_namespace.cocci2
-rw-r--r--scripts/gdb/linux/dmesg.py16
-rw-r--r--scripts/gdb/linux/symbols.py11
-rw-r--r--scripts/gdb/linux/utils.py25
-rw-r--r--scripts/mod/modpost.c60
-rw-r--r--scripts/mod/modpost.h1
-rw-r--r--scripts/nsdeps10
-rw-r--r--scripts/recordmcount.h5
-rwxr-xr-xscripts/setlocalversion2
-rwxr-xr-xscripts/tools-support-relr.sh8
-rw-r--r--security/keys/Makefile2
-rw-r--r--security/keys/trusted-keys/Makefile8
-rw-r--r--security/keys/trusted-keys/trusted_tpm1.c (renamed from security/keys/trusted.c)98
-rw-r--r--security/keys/trusted-keys/trusted_tpm2.c314
-rw-r--r--security/lockdown/lockdown.c1
-rw-r--r--security/selinux/ss/services.c9
-rw-r--r--sound/core/compress_offload.c2
-rw-r--r--sound/core/pcm_lib.c8
-rw-r--r--sound/core/timer.c30
-rw-r--r--sound/firewire/bebob/bebob_focusrite.c3
-rw-r--r--sound/firewire/bebob/bebob_stream.c3
-rw-r--r--sound/hda/ext/hdac_ext_controller.c5
-rw-r--r--sound/hda/hdac_controller.c2
-rw-r--r--sound/pci/hda/hda_intel.c11
-rw-r--r--sound/pci/hda/patch_ca0132.c2
-rw-r--r--sound/pci/hda/patch_hdmi.c29
-rw-r--r--sound/pci/hda/patch_realtek.c49
-rw-r--r--sound/soc/codecs/hdac_hda.c2
-rw-r--r--sound/soc/codecs/hdmi-codec.c12
-rw-r--r--sound/soc/codecs/max98373.c24
-rw-r--r--sound/soc/codecs/msm8916-wcd-analog.c4
-rw-r--r--sound/soc/codecs/msm8916-wcd-digital.c22
-rw-r--r--sound/soc/codecs/rt5651.c3
-rw-r--r--sound/soc/codecs/rt5682.c12
-rw-r--r--sound/soc/codecs/wm8994.c43
-rw-r--r--sound/soc/codecs/wm_adsp.c10
-rw-r--r--sound/soc/intel/boards/sof_rt5682.c60
-rw-r--r--sound/soc/kirkwood/kirkwood-i2s.c11
-rw-r--r--sound/soc/rockchip/rockchip_i2s.c2
-rw-r--r--sound/soc/rockchip/rockchip_max98090.c7
-rw-r--r--sound/soc/samsung/arndale_rt5631.c34
-rw-r--r--sound/soc/sh/rcar/core.c1
-rw-r--r--sound/soc/sh/rcar/dma.c4
-rw-r--r--sound/soc/soc-pcm.c17
-rw-r--r--sound/soc/soc-topology.c2
-rw-r--r--sound/soc/sof/control.c26
-rw-r--r--sound/soc/sof/debug.c6
-rw-r--r--sound/soc/sof/intel/Kconfig10
-rw-r--r--sound/soc/sof/intel/bdw.c7
-rw-r--r--sound/soc/sof/intel/byt.c6
-rw-r--r--sound/soc/sof/intel/hda-ctrl.c12
-rw-r--r--sound/soc/sof/intel/hda-loader.c1
-rw-r--r--sound/soc/sof/intel/hda-stream.c45
-rw-r--r--sound/soc/sof/intel/hda.c7
-rw-r--r--sound/soc/sof/intel/hda.h5
-rw-r--r--sound/soc/sof/ipc.c4
-rw-r--r--sound/soc/sof/loader.c4
-rw-r--r--sound/soc/sof/pcm.c35
-rw-r--r--sound/soc/sof/topology.c15
-rw-r--r--sound/soc/stm/stm32_sai_sub.c33
-rw-r--r--sound/soc/ti/sdma-pcm.c2
-rw-r--r--sound/usb/endpoint.c3
-rw-r--r--sound/usb/mixer.c4
-rw-r--r--sound/usb/pcm.c3
-rw-r--r--sound/usb/quirks.c5
-rw-r--r--sound/usb/validate.c8
-rw-r--r--tools/arch/arm/include/uapi/asm/kvm.h4
-rw-r--r--tools/arch/arm64/include/uapi/asm/kvm.h4
-rw-r--r--tools/arch/s390/include/uapi/asm/kvm.h6
-rw-r--r--tools/arch/x86/include/uapi/asm/svm.h1
-rw-r--r--tools/arch/x86/include/uapi/asm/vmx.h8
-rw-r--r--tools/bpf/Makefile6
-rw-r--r--tools/gpio/Build1
-rw-r--r--tools/gpio/Makefile16
-rw-r--r--tools/include/uapi/asm-generic/mman-common.h3
-rw-r--r--tools/include/uapi/drm/i915_drm.h1
-rw-r--r--tools/include/uapi/linux/fcntl.h9
-rw-r--r--tools/include/uapi/linux/fs.h55
-rw-r--r--tools/include/uapi/linux/fscrypt.h181
-rw-r--r--tools/include/uapi/linux/kvm.h5
-rw-r--r--tools/include/uapi/linux/sched.h30
-rw-r--r--tools/include/uapi/linux/usbdevice_fs.h4
-rw-r--r--tools/lib/bpf/Makefile33
-rw-r--r--tools/lib/bpf/libbpf_internal.h16
-rw-r--r--tools/lib/bpf/xsk.c4
-rw-r--r--tools/lib/subcmd/Makefile8
-rw-r--r--tools/objtool/check.c1
-rw-r--r--tools/perf/Documentation/asciidoc.conf3
-rw-r--r--tools/perf/Documentation/jitdump-specification.txt4
-rw-r--r--tools/perf/arch/arm/annotate/instructions.c4
-rw-r--r--tools/perf/arch/arm64/annotate/instructions.c4
-rw-r--r--tools/perf/arch/powerpc/util/header.c3
-rw-r--r--tools/perf/arch/s390/annotate/instructions.c6
-rw-r--r--tools/perf/arch/s390/util/header.c9
-rw-r--r--tools/perf/arch/x86/annotate/instructions.c6
-rw-r--r--tools/perf/arch/x86/util/header.c3
-rw-r--r--tools/perf/builtin-c2c.c14
-rw-r--r--tools/perf/builtin-kmem.c1
-rw-r--r--tools/perf/builtin-kvm.c7
-rw-r--r--tools/perf/builtin-script.c6
-rwxr-xr-xtools/perf/check-headers.sh1
-rw-r--r--tools/perf/jvmti/Build6
-rw-r--r--tools/perf/perf-sys.h6
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_z15/basic.json (renamed from tools/perf/pmu-events/arch/s390/cf_m8561/basic.json)0
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_z15/crypto.json (renamed from tools/perf/pmu-events/arch/s390/cf_m8561/crypto.json)0
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json (renamed from tools/perf/pmu-events/arch/s390/cf_m8561/crypto6.json)0
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_z15/extended.json (renamed from tools/perf/pmu-events/arch/s390/cf_m8561/extended.json)0
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_z15/transaction.json7
-rw-r--r--tools/perf/pmu-events/arch/s390/mapfile.csv2
-rw-r--r--tools/perf/pmu-events/jevents.c12
-rw-r--r--tools/perf/tests/perf-hooks.c3
-rw-r--r--tools/perf/util/annotate.c35
-rw-r--r--tools/perf/util/annotate.h4
-rw-r--r--tools/perf/util/copyfile.c8
-rw-r--r--tools/perf/util/evlist.c2
-rw-r--r--tools/perf/util/evsel.c3
-rw-r--r--tools/perf/util/header.c4
-rw-r--r--tools/perf/util/hist.c2
-rw-r--r--tools/perf/util/jitdump.c6
-rw-r--r--tools/perf/util/llvm-utils.c6
-rw-r--r--tools/perf/util/map.c3
-rw-r--r--tools/perf/util/python.c6
-rw-r--r--tools/perf/util/scripting-engines/trace-event-perl.c8
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c9
-rw-r--r--tools/perf/util/trace-event-parse.c31
-rw-r--r--tools/perf/util/trace-event.h2
-rw-r--r--tools/perf/util/util.c6
-rw-r--r--tools/testing/kunit/.gitignore3
-rw-r--r--tools/testing/kunit/configs/all_tests.config3
-rwxr-xr-xtools/testing/kunit/kunit.py138
-rw-r--r--tools/testing/kunit/kunit_config.py66
-rw-r--r--tools/testing/kunit/kunit_kernel.py149
-rw-r--r--tools/testing/kunit/kunit_parser.py310
-rwxr-xr-xtools/testing/kunit/kunit_tool_test.py206
-rw-r--r--tools/testing/kunit/test_data/test_is_test_passed-all_passed.log32
-rw-r--r--tools/testing/kunit/test_data/test_is_test_passed-crash.log69
-rw-r--r--tools/testing/kunit/test_data/test_is_test_passed-failure.log36
-rw-r--r--tools/testing/kunit/test_data/test_is_test_passed-no_tests_run.log75
-rw-r--r--tools/testing/kunit/test_data/test_output_isolated_correctly.log106
-rw-r--r--tools/testing/kunit/test_data/test_read_from_file.kconfig17
-rw-r--r--tools/testing/selftests/Makefile26
-rw-r--r--tools/testing/selftests/arm64/Makefile64
-rw-r--r--tools/testing/selftests/arm64/README25
-rw-r--r--tools/testing/selftests/arm64/signal/.gitignore3
-rw-r--r--tools/testing/selftests/arm64/signal/Makefile32
-rw-r--r--tools/testing/selftests/arm64/signal/README59
-rw-r--r--tools/testing/selftests/arm64/signal/signals.S64
-rw-r--r--tools/testing/selftests/arm64/signal/test_signals.c29
-rw-r--r--tools/testing/selftests/arm64/signal/test_signals.h100
-rw-r--r--tools/testing/selftests/arm64/signal/test_signals_utils.c328
-rw-r--r--tools/testing/selftests/arm64/signal/test_signals_utils.h120
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c52
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c77
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c46
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c50
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c37
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c50
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_compat_toggle.c31
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_daif_bits.c35
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el1h.c15
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el1t.c15
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el2h.c15
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el2t.c15
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el3h.c15
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el3t.c15
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_template.h28
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/testcases.c196
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/testcases.h104
-rw-r--r--tools/testing/selftests/arm64/tags/.gitignore (renamed from tools/testing/selftests/arm64/.gitignore)0
-rw-r--r--tools/testing/selftests/arm64/tags/Makefile7
-rwxr-xr-xtools/testing/selftests/arm64/tags/run_tags_test.sh (renamed from tools/testing/selftests/arm64/run_tags_test.sh)0
-rw-r--r--tools/testing/selftests/arm64/tags/tags_test.c (renamed from tools/testing/selftests/arm64/tags_test.c)0
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tcp_rtt.c3
-rwxr-xr-xtools/testing/selftests/bpf/test_flow_dissector.sh3
-rwxr-xr-xtools/testing/selftests/bpf/test_lwt_ip_encap.sh6
-rwxr-xr-xtools/testing/selftests/bpf/test_offload.py5
-rw-r--r--tools/testing/selftests/bpf/test_sysctl.c8
-rwxr-xr-xtools/testing/selftests/bpf/test_tc_edt.sh2
-rw-r--r--tools/testing/selftests/breakpoints/breakpoint_test_arm64.c2
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/vxlan.sh8
-rwxr-xr-xtools/testing/selftests/gen_kselftest_tar.sh21
-rwxr-xr-xtools/testing/selftests/kselftest/module.sh (renamed from tools/testing/selftests/kselftest_module.sh)0
-rw-r--r--tools/testing/selftests/kselftest/runner.sh36
-rwxr-xr-xtools/testing/selftests/kselftest_install.sh24
-rw-r--r--tools/testing/selftests/kvm/.gitignore2
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/vmx.h2
-rw-r--r--tools/testing/selftests/kvm/lib/assert.c4
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/vmx.c10
-rw-r--r--tools/testing/selftests/kvm/x86_64/sync_regs_test.c21
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c6
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c13
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c6
-rwxr-xr-xtools/testing/selftests/lib/bitmap.sh2
-rwxr-xr-xtools/testing/selftests/lib/prime_numbers.sh2
-rwxr-xr-xtools/testing/selftests/lib/printf.sh2
-rwxr-xr-xtools/testing/selftests/lib/strscpy.sh2
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh21
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/net/l2tp.sh0
-rw-r--r--tools/testing/selftests/net/reuseport_dualstack.c3
-rw-r--r--tools/testing/selftests/net/so_txtime.c4
-rw-r--r--tools/testing/selftests/net/tcp_mmap.c8
-rw-r--r--tools/testing/selftests/net/tls.c108
-rw-r--r--tools/testing/selftests/net/udpgso.c3
-rw-r--r--tools/testing/selftests/net/udpgso_bench_tx.c3
-rw-r--r--tools/testing/selftests/powerpc/mm/tlbie_test.c2
-rw-r--r--tools/testing/selftests/proc/proc-self-map-files-002.c6
-rw-r--r--tools/testing/selftests/ptp/testptp.c53
-rw-r--r--tools/testing/selftests/rtc/settings1
-rw-r--r--tools/testing/selftests/sync/sync.c6
-rw-r--r--tools/testing/selftests/vm/Makefile5
-rw-r--r--tools/testing/selftests/vm/gup_benchmark.c4
-rwxr-xr-xtools/testing/selftests/vm/run_vmtests10
-rw-r--r--tools/testing/selftests/watchdog/watchdog-test.c27
-rw-r--r--tools/usb/usbip/libsrc/usbip_device_driver.c6
-rw-r--r--tools/virtio/crypto/hash.h (renamed from arch/arm64/kernel/vdso/gettimeofday.S)0
-rw-r--r--tools/virtio/linux/dma-mapping.h2
-rw-r--r--tools/virtio/xen/xen.h6
-rw-r--r--virt/kvm/arm/pmu.c48
-rw-r--r--virt/kvm/kvm_main.c238
2154 files changed, 44888 insertions, 20875 deletions
diff --git a/.mailmap b/.mailmap
index edcac87e76c8..fd6219293057 100644
--- a/.mailmap
+++ b/.mailmap
@@ -108,6 +108,10 @@ Jason Gunthorpe <jgg@ziepe.ca> <jgg@mellanox.com>
Jason Gunthorpe <jgg@ziepe.ca> <jgunthorpe@obsidianresearch.com>
Javi Merino <javi.merino@kernel.org> <javi.merino@arm.com>
<javier@osg.samsung.com> <javier.martinez@collabora.co.uk>
+Jayachandran C <c.jayachandran@gmail.com> <jayachandranc@netlogicmicro.com>
+Jayachandran C <c.jayachandran@gmail.com> <jchandra@broadcom.com>
+Jayachandran C <c.jayachandran@gmail.com> <jchandra@digeo.com>
+Jayachandran C <c.jayachandran@gmail.com> <jnair@caviumnetworks.com>
Jean Tourrilhes <jt@hpl.hp.com>
<jean-philippe@linaro.org> <jean-philippe.brucker@arm.com>
Jeff Garzik <jgarzik@pretzel.yyz.us>
@@ -196,7 +200,8 @@ Oleksij Rempel <linux@rempel-privat.de> <o.rempel@pengutronix.de>
Oleksij Rempel <linux@rempel-privat.de> <ore@pengutronix.de>
Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Patrick Mochel <mochel@digitalimplant.org>
-Paul Burton <paul.burton@mips.com> <paul.burton@imgtec.com>
+Paul Burton <paulburton@kernel.org> <paul.burton@imgtec.com>
+Paul Burton <paulburton@kernel.org> <paul.burton@mips.com>
Peter A Jonsson <pj@ludd.ltu.se>
Peter Oruba <peter@oruba.de>
Peter Oruba <peter.oruba@amd.com>
@@ -229,6 +234,7 @@ Shuah Khan <shuah@kernel.org> <shuahkhan@gmail.com>
Shuah Khan <shuah@kernel.org> <shuah.khan@hp.com>
Shuah Khan <shuah@kernel.org> <shuahkh@osg.samsung.com>
Shuah Khan <shuah@kernel.org> <shuah.kh@samsung.com>
+Simon Arlott <simon@octiron.net> <simon@fire.lp0.eu>
Simon Kelley <simon@thekelleys.org.uk>
Stéphane Witzmann <stephane.witzmann@ubpmes.univ-bpclermont.fr>
Stephen Hemminger <shemminger@osdl.org>
diff --git a/CREDITS b/CREDITS
index 8b67a85844b5..031605d46b4d 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1637,6 +1637,10 @@ S: Panoramastrasse 18
S: D-69126 Heidelberg
S: Germany
+N: Simon Horman
+M: horms@verge.net.au
+D: Renesas ARM/ARM64 SoC maintainer
+
N: Christopher Horn
E: chorn@warwick.net
D: Miscellaneous sysctl hacks
diff --git a/Documentation/ABI/testing/procfs-diskstats b/Documentation/ABI/testing/procfs-diskstats
index 2c44b4f1b060..70dcaf2481f4 100644
--- a/Documentation/ABI/testing/procfs-diskstats
+++ b/Documentation/ABI/testing/procfs-diskstats
@@ -29,4 +29,9 @@ Description:
17 - sectors discarded
18 - time spent discarding
+ Kernel 5.5+ appends two more fields for flush requests:
+
+ 19 - flush requests completed successfully
+ 20 - time spent flushing
+
For more details refer to Documentation/admin-guide/iostats.rst
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index f8c7c7126bb1..ed8c14f161ee 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -15,6 +15,12 @@ Description:
9 - I/Os currently in progress
10 - time spent doing I/Os (ms)
11 - weighted time spent doing I/Os (ms)
+ 12 - discards completed
+ 13 - discards merged
+ 14 - sectors discarded
+ 15 - time spent discarding (ms)
+ 16 - flush requests completed
+ 17 - time spent flushing (ms)
For more details refer Documentation/admin-guide/iostats.rst
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index 06d0931119cc..fc20cde63d1e 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -486,6 +486,8 @@ What: /sys/devices/system/cpu/vulnerabilities
/sys/devices/system/cpu/vulnerabilities/spec_store_bypass
/sys/devices/system/cpu/vulnerabilities/l1tf
/sys/devices/system/cpu/vulnerabilities/mds
+ /sys/devices/system/cpu/vulnerabilities/tsx_async_abort
+ /sys/devices/system/cpu/vulnerabilities/itlb_multihit
Date: January 2018
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description: Information about CPU vulnerabilities
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 0fa8c0e615c2..5361ebec3361 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -615,8 +615,8 @@ on an IO device and is an example of this type.
Protections
-----------
-A cgroup is protected to be allocated upto the configured amount of
-the resource if the usages of all its ancestors are under their
+A cgroup is protected upto the configured amount of the resource
+as long as the usages of all its ancestors are under their
protected levels. Protections can be hard guarantees or best effort
soft boundaries. Protections can also be over-committed in which case
only upto the amount available to the parent is protected among
@@ -1096,7 +1096,10 @@ PAGE_SIZE multiple when read back.
is within its effective min boundary, the cgroup's memory
won't be reclaimed under any conditions. If there is no
unprotected reclaimable memory available, OOM killer
- is invoked.
+ is invoked. Above the effective min boundary (or
+ effective low boundary if it is higher), pages are reclaimed
+ proportionally to the overage, reducing reclaim pressure for
+ smaller overages.
Effective min boundary is limited by memory.min values of
all ancestor cgroups. If there is memory.min overcommitment
@@ -1118,7 +1121,10 @@ PAGE_SIZE multiple when read back.
Best-effort memory protection. If the memory usage of a
cgroup is within its effective low boundary, the cgroup's
memory won't be reclaimed unless memory can be reclaimed
- from unprotected cgroups.
+ from unprotected cgroups. Above the effective low boundary (or
+ effective min boundary if it is higher), pages are reclaimed
+ proportionally to the overage, reducing reclaim pressure for
+ smaller overages.
Effective low boundary is limited by memory.low values of
all ancestor cgroups. If there is memory.low overcommitment
@@ -2482,8 +2488,10 @@ system performance due to overreclaim, to the point where the feature
becomes self-defeating.
The memory.low boundary on the other hand is a top-down allocated
-reserve. A cgroup enjoys reclaim protection when it's within its low,
-which makes delegation of subtrees possible.
+reserve. A cgroup enjoys reclaim protection when it's within its
+effective low, which makes delegation of subtrees possible. It also
+enjoys having reclaim pressure proportional to its overage when
+above its effective low.
The original high boundary, the hard limit, is defined as a strict
limit that can not budge, even if the OOM killer has to be called.
diff --git a/Documentation/admin-guide/device-mapper/dm-integrity.rst b/Documentation/admin-guide/device-mapper/dm-integrity.rst
index a30aa91b5fbe..594095b54b29 100644
--- a/Documentation/admin-guide/device-mapper/dm-integrity.rst
+++ b/Documentation/admin-guide/device-mapper/dm-integrity.rst
@@ -177,6 +177,11 @@ bitmap_flush_interval:number
The bitmap flush interval in milliseconds. The metadata buffers
are synchronized when this interval expires.
+fix_padding
+ Use a smaller padding of the tag area that is more
+ space-efficient. If this option is not present, large padding is
+ used - that is for compatibility with older kernels.
+
The journal mode (D/J), buffer_sectors, journal_watermark, commit_time can
be changed when reloading the target (load an inactive table and swap the
diff --git a/Documentation/admin-guide/device-mapper/dm-raid.rst b/Documentation/admin-guide/device-mapper/dm-raid.rst
index 2fe255b130fb..f6344675e395 100644
--- a/Documentation/admin-guide/device-mapper/dm-raid.rst
+++ b/Documentation/admin-guide/device-mapper/dm-raid.rst
@@ -417,3 +417,5 @@ Version History
deadlock/potential data corruption. Update superblock when
specific devices are requested via rebuild. Fix RAID leg
rebuild errors.
+ 1.15.0 Fix size extensions not being synchronized in case of new MD bitmap
+ pages allocated; also fix those not occuring after previous reductions
diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst
index 49311f3da6f2..0795e3c2643f 100644
--- a/Documentation/admin-guide/hw-vuln/index.rst
+++ b/Documentation/admin-guide/hw-vuln/index.rst
@@ -12,3 +12,5 @@ are configurable at compile, boot or run time.
spectre
l1tf
mds
+ tsx_async_abort
+ multihit.rst
diff --git a/Documentation/admin-guide/hw-vuln/multihit.rst b/Documentation/admin-guide/hw-vuln/multihit.rst
new file mode 100644
index 000000000000..ba9988d8bce5
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/multihit.rst
@@ -0,0 +1,163 @@
+iTLB multihit
+=============
+
+iTLB multihit is an erratum where some processors may incur a machine check
+error, possibly resulting in an unrecoverable CPU lockup, when an
+instruction fetch hits multiple entries in the instruction TLB. This can
+occur when the page size is changed along with either the physical address
+or cache type. A malicious guest running on a virtualized system can
+exploit this erratum to perform a denial of service attack.
+
+
+Affected processors
+-------------------
+
+Variations of this erratum are present on most Intel Core and Xeon processor
+models. The erratum is not present on:
+
+ - non-Intel processors
+
+ - Some Atoms (Airmont, Bonnell, Goldmont, GoldmontPlus, Saltwell, Silvermont)
+
+ - Intel processors that have the PSCHANGE_MC_NO bit set in the
+ IA32_ARCH_CAPABILITIES MSR.
+
+
+Related CVEs
+------------
+
+The following CVE entry is related to this issue:
+
+ ============== =================================================
+ CVE-2018-12207 Machine Check Error Avoidance on Page Size Change
+ ============== =================================================
+
+
+Problem
+-------
+
+Privileged software, including OS and virtual machine managers (VMM), are in
+charge of memory management. A key component in memory management is the control
+of the page tables. Modern processors use virtual memory, a technique that creates
+the illusion of a very large memory for processors. This virtual space is split
+into pages of a given size. Page tables translate virtual addresses to physical
+addresses.
+
+To reduce latency when performing a virtual to physical address translation,
+processors include a structure, called TLB, that caches recent translations.
+There are separate TLBs for instruction (iTLB) and data (dTLB).
+
+Under this errata, instructions are fetched from a linear address translated
+using a 4 KB translation cached in the iTLB. Privileged software modifies the
+paging structure so that the same linear address using large page size (2 MB, 4
+MB, 1 GB) with a different physical address or memory type. After the page
+structure modification but before the software invalidates any iTLB entries for
+the linear address, a code fetch that happens on the same linear address may
+cause a machine-check error which can result in a system hang or shutdown.
+
+
+Attack scenarios
+----------------
+
+Attacks against the iTLB multihit erratum can be mounted from malicious
+guests in a virtualized system.
+
+
+iTLB multihit system information
+--------------------------------
+
+The Linux kernel provides a sysfs interface to enumerate the current iTLB
+multihit status of the system:whether the system is vulnerable and which
+mitigations are active. The relevant sysfs file is:
+
+/sys/devices/system/cpu/vulnerabilities/itlb_multihit
+
+The possible values in this file are:
+
+.. list-table::
+
+ * - Not affected
+ - The processor is not vulnerable.
+ * - KVM: Mitigation: Split huge pages
+ - Software changes mitigate this issue.
+ * - KVM: Vulnerable
+ - The processor is vulnerable, but no mitigation enabled
+
+
+Enumeration of the erratum
+--------------------------------
+
+A new bit has been allocated in the IA32_ARCH_CAPABILITIES (PSCHANGE_MC_NO) msr
+and will be set on CPU's which are mitigated against this issue.
+
+ ======================================= =========== ===============================
+ IA32_ARCH_CAPABILITIES MSR Not present Possibly vulnerable,check model
+ IA32_ARCH_CAPABILITIES[PSCHANGE_MC_NO] '0' Likely vulnerable,check model
+ IA32_ARCH_CAPABILITIES[PSCHANGE_MC_NO] '1' Not vulnerable
+ ======================================= =========== ===============================
+
+
+Mitigation mechanism
+-------------------------
+
+This erratum can be mitigated by restricting the use of large page sizes to
+non-executable pages. This forces all iTLB entries to be 4K, and removes
+the possibility of multiple hits.
+
+In order to mitigate the vulnerability, KVM initially marks all huge pages
+as non-executable. If the guest attempts to execute in one of those pages,
+the page is broken down into 4K pages, which are then marked executable.
+
+If EPT is disabled or not available on the host, KVM is in control of TLB
+flushes and the problematic situation cannot happen. However, the shadow
+EPT paging mechanism used by nested virtualization is vulnerable, because
+the nested guest can trigger multiple iTLB hits by modifying its own
+(non-nested) page tables. For simplicity, KVM will make large pages
+non-executable in all shadow paging modes.
+
+Mitigation control on the kernel command line and KVM - module parameter
+------------------------------------------------------------------------
+
+The KVM hypervisor mitigation mechanism for marking huge pages as
+non-executable can be controlled with a module parameter "nx_huge_pages=".
+The kernel command line allows to control the iTLB multihit mitigations at
+boot time with the option "kvm.nx_huge_pages=".
+
+The valid arguments for these options are:
+
+ ========== ================================================================
+ force Mitigation is enabled. In this case, the mitigation implements
+ non-executable huge pages in Linux kernel KVM module. All huge
+ pages in the EPT are marked as non-executable.
+ If a guest attempts to execute in one of those pages, the page is
+ broken down into 4K pages, which are then marked executable.
+
+ off Mitigation is disabled.
+
+ auto Enable mitigation only if the platform is affected and the kernel
+ was not booted with the "mitigations=off" command line parameter.
+ This is the default option.
+ ========== ================================================================
+
+
+Mitigation selection guide
+--------------------------
+
+1. No virtualization in use
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ The system is protected by the kernel unconditionally and no further
+ action is required.
+
+2. Virtualization with trusted guests
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ If the guest comes from a trusted source, you may assume that the guest will
+ not attempt to maliciously exploit these errata and no further action is
+ required.
+
+3. Virtualization with untrusted guests
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ If the guest comes from an untrusted source, the guest host kernel will need
+ to apply iTLB multihit mitigation via the kernel command line or kvm
+ module parameter.
diff --git a/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst b/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
new file mode 100644
index 000000000000..fddbd7579c53
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
@@ -0,0 +1,276 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+TAA - TSX Asynchronous Abort
+======================================
+
+TAA is a hardware vulnerability that allows unprivileged speculative access to
+data which is available in various CPU internal buffers by using asynchronous
+aborts within an Intel TSX transactional region.
+
+Affected processors
+-------------------
+
+This vulnerability only affects Intel processors that support Intel
+Transactional Synchronization Extensions (TSX) when the TAA_NO bit (bit 8)
+is 0 in the IA32_ARCH_CAPABILITIES MSR. On processors where the MDS_NO bit
+(bit 5) is 0 in the IA32_ARCH_CAPABILITIES MSR, the existing MDS mitigations
+also mitigate against TAA.
+
+Whether a processor is affected or not can be read out from the TAA
+vulnerability file in sysfs. See :ref:`tsx_async_abort_sys_info`.
+
+Related CVEs
+------------
+
+The following CVE entry is related to this TAA issue:
+
+ ============== ===== ===================================================
+ CVE-2019-11135 TAA TSX Asynchronous Abort (TAA) condition on some
+ microprocessors utilizing speculative execution may
+ allow an authenticated user to potentially enable
+ information disclosure via a side channel with
+ local access.
+ ============== ===== ===================================================
+
+Problem
+-------
+
+When performing store, load or L1 refill operations, processors write
+data into temporary microarchitectural structures (buffers). The data in
+those buffers can be forwarded to load operations as an optimization.
+
+Intel TSX is an extension to the x86 instruction set architecture that adds
+hardware transactional memory support to improve performance of multi-threaded
+software. TSX lets the processor expose and exploit concurrency hidden in an
+application due to dynamically avoiding unnecessary synchronization.
+
+TSX supports atomic memory transactions that are either committed (success) or
+aborted. During an abort, operations that happened within the transactional region
+are rolled back. An asynchronous abort takes place, among other options, when a
+different thread accesses a cache line that is also used within the transactional
+region when that access might lead to a data race.
+
+Immediately after an uncompleted asynchronous abort, certain speculatively
+executed loads may read data from those internal buffers and pass it to dependent
+operations. This can be then used to infer the value via a cache side channel
+attack.
+
+Because the buffers are potentially shared between Hyper-Threads cross
+Hyper-Thread attacks are possible.
+
+The victim of a malicious actor does not need to make use of TSX. Only the
+attacker needs to begin a TSX transaction and raise an asynchronous abort
+which in turn potenitally leaks data stored in the buffers.
+
+More detailed technical information is available in the TAA specific x86
+architecture section: :ref:`Documentation/x86/tsx_async_abort.rst <tsx_async_abort>`.
+
+
+Attack scenarios
+----------------
+
+Attacks against the TAA vulnerability can be implemented from unprivileged
+applications running on hosts or guests.
+
+As for MDS, the attacker has no control over the memory addresses that can
+be leaked. Only the victim is responsible for bringing data to the CPU. As
+a result, the malicious actor has to sample as much data as possible and
+then postprocess it to try to infer any useful information from it.
+
+A potential attacker only has read access to the data. Also, there is no direct
+privilege escalation by using this technique.
+
+
+.. _tsx_async_abort_sys_info:
+
+TAA system information
+-----------------------
+
+The Linux kernel provides a sysfs interface to enumerate the current TAA status
+of mitigated systems. The relevant sysfs file is:
+
+/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
+
+The possible values in this file are:
+
+.. list-table::
+
+ * - 'Vulnerable'
+ - The CPU is affected by this vulnerability and the microcode and kernel mitigation are not applied.
+ * - 'Vulnerable: Clear CPU buffers attempted, no microcode'
+ - The system tries to clear the buffers but the microcode might not support the operation.
+ * - 'Mitigation: Clear CPU buffers'
+ - The microcode has been updated to clear the buffers. TSX is still enabled.
+ * - 'Mitigation: TSX disabled'
+ - TSX is disabled.
+ * - 'Not affected'
+ - The CPU is not affected by this issue.
+
+.. _ucode_needed:
+
+Best effort mitigation mode
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If the processor is vulnerable, but the availability of the microcode-based
+mitigation mechanism is not advertised via CPUID the kernel selects a best
+effort mitigation mode. This mode invokes the mitigation instructions
+without a guarantee that they clear the CPU buffers.
+
+This is done to address virtualization scenarios where the host has the
+microcode update applied, but the hypervisor is not yet updated to expose the
+CPUID to the guest. If the host has updated microcode the protection takes
+effect; otherwise a few CPU cycles are wasted pointlessly.
+
+The state in the tsx_async_abort sysfs file reflects this situation
+accordingly.
+
+
+Mitigation mechanism
+--------------------
+
+The kernel detects the affected CPUs and the presence of the microcode which is
+required. If a CPU is affected and the microcode is available, then the kernel
+enables the mitigation by default.
+
+
+The mitigation can be controlled at boot time via a kernel command line option.
+See :ref:`taa_mitigation_control_command_line`.
+
+.. _virt_mechanism:
+
+Virtualization mitigation
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Affected systems where the host has TAA microcode and TAA is mitigated by
+having disabled TSX previously, are not vulnerable regardless of the status
+of the VMs.
+
+In all other cases, if the host either does not have the TAA microcode or
+the kernel is not mitigated, the system might be vulnerable.
+
+
+.. _taa_mitigation_control_command_line:
+
+Mitigation control on the kernel command line
+---------------------------------------------
+
+The kernel command line allows to control the TAA mitigations at boot time with
+the option "tsx_async_abort=". The valid arguments for this option are:
+
+ ============ =============================================================
+ off This option disables the TAA mitigation on affected platforms.
+ If the system has TSX enabled (see next parameter) and the CPU
+ is affected, the system is vulnerable.
+
+ full TAA mitigation is enabled. If TSX is enabled, on an affected
+ system it will clear CPU buffers on ring transitions. On
+ systems which are MDS-affected and deploy MDS mitigation,
+ TAA is also mitigated. Specifying this option on those
+ systems will have no effect.
+
+ full,nosmt The same as tsx_async_abort=full, with SMT disabled on
+ vulnerable CPUs that have TSX enabled. This is the complete
+ mitigation. When TSX is disabled, SMT is not disabled because
+ CPU is not vulnerable to cross-thread TAA attacks.
+ ============ =============================================================
+
+Not specifying this option is equivalent to "tsx_async_abort=full".
+
+The kernel command line also allows to control the TSX feature using the
+parameter "tsx=" on CPUs which support TSX control. MSR_IA32_TSX_CTRL is used
+to control the TSX feature and the enumeration of the TSX feature bits (RTM
+and HLE) in CPUID.
+
+The valid options are:
+
+ ============ =============================================================
+ off Disables TSX on the system.
+
+ Note that this option takes effect only on newer CPUs which are
+ not vulnerable to MDS, i.e., have MSR_IA32_ARCH_CAPABILITIES.MDS_NO=1
+ and which get the new IA32_TSX_CTRL MSR through a microcode
+ update. This new MSR allows for the reliable deactivation of
+ the TSX functionality.
+
+ on Enables TSX.
+
+ Although there are mitigations for all known security
+ vulnerabilities, TSX has been known to be an accelerator for
+ several previous speculation-related CVEs, and so there may be
+ unknown security risks associated with leaving it enabled.
+
+ auto Disables TSX if X86_BUG_TAA is present, otherwise enables TSX
+ on the system.
+ ============ =============================================================
+
+Not specifying this option is equivalent to "tsx=off".
+
+The following combinations of the "tsx_async_abort" and "tsx" are possible. For
+affected platforms tsx=auto is equivalent to tsx=off and the result will be:
+
+ ========= ========================== =========================================
+ tsx=on tsx_async_abort=full The system will use VERW to clear CPU
+ buffers. Cross-thread attacks are still
+ possible on SMT machines.
+ tsx=on tsx_async_abort=full,nosmt As above, cross-thread attacks on SMT
+ mitigated.
+ tsx=on tsx_async_abort=off The system is vulnerable.
+ tsx=off tsx_async_abort=full TSX might be disabled if microcode
+ provides a TSX control MSR. If so,
+ system is not vulnerable.
+ tsx=off tsx_async_abort=full,nosmt Ditto
+ tsx=off tsx_async_abort=off ditto
+ ========= ========================== =========================================
+
+
+For unaffected platforms "tsx=on" and "tsx_async_abort=full" does not clear CPU
+buffers. For platforms without TSX control (MSR_IA32_ARCH_CAPABILITIES.MDS_NO=0)
+"tsx" command line argument has no effect.
+
+For the affected platforms below table indicates the mitigation status for the
+combinations of CPUID bit MD_CLEAR and IA32_ARCH_CAPABILITIES MSR bits MDS_NO
+and TSX_CTRL_MSR.
+
+ ======= ========= ============= ========================================
+ MDS_NO MD_CLEAR TSX_CTRL_MSR Status
+ ======= ========= ============= ========================================
+ 0 0 0 Vulnerable (needs microcode)
+ 0 1 0 MDS and TAA mitigated via VERW
+ 1 1 0 MDS fixed, TAA vulnerable if TSX enabled
+ because MD_CLEAR has no meaning and
+ VERW is not guaranteed to clear buffers
+ 1 X 1 MDS fixed, TAA can be mitigated by
+ VERW or TSX_CTRL_MSR
+ ======= ========= ============= ========================================
+
+Mitigation selection guide
+--------------------------
+
+1. Trusted userspace and guests
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If all user space applications are from a trusted source and do not execute
+untrusted code which is supplied externally, then the mitigation can be
+disabled. The same applies to virtualized environments with trusted guests.
+
+
+2. Untrusted userspace and guests
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If there are untrusted applications or guests on the system, enabling TSX
+might allow a malicious actor to leak data from the host or from other
+processes running on the same physical core.
+
+If the microcode is available and the TSX is disabled on the host, attacks
+are prevented in a virtualized environment as well, even if the VMs do not
+explicitly enable the mitigation.
+
+
+.. _taa_default_mitigations:
+
+Default mitigations
+-------------------
+
+The kernel's default action for vulnerable processors is:
+
+ - Deploy TSX disable mitigation (tsx_async_abort=full tsx=off).
diff --git a/Documentation/admin-guide/iostats.rst b/Documentation/admin-guide/iostats.rst
index 5d63b18bd6d1..4f0462af3ca7 100644
--- a/Documentation/admin-guide/iostats.rst
+++ b/Documentation/admin-guide/iostats.rst
@@ -121,6 +121,15 @@ Field 15 -- # of milliseconds spent discarding
This is the total number of milliseconds spent by all discards (as
measured from __make_request() to end_that_request_last()).
+Field 16 -- # of flush requests completed
+ This is the total number of flush requests completed successfully.
+
+ Block layer combines flush requests and executes at most one at a time.
+ This counts flush requests executed by disk. Not tracked for partitions.
+
+Field 17 -- # of milliseconds spent flushing
+ This is the total number of milliseconds spent by all flush requests.
+
To avoid introducing performance bottlenecks, no locks are held while
modifying these counters. This implies that minor inaccuracies may be
introduced when changes collide, so (for instance) adding up all the
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index c7ac2f3ac99f..8dee8f68fe15 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2055,6 +2055,25 @@
KVM MMU at runtime.
Default is 0 (off)
+ kvm.nx_huge_pages=
+ [KVM] Controls the software workaround for the
+ X86_BUG_ITLB_MULTIHIT bug.
+ force : Always deploy workaround.
+ off : Never deploy workaround.
+ auto : Deploy workaround based on the presence of
+ X86_BUG_ITLB_MULTIHIT.
+
+ Default is 'auto'.
+
+ If the software workaround is enabled for the host,
+ guests do need not to enable it for nested guests.
+
+ kvm.nx_huge_pages_recovery_ratio=
+ [KVM] Controls how many 4KiB pages are periodically zapped
+ back to huge pages. 0 disables the recovery, otherwise if
+ the value is N KVM will zap 1/Nth of the 4KiB pages every
+ minute. The default is 60.
+
kvm-amd.nested= [KVM,AMD] Allow nested virtualization in KVM/SVM.
Default is 1 (enabled)
@@ -2636,6 +2655,13 @@
ssbd=force-off [ARM64]
l1tf=off [X86]
mds=off [X86]
+ tsx_async_abort=off [X86]
+ kvm.nx_huge_pages=off [X86]
+
+ Exceptions:
+ This does not have any effect on
+ kvm.nx_huge_pages when
+ kvm.nx_huge_pages=force.
auto (default)
Mitigate all CPU vulnerabilities, but leave SMT
@@ -2651,6 +2677,7 @@
be fully mitigated, even if it means losing SMT.
Equivalent to: l1tf=flush,nosmt [X86]
mds=full,nosmt [X86]
+ tsx_async_abort=full,nosmt [X86]
mminit_loglevel=
[KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this
@@ -4848,6 +4875,71 @@
interruptions from clocksource watchdog are not
acceptable).
+ tsx= [X86] Control Transactional Synchronization
+ Extensions (TSX) feature in Intel processors that
+ support TSX control.
+
+ This parameter controls the TSX feature. The options are:
+
+ on - Enable TSX on the system. Although there are
+ mitigations for all known security vulnerabilities,
+ TSX has been known to be an accelerator for
+ several previous speculation-related CVEs, and
+ so there may be unknown security risks associated
+ with leaving it enabled.
+
+ off - Disable TSX on the system. (Note that this
+ option takes effect only on newer CPUs which are
+ not vulnerable to MDS, i.e., have
+ MSR_IA32_ARCH_CAPABILITIES.MDS_NO=1 and which get
+ the new IA32_TSX_CTRL MSR through a microcode
+ update. This new MSR allows for the reliable
+ deactivation of the TSX functionality.)
+
+ auto - Disable TSX if X86_BUG_TAA is present,
+ otherwise enable TSX on the system.
+
+ Not specifying this option is equivalent to tsx=off.
+
+ See Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
+ for more details.
+
+ tsx_async_abort= [X86,INTEL] Control mitigation for the TSX Async
+ Abort (TAA) vulnerability.
+
+ Similar to Micro-architectural Data Sampling (MDS)
+ certain CPUs that support Transactional
+ Synchronization Extensions (TSX) are vulnerable to an
+ exploit against CPU internal buffers which can forward
+ information to a disclosure gadget under certain
+ conditions.
+
+ In vulnerable processors, the speculatively forwarded
+ data can be used in a cache side channel attack, to
+ access data to which the attacker does not have direct
+ access.
+
+ This parameter controls the TAA mitigation. The
+ options are:
+
+ full - Enable TAA mitigation on vulnerable CPUs
+ if TSX is enabled.
+
+ full,nosmt - Enable TAA mitigation and disable SMT on
+ vulnerable CPUs. If TSX is disabled, SMT
+ is not disabled because CPU is not
+ vulnerable to cross-thread TAA attacks.
+ off - Unconditionally disable TAA mitigation
+
+ Not specifying this option is equivalent to
+ tsx_async_abort=full. On CPUs which are MDS affected
+ and deploy MDS mitigation, TAA mitigation is not
+ required and doesn't provide any additional
+ mitigation.
+
+ For details see:
+ Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
+
turbografx.map[2|3]= [HW,JOY]
TurboGraFX parallel port interface
Format:
@@ -5302,6 +5394,10 @@
the unplug protocol
never -- do not unplug even if version check succeeds
+ xen_legacy_crash [X86,XEN]
+ Crash from Xen panic notifier, without executing late
+ panic() code such as dumping handler.
+
xen_nopvspin [X86,XEN]
Disables the ticketlock slowpath using Xen PV
optimizations.
diff --git a/Documentation/admin-guide/perf/imx-ddr.rst b/Documentation/admin-guide/perf/imx-ddr.rst
index 517a205abad6..90056e4e8859 100644
--- a/Documentation/admin-guide/perf/imx-ddr.rst
+++ b/Documentation/admin-guide/perf/imx-ddr.rst
@@ -17,7 +17,8 @@ The "format" directory describes format of the config (event ID) and config1
(AXI filtering) fields of the perf_event_attr structure, see /sys/bus/event_source/
devices/imx8_ddr0/format/. The "events" directory describes the events types
hardware supported that can be used with perf tool, see /sys/bus/event_source/
-devices/imx8_ddr0/events/.
+devices/imx8_ddr0/events/. The "caps" directory describes filter features implemented
+in DDR PMU, see /sys/bus/events_source/devices/imx8_ddr0/caps/.
e.g.::
perf stat -a -e imx8_ddr0/cycles/ cmd
perf stat -a -e imx8_ddr0/read/,imx8_ddr0/write/ cmd
@@ -25,9 +26,12 @@ devices/imx8_ddr0/events/.
AXI filtering is only used by CSV modes 0x41 (axid-read) and 0x42 (axid-write)
to count reading or writing matches filter setting. Filter setting is various
from different DRAM controller implementations, which is distinguished by quirks
-in the driver.
+in the driver. You also can dump info from userspace, filter in "caps" directory
+indicates whether PMU supports AXI ID filter or not; enhanced_filter indicates
+whether PMU supports enhanced AXI ID filter or not. Value 0 for un-supported, and
+value 1 for supported.
-* With DDR_CAP_AXI_ID_FILTER quirk.
+* With DDR_CAP_AXI_ID_FILTER quirk(filter: 1, enhanced_filter: 0).
Filter is defined with two configuration parts:
--AXI_ID defines AxID matching value.
--AXI_MASKING defines which bits of AxID are meaningful for the matching.
@@ -50,3 +54,8 @@ in the driver.
axi_id to monitor a specific id, rather than having to specify axi_mask.
e.g.::
perf stat -a -e imx8_ddr0/axid-read,axi_id=0x12/ cmd, which will monitor ARID=0x12
+
+* With DDR_CAP_AXI_ID_FILTER_ENHANCED quirk(filter: 1, enhanced_filter: 1).
+ This is an extension to the DDR_CAP_AXI_ID_FILTER quirk which permits
+ counting the number of bytes (as opposed to the number of bursts) from DDR
+ read and write transactions concurrently with another set of data counters.
diff --git a/Documentation/admin-guide/perf/thunderx2-pmu.rst b/Documentation/admin-guide/perf/thunderx2-pmu.rst
index 08e33675853a..01f158238ae1 100644
--- a/Documentation/admin-guide/perf/thunderx2-pmu.rst
+++ b/Documentation/admin-guide/perf/thunderx2-pmu.rst
@@ -3,24 +3,26 @@ Cavium ThunderX2 SoC Performance Monitoring Unit (PMU UNCORE)
=============================================================
The ThunderX2 SoC PMU consists of independent, system-wide, per-socket
-PMUs such as the Level 3 Cache (L3C) and DDR4 Memory Controller (DMC).
+PMUs such as the Level 3 Cache (L3C), DDR4 Memory Controller (DMC) and
+Cavium Coherent Processor Interconnect (CCPI2).
The DMC has 8 interleaved channels and the L3C has 16 interleaved tiles.
Events are counted for the default channel (i.e. channel 0) and prorated
to the total number of channels/tiles.
-The DMC and L3C support up to 4 counters. Counters are independently
-programmable and can be started and stopped individually. Each counter
-can be set to a different event. Counters are 32-bit and do not support
-an overflow interrupt; they are read every 2 seconds.
+The DMC and L3C support up to 4 counters, while the CCPI2 supports up to 8
+counters. Counters are independently programmable to different events and
+can be started and stopped individually. None of the counters support an
+overflow interrupt. DMC and L3C counters are 32-bit and read every 2 seconds.
+The CCPI2 counters are 64-bit and assumed not to overflow in normal operation.
PMU UNCORE (perf) driver:
The thunderx2_pmu driver registers per-socket perf PMUs for the DMC and
-L3C devices. Each PMU can be used to count up to 4 events
-simultaneously. The PMUs provide a description of their available events
-and configuration options under sysfs, see
-/sys/devices/uncore_<l3c_S/dmc_S/>; S is the socket id.
+L3C devices. Each PMU can be used to count up to 4 (DMC/L3C) or up to 8
+(CCPI2) events simultaneously. The PMUs provide a description of their
+available events and configuration options under sysfs, see
+/sys/devices/uncore_<l3c_S/dmc_S/ccpi2_S/>; S is the socket id.
The driver does not support sampling, therefore "perf record" will not
work. Per-task perf sessions are also not supported.
diff --git a/Documentation/arm64/booting.rst b/Documentation/arm64/booting.rst
index d3f3a60fbf25..5d78a6f5b0ae 100644
--- a/Documentation/arm64/booting.rst
+++ b/Documentation/arm64/booting.rst
@@ -213,6 +213,9 @@ Before jumping into the kernel, the following conditions must be met:
- ICC_SRE_EL3.Enable (bit 3) must be initialiased to 0b1.
- ICC_SRE_EL3.SRE (bit 0) must be initialised to 0b1.
+ - ICC_CTLR_EL3.PMHE (bit 6) must be set to the same value across
+ all CPUs the kernel is executing on, and must stay constant
+ for the lifetime of the kernel.
- If the kernel is entered at EL1:
diff --git a/Documentation/arm64/cpu-feature-registers.rst b/Documentation/arm64/cpu-feature-registers.rst
index 2955287e9acc..b6e44884e3ad 100644
--- a/Documentation/arm64/cpu-feature-registers.rst
+++ b/Documentation/arm64/cpu-feature-registers.rst
@@ -168,8 +168,15 @@ infrastructure:
+------------------------------+---------+---------+
- 3) MIDR_EL1 - Main ID Register
+ 3) ID_AA64PFR1_EL1 - Processor Feature Register 1
+ +------------------------------+---------+---------+
+ | Name | bits | visible |
+ +------------------------------+---------+---------+
+ | SSBS | [7-4] | y |
+ +------------------------------+---------+---------+
+
+ 4) MIDR_EL1 - Main ID Register
+------------------------------+---------+---------+
| Name | bits | visible |
+------------------------------+---------+---------+
@@ -188,11 +195,15 @@ infrastructure:
as available on the CPU where it is fetched and is not a system
wide safe value.
- 4) ID_AA64ISAR1_EL1 - Instruction set attribute register 1
+ 5) ID_AA64ISAR1_EL1 - Instruction set attribute register 1
+------------------------------+---------+---------+
| Name | bits | visible |
+------------------------------+---------+---------+
+ | SB | [39-36] | y |
+ +------------------------------+---------+---------+
+ | FRINTTS | [35-32] | y |
+ +------------------------------+---------+---------+
| GPI | [31-28] | y |
+------------------------------+---------+---------+
| GPA | [27-24] | y |
@@ -210,7 +221,7 @@ infrastructure:
| DPB | [3-0] | y |
+------------------------------+---------+---------+
- 5) ID_AA64MMFR2_EL1 - Memory model feature register 2
+ 6) ID_AA64MMFR2_EL1 - Memory model feature register 2
+------------------------------+---------+---------+
| Name | bits | visible |
@@ -218,7 +229,7 @@ infrastructure:
| AT | [35-32] | y |
+------------------------------+---------+---------+
- 6) ID_AA64ZFR0_EL1 - SVE feature ID register 0
+ 7) ID_AA64ZFR0_EL1 - SVE feature ID register 0
+------------------------------+---------+---------+
| Name | bits | visible |
diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst
index 91f79529c58c..7fa3d215ae6a 100644
--- a/Documentation/arm64/elf_hwcaps.rst
+++ b/Documentation/arm64/elf_hwcaps.rst
@@ -119,10 +119,6 @@ HWCAP_LRCPC
HWCAP_DCPOP
Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0001.
-HWCAP2_DCPODP
-
- Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0010.
-
HWCAP_SHA3
Functionality implied by ID_AA64ISAR0_EL1.SHA3 == 0b0001.
@@ -141,30 +137,6 @@ HWCAP_SHA512
HWCAP_SVE
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001.
-HWCAP2_SVE2
-
- Functionality implied by ID_AA64ZFR0_EL1.SVEVer == 0b0001.
-
-HWCAP2_SVEAES
-
- Functionality implied by ID_AA64ZFR0_EL1.AES == 0b0001.
-
-HWCAP2_SVEPMULL
-
- Functionality implied by ID_AA64ZFR0_EL1.AES == 0b0010.
-
-HWCAP2_SVEBITPERM
-
- Functionality implied by ID_AA64ZFR0_EL1.BitPerm == 0b0001.
-
-HWCAP2_SVESHA3
-
- Functionality implied by ID_AA64ZFR0_EL1.SHA3 == 0b0001.
-
-HWCAP2_SVESM4
-
- Functionality implied by ID_AA64ZFR0_EL1.SM4 == 0b0001.
-
HWCAP_ASIMDFHM
Functionality implied by ID_AA64ISAR0_EL1.FHM == 0b0001.
@@ -180,13 +152,12 @@ HWCAP_ILRCPC
HWCAP_FLAGM
Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0001.
-HWCAP2_FLAGM2
-
- Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0010.
-
HWCAP_SSBS
Functionality implied by ID_AA64PFR1_EL1.SSBS == 0b0010.
+HWCAP_SB
+ Functionality implied by ID_AA64ISAR1_EL1.SB == 0b0001.
+
HWCAP_PACA
Functionality implied by ID_AA64ISAR1_EL1.APA == 0b0001 or
ID_AA64ISAR1_EL1.API == 0b0001, as described by
@@ -197,6 +168,38 @@ HWCAP_PACG
ID_AA64ISAR1_EL1.GPI == 0b0001, as described by
Documentation/arm64/pointer-authentication.rst.
+HWCAP2_DCPODP
+
+ Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0010.
+
+HWCAP2_SVE2
+
+ Functionality implied by ID_AA64ZFR0_EL1.SVEVer == 0b0001.
+
+HWCAP2_SVEAES
+
+ Functionality implied by ID_AA64ZFR0_EL1.AES == 0b0001.
+
+HWCAP2_SVEPMULL
+
+ Functionality implied by ID_AA64ZFR0_EL1.AES == 0b0010.
+
+HWCAP2_SVEBITPERM
+
+ Functionality implied by ID_AA64ZFR0_EL1.BitPerm == 0b0001.
+
+HWCAP2_SVESHA3
+
+ Functionality implied by ID_AA64ZFR0_EL1.SHA3 == 0b0001.
+
+HWCAP2_SVESM4
+
+ Functionality implied by ID_AA64ZFR0_EL1.SM4 == 0b0001.
+
+HWCAP2_FLAGM2
+
+ Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0010.
+
HWCAP2_FRINT
Functionality implied by ID_AA64ISAR1_EL1.FRINTTS == 0b0001.
diff --git a/Documentation/arm64/memory.rst b/Documentation/arm64/memory.rst
index b040909e45f8..02e02175e6f5 100644
--- a/Documentation/arm64/memory.rst
+++ b/Documentation/arm64/memory.rst
@@ -154,11 +154,18 @@ return virtual addresses to userspace from a 48-bit range.
Software can "opt-in" to receiving VAs from a 52-bit space by
specifying an mmap hint parameter that is larger than 48-bit.
+
For example:
- maybe_high_address = mmap(~0UL, size, prot, flags,...);
+
+.. code-block:: c
+
+ maybe_high_address = mmap(~0UL, size, prot, flags,...);
It is also possible to build a debug kernel that returns addresses
from a 52-bit space by enabling the following kernel config options:
+
+.. code-block:: sh
+
CONFIG_EXPERT=y && CONFIG_ARM64_FORCE_52BIT=y
Note that this option is only intended for debugging applications
diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst
index 17ea3fecddaa..99b2545455ff 100644
--- a/Documentation/arm64/silicon-errata.rst
+++ b/Documentation/arm64/silicon-errata.rst
@@ -70,8 +70,12 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A57 | #834220 | ARM64_ERRATUM_834220 |
+----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A57 | #1319537 | ARM64_ERRATUM_1319367 |
++----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A72 | #853709 | N/A |
+----------------+-----------------+-----------------+-----------------------------+
+| ARM | Cortex-A72 | #1319367 | ARM64_ERRATUM_1319367 |
++----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A73 | #858921 | ARM64_ERRATUM_858921 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A55 | #1024718 | ARM64_ERRATUM_1024718 |
@@ -88,9 +92,16 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Neoverse-N1 | #1349291 | N/A |
+----------------+-----------------+-----------------+-----------------------------+
+| ARM | Neoverse-N1 | #1542419 | ARM64_ERRATUM_1542419 |
++----------------+-----------------+-----------------+-----------------------------+
| ARM | MMU-500 | #841119,826419 | N/A |
+----------------+-----------------+-----------------+-----------------------------+
+----------------+-----------------+-----------------+-----------------------------+
+| Broadcom | Brahma-B53 | N/A | ARM64_ERRATUM_845719 |
++----------------+-----------------+-----------------+-----------------------------+
+| Broadcom | Brahma-B53 | N/A | ARM64_ERRATUM_843419 |
++----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
| Cavium | ThunderX ITS | #22375,24313 | CAVIUM_ERRATUM_22375 |
+----------------+-----------------+-----------------+-----------------------------+
| Cavium | ThunderX ITS | #23144 | CAVIUM_ERRATUM_23144 |
@@ -107,6 +118,8 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| Cavium | ThunderX2 SMMUv3| #126 | N/A |
+----------------+-----------------+-----------------+-----------------------------+
+| Cavium | ThunderX2 Core | #219 | CAVIUM_TX2_ERRATUM_219 |
++----------------+-----------------+-----------------+-----------------------------+
+----------------+-----------------+-----------------+-----------------------------+
| Freescale/NXP | LS2080A/LS1043A | A-008585 | FSL_ERRATUM_A008585 |
+----------------+-----------------+-----------------+-----------------------------+
@@ -124,7 +137,7 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| Qualcomm Tech. | Kryo/Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 |
+----------------+-----------------+-----------------+-----------------------------+
-| Qualcomm Tech. | Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009 |
+| Qualcomm Tech. | Kryo/Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009 |
+----------------+-----------------+-----------------+-----------------------------+
| Qualcomm Tech. | QDF2400 ITS | E0065 | QCOM_QDF2400_ERRATUM_0065 |
+----------------+-----------------+-----------------+-----------------------------+
diff --git a/Documentation/block/stat.rst b/Documentation/block/stat.rst
index 9c07bc22b0bc..77311335c08b 100644
--- a/Documentation/block/stat.rst
+++ b/Documentation/block/stat.rst
@@ -41,6 +41,8 @@ discard I/Os requests number of discard I/Os processed
discard merges requests number of discard I/Os merged with in-queue I/O
discard sectors sectors number of sectors discarded
discard ticks milliseconds total wait time for discard requests
+flush I/Os requests number of flush I/Os processed
+flush ticks milliseconds total wait time for flush requests
=============== ============= =================================================
read I/Os, write I/Os, discard I/0s
@@ -48,6 +50,14 @@ read I/Os, write I/Os, discard I/0s
These values increment when an I/O request completes.
+flush I/Os
+==========
+
+These values increment when an flush I/O request completes.
+
+Block layer combines flush requests and executes at most one at a time.
+This counts flush requests executed by disk. Not tracked for partitions.
+
read merges, write merges, discard merges
=========================================
@@ -62,8 +72,8 @@ discarded from this block device. The "sectors" in question are the
standard UNIX 512-byte sectors, not any device- or filesystem-specific
block size. The counters are incremented when the I/O completes.
-read ticks, write ticks, discard ticks
-======================================
+read ticks, write ticks, discard ticks, flush ticks
+===================================================
These values count the number of milliseconds that I/O requests have
waited on this block device. If there are multiple I/O requests waiting,
diff --git a/Documentation/core-api/index.rst b/Documentation/core-api/index.rst
index fa16a0538dcb..ab0eae1c153a 100644
--- a/Documentation/core-api/index.rst
+++ b/Documentation/core-api/index.rst
@@ -38,6 +38,7 @@ Core utilities
protection-keys
../RCU/index
gcc-plugins
+ symbol-namespaces
Interfaces for kernel debugging
diff --git a/Documentation/core-api/memory-allocation.rst b/Documentation/core-api/memory-allocation.rst
index 7744aa3bf2e0..939e3dfc86e9 100644
--- a/Documentation/core-api/memory-allocation.rst
+++ b/Documentation/core-api/memory-allocation.rst
@@ -98,6 +98,10 @@ limited. The actual limit depends on the hardware and the kernel
configuration, but it is a good practice to use `kmalloc` for objects
smaller than page size.
+The address of a chunk allocated with `kmalloc` is aligned to at least
+ARCH_KMALLOC_MINALIGN bytes. For sizes which are a power of two, the
+alignment is also guaranteed to be at least the respective size.
+
For large allocations you can use :c:func:`vmalloc` and
:c:func:`vzalloc`, or directly request pages from the page
allocator. The memory allocated by `vmalloc` and related functions is
diff --git a/Documentation/kbuild/namespaces.rst b/Documentation/core-api/symbol-namespaces.rst
index 982ed7b568ac..982ed7b568ac 100644
--- a/Documentation/kbuild/namespaces.rst
+++ b/Documentation/core-api/symbol-namespaces.rst
diff --git a/Documentation/dev-tools/index.rst b/Documentation/dev-tools/index.rst
index b0522a4dd107..09dee10d2592 100644
--- a/Documentation/dev-tools/index.rst
+++ b/Documentation/dev-tools/index.rst
@@ -24,6 +24,7 @@ whole; patches welcome!
gdb-kernel-debugging
kgdb
kselftest
+ kunit/index
.. only:: subproject and html
diff --git a/Documentation/dev-tools/kasan.rst b/Documentation/dev-tools/kasan.rst
index b72d07d70239..525296121d89 100644
--- a/Documentation/dev-tools/kasan.rst
+++ b/Documentation/dev-tools/kasan.rst
@@ -41,6 +41,9 @@ smaller binary while the latter is 1.1 - 2 times faster.
Both KASAN modes work with both SLUB and SLAB memory allocators.
For better bug detection and nicer reporting, enable CONFIG_STACKTRACE.
+To augment reports with last allocation and freeing stack of the physical page,
+it is recommended to enable also CONFIG_PAGE_OWNER and boot with page_owner=on.
+
To disable instrumentation for specific files or directories, add a line
similar to the following to the respective kernel Makefile:
diff --git a/Documentation/dev-tools/kselftest.rst b/Documentation/dev-tools/kselftest.rst
index 25604904fa6e..ecdfdc9d4b03 100644
--- a/Documentation/dev-tools/kselftest.rst
+++ b/Documentation/dev-tools/kselftest.rst
@@ -89,6 +89,22 @@ To build, save output files in a separate directory with KBUILD_OUTPUT ::
$ export KBUILD_OUTPUT=/tmp/kselftest; make TARGETS="size timers" kselftest
+Additionally you can use the "SKIP_TARGETS" variable on the make command
+line to specify one or more targets to exclude from the TARGETS list.
+
+To run all tests but a single subsystem::
+
+ $ make -C tools/testing/selftests SKIP_TARGETS=ptrace run_tests
+
+You can specify multiple tests to skip::
+
+ $ make SKIP_TARGETS="size timers" kselftest
+
+You can also specify a restricted list of tests to run together with a
+dedicated skiplist::
+
+ $ make TARGETS="bpf breakpoints size timers" SKIP_TARGETS=bpf kselftest
+
See the top-level tools/testing/selftests/Makefile for the list of all
possible targets.
diff --git a/Documentation/dev-tools/kunit/api/index.rst b/Documentation/dev-tools/kunit/api/index.rst
new file mode 100644
index 000000000000..9b9bffe5d41a
--- /dev/null
+++ b/Documentation/dev-tools/kunit/api/index.rst
@@ -0,0 +1,16 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============
+API Reference
+=============
+.. toctree::
+
+ test
+
+This section documents the KUnit kernel testing API. It is divided into the
+following sections:
+
+================================= ==============================================
+:doc:`test` documents all of the standard testing API
+ excluding mocking or mocking related features.
+================================= ==============================================
diff --git a/Documentation/dev-tools/kunit/api/test.rst b/Documentation/dev-tools/kunit/api/test.rst
new file mode 100644
index 000000000000..aaa97f17e5b3
--- /dev/null
+++ b/Documentation/dev-tools/kunit/api/test.rst
@@ -0,0 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+========
+Test API
+========
+
+This file documents all of the standard testing API excluding mocking or mocking
+related features.
+
+.. kernel-doc:: include/kunit/test.h
+ :internal:
diff --git a/Documentation/dev-tools/kunit/faq.rst b/Documentation/dev-tools/kunit/faq.rst
new file mode 100644
index 000000000000..bf2095112d89
--- /dev/null
+++ b/Documentation/dev-tools/kunit/faq.rst
@@ -0,0 +1,62 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================
+Frequently Asked Questions
+==========================
+
+How is this different from Autotest, kselftest, etc?
+====================================================
+KUnit is a unit testing framework. Autotest, kselftest (and some others) are
+not.
+
+A `unit test <https://martinfowler.com/bliki/UnitTest.html>`_ is supposed to
+test a single unit of code in isolation, hence the name. A unit test should be
+the finest granularity of testing and as such should allow all possible code
+paths to be tested in the code under test; this is only possible if the code
+under test is very small and does not have any external dependencies outside of
+the test's control like hardware.
+
+There are no testing frameworks currently available for the kernel that do not
+require installing the kernel on a test machine or in a VM and all require
+tests to be written in userspace and run on the kernel under test; this is true
+for Autotest, kselftest, and some others, disqualifying any of them from being
+considered unit testing frameworks.
+
+Does KUnit support running on architectures other than UML?
+===========================================================
+
+Yes, well, mostly.
+
+For the most part, the KUnit core framework (what you use to write the tests)
+can compile to any architecture; it compiles like just another part of the
+kernel and runs when the kernel boots. However, there is some infrastructure,
+like the KUnit Wrapper (``tools/testing/kunit/kunit.py``) that does not support
+other architectures.
+
+In short, this means that, yes, you can run KUnit on other architectures, but
+it might require more work than using KUnit on UML.
+
+For more information, see :ref:`kunit-on-non-uml`.
+
+What is the difference between a unit test and these other kinds of tests?
+==========================================================================
+Most existing tests for the Linux kernel would be categorized as an integration
+test, or an end-to-end test.
+
+- A unit test is supposed to test a single unit of code in isolation, hence the
+ name. A unit test should be the finest granularity of testing and as such
+ should allow all possible code paths to be tested in the code under test; this
+ is only possible if the code under test is very small and does not have any
+ external dependencies outside of the test's control like hardware.
+- An integration test tests the interaction between a minimal set of components,
+ usually just two or three. For example, someone might write an integration
+ test to test the interaction between a driver and a piece of hardware, or to
+ test the interaction between the userspace libraries the kernel provides and
+ the kernel itself; however, one of these tests would probably not test the
+ entire kernel along with hardware interactions and interactions with the
+ userspace.
+- An end-to-end test usually tests the entire system from the perspective of the
+ code under test. For example, someone might write an end-to-end test for the
+ kernel by installing a production configuration of the kernel on production
+ hardware with a production userspace and then trying to exercise some behavior
+ that depends on interactions between the hardware, the kernel, and userspace.
diff --git a/Documentation/dev-tools/kunit/index.rst b/Documentation/dev-tools/kunit/index.rst
new file mode 100644
index 000000000000..26ffb46bdf99
--- /dev/null
+++ b/Documentation/dev-tools/kunit/index.rst
@@ -0,0 +1,79 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================================
+KUnit - Unit Testing for the Linux Kernel
+=========================================
+
+.. toctree::
+ :maxdepth: 2
+
+ start
+ usage
+ api/index
+ faq
+
+What is KUnit?
+==============
+
+KUnit is a lightweight unit testing and mocking framework for the Linux kernel.
+These tests are able to be run locally on a developer's workstation without a VM
+or special hardware.
+
+KUnit is heavily inspired by JUnit, Python's unittest.mock, and
+Googletest/Googlemock for C++. KUnit provides facilities for defining unit test
+cases, grouping related test cases into test suites, providing common
+infrastructure for running tests, and much more.
+
+Get started now: :doc:`start`
+
+Why KUnit?
+==========
+
+A unit test is supposed to test a single unit of code in isolation, hence the
+name. A unit test should be the finest granularity of testing and as such should
+allow all possible code paths to be tested in the code under test; this is only
+possible if the code under test is very small and does not have any external
+dependencies outside of the test's control like hardware.
+
+Outside of KUnit, there are no testing frameworks currently
+available for the kernel that do not require installing the kernel on a test
+machine or in a VM and all require tests to be written in userspace running on
+the kernel; this is true for Autotest, and kselftest, disqualifying
+any of them from being considered unit testing frameworks.
+
+KUnit addresses the problem of being able to run tests without needing a virtual
+machine or actual hardware with User Mode Linux. User Mode Linux is a Linux
+architecture, like ARM or x86; however, unlike other architectures it compiles
+to a standalone program that can be run like any other program directly inside
+of a host operating system; to be clear, it does not require any virtualization
+support; it is just a regular program.
+
+KUnit is fast. Excluding build time, from invocation to completion KUnit can run
+several dozen tests in only 10 to 20 seconds; this might not sound like a big
+deal to some people, but having such fast and easy to run tests fundamentally
+changes the way you go about testing and even writing code in the first place.
+Linus himself said in his `git talk at Google
+<https://gist.github.com/lorn/1272686/revisions#diff-53c65572127855f1b003db4064a94573R874>`_:
+
+ "... a lot of people seem to think that performance is about doing the
+ same thing, just doing it faster, and that is not true. That is not what
+ performance is all about. If you can do something really fast, really
+ well, people will start using it differently."
+
+In this context Linus was talking about branching and merging,
+but this point also applies to testing. If your tests are slow, unreliable, are
+difficult to write, and require a special setup or special hardware to run,
+then you wait a lot longer to write tests, and you wait a lot longer to run
+tests; this means that tests are likely to break, unlikely to test a lot of
+things, and are unlikely to be rerun once they pass. If your tests are really
+fast, you run them all the time, every time you make a change, and every time
+someone sends you some code. Why trust that someone ran all their tests
+correctly on every change when you can just run them yourself in less time than
+it takes to read their test log?
+
+How do I use it?
+================
+
+* :doc:`start` - for new users of KUnit
+* :doc:`usage` - for a more detailed explanation of KUnit features
+* :doc:`api/index` - for the list of KUnit APIs used for testing
diff --git a/Documentation/dev-tools/kunit/start.rst b/Documentation/dev-tools/kunit/start.rst
new file mode 100644
index 000000000000..aeeddfafeea2
--- /dev/null
+++ b/Documentation/dev-tools/kunit/start.rst
@@ -0,0 +1,180 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============
+Getting Started
+===============
+
+Installing dependencies
+=======================
+KUnit has the same dependencies as the Linux kernel. As long as you can build
+the kernel, you can run KUnit.
+
+KUnit Wrapper
+=============
+Included with KUnit is a simple Python wrapper that helps format the output to
+easily use and read KUnit output. It handles building and running the kernel, as
+well as formatting the output.
+
+The wrapper can be run with:
+
+.. code-block:: bash
+
+ ./tools/testing/kunit/kunit.py run
+
+Creating a kunitconfig
+======================
+The Python script is a thin wrapper around Kbuild as such, it needs to be
+configured with a ``kunitconfig`` file. This file essentially contains the
+regular Kernel config, with the specific test targets as well.
+
+.. code-block:: bash
+
+ git clone -b master https://kunit.googlesource.com/kunitconfig $PATH_TO_KUNITCONFIG_REPO
+ cd $PATH_TO_LINUX_REPO
+ ln -s $PATH_TO_KUNIT_CONFIG_REPO/kunitconfig kunitconfig
+
+You may want to add kunitconfig to your local gitignore.
+
+Verifying KUnit Works
+---------------------
+
+To make sure that everything is set up correctly, simply invoke the Python
+wrapper from your kernel repo:
+
+.. code-block:: bash
+
+ ./tools/testing/kunit/kunit.py run
+
+.. note::
+ You may want to run ``make mrproper`` first.
+
+If everything worked correctly, you should see the following:
+
+.. code-block:: bash
+
+ Generating .config ...
+ Building KUnit Kernel ...
+ Starting KUnit Kernel ...
+
+followed by a list of tests that are run. All of them should be passing.
+
+.. note::
+ Because it is building a lot of sources for the first time, the ``Building
+ kunit kernel`` step may take a while.
+
+Writing your first test
+=======================
+
+In your kernel repo let's add some code that we can test. Create a file
+``drivers/misc/example.h`` with the contents:
+
+.. code-block:: c
+
+ int misc_example_add(int left, int right);
+
+create a file ``drivers/misc/example.c``:
+
+.. code-block:: c
+
+ #include <linux/errno.h>
+
+ #include "example.h"
+
+ int misc_example_add(int left, int right)
+ {
+ return left + right;
+ }
+
+Now add the following lines to ``drivers/misc/Kconfig``:
+
+.. code-block:: kconfig
+
+ config MISC_EXAMPLE
+ bool "My example"
+
+and the following lines to ``drivers/misc/Makefile``:
+
+.. code-block:: make
+
+ obj-$(CONFIG_MISC_EXAMPLE) += example.o
+
+Now we are ready to write the test. The test will be in
+``drivers/misc/example-test.c``:
+
+.. code-block:: c
+
+ #include <kunit/test.h>
+ #include "example.h"
+
+ /* Define the test cases. */
+
+ static void misc_example_add_test_basic(struct kunit *test)
+ {
+ KUNIT_EXPECT_EQ(test, 1, misc_example_add(1, 0));
+ KUNIT_EXPECT_EQ(test, 2, misc_example_add(1, 1));
+ KUNIT_EXPECT_EQ(test, 0, misc_example_add(-1, 1));
+ KUNIT_EXPECT_EQ(test, INT_MAX, misc_example_add(0, INT_MAX));
+ KUNIT_EXPECT_EQ(test, -1, misc_example_add(INT_MAX, INT_MIN));
+ }
+
+ static void misc_example_test_failure(struct kunit *test)
+ {
+ KUNIT_FAIL(test, "This test never passes.");
+ }
+
+ static struct kunit_case misc_example_test_cases[] = {
+ KUNIT_CASE(misc_example_add_test_basic),
+ KUNIT_CASE(misc_example_test_failure),
+ {}
+ };
+
+ static struct kunit_suite misc_example_test_suite = {
+ .name = "misc-example",
+ .test_cases = misc_example_test_cases,
+ };
+ kunit_test_suite(misc_example_test_suite);
+
+Now add the following to ``drivers/misc/Kconfig``:
+
+.. code-block:: kconfig
+
+ config MISC_EXAMPLE_TEST
+ bool "Test for my example"
+ depends on MISC_EXAMPLE && KUNIT
+
+and the following to ``drivers/misc/Makefile``:
+
+.. code-block:: make
+
+ obj-$(CONFIG_MISC_EXAMPLE_TEST) += example-test.o
+
+Now add it to your ``kunitconfig``:
+
+.. code-block:: none
+
+ CONFIG_MISC_EXAMPLE=y
+ CONFIG_MISC_EXAMPLE_TEST=y
+
+Now you can run the test:
+
+.. code-block:: bash
+
+ ./tools/testing/kunit/kunit.py
+
+You should see the following failure:
+
+.. code-block:: none
+
+ ...
+ [16:08:57] [PASSED] misc-example:misc_example_add_test_basic
+ [16:08:57] [FAILED] misc-example:misc_example_test_failure
+ [16:08:57] EXPECTATION FAILED at drivers/misc/example-test.c:17
+ [16:08:57] This test never passes.
+ ...
+
+Congrats! You just wrote your first KUnit test!
+
+Next Steps
+==========
+* Check out the :doc:`usage` page for a more
+ in-depth explanation of KUnit.
diff --git a/Documentation/dev-tools/kunit/usage.rst b/Documentation/dev-tools/kunit/usage.rst
new file mode 100644
index 000000000000..c6e69634e274
--- /dev/null
+++ b/Documentation/dev-tools/kunit/usage.rst
@@ -0,0 +1,576 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========
+Using KUnit
+===========
+
+The purpose of this document is to describe what KUnit is, how it works, how it
+is intended to be used, and all the concepts and terminology that are needed to
+understand it. This guide assumes a working knowledge of the Linux kernel and
+some basic knowledge of testing.
+
+For a high level introduction to KUnit, including setting up KUnit for your
+project, see :doc:`start`.
+
+Organization of this document
+=============================
+
+This document is organized into two main sections: Testing and Isolating
+Behavior. The first covers what a unit test is and how to use KUnit to write
+them. The second covers how to use KUnit to isolate code and make it possible
+to unit test code that was otherwise un-unit-testable.
+
+Testing
+=======
+
+What is KUnit?
+--------------
+
+"K" is short for "kernel" so "KUnit" is the "(Linux) Kernel Unit Testing
+Framework." KUnit is intended first and foremost for writing unit tests; it is
+general enough that it can be used to write integration tests; however, this is
+a secondary goal. KUnit has no ambition of being the only testing framework for
+the kernel; for example, it does not intend to be an end-to-end testing
+framework.
+
+What is Unit Testing?
+---------------------
+
+A `unit test <https://martinfowler.com/bliki/UnitTest.html>`_ is a test that
+tests code at the smallest possible scope, a *unit* of code. In the C
+programming language that's a function.
+
+Unit tests should be written for all the publicly exposed functions in a
+compilation unit; so that is all the functions that are exported in either a
+*class* (defined below) or all functions which are **not** static.
+
+Writing Tests
+-------------
+
+Test Cases
+~~~~~~~~~~
+
+The fundamental unit in KUnit is the test case. A test case is a function with
+the signature ``void (*)(struct kunit *test)``. It calls a function to be tested
+and then sets *expectations* for what should happen. For example:
+
+.. code-block:: c
+
+ void example_test_success(struct kunit *test)
+ {
+ }
+
+ void example_test_failure(struct kunit *test)
+ {
+ KUNIT_FAIL(test, "This test never passes.");
+ }
+
+In the above example ``example_test_success`` always passes because it does
+nothing; no expectations are set, so all expectations pass. On the other hand
+``example_test_failure`` always fails because it calls ``KUNIT_FAIL``, which is
+a special expectation that logs a message and causes the test case to fail.
+
+Expectations
+~~~~~~~~~~~~
+An *expectation* is a way to specify that you expect a piece of code to do
+something in a test. An expectation is called like a function. A test is made
+by setting expectations about the behavior of a piece of code under test; when
+one or more of the expectations fail, the test case fails and information about
+the failure is logged. For example:
+
+.. code-block:: c
+
+ void add_test_basic(struct kunit *test)
+ {
+ KUNIT_EXPECT_EQ(test, 1, add(1, 0));
+ KUNIT_EXPECT_EQ(test, 2, add(1, 1));
+ }
+
+In the above example ``add_test_basic`` makes a number of assertions about the
+behavior of a function called ``add``; the first parameter is always of type
+``struct kunit *``, which contains information about the current test context;
+the second parameter, in this case, is what the value is expected to be; the
+last value is what the value actually is. If ``add`` passes all of these
+expectations, the test case, ``add_test_basic`` will pass; if any one of these
+expectations fail, the test case will fail.
+
+It is important to understand that a test case *fails* when any expectation is
+violated; however, the test will continue running, potentially trying other
+expectations until the test case ends or is otherwise terminated. This is as
+opposed to *assertions* which are discussed later.
+
+To learn about more expectations supported by KUnit, see :doc:`api/test`.
+
+.. note::
+ A single test case should be pretty short, pretty easy to understand,
+ focused on a single behavior.
+
+For example, if we wanted to properly test the add function above, we would
+create additional tests cases which would each test a different property that an
+add function should have like this:
+
+.. code-block:: c
+
+ void add_test_basic(struct kunit *test)
+ {
+ KUNIT_EXPECT_EQ(test, 1, add(1, 0));
+ KUNIT_EXPECT_EQ(test, 2, add(1, 1));
+ }
+
+ void add_test_negative(struct kunit *test)
+ {
+ KUNIT_EXPECT_EQ(test, 0, add(-1, 1));
+ }
+
+ void add_test_max(struct kunit *test)
+ {
+ KUNIT_EXPECT_EQ(test, INT_MAX, add(0, INT_MAX));
+ KUNIT_EXPECT_EQ(test, -1, add(INT_MAX, INT_MIN));
+ }
+
+ void add_test_overflow(struct kunit *test)
+ {
+ KUNIT_EXPECT_EQ(test, INT_MIN, add(INT_MAX, 1));
+ }
+
+Notice how it is immediately obvious what all the properties that we are testing
+for are.
+
+Assertions
+~~~~~~~~~~
+
+KUnit also has the concept of an *assertion*. An assertion is just like an
+expectation except the assertion immediately terminates the test case if it is
+not satisfied.
+
+For example:
+
+.. code-block:: c
+
+ static void mock_test_do_expect_default_return(struct kunit *test)
+ {
+ struct mock_test_context *ctx = test->priv;
+ struct mock *mock = ctx->mock;
+ int param0 = 5, param1 = -5;
+ const char *two_param_types[] = {"int", "int"};
+ const void *two_params[] = {&param0, &param1};
+ const void *ret;
+
+ ret = mock->do_expect(mock,
+ "test_printk", test_printk,
+ two_param_types, two_params,
+ ARRAY_SIZE(two_params));
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ret);
+ KUNIT_EXPECT_EQ(test, -4, *((int *) ret));
+ }
+
+In this example, the method under test should return a pointer to a value, so
+if the pointer returned by the method is null or an errno, we don't want to
+bother continuing the test since the following expectation could crash the test
+case. `ASSERT_NOT_ERR_OR_NULL(...)` allows us to bail out of the test case if
+the appropriate conditions have not been satisfied to complete the test.
+
+Test Suites
+~~~~~~~~~~~
+
+Now obviously one unit test isn't very helpful; the power comes from having
+many test cases covering all of your behaviors. Consequently it is common to
+have many *similar* tests; in order to reduce duplication in these closely
+related tests most unit testing frameworks provide the concept of a *test
+suite*, in KUnit we call it a *test suite*; all it is is just a collection of
+test cases for a unit of code with a set up function that gets invoked before
+every test cases and then a tear down function that gets invoked after every
+test case completes.
+
+Example:
+
+.. code-block:: c
+
+ static struct kunit_case example_test_cases[] = {
+ KUNIT_CASE(example_test_foo),
+ KUNIT_CASE(example_test_bar),
+ KUNIT_CASE(example_test_baz),
+ {}
+ };
+
+ static struct kunit_suite example_test_suite = {
+ .name = "example",
+ .init = example_test_init,
+ .exit = example_test_exit,
+ .test_cases = example_test_cases,
+ };
+ kunit_test_suite(example_test_suite);
+
+In the above example the test suite, ``example_test_suite``, would run the test
+cases ``example_test_foo``, ``example_test_bar``, and ``example_test_baz``,
+each would have ``example_test_init`` called immediately before it and would
+have ``example_test_exit`` called immediately after it.
+``kunit_test_suite(example_test_suite)`` registers the test suite with the
+KUnit test framework.
+
+.. note::
+ A test case will only be run if it is associated with a test suite.
+
+For a more information on these types of things see the :doc:`api/test`.
+
+Isolating Behavior
+==================
+
+The most important aspect of unit testing that other forms of testing do not
+provide is the ability to limit the amount of code under test to a single unit.
+In practice, this is only possible by being able to control what code gets run
+when the unit under test calls a function and this is usually accomplished
+through some sort of indirection where a function is exposed as part of an API
+such that the definition of that function can be changed without affecting the
+rest of the code base. In the kernel this primarily comes from two constructs,
+classes, structs that contain function pointers that are provided by the
+implementer, and architecture specific functions which have definitions selected
+at compile time.
+
+Classes
+-------
+
+Classes are not a construct that is built into the C programming language;
+however, it is an easily derived concept. Accordingly, pretty much every project
+that does not use a standardized object oriented library (like GNOME's GObject)
+has their own slightly different way of doing object oriented programming; the
+Linux kernel is no exception.
+
+The central concept in kernel object oriented programming is the class. In the
+kernel, a *class* is a struct that contains function pointers. This creates a
+contract between *implementers* and *users* since it forces them to use the
+same function signature without having to call the function directly. In order
+for it to truly be a class, the function pointers must specify that a pointer
+to the class, known as a *class handle*, be one of the parameters; this makes
+it possible for the member functions (also known as *methods*) to have access
+to member variables (more commonly known as *fields*) allowing the same
+implementation to have multiple *instances*.
+
+Typically a class can be *overridden* by *child classes* by embedding the
+*parent class* in the child class. Then when a method provided by the child
+class is called, the child implementation knows that the pointer passed to it is
+of a parent contained within the child; because of this, the child can compute
+the pointer to itself because the pointer to the parent is always a fixed offset
+from the pointer to the child; this offset is the offset of the parent contained
+in the child struct. For example:
+
+.. code-block:: c
+
+ struct shape {
+ int (*area)(struct shape *this);
+ };
+
+ struct rectangle {
+ struct shape parent;
+ int length;
+ int width;
+ };
+
+ int rectangle_area(struct shape *this)
+ {
+ struct rectangle *self = container_of(this, struct shape, parent);
+
+ return self->length * self->width;
+ };
+
+ void rectangle_new(struct rectangle *self, int length, int width)
+ {
+ self->parent.area = rectangle_area;
+ self->length = length;
+ self->width = width;
+ }
+
+In this example (as in most kernel code) the operation of computing the pointer
+to the child from the pointer to the parent is done by ``container_of``.
+
+Faking Classes
+~~~~~~~~~~~~~~
+
+In order to unit test a piece of code that calls a method in a class, the
+behavior of the method must be controllable, otherwise the test ceases to be a
+unit test and becomes an integration test.
+
+A fake just provides an implementation of a piece of code that is different than
+what runs in a production instance, but behaves identically from the standpoint
+of the callers; this is usually done to replace a dependency that is hard to
+deal with, or is slow.
+
+A good example for this might be implementing a fake EEPROM that just stores the
+"contents" in an internal buffer. For example, let's assume we have a class that
+represents an EEPROM:
+
+.. code-block:: c
+
+ struct eeprom {
+ ssize_t (*read)(struct eeprom *this, size_t offset, char *buffer, size_t count);
+ ssize_t (*write)(struct eeprom *this, size_t offset, const char *buffer, size_t count);
+ };
+
+And we want to test some code that buffers writes to the EEPROM:
+
+.. code-block:: c
+
+ struct eeprom_buffer {
+ ssize_t (*write)(struct eeprom_buffer *this, const char *buffer, size_t count);
+ int flush(struct eeprom_buffer *this);
+ size_t flush_count; /* Flushes when buffer exceeds flush_count. */
+ };
+
+ struct eeprom_buffer *new_eeprom_buffer(struct eeprom *eeprom);
+ void destroy_eeprom_buffer(struct eeprom *eeprom);
+
+We can easily test this code by *faking out* the underlying EEPROM:
+
+.. code-block:: c
+
+ struct fake_eeprom {
+ struct eeprom parent;
+ char contents[FAKE_EEPROM_CONTENTS_SIZE];
+ };
+
+ ssize_t fake_eeprom_read(struct eeprom *parent, size_t offset, char *buffer, size_t count)
+ {
+ struct fake_eeprom *this = container_of(parent, struct fake_eeprom, parent);
+
+ count = min(count, FAKE_EEPROM_CONTENTS_SIZE - offset);
+ memcpy(buffer, this->contents + offset, count);
+
+ return count;
+ }
+
+ ssize_t fake_eeprom_write(struct eeprom *this, size_t offset, const char *buffer, size_t count)
+ {
+ struct fake_eeprom *this = container_of(parent, struct fake_eeprom, parent);
+
+ count = min(count, FAKE_EEPROM_CONTENTS_SIZE - offset);
+ memcpy(this->contents + offset, buffer, count);
+
+ return count;
+ }
+
+ void fake_eeprom_init(struct fake_eeprom *this)
+ {
+ this->parent.read = fake_eeprom_read;
+ this->parent.write = fake_eeprom_write;
+ memset(this->contents, 0, FAKE_EEPROM_CONTENTS_SIZE);
+ }
+
+We can now use it to test ``struct eeprom_buffer``:
+
+.. code-block:: c
+
+ struct eeprom_buffer_test {
+ struct fake_eeprom *fake_eeprom;
+ struct eeprom_buffer *eeprom_buffer;
+ };
+
+ static void eeprom_buffer_test_does_not_write_until_flush(struct kunit *test)
+ {
+ struct eeprom_buffer_test *ctx = test->priv;
+ struct eeprom_buffer *eeprom_buffer = ctx->eeprom_buffer;
+ struct fake_eeprom *fake_eeprom = ctx->fake_eeprom;
+ char buffer[] = {0xff};
+
+ eeprom_buffer->flush_count = SIZE_MAX;
+
+ eeprom_buffer->write(eeprom_buffer, buffer, 1);
+ KUNIT_EXPECT_EQ(test, fake_eeprom->contents[0], 0);
+
+ eeprom_buffer->write(eeprom_buffer, buffer, 1);
+ KUNIT_EXPECT_EQ(test, fake_eeprom->contents[1], 0);
+
+ eeprom_buffer->flush(eeprom_buffer);
+ KUNIT_EXPECT_EQ(test, fake_eeprom->contents[0], 0xff);
+ KUNIT_EXPECT_EQ(test, fake_eeprom->contents[1], 0xff);
+ }
+
+ static void eeprom_buffer_test_flushes_after_flush_count_met(struct kunit *test)
+ {
+ struct eeprom_buffer_test *ctx = test->priv;
+ struct eeprom_buffer *eeprom_buffer = ctx->eeprom_buffer;
+ struct fake_eeprom *fake_eeprom = ctx->fake_eeprom;
+ char buffer[] = {0xff};
+
+ eeprom_buffer->flush_count = 2;
+
+ eeprom_buffer->write(eeprom_buffer, buffer, 1);
+ KUNIT_EXPECT_EQ(test, fake_eeprom->contents[0], 0);
+
+ eeprom_buffer->write(eeprom_buffer, buffer, 1);
+ KUNIT_EXPECT_EQ(test, fake_eeprom->contents[0], 0xff);
+ KUNIT_EXPECT_EQ(test, fake_eeprom->contents[1], 0xff);
+ }
+
+ static void eeprom_buffer_test_flushes_increments_of_flush_count(struct kunit *test)
+ {
+ struct eeprom_buffer_test *ctx = test->priv;
+ struct eeprom_buffer *eeprom_buffer = ctx->eeprom_buffer;
+ struct fake_eeprom *fake_eeprom = ctx->fake_eeprom;
+ char buffer[] = {0xff, 0xff};
+
+ eeprom_buffer->flush_count = 2;
+
+ eeprom_buffer->write(eeprom_buffer, buffer, 1);
+ KUNIT_EXPECT_EQ(test, fake_eeprom->contents[0], 0);
+
+ eeprom_buffer->write(eeprom_buffer, buffer, 2);
+ KUNIT_EXPECT_EQ(test, fake_eeprom->contents[0], 0xff);
+ KUNIT_EXPECT_EQ(test, fake_eeprom->contents[1], 0xff);
+ /* Should have only flushed the first two bytes. */
+ KUNIT_EXPECT_EQ(test, fake_eeprom->contents[2], 0);
+ }
+
+ static int eeprom_buffer_test_init(struct kunit *test)
+ {
+ struct eeprom_buffer_test *ctx;
+
+ ctx = kunit_kzalloc(test, sizeof(*ctx), GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx);
+
+ ctx->fake_eeprom = kunit_kzalloc(test, sizeof(*ctx->fake_eeprom), GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx->fake_eeprom);
+ fake_eeprom_init(ctx->fake_eeprom);
+
+ ctx->eeprom_buffer = new_eeprom_buffer(&ctx->fake_eeprom->parent);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx->eeprom_buffer);
+
+ test->priv = ctx;
+
+ return 0;
+ }
+
+ static void eeprom_buffer_test_exit(struct kunit *test)
+ {
+ struct eeprom_buffer_test *ctx = test->priv;
+
+ destroy_eeprom_buffer(ctx->eeprom_buffer);
+ }
+
+.. _kunit-on-non-uml:
+
+KUnit on non-UML architectures
+==============================
+
+By default KUnit uses UML as a way to provide dependencies for code under test.
+Under most circumstances KUnit's usage of UML should be treated as an
+implementation detail of how KUnit works under the hood. Nevertheless, there
+are instances where being able to run architecture specific code, or test
+against real hardware is desirable. For these reasons KUnit supports running on
+other architectures.
+
+Running existing KUnit tests on non-UML architectures
+-----------------------------------------------------
+
+There are some special considerations when running existing KUnit tests on
+non-UML architectures:
+
+* Hardware may not be deterministic, so a test that always passes or fails
+ when run under UML may not always do so on real hardware.
+* Hardware and VM environments may not be hermetic. KUnit tries its best to
+ provide a hermetic environment to run tests; however, it cannot manage state
+ that it doesn't know about outside of the kernel. Consequently, tests that
+ may be hermetic on UML may not be hermetic on other architectures.
+* Some features and tooling may not be supported outside of UML.
+* Hardware and VMs are slower than UML.
+
+None of these are reasons not to run your KUnit tests on real hardware; they are
+only things to be aware of when doing so.
+
+The biggest impediment will likely be that certain KUnit features and
+infrastructure may not support your target environment. For example, at this
+time the KUnit Wrapper (``tools/testing/kunit/kunit.py``) does not work outside
+of UML. Unfortunately, there is no way around this. Using UML (or even just a
+particular architecture) allows us to make a lot of assumptions that make it
+possible to do things which might otherwise be impossible.
+
+Nevertheless, all core KUnit framework features are fully supported on all
+architectures, and using them is straightforward: all you need to do is to take
+your kunitconfig, your Kconfig options for the tests you would like to run, and
+merge them into whatever config your are using for your platform. That's it!
+
+For example, let's say you have the following kunitconfig:
+
+.. code-block:: none
+
+ CONFIG_KUNIT=y
+ CONFIG_KUNIT_EXAMPLE_TEST=y
+
+If you wanted to run this test on an x86 VM, you might add the following config
+options to your ``.config``:
+
+.. code-block:: none
+
+ CONFIG_KUNIT=y
+ CONFIG_KUNIT_EXAMPLE_TEST=y
+ CONFIG_SERIAL_8250=y
+ CONFIG_SERIAL_8250_CONSOLE=y
+
+All these new options do is enable support for a common serial console needed
+for logging.
+
+Next, you could build a kernel with these tests as follows:
+
+
+.. code-block:: bash
+
+ make ARCH=x86 olddefconfig
+ make ARCH=x86
+
+Once you have built a kernel, you could run it on QEMU as follows:
+
+.. code-block:: bash
+
+ qemu-system-x86_64 -enable-kvm \
+ -m 1024 \
+ -kernel arch/x86_64/boot/bzImage \
+ -append 'console=ttyS0' \
+ --nographic
+
+Interspersed in the kernel logs you might see the following:
+
+.. code-block:: none
+
+ TAP version 14
+ # Subtest: example
+ 1..1
+ # example_simple_test: initializing
+ ok 1 - example_simple_test
+ ok 1 - example
+
+Congratulations, you just ran a KUnit test on the x86 architecture!
+
+Writing new tests for other architectures
+-----------------------------------------
+
+The first thing you must do is ask yourself whether it is necessary to write a
+KUnit test for a specific architecture, and then whether it is necessary to
+write that test for a particular piece of hardware. In general, writing a test
+that depends on having access to a particular piece of hardware or software (not
+included in the Linux source repo) should be avoided at all costs.
+
+Even if you only ever plan on running your KUnit test on your hardware
+configuration, other people may want to run your tests and may not have access
+to your hardware. If you write your test to run on UML, then anyone can run your
+tests without knowing anything about your particular setup, and you can still
+run your tests on your hardware setup just by compiling for your architecture.
+
+.. important::
+ Always prefer tests that run on UML to tests that only run under a particular
+ architecture, and always prefer tests that run under QEMU or another easy
+ (and monitarily free) to obtain software environment to a specific piece of
+ hardware.
+
+Nevertheless, there are still valid reasons to write an architecture or hardware
+specific test: for example, you might want to test some code that really belongs
+in ``arch/some-arch/*``. Even so, try your best to write the test so that it
+does not depend on physical hardware: if some of your test cases don't need the
+hardware, only require the hardware for tests that actually need it.
+
+Now that you have narrowed down exactly what bits are hardware specific, the
+actual procedure for writing and running the tests is pretty much the same as
+writing normal KUnit tests. One special caveat is that you have to reset
+hardware state in between test cases; if this is not possible, you may only be
+able to run one test case per invocation.
+
+.. TODO(brendanhiggins@google.com): Add an actual example of an architecture
+ dependent KUnit test.
diff --git a/Documentation/devicetree/bindings/arm/rockchip.yaml b/Documentation/devicetree/bindings/arm/rockchip.yaml
index c82c5e57d44c..9c7e70335ac0 100644
--- a/Documentation/devicetree/bindings/arm/rockchip.yaml
+++ b/Documentation/devicetree/bindings/arm/rockchip.yaml
@@ -496,12 +496,12 @@ properties:
- description: Theobroma Systems RK3368-uQ7 with Haikou baseboard
items:
- - const: tsd,rk3368-uq7-haikou
+ - const: tsd,rk3368-lion-haikou
- const: rockchip,rk3368
- description: Theobroma Systems RK3399-Q7 with Haikou baseboard
items:
- - const: tsd,rk3399-q7-haikou
+ - const: tsd,rk3399-puma-haikou
- const: rockchip,rk3399
- description: Tronsmart Orion R68 Meta
diff --git a/Documentation/devicetree/bindings/interrupt-controller/atmel,aic.txt b/Documentation/devicetree/bindings/interrupt-controller/atmel,aic.txt
index f4c5d34c4111..7079d44bf3ba 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/atmel,aic.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/atmel,aic.txt
@@ -1,8 +1,11 @@
* Advanced Interrupt Controller (AIC)
Required properties:
-- compatible: Should be "atmel,<chip>-aic"
- <chip> can be "at91rm9200", "sama5d2", "sama5d3" or "sama5d4"
+- compatible: Should be:
+ - "atmel,<chip>-aic" where <chip> can be "at91rm9200", "sama5d2",
+ "sama5d3" or "sama5d4"
+ - "microchip,<chip>-aic" where <chip> can be "sam9x60"
+
- interrupt-controller: Identifies the node as an interrupt controller.
- #interrupt-cells: The number of cells to define the interrupts. It should be 3.
The first cell is the IRQ number (aka "Peripheral IDentifier" on datasheet).
diff --git a/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml b/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml
index 27f38eed389e..d3e423fcb6c2 100644
--- a/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml
+++ b/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
%YAML 1.2
---
-$id: http://devicetree.org/schemas/arm/allwinner,sun4i-a10-csi.yaml#
+$id: http://devicetree.org/schemas/media/allwinner,sun4i-a10-csi.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Allwinner A10 CMOS Sensor Interface (CSI) Device Tree Bindings
@@ -27,14 +27,12 @@ properties:
clocks:
items:
- description: The CSI interface clock
- - description: The CSI module clock
- description: The CSI ISP clock
- description: The CSI DRAM clock
clock-names:
items:
- const: bus
- - const: mod
- const: isp
- const: ram
@@ -89,9 +87,8 @@ examples:
compatible = "allwinner,sun7i-a20-csi0";
reg = <0x01c09000 0x1000>;
interrupts = <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&ccu CLK_AHB_CSI0>, <&ccu CLK_CSI0>,
- <&ccu CLK_CSI_SCLK>, <&ccu CLK_DRAM_CSI0>;
- clock-names = "bus", "mod", "isp", "ram";
+ clocks = <&ccu CLK_AHB_CSI0>, <&ccu CLK_CSI_SCLK>, <&ccu CLK_DRAM_CSI0>;
+ clock-names = "bus", "isp", "ram";
resets = <&ccu RST_CSI0>;
port {
diff --git a/Documentation/devicetree/bindings/mtd/cadence-nand-controller.txt b/Documentation/devicetree/bindings/mtd/cadence-nand-controller.txt
new file mode 100644
index 000000000000..f3893c4d3c6a
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/cadence-nand-controller.txt
@@ -0,0 +1,53 @@
+* Cadence NAND controller
+
+Required properties:
+ - compatible : "cdns,hp-nfc"
+ - reg : Contains two entries, each of which is a tuple consisting of a
+ physical address and length. The first entry is the address and
+ length of the controller register set. The second entry is the
+ address and length of the Slave DMA data port.
+ - reg-names: should contain "reg" and "sdma"
+ - #address-cells: should be 1. The cell encodes the chip select connection.
+ - #size-cells : should be 0.
+ - interrupts : The interrupt number.
+ - clocks: phandle of the controller core clock (nf_clk).
+
+Optional properties:
+ - dmas: shall reference DMA channel associated to the NAND controller
+ - cdns,board-delay-ps : Estimated Board delay. The value includes the total
+ round trip delay for the signals and is used for deciding on values
+ associated with data read capture. The example formula for SDR mode is
+ the following:
+ board delay = RE#PAD delay + PCB trace to device + PCB trace from device
+ + DQ PAD delay
+
+Child nodes represent the available NAND chips.
+
+Required properties of NAND chips:
+ - reg: shall contain the native Chip Select ids from 0 to max supported by
+ the cadence nand flash controller
+
+See Documentation/devicetree/bindings/mtd/nand.txt for more details on
+generic bindings.
+
+Example:
+
+nand_controller: nand-controller@60000000 {
+ compatible = "cdns,hp-nfc";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x60000000 0x10000>, <0x80000000 0x10000>;
+ reg-names = "reg", "sdma";
+ clocks = <&nf_clk>;
+ cdns,board-delay-ps = <4830>;
+ interrupts = <2 0>;
+ nand@0 {
+ reg = <0>;
+ label = "nand-1";
+ };
+ nand@1 {
+ reg = <1>;
+ label = "nand-2";
+ };
+
+};
diff --git a/Documentation/devicetree/bindings/mtd/intel,ixp4xx-flash.txt b/Documentation/devicetree/bindings/mtd/intel,ixp4xx-flash.txt
new file mode 100644
index 000000000000..4bdcb92ae381
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/intel,ixp4xx-flash.txt
@@ -0,0 +1,22 @@
+Flash device on Intel IXP4xx SoC
+
+This flash is regular CFI compatible (Intel or AMD extended) flash chips with
+specific big-endian or mixed-endian memory access pattern.
+
+Required properties:
+- compatible : must be "intel,ixp4xx-flash", "cfi-flash";
+- reg : memory address for the flash chip
+- bank-width : width in bytes of flash interface, should be <2>
+
+For the rest of the properties, see mtd-physmap.txt.
+
+The device tree may optionally contain sub-nodes describing partitions of the
+address space. See partition.txt for more detail.
+
+Example:
+
+flash@50000000 {
+ compatible = "intel,ixp4xx-flash", "cfi-flash";
+ reg = <0x50000000 0x01000000>;
+ bank-width = <2>;
+};
diff --git a/Documentation/devicetree/bindings/perf/arm-ccn.txt b/Documentation/devicetree/bindings/perf/arm-ccn.txt
index 43b5a71a5a9d..1c53b5aa3317 100644
--- a/Documentation/devicetree/bindings/perf/arm-ccn.txt
+++ b/Documentation/devicetree/bindings/perf/arm-ccn.txt
@@ -6,6 +6,7 @@ Required properties:
"arm,ccn-502"
"arm,ccn-504"
"arm,ccn-508"
+ "arm,ccn-512"
- reg: (standard registers property) physical address and size
(16MB) of the configuration registers block
diff --git a/Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt b/Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt
index d77e3f26f9e6..7822a806ea0a 100644
--- a/Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt
+++ b/Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt
@@ -5,6 +5,7 @@ Required properties:
- compatible: should be one of:
"fsl,imx8-ddr-pmu"
"fsl,imx8m-ddr-pmu"
+ "fsl,imx8mp-ddr-pmu"
- reg: physical address and size
diff --git a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml
index f83d888176cc..064b7dfc4252 100644
--- a/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/aspeed,ast2600-pinctrl.yaml
@@ -33,13 +33,13 @@ patternProperties:
allOf:
- $ref: "/schemas/types.yaml#/definitions/string"
- enum: [ ADC0, ADC1, ADC10, ADC11, ADC12, ADC13, ADC14, ADC15,
- ADC2, ADC3, ADC4, ADC5, ADC6, ADC7, ADC8, ADC9, BMCINT, ESPI,
- ESPIALT, FSI1, FSI2, FWSPIABR, FWSPID, FWSPIWP, GPIT0, GPIT1,
- GPIT2, GPIT3, GPIT4, GPIT5, GPIT6, GPIT7, GPIU0, GPIU1, GPIU2,
- GPIU3, GPIU4, GPIU5, GPIU6, GPIU7, I2C1, I2C10, I2C11, I2C12,
- I2C13, I2C14, I2C15, I2C16, I2C2, I2C3, I2C4, I2C5, I2C6, I2C7,
- I2C8, I2C9, I3C3, I3C4, I3C5, I3C6, JTAGM, LHPD, LHSIRQ, LPC,
- LPCHC, LPCPD, LPCPME, LPCSMI, LSIRQ, MACLINK1, MACLINK2,
+ ADC2, ADC3, ADC4, ADC5, ADC6, ADC7, ADC8, ADC9, BMCINT, EMMC,
+ ESPI, ESPIALT, FSI1, FSI2, FWSPIABR, FWSPID, FWSPIWP, GPIT0,
+ GPIT1, GPIT2, GPIT3, GPIT4, GPIT5, GPIT6, GPIT7, GPIU0, GPIU1,
+ GPIU2, GPIU3, GPIU4, GPIU5, GPIU6, GPIU7, I2C1, I2C10, I2C11,
+ I2C12, I2C13, I2C14, I2C15, I2C16, I2C2, I2C3, I2C4, I2C5, I2C6,
+ I2C7, I2C8, I2C9, I3C3, I3C4, I3C5, I3C6, JTAGM, LHPD, LHSIRQ,
+ LPC, LPCHC, LPCPD, LPCPME, LPCSMI, LSIRQ, MACLINK1, MACLINK2,
MACLINK3, MACLINK4, MDIO1, MDIO2, MDIO3, MDIO4, NCTS1, NCTS2,
NCTS3, NCTS4, NDCD1, NDCD2, NDCD3, NDCD4, NDSR1, NDSR2, NDSR3,
NDSR4, NDTR1, NDTR2, NDTR3, NDTR4, NRI1, NRI2, NRI3, NRI4, NRTS1,
@@ -48,47 +48,45 @@ patternProperties:
PWM8, PWM9, RGMII1, RGMII2, RGMII3, RGMII4, RMII1, RMII2, RMII3,
RMII4, RXD1, RXD2, RXD3, RXD4, SALT1, SALT10, SALT11, SALT12,
SALT13, SALT14, SALT15, SALT16, SALT2, SALT3, SALT4, SALT5,
- SALT6, SALT7, SALT8, SALT9, SD1, SD2, SD3, SD3DAT4, SD3DAT5,
- SD3DAT6, SD3DAT7, SGPM1, SGPS1, SIOONCTRL, SIOPBI, SIOPBO,
- SIOPWREQ, SIOPWRGD, SIOS3, SIOS5, SIOSCI, SPI1, SPI1ABR, SPI1CS1,
- SPI1WP, SPI2, SPI2CS1, SPI2CS2, TACH0, TACH1, TACH10, TACH11,
- TACH12, TACH13, TACH14, TACH15, TACH2, TACH3, TACH4, TACH5,
- TACH6, TACH7, TACH8, TACH9, THRU0, THRU1, THRU2, THRU3, TXD1,
- TXD2, TXD3, TXD4, UART10, UART11, UART12, UART13, UART6, UART7,
- UART8, UART9, VB, VGAHS, VGAVS, WDTRST1, WDTRST2, WDTRST3,
- WDTRST4, ]
+ SALT6, SALT7, SALT8, SALT9, SD1, SD2, SGPM1, SGPS1, SIOONCTRL,
+ SIOPBI, SIOPBO, SIOPWREQ, SIOPWRGD, SIOS3, SIOS5, SIOSCI, SPI1,
+ SPI1ABR, SPI1CS1, SPI1WP, SPI2, SPI2CS1, SPI2CS2, TACH0, TACH1,
+ TACH10, TACH11, TACH12, TACH13, TACH14, TACH15, TACH2, TACH3,
+ TACH4, TACH5, TACH6, TACH7, TACH8, TACH9, THRU0, THRU1, THRU2,
+ THRU3, TXD1, TXD2, TXD3, TXD4, UART10, UART11, UART12, UART13,
+ UART6, UART7, UART8, UART9, VB, VGAHS, VGAVS, WDTRST1, WDTRST2,
+ WDTRST3, WDTRST4, ]
groups:
allOf:
- $ref: "/schemas/types.yaml#/definitions/string"
- enum: [ ADC0, ADC1, ADC10, ADC11, ADC12, ADC13, ADC14, ADC15,
- ADC2, ADC3, ADC4, ADC5, ADC6, ADC7, ADC8, ADC9, BMCINT, ESPI,
- ESPIALT, FSI1, FSI2, FWSPIABR, FWSPID, FWQSPID, FWSPIWP, GPIT0,
- GPIT1, GPIT2, GPIT3, GPIT4, GPIT5, GPIT6, GPIT7, GPIU0, GPIU1,
- GPIU2, GPIU3, GPIU4, GPIU5, GPIU6, GPIU7, HVI3C3, HVI3C4, I2C1,
- I2C10, I2C11, I2C12, I2C13, I2C14, I2C15, I2C16, I2C2, I2C3,
- I2C4, I2C5, I2C6, I2C7, I2C8, I2C9, I3C3, I3C4, I3C5, I3C6,
- JTAGM, LHPD, LHSIRQ, LPC, LPCHC, LPCPD, LPCPME, LPCSMI, LSIRQ,
- MACLINK1, MACLINK2, MACLINK3, MACLINK4, MDIO1, MDIO2, MDIO3,
- MDIO4, NCTS1, NCTS2, NCTS3, NCTS4, NDCD1, NDCD2, NDCD3, NDCD4,
- NDSR1, NDSR2, NDSR3, NDSR4, NDTR1, NDTR2, NDTR3, NDTR4, NRI1,
- NRI2, NRI3, NRI4, NRTS1, NRTS2, NRTS3, NRTS4, OSCCLK, PEWAKE,
- PWM0, PWM1, PWM10G0, PWM10G1, PWM11G0, PWM11G1, PWM12G0, PWM12G1,
- PWM13G0, PWM13G1, PWM14G0, PWM14G1, PWM15G0, PWM15G1, PWM2, PWM3,
- PWM4, PWM5, PWM6, PWM7, PWM8G0, PWM8G1, PWM9G0, PWM9G1, QSPI1,
- QSPI2, RGMII1, RGMII2, RGMII3, RGMII4, RMII1, RMII2, RMII3,
- RMII4, RXD1, RXD2, RXD3, RXD4, SALT1, SALT10G0, SALT10G1,
- SALT11G0, SALT11G1, SALT12G0, SALT12G1, SALT13G0, SALT13G1,
- SALT14G0, SALT14G1, SALT15G0, SALT15G1, SALT16G0, SALT16G1,
- SALT2, SALT3, SALT4, SALT5, SALT6, SALT7, SALT8, SALT9G0,
- SALT9G1, SD1, SD2, SD3, SD3DAT4, SD3DAT5, SD3DAT6, SD3DAT7,
- SGPM1, SGPS1, SIOONCTRL, SIOPBI, SIOPBO, SIOPWREQ, SIOPWRGD,
- SIOS3, SIOS5, SIOSCI, SPI1, SPI1ABR, SPI1CS1, SPI1WP, SPI2,
- SPI2CS1, SPI2CS2, TACH0, TACH1, TACH10, TACH11, TACH12, TACH13,
- TACH14, TACH15, TACH2, TACH3, TACH4, TACH5, TACH6, TACH7, TACH8,
- TACH9, THRU0, THRU1, THRU2, THRU3, TXD1, TXD2, TXD3, TXD4,
- UART10, UART11, UART12G0, UART12G1, UART13G0, UART13G1, UART6,
- UART7, UART8, UART9, VB, VGAHS, VGAVS, WDTRST1, WDTRST2, WDTRST3,
- WDTRST4, ]
+ ADC2, ADC3, ADC4, ADC5, ADC6, ADC7, ADC8, ADC9, BMCINT, EMMCG1,
+ EMMCG4, EMMCG8, ESPI, ESPIALT, FSI1, FSI2, FWSPIABR, FWSPID,
+ FWQSPID, FWSPIWP, GPIT0, GPIT1, GPIT2, GPIT3, GPIT4, GPIT5,
+ GPIT6, GPIT7, GPIU0, GPIU1, GPIU2, GPIU3, GPIU4, GPIU5, GPIU6,
+ GPIU7, HVI3C3, HVI3C4, I2C1, I2C10, I2C11, I2C12, I2C13, I2C14,
+ I2C15, I2C16, I2C2, I2C3, I2C4, I2C5, I2C6, I2C7, I2C8, I2C9,
+ I3C3, I3C4, I3C5, I3C6, JTAGM, LHPD, LHSIRQ, LPC, LPCHC, LPCPD,
+ LPCPME, LPCSMI, LSIRQ, MACLINK1, MACLINK2, MACLINK3, MACLINK4,
+ MDIO1, MDIO2, MDIO3, MDIO4, NCTS1, NCTS2, NCTS3, NCTS4, NDCD1,
+ NDCD2, NDCD3, NDCD4, NDSR1, NDSR2, NDSR3, NDSR4, NDTR1, NDTR2,
+ NDTR3, NDTR4, NRI1, NRI2, NRI3, NRI4, NRTS1, NRTS2, NRTS3, NRTS4,
+ OSCCLK, PEWAKE, PWM0, PWM1, PWM10G0, PWM10G1, PWM11G0, PWM11G1,
+ PWM12G0, PWM12G1, PWM13G0, PWM13G1, PWM14G0, PWM14G1, PWM15G0,
+ PWM15G1, PWM2, PWM3, PWM4, PWM5, PWM6, PWM7, PWM8G0, PWM8G1,
+ PWM9G0, PWM9G1, QSPI1, QSPI2, RGMII1, RGMII2, RGMII3, RGMII4,
+ RMII1, RMII2, RMII3, RMII4, RXD1, RXD2, RXD3, RXD4, SALT1,
+ SALT10G0, SALT10G1, SALT11G0, SALT11G1, SALT12G0, SALT12G1,
+ SALT13G0, SALT13G1, SALT14G0, SALT14G1, SALT15G0, SALT15G1,
+ SALT16G0, SALT16G1, SALT2, SALT3, SALT4, SALT5, SALT6, SALT7,
+ SALT8, SALT9G0, SALT9G1, SD1, SD2, SD3, SGPM1, SGPS1, SIOONCTRL,
+ SIOPBI, SIOPBO, SIOPWREQ, SIOPWRGD, SIOS3, SIOS5, SIOSCI, SPI1,
+ SPI1ABR, SPI1CS1, SPI1WP, SPI2, SPI2CS1, SPI2CS2, TACH0, TACH1,
+ TACH10, TACH11, TACH12, TACH13, TACH14, TACH15, TACH2, TACH3,
+ TACH4, TACH5, TACH6, TACH7, TACH8, TACH9, THRU0, THRU1, THRU2,
+ THRU3, TXD1, TXD2, TXD3, TXD4, UART10, UART11, UART12G0,
+ UART12G1, UART13G0, UART13G1, UART6, UART7, UART8, UART9, VB,
+ VGAHS, VGAVS, WDTRST1, WDTRST2, WDTRST3, WDTRST4, ]
required:
- compatible
diff --git a/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml b/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml
index a78150c47aa2..f32416968197 100644
--- a/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml
+++ b/Documentation/devicetree/bindings/regulator/fixed-regulator.yaml
@@ -30,8 +30,8 @@ if:
properties:
compatible:
enum:
- - const: regulator-fixed
- - const: regulator-fixed-clock
+ - regulator-fixed
+ - regulator-fixed-clock
regulator-name: true
diff --git a/Documentation/devicetree/bindings/riscv/cpus.yaml b/Documentation/devicetree/bindings/riscv/cpus.yaml
index b261a3015f84..04819ad379c2 100644
--- a/Documentation/devicetree/bindings/riscv/cpus.yaml
+++ b/Documentation/devicetree/bindings/riscv/cpus.yaml
@@ -24,15 +24,17 @@ description: |
properties:
compatible:
- items:
- - enum:
- - sifive,rocket0
- - sifive,e5
- - sifive,e51
- - sifive,u54-mc
- - sifive,u54
- - sifive,u5
- - const: riscv
+ oneOf:
+ - items:
+ - enum:
+ - sifive,rocket0
+ - sifive,e5
+ - sifive,e51
+ - sifive,u54-mc
+ - sifive,u54
+ - sifive,u5
+ - const: riscv
+ - const: riscv # Simulator only
description:
Identifies that the hart uses the RISC-V instruction set
and identifies the type of the hart.
@@ -66,12 +68,8 @@ properties:
insensitive, letters in the riscv,isa string must be all
lowercase to simplify parsing.
- timebase-frequency:
- type: integer
- minimum: 1
- description:
- Specifies the clock frequency of the system timer in Hz.
- This value is common to all harts on a single system image.
+ # RISC-V requires 'timebase-frequency' in /cpus, so disallow it here
+ timebase-frequency: false
interrupt-controller:
type: object
@@ -93,7 +91,6 @@ properties:
required:
- riscv,isa
- - timebase-frequency
- interrupt-controller
examples:
diff --git a/Documentation/devicetree/bindings/security/tpm/google,cr50.txt b/Documentation/devicetree/bindings/security/tpm/google,cr50.txt
new file mode 100644
index 000000000000..cd69c2efdd37
--- /dev/null
+++ b/Documentation/devicetree/bindings/security/tpm/google,cr50.txt
@@ -0,0 +1,19 @@
+* H1 Secure Microcontroller with Cr50 Firmware on SPI Bus.
+
+H1 Secure Microcontroller running Cr50 firmware provides several
+functions, including TPM-like functionality. It communicates over
+SPI using the FIFO protocol described in the PTP Spec, section 6.
+
+Required properties:
+- compatible: Should be "google,cr50".
+- spi-max-frequency: Maximum SPI frequency.
+
+Example:
+
+&spi0 {
+ tpm@0 {
+ compatible = "google,cr50";
+ reg = <0>;
+ spi-max-frequency = <800000>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt b/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt
index dd63151dc8b6..b143d9a21b2d 100644
--- a/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt
+++ b/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt
@@ -26,6 +26,8 @@ Required properties:
- "renesas,hscif-r8a77470" for R8A77470 (RZ/G1C) HSCIF compatible UART.
- "renesas,scif-r8a774a1" for R8A774A1 (RZ/G2M) SCIF compatible UART.
- "renesas,hscif-r8a774a1" for R8A774A1 (RZ/G2M) HSCIF compatible UART.
+ - "renesas,scif-r8a774b1" for R8A774B1 (RZ/G2N) SCIF compatible UART.
+ - "renesas,hscif-r8a774b1" for R8A774B1 (RZ/G2N) HSCIF compatible UART.
- "renesas,scif-r8a774c0" for R8A774C0 (RZ/G2E) SCIF compatible UART.
- "renesas,hscif-r8a774c0" for R8A774C0 (RZ/G2E) HSCIF compatible UART.
- "renesas,scif-r8a7778" for R8A7778 (R-Car M1) SCIF compatible UART.
diff --git a/Documentation/devicetree/bindings/usb/amlogic,dwc3.txt b/Documentation/devicetree/bindings/usb/amlogic,dwc3.txt
index b9f04e617eb7..6ffb09be7a76 100644
--- a/Documentation/devicetree/bindings/usb/amlogic,dwc3.txt
+++ b/Documentation/devicetree/bindings/usb/amlogic,dwc3.txt
@@ -85,8 +85,8 @@ A child node must exist to represent the core DWC2 IP block. The name of
the node is not important. The content of the node is defined in dwc2.txt.
PHY documentation is provided in the following places:
-- Documentation/devicetree/bindings/phy/meson-g12a-usb2-phy.txt
-- Documentation/devicetree/bindings/phy/meson-g12a-usb3-pcie-phy.txt
+- Documentation/devicetree/bindings/phy/amlogic,meson-g12a-usb2-phy.yaml
+- Documentation/devicetree/bindings/phy/amlogic,meson-g12a-usb3-pcie-phy.yaml
Example device nodes:
usb: usb@ffe09000 {
diff --git a/Documentation/devicetree/bindings/usb/generic-ehci.yaml b/Documentation/devicetree/bindings/usb/generic-ehci.yaml
index 059f6ef1ad4a..1ca64c85191a 100644
--- a/Documentation/devicetree/bindings/usb/generic-ehci.yaml
+++ b/Documentation/devicetree/bindings/usb/generic-ehci.yaml
@@ -63,7 +63,11 @@ properties:
description:
Set this flag to force EHCI reset after resume.
- phys: true
+ phys:
+ description: PHY specifier for the USB PHY
+
+ phy-names:
+ const: usb
required:
- compatible
@@ -89,6 +93,7 @@ examples:
interrupts = <39>;
clocks = <&ahb_gates 1>;
phys = <&usbphy 1>;
+ phy-names = "usb";
};
...
diff --git a/Documentation/devicetree/bindings/usb/generic-ohci.yaml b/Documentation/devicetree/bindings/usb/generic-ohci.yaml
index da5a14becbe5..bcffec1f1341 100644
--- a/Documentation/devicetree/bindings/usb/generic-ohci.yaml
+++ b/Documentation/devicetree/bindings/usb/generic-ohci.yaml
@@ -67,7 +67,11 @@ properties:
description:
Overrides the detected port count
- phys: true
+ phys:
+ description: PHY specifier for the USB PHY
+
+ phy-names:
+ const: usb
required:
- compatible
@@ -84,6 +88,7 @@ examples:
interrupts = <64>;
clocks = <&usb_clk 6>, <&ahb_gates 2>;
phys = <&usbphy 1>;
+ phy-names = "usb";
};
...
diff --git a/Documentation/devicetree/bindings/usb/mediatek,mtk-xhci.txt b/Documentation/devicetree/bindings/usb/mediatek,mtk-xhci.txt
index f3e4acecabe8..42d8814f903a 100644
--- a/Documentation/devicetree/bindings/usb/mediatek,mtk-xhci.txt
+++ b/Documentation/devicetree/bindings/usb/mediatek,mtk-xhci.txt
@@ -33,7 +33,7 @@ Required properties:
"dma_ck": dma_bus clock for data transfer by DMA,
"xhci_ck": controller clock
- - phys : see usb-hcd.txt in the current directory
+ - phys : see usb-hcd.yaml in the current directory
Optional properties:
- wakeup-source : enable USB remote wakeup;
@@ -53,7 +53,7 @@ Optional properties:
See: Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
- imod-interval-ns: default interrupt moderation interval is 5000ns
-additionally the properties from usb-hcd.txt (in the current directory) are
+additionally the properties from usb-hcd.yaml (in the current directory) are
supported.
Example:
diff --git a/Documentation/devicetree/bindings/usb/mediatek,mtu3.txt b/Documentation/devicetree/bindings/usb/mediatek,mtu3.txt
index b9af7f5ee91d..e0ae6096f7ac 100644
--- a/Documentation/devicetree/bindings/usb/mediatek,mtu3.txt
+++ b/Documentation/devicetree/bindings/usb/mediatek,mtu3.txt
@@ -17,7 +17,7 @@ Required properties:
- clock-names : must contain "sys_ck" for clock of controller,
the following clocks are optional:
"ref_ck", "mcu_ck" and "dma_ck";
- - phys : see usb-hcd.txt in the current directory
+ - phys : see usb-hcd.yaml in the current directory
- dr_mode : should be one of "host", "peripheral" or "otg",
refer to usb/generic.txt
@@ -60,7 +60,7 @@ Optional properties:
- mediatek,u3p-dis-msk : mask to disable u3ports, bit0 for u3port0,
bit1 for u3port1, ... etc;
-additionally the properties from usb-hcd.txt (in the current directory) are
+additionally the properties from usb-hcd.yaml (in the current directory) are
supported.
Sub-nodes:
diff --git a/Documentation/devicetree/bindings/usb/usb-hcd.yaml b/Documentation/devicetree/bindings/usb/usb-hcd.yaml
index 9c8c56d3a792..7263b7f2b510 100644
--- a/Documentation/devicetree/bindings/usb/usb-hcd.yaml
+++ b/Documentation/devicetree/bindings/usb/usb-hcd.yaml
@@ -18,8 +18,13 @@ properties:
description:
List of all the USB PHYs on this HCD
+ phy-names:
+ description:
+ Name specifier for the USB PHY
+
examples:
- |
usb {
phys = <&usb2_phy1>, <&usb3_phy1>;
+ phy-names = "usb";
};
diff --git a/Documentation/devicetree/bindings/usb/usb-uhci.txt b/Documentation/devicetree/bindings/usb/usb-uhci.txt
index cc2e6f7d602e..d1702eb2c8bd 100644
--- a/Documentation/devicetree/bindings/usb/usb-uhci.txt
+++ b/Documentation/devicetree/bindings/usb/usb-uhci.txt
@@ -6,7 +6,7 @@ Required properties:
- reg : Should contain 1 register ranges(address and length)
- interrupts : UHCI controller interrupt
-additionally the properties from usb-hcd.txt (in the current directory) are
+additionally the properties from usb-hcd.yaml (in the current directory) are
supported.
Example:
diff --git a/Documentation/devicetree/bindings/usb/usb-xhci.txt b/Documentation/devicetree/bindings/usb/usb-xhci.txt
index 97400e8f8605..b49b819571f9 100644
--- a/Documentation/devicetree/bindings/usb/usb-xhci.txt
+++ b/Documentation/devicetree/bindings/usb/usb-xhci.txt
@@ -41,9 +41,9 @@ Optional properties:
- usb3-lpm-capable: determines if platform is USB3 LPM capable
- quirk-broken-port-ped: set if the controller has broken port disable mechanism
- imod-interval-ns: default interrupt moderation interval is 5000ns
- - phys : see usb-hcd.txt in the current directory
+ - phys : see usb-hcd.yaml in the current directory
-additionally the properties from usb-hcd.txt (in the current directory) are
+additionally the properties from usb-hcd.yaml (in the current directory) are
supported.
diff --git a/Documentation/driver-api/libata.rst b/Documentation/driver-api/libata.rst
index 70e180e6b93d..207f0d24de69 100644
--- a/Documentation/driver-api/libata.rst
+++ b/Documentation/driver-api/libata.rst
@@ -250,23 +250,23 @@ High-level taskfile hooks
::
- void (*qc_prep) (struct ata_queued_cmd *qc);
+ enum ata_completion_errors (*qc_prep) (struct ata_queued_cmd *qc);
int (*qc_issue) (struct ata_queued_cmd *qc);
-Higher-level hooks, these two hooks can potentially supercede several of
+Higher-level hooks, these two hooks can potentially supersede several of
the above taskfile/DMA engine hooks. ``->qc_prep`` is called after the
buffers have been DMA-mapped, and is typically used to populate the
-hardware's DMA scatter-gather table. Most drivers use the standard
-:c:func:`ata_qc_prep` helper function, but more advanced drivers roll their
-own.
+hardware's DMA scatter-gather table. Some drivers use the standard
+:c:func:`ata_bmdma_qc_prep` and :c:func:`ata_bmdma_dumb_qc_prep` helper
+functions, but more advanced drivers roll their own.
``->qc_issue`` is used to make a command active, once the hardware and S/G
tables have been prepared. IDE BMDMA drivers use the helper function
-:c:func:`ata_qc_issue_prot` for taskfile protocol-based dispatch. More
+:c:func:`ata_sff_qc_issue` for taskfile protocol-based dispatch. More
advanced drivers implement their own ``->qc_issue``.
-:c:func:`ata_qc_issue_prot` calls ``->tf_load()``, ``->bmdma_setup()``, and
+:c:func:`ata_sff_qc_issue` calls ``->sff_tf_load()``, ``->bmdma_setup()``, and
``->bmdma_start()`` as necessary to initiate a transfer.
Exception and probe handling (EH)
diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst
index 8a0700af9596..471a511c7508 100644
--- a/Documentation/filesystems/fscrypt.rst
+++ b/Documentation/filesystems/fscrypt.rst
@@ -256,13 +256,8 @@ alternative master keys or to support rotating master keys. Instead,
the master keys may be wrapped in userspace, e.g. as is done by the
`fscrypt <https://github.com/google/fscrypt>`_ tool.
-Including the inode number in the IVs was considered. However, it was
-rejected as it would have prevented ext4 filesystems from being
-resized, and by itself still wouldn't have been sufficient to prevent
-the same key from being directly reused for both XTS and CTS-CBC.
-
-DIRECT_KEY and per-mode keys
-----------------------------
+DIRECT_KEY policies
+-------------------
The Adiantum encryption mode (see `Encryption modes and usage`_) is
suitable for both contents and filenames encryption, and it accepts
@@ -285,6 +280,21 @@ IV. Moreover:
key derived using the KDF. Users may use the same master key for
other v2 encryption policies.
+IV_INO_LBLK_64 policies
+-----------------------
+
+When FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64 is set in the fscrypt policy,
+the encryption keys are derived from the master key, encryption mode
+number, and filesystem UUID. This normally results in all files
+protected by the same master key sharing a single contents encryption
+key and a single filenames encryption key. To still encrypt different
+files' data differently, inode numbers are included in the IVs.
+Consequently, shrinking the filesystem may not be allowed.
+
+This format is optimized for use with inline encryption hardware
+compliant with the UFS or eMMC standards, which support only 64 IV
+bits per I/O request and may have only a small number of keyslots.
+
Key identifiers
---------------
@@ -308,8 +318,9 @@ If unsure, you should use the (AES-256-XTS, AES-256-CTS-CBC) pair.
AES-128-CBC was added only for low-powered embedded devices with
crypto accelerators such as CAAM or CESA that do not support XTS. To
-use AES-128-CBC, CONFIG_CRYPTO_SHA256 (or another SHA-256
-implementation) must be enabled so that ESSIV can be used.
+use AES-128-CBC, CONFIG_CRYPTO_ESSIV and CONFIG_CRYPTO_SHA256 (or
+another SHA-256 implementation) must be enabled so that ESSIV can be
+used.
Adiantum is a (primarily) stream cipher-based mode that is fast even
on CPUs without dedicated crypto instructions. It's also a true
@@ -341,10 +352,16 @@ a little endian number, except that:
is encrypted with AES-256 where the AES-256 key is the SHA-256 hash
of the file's data encryption key.
-- In the "direct key" configuration (FSCRYPT_POLICY_FLAG_DIRECT_KEY
- set in the fscrypt_policy), the file's nonce is also appended to the
- IV. Currently this is only allowed with the Adiantum encryption
- mode.
+- With `DIRECT_KEY policies`_, the file's nonce is appended to the IV.
+ Currently this is only allowed with the Adiantum encryption mode.
+
+- With `IV_INO_LBLK_64 policies`_, the logical block number is limited
+ to 32 bits and is placed in bits 0-31 of the IV. The inode number
+ (which is also limited to 32 bits) is placed in bits 32-63.
+
+Note that because file logical block numbers are included in the IVs,
+filesystems must enforce that blocks are never shifted around within
+encrypted files, e.g. via "collapse range" or "insert range".
Filenames encryption
--------------------
@@ -354,10 +371,10 @@ the requirements to retain support for efficient directory lookups and
filenames of up to 255 bytes, the same IV is used for every filename
in a directory.
-However, each encrypted directory still uses a unique key; or
-alternatively (for the "direct key" configuration) has the file's
-nonce included in the IVs. Thus, IV reuse is limited to within a
-single directory.
+However, each encrypted directory still uses a unique key, or
+alternatively has the file's nonce (for `DIRECT_KEY policies`_) or
+inode number (for `IV_INO_LBLK_64 policies`_) included in the IVs.
+Thus, IV reuse is limited to within a single directory.
With CTS-CBC, the IV reuse means that when the plaintext filenames
share a common prefix at least as long as the cipher block size (16
@@ -431,12 +448,15 @@ This structure must be initialized as follows:
(1) for ``contents_encryption_mode`` and FSCRYPT_MODE_AES_256_CTS
(4) for ``filenames_encryption_mode``.
-- ``flags`` must contain a value from ``<linux/fscrypt.h>`` which
- identifies the amount of NUL-padding to use when encrypting
- filenames. If unsure, use FSCRYPT_POLICY_FLAGS_PAD_32 (0x3).
- Additionally, if the encryption modes are both
- FSCRYPT_MODE_ADIANTUM, this can contain
- FSCRYPT_POLICY_FLAG_DIRECT_KEY; see `DIRECT_KEY and per-mode keys`_.
+- ``flags`` contains optional flags from ``<linux/fscrypt.h>``:
+
+ - FSCRYPT_POLICY_FLAGS_PAD_*: The amount of NUL padding to use when
+ encrypting filenames. If unsure, use FSCRYPT_POLICY_FLAGS_PAD_32
+ (0x3).
+ - FSCRYPT_POLICY_FLAG_DIRECT_KEY: See `DIRECT_KEY policies`_.
+ - FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64: See `IV_INO_LBLK_64
+ policies`_. This is mutually exclusive with DIRECT_KEY and is not
+ supported on v1 policies.
- For v2 encryption policies, ``__reserved`` must be zeroed.
@@ -1089,7 +1109,7 @@ policy structs (see `Setting an encryption policy`_), except that the
context structs also contain a nonce. The nonce is randomly generated
by the kernel and is used as KDF input or as a tweak to cause
different files to be encrypted differently; see `Per-file keys`_ and
-`DIRECT_KEY and per-mode keys`_.
+`DIRECT_KEY policies`_.
Data path changes
-----------------
diff --git a/Documentation/filesystems/fsverity.rst b/Documentation/filesystems/fsverity.rst
index 42a0b6dd9e0b..a95536b6443c 100644
--- a/Documentation/filesystems/fsverity.rst
+++ b/Documentation/filesystems/fsverity.rst
@@ -226,6 +226,14 @@ To do so, check for FS_VERITY_FL (0x00100000) in the returned flags.
The verity flag is not settable via FS_IOC_SETFLAGS. You must use
FS_IOC_ENABLE_VERITY instead, since parameters must be provided.
+statx
+-----
+
+Since Linux v5.5, the statx() system call sets STATX_ATTR_VERITY if
+the file has fs-verity enabled. This can perform better than
+FS_IOC_GETFLAGS and FS_IOC_MEASURE_VERITY because it doesn't require
+opening the file, and opening verity files can be expensive.
+
Accessing verity files
======================
@@ -398,7 +406,7 @@ pages have been read into the pagecache. (See `Verifying data`_.)
ext4
----
-ext4 supports fs-verity since Linux TODO and e2fsprogs v1.45.2.
+ext4 supports fs-verity since Linux v5.4 and e2fsprogs v1.45.2.
To create verity files on an ext4 filesystem, the filesystem must have
been formatted with ``-O verity`` or had ``tune2fs -O verity`` run on
@@ -434,7 +442,7 @@ also only supports extent-based files.
f2fs
----
-f2fs supports fs-verity since Linux TODO and f2fs-tools v1.11.0.
+f2fs supports fs-verity since Linux v5.4 and f2fs-tools v1.11.0.
To create verity files on an f2fs filesystem, the filesystem must have
been formatted with ``-O verity``.
diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst
index 8147c3f218bf..230ad59b462b 100644
--- a/Documentation/hwmon/index.rst
+++ b/Documentation/hwmon/index.rst
@@ -7,6 +7,7 @@ Linux Hardware Monitoring
hwmon-kernel-api
pmbus-core
+ inspur-ipsps1
submitting-patches
sysfs-interface
userspace-tools
diff --git a/Documentation/hwmon/inspur-ipsps1.rst b/Documentation/hwmon/inspur-ipsps1.rst
index 2b871ae3448f..292c0c26bdd1 100644
--- a/Documentation/hwmon/inspur-ipsps1.rst
+++ b/Documentation/hwmon/inspur-ipsps1.rst
@@ -1,5 +1,5 @@
Kernel driver inspur-ipsps1
-=======================
+===========================
Supported chips:
diff --git a/Documentation/hwmon/k10temp.rst b/Documentation/hwmon/k10temp.rst
index 12a86ba17de9..4451d59b9425 100644
--- a/Documentation/hwmon/k10temp.rst
+++ b/Documentation/hwmon/k10temp.rst
@@ -21,10 +21,17 @@ Supported chips:
* AMD Family 14h processors: "Brazos" (C/E/G/Z-Series)
-* AMD Family 15h processors: "Bulldozer" (FX-Series), "Trinity", "Kaveri", "Carrizo"
+* AMD Family 15h processors: "Bulldozer" (FX-Series), "Trinity", "Kaveri",
+ "Carrizo", "Stoney Ridge", "Bristol Ridge"
* AMD Family 16h processors: "Kabini", "Mullins"
+* AMD Family 17h processors: "Zen", "Zen 2"
+
+* AMD Family 18h processors: "Hygon Dhyana"
+
+* AMD Family 19h processors: "Zen 3"
+
Prefix: 'k10temp'
Addresses scanned: PCI space
@@ -110,3 +117,12 @@ The maximum value for Tctl is available in the file temp1_max.
If the BIOS has enabled hardware temperature control, the threshold at
which the processor will throttle itself to avoid damage is available in
temp1_crit and temp1_crit_hyst.
+
+On some AMD CPUs, there is a difference between the die temperature (Tdie) and
+the reported temperature (Tctl). Tdie is the real measured temperature, and
+Tctl is used for fan control. While Tctl is always available as temp1_input,
+the driver exports Tdie temperature as temp2_input for those CPUs which support
+it.
+
+Models from 17h family report relative temperature, the driver aims to
+compensate and report the real temperature.
diff --git a/Documentation/ioctl/ioctl-number.rst b/Documentation/ioctl/ioctl-number.rst
index bef79cd4c6b4..4ef86433bd67 100644
--- a/Documentation/ioctl/ioctl-number.rst
+++ b/Documentation/ioctl/ioctl-number.rst
@@ -233,6 +233,7 @@ Code Seq# Include File Comments
'f' 00-0F fs/ext4/ext4.h conflict!
'f' 00-0F linux/fs.h conflict!
'f' 00-0F fs/ocfs2/ocfs2_fs.h conflict!
+'f' 13-27 linux/fscrypt.h
'f' 81-8F linux/fsverity.h
'g' 00-0F linux/usb/gadgetfs.h
'g' 20-2F linux/usb/g_printer.h
diff --git a/Documentation/networking/device_drivers/intel/e100.rst b/Documentation/networking/device_drivers/intel/e100.rst
index 2b9f4887beda..caf023cc88de 100644
--- a/Documentation/networking/device_drivers/intel/e100.rst
+++ b/Documentation/networking/device_drivers/intel/e100.rst
@@ -1,8 +1,8 @@
.. SPDX-License-Identifier: GPL-2.0+
-==============================================================
-Linux* Base Driver for the Intel(R) PRO/100 Family of Adapters
-==============================================================
+=============================================================
+Linux Base Driver for the Intel(R) PRO/100 Family of Adapters
+=============================================================
June 1, 2018
@@ -21,7 +21,7 @@ Contents
In This Release
===============
-This file describes the Linux* Base Driver for the Intel(R) PRO/100 Family of
+This file describes the Linux Base Driver for the Intel(R) PRO/100 Family of
Adapters. This driver includes support for Itanium(R)2-based systems.
For questions related to hardware requirements, refer to the documentation
@@ -138,9 +138,9 @@ version 1.6 or later is required for this functionality.
The latest release of ethtool can be found from
https://www.kernel.org/pub/software/network/ethtool/
-Enabling Wake on LAN* (WoL)
----------------------------
-WoL is provided through the ethtool* utility. For instructions on
+Enabling Wake on LAN (WoL)
+--------------------------
+WoL is provided through the ethtool utility. For instructions on
enabling WoL with ethtool, refer to the ethtool man page. WoL will be
enabled on the system during the next shut down or reboot. For this
driver version, in order to enable WoL, the e100 driver must be loaded
diff --git a/Documentation/networking/device_drivers/intel/e1000.rst b/Documentation/networking/device_drivers/intel/e1000.rst
index 956560b6e745..4aaae0f7d6ba 100644
--- a/Documentation/networking/device_drivers/intel/e1000.rst
+++ b/Documentation/networking/device_drivers/intel/e1000.rst
@@ -1,8 +1,8 @@
.. SPDX-License-Identifier: GPL-2.0+
-===========================================================
-Linux* Base Driver for Intel(R) Ethernet Network Connection
-===========================================================
+==========================================================
+Linux Base Driver for Intel(R) Ethernet Network Connection
+==========================================================
Intel Gigabit Linux driver.
Copyright(c) 1999 - 2013 Intel Corporation.
@@ -438,10 +438,10 @@ ethtool
The latest release of ethtool can be found from
https://www.kernel.org/pub/software/network/ethtool/
-Enabling Wake on LAN* (WoL)
----------------------------
+Enabling Wake on LAN (WoL)
+--------------------------
- WoL is configured through the ethtool* utility.
+ WoL is configured through the ethtool utility.
WoL will be enabled on the system during the next shut down or reboot.
For this driver version, in order to enable WoL, the e1000 driver must be
diff --git a/Documentation/networking/device_drivers/intel/e1000e.rst b/Documentation/networking/device_drivers/intel/e1000e.rst
index 01999f05509c..f49cd370e7bf 100644
--- a/Documentation/networking/device_drivers/intel/e1000e.rst
+++ b/Documentation/networking/device_drivers/intel/e1000e.rst
@@ -1,8 +1,8 @@
.. SPDX-License-Identifier: GPL-2.0+
-======================================================
-Linux* Driver for Intel(R) Ethernet Network Connection
-======================================================
+=====================================================
+Linux Driver for Intel(R) Ethernet Network Connection
+=====================================================
Intel Gigabit Linux driver.
Copyright(c) 2008-2018 Intel Corporation.
@@ -338,7 +338,7 @@ and higher cannot be forced. Use the autonegotiation advertising setting to
manually set devices for 1 Gbps and higher.
Speed, duplex, and autonegotiation advertising are configured through the
-ethtool* utility.
+ethtool utility.
Caution: Only experienced network administrators should force speed and duplex
or change autonegotiation advertising manually. The settings at the switch must
@@ -351,9 +351,9 @@ will not attempt to auto-negotiate with its link partner since those adapters
operate only in full duplex and only at their native speed.
-Enabling Wake on LAN* (WoL)
----------------------------
-WoL is configured through the ethtool* utility.
+Enabling Wake on LAN (WoL)
+--------------------------
+WoL is configured through the ethtool utility.
WoL will be enabled on the system during the next shut down or reboot. For
this driver version, in order to enable WoL, the e1000e driver must be loaded
diff --git a/Documentation/networking/device_drivers/intel/fm10k.rst b/Documentation/networking/device_drivers/intel/fm10k.rst
index ac3269e34f55..4d279e64e221 100644
--- a/Documentation/networking/device_drivers/intel/fm10k.rst
+++ b/Documentation/networking/device_drivers/intel/fm10k.rst
@@ -1,8 +1,8 @@
.. SPDX-License-Identifier: GPL-2.0+
-==============================================================
-Linux* Base Driver for Intel(R) Ethernet Multi-host Controller
-==============================================================
+=============================================================
+Linux Base Driver for Intel(R) Ethernet Multi-host Controller
+=============================================================
August 20, 2018
Copyright(c) 2015-2018 Intel Corporation.
@@ -120,8 +120,8 @@ rx-flow-hash tcp4|udp4|ah4|esp4|sctp4|tcp6|udp6|ah6|esp6|sctp6 m|v|t|s|d|f|n|r
Known Issues/Troubleshooting
============================
-Enabling SR-IOV in a 64-bit Microsoft* Windows Server* 2012/R2 guest OS under Linux KVM
----------------------------------------------------------------------------------------
+Enabling SR-IOV in a 64-bit Microsoft Windows Server 2012/R2 guest OS under Linux KVM
+-------------------------------------------------------------------------------------
KVM Hypervisor/VMM supports direct assignment of a PCIe device to a VM. This
includes traditional PCIe devices, as well as SR-IOV-capable devices based on
the Intel Ethernet Controller XL710.
diff --git a/Documentation/networking/device_drivers/intel/i40e.rst b/Documentation/networking/device_drivers/intel/i40e.rst
index 848fd388fa6e..8a9b18573688 100644
--- a/Documentation/networking/device_drivers/intel/i40e.rst
+++ b/Documentation/networking/device_drivers/intel/i40e.rst
@@ -1,8 +1,8 @@
.. SPDX-License-Identifier: GPL-2.0+
-==================================================================
-Linux* Base Driver for the Intel(R) Ethernet Controller 700 Series
-==================================================================
+=================================================================
+Linux Base Driver for the Intel(R) Ethernet Controller 700 Series
+=================================================================
Intel 40 Gigabit Linux driver.
Copyright(c) 1999-2018 Intel Corporation.
@@ -384,7 +384,7 @@ NOTE: You cannot set the speed for devices based on the Intel(R) Ethernet
Network Adapter XXV710 based devices.
Speed, duplex, and autonegotiation advertising are configured through the
-ethtool* utility.
+ethtool utility.
Caution: Only experienced network administrators should force speed and duplex
or change autonegotiation advertising manually. The settings at the switch must
diff --git a/Documentation/networking/device_drivers/intel/iavf.rst b/Documentation/networking/device_drivers/intel/iavf.rst
index cfc08842e32c..84ac7e75f363 100644
--- a/Documentation/networking/device_drivers/intel/iavf.rst
+++ b/Documentation/networking/device_drivers/intel/iavf.rst
@@ -1,8 +1,8 @@
.. SPDX-License-Identifier: GPL-2.0+
-==================================================================
-Linux* Base Driver for Intel(R) Ethernet Adaptive Virtual Function
-==================================================================
+=================================================================
+Linux Base Driver for Intel(R) Ethernet Adaptive Virtual Function
+=================================================================
Intel Ethernet Adaptive Virtual Function Linux driver.
Copyright(c) 2013-2018 Intel Corporation.
@@ -19,7 +19,7 @@ Contents
Overview
========
-This file describes the iavf Linux* Base Driver. This driver was formerly
+This file describes the iavf Linux Base Driver. This driver was formerly
called i40evf.
The iavf driver supports the below mentioned virtual function devices and
diff --git a/Documentation/networking/device_drivers/intel/ice.rst b/Documentation/networking/device_drivers/intel/ice.rst
index c220aa2711c6..ee43ea57d443 100644
--- a/Documentation/networking/device_drivers/intel/ice.rst
+++ b/Documentation/networking/device_drivers/intel/ice.rst
@@ -1,8 +1,8 @@
.. SPDX-License-Identifier: GPL-2.0+
-===================================================================
-Linux* Base Driver for the Intel(R) Ethernet Connection E800 Series
-===================================================================
+==================================================================
+Linux Base Driver for the Intel(R) Ethernet Connection E800 Series
+==================================================================
Intel ice Linux driver.
Copyright(c) 2018 Intel Corporation.
diff --git a/Documentation/networking/device_drivers/intel/igb.rst b/Documentation/networking/device_drivers/intel/igb.rst
index fc8cfaa5dcfa..87e560fe5eaa 100644
--- a/Documentation/networking/device_drivers/intel/igb.rst
+++ b/Documentation/networking/device_drivers/intel/igb.rst
@@ -1,8 +1,8 @@
.. SPDX-License-Identifier: GPL-2.0+
-===========================================================
-Linux* Base Driver for Intel(R) Ethernet Network Connection
-===========================================================
+==========================================================
+Linux Base Driver for Intel(R) Ethernet Network Connection
+==========================================================
Intel Gigabit Linux driver.
Copyright(c) 1999-2018 Intel Corporation.
@@ -129,9 +129,9 @@ version is required for this functionality. Download it at:
https://www.kernel.org/pub/software/network/ethtool/
-Enabling Wake on LAN* (WoL)
----------------------------
-WoL is configured through the ethtool* utility.
+Enabling Wake on LAN (WoL)
+--------------------------
+WoL is configured through the ethtool utility.
WoL will be enabled on the system during the next shut down or reboot. For
this driver version, in order to enable WoL, the igb driver must be loaded
diff --git a/Documentation/networking/device_drivers/intel/igbvf.rst b/Documentation/networking/device_drivers/intel/igbvf.rst
index 9cddabe8108e..557fc020ef31 100644
--- a/Documentation/networking/device_drivers/intel/igbvf.rst
+++ b/Documentation/networking/device_drivers/intel/igbvf.rst
@@ -1,8 +1,8 @@
.. SPDX-License-Identifier: GPL-2.0+
-============================================================
-Linux* Base Virtual Function Driver for Intel(R) 1G Ethernet
-============================================================
+===========================================================
+Linux Base Virtual Function Driver for Intel(R) 1G Ethernet
+===========================================================
Intel Gigabit Virtual Function Linux driver.
Copyright(c) 1999-2018 Intel Corporation.
diff --git a/Documentation/networking/device_drivers/intel/ixgbe.rst b/Documentation/networking/device_drivers/intel/ixgbe.rst
index c7d25483fedb..f1d5233e5e51 100644
--- a/Documentation/networking/device_drivers/intel/ixgbe.rst
+++ b/Documentation/networking/device_drivers/intel/ixgbe.rst
@@ -1,8 +1,8 @@
.. SPDX-License-Identifier: GPL-2.0+
-=============================================================================
-Linux* Base Driver for the Intel(R) Ethernet 10 Gigabit PCI Express Adapters
-=============================================================================
+===========================================================================
+Linux Base Driver for the Intel(R) Ethernet 10 Gigabit PCI Express Adapters
+===========================================================================
Intel 10 Gigabit Linux driver.
Copyright(c) 1999-2018 Intel Corporation.
@@ -519,8 +519,8 @@ The offload is also supported for ixgbe's VFs, but the VF must be set as
Known Issues/Troubleshooting
============================
-Enabling SR-IOV in a 64-bit Microsoft* Windows Server* 2012/R2 guest OS
------------------------------------------------------------------------
+Enabling SR-IOV in a 64-bit Microsoft Windows Server 2012/R2 guest OS
+---------------------------------------------------------------------
Linux KVM Hypervisor/VMM supports direct assignment of a PCIe device to a VM.
This includes traditional PCIe devices, as well as SR-IOV-capable devices based
on the Intel Ethernet Controller XL710.
diff --git a/Documentation/networking/device_drivers/intel/ixgbevf.rst b/Documentation/networking/device_drivers/intel/ixgbevf.rst
index 5d4977360157..76bbde736f21 100644
--- a/Documentation/networking/device_drivers/intel/ixgbevf.rst
+++ b/Documentation/networking/device_drivers/intel/ixgbevf.rst
@@ -1,8 +1,8 @@
.. SPDX-License-Identifier: GPL-2.0+
-=============================================================
-Linux* Base Virtual Function Driver for Intel(R) 10G Ethernet
-=============================================================
+============================================================
+Linux Base Virtual Function Driver for Intel(R) 10G Ethernet
+============================================================
Intel 10 Gigabit Virtual Function Linux driver.
Copyright(c) 1999-2018 Intel Corporation.
diff --git a/Documentation/networking/device_drivers/pensando/ionic.rst b/Documentation/networking/device_drivers/pensando/ionic.rst
index 67b6839d516b..c17d680cf334 100644
--- a/Documentation/networking/device_drivers/pensando/ionic.rst
+++ b/Documentation/networking/device_drivers/pensando/ionic.rst
@@ -1,8 +1,8 @@
.. SPDX-License-Identifier: GPL-2.0+
-==========================================================
-Linux* Driver for the Pensando(R) Ethernet adapter family
-==========================================================
+========================================================
+Linux Driver for the Pensando(R) Ethernet adapter family
+========================================================
Pensando Linux Ethernet driver.
Copyright(c) 2019 Pensando Systems, Inc
@@ -36,8 +36,10 @@ Support
=======
For general Linux networking support, please use the netdev mailing
list, which is monitored by Pensando personnel::
+
netdev@vger.kernel.org
For more specific support needs, please use the Pensando driver support
email::
- drivers@pensando.io
+
+ drivers@pensando.io
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 49e95f438ed7..8d4ad1d1ae26 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -207,8 +207,8 @@ TCP variables:
somaxconn - INTEGER
Limit of socket listen() backlog, known in userspace as SOMAXCONN.
- Defaults to 128. See also tcp_max_syn_backlog for additional tuning
- for TCP sockets.
+ Defaults to 4096. (Was 128 before linux-5.4)
+ See also tcp_max_syn_backlog for additional tuning for TCP sockets.
tcp_abort_on_overflow - BOOLEAN
If listening service is too slow to accept new connections,
@@ -408,11 +408,14 @@ tcp_max_orphans - INTEGER
up to ~64K of unswappable memory.
tcp_max_syn_backlog - INTEGER
- Maximal number of remembered connection requests, which have not
- received an acknowledgment from connecting client.
+ Maximal number of remembered connection requests (SYN_RECV),
+ which have not received an acknowledgment from connecting client.
+ This is a per-listener limit.
The minimal value is 128 for low memory machines, and it will
increase in proportion to the memory of machine.
If server suffers from overload, try increasing this number.
+ Remember to also check /proc/sys/net/core/somaxconn
+ A SYN_RECV request socket consumes about 304 bytes of memory.
tcp_max_tw_buckets - INTEGER
Maximal number of timewait sockets held by system simultaneously.
diff --git a/Documentation/networking/net_dim.txt b/Documentation/networking/net_dim.txt
index 9cb31c5e2dcd..9bdb7d5a3ba3 100644
--- a/Documentation/networking/net_dim.txt
+++ b/Documentation/networking/net_dim.txt
@@ -92,16 +92,16 @@ under some conditions.
Part III: Registering a Network Device to DIM
==============================================
-Net DIM API exposes the main function net_dim(struct net_dim *dim,
-struct net_dim_sample end_sample). This function is the entry point to the Net
+Net DIM API exposes the main function net_dim(struct dim *dim,
+struct dim_sample end_sample). This function is the entry point to the Net
DIM algorithm and has to be called every time the driver would like to check if
it should change interrupt moderation parameters. The driver should provide two
-data structures: struct net_dim and struct net_dim_sample. Struct net_dim
+data structures: struct dim and struct dim_sample. Struct dim
describes the state of DIM for a specific object (RX queue, TX queue,
other queues, etc.). This includes the current selected profile, previous data
samples, the callback function provided by the driver and more.
-Struct net_dim_sample describes a data sample, which will be compared to the
-data sample stored in struct net_dim in order to decide on the algorithm's next
+Struct dim_sample describes a data sample, which will be compared to the
+data sample stored in struct dim in order to decide on the algorithm's next
step. The sample should include bytes, packets and interrupts, measured by
the driver.
@@ -110,9 +110,9 @@ main net_dim() function. The recommended method is to call net_dim() on each
interrupt. Since Net DIM has a built-in moderation and it might decide to skip
iterations under certain conditions, there is no need to moderate the net_dim()
calls as well. As mentioned above, the driver needs to provide an object of type
-struct net_dim to the net_dim() function call. It is advised for each entity
-using Net DIM to hold a struct net_dim as part of its data structure and use it
-as the main Net DIM API object. The struct net_dim_sample should hold the latest
+struct dim to the net_dim() function call. It is advised for each entity
+using Net DIM to hold a struct dim as part of its data structure and use it
+as the main Net DIM API object. The struct dim_sample should hold the latest
bytes, packets and interrupts count. No need to perform any calculations, just
include the raw data.
@@ -132,19 +132,19 @@ usage is not complete but it should make the outline of the usage clear.
my_driver.c:
-#include <linux/net_dim.h>
+#include <linux/dim.h>
/* Callback for net DIM to schedule on a decision to change moderation */
void my_driver_do_dim_work(struct work_struct *work)
{
- /* Get struct net_dim from struct work_struct */
- struct net_dim *dim = container_of(work, struct net_dim,
- work);
+ /* Get struct dim from struct work_struct */
+ struct dim *dim = container_of(work, struct dim,
+ work);
/* Do interrupt moderation related stuff */
...
/* Signal net DIM work is done and it should move to next iteration */
- dim->state = NET_DIM_START_MEASURE;
+ dim->state = DIM_START_MEASURE;
}
/* My driver's interrupt handler */
@@ -152,13 +152,13 @@ int my_driver_handle_interrupt(struct my_driver_entity *my_entity, ...)
{
...
/* A struct to hold current measured data */
- struct net_dim_sample dim_sample;
+ struct dim_sample dim_sample;
...
/* Initiate data sample struct with current data */
- net_dim_sample(my_entity->events,
- my_entity->packets,
- my_entity->bytes,
- &dim_sample);
+ dim_update_sample(my_entity->events,
+ my_entity->packets,
+ my_entity->bytes,
+ &dim_sample);
/* Call net DIM */
net_dim(&my_entity->dim, dim_sample);
...
diff --git a/Documentation/networking/tls-offload.rst b/Documentation/networking/tls-offload.rst
index 0dd3f748239f..f914e81fd3a6 100644
--- a/Documentation/networking/tls-offload.rst
+++ b/Documentation/networking/tls-offload.rst
@@ -436,6 +436,10 @@ by the driver:
encryption.
* ``tx_tls_ooo`` - number of TX packets which were part of a TLS stream
but did not arrive in the expected order.
+ * ``tx_tls_skip_no_sync_data`` - number of TX packets which were part of
+ a TLS stream and arrived out-of-order, but skipped the HW offload routine
+ and went to the regular transmit flow as they were retransmissions of the
+ connection handshake.
* ``tx_tls_drop_no_sync_data`` - number of TX packets which were part of
a TLS stream dropped, because they arrived out of order and associated
record could not be found.
diff --git a/Documentation/process/coding-style.rst b/Documentation/process/coding-style.rst
index f4a2198187f9..ada573b7d703 100644
--- a/Documentation/process/coding-style.rst
+++ b/Documentation/process/coding-style.rst
@@ -56,7 +56,7 @@ instead of ``double-indenting`` the ``case`` labels. E.g.:
case 'K':
case 'k':
mem <<= 10;
- /* fall through */
+ fallthrough;
default:
break;
}
diff --git a/Documentation/process/deprecated.rst b/Documentation/process/deprecated.rst
index 053b24a6dd38..179f2a5625a0 100644
--- a/Documentation/process/deprecated.rst
+++ b/Documentation/process/deprecated.rst
@@ -122,14 +122,27 @@ memory adjacent to the stack (when built without `CONFIG_VMAP_STACK=y`)
Implicit switch case fall-through
---------------------------------
-The C language allows switch cases to "fall through" when
-a "break" statement is missing at the end of a case. This,
-however, introduces ambiguity in the code, as it's not always
-clear if the missing break is intentional or a bug. As there
-have been a long list of flaws `due to missing "break" statements
+The C language allows switch cases to "fall-through" when a "break" statement
+is missing at the end of a case. This, however, introduces ambiguity in the
+code, as it's not always clear if the missing break is intentional or a bug.
+
+As there have been a long list of flaws `due to missing "break" statements
<https://cwe.mitre.org/data/definitions/484.html>`_, we no longer allow
-"implicit fall-through". In order to identify an intentional fall-through
-case, we have adopted the marking used by static analyzers: a comment
-saying `/* Fall through */`. Once the C++17 `__attribute__((fallthrough))`
-is more widely handled by C compilers, static analyzers, and IDEs, we can
-switch to using that instead.
+"implicit fall-through".
+
+In order to identify intentional fall-through cases, we have adopted a
+pseudo-keyword macro 'fallthrough' which expands to gcc's extension
+__attribute__((__fallthrough__)). `Statement Attributes
+<https://gcc.gnu.org/onlinedocs/gcc/Statement-Attributes.html>`_
+
+When the C17/C18 [[fallthrough]] syntax is more commonly supported by
+C compilers, static analyzers, and IDEs, we can switch to using that syntax
+for the macro pseudo-keyword.
+
+All switch/case blocks must end in one of:
+
+ break;
+ fallthrough;
+ continue;
+ goto <label>;
+ return [expression];
diff --git a/Documentation/usb/rio.rst b/Documentation/usb/rio.rst
deleted file mode 100644
index ea73475471db..000000000000
--- a/Documentation/usb/rio.rst
+++ /dev/null
@@ -1,109 +0,0 @@
-============
-Diamonds Rio
-============
-
-Copyright (C) 1999, 2000 Bruce Tenison
-
-Portions Copyright (C) 1999, 2000 David Nelson
-
-Thanks to David Nelson for guidance and the usage of the scanner.txt
-and scanner.c files to model our driver and this informative file.
-
-Mar. 2, 2000
-
-Changes
-=======
-
-- Initial Revision
-
-
-Overview
-========
-
-This README will address issues regarding how to configure the kernel
-to access a RIO 500 mp3 player.
-Before I explain how to use this to access the Rio500 please be warned:
-
-.. warning::
-
- Please note that this software is still under development. The authors
- are in no way responsible for any damage that may occur, no matter how
- inconsequential.
-
-It seems that the Rio has a problem when sending .mp3 with low batteries.
-I suggest when the batteries are low and you want to transfer stuff that you
-replace it with a fresh one. In my case, what happened is I lost two 16kb
-blocks (they are no longer usable to store information to it). But I don't
-know if that's normal or not; it could simply be a problem with the flash
-memory.
-
-In an extreme case, I left my Rio playing overnight and the batteries wore
-down to nothing and appear to have corrupted the flash memory. My RIO
-needed to be replaced as a result. Diamond tech support is aware of the
-problem. Do NOT allow your batteries to wear down to nothing before
-changing them. It appears RIO 500 firmware does not handle low battery
-power well at all.
-
-On systems with OHCI controllers, the kernel OHCI code appears to have
-power on problems with some chipsets. If you are having problems
-connecting to your RIO 500, try turning it on first and then plugging it
-into the USB cable.
-
-Contact Information
--------------------
-
- The main page for the project is hosted at sourceforge.net in the following
- URL: <http://rio500.sourceforge.net>. You can also go to the project's
- sourceforge home page at: <http://sourceforge.net/projects/rio500/>.
- There is also a mailing list: rio500-users@lists.sourceforge.net
-
-Authors
--------
-
-Most of the code was written by Cesar Miquel <miquel@df.uba.ar>. Keith
-Clayton <kclayton@jps.net> is incharge of the PPC port and making sure
-things work there. Bruce Tenison <btenison@dibbs.net> is adding support
-for .fon files and also does testing. The program will mostly sure be
-re-written and Pete Ikusz along with the rest will re-design it. I would
-also like to thank Tri Nguyen <tmn_3022000@hotmail.com> who provided use
-with some important information regarding the communication with the Rio.
-
-Additional Information and userspace tools
-
- http://rio500.sourceforge.net/
-
-
-Requirements
-============
-
-A host with a USB port running a Linux kernel with RIO 500 support enabled.
-
-The driver is a module called rio500, which should be automatically loaded
-as you plug in your device. If that fails you can manually load it with
-
- modprobe rio500
-
-Udev should automatically create a device node as soon as plug in your device.
-If that fails, you can manually add a device for the USB rio500::
-
- mknod /dev/usb/rio500 c 180 64
-
-In that case, set appropriate permissions for /dev/usb/rio500 (don't forget
-about group and world permissions). Both read and write permissions are
-required for proper operation.
-
-That's it. The Rio500 Utils at: http://rio500.sourceforge.net should
-be able to access the rio500.
-
-Limits
-======
-
-You can use only a single rio500 device at a time with your computer.
-
-Bugs
-====
-
-If you encounter any problems feel free to drop me an email.
-
-Bruce Tenison
-btenison@dibbs.net
diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst
index af64c4bb4447..a8de2fbc1caa 100644
--- a/Documentation/x86/index.rst
+++ b/Documentation/x86/index.rst
@@ -27,6 +27,7 @@ x86-specific Documentation
mds
microcode
resctrl_ui
+ tsx_async_abort
usb-legacy-support
i386/index
x86_64/index
diff --git a/Documentation/x86/tsx_async_abort.rst b/Documentation/x86/tsx_async_abort.rst
new file mode 100644
index 000000000000..583ddc185ba2
--- /dev/null
+++ b/Documentation/x86/tsx_async_abort.rst
@@ -0,0 +1,117 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+TSX Async Abort (TAA) mitigation
+================================
+
+.. _tsx_async_abort:
+
+Overview
+--------
+
+TSX Async Abort (TAA) is a side channel attack on internal buffers in some
+Intel processors similar to Microachitectural Data Sampling (MDS). In this
+case certain loads may speculatively pass invalid data to dependent operations
+when an asynchronous abort condition is pending in a Transactional
+Synchronization Extensions (TSX) transaction. This includes loads with no
+fault or assist condition. Such loads may speculatively expose stale data from
+the same uarch data structures as in MDS, with same scope of exposure i.e.
+same-thread and cross-thread. This issue affects all current processors that
+support TSX.
+
+Mitigation strategy
+-------------------
+
+a) TSX disable - one of the mitigations is to disable TSX. A new MSR
+IA32_TSX_CTRL will be available in future and current processors after
+microcode update which can be used to disable TSX. In addition, it
+controls the enumeration of the TSX feature bits (RTM and HLE) in CPUID.
+
+b) Clear CPU buffers - similar to MDS, clearing the CPU buffers mitigates this
+vulnerability. More details on this approach can be found in
+:ref:`Documentation/admin-guide/hw-vuln/mds.rst <mds>`.
+
+Kernel internal mitigation modes
+--------------------------------
+
+ ============= ============================================================
+ off Mitigation is disabled. Either the CPU is not affected or
+ tsx_async_abort=off is supplied on the kernel command line.
+
+ tsx disabled Mitigation is enabled. TSX feature is disabled by default at
+ bootup on processors that support TSX control.
+
+ verw Mitigation is enabled. CPU is affected and MD_CLEAR is
+ advertised in CPUID.
+
+ ucode needed Mitigation is enabled. CPU is affected and MD_CLEAR is not
+ advertised in CPUID. That is mainly for virtualization
+ scenarios where the host has the updated microcode but the
+ hypervisor does not expose MD_CLEAR in CPUID. It's a best
+ effort approach without guarantee.
+ ============= ============================================================
+
+If the CPU is affected and the "tsx_async_abort" kernel command line parameter is
+not provided then the kernel selects an appropriate mitigation depending on the
+status of RTM and MD_CLEAR CPUID bits.
+
+Below tables indicate the impact of tsx=on|off|auto cmdline options on state of
+TAA mitigation, VERW behavior and TSX feature for various combinations of
+MSR_IA32_ARCH_CAPABILITIES bits.
+
+1. "tsx=off"
+
+========= ========= ============ ============ ============== =================== ======================
+MSR_IA32_ARCH_CAPABILITIES bits Result with cmdline tsx=off
+---------------------------------- -------------------------------------------------------------------------
+TAA_NO MDS_NO TSX_CTRL_MSR TSX state VERW can clear TAA mitigation TAA mitigation
+ after bootup CPU buffers tsx_async_abort=off tsx_async_abort=full
+========= ========= ============ ============ ============== =================== ======================
+ 0 0 0 HW default Yes Same as MDS Same as MDS
+ 0 0 1 Invalid case Invalid case Invalid case Invalid case
+ 0 1 0 HW default No Need ucode update Need ucode update
+ 0 1 1 Disabled Yes TSX disabled TSX disabled
+ 1 X 1 Disabled X None needed None needed
+========= ========= ============ ============ ============== =================== ======================
+
+2. "tsx=on"
+
+========= ========= ============ ============ ============== =================== ======================
+MSR_IA32_ARCH_CAPABILITIES bits Result with cmdline tsx=on
+---------------------------------- -------------------------------------------------------------------------
+TAA_NO MDS_NO TSX_CTRL_MSR TSX state VERW can clear TAA mitigation TAA mitigation
+ after bootup CPU buffers tsx_async_abort=off tsx_async_abort=full
+========= ========= ============ ============ ============== =================== ======================
+ 0 0 0 HW default Yes Same as MDS Same as MDS
+ 0 0 1 Invalid case Invalid case Invalid case Invalid case
+ 0 1 0 HW default No Need ucode update Need ucode update
+ 0 1 1 Enabled Yes None Same as MDS
+ 1 X 1 Enabled X None needed None needed
+========= ========= ============ ============ ============== =================== ======================
+
+3. "tsx=auto"
+
+========= ========= ============ ============ ============== =================== ======================
+MSR_IA32_ARCH_CAPABILITIES bits Result with cmdline tsx=auto
+---------------------------------- -------------------------------------------------------------------------
+TAA_NO MDS_NO TSX_CTRL_MSR TSX state VERW can clear TAA mitigation TAA mitigation
+ after bootup CPU buffers tsx_async_abort=off tsx_async_abort=full
+========= ========= ============ ============ ============== =================== ======================
+ 0 0 0 HW default Yes Same as MDS Same as MDS
+ 0 0 1 Invalid case Invalid case Invalid case Invalid case
+ 0 1 0 HW default No Need ucode update Need ucode update
+ 0 1 1 Disabled Yes TSX disabled TSX disabled
+ 1 X 1 Enabled X None needed None needed
+========= ========= ============ ============ ============== =================== ======================
+
+In the tables, TSX_CTRL_MSR is a new bit in MSR_IA32_ARCH_CAPABILITIES that
+indicates whether MSR_IA32_TSX_CTRL is supported.
+
+There are two control bits in IA32_TSX_CTRL MSR:
+
+ Bit 0: When set it disables the Restricted Transactional Memory (RTM)
+ sub-feature of TSX (will force all transactions to abort on the
+ XBEGIN instruction).
+
+ Bit 1: When set it disables the enumeration of the RTM and HLE feature
+ (i.e. it will make CPUID(EAX=7).EBX{bit4} and
+ CPUID(EAX=7).EBX{bit11} read as 0).
diff --git a/MAINTAINERS b/MAINTAINERS
index 55199ef7fa74..58000f47b91b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -643,7 +643,7 @@ F: drivers/net/ethernet/alacritech/*
FORCEDETH GIGABIT ETHERNET DRIVER
M: Rain River <rain.1986.08.12@gmail.com>
-M: Zhu Yanjun <yanjun.zhu@oracle.com>
+M: Zhu Yanjun <zyjzyj2000@gmail.com>
L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/ethernet/nvidia/*
@@ -2165,12 +2165,10 @@ F: arch/arm64/boot/dts/realtek/
F: Documentation/devicetree/bindings/arm/realtek.yaml
ARM/RENESAS ARM64 ARCHITECTURE
-M: Simon Horman <horms@verge.net.au>
M: Geert Uytterhoeven <geert+renesas@glider.be>
M: Magnus Damm <magnus.damm@gmail.com>
L: linux-renesas-soc@vger.kernel.org
Q: http://patchwork.kernel.org/project/linux-renesas-soc/list/
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/horms/renesas.git next
T: git git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-devel.git next
S: Supported
F: arch/arm64/boot/dts/renesas/
@@ -2282,12 +2280,10 @@ S: Maintained
F: drivers/media/platform/s5p-mfc/
ARM/SHMOBILE ARM ARCHITECTURE
-M: Simon Horman <horms@verge.net.au>
M: Geert Uytterhoeven <geert+renesas@glider.be>
M: Magnus Damm <magnus.damm@gmail.com>
L: linux-renesas-soc@vger.kernel.org
Q: http://patchwork.kernel.org/project/linux-renesas-soc/list/
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/horms/renesas.git next
T: git git://git.kernel.org/pub/scm/linux/kernel/git/geert/renesas-devel.git next
S: Supported
F: arch/arm/boot/dts/emev2*
@@ -2327,11 +2323,13 @@ F: drivers/edac/altera_edac.
ARM/SPREADTRUM SoC SUPPORT
M: Orson Zhai <orsonzhai@gmail.com>
-M: Baolin Wang <baolin.wang@linaro.org>
+M: Baolin Wang <baolin.wang7@gmail.com>
M: Chunyan Zhang <zhang.lyra@gmail.com>
S: Maintained
F: arch/arm64/boot/dts/sprd
N: sprd
+N: sc27xx
+N: sc2731
ARM/STI ARCHITECTURE
M: Patrice Chotard <patrice.chotard@st.com>
@@ -2613,6 +2611,7 @@ S: Maintained
F: arch/arm64/
X: arch/arm64/boot/dts/
F: Documentation/arm64/
+F: tools/testing/selftests/arm64/
AS3645A LED FLASH CONTROLLER DRIVER
M: Sakari Ailus <sakari.ailus@iki.fi>
@@ -3055,6 +3054,7 @@ M: Daniel Borkmann <daniel@iogearbox.net>
R: Martin KaFai Lau <kafai@fb.com>
R: Song Liu <songliubraving@fb.com>
R: Yonghong Song <yhs@fb.com>
+R: Andrii Nakryiko <andriin@fb.com>
L: netdev@vger.kernel.org
L: bpf@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git
@@ -3100,7 +3100,7 @@ S: Supported
F: arch/arm64/net/
BPF JIT for MIPS (32-BIT AND 64-BIT)
-M: Paul Burton <paul.burton@mips.com>
+M: Paul Burton <paulburton@kernel.org>
L: netdev@vger.kernel.org
L: bpf@vger.kernel.org
S: Maintained
@@ -3187,7 +3187,7 @@ N: bcm216*
N: kona
F: arch/arm/mach-bcm/
-BROADCOM BCM2835 ARM ARCHITECTURE
+BROADCOM BCM2711/BCM2835 ARM ARCHITECTURE
M: Eric Anholt <eric@anholt.net>
M: Stefan Wahren <wahrenst@gmx.net>
L: bcm-kernel-feedback-list@broadcom.com
@@ -3195,6 +3195,7 @@ L: linux-rpi-kernel@lists.infradead.org (moderated for non-subscribers)
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
T: git git://github.com/anholt/linux
S: Maintained
+N: bcm2711
N: bcm2835
F: drivers/staging/vc04_services
@@ -3241,8 +3242,6 @@ S: Maintained
F: drivers/usb/gadget/udc/bcm63xx_udc.*
BROADCOM BCM7XXX ARM ARCHITECTURE
-M: Brian Norris <computersforpeace@gmail.com>
-M: Gregory Fong <gregory.0xf0@gmail.com>
M: Florian Fainelli <f.fainelli@gmail.com>
M: bcm-kernel-feedback-list@broadcom.com
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -3263,7 +3262,6 @@ S: Maintained
F: drivers/cpufreq/bmips-cpufreq.c
BROADCOM BMIPS MIPS ARCHITECTURE
-M: Kevin Cernekee <cernekee@gmail.com>
M: Florian Fainelli <f.fainelli@gmail.com>
L: bcm-kernel-feedback-list@broadcom.com
L: linux-mips@vger.kernel.org
@@ -3598,6 +3596,13 @@ S: Maintained
F: Documentation/devicetree/bindings/media/cdns,*.txt
F: drivers/media/platform/cadence/cdns-csi2*
+CADENCE NAND DRIVER
+M: Piotr Sroka <piotrs@cadence.com>
+L: linux-mtd@lists.infradead.org
+S: Maintained
+F: drivers/mtd/nand/raw/cadence-nand-controller.c
+F: Documentation/devicetree/bindings/mtd/cadence-nand-controller.txt
+
CADET FM/AM RADIO RECEIVER DRIVER
M: Hans Verkuil <hverkuil@xs4all.nl>
L: linux-media@vger.kernel.org
@@ -3740,7 +3745,6 @@ F: drivers/crypto/cavium/cpt/
CAVIUM THUNDERX2 ARM64 SOC
M: Robert Richter <rrichter@cavium.com>
-M: Jayachandran C <jnair@caviumnetworks.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Maintained
F: arch/arm64/boot/dts/cavium/thunder2-99xx*
@@ -8005,7 +8009,7 @@ S: Maintained
F: drivers/usb/atm/ueagle-atm.c
IMGTEC ASCII LCD DRIVER
-M: Paul Burton <paul.burton@mips.com>
+M: Paul Burton <paulburton@kernel.org>
S: Maintained
F: Documentation/devicetree/bindings/auxdisplay/img-ascii-lcd.txt
F: drivers/auxdisplay/img-ascii-lcd.c
@@ -8302,11 +8306,14 @@ F: drivers/hid/intel-ish-hid/
INTEL IOMMU (VT-d)
M: David Woodhouse <dwmw2@infradead.org>
+M: Lu Baolu <baolu.lu@linux.intel.com>
L: iommu@lists.linux-foundation.org
-T: git git://git.infradead.org/iommu-2.6.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git
S: Supported
-F: drivers/iommu/intel-iommu.c
+F: drivers/iommu/dmar.c
+F: drivers/iommu/intel*.[ch]
F: include/linux/intel-iommu.h
+F: include/linux/intel-svm.h
INTEL IOP-ADMA DMA DRIVER
R: Dan Williams <dan.j.williams@intel.com>
@@ -8565,12 +8572,13 @@ F: include/linux/iova.h
IO_URING
M: Jens Axboe <axboe@kernel.dk>
-L: linux-block@vger.kernel.org
-L: linux-fsdevel@vger.kernel.org
+L: io-uring@vger.kernel.org
T: git git://git.kernel.dk/linux-block
T: git git://git.kernel.dk/liburing
S: Maintained
F: fs/io_uring.c
+F: fs/io-wq.c
+F: fs/io-wq.h
F: include/uapi/linux/io_uring.h
IPMI SUBSYSTEM
@@ -8921,6 +8929,17 @@ S: Maintained
F: tools/testing/selftests/
F: Documentation/dev-tools/kselftest*
+KERNEL UNIT TESTING FRAMEWORK (KUnit)
+M: Brendan Higgins <brendanhiggins@google.com>
+L: linux-kselftest@vger.kernel.org
+L: kunit-dev@googlegroups.com
+W: https://google.github.io/kunit-docs/third_party/kernel/docs/
+S: Maintained
+F: Documentation/dev-tools/kunit/
+F: include/kunit/
+F: lib/kunit/
+F: tools/testing/kunit/
+
KERNEL USERMODE HELPER
M: Luis Chamberlain <mcgrof@kernel.org>
L: linux-kernel@vger.kernel.org
@@ -9126,7 +9145,7 @@ F: drivers/auxdisplay/ks0108.c
F: include/linux/ks0108.h
L3MDEV
-M: David Ahern <dsa@cumulusnetworks.com>
+M: David Ahern <dsahern@kernel.org>
L: netdev@vger.kernel.org
S: Maintained
F: net/l3mdev
@@ -9187,6 +9206,7 @@ M: Pavel Machek <pavel@ucw.cz>
R: Dan Murphy <dmurphy@ti.com>
L: linux-leds@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/j.anaszewski/linux-leds.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/pavel/linux-leds.git
S: Maintained
F: Documentation/devicetree/bindings/leds/
F: drivers/leds/
@@ -9497,6 +9517,13 @@ F: Documentation/misc-devices/lis3lv02d.rst
F: drivers/misc/lis3lv02d/
F: drivers/platform/x86/hp_accel.c
+LIST KUNIT TEST
+M: David Gow <davidgow@google.com>
+L: linux-kselftest@vger.kernel.org
+L: kunit-dev@googlegroups.com
+S: Maintained
+F: lib/list-test.c
+
LIVE PATCHING
M: Josh Poimboeuf <jpoimboe@redhat.com>
M: Jiri Kosina <jikos@kernel.org>
@@ -10258,7 +10285,7 @@ MEDIATEK ETHERNET DRIVER
M: Felix Fietkau <nbd@openwrt.org>
M: John Crispin <john@phrozen.org>
M: Sean Wang <sean.wang@mediatek.com>
-M: Nelson Chang <nelson.chang@mediatek.com>
+M: Mark Lee <Mark-MC.Lee@mediatek.com>
L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/ethernet/mediatek/
@@ -10521,8 +10548,12 @@ F: mm/memblock.c
F: Documentation/core-api/boot-time-mm.rst
MEMORY MANAGEMENT
+M: Andrew Morton <akpm@linux-foundation.org>
L: linux-mm@kvack.org
W: http://www.linux-mm.org
+T: quilt https://ozlabs.org/~akpm/mmotm/
+T: quilt https://ozlabs.org/~akpm/mmots/
+T: git git://github.com/hnaz/linux-mm.git
S: Maintained
F: include/linux/mm.h
F: include/linux/gfp.h
@@ -10532,15 +10563,13 @@ F: include/linux/vmalloc.h
F: mm/
MEMORY TECHNOLOGY DEVICES (MTD)
-M: David Woodhouse <dwmw2@infradead.org>
-M: Brian Norris <computersforpeace@gmail.com>
-M: Marek Vasut <marek.vasut@gmail.com>
M: Miquel Raynal <miquel.raynal@bootlin.com>
M: Richard Weinberger <richard@nod.at>
M: Vignesh Raghavendra <vigneshr@ti.com>
L: linux-mtd@lists.infradead.org
W: http://www.linux-mtd.infradead.org/
Q: http://patchwork.ozlabs.org/project/linux-mtd/list/
+C: irc://irc.oftc.net/mtd
T: git git://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux.git mtd/fixes
T: git git://git.kernel.org/pub/scm/linux/kernel/git/mtd/linux.git mtd/next
S: Maintained
@@ -10831,7 +10860,7 @@ F: drivers/usb/image/microtek.*
MIPS
M: Ralf Baechle <ralf@linux-mips.org>
-M: Paul Burton <paul.burton@mips.com>
+M: Paul Burton <paulburton@kernel.org>
M: James Hogan <jhogan@kernel.org>
L: linux-mips@vger.kernel.org
W: http://www.linux-mips.org/
@@ -10845,7 +10874,7 @@ F: arch/mips/
F: drivers/platform/mips/
MIPS BOSTON DEVELOPMENT BOARD
-M: Paul Burton <paul.burton@mips.com>
+M: Paul Burton <paulburton@kernel.org>
L: linux-mips@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/clock/img,boston-clock.txt
@@ -10855,7 +10884,7 @@ F: drivers/clk/imgtec/clk-boston.c
F: include/dt-bindings/clock/boston-clock.h
MIPS GENERIC PLATFORM
-M: Paul Burton <paul.burton@mips.com>
+M: Paul Burton <paulburton@kernel.org>
L: linux-mips@vger.kernel.org
S: Supported
F: Documentation/devicetree/bindings/power/mti,mips-cpc.txt
@@ -11410,7 +11439,6 @@ F: include/trace/events/tcp.h
NETWORKING [TLS]
M: Boris Pismenny <borisp@mellanox.com>
M: Aviad Yehezkel <aviadye@mellanox.com>
-M: Dave Watson <davejwatson@fb.com>
M: John Fastabend <john.fastabend@gmail.com>
M: Daniel Borkmann <daniel@iogearbox.net>
M: Jakub Kicinski <jakub.kicinski@netronome.com>
@@ -11547,6 +11575,7 @@ NSDEPS
M: Matthias Maennich <maennich@google.com>
S: Maintained
F: scripts/nsdeps
+F: Documentation/core-api/symbol-namespaces.rst
NTB AMD DRIVER
M: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
@@ -11633,6 +11662,7 @@ F: drivers/nvme/target/fcloop.c
NVM EXPRESS TARGET DRIVER
M: Christoph Hellwig <hch@lst.de>
M: Sagi Grimberg <sagi@grimberg.me>
+M: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
L: linux-nvme@lists.infradead.org
T: git://git.infradead.org/nvme.git
W: http://git.infradead.org/nvme.git
@@ -12314,12 +12344,15 @@ F: arch/parisc/
F: Documentation/parisc/
F: drivers/parisc/
F: drivers/char/agp/parisc-agp.c
+F: drivers/input/misc/hp_sdc_rtc.c
F: drivers/input/serio/gscps2.c
+F: drivers/input/serio/hp_sdc*
F: drivers/parport/parport_gsc.*
F: drivers/tty/serial/8250/8250_gsc.c
F: drivers/video/fbdev/sti*
F: drivers/video/console/sti*
F: drivers/video/logo/logo_parisc*
+F: include/linux/hp_sdc.h
PARMAN
M: Jiri Pirko <jiri@mellanox.com>
@@ -13125,12 +13158,14 @@ F: Documentation/filesystems/proc.txt
PROC SYSCTL
M: Luis Chamberlain <mcgrof@kernel.org>
M: Kees Cook <keescook@chromium.org>
+M: Iurii Zaikin <yzaikin@google.com>
L: linux-kernel@vger.kernel.org
L: linux-fsdevel@vger.kernel.org
S: Maintained
F: fs/proc/proc_sysctl.c
F: include/linux/sysctl.h
F: kernel/sysctl.c
+F: kernel/sysctl-test.c
F: tools/testing/selftests/sysctl/
PS3 NETWORK SUPPORT
@@ -13363,7 +13398,7 @@ S: Maintained
F: drivers/scsi/qla1280.[ch]
QLOGIC QLA2XXX FC-SCSI DRIVER
-M: qla2xxx-upstream@qlogic.com
+M: hmadhani@marvell.com
L: linux-scsi@vger.kernel.org
S: Supported
F: Documentation/scsi/LICENSE.qla2xxx
@@ -13904,7 +13939,7 @@ F: drivers/mtd/nand/raw/r852.h
RISC-V ARCHITECTURE
M: Paul Walmsley <paul.walmsley@sifive.com>
-M: Palmer Dabbelt <palmer@sifive.com>
+M: Palmer Dabbelt <palmer@dabbelt.com>
M: Albert Ou <aou@eecs.berkeley.edu>
L: linux-riscv@lists.infradead.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux.git
@@ -14781,7 +14816,7 @@ F: drivers/media/usb/siano/
F: drivers/media/mmc/siano/
SIFIVE DRIVERS
-M: Palmer Dabbelt <palmer@sifive.com>
+M: Palmer Dabbelt <palmer@dabbelt.com>
M: Paul Walmsley <paul.walmsley@sifive.com>
L: linux-riscv@lists.infradead.org
T: git git://github.com/sifive/riscv-linux.git
@@ -14791,7 +14826,7 @@ N: sifive
SIFIVE FU540 SYSTEM-ON-CHIP
M: Paul Walmsley <paul.walmsley@sifive.com>
-M: Palmer Dabbelt <palmer@sifive.com>
+M: Palmer Dabbelt <palmer@dabbelt.com>
L: linux-riscv@lists.infradead.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/pjw/sifive.git
S: Supported
@@ -15287,7 +15322,6 @@ F: arch/arm/boot/dts/spear*
F: arch/arm/mach-spear/
SPI NOR SUBSYSTEM
-M: Marek Vasut <marek.vasut@gmail.com>
M: Tudor Ambarus <tudor.ambarus@microchip.com>
L: linux-mtd@lists.infradead.org
W: http://www.linux-mtd.infradead.org/
@@ -16584,10 +16618,9 @@ F: drivers/media/pci/tw686x/
UBI FILE SYSTEM (UBIFS)
M: Richard Weinberger <richard@nod.at>
-M: Artem Bityutskiy <dedekind1@gmail.com>
-M: Adrian Hunter <adrian.hunter@intel.com>
L: linux-mtd@lists.infradead.org
-T: git git://git.infradead.org/ubifs-2.6.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/rw/ubifs.git next
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/rw/ubifs.git fixes
W: http://www.linux-mtd.infradead.org/doc/ubifs.html
S: Supported
F: Documentation/filesystems/ubifs.txt
@@ -16702,11 +16735,11 @@ S: Maintained
F: drivers/scsi/ufs/ufs-mediatek*
UNSORTED BLOCK IMAGES (UBI)
-M: Artem Bityutskiy <dedekind1@gmail.com>
M: Richard Weinberger <richard@nod.at>
W: http://www.linux-mtd.infradead.org/
L: linux-mtd@lists.infradead.org
-T: git git://git.infradead.org/ubifs-2.6.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/rw/ubifs.git next
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/rw/ubifs.git fixes
S: Supported
F: drivers/mtd/ubi/
F: include/linux/mtd/ubi.h
@@ -16766,13 +16799,6 @@ W: http://www.linux-usb.org/usbnet
S: Maintained
F: drivers/net/usb/dm9601.c
-USB DIAMOND RIO500 DRIVER
-M: Cesar Miquel <miquel@df.uba.ar>
-L: rio500-users@lists.sourceforge.net
-W: http://rio500.sourceforge.net
-S: Maintained
-F: drivers/usb/misc/rio500*
-
USB EHCI DRIVER
M: Alan Stern <stern@rowland.harvard.edu>
L: linux-usb@vger.kernel.org
@@ -17215,6 +17241,7 @@ F: virt/lib/
VIRTIO AND VHOST VSOCK DRIVER
M: Stefan Hajnoczi <stefanha@redhat.com>
+M: Stefano Garzarella <sgarzare@redhat.com>
L: kvm@vger.kernel.org
L: virtualization@lists.linux-foundation.org
L: netdev@vger.kernel.org
@@ -17439,7 +17466,7 @@ F: include/linux/regulator/
K: regulator_get_optional
VRF
-M: David Ahern <dsa@cumulusnetworks.com>
+M: David Ahern <dsahern@kernel.org>
M: Shrijeet Mukherjee <shrijeet@gmail.com>
L: netdev@vger.kernel.org
S: Maintained
@@ -18040,6 +18067,7 @@ F: Documentation/vm/zsmalloc.rst
ZSWAP COMPRESSED SWAP CACHING
M: Seth Jennings <sjenning@redhat.com>
M: Dan Streetman <ddstreet@ieee.org>
+M: Vitaly Wool <vitaly.wool@konsulko.com>
L: linux-mm@kvack.org
S: Maintained
F: mm/zswap.c
diff --git a/Makefile b/Makefile
index f47dfdec7086..d4d36c61940b 100644
--- a/Makefile
+++ b/Makefile
@@ -2,8 +2,8 @@
VERSION = 5
PATCHLEVEL = 4
SUBLEVEL = 0
-EXTRAVERSION = -rc2
-NAME = Nesting Opossum
+EXTRAVERSION =
+NAME = Kleptomaniac Octopus
# *DOCUMENTATION*
# To see a list of typical targets execute "make help"
@@ -599,7 +599,7 @@ endif
# in addition to whatever we do anyway.
# Just "make" or "make all" shall build modules as well
-ifneq ($(filter all _all modules,$(MAKECMDGOALS)),)
+ifneq ($(filter all _all modules nsdeps,$(MAKECMDGOALS)),)
KBUILD_MODULES := 1
endif
@@ -917,6 +917,9 @@ ifeq ($(CONFIG_RELR),y)
LDFLAGS_vmlinux += --pack-dyn-relocs=relr
endif
+# make the checker run with the right architecture
+CHECKFLAGS += --arch=$(ARCH)
+
# insure the checker run with the right endianness
CHECKFLAGS += $(if $(CONFIG_CPU_BIG_ENDIAN),-mbig-endian,-mlittle-endian)
@@ -1037,7 +1040,7 @@ export KBUILD_VMLINUX_OBJS := $(head-y) $(init-y) $(core-y) $(libs-y2) \
export KBUILD_VMLINUX_LIBS := $(libs-y1)
export KBUILD_LDS := arch/$(SRCARCH)/kernel/vmlinux.lds
export LDFLAGS_vmlinux
-# used by scripts/package/Makefile
+# used by scripts/Makefile.package
export KBUILD_ALLDIRS := $(sort $(filter-out arch/%,$(vmlinux-alldirs)) LICENSES arch include scripts tools)
vmlinux-deps := $(KBUILD_LDS) $(KBUILD_VMLINUX_OBJS) $(KBUILD_VMLINUX_LIBS)
@@ -1217,9 +1220,8 @@ PHONY += kselftest
kselftest:
$(Q)$(MAKE) -C $(srctree)/tools/testing/selftests run_tests
-PHONY += kselftest-clean
-kselftest-clean:
- $(Q)$(MAKE) -C $(srctree)/tools/testing/selftests clean
+kselftest-%: FORCE
+ $(Q)$(MAKE) -C $(srctree)/tools/testing/selftests $*
PHONY += kselftest-merge
kselftest-merge:
diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts
index bfc7f5f5d6f2..9acbeba832c0 100644
--- a/arch/arc/boot/dts/hsdk.dts
+++ b/arch/arc/boot/dts/hsdk.dts
@@ -65,6 +65,14 @@
clock-frequency = <33333333>;
};
+ reg_5v0: regulator-5v0 {
+ compatible = "regulator-fixed";
+
+ regulator-name = "5v0-supply";
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ };
+
cpu_intc: cpu-interrupt-controller {
compatible = "snps,archs-intc";
interrupt-controller;
@@ -264,6 +272,21 @@
clocks = <&input_clk>;
cs-gpios = <&creg_gpio 0 GPIO_ACTIVE_LOW>,
<&creg_gpio 1 GPIO_ACTIVE_LOW>;
+
+ spi-flash@0 {
+ compatible = "sst26wf016b", "jedec,spi-nor";
+ reg = <0>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ spi-max-frequency = <4000000>;
+ };
+
+ adc@1 {
+ compatible = "ti,adc108s102";
+ reg = <1>;
+ vref-supply = <&reg_5v0>;
+ spi-max-frequency = <1000000>;
+ };
};
creg_gpio: gpio@14b0 {
diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig
index 9b9a74444ce2..0974226fab55 100644
--- a/arch/arc/configs/hsdk_defconfig
+++ b/arch/arc/configs/hsdk_defconfig
@@ -32,6 +32,8 @@ CONFIG_INET=y
CONFIG_DEVTMPFS=y
# CONFIG_STANDALONE is not set
# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+CONFIG_MTD=y
+CONFIG_MTD_SPI_NOR=y
CONFIG_SCSI=y
CONFIG_BLK_DEV_SD=y
CONFIG_NETDEVICES=y
@@ -55,6 +57,8 @@ CONFIG_GPIO_SYSFS=y
CONFIG_GPIO_DWAPB=y
CONFIG_GPIO_SNPS_CREG=y
# CONFIG_HWMON is not set
+CONFIG_REGULATOR=y
+CONFIG_REGULATOR_FIXED_VOLTAGE=y
CONFIG_DRM=y
# CONFIG_DRM_FBDEV_EMULATION is not set
CONFIG_DRM_UDL=y
@@ -72,6 +76,8 @@ CONFIG_MMC_SDHCI_PLTFM=y
CONFIG_MMC_DW=y
CONFIG_DMADEVICES=y
CONFIG_DW_AXI_DMAC=y
+CONFIG_IIO=y
+CONFIG_TI_ADC108S102=y
CONFIG_EXT3_FS=y
CONFIG_VFAT_FS=y
CONFIG_TMPFS=y
diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c
index 861a8aea51f9..661fd842ea97 100644
--- a/arch/arc/kernel/perf_event.c
+++ b/arch/arc/kernel/perf_event.c
@@ -614,8 +614,8 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
/* loop thru all available h/w condition indexes */
for (i = 0; i < cc_bcr.c; i++) {
write_aux_reg(ARC_REG_CC_INDEX, i);
- cc_name.indiv.word0 = read_aux_reg(ARC_REG_CC_NAME0);
- cc_name.indiv.word1 = read_aux_reg(ARC_REG_CC_NAME1);
+ cc_name.indiv.word0 = le32_to_cpu(read_aux_reg(ARC_REG_CC_NAME0));
+ cc_name.indiv.word1 = le32_to_cpu(read_aux_reg(ARC_REG_CC_NAME1));
arc_pmu_map_hw_event(i, cc_name.str);
arc_pmu_add_raw_event_attr(i, cc_name.str);
diff --git a/arch/arm/boot/dts/am3874-iceboard.dts b/arch/arm/boot/dts/am3874-iceboard.dts
index 883fb85135d4..1b4b2b0500e4 100644
--- a/arch/arm/boot/dts/am3874-iceboard.dts
+++ b/arch/arm/boot/dts/am3874-iceboard.dts
@@ -111,13 +111,13 @@
reg = <0x70>;
#address-cells = <1>;
#size-cells = <0>;
+ i2c-mux-idle-disconnect;
i2c@0 {
/* FMC A */
#address-cells = <1>;
#size-cells = <0>;
reg = <0>;
- i2c-mux-idle-disconnect;
};
i2c@1 {
@@ -125,7 +125,6 @@
#address-cells = <1>;
#size-cells = <0>;
reg = <1>;
- i2c-mux-idle-disconnect;
};
i2c@2 {
@@ -133,7 +132,6 @@
#address-cells = <1>;
#size-cells = <0>;
reg = <2>;
- i2c-mux-idle-disconnect;
};
i2c@3 {
@@ -141,7 +139,6 @@
#address-cells = <1>;
#size-cells = <0>;
reg = <3>;
- i2c-mux-idle-disconnect;
};
i2c@4 {
@@ -149,14 +146,12 @@
#address-cells = <1>;
#size-cells = <0>;
reg = <4>;
- i2c-mux-idle-disconnect;
};
i2c@5 {
#address-cells = <1>;
#size-cells = <0>;
reg = <5>;
- i2c-mux-idle-disconnect;
ina230@40 { compatible = "ti,ina230"; reg = <0x40>; shunt-resistor = <5000>; };
ina230@41 { compatible = "ti,ina230"; reg = <0x41>; shunt-resistor = <5000>; };
@@ -182,14 +177,12 @@
#address-cells = <1>;
#size-cells = <0>;
reg = <6>;
- i2c-mux-idle-disconnect;
};
i2c@7 {
#address-cells = <1>;
#size-cells = <0>;
reg = <7>;
- i2c-mux-idle-disconnect;
u41: pca9575@20 {
compatible = "nxp,pca9575";
diff --git a/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts b/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts
index 09a088f98566..b75af21069f9 100644
--- a/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts
+++ b/arch/arm/boot/dts/bcm2835-rpi-zero-w.dts
@@ -113,6 +113,7 @@
#address-cells = <1>;
#size-cells = <0>;
pinctrl-0 = <&emmc_gpio34 &gpclk2_gpio43>;
+ bus-width = <4>;
mmc-pwrseq = <&wifi_pwrseq>;
non-removable;
status = "okay";
diff --git a/arch/arm/boot/dts/bcm2837-rpi-cm3.dtsi b/arch/arm/boot/dts/bcm2837-rpi-cm3.dtsi
index 7c3cb7ece6cb..925cb37c22f0 100644
--- a/arch/arm/boot/dts/bcm2837-rpi-cm3.dtsi
+++ b/arch/arm/boot/dts/bcm2837-rpi-cm3.dtsi
@@ -9,6 +9,14 @@
reg = <0 0x40000000>;
};
+ leds {
+ /*
+ * Since there is no upstream GPIO driver yet,
+ * remove the incomplete node.
+ */
+ /delete-node/ act;
+ };
+
reg_3v3: fixed-regulator {
compatible = "regulator-fixed";
regulator-name = "3V3";
diff --git a/arch/arm/boot/dts/imx6-logicpd-baseboard.dtsi b/arch/arm/boot/dts/imx6-logicpd-baseboard.dtsi
index 2a6ce87071f9..9e027b9a5f91 100644
--- a/arch/arm/boot/dts/imx6-logicpd-baseboard.dtsi
+++ b/arch/arm/boot/dts/imx6-logicpd-baseboard.dtsi
@@ -328,6 +328,10 @@
pinctrl-0 = <&pinctrl_pwm3>;
};
+&snvs_pwrkey {
+ status = "okay";
+};
+
&ssi2 {
status = "okay";
};
diff --git a/arch/arm/boot/dts/imx6-logicpd-som.dtsi b/arch/arm/boot/dts/imx6-logicpd-som.dtsi
index 7ceae3573248..547fb141ec0c 100644
--- a/arch/arm/boot/dts/imx6-logicpd-som.dtsi
+++ b/arch/arm/boot/dts/imx6-logicpd-som.dtsi
@@ -207,6 +207,10 @@
vin-supply = <&sw1c_reg>;
};
+&snvs_poweroff {
+ status = "okay";
+};
+
&iomuxc {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_hog>;
diff --git a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi
index f3404dd10537..cf628465cd0a 100644
--- a/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-sabreauto.dtsi
@@ -230,6 +230,8 @@
accelerometer@1c {
compatible = "fsl,mma8451";
reg = <0x1c>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_mma8451_int>;
interrupt-parent = <&gpio6>;
interrupts = <31 IRQ_TYPE_LEVEL_LOW>;
};
@@ -628,6 +630,12 @@
>;
};
+ pinctrl_mma8451_int: mma8451intgrp {
+ fsl,pins = <
+ MX6QDL_PAD_EIM_BCLK__GPIO6_IO31 0xb0b1
+ >;
+ };
+
pinctrl_pwm3: pwm1grp {
fsl,pins = <
MX6QDL_PAD_SD4_DAT1__PWM3_OUT 0x1b0b1
diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi
index 710f850e785c..e2e604d6ba0b 100644
--- a/arch/arm/boot/dts/imx7s.dtsi
+++ b/arch/arm/boot/dts/imx7s.dtsi
@@ -448,7 +448,7 @@
compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt";
reg = <0x302d0000 0x10000>;
interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&clks IMX7D_CLK_DUMMY>,
+ clocks = <&clks IMX7D_GPT1_ROOT_CLK>,
<&clks IMX7D_GPT1_ROOT_CLK>;
clock-names = "ipg", "per";
};
@@ -457,7 +457,7 @@
compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt";
reg = <0x302e0000 0x10000>;
interrupts = <GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&clks IMX7D_CLK_DUMMY>,
+ clocks = <&clks IMX7D_GPT2_ROOT_CLK>,
<&clks IMX7D_GPT2_ROOT_CLK>;
clock-names = "ipg", "per";
status = "disabled";
@@ -467,7 +467,7 @@
compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt";
reg = <0x302f0000 0x10000>;
interrupts = <GIC_SPI 53 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&clks IMX7D_CLK_DUMMY>,
+ clocks = <&clks IMX7D_GPT3_ROOT_CLK>,
<&clks IMX7D_GPT3_ROOT_CLK>;
clock-names = "ipg", "per";
status = "disabled";
@@ -477,7 +477,7 @@
compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt";
reg = <0x30300000 0x10000>;
interrupts = <GIC_SPI 52 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&clks IMX7D_CLK_DUMMY>,
+ clocks = <&clks IMX7D_GPT4_ROOT_CLK>,
<&clks IMX7D_GPT4_ROOT_CLK>;
clock-names = "ipg", "per";
status = "disabled";
diff --git a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi
index 3fdd0a72f87f..506b118e511a 100644
--- a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi
+++ b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi
@@ -192,3 +192,7 @@
&twl_gpio {
ti,use-leds;
};
+
+&twl_keypad {
+ status = "disabled";
+};
diff --git a/arch/arm/boot/dts/mt7629-rfb.dts b/arch/arm/boot/dts/mt7629-rfb.dts
index 3621b7d2b22a..9980c10c6e29 100644
--- a/arch/arm/boot/dts/mt7629-rfb.dts
+++ b/arch/arm/boot/dts/mt7629-rfb.dts
@@ -66,9 +66,21 @@
pinctrl-1 = <&ephy_leds_pins>;
status = "okay";
+ gmac0: mac@0 {
+ compatible = "mediatek,eth-mac";
+ reg = <0>;
+ phy-mode = "2500base-x";
+ fixed-link {
+ speed = <2500>;
+ full-duplex;
+ pause;
+ };
+ };
+
gmac1: mac@1 {
compatible = "mediatek,eth-mac";
reg = <1>;
+ phy-mode = "gmii";
phy-handle = <&phy0>;
};
@@ -78,7 +90,6 @@
phy0: ethernet-phy@0 {
reg = <0>;
- phy-mode = "gmii";
};
};
};
diff --git a/arch/arm/boot/dts/mt7629.dtsi b/arch/arm/boot/dts/mt7629.dtsi
index 9608bc2ccb3f..867b88103b9d 100644
--- a/arch/arm/boot/dts/mt7629.dtsi
+++ b/arch/arm/boot/dts/mt7629.dtsi
@@ -468,14 +468,12 @@
compatible = "mediatek,mt7629-sgmiisys", "syscon";
reg = <0x1b128000 0x3000>;
#clock-cells = <1>;
- mediatek,physpeed = "2500";
};
sgmiisys1: syscon@1b130000 {
compatible = "mediatek,mt7629-sgmiisys", "syscon";
reg = <0x1b130000 0x3000>;
#clock-cells = <1>;
- mediatek,physpeed = "2500";
};
};
};
diff --git a/arch/arm/boot/dts/omap4-droid4-xt894.dts b/arch/arm/boot/dts/omap4-droid4-xt894.dts
index 4454449de00c..a40fe8d49da6 100644
--- a/arch/arm/boot/dts/omap4-droid4-xt894.dts
+++ b/arch/arm/boot/dts/omap4-droid4-xt894.dts
@@ -369,7 +369,7 @@
compatible = "ti,wl1285", "ti,wl1283";
reg = <2>;
/* gpio_100 with gpmc_wait2 pad as wakeirq */
- interrupts-extended = <&gpio4 4 IRQ_TYPE_EDGE_RISING>,
+ interrupts-extended = <&gpio4 4 IRQ_TYPE_LEVEL_HIGH>,
<&omap4_pmx_core 0x4e>;
interrupt-names = "irq", "wakeup";
ref-clock-frequency = <26000000>;
diff --git a/arch/arm/boot/dts/omap4-panda-common.dtsi b/arch/arm/boot/dts/omap4-panda-common.dtsi
index 14be2ecb62b1..55ea8b6189af 100644
--- a/arch/arm/boot/dts/omap4-panda-common.dtsi
+++ b/arch/arm/boot/dts/omap4-panda-common.dtsi
@@ -474,7 +474,7 @@
compatible = "ti,wl1271";
reg = <2>;
/* gpio_53 with gpmc_ncs3 pad as wakeup */
- interrupts-extended = <&gpio2 21 IRQ_TYPE_EDGE_RISING>,
+ interrupts-extended = <&gpio2 21 IRQ_TYPE_LEVEL_HIGH>,
<&omap4_pmx_core 0x3a>;
interrupt-names = "irq", "wakeup";
ref-clock-frequency = <38400000>;
diff --git a/arch/arm/boot/dts/omap4-sdp.dts b/arch/arm/boot/dts/omap4-sdp.dts
index 3c274965ff40..91480ac1f328 100644
--- a/arch/arm/boot/dts/omap4-sdp.dts
+++ b/arch/arm/boot/dts/omap4-sdp.dts
@@ -512,7 +512,7 @@
compatible = "ti,wl1281";
reg = <2>;
interrupt-parent = <&gpio1>;
- interrupts = <21 IRQ_TYPE_EDGE_RISING>; /* gpio 53 */
+ interrupts = <21 IRQ_TYPE_LEVEL_HIGH>; /* gpio 53 */
ref-clock-frequency = <26000000>;
tcxo-clock-frequency = <26000000>;
};
diff --git a/arch/arm/boot/dts/omap4-var-som-om44-wlan.dtsi b/arch/arm/boot/dts/omap4-var-som-om44-wlan.dtsi
index 6dbbc9b3229c..d0032213101e 100644
--- a/arch/arm/boot/dts/omap4-var-som-om44-wlan.dtsi
+++ b/arch/arm/boot/dts/omap4-var-som-om44-wlan.dtsi
@@ -69,7 +69,7 @@
compatible = "ti,wl1271";
reg = <2>;
interrupt-parent = <&gpio2>;
- interrupts = <9 IRQ_TYPE_EDGE_RISING>; /* gpio 41 */
+ interrupts = <9 IRQ_TYPE_LEVEL_HIGH>; /* gpio 41 */
ref-clock-frequency = <38400000>;
};
};
diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi
index 7fff555ee394..68ac04641bdb 100644
--- a/arch/arm/boot/dts/omap5-board-common.dtsi
+++ b/arch/arm/boot/dts/omap5-board-common.dtsi
@@ -362,7 +362,7 @@
pinctrl-names = "default";
pinctrl-0 = <&wlcore_irq_pin>;
interrupt-parent = <&gpio1>;
- interrupts = <14 IRQ_TYPE_EDGE_RISING>; /* gpio 14 */
+ interrupts = <14 IRQ_TYPE_LEVEL_HIGH>; /* gpio 14 */
ref-clock-frequency = <26000000>;
};
};
diff --git a/arch/arm/boot/dts/omap54xx-clocks.dtsi b/arch/arm/boot/dts/omap54xx-clocks.dtsi
index fac2e57dcca9..4791834dacb2 100644
--- a/arch/arm/boot/dts/omap54xx-clocks.dtsi
+++ b/arch/arm/boot/dts/omap54xx-clocks.dtsi
@@ -1146,7 +1146,7 @@
};
};
- gpu_cm: clock-controller@1500 {
+ gpu_cm: gpu_cm@1500 {
compatible = "ti,omap4-cm";
reg = <0x1500 0x100>;
#address-cells = <1>;
diff --git a/arch/arm/boot/dts/stm32mp157-pinctrl.dtsi b/arch/arm/boot/dts/stm32mp157-pinctrl.dtsi
index e4a0d51ec3a8..0a3a7d66737b 100644
--- a/arch/arm/boot/dts/stm32mp157-pinctrl.dtsi
+++ b/arch/arm/boot/dts/stm32mp157-pinctrl.dtsi
@@ -609,13 +609,13 @@
<STM32_PINMUX('F', 6, AF9)>; /* QSPI_BK1_IO3 */
bias-disable;
drive-push-pull;
- slew-rate = <3>;
+ slew-rate = <1>;
};
pins2 {
pinmux = <STM32_PINMUX('B', 6, AF10)>; /* QSPI_BK1_NCS */
bias-pull-up;
drive-push-pull;
- slew-rate = <3>;
+ slew-rate = <1>;
};
};
@@ -637,13 +637,13 @@
<STM32_PINMUX('G', 7, AF11)>; /* QSPI_BK2_IO3 */
bias-disable;
drive-push-pull;
- slew-rate = <3>;
+ slew-rate = <1>;
};
pins2 {
pinmux = <STM32_PINMUX('C', 0, AF10)>; /* QSPI_BK2_NCS */
bias-pull-up;
drive-push-pull;
- slew-rate = <3>;
+ slew-rate = <1>;
};
};
diff --git a/arch/arm/boot/dts/stm32mp157c-ev1.dts b/arch/arm/boot/dts/stm32mp157c-ev1.dts
index 89d29b50c3f4..91fc0a315c49 100644
--- a/arch/arm/boot/dts/stm32mp157c-ev1.dts
+++ b/arch/arm/boot/dts/stm32mp157c-ev1.dts
@@ -183,14 +183,12 @@
ov5640: camera@3c {
compatible = "ovti,ov5640";
- pinctrl-names = "default";
- pinctrl-0 = <&ov5640_pins>;
reg = <0x3c>;
clocks = <&clk_ext_camera>;
clock-names = "xclk";
DOVDD-supply = <&v2v8>;
- powerdown-gpios = <&stmfx_pinctrl 18 GPIO_ACTIVE_HIGH>;
- reset-gpios = <&stmfx_pinctrl 19 GPIO_ACTIVE_LOW>;
+ powerdown-gpios = <&stmfx_pinctrl 18 (GPIO_ACTIVE_HIGH | GPIO_PUSH_PULL)>;
+ reset-gpios = <&stmfx_pinctrl 19 (GPIO_ACTIVE_LOW | GPIO_PUSH_PULL)>;
rotation = <180>;
status = "okay";
@@ -223,15 +221,8 @@
joystick_pins: joystick {
pins = "gpio0", "gpio1", "gpio2", "gpio3", "gpio4";
- drive-push-pull;
bias-pull-down;
};
-
- ov5640_pins: camera {
- pins = "agpio2", "agpio3"; /* stmfx pins 18 & 19 */
- drive-push-pull;
- output-low;
- };
};
};
};
diff --git a/arch/arm/boot/dts/stm32mp157c.dtsi b/arch/arm/boot/dts/stm32mp157c.dtsi
index 9b11654a0a39..f98e0370c0bc 100644
--- a/arch/arm/boot/dts/stm32mp157c.dtsi
+++ b/arch/arm/boot/dts/stm32mp157c.dtsi
@@ -932,7 +932,7 @@
interrupt-names = "int0", "int1";
clocks = <&rcc CK_HSE>, <&rcc FDCAN_K>;
clock-names = "hclk", "cclk";
- bosch,mram-cfg = <0x1400 0 0 32 0 0 2 2>;
+ bosch,mram-cfg = <0x0 0 0 32 0 0 2 2>;
status = "disabled";
};
@@ -945,7 +945,7 @@
interrupt-names = "int0", "int1";
clocks = <&rcc CK_HSE>, <&rcc FDCAN_K>;
clock-names = "hclk", "cclk";
- bosch,mram-cfg = <0x0 0 0 32 0 0 2 2>;
+ bosch,mram-cfg = <0x1400 0 0 32 0 0 2 2>;
status = "disabled";
};
diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi
index ce823c44e98a..4c268b70b735 100644
--- a/arch/arm/boot/dts/sun4i-a10.dtsi
+++ b/arch/arm/boot/dts/sun4i-a10.dtsi
@@ -520,6 +520,7 @@
interrupts = <39>;
clocks = <&ccu CLK_AHB_EHCI0>;
phys = <&usbphy 1>;
+ phy-names = "usb";
status = "disabled";
};
@@ -529,6 +530,7 @@
interrupts = <64>;
clocks = <&ccu CLK_USB_OHCI0>, <&ccu CLK_AHB_OHCI0>;
phys = <&usbphy 1>;
+ phy-names = "usb";
status = "disabled";
};
@@ -608,6 +610,7 @@
interrupts = <40>;
clocks = <&ccu CLK_AHB_EHCI1>;
phys = <&usbphy 2>;
+ phy-names = "usb";
status = "disabled";
};
@@ -617,6 +620,7 @@
interrupts = <65>;
clocks = <&ccu CLK_USB_OHCI1>, <&ccu CLK_AHB_OHCI1>;
phys = <&usbphy 2>;
+ phy-names = "usb";
status = "disabled";
};
diff --git a/arch/arm/boot/dts/sun5i.dtsi b/arch/arm/boot/dts/sun5i.dtsi
index cfb1efc8828c..6befa236ba99 100644
--- a/arch/arm/boot/dts/sun5i.dtsi
+++ b/arch/arm/boot/dts/sun5i.dtsi
@@ -391,6 +391,7 @@
interrupts = <39>;
clocks = <&ccu CLK_AHB_EHCI>;
phys = <&usbphy 1>;
+ phy-names = "usb";
status = "disabled";
};
@@ -400,6 +401,7 @@
interrupts = <40>;
clocks = <&ccu CLK_USB_OHCI>, <&ccu CLK_AHB_OHCI>;
phys = <&usbphy 1>;
+ phy-names = "usb";
status = "disabled";
};
diff --git a/arch/arm/boot/dts/sun6i-a31.dtsi b/arch/arm/boot/dts/sun6i-a31.dtsi
index bbeb743633c6..ac7638078420 100644
--- a/arch/arm/boot/dts/sun6i-a31.dtsi
+++ b/arch/arm/boot/dts/sun6i-a31.dtsi
@@ -545,6 +545,7 @@
clocks = <&ccu CLK_AHB1_EHCI0>;
resets = <&ccu RST_AHB1_EHCI0>;
phys = <&usbphy 1>;
+ phy-names = "usb";
status = "disabled";
};
@@ -555,6 +556,7 @@
clocks = <&ccu CLK_AHB1_OHCI0>, <&ccu CLK_USB_OHCI0>;
resets = <&ccu RST_AHB1_OHCI0>;
phys = <&usbphy 1>;
+ phy-names = "usb";
status = "disabled";
};
@@ -565,6 +567,7 @@
clocks = <&ccu CLK_AHB1_EHCI1>;
resets = <&ccu RST_AHB1_EHCI1>;
phys = <&usbphy 2>;
+ phy-names = "usb";
status = "disabled";
};
@@ -575,6 +578,7 @@
clocks = <&ccu CLK_AHB1_OHCI1>, <&ccu CLK_USB_OHCI1>;
resets = <&ccu RST_AHB1_OHCI1>;
phys = <&usbphy 2>;
+ phy-names = "usb";
status = "disabled";
};
diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi
index 49380de754a9..8aebefd6accf 100644
--- a/arch/arm/boot/dts/sun7i-a20.dtsi
+++ b/arch/arm/boot/dts/sun7i-a20.dtsi
@@ -380,9 +380,8 @@
compatible = "allwinner,sun7i-a20-csi0";
reg = <0x01c09000 0x1000>;
interrupts = <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&ccu CLK_AHB_CSI0>, <&ccu CLK_CSI0>,
- <&ccu CLK_CSI_SCLK>, <&ccu CLK_DRAM_CSI0>;
- clock-names = "bus", "mod", "isp", "ram";
+ clocks = <&ccu CLK_AHB_CSI0>, <&ccu CLK_CSI_SCLK>, <&ccu CLK_DRAM_CSI0>;
+ clock-names = "bus", "isp", "ram";
resets = <&ccu RST_CSI0>;
status = "disabled";
};
@@ -623,6 +622,7 @@
interrupts = <GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&ccu CLK_AHB_EHCI0>;
phys = <&usbphy 1>;
+ phy-names = "usb";
status = "disabled";
};
@@ -632,6 +632,7 @@
interrupts = <GIC_SPI 64 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&ccu CLK_USB_OHCI0>, <&ccu CLK_AHB_OHCI0>;
phys = <&usbphy 1>;
+ phy-names = "usb";
status = "disabled";
};
@@ -714,6 +715,7 @@
interrupts = <GIC_SPI 40 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&ccu CLK_AHB_EHCI1>;
phys = <&usbphy 2>;
+ phy-names = "usb";
status = "disabled";
};
@@ -723,6 +725,7 @@
interrupts = <GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&ccu CLK_USB_OHCI1>, <&ccu CLK_AHB_OHCI1>;
phys = <&usbphy 2>;
+ phy-names = "usb";
status = "disabled";
};
diff --git a/arch/arm/boot/dts/sun8i-a23-a33.dtsi b/arch/arm/boot/dts/sun8i-a23-a33.dtsi
index 52eed0ae3607..f292f96ab39b 100644
--- a/arch/arm/boot/dts/sun8i-a23-a33.dtsi
+++ b/arch/arm/boot/dts/sun8i-a23-a33.dtsi
@@ -307,6 +307,7 @@
clocks = <&ccu CLK_BUS_EHCI>;
resets = <&ccu RST_BUS_EHCI>;
phys = <&usbphy 1>;
+ phy-names = "usb";
status = "disabled";
};
@@ -317,6 +318,7 @@
clocks = <&ccu CLK_BUS_OHCI>, <&ccu CLK_USB_OHCI>;
resets = <&ccu RST_BUS_OHCI>;
phys = <&usbphy 1>;
+ phy-names = "usb";
status = "disabled";
};
diff --git a/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts b/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts
index 568b90ece342..3bec3e0a81b2 100644
--- a/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts
+++ b/arch/arm/boot/dts/sun8i-a83t-tbs-a711.dts
@@ -192,6 +192,7 @@
vqmmc-supply = <&reg_dldo1>;
non-removable;
wakeup-source;
+ keep-power-in-suspend;
status = "okay";
brcmf: wifi@1 {
diff --git a/arch/arm/boot/dts/sun8i-a83t.dtsi b/arch/arm/boot/dts/sun8i-a83t.dtsi
index 523be6611c50..74bb053cf23c 100644
--- a/arch/arm/boot/dts/sun8i-a83t.dtsi
+++ b/arch/arm/boot/dts/sun8i-a83t.dtsi
@@ -632,6 +632,7 @@
clocks = <&ccu CLK_BUS_EHCI0>;
resets = <&ccu RST_BUS_EHCI0>;
phys = <&usbphy 1>;
+ phy-names = "usb";
status = "disabled";
};
@@ -643,6 +644,7 @@
clocks = <&ccu CLK_BUS_OHCI0>, <&ccu CLK_USB_OHCI0>;
resets = <&ccu RST_BUS_OHCI0>;
phys = <&usbphy 1>;
+ phy-names = "usb";
status = "disabled";
};
@@ -654,6 +656,7 @@
clocks = <&ccu CLK_BUS_EHCI1>;
resets = <&ccu RST_BUS_EHCI1>;
phys = <&usbphy 2>;
+ phy-names = "usb";
status = "disabled";
};
diff --git a/arch/arm/boot/dts/sun8i-r40.dtsi b/arch/arm/boot/dts/sun8i-r40.dtsi
index bde068111b85..c9c2688db66d 100644
--- a/arch/arm/boot/dts/sun8i-r40.dtsi
+++ b/arch/arm/boot/dts/sun8i-r40.dtsi
@@ -273,6 +273,7 @@
clocks = <&ccu CLK_BUS_EHCI1>;
resets = <&ccu RST_BUS_EHCI1>;
phys = <&usbphy 1>;
+ phy-names = "usb";
status = "disabled";
};
@@ -284,6 +285,7 @@
<&ccu CLK_USB_OHCI1>;
resets = <&ccu RST_BUS_OHCI1>;
phys = <&usbphy 1>;
+ phy-names = "usb";
status = "disabled";
};
@@ -294,6 +296,7 @@
clocks = <&ccu CLK_BUS_EHCI2>;
resets = <&ccu RST_BUS_EHCI2>;
phys = <&usbphy 2>;
+ phy-names = "usb";
status = "disabled";
};
@@ -305,6 +308,7 @@
<&ccu CLK_USB_OHCI2>;
resets = <&ccu RST_BUS_OHCI2>;
phys = <&usbphy 2>;
+ phy-names = "usb";
status = "disabled";
};
diff --git a/arch/arm/boot/dts/sun9i-a80.dtsi b/arch/arm/boot/dts/sun9i-a80.dtsi
index c34d505c7efe..b9b6fb00be28 100644
--- a/arch/arm/boot/dts/sun9i-a80.dtsi
+++ b/arch/arm/boot/dts/sun9i-a80.dtsi
@@ -346,6 +346,7 @@
clocks = <&usb_clocks CLK_BUS_HCI0>;
resets = <&usb_clocks RST_USB0_HCI>;
phys = <&usbphy1>;
+ phy-names = "usb";
status = "disabled";
};
@@ -357,6 +358,7 @@
<&usb_clocks CLK_USB_OHCI0>;
resets = <&usb_clocks RST_USB0_HCI>;
phys = <&usbphy1>;
+ phy-names = "usb";
status = "disabled";
};
@@ -378,6 +380,7 @@
clocks = <&usb_clocks CLK_BUS_HCI1>;
resets = <&usb_clocks RST_USB1_HCI>;
phys = <&usbphy2>;
+ phy-names = "usb";
status = "disabled";
};
@@ -407,6 +410,7 @@
clocks = <&usb_clocks CLK_BUS_HCI2>;
resets = <&usb_clocks RST_USB2_HCI>;
phys = <&usbphy3>;
+ phy-names = "usb";
status = "disabled";
};
@@ -418,6 +422,7 @@
<&usb_clocks CLK_USB_OHCI2>;
resets = <&usb_clocks RST_USB2_HCI>;
phys = <&usbphy3>;
+ phy-names = "usb";
status = "disabled";
};
diff --git a/arch/arm/boot/dts/sunxi-h3-h5.dtsi b/arch/arm/boot/dts/sunxi-h3-h5.dtsi
index eba190b3f9de..107eeafad20a 100644
--- a/arch/arm/boot/dts/sunxi-h3-h5.dtsi
+++ b/arch/arm/boot/dts/sunxi-h3-h5.dtsi
@@ -304,6 +304,7 @@
clocks = <&ccu CLK_BUS_EHCI1>, <&ccu CLK_BUS_OHCI1>;
resets = <&ccu RST_BUS_EHCI1>, <&ccu RST_BUS_OHCI1>;
phys = <&usbphy 1>;
+ phy-names = "usb";
status = "disabled";
};
@@ -315,6 +316,7 @@
<&ccu CLK_USB_OHCI1>;
resets = <&ccu RST_BUS_EHCI1>, <&ccu RST_BUS_OHCI1>;
phys = <&usbphy 1>;
+ phy-names = "usb";
status = "disabled";
};
@@ -325,6 +327,7 @@
clocks = <&ccu CLK_BUS_EHCI2>, <&ccu CLK_BUS_OHCI2>;
resets = <&ccu RST_BUS_EHCI2>, <&ccu RST_BUS_OHCI2>;
phys = <&usbphy 2>;
+ phy-names = "usb";
status = "disabled";
};
@@ -336,6 +339,7 @@
<&ccu CLK_USB_OHCI2>;
resets = <&ccu RST_BUS_EHCI2>, <&ccu RST_BUS_OHCI2>;
phys = <&usbphy 2>;
+ phy-names = "usb";
status = "disabled";
};
@@ -346,6 +350,7 @@
clocks = <&ccu CLK_BUS_EHCI3>, <&ccu CLK_BUS_OHCI3>;
resets = <&ccu RST_BUS_EHCI3>, <&ccu RST_BUS_OHCI3>;
phys = <&usbphy 3>;
+ phy-names = "usb";
status = "disabled";
};
@@ -357,6 +362,7 @@
<&ccu CLK_USB_OHCI3>;
resets = <&ccu RST_BUS_EHCI3>, <&ccu RST_BUS_OHCI3>;
phys = <&usbphy 3>;
+ phy-names = "usb";
status = "disabled";
};
diff --git a/arch/arm/boot/dts/vf610-zii-scu4-aib.dts b/arch/arm/boot/dts/vf610-zii-scu4-aib.dts
index dc8a5f37a1ef..c8ebb23c4e02 100644
--- a/arch/arm/boot/dts/vf610-zii-scu4-aib.dts
+++ b/arch/arm/boot/dts/vf610-zii-scu4-aib.dts
@@ -602,6 +602,7 @@
#address-cells = <1>;
#size-cells = <0>;
reg = <0x70>;
+ i2c-mux-idle-disconnect;
sff0_i2c: i2c@1 {
#address-cells = <1>;
@@ -640,6 +641,7 @@
reg = <0x71>;
#address-cells = <1>;
#size-cells = <0>;
+ i2c-mux-idle-disconnect;
sff5_i2c: i2c@1 {
#address-cells = <1>;
diff --git a/arch/arm/configs/badge4_defconfig b/arch/arm/configs/badge4_defconfig
index 5ae5b5228467..ef484c4cfd1a 100644
--- a/arch/arm/configs/badge4_defconfig
+++ b/arch/arm/configs/badge4_defconfig
@@ -91,7 +91,6 @@ CONFIG_USB_SERIAL_PL2303=m
CONFIG_USB_SERIAL_CYBERJACK=m
CONFIG_USB_SERIAL_XIRCOM=m
CONFIG_USB_SERIAL_OMNINET=m
-CONFIG_USB_RIO500=m
CONFIG_EXT2_FS=m
CONFIG_EXT3_FS=m
CONFIG_MSDOS_FS=y
diff --git a/arch/arm/configs/corgi_defconfig b/arch/arm/configs/corgi_defconfig
index e4f6442588e7..4fec2ec379ad 100644
--- a/arch/arm/configs/corgi_defconfig
+++ b/arch/arm/configs/corgi_defconfig
@@ -195,7 +195,6 @@ CONFIG_USB_SERIAL_XIRCOM=m
CONFIG_USB_SERIAL_OMNINET=m
CONFIG_USB_EMI62=m
CONFIG_USB_EMI26=m
-CONFIG_USB_RIO500=m
CONFIG_USB_LEGOTOWER=m
CONFIG_USB_LCD=m
CONFIG_USB_CYTHERM=m
diff --git a/arch/arm/configs/davinci_all_defconfig b/arch/arm/configs/davinci_all_defconfig
index 01e3c0f4be92..231f8973bbb2 100644
--- a/arch/arm/configs/davinci_all_defconfig
+++ b/arch/arm/configs/davinci_all_defconfig
@@ -167,6 +167,7 @@ CONFIG_FB=y
CONFIG_FIRMWARE_EDID=y
CONFIG_FB_DA8XX=y
CONFIG_BACKLIGHT_PWM=m
+CONFIG_BACKLIGHT_GPIO=m
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
CONFIG_SOUND=m
diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig
index 9bfffbe22d53..0f7381ee0c37 100644
--- a/arch/arm/configs/imx_v6_v7_defconfig
+++ b/arch/arm/configs/imx_v6_v7_defconfig
@@ -276,6 +276,7 @@ CONFIG_VIDEO_OV5640=m
CONFIG_VIDEO_OV5645=m
CONFIG_IMX_IPUV3_CORE=y
CONFIG_DRM=y
+CONFIG_DRM_MSM=y
CONFIG_DRM_PANEL_LVDS=y
CONFIG_DRM_PANEL_SIMPLE=y
CONFIG_DRM_PANEL_SEIKO_43WVF1G=y
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index d3f50971e451..40d7f1a4fc45 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -356,15 +356,15 @@ CONFIG_DRM_OMAP_CONNECTOR_HDMI=m
CONFIG_DRM_OMAP_CONNECTOR_ANALOG_TV=m
CONFIG_DRM_OMAP_PANEL_DPI=m
CONFIG_DRM_OMAP_PANEL_DSI_CM=m
-CONFIG_DRM_OMAP_PANEL_SONY_ACX565AKM=m
-CONFIG_DRM_OMAP_PANEL_LGPHILIPS_LB035Q02=m
-CONFIG_DRM_OMAP_PANEL_SHARP_LS037V7DW01=m
-CONFIG_DRM_OMAP_PANEL_TPO_TD028TTEC1=m
-CONFIG_DRM_OMAP_PANEL_TPO_TD043MTEA1=m
-CONFIG_DRM_OMAP_PANEL_NEC_NL8048HL11=m
CONFIG_DRM_TILCDC=m
CONFIG_DRM_PANEL_SIMPLE=m
CONFIG_DRM_TI_TFP410=m
+CONFIG_DRM_PANEL_LG_LB035Q02=m
+CONFIG_DRM_PANEL_NEC_NL8048HL11=m
+CONFIG_DRM_PANEL_SHARP_LS037V7DW01=m
+CONFIG_DRM_PANEL_SONY_ACX565AKM=m
+CONFIG_DRM_PANEL_TPO_TD028TTEC1=m
+CONFIG_DRM_PANEL_TPO_TD043MTEA1=m
CONFIG_FB=y
CONFIG_FIRMWARE_EDID=y
CONFIG_FB_MODE_HELPERS=y
diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig
index 787c3f9be414..b817c57f05f1 100644
--- a/arch/arm/configs/pxa_defconfig
+++ b/arch/arm/configs/pxa_defconfig
@@ -581,7 +581,6 @@ CONFIG_USB_SERIAL_XIRCOM=m
CONFIG_USB_SERIAL_OMNINET=m
CONFIG_USB_EMI62=m
CONFIG_USB_EMI26=m
-CONFIG_USB_RIO500=m
CONFIG_USB_LEGOTOWER=m
CONFIG_USB_LCD=m
CONFIG_USB_CYTHERM=m
diff --git a/arch/arm/configs/s3c2410_defconfig b/arch/arm/configs/s3c2410_defconfig
index 95b5a4ffddea..73ed73a8785a 100644
--- a/arch/arm/configs/s3c2410_defconfig
+++ b/arch/arm/configs/s3c2410_defconfig
@@ -327,7 +327,6 @@ CONFIG_USB_EMI62=m
CONFIG_USB_EMI26=m
CONFIG_USB_ADUTUX=m
CONFIG_USB_SEVSEG=m
-CONFIG_USB_RIO500=m
CONFIG_USB_LEGOTOWER=m
CONFIG_USB_LCD=m
CONFIG_USB_CYPRESS_CY7C63=m
diff --git a/arch/arm/configs/spitz_defconfig b/arch/arm/configs/spitz_defconfig
index 4fb51d665abb..a1cdbfa064c5 100644
--- a/arch/arm/configs/spitz_defconfig
+++ b/arch/arm/configs/spitz_defconfig
@@ -189,7 +189,6 @@ CONFIG_USB_SERIAL_XIRCOM=m
CONFIG_USB_SERIAL_OMNINET=m
CONFIG_USB_EMI62=m
CONFIG_USB_EMI26=m
-CONFIG_USB_RIO500=m
CONFIG_USB_LEGOTOWER=m
CONFIG_USB_LCD=m
CONFIG_USB_CYTHERM=m
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index b24df84a1d7a..043b0b18bf7e 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -98,6 +98,7 @@ config CRYPTO_AES_ARM_CE
tristate "Accelerated AES using ARMv8 Crypto Extensions"
depends on KERNEL_MODE_NEON
select CRYPTO_BLKCIPHER
+ select CRYPTO_LIB_AES
select CRYPTO_SIMD
help
Use an implementation of AES in CBC, CTR and XTS modes that uses
diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S
index b978cdf133af..4d1707388d94 100644
--- a/arch/arm/crypto/aes-ce-core.S
+++ b/arch/arm/crypto/aes-ce-core.S
@@ -9,6 +9,7 @@
#include <asm/assembler.h>
.text
+ .arch armv8-a
.fpu crypto-neon-fp-armv8
.align 3
diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h
index 567dbede4785..f1d0a7807cd0 100644
--- a/arch/arm/include/asm/domain.h
+++ b/arch/arm/include/asm/domain.h
@@ -82,7 +82,7 @@
#ifndef __ASSEMBLY__
#ifdef CONFIG_CPU_CP15_MMU
-static inline unsigned int get_domain(void)
+static __always_inline unsigned int get_domain(void)
{
unsigned int domain;
@@ -94,7 +94,7 @@ static inline unsigned int get_domain(void)
return domain;
}
-static inline void set_domain(unsigned val)
+static __always_inline void set_domain(unsigned int val)
{
asm volatile(
"mcr p15, 0, %0, c3, c0 @ set domain"
@@ -102,12 +102,12 @@ static inline void set_domain(unsigned val)
isb();
}
#else
-static inline unsigned int get_domain(void)
+static __always_inline unsigned int get_domain(void)
{
return 0;
}
-static inline void set_domain(unsigned val)
+static __always_inline void set_domain(unsigned int val)
{
}
#endif
diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 303248e5b990..98c6b91be4a8 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -22,7 +22,7 @@
* perform such accesses (eg, via list poison values) which could then
* be exploited for priviledge escalation.
*/
-static inline unsigned int uaccess_save_and_enable(void)
+static __always_inline unsigned int uaccess_save_and_enable(void)
{
#ifdef CONFIG_CPU_SW_DOMAIN_PAN
unsigned int old_domain = get_domain();
@@ -37,7 +37,7 @@ static inline unsigned int uaccess_save_and_enable(void)
#endif
}
-static inline void uaccess_restore(unsigned int flags)
+static __always_inline void uaccess_restore(unsigned int flags)
{
#ifdef CONFIG_CPU_SW_DOMAIN_PAN
/* Restore the user access mask */
diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S
index a7810be07da1..4a3982812a40 100644
--- a/arch/arm/kernel/head-common.S
+++ b/arch/arm/kernel/head-common.S
@@ -68,7 +68,7 @@ ENDPROC(__vet_atags)
* The following fragment of code is executed with the MMU on in MMU mode,
* and uses absolute addresses; this is not position independent.
*
- * r0 = cp#15 control register
+ * r0 = cp#15 control register (exc_ret for M-class)
* r1 = machine ID
* r2 = atags/dtb pointer
* r9 = processor ID
@@ -137,7 +137,8 @@ __mmap_switched_data:
#ifdef CONFIG_CPU_CP15
.long cr_alignment @ r3
#else
- .long 0 @ r3
+M_CLASS(.long exc_ret) @ r3
+AR_CLASS(.long 0) @ r3
#endif
.size __mmap_switched_data, . - __mmap_switched_data
diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
index afa350f44dea..0fc814bbc34b 100644
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -201,6 +201,8 @@ M_CLASS(streq r3, [r12, #PMSAv8_MAIR1])
bic r0, r0, #V7M_SCB_CCR_IC
#endif
str r0, [r12, V7M_SCB_CCR]
+ /* Pass exc_ret to __mmap_switched */
+ mov r0, r10
#endif /* CONFIG_CPU_CP15 elif CONFIG_CPU_V7M */
ret lr
ENDPROC(__after_proc_init)
diff --git a/arch/arm/mach-davinci/dm365.c b/arch/arm/mach-davinci/dm365.c
index 8062412be70f..9fc5c73cc0be 100644
--- a/arch/arm/mach-davinci/dm365.c
+++ b/arch/arm/mach-davinci/dm365.c
@@ -462,8 +462,8 @@ static s8 dm365_queue_priority_mapping[][2] = {
};
static const struct dma_slave_map dm365_edma_map[] = {
- { "davinci-mcbsp.0", "tx", EDMA_FILTER_PARAM(0, 2) },
- { "davinci-mcbsp.0", "rx", EDMA_FILTER_PARAM(0, 3) },
+ { "davinci-mcbsp", "tx", EDMA_FILTER_PARAM(0, 2) },
+ { "davinci-mcbsp", "rx", EDMA_FILTER_PARAM(0, 3) },
{ "davinci_voicecodec", "tx", EDMA_FILTER_PARAM(0, 2) },
{ "davinci_voicecodec", "rx", EDMA_FILTER_PARAM(0, 3) },
{ "spi_davinci.2", "tx", EDMA_FILTER_PARAM(0, 10) },
diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c
index d942a3357090..2efd18e8824c 100644
--- a/arch/arm/mach-omap2/pdata-quirks.c
+++ b/arch/arm/mach-omap2/pdata-quirks.c
@@ -89,6 +89,13 @@ static struct iommu_platform_data omap3_iommu_pdata = {
.reset_name = "mmu",
.assert_reset = omap_device_assert_hardreset,
.deassert_reset = omap_device_deassert_hardreset,
+ .device_enable = omap_device_enable,
+ .device_idle = omap_device_idle,
+};
+
+static struct iommu_platform_data omap3_iommu_isp_pdata = {
+ .device_enable = omap_device_enable,
+ .device_idle = omap_device_idle,
};
static int omap3_sbc_t3730_twl_callback(struct device *dev,
@@ -424,6 +431,8 @@ static struct iommu_platform_data omap4_iommu_pdata = {
.reset_name = "mmu_cache",
.assert_reset = omap_device_assert_hardreset,
.deassert_reset = omap_device_deassert_hardreset,
+ .device_enable = omap_device_enable,
+ .device_idle = omap_device_idle,
};
#endif
@@ -617,6 +626,8 @@ static struct of_dev_auxdata omap_auxdata_lookup[] = {
#ifdef CONFIG_ARCH_OMAP3
OF_DEV_AUXDATA("ti,omap2-iommu", 0x5d000000, "5d000000.mmu",
&omap3_iommu_pdata),
+ OF_DEV_AUXDATA("ti,omap2-iommu", 0x480bd400, "480bd400.mmu",
+ &omap3_iommu_isp_pdata),
OF_DEV_AUXDATA("ti,omap3-smartreflex-core", 0x480cb000,
"480cb000.smartreflex", &omap_sr_pdata[OMAP_SR_CORE]),
OF_DEV_AUXDATA("ti,omap3-smartreflex-mpu-iva", 0x480c9000,
diff --git a/arch/arm/mach-sunxi/mc_smp.c b/arch/arm/mach-sunxi/mc_smp.c
index 239084cf8192..26cbce135338 100644
--- a/arch/arm/mach-sunxi/mc_smp.c
+++ b/arch/arm/mach-sunxi/mc_smp.c
@@ -481,14 +481,18 @@ static void sunxi_mc_smp_cpu_die(unsigned int l_cpu)
static int sunxi_cpu_powerdown(unsigned int cpu, unsigned int cluster)
{
u32 reg;
+ int gating_bit = cpu;
pr_debug("%s: cluster %u cpu %u\n", __func__, cluster, cpu);
if (cpu >= SUNXI_CPUS_PER_CLUSTER || cluster >= SUNXI_NR_CLUSTERS)
return -EINVAL;
+ if (is_a83t && cpu == 0)
+ gating_bit = 4;
+
/* gate processor power */
reg = readl(prcm_base + PRCM_PWROFF_GATING_REG(cluster));
- reg |= PRCM_PWROFF_GATING_REG_CORE(cpu);
+ reg |= PRCM_PWROFF_GATING_REG_CORE(gating_bit);
writel(reg, prcm_base + PRCM_PWROFF_GATING_REG(cluster));
udelay(20);
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index 04b36436cbc0..788c5cf46de5 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -324,7 +324,7 @@ union offset_union {
__put32_unaligned_check("strbt", val, addr)
static void
-do_alignment_finish_ldst(unsigned long addr, unsigned long instr, struct pt_regs *regs, union offset_union offset)
+do_alignment_finish_ldst(unsigned long addr, u32 instr, struct pt_regs *regs, union offset_union offset)
{
if (!LDST_U_BIT(instr))
offset.un = -offset.un;
@@ -337,7 +337,7 @@ do_alignment_finish_ldst(unsigned long addr, unsigned long instr, struct pt_regs
}
static int
-do_alignment_ldrhstrh(unsigned long addr, unsigned long instr, struct pt_regs *regs)
+do_alignment_ldrhstrh(unsigned long addr, u32 instr, struct pt_regs *regs)
{
unsigned int rd = RD_BITS(instr);
@@ -386,8 +386,7 @@ do_alignment_ldrhstrh(unsigned long addr, unsigned long instr, struct pt_regs *r
}
static int
-do_alignment_ldrdstrd(unsigned long addr, unsigned long instr,
- struct pt_regs *regs)
+do_alignment_ldrdstrd(unsigned long addr, u32 instr, struct pt_regs *regs)
{
unsigned int rd = RD_BITS(instr);
unsigned int rd2;
@@ -449,7 +448,7 @@ do_alignment_ldrdstrd(unsigned long addr, unsigned long instr,
}
static int
-do_alignment_ldrstr(unsigned long addr, unsigned long instr, struct pt_regs *regs)
+do_alignment_ldrstr(unsigned long addr, u32 instr, struct pt_regs *regs)
{
unsigned int rd = RD_BITS(instr);
@@ -498,7 +497,7 @@ do_alignment_ldrstr(unsigned long addr, unsigned long instr, struct pt_regs *reg
* PU = 10 A B
*/
static int
-do_alignment_ldmstm(unsigned long addr, unsigned long instr, struct pt_regs *regs)
+do_alignment_ldmstm(unsigned long addr, u32 instr, struct pt_regs *regs)
{
unsigned int rd, rn, correction, nr_regs, regbits;
unsigned long eaddr, newaddr;
@@ -539,7 +538,7 @@ do_alignment_ldmstm(unsigned long addr, unsigned long instr, struct pt_regs *reg
* processor for us.
*/
if (addr != eaddr) {
- pr_err("LDMSTM: PC = %08lx, instr = %08lx, "
+ pr_err("LDMSTM: PC = %08lx, instr = %08x, "
"addr = %08lx, eaddr = %08lx\n",
instruction_pointer(regs), instr, addr, eaddr);
show_regs(regs);
@@ -716,10 +715,10 @@ thumb2arm(u16 tinstr)
* 2. Register name Rt from ARMv7 is same as Rd from ARMv6 (Rd is Rt)
*/
static void *
-do_alignment_t32_to_handler(unsigned long *pinstr, struct pt_regs *regs,
+do_alignment_t32_to_handler(u32 *pinstr, struct pt_regs *regs,
union offset_union *poffset)
{
- unsigned long instr = *pinstr;
+ u32 instr = *pinstr;
u16 tinst1 = (instr >> 16) & 0xffff;
u16 tinst2 = instr & 0xffff;
@@ -767,17 +766,48 @@ do_alignment_t32_to_handler(unsigned long *pinstr, struct pt_regs *regs,
return NULL;
}
+static int alignment_get_arm(struct pt_regs *regs, u32 *ip, u32 *inst)
+{
+ u32 instr = 0;
+ int fault;
+
+ if (user_mode(regs))
+ fault = get_user(instr, ip);
+ else
+ fault = probe_kernel_address(ip, instr);
+
+ *inst = __mem_to_opcode_arm(instr);
+
+ return fault;
+}
+
+static int alignment_get_thumb(struct pt_regs *regs, u16 *ip, u16 *inst)
+{
+ u16 instr = 0;
+ int fault;
+
+ if (user_mode(regs))
+ fault = get_user(instr, ip);
+ else
+ fault = probe_kernel_address(ip, instr);
+
+ *inst = __mem_to_opcode_thumb16(instr);
+
+ return fault;
+}
+
static int
do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
{
union offset_union uninitialized_var(offset);
- unsigned long instr = 0, instrptr;
- int (*handler)(unsigned long addr, unsigned long instr, struct pt_regs *regs);
+ unsigned long instrptr;
+ int (*handler)(unsigned long addr, u32 instr, struct pt_regs *regs);
unsigned int type;
- unsigned int fault;
+ u32 instr = 0;
u16 tinstr = 0;
int isize = 4;
int thumb2_32b = 0;
+ int fault;
if (interrupts_enabled(regs))
local_irq_enable();
@@ -786,15 +816,14 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
if (thumb_mode(regs)) {
u16 *ptr = (u16 *)(instrptr & ~1);
- fault = probe_kernel_address(ptr, tinstr);
- tinstr = __mem_to_opcode_thumb16(tinstr);
+
+ fault = alignment_get_thumb(regs, ptr, &tinstr);
if (!fault) {
if (cpu_architecture() >= CPU_ARCH_ARMv7 &&
IS_T32(tinstr)) {
/* Thumb-2 32-bit */
- u16 tinst2 = 0;
- fault = probe_kernel_address(ptr + 1, tinst2);
- tinst2 = __mem_to_opcode_thumb16(tinst2);
+ u16 tinst2;
+ fault = alignment_get_thumb(regs, ptr + 1, &tinst2);
instr = __opcode_thumb32_compose(tinstr, tinst2);
thumb2_32b = 1;
} else {
@@ -803,8 +832,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
}
}
} else {
- fault = probe_kernel_address((void *)instrptr, instr);
- instr = __mem_to_opcode_arm(instr);
+ fault = alignment_get_arm(regs, (void *)instrptr, &instr);
}
if (fault) {
@@ -926,7 +954,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
* Oops, we didn't handle the instruction.
*/
pr_err("Alignment trap: not handling instruction "
- "%0*lx at [<%08lx>]\n",
+ "%0*x at [<%08lx>]\n",
isize << 1,
isize == 2 ? tinstr : instr, instrptr);
ai_skipped += 1;
@@ -936,7 +964,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
ai_user += 1;
if (ai_usermode & UM_WARN)
- printk("Alignment trap: %s (%d) PC=0x%08lx Instr=0x%0*lx "
+ printk("Alignment trap: %s (%d) PC=0x%08lx Instr=0x%0*x "
"Address=0x%08lx FSR 0x%03x\n", current->comm,
task_pid_nr(current), instrptr,
isize << 1,
diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c
index 9a07916af8dd..54d87506d3b5 100644
--- a/arch/arm/mm/proc-v7-bugs.c
+++ b/arch/arm/mm/proc-v7-bugs.c
@@ -1,7 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/arm-smccc.h>
#include <linux/kernel.h>
-#include <linux/psci.h>
#include <linux/smp.h>
#include <asm/cp15.h>
@@ -75,11 +74,8 @@ static void cpu_v7_spectre_init(void)
case ARM_CPU_PART_CORTEX_A72: {
struct arm_smccc_res res;
- if (psci_ops.smccc_version == SMCCC_VERSION_1_0)
- break;
-
- switch (psci_ops.conduit) {
- case PSCI_CONDUIT_HVC:
+ switch (arm_smccc_1_1_get_conduit()) {
+ case SMCCC_CONDUIT_HVC:
arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
ARM_SMCCC_ARCH_WORKAROUND_1, &res);
if ((int)res.a0 != 0)
@@ -90,7 +86,7 @@ static void cpu_v7_spectre_init(void)
spectre_v2_method = "hypervisor";
break;
- case PSCI_CONDUIT_SMC:
+ case SMCCC_CONDUIT_SMC:
arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
ARM_SMCCC_ARCH_WORKAROUND_1, &res);
if ((int)res.a0 != 0)
diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S
index 1448f144e7fb..1a49d503eafc 100644
--- a/arch/arm/mm/proc-v7m.S
+++ b/arch/arm/mm/proc-v7m.S
@@ -132,13 +132,11 @@ __v7m_setup_cont:
dsb
mov r6, lr @ save LR
ldr sp, =init_thread_union + THREAD_START_SP
- stmia sp, {r0-r3, r12}
cpsie i
svc #0
1: cpsid i
- ldr r0, =exc_ret
- orr lr, lr, #EXC_RET_THREADMODE_PROCESSSTACK
- str lr, [r0]
+ /* Calculate exc_ret */
+ orr r10, lr, #EXC_RET_THREADMODE_PROCESSSTACK
ldmia sp, {r0-r3, r12}
str r5, [r12, #11 * 4] @ restore the original SVC vector entry
mov lr, r6 @ restore LR
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 41a9b4257b72..d66a9727344d 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -110,7 +110,6 @@ config ARM64
select GENERIC_STRNLEN_USER
select GENERIC_TIME_VSYSCALL
select GENERIC_GETTIMEOFDAY
- select GENERIC_COMPAT_VDSO if (!CPU_BIG_ENDIAN && COMPAT)
select HANDLE_DOMAIN_IRQ
select HARDIRQS_SW_RESEND
select HAVE_PCI
@@ -144,6 +143,8 @@ config ARM64
select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
+ select HAVE_DYNAMIC_FTRACE_WITH_REGS \
+ if $(cc-option,-fpatchable-function-entry=2)
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_FAST_GUP
select HAVE_FTRACE_MCOUNT_RECORD
@@ -267,6 +268,10 @@ config GENERIC_CSUM
config GENERIC_CALIBRATE_DELAY
def_bool y
+config ZONE_DMA
+ bool "Support DMA zone" if EXPERT
+ default y
+
config ZONE_DMA32
bool "Support DMA32 zone" if EXPERT
default y
@@ -539,6 +544,16 @@ config ARM64_ERRATUM_1286807
invalidated has been observed by other observers. The
workaround repeats the TLBI+DSB operation.
+config ARM64_ERRATUM_1319367
+ bool "Cortex-A57/A72: Speculative AT instruction using out-of-context translation regime could cause subsequent request to generate an incorrect translation"
+ default y
+ help
+ This option adds work arounds for ARM Cortex-A57 erratum 1319537
+ and A72 erratum 1319367
+
+ Cortex-A57 and A72 cores could end-up with corrupted TLBs by
+ speculating an AT instruction during a guest context switch.
+
If unsure, say Y.
config ARM64_ERRATUM_1463225
@@ -559,6 +574,22 @@ config ARM64_ERRATUM_1463225
If unsure, say Y.
+config ARM64_ERRATUM_1542419
+ bool "Neoverse-N1: workaround mis-ordering of instruction fetches"
+ default y
+ help
+ This option adds a workaround for ARM Neoverse-N1 erratum
+ 1542419.
+
+ Affected Neoverse-N1 cores could execute a stale instruction when
+ modified by another CPU. The workaround depends on a firmware
+ counterpart.
+
+ Workaround the issue by hiding the DIC feature from EL0. This
+ forces user-space to perform cache maintenance.
+
+ If unsure, say Y.
+
config CAVIUM_ERRATUM_22375
bool "Cavium erratum 22375, 24313"
default y
@@ -617,6 +648,23 @@ config CAVIUM_ERRATUM_30115
If unsure, say Y.
+config CAVIUM_TX2_ERRATUM_219
+ bool "Cavium ThunderX2 erratum 219: PRFM between TTBR change and ISB fails"
+ default y
+ help
+ On Cavium ThunderX2, a load, store or prefetch instruction between a
+ TTBR update and the corresponding context synchronizing operation can
+ cause a spurious Data Abort to be delivered to any hardware thread in
+ the CPU core.
+
+ Work around the issue by avoiding the problematic code sequence and
+ trapping KVM guest TTBRx_EL1 writes to EL2 when SMT is enabled. The
+ trap handler performs the corresponding register access, skips the
+ instruction and ensures context synchronization by virtue of the
+ exception return.
+
+ If unsure, say Y.
+
config QCOM_FALKOR_ERRATUM_1003
bool "Falkor E1003: Incorrect translation due to ASID change"
default y
@@ -829,10 +877,26 @@ config ARM64_PA_BITS
default 48 if ARM64_PA_BITS_48
default 52 if ARM64_PA_BITS_52
+choice
+ prompt "Endianness"
+ default CPU_LITTLE_ENDIAN
+ help
+ Select the endianness of data accesses performed by the CPU. Userspace
+ applications will need to be compiled and linked for the endianness
+ that is selected here.
+
config CPU_BIG_ENDIAN
bool "Build big-endian kernel"
help
- Say Y if you plan on running a kernel in big-endian mode.
+ Say Y if you plan on running a kernel with a big-endian userspace.
+
+config CPU_LITTLE_ENDIAN
+ bool "Build little-endian kernel"
+ help
+ Say Y if you plan on running a kernel with a little-endian userspace.
+ This is usually the case for distributions targeting arm64.
+
+endchoice
config SCHED_MC
bool "Multi-core scheduler support"
@@ -1159,7 +1223,7 @@ menuconfig COMPAT
if COMPAT
config KUSER_HELPERS
- bool "Enable kuser helpers page for 32 bit applications"
+ bool "Enable kuser helpers page for 32-bit applications"
default y
help
Warning: disabling this option may break 32-bit user programs.
@@ -1185,6 +1249,18 @@ config KUSER_HELPERS
Say N here only if you are absolutely certain that you do not
need these helpers; otherwise, the safe option is to say Y.
+config COMPAT_VDSO
+ bool "Enable vDSO for 32-bit applications"
+ depends on !CPU_BIG_ENDIAN && "$(CROSS_COMPILE_COMPAT)" != ""
+ select GENERIC_COMPAT_VDSO
+ default y
+ help
+ Place in the process address space of 32-bit applications an
+ ELF shared object providing fast implementations of gettimeofday
+ and clock_gettime.
+
+ You must have a 32-bit build of glibc 2.22 or later for programs
+ to seamlessly take advantage of this.
menuconfig ARMV8_DEPRECATED
bool "Emulate deprecated/obsolete ARMv8 instructions"
@@ -1569,6 +1645,7 @@ config CMDLINE
config CMDLINE_FORCE
bool "Always use the default kernel command string"
+ depends on CMDLINE != ""
help
Always use the default kernel command string, even if the boot
loader passes other arguments to the kernel.
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 84a3d502c5a5..1fbe24d4fdb6 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -53,22 +53,6 @@ $(warning Detected assembler with broken .inst; disassembly will be unreliable)
endif
endif
-ifeq ($(CONFIG_GENERIC_COMPAT_VDSO), y)
- CROSS_COMPILE_COMPAT ?= $(CONFIG_CROSS_COMPILE_COMPAT_VDSO:"%"=%)
-
- ifeq ($(CONFIG_CC_IS_CLANG), y)
- $(warning CROSS_COMPILE_COMPAT is clang, the compat vDSO will not be built)
- else ifeq ($(strip $(CROSS_COMPILE_COMPAT)),)
- $(warning CROSS_COMPILE_COMPAT not defined or empty, the compat vDSO will not be built)
- else ifeq ($(shell which $(CROSS_COMPILE_COMPAT)gcc 2> /dev/null),)
- $(error $(CROSS_COMPILE_COMPAT)gcc not found, check CROSS_COMPILE_COMPAT)
- else
- export CROSS_COMPILE_COMPAT
- export CONFIG_COMPAT_VDSO := y
- compat_vdso := -DCONFIG_COMPAT_VDSO=1
- endif
-endif
-
KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst) \
$(compat_vdso) $(cc_has_k_constraint)
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
@@ -111,6 +95,11 @@ ifeq ($(CONFIG_ARM64_MODULE_PLTS),y)
KBUILD_LDS_MODULE += $(srctree)/arch/arm64/kernel/module.lds
endif
+ifeq ($(CONFIG_DYNAMIC_FTRACE_WITH_REGS),y)
+ KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
+ CC_FLAGS_FTRACE := -fpatchable-function-entry=2
+endif
+
# Default value
head-y := arch/arm64/kernel/head.o
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts
index 24f1aac366d6..d5b6e8159a33 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts
@@ -63,3 +63,12 @@
reg = <1>;
};
};
+
+&reg_dc1sw {
+ /*
+ * Ethernet PHY needs 30ms to properly power up and some more
+ * to initialize. 100ms should be plenty of time to finish
+ * whole process.
+ */
+ regulator-enable-ramp-delay = <100000>;
+};
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinebook.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinebook.dts
index 2b6345db7dc0..78c82a665c84 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinebook.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinebook.dts
@@ -104,6 +104,7 @@
&ehci0 {
phys = <&usbphy 0>;
+ phy-names = "usb";
status = "okay";
};
@@ -150,6 +151,7 @@
&ohci0 {
phys = <&usbphy 0>;
+ phy-names = "usb";
status = "okay";
};
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts
index e6fb9683f213..25099202c52c 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts
@@ -159,6 +159,12 @@
};
&reg_dc1sw {
+ /*
+ * Ethernet PHY needs 30ms to properly power up and some more
+ * to initialize. 100ms should be plenty of time to finish
+ * whole process.
+ */
+ regulator-enable-ramp-delay = <100000>;
regulator-name = "vcc-phy";
};
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi
index 69128a6dfc46..70f4cce6be43 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi
+++ b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi
@@ -142,15 +142,6 @@
clock-output-names = "ext-osc32k";
};
- pmu {
- compatible = "arm,cortex-a53-pmu";
- interrupts = <GIC_SPI 152 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 153 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 154 IRQ_TYPE_LEVEL_HIGH>,
- <GIC_SPI 155 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-affinity = <&cpu0>, <&cpu1>, <&cpu2>, <&cpu3>;
- };
-
psci {
compatible = "arm,psci-0.2";
method = "smc";
@@ -553,6 +544,7 @@
resets = <&ccu RST_BUS_OHCI1>,
<&ccu RST_BUS_EHCI1>;
phys = <&usbphy 1>;
+ phy-names = "usb";
status = "disabled";
};
@@ -564,6 +556,7 @@
<&ccu CLK_USB_OHCI1>;
resets = <&ccu RST_BUS_OHCI1>;
phys = <&usbphy 1>;
+ phy-names = "usb";
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi
index 4020a1aafa3e..0d5ea19336a1 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi
@@ -547,6 +547,7 @@
resets = <&ccu RST_BUS_OHCI3>,
<&ccu RST_BUS_EHCI3>;
phys = <&usb2phy 3>;
+ phy-names = "usb";
status = "disabled";
};
@@ -558,6 +559,7 @@
<&ccu CLK_USB_OHCI3>;
resets = <&ccu RST_BUS_OHCI3>;
phys = <&usb2phy 3>;
+ phy-names = "usb";
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/broadcom/stingray/stingray-pinctrl.dtsi b/arch/arm64/boot/dts/broadcom/stingray/stingray-pinctrl.dtsi
index 8a3a770e8f2c..56789ccf9454 100644
--- a/arch/arm64/boot/dts/broadcom/stingray/stingray-pinctrl.dtsi
+++ b/arch/arm64/boot/dts/broadcom/stingray/stingray-pinctrl.dtsi
@@ -42,13 +42,14 @@
pinmux: pinmux@14029c {
compatible = "pinctrl-single";
- reg = <0x0014029c 0x250>;
+ reg = <0x0014029c 0x26c>;
#address-cells = <1>;
#size-cells = <1>;
pinctrl-single,register-width = <32>;
pinctrl-single,function-mask = <0xf>;
pinctrl-single,gpio-range = <
- &range 0 154 MODE_GPIO
+ &range 0 91 MODE_GPIO
+ &range 95 60 MODE_GPIO
>;
range: gpio-range {
#pinctrl-single,gpio-range-cells = <3>;
diff --git a/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi b/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi
index 71e2e34400d4..0098dfdef96c 100644
--- a/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi
+++ b/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi
@@ -464,8 +464,7 @@
<&pinmux 108 16 27>,
<&pinmux 135 77 6>,
<&pinmux 141 67 4>,
- <&pinmux 145 149 6>,
- <&pinmux 151 91 4>;
+ <&pinmux 145 149 6>;
};
i2c1: i2c@e0000 {
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts
index d98346da01df..078a5010228c 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts
@@ -127,7 +127,7 @@
status = "okay";
i2c-mux@77 {
- compatible = "nxp,pca9847";
+ compatible = "nxp,pca9547";
reg = <0x77>;
#address-cells = <1>;
#size-cells = <0>;
diff --git a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi
index 408e0ecdce6a..b032f3890c8c 100644
--- a/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-lx2160a.dtsi
@@ -33,7 +33,7 @@
i-cache-line-size = <64>;
i-cache-sets = <192>;
next-level-cache = <&cluster0_l2>;
- cpu-idle-states = <&cpu_pw20>;
+ cpu-idle-states = <&cpu_pw15>;
};
cpu@1 {
@@ -49,7 +49,7 @@
i-cache-line-size = <64>;
i-cache-sets = <192>;
next-level-cache = <&cluster0_l2>;
- cpu-idle-states = <&cpu_pw20>;
+ cpu-idle-states = <&cpu_pw15>;
};
cpu@100 {
@@ -65,7 +65,7 @@
i-cache-line-size = <64>;
i-cache-sets = <192>;
next-level-cache = <&cluster1_l2>;
- cpu-idle-states = <&cpu_pw20>;
+ cpu-idle-states = <&cpu_pw15>;
};
cpu@101 {
@@ -81,7 +81,7 @@
i-cache-line-size = <64>;
i-cache-sets = <192>;
next-level-cache = <&cluster1_l2>;
- cpu-idle-states = <&cpu_pw20>;
+ cpu-idle-states = <&cpu_pw15>;
};
cpu@200 {
@@ -97,7 +97,7 @@
i-cache-line-size = <64>;
i-cache-sets = <192>;
next-level-cache = <&cluster2_l2>;
- cpu-idle-states = <&cpu_pw20>;
+ cpu-idle-states = <&cpu_pw15>;
};
cpu@201 {
@@ -113,7 +113,7 @@
i-cache-line-size = <64>;
i-cache-sets = <192>;
next-level-cache = <&cluster2_l2>;
- cpu-idle-states = <&cpu_pw20>;
+ cpu-idle-states = <&cpu_pw15>;
};
cpu@300 {
@@ -129,7 +129,7 @@
i-cache-line-size = <64>;
i-cache-sets = <192>;
next-level-cache = <&cluster3_l2>;
- cpu-idle-states = <&cpu_pw20>;
+ cpu-idle-states = <&cpu_pw15>;
};
cpu@301 {
@@ -145,7 +145,7 @@
i-cache-line-size = <64>;
i-cache-sets = <192>;
next-level-cache = <&cluster3_l2>;
- cpu-idle-states = <&cpu_pw20>;
+ cpu-idle-states = <&cpu_pw15>;
};
cpu@400 {
@@ -161,7 +161,7 @@
i-cache-line-size = <64>;
i-cache-sets = <192>;
next-level-cache = <&cluster4_l2>;
- cpu-idle-states = <&cpu_pw20>;
+ cpu-idle-states = <&cpu_pw15>;
};
cpu@401 {
@@ -177,7 +177,7 @@
i-cache-line-size = <64>;
i-cache-sets = <192>;
next-level-cache = <&cluster4_l2>;
- cpu-idle-states = <&cpu_pw20>;
+ cpu-idle-states = <&cpu_pw15>;
};
cpu@500 {
@@ -193,7 +193,7 @@
i-cache-line-size = <64>;
i-cache-sets = <192>;
next-level-cache = <&cluster5_l2>;
- cpu-idle-states = <&cpu_pw20>;
+ cpu-idle-states = <&cpu_pw15>;
};
cpu@501 {
@@ -209,7 +209,7 @@
i-cache-line-size = <64>;
i-cache-sets = <192>;
next-level-cache = <&cluster5_l2>;
- cpu-idle-states = <&cpu_pw20>;
+ cpu-idle-states = <&cpu_pw15>;
};
cpu@600 {
@@ -225,7 +225,7 @@
i-cache-line-size = <64>;
i-cache-sets = <192>;
next-level-cache = <&cluster6_l2>;
- cpu-idle-states = <&cpu_pw20>;
+ cpu-idle-states = <&cpu_pw15>;
};
cpu@601 {
@@ -241,7 +241,7 @@
i-cache-line-size = <64>;
i-cache-sets = <192>;
next-level-cache = <&cluster6_l2>;
- cpu-idle-states = <&cpu_pw20>;
+ cpu-idle-states = <&cpu_pw15>;
};
cpu@700 {
@@ -257,7 +257,7 @@
i-cache-line-size = <64>;
i-cache-sets = <192>;
next-level-cache = <&cluster7_l2>;
- cpu-idle-states = <&cpu_pw20>;
+ cpu-idle-states = <&cpu_pw15>;
};
cpu@701 {
@@ -273,7 +273,7 @@
i-cache-line-size = <64>;
i-cache-sets = <192>;
next-level-cache = <&cluster7_l2>;
- cpu-idle-states = <&cpu_pw20>;
+ cpu-idle-states = <&cpu_pw15>;
};
cluster0_l2: l2-cache0 {
@@ -340,9 +340,9 @@
cache-level = <2>;
};
- cpu_pw20: cpu-pw20 {
+ cpu_pw15: cpu-pw15 {
compatible = "arm,idle-state";
- idle-state-name = "PW20";
+ idle-state-name = "PW15";
arm,psci-suspend-param = <0x0>;
entry-latency-us = <2000>;
exit-latency-us = <2000>;
diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi b/arch/arm64/boot/dts/freescale/imx8mm.dtsi
index 5f9d0da196e1..23c8fad7932b 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi
@@ -394,7 +394,7 @@
};
sdma2: dma-controller@302c0000 {
- compatible = "fsl,imx8mm-sdma", "fsl,imx7d-sdma";
+ compatible = "fsl,imx8mm-sdma", "fsl,imx8mq-sdma";
reg = <0x302c0000 0x10000>;
interrupts = <GIC_SPI 103 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clk IMX8MM_CLK_SDMA2_ROOT>,
@@ -405,7 +405,7 @@
};
sdma3: dma-controller@302b0000 {
- compatible = "fsl,imx8mm-sdma", "fsl,imx7d-sdma";
+ compatible = "fsl,imx8mm-sdma", "fsl,imx8mq-sdma";
reg = <0x302b0000 0x10000>;
interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clk IMX8MM_CLK_SDMA3_ROOT>,
@@ -694,7 +694,7 @@
compatible = "fsl,imx8mm-usdhc", "fsl,imx7d-usdhc";
reg = <0x30b40000 0x10000>;
interrupts = <GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&clk IMX8MM_CLK_DUMMY>,
+ clocks = <&clk IMX8MM_CLK_IPG_ROOT>,
<&clk IMX8MM_CLK_NAND_USDHC_BUS>,
<&clk IMX8MM_CLK_USDHC1_ROOT>;
clock-names = "ipg", "ahb", "per";
@@ -710,7 +710,7 @@
compatible = "fsl,imx8mm-usdhc", "fsl,imx7d-usdhc";
reg = <0x30b50000 0x10000>;
interrupts = <GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&clk IMX8MM_CLK_DUMMY>,
+ clocks = <&clk IMX8MM_CLK_IPG_ROOT>,
<&clk IMX8MM_CLK_NAND_USDHC_BUS>,
<&clk IMX8MM_CLK_USDHC2_ROOT>;
clock-names = "ipg", "ahb", "per";
@@ -724,7 +724,7 @@
compatible = "fsl,imx8mm-usdhc", "fsl,imx7d-usdhc";
reg = <0x30b60000 0x10000>;
interrupts = <GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&clk IMX8MM_CLK_DUMMY>,
+ clocks = <&clk IMX8MM_CLK_IPG_ROOT>,
<&clk IMX8MM_CLK_NAND_USDHC_BUS>,
<&clk IMX8MM_CLK_USDHC3_ROOT>;
clock-names = "ipg", "ahb", "per";
@@ -737,7 +737,7 @@
};
sdma1: dma-controller@30bd0000 {
- compatible = "fsl,imx8mm-sdma", "fsl,imx7d-sdma";
+ compatible = "fsl,imx8mm-sdma", "fsl,imx8mq-sdma";
reg = <0x30bd0000 0x10000>;
interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clk IMX8MM_CLK_SDMA1_ROOT>,
diff --git a/arch/arm64/boot/dts/freescale/imx8mn.dtsi b/arch/arm64/boot/dts/freescale/imx8mn.dtsi
index 785f4c420fa4..43c4db312146 100644
--- a/arch/arm64/boot/dts/freescale/imx8mn.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mn.dtsi
@@ -288,7 +288,7 @@
};
sdma3: dma-controller@302b0000 {
- compatible = "fsl,imx8mn-sdma", "fsl,imx7d-sdma";
+ compatible = "fsl,imx8mn-sdma", "fsl,imx8mq-sdma";
reg = <0x302b0000 0x10000>;
interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clk IMX8MN_CLK_SDMA3_ROOT>,
@@ -299,7 +299,7 @@
};
sdma2: dma-controller@302c0000 {
- compatible = "fsl,imx8mn-sdma", "fsl,imx7d-sdma";
+ compatible = "fsl,imx8mn-sdma", "fsl,imx8mq-sdma";
reg = <0x302c0000 0x10000>;
interrupts = <GIC_SPI 103 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clk IMX8MN_CLK_SDMA2_ROOT>,
@@ -569,7 +569,7 @@
compatible = "fsl,imx8mn-usdhc", "fsl,imx7d-usdhc";
reg = <0x30b40000 0x10000>;
interrupts = <GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&clk IMX8MN_CLK_DUMMY>,
+ clocks = <&clk IMX8MN_CLK_IPG_ROOT>,
<&clk IMX8MN_CLK_NAND_USDHC_BUS>,
<&clk IMX8MN_CLK_USDHC1_ROOT>;
clock-names = "ipg", "ahb", "per";
@@ -585,7 +585,7 @@
compatible = "fsl,imx8mn-usdhc", "fsl,imx7d-usdhc";
reg = <0x30b50000 0x10000>;
interrupts = <GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&clk IMX8MN_CLK_DUMMY>,
+ clocks = <&clk IMX8MN_CLK_IPG_ROOT>,
<&clk IMX8MN_CLK_NAND_USDHC_BUS>,
<&clk IMX8MN_CLK_USDHC2_ROOT>;
clock-names = "ipg", "ahb", "per";
@@ -599,7 +599,7 @@
compatible = "fsl,imx8mn-usdhc", "fsl,imx7d-usdhc";
reg = <0x30b60000 0x10000>;
interrupts = <GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&clk IMX8MN_CLK_DUMMY>,
+ clocks = <&clk IMX8MN_CLK_IPG_ROOT>,
<&clk IMX8MN_CLK_NAND_USDHC_BUS>,
<&clk IMX8MN_CLK_USDHC3_ROOT>;
clock-names = "ipg", "ahb", "per";
@@ -612,7 +612,7 @@
};
sdma1: dma-controller@30bd0000 {
- compatible = "fsl,imx8mn-sdma", "fsl,imx7d-sdma";
+ compatible = "fsl,imx8mn-sdma", "fsl,imx8mq-sdma";
reg = <0x30bd0000 0x10000>;
interrupts = <GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clk IMX8MN_CLK_SDMA1_ROOT>,
diff --git a/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi b/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi
index af99473ada04..32ce14936b01 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mq-zii-ultra.dtsi
@@ -88,9 +88,9 @@
regulator-name = "0V9_ARM";
regulator-min-microvolt = <900000>;
regulator-max-microvolt = <1000000>;
- gpios = <&gpio3 19 GPIO_ACTIVE_HIGH>;
- states = <1000000 0x0
- 900000 0x1>;
+ gpios = <&gpio3 16 GPIO_ACTIVE_HIGH>;
+ states = <1000000 0x1
+ 900000 0x0>;
regulator-always-on;
};
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq.dtsi
index 04115ca6bfb5..55a3d1c4bdf0 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mq.dtsi
@@ -850,7 +850,7 @@
"fsl,imx7d-usdhc";
reg = <0x30b40000 0x10000>;
interrupts = <GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&clk IMX8MQ_CLK_DUMMY>,
+ clocks = <&clk IMX8MQ_CLK_IPG_ROOT>,
<&clk IMX8MQ_CLK_NAND_USDHC_BUS>,
<&clk IMX8MQ_CLK_USDHC1_ROOT>;
clock-names = "ipg", "ahb", "per";
@@ -867,7 +867,7 @@
"fsl,imx7d-usdhc";
reg = <0x30b50000 0x10000>;
interrupts = <GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&clk IMX8MQ_CLK_DUMMY>,
+ clocks = <&clk IMX8MQ_CLK_IPG_ROOT>,
<&clk IMX8MQ_CLK_NAND_USDHC_BUS>,
<&clk IMX8MQ_CLK_USDHC2_ROOT>;
clock-names = "ipg", "ahb", "per";
diff --git a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts
index d105986c6be1..5f350cc71a2f 100644
--- a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts
+++ b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts
@@ -60,11 +60,6 @@
gpio = <&gpiosb 0 GPIO_ACTIVE_HIGH>;
};
- usb3_phy: usb3-phy {
- compatible = "usb-nop-xceiv";
- vcc-supply = <&exp_usb3_vbus>;
- };
-
vsdc_reg: vsdc-reg {
compatible = "regulator-gpio";
regulator-name = "vsdc";
@@ -255,10 +250,16 @@
status = "okay";
};
+&comphy2 {
+ connector {
+ compatible = "usb-a-connector";
+ phy-supply = <&exp_usb3_vbus>;
+ };
+};
+
&usb3 {
status = "okay";
phys = <&comphy2 0>;
- usb-phy = <&usb3_phy>;
};
&mdio {
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts b/arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts
index e152b0ca0290..b8066868a3fe 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-kevin.dts
@@ -44,7 +44,7 @@
power-supply = <&pp3300_disp>;
panel-timing {
- clock-frequency = <266604720>;
+ clock-frequency = <266666667>;
hactive = <2400>;
hfront-porch = <48>;
hback-porch = <84>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-hugsun-x99.dts b/arch/arm64/boot/dts/rockchip/rk3399-hugsun-x99.dts
index 0d1f5f9a0de9..c133e8d64b2a 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-hugsun-x99.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-hugsun-x99.dts
@@ -644,7 +644,7 @@
status = "okay";
u2phy0_host: host-port {
- phy-supply = <&vcc5v0_host>;
+ phy-supply = <&vcc5v0_typec>;
status = "okay";
};
@@ -712,7 +712,7 @@
&usbdrd_dwc3_0 {
status = "okay";
- dr_mode = "otg";
+ dr_mode = "host";
};
&usbdrd3_1 {
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts b/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts
index 0401d4ec1f45..e544deb61d28 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts
@@ -173,7 +173,7 @@
regulator-always-on;
regulator-boot-on;
regulator-min-microvolt = <800000>;
- regulator-max-microvolt = <1400000>;
+ regulator-max-microvolt = <1700000>;
vin-supply = <&vcc5v0_sys>;
};
};
@@ -247,8 +247,8 @@
rk808: pmic@1b {
compatible = "rockchip,rk808";
reg = <0x1b>;
- interrupt-parent = <&gpio1>;
- interrupts = <21 IRQ_TYPE_LEVEL_LOW>;
+ interrupt-parent = <&gpio3>;
+ interrupts = <10 IRQ_TYPE_LEVEL_LOW>;
#clock-cells = <1>;
clock-output-names = "xin32k", "rk808-clkout2";
pinctrl-names = "default";
@@ -574,7 +574,7 @@
pmic {
pmic_int_l: pmic-int-l {
- rockchip,pins = <1 RK_PC5 RK_FUNC_GPIO &pcfg_pull_up>;
+ rockchip,pins = <3 RK_PB2 RK_FUNC_GPIO &pcfg_pull_up>;
};
vsel1_gpio: vsel1-gpio {
@@ -624,7 +624,6 @@
&sdmmc {
bus-width = <4>;
- cap-mmc-highspeed;
cap-sd-highspeed;
cd-gpios = <&gpio0 7 GPIO_ACTIVE_LOW>;
disable-wp;
@@ -636,8 +635,7 @@
&sdhci {
bus-width = <8>;
- mmc-hs400-1_8v;
- mmc-hs400-enhanced-strobe;
+ mmc-hs200-1_8v;
non-removable;
status = "okay";
};
diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h
index f74909ba29bd..f68a0e64482a 100644
--- a/arch/arm64/include/asm/asm-uaccess.h
+++ b/arch/arm64/include/asm/asm-uaccess.h
@@ -58,30 +58,4 @@ alternative_else_nop_endif
.endm
#endif
-/*
- * These macros are no-ops when UAO is present.
- */
- .macro uaccess_disable_not_uao, tmp1, tmp2
- uaccess_ttbr0_disable \tmp1, \tmp2
-alternative_if ARM64_ALT_PAN_NOT_UAO
- SET_PSTATE_PAN(1)
-alternative_else_nop_endif
- .endm
-
- .macro uaccess_enable_not_uao, tmp1, tmp2, tmp3
- uaccess_ttbr0_enable \tmp1, \tmp2, \tmp3
-alternative_if ARM64_ALT_PAN_NOT_UAO
- SET_PSTATE_PAN(0)
-alternative_else_nop_endif
- .endm
-
-/*
- * Remove the address tag from a virtual address, if present.
- */
- .macro clear_address_tag, dst, addr
- tst \addr, #(1 << 55)
- bic \dst, \addr, #(0xff << 56)
- csel \dst, \dst, \addr, eq
- .endm
-
#endif
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index c6bd87d2915b..574808b9df4c 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -321,7 +321,8 @@ static inline s64 __lse_atomic64_dec_if_positive(atomic64_t *v)
}
#define __CMPXCHG_CASE(w, sfx, name, sz, mb, cl...) \
-static inline u##sz __lse__cmpxchg_case_##name##sz(volatile void *ptr, \
+static __always_inline u##sz \
+__lse__cmpxchg_case_##name##sz(volatile void *ptr, \
u##sz old, \
u##sz new) \
{ \
@@ -362,7 +363,8 @@ __CMPXCHG_CASE(x, , mb_, 64, al, "memory")
#undef __CMPXCHG_CASE
#define __CMPXCHG_DBL(name, mb, cl...) \
-static inline long __lse__cmpxchg_double##name(unsigned long old1, \
+static __always_inline long \
+__lse__cmpxchg_double##name(unsigned long old1, \
unsigned long old2, \
unsigned long new1, \
unsigned long new2, \
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index e0e2b1946f42..7d9cc5ec4971 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -29,6 +29,18 @@
SB_BARRIER_INSN"nop\n", \
ARM64_HAS_SB))
+#ifdef CONFIG_ARM64_PSEUDO_NMI
+#define pmr_sync() \
+ do { \
+ extern struct static_key_false gic_pmr_sync; \
+ \
+ if (static_branch_unlikely(&gic_pmr_sync)) \
+ dsb(sy); \
+ } while(0)
+#else
+#define pmr_sync() do {} while (0)
+#endif
+
#define mb() dsb(sy)
#define rmb() dsb(ld)
#define wmb() dsb(st)
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index 43da6dd29592..806e9dc2a852 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -11,6 +11,7 @@
#define CTR_L1IP_MASK 3
#define CTR_DMINLINE_SHIFT 16
#define CTR_IMINLINE_SHIFT 0
+#define CTR_IMINLINE_MASK 0xf
#define CTR_ERG_SHIFT 20
#define CTR_CWG_SHIFT 24
#define CTR_CWG_MASK 15
@@ -18,7 +19,7 @@
#define CTR_DIC_SHIFT 29
#define CTR_CACHE_MINLINE_MASK \
- (0xf << CTR_DMINLINE_SHIFT | 0xf << CTR_IMINLINE_SHIFT)
+ (0xf << CTR_DMINLINE_SHIFT | CTR_IMINLINE_MASK << CTR_IMINLINE_SHIFT)
#define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK)
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index f19fe4b9acc4..b92683871119 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -52,7 +52,11 @@
#define ARM64_HAS_IRQ_PRIO_MASKING 42
#define ARM64_HAS_DCPODP 43
#define ARM64_WORKAROUND_1463225 44
+#define ARM64_WORKAROUND_CAVIUM_TX2_219_TVM 45
+#define ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM 46
+#define ARM64_WORKAROUND_1542419 47
+#define ARM64_WORKAROUND_1319367 48
-#define ARM64_NCAPS 45
+#define ARM64_NCAPS 49
#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 9cde5d2e768f..4261d55e8506 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -659,6 +659,20 @@ static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange)
default: return CONFIG_ARM64_PA_BITS;
}
}
+
+/* Check whether hardware update of the Access flag is supported */
+static inline bool cpu_has_hw_af(void)
+{
+ u64 mmfr1;
+
+ if (!IS_ENABLED(CONFIG_ARM64_HW_AFDBM))
+ return false;
+
+ mmfr1 = read_cpuid(ID_AA64MMFR1_EL1);
+ return cpuid_feature_extract_unsigned_field(mmfr1,
+ ID_AA64MMFR1_HADBS_SHIFT);
+}
+
#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index b1454d117cd2..aca07c2f6e6e 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -79,6 +79,7 @@
#define CAVIUM_CPU_PART_THUNDERX_83XX 0x0A3
#define CAVIUM_CPU_PART_THUNDERX2 0x0AF
+#define BRCM_CPU_PART_BRAHMA_B53 0x100
#define BRCM_CPU_PART_VULCAN 0x516
#define QCOM_CPU_PART_FALKOR_V1 0x800
@@ -105,6 +106,7 @@
#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
#define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
#define MIDR_CAVIUM_THUNDERX2 MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX2)
+#define MIDR_BRAHMA_B53 MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_BRAHMA_B53)
#define MIDR_BRCM_VULCAN MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_VULCAN)
#define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1)
#define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR)
diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h
index 063c964af705..72acd2db167f 100644
--- a/arch/arm64/include/asm/daifflags.h
+++ b/arch/arm64/include/asm/daifflags.h
@@ -8,7 +8,9 @@
#include <linux/irqflags.h>
#include <asm/arch_gicv3.h>
+#include <asm/barrier.h>
#include <asm/cpufeature.h>
+#include <asm/ptrace.h>
#define DAIF_PROCCTX 0
#define DAIF_PROCCTX_NOIRQ PSR_I_BIT
@@ -65,7 +67,7 @@ static inline void local_daif_restore(unsigned long flags)
if (system_uses_irq_prio_masking()) {
gic_write_pmr(GIC_PRIO_IRQON);
- dsb(sy);
+ pmr_sync();
}
} else if (system_uses_irq_prio_masking()) {
u64 pmr;
@@ -109,4 +111,19 @@ static inline void local_daif_restore(unsigned long flags)
trace_hardirqs_off();
}
+/*
+ * Called by synchronous exception handlers to restore the DAIF bits that were
+ * modified by taking an exception.
+ */
+static inline void local_daif_inherit(struct pt_regs *regs)
+{
+ unsigned long flags = regs->pstate & DAIF_MASK;
+
+ /*
+ * We can't use local_daif_restore(regs->pstate) here as
+ * system_has_prio_mask_debugging() won't restore the I bit if it can
+ * use the pmr instead.
+ */
+ write_sysreg(flags, daif);
+}
#endif
diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index a17393ff6677..4d5f3b5f50cd 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -8,14 +8,15 @@
#define __ASM_EXCEPTION_H
#include <asm/esr.h>
+#include <asm/kprobes.h>
+#include <asm/ptrace.h>
#include <linux/interrupt.h>
-#define __exception __attribute__((section(".exception.text")))
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
#define __exception_irq_entry __irq_entry
#else
-#define __exception_irq_entry __exception
+#define __exception_irq_entry __kprobes
#endif
static inline u32 disr_to_esr(u64 disr)
@@ -31,5 +32,22 @@ static inline u32 disr_to_esr(u64 disr)
}
asmlinkage void enter_from_user_mode(void);
+void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
+void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
+void do_undefinstr(struct pt_regs *regs);
+asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr);
+void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
+ struct pt_regs *regs);
+void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs);
+void do_sve_acc(unsigned int esr, struct pt_regs *regs);
+void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs);
+void do_sysinstr(unsigned int esr, struct pt_regs *regs);
+void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
+void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr);
+void do_cp15instr(unsigned int esr, struct pt_regs *regs);
+void el0_svc_handler(struct pt_regs *regs);
+void el0_svc_compat_handler(struct pt_regs *regs);
+void do_el0_ia_bp_hardening(unsigned long addr, unsigned int esr,
+ struct pt_regs *regs);
#endif /* __ASM_EXCEPTION_H */
diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index d48667b04c41..91fa4baa1a93 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -11,9 +11,20 @@
#include <asm/insn.h>
#define HAVE_FUNCTION_GRAPH_FP_TEST
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+#define ARCH_SUPPORTS_FTRACE_OPS 1
+#else
#define MCOUNT_ADDR ((unsigned long)_mcount)
+#endif
+
+/* The BL at the callsite's adjusted rec->ip */
#define MCOUNT_INSN_SIZE AARCH64_INSN_SIZE
+#define FTRACE_PLT_IDX 0
+#define FTRACE_REGS_PLT_IDX 1
+#define NR_FTRACE_PLTS 2
+
/*
* Currently, gcc tends to save the link register after the local variables
* on the stack. This causes the max stack tracer to report the function
@@ -44,12 +55,24 @@ extern void return_to_handler(void);
static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
/*
+ * Adjust addr to point at the BL in the callsite.
+ * See ftrace_init_nop() for the callsite sequence.
+ */
+ if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
+ return addr + AARCH64_INSN_SIZE;
+ /*
* addr is the address of the mcount call instruction.
* recordmcount does the necessary offset calculation.
*/
return addr;
}
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+struct dyn_ftrace;
+int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
+#define ftrace_init_nop ftrace_init_nop
+#endif
+
#define ftrace_return_address(n) return_address(n)
/*
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 39e7780bedd6..bb313dde58a4 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -440,6 +440,9 @@ u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst,
int shift,
enum aarch64_insn_variant variant,
enum aarch64_insn_logic_type type);
+u32 aarch64_insn_gen_move_reg(enum aarch64_insn_register dst,
+ enum aarch64_insn_register src,
+ enum aarch64_insn_variant variant);
u32 aarch64_insn_gen_logical_immediate(enum aarch64_insn_logic_type type,
enum aarch64_insn_variant variant,
enum aarch64_insn_register Rn,
diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
index 1a59f0ed1ae3..aa4b6521ef14 100644
--- a/arch/arm64/include/asm/irqflags.h
+++ b/arch/arm64/include/asm/irqflags.h
@@ -6,6 +6,7 @@
#define __ASM_IRQFLAGS_H
#include <asm/alternative.h>
+#include <asm/barrier.h>
#include <asm/ptrace.h>
#include <asm/sysreg.h>
@@ -34,14 +35,14 @@ static inline void arch_local_irq_enable(void)
}
asm volatile(ALTERNATIVE(
- "msr daifclr, #2 // arch_local_irq_enable\n"
- "nop",
- __msr_s(SYS_ICC_PMR_EL1, "%0")
- "dsb sy",
+ "msr daifclr, #2 // arch_local_irq_enable",
+ __msr_s(SYS_ICC_PMR_EL1, "%0"),
ARM64_HAS_IRQ_PRIO_MASKING)
:
: "r" ((unsigned long) GIC_PRIO_IRQON)
: "memory");
+
+ pmr_sync();
}
static inline void arch_local_irq_disable(void)
@@ -116,14 +117,14 @@ static inline unsigned long arch_local_irq_save(void)
static inline void arch_local_irq_restore(unsigned long flags)
{
asm volatile(ALTERNATIVE(
- "msr daif, %0\n"
- "nop",
- __msr_s(SYS_ICC_PMR_EL1, "%0")
- "dsb sy",
- ARM64_HAS_IRQ_PRIO_MASKING)
+ "msr daif, %0",
+ __msr_s(SYS_ICC_PMR_EL1, "%0"),
+ ARM64_HAS_IRQ_PRIO_MASKING)
:
: "r" (flags)
: "memory");
+
+ pmr_sync();
}
#endif /* __ASM_IRQFLAGS_H */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index f656169db8c3..5ecb091c8576 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -600,8 +600,7 @@ static inline void kvm_arm_vhe_guest_enter(void)
* local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a
* dsb to ensure the redistributor is forwards EL2 IRQs to the CPU.
*/
- if (system_uses_irq_prio_masking())
- dsb(sy);
+ pmr_sync();
}
static inline void kvm_arm_vhe_guest_exit(void)
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index b61b50bf68b1..a4f9ca5479b0 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -69,12 +69,6 @@
#define KERNEL_START _text
#define KERNEL_END _end
-#ifdef CONFIG_ARM64_VA_BITS_52
-#define MAX_USER_VA_BITS 52
-#else
-#define MAX_USER_VA_BITS VA_BITS
-#endif
-
/*
* Generic and tag-based KASAN require 1/8th and 1/16th of the kernel virtual
* address space for the shadow region respectively. They can bloat the stack
@@ -215,12 +209,18 @@ static inline unsigned long kaslr_offset(void)
* up with a tagged userland pointer. Clear the tag to get a sane pointer to
* pass on to access_ok(), for instance.
*/
-#define untagged_addr(addr) \
+#define __untagged_addr(addr) \
((__force __typeof__(addr))sign_extend64((__force u64)(addr), 55))
+#define untagged_addr(addr) ({ \
+ u64 __addr = (__force u64)addr; \
+ __addr &= __untagged_addr(__addr); \
+ (__force __typeof__(addr))__addr; \
+})
+
#ifdef CONFIG_KASAN_SW_TAGS
#define __tag_shifted(tag) ((u64)(tag) << 56)
-#define __tag_reset(addr) untagged_addr(addr)
+#define __tag_reset(addr) __untagged_addr(addr)
#define __tag_get(addr) (__u8)((u64)(addr) >> 56)
#else
#define __tag_shifted(tag) 0UL
diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
index f80e13cbf8ec..1e93de68c044 100644
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -21,7 +21,7 @@ struct mod_arch_specific {
struct mod_plt_sec init;
/* for CONFIG_DYNAMIC_FTRACE */
- struct plt_entry *ftrace_trampoline;
+ struct plt_entry *ftrace_trampolines;
};
#endif
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index 3df60f97da1f..d9fbd433cc17 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -69,7 +69,7 @@
#define PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS)
#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE-1))
-#define PTRS_PER_PGD (1 << (MAX_USER_VA_BITS - PGDIR_SHIFT))
+#define PTRS_PER_PGD (1 << (VA_BITS - PGDIR_SHIFT))
/*
* Section address mask and size definitions.
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 9a21b84536f2..8dc6c5cdabe6 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -32,11 +32,11 @@
#define PROT_DEFAULT (_PROT_DEFAULT | PTE_MAYBE_NG)
#define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_MAYBE_NG)
-#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
-#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
-#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC))
-#define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT))
-#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL))
+#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
+#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
+#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC))
+#define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT))
+#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL))
#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
#define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
@@ -80,8 +80,9 @@
#define PAGE_S2_DEVICE __pgprot(_PROT_DEFAULT | PAGE_S2_MEMATTR(DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_S2_XN)
#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
-#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
-#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE)
+/* shared+writable pages are clean by default, hence PTE_RDONLY|PTE_WRITE */
+#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
+#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE)
#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
#define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN)
#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 7576df00eb50..5d15b4735a0e 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -17,7 +17,7 @@
* VMALLOC range.
*
* VMALLOC_START: beginning of the kernel vmalloc space
- * VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space
+ * VMALLOC_END: extends to the available space below vmemmap, PCI I/O space
* and fixed mappings
*/
#define VMALLOC_START (MODULES_END)
@@ -283,23 +283,6 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
set_pte(ptep, pte);
}
-#define __HAVE_ARCH_PTE_SAME
-static inline int pte_same(pte_t pte_a, pte_t pte_b)
-{
- pteval_t lhs, rhs;
-
- lhs = pte_val(pte_a);
- rhs = pte_val(pte_b);
-
- if (pte_present(pte_a))
- lhs &= ~PTE_RDONLY;
-
- if (pte_present(pte_b))
- rhs &= ~PTE_RDONLY;
-
- return (lhs == rhs);
-}
-
/*
* Huge pte definitions.
*/
@@ -876,15 +859,26 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
-#define kc_vaddr_to_offset(v) ((v) & ~PAGE_END)
-#define kc_offset_to_vaddr(o) ((o) | PAGE_END)
-
#ifdef CONFIG_ARM64_PA_BITS_52
#define phys_to_ttbr(addr) (((addr) | ((addr) >> 46)) & TTBR_BADDR_MASK_52)
#else
#define phys_to_ttbr(addr) (addr)
#endif
+/*
+ * On arm64 without hardware Access Flag, copying from user will fail because
+ * the pte is old and cannot be marked young. So we always end up with zeroed
+ * page after fork() + CoW for pfn mappings. We don't always have a
+ * hardware-managed access flag on arm64.
+ */
+static inline bool arch_faults_on_old_pte(void)
+{
+ WARN_ON(preemptible());
+
+ return !cpu_has_hw_af();
+}
+#define arch_faults_on_old_pte arch_faults_on_old_pte
+
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_PGTABLE_H */
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 5623685c7d13..5ba63204d078 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -9,7 +9,7 @@
#define __ASM_PROCESSOR_H
#define KERNEL_DS UL(-1)
-#define USER_DS ((UL(1) << MAX_USER_VA_BITS) - 1)
+#define USER_DS ((UL(1) << VA_BITS) - 1)
/*
* On arm64 systems, unaligned accesses by the CPU are cheap, and so there is
@@ -26,10 +26,12 @@
#include <linux/init.h>
#include <linux/stddef.h>
#include <linux/string.h>
+#include <linux/thread_info.h>
#include <asm/alternative.h>
#include <asm/cpufeature.h>
#include <asm/hw_breakpoint.h>
+#include <asm/kasan.h>
#include <asm/lse.h>
#include <asm/pgtable-hwdef.h>
#include <asm/pointer_auth.h>
@@ -214,6 +216,18 @@ static inline void start_thread(struct pt_regs *regs, unsigned long pc,
regs->sp = sp;
}
+static inline bool is_ttbr0_addr(unsigned long addr)
+{
+ /* entry assembly clears tags for TTBR0 addrs */
+ return addr < TASK_SIZE;
+}
+
+static inline bool is_ttbr1_addr(unsigned long addr)
+{
+ /* TTBR1 addresses may have a tag if KASAN_SW_TAGS is in use */
+ return arch_kasan_reset_tag(addr) >= PAGE_OFFSET;
+}
+
#ifdef CONFIG_COMPAT
static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc,
unsigned long sp)
diff --git a/arch/arm64/include/asm/syscall_wrapper.h b/arch/arm64/include/asm/syscall_wrapper.h
index 06d880b3526c..b383b4802a7b 100644
--- a/arch/arm64/include/asm/syscall_wrapper.h
+++ b/arch/arm64/include/asm/syscall_wrapper.h
@@ -66,24 +66,18 @@ struct pt_regs;
} \
static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
-#ifndef SYSCALL_DEFINE0
#define SYSCALL_DEFINE0(sname) \
SYSCALL_METADATA(_##sname, 0); \
asmlinkage long __arm64_sys_##sname(const struct pt_regs *__unused); \
ALLOW_ERROR_INJECTION(__arm64_sys_##sname, ERRNO); \
asmlinkage long __arm64_sys_##sname(const struct pt_regs *__unused)
-#endif
-#ifndef COND_SYSCALL
#define COND_SYSCALL(name) \
asmlinkage long __weak __arm64_sys_##name(const struct pt_regs *regs) \
{ \
return sys_ni_syscall(); \
}
-#endif
-#ifndef SYS_NI
#define SYS_NI(name) SYSCALL_ALIAS(__arm64_sys_##name, sys_ni_posix_timers);
-#endif
#endif /* __ASM_SYSCALL_WRAPPER_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 972d196c7714..6e919fafb43d 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -212,7 +212,7 @@
#define SYS_FAR_EL1 sys_reg(3, 0, 6, 0, 0)
#define SYS_PAR_EL1 sys_reg(3, 0, 7, 4, 0)
-#define SYS_PAR_EL1_F BIT(1)
+#define SYS_PAR_EL1_F BIT(0)
#define SYS_PAR_EL1_FST GENMASK(6, 1)
/*** Statistical Profiling Extension ***/
diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
index 59690613ac31..cee5928e1b7d 100644
--- a/arch/arm64/include/asm/traps.h
+++ b/arch/arm64/include/asm/traps.h
@@ -42,16 +42,6 @@ static inline int __in_irqentry_text(unsigned long ptr)
ptr < (unsigned long)&__irqentry_text_end;
}
-static inline int in_exception_text(unsigned long ptr)
-{
- int in;
-
- in = ptr >= (unsigned long)&__exception_text_start &&
- ptr < (unsigned long)&__exception_text_end;
-
- return in ? : __in_irqentry_text(ptr);
-}
-
static inline int in_entry_text(unsigned long ptr)
{
return ptr >= (unsigned long)&__entry_text_start &&
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 097d6bfac0b7..127712b0b970 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -378,20 +378,34 @@ do { \
extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n);
#define raw_copy_from_user(to, from, n) \
({ \
- __arch_copy_from_user((to), __uaccess_mask_ptr(from), (n)); \
+ unsigned long __acfu_ret; \
+ uaccess_enable_not_uao(); \
+ __acfu_ret = __arch_copy_from_user((to), \
+ __uaccess_mask_ptr(from), (n)); \
+ uaccess_disable_not_uao(); \
+ __acfu_ret; \
})
extern unsigned long __must_check __arch_copy_to_user(void __user *to, const void *from, unsigned long n);
#define raw_copy_to_user(to, from, n) \
({ \
- __arch_copy_to_user(__uaccess_mask_ptr(to), (from), (n)); \
+ unsigned long __actu_ret; \
+ uaccess_enable_not_uao(); \
+ __actu_ret = __arch_copy_to_user(__uaccess_mask_ptr(to), \
+ (from), (n)); \
+ uaccess_disable_not_uao(); \
+ __actu_ret; \
})
extern unsigned long __must_check __arch_copy_in_user(void __user *to, const void __user *from, unsigned long n);
#define raw_copy_in_user(to, from, n) \
({ \
- __arch_copy_in_user(__uaccess_mask_ptr(to), \
- __uaccess_mask_ptr(from), (n)); \
+ unsigned long __aciu_ret; \
+ uaccess_enable_not_uao(); \
+ __aciu_ret = __arch_copy_in_user(__uaccess_mask_ptr(to), \
+ __uaccess_mask_ptr(from), (n)); \
+ uaccess_disable_not_uao(); \
+ __aciu_ret; \
})
#define INLINE_COPY_TO_USER
@@ -400,8 +414,11 @@ extern unsigned long __must_check __arch_copy_in_user(void __user *to, const voi
extern unsigned long __must_check __arch_clear_user(void __user *to, unsigned long n);
static inline unsigned long __must_check __clear_user(void __user *to, unsigned long n)
{
- if (access_ok(to, n))
+ if (access_ok(to, n)) {
+ uaccess_enable_not_uao();
n = __arch_clear_user(__uaccess_mask_ptr(to), n);
+ uaccess_disable_not_uao();
+ }
return n;
}
#define clear_user __clear_user
diff --git a/arch/arm64/include/asm/vdso/compat_barrier.h b/arch/arm64/include/asm/vdso/compat_barrier.h
index fb60a88b5ed4..3fd8fd6d8fc2 100644
--- a/arch/arm64/include/asm/vdso/compat_barrier.h
+++ b/arch/arm64/include/asm/vdso/compat_barrier.h
@@ -20,7 +20,7 @@
#define dmb(option) __asm__ __volatile__ ("dmb " #option : : : "memory")
-#if __LINUX_ARM_ARCH__ >= 8
+#if __LINUX_ARM_ARCH__ >= 8 && defined(CONFIG_AS_DMB_ISHLD)
#define aarch32_smp_mb() dmb(ish)
#define aarch32_smp_rmb() dmb(ishld)
#define aarch32_smp_wmb() dmb(ishst)
diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h
index 0c731bfc7c8c..0c20a7c1bee5 100644
--- a/arch/arm64/include/asm/vdso/vsyscall.h
+++ b/arch/arm64/include/asm/vdso/vsyscall.h
@@ -31,13 +31,6 @@ int __arm64_get_clock_mode(struct timekeeper *tk)
#define __arch_get_clock_mode __arm64_get_clock_mode
static __always_inline
-int __arm64_use_vsyscall(struct vdso_data *vdata)
-{
- return !vdata[CS_HRES_COARSE].clock_mode;
-}
-#define __arch_use_vsyscall __arm64_use_vsyscall
-
-static __always_inline
void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk)
{
vdata[CS_HRES_COARSE].mask = VDSO_PRECISION_MASK;
diff --git a/arch/arm64/include/asm/vdso_datapage.h b/arch/arm64/include/asm/vdso_datapage.h
deleted file mode 100644
index 1f38bf330a6e..000000000000
--- a/arch/arm64/include/asm/vdso_datapage.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2012 ARM Limited
- */
-#ifndef __ASM_VDSO_DATAPAGE_H
-#define __ASM_VDSO_DATAPAGE_H
-
-#ifndef __ASSEMBLY__
-
-struct vdso_data {
- __u64 cs_cycle_last; /* Timebase at clocksource init */
- __u64 raw_time_sec; /* Raw time */
- __u64 raw_time_nsec;
- __u64 xtime_clock_sec; /* Kernel time */
- __u64 xtime_clock_nsec;
- __u64 xtime_coarse_sec; /* Coarse time */
- __u64 xtime_coarse_nsec;
- __u64 wtm_clock_sec; /* Wall to monotonic time */
- __u64 wtm_clock_nsec;
- __u32 tb_seq_count; /* Timebase sequence counter */
- /* cs_* members must be adjacent and in this order (ldp accesses) */
- __u32 cs_mono_mult; /* NTP-adjusted clocksource multiplier */
- __u32 cs_shift; /* Clocksource shift (mono = raw) */
- __u32 cs_raw_mult; /* Raw clocksource multiplier */
- __u32 tz_minuteswest; /* Whacky timezone stuff */
- __u32 tz_dsttime;
- __u32 use_syscall;
- __u32 hrtimer_res;
-};
-
-#endif /* !__ASSEMBLY__ */
-
-#endif /* __ASM_VDSO_DATAPAGE_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 478491f07b4f..fc6488660f64 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -13,9 +13,9 @@ CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE)
# Object file lists.
obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
- entry-fpsimd.o process.o ptrace.o setup.o signal.o \
- sys.o stacktrace.o time.o traps.o io.o vdso.o \
- hyp-stub.o psci.o cpu_ops.o insn.o \
+ entry-common.o entry-fpsimd.o process.o ptrace.o \
+ setup.o signal.o sys.o stacktrace.o time.o traps.o \
+ io.o vdso.o hyp-stub.o psci.o cpu_ops.o insn.o \
return_address.o cpuinfo.o cpu_errata.o \
cpufeature.o alternative.o cacheinfo.o \
smp.o smp_spin_table.o topology.o smccc-call.o \
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index 2ec09debc2bb..ca158be21f83 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -174,6 +174,9 @@ static void __init register_insn_emulation(struct insn_emulation_ops *ops)
struct insn_emulation *insn;
insn = kzalloc(sizeof(*insn), GFP_KERNEL);
+ if (!insn)
+ return;
+
insn->ops = ops;
insn->min = INSN_UNDEF;
@@ -233,6 +236,8 @@ static void __init register_insn_emulation_sysctl(void)
insns_sysctl = kcalloc(nr_insn_emulated + 1, sizeof(*sysctl),
GFP_KERNEL);
+ if (!insns_sysctl)
+ return;
raw_spin_lock_irqsave(&insn_emulation_lock, flags);
list_for_each_entry(insn, &insn_emulation, node) {
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 214685760e1c..a5bdce8af65b 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -56,6 +56,7 @@ int main(void)
DEFINE(S_X24, offsetof(struct pt_regs, regs[24]));
DEFINE(S_X26, offsetof(struct pt_regs, regs[26]));
DEFINE(S_X28, offsetof(struct pt_regs, regs[28]));
+ DEFINE(S_FP, offsetof(struct pt_regs, regs[29]));
DEFINE(S_LR, offsetof(struct pt_regs, regs[30]));
DEFINE(S_SP, offsetof(struct pt_regs, sp));
DEFINE(S_PSTATE, offsetof(struct pt_regs, pstate));
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 1e43ba5c79b7..bbdc95ee4642 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -6,12 +6,12 @@
*/
#include <linux/arm-smccc.h>
-#include <linux/psci.h>
#include <linux/types.h>
#include <linux/cpu.h>
#include <asm/cpu.h>
#include <asm/cputype.h>
#include <asm/cpufeature.h>
+#include <asm/smp_plat.h>
static bool __maybe_unused
is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int scope)
@@ -87,13 +87,21 @@ has_mismatched_cache_type(const struct arm64_cpu_capabilities *entry,
}
static void
-cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *__unused)
+cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *cap)
{
u64 mask = arm64_ftr_reg_ctrel0.strict_mask;
+ bool enable_uct_trap = false;
/* Trap CTR_EL0 access on this CPU, only if it has a mismatch */
if ((read_cpuid_cachetype() & mask) !=
(arm64_ftr_reg_ctrel0.sys_val & mask))
+ enable_uct_trap = true;
+
+ /* ... or if the system is affected by an erratum */
+ if (cap->capability == ARM64_WORKAROUND_1542419)
+ enable_uct_trap = true;
+
+ if (enable_uct_trap)
sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0);
}
@@ -128,8 +136,8 @@ static void install_bp_hardening_cb(bp_hardening_cb_t fn,
int cpu, slot = -1;
/*
- * enable_smccc_arch_workaround_1() passes NULL for the hyp_vecs
- * start/end if we're a guest. Skip the hyp-vectors work.
+ * detect_harden_bp_fw() passes NULL for the hyp_vecs start/end if
+ * we're a guest. Skip the hyp-vectors work.
*/
if (!hyp_vecs_start) {
__this_cpu_write(bp_hardening_data.fn, fn);
@@ -166,9 +174,7 @@ static void install_bp_hardening_cb(bp_hardening_cb_t fn,
}
#endif /* CONFIG_KVM_INDIRECT_VECTORS */
-#include <uapi/linux/psci.h>
#include <linux/arm-smccc.h>
-#include <linux/psci.h>
static void call_smc_arch_workaround_1(void)
{
@@ -212,11 +218,8 @@ static int detect_harden_bp_fw(void)
struct arm_smccc_res res;
u32 midr = read_cpuid_id();
- if (psci_ops.smccc_version == SMCCC_VERSION_1_0)
- return -1;
-
- switch (psci_ops.conduit) {
- case PSCI_CONDUIT_HVC:
+ switch (arm_smccc_1_1_get_conduit()) {
+ case SMCCC_CONDUIT_HVC:
arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
ARM_SMCCC_ARCH_WORKAROUND_1, &res);
switch ((int)res.a0) {
@@ -234,7 +237,7 @@ static int detect_harden_bp_fw(void)
}
break;
- case PSCI_CONDUIT_SMC:
+ case SMCCC_CONDUIT_SMC:
arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
ARM_SMCCC_ARCH_WORKAROUND_1, &res);
switch ((int)res.a0) {
@@ -308,11 +311,11 @@ void __init arm64_update_smccc_conduit(struct alt_instr *alt,
BUG_ON(nr_inst != 1);
- switch (psci_ops.conduit) {
- case PSCI_CONDUIT_HVC:
+ switch (arm_smccc_1_1_get_conduit()) {
+ case SMCCC_CONDUIT_HVC:
insn = aarch64_insn_get_hvc_value();
break;
- case PSCI_CONDUIT_SMC:
+ case SMCCC_CONDUIT_SMC:
insn = aarch64_insn_get_smc_value();
break;
default:
@@ -351,12 +354,12 @@ void arm64_set_ssbd_mitigation(bool state)
return;
}
- switch (psci_ops.conduit) {
- case PSCI_CONDUIT_HVC:
+ switch (arm_smccc_1_1_get_conduit()) {
+ case SMCCC_CONDUIT_HVC:
arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL);
break;
- case PSCI_CONDUIT_SMC:
+ case SMCCC_CONDUIT_SMC:
arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL);
break;
@@ -390,20 +393,13 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
goto out_printmsg;
}
- if (psci_ops.smccc_version == SMCCC_VERSION_1_0) {
- ssbd_state = ARM64_SSBD_UNKNOWN;
- if (!this_cpu_safe)
- __ssb_safe = false;
- return false;
- }
-
- switch (psci_ops.conduit) {
- case PSCI_CONDUIT_HVC:
+ switch (arm_smccc_1_1_get_conduit()) {
+ case SMCCC_CONDUIT_HVC:
arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
ARM_SMCCC_ARCH_WORKAROUND_2, &res);
break;
- case PSCI_CONDUIT_SMC:
+ case SMCCC_CONDUIT_SMC:
arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
ARM_SMCCC_ARCH_WORKAROUND_2, &res);
break;
@@ -488,6 +484,7 @@ static const struct midr_range arm64_ssb_cpus[] = {
MIDR_ALL_VERSIONS(MIDR_CORTEX_A35),
MIDR_ALL_VERSIONS(MIDR_CORTEX_A53),
MIDR_ALL_VERSIONS(MIDR_CORTEX_A55),
+ MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53),
{},
};
@@ -572,6 +569,7 @@ static const struct midr_range spectre_v2_safe_list[] = {
MIDR_ALL_VERSIONS(MIDR_CORTEX_A35),
MIDR_ALL_VERSIONS(MIDR_CORTEX_A53),
MIDR_ALL_VERSIONS(MIDR_CORTEX_A55),
+ MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53),
{ /* sentinel */ }
};
@@ -623,9 +621,45 @@ check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope)
return (need_wa > 0);
}
-#ifdef CONFIG_HARDEN_EL2_VECTORS
+static const __maybe_unused struct midr_range tx2_family_cpus[] = {
+ MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),
+ MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2),
+ {},
+};
+
+static bool __maybe_unused
+needs_tx2_tvm_workaround(const struct arm64_cpu_capabilities *entry,
+ int scope)
+{
+ int i;
+
+ if (!is_affected_midr_range_list(entry, scope) ||
+ !is_hyp_mode_available())
+ return false;
+
+ for_each_possible_cpu(i) {
+ if (MPIDR_AFFINITY_LEVEL(cpu_logical_map(i), 0) != 0)
+ return true;
+ }
-static const struct midr_range arm64_harden_el2_vectors[] = {
+ return false;
+}
+
+static bool __maybe_unused
+has_neoverse_n1_erratum_1542419(const struct arm64_cpu_capabilities *entry,
+ int scope)
+{
+ u32 midr = read_cpuid_id();
+ bool has_dic = read_cpuid_cachetype() & BIT(CTR_DIC_SHIFT);
+ const struct midr_range range = MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1);
+
+ WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
+ return is_midr_in_range(midr, &range) && has_dic;
+}
+
+#if defined(CONFIG_HARDEN_EL2_VECTORS) || defined(CONFIG_ARM64_ERRATUM_1319367)
+
+static const struct midr_range ca57_a72[] = {
MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
{},
@@ -634,17 +668,23 @@ static const struct midr_range arm64_harden_el2_vectors[] = {
#endif
#ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI
-
-static const struct midr_range arm64_repeat_tlbi_cpus[] = {
+static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = {
#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1009
- MIDR_RANGE(MIDR_QCOM_FALKOR_V1, 0, 0, 0, 0),
+ {
+ ERRATA_MIDR_REV(MIDR_QCOM_FALKOR_V1, 0, 0)
+ },
+ {
+ .midr_range.model = MIDR_QCOM_KRYO,
+ .matches = is_kryo_midr,
+ },
#endif
#ifdef CONFIG_ARM64_ERRATUM_1286807
- MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 0),
+ {
+ ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 0),
+ },
#endif
{},
};
-
#endif
#ifdef CONFIG_CAVIUM_ERRATUM_27456
@@ -712,6 +752,33 @@ static const struct midr_range erratum_1418040_list[] = {
};
#endif
+#ifdef CONFIG_ARM64_ERRATUM_845719
+static const struct midr_range erratum_845719_list[] = {
+ /* Cortex-A53 r0p[01234] */
+ MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 4),
+ /* Brahma-B53 r0p[0] */
+ MIDR_REV(MIDR_BRAHMA_B53, 0, 0),
+ {},
+};
+#endif
+
+#ifdef CONFIG_ARM64_ERRATUM_843419
+static const struct arm64_cpu_capabilities erratum_843419_list[] = {
+ {
+ /* Cortex-A53 r0p[01234] */
+ .matches = is_affected_midr_range,
+ ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 4),
+ MIDR_FIXED(0x4, BIT(8)),
+ },
+ {
+ /* Brahma-B53 r0p[0] */
+ .matches = is_affected_midr_range,
+ ERRATA_MIDR_REV(MIDR_BRAHMA_B53, 0, 0),
+ },
+ {},
+};
+#endif
+
const struct arm64_cpu_capabilities arm64_errata[] = {
#ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE
{
@@ -743,19 +810,18 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
#endif
#ifdef CONFIG_ARM64_ERRATUM_843419
{
- /* Cortex-A53 r0p[01234] */
.desc = "ARM erratum 843419",
.capability = ARM64_WORKAROUND_843419,
- ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 4),
- MIDR_FIXED(0x4, BIT(8)),
+ .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+ .matches = cpucap_multi_entry_cap_matches,
+ .match_list = erratum_843419_list,
},
#endif
#ifdef CONFIG_ARM64_ERRATUM_845719
{
- /* Cortex-A53 r0p[01234] */
.desc = "ARM erratum 845719",
.capability = ARM64_WORKAROUND_845719,
- ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 4),
+ ERRATA_MIDR_RANGE_LIST(erratum_845719_list),
},
#endif
#ifdef CONFIG_CAVIUM_ERRATUM_23154
@@ -791,6 +857,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
{
.desc = "Qualcomm Technologies Falkor/Kryo erratum 1003",
.capability = ARM64_WORKAROUND_QCOM_FALKOR_E1003,
+ .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
.matches = cpucap_multi_entry_cap_matches,
.match_list = qcom_erratum_1003_list,
},
@@ -799,7 +866,9 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
{
.desc = "Qualcomm erratum 1009, ARM erratum 1286807",
.capability = ARM64_WORKAROUND_REPEAT_TLBI,
- ERRATA_MIDR_RANGE_LIST(arm64_repeat_tlbi_cpus),
+ .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+ .matches = cpucap_multi_entry_cap_matches,
+ .match_list = arm64_repeat_tlbi_list,
},
#endif
#ifdef CONFIG_ARM64_ERRATUM_858921
@@ -819,7 +888,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
{
.desc = "EL2 vector hardening",
.capability = ARM64_HARDEN_EL2_VECTORS,
- ERRATA_MIDR_RANGE_LIST(arm64_harden_el2_vectors),
+ ERRATA_MIDR_RANGE_LIST(ca57_a72),
},
#endif
{
@@ -852,6 +921,36 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
.matches = has_cortex_a76_erratum_1463225,
},
#endif
+#ifdef CONFIG_CAVIUM_TX2_ERRATUM_219
+ {
+ .desc = "Cavium ThunderX2 erratum 219 (KVM guest sysreg trapping)",
+ .capability = ARM64_WORKAROUND_CAVIUM_TX2_219_TVM,
+ ERRATA_MIDR_RANGE_LIST(tx2_family_cpus),
+ .matches = needs_tx2_tvm_workaround,
+ },
+ {
+ .desc = "Cavium ThunderX2 erratum 219 (PRFM removal)",
+ .capability = ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM,
+ ERRATA_MIDR_RANGE_LIST(tx2_family_cpus),
+ },
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_1542419
+ {
+ /* we depend on the firmware portion for correctness */
+ .desc = "ARM erratum 1542419 (kernel portion)",
+ .capability = ARM64_WORKAROUND_1542419,
+ .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+ .matches = has_neoverse_n1_erratum_1542419,
+ .cpu_enable = cpu_enable_trap_ctr_access,
+ },
+#endif
+#ifdef CONFIG_ARM64_ERRATUM_1319367
+ {
+ .desc = "ARM erratum 1319367",
+ .capability = ARM64_WORKAROUND_1319367,
+ ERRATA_MIDR_RANGE_LIST(ca57_a72),
+ },
+#endif
{
}
};
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 9323bcc40a58..04cf64e9f0c9 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -136,6 +136,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = {
static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_SB_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_FRINTTS_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_GPI_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
@@ -175,11 +176,16 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = {
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SM4_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SHA3_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_BITPERM_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_AES_SHIFT, 4, 0),
- ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SVEVER_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SM4_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SHA3_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_BITPERM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_AES_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
+ FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SVEVER_SHIFT, 4, 0),
ARM64_FTR_END,
};
@@ -976,6 +982,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
MIDR_ALL_VERSIONS(MIDR_HISI_TSV110),
+ MIDR_ALL_VERSIONS(MIDR_NVIDIA_CARMEL),
{ /* sentinel */ }
};
char const *str = "kpti command line option";
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 05933c065732..56bba746da1c 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -329,7 +329,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
info->reg_cntfrq = arch_timer_get_cntfrq();
/*
* Use the effective value of the CTR_EL0 than the raw value
- * exposed by the CPU. CTR_E0.IDC field value must be interpreted
+ * exposed by the CPU. CTR_EL0.IDC field value must be interpreted
* with the CLIDR_EL1 fields to avoid triggering false warnings
* when there is a mismatch across the CPUs. Keep track of the
* effective value of the CTR_EL0 in our internal records for
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
new file mode 100644
index 000000000000..5dce5e56995a
--- /dev/null
+++ b/arch/arm64/kernel/entry-common.c
@@ -0,0 +1,332 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Exception handling code
+ *
+ * Copyright (C) 2019 ARM Ltd.
+ */
+
+#include <linux/context_tracking.h>
+#include <linux/ptrace.h>
+#include <linux/thread_info.h>
+
+#include <asm/cpufeature.h>
+#include <asm/daifflags.h>
+#include <asm/esr.h>
+#include <asm/exception.h>
+#include <asm/kprobes.h>
+#include <asm/mmu.h>
+#include <asm/sysreg.h>
+
+static void notrace el1_abort(struct pt_regs *regs, unsigned long esr)
+{
+ unsigned long far = read_sysreg(far_el1);
+
+ local_daif_inherit(regs);
+ far = untagged_addr(far);
+ do_mem_abort(far, esr, regs);
+}
+NOKPROBE_SYMBOL(el1_abort);
+
+static void notrace el1_pc(struct pt_regs *regs, unsigned long esr)
+{
+ unsigned long far = read_sysreg(far_el1);
+
+ local_daif_inherit(regs);
+ do_sp_pc_abort(far, esr, regs);
+}
+NOKPROBE_SYMBOL(el1_pc);
+
+static void el1_undef(struct pt_regs *regs)
+{
+ local_daif_inherit(regs);
+ do_undefinstr(regs);
+}
+NOKPROBE_SYMBOL(el1_undef);
+
+static void el1_inv(struct pt_regs *regs, unsigned long esr)
+{
+ local_daif_inherit(regs);
+ bad_mode(regs, 0, esr);
+}
+NOKPROBE_SYMBOL(el1_inv);
+
+static void notrace el1_dbg(struct pt_regs *regs, unsigned long esr)
+{
+ unsigned long far = read_sysreg(far_el1);
+
+ /*
+ * The CPU masked interrupts, and we are leaving them masked during
+ * do_debug_exception(). Update PMR as if we had called
+ * local_mask_daif().
+ */
+ if (system_uses_irq_prio_masking())
+ gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
+
+ do_debug_exception(far, esr, regs);
+}
+NOKPROBE_SYMBOL(el1_dbg);
+
+asmlinkage void notrace el1_sync_handler(struct pt_regs *regs)
+{
+ unsigned long esr = read_sysreg(esr_el1);
+
+ switch (ESR_ELx_EC(esr)) {
+ case ESR_ELx_EC_DABT_CUR:
+ case ESR_ELx_EC_IABT_CUR:
+ el1_abort(regs, esr);
+ break;
+ /*
+ * We don't handle ESR_ELx_EC_SP_ALIGN, since we will have hit a
+ * recursive exception when trying to push the initial pt_regs.
+ */
+ case ESR_ELx_EC_PC_ALIGN:
+ el1_pc(regs, esr);
+ break;
+ case ESR_ELx_EC_SYS64:
+ case ESR_ELx_EC_UNKNOWN:
+ el1_undef(regs);
+ break;
+ case ESR_ELx_EC_BREAKPT_CUR:
+ case ESR_ELx_EC_SOFTSTP_CUR:
+ case ESR_ELx_EC_WATCHPT_CUR:
+ case ESR_ELx_EC_BRK64:
+ el1_dbg(regs, esr);
+ break;
+ default:
+ el1_inv(regs, esr);
+ };
+}
+NOKPROBE_SYMBOL(el1_sync_handler);
+
+static void notrace el0_da(struct pt_regs *regs, unsigned long esr)
+{
+ unsigned long far = read_sysreg(far_el1);
+
+ user_exit_irqoff();
+ local_daif_restore(DAIF_PROCCTX);
+ far = untagged_addr(far);
+ do_mem_abort(far, esr, regs);
+}
+NOKPROBE_SYMBOL(el0_da);
+
+static void notrace el0_ia(struct pt_regs *regs, unsigned long esr)
+{
+ unsigned long far = read_sysreg(far_el1);
+
+ /*
+ * We've taken an instruction abort from userspace and not yet
+ * re-enabled IRQs. If the address is a kernel address, apply
+ * BP hardening prior to enabling IRQs and pre-emption.
+ */
+ if (!is_ttbr0_addr(far))
+ arm64_apply_bp_hardening();
+
+ user_exit_irqoff();
+ local_daif_restore(DAIF_PROCCTX);
+ do_mem_abort(far, esr, regs);
+}
+NOKPROBE_SYMBOL(el0_ia);
+
+static void notrace el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr)
+{
+ user_exit_irqoff();
+ local_daif_restore(DAIF_PROCCTX);
+ do_fpsimd_acc(esr, regs);
+}
+NOKPROBE_SYMBOL(el0_fpsimd_acc);
+
+static void notrace el0_sve_acc(struct pt_regs *regs, unsigned long esr)
+{
+ user_exit_irqoff();
+ local_daif_restore(DAIF_PROCCTX);
+ do_sve_acc(esr, regs);
+}
+NOKPROBE_SYMBOL(el0_sve_acc);
+
+static void notrace el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr)
+{
+ user_exit_irqoff();
+ local_daif_restore(DAIF_PROCCTX);
+ do_fpsimd_exc(esr, regs);
+}
+NOKPROBE_SYMBOL(el0_fpsimd_exc);
+
+static void notrace el0_sys(struct pt_regs *regs, unsigned long esr)
+{
+ user_exit_irqoff();
+ local_daif_restore(DAIF_PROCCTX);
+ do_sysinstr(esr, regs);
+}
+NOKPROBE_SYMBOL(el0_sys);
+
+static void notrace el0_pc(struct pt_regs *regs, unsigned long esr)
+{
+ unsigned long far = read_sysreg(far_el1);
+
+ if (!is_ttbr0_addr(instruction_pointer(regs)))
+ arm64_apply_bp_hardening();
+
+ user_exit_irqoff();
+ local_daif_restore(DAIF_PROCCTX);
+ do_sp_pc_abort(far, esr, regs);
+}
+NOKPROBE_SYMBOL(el0_pc);
+
+static void notrace el0_sp(struct pt_regs *regs, unsigned long esr)
+{
+ user_exit_irqoff();
+ local_daif_restore(DAIF_PROCCTX_NOIRQ);
+ do_sp_pc_abort(regs->sp, esr, regs);
+}
+NOKPROBE_SYMBOL(el0_sp);
+
+static void notrace el0_undef(struct pt_regs *regs)
+{
+ user_exit_irqoff();
+ local_daif_restore(DAIF_PROCCTX);
+ do_undefinstr(regs);
+}
+NOKPROBE_SYMBOL(el0_undef);
+
+static void notrace el0_inv(struct pt_regs *regs, unsigned long esr)
+{
+ user_exit_irqoff();
+ local_daif_restore(DAIF_PROCCTX);
+ bad_el0_sync(regs, 0, esr);
+}
+NOKPROBE_SYMBOL(el0_inv);
+
+static void notrace el0_dbg(struct pt_regs *regs, unsigned long esr)
+{
+ /* Only watchpoints write FAR_EL1, otherwise its UNKNOWN */
+ unsigned long far = read_sysreg(far_el1);
+
+ if (system_uses_irq_prio_masking())
+ gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
+
+ user_exit_irqoff();
+ do_debug_exception(far, esr, regs);
+ local_daif_restore(DAIF_PROCCTX_NOIRQ);
+}
+NOKPROBE_SYMBOL(el0_dbg);
+
+static void notrace el0_svc(struct pt_regs *regs)
+{
+ if (system_uses_irq_prio_masking())
+ gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
+
+ el0_svc_handler(regs);
+}
+NOKPROBE_SYMBOL(el0_svc);
+
+asmlinkage void notrace el0_sync_handler(struct pt_regs *regs)
+{
+ unsigned long esr = read_sysreg(esr_el1);
+
+ switch (ESR_ELx_EC(esr)) {
+ case ESR_ELx_EC_SVC64:
+ el0_svc(regs);
+ break;
+ case ESR_ELx_EC_DABT_LOW:
+ el0_da(regs, esr);
+ break;
+ case ESR_ELx_EC_IABT_LOW:
+ el0_ia(regs, esr);
+ break;
+ case ESR_ELx_EC_FP_ASIMD:
+ el0_fpsimd_acc(regs, esr);
+ break;
+ case ESR_ELx_EC_SVE:
+ el0_sve_acc(regs, esr);
+ break;
+ case ESR_ELx_EC_FP_EXC64:
+ el0_fpsimd_exc(regs, esr);
+ break;
+ case ESR_ELx_EC_SYS64:
+ case ESR_ELx_EC_WFx:
+ el0_sys(regs, esr);
+ break;
+ case ESR_ELx_EC_SP_ALIGN:
+ el0_sp(regs, esr);
+ break;
+ case ESR_ELx_EC_PC_ALIGN:
+ el0_pc(regs, esr);
+ break;
+ case ESR_ELx_EC_UNKNOWN:
+ el0_undef(regs);
+ break;
+ case ESR_ELx_EC_BREAKPT_LOW:
+ case ESR_ELx_EC_SOFTSTP_LOW:
+ case ESR_ELx_EC_WATCHPT_LOW:
+ case ESR_ELx_EC_BRK64:
+ el0_dbg(regs, esr);
+ break;
+ default:
+ el0_inv(regs, esr);
+ }
+}
+NOKPROBE_SYMBOL(el0_sync_handler);
+
+#ifdef CONFIG_COMPAT
+static void notrace el0_cp15(struct pt_regs *regs, unsigned long esr)
+{
+ user_exit_irqoff();
+ local_daif_restore(DAIF_PROCCTX);
+ do_cp15instr(esr, regs);
+}
+NOKPROBE_SYMBOL(el0_cp15);
+
+static void notrace el0_svc_compat(struct pt_regs *regs)
+{
+ if (system_uses_irq_prio_masking())
+ gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
+
+ el0_svc_compat_handler(regs);
+}
+NOKPROBE_SYMBOL(el0_svc_compat);
+
+asmlinkage void notrace el0_sync_compat_handler(struct pt_regs *regs)
+{
+ unsigned long esr = read_sysreg(esr_el1);
+
+ switch (ESR_ELx_EC(esr)) {
+ case ESR_ELx_EC_SVC32:
+ el0_svc_compat(regs);
+ break;
+ case ESR_ELx_EC_DABT_LOW:
+ el0_da(regs, esr);
+ break;
+ case ESR_ELx_EC_IABT_LOW:
+ el0_ia(regs, esr);
+ break;
+ case ESR_ELx_EC_FP_ASIMD:
+ el0_fpsimd_acc(regs, esr);
+ break;
+ case ESR_ELx_EC_FP_EXC32:
+ el0_fpsimd_exc(regs, esr);
+ break;
+ case ESR_ELx_EC_PC_ALIGN:
+ el0_pc(regs, esr);
+ break;
+ case ESR_ELx_EC_UNKNOWN:
+ case ESR_ELx_EC_CP14_MR:
+ case ESR_ELx_EC_CP14_LS:
+ case ESR_ELx_EC_CP14_64:
+ el0_undef(regs);
+ break;
+ case ESR_ELx_EC_CP15_32:
+ case ESR_ELx_EC_CP15_64:
+ el0_cp15(regs, esr);
+ break;
+ case ESR_ELx_EC_BREAKPT_LOW:
+ case ESR_ELx_EC_SOFTSTP_LOW:
+ case ESR_ELx_EC_WATCHPT_LOW:
+ case ESR_ELx_EC_BKPT32:
+ el0_dbg(regs, esr);
+ break;
+ default:
+ el0_inv(regs, esr);
+ }
+}
+NOKPROBE_SYMBOL(el0_sync_compat_handler);
+#endif /* CONFIG_COMPAT */
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
index 33d003d80121..4fe1514fcbfd 100644
--- a/arch/arm64/kernel/entry-ftrace.S
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -7,10 +7,137 @@
*/
#include <linux/linkage.h>
+#include <asm/asm-offsets.h>
#include <asm/assembler.h>
#include <asm/ftrace.h>
#include <asm/insn.h>
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+/*
+ * Due to -fpatchable-function-entry=2, the compiler has placed two NOPs before
+ * the regular function prologue. For an enabled callsite, ftrace_init_nop() and
+ * ftrace_make_call() have patched those NOPs to:
+ *
+ * MOV X9, LR
+ * BL <entry>
+ *
+ * ... where <entry> is either ftrace_caller or ftrace_regs_caller.
+ *
+ * Each instrumented function follows the AAPCS, so here x0-x8 and x19-x30 are
+ * live, and x9-x18 are safe to clobber.
+ *
+ * We save the callsite's context into a pt_regs before invoking any ftrace
+ * callbacks. So that we can get a sensible backtrace, we create a stack record
+ * for the callsite and the ftrace entry assembly. This is not sufficient for
+ * reliable stacktrace: until we create the callsite stack record, its caller
+ * is missing from the LR and existing chain of frame records.
+ */
+ .macro ftrace_regs_entry, allregs=0
+ /* Make room for pt_regs, plus a callee frame */
+ sub sp, sp, #(S_FRAME_SIZE + 16)
+
+ /* Save function arguments (and x9 for simplicity) */
+ stp x0, x1, [sp, #S_X0]
+ stp x2, x3, [sp, #S_X2]
+ stp x4, x5, [sp, #S_X4]
+ stp x6, x7, [sp, #S_X6]
+ stp x8, x9, [sp, #S_X8]
+
+ /* Optionally save the callee-saved registers, always save the FP */
+ .if \allregs == 1
+ stp x10, x11, [sp, #S_X10]
+ stp x12, x13, [sp, #S_X12]
+ stp x14, x15, [sp, #S_X14]
+ stp x16, x17, [sp, #S_X16]
+ stp x18, x19, [sp, #S_X18]
+ stp x20, x21, [sp, #S_X20]
+ stp x22, x23, [sp, #S_X22]
+ stp x24, x25, [sp, #S_X24]
+ stp x26, x27, [sp, #S_X26]
+ stp x28, x29, [sp, #S_X28]
+ .else
+ str x29, [sp, #S_FP]
+ .endif
+
+ /* Save the callsite's SP and LR */
+ add x10, sp, #(S_FRAME_SIZE + 16)
+ stp x9, x10, [sp, #S_LR]
+
+ /* Save the PC after the ftrace callsite */
+ str x30, [sp, #S_PC]
+
+ /* Create a frame record for the callsite above pt_regs */
+ stp x29, x9, [sp, #S_FRAME_SIZE]
+ add x29, sp, #S_FRAME_SIZE
+
+ /* Create our frame record within pt_regs. */
+ stp x29, x30, [sp, #S_STACKFRAME]
+ add x29, sp, #S_STACKFRAME
+ .endm
+
+ENTRY(ftrace_regs_caller)
+ ftrace_regs_entry 1
+ b ftrace_common
+ENDPROC(ftrace_regs_caller)
+
+ENTRY(ftrace_caller)
+ ftrace_regs_entry 0
+ b ftrace_common
+ENDPROC(ftrace_caller)
+
+ENTRY(ftrace_common)
+ sub x0, x30, #AARCH64_INSN_SIZE // ip (callsite's BL insn)
+ mov x1, x9 // parent_ip (callsite's LR)
+ ldr_l x2, function_trace_op // op
+ mov x3, sp // regs
+
+GLOBAL(ftrace_call)
+ bl ftrace_stub
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+GLOBAL(ftrace_graph_call) // ftrace_graph_caller();
+ nop // If enabled, this will be replaced
+ // "b ftrace_graph_caller"
+#endif
+
+/*
+ * At the callsite x0-x8 and x19-x30 were live. Any C code will have preserved
+ * x19-x29 per the AAPCS, and we created frame records upon entry, so we need
+ * to restore x0-x8, x29, and x30.
+ */
+ftrace_common_return:
+ /* Restore function arguments */
+ ldp x0, x1, [sp]
+ ldp x2, x3, [sp, #S_X2]
+ ldp x4, x5, [sp, #S_X4]
+ ldp x6, x7, [sp, #S_X6]
+ ldr x8, [sp, #S_X8]
+
+ /* Restore the callsite's FP, LR, PC */
+ ldr x29, [sp, #S_FP]
+ ldr x30, [sp, #S_LR]
+ ldr x9, [sp, #S_PC]
+
+ /* Restore the callsite's SP */
+ add sp, sp, #S_FRAME_SIZE + 16
+
+ ret x9
+ENDPROC(ftrace_common)
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ENTRY(ftrace_graph_caller)
+ ldr x0, [sp, #S_PC]
+ sub x0, x0, #AARCH64_INSN_SIZE // ip (callsite's BL insn)
+ add x1, sp, #S_LR // parent_ip (callsite's LR)
+ ldr x2, [sp, #S_FRAME_SIZE] // parent fp (callsite's FP)
+ bl prepare_ftrace_return
+ b ftrace_common_return
+ENDPROC(ftrace_graph_caller)
+#else
+#endif
+
+#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+
/*
* Gcc with -pg will put the following code in the beginning of each function:
* mov x0, x30
@@ -160,11 +287,6 @@ GLOBAL(ftrace_graph_call) // ftrace_graph_caller();
mcount_exit
ENDPROC(ftrace_caller)
-#endif /* CONFIG_DYNAMIC_FTRACE */
-
-ENTRY(ftrace_stub)
- ret
-ENDPROC(ftrace_stub)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/*
@@ -184,7 +306,15 @@ ENTRY(ftrace_graph_caller)
mcount_exit
ENDPROC(ftrace_graph_caller)
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+
+ENTRY(ftrace_stub)
+ ret
+ENDPROC(ftrace_stub)
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/*
* void return_to_handler(void)
*
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 84a822748c84..583f71abbe98 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -269,8 +269,10 @@ alternative_else_nop_endif
alternative_if ARM64_HAS_IRQ_PRIO_MASKING
ldr x20, [sp, #S_PMR_SAVE]
msr_s SYS_ICC_PMR_EL1, x20
- /* Ensure priority change is seen by redistributor */
- dsb sy
+ mrs_s x21, SYS_ICC_CTLR_EL1
+ tbz x21, #6, .L__skip_pmr_sync\@ // Check for ICC_CTLR_EL1.PMHE
+ dsb sy // Ensure priority change is seen by redistributor
+.L__skip_pmr_sync\@:
alternative_else_nop_endif
ldp x21, x22, [sp, #S_PC] // load ELR, SPSR
@@ -578,76 +580,9 @@ ENDPROC(el1_error_invalid)
.align 6
el1_sync:
kernel_entry 1
- mrs x1, esr_el1 // read the syndrome register
- lsr x24, x1, #ESR_ELx_EC_SHIFT // exception class
- cmp x24, #ESR_ELx_EC_DABT_CUR // data abort in EL1
- b.eq el1_da
- cmp x24, #ESR_ELx_EC_IABT_CUR // instruction abort in EL1
- b.eq el1_ia
- cmp x24, #ESR_ELx_EC_SYS64 // configurable trap
- b.eq el1_undef
- cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception
- b.eq el1_pc
- cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL1
- b.eq el1_undef
- cmp x24, #ESR_ELx_EC_BREAKPT_CUR // debug exception in EL1
- b.ge el1_dbg
- b el1_inv
-
-el1_ia:
- /*
- * Fall through to the Data abort case
- */
-el1_da:
- /*
- * Data abort handling
- */
- mrs x3, far_el1
- inherit_daif pstate=x23, tmp=x2
- clear_address_tag x0, x3
- mov x2, sp // struct pt_regs
- bl do_mem_abort
-
- kernel_exit 1
-el1_pc:
- /*
- * PC alignment exception handling. We don't handle SP alignment faults,
- * since we will have hit a recursive exception when trying to push the
- * initial pt_regs.
- */
- mrs x0, far_el1
- inherit_daif pstate=x23, tmp=x2
- mov x2, sp
- bl do_sp_pc_abort
- ASM_BUG()
-el1_undef:
- /*
- * Undefined instruction
- */
- inherit_daif pstate=x23, tmp=x2
mov x0, sp
- bl do_undefinstr
+ bl el1_sync_handler
kernel_exit 1
-el1_dbg:
- /*
- * Debug exception handling
- */
- cmp x24, #ESR_ELx_EC_BRK64 // if BRK64
- cinc x24, x24, eq // set bit '0'
- tbz x24, #0, el1_inv // EL1 only
- gic_prio_kentry_setup tmp=x3
- mrs x0, far_el1
- mov x2, sp // struct pt_regs
- bl do_debug_exception
- kernel_exit 1
-el1_inv:
- // TODO: add support for undefined instructions in kernel mode
- inherit_daif pstate=x23, tmp=x2
- mov x0, sp
- mov x2, x1
- mov x1, #BAD_SYNC
- bl bad_mode
- ASM_BUG()
ENDPROC(el1_sync)
.align 6
@@ -680,7 +615,7 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING
orr x24, x24, x0
alternative_else_nop_endif
cbnz x24, 1f // preempt count != 0 || NMI return path
- bl preempt_schedule_irq // irq en/disable is done inside
+ bl arm64_preempt_schedule_irq // irq en/disable is done inside
1:
#endif
@@ -714,70 +649,18 @@ ENDPROC(el1_irq)
.align 6
el0_sync:
kernel_entry 0
- mrs x25, esr_el1 // read the syndrome register
- lsr x24, x25, #ESR_ELx_EC_SHIFT // exception class
- cmp x24, #ESR_ELx_EC_SVC64 // SVC in 64-bit state
- b.eq el0_svc
- cmp x24, #ESR_ELx_EC_DABT_LOW // data abort in EL0
- b.eq el0_da
- cmp x24, #ESR_ELx_EC_IABT_LOW // instruction abort in EL0
- b.eq el0_ia
- cmp x24, #ESR_ELx_EC_FP_ASIMD // FP/ASIMD access
- b.eq el0_fpsimd_acc
- cmp x24, #ESR_ELx_EC_SVE // SVE access
- b.eq el0_sve_acc
- cmp x24, #ESR_ELx_EC_FP_EXC64 // FP/ASIMD exception
- b.eq el0_fpsimd_exc
- cmp x24, #ESR_ELx_EC_SYS64 // configurable trap
- ccmp x24, #ESR_ELx_EC_WFx, #4, ne
- b.eq el0_sys
- cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception
- b.eq el0_sp
- cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception
- b.eq el0_pc
- cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0
- b.eq el0_undef
- cmp x24, #ESR_ELx_EC_BREAKPT_LOW // debug exception in EL0
- b.ge el0_dbg
- b el0_inv
+ mov x0, sp
+ bl el0_sync_handler
+ b ret_to_user
#ifdef CONFIG_COMPAT
.align 6
el0_sync_compat:
kernel_entry 0, 32
- mrs x25, esr_el1 // read the syndrome register
- lsr x24, x25, #ESR_ELx_EC_SHIFT // exception class
- cmp x24, #ESR_ELx_EC_SVC32 // SVC in 32-bit state
- b.eq el0_svc_compat
- cmp x24, #ESR_ELx_EC_DABT_LOW // data abort in EL0
- b.eq el0_da
- cmp x24, #ESR_ELx_EC_IABT_LOW // instruction abort in EL0
- b.eq el0_ia
- cmp x24, #ESR_ELx_EC_FP_ASIMD // FP/ASIMD access
- b.eq el0_fpsimd_acc
- cmp x24, #ESR_ELx_EC_FP_EXC32 // FP/ASIMD exception
- b.eq el0_fpsimd_exc
- cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception
- b.eq el0_pc
- cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0
- b.eq el0_undef
- cmp x24, #ESR_ELx_EC_CP15_32 // CP15 MRC/MCR trap
- b.eq el0_cp15
- cmp x24, #ESR_ELx_EC_CP15_64 // CP15 MRRC/MCRR trap
- b.eq el0_cp15
- cmp x24, #ESR_ELx_EC_CP14_MR // CP14 MRC/MCR trap
- b.eq el0_undef
- cmp x24, #ESR_ELx_EC_CP14_LS // CP14 LDC/STC trap
- b.eq el0_undef
- cmp x24, #ESR_ELx_EC_CP14_64 // CP14 MRRC/MCRR trap
- b.eq el0_undef
- cmp x24, #ESR_ELx_EC_BREAKPT_LOW // debug exception in EL0
- b.ge el0_dbg
- b el0_inv
-el0_svc_compat:
mov x0, sp
- bl el0_svc_compat_handler
+ bl el0_sync_compat_handler
b ret_to_user
+ENDPROC(el0_sync)
.align 6
el0_irq_compat:
@@ -787,140 +670,8 @@ el0_irq_compat:
el0_error_compat:
kernel_entry 0, 32
b el0_error_naked
-
-el0_cp15:
- /*
- * Trapped CP15 (MRC, MCR, MRRC, MCRR) instructions
- */
- ct_user_exit_irqoff
- enable_daif
- mov x0, x25
- mov x1, sp
- bl do_cp15instr
- b ret_to_user
#endif
-el0_da:
- /*
- * Data abort handling
- */
- mrs x26, far_el1
- ct_user_exit_irqoff
- enable_daif
- clear_address_tag x0, x26
- mov x1, x25
- mov x2, sp
- bl do_mem_abort
- b ret_to_user
-el0_ia:
- /*
- * Instruction abort handling
- */
- mrs x26, far_el1
- gic_prio_kentry_setup tmp=x0
- ct_user_exit_irqoff
- enable_da_f
-#ifdef CONFIG_TRACE_IRQFLAGS
- bl trace_hardirqs_off
-#endif
- mov x0, x26
- mov x1, x25
- mov x2, sp
- bl do_el0_ia_bp_hardening
- b ret_to_user
-el0_fpsimd_acc:
- /*
- * Floating Point or Advanced SIMD access
- */
- ct_user_exit_irqoff
- enable_daif
- mov x0, x25
- mov x1, sp
- bl do_fpsimd_acc
- b ret_to_user
-el0_sve_acc:
- /*
- * Scalable Vector Extension access
- */
- ct_user_exit_irqoff
- enable_daif
- mov x0, x25
- mov x1, sp
- bl do_sve_acc
- b ret_to_user
-el0_fpsimd_exc:
- /*
- * Floating Point, Advanced SIMD or SVE exception
- */
- ct_user_exit_irqoff
- enable_daif
- mov x0, x25
- mov x1, sp
- bl do_fpsimd_exc
- b ret_to_user
-el0_sp:
- ldr x26, [sp, #S_SP]
- b el0_sp_pc
-el0_pc:
- mrs x26, far_el1
-el0_sp_pc:
- /*
- * Stack or PC alignment exception handling
- */
- gic_prio_kentry_setup tmp=x0
- ct_user_exit_irqoff
- enable_da_f
-#ifdef CONFIG_TRACE_IRQFLAGS
- bl trace_hardirqs_off
-#endif
- mov x0, x26
- mov x1, x25
- mov x2, sp
- bl do_sp_pc_abort
- b ret_to_user
-el0_undef:
- /*
- * Undefined instruction
- */
- ct_user_exit_irqoff
- enable_daif
- mov x0, sp
- bl do_undefinstr
- b ret_to_user
-el0_sys:
- /*
- * System instructions, for trapped cache maintenance instructions
- */
- ct_user_exit_irqoff
- enable_daif
- mov x0, x25
- mov x1, sp
- bl do_sysinstr
- b ret_to_user
-el0_dbg:
- /*
- * Debug exception handling
- */
- tbnz x24, #0, el0_inv // EL0 only
- mrs x24, far_el1
- gic_prio_kentry_setup tmp=x3
- ct_user_exit_irqoff
- mov x0, x24
- mov x1, x25
- mov x2, sp
- bl do_debug_exception
- enable_da_f
- b ret_to_user
-el0_inv:
- ct_user_exit_irqoff
- enable_daif
- mov x0, sp
- mov x1, #BAD_SYNC
- mov x2, x25
- bl bad_el0_sync
- b ret_to_user
-ENDPROC(el0_sync)
-
.align 6
el0_irq:
kernel_entry 0
@@ -998,17 +749,6 @@ finish_ret_to_user:
kernel_exit 0
ENDPROC(ret_to_user)
-/*
- * SVC handler.
- */
- .align 6
-el0_svc:
- gic_prio_kentry_setup tmp=x1
- mov x0, sp
- bl el0_svc_handler
- b ret_to_user
-ENDPROC(el0_svc)
-
.popsection // .entry.text
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
@@ -1070,7 +810,9 @@ alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003
#else
ldr x30, =vectors
#endif
+alternative_if_not ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM
prfm plil1strm, [x30, #(1b - tramp_vectors)]
+alternative_else_nop_endif
msr vbar_el1, x30
add x30, x30, #(1b - tramp_vectors)
isb
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 37d3912cfe06..3eb338f14386 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -920,7 +920,7 @@ void fpsimd_release_task(struct task_struct *dead_task)
* would have disabled the SVE access trap for userspace during
* ret_to_user, making an SVE access trap impossible in that case.
*/
-asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs)
+void do_sve_acc(unsigned int esr, struct pt_regs *regs)
{
/* Even if we chose not to use SVE, the hardware could still trap: */
if (unlikely(!system_supports_sve()) || WARN_ON(is_compat_task())) {
@@ -947,7 +947,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs)
/*
* Trapped FP/ASIMD access.
*/
-asmlinkage void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
+void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
{
/* TODO: implement lazy context saving/restoring */
WARN_ON(1);
@@ -956,7 +956,7 @@ asmlinkage void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
/*
* Raise a SIGFPE for the current process.
*/
-asmlinkage void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
+void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs)
{
unsigned int si_code = FPE_FLTUNK;
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index 171773257974..8618faa82e6d 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -62,6 +62,19 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
return ftrace_modify_code(pc, 0, new, false);
}
+static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr)
+{
+#ifdef CONFIG_ARM64_MODULE_PLTS
+ struct plt_entry *plt = mod->arch.ftrace_trampolines;
+
+ if (addr == FTRACE_ADDR)
+ return &plt[FTRACE_PLT_IDX];
+ if (addr == FTRACE_REGS_ADDR && IS_ENABLED(CONFIG_FTRACE_WITH_REGS))
+ return &plt[FTRACE_REGS_PLT_IDX];
+#endif
+ return NULL;
+}
+
/*
* Turn on the call to ftrace_caller() in instrumented function
*/
@@ -72,9 +85,11 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
long offset = (long)pc - (long)addr;
if (offset < -SZ_128M || offset >= SZ_128M) {
-#ifdef CONFIG_ARM64_MODULE_PLTS
- struct plt_entry trampoline, *dst;
struct module *mod;
+ struct plt_entry *plt;
+
+ if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
+ return -EINVAL;
/*
* On kernels that support module PLTs, the offset between the
@@ -93,43 +108,13 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
if (WARN_ON(!mod))
return -EINVAL;
- /*
- * There is only one ftrace trampoline per module. For now,
- * this is not a problem since on arm64, all dynamic ftrace
- * invocations are routed via ftrace_caller(). This will need
- * to be revisited if support for multiple ftrace entry points
- * is added in the future, but for now, the pr_err() below
- * deals with a theoretical issue only.
- *
- * Note that PLTs are place relative, and plt_entries_equal()
- * checks whether they point to the same target. Here, we need
- * to check if the actual opcodes are in fact identical,
- * regardless of the offset in memory so use memcmp() instead.
- */
- dst = mod->arch.ftrace_trampoline;
- trampoline = get_plt_entry(addr, dst);
- if (memcmp(dst, &trampoline, sizeof(trampoline))) {
- if (plt_entry_is_initialized(dst)) {
- pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n");
- return -EINVAL;
- }
-
- /* point the trampoline to our ftrace entry point */
- module_disable_ro(mod);
- *dst = trampoline;
- module_enable_ro(mod, true);
-
- /*
- * Ensure updated trampoline is visible to instruction
- * fetch before we patch in the branch.
- */
- __flush_icache_range((unsigned long)&dst[0],
- (unsigned long)&dst[1]);
+ plt = get_ftrace_plt(mod, addr);
+ if (!plt) {
+ pr_err("ftrace: no module PLT for %ps\n", (void *)addr);
+ return -EINVAL;
}
- addr = (unsigned long)dst;
-#else /* CONFIG_ARM64_MODULE_PLTS */
- return -EINVAL;
-#endif /* CONFIG_ARM64_MODULE_PLTS */
+
+ addr = (unsigned long)plt;
}
old = aarch64_insn_gen_nop();
@@ -138,6 +123,55 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
return ftrace_modify_code(pc, old, new, true);
}
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+ unsigned long addr)
+{
+ unsigned long pc = rec->ip;
+ u32 old, new;
+
+ old = aarch64_insn_gen_branch_imm(pc, old_addr,
+ AARCH64_INSN_BRANCH_LINK);
+ new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
+
+ return ftrace_modify_code(pc, old, new, true);
+}
+
+/*
+ * The compiler has inserted two NOPs before the regular function prologue.
+ * All instrumented functions follow the AAPCS, so x0-x8 and x19-x30 are live,
+ * and x9-x18 are free for our use.
+ *
+ * At runtime we want to be able to swing a single NOP <-> BL to enable or
+ * disable the ftrace call. The BL requires us to save the original LR value,
+ * so here we insert a <MOV X9, LR> over the first NOP so the instructions
+ * before the regular prologue are:
+ *
+ * | Compiled | Disabled | Enabled |
+ * +----------+------------+------------+
+ * | NOP | MOV X9, LR | MOV X9, LR |
+ * | NOP | NOP | BL <entry> |
+ *
+ * The LR value will be recovered by ftrace_regs_entry, and restored into LR
+ * before returning to the regular function prologue. When a function is not
+ * being traced, the MOV is not harmful given x9 is not live per the AAPCS.
+ *
+ * Note: ftrace_process_locs() has pre-adjusted rec->ip to be the address of
+ * the BL.
+ */
+int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
+{
+ unsigned long pc = rec->ip - AARCH64_INSN_SIZE;
+ u32 old, new;
+
+ old = aarch64_insn_gen_nop();
+ new = aarch64_insn_gen_move_reg(AARCH64_INSN_REG_9,
+ AARCH64_INSN_REG_LR,
+ AARCH64_INSN_VARIANT_64BIT);
+ return ftrace_modify_code(pc, old, new, true);
+}
+#endif
+
/*
* Turn off the call to ftrace_caller() in instrumented function
*/
@@ -150,9 +184,11 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
long offset = (long)pc - (long)addr;
if (offset < -SZ_128M || offset >= SZ_128M) {
-#ifdef CONFIG_ARM64_MODULE_PLTS
u32 replaced;
+ if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
+ return -EINVAL;
+
/*
* 'mod' is only set at module load time, but if we end up
* dealing with an out-of-range condition, we can assume it
@@ -183,9 +219,6 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
return -EINVAL;
validate = false;
-#else /* CONFIG_ARM64_MODULE_PLTS */
- return -EINVAL;
-#endif /* CONFIG_ARM64_MODULE_PLTS */
} else {
old = aarch64_insn_gen_branch_imm(pc, addr,
AARCH64_INSN_BRANCH_LINK);
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index e0a7fce0e01c..a96b2921d22c 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -201,6 +201,7 @@ static int create_safe_exec_page(void *src_start, size_t length,
gfp_t mask)
{
int rc = 0;
+ pgd_t *trans_pgd;
pgd_t *pgdp;
pud_t *pudp;
pmd_t *pmdp;
@@ -215,7 +216,13 @@ static int create_safe_exec_page(void *src_start, size_t length,
memcpy((void *)dst, src_start, length);
__flush_icache_range(dst, dst + length);
- pgdp = pgd_offset_raw(allocator(mask), dst_addr);
+ trans_pgd = allocator(mask);
+ if (!trans_pgd) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ pgdp = pgd_offset_raw(trans_pgd, dst_addr);
if (pgd_none(READ_ONCE(*pgdp))) {
pudp = allocator(mask);
if (!pudp) {
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index d801a7094076..513b29c3e735 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -1268,6 +1268,19 @@ u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst,
return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_6, insn, shift);
}
+/*
+ * MOV (register) is architecturally an alias of ORR (shifted register) where
+ * MOV <*d>, <*m> is equivalent to ORR <*d>, <*ZR>, <*m>
+ */
+u32 aarch64_insn_gen_move_reg(enum aarch64_insn_register dst,
+ enum aarch64_insn_register src,
+ enum aarch64_insn_variant variant)
+{
+ return aarch64_insn_gen_logical_shifted_reg(dst, AARCH64_INSN_REG_ZR,
+ src, 0, variant,
+ AARCH64_INSN_LOGIC_ORR);
+}
+
u32 aarch64_insn_gen_adr(unsigned long pc, unsigned long addr,
enum aarch64_insn_register reg,
enum aarch64_insn_adr_type type)
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index 416f537bf614..2a11a962e571 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -19,6 +19,14 @@
#include <asm/pgtable.h>
#include <asm/sections.h>
+enum kaslr_status {
+ KASLR_ENABLED,
+ KASLR_DISABLED_CMDLINE,
+ KASLR_DISABLED_NO_SEED,
+ KASLR_DISABLED_FDT_REMAP,
+};
+
+static enum kaslr_status __initdata kaslr_status;
u64 __ro_after_init module_alloc_base;
u16 __initdata memstart_offset_seed;
@@ -91,15 +99,15 @@ u64 __init kaslr_early_init(u64 dt_phys)
*/
early_fixmap_init();
fdt = fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL);
- if (!fdt)
+ if (!fdt) {
+ kaslr_status = KASLR_DISABLED_FDT_REMAP;
return 0;
+ }
/*
* Retrieve (and wipe) the seed from the FDT
*/
seed = get_kaslr_seed(fdt);
- if (!seed)
- return 0;
/*
* Check if 'nokaslr' appears on the command line, and
@@ -107,8 +115,15 @@ u64 __init kaslr_early_init(u64 dt_phys)
*/
cmdline = kaslr_get_cmdline(fdt);
str = strstr(cmdline, "nokaslr");
- if (str == cmdline || (str > cmdline && *(str - 1) == ' '))
+ if (str == cmdline || (str > cmdline && *(str - 1) == ' ')) {
+ kaslr_status = KASLR_DISABLED_CMDLINE;
+ return 0;
+ }
+
+ if (!seed) {
+ kaslr_status = KASLR_DISABLED_NO_SEED;
return 0;
+ }
/*
* OK, so we are proceeding with KASLR enabled. Calculate a suitable
@@ -170,3 +185,24 @@ u64 __init kaslr_early_init(u64 dt_phys)
return offset;
}
+
+static int __init kaslr_init(void)
+{
+ switch (kaslr_status) {
+ case KASLR_ENABLED:
+ pr_info("KASLR enabled\n");
+ break;
+ case KASLR_DISABLED_CMDLINE:
+ pr_info("KASLR disabled on command line\n");
+ break;
+ case KASLR_DISABLED_NO_SEED:
+ pr_warn("KASLR disabled due to lack of seed\n");
+ break;
+ case KASLR_DISABLED_FDT_REMAP:
+ pr_warn("KASLR disabled due to FDT remapping failure\n");
+ break;
+ }
+
+ return 0;
+}
+core_initcall(kaslr_init)
diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
index b182442b87a3..65b08a74aec6 100644
--- a/arch/arm64/kernel/module-plts.c
+++ b/arch/arm64/kernel/module-plts.c
@@ -4,6 +4,7 @@
*/
#include <linux/elf.h>
+#include <linux/ftrace.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sort.h>
@@ -330,7 +331,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
tramp->sh_type = SHT_NOBITS;
tramp->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
tramp->sh_addralign = __alignof__(struct plt_entry);
- tramp->sh_size = sizeof(struct plt_entry);
+ tramp->sh_size = NR_FTRACE_PLTS * sizeof(struct plt_entry);
}
return 0;
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 03ff15bffbb6..1cd1a4d0ed30 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -9,6 +9,7 @@
#include <linux/bitops.h>
#include <linux/elf.h>
+#include <linux/ftrace.h>
#include <linux/gfp.h>
#include <linux/kasan.h>
#include <linux/kernel.h>
@@ -470,22 +471,58 @@ overflow:
return -ENOEXEC;
}
-int module_finalize(const Elf_Ehdr *hdr,
- const Elf_Shdr *sechdrs,
- struct module *me)
+static const Elf_Shdr *find_section(const Elf_Ehdr *hdr,
+ const Elf_Shdr *sechdrs,
+ const char *name)
{
const Elf_Shdr *s, *se;
const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) {
- if (strcmp(".altinstructions", secstrs + s->sh_name) == 0)
- apply_alternatives_module((void *)s->sh_addr, s->sh_size);
-#ifdef CONFIG_ARM64_MODULE_PLTS
- if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) &&
- !strcmp(".text.ftrace_trampoline", secstrs + s->sh_name))
- me->arch.ftrace_trampoline = (void *)s->sh_addr;
-#endif
+ if (strcmp(name, secstrs + s->sh_name) == 0)
+ return s;
}
+ return NULL;
+}
+
+static inline void __init_plt(struct plt_entry *plt, unsigned long addr)
+{
+ *plt = get_plt_entry(addr, plt);
+}
+
+static int module_init_ftrace_plt(const Elf_Ehdr *hdr,
+ const Elf_Shdr *sechdrs,
+ struct module *mod)
+{
+#if defined(CONFIG_ARM64_MODULE_PLTS) && defined(CONFIG_DYNAMIC_FTRACE)
+ const Elf_Shdr *s;
+ struct plt_entry *plts;
+
+ s = find_section(hdr, sechdrs, ".text.ftrace_trampoline");
+ if (!s)
+ return -ENOEXEC;
+
+ plts = (void *)s->sh_addr;
+
+ __init_plt(&plts[FTRACE_PLT_IDX], FTRACE_ADDR);
+
+ if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
+ __init_plt(&plts[FTRACE_REGS_PLT_IDX], FTRACE_REGS_ADDR);
+
+ mod->arch.ftrace_trampolines = plts;
+#endif
return 0;
}
+
+int module_finalize(const Elf_Ehdr *hdr,
+ const Elf_Shdr *sechdrs,
+ struct module *me)
+{
+ const Elf_Shdr *s;
+ s = find_section(hdr, sechdrs, ".altinstructions");
+ if (s)
+ apply_alternatives_module((void *)s->sh_addr, s->sh_size);
+
+ return module_init_ftrace_plt(hdr, sechdrs, me);
+}
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index a0b4f1bca491..e40b65645c86 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -158,133 +158,74 @@ armv8pmu_events_sysfs_show(struct device *dev,
return sprintf(page, "event=0x%03llx\n", pmu_attr->id);
}
-#define ARMV8_EVENT_ATTR(name, config) \
- PMU_EVENT_ATTR(name, armv8_event_attr_##name, \
- config, armv8pmu_events_sysfs_show)
-
-ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_SW_INCR);
-ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL);
-ARMV8_EVENT_ATTR(l1i_tlb_refill, ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL);
-ARMV8_EVENT_ATTR(l1d_cache_refill, ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL);
-ARMV8_EVENT_ATTR(l1d_cache, ARMV8_PMUV3_PERFCTR_L1D_CACHE);
-ARMV8_EVENT_ATTR(l1d_tlb_refill, ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL);
-ARMV8_EVENT_ATTR(ld_retired, ARMV8_PMUV3_PERFCTR_LD_RETIRED);
-ARMV8_EVENT_ATTR(st_retired, ARMV8_PMUV3_PERFCTR_ST_RETIRED);
-ARMV8_EVENT_ATTR(inst_retired, ARMV8_PMUV3_PERFCTR_INST_RETIRED);
-ARMV8_EVENT_ATTR(exc_taken, ARMV8_PMUV3_PERFCTR_EXC_TAKEN);
-ARMV8_EVENT_ATTR(exc_return, ARMV8_PMUV3_PERFCTR_EXC_RETURN);
-ARMV8_EVENT_ATTR(cid_write_retired, ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED);
-ARMV8_EVENT_ATTR(pc_write_retired, ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED);
-ARMV8_EVENT_ATTR(br_immed_retired, ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED);
-ARMV8_EVENT_ATTR(br_return_retired, ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED);
-ARMV8_EVENT_ATTR(unaligned_ldst_retired, ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED);
-ARMV8_EVENT_ATTR(br_mis_pred, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED);
-ARMV8_EVENT_ATTR(cpu_cycles, ARMV8_PMUV3_PERFCTR_CPU_CYCLES);
-ARMV8_EVENT_ATTR(br_pred, ARMV8_PMUV3_PERFCTR_BR_PRED);
-ARMV8_EVENT_ATTR(mem_access, ARMV8_PMUV3_PERFCTR_MEM_ACCESS);
-ARMV8_EVENT_ATTR(l1i_cache, ARMV8_PMUV3_PERFCTR_L1I_CACHE);
-ARMV8_EVENT_ATTR(l1d_cache_wb, ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB);
-ARMV8_EVENT_ATTR(l2d_cache, ARMV8_PMUV3_PERFCTR_L2D_CACHE);
-ARMV8_EVENT_ATTR(l2d_cache_refill, ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL);
-ARMV8_EVENT_ATTR(l2d_cache_wb, ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB);
-ARMV8_EVENT_ATTR(bus_access, ARMV8_PMUV3_PERFCTR_BUS_ACCESS);
-ARMV8_EVENT_ATTR(memory_error, ARMV8_PMUV3_PERFCTR_MEMORY_ERROR);
-ARMV8_EVENT_ATTR(inst_spec, ARMV8_PMUV3_PERFCTR_INST_SPEC);
-ARMV8_EVENT_ATTR(ttbr_write_retired, ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED);
-ARMV8_EVENT_ATTR(bus_cycles, ARMV8_PMUV3_PERFCTR_BUS_CYCLES);
-/* Don't expose the chain event in /sys, since it's useless in isolation */
-ARMV8_EVENT_ATTR(l1d_cache_allocate, ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE);
-ARMV8_EVENT_ATTR(l2d_cache_allocate, ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE);
-ARMV8_EVENT_ATTR(br_retired, ARMV8_PMUV3_PERFCTR_BR_RETIRED);
-ARMV8_EVENT_ATTR(br_mis_pred_retired, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED);
-ARMV8_EVENT_ATTR(stall_frontend, ARMV8_PMUV3_PERFCTR_STALL_FRONTEND);
-ARMV8_EVENT_ATTR(stall_backend, ARMV8_PMUV3_PERFCTR_STALL_BACKEND);
-ARMV8_EVENT_ATTR(l1d_tlb, ARMV8_PMUV3_PERFCTR_L1D_TLB);
-ARMV8_EVENT_ATTR(l1i_tlb, ARMV8_PMUV3_PERFCTR_L1I_TLB);
-ARMV8_EVENT_ATTR(l2i_cache, ARMV8_PMUV3_PERFCTR_L2I_CACHE);
-ARMV8_EVENT_ATTR(l2i_cache_refill, ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL);
-ARMV8_EVENT_ATTR(l3d_cache_allocate, ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE);
-ARMV8_EVENT_ATTR(l3d_cache_refill, ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL);
-ARMV8_EVENT_ATTR(l3d_cache, ARMV8_PMUV3_PERFCTR_L3D_CACHE);
-ARMV8_EVENT_ATTR(l3d_cache_wb, ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB);
-ARMV8_EVENT_ATTR(l2d_tlb_refill, ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL);
-ARMV8_EVENT_ATTR(l2i_tlb_refill, ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL);
-ARMV8_EVENT_ATTR(l2d_tlb, ARMV8_PMUV3_PERFCTR_L2D_TLB);
-ARMV8_EVENT_ATTR(l2i_tlb, ARMV8_PMUV3_PERFCTR_L2I_TLB);
-ARMV8_EVENT_ATTR(remote_access, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS);
-ARMV8_EVENT_ATTR(ll_cache, ARMV8_PMUV3_PERFCTR_LL_CACHE);
-ARMV8_EVENT_ATTR(ll_cache_miss, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS);
-ARMV8_EVENT_ATTR(dtlb_walk, ARMV8_PMUV3_PERFCTR_DTLB_WALK);
-ARMV8_EVENT_ATTR(itlb_walk, ARMV8_PMUV3_PERFCTR_ITLB_WALK);
-ARMV8_EVENT_ATTR(ll_cache_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_RD);
-ARMV8_EVENT_ATTR(ll_cache_miss_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD);
-ARMV8_EVENT_ATTR(remote_access_rd, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD);
-ARMV8_EVENT_ATTR(sample_pop, ARMV8_SPE_PERFCTR_SAMPLE_POP);
-ARMV8_EVENT_ATTR(sample_feed, ARMV8_SPE_PERFCTR_SAMPLE_FEED);
-ARMV8_EVENT_ATTR(sample_filtrate, ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE);
-ARMV8_EVENT_ATTR(sample_collision, ARMV8_SPE_PERFCTR_SAMPLE_COLLISION);
+#define ARMV8_EVENT_ATTR(name, config) \
+ (&((struct perf_pmu_events_attr) { \
+ .attr = __ATTR(name, 0444, armv8pmu_events_sysfs_show, NULL), \
+ .id = config, \
+ }).attr.attr)
static struct attribute *armv8_pmuv3_event_attrs[] = {
- &armv8_event_attr_sw_incr.attr.attr,
- &armv8_event_attr_l1i_cache_refill.attr.attr,
- &armv8_event_attr_l1i_tlb_refill.attr.attr,
- &armv8_event_attr_l1d_cache_refill.attr.attr,
- &armv8_event_attr_l1d_cache.attr.attr,
- &armv8_event_attr_l1d_tlb_refill.attr.attr,
- &armv8_event_attr_ld_retired.attr.attr,
- &armv8_event_attr_st_retired.attr.attr,
- &armv8_event_attr_inst_retired.attr.attr,
- &armv8_event_attr_exc_taken.attr.attr,
- &armv8_event_attr_exc_return.attr.attr,
- &armv8_event_attr_cid_write_retired.attr.attr,
- &armv8_event_attr_pc_write_retired.attr.attr,
- &armv8_event_attr_br_immed_retired.attr.attr,
- &armv8_event_attr_br_return_retired.attr.attr,
- &armv8_event_attr_unaligned_ldst_retired.attr.attr,
- &armv8_event_attr_br_mis_pred.attr.attr,
- &armv8_event_attr_cpu_cycles.attr.attr,
- &armv8_event_attr_br_pred.attr.attr,
- &armv8_event_attr_mem_access.attr.attr,
- &armv8_event_attr_l1i_cache.attr.attr,
- &armv8_event_attr_l1d_cache_wb.attr.attr,
- &armv8_event_attr_l2d_cache.attr.attr,
- &armv8_event_attr_l2d_cache_refill.attr.attr,
- &armv8_event_attr_l2d_cache_wb.attr.attr,
- &armv8_event_attr_bus_access.attr.attr,
- &armv8_event_attr_memory_error.attr.attr,
- &armv8_event_attr_inst_spec.attr.attr,
- &armv8_event_attr_ttbr_write_retired.attr.attr,
- &armv8_event_attr_bus_cycles.attr.attr,
- &armv8_event_attr_l1d_cache_allocate.attr.attr,
- &armv8_event_attr_l2d_cache_allocate.attr.attr,
- &armv8_event_attr_br_retired.attr.attr,
- &armv8_event_attr_br_mis_pred_retired.attr.attr,
- &armv8_event_attr_stall_frontend.attr.attr,
- &armv8_event_attr_stall_backend.attr.attr,
- &armv8_event_attr_l1d_tlb.attr.attr,
- &armv8_event_attr_l1i_tlb.attr.attr,
- &armv8_event_attr_l2i_cache.attr.attr,
- &armv8_event_attr_l2i_cache_refill.attr.attr,
- &armv8_event_attr_l3d_cache_allocate.attr.attr,
- &armv8_event_attr_l3d_cache_refill.attr.attr,
- &armv8_event_attr_l3d_cache.attr.attr,
- &armv8_event_attr_l3d_cache_wb.attr.attr,
- &armv8_event_attr_l2d_tlb_refill.attr.attr,
- &armv8_event_attr_l2i_tlb_refill.attr.attr,
- &armv8_event_attr_l2d_tlb.attr.attr,
- &armv8_event_attr_l2i_tlb.attr.attr,
- &armv8_event_attr_remote_access.attr.attr,
- &armv8_event_attr_ll_cache.attr.attr,
- &armv8_event_attr_ll_cache_miss.attr.attr,
- &armv8_event_attr_dtlb_walk.attr.attr,
- &armv8_event_attr_itlb_walk.attr.attr,
- &armv8_event_attr_ll_cache_rd.attr.attr,
- &armv8_event_attr_ll_cache_miss_rd.attr.attr,
- &armv8_event_attr_remote_access_rd.attr.attr,
- &armv8_event_attr_sample_pop.attr.attr,
- &armv8_event_attr_sample_feed.attr.attr,
- &armv8_event_attr_sample_filtrate.attr.attr,
- &armv8_event_attr_sample_collision.attr.attr,
+ ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_SW_INCR),
+ ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL),
+ ARMV8_EVENT_ATTR(l1i_tlb_refill, ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL),
+ ARMV8_EVENT_ATTR(l1d_cache_refill, ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL),
+ ARMV8_EVENT_ATTR(l1d_cache, ARMV8_PMUV3_PERFCTR_L1D_CACHE),
+ ARMV8_EVENT_ATTR(l1d_tlb_refill, ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL),
+ ARMV8_EVENT_ATTR(ld_retired, ARMV8_PMUV3_PERFCTR_LD_RETIRED),
+ ARMV8_EVENT_ATTR(st_retired, ARMV8_PMUV3_PERFCTR_ST_RETIRED),
+ ARMV8_EVENT_ATTR(inst_retired, ARMV8_PMUV3_PERFCTR_INST_RETIRED),
+ ARMV8_EVENT_ATTR(exc_taken, ARMV8_PMUV3_PERFCTR_EXC_TAKEN),
+ ARMV8_EVENT_ATTR(exc_return, ARMV8_PMUV3_PERFCTR_EXC_RETURN),
+ ARMV8_EVENT_ATTR(cid_write_retired, ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED),
+ ARMV8_EVENT_ATTR(pc_write_retired, ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED),
+ ARMV8_EVENT_ATTR(br_immed_retired, ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED),
+ ARMV8_EVENT_ATTR(br_return_retired, ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED),
+ ARMV8_EVENT_ATTR(unaligned_ldst_retired, ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED),
+ ARMV8_EVENT_ATTR(br_mis_pred, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED),
+ ARMV8_EVENT_ATTR(cpu_cycles, ARMV8_PMUV3_PERFCTR_CPU_CYCLES),
+ ARMV8_EVENT_ATTR(br_pred, ARMV8_PMUV3_PERFCTR_BR_PRED),
+ ARMV8_EVENT_ATTR(mem_access, ARMV8_PMUV3_PERFCTR_MEM_ACCESS),
+ ARMV8_EVENT_ATTR(l1i_cache, ARMV8_PMUV3_PERFCTR_L1I_CACHE),
+ ARMV8_EVENT_ATTR(l1d_cache_wb, ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB),
+ ARMV8_EVENT_ATTR(l2d_cache, ARMV8_PMUV3_PERFCTR_L2D_CACHE),
+ ARMV8_EVENT_ATTR(l2d_cache_refill, ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL),
+ ARMV8_EVENT_ATTR(l2d_cache_wb, ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB),
+ ARMV8_EVENT_ATTR(bus_access, ARMV8_PMUV3_PERFCTR_BUS_ACCESS),
+ ARMV8_EVENT_ATTR(memory_error, ARMV8_PMUV3_PERFCTR_MEMORY_ERROR),
+ ARMV8_EVENT_ATTR(inst_spec, ARMV8_PMUV3_PERFCTR_INST_SPEC),
+ ARMV8_EVENT_ATTR(ttbr_write_retired, ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED),
+ ARMV8_EVENT_ATTR(bus_cycles, ARMV8_PMUV3_PERFCTR_BUS_CYCLES),
+ /* Don't expose the chain event in /sys, since it's useless in isolation */
+ ARMV8_EVENT_ATTR(l1d_cache_allocate, ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE),
+ ARMV8_EVENT_ATTR(l2d_cache_allocate, ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE),
+ ARMV8_EVENT_ATTR(br_retired, ARMV8_PMUV3_PERFCTR_BR_RETIRED),
+ ARMV8_EVENT_ATTR(br_mis_pred_retired, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED),
+ ARMV8_EVENT_ATTR(stall_frontend, ARMV8_PMUV3_PERFCTR_STALL_FRONTEND),
+ ARMV8_EVENT_ATTR(stall_backend, ARMV8_PMUV3_PERFCTR_STALL_BACKEND),
+ ARMV8_EVENT_ATTR(l1d_tlb, ARMV8_PMUV3_PERFCTR_L1D_TLB),
+ ARMV8_EVENT_ATTR(l1i_tlb, ARMV8_PMUV3_PERFCTR_L1I_TLB),
+ ARMV8_EVENT_ATTR(l2i_cache, ARMV8_PMUV3_PERFCTR_L2I_CACHE),
+ ARMV8_EVENT_ATTR(l2i_cache_refill, ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL),
+ ARMV8_EVENT_ATTR(l3d_cache_allocate, ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE),
+ ARMV8_EVENT_ATTR(l3d_cache_refill, ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL),
+ ARMV8_EVENT_ATTR(l3d_cache, ARMV8_PMUV3_PERFCTR_L3D_CACHE),
+ ARMV8_EVENT_ATTR(l3d_cache_wb, ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB),
+ ARMV8_EVENT_ATTR(l2d_tlb_refill, ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL),
+ ARMV8_EVENT_ATTR(l2i_tlb_refill, ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL),
+ ARMV8_EVENT_ATTR(l2d_tlb, ARMV8_PMUV3_PERFCTR_L2D_TLB),
+ ARMV8_EVENT_ATTR(l2i_tlb, ARMV8_PMUV3_PERFCTR_L2I_TLB),
+ ARMV8_EVENT_ATTR(remote_access, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS),
+ ARMV8_EVENT_ATTR(ll_cache, ARMV8_PMUV3_PERFCTR_LL_CACHE),
+ ARMV8_EVENT_ATTR(ll_cache_miss, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS),
+ ARMV8_EVENT_ATTR(dtlb_walk, ARMV8_PMUV3_PERFCTR_DTLB_WALK),
+ ARMV8_EVENT_ATTR(itlb_walk, ARMV8_PMUV3_PERFCTR_ITLB_WALK),
+ ARMV8_EVENT_ATTR(ll_cache_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_RD),
+ ARMV8_EVENT_ATTR(ll_cache_miss_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD),
+ ARMV8_EVENT_ATTR(remote_access_rd, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD),
+ ARMV8_EVENT_ATTR(sample_pop, ARMV8_SPE_PERFCTR_SAMPLE_POP),
+ ARMV8_EVENT_ATTR(sample_feed, ARMV8_SPE_PERFCTR_SAMPLE_FEED),
+ ARMV8_EVENT_ATTR(sample_filtrate, ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE),
+ ARMV8_EVENT_ATTR(sample_collision, ARMV8_SPE_PERFCTR_SAMPLE_COLLISION),
NULL,
};
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index c4452827419b..d1c95dcf1d78 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -455,10 +455,6 @@ int __init arch_populate_kprobe_blacklist(void)
(unsigned long)__irqentry_text_end);
if (ret)
return ret;
- ret = kprobe_add_area_blacklist((unsigned long)__exception_text_start,
- (unsigned long)__exception_text_end);
- if (ret)
- return ret;
ret = kprobe_add_area_blacklist((unsigned long)__idmap_text_start,
(unsigned long)__idmap_text_end);
if (ret)
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index a47462def04b..71f788cd2b18 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -17,6 +17,7 @@
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
#include <linux/kernel.h>
+#include <linux/lockdep.h>
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/sysctl.h>
@@ -44,6 +45,7 @@
#include <asm/alternative.h>
#include <asm/arch_gicv3.h>
#include <asm/compat.h>
+#include <asm/cpufeature.h>
#include <asm/cacheflush.h>
#include <asm/exec.h>
#include <asm/fpsimd.h>
@@ -332,22 +334,27 @@ void arch_release_task_struct(struct task_struct *tsk)
fpsimd_release_task(tsk);
}
-/*
- * src and dst may temporarily have aliased sve_state after task_struct
- * is copied. We cannot fix this properly here, because src may have
- * live SVE state and dst's thread_info may not exist yet, so tweaking
- * either src's or dst's TIF_SVE is not safe.
- *
- * The unaliasing is done in copy_thread() instead. This works because
- * dst is not schedulable or traceable until both of these functions
- * have been called.
- */
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
if (current->mm)
fpsimd_preserve_current_state();
*dst = *src;
+ /* We rely on the above assignment to initialize dst's thread_flags: */
+ BUILD_BUG_ON(!IS_ENABLED(CONFIG_THREAD_INFO_IN_TASK));
+
+ /*
+ * Detach src's sve_state (if any) from dst so that it does not
+ * get erroneously used or freed prematurely. dst's sve_state
+ * will be allocated on demand later on if dst uses SVE.
+ * For consistency, also clear TIF_SVE here: this could be done
+ * later in copy_process(), but to avoid tripping up future
+ * maintainers it is best not to leave TIF_SVE and sve_state in
+ * an inconsistent state, even temporarily.
+ */
+ dst->thread.sve_state = NULL;
+ clear_tsk_thread_flag(dst, TIF_SVE);
+
return 0;
}
@@ -361,13 +368,6 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context));
/*
- * Unalias p->thread.sve_state (if any) from the parent task
- * and disable discard SVE state for p:
- */
- clear_tsk_thread_flag(p, TIF_SVE);
- p->thread.sve_state = NULL;
-
- /*
* In case p was allocated the same task_struct pointer as some
* other recently-exited task, make sure p is disassociated from
* any cpu that may have run that now-exited task recently.
@@ -633,3 +633,19 @@ static int __init tagged_addr_init(void)
core_initcall(tagged_addr_init);
#endif /* CONFIG_ARM64_TAGGED_ADDR_ABI */
+
+asmlinkage void __sched arm64_preempt_schedule_irq(void)
+{
+ lockdep_assert_irqs_disabled();
+
+ /*
+ * Preempting a task from an IRQ means we leave copies of PSTATE
+ * on the stack. cpufeature's enable calls may modify PSTATE, but
+ * resuming one of these preempted tasks would undo those changes.
+ *
+ * Only allow a task to be preempted once cpufeatures have been
+ * enabled.
+ */
+ if (static_branch_likely(&arm64_const_caps_ready))
+ preempt_schedule_irq();
+}
diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c
index c9f72b2665f1..43ae4e0c968f 100644
--- a/arch/arm64/kernel/psci.c
+++ b/arch/arm64/kernel/psci.c
@@ -81,7 +81,8 @@ static void cpu_psci_cpu_die(unsigned int cpu)
static int cpu_psci_cpu_kill(unsigned int cpu)
{
- int err, i;
+ int err;
+ unsigned long start, end;
if (!psci_ops.affinity_info)
return 0;
@@ -91,16 +92,18 @@ static int cpu_psci_cpu_kill(unsigned int cpu)
* while it is dying. So, try again a few times.
*/
- for (i = 0; i < 10; i++) {
+ start = jiffies;
+ end = start + msecs_to_jiffies(100);
+ do {
err = psci_ops.affinity_info(cpu_logical_map(cpu), 0);
if (err == PSCI_0_2_AFFINITY_LEVEL_OFF) {
- pr_info("CPU%d killed.\n", cpu);
+ pr_info("CPU%d killed (polled %d ms)\n", cpu,
+ jiffies_to_msecs(jiffies - start));
return 0;
}
- msleep(10);
- pr_info("Retrying again to check for CPU kill\n");
- }
+ usleep_range(100, 1000);
+ } while (time_before(jiffies, end));
pr_warn("CPU%d may not have shut down cleanly (AFFINITY_INFO reports %d)\n",
cpu, err);
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
index ea94cf8f9dc6..d6259dac62b6 100644
--- a/arch/arm64/kernel/sdei.c
+++ b/arch/arm64/kernel/sdei.c
@@ -2,6 +2,7 @@
// Copyright (C) 2017 Arm Ltd.
#define pr_fmt(fmt) "sdei: " fmt
+#include <linux/arm-smccc.h>
#include <linux/arm_sdei.h>
#include <linux/hardirq.h>
#include <linux/irqflags.h>
@@ -161,7 +162,7 @@ unsigned long sdei_arch_get_entry_point(int conduit)
return 0;
}
- sdei_exit_mode = (conduit == CONDUIT_HVC) ? SDEI_EXIT_HVC : SDEI_EXIT_SMC;
+ sdei_exit_mode = (conduit == SMCCC_CONDUIT_HVC) ? SDEI_EXIT_HVC : SDEI_EXIT_SMC;
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
if (arm64_kernel_unmapped_at_el0()) {
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index f1cb64959427..3c18c2454089 100644
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -8,6 +8,7 @@
*/
#include <linux/compat.h>
+#include <linux/cpufeature.h>
#include <linux/personality.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
@@ -17,6 +18,7 @@
#include <asm/cacheflush.h>
#include <asm/system_misc.h>
+#include <asm/tlbflush.h>
#include <asm/unistd.h>
static long
@@ -30,6 +32,15 @@ __do_compat_cache_op(unsigned long start, unsigned long end)
if (fatal_signal_pending(current))
return 0;
+ if (cpus_have_const_cap(ARM64_WORKAROUND_1542419)) {
+ /*
+ * The workaround requires an inner-shareable tlbi.
+ * We pick the reserved-ASID to minimise the impact.
+ */
+ __tlbi(aside1is, __TLBI_VADDR(0, 0));
+ dsb(ish);
+ }
+
ret = __flush_cache_user_range(start, start + chunk);
if (ret)
return ret;
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index 871c739f060a..9a9d98a443fc 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -154,14 +154,14 @@ static inline void sve_user_discard(void)
sve_user_disable();
}
-asmlinkage void el0_svc_handler(struct pt_regs *regs)
+void el0_svc_handler(struct pt_regs *regs)
{
sve_user_discard();
el0_svc_common(regs, regs->regs[8], __NR_syscalls, sys_call_table);
}
#ifdef CONFIG_COMPAT
-asmlinkage void el0_svc_compat_handler(struct pt_regs *regs)
+void el0_svc_compat_handler(struct pt_regs *regs)
{
el0_svc_common(regs, regs->regs[7], __NR_compat_syscalls,
compat_sys_call_table);
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 34739e80211b..73caf35c2262 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -35,6 +35,7 @@
#include <asm/debug-monitors.h>
#include <asm/esr.h>
#include <asm/insn.h>
+#include <asm/kprobes.h>
#include <asm/traps.h>
#include <asm/smp.h>
#include <asm/stack_pointer.h>
@@ -393,7 +394,7 @@ void arm64_notify_segfault(unsigned long addr)
force_signal_inject(SIGSEGV, code, addr);
}
-asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
+void do_undefinstr(struct pt_regs *regs)
{
/* check for AArch32 breakpoint instructions */
if (!aarch32_break_handler(regs))
@@ -405,6 +406,7 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs)
BUG_ON(!user_mode(regs));
force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc);
}
+NOKPROBE_SYMBOL(do_undefinstr);
#define __user_cache_maint(insn, address, res) \
if (address >= user_addr_max()) { \
@@ -470,6 +472,15 @@ static void ctr_read_handler(unsigned int esr, struct pt_regs *regs)
int rt = ESR_ELx_SYS64_ISS_RT(esr);
unsigned long val = arm64_ftr_reg_user_value(&arm64_ftr_reg_ctrel0);
+ if (cpus_have_const_cap(ARM64_WORKAROUND_1542419)) {
+ /* Hide DIC so that we can trap the unnecessary maintenance...*/
+ val &= ~BIT(CTR_DIC_SHIFT);
+
+ /* ... and fake IminLine to reduce the number of traps. */
+ val &= ~CTR_IMINLINE_MASK;
+ val |= (PAGE_SHIFT - 2) & CTR_IMINLINE_MASK;
+ }
+
pt_regs_write_reg(regs, rt, val);
arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
@@ -667,7 +678,7 @@ static const struct sys64_hook cp15_64_hooks[] = {
{},
};
-asmlinkage void __exception do_cp15instr(unsigned int esr, struct pt_regs *regs)
+void do_cp15instr(unsigned int esr, struct pt_regs *regs)
{
const struct sys64_hook *hook, *hook_base;
@@ -705,9 +716,10 @@ asmlinkage void __exception do_cp15instr(unsigned int esr, struct pt_regs *regs)
*/
do_undefinstr(regs);
}
+NOKPROBE_SYMBOL(do_cp15instr);
#endif
-asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs)
+void do_sysinstr(unsigned int esr, struct pt_regs *regs)
{
const struct sys64_hook *hook;
@@ -724,6 +736,7 @@ asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs)
*/
do_undefinstr(regs);
}
+NOKPROBE_SYMBOL(do_sysinstr);
static const char *esr_class_str[] = {
[0 ... ESR_ELx_EC_MAX] = "UNRECOGNIZED EC",
@@ -793,7 +806,7 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
* bad_el0_sync handles unexpected, but potentially recoverable synchronous
* exceptions taken from EL0. Unlike bad_mode, this returns.
*/
-asmlinkage void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr)
+void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr)
{
void __user *pc = (void __user *)instruction_pointer(regs);
diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile
index 1fba0776ed40..76b327f88fbb 100644
--- a/arch/arm64/kernel/vdso32/Makefile
+++ b/arch/arm64/kernel/vdso32/Makefile
@@ -8,15 +8,21 @@
ARCH_REL_TYPE_ABS := R_ARM_JUMP_SLOT|R_ARM_GLOB_DAT|R_ARM_ABS32
include $(srctree)/lib/vdso/Makefile
-COMPATCC := $(CROSS_COMPILE_COMPAT)gcc
+# Same as cc-*option, but using CC_COMPAT instead of CC
+ifeq ($(CONFIG_CC_IS_CLANG), y)
+CC_COMPAT ?= $(CC)
+else
+CC_COMPAT ?= $(CROSS_COMPILE_COMPAT)gcc
+endif
-# Same as cc-*option, but using COMPATCC instead of CC
cc32-option = $(call try-run,\
- $(COMPATCC) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))
+ $(CC_COMPAT) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))
cc32-disable-warning = $(call try-run,\
- $(COMPATCC) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
+ $(CC_COMPAT) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
cc32-ldoption = $(call try-run,\
- $(COMPATCC) $(1) -nostdlib -x c /dev/null -o "$$TMP",$(1),$(2))
+ $(CC_COMPAT) $(1) -nostdlib -x c /dev/null -o "$$TMP",$(1),$(2))
+cc32-as-instr = $(call try-run,\
+ printf "%b\n" "$(1)" | $(CC_COMPAT) $(VDSO_AFLAGS) -c -x assembler -o "$$TMP" -,$(2),$(3))
# We cannot use the global flags to compile the vDSO files, the main reason
# being that the 32-bit compiler may be older than the main (64-bit) compiler
@@ -25,22 +31,21 @@ cc32-ldoption = $(call try-run,\
# arm64 one.
# As a result we set our own flags here.
-# From top-level Makefile
-# NOSTDINC_FLAGS
-VDSO_CPPFLAGS := -nostdinc -isystem $(shell $(COMPATCC) -print-file-name=include)
+# KBUILD_CPPFLAGS and NOSTDINC_FLAGS from top-level Makefile
+VDSO_CPPFLAGS := -D__KERNEL__ -nostdinc -isystem $(shell $(CC_COMPAT) -print-file-name=include)
VDSO_CPPFLAGS += $(LINUXINCLUDE)
-VDSO_CPPFLAGS += $(KBUILD_CPPFLAGS)
# Common C and assembly flags
# From top-level Makefile
VDSO_CAFLAGS := $(VDSO_CPPFLAGS)
+ifneq ($(shell $(CC_COMPAT) --version 2>&1 | head -n 1 | grep clang),)
+VDSO_CAFLAGS += --target=$(notdir $(CROSS_COMPILE_COMPAT:%-=%))
+endif
+
VDSO_CAFLAGS += $(call cc32-option,-fno-PIE)
ifdef CONFIG_DEBUG_INFO
VDSO_CAFLAGS += -g
endif
-ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(COMPATCC)), y)
-VDSO_CAFLAGS += -DCC_HAVE_ASM_GOTO
-endif
# From arm Makefile
VDSO_CAFLAGS += $(call cc32-option,-fno-dwarf2-cfi-asm)
@@ -55,6 +60,7 @@ endif
VDSO_CAFLAGS += -fPIC -fno-builtin -fno-stack-protector
VDSO_CAFLAGS += -DDISABLE_BRANCH_PROFILING
+
# Try to compile for ARMv8. If the compiler is too old and doesn't support it,
# fall back to v7. There is no easy way to check for what architecture the code
# is being compiled, so define a macro specifying that (see arch/arm/Makefile).
@@ -91,6 +97,12 @@ VDSO_CFLAGS += -Wno-int-to-pointer-cast
VDSO_AFLAGS := $(VDSO_CAFLAGS)
VDSO_AFLAGS += -D__ASSEMBLY__
+# Check for binutils support for dmb ishld
+dmbinstr := $(call cc32-as-instr,dmb ishld,-DCONFIG_AS_DMB_ISHLD=1)
+
+VDSO_CFLAGS += $(dmbinstr)
+VDSO_AFLAGS += $(dmbinstr)
+
VDSO_LDFLAGS := $(VDSO_CPPFLAGS)
# From arm vDSO Makefile
VDSO_LDFLAGS += -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1
@@ -159,14 +171,14 @@ quiet_cmd_vdsold_and_vdso_check = LD32 $@
cmd_vdsold_and_vdso_check = $(cmd_vdsold); $(cmd_vdso_check)
quiet_cmd_vdsold = LD32 $@
- cmd_vdsold = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_LDFLAGS) \
+ cmd_vdsold = $(CC_COMPAT) -Wp,-MD,$(depfile) $(VDSO_LDFLAGS) \
-Wl,-T $(filter %.lds,$^) $(filter %.o,$^) -o $@
quiet_cmd_vdsocc = CC32 $@
- cmd_vdsocc = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) -c -o $@ $<
+ cmd_vdsocc = $(CC_COMPAT) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) -c -o $@ $<
quiet_cmd_vdsocc_gettimeofday = CC32 $@
- cmd_vdsocc_gettimeofday = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) $(VDSO_CFLAGS_gettimeofday_o) -c -o $@ $<
+ cmd_vdsocc_gettimeofday = $(CC_COMPAT) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) $(VDSO_CFLAGS_gettimeofday_o) -c -o $@ $<
quiet_cmd_vdsoas = AS32 $@
- cmd_vdsoas = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_AFLAGS) -c -o $@ $<
+ cmd_vdsoas = $(CC_COMPAT) -Wp,-MD,$(depfile) $(VDSO_AFLAGS) -c -o $@ $<
quiet_cmd_vdsomunge = MUNGE $@
cmd_vdsomunge = $(obj)/$(munge) $< $@
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index aa76f7259668..009057517bdd 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -111,9 +111,6 @@ SECTIONS
}
.text : { /* Real text segment */
_stext = .; /* Text and read-only data */
- __exception_text_start = .;
- *(.exception.text)
- __exception_text_end = .;
IRQENTRY_TEXT
SOFTIRQENTRY_TEXT
ENTRY_TEXT
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 3d3815020e36..72fbbd86eb5e 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -12,7 +12,7 @@
#include <kvm/arm_psci.h>
-#include <asm/arch_gicv3.h>
+#include <asm/barrier.h>
#include <asm/cpufeature.h>
#include <asm/kprobes.h>
#include <asm/kvm_asm.h>
@@ -118,12 +118,29 @@ static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu)
}
write_sysreg(val, cptr_el2);
+
+ if (cpus_have_const_cap(ARM64_WORKAROUND_1319367)) {
+ struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt;
+
+ isb();
+ /*
+ * At this stage, and thanks to the above isb(), S2 is
+ * configured and enabled. We can now restore the guest's S1
+ * configuration: SCTLR, and only then TCR.
+ */
+ write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR);
+ isb();
+ write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR);
+ }
}
static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
{
u64 hcr = vcpu->arch.hcr_el2;
+ if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM))
+ hcr |= HCR_TVM;
+
write_sysreg(hcr, hcr_el2);
if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
@@ -156,6 +173,23 @@ static void __hyp_text __deactivate_traps_nvhe(void)
{
u64 mdcr_el2 = read_sysreg(mdcr_el2);
+ if (cpus_have_const_cap(ARM64_WORKAROUND_1319367)) {
+ u64 val;
+
+ /*
+ * Set the TCR and SCTLR registers in the exact opposite
+ * sequence as __activate_traps_nvhe (first prevent walks,
+ * then force the MMU on). A generous sprinkling of isb()
+ * ensure that things happen in this exact order.
+ */
+ val = read_sysreg_el1(SYS_TCR);
+ write_sysreg_el1(val | TCR_EPD1_MASK | TCR_EPD0_MASK, SYS_TCR);
+ isb();
+ val = read_sysreg_el1(SYS_SCTLR);
+ write_sysreg_el1(val | SCTLR_ELx_M, SYS_SCTLR);
+ isb();
+ }
+
__deactivate_traps_common();
mdcr_el2 &= MDCR_EL2_HPMN_MASK;
@@ -174,8 +208,10 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
* the crucial bit is "On taking a vSError interrupt,
* HCR_EL2.VSE is cleared to 0."
*/
- if (vcpu->arch.hcr_el2 & HCR_VSE)
- vcpu->arch.hcr_el2 = read_sysreg(hcr_el2);
+ if (vcpu->arch.hcr_el2 & HCR_VSE) {
+ vcpu->arch.hcr_el2 &= ~HCR_VSE;
+ vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE;
+ }
if (has_vhe())
deactivate_traps_vhe();
@@ -380,6 +416,61 @@ static bool __hyp_text __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
return true;
}
+static bool __hyp_text handle_tx2_tvm(struct kvm_vcpu *vcpu)
+{
+ u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_hsr(vcpu));
+ int rt = kvm_vcpu_sys_get_rt(vcpu);
+ u64 val = vcpu_get_reg(vcpu, rt);
+
+ /*
+ * The normal sysreg handling code expects to see the traps,
+ * let's not do anything here.
+ */
+ if (vcpu->arch.hcr_el2 & HCR_TVM)
+ return false;
+
+ switch (sysreg) {
+ case SYS_SCTLR_EL1:
+ write_sysreg_el1(val, SYS_SCTLR);
+ break;
+ case SYS_TTBR0_EL1:
+ write_sysreg_el1(val, SYS_TTBR0);
+ break;
+ case SYS_TTBR1_EL1:
+ write_sysreg_el1(val, SYS_TTBR1);
+ break;
+ case SYS_TCR_EL1:
+ write_sysreg_el1(val, SYS_TCR);
+ break;
+ case SYS_ESR_EL1:
+ write_sysreg_el1(val, SYS_ESR);
+ break;
+ case SYS_FAR_EL1:
+ write_sysreg_el1(val, SYS_FAR);
+ break;
+ case SYS_AFSR0_EL1:
+ write_sysreg_el1(val, SYS_AFSR0);
+ break;
+ case SYS_AFSR1_EL1:
+ write_sysreg_el1(val, SYS_AFSR1);
+ break;
+ case SYS_MAIR_EL1:
+ write_sysreg_el1(val, SYS_MAIR);
+ break;
+ case SYS_AMAIR_EL1:
+ write_sysreg_el1(val, SYS_AMAIR);
+ break;
+ case SYS_CONTEXTIDR_EL1:
+ write_sysreg_el1(val, SYS_CONTEXTIDR);
+ break;
+ default:
+ return false;
+ }
+
+ __kvm_skip_instr(vcpu);
+ return true;
+}
+
/*
* Return true when we were able to fixup the guest exit and should return to
* the guest, false when we should restore the host state and return to the
@@ -399,6 +490,11 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
if (*exit_code != ARM_EXCEPTION_TRAP)
goto exit;
+ if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
+ kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 &&
+ handle_tx2_tvm(vcpu))
+ return true;
+
/*
* We trap the first access to the FP/SIMD to save the host context
* and restore the guest context lazily.
@@ -592,7 +688,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
*/
if (system_uses_irq_prio_masking()) {
gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
- dsb(sy);
+ pmr_sync();
}
vcpu = kern_hyp_va(vcpu);
@@ -605,18 +701,23 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
__sysreg_save_state_nvhe(host_ctxt);
- __activate_vm(kern_hyp_va(vcpu->kvm));
- __activate_traps(vcpu);
-
- __hyp_vgic_restore_state(vcpu);
- __timer_enable_traps(vcpu);
-
/*
* We must restore the 32-bit state before the sysregs, thanks
* to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
+ *
+ * Also, and in order to be able to deal with erratum #1319537 (A57)
+ * and #1319367 (A72), we must ensure that all VM-related sysreg are
+ * restored before we enable S2 translation.
*/
__sysreg32_restore_state(vcpu);
__sysreg_restore_state_nvhe(guest_ctxt);
+
+ __activate_vm(kern_hyp_va(vcpu->kvm));
+ __activate_traps(vcpu);
+
+ __hyp_vgic_restore_state(vcpu);
+ __timer_enable_traps(vcpu);
+
__debug_switch_to_guest(vcpu);
__set_guest_arch_workaround_state(vcpu);
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 7ddbc849b580..22b8128d19f6 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -117,12 +117,26 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
{
write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2);
write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1);
- write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR);
+
+ if (!cpus_have_const_cap(ARM64_WORKAROUND_1319367)) {
+ write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR);
+ write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR);
+ } else if (!ctxt->__hyp_running_vcpu) {
+ /*
+ * Must only be done for guest registers, hence the context
+ * test. We're coming from the host, so SCTLR.M is already
+ * set. Pairs with __activate_traps_nvhe().
+ */
+ write_sysreg_el1((ctxt->sys_regs[TCR_EL1] |
+ TCR_EPD1_MASK | TCR_EPD0_MASK),
+ SYS_TCR);
+ isb();
+ }
+
write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1);
write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], SYS_CPACR);
write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], SYS_TTBR0);
write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], SYS_TTBR1);
- write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR);
write_sysreg_el1(ctxt->sys_regs[ESR_EL1], SYS_ESR);
write_sysreg_el1(ctxt->sys_regs[AFSR0_EL1], SYS_AFSR0);
write_sysreg_el1(ctxt->sys_regs[AFSR1_EL1], SYS_AFSR1);
@@ -135,6 +149,23 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1);
write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1);
+ if (cpus_have_const_cap(ARM64_WORKAROUND_1319367) &&
+ ctxt->__hyp_running_vcpu) {
+ /*
+ * Must only be done for host registers, hence the context
+ * test. Pairs with __deactivate_traps_nvhe().
+ */
+ isb();
+ /*
+ * At this stage, and thanks to the above isb(), S2 is
+ * deconfigured and disabled. We can now restore the host's
+ * S1 configuration: SCTLR, and only then TCR.
+ */
+ write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR);
+ isb();
+ write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR);
+ }
+
write_sysreg(ctxt->gp_regs.sp_el1, sp_el1);
write_sysreg_el1(ctxt->gp_regs.elr_el1, SYS_ELR);
write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],SYS_SPSR);
diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c
index eb0efc5557f3..c2bc17ca6430 100644
--- a/arch/arm64/kvm/hyp/tlb.c
+++ b/arch/arm64/kvm/hyp/tlb.c
@@ -63,6 +63,22 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm,
static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm,
struct tlb_inv_context *cxt)
{
+ if (cpus_have_const_cap(ARM64_WORKAROUND_1319367)) {
+ u64 val;
+
+ /*
+ * For CPUs that are affected by ARM 1319367, we need to
+ * avoid a host Stage-1 walk while we have the guest's
+ * VMID set in the VTTBR in order to invalidate TLBs.
+ * We're guaranteed that the S1 MMU is enabled, so we can
+ * simply set the EPD bits to avoid any further TLB fill.
+ */
+ val = cxt->tcr = read_sysreg_el1(SYS_TCR);
+ val |= TCR_EPD1_MASK | TCR_EPD0_MASK;
+ write_sysreg_el1(val, SYS_TCR);
+ isb();
+ }
+
__load_guest_stage2(kvm);
isb();
}
@@ -100,6 +116,13 @@ static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm,
struct tlb_inv_context *cxt)
{
write_sysreg(0, vttbr_el2);
+
+ if (cpus_have_const_cap(ARM64_WORKAROUND_1319367)) {
+ /* Ensure write of the host VMID */
+ isb();
+ /* Restore the host's TCR_EL1 */
+ write_sysreg_el1(cxt->tcr, SYS_TCR);
+ }
}
static void __hyp_text __tlb_switch_to_host(struct kvm *kvm,
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 2071260a275b..46822afc57e0 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -632,6 +632,8 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
*/
val = ((pmcr & ~ARMV8_PMU_PMCR_MASK)
| (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E);
+ if (!system_supports_32bit_el0())
+ val |= ARMV8_PMU_PMCR_LC;
__vcpu_sys_reg(vcpu, r->reg) = val;
}
@@ -682,6 +684,8 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
val = __vcpu_sys_reg(vcpu, PMCR_EL0);
val &= ~ARMV8_PMU_PMCR_MASK;
val |= p->regval & ARMV8_PMU_PMCR_MASK;
+ if (!system_supports_32bit_el0())
+ val |= ARMV8_PMU_PMCR_LC;
__vcpu_sys_reg(vcpu, PMCR_EL0) = val;
kvm_pmu_handle_pmcr(vcpu, val);
kvm_vcpu_pmu_restore_guest(vcpu);
diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S
index 10415572e82f..aeafc03e961a 100644
--- a/arch/arm64/lib/clear_user.S
+++ b/arch/arm64/lib/clear_user.S
@@ -20,7 +20,6 @@
* Alignment fixed up by hardware.
*/
ENTRY(__arch_clear_user)
- uaccess_enable_not_uao x2, x3, x4
mov x2, x1 // save the size for fixup return
subs x1, x1, #8
b.mi 2f
@@ -40,7 +39,6 @@ uao_user_alternative 9f, strh, sttrh, wzr, x0, 2
b.mi 5f
uao_user_alternative 9f, strb, sttrb, wzr, x0, 0
5: mov x0, #0
- uaccess_disable_not_uao x2, x3
ret
ENDPROC(__arch_clear_user)
EXPORT_SYMBOL(__arch_clear_user)
diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S
index 680e74409ff9..ebb3c06cbb5d 100644
--- a/arch/arm64/lib/copy_from_user.S
+++ b/arch/arm64/lib/copy_from_user.S
@@ -54,10 +54,8 @@
end .req x5
ENTRY(__arch_copy_from_user)
- uaccess_enable_not_uao x3, x4, x5
add end, x0, x2
#include "copy_template.S"
- uaccess_disable_not_uao x3, x4
mov x0, #0 // Nothing to copy
ret
ENDPROC(__arch_copy_from_user)
diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S
index 0bedae3f3792..3d8153a1ebce 100644
--- a/arch/arm64/lib/copy_in_user.S
+++ b/arch/arm64/lib/copy_in_user.S
@@ -56,10 +56,8 @@
end .req x5
ENTRY(__arch_copy_in_user)
- uaccess_enable_not_uao x3, x4, x5
add end, x0, x2
#include "copy_template.S"
- uaccess_disable_not_uao x3, x4
mov x0, #0
ret
ENDPROC(__arch_copy_in_user)
diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S
index 2d88c736e8f2..357eae2c18eb 100644
--- a/arch/arm64/lib/copy_to_user.S
+++ b/arch/arm64/lib/copy_to_user.S
@@ -53,10 +53,8 @@
end .req x5
ENTRY(__arch_copy_to_user)
- uaccess_enable_not_uao x3, x4, x5
add end, x0, x2
#include "copy_template.S"
- uaccess_disable_not_uao x3, x4
mov x0, #0
ret
ENDPROC(__arch_copy_to_user)
diff --git a/arch/arm64/lib/uaccess_flushcache.c b/arch/arm64/lib/uaccess_flushcache.c
index cbfcbe6470a5..bfa30b75b2b8 100644
--- a/arch/arm64/lib/uaccess_flushcache.c
+++ b/arch/arm64/lib/uaccess_flushcache.c
@@ -28,7 +28,11 @@ void memcpy_page_flushcache(char *to, struct page *page, size_t offset,
unsigned long __copy_user_flushcache(void *to, const void __user *from,
unsigned long n)
{
- unsigned long rc = __arch_copy_from_user(to, from, n);
+ unsigned long rc;
+
+ uaccess_enable_not_uao();
+ rc = __arch_copy_from_user(to, from, n);
+ uaccess_disable_not_uao();
/* See above */
__clean_dcache_area_pop(to, n - rc);
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 115d7a0e4b08..077b02a2d4d3 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -32,7 +32,8 @@
#include <asm/daifflags.h>
#include <asm/debug-monitors.h>
#include <asm/esr.h>
-#include <asm/kasan.h>
+#include <asm/kprobes.h>
+#include <asm/processor.h>
#include <asm/sysreg.h>
#include <asm/system_misc.h>
#include <asm/pgtable.h>
@@ -101,16 +102,13 @@ static void mem_abort_decode(unsigned int esr)
data_abort_decode(esr);
}
-static inline bool is_ttbr0_addr(unsigned long addr)
+static inline unsigned long mm_to_pgd_phys(struct mm_struct *mm)
{
- /* entry assembly clears tags for TTBR0 addrs */
- return addr < TASK_SIZE;
-}
+ /* Either init_pg_dir or swapper_pg_dir */
+ if (mm == &init_mm)
+ return __pa_symbol(mm->pgd);
-static inline bool is_ttbr1_addr(unsigned long addr)
-{
- /* TTBR1 addresses may have a tag if KASAN_SW_TAGS is in use */
- return arch_kasan_reset_tag(addr) >= PAGE_OFFSET;
+ return (unsigned long)virt_to_phys(mm->pgd);
}
/*
@@ -141,7 +139,7 @@ static void show_pte(unsigned long addr)
pr_alert("%s pgtable: %luk pages, %llu-bit VAs, pgdp=%016lx\n",
mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K,
- vabits_actual, (unsigned long)virt_to_phys(mm->pgd));
+ vabits_actual, mm_to_pgd_phys(mm));
pgdp = pgd_offset(mm, addr);
pgd = READ_ONCE(*pgdp);
pr_alert("[%016lx] pgd=%016llx", addr, pgd_val(pgd));
@@ -259,14 +257,18 @@ static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr,
par = read_sysreg(par_el1);
local_irq_restore(flags);
+ /*
+ * If we now have a valid translation, treat the translation fault as
+ * spurious.
+ */
if (!(par & SYS_PAR_EL1_F))
- return false;
+ return true;
/*
* If we got a different type of fault from the AT instruction,
* treat the translation fault as spurious.
*/
- dfsc = FIELD_PREP(SYS_PAR_EL1_FST, par);
+ dfsc = FIELD_GET(SYS_PAR_EL1_FST, par);
return (dfsc & ESR_ELx_FSC_TYPE) != ESR_ELx_FSC_FAULT;
}
@@ -305,6 +307,8 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr,
if (is_el1_permission_fault(addr, esr, regs)) {
if (esr & ESR_ELx_WNR)
msg = "write to read-only memory";
+ else if (is_el1_instruction_abort(esr))
+ msg = "execute from non-executable memory";
else
msg = "read from unreadable memory";
} else if (addr < PAGE_SIZE) {
@@ -723,8 +727,7 @@ static const struct fault_info fault_info[] = {
{ do_bad, SIGKILL, SI_KERNEL, "unknown 63" },
};
-asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
- struct pt_regs *regs)
+void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
const struct fault_info *inf = esr_to_fault_info(esr);
@@ -740,43 +743,21 @@ asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
arm64_notify_die(inf->name, regs,
inf->sig, inf->code, (void __user *)addr, esr);
}
+NOKPROBE_SYMBOL(do_mem_abort);
-asmlinkage void __exception do_el0_irq_bp_hardening(void)
+void do_el0_irq_bp_hardening(void)
{
/* PC has already been checked in entry.S */
arm64_apply_bp_hardening();
}
+NOKPROBE_SYMBOL(do_el0_irq_bp_hardening);
-asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr,
- unsigned int esr,
- struct pt_regs *regs)
-{
- /*
- * We've taken an instruction abort from userspace and not yet
- * re-enabled IRQs. If the address is a kernel address, apply
- * BP hardening prior to enabling IRQs and pre-emption.
- */
- if (!is_ttbr0_addr(addr))
- arm64_apply_bp_hardening();
-
- local_daif_restore(DAIF_PROCCTX);
- do_mem_abort(addr, esr, regs);
-}
-
-
-asmlinkage void __exception do_sp_pc_abort(unsigned long addr,
- unsigned int esr,
- struct pt_regs *regs)
+void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs)
{
- if (user_mode(regs)) {
- if (!is_ttbr0_addr(instruction_pointer(regs)))
- arm64_apply_bp_hardening();
- local_daif_restore(DAIF_PROCCTX);
- }
-
arm64_notify_die("SP/PC alignment exception", regs,
SIGBUS, BUS_ADRALN, (void __user *)addr, esr);
}
+NOKPROBE_SYMBOL(do_sp_pc_abort);
int __init early_brk64(unsigned long addr, unsigned int esr,
struct pt_regs *regs);
@@ -859,8 +840,7 @@ NOKPROBE_SYMBOL(debug_exception_exit);
#ifdef CONFIG_ARM64_ERRATUM_1463225
DECLARE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
-static int __exception
-cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
+static int cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
{
if (user_mode(regs))
return 0;
@@ -879,16 +859,15 @@ cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
return 1;
}
#else
-static int __exception
-cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
+static int cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
{
return 0;
}
#endif /* CONFIG_ARM64_ERRATUM_1463225 */
+NOKPROBE_SYMBOL(cortex_a76_erratum_1463225_debug_handler);
-asmlinkage void __exception do_debug_exception(unsigned long addr_if_watchpoint,
- unsigned int esr,
- struct pt_regs *regs)
+void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
+ struct pt_regs *regs)
{
const struct fault_info *inf = esr_to_debug_fault_info(esr);
unsigned long pc = instruction_pointer(regs);
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 45c00a54909c..be9481cdf3b9 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -20,6 +20,7 @@
#include <linux/sort.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
+#include <linux/dma-direct.h>
#include <linux/dma-mapping.h>
#include <linux/dma-contiguous.h>
#include <linux/efi.h>
@@ -41,6 +42,8 @@
#include <asm/tlb.h>
#include <asm/alternative.h>
+#define ARM64_ZONE_DMA_BITS 30
+
/*
* We need to be able to catch inadvertent references to memstart_addr
* that occur (potentially in generic code) before arm64_memblock_init()
@@ -56,7 +59,14 @@ EXPORT_SYMBOL(physvirt_offset);
struct page *vmemmap __ro_after_init;
EXPORT_SYMBOL(vmemmap);
+/*
+ * We create both ZONE_DMA and ZONE_DMA32. ZONE_DMA covers the first 1G of
+ * memory as some devices, namely the Raspberry Pi 4, have peripherals with
+ * this limited view of the memory. ZONE_DMA32 will cover the rest of the 32
+ * bit addressable memory area.
+ */
phys_addr_t arm64_dma_phys_limit __ro_after_init;
+static phys_addr_t arm64_dma32_phys_limit __ro_after_init;
#ifdef CONFIG_KEXEC_CORE
/*
@@ -81,7 +91,7 @@ static void __init reserve_crashkernel(void)
if (crash_base == 0) {
/* Current arm64 boot protocol requires 2MB alignment */
- crash_base = memblock_find_in_range(0, ARCH_LOW_ADDRESS_LIMIT,
+ crash_base = memblock_find_in_range(0, arm64_dma32_phys_limit,
crash_size, SZ_2M);
if (crash_base == 0) {
pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
@@ -169,15 +179,16 @@ static void __init reserve_elfcorehdr(void)
{
}
#endif /* CONFIG_CRASH_DUMP */
+
/*
- * Return the maximum physical address for ZONE_DMA32 (DMA_BIT_MASK(32)). It
- * currently assumes that for memory starting above 4G, 32-bit devices will
- * use a DMA offset.
+ * Return the maximum physical address for a zone with a given address size
+ * limit. It currently assumes that for memory starting above 4G, 32-bit
+ * devices will use a DMA offset.
*/
-static phys_addr_t __init max_zone_dma_phys(void)
+static phys_addr_t __init max_zone_phys(unsigned int zone_bits)
{
- phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, 32);
- return min(offset + (1ULL << 32), memblock_end_of_DRAM());
+ phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, zone_bits);
+ return min(offset + (1ULL << zone_bits), memblock_end_of_DRAM());
}
#ifdef CONFIG_NUMA
@@ -186,8 +197,11 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
{
unsigned long max_zone_pfns[MAX_NR_ZONES] = {0};
+#ifdef CONFIG_ZONE_DMA
+ max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit);
+#endif
#ifdef CONFIG_ZONE_DMA32
- max_zone_pfns[ZONE_DMA32] = PFN_DOWN(max_zone_dma_phys());
+ max_zone_pfns[ZONE_DMA32] = PFN_DOWN(arm64_dma32_phys_limit);
#endif
max_zone_pfns[ZONE_NORMAL] = max;
@@ -200,16 +214,21 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
{
struct memblock_region *reg;
unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES];
- unsigned long max_dma = min;
+ unsigned long max_dma32 = min;
+ unsigned long __maybe_unused max_dma = min;
memset(zone_size, 0, sizeof(zone_size));
- /* 4GB maximum for 32-bit only capable devices */
-#ifdef CONFIG_ZONE_DMA32
+#ifdef CONFIG_ZONE_DMA
max_dma = PFN_DOWN(arm64_dma_phys_limit);
- zone_size[ZONE_DMA32] = max_dma - min;
+ zone_size[ZONE_DMA] = max_dma - min;
+ max_dma32 = max_dma;
+#endif
+#ifdef CONFIG_ZONE_DMA32
+ max_dma32 = PFN_DOWN(arm64_dma32_phys_limit);
+ zone_size[ZONE_DMA32] = max_dma32 - max_dma;
#endif
- zone_size[ZONE_NORMAL] = max - max_dma;
+ zone_size[ZONE_NORMAL] = max - max_dma32;
memcpy(zhole_size, zone_size, sizeof(zhole_size));
@@ -219,16 +238,22 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
if (start >= max)
continue;
-
-#ifdef CONFIG_ZONE_DMA32
+#ifdef CONFIG_ZONE_DMA
if (start < max_dma) {
- unsigned long dma_end = min(end, max_dma);
- zhole_size[ZONE_DMA32] -= dma_end - start;
+ unsigned long dma_end = min_not_zero(end, max_dma);
+ zhole_size[ZONE_DMA] -= dma_end - start;
}
#endif
- if (end > max_dma) {
+#ifdef CONFIG_ZONE_DMA32
+ if (start < max_dma32) {
+ unsigned long dma32_end = min(end, max_dma32);
+ unsigned long dma32_start = max(start, max_dma);
+ zhole_size[ZONE_DMA32] -= dma32_end - dma32_start;
+ }
+#endif
+ if (end > max_dma32) {
unsigned long normal_end = min(end, max);
- unsigned long normal_start = max(start, max_dma);
+ unsigned long normal_start = max(start, max_dma32);
zhole_size[ZONE_NORMAL] -= normal_end - normal_start;
}
}
@@ -418,11 +443,15 @@ void __init arm64_memblock_init(void)
early_init_fdt_scan_reserved_mem();
- /* 4GB maximum for 32-bit only capable devices */
+ if (IS_ENABLED(CONFIG_ZONE_DMA)) {
+ zone_dma_bits = ARM64_ZONE_DMA_BITS;
+ arm64_dma_phys_limit = max_zone_phys(ARM64_ZONE_DMA_BITS);
+ }
+
if (IS_ENABLED(CONFIG_ZONE_DMA32))
- arm64_dma_phys_limit = max_zone_dma_phys();
+ arm64_dma32_phys_limit = max_zone_phys(32);
else
- arm64_dma_phys_limit = PHYS_MASK + 1;
+ arm64_dma32_phys_limit = PHYS_MASK + 1;
reserve_crashkernel();
@@ -430,7 +459,7 @@ void __init arm64_memblock_init(void)
high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
- dma_contiguous_reserve(arm64_dma_phys_limit);
+ dma_contiguous_reserve(arm64_dma32_phys_limit);
}
void __init bootmem_init(void)
@@ -534,7 +563,7 @@ static void __init free_unused_memmap(void)
void __init mem_init(void)
{
if (swiotlb_force == SWIOTLB_FORCE ||
- max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT))
+ max_pfn > PFN_DOWN(arm64_dma_phys_limit ? : arm64_dma32_phys_limit))
swiotlb_init(1);
else
swiotlb_force = SWIOTLB_NO_FORCE;
@@ -571,7 +600,7 @@ void free_initmem(void)
{
free_reserved_area(lm_alias(__init_begin),
lm_alias(__init_end),
- 0, "unused kernel");
+ POISON_FREE_INITMEM, "unused kernel");
/*
* Unmap the __init region but leave the VM area in place. This
* prevents the region from being reused for kernel modules, which
@@ -580,18 +609,6 @@ void free_initmem(void)
unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin));
}
-#ifdef CONFIG_BLK_DEV_INITRD
-void __init free_initrd_mem(unsigned long start, unsigned long end)
-{
- unsigned long aligned_start, aligned_end;
-
- aligned_start = __virt_to_phys(start) & PAGE_MASK;
- aligned_end = PAGE_ALIGN(__virt_to_phys(end));
- memblock_free(aligned_start, aligned_end - aligned_start);
- free_reserved_area((void *)start, (void *)end, 0, "initrd");
-}
-#endif
-
/*
* Dump out memory limit information on panic.
*/
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 60c929f3683b..a9f541912289 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -338,7 +338,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
phys_addr_t (*pgtable_alloc)(int),
int flags)
{
- unsigned long addr, length, end, next;
+ unsigned long addr, end, next;
pgd_t *pgdp = pgd_offset_raw(pgdir, virt);
/*
@@ -350,9 +350,8 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
phys &= PAGE_MASK;
addr = virt & PAGE_MASK;
- length = PAGE_ALIGN(size + (virt & ~PAGE_MASK));
+ end = PAGE_ALIGN(virt + size);
- end = addr + length;
do {
next = pgd_addr_end(addr, end);
alloc_init_pud(pgdp, addr, next, phys, prot, pgtable_alloc,
diff --git a/arch/mips/bcm63xx/prom.c b/arch/mips/bcm63xx/prom.c
index 77a836e661c9..df69eaa453a1 100644
--- a/arch/mips/bcm63xx/prom.c
+++ b/arch/mips/bcm63xx/prom.c
@@ -84,7 +84,7 @@ void __init prom_init(void)
* Here we will start up CPU1 in the background and ask it to
* reconfigure itself then go back to sleep.
*/
- memcpy((void *)0xa0000200, &bmips_smp_movevec, 0x20);
+ memcpy((void *)0xa0000200, bmips_smp_movevec, 0x20);
__sync();
set_c0_cause(C_SW0);
cpumask_set_cpu(1, &bmips_booted_mask);
diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig
index 16bef819fe98..914af125a7fa 100644
--- a/arch/mips/configs/mtx1_defconfig
+++ b/arch/mips/configs/mtx1_defconfig
@@ -571,7 +571,6 @@ CONFIG_USB_SERIAL_OMNINET=m
CONFIG_USB_EMI62=m
CONFIG_USB_EMI26=m
CONFIG_USB_ADUTUX=m
-CONFIG_USB_RIO500=m
CONFIG_USB_LEGOTOWER=m
CONFIG_USB_LCD=m
CONFIG_USB_CYPRESS_CY7C63=m
diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig
index 8762e75f5d5f..2c7adea7638f 100644
--- a/arch/mips/configs/rm200_defconfig
+++ b/arch/mips/configs/rm200_defconfig
@@ -314,7 +314,6 @@ CONFIG_USB_SERIAL_SAFE_PADDED=y
CONFIG_USB_SERIAL_CYBERJACK=m
CONFIG_USB_SERIAL_XIRCOM=m
CONFIG_USB_SERIAL_OMNINET=m
-CONFIG_USB_RIO500=m
CONFIG_USB_LEGOTOWER=m
CONFIG_USB_LCD=m
CONFIG_USB_CYTHERM=m
diff --git a/arch/mips/fw/sni/sniprom.c b/arch/mips/fw/sni/sniprom.c
index 8772617b64ce..80112f2298b6 100644
--- a/arch/mips/fw/sni/sniprom.c
+++ b/arch/mips/fw/sni/sniprom.c
@@ -43,7 +43,7 @@
/* O32 stack has to be 8-byte aligned. */
static u64 o32_stk[4096];
-#define O32_STK &o32_stk[sizeof(o32_stk)]
+#define O32_STK (&o32_stk[ARRAY_SIZE(o32_stk)])
#define __PROM_O32(fun, arg) fun arg __asm__(#fun); \
__asm__(#fun " = call_o32")
diff --git a/arch/mips/include/asm/bmips.h b/arch/mips/include/asm/bmips.h
index bf6a8afd7ad2..581a6a3c66e4 100644
--- a/arch/mips/include/asm/bmips.h
+++ b/arch/mips/include/asm/bmips.h
@@ -75,11 +75,11 @@ static inline int register_bmips_smp_ops(void)
#endif
}
-extern char bmips_reset_nmi_vec;
-extern char bmips_reset_nmi_vec_end;
-extern char bmips_smp_movevec;
-extern char bmips_smp_int_vec;
-extern char bmips_smp_int_vec_end;
+extern char bmips_reset_nmi_vec[];
+extern char bmips_reset_nmi_vec_end[];
+extern char bmips_smp_movevec[];
+extern char bmips_smp_int_vec[];
+extern char bmips_smp_int_vec_end[];
extern int bmips_smp_enabled;
extern int bmips_cpu_offset;
diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h
index 79bf34efbc04..f6136871561d 100644
--- a/arch/mips/include/asm/cmpxchg.h
+++ b/arch/mips/include/asm/cmpxchg.h
@@ -77,8 +77,8 @@ extern unsigned long __xchg_called_with_bad_pointer(void)
extern unsigned long __xchg_small(volatile void *ptr, unsigned long val,
unsigned int size);
-static inline unsigned long __xchg(volatile void *ptr, unsigned long x,
- int size)
+static __always_inline
+unsigned long __xchg(volatile void *ptr, unsigned long x, int size)
{
switch (size) {
case 1:
@@ -153,8 +153,9 @@ static inline unsigned long __xchg(volatile void *ptr, unsigned long x,
extern unsigned long __cmpxchg_small(volatile void *ptr, unsigned long old,
unsigned long new, unsigned int size);
-static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
- unsigned long new, unsigned int size)
+static __always_inline
+unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
+ unsigned long new, unsigned int size)
{
switch (size) {
case 1:
diff --git a/arch/mips/include/asm/vdso/gettimeofday.h b/arch/mips/include/asm/vdso/gettimeofday.h
index e78462e8ca2e..b08825531e9f 100644
--- a/arch/mips/include/asm/vdso/gettimeofday.h
+++ b/arch/mips/include/asm/vdso/gettimeofday.h
@@ -24,6 +24,8 @@
#define VDSO_HAS_CLOCK_GETRES 1
+#define __VDSO_USE_SYSCALL ULLONG_MAX
+
#ifdef CONFIG_MIPS_CLOCK_VSYSCALL
static __always_inline long gettimeofday_fallback(
@@ -205,7 +207,7 @@ static __always_inline u64 __arch_get_hw_counter(s32 clock_mode)
break;
#endif
default:
- cycle_now = 0;
+ cycle_now = __VDSO_USE_SYSCALL;
break;
}
diff --git a/arch/mips/include/asm/vdso/vsyscall.h b/arch/mips/include/asm/vdso/vsyscall.h
index 195314732233..00d41b94ba31 100644
--- a/arch/mips/include/asm/vdso/vsyscall.h
+++ b/arch/mips/include/asm/vdso/vsyscall.h
@@ -28,13 +28,6 @@ int __mips_get_clock_mode(struct timekeeper *tk)
}
#define __arch_get_clock_mode __mips_get_clock_mode
-static __always_inline
-int __mips_use_vsyscall(struct vdso_data *vdata)
-{
- return (vdata[CS_HRES_COARSE].clock_mode != VDSO_CLOCK_NONE);
-}
-#define __arch_use_vsyscall __mips_use_vsyscall
-
/* The asm-generic header needs to be included after the definitions above */
#include <asm-generic/vdso/vsyscall.h>
diff --git a/arch/mips/include/uapi/asm/hwcap.h b/arch/mips/include/uapi/asm/hwcap.h
index a2aba4b059e6..1ade1daa4921 100644
--- a/arch/mips/include/uapi/asm/hwcap.h
+++ b/arch/mips/include/uapi/asm/hwcap.h
@@ -6,5 +6,16 @@
#define HWCAP_MIPS_R6 (1 << 0)
#define HWCAP_MIPS_MSA (1 << 1)
#define HWCAP_MIPS_CRC32 (1 << 2)
+#define HWCAP_MIPS_MIPS16 (1 << 3)
+#define HWCAP_MIPS_MDMX (1 << 4)
+#define HWCAP_MIPS_MIPS3D (1 << 5)
+#define HWCAP_MIPS_SMARTMIPS (1 << 6)
+#define HWCAP_MIPS_DSP (1 << 7)
+#define HWCAP_MIPS_DSP2 (1 << 8)
+#define HWCAP_MIPS_DSP3 (1 << 9)
+#define HWCAP_MIPS_MIPS16E2 (1 << 10)
+#define HWCAP_LOONGSON_MMI (1 << 11)
+#define HWCAP_LOONGSON_EXT (1 << 12)
+#define HWCAP_LOONGSON_EXT2 (1 << 13)
#endif /* _UAPI_ASM_HWCAP_H */
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index c2eb392597bf..f521cbf934e7 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -2180,6 +2180,39 @@ void cpu_probe(void)
elf_hwcap |= HWCAP_MIPS_MSA;
}
+ if (cpu_has_mips16)
+ elf_hwcap |= HWCAP_MIPS_MIPS16;
+
+ if (cpu_has_mdmx)
+ elf_hwcap |= HWCAP_MIPS_MDMX;
+
+ if (cpu_has_mips3d)
+ elf_hwcap |= HWCAP_MIPS_MIPS3D;
+
+ if (cpu_has_smartmips)
+ elf_hwcap |= HWCAP_MIPS_SMARTMIPS;
+
+ if (cpu_has_dsp)
+ elf_hwcap |= HWCAP_MIPS_DSP;
+
+ if (cpu_has_dsp2)
+ elf_hwcap |= HWCAP_MIPS_DSP2;
+
+ if (cpu_has_dsp3)
+ elf_hwcap |= HWCAP_MIPS_DSP3;
+
+ if (cpu_has_mips16e2)
+ elf_hwcap |= HWCAP_MIPS_MIPS16E2;
+
+ if (cpu_has_loongson_mmi)
+ elf_hwcap |= HWCAP_LOONGSON_MMI;
+
+ if (cpu_has_loongson_ext)
+ elf_hwcap |= HWCAP_LOONGSON_EXT;
+
+ if (cpu_has_loongson_ext2)
+ elf_hwcap |= HWCAP_LOONGSON_EXT2;
+
if (cpu_has_vz)
cpu_probe_vz(c);
diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c
index 76fae9b79f13..712c15de6ab9 100644
--- a/arch/mips/kernel/smp-bmips.c
+++ b/arch/mips/kernel/smp-bmips.c
@@ -464,10 +464,10 @@ static void bmips_wr_vec(unsigned long dst, char *start, char *end)
static inline void bmips_nmi_handler_setup(void)
{
- bmips_wr_vec(BMIPS_NMI_RESET_VEC, &bmips_reset_nmi_vec,
- &bmips_reset_nmi_vec_end);
- bmips_wr_vec(BMIPS_WARM_RESTART_VEC, &bmips_smp_int_vec,
- &bmips_smp_int_vec_end);
+ bmips_wr_vec(BMIPS_NMI_RESET_VEC, bmips_reset_nmi_vec,
+ bmips_reset_nmi_vec_end);
+ bmips_wr_vec(BMIPS_WARM_RESTART_VEC, bmips_smp_int_vec,
+ bmips_smp_int_vec_end);
}
struct reset_vec_info {
diff --git a/arch/mips/loongson64/Platform b/arch/mips/loongson64/Platform
index c1a4d4dc4665..9f79908f5063 100644
--- a/arch/mips/loongson64/Platform
+++ b/arch/mips/loongson64/Platform
@@ -66,6 +66,10 @@ else
$(call cc-option,-march=mips64r2,-mips64r2 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64)
endif
+# Some -march= flags enable MMI instructions, and GCC complains about that
+# support being enabled alongside -msoft-float. Thus explicitly disable MMI.
+cflags-y += $(call cc-option,-mno-loongson-mmi)
+
#
# Loongson Machines' Support
#
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index e01cb33bfa1a..41bb91f05688 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -653,6 +653,13 @@ static void build_restore_pagemask(u32 **p, struct uasm_reloc **r,
int restore_scratch)
{
if (restore_scratch) {
+ /*
+ * Ensure the MFC0 below observes the value written to the
+ * KScratch register by the prior MTC0.
+ */
+ if (scratch_reg >= 0)
+ uasm_i_ehb(p);
+
/* Reset default page size */
if (PM_DEFAULT_MASK >> 16) {
uasm_i_lui(p, tmp, PM_DEFAULT_MASK >> 16);
@@ -667,12 +674,10 @@ static void build_restore_pagemask(u32 **p, struct uasm_reloc **r,
uasm_i_mtc0(p, 0, C0_PAGEMASK);
uasm_il_b(p, r, lid);
}
- if (scratch_reg >= 0) {
- uasm_i_ehb(p);
+ if (scratch_reg >= 0)
UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg);
- } else {
+ else
UASM_i_LW(p, 1, scratchpad_offset(0), 0);
- }
} else {
/* Reset default page size */
if (PM_DEFAULT_MASK >> 16) {
@@ -921,6 +926,10 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
}
if (mode != not_refill && check_for_high_segbits) {
uasm_l_large_segbits_fault(l, *p);
+
+ if (mode == refill_scratch && scratch_reg >= 0)
+ uasm_i_ehb(p);
+
/*
* We get here if we are an xsseg address, or if we are
* an xuseg address above (PGDIR_SHIFT+PGDIR_BITS) boundary.
@@ -939,12 +948,10 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
uasm_i_jr(p, ptr);
if (mode == refill_scratch) {
- if (scratch_reg >= 0) {
- uasm_i_ehb(p);
+ if (scratch_reg >= 0)
UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg);
- } else {
+ else
UASM_i_LW(p, 1, scratchpad_offset(0), 0);
- }
} else {
uasm_i_nop(p);
}
diff --git a/arch/mips/sgi-ip27/Kconfig b/arch/mips/sgi-ip27/Kconfig
index ef3847e7aee0..e5b6cadbec85 100644
--- a/arch/mips/sgi-ip27/Kconfig
+++ b/arch/mips/sgi-ip27/Kconfig
@@ -38,10 +38,3 @@ config REPLICATE_KTEXT
Say Y here to enable replicating the kernel text across multiple
nodes in a NUMA cluster. This trades memory for speed.
-config REPLICATE_EXHANDLERS
- bool "Exception handler replication support"
- depends on SGI_IP27
- help
- Say Y here to enable replicating the kernel exception handlers
- across multiple nodes in a NUMA cluster. This trades memory for
- speed.
diff --git a/arch/mips/sgi-ip27/ip27-init.c b/arch/mips/sgi-ip27/ip27-init.c
index 59d5375c9021..79a52c472782 100644
--- a/arch/mips/sgi-ip27/ip27-init.c
+++ b/arch/mips/sgi-ip27/ip27-init.c
@@ -69,23 +69,14 @@ static void per_hub_init(cnodeid_t cnode)
hub_rtc_init(cnode);
-#ifdef CONFIG_REPLICATE_EXHANDLERS
- /*
- * If this is not a headless node initialization,
- * copy over the caliased exception handlers.
- */
- if (get_compact_nodeid() == cnode) {
- extern char except_vec2_generic, except_vec3_generic;
- extern void build_tlb_refill_handler(void);
-
- memcpy((void *)(CKSEG0 + 0x100), &except_vec2_generic, 0x80);
- memcpy((void *)(CKSEG0 + 0x180), &except_vec3_generic, 0x80);
- build_tlb_refill_handler();
- memcpy((void *)(CKSEG0 + 0x100), (void *) CKSEG0, 0x80);
- memcpy((void *)(CKSEG0 + 0x180), &except_vec3_generic, 0x100);
+ if (nasid) {
+ /* copy exception handlers from first node to current node */
+ memcpy((void *)NODE_OFFSET_TO_K0(nasid, 0),
+ (void *)CKSEG0, 0x200);
__flush_cache_all();
+ /* switch to node local exception handlers */
+ REMOTE_HUB_S(nasid, PI_CALIAS_SIZE, PI_CALIAS_SIZE_8K);
}
-#endif
}
void per_cpu_init(void)
diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c
index fb077a947575..8624a885d95b 100644
--- a/arch/mips/sgi-ip27/ip27-memory.c
+++ b/arch/mips/sgi-ip27/ip27-memory.c
@@ -332,11 +332,7 @@ static void __init mlreset(void)
* thinks it is a node 0 address.
*/
REMOTE_HUB_S(nasid, PI_REGION_PRESENT, (region_mask | 1));
-#ifdef CONFIG_REPLICATE_EXHANDLERS
- REMOTE_HUB_S(nasid, PI_CALIAS_SIZE, PI_CALIAS_SIZE_8K);
-#else
REMOTE_HUB_S(nasid, PI_CALIAS_SIZE, PI_CALIAS_SIZE_0);
-#endif
#ifdef LATER
/*
diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile
index 807f0f782f75..996a934ece7d 100644
--- a/arch/mips/vdso/Makefile
+++ b/arch/mips/vdso/Makefile
@@ -15,6 +15,7 @@ ccflags-vdso := \
$(filter -mmicromips,$(KBUILD_CFLAGS)) \
$(filter -march=%,$(KBUILD_CFLAGS)) \
$(filter -m%-float,$(KBUILD_CFLAGS)) \
+ $(filter -mno-loongson-%,$(KBUILD_CFLAGS)) \
-D__VDSO__
ifdef CONFIG_CC_IS_CLANG
diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile
index 36b834f1c933..dca8f2de8cf5 100644
--- a/arch/parisc/Makefile
+++ b/arch/parisc/Makefile
@@ -60,7 +60,6 @@ KBUILD_CFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY=1 \
-DFTRACE_PATCHABLE_FUNCTION_SIZE=$(NOP_COUNT)
CC_FLAGS_FTRACE := -fpatchable-function-entry=$(NOP_COUNT),$(shell echo $$(($(NOP_COUNT)-1)))
-KBUILD_LDS_MODULE += $(srctree)/arch/parisc/kernel/module.lds
endif
OBJCOPY_FLAGS =-O binary -R .note -R .comment -S
diff --git a/arch/parisc/include/asm/cache.h b/arch/parisc/include/asm/cache.h
index 73ca89a47f49..e5de3f897633 100644
--- a/arch/parisc/include/asm/cache.h
+++ b/arch/parisc/include/asm/cache.h
@@ -22,7 +22,7 @@
#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
-#define __read_mostly __attribute__((__section__(".data..read_mostly")))
+#define __read_mostly __section(.data..read_mostly)
void parisc_cache_init(void); /* initializes cache-flushing */
void disable_sr_hashing_asm(int); /* low level support for above */
diff --git a/arch/parisc/include/asm/ldcw.h b/arch/parisc/include/asm/ldcw.h
index 3eb4bfc1fb36..e080143e79a3 100644
--- a/arch/parisc/include/asm/ldcw.h
+++ b/arch/parisc/include/asm/ldcw.h
@@ -52,7 +52,7 @@
})
#ifdef CONFIG_SMP
-# define __lock_aligned __attribute__((__section__(".data..lock_aligned")))
+# define __lock_aligned __section(.data..lock_aligned)
#endif
#endif /* __PARISC_LDCW_H */
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index 1d1d748c227f..b96d74496977 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -2125,7 +2125,7 @@ ftrace_regs_caller:
copy %rp, %r26
LDREG -FTRACE_FRAME_SIZE-PT_SZ_ALGN(%sp), %r25
ldo -8(%r25), %r25
- copy %r3, %arg2
+ ldo -FTRACE_FRAME_SIZE(%r1), %arg2
b,l ftrace_function_trampoline, %rp
copy %r1, %arg3 /* struct pt_regs */
diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c
index ac5f34993b53..1c50093e2ebe 100644
--- a/arch/parisc/kernel/module.c
+++ b/arch/parisc/kernel/module.c
@@ -43,6 +43,7 @@
#include <linux/elf.h>
#include <linux/vmalloc.h>
#include <linux/fs.h>
+#include <linux/ftrace.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/bug.h>
@@ -862,7 +863,7 @@ int module_finalize(const Elf_Ehdr *hdr,
const char *strtab = NULL;
const Elf_Shdr *s;
char *secstrings;
- int err, symindex = -1;
+ int symindex = -1;
Elf_Sym *newptr, *oldptr;
Elf_Shdr *symhdr = NULL;
#ifdef DEBUG
@@ -946,11 +947,13 @@ int module_finalize(const Elf_Ehdr *hdr,
/* patch .altinstructions */
apply_alternatives(aseg, aseg + s->sh_size, me->name);
+#ifdef CONFIG_DYNAMIC_FTRACE
/* For 32 bit kernels we're compiling modules with
* -ffunction-sections so we must relocate the addresses in the
- *__mcount_loc section.
+ * ftrace callsite section.
*/
- if (symindex != -1 && !strcmp(secname, "__mcount_loc")) {
+ if (symindex != -1 && !strcmp(secname, FTRACE_CALLSITE_SECTION)) {
+ int err;
if (s->sh_type == SHT_REL)
err = apply_relocate((Elf_Shdr *)sechdrs,
strtab, symindex,
@@ -962,6 +965,7 @@ int module_finalize(const Elf_Ehdr *hdr,
if (err)
return err;
}
+#endif
}
return 0;
}
diff --git a/arch/parisc/kernel/module.lds b/arch/parisc/kernel/module.lds
deleted file mode 100644
index 1a9a92aca5c8..000000000000
--- a/arch/parisc/kernel/module.lds
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-SECTIONS {
- __mcount_loc : {
- *(__patchable_function_entries)
- }
-}
diff --git a/arch/parisc/mm/ioremap.c b/arch/parisc/mm/ioremap.c
index 92a9b5f12f98..f29f682352f0 100644
--- a/arch/parisc/mm/ioremap.c
+++ b/arch/parisc/mm/ioremap.c
@@ -3,7 +3,7 @@
* arch/parisc/mm/ioremap.c
*
* (C) Copyright 1995 1996 Linus Torvalds
- * (C) Copyright 2001-2006 Helge Deller <deller@gmx.de>
+ * (C) Copyright 2001-2019 Helge Deller <deller@gmx.de>
* (C) Copyright 2005 Kyle McMartin <kyle@parisc-linux.org>
*/
@@ -84,7 +84,7 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l
addr = (void __iomem *) area->addr;
if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size,
phys_addr, pgprot)) {
- vfree(addr);
+ vunmap(addr);
return NULL;
}
@@ -92,9 +92,11 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l
}
EXPORT_SYMBOL(__ioremap);
-void iounmap(const volatile void __iomem *addr)
+void iounmap(const volatile void __iomem *io_addr)
{
- if (addr > high_memory)
- return vfree((void *) (PAGE_MASK & (unsigned long __force) addr));
+ unsigned long addr = (unsigned long)io_addr & PAGE_MASK;
+
+ if (is_vmalloc_addr((void *)addr))
+ vunmap((void *)addr);
}
EXPORT_SYMBOL(iounmap);
diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
index 677e9babef80..f9dc597b0b86 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -91,6 +91,7 @@
static inline void kuap_update_sr(u32 sr, u32 addr, u32 end)
{
+ addr &= 0xf0000000; /* align addr to start of segment */
barrier(); /* make sure thread.kuap is updated before playing with SRs */
while (addr < end) {
mtsrin(sr, addr);
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index 4ce795d30377..ca8db193ae38 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -35,6 +35,10 @@ static inline void radix__flush_all_lpid(unsigned int lpid)
{
WARN_ON(1);
}
+static inline void radix__flush_all_lpid_guest(unsigned int lpid)
+{
+ WARN_ON(1);
+}
#endif
extern void radix__flush_hugetlb_tlb_range(struct vm_area_struct *vma,
diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h
index 409c9bfb43d9..57c229a86f08 100644
--- a/arch/powerpc/include/asm/elf.h
+++ b/arch/powerpc/include/asm/elf.h
@@ -175,4 +175,7 @@ do { \
ARCH_DLINFO_CACHE_GEOMETRY; \
} while (0)
+/* Relocate the kernel image to @final_address */
+void relocate(unsigned long final_address);
+
#endif /* _ASM_POWERPC_ELF_H */
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index c8bb14ff4713..f6c562acc3f8 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -329,13 +329,4 @@ struct vm_area_struct;
#endif /* __ASSEMBLY__ */
#include <asm/slice.h>
-/*
- * Allow 30-bit DMA for very limited Broadcom wifi chips on many powerbooks.
- */
-#ifdef CONFIG_PPC32
-#define ARCH_ZONE_DMA_BITS 30
-#else
-#define ARCH_ZONE_DMA_BITS 31
-#endif
-
#endif /* _ASM_POWERPC_PAGE_H */
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index a4e7762dd286..100f1b57ec2f 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -3249,7 +3249,20 @@ static void setup_secure_guest(unsigned long kbase, unsigned long fdt)
/* Switch to secure mode. */
prom_printf("Switching to secure mode.\n");
+ /*
+ * The ultravisor will do an integrity check of the kernel image but we
+ * relocated it so the check will fail. Restore the original image by
+ * relocating it back to the kernel virtual base address.
+ */
+ if (IS_ENABLED(CONFIG_RELOCATABLE))
+ relocate(KERNELBASE);
+
ret = enter_secure_mode(kbase, fdt);
+
+ /* Relocate the kernel again. */
+ if (IS_ENABLED(CONFIG_RELOCATABLE))
+ relocate(kbase);
+
if (ret != U_SUCCESS) {
prom_printf("Returned %d from switching to secure mode.\n", ret);
prom_rtas_os_term("Switch to secure mode failed.\n");
diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh
index 78bab17b1396..b183ab9c5107 100644
--- a/arch/powerpc/kernel/prom_init_check.sh
+++ b/arch/powerpc/kernel/prom_init_check.sh
@@ -26,7 +26,8 @@ _end enter_prom $MEM_FUNCS reloc_offset __secondary_hold
__secondary_hold_acknowledge __secondary_hold_spinloop __start
logo_linux_clut224 btext_prepare_BAT
reloc_got2 kernstart_addr memstart_addr linux_banner _stext
-__prom_init_toc_start __prom_init_toc_end btext_setup_display TOC."
+__prom_init_toc_start __prom_init_toc_end btext_setup_display TOC.
+relocate"
NM="$1"
OBJ="$2"
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 74a9cfe84aee..faebcbb8c4db 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1921,6 +1921,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
mtspr SPRN_PCR, r6
18:
/* Signal secondary CPUs to continue */
+ li r0, 0
stb r0,VCORE_IN_GUEST(r5)
19: lis r8,0x7fff /* MAX_INT@h */
mtspr SPRN_HDEC,r8
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index 591bfb4bfd0f..a3f9c665bb5b 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -1217,6 +1217,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
struct kvmppc_xive *xive = dev->private;
struct kvmppc_xive_vcpu *xc;
int i, r = -EBUSY;
+ u32 vp_id;
pr_devel("connect_vcpu(cpu=%d)\n", cpu);
@@ -1228,25 +1229,32 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
return -EPERM;
if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
return -EBUSY;
- if (kvmppc_xive_find_server(vcpu->kvm, cpu)) {
- pr_devel("Duplicate !\n");
- return -EEXIST;
- }
if (cpu >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) {
pr_devel("Out of bounds !\n");
return -EINVAL;
}
- xc = kzalloc(sizeof(*xc), GFP_KERNEL);
- if (!xc)
- return -ENOMEM;
/* We need to synchronize with queue provisioning */
mutex_lock(&xive->lock);
+
+ vp_id = kvmppc_xive_vp(xive, cpu);
+ if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) {
+ pr_devel("Duplicate !\n");
+ r = -EEXIST;
+ goto bail;
+ }
+
+ xc = kzalloc(sizeof(*xc), GFP_KERNEL);
+ if (!xc) {
+ r = -ENOMEM;
+ goto bail;
+ }
+
vcpu->arch.xive_vcpu = xc;
xc->xive = xive;
xc->vcpu = vcpu;
xc->server_num = cpu;
- xc->vp_id = kvmppc_xive_vp(xive, cpu);
+ xc->vp_id = vp_id;
xc->mfrr = 0xff;
xc->valid = true;
diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
index 955b820ffd6d..fe3ed50e0818 100644
--- a/arch/powerpc/kvm/book3s_xive.h
+++ b/arch/powerpc/kvm/book3s_xive.h
@@ -220,6 +220,18 @@ static inline u32 kvmppc_xive_vp(struct kvmppc_xive *xive, u32 server)
return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server);
}
+static inline bool kvmppc_xive_vp_in_use(struct kvm *kvm, u32 vp_id)
+{
+ struct kvm_vcpu *vcpu = NULL;
+ int i;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (vcpu->arch.xive_vcpu && vp_id == vcpu->arch.xive_vcpu->vp_id)
+ return true;
+ }
+ return false;
+}
+
/*
* Mapping between guest priorities and host priorities
* is as follow.
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
index 248c1ea9e788..78b906ffa0d2 100644
--- a/arch/powerpc/kvm/book3s_xive_native.c
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -106,6 +106,7 @@ int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
struct kvmppc_xive *xive = dev->private;
struct kvmppc_xive_vcpu *xc = NULL;
int rc;
+ u32 vp_id;
pr_devel("native_connect_vcpu(server=%d)\n", server_num);
@@ -124,7 +125,8 @@ int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
mutex_lock(&xive->lock);
- if (kvmppc_xive_find_server(vcpu->kvm, server_num)) {
+ vp_id = kvmppc_xive_vp(xive, server_num);
+ if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) {
pr_devel("Duplicate !\n");
rc = -EEXIST;
goto bail;
@@ -141,7 +143,7 @@ int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
xc->vcpu = vcpu;
xc->server_num = server_num;
- xc->vp_id = kvmppc_xive_vp(xive, server_num);
+ xc->vp_id = vp_id;
xc->valid = true;
vcpu->arch.irq_type = KVMPPC_IRQ_XIVE;
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index be941d382c8d..c95b7fe9f298 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -31,6 +31,7 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/memremap.h>
+#include <linux/dma-direct.h>
#include <asm/pgalloc.h>
#include <asm/prom.h>
@@ -201,10 +202,10 @@ static int __init mark_nonram_nosave(void)
* everything else. GFP_DMA32 page allocations automatically fall back to
* ZONE_DMA.
*
- * By using 31-bit unconditionally, we can exploit ARCH_ZONE_DMA_BITS to
- * inform the generic DMA mapping code. 32-bit only devices (if not handled
- * by an IOMMU anyway) will take a first dip into ZONE_NORMAL and get
- * otherwise served by ZONE_DMA.
+ * By using 31-bit unconditionally, we can exploit zone_dma_bits to inform the
+ * generic DMA mapping code. 32-bit only devices (if not handled by an IOMMU
+ * anyway) will take a first dip into ZONE_NORMAL and get otherwise served by
+ * ZONE_DMA.
*/
static unsigned long max_zone_pfns[MAX_NR_ZONES];
@@ -237,9 +238,18 @@ void __init paging_init(void)
printk(KERN_DEBUG "Memory hole size: %ldMB\n",
(long int)((top_of_ram - total_ram) >> 20));
+ /*
+ * Allow 30-bit DMA for very limited Broadcom wifi chips on many
+ * powerbooks.
+ */
+ if (IS_ENABLED(CONFIG_PPC32))
+ zone_dma_bits = 30;
+ else
+ zone_dma_bits = 31;
+
#ifdef CONFIG_ZONE_DMA
max_zone_pfns[ZONE_DMA] = min(max_low_pfn,
- 1UL << (ARCH_ZONE_DMA_BITS - PAGE_SHIFT));
+ 1UL << (zone_dma_bits - PAGE_SHIFT));
#endif
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
#ifdef CONFIG_HIGHMEM
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 02a59946a78a..be3517ef0574 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -1142,6 +1142,19 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
}
/*
+ * If we have seen a tail call, we need a second pass.
+ * This is because bpf_jit_emit_common_epilogue() is called
+ * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen.
+ */
+ if (cgctx.seen & SEEN_TAILCALL) {
+ cgctx.idx = 0;
+ if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
+ fp = org_fp;
+ goto out_addrs;
+ }
+ }
+
+ /*
* Pretend to build prologue, given the features we've seen. This will
* update ctgtx.idx as it pretends to output instructions, then we can
* calculate total size from idx.
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 1d93e55a2de1..2dd452a047cd 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -761,6 +761,7 @@ static int spufs_init_fs_context(struct fs_context *fc)
ctx->gid = current_gid();
ctx->mode = 0755;
+ fc->fs_private = ctx;
fc->s_fs_info = sbi;
fc->ops = &spufs_context_ops;
return 0;
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 6bc24a47e9ef..6f300ab7f0e9 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -42,7 +42,7 @@ void pnv_pcibios_bus_add_device(struct pci_dev *pdev)
{
struct pci_dn *pdn = pci_get_pdn(pdev);
- if (eeh_has_flag(EEH_FORCE_DISABLED))
+ if (!pdn || eeh_has_flag(EEH_FORCE_DISABLED))
return;
dev_dbg(&pdev->dev, "EEH: Setting up device\n");
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index fbd6e6b7bbf2..13e251699346 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -146,20 +146,25 @@ static int pnv_smp_cpu_disable(void)
return 0;
}
+static void pnv_flush_interrupts(void)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ if (xive_enabled())
+ xive_flush_interrupt();
+ else
+ icp_opal_flush_interrupt();
+ } else {
+ icp_native_flush_interrupt();
+ }
+}
+
static void pnv_smp_cpu_kill_self(void)
{
+ unsigned long srr1, unexpected_mask, wmask;
unsigned int cpu;
- unsigned long srr1, wmask;
u64 lpcr_val;
/* Standard hot unplug procedure */
- /*
- * This hard disables local interurpts, ensuring we have no lazy
- * irqs pending.
- */
- WARN_ON(irqs_disabled());
- hard_irq_disable();
- WARN_ON(lazy_irq_pending());
idle_task_exit();
current->active_mm = NULL; /* for sanity */
@@ -173,6 +178,27 @@ static void pnv_smp_cpu_kill_self(void)
wmask = SRR1_WAKEMASK_P8;
/*
+ * This turns the irq soft-disabled state we're called with, into a
+ * hard-disabled state with pending irq_happened interrupts cleared.
+ *
+ * PACA_IRQ_DEC - Decrementer should be ignored.
+ * PACA_IRQ_HMI - Can be ignored, processing is done in real mode.
+ * PACA_IRQ_DBELL, EE, PMI - Unexpected.
+ */
+ hard_irq_disable();
+ if (generic_check_cpu_restart(cpu))
+ goto out;
+
+ unexpected_mask = ~(PACA_IRQ_DEC | PACA_IRQ_HMI | PACA_IRQ_HARD_DIS);
+ if (local_paca->irq_happened & unexpected_mask) {
+ if (local_paca->irq_happened & PACA_IRQ_EE)
+ pnv_flush_interrupts();
+ DBG("CPU%d Unexpected exit while offline irq_happened=%lx!\n",
+ cpu, local_paca->irq_happened);
+ }
+ local_paca->irq_happened = PACA_IRQ_HARD_DIS;
+
+ /*
* We don't want to take decrementer interrupts while we are
* offline, so clear LPCR:PECE1. We keep PECE2 (and
* LPCR_PECE_HVEE on P9) enabled so as to let IPIs in.
@@ -197,6 +223,7 @@ static void pnv_smp_cpu_kill_self(void)
srr1 = pnv_cpu_offline(cpu);
+ WARN_ON_ONCE(!irqs_disabled());
WARN_ON(lazy_irq_pending());
/*
@@ -212,13 +239,7 @@ static void pnv_smp_cpu_kill_self(void)
*/
if (((srr1 & wmask) == SRR1_WAKEEE) ||
((srr1 & wmask) == SRR1_WAKEHVI)) {
- if (cpu_has_feature(CPU_FTR_ARCH_300)) {
- if (xive_enabled())
- xive_flush_interrupt();
- else
- icp_opal_flush_interrupt();
- } else
- icp_native_flush_interrupt();
+ pnv_flush_interrupts();
} else if ((srr1 & wmask) == SRR1_WAKEHDBELL) {
unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
asm volatile(PPC_MSGCLR(%0) : : "r" (msg));
@@ -266,7 +287,7 @@ static void pnv_smp_cpu_kill_self(void)
*/
lpcr_val = mfspr(SPRN_LPCR) | (u64)LPCR_PECE1;
pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val);
-
+out:
DBG("CPU%d coming online...\n", cpu);
}
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index b53359258d99..f87a5c64e24d 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -1419,6 +1419,9 @@ void __init pseries_lpar_read_hblkrm_characteristics(void)
unsigned char local_buffer[SPLPAR_TLB_BIC_MAXLENGTH];
int call_status, len, idx, bpsize;
+ if (!firmware_has_feature(FW_FEATURE_BLOCK_REMOVE))
+ return;
+
spin_lock(&rtas_data_buf_lock);
memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE);
call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
index 104d334511cd..88cfcb96bf23 100644
--- a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
+++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
@@ -13,6 +13,7 @@
compatible = "sifive,hifive-unleashed-a00", "sifive,fu540-c000";
chosen {
+ stdout-path = "serial0";
};
cpus {
diff --git a/arch/riscv/include/asm/bug.h b/arch/riscv/include/asm/bug.h
index 07ceee8b1747..75604fec1b1b 100644
--- a/arch/riscv/include/asm/bug.h
+++ b/arch/riscv/include/asm/bug.h
@@ -12,7 +12,6 @@
#include <asm/asm.h>
-#ifdef CONFIG_GENERIC_BUG
#define __INSN_LENGTH_MASK _UL(0x3)
#define __INSN_LENGTH_32 _UL(0x3)
#define __COMPRESSED_INSN_MASK _UL(0xffff)
@@ -20,7 +19,6 @@
#define __BUG_INSN_32 _UL(0x00100073) /* ebreak */
#define __BUG_INSN_16 _UL(0x9002) /* c.ebreak */
-#ifndef __ASSEMBLY__
typedef u32 bug_insn_t;
#ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
@@ -43,6 +41,7 @@ typedef u32 bug_insn_t;
RISCV_SHORT " %2"
#endif
+#ifdef CONFIG_GENERIC_BUG
#define __BUG_FLAGS(flags) \
do { \
__asm__ __volatile__ ( \
@@ -58,14 +57,10 @@ do { \
"i" (flags), \
"i" (sizeof(struct bug_entry))); \
} while (0)
-
-#endif /* !__ASSEMBLY__ */
#else /* CONFIG_GENERIC_BUG */
-#ifndef __ASSEMBLY__
#define __BUG_FLAGS(flags) do { \
__asm__ __volatile__ ("ebreak\n"); \
} while (0)
-#endif /* !__ASSEMBLY__ */
#endif /* CONFIG_GENERIC_BUG */
#define BUG() do { \
@@ -79,15 +74,10 @@ do { \
#include <asm-generic/bug.h>
-#ifndef __ASSEMBLY__
-
struct pt_regs;
struct task_struct;
-extern void die(struct pt_regs *regs, const char *str);
-extern void do_trap(struct pt_regs *regs, int signo, int code,
- unsigned long addr);
-
-#endif /* !__ASSEMBLY__ */
+void die(struct pt_regs *regs, const char *str);
+void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr);
#endif /* _ASM_RISCV_BUG_H */
diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h
index fc1189ad3777..3ba4d93721d3 100644
--- a/arch/riscv/include/asm/io.h
+++ b/arch/riscv/include/asm/io.h
@@ -13,6 +13,7 @@
#include <linux/types.h>
#include <asm/mmiowb.h>
+#include <asm/pgtable.h>
extern void __iomem *ioremap(phys_addr_t offset, unsigned long size);
@@ -162,6 +163,12 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
#endif
/*
+ * I/O port access constants.
+ */
+#define IO_SPACE_LIMIT (PCI_IO_SIZE - 1)
+#define PCI_IOBASE ((void __iomem *)PCI_IO_START)
+
+/*
* Emulation routines for the port-mapped IO space used by some PCI drivers.
* These are defined as being "fully synchronous", but also "not guaranteed to
* be fully ordered with respect to other memory and I/O operations". We're
diff --git a/arch/riscv/include/asm/irq.h b/arch/riscv/include/asm/irq.h
index 75576424c0f7..6e1b0e0325eb 100644
--- a/arch/riscv/include/asm/irq.h
+++ b/arch/riscv/include/asm/irq.h
@@ -7,6 +7,9 @@
#ifndef _ASM_RISCV_IRQ_H
#define _ASM_RISCV_IRQ_H
+#include <linux/interrupt.h>
+#include <linux/linkage.h>
+
#define NR_IRQS 0
void riscv_timer_interrupt(void);
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 7255f2d8395b..d3221017194d 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -7,6 +7,7 @@
#define _ASM_RISCV_PGTABLE_H
#include <linux/mmzone.h>
+#include <linux/sizes.h>
#include <asm/pgtable-bits.h>
@@ -86,14 +87,7 @@ extern pgd_t swapper_pg_dir[];
#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
#define VMALLOC_END (PAGE_OFFSET - 1)
#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE)
-
-#define FIXADDR_TOP VMALLOC_START
-#ifdef CONFIG_64BIT
-#define FIXADDR_SIZE PMD_SIZE
-#else
-#define FIXADDR_SIZE PGDIR_SIZE
-#endif
-#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
+#define PCI_IO_SIZE SZ_16M
/*
* Roughly size the vmemmap space to be large enough to fit enough
@@ -108,6 +102,17 @@ extern pgd_t swapper_pg_dir[];
#define vmemmap ((struct page *)VMEMMAP_START)
+#define PCI_IO_END VMEMMAP_START
+#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE)
+#define FIXADDR_TOP PCI_IO_START
+
+#ifdef CONFIG_64BIT
+#define FIXADDR_SIZE PMD_SIZE
+#else
+#define FIXADDR_SIZE PGDIR_SIZE
+#endif
+#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
+
/*
* ZERO_PAGE is a global shared page that is always zero,
* used for zero-mapped memory areas, etc.
@@ -184,10 +189,7 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
return __pte((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
}
-static inline pte_t mk_pte(struct page *page, pgprot_t prot)
-{
- return pfn_pte(page_to_pfn(page), prot);
-}
+#define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot)
#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
@@ -428,9 +430,7 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-#ifdef CONFIG_FLATMEM
#define kern_addr_valid(addr) (1) /* FIXME */
-#endif
extern void *dtb_early_va;
extern void setup_bootmem(void);
diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h
index f0227bdce0f0..ee4f0ac62c9d 100644
--- a/arch/riscv/include/asm/switch_to.h
+++ b/arch/riscv/include/asm/switch_to.h
@@ -6,6 +6,7 @@
#ifndef _ASM_RISCV_SWITCH_TO_H
#define _ASM_RISCV_SWITCH_TO_H
+#include <linux/sched/task_stack.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
#include <asm/csr.h>
diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
index 37ae4e367ad2..f02188a5b0f4 100644
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -10,10 +10,6 @@
#include <linux/mm_types.h>
#include <asm/smp.h>
-/*
- * Flush entire local TLB. 'sfence.vma' implicitly fences with the instruction
- * cache as well, so a 'fence.i' is not necessary.
- */
static inline void local_flush_tlb_all(void)
{
__asm__ __volatile__ ("sfence.vma" : : : "memory");
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index b1ade9a49347..a5ad00043104 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -10,6 +10,7 @@
#include <asm/processor.h>
#include <asm/hwcap.h>
#include <asm/smp.h>
+#include <asm/switch_to.h>
unsigned long elf_hwcap __read_mostly;
#ifdef CONFIG_FPU
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 2d592da1e776..8ca479831142 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -273,12 +273,11 @@ restore_all:
resume_kernel:
REG_L s0, TASK_TI_PREEMPT_COUNT(tp)
bnez s0, restore_all
-need_resched:
REG_L s0, TASK_TI_FLAGS(tp)
andi s0, s0, _TIF_NEED_RESCHED
beqz s0, restore_all
call preempt_schedule_irq
- j need_resched
+ j restore_all
#endif
work_pending:
diff --git a/arch/riscv/kernel/head.h b/arch/riscv/kernel/head.h
new file mode 100644
index 000000000000..105fb0496b24
--- /dev/null
+++ b/arch/riscv/kernel/head.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 SiFive, Inc.
+ */
+#ifndef __ASM_HEAD_H
+#define __ASM_HEAD_H
+
+#include <linux/linkage.h>
+#include <linux/init.h>
+
+extern atomic_t hart_lottery;
+
+asmlinkage void do_page_fault(struct pt_regs *regs);
+asmlinkage void __init setup_vm(uintptr_t dtb_pa);
+
+extern void *__cpu_up_stack_pointer[];
+extern void *__cpu_up_task_pointer[];
+
+void __init parse_dtb(void);
+
+#endif /* __ASM_HEAD_H */
diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c
index 6d8659388c49..fffac6ddb0e0 100644
--- a/arch/riscv/kernel/irq.c
+++ b/arch/riscv/kernel/irq.c
@@ -24,7 +24,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
return 0;
}
-asmlinkage void __irq_entry do_IRQ(struct pt_regs *regs)
+asmlinkage __visible void __irq_entry do_IRQ(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
diff --git a/arch/riscv/kernel/module-sections.c b/arch/riscv/kernel/module-sections.c
index c9ae48333114..e264e59e596e 100644
--- a/arch/riscv/kernel/module-sections.c
+++ b/arch/riscv/kernel/module-sections.c
@@ -8,6 +8,7 @@
#include <linux/elf.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/moduleloader.h>
unsigned long module_emit_got_entry(struct module *mod, unsigned long val)
{
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index fb3a082362eb..85e3c39bb60b 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -7,6 +7,7 @@
* Copyright (C) 2017 SiFive
*/
+#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
@@ -19,6 +20,7 @@
#include <asm/csr.h>
#include <asm/string.h>
#include <asm/switch_to.h>
+#include <asm/thread_info.h>
extern asmlinkage void ret_from_fork(void);
extern asmlinkage void ret_from_kernel_thread(void);
diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c
index 368751438366..1252113ef8b2 100644
--- a/arch/riscv/kernel/ptrace.c
+++ b/arch/riscv/kernel/ptrace.c
@@ -148,7 +148,7 @@ long arch_ptrace(struct task_struct *child, long request,
* Allows PTRACE_SYSCALL to work. These are called from entry.S in
* {handle,ret_from}_syscall.
*/
-void do_syscall_trace_enter(struct pt_regs *regs)
+__visible void do_syscall_trace_enter(struct pt_regs *regs)
{
if (test_thread_flag(TIF_SYSCALL_TRACE))
if (tracehook_report_syscall_entry(regs))
@@ -162,7 +162,7 @@ void do_syscall_trace_enter(struct pt_regs *regs)
audit_syscall_entry(regs->a7, regs->a0, regs->a1, regs->a2, regs->a3);
}
-void do_syscall_trace_exit(struct pt_regs *regs)
+__visible void do_syscall_trace_exit(struct pt_regs *regs)
{
audit_syscall_exit(regs);
diff --git a/arch/riscv/kernel/reset.c b/arch/riscv/kernel/reset.c
index d0fe623bfb8f..aa56bb135ec4 100644
--- a/arch/riscv/kernel/reset.c
+++ b/arch/riscv/kernel/reset.c
@@ -4,6 +4,7 @@
*/
#include <linux/reboot.h>
+#include <linux/pm.h>
#include <asm/sbi.h>
static void default_power_off(void)
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index a990a6cb184f..845ae0e12115 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -24,6 +24,8 @@
#include <asm/tlbflush.h>
#include <asm/thread_info.h>
+#include "head.h"
+
#ifdef CONFIG_DUMMY_CONSOLE
struct screen_info screen_info = {
.orig_video_lines = 30,
diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c
index b14d7647d800..d0f6f212f5df 100644
--- a/arch/riscv/kernel/signal.c
+++ b/arch/riscv/kernel/signal.c
@@ -26,7 +26,7 @@ struct rt_sigframe {
#ifdef CONFIG_FPU
static long restore_fp_state(struct pt_regs *regs,
- union __riscv_fp_state *sc_fpregs)
+ union __riscv_fp_state __user *sc_fpregs)
{
long err;
struct __riscv_d_ext_state __user *state = &sc_fpregs->d;
@@ -53,7 +53,7 @@ static long restore_fp_state(struct pt_regs *regs,
}
static long save_fp_state(struct pt_regs *regs,
- union __riscv_fp_state *sc_fpregs)
+ union __riscv_fp_state __user *sc_fpregs)
{
long err;
struct __riscv_d_ext_state __user *state = &sc_fpregs->d;
@@ -292,8 +292,8 @@ static void do_signal(struct pt_regs *regs)
* notification of userspace execution resumption
* - triggered by the _TIF_WORK_MASK flags
*/
-asmlinkage void do_notify_resume(struct pt_regs *regs,
- unsigned long thread_info_flags)
+asmlinkage __visible void do_notify_resume(struct pt_regs *regs,
+ unsigned long thread_info_flags)
{
/* Handle pending signal delivery */
if (thread_info_flags & _TIF_SIGPENDING)
diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c
index b18cd6c8e8fb..5c9ec78422c2 100644
--- a/arch/riscv/kernel/smp.c
+++ b/arch/riscv/kernel/smp.c
@@ -8,7 +8,9 @@
* Copyright (C) 2017 SiFive
*/
+#include <linux/cpu.h>
#include <linux/interrupt.h>
+#include <linux/profile.h>
#include <linux/smp.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index 18ae6da5115e..261f4087cc39 100644
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -29,6 +29,9 @@
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/sbi.h>
+#include <asm/smp.h>
+
+#include "head.h"
void *__cpu_up_stack_pointer[NR_CPUS];
void *__cpu_up_task_pointer[NR_CPUS];
@@ -130,7 +133,7 @@ void __init smp_cpus_done(unsigned int max_cpus)
/*
* C entry point for a secondary processor.
*/
-asmlinkage void __init smp_callin(void)
+asmlinkage __visible void __init smp_callin(void)
{
struct mm_struct *mm = &init_mm;
diff --git a/arch/riscv/kernel/syscall_table.c b/arch/riscv/kernel/syscall_table.c
index e5dd52d8f633..f1ead9df96ca 100644
--- a/arch/riscv/kernel/syscall_table.c
+++ b/arch/riscv/kernel/syscall_table.c
@@ -8,6 +8,7 @@
#include <linux/syscalls.h>
#include <asm-generic/syscalls.h>
#include <asm/vdso.h>
+#include <asm/syscall.h>
#undef __SYSCALL
#define __SYSCALL(nr, call) [nr] = (call),
diff --git a/arch/riscv/kernel/time.c b/arch/riscv/kernel/time.c
index 9dd1f2e64db1..6a53c02e9c73 100644
--- a/arch/riscv/kernel/time.c
+++ b/arch/riscv/kernel/time.c
@@ -7,6 +7,7 @@
#include <linux/clocksource.h>
#include <linux/delay.h>
#include <asm/sbi.h>
+#include <asm/processor.h>
unsigned long riscv_timebase;
EXPORT_SYMBOL_GPL(riscv_timebase);
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index 424eb72d56b1..473de3ae8bb7 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -3,6 +3,7 @@
* Copyright (C) 2012 Regents of the University of California
*/
+#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/sched.h>
@@ -83,7 +84,7 @@ static void do_trap_error(struct pt_regs *regs, int signo, int code,
}
#define DO_ERROR_INFO(name, signo, code, str) \
-asmlinkage void name(struct pt_regs *regs) \
+asmlinkage __visible void name(struct pt_regs *regs) \
{ \
do_trap_error(regs, signo, code, regs->sepc, "Oops - " str); \
}
@@ -111,7 +112,6 @@ DO_ERROR_INFO(do_trap_ecall_s,
DO_ERROR_INFO(do_trap_ecall_m,
SIGILL, ILL_ILLTRP, "environment call from M-mode");
-#ifdef CONFIG_GENERIC_BUG
static inline unsigned long get_break_insn_length(unsigned long pc)
{
bug_insn_t insn;
@@ -120,28 +120,15 @@ static inline unsigned long get_break_insn_length(unsigned long pc)
return 0;
return (((insn & __INSN_LENGTH_MASK) == __INSN_LENGTH_32) ? 4UL : 2UL);
}
-#endif /* CONFIG_GENERIC_BUG */
-asmlinkage void do_trap_break(struct pt_regs *regs)
+asmlinkage __visible void do_trap_break(struct pt_regs *regs)
{
-#ifdef CONFIG_GENERIC_BUG
- if (!user_mode(regs)) {
- enum bug_trap_type type;
-
- type = report_bug(regs->sepc, regs);
- switch (type) {
- case BUG_TRAP_TYPE_NONE:
- break;
- case BUG_TRAP_TYPE_WARN:
- regs->sepc += get_break_insn_length(regs->sepc);
- break;
- case BUG_TRAP_TYPE_BUG:
- die(regs, "Kernel BUG");
- }
- }
-#endif /* CONFIG_GENERIC_BUG */
-
- force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)(regs->sepc));
+ if (user_mode(regs))
+ force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->sepc);
+ else if (report_bug(regs->sepc, regs) == BUG_TRAP_TYPE_WARN)
+ regs->sepc += get_break_insn_length(regs->sepc);
+ else
+ die(regs, "Kernel BUG");
}
#ifdef CONFIG_GENERIC_BUG
diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c
index c9c21e0d5641..484d95a70907 100644
--- a/arch/riscv/kernel/vdso.c
+++ b/arch/riscv/kernel/vdso.c
@@ -6,6 +6,7 @@
* Copyright (C) 2015 Regents of the University of California
*/
+#include <linux/elf.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/binfmts.h>
@@ -25,7 +26,7 @@ static union {
struct vdso_data data;
u8 page[PAGE_SIZE];
} vdso_data_store __page_aligned_data;
-struct vdso_data *vdso_data = &vdso_data_store.data;
+static struct vdso_data *vdso_data = &vdso_data_store.data;
static int __init vdso_init(void)
{
diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c
index beeb5d7f92ea..ca66d44156b6 100644
--- a/arch/riscv/mm/context.c
+++ b/arch/riscv/mm/context.c
@@ -7,6 +7,7 @@
#include <linux/mm.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
+#include <asm/mmu_context.h>
/*
* When necessary, performs a deferred icache flush for the given MM context,
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index 96add1427a75..247b8c859c44 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -18,6 +18,8 @@
#include <asm/ptrace.h>
#include <asm/tlbflush.h>
+#include "../kernel/head.h"
+
/*
* This routine handles page faults. It determines the address and the
* problem, and then passes it off to one of the appropriate routines.
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 83f7d12042fb..573463d1c799 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -19,6 +19,8 @@
#include <asm/pgtable.h>
#include <asm/io.h>
+#include "../kernel/head.h"
+
unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
__page_aligned_bss;
EXPORT_SYMBOL(empty_zero_page);
@@ -337,8 +339,7 @@ static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
*/
#ifndef __riscv_cmodel_medany
-#error "setup_vm() is called from head.S before relocate so it should "
- "not use absolute addressing."
+#error "setup_vm() is called from head.S before relocate so it should not use absolute addressing."
#endif
asmlinkage void __init setup_vm(uintptr_t dtb_pa)
@@ -458,7 +459,7 @@ void __init paging_init(void)
zone_sizes_init();
}
-#ifdef CONFIG_SPARSEMEM
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap)
{
diff --git a/arch/riscv/mm/sifive_l2_cache.c b/arch/riscv/mm/sifive_l2_cache.c
index 2e637ad71c05..a9ffff3277c7 100644
--- a/arch/riscv/mm/sifive_l2_cache.c
+++ b/arch/riscv/mm/sifive_l2_cache.c
@@ -142,7 +142,7 @@ static irqreturn_t l2_int_handler(int irq, void *device)
return IRQ_HANDLED;
}
-int __init sifive_l2_init(void)
+static int __init sifive_l2_init(void)
{
struct device_node *np;
struct resource res;
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index 1a41545becec..fbd341ea03b8 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -101,10 +101,18 @@ static void handle_relocs(unsigned long offset)
dynsym = (Elf64_Sym *) vmlinux.dynsym_start;
for (rela = rela_start; rela < rela_end; rela++) {
loc = rela->r_offset + offset;
- val = rela->r_addend + offset;
+ val = rela->r_addend;
r_sym = ELF64_R_SYM(rela->r_info);
- if (r_sym)
- val += dynsym[r_sym].st_value;
+ if (r_sym) {
+ if (dynsym[r_sym].st_shndx != SHN_UNDEF)
+ val += dynsym[r_sym].st_value + offset;
+ } else {
+ /*
+ * 0 == undefined symbol table index (STN_UNDEF),
+ * used for R_390_RELATIVE, only add KASLR offset
+ */
+ val += offset;
+ }
r_type = ELF64_R_TYPE(rela->r_info);
rc = arch_kexec_do_relocs(r_type, (void *) loc, val, 0);
if (rc)
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 823578c6b9e2..a4d38092530a 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -177,8 +177,6 @@ static inline int devmem_is_allowed(unsigned long pfn)
#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \
VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-#define ARCH_ZONE_DMA_BITS 31
-
#include <asm-generic/memory_model.h>
#include <asm-generic/getorder.h>
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index bd2fd9a7821d..a470f1fa9f2a 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -83,7 +83,7 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n);
__rc; \
})
-static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
+static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
{
unsigned long spec = 0x010000UL;
int rc;
@@ -113,7 +113,7 @@ static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size)
return rc;
}
-static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size)
+static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size)
{
unsigned long spec = 0x01UL;
int rc;
diff --git a/arch/s390/include/asm/unwind.h b/arch/s390/include/asm/unwind.h
index d827b5b9a32c..eaaefeceef6f 100644
--- a/arch/s390/include/asm/unwind.h
+++ b/arch/s390/include/asm/unwind.h
@@ -35,6 +35,7 @@ struct unwind_state {
struct task_struct *task;
struct pt_regs *regs;
unsigned long sp, ip;
+ bool reuse_sp;
int graph_idx;
bool reliable;
bool error;
diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
index b9d8fe45737a..8f8456816d83 100644
--- a/arch/s390/kernel/idle.c
+++ b/arch/s390/kernel/idle.c
@@ -69,18 +69,26 @@ DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL);
static ssize_t show_idle_time(struct device *dev,
struct device_attribute *attr, char *buf)
{
+ unsigned long long now, idle_time, idle_enter, idle_exit, in_idle;
struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
- unsigned long long now, idle_time, idle_enter, idle_exit;
unsigned int seq;
do {
- now = get_tod_clock();
seq = read_seqcount_begin(&idle->seqcount);
idle_time = READ_ONCE(idle->idle_time);
idle_enter = READ_ONCE(idle->clock_idle_enter);
idle_exit = READ_ONCE(idle->clock_idle_exit);
} while (read_seqcount_retry(&idle->seqcount, seq));
- idle_time += idle_enter ? ((idle_exit ? : now) - idle_enter) : 0;
+ in_idle = 0;
+ now = get_tod_clock();
+ if (idle_enter) {
+ if (idle_exit) {
+ in_idle = idle_exit - idle_enter;
+ } else if (now > idle_enter) {
+ in_idle = now - idle_enter;
+ }
+ }
+ idle_time += in_idle;
return sprintf(buf, "%llu\n", idle_time >> 12);
}
DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
@@ -88,17 +96,24 @@ DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
u64 arch_cpu_idle_time(int cpu)
{
struct s390_idle_data *idle = &per_cpu(s390_idle, cpu);
- unsigned long long now, idle_enter, idle_exit;
+ unsigned long long now, idle_enter, idle_exit, in_idle;
unsigned int seq;
do {
- now = get_tod_clock();
seq = read_seqcount_begin(&idle->seqcount);
idle_enter = READ_ONCE(idle->clock_idle_enter);
idle_exit = READ_ONCE(idle->clock_idle_exit);
} while (read_seqcount_retry(&idle->seqcount, seq));
-
- return cputime_to_nsecs(idle_enter ? ((idle_exit ?: now) - idle_enter) : 0);
+ in_idle = 0;
+ now = get_tod_clock();
+ if (idle_enter) {
+ if (idle_exit) {
+ in_idle = idle_exit - idle_enter;
+ } else if (now > idle_enter) {
+ in_idle = now - idle_enter;
+ }
+ }
+ return cputime_to_nsecs(in_idle);
}
void arch_cpu_idle_enter(void)
diff --git a/arch/s390/kernel/machine_kexec_reloc.c b/arch/s390/kernel/machine_kexec_reloc.c
index 3b664cb3ec4d..d5035de9020e 100644
--- a/arch/s390/kernel/machine_kexec_reloc.c
+++ b/arch/s390/kernel/machine_kexec_reloc.c
@@ -27,6 +27,7 @@ int arch_kexec_do_relocs(int r_type, void *loc, unsigned long val,
*(u32 *)loc = val;
break;
case R_390_64: /* Direct 64 bit. */
+ case R_390_GLOB_DAT:
*(u64 *)loc = val;
break;
case R_390_PC16: /* PC relative 16 bit. */
diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c
index 5699e820c621..fa111d3d378f 100644
--- a/arch/s390/kernel/unwind_bc.c
+++ b/arch/s390/kernel/unwind_bc.c
@@ -46,10 +46,15 @@ bool unwind_next_frame(struct unwind_state *state)
regs = state->regs;
if (unlikely(regs)) {
- sp = READ_ONCE_NOCHECK(regs->gprs[15]);
- if (unlikely(outside_of_stack(state, sp))) {
- if (!update_stack_info(state, sp))
- goto out_err;
+ if (state->reuse_sp) {
+ sp = state->sp;
+ state->reuse_sp = false;
+ } else {
+ sp = READ_ONCE_NOCHECK(regs->gprs[15]);
+ if (unlikely(outside_of_stack(state, sp))) {
+ if (!update_stack_info(state, sp))
+ goto out_err;
+ }
}
sf = (struct stack_frame *) sp;
ip = READ_ONCE_NOCHECK(sf->gprs[8]);
@@ -102,9 +107,9 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
{
struct stack_info *info = &state->stack_info;
unsigned long *mask = &state->stack_mask;
+ bool reliable, reuse_sp;
struct stack_frame *sf;
unsigned long ip;
- bool reliable;
memset(state, 0, sizeof(*state));
state->task = task;
@@ -129,10 +134,12 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
if (regs) {
ip = READ_ONCE_NOCHECK(regs->psw.addr);
reliable = true;
+ reuse_sp = true;
} else {
sf = (struct stack_frame *) sp;
ip = READ_ONCE_NOCHECK(sf->gprs[8]);
reliable = false;
+ reuse_sp = false;
}
ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, NULL);
@@ -141,5 +148,6 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
state->sp = sp;
state->ip = ip;
state->reliable = reliable;
+ state->reuse_sp = reuse_sp;
}
EXPORT_SYMBOL_GPL(__unwind_start);
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index 510a18299196..a51c892f14f3 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -298,16 +298,16 @@ static int cmm_timeout_handler(struct ctl_table *ctl, int write,
}
if (write) {
- len = *lenp;
- if (copy_from_user(buf, buffer,
- len > sizeof(buf) ? sizeof(buf) : len))
+ len = min(*lenp, sizeof(buf));
+ if (copy_from_user(buf, buffer, len))
return -EFAULT;
- buf[sizeof(buf) - 1] = '\0';
+ buf[len - 1] = '\0';
cmm_skip_blanks(buf, &p);
nr = simple_strtoul(p, &p, 0);
cmm_skip_blanks(p, &p);
seconds = simple_strtoul(p, &p, 0);
cmm_set_timeout(nr, seconds);
+ *ppos += *lenp;
} else {
len = sprintf(buf, "%ld %ld\n",
cmm_timeout_pages, cmm_timeout_seconds);
@@ -315,9 +315,9 @@ static int cmm_timeout_handler(struct ctl_table *ctl, int write,
len = *lenp;
if (copy_to_user(buffer, buf, len))
return -EFAULT;
+ *lenp = len;
+ *ppos += len;
}
- *lenp = len;
- *ppos += len;
return 0;
}
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index a124f19f7b3c..f0ce22220565 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -118,6 +118,7 @@ void __init paging_init(void)
sparse_memory_present_with_active_regions(MAX_NUMNODES);
sparse_init();
+ zone_dma_bits = 31;
memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS);
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index fbc1aecf0f94..eb24cb1afc11 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -29,7 +29,6 @@ config SPARC
select RTC_DRV_M48T59
select RTC_SYSTOHC
select HAVE_ARCH_JUMP_LABEL if SPARC64
- select HAVE_FAST_GUP if SPARC64
select GENERIC_IRQ_SHOW
select ARCH_WANT_IPC_PARSE_VERSION
select GENERIC_PCI_IOMAP
diff --git a/arch/sparc/vdso/Makefile b/arch/sparc/vdso/Makefile
index 324a23947585..997ffe46e953 100644
--- a/arch/sparc/vdso/Makefile
+++ b/arch/sparc/vdso/Makefile
@@ -65,14 +65,14 @@ $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(SPARC_REG_CFLAGS
#
# vDSO code runs in userspace and -pg doesn't help with profiling anyway.
#
-CFLAGS_REMOVE_vdso-note.o = -pg
CFLAGS_REMOVE_vclock_gettime.o = -pg
+CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg
$(obj)/%.so: OBJCOPYFLAGS := -S
$(obj)/%.so: $(obj)/%.so.dbg FORCE
$(call if_changed,objcopy)
-CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
+CPPFLAGS_vdso32/vdso32.lds = $(CPPFLAGS_vdso.lds)
VDSO_LDFLAGS_vdso32.lds = -m elf32_sparc -soname linux-gate.so.1
#This makes sure the $(obj) subdirectory exists even though vdso32/
diff --git a/arch/um/configs/kunit_defconfig b/arch/um/configs/kunit_defconfig
new file mode 100644
index 000000000000..9235b7d42d38
--- /dev/null
+++ b/arch/um/configs/kunit_defconfig
@@ -0,0 +1,3 @@
+CONFIG_KUNIT=y
+CONFIG_KUNIT_TEST=y
+CONFIG_KUNIT_EXAMPLE_TEST=y
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 612535cd9706..6627d7c30f37 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -1403,8 +1403,12 @@ static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
spin_unlock_irq(&ubd_dev->lock);
- if (ret < 0)
- blk_mq_requeue_request(req, true);
+ if (ret < 0) {
+ if (ret == -ENOMEM)
+ res = BLK_STS_RESOURCE;
+ else
+ res = BLK_STS_DEV_RESOURCE;
+ }
return res;
}
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d6e1faa28c58..8ef85139553f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1940,6 +1940,51 @@ config X86_INTEL_MEMORY_PROTECTION_KEYS
If unsure, say y.
+choice
+ prompt "TSX enable mode"
+ depends on CPU_SUP_INTEL
+ default X86_INTEL_TSX_MODE_OFF
+ help
+ Intel's TSX (Transactional Synchronization Extensions) feature
+ allows to optimize locking protocols through lock elision which
+ can lead to a noticeable performance boost.
+
+ On the other hand it has been shown that TSX can be exploited
+ to form side channel attacks (e.g. TAA) and chances are there
+ will be more of those attacks discovered in the future.
+
+ Therefore TSX is not enabled by default (aka tsx=off). An admin
+ might override this decision by tsx=on the command line parameter.
+ Even with TSX enabled, the kernel will attempt to enable the best
+ possible TAA mitigation setting depending on the microcode available
+ for the particular machine.
+
+ This option allows to set the default tsx mode between tsx=on, =off
+ and =auto. See Documentation/admin-guide/kernel-parameters.txt for more
+ details.
+
+ Say off if not sure, auto if TSX is in use but it should be used on safe
+ platforms or on if TSX is in use and the security aspect of tsx is not
+ relevant.
+
+config X86_INTEL_TSX_MODE_OFF
+ bool "off"
+ help
+ TSX is disabled if possible - equals to tsx=off command line parameter.
+
+config X86_INTEL_TSX_MODE_ON
+ bool "on"
+ help
+ TSX is always enabled on TSX capable HW - equals the tsx=on command
+ line parameter.
+
+config X86_INTEL_TSX_MODE_AUTO
+ bool "auto"
+ help
+ TSX is enabled on TSX capable HW that is believed to be safe against
+ side channel attacks- equals the tsx=auto command line parameter.
+endchoice
+
config EFI
bool "EFI runtime service support"
depends on ACPI
diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c
index 149795c369f2..25019d42ae93 100644
--- a/arch/x86/boot/compressed/acpi.c
+++ b/arch/x86/boot/compressed/acpi.c
@@ -21,30 +21,6 @@
struct mem_vector immovable_mem[MAX_NUMNODES*2];
/*
- * Max length of 64-bit hex address string is 19, prefix "0x" + 16 hex
- * digits, and '\0' for termination.
- */
-#define MAX_ADDR_LEN 19
-
-static acpi_physical_address get_cmdline_acpi_rsdp(void)
-{
- acpi_physical_address addr = 0;
-
-#ifdef CONFIG_KEXEC
- char val[MAX_ADDR_LEN] = { };
- int ret;
-
- ret = cmdline_find_option("acpi_rsdp", val, MAX_ADDR_LEN);
- if (ret < 0)
- return 0;
-
- if (kstrtoull(val, 16, &addr))
- return 0;
-#endif
- return addr;
-}
-
-/*
* Search EFI system tables for RSDP. If both ACPI_20_TABLE_GUID and
* ACPI_TABLE_GUID are found, take the former, which has more features.
*/
@@ -298,6 +274,30 @@ acpi_physical_address get_rsdp_addr(void)
}
#if defined(CONFIG_RANDOMIZE_BASE) && defined(CONFIG_MEMORY_HOTREMOVE)
+/*
+ * Max length of 64-bit hex address string is 19, prefix "0x" + 16 hex
+ * digits, and '\0' for termination.
+ */
+#define MAX_ADDR_LEN 19
+
+static acpi_physical_address get_cmdline_acpi_rsdp(void)
+{
+ acpi_physical_address addr = 0;
+
+#ifdef CONFIG_KEXEC
+ char val[MAX_ADDR_LEN] = { };
+ int ret;
+
+ ret = cmdline_find_option("acpi_rsdp", val, MAX_ADDR_LEN);
+ if (ret < 0)
+ return 0;
+
+ if (kstrtoull(val, 16, &addr))
+ return 0;
+#endif
+ return addr;
+}
+
/* Compute SRAT address from RSDP. */
static unsigned long get_acpi_srat_table(void)
{
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index d6662fdef300..82bc60c8acb2 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -13,6 +13,7 @@
#include <asm/e820/types.h>
#include <asm/setup.h>
#include <asm/desc.h>
+#include <asm/boot.h>
#include "../string.h"
#include "eboot.h"
@@ -813,7 +814,8 @@ efi_main(struct efi_config *c, struct boot_params *boot_params)
status = efi_relocate_kernel(sys_table, &bzimage_addr,
hdr->init_size, hdr->init_size,
hdr->pref_address,
- hdr->kernel_alignment);
+ hdr->kernel_alignment,
+ LOAD_PHYSICAL_ADDR);
if (status != EFI_SUCCESS) {
efi_printk(sys_table, "efi_relocate_kernel() failed!\n");
goto fail;
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 53ac0cb2396d..9652d5c2afda 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -345,6 +345,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
{
const unsigned long kernel_total_size = VO__end - VO__text;
unsigned long virt_addr = LOAD_PHYSICAL_ADDR;
+ unsigned long needed_size;
/* Retain x86 boot parameters pointer passed from startup_32/64. */
boot_params = rmode;
@@ -379,26 +380,38 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
free_mem_ptr = heap; /* Heap */
free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
+ /*
+ * The memory hole needed for the kernel is the larger of either
+ * the entire decompressed kernel plus relocation table, or the
+ * entire decompressed kernel plus .bss and .brk sections.
+ *
+ * On X86_64, the memory is mapped with PMD pages. Round the
+ * size up so that the full extent of PMD pages mapped is
+ * included in the check against the valid memory table
+ * entries. This ensures the full mapped area is usable RAM
+ * and doesn't include any reserved areas.
+ */
+ needed_size = max(output_len, kernel_total_size);
+#ifdef CONFIG_X86_64
+ needed_size = ALIGN(needed_size, MIN_KERNEL_ALIGN);
+#endif
+
/* Report initial kernel position details. */
debug_putaddr(input_data);
debug_putaddr(input_len);
debug_putaddr(output);
debug_putaddr(output_len);
debug_putaddr(kernel_total_size);
+ debug_putaddr(needed_size);
#ifdef CONFIG_X86_64
/* Report address of 32-bit trampoline */
debug_putaddr(trampoline_32bit);
#endif
- /*
- * The memory hole needed for the kernel is the larger of either
- * the entire decompressed kernel plus relocation table, or the
- * entire decompressed kernel plus .bss and .brk sections.
- */
choose_random_location((unsigned long)input_data, input_len,
(unsigned long *)&output,
- max(output_len, kernel_total_size),
+ needed_size,
&virt_addr);
/* Validate memory location choices. */
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index e7d35f60d53f..64c3e70b0556 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -5,12 +5,14 @@
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/delay.h>
+#include <linux/jiffies.h>
#include <asm/apicdef.h>
#include <asm/nmi.h>
#include "../perf_event.h"
-static DEFINE_PER_CPU(unsigned int, perf_nmi_counter);
+static DEFINE_PER_CPU(unsigned long, perf_nmi_tstamp);
+static unsigned long perf_nmi_window;
static __initconst const u64 amd_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
@@ -641,11 +643,12 @@ static void amd_pmu_disable_event(struct perf_event *event)
* handler when multiple PMCs are active or PMC overflow while handling some
* other source of an NMI.
*
- * Attempt to mitigate this by using the number of active PMCs to determine
- * whether to return NMI_HANDLED if the perf NMI handler did not handle/reset
- * any PMCs. The per-CPU perf_nmi_counter variable is set to a minimum of the
- * number of active PMCs or 2. The value of 2 is used in case an NMI does not
- * arrive at the LAPIC in time to be collapsed into an already pending NMI.
+ * Attempt to mitigate this by creating an NMI window in which un-handled NMIs
+ * received during this window will be claimed. This prevents extending the
+ * window past when it is possible that latent NMIs should be received. The
+ * per-CPU perf_nmi_tstamp will be set to the window end time whenever perf has
+ * handled a counter. When an un-handled NMI is received, it will be claimed
+ * only if arriving within that window.
*/
static int amd_pmu_handle_irq(struct pt_regs *regs)
{
@@ -663,21 +666,19 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
handled = x86_pmu_handle_irq(regs);
/*
- * If a counter was handled, record the number of possible remaining
- * NMIs that can occur.
+ * If a counter was handled, record a timestamp such that un-handled
+ * NMIs will be claimed if arriving within that window.
*/
if (handled) {
- this_cpu_write(perf_nmi_counter,
- min_t(unsigned int, 2, active));
+ this_cpu_write(perf_nmi_tstamp,
+ jiffies + perf_nmi_window);
return handled;
}
- if (!this_cpu_read(perf_nmi_counter))
+ if (time_after(jiffies, this_cpu_read(perf_nmi_tstamp)))
return NMI_DONE;
- this_cpu_dec(perf_nmi_counter);
-
return NMI_HANDLED;
}
@@ -909,6 +910,9 @@ static int __init amd_core_pmu_init(void)
if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE))
return 0;
+ /* Avoid calulating the value each time in the NMI handler */
+ perf_nmi_window = msecs_to_jiffies(100);
+
switch (boot_cpu_data.x86) {
case 0x15:
pr_cont("Fam15h ");
diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index 5b35b7ea5d72..26c36357c4c9 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -377,7 +377,8 @@ static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
struct hw_perf_event *hwc, u64 config)
{
config &= ~perf_ibs->cnt_mask;
- wrmsrl(hwc->config_base, config);
+ if (boot_cpu_data.x86 == 0x10)
+ wrmsrl(hwc->config_base, config);
config &= ~perf_ibs->enable_mask;
wrmsrl(hwc->config_base, config);
}
@@ -553,7 +554,8 @@ static struct perf_ibs perf_ibs_op = {
},
.msr = MSR_AMD64_IBSOPCTL,
.config_mask = IBS_OP_CONFIG_MASK,
- .cnt_mask = IBS_OP_MAX_CNT,
+ .cnt_mask = IBS_OP_MAX_CNT | IBS_OP_CUR_CNT |
+ IBS_OP_CUR_CNT_RAND,
.enable_mask = IBS_OP_ENABLE,
.valid_mask = IBS_OP_VAL,
.max_period = IBS_OP_MAX_CNT << 4,
@@ -614,7 +616,7 @@ fail:
if (event->attr.sample_type & PERF_SAMPLE_RAW)
offset_max = perf_ibs->offset_max;
else if (check_rip)
- offset_max = 2;
+ offset_max = 3;
else
offset_max = 1;
do {
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 27ee47a7be66..fcef678c3423 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4983,6 +4983,8 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_SKYLAKE:
case INTEL_FAM6_KABYLAKE_L:
case INTEL_FAM6_KABYLAKE:
+ case INTEL_FAM6_COMETLAKE_L:
+ case INTEL_FAM6_COMETLAKE:
x86_add_quirk(intel_pebs_isolation_quirk);
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
@@ -5031,6 +5033,8 @@ __init int intel_pmu_init(void)
/* fall through */
case INTEL_FAM6_ICELAKE_L:
case INTEL_FAM6_ICELAKE:
+ case INTEL_FAM6_TIGERLAKE_L:
+ case INTEL_FAM6_TIGERLAKE:
x86_pmu.late_ack = true;
memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 9f2f39003d96..e1daf4151e11 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -45,46 +45,49 @@
* MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
* perf code: 0x01
* Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,GLM,
- CNL
+ * CNL,KBL,CML
* Scope: Core
* MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter
* perf code: 0x02
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
- * SKL,KNL,GLM,CNL
+ * SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL
* Scope: Core
* MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
* perf code: 0x03
- * Available model: SNB,IVB,HSW,BDW,SKL,CNL
+ * Available model: SNB,IVB,HSW,BDW,SKL,CNL,KBL,CML,
+ * ICL,TGL
* Scope: Core
* MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter.
* perf code: 0x00
- * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL
+ * Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL,
+ * KBL,CML,ICL,TGL
* Scope: Package (physical package)
* MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter.
* perf code: 0x01
* Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL,
- * GLM,CNL
+ * GLM,CNL,KBL,CML,ICL,TGL
* Scope: Package (physical package)
* MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter.
* perf code: 0x02
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW
- * SKL,KNL,GLM,CNL
+ * SKL,KNL,GLM,CNL,KBL,CML,ICL,TGL
* Scope: Package (physical package)
* MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter.
* perf code: 0x03
- * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL
+ * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,CNL,
+ * KBL,CML,ICL,TGL
* Scope: Package (physical package)
* MSR_PKG_C8_RESIDENCY: Package C8 Residency Counter.
* perf code: 0x04
- * Available model: HSW ULT,KBL,CNL
+ * Available model: HSW ULT,KBL,CNL,CML,ICL,TGL
* Scope: Package (physical package)
* MSR_PKG_C9_RESIDENCY: Package C9 Residency Counter.
* perf code: 0x05
- * Available model: HSW ULT,KBL,CNL
+ * Available model: HSW ULT,KBL,CNL,CML,ICL,TGL
* Scope: Package (physical package)
* MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
* perf code: 0x06
- * Available model: HSW ULT,KBL,GLM,CNL
+ * Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL
* Scope: Package (physical package)
*
*/
@@ -544,6 +547,19 @@ static const struct cstate_model cnl_cstates __initconst = {
BIT(PERF_CSTATE_PKG_C10_RES),
};
+static const struct cstate_model icl_cstates __initconst = {
+ .core_events = BIT(PERF_CSTATE_CORE_C6_RES) |
+ BIT(PERF_CSTATE_CORE_C7_RES),
+
+ .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) |
+ BIT(PERF_CSTATE_PKG_C3_RES) |
+ BIT(PERF_CSTATE_PKG_C6_RES) |
+ BIT(PERF_CSTATE_PKG_C7_RES) |
+ BIT(PERF_CSTATE_PKG_C8_RES) |
+ BIT(PERF_CSTATE_PKG_C9_RES) |
+ BIT(PERF_CSTATE_PKG_C10_RES),
+};
+
static const struct cstate_model slm_cstates __initconst = {
.core_events = BIT(PERF_CSTATE_CORE_C1_RES) |
BIT(PERF_CSTATE_CORE_C6_RES),
@@ -614,6 +630,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_L, hswult_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE, hswult_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_COMETLAKE_L, hswult_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_COMETLAKE, hswult_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_CANNONLAKE_L, cnl_cstates),
@@ -625,8 +643,10 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_L, snb_cstates),
- X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE, snb_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_L, icl_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE, icl_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_TIGERLAKE_L, icl_cstates),
+ X86_CSTATES_MODEL(INTEL_FAM6_TIGERLAKE, icl_cstates),
{ },
};
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 74e80ed9c6c4..05e43d0f430b 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -627,7 +627,7 @@ static struct topa *topa_alloc(int cpu, gfp_t gfp)
* link as the 2nd entry in the table
*/
if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) {
- TOPA_ENTRY(&tp->topa, 1)->base = page_to_phys(p);
+ TOPA_ENTRY(&tp->topa, 1)->base = page_to_phys(p) >> TOPA_SHIFT;
TOPA_ENTRY(&tp->topa, 1)->end = 1;
}
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 6fc2e06ab4c6..86467f85c383 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -502,10 +502,8 @@ void uncore_pmu_event_start(struct perf_event *event, int flags)
local64_set(&event->hw.prev_count, uncore_read_counter(box, event));
uncore_enable_event(box, event);
- if (box->n_active == 1) {
- uncore_enable_box(box);
+ if (box->n_active == 1)
uncore_pmu_start_hrtimer(box);
- }
}
void uncore_pmu_event_stop(struct perf_event *event, int flags)
@@ -529,10 +527,8 @@ void uncore_pmu_event_stop(struct perf_event *event, int flags)
WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
hwc->state |= PERF_HES_STOPPED;
- if (box->n_active == 0) {
- uncore_disable_box(box);
+ if (box->n_active == 0)
uncore_pmu_cancel_hrtimer(box);
- }
}
if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
@@ -778,6 +774,40 @@ static int uncore_pmu_event_init(struct perf_event *event)
return ret;
}
+static void uncore_pmu_enable(struct pmu *pmu)
+{
+ struct intel_uncore_pmu *uncore_pmu;
+ struct intel_uncore_box *box;
+
+ uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
+ if (!uncore_pmu)
+ return;
+
+ box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
+ if (!box)
+ return;
+
+ if (uncore_pmu->type->ops->enable_box)
+ uncore_pmu->type->ops->enable_box(box);
+}
+
+static void uncore_pmu_disable(struct pmu *pmu)
+{
+ struct intel_uncore_pmu *uncore_pmu;
+ struct intel_uncore_box *box;
+
+ uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu);
+ if (!uncore_pmu)
+ return;
+
+ box = uncore_pmu_to_box(uncore_pmu, smp_processor_id());
+ if (!box)
+ return;
+
+ if (uncore_pmu->type->ops->disable_box)
+ uncore_pmu->type->ops->disable_box(box);
+}
+
static ssize_t uncore_get_attr_cpumask(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -803,6 +833,8 @@ static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
pmu->pmu = (struct pmu) {
.attr_groups = pmu->type->attr_groups,
.task_ctx_nr = perf_invalid_context,
+ .pmu_enable = uncore_pmu_enable,
+ .pmu_disable = uncore_pmu_disable,
.event_init = uncore_pmu_event_init,
.add = uncore_pmu_event_add,
.del = uncore_pmu_event_del,
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index f36f7bebbc1b..bbfdaa720b45 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -441,18 +441,6 @@ static inline int uncore_freerunning_hw_config(struct intel_uncore_box *box,
return -EINVAL;
}
-static inline void uncore_disable_box(struct intel_uncore_box *box)
-{
- if (box->pmu->type->ops->disable_box)
- box->pmu->type->ops->disable_box(box);
-}
-
-static inline void uncore_enable_box(struct intel_uncore_box *box)
-{
- if (box->pmu->type->ops->enable_box)
- box->pmu->type->ops->enable_box(box);
-}
-
static inline void uncore_disable_event(struct intel_uncore_box *box,
struct perf_event *event)
{
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index b1afc77f0704..6f86650b3f77 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -89,7 +89,14 @@ static bool test_intel(int idx, void *data)
case INTEL_FAM6_SKYLAKE_X:
case INTEL_FAM6_KABYLAKE_L:
case INTEL_FAM6_KABYLAKE:
+ case INTEL_FAM6_COMETLAKE_L:
+ case INTEL_FAM6_COMETLAKE:
case INTEL_FAM6_ICELAKE_L:
+ case INTEL_FAM6_ICELAKE:
+ case INTEL_FAM6_ICELAKE_X:
+ case INTEL_FAM6_ICELAKE_D:
+ case INTEL_FAM6_TIGERLAKE_L:
+ case INTEL_FAM6_TIGERLAKE:
if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
return true;
break;
diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
index 5c056b8aebef..e01078e93dd3 100644
--- a/arch/x86/hyperv/hv_apic.c
+++ b/arch/x86/hyperv/hv_apic.c
@@ -260,11 +260,21 @@ void __init hv_apic_init(void)
}
if (ms_hyperv.hints & HV_X64_APIC_ACCESS_RECOMMENDED) {
- pr_info("Hyper-V: Using MSR based APIC access\n");
+ pr_info("Hyper-V: Using enlightened APIC (%s mode)",
+ x2apic_enabled() ? "x2apic" : "xapic");
+ /*
+ * With x2apic, architectural x2apic MSRs are equivalent to the
+ * respective synthetic MSRs, so there's no need to override
+ * the apic accessors. The only exception is
+ * hv_apic_eoi_write, because it benefits from lazy EOI when
+ * available, but it works for both xapic and x2apic modes.
+ */
apic_set_eoi_write(hv_apic_eoi_write);
- apic->read = hv_apic_read;
- apic->write = hv_apic_write;
- apic->icr_write = hv_apic_icr_write;
- apic->icr_read = hv_apic_icr_read;
+ if (!x2apic_enabled()) {
+ apic->read = hv_apic_read;
+ apic->write = hv_apic_write;
+ apic->icr_write = hv_apic_icr_write;
+ apic->icr_read = hv_apic_icr_read;
+ }
}
}
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
index cff3f3f3bfe0..8348f7d69fd5 100644
--- a/arch/x86/include/asm/cpu_entry_area.h
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_CPU_ENTRY_AREA_H
#define _ASM_X86_CPU_ENTRY_AREA_H
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 0652d3eed9bd..c4fbe379cc0b 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -399,5 +399,7 @@
#define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */
#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */
#define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */
+#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */
+#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index f04622500da3..c606c0b70738 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -83,6 +83,9 @@
#define INTEL_FAM6_TIGERLAKE_L 0x8C
#define INTEL_FAM6_TIGERLAKE 0x8D
+#define INTEL_FAM6_COMETLAKE 0xA5
+#define INTEL_FAM6_COMETLAKE_L 0xA6
+
/* "Small Core" Processors (Atom) */
#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 50eb430b0ad8..4fc61483919a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -312,9 +312,12 @@ struct kvm_rmap_head {
struct kvm_mmu_page {
struct list_head link;
struct hlist_node hash_link;
+ struct list_head lpage_disallowed_link;
+
bool unsync;
u8 mmu_valid_gen;
bool mmio_cached;
+ bool lpage_disallowed; /* Can't be replaced by an equiv large page */
/*
* The following two entries are used to key the shadow page in the
@@ -859,6 +862,7 @@ struct kvm_arch {
*/
struct list_head active_mmu_pages;
struct list_head zapped_obsolete_pages;
+ struct list_head lpage_disallowed_mmu_pages;
struct kvm_page_track_notifier_node mmu_sp_tracker;
struct kvm_page_track_notifier_head track_notifier_head;
@@ -933,6 +937,7 @@ struct kvm_arch {
bool exception_payload_enabled;
struct kvm_pmu_event_filter *pmu_event_filter;
+ struct task_struct *nx_lpage_recovery_thread;
};
struct kvm_vm_stat {
@@ -946,6 +951,7 @@ struct kvm_vm_stat {
ulong mmu_unsync;
ulong remote_tlb_flush;
ulong lpages;
+ ulong nx_lpage_splits;
ulong max_mmu_page_hash_collisions;
};
@@ -1189,7 +1195,7 @@ struct kvm_x86_ops {
int (*set_nested_state)(struct kvm_vcpu *vcpu,
struct kvm_nested_state __user *user_kvm_nested_state,
struct kvm_nested_state *kvm_state);
- void (*get_vmcs12_pages)(struct kvm_vcpu *vcpu);
+ bool (*get_vmcs12_pages)(struct kvm_vcpu *vcpu);
int (*smi_allowed)(struct kvm_vcpu *vcpu);
int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 20ce682a2540..6a3124664289 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -93,6 +93,18 @@
* Microarchitectural Data
* Sampling (MDS) vulnerabilities.
*/
+#define ARCH_CAP_PSCHANGE_MC_NO BIT(6) /*
+ * The processor is not susceptible to a
+ * machine check error due to modifying the
+ * code page size along with either the
+ * physical address or cache type
+ * without TLB invalidation.
+ */
+#define ARCH_CAP_TSX_CTRL_MSR BIT(7) /* MSR for TSX control is available. */
+#define ARCH_CAP_TAA_NO BIT(8) /*
+ * Not susceptible to
+ * TSX Async Abort (TAA) vulnerabilities.
+ */
#define MSR_IA32_FLUSH_CMD 0x0000010b
#define L1D_FLUSH BIT(0) /*
@@ -103,6 +115,10 @@
#define MSR_IA32_BBL_CR_CTL 0x00000119
#define MSR_IA32_BBL_CR_CTL3 0x0000011e
+#define MSR_IA32_TSX_CTRL 0x00000122
+#define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */
+#define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */
+
#define MSR_IA32_SYSENTER_CS 0x00000174
#define MSR_IA32_SYSENTER_ESP 0x00000175
#define MSR_IA32_SYSENTER_EIP 0x00000176
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index e28f8b723b5c..9d5252c9685c 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -21,7 +21,7 @@
#define MWAIT_ECX_INTERRUPT_BREAK 0x1
#define MWAITX_ECX_TIMER_ENABLE BIT(1)
#define MWAITX_MAX_LOOPS ((u32)-1)
-#define MWAITX_DISABLE_CSTATES 0xf
+#define MWAITX_DISABLE_CSTATES 0xf0
static inline void __monitor(const void *eax, unsigned long ecx,
unsigned long edx)
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 80bc209c0708..5c24a7b35166 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -314,7 +314,7 @@ DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
#include <asm/segment.h>
/**
- * mds_clear_cpu_buffers - Mitigation for MDS vulnerability
+ * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
*
* This uses the otherwise unused and obsolete VERW instruction in
* combination with microcode which triggers a CPU buffer flush when the
@@ -337,7 +337,7 @@ static inline void mds_clear_cpu_buffers(void)
}
/**
- * mds_user_clear_cpu_buffers - Mitigation for MDS vulnerability
+ * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
*
* Clear CPU buffers if the corresponding static key is enabled
*/
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 0bc530c4eb13..ad97dc155195 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1463,6 +1463,12 @@ static inline bool arch_has_pfn_modify_check(void)
return boot_cpu_has_bug(X86_BUG_L1TF);
}
+#define arch_faults_on_old_pte arch_faults_on_old_pte
+static inline bool arch_faults_on_old_pte(void)
+{
+ return false;
+}
+
#include <asm-generic/pgtable.h>
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 6e0a3b43d027..54f5d54280f6 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -988,4 +988,11 @@ enum mds_mitigations {
MDS_MITIGATION_VMWERV,
};
+enum taa_mitigations {
+ TAA_MITIGATION_OFF,
+ TAA_MITIGATION_UCODE_NEEDED,
+ TAA_MITIGATION_VERW,
+ TAA_MITIGATION_TSX_DISABLED,
+};
+
#endif /* _ASM_X86_PROCESSOR_H */
diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h
index 5df09a0b80b8..07375b476c4f 100644
--- a/arch/x86/include/asm/pti.h
+++ b/arch/x86/include/asm/pti.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_PTI_H
#define _ASM_X86_PTI_H
#ifndef __ASSEMBLY__
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 35c225ede0e4..61d93f062a36 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -734,5 +734,28 @@ do { \
if (unlikely(__gu_err)) goto err_label; \
} while (0)
+/*
+ * We want the unsafe accessors to always be inlined and use
+ * the error labels - thus the macro games.
+ */
+#define unsafe_copy_loop(dst, src, len, type, label) \
+ while (len >= sizeof(type)) { \
+ unsafe_put_user(*(type *)src,(type __user *)dst,label); \
+ dst += sizeof(type); \
+ src += sizeof(type); \
+ len -= sizeof(type); \
+ }
+
+#define unsafe_copy_to_user(_dst,_src,_len,label) \
+do { \
+ char __user *__ucu_dst = (_dst); \
+ const char *__ucu_src = (_src); \
+ size_t __ucu_len = (_len); \
+ unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u64, label); \
+ unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u32, label); \
+ unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u16, label); \
+ unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label); \
+} while (0)
+
#endif /* _ASM_X86_UACCESS_H */
diff --git a/arch/x86/include/asm/vmware.h b/arch/x86/include/asm/vmware.h
index e00c9e875933..ac9fc51e2b18 100644
--- a/arch/x86/include/asm/vmware.h
+++ b/arch/x86/include/asm/vmware.h
@@ -4,6 +4,7 @@
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
+#include <linux/stringify.h>
/*
* The hypercall definitions differ in the low word of the %edx argument
@@ -20,8 +21,8 @@
*/
/* Old port-based version */
-#define VMWARE_HYPERVISOR_PORT "0x5658"
-#define VMWARE_HYPERVISOR_PORT_HB "0x5659"
+#define VMWARE_HYPERVISOR_PORT 0x5658
+#define VMWARE_HYPERVISOR_PORT_HB 0x5659
/* Current vmcall / vmmcall version */
#define VMWARE_HYPERVISOR_HB BIT(0)
@@ -29,7 +30,8 @@
/* The low bandwidth call. The low word of edx is presumed clear. */
#define VMWARE_HYPERCALL \
- ALTERNATIVE_2("movw $" VMWARE_HYPERVISOR_PORT ", %%dx; inl (%%dx)", \
+ ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT) ", %%dx; " \
+ "inl (%%dx), %%eax", \
"vmcall", X86_FEATURE_VMCALL, \
"vmmcall", X86_FEATURE_VMW_VMMCALL)
@@ -38,7 +40,8 @@
* HB and OUT bits set.
*/
#define VMWARE_HYPERCALL_HB_OUT \
- ALTERNATIVE_2("movw $" VMWARE_HYPERVISOR_PORT_HB ", %%dx; rep outsb", \
+ ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT_HB) ", %%dx; " \
+ "rep outsb", \
"vmcall", X86_FEATURE_VMCALL, \
"vmmcall", X86_FEATURE_VMW_VMMCALL)
@@ -47,7 +50,8 @@
* HB bit set.
*/
#define VMWARE_HYPERCALL_HB_IN \
- ALTERNATIVE_2("movw $" VMWARE_HYPERVISOR_PORT_HB ", %%dx; rep insb", \
+ ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT_HB) ", %%dx; " \
+ "rep insb", \
"vmcall", X86_FEATURE_VMCALL, \
"vmmcall", X86_FEATURE_VMW_VMMCALL)
#endif
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 9e2dd2b296cd..2b0faf86da1b 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1586,9 +1586,6 @@ static void setup_local_APIC(void)
{
int cpu = smp_processor_id();
unsigned int value;
-#ifdef CONFIG_X86_32
- int logical_apicid, ldr_apicid;
-#endif
if (disable_apic) {
disable_ioapic_support();
@@ -1626,16 +1623,21 @@ static void setup_local_APIC(void)
apic->init_apic_ldr();
#ifdef CONFIG_X86_32
- /*
- * APIC LDR is initialized. If logical_apicid mapping was
- * initialized during get_smp_config(), make sure it matches the
- * actual value.
- */
- logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
- ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
- WARN_ON(logical_apicid != BAD_APICID && logical_apicid != ldr_apicid);
- /* always use the value from LDR */
- early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid;
+ if (apic->dest_logical) {
+ int logical_apicid, ldr_apicid;
+
+ /*
+ * APIC LDR is initialized. If logical_apicid mapping was
+ * initialized during get_smp_config(), make sure it matches
+ * the actual value.
+ */
+ logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
+ ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
+ if (logical_apicid != BAD_APICID)
+ WARN_ON(logical_apicid != ldr_apicid);
+ /* Always use the value from LDR. */
+ early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid;
+ }
#endif
/*
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 45e92cba92f5..b0889c48a2ac 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -156,7 +156,8 @@ static int x2apic_dead_cpu(unsigned int dead_cpu)
{
struct cluster_mask *cmsk = per_cpu(cluster_masks, dead_cpu);
- cpumask_clear_cpu(dead_cpu, &cmsk->mask);
+ if (cmsk)
+ cpumask_clear_cpu(dead_cpu, &cmsk->mask);
free_cpumask_var(per_cpu(ipi_mask, dead_cpu));
return 0;
}
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index d7a1e5a9331c..890f60083eca 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -30,7 +30,7 @@ obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
ifdef CONFIG_CPU_SUP_INTEL
-obj-y += intel.o intel_pconfig.o
+obj-y += intel.o intel_pconfig.o tsx.o
obj-$(CONFIG_PM) += intel_epb.o
endif
obj-$(CONFIG_CPU_SUP_AMD) += amd.o
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 91c2561b905f..4c7b0fa15a19 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -39,6 +39,7 @@ static void __init spectre_v2_select_mitigation(void);
static void __init ssb_select_mitigation(void);
static void __init l1tf_select_mitigation(void);
static void __init mds_select_mitigation(void);
+static void __init taa_select_mitigation(void);
/* The base value of the SPEC_CTRL MSR that always has to be preserved. */
u64 x86_spec_ctrl_base;
@@ -105,6 +106,7 @@ void __init check_bugs(void)
ssb_select_mitigation();
l1tf_select_mitigation();
mds_select_mitigation();
+ taa_select_mitigation();
arch_smt_update();
@@ -269,6 +271,100 @@ static int __init mds_cmdline(char *str)
early_param("mds", mds_cmdline);
#undef pr_fmt
+#define pr_fmt(fmt) "TAA: " fmt
+
+/* Default mitigation for TAA-affected CPUs */
+static enum taa_mitigations taa_mitigation __ro_after_init = TAA_MITIGATION_VERW;
+static bool taa_nosmt __ro_after_init;
+
+static const char * const taa_strings[] = {
+ [TAA_MITIGATION_OFF] = "Vulnerable",
+ [TAA_MITIGATION_UCODE_NEEDED] = "Vulnerable: Clear CPU buffers attempted, no microcode",
+ [TAA_MITIGATION_VERW] = "Mitigation: Clear CPU buffers",
+ [TAA_MITIGATION_TSX_DISABLED] = "Mitigation: TSX disabled",
+};
+
+static void __init taa_select_mitigation(void)
+{
+ u64 ia32_cap;
+
+ if (!boot_cpu_has_bug(X86_BUG_TAA)) {
+ taa_mitigation = TAA_MITIGATION_OFF;
+ return;
+ }
+
+ /* TSX previously disabled by tsx=off */
+ if (!boot_cpu_has(X86_FEATURE_RTM)) {
+ taa_mitigation = TAA_MITIGATION_TSX_DISABLED;
+ goto out;
+ }
+
+ if (cpu_mitigations_off()) {
+ taa_mitigation = TAA_MITIGATION_OFF;
+ return;
+ }
+
+ /* TAA mitigation is turned off on the cmdline (tsx_async_abort=off) */
+ if (taa_mitigation == TAA_MITIGATION_OFF)
+ goto out;
+
+ if (boot_cpu_has(X86_FEATURE_MD_CLEAR))
+ taa_mitigation = TAA_MITIGATION_VERW;
+ else
+ taa_mitigation = TAA_MITIGATION_UCODE_NEEDED;
+
+ /*
+ * VERW doesn't clear the CPU buffers when MD_CLEAR=1 and MDS_NO=1.
+ * A microcode update fixes this behavior to clear CPU buffers. It also
+ * adds support for MSR_IA32_TSX_CTRL which is enumerated by the
+ * ARCH_CAP_TSX_CTRL_MSR bit.
+ *
+ * On MDS_NO=1 CPUs if ARCH_CAP_TSX_CTRL_MSR is not set, microcode
+ * update is required.
+ */
+ ia32_cap = x86_read_arch_cap_msr();
+ if ( (ia32_cap & ARCH_CAP_MDS_NO) &&
+ !(ia32_cap & ARCH_CAP_TSX_CTRL_MSR))
+ taa_mitigation = TAA_MITIGATION_UCODE_NEEDED;
+
+ /*
+ * TSX is enabled, select alternate mitigation for TAA which is
+ * the same as MDS. Enable MDS static branch to clear CPU buffers.
+ *
+ * For guests that can't determine whether the correct microcode is
+ * present on host, enable the mitigation for UCODE_NEEDED as well.
+ */
+ static_branch_enable(&mds_user_clear);
+
+ if (taa_nosmt || cpu_mitigations_auto_nosmt())
+ cpu_smt_disable(false);
+
+out:
+ pr_info("%s\n", taa_strings[taa_mitigation]);
+}
+
+static int __init tsx_async_abort_parse_cmdline(char *str)
+{
+ if (!boot_cpu_has_bug(X86_BUG_TAA))
+ return 0;
+
+ if (!str)
+ return -EINVAL;
+
+ if (!strcmp(str, "off")) {
+ taa_mitigation = TAA_MITIGATION_OFF;
+ } else if (!strcmp(str, "full")) {
+ taa_mitigation = TAA_MITIGATION_VERW;
+ } else if (!strcmp(str, "full,nosmt")) {
+ taa_mitigation = TAA_MITIGATION_VERW;
+ taa_nosmt = true;
+ }
+
+ return 0;
+}
+early_param("tsx_async_abort", tsx_async_abort_parse_cmdline);
+
+#undef pr_fmt
#define pr_fmt(fmt) "Spectre V1 : " fmt
enum spectre_v1_mitigation {
@@ -786,13 +882,10 @@ static void update_mds_branch_idle(void)
}
#define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n"
+#define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n"
void cpu_bugs_smt_update(void)
{
- /* Enhanced IBRS implies STIBP. No update required. */
- if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
- return;
-
mutex_lock(&spec_ctrl_mutex);
switch (spectre_v2_user) {
@@ -819,6 +912,17 @@ void cpu_bugs_smt_update(void)
break;
}
+ switch (taa_mitigation) {
+ case TAA_MITIGATION_VERW:
+ case TAA_MITIGATION_UCODE_NEEDED:
+ if (sched_smt_active())
+ pr_warn_once(TAA_MSG_SMT);
+ break;
+ case TAA_MITIGATION_TSX_DISABLED:
+ case TAA_MITIGATION_OFF:
+ break;
+ }
+
mutex_unlock(&spec_ctrl_mutex);
}
@@ -1149,6 +1253,9 @@ void x86_spec_ctrl_setup_ap(void)
x86_amd_ssb_disable();
}
+bool itlb_multihit_kvm_mitigation;
+EXPORT_SYMBOL_GPL(itlb_multihit_kvm_mitigation);
+
#undef pr_fmt
#define pr_fmt(fmt) "L1TF: " fmt
@@ -1304,11 +1411,24 @@ static ssize_t l1tf_show_state(char *buf)
l1tf_vmx_states[l1tf_vmx_mitigation],
sched_smt_active() ? "vulnerable" : "disabled");
}
+
+static ssize_t itlb_multihit_show_state(char *buf)
+{
+ if (itlb_multihit_kvm_mitigation)
+ return sprintf(buf, "KVM: Mitigation: Split huge pages\n");
+ else
+ return sprintf(buf, "KVM: Vulnerable\n");
+}
#else
static ssize_t l1tf_show_state(char *buf)
{
return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG);
}
+
+static ssize_t itlb_multihit_show_state(char *buf)
+{
+ return sprintf(buf, "Processor vulnerable\n");
+}
#endif
static ssize_t mds_show_state(char *buf)
@@ -1328,6 +1448,21 @@ static ssize_t mds_show_state(char *buf)
sched_smt_active() ? "vulnerable" : "disabled");
}
+static ssize_t tsx_async_abort_show_state(char *buf)
+{
+ if ((taa_mitigation == TAA_MITIGATION_TSX_DISABLED) ||
+ (taa_mitigation == TAA_MITIGATION_OFF))
+ return sprintf(buf, "%s\n", taa_strings[taa_mitigation]);
+
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
+ return sprintf(buf, "%s; SMT Host state unknown\n",
+ taa_strings[taa_mitigation]);
+ }
+
+ return sprintf(buf, "%s; SMT %s\n", taa_strings[taa_mitigation],
+ sched_smt_active() ? "vulnerable" : "disabled");
+}
+
static char *stibp_state(void)
{
if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
@@ -1398,6 +1533,12 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
case X86_BUG_MDS:
return mds_show_state(buf);
+ case X86_BUG_TAA:
+ return tsx_async_abort_show_state(buf);
+
+ case X86_BUG_ITLB_MULTIHIT:
+ return itlb_multihit_show_state(buf);
+
default:
break;
}
@@ -1434,4 +1575,14 @@ ssize_t cpu_show_mds(struct device *dev, struct device_attribute *attr, char *bu
{
return cpu_show_common(dev, attr, buf, X86_BUG_MDS);
}
+
+ssize_t cpu_show_tsx_async_abort(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_TAA);
+}
+
+ssize_t cpu_show_itlb_multihit(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_ITLB_MULTIHIT);
+}
#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 9ae7d1bcd4f4..fffe21945374 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1016,13 +1016,14 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
#endif
}
-#define NO_SPECULATION BIT(0)
-#define NO_MELTDOWN BIT(1)
-#define NO_SSB BIT(2)
-#define NO_L1TF BIT(3)
-#define NO_MDS BIT(4)
-#define MSBDS_ONLY BIT(5)
-#define NO_SWAPGS BIT(6)
+#define NO_SPECULATION BIT(0)
+#define NO_MELTDOWN BIT(1)
+#define NO_SSB BIT(2)
+#define NO_L1TF BIT(3)
+#define NO_MDS BIT(4)
+#define MSBDS_ONLY BIT(5)
+#define NO_SWAPGS BIT(6)
+#define NO_ITLB_MULTIHIT BIT(7)
#define VULNWL(_vendor, _family, _model, _whitelist) \
{ X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist }
@@ -1043,27 +1044,27 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION),
/* Intel Family 6 */
- VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION),
- VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION),
- VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION),
- VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION),
- VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION),
-
- VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
- VULNWL_INTEL(ATOM_SILVERMONT_D, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
- VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
- VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
- VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
- VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
+ VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION | NO_ITLB_MULTIHIT),
+ VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION | NO_ITLB_MULTIHIT),
+ VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT),
+ VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION | NO_ITLB_MULTIHIT),
+ VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT),
+
+ VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ VULNWL_INTEL(ATOM_SILVERMONT_D, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
VULNWL_INTEL(CORE_YONAH, NO_SSB),
- VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS),
- VULNWL_INTEL(ATOM_AIRMONT_NP, NO_L1TF | NO_SWAPGS),
+ VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ VULNWL_INTEL(ATOM_AIRMONT_NP, NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
- VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS),
- VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS),
- VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS),
+ VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ VULNWL_INTEL(ATOM_GOLDMONT_D, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT),
/*
* Technically, swapgs isn't serializing on AMD (despite it previously
@@ -1073,15 +1074,17 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
* good enough for our purposes.
*/
+ VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT),
+
/* AMD Family 0xf - 0x12 */
- VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
- VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
- VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
- VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS),
+ VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
/* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
- VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS),
- VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS),
+ VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
+ VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT),
{}
};
@@ -1092,19 +1095,30 @@ static bool __init cpu_matches(unsigned long which)
return m && !!(m->driver_data & which);
}
-static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
+u64 x86_read_arch_cap_msr(void)
{
u64 ia32_cap = 0;
+ if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
+ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
+
+ return ia32_cap;
+}
+
+static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
+{
+ u64 ia32_cap = x86_read_arch_cap_msr();
+
+ /* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */
+ if (!cpu_matches(NO_ITLB_MULTIHIT) && !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO))
+ setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT);
+
if (cpu_matches(NO_SPECULATION))
return;
setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
- if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
- rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
-
if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) &&
!cpu_has(c, X86_FEATURE_AMD_SSB_NO))
setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
@@ -1121,6 +1135,21 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
if (!cpu_matches(NO_SWAPGS))
setup_force_cpu_bug(X86_BUG_SWAPGS);
+ /*
+ * When the CPU is not mitigated for TAA (TAA_NO=0) set TAA bug when:
+ * - TSX is supported or
+ * - TSX_CTRL is present
+ *
+ * TSX_CTRL check is needed for cases when TSX could be disabled before
+ * the kernel boot e.g. kexec.
+ * TSX_CTRL check alone is not sufficient for cases when the microcode
+ * update is not present or running as guest that don't get TSX_CTRL.
+ */
+ if (!(ia32_cap & ARCH_CAP_TAA_NO) &&
+ (cpu_has(c, X86_FEATURE_RTM) ||
+ (ia32_cap & ARCH_CAP_TSX_CTRL_MSR)))
+ setup_force_cpu_bug(X86_BUG_TAA);
+
if (cpu_matches(NO_MELTDOWN))
return;
@@ -1554,6 +1583,8 @@ void __init identify_boot_cpu(void)
#endif
cpu_detect_tlb(&boot_cpu_data);
setup_cr_pinning();
+
+ tsx_init();
}
void identify_secondary_cpu(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index c0e2407abdd6..38ab6e115eac 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -44,6 +44,22 @@ struct _tlb_table {
extern const struct cpu_dev *const __x86_cpu_dev_start[],
*const __x86_cpu_dev_end[];
+#ifdef CONFIG_CPU_SUP_INTEL
+enum tsx_ctrl_states {
+ TSX_CTRL_ENABLE,
+ TSX_CTRL_DISABLE,
+ TSX_CTRL_NOT_SUPPORTED,
+};
+
+extern __ro_after_init enum tsx_ctrl_states tsx_ctrl_state;
+
+extern void __init tsx_init(void);
+extern void tsx_enable(void);
+extern void tsx_disable(void);
+#else
+static inline void tsx_init(void) { }
+#endif /* CONFIG_CPU_SUP_INTEL */
+
extern void get_cpu_cap(struct cpuinfo_x86 *c);
extern void get_cpu_address_sizes(struct cpuinfo_x86 *c);
extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
@@ -62,4 +78,6 @@ unsigned int aperfmperf_get_khz(int cpu);
extern void x86_spec_ctrl_setup_ap(void);
+extern u64 x86_read_arch_cap_msr(void);
+
#endif /* ARCH_X86_CPU_H */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index c2fdc00df163..11d5c5950e2d 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -762,6 +762,11 @@ static void init_intel(struct cpuinfo_x86 *c)
detect_tme(c);
init_intel_misc_features(c);
+
+ if (tsx_ctrl_state == TSX_CTRL_ENABLE)
+ tsx_enable();
+ if (tsx_ctrl_state == TSX_CTRL_DISABLE)
+ tsx_disable();
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 267daad8c036..c656d92cd708 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -216,6 +216,10 @@ static void __init ms_hyperv_init_platform(void)
int hv_host_info_ecx;
int hv_host_info_edx;
+#ifdef CONFIG_PARAVIRT
+ pv_info.name = "Hyper-V";
+#endif
+
/*
* Extract the features and hints
*/
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index efbd54cc4e69..055c8613b531 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -522,6 +522,10 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
int ret = 0;
rdtgrp = rdtgroup_kn_lock_live(of->kn);
+ if (!rdtgrp) {
+ ret = -ENOENT;
+ goto out;
+ }
md.priv = of->kn->priv;
resid = md.u.rid;
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index a46dee8e78db..2e3b06d6bbc6 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -461,10 +461,8 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
}
rdtgrp = rdtgroup_kn_lock_live(of->kn);
- rdt_last_cmd_clear();
if (!rdtgrp) {
ret = -ENOENT;
- rdt_last_cmd_puts("Directory was removed\n");
goto unlock;
}
@@ -2648,10 +2646,8 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
int ret;
prdtgrp = rdtgroup_kn_lock_live(prgrp_kn);
- rdt_last_cmd_clear();
if (!prdtgrp) {
ret = -ENODEV;
- rdt_last_cmd_puts("Directory was removed\n");
goto out_unlock;
}
diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c
new file mode 100644
index 000000000000..3e20d322bc98
--- /dev/null
+++ b/arch/x86/kernel/cpu/tsx.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel Transactional Synchronization Extensions (TSX) control.
+ *
+ * Copyright (C) 2019 Intel Corporation
+ *
+ * Author:
+ * Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+ */
+
+#include <linux/cpufeature.h>
+
+#include <asm/cmdline.h>
+
+#include "cpu.h"
+
+enum tsx_ctrl_states tsx_ctrl_state __ro_after_init = TSX_CTRL_NOT_SUPPORTED;
+
+void tsx_disable(void)
+{
+ u64 tsx;
+
+ rdmsrl(MSR_IA32_TSX_CTRL, tsx);
+
+ /* Force all transactions to immediately abort */
+ tsx |= TSX_CTRL_RTM_DISABLE;
+
+ /*
+ * Ensure TSX support is not enumerated in CPUID.
+ * This is visible to userspace and will ensure they
+ * do not waste resources trying TSX transactions that
+ * will always abort.
+ */
+ tsx |= TSX_CTRL_CPUID_CLEAR;
+
+ wrmsrl(MSR_IA32_TSX_CTRL, tsx);
+}
+
+void tsx_enable(void)
+{
+ u64 tsx;
+
+ rdmsrl(MSR_IA32_TSX_CTRL, tsx);
+
+ /* Enable the RTM feature in the cpu */
+ tsx &= ~TSX_CTRL_RTM_DISABLE;
+
+ /*
+ * Ensure TSX support is enumerated in CPUID.
+ * This is visible to userspace and will ensure they
+ * can enumerate and use the TSX feature.
+ */
+ tsx &= ~TSX_CTRL_CPUID_CLEAR;
+
+ wrmsrl(MSR_IA32_TSX_CTRL, tsx);
+}
+
+static bool __init tsx_ctrl_is_supported(void)
+{
+ u64 ia32_cap = x86_read_arch_cap_msr();
+
+ /*
+ * TSX is controlled via MSR_IA32_TSX_CTRL. However, support for this
+ * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES.
+ *
+ * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a
+ * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES
+ * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get
+ * MSR_IA32_TSX_CTRL support even after a microcode update. Thus,
+ * tsx= cmdline requests will do nothing on CPUs without
+ * MSR_IA32_TSX_CTRL support.
+ */
+ return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR);
+}
+
+static enum tsx_ctrl_states x86_get_tsx_auto_mode(void)
+{
+ if (boot_cpu_has_bug(X86_BUG_TAA))
+ return TSX_CTRL_DISABLE;
+
+ return TSX_CTRL_ENABLE;
+}
+
+void __init tsx_init(void)
+{
+ char arg[5] = {};
+ int ret;
+
+ if (!tsx_ctrl_is_supported())
+ return;
+
+ ret = cmdline_find_option(boot_command_line, "tsx", arg, sizeof(arg));
+ if (ret >= 0) {
+ if (!strcmp(arg, "on")) {
+ tsx_ctrl_state = TSX_CTRL_ENABLE;
+ } else if (!strcmp(arg, "off")) {
+ tsx_ctrl_state = TSX_CTRL_DISABLE;
+ } else if (!strcmp(arg, "auto")) {
+ tsx_ctrl_state = x86_get_tsx_auto_mode();
+ } else {
+ tsx_ctrl_state = TSX_CTRL_DISABLE;
+ pr_err("tsx: invalid option, defaulting to off\n");
+ }
+ } else {
+ /* tsx= not provided */
+ if (IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_AUTO))
+ tsx_ctrl_state = x86_get_tsx_auto_mode();
+ else if (IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_OFF))
+ tsx_ctrl_state = TSX_CTRL_DISABLE;
+ else
+ tsx_ctrl_state = TSX_CTRL_ENABLE;
+ }
+
+ if (tsx_ctrl_state == TSX_CTRL_DISABLE) {
+ tsx_disable();
+
+ /*
+ * tsx_disable() will change the state of the
+ * RTM CPUID bit. Clear it here since it is now
+ * expected to be not set.
+ */
+ setup_clear_cpu_cap(X86_FEATURE_RTM);
+ } else if (tsx_ctrl_state == TSX_CTRL_ENABLE) {
+
+ /*
+ * HW defaults TSX to be enabled at bootup.
+ * We may still need the TSX enable support
+ * during init for special cases like
+ * kexec after TSX is disabled.
+ */
+ tsx_enable();
+
+ /*
+ * tsx_enable() will change the state of the
+ * RTM CPUID bit. Force it here since it is now
+ * expected to be set.
+ */
+ setup_force_cpu_cap(X86_FEATURE_RTM);
+ }
+}
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 9735139cfdf8..46d732696c1c 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -49,7 +49,7 @@
#define VMWARE_CMD_VCPU_RESERVED 31
#define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \
- __asm__("inl (%%dx)" : \
+ __asm__("inl (%%dx), %%eax" : \
"=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) : \
"a"(VMWARE_HYPERVISOR_MAGIC), \
"c"(VMWARE_CMD_##cmd), \
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 753b8cfe8b8a..87b97897a881 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -94,6 +94,13 @@ static bool in_exception_stack(unsigned long *stack, struct stack_info *info)
BUILD_BUG_ON(N_EXCEPTION_STACKS != 6);
begin = (unsigned long)__this_cpu_read(cea_exception_stacks);
+ /*
+ * Handle the case where stack trace is collected _before_
+ * cea_exception_stacks had been initialized.
+ */
+ if (!begin)
+ return false;
+
end = begin + sizeof(struct cea_exception_stacks);
/* Bail if @stack is outside the exception stack area. */
if (stk < begin || stk >= end)
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 6f6b1d04dadf..4cba91ec8049 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -710,6 +710,8 @@ static struct chipset early_qrk[] __initdata = {
*/
{ PCI_VENDOR_ID_INTEL, 0x0f00,
PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
+ { PCI_VENDOR_ID_INTEL, 0x3ec4,
+ PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
{ PCI_VENDOR_ID_BROADCOM, 0x4331,
PCI_CLASS_NETWORK_OTHER, PCI_ANY_ID, 0, apple_airport_reset},
{}
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 29ffa495bd1c..206a4b6144c2 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -222,13 +222,31 @@ unsigned long __head __startup_64(unsigned long physaddr,
* we might write invalid pmds, when the kernel is relocated
* cleanup_highmap() fixes this up along with the mappings
* beyond _end.
+ *
+ * Only the region occupied by the kernel image has so far
+ * been checked against the table of usable memory regions
+ * provided by the firmware, so invalidate pages outside that
+ * region. A page table entry that maps to a reserved area of
+ * memory would allow processor speculation into that area,
+ * and on some hardware (particularly the UV platform) even
+ * speculative access to some reserved areas is caught as an
+ * error, causing the BIOS to halt the system.
*/
pmd = fixup_pointer(level2_kernel_pgt, physaddr);
- for (i = 0; i < PTRS_PER_PMD; i++) {
+
+ /* invalidate pages before the kernel image */
+ for (i = 0; i < pmd_index((unsigned long)_text); i++)
+ pmd[i] &= ~_PAGE_PRESENT;
+
+ /* fixup pages that are part of the kernel image */
+ for (; i <= pmd_index((unsigned long)_end); i++)
if (pmd[i] & _PAGE_PRESENT)
pmd[i] += load_delta;
- }
+
+ /* invalidate pages after the kernel image */
+ for (; i < PTRS_PER_PMD; i++)
+ pmd[i] &= ~_PAGE_PRESENT;
/*
* Fixup phys_base - remove the memory encryption mask to obtain
diff --git a/arch/x86/kernel/process.h b/arch/x86/kernel/process.h
index 320ab978fb1f..1d0797b2338a 100644
--- a/arch/x86/kernel/process.h
+++ b/arch/x86/kernel/process.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
//
// Code shared between 32 and 64 bit
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index c59454c382fd..7e322e2daaf5 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1505,6 +1505,9 @@ void __init tsc_init(void)
return;
}
+ if (tsc_clocksource_reliable || no_tsc_watchdog)
+ clocksource_tsc_early.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
+
clocksource_register_khz(&clocksource_tsc_early, tsc_khz);
detect_art();
}
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 9c5029cf6f3f..f68c0c753c38 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -363,7 +363,7 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
/* cpuid 7.0.ecx*/
const u32 kvm_cpuid_7_0_ecx_x86_features =
- F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ |
+ F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(RDPID) |
F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 87b0fcc23ef8..b29d00b661ff 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -111,11 +111,6 @@ static inline int apic_enabled(struct kvm_lapic *apic)
(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
-static inline u8 kvm_xapic_id(struct kvm_lapic *apic)
-{
- return kvm_lapic_get_reg(apic, APIC_ID) >> 24;
-}
-
static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
{
return apic->vcpu->vcpu_id;
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 2aad7e226fc0..1f5014852e20 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -242,4 +242,9 @@ static inline enum lapic_mode kvm_apic_mode(u64 apic_base)
return apic_base & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
}
+static inline u8 kvm_xapic_id(struct kvm_lapic *apic)
+{
+ return kvm_lapic_get_reg(apic, APIC_ID) >> 24;
+}
+
#endif
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 24c23c66b226..2ce9da58611e 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -37,6 +37,7 @@
#include <linux/uaccess.h>
#include <linux/hash.h>
#include <linux/kern_levels.h>
+#include <linux/kthread.h>
#include <asm/page.h>
#include <asm/pat.h>
@@ -47,6 +48,35 @@
#include <asm/kvm_page_track.h>
#include "trace.h"
+extern bool itlb_multihit_kvm_mitigation;
+
+static int __read_mostly nx_huge_pages = -1;
+#ifdef CONFIG_PREEMPT_RT
+/* Recovery can cause latency spikes, disable it for PREEMPT_RT. */
+static uint __read_mostly nx_huge_pages_recovery_ratio = 0;
+#else
+static uint __read_mostly nx_huge_pages_recovery_ratio = 60;
+#endif
+
+static int set_nx_huge_pages(const char *val, const struct kernel_param *kp);
+static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp);
+
+static struct kernel_param_ops nx_huge_pages_ops = {
+ .set = set_nx_huge_pages,
+ .get = param_get_bool,
+};
+
+static struct kernel_param_ops nx_huge_pages_recovery_ratio_ops = {
+ .set = set_nx_huge_pages_recovery_ratio,
+ .get = param_get_uint,
+};
+
+module_param_cb(nx_huge_pages, &nx_huge_pages_ops, &nx_huge_pages, 0644);
+__MODULE_PARM_TYPE(nx_huge_pages, "bool");
+module_param_cb(nx_huge_pages_recovery_ratio, &nx_huge_pages_recovery_ratio_ops,
+ &nx_huge_pages_recovery_ratio, 0644);
+__MODULE_PARM_TYPE(nx_huge_pages_recovery_ratio, "uint");
+
/*
* When setting this variable to true it enables Two-Dimensional-Paging
* where the hardware walks 2 page tables:
@@ -352,6 +382,11 @@ static inline bool spte_ad_need_write_protect(u64 spte)
return (spte & SPTE_SPECIAL_MASK) != SPTE_AD_ENABLED_MASK;
}
+static bool is_nx_huge_page_enabled(void)
+{
+ return READ_ONCE(nx_huge_pages);
+}
+
static inline u64 spte_shadow_accessed_mask(u64 spte)
{
MMU_WARN_ON(is_mmio_spte(spte));
@@ -1190,6 +1225,17 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
kvm_mmu_gfn_disallow_lpage(slot, gfn);
}
+static void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+ if (sp->lpage_disallowed)
+ return;
+
+ ++kvm->stat.nx_lpage_splits;
+ list_add_tail(&sp->lpage_disallowed_link,
+ &kvm->arch.lpage_disallowed_mmu_pages);
+ sp->lpage_disallowed = true;
+}
+
static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
{
struct kvm_memslots *slots;
@@ -1207,6 +1253,13 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
kvm_mmu_gfn_allow_lpage(slot, gfn);
}
+static void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+ --kvm->stat.nx_lpage_splits;
+ sp->lpage_disallowed = false;
+ list_del(&sp->lpage_disallowed_link);
+}
+
static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level,
struct kvm_memory_slot *slot)
{
@@ -2792,6 +2845,9 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
kvm_reload_remote_mmus(kvm);
}
+ if (sp->lpage_disallowed)
+ unaccount_huge_nx_page(kvm, sp);
+
sp->role.invalid = 1;
return list_unstable;
}
@@ -3013,6 +3069,11 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
if (!speculative)
spte |= spte_shadow_accessed_mask(spte);
+ if (level > PT_PAGE_TABLE_LEVEL && (pte_access & ACC_EXEC_MASK) &&
+ is_nx_huge_page_enabled()) {
+ pte_access &= ~ACC_EXEC_MASK;
+ }
+
if (pte_access & ACC_EXEC_MASK)
spte |= shadow_x_mask;
else
@@ -3233,9 +3294,32 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
__direct_pte_prefetch(vcpu, sp, sptep);
}
+static void disallowed_hugepage_adjust(struct kvm_shadow_walk_iterator it,
+ gfn_t gfn, kvm_pfn_t *pfnp, int *levelp)
+{
+ int level = *levelp;
+ u64 spte = *it.sptep;
+
+ if (it.level == level && level > PT_PAGE_TABLE_LEVEL &&
+ is_nx_huge_page_enabled() &&
+ is_shadow_present_pte(spte) &&
+ !is_large_pte(spte)) {
+ /*
+ * A small SPTE exists for this pfn, but FNAME(fetch)
+ * and __direct_map would like to create a large PTE
+ * instead: just force them to go down another level,
+ * patching back for them into pfn the next 9 bits of
+ * the address.
+ */
+ u64 page_mask = KVM_PAGES_PER_HPAGE(level) - KVM_PAGES_PER_HPAGE(level - 1);
+ *pfnp |= gfn & page_mask;
+ (*levelp)--;
+ }
+}
+
static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write,
int map_writable, int level, kvm_pfn_t pfn,
- bool prefault)
+ bool prefault, bool lpage_disallowed)
{
struct kvm_shadow_walk_iterator it;
struct kvm_mmu_page *sp;
@@ -3248,6 +3332,12 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write,
trace_kvm_mmu_spte_requested(gpa, level, pfn);
for_each_shadow_entry(vcpu, gpa, it) {
+ /*
+ * We cannot overwrite existing page tables with an NX
+ * large page, as the leaf could be executable.
+ */
+ disallowed_hugepage_adjust(it, gfn, &pfn, &level);
+
base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
if (it.level == level)
break;
@@ -3258,6 +3348,8 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write,
it.level - 1, true, ACC_ALL);
link_shadow_page(vcpu, it.sptep, sp);
+ if (lpage_disallowed)
+ account_huge_nx_page(vcpu->kvm, sp);
}
}
@@ -3306,7 +3398,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
* here.
*/
if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) &&
- level == PT_PAGE_TABLE_LEVEL &&
+ !kvm_is_zone_device_pfn(pfn) && level == PT_PAGE_TABLE_LEVEL &&
PageTransCompoundMap(pfn_to_page(pfn)) &&
!mmu_gfn_lpage_is_disallowed(vcpu, gfn, PT_DIRECTORY_LEVEL)) {
unsigned long mask;
@@ -3550,11 +3642,14 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
{
int r;
int level;
- bool force_pt_level = false;
+ bool force_pt_level;
kvm_pfn_t pfn;
unsigned long mmu_seq;
bool map_writable, write = error_code & PFERR_WRITE_MASK;
+ bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) &&
+ is_nx_huge_page_enabled();
+ force_pt_level = lpage_disallowed;
level = mapping_level(vcpu, gfn, &force_pt_level);
if (likely(!force_pt_level)) {
/*
@@ -3588,7 +3683,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
goto out_unlock;
if (likely(!force_pt_level))
transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
- r = __direct_map(vcpu, v, write, map_writable, level, pfn, prefault);
+ r = __direct_map(vcpu, v, write, map_writable, level, pfn,
+ prefault, false);
out_unlock:
spin_unlock(&vcpu->kvm->mmu_lock);
kvm_release_pfn_clean(pfn);
@@ -4174,6 +4270,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
unsigned long mmu_seq;
int write = error_code & PFERR_WRITE_MASK;
bool map_writable;
+ bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) &&
+ is_nx_huge_page_enabled();
MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa));
@@ -4184,8 +4282,9 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
if (r)
return r;
- force_pt_level = !check_hugepage_cache_consistency(vcpu, gfn,
- PT_DIRECTORY_LEVEL);
+ force_pt_level =
+ lpage_disallowed ||
+ !check_hugepage_cache_consistency(vcpu, gfn, PT_DIRECTORY_LEVEL);
level = mapping_level(vcpu, gfn, &force_pt_level);
if (likely(!force_pt_level)) {
if (level > PT_DIRECTORY_LEVEL &&
@@ -4214,7 +4313,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
goto out_unlock;
if (likely(!force_pt_level))
transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
- r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, prefault);
+ r = __direct_map(vcpu, gpa, write, map_writable, level, pfn,
+ prefault, lpage_disallowed);
out_unlock:
spin_unlock(&vcpu->kvm->mmu_lock);
kvm_release_pfn_clean(pfn);
@@ -5914,9 +6014,9 @@ restart:
* the guest, and the guest page table is using 4K page size
* mapping if the indirect sp has level = 1.
*/
- if (sp->role.direct &&
- !kvm_is_reserved_pfn(pfn) &&
- PageTransCompoundMap(pfn_to_page(pfn))) {
+ if (sp->role.direct && !kvm_is_reserved_pfn(pfn) &&
+ !kvm_is_zone_device_pfn(pfn) &&
+ PageTransCompoundMap(pfn_to_page(pfn))) {
pte_list_remove(rmap_head, sptep);
if (kvm_available_flush_tlb_with_range())
@@ -6155,10 +6255,59 @@ static void kvm_set_mmio_spte_mask(void)
kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK);
}
+static bool get_nx_auto_mode(void)
+{
+ /* Return true when CPU has the bug, and mitigations are ON */
+ return boot_cpu_has_bug(X86_BUG_ITLB_MULTIHIT) && !cpu_mitigations_off();
+}
+
+static void __set_nx_huge_pages(bool val)
+{
+ nx_huge_pages = itlb_multihit_kvm_mitigation = val;
+}
+
+static int set_nx_huge_pages(const char *val, const struct kernel_param *kp)
+{
+ bool old_val = nx_huge_pages;
+ bool new_val;
+
+ /* In "auto" mode deploy workaround only if CPU has the bug. */
+ if (sysfs_streq(val, "off"))
+ new_val = 0;
+ else if (sysfs_streq(val, "force"))
+ new_val = 1;
+ else if (sysfs_streq(val, "auto"))
+ new_val = get_nx_auto_mode();
+ else if (strtobool(val, &new_val) < 0)
+ return -EINVAL;
+
+ __set_nx_huge_pages(new_val);
+
+ if (new_val != old_val) {
+ struct kvm *kvm;
+
+ mutex_lock(&kvm_lock);
+
+ list_for_each_entry(kvm, &vm_list, vm_list) {
+ mutex_lock(&kvm->slots_lock);
+ kvm_mmu_zap_all_fast(kvm);
+ mutex_unlock(&kvm->slots_lock);
+
+ wake_up_process(kvm->arch.nx_lpage_recovery_thread);
+ }
+ mutex_unlock(&kvm_lock);
+ }
+
+ return 0;
+}
+
int kvm_mmu_module_init(void)
{
int ret = -ENOMEM;
+ if (nx_huge_pages == -1)
+ __set_nx_huge_pages(get_nx_auto_mode());
+
/*
* MMU roles use union aliasing which is, generally speaking, an
* undefined behavior. However, we supposedly know how compilers behave
@@ -6238,3 +6387,116 @@ void kvm_mmu_module_exit(void)
unregister_shrinker(&mmu_shrinker);
mmu_audit_disable();
}
+
+static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp)
+{
+ unsigned int old_val;
+ int err;
+
+ old_val = nx_huge_pages_recovery_ratio;
+ err = param_set_uint(val, kp);
+ if (err)
+ return err;
+
+ if (READ_ONCE(nx_huge_pages) &&
+ !old_val && nx_huge_pages_recovery_ratio) {
+ struct kvm *kvm;
+
+ mutex_lock(&kvm_lock);
+
+ list_for_each_entry(kvm, &vm_list, vm_list)
+ wake_up_process(kvm->arch.nx_lpage_recovery_thread);
+
+ mutex_unlock(&kvm_lock);
+ }
+
+ return err;
+}
+
+static void kvm_recover_nx_lpages(struct kvm *kvm)
+{
+ int rcu_idx;
+ struct kvm_mmu_page *sp;
+ unsigned int ratio;
+ LIST_HEAD(invalid_list);
+ ulong to_zap;
+
+ rcu_idx = srcu_read_lock(&kvm->srcu);
+ spin_lock(&kvm->mmu_lock);
+
+ ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
+ to_zap = ratio ? DIV_ROUND_UP(kvm->stat.nx_lpage_splits, ratio) : 0;
+ while (to_zap && !list_empty(&kvm->arch.lpage_disallowed_mmu_pages)) {
+ /*
+ * We use a separate list instead of just using active_mmu_pages
+ * because the number of lpage_disallowed pages is expected to
+ * be relatively small compared to the total.
+ */
+ sp = list_first_entry(&kvm->arch.lpage_disallowed_mmu_pages,
+ struct kvm_mmu_page,
+ lpage_disallowed_link);
+ WARN_ON_ONCE(!sp->lpage_disallowed);
+ kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
+ WARN_ON_ONCE(sp->lpage_disallowed);
+
+ if (!--to_zap || need_resched() || spin_needbreak(&kvm->mmu_lock)) {
+ kvm_mmu_commit_zap_page(kvm, &invalid_list);
+ if (to_zap)
+ cond_resched_lock(&kvm->mmu_lock);
+ }
+ }
+
+ spin_unlock(&kvm->mmu_lock);
+ srcu_read_unlock(&kvm->srcu, rcu_idx);
+}
+
+static long get_nx_lpage_recovery_timeout(u64 start_time)
+{
+ return READ_ONCE(nx_huge_pages) && READ_ONCE(nx_huge_pages_recovery_ratio)
+ ? start_time + 60 * HZ - get_jiffies_64()
+ : MAX_SCHEDULE_TIMEOUT;
+}
+
+static int kvm_nx_lpage_recovery_worker(struct kvm *kvm, uintptr_t data)
+{
+ u64 start_time;
+ long remaining_time;
+
+ while (true) {
+ start_time = get_jiffies_64();
+ remaining_time = get_nx_lpage_recovery_timeout(start_time);
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ while (!kthread_should_stop() && remaining_time > 0) {
+ schedule_timeout(remaining_time);
+ remaining_time = get_nx_lpage_recovery_timeout(start_time);
+ set_current_state(TASK_INTERRUPTIBLE);
+ }
+
+ set_current_state(TASK_RUNNING);
+
+ if (kthread_should_stop())
+ return 0;
+
+ kvm_recover_nx_lpages(kvm);
+ }
+}
+
+int kvm_mmu_post_init_vm(struct kvm *kvm)
+{
+ int err;
+
+ err = kvm_vm_create_worker_thread(kvm, kvm_nx_lpage_recovery_worker, 0,
+ "kvm-nx-lpage-recovery",
+ &kvm->arch.nx_lpage_recovery_thread);
+ if (!err)
+ kthread_unpark(kvm->arch.nx_lpage_recovery_thread);
+
+ return err;
+}
+
+void kvm_mmu_pre_destroy_vm(struct kvm *kvm)
+{
+ if (kvm->arch.nx_lpage_recovery_thread)
+ kthread_stop(kvm->arch.nx_lpage_recovery_thread);
+}
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 11f8ec89433b..d55674f44a18 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -210,4 +210,8 @@ void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn);
bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
struct kvm_memory_slot *slot, u64 gfn);
int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
+
+int kvm_mmu_post_init_vm(struct kvm *kvm);
+void kvm_mmu_pre_destroy_vm(struct kvm *kvm);
+
#endif
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 7d5cdb3af594..97b21e7fd013 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -614,13 +614,14 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
struct guest_walker *gw,
int write_fault, int hlevel,
- kvm_pfn_t pfn, bool map_writable, bool prefault)
+ kvm_pfn_t pfn, bool map_writable, bool prefault,
+ bool lpage_disallowed)
{
struct kvm_mmu_page *sp = NULL;
struct kvm_shadow_walk_iterator it;
unsigned direct_access, access = gw->pt_access;
int top_level, ret;
- gfn_t base_gfn;
+ gfn_t gfn, base_gfn;
direct_access = gw->pte_access;
@@ -665,13 +666,25 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
link_shadow_page(vcpu, it.sptep, sp);
}
- base_gfn = gw->gfn;
+ /*
+ * FNAME(page_fault) might have clobbered the bottom bits of
+ * gw->gfn, restore them from the virtual address.
+ */
+ gfn = gw->gfn | ((addr & PT_LVL_OFFSET_MASK(gw->level)) >> PAGE_SHIFT);
+ base_gfn = gfn;
trace_kvm_mmu_spte_requested(addr, gw->level, pfn);
for (; shadow_walk_okay(&it); shadow_walk_next(&it)) {
clear_sp_write_flooding_count(it.sptep);
- base_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
+
+ /*
+ * We cannot overwrite existing page tables with an NX
+ * large page, as the leaf could be executable.
+ */
+ disallowed_hugepage_adjust(it, gfn, &pfn, &hlevel);
+
+ base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
if (it.level == hlevel)
break;
@@ -683,6 +696,8 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
sp = kvm_mmu_get_page(vcpu, base_gfn, addr,
it.level - 1, true, direct_access);
link_shadow_page(vcpu, it.sptep, sp);
+ if (lpage_disallowed)
+ account_huge_nx_page(vcpu->kvm, sp);
}
}
@@ -759,9 +774,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
int r;
kvm_pfn_t pfn;
int level = PT_PAGE_TABLE_LEVEL;
- bool force_pt_level = false;
unsigned long mmu_seq;
bool map_writable, is_self_change_mapping;
+ bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) &&
+ is_nx_huge_page_enabled();
+ bool force_pt_level = lpage_disallowed;
pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
@@ -851,7 +868,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
if (!force_pt_level)
transparent_hugepage_adjust(vcpu, walker.gfn, &pfn, &level);
r = FNAME(fetch)(vcpu, addr, &walker, write_fault,
- level, pfn, map_writable, prefault);
+ level, pfn, map_writable, prefault, lpage_disallowed);
kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
out_unlock:
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index f8ecb6df5106..c5673bda4b66 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -734,8 +734,14 @@ static int get_npt_level(struct kvm_vcpu *vcpu)
static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
vcpu->arch.efer = efer;
- if (!npt_enabled && !(efer & EFER_LMA))
- efer &= ~EFER_LME;
+
+ if (!npt_enabled) {
+ /* Shadow paging assumes NX to be available. */
+ efer |= EFER_NX;
+
+ if (!(efer & EFER_LMA))
+ efer &= ~EFER_LME;
+ }
to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
@@ -4591,6 +4597,7 @@ static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
int ret = 0;
struct vcpu_svm *svm = to_svm(vcpu);
u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR);
+ u32 id = kvm_xapic_id(vcpu->arch.apic);
if (ldr == svm->ldr_reg)
return 0;
@@ -4598,7 +4605,7 @@ static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
avic_invalidate_logical_id_entry(vcpu);
if (ldr)
- ret = avic_ldr_write(vcpu, vcpu->vcpu_id, ldr);
+ ret = avic_ldr_write(vcpu, id, ldr);
if (!ret)
svm->ldr_reg = ldr;
@@ -4610,8 +4617,7 @@ static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu)
{
u64 *old, *new;
struct vcpu_svm *svm = to_svm(vcpu);
- u32 apic_id_reg = kvm_lapic_get_reg(vcpu->arch.apic, APIC_ID);
- u32 id = (apic_id_reg >> 24) & 0xff;
+ u32 id = kvm_xapic_id(vcpu->arch.apic);
if (vcpu->vcpu_id == id)
return 0;
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index e76eb4f07f6c..0e7c9301fe86 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2917,7 +2917,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12);
-static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
+static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -2937,19 +2937,18 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
vmx->nested.apic_access_page = NULL;
}
page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->apic_access_addr);
- /*
- * If translation failed, no matter: This feature asks
- * to exit when accessing the given address, and if it
- * can never be accessed, this feature won't do
- * anything anyway.
- */
if (!is_error_page(page)) {
vmx->nested.apic_access_page = page;
hpa = page_to_phys(vmx->nested.apic_access_page);
vmcs_write64(APIC_ACCESS_ADDR, hpa);
} else {
- secondary_exec_controls_clearbit(vmx,
- SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
+ pr_debug_ratelimited("%s: no backing 'struct page' for APIC-access address in vmcs12\n",
+ __func__);
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror =
+ KVM_INTERNAL_ERROR_EMULATION;
+ vcpu->run->internal.ndata = 0;
+ return false;
}
}
@@ -2994,6 +2993,7 @@ static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
exec_controls_setbit(vmx, CPU_BASED_USE_MSR_BITMAPS);
else
exec_controls_clearbit(vmx, CPU_BASED_USE_MSR_BITMAPS);
+ return true;
}
/*
@@ -3032,13 +3032,15 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
/*
* If from_vmentry is false, this is being called from state restore (either RSM
* or KVM_SET_NESTED_STATE). Otherwise it's called from vmlaunch/vmresume.
-+ *
-+ * Returns:
-+ * 0 - success, i.e. proceed with actual VMEnter
-+ * 1 - consistency check VMExit
-+ * -1 - consistency check VMFail
+ *
+ * Returns:
+ * NVMX_ENTRY_SUCCESS: Entered VMX non-root mode
+ * NVMX_ENTRY_VMFAIL: Consistency check VMFail
+ * NVMX_ENTRY_VMEXIT: Consistency check VMExit
+ * NVMX_ENTRY_KVM_INTERNAL_ERROR: KVM internal error
*/
-int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
+enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
+ bool from_vmentry)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
@@ -3081,11 +3083,12 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
prepare_vmcs02_early(vmx, vmcs12);
if (from_vmentry) {
- nested_get_vmcs12_pages(vcpu);
+ if (unlikely(!nested_get_vmcs12_pages(vcpu)))
+ return NVMX_VMENTRY_KVM_INTERNAL_ERROR;
if (nested_vmx_check_vmentry_hw(vcpu)) {
vmx_switch_vmcs(vcpu, &vmx->vmcs01);
- return -1;
+ return NVMX_VMENTRY_VMFAIL;
}
if (nested_vmx_check_guest_state(vcpu, vmcs12, &exit_qual))
@@ -3149,7 +3152,7 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
* returned as far as L1 is concerned. It will only return (and set
* the success flag) when L2 exits (see nested_vmx_vmexit()).
*/
- return 0;
+ return NVMX_VMENTRY_SUCCESS;
/*
* A failed consistency check that leads to a VMExit during L1's
@@ -3165,14 +3168,14 @@ vmentry_fail_vmexit:
vmx_switch_vmcs(vcpu, &vmx->vmcs01);
if (!from_vmentry)
- return 1;
+ return NVMX_VMENTRY_VMEXIT;
load_vmcs12_host_state(vcpu, vmcs12);
vmcs12->vm_exit_reason = exit_reason | VMX_EXIT_REASONS_FAILED_VMENTRY;
vmcs12->exit_qualification = exit_qual;
if (enable_shadow_vmcs || vmx->nested.hv_evmcs)
vmx->nested.need_vmcs12_to_shadow_sync = true;
- return 1;
+ return NVMX_VMENTRY_VMEXIT;
}
/*
@@ -3182,9 +3185,9 @@ vmentry_fail_vmexit:
static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
{
struct vmcs12 *vmcs12;
+ enum nvmx_vmentry_status status;
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 interrupt_shadow = vmx_get_interrupt_shadow(vcpu);
- int ret;
if (!nested_vmx_check_permission(vcpu))
return 1;
@@ -3244,13 +3247,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
* the nested entry.
*/
vmx->nested.nested_run_pending = 1;
- ret = nested_vmx_enter_non_root_mode(vcpu, true);
- vmx->nested.nested_run_pending = !ret;
- if (ret > 0)
- return 1;
- else if (ret)
- return nested_vmx_failValid(vcpu,
- VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+ status = nested_vmx_enter_non_root_mode(vcpu, true);
+ if (unlikely(status != NVMX_VMENTRY_SUCCESS))
+ goto vmentry_failed;
/* Hide L1D cache contents from the nested guest. */
vmx->vcpu.arch.l1tf_flush_l1d = true;
@@ -3281,6 +3280,15 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
return kvm_vcpu_halt(vcpu);
}
return 1;
+
+vmentry_failed:
+ vmx->nested.nested_run_pending = 0;
+ if (status == NVMX_VMENTRY_KVM_INTERNAL_ERROR)
+ return 0;
+ if (status == NVMX_VMENTRY_VMEXIT)
+ return 1;
+ WARN_ON_ONCE(status != NVMX_VMENTRY_VMFAIL);
+ return nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
}
/*
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index 187d39bf0bf1..6280f33e5fa6 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -6,6 +6,16 @@
#include "vmcs12.h"
#include "vmx.h"
+/*
+ * Status returned by nested_vmx_enter_non_root_mode():
+ */
+enum nvmx_vmentry_status {
+ NVMX_VMENTRY_SUCCESS, /* Entered VMX non-root mode */
+ NVMX_VMENTRY_VMFAIL, /* Consistency check VMFail */
+ NVMX_VMENTRY_VMEXIT, /* Consistency check VMExit */
+ NVMX_VMENTRY_KVM_INTERNAL_ERROR,/* KVM internal error */
+};
+
void vmx_leave_nested(struct kvm_vcpu *vcpu);
void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps,
bool apicv);
@@ -13,7 +23,8 @@ void nested_vmx_hardware_unsetup(void);
__init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *));
void nested_vmx_vcpu_setup(void);
void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu);
-int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry);
+enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
+ bool from_vmentry);
bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason);
void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
u32 exit_intr_info, unsigned long exit_qualification);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index e7970a2e8eae..04a8212704c1 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -969,17 +969,9 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
u64 guest_efer = vmx->vcpu.arch.efer;
u64 ignore_bits = 0;
- if (!enable_ept) {
- /*
- * NX is needed to handle CR0.WP=1, CR4.SMEP=1. Testing
- * host CPUID is more efficient than testing guest CPUID
- * or CR4. Host SMEP is anyway a requirement for guest SMEP.
- */
- if (boot_cpu_has(X86_FEATURE_SMEP))
- guest_efer |= EFER_NX;
- else if (!(guest_efer & EFER_NX))
- ignore_bits |= EFER_NX;
- }
+ /* Shadow paging assumes NX to be available. */
+ if (!enable_ept)
+ guest_efer |= EFER_NX;
/*
* LMA and LME handled by hardware; SCE meaningless outside long mode.
@@ -1276,6 +1268,18 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
return;
+ /*
+ * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
+ * PI.NDST: pi_post_block is the one expected to change PID.NDST and the
+ * wakeup handler expects the vCPU to be on the blocked_vcpu_list that
+ * matches PI.NDST. Otherwise, a vcpu may not be able to be woken up
+ * correctly.
+ */
+ if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || vcpu->cpu == cpu) {
+ pi_clear_sn(pi_desc);
+ goto after_clear_sn;
+ }
+
/* The full case. */
do {
old.control = new.control = pi_desc->control;
@@ -1291,6 +1295,8 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
} while (cmpxchg64(&pi_desc->control, old.control,
new.control) != old.control);
+after_clear_sn:
+
/*
* Clear SN before reading the bitmap. The VT-d firmware
* writes the bitmap and reads SN atomically (5.2.3 in the
@@ -1299,7 +1305,7 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
*/
smp_mb__after_atomic();
- if (!bitmap_empty((unsigned long *)pi_desc->pir, NR_VECTORS))
+ if (!pi_is_pir_empty(pi_desc))
pi_set_on(pi_desc);
}
@@ -5543,14 +5549,6 @@ static int handle_encls(struct kvm_vcpu *vcpu)
return 1;
}
-static int handle_unexpected_vmexit(struct kvm_vcpu *vcpu)
-{
- kvm_skip_emulated_instruction(vcpu);
- WARN_ONCE(1, "Unexpected VM-Exit Reason = 0x%x",
- vmcs_read32(VM_EXIT_REASON));
- return 1;
-}
-
/*
* The exit handlers return 1 if the exit was handled fully and guest execution
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
@@ -5602,15 +5600,11 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_INVVPID] = handle_vmx_instruction,
[EXIT_REASON_RDRAND] = handle_invalid_op,
[EXIT_REASON_RDSEED] = handle_invalid_op,
- [EXIT_REASON_XSAVES] = handle_unexpected_vmexit,
- [EXIT_REASON_XRSTORS] = handle_unexpected_vmexit,
[EXIT_REASON_PML_FULL] = handle_pml_full,
[EXIT_REASON_INVPCID] = handle_invpcid,
[EXIT_REASON_VMFUNC] = handle_vmx_instruction,
[EXIT_REASON_PREEMPTION_TIMER] = handle_preemption_timer,
[EXIT_REASON_ENCLS] = handle_encls,
- [EXIT_REASON_UMWAIT] = handle_unexpected_vmexit,
- [EXIT_REASON_TPAUSE] = handle_unexpected_vmexit,
};
static const int kvm_vmx_max_exit_handlers =
@@ -6157,7 +6151,7 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
if (pi_test_on(&vmx->pi_desc)) {
pi_clear_on(&vmx->pi_desc);
/*
- * IOMMU can write to PIR.ON, so the barrier matters even on UP.
+ * IOMMU can write to PID.ON, so the barrier matters even on UP.
* But on x86 this is just a compiler barrier anyway.
*/
smp_mb__after_atomic();
@@ -6187,7 +6181,10 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
static bool vmx_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
{
- return pi_test_on(vcpu_to_pi_desc(vcpu));
+ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+ return pi_test_on(pi_desc) ||
+ (pi_test_sn(pi_desc) && !pi_is_pir_empty(pi_desc));
}
static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index bee16687dc0b..5a0f34b1e226 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -355,6 +355,11 @@ static inline int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
}
+static inline bool pi_is_pir_empty(struct pi_desc *pi_desc)
+{
+ return bitmap_empty((unsigned long *)pi_desc->pir, NR_VECTORS);
+}
+
static inline void pi_set_sn(struct pi_desc *pi_desc)
{
set_bit(POSTED_INTR_SN,
@@ -373,6 +378,12 @@ static inline void pi_clear_on(struct pi_desc *pi_desc)
(unsigned long *)&pi_desc->control);
}
+static inline void pi_clear_sn(struct pi_desc *pi_desc)
+{
+ clear_bit(POSTED_INTR_SN,
+ (unsigned long *)&pi_desc->control);
+}
+
static inline int pi_test_on(struct pi_desc *pi_desc)
{
return test_bit(POSTED_INTR_ON,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 661e2bf38526..5d530521f11d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -213,6 +213,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "mmu_unsync", VM_STAT(mmu_unsync) },
{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
{ "largepages", VM_STAT(lpages, .mode = 0444) },
+ { "nx_largepages_splitted", VM_STAT(nx_lpage_splits, .mode = 0444) },
{ "max_mmu_page_hash_collisions",
VM_STAT(max_mmu_page_hash_collisions) },
{ NULL }
@@ -360,8 +361,7 @@ EXPORT_SYMBOL_GPL(kvm_set_apic_base);
asmlinkage __visible void kvm_spurious_fault(void)
{
/* Fault while not rebooting. We want the trace. */
- if (!kvm_rebooting)
- BUG();
+ BUG_ON(!kvm_rebooting);
}
EXPORT_SYMBOL_GPL(kvm_spurious_fault);
@@ -1133,13 +1133,15 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
* List of msr numbers which we expose to userspace through KVM_GET_MSRS
* and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
*
- * This list is modified at module load time to reflect the
+ * The three MSR lists(msrs_to_save, emulated_msrs, msr_based_features)
+ * extract the supported MSRs from the related const lists.
+ * msrs_to_save is selected from the msrs_to_save_all to reflect the
* capabilities of the host cpu. This capabilities test skips MSRs that are
- * kvm-specific. Those are put in emulated_msrs; filtering of emulated_msrs
+ * kvm-specific. Those are put in emulated_msrs_all; filtering of emulated_msrs
* may depend on host virtualization features rather than host cpu features.
*/
-static u32 msrs_to_save[] = {
+static const u32 msrs_to_save_all[] = {
MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
MSR_STAR,
#ifdef CONFIG_X86_64
@@ -1180,9 +1182,10 @@ static u32 msrs_to_save[] = {
MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
};
+static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_all)];
static unsigned num_msrs_to_save;
-static u32 emulated_msrs[] = {
+static const u32 emulated_msrs_all[] = {
MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
@@ -1221,7 +1224,7 @@ static u32 emulated_msrs[] = {
* by arch/x86/kvm/vmx/nested.c based on CPUID or other MSRs.
* We always support the "true" VMX control MSRs, even if the host
* processor does not, so I am putting these registers here rather
- * than in msrs_to_save.
+ * than in msrs_to_save_all.
*/
MSR_IA32_VMX_BASIC,
MSR_IA32_VMX_TRUE_PINBASED_CTLS,
@@ -1240,13 +1243,14 @@ static u32 emulated_msrs[] = {
MSR_KVM_POLL_CONTROL,
};
+static u32 emulated_msrs[ARRAY_SIZE(emulated_msrs_all)];
static unsigned num_emulated_msrs;
/*
* List of msr numbers which are used to expose MSR-based features that
* can be used by a hypervisor to validate requested CPU features.
*/
-static u32 msr_based_features[] = {
+static const u32 msr_based_features_all[] = {
MSR_IA32_VMX_BASIC,
MSR_IA32_VMX_TRUE_PINBASED_CTLS,
MSR_IA32_VMX_PINBASED_CTLS,
@@ -1271,6 +1275,7 @@ static u32 msr_based_features[] = {
MSR_IA32_ARCH_CAPABILITIES,
};
+static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)];
static unsigned int num_msr_based_features;
static u64 kvm_get_arch_capabilities(void)
@@ -1281,6 +1286,14 @@ static u64 kvm_get_arch_capabilities(void)
rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data);
/*
+ * If nx_huge_pages is enabled, KVM's shadow paging will ensure that
+ * the nested hypervisor runs with NX huge pages. If it is not,
+ * L1 is anyway vulnerable to ITLB_MULTIHIT explots from other
+ * L1 guests, so it need not worry about its own (L2) guests.
+ */
+ data |= ARCH_CAP_PSCHANGE_MC_NO;
+
+ /*
* If we're doing cache flushes (either "always" or "cond")
* we will do one whenever the guest does a vmlaunch/vmresume.
* If an outer hypervisor is doing the cache flush for us
@@ -1299,6 +1312,25 @@ static u64 kvm_get_arch_capabilities(void)
if (!boot_cpu_has_bug(X86_BUG_MDS))
data |= ARCH_CAP_MDS_NO;
+ /*
+ * On TAA affected systems, export MDS_NO=0 when:
+ * - TSX is enabled on the host, i.e. X86_FEATURE_RTM=1.
+ * - Updated microcode is present. This is detected by
+ * the presence of ARCH_CAP_TSX_CTRL_MSR and ensures
+ * that VERW clears CPU buffers.
+ *
+ * When MDS_NO=0 is exported, guests deploy clear CPU buffer
+ * mitigation and don't complain:
+ *
+ * "Vulnerable: Clear CPU buffers attempted, no microcode"
+ *
+ * If TSX is disabled on the system, guests are also mitigated against
+ * TAA and clear CPU buffer mitigation is not required for guests.
+ */
+ if (boot_cpu_has_bug(X86_BUG_TAA) && boot_cpu_has(X86_FEATURE_RTM) &&
+ (data & ARCH_CAP_TSX_CTRL_MSR))
+ data &= ~ARCH_CAP_MDS_NO;
+
return data;
}
@@ -2537,6 +2569,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
static void kvmclock_reset(struct kvm_vcpu *vcpu)
{
vcpu->arch.pv_time_enabled = false;
+ vcpu->arch.time = 0;
}
static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa)
@@ -2702,8 +2735,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_KVM_SYSTEM_TIME: {
struct kvm_arch *ka = &vcpu->kvm->arch;
- kvmclock_reset(vcpu);
-
if (vcpu->vcpu_id == 0 && !msr_info->host_initiated) {
bool tmp = (msr == MSR_KVM_SYSTEM_TIME);
@@ -2717,14 +2748,13 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
/* we verify if the enable bit is set... */
+ vcpu->arch.pv_time_enabled = false;
if (!(data & 1))
break;
- if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
+ if (!kvm_gfn_to_hva_cache_init(vcpu->kvm,
&vcpu->arch.pv_time, data & ~1ULL,
sizeof(struct pvclock_vcpu_time_info)))
- vcpu->arch.pv_time_enabled = false;
- else
vcpu->arch.pv_time_enabled = true;
break;
@@ -5093,22 +5123,26 @@ static void kvm_init_msr_list(void)
{
struct x86_pmu_capability x86_pmu;
u32 dummy[2];
- unsigned i, j;
+ unsigned i;
BUILD_BUG_ON_MSG(INTEL_PMC_MAX_FIXED != 4,
- "Please update the fixed PMCs in msrs_to_save[]");
+ "Please update the fixed PMCs in msrs_to_saved_all[]");
perf_get_x86_pmu_capability(&x86_pmu);
- for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
- if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
+ num_msrs_to_save = 0;
+ num_emulated_msrs = 0;
+ num_msr_based_features = 0;
+
+ for (i = 0; i < ARRAY_SIZE(msrs_to_save_all); i++) {
+ if (rdmsr_safe(msrs_to_save_all[i], &dummy[0], &dummy[1]) < 0)
continue;
/*
* Even MSRs that are valid in the host may not be exposed
* to the guests in some cases.
*/
- switch (msrs_to_save[i]) {
+ switch (msrs_to_save_all[i]) {
case MSR_IA32_BNDCFGS:
if (!kvm_mpx_supported())
continue;
@@ -5136,17 +5170,17 @@ static void kvm_init_msr_list(void)
break;
case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: {
if (!kvm_x86_ops->pt_supported() ||
- msrs_to_save[i] - MSR_IA32_RTIT_ADDR0_A >=
+ msrs_to_save_all[i] - MSR_IA32_RTIT_ADDR0_A >=
intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2)
continue;
break;
case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 17:
- if (msrs_to_save[i] - MSR_ARCH_PERFMON_PERFCTR0 >=
+ if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_PERFCTR0 >=
min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
continue;
break;
case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL0 + 17:
- if (msrs_to_save[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
+ if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
continue;
}
@@ -5154,34 +5188,25 @@ static void kvm_init_msr_list(void)
break;
}
- if (j < i)
- msrs_to_save[j] = msrs_to_save[i];
- j++;
+ msrs_to_save[num_msrs_to_save++] = msrs_to_save_all[i];
}
- num_msrs_to_save = j;
- for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) {
- if (!kvm_x86_ops->has_emulated_msr(emulated_msrs[i]))
+ for (i = 0; i < ARRAY_SIZE(emulated_msrs_all); i++) {
+ if (!kvm_x86_ops->has_emulated_msr(emulated_msrs_all[i]))
continue;
- if (j < i)
- emulated_msrs[j] = emulated_msrs[i];
- j++;
+ emulated_msrs[num_emulated_msrs++] = emulated_msrs_all[i];
}
- num_emulated_msrs = j;
- for (i = j = 0; i < ARRAY_SIZE(msr_based_features); i++) {
+ for (i = 0; i < ARRAY_SIZE(msr_based_features_all); i++) {
struct kvm_msr_entry msr;
- msr.index = msr_based_features[i];
+ msr.index = msr_based_features_all[i];
if (kvm_get_msr_feature(&msr))
continue;
- if (j < i)
- msr_based_features[j] = msr_based_features[i];
- j++;
+ msr_based_features[num_msr_based_features++] = msr_based_features_all[i];
}
- num_msr_based_features = j;
}
static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
@@ -7941,8 +7966,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
bool req_immediate_exit = false;
if (kvm_request_pending(vcpu)) {
- if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu))
- kvm_x86_ops->get_vmcs12_pages(vcpu);
+ if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu)) {
+ if (unlikely(!kvm_x86_ops->get_vmcs12_pages(vcpu))) {
+ r = 0;
+ goto out;
+ }
+ }
if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
kvm_mmu_unload(vcpu);
if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
@@ -9427,6 +9456,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
+ INIT_LIST_HEAD(&kvm->arch.lpage_disallowed_mmu_pages);
INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
atomic_set(&kvm->arch.noncoherent_dma_count, 0);
@@ -9455,6 +9485,11 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
return kvm_x86_ops->vm_init(kvm);
}
+int kvm_arch_post_init_vm(struct kvm *kvm)
+{
+ return kvm_mmu_post_init_vm(kvm);
+}
+
static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
{
vcpu_load(vcpu);
@@ -9556,6 +9591,11 @@ int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
}
EXPORT_SYMBOL_GPL(x86_set_memory_region);
+void kvm_arch_pre_destroy_vm(struct kvm *kvm)
+{
+ kvm_mmu_pre_destroy_vm(kvm);
+}
+
void kvm_arch_destroy_vm(struct kvm *kvm)
{
if (current->mm == kvm->mm) {
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index b7375dc6898f..c126571e5e2e 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -113,8 +113,8 @@ static void delay_mwaitx(unsigned long __loops)
__monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
/*
- * AMD, like Intel, supports the EAX hint and EAX=0xf
- * means, do not enter any deep C-state and we use it
+ * AMD, like Intel's MWAIT version, supports the EAX hint and
+ * EAX=0xf0 means, do not enter any deep C-state and we use it
* here in delay() to minimize wakeup latency.
*/
__mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE);
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index c202e1b07e29..425e025341db 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -917,9 +917,6 @@ static void __init kexec_enter_virtual_mode(void)
if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX))
runtime_code_page_mkexec();
-
- /* clean DUMMY object */
- efi_delete_dummy_variable();
#endif
}
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 750f46ad018a..205b1176084f 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -269,19 +269,41 @@ void xen_reboot(int reason)
BUG();
}
+static int reboot_reason = SHUTDOWN_reboot;
+static bool xen_legacy_crash;
void xen_emergency_restart(void)
{
- xen_reboot(SHUTDOWN_reboot);
+ xen_reboot(reboot_reason);
}
static int
xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
{
- if (!kexec_crash_loaded())
- xen_reboot(SHUTDOWN_crash);
+ if (!kexec_crash_loaded()) {
+ if (xen_legacy_crash)
+ xen_reboot(SHUTDOWN_crash);
+
+ reboot_reason = SHUTDOWN_crash;
+
+ /*
+ * If panic_timeout==0 then we are supposed to wait forever.
+ * However, to preserve original dom0 behavior we have to drop
+ * into hypervisor. (domU behavior is controlled by its
+ * config file)
+ */
+ if (panic_timeout == 0)
+ panic_timeout = -1;
+ }
return NOTIFY_DONE;
}
+static int __init parse_xen_legacy_crash(char *arg)
+{
+ xen_legacy_crash = true;
+ return 0;
+}
+early_param("xen_legacy_crash", parse_xen_legacy_crash);
+
static struct notifier_block xen_panic_block = {
.notifier_call = xen_panic_event,
.priority = INT_MIN
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 58f79ab32358..5bfea374a160 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -117,6 +117,14 @@ static void __init xen_banner(void)
printk(KERN_INFO "Xen version: %d.%d%s%s\n",
version >> 16, version & 0xffff, extra.extraversion,
xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
+
+#ifdef CONFIG_X86_32
+ pr_warn("WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!\n"
+ "Support for running as 32-bit PV-guest under Xen will soon be removed\n"
+ "from the Linux kernel!\n"
+ "Please use either a 64-bit kernel or switch to HVM or PVH mode!\n"
+ "WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!\n");
+#endif
}
static void __init xen_pv_init_platform(void)
diff --git a/arch/xtensa/boot/dts/virt.dts b/arch/xtensa/boot/dts/virt.dts
index a9dcd87b6eb1..611b98a02a65 100644
--- a/arch/xtensa/boot/dts/virt.dts
+++ b/arch/xtensa/boot/dts/virt.dts
@@ -56,7 +56,7 @@
reg = <0xf0100000 0x03f00000>;
// BUS_ADDRESS(3) CPU_PHYSICAL(1) SIZE(2)
- ranges = <0x01000000 0x0 0xf0000000 0xf0000000 0x0 0x00010000>,
+ ranges = <0x01000000 0x0 0x00000000 0xf0000000 0x0 0x00010000>,
<0x02000000 0x0 0xf4000000 0xf4000000 0x0 0x08000000>;
// PCI_DEVICE(3) INT#(1) CONTROLLER(PHANDLE) CONTROLLER_DATA(2)
diff --git a/arch/xtensa/include/asm/bitops.h b/arch/xtensa/include/asm/bitops.h
index aeb15f4c755b..be8b2be5a98b 100644
--- a/arch/xtensa/include/asm/bitops.h
+++ b/arch/xtensa/include/asm/bitops.h
@@ -148,7 +148,7 @@ static inline void change_bit(unsigned int bit, volatile unsigned long *p)
" getex %0\n"
" beqz %0, 1b\n"
: "=&a" (tmp)
- : "a" (~mask), "a" (p)
+ : "a" (mask), "a" (p)
: "memory");
}
diff --git a/arch/xtensa/include/asm/uaccess.h b/arch/xtensa/include/asm/uaccess.h
index 6792928ba84a..3f80386f1883 100644
--- a/arch/xtensa/include/asm/uaccess.h
+++ b/arch/xtensa/include/asm/uaccess.h
@@ -100,7 +100,7 @@ do { \
case 4: __put_user_asm(x, ptr, retval, 4, "s32i", __cb); break; \
case 8: { \
__typeof__(*ptr) __v64 = x; \
- retval = __copy_to_user(ptr, &__v64, 8); \
+ retval = __copy_to_user(ptr, &__v64, 8) ? -EFAULT : 0; \
break; \
} \
default: __put_user_bad(); \
@@ -132,14 +132,14 @@ do { \
#define __check_align_1 ""
#define __check_align_2 \
- " _bbci.l %3, 0, 1f \n" \
- " movi %0, %4 \n" \
+ " _bbci.l %[addr], 0, 1f \n" \
+ " movi %[err], %[efault] \n" \
" _j 2f \n"
#define __check_align_4 \
- " _bbsi.l %3, 0, 0f \n" \
- " _bbci.l %3, 1, 1f \n" \
- "0: movi %0, %4 \n" \
+ " _bbsi.l %[addr], 0, 0f \n" \
+ " _bbci.l %[addr], 1, 1f \n" \
+ "0: movi %[err], %[efault] \n" \
" _j 2f \n"
@@ -151,40 +151,40 @@ do { \
* WARNING: If you modify this macro at all, verify that the
* __check_align_* macros still work.
*/
-#define __put_user_asm(x, addr, err, align, insn, cb) \
+#define __put_user_asm(x_, addr_, err_, align, insn, cb)\
__asm__ __volatile__( \
__check_align_##align \
- "1: "insn" %2, %3, 0 \n" \
+ "1: "insn" %[x], %[addr], 0 \n" \
"2: \n" \
" .section .fixup,\"ax\" \n" \
" .align 4 \n" \
" .literal_position \n" \
"5: \n" \
- " movi %1, 2b \n" \
- " movi %0, %4 \n" \
- " jx %1 \n" \
+ " movi %[tmp], 2b \n" \
+ " movi %[err], %[efault] \n" \
+ " jx %[tmp] \n" \
" .previous \n" \
" .section __ex_table,\"a\" \n" \
" .long 1b, 5b \n" \
" .previous" \
- :"=r" (err), "=r" (cb) \
- :"r" ((int)(x)), "r" (addr), "i" (-EFAULT), "0" (err))
+ :[err] "+r"(err_), [tmp] "=r"(cb) \
+ :[x] "r"(x_), [addr] "r"(addr_), [efault] "i"(-EFAULT))
#define __get_user_nocheck(x, ptr, size) \
({ \
- long __gu_err, __gu_val; \
- __get_user_size(__gu_val, (ptr), (size), __gu_err); \
- (x) = (__force __typeof__(*(ptr)))__gu_val; \
+ long __gu_err; \
+ __get_user_size((x), (ptr), (size), __gu_err); \
__gu_err; \
})
#define __get_user_check(x, ptr, size) \
({ \
- long __gu_err = -EFAULT, __gu_val = 0; \
+ long __gu_err = -EFAULT; \
const __typeof__(*(ptr)) *__gu_addr = (ptr); \
- if (access_ok(__gu_addr, size)) \
- __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
- (x) = (__force __typeof__(*(ptr)))__gu_val; \
+ if (access_ok(__gu_addr, size)) \
+ __get_user_size((x), __gu_addr, (size), __gu_err); \
+ else \
+ (x) = 0; \
__gu_err; \
})
@@ -198,8 +198,17 @@ do { \
case 1: __get_user_asm(x, ptr, retval, 1, "l8ui", __cb); break;\
case 2: __get_user_asm(x, ptr, retval, 2, "l16ui", __cb); break;\
case 4: __get_user_asm(x, ptr, retval, 4, "l32i", __cb); break;\
- case 8: retval = __copy_from_user(&x, ptr, 8); break; \
- default: (x) = __get_user_bad(); \
+ case 8: { \
+ u64 __x; \
+ if (unlikely(__copy_from_user(&__x, ptr, 8))) { \
+ retval = -EFAULT; \
+ (x) = 0; \
+ } else { \
+ (x) = *(__force __typeof__((ptr)))&__x; \
+ } \
+ break; \
+ } \
+ default: (x) = 0; __get_user_bad(); \
} \
} while (0)
@@ -208,25 +217,28 @@ do { \
* WARNING: If you modify this macro at all, verify that the
* __check_align_* macros still work.
*/
-#define __get_user_asm(x, addr, err, align, insn, cb) \
-__asm__ __volatile__( \
- __check_align_##align \
- "1: "insn" %2, %3, 0 \n" \
- "2: \n" \
- " .section .fixup,\"ax\" \n" \
- " .align 4 \n" \
- " .literal_position \n" \
- "5: \n" \
- " movi %1, 2b \n" \
- " movi %2, 0 \n" \
- " movi %0, %4 \n" \
- " jx %1 \n" \
- " .previous \n" \
- " .section __ex_table,\"a\" \n" \
- " .long 1b, 5b \n" \
- " .previous" \
- :"=r" (err), "=r" (cb), "=r" (x) \
- :"r" (addr), "i" (-EFAULT), "0" (err))
+#define __get_user_asm(x_, addr_, err_, align, insn, cb) \
+do { \
+ u32 __x = 0; \
+ __asm__ __volatile__( \
+ __check_align_##align \
+ "1: "insn" %[x], %[addr], 0 \n" \
+ "2: \n" \
+ " .section .fixup,\"ax\" \n" \
+ " .align 4 \n" \
+ " .literal_position \n" \
+ "5: \n" \
+ " movi %[tmp], 2b \n" \
+ " movi %[err], %[efault] \n" \
+ " jx %[tmp] \n" \
+ " .previous \n" \
+ " .section __ex_table,\"a\" \n" \
+ " .long 1b, 5b \n" \
+ " .previous" \
+ :[err] "+r"(err_), [tmp] "=r"(cb), [x] "+r"(__x) \
+ :[addr] "r"(addr_), [efault] "i"(-EFAULT)); \
+ (x_) = (__force __typeof__(*(addr_)))__x; \
+} while (0)
/*
diff --git a/arch/xtensa/kernel/xtensa_ksyms.c b/arch/xtensa/kernel/xtensa_ksyms.c
index 04f19de46700..4092555828b1 100644
--- a/arch/xtensa/kernel/xtensa_ksyms.c
+++ b/arch/xtensa/kernel/xtensa_ksyms.c
@@ -119,13 +119,6 @@ EXPORT_SYMBOL(__invalidate_icache_range);
// FIXME EXPORT_SYMBOL(screen_info);
#endif
-EXPORT_SYMBOL(outsb);
-EXPORT_SYMBOL(outsw);
-EXPORT_SYMBOL(outsl);
-EXPORT_SYMBOL(insb);
-EXPORT_SYMBOL(insw);
-EXPORT_SYMBOL(insl);
-
extern long common_exception_return;
EXPORT_SYMBOL(common_exception_return);
diff --git a/block/Kconfig b/block/Kconfig
index 41c0917ce622..c23094a14a2b 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -32,6 +32,9 @@ config BLK_RQ_ALLOC_TIME
config BLK_SCSI_REQUEST
bool
+config BLK_CGROUP_RWSTAT
+ bool
+
config BLK_DEV_BSG
bool "Block layer SG support v4"
default y
@@ -86,6 +89,7 @@ config BLK_DEV_ZONED
config BLK_DEV_THROTTLING
bool "Block layer bio throttling support"
depends on BLK_CGROUP=y
+ select BLK_CGROUP_RWSTAT
---help---
Block layer bio throttling support. It can be used to limit
the IO rate to a device. IO rate policies are per cgroup and
diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched
index b89310a022ad..7df14133adc8 100644
--- a/block/Kconfig.iosched
+++ b/block/Kconfig.iosched
@@ -31,6 +31,7 @@ config IOSCHED_BFQ
config BFQ_GROUP_IOSCHED
bool "BFQ hierarchical scheduling support"
depends on IOSCHED_BFQ && BLK_CGROUP
+ select BLK_CGROUP_RWSTAT
---help---
Enable hierarchical scheduling in BFQ, using the blkio
diff --git a/block/Makefile b/block/Makefile
index 9ef57ace90d4..205a5f2fef17 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_BLK_SCSI_REQUEST) += scsi_ioctl.o
obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o
obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o
+obj-$(CONFIG_BLK_CGROUP_RWSTAT) += blk-cgroup-rwstat.o
obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o
obj-$(CONFIG_BLK_CGROUP_IOLATENCY) += blk-iolatency.o
obj-$(CONFIG_BLK_CGROUP_IOCOST) += blk-iocost.o
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 86a607cf19a1..cea0ae12f937 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -347,6 +347,14 @@ void bfqg_and_blkg_put(struct bfq_group *bfqg)
bfqg_put(bfqg);
}
+void bfqg_stats_update_legacy_io(struct request_queue *q, struct request *rq)
+{
+ struct bfq_group *bfqg = blkg_to_bfqg(rq->bio->bi_blkg);
+
+ blkg_rwstat_add(&bfqg->stats.bytes, rq->cmd_flags, blk_rq_bytes(rq));
+ blkg_rwstat_add(&bfqg->stats.ios, rq->cmd_flags, 1);
+}
+
/* @stats = 0 */
static void bfqg_stats_reset(struct bfqg_stats *stats)
{
@@ -431,6 +439,8 @@ void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg)
static void bfqg_stats_exit(struct bfqg_stats *stats)
{
+ blkg_rwstat_exit(&stats->bytes);
+ blkg_rwstat_exit(&stats->ios);
#ifdef CONFIG_BFQ_CGROUP_DEBUG
blkg_rwstat_exit(&stats->merged);
blkg_rwstat_exit(&stats->service_time);
@@ -448,6 +458,10 @@ static void bfqg_stats_exit(struct bfqg_stats *stats)
static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp)
{
+ if (blkg_rwstat_init(&stats->bytes, gfp) ||
+ blkg_rwstat_init(&stats->ios, gfp))
+ return -ENOMEM;
+
#ifdef CONFIG_BFQ_CGROUP_DEBUG
if (blkg_rwstat_init(&stats->merged, gfp) ||
blkg_rwstat_init(&stats->service_time, gfp) ||
@@ -1057,18 +1071,35 @@ static ssize_t bfq_io_set_weight(struct kernfs_open_file *of,
return bfq_io_set_device_weight(of, buf, nbytes, off);
}
-#ifdef CONFIG_BFQ_CGROUP_DEBUG
-static int bfqg_print_stat(struct seq_file *sf, void *v)
+static int bfqg_print_rwstat(struct seq_file *sf, void *v)
{
- blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat,
- &blkcg_policy_bfq, seq_cft(sf)->private, false);
+ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat,
+ &blkcg_policy_bfq, seq_cft(sf)->private, true);
return 0;
}
-static int bfqg_print_rwstat(struct seq_file *sf, void *v)
+static u64 bfqg_prfill_rwstat_recursive(struct seq_file *sf,
+ struct blkg_policy_data *pd, int off)
{
- blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat,
- &blkcg_policy_bfq, seq_cft(sf)->private, true);
+ struct blkg_rwstat_sample sum;
+
+ blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off, &sum);
+ return __blkg_prfill_rwstat(sf, pd, &sum);
+}
+
+static int bfqg_print_rwstat_recursive(struct seq_file *sf, void *v)
+{
+ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
+ bfqg_prfill_rwstat_recursive, &blkcg_policy_bfq,
+ seq_cft(sf)->private, true);
+ return 0;
+}
+
+#ifdef CONFIG_BFQ_CGROUP_DEBUG
+static int bfqg_print_stat(struct seq_file *sf, void *v)
+{
+ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat,
+ &blkcg_policy_bfq, seq_cft(sf)->private, false);
return 0;
}
@@ -1097,15 +1128,6 @@ static u64 bfqg_prfill_stat_recursive(struct seq_file *sf,
return __blkg_prfill_u64(sf, pd, sum);
}
-static u64 bfqg_prfill_rwstat_recursive(struct seq_file *sf,
- struct blkg_policy_data *pd, int off)
-{
- struct blkg_rwstat_sample sum;
-
- blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off, &sum);
- return __blkg_prfill_rwstat(sf, pd, &sum);
-}
-
static int bfqg_print_stat_recursive(struct seq_file *sf, void *v)
{
blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
@@ -1114,18 +1136,11 @@ static int bfqg_print_stat_recursive(struct seq_file *sf, void *v)
return 0;
}
-static int bfqg_print_rwstat_recursive(struct seq_file *sf, void *v)
-{
- blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
- bfqg_prfill_rwstat_recursive, &blkcg_policy_bfq,
- seq_cft(sf)->private, true);
- return 0;
-}
-
static u64 bfqg_prfill_sectors(struct seq_file *sf, struct blkg_policy_data *pd,
int off)
{
- u64 sum = blkg_rwstat_total(&pd->blkg->stat_bytes);
+ struct bfq_group *bfqg = blkg_to_bfqg(pd->blkg);
+ u64 sum = blkg_rwstat_total(&bfqg->stats.bytes);
return __blkg_prfill_u64(sf, pd, sum >> 9);
}
@@ -1142,8 +1157,8 @@ static u64 bfqg_prfill_sectors_recursive(struct seq_file *sf,
{
struct blkg_rwstat_sample tmp;
- blkg_rwstat_recursive_sum(pd->blkg, NULL,
- offsetof(struct blkcg_gq, stat_bytes), &tmp);
+ blkg_rwstat_recursive_sum(pd->blkg, &blkcg_policy_bfq,
+ offsetof(struct bfq_group, stats.bytes), &tmp);
return __blkg_prfill_u64(sf, pd,
(tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE]) >> 9);
@@ -1226,13 +1241,13 @@ struct cftype bfq_blkcg_legacy_files[] = {
/* statistics, covers only the tasks in the bfqg */
{
.name = "bfq.io_service_bytes",
- .private = (unsigned long)&blkcg_policy_bfq,
- .seq_show = blkg_print_stat_bytes,
+ .private = offsetof(struct bfq_group, stats.bytes),
+ .seq_show = bfqg_print_rwstat,
},
{
.name = "bfq.io_serviced",
- .private = (unsigned long)&blkcg_policy_bfq,
- .seq_show = blkg_print_stat_ios,
+ .private = offsetof(struct bfq_group, stats.ios),
+ .seq_show = bfqg_print_rwstat,
},
#ifdef CONFIG_BFQ_CGROUP_DEBUG
{
@@ -1269,13 +1284,13 @@ struct cftype bfq_blkcg_legacy_files[] = {
/* the same statistics which cover the bfqg and its descendants */
{
.name = "bfq.io_service_bytes_recursive",
- .private = (unsigned long)&blkcg_policy_bfq,
- .seq_show = blkg_print_stat_bytes_recursive,
+ .private = offsetof(struct bfq_group, stats.bytes),
+ .seq_show = bfqg_print_rwstat_recursive,
},
{
.name = "bfq.io_serviced_recursive",
- .private = (unsigned long)&blkcg_policy_bfq,
- .seq_show = blkg_print_stat_ios_recursive,
+ .private = offsetof(struct bfq_group, stats.ios),
+ .seq_show = bfqg_print_rwstat_recursive,
},
#ifdef CONFIG_BFQ_CGROUP_DEBUG
{
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 0319d6339822..ad4af4aaf2ce 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -2713,6 +2713,28 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq)
}
}
+
+static
+void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+ /*
+ * To prevent bfqq's service guarantees from being violated,
+ * bfqq may be left busy, i.e., queued for service, even if
+ * empty (see comments in __bfq_bfqq_expire() for
+ * details). But, if no process will send requests to bfqq any
+ * longer, then there is no point in keeping bfqq queued for
+ * service. In addition, keeping bfqq queued for service, but
+ * with no process ref any longer, may have caused bfqq to be
+ * freed when dequeued from service. But this is assumed to
+ * never happen.
+ */
+ if (bfq_bfqq_busy(bfqq) && RB_EMPTY_ROOT(&bfqq->sort_list) &&
+ bfqq != bfqd->in_service_queue)
+ bfq_del_bfqq_busy(bfqd, bfqq, false);
+
+ bfq_put_queue(bfqq);
+}
+
static void
bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
@@ -2783,8 +2805,7 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
*/
new_bfqq->pid = -1;
bfqq->bic = NULL;
- /* release process reference to bfqq */
- bfq_put_queue(bfqq);
+ bfq_release_process_ref(bfqd, bfqq);
}
static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq,
@@ -4899,7 +4920,7 @@ static void bfq_exit_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq)
bfq_put_cooperator(bfqq);
- bfq_put_queue(bfqq); /* release process reference */
+ bfq_release_process_ref(bfqd, bfqq);
}
static void bfq_exit_icq_bfqq(struct bfq_io_cq *bic, bool is_sync)
@@ -5001,8 +5022,7 @@ static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio)
bfqq = bic_to_bfqq(bic, false);
if (bfqq) {
- /* release process reference on this queue */
- bfq_put_queue(bfqq);
+ bfq_release_process_ref(bfqd, bfqq);
bfqq = bfq_get_queue(bfqd, bio, BLK_RW_ASYNC, bic);
bic_set_bfqq(bic, bfqq, false);
}
@@ -5464,6 +5484,10 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
bool idle_timer_disabled = false;
unsigned int cmd_flags;
+#ifdef CONFIG_BFQ_GROUP_IOSCHED
+ if (!cgroup_subsys_on_dfl(io_cgrp_subsys) && rq->bio)
+ bfqg_stats_update_legacy_io(q, rq);
+#endif
spin_lock_irq(&bfqd->lock);
if (blk_mq_sched_try_insert_merge(q, rq)) {
spin_unlock_irq(&bfqd->lock);
@@ -5963,7 +5987,7 @@ bfq_split_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq)
bfq_put_cooperator(bfqq);
- bfq_put_queue(bfqq);
+ bfq_release_process_ref(bfqq->bfqd, bfqq);
return NULL;
}
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 5d1a519640f6..8526f20c53bc 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -10,6 +10,8 @@
#include <linux/hrtimer.h>
#include <linux/blk-cgroup.h>
+#include "blk-cgroup-rwstat.h"
+
#define BFQ_IOPRIO_CLASSES 3
#define BFQ_CL_IDLE_TIMEOUT (HZ/5)
@@ -809,6 +811,9 @@ struct bfq_stat {
};
struct bfqg_stats {
+ /* basic stats */
+ struct blkg_rwstat bytes;
+ struct blkg_rwstat ios;
#ifdef CONFIG_BFQ_CGROUP_DEBUG
/* number of ios merged */
struct blkg_rwstat merged;
@@ -956,6 +961,7 @@ void bfq_put_async_queues(struct bfq_data *bfqd, struct bfq_group *bfqg);
/* ---------------- cgroups-support interface ---------------- */
+void bfqg_stats_update_legacy_io(struct request_queue *q, struct request *rq);
void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq,
unsigned int op);
void bfqg_stats_update_io_remove(struct bfq_group *bfqg, unsigned int op);
@@ -1062,6 +1068,8 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \
char pid_str[MAX_PID_STR_LENGTH]; \
+ if (likely(!blk_trace_note_message_enabled((bfqd)->queue))) \
+ break; \
bfq_pid_to_str((bfqq)->pid, pid_str, MAX_PID_STR_LENGTH); \
blk_add_cgroup_trace_msg((bfqd)->queue, \
bfqg_to_blkg(bfqq_group(bfqq))->blkcg, \
@@ -1078,6 +1086,8 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \
char pid_str[MAX_PID_STR_LENGTH]; \
+ if (likely(!blk_trace_note_message_enabled((bfqd)->queue))) \
+ break; \
bfq_pid_to_str((bfqq)->pid, pid_str, MAX_PID_STR_LENGTH); \
blk_add_trace_msg((bfqd)->queue, "bfq%s%c " fmt, pid_str, \
bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \
diff --git a/block/bio.c b/block/bio.c
index 8f0ed6228fc5..b1170ec18464 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -751,7 +751,7 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page,
if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
return false;
- if (bio->bi_vcnt > 0) {
+ if (bio->bi_vcnt > 0 && !bio_full(bio, len)) {
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
if (page_is_mergeable(bv, page, len, off, same_page)) {
diff --git a/block/blk-cgroup-rwstat.c b/block/blk-cgroup-rwstat.c
new file mode 100644
index 000000000000..85d5790ac49b
--- /dev/null
+++ b/block/blk-cgroup-rwstat.c
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Legacy blkg rwstat helpers enabled by CONFIG_BLK_CGROUP_RWSTAT.
+ * Do not use in new code.
+ */
+#include "blk-cgroup-rwstat.h"
+
+int blkg_rwstat_init(struct blkg_rwstat *rwstat, gfp_t gfp)
+{
+ int i, ret;
+
+ for (i = 0; i < BLKG_RWSTAT_NR; i++) {
+ ret = percpu_counter_init(&rwstat->cpu_cnt[i], 0, gfp);
+ if (ret) {
+ while (--i >= 0)
+ percpu_counter_destroy(&rwstat->cpu_cnt[i]);
+ return ret;
+ }
+ atomic64_set(&rwstat->aux_cnt[i], 0);
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(blkg_rwstat_init);
+
+void blkg_rwstat_exit(struct blkg_rwstat *rwstat)
+{
+ int i;
+
+ for (i = 0; i < BLKG_RWSTAT_NR; i++)
+ percpu_counter_destroy(&rwstat->cpu_cnt[i]);
+}
+EXPORT_SYMBOL_GPL(blkg_rwstat_exit);
+
+/**
+ * __blkg_prfill_rwstat - prfill helper for a blkg_rwstat
+ * @sf: seq_file to print to
+ * @pd: policy private data of interest
+ * @rwstat: rwstat to print
+ *
+ * Print @rwstat to @sf for the device assocaited with @pd.
+ */
+u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
+ const struct blkg_rwstat_sample *rwstat)
+{
+ static const char *rwstr[] = {
+ [BLKG_RWSTAT_READ] = "Read",
+ [BLKG_RWSTAT_WRITE] = "Write",
+ [BLKG_RWSTAT_SYNC] = "Sync",
+ [BLKG_RWSTAT_ASYNC] = "Async",
+ [BLKG_RWSTAT_DISCARD] = "Discard",
+ };
+ const char *dname = blkg_dev_name(pd->blkg);
+ u64 v;
+ int i;
+
+ if (!dname)
+ return 0;
+
+ for (i = 0; i < BLKG_RWSTAT_NR; i++)
+ seq_printf(sf, "%s %s %llu\n", dname, rwstr[i],
+ rwstat->cnt[i]);
+
+ v = rwstat->cnt[BLKG_RWSTAT_READ] +
+ rwstat->cnt[BLKG_RWSTAT_WRITE] +
+ rwstat->cnt[BLKG_RWSTAT_DISCARD];
+ seq_printf(sf, "%s Total %llu\n", dname, v);
+ return v;
+}
+EXPORT_SYMBOL_GPL(__blkg_prfill_rwstat);
+
+/**
+ * blkg_prfill_rwstat - prfill callback for blkg_rwstat
+ * @sf: seq_file to print to
+ * @pd: policy private data of interest
+ * @off: offset to the blkg_rwstat in @pd
+ *
+ * prfill callback for printing a blkg_rwstat.
+ */
+u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
+ int off)
+{
+ struct blkg_rwstat_sample rwstat = { };
+
+ blkg_rwstat_read((void *)pd + off, &rwstat);
+ return __blkg_prfill_rwstat(sf, pd, &rwstat);
+}
+EXPORT_SYMBOL_GPL(blkg_prfill_rwstat);
+
+/**
+ * blkg_rwstat_recursive_sum - collect hierarchical blkg_rwstat
+ * @blkg: blkg of interest
+ * @pol: blkcg_policy which contains the blkg_rwstat
+ * @off: offset to the blkg_rwstat in blkg_policy_data or @blkg
+ * @sum: blkg_rwstat_sample structure containing the results
+ *
+ * Collect the blkg_rwstat specified by @blkg, @pol and @off and all its
+ * online descendants and their aux counts. The caller must be holding the
+ * queue lock for online tests.
+ *
+ * If @pol is NULL, blkg_rwstat is at @off bytes into @blkg; otherwise, it
+ * is at @off bytes into @blkg's blkg_policy_data of the policy.
+ */
+void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol,
+ int off, struct blkg_rwstat_sample *sum)
+{
+ struct blkcg_gq *pos_blkg;
+ struct cgroup_subsys_state *pos_css;
+ unsigned int i;
+
+ lockdep_assert_held(&blkg->q->queue_lock);
+
+ rcu_read_lock();
+ blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
+ struct blkg_rwstat *rwstat;
+
+ if (!pos_blkg->online)
+ continue;
+
+ if (pol)
+ rwstat = (void *)blkg_to_pd(pos_blkg, pol) + off;
+ else
+ rwstat = (void *)pos_blkg + off;
+
+ for (i = 0; i < BLKG_RWSTAT_NR; i++)
+ sum->cnt[i] = blkg_rwstat_read_counter(rwstat, i);
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(blkg_rwstat_recursive_sum);
diff --git a/block/blk-cgroup-rwstat.h b/block/blk-cgroup-rwstat.h
new file mode 100644
index 000000000000..ee746919c41f
--- /dev/null
+++ b/block/blk-cgroup-rwstat.h
@@ -0,0 +1,149 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Legacy blkg rwstat helpers enabled by CONFIG_BLK_CGROUP_RWSTAT.
+ * Do not use in new code.
+ */
+#ifndef _BLK_CGROUP_RWSTAT_H
+#define _BLK_CGROUP_RWSTAT_H
+
+#include <linux/blk-cgroup.h>
+
+enum blkg_rwstat_type {
+ BLKG_RWSTAT_READ,
+ BLKG_RWSTAT_WRITE,
+ BLKG_RWSTAT_SYNC,
+ BLKG_RWSTAT_ASYNC,
+ BLKG_RWSTAT_DISCARD,
+
+ BLKG_RWSTAT_NR,
+ BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR,
+};
+
+/*
+ * blkg_[rw]stat->aux_cnt is excluded for local stats but included for
+ * recursive. Used to carry stats of dead children.
+ */
+struct blkg_rwstat {
+ struct percpu_counter cpu_cnt[BLKG_RWSTAT_NR];
+ atomic64_t aux_cnt[BLKG_RWSTAT_NR];
+};
+
+struct blkg_rwstat_sample {
+ u64 cnt[BLKG_RWSTAT_NR];
+};
+
+static inline u64 blkg_rwstat_read_counter(struct blkg_rwstat *rwstat,
+ unsigned int idx)
+{
+ return atomic64_read(&rwstat->aux_cnt[idx]) +
+ percpu_counter_sum_positive(&rwstat->cpu_cnt[idx]);
+}
+
+int blkg_rwstat_init(struct blkg_rwstat *rwstat, gfp_t gfp);
+void blkg_rwstat_exit(struct blkg_rwstat *rwstat);
+u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
+ const struct blkg_rwstat_sample *rwstat);
+u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
+ int off);
+void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol,
+ int off, struct blkg_rwstat_sample *sum);
+
+
+/**
+ * blkg_rwstat_add - add a value to a blkg_rwstat
+ * @rwstat: target blkg_rwstat
+ * @op: REQ_OP and flags
+ * @val: value to add
+ *
+ * Add @val to @rwstat. The counters are chosen according to @rw. The
+ * caller is responsible for synchronizing calls to this function.
+ */
+static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
+ unsigned int op, uint64_t val)
+{
+ struct percpu_counter *cnt;
+
+ if (op_is_discard(op))
+ cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_DISCARD];
+ else if (op_is_write(op))
+ cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE];
+ else
+ cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ];
+
+ percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);
+
+ if (op_is_sync(op))
+ cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC];
+ else
+ cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC];
+
+ percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);
+}
+
+/**
+ * blkg_rwstat_read - read the current values of a blkg_rwstat
+ * @rwstat: blkg_rwstat to read
+ *
+ * Read the current snapshot of @rwstat and return it in the aux counts.
+ */
+static inline void blkg_rwstat_read(struct blkg_rwstat *rwstat,
+ struct blkg_rwstat_sample *result)
+{
+ int i;
+
+ for (i = 0; i < BLKG_RWSTAT_NR; i++)
+ result->cnt[i] =
+ percpu_counter_sum_positive(&rwstat->cpu_cnt[i]);
+}
+
+/**
+ * blkg_rwstat_total - read the total count of a blkg_rwstat
+ * @rwstat: blkg_rwstat to read
+ *
+ * Return the total count of @rwstat regardless of the IO direction. This
+ * function can be called without synchronization and takes care of u64
+ * atomicity.
+ */
+static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat)
+{
+ struct blkg_rwstat_sample tmp = { };
+
+ blkg_rwstat_read(rwstat, &tmp);
+ return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE];
+}
+
+/**
+ * blkg_rwstat_reset - reset a blkg_rwstat
+ * @rwstat: blkg_rwstat to reset
+ */
+static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
+{
+ int i;
+
+ for (i = 0; i < BLKG_RWSTAT_NR; i++) {
+ percpu_counter_set(&rwstat->cpu_cnt[i], 0);
+ atomic64_set(&rwstat->aux_cnt[i], 0);
+ }
+}
+
+/**
+ * blkg_rwstat_add_aux - add a blkg_rwstat into another's aux count
+ * @to: the destination blkg_rwstat
+ * @from: the source
+ *
+ * Add @from's count including the aux one to @to's aux count.
+ */
+static inline void blkg_rwstat_add_aux(struct blkg_rwstat *to,
+ struct blkg_rwstat *from)
+{
+ u64 sum[BLKG_RWSTAT_NR];
+ int i;
+
+ for (i = 0; i < BLKG_RWSTAT_NR; i++)
+ sum[i] = percpu_counter_sum_positive(&from->cpu_cnt[i]);
+
+ for (i = 0; i < BLKG_RWSTAT_NR; i++)
+ atomic64_add(sum[i] + atomic64_read(&from->aux_cnt[i]),
+ &to->aux_cnt[i]);
+}
+#endif /* _BLK_CGROUP_RWSTAT_H */
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index b6f20be0fc78..708dea92dac8 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -80,8 +80,7 @@ static void blkg_free(struct blkcg_gq *blkg)
if (blkg->pd[i])
blkcg_policy[i]->pd_free_fn(blkg->pd[i]);
- blkg_rwstat_exit(&blkg->stat_ios);
- blkg_rwstat_exit(&blkg->stat_bytes);
+ free_percpu(blkg->iostat_cpu);
percpu_ref_exit(&blkg->refcnt);
kfree(blkg);
}
@@ -146,7 +145,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
gfp_t gfp_mask)
{
struct blkcg_gq *blkg;
- int i;
+ int i, cpu;
/* alloc and init base part */
blkg = kzalloc_node(sizeof(*blkg), gfp_mask, q->node);
@@ -156,8 +155,8 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
if (percpu_ref_init(&blkg->refcnt, blkg_release, 0, gfp_mask))
goto err_free;
- if (blkg_rwstat_init(&blkg->stat_bytes, gfp_mask) ||
- blkg_rwstat_init(&blkg->stat_ios, gfp_mask))
+ blkg->iostat_cpu = alloc_percpu_gfp(struct blkg_iostat_set, gfp_mask);
+ if (!blkg->iostat_cpu)
goto err_free;
blkg->q = q;
@@ -167,6 +166,10 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
INIT_WORK(&blkg->async_bio_work, blkg_async_bio_workfn);
blkg->blkcg = blkcg;
+ u64_stats_init(&blkg->iostat.sync);
+ for_each_possible_cpu(cpu)
+ u64_stats_init(&per_cpu_ptr(blkg->iostat_cpu, cpu)->sync);
+
for (i = 0; i < BLKCG_MAX_POLS; i++) {
struct blkcg_policy *pol = blkcg_policy[i];
struct blkg_policy_data *pd;
@@ -393,7 +396,6 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
static void blkg_destroy(struct blkcg_gq *blkg)
{
struct blkcg *blkcg = blkg->blkcg;
- struct blkcg_gq *parent = blkg->parent;
int i;
lockdep_assert_held(&blkg->q->queue_lock);
@@ -410,11 +412,6 @@ static void blkg_destroy(struct blkcg_gq *blkg)
pol->pd_offline_fn(blkg->pd[i]);
}
- if (parent) {
- blkg_rwstat_add_aux(&parent->stat_bytes, &blkg->stat_bytes);
- blkg_rwstat_add_aux(&parent->stat_ios, &blkg->stat_ios);
- }
-
blkg->online = false;
radix_tree_delete(&blkcg->blkg_tree, blkg->q->id);
@@ -464,7 +461,7 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css,
{
struct blkcg *blkcg = css_to_blkcg(css);
struct blkcg_gq *blkg;
- int i;
+ int i, cpu;
mutex_lock(&blkcg_pol_mutex);
spin_lock_irq(&blkcg->lock);
@@ -475,8 +472,12 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css,
* anyway. If you get hit by a race, retry.
*/
hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
- blkg_rwstat_reset(&blkg->stat_bytes);
- blkg_rwstat_reset(&blkg->stat_ios);
+ for_each_possible_cpu(cpu) {
+ struct blkg_iostat_set *bis =
+ per_cpu_ptr(blkg->iostat_cpu, cpu);
+ memset(bis, 0, sizeof(*bis));
+ }
+ memset(&blkg->iostat, 0, sizeof(blkg->iostat));
for (i = 0; i < BLKCG_MAX_POLS; i++) {
struct blkcg_policy *pol = blkcg_policy[i];
@@ -560,186 +561,6 @@ u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v)
}
EXPORT_SYMBOL_GPL(__blkg_prfill_u64);
-/**
- * __blkg_prfill_rwstat - prfill helper for a blkg_rwstat
- * @sf: seq_file to print to
- * @pd: policy private data of interest
- * @rwstat: rwstat to print
- *
- * Print @rwstat to @sf for the device assocaited with @pd.
- */
-u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
- const struct blkg_rwstat_sample *rwstat)
-{
- static const char *rwstr[] = {
- [BLKG_RWSTAT_READ] = "Read",
- [BLKG_RWSTAT_WRITE] = "Write",
- [BLKG_RWSTAT_SYNC] = "Sync",
- [BLKG_RWSTAT_ASYNC] = "Async",
- [BLKG_RWSTAT_DISCARD] = "Discard",
- };
- const char *dname = blkg_dev_name(pd->blkg);
- u64 v;
- int i;
-
- if (!dname)
- return 0;
-
- for (i = 0; i < BLKG_RWSTAT_NR; i++)
- seq_printf(sf, "%s %s %llu\n", dname, rwstr[i],
- rwstat->cnt[i]);
-
- v = rwstat->cnt[BLKG_RWSTAT_READ] +
- rwstat->cnt[BLKG_RWSTAT_WRITE] +
- rwstat->cnt[BLKG_RWSTAT_DISCARD];
- seq_printf(sf, "%s Total %llu\n", dname, v);
- return v;
-}
-EXPORT_SYMBOL_GPL(__blkg_prfill_rwstat);
-
-/**
- * blkg_prfill_rwstat - prfill callback for blkg_rwstat
- * @sf: seq_file to print to
- * @pd: policy private data of interest
- * @off: offset to the blkg_rwstat in @pd
- *
- * prfill callback for printing a blkg_rwstat.
- */
-u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
- int off)
-{
- struct blkg_rwstat_sample rwstat = { };
-
- blkg_rwstat_read((void *)pd + off, &rwstat);
- return __blkg_prfill_rwstat(sf, pd, &rwstat);
-}
-EXPORT_SYMBOL_GPL(blkg_prfill_rwstat);
-
-static u64 blkg_prfill_rwstat_field(struct seq_file *sf,
- struct blkg_policy_data *pd, int off)
-{
- struct blkg_rwstat_sample rwstat = { };
-
- blkg_rwstat_read((void *)pd->blkg + off, &rwstat);
- return __blkg_prfill_rwstat(sf, pd, &rwstat);
-}
-
-/**
- * blkg_print_stat_bytes - seq_show callback for blkg->stat_bytes
- * @sf: seq_file to print to
- * @v: unused
- *
- * To be used as cftype->seq_show to print blkg->stat_bytes.
- * cftype->private must be set to the blkcg_policy.
- */
-int blkg_print_stat_bytes(struct seq_file *sf, void *v)
-{
- blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
- blkg_prfill_rwstat_field, (void *)seq_cft(sf)->private,
- offsetof(struct blkcg_gq, stat_bytes), true);
- return 0;
-}
-EXPORT_SYMBOL_GPL(blkg_print_stat_bytes);
-
-/**
- * blkg_print_stat_bytes - seq_show callback for blkg->stat_ios
- * @sf: seq_file to print to
- * @v: unused
- *
- * To be used as cftype->seq_show to print blkg->stat_ios. cftype->private
- * must be set to the blkcg_policy.
- */
-int blkg_print_stat_ios(struct seq_file *sf, void *v)
-{
- blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
- blkg_prfill_rwstat_field, (void *)seq_cft(sf)->private,
- offsetof(struct blkcg_gq, stat_ios), true);
- return 0;
-}
-EXPORT_SYMBOL_GPL(blkg_print_stat_ios);
-
-static u64 blkg_prfill_rwstat_field_recursive(struct seq_file *sf,
- struct blkg_policy_data *pd,
- int off)
-{
- struct blkg_rwstat_sample rwstat;
-
- blkg_rwstat_recursive_sum(pd->blkg, NULL, off, &rwstat);
- return __blkg_prfill_rwstat(sf, pd, &rwstat);
-}
-
-/**
- * blkg_print_stat_bytes_recursive - recursive version of blkg_print_stat_bytes
- * @sf: seq_file to print to
- * @v: unused
- */
-int blkg_print_stat_bytes_recursive(struct seq_file *sf, void *v)
-{
- blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
- blkg_prfill_rwstat_field_recursive,
- (void *)seq_cft(sf)->private,
- offsetof(struct blkcg_gq, stat_bytes), true);
- return 0;
-}
-EXPORT_SYMBOL_GPL(blkg_print_stat_bytes_recursive);
-
-/**
- * blkg_print_stat_ios_recursive - recursive version of blkg_print_stat_ios
- * @sf: seq_file to print to
- * @v: unused
- */
-int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v)
-{
- blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
- blkg_prfill_rwstat_field_recursive,
- (void *)seq_cft(sf)->private,
- offsetof(struct blkcg_gq, stat_ios), true);
- return 0;
-}
-EXPORT_SYMBOL_GPL(blkg_print_stat_ios_recursive);
-
-/**
- * blkg_rwstat_recursive_sum - collect hierarchical blkg_rwstat
- * @blkg: blkg of interest
- * @pol: blkcg_policy which contains the blkg_rwstat
- * @off: offset to the blkg_rwstat in blkg_policy_data or @blkg
- * @sum: blkg_rwstat_sample structure containing the results
- *
- * Collect the blkg_rwstat specified by @blkg, @pol and @off and all its
- * online descendants and their aux counts. The caller must be holding the
- * queue lock for online tests.
- *
- * If @pol is NULL, blkg_rwstat is at @off bytes into @blkg; otherwise, it
- * is at @off bytes into @blkg's blkg_policy_data of the policy.
- */
-void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol,
- int off, struct blkg_rwstat_sample *sum)
-{
- struct blkcg_gq *pos_blkg;
- struct cgroup_subsys_state *pos_css;
- unsigned int i;
-
- lockdep_assert_held(&blkg->q->queue_lock);
-
- rcu_read_lock();
- blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
- struct blkg_rwstat *rwstat;
-
- if (!pos_blkg->online)
- continue;
-
- if (pol)
- rwstat = (void *)blkg_to_pd(pos_blkg, pol) + off;
- else
- rwstat = (void *)pos_blkg + off;
-
- for (i = 0; i < BLKG_RWSTAT_NR; i++)
- sum->cnt[i] = blkg_rwstat_read_counter(rwstat, i);
- }
- rcu_read_unlock();
-}
-EXPORT_SYMBOL_GPL(blkg_rwstat_recursive_sum);
-
/* Performs queue bypass and policy enabled checks then looks up blkg. */
static struct blkcg_gq *blkg_lookup_check(struct blkcg *blkcg,
const struct blkcg_policy *pol,
@@ -923,20 +744,27 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
struct blkcg_gq *blkg;
+ cgroup_rstat_flush(blkcg->css.cgroup);
rcu_read_lock();
hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
+ struct blkg_iostat_set *bis = &blkg->iostat;
const char *dname;
char *buf;
- struct blkg_rwstat_sample rwstat;
u64 rbytes, wbytes, rios, wios, dbytes, dios;
size_t size = seq_get_buf(sf, &buf), off = 0;
int i;
bool has_stats = false;
+ unsigned seq;
+
+ spin_lock_irq(&blkg->q->queue_lock);
+
+ if (!blkg->online)
+ goto skip;
dname = blkg_dev_name(blkg);
if (!dname)
- continue;
+ goto skip;
/*
* Hooray string manipulation, count is the size written NOT
@@ -946,21 +774,16 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
*/
off += scnprintf(buf+off, size-off, "%s ", dname);
- spin_lock_irq(&blkg->q->queue_lock);
-
- blkg_rwstat_recursive_sum(blkg, NULL,
- offsetof(struct blkcg_gq, stat_bytes), &rwstat);
- rbytes = rwstat.cnt[BLKG_RWSTAT_READ];
- wbytes = rwstat.cnt[BLKG_RWSTAT_WRITE];
- dbytes = rwstat.cnt[BLKG_RWSTAT_DISCARD];
+ do {
+ seq = u64_stats_fetch_begin(&bis->sync);
- blkg_rwstat_recursive_sum(blkg, NULL,
- offsetof(struct blkcg_gq, stat_ios), &rwstat);
- rios = rwstat.cnt[BLKG_RWSTAT_READ];
- wios = rwstat.cnt[BLKG_RWSTAT_WRITE];
- dios = rwstat.cnt[BLKG_RWSTAT_DISCARD];
-
- spin_unlock_irq(&blkg->q->queue_lock);
+ rbytes = bis->cur.bytes[BLKG_IOSTAT_READ];
+ wbytes = bis->cur.bytes[BLKG_IOSTAT_WRITE];
+ dbytes = bis->cur.bytes[BLKG_IOSTAT_DISCARD];
+ rios = bis->cur.ios[BLKG_IOSTAT_READ];
+ wios = bis->cur.ios[BLKG_IOSTAT_WRITE];
+ dios = bis->cur.ios[BLKG_IOSTAT_DISCARD];
+ } while (u64_stats_fetch_retry(&bis->sync, seq));
if (rbytes || wbytes || rios || wios) {
has_stats = true;
@@ -999,6 +822,8 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
seq_commit(sf, -1);
}
}
+ skip:
+ spin_unlock_irq(&blkg->q->queue_lock);
}
rcu_read_unlock();
@@ -1294,6 +1119,77 @@ static int blkcg_can_attach(struct cgroup_taskset *tset)
return ret;
}
+static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src)
+{
+ int i;
+
+ for (i = 0; i < BLKG_IOSTAT_NR; i++) {
+ dst->bytes[i] = src->bytes[i];
+ dst->ios[i] = src->ios[i];
+ }
+}
+
+static void blkg_iostat_add(struct blkg_iostat *dst, struct blkg_iostat *src)
+{
+ int i;
+
+ for (i = 0; i < BLKG_IOSTAT_NR; i++) {
+ dst->bytes[i] += src->bytes[i];
+ dst->ios[i] += src->ios[i];
+ }
+}
+
+static void blkg_iostat_sub(struct blkg_iostat *dst, struct blkg_iostat *src)
+{
+ int i;
+
+ for (i = 0; i < BLKG_IOSTAT_NR; i++) {
+ dst->bytes[i] -= src->bytes[i];
+ dst->ios[i] -= src->ios[i];
+ }
+}
+
+static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
+{
+ struct blkcg *blkcg = css_to_blkcg(css);
+ struct blkcg_gq *blkg;
+
+ rcu_read_lock();
+
+ hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
+ struct blkcg_gq *parent = blkg->parent;
+ struct blkg_iostat_set *bisc = per_cpu_ptr(blkg->iostat_cpu, cpu);
+ struct blkg_iostat cur, delta;
+ unsigned seq;
+
+ /* fetch the current per-cpu values */
+ do {
+ seq = u64_stats_fetch_begin(&bisc->sync);
+ blkg_iostat_set(&cur, &bisc->cur);
+ } while (u64_stats_fetch_retry(&bisc->sync, seq));
+
+ /* propagate percpu delta to global */
+ u64_stats_update_begin(&blkg->iostat.sync);
+ blkg_iostat_set(&delta, &cur);
+ blkg_iostat_sub(&delta, &bisc->last);
+ blkg_iostat_add(&blkg->iostat.cur, &delta);
+ blkg_iostat_add(&bisc->last, &delta);
+ u64_stats_update_end(&blkg->iostat.sync);
+
+ /* propagate global delta to parent */
+ if (parent) {
+ u64_stats_update_begin(&parent->iostat.sync);
+ blkg_iostat_set(&delta, &blkg->iostat.cur);
+ blkg_iostat_sub(&delta, &blkg->iostat.last);
+ blkg_iostat_add(&parent->iostat.cur, &delta);
+ blkg_iostat_add(&blkg->iostat.last, &delta);
+ u64_stats_update_end(&parent->iostat.sync);
+ }
+ }
+
+ rcu_read_unlock();
+}
+
static void blkcg_bind(struct cgroup_subsys_state *root_css)
{
int i;
@@ -1326,6 +1222,7 @@ struct cgroup_subsys io_cgrp_subsys = {
.css_offline = blkcg_css_offline,
.css_free = blkcg_css_free,
.can_attach = blkcg_can_attach,
+ .css_rstat_flush = blkcg_rstat_flush,
.bind = blkcg_bind,
.dfl_cftypes = blkcg_files,
.legacy_cftypes = blkcg_legacy_files,
@@ -1362,7 +1259,7 @@ int blkcg_activate_policy(struct request_queue *q,
const struct blkcg_policy *pol)
{
struct blkg_policy_data *pd_prealloc = NULL;
- struct blkcg_gq *blkg;
+ struct blkcg_gq *blkg, *pinned_blkg = NULL;
int ret;
if (blkcg_policy_enabled(q, pol))
@@ -1370,49 +1267,82 @@ int blkcg_activate_policy(struct request_queue *q,
if (queue_is_mq(q))
blk_mq_freeze_queue(q);
-pd_prealloc:
- if (!pd_prealloc) {
- pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q, &blkcg_root);
- if (!pd_prealloc) {
- ret = -ENOMEM;
- goto out_bypass_end;
- }
- }
-
+retry:
spin_lock_irq(&q->queue_lock);
- /* blkg_list is pushed at the head, reverse walk to init parents first */
+ /* blkg_list is pushed at the head, reverse walk to allocate parents first */
list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) {
struct blkg_policy_data *pd;
if (blkg->pd[pol->plid])
continue;
- pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q, &blkcg_root);
- if (!pd)
- swap(pd, pd_prealloc);
+ /* If prealloc matches, use it; otherwise try GFP_NOWAIT */
+ if (blkg == pinned_blkg) {
+ pd = pd_prealloc;
+ pd_prealloc = NULL;
+ } else {
+ pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q,
+ blkg->blkcg);
+ }
+
if (!pd) {
+ /*
+ * GFP_NOWAIT failed. Free the existing one and
+ * prealloc for @blkg w/ GFP_KERNEL.
+ */
+ if (pinned_blkg)
+ blkg_put(pinned_blkg);
+ blkg_get(blkg);
+ pinned_blkg = blkg;
+
spin_unlock_irq(&q->queue_lock);
- goto pd_prealloc;
+
+ if (pd_prealloc)
+ pol->pd_free_fn(pd_prealloc);
+ pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q,
+ blkg->blkcg);
+ if (pd_prealloc)
+ goto retry;
+ else
+ goto enomem;
}
blkg->pd[pol->plid] = pd;
pd->blkg = blkg;
pd->plid = pol->plid;
- if (pol->pd_init_fn)
- pol->pd_init_fn(pd);
}
+ /* all allocated, init in the same order */
+ if (pol->pd_init_fn)
+ list_for_each_entry_reverse(blkg, &q->blkg_list, q_node)
+ pol->pd_init_fn(blkg->pd[pol->plid]);
+
__set_bit(pol->plid, q->blkcg_pols);
ret = 0;
spin_unlock_irq(&q->queue_lock);
-out_bypass_end:
+out:
if (queue_is_mq(q))
blk_mq_unfreeze_queue(q);
+ if (pinned_blkg)
+ blkg_put(pinned_blkg);
if (pd_prealloc)
pol->pd_free_fn(pd_prealloc);
return ret;
+
+enomem:
+ /* alloc failed, nothing's initialized yet, free everything */
+ spin_lock_irq(&q->queue_lock);
+ list_for_each_entry(blkg, &q->blkg_list, q_node) {
+ if (blkg->pd[pol->plid]) {
+ pol->pd_free_fn(blkg->pd[pol->plid]);
+ blkg->pd[pol->plid] = NULL;
+ }
+ }
+ spin_unlock_irq(&q->queue_lock);
+ ret = -ENOMEM;
+ goto out;
}
EXPORT_SYMBOL_GPL(blkcg_activate_policy);
diff --git a/block/blk-core.c b/block/blk-core.c
index d5e668ec751b..a1e228752083 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -132,6 +132,9 @@ static const char *const blk_op_name[] = {
REQ_OP_NAME(SECURE_ERASE),
REQ_OP_NAME(ZONE_RESET),
REQ_OP_NAME(ZONE_RESET_ALL),
+ REQ_OP_NAME(ZONE_OPEN),
+ REQ_OP_NAME(ZONE_CLOSE),
+ REQ_OP_NAME(ZONE_FINISH),
REQ_OP_NAME(WRITE_SAME),
REQ_OP_NAME(WRITE_ZEROES),
REQ_OP_NAME(SCSI_IN),
@@ -336,14 +339,14 @@ EXPORT_SYMBOL_GPL(blk_set_queue_dying);
*/
void blk_cleanup_queue(struct request_queue *q)
{
+ WARN_ON_ONCE(blk_queue_registered(q));
+
/* mark @q DYING, no new request or merges will be allowed afterwards */
- mutex_lock(&q->sysfs_lock);
blk_set_queue_dying(q);
blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
blk_queue_flag_set(QUEUE_FLAG_DYING, q);
- mutex_unlock(&q->sysfs_lock);
/*
* Drain all requests queued before DYING marking. Set DEAD flag to
@@ -848,11 +851,7 @@ static inline int blk_partition_remap(struct bio *bio)
if (unlikely(bio_check_ro(bio, p)))
goto out;
- /*
- * Zone reset does not include bi_size so bio_sectors() is always 0.
- * Include a test for the reset op code and perform the remap if needed.
- */
- if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET) {
+ if (bio_sectors(bio)) {
if (bio_check_eod(bio, part_nr_sects_read(p)))
goto out;
bio->bi_iter.bi_sector += p->start_sect;
@@ -936,6 +935,9 @@ generic_make_request_checks(struct bio *bio)
goto not_supported;
break;
case REQ_OP_ZONE_RESET:
+ case REQ_OP_ZONE_OPEN:
+ case REQ_OP_ZONE_CLOSE:
+ case REQ_OP_ZONE_FINISH:
if (!blk_queue_is_zoned(q))
goto not_supported;
break;
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 1db44ca0f4a6..e20a852ae432 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -55,6 +55,8 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
rq->rq_disk = bd_disk;
rq->end_io = done;
+ blk_account_io_start(rq, true);
+
/*
* don't check dying flag for MQ because the request won't
* be reused after dying flag is set
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 1eec9cbe5a0a..1777346baf06 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -136,6 +136,17 @@ static void blk_flush_queue_rq(struct request *rq, bool add_front)
blk_mq_add_to_requeue_list(rq, add_front, true);
}
+static void blk_account_io_flush(struct request *rq)
+{
+ struct hd_struct *part = &rq->rq_disk->part0;
+
+ part_stat_lock();
+ part_stat_inc(part, ios[STAT_FLUSH]);
+ part_stat_add(part, nsecs[STAT_FLUSH],
+ ktime_get_ns() - rq->start_time_ns);
+ part_stat_unlock();
+}
+
/**
* blk_flush_complete_seq - complete flush sequence
* @rq: PREFLUSH/FUA request being sequenced
@@ -185,7 +196,7 @@ static void blk_flush_complete_seq(struct request *rq,
case REQ_FSEQ_DONE:
/*
- * @rq was previously adjusted by blk_flush_issue() for
+ * @rq was previously adjusted by blk_insert_flush() for
* flush sequencing and may already have gone through the
* flush data request completion path. Restore @rq for
* normal completion and end it.
@@ -212,6 +223,8 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
struct blk_flush_queue *fq = blk_get_flush_queue(q, flush_rq->mq_ctx);
struct blk_mq_hw_ctx *hctx;
+ blk_account_io_flush(flush_rq);
+
/* release the tag's ownership to the req cloned from */
spin_lock_irqsave(&fq->mq_flush_lock, flags);
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 2a3db80c1dce..e01267f99183 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -1057,9 +1057,12 @@ static bool iocg_activate(struct ioc_gq *iocg, struct ioc_now *now)
atomic64_set(&iocg->active_period, cur_period);
/* already activated or breaking leaf-only constraint? */
- for (i = iocg->level; i > 0; i--)
- if (!list_empty(&iocg->active_list))
+ if (!list_empty(&iocg->active_list))
+ goto succeed_unlock;
+ for (i = iocg->level - 1; i > 0; i--)
+ if (!list_empty(&iocg->ancestors[i]->active_list))
goto fail_unlock;
+
if (iocg->child_active_sum)
goto fail_unlock;
@@ -1101,6 +1104,7 @@ static bool iocg_activate(struct ioc_gq *iocg, struct ioc_now *now)
ioc_start_period(ioc, now);
}
+succeed_unlock:
spin_unlock_irq(&ioc->lock);
return true;
@@ -2110,10 +2114,10 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf,
goto einval;
}
- spin_lock_irq(&iocg->ioc->lock);
+ spin_lock(&iocg->ioc->lock);
iocg->cfg_weight = v;
weight_updated(iocg);
- spin_unlock_irq(&iocg->ioc->lock);
+ spin_unlock(&iocg->ioc->lock);
blkg_conf_finish(&ctx);
return nbytes;
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 48e6725b32ee..d783bdc4559b 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -293,7 +293,7 @@ split:
void __blk_queue_split(struct request_queue *q, struct bio **bio,
unsigned int *nr_segs)
{
- struct bio *split;
+ struct bio *split = NULL;
switch (bio_op(*bio)) {
case REQ_OP_DISCARD:
@@ -309,6 +309,21 @@ void __blk_queue_split(struct request_queue *q, struct bio **bio,
nr_segs);
break;
default:
+ /*
+ * All drivers must accept single-segments bios that are <=
+ * PAGE_SIZE. This is a quick and dirty check that relies on
+ * the fact that bi_io_vec[0] is always valid if a bio has data.
+ * The check might lead to occasional false negatives when bios
+ * are cloned, but compared to the performance impact of cloned
+ * bios themselves the loop below doesn't matter anyway.
+ */
+ if (!q->limits.chunk_sectors &&
+ (*bio)->bi_vcnt == 1 &&
+ ((*bio)->bi_io_vec[0].bv_len +
+ (*bio)->bi_io_vec[0].bv_offset) <= PAGE_SIZE) {
+ *nr_segs = 1;
+ break;
+ }
split = blk_bio_segment_split(q, *bio, &q->bio_split, nr_segs);
break;
}
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index a0d3ce30fa08..062229395a50 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -74,10 +74,8 @@ static ssize_t blk_mq_sysfs_show(struct kobject *kobj, struct attribute *attr,
if (!entry->show)
return -EIO;
- res = -ENOENT;
mutex_lock(&q->sysfs_lock);
- if (!blk_queue_dying(q))
- res = entry->show(ctx, page);
+ res = entry->show(ctx, page);
mutex_unlock(&q->sysfs_lock);
return res;
}
@@ -97,10 +95,8 @@ static ssize_t blk_mq_sysfs_store(struct kobject *kobj, struct attribute *attr,
if (!entry->store)
return -EIO;
- res = -ENOENT;
mutex_lock(&q->sysfs_lock);
- if (!blk_queue_dying(q))
- res = entry->store(ctx, page, length);
+ res = entry->store(ctx, page, length);
mutex_unlock(&q->sysfs_lock);
return res;
}
@@ -120,10 +116,8 @@ static ssize_t blk_mq_hw_sysfs_show(struct kobject *kobj,
if (!entry->show)
return -EIO;
- res = -ENOENT;
mutex_lock(&q->sysfs_lock);
- if (!blk_queue_dying(q))
- res = entry->show(hctx, page);
+ res = entry->show(hctx, page);
mutex_unlock(&q->sysfs_lock);
return res;
}
@@ -144,10 +138,8 @@ static ssize_t blk_mq_hw_sysfs_store(struct kobject *kobj,
if (!entry->store)
return -EIO;
- res = -ENOENT;
mutex_lock(&q->sysfs_lock);
- if (!blk_queue_dying(q))
- res = entry->store(hctx, page, length);
+ res = entry->store(hctx, page, length);
mutex_unlock(&q->sysfs_lock);
return res;
}
@@ -166,20 +158,25 @@ static ssize_t blk_mq_hw_sysfs_nr_reserved_tags_show(struct blk_mq_hw_ctx *hctx,
static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
{
+ const size_t size = PAGE_SIZE - 1;
unsigned int i, first = 1;
- ssize_t ret = 0;
+ int ret = 0, pos = 0;
for_each_cpu(i, hctx->cpumask) {
if (first)
- ret += sprintf(ret + page, "%u", i);
+ ret = snprintf(pos + page, size - pos, "%u", i);
else
- ret += sprintf(ret + page, ", %u", i);
+ ret = snprintf(pos + page, size - pos, ", %u", i);
+
+ if (ret >= size - pos)
+ break;
first = 0;
+ pos += ret;
}
- ret += sprintf(ret + page, "\n");
- return ret;
+ ret = snprintf(pos + page, size + 1 - pos, "\n");
+ return pos + ret;
}
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_tags = {
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 008388e82b5c..fbacde454718 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -15,14 +15,6 @@
#include "blk-mq.h"
#include "blk-mq-tag.h"
-bool blk_mq_has_free_tags(struct blk_mq_tags *tags)
-{
- if (!tags)
- return true;
-
- return sbitmap_any_bit_clear(&tags->bitmap_tags.sb);
-}
-
/*
* If a previously inactive queue goes active, bump the active user count.
* We need to do this before try to allocate driver tag, then even if fail
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 61deab0b5a5a..15bc74acb57e 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -28,7 +28,6 @@ extern void blk_mq_free_tags(struct blk_mq_tags *tags);
extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data);
extern void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, struct blk_mq_tags *tags,
struct blk_mq_ctx *ctx, unsigned int tag);
-extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags);
extern int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
struct blk_mq_tags **tags,
unsigned int depth, bool can_grow);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index ec791156e9cc..323c9cb28066 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -93,7 +93,7 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx,
struct mq_inflight {
struct hd_struct *part;
- unsigned int *inflight;
+ unsigned int inflight[2];
};
static bool blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx,
@@ -102,45 +102,29 @@ static bool blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx,
{
struct mq_inflight *mi = priv;
- /*
- * index[0] counts the specific partition that was asked for.
- */
if (rq->part == mi->part)
- mi->inflight[0]++;
+ mi->inflight[rq_data_dir(rq)]++;
return true;
}
unsigned int blk_mq_in_flight(struct request_queue *q, struct hd_struct *part)
{
- unsigned inflight[2];
- struct mq_inflight mi = { .part = part, .inflight = inflight, };
+ struct mq_inflight mi = { .part = part };
- inflight[0] = inflight[1] = 0;
blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);
- return inflight[0];
-}
-
-static bool blk_mq_check_inflight_rw(struct blk_mq_hw_ctx *hctx,
- struct request *rq, void *priv,
- bool reserved)
-{
- struct mq_inflight *mi = priv;
-
- if (rq->part == mi->part)
- mi->inflight[rq_data_dir(rq)]++;
-
- return true;
+ return mi.inflight[0] + mi.inflight[1];
}
void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part,
unsigned int inflight[2])
{
- struct mq_inflight mi = { .part = part, .inflight = inflight, };
+ struct mq_inflight mi = { .part = part };
- inflight[0] = inflight[1] = 0;
- blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight_rw, &mi);
+ blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);
+ inflight[0] = mi.inflight[0];
+ inflight[1] = mi.inflight[1];
}
void blk_freeze_queue_start(struct request_queue *q)
@@ -276,12 +260,6 @@ void blk_mq_wake_waiters(struct request_queue *q)
blk_mq_tag_wakeup_all(hctx->tags, true);
}
-bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
-{
- return blk_mq_has_free_tags(hctx->tags);
-}
-EXPORT_SYMBOL(blk_mq_can_queue);
-
/*
* Only need start/end time stamping if we have iostat or
* blk stats enabled, or using an IO scheduler.
@@ -663,18 +641,6 @@ bool blk_mq_complete_request(struct request *rq)
}
EXPORT_SYMBOL(blk_mq_complete_request);
-int blk_mq_request_started(struct request *rq)
-{
- return blk_mq_rq_state(rq) != MQ_RQ_IDLE;
-}
-EXPORT_SYMBOL_GPL(blk_mq_request_started);
-
-int blk_mq_request_completed(struct request *rq)
-{
- return blk_mq_rq_state(rq) == MQ_RQ_COMPLETE;
-}
-EXPORT_SYMBOL_GPL(blk_mq_request_completed);
-
void blk_mq_start_request(struct request *rq)
{
struct request_queue *q = rq->q;
@@ -1064,7 +1030,7 @@ bool blk_mq_get_driver_tag(struct request *rq)
bool shared;
if (rq->tag != -1)
- goto done;
+ return true;
if (blk_mq_tag_is_reserved(data.hctx->sched_tags, rq->internal_tag))
data.flags |= BLK_MQ_REQ_RESERVED;
@@ -1079,7 +1045,6 @@ bool blk_mq_get_driver_tag(struct request *rq)
data.hctx->tags->rqs[rq->tag] = rq;
}
-done:
return rq->tag != -1;
}
@@ -1486,7 +1451,7 @@ void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
}
EXPORT_SYMBOL(blk_mq_delay_run_hw_queue);
-bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
+void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
{
int srcu_idx;
bool need_run;
@@ -1504,12 +1469,8 @@ bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
blk_mq_hctx_has_pending(hctx);
hctx_unlock(hctx, srcu_idx);
- if (need_run) {
+ if (need_run)
__blk_mq_delay_run_hw_queue(hctx, async, 0);
- return true;
- }
-
- return false;
}
EXPORT_SYMBOL(blk_mq_run_hw_queue);
@@ -2789,6 +2750,23 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
int i, j, end;
struct blk_mq_hw_ctx **hctxs = q->queue_hw_ctx;
+ if (q->nr_hw_queues < set->nr_hw_queues) {
+ struct blk_mq_hw_ctx **new_hctxs;
+
+ new_hctxs = kcalloc_node(set->nr_hw_queues,
+ sizeof(*new_hctxs), GFP_KERNEL,
+ set->numa_node);
+ if (!new_hctxs)
+ return;
+ if (hctxs)
+ memcpy(new_hctxs, hctxs, q->nr_hw_queues *
+ sizeof(*hctxs));
+ q->queue_hw_ctx = new_hctxs;
+ q->nr_hw_queues = set->nr_hw_queues;
+ kfree(hctxs);
+ hctxs = new_hctxs;
+ }
+
/* protect against switching io scheduler */
mutex_lock(&q->sysfs_lock);
for (i = 0; i < set->nr_hw_queues; i++) {
@@ -2844,19 +2822,6 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
mutex_unlock(&q->sysfs_lock);
}
-/*
- * Maximum number of hardware queues we support. For single sets, we'll never
- * have more than the CPUs (software queues). For multiple sets, the tag_set
- * user may have set ->nr_hw_queues larger.
- */
-static unsigned int nr_hw_queues(struct blk_mq_tag_set *set)
-{
- if (set->nr_maps == 1)
- return nr_cpu_ids;
-
- return max(set->nr_hw_queues, nr_cpu_ids);
-}
-
struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
struct request_queue *q,
bool elevator_init)
@@ -2876,12 +2841,6 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
/* init q->mq_kobj and sw queues' kobjects */
blk_mq_sysfs_init(q);
- q->nr_queues = nr_hw_queues(set);
- q->queue_hw_ctx = kcalloc_node(q->nr_queues, sizeof(*(q->queue_hw_ctx)),
- GFP_KERNEL, set->numa_node);
- if (!q->queue_hw_ctx)
- goto err_sys_init;
-
INIT_LIST_HEAD(&q->unused_hctx_list);
spin_lock_init(&q->unused_hctx_lock);
@@ -2929,7 +2888,6 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
err_hctxs:
kfree(q->queue_hw_ctx);
q->nr_hw_queues = 0;
-err_sys_init:
blk_mq_sysfs_deinit(q);
err_poll:
blk_stat_free_callback(q->poll_cb);
@@ -3030,6 +2988,29 @@ static int blk_mq_update_queue_map(struct blk_mq_tag_set *set)
}
}
+static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set,
+ int cur_nr_hw_queues, int new_nr_hw_queues)
+{
+ struct blk_mq_tags **new_tags;
+
+ if (cur_nr_hw_queues >= new_nr_hw_queues)
+ return 0;
+
+ new_tags = kcalloc_node(new_nr_hw_queues, sizeof(struct blk_mq_tags *),
+ GFP_KERNEL, set->numa_node);
+ if (!new_tags)
+ return -ENOMEM;
+
+ if (set->tags)
+ memcpy(new_tags, set->tags, cur_nr_hw_queues *
+ sizeof(*set->tags));
+ kfree(set->tags);
+ set->tags = new_tags;
+ set->nr_hw_queues = new_nr_hw_queues;
+
+ return 0;
+}
+
/*
* Alloc a tag set to be associated with one or more request queues.
* May fail with EINVAL for various error conditions. May adjust the
@@ -3083,9 +3064,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
if (set->nr_maps == 1 && set->nr_hw_queues > nr_cpu_ids)
set->nr_hw_queues = nr_cpu_ids;
- set->tags = kcalloc_node(nr_hw_queues(set), sizeof(struct blk_mq_tags *),
- GFP_KERNEL, set->numa_node);
- if (!set->tags)
+ if (blk_mq_realloc_tag_set_tags(set, 0, set->nr_hw_queues) < 0)
return -ENOMEM;
ret = -ENOMEM;
@@ -3126,7 +3105,7 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
{
int i, j;
- for (i = 0; i < nr_hw_queues(set); i++)
+ for (i = 0; i < set->nr_hw_queues; i++)
blk_mq_free_map_and_requests(set, i);
for (j = 0; j < set->nr_maps; j++) {
@@ -3271,10 +3250,6 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
list_for_each_entry(q, &set->tag_list, tag_set_list)
blk_mq_freeze_queue(q);
/*
- * Sync with blk_mq_queue_tag_busy_iter.
- */
- synchronize_rcu();
- /*
* Switch IO scheduler to 'none', cleaning up the data associated
* with the previous scheduler. We will switch back once we are done
* updating the new sw to hw queue mappings.
@@ -3288,6 +3263,10 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
blk_mq_sysfs_unregister(q);
}
+ if (blk_mq_realloc_tag_set_tags(set, set->nr_hw_queues, nr_hw_queues) <
+ 0)
+ goto reregister;
+
prev_nr_hw_queues = set->nr_hw_queues;
set->nr_hw_queues = nr_hw_queues;
blk_mq_update_queue_map(set);
@@ -3304,6 +3283,7 @@ fallback:
blk_mq_map_swqueue(q);
}
+reregister:
list_for_each_entry(q, &set->tag_list, tag_set_list) {
blk_mq_sysfs_register(q);
blk_mq_debugfs_register_hctxs(q);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 32c62c64e6c2..eaaca8fc1c28 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -128,15 +128,6 @@ extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);
void blk_mq_release(struct request_queue *q);
-/**
- * blk_mq_rq_state() - read the current MQ_RQ_* state of a request
- * @rq: target request.
- */
-static inline enum mq_rq_state blk_mq_rq_state(struct request *rq)
-{
- return READ_ONCE(rq->state);
-}
-
static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
unsigned int cpu)
{
diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index 61b635bc2a31..656460636ad3 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -160,24 +160,27 @@ bool rq_depth_calc_max_depth(struct rq_depth *rqd)
return ret;
}
-void rq_depth_scale_up(struct rq_depth *rqd)
+/* Returns true on success and false if scaling up wasn't possible */
+bool rq_depth_scale_up(struct rq_depth *rqd)
{
/*
* Hit max in previous round, stop here
*/
if (rqd->scaled_max)
- return;
+ return false;
rqd->scale_step--;
rqd->scaled_max = rq_depth_calc_max_depth(rqd);
+ return true;
}
/*
* Scale rwb down. If 'hard_throttle' is set, do it quicker, since we
- * had a latency violation.
+ * had a latency violation. Returns true on success and returns false if
+ * scaling down wasn't possible.
*/
-void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
+bool rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
{
/*
* Stop scaling down when we've hit the limit. This also prevents
@@ -185,7 +188,7 @@ void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
* keep up.
*/
if (rqd->max_depth == 1)
- return;
+ return false;
if (rqd->scale_step < 0 && hard_throttle)
rqd->scale_step = 0;
@@ -194,6 +197,7 @@ void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle)
rqd->scaled_max = false;
rq_depth_calc_max_depth(rqd);
+ return true;
}
struct rq_qos_wait_data {
diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
index 08a09dbe0f4b..2bc43e94f4c4 100644
--- a/block/blk-rq-qos.h
+++ b/block/blk-rq-qos.h
@@ -108,16 +108,13 @@ static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
{
- struct rq_qos *cur, *prev = NULL;
- for (cur = q->rq_qos; cur; cur = cur->next) {
- if (cur == rqos) {
- if (prev)
- prev->next = rqos->next;
- else
- q->rq_qos = cur;
+ struct rq_qos **cur;
+
+ for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) {
+ if (*cur == rqos) {
+ *cur = rqos->next;
break;
}
- prev = cur;
}
blk_mq_debugfs_unregister_rqos(rqos);
@@ -130,8 +127,8 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
acquire_inflight_cb_t *acquire_inflight_cb,
cleanup_cb_t *cleanup_cb);
bool rq_wait_inc_below(struct rq_wait *rq_wait, unsigned int limit);
-void rq_depth_scale_up(struct rq_depth *rqd);
-void rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle);
+bool rq_depth_scale_up(struct rq_depth *rqd);
+bool rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle);
bool rq_depth_calc_max_depth(struct rq_depth *rqd);
void __rq_qos_cleanup(struct rq_qos *rqos, struct bio *bio);
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index 457d9ba3eb20..6e7ec87d49fa 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -42,17 +42,13 @@ static __latent_entropy void blk_done_softirq(struct softirq_action *h)
static void trigger_softirq(void *data)
{
struct request *rq = data;
- unsigned long flags;
struct list_head *list;
- local_irq_save(flags);
list = this_cpu_ptr(&blk_cpu_done);
list_add_tail(&rq->ipi_list, list);
if (list->next == &rq->ipi_list)
raise_softirq_irqoff(BLOCK_SOFTIRQ);
-
- local_irq_restore(flags);
}
/*
diff --git a/block/blk-stat.c b/block/blk-stat.c
index 940f15d600f8..7da302ff88d0 100644
--- a/block/blk-stat.c
+++ b/block/blk-stat.c
@@ -53,7 +53,7 @@ void blk_stat_add(struct request *rq, u64 now)
struct request_queue *q = rq->q;
struct blk_stat_callback *cb;
struct blk_rq_stat *stat;
- int bucket;
+ int bucket, cpu;
u64 value;
value = (now >= rq->io_start_time_ns) ? now - rq->io_start_time_ns : 0;
@@ -61,6 +61,7 @@ void blk_stat_add(struct request *rq, u64 now)
blk_throtl_stat_add(rq, value);
rcu_read_lock();
+ cpu = get_cpu();
list_for_each_entry_rcu(cb, &q->stats->callbacks, list) {
if (!blk_stat_is_active(cb))
continue;
@@ -69,10 +70,10 @@ void blk_stat_add(struct request *rq, u64 now)
if (bucket < 0)
continue;
- stat = &get_cpu_ptr(cb->cpu_stat)[bucket];
+ stat = &per_cpu_ptr(cb->cpu_stat, cpu)[bucket];
blk_rq_stat_add(stat, value);
- put_cpu_ptr(cb->cpu_stat);
}
+ put_cpu();
rcu_read_unlock();
}
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 46f5198be017..fca9b158f4a0 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -801,10 +801,6 @@ queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
if (!entry->show)
return -EIO;
mutex_lock(&q->sysfs_lock);
- if (blk_queue_dying(q)) {
- mutex_unlock(&q->sysfs_lock);
- return -ENOENT;
- }
res = entry->show(q, page);
mutex_unlock(&q->sysfs_lock);
return res;
@@ -823,10 +819,6 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
q = container_of(kobj, struct request_queue, kobj);
mutex_lock(&q->sysfs_lock);
- if (blk_queue_dying(q)) {
- mutex_unlock(&q->sysfs_lock);
- return -ENOENT;
- }
res = entry->store(q, page, length);
mutex_unlock(&q->sysfs_lock);
return res;
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 18f773e52dfb..98233c9c65a8 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -12,6 +12,7 @@
#include <linux/blktrace_api.h>
#include <linux/blk-cgroup.h>
#include "blk.h"
+#include "blk-cgroup-rwstat.h"
/* Max dispatch from a group in 1 round */
static int throtl_grp_quantum = 8;
@@ -176,6 +177,9 @@ struct throtl_grp {
unsigned int bio_cnt; /* total bios */
unsigned int bad_bio_cnt; /* bios exceeding latency threshold */
unsigned long bio_cnt_reset_time;
+
+ struct blkg_rwstat stat_bytes;
+ struct blkg_rwstat stat_ios;
};
/* We measure latency for request size from <= 4k to >= 1M */
@@ -489,6 +493,12 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp,
if (!tg)
return NULL;
+ if (blkg_rwstat_init(&tg->stat_bytes, gfp))
+ goto err_free_tg;
+
+ if (blkg_rwstat_init(&tg->stat_ios, gfp))
+ goto err_exit_stat_bytes;
+
throtl_service_queue_init(&tg->service_queue);
for (rw = READ; rw <= WRITE; rw++) {
@@ -513,6 +523,12 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp,
tg->idletime_threshold_conf = DFL_IDLE_THRESHOLD;
return &tg->pd;
+
+err_exit_stat_bytes:
+ blkg_rwstat_exit(&tg->stat_bytes);
+err_free_tg:
+ kfree(tg);
+ return NULL;
}
static void throtl_pd_init(struct blkg_policy_data *pd)
@@ -611,6 +627,8 @@ static void throtl_pd_free(struct blkg_policy_data *pd)
struct throtl_grp *tg = pd_to_tg(pd);
del_timer_sync(&tg->service_queue.pending_timer);
+ blkg_rwstat_exit(&tg->stat_bytes);
+ blkg_rwstat_exit(&tg->stat_ios);
kfree(tg);
}
@@ -1464,6 +1482,32 @@ static ssize_t tg_set_conf_uint(struct kernfs_open_file *of,
return tg_set_conf(of, buf, nbytes, off, false);
}
+static int tg_print_rwstat(struct seq_file *sf, void *v)
+{
+ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
+ blkg_prfill_rwstat, &blkcg_policy_throtl,
+ seq_cft(sf)->private, true);
+ return 0;
+}
+
+static u64 tg_prfill_rwstat_recursive(struct seq_file *sf,
+ struct blkg_policy_data *pd, int off)
+{
+ struct blkg_rwstat_sample sum;
+
+ blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_throtl, off,
+ &sum);
+ return __blkg_prfill_rwstat(sf, pd, &sum);
+}
+
+static int tg_print_rwstat_recursive(struct seq_file *sf, void *v)
+{
+ blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
+ tg_prfill_rwstat_recursive, &blkcg_policy_throtl,
+ seq_cft(sf)->private, true);
+ return 0;
+}
+
static struct cftype throtl_legacy_files[] = {
{
.name = "throttle.read_bps_device",
@@ -1491,23 +1535,23 @@ static struct cftype throtl_legacy_files[] = {
},
{
.name = "throttle.io_service_bytes",
- .private = (unsigned long)&blkcg_policy_throtl,
- .seq_show = blkg_print_stat_bytes,
+ .private = offsetof(struct throtl_grp, stat_bytes),
+ .seq_show = tg_print_rwstat,
},
{
.name = "throttle.io_service_bytes_recursive",
- .private = (unsigned long)&blkcg_policy_throtl,
- .seq_show = blkg_print_stat_bytes_recursive,
+ .private = offsetof(struct throtl_grp, stat_bytes),
+ .seq_show = tg_print_rwstat_recursive,
},
{
.name = "throttle.io_serviced",
- .private = (unsigned long)&blkcg_policy_throtl,
- .seq_show = blkg_print_stat_ios,
+ .private = offsetof(struct throtl_grp, stat_ios),
+ .seq_show = tg_print_rwstat,
},
{
.name = "throttle.io_serviced_recursive",
- .private = (unsigned long)&blkcg_policy_throtl,
- .seq_show = blkg_print_stat_ios_recursive,
+ .private = offsetof(struct throtl_grp, stat_ios),
+ .seq_show = tg_print_rwstat_recursive,
},
{ } /* terminate */
};
@@ -2127,7 +2171,16 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
WARN_ON_ONCE(!rcu_read_lock_held());
/* see throtl_charge_bio() */
- if (bio_flagged(bio, BIO_THROTTLED) || !tg->has_rules[rw])
+ if (bio_flagged(bio, BIO_THROTTLED))
+ goto out;
+
+ if (!cgroup_subsys_on_dfl(io_cgrp_subsys)) {
+ blkg_rwstat_add(&tg->stat_bytes, bio->bi_opf,
+ bio->bi_iter.bi_size);
+ blkg_rwstat_add(&tg->stat_ios, bio->bi_opf, 1);
+ }
+
+ if (!tg->has_rules[rw])
goto out;
spin_lock_irq(&q->queue_lock);
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 8af553a0ba00..8641ba9793c5 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -308,7 +308,8 @@ static void calc_wb_limits(struct rq_wb *rwb)
static void scale_up(struct rq_wb *rwb)
{
- rq_depth_scale_up(&rwb->rq_depth);
+ if (!rq_depth_scale_up(&rwb->rq_depth))
+ return;
calc_wb_limits(rwb);
rwb->unknown_cnt = 0;
rwb_wake_all(rwb);
@@ -317,7 +318,8 @@ static void scale_up(struct rq_wb *rwb)
static void scale_down(struct rq_wb *rwb, bool hard_throttle)
{
- rq_depth_scale_down(&rwb->rq_depth, hard_throttle);
+ if (!rq_depth_scale_down(&rwb->rq_depth, hard_throttle))
+ return;
calc_wb_limits(rwb);
rwb->unknown_cnt = 0;
rwb_trace_step(rwb, "scale down");
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 4bc5f260248a..6fad6f3f6980 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -93,172 +93,85 @@ unsigned int blkdev_nr_zones(struct block_device *bdev)
if (!blk_queue_is_zoned(q))
return 0;
- return __blkdev_nr_zones(q, bdev->bd_part->nr_sects);
+ return __blkdev_nr_zones(q, get_capacity(bdev->bd_disk));
}
EXPORT_SYMBOL_GPL(blkdev_nr_zones);
-/*
- * Check that a zone report belongs to this partition, and if yes, fix its start
- * sector and write pointer and return true. Return false otherwise.
- */
-static bool blkdev_report_zone(struct block_device *bdev, struct blk_zone *rep)
-{
- sector_t offset = get_start_sect(bdev);
-
- if (rep->start < offset)
- return false;
-
- rep->start -= offset;
- if (rep->start + rep->len > bdev->bd_part->nr_sects)
- return false;
-
- if (rep->type == BLK_ZONE_TYPE_CONVENTIONAL)
- rep->wp = rep->start + rep->len;
- else
- rep->wp -= offset;
- return true;
-}
-
-static int blk_report_zones(struct gendisk *disk, sector_t sector,
- struct blk_zone *zones, unsigned int *nr_zones)
-{
- struct request_queue *q = disk->queue;
- unsigned int z = 0, n, nrz = *nr_zones;
- sector_t capacity = get_capacity(disk);
- int ret;
-
- while (z < nrz && sector < capacity) {
- n = nrz - z;
- ret = disk->fops->report_zones(disk, sector, &zones[z], &n);
- if (ret)
- return ret;
- if (!n)
- break;
- sector += blk_queue_zone_sectors(q) * n;
- z += n;
- }
-
- WARN_ON(z > *nr_zones);
- *nr_zones = z;
-
- return 0;
-}
-
/**
* blkdev_report_zones - Get zones information
* @bdev: Target block device
* @sector: Sector from which to report zones
- * @zones: Array of zone structures where to return the zones information
- * @nr_zones: Number of zone structures in the zone array
+ * @nr_zones: Maximum number of zones to report
+ * @cb: Callback function called for each reported zone
+ * @data: Private data for the callback
*
* Description:
- * Get zone information starting from the zone containing @sector.
- * The number of zone information reported may be less than the number
- * requested by @nr_zones. The number of zones actually reported is
- * returned in @nr_zones.
- * The caller must use memalloc_noXX_save/restore() calls to control
- * memory allocations done within this function (zone array and command
- * buffer allocation by the device driver).
+ * Get zone information starting from the zone containing @sector for at most
+ * @nr_zones, and call @cb for each zone reported by the device.
+ * To report all zones in a device starting from @sector, the BLK_ALL_ZONES
+ * constant can be passed to @nr_zones.
+ * Returns the number of zones reported by the device, or a negative errno
+ * value in case of failure.
+ *
+ * Note: The caller must use memalloc_noXX_save/restore() calls to control
+ * memory allocations done within this function.
*/
int blkdev_report_zones(struct block_device *bdev, sector_t sector,
- struct blk_zone *zones, unsigned int *nr_zones)
+ unsigned int nr_zones, report_zones_cb cb, void *data)
{
- struct request_queue *q = bdev_get_queue(bdev);
- unsigned int i, nrz;
- int ret;
-
- if (!blk_queue_is_zoned(q))
- return -EOPNOTSUPP;
+ struct gendisk *disk = bdev->bd_disk;
+ sector_t capacity = get_capacity(disk);
- /*
- * A block device that advertized itself as zoned must have a
- * report_zones method. If it does not have one defined, the device
- * driver has a bug. So warn about that.
- */
- if (WARN_ON_ONCE(!bdev->bd_disk->fops->report_zones))
+ if (!blk_queue_is_zoned(bdev_get_queue(bdev)) ||
+ WARN_ON_ONCE(!disk->fops->report_zones))
return -EOPNOTSUPP;
- if (!*nr_zones || sector >= bdev->bd_part->nr_sects) {
- *nr_zones = 0;
+ if (!nr_zones || sector >= capacity)
return 0;
- }
-
- nrz = min(*nr_zones,
- __blkdev_nr_zones(q, bdev->bd_part->nr_sects - sector));
- ret = blk_report_zones(bdev->bd_disk, get_start_sect(bdev) + sector,
- zones, &nrz);
- if (ret)
- return ret;
-
- for (i = 0; i < nrz; i++) {
- if (!blkdev_report_zone(bdev, zones))
- break;
- zones++;
- }
- *nr_zones = i;
-
- return 0;
+ return disk->fops->report_zones(disk, sector, nr_zones, cb, data);
}
EXPORT_SYMBOL_GPL(blkdev_report_zones);
-/*
- * Special case of zone reset operation to reset all zones in one command,
- * useful for applications like mkfs.
- */
-static int __blkdev_reset_all_zones(struct block_device *bdev, gfp_t gfp_mask)
-{
- struct bio *bio = bio_alloc(gfp_mask, 0);
- int ret;
-
- /* across the zones operations, don't need any sectors */
- bio_set_dev(bio, bdev);
- bio_set_op_attrs(bio, REQ_OP_ZONE_RESET_ALL, 0);
-
- ret = submit_bio_wait(bio);
- bio_put(bio);
-
- return ret;
-}
-
static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev,
+ sector_t sector,
sector_t nr_sectors)
{
if (!blk_queue_zone_resetall(bdev_get_queue(bdev)))
return false;
- if (nr_sectors != part_nr_sects_read(bdev->bd_part))
- return false;
/*
- * REQ_OP_ZONE_RESET_ALL can be executed only if the block device is
- * the entire disk, that is, if the blocks device start offset is 0 and
- * its capacity is the same as the entire disk.
+ * REQ_OP_ZONE_RESET_ALL can be executed only if the number of sectors
+ * of the applicable zone range is the entire disk.
*/
- return get_start_sect(bdev) == 0 &&
- part_nr_sects_read(bdev->bd_part) == get_capacity(bdev->bd_disk);
+ return !sector && nr_sectors == get_capacity(bdev->bd_disk);
}
/**
- * blkdev_reset_zones - Reset zones write pointer
+ * blkdev_zone_mgmt - Execute a zone management operation on a range of zones
* @bdev: Target block device
- * @sector: Start sector of the first zone to reset
- * @nr_sectors: Number of sectors, at least the length of one zone
+ * @op: Operation to be performed on the zones
+ * @sector: Start sector of the first zone to operate on
+ * @nr_sectors: Number of sectors, should be at least the length of one zone and
+ * must be zone size aligned.
* @gfp_mask: Memory allocation flags (for bio_alloc)
*
* Description:
- * Reset the write pointer of the zones contained in the range
+ * Perform the specified operation on the range of zones specified by
* @sector..@sector+@nr_sectors. Specifying the entire disk sector range
* is valid, but the specified range should not contain conventional zones.
+ * The operation to execute on each zone can be a zone reset, open, close
+ * or finish request.
*/
-int blkdev_reset_zones(struct block_device *bdev,
- sector_t sector, sector_t nr_sectors,
- gfp_t gfp_mask)
+int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
+ sector_t sector, sector_t nr_sectors,
+ gfp_t gfp_mask)
{
struct request_queue *q = bdev_get_queue(bdev);
- sector_t zone_sectors;
+ sector_t zone_sectors = blk_queue_zone_sectors(q);
+ sector_t capacity = get_capacity(bdev->bd_disk);
sector_t end_sector = sector + nr_sectors;
struct bio *bio = NULL;
- struct blk_plug plug;
int ret;
if (!blk_queue_is_zoned(q))
@@ -267,45 +180,62 @@ int blkdev_reset_zones(struct block_device *bdev,
if (bdev_read_only(bdev))
return -EPERM;
- if (!nr_sectors || end_sector > bdev->bd_part->nr_sects)
+ if (!op_is_zone_mgmt(op))
+ return -EOPNOTSUPP;
+
+ if (!nr_sectors || end_sector > capacity)
/* Out of range */
return -EINVAL;
- if (blkdev_allow_reset_all_zones(bdev, nr_sectors))
- return __blkdev_reset_all_zones(bdev, gfp_mask);
-
/* Check alignment (handle eventual smaller last zone) */
- zone_sectors = blk_queue_zone_sectors(q);
if (sector & (zone_sectors - 1))
return -EINVAL;
- if ((nr_sectors & (zone_sectors - 1)) &&
- end_sector != bdev->bd_part->nr_sects)
+ if ((nr_sectors & (zone_sectors - 1)) && end_sector != capacity)
return -EINVAL;
- blk_start_plug(&plug);
while (sector < end_sector) {
-
bio = blk_next_bio(bio, 0, gfp_mask);
- bio->bi_iter.bi_sector = sector;
bio_set_dev(bio, bdev);
- bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0);
+ /*
+ * Special case for the zone reset operation that reset all
+ * zones, this is useful for applications like mkfs.
+ */
+ if (op == REQ_OP_ZONE_RESET &&
+ blkdev_allow_reset_all_zones(bdev, sector, nr_sectors)) {
+ bio->bi_opf = REQ_OP_ZONE_RESET_ALL;
+ break;
+ }
+
+ bio->bi_opf = op;
+ bio->bi_iter.bi_sector = sector;
sector += zone_sectors;
/* This may take a while, so be nice to others */
cond_resched();
-
}
ret = submit_bio_wait(bio);
bio_put(bio);
- blk_finish_plug(&plug);
-
return ret;
}
-EXPORT_SYMBOL_GPL(blkdev_reset_zones);
+EXPORT_SYMBOL_GPL(blkdev_zone_mgmt);
+
+struct zone_report_args {
+ struct blk_zone __user *zones;
+};
+
+static int blkdev_copy_zone_to_user(struct blk_zone *zone, unsigned int idx,
+ void *data)
+{
+ struct zone_report_args *args = data;
+
+ if (copy_to_user(&args->zones[idx], zone, sizeof(struct blk_zone)))
+ return -EFAULT;
+ return 0;
+}
/*
* BLKREPORTZONE ioctl processing.
@@ -315,9 +245,9 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
void __user *argp = (void __user *)arg;
+ struct zone_report_args args;
struct request_queue *q;
struct blk_zone_report rep;
- struct blk_zone *zones;
int ret;
if (!argp)
@@ -339,44 +269,29 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
if (!rep.nr_zones)
return -EINVAL;
- rep.nr_zones = min(blkdev_nr_zones(bdev), rep.nr_zones);
-
- zones = kvmalloc_array(rep.nr_zones, sizeof(struct blk_zone),
- GFP_KERNEL | __GFP_ZERO);
- if (!zones)
- return -ENOMEM;
-
- ret = blkdev_report_zones(bdev, rep.sector, zones, &rep.nr_zones);
- if (ret)
- goto out;
-
- if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report))) {
- ret = -EFAULT;
- goto out;
- }
-
- if (rep.nr_zones) {
- if (copy_to_user(argp + sizeof(struct blk_zone_report), zones,
- sizeof(struct blk_zone) * rep.nr_zones))
- ret = -EFAULT;
- }
-
- out:
- kvfree(zones);
+ args.zones = argp + sizeof(struct blk_zone_report);
+ ret = blkdev_report_zones(bdev, rep.sector, rep.nr_zones,
+ blkdev_copy_zone_to_user, &args);
+ if (ret < 0)
+ return ret;
- return ret;
+ rep.nr_zones = ret;
+ if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report)))
+ return -EFAULT;
+ return 0;
}
/*
- * BLKRESETZONE ioctl processing.
+ * BLKRESETZONE, BLKOPENZONE, BLKCLOSEZONE and BLKFINISHZONE ioctl processing.
* Called from blkdev_ioctl.
*/
-int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
- unsigned int cmd, unsigned long arg)
+int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg)
{
void __user *argp = (void __user *)arg;
struct request_queue *q;
struct blk_zone_range zrange;
+ enum req_opf op;
if (!argp)
return -EINVAL;
@@ -397,8 +312,25 @@ int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range)))
return -EFAULT;
- return blkdev_reset_zones(bdev, zrange.sector, zrange.nr_sectors,
- GFP_KERNEL);
+ switch (cmd) {
+ case BLKRESETZONE:
+ op = REQ_OP_ZONE_RESET;
+ break;
+ case BLKOPENZONE:
+ op = REQ_OP_ZONE_OPEN;
+ break;
+ case BLKCLOSEZONE:
+ op = REQ_OP_ZONE_CLOSE;
+ break;
+ case BLKFINISHZONE:
+ op = REQ_OP_ZONE_FINISH;
+ break;
+ default:
+ return -ENOTTY;
+ }
+
+ return blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors,
+ GFP_KERNEL);
}
static inline unsigned long *blk_alloc_zone_bitmap(int node,
@@ -408,37 +340,99 @@ static inline unsigned long *blk_alloc_zone_bitmap(int node,
GFP_NOIO, node);
}
+void blk_queue_free_zone_bitmaps(struct request_queue *q)
+{
+ kfree(q->seq_zones_bitmap);
+ q->seq_zones_bitmap = NULL;
+ kfree(q->seq_zones_wlock);
+ q->seq_zones_wlock = NULL;
+}
+
+struct blk_revalidate_zone_args {
+ struct gendisk *disk;
+ unsigned long *seq_zones_bitmap;
+ unsigned long *seq_zones_wlock;
+ sector_t sector;
+};
+
/*
- * Allocate an array of struct blk_zone to get nr_zones zone information.
- * The allocated array may be smaller than nr_zones.
+ * Helper function to check the validity of zones of a zoned block device.
*/
-static struct blk_zone *blk_alloc_zones(unsigned int *nr_zones)
+static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
+ void *data)
{
- struct blk_zone *zones;
- size_t nrz = min(*nr_zones, BLK_ZONED_REPORT_MAX_ZONES);
+ struct blk_revalidate_zone_args *args = data;
+ struct gendisk *disk = args->disk;
+ struct request_queue *q = disk->queue;
+ sector_t zone_sectors = blk_queue_zone_sectors(q);
+ sector_t capacity = get_capacity(disk);
/*
- * GFP_KERNEL here is meaningless as the caller task context has
- * the PF_MEMALLOC_NOIO flag set in blk_revalidate_disk_zones()
- * with memalloc_noio_save().
+ * All zones must have the same size, with the exception on an eventual
+ * smaller last zone.
*/
- zones = kvcalloc(nrz, sizeof(struct blk_zone), GFP_KERNEL);
- if (!zones) {
- *nr_zones = 0;
- return NULL;
+ if (zone->start + zone_sectors < capacity &&
+ zone->len != zone_sectors) {
+ pr_warn("%s: Invalid zoned device with non constant zone size\n",
+ disk->disk_name);
+ return false;
}
- *nr_zones = nrz;
+ if (zone->start + zone->len >= capacity &&
+ zone->len > zone_sectors) {
+ pr_warn("%s: Invalid zoned device with larger last zone size\n",
+ disk->disk_name);
+ return -ENODEV;
+ }
+
+ /* Check for holes in the zone report */
+ if (zone->start != args->sector) {
+ pr_warn("%s: Zone gap at sectors %llu..%llu\n",
+ disk->disk_name, args->sector, zone->start);
+ return -ENODEV;
+ }
- return zones;
+ /* Check zone type */
+ switch (zone->type) {
+ case BLK_ZONE_TYPE_CONVENTIONAL:
+ case BLK_ZONE_TYPE_SEQWRITE_REQ:
+ case BLK_ZONE_TYPE_SEQWRITE_PREF:
+ break;
+ default:
+ pr_warn("%s: Invalid zone type 0x%x at sectors %llu\n",
+ disk->disk_name, (int)zone->type, zone->start);
+ return -ENODEV;
+ }
+
+ if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL)
+ set_bit(idx, args->seq_zones_bitmap);
+
+ args->sector += zone->len;
+ return 0;
}
-void blk_queue_free_zone_bitmaps(struct request_queue *q)
+static int blk_update_zone_info(struct gendisk *disk, unsigned int nr_zones,
+ struct blk_revalidate_zone_args *args)
{
- kfree(q->seq_zones_bitmap);
- q->seq_zones_bitmap = NULL;
- kfree(q->seq_zones_wlock);
- q->seq_zones_wlock = NULL;
+ /*
+ * Ensure that all memory allocations in this context are done as
+ * if GFP_NOIO was specified.
+ */
+ unsigned int noio_flag = memalloc_noio_save();
+ struct request_queue *q = disk->queue;
+ int ret;
+
+ args->seq_zones_wlock = blk_alloc_zone_bitmap(q->node, nr_zones);
+ if (!args->seq_zones_wlock)
+ return -ENOMEM;
+ args->seq_zones_bitmap = blk_alloc_zone_bitmap(q->node, nr_zones);
+ if (!args->seq_zones_bitmap)
+ return -ENOMEM;
+
+ ret = disk->fops->report_zones(disk, 0, nr_zones,
+ blk_revalidate_zone_cb, args);
+ memalloc_noio_restore(noio_flag);
+ return ret;
}
/**
@@ -454,13 +448,12 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
{
struct request_queue *q = disk->queue;
unsigned int nr_zones = __blkdev_nr_zones(q, get_capacity(disk));
- unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL;
- unsigned int i, rep_nr_zones = 0, z = 0, nrz;
- struct blk_zone *zones = NULL;
- unsigned int noio_flag;
- sector_t sector = 0;
+ struct blk_revalidate_zone_args args = { .disk = disk };
int ret = 0;
+ if (WARN_ON_ONCE(!blk_queue_is_zoned(q)))
+ return -EIO;
+
/*
* BIO based queues do not use a scheduler so only q->nr_zones
* needs to be updated so that the sysfs exposed value is correct.
@@ -470,78 +463,28 @@ int blk_revalidate_disk_zones(struct gendisk *disk)
return 0;
}
- /*
- * Ensure that all memory allocations in this context are done as
- * if GFP_NOIO was specified.
- */
- noio_flag = memalloc_noio_save();
-
- if (!blk_queue_is_zoned(q) || !nr_zones) {
- nr_zones = 0;
- goto update;
- }
-
- /* Allocate bitmaps */
- ret = -ENOMEM;
- seq_zones_wlock = blk_alloc_zone_bitmap(q->node, nr_zones);
- if (!seq_zones_wlock)
- goto out;
- seq_zones_bitmap = blk_alloc_zone_bitmap(q->node, nr_zones);
- if (!seq_zones_bitmap)
- goto out;
-
- /* Get zone information and initialize seq_zones_bitmap */
- rep_nr_zones = nr_zones;
- zones = blk_alloc_zones(&rep_nr_zones);
- if (!zones)
- goto out;
-
- while (z < nr_zones) {
- nrz = min(nr_zones - z, rep_nr_zones);
- ret = blk_report_zones(disk, sector, zones, &nrz);
- if (ret)
- goto out;
- if (!nrz)
- break;
- for (i = 0; i < nrz; i++) {
- if (zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL)
- set_bit(z, seq_zones_bitmap);
- z++;
- }
- sector += nrz * blk_queue_zone_sectors(q);
- }
-
- if (WARN_ON(z != nr_zones)) {
- ret = -EIO;
- goto out;
- }
+ if (nr_zones)
+ ret = blk_update_zone_info(disk, nr_zones, &args);
-update:
/*
* Install the new bitmaps, making sure the queue is stopped and
* all I/Os are completed (i.e. a scheduler is not referencing the
* bitmaps).
*/
blk_mq_freeze_queue(q);
- q->nr_zones = nr_zones;
- swap(q->seq_zones_wlock, seq_zones_wlock);
- swap(q->seq_zones_bitmap, seq_zones_bitmap);
- blk_mq_unfreeze_queue(q);
-
-out:
- memalloc_noio_restore(noio_flag);
-
- kvfree(zones);
- kfree(seq_zones_wlock);
- kfree(seq_zones_bitmap);
-
- if (ret) {
+ if (ret >= 0) {
+ q->nr_zones = nr_zones;
+ swap(q->seq_zones_wlock, args.seq_zones_wlock);
+ swap(q->seq_zones_bitmap, args.seq_zones_bitmap);
+ ret = 0;
+ } else {
pr_warn("%s: failed to revalidate zones\n", disk->disk_name);
- blk_mq_freeze_queue(q);
blk_queue_free_zone_bitmaps(q);
- blk_mq_unfreeze_queue(q);
}
+ blk_mq_unfreeze_queue(q);
+ kfree(args.seq_zones_wlock);
+ kfree(args.seq_zones_bitmap);
return ret;
}
EXPORT_SYMBOL_GPL(blk_revalidate_disk_zones);
diff --git a/block/blk.h b/block/blk.h
index 47fba9362e60..2bea40180b6f 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -242,14 +242,11 @@ int blk_dev_init(void);
* Contribute to IO statistics IFF:
*
* a) it's attached to a gendisk, and
- * b) the queue had IO stats enabled when this request was started, and
- * c) it's a file system request
+ * b) the queue had IO stats enabled when this request was started
*/
static inline bool blk_do_io_stat(struct request *rq)
{
- return rq->rq_disk &&
- (rq->rq_flags & RQF_IO_STAT) &&
- !blk_rq_is_passthrough(rq);
+ return rq->rq_disk && (rq->rq_flags & RQF_IO_STAT);
}
static inline void req_set_nomerge(struct request_queue *q, struct request *req)
diff --git a/block/elevator.c b/block/elevator.c
index 5437059c9261..4eab3d70e880 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -616,7 +616,8 @@ out:
static inline bool elv_support_iosched(struct request_queue *q)
{
- if (q->tag_set && (q->tag_set->flags & BLK_MQ_F_NO_SCHED))
+ if (!q->mq_ops ||
+ (q->tag_set && (q->tag_set->flags & BLK_MQ_F_NO_SCHED)))
return false;
return true;
}
@@ -831,3 +832,12 @@ struct request *elv_rb_latter_request(struct request_queue *q,
return NULL;
}
EXPORT_SYMBOL(elv_rb_latter_request);
+
+static int __init elevator_setup(char *str)
+{
+ pr_warn("Kernel parameter elevator= does not have any effect anymore.\n"
+ "Please use sysfs to set IO scheduler for individual devices.\n");
+ return 1;
+}
+
+__setup("elevator=", elevator_setup);
diff --git a/block/genhd.c b/block/genhd.c
index 26b31fcae217..ff6268970ddc 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1385,7 +1385,9 @@ static int diskstats_show(struct seq_file *seqf, void *v)
"%lu %lu %lu %u "
"%lu %lu %lu %u "
"%u %u %u "
- "%lu %lu %lu %u\n",
+ "%lu %lu %lu %u "
+ "%lu %u"
+ "\n",
MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
disk_name(gp, hd->partno, buf),
part_stat_read(hd, ios[STAT_READ]),
@@ -1402,7 +1404,9 @@ static int diskstats_show(struct seq_file *seqf, void *v)
part_stat_read(hd, ios[STAT_DISCARD]),
part_stat_read(hd, merges[STAT_DISCARD]),
part_stat_read(hd, sectors[STAT_DISCARD]),
- (unsigned int)part_stat_read_msecs(hd, STAT_DISCARD)
+ (unsigned int)part_stat_read_msecs(hd, STAT_DISCARD),
+ part_stat_read(hd, ios[STAT_FLUSH]),
+ (unsigned int)part_stat_read_msecs(hd, STAT_FLUSH)
);
}
disk_part_iter_exit(&piter);
diff --git a/block/ioctl.c b/block/ioctl.c
index 15a0eb80ada9..7ac8a66c9787 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -155,48 +155,21 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
}
}
-/*
- * This is an exported API for the block driver, and will not
- * acquire bd_mutex. This API should be used in case that
- * caller has held bd_mutex already.
- */
-int __blkdev_reread_part(struct block_device *bdev)
+static int blkdev_reread_part(struct block_device *bdev)
{
- struct gendisk *disk = bdev->bd_disk;
+ int ret;
- if (!disk_part_scan_enabled(disk) || bdev != bdev->bd_contains)
+ if (!disk_part_scan_enabled(bdev->bd_disk) || bdev != bdev->bd_contains)
return -EINVAL;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
- lockdep_assert_held(&bdev->bd_mutex);
-
- return rescan_partitions(disk, bdev);
-}
-EXPORT_SYMBOL(__blkdev_reread_part);
-
-/*
- * This is an exported API for the block driver, and will
- * try to acquire bd_mutex. If bd_mutex has been held already
- * in current context, please call __blkdev_reread_part().
- *
- * Make sure the held locks in current context aren't required
- * in open()/close() handler and I/O path for avoiding ABBA deadlock:
- * - bd_mutex is held before calling block driver's open/close
- * handler
- * - reading partition table may submit I/O to the block device
- */
-int blkdev_reread_part(struct block_device *bdev)
-{
- int res;
-
mutex_lock(&bdev->bd_mutex);
- res = __blkdev_reread_part(bdev);
+ ret = bdev_disk_changed(bdev, false);
mutex_unlock(&bdev->bd_mutex);
- return res;
+ return ret;
}
-EXPORT_SYMBOL(blkdev_reread_part);
static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
unsigned long arg, unsigned long flags)
@@ -532,7 +505,10 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
case BLKREPORTZONE:
return blkdev_report_zones_ioctl(bdev, mode, cmd, arg);
case BLKRESETZONE:
- return blkdev_reset_zones_ioctl(bdev, mode, cmd, arg);
+ case BLKOPENZONE:
+ case BLKCLOSEZONE:
+ case BLKFINISHZONE:
+ return blkdev_zone_mgmt_ioctl(bdev, mode, cmd, arg);
case BLKGETZONESZ:
return put_uint(arg, bdev_zone_sectors(bdev));
case BLKGETNRZONES:
diff --git a/block/opal_proto.h b/block/opal_proto.h
index 5532412d567c..325cbba2465f 100644
--- a/block/opal_proto.h
+++ b/block/opal_proto.h
@@ -76,7 +76,6 @@ enum opal_response_token {
* Derived from: TCG_Storage_Architecture_Core_Spec_v2.01_r1.00
* Section: 6.3 Assigned UIDs
*/
-#define OPAL_UID_LENGTH 8
#define OPAL_METHOD_LENGTH 8
#define OPAL_MSID_KEYLEN 15
#define OPAL_UID_LENGTH_HALF 4
@@ -108,6 +107,7 @@ enum opal_uid {
OPAL_C_PIN_TABLE,
OPAL_LOCKING_INFO_TABLE,
OPAL_ENTERPRISE_LOCKING_INFO_TABLE,
+ OPAL_DATASTORE,
/* C_PIN_TABLE object ID's */
OPAL_C_PIN_MSID,
OPAL_C_PIN_SID,
@@ -205,6 +205,10 @@ enum opal_lockingstate {
OPAL_LOCKING_LOCKED = 0x03,
};
+enum opal_parameter {
+ OPAL_SUM_SET_LIST = 0x060000,
+};
+
/* Packets derived from:
* TCG_Storage_Architecture_Core_Spec_v2.01_r1.00
* Secion: 3.2.3 ComPackets, Packets & Subpackets
diff --git a/block/partition-generic.c b/block/partition-generic.c
index aee643ce13d1..1d20c9cf213f 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -127,7 +127,8 @@ ssize_t part_stat_show(struct device *dev,
"%8lu %8lu %8llu %8u "
"%8lu %8lu %8llu %8u "
"%8u %8u %8u "
- "%8lu %8lu %8llu %8u"
+ "%8lu %8lu %8llu %8u "
+ "%8lu %8u"
"\n",
part_stat_read(p, ios[STAT_READ]),
part_stat_read(p, merges[STAT_READ]),
@@ -143,7 +144,9 @@ ssize_t part_stat_show(struct device *dev,
part_stat_read(p, ios[STAT_DISCARD]),
part_stat_read(p, merges[STAT_DISCARD]),
(unsigned long long)part_stat_read(p, sectors[STAT_DISCARD]),
- (unsigned int)part_stat_read_msecs(p, STAT_DISCARD));
+ (unsigned int)part_stat_read_msecs(p, STAT_DISCARD),
+ part_stat_read(p, ios[STAT_FLUSH]),
+ (unsigned int)part_stat_read_msecs(p, STAT_FLUSH));
}
ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
@@ -439,12 +442,14 @@ static bool disk_unlock_native_capacity(struct gendisk *disk)
}
}
-static int drop_partitions(struct gendisk *disk, struct block_device *bdev)
+int blk_drop_partitions(struct gendisk *disk, struct block_device *bdev)
{
struct disk_part_iter piter;
struct hd_struct *part;
int res;
+ if (!disk_part_scan_enabled(disk))
+ return 0;
if (bdev->bd_part_count || bdev->bd_super)
return -EBUSY;
res = invalidate_partition(disk, 0);
@@ -459,204 +464,124 @@ static int drop_partitions(struct gendisk *disk, struct block_device *bdev)
return 0;
}
-static bool part_zone_aligned(struct gendisk *disk,
- struct block_device *bdev,
- sector_t from, sector_t size)
+static bool blk_add_partition(struct gendisk *disk, struct block_device *bdev,
+ struct parsed_partitions *state, int p)
{
- unsigned int zone_sectors = bdev_zone_sectors(bdev);
+ sector_t size = state->parts[p].size;
+ sector_t from = state->parts[p].from;
+ struct hd_struct *part;
- /*
- * If this function is called, then the disk is a zoned block device
- * (host-aware or host-managed). This can be detected even if the
- * zoned block device support is disabled (CONFIG_BLK_DEV_ZONED not
- * set). In this case, however, only host-aware devices will be seen
- * as a block device is not created for host-managed devices. Without
- * zoned block device support, host-aware drives can still be used as
- * regular block devices (no zone operation) and their zone size will
- * be reported as 0. Allow this case.
- */
- if (!zone_sectors)
+ if (!size)
return true;
- /*
- * Check partition start and size alignement. If the drive has a
- * smaller last runt zone, ignore it and allow the partition to
- * use it. Check the zone size too: it should be a power of 2 number
- * of sectors.
- */
- if (WARN_ON_ONCE(!is_power_of_2(zone_sectors))) {
- u32 rem;
-
- div_u64_rem(from, zone_sectors, &rem);
- if (rem)
+ if (from >= get_capacity(disk)) {
+ printk(KERN_WARNING
+ "%s: p%d start %llu is beyond EOD, ",
+ disk->disk_name, p, (unsigned long long) from);
+ if (disk_unlock_native_capacity(disk))
return false;
- if ((from + size) < get_capacity(disk)) {
- div_u64_rem(size, zone_sectors, &rem);
- if (rem)
- return false;
- }
+ return true;
+ }
- } else {
+ if (from + size > get_capacity(disk)) {
+ printk(KERN_WARNING
+ "%s: p%d size %llu extends beyond EOD, ",
+ disk->disk_name, p, (unsigned long long) size);
- if (from & (zone_sectors - 1))
- return false;
- if ((from + size) < get_capacity(disk) &&
- (size & (zone_sectors - 1)))
+ if (disk_unlock_native_capacity(disk))
return false;
+ /*
+ * We can not ignore partitions of broken tables created by for
+ * example camera firmware, but we limit them to the end of the
+ * disk to avoid creating invalid block devices.
+ */
+ size = get_capacity(disk) - from;
+ }
+
+ part = add_partition(disk, p, from, size, state->parts[p].flags,
+ &state->parts[p].info);
+ if (IS_ERR(part)) {
+ printk(KERN_ERR " %s: p%d could not be added: %ld\n",
+ disk->disk_name, p, -PTR_ERR(part));
+ return true;
}
+#ifdef CONFIG_BLK_DEV_MD
+ if (state->parts[p].flags & ADDPART_FLAG_RAID)
+ md_autodetect_dev(part_to_dev(part)->devt);
+#endif
return true;
}
-int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
+int blk_add_partitions(struct gendisk *disk, struct block_device *bdev)
{
- struct parsed_partitions *state = NULL;
- struct hd_struct *part;
- int p, highest, res;
-rescan:
- if (state && !IS_ERR(state)) {
- free_partitions(state);
- state = NULL;
- }
+ struct parsed_partitions *state;
+ int ret = -EAGAIN, p, highest;
- res = drop_partitions(disk, bdev);
- if (res)
- return res;
+ if (!disk_part_scan_enabled(disk))
+ return 0;
- if (disk->fops->revalidate_disk)
- disk->fops->revalidate_disk(disk);
- check_disk_size_change(disk, bdev, true);
- bdev->bd_invalidated = 0;
- if (!get_capacity(disk) || !(state = check_partition(disk, bdev)))
+ state = check_partition(disk, bdev);
+ if (!state)
return 0;
if (IS_ERR(state)) {
/*
- * I/O error reading the partition table. If any
- * partition code tried to read beyond EOD, retry
- * after unlocking native capacity.
+ * I/O error reading the partition table. If we tried to read
+ * beyond EOD, retry after unlocking the native capacity.
*/
if (PTR_ERR(state) == -ENOSPC) {
printk(KERN_WARNING "%s: partition table beyond EOD, ",
disk->disk_name);
if (disk_unlock_native_capacity(disk))
- goto rescan;
+ return -EAGAIN;
}
return -EIO;
}
+
/*
- * If any partition code tried to read beyond EOD, try
- * unlocking native capacity even if partition table is
- * successfully read as we could be missing some partitions.
+ * Partitions are not supported on zoned block devices.
+ */
+ if (bdev_is_zoned(bdev)) {
+ pr_warn("%s: ignoring partition table on zoned block device\n",
+ disk->disk_name);
+ ret = 0;
+ goto out_free_state;
+ }
+
+ /*
+ * If we read beyond EOD, try unlocking native capacity even if the
+ * partition table was successfully read as we could be missing some
+ * partitions.
*/
if (state->access_beyond_eod) {
printk(KERN_WARNING
"%s: partition table partially beyond EOD, ",
disk->disk_name);
if (disk_unlock_native_capacity(disk))
- goto rescan;
+ goto out_free_state;
}
/* tell userspace that the media / partition table may have changed */
kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
- /* Detect the highest partition number and preallocate
- * disk->part_tbl. This is an optimization and not strictly
- * necessary.
+ /*
+ * Detect the highest partition number and preallocate disk->part_tbl.
+ * This is an optimization and not strictly necessary.
*/
for (p = 1, highest = 0; p < state->limit; p++)
if (state->parts[p].size)
highest = p;
-
disk_expand_part_tbl(disk, highest);
- /* add partitions */
- for (p = 1; p < state->limit; p++) {
- sector_t size, from;
-
- size = state->parts[p].size;
- if (!size)
- continue;
-
- from = state->parts[p].from;
- if (from >= get_capacity(disk)) {
- printk(KERN_WARNING
- "%s: p%d start %llu is beyond EOD, ",
- disk->disk_name, p, (unsigned long long) from);
- if (disk_unlock_native_capacity(disk))
- goto rescan;
- continue;
- }
-
- if (from + size > get_capacity(disk)) {
- printk(KERN_WARNING
- "%s: p%d size %llu extends beyond EOD, ",
- disk->disk_name, p, (unsigned long long) size);
-
- if (disk_unlock_native_capacity(disk)) {
- /* free state and restart */
- goto rescan;
- } else {
- /*
- * we can not ignore partitions of broken tables
- * created by for example camera firmware, but
- * we limit them to the end of the disk to avoid
- * creating invalid block devices
- */
- size = get_capacity(disk) - from;
- }
- }
-
- /*
- * On a zoned block device, partitions should be aligned on the
- * device zone size (i.e. zone boundary crossing not allowed).
- * Otherwise, resetting the write pointer of the last zone of
- * one partition may impact the following partition.
- */
- if (bdev_is_zoned(bdev) &&
- !part_zone_aligned(disk, bdev, from, size)) {
- printk(KERN_WARNING
- "%s: p%d start %llu+%llu is not zone aligned\n",
- disk->disk_name, p, (unsigned long long) from,
- (unsigned long long) size);
- continue;
- }
+ for (p = 1; p < state->limit; p++)
+ if (!blk_add_partition(disk, bdev, state, p))
+ goto out_free_state;
- part = add_partition(disk, p, from, size,
- state->parts[p].flags,
- &state->parts[p].info);
- if (IS_ERR(part)) {
- printk(KERN_ERR " %s: p%d could not be added: %ld\n",
- disk->disk_name, p, -PTR_ERR(part));
- continue;
- }
-#ifdef CONFIG_BLK_DEV_MD
- if (state->parts[p].flags & ADDPART_FLAG_RAID)
- md_autodetect_dev(part_to_dev(part)->devt);
-#endif
- }
+ ret = 0;
+out_free_state:
free_partitions(state);
- return 0;
-}
-
-int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
-{
- int res;
-
- if (!bdev->bd_invalidated)
- return 0;
-
- res = drop_partitions(disk, bdev);
- if (res)
- return res;
-
- set_capacity(disk, 0);
- check_disk_size_change(disk, bdev, false);
- bdev->bd_invalidated = 0;
- /* tell userspace that the media / partition table may have changed */
- kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
-
- return 0;
+ return ret;
}
unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
diff --git a/block/sed-opal.c b/block/sed-opal.c
index b4c761973ac1..880cc57a5f6b 100644
--- a/block/sed-opal.c
+++ b/block/sed-opal.c
@@ -149,6 +149,8 @@ static const u8 opaluid[][OPAL_UID_LENGTH] = {
{ 0x00, 0x00, 0x08, 0x01, 0x00, 0x00, 0x00, 0x01 },
[OPAL_ENTERPRISE_LOCKING_INFO_TABLE] =
{ 0x00, 0x00, 0x08, 0x01, 0x00, 0x00, 0x00, 0x00 },
+ [OPAL_DATASTORE] =
+ { 0x00, 0x00, 0x10, 0x01, 0x00, 0x00, 0x00, 0x00 },
/* C_PIN_TABLE object ID's */
[OPAL_C_PIN_MSID] =
@@ -1139,11 +1141,11 @@ static int generic_get_column(struct opal_dev *dev, const u8 *table,
*
* the result is provided in dev->resp->tok[4]
*/
-static int generic_get_table_info(struct opal_dev *dev, enum opal_uid table,
+static int generic_get_table_info(struct opal_dev *dev, const u8 *table_uid,
u64 column)
{
u8 uid[OPAL_UID_LENGTH];
- const unsigned int half = OPAL_UID_LENGTH/2;
+ const unsigned int half = OPAL_UID_LENGTH_HALF;
/* sed-opal UIDs can be split in two halves:
* first: actual table index
@@ -1152,7 +1154,7 @@ static int generic_get_table_info(struct opal_dev *dev, enum opal_uid table,
* first part of the target table as relative index into that table
*/
memcpy(uid, opaluid[OPAL_TABLE_TABLE], half);
- memcpy(uid+half, opaluid[table], half);
+ memcpy(uid + half, table_uid, half);
return generic_get_column(dev, uid, column);
}
@@ -1221,6 +1223,75 @@ static int get_active_key(struct opal_dev *dev, void *data)
return get_active_key_cont(dev);
}
+static int generic_table_write_data(struct opal_dev *dev, const u64 data,
+ u64 offset, u64 size, const u8 *uid)
+{
+ const u8 __user *src = (u8 __user *)(uintptr_t)data;
+ u8 *dst;
+ u64 len;
+ size_t off = 0;
+ int err;
+
+ /* do we fit in the available space? */
+ err = generic_get_table_info(dev, uid, OPAL_TABLE_ROWS);
+ if (err) {
+ pr_debug("Couldn't get the table size\n");
+ return err;
+ }
+
+ len = response_get_u64(&dev->parsed, 4);
+ if (size > len || offset > len - size) {
+ pr_debug("Does not fit in the table (%llu vs. %llu)\n",
+ offset + size, len);
+ return -ENOSPC;
+ }
+
+ /* do the actual transmission(s) */
+ while (off < size) {
+ err = cmd_start(dev, uid, opalmethod[OPAL_SET]);
+ add_token_u8(&err, dev, OPAL_STARTNAME);
+ add_token_u8(&err, dev, OPAL_WHERE);
+ add_token_u64(&err, dev, offset + off);
+ add_token_u8(&err, dev, OPAL_ENDNAME);
+
+ add_token_u8(&err, dev, OPAL_STARTNAME);
+ add_token_u8(&err, dev, OPAL_VALUES);
+
+ /*
+ * The bytestring header is either 1 or 2 bytes, so assume 2.
+ * There also needs to be enough space to accommodate the
+ * trailing OPAL_ENDNAME (1 byte) and tokens added by
+ * cmd_finalize.
+ */
+ len = min(remaining_size(dev) - (2+1+CMD_FINALIZE_BYTES_NEEDED),
+ (size_t)(size - off));
+ pr_debug("Write bytes %zu+%llu/%llu\n", off, len, size);
+
+ dst = add_bytestring_header(&err, dev, len);
+ if (!dst)
+ break;
+
+ if (copy_from_user(dst, src + off, len)) {
+ err = -EFAULT;
+ break;
+ }
+
+ dev->pos += len;
+
+ add_token_u8(&err, dev, OPAL_ENDNAME);
+ if (err)
+ break;
+
+ err = finalize_and_send(dev, parse_and_check_status);
+ if (err)
+ break;
+
+ off += len;
+ }
+
+ return err;
+}
+
static int generic_lr_enable_disable(struct opal_dev *dev,
u8 *uid, bool rle, bool wle,
bool rl, bool wl)
@@ -1583,68 +1654,9 @@ static int set_mbr_enable_disable(struct opal_dev *dev, void *data)
static int write_shadow_mbr(struct opal_dev *dev, void *data)
{
struct opal_shadow_mbr *shadow = data;
- const u8 __user *src;
- u8 *dst;
- size_t off = 0;
- u64 len;
- int err = 0;
-
- /* do we fit in the available shadow mbr space? */
- err = generic_get_table_info(dev, OPAL_MBR, OPAL_TABLE_ROWS);
- if (err) {
- pr_debug("MBR: could not get shadow size\n");
- return err;
- }
-
- len = response_get_u64(&dev->parsed, 4);
- if (shadow->size > len || shadow->offset > len - shadow->size) {
- pr_debug("MBR: does not fit in shadow (%llu vs. %llu)\n",
- shadow->offset + shadow->size, len);
- return -ENOSPC;
- }
-
- /* do the actual transmission(s) */
- src = (u8 __user *)(uintptr_t)shadow->data;
- while (off < shadow->size) {
- err = cmd_start(dev, opaluid[OPAL_MBR], opalmethod[OPAL_SET]);
- add_token_u8(&err, dev, OPAL_STARTNAME);
- add_token_u8(&err, dev, OPAL_WHERE);
- add_token_u64(&err, dev, shadow->offset + off);
- add_token_u8(&err, dev, OPAL_ENDNAME);
-
- add_token_u8(&err, dev, OPAL_STARTNAME);
- add_token_u8(&err, dev, OPAL_VALUES);
-
- /*
- * The bytestring header is either 1 or 2 bytes, so assume 2.
- * There also needs to be enough space to accommodate the
- * trailing OPAL_ENDNAME (1 byte) and tokens added by
- * cmd_finalize.
- */
- len = min(remaining_size(dev) - (2+1+CMD_FINALIZE_BYTES_NEEDED),
- (size_t)(shadow->size - off));
- pr_debug("MBR: write bytes %zu+%llu/%llu\n",
- off, len, shadow->size);
-
- dst = add_bytestring_header(&err, dev, len);
- if (!dst)
- break;
- if (copy_from_user(dst, src + off, len))
- err = -EFAULT;
- dev->pos += len;
-
- add_token_u8(&err, dev, OPAL_ENDNAME);
- if (err)
- break;
-
- err = finalize_and_send(dev, parse_and_check_status);
- if (err)
- break;
-
- off += len;
- }
- return err;
+ return generic_table_write_data(dev, shadow->data, shadow->offset,
+ shadow->size, opaluid[OPAL_MBR]);
}
static int generic_pw_cmd(u8 *key, size_t key_len, u8 *cpin_uid,
@@ -1874,7 +1886,6 @@ static int activate_lsp(struct opal_dev *dev, void *data)
{
struct opal_lr_act *opal_act = data;
u8 user_lr[OPAL_UID_LENGTH];
- u8 uint_3 = 0x83;
int err, i;
err = cmd_start(dev, opaluid[OPAL_LOCKINGSP_UID],
@@ -1887,10 +1898,7 @@ static int activate_lsp(struct opal_dev *dev, void *data)
return err;
add_token_u8(&err, dev, OPAL_STARTNAME);
- add_token_u8(&err, dev, uint_3);
- add_token_u8(&err, dev, 6);
- add_token_u8(&err, dev, 0);
- add_token_u8(&err, dev, 0);
+ add_token_u64(&err, dev, OPAL_SUM_SET_LIST);
add_token_u8(&err, dev, OPAL_STARTLIST);
add_token_bytestring(&err, dev, user_lr, OPAL_UID_LENGTH);
@@ -1957,6 +1965,113 @@ static int get_msid_cpin_pin(struct opal_dev *dev, void *data)
return 0;
}
+static int write_table_data(struct opal_dev *dev, void *data)
+{
+ struct opal_read_write_table *write_tbl = data;
+
+ return generic_table_write_data(dev, write_tbl->data, write_tbl->offset,
+ write_tbl->size, write_tbl->table_uid);
+}
+
+static int read_table_data_cont(struct opal_dev *dev)
+{
+ int err;
+ const char *data_read;
+
+ err = parse_and_check_status(dev);
+ if (err)
+ return err;
+
+ dev->prev_d_len = response_get_string(&dev->parsed, 1, &data_read);
+ dev->prev_data = (void *)data_read;
+ if (!dev->prev_data) {
+ pr_debug("%s: Couldn't read data from the table.\n", __func__);
+ return OPAL_INVAL_PARAM;
+ }
+
+ return 0;
+}
+
+/*
+ * IO_BUFFER_LENGTH = 2048
+ * sizeof(header) = 56
+ * No. of Token Bytes in the Response = 11
+ * MAX size of data that can be carried in response buffer
+ * at a time is : 2048 - (56 + 11) = 1981 = 0x7BD.
+ */
+#define OPAL_MAX_READ_TABLE (0x7BD)
+
+static int read_table_data(struct opal_dev *dev, void *data)
+{
+ struct opal_read_write_table *read_tbl = data;
+ int err;
+ size_t off = 0, max_read_size = OPAL_MAX_READ_TABLE;
+ u64 table_len, len;
+ u64 offset = read_tbl->offset, read_size = read_tbl->size - 1;
+ u8 __user *dst;
+
+ err = generic_get_table_info(dev, read_tbl->table_uid, OPAL_TABLE_ROWS);
+ if (err) {
+ pr_debug("Couldn't get the table size\n");
+ return err;
+ }
+
+ table_len = response_get_u64(&dev->parsed, 4);
+
+ /* Check if the user is trying to read from the table limits */
+ if (read_size > table_len || offset > table_len - read_size) {
+ pr_debug("Read size exceeds the Table size limits (%llu vs. %llu)\n",
+ offset + read_size, table_len);
+ return -EINVAL;
+ }
+
+ while (off < read_size) {
+ err = cmd_start(dev, read_tbl->table_uid, opalmethod[OPAL_GET]);
+
+ add_token_u8(&err, dev, OPAL_STARTLIST);
+ add_token_u8(&err, dev, OPAL_STARTNAME);
+ add_token_u8(&err, dev, OPAL_STARTROW);
+ add_token_u64(&err, dev, offset + off); /* start row value */
+ add_token_u8(&err, dev, OPAL_ENDNAME);
+
+ add_token_u8(&err, dev, OPAL_STARTNAME);
+ add_token_u8(&err, dev, OPAL_ENDROW);
+
+ len = min(max_read_size, (size_t)(read_size - off));
+ add_token_u64(&err, dev, offset + off + len); /* end row value
+ */
+ add_token_u8(&err, dev, OPAL_ENDNAME);
+ add_token_u8(&err, dev, OPAL_ENDLIST);
+
+ if (err) {
+ pr_debug("Error building read table data command.\n");
+ break;
+ }
+
+ err = finalize_and_send(dev, read_table_data_cont);
+ if (err)
+ break;
+
+ /* len+1: This includes the NULL terminator at the end*/
+ if (dev->prev_d_len > len + 1) {
+ err = -EOVERFLOW;
+ break;
+ }
+
+ dst = (u8 __user *)(uintptr_t)read_tbl->data;
+ if (copy_to_user(dst + off, dev->prev_data, dev->prev_d_len)) {
+ pr_debug("Error copying data to userspace\n");
+ err = -EFAULT;
+ break;
+ }
+ dev->prev_data = NULL;
+
+ off += len;
+ }
+
+ return err;
+}
+
static int end_opal_session(struct opal_dev *dev, void *data)
{
int err = 0;
@@ -2443,6 +2558,68 @@ bool opal_unlock_from_suspend(struct opal_dev *dev)
}
EXPORT_SYMBOL(opal_unlock_from_suspend);
+static int opal_read_table(struct opal_dev *dev,
+ struct opal_read_write_table *rw_tbl)
+{
+ const struct opal_step read_table_steps[] = {
+ { start_admin1LSP_opal_session, &rw_tbl->key },
+ { read_table_data, rw_tbl },
+ { end_opal_session, }
+ };
+ int ret = 0;
+
+ if (!rw_tbl->size)
+ return ret;
+
+ return execute_steps(dev, read_table_steps,
+ ARRAY_SIZE(read_table_steps));
+}
+
+static int opal_write_table(struct opal_dev *dev,
+ struct opal_read_write_table *rw_tbl)
+{
+ const struct opal_step write_table_steps[] = {
+ { start_admin1LSP_opal_session, &rw_tbl->key },
+ { write_table_data, rw_tbl },
+ { end_opal_session, }
+ };
+ int ret = 0;
+
+ if (!rw_tbl->size)
+ return ret;
+
+ return execute_steps(dev, write_table_steps,
+ ARRAY_SIZE(write_table_steps));
+}
+
+static int opal_generic_read_write_table(struct opal_dev *dev,
+ struct opal_read_write_table *rw_tbl)
+{
+ int ret, bit_set;
+
+ mutex_lock(&dev->dev_lock);
+ setup_opal_dev(dev);
+
+ bit_set = fls64(rw_tbl->flags) - 1;
+ switch (bit_set) {
+ case OPAL_READ_TABLE:
+ ret = opal_read_table(dev, rw_tbl);
+ break;
+ case OPAL_WRITE_TABLE:
+ ret = opal_write_table(dev, rw_tbl);
+ break;
+ default:
+ pr_debug("Invalid bit set in the flag (%016llx).\n",
+ rw_tbl->flags);
+ ret = -EINVAL;
+ break;
+ }
+
+ mutex_unlock(&dev->dev_lock);
+
+ return ret;
+}
+
int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg)
{
void *p;
@@ -2505,6 +2682,9 @@ int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg)
case IOC_OPAL_PSID_REVERT_TPR:
ret = opal_reverttper(dev, p, true);
break;
+ case IOC_OPAL_GENERIC_TABLE_RW:
+ ret = opal_generic_read_write_table(dev, p);
+ break;
default:
break;
}
diff --git a/block/t10-pi.c b/block/t10-pi.c
index 9803c7e0376e..f4907d941f03 100644
--- a/block/t10-pi.c
+++ b/block/t10-pi.c
@@ -235,16 +235,12 @@ static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
return t10_pi_verify(iter, t10_pi_ip_fn, T10_PI_TYPE3_PROTECTION);
}
-/**
- * Type 3 does not have a reference tag so no remapping is required.
- */
+/* Type 3 does not have a reference tag so no remapping is required. */
static void t10_pi_type3_prepare(struct request *rq)
{
}
-/**
- * Type 3 does not have a reference tag so no remapping is required.
- */
+/* Type 3 does not have a reference tag so no remapping is required. */
static void t10_pi_type3_complete(struct request *rq, unsigned int nr_bytes)
{
}
diff --git a/crypto/asymmetric_keys/asym_tpm.c b/crypto/asymmetric_keys/asym_tpm.c
index 76d2ce3a1b5b..d16d893bd195 100644
--- a/crypto/asymmetric_keys/asym_tpm.c
+++ b/crypto/asymmetric_keys/asym_tpm.c
@@ -13,7 +13,7 @@
#include <crypto/sha.h>
#include <asm/unaligned.h>
#include <keys/asymmetric-subtype.h>
-#include <keys/trusted.h>
+#include <keys/trusted_tpm.h>
#include <crypto/asym_tpm_subtype.h>
#include <crypto/public_key.h>
@@ -21,10 +21,6 @@
#define TPM_ORD_LOADKEY2 65
#define TPM_ORD_UNBIND 30
#define TPM_ORD_SIGN 60
-#define TPM_LOADKEY2_SIZE 59
-#define TPM_FLUSHSPECIFIC_SIZE 18
-#define TPM_UNBIND_SIZE 63
-#define TPM_SIGN_SIZE 63
#define TPM_RT_KEY 0x00000001
@@ -68,16 +64,13 @@ static int tpm_loadkey2(struct tpm_buf *tb,
return ret;
/* build the request buffer */
- INIT_BUF(tb);
- store16(tb, TPM_TAG_RQU_AUTH1_COMMAND);
- store32(tb, TPM_LOADKEY2_SIZE + keybloblen);
- store32(tb, TPM_ORD_LOADKEY2);
- store32(tb, keyhandle);
- storebytes(tb, keyblob, keybloblen);
- store32(tb, authhandle);
- storebytes(tb, nonceodd, TPM_NONCE_SIZE);
- store8(tb, cont);
- storebytes(tb, authdata, SHA1_DIGEST_SIZE);
+ tpm_buf_reset(tb, TPM_TAG_RQU_AUTH1_COMMAND, TPM_ORD_LOADKEY2);
+ tpm_buf_append_u32(tb, keyhandle);
+ tpm_buf_append(tb, keyblob, keybloblen);
+ tpm_buf_append_u32(tb, authhandle);
+ tpm_buf_append(tb, nonceodd, TPM_NONCE_SIZE);
+ tpm_buf_append_u8(tb, cont);
+ tpm_buf_append(tb, authdata, SHA1_DIGEST_SIZE);
ret = trusted_tpm_send(tb->data, MAX_BUF_SIZE);
if (ret < 0) {
@@ -101,12 +94,9 @@ static int tpm_loadkey2(struct tpm_buf *tb,
*/
static int tpm_flushspecific(struct tpm_buf *tb, uint32_t handle)
{
- INIT_BUF(tb);
- store16(tb, TPM_TAG_RQU_COMMAND);
- store32(tb, TPM_FLUSHSPECIFIC_SIZE);
- store32(tb, TPM_ORD_FLUSHSPECIFIC);
- store32(tb, handle);
- store32(tb, TPM_RT_KEY);
+ tpm_buf_reset(tb, TPM_TAG_RQU_COMMAND, TPM_ORD_FLUSHSPECIFIC);
+ tpm_buf_append_u32(tb, handle);
+ tpm_buf_append_u32(tb, TPM_RT_KEY);
return trusted_tpm_send(tb->data, MAX_BUF_SIZE);
}
@@ -155,17 +145,14 @@ static int tpm_unbind(struct tpm_buf *tb,
return ret;
/* build the request buffer */
- INIT_BUF(tb);
- store16(tb, TPM_TAG_RQU_AUTH1_COMMAND);
- store32(tb, TPM_UNBIND_SIZE + bloblen);
- store32(tb, TPM_ORD_UNBIND);
- store32(tb, keyhandle);
- store32(tb, bloblen);
- storebytes(tb, blob, bloblen);
- store32(tb, authhandle);
- storebytes(tb, nonceodd, TPM_NONCE_SIZE);
- store8(tb, cont);
- storebytes(tb, authdata, SHA1_DIGEST_SIZE);
+ tpm_buf_reset(tb, TPM_TAG_RQU_AUTH1_COMMAND, TPM_ORD_UNBIND);
+ tpm_buf_append_u32(tb, keyhandle);
+ tpm_buf_append_u32(tb, bloblen);
+ tpm_buf_append(tb, blob, bloblen);
+ tpm_buf_append_u32(tb, authhandle);
+ tpm_buf_append(tb, nonceodd, TPM_NONCE_SIZE);
+ tpm_buf_append_u8(tb, cont);
+ tpm_buf_append(tb, authdata, SHA1_DIGEST_SIZE);
ret = trusted_tpm_send(tb->data, MAX_BUF_SIZE);
if (ret < 0) {
@@ -241,17 +228,14 @@ static int tpm_sign(struct tpm_buf *tb,
return ret;
/* build the request buffer */
- INIT_BUF(tb);
- store16(tb, TPM_TAG_RQU_AUTH1_COMMAND);
- store32(tb, TPM_SIGN_SIZE + bloblen);
- store32(tb, TPM_ORD_SIGN);
- store32(tb, keyhandle);
- store32(tb, bloblen);
- storebytes(tb, blob, bloblen);
- store32(tb, authhandle);
- storebytes(tb, nonceodd, TPM_NONCE_SIZE);
- store8(tb, cont);
- storebytes(tb, authdata, SHA1_DIGEST_SIZE);
+ tpm_buf_reset(tb, TPM_TAG_RQU_AUTH1_COMMAND, TPM_ORD_SIGN);
+ tpm_buf_append_u32(tb, keyhandle);
+ tpm_buf_append_u32(tb, bloblen);
+ tpm_buf_append(tb, blob, bloblen);
+ tpm_buf_append_u32(tb, authhandle);
+ tpm_buf_append(tb, nonceodd, TPM_NONCE_SIZE);
+ tpm_buf_append_u8(tb, cont);
+ tpm_buf_append(tb, authdata, SHA1_DIGEST_SIZE);
ret = trusted_tpm_send(tb->data, MAX_BUF_SIZE);
if (ret < 0) {
@@ -519,7 +503,7 @@ static int tpm_key_decrypt(struct tpm_key *tk,
struct kernel_pkey_params *params,
const void *in, void *out)
{
- struct tpm_buf *tb;
+ struct tpm_buf tb;
uint32_t keyhandle;
uint8_t srkauth[SHA1_DIGEST_SIZE];
uint8_t keyauth[SHA1_DIGEST_SIZE];
@@ -533,14 +517,14 @@ static int tpm_key_decrypt(struct tpm_key *tk,
if (strcmp(params->encoding, "pkcs1"))
return -ENOPKG;
- tb = kzalloc(sizeof(*tb), GFP_KERNEL);
- if (!tb)
- return -ENOMEM;
+ r = tpm_buf_init(&tb, 0, 0);
+ if (r)
+ return r;
/* TODO: Handle a non-all zero SRK authorization */
memset(srkauth, 0, sizeof(srkauth));
- r = tpm_loadkey2(tb, SRKHANDLE, srkauth,
+ r = tpm_loadkey2(&tb, SRKHANDLE, srkauth,
tk->blob, tk->blob_len, &keyhandle);
if (r < 0) {
pr_devel("loadkey2 failed (%d)\n", r);
@@ -550,16 +534,16 @@ static int tpm_key_decrypt(struct tpm_key *tk,
/* TODO: Handle a non-all zero key authorization */
memset(keyauth, 0, sizeof(keyauth));
- r = tpm_unbind(tb, keyhandle, keyauth,
+ r = tpm_unbind(&tb, keyhandle, keyauth,
in, params->in_len, out, params->out_len);
if (r < 0)
pr_devel("tpm_unbind failed (%d)\n", r);
- if (tpm_flushspecific(tb, keyhandle) < 0)
+ if (tpm_flushspecific(&tb, keyhandle) < 0)
pr_devel("flushspecific failed (%d)\n", r);
error:
- kzfree(tb);
+ tpm_buf_destroy(&tb);
pr_devel("<==%s() = %d\n", __func__, r);
return r;
}
@@ -643,7 +627,7 @@ static int tpm_key_sign(struct tpm_key *tk,
struct kernel_pkey_params *params,
const void *in, void *out)
{
- struct tpm_buf *tb;
+ struct tpm_buf tb;
uint32_t keyhandle;
uint8_t srkauth[SHA1_DIGEST_SIZE];
uint8_t keyauth[SHA1_DIGEST_SIZE];
@@ -681,15 +665,14 @@ static int tpm_key_sign(struct tpm_key *tk,
goto error_free_asn1_wrapped;
}
- r = -ENOMEM;
- tb = kzalloc(sizeof(*tb), GFP_KERNEL);
- if (!tb)
+ r = tpm_buf_init(&tb, 0, 0);
+ if (r)
goto error_free_asn1_wrapped;
/* TODO: Handle a non-all zero SRK authorization */
memset(srkauth, 0, sizeof(srkauth));
- r = tpm_loadkey2(tb, SRKHANDLE, srkauth,
+ r = tpm_loadkey2(&tb, SRKHANDLE, srkauth,
tk->blob, tk->blob_len, &keyhandle);
if (r < 0) {
pr_devel("loadkey2 failed (%d)\n", r);
@@ -699,15 +682,15 @@ static int tpm_key_sign(struct tpm_key *tk,
/* TODO: Handle a non-all zero key authorization */
memset(keyauth, 0, sizeof(keyauth));
- r = tpm_sign(tb, keyhandle, keyauth, in, in_len, out, params->out_len);
+ r = tpm_sign(&tb, keyhandle, keyauth, in, in_len, out, params->out_len);
if (r < 0)
pr_devel("tpm_sign failed (%d)\n", r);
- if (tpm_flushspecific(tb, keyhandle) < 0)
+ if (tpm_flushspecific(&tb, keyhandle) < 0)
pr_devel("flushspecific failed (%d)\n", r);
error_free_tb:
- kzfree(tb);
+ tpm_buf_destroy(&tb);
error_free_asn1_wrapped:
kfree(asn1_wrapped);
pr_devel("<==%s() = %d\n", __func__, r);
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 3b2525908dd8..a1a858ad4d18 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -905,8 +905,8 @@ void acpi_cppc_processor_exit(struct acpi_processor *pr)
pcc_data[pcc_ss_id]->refcount--;
if (!pcc_data[pcc_ss_id]->refcount) {
pcc_mbox_free_channel(pcc_data[pcc_ss_id]->pcc_channel);
- pcc_data[pcc_ss_id]->pcc_channel_acquired = 0;
kfree(pcc_data[pcc_ss_id]);
+ pcc_data[pcc_ss_id] = NULL;
}
}
}
diff --git a/drivers/acpi/hmat/hmat.c b/drivers/acpi/hmat/hmat.c
index 8f9a28a870b0..8b0de8a3c647 100644
--- a/drivers/acpi/hmat/hmat.c
+++ b/drivers/acpi/hmat/hmat.c
@@ -403,7 +403,7 @@ static int __init hmat_parse_proximity_domain(union acpi_subtable_headers *heade
pr_info("HMAT: Memory Flags:%04x Processor Domain:%d Memory Domain:%d\n",
p->flags, p->processor_PD, p->memory_PD);
- if (p->flags & ACPI_HMAT_MEMORY_PD_VALID) {
+ if (p->flags & ACPI_HMAT_MEMORY_PD_VALID && hmat_revision == 1) {
target = find_mem_target(p->memory_PD);
if (!target) {
pr_debug("HMAT: Memory Domain missing from SRAT\n");
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 1413324982f0..14e68f202f81 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -1322,7 +1322,7 @@ static ssize_t scrub_show(struct device *dev,
nfit_device_lock(dev);
nd_desc = dev_get_drvdata(dev);
if (!nd_desc) {
- device_unlock(dev);
+ nfit_device_unlock(dev);
return rc;
}
acpi_desc = to_acpi_desc(nd_desc);
diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c
index 08da9c29f1e9..62114a03a51a 100644
--- a/drivers/acpi/processor_driver.c
+++ b/drivers/acpi/processor_driver.c
@@ -290,14 +290,13 @@ static int acpi_processor_notifier(struct notifier_block *nb,
unsigned long event, void *data)
{
struct cpufreq_policy *policy = data;
- int cpu = policy->cpu;
if (event == CPUFREQ_CREATE_POLICY) {
- acpi_thermal_cpufreq_init(cpu);
- acpi_processor_ppc_init(cpu);
+ acpi_thermal_cpufreq_init(policy);
+ acpi_processor_ppc_init(policy);
} else if (event == CPUFREQ_REMOVE_POLICY) {
- acpi_processor_ppc_exit(cpu);
- acpi_thermal_cpufreq_exit(cpu);
+ acpi_processor_ppc_exit(policy);
+ acpi_thermal_cpufreq_exit(policy);
}
return 0;
diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
index 2261713d1aec..5909e8fa4013 100644
--- a/drivers/acpi/processor_perflib.c
+++ b/drivers/acpi/processor_perflib.c
@@ -81,10 +81,10 @@ static int acpi_processor_get_platform_limit(struct acpi_processor *pr)
pr->performance_platform_limit = (int)ppc;
if (ppc >= pr->performance->state_count ||
- unlikely(!dev_pm_qos_request_active(&pr->perflib_req)))
+ unlikely(!freq_qos_request_active(&pr->perflib_req)))
return 0;
- ret = dev_pm_qos_update_request(&pr->perflib_req,
+ ret = freq_qos_update_request(&pr->perflib_req,
pr->performance->states[ppc].core_frequency * 1000);
if (ret < 0) {
pr_warn("Failed to update perflib freq constraint: CPU%d (%d)\n",
@@ -157,26 +157,36 @@ void acpi_processor_ignore_ppc_init(void)
ignore_ppc = 0;
}
-void acpi_processor_ppc_init(int cpu)
+void acpi_processor_ppc_init(struct cpufreq_policy *policy)
{
- struct acpi_processor *pr = per_cpu(processors, cpu);
- int ret;
+ unsigned int cpu;
- ret = dev_pm_qos_add_request(get_cpu_device(cpu),
- &pr->perflib_req, DEV_PM_QOS_MAX_FREQUENCY,
- INT_MAX);
- if (ret < 0) {
- pr_err("Failed to add freq constraint for CPU%d (%d)\n", cpu,
- ret);
- return;
+ for_each_cpu(cpu, policy->related_cpus) {
+ struct acpi_processor *pr = per_cpu(processors, cpu);
+ int ret;
+
+ if (!pr)
+ continue;
+
+ ret = freq_qos_add_request(&policy->constraints,
+ &pr->perflib_req,
+ FREQ_QOS_MAX, INT_MAX);
+ if (ret < 0)
+ pr_err("Failed to add freq constraint for CPU%d (%d)\n",
+ cpu, ret);
}
}
-void acpi_processor_ppc_exit(int cpu)
+void acpi_processor_ppc_exit(struct cpufreq_policy *policy)
{
- struct acpi_processor *pr = per_cpu(processors, cpu);
+ unsigned int cpu;
- dev_pm_qos_remove_request(&pr->perflib_req);
+ for_each_cpu(cpu, policy->related_cpus) {
+ struct acpi_processor *pr = per_cpu(processors, cpu);
+
+ if (pr)
+ freq_qos_remove_request(&pr->perflib_req);
+ }
}
static int acpi_processor_get_performance_control(struct acpi_processor *pr)
diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c
index ec2638f1df4f..41feb88ee92d 100644
--- a/drivers/acpi/processor_thermal.c
+++ b/drivers/acpi/processor_thermal.c
@@ -105,7 +105,7 @@ static int cpufreq_set_cur_state(unsigned int cpu, int state)
pr = per_cpu(processors, i);
- if (unlikely(!dev_pm_qos_request_active(&pr->thermal_req)))
+ if (unlikely(!freq_qos_request_active(&pr->thermal_req)))
continue;
policy = cpufreq_cpu_get(i);
@@ -116,7 +116,7 @@ static int cpufreq_set_cur_state(unsigned int cpu, int state)
cpufreq_cpu_put(policy);
- ret = dev_pm_qos_update_request(&pr->thermal_req, max_freq);
+ ret = freq_qos_update_request(&pr->thermal_req, max_freq);
if (ret < 0) {
pr_warn("Failed to update thermal freq constraint: CPU%d (%d)\n",
pr->id, ret);
@@ -125,26 +125,36 @@ static int cpufreq_set_cur_state(unsigned int cpu, int state)
return 0;
}
-void acpi_thermal_cpufreq_init(int cpu)
+void acpi_thermal_cpufreq_init(struct cpufreq_policy *policy)
{
- struct acpi_processor *pr = per_cpu(processors, cpu);
- int ret;
-
- ret = dev_pm_qos_add_request(get_cpu_device(cpu),
- &pr->thermal_req, DEV_PM_QOS_MAX_FREQUENCY,
- INT_MAX);
- if (ret < 0) {
- pr_err("Failed to add freq constraint for CPU%d (%d)\n", cpu,
- ret);
- return;
+ unsigned int cpu;
+
+ for_each_cpu(cpu, policy->related_cpus) {
+ struct acpi_processor *pr = per_cpu(processors, cpu);
+ int ret;
+
+ if (!pr)
+ continue;
+
+ ret = freq_qos_add_request(&policy->constraints,
+ &pr->thermal_req,
+ FREQ_QOS_MAX, INT_MAX);
+ if (ret < 0)
+ pr_err("Failed to add freq constraint for CPU%d (%d)\n",
+ cpu, ret);
}
}
-void acpi_thermal_cpufreq_exit(int cpu)
+void acpi_thermal_cpufreq_exit(struct cpufreq_policy *policy)
{
- struct acpi_processor *pr = per_cpu(processors, cpu);
+ unsigned int cpu;
+
+ for_each_cpu(cpu, policy->related_cpus) {
+ struct acpi_processor *pr = per_cpu(processors, policy->cpu);
- dev_pm_qos_remove_request(&pr->thermal_req);
+ if (pr)
+ freq_qos_remove_request(&pr->thermal_req);
+ }
}
#else /* ! CONFIG_CPU_FREQ */
static int cpufreq_get_max_state(unsigned int cpu)
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 9fa77d72ef27..2af937a8b1c5 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -362,19 +362,6 @@ static const struct dmi_system_id acpisleep_dmi_table[] __initconst = {
},
},
/*
- * https://bugzilla.kernel.org/show_bug.cgi?id=196907
- * Some Dell XPS13 9360 cannot do suspend-to-idle using the Low Power
- * S0 Idle firmware interface.
- */
- {
- .callback = init_default_s3,
- .ident = "Dell XPS13 9360",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "XPS 13 9360"),
- },
- },
- /*
* ThinkPad X1 Tablet(2016) cannot do suspend-to-idle using
* the Low Power S0 Idle firmware interface (see
* https://bugzilla.kernel.org/show_bug.cgi?id=199057).
diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
index f39f075abff9..fe1523664816 100644
--- a/drivers/amba/bus.c
+++ b/drivers/amba/bus.c
@@ -409,9 +409,11 @@ static int amba_device_try_add(struct amba_device *dev, struct resource *parent)
*/
rstc = of_reset_control_array_get_optional_shared(dev->dev.of_node);
if (IS_ERR(rstc)) {
- if (PTR_ERR(rstc) != -EPROBE_DEFER)
- dev_err(&dev->dev, "Can't get amba reset!\n");
- return PTR_ERR(rstc);
+ ret = PTR_ERR(rstc);
+ if (ret != -EPROBE_DEFER)
+ dev_err(&dev->dev, "can't get reset: %d\n",
+ ret);
+ goto err_reset;
}
reset_control_deassert(rstc);
reset_control_put(rstc);
@@ -472,6 +474,12 @@ static int amba_device_try_add(struct amba_device *dev, struct resource *parent)
release_resource(&dev->res);
err_out:
return ret;
+
+ err_reset:
+ amba_put_disable_pclk(dev);
+ iounmap(tmp);
+ dev_pm_domain_detach(&dev->dev, true);
+ goto err_release;
}
/*
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index c0a491277aca..265d9dd46a5e 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -57,6 +57,7 @@
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
#include <linux/seq_file.h>
+#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/pid_namespace.h>
#include <linux/security.h>
@@ -66,6 +67,7 @@
#include <linux/task_work.h>
#include <uapi/linux/android/binder.h>
+#include <uapi/linux/android/binderfs.h>
#include <asm/cacheflush.h>
@@ -95,10 +97,6 @@ DEFINE_SHOW_ATTRIBUTE(proc);
#define SZ_1K 0x400
#endif
-#ifndef SZ_4M
-#define SZ_4M 0x400000
-#endif
-
#define FORBIDDEN_MMAP_FLAGS (VM_WRITE)
enum {
@@ -2876,7 +2874,7 @@ static void binder_transaction(struct binder_proc *proc,
e->target_handle = tr->target.handle;
e->data_size = tr->data_size;
e->offsets_size = tr->offsets_size;
- e->context_name = proc->context->name;
+ strscpy(e->context_name, proc->context->name, BINDERFS_MAX_NAME);
if (reply) {
binder_inner_proc_lock(proc);
@@ -5175,9 +5173,6 @@ static int binder_mmap(struct file *filp, struct vm_area_struct *vma)
if (proc->tsk != current->group_leader)
return -EINVAL;
- if ((vma->vm_end - vma->vm_start) > SZ_4M)
- vma->vm_end = vma->vm_start + SZ_4M;
-
binder_debug(BINDER_DEBUG_OPEN_CLOSE,
"%s: %d %lx-%lx (%ld K) vma %lx pagep %lx\n",
__func__, proc->pid, vma->vm_start, vma->vm_end,
diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
index 6d79a1b0d446..eb76a823fbb2 100644
--- a/drivers/android/binder_alloc.c
+++ b/drivers/android/binder_alloc.c
@@ -22,6 +22,7 @@
#include <asm/cacheflush.h>
#include <linux/uaccess.h>
#include <linux/highmem.h>
+#include <linux/sizes.h>
#include "binder_alloc.h"
#include "binder_trace.h"
@@ -156,7 +157,7 @@ static struct binder_buffer *binder_alloc_prepare_to_free_locked(
}
/**
- * binder_alloc_buffer_lookup() - get buffer given user ptr
+ * binder_alloc_prepare_to_free() - get buffer given user ptr
* @alloc: binder_alloc for this proc
* @user_ptr: User pointer to buffer data
*
@@ -689,7 +690,9 @@ int binder_alloc_mmap_handler(struct binder_alloc *alloc,
alloc->buffer = (void __user *)vma->vm_start;
mutex_unlock(&binder_alloc_mmap_lock);
- alloc->pages = kcalloc((vma->vm_end - vma->vm_start) / PAGE_SIZE,
+ alloc->buffer_size = min_t(unsigned long, vma->vm_end - vma->vm_start,
+ SZ_4M);
+ alloc->pages = kcalloc(alloc->buffer_size / PAGE_SIZE,
sizeof(alloc->pages[0]),
GFP_KERNEL);
if (alloc->pages == NULL) {
@@ -697,7 +700,6 @@ int binder_alloc_mmap_handler(struct binder_alloc *alloc,
failure_string = "alloc page array";
goto err_alloc_pages_failed;
}
- alloc->buffer_size = vma->vm_end - vma->vm_start;
buffer = kzalloc(sizeof(*buffer), GFP_KERNEL);
if (!buffer) {
diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h
index bd47f7f72075..ae991097d14d 100644
--- a/drivers/android/binder_internal.h
+++ b/drivers/android/binder_internal.h
@@ -130,7 +130,7 @@ struct binder_transaction_log_entry {
int return_error_line;
uint32_t return_error;
uint32_t return_error_param;
- const char *context_name;
+ char context_name[BINDERFS_MAX_NAME + 1];
};
struct binder_transaction_log {
diff --git a/drivers/ata/acard-ahci.c b/drivers/ata/acard-ahci.c
index 753985c01517..46dc54d18f0b 100644
--- a/drivers/ata/acard-ahci.c
+++ b/drivers/ata/acard-ahci.c
@@ -56,7 +56,7 @@ struct acard_sg {
__le32 size; /* bit 31 (EOT) max==0x10000 (64k) */
};
-static void acard_ahci_qc_prep(struct ata_queued_cmd *qc);
+static enum ata_completion_errors acard_ahci_qc_prep(struct ata_queued_cmd *qc);
static bool acard_ahci_qc_fill_rtf(struct ata_queued_cmd *qc);
static int acard_ahci_port_start(struct ata_port *ap);
static int acard_ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent);
@@ -210,7 +210,7 @@ static unsigned int acard_ahci_fill_sg(struct ata_queued_cmd *qc, void *cmd_tbl)
return si;
}
-static void acard_ahci_qc_prep(struct ata_queued_cmd *qc)
+static enum ata_completion_errors acard_ahci_qc_prep(struct ata_queued_cmd *qc)
{
struct ata_port *ap = qc->ap;
struct ahci_port_priv *pp = ap->private_data;
@@ -248,6 +248,8 @@ static void acard_ahci_qc_prep(struct ata_queued_cmd *qc)
opts |= AHCI_CMD_ATAPI | AHCI_CMD_PREFETCH;
ahci_fill_cmd_slot(pp, qc->hw_tag, opts);
+
+ return AC_ERR_OK;
}
static bool acard_ahci_qc_fill_rtf(struct ata_queued_cmd *qc)
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index dd92faf197d5..ec6c64fce74a 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -56,6 +56,7 @@ enum board_ids {
board_ahci_yes_fbs,
/* board IDs for specific chipsets in alphabetical order */
+ board_ahci_al,
board_ahci_avn,
board_ahci_mcp65,
board_ahci_mcp77,
@@ -167,6 +168,13 @@ static const struct ata_port_info ahci_port_info[] = {
.port_ops = &ahci_ops,
},
/* by chipsets */
+ [board_ahci_al] = {
+ AHCI_HFLAGS (AHCI_HFLAG_NO_PMP | AHCI_HFLAG_NO_MSI),
+ .flags = AHCI_FLAG_COMMON,
+ .pio_mask = ATA_PIO4,
+ .udma_mask = ATA_UDMA6,
+ .port_ops = &ahci_ops,
+ },
[board_ahci_avn] = {
.flags = AHCI_FLAG_COMMON,
.pio_mask = ATA_PIO4,
@@ -415,6 +423,11 @@ static const struct pci_device_id ahci_pci_tbl[] = {
{ PCI_VDEVICE(ATI, 0x4394), board_ahci_sb700 }, /* ATI SB700/800 */
{ PCI_VDEVICE(ATI, 0x4395), board_ahci_sb700 }, /* ATI SB700/800 */
+ /* Amazon's Annapurna Labs support */
+ { PCI_DEVICE(PCI_VENDOR_ID_AMAZON_ANNAPURNA_LABS, 0x0031),
+ .class = PCI_CLASS_STORAGE_SATA_AHCI,
+ .class_mask = 0xffffff,
+ board_ahci_al },
/* AMD */
{ PCI_VDEVICE(AMD, 0x7800), board_ahci }, /* AMD Hudson-2 */
{ PCI_VDEVICE(AMD, 0x7900), board_ahci }, /* AMD CZ */
@@ -1600,7 +1613,9 @@ static void ahci_intel_pcs_quirk(struct pci_dev *pdev, struct ahci_host_priv *hp
*/
if (!id || id->vendor != PCI_VENDOR_ID_INTEL)
return;
- if (((enum board_ids) id->driver_data) < board_ahci_pcs7)
+
+ /* Skip applying the quirk on Denverton and beyond */
+ if (((enum board_ids) id->driver_data) >= board_ahci_pcs7)
return;
/*
diff --git a/drivers/ata/ahci_tegra.c b/drivers/ata/ahci_tegra.c
index e3163dae5e85..cb55ebc1725b 100644
--- a/drivers/ata/ahci_tegra.c
+++ b/drivers/ata/ahci_tegra.c
@@ -483,7 +483,6 @@ static int tegra_ahci_probe(struct platform_device *pdev)
struct tegra_ahci_priv *tegra;
struct resource *res;
int ret;
- unsigned int i;
hpriv = ahci_platform_get_resources(pdev, 0);
if (IS_ERR(hpriv))
@@ -543,8 +542,9 @@ static int tegra_ahci_probe(struct platform_device *pdev)
if (!tegra->supplies)
return -ENOMEM;
- for (i = 0; i < tegra->soc->num_supplies; i++)
- tegra->supplies[i].supply = tegra->soc->supply_names[i];
+ regulator_bulk_set_supply_names(tegra->supplies,
+ tegra->soc->supply_names,
+ tegra->soc->num_supplies);
ret = devm_regulator_bulk_get(&pdev->dev,
tegra->soc->num_supplies,
diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c
index e4da725381d3..3ca7720e7d8f 100644
--- a/drivers/ata/ata_piix.c
+++ b/drivers/ata/ata_piix.c
@@ -841,6 +841,12 @@ static int piix_broken_suspend(void)
},
},
{
+ .ident = "TECRA M3",
+ .matches = {
+ DMI_MATCH(DMI_OEM_STRING, "Tecra M3,"),
+ },
+ },
+ {
.ident = "TECRA M4",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
@@ -955,18 +961,10 @@ static int piix_broken_suspend(void)
{ } /* terminate list */
};
- static const char *oemstrs[] = {
- "Tecra M3,",
- };
- int i;
if (dmi_check_system(sysids))
return 1;
- for (i = 0; i < ARRAY_SIZE(oemstrs); i++)
- if (dmi_find_device(DMI_DEV_TYPE_OEM_STRING, oemstrs[i], NULL))
- return 1;
-
/* TECRA M4 sometimes forgets its identify and reports bogus
* DMI information. As the bogus information is a bit
* generic, match as many entries as possible. This manual
diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
index bff369d9a1a7..ea5bf5f4cbed 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@@ -57,7 +57,7 @@ static int ahci_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val);
static bool ahci_qc_fill_rtf(struct ata_queued_cmd *qc);
static int ahci_port_start(struct ata_port *ap);
static void ahci_port_stop(struct ata_port *ap);
-static void ahci_qc_prep(struct ata_queued_cmd *qc);
+static enum ata_completion_errors ahci_qc_prep(struct ata_queued_cmd *qc);
static int ahci_pmp_qc_defer(struct ata_queued_cmd *qc);
static void ahci_freeze(struct ata_port *ap);
static void ahci_thaw(struct ata_port *ap);
@@ -1624,7 +1624,7 @@ static int ahci_pmp_qc_defer(struct ata_queued_cmd *qc)
return sata_pmp_qc_defer_cmd_switch(qc);
}
-static void ahci_qc_prep(struct ata_queued_cmd *qc)
+static enum ata_completion_errors ahci_qc_prep(struct ata_queued_cmd *qc)
{
struct ata_port *ap = qc->ap;
struct ahci_port_priv *pp = ap->private_data;
@@ -1660,6 +1660,8 @@ static void ahci_qc_prep(struct ata_queued_cmd *qc)
opts |= AHCI_CMD_ATAPI | AHCI_CMD_PREFETCH;
ahci_fill_cmd_slot(pp, qc->hw_tag, opts);
+
+ return AC_ERR_OK;
}
static void ahci_fbs_dec_intr(struct ata_port *ap)
diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c
index e742780950de..8befce036af8 100644
--- a/drivers/ata/libahci_platform.c
+++ b/drivers/ata/libahci_platform.c
@@ -153,17 +153,13 @@ int ahci_platform_enable_regulators(struct ahci_host_priv *hpriv)
{
int rc, i;
- if (hpriv->ahci_regulator) {
- rc = regulator_enable(hpriv->ahci_regulator);
- if (rc)
- return rc;
- }
+ rc = regulator_enable(hpriv->ahci_regulator);
+ if (rc)
+ return rc;
- if (hpriv->phy_regulator) {
- rc = regulator_enable(hpriv->phy_regulator);
- if (rc)
- goto disable_ahci_pwrs;
- }
+ rc = regulator_enable(hpriv->phy_regulator);
+ if (rc)
+ goto disable_ahci_pwrs;
for (i = 0; i < hpriv->nports; i++) {
if (!hpriv->target_pwrs[i])
@@ -181,11 +177,9 @@ disable_target_pwrs:
if (hpriv->target_pwrs[i])
regulator_disable(hpriv->target_pwrs[i]);
- if (hpriv->phy_regulator)
- regulator_disable(hpriv->phy_regulator);
+ regulator_disable(hpriv->phy_regulator);
disable_ahci_pwrs:
- if (hpriv->ahci_regulator)
- regulator_disable(hpriv->ahci_regulator);
+ regulator_disable(hpriv->ahci_regulator);
return rc;
}
EXPORT_SYMBOL_GPL(ahci_platform_enable_regulators);
@@ -207,10 +201,8 @@ void ahci_platform_disable_regulators(struct ahci_host_priv *hpriv)
regulator_disable(hpriv->target_pwrs[i]);
}
- if (hpriv->ahci_regulator)
- regulator_disable(hpriv->ahci_regulator);
- if (hpriv->phy_regulator)
- regulator_disable(hpriv->phy_regulator);
+ regulator_disable(hpriv->ahci_regulator);
+ regulator_disable(hpriv->phy_regulator);
}
EXPORT_SYMBOL_GPL(ahci_platform_disable_regulators);
/**
@@ -359,7 +351,7 @@ static int ahci_platform_get_regulator(struct ahci_host_priv *hpriv, u32 port,
struct regulator *target_pwr;
int rc = 0;
- target_pwr = regulator_get_optional(dev, "target");
+ target_pwr = regulator_get(dev, "target");
if (!IS_ERR(target_pwr))
hpriv->target_pwrs[port] = target_pwr;
@@ -436,16 +428,14 @@ struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev,
hpriv->clks[i] = clk;
}
- hpriv->ahci_regulator = devm_regulator_get_optional(dev, "ahci");
+ hpriv->ahci_regulator = devm_regulator_get(dev, "ahci");
if (IS_ERR(hpriv->ahci_regulator)) {
rc = PTR_ERR(hpriv->ahci_regulator);
- if (rc == -EPROBE_DEFER)
+ if (rc != 0)
goto err_out;
- rc = 0;
- hpriv->ahci_regulator = NULL;
}
- hpriv->phy_regulator = devm_regulator_get_optional(dev, "phy");
+ hpriv->phy_regulator = devm_regulator_get(dev, "phy");
if (IS_ERR(hpriv->phy_regulator)) {
rc = PTR_ERR(hpriv->phy_regulator);
if (rc == -EPROBE_DEFER)
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 28c492be0a57..e9017c570bc5 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4980,7 +4980,10 @@ int ata_std_qc_defer(struct ata_queued_cmd *qc)
return ATA_DEFER_LINK;
}
-void ata_noop_qc_prep(struct ata_queued_cmd *qc) { }
+enum ata_completion_errors ata_noop_qc_prep(struct ata_queued_cmd *qc)
+{
+ return AC_ERR_OK;
+}
/**
* ata_sg_init - Associate command with scatter-gather table.
@@ -5443,7 +5446,9 @@ void ata_qc_issue(struct ata_queued_cmd *qc)
return;
}
- ap->ops->qc_prep(qc);
+ qc->err_mask |= ap->ops->qc_prep(qc);
+ if (unlikely(qc->err_mask))
+ goto err;
trace_ata_qc_issue(qc);
qc->err_mask |= ap->ops->qc_issue(qc);
if (unlikely(qc->err_mask))
@@ -6708,6 +6713,9 @@ void ata_host_detach(struct ata_host *host)
{
int i;
+ /* Ensure ata_port probe has completed */
+ async_synchronize_full();
+
for (i = 0; i < host->n_ports; i++)
ata_port_detach(host->ports[i]);
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 76d0f9de767b..58e09ffe8b9c 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -4791,27 +4791,6 @@ void ata_scsi_hotplug(struct work_struct *work)
return;
}
- /*
- * XXX - UGLY HACK
- *
- * The block layer suspend/resume path is fundamentally broken due
- * to freezable kthreads and workqueue and may deadlock if a block
- * device gets removed while resume is in progress. I don't know
- * what the solution is short of removing freezable kthreads and
- * workqueues altogether.
- *
- * The following is an ugly hack to avoid kicking off device
- * removal while freezer is active. This is a joke but does avoid
- * this particular deadlock scenario.
- *
- * https://bugzilla.kernel.org/show_bug.cgi?id=62801
- * http://marc.info/?l=linux-kernel&m=138695698516487
- */
-#ifdef CONFIG_FREEZER
- while (pm_freezing)
- msleep(10);
-#endif
-
DPRINTK("ENTER\n");
mutex_lock(&ap->scsi_scan_mutex);
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index 4ed682da52ae..038db94216a9 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -2679,12 +2679,14 @@ static void ata_bmdma_fill_sg_dumb(struct ata_queued_cmd *qc)
* LOCKING:
* spin_lock_irqsave(host lock)
*/
-void ata_bmdma_qc_prep(struct ata_queued_cmd *qc)
+enum ata_completion_errors ata_bmdma_qc_prep(struct ata_queued_cmd *qc)
{
if (!(qc->flags & ATA_QCFLAG_DMAMAP))
- return;
+ return AC_ERR_OK;
ata_bmdma_fill_sg(qc);
+
+ return AC_ERR_OK;
}
EXPORT_SYMBOL_GPL(ata_bmdma_qc_prep);
@@ -2697,12 +2699,14 @@ EXPORT_SYMBOL_GPL(ata_bmdma_qc_prep);
* LOCKING:
* spin_lock_irqsave(host lock)
*/
-void ata_bmdma_dumb_qc_prep(struct ata_queued_cmd *qc)
+enum ata_completion_errors ata_bmdma_dumb_qc_prep(struct ata_queued_cmd *qc)
{
if (!(qc->flags & ATA_QCFLAG_DMAMAP))
- return;
+ return AC_ERR_OK;
ata_bmdma_fill_sg_dumb(qc);
+
+ return AC_ERR_OK;
}
EXPORT_SYMBOL_GPL(ata_bmdma_dumb_qc_prep);
diff --git a/drivers/ata/pata_artop.c b/drivers/ata/pata_artop.c
index 3aa006c5ed0c..6bd2228bb6ff 100644
--- a/drivers/ata/pata_artop.c
+++ b/drivers/ata/pata_artop.c
@@ -100,7 +100,7 @@ static void artop6210_load_piomode(struct ata_port *ap, struct ata_device *adev,
{
struct pci_dev *pdev = to_pci_dev(ap->host->dev);
int dn = adev->devno + 2 * ap->port_no;
- const u16 timing[2][5] = {
+ static const u16 timing[2][5] = {
{ 0x0000, 0x000A, 0x0008, 0x0303, 0x0301 },
{ 0x0700, 0x070A, 0x0708, 0x0403, 0x0401 }
@@ -154,7 +154,7 @@ static void artop6260_load_piomode (struct ata_port *ap, struct ata_device *adev
{
struct pci_dev *pdev = to_pci_dev(ap->host->dev);
int dn = adev->devno + 2 * ap->port_no;
- const u8 timing[2][5] = {
+ static const u8 timing[2][5] = {
{ 0x00, 0x0A, 0x08, 0x33, 0x31 },
{ 0x70, 0x7A, 0x78, 0x43, 0x41 }
diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c
index 57f2ec71cfc3..1bfd0154dad5 100644
--- a/drivers/ata/pata_macio.c
+++ b/drivers/ata/pata_macio.c
@@ -510,7 +510,7 @@ static int pata_macio_cable_detect(struct ata_port *ap)
return ATA_CBL_PATA40;
}
-static void pata_macio_qc_prep(struct ata_queued_cmd *qc)
+static enum ata_completion_errors pata_macio_qc_prep(struct ata_queued_cmd *qc)
{
unsigned int write = (qc->tf.flags & ATA_TFLAG_WRITE);
struct ata_port *ap = qc->ap;
@@ -523,7 +523,7 @@ static void pata_macio_qc_prep(struct ata_queued_cmd *qc)
__func__, qc, qc->flags, write, qc->dev->devno);
if (!(qc->flags & ATA_QCFLAG_DMAMAP))
- return;
+ return AC_ERR_OK;
table = (struct dbdma_cmd *) priv->dma_table_cpu;
@@ -568,6 +568,8 @@ static void pata_macio_qc_prep(struct ata_queued_cmd *qc)
table->command = cpu_to_le16(DBDMA_STOP);
dev_dbgdma(priv->dev, "%s: %d DMA list entries\n", __func__, pi);
+
+ return AC_ERR_OK;
}
diff --git a/drivers/ata/pata_pxa.c b/drivers/ata/pata_pxa.c
index 4afcb8e63e21..41430f79663c 100644
--- a/drivers/ata/pata_pxa.c
+++ b/drivers/ata/pata_pxa.c
@@ -44,25 +44,27 @@ static void pxa_ata_dma_irq(void *d)
/*
* Prepare taskfile for submission.
*/
-static void pxa_qc_prep(struct ata_queued_cmd *qc)
+static enum ata_completion_errors pxa_qc_prep(struct ata_queued_cmd *qc)
{
struct pata_pxa_data *pd = qc->ap->private_data;
struct dma_async_tx_descriptor *tx;
enum dma_transfer_direction dir;
if (!(qc->flags & ATA_QCFLAG_DMAMAP))
- return;
+ return AC_ERR_OK;
dir = (qc->dma_dir == DMA_TO_DEVICE ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM);
tx = dmaengine_prep_slave_sg(pd->dma_chan, qc->sg, qc->n_elem, dir,
DMA_PREP_INTERRUPT);
if (!tx) {
ata_dev_err(qc->dev, "prep_slave_sg() failed\n");
- return;
+ return AC_ERR_OK;
}
tx->callback = pxa_ata_dma_irq;
tx->callback_param = pd;
pd->dma_cookie = dmaengine_submit(tx);
+
+ return AC_ERR_OK;
}
/*
diff --git a/drivers/ata/pdc_adma.c b/drivers/ata/pdc_adma.c
index cb490531b62e..5db55e1e2a61 100644
--- a/drivers/ata/pdc_adma.c
+++ b/drivers/ata/pdc_adma.c
@@ -116,7 +116,7 @@ static int adma_ata_init_one(struct pci_dev *pdev,
const struct pci_device_id *ent);
static int adma_port_start(struct ata_port *ap);
static void adma_port_stop(struct ata_port *ap);
-static void adma_qc_prep(struct ata_queued_cmd *qc);
+static enum ata_completion_errors adma_qc_prep(struct ata_queued_cmd *qc);
static unsigned int adma_qc_issue(struct ata_queued_cmd *qc);
static int adma_check_atapi_dma(struct ata_queued_cmd *qc);
static void adma_freeze(struct ata_port *ap);
@@ -295,7 +295,7 @@ static int adma_fill_sg(struct ata_queued_cmd *qc)
return i;
}
-static void adma_qc_prep(struct ata_queued_cmd *qc)
+static enum ata_completion_errors adma_qc_prep(struct ata_queued_cmd *qc)
{
struct adma_port_priv *pp = qc->ap->private_data;
u8 *buf = pp->pkt;
@@ -306,7 +306,7 @@ static void adma_qc_prep(struct ata_queued_cmd *qc)
adma_enter_reg_mode(qc->ap);
if (qc->tf.protocol != ATA_PROT_DMA)
- return;
+ return AC_ERR_OK;
buf[i++] = 0; /* Response flags */
buf[i++] = 0; /* reserved */
@@ -371,6 +371,7 @@ static void adma_qc_prep(struct ata_queued_cmd *qc)
printk("%s\n", obuf);
}
#endif
+ return AC_ERR_OK;
}
static inline void adma_packet_start(struct ata_queued_cmd *qc)
diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c
index 8e9cb198fcd1..9239615d8a04 100644
--- a/drivers/ata/sata_fsl.c
+++ b/drivers/ata/sata_fsl.c
@@ -502,7 +502,7 @@ static unsigned int sata_fsl_fill_sg(struct ata_queued_cmd *qc, void *cmd_desc,
return num_prde;
}
-static void sata_fsl_qc_prep(struct ata_queued_cmd *qc)
+static enum ata_completion_errors sata_fsl_qc_prep(struct ata_queued_cmd *qc)
{
struct ata_port *ap = qc->ap;
struct sata_fsl_port_priv *pp = ap->private_data;
@@ -548,6 +548,8 @@ static void sata_fsl_qc_prep(struct ata_queued_cmd *qc)
VPRINTK("SATA FSL : xx_qc_prep, di = 0x%x, ttl = %d, num_prde = %d\n",
desc_info, ttl_dwords, num_prde);
+
+ return AC_ERR_OK;
}
static unsigned int sata_fsl_qc_issue(struct ata_queued_cmd *qc)
diff --git a/drivers/ata/sata_inic162x.c b/drivers/ata/sata_inic162x.c
index 7f99e23bff88..a6b76cc12a66 100644
--- a/drivers/ata/sata_inic162x.c
+++ b/drivers/ata/sata_inic162x.c
@@ -478,7 +478,7 @@ static void inic_fill_sg(struct inic_prd *prd, struct ata_queued_cmd *qc)
prd[-1].flags |= PRD_END;
}
-static void inic_qc_prep(struct ata_queued_cmd *qc)
+static enum ata_completion_errors inic_qc_prep(struct ata_queued_cmd *qc)
{
struct inic_port_priv *pp = qc->ap->private_data;
struct inic_pkt *pkt = pp->pkt;
@@ -538,6 +538,8 @@ static void inic_qc_prep(struct ata_queued_cmd *qc)
inic_fill_sg(prd, qc);
pp->cpb_tbl[0] = pp->pkt_dma;
+
+ return AC_ERR_OK;
}
static unsigned int inic_qc_issue(struct ata_queued_cmd *qc)
diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
index ad385a113391..277f11909fc1 100644
--- a/drivers/ata/sata_mv.c
+++ b/drivers/ata/sata_mv.c
@@ -592,8 +592,8 @@ static int mv5_scr_write(struct ata_link *link, unsigned int sc_reg_in, u32 val)
static int mv_port_start(struct ata_port *ap);
static void mv_port_stop(struct ata_port *ap);
static int mv_qc_defer(struct ata_queued_cmd *qc);
-static void mv_qc_prep(struct ata_queued_cmd *qc);
-static void mv_qc_prep_iie(struct ata_queued_cmd *qc);
+static enum ata_completion_errors mv_qc_prep(struct ata_queued_cmd *qc);
+static enum ata_completion_errors mv_qc_prep_iie(struct ata_queued_cmd *qc);
static unsigned int mv_qc_issue(struct ata_queued_cmd *qc);
static int mv_hardreset(struct ata_link *link, unsigned int *class,
unsigned long deadline);
@@ -2031,7 +2031,7 @@ static void mv_rw_multi_errata_sata24(struct ata_queued_cmd *qc)
* LOCKING:
* Inherited from caller.
*/
-static void mv_qc_prep(struct ata_queued_cmd *qc)
+static enum ata_completion_errors mv_qc_prep(struct ata_queued_cmd *qc)
{
struct ata_port *ap = qc->ap;
struct mv_port_priv *pp = ap->private_data;
@@ -2043,15 +2043,15 @@ static void mv_qc_prep(struct ata_queued_cmd *qc)
switch (tf->protocol) {
case ATA_PROT_DMA:
if (tf->command == ATA_CMD_DSM)
- return;
+ return AC_ERR_OK;
/* fall-thru */
case ATA_PROT_NCQ:
break; /* continue below */
case ATA_PROT_PIO:
mv_rw_multi_errata_sata24(qc);
- return;
+ return AC_ERR_OK;
default:
- return;
+ return AC_ERR_OK;
}
/* Fill in command request block
@@ -2098,12 +2098,10 @@ static void mv_qc_prep(struct ata_queued_cmd *qc)
* non-NCQ mode are: [RW] STREAM DMA and W DMA FUA EXT, none
* of which are defined/used by Linux. If we get here, this
* driver needs work.
- *
- * FIXME: modify libata to give qc_prep a return value and
- * return error here.
*/
- BUG_ON(tf->command);
- break;
+ ata_port_err(ap, "%s: unsupported command: %.2x\n", __func__,
+ tf->command);
+ return AC_ERR_INVALID;
}
mv_crqb_pack_cmd(cw++, tf->nsect, ATA_REG_NSECT, 0);
mv_crqb_pack_cmd(cw++, tf->hob_lbal, ATA_REG_LBAL, 0);
@@ -2116,8 +2114,10 @@ static void mv_qc_prep(struct ata_queued_cmd *qc)
mv_crqb_pack_cmd(cw++, tf->command, ATA_REG_CMD, 1); /* last */
if (!(qc->flags & ATA_QCFLAG_DMAMAP))
- return;
+ return AC_ERR_OK;
mv_fill_sg(qc);
+
+ return AC_ERR_OK;
}
/**
@@ -2132,7 +2132,7 @@ static void mv_qc_prep(struct ata_queued_cmd *qc)
* LOCKING:
* Inherited from caller.
*/
-static void mv_qc_prep_iie(struct ata_queued_cmd *qc)
+static enum ata_completion_errors mv_qc_prep_iie(struct ata_queued_cmd *qc)
{
struct ata_port *ap = qc->ap;
struct mv_port_priv *pp = ap->private_data;
@@ -2143,9 +2143,9 @@ static void mv_qc_prep_iie(struct ata_queued_cmd *qc)
if ((tf->protocol != ATA_PROT_DMA) &&
(tf->protocol != ATA_PROT_NCQ))
- return;
+ return AC_ERR_OK;
if (tf->command == ATA_CMD_DSM)
- return; /* use bmdma for this */
+ return AC_ERR_OK; /* use bmdma for this */
/* Fill in Gen IIE command request block */
if (!(tf->flags & ATA_TFLAG_WRITE))
@@ -2186,8 +2186,10 @@ static void mv_qc_prep_iie(struct ata_queued_cmd *qc)
);
if (!(qc->flags & ATA_QCFLAG_DMAMAP))
- return;
+ return AC_ERR_OK;
mv_fill_sg(qc);
+
+ return AC_ERR_OK;
}
/**
diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index 56946012d113..65ec8dff1c51 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c
@@ -297,7 +297,7 @@ static void nv_ck804_freeze(struct ata_port *ap);
static void nv_ck804_thaw(struct ata_port *ap);
static int nv_adma_slave_config(struct scsi_device *sdev);
static int nv_adma_check_atapi_dma(struct ata_queued_cmd *qc);
-static void nv_adma_qc_prep(struct ata_queued_cmd *qc);
+static enum ata_completion_errors nv_adma_qc_prep(struct ata_queued_cmd *qc);
static unsigned int nv_adma_qc_issue(struct ata_queued_cmd *qc);
static irqreturn_t nv_adma_interrupt(int irq, void *dev_instance);
static void nv_adma_irq_clear(struct ata_port *ap);
@@ -319,7 +319,7 @@ static void nv_mcp55_freeze(struct ata_port *ap);
static void nv_swncq_error_handler(struct ata_port *ap);
static int nv_swncq_slave_config(struct scsi_device *sdev);
static int nv_swncq_port_start(struct ata_port *ap);
-static void nv_swncq_qc_prep(struct ata_queued_cmd *qc);
+static enum ata_completion_errors nv_swncq_qc_prep(struct ata_queued_cmd *qc);
static void nv_swncq_fill_sg(struct ata_queued_cmd *qc);
static unsigned int nv_swncq_qc_issue(struct ata_queued_cmd *qc);
static void nv_swncq_irq_clear(struct ata_port *ap, u16 fis);
@@ -1344,7 +1344,7 @@ static int nv_adma_use_reg_mode(struct ata_queued_cmd *qc)
return 1;
}
-static void nv_adma_qc_prep(struct ata_queued_cmd *qc)
+static enum ata_completion_errors nv_adma_qc_prep(struct ata_queued_cmd *qc)
{
struct nv_adma_port_priv *pp = qc->ap->private_data;
struct nv_adma_cpb *cpb = &pp->cpb[qc->hw_tag];
@@ -1356,7 +1356,7 @@ static void nv_adma_qc_prep(struct ata_queued_cmd *qc)
(qc->flags & ATA_QCFLAG_DMAMAP));
nv_adma_register_mode(qc->ap);
ata_bmdma_qc_prep(qc);
- return;
+ return AC_ERR_OK;
}
cpb->resp_flags = NV_CPB_RESP_DONE;
@@ -1388,6 +1388,8 @@ static void nv_adma_qc_prep(struct ata_queued_cmd *qc)
cpb->ctl_flags = ctl_flags;
wmb();
cpb->resp_flags = 0;
+
+ return AC_ERR_OK;
}
static unsigned int nv_adma_qc_issue(struct ata_queued_cmd *qc)
@@ -1950,17 +1952,19 @@ static int nv_swncq_port_start(struct ata_port *ap)
return 0;
}
-static void nv_swncq_qc_prep(struct ata_queued_cmd *qc)
+static enum ata_completion_errors nv_swncq_qc_prep(struct ata_queued_cmd *qc)
{
if (qc->tf.protocol != ATA_PROT_NCQ) {
ata_bmdma_qc_prep(qc);
- return;
+ return AC_ERR_OK;
}
if (!(qc->flags & ATA_QCFLAG_DMAMAP))
- return;
+ return AC_ERR_OK;
nv_swncq_fill_sg(qc);
+
+ return AC_ERR_OK;
}
static void nv_swncq_fill_sg(struct ata_queued_cmd *qc)
diff --git a/drivers/ata/sata_promise.c b/drivers/ata/sata_promise.c
index 5fd464765ddc..c451d7d1c817 100644
--- a/drivers/ata/sata_promise.c
+++ b/drivers/ata/sata_promise.c
@@ -139,7 +139,7 @@ static int pdc_sata_scr_write(struct ata_link *link, unsigned int sc_reg, u32 va
static int pdc_ata_init_one(struct pci_dev *pdev, const struct pci_device_id *ent);
static int pdc_common_port_start(struct ata_port *ap);
static int pdc_sata_port_start(struct ata_port *ap);
-static void pdc_qc_prep(struct ata_queued_cmd *qc);
+static enum ata_completion_errors pdc_qc_prep(struct ata_queued_cmd *qc);
static void pdc_tf_load_mmio(struct ata_port *ap, const struct ata_taskfile *tf);
static void pdc_exec_command_mmio(struct ata_port *ap, const struct ata_taskfile *tf);
static int pdc_check_atapi_dma(struct ata_queued_cmd *qc);
@@ -633,7 +633,7 @@ static void pdc_fill_sg(struct ata_queued_cmd *qc)
prd[idx - 1].flags_len |= cpu_to_le32(ATA_PRD_EOT);
}
-static void pdc_qc_prep(struct ata_queued_cmd *qc)
+static enum ata_completion_errors pdc_qc_prep(struct ata_queued_cmd *qc)
{
struct pdc_port_priv *pp = qc->ap->private_data;
unsigned int i;
@@ -665,6 +665,8 @@ static void pdc_qc_prep(struct ata_queued_cmd *qc)
default:
break;
}
+
+ return AC_ERR_OK;
}
static int pdc_is_sataii_tx4(unsigned long flags)
diff --git a/drivers/ata/sata_qstor.c b/drivers/ata/sata_qstor.c
index c53c5a47204d..ef00ab644afb 100644
--- a/drivers/ata/sata_qstor.c
+++ b/drivers/ata/sata_qstor.c
@@ -100,7 +100,7 @@ static int qs_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val);
static int qs_ata_init_one(struct pci_dev *pdev, const struct pci_device_id *ent);
static int qs_port_start(struct ata_port *ap);
static void qs_host_stop(struct ata_host *host);
-static void qs_qc_prep(struct ata_queued_cmd *qc);
+static enum ata_completion_errors qs_qc_prep(struct ata_queued_cmd *qc);
static unsigned int qs_qc_issue(struct ata_queued_cmd *qc);
static int qs_check_atapi_dma(struct ata_queued_cmd *qc);
static void qs_freeze(struct ata_port *ap);
@@ -260,7 +260,7 @@ static unsigned int qs_fill_sg(struct ata_queued_cmd *qc)
return si;
}
-static void qs_qc_prep(struct ata_queued_cmd *qc)
+static enum ata_completion_errors qs_qc_prep(struct ata_queued_cmd *qc)
{
struct qs_port_priv *pp = qc->ap->private_data;
u8 dflags = QS_DF_PORD, *buf = pp->pkt;
@@ -272,7 +272,7 @@ static void qs_qc_prep(struct ata_queued_cmd *qc)
qs_enter_reg_mode(qc->ap);
if (qc->tf.protocol != ATA_PROT_DMA)
- return;
+ return AC_ERR_OK;
nelem = qs_fill_sg(qc);
@@ -295,6 +295,8 @@ static void qs_qc_prep(struct ata_queued_cmd *qc)
/* frame information structure (FIS) */
ata_tf_to_fis(&qc->tf, 0, 1, &buf[32]);
+
+ return AC_ERR_OK;
}
static inline void qs_packet_start(struct ata_queued_cmd *qc)
diff --git a/drivers/ata/sata_rcar.c b/drivers/ata/sata_rcar.c
index 3495e1733a8e..980aacdbcf3b 100644
--- a/drivers/ata/sata_rcar.c
+++ b/drivers/ata/sata_rcar.c
@@ -550,12 +550,14 @@ static void sata_rcar_bmdma_fill_sg(struct ata_queued_cmd *qc)
prd[si - 1].addr |= cpu_to_le32(SATA_RCAR_DTEND);
}
-static void sata_rcar_qc_prep(struct ata_queued_cmd *qc)
+static enum ata_completion_errors sata_rcar_qc_prep(struct ata_queued_cmd *qc)
{
if (!(qc->flags & ATA_QCFLAG_DMAMAP))
- return;
+ return AC_ERR_OK;
sata_rcar_bmdma_fill_sg(qc);
+
+ return AC_ERR_OK;
}
static void sata_rcar_bmdma_setup(struct ata_queued_cmd *qc)
diff --git a/drivers/ata/sata_sil.c b/drivers/ata/sata_sil.c
index e6fbae2f645a..75321f1ceba5 100644
--- a/drivers/ata/sata_sil.c
+++ b/drivers/ata/sata_sil.c
@@ -103,7 +103,7 @@ static void sil_dev_config(struct ata_device *dev);
static int sil_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val);
static int sil_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val);
static int sil_set_mode(struct ata_link *link, struct ata_device **r_failed);
-static void sil_qc_prep(struct ata_queued_cmd *qc);
+static enum ata_completion_errors sil_qc_prep(struct ata_queued_cmd *qc);
static void sil_bmdma_setup(struct ata_queued_cmd *qc);
static void sil_bmdma_start(struct ata_queued_cmd *qc);
static void sil_bmdma_stop(struct ata_queued_cmd *qc);
@@ -317,12 +317,14 @@ static void sil_fill_sg(struct ata_queued_cmd *qc)
last_prd->flags_len |= cpu_to_le32(ATA_PRD_EOT);
}
-static void sil_qc_prep(struct ata_queued_cmd *qc)
+static enum ata_completion_errors sil_qc_prep(struct ata_queued_cmd *qc)
{
if (!(qc->flags & ATA_QCFLAG_DMAMAP))
- return;
+ return AC_ERR_OK;
sil_fill_sg(qc);
+
+ return AC_ERR_OK;
}
static unsigned char sil_get_device_cache_line(struct pci_dev *pdev)
diff --git a/drivers/ata/sata_sil24.c b/drivers/ata/sata_sil24.c
index 7bef82de53ca..560070d4f1d0 100644
--- a/drivers/ata/sata_sil24.c
+++ b/drivers/ata/sata_sil24.c
@@ -326,7 +326,7 @@ static void sil24_dev_config(struct ata_device *dev);
static int sil24_scr_read(struct ata_link *link, unsigned sc_reg, u32 *val);
static int sil24_scr_write(struct ata_link *link, unsigned sc_reg, u32 val);
static int sil24_qc_defer(struct ata_queued_cmd *qc);
-static void sil24_qc_prep(struct ata_queued_cmd *qc);
+static enum ata_completion_errors sil24_qc_prep(struct ata_queued_cmd *qc);
static unsigned int sil24_qc_issue(struct ata_queued_cmd *qc);
static bool sil24_qc_fill_rtf(struct ata_queued_cmd *qc);
static void sil24_pmp_attach(struct ata_port *ap);
@@ -830,7 +830,7 @@ static int sil24_qc_defer(struct ata_queued_cmd *qc)
return ata_std_qc_defer(qc);
}
-static void sil24_qc_prep(struct ata_queued_cmd *qc)
+static enum ata_completion_errors sil24_qc_prep(struct ata_queued_cmd *qc)
{
struct ata_port *ap = qc->ap;
struct sil24_port_priv *pp = ap->private_data;
@@ -874,6 +874,8 @@ static void sil24_qc_prep(struct ata_queued_cmd *qc)
if (qc->flags & ATA_QCFLAG_DMAMAP)
sil24_fill_sg(qc, sge);
+
+ return AC_ERR_OK;
}
static unsigned int sil24_qc_issue(struct ata_queued_cmd *qc)
diff --git a/drivers/ata/sata_sx4.c b/drivers/ata/sata_sx4.c
index 2277ba0c9c7f..2c7b30c5ea3d 100644
--- a/drivers/ata/sata_sx4.c
+++ b/drivers/ata/sata_sx4.c
@@ -202,7 +202,7 @@ static void pdc_error_handler(struct ata_port *ap);
static void pdc_freeze(struct ata_port *ap);
static void pdc_thaw(struct ata_port *ap);
static int pdc_port_start(struct ata_port *ap);
-static void pdc20621_qc_prep(struct ata_queued_cmd *qc);
+static enum ata_completion_errors pdc20621_qc_prep(struct ata_queued_cmd *qc);
static void pdc_tf_load_mmio(struct ata_port *ap, const struct ata_taskfile *tf);
static void pdc_exec_command_mmio(struct ata_port *ap, const struct ata_taskfile *tf);
static unsigned int pdc20621_dimm_init(struct ata_host *host);
@@ -530,7 +530,7 @@ static void pdc20621_nodata_prep(struct ata_queued_cmd *qc)
VPRINTK("ata pkt buf ofs %u, mmio copied\n", i);
}
-static void pdc20621_qc_prep(struct ata_queued_cmd *qc)
+static enum ata_completion_errors pdc20621_qc_prep(struct ata_queued_cmd *qc)
{
switch (qc->tf.protocol) {
case ATA_PROT_DMA:
@@ -542,6 +542,8 @@ static void pdc20621_qc_prep(struct ata_queued_cmd *qc)
default:
break;
}
+
+ return AC_ERR_OK;
}
static void __pdc20621_push_hdma(struct ata_queued_cmd *qc,
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 2db62d98e395..7bd9cd366d41 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -9,6 +9,7 @@
*/
#include <linux/acpi.h>
+#include <linux/cpufreq.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/fwnode.h>
@@ -3179,6 +3180,8 @@ void device_shutdown(void)
wait_for_device_probe();
device_block_probing();
+ cpufreq_suspend();
+
spin_lock(&devices_kset->list_lock);
/*
* Walk the devices list backward, shutting down each in turn.
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index cc37511de866..6265871a4af2 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -554,12 +554,27 @@ ssize_t __weak cpu_show_mds(struct device *dev,
return sprintf(buf, "Not affected\n");
}
+ssize_t __weak cpu_show_tsx_async_abort(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "Not affected\n");
+}
+
+ssize_t __weak cpu_show_itlb_multihit(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "Not affected\n");
+}
+
static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL);
static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL);
static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL);
+static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL);
+static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL);
static struct attribute *cpu_root_vulnerabilities_attrs[] = {
&dev_attr_meltdown.attr,
@@ -568,6 +583,8 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
&dev_attr_spec_store_bypass.attr,
&dev_attr_l1tf.attr,
&dev_attr_mds.attr,
+ &dev_attr_tsx_async_abort.attr,
+ &dev_attr_itlb_multihit.attr,
NULL
};
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 6bea4f3f8040..84c4e1f72cbd 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -540,6 +540,9 @@ static ssize_t soft_offline_page_store(struct device *dev,
pfn >>= PAGE_SHIFT;
if (!pfn_valid(pfn))
return -ENXIO;
+ /* Only online pages can be soft-offlined (esp., not ZONE_DEVICE). */
+ if (!pfn_to_online_page(pfn))
+ return -EIO;
ret = soft_offline_page(pfn_to_page(pfn), 0);
return ret == 0 ? count : ret;
}
@@ -869,3 +872,39 @@ int walk_memory_blocks(unsigned long start, unsigned long size,
}
return ret;
}
+
+struct for_each_memory_block_cb_data {
+ walk_memory_blocks_func_t func;
+ void *arg;
+};
+
+static int for_each_memory_block_cb(struct device *dev, void *data)
+{
+ struct memory_block *mem = to_memory_block(dev);
+ struct for_each_memory_block_cb_data *cb_data = data;
+
+ return cb_data->func(mem, cb_data->arg);
+}
+
+/**
+ * for_each_memory_block - walk through all present memory blocks
+ *
+ * @arg: argument passed to func
+ * @func: callback for each memory block walked
+ *
+ * This function walks through all present memory blocks, calling func on
+ * each memory block.
+ *
+ * In case func() returns an error, walking is aborted and the error is
+ * returned.
+ */
+int for_each_memory_block(void *arg, walk_memory_blocks_func_t func)
+{
+ struct for_each_memory_block_cb_data cb_data = {
+ .func = func,
+ .arg = arg,
+ };
+
+ return bus_for_each_dev(&memory_subsys, NULL, &cb_data,
+ for_each_memory_block_cb);
+}
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index b6c6c7d97d5b..b230beb6ccb4 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -241,12 +241,8 @@ struct resource *platform_get_resource_byname(struct platform_device *dev,
}
EXPORT_SYMBOL_GPL(platform_get_resource_byname);
-/**
- * platform_get_irq_byname - get an IRQ for a device by name
- * @dev: platform device
- * @name: IRQ name
- */
-int platform_get_irq_byname(struct platform_device *dev, const char *name)
+static int __platform_get_irq_byname(struct platform_device *dev,
+ const char *name)
{
struct resource *r;
@@ -262,12 +258,48 @@ int platform_get_irq_byname(struct platform_device *dev, const char *name)
if (r)
return r->start;
- dev_err(&dev->dev, "IRQ %s not found\n", name);
return -ENXIO;
}
+
+/**
+ * platform_get_irq_byname - get an IRQ for a device by name
+ * @dev: platform device
+ * @name: IRQ name
+ *
+ * Get an IRQ like platform_get_irq(), but then by name rather then by index.
+ *
+ * Return: IRQ number on success, negative error number on failure.
+ */
+int platform_get_irq_byname(struct platform_device *dev, const char *name)
+{
+ int ret;
+
+ ret = __platform_get_irq_byname(dev, name);
+ if (ret < 0 && ret != -EPROBE_DEFER)
+ dev_err(&dev->dev, "IRQ %s not found\n", name);
+
+ return ret;
+}
EXPORT_SYMBOL_GPL(platform_get_irq_byname);
/**
+ * platform_get_irq_byname_optional - get an optional IRQ for a device by name
+ * @dev: platform device
+ * @name: IRQ name
+ *
+ * Get an optional IRQ by name like platform_get_irq_byname(). Except that it
+ * does not print an error message if an IRQ can not be obtained.
+ *
+ * Return: IRQ number on success, negative error number on failure.
+ */
+int platform_get_irq_byname_optional(struct platform_device *dev,
+ const char *name)
+{
+ return __platform_get_irq_byname(dev, name);
+}
+EXPORT_SYMBOL_GPL(platform_get_irq_byname_optional);
+
+/**
* platform_add_devices - add a numbers of platform devices
* @devs: array of platform devices to add
* @num: number of platform devices in array
diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c
index 6c90fd7e2ff8..350dcafd751f 100644
--- a/drivers/base/power/qos.c
+++ b/drivers/base/power/qos.c
@@ -115,20 +115,10 @@ s32 dev_pm_qos_read_value(struct device *dev, enum dev_pm_qos_req_type type)
spin_lock_irqsave(&dev->power.lock, flags);
- switch (type) {
- case DEV_PM_QOS_RESUME_LATENCY:
+ if (type == DEV_PM_QOS_RESUME_LATENCY) {
ret = IS_ERR_OR_NULL(qos) ? PM_QOS_RESUME_LATENCY_NO_CONSTRAINT
: pm_qos_read_value(&qos->resume_latency);
- break;
- case DEV_PM_QOS_MIN_FREQUENCY:
- ret = IS_ERR_OR_NULL(qos) ? PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE
- : pm_qos_read_value(&qos->min_frequency);
- break;
- case DEV_PM_QOS_MAX_FREQUENCY:
- ret = IS_ERR_OR_NULL(qos) ? PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE
- : pm_qos_read_value(&qos->max_frequency);
- break;
- default:
+ } else {
WARN_ON(1);
ret = 0;
}
@@ -169,14 +159,6 @@ static int apply_constraint(struct dev_pm_qos_request *req,
req->dev->power.set_latency_tolerance(req->dev, value);
}
break;
- case DEV_PM_QOS_MIN_FREQUENCY:
- ret = pm_qos_update_target(&qos->min_frequency,
- &req->data.pnode, action, value);
- break;
- case DEV_PM_QOS_MAX_FREQUENCY:
- ret = pm_qos_update_target(&qos->max_frequency,
- &req->data.pnode, action, value);
- break;
case DEV_PM_QOS_FLAGS:
ret = pm_qos_update_flags(&qos->flags, &req->data.flr,
action, value);
@@ -227,24 +209,6 @@ static int dev_pm_qos_constraints_allocate(struct device *dev)
c->no_constraint_value = PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT;
c->type = PM_QOS_MIN;
- c = &qos->min_frequency;
- plist_head_init(&c->list);
- c->target_value = PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE;
- c->default_value = PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE;
- c->no_constraint_value = PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE;
- c->type = PM_QOS_MAX;
- c->notifiers = ++n;
- BLOCKING_INIT_NOTIFIER_HEAD(n);
-
- c = &qos->max_frequency;
- plist_head_init(&c->list);
- c->target_value = PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE;
- c->default_value = PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE;
- c->no_constraint_value = PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE;
- c->type = PM_QOS_MIN;
- c->notifiers = ++n;
- BLOCKING_INIT_NOTIFIER_HEAD(n);
-
INIT_LIST_HEAD(&qos->flags.list);
spin_lock_irq(&dev->power.lock);
@@ -305,18 +269,6 @@ void dev_pm_qos_constraints_destroy(struct device *dev)
memset(req, 0, sizeof(*req));
}
- c = &qos->min_frequency;
- plist_for_each_entry_safe(req, tmp, &c->list, data.pnode) {
- apply_constraint(req, PM_QOS_REMOVE_REQ, PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE);
- memset(req, 0, sizeof(*req));
- }
-
- c = &qos->max_frequency;
- plist_for_each_entry_safe(req, tmp, &c->list, data.pnode) {
- apply_constraint(req, PM_QOS_REMOVE_REQ, PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
- memset(req, 0, sizeof(*req));
- }
-
f = &qos->flags;
list_for_each_entry_safe(req, tmp, &f->list, data.flr.node) {
apply_constraint(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE);
@@ -428,8 +380,6 @@ static int __dev_pm_qos_update_request(struct dev_pm_qos_request *req,
switch(req->type) {
case DEV_PM_QOS_RESUME_LATENCY:
case DEV_PM_QOS_LATENCY_TOLERANCE:
- case DEV_PM_QOS_MIN_FREQUENCY:
- case DEV_PM_QOS_MAX_FREQUENCY:
curr_value = req->data.pnode.prio;
break;
case DEV_PM_QOS_FLAGS:
@@ -557,14 +507,6 @@ int dev_pm_qos_add_notifier(struct device *dev, struct notifier_block *notifier,
ret = blocking_notifier_chain_register(dev->power.qos->resume_latency.notifiers,
notifier);
break;
- case DEV_PM_QOS_MIN_FREQUENCY:
- ret = blocking_notifier_chain_register(dev->power.qos->min_frequency.notifiers,
- notifier);
- break;
- case DEV_PM_QOS_MAX_FREQUENCY:
- ret = blocking_notifier_chain_register(dev->power.qos->max_frequency.notifiers,
- notifier);
- break;
default:
WARN_ON(1);
ret = -EINVAL;
@@ -604,14 +546,6 @@ int dev_pm_qos_remove_notifier(struct device *dev,
ret = blocking_notifier_chain_unregister(dev->power.qos->resume_latency.notifiers,
notifier);
break;
- case DEV_PM_QOS_MIN_FREQUENCY:
- ret = blocking_notifier_chain_unregister(dev->power.qos->min_frequency.notifiers,
- notifier);
- break;
- case DEV_PM_QOS_MAX_FREQUENCY:
- ret = blocking_notifier_chain_unregister(dev->power.qos->max_frequency.notifiers,
- notifier);
- break;
default:
WARN_ON(1);
ret = -EINVAL;
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 5b248763a672..a18155cdce41 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -786,7 +786,6 @@ int __drbd_send_protocol(struct drbd_connection *connection, enum drbd_packet cm
if (nc->tentative && connection->agreed_pro_version < 92) {
rcu_read_unlock();
- mutex_unlock(&sock->mutex);
drbd_err(connection, "--dry-run is not supported by peer");
return -EOPNOTSUPP;
}
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index f6f77eaa7217..739b372a5112 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -417,18 +417,20 @@ out_free_page:
return ret;
}
-static int lo_discard(struct loop_device *lo, struct request *rq, loff_t pos)
+static int lo_fallocate(struct loop_device *lo, struct request *rq, loff_t pos,
+ int mode)
{
/*
- * We use punch hole to reclaim the free space used by the
- * image a.k.a. discard. However we do not support discard if
- * encryption is enabled, because it may give an attacker
- * useful information.
+ * We use fallocate to manipulate the space mappings used by the image
+ * a.k.a. discard/zerorange. However we do not support this if
+ * encryption is enabled, because it may give an attacker useful
+ * information.
*/
struct file *file = lo->lo_backing_file;
- int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
int ret;
+ mode |= FALLOC_FL_KEEP_SIZE;
+
if ((!file->f_op->fallocate) || lo->lo_encrypt_key_size) {
ret = -EOPNOTSUPP;
goto out;
@@ -596,9 +598,17 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
switch (req_op(rq)) {
case REQ_OP_FLUSH:
return lo_req_flush(lo, rq);
- case REQ_OP_DISCARD:
case REQ_OP_WRITE_ZEROES:
- return lo_discard(lo, rq, pos);
+ /*
+ * If the caller doesn't want deallocation, call zeroout to
+ * write zeroes the range. Otherwise, punch them out.
+ */
+ return lo_fallocate(lo, rq, pos,
+ (rq->cmd_flags & REQ_NOUNMAP) ?
+ FALLOC_FL_ZERO_RANGE :
+ FALLOC_FL_PUNCH_HOLE);
+ case REQ_OP_DISCARD:
+ return lo_fallocate(lo, rq, pos, FALLOC_FL_PUNCH_HOLE);
case REQ_OP_WRITE:
if (lo->transfer)
return lo_write_transfer(lo, rq, pos);
@@ -630,7 +640,9 @@ static void loop_reread_partitions(struct loop_device *lo,
{
int rc;
- rc = blkdev_reread_part(bdev);
+ mutex_lock(&bdev->bd_mutex);
+ rc = bdev_disk_changed(bdev, false);
+ mutex_unlock(&bdev->bd_mutex);
if (rc)
pr_warn("%s: partition scan of loop%d (%s) failed (rc=%d)\n",
__func__, lo->lo_number, lo->lo_file_name, rc);
@@ -1154,10 +1166,11 @@ out_unlock:
* must be at least one and it can only become zero when the
* current holder is released.
*/
- if (release)
- err = __blkdev_reread_part(bdev);
- else
- err = blkdev_reread_part(bdev);
+ if (!release)
+ mutex_lock(&bdev->bd_mutex);
+ err = bdev_disk_changed(bdev, false);
+ if (!release)
+ mutex_unlock(&bdev->bd_mutex);
if (err)
pr_warn("%s: partition scan of loop%d failed (rc=%d)\n",
__func__, lo_number, err);
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 964f78cfffa0..f6bafa9a68b9 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -129,7 +129,7 @@ struct mtip_compat_ide_task_request_s {
/*
* This function check_for_surprise_removal is called
* while card is removed from the system and it will
- * read the vendor id from the configration space
+ * read the vendor id from the configuration space
*
* @pdev Pointer to the pci_dev structure.
*
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index ac07e8c94c79..57532465fb83 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -248,8 +248,8 @@ static void nbd_put(struct nbd_device *nbd)
if (refcount_dec_and_mutex_lock(&nbd->refs,
&nbd_index_mutex)) {
idr_remove(&nbd_index_idr, nbd->index);
- mutex_unlock(&nbd_index_mutex);
nbd_dev_remove(nbd);
+ mutex_unlock(&nbd_index_mutex);
}
}
@@ -385,17 +385,16 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
struct nbd_device *nbd = cmd->nbd;
struct nbd_config *config;
+ if (!mutex_trylock(&cmd->lock))
+ return BLK_EH_RESET_TIMER;
+
if (!refcount_inc_not_zero(&nbd->config_refs)) {
cmd->status = BLK_STS_TIMEOUT;
+ mutex_unlock(&cmd->lock);
goto done;
}
config = nbd->config;
- if (!mutex_trylock(&cmd->lock)) {
- nbd_config_put(nbd);
- return BLK_EH_RESET_TIMER;
- }
-
if (config->num_connections > 1) {
dev_err_ratelimited(nbd_to_dev(nbd),
"Connection timed out, retrying (%d/%d alive)\n",
@@ -711,6 +710,12 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
ret = -ENOENT;
goto out;
}
+ if (cmd->status != BLK_STS_OK) {
+ dev_err(disk_to_dev(nbd->disk), "Command already handled %p\n",
+ req);
+ ret = -ENOENT;
+ goto out;
+ }
if (test_bit(NBD_CMD_REQUEUED, &cmd->flags)) {
dev_err(disk_to_dev(nbd->disk), "Raced with timeout on req %p\n",
req);
@@ -792,7 +797,10 @@ static bool nbd_clear_req(struct request *req, void *data, bool reserved)
{
struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
+ mutex_lock(&cmd->lock);
cmd->status = BLK_STS_IOERR;
+ mutex_unlock(&cmd->lock);
+
blk_mq_complete_request(req);
return true;
}
@@ -972,6 +980,26 @@ static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
return ret;
}
+static struct socket *nbd_get_socket(struct nbd_device *nbd, unsigned long fd,
+ int *err)
+{
+ struct socket *sock;
+
+ *err = 0;
+ sock = sockfd_lookup(fd, err);
+ if (!sock)
+ return NULL;
+
+ if (sock->ops->shutdown == sock_no_shutdown) {
+ dev_err(disk_to_dev(nbd->disk), "Unsupported socket: shutdown callout must be supported.\n");
+ *err = -EINVAL;
+ sockfd_put(sock);
+ return NULL;
+ }
+
+ return sock;
+}
+
static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
bool netlink)
{
@@ -981,7 +1009,7 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
struct nbd_sock *nsock;
int err;
- sock = sockfd_lookup(arg, &err);
+ sock = nbd_get_socket(nbd, arg, &err);
if (!sock)
return err;
@@ -1004,14 +1032,15 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg,
sockfd_put(sock);
return -ENOMEM;
}
+
+ config->socks = socks;
+
nsock = kzalloc(sizeof(struct nbd_sock), GFP_KERNEL);
if (!nsock) {
sockfd_put(sock);
return -ENOMEM;
}
- config->socks = socks;
-
nsock->fallback_index = -1;
nsock->dead = false;
mutex_init(&nsock->tx_lock);
@@ -1033,7 +1062,7 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg)
int i;
int err;
- sock = sockfd_lookup(arg, &err);
+ sock = nbd_get_socket(nbd, arg, &err);
if (!sock)
return err;
diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h
index a235c45e22a7..bc837862b767 100644
--- a/drivers/block/null_blk.h
+++ b/drivers/block/null_blk.h
@@ -91,11 +91,13 @@ struct nullb {
#ifdef CONFIG_BLK_DEV_ZONED
int null_zone_init(struct nullb_device *dev);
void null_zone_exit(struct nullb_device *dev);
-int null_zone_report(struct gendisk *disk, sector_t sector,
- struct blk_zone *zones, unsigned int *nr_zones);
+int null_report_zones(struct gendisk *disk, sector_t sector,
+ unsigned int nr_zones, report_zones_cb cb, void *data);
blk_status_t null_handle_zoned(struct nullb_cmd *cmd,
enum req_opf op, sector_t sector,
sector_t nr_sectors);
+size_t null_zone_valid_read_len(struct nullb *nullb,
+ sector_t sector, unsigned int len);
#else
static inline int null_zone_init(struct nullb_device *dev)
{
@@ -103,17 +105,18 @@ static inline int null_zone_init(struct nullb_device *dev)
return -EINVAL;
}
static inline void null_zone_exit(struct nullb_device *dev) {}
-static inline int null_zone_report(struct gendisk *disk, sector_t sector,
- struct blk_zone *zones,
- unsigned int *nr_zones)
-{
- return -EOPNOTSUPP;
-}
static inline blk_status_t null_handle_zoned(struct nullb_cmd *cmd,
enum req_opf op, sector_t sector,
sector_t nr_sectors)
{
return BLK_STS_NOTSUPP;
}
+static inline size_t null_zone_valid_read_len(struct nullb *nullb,
+ sector_t sector,
+ unsigned int len)
+{
+ return len;
+}
+#define null_report_zones NULL
#endif /* CONFIG_BLK_DEV_ZONED */
#endif /* __NULL_BLK_H */
diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c
index 0e7da5015ccd..795fda576824 100644
--- a/drivers/block/null_blk_main.c
+++ b/drivers/block/null_blk_main.c
@@ -227,7 +227,7 @@ static ssize_t nullb_device_uint_attr_store(unsigned int *val,
int result;
result = kstrtouint(page, 0, &tmp);
- if (result)
+ if (result < 0)
return result;
*val = tmp;
@@ -241,7 +241,7 @@ static ssize_t nullb_device_ulong_attr_store(unsigned long *val,
unsigned long tmp;
result = kstrtoul(page, 0, &tmp);
- if (result)
+ if (result < 0)
return result;
*val = tmp;
@@ -255,7 +255,7 @@ static ssize_t nullb_device_bool_attr_store(bool *val, const char *page,
int result;
result = kstrtobool(page, &tmp);
- if (result)
+ if (result < 0)
return result;
*val = tmp;
@@ -263,7 +263,7 @@ static ssize_t nullb_device_bool_attr_store(bool *val, const char *page,
}
/* The following macro should only be used with TYPE = {uint, ulong, bool}. */
-#define NULLB_DEVICE_ATTR(NAME, TYPE) \
+#define NULLB_DEVICE_ATTR(NAME, TYPE, APPLY) \
static ssize_t \
nullb_device_##NAME##_show(struct config_item *item, char *page) \
{ \
@@ -274,31 +274,57 @@ static ssize_t \
nullb_device_##NAME##_store(struct config_item *item, const char *page, \
size_t count) \
{ \
- if (test_bit(NULLB_DEV_FL_CONFIGURED, &to_nullb_device(item)->flags)) \
- return -EBUSY; \
- return nullb_device_##TYPE##_attr_store( \
- &to_nullb_device(item)->NAME, page, count); \
+ int (*apply_fn)(struct nullb_device *dev, TYPE new_value) = APPLY; \
+ struct nullb_device *dev = to_nullb_device(item); \
+ TYPE new_value; \
+ int ret; \
+ \
+ ret = nullb_device_##TYPE##_attr_store(&new_value, page, count); \
+ if (ret < 0) \
+ return ret; \
+ if (apply_fn) \
+ ret = apply_fn(dev, new_value); \
+ else if (test_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags)) \
+ ret = -EBUSY; \
+ if (ret < 0) \
+ return ret; \
+ dev->NAME = new_value; \
+ return count; \
} \
CONFIGFS_ATTR(nullb_device_, NAME);
-NULLB_DEVICE_ATTR(size, ulong);
-NULLB_DEVICE_ATTR(completion_nsec, ulong);
-NULLB_DEVICE_ATTR(submit_queues, uint);
-NULLB_DEVICE_ATTR(home_node, uint);
-NULLB_DEVICE_ATTR(queue_mode, uint);
-NULLB_DEVICE_ATTR(blocksize, uint);
-NULLB_DEVICE_ATTR(irqmode, uint);
-NULLB_DEVICE_ATTR(hw_queue_depth, uint);
-NULLB_DEVICE_ATTR(index, uint);
-NULLB_DEVICE_ATTR(blocking, bool);
-NULLB_DEVICE_ATTR(use_per_node_hctx, bool);
-NULLB_DEVICE_ATTR(memory_backed, bool);
-NULLB_DEVICE_ATTR(discard, bool);
-NULLB_DEVICE_ATTR(mbps, uint);
-NULLB_DEVICE_ATTR(cache_size, ulong);
-NULLB_DEVICE_ATTR(zoned, bool);
-NULLB_DEVICE_ATTR(zone_size, ulong);
-NULLB_DEVICE_ATTR(zone_nr_conv, uint);
+static int nullb_apply_submit_queues(struct nullb_device *dev,
+ unsigned int submit_queues)
+{
+ struct nullb *nullb = dev->nullb;
+ struct blk_mq_tag_set *set;
+
+ if (!nullb)
+ return 0;
+
+ set = nullb->tag_set;
+ blk_mq_update_nr_hw_queues(set, submit_queues);
+ return set->nr_hw_queues == submit_queues ? 0 : -ENOMEM;
+}
+
+NULLB_DEVICE_ATTR(size, ulong, NULL);
+NULLB_DEVICE_ATTR(completion_nsec, ulong, NULL);
+NULLB_DEVICE_ATTR(submit_queues, uint, nullb_apply_submit_queues);
+NULLB_DEVICE_ATTR(home_node, uint, NULL);
+NULLB_DEVICE_ATTR(queue_mode, uint, NULL);
+NULLB_DEVICE_ATTR(blocksize, uint, NULL);
+NULLB_DEVICE_ATTR(irqmode, uint, NULL);
+NULLB_DEVICE_ATTR(hw_queue_depth, uint, NULL);
+NULLB_DEVICE_ATTR(index, uint, NULL);
+NULLB_DEVICE_ATTR(blocking, bool, NULL);
+NULLB_DEVICE_ATTR(use_per_node_hctx, bool, NULL);
+NULLB_DEVICE_ATTR(memory_backed, bool, NULL);
+NULLB_DEVICE_ATTR(discard, bool, NULL);
+NULLB_DEVICE_ATTR(mbps, uint, NULL);
+NULLB_DEVICE_ATTR(cache_size, ulong, NULL);
+NULLB_DEVICE_ATTR(zoned, bool, NULL);
+NULLB_DEVICE_ATTR(zone_size, ulong, NULL);
+NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL);
static ssize_t nullb_device_power_show(struct config_item *item, char *page)
{
@@ -467,7 +493,7 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item)
static ssize_t memb_group_features_show(struct config_item *item, char *page)
{
- return snprintf(page, PAGE_SIZE, "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size\n");
+ return snprintf(page, PAGE_SIZE, "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_nr_conv\n");
}
CONFIGFS_ATTR_RO(memb_group_, features);
@@ -996,6 +1022,16 @@ next:
return 0;
}
+static void nullb_fill_pattern(struct nullb *nullb, struct page *page,
+ unsigned int len, unsigned int off)
+{
+ void *dst;
+
+ dst = kmap_atomic(page);
+ memset(dst + off, 0xFF, len);
+ kunmap_atomic(dst);
+}
+
static void null_handle_discard(struct nullb *nullb, sector_t sector, size_t n)
{
size_t temp;
@@ -1036,10 +1072,24 @@ static int null_transfer(struct nullb *nullb, struct page *page,
unsigned int len, unsigned int off, bool is_write, sector_t sector,
bool is_fua)
{
+ struct nullb_device *dev = nullb->dev;
+ unsigned int valid_len = len;
int err = 0;
if (!is_write) {
- err = copy_from_nullb(nullb, page, off, sector, len);
+ if (dev->zoned)
+ valid_len = null_zone_valid_read_len(nullb,
+ sector, len);
+
+ if (valid_len) {
+ err = copy_from_nullb(nullb, page, off,
+ sector, valid_len);
+ off += valid_len;
+ len -= valid_len;
+ }
+
+ if (len)
+ nullb_fill_pattern(nullb, page, len, off);
flush_dcache_page(page);
} else {
flush_dcache_page(page);
@@ -1418,20 +1468,9 @@ static void null_config_discard(struct nullb *nullb)
blk_queue_flag_set(QUEUE_FLAG_DISCARD, nullb->q);
}
-static int null_open(struct block_device *bdev, fmode_t mode)
-{
- return 0;
-}
-
-static void null_release(struct gendisk *disk, fmode_t mode)
-{
-}
-
-static const struct block_device_operations null_fops = {
- .owner = THIS_MODULE,
- .open = null_open,
- .release = null_release,
- .report_zones = null_zone_report,
+static const struct block_device_operations null_ops = {
+ .owner = THIS_MODULE,
+ .report_zones = null_report_zones,
};
static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
@@ -1532,7 +1571,7 @@ static int null_gendisk_register(struct nullb *nullb)
disk->flags |= GENHD_FL_EXT_DEVT | GENHD_FL_SUPPRESS_PARTITION_INFO;
disk->major = null_major;
disk->first_minor = nullb->index;
- disk->fops = &null_fops;
+ disk->fops = &null_ops;
disk->private_data = nullb;
disk->queue = nullb->q;
strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN);
diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c
index eabc116832a7..d4d88b581822 100644
--- a/drivers/block/null_blk_zoned.c
+++ b/drivers/block/null_blk_zoned.c
@@ -66,22 +66,53 @@ void null_zone_exit(struct nullb_device *dev)
kvfree(dev->zones);
}
-int null_zone_report(struct gendisk *disk, sector_t sector,
- struct blk_zone *zones, unsigned int *nr_zones)
+int null_report_zones(struct gendisk *disk, sector_t sector,
+ unsigned int nr_zones, report_zones_cb cb, void *data)
{
struct nullb *nullb = disk->private_data;
struct nullb_device *dev = nullb->dev;
- unsigned int zno, nrz = 0;
-
- zno = null_zone_no(dev, sector);
- if (zno < dev->nr_zones) {
- nrz = min_t(unsigned int, *nr_zones, dev->nr_zones - zno);
- memcpy(zones, &dev->zones[zno], nrz * sizeof(struct blk_zone));
+ unsigned int first_zone, i;
+ struct blk_zone zone;
+ int error;
+
+ first_zone = null_zone_no(dev, sector);
+ if (first_zone >= dev->nr_zones)
+ return 0;
+
+ nr_zones = min(nr_zones, dev->nr_zones - first_zone);
+ for (i = 0; i < nr_zones; i++) {
+ /*
+ * Stacked DM target drivers will remap the zone information by
+ * modifying the zone information passed to the report callback.
+ * So use a local copy to avoid corruption of the device zone
+ * array.
+ */
+ memcpy(&zone, &dev->zones[first_zone + i],
+ sizeof(struct blk_zone));
+ error = cb(&zone, i, data);
+ if (error)
+ return error;
}
- *nr_zones = nrz;
+ return nr_zones;
+}
- return 0;
+size_t null_zone_valid_read_len(struct nullb *nullb,
+ sector_t sector, unsigned int len)
+{
+ struct nullb_device *dev = nullb->dev;
+ struct blk_zone *zone = &dev->zones[null_zone_no(dev, sector)];
+ unsigned int nr_sectors = len >> SECTOR_SHIFT;
+
+ /* Read must be below the write pointer position */
+ if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL ||
+ sector + nr_sectors <= zone->wp)
+ return len;
+
+ if (sector > zone->wp)
+ return 0;
+
+ return (zone->wp - sector) << SECTOR_SHIFT;
}
static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
@@ -118,14 +149,14 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
return BLK_STS_OK;
}
-static blk_status_t null_zone_reset(struct nullb_cmd *cmd, sector_t sector)
+static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
+ sector_t sector)
{
struct nullb_device *dev = cmd->nq->dev;
- unsigned int zno = null_zone_no(dev, sector);
- struct blk_zone *zone = &dev->zones[zno];
+ struct blk_zone *zone = &dev->zones[null_zone_no(dev, sector)];
size_t i;
- switch (req_op(cmd->rq)) {
+ switch (op) {
case REQ_OP_ZONE_RESET_ALL:
for (i = 0; i < dev->nr_zones; i++) {
if (zone[i].type == BLK_ZONE_TYPE_CONVENTIONAL)
@@ -141,9 +172,31 @@ static blk_status_t null_zone_reset(struct nullb_cmd *cmd, sector_t sector)
zone->cond = BLK_ZONE_COND_EMPTY;
zone->wp = zone->start;
break;
- default:
- cmd->error = BLK_STS_NOTSUPP;
+ case REQ_OP_ZONE_OPEN:
+ if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
+ return BLK_STS_IOERR;
+ if (zone->cond == BLK_ZONE_COND_FULL)
+ return BLK_STS_IOERR;
+
+ zone->cond = BLK_ZONE_COND_EXP_OPEN;
+ break;
+ case REQ_OP_ZONE_CLOSE:
+ if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
+ return BLK_STS_IOERR;
+ if (zone->cond == BLK_ZONE_COND_FULL)
+ return BLK_STS_IOERR;
+
+ zone->cond = BLK_ZONE_COND_CLOSED;
break;
+ case REQ_OP_ZONE_FINISH:
+ if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
+ return BLK_STS_IOERR;
+
+ zone->cond = BLK_ZONE_COND_FULL;
+ zone->wp = zone->start + zone->len;
+ break;
+ default:
+ return BLK_STS_NOTSUPP;
}
return BLK_STS_OK;
}
@@ -156,7 +209,10 @@ blk_status_t null_handle_zoned(struct nullb_cmd *cmd, enum req_opf op,
return null_zone_write(cmd, sector, nr_sectors);
case REQ_OP_ZONE_RESET:
case REQ_OP_ZONE_RESET_ALL:
- return null_zone_reset(cmd, sector);
+ case REQ_OP_ZONE_OPEN:
+ case REQ_OP_ZONE_CLOSE:
+ case REQ_OP_ZONE_FINISH:
+ return null_zone_mgmt(cmd, op, sector);
default:
return BLK_STS_OK;
}
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 7c4350c0fb77..13527a0b4e44 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -2087,7 +2087,7 @@ static int rbd_object_map_update_finish(struct rbd_obj_request *obj_req,
struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
struct ceph_osd_data *osd_data;
u64 objno;
- u8 state, new_state, current_state;
+ u8 state, new_state, uninitialized_var(current_state);
bool has_current_state;
void *p;
@@ -6639,10 +6639,13 @@ static int rbd_add_acquire_lock(struct rbd_device *rbd_dev)
queue_delayed_work(rbd_dev->task_wq, &rbd_dev->lock_dwork, 0);
ret = wait_for_completion_killable_timeout(&rbd_dev->acquire_wait,
ceph_timeout_jiffies(rbd_dev->opts->lock_timeout));
- if (ret > 0)
+ if (ret > 0) {
ret = rbd_dev->acquire_err;
- else if (!ret)
- ret = -ETIMEDOUT;
+ } else {
+ cancel_delayed_work_sync(&rbd_dev->lock_dwork);
+ if (!ret)
+ ret = -ETIMEDOUT;
+ }
if (ret) {
rbd_warn(rbd_dev, "failed to acquire exclusive lock: %ld", ret);
diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c
index 76b73ddf8fd7..10f6368117d8 100644
--- a/drivers/block/rsxx/core.c
+++ b/drivers/block/rsxx/core.c
@@ -1000,8 +1000,10 @@ static void rsxx_pci_remove(struct pci_dev *dev)
cancel_work_sync(&card->event_work);
+ destroy_workqueue(card->event_wq);
rsxx_destroy_dev(card);
rsxx_dma_destroy(card);
+ destroy_workqueue(card->creg_ctrl.creg_wq);
spin_lock_irqsave(&card->irq_lock, flags);
rsxx_disable_ier_and_isr(card, CR_INTR_ALL);
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index d58a359a6622..4285e75e52c3 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -413,13 +413,14 @@ static void reset_bdev(struct zram *zram)
static ssize_t backing_dev_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
+ struct file *file;
struct zram *zram = dev_to_zram(dev);
- struct file *file = zram->backing_dev;
char *p;
ssize_t ret;
down_read(&zram->init_lock);
- if (!zram->backing_dev) {
+ file = zram->backing_dev;
+ if (!file) {
memcpy(buf, "none\n", 5);
up_read(&zram->init_lock);
return 5;
diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c
index ad50efb470aa..2b6670daf7fc 100644
--- a/drivers/bus/ti-sysc.c
+++ b/drivers/bus/ti-sysc.c
@@ -74,6 +74,7 @@ static const char * const clock_names[SYSC_MAX_CLOCKS] = {
* @clk_disable_quirk: module specific clock disable quirk
* @reset_done_quirk: module specific reset done quirk
* @module_enable_quirk: module specific enable quirk
+ * @module_disable_quirk: module specific disable quirk
*/
struct sysc {
struct device *dev;
@@ -100,6 +101,7 @@ struct sysc {
void (*clk_disable_quirk)(struct sysc *sysc);
void (*reset_done_quirk)(struct sysc *sysc);
void (*module_enable_quirk)(struct sysc *sysc);
+ void (*module_disable_quirk)(struct sysc *sysc);
};
static void sysc_parse_dts_quirks(struct sysc *ddata, struct device_node *np,
@@ -959,6 +961,9 @@ static int sysc_disable_module(struct device *dev)
if (ddata->offsets[SYSC_SYSCONFIG] == -ENODEV)
return 0;
+ if (ddata->module_disable_quirk)
+ ddata->module_disable_quirk(ddata);
+
regbits = ddata->cap->regbits;
reg = sysc_read(ddata, ddata->offsets[SYSC_SYSCONFIG]);
@@ -1248,6 +1253,9 @@ static const struct sysc_revision_quirk sysc_revision_quirks[] = {
SYSC_MODULE_QUIRK_SGX),
SYSC_QUIRK("wdt", 0, 0, 0x10, 0x14, 0x502a0500, 0xfffff0f0,
SYSC_MODULE_QUIRK_WDT),
+ /* Watchdog on am3 and am4 */
+ SYSC_QUIRK("wdt", 0x44e35000, 0, 0x10, 0x14, 0x502a0500, 0xfffff0f0,
+ SYSC_MODULE_QUIRK_WDT | SYSC_QUIRK_SWSUP_SIDLE),
#ifdef DEBUG
SYSC_QUIRK("adc", 0, 0, 0x10, -1, 0x47300001, 0xffffffff, 0),
@@ -1440,14 +1448,14 @@ static void sysc_reset_done_quirk_wdt(struct sysc *ddata)
!(val & 0x10), 100,
MAX_MODULE_SOFTRESET_WAIT);
if (error)
- dev_warn(ddata->dev, "wdt disable spr failed\n");
+ dev_warn(ddata->dev, "wdt disable step1 failed\n");
- sysc_write(ddata, wps, 0x5555);
+ sysc_write(ddata, spr, 0x5555);
error = readl_poll_timeout(ddata->module_va + wps, val,
!(val & 0x10), 100,
MAX_MODULE_SOFTRESET_WAIT);
if (error)
- dev_warn(ddata->dev, "wdt disable wps failed\n");
+ dev_warn(ddata->dev, "wdt disable step2 failed\n");
}
static void sysc_init_module_quirks(struct sysc *ddata)
@@ -1471,8 +1479,10 @@ static void sysc_init_module_quirks(struct sysc *ddata)
if (ddata->cfg.quirks & SYSC_MODULE_QUIRK_SGX)
ddata->module_enable_quirk = sysc_module_enable_quirk_sgx;
- if (ddata->cfg.quirks & SYSC_MODULE_QUIRK_WDT)
+ if (ddata->cfg.quirks & SYSC_MODULE_QUIRK_WDT) {
ddata->reset_done_quirk = sysc_reset_done_quirk_wdt;
+ ddata->module_disable_quirk = sysc_reset_done_quirk_wdt;
+ }
}
static int sysc_clockdomain_init(struct sysc *ddata)
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 80b850ef1bf6..8d53b8ef545c 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -13,7 +13,6 @@
#include <linux/delay.h>
#include <linux/device.h>
#include <linux/err.h>
-#include <linux/freezer.h>
#include <linux/fs.h>
#include <linux/hw_random.h>
#include <linux/kernel.h>
@@ -422,9 +421,7 @@ static int hwrng_fillfn(void *unused)
{
long rc;
- set_freezable();
-
- while (!kthread_freezable_should_stop(NULL)) {
+ while (!kthread_should_stop()) {
struct hwrng *rng;
rng = get_current_rng();
diff --git a/drivers/char/random.c b/drivers/char/random.c
index de434feb873a..01b8868b9bed 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -327,7 +327,6 @@
#include <linux/percpu.h>
#include <linux/cryptohash.h>
#include <linux/fips.h>
-#include <linux/freezer.h>
#include <linux/ptrace.h>
#include <linux/workqueue.h>
#include <linux/irq.h>
@@ -2500,8 +2499,7 @@ void add_hwgenerator_randomness(const char *buffer, size_t count,
* We'll be woken up again once below random_write_wakeup_thresh,
* or when the calling thread is about to terminate.
*/
- wait_event_freezable(random_write_wait,
- kthread_should_stop() ||
+ wait_event_interruptible(random_write_wait, kthread_should_stop() ||
ENTROPY_BITS(&input_pool) <= random_write_wakeup_bits);
mix_pool_bytes(poolp, buffer, count);
credit_entropy_bits(poolp, entropy);
diff --git a/drivers/char/tpm/Kconfig b/drivers/char/tpm/Kconfig
index 9c37047f4b56..aacdeed93320 100644
--- a/drivers/char/tpm/Kconfig
+++ b/drivers/char/tpm/Kconfig
@@ -67,6 +67,13 @@ config TCG_TIS_SPI
within Linux. To compile this driver as a module, choose M here;
the module will be called tpm_tis_spi.
+config TCG_TIS_SPI_CR50
+ bool "Cr50 SPI Interface"
+ depends on TCG_TIS_SPI
+ help
+ If you have a H1 secure module running Cr50 firmware on SPI bus,
+ say Yes and it will be accessible from within Linux.
+
config TCG_TIS_I2C_ATMEL
tristate "TPM Interface Specification 1.2 Interface (I2C - Atmel)"
depends on I2C
diff --git a/drivers/char/tpm/Makefile b/drivers/char/tpm/Makefile
index c354cdff9c62..5a0d99d4fec0 100644
--- a/drivers/char/tpm/Makefile
+++ b/drivers/char/tpm/Makefile
@@ -21,7 +21,9 @@ tpm-$(CONFIG_EFI) += eventlog/efi.o
tpm-$(CONFIG_OF) += eventlog/of.o
obj-$(CONFIG_TCG_TIS_CORE) += tpm_tis_core.o
obj-$(CONFIG_TCG_TIS) += tpm_tis.o
-obj-$(CONFIG_TCG_TIS_SPI) += tpm_tis_spi.o
+obj-$(CONFIG_TCG_TIS_SPI) += tpm_tis_spi_mod.o
+tpm_tis_spi_mod-y := tpm_tis_spi.o
+tpm_tis_spi_mod-$(CONFIG_TCG_TIS_SPI_CR50) += tpm_tis_spi_cr50.o
obj-$(CONFIG_TCG_TIS_I2C_ATMEL) += tpm_i2c_atmel.o
obj-$(CONFIG_TCG_TIS_I2C_INFINEON) += tpm_i2c_infineon.o
obj-$(CONFIG_TCG_TIS_I2C_NUVOTON) += tpm_i2c_nuvoton.o
diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
index d7a3888ad80f..a438b1206fcb 100644
--- a/drivers/char/tpm/tpm-interface.c
+++ b/drivers/char/tpm/tpm-interface.c
@@ -23,6 +23,7 @@
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
+#include <linux/suspend.h>
#include <linux/freezer.h>
#include <linux/tpm_eventlog.h>
@@ -394,7 +395,11 @@ int tpm_pm_suspend(struct device *dev)
return -ENODEV;
if (chip->flags & TPM_CHIP_FLAG_ALWAYS_POWERED)
- return 0;
+ goto suspended;
+
+ if ((chip->flags & TPM_CHIP_FLAG_FIRMWARE_POWER_MANAGED) &&
+ !pm_suspend_via_firmware())
+ goto suspended;
if (!tpm_chip_start(chip)) {
if (chip->flags & TPM_CHIP_FLAG_TPM2)
@@ -405,6 +410,7 @@ int tpm_pm_suspend(struct device *dev)
tpm_chip_stop(chip);
}
+suspended:
return rc;
}
EXPORT_SYMBOL_GPL(tpm_pm_suspend);
@@ -453,62 +459,6 @@ int tpm_get_random(struct tpm_chip *chip, u8 *out, size_t max)
}
EXPORT_SYMBOL_GPL(tpm_get_random);
-/**
- * tpm_seal_trusted() - seal a trusted key payload
- * @chip: a &struct tpm_chip instance, %NULL for the default chip
- * @options: authentication values and other options
- * @payload: the key data in clear and encrypted form
- *
- * Note: only TPM 2.0 chip are supported. TPM 1.x implementation is located in
- * the keyring subsystem.
- *
- * Return: same as with tpm_transmit_cmd()
- */
-int tpm_seal_trusted(struct tpm_chip *chip, struct trusted_key_payload *payload,
- struct trusted_key_options *options)
-{
- int rc;
-
- chip = tpm_find_get_ops(chip);
- if (!chip || !(chip->flags & TPM_CHIP_FLAG_TPM2))
- return -ENODEV;
-
- rc = tpm2_seal_trusted(chip, payload, options);
-
- tpm_put_ops(chip);
- return rc;
-}
-EXPORT_SYMBOL_GPL(tpm_seal_trusted);
-
-/**
- * tpm_unseal_trusted() - unseal a trusted key
- * @chip: a &struct tpm_chip instance, %NULL for the default chip
- * @options: authentication values and other options
- * @payload: the key data in clear and encrypted form
- *
- * Note: only TPM 2.0 chip are supported. TPM 1.x implementation is located in
- * the keyring subsystem.
- *
- * Return: same as with tpm_transmit_cmd()
- */
-int tpm_unseal_trusted(struct tpm_chip *chip,
- struct trusted_key_payload *payload,
- struct trusted_key_options *options)
-{
- int rc;
-
- chip = tpm_find_get_ops(chip);
- if (!chip || !(chip->flags & TPM_CHIP_FLAG_TPM2))
- return -ENODEV;
-
- rc = tpm2_unseal_trusted(chip, payload, options);
-
- tpm_put_ops(chip);
-
- return rc;
-}
-EXPORT_SYMBOL_GPL(tpm_unseal_trusted);
-
static int __init tpm_init(void)
{
int rc;
diff --git a/drivers/char/tpm/tpm-sysfs.c b/drivers/char/tpm/tpm-sysfs.c
index edfa89160010..3b53b3e5ec3e 100644
--- a/drivers/char/tpm/tpm-sysfs.c
+++ b/drivers/char/tpm/tpm-sysfs.c
@@ -217,6 +217,7 @@ static ssize_t caps_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct tpm_chip *chip = to_tpm_chip(dev);
+ struct tpm1_version *version;
ssize_t rc = 0;
char *str = buf;
cap_t cap;
@@ -232,31 +233,31 @@ static ssize_t caps_show(struct device *dev, struct device_attribute *attr,
str += sprintf(str, "Manufacturer: 0x%x\n",
be32_to_cpu(cap.manufacturer_id));
- /* Try to get a TPM version 1.2 TPM_CAP_VERSION_INFO */
- rc = tpm1_getcap(chip, TPM_CAP_VERSION_1_2, &cap,
+ /* TPM 1.2 */
+ if (!tpm1_getcap(chip, TPM_CAP_VERSION_1_2, &cap,
"attempting to determine the 1.2 version",
- sizeof(cap.tpm_version_1_2));
- if (!rc) {
- str += sprintf(str,
- "TCG version: %d.%d\nFirmware version: %d.%d\n",
- cap.tpm_version_1_2.Major,
- cap.tpm_version_1_2.Minor,
- cap.tpm_version_1_2.revMajor,
- cap.tpm_version_1_2.revMinor);
- } else {
- /* Otherwise just use TPM_STRUCT_VER */
- if (tpm1_getcap(chip, TPM_CAP_VERSION_1_1, &cap,
- "attempting to determine the 1.1 version",
- sizeof(cap.tpm_version)))
- goto out_ops;
- str += sprintf(str,
- "TCG version: %d.%d\nFirmware version: %d.%d\n",
- cap.tpm_version.Major,
- cap.tpm_version.Minor,
- cap.tpm_version.revMajor,
- cap.tpm_version.revMinor);
+ sizeof(cap.version2))) {
+ version = &cap.version2.version;
+ goto out_print;
}
+
+ /* TPM 1.1 */
+ if (tpm1_getcap(chip, TPM_CAP_VERSION_1_1, &cap,
+ "attempting to determine the 1.1 version",
+ sizeof(cap.version1))) {
+ goto out_ops;
+ }
+
+ version = &cap.version1;
+
+out_print:
+ str += sprintf(str,
+ "TCG version: %d.%d\nFirmware version: %d.%d\n",
+ version->major, version->minor,
+ version->rev_major, version->rev_minor);
+
rc = str - buf;
+
out_ops:
tpm_put_ops(chip);
return rc;
diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
index a7fea3e0ca86..b9e1547be6b5 100644
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -25,7 +25,6 @@
#include <linux/platform_device.h>
#include <linux/io.h>
#include <linux/tpm.h>
-#include <linux/highmem.h>
#include <linux/tpm_eventlog.h>
#ifdef CONFIG_X86
@@ -58,123 +57,6 @@ enum tpm_addr {
#define TPM_ERR_DISABLED 0x7
#define TPM_ERR_INVALID_POSTINIT 38
-#define TPM_HEADER_SIZE 10
-
-enum tpm2_const {
- TPM2_PLATFORM_PCR = 24,
- TPM2_PCR_SELECT_MIN = ((TPM2_PLATFORM_PCR + 7) / 8),
-};
-
-enum tpm2_timeouts {
- TPM2_TIMEOUT_A = 750,
- TPM2_TIMEOUT_B = 2000,
- TPM2_TIMEOUT_C = 200,
- TPM2_TIMEOUT_D = 30,
- TPM2_DURATION_SHORT = 20,
- TPM2_DURATION_MEDIUM = 750,
- TPM2_DURATION_LONG = 2000,
- TPM2_DURATION_LONG_LONG = 300000,
- TPM2_DURATION_DEFAULT = 120000,
-};
-
-enum tpm2_structures {
- TPM2_ST_NO_SESSIONS = 0x8001,
- TPM2_ST_SESSIONS = 0x8002,
-};
-
-/* Indicates from what layer of the software stack the error comes from */
-#define TSS2_RC_LAYER_SHIFT 16
-#define TSS2_RESMGR_TPM_RC_LAYER (11 << TSS2_RC_LAYER_SHIFT)
-
-enum tpm2_return_codes {
- TPM2_RC_SUCCESS = 0x0000,
- TPM2_RC_HASH = 0x0083, /* RC_FMT1 */
- TPM2_RC_HANDLE = 0x008B,
- TPM2_RC_INITIALIZE = 0x0100, /* RC_VER1 */
- TPM2_RC_FAILURE = 0x0101,
- TPM2_RC_DISABLED = 0x0120,
- TPM2_RC_COMMAND_CODE = 0x0143,
- TPM2_RC_TESTING = 0x090A, /* RC_WARN */
- TPM2_RC_REFERENCE_H0 = 0x0910,
- TPM2_RC_RETRY = 0x0922,
-};
-
-enum tpm2_command_codes {
- TPM2_CC_FIRST = 0x011F,
- TPM2_CC_HIERARCHY_CONTROL = 0x0121,
- TPM2_CC_HIERARCHY_CHANGE_AUTH = 0x0129,
- TPM2_CC_CREATE_PRIMARY = 0x0131,
- TPM2_CC_SEQUENCE_COMPLETE = 0x013E,
- TPM2_CC_SELF_TEST = 0x0143,
- TPM2_CC_STARTUP = 0x0144,
- TPM2_CC_SHUTDOWN = 0x0145,
- TPM2_CC_NV_READ = 0x014E,
- TPM2_CC_CREATE = 0x0153,
- TPM2_CC_LOAD = 0x0157,
- TPM2_CC_SEQUENCE_UPDATE = 0x015C,
- TPM2_CC_UNSEAL = 0x015E,
- TPM2_CC_CONTEXT_LOAD = 0x0161,
- TPM2_CC_CONTEXT_SAVE = 0x0162,
- TPM2_CC_FLUSH_CONTEXT = 0x0165,
- TPM2_CC_VERIFY_SIGNATURE = 0x0177,
- TPM2_CC_GET_CAPABILITY = 0x017A,
- TPM2_CC_GET_RANDOM = 0x017B,
- TPM2_CC_PCR_READ = 0x017E,
- TPM2_CC_PCR_EXTEND = 0x0182,
- TPM2_CC_EVENT_SEQUENCE_COMPLETE = 0x0185,
- TPM2_CC_HASH_SEQUENCE_START = 0x0186,
- TPM2_CC_CREATE_LOADED = 0x0191,
- TPM2_CC_LAST = 0x0193, /* Spec 1.36 */
-};
-
-enum tpm2_permanent_handles {
- TPM2_RS_PW = 0x40000009,
-};
-
-enum tpm2_capabilities {
- TPM2_CAP_HANDLES = 1,
- TPM2_CAP_COMMANDS = 2,
- TPM2_CAP_PCRS = 5,
- TPM2_CAP_TPM_PROPERTIES = 6,
-};
-
-enum tpm2_properties {
- TPM_PT_TOTAL_COMMANDS = 0x0129,
-};
-
-enum tpm2_startup_types {
- TPM2_SU_CLEAR = 0x0000,
- TPM2_SU_STATE = 0x0001,
-};
-
-enum tpm2_cc_attrs {
- TPM2_CC_ATTR_CHANDLES = 25,
- TPM2_CC_ATTR_RHANDLE = 28,
-};
-
-#define TPM_VID_INTEL 0x8086
-#define TPM_VID_WINBOND 0x1050
-#define TPM_VID_STM 0x104A
-
-enum tpm_chip_flags {
- TPM_CHIP_FLAG_TPM2 = BIT(1),
- TPM_CHIP_FLAG_IRQ = BIT(2),
- TPM_CHIP_FLAG_VIRTUAL = BIT(3),
- TPM_CHIP_FLAG_HAVE_TIMEOUTS = BIT(4),
- TPM_CHIP_FLAG_ALWAYS_POWERED = BIT(5),
-};
-
-#define to_tpm_chip(d) container_of(d, struct tpm_chip, dev)
-
-struct tpm_header {
- __be16 tag;
- __be32 length;
- union {
- __be32 ordinal;
- __be32 return_code;
- };
-} __packed;
-
#define TPM_TAG_RQU_COMMAND 193
struct stclear_flags_t {
@@ -186,19 +68,16 @@ struct stclear_flags_t {
u8 bGlobalLock;
} __packed;
-struct tpm_version_t {
- u8 Major;
- u8 Minor;
- u8 revMajor;
- u8 revMinor;
+struct tpm1_version {
+ u8 major;
+ u8 minor;
+ u8 rev_major;
+ u8 rev_minor;
} __packed;
-struct tpm_version_1_2_t {
- __be16 tag;
- u8 Major;
- u8 Minor;
- u8 revMajor;
- u8 revMinor;
+struct tpm1_version2 {
+ __be16 tag;
+ struct tpm1_version version;
} __packed;
struct timeout_t {
@@ -243,8 +122,8 @@ typedef union {
struct stclear_flags_t stclear_flags;
__u8 owned;
__be32 num_pcrs;
- struct tpm_version_t tpm_version;
- struct tpm_version_1_2_t tpm_version_1_2;
+ struct tpm1_version version1;
+ struct tpm1_version2 version2;
__be32 manufacturer_id;
struct timeout_t timeout;
struct duration_t duration;
@@ -274,102 +153,6 @@ enum tpm_sub_capabilities {
* compiler warnings about stack frame size. */
#define TPM_MAX_RNG_DATA 128
-/* A string buffer type for constructing TPM commands. This is based on the
- * ideas of string buffer code in security/keys/trusted.h but is heap based
- * in order to keep the stack usage minimal.
- */
-
-enum tpm_buf_flags {
- TPM_BUF_OVERFLOW = BIT(0),
-};
-
-struct tpm_buf {
- struct page *data_page;
- unsigned int flags;
- u8 *data;
-};
-
-static inline void tpm_buf_reset(struct tpm_buf *buf, u16 tag, u32 ordinal)
-{
- struct tpm_header *head = (struct tpm_header *)buf->data;
-
- head->tag = cpu_to_be16(tag);
- head->length = cpu_to_be32(sizeof(*head));
- head->ordinal = cpu_to_be32(ordinal);
-}
-
-static inline int tpm_buf_init(struct tpm_buf *buf, u16 tag, u32 ordinal)
-{
- buf->data_page = alloc_page(GFP_HIGHUSER);
- if (!buf->data_page)
- return -ENOMEM;
-
- buf->flags = 0;
- buf->data = kmap(buf->data_page);
- tpm_buf_reset(buf, tag, ordinal);
- return 0;
-}
-
-static inline void tpm_buf_destroy(struct tpm_buf *buf)
-{
- kunmap(buf->data_page);
- __free_page(buf->data_page);
-}
-
-static inline u32 tpm_buf_length(struct tpm_buf *buf)
-{
- struct tpm_header *head = (struct tpm_header *)buf->data;
-
- return be32_to_cpu(head->length);
-}
-
-static inline u16 tpm_buf_tag(struct tpm_buf *buf)
-{
- struct tpm_header *head = (struct tpm_header *)buf->data;
-
- return be16_to_cpu(head->tag);
-}
-
-static inline void tpm_buf_append(struct tpm_buf *buf,
- const unsigned char *new_data,
- unsigned int new_len)
-{
- struct tpm_header *head = (struct tpm_header *)buf->data;
- u32 len = tpm_buf_length(buf);
-
- /* Return silently if overflow has already happened. */
- if (buf->flags & TPM_BUF_OVERFLOW)
- return;
-
- if ((len + new_len) > PAGE_SIZE) {
- WARN(1, "tpm_buf: overflow\n");
- buf->flags |= TPM_BUF_OVERFLOW;
- return;
- }
-
- memcpy(&buf->data[len], new_data, new_len);
- head->length = cpu_to_be32(len + new_len);
-}
-
-static inline void tpm_buf_append_u8(struct tpm_buf *buf, const u8 value)
-{
- tpm_buf_append(buf, &value, 1);
-}
-
-static inline void tpm_buf_append_u16(struct tpm_buf *buf, const u16 value)
-{
- __be16 value2 = cpu_to_be16(value);
-
- tpm_buf_append(buf, (u8 *) &value2, 2);
-}
-
-static inline void tpm_buf_append_u32(struct tpm_buf *buf, const u32 value)
-{
- __be32 value2 = cpu_to_be32(value);
-
- tpm_buf_append(buf, (u8 *) &value2, 4);
-}
-
extern struct class *tpm_class;
extern struct class *tpmrm_class;
extern dev_t tpm_devt;
@@ -429,11 +212,6 @@ static inline void tpm_add_ppi(struct tpm_chip *chip)
}
#endif
-static inline u32 tpm2_rc_value(u32 rc)
-{
- return (rc & BIT(7)) ? rc & 0xff : rc;
-}
-
int tpm2_get_timeouts(struct tpm_chip *chip);
int tpm2_pcr_read(struct tpm_chip *chip, u32 pcr_idx,
struct tpm_digest *digest, u16 *digest_size_ptr);
@@ -441,12 +219,6 @@ int tpm2_pcr_extend(struct tpm_chip *chip, u32 pcr_idx,
struct tpm_digest *digests);
int tpm2_get_random(struct tpm_chip *chip, u8 *dest, size_t max);
void tpm2_flush_context(struct tpm_chip *chip, u32 handle);
-int tpm2_seal_trusted(struct tpm_chip *chip,
- struct trusted_key_payload *payload,
- struct trusted_key_options *options);
-int tpm2_unseal_trusted(struct tpm_chip *chip,
- struct trusted_key_payload *payload,
- struct trusted_key_options *options);
ssize_t tpm2_get_tpm_pt(struct tpm_chip *chip, u32 property_id,
u32 *value, const char *desc);
diff --git a/drivers/char/tpm/tpm1-cmd.c b/drivers/char/tpm/tpm1-cmd.c
index 149e953ca369..ca7158fa6e6c 100644
--- a/drivers/char/tpm/tpm1-cmd.c
+++ b/drivers/char/tpm/tpm1-cmd.c
@@ -343,6 +343,7 @@ int tpm1_get_timeouts(struct tpm_chip *chip)
{
cap_t cap;
unsigned long timeout_old[4], timeout_chip[4], timeout_eff[4];
+ unsigned long durations[3];
ssize_t rc;
rc = tpm1_getcap(chip, TPM_CAP_PROP_TIS_TIMEOUT, &cap, NULL,
@@ -427,6 +428,20 @@ int tpm1_get_timeouts(struct tpm_chip *chip)
usecs_to_jiffies(be32_to_cpu(cap.duration.tpm_long));
chip->duration[TPM_LONG_LONG] = 0; /* not used under 1.2 */
+ /*
+ * Provide the ability for vendor overrides of duration values in case
+ * of misreporting.
+ */
+ if (chip->ops->update_durations)
+ chip->ops->update_durations(chip, durations);
+
+ if (chip->duration_adjusted) {
+ dev_info(&chip->dev, HW_ERR "Adjusting reported durations.");
+ chip->duration[TPM_SHORT] = durations[0];
+ chip->duration[TPM_MEDIUM] = durations[1];
+ chip->duration[TPM_LONG] = durations[2];
+ }
+
/* The Broadcom BCM0102 chipset in a Dell Latitude D820 gets the above
* value wrong and apparently reports msecs rather than usecs. So we
* fix up the resulting too-small TPM_SHORT value to make things work.
diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c
index ba9acae83bff..fdb457704aa7 100644
--- a/drivers/char/tpm/tpm2-cmd.c
+++ b/drivers/char/tpm/tpm2-cmd.c
@@ -13,20 +13,6 @@
#include "tpm.h"
#include <crypto/hash_info.h>
-#include <keys/trusted-type.h>
-
-enum tpm2_object_attributes {
- TPM2_OA_USER_WITH_AUTH = BIT(6),
-};
-
-enum tpm2_session_attributes {
- TPM2_SA_CONTINUE_SESSION = BIT(0),
-};
-
-struct tpm2_hash {
- unsigned int crypto_id;
- unsigned int tpm_id;
-};
static struct tpm2_hash tpm2_hash_map[] = {
{HASH_ALGO_SHA1, TPM_ALG_SHA1},
@@ -377,299 +363,6 @@ void tpm2_flush_context(struct tpm_chip *chip, u32 handle)
tpm_buf_destroy(&buf);
}
-/**
- * tpm_buf_append_auth() - append TPMS_AUTH_COMMAND to the buffer.
- *
- * @buf: an allocated tpm_buf instance
- * @session_handle: session handle
- * @nonce: the session nonce, may be NULL if not used
- * @nonce_len: the session nonce length, may be 0 if not used
- * @attributes: the session attributes
- * @hmac: the session HMAC or password, may be NULL if not used
- * @hmac_len: the session HMAC or password length, maybe 0 if not used
- */
-static void tpm2_buf_append_auth(struct tpm_buf *buf, u32 session_handle,
- const u8 *nonce, u16 nonce_len,
- u8 attributes,
- const u8 *hmac, u16 hmac_len)
-{
- tpm_buf_append_u32(buf, 9 + nonce_len + hmac_len);
- tpm_buf_append_u32(buf, session_handle);
- tpm_buf_append_u16(buf, nonce_len);
-
- if (nonce && nonce_len)
- tpm_buf_append(buf, nonce, nonce_len);
-
- tpm_buf_append_u8(buf, attributes);
- tpm_buf_append_u16(buf, hmac_len);
-
- if (hmac && hmac_len)
- tpm_buf_append(buf, hmac, hmac_len);
-}
-
-/**
- * tpm2_seal_trusted() - seal the payload of a trusted key
- *
- * @chip: TPM chip to use
- * @payload: the key data in clear and encrypted form
- * @options: authentication values and other options
- *
- * Return: < 0 on error and 0 on success.
- */
-int tpm2_seal_trusted(struct tpm_chip *chip,
- struct trusted_key_payload *payload,
- struct trusted_key_options *options)
-{
- unsigned int blob_len;
- struct tpm_buf buf;
- u32 hash;
- int i;
- int rc;
-
- for (i = 0; i < ARRAY_SIZE(tpm2_hash_map); i++) {
- if (options->hash == tpm2_hash_map[i].crypto_id) {
- hash = tpm2_hash_map[i].tpm_id;
- break;
- }
- }
-
- if (i == ARRAY_SIZE(tpm2_hash_map))
- return -EINVAL;
-
- rc = tpm_buf_init(&buf, TPM2_ST_SESSIONS, TPM2_CC_CREATE);
- if (rc)
- return rc;
-
- tpm_buf_append_u32(&buf, options->keyhandle);
- tpm2_buf_append_auth(&buf, TPM2_RS_PW,
- NULL /* nonce */, 0,
- 0 /* session_attributes */,
- options->keyauth /* hmac */,
- TPM_DIGEST_SIZE);
-
- /* sensitive */
- tpm_buf_append_u16(&buf, 4 + TPM_DIGEST_SIZE + payload->key_len + 1);
-
- tpm_buf_append_u16(&buf, TPM_DIGEST_SIZE);
- tpm_buf_append(&buf, options->blobauth, TPM_DIGEST_SIZE);
- tpm_buf_append_u16(&buf, payload->key_len + 1);
- tpm_buf_append(&buf, payload->key, payload->key_len);
- tpm_buf_append_u8(&buf, payload->migratable);
-
- /* public */
- tpm_buf_append_u16(&buf, 14 + options->policydigest_len);
- tpm_buf_append_u16(&buf, TPM_ALG_KEYEDHASH);
- tpm_buf_append_u16(&buf, hash);
-
- /* policy */
- if (options->policydigest_len) {
- tpm_buf_append_u32(&buf, 0);
- tpm_buf_append_u16(&buf, options->policydigest_len);
- tpm_buf_append(&buf, options->policydigest,
- options->policydigest_len);
- } else {
- tpm_buf_append_u32(&buf, TPM2_OA_USER_WITH_AUTH);
- tpm_buf_append_u16(&buf, 0);
- }
-
- /* public parameters */
- tpm_buf_append_u16(&buf, TPM_ALG_NULL);
- tpm_buf_append_u16(&buf, 0);
-
- /* outside info */
- tpm_buf_append_u16(&buf, 0);
-
- /* creation PCR */
- tpm_buf_append_u32(&buf, 0);
-
- if (buf.flags & TPM_BUF_OVERFLOW) {
- rc = -E2BIG;
- goto out;
- }
-
- rc = tpm_transmit_cmd(chip, &buf, 4, "sealing data");
- if (rc)
- goto out;
-
- blob_len = be32_to_cpup((__be32 *) &buf.data[TPM_HEADER_SIZE]);
- if (blob_len > MAX_BLOB_SIZE) {
- rc = -E2BIG;
- goto out;
- }
- if (tpm_buf_length(&buf) < TPM_HEADER_SIZE + 4 + blob_len) {
- rc = -EFAULT;
- goto out;
- }
-
- memcpy(payload->blob, &buf.data[TPM_HEADER_SIZE + 4], blob_len);
- payload->blob_len = blob_len;
-
-out:
- tpm_buf_destroy(&buf);
-
- if (rc > 0) {
- if (tpm2_rc_value(rc) == TPM2_RC_HASH)
- rc = -EINVAL;
- else
- rc = -EPERM;
- }
-
- return rc;
-}
-
-/**
- * tpm2_load_cmd() - execute a TPM2_Load command
- *
- * @chip: TPM chip to use
- * @payload: the key data in clear and encrypted form
- * @options: authentication values and other options
- * @blob_handle: returned blob handle
- *
- * Return: 0 on success.
- * -E2BIG on wrong payload size.
- * -EPERM on tpm error status.
- * < 0 error from tpm_transmit_cmd.
- */
-static int tpm2_load_cmd(struct tpm_chip *chip,
- struct trusted_key_payload *payload,
- struct trusted_key_options *options,
- u32 *blob_handle)
-{
- struct tpm_buf buf;
- unsigned int private_len;
- unsigned int public_len;
- unsigned int blob_len;
- int rc;
-
- private_len = be16_to_cpup((__be16 *) &payload->blob[0]);
- if (private_len > (payload->blob_len - 2))
- return -E2BIG;
-
- public_len = be16_to_cpup((__be16 *) &payload->blob[2 + private_len]);
- blob_len = private_len + public_len + 4;
- if (blob_len > payload->blob_len)
- return -E2BIG;
-
- rc = tpm_buf_init(&buf, TPM2_ST_SESSIONS, TPM2_CC_LOAD);
- if (rc)
- return rc;
-
- tpm_buf_append_u32(&buf, options->keyhandle);
- tpm2_buf_append_auth(&buf, TPM2_RS_PW,
- NULL /* nonce */, 0,
- 0 /* session_attributes */,
- options->keyauth /* hmac */,
- TPM_DIGEST_SIZE);
-
- tpm_buf_append(&buf, payload->blob, blob_len);
-
- if (buf.flags & TPM_BUF_OVERFLOW) {
- rc = -E2BIG;
- goto out;
- }
-
- rc = tpm_transmit_cmd(chip, &buf, 4, "loading blob");
- if (!rc)
- *blob_handle = be32_to_cpup(
- (__be32 *) &buf.data[TPM_HEADER_SIZE]);
-
-out:
- tpm_buf_destroy(&buf);
-
- if (rc > 0)
- rc = -EPERM;
-
- return rc;
-}
-
-/**
- * tpm2_unseal_cmd() - execute a TPM2_Unload command
- *
- * @chip: TPM chip to use
- * @payload: the key data in clear and encrypted form
- * @options: authentication values and other options
- * @blob_handle: blob handle
- *
- * Return: 0 on success
- * -EPERM on tpm error status
- * < 0 error from tpm_transmit_cmd
- */
-static int tpm2_unseal_cmd(struct tpm_chip *chip,
- struct trusted_key_payload *payload,
- struct trusted_key_options *options,
- u32 blob_handle)
-{
- struct tpm_buf buf;
- u16 data_len;
- u8 *data;
- int rc;
-
- rc = tpm_buf_init(&buf, TPM2_ST_SESSIONS, TPM2_CC_UNSEAL);
- if (rc)
- return rc;
-
- tpm_buf_append_u32(&buf, blob_handle);
- tpm2_buf_append_auth(&buf,
- options->policyhandle ?
- options->policyhandle : TPM2_RS_PW,
- NULL /* nonce */, 0,
- TPM2_SA_CONTINUE_SESSION,
- options->blobauth /* hmac */,
- TPM_DIGEST_SIZE);
-
- rc = tpm_transmit_cmd(chip, &buf, 6, "unsealing");
- if (rc > 0)
- rc = -EPERM;
-
- if (!rc) {
- data_len = be16_to_cpup(
- (__be16 *) &buf.data[TPM_HEADER_SIZE + 4]);
- if (data_len < MIN_KEY_SIZE || data_len > MAX_KEY_SIZE + 1) {
- rc = -EFAULT;
- goto out;
- }
-
- if (tpm_buf_length(&buf) < TPM_HEADER_SIZE + 6 + data_len) {
- rc = -EFAULT;
- goto out;
- }
- data = &buf.data[TPM_HEADER_SIZE + 6];
-
- memcpy(payload->key, data, data_len - 1);
- payload->key_len = data_len - 1;
- payload->migratable = data[data_len - 1];
- }
-
-out:
- tpm_buf_destroy(&buf);
- return rc;
-}
-
-/**
- * tpm2_unseal_trusted() - unseal the payload of a trusted key
- *
- * @chip: TPM chip to use
- * @payload: the key data in clear and encrypted form
- * @options: authentication values and other options
- *
- * Return: Same as with tpm_transmit_cmd.
- */
-int tpm2_unseal_trusted(struct tpm_chip *chip,
- struct trusted_key_payload *payload,
- struct trusted_key_options *options)
-{
- u32 blob_handle;
- int rc;
-
- rc = tpm2_load_cmd(chip, payload, options, &blob_handle);
- if (rc)
- return rc;
-
- rc = tpm2_unseal_cmd(chip, payload, options, blob_handle);
- tpm2_flush_context(chip, blob_handle);
- return rc;
-}
-
struct tpm2_get_cap_out {
u8 more_data;
__be32 subcap_id;
@@ -939,6 +632,10 @@ static int tpm2_get_cc_attrs_tbl(struct tpm_chip *chip)
chip->cc_attrs_tbl = devm_kcalloc(&chip->dev, 4, nr_commands,
GFP_KERNEL);
+ if (!chip->cc_attrs_tbl) {
+ rc = -ENOMEM;
+ goto out;
+ }
rc = tpm_buf_init(&buf, TPM2_ST_NO_SESSIONS, TPM2_CC_GET_CAPABILITY);
if (rc)
diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c
index e59f1f91d7f3..a9dcf31eadd2 100644
--- a/drivers/char/tpm/tpm_crb.c
+++ b/drivers/char/tpm/tpm_crb.c
@@ -22,6 +22,7 @@
#include "tpm.h"
#define ACPI_SIG_TPM2 "TPM2"
+#define TPM_CRB_MAX_RESOURCES 3
static const guid_t crb_acpi_start_guid =
GUID_INIT(0x6BBF6CAB, 0x5463, 0x4714,
@@ -91,7 +92,6 @@ enum crb_status {
struct crb_priv {
u32 sm;
const char *hid;
- void __iomem *iobase;
struct crb_regs_head __iomem *regs_h;
struct crb_regs_tail __iomem *regs_t;
u8 __iomem *cmd;
@@ -434,21 +434,27 @@ static const struct tpm_class_ops tpm_crb = {
static int crb_check_resource(struct acpi_resource *ares, void *data)
{
- struct resource *io_res = data;
+ struct resource *iores_array = data;
struct resource_win win;
struct resource *res = &(win.res);
+ int i;
if (acpi_dev_resource_memory(ares, res) ||
acpi_dev_resource_address_space(ares, &win)) {
- *io_res = *res;
- io_res->name = NULL;
+ for (i = 0; i < TPM_CRB_MAX_RESOURCES + 1; ++i) {
+ if (resource_type(iores_array + i) != IORESOURCE_MEM) {
+ iores_array[i] = *res;
+ iores_array[i].name = NULL;
+ break;
+ }
+ }
}
return 1;
}
-static void __iomem *crb_map_res(struct device *dev, struct crb_priv *priv,
- struct resource *io_res, u64 start, u32 size)
+static void __iomem *crb_map_res(struct device *dev, struct resource *iores,
+ void __iomem **iobase_ptr, u64 start, u32 size)
{
struct resource new_res = {
.start = start,
@@ -460,10 +466,16 @@ static void __iomem *crb_map_res(struct device *dev, struct crb_priv *priv,
if (start != new_res.start)
return (void __iomem *) ERR_PTR(-EINVAL);
- if (!resource_contains(io_res, &new_res))
+ if (!iores)
return devm_ioremap_resource(dev, &new_res);
- return priv->iobase + (new_res.start - io_res->start);
+ if (!*iobase_ptr) {
+ *iobase_ptr = devm_ioremap_resource(dev, iores);
+ if (IS_ERR(*iobase_ptr))
+ return *iobase_ptr;
+ }
+
+ return *iobase_ptr + (new_res.start - iores->start);
}
/*
@@ -490,9 +502,13 @@ static u64 crb_fixup_cmd_size(struct device *dev, struct resource *io_res,
static int crb_map_io(struct acpi_device *device, struct crb_priv *priv,
struct acpi_table_tpm2 *buf)
{
- struct list_head resources;
- struct resource io_res;
+ struct list_head acpi_resource_list;
+ struct resource iores_array[TPM_CRB_MAX_RESOURCES + 1] = { {0} };
+ void __iomem *iobase_array[TPM_CRB_MAX_RESOURCES] = {NULL};
struct device *dev = &device->dev;
+ struct resource *iores;
+ void __iomem **iobase_ptr;
+ int i;
u32 pa_high, pa_low;
u64 cmd_pa;
u32 cmd_size;
@@ -501,21 +517,41 @@ static int crb_map_io(struct acpi_device *device, struct crb_priv *priv,
u32 rsp_size;
int ret;
- INIT_LIST_HEAD(&resources);
- ret = acpi_dev_get_resources(device, &resources, crb_check_resource,
- &io_res);
+ INIT_LIST_HEAD(&acpi_resource_list);
+ ret = acpi_dev_get_resources(device, &acpi_resource_list,
+ crb_check_resource, iores_array);
if (ret < 0)
return ret;
- acpi_dev_free_resource_list(&resources);
+ acpi_dev_free_resource_list(&acpi_resource_list);
- if (resource_type(&io_res) != IORESOURCE_MEM) {
+ if (resource_type(iores_array) != IORESOURCE_MEM) {
dev_err(dev, FW_BUG "TPM2 ACPI table does not define a memory resource\n");
return -EINVAL;
+ } else if (resource_type(iores_array + TPM_CRB_MAX_RESOURCES) ==
+ IORESOURCE_MEM) {
+ dev_warn(dev, "TPM2 ACPI table defines too many memory resources\n");
+ memset(iores_array + TPM_CRB_MAX_RESOURCES,
+ 0, sizeof(*iores_array));
+ iores_array[TPM_CRB_MAX_RESOURCES].flags = 0;
}
- priv->iobase = devm_ioremap_resource(dev, &io_res);
- if (IS_ERR(priv->iobase))
- return PTR_ERR(priv->iobase);
+ iores = NULL;
+ iobase_ptr = NULL;
+ for (i = 0; resource_type(iores_array + i) == IORESOURCE_MEM; ++i) {
+ if (buf->control_address >= iores_array[i].start &&
+ buf->control_address + sizeof(struct crb_regs_tail) - 1 <=
+ iores_array[i].end) {
+ iores = iores_array + i;
+ iobase_ptr = iobase_array + i;
+ break;
+ }
+ }
+
+ priv->regs_t = crb_map_res(dev, iores, iobase_ptr, buf->control_address,
+ sizeof(struct crb_regs_tail));
+
+ if (IS_ERR(priv->regs_t))
+ return PTR_ERR(priv->regs_t);
/* The ACPI IO region starts at the head area and continues to include
* the control area, as one nice sane region except for some older
@@ -523,9 +559,10 @@ static int crb_map_io(struct acpi_device *device, struct crb_priv *priv,
*/
if ((priv->sm == ACPI_TPM2_COMMAND_BUFFER) ||
(priv->sm == ACPI_TPM2_MEMORY_MAPPED)) {
- if (buf->control_address == io_res.start +
+ if (iores &&
+ buf->control_address == iores->start +
sizeof(*priv->regs_h))
- priv->regs_h = priv->iobase;
+ priv->regs_h = *iobase_ptr;
else
dev_warn(dev, FW_BUG "Bad ACPI memory layout");
}
@@ -534,13 +571,6 @@ static int crb_map_io(struct acpi_device *device, struct crb_priv *priv,
if (ret)
return ret;
- priv->regs_t = crb_map_res(dev, priv, &io_res, buf->control_address,
- sizeof(struct crb_regs_tail));
- if (IS_ERR(priv->regs_t)) {
- ret = PTR_ERR(priv->regs_t);
- goto out_relinquish_locality;
- }
-
/*
* PTT HW bug w/a: wake up the device to access
* possibly not retained registers.
@@ -552,13 +582,26 @@ static int crb_map_io(struct acpi_device *device, struct crb_priv *priv,
pa_high = ioread32(&priv->regs_t->ctrl_cmd_pa_high);
pa_low = ioread32(&priv->regs_t->ctrl_cmd_pa_low);
cmd_pa = ((u64)pa_high << 32) | pa_low;
- cmd_size = crb_fixup_cmd_size(dev, &io_res, cmd_pa,
- ioread32(&priv->regs_t->ctrl_cmd_size));
+ cmd_size = ioread32(&priv->regs_t->ctrl_cmd_size);
+
+ iores = NULL;
+ iobase_ptr = NULL;
+ for (i = 0; iores_array[i].end; ++i) {
+ if (cmd_pa >= iores_array[i].start &&
+ cmd_pa <= iores_array[i].end) {
+ iores = iores_array + i;
+ iobase_ptr = iobase_array + i;
+ break;
+ }
+ }
+
+ if (iores)
+ cmd_size = crb_fixup_cmd_size(dev, iores, cmd_pa, cmd_size);
dev_dbg(dev, "cmd_hi = %X cmd_low = %X cmd_size %X\n",
pa_high, pa_low, cmd_size);
- priv->cmd = crb_map_res(dev, priv, &io_res, cmd_pa, cmd_size);
+ priv->cmd = crb_map_res(dev, iores, iobase_ptr, cmd_pa, cmd_size);
if (IS_ERR(priv->cmd)) {
ret = PTR_ERR(priv->cmd);
goto out;
@@ -566,11 +609,25 @@ static int crb_map_io(struct acpi_device *device, struct crb_priv *priv,
memcpy_fromio(&__rsp_pa, &priv->regs_t->ctrl_rsp_pa, 8);
rsp_pa = le64_to_cpu(__rsp_pa);
- rsp_size = crb_fixup_cmd_size(dev, &io_res, rsp_pa,
- ioread32(&priv->regs_t->ctrl_rsp_size));
+ rsp_size = ioread32(&priv->regs_t->ctrl_rsp_size);
+
+ iores = NULL;
+ iobase_ptr = NULL;
+ for (i = 0; resource_type(iores_array + i) == IORESOURCE_MEM; ++i) {
+ if (rsp_pa >= iores_array[i].start &&
+ rsp_pa <= iores_array[i].end) {
+ iores = iores_array + i;
+ iobase_ptr = iobase_array + i;
+ break;
+ }
+ }
+
+ if (iores)
+ rsp_size = crb_fixup_cmd_size(dev, iores, rsp_pa, rsp_size);
if (cmd_pa != rsp_pa) {
- priv->rsp = crb_map_res(dev, priv, &io_res, rsp_pa, rsp_size);
+ priv->rsp = crb_map_res(dev, iores, iobase_ptr,
+ rsp_pa, rsp_size);
ret = PTR_ERR_OR_ZERO(priv->rsp);
goto out;
}
diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
index e4fdde93ed4c..e7df342a317d 100644
--- a/drivers/char/tpm/tpm_tis.c
+++ b/drivers/char/tpm/tpm_tis.c
@@ -286,7 +286,7 @@ static int tpm_tis_plat_probe(struct platform_device *pdev)
}
tpm_info.res = *res;
- tpm_info.irq = platform_get_irq(pdev, 0);
+ tpm_info.irq = platform_get_irq_optional(pdev, 0);
if (tpm_info.irq <= 0) {
if (pdev != force_pdev)
tpm_info.irq = -1;
diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c
index 270f43acbb77..8af2cee1a762 100644
--- a/drivers/char/tpm/tpm_tis_core.c
+++ b/drivers/char/tpm/tpm_tis_core.c
@@ -506,6 +506,84 @@ static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len)
return rc;
}
+struct tis_vendor_durations_override {
+ u32 did_vid;
+ struct tpm1_version version;
+ unsigned long durations[3];
+};
+
+static const struct tis_vendor_durations_override vendor_dur_overrides[] = {
+ /* STMicroelectronics 0x104a */
+ { 0x0000104a,
+ { 1, 2, 8, 28 },
+ { (2 * 60 * HZ), (2 * 60 * HZ), (2 * 60 * HZ) } },
+};
+
+static void tpm_tis_update_durations(struct tpm_chip *chip,
+ unsigned long *duration_cap)
+{
+ struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
+ struct tpm1_version *version;
+ u32 did_vid;
+ int i, rc;
+ cap_t cap;
+
+ chip->duration_adjusted = false;
+
+ if (chip->ops->clk_enable != NULL)
+ chip->ops->clk_enable(chip, true);
+
+ rc = tpm_tis_read32(priv, TPM_DID_VID(0), &did_vid);
+ if (rc < 0) {
+ dev_warn(&chip->dev, "%s: failed to read did_vid. %d\n",
+ __func__, rc);
+ goto out;
+ }
+
+ /* Try to get a TPM version 1.2 or 1.1 TPM_CAP_VERSION_INFO */
+ rc = tpm1_getcap(chip, TPM_CAP_VERSION_1_2, &cap,
+ "attempting to determine the 1.2 version",
+ sizeof(cap.version2));
+ if (!rc) {
+ version = &cap.version2.version;
+ } else {
+ rc = tpm1_getcap(chip, TPM_CAP_VERSION_1_1, &cap,
+ "attempting to determine the 1.1 version",
+ sizeof(cap.version1));
+
+ if (rc)
+ goto out;
+
+ version = &cap.version1;
+ }
+
+ for (i = 0; i != ARRAY_SIZE(vendor_dur_overrides); i++) {
+ if (vendor_dur_overrides[i].did_vid != did_vid)
+ continue;
+
+ if ((version->major ==
+ vendor_dur_overrides[i].version.major) &&
+ (version->minor ==
+ vendor_dur_overrides[i].version.minor) &&
+ (version->rev_major ==
+ vendor_dur_overrides[i].version.rev_major) &&
+ (version->rev_minor ==
+ vendor_dur_overrides[i].version.rev_minor)) {
+
+ memcpy(duration_cap,
+ vendor_dur_overrides[i].durations,
+ sizeof(vendor_dur_overrides[i].durations));
+
+ chip->duration_adjusted = true;
+ goto out;
+ }
+ }
+
+out:
+ if (chip->ops->clk_enable != NULL)
+ chip->ops->clk_enable(chip, false);
+}
+
struct tis_vendor_timeout_override {
u32 did_vid;
unsigned long timeout_us[4];
@@ -842,6 +920,7 @@ static const struct tpm_class_ops tpm_tis = {
.send = tpm_tis_send,
.cancel = tpm_tis_ready,
.update_timeouts = tpm_tis_update_timeouts,
+ .update_durations = tpm_tis_update_durations,
.req_complete_mask = TPM_STS_DATA_AVAIL | TPM_STS_VALID,
.req_complete_val = TPM_STS_DATA_AVAIL | TPM_STS_VALID,
.req_canceled = tpm_tis_req_canceled,
diff --git a/drivers/char/tpm/tpm_tis_spi.c b/drivers/char/tpm/tpm_tis_spi.c
index 19513e622053..d1754fd6c573 100644
--- a/drivers/char/tpm/tpm_tis_spi.c
+++ b/drivers/char/tpm/tpm_tis_spi.c
@@ -20,42 +20,64 @@
* Dorn and Kyleen Hall and Jarko Sakkinnen.
*/
+#include <linux/acpi.h>
+#include <linux/completion.h>
#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/moduleparam.h>
#include <linux/slab.h>
-#include <linux/interrupt.h>
-#include <linux/wait.h>
-#include <linux/acpi.h>
-#include <linux/freezer.h>
+#include <linux/of_device.h>
#include <linux/spi/spi.h>
-#include <linux/gpio.h>
-#include <linux/of_irq.h>
-#include <linux/of_gpio.h>
#include <linux/tpm.h>
+
#include "tpm.h"
#include "tpm_tis_core.h"
+#include "tpm_tis_spi.h"
#define MAX_SPI_FRAMESIZE 64
-struct tpm_tis_spi_phy {
- struct tpm_tis_data priv;
- struct spi_device *spi_device;
- u8 *iobuf;
-};
-
-static inline struct tpm_tis_spi_phy *to_tpm_tis_spi_phy(struct tpm_tis_data *data)
+/*
+ * TCG SPI flow control is documented in section 6.4 of the spec[1]. In short,
+ * keep trying to read from the device until MISO goes high indicating the
+ * wait state has ended.
+ *
+ * [1] https://trustedcomputinggroup.org/resource/pc-client-platform-tpm-profile-ptp-specification/
+ */
+static int tpm_tis_spi_flow_control(struct tpm_tis_spi_phy *phy,
+ struct spi_transfer *spi_xfer)
{
- return container_of(data, struct tpm_tis_spi_phy, priv);
+ struct spi_message m;
+ int ret, i;
+
+ if ((phy->iobuf[3] & 0x01) == 0) {
+ // handle SPI wait states
+ phy->iobuf[0] = 0;
+
+ for (i = 0; i < TPM_RETRY; i++) {
+ spi_xfer->len = 1;
+ spi_message_init(&m);
+ spi_message_add_tail(spi_xfer, &m);
+ ret = spi_sync_locked(phy->spi_device, &m);
+ if (ret < 0)
+ return ret;
+ if (phy->iobuf[0] & 0x01)
+ break;
+ }
+
+ if (i == TPM_RETRY)
+ return -ETIMEDOUT;
+ }
+
+ return 0;
}
-static int tpm_tis_spi_transfer(struct tpm_tis_data *data, u32 addr, u16 len,
- u8 *in, const u8 *out)
+int tpm_tis_spi_transfer(struct tpm_tis_data *data, u32 addr, u16 len,
+ u8 *in, const u8 *out)
{
struct tpm_tis_spi_phy *phy = to_tpm_tis_spi_phy(data);
int ret = 0;
- int i;
struct spi_message m;
struct spi_transfer spi_xfer;
u8 transfer_len;
@@ -82,26 +104,9 @@ static int tpm_tis_spi_transfer(struct tpm_tis_data *data, u32 addr, u16 len,
if (ret < 0)
goto exit;
- if ((phy->iobuf[3] & 0x01) == 0) {
- // handle SPI wait states
- phy->iobuf[0] = 0;
-
- for (i = 0; i < TPM_RETRY; i++) {
- spi_xfer.len = 1;
- spi_message_init(&m);
- spi_message_add_tail(&spi_xfer, &m);
- ret = spi_sync_locked(phy->spi_device, &m);
- if (ret < 0)
- goto exit;
- if (phy->iobuf[0] & 0x01)
- break;
- }
-
- if (i == TPM_RETRY) {
- ret = -ETIMEDOUT;
- goto exit;
- }
- }
+ ret = phy->flow_control(phy, &spi_xfer);
+ if (ret < 0)
+ goto exit;
spi_xfer.cs_change = 0;
spi_xfer.len = transfer_len;
@@ -117,6 +122,7 @@ static int tpm_tis_spi_transfer(struct tpm_tis_data *data, u32 addr, u16 len,
spi_message_init(&m);
spi_message_add_tail(&spi_xfer, &m);
+ reinit_completion(&phy->ready);
ret = spi_sync_locked(phy->spi_device, &m);
if (ret < 0)
goto exit;
@@ -146,7 +152,7 @@ static int tpm_tis_spi_write_bytes(struct tpm_tis_data *data, u32 addr,
return tpm_tis_spi_transfer(data, addr, len, NULL, value);
}
-static int tpm_tis_spi_read16(struct tpm_tis_data *data, u32 addr, u16 *result)
+int tpm_tis_spi_read16(struct tpm_tis_data *data, u32 addr, u16 *result)
{
__le16 result_le;
int rc;
@@ -159,7 +165,7 @@ static int tpm_tis_spi_read16(struct tpm_tis_data *data, u32 addr, u16 *result)
return rc;
}
-static int tpm_tis_spi_read32(struct tpm_tis_data *data, u32 addr, u32 *result)
+int tpm_tis_spi_read32(struct tpm_tis_data *data, u32 addr, u32 *result)
{
__le32 result_le;
int rc;
@@ -172,7 +178,7 @@ static int tpm_tis_spi_read32(struct tpm_tis_data *data, u32 addr, u32 *result)
return rc;
}
-static int tpm_tis_spi_write32(struct tpm_tis_data *data, u32 addr, u32 value)
+int tpm_tis_spi_write32(struct tpm_tis_data *data, u32 addr, u32 value)
{
__le32 value_le;
int rc;
@@ -184,6 +190,18 @@ static int tpm_tis_spi_write32(struct tpm_tis_data *data, u32 addr, u32 value)
return rc;
}
+int tpm_tis_spi_init(struct spi_device *spi, struct tpm_tis_spi_phy *phy,
+ int irq, const struct tpm_tis_phy_ops *phy_ops)
+{
+ phy->iobuf = devm_kmalloc(&spi->dev, MAX_SPI_FRAMESIZE, GFP_KERNEL);
+ if (!phy->iobuf)
+ return -ENOMEM;
+
+ phy->spi_device = spi;
+
+ return tpm_tis_core_init(&spi->dev, &phy->priv, irq, phy_ops, NULL);
+}
+
static const struct tpm_tis_phy_ops tpm_spi_phy_ops = {
.read_bytes = tpm_tis_spi_read_bytes,
.write_bytes = tpm_tis_spi_write_bytes,
@@ -202,11 +220,7 @@ static int tpm_tis_spi_probe(struct spi_device *dev)
if (!phy)
return -ENOMEM;
- phy->spi_device = dev;
-
- phy->iobuf = devm_kmalloc(&dev->dev, MAX_SPI_FRAMESIZE, GFP_KERNEL);
- if (!phy->iobuf)
- return -ENOMEM;
+ phy->flow_control = tpm_tis_spi_flow_control;
/* If the SPI device has an IRQ then use that */
if (dev->irq > 0)
@@ -214,11 +228,27 @@ static int tpm_tis_spi_probe(struct spi_device *dev)
else
irq = -1;
- return tpm_tis_core_init(&dev->dev, &phy->priv, irq, &tpm_spi_phy_ops,
- NULL);
+ init_completion(&phy->ready);
+ return tpm_tis_spi_init(dev, phy, irq, &tpm_spi_phy_ops);
+}
+
+typedef int (*tpm_tis_spi_probe_func)(struct spi_device *);
+
+static int tpm_tis_spi_driver_probe(struct spi_device *spi)
+{
+ const struct spi_device_id *spi_dev_id = spi_get_device_id(spi);
+ tpm_tis_spi_probe_func probe_func;
+
+ probe_func = of_device_get_match_data(&spi->dev);
+ if (!probe_func && spi_dev_id)
+ probe_func = (tpm_tis_spi_probe_func)spi_dev_id->driver_data;
+ if (!probe_func)
+ return -ENODEV;
+
+ return probe_func(spi);
}
-static SIMPLE_DEV_PM_OPS(tpm_tis_pm, tpm_pm_suspend, tpm_tis_resume);
+static SIMPLE_DEV_PM_OPS(tpm_tis_pm, tpm_pm_suspend, tpm_tis_spi_resume);
static int tpm_tis_spi_remove(struct spi_device *dev)
{
@@ -230,15 +260,17 @@ static int tpm_tis_spi_remove(struct spi_device *dev)
}
static const struct spi_device_id tpm_tis_spi_id[] = {
- {"tpm_tis_spi", 0},
+ { "tpm_tis_spi", (unsigned long)tpm_tis_spi_probe },
+ { "cr50", (unsigned long)cr50_spi_probe },
{}
};
MODULE_DEVICE_TABLE(spi, tpm_tis_spi_id);
static const struct of_device_id of_tis_spi_match[] = {
- { .compatible = "st,st33htpm-spi", },
- { .compatible = "infineon,slb9670", },
- { .compatible = "tcg,tpm_tis-spi", },
+ { .compatible = "st,st33htpm-spi", .data = tpm_tis_spi_probe },
+ { .compatible = "infineon,slb9670", .data = tpm_tis_spi_probe },
+ { .compatible = "tcg,tpm_tis-spi", .data = tpm_tis_spi_probe },
+ { .compatible = "google,cr50", .data = cr50_spi_probe },
{}
};
MODULE_DEVICE_TABLE(of, of_tis_spi_match);
@@ -251,13 +283,12 @@ MODULE_DEVICE_TABLE(acpi, acpi_tis_spi_match);
static struct spi_driver tpm_tis_spi_driver = {
.driver = {
- .owner = THIS_MODULE,
.name = "tpm_tis_spi",
.pm = &tpm_tis_pm,
.of_match_table = of_match_ptr(of_tis_spi_match),
.acpi_match_table = ACPI_PTR(acpi_tis_spi_match),
},
- .probe = tpm_tis_spi_probe,
+ .probe = tpm_tis_spi_driver_probe,
.remove = tpm_tis_spi_remove,
.id_table = tpm_tis_spi_id,
};
diff --git a/drivers/char/tpm/tpm_tis_spi.h b/drivers/char/tpm/tpm_tis_spi.h
new file mode 100644
index 000000000000..bba73979c368
--- /dev/null
+++ b/drivers/char/tpm/tpm_tis_spi.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2015 Infineon Technologies AG
+ * Copyright (C) 2016 STMicroelectronics SAS
+ */
+
+#ifndef TPM_TIS_SPI_H
+#define TPM_TIS_SPI_H
+
+#include "tpm_tis_core.h"
+
+struct tpm_tis_spi_phy {
+ struct tpm_tis_data priv;
+ struct spi_device *spi_device;
+ int (*flow_control)(struct tpm_tis_spi_phy *phy,
+ struct spi_transfer *xfer);
+ struct completion ready;
+ unsigned long wake_after;
+
+ u8 *iobuf;
+};
+
+static inline struct tpm_tis_spi_phy *to_tpm_tis_spi_phy(struct tpm_tis_data *data)
+{
+ return container_of(data, struct tpm_tis_spi_phy, priv);
+}
+
+extern int tpm_tis_spi_init(struct spi_device *spi, struct tpm_tis_spi_phy *phy,
+ int irq, const struct tpm_tis_phy_ops *phy_ops);
+
+extern int tpm_tis_spi_transfer(struct tpm_tis_data *data, u32 addr, u16 len,
+ u8 *in, const u8 *out);
+
+extern int tpm_tis_spi_read16(struct tpm_tis_data *data, u32 addr, u16 *result);
+extern int tpm_tis_spi_read32(struct tpm_tis_data *data, u32 addr, u32 *result);
+extern int tpm_tis_spi_write32(struct tpm_tis_data *data, u32 addr, u32 value);
+
+#ifdef CONFIG_TCG_TIS_SPI_CR50
+extern int cr50_spi_probe(struct spi_device *spi);
+#else
+static inline int cr50_spi_probe(struct spi_device *spi)
+{
+ return -ENODEV;
+}
+#endif
+
+#if defined(CONFIG_PM_SLEEP) && defined(CONFIG_TCG_TIS_SPI_CR50)
+extern int tpm_tis_spi_resume(struct device *dev);
+#else
+#define tpm_tis_spi_resume NULL
+#endif
+
+#endif
diff --git a/drivers/char/tpm/tpm_tis_spi_cr50.c b/drivers/char/tpm/tpm_tis_spi_cr50.c
new file mode 100644
index 000000000000..37d72e818335
--- /dev/null
+++ b/drivers/char/tpm/tpm_tis_spi_cr50.c
@@ -0,0 +1,322 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2016 Google, Inc
+ *
+ * This device driver implements a TCG PTP FIFO interface over SPI for chips
+ * with Cr50 firmware.
+ * It is based on tpm_tis_spi driver by Peter Huewe and Christophe Ricard.
+ */
+
+#include <linux/completion.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/pm.h>
+#include <linux/spi/spi.h>
+#include <linux/wait.h>
+
+#include "tpm_tis_core.h"
+#include "tpm_tis_spi.h"
+
+/*
+ * Cr50 timing constants:
+ * - can go to sleep not earlier than after CR50_SLEEP_DELAY_MSEC.
+ * - needs up to CR50_WAKE_START_DELAY_USEC to wake after sleep.
+ * - requires waiting for "ready" IRQ, if supported; or waiting for at least
+ * CR50_NOIRQ_ACCESS_DELAY_MSEC between transactions, if IRQ is not supported.
+ * - waits for up to CR50_FLOW_CONTROL for flow control 'ready' indication.
+ */
+#define CR50_SLEEP_DELAY_MSEC 1000
+#define CR50_WAKE_START_DELAY_USEC 1000
+#define CR50_NOIRQ_ACCESS_DELAY msecs_to_jiffies(2)
+#define CR50_READY_IRQ_TIMEOUT msecs_to_jiffies(TPM2_TIMEOUT_A)
+#define CR50_FLOW_CONTROL msecs_to_jiffies(TPM2_TIMEOUT_A)
+#define MAX_IRQ_CONFIRMATION_ATTEMPTS 3
+
+#define TPM_CR50_FW_VER(l) (0x0f90 | ((l) << 12))
+#define TPM_CR50_MAX_FW_VER_LEN 64
+
+struct cr50_spi_phy {
+ struct tpm_tis_spi_phy spi_phy;
+
+ struct mutex time_track_mutex;
+ unsigned long last_access;
+
+ unsigned long access_delay;
+
+ unsigned int irq_confirmation_attempt;
+ bool irq_needs_confirmation;
+ bool irq_confirmed;
+};
+
+static inline struct cr50_spi_phy *to_cr50_spi_phy(struct tpm_tis_spi_phy *phy)
+{
+ return container_of(phy, struct cr50_spi_phy, spi_phy);
+}
+
+/*
+ * The cr50 interrupt handler just signals waiting threads that the
+ * interrupt was asserted. It does not do any processing triggered
+ * by interrupts but is instead used to avoid fixed delays.
+ */
+static irqreturn_t cr50_spi_irq_handler(int dummy, void *dev_id)
+{
+ struct cr50_spi_phy *cr50_phy = dev_id;
+
+ cr50_phy->irq_confirmed = true;
+ complete(&cr50_phy->spi_phy.ready);
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * Cr50 needs to have at least some delay between consecutive
+ * transactions. Make sure we wait.
+ */
+static void cr50_ensure_access_delay(struct cr50_spi_phy *phy)
+{
+ unsigned long allowed_access = phy->last_access + phy->access_delay;
+ unsigned long time_now = jiffies;
+ struct device *dev = &phy->spi_phy.spi_device->dev;
+
+ /*
+ * Note: There is a small chance, if Cr50 is not accessed in a few days,
+ * that time_in_range will not provide the correct result after the wrap
+ * around for jiffies. In this case, we'll have an unneeded short delay,
+ * which is fine.
+ */
+ if (time_in_range_open(time_now, phy->last_access, allowed_access)) {
+ unsigned long remaining, timeout = allowed_access - time_now;
+
+ remaining = wait_for_completion_timeout(&phy->spi_phy.ready,
+ timeout);
+ if (!remaining && phy->irq_confirmed)
+ dev_warn(dev, "Timeout waiting for TPM ready IRQ\n");
+ }
+
+ if (phy->irq_needs_confirmation) {
+ unsigned int attempt = ++phy->irq_confirmation_attempt;
+
+ if (phy->irq_confirmed) {
+ phy->irq_needs_confirmation = false;
+ phy->access_delay = CR50_READY_IRQ_TIMEOUT;
+ dev_info(dev, "TPM ready IRQ confirmed on attempt %u\n",
+ attempt);
+ } else if (attempt > MAX_IRQ_CONFIRMATION_ATTEMPTS) {
+ phy->irq_needs_confirmation = false;
+ dev_warn(dev, "IRQ not confirmed - will use delays\n");
+ }
+ }
+}
+
+/*
+ * Cr50 might go to sleep if there is no SPI activity for some time and
+ * miss the first few bits/bytes on the bus. In such case, wake it up
+ * by asserting CS and give it time to start up.
+ */
+static bool cr50_needs_waking(struct cr50_spi_phy *phy)
+{
+ /*
+ * Note: There is a small chance, if Cr50 is not accessed in a few days,
+ * that time_in_range will not provide the correct result after the wrap
+ * around for jiffies. In this case, we'll probably timeout or read
+ * incorrect value from TPM_STS and just retry the operation.
+ */
+ return !time_in_range_open(jiffies, phy->last_access,
+ phy->spi_phy.wake_after);
+}
+
+static void cr50_wake_if_needed(struct cr50_spi_phy *cr50_phy)
+{
+ struct tpm_tis_spi_phy *phy = &cr50_phy->spi_phy;
+
+ if (cr50_needs_waking(cr50_phy)) {
+ /* Assert CS, wait 1 msec, deassert CS */
+ struct spi_transfer spi_cs_wake = { .delay_usecs = 1000 };
+
+ spi_sync_transfer(phy->spi_device, &spi_cs_wake, 1);
+ /* Wait for it to fully wake */
+ usleep_range(CR50_WAKE_START_DELAY_USEC,
+ CR50_WAKE_START_DELAY_USEC * 2);
+ }
+
+ /* Reset the time when we need to wake Cr50 again */
+ phy->wake_after = jiffies + msecs_to_jiffies(CR50_SLEEP_DELAY_MSEC);
+}
+
+/*
+ * Flow control: clock the bus and wait for cr50 to set LSB before
+ * sending/receiving data. TCG PTP spec allows it to happen during
+ * the last byte of header, but cr50 never does that in practice,
+ * and earlier versions had a bug when it was set too early, so don't
+ * check for it during header transfer.
+ */
+static int cr50_spi_flow_control(struct tpm_tis_spi_phy *phy,
+ struct spi_transfer *spi_xfer)
+{
+ struct device *dev = &phy->spi_device->dev;
+ unsigned long timeout = jiffies + CR50_FLOW_CONTROL;
+ struct spi_message m;
+ int ret;
+
+ spi_xfer->len = 1;
+
+ do {
+ spi_message_init(&m);
+ spi_message_add_tail(spi_xfer, &m);
+ ret = spi_sync_locked(phy->spi_device, &m);
+ if (ret < 0)
+ return ret;
+
+ if (time_after(jiffies, timeout)) {
+ dev_warn(dev, "Timeout during flow control\n");
+ return -EBUSY;
+ }
+ } while (!(phy->iobuf[0] & 0x01));
+
+ return 0;
+}
+
+static int tpm_tis_spi_cr50_transfer(struct tpm_tis_data *data, u32 addr, u16 len,
+ u8 *in, const u8 *out)
+{
+ struct tpm_tis_spi_phy *phy = to_tpm_tis_spi_phy(data);
+ struct cr50_spi_phy *cr50_phy = to_cr50_spi_phy(phy);
+ int ret;
+
+ mutex_lock(&cr50_phy->time_track_mutex);
+ /*
+ * Do this outside of spi_bus_lock in case cr50 is not the
+ * only device on that spi bus.
+ */
+ cr50_ensure_access_delay(cr50_phy);
+ cr50_wake_if_needed(cr50_phy);
+
+ ret = tpm_tis_spi_transfer(data, addr, len, in, out);
+
+ cr50_phy->last_access = jiffies;
+ mutex_unlock(&cr50_phy->time_track_mutex);
+
+ return ret;
+}
+
+static int tpm_tis_spi_cr50_read_bytes(struct tpm_tis_data *data, u32 addr,
+ u16 len, u8 *result)
+{
+ return tpm_tis_spi_cr50_transfer(data, addr, len, result, NULL);
+}
+
+static int tpm_tis_spi_cr50_write_bytes(struct tpm_tis_data *data, u32 addr,
+ u16 len, const u8 *value)
+{
+ return tpm_tis_spi_cr50_transfer(data, addr, len, NULL, value);
+}
+
+static const struct tpm_tis_phy_ops tpm_spi_cr50_phy_ops = {
+ .read_bytes = tpm_tis_spi_cr50_read_bytes,
+ .write_bytes = tpm_tis_spi_cr50_write_bytes,
+ .read16 = tpm_tis_spi_read16,
+ .read32 = tpm_tis_spi_read32,
+ .write32 = tpm_tis_spi_write32,
+};
+
+static void cr50_print_fw_version(struct tpm_tis_data *data)
+{
+ struct tpm_tis_spi_phy *phy = to_tpm_tis_spi_phy(data);
+ int i, len = 0;
+ char fw_ver[TPM_CR50_MAX_FW_VER_LEN + 1];
+ char fw_ver_block[4];
+
+ /*
+ * Write anything to TPM_CR50_FW_VER to start from the beginning
+ * of the version string
+ */
+ tpm_tis_write8(data, TPM_CR50_FW_VER(data->locality), 0);
+
+ /* Read the string, 4 bytes at a time, until we get '\0' */
+ do {
+ tpm_tis_read_bytes(data, TPM_CR50_FW_VER(data->locality), 4,
+ fw_ver_block);
+ for (i = 0; i < 4 && fw_ver_block[i]; ++len, ++i)
+ fw_ver[len] = fw_ver_block[i];
+ } while (i == 4 && len < TPM_CR50_MAX_FW_VER_LEN);
+ fw_ver[len] = '\0';
+
+ dev_info(&phy->spi_device->dev, "Cr50 firmware version: %s\n", fw_ver);
+}
+
+int cr50_spi_probe(struct spi_device *spi)
+{
+ struct tpm_tis_spi_phy *phy;
+ struct cr50_spi_phy *cr50_phy;
+ int ret;
+ struct tpm_chip *chip;
+
+ cr50_phy = devm_kzalloc(&spi->dev, sizeof(*cr50_phy), GFP_KERNEL);
+ if (!cr50_phy)
+ return -ENOMEM;
+
+ phy = &cr50_phy->spi_phy;
+ phy->flow_control = cr50_spi_flow_control;
+ phy->wake_after = jiffies;
+ init_completion(&phy->ready);
+
+ cr50_phy->access_delay = CR50_NOIRQ_ACCESS_DELAY;
+ cr50_phy->last_access = jiffies;
+ mutex_init(&cr50_phy->time_track_mutex);
+
+ if (spi->irq > 0) {
+ ret = devm_request_irq(&spi->dev, spi->irq,
+ cr50_spi_irq_handler,
+ IRQF_TRIGGER_RISING | IRQF_ONESHOT,
+ "cr50_spi", cr50_phy);
+ if (ret < 0) {
+ if (ret == -EPROBE_DEFER)
+ return ret;
+ dev_warn(&spi->dev, "Requesting IRQ %d failed: %d\n",
+ spi->irq, ret);
+ /*
+ * This is not fatal, the driver will fall back to
+ * delays automatically, since ready will never
+ * be completed without a registered irq handler.
+ * So, just fall through.
+ */
+ } else {
+ /*
+ * IRQ requested, let's verify that it is actually
+ * triggered, before relying on it.
+ */
+ cr50_phy->irq_needs_confirmation = true;
+ }
+ } else {
+ dev_warn(&spi->dev,
+ "No IRQ - will use delays between transactions.\n");
+ }
+
+ ret = tpm_tis_spi_init(spi, phy, -1, &tpm_spi_cr50_phy_ops);
+ if (ret)
+ return ret;
+
+ cr50_print_fw_version(&phy->priv);
+
+ chip = dev_get_drvdata(&spi->dev);
+ chip->flags |= TPM_CHIP_FLAG_FIRMWARE_POWER_MANAGED;
+
+ return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+int tpm_tis_spi_resume(struct device *dev)
+{
+ struct tpm_chip *chip = dev_get_drvdata(dev);
+ struct tpm_tis_data *data = dev_get_drvdata(&chip->dev);
+ struct tpm_tis_spi_phy *phy = to_tpm_tis_spi_phy(data);
+ /*
+ * Jiffies not increased during suspend, so we need to reset
+ * the time to wake Cr50 after resume.
+ */
+ phy->wake_after = jiffies;
+
+ return tpm_tis_resume(dev);
+}
+#endif
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 7270e7b69262..3259426f01dc 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -1325,24 +1325,24 @@ static void set_console_size(struct port *port, u16 rows, u16 cols)
port->cons.ws.ws_col = cols;
}
-static unsigned int fill_queue(struct virtqueue *vq, spinlock_t *lock)
+static int fill_queue(struct virtqueue *vq, spinlock_t *lock)
{
struct port_buffer *buf;
- unsigned int nr_added_bufs;
+ int nr_added_bufs;
int ret;
nr_added_bufs = 0;
do {
buf = alloc_buf(vq->vdev, PAGE_SIZE, 0);
if (!buf)
- break;
+ return -ENOMEM;
spin_lock_irq(lock);
ret = add_inbuf(vq, buf);
if (ret < 0) {
spin_unlock_irq(lock);
free_buf(buf, true);
- break;
+ return ret;
}
nr_added_bufs++;
spin_unlock_irq(lock);
@@ -1362,7 +1362,6 @@ static int add_port(struct ports_device *portdev, u32 id)
char debugfs_name[16];
struct port *port;
dev_t devt;
- unsigned int nr_added_bufs;
int err;
port = kmalloc(sizeof(*port), GFP_KERNEL);
@@ -1421,11 +1420,13 @@ static int add_port(struct ports_device *portdev, u32 id)
spin_lock_init(&port->outvq_lock);
init_waitqueue_head(&port->waitqueue);
- /* Fill the in_vq with buffers so the host can send us data. */
- nr_added_bufs = fill_queue(port->in_vq, &port->inbuf_lock);
- if (!nr_added_bufs) {
+ /* We can safely ignore ENOSPC because it means
+ * the queue already has buffers. Buffers are removed
+ * only by virtcons_remove(), not by unplug_port()
+ */
+ err = fill_queue(port->in_vq, &port->inbuf_lock);
+ if (err < 0 && err != -ENOSPC) {
dev_err(port->dev, "Error allocating inbufs\n");
- err = -ENOMEM;
goto free_device;
}
@@ -2059,14 +2060,11 @@ static int virtcons_probe(struct virtio_device *vdev)
INIT_WORK(&portdev->control_work, &control_work_handler);
if (multiport) {
- unsigned int nr_added_bufs;
-
spin_lock_init(&portdev->c_ivq_lock);
spin_lock_init(&portdev->c_ovq_lock);
- nr_added_bufs = fill_queue(portdev->c_ivq,
- &portdev->c_ivq_lock);
- if (!nr_added_bufs) {
+ err = fill_queue(portdev->c_ivq, &portdev->c_ivq_lock);
+ if (err < 0) {
dev_err(&vdev->dev,
"Error allocating buffers for control queue\n");
/*
@@ -2077,7 +2075,7 @@ static int virtcons_probe(struct virtio_device *vdev)
VIRTIO_CONSOLE_DEVICE_READY, 0);
/* Device was functional: we need full cleanup. */
virtcons_remove(vdev);
- return -ENOMEM;
+ return err;
}
} else {
/*
diff --git a/drivers/clk/at91/clk-main.c b/drivers/clk/at91/clk-main.c
index 87083b3a2769..37c22667e831 100644
--- a/drivers/clk/at91/clk-main.c
+++ b/drivers/clk/at91/clk-main.c
@@ -297,7 +297,10 @@ static int clk_main_probe_frequency(struct regmap *regmap)
regmap_read(regmap, AT91_CKGR_MCFR, &mcfr);
if (mcfr & AT91_PMC_MAINRDY)
return 0;
- usleep_range(MAINF_LOOP_MIN_WAIT, MAINF_LOOP_MAX_WAIT);
+ if (system_state < SYSTEM_RUNNING)
+ udelay(MAINF_LOOP_MIN_WAIT);
+ else
+ usleep_range(MAINF_LOOP_MIN_WAIT, MAINF_LOOP_MAX_WAIT);
} while (time_before(prep_time, timeout));
return -ETIMEDOUT;
diff --git a/drivers/clk/at91/sam9x60.c b/drivers/clk/at91/sam9x60.c
index 9790ddfa5b3c..86238d5ecb4d 100644
--- a/drivers/clk/at91/sam9x60.c
+++ b/drivers/clk/at91/sam9x60.c
@@ -43,6 +43,7 @@ static const struct clk_pll_characteristics upll_characteristics = {
};
static const struct clk_programmable_layout sam9x60_programmable_layout = {
+ .pres_mask = 0xff,
.pres_shift = 8,
.css_mask = 0x1f,
.have_slck_mck = 0,
diff --git a/drivers/clk/at91/sckc.c b/drivers/clk/at91/sckc.c
index 9bfe9a28294a..fac0ca56d42d 100644
--- a/drivers/clk/at91/sckc.c
+++ b/drivers/clk/at91/sckc.c
@@ -76,7 +76,10 @@ static int clk_slow_osc_prepare(struct clk_hw *hw)
writel(tmp | osc->bits->cr_osc32en, sckcr);
- usleep_range(osc->startup_usec, osc->startup_usec + 1);
+ if (system_state < SYSTEM_RUNNING)
+ udelay(osc->startup_usec);
+ else
+ usleep_range(osc->startup_usec, osc->startup_usec + 1);
return 0;
}
@@ -187,7 +190,10 @@ static int clk_slow_rc_osc_prepare(struct clk_hw *hw)
writel(readl(sckcr) | osc->bits->cr_rcen, sckcr);
- usleep_range(osc->startup_usec, osc->startup_usec + 1);
+ if (system_state < SYSTEM_RUNNING)
+ udelay(osc->startup_usec);
+ else
+ usleep_range(osc->startup_usec, osc->startup_usec + 1);
return 0;
}
@@ -288,7 +294,10 @@ static int clk_sam9x5_slow_set_parent(struct clk_hw *hw, u8 index)
writel(tmp, sckcr);
- usleep_range(SLOWCK_SW_TIME_USEC, SLOWCK_SW_TIME_USEC + 1);
+ if (system_state < SYSTEM_RUNNING)
+ udelay(SLOWCK_SW_TIME_USEC);
+ else
+ usleep_range(SLOWCK_SW_TIME_USEC, SLOWCK_SW_TIME_USEC + 1);
return 0;
}
@@ -533,7 +542,10 @@ static int clk_sama5d4_slow_osc_prepare(struct clk_hw *hw)
return 0;
}
- usleep_range(osc->startup_usec, osc->startup_usec + 1);
+ if (system_state < SYSTEM_RUNNING)
+ udelay(osc->startup_usec);
+ else
+ usleep_range(osc->startup_usec, osc->startup_usec + 1);
osc->prepared = true;
return 0;
diff --git a/drivers/clk/clk-ast2600.c b/drivers/clk/clk-ast2600.c
index 1c1bb39bb04e..b1318e6b655b 100644
--- a/drivers/clk/clk-ast2600.c
+++ b/drivers/clk/clk-ast2600.c
@@ -266,10 +266,11 @@ static int aspeed_g6_clk_enable(struct clk_hw *hw)
/* Enable clock */
if (gate->flags & CLK_GATE_SET_TO_DISABLE) {
- regmap_write(gate->map, get_clock_reg(gate), clk);
- } else {
- /* Use set to clear register */
+ /* Clock is clear to enable, so use set to clear register */
regmap_write(gate->map, get_clock_reg(gate) + 0x04, clk);
+ } else {
+ /* Clock is set to enable, so use write to set register */
+ regmap_write(gate->map, get_clock_reg(gate), clk);
}
if (gate->reset_idx >= 0) {
diff --git a/drivers/clk/imx/clk-imx8mm.c b/drivers/clk/imx/clk-imx8mm.c
index 067ab876911d..172589e94f60 100644
--- a/drivers/clk/imx/clk-imx8mm.c
+++ b/drivers/clk/imx/clk-imx8mm.c
@@ -638,7 +638,7 @@ static int imx8mm_clocks_probe(struct platform_device *pdev)
clks[IMX8MM_CLK_A53_DIV],
clks[IMX8MM_CLK_A53_SRC],
clks[IMX8MM_ARM_PLL_OUT],
- clks[IMX8MM_CLK_24M]);
+ clks[IMX8MM_SYS_PLL1_800M]);
imx_check_clocks(clks, ARRAY_SIZE(clks));
diff --git a/drivers/clk/imx/clk-imx8mn.c b/drivers/clk/imx/clk-imx8mn.c
index 47a4b44ba3cb..58b5acee3830 100644
--- a/drivers/clk/imx/clk-imx8mn.c
+++ b/drivers/clk/imx/clk-imx8mn.c
@@ -610,7 +610,7 @@ static int imx8mn_clocks_probe(struct platform_device *pdev)
clks[IMX8MN_CLK_A53_DIV],
clks[IMX8MN_CLK_A53_SRC],
clks[IMX8MN_ARM_PLL_OUT],
- clks[IMX8MN_CLK_24M]);
+ clks[IMX8MN_SYS_PLL1_800M]);
imx_check_clocks(clks, ARRAY_SIZE(clks));
diff --git a/drivers/clk/meson/g12a.c b/drivers/clk/meson/g12a.c
index ea4c791f106d..b3af61cc6fb9 100644
--- a/drivers/clk/meson/g12a.c
+++ b/drivers/clk/meson/g12a.c
@@ -343,6 +343,7 @@ static struct clk_regmap g12a_cpu_clk_premux0 = {
.offset = HHI_SYS_CPU_CLK_CNTL0,
.mask = 0x3,
.shift = 0,
+ .flags = CLK_MUX_ROUND_CLOSEST,
},
.hw.init = &(struct clk_init_data){
.name = "cpu_clk_dyn0_sel",
@@ -353,8 +354,7 @@ static struct clk_regmap g12a_cpu_clk_premux0 = {
{ .hw = &g12a_fclk_div3.hw },
},
.num_parents = 3,
- /* This sub-tree is used a parking clock */
- .flags = CLK_SET_RATE_NO_REPARENT,
+ .flags = CLK_SET_RATE_PARENT,
},
};
@@ -410,6 +410,7 @@ static struct clk_regmap g12a_cpu_clk_postmux0 = {
.offset = HHI_SYS_CPU_CLK_CNTL0,
.mask = 0x1,
.shift = 2,
+ .flags = CLK_MUX_ROUND_CLOSEST,
},
.hw.init = &(struct clk_init_data){
.name = "cpu_clk_dyn0",
@@ -466,6 +467,7 @@ static struct clk_regmap g12a_cpu_clk_dyn = {
.offset = HHI_SYS_CPU_CLK_CNTL0,
.mask = 0x1,
.shift = 10,
+ .flags = CLK_MUX_ROUND_CLOSEST,
},
.hw.init = &(struct clk_init_data){
.name = "cpu_clk_dyn",
@@ -485,6 +487,7 @@ static struct clk_regmap g12a_cpu_clk = {
.offset = HHI_SYS_CPU_CLK_CNTL0,
.mask = 0x1,
.shift = 11,
+ .flags = CLK_MUX_ROUND_CLOSEST,
},
.hw.init = &(struct clk_init_data){
.name = "cpu_clk",
@@ -504,6 +507,7 @@ static struct clk_regmap g12b_cpu_clk = {
.offset = HHI_SYS_CPU_CLK_CNTL0,
.mask = 0x1,
.shift = 11,
+ .flags = CLK_MUX_ROUND_CLOSEST,
},
.hw.init = &(struct clk_init_data){
.name = "cpu_clk",
@@ -523,6 +527,7 @@ static struct clk_regmap g12b_cpub_clk_premux0 = {
.offset = HHI_SYS_CPUB_CLK_CNTL,
.mask = 0x3,
.shift = 0,
+ .flags = CLK_MUX_ROUND_CLOSEST,
},
.hw.init = &(struct clk_init_data){
.name = "cpub_clk_dyn0_sel",
@@ -533,6 +538,7 @@ static struct clk_regmap g12b_cpub_clk_premux0 = {
{ .hw = &g12a_fclk_div3.hw },
},
.num_parents = 3,
+ .flags = CLK_SET_RATE_PARENT,
},
};
@@ -567,6 +573,7 @@ static struct clk_regmap g12b_cpub_clk_postmux0 = {
.offset = HHI_SYS_CPUB_CLK_CNTL,
.mask = 0x1,
.shift = 2,
+ .flags = CLK_MUX_ROUND_CLOSEST,
},
.hw.init = &(struct clk_init_data){
.name = "cpub_clk_dyn0",
@@ -644,6 +651,7 @@ static struct clk_regmap g12b_cpub_clk_dyn = {
.offset = HHI_SYS_CPUB_CLK_CNTL,
.mask = 0x1,
.shift = 10,
+ .flags = CLK_MUX_ROUND_CLOSEST,
},
.hw.init = &(struct clk_init_data){
.name = "cpub_clk_dyn",
@@ -663,6 +671,7 @@ static struct clk_regmap g12b_cpub_clk = {
.offset = HHI_SYS_CPUB_CLK_CNTL,
.mask = 0x1,
.shift = 11,
+ .flags = CLK_MUX_ROUND_CLOSEST,
},
.hw.init = &(struct clk_init_data){
.name = "cpub_clk",
diff --git a/drivers/clk/meson/gxbb.c b/drivers/clk/meson/gxbb.c
index 7cfb998eeb3e..1f9c056e684c 100644
--- a/drivers/clk/meson/gxbb.c
+++ b/drivers/clk/meson/gxbb.c
@@ -935,6 +935,7 @@ static struct clk_regmap gxbb_sar_adc_clk_div = {
&gxbb_sar_adc_clk_sel.hw
},
.num_parents = 1,
+ .flags = CLK_SET_RATE_PARENT,
},
};
diff --git a/drivers/clk/samsung/clk-exynos5420.c b/drivers/clk/samsung/clk-exynos5420.c
index 7670cc596c74..31466cd1842f 100644
--- a/drivers/clk/samsung/clk-exynos5420.c
+++ b/drivers/clk/samsung/clk-exynos5420.c
@@ -165,12 +165,18 @@ static const unsigned long exynos5x_clk_regs[] __initconst = {
GATE_BUS_CPU,
GATE_SCLK_CPU,
CLKOUT_CMU_CPU,
+ CPLL_CON0,
+ DPLL_CON0,
EPLL_CON0,
EPLL_CON1,
EPLL_CON2,
RPLL_CON0,
RPLL_CON1,
RPLL_CON2,
+ IPLL_CON0,
+ SPLL_CON0,
+ VPLL_CON0,
+ MPLL_CON0,
SRC_TOP0,
SRC_TOP1,
SRC_TOP2,
@@ -1172,8 +1178,6 @@ static const struct samsung_gate_clock exynos5x_gate_clks[] __initconst = {
GATE(CLK_SCLK_ISP_SENSOR2, "sclk_isp_sensor2", "dout_isp_sensor2",
GATE_TOP_SCLK_ISP, 12, CLK_SET_RATE_PARENT, 0),
- GATE(CLK_G3D, "g3d", "mout_user_aclk_g3d", GATE_IP_G3D, 9, 0, 0),
-
/* CDREX */
GATE(CLK_CLKM_PHY0, "clkm_phy0", "dout_sclk_cdrex",
GATE_BUS_CDREX0, 0, 0, 0),
@@ -1248,6 +1252,15 @@ static struct exynos5_subcmu_reg_dump exynos5x_gsc_suspend_regs[] = {
{ DIV2_RATIO0, 0, 0x30 }, /* DIV dout_gscl_blk_300 */
};
+static const struct samsung_gate_clock exynos5x_g3d_gate_clks[] __initconst = {
+ GATE(CLK_G3D, "g3d", "mout_user_aclk_g3d", GATE_IP_G3D, 9, 0, 0),
+};
+
+static struct exynos5_subcmu_reg_dump exynos5x_g3d_suspend_regs[] = {
+ { GATE_IP_G3D, 0x3ff, 0x3ff }, /* G3D gates */
+ { SRC_TOP5, 0, BIT(16) }, /* MUX mout_user_aclk_g3d */
+};
+
static const struct samsung_div_clock exynos5x_mfc_div_clks[] __initconst = {
DIV(0, "dout_mfc_blk", "mout_user_aclk333", DIV4_RATIO, 0, 2),
};
@@ -1320,6 +1333,14 @@ static const struct exynos5_subcmu_info exynos5x_gsc_subcmu = {
.pd_name = "GSC",
};
+static const struct exynos5_subcmu_info exynos5x_g3d_subcmu = {
+ .gate_clks = exynos5x_g3d_gate_clks,
+ .nr_gate_clks = ARRAY_SIZE(exynos5x_g3d_gate_clks),
+ .suspend_regs = exynos5x_g3d_suspend_regs,
+ .nr_suspend_regs = ARRAY_SIZE(exynos5x_g3d_suspend_regs),
+ .pd_name = "G3D",
+};
+
static const struct exynos5_subcmu_info exynos5x_mfc_subcmu = {
.div_clks = exynos5x_mfc_div_clks,
.nr_div_clks = ARRAY_SIZE(exynos5x_mfc_div_clks),
@@ -1351,6 +1372,7 @@ static const struct exynos5_subcmu_info exynos5800_mau_subcmu = {
static const struct exynos5_subcmu_info *exynos5x_subcmus[] = {
&exynos5x_disp_subcmu,
&exynos5x_gsc_subcmu,
+ &exynos5x_g3d_subcmu,
&exynos5x_mfc_subcmu,
&exynos5x_mscl_subcmu,
};
@@ -1358,6 +1380,7 @@ static const struct exynos5_subcmu_info *exynos5x_subcmus[] = {
static const struct exynos5_subcmu_info *exynos5800_subcmus[] = {
&exynos5x_disp_subcmu,
&exynos5x_gsc_subcmu,
+ &exynos5x_g3d_subcmu,
&exynos5x_mfc_subcmu,
&exynos5x_mscl_subcmu,
&exynos5800_mau_subcmu,
diff --git a/drivers/clk/samsung/clk-exynos5433.c b/drivers/clk/samsung/clk-exynos5433.c
index 7824c2ba3d8e..4b1aa9382ad2 100644
--- a/drivers/clk/samsung/clk-exynos5433.c
+++ b/drivers/clk/samsung/clk-exynos5433.c
@@ -13,6 +13,7 @@
#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
+#include <linux/slab.h>
#include <dt-bindings/clock/exynos5433.h>
@@ -5584,6 +5585,8 @@ static int __init exynos5433_cmu_probe(struct platform_device *pdev)
data->clk_save = samsung_clk_alloc_reg_dump(info->clk_regs,
info->nr_clk_regs);
+ if (!data->clk_save)
+ return -ENOMEM;
data->nr_clk_save = info->nr_clk_regs;
data->clk_suspend = info->suspend_regs;
data->nr_clk_suspend = info->nr_suspend_regs;
@@ -5592,12 +5595,19 @@ static int __init exynos5433_cmu_probe(struct platform_device *pdev)
if (data->nr_pclks > 0) {
data->pclks = devm_kcalloc(dev, sizeof(struct clk *),
data->nr_pclks, GFP_KERNEL);
-
+ if (!data->pclks) {
+ kfree(data->clk_save);
+ return -ENOMEM;
+ }
for (i = 0; i < data->nr_pclks; i++) {
struct clk *clk = of_clk_get(dev->of_node, i);
- if (IS_ERR(clk))
+ if (IS_ERR(clk)) {
+ kfree(data->clk_save);
+ while (--i >= 0)
+ clk_put(data->pclks[i]);
return PTR_ERR(clk);
+ }
data->pclks[i] = clk;
}
}
diff --git a/drivers/clk/sunxi-ng/ccu-sun9i-a80.c b/drivers/clk/sunxi-ng/ccu-sun9i-a80.c
index dcac1391767f..ef29582676f6 100644
--- a/drivers/clk/sunxi-ng/ccu-sun9i-a80.c
+++ b/drivers/clk/sunxi-ng/ccu-sun9i-a80.c
@@ -1224,7 +1224,7 @@ static int sun9i_a80_ccu_probe(struct platform_device *pdev)
/* Enforce d1 = 0, d2 = 0 for Audio PLL */
val = readl(reg + SUN9I_A80_PLL_AUDIO_REG);
- val &= (BIT(16) & BIT(18));
+ val &= ~(BIT(16) | BIT(18));
writel(val, reg + SUN9I_A80_PLL_AUDIO_REG);
/* Enforce P = 1 for both CPU cluster PLLs */
diff --git a/drivers/clk/sunxi/clk-sunxi.c b/drivers/clk/sunxi/clk-sunxi.c
index d3a43381a792..27201fd26e44 100644
--- a/drivers/clk/sunxi/clk-sunxi.c
+++ b/drivers/clk/sunxi/clk-sunxi.c
@@ -1080,8 +1080,8 @@ static struct clk ** __init sunxi_divs_clk_setup(struct device_node *node,
rate_hw, rate_ops,
gate_hw, &clk_gate_ops,
clkflags |
- data->div[i].critical ?
- CLK_IS_CRITICAL : 0);
+ (data->div[i].critical ?
+ CLK_IS_CRITICAL : 0));
WARN_ON(IS_ERR(clk_data->clks[i]));
}
diff --git a/drivers/clk/ti/clk-dra7-atl.c b/drivers/clk/ti/clk-dra7-atl.c
index a01ca9395179..f65e16c4f3c4 100644
--- a/drivers/clk/ti/clk-dra7-atl.c
+++ b/drivers/clk/ti/clk-dra7-atl.c
@@ -174,7 +174,6 @@ static void __init of_dra7_atl_clock_setup(struct device_node *node)
struct clk_init_data init = { NULL };
const char **parent_names = NULL;
struct clk *clk;
- int ret;
clk_hw = kzalloc(sizeof(*clk_hw), GFP_KERNEL);
if (!clk_hw) {
@@ -207,11 +206,6 @@ static void __init of_dra7_atl_clock_setup(struct device_node *node)
clk = ti_clk_register(NULL, &clk_hw->hw, node->name);
if (!IS_ERR(clk)) {
- ret = ti_clk_add_alias(NULL, clk, node->name);
- if (ret) {
- clk_unregister(clk);
- goto cleanup;
- }
of_clk_add_provider(node, of_clk_src_simple_get, clk);
kfree(parent_names);
return;
diff --git a/drivers/clk/ti/clkctrl.c b/drivers/clk/ti/clkctrl.c
index 975995eea15c..b0c0690a5a12 100644
--- a/drivers/clk/ti/clkctrl.c
+++ b/drivers/clk/ti/clkctrl.c
@@ -100,11 +100,12 @@ static bool _omap4_is_timeout(union omap4_timeout *time, u32 timeout)
* can be from a timer that requires pm_runtime access, which
* will eventually bring us here with timekeeping_suspended,
* during both suspend entry and resume paths. This happens
- * at least on am43xx platform.
+ * at least on am43xx platform. Account for flakeyness
+ * with udelay() by multiplying the timeout value by 2.
*/
if (unlikely(_early_timeout || timekeeping_suspended)) {
if (time->cycles++ < timeout) {
- udelay(1);
+ udelay(1 * 2);
return false;
}
} else {
diff --git a/drivers/clocksource/sh_mtu2.c b/drivers/clocksource/sh_mtu2.c
index 354b27d14a19..62812f80b5cc 100644
--- a/drivers/clocksource/sh_mtu2.c
+++ b/drivers/clocksource/sh_mtu2.c
@@ -328,12 +328,13 @@ static int sh_mtu2_register(struct sh_mtu2_channel *ch, const char *name)
return 0;
}
+static const unsigned int sh_mtu2_channel_offsets[] = {
+ 0x300, 0x380, 0x000,
+};
+
static int sh_mtu2_setup_channel(struct sh_mtu2_channel *ch, unsigned int index,
struct sh_mtu2_device *mtu)
{
- static const unsigned int channel_offsets[] = {
- 0x300, 0x380, 0x000,
- };
char name[6];
int irq;
int ret;
@@ -356,7 +357,7 @@ static int sh_mtu2_setup_channel(struct sh_mtu2_channel *ch, unsigned int index,
return ret;
}
- ch->base = mtu->mapbase + channel_offsets[index];
+ ch->base = mtu->mapbase + sh_mtu2_channel_offsets[index];
ch->index = index;
return sh_mtu2_register(ch, dev_name(&mtu->pdev->dev));
@@ -408,7 +409,12 @@ static int sh_mtu2_setup(struct sh_mtu2_device *mtu,
}
/* Allocate and setup the channels. */
- mtu->num_channels = 3;
+ ret = platform_irq_count(pdev);
+ if (ret < 0)
+ goto err_unmap;
+
+ mtu->num_channels = min_t(unsigned int, ret,
+ ARRAY_SIZE(sh_mtu2_channel_offsets));
mtu->channels = kcalloc(mtu->num_channels, sizeof(*mtu->channels),
GFP_KERNEL);
diff --git a/drivers/clocksource/timer-mediatek.c b/drivers/clocksource/timer-mediatek.c
index a562f491b0f8..9318edcd8963 100644
--- a/drivers/clocksource/timer-mediatek.c
+++ b/drivers/clocksource/timer-mediatek.c
@@ -268,15 +268,12 @@ static int __init mtk_syst_init(struct device_node *node)
ret = timer_of_init(node, &to);
if (ret)
- goto err;
+ return ret;
clockevents_config_and_register(&to.clkevt, timer_of_rate(&to),
TIMER_SYNC_TICKS, 0xffffffff);
return 0;
-err:
- timer_of_cleanup(&to);
- return ret;
}
static int __init mtk_gpt_init(struct device_node *node)
@@ -293,7 +290,7 @@ static int __init mtk_gpt_init(struct device_node *node)
ret = timer_of_init(node, &to);
if (ret)
- goto err;
+ return ret;
/* Configure clock source */
mtk_gpt_setup(&to, TIMER_CLK_SRC, GPT_CTRL_OP_FREERUN);
@@ -311,9 +308,6 @@ static int __init mtk_gpt_init(struct device_node *node)
mtk_gpt_enable_irq(&to, TIMER_CLK_EVT);
return 0;
-err:
- timer_of_cleanup(&to);
- return ret;
}
TIMER_OF_DECLARE(mtk_mt6577, "mediatek,mt6577-timer", mtk_gpt_init);
TIMER_OF_DECLARE(mtk_mt6765, "mediatek,mt6765-timer", mtk_syst_init);
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index c52d6fa32aac..48a224a6b178 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -720,7 +720,7 @@ static ssize_t store_##file_name \
if (ret != 1) \
return -EINVAL; \
\
- ret = dev_pm_qos_update_request(policy->object##_freq_req, val);\
+ ret = freq_qos_update_request(policy->object##_freq_req, val);\
return ret >= 0 ? count : ret; \
}
@@ -1202,19 +1202,21 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu)
goto err_free_real_cpus;
}
+ freq_constraints_init(&policy->constraints);
+
policy->nb_min.notifier_call = cpufreq_notifier_min;
policy->nb_max.notifier_call = cpufreq_notifier_max;
- ret = dev_pm_qos_add_notifier(dev, &policy->nb_min,
- DEV_PM_QOS_MIN_FREQUENCY);
+ ret = freq_qos_add_notifier(&policy->constraints, FREQ_QOS_MIN,
+ &policy->nb_min);
if (ret) {
dev_err(dev, "Failed to register MIN QoS notifier: %d (%*pbl)\n",
ret, cpumask_pr_args(policy->cpus));
goto err_kobj_remove;
}
- ret = dev_pm_qos_add_notifier(dev, &policy->nb_max,
- DEV_PM_QOS_MAX_FREQUENCY);
+ ret = freq_qos_add_notifier(&policy->constraints, FREQ_QOS_MAX,
+ &policy->nb_max);
if (ret) {
dev_err(dev, "Failed to register MAX QoS notifier: %d (%*pbl)\n",
ret, cpumask_pr_args(policy->cpus));
@@ -1232,8 +1234,8 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu)
return policy;
err_min_qos_notifier:
- dev_pm_qos_remove_notifier(dev, &policy->nb_min,
- DEV_PM_QOS_MIN_FREQUENCY);
+ freq_qos_remove_notifier(&policy->constraints, FREQ_QOS_MIN,
+ &policy->nb_min);
err_kobj_remove:
cpufreq_policy_put_kobj(policy);
err_free_real_cpus:
@@ -1250,7 +1252,6 @@ err_free_policy:
static void cpufreq_policy_free(struct cpufreq_policy *policy)
{
- struct device *dev = get_cpu_device(policy->cpu);
unsigned long flags;
int cpu;
@@ -1262,10 +1263,13 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy)
per_cpu(cpufreq_cpu_data, cpu) = NULL;
write_unlock_irqrestore(&cpufreq_driver_lock, flags);
- dev_pm_qos_remove_notifier(dev, &policy->nb_max,
- DEV_PM_QOS_MAX_FREQUENCY);
- dev_pm_qos_remove_notifier(dev, &policy->nb_min,
- DEV_PM_QOS_MIN_FREQUENCY);
+ freq_qos_remove_notifier(&policy->constraints, FREQ_QOS_MAX,
+ &policy->nb_max);
+ freq_qos_remove_notifier(&policy->constraints, FREQ_QOS_MIN,
+ &policy->nb_min);
+
+ /* Cancel any pending policy->update work before freeing the policy. */
+ cancel_work_sync(&policy->update);
if (policy->max_freq_req) {
/*
@@ -1274,10 +1278,10 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy)
*/
blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
CPUFREQ_REMOVE_POLICY, policy);
- dev_pm_qos_remove_request(policy->max_freq_req);
+ freq_qos_remove_request(policy->max_freq_req);
}
- dev_pm_qos_remove_request(policy->min_freq_req);
+ freq_qos_remove_request(policy->min_freq_req);
kfree(policy->min_freq_req);
cpufreq_policy_put_kobj(policy);
@@ -1357,8 +1361,6 @@ static int cpufreq_online(unsigned int cpu)
cpumask_and(policy->cpus, policy->cpus, cpu_online_mask);
if (new_policy) {
- struct device *dev = get_cpu_device(cpu);
-
for_each_cpu(j, policy->related_cpus) {
per_cpu(cpufreq_cpu_data, j) = policy;
add_cpu_dev_symlink(policy, j);
@@ -1369,36 +1371,31 @@ static int cpufreq_online(unsigned int cpu)
if (!policy->min_freq_req)
goto out_destroy_policy;
- ret = dev_pm_qos_add_request(dev, policy->min_freq_req,
- DEV_PM_QOS_MIN_FREQUENCY,
- policy->min);
+ ret = freq_qos_add_request(&policy->constraints,
+ policy->min_freq_req, FREQ_QOS_MIN,
+ policy->min);
if (ret < 0) {
/*
- * So we don't call dev_pm_qos_remove_request() for an
+ * So we don't call freq_qos_remove_request() for an
* uninitialized request.
*/
kfree(policy->min_freq_req);
policy->min_freq_req = NULL;
-
- dev_err(dev, "Failed to add min-freq constraint (%d)\n",
- ret);
goto out_destroy_policy;
}
/*
* This must be initialized right here to avoid calling
- * dev_pm_qos_remove_request() on uninitialized request in case
+ * freq_qos_remove_request() on uninitialized request in case
* of errors.
*/
policy->max_freq_req = policy->min_freq_req + 1;
- ret = dev_pm_qos_add_request(dev, policy->max_freq_req,
- DEV_PM_QOS_MAX_FREQUENCY,
- policy->max);
+ ret = freq_qos_add_request(&policy->constraints,
+ policy->max_freq_req, FREQ_QOS_MAX,
+ policy->max);
if (ret < 0) {
policy->max_freq_req = NULL;
- dev_err(dev, "Failed to add max-freq constraint (%d)\n",
- ret);
goto out_destroy_policy;
}
@@ -2374,7 +2371,6 @@ int cpufreq_set_policy(struct cpufreq_policy *policy,
struct cpufreq_policy *new_policy)
{
struct cpufreq_governor *old_gov;
- struct device *cpu_dev = get_cpu_device(policy->cpu);
int ret;
pr_debug("setting new policy for CPU %u: %u - %u kHz\n",
@@ -2386,8 +2382,8 @@ int cpufreq_set_policy(struct cpufreq_policy *policy,
* PM QoS framework collects all the requests from users and provide us
* the final aggregated value here.
*/
- new_policy->min = dev_pm_qos_read_value(cpu_dev, DEV_PM_QOS_MIN_FREQUENCY);
- new_policy->max = dev_pm_qos_read_value(cpu_dev, DEV_PM_QOS_MAX_FREQUENCY);
+ new_policy->min = freq_qos_read_value(&policy->constraints, FREQ_QOS_MIN);
+ new_policy->max = freq_qos_read_value(&policy->constraints, FREQ_QOS_MAX);
/* verify the cpu speed can be set within this limit */
ret = cpufreq_driver->verify(new_policy);
@@ -2518,7 +2514,7 @@ static int cpufreq_boost_set_sw(int state)
break;
}
- ret = dev_pm_qos_update_request(policy->max_freq_req, policy->max);
+ ret = freq_qos_update_request(policy->max_freq_req, policy->max);
if (ret < 0)
break;
}
@@ -2737,14 +2733,6 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver)
}
EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
-/*
- * Stop cpufreq at shutdown to make sure it isn't holding any locks
- * or mutexes when secondary CPUs are halted.
- */
-static struct syscore_ops cpufreq_syscore_ops = {
- .shutdown = cpufreq_suspend,
-};
-
struct kobject *cpufreq_global_kobject;
EXPORT_SYMBOL(cpufreq_global_kobject);
@@ -2756,8 +2744,6 @@ static int __init cpufreq_core_init(void)
cpufreq_global_kobject = kobject_create_and_add("cpufreq", &cpu_subsys.dev_root->kobj);
BUG_ON(!cpufreq_global_kobject);
- register_syscore_ops(&cpufreq_syscore_ops);
-
return 0;
}
module_param(off, int, 0444);
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 9f02de9a1b47..8ab31702cf6a 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -847,11 +847,9 @@ static void intel_pstate_hwp_force_min_perf(int cpu)
value |= HWP_MAX_PERF(min_perf);
value |= HWP_MIN_PERF(min_perf);
- /* Set EPP/EPB to min */
+ /* Set EPP to min */
if (boot_cpu_has(X86_FEATURE_HWP_EPP))
value |= HWP_ENERGY_PERF_PREFERENCE(HWP_EPP_POWERSAVE);
- else
- intel_pstate_set_epb(cpu, HWP_EPP_BALANCE_POWERSAVE);
wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
}
@@ -1088,10 +1086,10 @@ static ssize_t store_no_turbo(struct kobject *a, struct kobj_attribute *b,
static struct cpufreq_driver intel_pstate;
-static void update_qos_request(enum dev_pm_qos_req_type type)
+static void update_qos_request(enum freq_qos_req_type type)
{
int max_state, turbo_max, freq, i, perf_pct;
- struct dev_pm_qos_request *req;
+ struct freq_qos_request *req;
struct cpufreq_policy *policy;
for_each_possible_cpu(i) {
@@ -1112,7 +1110,7 @@ static void update_qos_request(enum dev_pm_qos_req_type type)
else
turbo_max = cpu->pstate.turbo_pstate;
- if (type == DEV_PM_QOS_MIN_FREQUENCY) {
+ if (type == FREQ_QOS_MIN) {
perf_pct = global.min_perf_pct;
} else {
req++;
@@ -1122,7 +1120,7 @@ static void update_qos_request(enum dev_pm_qos_req_type type)
freq = DIV_ROUND_UP(turbo_max * perf_pct, 100);
freq *= cpu->pstate.scaling;
- if (dev_pm_qos_update_request(req, freq) < 0)
+ if (freq_qos_update_request(req, freq) < 0)
pr_warn("Failed to update freq constraint: CPU%d\n", i);
}
}
@@ -1153,7 +1151,7 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct kobj_attribute *b,
if (intel_pstate_driver == &intel_pstate)
intel_pstate_update_policies();
else
- update_qos_request(DEV_PM_QOS_MAX_FREQUENCY);
+ update_qos_request(FREQ_QOS_MAX);
mutex_unlock(&intel_pstate_driver_lock);
@@ -1187,7 +1185,7 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct kobj_attribute *b,
if (intel_pstate_driver == &intel_pstate)
intel_pstate_update_policies();
else
- update_qos_request(DEV_PM_QOS_MIN_FREQUENCY);
+ update_qos_request(FREQ_QOS_MIN);
mutex_unlock(&intel_pstate_driver_lock);
@@ -2381,7 +2379,7 @@ static unsigned int intel_cpufreq_fast_switch(struct cpufreq_policy *policy,
static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
{
int max_state, turbo_max, min_freq, max_freq, ret;
- struct dev_pm_qos_request *req;
+ struct freq_qos_request *req;
struct cpudata *cpu;
struct device *dev;
@@ -2416,15 +2414,15 @@ static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
max_freq = DIV_ROUND_UP(turbo_max * global.max_perf_pct, 100);
max_freq *= cpu->pstate.scaling;
- ret = dev_pm_qos_add_request(dev, req, DEV_PM_QOS_MIN_FREQUENCY,
- min_freq);
+ ret = freq_qos_add_request(&policy->constraints, req, FREQ_QOS_MIN,
+ min_freq);
if (ret < 0) {
dev_err(dev, "Failed to add min-freq constraint (%d)\n", ret);
goto free_req;
}
- ret = dev_pm_qos_add_request(dev, req + 1, DEV_PM_QOS_MAX_FREQUENCY,
- max_freq);
+ ret = freq_qos_add_request(&policy->constraints, req + 1, FREQ_QOS_MAX,
+ max_freq);
if (ret < 0) {
dev_err(dev, "Failed to add max-freq constraint (%d)\n", ret);
goto remove_min_req;
@@ -2435,7 +2433,7 @@ static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy)
return 0;
remove_min_req:
- dev_pm_qos_remove_request(req);
+ freq_qos_remove_request(req);
free_req:
kfree(req);
pstate_exit:
@@ -2446,12 +2444,12 @@ pstate_exit:
static int intel_cpufreq_cpu_exit(struct cpufreq_policy *policy)
{
- struct dev_pm_qos_request *req;
+ struct freq_qos_request *req;
req = policy->driver_data;
- dev_pm_qos_remove_request(req + 1);
- dev_pm_qos_remove_request(req);
+ freq_qos_remove_request(req + 1);
+ freq_qos_remove_request(req);
kfree(req);
return intel_pstate_cpu_exit(policy);
diff --git a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
index bc9dd30395c4..037fe23bc6ed 100644
--- a/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
+++ b/drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
@@ -65,7 +65,7 @@ EXPORT_SYMBOL_GPL(cbe_cpufreq_set_pmode_pmi);
static void cbe_cpufreq_handle_pmi(pmi_message_t pmi_msg)
{
struct cpufreq_policy *policy;
- struct dev_pm_qos_request *req;
+ struct freq_qos_request *req;
u8 node, slow_mode;
int cpu, ret;
@@ -86,7 +86,7 @@ static void cbe_cpufreq_handle_pmi(pmi_message_t pmi_msg)
req = policy->driver_data;
- ret = dev_pm_qos_update_request(req,
+ ret = freq_qos_update_request(req,
policy->freq_table[slow_mode].frequency);
if (ret < 0)
pr_warn("Failed to update freq constraint: %d\n", ret);
@@ -103,7 +103,7 @@ static struct pmi_handler cbe_pmi_handler = {
void cbe_cpufreq_pmi_policy_init(struct cpufreq_policy *policy)
{
- struct dev_pm_qos_request *req;
+ struct freq_qos_request *req;
int ret;
if (!cbe_cpufreq_has_pmi)
@@ -113,9 +113,8 @@ void cbe_cpufreq_pmi_policy_init(struct cpufreq_policy *policy)
if (!req)
return;
- ret = dev_pm_qos_add_request(get_cpu_device(policy->cpu), req,
- DEV_PM_QOS_MAX_FREQUENCY,
- policy->freq_table[0].frequency);
+ ret = freq_qos_add_request(&policy->constraints, req, FREQ_QOS_MAX,
+ policy->freq_table[0].frequency);
if (ret < 0) {
pr_err("Failed to add freq constraint (%d)\n", ret);
kfree(req);
@@ -128,10 +127,10 @@ EXPORT_SYMBOL_GPL(cbe_cpufreq_pmi_policy_init);
void cbe_cpufreq_pmi_policy_exit(struct cpufreq_policy *policy)
{
- struct dev_pm_qos_request *req = policy->driver_data;
+ struct freq_qos_request *req = policy->driver_data;
if (cbe_cpufreq_has_pmi) {
- dev_pm_qos_remove_request(req);
+ freq_qos_remove_request(req);
kfree(req);
}
}
diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c
index 932390b028f1..b0ce9bc78113 100644
--- a/drivers/cpuidle/cpuidle-haltpoll.c
+++ b/drivers/cpuidle/cpuidle-haltpoll.c
@@ -95,6 +95,10 @@ static int __init haltpoll_init(void)
int ret;
struct cpuidle_driver *drv = &haltpoll_driver;
+ /* Do not load haltpoll if idle= is passed */
+ if (boot_option_idle_override != IDLE_NO_OVERRIDE)
+ return -ENODEV;
+
cpuidle_poll_state_init(drv);
if (!kvm_para_available() ||
diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c b/drivers/crypto/chelsio/chtls/chtls_cm.c
index 774d991d7cca..aca75237bbcf 100644
--- a/drivers/crypto/chelsio/chtls/chtls_cm.c
+++ b/drivers/crypto/chelsio/chtls/chtls_cm.c
@@ -1297,7 +1297,7 @@ static void make_established(struct sock *sk, u32 snd_isn, unsigned int opt)
tp->write_seq = snd_isn;
tp->snd_nxt = snd_isn;
tp->snd_una = snd_isn;
- inet_sk(sk)->inet_id = tp->write_seq ^ jiffies;
+ inet_sk(sk)->inet_id = prandom_u32();
assign_rxopt(sk, opt);
if (tp->rcv_wnd > (RCV_BUFSIZ_M << 10))
diff --git a/drivers/crypto/chelsio/chtls/chtls_io.c b/drivers/crypto/chelsio/chtls/chtls_io.c
index 0891ab829b1b..98bc5a4cd5e7 100644
--- a/drivers/crypto/chelsio/chtls/chtls_io.c
+++ b/drivers/crypto/chelsio/chtls/chtls_io.c
@@ -1702,7 +1702,7 @@ int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
return peekmsg(sk, msg, len, nonblock, flags);
if (sk_can_busy_loop(sk) &&
- skb_queue_empty(&sk->sk_receive_queue) &&
+ skb_queue_empty_lockless(&sk->sk_receive_queue) &&
sk->sk_state == TCP_ESTABLISHED)
sk_busy_loop(sk, nonblock);
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index 42a8f3f11681..709002515550 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -471,7 +471,7 @@ unlock:
if (pfence_excl)
*pfence_excl = fence_excl;
else if (fence_excl)
- shared[++shared_count] = fence_excl;
+ shared[shared_count++] = fence_excl;
if (!shared_count) {
kfree(shared);
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index 9ba74ab7e912..c27e206a764c 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -1707,6 +1707,14 @@ static void sdma_add_scripts(struct sdma_engine *sdma,
if (!sdma->script_number)
sdma->script_number = SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V1;
+ if (sdma->script_number > sizeof(struct sdma_script_start_addrs)
+ / sizeof(s32)) {
+ dev_err(sdma->dev,
+ "SDMA script number %d not match with firmware.\n",
+ sdma->script_number);
+ return;
+ }
+
for (i = 0; i < sdma->script_number; i++)
if (addr_arr[i] > 0)
saddr_arr[i] = addr_arr[i];
diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c
index 8e90a405939d..ef73f65224b1 100644
--- a/drivers/dma/qcom/bam_dma.c
+++ b/drivers/dma/qcom/bam_dma.c
@@ -694,6 +694,25 @@ static int bam_dma_terminate_all(struct dma_chan *chan)
/* remove all transactions, including active transaction */
spin_lock_irqsave(&bchan->vc.lock, flag);
+ /*
+ * If we have transactions queued, then some might be committed to the
+ * hardware in the desc fifo. The only way to reset the desc fifo is
+ * to do a hardware reset (either by pipe or the entire block).
+ * bam_chan_init_hw() will trigger a pipe reset, and also reinit the
+ * pipe. If the pipe is left disabled (default state after pipe reset)
+ * and is accessed by a connected hardware engine, a fatal error in
+ * the BAM will occur. There is a small window where this could happen
+ * with bam_chan_init_hw(), but it is assumed that the caller has
+ * stopped activity on any attached hardware engine. Make sure to do
+ * this first so that the BAM hardware doesn't cause memory corruption
+ * by accessing freed resources.
+ */
+ if (!list_empty(&bchan->desc_list)) {
+ async_desc = list_first_entry(&bchan->desc_list,
+ struct bam_async_desc, desc_node);
+ bam_chan_init_hw(bchan, async_desc->dir);
+ }
+
list_for_each_entry_safe(async_desc, tmp,
&bchan->desc_list, desc_node) {
list_add(&async_desc->vd.node, &bchan->vc.desc_issued);
diff --git a/drivers/dma/sprd-dma.c b/drivers/dma/sprd-dma.c
index 525dc7338fe3..8546ad034720 100644
--- a/drivers/dma/sprd-dma.c
+++ b/drivers/dma/sprd-dma.c
@@ -134,6 +134,10 @@
#define SPRD_DMA_SRC_TRSF_STEP_OFFSET 0
#define SPRD_DMA_TRSF_STEP_MASK GENMASK(15, 0)
+/* SPRD DMA_SRC_BLK_STEP register definition */
+#define SPRD_DMA_LLIST_HIGH_MASK GENMASK(31, 28)
+#define SPRD_DMA_LLIST_HIGH_SHIFT 28
+
/* define DMA channel mode & trigger mode mask */
#define SPRD_DMA_CHN_MODE_MASK GENMASK(7, 0)
#define SPRD_DMA_TRG_MODE_MASK GENMASK(7, 0)
@@ -208,6 +212,7 @@ struct sprd_dma_dev {
struct sprd_dma_chn channels[0];
};
+static void sprd_dma_free_desc(struct virt_dma_desc *vd);
static bool sprd_dma_filter_fn(struct dma_chan *chan, void *param);
static struct of_dma_filter_info sprd_dma_info = {
.filter_fn = sprd_dma_filter_fn,
@@ -609,12 +614,19 @@ static int sprd_dma_alloc_chan_resources(struct dma_chan *chan)
static void sprd_dma_free_chan_resources(struct dma_chan *chan)
{
struct sprd_dma_chn *schan = to_sprd_dma_chan(chan);
+ struct virt_dma_desc *cur_vd = NULL;
unsigned long flags;
spin_lock_irqsave(&schan->vc.lock, flags);
+ if (schan->cur_desc)
+ cur_vd = &schan->cur_desc->vd;
+
sprd_dma_stop(schan);
spin_unlock_irqrestore(&schan->vc.lock, flags);
+ if (cur_vd)
+ sprd_dma_free_desc(cur_vd);
+
vchan_free_chan_resources(&schan->vc);
pm_runtime_put(chan->device->dev);
}
@@ -717,6 +729,7 @@ static int sprd_dma_fill_desc(struct dma_chan *chan,
u32 int_mode = flags & SPRD_DMA_INT_MASK;
int src_datawidth, dst_datawidth, src_step, dst_step;
u32 temp, fix_mode = 0, fix_en = 0;
+ phys_addr_t llist_ptr;
if (dir == DMA_MEM_TO_DEV) {
src_step = sprd_dma_get_step(slave_cfg->src_addr_width);
@@ -814,13 +827,16 @@ static int sprd_dma_fill_desc(struct dma_chan *chan,
* Set the link-list pointer point to next link-list
* configuration's physical address.
*/
- hw->llist_ptr = schan->linklist.phy_addr + temp;
+ llist_ptr = schan->linklist.phy_addr + temp;
+ hw->llist_ptr = lower_32_bits(llist_ptr);
+ hw->src_blk_step = (upper_32_bits(llist_ptr) << SPRD_DMA_LLIST_HIGH_SHIFT) &
+ SPRD_DMA_LLIST_HIGH_MASK;
} else {
hw->llist_ptr = 0;
+ hw->src_blk_step = 0;
}
hw->frg_step = 0;
- hw->src_blk_step = 0;
hw->des_blk_step = 0;
return 0;
}
@@ -1023,15 +1039,22 @@ static int sprd_dma_resume(struct dma_chan *chan)
static int sprd_dma_terminate_all(struct dma_chan *chan)
{
struct sprd_dma_chn *schan = to_sprd_dma_chan(chan);
+ struct virt_dma_desc *cur_vd = NULL;
unsigned long flags;
LIST_HEAD(head);
spin_lock_irqsave(&schan->vc.lock, flags);
+ if (schan->cur_desc)
+ cur_vd = &schan->cur_desc->vd;
+
sprd_dma_stop(schan);
vchan_get_all_descriptors(&schan->vc, &head);
spin_unlock_irqrestore(&schan->vc.lock, flags);
+ if (cur_vd)
+ sprd_dma_free_desc(cur_vd);
+
vchan_dma_desc_free_list(&schan->vc, &head);
return 0;
}
diff --git a/drivers/dma/tegra210-adma.c b/drivers/dma/tegra210-adma.c
index 5f8adf5c1f20..6e1268552f74 100644
--- a/drivers/dma/tegra210-adma.c
+++ b/drivers/dma/tegra210-adma.c
@@ -40,6 +40,7 @@
#define ADMA_CH_CONFIG_MAX_BURST_SIZE 16
#define ADMA_CH_CONFIG_WEIGHT_FOR_WRR(val) ((val) & 0xf)
#define ADMA_CH_CONFIG_MAX_BUFS 8
+#define TEGRA186_ADMA_CH_CONFIG_OUTSTANDING_REQS(reqs) (reqs << 4)
#define ADMA_CH_FIFO_CTRL 0x2c
#define TEGRA210_ADMA_CH_FIFO_CTRL_TXSIZE(val) (((val) & 0xf) << 8)
@@ -77,6 +78,7 @@ struct tegra_adma;
* @ch_req_tx_shift: Register offset for AHUB transmit channel select.
* @ch_req_rx_shift: Register offset for AHUB receive channel select.
* @ch_base_offset: Register offset of DMA channel registers.
+ * @has_outstanding_reqs: If DMA channel can have outstanding requests.
* @ch_fifo_ctrl: Default value for channel FIFO CTRL register.
* @ch_req_mask: Mask for Tx or Rx channel select.
* @ch_req_max: Maximum number of Tx or Rx channels available.
@@ -95,6 +97,7 @@ struct tegra_adma_chip_data {
unsigned int ch_req_max;
unsigned int ch_reg_size;
unsigned int nr_channels;
+ bool has_outstanding_reqs;
};
/*
@@ -594,6 +597,8 @@ static int tegra_adma_set_xfer_params(struct tegra_adma_chan *tdc,
ADMA_CH_CTRL_FLOWCTRL_EN;
ch_regs->config |= cdata->adma_get_burst_config(burst_size);
ch_regs->config |= ADMA_CH_CONFIG_WEIGHT_FOR_WRR(1);
+ if (cdata->has_outstanding_reqs)
+ ch_regs->config |= TEGRA186_ADMA_CH_CONFIG_OUTSTANDING_REQS(8);
ch_regs->fifo_ctrl = cdata->ch_fifo_ctrl;
ch_regs->tc = desc->period_len & ADMA_CH_TC_COUNT_MASK;
@@ -778,6 +783,7 @@ static const struct tegra_adma_chip_data tegra210_chip_data = {
.ch_req_tx_shift = 28,
.ch_req_rx_shift = 24,
.ch_base_offset = 0,
+ .has_outstanding_reqs = false,
.ch_fifo_ctrl = TEGRA210_FIFO_CTRL_DEFAULT,
.ch_req_mask = 0xf,
.ch_req_max = 10,
@@ -792,6 +798,7 @@ static const struct tegra_adma_chip_data tegra186_chip_data = {
.ch_req_tx_shift = 27,
.ch_req_rx_shift = 22,
.ch_base_offset = 0x10000,
+ .has_outstanding_reqs = true,
.ch_fifo_ctrl = TEGRA186_FIFO_CTRL_DEFAULT,
.ch_req_mask = 0x1f,
.ch_req_max = 20,
diff --git a/drivers/dma/ti/cppi41.c b/drivers/dma/ti/cppi41.c
index 2f946f55076c..8c2f7ebe998c 100644
--- a/drivers/dma/ti/cppi41.c
+++ b/drivers/dma/ti/cppi41.c
@@ -586,9 +586,22 @@ static struct dma_async_tx_descriptor *cppi41_dma_prep_slave_sg(
enum dma_transfer_direction dir, unsigned long tx_flags, void *context)
{
struct cppi41_channel *c = to_cpp41_chan(chan);
+ struct dma_async_tx_descriptor *txd = NULL;
+ struct cppi41_dd *cdd = c->cdd;
struct cppi41_desc *d;
struct scatterlist *sg;
unsigned int i;
+ int error;
+
+ error = pm_runtime_get(cdd->ddev.dev);
+ if (error < 0) {
+ pm_runtime_put_noidle(cdd->ddev.dev);
+
+ return NULL;
+ }
+
+ if (cdd->is_suspended)
+ goto err_out_not_ready;
d = c->desc;
for_each_sg(sgl, sg, sg_len, i) {
@@ -611,7 +624,13 @@ static struct dma_async_tx_descriptor *cppi41_dma_prep_slave_sg(
d++;
}
- return &c->txd;
+ txd = &c->txd;
+
+err_out_not_ready:
+ pm_runtime_mark_last_busy(cdd->ddev.dev);
+ pm_runtime_put_autosuspend(cdd->ddev.dev);
+
+ return txd;
}
static void cppi41_compute_td_desc(struct cppi41_desc *d)
diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
index e7dc3c4dc8e0..5d56f1e4d332 100644
--- a/drivers/dma/xilinx/xilinx_dma.c
+++ b/drivers/dma/xilinx/xilinx_dma.c
@@ -68,6 +68,9 @@
#define XILINX_DMA_DMACR_CIRC_EN BIT(1)
#define XILINX_DMA_DMACR_RUNSTOP BIT(0)
#define XILINX_DMA_DMACR_FSYNCSRC_MASK GENMASK(6, 5)
+#define XILINX_DMA_DMACR_DELAY_MASK GENMASK(31, 24)
+#define XILINX_DMA_DMACR_FRAME_COUNT_MASK GENMASK(23, 16)
+#define XILINX_DMA_DMACR_MASTER_MASK GENMASK(11, 8)
#define XILINX_DMA_REG_DMASR 0x0004
#define XILINX_DMA_DMASR_EOL_LATE_ERR BIT(15)
@@ -1354,7 +1357,8 @@ static void xilinx_dma_start_transfer(struct xilinx_dma_chan *chan)
node);
hw = &segment->hw;
- xilinx_write(chan, XILINX_DMA_REG_SRCDSTADDR, hw->buf_addr);
+ xilinx_write(chan, XILINX_DMA_REG_SRCDSTADDR,
+ xilinx_prep_dma_addr_t(hw->buf_addr));
/* Start the transfer */
dma_ctrl_write(chan, XILINX_DMA_REG_BTT,
@@ -2117,8 +2121,10 @@ int xilinx_vdma_channel_set_config(struct dma_chan *dchan,
chan->config.gen_lock = cfg->gen_lock;
chan->config.master = cfg->master;
+ dmacr &= ~XILINX_DMA_DMACR_GENLOCK_EN;
if (cfg->gen_lock && chan->genlock) {
dmacr |= XILINX_DMA_DMACR_GENLOCK_EN;
+ dmacr &= ~XILINX_DMA_DMACR_MASTER_MASK;
dmacr |= cfg->master << XILINX_DMA_DMACR_MASTER_SHIFT;
}
@@ -2134,11 +2140,13 @@ int xilinx_vdma_channel_set_config(struct dma_chan *dchan,
chan->config.delay = cfg->delay;
if (cfg->coalesc <= XILINX_DMA_DMACR_FRAME_COUNT_MAX) {
+ dmacr &= ~XILINX_DMA_DMACR_FRAME_COUNT_MASK;
dmacr |= cfg->coalesc << XILINX_DMA_DMACR_FRAME_COUNT_SHIFT;
chan->config.coalesc = cfg->coalesc;
}
if (cfg->delay <= XILINX_DMA_DMACR_DELAY_MAX) {
+ dmacr &= ~XILINX_DMA_DMACR_DELAY_MASK;
dmacr |= cfg->delay << XILINX_DMA_DMACR_DELAY_SHIFT;
chan->config.delay = cfg->delay;
}
diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
index d413a0bdc9ad..0bb62857ffb2 100644
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -553,7 +553,11 @@ void ghes_edac_unregister(struct ghes *ghes)
if (!ghes_pvt)
return;
+ if (atomic_dec_return(&ghes_init))
+ return;
+
mci = ghes_pvt->mci;
+ ghes_pvt = NULL;
edac_mc_del_mc(mci->pdev);
edac_mc_free(mci);
}
diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
index 9cd70d1a5622..a479023fa036 100644
--- a/drivers/firmware/arm_sdei.c
+++ b/drivers/firmware/arm_sdei.c
@@ -967,29 +967,29 @@ static int sdei_get_conduit(struct platform_device *pdev)
if (np) {
if (of_property_read_string(np, "method", &method)) {
pr_warn("missing \"method\" property\n");
- return CONDUIT_INVALID;
+ return SMCCC_CONDUIT_NONE;
}
if (!strcmp("hvc", method)) {
sdei_firmware_call = &sdei_smccc_hvc;
- return CONDUIT_HVC;
+ return SMCCC_CONDUIT_HVC;
} else if (!strcmp("smc", method)) {
sdei_firmware_call = &sdei_smccc_smc;
- return CONDUIT_SMC;
+ return SMCCC_CONDUIT_SMC;
}
pr_warn("invalid \"method\" property: %s\n", method);
} else if (IS_ENABLED(CONFIG_ACPI) && !acpi_disabled) {
if (acpi_psci_use_hvc()) {
sdei_firmware_call = &sdei_smccc_hvc;
- return CONDUIT_HVC;
+ return SMCCC_CONDUIT_HVC;
} else {
sdei_firmware_call = &sdei_smccc_smc;
- return CONDUIT_SMC;
+ return SMCCC_CONDUIT_SMC;
}
}
- return CONDUIT_INVALID;
+ return SMCCC_CONDUIT_NONE;
}
static int sdei_probe(struct platform_device *pdev)
diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index 35ed56b9c34f..1e21fc3e9851 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -408,7 +408,7 @@ static void __init save_mem_devices(const struct dmi_header *dm, void *v)
bytes = ~0ull;
else if (size & 0x8000)
bytes = (u64)(size & 0x7fff) << 10;
- else if (size != 0x7fff)
+ else if (size != 0x7fff || dm->length < 0x20)
bytes = (u64)size << 20;
else
bytes = (u64)get_unaligned((u32 *)&d[0x1C]) << 20;
diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig
index 178ee8106828..b248870a9806 100644
--- a/drivers/firmware/efi/Kconfig
+++ b/drivers/firmware/efi/Kconfig
@@ -182,6 +182,7 @@ config RESET_ATTACK_MITIGATION
config EFI_RCI2_TABLE
bool "EFI Runtime Configuration Interface Table Version 2 Support"
+ depends on X86 || COMPILE_TEST
help
Displays the content of the Runtime Configuration Interface
Table version 2 on Dell EMC PowerEdge systems as a binary
diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index addf0749dd8b..b1af0de2e100 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -381,7 +381,7 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx,
pcie->device_id.vendor_id, pcie->device_id.device_id);
p = pcie->device_id.class_code;
- printk("%s""class_code: %02x%02x%02x\n", pfx, p[0], p[1], p[2]);
+ printk("%s""class_code: %02x%02x%02x\n", pfx, p[2], p[1], p[0]);
}
if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER)
printk("%s""serial number: 0x%04x, 0x%04x\n", pfx,
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 8d3e778e988b..e98bbf8e56d9 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -267,6 +267,9 @@ static __init int efivar_ssdt_load(void)
void *data;
int ret;
+ if (!efivar_ssdt[0])
+ return 0;
+
ret = efivar_init(efivar_ssdt_iter, &entries, true, &entries);
list_for_each_entry_safe(entry, aux, &entries, list) {
@@ -551,7 +554,7 @@ int __init efi_config_parse_tables(void *config_tables, int count, int sz,
sizeof(*seed) + size);
if (seed != NULL) {
pr_notice("seeding entropy pool\n");
- add_device_randomness(seed->bits, seed->size);
+ add_bootloader_randomness(seed->bits, seed->size);
early_memunmap(seed, sizeof(*seed) + size);
} else {
pr_err("Could not map UEFI random seed!\n");
diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index 0460c7581220..ee0661ddb25b 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -52,6 +52,7 @@ lib-$(CONFIG_EFI_ARMSTUB) += arm-stub.o fdt.o string.o random.o \
lib-$(CONFIG_ARM) += arm32-stub.o
lib-$(CONFIG_ARM64) += arm64-stub.o
+CFLAGS_arm32-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
CFLAGS_arm64-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
#
diff --git a/drivers/firmware/efi/libstub/arm32-stub.c b/drivers/firmware/efi/libstub/arm32-stub.c
index e8f7aefb6813..41213bf5fcf5 100644
--- a/drivers/firmware/efi/libstub/arm32-stub.c
+++ b/drivers/firmware/efi/libstub/arm32-stub.c
@@ -195,6 +195,7 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table,
unsigned long dram_base,
efi_loaded_image_t *image)
{
+ unsigned long kernel_base;
efi_status_t status;
/*
@@ -204,9 +205,18 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table,
* loaded. These assumptions are made by the decompressor,
* before any memory map is available.
*/
- dram_base = round_up(dram_base, SZ_128M);
+ kernel_base = round_up(dram_base, SZ_128M);
- status = reserve_kernel_base(sys_table, dram_base, reserve_addr,
+ /*
+ * Note that some platforms (notably, the Raspberry Pi 2) put
+ * spin-tables and other pieces of firmware at the base of RAM,
+ * abusing the fact that the window of TEXT_OFFSET bytes at the
+ * base of the kernel image is only partially used at the moment.
+ * (Up to 5 pages are used for the swapper page tables)
+ */
+ kernel_base += TEXT_OFFSET - 5 * PAGE_SIZE;
+
+ status = reserve_kernel_base(sys_table, kernel_base, reserve_addr,
reserve_size);
if (status != EFI_SUCCESS) {
pr_efi_err(sys_table, "Unable to allocate memory for uncompressed kernel.\n");
@@ -220,7 +230,7 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table,
*image_size = image->image_size;
status = efi_relocate_kernel(sys_table, image_addr, *image_size,
*image_size,
- dram_base + MAX_UNCOMP_KERNEL_SIZE, 0);
+ kernel_base + MAX_UNCOMP_KERNEL_SIZE, 0, 0);
if (status != EFI_SUCCESS) {
pr_efi_err(sys_table, "Failed to relocate kernel.\n");
efi_free(sys_table, *reserve_size, *reserve_addr);
diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
index 3caae7f2cf56..35dbc2791c97 100644
--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
@@ -260,11 +260,11 @@ fail:
}
/*
- * Allocate at the lowest possible address.
+ * Allocate at the lowest possible address that is not below 'min'.
*/
-efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg,
- unsigned long size, unsigned long align,
- unsigned long *addr)
+efi_status_t efi_low_alloc_above(efi_system_table_t *sys_table_arg,
+ unsigned long size, unsigned long align,
+ unsigned long *addr, unsigned long min)
{
unsigned long map_size, desc_size, buff_size;
efi_memory_desc_t *map;
@@ -311,13 +311,8 @@ efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg,
start = desc->phys_addr;
end = start + desc->num_pages * EFI_PAGE_SIZE;
- /*
- * Don't allocate at 0x0. It will confuse code that
- * checks pointers against NULL. Skip the first 8
- * bytes so we start at a nice even number.
- */
- if (start == 0x0)
- start += 8;
+ if (start < min)
+ start = min;
start = round_up(start, align);
if ((start + size) > end)
@@ -698,7 +693,8 @@ efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg,
unsigned long image_size,
unsigned long alloc_size,
unsigned long preferred_addr,
- unsigned long alignment)
+ unsigned long alignment,
+ unsigned long min_addr)
{
unsigned long cur_image_addr;
unsigned long new_addr = 0;
@@ -731,8 +727,8 @@ efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg,
* possible.
*/
if (status != EFI_SUCCESS) {
- status = efi_low_alloc(sys_table_arg, alloc_size, alignment,
- &new_addr);
+ status = efi_low_alloc_above(sys_table_arg, alloc_size,
+ alignment, &new_addr, min_addr);
}
if (status != EFI_SUCCESS) {
pr_efi_err(sys_table_arg, "Failed to allocate usable memory for kernel.\n");
diff --git a/drivers/firmware/efi/rci2-table.c b/drivers/firmware/efi/rci2-table.c
index 3e290f96620a..76b0c354a027 100644
--- a/drivers/firmware/efi/rci2-table.c
+++ b/drivers/firmware/efi/rci2-table.c
@@ -76,7 +76,7 @@ static u16 checksum(void)
return chksum;
}
-int __init efi_rci2_sysfs_init(void)
+static int __init efi_rci2_sysfs_init(void)
{
struct kobject *tables_kobj;
int ret = -ENOMEM;
diff --git a/drivers/firmware/efi/test/efi_test.c b/drivers/firmware/efi/test/efi_test.c
index 877745c3aaf2..7baf48c01e72 100644
--- a/drivers/firmware/efi/test/efi_test.c
+++ b/drivers/firmware/efi/test/efi_test.c
@@ -14,6 +14,7 @@
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/efi.h>
+#include <linux/security.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
@@ -717,6 +718,13 @@ static long efi_test_ioctl(struct file *file, unsigned int cmd,
static int efi_test_open(struct inode *inode, struct file *file)
{
+ int ret = security_locked_down(LOCKDOWN_EFI_TEST);
+
+ if (ret)
+ return ret;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
/*
* nothing special to do here
* We do accept multiple open files at the same time as we
diff --git a/drivers/firmware/efi/tpm.c b/drivers/firmware/efi/tpm.c
index 1d3f5ca3eaaf..31f9f0e369b9 100644
--- a/drivers/firmware/efi/tpm.c
+++ b/drivers/firmware/efi/tpm.c
@@ -40,7 +40,7 @@ int __init efi_tpm_eventlog_init(void)
{
struct linux_efi_tpm_eventlog *log_tbl;
struct efi_tcg2_final_events_table *final_tbl;
- unsigned int tbl_size;
+ int tbl_size;
int ret = 0;
if (efi.tpm_log == EFI_INVALID_TABLE_ADDR) {
@@ -75,16 +75,29 @@ int __init efi_tpm_eventlog_init(void)
goto out;
}
- tbl_size = tpm2_calc_event_log_size((void *)efi.tpm_final_log
- + sizeof(final_tbl->version)
- + sizeof(final_tbl->nr_events),
- final_tbl->nr_events,
- log_tbl->log);
+ tbl_size = 0;
+ if (final_tbl->nr_events != 0) {
+ void *events = (void *)efi.tpm_final_log
+ + sizeof(final_tbl->version)
+ + sizeof(final_tbl->nr_events);
+
+ tbl_size = tpm2_calc_event_log_size(events,
+ final_tbl->nr_events,
+ log_tbl->log);
+ }
+
+ if (tbl_size < 0) {
+ pr_err(FW_BUG "Failed to parse event in TPM Final Events Log\n");
+ ret = -EINVAL;
+ goto out_calc;
+ }
+
memblock_reserve((unsigned long)final_tbl,
tbl_size + sizeof(*final_tbl));
- early_memunmap(final_tbl, sizeof(*final_tbl));
efi_tpm_final_log_size = tbl_size;
+out_calc:
+ early_memunmap(final_tbl, sizeof(*final_tbl));
out:
early_memunmap(log_tbl, sizeof(*log_tbl));
return ret;
diff --git a/drivers/firmware/google/vpd_decode.c b/drivers/firmware/google/vpd_decode.c
index dda525c0f968..5c6f2a74f104 100644
--- a/drivers/firmware/google/vpd_decode.c
+++ b/drivers/firmware/google/vpd_decode.c
@@ -52,7 +52,7 @@ static int vpd_decode_entry(const u32 max_len, const u8 *input_buf,
if (max_len - consumed < *entry_len)
return VPD_FAIL;
- consumed += decoded_len;
+ consumed += *entry_len;
*_consumed = consumed;
return VPD_OK;
}
diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c
index 84f4ff351c62..b3b6c15e7b36 100644
--- a/drivers/firmware/psci/psci.c
+++ b/drivers/firmware/psci/psci.c
@@ -53,10 +53,18 @@ bool psci_tos_resident_on(int cpu)
}
struct psci_operations psci_ops = {
- .conduit = PSCI_CONDUIT_NONE,
+ .conduit = SMCCC_CONDUIT_NONE,
.smccc_version = SMCCC_VERSION_1_0,
};
+enum arm_smccc_conduit arm_smccc_1_1_get_conduit(void)
+{
+ if (psci_ops.smccc_version < SMCCC_VERSION_1_1)
+ return SMCCC_CONDUIT_NONE;
+
+ return psci_ops.conduit;
+}
+
typedef unsigned long (psci_fn)(unsigned long, unsigned long,
unsigned long, unsigned long);
static psci_fn *invoke_psci_fn;
@@ -212,13 +220,13 @@ static unsigned long psci_migrate_info_up_cpu(void)
0, 0, 0);
}
-static void set_conduit(enum psci_conduit conduit)
+static void set_conduit(enum arm_smccc_conduit conduit)
{
switch (conduit) {
- case PSCI_CONDUIT_HVC:
+ case SMCCC_CONDUIT_HVC:
invoke_psci_fn = __invoke_psci_fn_hvc;
break;
- case PSCI_CONDUIT_SMC:
+ case SMCCC_CONDUIT_SMC:
invoke_psci_fn = __invoke_psci_fn_smc;
break;
default:
@@ -240,9 +248,9 @@ static int get_set_conduit_method(struct device_node *np)
}
if (!strcmp("hvc", method)) {
- set_conduit(PSCI_CONDUIT_HVC);
+ set_conduit(SMCCC_CONDUIT_HVC);
} else if (!strcmp("smc", method)) {
- set_conduit(PSCI_CONDUIT_SMC);
+ set_conduit(SMCCC_CONDUIT_SMC);
} else {
pr_warn("invalid \"method\" property: %s\n", method);
return -EINVAL;
@@ -583,9 +591,9 @@ int __init psci_acpi_init(void)
pr_info("probing for conduit method from ACPI.\n");
if (acpi_psci_use_hvc())
- set_conduit(PSCI_CONDUIT_HVC);
+ set_conduit(SMCCC_CONDUIT_HVC);
else
- set_conduit(PSCI_CONDUIT_SMC);
+ set_conduit(SMCCC_CONDUIT_SMC);
return psci_probe();
}
diff --git a/drivers/gpio/gpio-bd70528.c b/drivers/gpio/gpio-bd70528.c
index 0c1ead12d883..4ba4d4a67881 100644
--- a/drivers/gpio/gpio-bd70528.c
+++ b/drivers/gpio/gpio-bd70528.c
@@ -25,13 +25,13 @@ static int bd70528_set_debounce(struct bd70528_gpio *bdgpio,
case 0:
val = BD70528_DEBOUNCE_DISABLE;
break;
- case 1 ... 15:
+ case 1 ... 15000:
val = BD70528_DEBOUNCE_15MS;
break;
- case 16 ... 30:
+ case 15001 ... 30000:
val = BD70528_DEBOUNCE_30MS;
break;
- case 31 ... 50:
+ case 30001 ... 50000:
val = BD70528_DEBOUNCE_50MS;
break;
default:
diff --git a/drivers/gpio/gpio-eic-sprd.c b/drivers/gpio/gpio-eic-sprd.c
index fe7a73f52329..bb287f35cf40 100644
--- a/drivers/gpio/gpio-eic-sprd.c
+++ b/drivers/gpio/gpio-eic-sprd.c
@@ -530,11 +530,12 @@ static void sprd_eic_handle_one_type(struct gpio_chip *chip)
}
for_each_set_bit(n, &reg, SPRD_EIC_PER_BANK_NR) {
- girq = irq_find_mapping(chip->irq.domain,
- bank * SPRD_EIC_PER_BANK_NR + n);
+ u32 offset = bank * SPRD_EIC_PER_BANK_NR + n;
+
+ girq = irq_find_mapping(chip->irq.domain, offset);
generic_handle_irq(girq);
- sprd_eic_toggle_trigger(chip, girq, n);
+ sprd_eic_toggle_trigger(chip, girq, offset);
}
}
}
diff --git a/drivers/gpio/gpio-intel-mid.c b/drivers/gpio/gpio-intel-mid.c
index 4d835f9089df..86a10c808ef6 100644
--- a/drivers/gpio/gpio-intel-mid.c
+++ b/drivers/gpio/gpio-intel-mid.c
@@ -293,8 +293,9 @@ static void intel_mid_irq_handler(struct irq_desc *desc)
chip->irq_eoi(data);
}
-static void intel_mid_irq_init_hw(struct intel_mid_gpio *priv)
+static int intel_mid_irq_init_hw(struct gpio_chip *chip)
{
+ struct intel_mid_gpio *priv = gpiochip_get_data(chip);
void __iomem *reg;
unsigned base;
@@ -309,6 +310,8 @@ static void intel_mid_irq_init_hw(struct intel_mid_gpio *priv)
reg = gpio_reg(&priv->chip, base, GEDR);
writel(~0, reg);
}
+
+ return 0;
}
static int __maybe_unused intel_gpio_runtime_idle(struct device *dev)
@@ -372,6 +375,7 @@ static int intel_gpio_probe(struct pci_dev *pdev,
girq = &priv->chip.irq;
girq->chip = &intel_mid_irqchip;
+ girq->init_hw = intel_mid_irq_init_hw;
girq->parent_handler = intel_mid_irq_handler;
girq->num_parents = 1;
girq->parents = devm_kcalloc(&pdev->dev, girq->num_parents,
@@ -384,9 +388,8 @@ static int intel_gpio_probe(struct pci_dev *pdev,
girq->default_type = IRQ_TYPE_NONE;
girq->handler = handle_simple_irq;
- intel_mid_irq_init_hw(priv);
-
pci_set_drvdata(pdev, priv);
+
retval = devm_gpiochip_add_data(&pdev->dev, &priv->chip, priv);
if (retval) {
dev_err(&pdev->dev, "gpiochip_add error %d\n", retval);
diff --git a/drivers/gpio/gpio-lynxpoint.c b/drivers/gpio/gpio-lynxpoint.c
index 6bb9741ad036..e9e47c0d5be7 100644
--- a/drivers/gpio/gpio-lynxpoint.c
+++ b/drivers/gpio/gpio-lynxpoint.c
@@ -294,8 +294,9 @@ static struct irq_chip lp_irqchip = {
.flags = IRQCHIP_SKIP_SET_WAKE,
};
-static void lp_gpio_irq_init_hw(struct lp_gpio *lg)
+static int lp_gpio_irq_init_hw(struct gpio_chip *chip)
{
+ struct lp_gpio *lg = gpiochip_get_data(chip);
unsigned long reg;
unsigned base;
@@ -307,6 +308,8 @@ static void lp_gpio_irq_init_hw(struct lp_gpio *lg)
reg = lp_gpio_reg(&lg->chip, base, LP_INT_STAT);
outl(0xffffffff, reg);
}
+
+ return 0;
}
static int lp_gpio_probe(struct platform_device *pdev)
@@ -364,6 +367,7 @@ static int lp_gpio_probe(struct platform_device *pdev)
girq = &gc->irq;
girq->chip = &lp_irqchip;
+ girq->init_hw = lp_gpio_irq_init_hw;
girq->parent_handler = lp_gpio_irq_handler;
girq->num_parents = 1;
girq->parents = devm_kcalloc(&pdev->dev, girq->num_parents,
@@ -373,9 +377,7 @@ static int lp_gpio_probe(struct platform_device *pdev)
return -ENOMEM;
girq->parents[0] = (unsigned)irq_rc->start;
girq->default_type = IRQ_TYPE_NONE;
- girq->handler = handle_simple_irq;
-
- lp_gpio_irq_init_hw(lg);
+ girq->handler = handle_bad_irq;
}
ret = devm_gpiochip_add_data(dev, gc, lg);
diff --git a/drivers/gpio/gpio-max77620.c b/drivers/gpio/gpio-max77620.c
index 47d05e357e61..642c6321c22a 100644
--- a/drivers/gpio/gpio-max77620.c
+++ b/drivers/gpio/gpio-max77620.c
@@ -192,13 +192,13 @@ static int max77620_gpio_set_debounce(struct max77620_gpio *mgpio,
case 0:
val = MAX77620_CNFG_GPIO_DBNC_None;
break;
- case 1 ... 8:
+ case 1 ... 8000:
val = MAX77620_CNFG_GPIO_DBNC_8ms;
break;
- case 9 ... 16:
+ case 8001 ... 16000:
val = MAX77620_CNFG_GPIO_DBNC_16ms;
break;
- case 17 ... 32:
+ case 16001 ... 32000:
val = MAX77620_CNFG_GPIO_DBNC_32ms;
break;
default:
diff --git a/drivers/gpio/gpio-merrifield.c b/drivers/gpio/gpio-merrifield.c
index 4f27ddfe1e2f..3302125e5265 100644
--- a/drivers/gpio/gpio-merrifield.c
+++ b/drivers/gpio/gpio-merrifield.c
@@ -397,7 +397,6 @@ static int mrfld_gpio_probe(struct pci_dev *pdev, const struct pci_device_id *id
{
const struct mrfld_gpio_pinrange *range;
const char *pinctrl_dev_name;
- struct gpio_irq_chip *girq;
struct mrfld_gpio *priv;
u32 gpio_base, irq_base;
void __iomem *base;
@@ -445,21 +444,6 @@ static int mrfld_gpio_probe(struct pci_dev *pdev, const struct pci_device_id *id
raw_spin_lock_init(&priv->lock);
- girq = &priv->chip.irq;
- girq->chip = &mrfld_irqchip;
- girq->parent_handler = mrfld_irq_handler;
- girq->num_parents = 1;
- girq->parents = devm_kcalloc(&pdev->dev, girq->num_parents,
- sizeof(*girq->parents),
- GFP_KERNEL);
- if (!girq->parents)
- return -ENOMEM;
- girq->parents[0] = pdev->irq;
- girq->default_type = IRQ_TYPE_NONE;
- girq->handler = handle_bad_irq;
-
- mrfld_irq_init_hw(priv);
-
pci_set_drvdata(pdev, priv);
retval = devm_gpiochip_add_data(&pdev->dev, &priv->chip, priv);
if (retval) {
@@ -481,6 +465,18 @@ static int mrfld_gpio_probe(struct pci_dev *pdev, const struct pci_device_id *id
}
}
+ retval = gpiochip_irqchip_add(&priv->chip, &mrfld_irqchip, irq_base,
+ handle_bad_irq, IRQ_TYPE_NONE);
+ if (retval) {
+ dev_err(&pdev->dev, "could not connect irqchip to gpiochip\n");
+ return retval;
+ }
+
+ mrfld_irq_init_hw(priv);
+
+ gpiochip_set_chained_irqchip(&priv->chip, &mrfld_irqchip, pdev->irq,
+ mrfld_irq_handler);
+
return 0;
}
diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index 609ed16ae933..59ccfd24627d 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -1304,11 +1304,28 @@ late_initcall_sync(acpi_gpio_handle_deferred_request_irqs);
static const struct dmi_system_id run_edge_events_on_boot_blacklist[] = {
{
+ /*
+ * The Minix Neo Z83-4 has a micro-USB-B id-pin handler for
+ * a non existing micro-USB-B connector which puts the HDMI
+ * DDC pins in GPIO mode, breaking HDMI support.
+ */
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "MINIX"),
DMI_MATCH(DMI_PRODUCT_NAME, "Z83-4"),
}
},
+ {
+ /*
+ * The Terra Pad 1061 has a micro-USB-B id-pin handler, which
+ * instead of controlling the actual micro-USB-B turns the 5V
+ * boost for its USB-A connector off. The actual micro-USB-B
+ * connector is wired for charging only.
+ */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Wortmann_AG"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "TERRA_PAD_1061"),
+ }
+ },
{} /* Terminating entry */
};
diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index 1eea2c6c2e1d..80ea49f570f4 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -317,7 +317,7 @@ struct gpio_desc *gpiod_get_from_of_node(struct device_node *node,
transitory = flags & OF_GPIO_TRANSITORY;
ret = gpiod_request(desc, label);
- if (ret == -EBUSY && (flags & GPIOD_FLAGS_BIT_NONEXCLUSIVE))
+ if (ret == -EBUSY && (dflags & GPIOD_FLAGS_BIT_NONEXCLUSIVE))
return desc;
if (ret)
return ERR_PTR(ret);
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index bdbc1649eafa..104ed299d5ea 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -86,6 +86,7 @@ static int gpiochip_add_irqchip(struct gpio_chip *gpiochip,
struct lock_class_key *lock_key,
struct lock_class_key *request_key);
static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip);
+static int gpiochip_irqchip_init_hw(struct gpio_chip *gpiochip);
static int gpiochip_irqchip_init_valid_mask(struct gpio_chip *gpiochip);
static void gpiochip_irqchip_free_valid_mask(struct gpio_chip *gpiochip);
@@ -1406,6 +1407,10 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data,
machine_gpiochip_add(chip);
+ ret = gpiochip_irqchip_init_hw(chip);
+ if (ret)
+ goto err_remove_acpi_chip;
+
ret = gpiochip_irqchip_init_valid_mask(chip);
if (ret)
goto err_remove_acpi_chip;
@@ -1622,6 +1627,16 @@ static struct gpio_chip *find_chip_by_name(const char *name)
* The following is irqchip helper code for gpiochips.
*/
+static int gpiochip_irqchip_init_hw(struct gpio_chip *gc)
+{
+ struct gpio_irq_chip *girq = &gc->irq;
+
+ if (!girq->init_hw)
+ return 0;
+
+ return girq->init_hw(gc);
+}
+
static int gpiochip_irqchip_init_valid_mask(struct gpio_chip *gc)
{
struct gpio_irq_chip *girq = &gc->irq;
@@ -2446,8 +2461,13 @@ static inline int gpiochip_add_irqchip(struct gpio_chip *gpiochip,
{
return 0;
}
-
static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip) {}
+
+static inline int gpiochip_irqchip_init_hw(struct gpio_chip *gpiochip)
+{
+ return 0;
+}
+
static inline int gpiochip_irqchip_init_valid_mask(struct gpio_chip *gpiochip)
{
return 0;
@@ -3070,8 +3090,10 @@ int gpiod_direction_output(struct gpio_desc *desc, int value)
if (!ret)
goto set_output_value;
/* Emulate open drain by not actively driving the line high */
- if (value)
- return gpiod_direction_input(desc);
+ if (value) {
+ ret = gpiod_direction_input(desc);
+ goto set_output_flag;
+ }
}
else if (test_bit(FLAG_OPEN_SOURCE, &desc->flags)) {
ret = gpio_set_config(gc, gpio_chip_hwgpio(desc),
@@ -3079,8 +3101,10 @@ int gpiod_direction_output(struct gpio_desc *desc, int value)
if (!ret)
goto set_output_value;
/* Emulate open source by not actively driving the line low */
- if (!value)
- return gpiod_direction_input(desc);
+ if (!value) {
+ ret = gpiod_direction_input(desc);
+ goto set_output_flag;
+ }
} else {
gpio_set_config(gc, gpio_chip_hwgpio(desc),
PIN_CONFIG_DRIVE_PUSH_PULL);
@@ -3088,6 +3112,17 @@ int gpiod_direction_output(struct gpio_desc *desc, int value)
set_output_value:
return gpiod_direction_output_raw_commit(desc, value);
+
+set_output_flag:
+ /*
+ * When emulating open-source or open-drain functionalities by not
+ * actively driving the line (setting mode to input) we still need to
+ * set the IS_OUT flag or otherwise we won't be able to set the line
+ * value anymore.
+ */
+ if (ret == 0)
+ set_bit(FLAG_IS_OUT, &desc->flags);
+ return ret;
}
EXPORT_SYMBOL_GPL(gpiod_direction_output);
@@ -3448,8 +3483,6 @@ static void gpio_set_open_drain_value_commit(struct gpio_desc *desc, bool value)
if (value) {
ret = chip->direction_input(chip, offset);
- if (!ret)
- clear_bit(FLAG_IS_OUT, &desc->flags);
} else {
ret = chip->direction_output(chip, offset, 0);
if (!ret)
@@ -3479,8 +3512,6 @@ static void gpio_set_open_source_value_commit(struct gpio_desc *desc, bool value
set_bit(FLAG_IS_OUT, &desc->flags);
} else {
ret = chip->direction_input(chip, offset);
- if (!ret)
- clear_bit(FLAG_IS_OUT, &desc->flags);
}
trace_gpio_direction(desc_to_gpio(desc), !value, ret);
if (ret < 0)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 7bcf86c61999..85b0515c0fdc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -140,7 +140,12 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
return 0;
error_free:
- while (i--) {
+ for (i = 0; i < last_entry; ++i) {
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(array[i].tv.bo);
+
+ amdgpu_bo_unref(&bo);
+ }
+ for (i = first_userptr; i < num_entries; ++i) {
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(array[i].tv.bo);
amdgpu_bo_unref(&bo);
@@ -270,7 +275,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
r = amdgpu_bo_create_list_entry_array(&args->in, &info);
if (r)
- goto error_free;
+ return r;
switch (args->in.operation) {
case AMDGPU_BO_LIST_OP_CREATE:
@@ -283,8 +288,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
mutex_unlock(&fpriv->bo_list_lock);
if (r < 0) {
- amdgpu_bo_list_put(list);
- return r;
+ goto error_put_list;
}
handle = r;
@@ -306,9 +310,8 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
mutex_unlock(&fpriv->bo_list_lock);
if (IS_ERR(old)) {
- amdgpu_bo_list_put(list);
r = PTR_ERR(old);
- goto error_free;
+ goto error_put_list;
}
amdgpu_bo_list_put(old);
@@ -325,8 +328,10 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
return 0;
+error_put_list:
+ amdgpu_bo_list_put(list);
+
error_free:
- if (info)
- kvfree(info);
+ kvfree(info);
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 2e53feed40e2..82823d9a8ba8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -536,7 +536,6 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
list_for_each_entry(lobj, validated, tv.head) {
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo);
- bool binding_userptr = false;
struct mm_struct *usermm;
usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
@@ -553,7 +552,6 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
lobj->user_pages);
- binding_userptr = true;
}
if (p->evictable == lobj)
@@ -563,10 +561,8 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
if (r)
return r;
- if (binding_userptr) {
- kvfree(lobj->user_pages);
- lobj->user_pages = NULL;
- }
+ kvfree(lobj->user_pages);
+ lobj->user_pages = NULL;
}
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 6614d8a6f4c8..2cdaf3b2a721 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -604,8 +604,11 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
continue;
}
- for (i = 0; i < num_entities; i++)
+ for (i = 0; i < num_entities; i++) {
+ mutex_lock(&ctx->adev->lock_reset);
drm_sched_entity_fini(&ctx->entities[0][i].entity);
+ mutex_unlock(&ctx->adev->lock_reset);
+ }
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5a1939dbd4e3..7a6c837c0a85 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2885,6 +2885,13 @@ fence_driver_init:
DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
}
+ /*
+ * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
+ * Otherwise the mgpu fan boost feature will be skipped due to the
+ * gpu instance is counted less.
+ */
+ amdgpu_register_gpu_instance(adev);
+
/* enable clockgating, etc. after ib tests, etc. since some blocks require
* explicit gating rather than handling it automatically.
*/
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 1d4aaa9580f4..82efc1e22e61 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -511,7 +511,7 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
* Also, don't allow GTT domain if the BO doens't have USWC falg set.
*/
if (adev->asic_type >= CHIP_CARRIZO &&
- adev->asic_type <= CHIP_RAVEN &&
+ adev->asic_type < CHIP_RAVEN &&
(adev->flags & AMD_IS_APU) &&
(bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) &&
amdgpu_bo_support_uswc(bo_flags) &&
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 6f8aaf655a9f..b19157b19fa0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1013,9 +1013,10 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x731B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
{0x1002, 0x731F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
/* Navi14 */
- {0x1002, 0x7340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT},
- {0x1002, 0x7341, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT},
- {0x1002, 0x7347, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT},
+ {0x1002, 0x7340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14},
+ {0x1002, 0x7341, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14},
+ {0x1002, 0x7347, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14},
+ {0x1002, 0x734F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14},
/* Renoir */
{0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU|AMD_EXP_HW_SUPPORT},
@@ -1048,6 +1049,41 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
return -ENODEV;
}
+#ifdef CONFIG_DRM_AMDGPU_SI
+ if (!amdgpu_si_support) {
+ switch (flags & AMD_ASIC_MASK) {
+ case CHIP_TAHITI:
+ case CHIP_PITCAIRN:
+ case CHIP_VERDE:
+ case CHIP_OLAND:
+ case CHIP_HAINAN:
+ dev_info(&pdev->dev,
+ "SI support provided by radeon.\n");
+ dev_info(&pdev->dev,
+ "Use radeon.si_support=0 amdgpu.si_support=1 to override.\n"
+ );
+ return -ENODEV;
+ }
+ }
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+ if (!amdgpu_cik_support) {
+ switch (flags & AMD_ASIC_MASK) {
+ case CHIP_KAVERI:
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ dev_info(&pdev->dev,
+ "CIK support provided by radeon.\n");
+ dev_info(&pdev->dev,
+ "Use radeon.cik_support=0 amdgpu.cik_support=1 to override.\n"
+ );
+ return -ENODEV;
+ }
+ }
+#endif
+
/* Get rid of things like offb */
ret = drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, 0, "amdgpudrmfb");
if (ret)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 6ee4021910e2..6d19183b478b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -289,6 +289,7 @@ struct amdgpu_gfx {
uint32_t mec2_feature_version;
bool mec_fw_write_wait;
bool me_fw_write_wait;
+ bool cp_fw_write_wait;
struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS];
unsigned num_gfx_rings;
struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 9d76e0923a5a..96b2a31ccfed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -218,7 +218,7 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
struct amdgpu_ring *ring = to_amdgpu_ring(sched_job->sched);
struct dma_fence *fence = NULL, *finished;
struct amdgpu_job *job;
- int r;
+ int r = 0;
job = to_amdgpu_job(sched_job);
finished = &job->base.s_fence->finished;
@@ -243,6 +243,8 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
job->fence = dma_fence_get(fence);
amdgpu_job_free_resources(job);
+
+ fence = r ? ERR_PTR(r) : fence;
return fence;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index f2c097983f48..a73206784cba 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -144,41 +144,6 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
struct amdgpu_device *adev;
int r, acpi_status;
-#ifdef CONFIG_DRM_AMDGPU_SI
- if (!amdgpu_si_support) {
- switch (flags & AMD_ASIC_MASK) {
- case CHIP_TAHITI:
- case CHIP_PITCAIRN:
- case CHIP_VERDE:
- case CHIP_OLAND:
- case CHIP_HAINAN:
- dev_info(dev->dev,
- "SI support provided by radeon.\n");
- dev_info(dev->dev,
- "Use radeon.si_support=0 amdgpu.si_support=1 to override.\n"
- );
- return -ENODEV;
- }
- }
-#endif
-#ifdef CONFIG_DRM_AMDGPU_CIK
- if (!amdgpu_cik_support) {
- switch (flags & AMD_ASIC_MASK) {
- case CHIP_KAVERI:
- case CHIP_BONAIRE:
- case CHIP_HAWAII:
- case CHIP_KABINI:
- case CHIP_MULLINS:
- dev_info(dev->dev,
- "CIK support provided by radeon.\n");
- dev_info(dev->dev,
- "Use radeon.cik_support=0 amdgpu.cik_support=1 to override.\n"
- );
- return -ENODEV;
- }
- }
-#endif
-
adev = kzalloc(sizeof(struct amdgpu_device), GFP_KERNEL);
if (adev == NULL) {
return -ENOMEM;
@@ -225,7 +190,6 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
pm_runtime_put_autosuspend(dev->dev);
}
- amdgpu_register_gpu_instance(adev);
out:
if (r) {
/* balance pm_runtime_get_sync in amdgpu_driver_unload_kms */
@@ -685,15 +649,19 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
return -ENOMEM;
alloc_size = info->read_mmr_reg.count * sizeof(*regs);
- for (i = 0; i < info->read_mmr_reg.count; i++)
+ amdgpu_gfx_off_ctrl(adev, false);
+ for (i = 0; i < info->read_mmr_reg.count; i++) {
if (amdgpu_asic_read_register(adev, se_num, sh_num,
info->read_mmr_reg.dword_offset + i,
&regs[i])) {
DRM_DEBUG_KMS("unallowed offset %#x\n",
info->read_mmr_reg.dword_offset + i);
kfree(regs);
+ amdgpu_gfx_off_ctrl(adev, true);
return -EFAULT;
}
+ }
+ amdgpu_gfx_off_ctrl(adev, true);
n = copy_to_user(out, regs, min(size, alloc_size));
kfree(regs);
return n ? -EFAULT : 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 1fead0e8b890..7289e1b4fb60 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -453,7 +453,8 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
.interruptible = (bp->type != ttm_bo_type_kernel),
.no_wait_gpu = false,
.resv = bp->resv,
- .flags = TTM_OPT_FLAG_ALLOW_RES_EVICT
+ .flags = bp->type != ttm_bo_type_kernel ?
+ TTM_OPT_FLAG_ALLOW_RES_EVICT : 0
};
struct amdgpu_bo *bo;
unsigned long page_align, size = bp->size;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 4d71537a960d..a46090071034 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -950,21 +950,7 @@ static void psp_print_fw_hdr(struct psp_context *psp,
struct amdgpu_firmware_info *ucode)
{
struct amdgpu_device *adev = psp->adev;
- const struct sdma_firmware_header_v1_0 *sdma_hdr =
- (const struct sdma_firmware_header_v1_0 *)
- adev->sdma.instance[ucode->ucode_id - AMDGPU_UCODE_ID_SDMA0].fw->data;
- const struct gfx_firmware_header_v1_0 *ce_hdr =
- (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
- const struct gfx_firmware_header_v1_0 *pfp_hdr =
- (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
- const struct gfx_firmware_header_v1_0 *me_hdr =
- (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
- const struct gfx_firmware_header_v1_0 *mec_hdr =
- (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
- const struct rlc_firmware_header_v2_0 *rlc_hdr =
- (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
- const struct smc_firmware_header_v1_0 *smc_hdr =
- (const struct smc_firmware_header_v1_0 *)adev->pm.fw->data;
+ struct common_firmware_header *hdr;
switch (ucode->ucode_id) {
case AMDGPU_UCODE_ID_SDMA0:
@@ -975,25 +961,33 @@ static void psp_print_fw_hdr(struct psp_context *psp,
case AMDGPU_UCODE_ID_SDMA5:
case AMDGPU_UCODE_ID_SDMA6:
case AMDGPU_UCODE_ID_SDMA7:
- amdgpu_ucode_print_sdma_hdr(&sdma_hdr->header);
+ hdr = (struct common_firmware_header *)
+ adev->sdma.instance[ucode->ucode_id - AMDGPU_UCODE_ID_SDMA0].fw->data;
+ amdgpu_ucode_print_sdma_hdr(hdr);
break;
case AMDGPU_UCODE_ID_CP_CE:
- amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
+ hdr = (struct common_firmware_header *)adev->gfx.ce_fw->data;
+ amdgpu_ucode_print_gfx_hdr(hdr);
break;
case AMDGPU_UCODE_ID_CP_PFP:
- amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
+ hdr = (struct common_firmware_header *)adev->gfx.pfp_fw->data;
+ amdgpu_ucode_print_gfx_hdr(hdr);
break;
case AMDGPU_UCODE_ID_CP_ME:
- amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
+ hdr = (struct common_firmware_header *)adev->gfx.me_fw->data;
+ amdgpu_ucode_print_gfx_hdr(hdr);
break;
case AMDGPU_UCODE_ID_CP_MEC1:
- amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
+ hdr = (struct common_firmware_header *)adev->gfx.mec_fw->data;
+ amdgpu_ucode_print_gfx_hdr(hdr);
break;
case AMDGPU_UCODE_ID_RLC_G:
- amdgpu_ucode_print_rlc_hdr(&rlc_hdr->header);
+ hdr = (struct common_firmware_header *)adev->gfx.rlc_fw->data;
+ amdgpu_ucode_print_rlc_hdr(hdr);
break;
case AMDGPU_UCODE_ID_SMC:
- amdgpu_ucode_print_smc_hdr(&smc_hdr->header);
+ hdr = (struct common_firmware_header *)adev->pm.fw->data;
+ amdgpu_ucode_print_smc_hdr(hdr);
break;
default:
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index b70b3c45bb29..65044b1b3d4c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -429,13 +429,14 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
* Open up a stream for HW test
*/
int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+ struct amdgpu_bo *bo,
struct dma_fence **fence)
{
const unsigned ib_size_dw = 1024;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
- uint64_t dummy;
+ uint64_t addr;
int i, r;
r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
@@ -444,7 +445,7 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
ib = &job->ibs[0];
- dummy = ib->gpu_addr + 1024;
+ addr = amdgpu_bo_gpu_offset(bo);
/* stitch together an VCE create msg */
ib->length_dw = 0;
@@ -476,8 +477,8 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
ib->ptr[ib->length_dw++] = 0x00000014; /* len */
ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */
- ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
- ib->ptr[ib->length_dw++] = dummy;
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = addr;
ib->ptr[ib->length_dw++] = 0x00000001;
for (i = ib->length_dw; i < ib_size_dw; ++i)
@@ -1110,13 +1111,20 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
struct dma_fence *fence = NULL;
+ struct amdgpu_bo *bo = NULL;
long r;
/* skip vce ring1/2 ib test for now, since it's not reliable */
if (ring != &ring->adev->vce.ring[0])
return 0;
- r = amdgpu_vce_get_create_msg(ring, 1, NULL);
+ r = amdgpu_bo_create_reserved(ring->adev, 512, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &bo, NULL, NULL);
+ if (r)
+ return r;
+
+ r = amdgpu_vce_get_create_msg(ring, 1, bo, NULL);
if (r)
goto error;
@@ -1132,5 +1140,7 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
error:
dma_fence_put(fence);
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref(&bo);
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
index 30ea54dd9117..e802f7d9db0a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
@@ -59,6 +59,7 @@ int amdgpu_vce_entity_init(struct amdgpu_device *adev);
int amdgpu_vce_suspend(struct amdgpu_device *adev);
int amdgpu_vce_resume(struct amdgpu_device *adev);
int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+ struct amdgpu_bo *bo,
struct dma_fence **fence);
int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
bool direct, struct dma_fence **fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 7a6beb2e7c4e..3199e4a5ff12 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -569,13 +569,14 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
}
static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
- struct dma_fence **fence)
+ struct amdgpu_bo *bo,
+ struct dma_fence **fence)
{
const unsigned ib_size_dw = 16;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
- uint64_t dummy;
+ uint64_t addr;
int i, r;
r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
@@ -583,14 +584,14 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
return r;
ib = &job->ibs[0];
- dummy = ib->gpu_addr + 1024;
+ addr = amdgpu_bo_gpu_offset(bo);
ib->length_dw = 0;
ib->ptr[ib->length_dw++] = 0x00000018;
ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
ib->ptr[ib->length_dw++] = handle;
- ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
- ib->ptr[ib->length_dw++] = dummy;
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = addr;
ib->ptr[ib->length_dw++] = 0x0000000b;
ib->ptr[ib->length_dw++] = 0x00000014;
@@ -621,13 +622,14 @@ err:
}
static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
- struct dma_fence **fence)
+ struct amdgpu_bo *bo,
+ struct dma_fence **fence)
{
const unsigned ib_size_dw = 16;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
- uint64_t dummy;
+ uint64_t addr;
int i, r;
r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
@@ -635,14 +637,14 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
return r;
ib = &job->ibs[0];
- dummy = ib->gpu_addr + 1024;
+ addr = amdgpu_bo_gpu_offset(bo);
ib->length_dw = 0;
ib->ptr[ib->length_dw++] = 0x00000018;
ib->ptr[ib->length_dw++] = 0x00000001;
ib->ptr[ib->length_dw++] = handle;
- ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
- ib->ptr[ib->length_dw++] = dummy;
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = addr;
ib->ptr[ib->length_dw++] = 0x0000000b;
ib->ptr[ib->length_dw++] = 0x00000014;
@@ -675,13 +677,20 @@ err:
int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
struct dma_fence *fence = NULL;
+ struct amdgpu_bo *bo = NULL;
long r;
- r = amdgpu_vcn_enc_get_create_msg(ring, 1, NULL);
+ r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &bo, NULL, NULL);
+ if (r)
+ return r;
+
+ r = amdgpu_vcn_enc_get_create_msg(ring, 1, bo, NULL);
if (r)
goto error;
- r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &fence);
+ r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, bo, &fence);
if (r)
goto error;
@@ -693,6 +702,8 @@ int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
error:
dma_fence_put(fence);
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref(&bo);
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 957811b73672..53090eae0082 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -93,7 +93,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1[] =
{
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL, 0xfcff8fff, 0xf8000100),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xc0000000, 0xc0000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xcd000000, 0x0d000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0x60000ff0, 0x60000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x40000000, 0x40000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100),
@@ -140,7 +140,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_1[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x003c0014),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xc0000000, 0xc0000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xcd000000, 0x0d000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xf8ff0fff, 0x60000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x40000ff0, 0x40000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100),
@@ -179,7 +179,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_2[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0x003e001f, 0x003c0014),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100),
- SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xff7f0fff, 0xc0000100),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xff7f0fff, 0x0d000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xffffcfff, 0x60000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0xffff0fff, 0x40000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100),
@@ -564,6 +564,32 @@ static void gfx_v10_0_free_microcode(struct amdgpu_device *adev)
kfree(adev->gfx.rlc.register_list_format);
}
+static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev)
+{
+ adev->gfx.cp_fw_write_wait = false;
+
+ switch (adev->asic_type) {
+ case CHIP_NAVI10:
+ case CHIP_NAVI12:
+ case CHIP_NAVI14:
+ if ((adev->gfx.me_fw_version >= 0x00000046) &&
+ (adev->gfx.me_feature_version >= 27) &&
+ (adev->gfx.pfp_fw_version >= 0x00000068) &&
+ (adev->gfx.pfp_feature_version >= 27) &&
+ (adev->gfx.mec_fw_version >= 0x0000005b) &&
+ (adev->gfx.mec_feature_version >= 27))
+ adev->gfx.cp_fw_write_wait = true;
+ break;
+ default:
+ break;
+ }
+
+ if (adev->gfx.cp_fw_write_wait == false)
+ DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
+ GRBM requires 1-cycle delay in cp firmware\n");
+}
+
+
static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
const struct rlc_firmware_header_v2_1 *rlc_hdr;
@@ -832,6 +858,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
}
}
+ gfx_v10_0_check_fw_write_wait(adev);
out:
if (err) {
dev_err(adev->dev,
@@ -4765,6 +4792,24 @@ static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
}
+static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+ struct amdgpu_device *adev = ring->adev;
+ bool fw_version_ok = false;
+
+ fw_version_ok = adev->gfx.cp_fw_write_wait;
+
+ if (fw_version_ok)
+ gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
+ ref, mask, 0x20);
+ else
+ amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
+ ref, mask);
+}
+
static void
gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
uint32_t me, uint32_t pipe,
@@ -5155,6 +5200,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
.emit_tmz = gfx_v10_0_ring_emit_tmz,
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
};
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
@@ -5188,6 +5234,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
};
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
@@ -5218,6 +5265,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
.emit_rreg = gfx_v10_0_ring_emit_rreg,
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
};
static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index dcadc73bffd2..97cf0b536873 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -973,6 +973,13 @@ static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
adev->gfx.me_fw_write_wait = false;
adev->gfx.mec_fw_write_wait = false;
+ if ((adev->gfx.mec_fw_version < 0x000001a5) ||
+ (adev->gfx.mec_feature_version < 46) ||
+ (adev->gfx.pfp_fw_version < 0x000000b7) ||
+ (adev->gfx.pfp_feature_version < 46))
+ DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \
+ GRBM requires 1-cycle delay in cp firmware\n");
+
switch (adev->asic_type) {
case CHIP_VEGA10:
if ((adev->gfx.me_fw_version >= 0x0000009c) &&
@@ -1031,8 +1038,13 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
case CHIP_VEGA20:
break;
case CHIP_RAVEN:
- if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
- &&((adev->gfx.rlc_fw_version != 106 &&
+ /* Disable GFXOFF on original raven. There are combinations
+ * of sbios and platforms that are not stable.
+ */
+ if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8))
+ adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
+ else if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
+ &&((adev->gfx.rlc_fw_version != 106 &&
adev->gfx.rlc_fw_version < 531) ||
(adev->gfx.rlc_fw_version == 53815) ||
(adev->gfx.rlc_feature_version < 1) ||
@@ -1044,6 +1056,12 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
AMD_PG_SUPPORT_CP |
AMD_PG_SUPPORT_RLC_SMU_HS;
break;
+ case CHIP_RENOIR:
+ if (adev->pm.pp_feature & PP_GFXOFF_MASK)
+ adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_CP |
+ AMD_PG_SUPPORT_RLC_SMU_HS;
+ break;
default:
break;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
index 8b789f750b72..db10640a3b2f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c
@@ -151,6 +151,15 @@ static void gfxhub_v2_0_init_cache_regs(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL2, tmp);
tmp = mmGCVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL3, tmp);
tmp = mmGCVM_L2_CNTL4_DEFAULT;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 241a4e57cf4a..5c7d5f73f54f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -309,6 +309,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
job->vm_needs_flush = true;
+ job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
amdgpu_ring_pad_ib(ring, &job->ibs[0]);
r = amdgpu_job_submit(job, &adev->mman.entity,
AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
@@ -343,11 +344,9 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
upper_32_bits(pd_addr));
- amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
-
- /* wait for the invalidate to complete */
- amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
- 1 << vmid, 1 << vmid);
+ amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng,
+ hub->vm_inv_eng0_ack + eng,
+ req, 1 << vmid);
return pd_addr;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
index 3542c203c3c8..b39bea6f54e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
@@ -137,6 +137,15 @@ static void mmhub_v2_0_init_cache_regs(struct amdgpu_device *adev)
WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL2, tmp);
tmp = mmMMVM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL3, tmp);
tmp = mmMMVM_L2_CNTL4_DEFAULT;
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
index 0cf7ef44b4b5..9ed178fa241c 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
@@ -219,6 +219,15 @@ static void mmhub_v9_4_init_cache_regs(struct amdgpu_device *adev, int hubid)
hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
tmp = mmVML2PF0_VM_L2_CNTL3_DEFAULT;
+ if (adev->gmc.translate_further) {
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3, BANK_SELECT, 12);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
+ } else {
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3, BANK_SELECT, 9);
+ tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3,
+ L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
+ }
WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL3,
hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 78452cf0115d..4554e72c8378 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -254,6 +254,7 @@ static const struct soc15_reg_golden golden_settings_sdma_4_3[] = {
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x00000000)
};
static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index fa2f70ce2e2b..8493bfbbc148 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -1129,7 +1129,7 @@ static void sdma_v5_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, addr & 0xfffffffc);
amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
amdgpu_ring_write(ring, seq); /* reference */
- amdgpu_ring_write(ring, 0xfffffff); /* mask */
+ amdgpu_ring_write(ring, 0xffffffff); /* mask */
amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
}
@@ -1173,6 +1173,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
}
+static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg0, uint32_t reg1,
+ uint32_t ref, uint32_t mask)
+{
+ amdgpu_ring_emit_wreg(ring, reg0, ref);
+ /* wait for a cycle to reset vm_inv_eng*_ack */
+ amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
+ amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
+}
+
static int sdma_v5_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1588,7 +1598,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
6 + /* sdma_v5_0_ring_emit_pipeline_sync */
/* sdma_v5_0_ring_emit_vm_flush */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
- SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 +
10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
.emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */
.emit_ib = sdma_v5_0_ring_emit_ib,
@@ -1602,6 +1612,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
.pad_ib = sdma_v5_0_ring_pad_ib,
.emit_wreg = sdma_v5_0_ring_emit_wreg,
.emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
+ .emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
.init_cond_exec = sdma_v5_0_ring_init_cond_exec,
.patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
.preempt_ib = sdma_v5_0_ring_preempt_ib,
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index f8ab80c8801b..4ccfcdf8f16a 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -1186,11 +1186,6 @@ static int soc15_common_early_init(void *handle)
AMD_PG_SUPPORT_VCN |
AMD_PG_SUPPORT_VCN_DPG;
adev->external_rev_id = adev->rev_id + 0x91;
-
- if (adev->pm.pp_feature & PP_GFXOFF_MASK)
- adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
- AMD_PG_SUPPORT_CP |
- AMD_PG_SUPPORT_RLC_SMU_HS;
break;
default:
/* FIXME: not supported yet */
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index 670784a78512..217084d56ab8 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -206,13 +206,14 @@ static int uvd_v6_0_enc_ring_test_ring(struct amdgpu_ring *ring)
* Open up a stream for HW test
*/
static int uvd_v6_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+ struct amdgpu_bo *bo,
struct dma_fence **fence)
{
const unsigned ib_size_dw = 16;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
- uint64_t dummy;
+ uint64_t addr;
int i, r;
r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
@@ -220,15 +221,15 @@ static int uvd_v6_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle
return r;
ib = &job->ibs[0];
- dummy = ib->gpu_addr + 1024;
+ addr = amdgpu_bo_gpu_offset(bo);
ib->length_dw = 0;
ib->ptr[ib->length_dw++] = 0x00000018;
ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
ib->ptr[ib->length_dw++] = handle;
ib->ptr[ib->length_dw++] = 0x00010000;
- ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
- ib->ptr[ib->length_dw++] = dummy;
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = addr;
ib->ptr[ib->length_dw++] = 0x00000014;
ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
@@ -268,13 +269,14 @@ err:
*/
static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring,
uint32_t handle,
+ struct amdgpu_bo *bo,
struct dma_fence **fence)
{
const unsigned ib_size_dw = 16;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
- uint64_t dummy;
+ uint64_t addr;
int i, r;
r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
@@ -282,15 +284,15 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring,
return r;
ib = &job->ibs[0];
- dummy = ib->gpu_addr + 1024;
+ addr = amdgpu_bo_gpu_offset(bo);
ib->length_dw = 0;
ib->ptr[ib->length_dw++] = 0x00000018;
ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
ib->ptr[ib->length_dw++] = handle;
ib->ptr[ib->length_dw++] = 0x00010000;
- ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
- ib->ptr[ib->length_dw++] = dummy;
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = addr;
ib->ptr[ib->length_dw++] = 0x00000014;
ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
@@ -327,13 +329,20 @@ err:
static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
struct dma_fence *fence = NULL;
+ struct amdgpu_bo *bo = NULL;
long r;
- r = uvd_v6_0_enc_get_create_msg(ring, 1, NULL);
+ r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &bo, NULL, NULL);
+ if (r)
+ return r;
+
+ r = uvd_v6_0_enc_get_create_msg(ring, 1, bo, NULL);
if (r)
goto error;
- r = uvd_v6_0_enc_get_destroy_msg(ring, 1, &fence);
+ r = uvd_v6_0_enc_get_destroy_msg(ring, 1, bo, &fence);
if (r)
goto error;
@@ -345,6 +354,8 @@ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
error:
dma_fence_put(fence);
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref(&bo);
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index 01f658fa72c6..0995378d8263 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -214,13 +214,14 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring)
* Open up a stream for HW test
*/
static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+ struct amdgpu_bo *bo,
struct dma_fence **fence)
{
const unsigned ib_size_dw = 16;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
- uint64_t dummy;
+ uint64_t addr;
int i, r;
r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
@@ -228,15 +229,15 @@ static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle
return r;
ib = &job->ibs[0];
- dummy = ib->gpu_addr + 1024;
+ addr = amdgpu_bo_gpu_offset(bo);
ib->length_dw = 0;
ib->ptr[ib->length_dw++] = 0x00000018;
ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
ib->ptr[ib->length_dw++] = handle;
ib->ptr[ib->length_dw++] = 0x00000000;
- ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
- ib->ptr[ib->length_dw++] = dummy;
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = addr;
ib->ptr[ib->length_dw++] = 0x00000014;
ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
@@ -275,13 +276,14 @@ err:
* Close up a stream for HW test or if userspace failed to do so
*/
static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
- struct dma_fence **fence)
+ struct amdgpu_bo *bo,
+ struct dma_fence **fence)
{
const unsigned ib_size_dw = 16;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
- uint64_t dummy;
+ uint64_t addr;
int i, r;
r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
@@ -289,15 +291,15 @@ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handl
return r;
ib = &job->ibs[0];
- dummy = ib->gpu_addr + 1024;
+ addr = amdgpu_bo_gpu_offset(bo);
ib->length_dw = 0;
ib->ptr[ib->length_dw++] = 0x00000018;
ib->ptr[ib->length_dw++] = 0x00000001;
ib->ptr[ib->length_dw++] = handle;
ib->ptr[ib->length_dw++] = 0x00000000;
- ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
- ib->ptr[ib->length_dw++] = dummy;
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = addr;
ib->ptr[ib->length_dw++] = 0x00000014;
ib->ptr[ib->length_dw++] = 0x00000002;
@@ -334,13 +336,20 @@ err:
static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
struct dma_fence *fence = NULL;
+ struct amdgpu_bo *bo = NULL;
long r;
- r = uvd_v7_0_enc_get_create_msg(ring, 1, NULL);
+ r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &bo, NULL, NULL);
+ if (r)
+ return r;
+
+ r = uvd_v7_0_enc_get_create_msg(ring, 1, bo, NULL);
if (r)
goto error;
- r = uvd_v7_0_enc_get_destroy_msg(ring, 1, &fence);
+ r = uvd_v7_0_enc_get_destroy_msg(ring, 1, bo, &fence);
if (r)
goto error;
@@ -352,6 +361,8 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
error:
dma_fence_put(fence);
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref(&bo);
return r;
}
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index a52f0b13a2c8..4139f129eafb 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -688,7 +688,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
*/
if (adev->flags & AMD_IS_APU &&
adev->asic_type >= CHIP_CARRIZO &&
- adev->asic_type <= CHIP_RAVEN)
+ adev->asic_type < CHIP_RAVEN)
init_data.flags.gpu_vm_support = true;
if (amdgpu_dc_feature_mask & DC_FBC_MASK)
diff --git a/drivers/gpu/drm/amd/display/dc/calcs/Makefile b/drivers/gpu/drm/amd/display/dc/calcs/Makefile
index 985633c08a26..26c6d735cdc7 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/calcs/Makefile
@@ -24,15 +24,20 @@
# It calculates Bandwidth and Watermarks values for HW programming
#
-ifneq ($(call cc-option, -mpreferred-stack-boundary=4),)
- cc_stack_align := -mpreferred-stack-boundary=4
-else ifneq ($(call cc-option, -mstack-alignment=16),)
- cc_stack_align := -mstack-alignment=16
-endif
+calcs_ccflags := -mhard-float -msse
-calcs_ccflags := -mhard-float -msse $(cc_stack_align)
+ifdef CONFIG_CC_IS_GCC
+ifeq ($(call cc-ifversion, -lt, 0701, y), y)
+IS_OLD_GCC = 1
+endif
+endif
-ifdef CONFIG_CC_IS_CLANG
+ifdef IS_OLD_GCC
+# Stack alignment mismatch, proceed with caution.
+# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
+# (8B stack alignment).
+calcs_ccflags += -mpreferred-stack-boundary=4
+else
calcs_ccflags += -msse2
endif
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 5d1adeda4d90..4b8819c27fcd 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -580,6 +580,10 @@ static bool construct(struct dc *dc,
#ifdef CONFIG_DRM_AMD_DC_DCN2_0
// Allocate memory for the vm_helper
dc->vm_helper = kzalloc(sizeof(struct vm_helper), GFP_KERNEL);
+ if (!dc->vm_helper) {
+ dm_error("%s: failed to create dc->vm_helper\n", __func__);
+ goto fail;
+ }
#endif
memcpy(&dc->bb_overrides, &init_params->bb_overrides, sizeof(dc->bb_overrides));
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c
index 505967b48e14..51991bf26a93 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c
@@ -374,6 +374,7 @@ void dal_ddc_service_i2c_query_dp_dual_mode_adaptor(
enum display_dongle_type *dongle = &sink_cap->dongle_type;
uint8_t type2_dongle_buf[DP_ADAPTOR_TYPE2_SIZE];
bool is_type2_dongle = false;
+ int retry_count = 2;
struct dp_hdmi_dongle_signature_data *dongle_signature;
/* Assume we have no valid DP passive dongle connected */
@@ -386,13 +387,24 @@ void dal_ddc_service_i2c_query_dp_dual_mode_adaptor(
DP_HDMI_DONGLE_ADDRESS,
type2_dongle_buf,
sizeof(type2_dongle_buf))) {
- *dongle = DISPLAY_DONGLE_DP_DVI_DONGLE;
- sink_cap->max_hdmi_pixel_clock = DP_ADAPTOR_DVI_MAX_TMDS_CLK;
+ /* Passive HDMI dongles can sometimes fail here without retrying*/
+ while (retry_count > 0) {
+ if (i2c_read(ddc,
+ DP_HDMI_DONGLE_ADDRESS,
+ type2_dongle_buf,
+ sizeof(type2_dongle_buf)))
+ break;
+ retry_count--;
+ }
+ if (retry_count == 0) {
+ *dongle = DISPLAY_DONGLE_DP_DVI_DONGLE;
+ sink_cap->max_hdmi_pixel_clock = DP_ADAPTOR_DVI_MAX_TMDS_CLK;
- CONN_DATA_DETECT(ddc->link, type2_dongle_buf, sizeof(type2_dongle_buf),
- "DP-DVI passive dongle %dMhz: ",
- DP_ADAPTOR_DVI_MAX_TMDS_CLK / 1000);
- return;
+ CONN_DATA_DETECT(ddc->link, type2_dongle_buf, sizeof(type2_dongle_buf),
+ "DP-DVI passive dongle %dMhz: ",
+ DP_ADAPTOR_DVI_MAX_TMDS_CLK / 1000);
+ return;
+ }
}
/* Check if Type 2 dongle.*/
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index 8f70295179ff..f25ac17f47fa 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -404,6 +404,9 @@ bool resource_are_streams_timing_synchronizable(
if (stream1->view_format != stream2->view_format)
return false;
+ if (stream1->ignore_msa_timing_param || stream2->ignore_msa_timing_param)
+ return false;
+
return true;
}
static bool is_dp_and_hdmi_sharable(
@@ -1540,6 +1543,9 @@ bool dc_is_stream_unchanged(
if (!are_stream_backends_same(old_stream, stream))
return false;
+ if (old_stream->ignore_msa_timing_param != stream->ignore_msa_timing_param)
+ return false;
+
return true;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
index 01c7e30b9ce1..bbd6e01b3eca 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
@@ -393,6 +393,10 @@ bool cm_helper_translate_curve_to_hw_format(
rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index];
rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index];
+ rgb_resulted[hw_points].red = rgb_resulted[hw_points - 1].red;
+ rgb_resulted[hw_points].green = rgb_resulted[hw_points - 1].green;
+ rgb_resulted[hw_points].blue = rgb_resulted[hw_points - 1].blue;
+
// All 3 color channels have same x
corner_points[0].red.x = dc_fixpt_pow(dc_fixpt_from_int(2),
dc_fixpt_from_int(region_start));
@@ -464,13 +468,6 @@ bool cm_helper_translate_curve_to_hw_format(
i = 1;
while (i != hw_points + 1) {
- if (dc_fixpt_lt(rgb_plus_1->red, rgb->red))
- rgb_plus_1->red = rgb->red;
- if (dc_fixpt_lt(rgb_plus_1->green, rgb->green))
- rgb_plus_1->green = rgb->green;
- if (dc_fixpt_lt(rgb_plus_1->blue, rgb->blue))
- rgb_plus_1->blue = rgb->blue;
-
rgb->delta_red = dc_fixpt_sub(rgb_plus_1->red, rgb->red);
rgb->delta_green = dc_fixpt_sub(rgb_plus_1->green, rgb->green);
rgb->delta_blue = dc_fixpt_sub(rgb_plus_1->blue, rgb->blue);
@@ -562,6 +559,10 @@ bool cm_helper_translate_curve_to_degamma_hw_format(
rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index];
rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index];
+ rgb_resulted[hw_points].red = rgb_resulted[hw_points - 1].red;
+ rgb_resulted[hw_points].green = rgb_resulted[hw_points - 1].green;
+ rgb_resulted[hw_points].blue = rgb_resulted[hw_points - 1].blue;
+
corner_points[0].red.x = dc_fixpt_pow(dc_fixpt_from_int(2),
dc_fixpt_from_int(region_start));
corner_points[0].green.x = corner_points[0].red.x;
@@ -624,13 +625,6 @@ bool cm_helper_translate_curve_to_degamma_hw_format(
i = 1;
while (i != hw_points + 1) {
- if (dc_fixpt_lt(rgb_plus_1->red, rgb->red))
- rgb_plus_1->red = rgb->red;
- if (dc_fixpt_lt(rgb_plus_1->green, rgb->green))
- rgb_plus_1->green = rgb->green;
- if (dc_fixpt_lt(rgb_plus_1->blue, rgb->blue))
- rgb_plus_1->blue = rgb->blue;
-
rgb->delta_red = dc_fixpt_sub(rgb_plus_1->red, rgb->red);
rgb->delta_green = dc_fixpt_sub(rgb_plus_1->green, rgb->green);
rgb->delta_blue = dc_fixpt_sub(rgb_plus_1->blue, rgb->blue);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
index ddb8d5649e79..63f3bddba7da 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
@@ -10,15 +10,20 @@ ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
DCN20 += dcn20_dsc.o
endif
-ifneq ($(call cc-option, -mpreferred-stack-boundary=4),)
- cc_stack_align := -mpreferred-stack-boundary=4
-else ifneq ($(call cc-option, -mstack-alignment=16),)
- cc_stack_align := -mstack-alignment=16
-endif
+CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -mhard-float -msse
-CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -mhard-float -msse $(cc_stack_align)
+ifdef CONFIG_CC_IS_GCC
+ifeq ($(call cc-ifversion, -lt, 0701, y), y)
+IS_OLD_GCC = 1
+endif
+endif
-ifdef CONFIG_CC_IS_CLANG
+ifdef IS_OLD_GCC
+# Stack alignment mismatch, proceed with caution.
+# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
+# (8B stack alignment).
+CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o += -mpreferred-stack-boundary=4
+else
CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o += -msse2
endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
index 5a2763daff4d..6b2f2f1a1c9c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
@@ -814,7 +814,7 @@ static const struct resource_caps res_cap_nv14 = {
.num_audio = 6,
.num_stream_encoder = 5,
.num_pll = 5,
- .num_dwb = 0,
+ .num_dwb = 1,
.num_ddc = 5,
};
@@ -1107,6 +1107,11 @@ struct stream_encoder *dcn20_stream_encoder_create(
if (!enc1)
return NULL;
+ if (ASICREV_IS_NAVI14_M(ctx->asic_id.hw_internal_rev)) {
+ if (eng_id >= ENGINE_ID_DIGD)
+ eng_id++;
+ }
+
dcn20_stream_encoder_construct(enc1, ctx, ctx->dc_bios, eng_id,
&stream_enc_regs[eng_id],
&se_shift, &se_mask);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile
index ef673bffc241..ff50ae71fe27 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile
@@ -3,15 +3,20 @@
DCN21 = dcn21_hubp.o dcn21_hubbub.o dcn21_resource.o
-ifneq ($(call cc-option, -mpreferred-stack-boundary=4),)
- cc_stack_align := -mpreferred-stack-boundary=4
-else ifneq ($(call cc-option, -mstack-alignment=16),)
- cc_stack_align := -mstack-alignment=16
-endif
+CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mhard-float -msse
-CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mhard-float -msse $(cc_stack_align)
+ifdef CONFIG_CC_IS_GCC
+ifeq ($(call cc-ifversion, -lt, 0701, y), y)
+IS_OLD_GCC = 1
+endif
+endif
-ifdef CONFIG_CC_IS_CLANG
+ifdef IS_OLD_GCC
+# Stack alignment mismatch, proceed with caution.
+# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
+# (8B stack alignment).
+CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o += -mpreferred-stack-boundary=4
+else
CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o += -msse2
endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
index 5b2a65b42403..8df251626e22 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -24,15 +24,20 @@
# It provides the general basic services required by other DAL
# subcomponents.
-ifneq ($(call cc-option, -mpreferred-stack-boundary=4),)
- cc_stack_align := -mpreferred-stack-boundary=4
-else ifneq ($(call cc-option, -mstack-alignment=16),)
- cc_stack_align := -mstack-alignment=16
-endif
+dml_ccflags := -mhard-float -msse
-dml_ccflags := -mhard-float -msse $(cc_stack_align)
+ifdef CONFIG_CC_IS_GCC
+ifeq ($(call cc-ifversion, -lt, 0701, y), y)
+IS_OLD_GCC = 1
+endif
+endif
-ifdef CONFIG_CC_IS_CLANG
+ifdef IS_OLD_GCC
+# Stack alignment mismatch, proceed with caution.
+# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
+# (8B stack alignment).
+dml_ccflags += -mpreferred-stack-boundary=4
+else
dml_ccflags += -msse2
endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c
index 649883777f62..6c6c486b774a 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20.c
@@ -2577,7 +2577,8 @@ static void dml20_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPer
mode_lib->vba.MinActiveDRAMClockChangeMargin
+ mode_lib->vba.DRAMClockChangeLatency;
- if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 0) {
+ if (mode_lib->vba.MinActiveDRAMClockChangeMargin > 50) {
+ mode_lib->vba.DRAMClockChangeWatermark += 25;
mode_lib->vba.DRAMClockChangeSupport[0][0] = dm_dram_clock_change_vactive;
} else {
if (mode_lib->vba.SynchronizedVBlank || mode_lib->vba.NumberOfActivePlanes == 1) {
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/Makefile b/drivers/gpu/drm/amd/display/dc/dsc/Makefile
index b456cd23c6fa..970737217e53 100644
--- a/drivers/gpu/drm/amd/display/dc/dsc/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dsc/Makefile
@@ -1,15 +1,20 @@
#
# Makefile for the 'dsc' sub-component of DAL.
-ifneq ($(call cc-option, -mpreferred-stack-boundary=4),)
- cc_stack_align := -mpreferred-stack-boundary=4
-else ifneq ($(call cc-option, -mstack-alignment=16),)
- cc_stack_align := -mstack-alignment=16
-endif
+dsc_ccflags := -mhard-float -msse
-dsc_ccflags := -mhard-float -msse $(cc_stack_align)
+ifdef CONFIG_CC_IS_GCC
+ifeq ($(call cc-ifversion, -lt, 0701, y), y)
+IS_OLD_GCC = 1
+endif
+endif
-ifdef CONFIG_CC_IS_CLANG
+ifdef IS_OLD_GCC
+# Stack alignment mismatch, proceed with caution.
+# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
+# (8B stack alignment).
+dsc_ccflags += -mpreferred-stack-boundary=4
+else
dsc_ccflags += -msse2
endif
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index 34f95e0e3ea4..203ce4b1028f 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -3478,18 +3478,31 @@ static int smu7_get_pp_table_entry(struct pp_hwmgr *hwmgr,
static int smu7_get_gpu_power(struct pp_hwmgr *hwmgr, u32 *query)
{
+ struct amdgpu_device *adev = hwmgr->adev;
int i;
u32 tmp = 0;
if (!query)
return -EINVAL;
- smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_GetCurrPkgPwr, 0);
- tmp = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0);
- *query = tmp;
+ /*
+ * PPSMC_MSG_GetCurrPkgPwr is not supported on:
+ * - Hawaii
+ * - Bonaire
+ * - Fiji
+ * - Tonga
+ */
+ if ((adev->asic_type != CHIP_HAWAII) &&
+ (adev->asic_type != CHIP_BONAIRE) &&
+ (adev->asic_type != CHIP_FIJI) &&
+ (adev->asic_type != CHIP_TONGA)) {
+ smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_GetCurrPkgPwr, 0);
+ tmp = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0);
+ *query = tmp;
- if (tmp != 0)
- return 0;
+ if (tmp != 0)
+ return 0;
+ }
smum_send_msg_to_smc(hwmgr, PPSMC_MSG_PmStatusLogStart);
cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index d08493b67b67..beacfffbdc3e 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -5098,9 +5098,7 @@ static void vega10_odn_update_soc_table(struct pp_hwmgr *hwmgr,
if (type == PP_OD_EDIT_SCLK_VDDC_TABLE) {
podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_sclk;
- for (i = 0; i < podn_vdd_dep->count - 1; i++)
- od_vddc_lookup_table->entries[i].us_vdd = podn_vdd_dep->entries[i].vddc;
- if (od_vddc_lookup_table->entries[i].us_vdd < podn_vdd_dep->entries[i].vddc)
+ for (i = 0; i < podn_vdd_dep->count; i++)
od_vddc_lookup_table->entries[i].us_vdd = podn_vdd_dep->entries[i].vddc;
} else if (type == PP_OD_EDIT_MCLK_VDDC_TABLE) {
podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_mclk;
diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
index 0b461404af6b..328e258a6895 100644
--- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
@@ -205,7 +205,7 @@ static struct smu_11_0_cmn2aisc_mapping navi10_workload_map[PP_SMC_POWER_PROFILE
WORKLOAD_MAP(PP_SMC_POWER_PROFILE_POWERSAVING, WORKLOAD_PPLIB_POWER_SAVING_BIT),
WORKLOAD_MAP(PP_SMC_POWER_PROFILE_VIDEO, WORKLOAD_PPLIB_VIDEO_BIT),
WORKLOAD_MAP(PP_SMC_POWER_PROFILE_VR, WORKLOAD_PPLIB_VR_BIT),
- WORKLOAD_MAP(PP_SMC_POWER_PROFILE_COMPUTE, WORKLOAD_PPLIB_CUSTOM_BIT),
+ WORKLOAD_MAP(PP_SMC_POWER_PROFILE_COMPUTE, WORKLOAD_PPLIB_COMPUTE_BIT),
WORKLOAD_MAP(PP_SMC_POWER_PROFILE_CUSTOM, WORKLOAD_PPLIB_CUSTOM_BIT),
};
@@ -759,6 +759,12 @@ static int navi10_force_clk_levels(struct smu_context *smu,
case SMU_UCLK:
case SMU_DCEFCLK:
case SMU_FCLK:
+ /* There is only 2 levels for fine grained DPM */
+ if (navi10_is_support_fine_grained_dpm(smu, clk_type)) {
+ soft_max_level = (soft_max_level >= 1 ? 1 : 0);
+ soft_min_level = (soft_min_level >= 1 ? 1 : 0);
+ }
+
ret = smu_get_dpm_freq_by_index(smu, clk_type, soft_min_level, &min_freq);
if (ret)
return size;
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
index dc754447f0dd..23c12018dbc1 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
@@ -655,7 +655,7 @@ static int polaris10_populate_smc_mvdd_table(struct pp_hwmgr *hwmgr,
count = SMU_MAX_SMIO_LEVELS;
for (level = 0; level < count; level++) {
table->SmioTable2.Pattern[level].Voltage =
- PP_HOST_TO_SMC_US(data->mvdd_voltage_table.entries[count].value * VOLTAGE_SCALE);
+ PP_HOST_TO_SMC_US(data->mvdd_voltage_table.entries[level].value * VOLTAGE_SCALE);
/* Index into DpmTable.Smio. Drive bits from Smio entry to get this voltage level.*/
table->SmioTable2.Pattern[level].Smio =
(uint8_t) level;
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c
index 7c960b07746f..ae18fbcb26fb 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/vegam_smumgr.c
@@ -456,7 +456,7 @@ static int vegam_populate_smc_mvdd_table(struct pp_hwmgr *hwmgr,
count = SMU_MAX_SMIO_LEVELS;
for (level = 0; level < count; level++) {
table->SmioTable2.Pattern[level].Voltage = PP_HOST_TO_SMC_US(
- data->mvdd_voltage_table.entries[count].value * VOLTAGE_SCALE);
+ data->mvdd_voltage_table.entries[level].value * VOLTAGE_SCALE);
/* Index into DpmTable.Smio. Drive bits from Smio entry to get this voltage level.*/
table->SmioTable2.Pattern[level].Smio =
(uint8_t) level;
diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
index bbd8ebd58434..92c393f613d3 100644
--- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
@@ -219,7 +219,7 @@ static struct smu_11_0_cmn2aisc_mapping vega20_workload_map[PP_SMC_POWER_PROFILE
WORKLOAD_MAP(PP_SMC_POWER_PROFILE_POWERSAVING, WORKLOAD_PPLIB_POWER_SAVING_BIT),
WORKLOAD_MAP(PP_SMC_POWER_PROFILE_VIDEO, WORKLOAD_PPLIB_VIDEO_BIT),
WORKLOAD_MAP(PP_SMC_POWER_PROFILE_VR, WORKLOAD_PPLIB_VR_BIT),
- WORKLOAD_MAP(PP_SMC_POWER_PROFILE_COMPUTE, WORKLOAD_PPLIB_CUSTOM_BIT),
+ WORKLOAD_MAP(PP_SMC_POWER_PROFILE_COMPUTE, WORKLOAD_PPLIB_COMPUTE_BIT),
WORKLOAD_MAP(PP_SMC_POWER_PROFILE_CUSTOM, WORKLOAD_PPLIB_CUSTOM_BIT),
};
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c
index 8820ce15ce37..ae274902ff92 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c
@@ -82,7 +82,8 @@ static void komeda_kms_commit_tail(struct drm_atomic_state *old_state)
drm_atomic_helper_commit_modeset_disables(dev, old_state);
- drm_atomic_helper_commit_planes(dev, old_state, 0);
+ drm_atomic_helper_commit_planes(dev, old_state,
+ DRM_PLANE_COMMIT_ACTIVE_ONLY);
drm_atomic_helper_commit_modeset_enables(dev, old_state);
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c
index ea26bc9c2d00..b848270e0a1f 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c
@@ -564,8 +564,8 @@ komeda_splitter_validate(struct komeda_splitter *splitter,
}
if (!in_range(&splitter->vsize, dflow->in_h)) {
- DRM_DEBUG_ATOMIC("split in_in: %d exceed the acceptable range.\n",
- dflow->in_w);
+ DRM_DEBUG_ATOMIC("split in_h: %d exceeds the acceptable range.\n",
+ dflow->in_h);
return -EINVAL;
}
diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c
index cebc8e620820..8a8d605021f0 100644
--- a/drivers/gpu/drm/bridge/tc358767.c
+++ b/drivers/gpu/drm/bridge/tc358767.c
@@ -728,6 +728,8 @@ static int tc_set_video_mode(struct tc_data *tc,
int lower_margin = mode->vsync_start - mode->vdisplay;
int vsync_len = mode->vsync_end - mode->vsync_start;
u32 dp0_syncval;
+ u32 bits_per_pixel = 24;
+ u32 in_bw, out_bw;
/*
* Recommended maximum number of symbols transferred in a transfer unit:
@@ -735,7 +737,10 @@ static int tc_set_video_mode(struct tc_data *tc,
* (output active video bandwidth in bytes))
* Must be less than tu_size.
*/
- max_tu_symbol = TU_SIZE_RECOMMENDED - 1;
+
+ in_bw = mode->clock * bits_per_pixel / 8;
+ out_bw = tc->link.base.num_lanes * tc->link.base.rate;
+ max_tu_symbol = DIV_ROUND_UP(in_bw * TU_SIZE_RECOMMENDED, out_bw);
dev_dbg(tc->dev, "set mode %dx%d\n",
mode->hdisplay, mode->vdisplay);
diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index 3ef2ac52ce94..2dd2cd87cdbb 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -1581,8 +1581,11 @@ static void commit_tail(struct drm_atomic_state *old_state)
{
struct drm_device *dev = old_state->dev;
const struct drm_mode_config_helper_funcs *funcs;
+ struct drm_crtc_state *new_crtc_state;
+ struct drm_crtc *crtc;
ktime_t start;
s64 commit_time_ms;
+ unsigned int i, new_self_refresh_mask = 0;
funcs = dev->mode_config.helper_private;
@@ -1602,6 +1605,15 @@ static void commit_tail(struct drm_atomic_state *old_state)
drm_atomic_helper_wait_for_dependencies(old_state);
+ /*
+ * We cannot safely access new_crtc_state after
+ * drm_atomic_helper_commit_hw_done() so figure out which crtc's have
+ * self-refresh active beforehand:
+ */
+ for_each_new_crtc_in_state(old_state, crtc, new_crtc_state, i)
+ if (new_crtc_state->self_refresh_active)
+ new_self_refresh_mask |= BIT(i);
+
if (funcs && funcs->atomic_commit_tail)
funcs->atomic_commit_tail(old_state);
else
@@ -1610,7 +1622,8 @@ static void commit_tail(struct drm_atomic_state *old_state)
commit_time_ms = ktime_ms_delta(ktime_get(), start);
if (commit_time_ms > 0)
drm_self_refresh_helper_update_avg_times(old_state,
- (unsigned long)commit_time_ms);
+ (unsigned long)commit_time_ms,
+ new_self_refresh_mask);
drm_atomic_helper_commit_cleanup_done(old_state);
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 82a4ceed3fcf..6b0177112e18 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -159,6 +159,9 @@ static const struct edid_quirk {
/* Medion MD 30217 PG */
{ "MED", 0x7b8, EDID_QUIRK_PREFER_LARGE_75 },
+ /* Lenovo G50 */
+ { "SDC", 18514, EDID_QUIRK_FORCE_6BPC },
+
/* Panel in Samsung NP700G7A-S01PL notebook reports 6bpc */
{ "SEC", 0xd033, EDID_QUIRK_FORCE_8BPC },
diff --git a/drivers/gpu/drm/drm_self_refresh_helper.c b/drivers/gpu/drm/drm_self_refresh_helper.c
index 68f4765a5896..dd33fec5aabd 100644
--- a/drivers/gpu/drm/drm_self_refresh_helper.c
+++ b/drivers/gpu/drm/drm_self_refresh_helper.c
@@ -133,29 +133,33 @@ out_drop_locks:
* drm_self_refresh_helper_update_avg_times - Updates a crtc's SR time averages
* @state: the state which has just been applied to hardware
* @commit_time_ms: the amount of time in ms that this commit took to complete
+ * @new_self_refresh_mask: bitmask of crtc's that have self_refresh_active in
+ * new state
*
* Called after &drm_mode_config_funcs.atomic_commit_tail, this function will
* update the average entry/exit self refresh times on self refresh transitions.
* These averages will be used when calculating how long to delay before
* entering self refresh mode after activity.
*/
-void drm_self_refresh_helper_update_avg_times(struct drm_atomic_state *state,
- unsigned int commit_time_ms)
+void
+drm_self_refresh_helper_update_avg_times(struct drm_atomic_state *state,
+ unsigned int commit_time_ms,
+ unsigned int new_self_refresh_mask)
{
struct drm_crtc *crtc;
- struct drm_crtc_state *old_crtc_state, *new_crtc_state;
+ struct drm_crtc_state *old_crtc_state;
int i;
- for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state,
- new_crtc_state, i) {
+ for_each_old_crtc_in_state(state, crtc, old_crtc_state, i) {
+ bool new_self_refresh_active = new_self_refresh_mask & BIT(i);
struct drm_self_refresh_data *sr_data = crtc->self_refresh_data;
struct ewma_psr_time *time;
if (old_crtc_state->self_refresh_active ==
- new_crtc_state->self_refresh_active)
+ new_self_refresh_active)
continue;
- if (new_crtc_state->self_refresh_active)
+ if (new_self_refresh_active)
time = &sr_data->entry_avg_ms;
else
time = &sr_data->exit_avg_ms;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
index 698db540972c..648cf0207309 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
@@ -180,6 +180,8 @@ void etnaviv_core_dump(struct etnaviv_gem_submit *submit)
etnaviv_cmdbuf_get_va(&submit->cmdbuf,
&gpu->mmu_context->cmdbuf_mapping));
+ mutex_unlock(&gpu->mmu_context->lock);
+
/* Reserve space for the bomap */
if (n_bomap_pages) {
bomap_start = bomap = iter.data;
@@ -221,8 +223,6 @@ void etnaviv_core_dump(struct etnaviv_gem_submit *submit)
obj->base.size);
}
- mutex_unlock(&gpu->mmu_context->lock);
-
etnaviv_core_dump_header(&iter, ETDUMP_BUF_END, iter.data);
dev_coredumpv(gpu->dev, iter.start, iter.data - iter.start, GFP_KERNEL);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
index 043111a1d60c..f8bf488e9d71 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
@@ -155,9 +155,11 @@ static void etnaviv_iommuv2_dump(struct etnaviv_iommu_context *context, void *bu
memcpy(buf, v2_context->mtlb_cpu, SZ_4K);
buf += SZ_4K;
- for (i = 0; i < MMUv2_MAX_STLB_ENTRIES; i++, buf += SZ_4K)
- if (v2_context->mtlb_cpu[i] & MMUv2_PTE_PRESENT)
+ for (i = 0; i < MMUv2_MAX_STLB_ENTRIES; i++)
+ if (v2_context->mtlb_cpu[i] & MMUv2_PTE_PRESENT) {
memcpy(buf, v2_context->stlb_cpu[i], SZ_4K);
+ buf += SZ_4K;
+ }
}
static void etnaviv_iommuv2_restore_nonsec(struct etnaviv_gpu *gpu,
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
index 35ebae6a1be7..3607d348c298 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
@@ -328,12 +328,23 @@ etnaviv_iommu_context_init(struct etnaviv_iommu_global *global,
ret = etnaviv_cmdbuf_suballoc_map(suballoc, ctx, &ctx->cmdbuf_mapping,
global->memory_base);
- if (ret) {
- global->ops->free(ctx);
- return NULL;
+ if (ret)
+ goto out_free;
+
+ if (global->version == ETNAVIV_IOMMU_V1 &&
+ ctx->cmdbuf_mapping.iova > 0x80000000) {
+ dev_err(global->dev,
+ "command buffer outside valid memory window\n");
+ goto out_unmap;
}
return ctx;
+
+out_unmap:
+ etnaviv_cmdbuf_suballoc_unmap(ctx, &ctx->cmdbuf_mapping);
+out_free:
+ global->ops->free(ctx);
+ return NULL;
}
void etnaviv_iommu_restore(struct etnaviv_gpu *gpu,
diff --git a/drivers/gpu/drm/i915/display/intel_atomic.c b/drivers/gpu/drm/i915/display/intel_atomic.c
index d3fb75bb9eb1..7cb2257bbb93 100644
--- a/drivers/gpu/drm/i915/display/intel_atomic.c
+++ b/drivers/gpu/drm/i915/display/intel_atomic.c
@@ -201,6 +201,7 @@ intel_crtc_duplicate_state(struct drm_crtc *crtc)
crtc_state->update_wm_post = false;
crtc_state->fb_changed = false;
crtc_state->fifo_changed = false;
+ crtc_state->preload_luts = false;
crtc_state->wm.need_postvbl_update = false;
crtc_state->fb_bits = 0;
crtc_state->update_planes = 0;
diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c
index efb39f350b19..3250c1b8dcca 100644
--- a/drivers/gpu/drm/i915/display/intel_bios.c
+++ b/drivers/gpu/drm/i915/display/intel_bios.c
@@ -1270,7 +1270,7 @@ static void sanitize_ddc_pin(struct drm_i915_private *dev_priv,
DRM_DEBUG_KMS("port %c trying to use the same DDC pin (0x%x) as port %c, "
"disabling port %c DVI/HDMI support\n",
port_name(port), info->alternate_ddc_pin,
- port_name(p), port_name(port));
+ port_name(p), port_name(p));
/*
* If we have multiple ports supposedly sharing the
@@ -1278,9 +1278,14 @@ static void sanitize_ddc_pin(struct drm_i915_private *dev_priv,
* port. Otherwise they share the same ddc bin and
* system couldn't communicate with them separately.
*
- * Give child device order the priority, first come first
- * served.
+ * Give inverse child device order the priority,
+ * last one wins. Yes, there are real machines
+ * (eg. Asrock B250M-HDV) where VBT has both
+ * port A and port E with the same AUX ch and
+ * we must pick port E :(
*/
+ info = &dev_priv->vbt.ddi_port_info[p];
+
info->supports_dvi = false;
info->supports_hdmi = false;
info->alternate_ddc_pin = 0;
@@ -1316,7 +1321,7 @@ static void sanitize_aux_ch(struct drm_i915_private *dev_priv,
DRM_DEBUG_KMS("port %c trying to use the same AUX CH (0x%x) as port %c, "
"disabling port %c DP support\n",
port_name(port), info->alternate_aux_channel,
- port_name(p), port_name(port));
+ port_name(p), port_name(p));
/*
* If we have multiple ports supposedlt sharing the
@@ -1324,9 +1329,14 @@ static void sanitize_aux_ch(struct drm_i915_private *dev_priv,
* port. Otherwise they share the same aux channel
* and system couldn't communicate with them separately.
*
- * Give child device order the priority, first come first
- * served.
+ * Give inverse child device order the priority,
+ * last one wins. Yes, there are real machines
+ * (eg. Asrock B250M-HDV) where VBT has both
+ * port A and port E with the same AUX ch and
+ * we must pick port E :(
*/
+ info = &dev_priv->vbt.ddi_port_info[p];
+
info->supports_dp = false;
info->alternate_aux_channel = 0;
}
diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c
index 71a0201437a9..aa1e2c670bc4 100644
--- a/drivers/gpu/drm/i915/display/intel_color.c
+++ b/drivers/gpu/drm/i915/display/intel_color.c
@@ -990,6 +990,55 @@ void intel_color_commit(const struct intel_crtc_state *crtc_state)
dev_priv->display.color_commit(crtc_state);
}
+static bool intel_can_preload_luts(const struct intel_crtc_state *new_crtc_state)
+{
+ struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
+ struct intel_atomic_state *state =
+ to_intel_atomic_state(new_crtc_state->base.state);
+ const struct intel_crtc_state *old_crtc_state =
+ intel_atomic_get_old_crtc_state(state, crtc);
+
+ return !old_crtc_state->base.gamma_lut &&
+ !old_crtc_state->base.degamma_lut;
+}
+
+static bool chv_can_preload_luts(const struct intel_crtc_state *new_crtc_state)
+{
+ struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
+ struct intel_atomic_state *state =
+ to_intel_atomic_state(new_crtc_state->base.state);
+ const struct intel_crtc_state *old_crtc_state =
+ intel_atomic_get_old_crtc_state(state, crtc);
+
+ /*
+ * CGM_PIPE_MODE is itself single buffered. We'd have to
+ * somehow split it out from chv_load_luts() if we wanted
+ * the ability to preload the CGM LUTs/CSC without tearing.
+ */
+ if (old_crtc_state->cgm_mode || new_crtc_state->cgm_mode)
+ return false;
+
+ return !old_crtc_state->base.gamma_lut;
+}
+
+static bool glk_can_preload_luts(const struct intel_crtc_state *new_crtc_state)
+{
+ struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->base.crtc);
+ struct intel_atomic_state *state =
+ to_intel_atomic_state(new_crtc_state->base.state);
+ const struct intel_crtc_state *old_crtc_state =
+ intel_atomic_get_old_crtc_state(state, crtc);
+
+ /*
+ * The hardware degamma is active whenever the pipe
+ * CSC is active. Thus even if the old state has no
+ * software degamma we need to avoid clobbering the
+ * linear hardware degamma mid scanout.
+ */
+ return !old_crtc_state->csc_enable &&
+ !old_crtc_state->base.gamma_lut;
+}
+
int intel_color_check(struct intel_crtc_state *crtc_state)
{
struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev);
@@ -1133,6 +1182,8 @@ static int i9xx_color_check(struct intel_crtc_state *crtc_state)
if (ret)
return ret;
+ crtc_state->preload_luts = intel_can_preload_luts(crtc_state);
+
return 0;
}
@@ -1185,6 +1236,8 @@ static int chv_color_check(struct intel_crtc_state *crtc_state)
if (ret)
return ret;
+ crtc_state->preload_luts = chv_can_preload_luts(crtc_state);
+
return 0;
}
@@ -1224,6 +1277,8 @@ static int ilk_color_check(struct intel_crtc_state *crtc_state)
if (ret)
return ret;
+ crtc_state->preload_luts = intel_can_preload_luts(crtc_state);
+
return 0;
}
@@ -1281,6 +1336,8 @@ static int ivb_color_check(struct intel_crtc_state *crtc_state)
if (ret)
return ret;
+ crtc_state->preload_luts = intel_can_preload_luts(crtc_state);
+
return 0;
}
@@ -1319,6 +1376,8 @@ static int glk_color_check(struct intel_crtc_state *crtc_state)
if (ret)
return ret;
+ crtc_state->preload_luts = glk_can_preload_luts(crtc_state);
+
return 0;
}
@@ -1368,6 +1427,8 @@ static int icl_color_check(struct intel_crtc_state *crtc_state)
crtc_state->csc_mode = icl_csc_mode(crtc_state);
+ crtc_state->preload_luts = intel_can_preload_luts(crtc_state);
+
return 0;
}
diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c
index e6e8d4a82044..0a08354a6183 100644
--- a/drivers/gpu/drm/i915/display/intel_crt.c
+++ b/drivers/gpu/drm/i915/display/intel_crt.c
@@ -864,6 +864,13 @@ load_detect:
out:
intel_display_power_put(dev_priv, intel_encoder->power_domain, wakeref);
+
+ /*
+ * Make sure the refs for power wells enabled during detect are
+ * dropped to avoid a new detect cycle triggered by HPD polling.
+ */
+ intel_display_power_flush_work(dev_priv);
+
return status;
}
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index ce05e805b08f..af50f05f4e9d 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -2504,6 +2504,9 @@ u32 intel_plane_fb_max_stride(struct drm_i915_private *dev_priv,
* the highest stride limits of them all.
*/
crtc = intel_get_crtc_for_pipe(dev_priv, PIPE_A);
+ if (!crtc)
+ return 0;
+
plane = to_intel_plane(crtc->base.primary);
return plane->max_stride(plane, pixel_format, modifier,
@@ -3280,7 +3283,20 @@ static int skl_max_plane_width(const struct drm_framebuffer *fb,
switch (fb->modifier) {
case DRM_FORMAT_MOD_LINEAR:
case I915_FORMAT_MOD_X_TILED:
- return 4096;
+ /*
+ * Validated limit is 4k, but has 5k should
+ * work apart from the following features:
+ * - Ytile (already limited to 4k)
+ * - FP16 (already limited to 4k)
+ * - render compression (already limited to 4k)
+ * - KVMR sprite and cursor (don't care)
+ * - horizontal panning (TODO verify this)
+ * - pipe and plane scaling (TODO verify this)
+ */
+ if (cpp == 8)
+ return 4096;
+ else
+ return 5120;
case I915_FORMAT_MOD_Y_TILED_CCS:
case I915_FORMAT_MOD_Yf_TILED_CCS:
/* FIXME AUX plane? */
@@ -9302,7 +9318,6 @@ static bool wrpll_uses_pch_ssc(struct drm_i915_private *dev_priv,
static void lpt_init_pch_refclk(struct drm_i915_private *dev_priv)
{
struct intel_encoder *encoder;
- bool pch_ssc_in_use = false;
bool has_fdi = false;
for_each_intel_encoder(&dev_priv->drm, encoder) {
@@ -9330,22 +9345,24 @@ static void lpt_init_pch_refclk(struct drm_i915_private *dev_priv)
* clock hierarchy. That would also allow us to do
* clock bending finally.
*/
+ dev_priv->pch_ssc_use = 0;
+
if (spll_uses_pch_ssc(dev_priv)) {
DRM_DEBUG_KMS("SPLL using PCH SSC\n");
- pch_ssc_in_use = true;
+ dev_priv->pch_ssc_use |= BIT(DPLL_ID_SPLL);
}
if (wrpll_uses_pch_ssc(dev_priv, DPLL_ID_WRPLL1)) {
DRM_DEBUG_KMS("WRPLL1 using PCH SSC\n");
- pch_ssc_in_use = true;
+ dev_priv->pch_ssc_use |= BIT(DPLL_ID_WRPLL1);
}
if (wrpll_uses_pch_ssc(dev_priv, DPLL_ID_WRPLL2)) {
DRM_DEBUG_KMS("WRPLL2 using PCH SSC\n");
- pch_ssc_in_use = true;
+ dev_priv->pch_ssc_use |= BIT(DPLL_ID_WRPLL2);
}
- if (pch_ssc_in_use)
+ if (dev_priv->pch_ssc_use)
return;
if (has_fdi) {
@@ -13726,6 +13743,11 @@ static void intel_update_crtc(struct intel_crtc *crtc,
/* vblanks work again, re-enable pipe CRC. */
intel_crtc_enable_pipe_crc(crtc);
} else {
+ if (new_crtc_state->preload_luts &&
+ (new_crtc_state->base.color_mgmt_changed ||
+ new_crtc_state->update_pipe))
+ intel_color_load_luts(new_crtc_state);
+
intel_pre_plane_update(old_crtc_state, new_crtc_state);
if (new_crtc_state->update_pipe)
@@ -14020,6 +14042,7 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state)
for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) {
if (new_crtc_state->base.active &&
!needs_modeset(new_crtc_state) &&
+ !new_crtc_state->preload_luts &&
(new_crtc_state->base.color_mgmt_changed ||
new_crtc_state->update_pipe))
intel_color_load_luts(new_crtc_state);
diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c
index 12099760d99e..c002f234ff31 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.c
+++ b/drivers/gpu/drm/i915/display/intel_display_power.c
@@ -4896,6 +4896,9 @@ void intel_power_domains_init_hw(struct drm_i915_private *i915, bool resume)
power_domains->initializing = true;
+ /* Must happen before power domain init on VLV/CHV */
+ intel_update_rawclk(i915);
+
if (INTEL_GEN(i915) >= 11) {
icl_display_core_init(i915, resume);
} else if (IS_CANNONLAKE(i915)) {
diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index 449abaea619f..4075b0387c87 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -761,6 +761,7 @@ struct intel_crtc_state {
bool update_wm_pre, update_wm_post; /* watermarks are updated */
bool fb_changed; /* fb on any of the planes is changed */
bool fifo_changed; /* FIFO split is changed */
+ bool preload_luts;
/* Pipe source size (ie. panel fitter input size)
* All planes will be positioned inside this space,
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 57e9f0ba331b..9b15ac4f2fb6 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -1256,6 +1256,9 @@ static u32 skl_get_aux_send_ctl(struct intel_dp *intel_dp,
u32 unused)
{
struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
+ struct drm_i915_private *i915 =
+ to_i915(intel_dig_port->base.base.dev);
+ enum phy phy = intel_port_to_phy(i915, intel_dig_port->base.port);
u32 ret;
ret = DP_AUX_CH_CTL_SEND_BUSY |
@@ -1268,7 +1271,8 @@ static u32 skl_get_aux_send_ctl(struct intel_dp *intel_dp,
DP_AUX_CH_CTL_FW_SYNC_PULSE_SKL(32) |
DP_AUX_CH_CTL_SYNC_PULSE_SKL(32);
- if (intel_dig_port->tc_mode == TC_PORT_TBT_ALT)
+ if (intel_phy_is_tc(i915, phy) &&
+ intel_dig_port->tc_mode == TC_PORT_TBT_ALT)
ret |= DP_AUX_CH_CTL_TBT_IO;
return ret;
@@ -5436,6 +5440,12 @@ out:
if (status != connector_status_connected && !intel_dp->is_mst)
intel_dp_unset_edid(intel_dp);
+ /*
+ * Make sure the refs for power wells enabled during detect are
+ * dropped to avoid a new detect cycle triggered by HPD polling.
+ */
+ intel_display_power_flush_work(dev_priv);
+
return status;
}
diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
index b8148f838354..d5a298c3c83b 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
@@ -525,16 +525,31 @@ static void hsw_ddi_wrpll_disable(struct drm_i915_private *dev_priv,
val = I915_READ(WRPLL_CTL(id));
I915_WRITE(WRPLL_CTL(id), val & ~WRPLL_PLL_ENABLE);
POSTING_READ(WRPLL_CTL(id));
+
+ /*
+ * Try to set up the PCH reference clock once all DPLLs
+ * that depend on it have been shut down.
+ */
+ if (dev_priv->pch_ssc_use & BIT(id))
+ intel_init_pch_refclk(dev_priv);
}
static void hsw_ddi_spll_disable(struct drm_i915_private *dev_priv,
struct intel_shared_dpll *pll)
{
+ enum intel_dpll_id id = pll->info->id;
u32 val;
val = I915_READ(SPLL_CTL);
I915_WRITE(SPLL_CTL, val & ~SPLL_PLL_ENABLE);
POSTING_READ(SPLL_CTL);
+
+ /*
+ * Try to set up the PCH reference clock once all DPLLs
+ * that depend on it have been shut down.
+ */
+ if (dev_priv->pch_ssc_use & BIT(id))
+ intel_init_pch_refclk(dev_priv);
}
static bool hsw_ddi_wrpll_get_hw_state(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.h b/drivers/gpu/drm/i915/display/intel_dpll_mgr.h
index e7588799fce5..104cf6d42333 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.h
+++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.h
@@ -147,11 +147,11 @@ enum intel_dpll_id {
*/
DPLL_ID_ICL_MGPLL4 = 6,
/**
- * @DPLL_ID_TGL_TCPLL5: TGL TC PLL port 5 (TC5)
+ * @DPLL_ID_TGL_MGPLL5: TGL TC PLL port 5 (TC5)
*/
DPLL_ID_TGL_MGPLL5 = 7,
/**
- * @DPLL_ID_TGL_TCPLL6: TGL TC PLL port 6 (TC6)
+ * @DPLL_ID_TGL_MGPLL6: TGL TC PLL port 6 (TC6)
*/
DPLL_ID_TGL_MGPLL6 = 8,
};
diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c
index d59eee5c5d9c..b5c588e511dd 100644
--- a/drivers/gpu/drm/i915/display/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/display/intel_fbdev.c
@@ -235,6 +235,11 @@ static int intelfb_create(struct drm_fb_helper *helper,
info->apertures->ranges[0].base = ggtt->gmadr.start;
info->apertures->ranges[0].size = ggtt->mappable_end;
+ /* Our framebuffer is the entirety of fbdev's system memory */
+ info->fix.smem_start =
+ (unsigned long)(ggtt->gmadr.start + vma->node.start);
+ info->fix.smem_len = vma->node.size;
+
vaddr = i915_vma_pin_iomap(vma);
if (IS_ERR(vaddr)) {
DRM_ERROR("Failed to remap framebuffer into virtual memory\n");
@@ -244,10 +249,6 @@ static int intelfb_create(struct drm_fb_helper *helper,
info->screen_base = vaddr;
info->screen_size = vma->node.size;
- /* Our framebuffer is the entirety of fbdev's system memory */
- info->fix.smem_start = (unsigned long)info->screen_base;
- info->fix.smem_len = info->screen_size;
-
drm_fb_helper_fill_info(info, &ifbdev->helper, sizes);
/* If the object is shmemfs backed, it will have given us zeroed pages.
diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c
index e02f0faecf02..b030f7ae3302 100644
--- a/drivers/gpu/drm/i915/display/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/display/intel_hdmi.c
@@ -2565,6 +2565,12 @@ out:
if (status != connector_status_connected)
cec_notifier_phys_addr_invalidate(intel_hdmi->cec_notifier);
+ /*
+ * Make sure the refs for power wells enabled during detect are
+ * dropped to avoid a new detect cycle triggered by HPD polling.
+ */
+ intel_display_power_flush_work(dev_priv);
+
return status;
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 1cdfe05514c3..e41fd94ae5a9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -319,6 +319,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
free_engines(rcu_access_pointer(ctx->engines));
mutex_destroy(&ctx->engines_mutex);
+ kfree(ctx->jump_whitelist);
+
if (ctx->timeline)
intel_timeline_put(ctx->timeline);
@@ -441,6 +443,9 @@ __create_context(struct drm_i915_private *i915)
for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
+ ctx->jump_whitelist = NULL;
+ ctx->jump_whitelist_cmds = 0;
+
return ctx;
err_free:
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
index 260d59cc3de8..00537b9d7006 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -192,6 +192,13 @@ struct i915_gem_context {
* per vm, which may be one per context or shared with the global GTT)
*/
struct radix_tree_root handles_vma;
+
+ /** jump_whitelist: Bit array for tracking cmds during cmdparsing
+ * Guarded by struct_mutex
+ */
+ unsigned long *jump_whitelist;
+ /** jump_whitelist_cmds: No of cmd slots available */
+ u32 jump_whitelist_cmds;
};
#endif /* __I915_GEM_CONTEXT_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index b5f6937369ea..e635e1e5f4d3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -296,7 +296,9 @@ static inline u64 gen8_noncanonical_addr(u64 address)
static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
{
- return intel_engine_needs_cmd_parser(eb->engine) && eb->batch_len;
+ return intel_engine_requires_cmd_parser(eb->engine) ||
+ (intel_engine_using_cmd_parser(eb->engine) &&
+ eb->args->batch_len);
}
static int eb_create(struct i915_execbuffer *eb)
@@ -1955,40 +1957,94 @@ static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
return 0;
}
-static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
+static struct i915_vma *
+shadow_batch_pin(struct i915_execbuffer *eb, struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *dev_priv = eb->i915;
+ struct i915_vma * const vma = *eb->vma;
+ struct i915_address_space *vm;
+ u64 flags;
+
+ /*
+ * PPGTT backed shadow buffers must be mapped RO, to prevent
+ * post-scan tampering
+ */
+ if (CMDPARSER_USES_GGTT(dev_priv)) {
+ flags = PIN_GLOBAL;
+ vm = &dev_priv->ggtt.vm;
+ } else if (vma->vm->has_read_only) {
+ flags = PIN_USER;
+ vm = vma->vm;
+ i915_gem_object_set_readonly(obj);
+ } else {
+ DRM_DEBUG("Cannot prevent post-scan tampering without RO capable vm\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ return i915_gem_object_pin(obj, vm, NULL, 0, 0, flags);
+}
+
+static struct i915_vma *eb_parse(struct i915_execbuffer *eb)
{
struct intel_engine_pool_node *pool;
struct i915_vma *vma;
+ u64 batch_start;
+ u64 shadow_batch_start;
int err;
pool = intel_engine_pool_get(&eb->engine->pool, eb->batch_len);
if (IS_ERR(pool))
return ERR_CAST(pool);
- err = intel_engine_cmd_parser(eb->engine,
+ vma = shadow_batch_pin(eb, pool->obj);
+ if (IS_ERR(vma))
+ goto err;
+
+ batch_start = gen8_canonical_addr(eb->batch->node.start) +
+ eb->batch_start_offset;
+
+ shadow_batch_start = gen8_canonical_addr(vma->node.start);
+
+ err = intel_engine_cmd_parser(eb->gem_context,
+ eb->engine,
eb->batch->obj,
- pool->obj,
+ batch_start,
eb->batch_start_offset,
eb->batch_len,
- is_master);
+ pool->obj,
+ shadow_batch_start);
+
if (err) {
- if (err == -EACCES) /* unhandled chained batch */
+ i915_vma_unpin(vma);
+
+ /*
+ * Unsafe GGTT-backed buffers can still be submitted safely
+ * as non-secure.
+ * For PPGTT backing however, we have no choice but to forcibly
+ * reject unsafe buffers
+ */
+ if (CMDPARSER_USES_GGTT(eb->i915) && (err == -EACCES))
+ /* Execute original buffer non-secure */
vma = NULL;
else
vma = ERR_PTR(err);
goto err;
}
- vma = i915_gem_object_ggtt_pin(pool->obj, NULL, 0, 0, 0);
- if (IS_ERR(vma))
- goto err;
-
eb->vma[eb->buffer_count] = i915_vma_get(vma);
eb->flags[eb->buffer_count] =
__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
vma->exec_flags = &eb->flags[eb->buffer_count];
eb->buffer_count++;
+ eb->batch_start_offset = 0;
+ eb->batch = vma;
+
+ if (CMDPARSER_USES_GGTT(eb->i915))
+ eb->batch_flags |= I915_DISPATCH_SECURE;
+
+ /* eb->batch_len unchanged */
+
vma->private = pool;
return vma;
@@ -2421,6 +2477,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
struct drm_i915_gem_exec_object2 *exec,
struct drm_syncobj **fences)
{
+ struct drm_i915_private *i915 = to_i915(dev);
struct i915_execbuffer eb;
struct dma_fence *in_fence = NULL;
struct dma_fence *exec_fence = NULL;
@@ -2432,7 +2489,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS &
~__EXEC_OBJECT_UNKNOWN_FLAGS);
- eb.i915 = to_i915(dev);
+ eb.i915 = i915;
eb.file = file;
eb.args = args;
if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC))
@@ -2452,8 +2509,15 @@ i915_gem_do_execbuffer(struct drm_device *dev,
eb.batch_flags = 0;
if (args->flags & I915_EXEC_SECURE) {
+ if (INTEL_GEN(i915) >= 11)
+ return -ENODEV;
+
+ /* Return -EPERM to trigger fallback code on old binaries. */
+ if (!HAS_SECURE_BATCHES(i915))
+ return -EPERM;
+
if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN))
- return -EPERM;
+ return -EPERM;
eb.batch_flags |= I915_DISPATCH_SECURE;
}
@@ -2530,34 +2594,19 @@ i915_gem_do_execbuffer(struct drm_device *dev,
goto err_vma;
}
+ if (eb.batch_len == 0)
+ eb.batch_len = eb.batch->size - eb.batch_start_offset;
+
if (eb_use_cmdparser(&eb)) {
struct i915_vma *vma;
- vma = eb_parse(&eb, drm_is_current_master(file));
+ vma = eb_parse(&eb);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto err_vma;
}
-
- if (vma) {
- /*
- * Batch parsed and accepted:
- *
- * Set the DISPATCH_SECURE bit to remove the NON_SECURE
- * bit from MI_BATCH_BUFFER_START commands issued in
- * the dispatch_execbuffer implementations. We
- * specifically don't want that set on batches the
- * command parser has accepted.
- */
- eb.batch_flags |= I915_DISPATCH_SECURE;
- eb.batch_start_offset = 0;
- eb.batch = vma;
- }
}
- if (eb.batch_len == 0)
- eb.batch_len = eb.batch->size - eb.batch_start_offset;
-
/*
* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
* batch" bit. Hence we need to pin secure batches into the global gtt.
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index 261c9bd83f51..05289edbafe3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -245,11 +245,9 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
wakeref = intel_runtime_pm_get(rpm);
- srcu = intel_gt_reset_trylock(ggtt->vm.gt);
- if (srcu < 0) {
- ret = srcu;
+ ret = intel_gt_reset_trylock(ggtt->vm.gt, &srcu);
+ if (ret)
goto err_rpm;
- }
ret = i915_mutex_lock_interruptible(dev);
if (ret)
@@ -318,7 +316,11 @@ vm_fault_t i915_gem_fault(struct vm_fault *vmf)
intel_wakeref_auto(&i915->ggtt.userfault_wakeref,
msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));
- i915_vma_set_ggtt_write(vma);
+ if (write) {
+ GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+ i915_vma_set_ggtt_write(vma);
+ obj->mm.dirty = true;
+ }
err_fence:
i915_vma_unpin_fence(vma);
@@ -362,6 +364,7 @@ err:
return VM_FAULT_OOM;
case -ENOSPC:
case -EFAULT:
+ case -ENODEV: /* bad object, how did you get here! */
return VM_FAULT_SIGBUS;
default:
WARN_ONCE(ret, "unhandled error in %s: %i\n", __func__, ret);
@@ -473,10 +476,16 @@ i915_gem_mmap_gtt(struct drm_file *file,
if (!obj)
return -ENOENT;
+ if (i915_gem_object_never_bind_ggtt(obj)) {
+ ret = -ENODEV;
+ goto out;
+ }
+
ret = create_mmap_offset(obj);
if (ret == 0)
*offset = drm_vma_node_offset_addr(&obj->base.vma_node);
+out:
i915_gem_object_put(obj);
return ret;
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 5efb9936e05b..ddf3605bea8e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -153,6 +153,12 @@ i915_gem_object_is_proxy(const struct drm_i915_gem_object *obj)
}
static inline bool
+i915_gem_object_never_bind_ggtt(const struct drm_i915_gem_object *obj)
+{
+ return obj->ops->flags & I915_GEM_OBJECT_NO_GGTT;
+}
+
+static inline bool
i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj)
{
return obj->ops->flags & I915_GEM_OBJECT_ASYNC_CANCEL;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index ede0eb4218a8..646859fea224 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -32,7 +32,8 @@ struct drm_i915_gem_object_ops {
#define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0)
#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1)
#define I915_GEM_OBJECT_IS_PROXY BIT(2)
-#define I915_GEM_OBJECT_ASYNC_CANCEL BIT(3)
+#define I915_GEM_OBJECT_NO_GGTT BIT(3)
+#define I915_GEM_OBJECT_ASYNC_CANCEL BIT(4)
/* Interface between the GEM object and its backing storage.
* get_pages() is called once prior to the use of the associated set
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 92e53c25424c..ad2a63dbcac2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -241,9 +241,6 @@ void i915_gem_resume(struct drm_i915_private *i915)
mutex_lock(&i915->drm.struct_mutex);
intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
- i915_gem_restore_gtt_mappings(i915);
- i915_gem_restore_fences(i915);
-
if (i915_gem_init_hw(i915))
goto err_wedged;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index 11b231c187c5..abfbac49b8e8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -671,8 +671,28 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj,
obj->mm.dirty = false;
for_each_sgt_page(page, sgt_iter, pages) {
- if (obj->mm.dirty)
+ if (obj->mm.dirty && trylock_page(page)) {
+ /*
+ * As this may not be anonymous memory (e.g. shmem)
+ * but exist on a real mapping, we have to lock
+ * the page in order to dirty it -- holding
+ * the page reference is not sufficient to
+ * prevent the inode from being truncated.
+ * Play safe and take the lock.
+ *
+ * However...!
+ *
+ * The mmu-notifier can be invalidated for a
+ * migrate_page, that is alreadying holding the lock
+ * on the page. Such a try_to_unmap() will result
+ * in us calling put_pages() and so recursively try
+ * to lock the page. We avoid that deadlock with
+ * a trylock_page() and in exchange we risk missing
+ * some page dirtying.
+ */
set_page_dirty(page);
+ unlock_page(page);
+ }
mark_page_accessed(page);
put_page(page);
@@ -702,6 +722,7 @@ i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj)
static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
I915_GEM_OBJECT_IS_SHRINKABLE |
+ I915_GEM_OBJECT_NO_GGTT |
I915_GEM_OBJECT_ASYNC_CANCEL,
.get_pages = i915_gem_userptr_get_pages,
.put_pages = i915_gem_userptr_put_pages,
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index d3c6993f4f46..22aab8593abf 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -136,6 +136,20 @@ execlists_active(const struct intel_engine_execlists *execlists)
return READ_ONCE(*execlists->active);
}
+static inline void
+execlists_active_lock_bh(struct intel_engine_execlists *execlists)
+{
+ local_bh_disable(); /* prevent local softirq and lock recursion */
+ tasklet_lock(&execlists->tasklet);
+}
+
+static inline void
+execlists_active_unlock_bh(struct intel_engine_execlists *execlists)
+{
+ tasklet_unlock(&execlists->tasklet);
+ local_bh_enable(); /* restore softirq, and kick ksoftirqd! */
+}
+
struct i915_request *
execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 82630db0394b..4ce8626b140e 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -1197,9 +1197,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
struct drm_printer *m)
{
struct drm_i915_private *dev_priv = engine->i915;
- const struct intel_engine_execlists * const execlists =
- &engine->execlists;
- unsigned long flags;
+ struct intel_engine_execlists * const execlists = &engine->execlists;
u64 addr;
if (engine->id == RENDER_CLASS && IS_GEN_RANGE(dev_priv, 4, 7))
@@ -1281,7 +1279,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
idx, hws[idx * 2], hws[idx * 2 + 1]);
}
- spin_lock_irqsave(&engine->active.lock, flags);
+ execlists_active_lock_bh(execlists);
for (port = execlists->active; (rq = *port); port++) {
char hdr[80];
int len;
@@ -1309,7 +1307,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
hwsp_seqno(rq));
print_request(m, rq, hdr);
}
- spin_unlock_irqrestore(&engine->active.lock, flags);
+ execlists_active_unlock_bh(execlists);
} else if (INTEL_GEN(dev_priv) > 6) {
drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
ENGINE_READ(engine, RING_PP_DIR_BASE));
@@ -1440,8 +1438,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
if (!intel_engine_supports_stats(engine))
return -ENODEV;
- spin_lock_irqsave(&engine->active.lock, flags);
- write_seqlock(&engine->stats.lock);
+ execlists_active_lock_bh(execlists);
+ write_seqlock_irqsave(&engine->stats.lock, flags);
if (unlikely(engine->stats.enabled == ~0)) {
err = -EBUSY;
@@ -1469,8 +1467,8 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
}
unlock:
- write_sequnlock(&engine->stats.lock);
- spin_unlock_irqrestore(&engine->active.lock, flags);
+ write_sequnlock_irqrestore(&engine->stats.lock, flags);
+ execlists_active_unlock_bh(execlists);
return err;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.c b/drivers/gpu/drm/i915/gt/intel_engine_pool.c
index 4cd54c569911..379a91780bd4 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pool.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.c
@@ -103,6 +103,8 @@ node_create(struct intel_engine_pool *pool, size_t sz)
return ERR_CAST(obj);
}
+ i915_gem_object_set_readonly(obj);
+
node->obj = obj;
return node;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index a82cea95c2f2..9dd8c299cb2d 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -475,12 +475,13 @@ struct intel_engine_cs {
struct intel_engine_hangcheck hangcheck;
-#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
+#define I915_ENGINE_USING_CMD_PARSER BIT(0)
#define I915_ENGINE_SUPPORTS_STATS BIT(1)
#define I915_ENGINE_HAS_PREEMPTION BIT(2)
#define I915_ENGINE_HAS_SEMAPHORES BIT(3)
#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
#define I915_ENGINE_IS_VIRTUAL BIT(5)
+#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7)
unsigned int flags;
/*
@@ -541,9 +542,15 @@ struct intel_engine_cs {
};
static inline bool
-intel_engine_needs_cmd_parser(const struct intel_engine_cs *engine)
+intel_engine_using_cmd_parser(const struct intel_engine_cs *engine)
{
- return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER;
+ return engine->flags & I915_ENGINE_USING_CMD_PARSER;
+}
+
+static inline bool
+intel_engine_requires_cmd_parser(const struct intel_engine_cs *engine)
+{
+ return engine->flags & I915_ENGINE_REQUIRES_CMD_PARSER;
}
static inline bool
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index 1363e069ec83..fac75afed35b 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -38,6 +38,9 @@ static int __gt_unpark(struct intel_wakeref *wf)
gt->awake = intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ);
GEM_BUG_ON(!gt->awake);
+ if (NEEDS_RC6_CTX_CORRUPTION_WA(i915))
+ intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
+
intel_enable_gt_powersave(i915);
i915_update_gfx_val(i915);
@@ -67,6 +70,11 @@ static int __gt_park(struct intel_wakeref *wf)
if (INTEL_GEN(i915) >= 6)
gen6_rps_idle(i915);
+ if (NEEDS_RC6_CTX_CORRUPTION_WA(i915)) {
+ i915_rc6_ctx_wa_check(i915);
+ intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
+ }
+
/* Everything switched off, flush any residual interrupt just in case */
intel_synchronize_irq(i915);
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index d42584439f51..06a506c29463 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -234,6 +234,13 @@ static void execlists_init_reg_state(u32 *reg_state,
struct intel_engine_cs *engine,
struct intel_ring *ring);
+static void mark_eio(struct i915_request *rq)
+{
+ if (!i915_request_signaled(rq))
+ dma_fence_set_error(&rq->fence, -EIO);
+ i915_request_mark_complete(rq);
+}
+
static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine)
{
return (i915_ggtt_offset(engine->status_page.vma) +
@@ -631,7 +638,6 @@ execlists_schedule_out(struct i915_request *rq)
struct intel_engine_cs *cur, *old;
trace_i915_request_out(rq);
- GEM_BUG_ON(intel_context_inflight(ce) != rq->engine);
old = READ_ONCE(ce->inflight);
do
@@ -797,6 +803,17 @@ static bool can_merge_rq(const struct i915_request *prev,
GEM_BUG_ON(prev == next);
GEM_BUG_ON(!assert_priority_queue(prev, next));
+ /*
+ * We do not submit known completed requests. Therefore if the next
+ * request is already completed, we can pretend to merge it in
+ * with the previous context (and we will skip updating the ELSP
+ * and tracking). Thus hopefully keeping the ELSP full with active
+ * contexts, despite the best efforts of preempt-to-busy to confuse
+ * us.
+ */
+ if (i915_request_completed(next))
+ return true;
+
if (!can_merge_ctx(prev->hw_context, next->hw_context))
return false;
@@ -893,7 +910,7 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
static struct i915_request *
last_active(const struct intel_engine_execlists *execlists)
{
- struct i915_request * const *last = execlists->active;
+ struct i915_request * const *last = READ_ONCE(execlists->active);
while (*last && i915_request_completed(*last))
last++;
@@ -1172,21 +1189,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
continue;
}
- if (i915_request_completed(rq)) {
- ve->request = NULL;
- ve->base.execlists.queue_priority_hint = INT_MIN;
- rb_erase_cached(rb, &execlists->virtual);
- RB_CLEAR_NODE(rb);
-
- rq->engine = engine;
- __i915_request_submit(rq);
-
- spin_unlock(&ve->base.active.lock);
-
- rb = rb_first_cached(&execlists->virtual);
- continue;
- }
-
if (last && !can_merge_rq(last, rq)) {
spin_unlock(&ve->base.active.lock);
return; /* leave this for another */
@@ -1237,11 +1239,24 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
GEM_BUG_ON(ve->siblings[0] != engine);
}
- __i915_request_submit(rq);
- if (!i915_request_completed(rq)) {
+ if (__i915_request_submit(rq)) {
submit = true;
last = rq;
}
+ i915_request_put(rq);
+
+ /*
+ * Hmm, we have a bunch of virtual engine requests,
+ * but the first one was already completed (thanks
+ * preempt-to-busy!). Keep looking at the veng queue
+ * until we have no more relevant requests (i.e.
+ * the normal submit queue has higher priority).
+ */
+ if (!submit) {
+ spin_unlock(&ve->base.active.lock);
+ rb = rb_first_cached(&execlists->virtual);
+ continue;
+ }
}
spin_unlock(&ve->base.active.lock);
@@ -1254,8 +1269,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
int i;
priolist_for_each_request_consume(rq, rn, p, i) {
- if (i915_request_completed(rq))
- goto skip;
+ bool merge = true;
/*
* Can we combine this request with the current port?
@@ -1296,14 +1310,23 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
ctx_single_port_submission(rq->hw_context))
goto done;
- *port = execlists_schedule_in(last, port - execlists->pending);
- port++;
+ merge = false;
}
- last = rq;
- submit = true;
-skip:
- __i915_request_submit(rq);
+ if (__i915_request_submit(rq)) {
+ if (!merge) {
+ *port = execlists_schedule_in(last, port - execlists->pending);
+ port++;
+ last = NULL;
+ }
+
+ GEM_BUG_ON(last &&
+ !can_merge_ctx(last->hw_context,
+ rq->hw_context));
+
+ submit = true;
+ last = rq;
+ }
}
rb_erase_cached(&p->node, &execlists->queue);
@@ -1593,8 +1616,11 @@ static void process_csb(struct intel_engine_cs *engine)
static void __execlists_submission_tasklet(struct intel_engine_cs *const engine)
{
lockdep_assert_held(&engine->active.lock);
- if (!engine->execlists.pending[0])
+ if (!engine->execlists.pending[0]) {
+ rcu_read_lock(); /* protect peeking at execlists->active */
execlists_dequeue(engine);
+ rcu_read_unlock();
+ }
}
/*
@@ -2399,10 +2425,14 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
static struct i915_request *active_request(struct i915_request *rq)
{
- const struct list_head * const list = &rq->timeline->requests;
const struct intel_context * const ce = rq->hw_context;
struct i915_request *active = NULL;
+ struct list_head *list;
+ if (!i915_request_is_active(rq)) /* unwound, but incomplete! */
+ return rq;
+
+ list = &rq->timeline->requests;
list_for_each_entry_from_reverse(rq, list, link) {
if (i915_request_completed(rq))
break;
@@ -2552,12 +2582,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
__execlists_reset(engine, true);
/* Mark all executing requests as skipped. */
- list_for_each_entry(rq, &engine->active.requests, sched.link) {
- if (!i915_request_signaled(rq))
- dma_fence_set_error(&rq->fence, -EIO);
-
- i915_request_mark_complete(rq);
- }
+ list_for_each_entry(rq, &engine->active.requests, sched.link)
+ mark_eio(rq);
/* Flush the queued requests to the timeline list (for retiring). */
while ((rb = rb_first_cached(&execlists->queue))) {
@@ -2565,10 +2591,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
int i;
priolist_for_each_request_consume(rq, rn, p, i) {
- list_del_init(&rq->sched.link);
+ mark_eio(rq);
__i915_request_submit(rq);
- dma_fence_set_error(&rq->fence, -EIO);
- i915_request_mark_complete(rq);
}
rb_erase_cached(&p->node, &execlists->queue);
@@ -2584,13 +2608,15 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
RB_CLEAR_NODE(rb);
spin_lock(&ve->base.active.lock);
- if (ve->request) {
- ve->request->engine = engine;
- __i915_request_submit(ve->request);
- dma_fence_set_error(&ve->request->fence, -EIO);
- i915_request_mark_complete(ve->request);
+ rq = fetch_and_zero(&ve->request);
+ if (rq) {
+ mark_eio(rq);
+
+ rq->engine = engine;
+ __i915_request_submit(rq);
+ i915_request_put(rq);
+
ve->base.execlists.queue_priority_hint = INT_MIN;
- ve->request = NULL;
}
spin_unlock(&ve->base.active.lock);
}
@@ -3594,6 +3620,8 @@ submit_engine:
static void virtual_submit_request(struct i915_request *rq)
{
struct virtual_engine *ve = to_virtual_engine(rq->engine);
+ struct i915_request *old;
+ unsigned long flags;
GEM_TRACE("%s: rq=%llx:%lld\n",
ve->base.name,
@@ -3602,15 +3630,31 @@ static void virtual_submit_request(struct i915_request *rq)
GEM_BUG_ON(ve->base.submit_request != virtual_submit_request);
- GEM_BUG_ON(ve->request);
- GEM_BUG_ON(!list_empty(virtual_queue(ve)));
+ spin_lock_irqsave(&ve->base.active.lock, flags);
+
+ old = ve->request;
+ if (old) { /* background completion event from preempt-to-busy */
+ GEM_BUG_ON(!i915_request_completed(old));
+ __i915_request_submit(old);
+ i915_request_put(old);
+ }
+
+ if (i915_request_completed(rq)) {
+ __i915_request_submit(rq);
+
+ ve->base.execlists.queue_priority_hint = INT_MIN;
+ ve->request = NULL;
+ } else {
+ ve->base.execlists.queue_priority_hint = rq_prio(rq);
+ ve->request = i915_request_get(rq);
- ve->base.execlists.queue_priority_hint = rq_prio(rq);
- WRITE_ONCE(ve->request, rq);
+ GEM_BUG_ON(!list_empty(virtual_queue(ve)));
+ list_move_tail(&rq->sched.link, virtual_queue(ve));
- list_move_tail(&rq->sched.link, virtual_queue(ve));
+ tasklet_schedule(&ve->base.execlists.tasklet);
+ }
- tasklet_schedule(&ve->base.execlists.tasklet);
+ spin_unlock_irqrestore(&ve->base.active.lock, flags);
}
static struct ve_bond *
@@ -3631,18 +3675,22 @@ static void
virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal)
{
struct virtual_engine *ve = to_virtual_engine(rq->engine);
+ intel_engine_mask_t allowed, exec;
struct ve_bond *bond;
+ allowed = ~to_request(signal)->engine->mask;
+
bond = virtual_find_bond(ve, to_request(signal)->engine);
- if (bond) {
- intel_engine_mask_t old, new, cmp;
+ if (bond)
+ allowed &= bond->sibling_mask;
- cmp = READ_ONCE(rq->execution_mask);
- do {
- old = cmp;
- new = cmp & bond->sibling_mask;
- } while ((cmp = cmpxchg(&rq->execution_mask, old, new)) != old);
- }
+ /* Restrict the bonded request to run on only the available engines */
+ exec = READ_ONCE(rq->execution_mask);
+ while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed))
+ ;
+
+ /* Prevent the master from being re-run on the bonded engines */
+ to_request(signal)->execution_mask &= ~allowed;
}
struct intel_context *
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 728704bbbe18..cea184a7dde9 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -199,14 +199,6 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
MOCS_ENTRY(15, \
LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
L3_3_WB), \
- /* Bypass LLC - Uncached (EHL+) */ \
- MOCS_ENTRY(16, \
- LE_1_UC | LE_TC_1_LLC | LE_SCF(1), \
- L3_1_UC), \
- /* Bypass LLC - L3 (Read-Only) (EHL+) */ \
- MOCS_ENTRY(17, \
- LE_1_UC | LE_TC_1_LLC | LE_SCF(1), \
- L3_3_WB), \
/* Self-Snoop - L3 + LLC */ \
MOCS_ENTRY(18, \
LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \
@@ -270,7 +262,7 @@ static const struct drm_i915_mocs_entry tigerlake_mocs_table[] = {
L3_1_UC),
/* HW Special Case (Displayable) */
MOCS_ENTRY(61,
- LE_1_UC | LE_TC_1_LLC | LE_SCF(1),
+ LE_1_UC | LE_TC_1_LLC,
L3_3_WB),
};
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index b9d84d52e986..8cea42379dd7 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -42,11 +42,10 @@ static void engine_skip_context(struct i915_request *rq)
struct intel_engine_cs *engine = rq->engine;
struct i915_gem_context *hung_ctx = rq->gem_context;
- lockdep_assert_held(&engine->active.lock);
-
if (!i915_request_is_active(rq))
return;
+ lockdep_assert_held(&engine->active.lock);
list_for_each_entry_continue(rq, &engine->active.requests, sched.link)
if (rq->gem_context == hung_ctx)
i915_request_skip(rq, -EIO);
@@ -123,7 +122,6 @@ void __i915_request_reset(struct i915_request *rq, bool guilty)
rq->fence.seqno,
yesno(guilty));
- lockdep_assert_held(&rq->engine->active.lock);
GEM_BUG_ON(i915_request_completed(rq));
if (guilty) {
@@ -1214,10 +1212,8 @@ out:
intel_runtime_pm_put(&gt->i915->runtime_pm, wakeref);
}
-int intel_gt_reset_trylock(struct intel_gt *gt)
+int intel_gt_reset_trylock(struct intel_gt *gt, int *srcu)
{
- int srcu;
-
might_lock(&gt->reset.backoff_srcu);
might_sleep();
@@ -1232,10 +1228,10 @@ int intel_gt_reset_trylock(struct intel_gt *gt)
rcu_read_lock();
}
- srcu = srcu_read_lock(&gt->reset.backoff_srcu);
+ *srcu = srcu_read_lock(&gt->reset.backoff_srcu);
rcu_read_unlock();
- return srcu;
+ return 0;
}
void intel_gt_reset_unlock(struct intel_gt *gt, int tag)
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h
index 37a987b17108..52c00199e069 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.h
+++ b/drivers/gpu/drm/i915/gt/intel_reset.h
@@ -38,7 +38,7 @@ int intel_engine_reset(struct intel_engine_cs *engine,
void __i915_request_reset(struct i915_request *rq, bool guilty);
-int __must_check intel_gt_reset_trylock(struct intel_gt *gt);
+int __must_check intel_gt_reset_trylock(struct intel_gt *gt, int *srcu);
void intel_gt_reset_unlock(struct intel_gt *gt, int tag);
void intel_gt_set_wedged(struct intel_gt *gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index 601c16239fdf..bacaa7bb8c9a 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -1573,7 +1573,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
struct intel_engine_cs *engine = rq->engine;
enum intel_engine_id id;
const int num_engines =
- IS_HSW_GT1(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0;
+ IS_HASWELL(i915) ? RUNTIME_INFO(i915)->num_engines - 1 : 0;
bool force_restore = false;
int len;
u32 *cs;
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 45481eb1fa3c..5f6ec2fd29a0 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -1063,6 +1063,9 @@ static void gen9_whitelist_build(struct i915_wa_list *w)
/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
whitelist_reg(w, GEN8_HDC_CHICKEN1);
+
+ /* WaSendPushConstantsFromMMIO:skl,bxt */
+ whitelist_reg(w, COMMON_SLICE_CHICKEN2);
}
static void skl_whitelist_build(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c
index 13044c027f27..4bfaefdf548d 100644
--- a/drivers/gpu/drm/i915/gvt/dmabuf.c
+++ b/drivers/gpu/drm/i915/gvt/dmabuf.c
@@ -498,8 +498,6 @@ int intel_vgpu_get_dmabuf(struct intel_vgpu *vgpu, unsigned int dmabuf_id)
goto out_free_gem;
}
- i915_gem_object_put(obj);
-
ret = dma_buf_fd(dmabuf, DRM_CLOEXEC | DRM_RDWR);
if (ret < 0) {
gvt_vgpu_err("create dma-buf fd failed ret:%d\n", ret);
@@ -524,6 +522,8 @@ int intel_vgpu_get_dmabuf(struct intel_vgpu *vgpu, unsigned int dmabuf_id)
file_count(dmabuf->file),
kref_read(&obj->base.refcount));
+ i915_gem_object_put(obj);
+
return dmabuf_fd;
out_free_dmabuf:
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 24555102e198..f24096e27bef 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -53,13 +53,11 @@
* granting userspace undue privileges. There are three categories of privilege.
*
* First, commands which are explicitly defined as privileged or which should
- * only be used by the kernel driver. The parser generally rejects such
- * commands, though it may allow some from the drm master process.
+ * only be used by the kernel driver. The parser rejects such commands
*
* Second, commands which access registers. To support correct/enhanced
* userspace functionality, particularly certain OpenGL extensions, the parser
- * provides a whitelist of registers which userspace may safely access (for both
- * normal and drm master processes).
+ * provides a whitelist of registers which userspace may safely access
*
* Third, commands which access privileged memory (i.e. GGTT, HWS page, etc).
* The parser always rejects such commands.
@@ -84,9 +82,9 @@
* in the per-engine command tables.
*
* Other command table entries map fairly directly to high level categories
- * mentioned above: rejected, master-only, register whitelist. The parser
- * implements a number of checks, including the privileged memory checks, via a
- * general bitmasking mechanism.
+ * mentioned above: rejected, register whitelist. The parser implements a number
+ * of checks, including the privileged memory checks, via a general bitmasking
+ * mechanism.
*/
/*
@@ -104,8 +102,6 @@ struct drm_i915_cmd_descriptor {
* CMD_DESC_REJECT: The command is never allowed
* CMD_DESC_REGISTER: The command should be checked against the
* register whitelist for the appropriate ring
- * CMD_DESC_MASTER: The command is allowed if the submitting process
- * is the DRM master
*/
u32 flags;
#define CMD_DESC_FIXED (1<<0)
@@ -113,7 +109,6 @@ struct drm_i915_cmd_descriptor {
#define CMD_DESC_REJECT (1<<2)
#define CMD_DESC_REGISTER (1<<3)
#define CMD_DESC_BITMASK (1<<4)
-#define CMD_DESC_MASTER (1<<5)
/*
* The command's unique identification bits and the bitmask to get them.
@@ -194,7 +189,7 @@ struct drm_i915_cmd_table {
#define CMD(op, opm, f, lm, fl, ...) \
{ \
.flags = (fl) | ((f) ? CMD_DESC_FIXED : 0), \
- .cmd = { (op), ~0u << (opm) }, \
+ .cmd = { (op & ~0u << (opm)), ~0u << (opm) }, \
.length = { (lm) }, \
__VA_ARGS__ \
}
@@ -209,14 +204,13 @@ struct drm_i915_cmd_table {
#define R CMD_DESC_REJECT
#define W CMD_DESC_REGISTER
#define B CMD_DESC_BITMASK
-#define M CMD_DESC_MASTER
/* Command Mask Fixed Len Action
---------------------------------------------------------- */
-static const struct drm_i915_cmd_descriptor common_cmds[] = {
+static const struct drm_i915_cmd_descriptor gen7_common_cmds[] = {
CMD( MI_NOOP, SMI, F, 1, S ),
CMD( MI_USER_INTERRUPT, SMI, F, 1, R ),
- CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, M ),
+ CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, R ),
CMD( MI_ARB_CHECK, SMI, F, 1, S ),
CMD( MI_REPORT_HEAD, SMI, F, 1, S ),
CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ),
@@ -246,7 +240,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = {
CMD( MI_BATCH_BUFFER_START, SMI, !F, 0xFF, S ),
};
-static const struct drm_i915_cmd_descriptor render_cmds[] = {
+static const struct drm_i915_cmd_descriptor gen7_render_cmds[] = {
CMD( MI_FLUSH, SMI, F, 1, S ),
CMD( MI_ARB_ON_OFF, SMI, F, 1, R ),
CMD( MI_PREDICATE, SMI, F, 1, S ),
@@ -313,7 +307,7 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = {
CMD( MI_URB_ATOMIC_ALLOC, SMI, F, 1, S ),
CMD( MI_SET_APPID, SMI, F, 1, S ),
CMD( MI_RS_CONTEXT, SMI, F, 1, S ),
- CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, M ),
+ CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, R ),
CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ),
CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, W,
.reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 } ),
@@ -330,7 +324,7 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = {
CMD( GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS, S3D, !F, 0x1FF, S ),
};
-static const struct drm_i915_cmd_descriptor video_cmds[] = {
+static const struct drm_i915_cmd_descriptor gen7_video_cmds[] = {
CMD( MI_ARB_ON_OFF, SMI, F, 1, R ),
CMD( MI_SET_APPID, SMI, F, 1, S ),
CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, B,
@@ -374,7 +368,7 @@ static const struct drm_i915_cmd_descriptor video_cmds[] = {
CMD( MFX_WAIT, SMFX, F, 1, S ),
};
-static const struct drm_i915_cmd_descriptor vecs_cmds[] = {
+static const struct drm_i915_cmd_descriptor gen7_vecs_cmds[] = {
CMD( MI_ARB_ON_OFF, SMI, F, 1, R ),
CMD( MI_SET_APPID, SMI, F, 1, S ),
CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, B,
@@ -412,7 +406,7 @@ static const struct drm_i915_cmd_descriptor vecs_cmds[] = {
}}, ),
};
-static const struct drm_i915_cmd_descriptor blt_cmds[] = {
+static const struct drm_i915_cmd_descriptor gen7_blt_cmds[] = {
CMD( MI_DISPLAY_FLIP, SMI, !F, 0xFF, R ),
CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3FF, B,
.bits = {{
@@ -446,10 +440,64 @@ static const struct drm_i915_cmd_descriptor blt_cmds[] = {
};
static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = {
- CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, M ),
+ CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, R ),
CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ),
};
+/*
+ * For Gen9 we can still rely on the h/w to enforce cmd security, and only
+ * need to re-enforce the register access checks. We therefore only need to
+ * teach the cmdparser how to find the end of each command, and identify
+ * register accesses. The table doesn't need to reject any commands, and so
+ * the only commands listed here are:
+ * 1) Those that touch registers
+ * 2) Those that do not have the default 8-bit length
+ *
+ * Note that the default MI length mask chosen for this table is 0xFF, not
+ * the 0x3F used on older devices. This is because the vast majority of MI
+ * cmds on Gen9 use a standard 8-bit Length field.
+ * All the Gen9 blitter instructions are standard 0xFF length mask, and
+ * none allow access to non-general registers, so in fact no BLT cmds are
+ * included in the table at all.
+ *
+ */
+static const struct drm_i915_cmd_descriptor gen9_blt_cmds[] = {
+ CMD( MI_NOOP, SMI, F, 1, S ),
+ CMD( MI_USER_INTERRUPT, SMI, F, 1, S ),
+ CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, S ),
+ CMD( MI_FLUSH, SMI, F, 1, S ),
+ CMD( MI_ARB_CHECK, SMI, F, 1, S ),
+ CMD( MI_REPORT_HEAD, SMI, F, 1, S ),
+ CMD( MI_ARB_ON_OFF, SMI, F, 1, S ),
+ CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ),
+ CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, S ),
+ CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, S ),
+ CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3FF, S ),
+ CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W,
+ .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 } ),
+ CMD( MI_UPDATE_GTT, SMI, !F, 0x3FF, S ),
+ CMD( MI_STORE_REGISTER_MEM_GEN8, SMI, F, 4, W,
+ .reg = { .offset = 1, .mask = 0x007FFFFC } ),
+ CMD( MI_FLUSH_DW, SMI, !F, 0x3F, S ),
+ CMD( MI_LOAD_REGISTER_MEM_GEN8, SMI, F, 4, W,
+ .reg = { .offset = 1, .mask = 0x007FFFFC } ),
+ CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, W,
+ .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 } ),
+
+ /*
+ * We allow BB_START but apply further checks. We just sanitize the
+ * basic fields here.
+ */
+#define MI_BB_START_OPERAND_MASK GENMASK(SMI-1, 0)
+#define MI_BB_START_OPERAND_EXPECT (MI_BATCH_PPGTT_HSW | 1)
+ CMD( MI_BATCH_BUFFER_START_GEN8, SMI, !F, 0xFF, B,
+ .bits = {{
+ .offset = 0,
+ .mask = MI_BB_START_OPERAND_MASK,
+ .expected = MI_BB_START_OPERAND_EXPECT,
+ }}, ),
+};
+
static const struct drm_i915_cmd_descriptor noop_desc =
CMD(MI_NOOP, SMI, F, 1, S);
@@ -463,40 +511,44 @@ static const struct drm_i915_cmd_descriptor noop_desc =
#undef R
#undef W
#undef B
-#undef M
-static const struct drm_i915_cmd_table gen7_render_cmds[] = {
- { common_cmds, ARRAY_SIZE(common_cmds) },
- { render_cmds, ARRAY_SIZE(render_cmds) },
+static const struct drm_i915_cmd_table gen7_render_cmd_table[] = {
+ { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
+ { gen7_render_cmds, ARRAY_SIZE(gen7_render_cmds) },
};
-static const struct drm_i915_cmd_table hsw_render_ring_cmds[] = {
- { common_cmds, ARRAY_SIZE(common_cmds) },
- { render_cmds, ARRAY_SIZE(render_cmds) },
+static const struct drm_i915_cmd_table hsw_render_ring_cmd_table[] = {
+ { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
+ { gen7_render_cmds, ARRAY_SIZE(gen7_render_cmds) },
{ hsw_render_cmds, ARRAY_SIZE(hsw_render_cmds) },
};
-static const struct drm_i915_cmd_table gen7_video_cmds[] = {
- { common_cmds, ARRAY_SIZE(common_cmds) },
- { video_cmds, ARRAY_SIZE(video_cmds) },
+static const struct drm_i915_cmd_table gen7_video_cmd_table[] = {
+ { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
+ { gen7_video_cmds, ARRAY_SIZE(gen7_video_cmds) },
};
-static const struct drm_i915_cmd_table hsw_vebox_cmds[] = {
- { common_cmds, ARRAY_SIZE(common_cmds) },
- { vecs_cmds, ARRAY_SIZE(vecs_cmds) },
+static const struct drm_i915_cmd_table hsw_vebox_cmd_table[] = {
+ { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
+ { gen7_vecs_cmds, ARRAY_SIZE(gen7_vecs_cmds) },
};
-static const struct drm_i915_cmd_table gen7_blt_cmds[] = {
- { common_cmds, ARRAY_SIZE(common_cmds) },
- { blt_cmds, ARRAY_SIZE(blt_cmds) },
+static const struct drm_i915_cmd_table gen7_blt_cmd_table[] = {
+ { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
+ { gen7_blt_cmds, ARRAY_SIZE(gen7_blt_cmds) },
};
-static const struct drm_i915_cmd_table hsw_blt_ring_cmds[] = {
- { common_cmds, ARRAY_SIZE(common_cmds) },
- { blt_cmds, ARRAY_SIZE(blt_cmds) },
+static const struct drm_i915_cmd_table hsw_blt_ring_cmd_table[] = {
+ { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) },
+ { gen7_blt_cmds, ARRAY_SIZE(gen7_blt_cmds) },
{ hsw_blt_cmds, ARRAY_SIZE(hsw_blt_cmds) },
};
+static const struct drm_i915_cmd_table gen9_blt_cmd_table[] = {
+ { gen9_blt_cmds, ARRAY_SIZE(gen9_blt_cmds) },
+};
+
+
/*
* Register whitelists, sorted by increasing register offset.
*/
@@ -612,17 +664,27 @@ static const struct drm_i915_reg_descriptor gen7_blt_regs[] = {
REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE),
};
-static const struct drm_i915_reg_descriptor ivb_master_regs[] = {
- REG32(FORCEWAKE_MT),
- REG32(DERRMR),
- REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_A)),
- REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_B)),
- REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_C)),
-};
-
-static const struct drm_i915_reg_descriptor hsw_master_regs[] = {
- REG32(FORCEWAKE_MT),
- REG32(DERRMR),
+static const struct drm_i915_reg_descriptor gen9_blt_regs[] = {
+ REG64_IDX(RING_TIMESTAMP, RENDER_RING_BASE),
+ REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE),
+ REG32(BCS_SWCTRL),
+ REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE),
+ REG64_IDX(BCS_GPR, 0),
+ REG64_IDX(BCS_GPR, 1),
+ REG64_IDX(BCS_GPR, 2),
+ REG64_IDX(BCS_GPR, 3),
+ REG64_IDX(BCS_GPR, 4),
+ REG64_IDX(BCS_GPR, 5),
+ REG64_IDX(BCS_GPR, 6),
+ REG64_IDX(BCS_GPR, 7),
+ REG64_IDX(BCS_GPR, 8),
+ REG64_IDX(BCS_GPR, 9),
+ REG64_IDX(BCS_GPR, 10),
+ REG64_IDX(BCS_GPR, 11),
+ REG64_IDX(BCS_GPR, 12),
+ REG64_IDX(BCS_GPR, 13),
+ REG64_IDX(BCS_GPR, 14),
+ REG64_IDX(BCS_GPR, 15),
};
#undef REG64
@@ -631,28 +693,27 @@ static const struct drm_i915_reg_descriptor hsw_master_regs[] = {
struct drm_i915_reg_table {
const struct drm_i915_reg_descriptor *regs;
int num_regs;
- bool master;
};
static const struct drm_i915_reg_table ivb_render_reg_tables[] = {
- { gen7_render_regs, ARRAY_SIZE(gen7_render_regs), false },
- { ivb_master_regs, ARRAY_SIZE(ivb_master_regs), true },
+ { gen7_render_regs, ARRAY_SIZE(gen7_render_regs) },
};
static const struct drm_i915_reg_table ivb_blt_reg_tables[] = {
- { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs), false },
- { ivb_master_regs, ARRAY_SIZE(ivb_master_regs), true },
+ { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs) },
};
static const struct drm_i915_reg_table hsw_render_reg_tables[] = {
- { gen7_render_regs, ARRAY_SIZE(gen7_render_regs), false },
- { hsw_render_regs, ARRAY_SIZE(hsw_render_regs), false },
- { hsw_master_regs, ARRAY_SIZE(hsw_master_regs), true },
+ { gen7_render_regs, ARRAY_SIZE(gen7_render_regs) },
+ { hsw_render_regs, ARRAY_SIZE(hsw_render_regs) },
};
static const struct drm_i915_reg_table hsw_blt_reg_tables[] = {
- { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs), false },
- { hsw_master_regs, ARRAY_SIZE(hsw_master_regs), true },
+ { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs) },
+};
+
+static const struct drm_i915_reg_table gen9_blt_reg_tables[] = {
+ { gen9_blt_regs, ARRAY_SIZE(gen9_blt_regs) },
};
static u32 gen7_render_get_cmd_length_mask(u32 cmd_header)
@@ -710,6 +771,17 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header)
return 0;
}
+static u32 gen9_blt_get_cmd_length_mask(u32 cmd_header)
+{
+ u32 client = cmd_header >> INSTR_CLIENT_SHIFT;
+
+ if (client == INSTR_MI_CLIENT || client == INSTR_BC_CLIENT)
+ return 0xFF;
+
+ DRM_DEBUG_DRIVER("CMD: Abnormal blt cmd length! 0x%08X\n", cmd_header);
+ return 0;
+}
+
static bool validate_cmds_sorted(const struct intel_engine_cs *engine,
const struct drm_i915_cmd_table *cmd_tables,
int cmd_table_count)
@@ -867,18 +939,19 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
int cmd_table_count;
int ret;
- if (!IS_GEN(engine->i915, 7))
+ if (!IS_GEN(engine->i915, 7) && !(IS_GEN(engine->i915, 9) &&
+ engine->class == COPY_ENGINE_CLASS))
return;
switch (engine->class) {
case RENDER_CLASS:
if (IS_HASWELL(engine->i915)) {
- cmd_tables = hsw_render_ring_cmds;
+ cmd_tables = hsw_render_ring_cmd_table;
cmd_table_count =
- ARRAY_SIZE(hsw_render_ring_cmds);
+ ARRAY_SIZE(hsw_render_ring_cmd_table);
} else {
- cmd_tables = gen7_render_cmds;
- cmd_table_count = ARRAY_SIZE(gen7_render_cmds);
+ cmd_tables = gen7_render_cmd_table;
+ cmd_table_count = ARRAY_SIZE(gen7_render_cmd_table);
}
if (IS_HASWELL(engine->i915)) {
@@ -888,36 +961,46 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
engine->reg_tables = ivb_render_reg_tables;
engine->reg_table_count = ARRAY_SIZE(ivb_render_reg_tables);
}
-
engine->get_cmd_length_mask = gen7_render_get_cmd_length_mask;
break;
case VIDEO_DECODE_CLASS:
- cmd_tables = gen7_video_cmds;
- cmd_table_count = ARRAY_SIZE(gen7_video_cmds);
+ cmd_tables = gen7_video_cmd_table;
+ cmd_table_count = ARRAY_SIZE(gen7_video_cmd_table);
engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask;
break;
case COPY_ENGINE_CLASS:
- if (IS_HASWELL(engine->i915)) {
- cmd_tables = hsw_blt_ring_cmds;
- cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmds);
+ engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask;
+ if (IS_GEN(engine->i915, 9)) {
+ cmd_tables = gen9_blt_cmd_table;
+ cmd_table_count = ARRAY_SIZE(gen9_blt_cmd_table);
+ engine->get_cmd_length_mask =
+ gen9_blt_get_cmd_length_mask;
+
+ /* BCS Engine unsafe without parser */
+ engine->flags |= I915_ENGINE_REQUIRES_CMD_PARSER;
+ } else if (IS_HASWELL(engine->i915)) {
+ cmd_tables = hsw_blt_ring_cmd_table;
+ cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmd_table);
} else {
- cmd_tables = gen7_blt_cmds;
- cmd_table_count = ARRAY_SIZE(gen7_blt_cmds);
+ cmd_tables = gen7_blt_cmd_table;
+ cmd_table_count = ARRAY_SIZE(gen7_blt_cmd_table);
}
- if (IS_HASWELL(engine->i915)) {
+ if (IS_GEN(engine->i915, 9)) {
+ engine->reg_tables = gen9_blt_reg_tables;
+ engine->reg_table_count =
+ ARRAY_SIZE(gen9_blt_reg_tables);
+ } else if (IS_HASWELL(engine->i915)) {
engine->reg_tables = hsw_blt_reg_tables;
engine->reg_table_count = ARRAY_SIZE(hsw_blt_reg_tables);
} else {
engine->reg_tables = ivb_blt_reg_tables;
engine->reg_table_count = ARRAY_SIZE(ivb_blt_reg_tables);
}
-
- engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask;
break;
case VIDEO_ENHANCEMENT_CLASS:
- cmd_tables = hsw_vebox_cmds;
- cmd_table_count = ARRAY_SIZE(hsw_vebox_cmds);
+ cmd_tables = hsw_vebox_cmd_table;
+ cmd_table_count = ARRAY_SIZE(hsw_vebox_cmd_table);
/* VECS can use the same length_mask function as VCS */
engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask;
break;
@@ -943,7 +1026,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
return;
}
- engine->flags |= I915_ENGINE_NEEDS_CMD_PARSER;
+ engine->flags |= I915_ENGINE_USING_CMD_PARSER;
}
/**
@@ -955,7 +1038,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
*/
void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine)
{
- if (!intel_engine_needs_cmd_parser(engine))
+ if (!intel_engine_using_cmd_parser(engine))
return;
fini_hash_table(engine);
@@ -1029,22 +1112,16 @@ __find_reg(const struct drm_i915_reg_descriptor *table, int count, u32 addr)
}
static const struct drm_i915_reg_descriptor *
-find_reg(const struct intel_engine_cs *engine, bool is_master, u32 addr)
+find_reg(const struct intel_engine_cs *engine, u32 addr)
{
const struct drm_i915_reg_table *table = engine->reg_tables;
+ const struct drm_i915_reg_descriptor *reg = NULL;
int count = engine->reg_table_count;
- for (; count > 0; ++table, --count) {
- if (!table->master || is_master) {
- const struct drm_i915_reg_descriptor *reg;
+ for (; !reg && (count > 0); ++table, --count)
+ reg = __find_reg(table->regs, table->num_regs, addr);
- reg = __find_reg(table->regs, table->num_regs, addr);
- if (reg != NULL)
- return reg;
- }
- }
-
- return NULL;
+ return reg;
}
/* Returns a vmap'd pointer to dst_obj, which the caller must unmap */
@@ -1128,8 +1205,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
static bool check_cmd(const struct intel_engine_cs *engine,
const struct drm_i915_cmd_descriptor *desc,
- const u32 *cmd, u32 length,
- const bool is_master)
+ const u32 *cmd, u32 length)
{
if (desc->flags & CMD_DESC_SKIP)
return true;
@@ -1139,12 +1215,6 @@ static bool check_cmd(const struct intel_engine_cs *engine,
return false;
}
- if ((desc->flags & CMD_DESC_MASTER) && !is_master) {
- DRM_DEBUG_DRIVER("CMD: Rejected master-only command: 0x%08X\n",
- *cmd);
- return false;
- }
-
if (desc->flags & CMD_DESC_REGISTER) {
/*
* Get the distance between individual register offset
@@ -1158,7 +1228,7 @@ static bool check_cmd(const struct intel_engine_cs *engine,
offset += step) {
const u32 reg_addr = cmd[offset] & desc->reg.mask;
const struct drm_i915_reg_descriptor *reg =
- find_reg(engine, is_master, reg_addr);
+ find_reg(engine, reg_addr);
if (!reg) {
DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (%s)\n",
@@ -1236,16 +1306,112 @@ static bool check_cmd(const struct intel_engine_cs *engine,
return true;
}
+static int check_bbstart(const struct i915_gem_context *ctx,
+ u32 *cmd, u32 offset, u32 length,
+ u32 batch_len,
+ u64 batch_start,
+ u64 shadow_batch_start)
+{
+ u64 jump_offset, jump_target;
+ u32 target_cmd_offset, target_cmd_index;
+
+ /* For igt compatibility on older platforms */
+ if (CMDPARSER_USES_GGTT(ctx->i915)) {
+ DRM_DEBUG("CMD: Rejecting BB_START for ggtt based submission\n");
+ return -EACCES;
+ }
+
+ if (length != 3) {
+ DRM_DEBUG("CMD: Recursive BB_START with bad length(%u)\n",
+ length);
+ return -EINVAL;
+ }
+
+ jump_target = *(u64*)(cmd+1);
+ jump_offset = jump_target - batch_start;
+
+ /*
+ * Any underflow of jump_target is guaranteed to be outside the range
+ * of a u32, so >= test catches both too large and too small
+ */
+ if (jump_offset >= batch_len) {
+ DRM_DEBUG("CMD: BB_START to 0x%llx jumps out of BB\n",
+ jump_target);
+ return -EINVAL;
+ }
+
+ /*
+ * This cannot overflow a u32 because we already checked jump_offset
+ * is within the BB, and the batch_len is a u32
+ */
+ target_cmd_offset = lower_32_bits(jump_offset);
+ target_cmd_index = target_cmd_offset / sizeof(u32);
+
+ *(u64*)(cmd + 1) = shadow_batch_start + target_cmd_offset;
+
+ if (target_cmd_index == offset)
+ return 0;
+
+ if (ctx->jump_whitelist_cmds <= target_cmd_index) {
+ DRM_DEBUG("CMD: Rejecting BB_START - truncated whitelist array\n");
+ return -EINVAL;
+ } else if (!test_bit(target_cmd_index, ctx->jump_whitelist)) {
+ DRM_DEBUG("CMD: BB_START to 0x%llx not a previously executed cmd\n",
+ jump_target);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void init_whitelist(struct i915_gem_context *ctx, u32 batch_len)
+{
+ const u32 batch_cmds = DIV_ROUND_UP(batch_len, sizeof(u32));
+ const u32 exact_size = BITS_TO_LONGS(batch_cmds);
+ u32 next_size = BITS_TO_LONGS(roundup_pow_of_two(batch_cmds));
+ unsigned long *next_whitelist;
+
+ if (CMDPARSER_USES_GGTT(ctx->i915))
+ return;
+
+ if (batch_cmds <= ctx->jump_whitelist_cmds) {
+ bitmap_zero(ctx->jump_whitelist, batch_cmds);
+ return;
+ }
+
+again:
+ next_whitelist = kcalloc(next_size, sizeof(long), GFP_KERNEL);
+ if (next_whitelist) {
+ kfree(ctx->jump_whitelist);
+ ctx->jump_whitelist = next_whitelist;
+ ctx->jump_whitelist_cmds =
+ next_size * BITS_PER_BYTE * sizeof(long);
+ return;
+ }
+
+ if (next_size > exact_size) {
+ next_size = exact_size;
+ goto again;
+ }
+
+ DRM_DEBUG("CMD: Failed to extend whitelist. BB_START may be disallowed\n");
+ bitmap_zero(ctx->jump_whitelist, ctx->jump_whitelist_cmds);
+
+ return;
+}
+
#define LENGTH_BIAS 2
/**
* i915_parse_cmds() - parse a submitted batch buffer for privilege violations
+ * @ctx: the context in which the batch is to execute
* @engine: the engine on which the batch is to execute
* @batch_obj: the batch buffer in question
- * @shadow_batch_obj: copy of the batch buffer in question
+ * @batch_start: Canonical base address of batch
* @batch_start_offset: byte offset in the batch at which execution starts
* @batch_len: length of the commands in batch_obj
- * @is_master: is the submitting process the drm master?
+ * @shadow_batch_obj: copy of the batch buffer in question
+ * @shadow_batch_start: Canonical base address of shadow_batch_obj
*
* Parses the specified batch buffer looking for privilege violations as
* described in the overview.
@@ -1253,14 +1419,17 @@ static bool check_cmd(const struct intel_engine_cs *engine,
* Return: non-zero if the parser finds violations or otherwise fails; -EACCES
* if the batch appears legal but should use hardware parsing
*/
-int intel_engine_cmd_parser(struct intel_engine_cs *engine,
+
+int intel_engine_cmd_parser(struct i915_gem_context *ctx,
+ struct intel_engine_cs *engine,
struct drm_i915_gem_object *batch_obj,
- struct drm_i915_gem_object *shadow_batch_obj,
+ u64 batch_start,
u32 batch_start_offset,
u32 batch_len,
- bool is_master)
+ struct drm_i915_gem_object *shadow_batch_obj,
+ u64 shadow_batch_start)
{
- u32 *cmd, *batch_end;
+ u32 *cmd, *batch_end, offset = 0;
struct drm_i915_cmd_descriptor default_desc = noop_desc;
const struct drm_i915_cmd_descriptor *desc = &default_desc;
bool needs_clflush_after = false;
@@ -1274,6 +1443,8 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
return PTR_ERR(cmd);
}
+ init_whitelist(ctx, batch_len);
+
/*
* We use the batch length as size because the shadow object is as
* large or larger and copy_batch() will write MI_NOPs to the extra
@@ -1283,31 +1454,15 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
do {
u32 length;
- if (*cmd == MI_BATCH_BUFFER_END) {
- if (needs_clflush_after) {
- void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping);
- drm_clflush_virt_range(ptr,
- (void *)(cmd + 1) - ptr);
- }
+ if (*cmd == MI_BATCH_BUFFER_END)
break;
- }
desc = find_cmd(engine, *cmd, desc, &default_desc);
if (!desc) {
DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n",
*cmd);
ret = -EINVAL;
- break;
- }
-
- /*
- * If the batch buffer contains a chained batch, return an
- * error that tells the caller to abort and dispatch the
- * workload as a non-secure batch.
- */
- if (desc->cmd.value == MI_BATCH_BUFFER_START) {
- ret = -EACCES;
- break;
+ goto err;
}
if (desc->flags & CMD_DESC_FIXED)
@@ -1321,22 +1476,43 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
length,
batch_end - cmd);
ret = -EINVAL;
- break;
+ goto err;
}
- if (!check_cmd(engine, desc, cmd, length, is_master)) {
+ if (!check_cmd(engine, desc, cmd, length)) {
ret = -EACCES;
+ goto err;
+ }
+
+ if (desc->cmd.value == MI_BATCH_BUFFER_START) {
+ ret = check_bbstart(ctx, cmd, offset, length,
+ batch_len, batch_start,
+ shadow_batch_start);
+
+ if (ret)
+ goto err;
break;
}
+ if (ctx->jump_whitelist_cmds > offset)
+ set_bit(offset, ctx->jump_whitelist);
+
cmd += length;
+ offset += length;
if (cmd >= batch_end) {
DRM_DEBUG_DRIVER("CMD: Got to the end of the buffer w/o a BBE cmd!\n");
ret = -EINVAL;
- break;
+ goto err;
}
} while (1);
+ if (needs_clflush_after) {
+ void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping);
+
+ drm_clflush_virt_range(ptr, (void *)(cmd + 1) - ptr);
+ }
+
+err:
i915_gem_object_unpin_map(shadow_batch_obj);
return ret;
}
@@ -1357,7 +1533,7 @@ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv)
/* If the command parser is not enabled, report 0 - unsupported */
for_each_uabi_engine(engine, dev_priv) {
- if (intel_engine_needs_cmd_parser(engine)) {
+ if (intel_engine_using_cmd_parser(engine)) {
active = true;
break;
}
@@ -1382,6 +1558,7 @@ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv)
* the parser enabled.
* 9. Don't whitelist or handle oacontrol specially, as ownership
* for oacontrol state is moving to i915-perf.
+ * 10. Support for Gen9 BCS Parsing
*/
- return 9;
+ return 10;
}
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 020696726f9e..3d717e282908 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -364,9 +364,6 @@ static int i915_driver_modeset_probe(struct drm_device *dev)
if (ret)
goto cleanup_vga_client;
- /* must happen before intel_power_domains_init_hw() on VLV/CHV */
- intel_update_rawclk(dev_priv);
-
intel_power_domains_init_hw(dev_priv, false);
intel_csr_ucode_init(dev_priv);
@@ -1850,6 +1847,8 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation)
i915_gem_suspend_late(dev_priv);
+ i915_rc6_ctx_wa_suspend(dev_priv);
+
intel_uncore_suspend(&dev_priv->uncore);
intel_power_domains_suspend(dev_priv,
@@ -1924,6 +1923,11 @@ static int i915_drm_resume(struct drm_device *dev)
if (ret)
DRM_ERROR("failed to re-enable GGTT\n");
+ mutex_lock(&dev_priv->drm.struct_mutex);
+ i915_gem_restore_gtt_mappings(dev_priv);
+ i915_gem_restore_fences(dev_priv);
+ mutex_unlock(&dev_priv->drm.struct_mutex);
+
intel_csr_ucode_resume(dev_priv);
i915_restore_state(dev_priv);
@@ -2048,6 +2052,8 @@ static int i915_drm_resume_early(struct drm_device *dev)
intel_power_domains_resume(dev_priv);
+ i915_rc6_ctx_wa_resume(dev_priv);
+
intel_gt_sanitize(&dev_priv->gt, true);
enable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 772154e4073e..89b6112bd66b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -593,6 +593,8 @@ struct intel_rps {
struct intel_rc6 {
bool enabled;
+ bool ctx_corrupted;
+ intel_wakeref_t ctx_corrupted_wakeref;
u64 prev_hw_residency[4];
u64 cur_residency[4];
};
@@ -1723,6 +1725,8 @@ struct drm_i915_private {
struct work_struct idle_work;
} gem;
+ u8 pch_ssc_use;
+
/* For i945gm vblank irq vs. C3 workaround */
struct {
struct work_struct work;
@@ -2073,9 +2077,16 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
#define VEBOX_MASK(dev_priv) \
ENGINE_INSTANCES_MASK(dev_priv, VECS0, I915_MAX_VECS)
+/*
+ * The Gen7 cmdparser copies the scanned buffer to the ggtt for execution
+ * All later gens can run the final buffer from the ppgtt
+ */
+#define CMDPARSER_USES_GGTT(dev_priv) IS_GEN(dev_priv, 7)
+
#define HAS_LLC(dev_priv) (INTEL_INFO(dev_priv)->has_llc)
#define HAS_SNOOP(dev_priv) (INTEL_INFO(dev_priv)->has_snoop)
#define HAS_EDRAM(dev_priv) ((dev_priv)->edram_size_mb)
+#define HAS_SECURE_BATCHES(dev_priv) (INTEL_GEN(dev_priv) < 6)
#define HAS_WT(dev_priv) ((IS_HASWELL(dev_priv) || \
IS_BROADWELL(dev_priv)) && HAS_EDRAM(dev_priv))
@@ -2108,10 +2119,12 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
/* Early gen2 have a totally busted CS tlb and require pinned batches. */
#define HAS_BROKEN_CS_TLB(dev_priv) (IS_I830(dev_priv) || IS_I845G(dev_priv))
+#define NEEDS_RC6_CTX_CORRUPTION_WA(dev_priv) \
+ (IS_BROADWELL(dev_priv) || IS_GEN(dev_priv, 9))
+
/* WaRsDisableCoarsePowerGating:skl,cnl */
#define NEEDS_WaRsDisableCoarsePowerGating(dev_priv) \
- (IS_CANNONLAKE(dev_priv) || \
- IS_SKL_GT3(dev_priv) || IS_SKL_GT4(dev_priv))
+ (IS_CANNONLAKE(dev_priv) || IS_GEN(dev_priv, 9))
#define HAS_GMBUS_IRQ(dev_priv) (INTEL_GEN(dev_priv) >= 4)
#define HAS_GMBUS_BURST_READ(dev_priv) (INTEL_GEN(dev_priv) >= 10 || \
@@ -2282,6 +2295,14 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj,
unsigned long flags);
#define I915_GEM_OBJECT_UNBIND_ACTIVE BIT(0)
+struct i915_vma * __must_check
+i915_gem_object_pin(struct drm_i915_gem_object *obj,
+ struct i915_address_space *vm,
+ const struct i915_ggtt_view *view,
+ u64 size,
+ u64 alignment,
+ u64 flags);
+
void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv);
static inline int __must_check
@@ -2391,12 +2412,14 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type);
int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv);
void intel_engine_init_cmd_parser(struct intel_engine_cs *engine);
void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine);
-int intel_engine_cmd_parser(struct intel_engine_cs *engine,
+int intel_engine_cmd_parser(struct i915_gem_context *cxt,
+ struct intel_engine_cs *engine,
struct drm_i915_gem_object *batch_obj,
- struct drm_i915_gem_object *shadow_batch_obj,
+ u64 user_batch_start,
u32 batch_start_offset,
u32 batch_len,
- bool is_master);
+ struct drm_i915_gem_object *shadow_batch_obj,
+ u64 shadow_batch_start);
/* intel_device_info.c */
static inline struct intel_device_info *
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 95e7c52cf8ed..98305d987ac1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -964,11 +964,28 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
{
struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
struct i915_address_space *vm = &dev_priv->ggtt.vm;
+
+ return i915_gem_object_pin(obj, vm, view, size, alignment,
+ flags | PIN_GLOBAL);
+}
+
+struct i915_vma *
+i915_gem_object_pin(struct drm_i915_gem_object *obj,
+ struct i915_address_space *vm,
+ const struct i915_ggtt_view *view,
+ u64 size,
+ u64 alignment,
+ u64 flags)
+{
+ struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
struct i915_vma *vma;
int ret;
lockdep_assert_held(&obj->base.dev->struct_mutex);
+ if (i915_gem_object_never_bind_ggtt(obj))
+ return ERR_PTR(-ENODEV);
+
if (flags & PIN_MAPPABLE &&
(!view || view->type == I915_GGTT_VIEW_NORMAL)) {
/* If the required space is larger than the available
@@ -1035,7 +1052,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
return ERR_PTR(ret);
}
- ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
+ ret = i915_vma_pin(vma, size, alignment, flags);
if (ret)
return ERR_PTR(ret);
diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h
index 167a7b56ed5b..6795f1daa3d5 100644
--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -77,6 +77,12 @@ struct drm_i915_private;
#define I915_GEM_IDLE_TIMEOUT (HZ / 5)
+static inline void tasklet_lock(struct tasklet_struct *t)
+{
+ while (!tasklet_trylock(t))
+ cpu_relax();
+}
+
static inline void __tasklet_disable_sync_once(struct tasklet_struct *t)
{
if (!atomic_fetch_inc(&t->count))
diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
index 5d9101376a3d..9f1517af5b7f 100644
--- a/drivers/gpu/drm/i915/i915_getparam.c
+++ b/drivers/gpu/drm/i915/i915_getparam.c
@@ -62,7 +62,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
value = !!(i915->caps.scheduler & I915_SCHEDULER_CAP_SEMAPHORES);
break;
case I915_PARAM_HAS_SECURE_BATCHES:
- value = capable(CAP_SYS_ADMIN);
+ value = HAS_SECURE_BATCHES(i915) && capable(CAP_SYS_ADMIN);
break;
case I915_PARAM_CMD_PARSER_VERSION:
value = i915_cmd_parser_get_version(i915);
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 8e251e719390..212acaef581e 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -843,8 +843,8 @@ create_event_attributes(struct i915_pmu *pmu)
const char *name;
const char *unit;
} events[] = {
- __event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "MHz"),
- __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "MHz"),
+ __event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "M"),
+ __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "M"),
__event(I915_PMU_INTERRUPTS, "interrupts", NULL),
__event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"),
};
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 2abd199093c5..f8ee9aba3955 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -471,6 +471,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
#define ECOCHK_PPGTT_WT_HSW (0x2 << 3)
#define ECOCHK_PPGTT_WB_HSW (0x3 << 3)
+#define GEN8_RC6_CTX_INFO _MMIO(0x8504)
+
#define GAC_ECO_BITS _MMIO(0x14090)
#define ECOBITS_SNB_BIT (1 << 13)
#define ECOBITS_PPGTT_CACHE64B (3 << 8)
@@ -555,6 +557,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
*/
#define BCS_SWCTRL _MMIO(0x22200)
+/* There are 16 GPR registers */
+#define BCS_GPR(n) _MMIO(0x22600 + (n) * 8)
+#define BCS_GPR_UDW(n) _MMIO(0x22600 + (n) * 8 + 4)
+
#define GPGPU_THREADS_DISPATCHED _MMIO(0x2290)
#define GPGPU_THREADS_DISPATCHED_UDW _MMIO(0x2290 + 4)
#define HS_INVOCATION_COUNT _MMIO(0x2300)
@@ -7211,6 +7217,10 @@ enum {
#define TGL_DMC_DEBUG_DC5_COUNT _MMIO(0x101084)
#define TGL_DMC_DEBUG_DC6_COUNT _MMIO(0x101088)
+/* Display Internal Timeout Register */
+#define RM_TIMEOUT _MMIO(0x42060)
+#define MMIO_TIMEOUT_US(us) ((us) << 0)
+
/* interrupts */
#define DE_MASTER_IRQ_CONTROL (1 << 31)
#define DE_SPRITEB_FLIP_DONE (1 << 29)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index a53777dd371c..1c5506822dc7 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -194,6 +194,27 @@ static void free_capture_list(struct i915_request *request)
}
}
+static void remove_from_engine(struct i915_request *rq)
+{
+ struct intel_engine_cs *engine, *locked;
+
+ /*
+ * Virtual engines complicate acquiring the engine timeline lock,
+ * as their rq->engine pointer is not stable until under that
+ * engine lock. The simple ploy we use is to take the lock then
+ * check that the rq still belongs to the newly locked engine.
+ */
+ locked = READ_ONCE(rq->engine);
+ spin_lock(&locked->active.lock);
+ while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) {
+ spin_unlock(&locked->active.lock);
+ spin_lock(&engine->active.lock);
+ locked = engine;
+ }
+ list_del(&rq->sched.link);
+ spin_unlock(&locked->active.lock);
+}
+
static bool i915_request_retire(struct i915_request *rq)
{
struct i915_active_request *active, *next;
@@ -259,9 +280,7 @@ static bool i915_request_retire(struct i915_request *rq)
* request that we have removed from the HW and put back on a run
* queue.
*/
- spin_lock(&rq->engine->active.lock);
- list_del(&rq->sched.link);
- spin_unlock(&rq->engine->active.lock);
+ remove_from_engine(rq);
spin_lock(&rq->lock);
i915_request_mark_complete(rq);
@@ -358,9 +377,10 @@ __i915_request_await_execution(struct i915_request *rq,
return 0;
}
-void __i915_request_submit(struct i915_request *request)
+bool __i915_request_submit(struct i915_request *request)
{
struct intel_engine_cs *engine = request->engine;
+ bool result = false;
GEM_TRACE("%s fence %llx:%lld, current %d\n",
engine->name,
@@ -370,6 +390,25 @@ void __i915_request_submit(struct i915_request *request)
GEM_BUG_ON(!irqs_disabled());
lockdep_assert_held(&engine->active.lock);
+ /*
+ * With the advent of preempt-to-busy, we frequently encounter
+ * requests that we have unsubmitted from HW, but left running
+ * until the next ack and so have completed in the meantime. On
+ * resubmission of that completed request, we can skip
+ * updating the payload, and execlists can even skip submitting
+ * the request.
+ *
+ * We must remove the request from the caller's priority queue,
+ * and the caller must only call us when the request is in their
+ * priority queue, under the active.lock. This ensures that the
+ * request has *not* yet been retired and we can safely move
+ * the request into the engine->active.list where it will be
+ * dropped upon retiring. (Otherwise if resubmit a *retired*
+ * request, this would be a horrible use-after-free.)
+ */
+ if (i915_request_completed(request))
+ goto xfer;
+
if (i915_gem_context_is_banned(request->gem_context))
i915_request_skip(request, -EIO);
@@ -393,13 +432,18 @@ void __i915_request_submit(struct i915_request *request)
i915_sw_fence_signaled(&request->semaphore))
engine->saturated |= request->sched.semaphores;
- /* We may be recursing from the signal callback of another i915 fence */
- spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
+ engine->emit_fini_breadcrumb(request,
+ request->ring->vaddr + request->postfix);
- list_move_tail(&request->sched.link, &engine->active.requests);
+ trace_i915_request_execute(request);
+ engine->serial++;
+ result = true;
+
+xfer: /* We may be recursing from the signal callback of another i915 fence */
+ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
- GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
- set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
+ if (!test_and_set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags))
+ list_move_tail(&request->sched.link, &engine->active.requests);
if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) &&
@@ -410,12 +454,7 @@ void __i915_request_submit(struct i915_request *request)
spin_unlock(&request->lock);
- engine->emit_fini_breadcrumb(request,
- request->ring->vaddr + request->postfix);
-
- engine->serial++;
-
- trace_i915_request_execute(request);
+ return result;
}
void i915_request_submit(struct i915_request *request)
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 8ac6e1226a56..e4dd013761e8 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -292,7 +292,7 @@ int i915_request_await_execution(struct i915_request *rq,
void i915_request_add(struct i915_request *rq);
-void __i915_request_submit(struct i915_request *request);
+bool __i915_request_submit(struct i915_request *request);
void i915_request_submit(struct i915_request *request);
void i915_request_skip(struct i915_request *request, int error);
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 7b84ebca2901..3eba8a2b39c2 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -177,9 +177,37 @@ static inline int rq_prio(const struct i915_request *rq)
return rq->sched.attr.priority | __NO_PREEMPTION;
}
-static void kick_submission(struct intel_engine_cs *engine, int prio)
+static inline bool need_preempt(int prio, int active)
{
- const struct i915_request *inflight = *engine->execlists.active;
+ /*
+ * Allow preemption of low -> normal -> high, but we do
+ * not allow low priority tasks to preempt other low priority
+ * tasks under the impression that latency for low priority
+ * tasks does not matter (as much as background throughput),
+ * so kiss.
+ */
+ return prio >= max(I915_PRIORITY_NORMAL, active);
+}
+
+static void kick_submission(struct intel_engine_cs *engine,
+ const struct i915_request *rq,
+ int prio)
+{
+ const struct i915_request *inflight;
+
+ /*
+ * We only need to kick the tasklet once for the high priority
+ * new context we add into the queue.
+ */
+ if (prio <= engine->execlists.queue_priority_hint)
+ return;
+
+ rcu_read_lock();
+
+ /* Nothing currently active? We're overdue for a submission! */
+ inflight = execlists_active(&engine->execlists);
+ if (!inflight)
+ goto unlock;
/*
* If we are already the currently executing context, don't
@@ -188,10 +216,15 @@ static void kick_submission(struct intel_engine_cs *engine, int prio)
* tasklet, i.e. we have not change the priority queue
* sufficiently to oust the running context.
*/
- if (!inflight || !i915_scheduler_need_preempt(prio, rq_prio(inflight)))
- return;
+ if (inflight->hw_context == rq->hw_context)
+ goto unlock;
- tasklet_hi_schedule(&engine->execlists.tasklet);
+ engine->execlists.queue_priority_hint = prio;
+ if (need_preempt(prio, rq_prio(inflight)))
+ tasklet_hi_schedule(&engine->execlists.tasklet);
+
+unlock:
+ rcu_read_unlock();
}
static void __i915_schedule(struct i915_sched_node *node,
@@ -317,13 +350,8 @@ static void __i915_schedule(struct i915_sched_node *node,
list_move_tail(&node->link, cache.priolist);
}
- if (prio <= engine->execlists.queue_priority_hint)
- continue;
-
- engine->execlists.queue_priority_hint = prio;
-
/* Defer (tasklet) submission until after all of our updates. */
- kick_submission(engine, prio);
+ kick_submission(engine, node_to_request(node), prio);
}
spin_unlock(&engine->active.lock);
diff --git a/drivers/gpu/drm/i915/intel_pch.c b/drivers/gpu/drm/i915/intel_pch.c
index fa864d8f2b73..15f8bff141f9 100644
--- a/drivers/gpu/drm/i915/intel_pch.c
+++ b/drivers/gpu/drm/i915/intel_pch.c
@@ -69,6 +69,7 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id)
WARN_ON(!IS_CANNONLAKE(dev_priv) && !IS_COFFEELAKE(dev_priv));
return PCH_CNP;
case INTEL_PCH_CMP_DEVICE_ID_TYPE:
+ case INTEL_PCH_CMP2_DEVICE_ID_TYPE:
DRM_DEBUG_KMS("Found Comet Lake PCH (CMP)\n");
WARN_ON(!IS_COFFEELAKE(dev_priv));
/* CometPoint is CNP Compatible */
diff --git a/drivers/gpu/drm/i915/intel_pch.h b/drivers/gpu/drm/i915/intel_pch.h
index e6a2d65f19c6..c29c81ec7971 100644
--- a/drivers/gpu/drm/i915/intel_pch.h
+++ b/drivers/gpu/drm/i915/intel_pch.h
@@ -41,6 +41,7 @@ enum intel_pch {
#define INTEL_PCH_CNP_DEVICE_ID_TYPE 0xA300
#define INTEL_PCH_CNP_LP_DEVICE_ID_TYPE 0x9D80
#define INTEL_PCH_CMP_DEVICE_ID_TYPE 0x0280
+#define INTEL_PCH_CMP2_DEVICE_ID_TYPE 0x0680
#define INTEL_PCH_ICP_DEVICE_ID_TYPE 0x3480
#define INTEL_PCH_MCC_DEVICE_ID_TYPE 0x4B00
#define INTEL_PCH_MCC2_DEVICE_ID_TYPE 0x3880
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 75ee027abb80..2efe1d12d5a9 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -126,6 +126,14 @@ static void bxt_init_clock_gating(struct drm_i915_private *dev_priv)
*/
I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
PWM1_GATING_DIS | PWM2_GATING_DIS);
+
+ /*
+ * Lower the display internal timeout.
+ * This is needed to avoid any hard hangs when DSI port PLL
+ * is off and a MMIO access is attempted by any privilege
+ * application, using batch buffers or any other means.
+ */
+ I915_WRITE(RM_TIMEOUT, MMIO_TIMEOUT_US(950));
}
static void glk_init_clock_gating(struct drm_i915_private *dev_priv)
@@ -8544,6 +8552,100 @@ static void intel_init_emon(struct drm_i915_private *dev_priv)
dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
}
+static bool i915_rc6_ctx_corrupted(struct drm_i915_private *dev_priv)
+{
+ return !I915_READ(GEN8_RC6_CTX_INFO);
+}
+
+static void i915_rc6_ctx_wa_init(struct drm_i915_private *i915)
+{
+ if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915))
+ return;
+
+ if (i915_rc6_ctx_corrupted(i915)) {
+ DRM_INFO("RC6 context corrupted, disabling runtime power management\n");
+ i915->gt_pm.rc6.ctx_corrupted = true;
+ i915->gt_pm.rc6.ctx_corrupted_wakeref =
+ intel_runtime_pm_get(&i915->runtime_pm);
+ }
+}
+
+static void i915_rc6_ctx_wa_cleanup(struct drm_i915_private *i915)
+{
+ if (i915->gt_pm.rc6.ctx_corrupted) {
+ intel_runtime_pm_put(&i915->runtime_pm,
+ i915->gt_pm.rc6.ctx_corrupted_wakeref);
+ i915->gt_pm.rc6.ctx_corrupted = false;
+ }
+}
+
+/**
+ * i915_rc6_ctx_wa_suspend - system suspend sequence for the RC6 CTX WA
+ * @i915: i915 device
+ *
+ * Perform any steps needed to clean up the RC6 CTX WA before system suspend.
+ */
+void i915_rc6_ctx_wa_suspend(struct drm_i915_private *i915)
+{
+ if (i915->gt_pm.rc6.ctx_corrupted)
+ intel_runtime_pm_put(&i915->runtime_pm,
+ i915->gt_pm.rc6.ctx_corrupted_wakeref);
+}
+
+/**
+ * i915_rc6_ctx_wa_resume - system resume sequence for the RC6 CTX WA
+ * @i915: i915 device
+ *
+ * Perform any steps needed to re-init the RC6 CTX WA after system resume.
+ */
+void i915_rc6_ctx_wa_resume(struct drm_i915_private *i915)
+{
+ if (!i915->gt_pm.rc6.ctx_corrupted)
+ return;
+
+ if (i915_rc6_ctx_corrupted(i915)) {
+ i915->gt_pm.rc6.ctx_corrupted_wakeref =
+ intel_runtime_pm_get(&i915->runtime_pm);
+ return;
+ }
+
+ DRM_INFO("RC6 context restored, re-enabling runtime power management\n");
+ i915->gt_pm.rc6.ctx_corrupted = false;
+}
+
+static void intel_disable_rc6(struct drm_i915_private *dev_priv);
+
+/**
+ * i915_rc6_ctx_wa_check - check for a new RC6 CTX corruption
+ * @i915: i915 device
+ *
+ * Check if an RC6 CTX corruption has happened since the last check and if so
+ * disable RC6 and runtime power management.
+ *
+ * Return false if no context corruption has happened since the last call of
+ * this function, true otherwise.
+*/
+bool i915_rc6_ctx_wa_check(struct drm_i915_private *i915)
+{
+ if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915))
+ return false;
+
+ if (i915->gt_pm.rc6.ctx_corrupted)
+ return false;
+
+ if (!i915_rc6_ctx_corrupted(i915))
+ return false;
+
+ DRM_NOTE("RC6 context corruption, disabling runtime power management\n");
+
+ intel_disable_rc6(i915);
+ i915->gt_pm.rc6.ctx_corrupted = true;
+ i915->gt_pm.rc6.ctx_corrupted_wakeref =
+ intel_runtime_pm_get_noresume(&i915->runtime_pm);
+
+ return true;
+}
+
void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
{
struct intel_rps *rps = &dev_priv->gt_pm.rps;
@@ -8557,6 +8659,8 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
pm_runtime_get(&dev_priv->drm.pdev->dev);
}
+ i915_rc6_ctx_wa_init(dev_priv);
+
/* Initialize RPS limits (for userspace) */
if (IS_CHERRYVIEW(dev_priv))
cherryview_init_gt_powersave(dev_priv);
@@ -8595,6 +8699,8 @@ void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
if (IS_VALLEYVIEW(dev_priv))
valleyview_cleanup_gt_powersave(dev_priv);
+ i915_rc6_ctx_wa_cleanup(dev_priv);
+
if (!HAS_RC6(dev_priv))
pm_runtime_put(&dev_priv->drm.pdev->dev);
}
@@ -8623,7 +8729,7 @@ static inline void intel_disable_llc_pstate(struct drm_i915_private *i915)
i915->gt_pm.llc_pstate.enabled = false;
}
-static void intel_disable_rc6(struct drm_i915_private *dev_priv)
+static void __intel_disable_rc6(struct drm_i915_private *dev_priv)
{
lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
@@ -8642,6 +8748,15 @@ static void intel_disable_rc6(struct drm_i915_private *dev_priv)
dev_priv->gt_pm.rc6.enabled = false;
}
+static void intel_disable_rc6(struct drm_i915_private *dev_priv)
+{
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
+ mutex_lock(&rps->lock);
+ __intel_disable_rc6(dev_priv);
+ mutex_unlock(&rps->lock);
+}
+
static void intel_disable_rps(struct drm_i915_private *dev_priv)
{
lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
@@ -8667,7 +8782,7 @@ void intel_disable_gt_powersave(struct drm_i915_private *dev_priv)
{
mutex_lock(&dev_priv->gt_pm.rps.lock);
- intel_disable_rc6(dev_priv);
+ __intel_disable_rc6(dev_priv);
intel_disable_rps(dev_priv);
if (HAS_LLC(dev_priv))
intel_disable_llc_pstate(dev_priv);
@@ -8694,6 +8809,9 @@ static void intel_enable_rc6(struct drm_i915_private *dev_priv)
if (dev_priv->gt_pm.rc6.enabled)
return;
+ if (dev_priv->gt_pm.rc6.ctx_corrupted)
+ return;
+
if (IS_CHERRYVIEW(dev_priv))
cherryview_enable_rc6(dev_priv);
else if (IS_VALLEYVIEW(dev_priv))
diff --git a/drivers/gpu/drm/i915/intel_pm.h b/drivers/gpu/drm/i915/intel_pm.h
index e3573e1e16e3..0f7390c850ec 100644
--- a/drivers/gpu/drm/i915/intel_pm.h
+++ b/drivers/gpu/drm/i915/intel_pm.h
@@ -36,6 +36,9 @@ void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv);
void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv);
void intel_enable_gt_powersave(struct drm_i915_private *dev_priv);
void intel_disable_gt_powersave(struct drm_i915_private *dev_priv);
+bool i915_rc6_ctx_wa_check(struct drm_i915_private *i915);
+void i915_rc6_ctx_wa_suspend(struct drm_i915_private *i915);
+void i915_rc6_ctx_wa_resume(struct drm_i915_private *i915);
void gen6_rps_busy(struct drm_i915_private *dev_priv);
void gen6_rps_idle(struct drm_i915_private *dev_priv);
void gen6_rps_boost(struct i915_request *rq);
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem.c b/drivers/gpu/drm/i915/selftests/i915_gem.c
index bb6dd54a6ff3..37593831b539 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem.c
@@ -118,6 +118,12 @@ static void pm_resume(struct drm_i915_private *i915)
with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
intel_gt_sanitize(&i915->gt, false);
i915_gem_sanitize(i915);
+
+ mutex_lock(&i915->drm.struct_mutex);
+ i915_gem_restore_gtt_mappings(i915);
+ i915_gem_restore_fences(i915);
+ mutex_unlock(&i915->drm.struct_mutex);
+
i915_gem_resume(i915);
}
}
diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c
index 663ff9f4fac9..1e7b1be25bb0 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_host.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_host.c
@@ -26,6 +26,8 @@
#include "dsi_cfg.h"
#include "msm_kms.h"
+#define DSI_RESET_TOGGLE_DELAY_MS 20
+
static int dsi_get_version(const void __iomem *base, u32 *major, u32 *minor)
{
u32 ver;
@@ -986,7 +988,7 @@ static void dsi_sw_reset(struct msm_dsi_host *msm_host)
wmb(); /* clocks need to be enabled before reset */
dsi_write(msm_host, REG_DSI_RESET, 1);
- wmb(); /* make sure reset happen */
+ msleep(DSI_RESET_TOGGLE_DELAY_MS); /* make sure reset happen */
dsi_write(msm_host, REG_DSI_RESET, 0);
}
@@ -1396,7 +1398,7 @@ static void dsi_sw_reset_restore(struct msm_dsi_host *msm_host)
/* dsi controller can only be reset while clocks are running */
dsi_write(msm_host, REG_DSI_RESET, 1);
- wmb(); /* make sure reset happen */
+ msleep(DSI_RESET_TOGGLE_DELAY_MS); /* make sure reset happen */
dsi_write(msm_host, REG_DSI_RESET, 0);
wmb(); /* controller out of reset */
dsi_write(msm_host, REG_DSI_CTRL, data0);
diff --git a/drivers/gpu/drm/panel/panel-lg-lb035q02.c b/drivers/gpu/drm/panel/panel-lg-lb035q02.c
index fc82a525b071..ee4379729a5b 100644
--- a/drivers/gpu/drm/panel/panel-lg-lb035q02.c
+++ b/drivers/gpu/drm/panel/panel-lg-lb035q02.c
@@ -220,9 +220,17 @@ static const struct of_device_id lb035q02_of_match[] = {
MODULE_DEVICE_TABLE(of, lb035q02_of_match);
+static const struct spi_device_id lb035q02_ids[] = {
+ { "lb035q02", 0 },
+ { /* sentinel */ }
+};
+
+MODULE_DEVICE_TABLE(spi, lb035q02_ids);
+
static struct spi_driver lb035q02_driver = {
.probe = lb035q02_probe,
.remove = lb035q02_remove,
+ .id_table = lb035q02_ids,
.driver = {
.name = "panel-lg-lb035q02",
.of_match_table = lb035q02_of_match,
@@ -231,7 +239,6 @@ static struct spi_driver lb035q02_driver = {
module_spi_driver(lb035q02_driver);
-MODULE_ALIAS("spi:lgphilips,lb035q02");
MODULE_AUTHOR("Tomi Valkeinen <tomi.valkeinen@ti.com>");
MODULE_DESCRIPTION("LG.Philips LB035Q02 LCD Panel driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/panel/panel-nec-nl8048hl11.c b/drivers/gpu/drm/panel/panel-nec-nl8048hl11.c
index 299b217c83e1..20f17e46e65d 100644
--- a/drivers/gpu/drm/panel/panel-nec-nl8048hl11.c
+++ b/drivers/gpu/drm/panel/panel-nec-nl8048hl11.c
@@ -230,9 +230,17 @@ static const struct of_device_id nl8048_of_match[] = {
MODULE_DEVICE_TABLE(of, nl8048_of_match);
+static const struct spi_device_id nl8048_ids[] = {
+ { "nl8048hl11", 0 },
+ { /* sentinel */ }
+};
+
+MODULE_DEVICE_TABLE(spi, nl8048_ids);
+
static struct spi_driver nl8048_driver = {
.probe = nl8048_probe,
.remove = nl8048_remove,
+ .id_table = nl8048_ids,
.driver = {
.name = "panel-nec-nl8048hl11",
.pm = &nl8048_pm_ops,
@@ -242,7 +250,6 @@ static struct spi_driver nl8048_driver = {
module_spi_driver(nl8048_driver);
-MODULE_ALIAS("spi:nec,nl8048hl11");
MODULE_AUTHOR("Erik Gilling <konkers@android.com>");
MODULE_DESCRIPTION("NEC-NL8048HL11 Driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/panel/panel-sony-acx565akm.c b/drivers/gpu/drm/panel/panel-sony-acx565akm.c
index 305259b58767..3d5b9c4f68d9 100644
--- a/drivers/gpu/drm/panel/panel-sony-acx565akm.c
+++ b/drivers/gpu/drm/panel/panel-sony-acx565akm.c
@@ -684,9 +684,17 @@ static const struct of_device_id acx565akm_of_match[] = {
MODULE_DEVICE_TABLE(of, acx565akm_of_match);
+static const struct spi_device_id acx565akm_ids[] = {
+ { "acx565akm", 0 },
+ { /* sentinel */ }
+};
+
+MODULE_DEVICE_TABLE(spi, acx565akm_ids);
+
static struct spi_driver acx565akm_driver = {
.probe = acx565akm_probe,
.remove = acx565akm_remove,
+ .id_table = acx565akm_ids,
.driver = {
.name = "panel-sony-acx565akm",
.of_match_table = acx565akm_of_match,
@@ -695,7 +703,6 @@ static struct spi_driver acx565akm_driver = {
module_spi_driver(acx565akm_driver);
-MODULE_ALIAS("spi:sony,acx565akm");
MODULE_AUTHOR("Nokia Corporation");
MODULE_DESCRIPTION("Sony ACX565AKM LCD Panel Driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/panel/panel-tpo-td028ttec1.c b/drivers/gpu/drm/panel/panel-tpo-td028ttec1.c
index d7b2e34626ef..f2baff827f50 100644
--- a/drivers/gpu/drm/panel/panel-tpo-td028ttec1.c
+++ b/drivers/gpu/drm/panel/panel-tpo-td028ttec1.c
@@ -375,8 +375,7 @@ static const struct of_device_id td028ttec1_of_match[] = {
MODULE_DEVICE_TABLE(of, td028ttec1_of_match);
static const struct spi_device_id td028ttec1_ids[] = {
- { "tpo,td028ttec1", 0},
- { "toppoly,td028ttec1", 0 },
+ { "td028ttec1", 0 },
{ /* sentinel */ }
};
diff --git a/drivers/gpu/drm/panel/panel-tpo-td043mtea1.c b/drivers/gpu/drm/panel/panel-tpo-td043mtea1.c
index 84370562910f..ba163c779084 100644
--- a/drivers/gpu/drm/panel/panel-tpo-td043mtea1.c
+++ b/drivers/gpu/drm/panel/panel-tpo-td043mtea1.c
@@ -491,9 +491,17 @@ static const struct of_device_id td043mtea1_of_match[] = {
MODULE_DEVICE_TABLE(of, td043mtea1_of_match);
+static const struct spi_device_id td043mtea1_ids[] = {
+ { "td043mtea1", 0 },
+ { /* sentinel */ }
+};
+
+MODULE_DEVICE_TABLE(spi, td043mtea1_ids);
+
static struct spi_driver td043mtea1_driver = {
.probe = td043mtea1_probe,
.remove = td043mtea1_remove,
+ .id_table = td043mtea1_ids,
.driver = {
.name = "panel-tpo-td043mtea1",
.pm = &td043mtea1_pm_ops,
@@ -503,7 +511,6 @@ static struct spi_driver td043mtea1_driver = {
module_spi_driver(td043mtea1_driver);
-MODULE_ALIAS("spi:tpo,td043mtea1");
MODULE_AUTHOR("Gražvydas Ignotas <notasas@gmail.com>");
MODULE_DESCRIPTION("TPO TD043MTEA1 Panel Driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index bc2ddeb55f5d..f21bc8a7ee3a 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -556,11 +556,11 @@ static int panfrost_probe(struct platform_device *pdev)
return 0;
err_out2:
+ pm_runtime_disable(pfdev->dev);
panfrost_devfreq_fini(pfdev);
err_out1:
panfrost_device_fini(pfdev);
err_out0:
- pm_runtime_disable(pfdev->dev);
drm_dev_put(ddev);
return err;
}
diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c
index f67ed925c0ef..8822ec13a0d6 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gpu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c
@@ -208,6 +208,9 @@ static void panfrost_gpu_init_features(struct panfrost_device *pfdev)
pfdev->features.mem_features = gpu_read(pfdev, GPU_MEM_FEATURES);
pfdev->features.mmu_features = gpu_read(pfdev, GPU_MMU_FEATURES);
pfdev->features.thread_features = gpu_read(pfdev, GPU_THREAD_FEATURES);
+ pfdev->features.max_threads = gpu_read(pfdev, GPU_THREAD_MAX_THREADS);
+ pfdev->features.thread_max_workgroup_sz = gpu_read(pfdev, GPU_THREAD_MAX_WORKGROUP_SIZE);
+ pfdev->features.thread_max_barrier_sz = gpu_read(pfdev, GPU_THREAD_MAX_BARRIER_SIZE);
pfdev->features.coherency_features = gpu_read(pfdev, GPU_COHERENCY_FEATURES);
for (i = 0; i < 4; i++)
pfdev->features.texture_features[i] = gpu_read(pfdev, GPU_TEXTURE_FEATURES(i));
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
index a58551668d9a..21f34d44aac2 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -381,13 +381,19 @@ static void panfrost_job_timedout(struct drm_sched_job *sched_job)
job_read(pfdev, JS_TAIL_LO(js)),
sched_job);
- mutex_lock(&pfdev->reset_lock);
+ if (!mutex_trylock(&pfdev->reset_lock))
+ return;
- for (i = 0; i < NUM_JOB_SLOTS; i++)
- drm_sched_stop(&pfdev->js->queue[i].sched, sched_job);
+ for (i = 0; i < NUM_JOB_SLOTS; i++) {
+ struct drm_gpu_scheduler *sched = &pfdev->js->queue[i].sched;
+
+ drm_sched_stop(sched, sched_job);
+ if (js != i)
+ /* Ensure any timeouts on other slots have finished */
+ cancel_delayed_work_sync(&sched->work_tdr);
+ }
- if (sched_job)
- drm_sched_increase_karma(sched_job);
+ drm_sched_increase_karma(sched_job);
spin_lock_irqsave(&pfdev->js->job_lock, flags);
for (i = 0; i < NUM_JOB_SLOTS; i++) {
diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
index bdd990568476..a3ed64a1f15e 100644
--- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
@@ -224,9 +224,9 @@ static size_t get_pgsize(u64 addr, size_t size)
return SZ_2M;
}
-void panfrost_mmu_flush_range(struct panfrost_device *pfdev,
- struct panfrost_mmu *mmu,
- u64 iova, size_t size)
+static void panfrost_mmu_flush_range(struct panfrost_device *pfdev,
+ struct panfrost_mmu *mmu,
+ u64 iova, size_t size)
{
if (mmu->as < 0)
return;
@@ -406,11 +406,11 @@ addr_to_drm_mm_node(struct panfrost_device *pfdev, int as, u64 addr)
spin_lock(&pfdev->as_lock);
list_for_each_entry(mmu, &pfdev->as_lru_list, list) {
if (as == mmu->as)
- break;
+ goto found_mmu;
}
- if (as != mmu->as)
- goto out;
+ goto out;
+found_mmu:
priv = container_of(mmu, struct panfrost_file_priv, mmu);
spin_lock(&priv->mm_lock);
@@ -432,7 +432,8 @@ out:
#define NUM_FAULT_PAGES (SZ_2M / PAGE_SIZE)
-int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as, u64 addr)
+static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
+ u64 addr)
{
int ret, i;
struct panfrost_gem_object *bo;
diff --git a/drivers/gpu/drm/panfrost/panfrost_perfcnt.c b/drivers/gpu/drm/panfrost/panfrost_perfcnt.c
index 83c57d325ca8..2dba192bf198 100644
--- a/drivers/gpu/drm/panfrost/panfrost_perfcnt.c
+++ b/drivers/gpu/drm/panfrost/panfrost_perfcnt.c
@@ -16,6 +16,7 @@
#include "panfrost_issues.h"
#include "panfrost_job.h"
#include "panfrost_mmu.h"
+#include "panfrost_perfcnt.h"
#include "panfrost_regs.h"
#define COUNTERS_PER_BLOCK 64
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index d0bc91ed7c90..4528f4dc0b2d 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -379,7 +379,9 @@ radeon_pci_remove(struct pci_dev *pdev)
static void
radeon_pci_shutdown(struct pci_dev *pdev)
{
+#ifdef CONFIG_PPC64
struct drm_device *ddev = pci_get_drvdata(pdev);
+#endif
/* if we are running in a VM, make sure the device
* torn down properly on reboot/shutdown
@@ -387,11 +389,15 @@ radeon_pci_shutdown(struct pci_dev *pdev)
if (radeon_device_is_virtual())
radeon_pci_remove(pdev);
+#ifdef CONFIG_PPC64
/* Some adapters need to be suspended before a
- * shutdown occurs in order to prevent an error
- * during kexec.
- */
+ * shutdown occurs in order to prevent an error
+ * during kexec.
+ * Make this power specific becauase it breaks
+ * some non-power boards.
+ */
radeon_suspend_kms(ddev, true, true, false);
+#endif
}
static int radeon_pmops_suspend(struct device *dev)
diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index 460fd98e40a7..a0b382a637a6 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -1958,6 +1958,7 @@ static void si_initialize_powertune_defaults(struct radeon_device *rdev)
case 0x682C:
si_pi->cac_weights = cac_weights_cape_verde_pro;
si_pi->dte_data = dte_data_sun_xt;
+ update_dte_from_pl2 = true;
break;
case 0x6825:
case 0x6827:
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 9a0ee74d82dc..f39b97ed4ade 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -479,6 +479,7 @@ void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
struct drm_sched_job *s_job, *tmp;
uint64_t guilty_context;
bool found_guilty = false;
+ struct dma_fence *fence;
list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
struct drm_sched_fence *s_fence = s_job->s_fence;
@@ -492,7 +493,16 @@ void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
dma_fence_set_error(&s_fence->finished, -ECANCELED);
dma_fence_put(s_job->s_fence->parent);
- s_job->s_fence->parent = sched->ops->run_job(s_job);
+ fence = sched->ops->run_job(s_job);
+
+ if (IS_ERR_OR_NULL(fence)) {
+ s_job->s_fence->parent = NULL;
+ dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));
+ } else {
+ s_job->s_fence->parent = fence;
+ }
+
+
}
}
EXPORT_SYMBOL(drm_sched_resubmit_jobs);
@@ -720,7 +730,7 @@ static int drm_sched_main(void *param)
fence = sched->ops->run_job(sched_job);
drm_sched_fence_scheduled(s_fence);
- if (fence) {
+ if (!IS_ERR_OR_NULL(fence)) {
s_fence->parent = dma_fence_get(fence);
r = dma_fence_add_callback(fence, &sched_job->cb,
drm_sched_process_job);
@@ -730,8 +740,11 @@ static int drm_sched_main(void *param)
DRM_ERROR("fence add callback failed (%d)\n",
r);
dma_fence_put(fence);
- } else
+ } else {
+
+ dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));
drm_sched_process_job(NULL, &sched_job->cb);
+ }
wake_up(&sched->job_scheduled);
}
diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c
index 04c721d0d3b9..b89439ed210d 100644
--- a/drivers/gpu/drm/sun4i/sun4i_tcon.c
+++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c
@@ -488,7 +488,7 @@ static void sun4i_tcon0_mode_set_rgb(struct sun4i_tcon *tcon,
WARN_ON(!tcon->quirks->has_channel_0);
- tcon->dclk_min_div = 6;
+ tcon->dclk_min_div = 1;
tcon->dclk_max_div = 127;
sun4i_tcon0_mode_set_common(tcon, mode);
diff --git a/drivers/gpu/drm/tiny/Kconfig b/drivers/gpu/drm/tiny/Kconfig
index 504763423d46..a46ac284dd5e 100644
--- a/drivers/gpu/drm/tiny/Kconfig
+++ b/drivers/gpu/drm/tiny/Kconfig
@@ -63,7 +63,6 @@ config TINYDRM_REPAPER
depends on DRM && SPI
select DRM_KMS_HELPER
select DRM_KMS_CMA_HELPER
- depends on THERMAL || !THERMAL
help
DRM driver for the following Pervasive Displays panels:
1.44" TFT EPD Panel (E1144CS021)
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 20ff56f27aa4..98819462f025 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -185,8 +185,9 @@ static void ttm_bo_add_mem_to_lru(struct ttm_buffer_object *bo,
list_add_tail(&bo->lru, &man->lru[bo->priority]);
kref_get(&bo->list_kref);
- if (bo->ttm && !(bo->ttm->page_flags &
- (TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SWAPPED))) {
+ if (!(man->flags & TTM_MEMTYPE_FLAG_FIXED) && bo->ttm &&
+ !(bo->ttm->page_flags & (TTM_PAGE_FLAG_SG |
+ TTM_PAGE_FLAG_SWAPPED))) {
list_add_tail(&bo->swap, &bdev->glob->swap_lru[bo->priority]);
kref_get(&bo->list_kref);
}
@@ -878,11 +879,11 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
if (!bo) {
if (busy_bo)
- ttm_bo_get(busy_bo);
+ kref_get(&busy_bo->list_kref);
spin_unlock(&glob->lru_lock);
ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
if (busy_bo)
- ttm_bo_put(busy_bo);
+ kref_put(&busy_bo->list_kref, ttm_bo_release_list);
return ret;
}
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 76eedb963693..46dc3de7e81b 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -278,15 +278,13 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
else
ret = vmf_insert_pfn(&cvma, address, pfn);
- /*
- * Somebody beat us to this PTE or prefaulting to
- * an already populated PTE, or prefaulting error.
- */
-
- if (unlikely((ret == VM_FAULT_NOPAGE && i > 0)))
- break;
- else if (unlikely(ret & VM_FAULT_ERROR))
- goto out_io_unlock;
+ /* Never error on prefaulted PTEs */
+ if (unlikely((ret & VM_FAULT_ERROR))) {
+ if (i == 0)
+ goto out_io_unlock;
+ else
+ break;
+ }
address += PAGE_SIZE;
if (unlikely(++page_offset >= page_last))
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index 5d80507b539b..19c092d75266 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -557,13 +557,16 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
if (args->bcl_start != args->bcl_end) {
bin = kcalloc(1, sizeof(*bin), GFP_KERNEL);
- if (!bin)
+ if (!bin) {
+ v3d_job_put(&render->base);
return -ENOMEM;
+ }
ret = v3d_job_init(v3d, file_priv, &bin->base,
v3d_job_free, args->in_sync_bcl);
if (ret) {
v3d_job_put(&render->base);
+ kfree(bin);
return ret;
}
diff --git a/drivers/gpu/drm/xen/xen_drm_front.c b/drivers/gpu/drm/xen/xen_drm_front.c
index ba1828acd8c9..4be49c1aef51 100644
--- a/drivers/gpu/drm/xen/xen_drm_front.c
+++ b/drivers/gpu/drm/xen/xen_drm_front.c
@@ -718,17 +718,9 @@ static int xen_drv_probe(struct xenbus_device *xb_dev,
struct device *dev = &xb_dev->dev;
int ret;
- /*
- * The device is not spawn from a device tree, so arch_setup_dma_ops
- * is not called, thus leaving the device with dummy DMA ops.
- * This makes the device return error on PRIME buffer import, which
- * is not correct: to fix this call of_dma_configure() with a NULL
- * node to set default DMA ops.
- */
- dev->coherent_dma_mask = DMA_BIT_MASK(32);
- ret = of_dma_configure(dev, NULL, true);
+ ret = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(64));
if (ret < 0) {
- DRM_ERROR("Cannot setup DMA ops, ret %d", ret);
+ DRM_ERROR("Cannot setup DMA mask, ret %d", ret);
return ret;
}
diff --git a/drivers/hid/hid-axff.c b/drivers/hid/hid-axff.c
index 6654c1550e2e..fbe4e16ab029 100644
--- a/drivers/hid/hid-axff.c
+++ b/drivers/hid/hid-axff.c
@@ -63,13 +63,20 @@ static int axff_init(struct hid_device *hid)
{
struct axff_device *axff;
struct hid_report *report;
- struct hid_input *hidinput = list_first_entry(&hid->inputs, struct hid_input, list);
+ struct hid_input *hidinput;
struct list_head *report_list =&hid->report_enum[HID_OUTPUT_REPORT].report_list;
- struct input_dev *dev = hidinput->input;
+ struct input_dev *dev;
int field_count = 0;
int i, j;
int error;
+ if (list_empty(&hid->inputs)) {
+ hid_err(hid, "no inputs found\n");
+ return -ENODEV;
+ }
+ hidinput = list_first_entry(&hid->inputs, struct hid_input, list);
+ dev = hidinput->input;
+
if (list_empty(report_list)) {
hid_err(hid, "no output reports found\n");
return -ENODEV;
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 3eaee2c37931..63fdbf09b044 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1139,6 +1139,7 @@ int hid_open_report(struct hid_device *device)
__u8 *start;
__u8 *buf;
__u8 *end;
+ __u8 *next;
int ret;
static int (*dispatch_type[])(struct hid_parser *parser,
struct hid_item *item) = {
@@ -1192,7 +1193,8 @@ int hid_open_report(struct hid_device *device)
device->collection_size = HID_DEFAULT_NUM_COLLECTIONS;
ret = -EINVAL;
- while ((start = fetch_item(start, end, &item)) != NULL) {
+ while ((next = fetch_item(start, end, &item)) != NULL) {
+ start = next;
if (item.format != HID_ITEM_FORMAT_SHORT) {
hid_err(device, "unexpected long global item\n");
@@ -1230,7 +1232,8 @@ int hid_open_report(struct hid_device *device)
}
}
- hid_err(device, "item fetching failed at offset %d\n", (int)(end - start));
+ hid_err(device, "item fetching failed at offset %u/%u\n",
+ size - (unsigned int)(end - start), size);
err:
kfree(parser->collection_stack);
alloc_err:
diff --git a/drivers/hid/hid-dr.c b/drivers/hid/hid-dr.c
index 17e17f9a597b..947f19f8685f 100644
--- a/drivers/hid/hid-dr.c
+++ b/drivers/hid/hid-dr.c
@@ -75,13 +75,19 @@ static int drff_init(struct hid_device *hid)
{
struct drff_device *drff;
struct hid_report *report;
- struct hid_input *hidinput = list_first_entry(&hid->inputs,
- struct hid_input, list);
+ struct hid_input *hidinput;
struct list_head *report_list =
&hid->report_enum[HID_OUTPUT_REPORT].report_list;
- struct input_dev *dev = hidinput->input;
+ struct input_dev *dev;
int error;
+ if (list_empty(&hid->inputs)) {
+ hid_err(hid, "no inputs found\n");
+ return -ENODEV;
+ }
+ hidinput = list_first_entry(&hid->inputs, struct hid_input, list);
+ dev = hidinput->input;
+
if (list_empty(report_list)) {
hid_err(hid, "no output reports found\n");
return -ENODEV;
diff --git a/drivers/hid/hid-emsff.c b/drivers/hid/hid-emsff.c
index 7cd5651872d3..c34f2e5a049f 100644
--- a/drivers/hid/hid-emsff.c
+++ b/drivers/hid/hid-emsff.c
@@ -47,13 +47,19 @@ static int emsff_init(struct hid_device *hid)
{
struct emsff_device *emsff;
struct hid_report *report;
- struct hid_input *hidinput = list_first_entry(&hid->inputs,
- struct hid_input, list);
+ struct hid_input *hidinput;
struct list_head *report_list =
&hid->report_enum[HID_OUTPUT_REPORT].report_list;
- struct input_dev *dev = hidinput->input;
+ struct input_dev *dev;
int error;
+ if (list_empty(&hid->inputs)) {
+ hid_err(hid, "no inputs found\n");
+ return -ENODEV;
+ }
+ hidinput = list_first_entry(&hid->inputs, struct hid_input, list);
+ dev = hidinput->input;
+
if (list_empty(report_list)) {
hid_err(hid, "no output reports found\n");
return -ENODEV;
diff --git a/drivers/hid/hid-gaff.c b/drivers/hid/hid-gaff.c
index 0f95c96b70f8..ecbd3995a4eb 100644
--- a/drivers/hid/hid-gaff.c
+++ b/drivers/hid/hid-gaff.c
@@ -64,14 +64,20 @@ static int gaff_init(struct hid_device *hid)
{
struct gaff_device *gaff;
struct hid_report *report;
- struct hid_input *hidinput = list_entry(hid->inputs.next,
- struct hid_input, list);
+ struct hid_input *hidinput;
struct list_head *report_list =
&hid->report_enum[HID_OUTPUT_REPORT].report_list;
struct list_head *report_ptr = report_list;
- struct input_dev *dev = hidinput->input;
+ struct input_dev *dev;
int error;
+ if (list_empty(&hid->inputs)) {
+ hid_err(hid, "no inputs found\n");
+ return -ENODEV;
+ }
+ hidinput = list_entry(hid->inputs.next, struct hid_input, list);
+ dev = hidinput->input;
+
if (list_empty(report_list)) {
hid_err(hid, "no output reports found\n");
return -ENODEV;
diff --git a/drivers/hid/hid-google-hammer.c b/drivers/hid/hid-google-hammer.c
index 84f8c127ebdc..d86a9189e88f 100644
--- a/drivers/hid/hid-google-hammer.c
+++ b/drivers/hid/hid-google-hammer.c
@@ -470,6 +470,10 @@ static const struct hid_device_id hammer_devices[] = {
{ HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_HAMMER) },
{ HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
+ USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_MAGNEMITE) },
+ { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
+ USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_MASTERBALL) },
+ { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_STAFF) },
{ HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_WAND) },
diff --git a/drivers/hid/hid-holtekff.c b/drivers/hid/hid-holtekff.c
index 10a720558830..8619b80c834c 100644
--- a/drivers/hid/hid-holtekff.c
+++ b/drivers/hid/hid-holtekff.c
@@ -124,13 +124,19 @@ static int holtekff_init(struct hid_device *hid)
{
struct holtekff_device *holtekff;
struct hid_report *report;
- struct hid_input *hidinput = list_entry(hid->inputs.next,
- struct hid_input, list);
+ struct hid_input *hidinput;
struct list_head *report_list =
&hid->report_enum[HID_OUTPUT_REPORT].report_list;
- struct input_dev *dev = hidinput->input;
+ struct input_dev *dev;
int error;
+ if (list_empty(&hid->inputs)) {
+ hid_err(hid, "no inputs found\n");
+ return -ENODEV;
+ }
+ hidinput = list_entry(hid->inputs.next, struct hid_input, list);
+ dev = hidinput->input;
+
if (list_empty(report_list)) {
hid_err(hid, "no output report found\n");
return -ENODEV;
diff --git a/drivers/hid/hid-hyperv.c b/drivers/hid/hid-hyperv.c
index cc5b09b87ab0..79a28fc91521 100644
--- a/drivers/hid/hid-hyperv.c
+++ b/drivers/hid/hid-hyperv.c
@@ -314,60 +314,24 @@ static void mousevsc_on_receive(struct hv_device *device,
static void mousevsc_on_channel_callback(void *context)
{
- const int packet_size = 0x100;
- int ret;
struct hv_device *device = context;
- u32 bytes_recvd;
- u64 req_id;
struct vmpacket_descriptor *desc;
- unsigned char *buffer;
- int bufferlen = packet_size;
-
- buffer = kmalloc(bufferlen, GFP_ATOMIC);
- if (!buffer)
- return;
-
- do {
- ret = vmbus_recvpacket_raw(device->channel, buffer,
- bufferlen, &bytes_recvd, &req_id);
-
- switch (ret) {
- case 0:
- if (bytes_recvd <= 0) {
- kfree(buffer);
- return;
- }
- desc = (struct vmpacket_descriptor *)buffer;
-
- switch (desc->type) {
- case VM_PKT_COMP:
- break;
-
- case VM_PKT_DATA_INBAND:
- mousevsc_on_receive(device, desc);
- break;
-
- default:
- pr_err("unhandled packet type %d, tid %llx len %d\n",
- desc->type, req_id, bytes_recvd);
- break;
- }
+ foreach_vmbus_pkt(desc, device->channel) {
+ switch (desc->type) {
+ case VM_PKT_COMP:
break;
- case -ENOBUFS:
- kfree(buffer);
- /* Handle large packet */
- bufferlen = bytes_recvd;
- buffer = kmalloc(bytes_recvd, GFP_ATOMIC);
-
- if (!buffer)
- return;
+ case VM_PKT_DATA_INBAND:
+ mousevsc_on_receive(device, desc);
+ break;
+ default:
+ pr_err("Unhandled packet type %d, tid %llx len %d\n",
+ desc->type, desc->trans_id, desc->len8 * 8);
break;
}
- } while (1);
-
+ }
}
static int mousevsc_connect_to_vsp(struct hv_device *device)
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 76969a22b0f2..447e8db21174 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -476,6 +476,8 @@
#define USB_DEVICE_ID_GOOGLE_STAFF 0x502b
#define USB_DEVICE_ID_GOOGLE_WAND 0x502d
#define USB_DEVICE_ID_GOOGLE_WHISKERS 0x5030
+#define USB_DEVICE_ID_GOOGLE_MASTERBALL 0x503c
+#define USB_DEVICE_ID_GOOGLE_MAGNEMITE 0x503d
#define USB_VENDOR_ID_GOTOP 0x08f2
#define USB_DEVICE_ID_SUPER_Q2 0x007f
diff --git a/drivers/hid/hid-lg2ff.c b/drivers/hid/hid-lg2ff.c
index dd1a6c3a7de6..73d07e35f12a 100644
--- a/drivers/hid/hid-lg2ff.c
+++ b/drivers/hid/hid-lg2ff.c
@@ -50,11 +50,17 @@ int lg2ff_init(struct hid_device *hid)
{
struct lg2ff_device *lg2ff;
struct hid_report *report;
- struct hid_input *hidinput = list_entry(hid->inputs.next,
- struct hid_input, list);
- struct input_dev *dev = hidinput->input;
+ struct hid_input *hidinput;
+ struct input_dev *dev;
int error;
+ if (list_empty(&hid->inputs)) {
+ hid_err(hid, "no inputs found\n");
+ return -ENODEV;
+ }
+ hidinput = list_entry(hid->inputs.next, struct hid_input, list);
+ dev = hidinput->input;
+
/* Check that the report looks ok */
report = hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 7);
if (!report)
diff --git a/drivers/hid/hid-lg3ff.c b/drivers/hid/hid-lg3ff.c
index 9ecb6fd06203..b7e1949f3cf7 100644
--- a/drivers/hid/hid-lg3ff.c
+++ b/drivers/hid/hid-lg3ff.c
@@ -117,12 +117,19 @@ static const signed short ff3_joystick_ac[] = {
int lg3ff_init(struct hid_device *hid)
{
- struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list);
- struct input_dev *dev = hidinput->input;
+ struct hid_input *hidinput;
+ struct input_dev *dev;
const signed short *ff_bits = ff3_joystick_ac;
int error;
int i;
+ if (list_empty(&hid->inputs)) {
+ hid_err(hid, "no inputs found\n");
+ return -ENODEV;
+ }
+ hidinput = list_entry(hid->inputs.next, struct hid_input, list);
+ dev = hidinput->input;
+
/* Check that the report looks ok */
if (!hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 35))
return -ENODEV;
diff --git a/drivers/hid/hid-lg4ff.c b/drivers/hid/hid-lg4ff.c
index 03f0220062ca..5e6a0cef2a06 100644
--- a/drivers/hid/hid-lg4ff.c
+++ b/drivers/hid/hid-lg4ff.c
@@ -1253,8 +1253,8 @@ static int lg4ff_handle_multimode_wheel(struct hid_device *hid, u16 *real_produc
int lg4ff_init(struct hid_device *hid)
{
- struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list);
- struct input_dev *dev = hidinput->input;
+ struct hid_input *hidinput;
+ struct input_dev *dev;
struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list;
struct hid_report *report = list_entry(report_list->next, struct hid_report, list);
const struct usb_device_descriptor *udesc = &(hid_to_usb_dev(hid)->descriptor);
@@ -1266,6 +1266,13 @@ int lg4ff_init(struct hid_device *hid)
int mmode_ret, mmode_idx = -1;
u16 real_product_id;
+ if (list_empty(&hid->inputs)) {
+ hid_err(hid, "no inputs found\n");
+ return -ENODEV;
+ }
+ hidinput = list_entry(hid->inputs.next, struct hid_input, list);
+ dev = hidinput->input;
+
/* Check that the report looks ok */
if (!hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 7))
return -1;
diff --git a/drivers/hid/hid-lgff.c b/drivers/hid/hid-lgff.c
index c79a6ec43745..aed4ddc397a9 100644
--- a/drivers/hid/hid-lgff.c
+++ b/drivers/hid/hid-lgff.c
@@ -115,12 +115,19 @@ static void hid_lgff_set_autocenter(struct input_dev *dev, u16 magnitude)
int lgff_init(struct hid_device* hid)
{
- struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list);
- struct input_dev *dev = hidinput->input;
+ struct hid_input *hidinput;
+ struct input_dev *dev;
const signed short *ff_bits = ff_joystick;
int error;
int i;
+ if (list_empty(&hid->inputs)) {
+ hid_err(hid, "no inputs found\n");
+ return -ENODEV;
+ }
+ hidinput = list_entry(hid->inputs.next, struct hid_input, list);
+ dev = hidinput->input;
+
/* Check that the report looks ok */
if (!hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 7))
return -ENODEV;
diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c
index 0179f7ed77e5..8e91e2f06cb4 100644
--- a/drivers/hid/hid-logitech-hidpp.c
+++ b/drivers/hid/hid-logitech-hidpp.c
@@ -1669,6 +1669,7 @@ static void hidpp_touchpad_raw_xy_event(struct hidpp_device *hidpp_dev,
#define HIDPP_FF_EFFECTID_NONE -1
#define HIDPP_FF_EFFECTID_AUTOCENTER -2
+#define HIDPP_AUTOCENTER_PARAMS_LENGTH 18
#define HIDPP_FF_MAX_PARAMS 20
#define HIDPP_FF_RESERVED_SLOTS 1
@@ -2009,7 +2010,7 @@ static int hidpp_ff_erase_effect(struct input_dev *dev, int effect_id)
static void hidpp_ff_set_autocenter(struct input_dev *dev, u16 magnitude)
{
struct hidpp_ff_private_data *data = dev->ff->private;
- u8 params[18];
+ u8 params[HIDPP_AUTOCENTER_PARAMS_LENGTH];
dbg_hid("Setting autocenter to %d.\n", magnitude);
@@ -2077,23 +2078,34 @@ static DEVICE_ATTR(range, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH, hidpp
static void hidpp_ff_destroy(struct ff_device *ff)
{
struct hidpp_ff_private_data *data = ff->private;
+ struct hid_device *hid = data->hidpp->hid_dev;
+ hid_info(hid, "Unloading HID++ force feedback.\n");
+
+ device_remove_file(&hid->dev, &dev_attr_range);
+ destroy_workqueue(data->wq);
kfree(data->effect_ids);
}
-static int hidpp_ff_init(struct hidpp_device *hidpp, u8 feature_index)
+static int hidpp_ff_init(struct hidpp_device *hidpp,
+ struct hidpp_ff_private_data *data)
{
struct hid_device *hid = hidpp->hid_dev;
- struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list);
- struct input_dev *dev = hidinput->input;
+ struct hid_input *hidinput;
+ struct input_dev *dev;
const struct usb_device_descriptor *udesc = &(hid_to_usb_dev(hid)->descriptor);
const u16 bcdDevice = le16_to_cpu(udesc->bcdDevice);
struct ff_device *ff;
- struct hidpp_report response;
- struct hidpp_ff_private_data *data;
- int error, j, num_slots;
+ int error, j, num_slots = data->num_effects;
u8 version;
+ if (list_empty(&hid->inputs)) {
+ hid_err(hid, "no inputs found\n");
+ return -ENODEV;
+ }
+ hidinput = list_entry(hid->inputs.next, struct hid_input, list);
+ dev = hidinput->input;
+
if (!dev) {
hid_err(hid, "Struct input_dev not set!\n");
return -EINVAL;
@@ -2109,27 +2121,17 @@ static int hidpp_ff_init(struct hidpp_device *hidpp, u8 feature_index)
for (j = 0; hidpp_ff_effects_v2[j] >= 0; j++)
set_bit(hidpp_ff_effects_v2[j], dev->ffbit);
- /* Read number of slots available in device */
- error = hidpp_send_fap_command_sync(hidpp, feature_index,
- HIDPP_FF_GET_INFO, NULL, 0, &response);
- if (error) {
- if (error < 0)
- return error;
- hid_err(hidpp->hid_dev, "%s: received protocol error 0x%02x\n",
- __func__, error);
- return -EPROTO;
- }
-
- num_slots = response.fap.params[0] - HIDPP_FF_RESERVED_SLOTS;
-
error = input_ff_create(dev, num_slots);
if (error) {
hid_err(dev, "Failed to create FF device!\n");
return error;
}
-
- data = kzalloc(sizeof(*data), GFP_KERNEL);
+ /*
+ * Create a copy of passed data, so we can transfer memory
+ * ownership to FF core
+ */
+ data = kmemdup(data, sizeof(*data), GFP_KERNEL);
if (!data)
return -ENOMEM;
data->effect_ids = kcalloc(num_slots, sizeof(int), GFP_KERNEL);
@@ -2145,10 +2147,7 @@ static int hidpp_ff_init(struct hidpp_device *hidpp, u8 feature_index)
}
data->hidpp = hidpp;
- data->feature_index = feature_index;
data->version = version;
- data->slot_autocenter = 0;
- data->num_effects = num_slots;
for (j = 0; j < num_slots; j++)
data->effect_ids[j] = -1;
@@ -2162,68 +2161,20 @@ static int hidpp_ff_init(struct hidpp_device *hidpp, u8 feature_index)
ff->set_autocenter = hidpp_ff_set_autocenter;
ff->destroy = hidpp_ff_destroy;
-
- /* reset all forces */
- error = hidpp_send_fap_command_sync(hidpp, feature_index,
- HIDPP_FF_RESET_ALL, NULL, 0, &response);
-
- /* Read current Range */
- error = hidpp_send_fap_command_sync(hidpp, feature_index,
- HIDPP_FF_GET_APERTURE, NULL, 0, &response);
- if (error)
- hid_warn(hidpp->hid_dev, "Failed to read range from device!\n");
- data->range = error ? 900 : get_unaligned_be16(&response.fap.params[0]);
-
/* Create sysfs interface */
error = device_create_file(&(hidpp->hid_dev->dev), &dev_attr_range);
if (error)
hid_warn(hidpp->hid_dev, "Unable to create sysfs interface for \"range\", errno %d!\n", error);
- /* Read the current gain values */
- error = hidpp_send_fap_command_sync(hidpp, feature_index,
- HIDPP_FF_GET_GLOBAL_GAINS, NULL, 0, &response);
- if (error)
- hid_warn(hidpp->hid_dev, "Failed to read gain values from device!\n");
- data->gain = error ? 0xffff : get_unaligned_be16(&response.fap.params[0]);
- /* ignore boost value at response.fap.params[2] */
-
/* init the hardware command queue */
atomic_set(&data->workqueue_size, 0);
- /* initialize with zero autocenter to get wheel in usable state */
- hidpp_ff_set_autocenter(dev, 0);
-
hid_info(hid, "Force feedback support loaded (firmware release %d).\n",
version);
return 0;
}
-static int hidpp_ff_deinit(struct hid_device *hid)
-{
- struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list);
- struct input_dev *dev = hidinput->input;
- struct hidpp_ff_private_data *data;
-
- if (!dev) {
- hid_err(hid, "Struct input_dev not found!\n");
- return -EINVAL;
- }
-
- hid_info(hid, "Unloading HID++ force feedback.\n");
- data = dev->ff->private;
- if (!data) {
- hid_err(hid, "Private data not found!\n");
- return -EINVAL;
- }
-
- destroy_workqueue(data->wq);
- device_remove_file(&hid->dev, &dev_attr_range);
-
- return 0;
-}
-
-
/* ************************************************************************** */
/* */
/* Device Support */
@@ -2725,24 +2676,93 @@ static int k400_connect(struct hid_device *hdev, bool connected)
#define HIDPP_PAGE_G920_FORCE_FEEDBACK 0x8123
-static int g920_get_config(struct hidpp_device *hidpp)
+static int g920_ff_set_autocenter(struct hidpp_device *hidpp,
+ struct hidpp_ff_private_data *data)
{
+ struct hidpp_report response;
+ u8 params[HIDPP_AUTOCENTER_PARAMS_LENGTH] = {
+ [1] = HIDPP_FF_EFFECT_SPRING | HIDPP_FF_EFFECT_AUTOSTART,
+ };
+ int ret;
+
+ /* initialize with zero autocenter to get wheel in usable state */
+
+ dbg_hid("Setting autocenter to 0.\n");
+ ret = hidpp_send_fap_command_sync(hidpp, data->feature_index,
+ HIDPP_FF_DOWNLOAD_EFFECT,
+ params, ARRAY_SIZE(params),
+ &response);
+ if (ret)
+ hid_warn(hidpp->hid_dev, "Failed to autocenter device!\n");
+ else
+ data->slot_autocenter = response.fap.params[0];
+
+ return ret;
+}
+
+static int g920_get_config(struct hidpp_device *hidpp,
+ struct hidpp_ff_private_data *data)
+{
+ struct hidpp_report response;
u8 feature_type;
- u8 feature_index;
int ret;
+ memset(data, 0, sizeof(*data));
+
/* Find feature and store for later use */
ret = hidpp_root_get_feature(hidpp, HIDPP_PAGE_G920_FORCE_FEEDBACK,
- &feature_index, &feature_type);
+ &data->feature_index, &feature_type);
if (ret)
return ret;
- ret = hidpp_ff_init(hidpp, feature_index);
+ /* Read number of slots available in device */
+ ret = hidpp_send_fap_command_sync(hidpp, data->feature_index,
+ HIDPP_FF_GET_INFO,
+ NULL, 0,
+ &response);
+ if (ret) {
+ if (ret < 0)
+ return ret;
+ hid_err(hidpp->hid_dev,
+ "%s: received protocol error 0x%02x\n", __func__, ret);
+ return -EPROTO;
+ }
+
+ data->num_effects = response.fap.params[0] - HIDPP_FF_RESERVED_SLOTS;
+
+ /* reset all forces */
+ ret = hidpp_send_fap_command_sync(hidpp, data->feature_index,
+ HIDPP_FF_RESET_ALL,
+ NULL, 0,
+ &response);
if (ret)
- hid_warn(hidpp->hid_dev, "Unable to initialize force feedback support, errno %d\n",
- ret);
+ hid_warn(hidpp->hid_dev, "Failed to reset all forces!\n");
- return 0;
+ ret = hidpp_send_fap_command_sync(hidpp, data->feature_index,
+ HIDPP_FF_GET_APERTURE,
+ NULL, 0,
+ &response);
+ if (ret) {
+ hid_warn(hidpp->hid_dev,
+ "Failed to read range from device!\n");
+ }
+ data->range = ret ?
+ 900 : get_unaligned_be16(&response.fap.params[0]);
+
+ /* Read the current gain values */
+ ret = hidpp_send_fap_command_sync(hidpp, data->feature_index,
+ HIDPP_FF_GET_GLOBAL_GAINS,
+ NULL, 0,
+ &response);
+ if (ret)
+ hid_warn(hidpp->hid_dev,
+ "Failed to read gain values from device!\n");
+ data->gain = ret ?
+ 0xffff : get_unaligned_be16(&response.fap.params[0]);
+
+ /* ignore boost value at response.fap.params[2] */
+
+ return g920_ff_set_autocenter(hidpp, data);
}
/* -------------------------------------------------------------------------- */
@@ -3458,34 +3478,45 @@ static int hidpp_get_report_length(struct hid_device *hdev, int id)
return report->field[0]->report_count + 1;
}
-static bool hidpp_validate_report(struct hid_device *hdev, int id,
- int expected_length, bool optional)
+static bool hidpp_validate_device(struct hid_device *hdev)
{
- int report_length;
+ struct hidpp_device *hidpp = hid_get_drvdata(hdev);
+ int id, report_length, supported_reports = 0;
- if (id >= HID_MAX_IDS || id < 0) {
- hid_err(hdev, "invalid HID report id %u\n", id);
- return false;
+ id = REPORT_ID_HIDPP_SHORT;
+ report_length = hidpp_get_report_length(hdev, id);
+ if (report_length) {
+ if (report_length < HIDPP_REPORT_SHORT_LENGTH)
+ goto bad_device;
+
+ supported_reports++;
}
+ id = REPORT_ID_HIDPP_LONG;
report_length = hidpp_get_report_length(hdev, id);
- if (!report_length)
- return optional;
+ if (report_length) {
+ if (report_length < HIDPP_REPORT_LONG_LENGTH)
+ goto bad_device;
- if (report_length < expected_length) {
- hid_warn(hdev, "not enough values in hidpp report %d\n", id);
- return false;
+ supported_reports++;
}
- return true;
-}
+ id = REPORT_ID_HIDPP_VERY_LONG;
+ report_length = hidpp_get_report_length(hdev, id);
+ if (report_length) {
+ if (report_length < HIDPP_REPORT_LONG_LENGTH ||
+ report_length > HIDPP_REPORT_VERY_LONG_MAX_LENGTH)
+ goto bad_device;
-static bool hidpp_validate_device(struct hid_device *hdev)
-{
- return hidpp_validate_report(hdev, REPORT_ID_HIDPP_SHORT,
- HIDPP_REPORT_SHORT_LENGTH, false) &&
- hidpp_validate_report(hdev, REPORT_ID_HIDPP_LONG,
- HIDPP_REPORT_LONG_LENGTH, true);
+ supported_reports++;
+ hidpp->very_long_report_length = report_length;
+ }
+
+ return supported_reports;
+
+bad_device:
+ hid_warn(hdev, "not enough values in hidpp report %d\n", id);
+ return false;
}
static bool hidpp_application_equals(struct hid_device *hdev,
@@ -3505,6 +3536,7 @@ static int hidpp_probe(struct hid_device *hdev, const struct hid_device_id *id)
int ret;
bool connected;
unsigned int connect_mask = HID_CONNECT_DEFAULT;
+ struct hidpp_ff_private_data data;
/* report_fixup needs drvdata to be set before we call hid_parse */
hidpp = devm_kzalloc(&hdev->dev, sizeof(*hidpp), GFP_KERNEL);
@@ -3531,11 +3563,6 @@ static int hidpp_probe(struct hid_device *hdev, const struct hid_device_id *id)
return hid_hw_start(hdev, HID_CONNECT_DEFAULT);
}
- hidpp->very_long_report_length =
- hidpp_get_report_length(hdev, REPORT_ID_HIDPP_VERY_LONG);
- if (hidpp->very_long_report_length > HIDPP_REPORT_VERY_LONG_MAX_LENGTH)
- hidpp->very_long_report_length = HIDPP_REPORT_VERY_LONG_MAX_LENGTH;
-
if (id->group == HID_GROUP_LOGITECH_DJ_DEVICE)
hidpp->quirks |= HIDPP_QUIRK_UNIFYING;
@@ -3614,7 +3641,7 @@ static int hidpp_probe(struct hid_device *hdev, const struct hid_device_id *id)
if (ret)
goto hid_hw_init_fail;
} else if (connected && (hidpp->quirks & HIDPP_QUIRK_CLASS_G920)) {
- ret = g920_get_config(hidpp);
+ ret = g920_get_config(hidpp, &data);
if (ret)
goto hid_hw_init_fail;
}
@@ -3636,6 +3663,14 @@ static int hidpp_probe(struct hid_device *hdev, const struct hid_device_id *id)
goto hid_hw_start_fail;
}
+ if (hidpp->quirks & HIDPP_QUIRK_CLASS_G920) {
+ ret = hidpp_ff_init(hidpp, &data);
+ if (ret)
+ hid_warn(hidpp->hid_dev,
+ "Unable to initialize force feedback support, errno %d\n",
+ ret);
+ }
+
return ret;
hid_hw_init_fail:
@@ -3658,9 +3693,6 @@ static void hidpp_remove(struct hid_device *hdev)
sysfs_remove_group(&hdev->dev.kobj, &ps_attribute_group);
- if (hidpp->quirks & HIDPP_QUIRK_CLASS_G920)
- hidpp_ff_deinit(hdev);
-
hid_hw_stop(hdev);
cancel_work_sync(&hidpp->work);
mutex_destroy(&hidpp->send_mutex);
diff --git a/drivers/hid/hid-microsoft.c b/drivers/hid/hid-microsoft.c
index 2cf83856f2e4..2d8b589201a4 100644
--- a/drivers/hid/hid-microsoft.c
+++ b/drivers/hid/hid-microsoft.c
@@ -328,11 +328,17 @@ static int ms_play_effect(struct input_dev *dev, void *data,
static int ms_init_ff(struct hid_device *hdev)
{
- struct hid_input *hidinput = list_entry(hdev->inputs.next,
- struct hid_input, list);
- struct input_dev *input_dev = hidinput->input;
+ struct hid_input *hidinput;
+ struct input_dev *input_dev;
struct ms_data *ms = hid_get_drvdata(hdev);
+ if (list_empty(&hdev->inputs)) {
+ hid_err(hdev, "no inputs found\n");
+ return -ENODEV;
+ }
+ hidinput = list_entry(hdev->inputs.next, struct hid_input, list);
+ input_dev = hidinput->input;
+
if (!(ms->quirks & MS_QUIRK_FF))
return 0;
diff --git a/drivers/hid/hid-prodikeys.c b/drivers/hid/hid-prodikeys.c
index 5a3b3d974d84..2666af02d5c1 100644
--- a/drivers/hid/hid-prodikeys.c
+++ b/drivers/hid/hid-prodikeys.c
@@ -516,7 +516,7 @@ static void pcmidi_setup_extra_keys(
MY PICTURES => KEY_WORDPROCESSOR
MY MUSIC=> KEY_SPREADSHEET
*/
- unsigned int keys[] = {
+ static const unsigned int keys[] = {
KEY_FN,
KEY_MESSENGER, KEY_CALENDAR,
KEY_ADDRESSBOOK, KEY_DOCUMENTS,
@@ -532,7 +532,7 @@ static void pcmidi_setup_extra_keys(
0
};
- unsigned int *pkeys = &keys[0];
+ const unsigned int *pkeys = &keys[0];
unsigned short i;
if (pm->ifnum != 1) /* only set up ONCE for interace 1 */
diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c
index 73c0f7a95e2d..4c6ed6ef31f1 100644
--- a/drivers/hid/hid-sony.c
+++ b/drivers/hid/hid-sony.c
@@ -2254,9 +2254,15 @@ static int sony_play_effect(struct input_dev *dev, void *data,
static int sony_init_ff(struct sony_sc *sc)
{
- struct hid_input *hidinput = list_entry(sc->hdev->inputs.next,
- struct hid_input, list);
- struct input_dev *input_dev = hidinput->input;
+ struct hid_input *hidinput;
+ struct input_dev *input_dev;
+
+ if (list_empty(&sc->hdev->inputs)) {
+ hid_err(sc->hdev, "no inputs found\n");
+ return -ENODEV;
+ }
+ hidinput = list_entry(sc->hdev->inputs.next, struct hid_input, list);
+ input_dev = hidinput->input;
input_set_capability(input_dev, EV_FF, FF_RUMBLE);
return input_ff_create_memless(input_dev, NULL, sony_play_effect);
diff --git a/drivers/hid/hid-tmff.c b/drivers/hid/hid-tmff.c
index bdfc5ff3b2c5..90acef304536 100644
--- a/drivers/hid/hid-tmff.c
+++ b/drivers/hid/hid-tmff.c
@@ -124,12 +124,18 @@ static int tmff_init(struct hid_device *hid, const signed short *ff_bits)
struct tmff_device *tmff;
struct hid_report *report;
struct list_head *report_list;
- struct hid_input *hidinput = list_entry(hid->inputs.next,
- struct hid_input, list);
- struct input_dev *input_dev = hidinput->input;
+ struct hid_input *hidinput;
+ struct input_dev *input_dev;
int error;
int i;
+ if (list_empty(&hid->inputs)) {
+ hid_err(hid, "no inputs found\n");
+ return -ENODEV;
+ }
+ hidinput = list_entry(hid->inputs.next, struct hid_input, list);
+ input_dev = hidinput->input;
+
tmff = kzalloc(sizeof(struct tmff_device), GFP_KERNEL);
if (!tmff)
return -ENOMEM;
diff --git a/drivers/hid/hid-zpff.c b/drivers/hid/hid-zpff.c
index f90959e94028..3abaca045869 100644
--- a/drivers/hid/hid-zpff.c
+++ b/drivers/hid/hid-zpff.c
@@ -54,11 +54,17 @@ static int zpff_init(struct hid_device *hid)
{
struct zpff_device *zpff;
struct hid_report *report;
- struct hid_input *hidinput = list_entry(hid->inputs.next,
- struct hid_input, list);
- struct input_dev *dev = hidinput->input;
+ struct hid_input *hidinput;
+ struct input_dev *dev;
int i, error;
+ if (list_empty(&hid->inputs)) {
+ hid_err(hid, "no inputs found\n");
+ return -ENODEV;
+ }
+ hidinput = list_entry(hid->inputs.next, struct hid_input, list);
+ dev = hidinput->input;
+
for (i = 0; i < 4; i++) {
report = hid_validate_values(hid, HID_OUTPUT_REPORT, 0, i, 1);
if (!report)
diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c
index 2a7c6e33bb1c..04c088131e04 100644
--- a/drivers/hid/i2c-hid/i2c-hid-core.c
+++ b/drivers/hid/i2c-hid/i2c-hid-core.c
@@ -26,7 +26,6 @@
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/pm.h>
-#include <linux/pm_runtime.h>
#include <linux/device.h>
#include <linux/wait.h>
#include <linux/err.h>
@@ -48,8 +47,6 @@
/* quirks to control the device */
#define I2C_HID_QUIRK_SET_PWR_WAKEUP_DEV BIT(0)
#define I2C_HID_QUIRK_NO_IRQ_AFTER_RESET BIT(1)
-#define I2C_HID_QUIRK_NO_RUNTIME_PM BIT(2)
-#define I2C_HID_QUIRK_DELAY_AFTER_SLEEP BIT(3)
#define I2C_HID_QUIRK_BOGUS_IRQ BIT(4)
/* flags */
@@ -172,14 +169,7 @@ static const struct i2c_hid_quirks {
{ USB_VENDOR_ID_WEIDA, HID_ANY_ID,
I2C_HID_QUIRK_SET_PWR_WAKEUP_DEV },
{ I2C_VENDOR_ID_HANTICK, I2C_PRODUCT_ID_HANTICK_5288,
- I2C_HID_QUIRK_NO_IRQ_AFTER_RESET |
- I2C_HID_QUIRK_NO_RUNTIME_PM },
- { I2C_VENDOR_ID_RAYDIUM, I2C_PRODUCT_ID_RAYDIUM_4B33,
- I2C_HID_QUIRK_DELAY_AFTER_SLEEP },
- { USB_VENDOR_ID_LG, I2C_DEVICE_ID_LG_8001,
- I2C_HID_QUIRK_NO_RUNTIME_PM },
- { I2C_VENDOR_ID_GOODIX, I2C_DEVICE_ID_GOODIX_01F0,
- I2C_HID_QUIRK_NO_RUNTIME_PM },
+ I2C_HID_QUIRK_NO_IRQ_AFTER_RESET },
{ USB_VENDOR_ID_ELAN, HID_ANY_ID,
I2C_HID_QUIRK_BOGUS_IRQ },
{ 0, 0 }
@@ -397,7 +387,6 @@ static int i2c_hid_set_power(struct i2c_client *client, int power_state)
{
struct i2c_hid *ihid = i2c_get_clientdata(client);
int ret;
- unsigned long now, delay;
i2c_hid_dbg(ihid, "%s\n", __func__);
@@ -415,22 +404,9 @@ static int i2c_hid_set_power(struct i2c_client *client, int power_state)
goto set_pwr_exit;
}
- if (ihid->quirks & I2C_HID_QUIRK_DELAY_AFTER_SLEEP &&
- power_state == I2C_HID_PWR_ON) {
- now = jiffies;
- if (time_after(ihid->sleep_delay, now)) {
- delay = jiffies_to_usecs(ihid->sleep_delay - now);
- usleep_range(delay, delay + 1);
- }
- }
-
ret = __i2c_hid_command(client, &hid_set_power_cmd, power_state,
0, NULL, 0, NULL, 0);
- if (ihid->quirks & I2C_HID_QUIRK_DELAY_AFTER_SLEEP &&
- power_state == I2C_HID_PWR_SLEEP)
- ihid->sleep_delay = jiffies + msecs_to_jiffies(20);
-
if (ret)
dev_err(&client->dev, "failed to change power setting.\n");
@@ -471,8 +447,12 @@ static int i2c_hid_hwreset(struct i2c_client *client)
if (ret) {
dev_err(&client->dev, "failed to reset device.\n");
i2c_hid_set_power(client, I2C_HID_PWR_SLEEP);
+ goto out_unlock;
}
+ /* At least some SIS devices need this after reset */
+ ret = i2c_hid_set_power(client, I2C_HID_PWR_ON);
+
out_unlock:
mutex_unlock(&ihid->reset_lock);
return ret;
@@ -791,11 +771,6 @@ static int i2c_hid_open(struct hid_device *hid)
{
struct i2c_client *client = hid->driver_data;
struct i2c_hid *ihid = i2c_get_clientdata(client);
- int ret = 0;
-
- ret = pm_runtime_get_sync(&client->dev);
- if (ret < 0)
- return ret;
set_bit(I2C_HID_STARTED, &ihid->flags);
return 0;
@@ -807,27 +782,6 @@ static void i2c_hid_close(struct hid_device *hid)
struct i2c_hid *ihid = i2c_get_clientdata(client);
clear_bit(I2C_HID_STARTED, &ihid->flags);
-
- /* Save some power */
- pm_runtime_put(&client->dev);
-}
-
-static int i2c_hid_power(struct hid_device *hid, int lvl)
-{
- struct i2c_client *client = hid->driver_data;
- struct i2c_hid *ihid = i2c_get_clientdata(client);
-
- i2c_hid_dbg(ihid, "%s lvl:%d\n", __func__, lvl);
-
- switch (lvl) {
- case PM_HINT_FULLON:
- pm_runtime_get_sync(&client->dev);
- break;
- case PM_HINT_NORMAL:
- pm_runtime_put(&client->dev);
- break;
- }
- return 0;
}
struct hid_ll_driver i2c_hid_ll_driver = {
@@ -836,7 +790,6 @@ struct hid_ll_driver i2c_hid_ll_driver = {
.stop = i2c_hid_stop,
.open = i2c_hid_open,
.close = i2c_hid_close,
- .power = i2c_hid_power,
.output_report = i2c_hid_output_report,
.raw_request = i2c_hid_raw_request,
};
@@ -1104,9 +1057,6 @@ static int i2c_hid_probe(struct i2c_client *client,
i2c_hid_acpi_fix_up_power(&client->dev);
- pm_runtime_get_noresume(&client->dev);
- pm_runtime_set_active(&client->dev);
- pm_runtime_enable(&client->dev);
device_enable_async_suspend(&client->dev);
/* Make sure there is something at this address */
@@ -1114,16 +1064,16 @@ static int i2c_hid_probe(struct i2c_client *client,
if (ret < 0) {
dev_dbg(&client->dev, "nothing at this address: %d\n", ret);
ret = -ENXIO;
- goto err_pm;
+ goto err_regulator;
}
ret = i2c_hid_fetch_hid_descriptor(ihid);
if (ret < 0)
- goto err_pm;
+ goto err_regulator;
ret = i2c_hid_init_irq(client);
if (ret < 0)
- goto err_pm;
+ goto err_regulator;
hid = hid_allocate_device();
if (IS_ERR(hid)) {
@@ -1154,9 +1104,6 @@ static int i2c_hid_probe(struct i2c_client *client,
goto err_mem_free;
}
- if (!(ihid->quirks & I2C_HID_QUIRK_NO_RUNTIME_PM))
- pm_runtime_put(&client->dev);
-
return 0;
err_mem_free:
@@ -1165,10 +1112,6 @@ err_mem_free:
err_irq:
free_irq(client->irq, ihid);
-err_pm:
- pm_runtime_put_noidle(&client->dev);
- pm_runtime_disable(&client->dev);
-
err_regulator:
regulator_bulk_disable(ARRAY_SIZE(ihid->pdata.supplies),
ihid->pdata.supplies);
@@ -1181,12 +1124,6 @@ static int i2c_hid_remove(struct i2c_client *client)
struct i2c_hid *ihid = i2c_get_clientdata(client);
struct hid_device *hid;
- if (!(ihid->quirks & I2C_HID_QUIRK_NO_RUNTIME_PM))
- pm_runtime_get_sync(&client->dev);
- pm_runtime_disable(&client->dev);
- pm_runtime_set_suspended(&client->dev);
- pm_runtime_put_noidle(&client->dev);
-
hid = ihid->hid;
hid_destroy_device(hid);
@@ -1219,25 +1156,15 @@ static int i2c_hid_suspend(struct device *dev)
int wake_status;
if (hid->driver && hid->driver->suspend) {
- /*
- * Wake up the device so that IO issues in
- * HID driver's suspend code can succeed.
- */
- ret = pm_runtime_resume(dev);
- if (ret < 0)
- return ret;
-
ret = hid->driver->suspend(hid, PMSG_SUSPEND);
if (ret < 0)
return ret;
}
- if (!pm_runtime_suspended(dev)) {
- /* Save some power */
- i2c_hid_set_power(client, I2C_HID_PWR_SLEEP);
+ /* Save some power */
+ i2c_hid_set_power(client, I2C_HID_PWR_SLEEP);
- disable_irq(client->irq);
- }
+ disable_irq(client->irq);
if (device_may_wakeup(&client->dev)) {
wake_status = enable_irq_wake(client->irq);
@@ -1279,11 +1206,6 @@ static int i2c_hid_resume(struct device *dev)
wake_status);
}
- /* We'll resume to full power */
- pm_runtime_disable(dev);
- pm_runtime_set_active(dev);
- pm_runtime_enable(dev);
-
enable_irq(client->irq);
/* Instead of resetting device, simply powers the device on. This
@@ -1304,30 +1226,8 @@ static int i2c_hid_resume(struct device *dev)
}
#endif
-#ifdef CONFIG_PM
-static int i2c_hid_runtime_suspend(struct device *dev)
-{
- struct i2c_client *client = to_i2c_client(dev);
-
- i2c_hid_set_power(client, I2C_HID_PWR_SLEEP);
- disable_irq(client->irq);
- return 0;
-}
-
-static int i2c_hid_runtime_resume(struct device *dev)
-{
- struct i2c_client *client = to_i2c_client(dev);
-
- enable_irq(client->irq);
- i2c_hid_set_power(client, I2C_HID_PWR_ON);
- return 0;
-}
-#endif
-
static const struct dev_pm_ops i2c_hid_pm = {
SET_SYSTEM_SLEEP_PM_OPS(i2c_hid_suspend, i2c_hid_resume)
- SET_RUNTIME_PM_OPS(i2c_hid_runtime_suspend, i2c_hid_runtime_resume,
- NULL)
};
static const struct i2c_device_id i2c_hid_id_table[] = {
diff --git a/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c b/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c
index 75078c83be1a..d31ea82b84c1 100644
--- a/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c
+++ b/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c
@@ -323,6 +323,25 @@ static const struct dmi_system_id i2c_hid_dmi_desc_override_table[] = {
.driver_data = (void *)&sipodev_desc
},
{
+ /*
+ * There are at least 2 Primebook C11B versions, the older
+ * version has a product-name of "Primebook C11B", and a
+ * bios version / release / firmware revision of:
+ * V2.1.2 / 05/03/2018 / 18.2
+ * The new version has "PRIMEBOOK C11B" as product-name and a
+ * bios version / release / firmware revision of:
+ * CFALKSW05_BIOS_V1.1.2 / 11/19/2018 / 19.2
+ * Only the older version needs this quirk, note the newer
+ * version will not match as it has a different product-name.
+ */
+ .ident = "Trekstor Primebook C11B",
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "TREKSTOR"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Primebook C11B"),
+ },
+ .driver_data = (void *)&sipodev_desc
+ },
+ {
.ident = "Direkt-Tek DTLAPY116-2",
.matches = {
DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Direkt-Tek"),
diff --git a/drivers/hid/intel-ish-hid/ishtp/client-buffers.c b/drivers/hid/intel-ish-hid/ishtp/client-buffers.c
index 1b0a0cc605e7..513d7a4a1b8a 100644
--- a/drivers/hid/intel-ish-hid/ishtp/client-buffers.c
+++ b/drivers/hid/intel-ish-hid/ishtp/client-buffers.c
@@ -84,7 +84,7 @@ int ishtp_cl_alloc_tx_ring(struct ishtp_cl *cl)
return 0;
out:
dev_err(&cl->device->dev, "error in allocating Tx pool\n");
- ishtp_cl_free_rx_ring(cl);
+ ishtp_cl_free_tx_ring(cl);
return -ENOMEM;
}
diff --git a/drivers/hid/wacom.h b/drivers/hid/wacom.h
index 4a7f8d363220..203d27d198b8 100644
--- a/drivers/hid/wacom.h
+++ b/drivers/hid/wacom.h
@@ -202,6 +202,21 @@ static inline void wacom_schedule_work(struct wacom_wac *wacom_wac,
}
}
+/*
+ * Convert a signed 32-bit integer to an unsigned n-bit integer. Undoes
+ * the normally-helpful work of 'hid_snto32' for fields that use signed
+ * ranges for questionable reasons.
+ */
+static inline __u32 wacom_s32tou(s32 value, __u8 n)
+{
+ switch (n) {
+ case 8: return ((__u8)value);
+ case 16: return ((__u16)value);
+ case 32: return ((__u32)value);
+ }
+ return value & (1 << (n - 1)) ? value & (~(~0U << n)) : value;
+}
+
extern const struct hid_device_id wacom_ids[];
void wacom_wac_irq(struct wacom_wac *wacom_wac, size_t len);
diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index 2b0a5b8ca6e6..ccb74529bc78 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -2303,7 +2303,7 @@ static void wacom_wac_pen_event(struct hid_device *hdev, struct hid_field *field
case HID_DG_TOOLSERIALNUMBER:
if (value) {
wacom_wac->serial[0] = (wacom_wac->serial[0] & ~0xFFFFFFFFULL);
- wacom_wac->serial[0] |= (__u32)value;
+ wacom_wac->serial[0] |= wacom_s32tou(value, field->report_size);
}
return;
case HID_DG_TWIST:
@@ -2319,15 +2319,17 @@ static void wacom_wac_pen_event(struct hid_device *hdev, struct hid_field *field
return;
case WACOM_HID_WD_SERIALHI:
if (value) {
+ __u32 raw_value = wacom_s32tou(value, field->report_size);
+
wacom_wac->serial[0] = (wacom_wac->serial[0] & 0xFFFFFFFF);
- wacom_wac->serial[0] |= ((__u64)value) << 32;
+ wacom_wac->serial[0] |= ((__u64)raw_value) << 32;
/*
* Non-USI EMR devices may contain additional tool type
* information here. See WACOM_HID_WD_TOOLTYPE case for
* more details.
*/
if (value >> 20 == 1) {
- wacom_wac->id[0] |= value & 0xFFFFF;
+ wacom_wac->id[0] |= raw_value & 0xFFFFF;
}
}
return;
@@ -2339,7 +2341,7 @@ static void wacom_wac_pen_event(struct hid_device *hdev, struct hid_field *field
* bitwise OR so the complete value can be built
* up over time :(
*/
- wacom_wac->id[0] |= value;
+ wacom_wac->id[0] |= wacom_s32tou(value, field->report_size);
return;
case WACOM_HID_WD_OFFSETLEFT:
if (features->offset_left && value != features->offset_left)
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 391f0b225c9a..53a60c81e220 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -912,6 +912,7 @@ static void vmbus_shutdown(struct device *child_device)
drv->shutdown(dev);
}
+#ifdef CONFIG_PM_SLEEP
/*
* vmbus_suspend - Suspend a vmbus device
*/
@@ -949,6 +950,7 @@ static int vmbus_resume(struct device *child_device)
return drv->resume(dev);
}
+#endif /* CONFIG_PM_SLEEP */
/*
* vmbus_device_release - Final callback release of the vmbus child device
@@ -1070,6 +1072,7 @@ msg_handled:
vmbus_signal_eom(msg, message_type);
}
+#ifdef CONFIG_PM_SLEEP
/*
* Fake RESCIND_CHANNEL messages to clean up hv_sock channels by force for
* hibernation, because hv_sock connections can not persist across hibernation.
@@ -1105,6 +1108,7 @@ static void vmbus_force_channel_rescinded(struct vmbus_channel *channel)
vmbus_connection.work_queue,
&ctx->work);
}
+#endif /* CONFIG_PM_SLEEP */
/*
* Direct callback for channels using other deferred processing
@@ -2125,6 +2129,7 @@ acpi_walk_err:
return ret_val;
}
+#ifdef CONFIG_PM_SLEEP
static int vmbus_bus_suspend(struct device *dev)
{
struct vmbus_channel *channel, *sc;
@@ -2247,6 +2252,7 @@ static int vmbus_bus_resume(struct device *dev)
return 0;
}
+#endif /* CONFIG_PM_SLEEP */
static const struct acpi_device_id vmbus_acpi_device_ids[] = {
{"VMBUS", 0},
diff --git a/drivers/hwmon/ina3221.c b/drivers/hwmon/ina3221.c
index 0037e2bdacd6..8a51dcf055ea 100644
--- a/drivers/hwmon/ina3221.c
+++ b/drivers/hwmon/ina3221.c
@@ -170,7 +170,7 @@ static inline int ina3221_wait_for_data(struct ina3221_data *ina)
/* Polling the CVRF bit to make sure read data is ready */
return regmap_field_read_poll_timeout(ina->fields[F_CVRF],
- cvrf, cvrf, wait, 100000);
+ cvrf, cvrf, wait, wait * 2);
}
static int ina3221_read_value(struct ina3221_data *ina, unsigned int reg,
diff --git a/drivers/hwmon/nct7904.c b/drivers/hwmon/nct7904.c
index 95b447cfa24c..281c81edabc6 100644
--- a/drivers/hwmon/nct7904.c
+++ b/drivers/hwmon/nct7904.c
@@ -82,6 +82,10 @@
#define FANCTL1_FMR_REG 0x00 /* Bank 3; 1 reg per channel */
#define FANCTL1_OUT_REG 0x10 /* Bank 3; 1 reg per channel */
+#define VOLT_MONITOR_MODE 0x0
+#define THERMAL_DIODE_MODE 0x1
+#define THERMISTOR_MODE 0x3
+
#define ENABLE_TSI BIT(1)
static const unsigned short normal_i2c[] = {
@@ -99,6 +103,8 @@ struct nct7904_data {
u8 enable_dts;
u8 has_dts;
u8 temp_mode; /* 0: TR mode, 1: TD mode */
+ u8 fan_alarm[2];
+ u8 vsen_alarm[3];
};
/* Access functions */
@@ -214,7 +220,15 @@ static int nct7904_read_fan(struct device *dev, u32 attr, int channel,
SMI_STS5_REG + (channel >> 3));
if (ret < 0)
return ret;
- *val = (ret >> (channel & 0x07)) & 1;
+ if (!data->fan_alarm[channel >> 3])
+ data->fan_alarm[channel >> 3] = ret & 0xff;
+ else
+ /* If there is new alarm showing up */
+ data->fan_alarm[channel >> 3] |= (ret & 0xff);
+ *val = (data->fan_alarm[channel >> 3] >> (channel & 0x07)) & 1;
+ /* Needs to clean the alarm if alarm existing */
+ if (*val)
+ data->fan_alarm[channel >> 3] ^= 1 << (channel & 0x07);
return 0;
default:
return -EOPNOTSUPP;
@@ -298,7 +312,15 @@ static int nct7904_read_in(struct device *dev, u32 attr, int channel,
SMI_STS1_REG + (index >> 3));
if (ret < 0)
return ret;
- *val = (ret >> (index & 0x07)) & 1;
+ if (!data->vsen_alarm[index >> 3])
+ data->vsen_alarm[index >> 3] = ret & 0xff;
+ else
+ /* If there is new alarm showing up */
+ data->vsen_alarm[index >> 3] |= (ret & 0xff);
+ *val = (data->vsen_alarm[index >> 3] >> (index & 0x07)) & 1;
+ /* Needs to clean the alarm if alarm existing */
+ if (*val)
+ data->vsen_alarm[index >> 3] ^= 1 << (index & 0x07);
return 0;
default:
return -EOPNOTSUPP;
@@ -915,12 +937,20 @@ static int nct7904_probe(struct i2c_client *client,
data->temp_mode = 0;
for (i = 0; i < 4; i++) {
- val = (ret & (0x03 << i)) >> (i * 2);
+ val = (ret >> (i * 2)) & 0x03;
bit = (1 << i);
- if (val == 0)
+ if (val == VOLT_MONITOR_MODE) {
data->tcpu_mask &= ~bit;
- else if (val == 0x1 || val == 0x2)
+ } else if (val == THERMAL_DIODE_MODE && i < 2) {
data->temp_mode |= bit;
+ data->vsen_mask &= ~(0x06 << (i * 2));
+ } else if (val == THERMISTOR_MODE) {
+ data->vsen_mask &= ~(0x02 << (i * 2));
+ } else {
+ /* Reserved */
+ data->tcpu_mask &= ~bit;
+ data->vsen_mask &= ~(0x06 << (i * 2));
+ }
}
/* PECI */
diff --git a/drivers/hwtracing/intel_th/gth.c b/drivers/hwtracing/intel_th/gth.c
index fa9d34af87ac..f72803a02391 100644
--- a/drivers/hwtracing/intel_th/gth.c
+++ b/drivers/hwtracing/intel_th/gth.c
@@ -626,6 +626,9 @@ static void intel_th_gth_switch(struct intel_th_device *thdev,
if (!count)
dev_dbg(&thdev->dev, "timeout waiting for CTS Trigger\n");
+ /* De-assert the trigger */
+ iowrite32(0, gth->base + REG_CTS_CTL);
+
intel_th_gth_stop(gth, output, false);
intel_th_gth_start(gth, output);
}
diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c
index fc9f15f36ad4..6d240dfae9d9 100644
--- a/drivers/hwtracing/intel_th/msu.c
+++ b/drivers/hwtracing/intel_th/msu.c
@@ -164,7 +164,7 @@ struct msc {
};
static LIST_HEAD(msu_buffer_list);
-static struct mutex msu_buffer_mutex;
+static DEFINE_MUTEX(msu_buffer_mutex);
/**
* struct msu_buffer_entry - internal MSU buffer bookkeeping
@@ -327,7 +327,7 @@ static size_t msc_win_total_sz(struct msc_window *win)
struct msc_block_desc *bdesc = sg_virt(sg);
if (msc_block_wrapped(bdesc))
- return win->nr_blocks << PAGE_SHIFT;
+ return (size_t)win->nr_blocks << PAGE_SHIFT;
size += msc_total_sz(bdesc);
if (msc_block_last_written(bdesc))
@@ -1848,9 +1848,14 @@ mode_store(struct device *dev, struct device_attribute *attr, const char *buf,
len = cp - buf;
mode = kstrndup(buf, len, GFP_KERNEL);
+ if (!mode)
+ return -ENOMEM;
+
i = match_string(msc_mode, ARRAY_SIZE(msc_mode), mode);
- if (i >= 0)
+ if (i >= 0) {
+ kfree(mode);
goto found;
+ }
/* Buffer sinks only work with a usable IRQ */
if (!msc->do_irq) {
diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c
index 91dfeba62485..03ca5b1bef9f 100644
--- a/drivers/hwtracing/intel_th/pci.c
+++ b/drivers/hwtracing/intel_th/pci.c
@@ -200,6 +200,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = {
.driver_data = (kernel_ulong_t)&intel_th_2x,
},
{
+ /* Comet Lake PCH */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x06a6),
+ .driver_data = (kernel_ulong_t)&intel_th_2x,
+ },
+ {
/* Ice Lake NNPI */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x45c5),
.driver_data = (kernel_ulong_t)&intel_th_2x,
@@ -209,6 +214,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = {
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa0a6),
.driver_data = (kernel_ulong_t)&intel_th_2x,
},
+ {
+ /* Jasper Lake PCH */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4da6),
+ .driver_data = (kernel_ulong_t)&intel_th_2x,
+ },
{ 0 },
};
diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c
index fa66951b05d0..7b098ff5f5dd 100644
--- a/drivers/i2c/busses/i2c-aspeed.c
+++ b/drivers/i2c/busses/i2c-aspeed.c
@@ -108,6 +108,12 @@
#define ASPEED_I2CD_S_TX_CMD BIT(2)
#define ASPEED_I2CD_M_TX_CMD BIT(1)
#define ASPEED_I2CD_M_START_CMD BIT(0)
+#define ASPEED_I2CD_MASTER_CMDS_MASK \
+ (ASPEED_I2CD_M_STOP_CMD | \
+ ASPEED_I2CD_M_S_RX_CMD_LAST | \
+ ASPEED_I2CD_M_RX_CMD | \
+ ASPEED_I2CD_M_TX_CMD | \
+ ASPEED_I2CD_M_START_CMD)
/* 0x18 : I2CD Slave Device Address Register */
#define ASPEED_I2CD_DEV_ADDR_MASK GENMASK(6, 0)
@@ -336,18 +342,19 @@ static void aspeed_i2c_do_start(struct aspeed_i2c_bus *bus)
struct i2c_msg *msg = &bus->msgs[bus->msgs_index];
u8 slave_addr = i2c_8bit_addr_from_msg(msg);
- bus->master_state = ASPEED_I2C_MASTER_START;
-
#if IS_ENABLED(CONFIG_I2C_SLAVE)
/*
* If it's requested in the middle of a slave session, set the master
* state to 'pending' then H/W will continue handling this master
* command when the bus comes back to the idle state.
*/
- if (bus->slave_state != ASPEED_I2C_SLAVE_INACTIVE)
+ if (bus->slave_state != ASPEED_I2C_SLAVE_INACTIVE) {
bus->master_state = ASPEED_I2C_MASTER_PENDING;
+ return;
+ }
#endif /* CONFIG_I2C_SLAVE */
+ bus->master_state = ASPEED_I2C_MASTER_START;
bus->buf_index = 0;
if (msg->flags & I2C_M_RD) {
@@ -422,20 +429,6 @@ static u32 aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus, u32 irq_status)
}
}
-#if IS_ENABLED(CONFIG_I2C_SLAVE)
- /*
- * A pending master command will be started by H/W when the bus comes
- * back to idle state after completing a slave operation so change the
- * master state from 'pending' to 'start' at here if slave is inactive.
- */
- if (bus->master_state == ASPEED_I2C_MASTER_PENDING) {
- if (bus->slave_state != ASPEED_I2C_SLAVE_INACTIVE)
- goto out_no_complete;
-
- bus->master_state = ASPEED_I2C_MASTER_START;
- }
-#endif /* CONFIG_I2C_SLAVE */
-
/* Master is not currently active, irq was for someone else. */
if (bus->master_state == ASPEED_I2C_MASTER_INACTIVE ||
bus->master_state == ASPEED_I2C_MASTER_PENDING)
@@ -462,11 +455,15 @@ static u32 aspeed_i2c_master_irq(struct aspeed_i2c_bus *bus, u32 irq_status)
#if IS_ENABLED(CONFIG_I2C_SLAVE)
/*
* If a peer master starts a xfer immediately after it queues a
- * master command, change its state to 'pending' then H/W will
- * continue the queued master xfer just after completing the
- * slave mode session.
+ * master command, clear the queued master command and change
+ * its state to 'pending'. To simplify handling of pending
+ * cases, it uses S/W solution instead of H/W command queue
+ * handling.
*/
if (unlikely(irq_status & ASPEED_I2CD_INTR_SLAVE_MATCH)) {
+ writel(readl(bus->base + ASPEED_I2C_CMD_REG) &
+ ~ASPEED_I2CD_MASTER_CMDS_MASK,
+ bus->base + ASPEED_I2C_CMD_REG);
bus->master_state = ASPEED_I2C_MASTER_PENDING;
dev_dbg(bus->dev,
"master goes pending due to a slave start\n");
@@ -629,6 +626,14 @@ static irqreturn_t aspeed_i2c_bus_irq(int irq, void *dev_id)
irq_handled |= aspeed_i2c_master_irq(bus,
irq_remaining);
}
+
+ /*
+ * Start a pending master command at here if a slave operation is
+ * completed.
+ */
+ if (bus->master_state == ASPEED_I2C_MASTER_PENDING &&
+ bus->slave_state == ASPEED_I2C_SLAVE_INACTIVE)
+ aspeed_i2c_do_start(bus);
#else
irq_handled = aspeed_i2c_master_irq(bus, irq_remaining);
#endif /* CONFIG_I2C_SLAVE */
@@ -691,6 +696,15 @@ static int aspeed_i2c_master_xfer(struct i2c_adapter *adap,
ASPEED_I2CD_BUS_BUSY_STS))
aspeed_i2c_recover_bus(bus);
+ /*
+ * If timed out and the state is still pending, drop the pending
+ * master command.
+ */
+ spin_lock_irqsave(&bus->lock, flags);
+ if (bus->master_state == ASPEED_I2C_MASTER_PENDING)
+ bus->master_state = ASPEED_I2C_MASTER_INACTIVE;
+ spin_unlock_irqrestore(&bus->lock, flags);
+
return -ETIMEDOUT;
}
diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c
index 29eae1bf4f86..2152ec5f535c 100644
--- a/drivers/i2c/busses/i2c-mt65xx.c
+++ b/drivers/i2c/busses/i2c-mt65xx.c
@@ -875,7 +875,7 @@ static irqreturn_t mtk_i2c_irq(int irqno, void *dev_id)
static u32 mtk_i2c_functionality(struct i2c_adapter *adap)
{
- if (adap->quirks->flags & I2C_AQ_NO_ZERO_LEN)
+ if (i2c_check_quirks(adap, I2C_AQ_NO_ZERO_LEN))
return I2C_FUNC_I2C |
(I2C_FUNC_SMBUS_EMUL & ~I2C_FUNC_SMBUS_QUICK);
else
diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c
index d36cf08461f7..b24e7b937f21 100644
--- a/drivers/i2c/busses/i2c-stm32f7.c
+++ b/drivers/i2c/busses/i2c-stm32f7.c
@@ -305,7 +305,7 @@ struct stm32f7_i2c_dev {
struct regmap *regmap;
};
-/**
+/*
* All these values are coming from I2C Specification, Version 6.0, 4th of
* April 2014.
*
@@ -1192,6 +1192,8 @@ static void stm32f7_i2c_slave_start(struct stm32f7_i2c_dev *i2c_dev)
STM32F7_I2C_CR1_TXIE;
stm32f7_i2c_set_bits(base + STM32F7_I2C_CR1, mask);
+ /* Write 1st data byte */
+ writel_relaxed(value, base + STM32F7_I2C_TXDR);
} else {
/* Notify i2c slave that new write transfer is starting */
i2c_slave_event(slave, I2C_SLAVE_WRITE_REQUESTED, &value);
@@ -1501,7 +1503,7 @@ static irqreturn_t stm32f7_i2c_isr_error(int irq, void *data)
void __iomem *base = i2c_dev->base;
struct device *dev = i2c_dev->dev;
struct stm32_i2c_dma *dma = i2c_dev->dma;
- u32 mask, status;
+ u32 status;
status = readl_relaxed(i2c_dev->base + STM32F7_I2C_ISR);
@@ -1526,12 +1528,15 @@ static irqreturn_t stm32f7_i2c_isr_error(int irq, void *data)
f7_msg->result = -EINVAL;
}
- /* Disable interrupts */
- if (stm32f7_i2c_is_slave_registered(i2c_dev))
- mask = STM32F7_I2C_XFER_IRQ_MASK;
- else
- mask = STM32F7_I2C_ALL_IRQ_MASK;
- stm32f7_i2c_disable_irq(i2c_dev, mask);
+ if (!i2c_dev->slave_running) {
+ u32 mask;
+ /* Disable interrupts */
+ if (stm32f7_i2c_is_slave_registered(i2c_dev))
+ mask = STM32F7_I2C_XFER_IRQ_MASK;
+ else
+ mask = STM32F7_I2C_ALL_IRQ_MASK;
+ stm32f7_i2c_disable_irq(i2c_dev, mask);
+ }
/* Disable dma */
if (i2c_dev->use_dma) {
diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c
index 9cb2aa1e20ef..62a1c92ab803 100644
--- a/drivers/i2c/i2c-core-acpi.c
+++ b/drivers/i2c/i2c-core-acpi.c
@@ -39,6 +39,7 @@ struct i2c_acpi_lookup {
int index;
u32 speed;
u32 min_speed;
+ u32 force_speed;
};
/**
@@ -285,6 +286,19 @@ i2c_acpi_match_device(const struct acpi_device_id *matches,
return acpi_match_device(matches, &client->dev);
}
+static const struct acpi_device_id i2c_acpi_force_400khz_device_ids[] = {
+ /*
+ * These Silead touchscreen controllers only work at 400KHz, for
+ * some reason they do not work at 100KHz. On some devices the ACPI
+ * tables list another device at their bus as only being capable
+ * of 100KHz, testing has shown that these other devices work fine
+ * at 400KHz (as can be expected of any recent i2c hw) so we force
+ * the speed of the bus to 400 KHz if a Silead device is present.
+ */
+ { "MSSL1680", 0 },
+ {}
+};
+
static acpi_status i2c_acpi_lookup_speed(acpi_handle handle, u32 level,
void *data, void **return_value)
{
@@ -303,6 +317,9 @@ static acpi_status i2c_acpi_lookup_speed(acpi_handle handle, u32 level,
if (lookup->speed <= lookup->min_speed)
lookup->min_speed = lookup->speed;
+ if (acpi_match_device_ids(adev, i2c_acpi_force_400khz_device_ids) == 0)
+ lookup->force_speed = 400000;
+
return AE_OK;
}
@@ -340,7 +357,16 @@ u32 i2c_acpi_find_bus_speed(struct device *dev)
return 0;
}
- return lookup.min_speed != UINT_MAX ? lookup.min_speed : 0;
+ if (lookup.force_speed) {
+ if (lookup.force_speed != lookup.min_speed)
+ dev_warn(dev, FW_BUG "DSDT uses known not-working I2C bus speed %d, forcing it to %d\n",
+ lookup.min_speed, lookup.force_speed);
+ return lookup.force_speed;
+ } else if (lookup.min_speed != UINT_MAX) {
+ return lookup.min_speed;
+ } else {
+ return 0;
+ }
}
EXPORT_SYMBOL_GPL(i2c_acpi_find_bus_speed);
diff --git a/drivers/i2c/i2c-core-of.c b/drivers/i2c/i2c-core-of.c
index 6f632d543fcc..7eb41990bd6d 100644
--- a/drivers/i2c/i2c-core-of.c
+++ b/drivers/i2c/i2c-core-of.c
@@ -245,14 +245,14 @@ static int of_i2c_notify(struct notifier_block *nb, unsigned long action,
}
client = of_i2c_register_device(adap, rd->dn);
- put_device(&adap->dev);
-
if (IS_ERR(client)) {
dev_err(&adap->dev, "failed to create client for '%pOF'\n",
rd->dn);
+ put_device(&adap->dev);
of_node_clear_flag(rd->dn, OF_POPULATED);
return notifier_from_errno(PTR_ERR(client));
}
+ put_device(&adap->dev);
break;
case OF_RECONFIG_CHANGE_REMOVE:
/* already depopulated? */
diff --git a/drivers/iio/accel/adxl372.c b/drivers/iio/accel/adxl372.c
index 055227cb3d43..67b8817995c0 100644
--- a/drivers/iio/accel/adxl372.c
+++ b/drivers/iio/accel/adxl372.c
@@ -474,12 +474,17 @@ static int adxl372_configure_fifo(struct adxl372_state *st)
if (ret < 0)
return ret;
- fifo_samples = st->watermark & 0xFF;
+ /*
+ * watermark stores the number of sets; we need to write the FIFO
+ * registers with the number of samples
+ */
+ fifo_samples = (st->watermark * st->fifo_set_size);
fifo_ctl = ADXL372_FIFO_CTL_FORMAT_MODE(st->fifo_format) |
ADXL372_FIFO_CTL_MODE_MODE(st->fifo_mode) |
- ADXL372_FIFO_CTL_SAMPLES_MODE(st->watermark);
+ ADXL372_FIFO_CTL_SAMPLES_MODE(fifo_samples);
- ret = regmap_write(st->regmap, ADXL372_FIFO_SAMPLES, fifo_samples);
+ ret = regmap_write(st->regmap,
+ ADXL372_FIFO_SAMPLES, fifo_samples & 0xFF);
if (ret < 0)
return ret;
@@ -548,8 +553,7 @@ static irqreturn_t adxl372_trigger_handler(int irq, void *p)
goto err;
/* Each sample is 2 bytes */
- for (i = 0; i < fifo_entries * sizeof(u16);
- i += st->fifo_set_size * sizeof(u16))
+ for (i = 0; i < fifo_entries; i += st->fifo_set_size)
iio_push_to_buffers(indio_dev, &st->fifo_buf[i]);
}
err:
@@ -571,6 +575,14 @@ static int adxl372_setup(struct adxl372_state *st)
return -ENODEV;
}
+ /*
+ * Perform a software reset to make sure the device is in a consistent
+ * state after start up.
+ */
+ ret = regmap_write(st->regmap, ADXL372_RESET, ADXL372_RESET_CODE);
+ if (ret < 0)
+ return ret;
+
ret = adxl372_set_op_mode(st, ADXL372_STANDBY);
if (ret < 0)
return ret;
diff --git a/drivers/iio/accel/bmc150-accel-core.c b/drivers/iio/accel/bmc150-accel-core.c
index cf6c0e3a83d3..121b4e89f038 100644
--- a/drivers/iio/accel/bmc150-accel-core.c
+++ b/drivers/iio/accel/bmc150-accel-core.c
@@ -117,7 +117,7 @@
#define BMC150_ACCEL_SLEEP_1_SEC 0x0F
#define BMC150_ACCEL_REG_TEMP 0x08
-#define BMC150_ACCEL_TEMP_CENTER_VAL 24
+#define BMC150_ACCEL_TEMP_CENTER_VAL 23
#define BMC150_ACCEL_AXIS_TO_REG(axis) (BMC150_ACCEL_REG_XOUT_L + (axis * 2))
#define BMC150_AUTO_SUSPEND_DELAY_MS 2000
diff --git a/drivers/iio/adc/ad799x.c b/drivers/iio/adc/ad799x.c
index 5a3ca5904ded..f658012baad8 100644
--- a/drivers/iio/adc/ad799x.c
+++ b/drivers/iio/adc/ad799x.c
@@ -810,10 +810,10 @@ static int ad799x_probe(struct i2c_client *client,
ret = ad799x_write_config(st, st->chip_config->default_config);
if (ret < 0)
- goto error_disable_reg;
+ goto error_disable_vref;
ret = ad799x_read_config(st);
if (ret < 0)
- goto error_disable_reg;
+ goto error_disable_vref;
st->config = ret;
ret = iio_triggered_buffer_setup(indio_dev, NULL,
diff --git a/drivers/iio/adc/axp288_adc.c b/drivers/iio/adc/axp288_adc.c
index adc9cf7a075d..8ea2aed6d6f5 100644
--- a/drivers/iio/adc/axp288_adc.c
+++ b/drivers/iio/adc/axp288_adc.c
@@ -7,6 +7,7 @@
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
+#include <linux/dmi.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/device.h>
@@ -25,6 +26,11 @@
#define AXP288_ADC_EN_MASK 0xF0
#define AXP288_ADC_TS_ENABLE 0x01
+#define AXP288_ADC_TS_BIAS_MASK GENMASK(5, 4)
+#define AXP288_ADC_TS_BIAS_20UA (0 << 4)
+#define AXP288_ADC_TS_BIAS_40UA (1 << 4)
+#define AXP288_ADC_TS_BIAS_60UA (2 << 4)
+#define AXP288_ADC_TS_BIAS_80UA (3 << 4)
#define AXP288_ADC_TS_CURRENT_ON_OFF_MASK GENMASK(1, 0)
#define AXP288_ADC_TS_CURRENT_OFF (0 << 0)
#define AXP288_ADC_TS_CURRENT_ON_WHEN_CHARGING (1 << 0)
@@ -177,10 +183,36 @@ static int axp288_adc_read_raw(struct iio_dev *indio_dev,
return ret;
}
+/*
+ * We rely on the machine's firmware to correctly setup the TS pin bias current
+ * at boot. This lists systems with broken fw where we need to set it ourselves.
+ */
+static const struct dmi_system_id axp288_adc_ts_bias_override[] = {
+ {
+ /* Lenovo Ideapad 100S (11 inch) */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad 100S-11IBY"),
+ },
+ .driver_data = (void *)(uintptr_t)AXP288_ADC_TS_BIAS_80UA,
+ },
+ {}
+};
+
static int axp288_adc_initialize(struct axp288_adc_info *info)
{
+ const struct dmi_system_id *bias_override;
int ret, adc_enable_val;
+ bias_override = dmi_first_match(axp288_adc_ts_bias_override);
+ if (bias_override) {
+ ret = regmap_update_bits(info->regmap, AXP288_ADC_TS_PIN_CTRL,
+ AXP288_ADC_TS_BIAS_MASK,
+ (uintptr_t)bias_override->driver_data);
+ if (ret)
+ return ret;
+ }
+
/*
* Determine if the TS pin is enabled and set the TS current-source
* accordingly.
diff --git a/drivers/iio/adc/hx711.c b/drivers/iio/adc/hx711.c
index 88c7fe15003b..62e6c8badd22 100644
--- a/drivers/iio/adc/hx711.c
+++ b/drivers/iio/adc/hx711.c
@@ -100,14 +100,14 @@ struct hx711_data {
static int hx711_cycle(struct hx711_data *hx711_data)
{
- int val;
+ unsigned long flags;
/*
* if preempted for more then 60us while PD_SCK is high:
* hx711 is going in reset
* ==> measuring is false
*/
- preempt_disable();
+ local_irq_save(flags);
gpiod_set_value(hx711_data->gpiod_pd_sck, 1);
/*
@@ -117,7 +117,6 @@ static int hx711_cycle(struct hx711_data *hx711_data)
*/
ndelay(hx711_data->data_ready_delay_ns);
- val = gpiod_get_value(hx711_data->gpiod_dout);
/*
* here we are not waiting for 0.2 us as suggested by the datasheet,
* because the oscilloscope showed in a test scenario
@@ -125,7 +124,7 @@ static int hx711_cycle(struct hx711_data *hx711_data)
* and 0.56 us for PD_SCK low on TI Sitara with 800 MHz
*/
gpiod_set_value(hx711_data->gpiod_pd_sck, 0);
- preempt_enable();
+ local_irq_restore(flags);
/*
* make it a square wave for addressing cases with capacitance on
@@ -133,7 +132,8 @@ static int hx711_cycle(struct hx711_data *hx711_data)
*/
ndelay(hx711_data->data_ready_delay_ns);
- return val;
+ /* sample as late as possible */
+ return gpiod_get_value(hx711_data->gpiod_dout);
}
static int hx711_read(struct hx711_data *hx711_data)
diff --git a/drivers/iio/adc/meson_saradc.c b/drivers/iio/adc/meson_saradc.c
index 7b28d045d271..7b27306330a3 100644
--- a/drivers/iio/adc/meson_saradc.c
+++ b/drivers/iio/adc/meson_saradc.c
@@ -1219,6 +1219,11 @@ static int meson_sar_adc_probe(struct platform_device *pdev)
if (IS_ERR(base))
return PTR_ERR(base);
+ priv->regmap = devm_regmap_init_mmio(&pdev->dev, base,
+ priv->param->regmap_config);
+ if (IS_ERR(priv->regmap))
+ return PTR_ERR(priv->regmap);
+
irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
if (!irq)
return -EINVAL;
@@ -1228,11 +1233,6 @@ static int meson_sar_adc_probe(struct platform_device *pdev)
if (ret)
return ret;
- priv->regmap = devm_regmap_init_mmio(&pdev->dev, base,
- priv->param->regmap_config);
- if (IS_ERR(priv->regmap))
- return PTR_ERR(priv->regmap);
-
priv->clkin = devm_clk_get(&pdev->dev, "clkin");
if (IS_ERR(priv->clkin)) {
dev_err(&pdev->dev, "failed to get clkin\n");
diff --git a/drivers/iio/adc/stm32-adc-core.c b/drivers/iio/adc/stm32-adc-core.c
index 9b85fefc0a96..93a096a91f8c 100644
--- a/drivers/iio/adc/stm32-adc-core.c
+++ b/drivers/iio/adc/stm32-adc-core.c
@@ -24,33 +24,6 @@
#include "stm32-adc-core.h"
-/* STM32F4 - common registers for all ADC instances: 1, 2 & 3 */
-#define STM32F4_ADC_CSR (STM32_ADCX_COMN_OFFSET + 0x00)
-#define STM32F4_ADC_CCR (STM32_ADCX_COMN_OFFSET + 0x04)
-
-/* STM32F4_ADC_CSR - bit fields */
-#define STM32F4_EOC3 BIT(17)
-#define STM32F4_EOC2 BIT(9)
-#define STM32F4_EOC1 BIT(1)
-
-/* STM32F4_ADC_CCR - bit fields */
-#define STM32F4_ADC_ADCPRE_SHIFT 16
-#define STM32F4_ADC_ADCPRE_MASK GENMASK(17, 16)
-
-/* STM32H7 - common registers for all ADC instances */
-#define STM32H7_ADC_CSR (STM32_ADCX_COMN_OFFSET + 0x00)
-#define STM32H7_ADC_CCR (STM32_ADCX_COMN_OFFSET + 0x08)
-
-/* STM32H7_ADC_CSR - bit fields */
-#define STM32H7_EOC_SLV BIT(18)
-#define STM32H7_EOC_MST BIT(2)
-
-/* STM32H7_ADC_CCR - bit fields */
-#define STM32H7_PRESC_SHIFT 18
-#define STM32H7_PRESC_MASK GENMASK(21, 18)
-#define STM32H7_CKMODE_SHIFT 16
-#define STM32H7_CKMODE_MASK GENMASK(17, 16)
-
#define STM32_ADC_CORE_SLEEP_DELAY_MS 2000
/* SYSCFG registers */
@@ -71,6 +44,8 @@
* @eoc1: adc1 end of conversion flag in @csr
* @eoc2: adc2 end of conversion flag in @csr
* @eoc3: adc3 end of conversion flag in @csr
+ * @ier: interrupt enable register offset for each adc
+ * @eocie_msk: end of conversion interrupt enable mask in @ier
*/
struct stm32_adc_common_regs {
u32 csr;
@@ -78,6 +53,8 @@ struct stm32_adc_common_regs {
u32 eoc1_msk;
u32 eoc2_msk;
u32 eoc3_msk;
+ u32 ier;
+ u32 eocie_msk;
};
struct stm32_adc_priv;
@@ -303,6 +280,8 @@ static const struct stm32_adc_common_regs stm32f4_adc_common_regs = {
.eoc1_msk = STM32F4_EOC1,
.eoc2_msk = STM32F4_EOC2,
.eoc3_msk = STM32F4_EOC3,
+ .ier = STM32F4_ADC_CR1,
+ .eocie_msk = STM32F4_EOCIE,
};
/* STM32H7 common registers definitions */
@@ -311,8 +290,24 @@ static const struct stm32_adc_common_regs stm32h7_adc_common_regs = {
.ccr = STM32H7_ADC_CCR,
.eoc1_msk = STM32H7_EOC_MST,
.eoc2_msk = STM32H7_EOC_SLV,
+ .ier = STM32H7_ADC_IER,
+ .eocie_msk = STM32H7_EOCIE,
+};
+
+static const unsigned int stm32_adc_offset[STM32_ADC_MAX_ADCS] = {
+ 0, STM32_ADC_OFFSET, STM32_ADC_OFFSET * 2,
};
+static unsigned int stm32_adc_eoc_enabled(struct stm32_adc_priv *priv,
+ unsigned int adc)
+{
+ u32 ier, offset = stm32_adc_offset[adc];
+
+ ier = readl_relaxed(priv->common.base + offset + priv->cfg->regs->ier);
+
+ return ier & priv->cfg->regs->eocie_msk;
+}
+
/* ADC common interrupt for all instances */
static void stm32_adc_irq_handler(struct irq_desc *desc)
{
@@ -323,13 +318,28 @@ static void stm32_adc_irq_handler(struct irq_desc *desc)
chained_irq_enter(chip, desc);
status = readl_relaxed(priv->common.base + priv->cfg->regs->csr);
- if (status & priv->cfg->regs->eoc1_msk)
+ /*
+ * End of conversion may be handled by using IRQ or DMA. There may be a
+ * race here when two conversions complete at the same time on several
+ * ADCs. EOC may be read 'set' for several ADCs, with:
+ * - an ADC configured to use DMA (EOC triggers the DMA request, and
+ * is then automatically cleared by DR read in hardware)
+ * - an ADC configured to use IRQs (EOCIE bit is set. The handler must
+ * be called in this case)
+ * So both EOC status bit in CSR and EOCIE control bit must be checked
+ * before invoking the interrupt handler (e.g. call ISR only for
+ * IRQ-enabled ADCs).
+ */
+ if (status & priv->cfg->regs->eoc1_msk &&
+ stm32_adc_eoc_enabled(priv, 0))
generic_handle_irq(irq_find_mapping(priv->domain, 0));
- if (status & priv->cfg->regs->eoc2_msk)
+ if (status & priv->cfg->regs->eoc2_msk &&
+ stm32_adc_eoc_enabled(priv, 1))
generic_handle_irq(irq_find_mapping(priv->domain, 1));
- if (status & priv->cfg->regs->eoc3_msk)
+ if (status & priv->cfg->regs->eoc3_msk &&
+ stm32_adc_eoc_enabled(priv, 2))
generic_handle_irq(irq_find_mapping(priv->domain, 2));
chained_irq_exit(chip, desc);
diff --git a/drivers/iio/adc/stm32-adc-core.h b/drivers/iio/adc/stm32-adc-core.h
index 8af507b3f32d..2579d514c2a3 100644
--- a/drivers/iio/adc/stm32-adc-core.h
+++ b/drivers/iio/adc/stm32-adc-core.h
@@ -25,8 +25,145 @@
* --------------------------------------------------------
*/
#define STM32_ADC_MAX_ADCS 3
+#define STM32_ADC_OFFSET 0x100
#define STM32_ADCX_COMN_OFFSET 0x300
+/* STM32F4 - Registers for each ADC instance */
+#define STM32F4_ADC_SR 0x00
+#define STM32F4_ADC_CR1 0x04
+#define STM32F4_ADC_CR2 0x08
+#define STM32F4_ADC_SMPR1 0x0C
+#define STM32F4_ADC_SMPR2 0x10
+#define STM32F4_ADC_HTR 0x24
+#define STM32F4_ADC_LTR 0x28
+#define STM32F4_ADC_SQR1 0x2C
+#define STM32F4_ADC_SQR2 0x30
+#define STM32F4_ADC_SQR3 0x34
+#define STM32F4_ADC_JSQR 0x38
+#define STM32F4_ADC_JDR1 0x3C
+#define STM32F4_ADC_JDR2 0x40
+#define STM32F4_ADC_JDR3 0x44
+#define STM32F4_ADC_JDR4 0x48
+#define STM32F4_ADC_DR 0x4C
+
+/* STM32F4 - common registers for all ADC instances: 1, 2 & 3 */
+#define STM32F4_ADC_CSR (STM32_ADCX_COMN_OFFSET + 0x00)
+#define STM32F4_ADC_CCR (STM32_ADCX_COMN_OFFSET + 0x04)
+
+/* STM32F4_ADC_SR - bit fields */
+#define STM32F4_STRT BIT(4)
+#define STM32F4_EOC BIT(1)
+
+/* STM32F4_ADC_CR1 - bit fields */
+#define STM32F4_RES_SHIFT 24
+#define STM32F4_RES_MASK GENMASK(25, 24)
+#define STM32F4_SCAN BIT(8)
+#define STM32F4_EOCIE BIT(5)
+
+/* STM32F4_ADC_CR2 - bit fields */
+#define STM32F4_SWSTART BIT(30)
+#define STM32F4_EXTEN_SHIFT 28
+#define STM32F4_EXTEN_MASK GENMASK(29, 28)
+#define STM32F4_EXTSEL_SHIFT 24
+#define STM32F4_EXTSEL_MASK GENMASK(27, 24)
+#define STM32F4_EOCS BIT(10)
+#define STM32F4_DDS BIT(9)
+#define STM32F4_DMA BIT(8)
+#define STM32F4_ADON BIT(0)
+
+/* STM32F4_ADC_CSR - bit fields */
+#define STM32F4_EOC3 BIT(17)
+#define STM32F4_EOC2 BIT(9)
+#define STM32F4_EOC1 BIT(1)
+
+/* STM32F4_ADC_CCR - bit fields */
+#define STM32F4_ADC_ADCPRE_SHIFT 16
+#define STM32F4_ADC_ADCPRE_MASK GENMASK(17, 16)
+
+/* STM32H7 - Registers for each ADC instance */
+#define STM32H7_ADC_ISR 0x00
+#define STM32H7_ADC_IER 0x04
+#define STM32H7_ADC_CR 0x08
+#define STM32H7_ADC_CFGR 0x0C
+#define STM32H7_ADC_SMPR1 0x14
+#define STM32H7_ADC_SMPR2 0x18
+#define STM32H7_ADC_PCSEL 0x1C
+#define STM32H7_ADC_SQR1 0x30
+#define STM32H7_ADC_SQR2 0x34
+#define STM32H7_ADC_SQR3 0x38
+#define STM32H7_ADC_SQR4 0x3C
+#define STM32H7_ADC_DR 0x40
+#define STM32H7_ADC_DIFSEL 0xC0
+#define STM32H7_ADC_CALFACT 0xC4
+#define STM32H7_ADC_CALFACT2 0xC8
+
+/* STM32H7 - common registers for all ADC instances */
+#define STM32H7_ADC_CSR (STM32_ADCX_COMN_OFFSET + 0x00)
+#define STM32H7_ADC_CCR (STM32_ADCX_COMN_OFFSET + 0x08)
+
+/* STM32H7_ADC_ISR - bit fields */
+#define STM32MP1_VREGREADY BIT(12)
+#define STM32H7_EOC BIT(2)
+#define STM32H7_ADRDY BIT(0)
+
+/* STM32H7_ADC_IER - bit fields */
+#define STM32H7_EOCIE STM32H7_EOC
+
+/* STM32H7_ADC_CR - bit fields */
+#define STM32H7_ADCAL BIT(31)
+#define STM32H7_ADCALDIF BIT(30)
+#define STM32H7_DEEPPWD BIT(29)
+#define STM32H7_ADVREGEN BIT(28)
+#define STM32H7_LINCALRDYW6 BIT(27)
+#define STM32H7_LINCALRDYW5 BIT(26)
+#define STM32H7_LINCALRDYW4 BIT(25)
+#define STM32H7_LINCALRDYW3 BIT(24)
+#define STM32H7_LINCALRDYW2 BIT(23)
+#define STM32H7_LINCALRDYW1 BIT(22)
+#define STM32H7_ADCALLIN BIT(16)
+#define STM32H7_BOOST BIT(8)
+#define STM32H7_ADSTP BIT(4)
+#define STM32H7_ADSTART BIT(2)
+#define STM32H7_ADDIS BIT(1)
+#define STM32H7_ADEN BIT(0)
+
+/* STM32H7_ADC_CFGR bit fields */
+#define STM32H7_EXTEN_SHIFT 10
+#define STM32H7_EXTEN_MASK GENMASK(11, 10)
+#define STM32H7_EXTSEL_SHIFT 5
+#define STM32H7_EXTSEL_MASK GENMASK(9, 5)
+#define STM32H7_RES_SHIFT 2
+#define STM32H7_RES_MASK GENMASK(4, 2)
+#define STM32H7_DMNGT_SHIFT 0
+#define STM32H7_DMNGT_MASK GENMASK(1, 0)
+
+enum stm32h7_adc_dmngt {
+ STM32H7_DMNGT_DR_ONLY, /* Regular data in DR only */
+ STM32H7_DMNGT_DMA_ONESHOT, /* DMA one shot mode */
+ STM32H7_DMNGT_DFSDM, /* DFSDM mode */
+ STM32H7_DMNGT_DMA_CIRC, /* DMA circular mode */
+};
+
+/* STM32H7_ADC_CALFACT - bit fields */
+#define STM32H7_CALFACT_D_SHIFT 16
+#define STM32H7_CALFACT_D_MASK GENMASK(26, 16)
+#define STM32H7_CALFACT_S_SHIFT 0
+#define STM32H7_CALFACT_S_MASK GENMASK(10, 0)
+
+/* STM32H7_ADC_CALFACT2 - bit fields */
+#define STM32H7_LINCALFACT_SHIFT 0
+#define STM32H7_LINCALFACT_MASK GENMASK(29, 0)
+
+/* STM32H7_ADC_CSR - bit fields */
+#define STM32H7_EOC_SLV BIT(18)
+#define STM32H7_EOC_MST BIT(2)
+
+/* STM32H7_ADC_CCR - bit fields */
+#define STM32H7_PRESC_SHIFT 18
+#define STM32H7_PRESC_MASK GENMASK(21, 18)
+#define STM32H7_CKMODE_SHIFT 16
+#define STM32H7_CKMODE_MASK GENMASK(17, 16)
+
/**
* struct stm32_adc_common - stm32 ADC driver common data (for all instances)
* @base: control registers base cpu addr
diff --git a/drivers/iio/adc/stm32-adc.c b/drivers/iio/adc/stm32-adc.c
index 6a7dd08b1e0b..73aee5949b6b 100644
--- a/drivers/iio/adc/stm32-adc.c
+++ b/drivers/iio/adc/stm32-adc.c
@@ -28,115 +28,6 @@
#include "stm32-adc-core.h"
-/* STM32F4 - Registers for each ADC instance */
-#define STM32F4_ADC_SR 0x00
-#define STM32F4_ADC_CR1 0x04
-#define STM32F4_ADC_CR2 0x08
-#define STM32F4_ADC_SMPR1 0x0C
-#define STM32F4_ADC_SMPR2 0x10
-#define STM32F4_ADC_HTR 0x24
-#define STM32F4_ADC_LTR 0x28
-#define STM32F4_ADC_SQR1 0x2C
-#define STM32F4_ADC_SQR2 0x30
-#define STM32F4_ADC_SQR3 0x34
-#define STM32F4_ADC_JSQR 0x38
-#define STM32F4_ADC_JDR1 0x3C
-#define STM32F4_ADC_JDR2 0x40
-#define STM32F4_ADC_JDR3 0x44
-#define STM32F4_ADC_JDR4 0x48
-#define STM32F4_ADC_DR 0x4C
-
-/* STM32F4_ADC_SR - bit fields */
-#define STM32F4_STRT BIT(4)
-#define STM32F4_EOC BIT(1)
-
-/* STM32F4_ADC_CR1 - bit fields */
-#define STM32F4_RES_SHIFT 24
-#define STM32F4_RES_MASK GENMASK(25, 24)
-#define STM32F4_SCAN BIT(8)
-#define STM32F4_EOCIE BIT(5)
-
-/* STM32F4_ADC_CR2 - bit fields */
-#define STM32F4_SWSTART BIT(30)
-#define STM32F4_EXTEN_SHIFT 28
-#define STM32F4_EXTEN_MASK GENMASK(29, 28)
-#define STM32F4_EXTSEL_SHIFT 24
-#define STM32F4_EXTSEL_MASK GENMASK(27, 24)
-#define STM32F4_EOCS BIT(10)
-#define STM32F4_DDS BIT(9)
-#define STM32F4_DMA BIT(8)
-#define STM32F4_ADON BIT(0)
-
-/* STM32H7 - Registers for each ADC instance */
-#define STM32H7_ADC_ISR 0x00
-#define STM32H7_ADC_IER 0x04
-#define STM32H7_ADC_CR 0x08
-#define STM32H7_ADC_CFGR 0x0C
-#define STM32H7_ADC_SMPR1 0x14
-#define STM32H7_ADC_SMPR2 0x18
-#define STM32H7_ADC_PCSEL 0x1C
-#define STM32H7_ADC_SQR1 0x30
-#define STM32H7_ADC_SQR2 0x34
-#define STM32H7_ADC_SQR3 0x38
-#define STM32H7_ADC_SQR4 0x3C
-#define STM32H7_ADC_DR 0x40
-#define STM32H7_ADC_DIFSEL 0xC0
-#define STM32H7_ADC_CALFACT 0xC4
-#define STM32H7_ADC_CALFACT2 0xC8
-
-/* STM32H7_ADC_ISR - bit fields */
-#define STM32MP1_VREGREADY BIT(12)
-#define STM32H7_EOC BIT(2)
-#define STM32H7_ADRDY BIT(0)
-
-/* STM32H7_ADC_IER - bit fields */
-#define STM32H7_EOCIE STM32H7_EOC
-
-/* STM32H7_ADC_CR - bit fields */
-#define STM32H7_ADCAL BIT(31)
-#define STM32H7_ADCALDIF BIT(30)
-#define STM32H7_DEEPPWD BIT(29)
-#define STM32H7_ADVREGEN BIT(28)
-#define STM32H7_LINCALRDYW6 BIT(27)
-#define STM32H7_LINCALRDYW5 BIT(26)
-#define STM32H7_LINCALRDYW4 BIT(25)
-#define STM32H7_LINCALRDYW3 BIT(24)
-#define STM32H7_LINCALRDYW2 BIT(23)
-#define STM32H7_LINCALRDYW1 BIT(22)
-#define STM32H7_ADCALLIN BIT(16)
-#define STM32H7_BOOST BIT(8)
-#define STM32H7_ADSTP BIT(4)
-#define STM32H7_ADSTART BIT(2)
-#define STM32H7_ADDIS BIT(1)
-#define STM32H7_ADEN BIT(0)
-
-/* STM32H7_ADC_CFGR bit fields */
-#define STM32H7_EXTEN_SHIFT 10
-#define STM32H7_EXTEN_MASK GENMASK(11, 10)
-#define STM32H7_EXTSEL_SHIFT 5
-#define STM32H7_EXTSEL_MASK GENMASK(9, 5)
-#define STM32H7_RES_SHIFT 2
-#define STM32H7_RES_MASK GENMASK(4, 2)
-#define STM32H7_DMNGT_SHIFT 0
-#define STM32H7_DMNGT_MASK GENMASK(1, 0)
-
-enum stm32h7_adc_dmngt {
- STM32H7_DMNGT_DR_ONLY, /* Regular data in DR only */
- STM32H7_DMNGT_DMA_ONESHOT, /* DMA one shot mode */
- STM32H7_DMNGT_DFSDM, /* DFSDM mode */
- STM32H7_DMNGT_DMA_CIRC, /* DMA circular mode */
-};
-
-/* STM32H7_ADC_CALFACT - bit fields */
-#define STM32H7_CALFACT_D_SHIFT 16
-#define STM32H7_CALFACT_D_MASK GENMASK(26, 16)
-#define STM32H7_CALFACT_S_SHIFT 0
-#define STM32H7_CALFACT_S_MASK GENMASK(10, 0)
-
-/* STM32H7_ADC_CALFACT2 - bit fields */
-#define STM32H7_LINCALFACT_SHIFT 0
-#define STM32H7_LINCALFACT_MASK GENMASK(29, 0)
-
/* Number of linear calibration shadow registers / LINCALRDYW control bits */
#define STM32H7_LINCALFACT_NUM 6
@@ -1508,7 +1399,7 @@ static int stm32_adc_dma_start(struct iio_dev *indio_dev)
cookie = dmaengine_submit(desc);
ret = dma_submit_error(cookie);
if (ret) {
- dmaengine_terminate_all(adc->dma_chan);
+ dmaengine_terminate_sync(adc->dma_chan);
return ret;
}
@@ -1586,7 +1477,7 @@ static void __stm32_adc_buffer_predisable(struct iio_dev *indio_dev)
stm32_adc_conv_irq_disable(adc);
if (adc->dma_chan)
- dmaengine_terminate_all(adc->dma_chan);
+ dmaengine_terminate_sync(adc->dma_chan);
if (stm32_adc_set_trig(indio_dev, NULL))
dev_err(&indio_dev->dev, "Can't clear trigger\n");
diff --git a/drivers/iio/imu/adis16480.c b/drivers/iio/imu/adis16480.c
index b99d73887c9f..8743b2f376e2 100644
--- a/drivers/iio/imu/adis16480.c
+++ b/drivers/iio/imu/adis16480.c
@@ -317,8 +317,11 @@ static int adis16480_set_freq(struct iio_dev *indio_dev, int val, int val2)
struct adis16480 *st = iio_priv(indio_dev);
unsigned int t, reg;
+ if (val < 0 || val2 < 0)
+ return -EINVAL;
+
t = val * 1000 + val2 / 1000;
- if (t <= 0)
+ if (t == 0)
return -EINVAL;
/*
diff --git a/drivers/iio/imu/adis_buffer.c b/drivers/iio/imu/adis_buffer.c
index 9ac8356d9a95..4998a89d083d 100644
--- a/drivers/iio/imu/adis_buffer.c
+++ b/drivers/iio/imu/adis_buffer.c
@@ -35,8 +35,11 @@ static int adis_update_scan_mode_burst(struct iio_dev *indio_dev,
return -ENOMEM;
adis->buffer = kzalloc(burst_length + sizeof(u16), GFP_KERNEL);
- if (!adis->buffer)
+ if (!adis->buffer) {
+ kfree(adis->xfer);
+ adis->xfer = NULL;
return -ENOMEM;
+ }
tx = adis->buffer + burst_length;
tx[0] = ADIS_READ_REG(adis->burst->reg_cmd);
@@ -78,8 +81,11 @@ int adis_update_scan_mode(struct iio_dev *indio_dev,
return -ENOMEM;
adis->buffer = kcalloc(indio_dev->scan_bytes, 2, GFP_KERNEL);
- if (!adis->buffer)
+ if (!adis->buffer) {
+ kfree(adis->xfer);
+ adis->xfer = NULL;
return -ENOMEM;
+ }
rx = adis->buffer;
tx = rx + scan_count;
diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c
index b17f060b52fc..868281b8adb0 100644
--- a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c
+++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c
@@ -114,54 +114,63 @@ static const struct inv_mpu6050_hw hw_info[] = {
.name = "MPU6050",
.reg = &reg_set_6050,
.config = &chip_config_6050,
+ .fifo_size = 1024,
},
{
.whoami = INV_MPU6500_WHOAMI_VALUE,
.name = "MPU6500",
.reg = &reg_set_6500,
.config = &chip_config_6050,
+ .fifo_size = 512,
},
{
.whoami = INV_MPU6515_WHOAMI_VALUE,
.name = "MPU6515",
.reg = &reg_set_6500,
.config = &chip_config_6050,
+ .fifo_size = 512,
},
{
.whoami = INV_MPU6000_WHOAMI_VALUE,
.name = "MPU6000",
.reg = &reg_set_6050,
.config = &chip_config_6050,
+ .fifo_size = 1024,
},
{
.whoami = INV_MPU9150_WHOAMI_VALUE,
.name = "MPU9150",
.reg = &reg_set_6050,
.config = &chip_config_6050,
+ .fifo_size = 1024,
},
{
.whoami = INV_MPU9250_WHOAMI_VALUE,
.name = "MPU9250",
.reg = &reg_set_6500,
.config = &chip_config_6050,
+ .fifo_size = 512,
},
{
.whoami = INV_MPU9255_WHOAMI_VALUE,
.name = "MPU9255",
.reg = &reg_set_6500,
.config = &chip_config_6050,
+ .fifo_size = 512,
},
{
.whoami = INV_ICM20608_WHOAMI_VALUE,
.name = "ICM20608",
.reg = &reg_set_6500,
.config = &chip_config_6050,
+ .fifo_size = 512,
},
{
.whoami = INV_ICM20602_WHOAMI_VALUE,
.name = "ICM20602",
.reg = &reg_set_icm20602,
.config = &chip_config_6050,
+ .fifo_size = 1008,
},
};
diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h b/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h
index db1c6904388b..51235677c534 100644
--- a/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h
+++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h
@@ -100,12 +100,14 @@ struct inv_mpu6050_chip_config {
* @name: name of the chip.
* @reg: register map of the chip.
* @config: configuration of the chip.
+ * @fifo_size: size of the FIFO in bytes.
*/
struct inv_mpu6050_hw {
u8 whoami;
u8 *name;
const struct inv_mpu6050_reg_map *reg;
const struct inv_mpu6050_chip_config *config;
+ size_t fifo_size;
};
/*
diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c
index 5f9a5de0bab4..72d8c5790076 100644
--- a/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c
+++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c
@@ -180,9 +180,6 @@ irqreturn_t inv_mpu6050_read_fifo(int irq, void *p)
"failed to ack interrupt\n");
goto flush_fifo;
}
- /* handle fifo overflow by reseting fifo */
- if (int_status & INV_MPU6050_BIT_FIFO_OVERFLOW_INT)
- goto flush_fifo;
if (!(int_status & INV_MPU6050_BIT_RAW_DATA_RDY_INT)) {
dev_warn(regmap_get_device(st->map),
"spurious interrupt with status 0x%x\n", int_status);
@@ -211,6 +208,18 @@ irqreturn_t inv_mpu6050_read_fifo(int irq, void *p)
if (result)
goto end_session;
fifo_count = get_unaligned_be16(&data[0]);
+
+ /*
+ * Handle fifo overflow by resetting fifo.
+ * Reset if there is only 3 data set free remaining to mitigate
+ * possible delay between reading fifo count and fifo data.
+ */
+ nb = 3 * bytes_per_datum;
+ if (fifo_count >= st->hw->fifo_size - nb) {
+ dev_warn(regmap_get_device(st->map), "fifo overflow reset\n");
+ goto flush_fifo;
+ }
+
/* compute and process all complete datum */
nb = fifo_count / bytes_per_datum;
inv_mpu6050_update_period(st, pf->timestamp, nb);
diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h
index 80e42c7dbcbe..0fe6999b8257 100644
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h
@@ -99,7 +99,9 @@ struct st_lsm6dsx_fs {
#define ST_LSM6DSX_FS_LIST_SIZE 4
struct st_lsm6dsx_fs_table_entry {
struct st_lsm6dsx_reg reg;
+
struct st_lsm6dsx_fs fs_avl[ST_LSM6DSX_FS_LIST_SIZE];
+ int fs_len;
};
/**
diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
index 2d3495560136..fd5ebe1e1594 100644
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
@@ -145,6 +145,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = {
.fs_avl[1] = { IIO_G_TO_M_S_2(122), 0x2 },
.fs_avl[2] = { IIO_G_TO_M_S_2(244), 0x3 },
.fs_avl[3] = { IIO_G_TO_M_S_2(732), 0x1 },
+ .fs_len = 4,
},
[ST_LSM6DSX_ID_GYRO] = {
.reg = {
@@ -154,6 +155,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = {
.fs_avl[0] = { IIO_DEGREE_TO_RAD(245), 0x0 },
.fs_avl[1] = { IIO_DEGREE_TO_RAD(500), 0x1 },
.fs_avl[2] = { IIO_DEGREE_TO_RAD(2000), 0x3 },
+ .fs_len = 3,
},
},
},
@@ -215,6 +217,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = {
.fs_avl[1] = { IIO_G_TO_M_S_2(122), 0x2 },
.fs_avl[2] = { IIO_G_TO_M_S_2(244), 0x3 },
.fs_avl[3] = { IIO_G_TO_M_S_2(488), 0x1 },
+ .fs_len = 4,
},
[ST_LSM6DSX_ID_GYRO] = {
.reg = {
@@ -225,6 +228,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = {
.fs_avl[1] = { IIO_DEGREE_TO_RAD(17500), 0x1 },
.fs_avl[2] = { IIO_DEGREE_TO_RAD(35000), 0x2 },
.fs_avl[3] = { IIO_DEGREE_TO_RAD(70000), 0x3 },
+ .fs_len = 4,
},
},
.decimator = {
@@ -327,6 +331,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = {
.fs_avl[1] = { IIO_G_TO_M_S_2(122), 0x2 },
.fs_avl[2] = { IIO_G_TO_M_S_2(244), 0x3 },
.fs_avl[3] = { IIO_G_TO_M_S_2(488), 0x1 },
+ .fs_len = 4,
},
[ST_LSM6DSX_ID_GYRO] = {
.reg = {
@@ -337,6 +342,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = {
.fs_avl[1] = { IIO_DEGREE_TO_RAD(17500), 0x1 },
.fs_avl[2] = { IIO_DEGREE_TO_RAD(35000), 0x2 },
.fs_avl[3] = { IIO_DEGREE_TO_RAD(70000), 0x3 },
+ .fs_len = 4,
},
},
.decimator = {
@@ -448,6 +454,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = {
.fs_avl[1] = { IIO_G_TO_M_S_2(122), 0x2 },
.fs_avl[2] = { IIO_G_TO_M_S_2(244), 0x3 },
.fs_avl[3] = { IIO_G_TO_M_S_2(488), 0x1 },
+ .fs_len = 4,
},
[ST_LSM6DSX_ID_GYRO] = {
.reg = {
@@ -458,6 +465,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = {
.fs_avl[1] = { IIO_DEGREE_TO_RAD(17500), 0x1 },
.fs_avl[2] = { IIO_DEGREE_TO_RAD(35000), 0x2 },
.fs_avl[3] = { IIO_DEGREE_TO_RAD(70000), 0x3 },
+ .fs_len = 4,
},
},
.decimator = {
@@ -563,6 +571,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = {
.fs_avl[1] = { IIO_G_TO_M_S_2(122), 0x2 },
.fs_avl[2] = { IIO_G_TO_M_S_2(244), 0x3 },
.fs_avl[3] = { IIO_G_TO_M_S_2(488), 0x1 },
+ .fs_len = 4,
},
[ST_LSM6DSX_ID_GYRO] = {
.reg = {
@@ -573,6 +582,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = {
.fs_avl[1] = { IIO_DEGREE_TO_RAD(17500), 0x1 },
.fs_avl[2] = { IIO_DEGREE_TO_RAD(35000), 0x2 },
.fs_avl[3] = { IIO_DEGREE_TO_RAD(70000), 0x3 },
+ .fs_len = 4,
},
},
.batch = {
@@ -693,6 +703,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = {
.fs_avl[1] = { IIO_G_TO_M_S_2(122), 0x2 },
.fs_avl[2] = { IIO_G_TO_M_S_2(244), 0x3 },
.fs_avl[3] = { IIO_G_TO_M_S_2(488), 0x1 },
+ .fs_len = 4,
},
[ST_LSM6DSX_ID_GYRO] = {
.reg = {
@@ -703,6 +714,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = {
.fs_avl[1] = { IIO_DEGREE_TO_RAD(17500), 0x1 },
.fs_avl[2] = { IIO_DEGREE_TO_RAD(35000), 0x2 },
.fs_avl[3] = { IIO_DEGREE_TO_RAD(70000), 0x3 },
+ .fs_len = 4,
},
},
.batch = {
@@ -800,6 +812,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = {
.fs_avl[1] = { IIO_G_TO_M_S_2(122), 0x2 },
.fs_avl[2] = { IIO_G_TO_M_S_2(244), 0x3 },
.fs_avl[3] = { IIO_G_TO_M_S_2(488), 0x1 },
+ .fs_len = 4,
},
[ST_LSM6DSX_ID_GYRO] = {
.reg = {
@@ -810,6 +823,7 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = {
.fs_avl[1] = { IIO_DEGREE_TO_RAD(17500), 0x1 },
.fs_avl[2] = { IIO_DEGREE_TO_RAD(35000), 0x2 },
.fs_avl[3] = { IIO_DEGREE_TO_RAD(70000), 0x3 },
+ .fs_len = 4,
},
},
.batch = {
@@ -933,11 +947,12 @@ static int st_lsm6dsx_set_full_scale(struct st_lsm6dsx_sensor *sensor,
int i, err;
fs_table = &sensor->hw->settings->fs_table[sensor->id];
- for (i = 0; i < ST_LSM6DSX_FS_LIST_SIZE; i++)
+ for (i = 0; i < fs_table->fs_len; i++) {
if (fs_table->fs_avl[i].gain == gain)
break;
+ }
- if (i == ST_LSM6DSX_FS_LIST_SIZE)
+ if (i == fs_table->fs_len)
return -EINVAL;
data = ST_LSM6DSX_SHIFT_VAL(fs_table->fs_avl[i].val,
@@ -1196,18 +1211,13 @@ static ssize_t st_lsm6dsx_sysfs_scale_avail(struct device *dev,
{
struct st_lsm6dsx_sensor *sensor = iio_priv(dev_get_drvdata(dev));
const struct st_lsm6dsx_fs_table_entry *fs_table;
- enum st_lsm6dsx_sensor_id id = sensor->id;
struct st_lsm6dsx_hw *hw = sensor->hw;
int i, len = 0;
- fs_table = &hw->settings->fs_table[id];
- for (i = 0; i < ST_LSM6DSX_FS_LIST_SIZE; i++) {
- if (!fs_table->fs_avl[i].gain)
- break;
-
+ fs_table = &hw->settings->fs_table[sensor->id];
+ for (i = 0; i < fs_table->fs_len; i++)
len += scnprintf(buf + len, PAGE_SIZE - len, "0.%06u ",
fs_table->fs_avl[i].gain);
- }
buf[len - 1] = '\n';
return len;
diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_shub.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_shub.c
index 66fbcd94642d..ea472cf6db7b 100644
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_shub.c
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_shub.c
@@ -61,6 +61,7 @@ static const struct st_lsm6dsx_ext_dev_settings st_lsm6dsx_ext_dev_table[] = {
.gain = 1500,
.val = 0x0,
}, /* 1500 uG/LSB */
+ .fs_len = 1,
},
.temp_comp = {
.addr = 0x60,
@@ -92,9 +93,11 @@ static const struct st_lsm6dsx_ext_dev_settings st_lsm6dsx_ext_dev_table[] = {
static void st_lsm6dsx_shub_wait_complete(struct st_lsm6dsx_hw *hw)
{
struct st_lsm6dsx_sensor *sensor;
+ u16 odr;
sensor = iio_priv(hw->iio_devs[ST_LSM6DSX_ID_ACC]);
- msleep((2000U / sensor->odr) + 1);
+ odr = (hw->enable_mask & BIT(ST_LSM6DSX_ID_ACC)) ? sensor->odr : 13;
+ msleep((2000U / odr) + 1);
}
/**
@@ -555,13 +558,9 @@ static ssize_t st_lsm6dsx_shub_scale_avail(struct device *dev,
int i, len = 0;
settings = sensor->ext_info.settings;
- for (i = 0; i < ST_LSM6DSX_FS_LIST_SIZE; i++) {
- u16 val = settings->fs_table.fs_avl[i].gain;
-
- if (val > 0)
- len += scnprintf(buf + len, PAGE_SIZE - len, "0.%06u ",
- val);
- }
+ for (i = 0; i < settings->fs_table.fs_len; i++)
+ len += scnprintf(buf + len, PAGE_SIZE - len, "0.%06u ",
+ settings->fs_table.fs_avl[i].gain);
buf[len - 1] = '\n';
return len;
diff --git a/drivers/iio/light/Kconfig b/drivers/iio/light/Kconfig
index 08d7e1ef2186..4a1a883dc061 100644
--- a/drivers/iio/light/Kconfig
+++ b/drivers/iio/light/Kconfig
@@ -314,6 +314,7 @@ config MAX44009
config NOA1305
tristate "ON Semiconductor NOA1305 ambient light sensor"
depends on I2C
+ select REGMAP_I2C
help
Say Y here if you want to build support for the ON Semiconductor
NOA1305 ambient light sensor.
diff --git a/drivers/iio/light/opt3001.c b/drivers/iio/light/opt3001.c
index e666879007d2..92004a2563ea 100644
--- a/drivers/iio/light/opt3001.c
+++ b/drivers/iio/light/opt3001.c
@@ -686,6 +686,7 @@ static irqreturn_t opt3001_irq(int irq, void *_iio)
struct iio_dev *iio = _iio;
struct opt3001 *opt = iio_priv(iio);
int ret;
+ bool wake_result_ready_queue = false;
if (!opt->ok_to_ignore_lock)
mutex_lock(&opt->lock);
@@ -720,13 +721,16 @@ static irqreturn_t opt3001_irq(int irq, void *_iio)
}
opt->result = ret;
opt->result_ready = true;
- wake_up(&opt->result_ready_queue);
+ wake_result_ready_queue = true;
}
out:
if (!opt->ok_to_ignore_lock)
mutex_unlock(&opt->lock);
+ if (wake_result_ready_queue)
+ wake_up(&opt->result_ready_queue);
+
return IRQ_HANDLED;
}
diff --git a/drivers/iio/light/vcnl4000.c b/drivers/iio/light/vcnl4000.c
index 51421ac32517..16dacea9eadf 100644
--- a/drivers/iio/light/vcnl4000.c
+++ b/drivers/iio/light/vcnl4000.c
@@ -398,19 +398,23 @@ static int vcnl4000_probe(struct i2c_client *client,
static const struct of_device_id vcnl_4000_of_match[] = {
{
.compatible = "vishay,vcnl4000",
- .data = "VCNL4000",
+ .data = (void *)VCNL4000,
},
{
.compatible = "vishay,vcnl4010",
- .data = "VCNL4010",
+ .data = (void *)VCNL4010,
},
{
- .compatible = "vishay,vcnl4010",
- .data = "VCNL4020",
+ .compatible = "vishay,vcnl4020",
+ .data = (void *)VCNL4010,
+ },
+ {
+ .compatible = "vishay,vcnl4040",
+ .data = (void *)VCNL4040,
},
{
.compatible = "vishay,vcnl4200",
- .data = "VCNL4200",
+ .data = (void *)VCNL4200,
},
{},
};
diff --git a/drivers/iio/proximity/srf04.c b/drivers/iio/proximity/srf04.c
index 8b50d56b0a03..01eb8cc63076 100644
--- a/drivers/iio/proximity/srf04.c
+++ b/drivers/iio/proximity/srf04.c
@@ -110,7 +110,7 @@ static int srf04_read(struct srf04_data *data)
udelay(data->cfg->trigger_pulse_us);
gpiod_set_value(data->gpiod_trig, 0);
- /* it cannot take more than 20 ms */
+ /* it should not take more than 20 ms until echo is rising */
ret = wait_for_completion_killable_timeout(&data->rising, HZ/50);
if (ret < 0) {
mutex_unlock(&data->lock);
@@ -120,7 +120,8 @@ static int srf04_read(struct srf04_data *data)
return -ETIMEDOUT;
}
- ret = wait_for_completion_killable_timeout(&data->falling, HZ/50);
+ /* it cannot take more than 50 ms until echo is falling */
+ ret = wait_for_completion_killable_timeout(&data->falling, HZ/20);
if (ret < 0) {
mutex_unlock(&data->lock);
return ret;
@@ -135,19 +136,19 @@ static int srf04_read(struct srf04_data *data)
dt_ns = ktime_to_ns(ktime_dt);
/*
- * measuring more than 3 meters is beyond the capabilities of
- * the sensor
+ * measuring more than 6,45 meters is beyond the capabilities of
+ * the supported sensors
* ==> filter out invalid results for not measuring echos of
* another us sensor
*
* formula:
- * distance 3 m
- * time = ---------- = --------- = 9404389 ns
- * speed 319 m/s
+ * distance 6,45 * 2 m
+ * time = ---------- = ------------ = 40438871 ns
+ * speed 319 m/s
*
* using a minimum speed at -20 °C of 319 m/s
*/
- if (dt_ns > 9404389)
+ if (dt_ns > 40438871)
return -EIO;
time_ns = dt_ns;
@@ -159,20 +160,20 @@ static int srf04_read(struct srf04_data *data)
* with Temp in °C
* and speed in m/s
*
- * use 343 m/s as ultrasonic speed at 20 °C here in absence of the
+ * use 343,5 m/s as ultrasonic speed at 20 °C here in absence of the
* temperature
*
* therefore:
- * time 343
- * distance = ------ * -----
- * 10^6 2
+ * time 343,5 time * 106
+ * distance = ------ * ------- = ------------
+ * 10^6 2 617176
* with time in ns
* and distance in mm (one way)
*
- * because we limit to 3 meters the multiplication with 343 just
+ * because we limit to 6,45 meters the multiplication with 106 just
* fits into 32 bit
*/
- distance_mm = time_ns * 343 / 2000000;
+ distance_mm = time_ns * 106 / 617176;
return distance_mm;
}
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index da10e6ccb43c..5920c0085d35 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -4399,6 +4399,7 @@ error2:
error1:
port_modify.set_port_cap_mask = 0;
port_modify.clr_port_cap_mask = IB_PORT_CM_SUP;
+ kfree(port);
while (--i) {
if (!rdma_cap_ib_cm(ib_device, i))
continue;
@@ -4407,6 +4408,7 @@ error1:
ib_modify_port(ib_device, port->port_num, 0, &port_modify);
ib_unregister_mad_agent(port->mad_agent);
cm_remove_port_fs(port);
+ kfree(port);
}
free:
kfree(cm_dev);
@@ -4460,6 +4462,7 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
spin_unlock_irq(&cm.state_lock);
ib_unregister_mad_agent(cur_mad_agent);
cm_remove_port_fs(port);
+ kfree(port);
}
kfree(cm_dev);
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 0e3cf3461999..d78f67623f24 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -2396,9 +2396,10 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
conn_id->cm_id.iw = NULL;
cma_exch(conn_id, RDMA_CM_DESTROYING);
mutex_unlock(&conn_id->handler_mutex);
+ mutex_unlock(&listen_id->handler_mutex);
cma_deref_id(conn_id);
rdma_destroy_id(&conn_id->id);
- goto out;
+ return ret;
}
mutex_unlock(&conn_id->handler_mutex);
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 3a8b0911c3bc..9d07378b5b42 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -199,6 +199,7 @@ void ib_mad_cleanup(void);
int ib_sa_init(void);
void ib_sa_cleanup(void);
+void rdma_nl_init(void);
void rdma_nl_exit(void);
int ib_nl_handle_resolve_resp(struct sk_buff *skb,
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 99c4a55545cf..50a92442c4f7 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -1987,8 +1987,6 @@ static int iw_query_port(struct ib_device *device,
if (!netdev)
return -ENODEV;
- dev_put(netdev);
-
port_attr->max_mtu = IB_MTU_4096;
port_attr->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
@@ -1996,19 +1994,22 @@ static int iw_query_port(struct ib_device *device,
port_attr->state = IB_PORT_DOWN;
port_attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
} else {
- inetdev = in_dev_get(netdev);
+ rcu_read_lock();
+ inetdev = __in_dev_get_rcu(netdev);
if (inetdev && inetdev->ifa_list) {
port_attr->state = IB_PORT_ACTIVE;
port_attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
- in_dev_put(inetdev);
} else {
port_attr->state = IB_PORT_INIT;
port_attr->phys_state =
IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING;
}
+
+ rcu_read_unlock();
}
+ dev_put(netdev);
err = device->ops.query_port(device, port_num, port_attr);
if (err)
return err;
@@ -2715,6 +2716,8 @@ static int __init ib_core_init(void)
goto err_comp_unbound;
}
+ rdma_nl_init();
+
ret = addr_init();
if (ret) {
pr_warn("Could't init IB address resolution\n");
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 72141c5b7c95..ade71823370f 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -372,6 +372,7 @@ EXPORT_SYMBOL(iw_cm_disconnect);
static void destroy_cm_id(struct iw_cm_id *cm_id)
{
struct iwcm_id_private *cm_id_priv;
+ struct ib_qp *qp;
unsigned long flags;
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
@@ -389,6 +390,9 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
set_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags);
spin_lock_irqsave(&cm_id_priv->lock, flags);
+ qp = cm_id_priv->qp;
+ cm_id_priv->qp = NULL;
+
switch (cm_id_priv->state) {
case IW_CM_STATE_LISTEN:
cm_id_priv->state = IW_CM_STATE_DESTROYING;
@@ -401,7 +405,7 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
cm_id_priv->state = IW_CM_STATE_DESTROYING;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
/* Abrupt close of the connection */
- (void)iwcm_modify_qp_err(cm_id_priv->qp);
+ (void)iwcm_modify_qp_err(qp);
spin_lock_irqsave(&cm_id_priv->lock, flags);
break;
case IW_CM_STATE_IDLE:
@@ -426,11 +430,9 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
BUG();
break;
}
- if (cm_id_priv->qp) {
- cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp);
- cm_id_priv->qp = NULL;
- }
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ if (qp)
+ cm_id_priv->id.device->ops.iw_rem_ref(qp);
if (cm_id->mapped) {
iwpm_remove_mapinfo(&cm_id->local_addr, &cm_id->m_local_addr);
@@ -671,11 +673,11 @@ int iw_cm_accept(struct iw_cm_id *cm_id,
BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
cm_id_priv->state = IW_CM_STATE_IDLE;
spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id_priv->qp) {
- cm_id->device->ops.iw_rem_ref(qp);
- cm_id_priv->qp = NULL;
- }
+ qp = cm_id_priv->qp;
+ cm_id_priv->qp = NULL;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ if (qp)
+ cm_id->device->ops.iw_rem_ref(qp);
clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
wake_up_all(&cm_id_priv->connect_wait);
}
@@ -696,7 +698,7 @@ int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
struct iwcm_id_private *cm_id_priv;
int ret;
unsigned long flags;
- struct ib_qp *qp;
+ struct ib_qp *qp = NULL;
cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
@@ -730,13 +732,13 @@ int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
return 0; /* success */
spin_lock_irqsave(&cm_id_priv->lock, flags);
- if (cm_id_priv->qp) {
- cm_id->device->ops.iw_rem_ref(qp);
- cm_id_priv->qp = NULL;
- }
+ qp = cm_id_priv->qp;
+ cm_id_priv->qp = NULL;
cm_id_priv->state = IW_CM_STATE_IDLE;
err:
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ if (qp)
+ cm_id->device->ops.iw_rem_ref(qp);
clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
wake_up_all(&cm_id_priv->connect_wait);
return ret;
@@ -878,6 +880,7 @@ static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv,
static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,
struct iw_cm_event *iw_event)
{
+ struct ib_qp *qp = NULL;
unsigned long flags;
int ret;
@@ -896,11 +899,13 @@ static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,
cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
} else {
/* REJECTED or RESET */
- cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp);
+ qp = cm_id_priv->qp;
cm_id_priv->qp = NULL;
cm_id_priv->state = IW_CM_STATE_IDLE;
}
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ if (qp)
+ cm_id_priv->id.device->ops.iw_rem_ref(qp);
ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
if (iw_event->private_data_len)
@@ -942,21 +947,18 @@ static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv,
static int cm_close_handler(struct iwcm_id_private *cm_id_priv,
struct iw_cm_event *iw_event)
{
+ struct ib_qp *qp;
unsigned long flags;
- int ret = 0;
+ int ret = 0, notify_event = 0;
spin_lock_irqsave(&cm_id_priv->lock, flags);
+ qp = cm_id_priv->qp;
+ cm_id_priv->qp = NULL;
- if (cm_id_priv->qp) {
- cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp);
- cm_id_priv->qp = NULL;
- }
switch (cm_id_priv->state) {
case IW_CM_STATE_ESTABLISHED:
case IW_CM_STATE_CLOSING:
cm_id_priv->state = IW_CM_STATE_IDLE;
- spin_unlock_irqrestore(&cm_id_priv->lock, flags);
- ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
- spin_lock_irqsave(&cm_id_priv->lock, flags);
+ notify_event = 1;
break;
case IW_CM_STATE_DESTROYING:
break;
@@ -965,6 +967,10 @@ static int cm_close_handler(struct iwcm_id_private *cm_id_priv,
}
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+ if (qp)
+ cm_id_priv->id.device->ops.iw_rem_ref(qp);
+ if (notify_event)
+ ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
return ret;
}
diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
index 81dbd5f41bed..8cd31ef25eff 100644
--- a/drivers/infiniband/core/netlink.c
+++ b/drivers/infiniband/core/netlink.c
@@ -42,9 +42,12 @@
#include <linux/module.h>
#include "core_priv.h"
-static DEFINE_MUTEX(rdma_nl_mutex);
static struct {
- const struct rdma_nl_cbs *cb_table;
+ const struct rdma_nl_cbs *cb_table;
+ /* Synchronizes between ongoing netlink commands and netlink client
+ * unregistration.
+ */
+ struct rw_semaphore sem;
} rdma_nl_types[RDMA_NL_NUM_CLIENTS];
bool rdma_nl_chk_listeners(unsigned int group)
@@ -75,70 +78,53 @@ static bool is_nl_msg_valid(unsigned int type, unsigned int op)
return (op < max_num_ops[type]) ? true : false;
}
-static bool
-is_nl_valid(const struct sk_buff *skb, unsigned int type, unsigned int op)
+static const struct rdma_nl_cbs *
+get_cb_table(const struct sk_buff *skb, unsigned int type, unsigned int op)
{
const struct rdma_nl_cbs *cb_table;
- if (!is_nl_msg_valid(type, op))
- return false;
-
/*
* Currently only NLDEV client is supporting netlink commands in
* non init_net net namespace.
*/
if (sock_net(skb->sk) != &init_net && type != RDMA_NL_NLDEV)
- return false;
+ return NULL;
- if (!rdma_nl_types[type].cb_table) {
- mutex_unlock(&rdma_nl_mutex);
- request_module("rdma-netlink-subsys-%d", type);
- mutex_lock(&rdma_nl_mutex);
- }
+ cb_table = READ_ONCE(rdma_nl_types[type].cb_table);
+ if (!cb_table) {
+ /*
+ * Didn't get valid reference of the table, attempt module
+ * load once.
+ */
+ up_read(&rdma_nl_types[type].sem);
- cb_table = rdma_nl_types[type].cb_table;
+ request_module("rdma-netlink-subsys-%d", type);
+ down_read(&rdma_nl_types[type].sem);
+ cb_table = READ_ONCE(rdma_nl_types[type].cb_table);
+ }
if (!cb_table || (!cb_table[op].dump && !cb_table[op].doit))
- return false;
- return true;
+ return NULL;
+ return cb_table;
}
void rdma_nl_register(unsigned int index,
const struct rdma_nl_cbs cb_table[])
{
- mutex_lock(&rdma_nl_mutex);
- if (!is_nl_msg_valid(index, 0)) {
- /*
- * All clients are not interesting in success/failure of
- * this call. They want to see the print to error log and
- * continue their initialization. Print warning for them,
- * because it is programmer's error to be here.
- */
- mutex_unlock(&rdma_nl_mutex);
- WARN(true,
- "The not-valid %u index was supplied to RDMA netlink\n",
- index);
+ if (WARN_ON(!is_nl_msg_valid(index, 0)) ||
+ WARN_ON(READ_ONCE(rdma_nl_types[index].cb_table)))
return;
- }
-
- if (rdma_nl_types[index].cb_table) {
- mutex_unlock(&rdma_nl_mutex);
- WARN(true,
- "The %u index is already registered in RDMA netlink\n",
- index);
- return;
- }
- rdma_nl_types[index].cb_table = cb_table;
- mutex_unlock(&rdma_nl_mutex);
+ /* Pairs with the READ_ONCE in is_nl_valid() */
+ smp_store_release(&rdma_nl_types[index].cb_table, cb_table);
}
EXPORT_SYMBOL(rdma_nl_register);
void rdma_nl_unregister(unsigned int index)
{
- mutex_lock(&rdma_nl_mutex);
+ down_write(&rdma_nl_types[index].sem);
rdma_nl_types[index].cb_table = NULL;
- mutex_unlock(&rdma_nl_mutex);
+ up_write(&rdma_nl_types[index].sem);
}
EXPORT_SYMBOL(rdma_nl_unregister);
@@ -170,15 +156,21 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
unsigned int index = RDMA_NL_GET_CLIENT(type);
unsigned int op = RDMA_NL_GET_OP(type);
const struct rdma_nl_cbs *cb_table;
+ int err = -EINVAL;
- if (!is_nl_valid(skb, index, op))
+ if (!is_nl_msg_valid(index, op))
return -EINVAL;
- cb_table = rdma_nl_types[index].cb_table;
+ down_read(&rdma_nl_types[index].sem);
+ cb_table = get_cb_table(skb, index, op);
+ if (!cb_table)
+ goto done;
if ((cb_table[op].flags & RDMA_NL_ADMIN_PERM) &&
- !netlink_capable(skb, CAP_NET_ADMIN))
- return -EPERM;
+ !netlink_capable(skb, CAP_NET_ADMIN)) {
+ err = -EPERM;
+ goto done;
+ }
/*
* LS responses overload the 0x100 (NLM_F_ROOT) flag. Don't
@@ -186,8 +178,8 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
*/
if (index == RDMA_NL_LS) {
if (cb_table[op].doit)
- return cb_table[op].doit(skb, nlh, extack);
- return -EINVAL;
+ err = cb_table[op].doit(skb, nlh, extack);
+ goto done;
}
/* FIXME: Convert IWCM to properly handle doit callbacks */
if ((nlh->nlmsg_flags & NLM_F_DUMP) || index == RDMA_NL_IWCM) {
@@ -195,14 +187,15 @@ static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
.dump = cb_table[op].dump,
};
if (c.dump)
- return netlink_dump_start(skb->sk, skb, nlh, &c);
- return -EINVAL;
+ err = netlink_dump_start(skb->sk, skb, nlh, &c);
+ goto done;
}
if (cb_table[op].doit)
- return cb_table[op].doit(skb, nlh, extack);
-
- return 0;
+ err = cb_table[op].doit(skb, nlh, extack);
+done:
+ up_read(&rdma_nl_types[index].sem);
+ return err;
}
/*
@@ -263,9 +256,7 @@ skip:
static void rdma_nl_rcv(struct sk_buff *skb)
{
- mutex_lock(&rdma_nl_mutex);
rdma_nl_rcv_skb(skb, &rdma_nl_rcv_msg);
- mutex_unlock(&rdma_nl_mutex);
}
int rdma_nl_unicast(struct net *net, struct sk_buff *skb, u32 pid)
@@ -297,6 +288,14 @@ int rdma_nl_multicast(struct net *net, struct sk_buff *skb,
}
EXPORT_SYMBOL(rdma_nl_multicast);
+void rdma_nl_init(void)
+{
+ int idx;
+
+ for (idx = 0; idx < RDMA_NL_NUM_CLIENTS; idx++)
+ init_rwsem(&rdma_nl_types[idx].sem);
+}
+
void rdma_nl_exit(void)
{
int idx;
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 7a7474000100..c03af08b80e7 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -778,7 +778,7 @@ static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin,
container_of(res, struct rdma_counter, res);
if (port && port != counter->port)
- return 0;
+ return -EAGAIN;
/* Dump it even query failed */
rdma_counter_query_stats(counter);
@@ -1230,7 +1230,7 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg) {
ret = -ENOMEM;
- goto err;
+ goto err_get;
}
nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
@@ -1787,10 +1787,6 @@ static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
- ret = rdma_counter_unbind_qpn(device, port, qpn, cntn);
- if (ret)
- goto err_unbind;
-
if (fill_nldev_handle(msg, device) ||
nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
@@ -1799,13 +1795,15 @@ static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
goto err_fill;
}
+ ret = rdma_counter_unbind_qpn(device, port, qpn, cntn);
+ if (ret)
+ goto err_fill;
+
nlmsg_end(msg, nlh);
ib_device_put(device);
return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
err_fill:
- rdma_counter_bind_qpn(device, port, qpn, cntn);
-err_unbind:
nlmsg_free(msg);
err:
ib_device_put(device);
diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c
index 1ab423b19f77..6eb6d2717ca5 100644
--- a/drivers/infiniband/core/security.c
+++ b/drivers/infiniband/core/security.c
@@ -426,7 +426,7 @@ int ib_create_qp_security(struct ib_qp *qp, struct ib_device *dev)
int ret;
rdma_for_each_port (dev, i) {
- is_ib = rdma_protocol_ib(dev, i++);
+ is_ib = rdma_protocol_ib(dev, i);
if (is_ib)
break;
}
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index f67a30fda1ed..163ff7ba92b7 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -451,8 +451,10 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp)
* that the hardware will not attempt to access the MR any more.
*/
if (!umem_odp->is_implicit_odp) {
+ mutex_lock(&umem_odp->umem_mutex);
ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
ib_umem_end(umem_odp));
+ mutex_unlock(&umem_odp->umem_mutex);
kvfree(umem_odp->dma_list);
kvfree(umem_odp->page_list);
}
@@ -719,6 +721,8 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
u64 addr;
struct ib_device *dev = umem_odp->umem.ibdev;
+ lockdep_assert_held(&umem_odp->umem_mutex);
+
virt = max_t(u64, virt, ib_umem_start(umem_odp));
bound = min_t(u64, bound, ib_umem_end(umem_odp));
/* Note that during the run of this function, the
@@ -726,7 +730,6 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
* faults from completion. We might be racing with other
* invalidations, so we must make sure we free each page only
* once. */
- mutex_lock(&umem_odp->umem_mutex);
for (addr = virt; addr < bound; addr += BIT(umem_odp->page_shift)) {
idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
if (umem_odp->page_list[idx]) {
@@ -757,7 +760,6 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt,
umem_odp->npages--;
}
}
- mutex_unlock(&umem_odp->umem_mutex);
}
EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages);
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 1e5aeb39f774..63f7f7db5902 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -98,7 +98,7 @@ ib_uverbs_init_udata_buf_or_null(struct ib_udata *udata,
struct ib_uverbs_device {
atomic_t refcount;
- int num_comp_vectors;
+ u32 num_comp_vectors;
struct completion comp;
struct device dev;
/* First group for device attributes, NULL terminated array */
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index f974b6854224..35c2841a569e 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -662,16 +662,17 @@ static bool find_gid_index(const union ib_gid *gid,
void *context)
{
struct find_gid_index_context *ctx = context;
+ u16 vlan_id = 0xffff;
+ int ret;
if (ctx->gid_type != gid_attr->gid_type)
return false;
- if ((!!(ctx->vlan_id != 0xffff) == !is_vlan_dev(gid_attr->ndev)) ||
- (is_vlan_dev(gid_attr->ndev) &&
- vlan_dev_vlan_id(gid_attr->ndev) != ctx->vlan_id))
+ ret = rdma_read_gid_l2_fields(gid_attr, &vlan_id, NULL);
+ if (ret)
return false;
- return true;
+ return ctx->vlan_id == vlan_id;
}
static const struct ib_gid_attr *
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index e87fc0408470..347dc242fb88 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -495,7 +495,6 @@ static int _put_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb)
ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *)));
release_ep_resources(ep);
- kfree_skb(skb);
return 0;
}
@@ -506,7 +505,6 @@ static int _put_pass_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb)
ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *)));
c4iw_put_ep(&ep->parent_ep->com);
release_ep_resources(ep);
- kfree_skb(skb);
return 0;
}
@@ -2424,20 +2422,6 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
pr_debug("ep %p tid %u\n", ep, ep->hwtid);
-
- skb_get(skb);
- rpl = cplhdr(skb);
- if (!is_t4(adapter_type)) {
- skb_trim(skb, roundup(sizeof(*rpl5), 16));
- rpl5 = (void *)rpl;
- INIT_TP_WR(rpl5, ep->hwtid);
- } else {
- skb_trim(skb, sizeof(*rpl));
- INIT_TP_WR(rpl, ep->hwtid);
- }
- OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
- ep->hwtid));
-
cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
enable_tcp_timestamps && req->tcpopt.tstamp,
(ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
@@ -2483,6 +2467,20 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
if (tcph->ece && tcph->cwr)
opt2 |= CCTRL_ECN_V(1);
}
+
+ skb_get(skb);
+ rpl = cplhdr(skb);
+ if (!is_t4(adapter_type)) {
+ skb_trim(skb, roundup(sizeof(*rpl5), 16));
+ rpl5 = (void *)rpl;
+ INIT_TP_WR(rpl5, ep->hwtid);
+ } else {
+ skb_trim(skb, sizeof(*rpl));
+ INIT_TP_WR(rpl, ep->hwtid);
+ }
+ OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
+ ep->hwtid));
+
if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) {
u32 isn = (prandom_u32() & ~7UL) - 1;
opt2 |= T5_OPT_2_VALID_F;
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index a8b9548bd1a2..599340c1f0b8 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -242,10 +242,13 @@ static void set_ep_sin6_addrs(struct c4iw_ep *ep,
}
}
-static int dump_qp(struct c4iw_qp *qp, struct c4iw_debugfs_data *qpd)
+static int dump_qp(unsigned long id, struct c4iw_qp *qp,
+ struct c4iw_debugfs_data *qpd)
{
int space;
int cc;
+ if (id != qp->wq.sq.qid)
+ return 0;
space = qpd->bufsize - qpd->pos - 1;
if (space == 0)
@@ -350,7 +353,7 @@ static int qp_open(struct inode *inode, struct file *file)
xa_lock_irq(&qpd->devp->qps);
xa_for_each(&qpd->devp->qps, index, qp)
- dump_qp(qp, qpd);
+ dump_qp(index, qp, qpd);
xa_unlock_irq(&qpd->devp->qps);
qpd->buf[qpd->pos++] = 0;
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index aa772ee0706f..35c284af574d 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -275,13 +275,17 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
struct sk_buff *skb, struct c4iw_wr_wait *wr_waitp)
{
int err;
- struct fw_ri_tpte tpt;
+ struct fw_ri_tpte *tpt;
u32 stag_idx;
static atomic_t key;
if (c4iw_fatal_error(rdev))
return -EIO;
+ tpt = kmalloc(sizeof(*tpt), GFP_KERNEL);
+ if (!tpt)
+ return -ENOMEM;
+
stag_state = stag_state > 0;
stag_idx = (*stag) >> 8;
@@ -291,6 +295,7 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
mutex_lock(&rdev->stats.lock);
rdev->stats.stag.fail++;
mutex_unlock(&rdev->stats.lock);
+ kfree(tpt);
return -ENOMEM;
}
mutex_lock(&rdev->stats.lock);
@@ -305,28 +310,28 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
/* write TPT entry */
if (reset_tpt_entry)
- memset(&tpt, 0, sizeof(tpt));
+ memset(tpt, 0, sizeof(*tpt));
else {
- tpt.valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F |
+ tpt->valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F |
FW_RI_TPTE_STAGKEY_V((*stag & FW_RI_TPTE_STAGKEY_M)) |
FW_RI_TPTE_STAGSTATE_V(stag_state) |
FW_RI_TPTE_STAGTYPE_V(type) | FW_RI_TPTE_PDID_V(pdid));
- tpt.locread_to_qpid = cpu_to_be32(FW_RI_TPTE_PERM_V(perm) |
+ tpt->locread_to_qpid = cpu_to_be32(FW_RI_TPTE_PERM_V(perm) |
(bind_enabled ? FW_RI_TPTE_MWBINDEN_F : 0) |
FW_RI_TPTE_ADDRTYPE_V((zbva ? FW_RI_ZERO_BASED_TO :
FW_RI_VA_BASED_TO))|
FW_RI_TPTE_PS_V(page_size));
- tpt.nosnoop_pbladdr = !pbl_size ? 0 : cpu_to_be32(
+ tpt->nosnoop_pbladdr = !pbl_size ? 0 : cpu_to_be32(
FW_RI_TPTE_PBLADDR_V(PBL_OFF(rdev, pbl_addr)>>3));
- tpt.len_lo = cpu_to_be32((u32)(len & 0xffffffffUL));
- tpt.va_hi = cpu_to_be32((u32)(to >> 32));
- tpt.va_lo_fbo = cpu_to_be32((u32)(to & 0xffffffffUL));
- tpt.dca_mwbcnt_pstag = cpu_to_be32(0);
- tpt.len_hi = cpu_to_be32((u32)(len >> 32));
+ tpt->len_lo = cpu_to_be32((u32)(len & 0xffffffffUL));
+ tpt->va_hi = cpu_to_be32((u32)(to >> 32));
+ tpt->va_lo_fbo = cpu_to_be32((u32)(to & 0xffffffffUL));
+ tpt->dca_mwbcnt_pstag = cpu_to_be32(0);
+ tpt->len_hi = cpu_to_be32((u32)(len >> 32));
}
err = write_adapter_mem(rdev, stag_idx +
(rdev->lldi.vr->stag.start >> 5),
- sizeof(tpt), &tpt, skb, wr_waitp);
+ sizeof(*tpt), tpt, skb, wr_waitp);
if (reset_tpt_entry) {
c4iw_put_resource(&rdev->resource.tpt_table, stag_idx);
@@ -334,6 +339,7 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
rdev->stats.stag.cur -= 32;
mutex_unlock(&rdev->stats.lock);
}
+ kfree(tpt);
return err;
}
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index eb9368be28c1..bbcac539777a 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -2737,15 +2737,11 @@ int c4iw_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *attrs,
if (CHELSIO_CHIP_VERSION(rhp->rdev.lldi.adapter_type) > CHELSIO_T6)
srq->flags = T4_SRQ_LIMIT_SUPPORT;
- ret = xa_insert_irq(&rhp->qps, srq->wq.qid, srq, GFP_KERNEL);
- if (ret)
- goto err_free_queue;
-
if (udata) {
srq_key_mm = kmalloc(sizeof(*srq_key_mm), GFP_KERNEL);
if (!srq_key_mm) {
ret = -ENOMEM;
- goto err_remove_handle;
+ goto err_free_queue;
}
srq_db_key_mm = kmalloc(sizeof(*srq_db_key_mm), GFP_KERNEL);
if (!srq_db_key_mm) {
@@ -2789,8 +2785,6 @@ err_free_srq_db_key_mm:
kfree(srq_db_key_mm);
err_free_srq_key_mm:
kfree(srq_key_mm);
-err_remove_handle:
- xa_erase_irq(&rhp->qps, srq->wq.qid);
err_free_queue:
free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
srq->wr_waitp);
@@ -2813,8 +2807,6 @@ void c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
rhp = srq->rhp;
pr_debug("%s id %d\n", __func__, srq->wq.qid);
-
- xa_erase_irq(&rhp->qps, srq->wq.qid);
ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext,
ibucontext);
free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 71cb9525c074..26b792bb1027 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -1489,7 +1489,6 @@ static int __init hfi1_mod_init(void)
goto bail_dev;
}
- hfi1_compute_tid_rdma_flow_wt();
/*
* These must be called before the driver is registered with
* the PCI subsystem.
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index 61aa5504d7c3..61362bd6d3ce 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -319,7 +319,9 @@ int pcie_speeds(struct hfi1_devdata *dd)
/*
* bus->max_bus_speed is set from the bridge's linkcap Max Link Speed
*/
- if (parent && dd->pcidev->bus->max_bus_speed != PCIE_SPEED_8_0GT) {
+ if (parent &&
+ (dd->pcidev->bus->max_bus_speed == PCIE_SPEED_2_5GT ||
+ dd->pcidev->bus->max_bus_speed == PCIE_SPEED_5_0GT)) {
dd_dev_info(dd, "Parent PCIe bridge does not support Gen3\n");
dd->link_gen3_capable = 0;
}
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 513a8aac9ccd..1a3c647675a7 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -2209,15 +2209,15 @@ int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
if (qp->s_flags & RVT_S_WAIT_RNR)
goto bail_stop;
rdi = ib_to_rvt(qp->ibqp.device);
- if (qp->s_rnr_retry == 0 &&
- !((rdi->post_parms[wqe->wr.opcode].flags &
- RVT_OPERATION_IGN_RNR_CNT) &&
- qp->s_rnr_retry_cnt == 0)) {
- status = IB_WC_RNR_RETRY_EXC_ERR;
- goto class_b;
+ if (!(rdi->post_parms[wqe->wr.opcode].flags &
+ RVT_OPERATION_IGN_RNR_CNT)) {
+ if (qp->s_rnr_retry == 0) {
+ status = IB_WC_RNR_RETRY_EXC_ERR;
+ goto class_b;
+ }
+ if (qp->s_rnr_retry_cnt < 7 && qp->s_rnr_retry_cnt > 0)
+ qp->s_rnr_retry--;
}
- if (qp->s_rnr_retry_cnt < 7 && qp->s_rnr_retry_cnt > 0)
- qp->s_rnr_retry--;
/*
* The last valid PSN is the previous PSN. For TID RDMA WRITE
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 2395fd4233a7..c61b6022575e 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -65,6 +65,7 @@
#define SDMA_DESCQ_CNT 2048
#define SDMA_DESC_INTR 64
#define INVALID_TAIL 0xffff
+#define SDMA_PAD max_t(size_t, MAX_16B_PADDING, sizeof(u32))
static uint sdma_descq_cnt = SDMA_DESCQ_CNT;
module_param(sdma_descq_cnt, uint, S_IRUGO);
@@ -1296,7 +1297,7 @@ void sdma_clean(struct hfi1_devdata *dd, size_t num_engines)
struct sdma_engine *sde;
if (dd->sdma_pad_dma) {
- dma_free_coherent(&dd->pcidev->dev, 4,
+ dma_free_coherent(&dd->pcidev->dev, SDMA_PAD,
(void *)dd->sdma_pad_dma,
dd->sdma_pad_phys);
dd->sdma_pad_dma = NULL;
@@ -1491,7 +1492,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
}
/* Allocate memory for pad */
- dd->sdma_pad_dma = dma_alloc_coherent(&dd->pcidev->dev, sizeof(u32),
+ dd->sdma_pad_dma = dma_alloc_coherent(&dd->pcidev->dev, SDMA_PAD,
&dd->sdma_pad_phys, GFP_KERNEL);
if (!dd->sdma_pad_dma) {
dd_dev_err(dd, "failed to allocate SendDMA pad memory\n");
@@ -1526,8 +1527,11 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
}
ret = rhashtable_init(tmp_sdma_rht, &sdma_rht_params);
- if (ret < 0)
+ if (ret < 0) {
+ kfree(tmp_sdma_rht);
goto bail;
+ }
+
dd->sdma_rht = tmp_sdma_rht;
dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma);
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
index b4dcc4d29f84..e53f542b60af 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.c
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -107,8 +107,6 @@ static u32 mask_generation(u32 a)
* C - Capcode
*/
-static u32 tid_rdma_flow_wt;
-
static void tid_rdma_trigger_resume(struct work_struct *work);
static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req);
static int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req,
@@ -136,6 +134,26 @@ static void update_r_next_psn_fecn(struct hfi1_packet *packet,
struct tid_rdma_flow *flow,
bool fecn);
+static void validate_r_tid_ack(struct hfi1_qp_priv *priv)
+{
+ if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
+ priv->r_tid_ack = priv->r_tid_tail;
+}
+
+static void tid_rdma_schedule_ack(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ priv->s_flags |= RVT_S_ACK_PENDING;
+ hfi1_schedule_tid_send(qp);
+}
+
+static void tid_rdma_trigger_ack(struct rvt_qp *qp)
+{
+ validate_r_tid_ack(qp->priv);
+ tid_rdma_schedule_ack(qp);
+}
+
static u64 tid_rdma_opfn_encode(struct tid_rdma_params *p)
{
return
@@ -2736,11 +2754,6 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd,
diff = cmp_psn(psn,
flow->flow_state.r_next_psn);
if (diff > 0) {
- if (!(qp->r_flags & RVT_R_RDMAR_SEQ))
- restart_tid_rdma_read_req(rcd,
- qp,
- wqe);
-
/* Drop the packet.*/
goto s_unlock;
} else if (diff < 0) {
@@ -3010,10 +3023,7 @@ nak_psn:
qpriv->s_nak_state = IB_NAK_PSN_ERROR;
/* We are NAK'ing the next expected PSN */
qpriv->s_nak_psn = mask_psn(flow->flow_state.r_next_psn);
- qpriv->s_flags |= RVT_S_ACK_PENDING;
- if (qpriv->r_tid_ack == HFI1_QP_WQE_INVALID)
- qpriv->r_tid_ack = qpriv->r_tid_tail;
- hfi1_schedule_tid_send(qp);
+ tid_rdma_trigger_ack(qp);
}
goto unlock;
}
@@ -3376,18 +3386,17 @@ u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
return sizeof(ohdr->u.tid_rdma.w_req) / sizeof(u32);
}
-void hfi1_compute_tid_rdma_flow_wt(void)
+static u32 hfi1_compute_tid_rdma_flow_wt(struct rvt_qp *qp)
{
/*
* Heuristic for computing the RNR timeout when waiting on the flow
* queue. Rather than a computationaly expensive exact estimate of when
* a flow will be available, we assume that if a QP is at position N in
* the flow queue it has to wait approximately (N + 1) * (number of
- * segments between two sync points), assuming PMTU of 4K. The rationale
- * for this is that flows are released and recycled at each sync point.
+ * segments between two sync points). The rationale for this is that
+ * flows are released and recycled at each sync point.
*/
- tid_rdma_flow_wt = MAX_TID_FLOW_PSN * enum_to_mtu(OPA_MTU_4096) /
- TID_RDMA_MAX_SEGMENT_SIZE;
+ return (MAX_TID_FLOW_PSN * qp->pmtu) >> TID_RDMA_SEGMENT_SHIFT;
}
static u32 position_in_queue(struct hfi1_qp_priv *qpriv,
@@ -3510,7 +3519,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
if (qpriv->flow_state.index >= RXE_NUM_TID_FLOWS) {
ret = hfi1_kern_setup_hw_flow(qpriv->rcd, qp);
if (ret) {
- to_seg = tid_rdma_flow_wt *
+ to_seg = hfi1_compute_tid_rdma_flow_wt(qp) *
position_in_queue(qpriv,
&rcd->flow_queue);
break;
@@ -3531,7 +3540,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
/*
* If overtaking req->acked_tail, send an RNR NAK. Because the
* QP is not queued in this case, and the issue can only be
- * caused due a delay in scheduling the second leg which we
+ * caused by a delay in scheduling the second leg which we
* cannot estimate, we use a rather arbitrary RNR timeout of
* (MAX_FLOWS / 2) segments
*/
@@ -3539,8 +3548,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
MAX_FLOWS)) {
ret = -EAGAIN;
to_seg = MAX_FLOWS >> 1;
- qpriv->s_flags |= RVT_S_ACK_PENDING;
- hfi1_schedule_tid_send(qp);
+ tid_rdma_trigger_ack(qp);
break;
}
@@ -4340,8 +4348,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet)
trace_hfi1_tid_req_rcv_write_data(qp, 0, e->opcode, e->psn, e->lpsn,
req);
trace_hfi1_tid_write_rsp_rcv_data(qp);
- if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
- priv->r_tid_ack = priv->r_tid_tail;
+ validate_r_tid_ack(priv);
if (opcode == TID_OP(WRITE_DATA_LAST)) {
release_rdma_sge_mr(e);
@@ -4380,8 +4387,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet)
}
done:
- priv->s_flags |= RVT_S_ACK_PENDING;
- hfi1_schedule_tid_send(qp);
+ tid_rdma_schedule_ack(qp);
exit:
priv->r_next_psn_kdeth = flow->flow_state.r_next_psn;
if (fecn)
@@ -4393,10 +4399,7 @@ send_nak:
if (!priv->s_nak_state) {
priv->s_nak_state = IB_NAK_PSN_ERROR;
priv->s_nak_psn = flow->flow_state.r_next_psn;
- priv->s_flags |= RVT_S_ACK_PENDING;
- if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
- priv->r_tid_ack = priv->r_tid_tail;
- hfi1_schedule_tid_send(qp);
+ tid_rdma_trigger_ack(qp);
}
goto done;
}
@@ -4944,8 +4947,7 @@ void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet)
qpriv->resync = true;
/* RESYNC request always gets a TID RDMA ACK. */
qpriv->s_nak_state = 0;
- qpriv->s_flags |= RVT_S_ACK_PENDING;
- hfi1_schedule_tid_send(qp);
+ tid_rdma_trigger_ack(qp);
bail:
if (fecn)
qp->s_flags |= RVT_S_ECN;
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.h b/drivers/infiniband/hw/hfi1/tid_rdma.h
index 1c536185261e..6e82df2190b7 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.h
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.h
@@ -17,6 +17,7 @@
#define TID_RDMA_MIN_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */
#define TID_RDMA_MAX_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */
#define TID_RDMA_MAX_PAGES (BIT(18) >> PAGE_SHIFT)
+#define TID_RDMA_SEGMENT_SHIFT 18
/*
* Bit definitions for priv->s_flags.
@@ -274,8 +275,6 @@ u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
struct ib_other_headers *ohdr,
u32 *bth1, u32 *bth2, u32 *len);
-void hfi1_compute_tid_rdma_flow_wt(void);
-
void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet);
u32 hfi1_build_tid_rdma_write_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 7bff0a1e713d..089e201d7550 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -147,9 +147,6 @@ static int pio_wait(struct rvt_qp *qp,
/* Length of buffer to create verbs txreq cache name */
#define TXREQ_NAME_LEN 24
-/* 16B trailing buffer */
-static const u8 trail_buf[MAX_16B_PADDING];
-
static uint wss_threshold = 80;
module_param(wss_threshold, uint, S_IRUGO);
MODULE_PARM_DESC(wss_threshold, "Percentage (1-100) of LLC to use as a threshold for a cacheless copy");
@@ -820,8 +817,8 @@ static int build_verbs_tx_desc(
/* add icrc, lt byte, and padding to flit */
if (extra_bytes)
- ret = sdma_txadd_kvaddr(sde->dd, &tx->txreq,
- (void *)trail_buf, extra_bytes);
+ ret = sdma_txadd_daddr(sde->dd, &tx->txreq,
+ sde->dd->sdma_pad_phys, extra_bytes);
bail_txadd:
return ret;
@@ -1089,7 +1086,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
}
/* add icrc, lt byte, and padding to flit */
if (extra_bytes)
- seg_pio_copy_mid(pbuf, trail_buf, extra_bytes);
+ seg_pio_copy_mid(pbuf, ppd->dd->sdma_pad_dma,
+ extra_bytes);
seg_pio_copy_end(pbuf);
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h
index 86783276fb1f..3bb8f78fb7b0 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.h
@@ -59,7 +59,7 @@ enum {
#define HNS_ROCE_HEM_CHUNK_LEN \
((256 - sizeof(struct list_head) - 2 * sizeof(int)) / \
- (sizeof(struct scatterlist)))
+ (sizeof(struct scatterlist) + sizeof(void *)))
#define check_whether_bt_num_3(type, hop_num) \
(type < HEM_TYPE_MTT && hop_num == 2)
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 7a89d669f8bf..e82567fcdeb7 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -5389,9 +5389,9 @@ static void hns_roce_v2_free_eq(struct hns_roce_dev *hr_dev,
return;
}
- if (eq->buf_list)
- dma_free_coherent(hr_dev->dev, buf_chk_sz,
- eq->buf_list->buf, eq->buf_list->map);
+ dma_free_coherent(hr_dev->dev, buf_chk_sz, eq->buf_list->buf,
+ eq->buf_list->map);
+ kfree(eq->buf_list);
}
static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index 9591457eb768..43ea2c13b212 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -376,7 +376,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
srq->max = roundup_pow_of_two(srq_init_attr->attr.max_wr + 1);
srq->max_gs = srq_init_attr->attr.max_sge;
- srq_desc_size = max(16, 16 * srq->max_gs);
+ srq_desc_size = roundup_pow_of_two(max(16, 16 * srq->max_gs));
srq->wqe_shift = ilog2(srq_desc_size);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 8056930bbe2c..cd9ee1664a69 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -2773,6 +2773,10 @@ int i40iw_register_rdma_device(struct i40iw_device *iwdev)
return -ENOMEM;
iwibdev = iwdev->iwibdev;
rdma_set_device_sysfs_group(&iwibdev->ibdev, &i40iw_attr_group);
+ ret = ib_device_set_netdev(&iwibdev->ibdev, iwdev->netdev, 1);
+ if (ret)
+ goto error;
+
ret = ib_register_device(&iwibdev->ibdev, "i40iw%d");
if (ret)
goto error;
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 59022b744144..d609f4659afb 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -1298,29 +1298,6 @@ static int devx_handle_mkey_create(struct mlx5_ib_dev *dev,
return 0;
}
-static void devx_free_indirect_mkey(struct rcu_head *rcu)
-{
- kfree(container_of(rcu, struct devx_obj, devx_mr.rcu));
-}
-
-/* This function to delete from the radix tree needs to be called before
- * destroying the underlying mkey. Otherwise a race might occur in case that
- * other thread will get the same mkey before this one will be deleted,
- * in that case it will fail via inserting to the tree its own data.
- *
- * Note:
- * An error in the destroy is not expected unless there is some other indirect
- * mkey which points to this one. In a kernel cleanup flow it will be just
- * destroyed in the iterative destruction call. In a user flow, in case
- * the application didn't close in the expected order it's its own problem,
- * the mkey won't be part of the tree, in both cases the kernel is safe.
- */
-static void devx_cleanup_mkey(struct devx_obj *obj)
-{
- xa_erase(&obj->ib_dev->mdev->priv.mkey_table,
- mlx5_base_mkey(obj->devx_mr.mmkey.key));
-}
-
static void devx_cleanup_subscription(struct mlx5_ib_dev *dev,
struct devx_event_subscription *sub)
{
@@ -1362,8 +1339,16 @@ static int devx_obj_cleanup(struct ib_uobject *uobject,
int ret;
dev = mlx5_udata_to_mdev(&attrs->driver_udata);
- if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY)
- devx_cleanup_mkey(obj);
+ if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
+ /*
+ * The pagefault_single_data_segment() does commands against
+ * the mmkey, we must wait for that to stop before freeing the
+ * mkey, as another allocation could get the same mkey #.
+ */
+ xa_erase(&obj->ib_dev->mdev->priv.mkey_table,
+ mlx5_base_mkey(obj->devx_mr.mmkey.key));
+ synchronize_srcu(&dev->mr_srcu);
+ }
if (obj->flags & DEVX_OBJ_FLAGS_DCT)
ret = mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct);
@@ -1382,12 +1367,6 @@ static int devx_obj_cleanup(struct ib_uobject *uobject,
devx_cleanup_subscription(dev, sub_entry);
mutex_unlock(&devx_event_table->event_xa_lock);
- if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
- call_srcu(&dev->mr_srcu, &obj->devx_mr.rcu,
- devx_free_indirect_mkey);
- return ret;
- }
-
kfree(obj);
return ret;
}
@@ -1491,26 +1470,21 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
&obj_id);
WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32));
- if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
- err = devx_handle_mkey_indirect(obj, dev, cmd_in, cmd_out);
- if (err)
- goto obj_destroy;
- }
-
err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len);
if (err)
- goto err_copy;
+ goto obj_destroy;
if (opcode == MLX5_CMD_OP_CREATE_GENERAL_OBJECT)
obj_type = MLX5_GET(general_obj_in_cmd_hdr, cmd_in, obj_type);
-
obj->obj_id = get_enc_obj_id(opcode | obj_type << 16, obj_id);
+ if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) {
+ err = devx_handle_mkey_indirect(obj, dev, cmd_in, cmd_out);
+ if (err)
+ goto obj_destroy;
+ }
return 0;
-err_copy:
- if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY)
- devx_cleanup_mkey(obj);
obj_destroy:
if (obj->flags & DEVX_OBJ_FLAGS_DCT)
mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct);
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 2ceaef3ea3fb..1a98ee2e01c4 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -606,7 +606,7 @@ struct mlx5_ib_mr {
struct mlx5_ib_dev *dev;
u32 out[MLX5_ST_SZ_DW(create_mkey_out)];
struct mlx5_core_sig_ctx *sig;
- int live;
+ unsigned int live;
void *descs_alloc;
int access_flags; /* Needed for rereg MR */
@@ -639,7 +639,6 @@ struct mlx5_ib_mw {
struct mlx5_ib_devx_mr {
struct mlx5_core_mkey mmkey;
int ndescs;
- struct rcu_head rcu;
};
struct mlx5_ib_umr_context {
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 1eff031ef048..7019c12005f4 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -84,32 +84,6 @@ static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
}
-static void update_odp_mr(struct mlx5_ib_mr *mr)
-{
- if (is_odp_mr(mr)) {
- /*
- * This barrier prevents the compiler from moving the
- * setting of umem->odp_data->private to point to our
- * MR, before reg_umr finished, to ensure that the MR
- * initialization have finished before starting to
- * handle invalidations.
- */
- smp_wmb();
- to_ib_umem_odp(mr->umem)->private = mr;
- /*
- * Make sure we will see the new
- * umem->odp_data->private value in the invalidation
- * routines, before we can get page faults on the
- * MR. Page faults can happen once we put the MR in
- * the tree, below this line. Without the barrier,
- * there can be a fault handling and an invalidation
- * before umem->odp_data->private == mr is visible to
- * the invalidation handler.
- */
- smp_wmb();
- }
-}
-
static void reg_mr_callback(int status, struct mlx5_async_work *context)
{
struct mlx5_ib_mr *mr =
@@ -1346,8 +1320,6 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mr->umem = umem;
set_mr_fields(dev, mr, npages, length, access_flags);
- update_odp_mr(mr);
-
if (use_umr) {
int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE;
@@ -1363,10 +1335,12 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
}
- if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
- mr->live = 1;
+ if (is_odp_mr(mr)) {
+ to_ib_umem_odp(mr->umem)->private = mr;
atomic_set(&mr->num_pending_prefetch, 0);
}
+ if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
+ smp_store_release(&mr->live, 1);
return &mr->ibmr;
error:
@@ -1441,6 +1415,9 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
if (!mr->umem)
return -EINVAL;
+ if (is_odp_mr(mr))
+ return -EOPNOTSUPP;
+
if (flags & IB_MR_REREG_TRANS) {
addr = virt_addr;
len = length;
@@ -1486,8 +1463,6 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
}
mr->allocated_from_cache = 0;
- if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
- mr->live = 1;
} else {
/*
* Send a UMR WQE
@@ -1516,7 +1491,6 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
set_mr_fields(dev, mr, npages, len, access_flags);
- update_odp_mr(mr);
return 0;
err:
@@ -1607,15 +1581,16 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
/* Prevent new page faults and
* prefetch requests from succeeding
*/
- mr->live = 0;
+ WRITE_ONCE(mr->live, 0);
+
+ /* Wait for all running page-fault handlers to finish. */
+ synchronize_srcu(&dev->mr_srcu);
/* dequeue pending prefetch requests for the mr */
if (atomic_read(&mr->num_pending_prefetch))
flush_workqueue(system_unbound_wq);
WARN_ON(atomic_read(&mr->num_pending_prefetch));
- /* Wait for all running page-fault handlers to finish. */
- synchronize_srcu(&dev->mr_srcu);
/* Destroy all page mappings */
if (!umem_odp->is_implicit_odp)
mlx5_ib_invalidate_range(umem_odp,
@@ -1987,14 +1962,25 @@ free:
int mlx5_ib_dealloc_mw(struct ib_mw *mw)
{
+ struct mlx5_ib_dev *dev = to_mdev(mw->device);
struct mlx5_ib_mw *mmw = to_mmw(mw);
int err;
- err = mlx5_core_destroy_mkey((to_mdev(mw->device))->mdev,
- &mmw->mmkey);
- if (!err)
- kfree(mmw);
- return err;
+ if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
+ xa_erase_irq(&dev->mdev->priv.mkey_table,
+ mlx5_base_mkey(mmw->mmkey.key));
+ /*
+ * pagefault_single_data_segment() may be accessing mmw under
+ * SRCU if the user bound an ODP MR to this MW.
+ */
+ synchronize_srcu(&dev->mr_srcu);
+ }
+
+ err = mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey);
+ if (err)
+ return err;
+ kfree(mmw);
+ return 0;
}
int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 2e9b43061797..3f9478d19376 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -178,6 +178,29 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
return;
}
+ /*
+ * The locking here is pretty subtle. Ideally the implicit children
+ * list would be protected by the umem_mutex, however that is not
+ * possible. Instead this uses a weaker update-then-lock pattern:
+ *
+ * srcu_read_lock()
+ * <change children list>
+ * mutex_lock(umem_mutex)
+ * mlx5_ib_update_xlt()
+ * mutex_unlock(umem_mutex)
+ * destroy lkey
+ *
+ * ie any change the children list must be followed by the locked
+ * update_xlt before destroying.
+ *
+ * The umem_mutex provides the acquire/release semantic needed to make
+ * the children list visible to a racing thread. While SRCU is not
+ * technically required, using it gives consistent use of the SRCU
+ * locking around the children list.
+ */
+ lockdep_assert_held(&to_ib_umem_odp(mr->umem)->umem_mutex);
+ lockdep_assert_held(&mr->dev->mr_srcu);
+
odp = odp_lookup(offset * MLX5_IMR_MTT_SIZE,
nentries * MLX5_IMR_MTT_SIZE, mr);
@@ -202,15 +225,22 @@ static void mr_leaf_free_action(struct work_struct *work)
struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work);
int idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT;
struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent;
+ struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem);
+ int srcu_key;
mr->parent = NULL;
synchronize_srcu(&mr->dev->mr_srcu);
- ib_umem_odp_release(odp);
- if (imr->live)
+ if (smp_load_acquire(&imr->live)) {
+ srcu_key = srcu_read_lock(&mr->dev->mr_srcu);
+ mutex_lock(&odp_imr->umem_mutex);
mlx5_ib_update_xlt(imr, idx, 1, 0,
MLX5_IB_UPD_XLT_INDIRECT |
MLX5_IB_UPD_XLT_ATOMIC);
+ mutex_unlock(&odp_imr->umem_mutex);
+ srcu_read_unlock(&mr->dev->mr_srcu, srcu_key);
+ }
+ ib_umem_odp_release(odp);
mlx5_mr_cache_free(mr->dev, mr);
if (atomic_dec_and_test(&imr->num_leaf_free))
@@ -278,7 +308,6 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
idx - blk_start_idx + 1, 0,
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ATOMIC);
- mutex_unlock(&umem_odp->umem_mutex);
/*
* We are now sure that the device will not access the
* memory. We can safely unmap it, and mark it as dirty if
@@ -289,10 +318,12 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
if (unlikely(!umem_odp->npages && mr->parent &&
!umem_odp->dying)) {
- WRITE_ONCE(umem_odp->dying, 1);
+ WRITE_ONCE(mr->live, 0);
+ umem_odp->dying = 1;
atomic_inc(&mr->parent->num_leaf_free);
schedule_work(&umem_odp->work);
}
+ mutex_unlock(&umem_odp->umem_mutex);
}
void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
@@ -429,8 +460,6 @@ static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd,
mr->ibmr.lkey = mr->mmkey.key;
mr->ibmr.rkey = mr->mmkey.key;
- mr->live = 1;
-
mlx5_ib_dbg(dev, "key %x dev %p mr %p\n",
mr->mmkey.key, dev->mdev, mr);
@@ -484,6 +513,8 @@ next_mr:
mtt->parent = mr;
INIT_WORK(&odp->work, mr_leaf_free_action);
+ smp_store_release(&mtt->live, 1);
+
if (!nentries)
start_idx = addr >> MLX5_IMR_MTT_SHIFT;
nentries++;
@@ -536,6 +567,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
init_waitqueue_head(&imr->q_leaf_free);
atomic_set(&imr->num_leaf_free, 0);
atomic_set(&imr->num_pending_prefetch, 0);
+ smp_store_release(&imr->live, 1);
return imr;
}
@@ -555,15 +587,19 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
if (mr->parent != imr)
continue;
+ mutex_lock(&umem_odp->umem_mutex);
ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
ib_umem_end(umem_odp));
- if (umem_odp->dying)
+ if (umem_odp->dying) {
+ mutex_unlock(&umem_odp->umem_mutex);
continue;
+ }
- WRITE_ONCE(umem_odp->dying, 1);
+ umem_odp->dying = 1;
atomic_inc(&imr->num_leaf_free);
schedule_work(&umem_odp->work);
+ mutex_unlock(&umem_odp->umem_mutex);
}
up_read(&per_mm->umem_rwsem);
@@ -773,7 +809,7 @@ next_mr:
switch (mmkey->type) {
case MLX5_MKEY_MR:
mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
- if (!mr->live || !mr->ibmr.pd) {
+ if (!smp_load_acquire(&mr->live) || !mr->ibmr.pd) {
mlx5_ib_dbg(dev, "got dead MR\n");
ret = -EFAULT;
goto srcu_unlock;
@@ -1641,12 +1677,12 @@ static bool num_pending_prefetch_inc(struct ib_pd *pd,
mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
- if (mr->ibmr.pd != pd) {
+ if (!smp_load_acquire(&mr->live)) {
ret = false;
break;
}
- if (!mr->live) {
+ if (mr->ibmr.pd != pd) {
ret = false;
break;
}
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 8937d72ddcf6..5fd071c05944 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -3249,10 +3249,12 @@ static int modify_raw_packet_qp_sq(
}
/* Only remove the old rate after new rate was set */
- if ((old_rl.rate &&
- !mlx5_rl_are_equal(&old_rl, &new_rl)) ||
- (new_state != MLX5_SQC_STATE_RDY))
+ if ((old_rl.rate && !mlx5_rl_are_equal(&old_rl, &new_rl)) ||
+ (new_state != MLX5_SQC_STATE_RDY)) {
mlx5_rl_remove_rate(dev, &old_rl);
+ if (new_state != MLX5_SQC_STATE_RDY)
+ memset(&new_rl, 0, sizeof(new_rl));
+ }
ibqp->rl = new_rl;
sq->state = new_state;
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index 5136b835e1ba..dc71b6e16a07 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -76,7 +76,7 @@ static void qedr_get_dev_fw_str(struct ib_device *ibdev, char *str)
struct qedr_dev *qedr = get_qedr_dev(ibdev);
u32 fw_ver = (u32)qedr->attr.fw_ver;
- snprintf(str, IB_FW_VERSION_NAME_MAX, "%d. %d. %d. %d",
+ snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d.%d",
(fw_ver >> 24) & 0xFF, (fw_ver >> 16) & 0xFF,
(fw_ver >> 8) & 0xFF, fw_ver & 0xFF);
}
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
index 6cac0c88cf39..36cdfbdbd325 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
@@ -230,8 +230,6 @@ static void pvrdma_free_srq(struct pvrdma_dev *dev, struct pvrdma_srq *srq)
pvrdma_page_dir_cleanup(dev, &srq->pdir);
- kfree(srq);
-
atomic_dec(&dev->num_srqs);
}
diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c
index 430314c8abd9..b4317480cee7 100644
--- a/drivers/infiniband/sw/siw/siw_qp.c
+++ b/drivers/infiniband/sw/siw/siw_qp.c
@@ -182,12 +182,19 @@ void siw_qp_llp_close(struct siw_qp *qp)
*/
void siw_qp_llp_write_space(struct sock *sk)
{
- struct siw_cep *cep = sk_to_cep(sk);
+ struct siw_cep *cep;
- cep->sk_write_space(sk);
+ read_lock(&sk->sk_callback_lock);
+
+ cep = sk_to_cep(sk);
+ if (cep) {
+ cep->sk_write_space(sk);
- if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
- (void)siw_sq_start(cep->qp);
+ if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
+ (void)siw_sq_start(cep->qp);
+ }
+
+ read_unlock(&sk->sk_callback_lock);
}
static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size)
@@ -1305,6 +1312,7 @@ int siw_qp_add(struct siw_device *sdev, struct siw_qp *qp)
void siw_free_qp(struct kref *ref)
{
struct siw_qp *found, *qp = container_of(ref, struct siw_qp, ref);
+ struct siw_base_qp *siw_base_qp = to_siw_base_qp(qp->ib_qp);
struct siw_device *sdev = qp->sdev;
unsigned long flags;
@@ -1327,4 +1335,5 @@ void siw_free_qp(struct kref *ref)
atomic_dec(&sdev->num_qp);
siw_dbg_qp(qp, "free QP\n");
kfree_rcu(qp, rcu);
+ kfree(siw_base_qp);
}
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index 869e02b69a01..b18a677832e1 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -604,7 +604,6 @@ out:
int siw_destroy_qp(struct ib_qp *base_qp, struct ib_udata *udata)
{
struct siw_qp *qp = to_siw_qp(base_qp);
- struct siw_base_qp *siw_base_qp = to_siw_base_qp(base_qp);
struct siw_ucontext *uctx =
rdma_udata_to_drv_context(udata, struct siw_ucontext,
base_ucontext);
@@ -641,7 +640,6 @@ int siw_destroy_qp(struct ib_qp *base_qp, struct ib_udata *udata)
qp->scq = qp->rcq = NULL;
siw_qp_put(qp);
- kfree(siw_base_qp);
return 0;
}
diff --git a/drivers/input/ff-memless.c b/drivers/input/ff-memless.c
index 1cb40c7475af..8229a9006917 100644
--- a/drivers/input/ff-memless.c
+++ b/drivers/input/ff-memless.c
@@ -489,6 +489,15 @@ static void ml_ff_destroy(struct ff_device *ff)
{
struct ml_device *ml = ff->private;
+ /*
+ * Even though we stop all playing effects when tearing down
+ * an input device (via input_device_flush() that calls into
+ * input_ff_flush() that stops and erases all effects), we
+ * do not actually stop the timer, and therefore we should
+ * do it here.
+ */
+ del_timer_sync(&ml->timer);
+
kfree(ml->private);
}
diff --git a/drivers/input/misc/da9063_onkey.c b/drivers/input/misc/da9063_onkey.c
index dace8577fa43..79851923ee57 100644
--- a/drivers/input/misc/da9063_onkey.c
+++ b/drivers/input/misc/da9063_onkey.c
@@ -232,10 +232,7 @@ static int da9063_onkey_probe(struct platform_device *pdev)
onkey->input->phys = onkey->phys;
onkey->input->dev.parent = &pdev->dev;
- if (onkey->key_power)
- input_set_capability(onkey->input, EV_KEY, KEY_POWER);
-
- input_set_capability(onkey->input, EV_KEY, KEY_SLEEP);
+ input_set_capability(onkey->input, EV_KEY, KEY_POWER);
INIT_DELAYED_WORK(&onkey->work, da9063_poll_on);
diff --git a/drivers/input/misc/soc_button_array.c b/drivers/input/misc/soc_button_array.c
index 97e3639e99d0..08520b3a18b8 100644
--- a/drivers/input/misc/soc_button_array.c
+++ b/drivers/input/misc/soc_button_array.c
@@ -92,11 +92,18 @@ soc_button_device_create(struct platform_device *pdev,
continue;
gpio = soc_button_lookup_gpio(&pdev->dev, info->acpi_index);
- if (gpio < 0 && gpio != -ENOENT) {
- error = gpio;
- goto err_free_mem;
- } else if (!gpio_is_valid(gpio)) {
- /* Skip GPIO if not present */
+ if (!gpio_is_valid(gpio)) {
+ /*
+ * Skip GPIO if not present. Note we deliberately
+ * ignore -EPROBE_DEFER errors here. On some devices
+ * Intel is using so called virtual GPIOs which are not
+ * GPIOs at all but some way for AML code to check some
+ * random status bits without need a custom opregion.
+ * In some cases the resources table we parse points to
+ * such a virtual GPIO, since these are not real GPIOs
+ * we do not have a driver for these so they will never
+ * show up, therefore we ignore -EPROBE_DEFER.
+ */
continue;
}
diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c
index 04fe43440a3c..2d8434b7b623 100644
--- a/drivers/input/mouse/elantech.c
+++ b/drivers/input/mouse/elantech.c
@@ -1827,31 +1827,6 @@ static int elantech_create_smbus(struct psmouse *psmouse,
leave_breadcrumbs);
}
-static bool elantech_use_host_notify(struct psmouse *psmouse,
- struct elantech_device_info *info)
-{
- if (ETP_NEW_IC_SMBUS_HOST_NOTIFY(info->fw_version))
- return true;
-
- switch (info->bus) {
- case ETP_BUS_PS2_ONLY:
- /* expected case */
- break;
- case ETP_BUS_SMB_HST_NTFY_ONLY:
- case ETP_BUS_PS2_SMB_HST_NTFY:
- /* SMbus implementation is stable since 2018 */
- if (dmi_get_bios_year() >= 2018)
- return true;
- /* fall through */
- default:
- psmouse_dbg(psmouse,
- "Ignoring SMBus bus provider %d\n", info->bus);
- break;
- }
-
- return false;
-}
-
/**
* elantech_setup_smbus - called once the PS/2 devices are enumerated
* and decides to instantiate a SMBus InterTouch device.
@@ -1871,7 +1846,7 @@ static int elantech_setup_smbus(struct psmouse *psmouse,
* i2c_blacklist_pnp_ids.
* Old ICs are up to the user to decide.
*/
- if (!elantech_use_host_notify(psmouse, info) ||
+ if (!ETP_NEW_IC_SMBUS_HOST_NOTIFY(info->fw_version) ||
psmouse_matches_pnp_id(psmouse, i2c_blacklist_pnp_ids))
return -ENXIO;
}
@@ -1891,6 +1866,34 @@ static int elantech_setup_smbus(struct psmouse *psmouse,
return 0;
}
+static bool elantech_use_host_notify(struct psmouse *psmouse,
+ struct elantech_device_info *info)
+{
+ if (ETP_NEW_IC_SMBUS_HOST_NOTIFY(info->fw_version))
+ return true;
+
+ switch (info->bus) {
+ case ETP_BUS_PS2_ONLY:
+ /* expected case */
+ break;
+ case ETP_BUS_SMB_ALERT_ONLY:
+ /* fall-through */
+ case ETP_BUS_PS2_SMB_ALERT:
+ psmouse_dbg(psmouse, "Ignoring SMBus provider through alert protocol.\n");
+ break;
+ case ETP_BUS_SMB_HST_NTFY_ONLY:
+ /* fall-through */
+ case ETP_BUS_PS2_SMB_HST_NTFY:
+ return true;
+ default:
+ psmouse_dbg(psmouse,
+ "Ignoring SMBus bus provider %d.\n",
+ info->bus);
+ }
+
+ return false;
+}
+
int elantech_init_smbus(struct psmouse *psmouse)
{
struct elantech_device_info info;
diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c
index 772493b1f665..190b9974526b 100644
--- a/drivers/input/rmi4/rmi_driver.c
+++ b/drivers/input/rmi4/rmi_driver.c
@@ -146,7 +146,7 @@ static int rmi_process_interrupt_requests(struct rmi_device *rmi_dev)
}
mutex_lock(&data->irq_mutex);
- bitmap_and(data->irq_status, data->irq_status, data->current_irq_mask,
+ bitmap_and(data->irq_status, data->irq_status, data->fn_irq_bits,
data->irq_count);
/*
* At this point, irq_status has all bits that are set in the
@@ -385,6 +385,8 @@ static int rmi_driver_set_irq_bits(struct rmi_device *rmi_dev,
bitmap_copy(data->current_irq_mask, data->new_irq_mask,
data->num_of_irq_regs);
+ bitmap_or(data->fn_irq_bits, data->fn_irq_bits, mask, data->irq_count);
+
error_unlock:
mutex_unlock(&data->irq_mutex);
return error;
@@ -398,6 +400,8 @@ static int rmi_driver_clear_irq_bits(struct rmi_device *rmi_dev,
struct device *dev = &rmi_dev->dev;
mutex_lock(&data->irq_mutex);
+ bitmap_andnot(data->fn_irq_bits,
+ data->fn_irq_bits, mask, data->irq_count);
bitmap_andnot(data->new_irq_mask,
data->current_irq_mask, mask, data->irq_count);
diff --git a/drivers/input/rmi4/rmi_f11.c b/drivers/input/rmi4/rmi_f11.c
index f28a7158b2ef..bbf9ae9f3f0c 100644
--- a/drivers/input/rmi4/rmi_f11.c
+++ b/drivers/input/rmi4/rmi_f11.c
@@ -510,7 +510,6 @@ struct f11_data {
struct rmi_2d_sensor_platform_data sensor_pdata;
unsigned long *abs_mask;
unsigned long *rel_mask;
- unsigned long *result_bits;
};
enum f11_finger_state {
@@ -1057,7 +1056,7 @@ static int rmi_f11_initialize(struct rmi_function *fn)
/*
** init instance data, fill in values and create any sysfs files
*/
- f11 = devm_kzalloc(&fn->dev, sizeof(struct f11_data) + mask_size * 3,
+ f11 = devm_kzalloc(&fn->dev, sizeof(struct f11_data) + mask_size * 2,
GFP_KERNEL);
if (!f11)
return -ENOMEM;
@@ -1076,8 +1075,6 @@ static int rmi_f11_initialize(struct rmi_function *fn)
+ sizeof(struct f11_data));
f11->rel_mask = (unsigned long *)((char *)f11
+ sizeof(struct f11_data) + mask_size);
- f11->result_bits = (unsigned long *)((char *)f11
- + sizeof(struct f11_data) + mask_size * 2);
set_bit(fn->irq_pos, f11->abs_mask);
set_bit(fn->irq_pos + 1, f11->rel_mask);
@@ -1284,8 +1281,8 @@ static irqreturn_t rmi_f11_attention(int irq, void *ctx)
valid_bytes = f11->sensor.attn_size;
memcpy(f11->sensor.data_pkt, drvdata->attn_data.data,
valid_bytes);
- drvdata->attn_data.data += f11->sensor.attn_size;
- drvdata->attn_data.size -= f11->sensor.attn_size;
+ drvdata->attn_data.data += valid_bytes;
+ drvdata->attn_data.size -= valid_bytes;
} else {
error = rmi_read_block(rmi_dev,
data_base_addr, f11->sensor.data_pkt,
diff --git a/drivers/input/rmi4/rmi_f12.c b/drivers/input/rmi4/rmi_f12.c
index d20a5d6780d1..7e97944f7616 100644
--- a/drivers/input/rmi4/rmi_f12.c
+++ b/drivers/input/rmi4/rmi_f12.c
@@ -55,6 +55,9 @@ struct f12_data {
const struct rmi_register_desc_item *data15;
u16 data15_offset;
+
+ unsigned long *abs_mask;
+ unsigned long *rel_mask;
};
static int rmi_f12_read_sensor_tuning(struct f12_data *f12)
@@ -209,8 +212,8 @@ static irqreturn_t rmi_f12_attention(int irq, void *ctx)
valid_bytes = sensor->attn_size;
memcpy(sensor->data_pkt, drvdata->attn_data.data,
valid_bytes);
- drvdata->attn_data.data += sensor->attn_size;
- drvdata->attn_data.size -= sensor->attn_size;
+ drvdata->attn_data.data += valid_bytes;
+ drvdata->attn_data.size -= valid_bytes;
} else {
retval = rmi_read_block(rmi_dev, f12->data_addr,
sensor->data_pkt, sensor->pkt_size);
@@ -291,9 +294,18 @@ static int rmi_f12_write_control_regs(struct rmi_function *fn)
static int rmi_f12_config(struct rmi_function *fn)
{
struct rmi_driver *drv = fn->rmi_dev->driver;
+ struct f12_data *f12 = dev_get_drvdata(&fn->dev);
+ struct rmi_2d_sensor *sensor;
int ret;
- drv->set_irq_bits(fn->rmi_dev, fn->irq_mask);
+ sensor = &f12->sensor;
+
+ if (!sensor->report_abs)
+ drv->clear_irq_bits(fn->rmi_dev, f12->abs_mask);
+ else
+ drv->set_irq_bits(fn->rmi_dev, f12->abs_mask);
+
+ drv->clear_irq_bits(fn->rmi_dev, f12->rel_mask);
ret = rmi_f12_write_control_regs(fn);
if (ret)
@@ -315,9 +327,12 @@ static int rmi_f12_probe(struct rmi_function *fn)
struct rmi_device_platform_data *pdata = rmi_get_platform_data(rmi_dev);
struct rmi_driver_data *drvdata = dev_get_drvdata(&rmi_dev->dev);
u16 data_offset = 0;
+ int mask_size;
rmi_dbg(RMI_DEBUG_FN, &fn->dev, "%s\n", __func__);
+ mask_size = BITS_TO_LONGS(drvdata->irq_count) * sizeof(unsigned long);
+
ret = rmi_read(fn->rmi_dev, query_addr, &buf);
if (ret < 0) {
dev_err(&fn->dev, "Failed to read general info register: %d\n",
@@ -332,10 +347,19 @@ static int rmi_f12_probe(struct rmi_function *fn)
return -ENODEV;
}
- f12 = devm_kzalloc(&fn->dev, sizeof(struct f12_data), GFP_KERNEL);
+ f12 = devm_kzalloc(&fn->dev, sizeof(struct f12_data) + mask_size * 2,
+ GFP_KERNEL);
if (!f12)
return -ENOMEM;
+ f12->abs_mask = (unsigned long *)((char *)f12
+ + sizeof(struct f12_data));
+ f12->rel_mask = (unsigned long *)((char *)f12
+ + sizeof(struct f12_data) + mask_size);
+
+ set_bit(fn->irq_pos, f12->abs_mask);
+ set_bit(fn->irq_pos + 1, f12->rel_mask);
+
f12->has_dribble = !!(buf & BIT(3));
if (fn->dev.of_node) {
diff --git a/drivers/input/rmi4/rmi_f54.c b/drivers/input/rmi4/rmi_f54.c
index 710b02595486..897105b9a98b 100644
--- a/drivers/input/rmi4/rmi_f54.c
+++ b/drivers/input/rmi4/rmi_f54.c
@@ -359,7 +359,7 @@ static const struct vb2_ops rmi_f54_queue_ops = {
static const struct vb2_queue rmi_f54_queue = {
.type = V4L2_BUF_TYPE_VIDEO_CAPTURE,
.io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF | VB2_READ,
- .buf_struct_size = sizeof(struct vb2_buffer),
+ .buf_struct_size = sizeof(struct vb2_v4l2_buffer),
.ops = &rmi_f54_queue_ops,
.mem_ops = &vb2_vmalloc_memops,
.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC,
@@ -601,7 +601,7 @@ static int rmi_f54_config(struct rmi_function *fn)
{
struct rmi_driver *drv = fn->rmi_dev->driver;
- drv->set_irq_bits(fn->rmi_dev, fn->irq_mask);
+ drv->clear_irq_bits(fn->rmi_dev, fn->irq_mask);
return 0;
}
@@ -730,6 +730,7 @@ static void rmi_f54_remove(struct rmi_function *fn)
video_unregister_device(&f54->vdev);
v4l2_device_unregister(&f54->v4l2);
+ destroy_workqueue(f54->workqueue);
}
struct rmi_function_handler rmi_f54_handler = {
diff --git a/drivers/input/touchscreen/cyttsp4_core.c b/drivers/input/touchscreen/cyttsp4_core.c
index 4b22d49a0f49..6bcffc930384 100644
--- a/drivers/input/touchscreen/cyttsp4_core.c
+++ b/drivers/input/touchscreen/cyttsp4_core.c
@@ -1990,11 +1990,6 @@ static int cyttsp4_mt_probe(struct cyttsp4 *cd)
/* get sysinfo */
md->si = &cd->sysinfo;
- if (!md->si) {
- dev_err(dev, "%s: Fail get sysinfo pointer from core p=%p\n",
- __func__, md->si);
- goto error_get_sysinfo;
- }
rc = cyttsp4_setup_input_device(cd);
if (rc)
@@ -2004,8 +1999,6 @@ static int cyttsp4_mt_probe(struct cyttsp4 *cd)
error_init_input:
input_free_device(md->input);
-error_get_sysinfo:
- input_set_drvdata(md->input, NULL);
error_alloc_failed:
dev_err(dev, "%s failed.\n", __func__);
return rc;
diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c
index 5178ea8b5f30..fb43aa708660 100644
--- a/drivers/input/touchscreen/goodix.c
+++ b/drivers/input/touchscreen/goodix.c
@@ -53,6 +53,7 @@ struct goodix_ts_data {
const char *cfg_name;
struct completion firmware_loading_complete;
unsigned long irq_flags;
+ unsigned int contact_size;
};
#define GOODIX_GPIO_INT_NAME "irq"
@@ -62,6 +63,7 @@ struct goodix_ts_data {
#define GOODIX_MAX_WIDTH 4096
#define GOODIX_INT_TRIGGER 1
#define GOODIX_CONTACT_SIZE 8
+#define GOODIX_MAX_CONTACT_SIZE 9
#define GOODIX_MAX_CONTACTS 10
#define GOODIX_CONFIG_MAX_LENGTH 240
@@ -144,6 +146,19 @@ static const struct dmi_system_id rotated_screen[] = {
{}
};
+static const struct dmi_system_id nine_bytes_report[] = {
+#if defined(CONFIG_DMI) && defined(CONFIG_X86)
+ {
+ .ident = "Lenovo YogaBook",
+ /* YB1-X91L/F and YB1-X90L/F */
+ .matches = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X9")
+ }
+ },
+#endif
+ {}
+};
+
/**
* goodix_i2c_read - read data from a register of the i2c slave device.
*
@@ -249,7 +264,7 @@ static int goodix_ts_read_input_report(struct goodix_ts_data *ts, u8 *data)
max_timeout = jiffies + msecs_to_jiffies(GOODIX_BUFFER_STATUS_TIMEOUT);
do {
error = goodix_i2c_read(ts->client, GOODIX_READ_COOR_ADDR,
- data, GOODIX_CONTACT_SIZE + 1);
+ data, ts->contact_size + 1);
if (error) {
dev_err(&ts->client->dev, "I2C transfer error: %d\n",
error);
@@ -262,12 +277,12 @@ static int goodix_ts_read_input_report(struct goodix_ts_data *ts, u8 *data)
return -EPROTO;
if (touch_num > 1) {
- data += 1 + GOODIX_CONTACT_SIZE;
+ data += 1 + ts->contact_size;
error = goodix_i2c_read(ts->client,
GOODIX_READ_COOR_ADDR +
- 1 + GOODIX_CONTACT_SIZE,
+ 1 + ts->contact_size,
data,
- GOODIX_CONTACT_SIZE *
+ ts->contact_size *
(touch_num - 1));
if (error)
return error;
@@ -286,7 +301,7 @@ static int goodix_ts_read_input_report(struct goodix_ts_data *ts, u8 *data)
return 0;
}
-static void goodix_ts_report_touch(struct goodix_ts_data *ts, u8 *coor_data)
+static void goodix_ts_report_touch_8b(struct goodix_ts_data *ts, u8 *coor_data)
{
int id = coor_data[0] & 0x0F;
int input_x = get_unaligned_le16(&coor_data[1]);
@@ -301,6 +316,21 @@ static void goodix_ts_report_touch(struct goodix_ts_data *ts, u8 *coor_data)
input_report_abs(ts->input_dev, ABS_MT_WIDTH_MAJOR, input_w);
}
+static void goodix_ts_report_touch_9b(struct goodix_ts_data *ts, u8 *coor_data)
+{
+ int id = coor_data[1] & 0x0F;
+ int input_x = get_unaligned_le16(&coor_data[3]);
+ int input_y = get_unaligned_le16(&coor_data[5]);
+ int input_w = get_unaligned_le16(&coor_data[7]);
+
+ input_mt_slot(ts->input_dev, id);
+ input_mt_report_slot_state(ts->input_dev, MT_TOOL_FINGER, true);
+ touchscreen_report_pos(ts->input_dev, &ts->prop,
+ input_x, input_y, true);
+ input_report_abs(ts->input_dev, ABS_MT_TOUCH_MAJOR, input_w);
+ input_report_abs(ts->input_dev, ABS_MT_WIDTH_MAJOR, input_w);
+}
+
/**
* goodix_process_events - Process incoming events
*
@@ -311,7 +341,7 @@ static void goodix_ts_report_touch(struct goodix_ts_data *ts, u8 *coor_data)
*/
static void goodix_process_events(struct goodix_ts_data *ts)
{
- u8 point_data[1 + GOODIX_CONTACT_SIZE * GOODIX_MAX_CONTACTS];
+ u8 point_data[1 + GOODIX_MAX_CONTACT_SIZE * GOODIX_MAX_CONTACTS];
int touch_num;
int i;
@@ -326,8 +356,12 @@ static void goodix_process_events(struct goodix_ts_data *ts)
input_report_key(ts->input_dev, KEY_LEFTMETA, point_data[0] & BIT(4));
for (i = 0; i < touch_num; i++)
- goodix_ts_report_touch(ts,
- &point_data[1 + GOODIX_CONTACT_SIZE * i]);
+ if (ts->contact_size == 9)
+ goodix_ts_report_touch_9b(ts,
+ &point_data[1 + ts->contact_size * i]);
+ else
+ goodix_ts_report_touch_8b(ts,
+ &point_data[1 + ts->contact_size * i]);
input_mt_sync_frame(ts->input_dev);
input_sync(ts->input_dev);
@@ -730,6 +764,13 @@ static int goodix_configure_dev(struct goodix_ts_data *ts)
"Applying '180 degrees rotated screen' quirk\n");
}
+ if (dmi_check_system(nine_bytes_report)) {
+ ts->contact_size = 9;
+
+ dev_dbg(&ts->client->dev,
+ "Non-standard 9-bytes report format quirk\n");
+ }
+
error = input_mt_init_slots(ts->input_dev, ts->max_touch_num,
INPUT_MT_DIRECT | INPUT_MT_DROP_UNUSED);
if (error) {
@@ -810,6 +851,7 @@ static int goodix_ts_probe(struct i2c_client *client,
ts->client = client;
i2c_set_clientdata(client, ts);
init_completion(&ts->firmware_loading_complete);
+ ts->contact_size = GOODIX_CONTACT_SIZE;
error = goodix_get_gpio_config(ts);
if (error)
diff --git a/drivers/input/touchscreen/st1232.c b/drivers/input/touchscreen/st1232.c
index 34923399ece4..1139714e72e2 100644
--- a/drivers/input/touchscreen/st1232.c
+++ b/drivers/input/touchscreen/st1232.c
@@ -81,8 +81,10 @@ static int st1232_ts_read_data(struct st1232_ts_data *ts)
for (i = 0, y = 0; i < ts->chip_info->max_fingers; i++, y += 3) {
finger[i].is_valid = buf[i + y] >> 7;
if (finger[i].is_valid) {
- finger[i].x = ((buf[i + y] & 0x0070) << 4) | buf[i + 1];
- finger[i].y = ((buf[i + y] & 0x0007) << 8) | buf[i + 2];
+ finger[i].x = ((buf[i + y] & 0x0070) << 4) |
+ buf[i + y + 1];
+ finger[i].y = ((buf[i + y] & 0x0007) << 8) |
+ buf[i + y + 2];
/* st1232 includes a z-axis / touch strength */
if (ts->chip_info->have_z)
diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c
index 7b971228df38..c498796adc07 100644
--- a/drivers/interconnect/core.c
+++ b/drivers/interconnect/core.c
@@ -405,8 +405,12 @@ void icc_set_tag(struct icc_path *path, u32 tag)
if (!path)
return;
+ mutex_lock(&icc_lock);
+
for (i = 0; i < path->num_nodes; i++)
path->reqs[i].tag = tag;
+
+ mutex_unlock(&icc_lock);
}
EXPORT_SYMBOL_GPL(icc_set_tag);
diff --git a/drivers/interconnect/qcom/qcs404.c b/drivers/interconnect/qcom/qcs404.c
index 910081d6ddc0..b4966d8f3348 100644
--- a/drivers/interconnect/qcom/qcs404.c
+++ b/drivers/interconnect/qcom/qcs404.c
@@ -433,7 +433,8 @@ static int qnoc_probe(struct platform_device *pdev)
if (!qp)
return -ENOMEM;
- data = devm_kcalloc(dev, num_nodes, sizeof(*node), GFP_KERNEL);
+ data = devm_kzalloc(dev, struct_size(data, nodes, num_nodes),
+ GFP_KERNEL);
if (!data)
return -ENOMEM;
diff --git a/drivers/interconnect/qcom/sdm845.c b/drivers/interconnect/qcom/sdm845.c
index 57955596bb59..502a6c22b41e 100644
--- a/drivers/interconnect/qcom/sdm845.c
+++ b/drivers/interconnect/qcom/sdm845.c
@@ -790,7 +790,8 @@ static int qnoc_probe(struct platform_device *pdev)
if (!qp)
return -ENOMEM;
- data = devm_kcalloc(&pdev->dev, num_nodes, sizeof(*node), GFP_KERNEL);
+ data = devm_kzalloc(&pdev->dev, struct_size(data, nodes, num_nodes),
+ GFP_KERNEL);
if (!data)
return -ENOMEM;
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 2369b8af81f3..dd555078258c 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -583,7 +583,8 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
retry:
type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK;
devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK;
- pasid = PPR_PASID(*(u64 *)&event[0]);
+ pasid = (event[0] & EVENT_DOMID_MASK_HI) |
+ (event[1] & EVENT_DOMID_MASK_LO);
flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK;
address = (u64)(((u64)event[3]) << 32) | event[2];
@@ -616,7 +617,7 @@ retry:
address, flags);
break;
case EVENT_TYPE_PAGE_TAB_ERR:
- dev_err(dev, "Event logged [PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n",
+ dev_err(dev, "Event logged [PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x pasid=0x%04x address=0x%llx flags=0x%04x]\n",
PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
pasid, address, flags);
break;
@@ -1463,6 +1464,7 @@ static void free_pagetable(struct protection_domain *domain)
* to 64 bits.
*/
static bool increase_address_space(struct protection_domain *domain,
+ unsigned long address,
gfp_t gfp)
{
unsigned long flags;
@@ -1471,8 +1473,8 @@ static bool increase_address_space(struct protection_domain *domain,
spin_lock_irqsave(&domain->lock, flags);
- if (WARN_ON_ONCE(domain->mode == PAGE_MODE_6_LEVEL))
- /* address space already 64 bit large */
+ if (address <= PM_LEVEL_SIZE(domain->mode) ||
+ WARN_ON_ONCE(domain->mode == PAGE_MODE_6_LEVEL))
goto out;
pte = (void *)get_zeroed_page(gfp);
@@ -1505,7 +1507,7 @@ static u64 *alloc_pte(struct protection_domain *domain,
BUG_ON(!is_power_of_2(page_size));
while (address > PM_LEVEL_SIZE(domain->mode))
- *updated = increase_address_space(domain, gfp) || *updated;
+ *updated = increase_address_space(domain, address, gfp) || *updated;
level = domain->mode - 1;
pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
diff --git a/drivers/iommu/amd_iommu_quirks.c b/drivers/iommu/amd_iommu_quirks.c
index c235f79b7a20..5120ce4fdce3 100644
--- a/drivers/iommu/amd_iommu_quirks.c
+++ b/drivers/iommu/amd_iommu_quirks.c
@@ -74,6 +74,19 @@ static const struct dmi_system_id ivrs_quirks[] __initconst = {
.driver_data = (void *)&ivrs_ioapic_quirks[DELL_LATITUDE_5495],
},
{
+ /*
+ * Acer Aspire A315-41 requires the very same workaround as
+ * Dell Latitude 5495
+ */
+ .callback = ivrs_ioapic_quirk_cb,
+ .ident = "Acer Aspire A315-41",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Aspire A315-41"),
+ },
+ .driver_data = (void *)&ivrs_ioapic_quirks[DELL_LATITUDE_5495],
+ },
+ {
.callback = ivrs_ioapic_quirk_cb,
.ident = "Lenovo ideapad 330S-15ARR",
.matches = {
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index c9c1612d52e0..17bd5a349119 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -130,8 +130,8 @@
#define EVENT_TYPE_INV_PPR_REQ 0x9
#define EVENT_DEVID_MASK 0xffff
#define EVENT_DEVID_SHIFT 0
-#define EVENT_DOMID_MASK 0xffff
-#define EVENT_DOMID_SHIFT 0
+#define EVENT_DOMID_MASK_LO 0xffff
+#define EVENT_DOMID_MASK_HI 0xf0000
#define EVENT_FLAGS_MASK 0xfff
#define EVENT_FLAGS_SHIFT 0x10
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index b18aac4c105e..7c503a6bc585 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -812,6 +812,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
return 0;
out_clear_smmu:
+ __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
smmu_domain->smmu = NULL;
out_unlock:
mutex_unlock(&smmu_domain->init_mutex);
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 3f974919d3bd..6db6d969e31c 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -2794,7 +2794,7 @@ static int identity_mapping(struct device *dev)
struct device_domain_info *info;
info = dev->archdata.iommu;
- if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
+ if (info && info != DUMMY_DEVICE_DOMAIN_INFO && info != DEFER_DEVICE_DOMAIN_INFO)
return (info->domain == si_domain);
return 0;
@@ -3471,7 +3471,7 @@ static bool iommu_need_mapping(struct device *dev)
if (dev->coherent_dma_mask && dev->coherent_dma_mask < dma_mask)
dma_mask = dev->coherent_dma_mask;
- if (dma_mask >= dma_get_required_mask(dev))
+ if (dma_mask >= dma_direct_get_required_mask(dev))
return false;
/*
@@ -3775,6 +3775,13 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele
return nelems;
}
+static u64 intel_get_required_mask(struct device *dev)
+{
+ if (!iommu_need_mapping(dev))
+ return dma_direct_get_required_mask(dev);
+ return DMA_BIT_MASK(32);
+}
+
static const struct dma_map_ops intel_dma_ops = {
.alloc = intel_alloc_coherent,
.free = intel_free_coherent,
@@ -3787,6 +3794,7 @@ static const struct dma_map_ops intel_dma_ops = {
.dma_supported = dma_direct_supported,
.mmap = dma_common_mmap,
.get_sgtable = dma_common_get_sgtable,
+ .get_required_mask = intel_get_required_mask,
};
static void
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 4c91359057c5..ca51036aa53c 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -166,6 +166,9 @@
#define ARM_MALI_LPAE_TTBR_READ_INNER BIT(2)
#define ARM_MALI_LPAE_TTBR_SHARE_OUTER BIT(4)
+#define ARM_MALI_LPAE_MEMATTR_IMP_DEF 0x88ULL
+#define ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC 0x8DULL
+
/* IOPTE accessors */
#define iopte_deref(pte,d) __va(iopte_to_paddr(pte, d))
@@ -1015,27 +1018,56 @@ arm_32_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
static struct io_pgtable *
arm_mali_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
{
- struct io_pgtable *iop;
+ struct arm_lpae_io_pgtable *data;
- if (cfg->ias != 48 || cfg->oas > 40)
+ /* No quirks for Mali (hopefully) */
+ if (cfg->quirks)
+ return NULL;
+
+ if (cfg->ias > 48 || cfg->oas > 40)
return NULL;
cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G);
- iop = arm_64_lpae_alloc_pgtable_s1(cfg, cookie);
- if (iop) {
- u64 mair, ttbr;
- /* Copy values as union fields overlap */
- mair = cfg->arm_lpae_s1_cfg.mair[0];
- ttbr = cfg->arm_lpae_s1_cfg.ttbr[0];
+ data = arm_lpae_alloc_pgtable(cfg);
+ if (!data)
+ return NULL;
- cfg->arm_mali_lpae_cfg.memattr = mair;
- cfg->arm_mali_lpae_cfg.transtab = ttbr |
- ARM_MALI_LPAE_TTBR_READ_INNER |
- ARM_MALI_LPAE_TTBR_ADRMODE_TABLE;
+ /* Mali seems to need a full 4-level table regardless of IAS */
+ if (data->levels < ARM_LPAE_MAX_LEVELS) {
+ data->levels = ARM_LPAE_MAX_LEVELS;
+ data->pgd_size = sizeof(arm_lpae_iopte);
}
+ /*
+ * MEMATTR: Mali has no actual notion of a non-cacheable type, so the
+ * best we can do is mimic the out-of-tree driver and hope that the
+ * "implementation-defined caching policy" is good enough. Similarly,
+ * we'll use it for the sake of a valid attribute for our 'device'
+ * index, although callers should never request that in practice.
+ */
+ cfg->arm_mali_lpae_cfg.memattr =
+ (ARM_MALI_LPAE_MEMATTR_IMP_DEF
+ << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_NC)) |
+ (ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC
+ << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE)) |
+ (ARM_MALI_LPAE_MEMATTR_IMP_DEF
+ << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV));
- return iop;
+ data->pgd = __arm_lpae_alloc_pages(data->pgd_size, GFP_KERNEL, cfg);
+ if (!data->pgd)
+ goto out_free_data;
+
+ /* Ensure the empty pgd is visible before TRANSTAB can be written */
+ wmb();
+
+ cfg->arm_mali_lpae_cfg.transtab = virt_to_phys(data->pgd) |
+ ARM_MALI_LPAE_TTBR_READ_INNER |
+ ARM_MALI_LPAE_TTBR_ADRMODE_TABLE;
+ return &data->iop;
+
+out_free_data:
+ kfree(data);
+ return NULL;
}
struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns = {
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 9da8309f7170..2639fc718117 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -1086,8 +1086,6 @@ static int ipmmu_probe(struct platform_device *pdev)
mmu->num_ctx = min(IPMMU_CTX_MAX, mmu->features->number_of_contexts);
- irq = platform_get_irq(pdev, 0);
-
/*
* Determine if this IPMMU instance is a root device by checking for
* the lack of has_cache_leaf_nodes flag or renesas,ipmmu-main property.
@@ -1106,10 +1104,9 @@ static int ipmmu_probe(struct platform_device *pdev)
/* Root devices have mandatory IRQs */
if (ipmmu_is_root(mmu)) {
- if (irq < 0) {
- dev_err(&pdev->dev, "no IRQ found\n");
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
return irq;
- }
ret = devm_request_irq(&pdev->dev, irq, ipmmu_irq, 0,
dev_name(&pdev->dev), mmu);
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index 26290f310f90..4dcbf68dfda4 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -100,6 +100,7 @@ struct rk_iommu {
struct device *dev;
void __iomem **bases;
int num_mmu;
+ int num_irq;
struct clk_bulk_data *clocks;
int num_clocks;
bool reset_disabled;
@@ -1136,7 +1137,7 @@ static int rk_iommu_probe(struct platform_device *pdev)
struct rk_iommu *iommu;
struct resource *res;
int num_res = pdev->num_resources;
- int err, i, irq;
+ int err, i;
iommu = devm_kzalloc(dev, sizeof(*iommu), GFP_KERNEL);
if (!iommu)
@@ -1163,6 +1164,10 @@ static int rk_iommu_probe(struct platform_device *pdev)
if (iommu->num_mmu == 0)
return PTR_ERR(iommu->bases[0]);
+ iommu->num_irq = platform_irq_count(pdev);
+ if (iommu->num_irq < 0)
+ return iommu->num_irq;
+
iommu->reset_disabled = device_property_read_bool(dev,
"rockchip,disable-mmu-reset");
@@ -1219,8 +1224,9 @@ static int rk_iommu_probe(struct platform_device *pdev)
pm_runtime_enable(dev);
- i = 0;
- while ((irq = platform_get_irq(pdev, i++)) != -ENXIO) {
+ for (i = 0; i < iommu->num_irq; i++) {
+ int irq = platform_get_irq(pdev, i);
+
if (irq < 0)
return irq;
@@ -1245,10 +1251,13 @@ err_unprepare_clocks:
static void rk_iommu_shutdown(struct platform_device *pdev)
{
struct rk_iommu *iommu = platform_get_drvdata(pdev);
- int i = 0, irq;
+ int i;
+
+ for (i = 0; i < iommu->num_irq; i++) {
+ int irq = platform_get_irq(pdev, i);
- while ((irq = platform_get_irq(pdev, i++)) != -ENXIO)
devm_free_irq(iommu->dev, irq, iommu);
+ }
pm_runtime_force_suspend(&pdev->dev);
}
diff --git a/drivers/irqchip/irq-al-fic.c b/drivers/irqchip/irq-al-fic.c
index 1a57cee3efab..0b0a73739756 100644
--- a/drivers/irqchip/irq-al-fic.c
+++ b/drivers/irqchip/irq-al-fic.c
@@ -15,6 +15,7 @@
/* FIC Registers */
#define AL_FIC_CAUSE 0x00
+#define AL_FIC_SET_CAUSE 0x08
#define AL_FIC_MASK 0x10
#define AL_FIC_CONTROL 0x28
@@ -126,6 +127,16 @@ static void al_fic_irq_handler(struct irq_desc *desc)
chained_irq_exit(irqchip, desc);
}
+static int al_fic_irq_retrigger(struct irq_data *data)
+{
+ struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data);
+ struct al_fic *fic = gc->private;
+
+ writel_relaxed(BIT(data->hwirq), fic->base + AL_FIC_SET_CAUSE);
+
+ return 1;
+}
+
static int al_fic_register(struct device_node *node,
struct al_fic *fic)
{
@@ -159,6 +170,7 @@ static int al_fic_register(struct device_node *node,
gc->chip_types->chip.irq_unmask = irq_gc_mask_clr_bit;
gc->chip_types->chip.irq_ack = irq_gc_ack_clr_bit;
gc->chip_types->chip.irq_set_type = al_fic_irq_set_type;
+ gc->chip_types->chip.irq_retrigger = al_fic_irq_retrigger;
gc->chip_types->chip.flags = IRQCHIP_SKIP_SET_WAKE;
gc->private = fic;
diff --git a/drivers/irqchip/irq-atmel-aic5.c b/drivers/irqchip/irq-atmel-aic5.c
index 6acad2ea0fb3..29333497ba10 100644
--- a/drivers/irqchip/irq-atmel-aic5.c
+++ b/drivers/irqchip/irq-atmel-aic5.c
@@ -313,6 +313,7 @@ static void __init sama5d3_aic_irq_fixup(void)
static const struct of_device_id aic5_irq_fixups[] __initconst = {
{ .compatible = "atmel,sama5d3", .data = sama5d3_aic_irq_fixup },
{ .compatible = "atmel,sama5d4", .data = sama5d3_aic_irq_fixup },
+ { .compatible = "microchip,sam9x60", .data = sama5d3_aic_irq_fixup },
{ /* sentinel */ },
};
@@ -390,3 +391,12 @@ static int __init sama5d4_aic5_of_init(struct device_node *node,
return aic5_of_init(node, parent, NR_SAMA5D4_IRQS);
}
IRQCHIP_DECLARE(sama5d4_aic5, "atmel,sama5d4-aic", sama5d4_aic5_of_init);
+
+#define NR_SAM9X60_IRQS 50
+
+static int __init sam9x60_aic5_of_init(struct device_node *node,
+ struct device_node *parent)
+{
+ return aic5_of_init(node, parent, NR_SAM9X60_IRQS);
+}
+IRQCHIP_DECLARE(sam9x60_aic5, "microchip,sam9x60-aic", sam9x60_aic5_of_init);
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 62e54f1a248b..787e8eec9a7f 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -175,6 +175,22 @@ static DEFINE_IDA(its_vpeid_ida);
#define gic_data_rdist_rd_base() (gic_data_rdist()->rd_base)
#define gic_data_rdist_vlpi_base() (gic_data_rdist_rd_base() + SZ_128K)
+static u16 get_its_list(struct its_vm *vm)
+{
+ struct its_node *its;
+ unsigned long its_list = 0;
+
+ list_for_each_entry(its, &its_nodes, entry) {
+ if (!its->is_v4)
+ continue;
+
+ if (vm->vlpi_count[its->list_nr])
+ __set_bit(its->list_nr, &its_list);
+ }
+
+ return (u16)its_list;
+}
+
static struct its_collection *dev_event_to_col(struct its_device *its_dev,
u32 event)
{
@@ -976,17 +992,15 @@ static void its_send_vmapp(struct its_node *its,
static void its_send_vmovp(struct its_vpe *vpe)
{
- struct its_cmd_desc desc;
+ struct its_cmd_desc desc = {};
struct its_node *its;
unsigned long flags;
int col_id = vpe->col_idx;
desc.its_vmovp_cmd.vpe = vpe;
- desc.its_vmovp_cmd.its_list = (u16)its_list_map;
if (!its_list_map) {
its = list_first_entry(&its_nodes, struct its_node, entry);
- desc.its_vmovp_cmd.seq_num = 0;
desc.its_vmovp_cmd.col = &its->collections[col_id];
its_send_single_vcommand(its, its_build_vmovp_cmd, &desc);
return;
@@ -1003,6 +1017,7 @@ static void its_send_vmovp(struct its_vpe *vpe)
raw_spin_lock_irqsave(&vmovp_lock, flags);
desc.its_vmovp_cmd.seq_num = vmovp_seq_num++;
+ desc.its_vmovp_cmd.its_list = get_its_list(vpe->its_vm);
/* Emit VMOVPs */
list_for_each_entry(its, &its_nodes, entry) {
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 422664ac5f53..6bb1f682f78b 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -59,7 +59,7 @@ static struct gic_chip_data gic_data __read_mostly;
static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key);
#define GIC_ID_NR (1U << GICD_TYPER_ID_BITS(gic_data.rdists.gicd_typer))
-#define GIC_LINE_NR max(GICD_TYPER_SPIS(gic_data.rdists.gicd_typer), 1020U)
+#define GIC_LINE_NR min(GICD_TYPER_SPIS(gic_data.rdists.gicd_typer), 1020U)
#define GIC_ESPI_NR GICD_TYPER_ESPIS(gic_data.rdists.gicd_typer)
/*
@@ -87,6 +87,15 @@ static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key);
*/
static DEFINE_STATIC_KEY_FALSE(supports_pseudo_nmis);
+/*
+ * Global static key controlling whether an update to PMR allowing more
+ * interrupts requires to be propagated to the redistributor (DSB SY).
+ * And this needs to be exported for modules to be able to enable
+ * interrupts...
+ */
+DEFINE_STATIC_KEY_FALSE(gic_pmr_sync);
+EXPORT_SYMBOL(gic_pmr_sync);
+
/* ppi_nmi_refs[n] == number of cpus having ppi[n + 16] set as NMI */
static refcount_t *ppi_nmi_refs;
@@ -1502,6 +1511,17 @@ static void gic_enable_nmi_support(void)
for (i = 0; i < gic_data.ppi_nr; i++)
refcount_set(&ppi_nmi_refs[i], 0);
+ /*
+ * Linux itself doesn't use 1:N distribution, so has no need to
+ * set PMHE. The only reason to have it set is if EL3 requires it
+ * (and we can't change it).
+ */
+ if (gic_read_ctlr() & ICC_CTLR_EL1_PMHE_MASK)
+ static_branch_enable(&gic_pmr_sync);
+
+ pr_info("%s ICC_PMR_EL1 synchronisation\n",
+ static_branch_unlikely(&gic_pmr_sync) ? "Forcing" : "Relaxing");
+
static_branch_enable(&supports_pseudo_nmis);
if (static_branch_likely(&supports_deactivate_key))
diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
index c72c036aea76..7d0a12fe2714 100644
--- a/drivers/irqchip/irq-sifive-plic.c
+++ b/drivers/irqchip/irq-sifive-plic.c
@@ -97,7 +97,7 @@ static inline void plic_irq_toggle(const struct cpumask *mask,
}
}
-static void plic_irq_enable(struct irq_data *d)
+static void plic_irq_unmask(struct irq_data *d)
{
unsigned int cpu = cpumask_any_and(irq_data_get_affinity_mask(d),
cpu_online_mask);
@@ -106,7 +106,7 @@ static void plic_irq_enable(struct irq_data *d)
plic_irq_toggle(cpumask_of(cpu), d->hwirq, 1);
}
-static void plic_irq_disable(struct irq_data *d)
+static void plic_irq_mask(struct irq_data *d)
{
plic_irq_toggle(cpu_possible_mask, d->hwirq, 0);
}
@@ -125,10 +125,8 @@ static int plic_set_affinity(struct irq_data *d,
if (cpu >= nr_cpu_ids)
return -EINVAL;
- if (!irqd_irq_disabled(d)) {
- plic_irq_toggle(cpu_possible_mask, d->hwirq, 0);
- plic_irq_toggle(cpumask_of(cpu), d->hwirq, 1);
- }
+ plic_irq_toggle(cpu_possible_mask, d->hwirq, 0);
+ plic_irq_toggle(cpumask_of(cpu), d->hwirq, 1);
irq_data_update_effective_affinity(d, cpumask_of(cpu));
@@ -136,14 +134,18 @@ static int plic_set_affinity(struct irq_data *d,
}
#endif
+static void plic_irq_eoi(struct irq_data *d)
+{
+ struct plic_handler *handler = this_cpu_ptr(&plic_handlers);
+
+ writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
+}
+
static struct irq_chip plic_chip = {
.name = "SiFive PLIC",
- /*
- * There is no need to mask/unmask PLIC interrupts. They are "masked"
- * by reading claim and "unmasked" when writing it back.
- */
- .irq_enable = plic_irq_enable,
- .irq_disable = plic_irq_disable,
+ .irq_mask = plic_irq_mask,
+ .irq_unmask = plic_irq_unmask,
+ .irq_eoi = plic_irq_eoi,
#ifdef CONFIG_SMP
.irq_set_affinity = plic_set_affinity,
#endif
@@ -152,7 +154,7 @@ static struct irq_chip plic_chip = {
static int plic_irqdomain_map(struct irq_domain *d, unsigned int irq,
irq_hw_number_t hwirq)
{
- irq_set_chip_and_handler(irq, &plic_chip, handle_simple_irq);
+ irq_set_chip_and_handler(irq, &plic_chip, handle_fasteoi_irq);
irq_set_chip_data(irq, NULL);
irq_set_noprobe(irq);
return 0;
@@ -188,7 +190,6 @@ static void plic_handle_irq(struct pt_regs *regs)
hwirq);
else
generic_handle_irq(irq);
- writel(hwirq, claim);
}
csr_set(sie, SIE_SEIE);
}
@@ -251,8 +252,8 @@ static int __init plic_init(struct device_node *node,
continue;
}
- /* skip context holes */
- if (parent.args[0] == -1)
+ /* skip contexts other than supervisor external interrupt */
+ if (parent.args[0] != IRQ_S_EXT)
continue;
hartid = plic_find_hart_id(parent.np);
diff --git a/drivers/isdn/capi/capi.c b/drivers/isdn/capi/capi.c
index c92b405b7646..ba8619524231 100644
--- a/drivers/isdn/capi/capi.c
+++ b/drivers/isdn/capi/capi.c
@@ -744,7 +744,7 @@ capi_poll(struct file *file, poll_table *wait)
poll_wait(file, &(cdev->recvwait), wait);
mask = EPOLLOUT | EPOLLWRNORM;
- if (!skb_queue_empty(&cdev->recvqueue))
+ if (!skb_queue_empty_lockless(&cdev->recvqueue))
mask |= EPOLLIN | EPOLLRDNORM;
return mask;
}
diff --git a/drivers/macintosh/windfarm_cpufreq_clamp.c b/drivers/macintosh/windfarm_cpufreq_clamp.c
index 705c6200814b..7b726f00f183 100644
--- a/drivers/macintosh/windfarm_cpufreq_clamp.c
+++ b/drivers/macintosh/windfarm_cpufreq_clamp.c
@@ -18,7 +18,7 @@
static int clamped;
static struct wf_control *clamp_control;
-static struct dev_pm_qos_request qos_req;
+static struct freq_qos_request qos_req;
static unsigned int min_freq, max_freq;
static int clamp_set(struct wf_control *ct, s32 value)
@@ -35,7 +35,7 @@ static int clamp_set(struct wf_control *ct, s32 value)
}
clamped = value;
- return dev_pm_qos_update_request(&qos_req, freq);
+ return freq_qos_update_request(&qos_req, freq);
}
static int clamp_get(struct wf_control *ct, s32 *value)
@@ -77,38 +77,44 @@ static int __init wf_cpufreq_clamp_init(void)
min_freq = policy->cpuinfo.min_freq;
max_freq = policy->cpuinfo.max_freq;
+
+ ret = freq_qos_add_request(&policy->constraints, &qos_req, FREQ_QOS_MAX,
+ max_freq);
+
cpufreq_cpu_put(policy);
+ if (ret < 0) {
+ pr_err("%s: Failed to add freq constraint (%d)\n", __func__,
+ ret);
+ return ret;
+ }
+
dev = get_cpu_device(0);
if (unlikely(!dev)) {
pr_warn("%s: No cpu device for cpu0\n", __func__);
- return -ENODEV;
+ ret = -ENODEV;
+ goto fail;
}
clamp = kmalloc(sizeof(struct wf_control), GFP_KERNEL);
- if (clamp == NULL)
- return -ENOMEM;
-
- ret = dev_pm_qos_add_request(dev, &qos_req, DEV_PM_QOS_MAX_FREQUENCY,
- max_freq);
- if (ret < 0) {
- pr_err("%s: Failed to add freq constraint (%d)\n", __func__,
- ret);
- goto free;
+ if (clamp == NULL) {
+ ret = -ENOMEM;
+ goto fail;
}
clamp->ops = &clamp_ops;
clamp->name = "cpufreq-clamp";
ret = wf_register_control(clamp);
if (ret)
- goto fail;
+ goto free;
+
clamp_control = clamp;
return 0;
- fail:
- dev_pm_qos_remove_request(&qos_req);
free:
kfree(clamp);
+ fail:
+ freq_qos_remove_request(&qos_req);
return ret;
}
@@ -116,7 +122,7 @@ static void __exit wf_cpufreq_clamp_exit(void)
{
if (clamp_control) {
wf_unregister_control(clamp_control);
- dev_pm_qos_remove_request(&qos_req);
+ freq_qos_remove_request(&qos_req);
}
}
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index aa98953f4462..d6d5ab23c088 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -38,9 +38,9 @@ config MD_AUTODETECT
default y
---help---
If you say Y here, then the kernel will try to autodetect raid
- arrays as part of its boot process.
+ arrays as part of its boot process.
- If you don't use raid and say Y, this autodetection can cause
+ If you don't use raid and say Y, this autodetection can cause
a several-second delay in the boot time due to various
synchronisation steps that are part of this step.
@@ -290,7 +290,7 @@ config DM_SNAPSHOT
depends on BLK_DEV_DM
select DM_BUFIO
---help---
- Allow volume managers to take writable snapshots of a device.
+ Allow volume managers to take writable snapshots of a device.
config DM_THIN_PROVISIONING
tristate "Thin provisioning target"
@@ -298,7 +298,7 @@ config DM_THIN_PROVISIONING
select DM_PERSISTENT_DATA
select DM_BIO_PRISON
---help---
- Provides thin provisioning and snapshots that share a data store.
+ Provides thin provisioning and snapshots that share a data store.
config DM_CACHE
tristate "Cache target (EXPERIMENTAL)"
@@ -307,23 +307,23 @@ config DM_CACHE
select DM_PERSISTENT_DATA
select DM_BIO_PRISON
---help---
- dm-cache attempts to improve performance of a block device by
- moving frequently used data to a smaller, higher performance
- device. Different 'policy' plugins can be used to change the
- algorithms used to select which blocks are promoted, demoted,
- cleaned etc. It supports writeback and writethrough modes.
+ dm-cache attempts to improve performance of a block device by
+ moving frequently used data to a smaller, higher performance
+ device. Different 'policy' plugins can be used to change the
+ algorithms used to select which blocks are promoted, demoted,
+ cleaned etc. It supports writeback and writethrough modes.
config DM_CACHE_SMQ
tristate "Stochastic MQ Cache Policy (EXPERIMENTAL)"
depends on DM_CACHE
default y
---help---
- A cache policy that uses a multiqueue ordered by recent hits
- to select which blocks should be promoted and demoted.
- This is meant to be a general purpose policy. It prioritises
- reads over writes. This SMQ policy (vs MQ) offers the promise
- of less memory utilization, improved performance and increased
- adaptability in the face of changing workloads.
+ A cache policy that uses a multiqueue ordered by recent hits
+ to select which blocks should be promoted and demoted.
+ This is meant to be a general purpose policy. It prioritises
+ reads over writes. This SMQ policy (vs MQ) offers the promise
+ of less memory utilization, improved performance and increased
+ adaptability in the face of changing workloads.
config DM_WRITECACHE
tristate "Writecache target"
@@ -343,9 +343,9 @@ config DM_ERA
select DM_PERSISTENT_DATA
select DM_BIO_PRISON
---help---
- dm-era tracks which parts of a block device are written to
- over time. Useful for maintaining cache coherency when using
- vendor snapshots.
+ dm-era tracks which parts of a block device are written to
+ over time. Useful for maintaining cache coherency when using
+ vendor snapshots.
config DM_CLONE
tristate "Clone target (EXPERIMENTAL)"
@@ -353,20 +353,20 @@ config DM_CLONE
default n
select DM_PERSISTENT_DATA
---help---
- dm-clone produces a one-to-one copy of an existing, read-only source
- device into a writable destination device. The cloned device is
- visible/mountable immediately and the copy of the source device to the
- destination device happens in the background, in parallel with user
- I/O.
+ dm-clone produces a one-to-one copy of an existing, read-only source
+ device into a writable destination device. The cloned device is
+ visible/mountable immediately and the copy of the source device to the
+ destination device happens in the background, in parallel with user
+ I/O.
- If unsure, say N.
+ If unsure, say N.
config DM_MIRROR
tristate "Mirror target"
depends on BLK_DEV_DM
---help---
- Allow volume managers to mirror logical volumes, also
- needed for live data migration tools such as 'pvmove'.
+ Allow volume managers to mirror logical volumes, also
+ needed for live data migration tools such as 'pvmove'.
config DM_LOG_USERSPACE
tristate "Mirror userspace logging"
@@ -483,7 +483,7 @@ config DM_FLAKEY
tristate "Flakey target"
depends on BLK_DEV_DM
---help---
- A target that intermittently fails I/O for debugging purposes.
+ A target that intermittently fails I/O for debugging purposes.
config DM_VERITY
tristate "Verity target support"
diff --git a/drivers/md/bcache/Makefile b/drivers/md/bcache/Makefile
index d26b35195825..fd714628da6a 100644
--- a/drivers/md/bcache/Makefile
+++ b/drivers/md/bcache/Makefile
@@ -5,5 +5,3 @@ obj-$(CONFIG_BCACHE) += bcache.o
bcache-y := alloc.o bset.o btree.o closure.o debug.o extents.o\
io.o journal.o movinggc.o request.o stats.o super.o sysfs.o trace.o\
util.o writeback.o
-
-CFLAGS_request.o += -Iblock
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 6f776823b9ba..a1df0d95151c 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -377,7 +377,10 @@ retry_invalidate:
if (!fifo_full(&ca->free_inc))
goto retry_invalidate;
- bch_prio_write(ca);
+ if (bch_prio_write(ca, false) < 0) {
+ ca->invalidate_needs_gc = 1;
+ wake_up_gc(ca->set);
+ }
}
}
out:
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 013e35a9e317..9198c1b480d9 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -582,6 +582,7 @@ struct cache_set {
*/
wait_queue_head_t btree_cache_wait;
struct task_struct *btree_cache_alloc_lock;
+ spinlock_t btree_cannibalize_lock;
/*
* When we free a btree node, we increment the gen of the bucket the
@@ -723,6 +724,7 @@ struct cache_set {
unsigned int gc_always_rewrite:1;
unsigned int shrinker_disabled:1;
unsigned int copy_gc_enabled:1;
+ unsigned int idle_max_writeback_rate_enabled:1;
#define BUCKET_HASH_BITS 12
struct hlist_head bucket_hash[1 << BUCKET_HASH_BITS];
@@ -977,7 +979,7 @@ bool bch_cached_dev_error(struct cached_dev *dc);
__printf(2, 3)
bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...);
-void bch_prio_write(struct cache *ca);
+int bch_prio_write(struct cache *ca, bool wait);
void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent);
extern struct workqueue_struct *bcache_wq;
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 08768796b543..cffcdc9feefb 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -155,6 +155,7 @@ int __bch_keylist_realloc(struct keylist *l, unsigned int u64s)
return 0;
}
+/* Pop the top key of keylist by pointing l->top to its previous key */
struct bkey *bch_keylist_pop(struct keylist *l)
{
struct bkey *k = l->keys;
@@ -168,6 +169,7 @@ struct bkey *bch_keylist_pop(struct keylist *l)
return l->top = k;
}
+/* Pop the bottom key of keylist and update l->top_p */
void bch_keylist_pop_front(struct keylist *l)
{
l->top_p -= bkey_u64s(l->keys);
@@ -309,7 +311,6 @@ void bch_btree_keys_free(struct btree_keys *b)
t->tree = NULL;
t->data = NULL;
}
-EXPORT_SYMBOL(bch_btree_keys_free);
int bch_btree_keys_alloc(struct btree_keys *b,
unsigned int page_order,
@@ -342,7 +343,6 @@ err:
bch_btree_keys_free(b);
return -ENOMEM;
}
-EXPORT_SYMBOL(bch_btree_keys_alloc);
void bch_btree_keys_init(struct btree_keys *b, const struct btree_keys_ops *ops,
bool *expensive_debug_checks)
@@ -361,7 +361,6 @@ void bch_btree_keys_init(struct btree_keys *b, const struct btree_keys_ops *ops,
* any more.
*/
}
-EXPORT_SYMBOL(bch_btree_keys_init);
/* Binary tree stuff for auxiliary search trees */
@@ -678,7 +677,6 @@ void bch_bset_init_next(struct btree_keys *b, struct bset *i, uint64_t magic)
bch_bset_build_unwritten_tree(b);
}
-EXPORT_SYMBOL(bch_bset_init_next);
/*
* Build auxiliary binary tree 'struct bset_tree *t', this tree is used to
@@ -732,7 +730,6 @@ void bch_bset_build_written_tree(struct btree_keys *b)
j = inorder_next(j, t->size))
make_bfloat(t, j);
}
-EXPORT_SYMBOL(bch_bset_build_written_tree);
/* Insert */
@@ -780,7 +777,6 @@ fix_right: do {
j = j * 2 + 1;
} while (j < t->size);
}
-EXPORT_SYMBOL(bch_bset_fix_invalidated_key);
static void bch_bset_fix_lookup_table(struct btree_keys *b,
struct bset_tree *t,
@@ -855,7 +851,6 @@ bool bch_bkey_try_merge(struct btree_keys *b, struct bkey *l, struct bkey *r)
return b->ops->key_merge(b, l, r);
}
-EXPORT_SYMBOL(bch_bkey_try_merge);
void bch_bset_insert(struct btree_keys *b, struct bkey *where,
struct bkey *insert)
@@ -875,7 +870,6 @@ void bch_bset_insert(struct btree_keys *b, struct bkey *where,
bkey_copy(where, insert);
bch_bset_fix_lookup_table(b, t, where);
}
-EXPORT_SYMBOL(bch_bset_insert);
unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
struct bkey *replace_key)
@@ -931,7 +925,6 @@ copy: bkey_copy(m, k);
merged:
return status;
}
-EXPORT_SYMBOL(bch_btree_insert_key);
/* Lookup */
@@ -1077,7 +1070,6 @@ struct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t,
return i.l;
}
-EXPORT_SYMBOL(__bch_bset_search);
/* Btree iterator */
@@ -1132,7 +1124,6 @@ struct bkey *bch_btree_iter_init(struct btree_keys *b,
{
return __bch_btree_iter_init(b, iter, search, b->set);
}
-EXPORT_SYMBOL(bch_btree_iter_init);
static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
btree_iter_cmp_fn *cmp)
@@ -1165,7 +1156,6 @@ struct bkey *bch_btree_iter_next(struct btree_iter *iter)
return __bch_btree_iter_next(iter, btree_iter_cmp);
}
-EXPORT_SYMBOL(bch_btree_iter_next);
struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter,
struct btree_keys *b, ptr_filter_fn fn)
@@ -1196,7 +1186,6 @@ int bch_bset_sort_state_init(struct bset_sort_state *state,
return mempool_init_page_pool(&state->pool, 1, page_order);
}
-EXPORT_SYMBOL(bch_bset_sort_state_init);
static void btree_mergesort(struct btree_keys *b, struct bset *out,
struct btree_iter *iter,
@@ -1313,7 +1302,6 @@ void bch_btree_sort_partial(struct btree_keys *b, unsigned int start,
EBUG_ON(oldsize >= 0 && bch_count_data(b) != oldsize);
}
-EXPORT_SYMBOL(bch_btree_sort_partial);
void bch_btree_sort_and_fix_extents(struct btree_keys *b,
struct btree_iter *iter,
@@ -1366,7 +1354,6 @@ void bch_btree_sort_lazy(struct btree_keys *b, struct bset_sort_state *state)
out:
bch_bset_build_written_tree(b);
}
-EXPORT_SYMBOL(bch_btree_sort_lazy);
void bch_btree_keys_stats(struct btree_keys *b, struct bset_stats *stats)
{
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index ba434d9ac720..14d6c33b0957 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -543,6 +543,11 @@ static void bch_btree_leaf_dirty(struct btree *b, atomic_t *journal_ref)
set_btree_node_dirty(b);
+ /*
+ * w->journal is always the oldest journal pin of all bkeys
+ * in the leaf node, to make sure the oldest jset seq won't
+ * be increased before this btree node is flushed.
+ */
if (journal_ref) {
if (w->journal &&
journal_pin_cmp(b->c, w->journal, journal_ref)) {
@@ -723,6 +728,8 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
* IO can always make forward progress:
*/
nr /= c->btree_pages;
+ if (nr == 0)
+ nr = 1;
nr = min_t(unsigned long, nr, mca_can_free(c));
i = 0;
@@ -884,15 +891,17 @@ out:
static int mca_cannibalize_lock(struct cache_set *c, struct btree_op *op)
{
- struct task_struct *old;
-
- old = cmpxchg(&c->btree_cache_alloc_lock, NULL, current);
- if (old && old != current) {
+ spin_lock(&c->btree_cannibalize_lock);
+ if (likely(c->btree_cache_alloc_lock == NULL)) {
+ c->btree_cache_alloc_lock = current;
+ } else if (c->btree_cache_alloc_lock != current) {
if (op)
prepare_to_wait(&c->btree_cache_wait, &op->wait,
TASK_UNINTERRUPTIBLE);
+ spin_unlock(&c->btree_cannibalize_lock);
return -EINTR;
}
+ spin_unlock(&c->btree_cannibalize_lock);
return 0;
}
@@ -927,10 +936,12 @@ static struct btree *mca_cannibalize(struct cache_set *c, struct btree_op *op,
*/
static void bch_cannibalize_unlock(struct cache_set *c)
{
+ spin_lock(&c->btree_cannibalize_lock);
if (c->btree_cache_alloc_lock == current) {
c->btree_cache_alloc_lock = NULL;
wake_up(&c->btree_cache_wait);
}
+ spin_unlock(&c->btree_cannibalize_lock);
}
static struct btree *mca_alloc(struct cache_set *c, struct btree_op *op,
diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c
index c12cd809ab19..0164a1fe94a9 100644
--- a/drivers/md/bcache/closure.c
+++ b/drivers/md/bcache/closure.c
@@ -45,7 +45,6 @@ void closure_sub(struct closure *cl, int v)
{
closure_put_after_sub(cl, atomic_sub_return(v, &cl->remaining));
}
-EXPORT_SYMBOL(closure_sub);
/*
* closure_put - decrement a closure's refcount
@@ -54,7 +53,6 @@ void closure_put(struct closure *cl)
{
closure_put_after_sub(cl, atomic_dec_return(&cl->remaining));
}
-EXPORT_SYMBOL(closure_put);
/*
* closure_wake_up - wake up all closures on a wait list, without memory barrier
@@ -76,7 +74,6 @@ void __closure_wake_up(struct closure_waitlist *wait_list)
closure_sub(cl, CLOSURE_WAITING + 1);
}
}
-EXPORT_SYMBOL(__closure_wake_up);
/**
* closure_wait - add a closure to a waitlist
@@ -96,7 +93,6 @@ bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl)
return true;
}
-EXPORT_SYMBOL(closure_wait);
struct closure_syncer {
struct task_struct *task;
@@ -131,7 +127,6 @@ void __sched __closure_sync(struct closure *cl)
__set_current_state(TASK_RUNNING);
}
-EXPORT_SYMBOL(__closure_sync);
#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
@@ -149,7 +144,6 @@ void closure_debug_create(struct closure *cl)
list_add(&cl->all, &closure_list);
spin_unlock_irqrestore(&closure_list_lock, flags);
}
-EXPORT_SYMBOL(closure_debug_create);
void closure_debug_destroy(struct closure *cl)
{
@@ -162,7 +156,6 @@ void closure_debug_destroy(struct closure *cl)
list_del(&cl->all);
spin_unlock_irqrestore(&closure_list_lock, flags);
}
-EXPORT_SYMBOL(closure_debug_destroy);
static struct dentry *closure_debug;
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 41adcd1546f1..73478a91a342 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -62,18 +62,6 @@ static void bch_data_insert_keys(struct closure *cl)
struct bkey *replace_key = op->replace ? &op->replace_key : NULL;
int ret;
- /*
- * If we're looping, might already be waiting on
- * another journal write - can't wait on more than one journal write at
- * a time
- *
- * XXX: this looks wrong
- */
-#if 0
- while (atomic_read(&s->cl.remaining) & CLOSURE_WAITING)
- closure_sync(&s->cl);
-#endif
-
if (!op->replace)
journal_ref = bch_journal(op->c, &op->insert_keys,
op->flush_journal ? cl : NULL);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 20ed838e9413..77e9869345e7 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -92,10 +92,11 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
pr_debug("read sb version %llu, flags %llu, seq %llu, journal size %u",
sb->version, sb->flags, sb->seq, sb->keys);
- err = "Not a bcache superblock";
+ err = "Not a bcache superblock (bad offset)";
if (sb->offset != SB_SECTOR)
goto err;
+ err = "Not a bcache superblock (bad magic)";
if (memcmp(sb->magic, bcache_magic, 16))
goto err;
@@ -529,12 +530,29 @@ static void prio_io(struct cache *ca, uint64_t bucket, int op,
closure_sync(cl);
}
-void bch_prio_write(struct cache *ca)
+int bch_prio_write(struct cache *ca, bool wait)
{
int i;
struct bucket *b;
struct closure cl;
+ pr_debug("free_prio=%zu, free_none=%zu, free_inc=%zu",
+ fifo_used(&ca->free[RESERVE_PRIO]),
+ fifo_used(&ca->free[RESERVE_NONE]),
+ fifo_used(&ca->free_inc));
+
+ /*
+ * Pre-check if there are enough free buckets. In the non-blocking
+ * scenario it's better to fail early rather than starting to allocate
+ * buckets and do a cleanup later in case of failure.
+ */
+ if (!wait) {
+ size_t avail = fifo_used(&ca->free[RESERVE_PRIO]) +
+ fifo_used(&ca->free[RESERVE_NONE]);
+ if (prio_buckets(ca) > avail)
+ return -ENOMEM;
+ }
+
closure_init_stack(&cl);
lockdep_assert_held(&ca->set->bucket_lock);
@@ -544,9 +562,6 @@ void bch_prio_write(struct cache *ca)
atomic_long_add(ca->sb.bucket_size * prio_buckets(ca),
&ca->meta_sectors_written);
- //pr_debug("free %zu, free_inc %zu, unused %zu", fifo_used(&ca->free),
- // fifo_used(&ca->free_inc), fifo_used(&ca->unused));
-
for (i = prio_buckets(ca) - 1; i >= 0; --i) {
long bucket;
struct prio_set *p = ca->disk_buckets;
@@ -564,7 +579,7 @@ void bch_prio_write(struct cache *ca)
p->magic = pset_magic(&ca->sb);
p->csum = bch_crc64(&p->magic, bucket_bytes(ca) - 8);
- bucket = bch_bucket_alloc(ca, RESERVE_PRIO, true);
+ bucket = bch_bucket_alloc(ca, RESERVE_PRIO, wait);
BUG_ON(bucket == -1);
mutex_unlock(&ca->set->bucket_lock);
@@ -593,6 +608,7 @@ void bch_prio_write(struct cache *ca)
ca->prio_last_buckets[i] = ca->prio_buckets[i];
}
+ return 0;
}
static void prio_read(struct cache *ca, uint64_t bucket)
@@ -761,20 +777,28 @@ static inline int idx_to_first_minor(int idx)
static void bcache_device_free(struct bcache_device *d)
{
+ struct gendisk *disk = d->disk;
+
lockdep_assert_held(&bch_register_lock);
- pr_info("%s stopped", d->disk->disk_name);
+ if (disk)
+ pr_info("%s stopped", disk->disk_name);
+ else
+ pr_err("bcache device (NULL gendisk) stopped");
if (d->c)
bcache_device_detach(d);
- if (d->disk && d->disk->flags & GENHD_FL_UP)
- del_gendisk(d->disk);
- if (d->disk && d->disk->queue)
- blk_cleanup_queue(d->disk->queue);
- if (d->disk) {
+
+ if (disk) {
+ if (disk->flags & GENHD_FL_UP)
+ del_gendisk(disk);
+
+ if (disk->queue)
+ blk_cleanup_queue(disk->queue);
+
ida_simple_remove(&bcache_device_idx,
- first_minor_to_idx(d->disk->first_minor));
- put_disk(d->disk);
+ first_minor_to_idx(disk->first_minor));
+ put_disk(disk);
}
bioset_exit(&d->bio_split);
@@ -1769,6 +1793,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
sema_init(&c->sb_write_mutex, 1);
mutex_init(&c->bucket_lock);
init_waitqueue_head(&c->btree_cache_wait);
+ spin_lock_init(&c->btree_cannibalize_lock);
init_waitqueue_head(&c->bucket_wait);
init_waitqueue_head(&c->gc_wait);
sema_init(&c->uuid_write_mutex, 1);
@@ -1809,6 +1834,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb)
c->congested_read_threshold_us = 2000;
c->congested_write_threshold_us = 20000;
c->error_limit = DEFAULT_IO_ERROR_LIMIT;
+ c->idle_max_writeback_rate_enabled = 1;
WARN_ON(test_and_clear_bit(CACHE_SET_IO_DISABLE, &c->flags));
return c;
@@ -1954,7 +1980,7 @@ static int run_cache_set(struct cache_set *c)
mutex_lock(&c->bucket_lock);
for_each_cache(ca, c, i)
- bch_prio_write(ca);
+ bch_prio_write(ca, true);
mutex_unlock(&c->bucket_lock);
err = "cannot allocate new UUID bucket";
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 627dcea0f5b6..733e2ddf3c78 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -134,6 +134,7 @@ rw_attribute(expensive_debug_checks);
rw_attribute(cache_replacement_policy);
rw_attribute(btree_shrinker_disabled);
rw_attribute(copy_gc_enabled);
+rw_attribute(idle_max_writeback_rate);
rw_attribute(gc_after_writeback);
rw_attribute(size);
@@ -747,6 +748,8 @@ SHOW(__bch_cache_set)
sysfs_printf(gc_always_rewrite, "%i", c->gc_always_rewrite);
sysfs_printf(btree_shrinker_disabled, "%i", c->shrinker_disabled);
sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled);
+ sysfs_printf(idle_max_writeback_rate, "%i",
+ c->idle_max_writeback_rate_enabled);
sysfs_printf(gc_after_writeback, "%i", c->gc_after_writeback);
sysfs_printf(io_disable, "%i",
test_bit(CACHE_SET_IO_DISABLE, &c->flags));
@@ -864,6 +867,9 @@ STORE(__bch_cache_set)
sysfs_strtoul_bool(gc_always_rewrite, c->gc_always_rewrite);
sysfs_strtoul_bool(btree_shrinker_disabled, c->shrinker_disabled);
sysfs_strtoul_bool(copy_gc_enabled, c->copy_gc_enabled);
+ sysfs_strtoul_bool(idle_max_writeback_rate,
+ c->idle_max_writeback_rate_enabled);
+
/*
* write gc_after_writeback here may overwrite an already set
* BCH_DO_AUTO_GC, it doesn't matter because this flag will be
@@ -954,6 +960,7 @@ static struct attribute *bch_cache_set_internal_files[] = {
&sysfs_gc_always_rewrite,
&sysfs_btree_shrinker_disabled,
&sysfs_copy_gc_enabled,
+ &sysfs_idle_max_writeback_rate,
&sysfs_gc_after_writeback,
&sysfs_io_disable,
&sysfs_cutoff_writeback,
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index d60268fe49e1..4a40f9eadeaf 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -122,6 +122,10 @@ static void __update_writeback_rate(struct cached_dev *dc)
static bool set_at_max_writeback_rate(struct cache_set *c,
struct cached_dev *dc)
{
+ /* Don't sst max writeback rate if it is disabled */
+ if (!c->idle_max_writeback_rate_enabled)
+ return false;
+
/* Don't set max writeback rate if gc is running */
if (!c->gc_mark_valid)
return false;
diff --git a/drivers/md/dm-bio-prison-v1.c b/drivers/md/dm-bio-prison-v1.c
index b5389890bbc3..1f8f98efd97a 100644
--- a/drivers/md/dm-bio-prison-v1.c
+++ b/drivers/md/dm-bio-prison-v1.c
@@ -150,11 +150,10 @@ static int bio_detain(struct dm_bio_prison *prison,
struct dm_bio_prison_cell **cell_result)
{
int r;
- unsigned long flags;
- spin_lock_irqsave(&prison->lock, flags);
+ spin_lock_irq(&prison->lock);
r = __bio_detain(prison, key, inmate, cell_prealloc, cell_result);
- spin_unlock_irqrestore(&prison->lock, flags);
+ spin_unlock_irq(&prison->lock);
return r;
}
@@ -198,11 +197,9 @@ void dm_cell_release(struct dm_bio_prison *prison,
struct dm_bio_prison_cell *cell,
struct bio_list *bios)
{
- unsigned long flags;
-
- spin_lock_irqsave(&prison->lock, flags);
+ spin_lock_irq(&prison->lock);
__cell_release(prison, cell, bios);
- spin_unlock_irqrestore(&prison->lock, flags);
+ spin_unlock_irq(&prison->lock);
}
EXPORT_SYMBOL_GPL(dm_cell_release);
@@ -250,12 +247,10 @@ void dm_cell_visit_release(struct dm_bio_prison *prison,
void *context,
struct dm_bio_prison_cell *cell)
{
- unsigned long flags;
-
- spin_lock_irqsave(&prison->lock, flags);
+ spin_lock_irq(&prison->lock);
visit_fn(context, cell);
rb_erase(&cell->node, &prison->cells);
- spin_unlock_irqrestore(&prison->lock, flags);
+ spin_unlock_irq(&prison->lock);
}
EXPORT_SYMBOL_GPL(dm_cell_visit_release);
@@ -275,11 +270,10 @@ int dm_cell_promote_or_release(struct dm_bio_prison *prison,
struct dm_bio_prison_cell *cell)
{
int r;
- unsigned long flags;
- spin_lock_irqsave(&prison->lock, flags);
+ spin_lock_irq(&prison->lock);
r = __promote_or_release(prison, cell);
- spin_unlock_irqrestore(&prison->lock, flags);
+ spin_unlock_irq(&prison->lock);
return r;
}
@@ -379,10 +373,9 @@ EXPORT_SYMBOL_GPL(dm_deferred_entry_dec);
int dm_deferred_set_add_work(struct dm_deferred_set *ds, struct list_head *work)
{
int r = 1;
- unsigned long flags;
unsigned next_entry;
- spin_lock_irqsave(&ds->lock, flags);
+ spin_lock_irq(&ds->lock);
if ((ds->sweeper == ds->current_entry) &&
!ds->entries[ds->current_entry].count)
r = 0;
@@ -392,7 +385,7 @@ int dm_deferred_set_add_work(struct dm_deferred_set *ds, struct list_head *work)
if (!ds->entries[next_entry].count)
ds->current_entry = next_entry;
}
- spin_unlock_irqrestore(&ds->lock, flags);
+ spin_unlock_irq(&ds->lock);
return r;
}
diff --git a/drivers/md/dm-bio-prison-v2.c b/drivers/md/dm-bio-prison-v2.c
index b092cdc8e1ae..8ee019eda32d 100644
--- a/drivers/md/dm-bio-prison-v2.c
+++ b/drivers/md/dm-bio-prison-v2.c
@@ -177,11 +177,10 @@ bool dm_cell_get_v2(struct dm_bio_prison_v2 *prison,
struct dm_bio_prison_cell_v2 **cell_result)
{
int r;
- unsigned long flags;
- spin_lock_irqsave(&prison->lock, flags);
+ spin_lock_irq(&prison->lock);
r = __get(prison, key, lock_level, inmate, cell_prealloc, cell_result);
- spin_unlock_irqrestore(&prison->lock, flags);
+ spin_unlock_irq(&prison->lock);
return r;
}
@@ -261,11 +260,10 @@ int dm_cell_lock_v2(struct dm_bio_prison_v2 *prison,
struct dm_bio_prison_cell_v2 **cell_result)
{
int r;
- unsigned long flags;
- spin_lock_irqsave(&prison->lock, flags);
+ spin_lock_irq(&prison->lock);
r = __lock(prison, key, lock_level, cell_prealloc, cell_result);
- spin_unlock_irqrestore(&prison->lock, flags);
+ spin_unlock_irq(&prison->lock);
return r;
}
@@ -285,11 +283,9 @@ void dm_cell_quiesce_v2(struct dm_bio_prison_v2 *prison,
struct dm_bio_prison_cell_v2 *cell,
struct work_struct *continuation)
{
- unsigned long flags;
-
- spin_lock_irqsave(&prison->lock, flags);
+ spin_lock_irq(&prison->lock);
__quiesce(prison, cell, continuation);
- spin_unlock_irqrestore(&prison->lock, flags);
+ spin_unlock_irq(&prison->lock);
}
EXPORT_SYMBOL_GPL(dm_cell_quiesce_v2);
@@ -309,11 +305,10 @@ int dm_cell_lock_promote_v2(struct dm_bio_prison_v2 *prison,
unsigned new_lock_level)
{
int r;
- unsigned long flags;
- spin_lock_irqsave(&prison->lock, flags);
+ spin_lock_irq(&prison->lock);
r = __promote(prison, cell, new_lock_level);
- spin_unlock_irqrestore(&prison->lock, flags);
+ spin_unlock_irq(&prison->lock);
return r;
}
@@ -342,11 +337,10 @@ bool dm_cell_unlock_v2(struct dm_bio_prison_v2 *prison,
struct bio_list *bios)
{
bool r;
- unsigned long flags;
- spin_lock_irqsave(&prison->lock, flags);
+ spin_lock_irq(&prison->lock);
r = __unlock(prison, cell, bios);
- spin_unlock_irqrestore(&prison->lock, flags);
+ spin_unlock_irq(&prison->lock);
return r;
}
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index d249cf8ac277..2d32821b3a5b 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -74,22 +74,19 @@ static bool __iot_idle_for(struct io_tracker *iot, unsigned long jifs)
static bool iot_idle_for(struct io_tracker *iot, unsigned long jifs)
{
bool r;
- unsigned long flags;
- spin_lock_irqsave(&iot->lock, flags);
+ spin_lock_irq(&iot->lock);
r = __iot_idle_for(iot, jifs);
- spin_unlock_irqrestore(&iot->lock, flags);
+ spin_unlock_irq(&iot->lock);
return r;
}
static void iot_io_begin(struct io_tracker *iot, sector_t len)
{
- unsigned long flags;
-
- spin_lock_irqsave(&iot->lock, flags);
+ spin_lock_irq(&iot->lock);
iot->in_flight += len;
- spin_unlock_irqrestore(&iot->lock, flags);
+ spin_unlock_irq(&iot->lock);
}
static void __iot_io_end(struct io_tracker *iot, sector_t len)
@@ -172,7 +169,6 @@ static void __commit(struct work_struct *_ws)
{
struct batcher *b = container_of(_ws, struct batcher, commit_work);
blk_status_t r;
- unsigned long flags;
struct list_head work_items;
struct work_struct *ws, *tmp;
struct continuation *k;
@@ -186,12 +182,12 @@ static void __commit(struct work_struct *_ws)
* We have to grab these before the commit_op to avoid a race
* condition.
*/
- spin_lock_irqsave(&b->lock, flags);
+ spin_lock_irq(&b->lock);
list_splice_init(&b->work_items, &work_items);
bio_list_merge(&bios, &b->bios);
bio_list_init(&b->bios);
b->commit_scheduled = false;
- spin_unlock_irqrestore(&b->lock, flags);
+ spin_unlock_irq(&b->lock);
r = b->commit_op(b->commit_context);
@@ -238,13 +234,12 @@ static void async_commit(struct batcher *b)
static void continue_after_commit(struct batcher *b, struct continuation *k)
{
- unsigned long flags;
bool commit_scheduled;
- spin_lock_irqsave(&b->lock, flags);
+ spin_lock_irq(&b->lock);
commit_scheduled = b->commit_scheduled;
list_add_tail(&k->ws.entry, &b->work_items);
- spin_unlock_irqrestore(&b->lock, flags);
+ spin_unlock_irq(&b->lock);
if (commit_scheduled)
async_commit(b);
@@ -255,13 +250,12 @@ static void continue_after_commit(struct batcher *b, struct continuation *k)
*/
static void issue_after_commit(struct batcher *b, struct bio *bio)
{
- unsigned long flags;
bool commit_scheduled;
- spin_lock_irqsave(&b->lock, flags);
+ spin_lock_irq(&b->lock);
commit_scheduled = b->commit_scheduled;
bio_list_add(&b->bios, bio);
- spin_unlock_irqrestore(&b->lock, flags);
+ spin_unlock_irq(&b->lock);
if (commit_scheduled)
async_commit(b);
@@ -273,12 +267,11 @@ static void issue_after_commit(struct batcher *b, struct bio *bio)
static void schedule_commit(struct batcher *b)
{
bool immediate;
- unsigned long flags;
- spin_lock_irqsave(&b->lock, flags);
+ spin_lock_irq(&b->lock);
immediate = !list_empty(&b->work_items) || !bio_list_empty(&b->bios);
b->commit_scheduled = true;
- spin_unlock_irqrestore(&b->lock, flags);
+ spin_unlock_irq(&b->lock);
if (immediate)
async_commit(b);
@@ -542,7 +535,7 @@ static void wake_migration_worker(struct cache *cache)
static struct dm_bio_prison_cell_v2 *alloc_prison_cell(struct cache *cache)
{
- return dm_bio_prison_alloc_cell_v2(cache->prison, GFP_NOWAIT);
+ return dm_bio_prison_alloc_cell_v2(cache->prison, GFP_NOIO);
}
static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell_v2 *cell)
@@ -554,9 +547,7 @@ static struct dm_cache_migration *alloc_migration(struct cache *cache)
{
struct dm_cache_migration *mg;
- mg = mempool_alloc(&cache->migration_pool, GFP_NOWAIT);
- if (!mg)
- return NULL;
+ mg = mempool_alloc(&cache->migration_pool, GFP_NOIO);
memset(mg, 0, sizeof(*mg));
@@ -632,23 +623,19 @@ static struct per_bio_data *init_per_bio_data(struct bio *bio)
static void defer_bio(struct cache *cache, struct bio *bio)
{
- unsigned long flags;
-
- spin_lock_irqsave(&cache->lock, flags);
+ spin_lock_irq(&cache->lock);
bio_list_add(&cache->deferred_bios, bio);
- spin_unlock_irqrestore(&cache->lock, flags);
+ spin_unlock_irq(&cache->lock);
wake_deferred_bio_worker(cache);
}
static void defer_bios(struct cache *cache, struct bio_list *bios)
{
- unsigned long flags;
-
- spin_lock_irqsave(&cache->lock, flags);
+ spin_lock_irq(&cache->lock);
bio_list_merge(&cache->deferred_bios, bios);
bio_list_init(bios);
- spin_unlock_irqrestore(&cache->lock, flags);
+ spin_unlock_irq(&cache->lock);
wake_deferred_bio_worker(cache);
}
@@ -664,10 +651,6 @@ static bool bio_detain_shared(struct cache *cache, dm_oblock_t oblock, struct bi
struct dm_bio_prison_cell_v2 *cell_prealloc, *cell;
cell_prealloc = alloc_prison_cell(cache); /* FIXME: allow wait if calling from worker */
- if (!cell_prealloc) {
- defer_bio(cache, bio);
- return false;
- }
build_key(oblock, end, &key);
r = dm_cell_get_v2(cache->prison, &key, lock_level(bio), bio, cell_prealloc, &cell);
@@ -762,33 +745,27 @@ static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
static void set_discard(struct cache *cache, dm_dblock_t b)
{
- unsigned long flags;
-
BUG_ON(from_dblock(b) >= from_dblock(cache->discard_nr_blocks));
atomic_inc(&cache->stats.discard_count);
- spin_lock_irqsave(&cache->lock, flags);
+ spin_lock_irq(&cache->lock);
set_bit(from_dblock(b), cache->discard_bitset);
- spin_unlock_irqrestore(&cache->lock, flags);
+ spin_unlock_irq(&cache->lock);
}
static void clear_discard(struct cache *cache, dm_dblock_t b)
{
- unsigned long flags;
-
- spin_lock_irqsave(&cache->lock, flags);
+ spin_lock_irq(&cache->lock);
clear_bit(from_dblock(b), cache->discard_bitset);
- spin_unlock_irqrestore(&cache->lock, flags);
+ spin_unlock_irq(&cache->lock);
}
static bool is_discarded(struct cache *cache, dm_dblock_t b)
{
int r;
- unsigned long flags;
-
- spin_lock_irqsave(&cache->lock, flags);
+ spin_lock_irq(&cache->lock);
r = test_bit(from_dblock(b), cache->discard_bitset);
- spin_unlock_irqrestore(&cache->lock, flags);
+ spin_unlock_irq(&cache->lock);
return r;
}
@@ -796,12 +773,10 @@ static bool is_discarded(struct cache *cache, dm_dblock_t b)
static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b)
{
int r;
- unsigned long flags;
-
- spin_lock_irqsave(&cache->lock, flags);
+ spin_lock_irq(&cache->lock);
r = test_bit(from_dblock(oblock_to_dblock(cache, b)),
cache->discard_bitset);
- spin_unlock_irqrestore(&cache->lock, flags);
+ spin_unlock_irq(&cache->lock);
return r;
}
@@ -833,17 +808,16 @@ static void remap_to_cache(struct cache *cache, struct bio *bio,
static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
{
- unsigned long flags;
struct per_bio_data *pb;
- spin_lock_irqsave(&cache->lock, flags);
+ spin_lock_irq(&cache->lock);
if (cache->need_tick_bio && !op_is_flush(bio->bi_opf) &&
bio_op(bio) != REQ_OP_DISCARD) {
pb = get_per_bio_data(bio);
pb->tick = true;
cache->need_tick_bio = false;
}
- spin_unlock_irqrestore(&cache->lock, flags);
+ spin_unlock_irq(&cache->lock);
}
static void __remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
@@ -1493,11 +1467,6 @@ static int mg_lock_writes(struct dm_cache_migration *mg)
struct dm_bio_prison_cell_v2 *prealloc;
prealloc = alloc_prison_cell(cache);
- if (!prealloc) {
- DMERR_LIMIT("%s: alloc_prison_cell failed", cache_device_name(cache));
- mg_complete(mg, false);
- return -ENOMEM;
- }
/*
* Prevent writes to the block, but allow reads to continue.
@@ -1535,11 +1504,6 @@ static int mg_start(struct cache *cache, struct policy_work *op, struct bio *bio
}
mg = alloc_migration(cache);
- if (!mg) {
- policy_complete_background_work(cache->policy, op, false);
- background_work_end(cache);
- return -ENOMEM;
- }
mg->op = op;
mg->overwrite_bio = bio;
@@ -1628,10 +1592,6 @@ static int invalidate_lock(struct dm_cache_migration *mg)
struct dm_bio_prison_cell_v2 *prealloc;
prealloc = alloc_prison_cell(cache);
- if (!prealloc) {
- invalidate_complete(mg, false);
- return -ENOMEM;
- }
build_key(mg->invalidate_oblock, oblock_succ(mg->invalidate_oblock), &key);
r = dm_cell_lock_v2(cache->prison, &key,
@@ -1669,10 +1629,6 @@ static int invalidate_start(struct cache *cache, dm_cblock_t cblock,
return -EPERM;
mg = alloc_migration(cache);
- if (!mg) {
- background_work_end(cache);
- return -ENOMEM;
- }
mg->overwrite_bio = bio;
mg->invalidate_cblock = cblock;
@@ -1913,17 +1869,16 @@ static void process_deferred_bios(struct work_struct *ws)
{
struct cache *cache = container_of(ws, struct cache, deferred_bio_worker);
- unsigned long flags;
bool commit_needed = false;
struct bio_list bios;
struct bio *bio;
bio_list_init(&bios);
- spin_lock_irqsave(&cache->lock, flags);
+ spin_lock_irq(&cache->lock);
bio_list_merge(&bios, &cache->deferred_bios);
bio_list_init(&cache->deferred_bios);
- spin_unlock_irqrestore(&cache->lock, flags);
+ spin_unlock_irq(&cache->lock);
while ((bio = bio_list_pop(&bios))) {
if (bio->bi_opf & REQ_PREFLUSH)
diff --git a/drivers/md/dm-clone-metadata.c b/drivers/md/dm-clone-metadata.c
index 6bc8c1d1c351..08c552e5e41b 100644
--- a/drivers/md/dm-clone-metadata.c
+++ b/drivers/md/dm-clone-metadata.c
@@ -712,7 +712,7 @@ static int __metadata_commit(struct dm_clone_metadata *cmd)
static int __flush_dmap(struct dm_clone_metadata *cmd, struct dirty_map *dmap)
{
int r;
- unsigned long word, flags;
+ unsigned long word;
word = 0;
do {
@@ -736,9 +736,9 @@ static int __flush_dmap(struct dm_clone_metadata *cmd, struct dirty_map *dmap)
return r;
/* Update the changed flag */
- spin_lock_irqsave(&cmd->bitmap_lock, flags);
+ spin_lock_irq(&cmd->bitmap_lock);
dmap->changed = 0;
- spin_unlock_irqrestore(&cmd->bitmap_lock, flags);
+ spin_unlock_irq(&cmd->bitmap_lock);
return 0;
}
@@ -746,7 +746,6 @@ static int __flush_dmap(struct dm_clone_metadata *cmd, struct dirty_map *dmap)
int dm_clone_metadata_commit(struct dm_clone_metadata *cmd)
{
int r = -EPERM;
- unsigned long flags;
struct dirty_map *dmap, *next_dmap;
down_write(&cmd->lock);
@@ -770,9 +769,9 @@ int dm_clone_metadata_commit(struct dm_clone_metadata *cmd)
}
/* Swap dirty bitmaps */
- spin_lock_irqsave(&cmd->bitmap_lock, flags);
+ spin_lock_irq(&cmd->bitmap_lock);
cmd->current_dmap = next_dmap;
- spin_unlock_irqrestore(&cmd->bitmap_lock, flags);
+ spin_unlock_irq(&cmd->bitmap_lock);
/*
* No one is accessing the old dirty bitmap anymore, so we can flush
@@ -817,9 +816,9 @@ int dm_clone_cond_set_range(struct dm_clone_metadata *cmd, unsigned long start,
{
int r = 0;
struct dirty_map *dmap;
- unsigned long word, region_nr, flags;
+ unsigned long word, region_nr;
- spin_lock_irqsave(&cmd->bitmap_lock, flags);
+ spin_lock_irq(&cmd->bitmap_lock);
if (cmd->read_only) {
r = -EPERM;
@@ -836,7 +835,7 @@ int dm_clone_cond_set_range(struct dm_clone_metadata *cmd, unsigned long start,
}
}
out:
- spin_unlock_irqrestore(&cmd->bitmap_lock, flags);
+ spin_unlock_irq(&cmd->bitmap_lock);
return r;
}
@@ -903,13 +902,11 @@ out:
void dm_clone_metadata_set_read_only(struct dm_clone_metadata *cmd)
{
- unsigned long flags;
-
down_write(&cmd->lock);
- spin_lock_irqsave(&cmd->bitmap_lock, flags);
+ spin_lock_irq(&cmd->bitmap_lock);
cmd->read_only = 1;
- spin_unlock_irqrestore(&cmd->bitmap_lock, flags);
+ spin_unlock_irq(&cmd->bitmap_lock);
if (!cmd->fail_io)
dm_bm_set_read_only(cmd->bm);
@@ -919,13 +916,11 @@ void dm_clone_metadata_set_read_only(struct dm_clone_metadata *cmd)
void dm_clone_metadata_set_read_write(struct dm_clone_metadata *cmd)
{
- unsigned long flags;
-
down_write(&cmd->lock);
- spin_lock_irqsave(&cmd->bitmap_lock, flags);
+ spin_lock_irq(&cmd->bitmap_lock);
cmd->read_only = 0;
- spin_unlock_irqrestore(&cmd->bitmap_lock, flags);
+ spin_unlock_irq(&cmd->bitmap_lock);
if (!cmd->fail_io)
dm_bm_set_read_write(cmd->bm);
diff --git a/drivers/md/dm-clone-metadata.h b/drivers/md/dm-clone-metadata.h
index 434bff08508b..3fe50a781c11 100644
--- a/drivers/md/dm-clone-metadata.h
+++ b/drivers/md/dm-clone-metadata.h
@@ -44,7 +44,9 @@ int dm_clone_set_region_hydrated(struct dm_clone_metadata *cmd, unsigned long re
* @start: Starting region number
* @nr_regions: Number of regions in the range
*
- * This function doesn't block, so it's safe to call it from interrupt context.
+ * This function doesn't block, but since it uses spin_lock_irq()/spin_unlock_irq()
+ * it's NOT safe to call it from any context where interrupts are disabled, e.g.,
+ * from interrupt context.
*/
int dm_clone_cond_set_range(struct dm_clone_metadata *cmd, unsigned long start,
unsigned long nr_regions);
diff --git a/drivers/md/dm-clone-target.c b/drivers/md/dm-clone-target.c
index cd6f9e9fc98e..b3d89072d21c 100644
--- a/drivers/md/dm-clone-target.c
+++ b/drivers/md/dm-clone-target.c
@@ -332,8 +332,6 @@ static void submit_bios(struct bio_list *bios)
*/
static void issue_bio(struct clone *clone, struct bio *bio)
{
- unsigned long flags;
-
if (!bio_triggers_commit(clone, bio)) {
generic_make_request(bio);
return;
@@ -352,9 +350,9 @@ static void issue_bio(struct clone *clone, struct bio *bio)
* Batch together any bios that trigger commits and then issue a single
* commit for them in process_deferred_flush_bios().
*/
- spin_lock_irqsave(&clone->lock, flags);
+ spin_lock_irq(&clone->lock);
bio_list_add(&clone->deferred_flush_bios, bio);
- spin_unlock_irqrestore(&clone->lock, flags);
+ spin_unlock_irq(&clone->lock);
wake_worker(clone);
}
@@ -469,7 +467,7 @@ static void complete_discard_bio(struct clone *clone, struct bio *bio, bool succ
static void process_discard_bio(struct clone *clone, struct bio *bio)
{
- unsigned long rs, re, flags;
+ unsigned long rs, re;
bio_region_range(clone, bio, &rs, &re);
BUG_ON(re > clone->nr_regions);
@@ -501,9 +499,9 @@ static void process_discard_bio(struct clone *clone, struct bio *bio)
/*
* Defer discard processing.
*/
- spin_lock_irqsave(&clone->lock, flags);
+ spin_lock_irq(&clone->lock);
bio_list_add(&clone->deferred_discard_bios, bio);
- spin_unlock_irqrestore(&clone->lock, flags);
+ spin_unlock_irq(&clone->lock);
wake_worker(clone);
}
@@ -554,6 +552,12 @@ struct hash_table_bucket {
#define bucket_unlock_irqrestore(bucket, flags) \
spin_unlock_irqrestore(&(bucket)->lock, flags)
+#define bucket_lock_irq(bucket) \
+ spin_lock_irq(&(bucket)->lock)
+
+#define bucket_unlock_irq(bucket) \
+ spin_unlock_irq(&(bucket)->lock)
+
static int hash_table_init(struct clone *clone)
{
unsigned int i, sz;
@@ -591,8 +595,8 @@ static struct hash_table_bucket *get_hash_table_bucket(struct clone *clone,
*
* NOTE: Must be called with the bucket lock held
*/
-struct dm_clone_region_hydration *__hash_find(struct hash_table_bucket *bucket,
- unsigned long region_nr)
+static struct dm_clone_region_hydration *__hash_find(struct hash_table_bucket *bucket,
+ unsigned long region_nr)
{
struct dm_clone_region_hydration *hd;
@@ -851,7 +855,6 @@ static void hydration_overwrite(struct dm_clone_region_hydration *hd, struct bio
*/
static void hydrate_bio_region(struct clone *clone, struct bio *bio)
{
- unsigned long flags;
unsigned long region_nr;
struct hash_table_bucket *bucket;
struct dm_clone_region_hydration *hd, *hd2;
@@ -859,19 +862,19 @@ static void hydrate_bio_region(struct clone *clone, struct bio *bio)
region_nr = bio_to_region(clone, bio);
bucket = get_hash_table_bucket(clone, region_nr);
- bucket_lock_irqsave(bucket, flags);
+ bucket_lock_irq(bucket);
hd = __hash_find(bucket, region_nr);
if (hd) {
/* Someone else is hydrating the region */
bio_list_add(&hd->deferred_bios, bio);
- bucket_unlock_irqrestore(bucket, flags);
+ bucket_unlock_irq(bucket);
return;
}
if (dm_clone_is_region_hydrated(clone->cmd, region_nr)) {
/* The region has been hydrated */
- bucket_unlock_irqrestore(bucket, flags);
+ bucket_unlock_irq(bucket);
issue_bio(clone, bio);
return;
}
@@ -880,16 +883,16 @@ static void hydrate_bio_region(struct clone *clone, struct bio *bio)
* We must allocate a hydration descriptor and start the hydration of
* the corresponding region.
*/
- bucket_unlock_irqrestore(bucket, flags);
+ bucket_unlock_irq(bucket);
hd = alloc_hydration(clone);
hydration_init(hd, region_nr);
- bucket_lock_irqsave(bucket, flags);
+ bucket_lock_irq(bucket);
/* Check if the region has been hydrated in the meantime. */
if (dm_clone_is_region_hydrated(clone->cmd, region_nr)) {
- bucket_unlock_irqrestore(bucket, flags);
+ bucket_unlock_irq(bucket);
free_hydration(hd);
issue_bio(clone, bio);
return;
@@ -899,7 +902,7 @@ static void hydrate_bio_region(struct clone *clone, struct bio *bio)
if (hd2 != hd) {
/* Someone else started the region's hydration. */
bio_list_add(&hd2->deferred_bios, bio);
- bucket_unlock_irqrestore(bucket, flags);
+ bucket_unlock_irq(bucket);
free_hydration(hd);
return;
}
@@ -911,7 +914,7 @@ static void hydrate_bio_region(struct clone *clone, struct bio *bio)
*/
if (unlikely(get_clone_mode(clone) >= CM_READ_ONLY)) {
hlist_del(&hd->h);
- bucket_unlock_irqrestore(bucket, flags);
+ bucket_unlock_irq(bucket);
free_hydration(hd);
bio_io_error(bio);
return;
@@ -925,11 +928,11 @@ static void hydrate_bio_region(struct clone *clone, struct bio *bio)
* to the destination device.
*/
if (is_overwrite_bio(clone, bio)) {
- bucket_unlock_irqrestore(bucket, flags);
+ bucket_unlock_irq(bucket);
hydration_overwrite(hd, bio);
} else {
bio_list_add(&hd->deferred_bios, bio);
- bucket_unlock_irqrestore(bucket, flags);
+ bucket_unlock_irq(bucket);
hydration_copy(hd, 1);
}
}
@@ -996,7 +999,6 @@ static unsigned long __start_next_hydration(struct clone *clone,
unsigned long offset,
struct batch_info *batch)
{
- unsigned long flags;
struct hash_table_bucket *bucket;
struct dm_clone_region_hydration *hd;
unsigned long nr_regions = clone->nr_regions;
@@ -1010,13 +1012,13 @@ static unsigned long __start_next_hydration(struct clone *clone,
break;
bucket = get_hash_table_bucket(clone, offset);
- bucket_lock_irqsave(bucket, flags);
+ bucket_lock_irq(bucket);
if (!dm_clone_is_region_hydrated(clone->cmd, offset) &&
!__hash_find(bucket, offset)) {
hydration_init(hd, offset);
__insert_region_hydration(bucket, hd);
- bucket_unlock_irqrestore(bucket, flags);
+ bucket_unlock_irq(bucket);
/* Batch hydration */
__batch_hydration(batch, hd);
@@ -1024,7 +1026,7 @@ static unsigned long __start_next_hydration(struct clone *clone,
return (offset + 1);
}
- bucket_unlock_irqrestore(bucket, flags);
+ bucket_unlock_irq(bucket);
} while (++offset < nr_regions);
@@ -1140,13 +1142,13 @@ static void process_deferred_discards(struct clone *clone)
int r = -EPERM;
struct bio *bio;
struct blk_plug plug;
- unsigned long rs, re, flags;
+ unsigned long rs, re;
struct bio_list discards = BIO_EMPTY_LIST;
- spin_lock_irqsave(&clone->lock, flags);
+ spin_lock_irq(&clone->lock);
bio_list_merge(&discards, &clone->deferred_discard_bios);
bio_list_init(&clone->deferred_discard_bios);
- spin_unlock_irqrestore(&clone->lock, flags);
+ spin_unlock_irq(&clone->lock);
if (bio_list_empty(&discards))
return;
@@ -1176,13 +1178,12 @@ out:
static void process_deferred_bios(struct clone *clone)
{
- unsigned long flags;
struct bio_list bios = BIO_EMPTY_LIST;
- spin_lock_irqsave(&clone->lock, flags);
+ spin_lock_irq(&clone->lock);
bio_list_merge(&bios, &clone->deferred_bios);
bio_list_init(&clone->deferred_bios);
- spin_unlock_irqrestore(&clone->lock, flags);
+ spin_unlock_irq(&clone->lock);
if (bio_list_empty(&bios))
return;
@@ -1193,7 +1194,6 @@ static void process_deferred_bios(struct clone *clone)
static void process_deferred_flush_bios(struct clone *clone)
{
struct bio *bio;
- unsigned long flags;
struct bio_list bios = BIO_EMPTY_LIST;
struct bio_list bio_completions = BIO_EMPTY_LIST;
@@ -1201,13 +1201,13 @@ static void process_deferred_flush_bios(struct clone *clone)
* If there are any deferred flush bios, we must commit the metadata
* before issuing them or signaling their completion.
*/
- spin_lock_irqsave(&clone->lock, flags);
+ spin_lock_irq(&clone->lock);
bio_list_merge(&bios, &clone->deferred_flush_bios);
bio_list_init(&clone->deferred_flush_bios);
bio_list_merge(&bio_completions, &clone->deferred_flush_completions);
bio_list_init(&clone->deferred_flush_completions);
- spin_unlock_irqrestore(&clone->lock, flags);
+ spin_unlock_irq(&clone->lock);
if (bio_list_empty(&bios) && bio_list_empty(&bio_completions) &&
!(dm_clone_changed_this_transaction(clone->cmd) && need_commit_due_to_time(clone)))
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index f87f6495652f..eb9782fc93fe 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -2700,21 +2700,18 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
}
ret = -ENOMEM;
- cc->io_queue = alloc_workqueue("kcryptd_io/%s",
- WQ_HIGHPRI | WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM,
- 1, devname);
+ cc->io_queue = alloc_workqueue("kcryptd_io/%s", WQ_MEM_RECLAIM, 1, devname);
if (!cc->io_queue) {
ti->error = "Couldn't create kcryptd io queue";
goto bad;
}
if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags))
- cc->crypt_queue = alloc_workqueue("kcryptd/%s",
- WQ_HIGHPRI | WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM,
+ cc->crypt_queue = alloc_workqueue("kcryptd/%s", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM,
1, devname);
else
cc->crypt_queue = alloc_workqueue("kcryptd/%s",
- WQ_HIGHPRI | WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND,
+ WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND,
num_online_cpus(), devname);
if (!cc->crypt_queue) {
ti->error = "Couldn't create kcryptd queue";
diff --git a/drivers/md/dm-dust.c b/drivers/md/dm-dust.c
index 8288887b7f94..eb37584427a4 100644
--- a/drivers/md/dm-dust.c
+++ b/drivers/md/dm-dust.c
@@ -17,6 +17,7 @@
struct badblock {
struct rb_node node;
sector_t bb;
+ unsigned char wr_fail_cnt;
};
struct dust_device {
@@ -101,7 +102,8 @@ static int dust_remove_block(struct dust_device *dd, unsigned long long block)
return 0;
}
-static int dust_add_block(struct dust_device *dd, unsigned long long block)
+static int dust_add_block(struct dust_device *dd, unsigned long long block,
+ unsigned char wr_fail_cnt)
{
struct badblock *bblock;
unsigned long flags;
@@ -115,6 +117,7 @@ static int dust_add_block(struct dust_device *dd, unsigned long long block)
spin_lock_irqsave(&dd->dust_lock, flags);
bblock->bb = block;
+ bblock->wr_fail_cnt = wr_fail_cnt;
if (!dust_rb_insert(&dd->badblocklist, bblock)) {
if (!dd->quiet_mode) {
DMERR("%s: block %llu already in badblocklist",
@@ -126,8 +129,10 @@ static int dust_add_block(struct dust_device *dd, unsigned long long block)
}
dd->badblock_count++;
- if (!dd->quiet_mode)
- DMINFO("%s: badblock added at block %llu", __func__, block);
+ if (!dd->quiet_mode) {
+ DMINFO("%s: badblock added at block %llu with write fail count %hhu",
+ __func__, block, wr_fail_cnt);
+ }
spin_unlock_irqrestore(&dd->dust_lock, flags);
return 0;
@@ -163,22 +168,27 @@ static int dust_map_read(struct dust_device *dd, sector_t thisblock,
bool fail_read_on_bb)
{
unsigned long flags;
- int ret = DM_MAPIO_REMAPPED;
+ int r = DM_MAPIO_REMAPPED;
if (fail_read_on_bb) {
thisblock >>= dd->sect_per_block_shift;
spin_lock_irqsave(&dd->dust_lock, flags);
- ret = __dust_map_read(dd, thisblock);
+ r = __dust_map_read(dd, thisblock);
spin_unlock_irqrestore(&dd->dust_lock, flags);
}
- return ret;
+ return r;
}
-static void __dust_map_write(struct dust_device *dd, sector_t thisblock)
+static int __dust_map_write(struct dust_device *dd, sector_t thisblock)
{
struct badblock *bblk = dust_rb_search(&dd->badblocklist, thisblock);
+ if (bblk && bblk->wr_fail_cnt > 0) {
+ bblk->wr_fail_cnt--;
+ return DM_MAPIO_KILL;
+ }
+
if (bblk) {
rb_erase(&bblk->node, &dd->badblocklist);
dd->badblock_count--;
@@ -189,37 +199,40 @@ static void __dust_map_write(struct dust_device *dd, sector_t thisblock)
(unsigned long long)thisblock);
}
}
+
+ return DM_MAPIO_REMAPPED;
}
static int dust_map_write(struct dust_device *dd, sector_t thisblock,
bool fail_read_on_bb)
{
unsigned long flags;
+ int ret = DM_MAPIO_REMAPPED;
if (fail_read_on_bb) {
thisblock >>= dd->sect_per_block_shift;
spin_lock_irqsave(&dd->dust_lock, flags);
- __dust_map_write(dd, thisblock);
+ ret = __dust_map_write(dd, thisblock);
spin_unlock_irqrestore(&dd->dust_lock, flags);
}
- return DM_MAPIO_REMAPPED;
+ return ret;
}
static int dust_map(struct dm_target *ti, struct bio *bio)
{
struct dust_device *dd = ti->private;
- int ret;
+ int r;
bio_set_dev(bio, dd->dev->bdev);
bio->bi_iter.bi_sector = dd->start + dm_target_offset(ti, bio->bi_iter.bi_sector);
if (bio_data_dir(bio) == READ)
- ret = dust_map_read(dd, bio->bi_iter.bi_sector, dd->fail_read_on_bb);
+ r = dust_map_read(dd, bio->bi_iter.bi_sector, dd->fail_read_on_bb);
else
- ret = dust_map_write(dd, bio->bi_iter.bi_sector, dd->fail_read_on_bb);
+ r = dust_map_write(dd, bio->bi_iter.bi_sector, dd->fail_read_on_bb);
- return ret;
+ return r;
}
static bool __dust_clear_badblocks(struct rb_root *tree,
@@ -375,8 +388,10 @@ static int dust_message(struct dm_target *ti, unsigned int argc, char **argv,
struct dust_device *dd = ti->private;
sector_t size = i_size_read(dd->dev->bdev->bd_inode) >> SECTOR_SHIFT;
bool invalid_msg = false;
- int result = -EINVAL;
+ int r = -EINVAL;
unsigned long long tmp, block;
+ unsigned char wr_fail_cnt;
+ unsigned int tmp_ui;
unsigned long flags;
char dummy;
@@ -388,45 +403,69 @@ static int dust_message(struct dm_target *ti, unsigned int argc, char **argv,
} else if (!strcasecmp(argv[0], "disable")) {
DMINFO("disabling read failures on bad sectors");
dd->fail_read_on_bb = false;
- result = 0;
+ r = 0;
} else if (!strcasecmp(argv[0], "enable")) {
DMINFO("enabling read failures on bad sectors");
dd->fail_read_on_bb = true;
- result = 0;
+ r = 0;
} else if (!strcasecmp(argv[0], "countbadblocks")) {
spin_lock_irqsave(&dd->dust_lock, flags);
DMINFO("countbadblocks: %llu badblock(s) found",
dd->badblock_count);
spin_unlock_irqrestore(&dd->dust_lock, flags);
- result = 0;
+ r = 0;
} else if (!strcasecmp(argv[0], "clearbadblocks")) {
- result = dust_clear_badblocks(dd);
+ r = dust_clear_badblocks(dd);
} else if (!strcasecmp(argv[0], "quiet")) {
if (!dd->quiet_mode)
dd->quiet_mode = true;
else
dd->quiet_mode = false;
- result = 0;
+ r = 0;
} else {
invalid_msg = true;
}
} else if (argc == 2) {
if (sscanf(argv[1], "%llu%c", &tmp, &dummy) != 1)
- return result;
+ return r;
block = tmp;
sector_div(size, dd->sect_per_block);
if (block > size) {
DMERR("selected block value out of range");
- return result;
+ return r;
}
if (!strcasecmp(argv[0], "addbadblock"))
- result = dust_add_block(dd, block);
+ r = dust_add_block(dd, block, 0);
else if (!strcasecmp(argv[0], "removebadblock"))
- result = dust_remove_block(dd, block);
+ r = dust_remove_block(dd, block);
else if (!strcasecmp(argv[0], "queryblock"))
- result = dust_query_block(dd, block);
+ r = dust_query_block(dd, block);
+ else
+ invalid_msg = true;
+
+ } else if (argc == 3) {
+ if (sscanf(argv[1], "%llu%c", &tmp, &dummy) != 1)
+ return r;
+
+ if (sscanf(argv[2], "%u%c", &tmp_ui, &dummy) != 1)
+ return r;
+
+ block = tmp;
+ if (tmp_ui > 255) {
+ DMERR("selected write fail count out of range");
+ return r;
+ }
+ wr_fail_cnt = tmp_ui;
+ sector_div(size, dd->sect_per_block);
+ if (block > size) {
+ DMERR("selected block value out of range");
+ return r;
+ }
+
+ if (!strcasecmp(argv[0], "addbadblock"))
+ r = dust_add_block(dd, block, wr_fail_cnt);
else
invalid_msg = true;
@@ -436,7 +475,7 @@ static int dust_message(struct dm_target *ti, unsigned int argc, char **argv,
if (invalid_msg)
DMERR("unrecognized message '%s' received", argv[0]);
- return result;
+ return r;
}
static void dust_status(struct dm_target *ti, status_type_t type,
@@ -499,12 +538,12 @@ static struct target_type dust_target = {
static int __init dm_dust_init(void)
{
- int result = dm_register_target(&dust_target);
+ int r = dm_register_target(&dust_target);
- if (result < 0)
- DMERR("dm_register_target failed %d", result);
+ if (r < 0)
+ DMERR("dm_register_target failed %d", r);
- return result;
+ return r;
}
static void __exit dm_dust_exit(void)
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 2900fbde89b3..a2cc9e45cbba 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -280,7 +280,7 @@ static void flakey_map_bio(struct dm_target *ti, struct bio *bio)
struct flakey_c *fc = ti->private;
bio_set_dev(bio, fc->dev->bdev);
- if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET)
+ if (bio_sectors(bio) || op_is_zone_mgmt(bio_op(bio)))
bio->bi_iter.bi_sector =
flakey_map_sector(ti, bio->bi_iter.bi_sector);
}
@@ -322,8 +322,7 @@ static int flakey_map(struct dm_target *ti, struct bio *bio)
struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
pb->bio_submitted = false;
- /* Do not fail reset zone */
- if (bio_op(bio) == REQ_OP_ZONE_RESET)
+ if (op_is_zone_mgmt(bio_op(bio)))
goto map_bio;
/* Are we alive ? */
@@ -384,7 +383,7 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio,
struct flakey_c *fc = ti->private;
struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
- if (bio_op(bio) == REQ_OP_ZONE_RESET)
+ if (op_is_zone_mgmt(bio_op(bio)))
return DM_ENDIO_DONE;
if (!*error && pb->bio_submitted && (bio_data_dir(bio) == READ)) {
@@ -460,21 +459,15 @@ static int flakey_prepare_ioctl(struct dm_target *ti, struct block_device **bdev
}
#ifdef CONFIG_BLK_DEV_ZONED
-static int flakey_report_zones(struct dm_target *ti, sector_t sector,
- struct blk_zone *zones, unsigned int *nr_zones)
+static int flakey_report_zones(struct dm_target *ti,
+ struct dm_report_zones_args *args, unsigned int nr_zones)
{
struct flakey_c *fc = ti->private;
- int ret;
+ sector_t sector = flakey_map_sector(ti, args->next_sector);
- /* Do report and remap it */
- ret = blkdev_report_zones(fc->dev->bdev, flakey_map_sector(ti, sector),
- zones, nr_zones);
- if (ret != 0)
- return ret;
-
- if (*nr_zones)
- dm_remap_zone_report(ti, fc->start, zones, nr_zones);
- return 0;
+ args->start = fc->start;
+ return blkdev_report_zones(fc->dev->bdev, sector, nr_zones,
+ dm_report_zones_cb, args);
}
#endif
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index dab4446fe7d8..b225b3e445fa 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -53,6 +53,7 @@
#define SB_VERSION_1 1
#define SB_VERSION_2 2
#define SB_VERSION_3 3
+#define SB_VERSION_4 4
#define SB_SECTORS 8
#define MAX_SECTORS_PER_BLOCK 8
@@ -73,6 +74,7 @@ struct superblock {
#define SB_FLAG_HAVE_JOURNAL_MAC 0x1
#define SB_FLAG_RECALCULATING 0x2
#define SB_FLAG_DIRTY_BITMAP 0x4
+#define SB_FLAG_FIXED_PADDING 0x8
#define JOURNAL_ENTRY_ROUNDUP 8
@@ -250,6 +252,7 @@ struct dm_integrity_c {
bool journal_uptodate;
bool just_formatted;
bool recalculate_flag;
+ bool fix_padding;
struct alg_spec internal_hash_alg;
struct alg_spec journal_crypt_alg;
@@ -463,7 +466,9 @@ static void wraparound_section(struct dm_integrity_c *ic, unsigned *sec_ptr)
static void sb_set_version(struct dm_integrity_c *ic)
{
- if (ic->mode == 'B' || ic->sb->flags & cpu_to_le32(SB_FLAG_DIRTY_BITMAP))
+ if (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING))
+ ic->sb->version = SB_VERSION_4;
+ else if (ic->mode == 'B' || ic->sb->flags & cpu_to_le32(SB_FLAG_DIRTY_BITMAP))
ic->sb->version = SB_VERSION_3;
else if (ic->meta_dev || ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING))
ic->sb->version = SB_VERSION_2;
@@ -2955,6 +2960,7 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type,
arg_count += !!ic->internal_hash_alg.alg_string;
arg_count += !!ic->journal_crypt_alg.alg_string;
arg_count += !!ic->journal_mac_alg.alg_string;
+ arg_count += (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0;
DMEMIT("%s %llu %u %c %u", ic->dev->name, (unsigned long long)ic->start,
ic->tag_size, ic->mode, arg_count);
if (ic->meta_dev)
@@ -2974,6 +2980,8 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type,
DMEMIT(" sectors_per_bit:%llu", (unsigned long long)ic->sectors_per_block << ic->log2_blocks_per_bitmap_bit);
DMEMIT(" bitmap_flush_interval:%u", jiffies_to_msecs(ic->bitmap_flush_interval));
}
+ if ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0)
+ DMEMIT(" fix_padding");
#define EMIT_ALG(a, n) \
do { \
@@ -3042,8 +3050,14 @@ static int calculate_device_limits(struct dm_integrity_c *ic)
if (!ic->meta_dev) {
sector_t last_sector, last_area, last_offset;
- ic->metadata_run = roundup((__u64)ic->tag_size << (ic->sb->log2_interleave_sectors - ic->sb->log2_sectors_per_block),
- (__u64)(1 << SECTOR_SHIFT << METADATA_PADDING_SECTORS)) >> SECTOR_SHIFT;
+ /* we have to maintain excessive padding for compatibility with existing volumes */
+ __u64 metadata_run_padding =
+ ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING) ?
+ (__u64)(METADATA_PADDING_SECTORS << SECTOR_SHIFT) :
+ (__u64)(1 << SECTOR_SHIFT << METADATA_PADDING_SECTORS);
+
+ ic->metadata_run = round_up((__u64)ic->tag_size << (ic->sb->log2_interleave_sectors - ic->sb->log2_sectors_per_block),
+ metadata_run_padding) >> SECTOR_SHIFT;
if (!(ic->metadata_run & (ic->metadata_run - 1)))
ic->log2_metadata_run = __ffs(ic->metadata_run);
else
@@ -3086,6 +3100,8 @@ static int initialize_superblock(struct dm_integrity_c *ic, unsigned journal_sec
journal_sections = 1;
if (!ic->meta_dev) {
+ if (ic->fix_padding)
+ ic->sb->flags |= cpu_to_le32(SB_FLAG_FIXED_PADDING);
ic->sb->journal_sections = cpu_to_le32(journal_sections);
if (!interleave_sectors)
interleave_sectors = DEFAULT_INTERLEAVE_SECTORS;
@@ -3725,6 +3741,8 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
goto bad;
} else if (!strcmp(opt_string, "recalculate")) {
ic->recalculate_flag = true;
+ } else if (!strcmp(opt_string, "fix_padding")) {
+ ic->fix_padding = true;
} else {
r = -EINVAL;
ti->error = "Invalid argument";
@@ -3867,7 +3885,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
should_write_sb = true;
}
- if (!ic->sb->version || ic->sb->version > SB_VERSION_3) {
+ if (!ic->sb->version || ic->sb->version > SB_VERSION_4) {
r = -EINVAL;
ti->error = "Unknown version";
goto bad;
@@ -4182,7 +4200,7 @@ static void dm_integrity_dtr(struct dm_target *ti)
static struct target_type integrity_target = {
.name = "integrity",
- .version = {1, 3, 0},
+ .version = {1, 4, 0},
.module = THIS_MODULE,
.features = DM_TARGET_SINGLETON | DM_TARGET_INTEGRITY,
.ctr = dm_integrity_ctr,
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index ecefe6703736..8d07fdf63a47 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -90,7 +90,7 @@ static void linear_map_bio(struct dm_target *ti, struct bio *bio)
struct linear_c *lc = ti->private;
bio_set_dev(bio, lc->dev->bdev);
- if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET)
+ if (bio_sectors(bio) || op_is_zone_mgmt(bio_op(bio)))
bio->bi_iter.bi_sector =
linear_map_sector(ti, bio->bi_iter.bi_sector);
}
@@ -136,21 +136,15 @@ static int linear_prepare_ioctl(struct dm_target *ti, struct block_device **bdev
}
#ifdef CONFIG_BLK_DEV_ZONED
-static int linear_report_zones(struct dm_target *ti, sector_t sector,
- struct blk_zone *zones, unsigned int *nr_zones)
+static int linear_report_zones(struct dm_target *ti,
+ struct dm_report_zones_args *args, unsigned int nr_zones)
{
- struct linear_c *lc = (struct linear_c *) ti->private;
- int ret;
-
- /* Do report and remap it */
- ret = blkdev_report_zones(lc->dev->bdev, linear_map_sector(ti, sector),
- zones, nr_zones);
- if (ret != 0)
- return ret;
+ struct linear_c *lc = ti->private;
+ sector_t sector = linear_map_sector(ti, args->next_sector);
- if (*nr_zones)
- dm_remap_zone_report(ti, lc->start, zones, nr_zones);
- return 0;
+ args->start = lc->start;
+ return blkdev_report_zones(lc->dev->bdev, sector, nr_zones,
+ dm_report_zones_cb, args);
}
#endif
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index b0aa595e4375..c412eaa975fc 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -209,6 +209,7 @@ struct raid_dev {
#define RT_FLAG_RS_SUSPENDED 5
#define RT_FLAG_RS_IN_SYNC 6
#define RT_FLAG_RS_RESYNCING 7
+#define RT_FLAG_RS_GROW 8
/* Array elements of 64 bit needed for rebuild/failed disk bits */
#define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8)
@@ -241,6 +242,9 @@ struct raid_set {
struct raid_type *raid_type;
struct dm_target_callbacks callbacks;
+ sector_t array_sectors;
+ sector_t dev_sectors;
+
/* Optional raid4/5/6 journal device */
struct journal_dev {
struct dm_dev *dev;
@@ -616,7 +620,6 @@ static int raid10_format_to_md_layout(struct raid_set *rs,
} else if (algorithm == ALGORITHM_RAID10_FAR) {
f = copies;
- r = !RAID10_OFFSET;
if (!test_bit(__CTR_FLAG_RAID10_USE_NEAR_SETS, &rs->ctr_flags))
r |= RAID10_USE_FAR_SETS;
@@ -1615,13 +1618,12 @@ static int _check_data_dev_sectors(struct raid_set *rs)
}
/* Calculate the sectors per device and per array used for @rs */
-static int rs_set_dev_and_array_sectors(struct raid_set *rs, bool use_mddev)
+static int rs_set_dev_and_array_sectors(struct raid_set *rs, sector_t sectors, bool use_mddev)
{
int delta_disks;
unsigned int data_stripes;
+ sector_t array_sectors = sectors, dev_sectors = sectors;
struct mddev *mddev = &rs->md;
- struct md_rdev *rdev;
- sector_t array_sectors = rs->ti->len, dev_sectors = rs->ti->len;
if (use_mddev) {
delta_disks = mddev->delta_disks;
@@ -1656,12 +1658,9 @@ static int rs_set_dev_and_array_sectors(struct raid_set *rs, bool use_mddev)
/* Striped layouts */
array_sectors = (data_stripes + delta_disks) * dev_sectors;
- rdev_for_each(rdev, mddev)
- if (!test_bit(Journal, &rdev->flags))
- rdev->sectors = dev_sectors;
-
mddev->array_sectors = array_sectors;
mddev->dev_sectors = dev_sectors;
+ rs_set_rdev_sectors(rs);
return _check_data_dev_sectors(rs);
bad:
@@ -1670,7 +1669,7 @@ bad:
}
/* Setup recovery on @rs */
-static void __rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors)
+static void rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors)
{
/* raid0 does not recover */
if (rs_is_raid0(rs))
@@ -1691,22 +1690,6 @@ static void __rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors)
? MaxSector : dev_sectors;
}
-/* Setup recovery on @rs based on raid type, device size and 'nosync' flag */
-static void rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors)
-{
- if (!dev_sectors)
- /* New raid set or 'sync' flag provided */
- __rs_setup_recovery(rs, 0);
- else if (dev_sectors == MaxSector)
- /* Prevent recovery */
- __rs_setup_recovery(rs, MaxSector);
- else if (__rdev_sectors(rs) < dev_sectors)
- /* Grown raid set */
- __rs_setup_recovery(rs, __rdev_sectors(rs));
- else
- __rs_setup_recovery(rs, MaxSector);
-}
-
static void do_table_event(struct work_struct *ws)
{
struct raid_set *rs = container_of(ws, struct raid_set, md.event_work);
@@ -2474,7 +2457,7 @@ static int super_validate(struct raid_set *rs, struct md_rdev *rdev)
return -EINVAL;
}
- /* Enable bitmap creation for RAID levels != 0 */
+ /* Enable bitmap creation on @rs unless no metadevs or raid0 or journaled raid4/5/6 set. */
mddev->bitmap_info.offset = (rt_is_raid0(rs->raid_type) || rs->journal_dev.dev) ? 0 : to_sector(4096);
mddev->bitmap_info.default_offset = mddev->bitmap_info.offset;
@@ -2911,7 +2894,7 @@ static int rs_setup_reshape(struct raid_set *rs)
/* Remove disk(s) */
} else if (rs->delta_disks < 0) {
- r = rs_set_dev_and_array_sectors(rs, true);
+ r = rs_set_dev_and_array_sectors(rs, rs->ti->len, true);
mddev->reshape_backwards = 1; /* removing disk(s) -> backward reshape */
/* Change layout and/or chunk size */
@@ -3008,7 +2991,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
bool resize = false;
struct raid_type *rt;
unsigned int num_raid_params, num_raid_devs;
- sector_t calculated_dev_sectors, rdev_sectors, reshape_sectors;
+ sector_t sb_array_sectors, rdev_sectors, reshape_sectors;
struct raid_set *rs = NULL;
const char *arg;
struct rs_layout rs_layout;
@@ -3067,11 +3050,13 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
*
* Any existing superblock will overwrite the array and device sizes
*/
- r = rs_set_dev_and_array_sectors(rs, false);
+ r = rs_set_dev_and_array_sectors(rs, rs->ti->len, false);
if (r)
goto bad;
- calculated_dev_sectors = rs->md.dev_sectors;
+ /* Memorize just calculated, potentially larger sizes to grow the raid set in preresume */
+ rs->array_sectors = rs->md.array_sectors;
+ rs->dev_sectors = rs->md.dev_sectors;
/*
* Backup any new raid set level, layout, ...
@@ -3084,6 +3069,8 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
if (r)
goto bad;
+ /* All in-core metadata now as of current superblocks after calling analyse_superblocks() */
+ sb_array_sectors = rs->md.array_sectors;
rdev_sectors = __rdev_sectors(rs);
if (!rdev_sectors) {
ti->error = "Invalid rdev size";
@@ -3093,8 +3080,11 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
reshape_sectors = _get_reshape_sectors(rs);
- if (calculated_dev_sectors != rdev_sectors)
- resize = calculated_dev_sectors != (reshape_sectors ? rdev_sectors - reshape_sectors : rdev_sectors);
+ if (rs->dev_sectors != rdev_sectors) {
+ resize = (rs->dev_sectors != rdev_sectors - reshape_sectors);
+ if (rs->dev_sectors > rdev_sectors - reshape_sectors)
+ set_bit(RT_FLAG_RS_GROW, &rs->runtime_flags);
+ }
INIT_WORK(&rs->md.event_work, do_table_event);
ti->private = rs;
@@ -3121,13 +3111,8 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
rs_set_new(rs);
} else if (rs_is_recovering(rs)) {
- /* Rebuild particular devices */
- if (test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags)) {
- set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
- rs_setup_recovery(rs, MaxSector);
- }
/* A recovering raid set may be resized */
- ; /* skip setup rs */
+ goto size_check;
} else if (rs_is_reshaping(rs)) {
/* Have to reject size change request during reshape */
if (resize) {
@@ -3171,6 +3156,9 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
rs_setup_recovery(rs, MaxSector);
rs_set_new(rs);
} else if (rs_reshape_requested(rs)) {
+ /* Only request grow on raid set size extensions, not on reshapes. */
+ clear_bit(RT_FLAG_RS_GROW, &rs->runtime_flags);
+
/*
* No need to check for 'ongoing' takeover here, because takeover
* is an instant operation as oposed to an ongoing reshape.
@@ -3201,13 +3189,31 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
}
rs_set_cur(rs);
} else {
+size_check:
/* May not set recovery when a device rebuild is requested */
if (test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags)) {
- rs_setup_recovery(rs, MaxSector);
+ clear_bit(RT_FLAG_RS_GROW, &rs->runtime_flags);
set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
- } else
- rs_setup_recovery(rs, test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags) ?
- 0 : (resize ? calculated_dev_sectors : MaxSector));
+ rs_setup_recovery(rs, MaxSector);
+ } else if (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags)) {
+ /*
+ * Set raid set to current size, i.e. size as of
+ * superblocks to grow to larger size in preresume.
+ */
+ r = rs_set_dev_and_array_sectors(rs, sb_array_sectors, false);
+ if (r)
+ goto bad;
+
+ rs_setup_recovery(rs, rs->md.recovery_cp < rs->md.dev_sectors ? rs->md.recovery_cp : rs->md.dev_sectors);
+ } else {
+ /* This is no size change or it is shrinking, update size and record in superblocks */
+ r = rs_set_dev_and_array_sectors(rs, rs->ti->len, false);
+ if (r)
+ goto bad;
+
+ if (sb_array_sectors > rs->array_sectors)
+ set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
+ }
rs_set_cur(rs);
}
@@ -3406,10 +3412,9 @@ static const char *__raid_dev_status(struct raid_set *rs, struct md_rdev *rdev)
/* Helper to return resync/reshape progress for @rs and runtime flags for raid set in sync / resynching */
static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
- sector_t resync_max_sectors)
+ enum sync_state state, sector_t resync_max_sectors)
{
sector_t r;
- enum sync_state state;
struct mddev *mddev = &rs->md;
clear_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
@@ -3420,8 +3425,6 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
} else {
- state = decipher_sync_action(mddev, recovery);
-
if (state == st_idle && !test_bit(MD_RECOVERY_INTR, &recovery))
r = mddev->recovery_cp;
else
@@ -3439,18 +3442,14 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
/*
* In case we are recovering, the array is not in sync
* and health chars should show the recovering legs.
+ *
+ * Already retrieved recovery offset from curr_resync_completed above.
*/
;
- else if (state == st_resync)
- /*
- * If "resync" is occurring, the raid set
- * is or may be out of sync hence the health
- * characters shall be 'a'.
- */
- set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags);
- else if (state == st_reshape)
+
+ else if (state == st_resync || state == st_reshape)
/*
- * If "reshape" is occurring, the raid set
+ * If "resync/reshape" is occurring, the raid set
* is or may be out of sync hence the health
* characters shall be 'a'.
*/
@@ -3464,22 +3463,22 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
*/
set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
- else {
- struct md_rdev *rdev;
-
+ else if (test_bit(MD_RECOVERY_NEEDED, &recovery))
/*
* We are idle and recovery is needed, prevent 'A' chars race
* caused by components still set to in-sync by constructor.
*/
- if (test_bit(MD_RECOVERY_NEEDED, &recovery))
- set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags);
+ set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags);
+ else {
/*
- * The raid set may be doing an initial sync, or it may
- * be rebuilding individual components. If all the
- * devices are In_sync, then it is the raid set that is
- * being initialized.
+ * We are idle and the raid set may be doing an initial
+ * sync, or it may be rebuilding individual components.
+ * If all the devices are In_sync, then it is the raid set
+ * that is being initialized.
*/
+ struct md_rdev *rdev;
+
set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
rdev_for_each(rdev, mddev)
if (!test_bit(Journal, &rdev->flags) &&
@@ -3512,7 +3511,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
unsigned int rebuild_disks;
unsigned int write_mostly_params = 0;
sector_t progress, resync_max_sectors, resync_mismatches;
- const char *sync_action;
+ enum sync_state state;
struct raid_type *rt;
switch (type) {
@@ -3526,14 +3525,14 @@ static void raid_status(struct dm_target *ti, status_type_t type,
/* Access most recent mddev properties for status output */
smp_rmb();
- recovery = rs->md.recovery;
/* Get sensible max sectors even if raid set not yet started */
resync_max_sectors = test_bit(RT_FLAG_RS_PRERESUMED, &rs->runtime_flags) ?
mddev->resync_max_sectors : mddev->dev_sectors;
- progress = rs_get_progress(rs, recovery, resync_max_sectors);
+ recovery = rs->md.recovery;
+ state = decipher_sync_action(mddev, recovery);
+ progress = rs_get_progress(rs, recovery, state, resync_max_sectors);
resync_mismatches = (mddev->last_sync_action && !strcasecmp(mddev->last_sync_action, "check")) ?
atomic64_read(&mddev->resync_mismatches) : 0;
- sync_action = sync_str(decipher_sync_action(&rs->md, recovery));
/* HM FIXME: do we want another state char for raid0? It shows 'D'/'A'/'-' now */
for (i = 0; i < rs->raid_disks; i++)
@@ -3561,7 +3560,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
* See Documentation/admin-guide/device-mapper/dm-raid.rst for
* information on each of these states.
*/
- DMEMIT(" %s", sync_action);
+ DMEMIT(" %s", sync_str(state));
/*
* v1.5.0+:
@@ -3955,11 +3954,22 @@ static int raid_preresume(struct dm_target *ti)
if (r)
return r;
- /* Resize bitmap to adjust to changed region size (aka MD bitmap chunksize) */
- if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) && mddev->bitmap &&
- mddev->bitmap_info.chunksize != to_bytes(rs->requested_bitmap_chunk_sectors)) {
- r = md_bitmap_resize(mddev->bitmap, mddev->dev_sectors,
- to_bytes(rs->requested_bitmap_chunk_sectors), 0);
+ /* We are extending the raid set size, adjust mddev/md_rdev sizes and set capacity. */
+ if (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags)) {
+ mddev->array_sectors = rs->array_sectors;
+ mddev->dev_sectors = rs->dev_sectors;
+ rs_set_rdev_sectors(rs);
+ rs_set_capacity(rs);
+ }
+
+ /* Resize bitmap to adjust to changed region size (aka MD bitmap chunksize) or grown device size */
+ if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) && mddev->bitmap &&
+ (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags) ||
+ (rs->requested_bitmap_chunk_sectors &&
+ mddev->bitmap_info.chunksize != to_bytes(rs->requested_bitmap_chunk_sectors)))) {
+ int chunksize = to_bytes(rs->requested_bitmap_chunk_sectors) ?: mddev->bitmap_info.chunksize;
+
+ r = md_bitmap_resize(mddev->bitmap, mddev->dev_sectors, chunksize, 0);
if (r)
DMERR("Failed to resize bitmap");
}
@@ -3968,8 +3978,10 @@ static int raid_preresume(struct dm_target *ti)
/* Be prepared for mddev_resume() in raid_resume() */
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
if (mddev->recovery_cp && mddev->recovery_cp < MaxSector) {
- set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
+ set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
mddev->resync_min = mddev->recovery_cp;
+ if (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags))
+ mddev->resync_max_sectors = mddev->dev_sectors;
}
/* Check for any reshape request unless new raid set */
@@ -4017,7 +4029,7 @@ static void raid_resume(struct dm_target *ti)
static struct target_type raid_target = {
.name = "raid",
- .version = {1, 14, 0},
+ .version = {1, 15, 0},
.module = THIS_MODULE,
.ctr = raid_ctr,
.dtr = raid_dtr,
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index f150f5c5492b..4fb1a40e68a0 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -18,7 +18,6 @@
#include <linux/vmalloc.h>
#include <linux/log2.h>
#include <linux/dm-kcopyd.h>
-#include <linux/semaphore.h>
#include "dm.h"
@@ -107,8 +106,8 @@ struct dm_snapshot {
/* The on disk metadata handler */
struct dm_exception_store *store;
- /* Maximum number of in-flight COW jobs. */
- struct semaphore cow_count;
+ unsigned in_progress;
+ struct wait_queue_head in_progress_wait;
struct dm_kcopyd_client *kcopyd_client;
@@ -162,8 +161,8 @@ struct dm_snapshot {
*/
#define DEFAULT_COW_THRESHOLD 2048
-static int cow_threshold = DEFAULT_COW_THRESHOLD;
-module_param_named(snapshot_cow_threshold, cow_threshold, int, 0644);
+static unsigned cow_threshold = DEFAULT_COW_THRESHOLD;
+module_param_named(snapshot_cow_threshold, cow_threshold, uint, 0644);
MODULE_PARM_DESC(snapshot_cow_threshold, "Maximum number of chunks being copied on write");
DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle,
@@ -1327,7 +1326,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad_hash_tables;
}
- sema_init(&s->cow_count, (cow_threshold > 0) ? cow_threshold : INT_MAX);
+ init_waitqueue_head(&s->in_progress_wait);
s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle);
if (IS_ERR(s->kcopyd_client)) {
@@ -1509,9 +1508,56 @@ static void snapshot_dtr(struct dm_target *ti)
dm_put_device(ti, s->origin);
+ WARN_ON(s->in_progress);
+
kfree(s);
}
+static void account_start_copy(struct dm_snapshot *s)
+{
+ spin_lock(&s->in_progress_wait.lock);
+ s->in_progress++;
+ spin_unlock(&s->in_progress_wait.lock);
+}
+
+static void account_end_copy(struct dm_snapshot *s)
+{
+ spin_lock(&s->in_progress_wait.lock);
+ BUG_ON(!s->in_progress);
+ s->in_progress--;
+ if (likely(s->in_progress <= cow_threshold) &&
+ unlikely(waitqueue_active(&s->in_progress_wait)))
+ wake_up_locked(&s->in_progress_wait);
+ spin_unlock(&s->in_progress_wait.lock);
+}
+
+static bool wait_for_in_progress(struct dm_snapshot *s, bool unlock_origins)
+{
+ if (unlikely(s->in_progress > cow_threshold)) {
+ spin_lock(&s->in_progress_wait.lock);
+ if (likely(s->in_progress > cow_threshold)) {
+ /*
+ * NOTE: this throttle doesn't account for whether
+ * the caller is servicing an IO that will trigger a COW
+ * so excess throttling may result for chunks not required
+ * to be COW'd. But if cow_threshold was reached, extra
+ * throttling is unlikely to negatively impact performance.
+ */
+ DECLARE_WAITQUEUE(wait, current);
+ __add_wait_queue(&s->in_progress_wait, &wait);
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ spin_unlock(&s->in_progress_wait.lock);
+ if (unlock_origins)
+ up_read(&_origins_lock);
+ io_schedule();
+ remove_wait_queue(&s->in_progress_wait, &wait);
+ return false;
+ }
+ spin_unlock(&s->in_progress_wait.lock);
+ }
+ return true;
+}
+
/*
* Flush a list of buffers.
*/
@@ -1527,7 +1573,7 @@ static void flush_bios(struct bio *bio)
}
}
-static int do_origin(struct dm_dev *origin, struct bio *bio);
+static int do_origin(struct dm_dev *origin, struct bio *bio, bool limit);
/*
* Flush a list of buffers.
@@ -1540,7 +1586,7 @@ static void retry_origin_bios(struct dm_snapshot *s, struct bio *bio)
while (bio) {
n = bio->bi_next;
bio->bi_next = NULL;
- r = do_origin(s->origin, bio);
+ r = do_origin(s->origin, bio, false);
if (r == DM_MAPIO_REMAPPED)
generic_make_request(bio);
bio = n;
@@ -1732,7 +1778,7 @@ static void copy_callback(int read_err, unsigned long write_err, void *context)
rb_link_node(&pe->out_of_order_node, parent, p);
rb_insert_color(&pe->out_of_order_node, &s->out_of_order_tree);
}
- up(&s->cow_count);
+ account_end_copy(s);
}
/*
@@ -1756,7 +1802,7 @@ static void start_copy(struct dm_snap_pending_exception *pe)
dest.count = src.count;
/* Hand over to kcopyd */
- down(&s->cow_count);
+ account_start_copy(s);
dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe);
}
@@ -1776,7 +1822,7 @@ static void start_full_bio(struct dm_snap_pending_exception *pe,
pe->full_bio = bio;
pe->full_bio_end_io = bio->bi_end_io;
- down(&s->cow_count);
+ account_start_copy(s);
callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client,
copy_callback, pe);
@@ -1866,7 +1912,7 @@ static void zero_callback(int read_err, unsigned long write_err, void *context)
struct bio *bio = context;
struct dm_snapshot *s = bio->bi_private;
- up(&s->cow_count);
+ account_end_copy(s);
bio->bi_status = write_err ? BLK_STS_IOERR : 0;
bio_endio(bio);
}
@@ -1880,7 +1926,7 @@ static void zero_exception(struct dm_snapshot *s, struct dm_exception *e,
dest.sector = bio->bi_iter.bi_sector;
dest.count = s->store->chunk_size;
- down(&s->cow_count);
+ account_start_copy(s);
WARN_ON_ONCE(bio->bi_private);
bio->bi_private = s;
dm_kcopyd_zero(s->kcopyd_client, 1, &dest, 0, zero_callback, bio);
@@ -1916,6 +1962,11 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
if (!s->valid)
return DM_MAPIO_KILL;
+ if (bio_data_dir(bio) == WRITE) {
+ while (unlikely(!wait_for_in_progress(s, false)))
+ ; /* wait_for_in_progress() has slept */
+ }
+
down_read(&s->lock);
dm_exception_table_lock(&lock);
@@ -2112,7 +2163,7 @@ redirect_to_origin:
if (bio_data_dir(bio) == WRITE) {
up_write(&s->lock);
- return do_origin(s->origin, bio);
+ return do_origin(s->origin, bio, false);
}
out_unlock:
@@ -2487,15 +2538,24 @@ next_snapshot:
/*
* Called on a write from the origin driver.
*/
-static int do_origin(struct dm_dev *origin, struct bio *bio)
+static int do_origin(struct dm_dev *origin, struct bio *bio, bool limit)
{
struct origin *o;
int r = DM_MAPIO_REMAPPED;
+again:
down_read(&_origins_lock);
o = __lookup_origin(origin->bdev);
- if (o)
+ if (o) {
+ if (limit) {
+ struct dm_snapshot *s;
+ list_for_each_entry(s, &o->snapshots, list)
+ if (unlikely(!wait_for_in_progress(s, true)))
+ goto again;
+ }
+
r = __origin_write(&o->snapshots, bio->bi_iter.bi_sector, bio);
+ }
up_read(&_origins_lock);
return r;
@@ -2608,7 +2668,7 @@ static int origin_map(struct dm_target *ti, struct bio *bio)
dm_accept_partial_bio(bio, available_sectors);
/* Only tell snapshots if this is a write */
- return do_origin(o->dev, bio);
+ return do_origin(o->dev, bio, true);
}
/*
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 8547d7594338..63bbcc20f49a 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -55,19 +55,6 @@ static void trigger_event(struct work_struct *work)
dm_table_event(sc->ti->table);
}
-static inline struct stripe_c *alloc_context(unsigned int stripes)
-{
- size_t len;
-
- if (dm_array_too_big(sizeof(struct stripe_c), sizeof(struct stripe),
- stripes))
- return NULL;
-
- len = sizeof(struct stripe_c) + (sizeof(struct stripe) * stripes);
-
- return kmalloc(len, GFP_KERNEL);
-}
-
/*
* Parse a single <dev> <sector> pair
*/
@@ -142,7 +129,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
return -EINVAL;
}
- sc = alloc_context(stripes);
+ sc = kmalloc(struct_size(sc, stripe, stripes), GFP_KERNEL);
if (!sc) {
ti->error = "Memory allocation for striped context "
"failed";
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 52e049554f5c..2ae0c1913766 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -918,21 +918,15 @@ bool dm_table_supports_dax(struct dm_table *t,
static bool dm_table_does_not_support_partial_completion(struct dm_table *t);
-struct verify_rq_based_data {
- unsigned sq_count;
- unsigned mq_count;
-};
-
-static int device_is_rq_based(struct dm_target *ti, struct dm_dev *dev,
- sector_t start, sector_t len, void *data)
+static int device_is_rq_stackable(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data)
{
- struct request_queue *q = bdev_get_queue(dev->bdev);
- struct verify_rq_based_data *v = data;
+ struct block_device *bdev = dev->bdev;
+ struct request_queue *q = bdev_get_queue(bdev);
- if (queue_is_mq(q))
- v->mq_count++;
- else
- v->sq_count++;
+ /* request-based cannot stack on partitions! */
+ if (bdev != bdev->bd_contains)
+ return false;
return queue_is_mq(q);
}
@@ -941,7 +935,6 @@ static int dm_table_determine_type(struct dm_table *t)
{
unsigned i;
unsigned bio_based = 0, request_based = 0, hybrid = 0;
- struct verify_rq_based_data v = {.sq_count = 0, .mq_count = 0};
struct dm_target *tgt;
struct list_head *devices = dm_table_get_devices(t);
enum dm_queue_mode live_md_type = dm_get_md_type(t->md);
@@ -1045,14 +1038,10 @@ verify_rq_based:
/* Non-request-stackable devices can't be used for request-based dm */
if (!tgt->type->iterate_devices ||
- !tgt->type->iterate_devices(tgt, device_is_rq_based, &v)) {
+ !tgt->type->iterate_devices(tgt, device_is_rq_stackable, NULL)) {
DMERR("table load rejected: including non-request-stackable devices");
return -EINVAL;
}
- if (v.sq_count > 0) {
- DMERR("table load rejected: not all devices are blk-mq request-stackable");
- return -EINVAL;
- }
return 0;
}
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index fcd887703f95..5a2c494cb552 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -609,13 +609,12 @@ static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master,
blk_status_t error)
{
struct bio_list bios;
- unsigned long flags;
bio_list_init(&bios);
- spin_lock_irqsave(&tc->lock, flags);
+ spin_lock_irq(&tc->lock);
__merge_bio_list(&bios, master);
- spin_unlock_irqrestore(&tc->lock, flags);
+ spin_unlock_irq(&tc->lock);
error_bio_list(&bios, error);
}
@@ -623,15 +622,14 @@ static void error_thin_bio_list(struct thin_c *tc, struct bio_list *master,
static void requeue_deferred_cells(struct thin_c *tc)
{
struct pool *pool = tc->pool;
- unsigned long flags;
struct list_head cells;
struct dm_bio_prison_cell *cell, *tmp;
INIT_LIST_HEAD(&cells);
- spin_lock_irqsave(&tc->lock, flags);
+ spin_lock_irq(&tc->lock);
list_splice_init(&tc->deferred_cells, &cells);
- spin_unlock_irqrestore(&tc->lock, flags);
+ spin_unlock_irq(&tc->lock);
list_for_each_entry_safe(cell, tmp, &cells, user_list)
cell_requeue(pool, cell);
@@ -640,14 +638,13 @@ static void requeue_deferred_cells(struct thin_c *tc)
static void requeue_io(struct thin_c *tc)
{
struct bio_list bios;
- unsigned long flags;
bio_list_init(&bios);
- spin_lock_irqsave(&tc->lock, flags);
+ spin_lock_irq(&tc->lock);
__merge_bio_list(&bios, &tc->deferred_bio_list);
__merge_bio_list(&bios, &tc->retry_on_resume_list);
- spin_unlock_irqrestore(&tc->lock, flags);
+ spin_unlock_irq(&tc->lock);
error_bio_list(&bios, BLK_STS_DM_REQUEUE);
requeue_deferred_cells(tc);
@@ -756,7 +753,6 @@ static void inc_all_io_entry(struct pool *pool, struct bio *bio)
static void issue(struct thin_c *tc, struct bio *bio)
{
struct pool *pool = tc->pool;
- unsigned long flags;
if (!bio_triggers_commit(tc, bio)) {
generic_make_request(bio);
@@ -777,9 +773,9 @@ static void issue(struct thin_c *tc, struct bio *bio)
* Batch together any bios that trigger commits and then issue a
* single commit for them in process_deferred_bios().
*/
- spin_lock_irqsave(&pool->lock, flags);
+ spin_lock_irq(&pool->lock);
bio_list_add(&pool->deferred_flush_bios, bio);
- spin_unlock_irqrestore(&pool->lock, flags);
+ spin_unlock_irq(&pool->lock);
}
static void remap_to_origin_and_issue(struct thin_c *tc, struct bio *bio)
@@ -886,12 +882,15 @@ static void cell_defer_no_holder(struct thin_c *tc, struct dm_bio_prison_cell *c
{
struct pool *pool = tc->pool;
unsigned long flags;
+ int has_work;
spin_lock_irqsave(&tc->lock, flags);
cell_release_no_holder(pool, cell, &tc->deferred_bio_list);
+ has_work = !bio_list_empty(&tc->deferred_bio_list);
spin_unlock_irqrestore(&tc->lock, flags);
- wake_worker(pool);
+ if (has_work)
+ wake_worker(pool);
}
static void thin_defer_bio(struct thin_c *tc, struct bio *bio);
@@ -960,7 +959,6 @@ static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m)
static void complete_overwrite_bio(struct thin_c *tc, struct bio *bio)
{
struct pool *pool = tc->pool;
- unsigned long flags;
/*
* If the bio has the REQ_FUA flag set we must commit the metadata
@@ -985,9 +983,9 @@ static void complete_overwrite_bio(struct thin_c *tc, struct bio *bio)
* Batch together any bios that trigger commits and then issue a
* single commit for them in process_deferred_bios().
*/
- spin_lock_irqsave(&pool->lock, flags);
+ spin_lock_irq(&pool->lock);
bio_list_add(&pool->deferred_flush_completions, bio);
- spin_unlock_irqrestore(&pool->lock, flags);
+ spin_unlock_irq(&pool->lock);
}
static void process_prepared_mapping(struct dm_thin_new_mapping *m)
@@ -1226,14 +1224,13 @@ static void process_prepared_discard_passdown_pt2(struct dm_thin_new_mapping *m)
static void process_prepared(struct pool *pool, struct list_head *head,
process_mapping_fn *fn)
{
- unsigned long flags;
struct list_head maps;
struct dm_thin_new_mapping *m, *tmp;
INIT_LIST_HEAD(&maps);
- spin_lock_irqsave(&pool->lock, flags);
+ spin_lock_irq(&pool->lock);
list_splice_init(head, &maps);
- spin_unlock_irqrestore(&pool->lock, flags);
+ spin_unlock_irq(&pool->lock);
list_for_each_entry_safe(m, tmp, &maps, list)
(*fn)(m);
@@ -1510,14 +1507,12 @@ static int commit(struct pool *pool)
static void check_low_water_mark(struct pool *pool, dm_block_t free_blocks)
{
- unsigned long flags;
-
if (free_blocks <= pool->low_water_blocks && !pool->low_water_triggered) {
DMWARN("%s: reached low water mark for data device: sending event.",
dm_device_name(pool->pool_md));
- spin_lock_irqsave(&pool->lock, flags);
+ spin_lock_irq(&pool->lock);
pool->low_water_triggered = true;
- spin_unlock_irqrestore(&pool->lock, flags);
+ spin_unlock_irq(&pool->lock);
dm_table_event(pool->ti->table);
}
}
@@ -1593,11 +1588,10 @@ static void retry_on_resume(struct bio *bio)
{
struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
struct thin_c *tc = h->tc;
- unsigned long flags;
- spin_lock_irqsave(&tc->lock, flags);
+ spin_lock_irq(&tc->lock);
bio_list_add(&tc->retry_on_resume_list, bio);
- spin_unlock_irqrestore(&tc->lock, flags);
+ spin_unlock_irq(&tc->lock);
}
static blk_status_t should_error_unserviceable_bio(struct pool *pool)
@@ -2170,7 +2164,6 @@ static void __sort_thin_deferred_bios(struct thin_c *tc)
static void process_thin_deferred_bios(struct thin_c *tc)
{
struct pool *pool = tc->pool;
- unsigned long flags;
struct bio *bio;
struct bio_list bios;
struct blk_plug plug;
@@ -2184,10 +2177,10 @@ static void process_thin_deferred_bios(struct thin_c *tc)
bio_list_init(&bios);
- spin_lock_irqsave(&tc->lock, flags);
+ spin_lock_irq(&tc->lock);
if (bio_list_empty(&tc->deferred_bio_list)) {
- spin_unlock_irqrestore(&tc->lock, flags);
+ spin_unlock_irq(&tc->lock);
return;
}
@@ -2196,7 +2189,7 @@ static void process_thin_deferred_bios(struct thin_c *tc)
bio_list_merge(&bios, &tc->deferred_bio_list);
bio_list_init(&tc->deferred_bio_list);
- spin_unlock_irqrestore(&tc->lock, flags);
+ spin_unlock_irq(&tc->lock);
blk_start_plug(&plug);
while ((bio = bio_list_pop(&bios))) {
@@ -2206,10 +2199,10 @@ static void process_thin_deferred_bios(struct thin_c *tc)
* prepared mappings to process.
*/
if (ensure_next_mapping(pool)) {
- spin_lock_irqsave(&tc->lock, flags);
+ spin_lock_irq(&tc->lock);
bio_list_add(&tc->deferred_bio_list, bio);
bio_list_merge(&tc->deferred_bio_list, &bios);
- spin_unlock_irqrestore(&tc->lock, flags);
+ spin_unlock_irq(&tc->lock);
break;
}
@@ -2264,16 +2257,15 @@ static unsigned sort_cells(struct pool *pool, struct list_head *cells)
static void process_thin_deferred_cells(struct thin_c *tc)
{
struct pool *pool = tc->pool;
- unsigned long flags;
struct list_head cells;
struct dm_bio_prison_cell *cell;
unsigned i, j, count;
INIT_LIST_HEAD(&cells);
- spin_lock_irqsave(&tc->lock, flags);
+ spin_lock_irq(&tc->lock);
list_splice_init(&tc->deferred_cells, &cells);
- spin_unlock_irqrestore(&tc->lock, flags);
+ spin_unlock_irq(&tc->lock);
if (list_empty(&cells))
return;
@@ -2294,9 +2286,9 @@ static void process_thin_deferred_cells(struct thin_c *tc)
for (j = i; j < count; j++)
list_add(&pool->cell_sort_array[j]->user_list, &cells);
- spin_lock_irqsave(&tc->lock, flags);
+ spin_lock_irq(&tc->lock);
list_splice(&cells, &tc->deferred_cells);
- spin_unlock_irqrestore(&tc->lock, flags);
+ spin_unlock_irq(&tc->lock);
return;
}
@@ -2349,7 +2341,6 @@ static struct thin_c *get_next_thin(struct pool *pool, struct thin_c *tc)
static void process_deferred_bios(struct pool *pool)
{
- unsigned long flags;
struct bio *bio;
struct bio_list bios, bio_completions;
struct thin_c *tc;
@@ -2368,13 +2359,13 @@ static void process_deferred_bios(struct pool *pool)
bio_list_init(&bios);
bio_list_init(&bio_completions);
- spin_lock_irqsave(&pool->lock, flags);
+ spin_lock_irq(&pool->lock);
bio_list_merge(&bios, &pool->deferred_flush_bios);
bio_list_init(&pool->deferred_flush_bios);
bio_list_merge(&bio_completions, &pool->deferred_flush_completions);
bio_list_init(&pool->deferred_flush_completions);
- spin_unlock_irqrestore(&pool->lock, flags);
+ spin_unlock_irq(&pool->lock);
if (bio_list_empty(&bios) && bio_list_empty(&bio_completions) &&
!(dm_pool_changed_this_transaction(pool->pmd) && need_commit_due_to_time(pool)))
@@ -2657,12 +2648,11 @@ static void metadata_operation_failed(struct pool *pool, const char *op, int r)
*/
static void thin_defer_bio(struct thin_c *tc, struct bio *bio)
{
- unsigned long flags;
struct pool *pool = tc->pool;
- spin_lock_irqsave(&tc->lock, flags);
+ spin_lock_irq(&tc->lock);
bio_list_add(&tc->deferred_bio_list, bio);
- spin_unlock_irqrestore(&tc->lock, flags);
+ spin_unlock_irq(&tc->lock);
wake_worker(pool);
}
@@ -2678,13 +2668,12 @@ static void thin_defer_bio_with_throttle(struct thin_c *tc, struct bio *bio)
static void thin_defer_cell(struct thin_c *tc, struct dm_bio_prison_cell *cell)
{
- unsigned long flags;
struct pool *pool = tc->pool;
throttle_lock(&pool->throttle);
- spin_lock_irqsave(&tc->lock, flags);
+ spin_lock_irq(&tc->lock);
list_add_tail(&cell->user_list, &tc->deferred_cells);
- spin_unlock_irqrestore(&tc->lock, flags);
+ spin_unlock_irq(&tc->lock);
throttle_unlock(&pool->throttle);
wake_worker(pool);
@@ -2810,15 +2799,14 @@ static int pool_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
static void requeue_bios(struct pool *pool)
{
- unsigned long flags;
struct thin_c *tc;
rcu_read_lock();
list_for_each_entry_rcu(tc, &pool->active_thins, list) {
- spin_lock_irqsave(&tc->lock, flags);
+ spin_lock_irq(&tc->lock);
bio_list_merge(&tc->deferred_bio_list, &tc->retry_on_resume_list);
bio_list_init(&tc->retry_on_resume_list);
- spin_unlock_irqrestore(&tc->lock, flags);
+ spin_unlock_irq(&tc->lock);
}
rcu_read_unlock();
}
@@ -3412,15 +3400,14 @@ static int pool_map(struct dm_target *ti, struct bio *bio)
int r;
struct pool_c *pt = ti->private;
struct pool *pool = pt->pool;
- unsigned long flags;
/*
* As this is a singleton target, ti->begin is always zero.
*/
- spin_lock_irqsave(&pool->lock, flags);
+ spin_lock_irq(&pool->lock);
bio_set_dev(bio, pt->data_dev->bdev);
r = DM_MAPIO_REMAPPED;
- spin_unlock_irqrestore(&pool->lock, flags);
+ spin_unlock_irq(&pool->lock);
return r;
}
@@ -3591,7 +3578,6 @@ static void pool_resume(struct dm_target *ti)
{
struct pool_c *pt = ti->private;
struct pool *pool = pt->pool;
- unsigned long flags;
/*
* Must requeue active_thins' bios and then resume
@@ -3600,10 +3586,10 @@ static void pool_resume(struct dm_target *ti)
requeue_bios(pool);
pool_resume_active_thins(pool);
- spin_lock_irqsave(&pool->lock, flags);
+ spin_lock_irq(&pool->lock);
pool->low_water_triggered = false;
pool->suspended = false;
- spin_unlock_irqrestore(&pool->lock, flags);
+ spin_unlock_irq(&pool->lock);
do_waker(&pool->waker.work);
}
@@ -3612,11 +3598,10 @@ static void pool_presuspend(struct dm_target *ti)
{
struct pool_c *pt = ti->private;
struct pool *pool = pt->pool;
- unsigned long flags;
- spin_lock_irqsave(&pool->lock, flags);
+ spin_lock_irq(&pool->lock);
pool->suspended = true;
- spin_unlock_irqrestore(&pool->lock, flags);
+ spin_unlock_irq(&pool->lock);
pool_suspend_active_thins(pool);
}
@@ -3625,13 +3610,12 @@ static void pool_presuspend_undo(struct dm_target *ti)
{
struct pool_c *pt = ti->private;
struct pool *pool = pt->pool;
- unsigned long flags;
pool_resume_active_thins(pool);
- spin_lock_irqsave(&pool->lock, flags);
+ spin_lock_irq(&pool->lock);
pool->suspended = false;
- spin_unlock_irqrestore(&pool->lock, flags);
+ spin_unlock_irq(&pool->lock);
}
static void pool_postsuspend(struct dm_target *ti)
@@ -4110,11 +4094,10 @@ static void thin_put(struct thin_c *tc)
static void thin_dtr(struct dm_target *ti)
{
struct thin_c *tc = ti->private;
- unsigned long flags;
- spin_lock_irqsave(&tc->pool->lock, flags);
+ spin_lock_irq(&tc->pool->lock);
list_del_rcu(&tc->list);
- spin_unlock_irqrestore(&tc->pool->lock, flags);
+ spin_unlock_irq(&tc->pool->lock);
synchronize_rcu();
thin_put(tc);
@@ -4150,7 +4133,6 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
struct thin_c *tc;
struct dm_dev *pool_dev, *origin_dev;
struct mapped_device *pool_md;
- unsigned long flags;
mutex_lock(&dm_thin_pool_table.mutex);
@@ -4244,9 +4226,9 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
mutex_unlock(&dm_thin_pool_table.mutex);
- spin_lock_irqsave(&tc->pool->lock, flags);
+ spin_lock_irq(&tc->pool->lock);
if (tc->pool->suspended) {
- spin_unlock_irqrestore(&tc->pool->lock, flags);
+ spin_unlock_irq(&tc->pool->lock);
mutex_lock(&dm_thin_pool_table.mutex); /* reacquire for __pool_dec */
ti->error = "Unable to activate thin device while pool is suspended";
r = -EINVAL;
@@ -4255,7 +4237,7 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
refcount_set(&tc->refcount, 1);
init_completion(&tc->can_destroy);
list_add_tail_rcu(&tc->list, &tc->pool->active_thins);
- spin_unlock_irqrestore(&tc->pool->lock, flags);
+ spin_unlock_irq(&tc->pool->lock);
/*
* This synchronize_rcu() call is needed here otherwise we risk a
* wake_worker() call finding no bios to process (because the newly
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index d06b8aa41e26..7d727a72aa13 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -1218,7 +1218,8 @@ bio_copy:
}
} while (bio->bi_iter.bi_size);
- if (unlikely(wc->uncommitted_blocks >= wc->autocommit_blocks))
+ if (unlikely(bio->bi_opf & REQ_FUA ||
+ wc->uncommitted_blocks >= wc->autocommit_blocks))
writecache_flush(wc);
else
writecache_schedule_autocommit(wc);
@@ -1561,7 +1562,7 @@ static void writecache_writeback(struct work_struct *work)
{
struct dm_writecache *wc = container_of(work, struct dm_writecache, writeback_work);
struct blk_plug plug;
- struct wc_entry *f, *g, *e = NULL;
+ struct wc_entry *f, *uninitialized_var(g), *e = NULL;
struct rb_node *node, *next_node;
struct list_head skipped;
struct writeback_list wbl;
diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index 595a73110e17..22b3cb0050a7 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -554,6 +554,7 @@ static struct dmz_mblock *dmz_get_mblock(struct dmz_metadata *zmd,
TASK_UNINTERRUPTIBLE);
if (test_bit(DMZ_META_ERROR, &mblk->state)) {
dmz_release_mblock(zmd, mblk);
+ dmz_check_bdev(zmd->dev);
return ERR_PTR(-EIO);
}
@@ -625,6 +626,8 @@ static int dmz_rdwr_block(struct dmz_metadata *zmd, int op, sector_t block,
ret = submit_bio_wait(bio);
bio_put(bio);
+ if (ret)
+ dmz_check_bdev(zmd->dev);
return ret;
}
@@ -691,6 +694,7 @@ static int dmz_write_dirty_mblocks(struct dmz_metadata *zmd,
TASK_UNINTERRUPTIBLE);
if (test_bit(DMZ_META_ERROR, &mblk->state)) {
clear_bit(DMZ_META_ERROR, &mblk->state);
+ dmz_check_bdev(zmd->dev);
ret = -EIO;
}
nr_mblks_submitted--;
@@ -768,7 +772,7 @@ int dmz_flush_metadata(struct dmz_metadata *zmd)
/* If there are no dirty metadata blocks, just flush the device cache */
if (list_empty(&write_list)) {
ret = blkdev_issue_flush(zmd->dev->bdev, GFP_NOIO, NULL);
- goto out;
+ goto err;
}
/*
@@ -778,7 +782,7 @@ int dmz_flush_metadata(struct dmz_metadata *zmd)
*/
ret = dmz_log_dirty_mblocks(zmd, &write_list);
if (ret)
- goto out;
+ goto err;
/*
* The log is on disk. It is now safe to update in place
@@ -786,11 +790,11 @@ int dmz_flush_metadata(struct dmz_metadata *zmd)
*/
ret = dmz_write_dirty_mblocks(zmd, &write_list, zmd->mblk_primary);
if (ret)
- goto out;
+ goto err;
ret = dmz_write_sb(zmd, zmd->mblk_primary);
if (ret)
- goto out;
+ goto err;
while (!list_empty(&write_list)) {
mblk = list_first_entry(&write_list, struct dmz_mblock, link);
@@ -805,16 +809,20 @@ int dmz_flush_metadata(struct dmz_metadata *zmd)
zmd->sb_gen++;
out:
- if (ret && !list_empty(&write_list)) {
- spin_lock(&zmd->mblk_lock);
- list_splice(&write_list, &zmd->mblk_dirty_list);
- spin_unlock(&zmd->mblk_lock);
- }
-
dmz_unlock_flush(zmd);
up_write(&zmd->mblk_sem);
return ret;
+
+err:
+ if (!list_empty(&write_list)) {
+ spin_lock(&zmd->mblk_lock);
+ list_splice(&write_list, &zmd->mblk_dirty_list);
+ spin_unlock(&zmd->mblk_lock);
+ }
+ if (!dmz_check_bdev(zmd->dev))
+ ret = -EIO;
+ goto out;
}
/*
@@ -1080,9 +1088,10 @@ static int dmz_load_sb(struct dmz_metadata *zmd)
/*
* Initialize a zone descriptor.
*/
-static int dmz_init_zone(struct dmz_metadata *zmd, struct dm_zone *zone,
- struct blk_zone *blkz)
+static int dmz_init_zone(struct blk_zone *blkz, unsigned int idx, void *data)
{
+ struct dmz_metadata *zmd = data;
+ struct dm_zone *zone = &zmd->zones[idx];
struct dmz_dev *dev = zmd->dev;
/* Ignore the eventual last runt (smaller) zone */
@@ -1096,26 +1105,29 @@ static int dmz_init_zone(struct dmz_metadata *zmd, struct dm_zone *zone,
atomic_set(&zone->refcount, 0);
zone->chunk = DMZ_MAP_UNMAPPED;
- if (blkz->type == BLK_ZONE_TYPE_CONVENTIONAL) {
+ switch (blkz->type) {
+ case BLK_ZONE_TYPE_CONVENTIONAL:
set_bit(DMZ_RND, &zone->flags);
zmd->nr_rnd_zones++;
- } else if (blkz->type == BLK_ZONE_TYPE_SEQWRITE_REQ ||
- blkz->type == BLK_ZONE_TYPE_SEQWRITE_PREF) {
+ break;
+ case BLK_ZONE_TYPE_SEQWRITE_REQ:
+ case BLK_ZONE_TYPE_SEQWRITE_PREF:
set_bit(DMZ_SEQ, &zone->flags);
- } else
+ break;
+ default:
return -ENXIO;
-
- if (blkz->cond == BLK_ZONE_COND_OFFLINE)
- set_bit(DMZ_OFFLINE, &zone->flags);
- else if (blkz->cond == BLK_ZONE_COND_READONLY)
- set_bit(DMZ_READ_ONLY, &zone->flags);
+ }
if (dmz_is_rnd(zone))
zone->wp_block = 0;
else
zone->wp_block = dmz_sect2blk(blkz->wp - blkz->start);
- if (!dmz_is_offline(zone) && !dmz_is_readonly(zone)) {
+ if (blkz->cond == BLK_ZONE_COND_OFFLINE)
+ set_bit(DMZ_OFFLINE, &zone->flags);
+ else if (blkz->cond == BLK_ZONE_COND_READONLY)
+ set_bit(DMZ_READ_ONLY, &zone->flags);
+ else {
zmd->nr_useable_zones++;
if (dmz_is_rnd(zone)) {
zmd->nr_rnd_zones++;
@@ -1139,23 +1151,13 @@ static void dmz_drop_zones(struct dmz_metadata *zmd)
}
/*
- * The size of a zone report in number of zones.
- * This results in 4096*64B=256KB report zones commands.
- */
-#define DMZ_REPORT_NR_ZONES 4096
-
-/*
* Allocate and initialize zone descriptors using the zone
* information from disk.
*/
static int dmz_init_zones(struct dmz_metadata *zmd)
{
struct dmz_dev *dev = zmd->dev;
- struct dm_zone *zone;
- struct blk_zone *blkz;
- unsigned int nr_blkz;
- sector_t sector = 0;
- int i, ret = 0;
+ int ret;
/* Init */
zmd->zone_bitmap_size = dev->zone_nr_blocks >> 3;
@@ -1169,54 +1171,38 @@ static int dmz_init_zones(struct dmz_metadata *zmd)
dmz_dev_info(dev, "Using %zu B for zone information",
sizeof(struct dm_zone) * dev->nr_zones);
- /* Get zone information */
- nr_blkz = DMZ_REPORT_NR_ZONES;
- blkz = kcalloc(nr_blkz, sizeof(struct blk_zone), GFP_KERNEL);
- if (!blkz) {
- ret = -ENOMEM;
- goto out;
- }
-
/*
- * Get zone information and initialize zone descriptors.
- * At the same time, determine where the super block
- * should be: first block of the first randomly writable
- * zone.
+ * Get zone information and initialize zone descriptors. At the same
+ * time, determine where the super block should be: first block of the
+ * first randomly writable zone.
*/
- zone = zmd->zones;
- while (sector < dev->capacity) {
- /* Get zone information */
- nr_blkz = DMZ_REPORT_NR_ZONES;
- ret = blkdev_report_zones(dev->bdev, sector, blkz, &nr_blkz);
- if (ret) {
- dmz_dev_err(dev, "Report zones failed %d", ret);
- goto out;
- }
+ ret = blkdev_report_zones(dev->bdev, 0, BLK_ALL_ZONES, dmz_init_zone,
+ zmd);
+ if (ret < 0) {
+ dmz_drop_zones(zmd);
+ return ret;
+ }
- if (!nr_blkz)
- break;
+ return 0;
+}
- /* Process report */
- for (i = 0; i < nr_blkz; i++) {
- ret = dmz_init_zone(zmd, zone, &blkz[i]);
- if (ret)
- goto out;
- sector += dev->zone_nr_sectors;
- zone++;
- }
- }
+static int dmz_update_zone_cb(struct blk_zone *blkz, unsigned int idx,
+ void *data)
+{
+ struct dm_zone *zone = data;
- /* The entire zone configuration of the disk should now be known */
- if (sector < dev->capacity) {
- dmz_dev_err(dev, "Failed to get correct zone information");
- ret = -ENXIO;
- }
-out:
- kfree(blkz);
- if (ret)
- dmz_drop_zones(zmd);
+ clear_bit(DMZ_OFFLINE, &zone->flags);
+ clear_bit(DMZ_READ_ONLY, &zone->flags);
+ if (blkz->cond == BLK_ZONE_COND_OFFLINE)
+ set_bit(DMZ_OFFLINE, &zone->flags);
+ else if (blkz->cond == BLK_ZONE_COND_READONLY)
+ set_bit(DMZ_READ_ONLY, &zone->flags);
- return ret;
+ if (dmz_is_seq(zone))
+ zone->wp_block = dmz_sect2blk(blkz->wp - blkz->start);
+ else
+ zone->wp_block = 0;
+ return 0;
}
/*
@@ -1224,9 +1210,7 @@ out:
*/
static int dmz_update_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
{
- unsigned int nr_blkz = 1;
unsigned int noio_flag;
- struct blk_zone blkz;
int ret;
/*
@@ -1236,29 +1220,19 @@ static int dmz_update_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
* GFP_NOIO was specified.
*/
noio_flag = memalloc_noio_save();
- ret = blkdev_report_zones(zmd->dev->bdev, dmz_start_sect(zmd, zone),
- &blkz, &nr_blkz);
+ ret = blkdev_report_zones(zmd->dev->bdev, dmz_start_sect(zmd, zone), 1,
+ dmz_update_zone_cb, zone);
memalloc_noio_restore(noio_flag);
- if (!nr_blkz)
+
+ if (ret == 0)
ret = -EIO;
- if (ret) {
+ if (ret < 0) {
dmz_dev_err(zmd->dev, "Get zone %u report failed",
dmz_id(zmd, zone));
+ dmz_check_bdev(zmd->dev);
return ret;
}
- clear_bit(DMZ_OFFLINE, &zone->flags);
- clear_bit(DMZ_READ_ONLY, &zone->flags);
- if (blkz.cond == BLK_ZONE_COND_OFFLINE)
- set_bit(DMZ_OFFLINE, &zone->flags);
- else if (blkz.cond == BLK_ZONE_COND_READONLY)
- set_bit(DMZ_READ_ONLY, &zone->flags);
-
- if (dmz_is_seq(zone))
- zone->wp_block = dmz_sect2blk(blkz.wp - blkz.start);
- else
- zone->wp_block = 0;
-
return 0;
}
@@ -1312,9 +1286,9 @@ static int dmz_reset_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
if (!dmz_is_empty(zone) || dmz_seq_write_err(zone)) {
struct dmz_dev *dev = zmd->dev;
- ret = blkdev_reset_zones(dev->bdev,
- dmz_start_sect(zmd, zone),
- dev->zone_nr_sectors, GFP_NOIO);
+ ret = blkdev_zone_mgmt(dev->bdev, REQ_OP_ZONE_RESET,
+ dmz_start_sect(zmd, zone),
+ dev->zone_nr_sectors, GFP_NOIO);
if (ret) {
dmz_dev_err(dev, "Reset zone %u failed %d",
dmz_id(zmd, zone), ret);
diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c
index d240d7ca8a8a..e7ace908a9b7 100644
--- a/drivers/md/dm-zoned-reclaim.c
+++ b/drivers/md/dm-zoned-reclaim.c
@@ -82,6 +82,7 @@ static int dmz_reclaim_align_wp(struct dmz_reclaim *zrc, struct dm_zone *zone,
"Align zone %u wp %llu to %llu (wp+%u) blocks failed %d",
dmz_id(zmd, zone), (unsigned long long)wp_block,
(unsigned long long)block, nr_blocks, ret);
+ dmz_check_bdev(zrc->dev);
return ret;
}
@@ -489,12 +490,7 @@ static void dmz_reclaim_work(struct work_struct *work)
ret = dmz_do_reclaim(zrc);
if (ret) {
dmz_dev_debug(zrc->dev, "Reclaim error %d\n", ret);
- if (ret == -EIO)
- /*
- * LLD might be performing some error handling sequence
- * at the underlying device. To not interfere, do not
- * attempt to schedule the next reclaim run immediately.
- */
+ if (!dmz_check_bdev(zrc->dev))
return;
}
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index d3bcc4197f5d..4574e0dedbd6 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -80,6 +80,8 @@ static inline void dmz_bio_endio(struct bio *bio, blk_status_t status)
if (status != BLK_STS_OK && bio->bi_status == BLK_STS_OK)
bio->bi_status = status;
+ if (bio->bi_status != BLK_STS_OK)
+ bioctx->target->dev->flags |= DMZ_CHECK_BDEV;
if (refcount_dec_and_test(&bioctx->ref)) {
struct dm_zone *zone = bioctx->zone;
@@ -565,32 +567,52 @@ out:
}
/*
- * Check the backing device availability. If it's on the way out,
+ * Check if the backing device is being removed. If it's on the way out,
* start failing I/O. Reclaim and metadata components also call this
* function to cleanly abort operation in the event of such failure.
*/
bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev)
{
- struct gendisk *disk;
+ if (dmz_dev->flags & DMZ_BDEV_DYING)
+ return true;
- if (!(dmz_dev->flags & DMZ_BDEV_DYING)) {
- disk = dmz_dev->bdev->bd_disk;
- if (blk_queue_dying(bdev_get_queue(dmz_dev->bdev))) {
- dmz_dev_warn(dmz_dev, "Backing device queue dying");
- dmz_dev->flags |= DMZ_BDEV_DYING;
- } else if (disk->fops->check_events) {
- if (disk->fops->check_events(disk, 0) &
- DISK_EVENT_MEDIA_CHANGE) {
- dmz_dev_warn(dmz_dev, "Backing device offline");
- dmz_dev->flags |= DMZ_BDEV_DYING;
- }
- }
+ if (dmz_dev->flags & DMZ_CHECK_BDEV)
+ return !dmz_check_bdev(dmz_dev);
+
+ if (blk_queue_dying(bdev_get_queue(dmz_dev->bdev))) {
+ dmz_dev_warn(dmz_dev, "Backing device queue dying");
+ dmz_dev->flags |= DMZ_BDEV_DYING;
}
return dmz_dev->flags & DMZ_BDEV_DYING;
}
/*
+ * Check the backing device availability. This detects such events as
+ * backing device going offline due to errors, media removals, etc.
+ * This check is less efficient than dmz_bdev_is_dying() and should
+ * only be performed as a part of error handling.
+ */
+bool dmz_check_bdev(struct dmz_dev *dmz_dev)
+{
+ struct gendisk *disk;
+
+ dmz_dev->flags &= ~DMZ_CHECK_BDEV;
+
+ if (dmz_bdev_is_dying(dmz_dev))
+ return false;
+
+ disk = dmz_dev->bdev->bd_disk;
+ if (disk->fops->check_events &&
+ disk->fops->check_events(disk, 0) & DISK_EVENT_MEDIA_CHANGE) {
+ dmz_dev_warn(dmz_dev, "Backing device offline");
+ dmz_dev->flags |= DMZ_BDEV_DYING;
+ }
+
+ return !(dmz_dev->flags & DMZ_BDEV_DYING);
+}
+
+/*
* Process a new BIO.
*/
static int dmz_map(struct dm_target *ti, struct bio *bio)
@@ -902,8 +924,8 @@ static int dmz_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
{
struct dmz_target *dmz = ti->private;
- if (dmz_bdev_is_dying(dmz->dev))
- return -ENODEV;
+ if (!dmz_check_bdev(dmz->dev))
+ return -EIO;
*bdev = dmz->dev->bdev;
diff --git a/drivers/md/dm-zoned.h b/drivers/md/dm-zoned.h
index d8e70b0ade35..5b5e493d479c 100644
--- a/drivers/md/dm-zoned.h
+++ b/drivers/md/dm-zoned.h
@@ -72,6 +72,7 @@ struct dmz_dev {
/* Device flags. */
#define DMZ_BDEV_DYING (1 << 0)
+#define DMZ_CHECK_BDEV (2 << 0)
/*
* Zone descriptor.
@@ -255,5 +256,6 @@ void dmz_schedule_reclaim(struct dmz_reclaim *zrc);
* Functions defined in dm-zoned-target.c
*/
bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev);
+bool dmz_check_bdev(struct dmz_dev *dmz_dev);
#endif /* DM_ZONED_H */
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 1a5e328c443a..e8f9661a10a1 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -440,14 +440,48 @@ static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
return dm_get_geometry(md, geo);
}
+#ifdef CONFIG_BLK_DEV_ZONED
+int dm_report_zones_cb(struct blk_zone *zone, unsigned int idx, void *data)
+{
+ struct dm_report_zones_args *args = data;
+ sector_t sector_diff = args->tgt->begin - args->start;
+
+ /*
+ * Ignore zones beyond the target range.
+ */
+ if (zone->start >= args->start + args->tgt->len)
+ return 0;
+
+ /*
+ * Remap the start sector and write pointer position of the zone
+ * to match its position in the target range.
+ */
+ zone->start += sector_diff;
+ if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) {
+ if (zone->cond == BLK_ZONE_COND_FULL)
+ zone->wp = zone->start + zone->len;
+ else if (zone->cond == BLK_ZONE_COND_EMPTY)
+ zone->wp = zone->start;
+ else
+ zone->wp += sector_diff;
+ }
+
+ args->next_sector = zone->start + zone->len;
+ return args->orig_cb(zone, args->zone_idx++, args->orig_data);
+}
+EXPORT_SYMBOL_GPL(dm_report_zones_cb);
+
static int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
- struct blk_zone *zones, unsigned int *nr_zones)
+ unsigned int nr_zones, report_zones_cb cb, void *data)
{
-#ifdef CONFIG_BLK_DEV_ZONED
struct mapped_device *md = disk->private_data;
- struct dm_target *tgt;
struct dm_table *map;
int srcu_idx, ret;
+ struct dm_report_zones_args args = {
+ .next_sector = sector,
+ .orig_data = data,
+ .orig_cb = cb,
+ };
if (dm_suspended_md(md))
return -EAGAIN;
@@ -456,38 +490,30 @@ static int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
if (!map)
return -EIO;
- tgt = dm_table_find_target(map, sector);
- if (!tgt) {
- ret = -EIO;
- goto out;
- }
+ do {
+ struct dm_target *tgt;
- /*
- * If we are executing this, we already know that the block device
- * is a zoned device and so each target should have support for that
- * type of drive. A missing report_zones method means that the target
- * driver has a problem.
- */
- if (WARN_ON(!tgt->type->report_zones)) {
- ret = -EIO;
- goto out;
- }
+ tgt = dm_table_find_target(map, args.next_sector);
+ if (WARN_ON_ONCE(!tgt->type->report_zones)) {
+ ret = -EIO;
+ goto out;
+ }
- /*
- * blkdev_report_zones() will loop and call this again to cover all the
- * zones of the target, eventually moving on to the next target.
- * So there is no need to loop here trying to fill the entire array
- * of zones.
- */
- ret = tgt->type->report_zones(tgt, sector, zones, nr_zones);
+ args.tgt = tgt;
+ ret = tgt->type->report_zones(tgt, &args, nr_zones);
+ if (ret < 0)
+ goto out;
+ } while (args.zone_idx < nr_zones &&
+ args.next_sector < get_capacity(disk));
+ ret = args.zone_idx;
out:
dm_put_live_table(md, srcu_idx);
return ret;
-#else
- return -ENOTSUPP;
-#endif
}
+#else
+#define dm_blk_report_zones NULL
+#endif /* CONFIG_BLK_DEV_ZONED */
static int dm_prepare_ioctl(struct mapped_device *md, int *srcu_idx,
struct block_device **bdev)
@@ -1174,7 +1200,8 @@ static size_t dm_dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
/*
* A target may call dm_accept_partial_bio only from the map routine. It is
- * allowed for all bio types except REQ_PREFLUSH and REQ_OP_ZONE_RESET.
+ * allowed for all bio types except REQ_PREFLUSH, REQ_OP_ZONE_RESET,
+ * REQ_OP_ZONE_OPEN, REQ_OP_ZONE_CLOSE and REQ_OP_ZONE_FINISH.
*
* dm_accept_partial_bio informs the dm that the target only wants to process
* additional n_sectors sectors of the bio and the rest of the data should be
@@ -1212,54 +1239,6 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
}
EXPORT_SYMBOL_GPL(dm_accept_partial_bio);
-/*
- * The zone descriptors obtained with a zone report indicate
- * zone positions within the underlying device of the target. The zone
- * descriptors must be remapped to match their position within the dm device.
- * The caller target should obtain the zones information using
- * blkdev_report_zones() to ensure that remapping for partition offset is
- * already handled.
- */
-void dm_remap_zone_report(struct dm_target *ti, sector_t start,
- struct blk_zone *zones, unsigned int *nr_zones)
-{
-#ifdef CONFIG_BLK_DEV_ZONED
- struct blk_zone *zone;
- unsigned int nrz = *nr_zones;
- int i;
-
- /*
- * Remap the start sector and write pointer position of the zones in
- * the array. Since we may have obtained from the target underlying
- * device more zones that the target size, also adjust the number
- * of zones.
- */
- for (i = 0; i < nrz; i++) {
- zone = zones + i;
- if (zone->start >= start + ti->len) {
- memset(zone, 0, sizeof(struct blk_zone) * (nrz - i));
- break;
- }
-
- zone->start = zone->start + ti->begin - start;
- if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
- continue;
-
- if (zone->cond == BLK_ZONE_COND_FULL)
- zone->wp = zone->start + zone->len;
- else if (zone->cond == BLK_ZONE_COND_EMPTY)
- zone->wp = zone->start;
- else
- zone->wp = zone->wp + ti->begin - start;
- }
-
- *nr_zones = i;
-#else /* !CONFIG_BLK_DEV_ZONED */
- *nr_zones = 0;
-#endif
-}
-EXPORT_SYMBOL_GPL(dm_remap_zone_report);
-
static blk_qc_t __map_bio(struct dm_target_io *tio)
{
int r;
@@ -1627,7 +1606,7 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
ci.sector_count = 0;
error = __send_empty_flush(&ci);
/* dec_pending submits any data associated with flush */
- } else if (bio_op(bio) == REQ_OP_ZONE_RESET) {
+ } else if (op_is_zone_mgmt(bio_op(bio))) {
ci.bio = bio;
ci.sector_count = 0;
error = __split_and_process_non_flush(&ci);
diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
index b092c7b5282f..3ad18246fcb3 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -2139,6 +2139,7 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks,
memcpy(page_address(store.sb_page),
page_address(bitmap->storage.sb_page),
sizeof(bitmap_super_t));
+ spin_lock_irq(&bitmap->counts.lock);
md_bitmap_file_unmap(&bitmap->storage);
bitmap->storage = store;
@@ -2154,7 +2155,6 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks,
blocks = min(old_counts.chunks << old_counts.chunkshift,
chunks << chunkshift);
- spin_lock_irq(&bitmap->counts.lock);
/* For cluster raid, need to pre-allocate bitmap */
if (mddev_is_clustered(bitmap->mddev)) {
unsigned long page;
diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
index c766c559d36d..26c75c0199fa 100644
--- a/drivers/md/md-linear.c
+++ b/drivers/md/md-linear.c
@@ -244,10 +244,9 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
sector_t start_sector, end_sector, data_offset;
sector_t bio_sector = bio->bi_iter.bi_sector;
- if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
- md_flush_request(mddev, bio);
+ if (unlikely(bio->bi_opf & REQ_PREFLUSH)
+ && md_flush_request(mddev, bio))
return true;
- }
tmp_dev = which_dev(mddev, bio_sector);
start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors;
diff --git a/drivers/md/md-multipath.c b/drivers/md/md-multipath.c
index 6780938d2991..152f9e65a226 100644
--- a/drivers/md/md-multipath.c
+++ b/drivers/md/md-multipath.c
@@ -104,10 +104,9 @@ static bool multipath_make_request(struct mddev *mddev, struct bio * bio)
struct multipath_bh * mp_bh;
struct multipath_info *multipath;
- if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
- md_flush_request(mddev, bio);
+ if (unlikely(bio->bi_opf & REQ_PREFLUSH)
+ && md_flush_request(mddev, bio))
return true;
- }
mp_bh = mempool_alloc(&conf->pool, GFP_NOIO);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 1be7abeb24fd..805b33e27496 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -550,7 +550,13 @@ static void md_submit_flush_data(struct work_struct *ws)
}
}
-void md_flush_request(struct mddev *mddev, struct bio *bio)
+/*
+ * Manages consolidation of flushes and submitting any flushes needed for
+ * a bio with REQ_PREFLUSH. Returns true if the bio is finished or is
+ * being finished in another context. Returns false if the flushing is
+ * complete but still needs the I/O portion of the bio to be processed.
+ */
+bool md_flush_request(struct mddev *mddev, struct bio *bio)
{
ktime_t start = ktime_get_boottime();
spin_lock_irq(&mddev->lock);
@@ -575,9 +581,10 @@ void md_flush_request(struct mddev *mddev, struct bio *bio)
bio_endio(bio);
else {
bio->bi_opf &= ~REQ_PREFLUSH;
- mddev->pers->make_request(mddev, bio);
+ return false;
}
}
+ return true;
}
EXPORT_SYMBOL(md_flush_request);
@@ -1098,6 +1105,7 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor
char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
mdp_super_t *sb;
int ret;
+ bool spare_disk = true;
/*
* Calculate the position of the superblock (512byte sectors),
@@ -1148,8 +1156,18 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor
else
rdev->desc_nr = sb->this_disk.number;
+ /* not spare disk, or LEVEL_MULTIPATH */
+ if (sb->level == LEVEL_MULTIPATH ||
+ (rdev->desc_nr >= 0 &&
+ sb->disks[rdev->desc_nr].state &
+ ((1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE))))
+ spare_disk = false;
+
if (!refdev) {
- ret = 1;
+ if (!spare_disk)
+ ret = 1;
+ else
+ ret = 0;
} else {
__u64 ev1, ev2;
mdp_super_t *refsb = page_address(refdev->sb_page);
@@ -1165,7 +1183,8 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor
}
ev1 = md_event(sb);
ev2 = md_event(refsb);
- if (ev1 > ev2)
+
+ if (!spare_disk && ev1 > ev2)
ret = 1;
else
ret = 0;
@@ -1525,6 +1544,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
sector_t sectors;
char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
int bmask;
+ bool spare_disk = true;
/*
* Calculate the position of the superblock in 512byte sectors.
@@ -1658,8 +1678,19 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
sb->level != 0)
return -EINVAL;
+ /* not spare disk, or LEVEL_MULTIPATH */
+ if (sb->level == cpu_to_le32(LEVEL_MULTIPATH) ||
+ (rdev->desc_nr >= 0 &&
+ rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
+ (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
+ le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL)))
+ spare_disk = false;
+
if (!refdev) {
- ret = 1;
+ if (!spare_disk)
+ ret = 1;
+ else
+ ret = 0;
} else {
__u64 ev1, ev2;
struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);
@@ -1676,7 +1707,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
ev1 = le64_to_cpu(sb->events);
ev2 = le64_to_cpu(refsb->events);
- if (ev1 > ev2)
+ if (!spare_disk && ev1 > ev2)
ret = 1;
else
ret = 0;
@@ -3597,7 +3628,7 @@ abort_free:
* Check a full RAID array for plausibility
*/
-static void analyze_sbs(struct mddev *mddev)
+static int analyze_sbs(struct mddev *mddev)
{
int i;
struct md_rdev *rdev, *freshest, *tmp;
@@ -3618,6 +3649,12 @@ static void analyze_sbs(struct mddev *mddev)
md_kick_rdev_from_array(rdev);
}
+ /* Cannot find a valid fresh disk */
+ if (!freshest) {
+ pr_warn("md: cannot find a valid disk\n");
+ return -EINVAL;
+ }
+
super_types[mddev->major_version].
validate_super(mddev, freshest);
@@ -3652,6 +3689,8 @@ static void analyze_sbs(struct mddev *mddev)
clear_bit(In_sync, &rdev->flags);
}
}
+
+ return 0;
}
/* Read a fixed-point number.
@@ -5570,7 +5609,9 @@ int md_run(struct mddev *mddev)
if (!mddev->raid_disks) {
if (!mddev->persistent)
return -EINVAL;
- analyze_sbs(mddev);
+ err = analyze_sbs(mddev);
+ if (err)
+ return -EINVAL;
}
if (mddev->level != LEVEL_NONE)
diff --git a/drivers/md/md.h b/drivers/md/md.h
index c5e3ff398b59..5f86f8adb0a4 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -550,7 +550,7 @@ struct md_personality
int level;
struct list_head list;
struct module *owner;
- bool (*make_request)(struct mddev *mddev, struct bio *bio);
+ bool __must_check (*make_request)(struct mddev *mddev, struct bio *bio);
/*
* start up works that do NOT require md_thread. tasks that
* requires md_thread should go into start()
@@ -703,7 +703,7 @@ extern void md_error(struct mddev *mddev, struct md_rdev *rdev);
extern void md_finish_reshape(struct mddev *mddev);
extern int mddev_congested(struct mddev *mddev, int bits);
-extern void md_flush_request(struct mddev *mddev, struct bio *bio);
+extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio);
extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
sector_t sector, int size, struct page *page);
extern int md_super_wait(struct mddev *mddev);
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index f61693e59684..b7c20979bd19 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -154,7 +154,7 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
} else {
pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n",
mdname(mddev));
- pr_err("md/raid0: please set raid.default_layout to 1 or 2\n");
+ pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n");
err = -ENOTSUPP;
goto abort;
}
@@ -575,10 +575,9 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
unsigned chunk_sects;
unsigned sectors;
- if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
- md_flush_request(mddev, bio);
+ if (unlikely(bio->bi_opf & REQ_PREFLUSH)
+ && md_flush_request(mddev, bio))
return true;
- }
if (unlikely((bio_op(bio) == REQ_OP_DISCARD))) {
raid0_handle_discard(mddev, bio);
@@ -615,7 +614,7 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio)
tmp_dev = map_sector(mddev, zone, sector, &sector);
break;
default:
- WARN("md/raid0:%s: Invalid layout\n", mdname(mddev));
+ WARN(1, "md/raid0:%s: Invalid layout\n", mdname(mddev));
bio_io_error(bio);
return true;
}
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 0466ee2453b4..a409ab6f30bc 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -819,6 +819,7 @@ static void flush_bio_list(struct r1conf *conf, struct bio *bio)
else
generic_make_request(bio);
bio = next;
+ cond_resched();
}
}
@@ -1567,10 +1568,9 @@ static bool raid1_make_request(struct mddev *mddev, struct bio *bio)
{
sector_t sectors;
- if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
- md_flush_request(mddev, bio);
+ if (unlikely(bio->bi_opf & REQ_PREFLUSH)
+ && md_flush_request(mddev, bio))
return true;
- }
/*
* There is a limit to the maximum size, but
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 299c7b1c9718..ec136e44aef7 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -191,7 +191,7 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
out_free_pages:
while (--j >= 0)
- resync_free_pages(&rps[j * 2]);
+ resync_free_pages(&rps[j]);
j = 0;
out_free_bio:
@@ -1525,10 +1525,9 @@ static bool raid10_make_request(struct mddev *mddev, struct bio *bio)
int chunk_sects = chunk_mask + 1;
int sectors = bio_sectors(bio);
- if (unlikely(bio->bi_opf & REQ_PREFLUSH)) {
- md_flush_request(mddev, bio);
+ if (unlikely(bio->bi_opf & REQ_PREFLUSH)
+ && md_flush_request(mddev, bio))
return true;
- }
if (!md_write_start(mddev, bio))
return false;
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 18a4064a61a8..cab5b1352892 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -1404,7 +1404,7 @@ int ppl_init_log(struct r5conf *conf)
atomic64_set(&ppl_conf->seq, 0);
INIT_LIST_HEAD(&ppl_conf->no_mem_stripes);
spin_lock_init(&ppl_conf->no_mem_stripes_lock);
- ppl_conf->write_hint = RWF_WRITE_LIFE_NOT_SET;
+ ppl_conf->write_hint = RWH_WRITE_LIFE_NOT_SET;
if (!mddev->external) {
ppl_conf->signature = ~crc32c_le(~0, mddev->uuid, sizeof(mddev->uuid));
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 223e97ab27e6..f0fc538bfe59 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1134,7 +1134,7 @@ again:
bi->bi_iter.bi_size = STRIPE_SIZE;
bi->bi_write_hint = sh->dev[i].write_hint;
if (!rrdev)
- sh->dev[i].write_hint = RWF_WRITE_LIFE_NOT_SET;
+ sh->dev[i].write_hint = RWH_WRITE_LIFE_NOT_SET;
/*
* If this is discard request, set bi_vcnt 0. We don't
* want to confuse SCSI because SCSI will replace payload
@@ -1187,7 +1187,7 @@ again:
rbi->bi_io_vec[0].bv_offset = 0;
rbi->bi_iter.bi_size = STRIPE_SIZE;
rbi->bi_write_hint = sh->dev[i].write_hint;
- sh->dev[i].write_hint = RWF_WRITE_LIFE_NOT_SET;
+ sh->dev[i].write_hint = RWH_WRITE_LIFE_NOT_SET;
/*
* If this is discard request, set bi_vcnt 0. We don't
* want to confuse SCSI because SCSI will replace payload
@@ -5592,8 +5592,8 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
if (ret == 0)
return true;
if (ret == -ENODEV) {
- md_flush_request(mddev, bi);
- return true;
+ if (md_flush_request(mddev, bi))
+ return true;
}
/* ret == -EAGAIN, fallback */
/*
diff --git a/drivers/media/usb/stkwebcam/stk-webcam.c b/drivers/media/usb/stkwebcam/stk-webcam.c
index cfca3c70599b..21f90a887485 100644
--- a/drivers/media/usb/stkwebcam/stk-webcam.c
+++ b/drivers/media/usb/stkwebcam/stk-webcam.c
@@ -643,8 +643,7 @@ static int v4l_stk_release(struct file *fp)
dev->owner = NULL;
}
- if (is_present(dev))
- usb_autopm_put_interface(dev->interface);
+ usb_autopm_put_interface(dev->interface);
mutex_unlock(&dev->lock);
return v4l2_fh_release(fp);
}
diff --git a/drivers/memstick/host/jmb38x_ms.c b/drivers/memstick/host/jmb38x_ms.c
index 32747425297d..64fff6abe60e 100644
--- a/drivers/memstick/host/jmb38x_ms.c
+++ b/drivers/memstick/host/jmb38x_ms.c
@@ -941,7 +941,7 @@ static int jmb38x_ms_probe(struct pci_dev *pdev,
if (!cnt) {
rc = -ENODEV;
pci_dev_busy = 1;
- goto err_out;
+ goto err_out_int;
}
jm = kzalloc(sizeof(struct jmb38x_ms)
diff --git a/drivers/mfd/mt6397-core.c b/drivers/mfd/mt6397-core.c
index 310dae26ddff..b2c325ead1c8 100644
--- a/drivers/mfd/mt6397-core.c
+++ b/drivers/mfd/mt6397-core.c
@@ -129,11 +129,27 @@ static int mt6397_irq_resume(struct device *dev)
static SIMPLE_DEV_PM_OPS(mt6397_pm_ops, mt6397_irq_suspend,
mt6397_irq_resume);
+struct chip_data {
+ u32 cid_addr;
+ u32 cid_shift;
+};
+
+static const struct chip_data mt6323_core = {
+ .cid_addr = MT6323_CID,
+ .cid_shift = 0,
+};
+
+static const struct chip_data mt6397_core = {
+ .cid_addr = MT6397_CID,
+ .cid_shift = 0,
+};
+
static int mt6397_probe(struct platform_device *pdev)
{
int ret;
unsigned int id;
struct mt6397_chip *pmic;
+ const struct chip_data *pmic_core;
pmic = devm_kzalloc(&pdev->dev, sizeof(*pmic), GFP_KERNEL);
if (!pmic)
@@ -149,28 +165,30 @@ static int mt6397_probe(struct platform_device *pdev)
if (!pmic->regmap)
return -ENODEV;
- platform_set_drvdata(pdev, pmic);
+ pmic_core = of_device_get_match_data(&pdev->dev);
+ if (!pmic_core)
+ return -ENODEV;
- ret = regmap_read(pmic->regmap, MT6397_CID, &id);
+ ret = regmap_read(pmic->regmap, pmic_core->cid_addr, &id);
if (ret) {
- dev_err(pmic->dev, "Failed to read chip id: %d\n", ret);
+ dev_err(&pdev->dev, "Failed to read chip id: %d\n", ret);
return ret;
}
+ pmic->chip_id = (id >> pmic_core->cid_shift) & 0xff;
+
+ platform_set_drvdata(pdev, pmic);
+
pmic->irq = platform_get_irq(pdev, 0);
if (pmic->irq <= 0)
return pmic->irq;
- switch (id & 0xff) {
- case MT6323_CHIP_ID:
- pmic->int_con[0] = MT6323_INT_CON0;
- pmic->int_con[1] = MT6323_INT_CON1;
- pmic->int_status[0] = MT6323_INT_STATUS0;
- pmic->int_status[1] = MT6323_INT_STATUS1;
- ret = mt6397_irq_init(pmic);
- if (ret)
- return ret;
+ ret = mt6397_irq_init(pmic);
+ if (ret)
+ return ret;
+ switch (pmic->chip_id) {
+ case MT6323_CHIP_ID:
ret = devm_mfd_add_devices(&pdev->dev, -1, mt6323_devs,
ARRAY_SIZE(mt6323_devs), NULL,
0, pmic->irq_domain);
@@ -178,21 +196,13 @@ static int mt6397_probe(struct platform_device *pdev)
case MT6391_CHIP_ID:
case MT6397_CHIP_ID:
- pmic->int_con[0] = MT6397_INT_CON0;
- pmic->int_con[1] = MT6397_INT_CON1;
- pmic->int_status[0] = MT6397_INT_STATUS0;
- pmic->int_status[1] = MT6397_INT_STATUS1;
- ret = mt6397_irq_init(pmic);
- if (ret)
- return ret;
-
ret = devm_mfd_add_devices(&pdev->dev, -1, mt6397_devs,
ARRAY_SIZE(mt6397_devs), NULL,
0, pmic->irq_domain);
break;
default:
- dev_err(&pdev->dev, "unsupported chip: %d\n", id);
+ dev_err(&pdev->dev, "unsupported chip: %d\n", pmic->chip_id);
return -ENODEV;
}
@@ -205,9 +215,15 @@ static int mt6397_probe(struct platform_device *pdev)
}
static const struct of_device_id mt6397_of_match[] = {
- { .compatible = "mediatek,mt6397" },
- { .compatible = "mediatek,mt6323" },
- { }
+ {
+ .compatible = "mediatek,mt6323",
+ .data = &mt6323_core,
+ }, {
+ .compatible = "mediatek,mt6397",
+ .data = &mt6397_core,
+ }, {
+ /* sentinel */
+ }
};
MODULE_DEVICE_TABLE(of, mt6397_of_match);
diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
index 47ae84afac2e..1b1a794d639d 100644
--- a/drivers/misc/fastrpc.c
+++ b/drivers/misc/fastrpc.c
@@ -527,6 +527,7 @@ static int fastrpc_dma_buf_attach(struct dma_buf *dmabuf,
FASTRPC_PHYS(buffer->phys), buffer->size);
if (ret < 0) {
dev_err(buffer->dev, "failed to get scatterlist from DMA API\n");
+ kfree(a);
return -EINVAL;
}
diff --git a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c
index 32e9b1aed2ca..0a2b99e1af45 100644
--- a/drivers/misc/mei/bus-fixup.c
+++ b/drivers/misc/mei/bus-fixup.c
@@ -218,13 +218,21 @@ static void mei_mkhi_fix(struct mei_cl_device *cldev)
{
int ret;
+ /* No need to enable the client if nothing is needed from it */
+ if (!cldev->bus->fw_f_fw_ver_supported &&
+ !cldev->bus->hbm_f_os_supported)
+ return;
+
ret = mei_cldev_enable(cldev);
if (ret)
return;
- ret = mei_fwver(cldev);
- if (ret < 0)
- dev_err(&cldev->dev, "FW version command failed %d\n", ret);
+ if (cldev->bus->fw_f_fw_ver_supported) {
+ ret = mei_fwver(cldev);
+ if (ret < 0)
+ dev_err(&cldev->dev, "FW version command failed %d\n",
+ ret);
+ }
if (cldev->bus->hbm_f_os_supported) {
ret = mei_osver(cldev);
diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h
index 77f7dff7098d..c09f8bb49495 100644
--- a/drivers/misc/mei/hw-me-regs.h
+++ b/drivers/misc/mei/hw-me-regs.h
@@ -79,6 +79,9 @@
#define MEI_DEV_ID_CNP_H 0xA360 /* Cannon Point H */
#define MEI_DEV_ID_CNP_H_4 0xA364 /* Cannon Point H 4 (iTouch) */
+#define MEI_DEV_ID_CMP_LP 0x02e0 /* Comet Point LP */
+#define MEI_DEV_ID_CMP_LP_3 0x02e4 /* Comet Point LP 3 (iTouch) */
+
#define MEI_DEV_ID_ICP_LP 0x34E0 /* Ice Lake Point LP */
#define MEI_DEV_ID_TGP_LP 0xA0E0 /* Tiger Lake Point LP */
diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c
index abe1b1f4362f..c4f6991d3028 100644
--- a/drivers/misc/mei/hw-me.c
+++ b/drivers/misc/mei/hw-me.c
@@ -1355,6 +1355,8 @@ static bool mei_me_fw_type_sps(struct pci_dev *pdev)
#define MEI_CFG_FW_SPS \
.quirk_probe = mei_me_fw_type_sps
+#define MEI_CFG_FW_VER_SUPP \
+ .fw_ver_supported = 1
#define MEI_CFG_ICH_HFS \
.fw_status.count = 0
@@ -1392,31 +1394,41 @@ static const struct mei_cfg mei_me_ich10_cfg = {
MEI_CFG_ICH10_HFS,
};
-/* PCH devices */
-static const struct mei_cfg mei_me_pch_cfg = {
+/* PCH6 devices */
+static const struct mei_cfg mei_me_pch6_cfg = {
MEI_CFG_PCH_HFS,
};
+/* PCH7 devices */
+static const struct mei_cfg mei_me_pch7_cfg = {
+ MEI_CFG_PCH_HFS,
+ MEI_CFG_FW_VER_SUPP,
+};
+
/* PCH Cougar Point and Patsburg with quirk for Node Manager exclusion */
static const struct mei_cfg mei_me_pch_cpt_pbg_cfg = {
MEI_CFG_PCH_HFS,
+ MEI_CFG_FW_VER_SUPP,
MEI_CFG_FW_NM,
};
/* PCH8 Lynx Point and newer devices */
static const struct mei_cfg mei_me_pch8_cfg = {
MEI_CFG_PCH8_HFS,
+ MEI_CFG_FW_VER_SUPP,
};
/* PCH8 Lynx Point with quirk for SPS Firmware exclusion */
static const struct mei_cfg mei_me_pch8_sps_cfg = {
MEI_CFG_PCH8_HFS,
+ MEI_CFG_FW_VER_SUPP,
MEI_CFG_FW_SPS,
};
/* Cannon Lake and newer devices */
static const struct mei_cfg mei_me_pch12_cfg = {
MEI_CFG_PCH8_HFS,
+ MEI_CFG_FW_VER_SUPP,
MEI_CFG_DMA_128,
};
@@ -1428,7 +1440,8 @@ static const struct mei_cfg *const mei_cfg_list[] = {
[MEI_ME_UNDEF_CFG] = NULL,
[MEI_ME_ICH_CFG] = &mei_me_ich_cfg,
[MEI_ME_ICH10_CFG] = &mei_me_ich10_cfg,
- [MEI_ME_PCH_CFG] = &mei_me_pch_cfg,
+ [MEI_ME_PCH6_CFG] = &mei_me_pch6_cfg,
+ [MEI_ME_PCH7_CFG] = &mei_me_pch7_cfg,
[MEI_ME_PCH_CPT_PBG_CFG] = &mei_me_pch_cpt_pbg_cfg,
[MEI_ME_PCH8_CFG] = &mei_me_pch8_cfg,
[MEI_ME_PCH8_SPS_CFG] = &mei_me_pch8_sps_cfg,
@@ -1473,6 +1486,8 @@ struct mei_device *mei_me_dev_init(struct pci_dev *pdev,
mei_device_init(dev, &pdev->dev, &mei_me_hw_ops);
hw->cfg = cfg;
+ dev->fw_f_fw_ver_supported = cfg->fw_ver_supported;
+
return dev;
}
diff --git a/drivers/misc/mei/hw-me.h b/drivers/misc/mei/hw-me.h
index 08c84a0de4a8..1d8794828cbc 100644
--- a/drivers/misc/mei/hw-me.h
+++ b/drivers/misc/mei/hw-me.h
@@ -20,11 +20,13 @@
* @fw_status: FW status
* @quirk_probe: device exclusion quirk
* @dma_size: device DMA buffers size
+ * @fw_ver_supported: is fw version retrievable from FW
*/
struct mei_cfg {
const struct mei_fw_status fw_status;
bool (*quirk_probe)(struct pci_dev *pdev);
size_t dma_size[DMA_DSCR_NUM];
+ u32 fw_ver_supported:1;
};
@@ -62,7 +64,8 @@ struct mei_me_hw {
* @MEI_ME_UNDEF_CFG: Lower sentinel.
* @MEI_ME_ICH_CFG: I/O Controller Hub legacy devices.
* @MEI_ME_ICH10_CFG: I/O Controller Hub platforms Gen10
- * @MEI_ME_PCH_CFG: Platform Controller Hub platforms (Up to Gen8).
+ * @MEI_ME_PCH6_CFG: Platform Controller Hub platforms (Gen6).
+ * @MEI_ME_PCH7_CFG: Platform Controller Hub platforms (Gen7).
* @MEI_ME_PCH_CPT_PBG_CFG:Platform Controller Hub workstations
* with quirk for Node Manager exclusion.
* @MEI_ME_PCH8_CFG: Platform Controller Hub Gen8 and newer
@@ -77,7 +80,8 @@ enum mei_cfg_idx {
MEI_ME_UNDEF_CFG,
MEI_ME_ICH_CFG,
MEI_ME_ICH10_CFG,
- MEI_ME_PCH_CFG,
+ MEI_ME_PCH6_CFG,
+ MEI_ME_PCH7_CFG,
MEI_ME_PCH_CPT_PBG_CFG,
MEI_ME_PCH8_CFG,
MEI_ME_PCH8_SPS_CFG,
diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h
index f71a023aed3c..0f2141178299 100644
--- a/drivers/misc/mei/mei_dev.h
+++ b/drivers/misc/mei/mei_dev.h
@@ -426,6 +426,8 @@ struct mei_fw_version {
*
* @fw_ver : FW versions
*
+ * @fw_f_fw_ver_supported : fw feature: fw version supported
+ *
* @me_clients_rwsem: rw lock over me_clients list
* @me_clients : list of FW clients
* @me_clients_map : FW clients bit map
@@ -506,6 +508,8 @@ struct mei_device {
struct mei_fw_version fw_ver[MEI_MAX_FW_VER_BLOCKS];
+ unsigned int fw_f_fw_ver_supported:1;
+
struct rw_semaphore me_clients_rwsem;
struct list_head me_clients;
DECLARE_BITMAP(me_clients_map, MEI_CLIENTS_MAX);
diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c
index d5a92c6eadb3..3dca63eddaa0 100644
--- a/drivers/misc/mei/pci-me.c
+++ b/drivers/misc/mei/pci-me.c
@@ -61,13 +61,13 @@ static const struct pci_device_id mei_me_pci_tbl[] = {
{MEI_PCI_DEVICE(MEI_DEV_ID_ICH10_3, MEI_ME_ICH10_CFG)},
{MEI_PCI_DEVICE(MEI_DEV_ID_ICH10_4, MEI_ME_ICH10_CFG)},
- {MEI_PCI_DEVICE(MEI_DEV_ID_IBXPK_1, MEI_ME_PCH_CFG)},
- {MEI_PCI_DEVICE(MEI_DEV_ID_IBXPK_2, MEI_ME_PCH_CFG)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_IBXPK_1, MEI_ME_PCH6_CFG)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_IBXPK_2, MEI_ME_PCH6_CFG)},
{MEI_PCI_DEVICE(MEI_DEV_ID_CPT_1, MEI_ME_PCH_CPT_PBG_CFG)},
{MEI_PCI_DEVICE(MEI_DEV_ID_PBG_1, MEI_ME_PCH_CPT_PBG_CFG)},
- {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_1, MEI_ME_PCH_CFG)},
- {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_2, MEI_ME_PCH_CFG)},
- {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_3, MEI_ME_PCH_CFG)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_1, MEI_ME_PCH7_CFG)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_2, MEI_ME_PCH7_CFG)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_PPT_3, MEI_ME_PCH7_CFG)},
{MEI_PCI_DEVICE(MEI_DEV_ID_LPT_H, MEI_ME_PCH8_SPS_CFG)},
{MEI_PCI_DEVICE(MEI_DEV_ID_LPT_W, MEI_ME_PCH8_SPS_CFG)},
{MEI_PCI_DEVICE(MEI_DEV_ID_LPT_LP, MEI_ME_PCH8_CFG)},
@@ -96,6 +96,9 @@ static const struct pci_device_id mei_me_pci_tbl[] = {
{MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H, MEI_ME_PCH12_CFG)},
{MEI_PCI_DEVICE(MEI_DEV_ID_CNP_H_4, MEI_ME_PCH8_CFG)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_LP, MEI_ME_PCH12_CFG)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_LP_3, MEI_ME_PCH8_CFG)},
+
{MEI_PCI_DEVICE(MEI_DEV_ID_ICP_LP, MEI_ME_PCH12_CFG)},
{MEI_PCI_DEVICE(MEI_DEV_ID_TGP_LP, MEI_ME_PCH12_CFG)},
diff --git a/drivers/mmc/host/cqhci.c b/drivers/mmc/host/cqhci.c
index f7bdae5354c3..5047f7343ffc 100644
--- a/drivers/mmc/host/cqhci.c
+++ b/drivers/mmc/host/cqhci.c
@@ -611,7 +611,8 @@ static int cqhci_request(struct mmc_host *mmc, struct mmc_request *mrq)
cq_host->slot[tag].flags = 0;
cq_host->qcnt += 1;
-
+ /* Make sure descriptors are ready before ringing the doorbell */
+ wmb();
cqhci_writel(cq_host, 1 << tag, CQHCI_TDBR);
if (!(cqhci_readl(cq_host, CQHCI_TDBR) & (1 << tag)))
pr_debug("%s: cqhci: doorbell not set for tag %d\n",
diff --git a/drivers/mmc/host/mxs-mmc.c b/drivers/mmc/host/mxs-mmc.c
index 78e7e350655c..4031217d21c3 100644
--- a/drivers/mmc/host/mxs-mmc.c
+++ b/drivers/mmc/host/mxs-mmc.c
@@ -17,6 +17,7 @@
#include <linux/interrupt.h>
#include <linux/dma-mapping.h>
#include <linux/dmaengine.h>
+#include <linux/dma/mxs-dma.h>
#include <linux/highmem.h>
#include <linux/clk.h>
#include <linux/err.h>
@@ -266,7 +267,7 @@ static void mxs_mmc_bc(struct mxs_mmc_host *host)
ssp->ssp_pio_words[2] = cmd1;
ssp->dma_dir = DMA_NONE;
ssp->slave_dirn = DMA_TRANS_NONE;
- desc = mxs_mmc_prep_dma(host, DMA_CTRL_ACK);
+ desc = mxs_mmc_prep_dma(host, MXS_DMA_CTRL_WAIT4END);
if (!desc)
goto out;
@@ -311,7 +312,7 @@ static void mxs_mmc_ac(struct mxs_mmc_host *host)
ssp->ssp_pio_words[2] = cmd1;
ssp->dma_dir = DMA_NONE;
ssp->slave_dirn = DMA_TRANS_NONE;
- desc = mxs_mmc_prep_dma(host, DMA_CTRL_ACK);
+ desc = mxs_mmc_prep_dma(host, MXS_DMA_CTRL_WAIT4END);
if (!desc)
goto out;
@@ -441,7 +442,7 @@ static void mxs_mmc_adtc(struct mxs_mmc_host *host)
host->data = data;
ssp->dma_dir = dma_data_dir;
ssp->slave_dirn = slave_dirn;
- desc = mxs_mmc_prep_dma(host, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+ desc = mxs_mmc_prep_dma(host, DMA_PREP_INTERRUPT | MXS_DMA_CTRL_WAIT4END);
if (!desc)
goto out;
diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c
index d4ada5cca2d1..234551a68739 100644
--- a/drivers/mmc/host/renesas_sdhi_core.c
+++ b/drivers/mmc/host/renesas_sdhi_core.c
@@ -646,8 +646,8 @@ int renesas_sdhi_probe(struct platform_device *pdev,
struct tmio_mmc_dma *dma_priv;
struct tmio_mmc_host *host;
struct renesas_sdhi *priv;
+ int num_irqs, irq, ret, i;
struct resource *res;
- int irq, ret, i;
u16 ver;
of_data = of_device_get_match_data(&pdev->dev);
@@ -825,24 +825,31 @@ int renesas_sdhi_probe(struct platform_device *pdev,
host->hs400_complete = renesas_sdhi_hs400_complete;
}
- i = 0;
- while (1) {
+ num_irqs = platform_irq_count(pdev);
+ if (num_irqs < 0) {
+ ret = num_irqs;
+ goto eirq;
+ }
+
+ /* There must be at least one IRQ source */
+ if (!num_irqs) {
+ ret = -ENXIO;
+ goto eirq;
+ }
+
+ for (i = 0; i < num_irqs; i++) {
irq = platform_get_irq(pdev, i);
- if (irq < 0)
- break;
- i++;
+ if (irq < 0) {
+ ret = irq;
+ goto eirq;
+ }
+
ret = devm_request_irq(&pdev->dev, irq, tmio_mmc_irq, 0,
dev_name(&pdev->dev), host);
if (ret)
goto eirq;
}
- /* There must be at least one IRQ source */
- if (!i) {
- ret = irq;
- goto eirq;
- }
-
dev_info(&pdev->dev, "%s base at 0x%08lx max clock rate %u MHz\n",
mmc_hostname(host->mmc), (unsigned long)
(platform_get_resource(pdev, IORESOURCE_MEM, 0)->start),
diff --git a/drivers/mmc/host/sdhci-iproc.c b/drivers/mmc/host/sdhci-iproc.c
index 2b9cdcd1dd9d..f4f5f0a70cda 100644
--- a/drivers/mmc/host/sdhci-iproc.c
+++ b/drivers/mmc/host/sdhci-iproc.c
@@ -262,6 +262,7 @@ static const struct sdhci_iproc_data bcm2835_data = {
};
static const struct sdhci_pltfm_data sdhci_bcm2711_pltfm_data = {
+ .quirks = SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12,
.ops = &sdhci_iproc_32only_ops,
};
diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c
index e7d1920729fb..0ae986c42bc8 100644
--- a/drivers/mmc/host/sdhci-of-at91.c
+++ b/drivers/mmc/host/sdhci-of-at91.c
@@ -358,7 +358,7 @@ static int sdhci_at91_probe(struct platform_device *pdev)
pm_runtime_use_autosuspend(&pdev->dev);
/* HS200 is broken at this moment */
- host->quirks2 = SDHCI_QUIRK2_BROKEN_HS200;
+ host->quirks2 |= SDHCI_QUIRK2_BROKEN_HS200;
ret = sdhci_add_host(host);
if (ret)
diff --git a/drivers/mmc/host/sdhci-omap.c b/drivers/mmc/host/sdhci-omap.c
index 41c2677c587f..083e7e053c95 100644
--- a/drivers/mmc/host/sdhci-omap.c
+++ b/drivers/mmc/host/sdhci-omap.c
@@ -372,7 +372,7 @@ static int sdhci_omap_execute_tuning(struct mmc_host *mmc, u32 opcode)
* on temperature
*/
if (temperature < -20000)
- phase_delay = min(max_window + 4 * max_len - 24,
+ phase_delay = min(max_window + 4 * (max_len - 1) - 24,
max_window +
DIV_ROUND_UP(13 * max_len, 16) * 4);
else if (temperature < 20000)
diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c
index 81bd9afb0980..98c575de43c7 100644
--- a/drivers/mmc/host/sh_mmcif.c
+++ b/drivers/mmc/host/sh_mmcif.c
@@ -1393,11 +1393,9 @@ static int sh_mmcif_probe(struct platform_device *pdev)
const char *name;
irq[0] = platform_get_irq(pdev, 0);
- irq[1] = platform_get_irq(pdev, 1);
- if (irq[0] < 0) {
- dev_err(dev, "Get irq error\n");
+ irq[1] = platform_get_irq_optional(pdev, 1);
+ if (irq[0] < 0)
return -ENXIO;
- }
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
reg = devm_ioremap_resource(dev, res);
diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c
index 79a53cb8507b..00a79489067c 100644
--- a/drivers/mtd/chips/cfi_cmdset_0001.c
+++ b/drivers/mtd/chips/cfi_cmdset_0001.c
@@ -1353,7 +1353,7 @@ static int do_point_onechip (struct map_info *map, struct flchip *chip, loff_t a
{
unsigned long cmd_addr;
struct cfi_private *cfi = map->fldrv_priv;
- int ret = 0;
+ int ret;
adr += chip->start;
@@ -1383,7 +1383,7 @@ static int cfi_intelext_point(struct mtd_info *mtd, loff_t from, size_t len,
struct cfi_private *cfi = map->fldrv_priv;
unsigned long ofs, last_end = 0;
int chipnum;
- int ret = 0;
+ int ret;
if (!map->virt)
return -EINVAL;
@@ -1550,7 +1550,7 @@ static int __xipram do_write_oneword(struct map_info *map, struct flchip *chip,
{
struct cfi_private *cfi = map->fldrv_priv;
map_word status, write_cmd;
- int ret=0;
+ int ret;
adr += chip->start;
@@ -1624,7 +1624,7 @@ static int cfi_intelext_write_words (struct mtd_info *mtd, loff_t to , size_t le
{
struct map_info *map = mtd->priv;
struct cfi_private *cfi = map->fldrv_priv;
- int ret = 0;
+ int ret;
int chipnum;
unsigned long ofs;
@@ -1871,7 +1871,7 @@ static int cfi_intelext_writev (struct mtd_info *mtd, const struct kvec *vecs,
struct map_info *map = mtd->priv;
struct cfi_private *cfi = map->fldrv_priv;
int wbufsize = cfi_interleave(cfi) << cfi->cfiq->MaxBufWriteSize;
- int ret = 0;
+ int ret;
int chipnum;
unsigned long ofs, vec_seek, i;
size_t len = 0;
diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index cf8c8be40a9c..04b383bc3947 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -123,19 +123,23 @@ static int cfi_use_status_reg(struct cfi_private *cfi)
(extp->SoftwareFeatures & poll_mask) == CFI_POLL_STATUS_REG;
}
-static void cfi_check_err_status(struct map_info *map, struct flchip *chip,
- unsigned long adr)
+static int cfi_check_err_status(struct map_info *map, struct flchip *chip,
+ unsigned long adr)
{
struct cfi_private *cfi = map->fldrv_priv;
map_word status;
if (!cfi_use_status_reg(cfi))
- return;
+ return 0;
cfi_send_gen_cmd(0x70, cfi->addr_unlock1, chip->start, map, cfi,
cfi->device_type, NULL);
status = map_read(map, adr);
+ /* The error bits are invalid while the chip's busy */
+ if (!map_word_bitsset(map, status, CMD(CFI_SR_DRB)))
+ return 0;
+
if (map_word_bitsset(map, status, CMD(0x3a))) {
unsigned long chipstatus = MERGESTATUS(status);
@@ -151,7 +155,12 @@ static void cfi_check_err_status(struct map_info *map, struct flchip *chip,
if (chipstatus & CFI_SR_SLSB)
pr_err("%s sector write protected, status %lx\n",
map->name, chipstatus);
+
+ /* Erase/Program status bits are set on the operation failure */
+ if (chipstatus & (CFI_SR_ESB | CFI_SR_PSB))
+ return 1;
}
+ return 0;
}
/* #define DEBUG_CFI_FEATURES */
@@ -785,7 +794,6 @@ static struct mtd_info *cfi_amdstd_setup(struct mtd_info *mtd)
kfree(mtd->eraseregions);
kfree(mtd);
kfree(cfi->cmdset_priv);
- kfree(cfi->cfiq);
return NULL;
}
@@ -848,20 +856,16 @@ static int __xipram chip_good(struct map_info *map, struct flchip *chip,
if (cfi_use_status_reg(cfi)) {
map_word ready = CMD(CFI_SR_DRB);
- map_word err = CMD(CFI_SR_PSB | CFI_SR_ESB);
+
/*
* For chips that support status register, check device
- * ready bit and Erase/Program status bit to know if
- * operation succeeded.
+ * ready bit
*/
cfi_send_gen_cmd(0x70, cfi->addr_unlock1, chip->start, map, cfi,
cfi->device_type, NULL);
curd = map_read(map, addr);
- if (map_word_andequal(map, curd, ready, ready))
- return !map_word_bitsset(map, curd, err);
-
- return 0;
+ return map_word_andequal(map, curd, ready, ready);
}
oldd = map_read(map, addr);
@@ -1699,8 +1703,11 @@ static int __xipram do_write_oneword_once(struct map_info *map,
break;
}
- if (chip_good(map, chip, adr, datum))
+ if (chip_good(map, chip, adr, datum)) {
+ if (cfi_check_err_status(map, chip, adr))
+ ret = -EIO;
break;
+ }
/* Latency issues. Drop the lock, wait a while and retry */
UDELAY(map, chip, adr, 1);
@@ -1713,7 +1720,7 @@ static int __xipram do_write_oneword_start(struct map_info *map,
struct flchip *chip,
unsigned long adr, int mode)
{
- int ret = 0;
+ int ret;
mutex_lock(&chip->mutex);
@@ -1773,7 +1780,6 @@ static int __xipram do_write_oneword_retry(struct map_info *map,
ret = do_write_oneword_once(map, chip, adr, datum, mode, cfi);
if (ret) {
/* reset on all failures. */
- cfi_check_err_status(map, chip, adr);
map_write(map, CMD(0xF0), chip->start);
/* FIXME - should have reset delay before continuing */
@@ -1791,7 +1797,7 @@ static int __xipram do_write_oneword(struct map_info *map, struct flchip *chip,
unsigned long adr, map_word datum,
int mode)
{
- int ret = 0;
+ int ret;
adr += chip->start;
@@ -1815,7 +1821,7 @@ static int cfi_amdstd_write_words(struct mtd_info *mtd, loff_t to, size_t len,
{
struct map_info *map = mtd->priv;
struct cfi_private *cfi = map->fldrv_priv;
- int ret = 0;
+ int ret;
int chipnum;
unsigned long ofs, chipstart;
DECLARE_WAITQUEUE(wait, current);
@@ -1970,12 +1976,17 @@ static int __xipram do_write_buffer_wait(struct map_info *map,
*/
if (time_after(jiffies, timeo) &&
!chip_good(map, chip, adr, datum)) {
+ pr_err("MTD %s(): software timeout, address:0x%.8lx.\n",
+ __func__, adr);
ret = -EIO;
break;
}
- if (chip_good(map, chip, adr, datum))
+ if (chip_good(map, chip, adr, datum)) {
+ if (cfi_check_err_status(map, chip, adr))
+ ret = -EIO;
break;
+ }
/* Latency issues. Drop the lock, wait a while and retry */
UDELAY(map, chip, adr, 1);
@@ -2014,7 +2025,7 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
int len)
{
struct cfi_private *cfi = map->fldrv_priv;
- int ret = -EIO;
+ int ret;
unsigned long cmd_adr;
int z, words;
map_word datum;
@@ -2071,12 +2082,8 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
chip->word_write_time);
ret = do_write_buffer_wait(map, chip, adr, datum);
- if (ret) {
- cfi_check_err_status(map, chip, adr);
+ if (ret)
do_write_buffer_reset(map, chip, cfi);
- pr_err("MTD %s(): software timeout, address:0x%.8lx.\n",
- __func__, adr);
- }
xip_enable(map, chip, adr);
@@ -2095,7 +2102,7 @@ static int cfi_amdstd_write_buffers(struct mtd_info *mtd, loff_t to, size_t len,
struct map_info *map = mtd->priv;
struct cfi_private *cfi = map->fldrv_priv;
int wbufsize = cfi_interleave(cfi) << cfi->cfiq->MaxBufWriteSize;
- int ret = 0;
+ int ret;
int chipnum;
unsigned long ofs;
@@ -2232,7 +2239,7 @@ static int do_panic_write_oneword(struct map_info *map, struct flchip *chip,
struct cfi_private *cfi = map->fldrv_priv;
int retry_cnt = 0;
map_word oldd;
- int ret = 0;
+ int ret;
int i;
adr += chip->start;
@@ -2271,9 +2278,9 @@ retry:
udelay(1);
}
- if (!chip_good(map, chip, adr, datum)) {
+ if (!chip_good(map, chip, adr, datum) ||
+ cfi_check_err_status(map, chip, adr)) {
/* reset on all failures. */
- cfi_check_err_status(map, chip, adr);
map_write(map, CMD(0xF0), chip->start);
/* FIXME - should have reset delay before continuing */
@@ -2307,7 +2314,7 @@ static int cfi_amdstd_panic_write(struct mtd_info *mtd, loff_t to, size_t len,
struct map_info *map = mtd->priv;
struct cfi_private *cfi = map->fldrv_priv;
unsigned long ofs, chipstart;
- int ret = 0;
+ int ret;
int chipnum;
chipnum = to >> cfi->chipshift;
@@ -2411,7 +2418,7 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
unsigned long timeo = jiffies + HZ;
unsigned long int adr;
DECLARE_WAITQUEUE(wait, current);
- int ret = 0;
+ int ret;
int retry_cnt = 0;
adr = cfi->addr_unlock1;
@@ -2467,8 +2474,11 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
chip->erase_suspended = 0;
}
- if (chip_good(map, chip, adr, map_word_ff(map)))
+ if (chip_good(map, chip, adr, map_word_ff(map))) {
+ if (cfi_check_err_status(map, chip, adr))
+ ret = -EIO;
break;
+ }
if (time_after(jiffies, timeo)) {
printk(KERN_WARNING "MTD %s(): software timeout\n",
@@ -2483,7 +2493,6 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
/* Did we succeed? */
if (ret) {
/* reset on all failures. */
- cfi_check_err_status(map, chip, adr);
map_write(map, CMD(0xF0), chip->start);
/* FIXME - should have reset delay before continuing */
@@ -2508,7 +2517,7 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
struct cfi_private *cfi = map->fldrv_priv;
unsigned long timeo = jiffies + HZ;
DECLARE_WAITQUEUE(wait, current);
- int ret = 0;
+ int ret;
int retry_cnt = 0;
adr += chip->start;
@@ -2564,8 +2573,11 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
chip->erase_suspended = 0;
}
- if (chip_good(map, chip, adr, map_word_ff(map)))
+ if (chip_good(map, chip, adr, map_word_ff(map))) {
+ if (cfi_check_err_status(map, chip, adr))
+ ret = -EIO;
break;
+ }
if (time_after(jiffies, timeo)) {
printk(KERN_WARNING "MTD %s(): software timeout\n",
@@ -2580,7 +2592,6 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
/* Did we succeed? */
if (ret) {
/* reset on all failures. */
- cfi_check_err_status(map, chip, adr);
map_write(map, CMD(0xF0), chip->start);
/* FIXME - should have reset delay before continuing */
diff --git a/drivers/mtd/chips/cfi_cmdset_0020.c b/drivers/mtd/chips/cfi_cmdset_0020.c
index e752067526a5..54edae63b92d 100644
--- a/drivers/mtd/chips/cfi_cmdset_0020.c
+++ b/drivers/mtd/chips/cfi_cmdset_0020.c
@@ -611,7 +611,7 @@ static int cfi_staa_write_buffers (struct mtd_info *mtd, loff_t to,
struct map_info *map = mtd->priv;
struct cfi_private *cfi = map->fldrv_priv;
int wbufsize = cfi_interleave(cfi) << cfi->cfiq->MaxBufWriteSize;
- int ret = 0;
+ int ret;
int chipnum;
unsigned long ofs;
@@ -895,7 +895,7 @@ static int cfi_staa_erase_varsize(struct mtd_info *mtd,
{ struct map_info *map = mtd->priv;
struct cfi_private *cfi = map->fldrv_priv;
unsigned long adr, len;
- int chipnum, ret = 0;
+ int chipnum, ret;
int i, first;
struct mtd_erase_region_info *regions = mtd->eraseregions;
@@ -1132,7 +1132,7 @@ static int cfi_staa_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
struct map_info *map = mtd->priv;
struct cfi_private *cfi = map->fldrv_priv;
unsigned long adr;
- int chipnum, ret = 0;
+ int chipnum, ret;
#ifdef DEBUG_LOCK_BITS
int ofs_factor = cfi->interleave * cfi->device_type;
#endif
@@ -1279,7 +1279,7 @@ static int cfi_staa_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
struct map_info *map = mtd->priv;
struct cfi_private *cfi = map->fldrv_priv;
unsigned long adr;
- int chipnum, ret = 0;
+ int chipnum, ret;
#ifdef DEBUG_LOCK_BITS
int ofs_factor = cfi->interleave * cfi->device_type;
#endif
diff --git a/drivers/mtd/chips/cfi_util.c b/drivers/mtd/chips/cfi_util.c
index e3b266ee06af..e2d4db05aeb3 100644
--- a/drivers/mtd/chips/cfi_util.c
+++ b/drivers/mtd/chips/cfi_util.c
@@ -26,7 +26,7 @@
void cfi_udelay(int us)
{
if (us >= 1000) {
- msleep((us+999)/1000);
+ msleep(DIV_ROUND_UP(us, 1000));
} else {
udelay(us);
cond_resched();
diff --git a/drivers/mtd/devices/mchp23k256.c b/drivers/mtd/devices/mchp23k256.c
index b20d02b4f830..77c872fd3d83 100644
--- a/drivers/mtd/devices/mchp23k256.c
+++ b/drivers/mtd/devices/mchp23k256.c
@@ -64,15 +64,17 @@ static int mchp23k256_write(struct mtd_info *mtd, loff_t to, size_t len,
struct spi_transfer transfer[2] = {};
struct spi_message message;
unsigned char command[MAX_CMD_SIZE];
- int ret;
+ int ret, cmd_len;
spi_message_init(&message);
+ cmd_len = mchp23k256_cmdsz(flash);
+
command[0] = MCHP23K256_CMD_WRITE;
mchp23k256_addr2cmd(flash, to, command);
transfer[0].tx_buf = command;
- transfer[0].len = mchp23k256_cmdsz(flash);
+ transfer[0].len = cmd_len;
spi_message_add_tail(&transfer[0], &message);
transfer[1].tx_buf = buf;
@@ -88,8 +90,8 @@ static int mchp23k256_write(struct mtd_info *mtd, loff_t to, size_t len,
if (ret)
return ret;
- if (retlen && message.actual_length > sizeof(command))
- *retlen += message.actual_length - sizeof(command);
+ if (retlen && message.actual_length > cmd_len)
+ *retlen += message.actual_length - cmd_len;
return 0;
}
@@ -101,16 +103,18 @@ static int mchp23k256_read(struct mtd_info *mtd, loff_t from, size_t len,
struct spi_transfer transfer[2] = {};
struct spi_message message;
unsigned char command[MAX_CMD_SIZE];
- int ret;
+ int ret, cmd_len;
spi_message_init(&message);
+ cmd_len = mchp23k256_cmdsz(flash);
+
memset(&transfer, 0, sizeof(transfer));
command[0] = MCHP23K256_CMD_READ;
mchp23k256_addr2cmd(flash, from, command);
transfer[0].tx_buf = command;
- transfer[0].len = mchp23k256_cmdsz(flash);
+ transfer[0].len = cmd_len;
spi_message_add_tail(&transfer[0], &message);
transfer[1].rx_buf = buf;
@@ -126,8 +130,8 @@ static int mchp23k256_read(struct mtd_info *mtd, loff_t from, size_t len,
if (ret)
return ret;
- if (retlen && message.actual_length > sizeof(command))
- *retlen += message.actual_length - sizeof(command);
+ if (retlen && message.actual_length > cmd_len)
+ *retlen += message.actual_length - cmd_len;
return 0;
}
diff --git a/drivers/mtd/devices/spear_smi.c b/drivers/mtd/devices/spear_smi.c
index 986f81d2f93e..79dcca16481d 100644
--- a/drivers/mtd/devices/spear_smi.c
+++ b/drivers/mtd/devices/spear_smi.c
@@ -592,6 +592,26 @@ static int spear_mtd_read(struct mtd_info *mtd, loff_t from, size_t len,
return 0;
}
+/*
+ * The purpose of this function is to ensure a memcpy_toio() with byte writes
+ * only. Its structure is inspired from the ARM implementation of _memcpy_toio()
+ * which also does single byte writes but cannot be used here as this is just an
+ * implementation detail and not part of the API. Not mentioning the comment
+ * stating that _memcpy_toio() should be optimized.
+ */
+static void spear_smi_memcpy_toio_b(volatile void __iomem *dest,
+ const void *src, size_t len)
+{
+ const unsigned char *from = src;
+
+ while (len) {
+ len--;
+ writeb(*from, dest);
+ from++;
+ dest++;
+ }
+}
+
static inline int spear_smi_cpy_toio(struct spear_smi *dev, u32 bank,
void __iomem *dest, const void *src, size_t len)
{
@@ -614,7 +634,23 @@ static inline int spear_smi_cpy_toio(struct spear_smi *dev, u32 bank,
ctrlreg1 = readl(dev->io_base + SMI_CR1);
writel((ctrlreg1 | WB_MODE) & ~SW_MODE, dev->io_base + SMI_CR1);
- memcpy_toio(dest, src, len);
+ /*
+ * In Write Burst mode (WB_MODE), the specs states that writes must be:
+ * - incremental
+ * - of the same size
+ * The ARM implementation of memcpy_toio() will optimize the number of
+ * I/O by using as much 4-byte writes as possible, surrounded by
+ * 2-byte/1-byte access if:
+ * - the destination is not 4-byte aligned
+ * - the length is not a multiple of 4-byte.
+ * Avoid this alternance of write access size by using our own 'byte
+ * access' helper if at least one of the two conditions above is true.
+ */
+ if (IS_ALIGNED(len, sizeof(u32)) &&
+ IS_ALIGNED((uintptr_t)dest, sizeof(u32)))
+ memcpy_toio(dest, src, len);
+ else
+ spear_smi_memcpy_toio_b(dest, src, len);
writel(ctrlreg1, dev->io_base + SMI_CR1);
@@ -777,9 +813,6 @@ static int spear_smi_probe_config_dt(struct platform_device *pdev,
/* Fill structs for each subnode (flash device) */
while ((pp = of_get_next_child(np, pp))) {
- struct spear_smi_flash_info *flash_info;
-
- flash_info = &pdata->board_flash_info[i];
pdata->np[i] = pp;
/* Read base-addr and size from DT */
@@ -933,7 +966,6 @@ static int spear_smi_probe(struct platform_device *pdev)
irq = platform_get_irq(pdev, 0);
if (irq < 0) {
ret = -ENODEV;
- dev_err(&pdev->dev, "invalid smi irq\n");
goto err;
}
diff --git a/drivers/mtd/devices/st_spi_fsm.c b/drivers/mtd/devices/st_spi_fsm.c
index f4d1667daaf9..1888523d9745 100644
--- a/drivers/mtd/devices/st_spi_fsm.c
+++ b/drivers/mtd/devices/st_spi_fsm.c
@@ -255,7 +255,6 @@ struct stfsm_seq {
struct stfsm {
struct device *dev;
void __iomem *base;
- struct resource *region;
struct mtd_info mtd;
struct mutex lock;
struct flash_info *info;
diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig
index bc82305ebb4c..b28225a7c4f3 100644
--- a/drivers/mtd/maps/Kconfig
+++ b/drivers/mtd/maps/Kconfig
@@ -96,6 +96,17 @@ config MTD_PHYSMAP_GEMINI
platforms, some detection and setting up parallel mode on the
external interface.
+config MTD_PHYSMAP_IXP4XX
+ bool "Intel IXP4xx OF-based physical memory map handling"
+ depends on MTD_PHYSMAP_OF
+ depends on ARM
+ select MTD_COMPLEX_MAPPINGS
+ select MTD_CFI_BE_BYTE_SWAP if CPU_BIG_ENDIAN
+ default ARCH_IXP4XX
+ help
+ This provides some extra DT physmap parsing for the Intel IXP4xx
+ platforms, some elaborate endianness handling in particular.
+
config MTD_PHYSMAP_GPIO_ADDR
bool "GPIO-assisted Flash Chip Support"
depends on MTD_PHYSMAP
diff --git a/drivers/mtd/maps/Makefile b/drivers/mtd/maps/Makefile
index 1146009f41df..c0da86a5d26f 100644
--- a/drivers/mtd/maps/Makefile
+++ b/drivers/mtd/maps/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_MTD_PXA2XX) += pxa2xx-flash.o
physmap-objs-y += physmap-core.o
physmap-objs-$(CONFIG_MTD_PHYSMAP_VERSATILE) += physmap-versatile.o
physmap-objs-$(CONFIG_MTD_PHYSMAP_GEMINI) += physmap-gemini.o
+physmap-objs-$(CONFIG_MTD_PHYSMAP_IXP4XX) += physmap-ixp4xx.o
physmap-objs := $(physmap-objs-y)
obj-$(CONFIG_MTD_PHYSMAP) += physmap.o
obj-$(CONFIG_MTD_PISMO) += pismo.o
diff --git a/drivers/mtd/maps/l440gx.c b/drivers/mtd/maps/l440gx.c
index 876f12f40018..0eeadfeb620d 100644
--- a/drivers/mtd/maps/l440gx.c
+++ b/drivers/mtd/maps/l440gx.c
@@ -86,7 +86,7 @@ static int __init init_l440gx(void)
return -ENOMEM;
}
simple_map_init(&l440gx_map);
- printk(KERN_NOTICE "window_addr = 0x%08lx\n", (unsigned long)l440gx_map.virt);
+ pr_debug("window_addr = %p\n", l440gx_map.virt);
/* Setup the pm iobase resource
* This code should move into some kind of generic bridge
diff --git a/drivers/mtd/maps/physmap-core.c b/drivers/mtd/maps/physmap-core.c
index 21b556afc305..a9f7964e2edb 100644
--- a/drivers/mtd/maps/physmap-core.c
+++ b/drivers/mtd/maps/physmap-core.c
@@ -41,6 +41,7 @@
#include <linux/gpio/consumer.h>
#include "physmap-gemini.h"
+#include "physmap-ixp4xx.h"
#include "physmap-versatile.h"
struct physmap_flash_info {
@@ -370,6 +371,10 @@ static int physmap_flash_of_init(struct platform_device *dev)
if (err)
return err;
+ err = of_flash_probe_ixp4xx(dev, dp, &info->maps[i]);
+ if (err)
+ return err;
+
err = of_flash_probe_versatile(dev, dp, &info->maps[i]);
if (err)
return err;
diff --git a/drivers/mtd/maps/physmap-ixp4xx.c b/drivers/mtd/maps/physmap-ixp4xx.c
new file mode 100644
index 000000000000..6a054229a8a0
--- /dev/null
+++ b/drivers/mtd/maps/physmap-ixp4xx.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel IXP4xx OF physmap add-on
+ * Copyright (C) 2019 Linus Walleij <linus.walleij@linaro.org>
+ *
+ * Based on the ixp4xx.c map driver, originally written by:
+ * Intel Corporation
+ * Deepak Saxena <dsaxena@mvista.com>
+ * Copyright (C) 2002 Intel Corporation
+ * Copyright (C) 2003-2004 MontaVista Software, Inc.
+ */
+#include <linux/export.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/mtd/map.h>
+#include <linux/mtd/xip.h>
+#include "physmap-ixp4xx.h"
+
+/*
+ * Read/write a 16 bit word from flash address 'addr'.
+ *
+ * When the cpu is in little-endian mode it swizzles the address lines
+ * ('address coherency') so we need to undo the swizzling to ensure commands
+ * and the like end up on the correct flash address.
+ *
+ * To further complicate matters, due to the way the expansion bus controller
+ * handles 32 bit reads, the byte stream ABCD is stored on the flash as:
+ * D15 D0
+ * +---+---+
+ * | A | B | 0
+ * +---+---+
+ * | C | D | 2
+ * +---+---+
+ * This means that on LE systems each 16 bit word must be swapped. Note that
+ * this requires CONFIG_MTD_CFI_BE_BYTE_SWAP to be enabled to 'unswap' the CFI
+ * data and other flash commands which are always in D7-D0.
+ */
+#ifndef CONFIG_CPU_BIG_ENDIAN
+
+static inline u16 flash_read16(void __iomem *addr)
+{
+ return be16_to_cpu(__raw_readw((void __iomem *)((unsigned long)addr ^ 0x2)));
+}
+
+static inline void flash_write16(u16 d, void __iomem *addr)
+{
+ __raw_writew(cpu_to_be16(d), (void __iomem *)((unsigned long)addr ^ 0x2));
+}
+
+#define BYTE0(h) ((h) & 0xFF)
+#define BYTE1(h) (((h) >> 8) & 0xFF)
+
+#else
+
+static inline u16 flash_read16(const void __iomem *addr)
+{
+ return __raw_readw(addr);
+}
+
+static inline void flash_write16(u16 d, void __iomem *addr)
+{
+ __raw_writew(d, addr);
+}
+
+#define BYTE0(h) (((h) >> 8) & 0xFF)
+#define BYTE1(h) ((h) & 0xFF)
+#endif
+
+static map_word ixp4xx_read16(struct map_info *map, unsigned long ofs)
+{
+ map_word val;
+
+ val.x[0] = flash_read16(map->virt + ofs);
+ return val;
+}
+
+/*
+ * The IXP4xx expansion bus only allows 16-bit wide acceses
+ * when attached to a 16-bit wide device (such as the 28F128J3A),
+ * so we can't just memcpy_fromio().
+ */
+static void ixp4xx_copy_from(struct map_info *map, void *to,
+ unsigned long from, ssize_t len)
+{
+ u8 *dest = (u8 *) to;
+ void __iomem *src = map->virt + from;
+
+ if (len <= 0)
+ return;
+
+ if (from & 1) {
+ *dest++ = BYTE1(flash_read16(src-1));
+ src++;
+ --len;
+ }
+
+ while (len >= 2) {
+ u16 data = flash_read16(src);
+ *dest++ = BYTE0(data);
+ *dest++ = BYTE1(data);
+ src += 2;
+ len -= 2;
+ }
+
+ if (len > 0)
+ *dest++ = BYTE0(flash_read16(src));
+}
+
+static void ixp4xx_write16(struct map_info *map, map_word d, unsigned long adr)
+{
+ flash_write16(d.x[0], map->virt + adr);
+}
+
+int of_flash_probe_ixp4xx(struct platform_device *pdev,
+ struct device_node *np,
+ struct map_info *map)
+{
+ struct device *dev = &pdev->dev;
+
+ /* Multiplatform guard */
+ if (!of_device_is_compatible(np, "intel,ixp4xx-flash"))
+ return 0;
+
+ map->read = ixp4xx_read16;
+ map->write = ixp4xx_write16;
+ map->copy_from = ixp4xx_copy_from;
+ map->copy_to = NULL;
+
+ dev_info(dev, "initialized Intel IXP4xx-specific physmap control\n");
+
+ return 0;
+}
diff --git a/drivers/mtd/maps/physmap-ixp4xx.h b/drivers/mtd/maps/physmap-ixp4xx.h
new file mode 100644
index 000000000000..b0fc49b7f3ed
--- /dev/null
+++ b/drivers/mtd/maps/physmap-ixp4xx.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/of.h>
+#include <linux/mtd/map.h>
+
+#ifdef CONFIG_MTD_PHYSMAP_IXP4XX
+int of_flash_probe_ixp4xx(struct platform_device *pdev,
+ struct device_node *np,
+ struct map_info *map);
+#else
+static inline
+int of_flash_probe_ixp4xx(struct platform_device *pdev,
+ struct device_node *np,
+ struct map_info *map)
+{
+ return 0;
+}
+#endif
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 975aed94f06c..b841008a9eb7 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -174,7 +174,7 @@ static ssize_t mtdchar_read(struct file *file, char __user *buf, size_t count,
break;
case MTD_FILE_MODE_RAW:
{
- struct mtd_oob_ops ops;
+ struct mtd_oob_ops ops = {};
ops.mode = MTD_OPS_RAW;
ops.datbuf = kbuf;
@@ -268,7 +268,7 @@ static ssize_t mtdchar_write(struct file *file, const char __user *buf, size_t c
case MTD_FILE_MODE_RAW:
{
- struct mtd_oob_ops ops;
+ struct mtd_oob_ops ops = {};
ops.mode = MTD_OPS_RAW;
ops.datbuf = kbuf;
@@ -350,7 +350,7 @@ static int mtdchar_writeoob(struct file *file, struct mtd_info *mtd,
uint32_t __user *retp)
{
struct mtd_file_info *mfi = file->private_data;
- struct mtd_oob_ops ops;
+ struct mtd_oob_ops ops = {};
uint32_t retlen;
int ret = 0;
@@ -394,7 +394,7 @@ static int mtdchar_readoob(struct file *file, struct mtd_info *mtd,
uint32_t __user *retp)
{
struct mtd_file_info *mfi = file->private_data;
- struct mtd_oob_ops ops;
+ struct mtd_oob_ops ops = {};
int ret = 0;
if (length > 4096)
@@ -587,7 +587,7 @@ static int mtdchar_write_ioctl(struct mtd_info *mtd,
struct mtd_write_req __user *argp)
{
struct mtd_write_req req;
- struct mtd_oob_ops ops;
+ struct mtd_oob_ops ops = {};
const void __user *usr_data, *usr_oob;
int ret;
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 6cc7ecb0c788..5fac4355b9c2 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -382,33 +382,21 @@ static struct dentry *dfs_dir_mtd;
static void mtd_debugfs_populate(struct mtd_info *mtd)
{
struct device *dev = &mtd->dev;
- struct dentry *root, *dent;
+ struct dentry *root;
if (IS_ERR_OR_NULL(dfs_dir_mtd))
return;
root = debugfs_create_dir(dev_name(dev), dfs_dir_mtd);
- if (IS_ERR_OR_NULL(root)) {
- dev_dbg(dev, "won't show data in debugfs\n");
- return;
- }
-
mtd->dbg.dfs_dir = root;
- if (mtd->dbg.partid) {
- dent = debugfs_create_file("partid", 0400, root, mtd,
- &mtd_partid_debug_fops);
- if (IS_ERR_OR_NULL(dent))
- dev_err(dev, "can't create debugfs entry for partid\n");
- }
+ if (mtd->dbg.partid)
+ debugfs_create_file("partid", 0400, root, mtd,
+ &mtd_partid_debug_fops);
- if (mtd->dbg.partname) {
- dent = debugfs_create_file("partname", 0400, root, mtd,
- &mtd_partname_debug_fops);
- if (IS_ERR_OR_NULL(dent))
- dev_err(dev,
- "can't create debugfs entry for partname\n");
- }
+ if (mtd->dbg.partname)
+ debugfs_create_file("partname", 0400, root, mtd,
+ &mtd_partname_debug_fops);
}
#ifndef CONFIG_MMU
diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c
index f92414eb4c86..58eefa43af14 100644
--- a/drivers/mtd/mtdswap.c
+++ b/drivers/mtd/mtdswap.c
@@ -1257,7 +1257,6 @@ DEFINE_SHOW_ATTRIBUTE(mtdswap);
static int mtdswap_add_debugfs(struct mtdswap_dev *d)
{
struct dentry *root = d->mtd->dbg.dfs_dir;
- struct dentry *dent;
if (!IS_ENABLED(CONFIG_DEBUG_FS))
return 0;
@@ -1265,12 +1264,7 @@ static int mtdswap_add_debugfs(struct mtdswap_dev *d)
if (IS_ERR_OR_NULL(root))
return -1;
- dent = debugfs_create_file("mtdswap_stats", S_IRUSR, root, d,
- &mtdswap_fops);
- if (!dent) {
- dev_err(d->dev, "debugfs_create_file failed\n");
- return -1;
- }
+ debugfs_create_file("mtdswap_stats", S_IRUSR, root, d, &mtdswap_fops);
return 0;
}
diff --git a/drivers/mtd/nand/raw/Kconfig b/drivers/mtd/nand/raw/Kconfig
index e59de3f60cf6..74fb91adeb46 100644
--- a/drivers/mtd/nand/raw/Kconfig
+++ b/drivers/mtd/nand/raw/Kconfig
@@ -450,6 +450,13 @@ config MTD_NAND_PLATFORM
devices. You will need to provide platform-specific functions
via platform_data.
+config MTD_NAND_CADENCE
+ tristate "Support Cadence NAND (HPNFC) controller"
+ depends on OF || COMPILE_TEST
+ help
+ Enable the driver for NAND flash on platforms using a Cadence NAND
+ controller.
+
comment "Misc"
config MTD_SM_COMMON
diff --git a/drivers/mtd/nand/raw/Makefile b/drivers/mtd/nand/raw/Makefile
index a98721988e61..2d136b158fb7 100644
--- a/drivers/mtd/nand/raw/Makefile
+++ b/drivers/mtd/nand/raw/Makefile
@@ -57,6 +57,7 @@ obj-$(CONFIG_MTD_NAND_MXIC) += mxic_nand.o
obj-$(CONFIG_MTD_NAND_TEGRA) += tegra_nand.o
obj-$(CONFIG_MTD_NAND_STM32_FMC2) += stm32_fmc2_nand.o
obj-$(CONFIG_MTD_NAND_MESON) += meson_nand.o
+obj-$(CONFIG_MTD_NAND_CADENCE) += cadence-nand-controller.o
nand-objs := nand_base.o nand_legacy.o nand_bbt.o nand_timings.o nand_ids.o
nand-objs += nand_onfi.o
diff --git a/drivers/mtd/nand/raw/au1550nd.c b/drivers/mtd/nand/raw/au1550nd.c
index 97a97a9ccc36..e10b76089048 100644
--- a/drivers/mtd/nand/raw/au1550nd.c
+++ b/drivers/mtd/nand/raw/au1550nd.c
@@ -134,16 +134,15 @@ static void au_write_buf16(struct nand_chip *this, const u_char *buf, int len)
/**
* au_read_buf16 - read chip data into buffer
- * @mtd: MTD device structure
+ * @this: NAND chip object
* @buf: buffer to store date
* @len: number of bytes to read
*
* read function for 16bit buswidth
*/
-static void au_read_buf16(struct mtd_info *mtd, u_char *buf, int len)
+static void au_read_buf16(struct nand_chip *this, u_char *buf, int len)
{
int i;
- struct nand_chip *this = mtd_to_nand(mtd);
u16 *p = (u16 *) buf;
len >>= 1;
diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
index 15ef30b368a5..1a66b1cd51c0 100644
--- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
+++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
@@ -117,6 +117,18 @@ enum flash_dma_reg {
FLASH_DMA_CURRENT_DESC_EXT,
};
+/* flash_dma registers v0*/
+static const u16 flash_dma_regs_v0[] = {
+ [FLASH_DMA_REVISION] = 0x00,
+ [FLASH_DMA_FIRST_DESC] = 0x04,
+ [FLASH_DMA_CTRL] = 0x08,
+ [FLASH_DMA_MODE] = 0x0c,
+ [FLASH_DMA_STATUS] = 0x10,
+ [FLASH_DMA_INTERRUPT_DESC] = 0x14,
+ [FLASH_DMA_ERROR_STATUS] = 0x18,
+ [FLASH_DMA_CURRENT_DESC] = 0x1c,
+};
+
/* flash_dma registers v1*/
static const u16 flash_dma_regs_v1[] = {
[FLASH_DMA_REVISION] = 0x00,
@@ -597,6 +609,8 @@ static void brcmnand_flash_dma_revision_init(struct brcmnand_controller *ctrl)
/* flash_dma register offsets */
if (ctrl->nand_version >= 0x0703)
ctrl->flash_dma_offsets = flash_dma_regs_v4;
+ else if (ctrl->nand_version == 0x0602)
+ ctrl->flash_dma_offsets = flash_dma_regs_v0;
else
ctrl->flash_dma_offsets = flash_dma_regs_v1;
}
@@ -918,7 +932,7 @@ static inline void disable_ctrl_irqs(struct brcmnand_controller *ctrl)
return;
if (has_flash_dma(ctrl)) {
- ctrl->flash_dma_base = 0;
+ ctrl->flash_dma_base = NULL;
disable_irq(ctrl->dma_irq);
}
@@ -1673,8 +1687,11 @@ static void brcmnand_dma_run(struct brcmnand_host *host, dma_addr_t desc)
flash_dma_writel(ctrl, FLASH_DMA_FIRST_DESC, lower_32_bits(desc));
(void)flash_dma_readl(ctrl, FLASH_DMA_FIRST_DESC);
- flash_dma_writel(ctrl, FLASH_DMA_FIRST_DESC_EXT, upper_32_bits(desc));
- (void)flash_dma_readl(ctrl, FLASH_DMA_FIRST_DESC_EXT);
+ if (ctrl->nand_version > 0x0602) {
+ flash_dma_writel(ctrl, FLASH_DMA_FIRST_DESC_EXT,
+ upper_32_bits(desc));
+ (void)flash_dma_readl(ctrl, FLASH_DMA_FIRST_DESC_EXT);
+ }
/* Start FLASH_DMA engine */
ctrl->dma_pending = true;
diff --git a/drivers/mtd/nand/raw/cadence-nand-controller.c b/drivers/mtd/nand/raw/cadence-nand-controller.c
new file mode 100644
index 000000000000..3a36285a8d8a
--- /dev/null
+++ b/drivers/mtd/nand/raw/cadence-nand-controller.c
@@ -0,0 +1,3030 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Cadence NAND flash controller driver
+ *
+ * Copyright (C) 2019 Cadence
+ *
+ * Author: Piotr Sroka <piotrs@cadence.com>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/rawnand.h>
+#include <linux/of_device.h>
+#include <linux/iopoll.h>
+
+/*
+ * HPNFC can work in 3 modes:
+ * - PIO - can work in master or slave DMA
+ * - CDMA - needs Master DMA for accessing command descriptors.
+ * - Generic mode - can use only slave DMA.
+ * CDMA and PIO modes can be used to execute only base commands.
+ * Generic mode can be used to execute any command
+ * on NAND flash memory. Driver uses CDMA mode for
+ * block erasing, page reading, page programing.
+ * Generic mode is used for executing rest of commands.
+ */
+
+#define MAX_OOB_SIZE_PER_SECTOR 32
+#define MAX_ADDRESS_CYC 6
+#define MAX_ERASE_ADDRESS_CYC 3
+#define MAX_DATA_SIZE 0xFFFC
+#define DMA_DATA_SIZE_ALIGN 8
+
+/* Register definition. */
+/*
+ * Command register 0.
+ * Writing data to this register will initiate a new transaction
+ * of the NF controller.
+ */
+#define CMD_REG0 0x0000
+/* Command type field mask. */
+#define CMD_REG0_CT GENMASK(31, 30)
+/* Command type CDMA. */
+#define CMD_REG0_CT_CDMA 0uL
+/* Command type generic. */
+#define CMD_REG0_CT_GEN 3uL
+/* Command thread number field mask. */
+#define CMD_REG0_TN GENMASK(27, 24)
+
+/* Command register 2. */
+#define CMD_REG2 0x0008
+/* Command register 3. */
+#define CMD_REG3 0x000C
+/* Pointer register to select which thread status will be selected. */
+#define CMD_STATUS_PTR 0x0010
+/* Command status register for selected thread. */
+#define CMD_STATUS 0x0014
+
+/* Interrupt status register. */
+#define INTR_STATUS 0x0110
+#define INTR_STATUS_SDMA_ERR BIT(22)
+#define INTR_STATUS_SDMA_TRIGG BIT(21)
+#define INTR_STATUS_UNSUPP_CMD BIT(19)
+#define INTR_STATUS_DDMA_TERR BIT(18)
+#define INTR_STATUS_CDMA_TERR BIT(17)
+#define INTR_STATUS_CDMA_IDL BIT(16)
+
+/* Interrupt enable register. */
+#define INTR_ENABLE 0x0114
+#define INTR_ENABLE_INTR_EN BIT(31)
+#define INTR_ENABLE_SDMA_ERR_EN BIT(22)
+#define INTR_ENABLE_SDMA_TRIGG_EN BIT(21)
+#define INTR_ENABLE_UNSUPP_CMD_EN BIT(19)
+#define INTR_ENABLE_DDMA_TERR_EN BIT(18)
+#define INTR_ENABLE_CDMA_TERR_EN BIT(17)
+#define INTR_ENABLE_CDMA_IDLE_EN BIT(16)
+
+/* Controller internal state. */
+#define CTRL_STATUS 0x0118
+#define CTRL_STATUS_INIT_COMP BIT(9)
+#define CTRL_STATUS_CTRL_BUSY BIT(8)
+
+/* Command Engine threads state. */
+#define TRD_STATUS 0x0120
+
+/* Command Engine interrupt thread error status. */
+#define TRD_ERR_INT_STATUS 0x0128
+/* Command Engine interrupt thread error enable. */
+#define TRD_ERR_INT_STATUS_EN 0x0130
+/* Command Engine interrupt thread complete status. */
+#define TRD_COMP_INT_STATUS 0x0138
+
+/*
+ * Transfer config 0 register.
+ * Configures data transfer parameters.
+ */
+#define TRAN_CFG_0 0x0400
+/* Offset value from the beginning of the page. */
+#define TRAN_CFG_0_OFFSET GENMASK(31, 16)
+/* Numbers of sectors to transfer within singlNF device's page. */
+#define TRAN_CFG_0_SEC_CNT GENMASK(7, 0)
+
+/*
+ * Transfer config 1 register.
+ * Configures data transfer parameters.
+ */
+#define TRAN_CFG_1 0x0404
+/* Size of last data sector. */
+#define TRAN_CFG_1_LAST_SEC_SIZE GENMASK(31, 16)
+/* Size of not-last data sector. */
+#define TRAN_CFG_1_SECTOR_SIZE GENMASK(15, 0)
+
+/* ECC engine configuration register 0. */
+#define ECC_CONFIG_0 0x0428
+/* Correction strength. */
+#define ECC_CONFIG_0_CORR_STR GENMASK(10, 8)
+/* Enable erased pages detection mechanism. */
+#define ECC_CONFIG_0_ERASE_DET_EN BIT(1)
+/* Enable controller ECC check bits generation and correction. */
+#define ECC_CONFIG_0_ECC_EN BIT(0)
+
+/* ECC engine configuration register 1. */
+#define ECC_CONFIG_1 0x042C
+
+/* Multiplane settings register. */
+#define MULTIPLANE_CFG 0x0434
+/* Cache operation settings. */
+#define CACHE_CFG 0x0438
+
+/* DMA settings register. */
+#define DMA_SETINGS 0x043C
+/* Enable SDMA error report on access unprepared slave DMA interface. */
+#define DMA_SETINGS_SDMA_ERR_RSP BIT(17)
+
+/* Transferred data block size for the slave DMA module. */
+#define SDMA_SIZE 0x0440
+
+/* Thread number associated with transferred data block
+ * for the slave DMA module.
+ */
+#define SDMA_TRD_NUM 0x0444
+/* Thread number mask. */
+#define SDMA_TRD_NUM_SDMA_TRD GENMASK(2, 0)
+
+#define CONTROL_DATA_CTRL 0x0494
+/* Thread number mask. */
+#define CONTROL_DATA_CTRL_SIZE GENMASK(15, 0)
+
+#define CTRL_VERSION 0x800
+#define CTRL_VERSION_REV GENMASK(7, 0)
+
+/* Available hardware features of the controller. */
+#define CTRL_FEATURES 0x804
+/* Support for NV-DDR2/3 work mode. */
+#define CTRL_FEATURES_NVDDR_2_3 BIT(28)
+/* Support for NV-DDR work mode. */
+#define CTRL_FEATURES_NVDDR BIT(27)
+/* Support for asynchronous work mode. */
+#define CTRL_FEATURES_ASYNC BIT(26)
+/* Support for asynchronous work mode. */
+#define CTRL_FEATURES_N_BANKS GENMASK(25, 24)
+/* Slave and Master DMA data width. */
+#define CTRL_FEATURES_DMA_DWITH64 BIT(21)
+/* Availability of Control Data feature.*/
+#define CTRL_FEATURES_CONTROL_DATA BIT(10)
+
+/* BCH Engine identification register 0 - correction strengths. */
+#define BCH_CFG_0 0x838
+#define BCH_CFG_0_CORR_CAP_0 GENMASK(7, 0)
+#define BCH_CFG_0_CORR_CAP_1 GENMASK(15, 8)
+#define BCH_CFG_0_CORR_CAP_2 GENMASK(23, 16)
+#define BCH_CFG_0_CORR_CAP_3 GENMASK(31, 24)
+
+/* BCH Engine identification register 1 - correction strengths. */
+#define BCH_CFG_1 0x83C
+#define BCH_CFG_1_CORR_CAP_4 GENMASK(7, 0)
+#define BCH_CFG_1_CORR_CAP_5 GENMASK(15, 8)
+#define BCH_CFG_1_CORR_CAP_6 GENMASK(23, 16)
+#define BCH_CFG_1_CORR_CAP_7 GENMASK(31, 24)
+
+/* BCH Engine identification register 2 - sector sizes. */
+#define BCH_CFG_2 0x840
+#define BCH_CFG_2_SECT_0 GENMASK(15, 0)
+#define BCH_CFG_2_SECT_1 GENMASK(31, 16)
+
+/* BCH Engine identification register 3. */
+#define BCH_CFG_3 0x844
+
+/* Ready/Busy# line status. */
+#define RBN_SETINGS 0x1004
+
+/* Common settings. */
+#define COMMON_SET 0x1008
+/* 16 bit device connected to the NAND Flash interface. */
+#define COMMON_SET_DEVICE_16BIT BIT(8)
+
+/* Skip_bytes registers. */
+#define SKIP_BYTES_CONF 0x100C
+#define SKIP_BYTES_MARKER_VALUE GENMASK(31, 16)
+#define SKIP_BYTES_NUM_OF_BYTES GENMASK(7, 0)
+
+#define SKIP_BYTES_OFFSET 0x1010
+#define SKIP_BYTES_OFFSET_VALUE GENMASK(23, 0)
+
+/* Timings configuration. */
+#define ASYNC_TOGGLE_TIMINGS 0x101c
+#define ASYNC_TOGGLE_TIMINGS_TRH GENMASK(28, 24)
+#define ASYNC_TOGGLE_TIMINGS_TRP GENMASK(20, 16)
+#define ASYNC_TOGGLE_TIMINGS_TWH GENMASK(12, 8)
+#define ASYNC_TOGGLE_TIMINGS_TWP GENMASK(4, 0)
+
+#define TIMINGS0 0x1024
+#define TIMINGS0_TADL GENMASK(31, 24)
+#define TIMINGS0_TCCS GENMASK(23, 16)
+#define TIMINGS0_TWHR GENMASK(15, 8)
+#define TIMINGS0_TRHW GENMASK(7, 0)
+
+#define TIMINGS1 0x1028
+#define TIMINGS1_TRHZ GENMASK(31, 24)
+#define TIMINGS1_TWB GENMASK(23, 16)
+#define TIMINGS1_TVDLY GENMASK(7, 0)
+
+#define TIMINGS2 0x102c
+#define TIMINGS2_TFEAT GENMASK(25, 16)
+#define TIMINGS2_CS_HOLD_TIME GENMASK(13, 8)
+#define TIMINGS2_CS_SETUP_TIME GENMASK(5, 0)
+
+/* Configuration of the resynchronization of slave DLL of PHY. */
+#define DLL_PHY_CTRL 0x1034
+#define DLL_PHY_CTRL_DLL_RST_N BIT(24)
+#define DLL_PHY_CTRL_EXTENDED_WR_MODE BIT(17)
+#define DLL_PHY_CTRL_EXTENDED_RD_MODE BIT(16)
+#define DLL_PHY_CTRL_RS_HIGH_WAIT_CNT GENMASK(11, 8)
+#define DLL_PHY_CTRL_RS_IDLE_CNT GENMASK(7, 0)
+
+/* Register controlling DQ related timing. */
+#define PHY_DQ_TIMING 0x2000
+/* Register controlling DSQ related timing. */
+#define PHY_DQS_TIMING 0x2004
+#define PHY_DQS_TIMING_DQS_SEL_OE_END GENMASK(3, 0)
+#define PHY_DQS_TIMING_PHONY_DQS_SEL BIT(16)
+#define PHY_DQS_TIMING_USE_PHONY_DQS BIT(20)
+
+/* Register controlling the gate and loopback control related timing. */
+#define PHY_GATE_LPBK_CTRL 0x2008
+#define PHY_GATE_LPBK_CTRL_RDS GENMASK(24, 19)
+
+/* Register holds the control for the master DLL logic. */
+#define PHY_DLL_MASTER_CTRL 0x200C
+#define PHY_DLL_MASTER_CTRL_BYPASS_MODE BIT(23)
+
+/* Register holds the control for the slave DLL logic. */
+#define PHY_DLL_SLAVE_CTRL 0x2010
+
+/* This register handles the global control settings for the PHY. */
+#define PHY_CTRL 0x2080
+#define PHY_CTRL_SDR_DQS BIT(14)
+#define PHY_CTRL_PHONY_DQS GENMASK(9, 4)
+
+/*
+ * This register handles the global control settings
+ * for the termination selects for reads.
+ */
+#define PHY_TSEL 0x2084
+
+/* Generic command layout. */
+#define GCMD_LAY_CS GENMASK_ULL(11, 8)
+/*
+ * This bit informs the minicotroller if it has to wait for tWB
+ * after sending the last CMD/ADDR/DATA in the sequence.
+ */
+#define GCMD_LAY_TWB BIT_ULL(6)
+/* Type of generic instruction. */
+#define GCMD_LAY_INSTR GENMASK_ULL(5, 0)
+
+/* Generic CMD sequence type. */
+#define GCMD_LAY_INSTR_CMD 0
+/* Generic ADDR sequence type. */
+#define GCMD_LAY_INSTR_ADDR 1
+/* Generic data transfer sequence type. */
+#define GCMD_LAY_INSTR_DATA 2
+
+/* Input part of generic command type of input is command. */
+#define GCMD_LAY_INPUT_CMD GENMASK_ULL(23, 16)
+
+/* Generic command address sequence - address fields. */
+#define GCMD_LAY_INPUT_ADDR GENMASK_ULL(63, 16)
+/* Generic command address sequence - address size. */
+#define GCMD_LAY_INPUT_ADDR_SIZE GENMASK_ULL(13, 11)
+
+/* Transfer direction field of generic command data sequence. */
+#define GCMD_DIR BIT_ULL(11)
+/* Read transfer direction of generic command data sequence. */
+#define GCMD_DIR_READ 0
+/* Write transfer direction of generic command data sequence. */
+#define GCMD_DIR_WRITE 1
+
+/* ECC enabled flag of generic command data sequence - ECC enabled. */
+#define GCMD_ECC_EN BIT_ULL(12)
+/* Generic command data sequence - sector size. */
+#define GCMD_SECT_SIZE GENMASK_ULL(31, 16)
+/* Generic command data sequence - sector count. */
+#define GCMD_SECT_CNT GENMASK_ULL(39, 32)
+/* Generic command data sequence - last sector size. */
+#define GCMD_LAST_SIZE GENMASK_ULL(55, 40)
+
+/* CDMA descriptor fields. */
+/* Erase command type of CDMA descriptor. */
+#define CDMA_CT_ERASE 0x1000
+/* Program page command type of CDMA descriptor. */
+#define CDMA_CT_WR 0x2100
+/* Read page command type of CDMA descriptor. */
+#define CDMA_CT_RD 0x2200
+
+/* Flash pointer memory shift. */
+#define CDMA_CFPTR_MEM_SHIFT 24
+/* Flash pointer memory mask. */
+#define CDMA_CFPTR_MEM GENMASK(26, 24)
+
+/*
+ * Command DMA descriptor flags. If set causes issue interrupt after
+ * the completion of descriptor processing.
+ */
+#define CDMA_CF_INT BIT(8)
+/*
+ * Command DMA descriptor flags - the next descriptor
+ * address field is valid and descriptor processing should continue.
+ */
+#define CDMA_CF_CONT BIT(9)
+/* DMA master flag of command DMA descriptor. */
+#define CDMA_CF_DMA_MASTER BIT(10)
+
+/* Operation complete status of command descriptor. */
+#define CDMA_CS_COMP BIT(15)
+/* Operation complete status of command descriptor. */
+/* Command descriptor status - operation fail. */
+#define CDMA_CS_FAIL BIT(14)
+/* Command descriptor status - page erased. */
+#define CDMA_CS_ERP BIT(11)
+/* Command descriptor status - timeout occurred. */
+#define CDMA_CS_TOUT BIT(10)
+/*
+ * Maximum amount of correction applied to one ECC sector.
+ * It is part of command descriptor status.
+ */
+#define CDMA_CS_MAXERR GENMASK(9, 2)
+/* Command descriptor status - uncorrectable ECC error. */
+#define CDMA_CS_UNCE BIT(1)
+/* Command descriptor status - descriptor error. */
+#define CDMA_CS_ERR BIT(0)
+
+/* Status of operation - OK. */
+#define STAT_OK 0
+/* Status of operation - FAIL. */
+#define STAT_FAIL 2
+/* Status of operation - uncorrectable ECC error. */
+#define STAT_ECC_UNCORR 3
+/* Status of operation - page erased. */
+#define STAT_ERASED 5
+/* Status of operation - correctable ECC error. */
+#define STAT_ECC_CORR 6
+/* Status of operation - unsuspected state. */
+#define STAT_UNKNOWN 7
+/* Status of operation - operation is not completed yet. */
+#define STAT_BUSY 0xFF
+
+#define BCH_MAX_NUM_CORR_CAPS 8
+#define BCH_MAX_NUM_SECTOR_SIZES 2
+
+struct cadence_nand_timings {
+ u32 async_toggle_timings;
+ u32 timings0;
+ u32 timings1;
+ u32 timings2;
+ u32 dll_phy_ctrl;
+ u32 phy_ctrl;
+ u32 phy_dqs_timing;
+ u32 phy_gate_lpbk_ctrl;
+};
+
+/* Command DMA descriptor. */
+struct cadence_nand_cdma_desc {
+ /* Next descriptor address. */
+ u64 next_pointer;
+
+ /* Flash address is a 32-bit address comprising of BANK and ROW ADDR. */
+ u32 flash_pointer;
+ /*field appears in HPNFC version 13*/
+ u16 bank;
+ u16 rsvd0;
+
+ /* Operation the controller needs to perform. */
+ u16 command_type;
+ u16 rsvd1;
+ /* Flags for operation of this command. */
+ u16 command_flags;
+ u16 rsvd2;
+
+ /* System/host memory address required for data DMA commands. */
+ u64 memory_pointer;
+
+ /* Status of operation. */
+ u32 status;
+ u32 rsvd3;
+
+ /* Address pointer to sync buffer location. */
+ u64 sync_flag_pointer;
+
+ /* Controls the buffer sync mechanism. */
+ u32 sync_arguments;
+ u32 rsvd4;
+
+ /* Control data pointer. */
+ u64 ctrl_data_ptr;
+};
+
+/* Interrupt status. */
+struct cadence_nand_irq_status {
+ /* Thread operation complete status. */
+ u32 trd_status;
+ /* Thread operation error. */
+ u32 trd_error;
+ /* Controller status. */
+ u32 status;
+};
+
+/* Cadence NAND flash controller capabilities get from driver data. */
+struct cadence_nand_dt_devdata {
+ /* Skew value of the output signals of the NAND Flash interface. */
+ u32 if_skew;
+ /* It informs if slave DMA interface is connected to DMA engine. */
+ unsigned int has_dma:1;
+};
+
+/* Cadence NAND flash controller capabilities read from registers. */
+struct cdns_nand_caps {
+ /* Maximum number of banks supported by hardware. */
+ u8 max_banks;
+ /* Slave and Master DMA data width in bytes (4 or 8). */
+ u8 data_dma_width;
+ /* Control Data feature supported. */
+ bool data_control_supp;
+ /* Is PHY type DLL. */
+ bool is_phy_type_dll;
+};
+
+struct cdns_nand_ctrl {
+ struct device *dev;
+ struct nand_controller controller;
+ struct cadence_nand_cdma_desc *cdma_desc;
+ /* IP capability. */
+ const struct cadence_nand_dt_devdata *caps1;
+ struct cdns_nand_caps caps2;
+ u8 ctrl_rev;
+ dma_addr_t dma_cdma_desc;
+ u8 *buf;
+ u32 buf_size;
+ u8 curr_corr_str_idx;
+
+ /* Register interface. */
+ void __iomem *reg;
+
+ struct {
+ void __iomem *virt;
+ dma_addr_t dma;
+ } io;
+
+ int irq;
+ /* Interrupts that have happened. */
+ struct cadence_nand_irq_status irq_status;
+ /* Interrupts we are waiting for. */
+ struct cadence_nand_irq_status irq_mask;
+ struct completion complete;
+ /* Protect irq_mask and irq_status. */
+ spinlock_t irq_lock;
+
+ int ecc_strengths[BCH_MAX_NUM_CORR_CAPS];
+ struct nand_ecc_step_info ecc_stepinfos[BCH_MAX_NUM_SECTOR_SIZES];
+ struct nand_ecc_caps ecc_caps;
+
+ int curr_trans_type;
+
+ struct dma_chan *dmac;
+
+ u32 nf_clk_rate;
+ /*
+ * Estimated Board delay. The value includes the total
+ * round trip delay for the signals and is used for deciding on values
+ * associated with data read capture.
+ */
+ u32 board_delay;
+
+ struct nand_chip *selected_chip;
+
+ unsigned long assigned_cs;
+ struct list_head chips;
+};
+
+struct cdns_nand_chip {
+ struct cadence_nand_timings timings;
+ struct nand_chip chip;
+ u8 nsels;
+ struct list_head node;
+
+ /*
+ * part of oob area of NAND flash memory page.
+ * This part is available for user to read or write.
+ */
+ u32 avail_oob_size;
+
+ /* Sector size. There are few sectors per mtd->writesize */
+ u32 sector_size;
+ u32 sector_count;
+
+ /* Offset of BBM. */
+ u8 bbm_offs;
+ /* Number of bytes reserved for BBM. */
+ u8 bbm_len;
+ /* ECC strength index. */
+ u8 corr_str_idx;
+
+ u8 cs[];
+};
+
+struct ecc_info {
+ int (*calc_ecc_bytes)(int step_size, int strength);
+ int max_step_size;
+};
+
+static inline struct
+cdns_nand_chip *to_cdns_nand_chip(struct nand_chip *chip)
+{
+ return container_of(chip, struct cdns_nand_chip, chip);
+}
+
+static inline struct
+cdns_nand_ctrl *to_cdns_nand_ctrl(struct nand_controller *controller)
+{
+ return container_of(controller, struct cdns_nand_ctrl, controller);
+}
+
+static bool
+cadence_nand_dma_buf_ok(struct cdns_nand_ctrl *cdns_ctrl, const void *buf,
+ u32 buf_len)
+{
+ u8 data_dma_width = cdns_ctrl->caps2.data_dma_width;
+
+ return buf && virt_addr_valid(buf) &&
+ likely(IS_ALIGNED((uintptr_t)buf, data_dma_width)) &&
+ likely(IS_ALIGNED(buf_len, DMA_DATA_SIZE_ALIGN));
+}
+
+static int cadence_nand_wait_for_value(struct cdns_nand_ctrl *cdns_ctrl,
+ u32 reg_offset, u32 timeout_us,
+ u32 mask, bool is_clear)
+{
+ u32 val;
+ int ret;
+
+ ret = readl_relaxed_poll_timeout(cdns_ctrl->reg + reg_offset,
+ val, !(val & mask) == is_clear,
+ 10, timeout_us);
+
+ if (ret < 0) {
+ dev_err(cdns_ctrl->dev,
+ "Timeout while waiting for reg %x with mask %x is clear %d\n",
+ reg_offset, mask, is_clear);
+ }
+
+ return ret;
+}
+
+static int cadence_nand_set_ecc_enable(struct cdns_nand_ctrl *cdns_ctrl,
+ bool enable)
+{
+ u32 reg;
+
+ if (cadence_nand_wait_for_value(cdns_ctrl, CTRL_STATUS,
+ 1000000,
+ CTRL_STATUS_CTRL_BUSY, true))
+ return -ETIMEDOUT;
+
+ reg = readl_relaxed(cdns_ctrl->reg + ECC_CONFIG_0);
+
+ if (enable)
+ reg |= ECC_CONFIG_0_ECC_EN;
+ else
+ reg &= ~ECC_CONFIG_0_ECC_EN;
+
+ writel_relaxed(reg, cdns_ctrl->reg + ECC_CONFIG_0);
+
+ return 0;
+}
+
+static void cadence_nand_set_ecc_strength(struct cdns_nand_ctrl *cdns_ctrl,
+ u8 corr_str_idx)
+{
+ u32 reg;
+
+ if (cdns_ctrl->curr_corr_str_idx == corr_str_idx)
+ return;
+
+ reg = readl_relaxed(cdns_ctrl->reg + ECC_CONFIG_0);
+ reg &= ~ECC_CONFIG_0_CORR_STR;
+ reg |= FIELD_PREP(ECC_CONFIG_0_CORR_STR, corr_str_idx);
+ writel_relaxed(reg, cdns_ctrl->reg + ECC_CONFIG_0);
+
+ cdns_ctrl->curr_corr_str_idx = corr_str_idx;
+}
+
+static int cadence_nand_get_ecc_strength_idx(struct cdns_nand_ctrl *cdns_ctrl,
+ u8 strength)
+{
+ int i, corr_str_idx = -1;
+
+ for (i = 0; i < BCH_MAX_NUM_CORR_CAPS; i++) {
+ if (cdns_ctrl->ecc_strengths[i] == strength) {
+ corr_str_idx = i;
+ break;
+ }
+ }
+
+ return corr_str_idx;
+}
+
+static int cadence_nand_set_skip_marker_val(struct cdns_nand_ctrl *cdns_ctrl,
+ u16 marker_value)
+{
+ u32 reg;
+
+ if (cadence_nand_wait_for_value(cdns_ctrl, CTRL_STATUS,
+ 1000000,
+ CTRL_STATUS_CTRL_BUSY, true))
+ return -ETIMEDOUT;
+
+ reg = readl_relaxed(cdns_ctrl->reg + SKIP_BYTES_CONF);
+ reg &= ~SKIP_BYTES_MARKER_VALUE;
+ reg |= FIELD_PREP(SKIP_BYTES_MARKER_VALUE,
+ marker_value);
+
+ writel_relaxed(reg, cdns_ctrl->reg + SKIP_BYTES_CONF);
+
+ return 0;
+}
+
+static int cadence_nand_set_skip_bytes_conf(struct cdns_nand_ctrl *cdns_ctrl,
+ u8 num_of_bytes,
+ u32 offset_value,
+ int enable)
+{
+ u32 reg, skip_bytes_offset;
+
+ if (cadence_nand_wait_for_value(cdns_ctrl, CTRL_STATUS,
+ 1000000,
+ CTRL_STATUS_CTRL_BUSY, true))
+ return -ETIMEDOUT;
+
+ if (!enable) {
+ num_of_bytes = 0;
+ offset_value = 0;
+ }
+
+ reg = readl_relaxed(cdns_ctrl->reg + SKIP_BYTES_CONF);
+ reg &= ~SKIP_BYTES_NUM_OF_BYTES;
+ reg |= FIELD_PREP(SKIP_BYTES_NUM_OF_BYTES,
+ num_of_bytes);
+ skip_bytes_offset = FIELD_PREP(SKIP_BYTES_OFFSET_VALUE,
+ offset_value);
+
+ writel_relaxed(reg, cdns_ctrl->reg + SKIP_BYTES_CONF);
+ writel_relaxed(skip_bytes_offset, cdns_ctrl->reg + SKIP_BYTES_OFFSET);
+
+ return 0;
+}
+
+/* Functions enables/disables hardware detection of erased data */
+static void cadence_nand_set_erase_detection(struct cdns_nand_ctrl *cdns_ctrl,
+ bool enable,
+ u8 bitflips_threshold)
+{
+ u32 reg;
+
+ reg = readl_relaxed(cdns_ctrl->reg + ECC_CONFIG_0);
+
+ if (enable)
+ reg |= ECC_CONFIG_0_ERASE_DET_EN;
+ else
+ reg &= ~ECC_CONFIG_0_ERASE_DET_EN;
+
+ writel_relaxed(reg, cdns_ctrl->reg + ECC_CONFIG_0);
+ writel_relaxed(bitflips_threshold, cdns_ctrl->reg + ECC_CONFIG_1);
+}
+
+static int cadence_nand_set_access_width16(struct cdns_nand_ctrl *cdns_ctrl,
+ bool bit_bus16)
+{
+ u32 reg;
+
+ if (cadence_nand_wait_for_value(cdns_ctrl, CTRL_STATUS,
+ 1000000,
+ CTRL_STATUS_CTRL_BUSY, true))
+ return -ETIMEDOUT;
+
+ reg = readl_relaxed(cdns_ctrl->reg + COMMON_SET);
+
+ if (!bit_bus16)
+ reg &= ~COMMON_SET_DEVICE_16BIT;
+ else
+ reg |= COMMON_SET_DEVICE_16BIT;
+ writel_relaxed(reg, cdns_ctrl->reg + COMMON_SET);
+
+ return 0;
+}
+
+static void
+cadence_nand_clear_interrupt(struct cdns_nand_ctrl *cdns_ctrl,
+ struct cadence_nand_irq_status *irq_status)
+{
+ writel_relaxed(irq_status->status, cdns_ctrl->reg + INTR_STATUS);
+ writel_relaxed(irq_status->trd_status,
+ cdns_ctrl->reg + TRD_COMP_INT_STATUS);
+ writel_relaxed(irq_status->trd_error,
+ cdns_ctrl->reg + TRD_ERR_INT_STATUS);
+}
+
+static void
+cadence_nand_read_int_status(struct cdns_nand_ctrl *cdns_ctrl,
+ struct cadence_nand_irq_status *irq_status)
+{
+ irq_status->status = readl_relaxed(cdns_ctrl->reg + INTR_STATUS);
+ irq_status->trd_status = readl_relaxed(cdns_ctrl->reg
+ + TRD_COMP_INT_STATUS);
+ irq_status->trd_error = readl_relaxed(cdns_ctrl->reg
+ + TRD_ERR_INT_STATUS);
+}
+
+static u32 irq_detected(struct cdns_nand_ctrl *cdns_ctrl,
+ struct cadence_nand_irq_status *irq_status)
+{
+ cadence_nand_read_int_status(cdns_ctrl, irq_status);
+
+ return irq_status->status || irq_status->trd_status ||
+ irq_status->trd_error;
+}
+
+static void cadence_nand_reset_irq(struct cdns_nand_ctrl *cdns_ctrl)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cdns_ctrl->irq_lock, flags);
+ memset(&cdns_ctrl->irq_status, 0, sizeof(cdns_ctrl->irq_status));
+ memset(&cdns_ctrl->irq_mask, 0, sizeof(cdns_ctrl->irq_mask));
+ spin_unlock_irqrestore(&cdns_ctrl->irq_lock, flags);
+}
+
+/*
+ * This is the interrupt service routine. It handles all interrupts
+ * sent to this device.
+ */
+static irqreturn_t cadence_nand_isr(int irq, void *dev_id)
+{
+ struct cdns_nand_ctrl *cdns_ctrl = dev_id;
+ struct cadence_nand_irq_status irq_status;
+ irqreturn_t result = IRQ_NONE;
+
+ spin_lock(&cdns_ctrl->irq_lock);
+
+ if (irq_detected(cdns_ctrl, &irq_status)) {
+ /* Handle interrupt. */
+ /* First acknowledge it. */
+ cadence_nand_clear_interrupt(cdns_ctrl, &irq_status);
+ /* Status in the device context for someone to read. */
+ cdns_ctrl->irq_status.status |= irq_status.status;
+ cdns_ctrl->irq_status.trd_status |= irq_status.trd_status;
+ cdns_ctrl->irq_status.trd_error |= irq_status.trd_error;
+ /* Notify anyone who cares that it happened. */
+ complete(&cdns_ctrl->complete);
+ /* Tell the OS that we've handled this. */
+ result = IRQ_HANDLED;
+ }
+ spin_unlock(&cdns_ctrl->irq_lock);
+
+ return result;
+}
+
+static void cadence_nand_set_irq_mask(struct cdns_nand_ctrl *cdns_ctrl,
+ struct cadence_nand_irq_status *irq_mask)
+{
+ writel_relaxed(INTR_ENABLE_INTR_EN | irq_mask->status,
+ cdns_ctrl->reg + INTR_ENABLE);
+
+ writel_relaxed(irq_mask->trd_error,
+ cdns_ctrl->reg + TRD_ERR_INT_STATUS_EN);
+}
+
+static void
+cadence_nand_wait_for_irq(struct cdns_nand_ctrl *cdns_ctrl,
+ struct cadence_nand_irq_status *irq_mask,
+ struct cadence_nand_irq_status *irq_status)
+{
+ unsigned long timeout = msecs_to_jiffies(10000);
+ unsigned long time_left;
+
+ time_left = wait_for_completion_timeout(&cdns_ctrl->complete,
+ timeout);
+
+ *irq_status = cdns_ctrl->irq_status;
+ if (time_left == 0) {
+ /* Timeout error. */
+ dev_err(cdns_ctrl->dev, "timeout occurred:\n");
+ dev_err(cdns_ctrl->dev, "\tstatus = 0x%x, mask = 0x%x\n",
+ irq_status->status, irq_mask->status);
+ dev_err(cdns_ctrl->dev,
+ "\ttrd_status = 0x%x, trd_status mask = 0x%x\n",
+ irq_status->trd_status, irq_mask->trd_status);
+ dev_err(cdns_ctrl->dev,
+ "\t trd_error = 0x%x, trd_error mask = 0x%x\n",
+ irq_status->trd_error, irq_mask->trd_error);
+ }
+}
+
+/* Execute generic command on NAND controller. */
+static int cadence_nand_generic_cmd_send(struct cdns_nand_ctrl *cdns_ctrl,
+ u8 chip_nr,
+ u64 mini_ctrl_cmd)
+{
+ u32 mini_ctrl_cmd_l, mini_ctrl_cmd_h, reg;
+
+ mini_ctrl_cmd |= FIELD_PREP(GCMD_LAY_CS, chip_nr);
+ mini_ctrl_cmd_l = mini_ctrl_cmd & 0xFFFFFFFF;
+ mini_ctrl_cmd_h = mini_ctrl_cmd >> 32;
+
+ if (cadence_nand_wait_for_value(cdns_ctrl, CTRL_STATUS,
+ 1000000,
+ CTRL_STATUS_CTRL_BUSY, true))
+ return -ETIMEDOUT;
+
+ cadence_nand_reset_irq(cdns_ctrl);
+
+ writel_relaxed(mini_ctrl_cmd_l, cdns_ctrl->reg + CMD_REG2);
+ writel_relaxed(mini_ctrl_cmd_h, cdns_ctrl->reg + CMD_REG3);
+
+ /* Select generic command. */
+ reg = FIELD_PREP(CMD_REG0_CT, CMD_REG0_CT_GEN);
+ /* Thread number. */
+ reg |= FIELD_PREP(CMD_REG0_TN, 0);
+
+ /* Issue command. */
+ writel_relaxed(reg, cdns_ctrl->reg + CMD_REG0);
+
+ return 0;
+}
+
+/* Wait for data on slave DMA interface. */
+static int cadence_nand_wait_on_sdma(struct cdns_nand_ctrl *cdns_ctrl,
+ u8 *out_sdma_trd,
+ u32 *out_sdma_size)
+{
+ struct cadence_nand_irq_status irq_mask, irq_status;
+
+ irq_mask.trd_status = 0;
+ irq_mask.trd_error = 0;
+ irq_mask.status = INTR_STATUS_SDMA_TRIGG
+ | INTR_STATUS_SDMA_ERR
+ | INTR_STATUS_UNSUPP_CMD;
+
+ cadence_nand_set_irq_mask(cdns_ctrl, &irq_mask);
+ cadence_nand_wait_for_irq(cdns_ctrl, &irq_mask, &irq_status);
+ if (irq_status.status == 0) {
+ dev_err(cdns_ctrl->dev, "Timeout while waiting for SDMA\n");
+ return -ETIMEDOUT;
+ }
+
+ if (irq_status.status & INTR_STATUS_SDMA_TRIGG) {
+ *out_sdma_size = readl_relaxed(cdns_ctrl->reg + SDMA_SIZE);
+ *out_sdma_trd = readl_relaxed(cdns_ctrl->reg + SDMA_TRD_NUM);
+ *out_sdma_trd =
+ FIELD_GET(SDMA_TRD_NUM_SDMA_TRD, *out_sdma_trd);
+ } else {
+ dev_err(cdns_ctrl->dev, "SDMA error - irq_status %x\n",
+ irq_status.status);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static void cadence_nand_get_caps(struct cdns_nand_ctrl *cdns_ctrl)
+{
+ u32 reg;
+
+ reg = readl_relaxed(cdns_ctrl->reg + CTRL_FEATURES);
+
+ cdns_ctrl->caps2.max_banks = 1 << FIELD_GET(CTRL_FEATURES_N_BANKS, reg);
+
+ if (FIELD_GET(CTRL_FEATURES_DMA_DWITH64, reg))
+ cdns_ctrl->caps2.data_dma_width = 8;
+ else
+ cdns_ctrl->caps2.data_dma_width = 4;
+
+ if (reg & CTRL_FEATURES_CONTROL_DATA)
+ cdns_ctrl->caps2.data_control_supp = true;
+
+ if (reg & (CTRL_FEATURES_NVDDR_2_3
+ | CTRL_FEATURES_NVDDR))
+ cdns_ctrl->caps2.is_phy_type_dll = true;
+}
+
+/* Prepare CDMA descriptor. */
+static void
+cadence_nand_cdma_desc_prepare(struct cdns_nand_ctrl *cdns_ctrl,
+ char nf_mem, u32 flash_ptr, char *mem_ptr,
+ char *ctrl_data_ptr, u16 ctype)
+{
+ struct cadence_nand_cdma_desc *cdma_desc = cdns_ctrl->cdma_desc;
+
+ memset(cdma_desc, 0, sizeof(struct cadence_nand_cdma_desc));
+
+ /* Set fields for one descriptor. */
+ cdma_desc->flash_pointer = flash_ptr;
+ if (cdns_ctrl->ctrl_rev >= 13)
+ cdma_desc->bank = nf_mem;
+ else
+ cdma_desc->flash_pointer |= (nf_mem << CDMA_CFPTR_MEM_SHIFT);
+
+ cdma_desc->command_flags |= CDMA_CF_DMA_MASTER;
+ cdma_desc->command_flags |= CDMA_CF_INT;
+
+ cdma_desc->memory_pointer = (uintptr_t)mem_ptr;
+ cdma_desc->status = 0;
+ cdma_desc->sync_flag_pointer = 0;
+ cdma_desc->sync_arguments = 0;
+
+ cdma_desc->command_type = ctype;
+ cdma_desc->ctrl_data_ptr = (uintptr_t)ctrl_data_ptr;
+}
+
+static u8 cadence_nand_check_desc_error(struct cdns_nand_ctrl *cdns_ctrl,
+ u32 desc_status)
+{
+ if (desc_status & CDMA_CS_ERP)
+ return STAT_ERASED;
+
+ if (desc_status & CDMA_CS_UNCE)
+ return STAT_ECC_UNCORR;
+
+ if (desc_status & CDMA_CS_ERR) {
+ dev_err(cdns_ctrl->dev, ":CDMA desc error flag detected.\n");
+ return STAT_FAIL;
+ }
+
+ if (FIELD_GET(CDMA_CS_MAXERR, desc_status))
+ return STAT_ECC_CORR;
+
+ return STAT_FAIL;
+}
+
+static int cadence_nand_cdma_finish(struct cdns_nand_ctrl *cdns_ctrl)
+{
+ struct cadence_nand_cdma_desc *desc_ptr = cdns_ctrl->cdma_desc;
+ u8 status = STAT_BUSY;
+
+ if (desc_ptr->status & CDMA_CS_FAIL) {
+ status = cadence_nand_check_desc_error(cdns_ctrl,
+ desc_ptr->status);
+ dev_err(cdns_ctrl->dev, ":CDMA error %x\n", desc_ptr->status);
+ } else if (desc_ptr->status & CDMA_CS_COMP) {
+ /* Descriptor finished with no errors. */
+ if (desc_ptr->command_flags & CDMA_CF_CONT) {
+ dev_info(cdns_ctrl->dev, "DMA unsupported flag is set");
+ status = STAT_UNKNOWN;
+ } else {
+ /* Last descriptor. */
+ status = STAT_OK;
+ }
+ }
+
+ return status;
+}
+
+static int cadence_nand_cdma_send(struct cdns_nand_ctrl *cdns_ctrl,
+ u8 thread)
+{
+ u32 reg;
+ int status;
+
+ /* Wait for thread ready. */
+ status = cadence_nand_wait_for_value(cdns_ctrl, TRD_STATUS,
+ 1000000,
+ BIT(thread), true);
+ if (status)
+ return status;
+
+ cadence_nand_reset_irq(cdns_ctrl);
+
+ writel_relaxed((u32)cdns_ctrl->dma_cdma_desc,
+ cdns_ctrl->reg + CMD_REG2);
+ writel_relaxed(0, cdns_ctrl->reg + CMD_REG3);
+
+ /* Select CDMA mode. */
+ reg = FIELD_PREP(CMD_REG0_CT, CMD_REG0_CT_CDMA);
+ /* Thread number. */
+ reg |= FIELD_PREP(CMD_REG0_TN, thread);
+ /* Issue command. */
+ writel_relaxed(reg, cdns_ctrl->reg + CMD_REG0);
+
+ return 0;
+}
+
+/* Send SDMA command and wait for finish. */
+static u32
+cadence_nand_cdma_send_and_wait(struct cdns_nand_ctrl *cdns_ctrl,
+ u8 thread)
+{
+ struct cadence_nand_irq_status irq_mask, irq_status = {0};
+ int status;
+
+ irq_mask.trd_status = BIT(thread);
+ irq_mask.trd_error = BIT(thread);
+ irq_mask.status = INTR_STATUS_CDMA_TERR;
+
+ cadence_nand_set_irq_mask(cdns_ctrl, &irq_mask);
+
+ status = cadence_nand_cdma_send(cdns_ctrl, thread);
+ if (status)
+ return status;
+
+ cadence_nand_wait_for_irq(cdns_ctrl, &irq_mask, &irq_status);
+
+ if (irq_status.status == 0 && irq_status.trd_status == 0 &&
+ irq_status.trd_error == 0) {
+ dev_err(cdns_ctrl->dev, "CDMA command timeout\n");
+ return -ETIMEDOUT;
+ }
+ if (irq_status.status & irq_mask.status) {
+ dev_err(cdns_ctrl->dev, "CDMA command failed\n");
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/*
+ * ECC size depends on configured ECC strength and on maximum supported
+ * ECC step size.
+ */
+static int cadence_nand_calc_ecc_bytes(int max_step_size, int strength)
+{
+ int nbytes = DIV_ROUND_UP(fls(8 * max_step_size) * strength, 8);
+
+ return ALIGN(nbytes, 2);
+}
+
+#define CADENCE_NAND_CALC_ECC_BYTES(max_step_size) \
+ static int \
+ cadence_nand_calc_ecc_bytes_##max_step_size(int step_size, \
+ int strength)\
+ {\
+ return cadence_nand_calc_ecc_bytes(max_step_size, strength);\
+ }
+
+CADENCE_NAND_CALC_ECC_BYTES(256)
+CADENCE_NAND_CALC_ECC_BYTES(512)
+CADENCE_NAND_CALC_ECC_BYTES(1024)
+CADENCE_NAND_CALC_ECC_BYTES(2048)
+CADENCE_NAND_CALC_ECC_BYTES(4096)
+
+/* Function reads BCH capabilities. */
+static int cadence_nand_read_bch_caps(struct cdns_nand_ctrl *cdns_ctrl)
+{
+ struct nand_ecc_caps *ecc_caps = &cdns_ctrl->ecc_caps;
+ int max_step_size = 0, nstrengths, i;
+ u32 reg;
+
+ reg = readl_relaxed(cdns_ctrl->reg + BCH_CFG_0);
+ cdns_ctrl->ecc_strengths[0] = FIELD_GET(BCH_CFG_0_CORR_CAP_0, reg);
+ cdns_ctrl->ecc_strengths[1] = FIELD_GET(BCH_CFG_0_CORR_CAP_1, reg);
+ cdns_ctrl->ecc_strengths[2] = FIELD_GET(BCH_CFG_0_CORR_CAP_2, reg);
+ cdns_ctrl->ecc_strengths[3] = FIELD_GET(BCH_CFG_0_CORR_CAP_3, reg);
+
+ reg = readl_relaxed(cdns_ctrl->reg + BCH_CFG_1);
+ cdns_ctrl->ecc_strengths[4] = FIELD_GET(BCH_CFG_1_CORR_CAP_4, reg);
+ cdns_ctrl->ecc_strengths[5] = FIELD_GET(BCH_CFG_1_CORR_CAP_5, reg);
+ cdns_ctrl->ecc_strengths[6] = FIELD_GET(BCH_CFG_1_CORR_CAP_6, reg);
+ cdns_ctrl->ecc_strengths[7] = FIELD_GET(BCH_CFG_1_CORR_CAP_7, reg);
+
+ reg = readl_relaxed(cdns_ctrl->reg + BCH_CFG_2);
+ cdns_ctrl->ecc_stepinfos[0].stepsize =
+ FIELD_GET(BCH_CFG_2_SECT_0, reg);
+
+ cdns_ctrl->ecc_stepinfos[1].stepsize =
+ FIELD_GET(BCH_CFG_2_SECT_1, reg);
+
+ nstrengths = 0;
+ for (i = 0; i < BCH_MAX_NUM_CORR_CAPS; i++) {
+ if (cdns_ctrl->ecc_strengths[i] != 0)
+ nstrengths++;
+ }
+
+ ecc_caps->nstepinfos = 0;
+ for (i = 0; i < BCH_MAX_NUM_SECTOR_SIZES; i++) {
+ /* ECC strengths are common for all step infos. */
+ cdns_ctrl->ecc_stepinfos[i].nstrengths = nstrengths;
+ cdns_ctrl->ecc_stepinfos[i].strengths =
+ cdns_ctrl->ecc_strengths;
+
+ if (cdns_ctrl->ecc_stepinfos[i].stepsize != 0)
+ ecc_caps->nstepinfos++;
+
+ if (cdns_ctrl->ecc_stepinfos[i].stepsize > max_step_size)
+ max_step_size = cdns_ctrl->ecc_stepinfos[i].stepsize;
+ }
+ ecc_caps->stepinfos = &cdns_ctrl->ecc_stepinfos[0];
+
+ switch (max_step_size) {
+ case 256:
+ ecc_caps->calc_ecc_bytes = &cadence_nand_calc_ecc_bytes_256;
+ break;
+ case 512:
+ ecc_caps->calc_ecc_bytes = &cadence_nand_calc_ecc_bytes_512;
+ break;
+ case 1024:
+ ecc_caps->calc_ecc_bytes = &cadence_nand_calc_ecc_bytes_1024;
+ break;
+ case 2048:
+ ecc_caps->calc_ecc_bytes = &cadence_nand_calc_ecc_bytes_2048;
+ break;
+ case 4096:
+ ecc_caps->calc_ecc_bytes = &cadence_nand_calc_ecc_bytes_4096;
+ break;
+ default:
+ dev_err(cdns_ctrl->dev,
+ "Unsupported sector size(ecc step size) %d\n",
+ max_step_size);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/* Hardware initialization. */
+static int cadence_nand_hw_init(struct cdns_nand_ctrl *cdns_ctrl)
+{
+ int status;
+ u32 reg;
+
+ status = cadence_nand_wait_for_value(cdns_ctrl, CTRL_STATUS,
+ 1000000,
+ CTRL_STATUS_INIT_COMP, false);
+ if (status)
+ return status;
+
+ reg = readl_relaxed(cdns_ctrl->reg + CTRL_VERSION);
+ cdns_ctrl->ctrl_rev = FIELD_GET(CTRL_VERSION_REV, reg);
+
+ dev_info(cdns_ctrl->dev,
+ "%s: cadence nand controller version reg %x\n",
+ __func__, reg);
+
+ /* Disable cache and multiplane. */
+ writel_relaxed(0, cdns_ctrl->reg + MULTIPLANE_CFG);
+ writel_relaxed(0, cdns_ctrl->reg + CACHE_CFG);
+
+ /* Clear all interrupts. */
+ writel_relaxed(0xFFFFFFFF, cdns_ctrl->reg + INTR_STATUS);
+
+ cadence_nand_get_caps(cdns_ctrl);
+ cadence_nand_read_bch_caps(cdns_ctrl);
+
+ /*
+ * Set IO width access to 8.
+ * It is because during SW device discovering width access
+ * is expected to be 8.
+ */
+ status = cadence_nand_set_access_width16(cdns_ctrl, false);
+
+ return status;
+}
+
+#define TT_MAIN_OOB_AREAS 2
+#define TT_RAW_PAGE 3
+#define TT_BBM 4
+#define TT_MAIN_OOB_AREA_EXT 5
+
+/* Prepare size of data to transfer. */
+static void
+cadence_nand_prepare_data_size(struct nand_chip *chip,
+ int transfer_type)
+{
+ struct cdns_nand_ctrl *cdns_ctrl = to_cdns_nand_ctrl(chip->controller);
+ struct cdns_nand_chip *cdns_chip = to_cdns_nand_chip(chip);
+ struct mtd_info *mtd = nand_to_mtd(chip);
+ u32 sec_size = 0, offset = 0, sec_cnt = 1;
+ u32 last_sec_size = cdns_chip->sector_size;
+ u32 data_ctrl_size = 0;
+ u32 reg = 0;
+
+ if (cdns_ctrl->curr_trans_type == transfer_type)
+ return;
+
+ switch (transfer_type) {
+ case TT_MAIN_OOB_AREA_EXT:
+ sec_cnt = cdns_chip->sector_count;
+ sec_size = cdns_chip->sector_size;
+ data_ctrl_size = cdns_chip->avail_oob_size;
+ break;
+ case TT_MAIN_OOB_AREAS:
+ sec_cnt = cdns_chip->sector_count;
+ last_sec_size = cdns_chip->sector_size
+ + cdns_chip->avail_oob_size;
+ sec_size = cdns_chip->sector_size;
+ break;
+ case TT_RAW_PAGE:
+ last_sec_size = mtd->writesize + mtd->oobsize;
+ break;
+ case TT_BBM:
+ offset = mtd->writesize + cdns_chip->bbm_offs;
+ last_sec_size = 8;
+ break;
+ }
+
+ reg = 0;
+ reg |= FIELD_PREP(TRAN_CFG_0_OFFSET, offset);
+ reg |= FIELD_PREP(TRAN_CFG_0_SEC_CNT, sec_cnt);
+ writel_relaxed(reg, cdns_ctrl->reg + TRAN_CFG_0);
+
+ reg = 0;
+ reg |= FIELD_PREP(TRAN_CFG_1_LAST_SEC_SIZE, last_sec_size);
+ reg |= FIELD_PREP(TRAN_CFG_1_SECTOR_SIZE, sec_size);
+ writel_relaxed(reg, cdns_ctrl->reg + TRAN_CFG_1);
+
+ if (cdns_ctrl->caps2.data_control_supp) {
+ reg = readl_relaxed(cdns_ctrl->reg + CONTROL_DATA_CTRL);
+ reg &= ~CONTROL_DATA_CTRL_SIZE;
+ reg |= FIELD_PREP(CONTROL_DATA_CTRL_SIZE, data_ctrl_size);
+ writel_relaxed(reg, cdns_ctrl->reg + CONTROL_DATA_CTRL);
+ }
+
+ cdns_ctrl->curr_trans_type = transfer_type;
+}
+
+static int
+cadence_nand_cdma_transfer(struct cdns_nand_ctrl *cdns_ctrl, u8 chip_nr,
+ int page, void *buf, void *ctrl_dat, u32 buf_size,
+ u32 ctrl_dat_size, enum dma_data_direction dir,
+ bool with_ecc)
+{
+ dma_addr_t dma_buf, dma_ctrl_dat = 0;
+ u8 thread_nr = chip_nr;
+ int status;
+ u16 ctype;
+
+ if (dir == DMA_FROM_DEVICE)
+ ctype = CDMA_CT_RD;
+ else
+ ctype = CDMA_CT_WR;
+
+ cadence_nand_set_ecc_enable(cdns_ctrl, with_ecc);
+
+ dma_buf = dma_map_single(cdns_ctrl->dev, buf, buf_size, dir);
+ if (dma_mapping_error(cdns_ctrl->dev, dma_buf)) {
+ dev_err(cdns_ctrl->dev, "Failed to map DMA buffer\n");
+ return -EIO;
+ }
+
+ if (ctrl_dat && ctrl_dat_size) {
+ dma_ctrl_dat = dma_map_single(cdns_ctrl->dev, ctrl_dat,
+ ctrl_dat_size, dir);
+ if (dma_mapping_error(cdns_ctrl->dev, dma_ctrl_dat)) {
+ dma_unmap_single(cdns_ctrl->dev, dma_buf,
+ buf_size, dir);
+ dev_err(cdns_ctrl->dev, "Failed to map DMA buffer\n");
+ return -EIO;
+ }
+ }
+
+ cadence_nand_cdma_desc_prepare(cdns_ctrl, chip_nr, page,
+ (void *)dma_buf, (void *)dma_ctrl_dat,
+ ctype);
+
+ status = cadence_nand_cdma_send_and_wait(cdns_ctrl, thread_nr);
+
+ dma_unmap_single(cdns_ctrl->dev, dma_buf,
+ buf_size, dir);
+
+ if (ctrl_dat && ctrl_dat_size)
+ dma_unmap_single(cdns_ctrl->dev, dma_ctrl_dat,
+ ctrl_dat_size, dir);
+ if (status)
+ return status;
+
+ return cadence_nand_cdma_finish(cdns_ctrl);
+}
+
+static void cadence_nand_set_timings(struct cdns_nand_ctrl *cdns_ctrl,
+ struct cadence_nand_timings *t)
+{
+ writel_relaxed(t->async_toggle_timings,
+ cdns_ctrl->reg + ASYNC_TOGGLE_TIMINGS);
+ writel_relaxed(t->timings0, cdns_ctrl->reg + TIMINGS0);
+ writel_relaxed(t->timings1, cdns_ctrl->reg + TIMINGS1);
+ writel_relaxed(t->timings2, cdns_ctrl->reg + TIMINGS2);
+
+ if (cdns_ctrl->caps2.is_phy_type_dll)
+ writel_relaxed(t->dll_phy_ctrl, cdns_ctrl->reg + DLL_PHY_CTRL);
+
+ writel_relaxed(t->phy_ctrl, cdns_ctrl->reg + PHY_CTRL);
+
+ if (cdns_ctrl->caps2.is_phy_type_dll) {
+ writel_relaxed(0, cdns_ctrl->reg + PHY_TSEL);
+ writel_relaxed(2, cdns_ctrl->reg + PHY_DQ_TIMING);
+ writel_relaxed(t->phy_dqs_timing,
+ cdns_ctrl->reg + PHY_DQS_TIMING);
+ writel_relaxed(t->phy_gate_lpbk_ctrl,
+ cdns_ctrl->reg + PHY_GATE_LPBK_CTRL);
+ writel_relaxed(PHY_DLL_MASTER_CTRL_BYPASS_MODE,
+ cdns_ctrl->reg + PHY_DLL_MASTER_CTRL);
+ writel_relaxed(0, cdns_ctrl->reg + PHY_DLL_SLAVE_CTRL);
+ }
+}
+
+static int cadence_nand_select_target(struct nand_chip *chip)
+{
+ struct cdns_nand_ctrl *cdns_ctrl = to_cdns_nand_ctrl(chip->controller);
+ struct cdns_nand_chip *cdns_chip = to_cdns_nand_chip(chip);
+
+ if (chip == cdns_ctrl->selected_chip)
+ return 0;
+
+ if (cadence_nand_wait_for_value(cdns_ctrl, CTRL_STATUS,
+ 1000000,
+ CTRL_STATUS_CTRL_BUSY, true))
+ return -ETIMEDOUT;
+
+ cadence_nand_set_timings(cdns_ctrl, &cdns_chip->timings);
+
+ cadence_nand_set_ecc_strength(cdns_ctrl,
+ cdns_chip->corr_str_idx);
+
+ cadence_nand_set_erase_detection(cdns_ctrl, true,
+ chip->ecc.strength);
+
+ cdns_ctrl->curr_trans_type = -1;
+ cdns_ctrl->selected_chip = chip;
+
+ return 0;
+}
+
+static int cadence_nand_erase(struct nand_chip *chip, u32 page)
+{
+ struct cdns_nand_ctrl *cdns_ctrl = to_cdns_nand_ctrl(chip->controller);
+ struct cdns_nand_chip *cdns_chip = to_cdns_nand_chip(chip);
+ int status;
+ u8 thread_nr = cdns_chip->cs[chip->cur_cs];
+
+ cadence_nand_cdma_desc_prepare(cdns_ctrl,
+ cdns_chip->cs[chip->cur_cs],
+ page, NULL, NULL,
+ CDMA_CT_ERASE);
+ status = cadence_nand_cdma_send_and_wait(cdns_ctrl, thread_nr);
+ if (status) {
+ dev_err(cdns_ctrl->dev, "erase operation failed\n");
+ return -EIO;
+ }
+
+ status = cadence_nand_cdma_finish(cdns_ctrl);
+ if (status)
+ return status;
+
+ return 0;
+}
+
+static int cadence_nand_read_bbm(struct nand_chip *chip, int page, u8 *buf)
+{
+ int status;
+ struct cdns_nand_ctrl *cdns_ctrl = to_cdns_nand_ctrl(chip->controller);
+ struct cdns_nand_chip *cdns_chip = to_cdns_nand_chip(chip);
+ struct mtd_info *mtd = nand_to_mtd(chip);
+
+ cadence_nand_prepare_data_size(chip, TT_BBM);
+
+ cadence_nand_set_skip_bytes_conf(cdns_ctrl, 0, 0, 0);
+
+ /*
+ * Read only bad block marker from offset
+ * defined by a memory manufacturer.
+ */
+ status = cadence_nand_cdma_transfer(cdns_ctrl,
+ cdns_chip->cs[chip->cur_cs],
+ page, cdns_ctrl->buf, NULL,
+ mtd->oobsize,
+ 0, DMA_FROM_DEVICE, false);
+ if (status) {
+ dev_err(cdns_ctrl->dev, "read BBM failed\n");
+ return -EIO;
+ }
+
+ memcpy(buf + cdns_chip->bbm_offs, cdns_ctrl->buf, cdns_chip->bbm_len);
+
+ return 0;
+}
+
+static int cadence_nand_write_page(struct nand_chip *chip,
+ const u8 *buf, int oob_required,
+ int page)
+{
+ struct cdns_nand_ctrl *cdns_ctrl = to_cdns_nand_ctrl(chip->controller);
+ struct cdns_nand_chip *cdns_chip = to_cdns_nand_chip(chip);
+ struct mtd_info *mtd = nand_to_mtd(chip);
+ int status;
+ u16 marker_val = 0xFFFF;
+
+ status = cadence_nand_select_target(chip);
+ if (status)
+ return status;
+
+ cadence_nand_set_skip_bytes_conf(cdns_ctrl, cdns_chip->bbm_len,
+ mtd->writesize
+ + cdns_chip->bbm_offs,
+ 1);
+
+ if (oob_required) {
+ marker_val = *(u16 *)(chip->oob_poi
+ + cdns_chip->bbm_offs);
+ } else {
+ /* Set oob data to 0xFF. */
+ memset(cdns_ctrl->buf + mtd->writesize, 0xFF,
+ cdns_chip->avail_oob_size);
+ }
+
+ cadence_nand_set_skip_marker_val(cdns_ctrl, marker_val);
+
+ cadence_nand_prepare_data_size(chip, TT_MAIN_OOB_AREA_EXT);
+
+ if (cadence_nand_dma_buf_ok(cdns_ctrl, buf, mtd->writesize) &&
+ cdns_ctrl->caps2.data_control_supp) {
+ u8 *oob;
+
+ if (oob_required)
+ oob = chip->oob_poi;
+ else
+ oob = cdns_ctrl->buf + mtd->writesize;
+
+ status = cadence_nand_cdma_transfer(cdns_ctrl,
+ cdns_chip->cs[chip->cur_cs],
+ page, (void *)buf, oob,
+ mtd->writesize,
+ cdns_chip->avail_oob_size,
+ DMA_TO_DEVICE, true);
+ if (status) {
+ dev_err(cdns_ctrl->dev, "write page failed\n");
+ return -EIO;
+ }
+
+ return 0;
+ }
+
+ if (oob_required) {
+ /* Transfer the data to the oob area. */
+ memcpy(cdns_ctrl->buf + mtd->writesize, chip->oob_poi,
+ cdns_chip->avail_oob_size);
+ }
+
+ memcpy(cdns_ctrl->buf, buf, mtd->writesize);
+
+ cadence_nand_prepare_data_size(chip, TT_MAIN_OOB_AREAS);
+
+ return cadence_nand_cdma_transfer(cdns_ctrl,
+ cdns_chip->cs[chip->cur_cs],
+ page, cdns_ctrl->buf, NULL,
+ mtd->writesize
+ + cdns_chip->avail_oob_size,
+ 0, DMA_TO_DEVICE, true);
+}
+
+static int cadence_nand_write_oob(struct nand_chip *chip, int page)
+{
+ struct cdns_nand_ctrl *cdns_ctrl = to_cdns_nand_ctrl(chip->controller);
+ struct mtd_info *mtd = nand_to_mtd(chip);
+
+ memset(cdns_ctrl->buf, 0xFF, mtd->writesize);
+
+ return cadence_nand_write_page(chip, cdns_ctrl->buf, 1, page);
+}
+
+static int cadence_nand_write_page_raw(struct nand_chip *chip,
+ const u8 *buf, int oob_required,
+ int page)
+{
+ struct cdns_nand_ctrl *cdns_ctrl = to_cdns_nand_ctrl(chip->controller);
+ struct cdns_nand_chip *cdns_chip = to_cdns_nand_chip(chip);
+ struct mtd_info *mtd = nand_to_mtd(chip);
+ int writesize = mtd->writesize;
+ int oobsize = mtd->oobsize;
+ int ecc_steps = chip->ecc.steps;
+ int ecc_size = chip->ecc.size;
+ int ecc_bytes = chip->ecc.bytes;
+ void *tmp_buf = cdns_ctrl->buf;
+ int oob_skip = cdns_chip->bbm_len;
+ size_t size = writesize + oobsize;
+ int i, pos, len;
+ int status = 0;
+
+ status = cadence_nand_select_target(chip);
+ if (status)
+ return status;
+
+ /*
+ * Fill the buffer with 0xff first except the full page transfer.
+ * This simplifies the logic.
+ */
+ if (!buf || !oob_required)
+ memset(tmp_buf, 0xff, size);
+
+ cadence_nand_set_skip_bytes_conf(cdns_ctrl, 0, 0, 0);
+
+ /* Arrange the buffer for syndrome payload/ecc layout. */
+ if (buf) {
+ for (i = 0; i < ecc_steps; i++) {
+ pos = i * (ecc_size + ecc_bytes);
+ len = ecc_size;
+
+ if (pos >= writesize)
+ pos += oob_skip;
+ else if (pos + len > writesize)
+ len = writesize - pos;
+
+ memcpy(tmp_buf + pos, buf, len);
+ buf += len;
+ if (len < ecc_size) {
+ len = ecc_size - len;
+ memcpy(tmp_buf + writesize + oob_skip, buf,
+ len);
+ buf += len;
+ }
+ }
+ }
+
+ if (oob_required) {
+ const u8 *oob = chip->oob_poi;
+ u32 oob_data_offset = (cdns_chip->sector_count - 1) *
+ (cdns_chip->sector_size + chip->ecc.bytes)
+ + cdns_chip->sector_size + oob_skip;
+
+ /* BBM at the beginning of the OOB area. */
+ memcpy(tmp_buf + writesize, oob, oob_skip);
+
+ /* OOB free. */
+ memcpy(tmp_buf + oob_data_offset, oob,
+ cdns_chip->avail_oob_size);
+ oob += cdns_chip->avail_oob_size;
+
+ /* OOB ECC. */
+ for (i = 0; i < ecc_steps; i++) {
+ pos = ecc_size + i * (ecc_size + ecc_bytes);
+ if (i == (ecc_steps - 1))
+ pos += cdns_chip->avail_oob_size;
+
+ len = ecc_bytes;
+
+ if (pos >= writesize)
+ pos += oob_skip;
+ else if (pos + len > writesize)
+ len = writesize - pos;
+
+ memcpy(tmp_buf + pos, oob, len);
+ oob += len;
+ if (len < ecc_bytes) {
+ len = ecc_bytes - len;
+ memcpy(tmp_buf + writesize + oob_skip, oob,
+ len);
+ oob += len;
+ }
+ }
+ }
+
+ cadence_nand_prepare_data_size(chip, TT_RAW_PAGE);
+
+ return cadence_nand_cdma_transfer(cdns_ctrl,
+ cdns_chip->cs[chip->cur_cs],
+ page, cdns_ctrl->buf, NULL,
+ mtd->writesize +
+ mtd->oobsize,
+ 0, DMA_TO_DEVICE, false);
+}
+
+static int cadence_nand_write_oob_raw(struct nand_chip *chip,
+ int page)
+{
+ return cadence_nand_write_page_raw(chip, NULL, true, page);
+}
+
+static int cadence_nand_read_page(struct nand_chip *chip,
+ u8 *buf, int oob_required, int page)
+{
+ struct cdns_nand_ctrl *cdns_ctrl = to_cdns_nand_ctrl(chip->controller);
+ struct cdns_nand_chip *cdns_chip = to_cdns_nand_chip(chip);
+ struct mtd_info *mtd = nand_to_mtd(chip);
+ int status = 0;
+ int ecc_err_count = 0;
+
+ status = cadence_nand_select_target(chip);
+ if (status)
+ return status;
+
+ cadence_nand_set_skip_bytes_conf(cdns_ctrl, cdns_chip->bbm_len,
+ mtd->writesize
+ + cdns_chip->bbm_offs, 1);
+
+ /*
+ * If data buffer can be accessed by DMA and data_control feature
+ * is supported then transfer data and oob directly.
+ */
+ if (cadence_nand_dma_buf_ok(cdns_ctrl, buf, mtd->writesize) &&
+ cdns_ctrl->caps2.data_control_supp) {
+ u8 *oob;
+
+ if (oob_required)
+ oob = chip->oob_poi;
+ else
+ oob = cdns_ctrl->buf + mtd->writesize;
+
+ cadence_nand_prepare_data_size(chip, TT_MAIN_OOB_AREA_EXT);
+ status = cadence_nand_cdma_transfer(cdns_ctrl,
+ cdns_chip->cs[chip->cur_cs],
+ page, buf, oob,
+ mtd->writesize,
+ cdns_chip->avail_oob_size,
+ DMA_FROM_DEVICE, true);
+ /* Otherwise use bounce buffer. */
+ } else {
+ cadence_nand_prepare_data_size(chip, TT_MAIN_OOB_AREAS);
+ status = cadence_nand_cdma_transfer(cdns_ctrl,
+ cdns_chip->cs[chip->cur_cs],
+ page, cdns_ctrl->buf,
+ NULL, mtd->writesize
+ + cdns_chip->avail_oob_size,
+ 0, DMA_FROM_DEVICE, true);
+
+ memcpy(buf, cdns_ctrl->buf, mtd->writesize);
+ if (oob_required)
+ memcpy(chip->oob_poi,
+ cdns_ctrl->buf + mtd->writesize,
+ mtd->oobsize);
+ }
+
+ switch (status) {
+ case STAT_ECC_UNCORR:
+ mtd->ecc_stats.failed++;
+ ecc_err_count++;
+ break;
+ case STAT_ECC_CORR:
+ ecc_err_count = FIELD_GET(CDMA_CS_MAXERR,
+ cdns_ctrl->cdma_desc->status);
+ mtd->ecc_stats.corrected += ecc_err_count;
+ break;
+ case STAT_ERASED:
+ case STAT_OK:
+ break;
+ default:
+ dev_err(cdns_ctrl->dev, "read page failed\n");
+ return -EIO;
+ }
+
+ if (oob_required)
+ if (cadence_nand_read_bbm(chip, page, chip->oob_poi))
+ return -EIO;
+
+ return ecc_err_count;
+}
+
+/* Reads OOB data from the device. */
+static int cadence_nand_read_oob(struct nand_chip *chip, int page)
+{
+ struct cdns_nand_ctrl *cdns_ctrl = to_cdns_nand_ctrl(chip->controller);
+
+ return cadence_nand_read_page(chip, cdns_ctrl->buf, 1, page);
+}
+
+static int cadence_nand_read_page_raw(struct nand_chip *chip,
+ u8 *buf, int oob_required, int page)
+{
+ struct cdns_nand_ctrl *cdns_ctrl = to_cdns_nand_ctrl(chip->controller);
+ struct cdns_nand_chip *cdns_chip = to_cdns_nand_chip(chip);
+ struct mtd_info *mtd = nand_to_mtd(chip);
+ int oob_skip = cdns_chip->bbm_len;
+ int writesize = mtd->writesize;
+ int ecc_steps = chip->ecc.steps;
+ int ecc_size = chip->ecc.size;
+ int ecc_bytes = chip->ecc.bytes;
+ void *tmp_buf = cdns_ctrl->buf;
+ int i, pos, len;
+ int status = 0;
+
+ status = cadence_nand_select_target(chip);
+ if (status)
+ return status;
+
+ cadence_nand_set_skip_bytes_conf(cdns_ctrl, 0, 0, 0);
+
+ cadence_nand_prepare_data_size(chip, TT_RAW_PAGE);
+ status = cadence_nand_cdma_transfer(cdns_ctrl,
+ cdns_chip->cs[chip->cur_cs],
+ page, cdns_ctrl->buf, NULL,
+ mtd->writesize
+ + mtd->oobsize,
+ 0, DMA_FROM_DEVICE, false);
+
+ switch (status) {
+ case STAT_ERASED:
+ case STAT_OK:
+ break;
+ default:
+ dev_err(cdns_ctrl->dev, "read raw page failed\n");
+ return -EIO;
+ }
+
+ /* Arrange the buffer for syndrome payload/ecc layout. */
+ if (buf) {
+ for (i = 0; i < ecc_steps; i++) {
+ pos = i * (ecc_size + ecc_bytes);
+ len = ecc_size;
+
+ if (pos >= writesize)
+ pos += oob_skip;
+ else if (pos + len > writesize)
+ len = writesize - pos;
+
+ memcpy(buf, tmp_buf + pos, len);
+ buf += len;
+ if (len < ecc_size) {
+ len = ecc_size - len;
+ memcpy(buf, tmp_buf + writesize + oob_skip,
+ len);
+ buf += len;
+ }
+ }
+ }
+
+ if (oob_required) {
+ u8 *oob = chip->oob_poi;
+ u32 oob_data_offset = (cdns_chip->sector_count - 1) *
+ (cdns_chip->sector_size + chip->ecc.bytes)
+ + cdns_chip->sector_size + oob_skip;
+
+ /* OOB free. */
+ memcpy(oob, tmp_buf + oob_data_offset,
+ cdns_chip->avail_oob_size);
+
+ /* BBM at the beginning of the OOB area. */
+ memcpy(oob, tmp_buf + writesize, oob_skip);
+
+ oob += cdns_chip->avail_oob_size;
+
+ /* OOB ECC */
+ for (i = 0; i < ecc_steps; i++) {
+ pos = ecc_size + i * (ecc_size + ecc_bytes);
+ len = ecc_bytes;
+
+ if (i == (ecc_steps - 1))
+ pos += cdns_chip->avail_oob_size;
+
+ if (pos >= writesize)
+ pos += oob_skip;
+ else if (pos + len > writesize)
+ len = writesize - pos;
+
+ memcpy(oob, tmp_buf + pos, len);
+ oob += len;
+ if (len < ecc_bytes) {
+ len = ecc_bytes - len;
+ memcpy(oob, tmp_buf + writesize + oob_skip,
+ len);
+ oob += len;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int cadence_nand_read_oob_raw(struct nand_chip *chip,
+ int page)
+{
+ return cadence_nand_read_page_raw(chip, NULL, true, page);
+}
+
+static void cadence_nand_slave_dma_transfer_finished(void *data)
+{
+ struct completion *finished = data;
+
+ complete(finished);
+}
+
+static int cadence_nand_slave_dma_transfer(struct cdns_nand_ctrl *cdns_ctrl,
+ void *buf,
+ dma_addr_t dev_dma, size_t len,
+ enum dma_data_direction dir)
+{
+ DECLARE_COMPLETION_ONSTACK(finished);
+ struct dma_chan *chan;
+ struct dma_device *dma_dev;
+ dma_addr_t src_dma, dst_dma, buf_dma;
+ struct dma_async_tx_descriptor *tx;
+ dma_cookie_t cookie;
+
+ chan = cdns_ctrl->dmac;
+ dma_dev = chan->device;
+
+ buf_dma = dma_map_single(dma_dev->dev, buf, len, dir);
+ if (dma_mapping_error(dma_dev->dev, buf_dma)) {
+ dev_err(cdns_ctrl->dev, "Failed to map DMA buffer\n");
+ goto err;
+ }
+
+ if (dir == DMA_FROM_DEVICE) {
+ src_dma = cdns_ctrl->io.dma;
+ dst_dma = buf_dma;
+ } else {
+ src_dma = buf_dma;
+ dst_dma = cdns_ctrl->io.dma;
+ }
+
+ tx = dmaengine_prep_dma_memcpy(cdns_ctrl->dmac, dst_dma, src_dma, len,
+ DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
+ if (!tx) {
+ dev_err(cdns_ctrl->dev, "Failed to prepare DMA memcpy\n");
+ goto err_unmap;
+ }
+
+ tx->callback = cadence_nand_slave_dma_transfer_finished;
+ tx->callback_param = &finished;
+
+ cookie = dmaengine_submit(tx);
+ if (dma_submit_error(cookie)) {
+ dev_err(cdns_ctrl->dev, "Failed to do DMA tx_submit\n");
+ goto err_unmap;
+ }
+
+ dma_async_issue_pending(cdns_ctrl->dmac);
+ wait_for_completion(&finished);
+
+ dma_unmap_single(cdns_ctrl->dev, buf_dma, len, dir);
+
+ return 0;
+
+err_unmap:
+ dma_unmap_single(cdns_ctrl->dev, buf_dma, len, dir);
+
+err:
+ dev_dbg(cdns_ctrl->dev, "Fall back to CPU I/O\n");
+
+ return -EIO;
+}
+
+static int cadence_nand_read_buf(struct cdns_nand_ctrl *cdns_ctrl,
+ u8 *buf, int len)
+{
+ u8 thread_nr = 0;
+ u32 sdma_size;
+ int status;
+
+ /* Wait until slave DMA interface is ready to data transfer. */
+ status = cadence_nand_wait_on_sdma(cdns_ctrl, &thread_nr, &sdma_size);
+ if (status)
+ return status;
+
+ if (!cdns_ctrl->caps1->has_dma) {
+ int len_in_words = len >> 2;
+
+ /* read alingment data */
+ ioread32_rep(cdns_ctrl->io.virt, buf, len_in_words);
+ if (sdma_size > len) {
+ /* read rest data from slave DMA interface if any */
+ ioread32_rep(cdns_ctrl->io.virt, cdns_ctrl->buf,
+ sdma_size / 4 - len_in_words);
+ /* copy rest of data */
+ memcpy(buf + (len_in_words << 2), cdns_ctrl->buf,
+ len - (len_in_words << 2));
+ }
+ return 0;
+ }
+
+ if (cadence_nand_dma_buf_ok(cdns_ctrl, buf, len)) {
+ status = cadence_nand_slave_dma_transfer(cdns_ctrl, buf,
+ cdns_ctrl->io.dma,
+ len, DMA_FROM_DEVICE);
+ if (status == 0)
+ return 0;
+
+ dev_warn(cdns_ctrl->dev,
+ "Slave DMA transfer failed. Try again using bounce buffer.");
+ }
+
+ /* If DMA transfer is not possible or failed then use bounce buffer. */
+ status = cadence_nand_slave_dma_transfer(cdns_ctrl, cdns_ctrl->buf,
+ cdns_ctrl->io.dma,
+ sdma_size, DMA_FROM_DEVICE);
+
+ if (status) {
+ dev_err(cdns_ctrl->dev, "Slave DMA transfer failed");
+ return status;
+ }
+
+ memcpy(buf, cdns_ctrl->buf, len);
+
+ return 0;
+}
+
+static int cadence_nand_write_buf(struct cdns_nand_ctrl *cdns_ctrl,
+ const u8 *buf, int len)
+{
+ u8 thread_nr = 0;
+ u32 sdma_size;
+ int status;
+
+ /* Wait until slave DMA interface is ready to data transfer. */
+ status = cadence_nand_wait_on_sdma(cdns_ctrl, &thread_nr, &sdma_size);
+ if (status)
+ return status;
+
+ if (!cdns_ctrl->caps1->has_dma) {
+ int len_in_words = len >> 2;
+
+ iowrite32_rep(cdns_ctrl->io.virt, buf, len_in_words);
+ if (sdma_size > len) {
+ /* copy rest of data */
+ memcpy(cdns_ctrl->buf, buf + (len_in_words << 2),
+ len - (len_in_words << 2));
+ /* write all expected by nand controller data */
+ iowrite32_rep(cdns_ctrl->io.virt, cdns_ctrl->buf,
+ sdma_size / 4 - len_in_words);
+ }
+
+ return 0;
+ }
+
+ if (cadence_nand_dma_buf_ok(cdns_ctrl, buf, len)) {
+ status = cadence_nand_slave_dma_transfer(cdns_ctrl, (void *)buf,
+ cdns_ctrl->io.dma,
+ len, DMA_TO_DEVICE);
+ if (status == 0)
+ return 0;
+
+ dev_warn(cdns_ctrl->dev,
+ "Slave DMA transfer failed. Try again using bounce buffer.");
+ }
+
+ /* If DMA transfer is not possible or failed then use bounce buffer. */
+ memcpy(cdns_ctrl->buf, buf, len);
+
+ status = cadence_nand_slave_dma_transfer(cdns_ctrl, cdns_ctrl->buf,
+ cdns_ctrl->io.dma,
+ sdma_size, DMA_TO_DEVICE);
+
+ if (status)
+ dev_err(cdns_ctrl->dev, "Slave DMA transfer failed");
+
+ return status;
+}
+
+static int cadence_nand_force_byte_access(struct nand_chip *chip,
+ bool force_8bit)
+{
+ struct cdns_nand_ctrl *cdns_ctrl = to_cdns_nand_ctrl(chip->controller);
+ int status;
+
+ /*
+ * Callers of this function do not verify if the NAND is using a 16-bit
+ * an 8-bit bus for normal operations, so we need to take care of that
+ * here by leaving the configuration unchanged if the NAND does not have
+ * the NAND_BUSWIDTH_16 flag set.
+ */
+ if (!(chip->options & NAND_BUSWIDTH_16))
+ return 0;
+
+ status = cadence_nand_set_access_width16(cdns_ctrl, !force_8bit);
+
+ return status;
+}
+
+static int cadence_nand_cmd_opcode(struct nand_chip *chip,
+ const struct nand_subop *subop)
+{
+ struct cdns_nand_ctrl *cdns_ctrl = to_cdns_nand_ctrl(chip->controller);
+ struct cdns_nand_chip *cdns_chip = to_cdns_nand_chip(chip);
+ const struct nand_op_instr *instr;
+ unsigned int op_id = 0;
+ u64 mini_ctrl_cmd = 0;
+ int ret;
+
+ instr = &subop->instrs[op_id];
+
+ if (instr->delay_ns > 0)
+ mini_ctrl_cmd |= GCMD_LAY_TWB;
+
+ mini_ctrl_cmd |= FIELD_PREP(GCMD_LAY_INSTR,
+ GCMD_LAY_INSTR_CMD);
+ mini_ctrl_cmd |= FIELD_PREP(GCMD_LAY_INPUT_CMD,
+ instr->ctx.cmd.opcode);
+
+ ret = cadence_nand_generic_cmd_send(cdns_ctrl,
+ cdns_chip->cs[chip->cur_cs],
+ mini_ctrl_cmd);
+ if (ret)
+ dev_err(cdns_ctrl->dev, "send cmd %x failed\n",
+ instr->ctx.cmd.opcode);
+
+ return ret;
+}
+
+static int cadence_nand_cmd_address(struct nand_chip *chip,
+ const struct nand_subop *subop)
+{
+ struct cdns_nand_ctrl *cdns_ctrl = to_cdns_nand_ctrl(chip->controller);
+ struct cdns_nand_chip *cdns_chip = to_cdns_nand_chip(chip);
+ const struct nand_op_instr *instr;
+ unsigned int op_id = 0;
+ u64 mini_ctrl_cmd = 0;
+ unsigned int offset, naddrs;
+ u64 address = 0;
+ const u8 *addrs;
+ int ret;
+ int i;
+
+ instr = &subop->instrs[op_id];
+
+ if (instr->delay_ns > 0)
+ mini_ctrl_cmd |= GCMD_LAY_TWB;
+
+ mini_ctrl_cmd |= FIELD_PREP(GCMD_LAY_INSTR,
+ GCMD_LAY_INSTR_ADDR);
+
+ offset = nand_subop_get_addr_start_off(subop, op_id);
+ naddrs = nand_subop_get_num_addr_cyc(subop, op_id);
+ addrs = &instr->ctx.addr.addrs[offset];
+
+ for (i = 0; i < naddrs; i++)
+ address |= (u64)addrs[i] << (8 * i);
+
+ mini_ctrl_cmd |= FIELD_PREP(GCMD_LAY_INPUT_ADDR,
+ address);
+ mini_ctrl_cmd |= FIELD_PREP(GCMD_LAY_INPUT_ADDR_SIZE,
+ naddrs - 1);
+
+ ret = cadence_nand_generic_cmd_send(cdns_ctrl,
+ cdns_chip->cs[chip->cur_cs],
+ mini_ctrl_cmd);
+ if (ret)
+ dev_err(cdns_ctrl->dev, "send address %llx failed\n", address);
+
+ return ret;
+}
+
+static int cadence_nand_cmd_erase(struct nand_chip *chip,
+ const struct nand_subop *subop)
+{
+ unsigned int op_id;
+
+ if (subop->instrs[0].ctx.cmd.opcode == NAND_CMD_ERASE1) {
+ int i;
+ const struct nand_op_instr *instr = NULL;
+ unsigned int offset, naddrs;
+ const u8 *addrs;
+ u32 page = 0;
+
+ instr = &subop->instrs[1];
+ offset = nand_subop_get_addr_start_off(subop, 1);
+ naddrs = nand_subop_get_num_addr_cyc(subop, 1);
+ addrs = &instr->ctx.addr.addrs[offset];
+
+ for (i = 0; i < naddrs; i++)
+ page |= (u32)addrs[i] << (8 * i);
+
+ return cadence_nand_erase(chip, page);
+ }
+
+ /*
+ * If it is not an erase operation then handle operation
+ * by calling exec_op function.
+ */
+ for (op_id = 0; op_id < subop->ninstrs; op_id++) {
+ int ret;
+ const struct nand_operation nand_op = {
+ .cs = chip->cur_cs,
+ .instrs = &subop->instrs[op_id],
+ .ninstrs = 1};
+ ret = chip->controller->ops->exec_op(chip, &nand_op, false);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int cadence_nand_cmd_data(struct nand_chip *chip,
+ const struct nand_subop *subop)
+{
+ struct cdns_nand_ctrl *cdns_ctrl = to_cdns_nand_ctrl(chip->controller);
+ struct cdns_nand_chip *cdns_chip = to_cdns_nand_chip(chip);
+ const struct nand_op_instr *instr;
+ unsigned int offset, op_id = 0;
+ u64 mini_ctrl_cmd = 0;
+ int len = 0;
+ int ret;
+
+ instr = &subop->instrs[op_id];
+
+ if (instr->delay_ns > 0)
+ mini_ctrl_cmd |= GCMD_LAY_TWB;
+
+ mini_ctrl_cmd |= FIELD_PREP(GCMD_LAY_INSTR,
+ GCMD_LAY_INSTR_DATA);
+
+ if (instr->type == NAND_OP_DATA_OUT_INSTR)
+ mini_ctrl_cmd |= FIELD_PREP(GCMD_DIR,
+ GCMD_DIR_WRITE);
+
+ len = nand_subop_get_data_len(subop, op_id);
+ offset = nand_subop_get_data_start_off(subop, op_id);
+ mini_ctrl_cmd |= FIELD_PREP(GCMD_SECT_CNT, 1);
+ mini_ctrl_cmd |= FIELD_PREP(GCMD_LAST_SIZE, len);
+ if (instr->ctx.data.force_8bit) {
+ ret = cadence_nand_force_byte_access(chip, true);
+ if (ret) {
+ dev_err(cdns_ctrl->dev,
+ "cannot change byte access generic data cmd failed\n");
+ return ret;
+ }
+ }
+
+ ret = cadence_nand_generic_cmd_send(cdns_ctrl,
+ cdns_chip->cs[chip->cur_cs],
+ mini_ctrl_cmd);
+ if (ret) {
+ dev_err(cdns_ctrl->dev, "send generic data cmd failed\n");
+ return ret;
+ }
+
+ if (instr->type == NAND_OP_DATA_IN_INSTR) {
+ void *buf = instr->ctx.data.buf.in + offset;
+
+ ret = cadence_nand_read_buf(cdns_ctrl, buf, len);
+ } else {
+ const void *buf = instr->ctx.data.buf.out + offset;
+
+ ret = cadence_nand_write_buf(cdns_ctrl, buf, len);
+ }
+
+ if (ret) {
+ dev_err(cdns_ctrl->dev, "data transfer failed for generic command\n");
+ return ret;
+ }
+
+ if (instr->ctx.data.force_8bit) {
+ ret = cadence_nand_force_byte_access(chip, false);
+ if (ret) {
+ dev_err(cdns_ctrl->dev,
+ "cannot change byte access generic data cmd failed\n");
+ }
+ }
+
+ return ret;
+}
+
+static int cadence_nand_cmd_waitrdy(struct nand_chip *chip,
+ const struct nand_subop *subop)
+{
+ int status;
+ unsigned int op_id = 0;
+ struct cdns_nand_ctrl *cdns_ctrl = to_cdns_nand_ctrl(chip->controller);
+ struct cdns_nand_chip *cdns_chip = to_cdns_nand_chip(chip);
+ const struct nand_op_instr *instr = &subop->instrs[op_id];
+ u32 timeout_us = instr->ctx.waitrdy.timeout_ms * 1000;
+
+ status = cadence_nand_wait_for_value(cdns_ctrl, RBN_SETINGS,
+ timeout_us,
+ BIT(cdns_chip->cs[chip->cur_cs]),
+ false);
+ return status;
+}
+
+static const struct nand_op_parser cadence_nand_op_parser = NAND_OP_PARSER(
+ NAND_OP_PARSER_PATTERN(
+ cadence_nand_cmd_erase,
+ NAND_OP_PARSER_PAT_CMD_ELEM(false),
+ NAND_OP_PARSER_PAT_ADDR_ELEM(false, MAX_ERASE_ADDRESS_CYC),
+ NAND_OP_PARSER_PAT_CMD_ELEM(false),
+ NAND_OP_PARSER_PAT_WAITRDY_ELEM(false)),
+ NAND_OP_PARSER_PATTERN(
+ cadence_nand_cmd_opcode,
+ NAND_OP_PARSER_PAT_CMD_ELEM(false)),
+ NAND_OP_PARSER_PATTERN(
+ cadence_nand_cmd_address,
+ NAND_OP_PARSER_PAT_ADDR_ELEM(false, MAX_ADDRESS_CYC)),
+ NAND_OP_PARSER_PATTERN(
+ cadence_nand_cmd_data,
+ NAND_OP_PARSER_PAT_DATA_IN_ELEM(false, MAX_DATA_SIZE)),
+ NAND_OP_PARSER_PATTERN(
+ cadence_nand_cmd_data,
+ NAND_OP_PARSER_PAT_DATA_OUT_ELEM(false, MAX_DATA_SIZE)),
+ NAND_OP_PARSER_PATTERN(
+ cadence_nand_cmd_waitrdy,
+ NAND_OP_PARSER_PAT_WAITRDY_ELEM(false))
+ );
+
+static int cadence_nand_exec_op(struct nand_chip *chip,
+ const struct nand_operation *op,
+ bool check_only)
+{
+ int status = cadence_nand_select_target(chip);
+
+ if (status)
+ return status;
+
+ return nand_op_parser_exec_op(chip, &cadence_nand_op_parser, op,
+ check_only);
+}
+
+static int cadence_nand_ooblayout_free(struct mtd_info *mtd, int section,
+ struct mtd_oob_region *oobregion)
+{
+ struct nand_chip *chip = mtd_to_nand(mtd);
+ struct cdns_nand_chip *cdns_chip = to_cdns_nand_chip(chip);
+
+ if (section)
+ return -ERANGE;
+
+ oobregion->offset = cdns_chip->bbm_len;
+ oobregion->length = cdns_chip->avail_oob_size
+ - cdns_chip->bbm_len;
+
+ return 0;
+}
+
+static int cadence_nand_ooblayout_ecc(struct mtd_info *mtd, int section,
+ struct mtd_oob_region *oobregion)
+{
+ struct nand_chip *chip = mtd_to_nand(mtd);
+ struct cdns_nand_chip *cdns_chip = to_cdns_nand_chip(chip);
+
+ if (section)
+ return -ERANGE;
+
+ oobregion->offset = cdns_chip->avail_oob_size;
+ oobregion->length = chip->ecc.total;
+
+ return 0;
+}
+
+static const struct mtd_ooblayout_ops cadence_nand_ooblayout_ops = {
+ .free = cadence_nand_ooblayout_free,
+ .ecc = cadence_nand_ooblayout_ecc,
+};
+
+static int calc_cycl(u32 timing, u32 clock)
+{
+ if (timing == 0 || clock == 0)
+ return 0;
+
+ if ((timing % clock) > 0)
+ return timing / clock;
+ else
+ return timing / clock - 1;
+}
+
+/* Calculate max data valid window. */
+static inline u32 calc_tdvw_max(u32 trp_cnt, u32 clk_period, u32 trhoh_min,
+ u32 board_delay_skew_min, u32 ext_mode)
+{
+ if (ext_mode == 0)
+ clk_period /= 2;
+
+ return (trp_cnt + 1) * clk_period + trhoh_min +
+ board_delay_skew_min;
+}
+
+/* Calculate data valid window. */
+static inline u32 calc_tdvw(u32 trp_cnt, u32 clk_period, u32 trhoh_min,
+ u32 trea_max, u32 ext_mode)
+{
+ if (ext_mode == 0)
+ clk_period /= 2;
+
+ return (trp_cnt + 1) * clk_period + trhoh_min - trea_max;
+}
+
+static int
+cadence_nand_setup_data_interface(struct nand_chip *chip, int chipnr,
+ const struct nand_data_interface *conf)
+{
+ const struct nand_sdr_timings *sdr;
+ struct cdns_nand_ctrl *cdns_ctrl = to_cdns_nand_ctrl(chip->controller);
+ struct cdns_nand_chip *cdns_chip = to_cdns_nand_chip(chip);
+ struct cadence_nand_timings *t = &cdns_chip->timings;
+ u32 reg;
+ u32 board_delay = cdns_ctrl->board_delay;
+ u32 clk_period = DIV_ROUND_DOWN_ULL(1000000000000ULL,
+ cdns_ctrl->nf_clk_rate);
+ u32 tceh_cnt, tcs_cnt, tadl_cnt, tccs_cnt;
+ u32 tfeat_cnt, trhz_cnt, tvdly_cnt;
+ u32 trhw_cnt, twb_cnt, twh_cnt = 0, twhr_cnt;
+ u32 twp_cnt = 0, trp_cnt = 0, trh_cnt = 0;
+ u32 if_skew = cdns_ctrl->caps1->if_skew;
+ u32 board_delay_skew_min = board_delay - if_skew;
+ u32 board_delay_skew_max = board_delay + if_skew;
+ u32 dqs_sampl_res, phony_dqs_mod;
+ u32 tdvw, tdvw_min, tdvw_max;
+ u32 ext_rd_mode, ext_wr_mode;
+ u32 dll_phy_dqs_timing = 0, phony_dqs_timing = 0, rd_del_sel = 0;
+ u32 sampling_point;
+
+ sdr = nand_get_sdr_timings(conf);
+ if (IS_ERR(sdr))
+ return PTR_ERR(sdr);
+
+ memset(t, 0, sizeof(*t));
+ /* Sampling point calculation. */
+
+ if (cdns_ctrl->caps2.is_phy_type_dll)
+ phony_dqs_mod = 2;
+ else
+ phony_dqs_mod = 1;
+
+ dqs_sampl_res = clk_period / phony_dqs_mod;
+
+ tdvw_min = sdr->tREA_max + board_delay_skew_max;
+ /*
+ * The idea of those calculation is to get the optimum value
+ * for tRP and tRH timings. If it is NOT possible to sample data
+ * with optimal tRP/tRH settings, the parameters will be extended.
+ * If clk_period is 50ns (the lowest value) this condition is met
+ * for asynchronous timing modes 1, 2, 3, 4 and 5.
+ * If clk_period is 20ns the condition is met only
+ * for asynchronous timing mode 5.
+ */
+ if (sdr->tRC_min <= clk_period &&
+ sdr->tRP_min <= (clk_period / 2) &&
+ sdr->tREH_min <= (clk_period / 2)) {
+ /* Performance mode. */
+ ext_rd_mode = 0;
+ tdvw = calc_tdvw(trp_cnt, clk_period, sdr->tRHOH_min,
+ sdr->tREA_max, ext_rd_mode);
+ tdvw_max = calc_tdvw_max(trp_cnt, clk_period, sdr->tRHOH_min,
+ board_delay_skew_min,
+ ext_rd_mode);
+ /*
+ * Check if data valid window and sampling point can be found
+ * and is not on the edge (ie. we have hold margin).
+ * If not extend the tRP timings.
+ */
+ if (tdvw > 0) {
+ if (tdvw_max <= tdvw_min ||
+ (tdvw_max % dqs_sampl_res) == 0) {
+ /*
+ * No valid sampling point so the RE pulse need
+ * to be widen widening by half clock cycle.
+ */
+ ext_rd_mode = 1;
+ }
+ } else {
+ /*
+ * There is no valid window
+ * to be able to sample data the tRP need to be widen.
+ * Very safe calculations are performed here.
+ */
+ trp_cnt = (sdr->tREA_max + board_delay_skew_max
+ + dqs_sampl_res) / clk_period;
+ ext_rd_mode = 1;
+ }
+
+ } else {
+ /* Extended read mode. */
+ u32 trh;
+
+ ext_rd_mode = 1;
+ trp_cnt = calc_cycl(sdr->tRP_min, clk_period);
+ trh = sdr->tRC_min - ((trp_cnt + 1) * clk_period);
+ if (sdr->tREH_min >= trh)
+ trh_cnt = calc_cycl(sdr->tREH_min, clk_period);
+ else
+ trh_cnt = calc_cycl(trh, clk_period);
+
+ tdvw = calc_tdvw(trp_cnt, clk_period, sdr->tRHOH_min,
+ sdr->tREA_max, ext_rd_mode);
+ /*
+ * Check if data valid window and sampling point can be found
+ * or if it is at the edge check if previous is valid
+ * - if not extend the tRP timings.
+ */
+ if (tdvw > 0) {
+ tdvw_max = calc_tdvw_max(trp_cnt, clk_period,
+ sdr->tRHOH_min,
+ board_delay_skew_min,
+ ext_rd_mode);
+
+ if ((((tdvw_max / dqs_sampl_res)
+ * dqs_sampl_res) <= tdvw_min) ||
+ (((tdvw_max % dqs_sampl_res) == 0) &&
+ (((tdvw_max / dqs_sampl_res - 1)
+ * dqs_sampl_res) <= tdvw_min))) {
+ /*
+ * Data valid window width is lower than
+ * sampling resolution and do not hit any
+ * sampling point to be sure the sampling point
+ * will be found the RE low pulse width will be
+ * extended by one clock cycle.
+ */
+ trp_cnt = trp_cnt + 1;
+ }
+ } else {
+ /*
+ * There is no valid window to be able to sample data.
+ * The tRP need to be widen.
+ * Very safe calculations are performed here.
+ */
+ trp_cnt = (sdr->tREA_max + board_delay_skew_max
+ + dqs_sampl_res) / clk_period;
+ }
+ }
+
+ tdvw_max = calc_tdvw_max(trp_cnt, clk_period,
+ sdr->tRHOH_min,
+ board_delay_skew_min, ext_rd_mode);
+
+ if (sdr->tWC_min <= clk_period &&
+ (sdr->tWP_min + if_skew) <= (clk_period / 2) &&
+ (sdr->tWH_min + if_skew) <= (clk_period / 2)) {
+ ext_wr_mode = 0;
+ } else {
+ u32 twh;
+
+ ext_wr_mode = 1;
+ twp_cnt = calc_cycl(sdr->tWP_min + if_skew, clk_period);
+ if ((twp_cnt + 1) * clk_period < (sdr->tALS_min + if_skew))
+ twp_cnt = calc_cycl(sdr->tALS_min + if_skew,
+ clk_period);
+
+ twh = (sdr->tWC_min - (twp_cnt + 1) * clk_period);
+ if (sdr->tWH_min >= twh)
+ twh = sdr->tWH_min;
+
+ twh_cnt = calc_cycl(twh + if_skew, clk_period);
+ }
+
+ reg = FIELD_PREP(ASYNC_TOGGLE_TIMINGS_TRH, trh_cnt);
+ reg |= FIELD_PREP(ASYNC_TOGGLE_TIMINGS_TRP, trp_cnt);
+ reg |= FIELD_PREP(ASYNC_TOGGLE_TIMINGS_TWH, twh_cnt);
+ reg |= FIELD_PREP(ASYNC_TOGGLE_TIMINGS_TWP, twp_cnt);
+ t->async_toggle_timings = reg;
+ dev_dbg(cdns_ctrl->dev, "ASYNC_TOGGLE_TIMINGS_SDR\t%x\n", reg);
+
+ tadl_cnt = calc_cycl((sdr->tADL_min + if_skew), clk_period);
+ tccs_cnt = calc_cycl((sdr->tCCS_min + if_skew), clk_period);
+ twhr_cnt = calc_cycl((sdr->tWHR_min + if_skew), clk_period);
+ trhw_cnt = calc_cycl((sdr->tRHW_min + if_skew), clk_period);
+ reg = FIELD_PREP(TIMINGS0_TADL, tadl_cnt);
+
+ /*
+ * If timing exceeds delay field in timing register
+ * then use maximum value.
+ */
+ if (FIELD_FIT(TIMINGS0_TCCS, tccs_cnt))
+ reg |= FIELD_PREP(TIMINGS0_TCCS, tccs_cnt);
+ else
+ reg |= TIMINGS0_TCCS;
+
+ reg |= FIELD_PREP(TIMINGS0_TWHR, twhr_cnt);
+ reg |= FIELD_PREP(TIMINGS0_TRHW, trhw_cnt);
+ t->timings0 = reg;
+ dev_dbg(cdns_ctrl->dev, "TIMINGS0_SDR\t%x\n", reg);
+
+ /* The following is related to single signal so skew is not needed. */
+ trhz_cnt = calc_cycl(sdr->tRHZ_max, clk_period);
+ trhz_cnt = trhz_cnt + 1;
+ twb_cnt = calc_cycl((sdr->tWB_max + board_delay), clk_period);
+ /*
+ * Because of the two stage syncflop the value must be increased by 3
+ * first value is related with sync, second value is related
+ * with output if delay.
+ */
+ twb_cnt = twb_cnt + 3 + 5;
+ /*
+ * The following is related to the we edge of the random data input
+ * sequence so skew is not needed.
+ */
+ tvdly_cnt = calc_cycl(500000 + if_skew, clk_period);
+ reg = FIELD_PREP(TIMINGS1_TRHZ, trhz_cnt);
+ reg |= FIELD_PREP(TIMINGS1_TWB, twb_cnt);
+ reg |= FIELD_PREP(TIMINGS1_TVDLY, tvdly_cnt);
+ t->timings1 = reg;
+ dev_dbg(cdns_ctrl->dev, "TIMINGS1_SDR\t%x\n", reg);
+
+ tfeat_cnt = calc_cycl(sdr->tFEAT_max, clk_period);
+ if (tfeat_cnt < twb_cnt)
+ tfeat_cnt = twb_cnt;
+
+ tceh_cnt = calc_cycl(sdr->tCEH_min, clk_period);
+ tcs_cnt = calc_cycl((sdr->tCS_min + if_skew), clk_period);
+
+ reg = FIELD_PREP(TIMINGS2_TFEAT, tfeat_cnt);
+ reg |= FIELD_PREP(TIMINGS2_CS_HOLD_TIME, tceh_cnt);
+ reg |= FIELD_PREP(TIMINGS2_CS_SETUP_TIME, tcs_cnt);
+ t->timings2 = reg;
+ dev_dbg(cdns_ctrl->dev, "TIMINGS2_SDR\t%x\n", reg);
+
+ if (cdns_ctrl->caps2.is_phy_type_dll) {
+ reg = DLL_PHY_CTRL_DLL_RST_N;
+ if (ext_wr_mode)
+ reg |= DLL_PHY_CTRL_EXTENDED_WR_MODE;
+ if (ext_rd_mode)
+ reg |= DLL_PHY_CTRL_EXTENDED_RD_MODE;
+
+ reg |= FIELD_PREP(DLL_PHY_CTRL_RS_HIGH_WAIT_CNT, 7);
+ reg |= FIELD_PREP(DLL_PHY_CTRL_RS_IDLE_CNT, 7);
+ t->dll_phy_ctrl = reg;
+ dev_dbg(cdns_ctrl->dev, "DLL_PHY_CTRL_SDR\t%x\n", reg);
+ }
+
+ /* Sampling point calculation. */
+ if ((tdvw_max % dqs_sampl_res) > 0)
+ sampling_point = tdvw_max / dqs_sampl_res;
+ else
+ sampling_point = (tdvw_max / dqs_sampl_res - 1);
+
+ if (sampling_point * dqs_sampl_res > tdvw_min) {
+ dll_phy_dqs_timing =
+ FIELD_PREP(PHY_DQS_TIMING_DQS_SEL_OE_END, 4);
+ dll_phy_dqs_timing |= PHY_DQS_TIMING_USE_PHONY_DQS;
+ phony_dqs_timing = sampling_point / phony_dqs_mod;
+
+ if ((sampling_point % 2) > 0) {
+ dll_phy_dqs_timing |= PHY_DQS_TIMING_PHONY_DQS_SEL;
+ if ((tdvw_max % dqs_sampl_res) == 0)
+ /*
+ * Calculation for sampling point at the edge
+ * of data and being odd number.
+ */
+ phony_dqs_timing = (tdvw_max / dqs_sampl_res)
+ / phony_dqs_mod - 1;
+
+ if (!cdns_ctrl->caps2.is_phy_type_dll)
+ phony_dqs_timing--;
+
+ } else {
+ phony_dqs_timing--;
+ }
+ rd_del_sel = phony_dqs_timing + 3;
+ } else {
+ dev_warn(cdns_ctrl->dev,
+ "ERROR : cannot find valid sampling point\n");
+ }
+
+ reg = FIELD_PREP(PHY_CTRL_PHONY_DQS, phony_dqs_timing);
+ if (cdns_ctrl->caps2.is_phy_type_dll)
+ reg |= PHY_CTRL_SDR_DQS;
+ t->phy_ctrl = reg;
+ dev_dbg(cdns_ctrl->dev, "PHY_CTRL_REG_SDR\t%x\n", reg);
+
+ if (cdns_ctrl->caps2.is_phy_type_dll) {
+ dev_dbg(cdns_ctrl->dev, "PHY_TSEL_REG_SDR\t%x\n", 0);
+ dev_dbg(cdns_ctrl->dev, "PHY_DQ_TIMING_REG_SDR\t%x\n", 2);
+ dev_dbg(cdns_ctrl->dev, "PHY_DQS_TIMING_REG_SDR\t%x\n",
+ dll_phy_dqs_timing);
+ t->phy_dqs_timing = dll_phy_dqs_timing;
+
+ reg = FIELD_PREP(PHY_GATE_LPBK_CTRL_RDS, rd_del_sel);
+ dev_dbg(cdns_ctrl->dev, "PHY_GATE_LPBK_CTRL_REG_SDR\t%x\n",
+ reg);
+ t->phy_gate_lpbk_ctrl = reg;
+
+ dev_dbg(cdns_ctrl->dev, "PHY_DLL_MASTER_CTRL_REG_SDR\t%lx\n",
+ PHY_DLL_MASTER_CTRL_BYPASS_MODE);
+ dev_dbg(cdns_ctrl->dev, "PHY_DLL_SLAVE_CTRL_REG_SDR\t%x\n", 0);
+ }
+
+ return 0;
+}
+
+int cadence_nand_attach_chip(struct nand_chip *chip)
+{
+ struct cdns_nand_ctrl *cdns_ctrl = to_cdns_nand_ctrl(chip->controller);
+ struct cdns_nand_chip *cdns_chip = to_cdns_nand_chip(chip);
+ u32 ecc_size = cdns_chip->sector_count * chip->ecc.bytes;
+ struct mtd_info *mtd = nand_to_mtd(chip);
+ u32 max_oob_data_size;
+ int ret;
+
+ if (chip->options & NAND_BUSWIDTH_16) {
+ ret = cadence_nand_set_access_width16(cdns_ctrl, true);
+ if (ret)
+ return ret;
+ }
+
+ chip->bbt_options |= NAND_BBT_USE_FLASH;
+ chip->bbt_options |= NAND_BBT_NO_OOB;
+ chip->ecc.mode = NAND_ECC_HW;
+
+ chip->options |= NAND_NO_SUBPAGE_WRITE;
+
+ cdns_chip->bbm_offs = chip->badblockpos;
+ if (chip->options & NAND_BUSWIDTH_16) {
+ cdns_chip->bbm_offs &= ~0x01;
+ cdns_chip->bbm_len = 2;
+ } else {
+ cdns_chip->bbm_len = 1;
+ }
+
+ ret = nand_ecc_choose_conf(chip,
+ &cdns_ctrl->ecc_caps,
+ mtd->oobsize - cdns_chip->bbm_len);
+ if (ret) {
+ dev_err(cdns_ctrl->dev, "ECC configuration failed\n");
+ return ret;
+ }
+
+ dev_dbg(cdns_ctrl->dev,
+ "chosen ECC settings: step=%d, strength=%d, bytes=%d\n",
+ chip->ecc.size, chip->ecc.strength, chip->ecc.bytes);
+
+ /* Error correction configuration. */
+ cdns_chip->sector_size = chip->ecc.size;
+ cdns_chip->sector_count = mtd->writesize / cdns_chip->sector_size;
+
+ cdns_chip->avail_oob_size = mtd->oobsize - ecc_size;
+
+ max_oob_data_size = MAX_OOB_SIZE_PER_SECTOR;
+
+ if (cdns_chip->avail_oob_size > max_oob_data_size)
+ cdns_chip->avail_oob_size = max_oob_data_size;
+
+ if ((cdns_chip->avail_oob_size + cdns_chip->bbm_len + ecc_size)
+ > mtd->oobsize)
+ cdns_chip->avail_oob_size -= 4;
+
+ ret = cadence_nand_get_ecc_strength_idx(cdns_ctrl, chip->ecc.strength);
+ if (ret < 0)
+ return -EINVAL;
+
+ cdns_chip->corr_str_idx = (u8)ret;
+
+ if (cadence_nand_wait_for_value(cdns_ctrl, CTRL_STATUS,
+ 1000000,
+ CTRL_STATUS_CTRL_BUSY, true))
+ return -ETIMEDOUT;
+
+ cadence_nand_set_ecc_strength(cdns_ctrl,
+ cdns_chip->corr_str_idx);
+
+ cadence_nand_set_erase_detection(cdns_ctrl, true,
+ chip->ecc.strength);
+
+ /* Override the default read operations. */
+ chip->ecc.read_page = cadence_nand_read_page;
+ chip->ecc.read_page_raw = cadence_nand_read_page_raw;
+ chip->ecc.write_page = cadence_nand_write_page;
+ chip->ecc.write_page_raw = cadence_nand_write_page_raw;
+ chip->ecc.read_oob = cadence_nand_read_oob;
+ chip->ecc.write_oob = cadence_nand_write_oob;
+ chip->ecc.read_oob_raw = cadence_nand_read_oob_raw;
+ chip->ecc.write_oob_raw = cadence_nand_write_oob_raw;
+
+ if ((mtd->writesize + mtd->oobsize) > cdns_ctrl->buf_size)
+ cdns_ctrl->buf_size = mtd->writesize + mtd->oobsize;
+
+ /* Is 32-bit DMA supported? */
+ ret = dma_set_mask(cdns_ctrl->dev, DMA_BIT_MASK(32));
+ if (ret) {
+ dev_err(cdns_ctrl->dev, "no usable DMA configuration\n");
+ return ret;
+ }
+
+ mtd_set_ooblayout(mtd, &cadence_nand_ooblayout_ops);
+
+ return 0;
+}
+
+static const struct nand_controller_ops cadence_nand_controller_ops = {
+ .attach_chip = cadence_nand_attach_chip,
+ .exec_op = cadence_nand_exec_op,
+ .setup_data_interface = cadence_nand_setup_data_interface,
+};
+
+static int cadence_nand_chip_init(struct cdns_nand_ctrl *cdns_ctrl,
+ struct device_node *np)
+{
+ struct cdns_nand_chip *cdns_chip;
+ struct mtd_info *mtd;
+ struct nand_chip *chip;
+ int nsels, ret, i;
+ u32 cs;
+
+ nsels = of_property_count_elems_of_size(np, "reg", sizeof(u32));
+ if (nsels <= 0) {
+ dev_err(cdns_ctrl->dev, "missing/invalid reg property\n");
+ return -EINVAL;
+ }
+
+ /* Allocate the nand chip structure. */
+ cdns_chip = devm_kzalloc(cdns_ctrl->dev, sizeof(*cdns_chip) +
+ (nsels * sizeof(u8)),
+ GFP_KERNEL);
+ if (!cdns_chip) {
+ dev_err(cdns_ctrl->dev, "could not allocate chip structure\n");
+ return -ENOMEM;
+ }
+
+ cdns_chip->nsels = nsels;
+
+ for (i = 0; i < nsels; i++) {
+ /* Retrieve CS id. */
+ ret = of_property_read_u32_index(np, "reg", i, &cs);
+ if (ret) {
+ dev_err(cdns_ctrl->dev,
+ "could not retrieve reg property: %d\n",
+ ret);
+ return ret;
+ }
+
+ if (cs >= cdns_ctrl->caps2.max_banks) {
+ dev_err(cdns_ctrl->dev,
+ "invalid reg value: %u (max CS = %d)\n",
+ cs, cdns_ctrl->caps2.max_banks);
+ return -EINVAL;
+ }
+
+ if (test_and_set_bit(cs, &cdns_ctrl->assigned_cs)) {
+ dev_err(cdns_ctrl->dev,
+ "CS %d already assigned\n", cs);
+ return -EINVAL;
+ }
+
+ cdns_chip->cs[i] = cs;
+ }
+
+ chip = &cdns_chip->chip;
+ chip->controller = &cdns_ctrl->controller;
+ nand_set_flash_node(chip, np);
+
+ mtd = nand_to_mtd(chip);
+ mtd->dev.parent = cdns_ctrl->dev;
+
+ /*
+ * Default to HW ECC engine mode. If the nand-ecc-mode property is given
+ * in the DT node, this entry will be overwritten in nand_scan_ident().
+ */
+ chip->ecc.mode = NAND_ECC_HW;
+
+ ret = nand_scan(chip, cdns_chip->nsels);
+ if (ret) {
+ dev_err(cdns_ctrl->dev, "could not scan the nand chip\n");
+ return ret;
+ }
+
+ ret = mtd_device_register(mtd, NULL, 0);
+ if (ret) {
+ dev_err(cdns_ctrl->dev,
+ "failed to register mtd device: %d\n", ret);
+ nand_cleanup(chip);
+ return ret;
+ }
+
+ list_add_tail(&cdns_chip->node, &cdns_ctrl->chips);
+
+ return 0;
+}
+
+static void cadence_nand_chips_cleanup(struct cdns_nand_ctrl *cdns_ctrl)
+{
+ struct cdns_nand_chip *entry, *temp;
+
+ list_for_each_entry_safe(entry, temp, &cdns_ctrl->chips, node) {
+ nand_release(&entry->chip);
+ list_del(&entry->node);
+ }
+}
+
+static int cadence_nand_chips_init(struct cdns_nand_ctrl *cdns_ctrl)
+{
+ struct device_node *np = cdns_ctrl->dev->of_node;
+ struct device_node *nand_np;
+ int max_cs = cdns_ctrl->caps2.max_banks;
+ int nchips, ret;
+
+ nchips = of_get_child_count(np);
+
+ if (nchips > max_cs) {
+ dev_err(cdns_ctrl->dev,
+ "too many NAND chips: %d (max = %d CS)\n",
+ nchips, max_cs);
+ return -EINVAL;
+ }
+
+ for_each_child_of_node(np, nand_np) {
+ ret = cadence_nand_chip_init(cdns_ctrl, nand_np);
+ if (ret) {
+ of_node_put(nand_np);
+ cadence_nand_chips_cleanup(cdns_ctrl);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static void
+cadence_nand_irq_cleanup(int irqnum, struct cdns_nand_ctrl *cdns_ctrl)
+{
+ /* Disable interrupts. */
+ writel_relaxed(INTR_ENABLE_INTR_EN, cdns_ctrl->reg + INTR_ENABLE);
+}
+
+static int cadence_nand_init(struct cdns_nand_ctrl *cdns_ctrl)
+{
+ dma_cap_mask_t mask;
+ int ret;
+
+ cdns_ctrl->cdma_desc = dma_alloc_coherent(cdns_ctrl->dev,
+ sizeof(*cdns_ctrl->cdma_desc),
+ &cdns_ctrl->dma_cdma_desc,
+ GFP_KERNEL);
+ if (!cdns_ctrl->dma_cdma_desc)
+ return -ENOMEM;
+
+ cdns_ctrl->buf_size = SZ_16K;
+ cdns_ctrl->buf = kmalloc(cdns_ctrl->buf_size, GFP_KERNEL);
+ if (!cdns_ctrl->buf) {
+ ret = -ENOMEM;
+ goto free_buf_desc;
+ }
+
+ if (devm_request_irq(cdns_ctrl->dev, cdns_ctrl->irq, cadence_nand_isr,
+ IRQF_SHARED, "cadence-nand-controller",
+ cdns_ctrl)) {
+ dev_err(cdns_ctrl->dev, "Unable to allocate IRQ\n");
+ ret = -ENODEV;
+ goto free_buf;
+ }
+
+ spin_lock_init(&cdns_ctrl->irq_lock);
+ init_completion(&cdns_ctrl->complete);
+
+ ret = cadence_nand_hw_init(cdns_ctrl);
+ if (ret)
+ goto disable_irq;
+
+ dma_cap_zero(mask);
+ dma_cap_set(DMA_MEMCPY, mask);
+
+ if (cdns_ctrl->caps1->has_dma) {
+ cdns_ctrl->dmac = dma_request_channel(mask, NULL, NULL);
+ if (!cdns_ctrl->dmac) {
+ dev_err(cdns_ctrl->dev,
+ "Unable to get a DMA channel\n");
+ ret = -EBUSY;
+ goto disable_irq;
+ }
+ }
+
+ nand_controller_init(&cdns_ctrl->controller);
+ INIT_LIST_HEAD(&cdns_ctrl->chips);
+
+ cdns_ctrl->controller.ops = &cadence_nand_controller_ops;
+ cdns_ctrl->curr_corr_str_idx = 0xFF;
+
+ ret = cadence_nand_chips_init(cdns_ctrl);
+ if (ret) {
+ dev_err(cdns_ctrl->dev, "Failed to register MTD: %d\n",
+ ret);
+ goto dma_release_chnl;
+ }
+
+ kfree(cdns_ctrl->buf);
+ cdns_ctrl->buf = kzalloc(cdns_ctrl->buf_size, GFP_KERNEL);
+ if (!cdns_ctrl->buf) {
+ ret = -ENOMEM;
+ goto dma_release_chnl;
+ }
+
+ return 0;
+
+dma_release_chnl:
+ if (cdns_ctrl->dmac)
+ dma_release_channel(cdns_ctrl->dmac);
+
+disable_irq:
+ cadence_nand_irq_cleanup(cdns_ctrl->irq, cdns_ctrl);
+
+free_buf:
+ kfree(cdns_ctrl->buf);
+
+free_buf_desc:
+ dma_free_coherent(cdns_ctrl->dev, sizeof(struct cadence_nand_cdma_desc),
+ cdns_ctrl->cdma_desc, cdns_ctrl->dma_cdma_desc);
+
+ return ret;
+}
+
+/* Driver exit point. */
+static void cadence_nand_remove(struct cdns_nand_ctrl *cdns_ctrl)
+{
+ cadence_nand_chips_cleanup(cdns_ctrl);
+ cadence_nand_irq_cleanup(cdns_ctrl->irq, cdns_ctrl);
+ kfree(cdns_ctrl->buf);
+ dma_free_coherent(cdns_ctrl->dev, sizeof(struct cadence_nand_cdma_desc),
+ cdns_ctrl->cdma_desc, cdns_ctrl->dma_cdma_desc);
+
+ if (cdns_ctrl->dmac)
+ dma_release_channel(cdns_ctrl->dmac);
+}
+
+struct cadence_nand_dt {
+ struct cdns_nand_ctrl cdns_ctrl;
+ struct clk *clk;
+};
+
+static const struct cadence_nand_dt_devdata cadence_nand_default = {
+ .if_skew = 0,
+ .has_dma = 1,
+};
+
+static const struct of_device_id cadence_nand_dt_ids[] = {
+ {
+ .compatible = "cdns,hp-nfc",
+ .data = &cadence_nand_default
+ }, {}
+};
+
+MODULE_DEVICE_TABLE(of, cadence_nand_dt_ids);
+
+static int cadence_nand_dt_probe(struct platform_device *ofdev)
+{
+ struct resource *res;
+ struct cadence_nand_dt *dt;
+ struct cdns_nand_ctrl *cdns_ctrl;
+ int ret;
+ const struct of_device_id *of_id;
+ const struct cadence_nand_dt_devdata *devdata;
+ u32 val;
+
+ of_id = of_match_device(cadence_nand_dt_ids, &ofdev->dev);
+ if (of_id) {
+ ofdev->id_entry = of_id->data;
+ devdata = of_id->data;
+ } else {
+ pr_err("Failed to find the right device id.\n");
+ return -ENOMEM;
+ }
+
+ dt = devm_kzalloc(&ofdev->dev, sizeof(*dt), GFP_KERNEL);
+ if (!dt)
+ return -ENOMEM;
+
+ cdns_ctrl = &dt->cdns_ctrl;
+ cdns_ctrl->caps1 = devdata;
+
+ cdns_ctrl->dev = &ofdev->dev;
+ cdns_ctrl->irq = platform_get_irq(ofdev, 0);
+ if (cdns_ctrl->irq < 0)
+ return cdns_ctrl->irq;
+
+ dev_info(cdns_ctrl->dev, "IRQ: nr %d\n", cdns_ctrl->irq);
+
+ cdns_ctrl->reg = devm_platform_ioremap_resource(ofdev, 0);
+ if (IS_ERR(cdns_ctrl->reg)) {
+ dev_err(&ofdev->dev, "devm_ioremap_resource res 0 failed\n");
+ return PTR_ERR(cdns_ctrl->reg);
+ }
+
+ res = platform_get_resource(ofdev, IORESOURCE_MEM, 1);
+ cdns_ctrl->io.dma = res->start;
+ cdns_ctrl->io.virt = devm_ioremap_resource(&ofdev->dev, res);
+ if (IS_ERR(cdns_ctrl->io.virt)) {
+ dev_err(cdns_ctrl->dev, "devm_ioremap_resource res 1 failed\n");
+ return PTR_ERR(cdns_ctrl->io.virt);
+ }
+
+ dt->clk = devm_clk_get(cdns_ctrl->dev, "nf_clk");
+ if (IS_ERR(dt->clk))
+ return PTR_ERR(dt->clk);
+
+ cdns_ctrl->nf_clk_rate = clk_get_rate(dt->clk);
+
+ ret = of_property_read_u32(ofdev->dev.of_node,
+ "cdns,board-delay-ps", &val);
+ if (ret) {
+ val = 4830;
+ dev_info(cdns_ctrl->dev,
+ "missing cdns,board-delay-ps property, %d was set\n",
+ val);
+ }
+ cdns_ctrl->board_delay = val;
+
+ ret = cadence_nand_init(cdns_ctrl);
+ if (ret)
+ return ret;
+
+ platform_set_drvdata(ofdev, dt);
+ return 0;
+}
+
+static int cadence_nand_dt_remove(struct platform_device *ofdev)
+{
+ struct cadence_nand_dt *dt = platform_get_drvdata(ofdev);
+
+ cadence_nand_remove(&dt->cdns_ctrl);
+
+ return 0;
+}
+
+static struct platform_driver cadence_nand_dt_driver = {
+ .probe = cadence_nand_dt_probe,
+ .remove = cadence_nand_dt_remove,
+ .driver = {
+ .name = "cadence-nand-controller",
+ .of_match_table = cadence_nand_dt_ids,
+ },
+};
+
+module_platform_driver(cadence_nand_dt_driver);
+
+MODULE_AUTHOR("Piotr Sroka <piotrs@cadence.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Driver for Cadence NAND flash controller");
+
diff --git a/drivers/mtd/nand/raw/denali_dt.c b/drivers/mtd/nand/raw/denali_dt.c
index 5e14836f6bd5..8b779a899dcf 100644
--- a/drivers/mtd/nand/raw/denali_dt.c
+++ b/drivers/mtd/nand/raw/denali_dt.c
@@ -102,47 +102,6 @@ static int denali_dt_chip_init(struct denali_controller *denali,
return denali_chip_init(denali, dchip);
}
-/* Backward compatibility for old platforms */
-static int denali_dt_legacy_chip_init(struct denali_controller *denali)
-{
- struct denali_chip *dchip;
- int nsels, i;
-
- nsels = denali->nbanks;
-
- dchip = devm_kzalloc(denali->dev, struct_size(dchip, sels, nsels),
- GFP_KERNEL);
- if (!dchip)
- return -ENOMEM;
-
- dchip->nsels = nsels;
-
- for (i = 0; i < nsels; i++)
- dchip->sels[i].bank = i;
-
- nand_set_flash_node(&dchip->chip, denali->dev->of_node);
-
- return denali_chip_init(denali, dchip);
-}
-
-/*
- * Check the DT binding.
- * The new binding expects chip subnodes in the controller node.
- * So, #address-cells = <1>; #size-cells = <0>; are required.
- * Check the #size-cells to distinguish the binding.
- */
-static bool denali_dt_is_legacy_binding(struct device_node *np)
-{
- u32 cells;
- int ret;
-
- ret = of_property_read_u32(np, "#size-cells", &cells);
- if (ret)
- return true;
-
- return cells != 0;
-}
-
static int denali_dt_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
@@ -167,10 +126,8 @@ static int denali_dt_probe(struct platform_device *pdev)
denali->dev = dev;
denali->irq = platform_get_irq(pdev, 0);
- if (denali->irq < 0) {
- dev_err(dev, "no irq defined\n");
+ if (denali->irq < 0)
return denali->irq;
- }
res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "denali_reg");
denali->reg = devm_ioremap_resource(dev, res);
@@ -213,17 +170,11 @@ static int denali_dt_probe(struct platform_device *pdev)
if (ret)
goto out_disable_clk_ecc;
- if (denali_dt_is_legacy_binding(dev->of_node)) {
- ret = denali_dt_legacy_chip_init(denali);
- if (ret)
+ for_each_child_of_node(dev->of_node, np) {
+ ret = denali_dt_chip_init(denali, np);
+ if (ret) {
+ of_node_put(np);
goto out_remove_denali;
- } else {
- for_each_child_of_node(dev->of_node, np) {
- ret = denali_dt_chip_init(denali, np);
- if (ret) {
- of_node_put(np);
- goto out_remove_denali;
- }
}
}
diff --git a/drivers/mtd/nand/raw/hisi504_nand.c b/drivers/mtd/nand/raw/hisi504_nand.c
index 6a4626a8bf95..0b48be54ba6f 100644
--- a/drivers/mtd/nand/raw/hisi504_nand.c
+++ b/drivers/mtd/nand/raw/hisi504_nand.c
@@ -751,10 +751,8 @@ static int hisi_nfc_probe(struct platform_device *pdev)
mtd = nand_to_mtd(chip);
irq = platform_get_irq(pdev, 0);
- if (irq < 0) {
- dev_err(dev, "no IRQ resource defined\n");
+ if (irq < 0)
return -ENXIO;
- }
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
host->iobase = devm_ioremap_resource(dev, res);
diff --git a/drivers/mtd/nand/raw/lpc32xx_mlc.c b/drivers/mtd/nand/raw/lpc32xx_mlc.c
index 78b31f845c50..241b58b83240 100644
--- a/drivers/mtd/nand/raw/lpc32xx_mlc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_mlc.c
@@ -773,7 +773,6 @@ static int lpc32xx_nand_probe(struct platform_device *pdev)
host->irq = platform_get_irq(pdev, 0);
if (host->irq < 0) {
- dev_err(&pdev->dev, "failed to get platform irq\n");
res = -EINVAL;
goto release_dma_chan;
}
diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c
index fc49e13d81ec..fb5abdcfb007 100644
--- a/drivers/mtd/nand/raw/marvell_nand.c
+++ b/drivers/mtd/nand/raw/marvell_nand.c
@@ -2862,10 +2862,8 @@ static int marvell_nfc_probe(struct platform_device *pdev)
return PTR_ERR(nfc->regs);
irq = platform_get_irq(pdev, 0);
- if (irq < 0) {
- dev_err(dev, "failed to retrieve irq\n");
+ if (irq < 0)
return irq;
- }
nfc->core_clk = devm_clk_get(&pdev->dev, "core");
diff --git a/drivers/mtd/nand/raw/meson_nand.c b/drivers/mtd/nand/raw/meson_nand.c
index 1b82b687e5a5..9f17b5b8efbf 100644
--- a/drivers/mtd/nand/raw/meson_nand.c
+++ b/drivers/mtd/nand/raw/meson_nand.c
@@ -1399,10 +1399,8 @@ static int meson_nfc_probe(struct platform_device *pdev)
}
irq = platform_get_irq(pdev, 0);
- if (irq < 0) {
- dev_err(dev, "no NFC IRQ resource\n");
+ if (irq < 0)
return -EINVAL;
- }
ret = meson_nfc_clk_init(nfc);
if (ret) {
diff --git a/drivers/mtd/nand/raw/mtk_ecc.c b/drivers/mtd/nand/raw/mtk_ecc.c
index 74595b644b7c..75f1fa3d4d35 100644
--- a/drivers/mtd/nand/raw/mtk_ecc.c
+++ b/drivers/mtd/nand/raw/mtk_ecc.c
@@ -527,10 +527,8 @@ static int mtk_ecc_probe(struct platform_device *pdev)
}
irq = platform_get_irq(pdev, 0);
- if (irq < 0) {
- dev_err(dev, "failed to get irq: %d\n", irq);
+ if (irq < 0)
return irq;
- }
ret = dma_set_mask(dev, DMA_BIT_MASK(32));
if (ret) {
diff --git a/drivers/mtd/nand/raw/mtk_nand.c b/drivers/mtd/nand/raw/mtk_nand.c
index 373d47d1ba4c..b8305e39ab51 100644
--- a/drivers/mtd/nand/raw/mtk_nand.c
+++ b/drivers/mtd/nand/raw/mtk_nand.c
@@ -1540,7 +1540,6 @@ static int mtk_nfc_probe(struct platform_device *pdev)
irq = platform_get_irq(pdev, 0);
if (irq < 0) {
- dev_err(dev, "no nfi irq resource\n");
ret = -EINVAL;
goto clk_disable;
}
diff --git a/drivers/mtd/nand/raw/mxic_nand.c b/drivers/mtd/nand/raw/mxic_nand.c
index 9d49e6c845e1..ed7a4e021bf5 100644
--- a/drivers/mtd/nand/raw/mxic_nand.c
+++ b/drivers/mtd/nand/raw/mxic_nand.c
@@ -524,10 +524,8 @@ static int mxic_nfc_probe(struct platform_device *pdev)
nand_chip->controller = &nfc->controller;
irq = platform_get_irq(pdev, 0);
- if (irq < 0) {
- dev_err(&pdev->dev, "failed to retrieve irq\n");
+ if (irq < 0)
return irq;
- }
mxic_nfc_hw_init(nfc);
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 5c2c30a7dffa..f64e3b6605c6 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -292,12 +292,16 @@ int nand_bbm_get_next_page(struct nand_chip *chip, int page)
struct mtd_info *mtd = nand_to_mtd(chip);
int last_page = ((mtd->erasesize - mtd->writesize) >>
chip->page_shift) & chip->pagemask;
+ unsigned int bbm_flags = NAND_BBM_FIRSTPAGE | NAND_BBM_SECONDPAGE
+ | NAND_BBM_LASTPAGE;
+ if (page == 0 && !(chip->options & bbm_flags))
+ return 0;
if (page == 0 && chip->options & NAND_BBM_FIRSTPAGE)
return 0;
- else if (page <= 1 && chip->options & NAND_BBM_SECONDPAGE)
+ if (page <= 1 && chip->options & NAND_BBM_SECONDPAGE)
return 1;
- else if (page <= last_page && chip->options & NAND_BBM_LASTPAGE)
+ if (page <= last_page && chip->options & NAND_BBM_LASTPAGE)
return last_page;
return -EINVAL;
diff --git a/drivers/mtd/nand/raw/nand_micron.c b/drivers/mtd/nand/raw/nand_micron.c
index 8ca9fad6e6ad..56654030ec7f 100644
--- a/drivers/mtd/nand/raw/nand_micron.c
+++ b/drivers/mtd/nand/raw/nand_micron.c
@@ -446,8 +446,10 @@ static int micron_nand_init(struct nand_chip *chip)
if (ret)
goto err_free_manuf_data;
+ chip->options |= NAND_BBM_FIRSTPAGE;
+
if (mtd->writesize == 2048)
- chip->options |= NAND_BBM_FIRSTPAGE | NAND_BBM_SECONDPAGE;
+ chip->options |= NAND_BBM_SECONDPAGE;
ondie = micron_supports_on_die_ecc(chip);
diff --git a/drivers/mtd/nand/raw/omap2.c b/drivers/mtd/nand/raw/omap2.c
index 6ec65f48501c..ad77c112a78a 100644
--- a/drivers/mtd/nand/raw/omap2.c
+++ b/drivers/mtd/nand/raw/omap2.c
@@ -1967,10 +1967,8 @@ static int omap_nand_attach_chip(struct nand_chip *chip)
case NAND_OMAP_PREFETCH_IRQ:
info->gpmc_irq_fifo = platform_get_irq(info->pdev, 0);
- if (info->gpmc_irq_fifo <= 0) {
- dev_err(dev, "Error getting fifo IRQ\n");
+ if (info->gpmc_irq_fifo <= 0)
return -ENODEV;
- }
err = devm_request_irq(dev, info->gpmc_irq_fifo,
omap_nand_irq, IRQF_SHARED,
"gpmc-nand-fifo", info);
@@ -1982,10 +1980,8 @@ static int omap_nand_attach_chip(struct nand_chip *chip)
}
info->gpmc_irq_count = platform_get_irq(info->pdev, 1);
- if (info->gpmc_irq_count <= 0) {
- dev_err(dev, "Error getting IRQ count\n");
+ if (info->gpmc_irq_count <= 0)
return -ENODEV;
- }
err = devm_request_irq(dev, info->gpmc_irq_count,
omap_nand_irq, IRQF_SHARED,
"gpmc-nand-count", info);
diff --git a/drivers/mtd/nand/raw/sh_flctl.c b/drivers/mtd/nand/raw/sh_flctl.c
index e509c93737c4..058e99d0cbcf 100644
--- a/drivers/mtd/nand/raw/sh_flctl.c
+++ b/drivers/mtd/nand/raw/sh_flctl.c
@@ -1129,10 +1129,8 @@ static int flctl_probe(struct platform_device *pdev)
flctl->fifo = res->start + 0x24; /* FLDTFIFO */
irq = platform_get_irq(pdev, 0);
- if (irq < 0) {
- dev_err(&pdev->dev, "failed to get flste irq data: %d\n", irq);
+ if (irq < 0)
return irq;
- }
ret = devm_request_irq(&pdev->dev, irq, flctl_handle_flste, IRQF_SHARED,
"flste", flctl);
diff --git a/drivers/mtd/nand/raw/stm32_fmc2_nand.c b/drivers/mtd/nand/raw/stm32_fmc2_nand.c
index 8cc852dc7d54..9e63800f768a 100644
--- a/drivers/mtd/nand/raw/stm32_fmc2_nand.c
+++ b/drivers/mtd/nand/raw/stm32_fmc2_nand.c
@@ -1880,11 +1880,8 @@ static int stm32_fmc2_probe(struct platform_device *pdev)
}
irq = platform_get_irq(pdev, 0);
- if (irq < 0) {
- if (irq != -EPROBE_DEFER)
- dev_err(dev, "IRQ error missing or invalid\n");
+ if (irq < 0)
return irq;
- }
ret = devm_request_irq(dev, irq, stm32_fmc2_irq, 0,
dev_name(dev), fmc2);
diff --git a/drivers/mtd/nand/raw/sunxi_nand.c b/drivers/mtd/nand/raw/sunxi_nand.c
index 89773293c64d..37a4ac0dd85b 100644
--- a/drivers/mtd/nand/raw/sunxi_nand.c
+++ b/drivers/mtd/nand/raw/sunxi_nand.c
@@ -2071,10 +2071,8 @@ static int sunxi_nfc_probe(struct platform_device *pdev)
return PTR_ERR(nfc->regs);
irq = platform_get_irq(pdev, 0);
- if (irq < 0) {
- dev_err(dev, "failed to retrieve irq\n");
+ if (irq < 0)
return irq;
- }
nfc->ahb_clk = devm_clk_get(dev, "ahb");
if (IS_ERR(nfc->ahb_clk)) {
diff --git a/drivers/mtd/spi-nor/aspeed-smc.c b/drivers/mtd/spi-nor/aspeed-smc.c
index 009c1da8574c..2b7cabbb680c 100644
--- a/drivers/mtd/spi-nor/aspeed-smc.c
+++ b/drivers/mtd/spi-nor/aspeed-smc.c
@@ -320,7 +320,8 @@ static void aspeed_smc_unprep(struct spi_nor *nor, enum spi_nor_ops ops)
mutex_unlock(&chip->controller->mutex);
}
-static int aspeed_smc_read_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len)
+static int aspeed_smc_read_reg(struct spi_nor *nor, u8 opcode, u8 *buf,
+ size_t len)
{
struct aspeed_smc_chip *chip = nor->priv;
@@ -331,8 +332,8 @@ static int aspeed_smc_read_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len)
return 0;
}
-static int aspeed_smc_write_reg(struct spi_nor *nor, u8 opcode, u8 *buf,
- int len)
+static int aspeed_smc_write_reg(struct spi_nor *nor, u8 opcode, const u8 *buf,
+ size_t len)
{
struct aspeed_smc_chip *chip = nor->priv;
@@ -746,6 +747,15 @@ static int aspeed_smc_chip_setup_finish(struct aspeed_smc_chip *chip)
return 0;
}
+static const struct spi_nor_controller_ops aspeed_smc_controller_ops = {
+ .prepare = aspeed_smc_prep,
+ .unprepare = aspeed_smc_unprep,
+ .read_reg = aspeed_smc_read_reg,
+ .write_reg = aspeed_smc_write_reg,
+ .read = aspeed_smc_read_user,
+ .write = aspeed_smc_write_user,
+};
+
static int aspeed_smc_setup_flash(struct aspeed_smc_controller *controller,
struct device_node *np, struct resource *r)
{
@@ -805,12 +815,7 @@ static int aspeed_smc_setup_flash(struct aspeed_smc_controller *controller,
nor->dev = dev;
nor->priv = chip;
spi_nor_set_flash_node(nor, child);
- nor->read = aspeed_smc_read_user;
- nor->write = aspeed_smc_write_user;
- nor->read_reg = aspeed_smc_read_reg;
- nor->write_reg = aspeed_smc_write_reg;
- nor->prepare = aspeed_smc_prep;
- nor->unprepare = aspeed_smc_unprep;
+ nor->controller_ops = &aspeed_smc_controller_ops;
ret = aspeed_smc_chip_setup_init(chip, r);
if (ret)
diff --git a/drivers/mtd/spi-nor/cadence-quadspi.c b/drivers/mtd/spi-nor/cadence-quadspi.c
index 7bef63947b29..06f997247d0f 100644
--- a/drivers/mtd/spi-nor/cadence-quadspi.c
+++ b/drivers/mtd/spi-nor/cadence-quadspi.c
@@ -285,7 +285,7 @@ static irqreturn_t cqspi_irq_handler(int this_irq, void *dev)
return IRQ_HANDLED;
}
-static unsigned int cqspi_calc_rdreg(struct spi_nor *nor, const u8 opcode)
+static unsigned int cqspi_calc_rdreg(struct spi_nor *nor)
{
struct cqspi_flash_pdata *f_pdata = nor->priv;
u32 rdreg = 0;
@@ -354,27 +354,27 @@ static int cqspi_exec_flash_cmd(struct cqspi_st *cqspi, unsigned int reg)
return cqspi_wait_idle(cqspi);
}
-static int cqspi_command_read(struct spi_nor *nor,
- const u8 *txbuf, const unsigned n_tx,
- u8 *rxbuf, const unsigned n_rx)
+static int cqspi_command_read(struct spi_nor *nor, u8 opcode,
+ u8 *rxbuf, size_t n_rx)
{
struct cqspi_flash_pdata *f_pdata = nor->priv;
struct cqspi_st *cqspi = f_pdata->cqspi;
void __iomem *reg_base = cqspi->iobase;
unsigned int rdreg;
unsigned int reg;
- unsigned int read_len;
+ size_t read_len;
int status;
if (!n_rx || n_rx > CQSPI_STIG_DATA_LEN_MAX || !rxbuf) {
- dev_err(nor->dev, "Invalid input argument, len %d rxbuf 0x%p\n",
+ dev_err(nor->dev,
+ "Invalid input argument, len %zu rxbuf 0x%p\n",
n_rx, rxbuf);
return -EINVAL;
}
- reg = txbuf[0] << CQSPI_REG_CMDCTRL_OPCODE_LSB;
+ reg = opcode << CQSPI_REG_CMDCTRL_OPCODE_LSB;
- rdreg = cqspi_calc_rdreg(nor, txbuf[0]);
+ rdreg = cqspi_calc_rdreg(nor);
writel(rdreg, reg_base + CQSPI_REG_RD_INSTR);
reg |= (0x1 << CQSPI_REG_CMDCTRL_RD_EN_LSB);
@@ -404,19 +404,19 @@ static int cqspi_command_read(struct spi_nor *nor,
}
static int cqspi_command_write(struct spi_nor *nor, const u8 opcode,
- const u8 *txbuf, const unsigned n_tx)
+ const u8 *txbuf, size_t n_tx)
{
struct cqspi_flash_pdata *f_pdata = nor->priv;
struct cqspi_st *cqspi = f_pdata->cqspi;
void __iomem *reg_base = cqspi->iobase;
unsigned int reg;
unsigned int data;
- u32 write_len;
+ size_t write_len;
int ret;
if (n_tx > CQSPI_STIG_DATA_LEN_MAX || (n_tx && !txbuf)) {
dev_err(nor->dev,
- "Invalid input argument, cmdlen %d txbuf 0x%p\n",
+ "Invalid input argument, cmdlen %zu txbuf 0x%p\n",
n_tx, txbuf);
return -EINVAL;
}
@@ -470,7 +470,7 @@ static int cqspi_read_setup(struct spi_nor *nor)
unsigned int reg;
reg = nor->read_opcode << CQSPI_REG_RD_INSTR_OPCODE_LSB;
- reg |= cqspi_calc_rdreg(nor, nor->read_opcode);
+ reg |= cqspi_calc_rdreg(nor);
/* Setup dummy clock cycles */
dummy_clk = nor->read_dummy;
@@ -603,7 +603,7 @@ static int cqspi_write_setup(struct spi_nor *nor)
/* Set opcode. */
reg = nor->program_opcode << CQSPI_REG_WR_INSTR_OPCODE_LSB;
writel(reg, reg_base + CQSPI_REG_WR_INSTR);
- reg = cqspi_calc_rdreg(nor, nor->program_opcode);
+ reg = cqspi_calc_rdreg(nor);
writel(reg, reg_base + CQSPI_REG_RD_INSTR);
reg = readl(reg_base + CQSPI_REG_SIZE);
@@ -1050,7 +1050,7 @@ static int cqspi_erase(struct spi_nor *nor, loff_t offs)
return ret;
/* Send write enable, then erase commands. */
- ret = nor->write_reg(nor, SPINOR_OP_WREN, NULL, 0);
+ ret = nor->controller_ops->write_reg(nor, SPINOR_OP_WREN, NULL, 0);
if (ret)
return ret;
@@ -1080,18 +1080,19 @@ static void cqspi_unprep(struct spi_nor *nor, enum spi_nor_ops ops)
mutex_unlock(&cqspi->bus_mutex);
}
-static int cqspi_read_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len)
+static int cqspi_read_reg(struct spi_nor *nor, u8 opcode, u8 *buf, size_t len)
{
int ret;
ret = cqspi_set_protocol(nor, 0);
if (!ret)
- ret = cqspi_command_read(nor, &opcode, 1, buf, len);
+ ret = cqspi_command_read(nor, opcode, buf, len);
return ret;
}
-static int cqspi_write_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len)
+static int cqspi_write_reg(struct spi_nor *nor, u8 opcode, const u8 *buf,
+ size_t len)
{
int ret;
@@ -1216,6 +1217,16 @@ static void cqspi_request_mmap_dma(struct cqspi_st *cqspi)
init_completion(&cqspi->rx_dma_complete);
}
+static const struct spi_nor_controller_ops cqspi_controller_ops = {
+ .prepare = cqspi_prep,
+ .unprepare = cqspi_unprep,
+ .read_reg = cqspi_read_reg,
+ .write_reg = cqspi_write_reg,
+ .read = cqspi_read,
+ .write = cqspi_write,
+ .erase = cqspi_erase,
+};
+
static int cqspi_setup_flash(struct cqspi_st *cqspi, struct device_node *np)
{
struct platform_device *pdev = cqspi->pdev;
@@ -1265,14 +1276,7 @@ static int cqspi_setup_flash(struct cqspi_st *cqspi, struct device_node *np)
nor->dev = dev;
spi_nor_set_flash_node(nor, np);
nor->priv = f_pdata;
-
- nor->read_reg = cqspi_read_reg;
- nor->write_reg = cqspi_write_reg;
- nor->read = cqspi_read;
- nor->write = cqspi_write;
- nor->erase = cqspi_erase;
- nor->prepare = cqspi_prep;
- nor->unprepare = cqspi_unprep;
+ nor->controller_ops = &cqspi_controller_ops;
mtd->name = devm_kasprintf(dev, GFP_KERNEL, "%s.%d",
dev_name(dev), cs);
@@ -1366,10 +1370,8 @@ static int cqspi_probe(struct platform_device *pdev)
/* Obtain IRQ line. */
irq = platform_get_irq(pdev, 0);
- if (irq < 0) {
- dev_err(dev, "Cannot obtain IRQ.\n");
+ if (irq < 0)
return -ENXIO;
- }
pm_runtime_enable(dev);
ret = pm_runtime_get_sync(dev);
diff --git a/drivers/mtd/spi-nor/hisi-sfc.c b/drivers/mtd/spi-nor/hisi-sfc.c
index 6dac9dd8bf42..a1258216f89d 100644
--- a/drivers/mtd/spi-nor/hisi-sfc.c
+++ b/drivers/mtd/spi-nor/hisi-sfc.c
@@ -177,7 +177,7 @@ static void hisi_spi_nor_unprep(struct spi_nor *nor, enum spi_nor_ops ops)
}
static int hisi_spi_nor_op_reg(struct spi_nor *nor,
- u8 opcode, int len, u8 optype)
+ u8 opcode, size_t len, u8 optype)
{
struct hifmc_priv *priv = nor->priv;
struct hifmc_host *host = priv->host;
@@ -200,7 +200,7 @@ static int hisi_spi_nor_op_reg(struct spi_nor *nor,
}
static int hisi_spi_nor_read_reg(struct spi_nor *nor, u8 opcode, u8 *buf,
- int len)
+ size_t len)
{
struct hifmc_priv *priv = nor->priv;
struct hifmc_host *host = priv->host;
@@ -215,7 +215,7 @@ static int hisi_spi_nor_read_reg(struct spi_nor *nor, u8 opcode, u8 *buf,
}
static int hisi_spi_nor_write_reg(struct spi_nor *nor, u8 opcode,
- u8 *buf, int len)
+ const u8 *buf, size_t len)
{
struct hifmc_priv *priv = nor->priv;
struct hifmc_host *host = priv->host;
@@ -311,6 +311,15 @@ static ssize_t hisi_spi_nor_write(struct spi_nor *nor, loff_t to,
return len;
}
+static const struct spi_nor_controller_ops hisi_controller_ops = {
+ .prepare = hisi_spi_nor_prep,
+ .unprepare = hisi_spi_nor_unprep,
+ .read_reg = hisi_spi_nor_read_reg,
+ .write_reg = hisi_spi_nor_write_reg,
+ .read = hisi_spi_nor_read,
+ .write = hisi_spi_nor_write,
+};
+
/**
* Get spi flash device information and register it as a mtd device.
*/
@@ -357,14 +366,8 @@ static int hisi_spi_nor_register(struct device_node *np,
}
priv->host = host;
nor->priv = priv;
+ nor->controller_ops = &hisi_controller_ops;
- nor->prepare = hisi_spi_nor_prep;
- nor->unprepare = hisi_spi_nor_unprep;
- nor->read_reg = hisi_spi_nor_read_reg;
- nor->write_reg = hisi_spi_nor_write_reg;
- nor->read = hisi_spi_nor_read;
- nor->write = hisi_spi_nor_write;
- nor->erase = NULL;
ret = spi_nor_scan(nor, NULL, &hwcaps);
if (ret)
return ret;
diff --git a/drivers/mtd/spi-nor/intel-spi-pci.c b/drivers/mtd/spi-nor/intel-spi-pci.c
index 3cda8e7a68f8..3d8987baea2a 100644
--- a/drivers/mtd/spi-nor/intel-spi-pci.c
+++ b/drivers/mtd/spi-nor/intel-spi-pci.c
@@ -20,6 +20,10 @@ static const struct intel_spi_boardinfo bxt_info = {
.type = INTEL_SPI_BXT,
};
+static const struct intel_spi_boardinfo cnl_info = {
+ .type = INTEL_SPI_CNL,
+};
+
static int intel_spi_pci_probe(struct pci_dev *pdev,
const struct pci_device_id *id)
{
@@ -61,6 +65,7 @@ static void intel_spi_pci_remove(struct pci_dev *pdev)
static const struct pci_device_id intel_spi_pci_ids[] = {
{ PCI_VDEVICE(INTEL, 0x02a4), (unsigned long)&bxt_info },
+ { PCI_VDEVICE(INTEL, 0x06a4), (unsigned long)&bxt_info },
{ PCI_VDEVICE(INTEL, 0x18e0), (unsigned long)&bxt_info },
{ PCI_VDEVICE(INTEL, 0x19e0), (unsigned long)&bxt_info },
{ PCI_VDEVICE(INTEL, 0x34a4), (unsigned long)&bxt_info },
@@ -68,6 +73,7 @@ static const struct pci_device_id intel_spi_pci_ids[] = {
{ PCI_VDEVICE(INTEL, 0xa0a4), (unsigned long)&bxt_info },
{ PCI_VDEVICE(INTEL, 0xa1a4), (unsigned long)&bxt_info },
{ PCI_VDEVICE(INTEL, 0xa224), (unsigned long)&bxt_info },
+ { PCI_VDEVICE(INTEL, 0xa324), (unsigned long)&cnl_info },
{ },
};
MODULE_DEVICE_TABLE(pci, intel_spi_pci_ids);
diff --git a/drivers/mtd/spi-nor/intel-spi.c b/drivers/mtd/spi-nor/intel-spi.c
index 43e55a2e9b27..61d2a0ad2131 100644
--- a/drivers/mtd/spi-nor/intel-spi.c
+++ b/drivers/mtd/spi-nor/intel-spi.c
@@ -108,6 +108,10 @@
#define BXT_FREG_NUM 12
#define BXT_PR_NUM 6
+#define CNL_PR 0x84
+#define CNL_FREG_NUM 6
+#define CNL_PR_NUM 5
+
#define LVSCC 0xc4
#define UVSCC 0xc8
#define ERASE_OPCODE_SHIFT 8
@@ -187,12 +191,16 @@ static void intel_spi_dump_regs(struct intel_spi *ispi)
dev_dbg(ispi->dev, "PR(%d)=0x%08x\n", i,
readl(ispi->pregs + PR(i)));
- value = readl(ispi->sregs + SSFSTS_CTL);
- dev_dbg(ispi->dev, "SSFSTS_CTL=0x%08x\n", value);
- dev_dbg(ispi->dev, "PREOP_OPTYPE=0x%08x\n",
- readl(ispi->sregs + PREOP_OPTYPE));
- dev_dbg(ispi->dev, "OPMENU0=0x%08x\n", readl(ispi->sregs + OPMENU0));
- dev_dbg(ispi->dev, "OPMENU1=0x%08x\n", readl(ispi->sregs + OPMENU1));
+ if (ispi->sregs) {
+ value = readl(ispi->sregs + SSFSTS_CTL);
+ dev_dbg(ispi->dev, "SSFSTS_CTL=0x%08x\n", value);
+ dev_dbg(ispi->dev, "PREOP_OPTYPE=0x%08x\n",
+ readl(ispi->sregs + PREOP_OPTYPE));
+ dev_dbg(ispi->dev, "OPMENU0=0x%08x\n",
+ readl(ispi->sregs + OPMENU0));
+ dev_dbg(ispi->dev, "OPMENU1=0x%08x\n",
+ readl(ispi->sregs + OPMENU1));
+ }
if (ispi->info->type == INTEL_SPI_BYT)
dev_dbg(ispi->dev, "BCR=0x%08x\n", readl(ispi->base + BYT_BCR));
@@ -340,6 +348,13 @@ static int intel_spi_init(struct intel_spi *ispi)
ispi->erase_64k = true;
break;
+ case INTEL_SPI_CNL:
+ ispi->sregs = NULL;
+ ispi->pregs = ispi->base + CNL_PR;
+ ispi->nregions = CNL_FREG_NUM;
+ ispi->pr_num = CNL_PR_NUM;
+ break;
+
default:
return -EINVAL;
}
@@ -367,6 +382,11 @@ static int intel_spi_init(struct intel_spi *ispi)
!(uvscc & ERASE_64K_OPCODE_MASK))
ispi->erase_64k = false;
+ if (ispi->sregs == NULL && (ispi->swseq_reg || ispi->swseq_erase)) {
+ dev_err(ispi->dev, "software sequencer not supported, but required\n");
+ return -EINVAL;
+ }
+
/*
* Some controllers can only do basic operations using hardware
* sequencer. All other operations are supposed to be carried out
@@ -383,7 +403,7 @@ static int intel_spi_init(struct intel_spi *ispi)
val = readl(ispi->base + HSFSTS_CTL);
ispi->locked = !!(val & HSFSTS_CTL_FLOCKDN);
- if (ispi->locked) {
+ if (ispi->locked && ispi->sregs) {
/*
* BIOS programs allowed opcodes and then locks down the
* register. So read back what opcodes it decided to support.
@@ -426,7 +446,7 @@ static int intel_spi_opcode_index(struct intel_spi *ispi, u8 opcode, int optype)
return 0;
}
-static int intel_spi_hw_cycle(struct intel_spi *ispi, u8 opcode, int len)
+static int intel_spi_hw_cycle(struct intel_spi *ispi, u8 opcode, size_t len)
{
u32 val, status;
int ret;
@@ -469,7 +489,7 @@ static int intel_spi_hw_cycle(struct intel_spi *ispi, u8 opcode, int len)
return 0;
}
-static int intel_spi_sw_cycle(struct intel_spi *ispi, u8 opcode, int len,
+static int intel_spi_sw_cycle(struct intel_spi *ispi, u8 opcode, size_t len,
int optype)
{
u32 val = 0, status;
@@ -535,7 +555,8 @@ static int intel_spi_sw_cycle(struct intel_spi *ispi, u8 opcode, int len,
return 0;
}
-static int intel_spi_read_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len)
+static int intel_spi_read_reg(struct spi_nor *nor, u8 opcode, u8 *buf,
+ size_t len)
{
struct intel_spi *ispi = nor->priv;
int ret;
@@ -555,7 +576,8 @@ static int intel_spi_read_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len)
return intel_spi_read_block(ispi, buf, len);
}
-static int intel_spi_write_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len)
+static int intel_spi_write_reg(struct spi_nor *nor, u8 opcode, const u8 *buf,
+ size_t len)
{
struct intel_spi *ispi = nor->priv;
int ret;
@@ -864,6 +886,14 @@ static void intel_spi_fill_partition(struct intel_spi *ispi,
}
}
+static const struct spi_nor_controller_ops intel_spi_controller_ops = {
+ .read_reg = intel_spi_read_reg,
+ .write_reg = intel_spi_write_reg,
+ .read = intel_spi_read,
+ .write = intel_spi_write,
+ .erase = intel_spi_erase,
+};
+
struct intel_spi *intel_spi_probe(struct device *dev,
struct resource *mem, const struct intel_spi_boardinfo *info)
{
@@ -897,11 +927,7 @@ struct intel_spi *intel_spi_probe(struct device *dev,
ispi->nor.dev = ispi->dev;
ispi->nor.priv = ispi;
- ispi->nor.read_reg = intel_spi_read_reg;
- ispi->nor.write_reg = intel_spi_write_reg;
- ispi->nor.read = intel_spi_read;
- ispi->nor.write = intel_spi_write;
- ispi->nor.erase = intel_spi_erase;
+ ispi->nor.controller_ops = &intel_spi_controller_ops;
ret = spi_nor_scan(&ispi->nor, NULL, &hwcaps);
if (ret) {
diff --git a/drivers/mtd/spi-nor/mtk-quadspi.c b/drivers/mtd/spi-nor/mtk-quadspi.c
index 34db01ab6cab..b1691680d174 100644
--- a/drivers/mtd/spi-nor/mtk-quadspi.c
+++ b/drivers/mtd/spi-nor/mtk-quadspi.c
@@ -151,9 +151,9 @@ static int mtk_nor_execute_cmd(struct mtk_nor *mtk_nor, u8 cmdval)
}
static int mtk_nor_do_tx_rx(struct mtk_nor *mtk_nor, u8 op,
- u8 *tx, int txlen, u8 *rx, int rxlen)
+ const u8 *tx, size_t txlen, u8 *rx, size_t rxlen)
{
- int len = 1 + txlen + rxlen;
+ size_t len = 1 + txlen + rxlen;
int i, ret, idx;
if (len > MTK_NOR_MAX_SHIFT)
@@ -193,7 +193,7 @@ static int mtk_nor_do_tx_rx(struct mtk_nor *mtk_nor, u8 op,
}
/* Do a WRSR (Write Status Register) command */
-static int mtk_nor_wr_sr(struct mtk_nor *mtk_nor, u8 sr)
+static int mtk_nor_wr_sr(struct mtk_nor *mtk_nor, const u8 sr)
{
writeb(sr, mtk_nor->base + MTK_NOR_PRGDATA5_REG);
writeb(8, mtk_nor->base + MTK_NOR_CNT_REG);
@@ -354,7 +354,7 @@ static ssize_t mtk_nor_write(struct spi_nor *nor, loff_t to, size_t len,
return len;
}
-static int mtk_nor_read_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len)
+static int mtk_nor_read_reg(struct spi_nor *nor, u8 opcode, u8 *buf, size_t len)
{
int ret;
struct mtk_nor *mtk_nor = nor->priv;
@@ -376,8 +376,8 @@ static int mtk_nor_read_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len)
return ret;
}
-static int mtk_nor_write_reg(struct spi_nor *nor, u8 opcode, u8 *buf,
- int len)
+static int mtk_nor_write_reg(struct spi_nor *nor, u8 opcode, const u8 *buf,
+ size_t len)
{
int ret;
struct mtk_nor *mtk_nor = nor->priv;
@@ -419,6 +419,13 @@ static int mtk_nor_enable_clk(struct mtk_nor *mtk_nor)
return 0;
}
+static const struct spi_nor_controller_ops mtk_controller_ops = {
+ .read_reg = mtk_nor_read_reg,
+ .write_reg = mtk_nor_write_reg,
+ .read = mtk_nor_read,
+ .write = mtk_nor_write,
+};
+
static int mtk_nor_init(struct mtk_nor *mtk_nor,
struct device_node *flash_node)
{
@@ -438,12 +445,8 @@ static int mtk_nor_init(struct mtk_nor *mtk_nor,
nor->dev = mtk_nor->dev;
nor->priv = mtk_nor;
spi_nor_set_flash_node(nor, flash_node);
+ nor->controller_ops = &mtk_controller_ops;
- /* fill the hooks to spi nor */
- nor->read = mtk_nor_read;
- nor->read_reg = mtk_nor_read_reg;
- nor->write = mtk_nor_write;
- nor->write_reg = mtk_nor_write_reg;
nor->mtd.name = "mtk_nor";
/* initialized with NULL */
ret = spi_nor_scan(nor, NULL, &hwcaps);
diff --git a/drivers/mtd/spi-nor/nxp-spifi.c b/drivers/mtd/spi-nor/nxp-spifi.c
index 4a871587392b..9a5b1a7c636a 100644
--- a/drivers/mtd/spi-nor/nxp-spifi.c
+++ b/drivers/mtd/spi-nor/nxp-spifi.c
@@ -123,7 +123,8 @@ static int nxp_spifi_set_memory_mode_on(struct nxp_spifi *spifi)
return ret;
}
-static int nxp_spifi_read_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len)
+static int nxp_spifi_read_reg(struct spi_nor *nor, u8 opcode, u8 *buf,
+ size_t len)
{
struct nxp_spifi *spifi = nor->priv;
u32 cmd;
@@ -145,7 +146,8 @@ static int nxp_spifi_read_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len)
return nxp_spifi_wait_for_cmd(spifi);
}
-static int nxp_spifi_write_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len)
+static int nxp_spifi_write_reg(struct spi_nor *nor, u8 opcode, const u8 *buf,
+ size_t len)
{
struct nxp_spifi *spifi = nor->priv;
u32 cmd;
@@ -263,9 +265,18 @@ static int nxp_spifi_setup_memory_cmd(struct nxp_spifi *spifi)
static void nxp_spifi_dummy_id_read(struct spi_nor *nor)
{
u8 id[SPI_NOR_MAX_ID_LEN];
- nor->read_reg(nor, SPINOR_OP_RDID, id, SPI_NOR_MAX_ID_LEN);
+ nor->controller_ops->read_reg(nor, SPINOR_OP_RDID, id,
+ SPI_NOR_MAX_ID_LEN);
}
+static const struct spi_nor_controller_ops nxp_spifi_controller_ops = {
+ .read_reg = nxp_spifi_read_reg,
+ .write_reg = nxp_spifi_write_reg,
+ .read = nxp_spifi_read,
+ .write = nxp_spifi_write,
+ .erase = nxp_spifi_erase,
+};
+
static int nxp_spifi_setup_flash(struct nxp_spifi *spifi,
struct device_node *np)
{
@@ -332,11 +343,7 @@ static int nxp_spifi_setup_flash(struct nxp_spifi *spifi,
spifi->nor.dev = spifi->dev;
spi_nor_set_flash_node(&spifi->nor, np);
spifi->nor.priv = spifi;
- spifi->nor.read = nxp_spifi_read;
- spifi->nor.write = nxp_spifi_write;
- spifi->nor.erase = nxp_spifi_erase;
- spifi->nor.read_reg = nxp_spifi_read_reg;
- spifi->nor.write_reg = nxp_spifi_write_reg;
+ spifi->nor.controller_ops = &nxp_spifi_controller_ops;
/*
* The first read on a hard reset isn't reliable so do a
diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 1d8621d43160..f4afe123e9dc 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -338,7 +338,7 @@ static ssize_t spi_nor_read_data(struct spi_nor *nor, loff_t from, size_t len,
if (nor->spimem)
return spi_nor_spimem_read_data(nor, from, len, buf);
- return nor->read(nor, from, len, buf);
+ return nor->controller_ops->read(nor, from, len, buf);
}
/**
@@ -385,239 +385,172 @@ static ssize_t spi_nor_write_data(struct spi_nor *nor, loff_t to, size_t len,
if (nor->spimem)
return spi_nor_spimem_write_data(nor, to, len, buf);
- return nor->write(nor, to, len, buf);
+ return nor->controller_ops->write(nor, to, len, buf);
}
-/*
- * Read the status register, returning its value in the location
- * Return the status register value.
- * Returns negative if error occurred.
+/**
+ * spi_nor_write_enable() - Set write enable latch with Write Enable command.
+ * @nor: pointer to 'struct spi_nor'.
+ *
+ * Return: 0 on success, -errno otherwise.
*/
-static int read_sr(struct spi_nor *nor)
+static int spi_nor_write_enable(struct spi_nor *nor)
{
int ret;
if (nor->spimem) {
struct spi_mem_op op =
- SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDSR, 1),
+ SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WREN, 1),
SPI_MEM_OP_NO_ADDR,
SPI_MEM_OP_NO_DUMMY,
- SPI_MEM_OP_DATA_IN(1, nor->bouncebuf, 1));
+ SPI_MEM_OP_NO_DATA);
ret = spi_mem_exec_op(nor->spimem, &op);
} else {
- ret = nor->read_reg(nor, SPINOR_OP_RDSR, nor->bouncebuf, 1);
+ ret = nor->controller_ops->write_reg(nor, SPINOR_OP_WREN,
+ NULL, 0);
}
- if (ret < 0) {
- pr_err("error %d reading SR\n", (int) ret);
- return ret;
- }
+ if (ret)
+ dev_dbg(nor->dev, "error %d on Write Enable\n", ret);
- return nor->bouncebuf[0];
+ return ret;
}
-/*
- * Read the flag status register, returning its value in the location
- * Return the status register value.
- * Returns negative if error occurred.
+/**
+ * spi_nor_write_disable() - Send Write Disable instruction to the chip.
+ * @nor: pointer to 'struct spi_nor'.
+ *
+ * Return: 0 on success, -errno otherwise.
*/
-static int read_fsr(struct spi_nor *nor)
+static int spi_nor_write_disable(struct spi_nor *nor)
{
int ret;
if (nor->spimem) {
struct spi_mem_op op =
- SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDFSR, 1),
+ SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WRDI, 1),
SPI_MEM_OP_NO_ADDR,
SPI_MEM_OP_NO_DUMMY,
- SPI_MEM_OP_DATA_IN(1, nor->bouncebuf, 1));
+ SPI_MEM_OP_NO_DATA);
ret = spi_mem_exec_op(nor->spimem, &op);
} else {
- ret = nor->read_reg(nor, SPINOR_OP_RDFSR, nor->bouncebuf, 1);
+ ret = nor->controller_ops->write_reg(nor, SPINOR_OP_WRDI,
+ NULL, 0);
}
- if (ret < 0) {
- pr_err("error %d reading FSR\n", ret);
- return ret;
- }
+ if (ret)
+ dev_dbg(nor->dev, "error %d on Write Disable\n", ret);
- return nor->bouncebuf[0];
+ return ret;
}
-/*
- * Read configuration register, returning its value in the
- * location. Return the configuration register value.
- * Returns negative if error occurred.
+/**
+ * spi_nor_read_sr() - Read the Status Register.
+ * @nor: pointer to 'struct spi_nor'.
+ * @sr: pointer to a DMA-able buffer where the value of the
+ * Status Register will be written.
+ *
+ * Return: 0 on success, -errno otherwise.
*/
-static int read_cr(struct spi_nor *nor)
+static int spi_nor_read_sr(struct spi_nor *nor, u8 *sr)
{
int ret;
if (nor->spimem) {
struct spi_mem_op op =
- SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDCR, 1),
+ SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDSR, 1),
SPI_MEM_OP_NO_ADDR,
SPI_MEM_OP_NO_DUMMY,
- SPI_MEM_OP_DATA_IN(1, nor->bouncebuf, 1));
+ SPI_MEM_OP_DATA_IN(1, sr, 1));
ret = spi_mem_exec_op(nor->spimem, &op);
} else {
- ret = nor->read_reg(nor, SPINOR_OP_RDCR, nor->bouncebuf, 1);
+ ret = nor->controller_ops->read_reg(nor, SPINOR_OP_RDSR,
+ sr, 1);
}
- if (ret < 0) {
- dev_err(nor->dev, "error %d reading CR\n", ret);
- return ret;
- }
+ if (ret)
+ dev_dbg(nor->dev, "error %d reading SR\n", ret);
- return nor->bouncebuf[0];
+ return ret;
}
-/*
- * Write status register 1 byte
- * Returns negative if error occurred.
+/**
+ * spi_nor_read_fsr() - Read the Flag Status Register.
+ * @nor: pointer to 'struct spi_nor'
+ * @fsr: pointer to a DMA-able buffer where the value of the
+ * Flag Status Register will be written.
+ *
+ * Return: 0 on success, -errno otherwise.
*/
-static int write_sr(struct spi_nor *nor, u8 val)
+static int spi_nor_read_fsr(struct spi_nor *nor, u8 *fsr)
{
- nor->bouncebuf[0] = val;
- if (nor->spimem) {
- struct spi_mem_op op =
- SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WRSR, 1),
- SPI_MEM_OP_NO_ADDR,
- SPI_MEM_OP_NO_DUMMY,
- SPI_MEM_OP_DATA_IN(1, nor->bouncebuf, 1));
-
- return spi_mem_exec_op(nor->spimem, &op);
- }
-
- return nor->write_reg(nor, SPINOR_OP_WRSR, nor->bouncebuf, 1);
-}
+ int ret;
-/*
- * Set write enable latch with Write Enable command.
- * Returns negative if error occurred.
- */
-static int write_enable(struct spi_nor *nor)
-{
if (nor->spimem) {
struct spi_mem_op op =
- SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WREN, 1),
+ SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDFSR, 1),
SPI_MEM_OP_NO_ADDR,
SPI_MEM_OP_NO_DUMMY,
- SPI_MEM_OP_NO_DATA);
+ SPI_MEM_OP_DATA_IN(1, fsr, 1));
- return spi_mem_exec_op(nor->spimem, &op);
+ ret = spi_mem_exec_op(nor->spimem, &op);
+ } else {
+ ret = nor->controller_ops->read_reg(nor, SPINOR_OP_RDFSR,
+ fsr, 1);
}
- return nor->write_reg(nor, SPINOR_OP_WREN, NULL, 0);
+ if (ret)
+ dev_dbg(nor->dev, "error %d reading FSR\n", ret);
+
+ return ret;
}
-/*
- * Send write disable instruction to the chip.
+/**
+ * spi_nor_read_cr() - Read the Configuration Register using the
+ * SPINOR_OP_RDCR (35h) command.
+ * @nor: pointer to 'struct spi_nor'
+ * @cr: pointer to a DMA-able buffer where the value of the
+ * Configuration Register will be written.
+ *
+ * Return: 0 on success, -errno otherwise.
*/
-static int write_disable(struct spi_nor *nor)
+static int spi_nor_read_cr(struct spi_nor *nor, u8 *cr)
{
+ int ret;
+
if (nor->spimem) {
struct spi_mem_op op =
- SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WRDI, 1),
+ SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDCR, 1),
SPI_MEM_OP_NO_ADDR,
SPI_MEM_OP_NO_DUMMY,
- SPI_MEM_OP_NO_DATA);
+ SPI_MEM_OP_DATA_IN(1, cr, 1));
- return spi_mem_exec_op(nor->spimem, &op);
+ ret = spi_mem_exec_op(nor->spimem, &op);
+ } else {
+ ret = nor->controller_ops->read_reg(nor, SPINOR_OP_RDCR, cr, 1);
}
- return nor->write_reg(nor, SPINOR_OP_WRDI, NULL, 0);
-}
-
-static struct spi_nor *mtd_to_spi_nor(struct mtd_info *mtd)
-{
- return mtd->priv;
-}
-
-
-static u8 spi_nor_convert_opcode(u8 opcode, const u8 table[][2], size_t size)
-{
- size_t i;
-
- for (i = 0; i < size; i++)
- if (table[i][0] == opcode)
- return table[i][1];
-
- /* No conversion found, keep input op code. */
- return opcode;
-}
-
-static u8 spi_nor_convert_3to4_read(u8 opcode)
-{
- static const u8 spi_nor_3to4_read[][2] = {
- { SPINOR_OP_READ, SPINOR_OP_READ_4B },
- { SPINOR_OP_READ_FAST, SPINOR_OP_READ_FAST_4B },
- { SPINOR_OP_READ_1_1_2, SPINOR_OP_READ_1_1_2_4B },
- { SPINOR_OP_READ_1_2_2, SPINOR_OP_READ_1_2_2_4B },
- { SPINOR_OP_READ_1_1_4, SPINOR_OP_READ_1_1_4_4B },
- { SPINOR_OP_READ_1_4_4, SPINOR_OP_READ_1_4_4_4B },
- { SPINOR_OP_READ_1_1_8, SPINOR_OP_READ_1_1_8_4B },
- { SPINOR_OP_READ_1_8_8, SPINOR_OP_READ_1_8_8_4B },
-
- { SPINOR_OP_READ_1_1_1_DTR, SPINOR_OP_READ_1_1_1_DTR_4B },
- { SPINOR_OP_READ_1_2_2_DTR, SPINOR_OP_READ_1_2_2_DTR_4B },
- { SPINOR_OP_READ_1_4_4_DTR, SPINOR_OP_READ_1_4_4_DTR_4B },
- };
-
- return spi_nor_convert_opcode(opcode, spi_nor_3to4_read,
- ARRAY_SIZE(spi_nor_3to4_read));
-}
-
-static u8 spi_nor_convert_3to4_program(u8 opcode)
-{
- static const u8 spi_nor_3to4_program[][2] = {
- { SPINOR_OP_PP, SPINOR_OP_PP_4B },
- { SPINOR_OP_PP_1_1_4, SPINOR_OP_PP_1_1_4_4B },
- { SPINOR_OP_PP_1_4_4, SPINOR_OP_PP_1_4_4_4B },
- { SPINOR_OP_PP_1_1_8, SPINOR_OP_PP_1_1_8_4B },
- { SPINOR_OP_PP_1_8_8, SPINOR_OP_PP_1_8_8_4B },
- };
-
- return spi_nor_convert_opcode(opcode, spi_nor_3to4_program,
- ARRAY_SIZE(spi_nor_3to4_program));
-}
-
-static u8 spi_nor_convert_3to4_erase(u8 opcode)
-{
- static const u8 spi_nor_3to4_erase[][2] = {
- { SPINOR_OP_BE_4K, SPINOR_OP_BE_4K_4B },
- { SPINOR_OP_BE_32K, SPINOR_OP_BE_32K_4B },
- { SPINOR_OP_SE, SPINOR_OP_SE_4B },
- };
-
- return spi_nor_convert_opcode(opcode, spi_nor_3to4_erase,
- ARRAY_SIZE(spi_nor_3to4_erase));
-}
-
-static void spi_nor_set_4byte_opcodes(struct spi_nor *nor)
-{
- nor->read_opcode = spi_nor_convert_3to4_read(nor->read_opcode);
- nor->program_opcode = spi_nor_convert_3to4_program(nor->program_opcode);
- nor->erase_opcode = spi_nor_convert_3to4_erase(nor->erase_opcode);
-
- if (!spi_nor_has_uniform_erase(nor)) {
- struct spi_nor_erase_map *map = &nor->params.erase_map;
- struct spi_nor_erase_type *erase;
- int i;
+ if (ret)
+ dev_dbg(nor->dev, "error %d reading CR\n", ret);
- for (i = 0; i < SNOR_ERASE_TYPE_MAX; i++) {
- erase = &map->erase_type[i];
- erase->opcode =
- spi_nor_convert_3to4_erase(erase->opcode);
- }
- }
+ return ret;
}
+/**
+ * macronix_set_4byte() - Set 4-byte address mode for Macronix flashes.
+ * @nor: pointer to 'struct spi_nor'.
+ * @enable: true to enter the 4-byte address mode, false to exit the 4-byte
+ * address mode.
+ *
+ * Return: 0 on success, -errno otherwise.
+ */
static int macronix_set_4byte(struct spi_nor *nor, bool enable)
{
+ int ret;
+
if (nor->spimem) {
struct spi_mem_op op =
SPI_MEM_OP(SPI_MEM_OP_CMD(enable ?
@@ -628,26 +561,55 @@ static int macronix_set_4byte(struct spi_nor *nor, bool enable)
SPI_MEM_OP_NO_DUMMY,
SPI_MEM_OP_NO_DATA);
- return spi_mem_exec_op(nor->spimem, &op);
+ ret = spi_mem_exec_op(nor->spimem, &op);
+ } else {
+ ret = nor->controller_ops->write_reg(nor,
+ enable ? SPINOR_OP_EN4B :
+ SPINOR_OP_EX4B,
+ NULL, 0);
}
- return nor->write_reg(nor, enable ? SPINOR_OP_EN4B : SPINOR_OP_EX4B,
- NULL, 0);
+ if (ret)
+ dev_dbg(nor->dev, "error %d setting 4-byte mode\n", ret);
+
+ return ret;
}
+/**
+ * st_micron_set_4byte() - Set 4-byte address mode for ST and Micron flashes.
+ * @nor: pointer to 'struct spi_nor'.
+ * @enable: true to enter the 4-byte address mode, false to exit the 4-byte
+ * address mode.
+ *
+ * Return: 0 on success, -errno otherwise.
+ */
static int st_micron_set_4byte(struct spi_nor *nor, bool enable)
{
int ret;
- write_enable(nor);
+ ret = spi_nor_write_enable(nor);
+ if (ret)
+ return ret;
+
ret = macronix_set_4byte(nor, enable);
- write_disable(nor);
+ if (ret)
+ return ret;
- return ret;
+ return spi_nor_write_disable(nor);
}
+/**
+ * spansion_set_4byte() - Set 4-byte address mode for Spansion flashes.
+ * @nor: pointer to 'struct spi_nor'.
+ * @enable: true to enter the 4-byte address mode, false to exit the 4-byte
+ * address mode.
+ *
+ * Return: 0 on success, -errno otherwise.
+ */
static int spansion_set_4byte(struct spi_nor *nor, bool enable)
{
+ int ret;
+
nor->bouncebuf[0] = enable << 7;
if (nor->spimem) {
@@ -657,14 +619,29 @@ static int spansion_set_4byte(struct spi_nor *nor, bool enable)
SPI_MEM_OP_NO_DUMMY,
SPI_MEM_OP_DATA_OUT(1, nor->bouncebuf, 1));
- return spi_mem_exec_op(nor->spimem, &op);
+ ret = spi_mem_exec_op(nor->spimem, &op);
+ } else {
+ ret = nor->controller_ops->write_reg(nor, SPINOR_OP_BRWR,
+ nor->bouncebuf, 1);
}
- return nor->write_reg(nor, SPINOR_OP_BRWR, nor->bouncebuf, 1);
+ if (ret)
+ dev_dbg(nor->dev, "error %d setting 4-byte mode\n", ret);
+
+ return ret;
}
+/**
+ * spi_nor_write_ear() - Write Extended Address Register.
+ * @nor: pointer to 'struct spi_nor'.
+ * @ear: value to write to the Extended Address Register.
+ *
+ * Return: 0 on success, -errno otherwise.
+ */
static int spi_nor_write_ear(struct spi_nor *nor, u8 ear)
{
+ int ret;
+
nor->bouncebuf[0] = ear;
if (nor->spimem) {
@@ -674,12 +651,26 @@ static int spi_nor_write_ear(struct spi_nor *nor, u8 ear)
SPI_MEM_OP_NO_DUMMY,
SPI_MEM_OP_DATA_OUT(1, nor->bouncebuf, 1));
- return spi_mem_exec_op(nor->spimem, &op);
+ ret = spi_mem_exec_op(nor->spimem, &op);
+ } else {
+ ret = nor->controller_ops->write_reg(nor, SPINOR_OP_WREAR,
+ nor->bouncebuf, 1);
}
- return nor->write_reg(nor, SPINOR_OP_WREAR, nor->bouncebuf, 1);
+ if (ret)
+ dev_dbg(nor->dev, "error %d writing EAR\n", ret);
+
+ return ret;
}
+/**
+ * winbond_set_4byte() - Set 4-byte address mode for Winbond flashes.
+ * @nor: pointer to 'struct spi_nor'.
+ * @enable: true to enter the 4-byte address mode, false to exit the 4-byte
+ * address mode.
+ *
+ * Return: 0 on success, -errno otherwise.
+ */
static int winbond_set_4byte(struct spi_nor *nor, bool enable)
{
int ret;
@@ -693,15 +684,29 @@ static int winbond_set_4byte(struct spi_nor *nor, bool enable)
* Register to be set to 1, so all 3-byte-address reads come from the
* second 16M. We must clear the register to enable normal behavior.
*/
- write_enable(nor);
+ ret = spi_nor_write_enable(nor);
+ if (ret)
+ return ret;
+
ret = spi_nor_write_ear(nor, 0);
- write_disable(nor);
+ if (ret)
+ return ret;
- return ret;
+ return spi_nor_write_disable(nor);
}
+/**
+ * spi_nor_xread_sr() - Read the Status Register on S3AN flashes.
+ * @nor: pointer to 'struct spi_nor'.
+ * @sr: pointer to a DMA-able buffer where the value of the
+ * Status Register will be written.
+ *
+ * Return: 0 on success, -errno otherwise.
+ */
static int spi_nor_xread_sr(struct spi_nor *nor, u8 *sr)
{
+ int ret;
+
if (nor->spimem) {
struct spi_mem_op op =
SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_XRDSR, 1),
@@ -709,27 +714,44 @@ static int spi_nor_xread_sr(struct spi_nor *nor, u8 *sr)
SPI_MEM_OP_NO_DUMMY,
SPI_MEM_OP_DATA_IN(1, sr, 1));
- return spi_mem_exec_op(nor->spimem, &op);
+ ret = spi_mem_exec_op(nor->spimem, &op);
+ } else {
+ ret = nor->controller_ops->read_reg(nor, SPINOR_OP_XRDSR,
+ sr, 1);
}
- return nor->read_reg(nor, SPINOR_OP_XRDSR, sr, 1);
+ if (ret)
+ dev_dbg(nor->dev, "error %d reading XRDSR\n", ret);
+
+ return ret;
}
+/**
+ * s3an_sr_ready() - Query the Status Register of the S3AN flash to see if the
+ * flash is ready for new commands.
+ * @nor: pointer to 'struct spi_nor'.
+ *
+ * Return: 0 on success, -errno otherwise.
+ */
static int s3an_sr_ready(struct spi_nor *nor)
{
int ret;
ret = spi_nor_xread_sr(nor, nor->bouncebuf);
- if (ret < 0) {
- dev_err(nor->dev, "error %d reading XRDSR\n", (int) ret);
+ if (ret)
return ret;
- }
return !!(nor->bouncebuf[0] & XSR_RDY);
}
-static int spi_nor_clear_sr(struct spi_nor *nor)
+/**
+ * spi_nor_clear_sr() - Clear the Status Register.
+ * @nor: pointer to 'struct spi_nor'.
+ */
+static void spi_nor_clear_sr(struct spi_nor *nor)
{
+ int ret;
+
if (nor->spimem) {
struct spi_mem_op op =
SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_CLSR, 1),
@@ -737,20 +759,33 @@ static int spi_nor_clear_sr(struct spi_nor *nor)
SPI_MEM_OP_NO_DUMMY,
SPI_MEM_OP_NO_DATA);
- return spi_mem_exec_op(nor->spimem, &op);
+ ret = spi_mem_exec_op(nor->spimem, &op);
+ } else {
+ ret = nor->controller_ops->write_reg(nor, SPINOR_OP_CLSR,
+ NULL, 0);
}
- return nor->write_reg(nor, SPINOR_OP_CLSR, NULL, 0);
+ if (ret)
+ dev_dbg(nor->dev, "error %d clearing SR\n", ret);
}
+/**
+ * spi_nor_sr_ready() - Query the Status Register to see if the flash is ready
+ * for new commands.
+ * @nor: pointer to 'struct spi_nor'.
+ *
+ * Return: 0 on success, -errno otherwise.
+ */
static int spi_nor_sr_ready(struct spi_nor *nor)
{
- int sr = read_sr(nor);
- if (sr < 0)
- return sr;
+ int ret = spi_nor_read_sr(nor, nor->bouncebuf);
+
+ if (ret)
+ return ret;
- if (nor->flags & SNOR_F_USE_CLSR && sr & (SR_E_ERR | SR_P_ERR)) {
- if (sr & SR_E_ERR)
+ if (nor->flags & SNOR_F_USE_CLSR &&
+ nor->bouncebuf[0] & (SR_E_ERR | SR_P_ERR)) {
+ if (nor->bouncebuf[0] & SR_E_ERR)
dev_err(nor->dev, "Erase Error occurred\n");
else
dev_err(nor->dev, "Programming Error occurred\n");
@@ -759,11 +794,17 @@ static int spi_nor_sr_ready(struct spi_nor *nor)
return -EIO;
}
- return !(sr & SR_WIP);
+ return !(nor->bouncebuf[0] & SR_WIP);
}
-static int spi_nor_clear_fsr(struct spi_nor *nor)
+/**
+ * spi_nor_clear_fsr() - Clear the Flag Status Register.
+ * @nor: pointer to 'struct spi_nor'.
+ */
+static void spi_nor_clear_fsr(struct spi_nor *nor)
{
+ int ret;
+
if (nor->spimem) {
struct spi_mem_op op =
SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_CLFSR, 1),
@@ -771,25 +812,37 @@ static int spi_nor_clear_fsr(struct spi_nor *nor)
SPI_MEM_OP_NO_DUMMY,
SPI_MEM_OP_NO_DATA);
- return spi_mem_exec_op(nor->spimem, &op);
+ ret = spi_mem_exec_op(nor->spimem, &op);
+ } else {
+ ret = nor->controller_ops->write_reg(nor, SPINOR_OP_CLFSR,
+ NULL, 0);
}
- return nor->write_reg(nor, SPINOR_OP_CLFSR, NULL, 0);
+ if (ret)
+ dev_dbg(nor->dev, "error %d clearing FSR\n", ret);
}
+/**
+ * spi_nor_fsr_ready() - Query the Flag Status Register to see if the flash is
+ * ready for new commands.
+ * @nor: pointer to 'struct spi_nor'.
+ *
+ * Return: 0 on success, -errno otherwise.
+ */
static int spi_nor_fsr_ready(struct spi_nor *nor)
{
- int fsr = read_fsr(nor);
- if (fsr < 0)
- return fsr;
+ int ret = spi_nor_read_fsr(nor, nor->bouncebuf);
- if (fsr & (FSR_E_ERR | FSR_P_ERR)) {
- if (fsr & FSR_E_ERR)
+ if (ret)
+ return ret;
+
+ if (nor->bouncebuf[0] & (FSR_E_ERR | FSR_P_ERR)) {
+ if (nor->bouncebuf[0] & FSR_E_ERR)
dev_err(nor->dev, "Erase operation failed.\n");
else
dev_err(nor->dev, "Program operation failed.\n");
- if (fsr & FSR_PT_ERR)
+ if (nor->bouncebuf[0] & FSR_PT_ERR)
dev_err(nor->dev,
"Attempted to modify a protected sector.\n");
@@ -797,9 +850,15 @@ static int spi_nor_fsr_ready(struct spi_nor *nor)
return -EIO;
}
- return fsr & FSR_READY;
+ return nor->bouncebuf[0] & FSR_READY;
}
+/**
+ * spi_nor_ready() - Query the flash to see if it is ready for new commands.
+ * @nor: pointer to 'struct spi_nor'.
+ *
+ * Return: 0 on success, -errno otherwise.
+ */
static int spi_nor_ready(struct spi_nor *nor)
{
int sr, fsr;
@@ -816,9 +875,13 @@ static int spi_nor_ready(struct spi_nor *nor)
return sr && fsr;
}
-/*
- * Service routine to read status register until ready, or timeout occurs.
- * Returns non-zero if error.
+/**
+ * spi_nor_wait_till_ready_with_timeout() - Service routine to read the
+ * Status Register until ready, or timeout occurs.
+ * @nor: pointer to "struct spi_nor".
+ * @timeout_jiffies: jiffies to wait until timeout.
+ *
+ * Return: 0 on success, -errno otherwise.
*/
static int spi_nor_wait_till_ready_with_timeout(struct spi_nor *nor,
unsigned long timeout_jiffies)
@@ -841,24 +904,305 @@ static int spi_nor_wait_till_ready_with_timeout(struct spi_nor *nor,
cond_resched();
}
- dev_err(nor->dev, "flash operation timed out\n");
+ dev_dbg(nor->dev, "flash operation timed out\n");
return -ETIMEDOUT;
}
+/**
+ * spi_nor_wait_till_ready() - Wait for a predefined amount of time for the
+ * flash to be ready, or timeout occurs.
+ * @nor: pointer to "struct spi_nor".
+ *
+ * Return: 0 on success, -errno otherwise.
+ */
static int spi_nor_wait_till_ready(struct spi_nor *nor)
{
return spi_nor_wait_till_ready_with_timeout(nor,
DEFAULT_READY_WAIT_JIFFIES);
}
-/*
- * Erase the whole flash memory
+/**
+ * spi_nor_write_sr() - Write the Status Register.
+ * @nor: pointer to 'struct spi_nor'.
+ * @sr: pointer to DMA-able buffer to write to the Status Register.
+ * @len: number of bytes to write to the Status Register.
+ *
+ * Return: 0 on success, -errno otherwise.
+ */
+static int spi_nor_write_sr(struct spi_nor *nor, const u8 *sr, size_t len)
+{
+ int ret;
+
+ ret = spi_nor_write_enable(nor);
+ if (ret)
+ return ret;
+
+ if (nor->spimem) {
+ struct spi_mem_op op =
+ SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WRSR, 1),
+ SPI_MEM_OP_NO_ADDR,
+ SPI_MEM_OP_NO_DUMMY,
+ SPI_MEM_OP_DATA_OUT(len, sr, 1));
+
+ ret = spi_mem_exec_op(nor->spimem, &op);
+ } else {
+ ret = nor->controller_ops->write_reg(nor, SPINOR_OP_WRSR,
+ sr, len);
+ }
+
+ if (ret) {
+ dev_dbg(nor->dev, "error %d writing SR\n", ret);
+ return ret;
+ }
+
+ return spi_nor_wait_till_ready(nor);
+}
+
+/**
+ * spi_nor_write_sr1_and_check() - Write one byte to the Status Register 1 and
+ * ensure that the byte written match the received value.
+ * @nor: pointer to a 'struct spi_nor'.
+ * @sr1: byte value to be written to the Status Register.
+ *
+ * Return: 0 on success, -errno otherwise.
+ */
+static int spi_nor_write_sr1_and_check(struct spi_nor *nor, u8 sr1)
+{
+ int ret;
+
+ nor->bouncebuf[0] = sr1;
+
+ ret = spi_nor_write_sr(nor, nor->bouncebuf, 1);
+ if (ret)
+ return ret;
+
+ ret = spi_nor_read_sr(nor, nor->bouncebuf);
+ if (ret)
+ return ret;
+
+ if (nor->bouncebuf[0] != sr1) {
+ dev_dbg(nor->dev, "SR1: read back test failed\n");
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/**
+ * spi_nor_write_16bit_sr_and_check() - Write the Status Register 1 and the
+ * Status Register 2 in one shot. Ensure that the byte written in the Status
+ * Register 1 match the received value, and that the 16-bit Write did not
+ * affect what was already in the Status Register 2.
+ * @nor: pointer to a 'struct spi_nor'.
+ * @sr1: byte value to be written to the Status Register 1.
+ *
+ * Return: 0 on success, -errno otherwise.
+ */
+static int spi_nor_write_16bit_sr_and_check(struct spi_nor *nor, u8 sr1)
+{
+ int ret;
+ u8 *sr_cr = nor->bouncebuf;
+ u8 cr_written;
+
+ /* Make sure we don't overwrite the contents of Status Register 2. */
+ if (!(nor->flags & SNOR_F_NO_READ_CR)) {
+ ret = spi_nor_read_cr(nor, &sr_cr[1]);
+ if (ret)
+ return ret;
+ } else if (nor->params.quad_enable) {
+ /*
+ * If the Status Register 2 Read command (35h) is not
+ * supported, we should at least be sure we don't
+ * change the value of the SR2 Quad Enable bit.
+ *
+ * We can safely assume that when the Quad Enable method is
+ * set, the value of the QE bit is one, as a consequence of the
+ * nor->params.quad_enable() call.
+ *
+ * We can safely assume that the Quad Enable bit is present in
+ * the Status Register 2 at BIT(1). According to the JESD216
+ * revB standard, BFPT DWORDS[15], bits 22:20, the 16-bit
+ * Write Status (01h) command is available just for the cases
+ * in which the QE bit is described in SR2 at BIT(1).
+ */
+ sr_cr[1] = SR2_QUAD_EN_BIT1;
+ } else {
+ sr_cr[1] = 0;
+ }
+
+ sr_cr[0] = sr1;
+
+ ret = spi_nor_write_sr(nor, sr_cr, 2);
+ if (ret)
+ return ret;
+
+ if (nor->flags & SNOR_F_NO_READ_CR)
+ return 0;
+
+ cr_written = sr_cr[1];
+
+ ret = spi_nor_read_cr(nor, &sr_cr[1]);
+ if (ret)
+ return ret;
+
+ if (cr_written != sr_cr[1]) {
+ dev_dbg(nor->dev, "CR: read back test failed\n");
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/**
+ * spi_nor_write_16bit_cr_and_check() - Write the Status Register 1 and the
+ * Configuration Register in one shot. Ensure that the byte written in the
+ * Configuration Register match the received value, and that the 16-bit Write
+ * did not affect what was already in the Status Register 1.
+ * @nor: pointer to a 'struct spi_nor'.
+ * @cr: byte value to be written to the Configuration Register.
+ *
+ * Return: 0 on success, -errno otherwise.
+ */
+static int spi_nor_write_16bit_cr_and_check(struct spi_nor *nor, u8 cr)
+{
+ int ret;
+ u8 *sr_cr = nor->bouncebuf;
+ u8 sr_written;
+
+ /* Keep the current value of the Status Register 1. */
+ ret = spi_nor_read_sr(nor, sr_cr);
+ if (ret)
+ return ret;
+
+ sr_cr[1] = cr;
+
+ ret = spi_nor_write_sr(nor, sr_cr, 2);
+ if (ret)
+ return ret;
+
+ sr_written = sr_cr[0];
+
+ ret = spi_nor_read_sr(nor, sr_cr);
+ if (ret)
+ return ret;
+
+ if (sr_written != sr_cr[0]) {
+ dev_dbg(nor->dev, "SR: Read back test failed\n");
+ return -EIO;
+ }
+
+ if (nor->flags & SNOR_F_NO_READ_CR)
+ return 0;
+
+ ret = spi_nor_read_cr(nor, &sr_cr[1]);
+ if (ret)
+ return ret;
+
+ if (cr != sr_cr[1]) {
+ dev_dbg(nor->dev, "CR: read back test failed\n");
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/**
+ * spi_nor_write_sr_and_check() - Write the Status Register 1 and ensure that
+ * the byte written match the received value without affecting other bits in the
+ * Status Register 1 and 2.
+ * @nor: pointer to a 'struct spi_nor'.
+ * @sr1: byte value to be written to the Status Register.
+ *
+ * Return: 0 on success, -errno otherwise.
+ */
+static int spi_nor_write_sr_and_check(struct spi_nor *nor, u8 sr1)
+{
+ if (nor->flags & SNOR_F_HAS_16BIT_SR)
+ return spi_nor_write_16bit_sr_and_check(nor, sr1);
+
+ return spi_nor_write_sr1_and_check(nor, sr1);
+}
+
+/**
+ * spi_nor_write_sr2() - Write the Status Register 2 using the
+ * SPINOR_OP_WRSR2 (3eh) command.
+ * @nor: pointer to 'struct spi_nor'.
+ * @sr2: pointer to DMA-able buffer to write to the Status Register 2.
*
- * Returns 0 if successful, non-zero otherwise.
+ * Return: 0 on success, -errno otherwise.
*/
-static int erase_chip(struct spi_nor *nor)
+static int spi_nor_write_sr2(struct spi_nor *nor, const u8 *sr2)
{
+ int ret;
+
+ ret = spi_nor_write_enable(nor);
+ if (ret)
+ return ret;
+
+ if (nor->spimem) {
+ struct spi_mem_op op =
+ SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WRSR2, 1),
+ SPI_MEM_OP_NO_ADDR,
+ SPI_MEM_OP_NO_DUMMY,
+ SPI_MEM_OP_DATA_OUT(1, sr2, 1));
+
+ ret = spi_mem_exec_op(nor->spimem, &op);
+ } else {
+ ret = nor->controller_ops->write_reg(nor, SPINOR_OP_WRSR2,
+ sr2, 1);
+ }
+
+ if (ret) {
+ dev_dbg(nor->dev, "error %d writing SR2\n", ret);
+ return ret;
+ }
+
+ return spi_nor_wait_till_ready(nor);
+}
+
+/**
+ * spi_nor_read_sr2() - Read the Status Register 2 using the
+ * SPINOR_OP_RDSR2 (3fh) command.
+ * @nor: pointer to 'struct spi_nor'.
+ * @sr2: pointer to DMA-able buffer where the value of the
+ * Status Register 2 will be written.
+ *
+ * Return: 0 on success, -errno otherwise.
+ */
+static int spi_nor_read_sr2(struct spi_nor *nor, u8 *sr2)
+{
+ int ret;
+
+ if (nor->spimem) {
+ struct spi_mem_op op =
+ SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDSR2, 1),
+ SPI_MEM_OP_NO_ADDR,
+ SPI_MEM_OP_NO_DUMMY,
+ SPI_MEM_OP_DATA_IN(1, sr2, 1));
+
+ ret = spi_mem_exec_op(nor->spimem, &op);
+ } else {
+ ret = nor->controller_ops->read_reg(nor, SPINOR_OP_RDSR2,
+ sr2, 1);
+ }
+
+ if (ret)
+ dev_dbg(nor->dev, "error %d reading SR2\n", ret);
+
+ return ret;
+}
+
+/**
+ * spi_nor_erase_chip() - Erase the entire flash memory.
+ * @nor: pointer to 'struct spi_nor'.
+ *
+ * Return: 0 on success, -errno otherwise.
+ */
+static int spi_nor_erase_chip(struct spi_nor *nor)
+{
+ int ret;
+
dev_dbg(nor->dev, " %lldKiB\n", (long long)(nor->mtd.size >> 10));
if (nor->spimem) {
@@ -868,10 +1212,99 @@ static int erase_chip(struct spi_nor *nor)
SPI_MEM_OP_NO_DUMMY,
SPI_MEM_OP_NO_DATA);
- return spi_mem_exec_op(nor->spimem, &op);
+ ret = spi_mem_exec_op(nor->spimem, &op);
+ } else {
+ ret = nor->controller_ops->write_reg(nor, SPINOR_OP_CHIP_ERASE,
+ NULL, 0);
}
- return nor->write_reg(nor, SPINOR_OP_CHIP_ERASE, NULL, 0);
+ if (ret)
+ dev_dbg(nor->dev, "error %d erasing chip\n", ret);
+
+ return ret;
+}
+
+static struct spi_nor *mtd_to_spi_nor(struct mtd_info *mtd)
+{
+ return mtd->priv;
+}
+
+static u8 spi_nor_convert_opcode(u8 opcode, const u8 table[][2], size_t size)
+{
+ size_t i;
+
+ for (i = 0; i < size; i++)
+ if (table[i][0] == opcode)
+ return table[i][1];
+
+ /* No conversion found, keep input op code. */
+ return opcode;
+}
+
+static u8 spi_nor_convert_3to4_read(u8 opcode)
+{
+ static const u8 spi_nor_3to4_read[][2] = {
+ { SPINOR_OP_READ, SPINOR_OP_READ_4B },
+ { SPINOR_OP_READ_FAST, SPINOR_OP_READ_FAST_4B },
+ { SPINOR_OP_READ_1_1_2, SPINOR_OP_READ_1_1_2_4B },
+ { SPINOR_OP_READ_1_2_2, SPINOR_OP_READ_1_2_2_4B },
+ { SPINOR_OP_READ_1_1_4, SPINOR_OP_READ_1_1_4_4B },
+ { SPINOR_OP_READ_1_4_4, SPINOR_OP_READ_1_4_4_4B },
+ { SPINOR_OP_READ_1_1_8, SPINOR_OP_READ_1_1_8_4B },
+ { SPINOR_OP_READ_1_8_8, SPINOR_OP_READ_1_8_8_4B },
+
+ { SPINOR_OP_READ_1_1_1_DTR, SPINOR_OP_READ_1_1_1_DTR_4B },
+ { SPINOR_OP_READ_1_2_2_DTR, SPINOR_OP_READ_1_2_2_DTR_4B },
+ { SPINOR_OP_READ_1_4_4_DTR, SPINOR_OP_READ_1_4_4_DTR_4B },
+ };
+
+ return spi_nor_convert_opcode(opcode, spi_nor_3to4_read,
+ ARRAY_SIZE(spi_nor_3to4_read));
+}
+
+static u8 spi_nor_convert_3to4_program(u8 opcode)
+{
+ static const u8 spi_nor_3to4_program[][2] = {
+ { SPINOR_OP_PP, SPINOR_OP_PP_4B },
+ { SPINOR_OP_PP_1_1_4, SPINOR_OP_PP_1_1_4_4B },
+ { SPINOR_OP_PP_1_4_4, SPINOR_OP_PP_1_4_4_4B },
+ { SPINOR_OP_PP_1_1_8, SPINOR_OP_PP_1_1_8_4B },
+ { SPINOR_OP_PP_1_8_8, SPINOR_OP_PP_1_8_8_4B },
+ };
+
+ return spi_nor_convert_opcode(opcode, spi_nor_3to4_program,
+ ARRAY_SIZE(spi_nor_3to4_program));
+}
+
+static u8 spi_nor_convert_3to4_erase(u8 opcode)
+{
+ static const u8 spi_nor_3to4_erase[][2] = {
+ { SPINOR_OP_BE_4K, SPINOR_OP_BE_4K_4B },
+ { SPINOR_OP_BE_32K, SPINOR_OP_BE_32K_4B },
+ { SPINOR_OP_SE, SPINOR_OP_SE_4B },
+ };
+
+ return spi_nor_convert_opcode(opcode, spi_nor_3to4_erase,
+ ARRAY_SIZE(spi_nor_3to4_erase));
+}
+
+static void spi_nor_set_4byte_opcodes(struct spi_nor *nor)
+{
+ nor->read_opcode = spi_nor_convert_3to4_read(nor->read_opcode);
+ nor->program_opcode = spi_nor_convert_3to4_program(nor->program_opcode);
+ nor->erase_opcode = spi_nor_convert_3to4_erase(nor->erase_opcode);
+
+ if (!spi_nor_has_uniform_erase(nor)) {
+ struct spi_nor_erase_map *map = &nor->params.erase_map;
+ struct spi_nor_erase_type *erase;
+ int i;
+
+ for (i = 0; i < SNOR_ERASE_TYPE_MAX; i++) {
+ erase = &map->erase_type[i];
+ erase->opcode =
+ spi_nor_convert_3to4_erase(erase->opcode);
+ }
+ }
}
static int spi_nor_lock_and_prep(struct spi_nor *nor, enum spi_nor_ops ops)
@@ -880,10 +1313,9 @@ static int spi_nor_lock_and_prep(struct spi_nor *nor, enum spi_nor_ops ops)
mutex_lock(&nor->lock);
- if (nor->prepare) {
- ret = nor->prepare(nor, ops);
+ if (nor->controller_ops && nor->controller_ops->prepare) {
+ ret = nor->controller_ops->prepare(nor, ops);
if (ret) {
- dev_err(nor->dev, "failed in the preparation.\n");
mutex_unlock(&nor->lock);
return ret;
}
@@ -893,8 +1325,8 @@ static int spi_nor_lock_and_prep(struct spi_nor *nor, enum spi_nor_ops ops)
static void spi_nor_unlock_and_unprep(struct spi_nor *nor, enum spi_nor_ops ops)
{
- if (nor->unprepare)
- nor->unprepare(nor, ops);
+ if (nor->controller_ops && nor->controller_ops->unprepare)
+ nor->controller_ops->unprepare(nor, ops);
mutex_unlock(&nor->lock);
}
@@ -935,9 +1367,6 @@ static int spi_nor_erase_sector(struct spi_nor *nor, u32 addr)
addr = spi_nor_convert_addr(nor, addr);
- if (nor->erase)
- return nor->erase(nor, addr);
-
if (nor->spimem) {
struct spi_mem_op op =
SPI_MEM_OP(SPI_MEM_OP_CMD(nor->erase_opcode, 1),
@@ -946,6 +1375,8 @@ static int spi_nor_erase_sector(struct spi_nor *nor, u32 addr)
SPI_MEM_OP_NO_DATA);
return spi_mem_exec_op(nor->spimem, &op);
+ } else if (nor->controller_ops->erase) {
+ return nor->controller_ops->erase(nor, addr);
}
/*
@@ -957,8 +1388,8 @@ static int spi_nor_erase_sector(struct spi_nor *nor, u32 addr)
addr >>= 8;
}
- return nor->write_reg(nor, nor->erase_opcode, nor->bouncebuf,
- nor->addr_width);
+ return nor->controller_ops->write_reg(nor, nor->erase_opcode,
+ nor->bouncebuf, nor->addr_width);
}
/**
@@ -1208,7 +1639,9 @@ static int spi_nor_erase_multi_sectors(struct spi_nor *nor, u64 addr, u32 len)
list_for_each_entry_safe(cmd, next, &erase_list, list) {
nor->erase_opcode = cmd->opcode;
while (cmd->count) {
- write_enable(nor);
+ ret = spi_nor_write_enable(nor);
+ if (ret)
+ goto destroy_erase_cmd_list;
ret = spi_nor_erase_sector(nor, addr);
if (ret)
@@ -1263,12 +1696,13 @@ static int spi_nor_erase(struct mtd_info *mtd, struct erase_info *instr)
if (len == mtd->size && !(nor->flags & SNOR_F_NO_OP_CHIP_ERASE)) {
unsigned long timeout;
- write_enable(nor);
+ ret = spi_nor_write_enable(nor);
+ if (ret)
+ goto erase_err;
- if (erase_chip(nor)) {
- ret = -EIO;
+ ret = spi_nor_erase_chip(nor);
+ if (ret)
goto erase_err;
- }
/*
* Scale the timeout linearly with the size of the flash, with
@@ -1291,7 +1725,9 @@ static int spi_nor_erase(struct mtd_info *mtd, struct erase_info *instr)
/* "sector"-at-a-time erase */
} else if (spi_nor_has_uniform_erase(nor)) {
while (len) {
- write_enable(nor);
+ ret = spi_nor_write_enable(nor);
+ if (ret)
+ goto erase_err;
ret = spi_nor_erase_sector(nor, addr);
if (ret)
@@ -1312,7 +1748,7 @@ static int spi_nor_erase(struct mtd_info *mtd, struct erase_info *instr)
goto erase_err;
}
- write_disable(nor);
+ ret = spi_nor_write_disable(nor);
erase_err:
spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_ERASE);
@@ -1320,27 +1756,6 @@ erase_err:
return ret;
}
-/* Write status register and ensure bits in mask match written values */
-static int write_sr_and_check(struct spi_nor *nor, u8 status_new, u8 mask)
-{
- int ret;
-
- write_enable(nor);
- ret = write_sr(nor, status_new);
- if (ret)
- return ret;
-
- ret = spi_nor_wait_till_ready(nor);
- if (ret)
- return ret;
-
- ret = read_sr(nor);
- if (ret < 0)
- return ret;
-
- return ((ret & mask) != (status_new & mask)) ? -EIO : 0;
-}
-
static void stm_get_locked_range(struct spi_nor *nor, u8 sr, loff_t *ofs,
uint64_t *len)
{
@@ -1433,16 +1848,18 @@ static int stm_is_unlocked_sr(struct spi_nor *nor, loff_t ofs, uint64_t len,
static int stm_lock(struct spi_nor *nor, loff_t ofs, uint64_t len)
{
struct mtd_info *mtd = &nor->mtd;
- int status_old, status_new;
+ int ret, status_old, status_new;
u8 mask = SR_BP2 | SR_BP1 | SR_BP0;
u8 shift = ffs(mask) - 1, pow, val;
loff_t lock_len;
bool can_be_top = true, can_be_bottom = nor->flags & SNOR_F_HAS_SR_TB;
bool use_top;
- status_old = read_sr(nor);
- if (status_old < 0)
- return status_old;
+ ret = spi_nor_read_sr(nor, nor->bouncebuf);
+ if (ret)
+ return ret;
+
+ status_old = nor->bouncebuf[0];
/* If nothing in our range is unlocked, we don't need to do anything */
if (stm_is_locked_sr(nor, ofs, len, status_old))
@@ -1502,7 +1919,7 @@ static int stm_lock(struct spi_nor *nor, loff_t ofs, uint64_t len)
if ((status_new & mask) < (status_old & mask))
return -EINVAL;
- return write_sr_and_check(nor, status_new, mask);
+ return spi_nor_write_sr_and_check(nor, status_new);
}
/*
@@ -1513,16 +1930,18 @@ static int stm_lock(struct spi_nor *nor, loff_t ofs, uint64_t len)
static int stm_unlock(struct spi_nor *nor, loff_t ofs, uint64_t len)
{
struct mtd_info *mtd = &nor->mtd;
- int status_old, status_new;
+ int ret, status_old, status_new;
u8 mask = SR_BP2 | SR_BP1 | SR_BP0;
u8 shift = ffs(mask) - 1, pow, val;
loff_t lock_len;
bool can_be_top = true, can_be_bottom = nor->flags & SNOR_F_HAS_SR_TB;
bool use_top;
- status_old = read_sr(nor);
- if (status_old < 0)
- return status_old;
+ ret = spi_nor_read_sr(nor, nor->bouncebuf);
+ if (ret)
+ return ret;
+
+ status_old = nor->bouncebuf[0];
/* If nothing in our range is locked, we don't need to do anything */
if (stm_is_unlocked_sr(nor, ofs, len, status_old))
@@ -1585,7 +2004,7 @@ static int stm_unlock(struct spi_nor *nor, loff_t ofs, uint64_t len)
if ((status_new & mask) > (status_old & mask))
return -EINVAL;
- return write_sr_and_check(nor, status_new, mask);
+ return spi_nor_write_sr_and_check(nor, status_new);
}
/*
@@ -1597,13 +2016,13 @@ static int stm_unlock(struct spi_nor *nor, loff_t ofs, uint64_t len)
*/
static int stm_is_locked(struct spi_nor *nor, loff_t ofs, uint64_t len)
{
- int status;
+ int ret;
- status = read_sr(nor);
- if (status < 0)
- return status;
+ ret = spi_nor_read_sr(nor, nor->bouncebuf);
+ if (ret)
+ return ret;
- return stm_is_locked_sr(nor, ofs, len, status);
+ return stm_is_locked_sr(nor, ofs, len, nor->bouncebuf[0]);
}
static const struct spi_nor_locking_ops stm_locking_ops = {
@@ -1657,242 +2076,59 @@ static int spi_nor_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len)
return ret;
}
-/*
- * Write status Register and configuration register with 2 bytes
- * The first byte will be written to the status register, while the
- * second byte will be written to the configuration register.
- * Return negative if error occurred.
- */
-static int write_sr_cr(struct spi_nor *nor, u8 *sr_cr)
-{
- int ret;
-
- write_enable(nor);
-
- if (nor->spimem) {
- struct spi_mem_op op =
- SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WRSR, 1),
- SPI_MEM_OP_NO_ADDR,
- SPI_MEM_OP_NO_DUMMY,
- SPI_MEM_OP_DATA_OUT(2, sr_cr, 1));
-
- ret = spi_mem_exec_op(nor->spimem, &op);
- } else {
- ret = nor->write_reg(nor, SPINOR_OP_WRSR, sr_cr, 2);
- }
-
- if (ret < 0) {
- dev_err(nor->dev,
- "error while writing configuration register\n");
- return -EINVAL;
- }
-
- ret = spi_nor_wait_till_ready(nor);
- if (ret) {
- dev_err(nor->dev,
- "timeout while writing configuration register\n");
- return ret;
- }
-
- return 0;
-}
-
-/**
- * macronix_quad_enable() - set QE bit in Status Register.
- * @nor: pointer to a 'struct spi_nor'
- *
- * Set the Quad Enable (QE) bit in the Status Register.
- *
- * bit 6 of the Status Register is the QE bit for Macronix like QSPI memories.
- *
- * Return: 0 on success, -errno otherwise.
- */
-static int macronix_quad_enable(struct spi_nor *nor)
-{
- int ret, val;
-
- val = read_sr(nor);
- if (val < 0)
- return val;
- if (val & SR_QUAD_EN_MX)
- return 0;
-
- write_enable(nor);
-
- write_sr(nor, val | SR_QUAD_EN_MX);
-
- ret = spi_nor_wait_till_ready(nor);
- if (ret)
- return ret;
-
- ret = read_sr(nor);
- if (!(ret > 0 && (ret & SR_QUAD_EN_MX))) {
- dev_err(nor->dev, "Macronix Quad bit not set\n");
- return -EINVAL;
- }
-
- return 0;
-}
-
/**
- * spansion_quad_enable() - set QE bit in Configuraiton Register.
+ * spi_nor_sr1_bit6_quad_enable() - Set the Quad Enable BIT(6) in the Status
+ * Register 1.
* @nor: pointer to a 'struct spi_nor'
*
- * Set the Quad Enable (QE) bit in the Configuration Register.
- * This function is kept for legacy purpose because it has been used for a
- * long time without anybody complaining but it should be considered as
- * deprecated and maybe buggy.
- * First, this function doesn't care about the previous values of the Status
- * and Configuration Registers when it sets the QE bit (bit 1) in the
- * Configuration Register: all other bits are cleared, which may have unwanted
- * side effects like removing some block protections.
- * Secondly, it uses the Read Configuration Register (35h) instruction though
- * some very old and few memories don't support this instruction. If a pull-up
- * resistor is present on the MISO/IO1 line, we might still be able to pass the
- * "read back" test because the QSPI memory doesn't recognize the command,
- * so leaves the MISO/IO1 line state unchanged, hence read_cr() returns 0xFF.
- *
- * bit 1 of the Configuration Register is the QE bit for Spansion like QSPI
- * memories.
+ * Bit 6 of the Status Register 1 is the QE bit for Macronix like QSPI memories.
*
* Return: 0 on success, -errno otherwise.
*/
-static int spansion_quad_enable(struct spi_nor *nor)
+static int spi_nor_sr1_bit6_quad_enable(struct spi_nor *nor)
{
- u8 *sr_cr = nor->bouncebuf;
int ret;
- sr_cr[0] = 0;
- sr_cr[1] = CR_QUAD_EN_SPAN;
- ret = write_sr_cr(nor, sr_cr);
+ ret = spi_nor_read_sr(nor, nor->bouncebuf);
if (ret)
return ret;
- /* read back and check it */
- ret = read_cr(nor);
- if (!(ret > 0 && (ret & CR_QUAD_EN_SPAN))) {
- dev_err(nor->dev, "Spansion Quad bit not set\n");
- return -EINVAL;
- }
-
- return 0;
-}
-
-/**
- * spansion_no_read_cr_quad_enable() - set QE bit in Configuration Register.
- * @nor: pointer to a 'struct spi_nor'
- *
- * Set the Quad Enable (QE) bit in the Configuration Register.
- * This function should be used with QSPI memories not supporting the Read
- * Configuration Register (35h) instruction.
- *
- * bit 1 of the Configuration Register is the QE bit for Spansion like QSPI
- * memories.
- *
- * Return: 0 on success, -errno otherwise.
- */
-static int spansion_no_read_cr_quad_enable(struct spi_nor *nor)
-{
- u8 *sr_cr = nor->bouncebuf;
- int ret;
+ if (nor->bouncebuf[0] & SR1_QUAD_EN_BIT6)
+ return 0;
- /* Keep the current value of the Status Register. */
- ret = read_sr(nor);
- if (ret < 0) {
- dev_err(nor->dev, "error while reading status register\n");
- return -EINVAL;
- }
- sr_cr[0] = ret;
- sr_cr[1] = CR_QUAD_EN_SPAN;
+ nor->bouncebuf[0] |= SR1_QUAD_EN_BIT6;
- return write_sr_cr(nor, sr_cr);
+ return spi_nor_write_sr1_and_check(nor, nor->bouncebuf[0]);
}
/**
- * spansion_read_cr_quad_enable() - set QE bit in Configuration Register.
- * @nor: pointer to a 'struct spi_nor'
- *
- * Set the Quad Enable (QE) bit in the Configuration Register.
- * This function should be used with QSPI memories supporting the Read
- * Configuration Register (35h) instruction.
+ * spi_nor_sr2_bit1_quad_enable() - set the Quad Enable BIT(1) in the Status
+ * Register 2.
+ * @nor: pointer to a 'struct spi_nor'.
*
- * bit 1 of the Configuration Register is the QE bit for Spansion like QSPI
- * memories.
+ * Bit 1 of the Status Register 2 is the QE bit for Spansion like QSPI memories.
*
* Return: 0 on success, -errno otherwise.
*/
-static int spansion_read_cr_quad_enable(struct spi_nor *nor)
+static int spi_nor_sr2_bit1_quad_enable(struct spi_nor *nor)
{
- struct device *dev = nor->dev;
- u8 *sr_cr = nor->bouncebuf;
int ret;
- /* Check current Quad Enable bit value. */
- ret = read_cr(nor);
- if (ret < 0) {
- dev_err(dev, "error while reading configuration register\n");
- return -EINVAL;
- }
-
- if (ret & CR_QUAD_EN_SPAN)
- return 0;
-
- sr_cr[1] = ret | CR_QUAD_EN_SPAN;
-
- /* Keep the current value of the Status Register. */
- ret = read_sr(nor);
- if (ret < 0) {
- dev_err(dev, "error while reading status register\n");
- return -EINVAL;
- }
- sr_cr[0] = ret;
+ if (nor->flags & SNOR_F_NO_READ_CR)
+ return spi_nor_write_16bit_cr_and_check(nor, SR2_QUAD_EN_BIT1);
- ret = write_sr_cr(nor, sr_cr);
+ ret = spi_nor_read_cr(nor, nor->bouncebuf);
if (ret)
return ret;
- /* Read back and check it. */
- ret = read_cr(nor);
- if (!(ret > 0 && (ret & CR_QUAD_EN_SPAN))) {
- dev_err(nor->dev, "Spansion Quad bit not set\n");
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int spi_nor_write_sr2(struct spi_nor *nor, u8 *sr2)
-{
- if (nor->spimem) {
- struct spi_mem_op op =
- SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_WRSR2, 1),
- SPI_MEM_OP_NO_ADDR,
- SPI_MEM_OP_NO_DUMMY,
- SPI_MEM_OP_DATA_OUT(1, sr2, 1));
-
- return spi_mem_exec_op(nor->spimem, &op);
- }
-
- return nor->write_reg(nor, SPINOR_OP_WRSR2, sr2, 1);
-}
-
-static int spi_nor_read_sr2(struct spi_nor *nor, u8 *sr2)
-{
- if (nor->spimem) {
- struct spi_mem_op op =
- SPI_MEM_OP(SPI_MEM_OP_CMD(SPINOR_OP_RDSR2, 1),
- SPI_MEM_OP_NO_ADDR,
- SPI_MEM_OP_NO_DUMMY,
- SPI_MEM_OP_DATA_IN(1, sr2, 1));
-
- return spi_mem_exec_op(nor->spimem, &op);
- }
+ if (nor->bouncebuf[0] & SR2_QUAD_EN_BIT1)
+ return 0;
- return nor->read_reg(nor, SPINOR_OP_RDSR2, sr2, 1);
+ return spi_nor_write_16bit_cr_and_check(nor, nor->bouncebuf[0]);
}
/**
- * sr2_bit7_quad_enable() - set QE bit in Status Register 2.
+ * spi_nor_sr2_bit7_quad_enable() - set QE bit in Status Register 2.
* @nor: pointer to a 'struct spi_nor'
*
* Set the Quad Enable (QE) bit in the Status Register 2.
@@ -1903,10 +2139,11 @@ static int spi_nor_read_sr2(struct spi_nor *nor, u8 *sr2)
*
* Return: 0 on success, -errno otherwise.
*/
-static int sr2_bit7_quad_enable(struct spi_nor *nor)
+static int spi_nor_sr2_bit7_quad_enable(struct spi_nor *nor)
{
u8 *sr2 = nor->bouncebuf;
int ret;
+ u8 sr2_written;
/* Check current Quad Enable bit value. */
ret = spi_nor_read_sr2(nor, sr2);
@@ -1918,117 +2155,23 @@ static int sr2_bit7_quad_enable(struct spi_nor *nor)
/* Update the Quad Enable bit. */
*sr2 |= SR2_QUAD_EN_BIT7;
- write_enable(nor);
-
ret = spi_nor_write_sr2(nor, sr2);
- if (ret < 0) {
- dev_err(nor->dev, "error while writing status register 2\n");
- return -EINVAL;
- }
-
- ret = spi_nor_wait_till_ready(nor);
- if (ret < 0) {
- dev_err(nor->dev, "timeout while writing status register 2\n");
+ if (ret)
return ret;
- }
+
+ sr2_written = *sr2;
/* Read back and check it. */
ret = spi_nor_read_sr2(nor, sr2);
- if (!(ret > 0 && (*sr2 & SR2_QUAD_EN_BIT7))) {
- dev_err(nor->dev, "SR2 Quad bit not set\n");
- return -EINVAL;
- }
-
- return 0;
-}
-
-/**
- * spi_nor_clear_sr_bp() - clear the Status Register Block Protection bits.
- * @nor: pointer to a 'struct spi_nor'
- *
- * Read-modify-write function that clears the Block Protection bits from the
- * Status Register without affecting other bits.
- *
- * Return: 0 on success, -errno otherwise.
- */
-static int spi_nor_clear_sr_bp(struct spi_nor *nor)
-{
- int ret;
- u8 mask = SR_BP2 | SR_BP1 | SR_BP0;
-
- ret = read_sr(nor);
- if (ret < 0) {
- dev_err(nor->dev, "error while reading status register\n");
- return ret;
- }
-
- write_enable(nor);
-
- ret = write_sr(nor, ret & ~mask);
- if (ret) {
- dev_err(nor->dev, "write to status register failed\n");
- return ret;
- }
-
- ret = spi_nor_wait_till_ready(nor);
if (ret)
- dev_err(nor->dev, "timeout while writing status register\n");
- return ret;
-}
-
-/**
- * spi_nor_spansion_clear_sr_bp() - clear the Status Register Block Protection
- * bits on spansion flashes.
- * @nor: pointer to a 'struct spi_nor'
- *
- * Read-modify-write function that clears the Block Protection bits from the
- * Status Register without affecting other bits. The function is tightly
- * coupled with the spansion_quad_enable() function. Both assume that the Write
- * Register with 16 bits, together with the Read Configuration Register (35h)
- * instructions are supported.
- *
- * Return: 0 on success, -errno otherwise.
- */
-static int spi_nor_spansion_clear_sr_bp(struct spi_nor *nor)
-{
- int ret;
- u8 mask = SR_BP2 | SR_BP1 | SR_BP0;
- u8 *sr_cr = nor->bouncebuf;
-
- /* Check current Quad Enable bit value. */
- ret = read_cr(nor);
- if (ret < 0) {
- dev_err(nor->dev,
- "error while reading configuration register\n");
return ret;
- }
-
- /*
- * When the configuration register Quad Enable bit is one, only the
- * Write Status (01h) command with two data bytes may be used.
- */
- if (ret & CR_QUAD_EN_SPAN) {
- sr_cr[1] = ret;
-
- ret = read_sr(nor);
- if (ret < 0) {
- dev_err(nor->dev,
- "error while reading status register\n");
- return ret;
- }
- sr_cr[0] = ret & ~mask;
- ret = write_sr_cr(nor, sr_cr);
- if (ret)
- dev_err(nor->dev, "16-bit write register failed\n");
- return ret;
+ if (*sr2 != sr2_written) {
+ dev_dbg(nor->dev, "SR2: Read back test failed\n");
+ return -EIO;
}
- /*
- * If the Quad Enable bit is zero, use the Write Status (01h) command
- * with one data byte.
- */
- return spi_nor_clear_sr_bp(nor);
+ return 0;
}
/* Used when the "_ext_id" is two bytes at most */
@@ -2136,7 +2279,7 @@ static void gd25q256_default_init(struct spi_nor *nor)
* indicate the quad_enable method for this case, we need
* to set it in the default_init fixup hook.
*/
- nor->params.quad_enable = macronix_quad_enable;
+ nor->params.quad_enable = spi_nor_sr1_bit6_quad_enable;
}
static struct spi_nor_fixups gd25q256_fixups = {
@@ -2179,6 +2322,8 @@ static const struct flash_info spi_nor_ids[] = {
{ "en25q64", INFO(0x1c3017, 0, 64 * 1024, 128, SECT_4K) },
{ "en25q80a", INFO(0x1c3014, 0, 64 * 1024, 16,
SECT_4K | SPI_NOR_DUAL_READ) },
+ { "en25qh16", INFO(0x1c7015, 0, 64 * 1024, 32,
+ SECT_4K | SPI_NOR_DUAL_READ) },
{ "en25qh32", INFO(0x1c7016, 0, 64 * 1024, 64, 0) },
{ "en25qh64", INFO(0x1c7017, 0, 64 * 1024, 128,
SECT_4K | SPI_NOR_DUAL_READ) },
@@ -2267,6 +2412,10 @@ static const struct flash_info spi_nor_ids[] = {
SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
{ "is25wp128", INFO(0x9d7018, 0, 64 * 1024, 256,
SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
+ { "is25wp256", INFO(0x9d7019, 0, 64 * 1024, 512,
+ SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ |
+ SPI_NOR_4B_OPCODES)
+ .fixups = &is25lp256_fixups },
/* Macronix */
{ "mx25l512e", INFO(0xc22010, 0, 64 * 1024, 1, SECT_4K) },
@@ -2482,6 +2631,8 @@ static const struct flash_info spi_nor_ids[] = {
{ "w25q256", INFO(0xef4019, 0, 64 * 1024, 512, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
{ "w25q256jvm", INFO(0xef7019, 0, 64 * 1024, 512,
SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
+ { "w25q256jw", INFO(0xef6019, 0, 64 * 1024, 512,
+ SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
{ "w25m512jv", INFO(0xef7119, 0, 64 * 1024, 1024,
SECT_4K | SPI_NOR_QUAD_READ | SPI_NOR_DUAL_READ) },
@@ -2520,11 +2671,11 @@ static const struct flash_info *spi_nor_read_id(struct spi_nor *nor)
tmp = spi_mem_exec_op(nor->spimem, &op);
} else {
- tmp = nor->read_reg(nor, SPINOR_OP_RDID, id,
- SPI_NOR_MAX_ID_LEN);
+ tmp = nor->controller_ops->read_reg(nor, SPINOR_OP_RDID, id,
+ SPI_NOR_MAX_ID_LEN);
}
- if (tmp < 0) {
- dev_err(nor->dev, "error %d reading JEDEC ID\n", tmp);
+ if (tmp) {
+ dev_dbg(nor->dev, "error %d reading JEDEC ID\n", tmp);
return ERR_PTR(tmp);
}
@@ -2544,7 +2695,7 @@ static int spi_nor_read(struct mtd_info *mtd, loff_t from, size_t len,
size_t *retlen, u_char *buf)
{
struct spi_nor *nor = mtd_to_spi_nor(mtd);
- int ret;
+ ssize_t ret;
dev_dbg(nor->dev, "from 0x%08x, len %zd\n", (u32)from, len);
@@ -2583,7 +2734,7 @@ static int sst_write(struct mtd_info *mtd, loff_t to, size_t len,
size_t *retlen, const u_char *buf)
{
struct spi_nor *nor = mtd_to_spi_nor(mtd);
- size_t actual;
+ size_t actual = 0;
int ret;
dev_dbg(nor->dev, "to 0x%08x, len %zd\n", (u32)to, len);
@@ -2592,26 +2743,28 @@ static int sst_write(struct mtd_info *mtd, loff_t to, size_t len,
if (ret)
return ret;
- write_enable(nor);
+ ret = spi_nor_write_enable(nor);
+ if (ret)
+ goto out;
nor->sst_write_second = false;
- actual = to % 2;
/* Start write from odd address. */
- if (actual) {
+ if (to % 2) {
nor->program_opcode = SPINOR_OP_BP;
/* write one byte. */
ret = spi_nor_write_data(nor, to, 1, buf);
if (ret < 0)
- goto sst_write_err;
- WARN(ret != 1, "While writing 1 byte written %i bytes\n",
- (int)ret);
+ goto out;
+ WARN(ret != 1, "While writing 1 byte written %i bytes\n", ret);
ret = spi_nor_wait_till_ready(nor);
if (ret)
- goto sst_write_err;
+ goto out;
+
+ to++;
+ actual++;
}
- to += actual;
/* Write out most of the data here. */
for (; actual < len - 1; actual += 2) {
@@ -2620,39 +2773,44 @@ static int sst_write(struct mtd_info *mtd, loff_t to, size_t len,
/* write two bytes. */
ret = spi_nor_write_data(nor, to, 2, buf + actual);
if (ret < 0)
- goto sst_write_err;
- WARN(ret != 2, "While writing 2 bytes written %i bytes\n",
- (int)ret);
+ goto out;
+ WARN(ret != 2, "While writing 2 bytes written %i bytes\n", ret);
ret = spi_nor_wait_till_ready(nor);
if (ret)
- goto sst_write_err;
+ goto out;
to += 2;
nor->sst_write_second = true;
}
nor->sst_write_second = false;
- write_disable(nor);
+ ret = spi_nor_write_disable(nor);
+ if (ret)
+ goto out;
+
ret = spi_nor_wait_till_ready(nor);
if (ret)
- goto sst_write_err;
+ goto out;
/* Write out trailing byte if it exists. */
if (actual != len) {
- write_enable(nor);
+ ret = spi_nor_write_enable(nor);
+ if (ret)
+ goto out;
nor->program_opcode = SPINOR_OP_BP;
ret = spi_nor_write_data(nor, to, 1, buf + actual);
if (ret < 0)
- goto sst_write_err;
- WARN(ret != 1, "While writing 1 byte written %i bytes\n",
- (int)ret);
+ goto out;
+ WARN(ret != 1, "While writing 1 byte written %i bytes\n", ret);
ret = spi_nor_wait_till_ready(nor);
if (ret)
- goto sst_write_err;
- write_disable(nor);
+ goto out;
+
actual += 1;
+
+ ret = spi_nor_write_disable(nor);
}
-sst_write_err:
+out:
*retlen += actual;
spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_WRITE);
return ret;
@@ -2701,7 +2859,10 @@ static int spi_nor_write(struct mtd_info *mtd, loff_t to, size_t len,
addr = spi_nor_convert_addr(nor, addr);
- write_enable(nor);
+ ret = spi_nor_write_enable(nor);
+ if (ret)
+ goto write_err;
+
ret = spi_nor_write_data(nor, addr, page_remain, buf + i);
if (ret < 0)
goto write_err;
@@ -2722,13 +2883,21 @@ write_err:
static int spi_nor_check(struct spi_nor *nor)
{
if (!nor->dev ||
- (!nor->spimem &&
- (!nor->read || !nor->write || !nor->read_reg ||
- !nor->write_reg))) {
+ (!nor->spimem && !nor->controller_ops) ||
+ (!nor->spimem && nor->controller_ops &&
+ (!nor->controller_ops->read ||
+ !nor->controller_ops->write ||
+ !nor->controller_ops->read_reg ||
+ !nor->controller_ops->write_reg))) {
pr_err("spi-nor: please fill all the necessary fields!\n");
return -EINVAL;
}
+ if (nor->spimem && nor->controller_ops) {
+ dev_err(nor->dev, "nor->spimem and nor->controller_ops are mutually exclusive, please set just one of them.\n");
+ return -EINVAL;
+ }
+
return 0;
}
@@ -2738,10 +2907,8 @@ static int s3an_nor_setup(struct spi_nor *nor,
int ret;
ret = spi_nor_xread_sr(nor, nor->bouncebuf);
- if (ret < 0) {
- dev_err(nor->dev, "error %d reading XRDSR\n", (int) ret);
+ if (ret)
return ret;
- }
nor->erase_opcode = SPINOR_OP_XSE;
nor->program_opcode = SPINOR_OP_XPP;
@@ -2865,7 +3032,7 @@ static int spi_nor_hwcaps_pp2cmd(u32 hwcaps)
*/
static int spi_nor_read_raw(struct spi_nor *nor, u32 addr, size_t len, u8 *buf)
{
- int ret;
+ ssize_t ret;
while (len) {
ret = spi_nor_read_data(nor, addr, len, buf);
@@ -3489,20 +3656,39 @@ static int spi_nor_parse_bfpt(struct spi_nor *nor,
break;
case BFPT_DWORD15_QER_SR2_BIT1_BUGGY:
+ /*
+ * Writing only one byte to the Status Register has the
+ * side-effect of clearing Status Register 2.
+ */
case BFPT_DWORD15_QER_SR2_BIT1_NO_RD:
- params->quad_enable = spansion_no_read_cr_quad_enable;
+ /*
+ * Read Configuration Register (35h) instruction is not
+ * supported.
+ */
+ nor->flags |= SNOR_F_HAS_16BIT_SR | SNOR_F_NO_READ_CR;
+ params->quad_enable = spi_nor_sr2_bit1_quad_enable;
break;
case BFPT_DWORD15_QER_SR1_BIT6:
- params->quad_enable = macronix_quad_enable;
+ nor->flags &= ~SNOR_F_HAS_16BIT_SR;
+ params->quad_enable = spi_nor_sr1_bit6_quad_enable;
break;
case BFPT_DWORD15_QER_SR2_BIT7:
- params->quad_enable = sr2_bit7_quad_enable;
+ nor->flags &= ~SNOR_F_HAS_16BIT_SR;
+ params->quad_enable = spi_nor_sr2_bit7_quad_enable;
break;
case BFPT_DWORD15_QER_SR2_BIT1:
- params->quad_enable = spansion_read_cr_quad_enable;
+ /*
+ * JESD216 rev B or later does not specify if writing only one
+ * byte to the Status Register clears or not the Status
+ * Register 2, so let's be cautious and keep the default
+ * assumption of a 16-bit Write Status (01h) command.
+ */
+ nor->flags |= SNOR_F_HAS_16BIT_SR;
+
+ params->quad_enable = spi_nor_sr2_bit1_quad_enable;
break;
default:
@@ -4101,7 +4287,7 @@ static int spi_nor_parse_sfdp(struct spi_nor *nor,
err = spi_nor_read_sfdp(nor, sizeof(header),
psize, param_headers);
if (err < 0) {
- dev_err(dev, "failed to read SFDP parameter headers\n");
+ dev_dbg(dev, "failed to read SFDP parameter headers\n");
goto exit;
}
}
@@ -4348,7 +4534,7 @@ static int spi_nor_default_setup(struct spi_nor *nor,
/* Select the (Fast) Read command. */
err = spi_nor_select_read(nor, shared_mask);
if (err) {
- dev_err(nor->dev,
+ dev_dbg(nor->dev,
"can't select read settings supported by both the SPI controller and memory.\n");
return err;
}
@@ -4356,7 +4542,7 @@ static int spi_nor_default_setup(struct spi_nor *nor,
/* Select the Page Program command. */
err = spi_nor_select_pp(nor, shared_mask);
if (err) {
- dev_err(nor->dev,
+ dev_dbg(nor->dev,
"can't select write settings supported by both the SPI controller and memory.\n");
return err;
}
@@ -4364,7 +4550,7 @@ static int spi_nor_default_setup(struct spi_nor *nor,
/* Select the Sector Erase command. */
err = spi_nor_select_erase(nor);
if (err) {
- dev_err(nor->dev,
+ dev_dbg(nor->dev,
"can't select erase settings supported by both the SPI controller and memory.\n");
return err;
}
@@ -4381,12 +4567,32 @@ static int spi_nor_setup(struct spi_nor *nor,
return nor->params.setup(nor, hwcaps);
}
+static void atmel_set_default_init(struct spi_nor *nor)
+{
+ nor->flags |= SNOR_F_HAS_LOCK;
+}
+
+static void intel_set_default_init(struct spi_nor *nor)
+{
+ nor->flags |= SNOR_F_HAS_LOCK;
+}
+
+static void issi_set_default_init(struct spi_nor *nor)
+{
+ nor->params.quad_enable = spi_nor_sr1_bit6_quad_enable;
+}
+
static void macronix_set_default_init(struct spi_nor *nor)
{
- nor->params.quad_enable = macronix_quad_enable;
+ nor->params.quad_enable = spi_nor_sr1_bit6_quad_enable;
nor->params.set_4byte = macronix_set_4byte;
}
+static void sst_set_default_init(struct spi_nor *nor)
+{
+ nor->flags |= SNOR_F_HAS_LOCK;
+}
+
static void st_micron_set_default_init(struct spi_nor *nor)
{
nor->flags |= SNOR_F_HAS_LOCK;
@@ -4408,6 +4614,18 @@ static void spi_nor_manufacturer_init_params(struct spi_nor *nor)
{
/* Init flash parameters based on MFR */
switch (JEDEC_MFR(nor->info)) {
+ case SNOR_MFR_ATMEL:
+ atmel_set_default_init(nor);
+ break;
+
+ case SNOR_MFR_INTEL:
+ intel_set_default_init(nor);
+ break;
+
+ case SNOR_MFR_ISSI:
+ issi_set_default_init(nor);
+ break;
+
case SNOR_MFR_MACRONIX:
macronix_set_default_init(nor);
break;
@@ -4417,6 +4635,10 @@ static void spi_nor_manufacturer_init_params(struct spi_nor *nor)
st_micron_set_default_init(nor);
break;
+ case SNOR_MFR_SST:
+ sst_set_default_init(nor);
+ break;
+
case SNOR_MFR_WINBOND:
winbond_set_default_init(nor);
break;
@@ -4465,9 +4687,11 @@ static void spi_nor_info_init_params(struct spi_nor *nor)
u8 i, erase_mask;
/* Initialize legacy flash parameters and settings. */
- params->quad_enable = spansion_quad_enable;
+ params->quad_enable = spi_nor_sr2_bit1_quad_enable;
params->set_4byte = spansion_set_4byte;
params->setup = spi_nor_default_setup;
+ /* Default to 16-bit Write Status (01h) Command */
+ nor->flags |= SNOR_F_HAS_16BIT_SR;
/* Set SPI NOR sizes. */
params->size = (u64)info->sector_size * info->n_sectors;
@@ -4675,25 +4899,36 @@ static int spi_nor_quad_enable(struct spi_nor *nor)
return nor->params.quad_enable(nor);
}
+/**
+ * spi_nor_unlock_all() - Unlocks the entire flash memory array.
+ * @nor: pointer to a 'struct spi_nor'.
+ *
+ * Some SPI NOR flashes are write protected by default after a power-on reset
+ * cycle, in order to avoid inadvertent writes during power-up. Backward
+ * compatibility imposes to unlock the entire flash memory array at power-up
+ * by default.
+ */
+static int spi_nor_unlock_all(struct spi_nor *nor)
+{
+ if (nor->flags & SNOR_F_HAS_LOCK)
+ return spi_nor_unlock(&nor->mtd, 0, nor->params.size);
+
+ return 0;
+}
+
static int spi_nor_init(struct spi_nor *nor)
{
int err;
- if (nor->clear_sr_bp) {
- if (nor->params.quad_enable == spansion_quad_enable)
- nor->clear_sr_bp = spi_nor_spansion_clear_sr_bp;
-
- err = nor->clear_sr_bp(nor);
- if (err) {
- dev_err(nor->dev,
- "fail to clear block protection bits\n");
- return err;
- }
+ err = spi_nor_quad_enable(nor);
+ if (err) {
+ dev_dbg(nor->dev, "quad mode not supported\n");
+ return err;
}
- err = spi_nor_quad_enable(nor);
+ err = spi_nor_unlock_all(nor);
if (err) {
- dev_err(nor->dev, "quad mode not supported\n");
+ dev_dbg(nor->dev, "Failed to unlock the entire flash memory array\n");
return err;
}
@@ -4761,7 +4996,7 @@ static int spi_nor_set_addr_width(struct spi_nor *nor)
}
if (nor->addr_width > SPI_NOR_MAX_ADDR_WIDTH) {
- dev_err(nor->dev, "address width is too large: %u\n",
+ dev_dbg(nor->dev, "address width is too large: %u\n",
nor->addr_width);
return -EINVAL;
}
@@ -4879,16 +5114,6 @@ int spi_nor_scan(struct spi_nor *nor, const char *name,
if (info->flags & SPI_NOR_HAS_LOCK)
nor->flags |= SNOR_F_HAS_LOCK;
- /*
- * Atmel, SST, Intel/Numonyx, and others serial NOR tend to power up
- * with the software protection bits set.
- */
- if (JEDEC_MFR(nor->info) == SNOR_MFR_ATMEL ||
- JEDEC_MFR(nor->info) == SNOR_MFR_INTEL ||
- JEDEC_MFR(nor->info) == SNOR_MFR_SST ||
- nor->info->flags & SPI_NOR_HAS_LOCK)
- nor->clear_sr_bp = spi_nor_clear_sr_bp;
-
/* Init flash parameters based on flash_info struct and SFDP */
spi_nor_init_params(nor);
diff --git a/drivers/mtd/ubi/debug.c b/drivers/mtd/ubi/debug.c
index a1dff92ceedf..0f847d510950 100644
--- a/drivers/mtd/ubi/debug.c
+++ b/drivers/mtd/ubi/debug.c
@@ -509,11 +509,9 @@ static const struct file_operations eraseblk_count_fops = {
*/
int ubi_debugfs_init_dev(struct ubi_device *ubi)
{
- int err, n;
unsigned long ubi_num = ubi->ubi_num;
- const char *fname;
- struct dentry *dent;
struct ubi_debug_info *d = &ubi->dbg;
+ int n;
if (!IS_ENABLED(CONFIG_DEBUG_FS))
return 0;
@@ -522,95 +520,52 @@ int ubi_debugfs_init_dev(struct ubi_device *ubi)
ubi->ubi_num);
if (n == UBI_DFS_DIR_LEN) {
/* The array size is too small */
- fname = UBI_DFS_DIR_NAME;
- dent = ERR_PTR(-EINVAL);
- goto out;
+ return -EINVAL;
}
- fname = d->dfs_dir_name;
- dent = debugfs_create_dir(fname, dfs_rootdir);
- if (IS_ERR_OR_NULL(dent))
- goto out;
- d->dfs_dir = dent;
-
- fname = "chk_gen";
- dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num,
- &dfs_fops);
- if (IS_ERR_OR_NULL(dent))
- goto out_remove;
- d->dfs_chk_gen = dent;
-
- fname = "chk_io";
- dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num,
- &dfs_fops);
- if (IS_ERR_OR_NULL(dent))
- goto out_remove;
- d->dfs_chk_io = dent;
-
- fname = "chk_fastmap";
- dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num,
- &dfs_fops);
- if (IS_ERR_OR_NULL(dent))
- goto out_remove;
- d->dfs_chk_fastmap = dent;
-
- fname = "tst_disable_bgt";
- dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num,
- &dfs_fops);
- if (IS_ERR_OR_NULL(dent))
- goto out_remove;
- d->dfs_disable_bgt = dent;
-
- fname = "tst_emulate_bitflips";
- dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num,
- &dfs_fops);
- if (IS_ERR_OR_NULL(dent))
- goto out_remove;
- d->dfs_emulate_bitflips = dent;
-
- fname = "tst_emulate_io_failures";
- dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num,
- &dfs_fops);
- if (IS_ERR_OR_NULL(dent))
- goto out_remove;
- d->dfs_emulate_io_failures = dent;
-
- fname = "tst_emulate_power_cut";
- dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num,
- &dfs_fops);
- if (IS_ERR_OR_NULL(dent))
- goto out_remove;
- d->dfs_emulate_power_cut = dent;
-
- fname = "tst_emulate_power_cut_min";
- dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num,
- &dfs_fops);
- if (IS_ERR_OR_NULL(dent))
- goto out_remove;
- d->dfs_power_cut_min = dent;
-
- fname = "tst_emulate_power_cut_max";
- dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num,
- &dfs_fops);
- if (IS_ERR_OR_NULL(dent))
- goto out_remove;
- d->dfs_power_cut_max = dent;
-
- fname = "detailed_erase_block_info";
- dent = debugfs_create_file(fname, S_IRUSR, d->dfs_dir, (void *)ubi_num,
- &eraseblk_count_fops);
- if (IS_ERR_OR_NULL(dent))
- goto out_remove;
+ d->dfs_dir = debugfs_create_dir(d->dfs_dir_name, dfs_rootdir);
- return 0;
+ d->dfs_chk_gen = debugfs_create_file("chk_gen", S_IWUSR, d->dfs_dir,
+ (void *)ubi_num, &dfs_fops);
-out_remove:
- debugfs_remove_recursive(d->dfs_dir);
-out:
- err = dent ? PTR_ERR(dent) : -ENODEV;
- ubi_err(ubi, "cannot create \"%s\" debugfs file or directory, error %d\n",
- fname, err);
- return err;
+ d->dfs_chk_io = debugfs_create_file("chk_io", S_IWUSR, d->dfs_dir,
+ (void *)ubi_num, &dfs_fops);
+
+ d->dfs_chk_fastmap = debugfs_create_file("chk_fastmap", S_IWUSR,
+ d->dfs_dir, (void *)ubi_num,
+ &dfs_fops);
+
+ d->dfs_disable_bgt = debugfs_create_file("tst_disable_bgt", S_IWUSR,
+ d->dfs_dir, (void *)ubi_num,
+ &dfs_fops);
+
+ d->dfs_emulate_bitflips = debugfs_create_file("tst_emulate_bitflips",
+ S_IWUSR, d->dfs_dir,
+ (void *)ubi_num,
+ &dfs_fops);
+
+ d->dfs_emulate_io_failures = debugfs_create_file("tst_emulate_io_failures",
+ S_IWUSR, d->dfs_dir,
+ (void *)ubi_num,
+ &dfs_fops);
+
+ d->dfs_emulate_power_cut = debugfs_create_file("tst_emulate_power_cut",
+ S_IWUSR, d->dfs_dir,
+ (void *)ubi_num,
+ &dfs_fops);
+
+ d->dfs_power_cut_min = debugfs_create_file("tst_emulate_power_cut_min",
+ S_IWUSR, d->dfs_dir,
+ (void *)ubi_num, &dfs_fops);
+
+ d->dfs_power_cut_max = debugfs_create_file("tst_emulate_power_cut_max",
+ S_IWUSR, d->dfs_dir,
+ (void *)ubi_num, &dfs_fops);
+
+ debugfs_create_file("detailed_erase_block_info", S_IRUSR, d->dfs_dir,
+ (void *)ubi_num, &eraseblk_count_fops);
+
+ return 0;
}
/**
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 8c79bad2a9a5..4f2e6910c623 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -952,7 +952,7 @@ static int alb_upper_dev_walk(struct net_device *upper, void *_data)
struct bond_vlan_tag *tags;
if (is_vlan_dev(upper) &&
- bond->nest_level == vlan_get_encap_level(upper) - 1) {
+ bond->dev->lower_level == upper->lower_level - 1) {
if (upper->addr_assign_type == NET_ADDR_STOLEN) {
alb_send_lp_vid(slave, mac_addr,
vlan_dev_vlan_proto(upper),
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 931d9d935686..62f65573eb04 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1733,8 +1733,6 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
goto err_upper_unlink;
}
- bond->nest_level = dev_get_nest_level(bond_dev) + 1;
-
/* If the mode uses primary, then the following is handled by
* bond_change_active_slave().
*/
@@ -1816,7 +1814,8 @@ err_detach:
slave_disable_netpoll(new_slave);
err_close:
- slave_dev->priv_flags &= ~IFF_BONDING;
+ if (!netif_is_bond_master(slave_dev))
+ slave_dev->priv_flags &= ~IFF_BONDING;
dev_close(slave_dev);
err_restore_mac:
@@ -1956,9 +1955,6 @@ static int __bond_release_one(struct net_device *bond_dev,
if (!bond_has_slaves(bond)) {
bond_set_carrier(bond);
eth_hw_addr_random(bond_dev);
- bond->nest_level = SINGLE_DEPTH_NESTING;
- } else {
- bond->nest_level = dev_get_nest_level(bond_dev) + 1;
}
unblock_netpoll_tx();
@@ -2017,7 +2013,8 @@ static int __bond_release_one(struct net_device *bond_dev,
else
dev_set_mtu(slave_dev, slave->original_mtu);
- slave_dev->priv_flags &= ~IFF_BONDING;
+ if (!netif_is_bond_master(slave_dev))
+ slave_dev->priv_flags &= ~IFF_BONDING;
bond_free_slave(slave);
@@ -2086,8 +2083,7 @@ static int bond_miimon_inspect(struct bonding *bond)
ignore_updelay = !rcu_dereference(bond->curr_active_slave);
bond_for_each_slave_rcu(bond, slave, iter) {
- slave->new_link = BOND_LINK_NOCHANGE;
- slave->link_new_state = slave->link;
+ bond_propose_link_state(slave, BOND_LINK_NOCHANGE);
link_state = bond_check_dev_link(bond, slave->dev, 0);
@@ -2121,7 +2117,7 @@ static int bond_miimon_inspect(struct bonding *bond)
}
if (slave->delay <= 0) {
- slave->new_link = BOND_LINK_DOWN;
+ bond_propose_link_state(slave, BOND_LINK_DOWN);
commit++;
continue;
}
@@ -2158,7 +2154,7 @@ static int bond_miimon_inspect(struct bonding *bond)
slave->delay = 0;
if (slave->delay <= 0) {
- slave->new_link = BOND_LINK_UP;
+ bond_propose_link_state(slave, BOND_LINK_UP);
commit++;
ignore_updelay = false;
continue;
@@ -2196,7 +2192,7 @@ static void bond_miimon_commit(struct bonding *bond)
struct slave *slave, *primary;
bond_for_each_slave(bond, slave, iter) {
- switch (slave->new_link) {
+ switch (slave->link_new_state) {
case BOND_LINK_NOCHANGE:
/* For 802.3ad mode, check current slave speed and
* duplex again in case its port was disabled after
@@ -2268,8 +2264,8 @@ static void bond_miimon_commit(struct bonding *bond)
default:
slave_err(bond->dev, slave->dev, "invalid new link %d on slave\n",
- slave->new_link);
- slave->new_link = BOND_LINK_NOCHANGE;
+ slave->link_new_state);
+ bond_propose_link_state(slave, BOND_LINK_NOCHANGE);
continue;
}
@@ -2677,13 +2673,13 @@ static void bond_loadbalance_arp_mon(struct bonding *bond)
bond_for_each_slave_rcu(bond, slave, iter) {
unsigned long trans_start = dev_trans_start(slave->dev);
- slave->new_link = BOND_LINK_NOCHANGE;
+ bond_propose_link_state(slave, BOND_LINK_NOCHANGE);
if (slave->link != BOND_LINK_UP) {
if (bond_time_in_interval(bond, trans_start, 1) &&
bond_time_in_interval(bond, slave->last_rx, 1)) {
- slave->new_link = BOND_LINK_UP;
+ bond_propose_link_state(slave, BOND_LINK_UP);
slave_state_changed = 1;
/* primary_slave has no meaning in round-robin
@@ -2708,7 +2704,7 @@ static void bond_loadbalance_arp_mon(struct bonding *bond)
if (!bond_time_in_interval(bond, trans_start, 2) ||
!bond_time_in_interval(bond, slave->last_rx, 2)) {
- slave->new_link = BOND_LINK_DOWN;
+ bond_propose_link_state(slave, BOND_LINK_DOWN);
slave_state_changed = 1;
if (slave->link_failure_count < UINT_MAX)
@@ -2739,8 +2735,8 @@ static void bond_loadbalance_arp_mon(struct bonding *bond)
goto re_arm;
bond_for_each_slave(bond, slave, iter) {
- if (slave->new_link != BOND_LINK_NOCHANGE)
- slave->link = slave->new_link;
+ if (slave->link_new_state != BOND_LINK_NOCHANGE)
+ slave->link = slave->link_new_state;
}
if (slave_state_changed) {
@@ -2763,9 +2759,9 @@ re_arm:
}
/* Called to inspect slaves for active-backup mode ARP monitor link state
- * changes. Sets new_link in slaves to specify what action should take
- * place for the slave. Returns 0 if no changes are found, >0 if changes
- * to link states must be committed.
+ * changes. Sets proposed link state in slaves to specify what action
+ * should take place for the slave. Returns 0 if no changes are found, >0
+ * if changes to link states must be committed.
*
* Called with rcu_read_lock held.
*/
@@ -2777,12 +2773,12 @@ static int bond_ab_arp_inspect(struct bonding *bond)
int commit = 0;
bond_for_each_slave_rcu(bond, slave, iter) {
- slave->new_link = BOND_LINK_NOCHANGE;
+ bond_propose_link_state(slave, BOND_LINK_NOCHANGE);
last_rx = slave_last_rx(bond, slave);
if (slave->link != BOND_LINK_UP) {
if (bond_time_in_interval(bond, last_rx, 1)) {
- slave->new_link = BOND_LINK_UP;
+ bond_propose_link_state(slave, BOND_LINK_UP);
commit++;
}
continue;
@@ -2810,7 +2806,7 @@ static int bond_ab_arp_inspect(struct bonding *bond)
if (!bond_is_active_slave(slave) &&
!rcu_access_pointer(bond->current_arp_slave) &&
!bond_time_in_interval(bond, last_rx, 3)) {
- slave->new_link = BOND_LINK_DOWN;
+ bond_propose_link_state(slave, BOND_LINK_DOWN);
commit++;
}
@@ -2823,7 +2819,7 @@ static int bond_ab_arp_inspect(struct bonding *bond)
if (bond_is_active_slave(slave) &&
(!bond_time_in_interval(bond, trans_start, 2) ||
!bond_time_in_interval(bond, last_rx, 2))) {
- slave->new_link = BOND_LINK_DOWN;
+ bond_propose_link_state(slave, BOND_LINK_DOWN);
commit++;
}
}
@@ -2843,7 +2839,7 @@ static void bond_ab_arp_commit(struct bonding *bond)
struct slave *slave;
bond_for_each_slave(bond, slave, iter) {
- switch (slave->new_link) {
+ switch (slave->link_new_state) {
case BOND_LINK_NOCHANGE:
continue;
@@ -2893,8 +2889,9 @@ static void bond_ab_arp_commit(struct bonding *bond)
continue;
default:
- slave_err(bond->dev, slave->dev, "impossible: new_link %d on slave\n",
- slave->new_link);
+ slave_err(bond->dev, slave->dev,
+ "impossible: link_new_state %d on slave\n",
+ slave->link_new_state);
continue;
}
@@ -3442,13 +3439,6 @@ static void bond_fold_stats(struct rtnl_link_stats64 *_res,
}
}
-static int bond_get_nest_level(struct net_device *bond_dev)
-{
- struct bonding *bond = netdev_priv(bond_dev);
-
- return bond->nest_level;
-}
-
static void bond_get_stats(struct net_device *bond_dev,
struct rtnl_link_stats64 *stats)
{
@@ -3457,7 +3447,7 @@ static void bond_get_stats(struct net_device *bond_dev,
struct list_head *iter;
struct slave *slave;
- spin_lock_nested(&bond->stats_lock, bond_get_nest_level(bond_dev));
+ spin_lock(&bond->stats_lock);
memcpy(stats, &bond->bond_stats, sizeof(*stats));
rcu_read_lock();
@@ -4039,7 +4029,7 @@ out:
* this to-be-skipped slave to send a packet out.
*/
old_arr = rtnl_dereference(bond->slave_arr);
- for (idx = 0; idx < old_arr->count; idx++) {
+ for (idx = 0; old_arr != NULL && idx < old_arr->count; idx++) {
if (skipslave == old_arr->arr[idx]) {
old_arr->arr[idx] =
old_arr->arr[old_arr->count-1];
@@ -4268,7 +4258,6 @@ static const struct net_device_ops bond_netdev_ops = {
.ndo_neigh_setup = bond_neigh_setup,
.ndo_vlan_rx_add_vid = bond_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = bond_vlan_rx_kill_vid,
- .ndo_get_lock_subclass = bond_get_nest_level,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_netpoll_setup = bond_netpoll_setup,
.ndo_netpoll_cleanup = bond_netpoll_cleanup,
@@ -4296,7 +4285,6 @@ void bond_setup(struct net_device *bond_dev)
struct bonding *bond = netdev_priv(bond_dev);
spin_lock_init(&bond->mode_lock);
- spin_lock_init(&bond->stats_lock);
bond->params = bonding_defaults;
/* Initialize pointers */
@@ -4365,6 +4353,7 @@ static void bond_uninit(struct net_device *bond_dev)
list_del(&bond->bond_list);
+ lockdep_unregister_key(&bond->stats_lock_key);
bond_debug_unregister(bond);
}
@@ -4768,8 +4757,9 @@ static int bond_init(struct net_device *bond_dev)
if (!bond->wq)
return -ENOMEM;
- bond->nest_level = SINGLE_DEPTH_NESTING;
- netdev_lockdep_set_classes(bond_dev);
+ spin_lock_init(&bond->stats_lock);
+ lockdep_register_key(&bond->stats_lock_key);
+ lockdep_set_class(&bond->stats_lock, &bond->stats_lock_key);
list_add_tail(&bond->bond_list, &bn->dev_list);
diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c
index 606b7d8ffe13..8e9f5620c9a2 100644
--- a/drivers/net/can/c_can/c_can.c
+++ b/drivers/net/can/c_can/c_can.c
@@ -52,6 +52,7 @@
#define CONTROL_EX_PDR BIT(8)
/* control register */
+#define CONTROL_SWR BIT(15)
#define CONTROL_TEST BIT(7)
#define CONTROL_CCE BIT(6)
#define CONTROL_DISABLE_AR BIT(5)
@@ -97,6 +98,9 @@
#define BTR_TSEG2_SHIFT 12
#define BTR_TSEG2_MASK (0x7 << BTR_TSEG2_SHIFT)
+/* interrupt register */
+#define INT_STS_PENDING 0x8000
+
/* brp extension register */
#define BRP_EXT_BRPE_MASK 0x0f
#define BRP_EXT_BRPE_SHIFT 0
@@ -569,6 +573,26 @@ static void c_can_configure_msg_objects(struct net_device *dev)
IF_MCONT_RCV_EOB);
}
+static int c_can_software_reset(struct net_device *dev)
+{
+ struct c_can_priv *priv = netdev_priv(dev);
+ int retry = 0;
+
+ if (priv->type != BOSCH_D_CAN)
+ return 0;
+
+ priv->write_reg(priv, C_CAN_CTRL_REG, CONTROL_SWR | CONTROL_INIT);
+ while (priv->read_reg(priv, C_CAN_CTRL_REG) & CONTROL_SWR) {
+ msleep(20);
+ if (retry++ > 100) {
+ netdev_err(dev, "CCTRL: software reset failed\n");
+ return -EIO;
+ }
+ }
+
+ return 0;
+}
+
/*
* Configure C_CAN chip:
* - enable/disable auto-retransmission
@@ -578,6 +602,11 @@ static void c_can_configure_msg_objects(struct net_device *dev)
static int c_can_chip_config(struct net_device *dev)
{
struct c_can_priv *priv = netdev_priv(dev);
+ int err;
+
+ err = c_can_software_reset(dev);
+ if (err)
+ return err;
/* enable automatic retransmission */
priv->write_reg(priv, C_CAN_CTRL_REG, CONTROL_ENABLE_AR);
@@ -886,6 +915,9 @@ static int c_can_handle_state_change(struct net_device *dev,
struct can_berr_counter bec;
switch (error_type) {
+ case C_CAN_NO_ERROR:
+ priv->can.state = CAN_STATE_ERROR_ACTIVE;
+ break;
case C_CAN_ERROR_WARNING:
/* error warning state */
priv->can.can_stats.error_warning++;
@@ -916,6 +948,13 @@ static int c_can_handle_state_change(struct net_device *dev,
ERR_CNT_RP_SHIFT;
switch (error_type) {
+ case C_CAN_NO_ERROR:
+ /* error warning state */
+ cf->can_id |= CAN_ERR_CRTL;
+ cf->data[1] = CAN_ERR_CRTL_ACTIVE;
+ cf->data[6] = bec.txerr;
+ cf->data[7] = bec.rxerr;
+ break;
case C_CAN_ERROR_WARNING:
/* error warning state */
cf->can_id |= CAN_ERR_CRTL;
@@ -1029,10 +1068,16 @@ static int c_can_poll(struct napi_struct *napi, int quota)
u16 curr, last = priv->last_status;
int work_done = 0;
- priv->last_status = curr = priv->read_reg(priv, C_CAN_STS_REG);
- /* Ack status on C_CAN. D_CAN is self clearing */
- if (priv->type != BOSCH_D_CAN)
- priv->write_reg(priv, C_CAN_STS_REG, LEC_UNUSED);
+ /* Only read the status register if a status interrupt was pending */
+ if (atomic_xchg(&priv->sie_pending, 0)) {
+ priv->last_status = curr = priv->read_reg(priv, C_CAN_STS_REG);
+ /* Ack status on C_CAN. D_CAN is self clearing */
+ if (priv->type != BOSCH_D_CAN)
+ priv->write_reg(priv, C_CAN_STS_REG, LEC_UNUSED);
+ } else {
+ /* no change detected ... */
+ curr = last;
+ }
/* handle state changes */
if ((curr & STATUS_EWARN) && (!(last & STATUS_EWARN))) {
@@ -1054,11 +1099,17 @@ static int c_can_poll(struct napi_struct *napi, int quota)
/* handle bus recovery events */
if ((!(curr & STATUS_BOFF)) && (last & STATUS_BOFF)) {
netdev_dbg(dev, "left bus off state\n");
- priv->can.state = CAN_STATE_ERROR_ACTIVE;
+ work_done += c_can_handle_state_change(dev, C_CAN_ERROR_PASSIVE);
}
+
if ((!(curr & STATUS_EPASS)) && (last & STATUS_EPASS)) {
netdev_dbg(dev, "left error passive state\n");
- priv->can.state = CAN_STATE_ERROR_ACTIVE;
+ work_done += c_can_handle_state_change(dev, C_CAN_ERROR_WARNING);
+ }
+
+ if ((!(curr & STATUS_EWARN)) && (last & STATUS_EWARN)) {
+ netdev_dbg(dev, "left error warning state\n");
+ work_done += c_can_handle_state_change(dev, C_CAN_NO_ERROR);
}
/* handle lec errors on the bus */
@@ -1083,10 +1134,16 @@ static irqreturn_t c_can_isr(int irq, void *dev_id)
{
struct net_device *dev = (struct net_device *)dev_id;
struct c_can_priv *priv = netdev_priv(dev);
+ int reg_int;
- if (!priv->read_reg(priv, C_CAN_INT_REG))
+ reg_int = priv->read_reg(priv, C_CAN_INT_REG);
+ if (!reg_int)
return IRQ_NONE;
+ /* save for later use */
+ if (reg_int & INT_STS_PENDING)
+ atomic_set(&priv->sie_pending, 1);
+
/* disable all interrupts and schedule the NAPI */
c_can_irq_control(priv, false);
napi_schedule(&priv->napi);
diff --git a/drivers/net/can/c_can/c_can.h b/drivers/net/can/c_can/c_can.h
index 8acdc7fa4792..d5567a7c1c6d 100644
--- a/drivers/net/can/c_can/c_can.h
+++ b/drivers/net/can/c_can/c_can.h
@@ -198,6 +198,7 @@ struct c_can_priv {
struct net_device *dev;
struct device *device;
atomic_t tx_active;
+ atomic_t sie_pending;
unsigned long tx_dir;
int last_status;
u16 (*read_reg) (const struct c_can_priv *priv, enum reg index);
diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index ac86be52b461..1c88c361938c 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -848,6 +848,7 @@ void of_can_transceiver(struct net_device *dev)
return;
ret = of_property_read_u32(dn, "max-bitrate", &priv->bitrate_max);
+ of_node_put(dn);
if ((ret && ret != -EINVAL) || (!ret && !priv->bitrate_max))
netdev_warn(dev, "Invalid value for transceiver max bitrate. Ignoring bitrate limit.\n");
}
diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index dc5695dffc2e..57f9a2f51085 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -677,6 +677,7 @@ static void flexcan_irq_bus_err(struct net_device *dev, u32 reg_esr)
struct can_frame *cf;
bool rx_errors = false, tx_errors = false;
u32 timestamp;
+ int err;
timestamp = priv->read(&regs->timer) << 16;
@@ -725,7 +726,9 @@ static void flexcan_irq_bus_err(struct net_device *dev, u32 reg_esr)
if (tx_errors)
dev->stats.tx_errors++;
- can_rx_offload_queue_sorted(&priv->offload, skb, timestamp);
+ err = can_rx_offload_queue_sorted(&priv->offload, skb, timestamp);
+ if (err)
+ dev->stats.rx_fifo_errors++;
}
static void flexcan_irq_state(struct net_device *dev, u32 reg_esr)
@@ -738,6 +741,7 @@ static void flexcan_irq_state(struct net_device *dev, u32 reg_esr)
int flt;
struct can_berr_counter bec;
u32 timestamp;
+ int err;
timestamp = priv->read(&regs->timer) << 16;
@@ -769,7 +773,9 @@ static void flexcan_irq_state(struct net_device *dev, u32 reg_esr)
if (unlikely(new_state == CAN_STATE_BUS_OFF))
can_bus_off(dev);
- can_rx_offload_queue_sorted(&priv->offload, skb, timestamp);
+ err = can_rx_offload_queue_sorted(&priv->offload, skb, timestamp);
+ if (err)
+ dev->stats.rx_fifo_errors++;
}
static inline struct flexcan_priv *rx_offload_to_priv(struct can_rx_offload *offload)
@@ -1188,6 +1194,7 @@ static int flexcan_chip_start(struct net_device *dev)
reg_mecr = priv->read(&regs->mecr);
reg_mecr &= ~FLEXCAN_MECR_ECRWRDIS;
priv->write(reg_mecr, &regs->mecr);
+ reg_mecr |= FLEXCAN_MECR_ECCDIS;
reg_mecr &= ~(FLEXCAN_MECR_NCEFAFRZ | FLEXCAN_MECR_HANCEI_MSK |
FLEXCAN_MECR_FANCEI_MSK);
priv->write(reg_mecr, &regs->mecr);
diff --git a/drivers/net/can/m_can/m_can_platform.c b/drivers/net/can/m_can/m_can_platform.c
index 6ac4c35f247a..38ea5e600fb8 100644
--- a/drivers/net/can/m_can/m_can_platform.c
+++ b/drivers/net/can/m_can/m_can_platform.c
@@ -107,7 +107,7 @@ static int m_can_plat_probe(struct platform_device *pdev)
mcan_class->is_peripheral = false;
- platform_set_drvdata(pdev, mcan_class->dev);
+ platform_set_drvdata(pdev, mcan_class->net);
m_can_init_ram(mcan_class);
@@ -166,8 +166,6 @@ static int __maybe_unused m_can_runtime_resume(struct device *dev)
if (err)
clk_disable_unprepare(mcan_class->hclk);
- m_can_class_resume(dev);
-
return err;
}
diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/rx-offload.c
index e6a668ee7730..84cae167e42f 100644
--- a/drivers/net/can/rx-offload.c
+++ b/drivers/net/can/rx-offload.c
@@ -107,37 +107,95 @@ static int can_rx_offload_compare(struct sk_buff *a, struct sk_buff *b)
return cb_b->timestamp - cb_a->timestamp;
}
-static struct sk_buff *can_rx_offload_offload_one(struct can_rx_offload *offload, unsigned int n)
+/**
+ * can_rx_offload_offload_one() - Read one CAN frame from HW
+ * @offload: pointer to rx_offload context
+ * @n: number of mailbox to read
+ *
+ * The task of this function is to read a CAN frame from mailbox @n
+ * from the device and return the mailbox's content as a struct
+ * sk_buff.
+ *
+ * If the struct can_rx_offload::skb_queue exceeds the maximal queue
+ * length (struct can_rx_offload::skb_queue_len_max) or no skb can be
+ * allocated, the mailbox contents is discarded by reading it into an
+ * overflow buffer. This way the mailbox is marked as free by the
+ * driver.
+ *
+ * Return: A pointer to skb containing the CAN frame on success.
+ *
+ * NULL if the mailbox @n is empty.
+ *
+ * ERR_PTR() in case of an error
+ */
+static struct sk_buff *
+can_rx_offload_offload_one(struct can_rx_offload *offload, unsigned int n)
{
- struct sk_buff *skb = NULL;
+ struct sk_buff *skb = NULL, *skb_error = NULL;
struct can_rx_offload_cb *cb;
struct can_frame *cf;
int ret;
- /* If queue is full or skb not available, read to discard mailbox */
- if (likely(skb_queue_len(&offload->skb_queue) <=
- offload->skb_queue_len_max))
+ if (likely(skb_queue_len(&offload->skb_queue) <
+ offload->skb_queue_len_max)) {
skb = alloc_can_skb(offload->dev, &cf);
+ if (unlikely(!skb))
+ skb_error = ERR_PTR(-ENOMEM); /* skb alloc failed */
+ } else {
+ skb_error = ERR_PTR(-ENOBUFS); /* skb_queue is full */
+ }
- if (!skb) {
+ /* If queue is full or skb not available, drop by reading into
+ * overflow buffer.
+ */
+ if (unlikely(skb_error)) {
struct can_frame cf_overflow;
u32 timestamp;
ret = offload->mailbox_read(offload, &cf_overflow,
&timestamp, n);
- if (ret)
- offload->dev->stats.rx_dropped++;
- return NULL;
+ /* Mailbox was empty. */
+ if (unlikely(!ret))
+ return NULL;
+
+ /* Mailbox has been read and we're dropping it or
+ * there was a problem reading the mailbox.
+ *
+ * Increment error counters in any case.
+ */
+ offload->dev->stats.rx_dropped++;
+ offload->dev->stats.rx_fifo_errors++;
+
+ /* There was a problem reading the mailbox, propagate
+ * error value.
+ */
+ if (unlikely(ret < 0))
+ return ERR_PTR(ret);
+
+ return skb_error;
}
cb = can_rx_offload_get_cb(skb);
ret = offload->mailbox_read(offload, cf, &cb->timestamp, n);
- if (!ret) {
+
+ /* Mailbox was empty. */
+ if (unlikely(!ret)) {
kfree_skb(skb);
return NULL;
}
+ /* There was a problem reading the mailbox, propagate error value. */
+ if (unlikely(ret < 0)) {
+ kfree_skb(skb);
+
+ offload->dev->stats.rx_dropped++;
+ offload->dev->stats.rx_fifo_errors++;
+
+ return ERR_PTR(ret);
+ }
+
+ /* Mailbox was read. */
return skb;
}
@@ -157,8 +215,8 @@ int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload, u64 pen
continue;
skb = can_rx_offload_offload_one(offload, i);
- if (!skb)
- break;
+ if (IS_ERR_OR_NULL(skb))
+ continue;
__skb_queue_add_sort(&skb_queue, skb, can_rx_offload_compare);
}
@@ -188,7 +246,13 @@ int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload)
struct sk_buff *skb;
int received = 0;
- while ((skb = can_rx_offload_offload_one(offload, 0))) {
+ while (1) {
+ skb = can_rx_offload_offload_one(offload, 0);
+ if (IS_ERR(skb))
+ continue;
+ if (!skb)
+ break;
+
skb_queue_tail(&offload->skb_queue, skb);
received++;
}
@@ -207,8 +271,10 @@ int can_rx_offload_queue_sorted(struct can_rx_offload *offload,
unsigned long flags;
if (skb_queue_len(&offload->skb_queue) >
- offload->skb_queue_len_max)
- return -ENOMEM;
+ offload->skb_queue_len_max) {
+ kfree_skb(skb);
+ return -ENOBUFS;
+ }
cb = can_rx_offload_get_cb(skb);
cb->timestamp = timestamp;
@@ -250,8 +316,10 @@ int can_rx_offload_queue_tail(struct can_rx_offload *offload,
struct sk_buff *skb)
{
if (skb_queue_len(&offload->skb_queue) >
- offload->skb_queue_len_max)
- return -ENOMEM;
+ offload->skb_queue_len_max) {
+ kfree_skb(skb);
+ return -ENOBUFS;
+ }
skb_queue_tail(&offload->skb_queue, skb);
can_rx_offload_schedule(offload);
diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c
index bb6032211043..0a9f42e5fedf 100644
--- a/drivers/net/can/slcan.c
+++ b/drivers/net/can/slcan.c
@@ -617,6 +617,7 @@ err_free_chan:
sl->tty = NULL;
tty->disc_data = NULL;
clear_bit(SLF_INUSE, &sl->flags);
+ free_netdev(sl->dev);
err_exit:
rtnl_unlock();
diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c
index bee9f7b8dad6..bb20a9b75cc6 100644
--- a/drivers/net/can/spi/mcp251x.c
+++ b/drivers/net/can/spi/mcp251x.c
@@ -717,6 +717,7 @@ static void mcp251x_restart_work_handler(struct work_struct *ws)
if (priv->after_suspend) {
mcp251x_hw_reset(spi);
mcp251x_setup(net, spi);
+ priv->force_quit = 0;
if (priv->after_suspend & AFTER_SUSPEND_RESTART) {
mcp251x_set_normal_mode(spi);
} else if (priv->after_suspend & AFTER_SUSPEND_UP) {
@@ -728,7 +729,6 @@ static void mcp251x_restart_work_handler(struct work_struct *ws)
mcp251x_hw_sleep(spi);
}
priv->after_suspend = 0;
- priv->force_quit = 0;
}
if (priv->restart_tx) {
diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c
index f8b19eef5d26..31ad364a89bb 100644
--- a/drivers/net/can/ti_hecc.c
+++ b/drivers/net/can/ti_hecc.c
@@ -73,6 +73,7 @@ MODULE_VERSION(HECC_MODULE_VERSION);
*/
#define HECC_MAX_RX_MBOX (HECC_MAX_MAILBOXES - HECC_MAX_TX_MBOX)
#define HECC_RX_FIRST_MBOX (HECC_MAX_MAILBOXES - 1)
+#define HECC_RX_LAST_MBOX (HECC_MAX_TX_MBOX)
/* TI HECC module registers */
#define HECC_CANME 0x0 /* Mailbox enable */
@@ -82,7 +83,7 @@ MODULE_VERSION(HECC_MODULE_VERSION);
#define HECC_CANTA 0x10 /* Transmission acknowledge */
#define HECC_CANAA 0x14 /* Abort acknowledge */
#define HECC_CANRMP 0x18 /* Receive message pending */
-#define HECC_CANRML 0x1C /* Remote message lost */
+#define HECC_CANRML 0x1C /* Receive message lost */
#define HECC_CANRFP 0x20 /* Remote frame pending */
#define HECC_CANGAM 0x24 /* SECC only:Global acceptance mask */
#define HECC_CANMC 0x28 /* Master control */
@@ -149,6 +150,8 @@ MODULE_VERSION(HECC_MODULE_VERSION);
#define HECC_BUS_ERROR (HECC_CANES_FE | HECC_CANES_BE |\
HECC_CANES_CRCE | HECC_CANES_SE |\
HECC_CANES_ACKE)
+#define HECC_CANES_FLAGS (HECC_BUS_ERROR | HECC_CANES_BO |\
+ HECC_CANES_EP | HECC_CANES_EW)
#define HECC_CANMCF_RTR BIT(4) /* Remote transmit request */
@@ -382,8 +385,18 @@ static void ti_hecc_start(struct net_device *ndev)
hecc_set_bit(priv, HECC_CANMIM, mbx_mask);
}
- /* Prevent message over-write & Enable interrupts */
- hecc_write(priv, HECC_CANOPC, HECC_SET_REG);
+ /* Enable tx interrupts */
+ hecc_set_bit(priv, HECC_CANMIM, BIT(HECC_MAX_TX_MBOX) - 1);
+
+ /* Prevent message over-write to create a rx fifo, but not for
+ * the lowest priority mailbox, since that allows detecting
+ * overflows instead of the hardware silently dropping the
+ * messages.
+ */
+ mbx_mask = ~BIT(HECC_RX_LAST_MBOX);
+ hecc_write(priv, HECC_CANOPC, mbx_mask);
+
+ /* Enable interrupts */
if (priv->use_hecc1int) {
hecc_write(priv, HECC_CANMIL, HECC_SET_REG);
hecc_write(priv, HECC_CANGIM, HECC_CANGIM_DEF_MASK |
@@ -400,6 +413,9 @@ static void ti_hecc_stop(struct net_device *ndev)
{
struct ti_hecc_priv *priv = netdev_priv(ndev);
+ /* Disable the CPK; stop sending, erroring and acking */
+ hecc_set_bit(priv, HECC_CANMC, HECC_CANMC_CCR);
+
/* Disable interrupts and disable mailboxes */
hecc_write(priv, HECC_CANGIM, 0);
hecc_write(priv, HECC_CANMIM, 0);
@@ -508,8 +524,6 @@ static netdev_tx_t ti_hecc_xmit(struct sk_buff *skb, struct net_device *ndev)
hecc_set_bit(priv, HECC_CANME, mbx_mask);
spin_unlock_irqrestore(&priv->mbx_lock, flags);
- hecc_clear_bit(priv, HECC_CANMD, mbx_mask);
- hecc_set_bit(priv, HECC_CANMIM, mbx_mask);
hecc_write(priv, HECC_CANTRS, mbx_mask);
return NETDEV_TX_OK;
@@ -526,8 +540,10 @@ static unsigned int ti_hecc_mailbox_read(struct can_rx_offload *offload,
u32 *timestamp, unsigned int mbxno)
{
struct ti_hecc_priv *priv = rx_offload_to_priv(offload);
- u32 data;
+ u32 data, mbx_mask;
+ int ret = 1;
+ mbx_mask = BIT(mbxno);
data = hecc_read_mbx(priv, mbxno, HECC_CANMID);
if (data & HECC_CANMID_IDE)
cf->can_id = (data & CAN_EFF_MASK) | CAN_EFF_FLAG;
@@ -548,7 +564,25 @@ static unsigned int ti_hecc_mailbox_read(struct can_rx_offload *offload,
*timestamp = hecc_read_stamp(priv, mbxno);
- return 1;
+ /* Check for FIFO overrun.
+ *
+ * All but the last RX mailbox have activated overwrite
+ * protection. So skip check for overrun, if we're not
+ * handling the last RX mailbox.
+ *
+ * As the overwrite protection for the last RX mailbox is
+ * disabled, the CAN core might update while we're reading
+ * it. This means the skb might be inconsistent.
+ *
+ * Return an error to let rx-offload discard this CAN frame.
+ */
+ if (unlikely(mbxno == HECC_RX_LAST_MBOX &&
+ hecc_read(priv, HECC_CANRML) & mbx_mask))
+ ret = -ENOBUFS;
+
+ hecc_write(priv, HECC_CANRMP, mbx_mask);
+
+ return ret;
}
static int ti_hecc_error(struct net_device *ndev, int int_status,
@@ -558,92 +592,73 @@ static int ti_hecc_error(struct net_device *ndev, int int_status,
struct can_frame *cf;
struct sk_buff *skb;
u32 timestamp;
+ int err;
- /* propagate the error condition to the can stack */
- skb = alloc_can_err_skb(ndev, &cf);
- if (!skb) {
- if (printk_ratelimit())
- netdev_err(priv->ndev,
- "%s: alloc_can_err_skb() failed\n",
- __func__);
- return -ENOMEM;
- }
-
- if (int_status & HECC_CANGIF_WLIF) { /* warning level int */
- if ((int_status & HECC_CANGIF_BOIF) == 0) {
- priv->can.state = CAN_STATE_ERROR_WARNING;
- ++priv->can.can_stats.error_warning;
- cf->can_id |= CAN_ERR_CRTL;
- if (hecc_read(priv, HECC_CANTEC) > 96)
- cf->data[1] |= CAN_ERR_CRTL_TX_WARNING;
- if (hecc_read(priv, HECC_CANREC) > 96)
- cf->data[1] |= CAN_ERR_CRTL_RX_WARNING;
- }
- hecc_set_bit(priv, HECC_CANES, HECC_CANES_EW);
- netdev_dbg(priv->ndev, "Error Warning interrupt\n");
- hecc_clear_bit(priv, HECC_CANMC, HECC_CANMC_CCR);
- }
-
- if (int_status & HECC_CANGIF_EPIF) { /* error passive int */
- if ((int_status & HECC_CANGIF_BOIF) == 0) {
- priv->can.state = CAN_STATE_ERROR_PASSIVE;
- ++priv->can.can_stats.error_passive;
- cf->can_id |= CAN_ERR_CRTL;
- if (hecc_read(priv, HECC_CANTEC) > 127)
- cf->data[1] |= CAN_ERR_CRTL_TX_PASSIVE;
- if (hecc_read(priv, HECC_CANREC) > 127)
- cf->data[1] |= CAN_ERR_CRTL_RX_PASSIVE;
+ if (err_status & HECC_BUS_ERROR) {
+ /* propagate the error condition to the can stack */
+ skb = alloc_can_err_skb(ndev, &cf);
+ if (!skb) {
+ if (net_ratelimit())
+ netdev_err(priv->ndev,
+ "%s: alloc_can_err_skb() failed\n",
+ __func__);
+ return -ENOMEM;
}
- hecc_set_bit(priv, HECC_CANES, HECC_CANES_EP);
- netdev_dbg(priv->ndev, "Error passive interrupt\n");
- hecc_clear_bit(priv, HECC_CANMC, HECC_CANMC_CCR);
- }
-
- /* Need to check busoff condition in error status register too to
- * ensure warning interrupts don't hog the system
- */
- if ((int_status & HECC_CANGIF_BOIF) || (err_status & HECC_CANES_BO)) {
- priv->can.state = CAN_STATE_BUS_OFF;
- cf->can_id |= CAN_ERR_BUSOFF;
- hecc_set_bit(priv, HECC_CANES, HECC_CANES_BO);
- hecc_clear_bit(priv, HECC_CANMC, HECC_CANMC_CCR);
- /* Disable all interrupts in bus-off to avoid int hog */
- hecc_write(priv, HECC_CANGIM, 0);
- ++priv->can.can_stats.bus_off;
- can_bus_off(ndev);
- }
- if (err_status & HECC_BUS_ERROR) {
++priv->can.can_stats.bus_error;
cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_PROT;
- if (err_status & HECC_CANES_FE) {
- hecc_set_bit(priv, HECC_CANES, HECC_CANES_FE);
+ if (err_status & HECC_CANES_FE)
cf->data[2] |= CAN_ERR_PROT_FORM;
- }
- if (err_status & HECC_CANES_BE) {
- hecc_set_bit(priv, HECC_CANES, HECC_CANES_BE);
+ if (err_status & HECC_CANES_BE)
cf->data[2] |= CAN_ERR_PROT_BIT;
- }
- if (err_status & HECC_CANES_SE) {
- hecc_set_bit(priv, HECC_CANES, HECC_CANES_SE);
+ if (err_status & HECC_CANES_SE)
cf->data[2] |= CAN_ERR_PROT_STUFF;
- }
- if (err_status & HECC_CANES_CRCE) {
- hecc_set_bit(priv, HECC_CANES, HECC_CANES_CRCE);
+ if (err_status & HECC_CANES_CRCE)
cf->data[3] = CAN_ERR_PROT_LOC_CRC_SEQ;
- }
- if (err_status & HECC_CANES_ACKE) {
- hecc_set_bit(priv, HECC_CANES, HECC_CANES_ACKE);
+ if (err_status & HECC_CANES_ACKE)
cf->data[3] = CAN_ERR_PROT_LOC_ACK;
- }
+
+ timestamp = hecc_read(priv, HECC_CANLNT);
+ err = can_rx_offload_queue_sorted(&priv->offload, skb,
+ timestamp);
+ if (err)
+ ndev->stats.rx_fifo_errors++;
}
- timestamp = hecc_read(priv, HECC_CANLNT);
- can_rx_offload_queue_sorted(&priv->offload, skb, timestamp);
+ hecc_write(priv, HECC_CANES, HECC_CANES_FLAGS);
return 0;
}
+static void ti_hecc_change_state(struct net_device *ndev,
+ enum can_state rx_state,
+ enum can_state tx_state)
+{
+ struct ti_hecc_priv *priv = netdev_priv(ndev);
+ struct can_frame *cf;
+ struct sk_buff *skb;
+ u32 timestamp;
+ int err;
+
+ skb = alloc_can_err_skb(priv->ndev, &cf);
+ if (unlikely(!skb)) {
+ priv->can.state = max(tx_state, rx_state);
+ return;
+ }
+
+ can_change_state(priv->ndev, cf, tx_state, rx_state);
+
+ if (max(tx_state, rx_state) != CAN_STATE_BUS_OFF) {
+ cf->data[6] = hecc_read(priv, HECC_CANTEC);
+ cf->data[7] = hecc_read(priv, HECC_CANREC);
+ }
+
+ timestamp = hecc_read(priv, HECC_CANLNT);
+ err = can_rx_offload_queue_sorted(&priv->offload, skb, timestamp);
+ if (err)
+ ndev->stats.rx_fifo_errors++;
+}
+
static irqreturn_t ti_hecc_interrupt(int irq, void *dev_id)
{
struct net_device *ndev = (struct net_device *)dev_id;
@@ -651,6 +666,7 @@ static irqreturn_t ti_hecc_interrupt(int irq, void *dev_id)
struct net_device_stats *stats = &ndev->stats;
u32 mbxno, mbx_mask, int_status, err_status, stamp;
unsigned long flags, rx_pending;
+ u32 handled = 0;
int_status = hecc_read(priv,
priv->use_hecc1int ?
@@ -660,17 +676,66 @@ static irqreturn_t ti_hecc_interrupt(int irq, void *dev_id)
return IRQ_NONE;
err_status = hecc_read(priv, HECC_CANES);
- if (err_status & (HECC_BUS_ERROR | HECC_CANES_BO |
- HECC_CANES_EP | HECC_CANES_EW))
+ if (unlikely(err_status & HECC_CANES_FLAGS))
ti_hecc_error(ndev, int_status, err_status);
+ if (unlikely(int_status & HECC_CANGIM_DEF_MASK)) {
+ enum can_state rx_state, tx_state;
+ u32 rec = hecc_read(priv, HECC_CANREC);
+ u32 tec = hecc_read(priv, HECC_CANTEC);
+
+ if (int_status & HECC_CANGIF_WLIF) {
+ handled |= HECC_CANGIF_WLIF;
+ rx_state = rec >= tec ? CAN_STATE_ERROR_WARNING : 0;
+ tx_state = rec <= tec ? CAN_STATE_ERROR_WARNING : 0;
+ netdev_dbg(priv->ndev, "Error Warning interrupt\n");
+ ti_hecc_change_state(ndev, rx_state, tx_state);
+ }
+
+ if (int_status & HECC_CANGIF_EPIF) {
+ handled |= HECC_CANGIF_EPIF;
+ rx_state = rec >= tec ? CAN_STATE_ERROR_PASSIVE : 0;
+ tx_state = rec <= tec ? CAN_STATE_ERROR_PASSIVE : 0;
+ netdev_dbg(priv->ndev, "Error passive interrupt\n");
+ ti_hecc_change_state(ndev, rx_state, tx_state);
+ }
+
+ if (int_status & HECC_CANGIF_BOIF) {
+ handled |= HECC_CANGIF_BOIF;
+ rx_state = CAN_STATE_BUS_OFF;
+ tx_state = CAN_STATE_BUS_OFF;
+ netdev_dbg(priv->ndev, "Bus off interrupt\n");
+
+ /* Disable all interrupts */
+ hecc_write(priv, HECC_CANGIM, 0);
+ can_bus_off(ndev);
+ ti_hecc_change_state(ndev, rx_state, tx_state);
+ }
+ } else if (unlikely(priv->can.state != CAN_STATE_ERROR_ACTIVE)) {
+ enum can_state new_state, tx_state, rx_state;
+ u32 rec = hecc_read(priv, HECC_CANREC);
+ u32 tec = hecc_read(priv, HECC_CANTEC);
+
+ if (rec >= 128 || tec >= 128)
+ new_state = CAN_STATE_ERROR_PASSIVE;
+ else if (rec >= 96 || tec >= 96)
+ new_state = CAN_STATE_ERROR_WARNING;
+ else
+ new_state = CAN_STATE_ERROR_ACTIVE;
+
+ if (new_state < priv->can.state) {
+ rx_state = rec >= tec ? new_state : 0;
+ tx_state = rec <= tec ? new_state : 0;
+ ti_hecc_change_state(ndev, rx_state, tx_state);
+ }
+ }
+
if (int_status & HECC_CANGIF_GMIF) {
while (priv->tx_tail - priv->tx_head > 0) {
mbxno = get_tx_tail_mb(priv);
mbx_mask = BIT(mbxno);
if (!(mbx_mask & hecc_read(priv, HECC_CANTA)))
break;
- hecc_clear_bit(priv, HECC_CANMIM, mbx_mask);
hecc_write(priv, HECC_CANTA, mbx_mask);
spin_lock_irqsave(&priv->mbx_lock, flags);
hecc_clear_bit(priv, HECC_CANME, mbx_mask);
@@ -695,16 +760,15 @@ static irqreturn_t ti_hecc_interrupt(int irq, void *dev_id)
while ((rx_pending = hecc_read(priv, HECC_CANRMP))) {
can_rx_offload_irq_offload_timestamp(&priv->offload,
rx_pending);
- hecc_write(priv, HECC_CANRMP, rx_pending);
}
}
/* clear all interrupt conditions - read back to avoid spurious ints */
if (priv->use_hecc1int) {
- hecc_write(priv, HECC_CANGIF1, HECC_SET_REG);
+ hecc_write(priv, HECC_CANGIF1, handled);
int_status = hecc_read(priv, HECC_CANGIF1);
} else {
- hecc_write(priv, HECC_CANGIF0, HECC_SET_REG);
+ hecc_write(priv, HECC_CANGIF0, handled);
int_status = hecc_read(priv, HECC_CANGIF0);
}
@@ -877,7 +941,7 @@ static int ti_hecc_probe(struct platform_device *pdev)
priv->offload.mailbox_read = ti_hecc_mailbox_read;
priv->offload.mb_first = HECC_RX_FIRST_MBOX;
- priv->offload.mb_last = HECC_MAX_TX_MBOX;
+ priv->offload.mb_last = HECC_RX_LAST_MBOX;
err = can_rx_offload_add_timestamp(ndev, &priv->offload);
if (err) {
dev_err(&pdev->dev, "can_rx_offload_add_timestamp() failed\n");
diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
index bd6eb9967630..2f74f6704c12 100644
--- a/drivers/net/can/usb/gs_usb.c
+++ b/drivers/net/can/usb/gs_usb.c
@@ -623,6 +623,7 @@ static int gs_can_open(struct net_device *netdev)
rc);
usb_unanchor_urb(urb);
+ usb_free_urb(urb);
break;
}
diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c
index 19a702ac49e4..21faa2ec4632 100644
--- a/drivers/net/can/usb/mcba_usb.c
+++ b/drivers/net/can/usb/mcba_usb.c
@@ -876,9 +876,8 @@ static void mcba_usb_disconnect(struct usb_interface *intf)
netdev_info(priv->netdev, "device disconnected\n");
unregister_candev(priv->netdev);
- free_candev(priv->netdev);
-
mcba_urb_unlink(priv);
+ free_candev(priv->netdev);
}
static struct usb_driver mcba_usb_driver = {
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c
index 617da295b6c1..d2539c95adb6 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb.c
@@ -100,7 +100,7 @@ struct pcan_usb_msg_context {
u8 *end;
u8 rec_cnt;
u8 rec_idx;
- u8 rec_data_idx;
+ u8 rec_ts_idx;
struct net_device *netdev;
struct pcan_usb *pdev;
};
@@ -436,8 +436,8 @@ static int pcan_usb_decode_error(struct pcan_usb_msg_context *mc, u8 n,
}
if ((n & PCAN_USB_ERROR_BUS_LIGHT) == 0) {
/* no error (back to active state) */
- mc->pdev->dev.can.state = CAN_STATE_ERROR_ACTIVE;
- return 0;
+ new_state = CAN_STATE_ERROR_ACTIVE;
+ break;
}
break;
@@ -460,9 +460,9 @@ static int pcan_usb_decode_error(struct pcan_usb_msg_context *mc, u8 n,
}
if ((n & PCAN_USB_ERROR_BUS_HEAVY) == 0) {
- /* no error (back to active state) */
- mc->pdev->dev.can.state = CAN_STATE_ERROR_ACTIVE;
- return 0;
+ /* no error (back to warning state) */
+ new_state = CAN_STATE_ERROR_WARNING;
+ break;
}
break;
@@ -501,6 +501,11 @@ static int pcan_usb_decode_error(struct pcan_usb_msg_context *mc, u8 n,
mc->pdev->dev.can.can_stats.error_warning++;
break;
+ case CAN_STATE_ERROR_ACTIVE:
+ cf->can_id |= CAN_ERR_CRTL;
+ cf->data[1] = CAN_ERR_CRTL_ACTIVE;
+ break;
+
default:
/* CAN_STATE_MAX (trick to handle other errors) */
cf->can_id |= CAN_ERR_CRTL;
@@ -547,10 +552,15 @@ static int pcan_usb_decode_status(struct pcan_usb_msg_context *mc,
mc->ptr += PCAN_USB_CMD_ARGS;
if (status_len & PCAN_USB_STATUSLEN_TIMESTAMP) {
- int err = pcan_usb_decode_ts(mc, !mc->rec_idx);
+ int err = pcan_usb_decode_ts(mc, !mc->rec_ts_idx);
if (err)
return err;
+
+ /* Next packet in the buffer will have a timestamp on a single
+ * byte
+ */
+ mc->rec_ts_idx++;
}
switch (f) {
@@ -632,10 +642,13 @@ static int pcan_usb_decode_data(struct pcan_usb_msg_context *mc, u8 status_len)
cf->can_dlc = get_can_dlc(rec_len);
- /* first data packet timestamp is a word */
- if (pcan_usb_decode_ts(mc, !mc->rec_data_idx))
+ /* Only first packet timestamp is a word */
+ if (pcan_usb_decode_ts(mc, !mc->rec_ts_idx))
goto decode_failed;
+ /* Next packet in the buffer will have a timestamp on a single byte */
+ mc->rec_ts_idx++;
+
/* read data */
memset(cf->data, 0x0, sizeof(cf->data));
if (status_len & PCAN_USB_STATUSLEN_RTR) {
@@ -688,7 +701,6 @@ static int pcan_usb_decode_msg(struct peak_usb_device *dev, u8 *ibuf, u32 lbuf)
/* handle normal can frames here */
} else {
err = pcan_usb_decode_data(&mc, sl);
- mc.rec_data_idx++;
}
}
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
index 65dce642b86b..0b7766b715fd 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
@@ -750,7 +750,7 @@ static int peak_usb_create_dev(const struct peak_usb_adapter *peak_usb_adapter,
dev = netdev_priv(netdev);
/* allocate a buffer large enough to send commands */
- dev->cmd_buf = kmalloc(PCAN_USB_MAX_CMD_LEN, GFP_KERNEL);
+ dev->cmd_buf = kzalloc(PCAN_USB_MAX_CMD_LEN, GFP_KERNEL);
if (!dev->cmd_buf) {
err = -ENOMEM;
goto lbl_free_candev;
diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c
index d596a2ad7f78..8fa224b28218 100644
--- a/drivers/net/can/usb/usb_8dev.c
+++ b/drivers/net/can/usb/usb_8dev.c
@@ -996,9 +996,8 @@ static void usb_8dev_disconnect(struct usb_interface *intf)
netdev_info(priv->netdev, "device disconnected\n");
unregister_netdev(priv->netdev);
- free_candev(priv->netdev);
-
unlink_all_urbs(priv);
+ free_candev(priv->netdev);
}
}
diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c
index 911b34316c9d..7c482b2d78d2 100644
--- a/drivers/net/can/xilinx_can.c
+++ b/drivers/net/can/xilinx_can.c
@@ -1599,7 +1599,6 @@ static const struct xcan_devtype_data xcan_zynq_data = {
static const struct xcan_devtype_data xcan_axi_data = {
.cantype = XAXI_CAN,
- .flags = XCAN_FLAG_TXFEMP,
.bittiming_const = &xcan_bittiming_const,
.btr_ts2_shift = XCAN_BTR_TS2_SHIFT,
.btr_sjw_shift = XCAN_BTR_SJW_SHIFT,
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 526ba2ab66f1..cc3536315eff 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1845,7 +1845,6 @@ int b53_mirror_add(struct dsa_switch *ds, int port,
loc = B53_EG_MIR_CTL;
b53_read16(dev, B53_MGMT_PAGE, loc, &reg);
- reg &= ~MIRROR_MASK;
reg |= BIT(port);
b53_write16(dev, B53_MGMT_PAGE, loc, reg);
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index 26509fa37a50..69fc13046ac7 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -37,22 +37,11 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port)
unsigned int i;
u32 reg, offset;
- if (priv->type == BCM7445_DEVICE_ID)
- offset = CORE_STS_OVERRIDE_IMP;
- else
- offset = CORE_STS_OVERRIDE_IMP2;
-
/* Enable the port memories */
reg = core_readl(priv, CORE_MEM_PSM_VDD_CTRL);
reg &= ~P_TXQ_PSM_VDD(port);
core_writel(priv, reg, CORE_MEM_PSM_VDD_CTRL);
- /* Enable Broadcast, Multicast, Unicast forwarding to IMP port */
- reg = core_readl(priv, CORE_IMP_CTL);
- reg |= (RX_BCST_EN | RX_MCST_EN | RX_UCST_EN);
- reg &= ~(RX_DIS | TX_DIS);
- core_writel(priv, reg, CORE_IMP_CTL);
-
/* Enable forwarding */
core_writel(priv, SW_FWDG_EN, CORE_SWMODE);
@@ -71,10 +60,27 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port)
b53_brcm_hdr_setup(ds, port);
- /* Force link status for IMP port */
- reg = core_readl(priv, offset);
- reg |= (MII_SW_OR | LINK_STS);
- core_writel(priv, reg, offset);
+ if (port == 8) {
+ if (priv->type == BCM7445_DEVICE_ID)
+ offset = CORE_STS_OVERRIDE_IMP;
+ else
+ offset = CORE_STS_OVERRIDE_IMP2;
+
+ /* Force link status for IMP port */
+ reg = core_readl(priv, offset);
+ reg |= (MII_SW_OR | LINK_STS);
+ core_writel(priv, reg, offset);
+
+ /* Enable Broadcast, Multicast, Unicast forwarding to IMP port */
+ reg = core_readl(priv, CORE_IMP_CTL);
+ reg |= (RX_BCST_EN | RX_MCST_EN | RX_UCST_EN);
+ reg &= ~(RX_DIS | TX_DIS);
+ core_writel(priv, reg, CORE_IMP_CTL);
+ } else {
+ reg = core_readl(priv, CORE_G_PCTL_PORT(port));
+ reg &= ~(RX_DIS | TX_DIS);
+ core_writel(priv, reg, CORE_G_PCTL_PORT(port));
+ }
}
static void bcm_sf2_gphy_enable_set(struct dsa_switch *ds, bool enable)
@@ -1209,10 +1215,10 @@ static int bcm_sf2_sw_remove(struct platform_device *pdev)
struct bcm_sf2_priv *priv = platform_get_drvdata(pdev);
priv->wol_ports_mask = 0;
+ /* Disable interrupts */
+ bcm_sf2_intr_disable(priv);
dsa_unregister_switch(priv->dev->ds);
bcm_sf2_cfp_exit(priv->dev->ds);
- /* Disable all ports and interrupts */
- bcm_sf2_sw_suspend(priv->dev->ds);
bcm_sf2_mdio_unregister(priv);
return 0;
diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c
index a23d3ffdf0c4..24a5e99f7fd5 100644
--- a/drivers/net/dsa/microchip/ksz8795.c
+++ b/drivers/net/dsa/microchip/ksz8795.c
@@ -1224,10 +1224,6 @@ static int ksz8795_switch_init(struct ksz_device *dev)
{
int i;
- mutex_init(&dev->stats_mutex);
- mutex_init(&dev->alu_mutex);
- mutex_init(&dev->vlan_mutex);
-
dev->ds->ops = &ksz8795_switch_ops;
for (i = 0; i < ARRAY_SIZE(ksz8795_switch_chips); i++) {
diff --git a/drivers/net/dsa/microchip/ksz8795_spi.c b/drivers/net/dsa/microchip/ksz8795_spi.c
index d0f8153e86b7..8b00f8e6c02f 100644
--- a/drivers/net/dsa/microchip/ksz8795_spi.c
+++ b/drivers/net/dsa/microchip/ksz8795_spi.c
@@ -25,6 +25,7 @@ KSZ_REGMAP_TABLE(ksz8795, 16, SPI_ADDR_SHIFT,
static int ksz8795_spi_probe(struct spi_device *spi)
{
+ struct regmap_config rc;
struct ksz_device *dev;
int i, ret;
@@ -33,9 +34,9 @@ static int ksz8795_spi_probe(struct spi_device *spi)
return -ENOMEM;
for (i = 0; i < ARRAY_SIZE(ksz8795_regmap_config); i++) {
- dev->regmap[i] = devm_regmap_init_spi(spi,
- &ksz8795_regmap_config
- [i]);
+ rc = ksz8795_regmap_config[i];
+ rc.lock_arg = &dev->regmap_mutex;
+ dev->regmap[i] = devm_regmap_init_spi(spi, &rc);
if (IS_ERR(dev->regmap[i])) {
ret = PTR_ERR(dev->regmap[i]);
dev_err(&spi->dev,
diff --git a/drivers/net/dsa/microchip/ksz9477_i2c.c b/drivers/net/dsa/microchip/ksz9477_i2c.c
index 0b1e01f0873d..fdffd9e0c518 100644
--- a/drivers/net/dsa/microchip/ksz9477_i2c.c
+++ b/drivers/net/dsa/microchip/ksz9477_i2c.c
@@ -17,6 +17,7 @@ KSZ_REGMAP_TABLE(ksz9477, not_used, 16, 0, 0);
static int ksz9477_i2c_probe(struct i2c_client *i2c,
const struct i2c_device_id *i2c_id)
{
+ struct regmap_config rc;
struct ksz_device *dev;
int i, ret;
@@ -25,8 +26,9 @@ static int ksz9477_i2c_probe(struct i2c_client *i2c,
return -ENOMEM;
for (i = 0; i < ARRAY_SIZE(ksz9477_regmap_config); i++) {
- dev->regmap[i] = devm_regmap_init_i2c(i2c,
- &ksz9477_regmap_config[i]);
+ rc = ksz9477_regmap_config[i];
+ rc.lock_arg = &dev->regmap_mutex;
+ dev->regmap[i] = devm_regmap_init_i2c(i2c, &rc);
if (IS_ERR(dev->regmap[i])) {
ret = PTR_ERR(dev->regmap[i]);
dev_err(&i2c->dev,
diff --git a/drivers/net/dsa/microchip/ksz9477_reg.h b/drivers/net/dsa/microchip/ksz9477_reg.h
index 2938e892b631..16939f29faa5 100644
--- a/drivers/net/dsa/microchip/ksz9477_reg.h
+++ b/drivers/net/dsa/microchip/ksz9477_reg.h
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- *
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
* Microchip KSZ9477 register definitions
*
* Copyright (C) 2017-2018 Microchip Technology Inc.
diff --git a/drivers/net/dsa/microchip/ksz9477_spi.c b/drivers/net/dsa/microchip/ksz9477_spi.c
index f4198d6f72be..c5f64959a184 100644
--- a/drivers/net/dsa/microchip/ksz9477_spi.c
+++ b/drivers/net/dsa/microchip/ksz9477_spi.c
@@ -24,6 +24,7 @@ KSZ_REGMAP_TABLE(ksz9477, 32, SPI_ADDR_SHIFT,
static int ksz9477_spi_probe(struct spi_device *spi)
{
+ struct regmap_config rc;
struct ksz_device *dev;
int i, ret;
@@ -32,8 +33,9 @@ static int ksz9477_spi_probe(struct spi_device *spi)
return -ENOMEM;
for (i = 0; i < ARRAY_SIZE(ksz9477_regmap_config); i++) {
- dev->regmap[i] = devm_regmap_init_spi(spi,
- &ksz9477_regmap_config[i]);
+ rc = ksz9477_regmap_config[i];
+ rc.lock_arg = &dev->regmap_mutex;
+ dev->regmap[i] = devm_regmap_init_spi(spi, &rc);
if (IS_ERR(dev->regmap[i])) {
ret = PTR_ERR(dev->regmap[i]);
dev_err(&spi->dev,
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index b0b870f0c252..fe47180c908b 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -436,7 +436,7 @@ int ksz_switch_register(struct ksz_device *dev,
}
mutex_init(&dev->dev_mutex);
- mutex_init(&dev->stats_mutex);
+ mutex_init(&dev->regmap_mutex);
mutex_init(&dev->alu_mutex);
mutex_init(&dev->vlan_mutex);
diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h
index dd60d0837fc6..a20ebb749377 100644
--- a/drivers/net/dsa/microchip/ksz_common.h
+++ b/drivers/net/dsa/microchip/ksz_common.h
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Microchip switch driver common header
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Microchip switch driver common header
*
* Copyright (C) 2017-2019 Microchip Technology Inc.
*/
@@ -47,7 +47,7 @@ struct ksz_device {
const char *name;
struct mutex dev_mutex; /* device access */
- struct mutex stats_mutex; /* status access */
+ struct mutex regmap_mutex; /* regmap access */
struct mutex alu_mutex; /* ALU access */
struct mutex vlan_mutex; /* vlan access */
const struct ksz_dev_ops *dev_ops;
@@ -290,6 +290,18 @@ static inline void ksz_pwrite32(struct ksz_device *dev, int port, int offset,
ksz_write32(dev, dev->dev_ops->get_port_addr(port, offset), data);
}
+static inline void ksz_regmap_lock(void *__mtx)
+{
+ struct mutex *mtx = __mtx;
+ mutex_lock(mtx);
+}
+
+static inline void ksz_regmap_unlock(void *__mtx)
+{
+ struct mutex *mtx = __mtx;
+ mutex_unlock(mtx);
+}
+
/* Regmap tables generation */
#define KSZ_SPI_OP_RD 3
#define KSZ_SPI_OP_WR 2
@@ -314,6 +326,8 @@ static inline void ksz_pwrite32(struct ksz_device *dev, int port, int offset,
.write_flag_mask = \
KSZ_SPI_OP_FLAG_MASK(KSZ_SPI_OP_WR, swp, \
regbits, regpad), \
+ .lock = ksz_regmap_lock, \
+ .unlock = ksz_regmap_unlock, \
.reg_format_endian = REGMAP_ENDIAN_BIG, \
.val_format_endian = REGMAP_ENDIAN_BIG \
}
diff --git a/drivers/net/dsa/mv88e6xxx/ptp.c b/drivers/net/dsa/mv88e6xxx/ptp.c
index 073cbd0bb91b..d838c174dc0d 100644
--- a/drivers/net/dsa/mv88e6xxx/ptp.c
+++ b/drivers/net/dsa/mv88e6xxx/ptp.c
@@ -273,6 +273,19 @@ static int mv88e6352_ptp_enable_extts(struct mv88e6xxx_chip *chip,
int pin;
int err;
+ /* Reject requests with unsupported flags */
+ if (rq->extts.flags & ~(PTP_ENABLE_FEATURE |
+ PTP_RISING_EDGE |
+ PTP_FALLING_EDGE |
+ PTP_STRICT_FLAGS))
+ return -EOPNOTSUPP;
+
+ /* Reject requests to enable time stamping on both edges. */
+ if ((rq->extts.flags & PTP_STRICT_FLAGS) &&
+ (rq->extts.flags & PTP_ENABLE_FEATURE) &&
+ (rq->extts.flags & PTP_EXTTS_EDGES) == PTP_EXTTS_EDGES)
+ return -EOPNOTSUPP;
+
pin = ptp_find_pin(chip->ptp_clock, PTP_PF_EXTTS, rq->extts.index);
if (pin < 0)
diff --git a/drivers/net/dsa/sja1105/Kconfig b/drivers/net/dsa/sja1105/Kconfig
index f40b248f0b23..ffac0ea4e8d5 100644
--- a/drivers/net/dsa/sja1105/Kconfig
+++ b/drivers/net/dsa/sja1105/Kconfig
@@ -26,8 +26,8 @@ config NET_DSA_SJA1105_PTP
config NET_DSA_SJA1105_TAS
bool "Support for the Time-Aware Scheduler on NXP SJA1105"
- depends on NET_DSA_SJA1105
- depends on NET_SCH_TAPRIO
+ depends on NET_DSA_SJA1105 && NET_SCH_TAPRIO
+ depends on NET_SCH_TAPRIO=y || NET_DSA_SJA1105=m
help
This enables support for the TTEthernet-based egress scheduling
engine in the SJA1105 DSA driver, which is controlled using a
diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index e53e494c22e0..fbb564c3beb8 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Copyright (c) 2018, Sensor-Technik Wiedemann GmbH
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Sensor-Technik Wiedemann GmbH
* Copyright (c) 2018-2019, Vladimir Oltean <olteanv@gmail.com>
*/
#ifndef _SJA1105_H
diff --git a/drivers/net/dsa/sja1105/sja1105_dynamic_config.h b/drivers/net/dsa/sja1105/sja1105_dynamic_config.h
index 740dadf43f01..1fc0d13dc623 100644
--- a/drivers/net/dsa/sja1105/sja1105_dynamic_config.h
+++ b/drivers/net/dsa/sja1105/sja1105_dynamic_config.h
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com>
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com>
*/
#ifndef _SJA1105_DYNAMIC_CONFIG_H
#define _SJA1105_DYNAMIC_CONFIG_H
diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.h b/drivers/net/dsa/sja1105/sja1105_ptp.h
index af456b0a4d27..394e12a6ad59 100644
--- a/drivers/net/dsa/sja1105/sja1105_ptp.h
+++ b/drivers/net/dsa/sja1105/sja1105_ptp.h
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com>
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com>
*/
#ifndef _SJA1105_PTP_H
#define _SJA1105_PTP_H
diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.h b/drivers/net/dsa/sja1105/sja1105_static_config.h
index 7f87022a2d61..f4a5c5c04311 100644
--- a/drivers/net/dsa/sja1105/sja1105_static_config.h
+++ b/drivers/net/dsa/sja1105/sja1105_static_config.h
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright (c) 2016-2018, NXP Semiconductors
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Copyright (c) 2016-2018, NXP Semiconductors
* Copyright (c) 2018-2019, Vladimir Oltean <olteanv@gmail.com>
*/
#ifndef _SJA1105_STATIC_CONFIG_H
diff --git a/drivers/net/dsa/sja1105/sja1105_tas.h b/drivers/net/dsa/sja1105/sja1105_tas.h
index 0b803c30e640..0aad212d88b2 100644
--- a/drivers/net/dsa/sja1105/sja1105_tas.h
+++ b/drivers/net/dsa/sja1105/sja1105_tas.h
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com>
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019, Vladimir Oltean <olteanv@gmail.com>
*/
#ifndef _SJA1105_TAS_H
#define _SJA1105_TAS_H
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c
index b4a0fb281e69..bb65dd39f847 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c
@@ -194,9 +194,7 @@ static void aq_ndev_set_multicast_settings(struct net_device *ndev)
{
struct aq_nic_s *aq_nic = netdev_priv(ndev);
- aq_nic_set_packet_filter(aq_nic, ndev->flags);
-
- aq_nic_set_multicast_list(aq_nic, ndev);
+ (void)aq_nic_set_multicast_list(aq_nic, ndev);
}
static int aq_ndo_vlan_rx_add_vid(struct net_device *ndev, __be16 proto,
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index 8f66e7817811..137c1de4c6ec 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -631,9 +631,12 @@ err_exit:
int aq_nic_set_multicast_list(struct aq_nic_s *self, struct net_device *ndev)
{
- unsigned int packet_filter = self->packet_filter;
+ const struct aq_hw_ops *hw_ops = self->aq_hw_ops;
+ struct aq_nic_cfg_s *cfg = &self->aq_nic_cfg;
+ unsigned int packet_filter = ndev->flags;
struct netdev_hw_addr *ha = NULL;
unsigned int i = 0U;
+ int err = 0;
self->mc_list.count = 0;
if (netdev_uc_count(ndev) > AQ_HW_MULTICAST_ADDRESS_MAX) {
@@ -641,29 +644,28 @@ int aq_nic_set_multicast_list(struct aq_nic_s *self, struct net_device *ndev)
} else {
netdev_for_each_uc_addr(ha, ndev) {
ether_addr_copy(self->mc_list.ar[i++], ha->addr);
-
- if (i >= AQ_HW_MULTICAST_ADDRESS_MAX)
- break;
}
}
- if (i + netdev_mc_count(ndev) > AQ_HW_MULTICAST_ADDRESS_MAX) {
- packet_filter |= IFF_ALLMULTI;
- } else {
- netdev_for_each_mc_addr(ha, ndev) {
- ether_addr_copy(self->mc_list.ar[i++], ha->addr);
-
- if (i >= AQ_HW_MULTICAST_ADDRESS_MAX)
- break;
+ cfg->is_mc_list_enabled = !!(packet_filter & IFF_MULTICAST);
+ if (cfg->is_mc_list_enabled) {
+ if (i + netdev_mc_count(ndev) > AQ_HW_MULTICAST_ADDRESS_MAX) {
+ packet_filter |= IFF_ALLMULTI;
+ } else {
+ netdev_for_each_mc_addr(ha, ndev) {
+ ether_addr_copy(self->mc_list.ar[i++],
+ ha->addr);
+ }
}
}
if (i > 0 && i <= AQ_HW_MULTICAST_ADDRESS_MAX) {
- packet_filter |= IFF_MULTICAST;
self->mc_list.count = i;
- self->aq_hw_ops->hw_multicast_list_set(self->aq_hw,
- self->mc_list.ar,
- self->mc_list.count);
+ err = hw_ops->hw_multicast_list_set(self->aq_hw,
+ self->mc_list.ar,
+ self->mc_list.count);
+ if (err < 0)
+ return err;
}
return aq_nic_set_packet_filter(self, packet_filter);
}
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
index 3901d7994ca1..76bdbe1596d6 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
@@ -313,6 +313,7 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
break;
buff->is_error |= buff_->is_error;
+ buff->is_cso_err |= buff_->is_cso_err;
} while (!buff_->is_eop);
@@ -320,7 +321,7 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
err = 0;
goto err_exit;
}
- if (buff->is_error) {
+ if (buff->is_error || buff->is_cso_err) {
buff_ = buff;
do {
next_ = buff_->next,
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index 30f7fc4c97ff..2ad3fa6316ce 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -818,14 +818,15 @@ static int hw_atl_b0_hw_packet_filter_set(struct aq_hw_s *self,
cfg->is_vlan_force_promisc);
hw_atl_rpfl2multicast_flr_en_set(self,
- IS_FILTER_ENABLED(IFF_ALLMULTI), 0);
+ IS_FILTER_ENABLED(IFF_ALLMULTI) &&
+ IS_FILTER_ENABLED(IFF_MULTICAST), 0);
hw_atl_rpfl2_accept_all_mc_packets_set(self,
- IS_FILTER_ENABLED(IFF_ALLMULTI));
+ IS_FILTER_ENABLED(IFF_ALLMULTI) &&
+ IS_FILTER_ENABLED(IFF_MULTICAST));
hw_atl_rpfl2broadcast_en_set(self, IS_FILTER_ENABLED(IFF_BROADCAST));
- cfg->is_mc_list_enabled = IS_FILTER_ENABLED(IFF_MULTICAST);
for (i = HW_ATL_B0_MAC_MIN; i < HW_ATL_B0_MAC_MAX; ++i)
hw_atl_rpfl2_uc_flr_en_set(self,
@@ -968,14 +969,26 @@ static int hw_atl_b0_hw_interrupt_moderation_set(struct aq_hw_s *self)
static int hw_atl_b0_hw_stop(struct aq_hw_s *self)
{
+ int err;
+ u32 val;
+
hw_atl_b0_hw_irq_disable(self, HW_ATL_B0_INT_MASK);
/* Invalidate Descriptor Cache to prevent writing to the cached
* descriptors and to the data pointer of those descriptors
*/
- hw_atl_rdm_rx_dma_desc_cache_init_set(self, 1);
+ hw_atl_rdm_rx_dma_desc_cache_init_tgl(self);
- return aq_hw_err_from_flags(self);
+ err = aq_hw_err_from_flags(self);
+
+ if (err)
+ goto err_exit;
+
+ readx_poll_timeout_atomic(hw_atl_rdm_rx_dma_desc_cache_init_done_get,
+ self, val, val == 1, 1000U, 10000U);
+
+err_exit:
+ return err;
}
static int hw_atl_b0_hw_ring_tx_stop(struct aq_hw_s *self,
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c
index 1149812ae463..6f340695e6bd 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c
@@ -606,12 +606,25 @@ void hw_atl_rpb_rx_flow_ctl_mode_set(struct aq_hw_s *aq_hw, u32 rx_flow_ctl_mode
HW_ATL_RPB_RX_FC_MODE_SHIFT, rx_flow_ctl_mode);
}
-void hw_atl_rdm_rx_dma_desc_cache_init_set(struct aq_hw_s *aq_hw, u32 init)
+void hw_atl_rdm_rx_dma_desc_cache_init_tgl(struct aq_hw_s *aq_hw)
{
+ u32 val;
+
+ val = aq_hw_read_reg_bit(aq_hw, HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_ADR,
+ HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_MSK,
+ HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_SHIFT);
+
aq_hw_write_reg_bit(aq_hw, HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_ADR,
HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_MSK,
HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_SHIFT,
- init);
+ val ^ 1);
+}
+
+u32 hw_atl_rdm_rx_dma_desc_cache_init_done_get(struct aq_hw_s *aq_hw)
+{
+ return aq_hw_read_reg_bit(aq_hw, RDM_RX_DMA_DESC_CACHE_INIT_DONE_ADR,
+ RDM_RX_DMA_DESC_CACHE_INIT_DONE_MSK,
+ RDM_RX_DMA_DESC_CACHE_INIT_DONE_SHIFT);
}
void hw_atl_rpb_rx_pkt_buff_size_per_tc_set(struct aq_hw_s *aq_hw,
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h
index 0c37abbabca5..c3ee278c3747 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h
@@ -313,8 +313,11 @@ void hw_atl_rpb_rx_pkt_buff_size_per_tc_set(struct aq_hw_s *aq_hw,
u32 rx_pkt_buff_size_per_tc,
u32 buffer);
-/* set rdm rx dma descriptor cache init */
-void hw_atl_rdm_rx_dma_desc_cache_init_set(struct aq_hw_s *aq_hw, u32 init);
+/* toggle rdm rx dma descriptor cache init */
+void hw_atl_rdm_rx_dma_desc_cache_init_tgl(struct aq_hw_s *aq_hw);
+
+/* get rdm rx dma descriptor cache init done */
+u32 hw_atl_rdm_rx_dma_desc_cache_init_done_get(struct aq_hw_s *aq_hw);
/* set rx xoff enable (per tc) */
void hw_atl_rpb_rx_xoff_en_per_tc_set(struct aq_hw_s *aq_hw, u32 rx_xoff_en_per_tc,
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h
index c3febcdfa92e..35887ad89025 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h
@@ -318,6 +318,25 @@
/* default value of bitfield rdm_desc_init_i */
#define HW_ATL_RDM_RX_DMA_DESC_CACHE_INIT_DEFAULT 0x0
+/* rdm_desc_init_done_i bitfield definitions
+ * preprocessor definitions for the bitfield rdm_desc_init_done_i.
+ * port="pif_rdm_desc_init_done_i"
+ */
+
+/* register address for bitfield rdm_desc_init_done_i */
+#define RDM_RX_DMA_DESC_CACHE_INIT_DONE_ADR 0x00005a10
+/* bitmask for bitfield rdm_desc_init_done_i */
+#define RDM_RX_DMA_DESC_CACHE_INIT_DONE_MSK 0x00000001U
+/* inverted bitmask for bitfield rdm_desc_init_done_i */
+#define RDM_RX_DMA_DESC_CACHE_INIT_DONE_MSKN 0xfffffffe
+/* lower bit position of bitfield rdm_desc_init_done_i */
+#define RDM_RX_DMA_DESC_CACHE_INIT_DONE_SHIFT 0U
+/* width of bitfield rdm_desc_init_done_i */
+#define RDM_RX_DMA_DESC_CACHE_INIT_DONE_WIDTH 1
+/* default value of bitfield rdm_desc_init_done_i */
+#define RDM_RX_DMA_DESC_CACHE_INIT_DONE_DEFAULT 0x0
+
+
/* rx int_desc_wrb_en bitfield definitions
* preprocessor definitions for the bitfield "int_desc_wrb_en".
* port="pif_rdm_int_desc_wrb_en_i"
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
index da726489e3c8..7bc51f8d6f2f 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
@@ -337,7 +337,7 @@ static int aq_fw2x_get_phy_temp(struct aq_hw_s *self, int *temp)
/* Convert PHY temperature from 1/256 degree Celsius
* to 1/1000 degree Celsius.
*/
- *temp = temp_res * 1000 / 256;
+ *temp = (temp_res & 0xFFFF) * 1000 / 256;
return 0;
}
diff --git a/drivers/net/ethernet/arc/emac_rockchip.c b/drivers/net/ethernet/arc/emac_rockchip.c
index 42d2e1b02c44..664d664e0925 100644
--- a/drivers/net/ethernet/arc/emac_rockchip.c
+++ b/drivers/net/ethernet/arc/emac_rockchip.c
@@ -256,6 +256,9 @@ static int emac_rockchip_remove(struct platform_device *pdev)
if (priv->regulator)
regulator_disable(priv->regulator);
+ if (priv->soc_data->need_div_macclk)
+ clk_disable_unprepare(priv->macclk);
+
free_netdev(ndev);
return err;
}
diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig
index e24f5d2b6afe..53055ce5dfd6 100644
--- a/drivers/net/ethernet/broadcom/Kconfig
+++ b/drivers/net/ethernet/broadcom/Kconfig
@@ -8,7 +8,6 @@ config NET_VENDOR_BROADCOM
default y
depends on (SSB_POSSIBLE && HAS_DMA) || PCI || BCM63XX || \
SIBYTE_SB1xxx_SOC
- select DIMLIB
---help---
If you have a network (Ethernet) chipset belonging to this class,
say Y.
@@ -69,6 +68,7 @@ config BCMGENET
select FIXED_PHY
select BCM7XXX_PHY
select MDIO_BCM_UNIMAC
+ select DIMLIB
help
This driver supports the built-in Ethernet MACs found in the
Broadcom BCM7xxx Set Top Box family chipset.
@@ -188,6 +188,7 @@ config SYSTEMPORT
select MII
select PHYLIB
select FIXED_PHY
+ select DIMLIB
help
This driver supports the built-in Ethernet MACs found in the
Broadcom BCM7xxx Set Top Box family chipset using an internal
@@ -200,6 +201,7 @@ config BNXT
select LIBCRC32C
select NET_DEVLINK
select PAGE_POOL
+ select DIMLIB
---help---
This driver supports Broadcom NetXtreme-C/E 10/25/40/50 gigabit
Ethernet cards. To compile this driver as a module, choose M here:
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index b4a8cf620a0c..04ec909e06df 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -10382,7 +10382,8 @@ static void bnxt_cleanup_pci(struct bnxt *bp)
{
bnxt_unmap_bars(bp, bp->pdev);
pci_release_regions(bp->pdev);
- pci_disable_device(bp->pdev);
+ if (pci_is_enabled(bp->pdev))
+ pci_disable_device(bp->pdev);
}
static void bnxt_init_dflt_coal(struct bnxt *bp)
@@ -10669,14 +10670,11 @@ static void bnxt_fw_reset_task(struct work_struct *work)
bp->fw_reset_state = BNXT_FW_RESET_STATE_RESET_FW;
}
/* fall through */
- case BNXT_FW_RESET_STATE_RESET_FW: {
- u32 wait_dsecs = bp->fw_health->post_reset_wait_dsecs;
-
+ case BNXT_FW_RESET_STATE_RESET_FW:
bnxt_reset_all(bp);
bp->fw_reset_state = BNXT_FW_RESET_STATE_ENABLE_DEV;
- bnxt_queue_fw_reset_work(bp, wait_dsecs * HZ / 10);
+ bnxt_queue_fw_reset_work(bp, bp->fw_reset_min_dsecs * HZ / 10);
return;
- }
case BNXT_FW_RESET_STATE_ENABLE_DEV:
if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state) &&
bp->fw_health) {
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
index e664392dccc0..7151244f8c7d 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
@@ -29,25 +29,20 @@ static int bnxt_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
val = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG);
health_status = val & 0xffff;
- if (health_status == BNXT_FW_STATUS_HEALTHY) {
- rc = devlink_fmsg_string_pair_put(fmsg, "FW status",
- "Healthy;");
- if (rc)
- return rc;
- } else if (health_status < BNXT_FW_STATUS_HEALTHY) {
- rc = devlink_fmsg_string_pair_put(fmsg, "FW status",
- "Not yet completed initialization;");
+ if (health_status < BNXT_FW_STATUS_HEALTHY) {
+ rc = devlink_fmsg_string_pair_put(fmsg, "Description",
+ "Not yet completed initialization");
if (rc)
return rc;
} else if (health_status > BNXT_FW_STATUS_HEALTHY) {
- rc = devlink_fmsg_string_pair_put(fmsg, "FW status",
- "Encountered fatal error and cannot recover;");
+ rc = devlink_fmsg_string_pair_put(fmsg, "Description",
+ "Encountered fatal error and cannot recover");
if (rc)
return rc;
}
if (val >> 16) {
- rc = devlink_fmsg_u32_pair_put(fmsg, "Error", val >> 16);
+ rc = devlink_fmsg_u32_pair_put(fmsg, "Error code", val >> 16);
if (rc)
return rc;
}
@@ -215,25 +210,68 @@ enum bnxt_dl_param_id {
static const struct bnxt_dl_nvm_param nvm_params[] = {
{DEVLINK_PARAM_GENERIC_ID_ENABLE_SRIOV, NVM_OFF_ENABLE_SRIOV,
- BNXT_NVM_SHARED_CFG, 1},
+ BNXT_NVM_SHARED_CFG, 1, 1},
{DEVLINK_PARAM_GENERIC_ID_IGNORE_ARI, NVM_OFF_IGNORE_ARI,
- BNXT_NVM_SHARED_CFG, 1},
+ BNXT_NVM_SHARED_CFG, 1, 1},
{DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX,
- NVM_OFF_MSIX_VEC_PER_PF_MAX, BNXT_NVM_SHARED_CFG, 10},
+ NVM_OFF_MSIX_VEC_PER_PF_MAX, BNXT_NVM_SHARED_CFG, 10, 4},
{DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN,
- NVM_OFF_MSIX_VEC_PER_PF_MIN, BNXT_NVM_SHARED_CFG, 7},
+ NVM_OFF_MSIX_VEC_PER_PF_MIN, BNXT_NVM_SHARED_CFG, 7, 4},
{BNXT_DEVLINK_PARAM_ID_GRE_VER_CHECK, NVM_OFF_DIS_GRE_VER_CHECK,
- BNXT_NVM_SHARED_CFG, 1},
+ BNXT_NVM_SHARED_CFG, 1, 1},
};
+union bnxt_nvm_data {
+ u8 val8;
+ __le32 val32;
+};
+
+static void bnxt_copy_to_nvm_data(union bnxt_nvm_data *dst,
+ union devlink_param_value *src,
+ int nvm_num_bits, int dl_num_bytes)
+{
+ u32 val32 = 0;
+
+ if (nvm_num_bits == 1) {
+ dst->val8 = src->vbool;
+ return;
+ }
+ if (dl_num_bytes == 4)
+ val32 = src->vu32;
+ else if (dl_num_bytes == 2)
+ val32 = (u32)src->vu16;
+ else if (dl_num_bytes == 1)
+ val32 = (u32)src->vu8;
+ dst->val32 = cpu_to_le32(val32);
+}
+
+static void bnxt_copy_from_nvm_data(union devlink_param_value *dst,
+ union bnxt_nvm_data *src,
+ int nvm_num_bits, int dl_num_bytes)
+{
+ u32 val32;
+
+ if (nvm_num_bits == 1) {
+ dst->vbool = src->val8;
+ return;
+ }
+ val32 = le32_to_cpu(src->val32);
+ if (dl_num_bytes == 4)
+ dst->vu32 = val32;
+ else if (dl_num_bytes == 2)
+ dst->vu16 = (u16)val32;
+ else if (dl_num_bytes == 1)
+ dst->vu8 = (u8)val32;
+}
+
static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg,
int msg_len, union devlink_param_value *val)
{
struct hwrm_nvm_get_variable_input *req = msg;
- void *data_addr = NULL, *buf = NULL;
struct bnxt_dl_nvm_param nvm_param;
- int bytesize, idx = 0, rc, i;
+ union bnxt_nvm_data *data;
dma_addr_t data_dma_addr;
+ int idx = 0, rc, i;
/* Get/Set NVM CFG parameter is supported only on PFs */
if (BNXT_VF(bp))
@@ -254,47 +292,31 @@ static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg,
else if (nvm_param.dir_type == BNXT_NVM_FUNC_CFG)
idx = bp->pf.fw_fid - BNXT_FIRST_PF_FID;
- bytesize = roundup(nvm_param.num_bits, BITS_PER_BYTE) / BITS_PER_BYTE;
- switch (bytesize) {
- case 1:
- if (nvm_param.num_bits == 1)
- buf = &val->vbool;
- else
- buf = &val->vu8;
- break;
- case 2:
- buf = &val->vu16;
- break;
- case 4:
- buf = &val->vu32;
- break;
- default:
- return -EFAULT;
- }
-
- data_addr = dma_alloc_coherent(&bp->pdev->dev, bytesize,
- &data_dma_addr, GFP_KERNEL);
- if (!data_addr)
+ data = dma_alloc_coherent(&bp->pdev->dev, sizeof(*data),
+ &data_dma_addr, GFP_KERNEL);
+ if (!data)
return -ENOMEM;
req->dest_data_addr = cpu_to_le64(data_dma_addr);
- req->data_len = cpu_to_le16(nvm_param.num_bits);
+ req->data_len = cpu_to_le16(nvm_param.nvm_num_bits);
req->option_num = cpu_to_le16(nvm_param.offset);
req->index_0 = cpu_to_le16(idx);
if (idx)
req->dimensions = cpu_to_le16(1);
if (req->req_type == cpu_to_le16(HWRM_NVM_SET_VARIABLE)) {
- memcpy(data_addr, buf, bytesize);
+ bnxt_copy_to_nvm_data(data, val, nvm_param.nvm_num_bits,
+ nvm_param.dl_num_bytes);
rc = hwrm_send_message(bp, msg, msg_len, HWRM_CMD_TIMEOUT);
} else {
rc = hwrm_send_message_silent(bp, msg, msg_len,
HWRM_CMD_TIMEOUT);
+ if (!rc)
+ bnxt_copy_from_nvm_data(val, data,
+ nvm_param.nvm_num_bits,
+ nvm_param.dl_num_bytes);
}
- if (!rc && req->req_type == cpu_to_le16(HWRM_NVM_GET_VARIABLE))
- memcpy(buf, data_addr, bytesize);
-
- dma_free_coherent(&bp->pdev->dev, bytesize, data_addr, data_dma_addr);
+ dma_free_coherent(&bp->pdev->dev, sizeof(*data), data, data_dma_addr);
if (rc == -EACCES)
netdev_err(bp->dev, "PF does not have admin privileges to modify NVM config\n");
return rc;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h
index b97e0baeb42d..2f4fd0a7d04b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h
@@ -52,7 +52,8 @@ struct bnxt_dl_nvm_param {
u16 id;
u16 offset;
u16 dir_type;
- u16 num_bits;
+ u16 nvm_num_bits;
+ u8 dl_num_bytes;
};
void bnxt_devlink_health_report(struct bnxt *bp, unsigned long event);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 12cb77ef1081..1de51811fcb4 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -1996,8 +1996,6 @@ static void reset_umac(struct bcmgenet_priv *priv)
/* issue soft reset with (rg)mii loopback to ensure a stable rxclk */
bcmgenet_umac_writel(priv, CMD_SW_RESET | CMD_LCL_LOOP_EN, UMAC_CMD);
- udelay(2);
- bcmgenet_umac_writel(priv, 0, UMAC_CMD);
}
static void bcmgenet_intr_disable(struct bcmgenet_priv *priv)
@@ -2018,6 +2016,8 @@ static void bcmgenet_link_intr_enable(struct bcmgenet_priv *priv)
*/
if (priv->internal_phy) {
int0_enable |= UMAC_IRQ_LINK_EVENT;
+ if (GENET_IS_V1(priv) || GENET_IS_V2(priv) || GENET_IS_V3(priv))
+ int0_enable |= UMAC_IRQ_PHY_DET_R;
} else if (priv->ext_phy) {
int0_enable |= UMAC_IRQ_LINK_EVENT;
} else if (priv->phy_interface == PHY_INTERFACE_MODE_MOCA) {
@@ -2611,11 +2611,16 @@ static void bcmgenet_irq_task(struct work_struct *work)
priv->irq0_stat = 0;
spin_unlock_irq(&priv->lock);
+ if (status & UMAC_IRQ_PHY_DET_R &&
+ priv->dev->phydev->autoneg != AUTONEG_ENABLE) {
+ phy_init_hw(priv->dev->phydev);
+ genphy_config_aneg(priv->dev->phydev);
+ }
+
/* Link UP/DOWN event */
- if (status & UMAC_IRQ_LINK_EVENT) {
- priv->dev->phydev->link = !!(status & UMAC_IRQ_LINK_UP);
+ if (status & UMAC_IRQ_LINK_EVENT)
phy_mac_interrupt(priv->dev->phydev);
- }
+
}
/* bcmgenet_isr1: handle Rx and Tx priority queues */
@@ -2710,7 +2715,7 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id)
}
/* all other interested interrupts handled in bottom half */
- status &= UMAC_IRQ_LINK_EVENT;
+ status &= (UMAC_IRQ_LINK_EVENT | UMAC_IRQ_PHY_DET_R);
if (status) {
/* Save irq status for bottom-half processing. */
spin_lock_irqsave(&priv->lock, flags);
@@ -3631,6 +3636,7 @@ static int bcmgenet_resume(struct device *d)
phy_init_hw(dev->phydev);
/* Speed settings must be restored */
+ genphy_config_aneg(dev->phydev);
bcmgenet_mii_config(priv->dev, false);
bcmgenet_set_hw_addr(priv, dev->dev_addr);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index 4a8fc03d82fd..dbc69d8fa05f 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -366,6 +366,7 @@ struct bcmgenet_mib_counters {
#define EXT_PWR_DOWN_PHY_EN (1 << 20)
#define EXT_RGMII_OOB_CTRL 0x0C
+#define RGMII_MODE_EN_V123 (1 << 0)
#define RGMII_LINK (1 << 4)
#define OOB_DISABLE (1 << 5)
#define RGMII_MODE_EN (1 << 6)
diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
index 970e478a9017..dbe18cdf6c1b 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -181,8 +181,38 @@ int bcmgenet_mii_config(struct net_device *dev, bool init)
const char *phy_name = NULL;
u32 id_mode_dis = 0;
u32 port_ctrl;
+ int bmcr = -1;
+ int ret;
u32 reg;
+ /* MAC clocking workaround during reset of umac state machines */
+ reg = bcmgenet_umac_readl(priv, UMAC_CMD);
+ if (reg & CMD_SW_RESET) {
+ /* An MII PHY must be isolated to prevent TXC contention */
+ if (priv->phy_interface == PHY_INTERFACE_MODE_MII) {
+ ret = phy_read(phydev, MII_BMCR);
+ if (ret >= 0) {
+ bmcr = ret;
+ ret = phy_write(phydev, MII_BMCR,
+ bmcr | BMCR_ISOLATE);
+ }
+ if (ret) {
+ netdev_err(dev, "failed to isolate PHY\n");
+ return ret;
+ }
+ }
+ /* Switch MAC clocking to RGMII generated clock */
+ bcmgenet_sys_writel(priv, PORT_MODE_EXT_GPHY, SYS_PORT_CTRL);
+ /* Ensure 5 clks with Rx disabled
+ * followed by 5 clks with Reset asserted
+ */
+ udelay(4);
+ reg &= ~(CMD_SW_RESET | CMD_LCL_LOOP_EN);
+ bcmgenet_umac_writel(priv, reg, UMAC_CMD);
+ /* Ensure 5 more clocks before Rx is enabled */
+ udelay(2);
+ }
+
priv->ext_phy = !priv->internal_phy &&
(priv->phy_interface != PHY_INTERFACE_MODE_MOCA);
@@ -214,6 +244,9 @@ int bcmgenet_mii_config(struct net_device *dev, bool init)
phy_set_max_speed(phydev, SPEED_100);
bcmgenet_sys_writel(priv,
PORT_MODE_EXT_EPHY, SYS_PORT_CTRL);
+ /* Restore the MII PHY after isolation */
+ if (bmcr >= 0)
+ phy_write(phydev, MII_BMCR, bmcr);
break;
case PHY_INTERFACE_MODE_REVMII:
@@ -258,7 +291,11 @@ int bcmgenet_mii_config(struct net_device *dev, bool init)
*/
if (priv->ext_phy) {
reg = bcmgenet_ext_readl(priv, EXT_RGMII_OOB_CTRL);
- reg |= RGMII_MODE_EN | id_mode_dis;
+ reg |= id_mode_dis;
+ if (GENET_IS_V1(priv) || GENET_IS_V2(priv) || GENET_IS_V3(priv))
+ reg |= RGMII_MODE_EN_V123;
+ else
+ reg |= RGMII_MODE_EN;
bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL);
}
@@ -273,11 +310,12 @@ int bcmgenet_mii_probe(struct net_device *dev)
struct bcmgenet_priv *priv = netdev_priv(dev);
struct device_node *dn = priv->pdev->dev.of_node;
struct phy_device *phydev;
- u32 phy_flags;
+ u32 phy_flags = 0;
int ret;
/* Communicate the integrated PHY revision */
- phy_flags = priv->gphy_rev;
+ if (priv->internal_phy)
+ phy_flags = priv->gphy_rev;
/* Initialize link state variables that bcmgenet_mii_setup() uses */
priv->old_link = -1;
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 77f3511b97de..ca3aa1250dd1 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -6280,6 +6280,10 @@ static int tg3_ptp_enable(struct ptp_clock_info *ptp,
switch (rq->type) {
case PTP_CLK_REQ_PEROUT:
+ /* Reject requests with unsupported flags */
+ if (rq->perout.flags)
+ return -EOPNOTSUPP;
+
if (rq->perout.index != 0)
return -EINVAL;
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 8e8d557901a9..1e1b774e1953 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -3405,17 +3405,17 @@ static int macb_clk_init(struct platform_device *pdev, struct clk **pclk,
return err;
}
- *tx_clk = devm_clk_get(&pdev->dev, "tx_clk");
+ *tx_clk = devm_clk_get_optional(&pdev->dev, "tx_clk");
if (IS_ERR(*tx_clk))
- *tx_clk = NULL;
+ return PTR_ERR(*tx_clk);
- *rx_clk = devm_clk_get(&pdev->dev, "rx_clk");
+ *rx_clk = devm_clk_get_optional(&pdev->dev, "rx_clk");
if (IS_ERR(*rx_clk))
- *rx_clk = NULL;
+ return PTR_ERR(*rx_clk);
- *tsu_clk = devm_clk_get(&pdev->dev, "tsu_clk");
+ *tsu_clk = devm_clk_get_optional(&pdev->dev, "tsu_clk");
if (IS_ERR(*tsu_clk))
- *tsu_clk = NULL;
+ return PTR_ERR(*tsu_clk);
err = clk_prepare_enable(*pclk);
if (err) {
diff --git a/drivers/net/ethernet/cavium/common/cavium_ptp.h b/drivers/net/ethernet/cavium/common/cavium_ptp.h
index be2bafc7beeb..a04eccbc78e8 100644
--- a/drivers/net/ethernet/cavium/common/cavium_ptp.h
+++ b/drivers/net/ethernet/cavium/common/cavium_ptp.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/* cavium_ptp.h - PTP 1588 clock on Cavium hardware
* Copyright (c) 2003-2015, 2017 Cavium, Inc.
*/
diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
index 0e5de88fd6e8..cdd7e5da4a74 100644
--- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
+++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
@@ -1499,7 +1499,7 @@ static int octeon_mgmt_probe(struct platform_device *pdev)
netdev->ethtool_ops = &octeon_mgmt_ethtool_ops;
netdev->min_mtu = 64 - OCTEON_MGMT_RX_HEADROOM;
- netdev->max_mtu = 16383 - OCTEON_MGMT_RX_HEADROOM;
+ netdev->max_mtu = 16383 - OCTEON_MGMT_RX_HEADROOM - VLAN_HLEN;
mac = of_get_mac_address(pdev->dev.of_node);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
index a4dead4ab0ed..86b528d8364c 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
@@ -695,10 +695,10 @@ static void uld_init(struct adapter *adap, struct cxgb4_lld_info *lld)
lld->write_cmpl_support = adap->params.write_cmpl_support;
}
-static void uld_attach(struct adapter *adap, unsigned int uld)
+static int uld_attach(struct adapter *adap, unsigned int uld)
{
- void *handle;
struct cxgb4_lld_info lli;
+ void *handle;
uld_init(adap, &lli);
uld_queue_init(adap, uld, &lli);
@@ -708,7 +708,7 @@ static void uld_attach(struct adapter *adap, unsigned int uld)
dev_warn(adap->pdev_dev,
"could not attach to the %s driver, error %ld\n",
adap->uld[uld].name, PTR_ERR(handle));
- return;
+ return PTR_ERR(handle);
}
adap->uld[uld].handle = handle;
@@ -716,22 +716,22 @@ static void uld_attach(struct adapter *adap, unsigned int uld)
if (adap->flags & CXGB4_FULL_INIT_DONE)
adap->uld[uld].state_change(handle, CXGB4_STATE_UP);
+
+ return 0;
}
-/**
- * cxgb4_register_uld - register an upper-layer driver
- * @type: the ULD type
- * @p: the ULD methods
+/* cxgb4_register_uld - register an upper-layer driver
+ * @type: the ULD type
+ * @p: the ULD methods
*
- * Registers an upper-layer driver with this driver and notifies the ULD
- * about any presently available devices that support its type. Returns
- * %-EBUSY if a ULD of the same type is already registered.
+ * Registers an upper-layer driver with this driver and notifies the ULD
+ * about any presently available devices that support its type.
*/
void cxgb4_register_uld(enum cxgb4_uld type,
const struct cxgb4_uld_info *p)
{
- int ret = 0;
struct adapter *adap;
+ int ret = 0;
if (type >= CXGB4_ULD_MAX)
return;
@@ -763,8 +763,12 @@ void cxgb4_register_uld(enum cxgb4_uld type,
if (ret)
goto free_irq;
adap->uld[type] = *p;
- uld_attach(adap, type);
+ ret = uld_attach(adap, type);
+ if (ret)
+ goto free_txq;
continue;
+free_txq:
+ release_sge_txq_uld(adap, type);
free_irq:
if (adap->flags & CXGB4_FULL_INIT_DONE)
quiesce_rx_uld(adap, type);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index b3da81e90132..928bfea5457b 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -3791,15 +3791,11 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq,
* write the CIDX Updates into the Status Page at the end of the
* TX Queue.
*/
- c.autoequiqe_to_viid = htonl((dbqt
- ? FW_EQ_ETH_CMD_AUTOEQUIQE_F
- : FW_EQ_ETH_CMD_AUTOEQUEQE_F) |
+ c.autoequiqe_to_viid = htonl(FW_EQ_ETH_CMD_AUTOEQUEQE_F |
FW_EQ_ETH_CMD_VIID_V(pi->viid));
c.fetchszm_to_iqid =
- htonl(FW_EQ_ETH_CMD_HOSTFCMODE_V(dbqt
- ? HOSTFCMODE_INGRESS_QUEUE_X
- : HOSTFCMODE_STATUS_PAGE_X) |
+ htonl(FW_EQ_ETH_CMD_HOSTFCMODE_V(HOSTFCMODE_STATUS_PAGE_X) |
FW_EQ_ETH_CMD_PCIECHN_V(pi->tx_chan) |
FW_EQ_ETH_CMD_FETCHRO_F | FW_EQ_ETH_CMD_IQID_V(iqid));
diff --git a/drivers/net/ethernet/cirrus/ep93xx_eth.c b/drivers/net/ethernet/cirrus/ep93xx_eth.c
index f1a0c4dceda0..f37c9a08c4cf 100644
--- a/drivers/net/ethernet/cirrus/ep93xx_eth.c
+++ b/drivers/net/ethernet/cirrus/ep93xx_eth.c
@@ -763,6 +763,7 @@ static int ep93xx_eth_remove(struct platform_device *pdev)
{
struct net_device *dev;
struct ep93xx_priv *ep;
+ struct resource *mem;
dev = platform_get_drvdata(pdev);
if (dev == NULL)
@@ -778,8 +779,8 @@ static int ep93xx_eth_remove(struct platform_device *pdev)
iounmap(ep->base_addr);
if (ep->res != NULL) {
- release_resource(ep->res);
- kfree(ep->res);
+ mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ release_mem_region(mem->start, resource_size(mem));
}
free_netdev(dev);
diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c
index e736ce2c58ca..a8f4c69252ff 100644
--- a/drivers/net/ethernet/cortina/gemini.c
+++ b/drivers/net/ethernet/cortina/gemini.c
@@ -2524,6 +2524,7 @@ static int gemini_ethernet_port_remove(struct platform_device *pdev)
struct gemini_ethernet_port *port = platform_get_drvdata(pdev);
gemini_port_remove(port);
+ free_netdev(port->netdev);
return 0;
}
diff --git a/drivers/net/ethernet/cortina/gemini.h b/drivers/net/ethernet/cortina/gemini.h
index 0b12f89bf89a..9fdf77d5eb37 100644
--- a/drivers/net/ethernet/cortina/gemini.h
+++ b/drivers/net/ethernet/cortina/gemini.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/* Register definitions for Gemini GMAC Ethernet device driver
*
* Copyright (C) 2006 Storlink, Corp.
diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index 9b7af94a40bb..96e9565f1e08 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -727,6 +727,18 @@ static netdev_tx_t ftgmac100_hard_start_xmit(struct sk_buff *skb,
*/
nfrags = skb_shinfo(skb)->nr_frags;
+ /* Setup HW checksumming */
+ csum_vlan = 0;
+ if (skb->ip_summed == CHECKSUM_PARTIAL &&
+ !ftgmac100_prep_tx_csum(skb, &csum_vlan))
+ goto drop;
+
+ /* Add VLAN tag */
+ if (skb_vlan_tag_present(skb)) {
+ csum_vlan |= FTGMAC100_TXDES1_INS_VLANTAG;
+ csum_vlan |= skb_vlan_tag_get(skb) & 0xffff;
+ }
+
/* Get header len */
len = skb_headlen(skb);
@@ -753,19 +765,6 @@ static netdev_tx_t ftgmac100_hard_start_xmit(struct sk_buff *skb,
if (nfrags == 0)
f_ctl_stat |= FTGMAC100_TXDES0_LTS;
txdes->txdes3 = cpu_to_le32(map);
-
- /* Setup HW checksumming */
- csum_vlan = 0;
- if (skb->ip_summed == CHECKSUM_PARTIAL &&
- !ftgmac100_prep_tx_csum(skb, &csum_vlan))
- goto drop;
-
- /* Add VLAN tag */
- if (skb_vlan_tag_present(skb)) {
- csum_vlan |= FTGMAC100_TXDES1_INS_VLANTAG;
- csum_vlan |= skb_vlan_tag_get(skb) & 0xffff;
- }
-
txdes->txdes1 = cpu_to_le32(csum_vlan);
/* Next descriptor */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 162d7d8fb295..bf5add954181 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -1235,6 +1235,8 @@ static void dpaa2_eth_set_rx_taildrop(struct dpaa2_eth_priv *priv, bool enable)
priv->rx_td_enabled = enable;
}
+static void update_tx_fqids(struct dpaa2_eth_priv *priv);
+
static int link_state_update(struct dpaa2_eth_priv *priv)
{
struct dpni_link_state state = {0};
@@ -1261,6 +1263,7 @@ static int link_state_update(struct dpaa2_eth_priv *priv)
goto out;
if (state.up) {
+ update_tx_fqids(priv);
netif_carrier_on(priv->net_dev);
netif_tx_start_all_queues(priv->net_dev);
} else {
@@ -2229,8 +2232,16 @@ err_set_cdan:
err_service_reg:
free_channel(priv, channel);
err_alloc_ch:
- if (err == -EPROBE_DEFER)
+ if (err == -EPROBE_DEFER) {
+ for (i = 0; i < priv->num_channels; i++) {
+ channel = priv->channel[i];
+ nctx = &channel->nctx;
+ dpaa2_io_service_deregister(channel->dpio, nctx, dev);
+ free_channel(priv, channel);
+ }
+ priv->num_channels = 0;
return err;
+ }
if (cpumask_empty(&priv->dpio_cpumask)) {
dev_err(dev, "No cpu with an affine DPIO/DPCON\n");
@@ -2533,6 +2544,47 @@ static int set_pause(struct dpaa2_eth_priv *priv)
return 0;
}
+static void update_tx_fqids(struct dpaa2_eth_priv *priv)
+{
+ struct dpni_queue_id qid = {0};
+ struct dpaa2_eth_fq *fq;
+ struct dpni_queue queue;
+ int i, j, err;
+
+ /* We only use Tx FQIDs for FQID-based enqueue, so check
+ * if DPNI version supports it before updating FQIDs
+ */
+ if (dpaa2_eth_cmp_dpni_ver(priv, DPNI_ENQUEUE_FQID_VER_MAJOR,
+ DPNI_ENQUEUE_FQID_VER_MINOR) < 0)
+ return;
+
+ for (i = 0; i < priv->num_fqs; i++) {
+ fq = &priv->fq[i];
+ if (fq->type != DPAA2_TX_CONF_FQ)
+ continue;
+ for (j = 0; j < dpaa2_eth_tc_count(priv); j++) {
+ err = dpni_get_queue(priv->mc_io, 0, priv->mc_token,
+ DPNI_QUEUE_TX, j, fq->flowid,
+ &queue, &qid);
+ if (err)
+ goto out_err;
+
+ fq->tx_fqid[j] = qid.fqid;
+ if (fq->tx_fqid[j] == 0)
+ goto out_err;
+ }
+ }
+
+ priv->enqueue = dpaa2_eth_enqueue_fq;
+
+ return;
+
+out_err:
+ netdev_info(priv->net_dev,
+ "Error reading Tx FQID, fallback to QDID-based enqueue\n");
+ priv->enqueue = dpaa2_eth_enqueue_qd;
+}
+
/* Configure the DPNI object this interface is associated with */
static int setup_dpni(struct fsl_mc_device *ls_dev)
{
@@ -3306,6 +3358,9 @@ static irqreturn_t dpni_irq0_handler_thread(int irq_num, void *arg)
if (status & DPNI_IRQ_EVENT_LINK_CHANGED)
link_state_update(netdev_priv(net_dev));
+ if (status & DPNI_IRQ_EVENT_ENDPOINT_CHANGED)
+ set_mac_addr(netdev_priv(net_dev));
+
return IRQ_HANDLED;
}
@@ -3331,7 +3386,8 @@ static int setup_irqs(struct fsl_mc_device *ls_dev)
}
err = dpni_set_irq_mask(ls_dev->mc_io, 0, ls_dev->mc_handle,
- DPNI_IRQ_INDEX, DPNI_IRQ_EVENT_LINK_CHANGED);
+ DPNI_IRQ_INDEX, DPNI_IRQ_EVENT_LINK_CHANGED |
+ DPNI_IRQ_EVENT_ENDPOINT_CHANGED);
if (err < 0) {
dev_err(&ls_dev->dev, "dpni_set_irq_mask(): %d\n", err);
goto free_irq;
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.h
index ff2e177395d4..df2458a5e9ef 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright 2018 NXP
*/
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni.h b/drivers/net/ethernet/freescale/dpaa2/dpni.h
index fd583911b6c0..ee0711d06b3a 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpni.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpni.h
@@ -133,9 +133,12 @@ int dpni_reset(struct fsl_mc_io *mc_io,
*/
#define DPNI_IRQ_INDEX 0
/**
- * IRQ event - indicates a change in link state
+ * IRQ events:
+ * indicates a change in link state
+ * indicates a change in endpoint
*/
#define DPNI_IRQ_EVENT_LINK_CHANGED 0x00000001
+#define DPNI_IRQ_EVENT_ENDPOINT_CHANGED 0x00000002
int dpni_set_irq_enable(struct fsl_mc_io *mc_io,
u32 cmd_flags,
diff --git a/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h
index 720cd50f5895..4ac05bfef338 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright 2013-2016 Freescale Semiconductor Inc.
* Copyright 2016-2018 NXP
diff --git a/drivers/net/ethernet/freescale/dpaa2/dprtc.h b/drivers/net/ethernet/freescale/dpaa2/dprtc.h
index be7914c1634d..311c184e1aef 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dprtc.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dprtc.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright 2013-2016 Freescale Semiconductor Inc.
* Copyright 2016-2018 NXP
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index d4d4c72adf49..4bb30761abfc 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -3558,7 +3558,7 @@ fec_probe(struct platform_device *pdev)
for (i = 0; i < irq_cnt; i++) {
snprintf(irq_name, sizeof(irq_name), "int%d", i);
- irq = platform_get_irq_byname(pdev, irq_name);
+ irq = platform_get_irq_byname_optional(pdev, irq_name);
if (irq < 0)
irq = platform_get_irq(pdev, i);
if (irq < 0) {
@@ -3636,6 +3636,11 @@ fec_drv_remove(struct platform_device *pdev)
struct net_device *ndev = platform_get_drvdata(pdev);
struct fec_enet_private *fep = netdev_priv(ndev);
struct device_node *np = pdev->dev.of_node;
+ int ret;
+
+ ret = pm_runtime_get_sync(&pdev->dev);
+ if (ret < 0)
+ return ret;
cancel_work_sync(&fep->tx_timeout_work);
fec_ptp_stop(pdev);
@@ -3643,13 +3648,17 @@ fec_drv_remove(struct platform_device *pdev)
fec_enet_mii_remove(fep);
if (fep->reg_phy)
regulator_disable(fep->reg_phy);
- pm_runtime_put(&pdev->dev);
- pm_runtime_disable(&pdev->dev);
+
if (of_phy_is_fixed_link(np))
of_phy_deregister_fixed_link(np);
of_node_put(fep->phy_node);
free_netdev(ndev);
+ clk_disable_unprepare(fep->clk_ahb);
+ clk_disable_unprepare(fep->clk_ipg);
+ pm_runtime_put_noidle(&pdev->dev);
+ pm_runtime_disable(&pdev->dev);
+
return 0;
}
diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c
index 19e2365be7d8..945643c02615 100644
--- a/drivers/net/ethernet/freescale/fec_ptp.c
+++ b/drivers/net/ethernet/freescale/fec_ptp.c
@@ -600,9 +600,9 @@ void fec_ptp_init(struct platform_device *pdev, int irq_idx)
INIT_DELAYED_WORK(&fep->time_keep, fec_time_keep);
- irq = platform_get_irq_byname(pdev, "pps");
+ irq = platform_get_irq_byname_optional(pdev, "pps");
if (irq < 0)
- irq = platform_get_irq(pdev, irq_idx);
+ irq = platform_get_irq_optional(pdev, irq_idx);
/* Failure to get an irq is not fatal,
* only the PTP_CLOCK_PPS clock events should stop
*/
diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c
index 59564ac99d2a..edec61dfc868 100644
--- a/drivers/net/ethernet/google/gve/gve_rx.c
+++ b/drivers/net/ethernet/google/gve/gve_rx.c
@@ -289,6 +289,8 @@ static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
len = be16_to_cpu(rx_desc->len) - GVE_RX_PAD;
page_info = &rx->data.page_info[idx];
+ dma_sync_single_for_cpu(&priv->pdev->dev, rx->data.qpl->page_buses[idx],
+ PAGE_SIZE, DMA_FROM_DEVICE);
/* gvnic can only receive into registered segments. If the buffer
* can't be recycled, our only choice is to copy the data out of
diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c
index 778b87b5a06c..f4889431f9b7 100644
--- a/drivers/net/ethernet/google/gve/gve_tx.c
+++ b/drivers/net/ethernet/google/gve/gve_tx.c
@@ -390,7 +390,22 @@ static void gve_tx_fill_seg_desc(union gve_tx_desc *seg_desc,
seg_desc->seg.seg_addr = cpu_to_be64(addr);
}
-static int gve_tx_add_skb(struct gve_tx_ring *tx, struct sk_buff *skb)
+static void gve_dma_sync_for_device(struct device *dev, dma_addr_t *page_buses,
+ u64 iov_offset, u64 iov_len)
+{
+ u64 last_page = (iov_offset + iov_len - 1) / PAGE_SIZE;
+ u64 first_page = iov_offset / PAGE_SIZE;
+ dma_addr_t dma;
+ u64 page;
+
+ for (page = first_page; page <= last_page; page++) {
+ dma = page_buses[page];
+ dma_sync_single_for_device(dev, dma, PAGE_SIZE, DMA_TO_DEVICE);
+ }
+}
+
+static int gve_tx_add_skb(struct gve_tx_ring *tx, struct sk_buff *skb,
+ struct device *dev)
{
int pad_bytes, hlen, hdr_nfrags, payload_nfrags, l4_hdr_offset;
union gve_tx_desc *pkt_desc, *seg_desc;
@@ -432,6 +447,9 @@ static int gve_tx_add_skb(struct gve_tx_ring *tx, struct sk_buff *skb)
skb_copy_bits(skb, 0,
tx->tx_fifo.base + info->iov[hdr_nfrags - 1].iov_offset,
hlen);
+ gve_dma_sync_for_device(dev, tx->tx_fifo.qpl->page_buses,
+ info->iov[hdr_nfrags - 1].iov_offset,
+ info->iov[hdr_nfrags - 1].iov_len);
copy_offset = hlen;
for (i = payload_iov; i < payload_nfrags + payload_iov; i++) {
@@ -445,6 +463,9 @@ static int gve_tx_add_skb(struct gve_tx_ring *tx, struct sk_buff *skb)
skb_copy_bits(skb, copy_offset,
tx->tx_fifo.base + info->iov[i].iov_offset,
info->iov[i].iov_len);
+ gve_dma_sync_for_device(dev, tx->tx_fifo.qpl->page_buses,
+ info->iov[i].iov_offset,
+ info->iov[i].iov_len);
copy_offset += info->iov[i].iov_len;
}
@@ -473,7 +494,7 @@ netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev)
gve_tx_put_doorbell(priv, tx->q_resources, tx->req);
return NETDEV_TX_BUSY;
}
- nsegs = gve_tx_add_skb(tx, skb);
+ nsegs = gve_tx_add_skb(tx, skb, &priv->pdev->dev);
netdev_tx_sent_queue(tx->netdev_txq, skb->len);
skb_tx_timestamp(skb);
diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c
index c84167447abe..4606a7e4a6d1 100644
--- a/drivers/net/ethernet/hisilicon/hip04_eth.c
+++ b/drivers/net/ethernet/hisilicon/hip04_eth.c
@@ -237,6 +237,7 @@ struct hip04_priv {
dma_addr_t rx_phys[RX_DESC_NUM];
unsigned int rx_head;
unsigned int rx_buf_size;
+ unsigned int rx_cnt_remaining;
struct device_node *phy_node;
struct phy_device *phy;
@@ -575,7 +576,6 @@ static int hip04_rx_poll(struct napi_struct *napi, int budget)
struct hip04_priv *priv = container_of(napi, struct hip04_priv, napi);
struct net_device *ndev = priv->ndev;
struct net_device_stats *stats = &ndev->stats;
- unsigned int cnt = hip04_recv_cnt(priv);
struct rx_desc *desc;
struct sk_buff *skb;
unsigned char *buf;
@@ -588,8 +588,8 @@ static int hip04_rx_poll(struct napi_struct *napi, int budget)
/* clean up tx descriptors */
tx_remaining = hip04_tx_reclaim(ndev, false);
-
- while (cnt && !last) {
+ priv->rx_cnt_remaining += hip04_recv_cnt(priv);
+ while (priv->rx_cnt_remaining && !last) {
buf = priv->rx_buf[priv->rx_head];
skb = build_skb(buf, priv->rx_buf_size);
if (unlikely(!skb)) {
@@ -635,11 +635,13 @@ refill:
hip04_set_recv_desc(priv, phys);
priv->rx_head = RX_NEXT(priv->rx_head);
- if (rx >= budget)
+ if (rx >= budget) {
+ --priv->rx_cnt_remaining;
goto done;
+ }
- if (--cnt == 0)
- cnt = hip04_recv_cnt(priv);
+ if (--priv->rx_cnt_remaining == 0)
+ priv->rx_cnt_remaining += hip04_recv_cnt(priv);
}
if (!(priv->reg_inten & RCV_INT)) {
@@ -724,6 +726,7 @@ static int hip04_mac_open(struct net_device *ndev)
int i;
priv->rx_head = 0;
+ priv->rx_cnt_remaining = 0;
priv->tx_head = 0;
priv->tx_tail = 0;
hip04_reset_ppe(priv);
@@ -1038,7 +1041,6 @@ static int hip04_remove(struct platform_device *pdev)
hip04_free_ring(ndev, d);
unregister_netdev(ndev);
- free_irq(ndev->irq, ndev);
of_node_put(priv->phy_node);
cancel_work_sync(&priv->tx_timeout_task);
free_netdev(ndev);
diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.c b/drivers/net/ethernet/hisilicon/hns/hnae.c
index 6d0457eb4faa..08339278c722 100644
--- a/drivers/net/ethernet/hisilicon/hns/hnae.c
+++ b/drivers/net/ethernet/hisilicon/hns/hnae.c
@@ -199,7 +199,6 @@ hnae_init_ring(struct hnae_queue *q, struct hnae_ring *ring, int flags)
ring->q = q;
ring->flags = flags;
- spin_lock_init(&ring->lock);
ring->coal_param = q->handle->coal_param;
assert(!ring->desc && !ring->desc_cb && !ring->desc_dma_addr);
diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h
index e9c67c06bfd2..6ab9458302e1 100644
--- a/drivers/net/ethernet/hisilicon/hns/hnae.h
+++ b/drivers/net/ethernet/hisilicon/hns/hnae.h
@@ -274,9 +274,6 @@ struct hnae_ring {
/* statistic */
struct ring_stats stats;
- /* ring lock for poll one */
- spinlock_t lock;
-
dma_addr_t desc_dma_addr;
u32 buf_size; /* size for hnae_desc->addr, preset by AE */
u16 desc_num; /* total number of desc */
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index a48396dd4ebb..14ab20491fd0 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -943,15 +943,6 @@ static int is_valid_clean_head(struct hnae_ring *ring, int h)
return u > c ? (h > c && h <= u) : (h > c || h <= u);
}
-/* netif_tx_lock will turn down the performance, set only when necessary */
-#ifdef CONFIG_NET_POLL_CONTROLLER
-#define NETIF_TX_LOCK(ring) spin_lock(&(ring)->lock)
-#define NETIF_TX_UNLOCK(ring) spin_unlock(&(ring)->lock)
-#else
-#define NETIF_TX_LOCK(ring)
-#define NETIF_TX_UNLOCK(ring)
-#endif
-
/* reclaim all desc in one budget
* return error or number of desc left
*/
@@ -965,21 +956,16 @@ static int hns_nic_tx_poll_one(struct hns_nic_ring_data *ring_data,
int head;
int bytes, pkts;
- NETIF_TX_LOCK(ring);
-
head = readl_relaxed(ring->io_base + RCB_REG_HEAD);
rmb(); /* make sure head is ready before touch any data */
- if (is_ring_empty(ring) || head == ring->next_to_clean) {
- NETIF_TX_UNLOCK(ring);
+ if (is_ring_empty(ring) || head == ring->next_to_clean)
return 0; /* no data to poll */
- }
if (!is_valid_clean_head(ring, head)) {
netdev_err(ndev, "wrong head (%d, %d-%d)\n", head,
ring->next_to_use, ring->next_to_clean);
ring->stats.io_err_cnt++;
- NETIF_TX_UNLOCK(ring);
return -EIO;
}
@@ -994,8 +980,6 @@ static int hns_nic_tx_poll_one(struct hns_nic_ring_data *ring_data,
ring->stats.tx_pkts += pkts;
ring->stats.tx_bytes += bytes;
- NETIF_TX_UNLOCK(ring);
-
dev_queue = netdev_get_tx_queue(ndev, ring_data->queue_index);
netdev_tx_completed_queue(dev_queue, pkts, bytes);
@@ -1055,16 +1039,12 @@ static void hns_nic_tx_clr_all_bufs(struct hns_nic_ring_data *ring_data)
int head;
int bytes, pkts;
- NETIF_TX_LOCK(ring);
-
head = ring->next_to_use; /* ntu :soft setted ring position*/
bytes = 0;
pkts = 0;
while (head != ring->next_to_clean)
hns_nic_reclaim_one_desc(ring, &bytes, &pkts);
- NETIF_TX_UNLOCK(ring);
-
dev_queue = netdev_get_tx_queue(ndev, ring_data->queue_index);
netdev_tx_reset_queue(dev_queue);
}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index c4b7bf851a28..a0998937727d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0+
+/* SPDX-License-Identifier: GPL-2.0+ */
// Copyright (c) 2016-2017 Hisilicon Limited.
#ifndef __HNAE3_H
@@ -32,6 +32,8 @@
#define HNAE3_MOD_VERSION "1.0"
+#define HNAE3_MIN_VECTOR_NUM 2 /* first one for misc, another for IO */
+
/* Device IDs */
#define HNAE3_DEV_ID_GE 0xA220
#define HNAE3_DEV_ID_25GE 0xA221
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index 2110fa3b4479..5d468ed404a6 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0+
+/* SPDX-License-Identifier: GPL-2.0+ */
// Copyright (c) 2016-2017 Hisilicon Limited.
#ifndef __HNS3_ENET_H
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index 680c3508876d..52c9d204fe3d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -70,11 +70,6 @@ static const struct hns3_stats hns3_rxq_stats[] = {
#define HNS3_NIC_LB_TEST_TX_CNT_ERR 2
#define HNS3_NIC_LB_TEST_RX_CNT_ERR 3
-struct hns3_link_mode_mapping {
- u32 hns3_link_mode;
- u32 ethtool_link_mode;
-};
-
static int hns3_lp_setup(struct net_device *ndev, enum hnae3_loop loop, bool en)
{
struct hnae3_handle *h = hns3_get_handle(ndev);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index 4821fe08b5e4..1426eb5ddf3d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0+
+/* SPDX-License-Identifier: GPL-2.0+ */
// Copyright (c) 2016-2017 Hisilicon Limited.
#ifndef __HCLGE_CMD_H
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
index c063301d6060..a1790af73096 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
@@ -124,7 +124,7 @@ static int hclge_ets_validate(struct hclge_dev *hdev, struct ieee_ets *ets,
if (ret)
return ret;
- for (i = 0; i < HNAE3_MAX_TC; i++) {
+ for (i = 0; i < hdev->tc_max; i++) {
switch (ets->tc_tsa[i]) {
case IEEE_8021QAZ_TSA_STRICT:
if (hdev->tm_info.tc_info[i].tc_sch_mode !=
@@ -318,6 +318,7 @@ static int hclge_ieee_setpfc(struct hnae3_handle *h, struct ieee_pfc *pfc)
struct net_device *netdev = h->kinfo.netdev;
struct hclge_dev *hdev = vport->back;
u8 i, j, pfc_map, *prio_tc;
+ int ret;
if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) ||
hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE)
@@ -347,7 +348,21 @@ static int hclge_ieee_setpfc(struct hnae3_handle *h, struct ieee_pfc *pfc)
hclge_tm_pfc_info_update(hdev);
- return hclge_pause_setup_hw(hdev, false);
+ ret = hclge_pause_setup_hw(hdev, false);
+ if (ret)
+ return ret;
+
+ ret = hclge_notify_client(hdev, HNAE3_DOWN_CLIENT);
+ if (ret)
+ return ret;
+
+ ret = hclge_buffer_alloc(hdev);
+ if (ret) {
+ hclge_notify_client(hdev, HNAE3_UP_CLIENT);
+ return ret;
+ }
+
+ return hclge_notify_client(hdev, HNAE3_UP_CLIENT);
}
/* DCBX configuration */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h
index 278f21e02736..b04702e65689 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0+
+/* SPDX-License-Identifier: GPL-2.0+ */
// Copyright (c) 2016-2017 Hisilicon Limited.
#ifndef __HCLGE_DCB_H__
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index fd7f94372ff0..c052bb33b3d3 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -906,6 +906,9 @@ static int hclge_query_pf_resource(struct hclge_dev *hdev)
hnae3_get_field(__le16_to_cpu(req->pf_intr_vector_number),
HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S);
+ /* nic's msix numbers is always equals to the roce's. */
+ hdev->num_nic_msi = hdev->num_roce_msi;
+
/* PF should have NIC vectors and Roce vectors,
* NIC vectors are queued before Roce vectors.
*/
@@ -915,6 +918,15 @@ static int hclge_query_pf_resource(struct hclge_dev *hdev)
hdev->num_msi =
hnae3_get_field(__le16_to_cpu(req->pf_intr_vector_number),
HCLGE_PF_VEC_NUM_M, HCLGE_PF_VEC_NUM_S);
+
+ hdev->num_nic_msi = hdev->num_msi;
+ }
+
+ if (hdev->num_nic_msi < HNAE3_MIN_VECTOR_NUM) {
+ dev_err(&hdev->pdev->dev,
+ "Just %u msi resources, not enough for pf(min:2).\n",
+ hdev->num_nic_msi);
+ return -EINVAL;
}
return 0;
@@ -1507,6 +1519,10 @@ static int hclge_assign_tqp(struct hclge_vport *vport, u16 num_tqps)
kinfo->rss_size = min_t(u16, hdev->rss_size_max,
vport->alloc_tqps / hdev->tm_info.num_tc);
+ /* ensure one to one mapping between irq and queue at default */
+ kinfo->rss_size = min_t(u16, kinfo->rss_size,
+ (hdev->num_nic_msi - 1) / hdev->tm_info.num_tc);
+
return 0;
}
@@ -2285,7 +2301,8 @@ static int hclge_init_msi(struct hclge_dev *hdev)
int vectors;
int i;
- vectors = pci_alloc_irq_vectors(pdev, 1, hdev->num_msi,
+ vectors = pci_alloc_irq_vectors(pdev, HNAE3_MIN_VECTOR_NUM,
+ hdev->num_msi,
PCI_IRQ_MSI | PCI_IRQ_MSIX);
if (vectors < 0) {
dev_err(&pdev->dev,
@@ -2300,6 +2317,7 @@ static int hclge_init_msi(struct hclge_dev *hdev)
hdev->num_msi = vectors;
hdev->num_msi_left = vectors;
+
hdev->base_msi_vector = pdev->irq;
hdev->roce_base_vector = hdev->base_msi_vector +
hdev->roce_base_msix_offset;
@@ -3569,12 +3587,28 @@ static int hclge_set_rst_done(struct hclge_dev *hdev)
{
struct hclge_pf_rst_done_cmd *req;
struct hclge_desc desc;
+ int ret;
req = (struct hclge_pf_rst_done_cmd *)desc.data;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_PF_RST_DONE, false);
req->pf_rst_done |= HCLGE_PF_RESET_DONE_BIT;
- return hclge_cmd_send(&hdev->hw, &desc, 1);
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ /* To be compatible with the old firmware, which does not support
+ * command HCLGE_OPC_PF_RST_DONE, just print a warning and
+ * return success
+ */
+ if (ret == -EOPNOTSUPP) {
+ dev_warn(&hdev->pdev->dev,
+ "current firmware does not support command(0x%x)!\n",
+ HCLGE_OPC_PF_RST_DONE);
+ return 0;
+ } else if (ret) {
+ dev_err(&hdev->pdev->dev, "assert PF reset done fail %d!\n",
+ ret);
+ }
+
+ return ret;
}
static int hclge_reset_prepare_up(struct hclge_dev *hdev)
@@ -3903,6 +3937,7 @@ static int hclge_get_vector(struct hnae3_handle *handle, u16 vector_num,
int alloc = 0;
int i, j;
+ vector_num = min_t(u16, hdev->num_nic_msi - 1, vector_num);
vector_num = min(hdev->num_msi_left, vector_num);
for (j = 0; j < vector_num; j++) {
@@ -6228,11 +6263,23 @@ static int hclge_config_switch_param(struct hclge_dev *hdev, int vfid,
func_id = hclge_get_port_number(HOST_PORT, 0, vfid, 0);
req = (struct hclge_mac_vlan_switch_cmd *)desc.data;
+
+ /* read current config parameter */
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MAC_VLAN_SWITCH_PARAM,
- false);
+ true);
req->roce_sel = HCLGE_MAC_VLAN_NIC_SEL;
req->func_id = cpu_to_le32(func_id);
- req->switch_param = switch_param;
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "read mac vlan switch parameter fail, ret = %d\n", ret);
+ return ret;
+ }
+
+ /* modify and write new config parameter */
+ hclge_cmd_reuse_desc(&desc, false);
+ req->switch_param = (req->switch_param & param_mask) | switch_param;
req->param_mask = param_mask;
ret = hclge_cmd_send(&hdev->hw, &desc, 1);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 3e9574a9e22d..615cde1cbf0b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0+
+/* SPDX-License-Identifier: GPL-2.0+ */
// Copyright (c) 2016-2017 Hisilicon Limited.
#ifndef __HCLGE_MAIN_H
@@ -166,7 +166,7 @@ enum HLCGE_PORT_TYPE {
#define HCLGE_GLOBAL_RESET_BIT 0
#define HCLGE_CORE_RESET_BIT 1
#define HCLGE_IMP_RESET_BIT 2
-#define HCLGE_RESET_INT_M GENMASK(2, 0)
+#define HCLGE_RESET_INT_M GENMASK(7, 5)
#define HCLGE_FUN_RST_ING 0x20C00
#define HCLGE_FUN_RST_ING_B 0
@@ -763,6 +763,7 @@ struct hclge_dev {
u32 base_msi_vector;
u16 *vector_status;
int *vector_irq;
+ u16 num_nic_msi; /* Num of nic vectors for this PF */
u16 num_roce_msi; /* Num of roce vectors for this PF */
int roce_base_vector;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h
index ef095d9c566f..dd9a1218a7b0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0+
+/* SPDX-License-Identifier: GPL-2.0+ */
// Copyright (c) 2016-2017 Hisilicon Limited.
#ifndef __HCLGE_MDIO_H
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index 9f0e35f27789..62399cc1c5a6 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -537,9 +537,16 @@ static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport)
kinfo->rss_size = kinfo->req_rss_size;
} else if (kinfo->rss_size > max_rss_size ||
(!kinfo->req_rss_size && kinfo->rss_size < max_rss_size)) {
+ /* if user not set rss, the rss_size should compare with the
+ * valid msi numbers to ensure one to one map between tqp and
+ * irq as default.
+ */
+ if (!kinfo->req_rss_size)
+ max_rss_size = min_t(u16, max_rss_size,
+ (hdev->num_nic_msi - 1) /
+ kinfo->num_tc);
+
/* Set to the maximum specification value (max_rss_size). */
- dev_info(&hdev->pdev->dev, "rss changes from %d to %d\n",
- kinfo->rss_size, max_rss_size);
kinfo->rss_size = max_rss_size;
}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
index 818610988d34..260f22d19d81 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0+
+/* SPDX-License-Identifier: GPL-2.0+ */
// Copyright (c) 2016-2017 Hisilicon Limited.
#ifndef __HCLGE_TM_H
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index e3090b3dab1d..7d7e712691b9 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -411,6 +411,13 @@ static int hclgevf_knic_setup(struct hclgevf_dev *hdev)
kinfo->tqp[i] = &hdev->htqp[i].q;
}
+ /* after init the max rss_size and tqps, adjust the default tqp numbers
+ * and rss size with the actual vector numbers
+ */
+ kinfo->num_tqps = min_t(u16, hdev->num_nic_msix - 1, kinfo->num_tqps);
+ kinfo->rss_size = min_t(u16, kinfo->num_tqps / kinfo->num_tc,
+ kinfo->rss_size);
+
return 0;
}
@@ -502,6 +509,7 @@ static int hclgevf_get_vector(struct hnae3_handle *handle, u16 vector_num,
int alloc = 0;
int i, j;
+ vector_num = min_t(u16, hdev->num_nic_msix - 1, vector_num);
vector_num = min(hdev->num_msi_left, vector_num);
for (j = 0; j < vector_num; j++) {
@@ -2246,13 +2254,14 @@ static int hclgevf_init_msi(struct hclgevf_dev *hdev)
int vectors;
int i;
- if (hnae3_get_bit(hdev->ae_dev->flag, HNAE3_DEV_SUPPORT_ROCE_B))
+ if (hnae3_dev_roce_supported(hdev))
vectors = pci_alloc_irq_vectors(pdev,
hdev->roce_base_msix_offset + 1,
hdev->num_msi,
PCI_IRQ_MSIX);
else
- vectors = pci_alloc_irq_vectors(pdev, 1, hdev->num_msi,
+ vectors = pci_alloc_irq_vectors(pdev, HNAE3_MIN_VECTOR_NUM,
+ hdev->num_msi,
PCI_IRQ_MSI | PCI_IRQ_MSIX);
if (vectors < 0) {
@@ -2268,6 +2277,7 @@ static int hclgevf_init_msi(struct hclgevf_dev *hdev)
hdev->num_msi = vectors;
hdev->num_msi_left = vectors;
+
hdev->base_msi_vector = pdev->irq;
hdev->roce_base_vector = pdev->irq + hdev->roce_base_msix_offset;
@@ -2533,7 +2543,7 @@ static int hclgevf_query_vf_resource(struct hclgevf_dev *hdev)
req = (struct hclgevf_query_res_cmd *)desc.data;
- if (hnae3_get_bit(hdev->ae_dev->flag, HNAE3_DEV_SUPPORT_ROCE_B)) {
+ if (hnae3_dev_roce_supported(hdev)) {
hdev->roce_base_msix_offset =
hnae3_get_field(__le16_to_cpu(req->msixcap_localid_ba_rocee),
HCLGEVF_MSIX_OFT_ROCEE_M,
@@ -2542,6 +2552,9 @@ static int hclgevf_query_vf_resource(struct hclgevf_dev *hdev)
hnae3_get_field(__le16_to_cpu(req->vf_intr_vector_number),
HCLGEVF_VEC_NUM_M, HCLGEVF_VEC_NUM_S);
+ /* nic's msix numbers is always equals to the roce's. */
+ hdev->num_nic_msix = hdev->num_roce_msix;
+
/* VF should have NIC vectors and Roce vectors, NIC vectors
* are queued before Roce vectors. The offset is fixed to 64.
*/
@@ -2551,6 +2564,15 @@ static int hclgevf_query_vf_resource(struct hclgevf_dev *hdev)
hdev->num_msi =
hnae3_get_field(__le16_to_cpu(req->vf_intr_vector_number),
HCLGEVF_VEC_NUM_M, HCLGEVF_VEC_NUM_S);
+
+ hdev->num_nic_msix = hdev->num_msi;
+ }
+
+ if (hdev->num_nic_msix < HNAE3_MIN_VECTOR_NUM) {
+ dev_err(&hdev->pdev->dev,
+ "Just %u msi resources, not enough for vf(min:2).\n",
+ hdev->num_nic_msix);
+ return -EINVAL;
}
return 0;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
index bdde3afc286b..2b8d6bc6d224 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
@@ -270,6 +270,7 @@ struct hclgevf_dev {
u16 num_msi;
u16 num_msi_left;
u16 num_msi_used;
+ u16 num_nic_msix; /* Num of nic vectors for this VF */
u16 num_roce_msix; /* Num of roce vectors for this VF */
u16 roce_base_msix_offset;
int roce_base_vector;
diff --git a/drivers/net/ethernet/i825xx/lasi_82596.c b/drivers/net/ethernet/i825xx/lasi_82596.c
index 211c5f74b4c8..aec7e98bcc85 100644
--- a/drivers/net/ethernet/i825xx/lasi_82596.c
+++ b/drivers/net/ethernet/i825xx/lasi_82596.c
@@ -96,6 +96,8 @@
#define OPT_SWAP_PORT 0x0001 /* Need to wordswp on the MPU port */
+#define LIB82596_DMA_ATTR DMA_ATTR_NON_CONSISTENT
+
#define DMA_WBACK(ndev, addr, len) \
do { dma_cache_sync((ndev)->dev.parent, (void *)addr, len, DMA_TO_DEVICE); } while (0)
@@ -200,7 +202,7 @@ static int __exit lan_remove_chip(struct parisc_device *pdev)
unregister_netdev (dev);
dma_free_attrs(&pdev->dev, sizeof(struct i596_private), lp->dma,
- lp->dma_addr, DMA_ATTR_NON_CONSISTENT);
+ lp->dma_addr, LIB82596_DMA_ATTR);
free_netdev (dev);
return 0;
}
diff --git a/drivers/net/ethernet/i825xx/lib82596.c b/drivers/net/ethernet/i825xx/lib82596.c
index 1274ad24d6af..f9742af7f142 100644
--- a/drivers/net/ethernet/i825xx/lib82596.c
+++ b/drivers/net/ethernet/i825xx/lib82596.c
@@ -1065,7 +1065,7 @@ static int i82596_probe(struct net_device *dev)
dma = dma_alloc_attrs(dev->dev.parent, sizeof(struct i596_dma),
&lp->dma_addr, GFP_KERNEL,
- DMA_ATTR_NON_CONSISTENT);
+ LIB82596_DMA_ATTR);
if (!dma) {
printk(KERN_ERR "%s: Couldn't get shared memory\n", __FILE__);
return -ENOMEM;
@@ -1087,7 +1087,7 @@ static int i82596_probe(struct net_device *dev)
i = register_netdev(dev);
if (i) {
dma_free_attrs(dev->dev.parent, sizeof(struct i596_dma),
- dma, lp->dma_addr, DMA_ATTR_NON_CONSISTENT);
+ dma, lp->dma_addr, LIB82596_DMA_ATTR);
return i;
}
diff --git a/drivers/net/ethernet/i825xx/sni_82596.c b/drivers/net/ethernet/i825xx/sni_82596.c
index 6eb6c2ff7f09..6436a98c5953 100644
--- a/drivers/net/ethernet/i825xx/sni_82596.c
+++ b/drivers/net/ethernet/i825xx/sni_82596.c
@@ -24,6 +24,8 @@
static const char sni_82596_string[] = "snirm_82596";
+#define LIB82596_DMA_ATTR 0
+
#define DMA_WBACK(priv, addr, len) do { } while (0)
#define DMA_INV(priv, addr, len) do { } while (0)
#define DMA_WBACK_INV(priv, addr, len) do { } while (0)
@@ -152,7 +154,7 @@ static int sni_82596_driver_remove(struct platform_device *pdev)
unregister_netdev(dev);
dma_free_attrs(dev->dev.parent, sizeof(struct i596_private), lp->dma,
- lp->dma_addr, DMA_ATTR_NON_CONSISTENT);
+ lp->dma_addr, LIB82596_DMA_ATTR);
iounmap(lp->ca);
iounmap(lp->mpu_port);
free_netdev (dev);
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 2b073a3c0b84..0686ded7ad3a 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -2881,7 +2881,10 @@ static int enable_scrq_irq(struct ibmvnic_adapter *adapter,
u64 val = (0xff000000) | scrq->hw_irq;
rc = plpar_hcall_norets(H_EOI, val);
- if (rc)
+ /* H_EOI would fail with rc = H_FUNCTION when running
+ * in XIVE mode which is expected, but not an error.
+ */
+ if (rc && (rc != H_FUNCTION))
dev_err(dev, "H_EOI FAILED irq 0x%llx. rc=%ld\n",
val, rc);
}
diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
index 71d3d8854d8f..be56e631d693 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
@@ -607,6 +607,7 @@ static int e1000_set_ringparam(struct net_device *netdev,
for (i = 0; i < adapter->num_rx_queues; i++)
rxdr[i].count = rxdr->count;
+ err = 0;
if (netif_running(adapter->netdev)) {
/* Try to get new resources before deleting old */
err = e1000_setup_all_rx_resources(adapter);
@@ -627,14 +628,13 @@ static int e1000_set_ringparam(struct net_device *netdev,
adapter->rx_ring = rxdr;
adapter->tx_ring = txdr;
err = e1000_up(adapter);
- if (err)
- goto err_setup;
}
kfree(tx_old);
kfree(rx_old);
clear_bit(__E1000_RESETTING, &adapter->flags);
- return 0;
+ return err;
+
err_setup_tx:
e1000_free_all_rx_resources(adapter);
err_setup_rx:
@@ -646,7 +646,6 @@ err_alloc_rx:
err_alloc_tx:
if (netif_running(adapter->netdev))
e1000_up(adapter);
-err_setup:
clear_bit(__E1000_RESETTING, &adapter->flags);
return err;
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
index 530613f31527..69a2daaca5c5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
@@ -20,6 +20,8 @@
/* API version 1.7 implements additional link and PHY-specific APIs */
#define I40E_MINOR_VER_GET_LINK_INFO_XL710 0x0007
+/* API version 1.9 for X722 implements additional link and PHY-specific APIs */
+#define I40E_MINOR_VER_GET_LINK_INFO_X722 0x0009
/* API version 1.6 for X722 devices adds ability to stop FW LLDP agent */
#define I40E_MINOR_VER_FW_LLDP_STOPPABLE_X722 0x0006
diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index d37c6e0e5f08..7560f06768e0 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -1876,7 +1876,8 @@ i40e_status i40e_aq_get_link_info(struct i40e_hw *hw,
hw->aq.fw_min_ver < 40)) && hw_link_info->phy_type == 0xE)
hw_link_info->phy_type = I40E_PHY_TYPE_10GBASE_SFPP_CU;
- if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) {
+ if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE &&
+ hw->mac.type != I40E_MAC_X722) {
__le32 tmp;
memcpy(&tmp, resp->link_type, sizeof(tmp));
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index b1c3227ae4ab..d07e1a890428 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -157,11 +157,6 @@ static int i40e_xsk_umem_disable(struct i40e_vsi *vsi, u16 qid)
err = i40e_queue_pair_enable(vsi, qid);
if (err)
return err;
-
- /* Kick start the NAPI context so that receiving will start */
- err = i40e_xsk_wakeup(vsi->netdev, qid, XDP_WAKEUP_RX);
- if (err)
- return err;
}
return 0;
@@ -694,8 +689,6 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
i40e_xdp_ring_update_tail(xdp_ring);
xsk_umem_consume_tx_done(xdp_ring->xsk_umem);
- if (xsk_umem_uses_need_wakeup(xdp_ring->xsk_umem))
- xsk_clear_tx_need_wakeup(xdp_ring->xsk_umem);
}
return !!budget && work_done;
@@ -774,12 +767,8 @@ bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi,
i40e_update_tx_stats(tx_ring, completed_frames, total_bytes);
out_xmit:
- if (xsk_umem_uses_need_wakeup(tx_ring->xsk_umem)) {
- if (tx_ring->next_to_clean == tx_ring->next_to_use)
- xsk_set_tx_need_wakeup(tx_ring->xsk_umem);
- else
- xsk_clear_tx_need_wakeup(tx_ring->xsk_umem);
- }
+ if (xsk_umem_uses_need_wakeup(tx_ring->xsk_umem))
+ xsk_set_tx_need_wakeup(tx_ring->xsk_umem);
xmit_done = i40e_xmit_zc(tx_ring, budget);
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index 8f310e520b06..821987da5698 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -314,7 +314,7 @@ iavf_map_vector_to_rxq(struct iavf_adapter *adapter, int v_idx, int r_idx)
q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting);
q_vector->ring_mask |= BIT(r_idx);
wr32(hw, IAVF_VFINT_ITRN1(IAVF_RX_ITR, q_vector->reg_idx),
- q_vector->rx.current_itr);
+ q_vector->rx.current_itr >> 1);
q_vector->rx.current_itr = q_vector->rx.target_itr;
}
@@ -340,7 +340,7 @@ iavf_map_vector_to_txq(struct iavf_adapter *adapter, int v_idx, int t_idx)
q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting);
q_vector->num_ringpairs++;
wr32(hw, IAVF_VFINT_ITRN1(IAVF_TX_ITR, q_vector->reg_idx),
- q_vector->tx.target_itr);
+ q_vector->tx.target_itr >> 1);
q_vector->tx.current_itr = q_vector->tx.target_itr;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c
index fc624b73d05d..2fde9653a608 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.c
+++ b/drivers/net/ethernet/intel/ice/ice_sched.c
@@ -1036,7 +1036,7 @@ enum ice_status ice_sched_query_res_alloc(struct ice_hw *hw)
struct ice_aqc_query_txsched_res_resp *buf;
enum ice_status status = 0;
__le16 max_sibl;
- u8 i;
+ u16 i;
if (hw->layer_info)
return status;
diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c
index 3ec2ce0725d5..8a6ef3514129 100644
--- a/drivers/net/ethernet/intel/igb/e1000_82575.c
+++ b/drivers/net/ethernet/intel/igb/e1000_82575.c
@@ -466,7 +466,7 @@ static s32 igb_init_mac_params_82575(struct e1000_hw *hw)
? igb_setup_copper_link_82575
: igb_setup_serdes_link_82575;
- if (mac->type == e1000_82580) {
+ if (mac->type == e1000_82580 || mac->type == e1000_i350) {
switch (hw->device_id) {
/* feature not supported on these id's */
case E1000_DEV_ID_DH89XXCC_SGMII:
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 105b0624081a..ed7e667d7eb2 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -753,7 +753,8 @@ u32 igb_rd32(struct e1000_hw *hw, u32 reg)
struct net_device *netdev = igb->netdev;
hw->hw_addr = NULL;
netdev_err(netdev, "PCIe link lost\n");
- WARN(1, "igb: Failed to read reg 0x%x!\n", reg);
+ WARN(pci_device_is_present(igb->pdev),
+ "igb: Failed to read reg 0x%x!\n", reg);
}
return value;
@@ -2064,7 +2065,8 @@ static void igb_check_swap_media(struct igb_adapter *adapter)
if ((hw->phy.media_type == e1000_media_type_copper) &&
(!(connsw & E1000_CONNSW_AUTOSENSE_EN))) {
swap_now = true;
- } else if (!(connsw & E1000_CONNSW_SERDESD)) {
+ } else if ((hw->phy.media_type != e1000_media_type_copper) &&
+ !(connsw & E1000_CONNSW_SERDESD)) {
/* copper signal takes time to appear */
if (adapter->copper_tries < 4) {
adapter->copper_tries++;
@@ -2370,7 +2372,7 @@ void igb_reset(struct igb_adapter *adapter)
adapter->ei.get_invariants(hw);
adapter->flags &= ~IGB_FLAG_MEDIA_RESET;
}
- if ((mac->type == e1000_82575) &&
+ if ((mac->type == e1000_82575 || mac->type == e1000_i350) &&
(adapter->flags & IGB_FLAG_MAS_ENABLE)) {
igb_enable_mas(adapter);
}
@@ -5673,8 +5675,8 @@ static void igb_tx_ctxtdesc(struct igb_ring *tx_ring,
* should have been handled by the upper layers.
*/
if (tx_ring->launchtime_enable) {
- ts = ns_to_timespec64(first->skb->tstamp);
- first->skb->tstamp = 0;
+ ts = ktime_to_timespec64(first->skb->tstamp);
+ first->skb->tstamp = ktime_set(0, 0);
context_desc->seqnum_seed = cpu_to_le32(ts.tv_nsec / 32);
} else {
context_desc->seqnum_seed = 0;
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index fd3071f55bd3..c39e921757ba 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -521,6 +521,19 @@ static int igb_ptp_feature_enable_i210(struct ptp_clock_info *ptp,
switch (rq->type) {
case PTP_CLK_REQ_EXTTS:
+ /* Reject requests with unsupported flags */
+ if (rq->extts.flags & ~(PTP_ENABLE_FEATURE |
+ PTP_RISING_EDGE |
+ PTP_FALLING_EDGE |
+ PTP_STRICT_FLAGS))
+ return -EOPNOTSUPP;
+
+ /* Reject requests failing to enable both edges. */
+ if ((rq->extts.flags & PTP_STRICT_FLAGS) &&
+ (rq->extts.flags & PTP_ENABLE_FEATURE) &&
+ (rq->extts.flags & PTP_EXTTS_EDGES) != PTP_EXTTS_EDGES)
+ return -EOPNOTSUPP;
+
if (on) {
pin = ptp_find_pin(igb->ptp_clock, PTP_PF_EXTTS,
rq->extts.index);
@@ -551,6 +564,10 @@ static int igb_ptp_feature_enable_i210(struct ptp_clock_info *ptp,
return 0;
case PTP_CLK_REQ_PEROUT:
+ /* Reject requests with unsupported flags */
+ if (rq->perout.flags)
+ return -EOPNOTSUPP;
+
if (on) {
pin = ptp_find_pin(igb->ptp_clock, PTP_PF_PEROUT,
rq->perout.index);
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 63b62d74f961..24888676f69b 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -824,8 +824,8 @@ static void igc_tx_ctxtdesc(struct igc_ring *tx_ring,
* should have been handled by the upper layers.
*/
if (tx_ring->launchtime_enable) {
- ts = ns_to_timespec64(first->skb->tstamp);
- first->skb->tstamp = 0;
+ ts = ktime_to_timespec64(first->skb->tstamp);
+ first->skb->tstamp = ktime_set(0, 0);
context_desc->launch_time = cpu_to_le32(ts.tv_nsec / 32);
} else {
context_desc->launch_time = 0;
@@ -4047,7 +4047,8 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg)
hw->hw_addr = NULL;
netif_device_detach(netdev);
netdev_err(netdev, "PCIe link lost, device now detached\n");
- WARN(1, "igc: Failed to read reg 0x%x!\n", reg);
+ WARN(pci_device_is_present(igc->pdev),
+ "igc: Failed to read reg 0x%x!\n", reg);
}
return value;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 1ce2397306b9..91b3780ddb04 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -4310,7 +4310,6 @@ static void ixgbe_set_rx_buffer_len(struct ixgbe_adapter *adapter)
if (test_bit(__IXGBE_RX_FCOE, &rx_ring->state))
set_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state);
- clear_bit(__IXGBE_RX_BUILD_SKB_ENABLED, &rx_ring->state);
if (adapter->flags2 & IXGBE_FLAG2_RX_LEGACY)
continue;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
index 100ac89b345d..d6feaacfbf89 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
@@ -622,8 +622,6 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget)
if (tx_desc) {
ixgbe_xdp_ring_update_tail(xdp_ring);
xsk_umem_consume_tx_done(xdp_ring->xsk_umem);
- if (xsk_umem_uses_need_wakeup(xdp_ring->xsk_umem))
- xsk_clear_tx_need_wakeup(xdp_ring->xsk_umem);
}
return !!budget && work_done;
@@ -691,12 +689,8 @@ bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector,
if (xsk_frames)
xsk_umem_complete_tx(umem, xsk_frames);
- if (xsk_umem_uses_need_wakeup(tx_ring->xsk_umem)) {
- if (tx_ring->next_to_clean == tx_ring->next_to_use)
- xsk_set_tx_need_wakeup(tx_ring->xsk_umem);
- else
- xsk_clear_tx_need_wakeup(tx_ring->xsk_umem);
- }
+ if (xsk_umem_uses_need_wakeup(tx_ring->xsk_umem))
+ xsk_set_tx_need_wakeup(tx_ring->xsk_umem);
return ixgbe_xmit_zc(tx_ring, q_vector->tx.work_limit);
}
diff --git a/drivers/net/ethernet/marvell/mvneta_bm.h b/drivers/net/ethernet/marvell/mvneta_bm.h
index c8425d35c049..e47783ce77e0 100644
--- a/drivers/net/ethernet/marvell/mvneta_bm.h
+++ b/drivers/net/ethernet/marvell/mvneta_bm.h
@@ -160,16 +160,23 @@ static inline u32 mvneta_bm_pool_get_bp(struct mvneta_bm *priv,
(bm_pool->id << MVNETA_BM_POOL_ACCESS_OFFS));
}
#else
-void mvneta_bm_pool_destroy(struct mvneta_bm *priv,
- struct mvneta_bm_pool *bm_pool, u8 port_map) {}
-void mvneta_bm_bufs_free(struct mvneta_bm *priv, struct mvneta_bm_pool *bm_pool,
- u8 port_map) {}
-int mvneta_bm_construct(struct hwbm_pool *hwbm_pool, void *buf) { return 0; }
-int mvneta_bm_pool_refill(struct mvneta_bm *priv,
- struct mvneta_bm_pool *bm_pool) {return 0; }
-struct mvneta_bm_pool *mvneta_bm_pool_use(struct mvneta_bm *priv, u8 pool_id,
- enum mvneta_bm_type type, u8 port_id,
- int pkt_size) { return NULL; }
+static inline void mvneta_bm_pool_destroy(struct mvneta_bm *priv,
+ struct mvneta_bm_pool *bm_pool,
+ u8 port_map) {}
+static inline void mvneta_bm_bufs_free(struct mvneta_bm *priv,
+ struct mvneta_bm_pool *bm_pool,
+ u8 port_map) {}
+static inline int mvneta_bm_construct(struct hwbm_pool *hwbm_pool, void *buf)
+{ return 0; }
+static inline int mvneta_bm_pool_refill(struct mvneta_bm *priv,
+ struct mvneta_bm_pool *bm_pool)
+{ return 0; }
+static inline struct mvneta_bm_pool *mvneta_bm_pool_use(struct mvneta_bm *priv,
+ u8 pool_id,
+ enum mvneta_bm_type type,
+ u8 port_id,
+ int pkt_size)
+{ return NULL; }
static inline void mvneta_bm_pool_put_bp(struct mvneta_bm *priv,
struct mvneta_bm_pool *bm_pool,
@@ -178,7 +185,8 @@ static inline void mvneta_bm_pool_put_bp(struct mvneta_bm *priv,
static inline u32 mvneta_bm_pool_get_bp(struct mvneta_bm *priv,
struct mvneta_bm_pool *bm_pool)
{ return 0; }
-struct mvneta_bm *mvneta_bm_get(struct device_node *node) { return NULL; }
-void mvneta_bm_put(struct mvneta_bm *priv) {}
+static inline struct mvneta_bm *mvneta_bm_get(struct device_node *node)
+{ return NULL; }
+static inline void mvneta_bm_put(struct mvneta_bm *priv) {}
#endif /* CONFIG_MVNETA_BM */
#endif
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
index 206dc5dc1df8..5c1f389e3320 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Marvell OcteonTx2 CGX driver
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell OcteonTx2 CGX driver
*
* Copyright (C) 2018 Marvell International Ltd.
*
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx_fw_if.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx_fw_if.h
index fb3ba4968a9b..473d9751601f 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/cgx_fw_if.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx_fw_if.h
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Marvell OcteonTx2 CGX driver
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell OcteonTx2 CGX driver
*
* Copyright (C) 2018 Marvell International Ltd.
*
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/common.h b/drivers/net/ethernet/marvell/octeontx2/af/common.h
index e332e82fc066..413c3f254cf8 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/common.h
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Marvell OcteonTx2 RVU Admin Function driver
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell OcteonTx2 RVU Admin Function driver
*
* Copyright (C) 2018 Marvell International Ltd.
*
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
index 76a4575d18ff..75439fce0505 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Marvell OcteonTx2 RVU Admin Function driver
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell OcteonTx2 RVU Admin Function driver
*
* Copyright (C) 2018 Marvell International Ltd.
*
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
index 8d6d90fdfb73..5d4df315a0e1 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/npc.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Marvell OcteonTx2 RVU Admin Function driver
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell OcteonTx2 RVU Admin Function driver
*
* Copyright (C) 2018 Marvell International Ltd.
*
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h
index b2ce957605bb..da649f6a5573 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Marvell OcteonTx2 RVU Admin Function driver
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell OcteonTx2 RVU Admin Function driver
*
* Copyright (C) 2018 Marvell International Ltd.
*
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index c9d60b0554c0..5222e4228905 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Marvell OcteonTx2 RVU Admin Function driver
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell OcteonTx2 RVU Admin Function driver
*
* Copyright (C) 2018 Marvell International Ltd.
*
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
index 09a8d61f3144..1ea92a2e7cfe 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Marvell OcteonTx2 RVU Admin Function driver
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell OcteonTx2 RVU Admin Function driver
*
* Copyright (C) 2018 Marvell International Ltd.
*
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h
index f920dac74e6c..84a39063a8bb 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_struct.h
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Marvell OcteonTx2 RVU Admin Function driver
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Marvell OcteonTx2 RVU Admin Function driver
*
* Copyright (C) 2018 Marvell International Ltd.
*
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index c61069340f4f..703adb96429e 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -261,6 +261,7 @@ static void mtk_mac_config(struct phylink_config *config, unsigned int mode,
ge_mode = 0;
switch (state->interface) {
case PHY_INTERFACE_MODE_MII:
+ case PHY_INTERFACE_MODE_GMII:
ge_mode = 1;
break;
case PHY_INTERFACE_MODE_REVMII:
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index d8313e2ee600..a1202e53710c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -1745,6 +1745,7 @@ static int mlx4_en_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
err = mlx4_en_get_flow(dev, cmd, cmd->fs.location);
break;
case ETHTOOL_GRXCLSRLALL:
+ cmd->data = MAX_NUM_OF_FS_RULES;
while ((!err || err == -ENOENT) && priority < cmd->rule_cnt) {
err = mlx4_en_get_flow(dev, cmd, i);
if (!err)
@@ -1811,6 +1812,7 @@ static int mlx4_en_set_channels(struct net_device *dev,
struct mlx4_en_dev *mdev = priv->mdev;
struct mlx4_en_port_profile new_prof;
struct mlx4_en_priv *tmp;
+ int total_tx_count;
int port_up = 0;
int xdp_count;
int err = 0;
@@ -1825,13 +1827,12 @@ static int mlx4_en_set_channels(struct net_device *dev,
mutex_lock(&mdev->state_lock);
xdp_count = priv->tx_ring_num[TX_XDP] ? channel->rx_count : 0;
- if (channel->tx_count * priv->prof->num_up + xdp_count >
- priv->mdev->profile.max_num_tx_rings_p_up * priv->prof->num_up) {
+ total_tx_count = channel->tx_count * priv->prof->num_up + xdp_count;
+ if (total_tx_count > MAX_TX_RINGS) {
err = -EINVAL;
en_err(priv,
"Total number of TX and XDP rings (%d) exceeds the maximum supported (%d)\n",
- channel->tx_count * priv->prof->num_up + xdp_count,
- MAX_TX_RINGS);
+ total_tx_count, MAX_TX_RINGS);
goto out;
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 40ec5acf79c0..70fd246840e2 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -91,6 +91,7 @@ int mlx4_en_alloc_tx_queue_per_tc(struct net_device *dev, u8 tc)
struct mlx4_en_dev *mdev = priv->mdev;
struct mlx4_en_port_profile new_prof;
struct mlx4_en_priv *tmp;
+ int total_count;
int port_up = 0;
int err = 0;
@@ -104,6 +105,14 @@ int mlx4_en_alloc_tx_queue_per_tc(struct net_device *dev, u8 tc)
MLX4_EN_NUM_UP_HIGH;
new_prof.tx_ring_num[TX] = new_prof.num_tx_rings_p_up *
new_prof.num_up;
+ total_count = new_prof.tx_ring_num[TX] + new_prof.tx_ring_num[TX_XDP];
+ if (total_count > MAX_TX_RINGS) {
+ err = -EINVAL;
+ en_err(priv,
+ "Total number of TX and XDP rings (%d) exceeds the maximum supported (%d)\n",
+ total_count, MAX_TX_RINGS);
+ goto out;
+ }
err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof, true);
if (err)
goto out;
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index fce9b3a24347..d44ac666e730 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -514,8 +514,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
dev->caps.max_rq_desc_sz = dev_cap->max_rq_desc_sz;
/*
* Subtract 1 from the limit because we need to allocate a
- * spare CQE so the HCA HW can tell the difference between an
- * empty CQ and a full CQ.
+ * spare CQE to enable resizing the CQ.
*/
dev->caps.max_cqes = dev_cap->max_cq_sz - 1;
dev->caps.reserved_cqs = dev_cap->reserved_cqs;
@@ -4011,6 +4010,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
goto err_params_unregister;
devlink_params_publish(devlink);
+ devlink_reload_enable(devlink);
pci_save_state(pdev);
return 0;
@@ -4122,6 +4122,8 @@ static void mlx4_remove_one(struct pci_dev *pdev)
struct devlink *devlink = priv_to_devlink(priv);
int active_vfs = 0;
+ devlink_reload_disable(devlink);
+
if (mlx4_is_slave(dev))
persist->interface_state |= MLX4_INTERFACE_STATE_NOWAIT;
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 4356f3a58002..1187ef1375e2 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -471,12 +471,31 @@ void mlx4_init_quotas(struct mlx4_dev *dev)
priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[pf];
}
-static int get_max_gauranteed_vfs_counter(struct mlx4_dev *dev)
+static int
+mlx4_calc_res_counter_guaranteed(struct mlx4_dev *dev,
+ struct resource_allocator *res_alloc,
+ int vf)
{
- /* reduce the sink counter */
- return (dev->caps.max_counters - 1 -
- (MLX4_PF_COUNTERS_PER_PORT * MLX4_MAX_PORTS))
- / MLX4_MAX_PORTS;
+ struct mlx4_active_ports actv_ports;
+ int ports, counters_guaranteed;
+
+ /* For master, only allocate according to the number of phys ports */
+ if (vf == mlx4_master_func_num(dev))
+ return MLX4_PF_COUNTERS_PER_PORT * dev->caps.num_ports;
+
+ /* calculate real number of ports for the VF */
+ actv_ports = mlx4_get_active_ports(dev, vf);
+ ports = bitmap_weight(actv_ports.ports, dev->caps.num_ports);
+ counters_guaranteed = ports * MLX4_VF_COUNTERS_PER_PORT;
+
+ /* If we do not have enough counters for this VF, do not
+ * allocate any for it. '-1' to reduce the sink counter.
+ */
+ if ((res_alloc->res_reserved + counters_guaranteed) >
+ (dev->caps.max_counters - 1))
+ return 0;
+
+ return counters_guaranteed;
}
int mlx4_init_resource_tracker(struct mlx4_dev *dev)
@@ -484,7 +503,6 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev)
struct mlx4_priv *priv = mlx4_priv(dev);
int i, j;
int t;
- int max_vfs_guarantee_counter = get_max_gauranteed_vfs_counter(dev);
priv->mfunc.master.res_tracker.slave_list =
kcalloc(dev->num_slaves, sizeof(struct slave_list),
@@ -603,16 +621,8 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev)
break;
case RES_COUNTER:
res_alloc->quota[t] = dev->caps.max_counters;
- if (t == mlx4_master_func_num(dev))
- res_alloc->guaranteed[t] =
- MLX4_PF_COUNTERS_PER_PORT *
- MLX4_MAX_PORTS;
- else if (t <= max_vfs_guarantee_counter)
- res_alloc->guaranteed[t] =
- MLX4_VF_COUNTERS_PER_PORT *
- MLX4_MAX_PORTS;
- else
- res_alloc->guaranteed[t] = 0;
+ res_alloc->guaranteed[t] =
+ mlx4_calc_res_counter_guaranteed(dev, res_alloc, t);
break;
default:
break;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 8d76452cacdc..f1a7bc46f1c0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -345,7 +345,7 @@ struct mlx5e_tx_wqe_info {
u8 num_wqebbs;
u8 num_dma;
#ifdef CONFIG_MLX5_EN_TLS
- skb_frag_t *resync_dump_frag;
+ struct page *resync_dump_frag_page;
#endif
};
@@ -410,6 +410,7 @@ struct mlx5e_txqsq {
struct device *pdev;
__be32 mkey_be;
unsigned long state;
+ unsigned int hw_mtu;
struct hwtstamp_config *tstamp;
struct mlx5_clock *clock;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
index b3a249b2a482..ac44bbe95c5c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c
@@ -141,7 +141,7 @@ int mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv)
"Failed to create hv vhca stats agent, err = %ld\n",
PTR_ERR(agent));
- kfree(priv->stats_agent.buf);
+ kvfree(priv->stats_agent.buf);
return IS_ERR_OR_NULL(agent);
}
@@ -157,5 +157,5 @@ void mlx5e_hv_vhca_stats_destroy(struct mlx5e_priv *priv)
return;
mlx5_hv_vhca_agent_destroy(priv->stats_agent.agent);
- kfree(priv->stats_agent.buf);
+ kvfree(priv->stats_agent.buf);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
index f8ee18b4da6f..745ab6cd7c30 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -97,15 +97,19 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
if (ret)
return ret;
- if (mlx5_lag_is_multipath(mdev) && rt->rt_gw_family != AF_INET)
+ if (mlx5_lag_is_multipath(mdev) && rt->rt_gw_family != AF_INET) {
+ ip_rt_put(rt);
return -ENETUNREACH;
+ }
#else
return -EOPNOTSUPP;
#endif
ret = get_route_and_out_devs(priv, rt->dst.dev, route_dev, out_dev);
- if (ret < 0)
+ if (ret < 0) {
+ ip_rt_put(rt);
return ret;
+ }
if (!(*out_ttl))
*out_ttl = ip4_dst_hoplimit(&rt->dst);
@@ -149,8 +153,10 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
*out_ttl = ip6_dst_hoplimit(dst);
ret = get_route_and_out_devs(priv, dst->dev, route_dev, out_dev);
- if (ret < 0)
+ if (ret < 0) {
+ dst_release(dst);
return ret;
+ }
#else
return -EOPNOTSUPP;
#endif
@@ -233,12 +239,15 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv,
if (max_encap_size < ipv4_encap_size) {
mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
ipv4_encap_size, max_encap_size);
- return -EOPNOTSUPP;
+ err = -EOPNOTSUPP;
+ goto out;
}
encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL);
- if (!encap_header)
- return -ENOMEM;
+ if (!encap_header) {
+ err = -ENOMEM;
+ goto out;
+ }
/* used by mlx5e_detach_encap to lookup a neigh hash table
* entry in the neigh hash table when a user deletes a rule
@@ -349,12 +358,15 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
if (max_encap_size < ipv6_encap_size) {
mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
ipv6_encap_size, max_encap_size);
- return -EOPNOTSUPP;
+ err = -EOPNOTSUPP;
+ goto out;
}
encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL);
- if (!encap_header)
- return -ENOMEM;
+ if (!encap_header) {
+ err = -ENOMEM;
+ goto out;
+ }
/* used by mlx5e_detach_encap to lookup a neigh hash table
* entry in the neigh hash table when a user deletes a rule
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index 87be96747902..7c8796d9743f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -15,15 +15,14 @@
#else
/* TLS offload requires additional stop_room for:
* - a resync SKB.
- * kTLS offload requires additional stop_room for:
- * - static params WQE,
- * - progress params WQE, and
- * - resync DUMP per frag.
+ * kTLS offload requires fixed additional stop_room for:
+ * - a static params WQE, and a progress params WQE.
+ * The additional MTU-depending room for the resync DUMP WQEs
+ * will be calculated and added in runtime.
*/
#define MLX5E_SQ_TLS_ROOM \
(MLX5_SEND_WQE_MAX_WQEBBS + \
- MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS + \
- MAX_SKB_FRAGS * MLX5E_KTLS_MAX_DUMP_WQEBBS)
+ MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS)
#endif
#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start))
@@ -92,7 +91,7 @@ mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq, struct mlx5_wq_cyc *wq,
/* fill sq frag edge with nops to avoid wqe wrapping two pages */
for (; wi < edge_wi; wi++) {
- wi->skb = NULL;
+ memset(wi, 0, sizeof(*wi));
wi->num_wqebbs = 1;
mlx5e_post_nop(wq, sq->sqn, &sq->pc);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
index d2ff74d52720..46725cd743a3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
@@ -38,7 +38,7 @@ static int mlx5e_ktls_add(struct net_device *netdev, struct sock *sk,
return -ENOMEM;
tx_priv->expected_seq = start_offload_tcp_sn;
- tx_priv->crypto_info = crypto_info;
+ tx_priv->crypto_info = *(struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
mlx5e_set_ktls_tx_priv_ctx(tls_ctx, tx_priv);
/* tc and underlay_qpn values are not in use for tls tis */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
index b7298f9ee3d3..a3efa29a4629 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
@@ -21,7 +21,14 @@
MLX5_ST_SZ_BYTES(tls_progress_params))
#define MLX5E_KTLS_PROGRESS_WQEBBS \
(DIV_ROUND_UP(MLX5E_KTLS_PROGRESS_WQE_SZ, MLX5_SEND_WQE_BB))
-#define MLX5E_KTLS_MAX_DUMP_WQEBBS 2
+
+struct mlx5e_dump_wqe {
+ struct mlx5_wqe_ctrl_seg ctrl;
+ struct mlx5_wqe_data_seg data;
+};
+
+#define MLX5E_KTLS_DUMP_WQEBBS \
+ (DIV_ROUND_UP(sizeof(struct mlx5e_dump_wqe), MLX5_SEND_WQE_BB))
enum {
MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD = 0,
@@ -37,7 +44,7 @@ enum {
struct mlx5e_ktls_offload_context_tx {
struct tls_offload_context_tx *tx_ctx;
- struct tls_crypto_info *crypto_info;
+ struct tls12_crypto_info_aes_gcm_128 crypto_info;
u32 expected_seq;
u32 tisn;
u32 key_id;
@@ -86,14 +93,28 @@ struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev,
struct mlx5e_tx_wqe **wqe, u16 *pi);
void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
struct mlx5e_tx_wqe_info *wi,
- struct mlx5e_sq_dma *dma);
-
+ u32 *dma_fifo_cc);
+static inline u8
+mlx5e_ktls_dumps_num_wqebbs(struct mlx5e_txqsq *sq, unsigned int nfrags,
+ unsigned int sync_len)
+{
+ /* Given the MTU and sync_len, calculates an upper bound for the
+ * number of WQEBBs needed for the TX resync DUMP WQEs of a record.
+ */
+ return MLX5E_KTLS_DUMP_WQEBBS *
+ (nfrags + DIV_ROUND_UP(sync_len, sq->hw_mtu));
+}
#else
static inline void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv)
{
}
+static inline void
+mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
+ struct mlx5e_tx_wqe_info *wi,
+ u32 *dma_fifo_cc) {}
+
#endif
#endif /* __MLX5E_TLS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
index d195366461c9..778dab1af8fc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -24,17 +24,12 @@ enum {
static void
fill_static_params_ctx(void *ctx, struct mlx5e_ktls_offload_context_tx *priv_tx)
{
- struct tls_crypto_info *crypto_info = priv_tx->crypto_info;
- struct tls12_crypto_info_aes_gcm_128 *info;
+ struct tls12_crypto_info_aes_gcm_128 *info = &priv_tx->crypto_info;
char *initial_rn, *gcm_iv;
u16 salt_sz, rec_seq_sz;
char *salt, *rec_seq;
u8 tls_version;
- if (WARN_ON(crypto_info->cipher_type != TLS_CIPHER_AES_GCM_128))
- return;
-
- info = (struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
EXTRACT_INFO_FIELDS;
gcm_iv = MLX5_ADDR_OF(tls_static_params, ctx, gcm_iv);
@@ -108,16 +103,15 @@ build_progress_params(struct mlx5e_tx_wqe *wqe, u16 pc, u32 sqn,
}
static void tx_fill_wi(struct mlx5e_txqsq *sq,
- u16 pi, u8 num_wqebbs,
- skb_frag_t *resync_dump_frag,
- u32 num_bytes)
+ u16 pi, u8 num_wqebbs, u32 num_bytes,
+ struct page *page)
{
struct mlx5e_tx_wqe_info *wi = &sq->db.wqe_info[pi];
- wi->skb = NULL;
- wi->num_wqebbs = num_wqebbs;
- wi->resync_dump_frag = resync_dump_frag;
- wi->num_bytes = num_bytes;
+ memset(wi, 0, sizeof(*wi));
+ wi->num_wqebbs = num_wqebbs;
+ wi->num_bytes = num_bytes;
+ wi->resync_dump_frag_page = page;
}
void mlx5e_ktls_tx_offload_set_pending(struct mlx5e_ktls_offload_context_tx *priv_tx)
@@ -145,7 +139,7 @@ post_static_params(struct mlx5e_txqsq *sq,
umr_wqe = mlx5e_sq_fetch_wqe(sq, MLX5E_KTLS_STATIC_UMR_WQE_SZ, &pi);
build_static_params(umr_wqe, sq->pc, sq->sqn, priv_tx, fence);
- tx_fill_wi(sq, pi, MLX5E_KTLS_STATIC_WQEBBS, NULL, 0);
+ tx_fill_wi(sq, pi, MLX5E_KTLS_STATIC_WQEBBS, 0, NULL);
sq->pc += MLX5E_KTLS_STATIC_WQEBBS;
}
@@ -159,7 +153,7 @@ post_progress_params(struct mlx5e_txqsq *sq,
wqe = mlx5e_sq_fetch_wqe(sq, MLX5E_KTLS_PROGRESS_WQE_SZ, &pi);
build_progress_params(wqe, sq->pc, sq->sqn, priv_tx, fence);
- tx_fill_wi(sq, pi, MLX5E_KTLS_PROGRESS_WQEBBS, NULL, 0);
+ tx_fill_wi(sq, pi, MLX5E_KTLS_PROGRESS_WQEBBS, 0, NULL);
sq->pc += MLX5E_KTLS_PROGRESS_WQEBBS;
}
@@ -169,6 +163,14 @@ mlx5e_ktls_tx_post_param_wqes(struct mlx5e_txqsq *sq,
bool skip_static_post, bool fence_first_post)
{
bool progress_fence = skip_static_post || !fence_first_post;
+ struct mlx5_wq_cyc *wq = &sq->wq;
+ u16 contig_wqebbs_room, pi;
+
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+ if (unlikely(contig_wqebbs_room <
+ MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS))
+ mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
if (!skip_static_post)
post_static_params(sq, priv_tx, fence_first_post);
@@ -180,29 +182,36 @@ struct tx_sync_info {
u64 rcd_sn;
s32 sync_len;
int nr_frags;
- skb_frag_t *frags[MAX_SKB_FRAGS];
+ skb_frag_t frags[MAX_SKB_FRAGS];
+};
+
+enum mlx5e_ktls_sync_retval {
+ MLX5E_KTLS_SYNC_DONE,
+ MLX5E_KTLS_SYNC_FAIL,
+ MLX5E_KTLS_SYNC_SKIP_NO_DATA,
};
-static bool tx_sync_info_get(struct mlx5e_ktls_offload_context_tx *priv_tx,
- u32 tcp_seq, struct tx_sync_info *info)
+static enum mlx5e_ktls_sync_retval
+tx_sync_info_get(struct mlx5e_ktls_offload_context_tx *priv_tx,
+ u32 tcp_seq, struct tx_sync_info *info)
{
struct tls_offload_context_tx *tx_ctx = priv_tx->tx_ctx;
+ enum mlx5e_ktls_sync_retval ret = MLX5E_KTLS_SYNC_DONE;
struct tls_record_info *record;
int remaining, i = 0;
unsigned long flags;
- bool ret = true;
spin_lock_irqsave(&tx_ctx->lock, flags);
record = tls_get_record(tx_ctx, tcp_seq, &info->rcd_sn);
if (unlikely(!record)) {
- ret = false;
+ ret = MLX5E_KTLS_SYNC_FAIL;
goto out;
}
if (unlikely(tcp_seq < tls_record_start_seq(record))) {
- if (!tls_record_is_start_marker(record))
- ret = false;
+ ret = tls_record_is_start_marker(record) ?
+ MLX5E_KTLS_SYNC_SKIP_NO_DATA : MLX5E_KTLS_SYNC_FAIL;
goto out;
}
@@ -211,13 +220,13 @@ static bool tx_sync_info_get(struct mlx5e_ktls_offload_context_tx *priv_tx,
while (remaining > 0) {
skb_frag_t *frag = &record->frags[i];
- __skb_frag_ref(frag);
+ get_page(skb_frag_page(frag));
remaining -= skb_frag_size(frag);
- info->frags[i++] = frag;
+ info->frags[i++] = *frag;
}
/* reduce the part which will be sent with the original SKB */
if (remaining < 0)
- skb_frag_size_add(info->frags[i - 1], remaining);
+ skb_frag_size_add(&info->frags[i - 1], remaining);
info->nr_frags = i;
out:
spin_unlock_irqrestore(&tx_ctx->lock, flags);
@@ -229,17 +238,12 @@ tx_post_resync_params(struct mlx5e_txqsq *sq,
struct mlx5e_ktls_offload_context_tx *priv_tx,
u64 rcd_sn)
{
- struct tls_crypto_info *crypto_info = priv_tx->crypto_info;
- struct tls12_crypto_info_aes_gcm_128 *info;
+ struct tls12_crypto_info_aes_gcm_128 *info = &priv_tx->crypto_info;
__be64 rn_be = cpu_to_be64(rcd_sn);
bool skip_static_post;
u16 rec_seq_sz;
char *rec_seq;
- if (WARN_ON(crypto_info->cipher_type != TLS_CIPHER_AES_GCM_128))
- return;
-
- info = (struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
rec_seq = info->rec_seq;
rec_seq_sz = sizeof(info->rec_seq);
@@ -250,11 +254,6 @@ tx_post_resync_params(struct mlx5e_txqsq *sq,
mlx5e_ktls_tx_post_param_wqes(sq, priv_tx, skip_static_post, true);
}
-struct mlx5e_dump_wqe {
- struct mlx5_wqe_ctrl_seg ctrl;
- struct mlx5_wqe_data_seg data;
-};
-
static int
tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn, bool first)
{
@@ -262,7 +261,6 @@ tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn, bool fir
struct mlx5_wqe_data_seg *dseg;
struct mlx5e_dump_wqe *wqe;
dma_addr_t dma_addr = 0;
- u8 num_wqebbs;
u16 ds_cnt;
int fsz;
u16 pi;
@@ -270,7 +268,6 @@ tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn, bool fir
wqe = mlx5e_sq_fetch_wqe(sq, sizeof(*wqe), &pi);
ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
- num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
cseg = &wqe->ctrl;
dseg = &wqe->data;
@@ -291,24 +288,27 @@ tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn, bool fir
dseg->byte_count = cpu_to_be32(fsz);
mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE);
- tx_fill_wi(sq, pi, num_wqebbs, frag, fsz);
- sq->pc += num_wqebbs;
-
- WARN(num_wqebbs > MLX5E_KTLS_MAX_DUMP_WQEBBS,
- "unexpected DUMP num_wqebbs, %d > %d",
- num_wqebbs, MLX5E_KTLS_MAX_DUMP_WQEBBS);
+ tx_fill_wi(sq, pi, MLX5E_KTLS_DUMP_WQEBBS, fsz, skb_frag_page(frag));
+ sq->pc += MLX5E_KTLS_DUMP_WQEBBS;
return 0;
}
void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
struct mlx5e_tx_wqe_info *wi,
- struct mlx5e_sq_dma *dma)
+ u32 *dma_fifo_cc)
{
- struct mlx5e_sq_stats *stats = sq->stats;
+ struct mlx5e_sq_stats *stats;
+ struct mlx5e_sq_dma *dma;
+
+ if (!wi->resync_dump_frag_page)
+ return;
+
+ dma = mlx5e_dma_get(sq, (*dma_fifo_cc)++);
+ stats = sq->stats;
mlx5e_tx_dma_unmap(sq->pdev, dma);
- __skb_frag_unref(wi->resync_dump_frag);
+ put_page(wi->resync_dump_frag_page);
stats->tls_dump_packets++;
stats->tls_dump_bytes += wi->num_bytes;
}
@@ -318,25 +318,31 @@ static void tx_post_fence_nop(struct mlx5e_txqsq *sq)
struct mlx5_wq_cyc *wq = &sq->wq;
u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
- tx_fill_wi(sq, pi, 1, NULL, 0);
+ tx_fill_wi(sq, pi, 1, 0, NULL);
mlx5e_post_nop_fence(wq, sq->sqn, &sq->pc);
}
-static struct sk_buff *
+static enum mlx5e_ktls_sync_retval
mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx,
struct mlx5e_txqsq *sq,
- struct sk_buff *skb,
+ int datalen,
u32 seq)
{
struct mlx5e_sq_stats *stats = sq->stats;
struct mlx5_wq_cyc *wq = &sq->wq;
+ enum mlx5e_ktls_sync_retval ret;
struct tx_sync_info info = {};
u16 contig_wqebbs_room, pi;
u8 num_wqebbs;
- int i;
-
- if (!tx_sync_info_get(priv_tx, seq, &info)) {
+ int i = 0;
+
+ ret = tx_sync_info_get(priv_tx, seq, &info);
+ if (unlikely(ret != MLX5E_KTLS_SYNC_DONE)) {
+ if (ret == MLX5E_KTLS_SYNC_SKIP_NO_DATA) {
+ stats->tls_skip_no_sync_data++;
+ return MLX5E_KTLS_SYNC_SKIP_NO_DATA;
+ }
/* We might get here if a retransmission reaches the driver
* after the relevant record is acked.
* It should be safe to drop the packet in this case
@@ -346,13 +352,8 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx,
}
if (unlikely(info.sync_len < 0)) {
- u32 payload;
- int headln;
-
- headln = skb_transport_offset(skb) + tcp_hdrlen(skb);
- payload = skb->len - headln;
- if (likely(payload <= -info.sync_len))
- return skb;
+ if (likely(datalen <= -info.sync_len))
+ return MLX5E_KTLS_SYNC_DONE;
stats->tls_drop_bypass_req++;
goto err_out;
@@ -360,30 +361,62 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx,
stats->tls_ooo++;
- num_wqebbs = MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS +
- (info.nr_frags ? info.nr_frags * MLX5E_KTLS_MAX_DUMP_WQEBBS : 1);
+ tx_post_resync_params(sq, priv_tx, info.rcd_sn);
+
+ /* If no dump WQE was sent, we need to have a fence NOP WQE before the
+ * actual data xmit.
+ */
+ if (!info.nr_frags) {
+ tx_post_fence_nop(sq);
+ return MLX5E_KTLS_SYNC_DONE;
+ }
+
+ num_wqebbs = mlx5e_ktls_dumps_num_wqebbs(sq, info.nr_frags, info.sync_len);
pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+
if (unlikely(contig_wqebbs_room < num_wqebbs))
mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
tx_post_resync_params(sq, priv_tx, info.rcd_sn);
- for (i = 0; i < info.nr_frags; i++)
- if (tx_post_resync_dump(sq, info.frags[i], priv_tx->tisn, !i))
- goto err_out;
+ for (; i < info.nr_frags; i++) {
+ unsigned int orig_fsz, frag_offset = 0, n = 0;
+ skb_frag_t *f = &info.frags[i];
- /* If no dump WQE was sent, we need to have a fence NOP WQE before the
- * actual data xmit.
- */
- if (!info.nr_frags)
- tx_post_fence_nop(sq);
+ orig_fsz = skb_frag_size(f);
- return skb;
+ do {
+ bool fence = !(i || frag_offset);
+ unsigned int fsz;
+
+ n++;
+ fsz = min_t(unsigned int, sq->hw_mtu, orig_fsz - frag_offset);
+ skb_frag_size_set(f, fsz);
+ if (tx_post_resync_dump(sq, f, priv_tx->tisn, fence)) {
+ page_ref_add(skb_frag_page(f), n - 1);
+ goto err_out;
+ }
+
+ skb_frag_off_add(f, fsz);
+ frag_offset += fsz;
+ } while (frag_offset < orig_fsz);
+
+ page_ref_add(skb_frag_page(f), n - 1);
+ }
+
+ return MLX5E_KTLS_SYNC_DONE;
err_out:
- dev_kfree_skb_any(skb);
- return NULL;
+ for (; i < info.nr_frags; i++)
+ /* The put_page() here undoes the page ref obtained in tx_sync_info_get().
+ * Page refs obtained for the DUMP WQEs above (by page_ref_add) will be
+ * released only upon their completions (or in mlx5e_free_txqsq_descs,
+ * if channel closes).
+ */
+ put_page(skb_frag_page(&info.frags[i]));
+
+ return MLX5E_KTLS_SYNC_FAIL;
}
struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev,
@@ -419,10 +452,15 @@ struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev,
seq = ntohl(tcp_hdr(skb)->seq);
if (unlikely(priv_tx->expected_seq != seq)) {
- skb = mlx5e_ktls_tx_handle_ooo(priv_tx, sq, skb, seq);
- if (unlikely(!skb))
+ enum mlx5e_ktls_sync_retval ret =
+ mlx5e_ktls_tx_handle_ooo(priv_tx, sq, datalen, seq);
+
+ if (likely(ret == MLX5E_KTLS_SYNC_DONE))
+ *wqe = mlx5e_sq_fetch_wqe(sq, sizeof(**wqe), pi);
+ else if (ret == MLX5E_KTLS_SYNC_FAIL)
+ goto err_out;
+ else /* ret == MLX5E_KTLS_SYNC_SKIP_NO_DATA */
goto out;
- *wqe = mlx5e_sq_fetch_wqe(sq, sizeof(**wqe), pi);
}
priv_tx->expected_seq = seq + datalen;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index c5a9c20d7f00..95601269fa2e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -708,9 +708,9 @@ static int get_fec_supported_advertised(struct mlx5_core_dev *dev,
static void ptys2ethtool_supported_advertised_port(struct ethtool_link_ksettings *link_ksettings,
u32 eth_proto_cap,
- u8 connector_type)
+ u8 connector_type, bool ext)
{
- if (!connector_type || connector_type >= MLX5E_CONNECTOR_TYPE_NUMBER) {
+ if ((!connector_type && !ext) || connector_type >= MLX5E_CONNECTOR_TYPE_NUMBER) {
if (eth_proto_cap & (MLX5E_PROT_MASK(MLX5E_10GBASE_CR)
| MLX5E_PROT_MASK(MLX5E_10GBASE_SR)
| MLX5E_PROT_MASK(MLX5E_40GBASE_CR4)
@@ -842,9 +842,9 @@ static int ptys2connector_type[MLX5E_CONNECTOR_TYPE_NUMBER] = {
[MLX5E_PORT_OTHER] = PORT_OTHER,
};
-static u8 get_connector_port(u32 eth_proto, u8 connector_type)
+static u8 get_connector_port(u32 eth_proto, u8 connector_type, bool ext)
{
- if (connector_type && connector_type < MLX5E_CONNECTOR_TYPE_NUMBER)
+ if ((connector_type || ext) && connector_type < MLX5E_CONNECTOR_TYPE_NUMBER)
return ptys2connector_type[connector_type];
if (eth_proto &
@@ -945,9 +945,9 @@ int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv,
eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap;
link_ksettings->base.port = get_connector_port(eth_proto_oper,
- connector_type);
+ connector_type, ext);
ptys2ethtool_supported_advertised_port(link_ksettings, eth_proto_admin,
- connector_type);
+ connector_type, ext);
get_lp_advertising(mdev, eth_proto_lp, link_ksettings);
if (an_status == MLX5_AN_COMPLETE)
@@ -1021,7 +1021,7 @@ static bool ext_link_mode_requested(const unsigned long *adver)
{
#define MLX5E_MIN_PTYS_EXT_LINK_MODE_BIT ETHTOOL_LINK_MODE_50000baseKR_Full_BIT
int size = __ETHTOOL_LINK_MODE_MASK_NBITS - MLX5E_MIN_PTYS_EXT_LINK_MODE_BIT;
- __ETHTOOL_DECLARE_LINK_MODE_MASK(modes);
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(modes) = {0,};
bitmap_set(modes, MLX5E_MIN_PTYS_EXT_LINK_MODE_BIT, size);
return bitmap_intersects(modes, adver, __ETHTOOL_LINK_MODE_MASK_NBITS);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 7569287f8f3c..2a56e66f58d8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1128,6 +1128,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
sq->txq_ix = txq_ix;
sq->uar_map = mdev->mlx5e_res.bfreg.map;
sq->min_inline_mode = params->tx_min_inline_mode;
+ sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu);
sq->stats = &c->priv->channel_stats[c->ix].sq[tc];
sq->stop_room = MLX5E_SQ_STOP_ROOM;
INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
@@ -1135,10 +1136,14 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state);
if (MLX5_IPSEC_DEV(c->priv->mdev))
set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
+#ifdef CONFIG_MLX5_EN_TLS
if (mlx5_accel_is_tls_device(c->priv->mdev)) {
set_bit(MLX5E_SQ_STATE_TLS, &sq->state);
- sq->stop_room += MLX5E_SQ_TLS_ROOM;
+ sq->stop_room += MLX5E_SQ_TLS_ROOM +
+ mlx5e_ktls_dumps_num_wqebbs(sq, MAX_SKB_FRAGS,
+ TLS_MAX_PAYLOAD_SIZE);
}
+#endif
param->wq.db_numa_node = cpu_to_node(c->cpu);
err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
@@ -1349,9 +1354,13 @@ static void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq)
/* last doorbell out, godspeed .. */
if (mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1)) {
u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ struct mlx5e_tx_wqe_info *wi;
struct mlx5e_tx_wqe *nop;
- sq->db.wqe_info[pi].skb = NULL;
+ wi = &sq->db.wqe_info[pi];
+
+ memset(wi, 0, sizeof(*wi));
+ wi->num_wqebbs = 1;
nop = mlx5e_post_nop(wq, sq->sqn, &sq->pc);
mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nop->ctrl);
}
@@ -4243,9 +4252,12 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
switch (proto) {
case IPPROTO_GRE:
+ return features;
case IPPROTO_IPIP:
case IPPROTO_IPV6:
- return features;
+ if (mlx5e_tunnel_proto_supported(priv->mdev, IPPROTO_IPIP))
+ return features;
+ break;
case IPPROTO_UDP:
udph = udp_hdr(skb);
port = be16_to_cpu(udph->dest);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 95892a3b63a1..cd9bb7c7b341 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -611,8 +611,8 @@ static void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
mutex_lock(&esw->offloads.encap_tbl_lock);
encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID);
- if (e->compl_result || (encap_connected == neigh_connected &&
- ether_addr_equal(e->h_dest, ha)))
+ if (e->compl_result < 0 || (encap_connected == neigh_connected &&
+ ether_addr_equal(e->h_dest, ha)))
goto unlock;
mlx5e_take_all_encap_flows(e, &flow_list);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index d6a547238de0..82cffb3a9964 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -1386,8 +1386,11 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
return 0;
- if (rq->cqd.left)
+ if (rq->cqd.left) {
work_done += mlx5e_decompress_cqes_cont(rq, cqwq, 0, budget);
+ if (rq->cqd.left || work_done >= budget)
+ goto out;
+ }
cqe = mlx5_cqwq_get_cqe(cqwq);
if (!cqe) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
index 840ec945ccba..bbff8d8ded76 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
@@ -35,6 +35,7 @@
#include <linux/udp.h>
#include <net/udp.h>
#include "en.h"
+#include "en/port.h"
enum {
MLX5E_ST_LINK_STATE,
@@ -80,22 +81,12 @@ static int mlx5e_test_link_state(struct mlx5e_priv *priv)
static int mlx5e_test_link_speed(struct mlx5e_priv *priv)
{
- u32 out[MLX5_ST_SZ_DW(ptys_reg)];
- u32 eth_proto_oper;
- int i;
+ u32 speed;
if (!netif_carrier_ok(priv->netdev))
return 1;
- if (mlx5_query_port_ptys(priv->mdev, out, sizeof(out), MLX5_PTYS_EN, 1))
- return 1;
-
- eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
- for (i = 0; i < MLX5E_LINK_MODES_NUMBER; i++) {
- if (eth_proto_oper & MLX5E_PROT_MASK(i))
- return 0;
- }
- return 1;
+ return mlx5e_port_linkspeed(priv->mdev, &speed);
}
struct mlx5ehdr {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index ac6fdcda7019..7e6ebd0505cc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -52,11 +52,12 @@ static const struct counter_desc sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_bytes) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ctx) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ooo) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_packets) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_bytes) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_resync_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_skip_no_sync_data) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_drop_no_sync_data) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_drop_bypass_req) },
- { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_packets) },
- { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_bytes) },
#endif
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) },
@@ -288,11 +289,12 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
s->tx_tls_encrypted_bytes += sq_stats->tls_encrypted_bytes;
s->tx_tls_ctx += sq_stats->tls_ctx;
s->tx_tls_ooo += sq_stats->tls_ooo;
+ s->tx_tls_dump_bytes += sq_stats->tls_dump_bytes;
+ s->tx_tls_dump_packets += sq_stats->tls_dump_packets;
s->tx_tls_resync_bytes += sq_stats->tls_resync_bytes;
+ s->tx_tls_skip_no_sync_data += sq_stats->tls_skip_no_sync_data;
s->tx_tls_drop_no_sync_data += sq_stats->tls_drop_no_sync_data;
s->tx_tls_drop_bypass_req += sq_stats->tls_drop_bypass_req;
- s->tx_tls_dump_bytes += sq_stats->tls_dump_bytes;
- s->tx_tls_dump_packets += sq_stats->tls_dump_packets;
#endif
s->tx_cqes += sq_stats->cqes;
}
@@ -1472,10 +1474,12 @@ static const struct counter_desc sq_stats_desc[] = {
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_ctx) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_ooo) },
- { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_drop_no_sync_data) },
- { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_drop_bypass_req) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_packets) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_bytes) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_resync_bytes) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_skip_no_sync_data) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_drop_no_sync_data) },
+ { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_drop_bypass_req) },
#endif
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_none) },
{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, stopped) },
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 79f261bf86ac..869f3502f631 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -129,11 +129,12 @@ struct mlx5e_sw_stats {
u64 tx_tls_encrypted_bytes;
u64 tx_tls_ctx;
u64 tx_tls_ooo;
+ u64 tx_tls_dump_packets;
+ u64 tx_tls_dump_bytes;
u64 tx_tls_resync_bytes;
+ u64 tx_tls_skip_no_sync_data;
u64 tx_tls_drop_no_sync_data;
u64 tx_tls_drop_bypass_req;
- u64 tx_tls_dump_packets;
- u64 tx_tls_dump_bytes;
#endif
u64 rx_xsk_packets;
@@ -273,11 +274,12 @@ struct mlx5e_sq_stats {
u64 tls_encrypted_bytes;
u64 tls_ctx;
u64 tls_ooo;
+ u64 tls_dump_packets;
+ u64 tls_dump_bytes;
u64 tls_resync_bytes;
+ u64 tls_skip_no_sync_data;
u64 tls_drop_no_sync_data;
u64 tls_drop_bypass_req;
- u64 tls_dump_packets;
- u64 tls_dump_bytes;
#endif
/* less likely accessed in data path */
u64 csum_none;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 3e78a727f3e6..f90a9f8e0fc6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1278,8 +1278,10 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
mlx5_eswitch_del_vlan_action(esw, attr);
for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
- if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)
+ if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) {
mlx5e_detach_encap(priv, flow, out_index);
+ kfree(attr->parse_attr->tun_info[out_index]);
+ }
kvfree(attr->parse_attr);
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
@@ -1559,6 +1561,7 @@ static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entr
mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
}
+ kfree(e->tun_info);
kfree(e->encap_header);
kfree_rcu(e, rcu);
}
@@ -2972,6 +2975,13 @@ mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key,
return NULL;
}
+static struct ip_tunnel_info *dup_tun_info(const struct ip_tunnel_info *tun_info)
+{
+ size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
+
+ return kmemdup(tun_info, tun_size, GFP_KERNEL);
+}
+
static int mlx5e_attach_encap(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct net_device *mirred_dev,
@@ -3028,13 +3038,15 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
refcount_set(&e->refcnt, 1);
init_completion(&e->res_ready);
+ tun_info = dup_tun_info(tun_info);
+ if (!tun_info) {
+ err = -ENOMEM;
+ goto out_err_init;
+ }
e->tun_info = tun_info;
err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
- if (err) {
- kfree(e);
- e = NULL;
- goto out_err;
- }
+ if (err)
+ goto out_err_init;
INIT_LIST_HEAD(&e->flows);
hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
@@ -3075,6 +3087,12 @@ out_err:
if (e)
mlx5e_encap_put(priv, e);
return err;
+
+out_err_init:
+ mutex_unlock(&esw->offloads.encap_tbl_lock);
+ kfree(tun_info);
+ kfree(e);
+ return err;
}
static int parse_tc_vlan_action(struct mlx5e_priv *priv,
@@ -3160,7 +3178,7 @@ static int add_vlan_pop_action(struct mlx5e_priv *priv,
struct mlx5_esw_flow_attr *attr,
u32 *action)
{
- int nest_level = vlan_get_encap_level(attr->parse_attr->filter_dev);
+ int nest_level = attr->parse_attr->filter_dev->lower_level;
struct flow_action_entry vlan_act = {
.id = FLOW_ACTION_VLAN_POP,
};
@@ -3250,7 +3268,20 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
MLX5_FLOW_CONTEXT_ACTION_COUNT;
- if (netdev_port_same_parent_id(priv->netdev, out_dev)) {
+ if (encap) {
+ parse_attr->mirred_ifindex[attr->out_count] =
+ out_dev->ifindex;
+ parse_attr->tun_info[attr->out_count] = dup_tun_info(info);
+ if (!parse_attr->tun_info[attr->out_count])
+ return -ENOMEM;
+ encap = false;
+ attr->dests[attr->out_count].flags |=
+ MLX5_ESW_DEST_ENCAP;
+ attr->out_count++;
+ /* attr->dests[].rep is resolved when we
+ * handle encap
+ */
+ } else if (netdev_port_same_parent_id(priv->netdev, out_dev)) {
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
struct net_device *uplink_upper;
@@ -3292,17 +3323,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
attr->dests[attr->out_count].rep = rpriv->rep;
attr->dests[attr->out_count].mdev = out_priv->mdev;
attr->out_count++;
- } else if (encap) {
- parse_attr->mirred_ifindex[attr->out_count] =
- out_dev->ifindex;
- parse_attr->tun_info[attr->out_count] = info;
- encap = false;
- attr->dests[attr->out_count].flags |=
- MLX5_ESW_DEST_ENCAP;
- attr->out_count++;
- /* attr->dests[].rep is resolved when we
- * handle encap
- */
} else if (parse_attr->filter_dev != priv->netdev) {
/* All mlx5 devices are called to configure
* high level device filters. Therefore, the
@@ -3980,9 +4000,8 @@ int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
struct tc_cls_matchall_offload *ma)
{
struct netlink_ext_ack *extack = ma->common.extack;
- int prio = TC_H_MAJ(ma->common.prio) >> 16;
- if (prio != 1) {
+ if (ma->common.prio != 1) {
NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported");
return -EINVAL;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index d3a67a9b4eba..67dc4f0921b6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -403,7 +403,10 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
static void mlx5e_dump_error_cqe(struct mlx5e_txqsq *sq,
struct mlx5_err_cqe *err_cqe)
{
- u32 ci = mlx5_cqwq_get_ci(&sq->cq.wq);
+ struct mlx5_cqwq *wq = &sq->cq.wq;
+ u32 ci;
+
+ ci = mlx5_cqwq_ctr2ix(wq, wq->cc - 1);
netdev_err(sq->channel->netdev,
"Error cqe on cqn 0x%x, ci 0x%x, sqn 0x%x, opcode 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n",
@@ -479,14 +482,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
skb = wi->skb;
if (unlikely(!skb)) {
-#ifdef CONFIG_MLX5_EN_TLS
- if (wi->resync_dump_frag) {
- struct mlx5e_sq_dma *dma =
- mlx5e_dma_get(sq, dma_fifo_cc++);
-
- mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, dma);
- }
-#endif
+ mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, &dma_fifo_cc);
sqcc += wi->num_wqebbs;
continue;
}
@@ -542,29 +538,38 @@ void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq)
{
struct mlx5e_tx_wqe_info *wi;
struct sk_buff *skb;
+ u32 dma_fifo_cc;
+ u16 sqcc;
u16 ci;
int i;
- while (sq->cc != sq->pc) {
- ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->cc);
+ sqcc = sq->cc;
+ dma_fifo_cc = sq->dma_fifo_cc;
+
+ while (sqcc != sq->pc) {
+ ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
wi = &sq->db.wqe_info[ci];
skb = wi->skb;
- if (!skb) { /* nop */
- sq->cc++;
+ if (!skb) {
+ mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, &dma_fifo_cc);
+ sqcc += wi->num_wqebbs;
continue;
}
for (i = 0; i < wi->num_dma; i++) {
struct mlx5e_sq_dma *dma =
- mlx5e_dma_get(sq, sq->dma_fifo_cc++);
+ mlx5e_dma_get(sq, dma_fifo_cc++);
mlx5e_tx_dma_unmap(sq->pdev, dma);
}
dev_kfree_skb_any(skb);
- sq->cc += wi->num_wqebbs;
+ sqcc += wi->num_wqebbs;
}
+
+ sq->dma_fifo_cc = dma_fifo_cc;
+ sq->cc = sqcc;
}
#ifdef CONFIG_MLX5_CORE_IPOIB
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 30aae76b6a1d..60fddf8afc99 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -2117,7 +2117,7 @@ int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
unlock:
mutex_unlock(&esw->state_lock);
- return 0;
+ return err;
}
int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 00d71db15f22..9004a07e457a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -285,7 +285,6 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
mlx5_eswitch_set_rule_source_port(esw, spec, attr);
- spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
if (attr->outer_match_level != MLX5_MATCH_NONE)
spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
@@ -1080,7 +1079,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
MLX5_CAP_GEN(dev, max_flow_counter_15_0);
fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size);
- esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d), groups(%d), max flow table size(2^%d))\n",
+ esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d), groups(%d), max flow table size(%d))\n",
MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size),
max_flow_counter, ESW_OFFLOADS_NUM_GROUPS,
fdb_max);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
index 1d55a324a17e..366bda1bb1c3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c
@@ -177,22 +177,33 @@ mlx5_eswitch_termtbl_actions_move(struct mlx5_flow_act *src,
memset(&src->vlan[1], 0, sizeof(src->vlan[1]));
}
+static bool mlx5_eswitch_offload_is_uplink_port(const struct mlx5_eswitch *esw,
+ const struct mlx5_flow_spec *spec)
+{
+ u32 port_mask, port_value;
+
+ if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source))
+ return spec->flow_context.flow_source ==
+ MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
+
+ port_mask = MLX5_GET(fte_match_param, spec->match_criteria,
+ misc_parameters.source_port);
+ port_value = MLX5_GET(fte_match_param, spec->match_value,
+ misc_parameters.source_port);
+ return (port_mask & port_value & 0xffff) == MLX5_VPORT_UPLINK;
+}
+
bool
mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw,
struct mlx5_flow_act *flow_act,
struct mlx5_flow_spec *spec)
{
- u32 port_mask = MLX5_GET(fte_match_param, spec->match_criteria,
- misc_parameters.source_port);
- u32 port_value = MLX5_GET(fte_match_param, spec->match_value,
- misc_parameters.source_port);
-
if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table))
return false;
/* push vlan on RX */
return (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) &&
- ((port_mask & port_value) == MLX5_VPORT_UPLINK);
+ mlx5_eswitch_offload_is_uplink_port(esw, spec);
}
struct mlx5_flow_handle *
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
index 4c50efe4e7f1..61021133029e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
@@ -464,8 +464,10 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
}
err = mlx5_vector2eqn(mdev, smp_processor_id(), &eqn, &irqn);
- if (err)
+ if (err) {
+ kvfree(in);
goto err_cqwq;
+ }
cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
MLX5_SET(cqc, cqc, log_cq_size, ilog2(cq_size));
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 579c306caa7b..3c816e81f8d9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -507,7 +507,8 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
MLX5_SET(dest_format_struct, in_dests,
destination_eswitch_owner_vhca_id,
dst->dest_attr.vport.vhca_id);
- if (extended_dest) {
+ if (extended_dest &&
+ dst->dest_attr.vport.pkt_reformat) {
MLX5_SET(dest_format_struct, in_dests,
packet_reformat,
!!(dst->dest_attr.vport.flags &
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 3bbb49354829..791e14ac26f4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -579,7 +579,7 @@ static void del_sw_flow_group(struct fs_node *node)
rhashtable_destroy(&fg->ftes_hash);
ida_destroy(&fg->fte_allocator);
- if (ft->autogroup.active)
+ if (ft->autogroup.active && fg->max_ftes == ft->autogroup.group_size)
ft->autogroup.num_groups--;
err = rhltable_remove(&ft->fgs_hash,
&fg->hash,
@@ -1126,6 +1126,8 @@ mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
ft->autogroup.active = true;
ft->autogroup.required_groups = max_num_groups;
+ /* We save place for flow groups in addition to max types */
+ ft->autogroup.group_size = ft->max_fte / (max_num_groups + 1);
return ft;
}
@@ -1328,8 +1330,7 @@ static struct mlx5_flow_group *alloc_auto_flow_group(struct mlx5_flow_table *ft
return ERR_PTR(-ENOENT);
if (ft->autogroup.num_groups < ft->autogroup.required_groups)
- /* We save place for flow groups in addition to max types */
- group_size = ft->max_fte / (ft->autogroup.required_groups + 1);
+ group_size = ft->autogroup.group_size;
/* ft->max_fte == ft->autogroup.max_types */
if (group_size == 0)
@@ -1356,7 +1357,8 @@ static struct mlx5_flow_group *alloc_auto_flow_group(struct mlx5_flow_table *ft
if (IS_ERR(fg))
goto out;
- ft->autogroup.num_groups++;
+ if (group_size == ft->autogroup.group_size)
+ ft->autogroup.num_groups++;
out:
return fg;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 00717eba2256..c2621b911563 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -162,6 +162,7 @@ struct mlx5_flow_table {
struct {
bool active;
unsigned int required_groups;
+ unsigned int group_size;
unsigned int num_groups;
} autogroup;
/* Protect fwd_rules */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index d685122d9ff7..c07f3154437c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -572,7 +572,7 @@ mlx5_fw_fatal_reporter_dump(struct devlink_health_reporter *reporter,
return -ENOMEM;
err = mlx5_crdump_collect(dev, cr_data);
if (err)
- return err;
+ goto free_data;
if (priv_ctx) {
struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
index 0059b290e095..43f97601b500 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -236,6 +236,19 @@ static int mlx5_extts_configure(struct ptp_clock_info *ptp,
if (!MLX5_PPS_CAP(mdev))
return -EOPNOTSUPP;
+ /* Reject requests with unsupported flags */
+ if (rq->extts.flags & ~(PTP_ENABLE_FEATURE |
+ PTP_RISING_EDGE |
+ PTP_FALLING_EDGE |
+ PTP_STRICT_FLAGS))
+ return -EOPNOTSUPP;
+
+ /* Reject requests to enable time stamping on both edges. */
+ if ((rq->extts.flags & PTP_STRICT_FLAGS) &&
+ (rq->extts.flags & PTP_ENABLE_FEATURE) &&
+ (rq->extts.flags & PTP_EXTTS_EDGES) == PTP_EXTTS_EDGES)
+ return -EOPNOTSUPP;
+
if (rq->extts.index >= clock->ptp_info.n_pins)
return -EINVAL;
@@ -290,6 +303,10 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp,
if (!MLX5_PPS_CAP(mdev))
return -EOPNOTSUPP;
+ /* Reject requests with unsupported flags */
+ if (rq->perout.flags)
+ return -EOPNOTSUPP;
+
if (rq->perout.index >= clock->ptp_info.n_pins)
return -EINVAL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index e47dd7c1b909..50ab88d80033 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1566,6 +1566,7 @@ static const struct pci_device_id mlx5_core_pci_table[] = {
{ PCI_VDEVICE(MELLANOX, 0x101c), MLX5_PCI_DEV_IS_VF}, /* ConnectX-6 VF */
{ PCI_VDEVICE(MELLANOX, 0x101d) }, /* ConnectX-6 Dx */
{ PCI_VDEVICE(MELLANOX, 0x101e), MLX5_PCI_DEV_IS_VF}, /* ConnectX Family mlx5Gen Virtual Function */
+ { PCI_VDEVICE(MELLANOX, 0x101f) }, /* ConnectX-6 LX */
{ PCI_VDEVICE(MELLANOX, 0xa2d2) }, /* BlueField integrated ConnectX-5 network controller */
{ PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF}, /* BlueField integrated ConnectX-5 network controller VF */
{ PCI_VDEVICE(MELLANOX, 0xa2d6) }, /* BlueField-2 integrated ConnectX-6 Dx network controller */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
index 9231b39d18b2..c501bf2a0252 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
@@ -112,17 +112,11 @@ int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev,
u32 out[MLX5_ST_SZ_DW(destroy_mkey_out)] = {0};
u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {0};
struct xarray *mkeys = &dev->priv.mkey_table;
- struct mlx5_core_mkey *deleted_mkey;
unsigned long flags;
xa_lock_irqsave(mkeys, flags);
- deleted_mkey = __xa_erase(mkeys, mlx5_base_mkey(mkey->key));
+ __xa_erase(mkeys, mlx5_base_mkey(mkey->key));
xa_unlock_irqrestore(mkeys, flags);
- if (!deleted_mkey) {
- mlx5_core_dbg(dev, "failed xarray delete of mkey 0x%x\n",
- mlx5_base_mkey(mkey->key));
- return -ENOENT;
- }
MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY);
MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key));
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
index b74b7d0f6590..004c56c2fc0c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
@@ -1577,6 +1577,7 @@ int mlx5dr_action_destroy(struct mlx5dr_action *action)
break;
case DR_ACTION_TYP_MODIFY_HDR:
mlx5dr_icm_free_chunk(action->rewrite.chunk);
+ kfree(action->rewrite.data);
refcount_dec(&action->rewrite.dmn->refcount);
break;
default:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
index 4187f2b112b8..bd1699e62142 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
@@ -595,6 +595,18 @@ static void dr_rule_clean_rule_members(struct mlx5dr_rule *rule,
}
}
+static u16 dr_get_bits_per_mask(u16 byte_mask)
+{
+ u16 bits = 0;
+
+ while (byte_mask) {
+ byte_mask = byte_mask & (byte_mask - 1);
+ bits++;
+ }
+
+ return bits;
+}
+
static bool dr_rule_need_enlarge_hash(struct mlx5dr_ste_htbl *htbl,
struct mlx5dr_domain *dmn,
struct mlx5dr_domain_rx_tx *nic_dmn)
@@ -607,6 +619,9 @@ static bool dr_rule_need_enlarge_hash(struct mlx5dr_ste_htbl *htbl,
if (!ctrl->may_grow)
return false;
+ if (dr_get_bits_per_mask(htbl->byte_mask) * BITS_PER_BYTE <= htbl->chunk_size)
+ return false;
+
if (ctrl->num_of_collisions >= ctrl->increase_threshold &&
(ctrl->num_of_valid_entries - ctrl->num_of_collisions) >= ctrl->increase_threshold)
return true;
@@ -788,12 +803,10 @@ again:
* it means that all the previous stes are the same,
* if so, this rule is duplicated.
*/
- if (mlx5dr_ste_is_last_in_rule(nic_matcher,
- matched_ste->ste_chain_location)) {
- mlx5dr_info(dmn, "Duplicate rule inserted, aborting!!\n");
- return NULL;
- }
- return matched_ste;
+ if (!mlx5dr_ste_is_last_in_rule(nic_matcher, ste_location))
+ return matched_ste;
+
+ mlx5dr_dbg(dmn, "Duplicate rule inserted\n");
}
if (!skip_rehash && dr_rule_need_enlarge_hash(cur_htbl, dmn, nic_dmn)) {
@@ -1098,6 +1111,8 @@ dr_rule_create_rule_nic(struct mlx5dr_rule *rule,
if (htbl)
mlx5dr_htbl_put(htbl);
+ kfree(hw_ste_arr);
+
return 0;
free_ste:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
index 5df8436b2ae3..51803eef13dd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
@@ -700,6 +700,7 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
unsigned int irqn;
void *cqc, *in;
__be64 *pas;
+ int vector;
u32 i;
cq = kzalloc(sizeof(*cq), GFP_KERNEL);
@@ -728,7 +729,8 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
if (!in)
goto err_cqwq;
- err = mlx5_vector2eqn(mdev, smp_processor_id(), &eqn, &irqn);
+ vector = smp_processor_id() % mlx5_comp_vectors_count(mdev);
+ err = mlx5_vector2eqn(mdev, vector, &eqn, &irqn);
if (err) {
kvfree(in);
goto err_cqwq;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
index 4efe1b0be4a8..3cbf74b44d1f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
@@ -560,18 +560,6 @@ bool mlx5dr_ste_not_used_ste(struct mlx5dr_ste *ste)
return !refcount_read(&ste->refcount);
}
-static u16 get_bits_per_mask(u16 byte_mask)
-{
- u16 bits = 0;
-
- while (byte_mask) {
- byte_mask = byte_mask & (byte_mask - 1);
- bits++;
- }
-
- return bits;
-}
-
/* Init one ste as a pattern for ste data array */
void mlx5dr_ste_set_formatted_ste(u16 gvmi,
struct mlx5dr_domain_rx_tx *nic_dmn,
@@ -620,20 +608,12 @@ int mlx5dr_ste_create_next_htbl(struct mlx5dr_matcher *matcher,
struct mlx5dr_ste_htbl *next_htbl;
if (!mlx5dr_ste_is_last_in_rule(nic_matcher, ste->ste_chain_location)) {
- u32 bits_in_mask;
u8 next_lu_type;
u16 byte_mask;
next_lu_type = MLX5_GET(ste_general, hw_ste, next_lu_type);
byte_mask = MLX5_GET(ste_general, hw_ste, byte_mask);
- /* Don't allocate table more than required,
- * the size of the table defined via the byte_mask, so no need
- * to allocate more than that.
- */
- bits_in_mask = get_bits_per_mask(byte_mask) * BITS_PER_BYTE;
- log_table_size = min(log_table_size, bits_in_mask);
-
next_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool,
log_table_size,
next_lu_type,
@@ -671,7 +651,7 @@ static void dr_ste_set_ctrl(struct mlx5dr_ste_htbl *htbl)
htbl->ctrl.may_grow = true;
- if (htbl->chunk_size == DR_CHUNK_SIZE_MAX - 1)
+ if (htbl->chunk_size == DR_CHUNK_SIZE_MAX - 1 || !htbl->byte_mask)
htbl->ctrl.may_grow = false;
/* Threshold is 50%, one is added to table of size 1 */
diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c
index 67990406cba2..29e95d0a6ad1 100644
--- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c
+++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c
@@ -66,6 +66,8 @@ retry:
return err;
if (fsm_state_err != MLXFW_FSM_STATE_ERR_OK) {
+ fsm_state_err = min_t(enum mlxfw_fsm_state_err,
+ fsm_state_err, MLXFW_FSM_STATE_ERR_MAX);
pr_err("Firmware flash failed: %s\n",
mlxfw_fsm_state_err_str[fsm_state_err]);
NL_SET_ERR_MSG_MOD(extack, "Firmware flash failed");
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index 14dcc786926d..0a0884d86d44 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -1186,9 +1186,12 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
if (err)
goto err_thermal_init;
- if (mlxsw_driver->params_register && !reload)
+ if (mlxsw_driver->params_register)
devlink_params_publish(devlink);
+ if (!reload)
+ devlink_reload_enable(devlink);
+
return 0;
err_thermal_init:
@@ -1249,6 +1252,8 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core,
{
struct devlink *devlink = priv_to_devlink(mlxsw_core);
+ if (!reload)
+ devlink_reload_disable(devlink);
if (devlink_is_reload_failed(devlink)) {
if (!reload)
/* Only the parts that were not de-initialized in the
@@ -1259,7 +1264,7 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core,
return;
}
- if (mlxsw_core->driver->params_unregister && !reload)
+ if (mlxsw_core->driver->params_unregister)
devlink_params_unpublish(devlink);
mlxsw_thermal_fini(mlxsw_core->thermal);
mlxsw_hwmon_fini(mlxsw_core->hwmon);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index a330b369e899..39d600c8b92d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -994,7 +994,7 @@ u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
if (d)
return l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
else
- return l3mdev_fib_table(ol_dev) ? : RT_TABLE_MAIN;
+ return RT_TABLE_MAIN;
}
static struct mlxsw_sp_rif *
@@ -1598,27 +1598,10 @@ static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
{
struct mlxsw_sp_ipip_entry *ipip_entry =
mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
- enum mlxsw_sp_l3proto ul_proto;
- union mlxsw_sp_l3addr saddr;
- u32 ul_tb_id;
if (!ipip_entry)
return 0;
- /* For flat configuration cases, moving overlay to a different VRF might
- * cause local address conflict, and the conflicting tunnels need to be
- * demoted.
- */
- ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
- ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
- saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
- if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
- saddr, ul_tb_id,
- ipip_entry)) {
- mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
- return 0;
- }
-
return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
true, false, false, extack);
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
index 899450b28621..7c03b661ae7e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
@@ -99,6 +99,7 @@ static void mlxsw_sp_rx_drop_listener(struct sk_buff *skb, u8 local_port,
devlink = priv_to_devlink(mlxsw_sp->core);
in_devlink_port = mlxsw_core_port_devlink_port_get(mlxsw_sp->core,
local_port);
+ skb_push(skb, ETH_HLEN);
devlink_trap_report(devlink, skb, trap_ctx, in_devlink_port);
consume_skb(skb);
}
diff --git a/drivers/net/ethernet/microchip/lan743x_ptp.c b/drivers/net/ethernet/microchip/lan743x_ptp.c
index 57b26c2acf87..e8fe9a90fe4f 100644
--- a/drivers/net/ethernet/microchip/lan743x_ptp.c
+++ b/drivers/net/ethernet/microchip/lan743x_ptp.c
@@ -429,6 +429,10 @@ static int lan743x_ptp_perout(struct lan743x_adapter *adapter, int on,
int pulse_width = 0;
int perout_bit = 0;
+ /* Reject requests with unsupported flags */
+ if (perout->flags)
+ return -EOPNOTSUPP;
+
if (!on) {
lan743x_ptp_perout_off(adapter);
return 0;
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 4d1bce4389c7..672ea1342add 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -261,8 +261,15 @@ static int ocelot_vlan_vid_add(struct net_device *dev, u16 vid, bool pvid,
port->pvid = vid;
/* Untagged egress vlan clasification */
- if (untagged)
+ if (untagged && port->vid != vid) {
+ if (port->vid) {
+ dev_err(ocelot->dev,
+ "Port already has a native VLAN: %d\n",
+ port->vid);
+ return -EBUSY;
+ }
port->vid = vid;
+ }
ocelot_vlan_port_apply(ocelot, port);
@@ -934,7 +941,7 @@ end:
static int ocelot_vlan_rx_add_vid(struct net_device *dev, __be16 proto,
u16 vid)
{
- return ocelot_vlan_vid_add(dev, vid, false, true);
+ return ocelot_vlan_vid_add(dev, vid, false, false);
}
static int ocelot_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
@@ -1673,9 +1680,6 @@ static int ocelot_netdevice_port_event(struct net_device *dev,
struct ocelot_port *ocelot_port = netdev_priv(dev);
int err = 0;
- if (!ocelot_netdevice_dev_check(dev))
- return 0;
-
switch (event) {
case NETDEV_CHANGEUPPER:
if (netif_is_bridge_master(info->upper_dev)) {
@@ -1712,12 +1716,16 @@ static int ocelot_netdevice_event(struct notifier_block *unused,
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
int ret = 0;
+ if (!ocelot_netdevice_dev_check(dev))
+ return 0;
+
if (event == NETDEV_PRECHANGEUPPER &&
netif_is_lag_master(info->upper_dev)) {
struct netdev_lag_upper_info *lag_upper_info = info->upper_info;
struct netlink_ext_ack *extack;
- if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) {
+ if (lag_upper_info &&
+ lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) {
extack = netdev_notifier_info_to_extack(&info->info);
NL_SET_ERR_MSG_MOD(extack, "LAG device using unsupported Tx type");
diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h
index e40773c01a44..06ac806052bc 100644
--- a/drivers/net/ethernet/mscc/ocelot.h
+++ b/drivers/net/ethernet/mscc/ocelot.h
@@ -523,7 +523,7 @@ void __ocelot_write_ix(struct ocelot *ocelot, u32 val, u32 reg, u32 offset);
#define ocelot_write_rix(ocelot, val, reg, ri) __ocelot_write_ix(ocelot, val, reg, reg##_RSZ * (ri))
#define ocelot_write(ocelot, val, reg) __ocelot_write_ix(ocelot, val, reg, 0)
-void __ocelot_rmw_ix(struct ocelot *ocelot, u32 val, u32 reg, u32 mask,
+void __ocelot_rmw_ix(struct ocelot *ocelot, u32 val, u32 mask, u32 reg,
u32 offset);
#define ocelot_rmw_ix(ocelot, val, m, reg, gi, ri) __ocelot_rmw_ix(ocelot, val, m, reg, reg##_GSZ * (gi) + reg##_RSZ * (ri))
#define ocelot_rmw_gix(ocelot, val, m, reg, gi) __ocelot_rmw_ix(ocelot, val, m, reg, reg##_GSZ * (gi))
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
index 1eef446036d6..79d72c88bbef 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
@@ -299,22 +299,6 @@ static void nfp_repr_clean(struct nfp_repr *repr)
nfp_port_free(repr->port);
}
-static struct lock_class_key nfp_repr_netdev_xmit_lock_key;
-static struct lock_class_key nfp_repr_netdev_addr_lock_key;
-
-static void nfp_repr_set_lockdep_class_one(struct net_device *dev,
- struct netdev_queue *txq,
- void *_unused)
-{
- lockdep_set_class(&txq->_xmit_lock, &nfp_repr_netdev_xmit_lock_key);
-}
-
-static void nfp_repr_set_lockdep_class(struct net_device *dev)
-{
- lockdep_set_class(&dev->addr_list_lock, &nfp_repr_netdev_addr_lock_key);
- netdev_for_each_tx_queue(dev, nfp_repr_set_lockdep_class_one, NULL);
-}
-
int nfp_repr_init(struct nfp_app *app, struct net_device *netdev,
u32 cmsg_port_id, struct nfp_port *port,
struct net_device *pf_netdev)
@@ -324,8 +308,6 @@ int nfp_repr_init(struct nfp_app *app, struct net_device *netdev,
u32 repr_cap = nn->tlv_caps.repr_cap;
int err;
- nfp_repr_set_lockdep_class(netdev);
-
repr->port = port;
repr->dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX, GFP_KERNEL);
if (!repr->dst)
diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c
index 141571e2ec11..544012a67221 100644
--- a/drivers/net/ethernet/nxp/lpc_eth.c
+++ b/drivers/net/ethernet/nxp/lpc_eth.c
@@ -1356,9 +1356,6 @@ static int lpc_eth_drv_probe(struct platform_device *pdev)
if (!is_valid_ether_addr(ndev->dev_addr))
eth_hw_addr_random(ndev);
- /* Reset the ethernet controller */
- __lpc_eth_reset(pldat);
-
/* then shut everything down to save power */
__lpc_eth_shutdown(pldat);
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 72107a0627a9..20faa8d24c9f 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+#include <linux/printk.h>
+#include <linux/dynamic_debug.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/rtnetlink.h>
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
index 812190e729c2..6a95b42a8d8c 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
@@ -182,6 +182,8 @@ struct ionic_lif {
#define lif_to_txqcq(lif, i) ((lif)->txqcqs[i].qcq)
#define lif_to_rxqcq(lif, i) ((lif)->rxqcqs[i].qcq)
+#define lif_to_txstats(lif, i) ((lif)->txqcqs[i].stats->tx)
+#define lif_to_rxstats(lif, i) ((lif)->rxqcqs[i].stats->rx)
#define lif_to_txq(lif, i) (&lif_to_txqcq((lif), i)->q)
#define lif_to_rxq(lif, i) (&lif_to_txqcq((lif), i)->q)
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c
index 15e432386b35..aab311413412 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_main.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2017 - 2019 Pensando Systems, Inc */
+#include <linux/printk.h>
+#include <linux/dynamic_debug.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/utsname.h>
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_stats.c b/drivers/net/ethernet/pensando/ionic/ionic_stats.c
index e2907884f843..03916b6d47f2 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_stats.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_stats.c
@@ -117,7 +117,8 @@ static u64 ionic_sw_stats_get_count(struct ionic_lif *lif)
/* rx stats */
total += MAX_Q(lif) * IONIC_NUM_RX_STATS;
- if (test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
+ if (test_bit(IONIC_LIF_UP, lif->state) &&
+ test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
/* tx debug stats */
total += MAX_Q(lif) * (IONIC_NUM_DBG_CQ_STATS +
IONIC_NUM_TX_Q_STATS +
@@ -149,7 +150,8 @@ static void ionic_sw_stats_get_strings(struct ionic_lif *lif, u8 **buf)
*buf += ETH_GSTRING_LEN;
}
- if (test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
+ if (test_bit(IONIC_LIF_UP, lif->state) &&
+ test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
for (i = 0; i < IONIC_NUM_TX_Q_STATS; i++) {
snprintf(*buf, ETH_GSTRING_LEN,
"txq_%d_%s",
@@ -187,7 +189,8 @@ static void ionic_sw_stats_get_strings(struct ionic_lif *lif, u8 **buf)
*buf += ETH_GSTRING_LEN;
}
- if (test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
+ if (test_bit(IONIC_LIF_UP, lif->state) &&
+ test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) {
snprintf(*buf, ETH_GSTRING_LEN,
"rxq_%d_cq_%s",
@@ -223,6 +226,8 @@ static void ionic_sw_stats_get_values(struct ionic_lif *lif, u64 **buf)
{
struct ionic_lif_sw_stats lif_stats;
struct ionic_qcq *txqcq, *rxqcq;
+ struct ionic_tx_stats *txstats;
+ struct ionic_rx_stats *rxstats;
int i, q_num;
ionic_get_lif_stats(lif, &lif_stats);
@@ -233,15 +238,17 @@ static void ionic_sw_stats_get_values(struct ionic_lif *lif, u64 **buf)
}
for (q_num = 0; q_num < MAX_Q(lif); q_num++) {
- txqcq = lif_to_txqcq(lif, q_num);
+ txstats = &lif_to_txstats(lif, q_num);
for (i = 0; i < IONIC_NUM_TX_STATS; i++) {
- **buf = IONIC_READ_STAT64(&txqcq->stats->tx,
+ **buf = IONIC_READ_STAT64(txstats,
&ionic_tx_stats_desc[i]);
(*buf)++;
}
- if (test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
+ if (test_bit(IONIC_LIF_UP, lif->state) &&
+ test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
+ txqcq = lif_to_txqcq(lif, q_num);
for (i = 0; i < IONIC_NUM_TX_Q_STATS; i++) {
**buf = IONIC_READ_STAT64(&txqcq->q,
&ionic_txq_stats_desc[i]);
@@ -258,22 +265,24 @@ static void ionic_sw_stats_get_values(struct ionic_lif *lif, u64 **buf)
(*buf)++;
}
for (i = 0; i < IONIC_MAX_NUM_SG_CNTR; i++) {
- **buf = txqcq->stats->tx.sg_cntr[i];
+ **buf = txstats->sg_cntr[i];
(*buf)++;
}
}
}
for (q_num = 0; q_num < MAX_Q(lif); q_num++) {
- rxqcq = lif_to_rxqcq(lif, q_num);
+ rxstats = &lif_to_rxstats(lif, q_num);
for (i = 0; i < IONIC_NUM_RX_STATS; i++) {
- **buf = IONIC_READ_STAT64(&rxqcq->stats->rx,
+ **buf = IONIC_READ_STAT64(rxstats,
&ionic_rx_stats_desc[i]);
(*buf)++;
}
- if (test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
+ if (test_bit(IONIC_LIF_UP, lif->state) &&
+ test_bit(IONIC_LIF_SW_DEBUG_STATS, lif->state)) {
+ rxqcq = lif_to_rxqcq(lif, q_num);
for (i = 0; i < IONIC_NUM_DBG_CQ_STATS; i++) {
**buf = IONIC_READ_STAT64(&rxqcq->cq,
&ionic_dbg_cq_stats_desc[i]);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 2ce70097d018..38f7f40b3a4d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -67,10 +67,9 @@
#define QED_ROCE_QPS (8192)
#define QED_ROCE_DPIS (8)
#define QED_RDMA_SRQS QED_ROCE_QPS
-#define QED_NVM_CFG_SET_FLAGS 0xE
-#define QED_NVM_CFG_SET_PF_FLAGS 0x1E
#define QED_NVM_CFG_GET_FLAGS 0xA
#define QED_NVM_CFG_GET_PF_FLAGS 0x1A
+#define QED_NVM_CFG_MAX_ATTRS 50
static char version[] =
"QLogic FastLinQ 4xxxx Core Module qed " DRV_MODULE_VERSION "\n";
@@ -2255,6 +2254,7 @@ static int qed_nvm_flash_cfg_write(struct qed_dev *cdev, const u8 **data)
{
struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
u8 entity_id, len, buf[32];
+ bool need_nvm_init = true;
struct qed_ptt *ptt;
u16 cfg_id, count;
int rc = 0, i;
@@ -2271,8 +2271,10 @@ static int qed_nvm_flash_cfg_write(struct qed_dev *cdev, const u8 **data)
DP_VERBOSE(cdev, NETIF_MSG_DRV,
"Read config ids: num_attrs = %0d\n", count);
- /* NVM CFG ID attributes */
- for (i = 0; i < count; i++) {
+ /* NVM CFG ID attributes. Start loop index from 1 to avoid additional
+ * arithmetic operations in the implementation.
+ */
+ for (i = 1; i <= count; i++) {
cfg_id = *((u16 *)*data);
*data += 2;
entity_id = **data;
@@ -2282,8 +2284,21 @@ static int qed_nvm_flash_cfg_write(struct qed_dev *cdev, const u8 **data)
memcpy(buf, *data, len);
*data += len;
- flags = entity_id ? QED_NVM_CFG_SET_PF_FLAGS :
- QED_NVM_CFG_SET_FLAGS;
+ flags = 0;
+ if (need_nvm_init) {
+ flags |= QED_NVM_CFG_OPTION_INIT;
+ need_nvm_init = false;
+ }
+
+ /* Commit to flash and free the resources */
+ if (!(i % QED_NVM_CFG_MAX_ATTRS) || i == count) {
+ flags |= QED_NVM_CFG_OPTION_COMMIT |
+ QED_NVM_CFG_OPTION_FREE;
+ need_nvm_init = true;
+ }
+
+ if (entity_id)
+ flags |= QED_NVM_CFG_OPTION_ENTITY_SEL;
DP_VERBOSE(cdev, NETIF_MSG_DRV,
"cfg_id = %d entity = %d len = %d\n", cfg_id,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
index 78f77b712b10..dcb5c917f373 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
@@ -2005,7 +2005,7 @@ static void qed_iov_vf_mbx_stop_vport(struct qed_hwfn *p_hwfn,
(qed_iov_validate_active_txq(p_hwfn, vf))) {
vf->b_malicious = true;
DP_NOTICE(p_hwfn,
- "VF [%02x] - considered malicious; Unable to stop RX/TX queuess\n",
+ "VF [%02x] - considered malicious; Unable to stop RX/TX queues\n",
vf->abs_vf_id);
status = PFVF_STATUS_MALICIOUS;
goto out;
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 8d1c208f778f..a220cc7c947a 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -1208,8 +1208,16 @@ enum qede_remove_mode {
static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
{
struct net_device *ndev = pci_get_drvdata(pdev);
- struct qede_dev *edev = netdev_priv(ndev);
- struct qed_dev *cdev = edev->cdev;
+ struct qede_dev *edev;
+ struct qed_dev *cdev;
+
+ if (!ndev) {
+ dev_info(&pdev->dev, "Device has already been removed\n");
+ return;
+ }
+
+ edev = netdev_priv(ndev);
+ cdev = edev->cdev;
DP_INFO(edev, "Starting qede_remove\n");
diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c
index 5ecf61df78bd..baac016f3ec0 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.c
+++ b/drivers/net/ethernet/qualcomm/qca_spi.c
@@ -363,7 +363,7 @@ qcaspi_receive(struct qcaspi *qca)
netdev_dbg(net_dev, "qcaspi_receive: SPI_REG_RDBUF_BYTE_AVA: Value: %08x\n",
available);
- if (available > QCASPI_HW_BUF_LEN) {
+ if (available > QCASPI_HW_BUF_LEN + QCASPI_HW_PKT_LEN) {
/* This could only happen by interferences on the SPI line.
* So retry later ...
*/
@@ -496,7 +496,6 @@ qcaspi_qca7k_sync(struct qcaspi *qca, int event)
u16 signature = 0;
u16 spi_config;
u16 wrbuf_space = 0;
- static u16 reset_count;
if (event == QCASPI_EVENT_CPUON) {
/* Read signature twice, if not valid
@@ -549,13 +548,13 @@ qcaspi_qca7k_sync(struct qcaspi *qca, int event)
qca->sync = QCASPI_SYNC_RESET;
qca->stats.trig_reset++;
- reset_count = 0;
+ qca->reset_count = 0;
break;
case QCASPI_SYNC_RESET:
- reset_count++;
+ qca->reset_count++;
netdev_dbg(qca->net_dev, "sync: waiting for CPU on, count %u.\n",
- reset_count);
- if (reset_count >= QCASPI_RESET_TIMEOUT) {
+ qca->reset_count);
+ if (qca->reset_count >= QCASPI_RESET_TIMEOUT) {
/* reset did not seem to take place, try again */
qca->sync = QCASPI_SYNC_UNKNOWN;
qca->stats.reset_timeout++;
diff --git a/drivers/net/ethernet/qualcomm/qca_spi.h b/drivers/net/ethernet/qualcomm/qca_spi.h
index eb9af45fcc5e..d13a67e20d65 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.h
+++ b/drivers/net/ethernet/qualcomm/qca_spi.h
@@ -94,6 +94,7 @@ struct qcaspi {
unsigned int intr_req;
unsigned int intr_svc;
+ u16 reset_count;
#ifdef CONFIG_DEBUG_FS
struct dentry *device_root;
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
index 9c54b715228e..06de59521fc4 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
@@ -57,10 +57,10 @@ static int rmnet_unregister_real_device(struct net_device *real_dev,
if (port->nr_rmnet_devs)
return -EINVAL;
- kfree(port);
-
netdev_rx_handler_unregister(real_dev);
+ kfree(port);
+
/* release reference on real_dev */
dev_put(real_dev);
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 74f81fe03810..c33c438850cc 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -916,6 +916,9 @@ static void r8168g_mdio_write(struct rtl8169_private *tp, int reg, int value)
static int r8168g_mdio_read(struct rtl8169_private *tp, int reg)
{
+ if (reg == 0x1f)
+ return tp->ocp_base == OCP_STD_PHY_BASE ? 0 : tp->ocp_base >> 4;
+
if (tp->ocp_base != OCP_STD_PHY_BASE)
reg -= 0x10;
@@ -1029,6 +1032,10 @@ static int r8168dp_2_mdio_read(struct rtl8169_private *tp, int reg)
{
int value;
+ /* Work around issue with chip reporting wrong PHY ID */
+ if (reg == MII_PHYSID2)
+ return 0xc912;
+
r8168dp_2_mdio_start(tp);
value = r8169_mdio_read(tp, reg);
@@ -4146,6 +4153,14 @@ static void rtl_hw_jumbo_disable(struct rtl8169_private *tp)
rtl_lock_config_regs(tp);
}
+static void rtl_jumbo_config(struct rtl8169_private *tp, int mtu)
+{
+ if (mtu > ETH_DATA_LEN)
+ rtl_hw_jumbo_enable(tp);
+ else
+ rtl_hw_jumbo_disable(tp);
+}
+
DECLARE_RTL_COND(rtl_chipcmd_cond)
{
return RTL_R8(tp, ChipCmd) & CmdReset;
@@ -4442,11 +4457,6 @@ static void rtl8168g_set_pause_thresholds(struct rtl8169_private *tp,
static void rtl_hw_start_8168bb(struct rtl8169_private *tp)
{
RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
-
- if (tp->dev->mtu <= ETH_DATA_LEN) {
- rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B |
- PCI_EXP_DEVCTL_NOSNOOP_EN);
- }
}
static void rtl_hw_start_8168bef(struct rtl8169_private *tp)
@@ -4462,9 +4472,6 @@ static void __rtl_hw_start_8168cp(struct rtl8169_private *tp)
RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
- if (tp->dev->mtu <= ETH_DATA_LEN)
- rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
-
rtl_disable_clock_request(tp);
}
@@ -4490,9 +4497,6 @@ static void rtl_hw_start_8168cp_2(struct rtl8169_private *tp)
rtl_set_def_aspm_entry_latency(tp);
RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en);
-
- if (tp->dev->mtu <= ETH_DATA_LEN)
- rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
}
static void rtl_hw_start_8168cp_3(struct rtl8169_private *tp)
@@ -4503,9 +4507,6 @@ static void rtl_hw_start_8168cp_3(struct rtl8169_private *tp)
/* Magic. */
RTL_W8(tp, DBG_REG, 0x20);
-
- if (tp->dev->mtu <= ETH_DATA_LEN)
- rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
}
static void rtl_hw_start_8168c_1(struct rtl8169_private *tp)
@@ -4611,9 +4612,6 @@ static void rtl_hw_start_8168e_1(struct rtl8169_private *tp)
rtl_ephy_init(tp, e_info_8168e_1);
- if (tp->dev->mtu <= ETH_DATA_LEN)
- rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
-
rtl_disable_clock_request(tp);
/* Reset tx FIFO pointer */
@@ -4636,9 +4634,6 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp)
rtl_ephy_init(tp, e_info_8168e_2);
- if (tp->dev->mtu <= ETH_DATA_LEN)
- rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B);
-
rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000);
rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000);
rtl_set_fifo_size(tp, 0x10, 0x10, 0x02, 0x06);
@@ -5485,6 +5480,8 @@ static void rtl_hw_start(struct rtl8169_private *tp)
rtl_set_rx_tx_desc_registers(tp);
rtl_lock_config_regs(tp);
+ rtl_jumbo_config(tp, tp->dev->mtu);
+
/* Initially a 10 us delay. Turned it into a PCI commit. - FR */
RTL_R16(tp, CPlusCmd);
RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
@@ -5498,10 +5495,7 @@ static int rtl8169_change_mtu(struct net_device *dev, int new_mtu)
{
struct rtl8169_private *tp = netdev_priv(dev);
- if (new_mtu > ETH_DATA_LEN)
- rtl_hw_jumbo_enable(tp);
- else
- rtl_hw_jumbo_disable(tp);
+ rtl_jumbo_config(tp, new_mtu);
dev->mtu = new_mtu;
netdev_update_features(dev);
@@ -7185,8 +7179,11 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
dev->gso_max_segs = RTL_GSO_MAX_SEGS_V1;
}
- /* RTL8168e-vl has a HW issue with TSO */
- if (tp->mac_version == RTL_GIGA_MAC_VER_34) {
+ /* RTL8168e-vl and one RTL8168c variant are known to have a
+ * HW issue with TSO.
+ */
+ if (tp->mac_version == RTL_GIGA_MAC_VER_34 ||
+ tp->mac_version == RTL_GIGA_MAC_VER_22) {
dev->vlan_features &= ~(NETIF_F_ALL_TSO | NETIF_F_SG);
dev->hw_features &= ~(NETIF_F_ALL_TSO | NETIF_F_SG);
dev->features &= ~(NETIF_F_ALL_TSO | NETIF_F_SG);
diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h
index a9c89d5d8898..9f88b5db4f89 100644
--- a/drivers/net/ethernet/renesas/ravb.h
+++ b/drivers/net/ethernet/renesas/ravb.h
@@ -955,6 +955,8 @@ enum RAVB_QUEUE {
#define NUM_RX_QUEUE 2
#define NUM_TX_QUEUE 2
+#define RX_BUF_SZ (2048 - ETH_FCS_LEN + sizeof(__sum16))
+
/* TX descriptors per packet */
#define NUM_TX_DESC_GEN2 2
#define NUM_TX_DESC_GEN3 1
@@ -1018,7 +1020,6 @@ struct ravb_private {
u32 dirty_rx[NUM_RX_QUEUE]; /* Producer ring indices */
u32 cur_tx[NUM_TX_QUEUE];
u32 dirty_tx[NUM_TX_QUEUE];
- u32 rx_buf_sz; /* Based on MTU+slack. */
struct napi_struct napi[NUM_RX_QUEUE];
struct work_struct work;
/* MII transceiver section. */
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index de9aa8c47f1c..3f165c137236 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -230,7 +230,7 @@ static void ravb_ring_free(struct net_device *ndev, int q)
le32_to_cpu(desc->dptr)))
dma_unmap_single(ndev->dev.parent,
le32_to_cpu(desc->dptr),
- priv->rx_buf_sz,
+ RX_BUF_SZ,
DMA_FROM_DEVICE);
}
ring_size = sizeof(struct ravb_ex_rx_desc) *
@@ -293,9 +293,9 @@ static void ravb_ring_format(struct net_device *ndev, int q)
for (i = 0; i < priv->num_rx_ring[q]; i++) {
/* RX descriptor */
rx_desc = &priv->rx_ring[q][i];
- rx_desc->ds_cc = cpu_to_le16(priv->rx_buf_sz);
+ rx_desc->ds_cc = cpu_to_le16(RX_BUF_SZ);
dma_addr = dma_map_single(ndev->dev.parent, priv->rx_skb[q][i]->data,
- priv->rx_buf_sz,
+ RX_BUF_SZ,
DMA_FROM_DEVICE);
/* We just set the data size to 0 for a failed mapping which
* should prevent DMA from happening...
@@ -342,9 +342,6 @@ static int ravb_ring_init(struct net_device *ndev, int q)
int ring_size;
int i;
- priv->rx_buf_sz = (ndev->mtu <= 1492 ? PKT_BUF_SZ : ndev->mtu) +
- ETH_HLEN + VLAN_HLEN + sizeof(__sum16);
-
/* Allocate RX and TX skb rings */
priv->rx_skb[q] = kcalloc(priv->num_rx_ring[q],
sizeof(*priv->rx_skb[q]), GFP_KERNEL);
@@ -354,7 +351,7 @@ static int ravb_ring_init(struct net_device *ndev, int q)
goto error;
for (i = 0; i < priv->num_rx_ring[q]; i++) {
- skb = netdev_alloc_skb(ndev, priv->rx_buf_sz + RAVB_ALIGN - 1);
+ skb = netdev_alloc_skb(ndev, RX_BUF_SZ + RAVB_ALIGN - 1);
if (!skb)
goto error;
ravb_set_buffer_align(skb);
@@ -584,7 +581,7 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q)
skb = priv->rx_skb[q][entry];
priv->rx_skb[q][entry] = NULL;
dma_unmap_single(ndev->dev.parent, le32_to_cpu(desc->dptr),
- priv->rx_buf_sz,
+ RX_BUF_SZ,
DMA_FROM_DEVICE);
get_ts &= (q == RAVB_NC) ?
RAVB_RXTSTAMP_TYPE_V2_L2_EVENT :
@@ -617,11 +614,11 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q)
for (; priv->cur_rx[q] - priv->dirty_rx[q] > 0; priv->dirty_rx[q]++) {
entry = priv->dirty_rx[q] % priv->num_rx_ring[q];
desc = &priv->rx_ring[q][entry];
- desc->ds_cc = cpu_to_le16(priv->rx_buf_sz);
+ desc->ds_cc = cpu_to_le16(RX_BUF_SZ);
if (!priv->rx_skb[q][entry]) {
skb = netdev_alloc_skb(ndev,
- priv->rx_buf_sz +
+ RX_BUF_SZ +
RAVB_ALIGN - 1);
if (!skb)
break; /* Better luck next round. */
@@ -1801,10 +1798,15 @@ static int ravb_do_ioctl(struct net_device *ndev, struct ifreq *req, int cmd)
static int ravb_change_mtu(struct net_device *ndev, int new_mtu)
{
- if (netif_running(ndev))
- return -EBUSY;
+ struct ravb_private *priv = netdev_priv(ndev);
ndev->mtu = new_mtu;
+
+ if (netif_running(ndev)) {
+ synchronize_irq(priv->emac_irq);
+ ravb_emac_init(ndev);
+ }
+
netdev_update_features(ndev);
return 0;
diff --git a/drivers/net/ethernet/renesas/ravb_ptp.c b/drivers/net/ethernet/renesas/ravb_ptp.c
index 9a42580693cb..6984bd5b7da9 100644
--- a/drivers/net/ethernet/renesas/ravb_ptp.c
+++ b/drivers/net/ethernet/renesas/ravb_ptp.c
@@ -182,6 +182,13 @@ static int ravb_ptp_extts(struct ptp_clock_info *ptp,
struct net_device *ndev = priv->ndev;
unsigned long flags;
+ /* Reject requests with unsupported flags */
+ if (req->flags & ~(PTP_ENABLE_FEATURE |
+ PTP_RISING_EDGE |
+ PTP_FALLING_EDGE |
+ PTP_STRICT_FLAGS))
+ return -EOPNOTSUPP;
+
if (req->index)
return -EINVAL;
@@ -211,6 +218,10 @@ static int ravb_ptp_perout(struct ptp_clock_info *ptp,
unsigned long flags;
int error = 0;
+ /* Reject requests with unsupported flags */
+ if (req->flags)
+ return -EOPNOTSUPP;
+
if (req->index)
return -EINVAL;
diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c
index 02ed6d1b716c..af15a737c675 100644
--- a/drivers/net/ethernet/sfc/ptp.c
+++ b/drivers/net/ethernet/sfc/ptp.c
@@ -1531,7 +1531,8 @@ void efx_ptp_remove(struct efx_nic *efx)
(void)efx_ptp_disable(efx);
cancel_work_sync(&efx->ptp_data->work);
- cancel_work_sync(&efx->ptp_data->pps_work);
+ if (efx->ptp_data->pps_workwq)
+ cancel_work_sync(&efx->ptp_data->pps_work);
skb_queue_purge(&efx->ptp_data->rxq);
skb_queue_purge(&efx->ptp_data->txq);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index f97a4096f8fc..6e47be63a43c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -651,7 +651,8 @@ static void sun8i_dwmac_set_filter(struct mac_device_info *hw,
}
}
} else {
- netdev_info(dev, "Too many address, switching to promiscuous\n");
+ if (!(readl(ioaddr + EMAC_RX_FRM_FLT) & EMAC_FRM_FLT_RXALL))
+ netdev_info(dev, "Too many address, switching to promiscuous\n");
v = EMAC_FRM_FLT_RXALL;
}
@@ -1225,7 +1226,7 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
dwmac_mux:
sun8i_dwmac_unset_syscon(gmac);
dwmac_exit:
- sun8i_dwmac_exit(pdev, plat_dat->bsp_priv);
+ stmmac_pltfr_remove(pdev);
return ret;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index 2cb9c53f93b8..66e60c7e9850 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -432,7 +432,7 @@ static void dwmac4_set_filter(struct mac_device_info *hw,
* bits used depends on the hardware configuration
* selected at core configuration time.
*/
- int bit_nr = bitrev32(~crc32_le(~0, ha->addr,
+ u32 bit_nr = bitrev32(~crc32_le(~0, ha->addr,
ETH_ALEN)) >> (32 - mcbitslog2);
/* The most significant bit determines the register to
* use (H/L) while the other 5 bits determine the bit
@@ -448,7 +448,7 @@ static void dwmac4_set_filter(struct mac_device_info *hw,
value |= GMAC_PACKET_FILTER_HPF;
/* Handle multiple unicast addresses */
- if (netdev_uc_count(dev) > GMAC_MAX_PERFECT_ADDRESSES) {
+ if (netdev_uc_count(dev) > hw->unicast_filter_entries) {
/* Switch to promiscuous mode if more than 128 addrs
* are required
*/
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
index 3f4f3132e16b..e436fa160c7d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
@@ -515,6 +515,7 @@ int dwmac5_flex_pps_config(void __iomem *ioaddr, int index,
if (!enable) {
val |= PPSCMDx(index, 0x5);
+ val |= PPSEN0;
writel(val, ioaddr + MAC_PPS_CONTROL);
return 0;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
index 775db776b3cc..23fecf68f781 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
// Copyright (c) 2017 Synopsys, Inc. and/or its affiliates.
// stmmac Support for 5.xx Ethernet QoS cores
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
index 99037386080a..9d08a934fe4f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
/*
* Copyright (c) 2018 Synopsys, Inc. and/or its affiliates.
* stmmac XGMAC definitions.
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
index 5031398e612c..070bd7d1ae4c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
@@ -224,6 +224,7 @@ static void dwxgmac2_config_cbs(struct mac_device_info *hw,
writel(low_credit, ioaddr + XGMAC_MTL_TCx_LOCREDIT(queue));
value = readl(ioaddr + XGMAC_MTL_TCx_ETS_CONTROL(queue));
+ value &= ~XGMAC_TSA;
value |= XGMAC_CC | XGMAC_CBS;
writel(value, ioaddr + XGMAC_MTL_TCx_ETS_CONTROL(queue));
}
@@ -463,7 +464,7 @@ static void dwxgmac2_set_filter(struct mac_device_info *hw,
value |= XGMAC_FILTER_HMC;
netdev_for_each_mc_addr(ha, dev) {
- int nr = (bitrev32(~crc32_le(~0, ha->addr, 6)) >>
+ u32 nr = (bitrev32(~crc32_le(~0, ha->addr, 6)) >>
(32 - mcbitslog2));
mc_filter[nr >> 5] |= (1 << (nr & 0x1F));
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
index ae48154f933c..bd5838ce1e8a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
@@ -288,7 +288,8 @@ static int dwxgmac2_get_rx_hash(struct dma_desc *p, u32 *hash,
static int dwxgmac2_get_rx_header_len(struct dma_desc *p, unsigned int *len)
{
- *len = le32_to_cpu(p->des2) & XGMAC_RDES2_HL;
+ if (le32_to_cpu(p->des3) & XGMAC_RDES3_L34T)
+ *len = le32_to_cpu(p->des2) & XGMAC_RDES2_HL;
return 0;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
index 965cbe3e6f51..f70ca5300b82 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
@@ -369,7 +369,7 @@ static void dwxgmac2_get_hw_feature(void __iomem *ioaddr,
dma_cap->eee = (hw_cap & XGMAC_HWFEAT_EEESEL) >> 13;
dma_cap->atime_stamp = (hw_cap & XGMAC_HWFEAT_TSSEL) >> 12;
dma_cap->av = (hw_cap & XGMAC_HWFEAT_AVSEL) >> 11;
- dma_cap->av &= !(hw_cap & XGMAC_HWFEAT_RAVSEL) >> 10;
+ dma_cap->av &= !((hw_cap & XGMAC_HWFEAT_RAVSEL) >> 10);
dma_cap->arpoffsel = (hw_cap & XGMAC_HWFEAT_ARPOFFSEL) >> 9;
dma_cap->rmon = (hw_cap & XGMAC_HWFEAT_MMCSEL) >> 8;
dma_cap->pmt_magic_frame = (hw_cap & XGMAC_HWFEAT_MGKSEL) >> 7;
@@ -470,6 +470,7 @@ static void dwxgmac2_enable_tso(void __iomem *ioaddr, bool en, u32 chan)
static void dwxgmac2_qmode(void __iomem *ioaddr, u32 channel, u8 qmode)
{
u32 value = readl(ioaddr + XGMAC_MTL_TXQ_OPMODE(channel));
+ u32 flow = readl(ioaddr + XGMAC_RX_FLOW_CTRL);
value &= ~XGMAC_TXQEN;
if (qmode != MTL_QUEUE_AVB) {
@@ -477,6 +478,7 @@ static void dwxgmac2_qmode(void __iomem *ioaddr, u32 channel, u8 qmode)
writel(0, ioaddr + XGMAC_MTL_TCx_ETS_CONTROL(channel));
} else {
value |= 0x1 << XGMAC_TXQEN_SHIFT;
+ writel(flow & (~XGMAC_RFE), ioaddr + XGMAC_RX_FLOW_CTRL);
}
writel(value, ioaddr + XGMAC_MTL_TXQ_OPMODE(channel));
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index ddb851d99618..9010d881b12e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
// Copyright (c) 2018 Synopsys, Inc. and/or its affiliates.
// stmmac HW Interface Callbacks
diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c
index a223584f5f9a..252cf48c5816 100644
--- a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c
@@ -176,6 +176,7 @@
#define MMC_XGMAC_RX_PKT_SMD_ERR 0x22c
#define MMC_XGMAC_RX_PKT_ASSEMBLY_OK 0x230
#define MMC_XGMAC_RX_FPE_FRAG 0x234
+#define MMC_XGMAC_RX_IPC_INTR_MASK 0x25c
static void dwmac_mmc_ctrl(void __iomem *mmcaddr, unsigned int mode)
{
@@ -333,8 +334,9 @@ static void dwxgmac_mmc_ctrl(void __iomem *mmcaddr, unsigned int mode)
static void dwxgmac_mmc_intr_all_mask(void __iomem *mmcaddr)
{
- writel(MMC_DEFAULT_MASK, mmcaddr + MMC_RX_INTR_MASK);
- writel(MMC_DEFAULT_MASK, mmcaddr + MMC_TX_INTR_MASK);
+ writel(0x0, mmcaddr + MMC_RX_INTR_MASK);
+ writel(0x0, mmcaddr + MMC_TX_INTR_MASK);
+ writel(MMC_DEFAULT_MASK, mmcaddr + MMC_XGMAC_RX_IPC_INTR_MASK);
}
static void dwxgmac_read_mmc_reg(void __iomem *addr, u32 reg, u32 *dest)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index c76a1336a451..f826365c979d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2610,7 +2610,7 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
}
if (priv->hw->pcs)
- stmmac_pcs_ctrl_ane(priv, priv->hw, 1, priv->hw->ps, 0);
+ stmmac_pcs_ctrl_ane(priv, priv->ioaddr, 1, priv->hw->ps, 0);
/* set TX and RX rings length */
stmmac_set_rings_length(priv);
@@ -2995,6 +2995,8 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
} else {
stmmac_set_desc_addr(priv, first, des);
tmp_pay_len = pay_len;
+ des += proto_hdr_len;
+ pay_len = 0;
}
stmmac_tso_allocator(priv, des, tmp_pay_len, (nfrags == 0), queue);
@@ -3022,6 +3024,19 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
/* Only the last descriptor gets to point to the skb. */
tx_q->tx_skbuff[tx_q->cur_tx] = skb;
+ /* Manage tx mitigation */
+ tx_q->tx_count_frames += nfrags + 1;
+ if (likely(priv->tx_coal_frames > tx_q->tx_count_frames) &&
+ !((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
+ priv->hwts_tx_en)) {
+ stmmac_tx_timer_arm(priv, queue);
+ } else {
+ desc = &tx_q->dma_tx[tx_q->cur_tx];
+ tx_q->tx_count_frames = 0;
+ stmmac_set_tx_ic(priv, desc);
+ priv->xstats.tx_set_ic_bit++;
+ }
+
/* We've used all descriptors we need for this skb, however,
* advance cur_tx so that it references a fresh descriptor.
* ndo_start_xmit will fill this descriptor the next time it's
@@ -3039,19 +3054,6 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
priv->xstats.tx_tso_frames++;
priv->xstats.tx_tso_nfrags += nfrags;
- /* Manage tx mitigation */
- tx_q->tx_count_frames += nfrags + 1;
- if (likely(priv->tx_coal_frames > tx_q->tx_count_frames) &&
- !(priv->synopsys_id >= DWMAC_CORE_4_00 &&
- (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
- priv->hwts_tx_en)) {
- stmmac_tx_timer_arm(priv, queue);
- } else {
- tx_q->tx_count_frames = 0;
- stmmac_set_tx_ic(priv, desc);
- priv->xstats.tx_set_ic_bit++;
- }
-
if (priv->sarc_type)
stmmac_set_desc_sarc(priv, first, priv->sarc_type);
@@ -3223,6 +3225,27 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
/* Only the last descriptor gets to point to the skb. */
tx_q->tx_skbuff[entry] = skb;
+ /* According to the coalesce parameter the IC bit for the latest
+ * segment is reset and the timer re-started to clean the tx status.
+ * This approach takes care about the fragments: desc is the first
+ * element in case of no SG.
+ */
+ tx_q->tx_count_frames += nfrags + 1;
+ if (likely(priv->tx_coal_frames > tx_q->tx_count_frames) &&
+ !((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
+ priv->hwts_tx_en)) {
+ stmmac_tx_timer_arm(priv, queue);
+ } else {
+ if (likely(priv->extend_desc))
+ desc = &tx_q->dma_etx[entry].basic;
+ else
+ desc = &tx_q->dma_tx[entry];
+
+ tx_q->tx_count_frames = 0;
+ stmmac_set_tx_ic(priv, desc);
+ priv->xstats.tx_set_ic_bit++;
+ }
+
/* We've used all descriptors we need for this skb, however,
* advance cur_tx so that it references a fresh descriptor.
* ndo_start_xmit will fill this descriptor the next time it's
@@ -3258,23 +3281,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
dev->stats.tx_bytes += skb->len;
- /* According to the coalesce parameter the IC bit for the latest
- * segment is reset and the timer re-started to clean the tx status.
- * This approach takes care about the fragments: desc is the first
- * element in case of no SG.
- */
- tx_q->tx_count_frames += nfrags + 1;
- if (likely(priv->tx_coal_frames > tx_q->tx_count_frames) &&
- !(priv->synopsys_id >= DWMAC_CORE_4_00 &&
- (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
- priv->hwts_tx_en)) {
- stmmac_tx_timer_arm(priv, queue);
- } else {
- tx_q->tx_count_frames = 0;
- stmmac_set_tx_ic(priv, desc);
- priv->xstats.tx_set_ic_bit++;
- }
-
if (priv->sarc_type)
stmmac_set_desc_sarc(priv, first, priv->sarc_type);
@@ -3505,8 +3511,6 @@ read_again:
if (unlikely(status & dma_own))
break;
- count++;
-
rx_q->cur_rx = STMMAC_GET_ENTRY(rx_q->cur_rx, DMA_RX_SIZE);
next_entry = rx_q->cur_rx;
@@ -3533,6 +3537,7 @@ read_again:
goto read_again;
if (unlikely(error)) {
dev_kfree_skb(skb);
+ count++;
continue;
}
@@ -3572,6 +3577,7 @@ read_again:
skb = napi_alloc_skb(&ch->rx_napi, len);
if (!skb) {
priv->dev->stats.rx_dropped++;
+ count++;
continue;
}
@@ -3637,6 +3643,7 @@ read_again:
priv->dev->stats.rx_packets++;
priv->dev->stats.rx_bytes += len;
+ count++;
}
if (status & rx_not_ls) {
@@ -4742,8 +4749,10 @@ int stmmac_suspend(struct device *dev)
stmmac_mac_set(priv, priv->ioaddr, false);
pinctrl_pm_select_sleep_state(priv->device);
/* Disable clock in case of PWM is off */
- clk_disable(priv->plat->pclk);
- clk_disable(priv->plat->stmmac_clk);
+ if (priv->plat->clk_ptp_ref)
+ clk_disable_unprepare(priv->plat->clk_ptp_ref);
+ clk_disable_unprepare(priv->plat->pclk);
+ clk_disable_unprepare(priv->plat->stmmac_clk);
}
mutex_unlock(&priv->lock);
@@ -4806,8 +4815,10 @@ int stmmac_resume(struct device *dev)
} else {
pinctrl_pm_select_default_state(priv->device);
/* enable the clk previously disabled */
- clk_enable(priv->plat->stmmac_clk);
- clk_enable(priv->plat->pclk);
+ clk_prepare_enable(priv->plat->stmmac_clk);
+ clk_prepare_enable(priv->plat->pclk);
+ if (priv->plat->clk_ptp_ref)
+ clk_prepare_enable(priv->plat->clk_ptp_ref);
/* reset the phy so that it's ready */
if (priv->mii)
stmmac_mdio_reset(priv->mii);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
index 173493db038c..0989e2bb6ee3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
@@ -140,6 +140,10 @@ static int stmmac_enable(struct ptp_clock_info *ptp,
switch (rq->type) {
case PTP_CLK_REQ_PEROUT:
+ /* Reject requests with unsupported flags */
+ if (rq->perout.flags)
+ return -EOPNOTSUPP;
+
cfg = &priv->pps[rq->perout.index];
cfg->start.tv_sec = rq->perout.start.sec;
@@ -164,7 +168,7 @@ static int stmmac_enable(struct ptp_clock_info *ptp,
/* structure describing a PTP hardware clock */
static struct ptp_clock_info stmmac_ptp_clock_ops = {
.owner = THIS_MODULE,
- .name = "stmmac_ptp_clock",
+ .name = "stmmac ptp",
.max_adj = 62500000,
.n_alarm = 0,
.n_ext_ts = 0,
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
index cc76a42c7466..ac3f658105c0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
@@ -6,7 +6,9 @@
* Author: Jose Abreu <joabreu@synopsys.com>
*/
+#include <linux/bitrev.h>
#include <linux/completion.h>
+#include <linux/crc32.h>
#include <linux/ethtool.h>
#include <linux/ip.h>
#include <linux/phy.h>
@@ -485,17 +487,66 @@ static int stmmac_filter_check(struct stmmac_priv *priv)
return -EOPNOTSUPP;
}
+static bool stmmac_hash_check(struct stmmac_priv *priv, unsigned char *addr)
+{
+ int mc_offset = 32 - priv->hw->mcast_bits_log2;
+ struct netdev_hw_addr *ha;
+ u32 hash, hash_nr;
+
+ /* First compute the hash for desired addr */
+ hash = bitrev32(~crc32_le(~0, addr, 6)) >> mc_offset;
+ hash_nr = hash >> 5;
+ hash = 1 << (hash & 0x1f);
+
+ /* Now, check if it collides with any existing one */
+ netdev_for_each_mc_addr(ha, priv->dev) {
+ u32 nr = bitrev32(~crc32_le(~0, ha->addr, ETH_ALEN)) >> mc_offset;
+ if (((nr >> 5) == hash_nr) && ((1 << (nr & 0x1f)) == hash))
+ return false;
+ }
+
+ /* No collisions, address is good to go */
+ return true;
+}
+
+static bool stmmac_perfect_check(struct stmmac_priv *priv, unsigned char *addr)
+{
+ struct netdev_hw_addr *ha;
+
+ /* Check if it collides with any existing one */
+ netdev_for_each_uc_addr(ha, priv->dev) {
+ if (!memcmp(ha->addr, addr, ETH_ALEN))
+ return false;
+ }
+
+ /* No collisions, address is good to go */
+ return true;
+}
+
static int stmmac_test_hfilt(struct stmmac_priv *priv)
{
- unsigned char gd_addr[ETH_ALEN] = {0x01, 0x00, 0xcc, 0xcc, 0xdd, 0xdd};
- unsigned char bd_addr[ETH_ALEN] = {0x09, 0x00, 0xaa, 0xaa, 0xbb, 0xbb};
+ unsigned char gd_addr[ETH_ALEN] = {0xf1, 0xee, 0xdd, 0xcc, 0xbb, 0xaa};
+ unsigned char bd_addr[ETH_ALEN] = {0xf1, 0xff, 0xff, 0xff, 0xff, 0xff};
struct stmmac_packet_attrs attr = { };
- int ret;
+ int ret, tries = 256;
ret = stmmac_filter_check(priv);
if (ret)
return ret;
+ if (netdev_mc_count(priv->dev) >= priv->hw->multicast_filter_bins)
+ return -EOPNOTSUPP;
+
+ while (--tries) {
+ /* We only need to check the bd_addr for collisions */
+ bd_addr[ETH_ALEN - 1] = tries;
+ if (stmmac_hash_check(priv, bd_addr))
+ break;
+ }
+
+ if (!tries)
+ return -EOPNOTSUPP;
+
ret = dev_mc_add(priv->dev, gd_addr);
if (ret)
return ret;
@@ -520,13 +571,25 @@ cleanup:
static int stmmac_test_pfilt(struct stmmac_priv *priv)
{
- unsigned char gd_addr[ETH_ALEN] = {0x00, 0x01, 0x44, 0x55, 0x66, 0x77};
- unsigned char bd_addr[ETH_ALEN] = {0x08, 0x00, 0x22, 0x33, 0x44, 0x55};
+ unsigned char gd_addr[ETH_ALEN] = {0xf0, 0x01, 0x44, 0x55, 0x66, 0x77};
+ unsigned char bd_addr[ETH_ALEN] = {0xf0, 0xff, 0xff, 0xff, 0xff, 0xff};
struct stmmac_packet_attrs attr = { };
- int ret;
+ int ret, tries = 256;
if (stmmac_filter_check(priv))
return -EOPNOTSUPP;
+ if (netdev_uc_count(priv->dev) >= priv->hw->unicast_filter_entries)
+ return -EOPNOTSUPP;
+
+ while (--tries) {
+ /* We only need to check the bd_addr for collisions */
+ bd_addr[ETH_ALEN - 1] = tries;
+ if (stmmac_perfect_check(priv, bd_addr))
+ break;
+ }
+
+ if (!tries)
+ return -EOPNOTSUPP;
ret = dev_uc_add(priv->dev, gd_addr);
if (ret)
@@ -550,37 +613,31 @@ cleanup:
return ret;
}
-static int stmmac_dummy_sync(struct net_device *netdev, const u8 *addr)
-{
- return 0;
-}
-
-static void stmmac_test_set_rx_mode(struct net_device *netdev)
-{
- /* As we are in test mode of ethtool we already own the rtnl lock
- * so no address will change from user. We can just call the
- * ndo_set_rx_mode() callback directly */
- if (netdev->netdev_ops->ndo_set_rx_mode)
- netdev->netdev_ops->ndo_set_rx_mode(netdev);
-}
-
static int stmmac_test_mcfilt(struct stmmac_priv *priv)
{
- unsigned char uc_addr[ETH_ALEN] = {0x00, 0x01, 0x44, 0x55, 0x66, 0x77};
- unsigned char mc_addr[ETH_ALEN] = {0x01, 0x01, 0x44, 0x55, 0x66, 0x77};
+ unsigned char uc_addr[ETH_ALEN] = {0xf0, 0xff, 0xff, 0xff, 0xff, 0xff};
+ unsigned char mc_addr[ETH_ALEN] = {0xf1, 0xff, 0xff, 0xff, 0xff, 0xff};
struct stmmac_packet_attrs attr = { };
- int ret;
+ int ret, tries = 256;
if (stmmac_filter_check(priv))
return -EOPNOTSUPP;
+ if (netdev_uc_count(priv->dev) >= priv->hw->unicast_filter_entries)
+ return -EOPNOTSUPP;
+
+ while (--tries) {
+ /* We only need to check the mc_addr for collisions */
+ mc_addr[ETH_ALEN - 1] = tries;
+ if (stmmac_hash_check(priv, mc_addr))
+ break;
+ }
- /* Remove all MC addresses */
- __dev_mc_unsync(priv->dev, NULL);
- stmmac_test_set_rx_mode(priv->dev);
+ if (!tries)
+ return -EOPNOTSUPP;
ret = dev_uc_add(priv->dev, uc_addr);
if (ret)
- goto cleanup;
+ return ret;
attr.dst = uc_addr;
@@ -597,28 +654,34 @@ static int stmmac_test_mcfilt(struct stmmac_priv *priv)
cleanup:
dev_uc_del(priv->dev, uc_addr);
- __dev_mc_sync(priv->dev, stmmac_dummy_sync, NULL);
- stmmac_test_set_rx_mode(priv->dev);
return ret;
}
static int stmmac_test_ucfilt(struct stmmac_priv *priv)
{
- unsigned char uc_addr[ETH_ALEN] = {0x00, 0x01, 0x44, 0x55, 0x66, 0x77};
- unsigned char mc_addr[ETH_ALEN] = {0x01, 0x01, 0x44, 0x55, 0x66, 0x77};
+ unsigned char uc_addr[ETH_ALEN] = {0xf0, 0xff, 0xff, 0xff, 0xff, 0xff};
+ unsigned char mc_addr[ETH_ALEN] = {0xf1, 0xff, 0xff, 0xff, 0xff, 0xff};
struct stmmac_packet_attrs attr = { };
- int ret;
+ int ret, tries = 256;
if (stmmac_filter_check(priv))
return -EOPNOTSUPP;
+ if (netdev_mc_count(priv->dev) >= priv->hw->multicast_filter_bins)
+ return -EOPNOTSUPP;
- /* Remove all UC addresses */
- __dev_uc_unsync(priv->dev, NULL);
- stmmac_test_set_rx_mode(priv->dev);
+ while (--tries) {
+ /* We only need to check the uc_addr for collisions */
+ uc_addr[ETH_ALEN - 1] = tries;
+ if (stmmac_perfect_check(priv, uc_addr))
+ break;
+ }
+
+ if (!tries)
+ return -EOPNOTSUPP;
ret = dev_mc_add(priv->dev, mc_addr);
if (ret)
- goto cleanup;
+ return ret;
attr.dst = mc_addr;
@@ -635,8 +698,6 @@ static int stmmac_test_ucfilt(struct stmmac_priv *priv)
cleanup:
dev_mc_del(priv->dev, mc_addr);
- __dev_uc_sync(priv->dev, stmmac_dummy_sync, NULL);
- stmmac_test_set_rx_mode(priv->dev);
return ret;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
index e231098061b6..f9a9a9d82233 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
@@ -510,7 +510,7 @@ static struct stmmac_flow_entry *tc_find_flow(struct stmmac_priv *priv,
return NULL;
}
-struct {
+static struct {
int (*fn)(struct stmmac_priv *priv, struct flow_cls_offload *cls,
struct stmmac_flow_entry *entry);
} tc_flow_parsers[] = {
diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c
index a65edd2770e6..37ba708ac781 100644
--- a/drivers/net/ethernet/ti/davinci_cpdma.c
+++ b/drivers/net/ethernet/ti/davinci_cpdma.c
@@ -722,7 +722,7 @@ static void cpdma_chan_set_descs(struct cpdma_ctlr *ctlr,
* cpdma_chan_split_pool - Splits ctrl pool between all channels.
* Has to be called under ctlr lock
*/
-int cpdma_chan_split_pool(struct cpdma_ctlr *ctlr)
+static int cpdma_chan_split_pool(struct cpdma_ctlr *ctlr)
{
int tx_per_ch_desc = 0, rx_per_ch_desc = 0;
int free_rx_num = 0, free_tx_num = 0;
diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c
index bbbc1dcb6ab5..b517c1af9de0 100644
--- a/drivers/net/fjes/fjes_main.c
+++ b/drivers/net/fjes/fjes_main.c
@@ -1237,8 +1237,17 @@ static int fjes_probe(struct platform_device *plat_dev)
adapter->open_guard = false;
adapter->txrx_wq = alloc_workqueue(DRV_NAME "/txrx", WQ_MEM_RECLAIM, 0);
+ if (unlikely(!adapter->txrx_wq)) {
+ err = -ENOMEM;
+ goto err_free_netdev;
+ }
+
adapter->control_wq = alloc_workqueue(DRV_NAME "/control",
WQ_MEM_RECLAIM, 0);
+ if (unlikely(!adapter->control_wq)) {
+ err = -ENOMEM;
+ goto err_free_txrx_wq;
+ }
INIT_WORK(&adapter->tx_stall_task, fjes_tx_stall_task);
INIT_WORK(&adapter->raise_intr_rxdata_task,
@@ -1255,7 +1264,7 @@ static int fjes_probe(struct platform_device *plat_dev)
hw->hw_res.irq = platform_get_irq(plat_dev, 0);
err = fjes_hw_init(&adapter->hw);
if (err)
- goto err_free_netdev;
+ goto err_free_control_wq;
/* setup MAC address (02:00:00:00:00:[epid])*/
netdev->dev_addr[0] = 2;
@@ -1277,6 +1286,10 @@ static int fjes_probe(struct platform_device *plat_dev)
err_hw_exit:
fjes_hw_exit(&adapter->hw);
+err_free_control_wq:
+ destroy_workqueue(adapter->control_wq);
+err_free_txrx_wq:
+ destroy_workqueue(adapter->txrx_wq);
err_free_netdev:
free_netdev(netdev);
err_out:
diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c
index fbec711ff514..fbea6f232819 100644
--- a/drivers/net/hamradio/bpqether.c
+++ b/drivers/net/hamradio/bpqether.c
@@ -107,27 +107,6 @@ struct bpqdev {
static LIST_HEAD(bpq_devices);
-/*
- * bpqether network devices are paired with ethernet devices below them, so
- * form a special "super class" of normal ethernet devices; split their locks
- * off into a separate class since they always nest.
- */
-static struct lock_class_key bpq_netdev_xmit_lock_key;
-static struct lock_class_key bpq_netdev_addr_lock_key;
-
-static void bpq_set_lockdep_class_one(struct net_device *dev,
- struct netdev_queue *txq,
- void *_unused)
-{
- lockdep_set_class(&txq->_xmit_lock, &bpq_netdev_xmit_lock_key);
-}
-
-static void bpq_set_lockdep_class(struct net_device *dev)
-{
- lockdep_set_class(&dev->addr_list_lock, &bpq_netdev_addr_lock_key);
- netdev_for_each_tx_queue(dev, bpq_set_lockdep_class_one, NULL);
-}
-
/* ------------------------------------------------------------------------ */
@@ -498,7 +477,6 @@ static int bpq_new_device(struct net_device *edev)
err = register_netdevice(ndev);
if (err)
goto error;
- bpq_set_lockdep_class(ndev);
/* List protected by RTNL */
list_add_rcu(&bpq->bpq_list, &bpq_devices);
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 670ef682f268..fb547f37af1e 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -609,7 +609,8 @@ struct nvsp_5_send_indirect_table {
/* The number of entries in the send indirection table */
u32 count;
- /* The offset of the send indirection table from top of this struct.
+ /* The offset of the send indirection table from the beginning of
+ * struct nvsp_message.
* The send indirection table tells which channel to put the send
* traffic on. Each entry is a channel number.
*/
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index d22a36fc7a7c..eab83e71567a 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -1178,20 +1178,39 @@ static int netvsc_receive(struct net_device *ndev,
}
static void netvsc_send_table(struct net_device *ndev,
- const struct nvsp_message *nvmsg)
+ struct netvsc_device *nvscdev,
+ const struct nvsp_message *nvmsg,
+ u32 msglen)
{
struct net_device_context *net_device_ctx = netdev_priv(ndev);
- u32 count, *tab;
+ u32 count, offset, *tab;
int i;
count = nvmsg->msg.v5_msg.send_table.count;
+ offset = nvmsg->msg.v5_msg.send_table.offset;
+
if (count != VRSS_SEND_TAB_SIZE) {
netdev_err(ndev, "Received wrong send-table size:%u\n", count);
return;
}
- tab = (u32 *)((unsigned long)&nvmsg->msg.v5_msg.send_table +
- nvmsg->msg.v5_msg.send_table.offset);
+ /* If negotiated version <= NVSP_PROTOCOL_VERSION_6, the offset may be
+ * wrong due to a host bug. So fix the offset here.
+ */
+ if (nvscdev->nvsp_version <= NVSP_PROTOCOL_VERSION_6 &&
+ msglen >= sizeof(struct nvsp_message_header) +
+ sizeof(union nvsp_6_message_uber) + count * sizeof(u32))
+ offset = sizeof(struct nvsp_message_header) +
+ sizeof(union nvsp_6_message_uber);
+
+ /* Boundary check for all versions */
+ if (offset > msglen - count * sizeof(u32)) {
+ netdev_err(ndev, "Received send-table offset too big:%u\n",
+ offset);
+ return;
+ }
+
+ tab = (void *)nvmsg + offset;
for (i = 0; i < count; i++)
net_device_ctx->tx_table[i] = tab[i];
@@ -1209,12 +1228,14 @@ static void netvsc_send_vf(struct net_device *ndev,
net_device_ctx->vf_alloc ? "added" : "removed");
}
-static void netvsc_receive_inband(struct net_device *ndev,
- const struct nvsp_message *nvmsg)
+static void netvsc_receive_inband(struct net_device *ndev,
+ struct netvsc_device *nvscdev,
+ const struct nvsp_message *nvmsg,
+ u32 msglen)
{
switch (nvmsg->hdr.msg_type) {
case NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE:
- netvsc_send_table(ndev, nvmsg);
+ netvsc_send_table(ndev, nvscdev, nvmsg, msglen);
break;
case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION:
@@ -1232,6 +1253,7 @@ static int netvsc_process_raw_pkt(struct hv_device *device,
{
struct vmbus_channel *channel = nvchan->channel;
const struct nvsp_message *nvmsg = hv_pkt_data(desc);
+ u32 msglen = hv_pkt_datalen(desc);
trace_nvsp_recv(ndev, channel, nvmsg);
@@ -1247,7 +1269,7 @@ static int netvsc_process_raw_pkt(struct hv_device *device,
break;
case VM_PKT_DATA_INBAND:
- netvsc_receive_inband(ndev, nvmsg);
+ netvsc_receive_inband(ndev, net_device, nvmsg, msglen);
break;
default:
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 39dddcd8b3cb..963509add611 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -982,7 +982,7 @@ static int netvsc_attach(struct net_device *ndev,
if (netif_running(ndev)) {
ret = rndis_filter_open(nvdev);
if (ret)
- return ret;
+ goto err;
rdev = nvdev->extension;
if (!rdev->link_state)
@@ -990,6 +990,13 @@ static int netvsc_attach(struct net_device *ndev,
}
return 0;
+
+err:
+ netif_device_detach(ndev);
+
+ rndis_filter_device_remove(hdev, nvdev);
+
+ return ret;
}
static int netvsc_set_channels(struct net_device *net,
@@ -1807,8 +1814,10 @@ static int netvsc_set_features(struct net_device *ndev,
ret = rndis_filter_set_offload_params(ndev, nvdev, &offloads);
- if (ret)
+ if (ret) {
features ^= NETIF_F_LRO;
+ ndev->features = features;
+ }
syncvf:
if (!vf_netdev)
@@ -2335,8 +2344,6 @@ static int netvsc_probe(struct hv_device *dev,
NETIF_F_HW_VLAN_CTAG_RX;
net->vlan_features = net->features;
- netdev_lockdep_set_classes(net);
-
/* MTU range: 68 - 1500 or 65521 */
net->min_mtu = NETVSC_MTU_MIN;
if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2)
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 887bbba4631e..ba3dfac1d904 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -131,8 +131,6 @@ static int ipvlan_init(struct net_device *dev)
dev->gso_max_segs = phy_dev->gso_max_segs;
dev->hard_header_len = phy_dev->hard_header_len;
- netdev_lockdep_set_classes(dev);
-
ipvlan->pcpu_stats = netdev_alloc_pcpu_stats(struct ipvl_pcpu_stats);
if (!ipvlan->pcpu_stats)
return -ENOMEM;
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index cb7637364b40..afd8b2a08245 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -267,7 +267,6 @@ struct macsec_dev {
struct pcpu_secy_stats __percpu *stats;
struct list_head secys;
struct gro_cells gro_cells;
- unsigned int nest_level;
};
/**
@@ -2750,7 +2749,6 @@ static netdev_tx_t macsec_start_xmit(struct sk_buff *skb,
#define MACSEC_FEATURES \
(NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST)
-static struct lock_class_key macsec_netdev_addr_lock_key;
static int macsec_dev_init(struct net_device *dev)
{
@@ -2958,11 +2956,6 @@ static int macsec_get_iflink(const struct net_device *dev)
return macsec_priv(dev)->real_dev->ifindex;
}
-static int macsec_get_nest_level(struct net_device *dev)
-{
- return macsec_priv(dev)->nest_level;
-}
-
static const struct net_device_ops macsec_netdev_ops = {
.ndo_init = macsec_dev_init,
.ndo_uninit = macsec_dev_uninit,
@@ -2976,7 +2969,6 @@ static const struct net_device_ops macsec_netdev_ops = {
.ndo_start_xmit = macsec_start_xmit,
.ndo_get_stats64 = macsec_get_stats64,
.ndo_get_iflink = macsec_get_iflink,
- .ndo_get_lock_subclass = macsec_get_nest_level,
};
static const struct device_type macsec_type = {
@@ -3001,12 +2993,10 @@ static const struct nla_policy macsec_rtnl_policy[IFLA_MACSEC_MAX + 1] = {
static void macsec_free_netdev(struct net_device *dev)
{
struct macsec_dev *macsec = macsec_priv(dev);
- struct net_device *real_dev = macsec->real_dev;
free_percpu(macsec->stats);
free_percpu(macsec->secy.tx_sc.stats);
- dev_put(real_dev);
}
static void macsec_setup(struct net_device *dev)
@@ -3261,14 +3251,6 @@ static int macsec_newlink(struct net *net, struct net_device *dev,
if (err < 0)
return err;
- dev_hold(real_dev);
-
- macsec->nest_level = dev_get_nest_level(real_dev) + 1;
- netdev_lockdep_set_classes(dev);
- lockdep_set_class_and_subclass(&dev->addr_list_lock,
- &macsec_netdev_addr_lock_key,
- macsec_get_nest_level(dev));
-
err = netdev_upper_dev_link(real_dev, dev, extack);
if (err < 0)
goto unregister;
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 940192c057b6..34fc59bd1e20 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -852,8 +852,6 @@ static int macvlan_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
* "super class" of normal network devices; split their locks off into a
* separate class since they always nest.
*/
-static struct lock_class_key macvlan_netdev_addr_lock_key;
-
#define ALWAYS_ON_OFFLOADS \
(NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE | \
NETIF_F_GSO_ROBUST | NETIF_F_GSO_ENCAP_ALL)
@@ -869,19 +867,6 @@ static struct lock_class_key macvlan_netdev_addr_lock_key;
#define MACVLAN_STATE_MASK \
((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT))
-static int macvlan_get_nest_level(struct net_device *dev)
-{
- return ((struct macvlan_dev *)netdev_priv(dev))->nest_level;
-}
-
-static void macvlan_set_lockdep_class(struct net_device *dev)
-{
- netdev_lockdep_set_classes(dev);
- lockdep_set_class_and_subclass(&dev->addr_list_lock,
- &macvlan_netdev_addr_lock_key,
- macvlan_get_nest_level(dev));
-}
-
static int macvlan_init(struct net_device *dev)
{
struct macvlan_dev *vlan = netdev_priv(dev);
@@ -900,8 +885,6 @@ static int macvlan_init(struct net_device *dev)
dev->gso_max_segs = lowerdev->gso_max_segs;
dev->hard_header_len = lowerdev->hard_header_len;
- macvlan_set_lockdep_class(dev);
-
vlan->pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats);
if (!vlan->pcpu_stats)
return -ENOMEM;
@@ -1161,7 +1144,6 @@ static const struct net_device_ops macvlan_netdev_ops = {
.ndo_fdb_add = macvlan_fdb_add,
.ndo_fdb_del = macvlan_fdb_del,
.ndo_fdb_dump = ndo_dflt_fdb_dump,
- .ndo_get_lock_subclass = macvlan_get_nest_level,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = macvlan_dev_poll_controller,
.ndo_netpoll_setup = macvlan_dev_netpoll_setup,
@@ -1445,7 +1427,6 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
vlan->dev = dev;
vlan->port = port;
vlan->set_features = MACVLAN_FEATURES;
- vlan->nest_level = dev_get_nest_level(lowerdev) + 1;
vlan->mode = MACVLAN_MODE_VEPA;
if (data && data[IFLA_MACVLAN_MODE])
diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
index 56576d4f34a5..44c2d857a7fa 100644
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -708,6 +708,7 @@ nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev, unsigned int port_count)
goto err_debugfs_exit;
devlink_params_publish(devlink);
+ devlink_reload_enable(devlink);
return nsim_dev;
err_debugfs_exit:
@@ -732,6 +733,7 @@ static void nsim_dev_destroy(struct nsim_dev *nsim_dev)
{
struct devlink *devlink = priv_to_devlink(nsim_dev);
+ devlink_reload_disable(devlink);
nsim_bpf_dev_exit(nsim_dev);
nsim_dev_debugfs_exit(nsim_dev);
nsim_dev_traps_exit(devlink);
@@ -806,9 +808,11 @@ static void nsim_dev_port_del_all(struct nsim_dev *nsim_dev)
{
struct nsim_dev_port *nsim_dev_port, *tmp;
+ mutex_lock(&nsim_dev->port_list_lock);
list_for_each_entry_safe(nsim_dev_port, tmp,
&nsim_dev->port_list, list)
__nsim_dev_port_del(nsim_dev_port);
+ mutex_unlock(&nsim_dev->port_list_lock);
}
int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev)
@@ -822,14 +826,17 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev)
return PTR_ERR(nsim_dev);
dev_set_drvdata(&nsim_bus_dev->dev, nsim_dev);
+ mutex_lock(&nsim_dev->port_list_lock);
for (i = 0; i < nsim_bus_dev->port_count; i++) {
err = __nsim_dev_port_add(nsim_dev, i);
if (err)
goto err_port_del_all;
}
+ mutex_unlock(&nsim_dev->port_list_lock);
return 0;
err_port_del_all:
+ mutex_unlock(&nsim_dev->port_list_lock);
nsim_dev_port_del_all(nsim_dev);
nsim_dev_destroy(nsim_dev);
return err;
diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c
index f61d094746c0..1a251f76d09b 100644
--- a/drivers/net/netdevsim/fib.c
+++ b/drivers/net/netdevsim/fib.c
@@ -241,8 +241,8 @@ static struct pernet_operations nsim_fib_net_ops = {
void nsim_fib_exit(void)
{
- unregister_pernet_subsys(&nsim_fib_net_ops);
unregister_fib_notifier(&nsim_fib_nb);
+ unregister_pernet_subsys(&nsim_fib_net_ops);
}
int nsim_fib_init(void)
@@ -258,6 +258,7 @@ int nsim_fib_init(void)
err = register_fib_notifier(&nsim_fib_nb, nsim_fib_dump_inconsistent);
if (err < 0) {
pr_err("Failed to register fib notifier\n");
+ unregister_pernet_subsys(&nsim_fib_net_ops);
goto err_out;
}
diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c
index 8fc33867e524..af8eabe7a6d4 100644
--- a/drivers/net/phy/bcm7xxx.c
+++ b/drivers/net/phy/bcm7xxx.c
@@ -572,6 +572,7 @@ static int bcm7xxx_28nm_probe(struct phy_device *phydev)
.name = _name, \
/* PHY_BASIC_FEATURES */ \
.flags = PHY_IS_INTERNAL, \
+ .soft_reset = genphy_soft_reset, \
.config_init = bcm7xxx_config_init, \
.suspend = bcm7xxx_suspend, \
.resume = bcm7xxx_config_init, \
diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c
index 6580094161a9..8f241b57fcf6 100644
--- a/drivers/net/phy/dp83640.c
+++ b/drivers/net/phy/dp83640.c
@@ -469,6 +469,19 @@ static int ptp_dp83640_enable(struct ptp_clock_info *ptp,
switch (rq->type) {
case PTP_CLK_REQ_EXTTS:
+ /* Reject requests with unsupported flags */
+ if (rq->extts.flags & ~(PTP_ENABLE_FEATURE |
+ PTP_RISING_EDGE |
+ PTP_FALLING_EDGE |
+ PTP_STRICT_FLAGS))
+ return -EOPNOTSUPP;
+
+ /* Reject requests to enable time stamping on both edges. */
+ if ((rq->extts.flags & PTP_STRICT_FLAGS) &&
+ (rq->extts.flags & PTP_ENABLE_FEATURE) &&
+ (rq->extts.flags & PTP_EXTTS_EDGES) == PTP_EXTTS_EDGES)
+ return -EOPNOTSUPP;
+
index = rq->extts.index;
if (index >= N_EXT_TS)
return -EINVAL;
@@ -491,6 +504,9 @@ static int ptp_dp83640_enable(struct ptp_clock_info *ptp,
return 0;
case PTP_CLK_REQ_PEROUT:
+ /* Reject requests with unsupported flags */
+ if (rq->perout.flags)
+ return -EOPNOTSUPP;
if (rq->perout.index >= N_PER_OUT)
return -EINVAL;
return periodic_output(clock, rq, on, rq->perout.index);
diff --git a/drivers/net/phy/mdio-sun4i.c b/drivers/net/phy/mdio-sun4i.c
index 58d6504495e0..f798de3276dc 100644
--- a/drivers/net/phy/mdio-sun4i.c
+++ b/drivers/net/phy/mdio-sun4i.c
@@ -145,8 +145,11 @@ err_out_free_mdiobus:
static int sun4i_mdio_remove(struct platform_device *pdev)
{
struct mii_bus *bus = platform_get_drvdata(pdev);
+ struct sun4i_mdio_data *data = bus->priv;
mdiobus_unregister(bus);
+ if (data->regulator)
+ regulator_disable(data->regulator);
mdiobus_free(bus);
return 0;
diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 2e29ab841b4d..dbacb0031877 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -64,11 +64,12 @@ static int mdiobus_register_reset(struct mdio_device *mdiodev)
if (mdiodev->dev.of_node)
reset = devm_reset_control_get_exclusive(&mdiodev->dev,
"phy");
- if (PTR_ERR(reset) == -ENOENT ||
- PTR_ERR(reset) == -ENOTSUPP)
- reset = NULL;
- else if (IS_ERR(reset))
- return PTR_ERR(reset);
+ if (IS_ERR(reset)) {
+ if (PTR_ERR(reset) == -ENOENT || PTR_ERR(reset) == -ENOTSUPP)
+ reset = NULL;
+ else
+ return PTR_ERR(reset);
+ }
mdiodev->reset_ctrl = reset;
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 2fea5541c35a..63dedec0433d 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -341,6 +341,35 @@ static int ksz8041_config_aneg(struct phy_device *phydev)
return genphy_config_aneg(phydev);
}
+static int ksz8051_ksz8795_match_phy_device(struct phy_device *phydev,
+ const u32 ksz_phy_id)
+{
+ int ret;
+
+ if ((phydev->phy_id & MICREL_PHY_ID_MASK) != ksz_phy_id)
+ return 0;
+
+ ret = phy_read(phydev, MII_BMSR);
+ if (ret < 0)
+ return ret;
+
+ /* KSZ8051 PHY and KSZ8794/KSZ8795/KSZ8765 switch share the same
+ * exact PHY ID. However, they can be told apart by the extended
+ * capability registers presence. The KSZ8051 PHY has them while
+ * the switch does not.
+ */
+ ret &= BMSR_ERCAP;
+ if (ksz_phy_id == PHY_ID_KSZ8051)
+ return ret;
+ else
+ return !ret;
+}
+
+static int ksz8051_match_phy_device(struct phy_device *phydev)
+{
+ return ksz8051_ksz8795_match_phy_device(phydev, PHY_ID_KSZ8051);
+}
+
static int ksz8081_config_init(struct phy_device *phydev)
{
/* KSZPHY_OMSO_FACTORY_TEST is set at de-assertion of the reset line
@@ -364,6 +393,11 @@ static int ksz8061_config_init(struct phy_device *phydev)
return kszphy_config_init(phydev);
}
+static int ksz8795_match_phy_device(struct phy_device *phydev)
+{
+ return ksz8051_ksz8795_match_phy_device(phydev, PHY_ID_KSZ87XX);
+}
+
static int ksz9021_load_values_from_of(struct phy_device *phydev,
const struct device_node *of_node,
u16 reg,
@@ -1017,8 +1051,6 @@ static struct phy_driver ksphy_driver[] = {
.suspend = genphy_suspend,
.resume = genphy_resume,
}, {
- .phy_id = PHY_ID_KSZ8051,
- .phy_id_mask = MICREL_PHY_ID_MASK,
.name = "Micrel KSZ8051",
/* PHY_BASIC_FEATURES */
.driver_data = &ksz8051_type,
@@ -1029,6 +1061,7 @@ static struct phy_driver ksphy_driver[] = {
.get_sset_count = kszphy_get_sset_count,
.get_strings = kszphy_get_strings,
.get_stats = kszphy_get_stats,
+ .match_phy_device = ksz8051_match_phy_device,
.suspend = genphy_suspend,
.resume = genphy_resume,
}, {
@@ -1141,13 +1174,12 @@ static struct phy_driver ksphy_driver[] = {
.suspend = genphy_suspend,
.resume = genphy_resume,
}, {
- .phy_id = PHY_ID_KSZ8795,
- .phy_id_mask = MICREL_PHY_ID_MASK,
- .name = "Micrel KSZ8795",
+ .name = "Micrel KSZ87XX Switch",
/* PHY_BASIC_FEATURES */
.config_init = kszphy_config_init,
.config_aneg = ksz8873mll_config_aneg,
.read_status = ksz8873mll_read_status,
+ .match_phy_device = ksz8795_match_phy_device,
.suspend = genphy_suspend,
.resume = genphy_resume,
}, {
diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c
index 7935593debb1..a1caeee12236 100644
--- a/drivers/net/phy/phy-c45.c
+++ b/drivers/net/phy/phy-c45.c
@@ -323,6 +323,8 @@ int genphy_c45_read_pma(struct phy_device *phydev)
{
int val;
+ linkmode_zero(phydev->lp_advertising);
+
val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_CTRL1);
if (val < 0)
return val;
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 119e6f466056..105d389b58e7 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -572,9 +572,6 @@ int phy_start_aneg(struct phy_device *phydev)
if (AUTONEG_DISABLE == phydev->autoneg)
phy_sanitize_settings(phydev);
- /* Invalidate LP advertising flags */
- linkmode_zero(phydev->lp_advertising);
-
err = phy_config_aneg(phydev);
if (err < 0)
goto out_unlock;
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 9d2bbb13293e..adb66a2fae18 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1787,7 +1787,14 @@ int genphy_read_lpa(struct phy_device *phydev)
{
int lpa, lpagb;
- if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) {
+ if (phydev->autoneg == AUTONEG_ENABLE) {
+ if (!phydev->autoneg_complete) {
+ mii_stat1000_mod_linkmode_lpa_t(phydev->lp_advertising,
+ 0);
+ mii_lpa_mod_linkmode_lpa_t(phydev->lp_advertising, 0);
+ return 0;
+ }
+
if (phydev->is_gigabit_capable) {
lpagb = phy_read(phydev, MII_STAT1000);
if (lpagb < 0)
@@ -1815,6 +1822,8 @@ int genphy_read_lpa(struct phy_device *phydev)
return lpa;
mii_lpa_mod_linkmode_lpa_t(phydev->lp_advertising, lpa);
+ } else {
+ linkmode_zero(phydev->lp_advertising);
}
return 0;
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index a5a57ca94c1a..536236fdb232 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -87,8 +87,24 @@ struct phylink {
phylink_printk(KERN_WARNING, pl, fmt, ##__VA_ARGS__)
#define phylink_info(pl, fmt, ...) \
phylink_printk(KERN_INFO, pl, fmt, ##__VA_ARGS__)
+#if defined(CONFIG_DYNAMIC_DEBUG)
#define phylink_dbg(pl, fmt, ...) \
+do { \
+ if ((pl)->config->type == PHYLINK_NETDEV) \
+ netdev_dbg((pl)->netdev, fmt, ##__VA_ARGS__); \
+ else if ((pl)->config->type == PHYLINK_DEV) \
+ dev_dbg((pl)->dev, fmt, ##__VA_ARGS__); \
+} while (0)
+#elif defined(DEBUG)
+#define phylink_dbg(pl, fmt, ...) \
phylink_printk(KERN_DEBUG, pl, fmt, ##__VA_ARGS__)
+#else
+#define phylink_dbg(pl, fmt, ...) \
+({ \
+ if (0) \
+ phylink_printk(KERN_DEBUG, pl, fmt, ##__VA_ARGS__); \
+})
+#endif
/**
* phylink_set_port_modes() - set the port type modes in the ethtool mask
@@ -576,7 +592,7 @@ static int phylink_register_sfp(struct phylink *pl,
/**
* phylink_create() - create a phylink instance
- * @ndev: a pointer to the &struct net_device
+ * @config: a pointer to the target &struct phylink_config
* @fwnode: a pointer to a &struct fwnode_handle describing the network
* interface
* @iface: the desired link mode defined by &typedef phy_interface_t
@@ -585,6 +601,8 @@ static int phylink_register_sfp(struct phylink *pl,
* Create a new phylink instance, and parse the link parameters found in @np.
* This will parse in-band modes, fixed-link or SFP configuration.
*
+ * Note: the rtnl lock must not be held when calling this function.
+ *
* Returns a pointer to a &struct phylink, or an error-pointer value. Users
* must use IS_ERR() to check for errors from this function.
*/
@@ -662,6 +680,8 @@ EXPORT_SYMBOL_GPL(phylink_create);
*
* Destroy a phylink instance. Any PHY that has been attached must have been
* cleaned up via phylink_disconnect_phy() prior to calling this function.
+ *
+ * Note: the rtnl lock must not be held when calling this function.
*/
void phylink_destroy(struct phylink *pl)
{
@@ -1238,7 +1258,13 @@ int phylink_ethtool_ksettings_set(struct phylink *pl,
pl->link_config.duplex = our_kset.base.duplex;
pl->link_config.an_enabled = our_kset.base.autoneg != AUTONEG_DISABLE;
- if (!test_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state)) {
+ /* If we have a PHY, phylib will call our link state function if the
+ * mode has changed, which will trigger a resolve and update the MAC
+ * configuration. For a fixed link, this isn't able to change any
+ * parameters, which just leaves inband mode.
+ */
+ if (pl->link_an_mode == MLO_AN_INBAND &&
+ !test_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state)) {
phylink_mac_config(pl, &pl->link_config);
phylink_mac_an_restart(pl);
}
@@ -1318,15 +1344,16 @@ int phylink_ethtool_set_pauseparam(struct phylink *pl,
if (pause->tx_pause)
config->pause |= MLO_PAUSE_TX;
- if (!test_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state)) {
+ /* If we have a PHY, phylib will call our link state function if the
+ * mode has changed, which will trigger a resolve and update the MAC
+ * configuration.
+ */
+ if (pl->phydev) {
+ phy_set_asym_pause(pl->phydev, pause->rx_pause,
+ pause->tx_pause);
+ } else if (!test_bit(PHYLINK_DISABLE_STOPPED,
+ &pl->phylink_disable_state)) {
switch (pl->link_an_mode) {
- case MLO_AN_PHY:
- /* Silently mark the carrier down, and then trigger a resolve */
- if (pl->netdev)
- netif_carrier_off(pl->netdev);
- phylink_run_resolve(pl);
- break;
-
case MLO_AN_FIXED:
/* Should we allow fixed links to change against the config? */
phylink_resolve_flow(pl, config);
diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c
index dc3d92d340c4..b73298250793 100644
--- a/drivers/net/phy/smsc.c
+++ b/drivers/net/phy/smsc.c
@@ -327,6 +327,7 @@ static struct phy_driver smsc_phy_driver[] = {
.name = "SMSC LAN8740",
/* PHY_BASIC_FEATURES */
+ .flags = PHY_RST_AFTER_CLK_EN,
.probe = smsc_phy_probe,
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 9a1b006904a7..61824bbb5588 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -1324,8 +1324,6 @@ static int ppp_dev_init(struct net_device *dev)
{
struct ppp *ppp;
- netdev_lockdep_set_classes(dev);
-
ppp = netdev_priv(dev);
/* Let the netdevice take a reference on the ppp file. This ensures
* that ppp_destroy_interface() won't run before the device gets
diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c
index cac64b96d545..4d479e3c817d 100644
--- a/drivers/net/slip/slip.c
+++ b/drivers/net/slip/slip.c
@@ -855,6 +855,7 @@ err_free_chan:
sl->tty = NULL;
tty->disc_data = NULL;
clear_bit(SLF_INUSE, &sl->flags);
+ free_netdev(sl->dev);
err_exit:
rtnl_unlock();
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index e8089def5a46..8156b33ee3e7 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1615,7 +1615,6 @@ static int team_init(struct net_device *dev)
int err;
team->dev = dev;
- mutex_init(&team->lock);
team_set_no_mode(team);
team->pcpu_stats = netdev_alloc_pcpu_stats(struct team_pcpu_stats);
@@ -1642,7 +1641,8 @@ static int team_init(struct net_device *dev)
goto err_options_register;
netif_carrier_off(dev);
- netdev_lockdep_set_classes(dev);
+ lockdep_register_key(&team->team_lock_key);
+ __mutex_init(&team->lock, "team->team_lock_key", &team->team_lock_key);
return 0;
@@ -1673,6 +1673,7 @@ static void team_uninit(struct net_device *dev)
team_queue_override_fini(team);
mutex_unlock(&team->lock);
netdev_change_features(dev);
+ lockdep_unregister_key(&team->team_lock_key);
}
static void team_destructor(struct net_device *dev)
@@ -1976,8 +1977,15 @@ static int team_del_slave(struct net_device *dev, struct net_device *port_dev)
err = team_port_del(team, port_dev);
mutex_unlock(&team->lock);
- if (!err)
- netdev_change_features(dev);
+ if (err)
+ return err;
+
+ if (netif_is_team_master(port_dev)) {
+ lockdep_unregister_key(&team->team_lock_key);
+ lockdep_register_key(&team->team_lock_key);
+ lockdep_set_class(&team->lock, &team->team_lock_key);
+ }
+ netdev_change_features(dev);
return err;
}
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 812dc3a65efb..a8d3141582a5 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -526,8 +526,8 @@ static void tun_flow_update(struct tun_struct *tun, u32 rxhash,
e = tun_flow_find(head, rxhash);
if (likely(e)) {
/* TODO: keep queueing to old queue until it's empty? */
- if (e->queue_index != queue_index)
- e->queue_index = queue_index;
+ if (READ_ONCE(e->queue_index) != queue_index)
+ WRITE_ONCE(e->queue_index, queue_index);
if (e->updated != jiffies)
e->updated = jiffies;
sock_rps_record_flow_hash(e->rps_rxhash);
diff --git a/drivers/net/usb/ax88172a.c b/drivers/net/usb/ax88172a.c
index 011bd4cb546e..af3994e0853b 100644
--- a/drivers/net/usb/ax88172a.c
+++ b/drivers/net/usb/ax88172a.c
@@ -196,7 +196,7 @@ static int ax88172a_bind(struct usbnet *dev, struct usb_interface *intf)
/* Get the MAC address */
ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, buf, 0);
- if (ret < 0) {
+ if (ret < ETH_ALEN) {
netdev_err(dev->net, "Failed to read MAC address: %d\n", ret);
goto free;
}
diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c
index 32f53de5b1fe..fe630438f67b 100644
--- a/drivers/net/usb/cdc_ether.c
+++ b/drivers/net/usb/cdc_ether.c
@@ -787,6 +787,13 @@ static const struct usb_device_id products[] = {
.driver_info = 0,
},
+/* ThinkPad USB-C Dock Gen 2 (based on Realtek RTL8153) */
+{
+ USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0xa387, USB_CLASS_COMM,
+ USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE),
+ .driver_info = 0,
+},
+
/* NVIDIA Tegra USB 3.0 Ethernet Adapters (based on Realtek RTL8153) */
{
USB_DEVICE_AND_INTERFACE_INFO(NVIDIA_VENDOR_ID, 0x09ff, USB_CLASS_COMM,
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 00cab3f43a4c..c2c82e6391b4 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -578,8 +578,8 @@ static void cdc_ncm_set_dgram_size(struct usbnet *dev, int new_size)
/* read current mtu value from device */
err = usbnet_read_cmd(dev, USB_CDC_GET_MAX_DATAGRAM_SIZE,
USB_TYPE_CLASS | USB_DIR_IN | USB_RECIP_INTERFACE,
- 0, iface_no, &max_datagram_size, 2);
- if (err < 0) {
+ 0, iface_no, &max_datagram_size, sizeof(max_datagram_size));
+ if (err != sizeof(max_datagram_size)) {
dev_dbg(&dev->intf->dev, "GET_MAX_DATAGRAM_SIZE failed\n");
goto out;
}
@@ -590,7 +590,7 @@ static void cdc_ncm_set_dgram_size(struct usbnet *dev, int new_size)
max_datagram_size = cpu_to_le16(ctx->max_datagram_size);
err = usbnet_write_cmd(dev, USB_CDC_SET_MAX_DATAGRAM_SIZE,
USB_TYPE_CLASS | USB_DIR_OUT | USB_RECIP_INTERFACE,
- 0, iface_no, &max_datagram_size, 2);
+ 0, iface_no, &max_datagram_size, sizeof(max_datagram_size));
if (err < 0)
dev_dbg(&dev->intf->dev, "SET_MAX_DATAGRAM_SIZE failed\n");
diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index a505b2ab88b8..74849da031fa 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -186,7 +186,7 @@ struct hso_tiocmget {
int intr_completed;
struct usb_endpoint_descriptor *endp;
struct urb *urb;
- struct hso_serial_state_notification serial_state_notification;
+ struct hso_serial_state_notification *serial_state_notification;
u16 prev_UART_state_bitmap;
struct uart_icount icount;
};
@@ -1432,7 +1432,7 @@ static int tiocmget_submit_urb(struct hso_serial *serial,
usb_rcvintpipe(usb,
tiocmget->endp->
bEndpointAddress & 0x7F),
- &tiocmget->serial_state_notification,
+ tiocmget->serial_state_notification,
sizeof(struct hso_serial_state_notification),
tiocmget_intr_callback, serial,
tiocmget->endp->bInterval);
@@ -1479,7 +1479,7 @@ static void tiocmget_intr_callback(struct urb *urb)
/* wIndex should be the USB interface number of the port to which the
* notification applies, which should always be the Modem port.
*/
- serial_state_notification = &tiocmget->serial_state_notification;
+ serial_state_notification = tiocmget->serial_state_notification;
if (serial_state_notification->bmRequestType != BM_REQUEST_TYPE ||
serial_state_notification->bNotification != B_NOTIFICATION ||
le16_to_cpu(serial_state_notification->wValue) != W_VALUE ||
@@ -2565,6 +2565,8 @@ static void hso_free_tiomget(struct hso_serial *serial)
usb_free_urb(tiocmget->urb);
tiocmget->urb = NULL;
serial->tiocmget = NULL;
+ kfree(tiocmget->serial_state_notification);
+ tiocmget->serial_state_notification = NULL;
kfree(tiocmget);
}
}
@@ -2615,10 +2617,13 @@ static struct hso_device *hso_create_bulk_serial_device(
num_urbs = 2;
serial->tiocmget = kzalloc(sizeof(struct hso_tiocmget),
GFP_KERNEL);
+ serial->tiocmget->serial_state_notification
+ = kzalloc(sizeof(struct hso_serial_state_notification),
+ GFP_KERNEL);
/* it isn't going to break our heart if serial->tiocmget
* allocation fails don't bother checking this.
*/
- if (serial->tiocmget) {
+ if (serial->tiocmget && serial->tiocmget->serial_state_notification) {
tiocmget = serial->tiocmget;
tiocmget->endp = hso_get_ep(interface,
USB_ENDPOINT_XFER_INT,
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index 58f5a219fb65..f24a1b0b801f 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -1264,8 +1264,11 @@ static void lan78xx_status(struct lan78xx_net *dev, struct urb *urb)
netif_dbg(dev, link, dev->net, "PHY INTR: 0x%08x\n", intdata);
lan78xx_defer_kevent(dev, EVENT_LINK_RESET);
- if (dev->domain_data.phyirq > 0)
+ if (dev->domain_data.phyirq > 0) {
+ local_irq_disable();
generic_handle_irq(dev->domain_data.phyirq);
+ local_irq_enable();
+ }
} else
netdev_warn(dev->net,
"unexpected interrupt: 0x%08x\n", intdata);
@@ -3782,10 +3785,14 @@ static int lan78xx_probe(struct usb_interface *intf,
/* driver requires remote-wakeup capability during autosuspend. */
intf->needs_remote_wakeup = 1;
+ ret = lan78xx_phy_init(dev);
+ if (ret < 0)
+ goto out4;
+
ret = register_netdev(netdev);
if (ret != 0) {
netif_err(dev, probe, netdev, "couldn't register the device\n");
- goto out4;
+ goto out5;
}
usb_set_intfdata(intf, dev);
@@ -3798,14 +3805,10 @@ static int lan78xx_probe(struct usb_interface *intf,
pm_runtime_set_autosuspend_delay(&udev->dev,
DEFAULT_AUTOSUSPEND_DELAY);
- ret = lan78xx_phy_init(dev);
- if (ret < 0)
- goto out5;
-
return 0;
out5:
- unregister_netdev(netdev);
+ phy_disconnect(netdev->phydev);
out4:
usb_free_urb(dev->urb_intr);
out3:
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 3d77cd402ba9..4196c0e32740 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1327,6 +1327,7 @@ static const struct usb_device_id products[] = {
{QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */
{QMI_FIXED_INTF(0x2357, 0x9000, 4)}, /* TP-LINK MA260 */
{QMI_QUIRK_SET_DTR(0x1bc7, 0x1040, 2)}, /* Telit LE922A */
+ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1050, 2)}, /* Telit FN980 */
{QMI_FIXED_INTF(0x1bc7, 0x1100, 3)}, /* Telit ME910 */
{QMI_FIXED_INTF(0x1bc7, 0x1101, 3)}, /* Telit ME910 dual modem */
{QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */
@@ -1361,6 +1362,7 @@ static const struct usb_device_id products[] = {
{QMI_FIXED_INTF(0x413c, 0x81b6, 8)}, /* Dell Wireless 5811e */
{QMI_FIXED_INTF(0x413c, 0x81b6, 10)}, /* Dell Wireless 5811e */
{QMI_FIXED_INTF(0x413c, 0x81d7, 0)}, /* Dell Wireless 5821e */
+ {QMI_FIXED_INTF(0x413c, 0x81e0, 0)}, /* Dell Wireless 5821e with eSIM support*/
{QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)}, /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */
{QMI_FIXED_INTF(0x03f0, 0x9d1d, 1)}, /* HP lt4120 Snapdragon X5 LTE */
{QMI_FIXED_INTF(0x22de, 0x9061, 3)}, /* WeTelecom WPD-600N */
@@ -1369,6 +1371,8 @@ static const struct usb_device_id products[] = {
{QMI_QUIRK_SET_DTR(0x2c7c, 0x0191, 4)}, /* Quectel EG91 */
{QMI_FIXED_INTF(0x2c7c, 0x0296, 4)}, /* Quectel BG96 */
{QMI_QUIRK_SET_DTR(0x2cb7, 0x0104, 4)}, /* Fibocom NL678 series */
+ {QMI_FIXED_INTF(0x0489, 0xe0b4, 0)}, /* Foxconn T77W968 LTE */
+ {QMI_FIXED_INTF(0x0489, 0xe0b5, 0)}, /* Foxconn T77W968 LTE with eSIM support*/
/* 4. Gobi 1000 devices */
{QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index cee9fef925cd..b2507c59ba8b 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -4283,10 +4283,10 @@ static int rtl8152_close(struct net_device *netdev)
unregister_pm_notifier(&tp->pm_notifier);
#endif
tasklet_disable(&tp->tx_tl);
- napi_disable(&tp->napi);
clear_bit(WORK_ENABLE, &tp->flags);
usb_kill_urb(tp->intr_urb);
cancel_delayed_work_sync(&tp->schedule);
+ napi_disable(&tp->napi);
netif_stop_queue(netdev);
res = usb_autopm_get_interface(tp->intf);
@@ -4552,10 +4552,10 @@ static int rtl8152_pre_reset(struct usb_interface *intf)
netif_stop_queue(netdev);
tasklet_disable(&tp->tx_tl);
- napi_disable(&tp->napi);
clear_bit(WORK_ENABLE, &tp->flags);
usb_kill_urb(tp->intr_urb);
cancel_delayed_work_sync(&tp->schedule);
+ napi_disable(&tp->napi);
if (netif_carrier_ok(netdev)) {
mutex_lock(&tp->control);
tp->rtl_ops.disable(tp);
@@ -4673,7 +4673,7 @@ static int rtl8152_system_resume(struct r8152 *tp)
netif_device_attach(netdev);
- if (netif_running(netdev) && netdev->flags & IFF_UP) {
+ if (netif_running(netdev) && (netdev->flags & IFF_UP)) {
tp->rtl_ops.up(tp);
netif_carrier_off(netdev);
set_bit(WORK_ENABLE, &tp->flags);
@@ -5244,9 +5244,15 @@ static int rtl8152_set_tunable(struct net_device *netdev,
}
if (tp->rx_copybreak != val) {
- napi_disable(&tp->napi);
- tp->rx_copybreak = val;
- napi_enable(&tp->napi);
+ if (netdev->flags & IFF_UP) {
+ mutex_lock(&tp->control);
+ napi_disable(&tp->napi);
+ tp->rx_copybreak = val;
+ napi_enable(&tp->napi);
+ mutex_unlock(&tp->control);
+ } else {
+ tp->rx_copybreak = val;
+ }
}
break;
default:
@@ -5274,9 +5280,15 @@ static int rtl8152_set_ringparam(struct net_device *netdev,
return -EINVAL;
if (tp->rx_pending != ring->rx_pending) {
- napi_disable(&tp->napi);
- tp->rx_pending = ring->rx_pending;
- napi_enable(&tp->napi);
+ if (netdev->flags & IFF_UP) {
+ mutex_lock(&tp->control);
+ napi_disable(&tp->napi);
+ tp->rx_pending = ring->rx_pending;
+ napi_enable(&tp->napi);
+ mutex_unlock(&tp->control);
+ } else {
+ tp->rx_pending = ring->rx_pending;
+ }
}
return 0;
@@ -5755,6 +5767,7 @@ static const struct usb_device_id rtl8152_table[] = {
{REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x7205)},
{REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x720c)},
{REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x7214)},
+ {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0xa387)},
{REALTEK_USB_DEVICE(VENDOR_ID_LINKSYS, 0x0041)},
{REALTEK_USB_DEVICE(VENDOR_ID_NVIDIA, 0x09ff)},
{REALTEK_USB_DEVICE(VENDOR_ID_TPLINK, 0x0601)},
diff --git a/drivers/net/usb/sr9800.c b/drivers/net/usb/sr9800.c
index c5d4a0060124..681e0def6356 100644
--- a/drivers/net/usb/sr9800.c
+++ b/drivers/net/usb/sr9800.c
@@ -335,7 +335,7 @@ static void sr_set_multicast(struct net_device *net)
static int sr_mdio_read(struct net_device *net, int phy_id, int loc)
{
struct usbnet *dev = netdev_priv(net);
- __le16 res;
+ __le16 res = 0;
mutex_lock(&dev->phy_mutex);
sr_set_sw_mii(dev);
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index ee52bde058df..b8228f50bc94 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -865,7 +865,6 @@ static int vrf_dev_init(struct net_device *dev)
/* similarly, oper state is irrelevant; set to up to avoid confusion */
dev->operstate = IF_OPER_UP;
- netdev_lockdep_set_classes(dev);
return 0;
out_rth:
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 3d9bcc957f7d..8869154fad88 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2487,9 +2487,11 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
vni = tunnel_id_to_key32(info->key.tun_id);
ifindex = 0;
dst_cache = &info->dst_cache;
- if (info->options_len &&
- info->key.tun_flags & TUNNEL_VXLAN_OPT)
+ if (info->key.tun_flags & TUNNEL_VXLAN_OPT) {
+ if (info->options_len < sizeof(*md))
+ goto drop;
md = ip_tunnel_info_opts(info);
+ }
ttl = info->key.ttl;
tos = info->key.tos;
label = info->key.label;
@@ -3566,10 +3568,13 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev,
{
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct net_device *remote_dev = NULL;
struct vxlan_fdb *f = NULL;
bool unregister = false;
+ struct vxlan_rdst *dst;
int err;
+ dst = &vxlan->default_dst;
err = vxlan_dev_configure(net, dev, conf, false, extack);
if (err)
return err;
@@ -3577,14 +3582,14 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev,
dev->ethtool_ops = &vxlan_ethtool_ops;
/* create an fdb entry for a valid default destination */
- if (!vxlan_addr_any(&vxlan->default_dst.remote_ip)) {
+ if (!vxlan_addr_any(&dst->remote_ip)) {
err = vxlan_fdb_create(vxlan, all_zeros_mac,
- &vxlan->default_dst.remote_ip,
+ &dst->remote_ip,
NUD_REACHABLE | NUD_PERMANENT,
vxlan->cfg.dst_port,
- vxlan->default_dst.remote_vni,
- vxlan->default_dst.remote_vni,
- vxlan->default_dst.remote_ifindex,
+ dst->remote_vni,
+ dst->remote_vni,
+ dst->remote_ifindex,
NTF_SELF, &f);
if (err)
return err;
@@ -3595,26 +3600,41 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev,
goto errout;
unregister = true;
+ if (dst->remote_ifindex) {
+ remote_dev = __dev_get_by_index(net, dst->remote_ifindex);
+ if (!remote_dev)
+ goto errout;
+
+ err = netdev_upper_dev_link(remote_dev, dev, extack);
+ if (err)
+ goto errout;
+ }
+
err = rtnl_configure_link(dev, NULL);
if (err)
- goto errout;
+ goto unlink;
if (f) {
- vxlan_fdb_insert(vxlan, all_zeros_mac,
- vxlan->default_dst.remote_vni, f);
+ vxlan_fdb_insert(vxlan, all_zeros_mac, dst->remote_vni, f);
/* notify default fdb entry */
err = vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f),
RTM_NEWNEIGH, true, extack);
if (err) {
vxlan_fdb_destroy(vxlan, f, false, false);
+ if (remote_dev)
+ netdev_upper_dev_unlink(remote_dev, dev);
goto unregister;
}
}
list_add(&vxlan->next, &vn->vxlan_list);
+ if (remote_dev)
+ dst->remote_dev = remote_dev;
return 0;
-
+unlink:
+ if (remote_dev)
+ netdev_upper_dev_unlink(remote_dev, dev);
errout:
/* unregister_netdevice() destroys the default FDB entry with deletion
* notification. But the addition notification was not sent yet, so
@@ -3932,11 +3952,12 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
struct netlink_ext_ack *extack)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
- struct vxlan_rdst *dst = &vxlan->default_dst;
struct net_device *lowerdev;
struct vxlan_config conf;
+ struct vxlan_rdst *dst;
int err;
+ dst = &vxlan->default_dst;
err = vxlan_nl2conf(tb, data, dev, &conf, true, extack);
if (err)
return err;
@@ -3946,6 +3967,14 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
if (err)
return err;
+ if (dst->remote_dev == lowerdev)
+ lowerdev = NULL;
+
+ err = netdev_adjacent_change_prepare(dst->remote_dev, lowerdev, dev,
+ extack);
+ if (err)
+ return err;
+
/* handle default dst entry */
if (!vxlan_addr_equal(&conf.remote_ip, &dst->remote_ip)) {
u32 hash_index = fdb_head_index(vxlan, all_zeros_mac, conf.vni);
@@ -3962,6 +3991,8 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
NTF_SELF, true, extack);
if (err) {
spin_unlock_bh(&vxlan->hash_lock[hash_index]);
+ netdev_adjacent_change_abort(dst->remote_dev,
+ lowerdev, dev);
return err;
}
}
@@ -3979,6 +4010,11 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
if (conf.age_interval != vxlan->cfg.age_interval)
mod_timer(&vxlan->age_timer, jiffies);
+ netdev_adjacent_change_commit(dst->remote_dev, lowerdev, dev);
+ if (lowerdev && lowerdev != dst->remote_dev) {
+ dst->remote_dev = lowerdev;
+ netdev_update_lockdep_key(lowerdev);
+ }
vxlan_config_apply(dev, &conf, lowerdev, vxlan->net, true);
return 0;
}
@@ -3991,6 +4027,8 @@ static void vxlan_dellink(struct net_device *dev, struct list_head *head)
list_del(&vxlan->next);
unregister_netdevice_queue(dev, head);
+ if (vxlan->default_dst.remote_dev)
+ netdev_upper_dev_unlink(vxlan->default_dst.remote_dev, dev);
}
static size_t vxlan_get_size(const struct net_device *dev)
diff --git a/drivers/net/wimax/i2400m/op-rfkill.c b/drivers/net/wimax/i2400m/op-rfkill.c
index 8efb493ceec2..5c79f052cad2 100644
--- a/drivers/net/wimax/i2400m/op-rfkill.c
+++ b/drivers/net/wimax/i2400m/op-rfkill.c
@@ -127,12 +127,12 @@ int i2400m_op_rfkill_sw_toggle(struct wimax_dev *wimax_dev,
"%d\n", result);
result = 0;
error_cmd:
- kfree(cmd);
kfree_skb(ack_skb);
error_msg_to_dev:
error_alloc:
d_fnend(4, dev, "(wimax_dev %p state %d) = %d\n",
wimax_dev, state, result);
+ kfree(cmd);
return result;
}
diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c
index dc45d16e8d21..383d4fa555a8 100644
--- a/drivers/net/wireless/ath/ath10k/core.c
+++ b/drivers/net/wireless/ath/ath10k/core.c
@@ -2118,12 +2118,15 @@ static int ath10k_init_uart(struct ath10k *ar)
return ret;
}
- if (!uart_print && ar->hw_params.uart_pin_workaround) {
- ret = ath10k_bmi_write32(ar, hi_dbg_uart_txpin,
- ar->hw_params.uart_pin);
- if (ret) {
- ath10k_warn(ar, "failed to set UART TX pin: %d", ret);
- return ret;
+ if (!uart_print) {
+ if (ar->hw_params.uart_pin_workaround) {
+ ret = ath10k_bmi_write32(ar, hi_dbg_uart_txpin,
+ ar->hw_params.uart_pin);
+ if (ret) {
+ ath10k_warn(ar, "failed to set UART TX pin: %d",
+ ret);
+ return ret;
+ }
}
return 0;
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
index 7573af2d88ce..c2db758b9d54 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
@@ -162,12 +162,13 @@ int iwl_acpi_get_mcc(struct device *dev, char *mcc)
wifi_pkg = iwl_acpi_get_wifi_pkg(dev, data, ACPI_WRDD_WIFI_DATA_SIZE,
&tbl_rev);
- if (IS_ERR(wifi_pkg) || tbl_rev != 0) {
+ if (IS_ERR(wifi_pkg)) {
ret = PTR_ERR(wifi_pkg);
goto out_free;
}
- if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) {
+ if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER ||
+ tbl_rev != 0) {
ret = -EINVAL;
goto out_free;
}
@@ -224,12 +225,13 @@ int iwl_acpi_get_eckv(struct device *dev, u32 *extl_clk)
wifi_pkg = iwl_acpi_get_wifi_pkg(dev, data, ACPI_ECKV_WIFI_DATA_SIZE,
&tbl_rev);
- if (IS_ERR(wifi_pkg) || tbl_rev != 0) {
+ if (IS_ERR(wifi_pkg)) {
ret = PTR_ERR(wifi_pkg);
goto out_free;
}
- if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) {
+ if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER ||
+ tbl_rev != 0) {
ret = -EINVAL;
goto out_free;
}
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h b/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h
index 39c64850cb6f..c0750ced5ac2 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h
@@ -520,7 +520,7 @@ struct iwl_scan_dwell {
} __packed;
/**
- * struct iwl_scan_config
+ * struct iwl_scan_config_v1
* @flags: enum scan_config_flags
* @tx_chains: valid_tx antenna - ANT_* definitions
* @rx_chains: valid_rx antenna - ANT_* definitions
@@ -552,7 +552,7 @@ struct iwl_scan_config_v1 {
#define SCAN_LB_LMAC_IDX 0
#define SCAN_HB_LMAC_IDX 1
-struct iwl_scan_config {
+struct iwl_scan_config_v2 {
__le32 flags;
__le32 tx_chains;
__le32 rx_chains;
@@ -564,6 +564,24 @@ struct iwl_scan_config {
u8 bcast_sta_id;
u8 channel_flags;
u8 channel_array[];
+} __packed; /* SCAN_CONFIG_DB_CMD_API_S_2 */
+
+/**
+ * struct iwl_scan_config
+ * @enable_cam_mode: whether to enable CAM mode.
+ * @enable_promiscouos_mode: whether to enable promiscouos mode
+ * @bcast_sta_id: the index of the station in the fw
+ * @reserved: reserved
+ * @tx_chains: valid_tx antenna - ANT_* definitions
+ * @rx_chains: valid_rx antenna - ANT_* definitions
+ */
+struct iwl_scan_config {
+ u8 enable_cam_mode;
+ u8 enable_promiscouos_mode;
+ u8 bcast_sta_id;
+ u8 reserved;
+ __le32 tx_chains;
+ __le32 rx_chains;
} __packed; /* SCAN_CONFIG_DB_CMD_API_S_3 */
/**
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
index 5c8602de9168..87421807e040 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
@@ -646,6 +646,7 @@ static struct scatterlist *alloc_sgtable(int size)
if (new_page)
__free_page(new_page);
}
+ kfree(table);
return NULL;
}
alloc_size = min_t(int, size, PAGE_SIZE);
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/file.h b/drivers/net/wireless/intel/iwlwifi/fw/file.h
index 423cc0cf8e78..0d5bc4ce5c07 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/file.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/file.h
@@ -288,6 +288,8 @@ typedef unsigned int __bitwise iwl_ucode_tlv_api_t;
* STA_CONTEXT_DOT11AX_API_S
* @IWL_UCODE_TLV_CAPA_SAR_TABLE_VER: This ucode supports different sar
* version tables.
+ * @IWL_UCODE_TLV_API_REDUCED_SCAN_CONFIG: This ucode supports v3 of
+ * SCAN_CONFIG_DB_CMD_API_S.
*
* @NUM_IWL_UCODE_TLV_API: number of bits used
*/
@@ -321,6 +323,7 @@ enum iwl_ucode_tlv_api {
IWL_UCODE_TLV_API_WOWLAN_TCP_SYN_WAKE = (__force iwl_ucode_tlv_api_t)53,
IWL_UCODE_TLV_API_FTM_RTT_ACCURACY = (__force iwl_ucode_tlv_api_t)54,
IWL_UCODE_TLV_API_SAR_TABLE_VER = (__force iwl_ucode_tlv_api_t)55,
+ IWL_UCODE_TLV_API_REDUCED_SCAN_CONFIG = (__force iwl_ucode_tlv_api_t)56,
IWL_UCODE_TLV_API_ADWELL_HB_DEF_N_AP = (__force iwl_ucode_tlv_api_t)57,
IWL_UCODE_TLV_API_SCAN_EXT_CHAN_VER = (__force iwl_ucode_tlv_api_t)58,
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h
index cb4c5514a556..695bbaa86273 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h
@@ -279,6 +279,7 @@
* Indicates MAC is entering a power-saving sleep power-down.
* Not a good time to access device-internal resources.
*/
+#define CSR_GP_CNTRL_REG_FLAG_INIT_DONE (0x00000004)
#define CSR_GP_CNTRL_REG_FLAG_GOING_TO_SLEEP (0x00000010)
#define CSR_GP_CNTRL_REG_FLAG_XTAL_ON (0x00000400)
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-io.h b/drivers/net/wireless/intel/iwlwifi/iwl-io.h
index f8e4f0f5de0c..f09e368c7040 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-io.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-io.h
@@ -112,38 +112,38 @@ int iwl_dump_fh(struct iwl_trans *trans, char **buf);
*/
static inline u32 iwl_umac_prph(struct iwl_trans *trans, u32 ofs)
{
- return ofs + trans->cfg->trans.umac_prph_offset;
+ return ofs + trans->trans_cfg->umac_prph_offset;
}
static inline u32 iwl_read_umac_prph_no_grab(struct iwl_trans *trans, u32 ofs)
{
return iwl_read_prph_no_grab(trans, ofs +
- trans->cfg->trans.umac_prph_offset);
+ trans->trans_cfg->umac_prph_offset);
}
static inline u32 iwl_read_umac_prph(struct iwl_trans *trans, u32 ofs)
{
- return iwl_read_prph(trans, ofs + trans->cfg->trans.umac_prph_offset);
+ return iwl_read_prph(trans, ofs + trans->trans_cfg->umac_prph_offset);
}
static inline void iwl_write_umac_prph_no_grab(struct iwl_trans *trans, u32 ofs,
u32 val)
{
- iwl_write_prph_no_grab(trans, ofs + trans->cfg->trans.umac_prph_offset,
+ iwl_write_prph_no_grab(trans, ofs + trans->trans_cfg->umac_prph_offset,
val);
}
static inline void iwl_write_umac_prph(struct iwl_trans *trans, u32 ofs,
u32 val)
{
- iwl_write_prph(trans, ofs + trans->cfg->trans.umac_prph_offset, val);
+ iwl_write_prph(trans, ofs + trans->trans_cfg->umac_prph_offset, val);
}
static inline int iwl_poll_umac_prph_bit(struct iwl_trans *trans, u32 addr,
u32 bits, u32 mask, int timeout)
{
return iwl_poll_prph_bit(trans, addr +
- trans->cfg->trans.umac_prph_offset,
+ trans->trans_cfg->umac_prph_offset,
bits, mask, timeout);
}
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h
index f47e0f97acf8..23c25a7665f2 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h
@@ -449,6 +449,11 @@ enum {
#define PERSISTENCE_BIT BIT(12)
#define PREG_WFPM_ACCESS BIT(12)
+#define HPM_HIPM_GEN_CFG 0xA03458
+#define HPM_HIPM_GEN_CFG_CR_PG_EN BIT(0)
+#define HPM_HIPM_GEN_CFG_CR_SLP_EN BIT(1)
+#define HPM_HIPM_GEN_CFG_CR_FORCE_ACTIVE BIT(10)
+
#define UREG_DOORBELL_TO_ISR6 0xA05C04
#define UREG_DOORBELL_TO_ISR6_NMI_BIT BIT(0)
#define UREG_DOORBELL_TO_ISR6_SUSPEND BIT(18)
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index 32a5e4e5461f..d9eb2b286438 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -420,6 +420,9 @@ static int iwl_run_unified_mvm_ucode(struct iwl_mvm *mvm, bool read_nvm)
};
int ret;
+ if (mvm->trans->cfg->tx_with_siso_diversity)
+ init_cfg.init_flags |= cpu_to_le32(BIT(IWL_INIT_PHY));
+
lockdep_assert_held(&mvm->mutex);
mvm->rfkill_safe_init_done = false;
@@ -694,12 +697,13 @@ static int iwl_mvm_sar_get_wrds_table(struct iwl_mvm *mvm)
wifi_pkg = iwl_acpi_get_wifi_pkg(mvm->dev, data,
ACPI_WRDS_WIFI_DATA_SIZE, &tbl_rev);
- if (IS_ERR(wifi_pkg) || tbl_rev != 0) {
+ if (IS_ERR(wifi_pkg)) {
ret = PTR_ERR(wifi_pkg);
goto out_free;
}
- if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) {
+ if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER ||
+ tbl_rev != 0) {
ret = -EINVAL;
goto out_free;
}
@@ -731,13 +735,14 @@ static int iwl_mvm_sar_get_ewrd_table(struct iwl_mvm *mvm)
wifi_pkg = iwl_acpi_get_wifi_pkg(mvm->dev, data,
ACPI_EWRD_WIFI_DATA_SIZE, &tbl_rev);
- if (IS_ERR(wifi_pkg) || tbl_rev != 0) {
+ if (IS_ERR(wifi_pkg)) {
ret = PTR_ERR(wifi_pkg);
goto out_free;
}
if ((wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) ||
- (wifi_pkg->package.elements[2].type != ACPI_TYPE_INTEGER)) {
+ (wifi_pkg->package.elements[2].type != ACPI_TYPE_INTEGER) ||
+ tbl_rev != 0) {
ret = -EINVAL;
goto out_free;
}
@@ -791,11 +796,16 @@ static int iwl_mvm_sar_get_wgds_table(struct iwl_mvm *mvm)
wifi_pkg = iwl_acpi_get_wifi_pkg(mvm->dev, data,
ACPI_WGDS_WIFI_DATA_SIZE, &tbl_rev);
- if (IS_ERR(wifi_pkg) || tbl_rev > 1) {
+ if (IS_ERR(wifi_pkg)) {
ret = PTR_ERR(wifi_pkg);
goto out_free;
}
+ if (tbl_rev != 0) {
+ ret = -EINVAL;
+ goto out_free;
+ }
+
mvm->geo_rev = tbl_rev;
for (i = 0; i < ACPI_NUM_GEO_PROFILES; i++) {
for (j = 0; j < ACPI_GEO_TABLE_SIZE; j++) {
@@ -889,15 +899,17 @@ static bool iwl_mvm_sar_geo_support(struct iwl_mvm *mvm)
* firmware versions. Unfortunately, we don't have a TLV API
* flag to rely on, so rely on the major version which is in
* the first byte of ucode_ver. This was implemented
- * initially on version 38 and then backported to29 and 17.
- * The intention was to have it in 36 as well, but not all
- * 8000 family got this feature enabled. The 8000 family is
- * the only one using version 36, so skip this version
- * entirely.
+ * initially on version 38 and then backported to 17. It was
+ * also backported to 29, but only for 7265D devices. The
+ * intention was to have it in 36 as well, but not all 8000
+ * family got this feature enabled. The 8000 family is the
+ * only one using version 36, so skip this version entirely.
*/
return IWL_UCODE_SERIAL(mvm->fw->ucode_ver) >= 38 ||
- IWL_UCODE_SERIAL(mvm->fw->ucode_ver) == 29 ||
- IWL_UCODE_SERIAL(mvm->fw->ucode_ver) == 17;
+ IWL_UCODE_SERIAL(mvm->fw->ucode_ver) == 17 ||
+ (IWL_UCODE_SERIAL(mvm->fw->ucode_ver) == 29 &&
+ ((mvm->trans->hw_rev & CSR_HW_REV_TYPE_MSK) ==
+ CSR_HW_REV_TYPE_7265D));
}
int iwl_mvm_get_sar_geo_profile(struct iwl_mvm *mvm)
@@ -1020,11 +1032,16 @@ static int iwl_mvm_get_ppag_table(struct iwl_mvm *mvm)
wifi_pkg = iwl_acpi_get_wifi_pkg(mvm->dev, data,
ACPI_PPAG_WIFI_DATA_SIZE, &tbl_rev);
- if (IS_ERR(wifi_pkg) || tbl_rev != 0) {
+ if (IS_ERR(wifi_pkg)) {
ret = PTR_ERR(wifi_pkg);
goto out_free;
}
+ if (tbl_rev != 0) {
+ ret = -EINVAL;
+ goto out_free;
+ }
+
enabled = &wifi_pkg->package.elements[1];
if (enabled->type != ACPI_TYPE_INTEGER ||
(enabled->integer.value != 0 && enabled->integer.value != 1)) {
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index cd1b10042fbf..d31f96c3f925 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -4881,11 +4881,11 @@ void iwl_mvm_sync_rx_queues_internal(struct iwl_mvm *mvm,
if (!iwl_mvm_has_new_rx_api(mvm))
return;
- notif->cookie = mvm->queue_sync_cookie;
-
- if (notif->sync)
+ if (notif->sync) {
+ notif->cookie = mvm->queue_sync_cookie;
atomic_set(&mvm->queue_sync_counter,
mvm->trans->num_rx_queues);
+ }
ret = iwl_mvm_notify_rx_queue(mvm, qmask, (u8 *)notif,
size, !notif->sync);
@@ -4905,7 +4905,8 @@ void iwl_mvm_sync_rx_queues_internal(struct iwl_mvm *mvm,
out:
atomic_set(&mvm->queue_sync_counter, 0);
- mvm->queue_sync_cookie++;
+ if (notif->sync)
+ mvm->queue_sync_cookie++;
}
static void iwl_mvm_sync_rx_queues(struct ieee80211_hw *hw)
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
index 843d00bf2bd5..5ca50f39a023 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
@@ -1405,6 +1405,12 @@ static inline bool iwl_mvm_is_scan_ext_chan_supported(struct iwl_mvm *mvm)
IWL_UCODE_TLV_API_SCAN_EXT_CHAN_VER);
}
+static inline bool iwl_mvm_is_reduced_config_scan_supported(struct iwl_mvm *mvm)
+{
+ return fw_has_api(&mvm->fw->ucode_capa,
+ IWL_UCODE_TLV_API_REDUCED_SCAN_CONFIG);
+}
+
static inline bool iwl_mvm_has_new_rx_stats_api(struct iwl_mvm *mvm)
{
return fw_has_api(&mvm->fw->ucode_capa,
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
index f6b3045badbd..fcafa22ec6ce 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
@@ -1137,11 +1137,11 @@ static void iwl_mvm_fill_scan_config_v1(struct iwl_mvm *mvm, void *config,
iwl_mvm_fill_channels(mvm, cfg->channel_array, max_channels);
}
-static void iwl_mvm_fill_scan_config(struct iwl_mvm *mvm, void *config,
- u32 flags, u8 channel_flags,
- u32 max_channels)
+static void iwl_mvm_fill_scan_config_v2(struct iwl_mvm *mvm, void *config,
+ u32 flags, u8 channel_flags,
+ u32 max_channels)
{
- struct iwl_scan_config *cfg = config;
+ struct iwl_scan_config_v2 *cfg = config;
cfg->flags = cpu_to_le32(flags);
cfg->tx_chains = cpu_to_le32(iwl_mvm_get_valid_tx_ant(mvm));
@@ -1185,7 +1185,7 @@ static void iwl_mvm_fill_scan_config(struct iwl_mvm *mvm, void *config,
iwl_mvm_fill_channels(mvm, cfg->channel_array, max_channels);
}
-int iwl_mvm_config_scan(struct iwl_mvm *mvm)
+static int iwl_mvm_legacy_config_scan(struct iwl_mvm *mvm)
{
void *cfg;
int ret, cmd_size;
@@ -1217,7 +1217,7 @@ int iwl_mvm_config_scan(struct iwl_mvm *mvm)
}
if (iwl_mvm_cdb_scan_api(mvm))
- cmd_size = sizeof(struct iwl_scan_config);
+ cmd_size = sizeof(struct iwl_scan_config_v2);
else
cmd_size = sizeof(struct iwl_scan_config_v1);
cmd_size += num_channels;
@@ -1254,8 +1254,8 @@ int iwl_mvm_config_scan(struct iwl_mvm *mvm)
flags |= (iwl_mvm_is_scan_fragmented(hb_type)) ?
SCAN_CONFIG_FLAG_SET_LMAC2_FRAGMENTED :
SCAN_CONFIG_FLAG_CLEAR_LMAC2_FRAGMENTED;
- iwl_mvm_fill_scan_config(mvm, cfg, flags, channel_flags,
- num_channels);
+ iwl_mvm_fill_scan_config_v2(mvm, cfg, flags, channel_flags,
+ num_channels);
} else {
iwl_mvm_fill_scan_config_v1(mvm, cfg, flags, channel_flags,
num_channels);
@@ -1277,6 +1277,30 @@ int iwl_mvm_config_scan(struct iwl_mvm *mvm)
return ret;
}
+int iwl_mvm_config_scan(struct iwl_mvm *mvm)
+{
+ struct iwl_scan_config cfg;
+ struct iwl_host_cmd cmd = {
+ .id = iwl_cmd_id(SCAN_CFG_CMD, IWL_ALWAYS_LONG_GROUP, 0),
+ .len[0] = sizeof(cfg),
+ .data[0] = &cfg,
+ .dataflags[0] = IWL_HCMD_DFL_NOCOPY,
+ };
+
+ if (!iwl_mvm_is_reduced_config_scan_supported(mvm))
+ return iwl_mvm_legacy_config_scan(mvm);
+
+ memset(&cfg, 0, sizeof(cfg));
+
+ cfg.bcast_sta_id = mvm->aux_sta.sta_id;
+ cfg.tx_chains = cpu_to_le32(iwl_mvm_get_valid_tx_ant(mvm));
+ cfg.rx_chains = cpu_to_le32(iwl_mvm_scan_rx_ant(mvm));
+
+ IWL_DEBUG_SCAN(mvm, "Sending UMAC scan config\n");
+
+ return iwl_mvm_send_cmd(mvm, &cmd);
+}
+
static int iwl_mvm_scan_uid_by_status(struct iwl_mvm *mvm, int status)
{
int i;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
index 0bedba4c61f2..b3768d5d852a 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
@@ -1482,6 +1482,13 @@ static void iwl_mvm_realloc_queues_after_restart(struct iwl_mvm *mvm,
mvm_sta->sta_id, i);
txq_id = iwl_mvm_tvqm_enable_txq(mvm, mvm_sta->sta_id,
i, wdg);
+ /*
+ * on failures, just set it to IWL_MVM_INVALID_QUEUE
+ * to try again later, we have no other good way of
+ * failing here
+ */
+ if (txq_id < 0)
+ txq_id = IWL_MVM_INVALID_QUEUE;
tid_data->txq_id = txq_id;
/*
@@ -1950,30 +1957,73 @@ void iwl_mvm_dealloc_int_sta(struct iwl_mvm *mvm, struct iwl_mvm_int_sta *sta)
sta->sta_id = IWL_MVM_INVALID_STA;
}
-static void iwl_mvm_enable_aux_snif_queue(struct iwl_mvm *mvm, u16 *queue,
+static void iwl_mvm_enable_aux_snif_queue(struct iwl_mvm *mvm, u16 queue,
u8 sta_id, u8 fifo)
{
unsigned int wdg_timeout = iwlmvm_mod_params.tfd_q_hang_detect ?
mvm->trans->trans_cfg->base_params->wd_timeout :
IWL_WATCHDOG_DISABLED;
+ struct iwl_trans_txq_scd_cfg cfg = {
+ .fifo = fifo,
+ .sta_id = sta_id,
+ .tid = IWL_MAX_TID_COUNT,
+ .aggregate = false,
+ .frame_limit = IWL_FRAME_LIMIT,
+ };
+
+ WARN_ON(iwl_mvm_has_new_tx_api(mvm));
+
+ iwl_mvm_enable_txq(mvm, NULL, queue, 0, &cfg, wdg_timeout);
+}
+
+static int iwl_mvm_enable_aux_snif_queue_tvqm(struct iwl_mvm *mvm, u8 sta_id)
+{
+ unsigned int wdg_timeout = iwlmvm_mod_params.tfd_q_hang_detect ?
+ mvm->trans->trans_cfg->base_params->wd_timeout :
+ IWL_WATCHDOG_DISABLED;
+
+ WARN_ON(!iwl_mvm_has_new_tx_api(mvm));
+
+ return iwl_mvm_tvqm_enable_txq(mvm, sta_id, IWL_MAX_TID_COUNT,
+ wdg_timeout);
+}
+static int iwl_mvm_add_int_sta_with_queue(struct iwl_mvm *mvm, int macidx,
+ int maccolor,
+ struct iwl_mvm_int_sta *sta,
+ u16 *queue, int fifo)
+{
+ int ret;
+
+ /* Map queue to fifo - needs to happen before adding station */
+ if (!iwl_mvm_has_new_tx_api(mvm))
+ iwl_mvm_enable_aux_snif_queue(mvm, *queue, sta->sta_id, fifo);
+
+ ret = iwl_mvm_add_int_sta_common(mvm, sta, NULL, macidx, maccolor);
+ if (ret) {
+ if (!iwl_mvm_has_new_tx_api(mvm))
+ iwl_mvm_disable_txq(mvm, NULL, *queue,
+ IWL_MAX_TID_COUNT, 0);
+ return ret;
+ }
+
+ /*
+ * For 22000 firmware and on we cannot add queue to a station unknown
+ * to firmware so enable queue here - after the station was added
+ */
if (iwl_mvm_has_new_tx_api(mvm)) {
- int tvqm_queue =
- iwl_mvm_tvqm_enable_txq(mvm, sta_id,
- IWL_MAX_TID_COUNT,
- wdg_timeout);
- *queue = tvqm_queue;
- } else {
- struct iwl_trans_txq_scd_cfg cfg = {
- .fifo = fifo,
- .sta_id = sta_id,
- .tid = IWL_MAX_TID_COUNT,
- .aggregate = false,
- .frame_limit = IWL_FRAME_LIMIT,
- };
+ int txq;
- iwl_mvm_enable_txq(mvm, NULL, *queue, 0, &cfg, wdg_timeout);
+ txq = iwl_mvm_enable_aux_snif_queue_tvqm(mvm, sta->sta_id);
+ if (txq < 0) {
+ iwl_mvm_rm_sta_common(mvm, sta->sta_id);
+ return txq;
+ }
+
+ *queue = txq;
}
+
+ return 0;
}
int iwl_mvm_add_aux_sta(struct iwl_mvm *mvm)
@@ -1989,59 +2039,26 @@ int iwl_mvm_add_aux_sta(struct iwl_mvm *mvm)
if (ret)
return ret;
- /* Map Aux queue to fifo - needs to happen before adding Aux station */
- if (!iwl_mvm_has_new_tx_api(mvm))
- iwl_mvm_enable_aux_snif_queue(mvm, &mvm->aux_queue,
- mvm->aux_sta.sta_id,
- IWL_MVM_TX_FIFO_MCAST);
-
- ret = iwl_mvm_add_int_sta_common(mvm, &mvm->aux_sta, NULL,
- MAC_INDEX_AUX, 0);
+ ret = iwl_mvm_add_int_sta_with_queue(mvm, MAC_INDEX_AUX, 0,
+ &mvm->aux_sta, &mvm->aux_queue,
+ IWL_MVM_TX_FIFO_MCAST);
if (ret) {
iwl_mvm_dealloc_int_sta(mvm, &mvm->aux_sta);
return ret;
}
- /*
- * For 22000 firmware and on we cannot add queue to a station unknown
- * to firmware so enable queue here - after the station was added
- */
- if (iwl_mvm_has_new_tx_api(mvm))
- iwl_mvm_enable_aux_snif_queue(mvm, &mvm->aux_queue,
- mvm->aux_sta.sta_id,
- IWL_MVM_TX_FIFO_MCAST);
-
return 0;
}
int iwl_mvm_add_snif_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
{
struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
- int ret;
lockdep_assert_held(&mvm->mutex);
- /* Map snif queue to fifo - must happen before adding snif station */
- if (!iwl_mvm_has_new_tx_api(mvm))
- iwl_mvm_enable_aux_snif_queue(mvm, &mvm->snif_queue,
- mvm->snif_sta.sta_id,
+ return iwl_mvm_add_int_sta_with_queue(mvm, mvmvif->id, mvmvif->color,
+ &mvm->snif_sta, &mvm->snif_queue,
IWL_MVM_TX_FIFO_BE);
-
- ret = iwl_mvm_add_int_sta_common(mvm, &mvm->snif_sta, vif->addr,
- mvmvif->id, 0);
- if (ret)
- return ret;
-
- /*
- * For 22000 firmware and on we cannot add queue to a station unknown
- * to firmware so enable queue here - after the station was added
- */
- if (iwl_mvm_has_new_tx_api(mvm))
- iwl_mvm_enable_aux_snif_queue(mvm, &mvm->snif_queue,
- mvm->snif_sta.sta_id,
- IWL_MVM_TX_FIFO_BE);
-
- return 0;
}
int iwl_mvm_rm_snif_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
@@ -2133,6 +2150,10 @@ int iwl_mvm_send_add_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
queue = iwl_mvm_tvqm_enable_txq(mvm, bsta->sta_id,
IWL_MAX_TID_COUNT,
wdg_timeout);
+ if (queue < 0) {
+ iwl_mvm_rm_sta_common(mvm, bsta->sta_id);
+ return queue;
+ }
if (vif->type == NL80211_IFTYPE_AP ||
vif->type == NL80211_IFTYPE_ADHOC)
@@ -2307,10 +2328,8 @@ int iwl_mvm_add_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
}
ret = iwl_mvm_add_int_sta_common(mvm, msta, maddr,
mvmvif->id, mvmvif->color);
- if (ret) {
- iwl_mvm_dealloc_int_sta(mvm, msta);
- return ret;
- }
+ if (ret)
+ goto err;
/*
* Enable cab queue after the ADD_STA command is sent.
@@ -2323,6 +2342,10 @@ int iwl_mvm_add_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
int queue = iwl_mvm_tvqm_enable_txq(mvm, msta->sta_id,
0,
timeout);
+ if (queue < 0) {
+ ret = queue;
+ goto err;
+ }
mvmvif->cab_queue = queue;
} else if (!fw_has_api(&mvm->fw->ucode_capa,
IWL_UCODE_TLV_API_STA_TYPE))
@@ -2330,6 +2353,9 @@ int iwl_mvm_add_mcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
timeout);
return 0;
+err:
+ iwl_mvm_dealloc_int_sta(mvm, msta);
+ return ret;
}
static int __iwl_mvm_remove_sta_key(struct iwl_mvm *mvm, u8 sta_id,
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c
index 75fa8a6aafee..74980382e64c 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c
@@ -107,13 +107,9 @@ int iwl_pcie_ctxt_info_gen3_init(struct iwl_trans *trans,
/* allocate ucode sections in dram and set addresses */
ret = iwl_pcie_init_fw_sec(trans, fw, &prph_scratch->dram);
- if (ret) {
- dma_free_coherent(trans->dev,
- sizeof(*prph_scratch),
- prph_scratch,
- trans_pcie->prph_scratch_dma_addr);
- return ret;
- }
+ if (ret)
+ goto err_free_prph_scratch;
+
/* Allocate prph information
* currently we don't assign to the prph info anything, but it would get
@@ -121,16 +117,20 @@ int iwl_pcie_ctxt_info_gen3_init(struct iwl_trans *trans,
prph_info = dma_alloc_coherent(trans->dev, sizeof(*prph_info),
&trans_pcie->prph_info_dma_addr,
GFP_KERNEL);
- if (!prph_info)
- return -ENOMEM;
+ if (!prph_info) {
+ ret = -ENOMEM;
+ goto err_free_prph_scratch;
+ }
/* Allocate context info */
ctxt_info_gen3 = dma_alloc_coherent(trans->dev,
sizeof(*ctxt_info_gen3),
&trans_pcie->ctxt_info_dma_addr,
GFP_KERNEL);
- if (!ctxt_info_gen3)
- return -ENOMEM;
+ if (!ctxt_info_gen3) {
+ ret = -ENOMEM;
+ goto err_free_prph_info;
+ }
ctxt_info_gen3->prph_info_base_addr =
cpu_to_le64(trans_pcie->prph_info_dma_addr);
@@ -186,6 +186,20 @@ int iwl_pcie_ctxt_info_gen3_init(struct iwl_trans *trans,
iwl_set_bit(trans, CSR_GP_CNTRL, CSR_AUTO_FUNC_INIT);
return 0;
+
+err_free_prph_info:
+ dma_free_coherent(trans->dev,
+ sizeof(*prph_info),
+ prph_info,
+ trans_pcie->prph_info_dma_addr);
+
+err_free_prph_scratch:
+ dma_free_coherent(trans->dev,
+ sizeof(*prph_scratch),
+ prph_scratch,
+ trans_pcie->prph_scratch_dma_addr);
+ return ret;
+
}
void iwl_pcie_ctxt_info_gen3_free(struct iwl_trans *trans)
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index e29c47744ef5..040cec17d3ad 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -513,31 +513,33 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
{IWL_PCI_DEVICE(0x24FD, 0x9074, iwl8265_2ac_cfg)},
/* 9000 Series */
- {IWL_PCI_DEVICE(0x02F0, 0x0030, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x0034, iwl9560_2ac_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x0038, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x003C, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x0060, iwl9461_2ac_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x0064, iwl9461_2ac_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x00A0, iwl9462_2ac_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x00A4, iwl9462_2ac_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x0230, iwl9560_2ac_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x0234, iwl9560_2ac_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x0238, iwl9560_2ac_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x023C, iwl9560_2ac_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x0260, iwl9461_2ac_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x0264, iwl9461_2ac_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x02A0, iwl9462_2ac_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x02A4, iwl9462_2ac_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x1551, iwl9560_killer_s_2ac_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x1552, iwl9560_killer_i_2ac_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x2030, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x2034, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x4030, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x4034, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x40A4, iwl9462_2ac_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x4234, iwl9560_2ac_cfg_quz_a0_jf_b0_soc)},
- {IWL_PCI_DEVICE(0x02F0, 0x42A4, iwl9462_2ac_cfg_quz_a0_jf_b0_soc)},
+ {IWL_PCI_DEVICE(0x02F0, 0x0030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x0034, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x0038, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x003C, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x0060, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x0064, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x00A0, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x00A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x0230, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x0234, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x0238, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x023C, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x0260, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x0264, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x02A0, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x02A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x1030, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x1551, killer1550s_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x1552, killer1550i_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x2030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x2034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x4030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x4034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x40A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x4234, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x02F0, 0x42A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+
{IWL_PCI_DEVICE(0x06F0, 0x0030, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)},
{IWL_PCI_DEVICE(0x06F0, 0x0034, iwl9560_2ac_cfg_quz_a0_jf_b0_soc)},
{IWL_PCI_DEVICE(0x06F0, 0x0038, iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc)},
@@ -571,20 +573,20 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
{IWL_PCI_DEVICE(0x2526, 0x0034, iwl9560_2ac_cfg)},
{IWL_PCI_DEVICE(0x2526, 0x0038, iwl9560_2ac_160_cfg)},
{IWL_PCI_DEVICE(0x2526, 0x003C, iwl9560_2ac_160_cfg)},
- {IWL_PCI_DEVICE(0x2526, 0x0060, iwl9460_2ac_cfg)},
- {IWL_PCI_DEVICE(0x2526, 0x0064, iwl9460_2ac_cfg)},
- {IWL_PCI_DEVICE(0x2526, 0x00A0, iwl9460_2ac_cfg)},
- {IWL_PCI_DEVICE(0x2526, 0x00A4, iwl9460_2ac_cfg)},
+ {IWL_PCI_DEVICE(0x2526, 0x0060, iwl9461_2ac_cfg_soc)},
+ {IWL_PCI_DEVICE(0x2526, 0x0064, iwl9461_2ac_cfg_soc)},
+ {IWL_PCI_DEVICE(0x2526, 0x00A0, iwl9462_2ac_cfg_soc)},
+ {IWL_PCI_DEVICE(0x2526, 0x00A4, iwl9462_2ac_cfg_soc)},
{IWL_PCI_DEVICE(0x2526, 0x0210, iwl9260_2ac_cfg)},
{IWL_PCI_DEVICE(0x2526, 0x0214, iwl9260_2ac_cfg)},
{IWL_PCI_DEVICE(0x2526, 0x0230, iwl9560_2ac_cfg)},
{IWL_PCI_DEVICE(0x2526, 0x0234, iwl9560_2ac_cfg)},
{IWL_PCI_DEVICE(0x2526, 0x0238, iwl9560_2ac_cfg)},
{IWL_PCI_DEVICE(0x2526, 0x023C, iwl9560_2ac_cfg)},
- {IWL_PCI_DEVICE(0x2526, 0x0260, iwl9460_2ac_cfg)},
+ {IWL_PCI_DEVICE(0x2526, 0x0260, iwl9461_2ac_cfg_soc)},
{IWL_PCI_DEVICE(0x2526, 0x0264, iwl9461_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x2526, 0x02A0, iwl9460_2ac_cfg)},
- {IWL_PCI_DEVICE(0x2526, 0x02A4, iwl9460_2ac_cfg)},
+ {IWL_PCI_DEVICE(0x2526, 0x02A0, iwl9462_2ac_cfg_soc)},
+ {IWL_PCI_DEVICE(0x2526, 0x02A4, iwl9462_2ac_cfg_soc)},
{IWL_PCI_DEVICE(0x2526, 0x1010, iwl9260_2ac_cfg)},
{IWL_PCI_DEVICE(0x2526, 0x1030, iwl9560_2ac_cfg)},
{IWL_PCI_DEVICE(0x2526, 0x1210, iwl9260_2ac_cfg)},
@@ -601,7 +603,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
{IWL_PCI_DEVICE(0x2526, 0x401C, iwl9260_2ac_160_cfg)},
{IWL_PCI_DEVICE(0x2526, 0x4030, iwl9560_2ac_160_cfg)},
{IWL_PCI_DEVICE(0x2526, 0x4034, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x2526, 0x40A4, iwl9460_2ac_cfg)},
+ {IWL_PCI_DEVICE(0x2526, 0x40A4, iwl9462_2ac_cfg_soc)},
{IWL_PCI_DEVICE(0x2526, 0x4234, iwl9560_2ac_cfg_soc)},
{IWL_PCI_DEVICE(0x2526, 0x42A4, iwl9462_2ac_cfg_soc)},
{IWL_PCI_DEVICE(0x2526, 0x6010, iwl9260_2ac_160_cfg)},
@@ -616,33 +618,33 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
{IWL_PCI_DEVICE(0x271B, 0x0210, iwl9160_2ac_cfg)},
{IWL_PCI_DEVICE(0x271B, 0x0214, iwl9260_2ac_cfg)},
{IWL_PCI_DEVICE(0x271C, 0x0214, iwl9260_2ac_cfg)},
- {IWL_PCI_DEVICE(0x2720, 0x0034, iwl9560_2ac_160_cfg)},
- {IWL_PCI_DEVICE(0x2720, 0x0038, iwl9560_2ac_160_cfg)},
- {IWL_PCI_DEVICE(0x2720, 0x003C, iwl9560_2ac_160_cfg)},
- {IWL_PCI_DEVICE(0x2720, 0x0060, iwl9461_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x2720, 0x0064, iwl9461_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x2720, 0x00A0, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x2720, 0x00A4, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x2720, 0x0230, iwl9560_2ac_cfg)},
- {IWL_PCI_DEVICE(0x2720, 0x0234, iwl9560_2ac_cfg)},
- {IWL_PCI_DEVICE(0x2720, 0x0238, iwl9560_2ac_cfg)},
- {IWL_PCI_DEVICE(0x2720, 0x023C, iwl9560_2ac_cfg)},
- {IWL_PCI_DEVICE(0x2720, 0x0260, iwl9461_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x2720, 0x0264, iwl9461_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x2720, 0x02A0, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x2720, 0x02A4, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x2720, 0x1010, iwl9260_2ac_cfg)},
- {IWL_PCI_DEVICE(0x2720, 0x1030, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x2720, 0x1210, iwl9260_2ac_cfg)},
- {IWL_PCI_DEVICE(0x2720, 0x1551, iwl9560_killer_s_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x2720, 0x1552, iwl9560_killer_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x2720, 0x2030, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x2720, 0x2034, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x2720, 0x4030, iwl9560_2ac_160_cfg)},
- {IWL_PCI_DEVICE(0x2720, 0x4034, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x2720, 0x40A4, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x2720, 0x4234, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x2720, 0x42A4, iwl9462_2ac_cfg_soc)},
+
+ {IWL_PCI_DEVICE(0x2720, 0x0034, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x2720, 0x0038, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x2720, 0x003C, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x2720, 0x0060, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x2720, 0x0064, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x2720, 0x00A0, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x2720, 0x00A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x2720, 0x0230, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x2720, 0x0234, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x2720, 0x0238, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x2720, 0x023C, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x2720, 0x0260, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x2720, 0x0264, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x2720, 0x02A0, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x2720, 0x02A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x2720, 0x1030, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x2720, 0x1551, killer1550s_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x2720, 0x1552, killer1550i_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x2720, 0x2030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x2720, 0x2034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x2720, 0x4030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x2720, 0x4034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x2720, 0x40A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x2720, 0x4234, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x2720, 0x42A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+
{IWL_PCI_DEVICE(0x30DC, 0x0030, iwl9560_2ac_160_cfg_soc)},
{IWL_PCI_DEVICE(0x30DC, 0x0034, iwl9560_2ac_cfg_soc)},
{IWL_PCI_DEVICE(0x30DC, 0x0038, iwl9560_2ac_160_cfg_soc)},
@@ -671,6 +673,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
{IWL_PCI_DEVICE(0x30DC, 0x40A4, iwl9462_2ac_cfg_soc)},
{IWL_PCI_DEVICE(0x30DC, 0x4234, iwl9560_2ac_cfg_soc)},
{IWL_PCI_DEVICE(0x30DC, 0x42A4, iwl9462_2ac_cfg_soc)},
+
{IWL_PCI_DEVICE(0x31DC, 0x0030, iwl9560_2ac_160_cfg_shared_clk)},
{IWL_PCI_DEVICE(0x31DC, 0x0034, iwl9560_2ac_cfg_shared_clk)},
{IWL_PCI_DEVICE(0x31DC, 0x0038, iwl9560_2ac_160_cfg_shared_clk)},
@@ -726,62 +729,60 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
{IWL_PCI_DEVICE(0x34F0, 0x4234, iwl9560_2ac_cfg_qu_b0_jf_b0)},
{IWL_PCI_DEVICE(0x34F0, 0x42A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
- {IWL_PCI_DEVICE(0x3DF0, 0x0030, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x0034, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x0038, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x003C, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x0060, iwl9461_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x0064, iwl9461_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x00A0, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x00A4, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x0230, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x0234, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x0238, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x023C, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x0260, iwl9461_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x0264, iwl9461_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x02A0, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x02A4, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x1010, iwl9260_2ac_cfg)},
- {IWL_PCI_DEVICE(0x3DF0, 0x1030, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x1210, iwl9260_2ac_cfg)},
- {IWL_PCI_DEVICE(0x3DF0, 0x1551, iwl9560_killer_s_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x1552, iwl9560_killer_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x2030, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x2034, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x4030, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x4034, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x40A4, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x4234, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x3DF0, 0x42A4, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x0030, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x0034, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x0038, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x003C, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x0060, iwl9461_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x0064, iwl9461_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x00A0, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x00A4, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x0230, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x0234, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x0238, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x023C, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x0260, iwl9461_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x0264, iwl9461_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x02A0, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x02A4, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x1010, iwl9260_2ac_cfg)},
- {IWL_PCI_DEVICE(0x43F0, 0x1030, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x1210, iwl9260_2ac_cfg)},
- {IWL_PCI_DEVICE(0x43F0, 0x1551, iwl9560_killer_s_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x1552, iwl9560_killer_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x2030, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x2034, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x4030, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x4034, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x40A4, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x4234, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0x43F0, 0x42A4, iwl9462_2ac_cfg_soc)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x0030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x0034, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x0038, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x003C, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x0060, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x0064, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x00A0, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x00A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x0230, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x0234, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x0238, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x023C, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x0260, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x0264, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x02A0, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x02A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x1030, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x1551, killer1550s_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x1552, killer1550i_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x2030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x2034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x4030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x4034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x40A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x4234, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x3DF0, 0x42A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+
+ {IWL_PCI_DEVICE(0x43F0, 0x0030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x0034, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x0038, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x003C, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x0060, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x0064, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x00A0, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x00A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x0230, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x0234, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x0238, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x023C, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x0260, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x0264, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x02A0, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x02A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x1030, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x1551, killer1550s_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x1552, killer1550i_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x2030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x2034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x4030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x4034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x40A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x4234, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0x43F0, 0x42A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+
{IWL_PCI_DEVICE(0x9DF0, 0x0000, iwl9460_2ac_cfg_soc)},
{IWL_PCI_DEVICE(0x9DF0, 0x0010, iwl9460_2ac_cfg_soc)},
{IWL_PCI_DEVICE(0x9DF0, 0x0030, iwl9560_2ac_160_cfg_soc)},
@@ -821,34 +822,34 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
{IWL_PCI_DEVICE(0x9DF0, 0x40A4, iwl9462_2ac_cfg_soc)},
{IWL_PCI_DEVICE(0x9DF0, 0x4234, iwl9560_2ac_cfg_soc)},
{IWL_PCI_DEVICE(0x9DF0, 0x42A4, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x0030, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x0034, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x0038, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x003C, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x0060, iwl9461_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x0064, iwl9461_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x00A0, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x00A4, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x0230, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x0234, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x0238, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x023C, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x0260, iwl9461_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x0264, iwl9461_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x02A0, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x02A4, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x1010, iwl9260_2ac_cfg)},
- {IWL_PCI_DEVICE(0xA0F0, 0x1030, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x1210, iwl9260_2ac_cfg)},
- {IWL_PCI_DEVICE(0xA0F0, 0x1551, iwl9560_killer_s_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x1552, iwl9560_killer_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x2030, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x2034, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x4030, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x4034, iwl9560_2ac_160_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x40A4, iwl9462_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x4234, iwl9560_2ac_cfg_soc)},
- {IWL_PCI_DEVICE(0xA0F0, 0x42A4, iwl9462_2ac_cfg_soc)},
+
+ {IWL_PCI_DEVICE(0xA0F0, 0x0030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x0034, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x0038, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x003C, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x0060, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x0064, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x00A0, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x00A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x0230, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x0234, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x0238, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x023C, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x0260, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x0264, iwl9461_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x02A0, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x02A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x1030, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x1551, killer1550s_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x1552, killer1550i_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x2030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x2034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x4030, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x4034, iwl9560_2ac_160_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x40A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x4234, iwl9560_2ac_cfg_qu_b0_jf_b0)},
+ {IWL_PCI_DEVICE(0xA0F0, 0x42A4, iwl9462_2ac_cfg_qu_b0_jf_b0)},
+
{IWL_PCI_DEVICE(0xA370, 0x0030, iwl9560_2ac_160_cfg_soc)},
{IWL_PCI_DEVICE(0xA370, 0x0034, iwl9560_2ac_cfg_soc)},
{IWL_PCI_DEVICE(0xA370, 0x0038, iwl9560_2ac_160_cfg_soc)},
@@ -1067,11 +1068,7 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
}
} else if (CSR_HW_RF_ID_TYPE_CHIP_ID(iwl_trans->hw_rf_id) ==
CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_HR) &&
- ((cfg != &iwl_ax200_cfg_cc &&
- cfg != &killer1650x_2ax_cfg &&
- cfg != &killer1650w_2ax_cfg &&
- cfg != &iwl_ax201_cfg_quz_hr) ||
- iwl_trans->hw_rev == CSR_HW_REV_TYPE_QNJ_B0)) {
+ iwl_trans->hw_rev == CSR_HW_REV_TYPE_QNJ_B0) {
u32 hw_status;
hw_status = iwl_read_prph(iwl_trans, UMAG_GEN_HW_STATUS);
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
index df8455f14e4d..ca3bb4d65b00 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
@@ -57,6 +57,24 @@
#include "internal.h"
#include "fw/dbg.h"
+static int iwl_pcie_gen2_force_power_gating(struct iwl_trans *trans)
+{
+ iwl_set_bits_prph(trans, HPM_HIPM_GEN_CFG,
+ HPM_HIPM_GEN_CFG_CR_FORCE_ACTIVE);
+ udelay(20);
+ iwl_set_bits_prph(trans, HPM_HIPM_GEN_CFG,
+ HPM_HIPM_GEN_CFG_CR_PG_EN |
+ HPM_HIPM_GEN_CFG_CR_SLP_EN);
+ udelay(20);
+ iwl_clear_bits_prph(trans, HPM_HIPM_GEN_CFG,
+ HPM_HIPM_GEN_CFG_CR_FORCE_ACTIVE);
+
+ iwl_trans_sw_reset(trans);
+ iwl_clear_bit(trans, CSR_GP_CNTRL, CSR_GP_CNTRL_REG_FLAG_INIT_DONE);
+
+ return 0;
+}
+
/*
* Start up NIC's basic functionality after it has been reset
* (e.g. after platform boot, or shutdown via iwl_pcie_apm_stop())
@@ -92,6 +110,13 @@ int iwl_pcie_gen2_apm_init(struct iwl_trans *trans)
iwl_pcie_apm_config(trans);
+ if (trans->trans_cfg->device_family == IWL_DEVICE_FAMILY_22000 &&
+ trans->cfg->integrated) {
+ ret = iwl_pcie_gen2_force_power_gating(trans);
+ if (ret)
+ return ret;
+ }
+
ret = iwl_finish_nic_init(trans, trans->trans_cfg);
if (ret)
return ret;
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
index f8a1f985a1d8..6961f00ff812 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
@@ -3272,11 +3272,17 @@ static struct iwl_trans_dump_data
ptr = cmdq->write_ptr;
for (i = 0; i < cmdq->n_window; i++) {
u8 idx = iwl_pcie_get_cmd_index(cmdq, ptr);
+ u8 tfdidx;
u32 caplen, cmdlen;
+ if (trans->trans_cfg->use_tfh)
+ tfdidx = idx;
+ else
+ tfdidx = ptr;
+
cmdlen = iwl_trans_pcie_get_cmdlen(trans,
- cmdq->tfds +
- tfd_size * ptr);
+ (u8 *)cmdq->tfds +
+ tfd_size * tfdidx);
caplen = min_t(u32, TFD_MAX_PAYLOAD_SIZE, cmdlen);
if (cmdlen) {
@@ -3450,6 +3456,15 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev,
spin_lock_init(&trans_pcie->reg_lock);
mutex_init(&trans_pcie->mutex);
init_waitqueue_head(&trans_pcie->ucode_write_waitq);
+
+ trans_pcie->rba.alloc_wq = alloc_workqueue("rb_allocator",
+ WQ_HIGHPRI | WQ_UNBOUND, 1);
+ if (!trans_pcie->rba.alloc_wq) {
+ ret = -ENOMEM;
+ goto out_free_trans;
+ }
+ INIT_WORK(&trans_pcie->rba.rx_alloc, iwl_pcie_rx_allocator_work);
+
trans_pcie->tso_hdr_page = alloc_percpu(struct iwl_tso_hdr_page);
if (!trans_pcie->tso_hdr_page) {
ret = -ENOMEM;
@@ -3584,10 +3599,6 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev,
trans_pcie->inta_mask = CSR_INI_SET_MASK;
}
- trans_pcie->rba.alloc_wq = alloc_workqueue("rb_allocator",
- WQ_HIGHPRI | WQ_UNBOUND, 1);
- INIT_WORK(&trans_pcie->rba.rx_alloc, iwl_pcie_rx_allocator_work);
-
#ifdef CONFIG_IWLWIFI_DEBUGFS
trans_pcie->fw_mon_data.state = IWL_FW_MON_DBGFS_STATE_CLOSED;
mutex_init(&trans_pcie->fw_mon_data.mutex);
@@ -3599,6 +3610,8 @@ out_free_ict:
iwl_pcie_free_ict(trans);
out_no_pci:
free_percpu(trans_pcie->tso_hdr_page);
+ destroy_workqueue(trans_pcie->rba.alloc_wq);
+out_free_trans:
iwl_trans_free(trans);
return ERR_PTR(ret);
}
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
index 8894027429d6..d80f71f82a6d 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
@@ -251,27 +251,23 @@ static int iwl_pcie_gen2_build_amsdu(struct iwl_trans *trans,
struct ieee80211_hdr *hdr = (void *)skb->data;
unsigned int snap_ip_tcp_hdrlen, ip_hdrlen, total_len, hdr_room;
unsigned int mss = skb_shinfo(skb)->gso_size;
- u16 length, iv_len, amsdu_pad;
+ u16 length, amsdu_pad;
u8 *start_hdr;
struct iwl_tso_hdr_page *hdr_page;
struct page **page_ptr;
struct tso_t tso;
- /* if the packet is protected, then it must be CCMP or GCMP */
- iv_len = ieee80211_has_protected(hdr->frame_control) ?
- IEEE80211_CCMP_HDR_LEN : 0;
-
trace_iwlwifi_dev_tx(trans->dev, skb, tfd, sizeof(*tfd),
&dev_cmd->hdr, start_len, 0);
ip_hdrlen = skb_transport_header(skb) - skb_network_header(skb);
snap_ip_tcp_hdrlen = 8 + ip_hdrlen + tcp_hdrlen(skb);
- total_len = skb->len - snap_ip_tcp_hdrlen - hdr_len - iv_len;
+ total_len = skb->len - snap_ip_tcp_hdrlen - hdr_len;
amsdu_pad = 0;
/* total amount of header we may need for this A-MSDU */
hdr_room = DIV_ROUND_UP(total_len, mss) *
- (3 + snap_ip_tcp_hdrlen + sizeof(struct ethhdr)) + iv_len;
+ (3 + snap_ip_tcp_hdrlen + sizeof(struct ethhdr));
/* Our device supports 9 segments at most, it will fit in 1 page */
hdr_page = get_page_hdr(trans, hdr_room);
@@ -282,14 +278,12 @@ static int iwl_pcie_gen2_build_amsdu(struct iwl_trans *trans,
start_hdr = hdr_page->pos;
page_ptr = (void *)((u8 *)skb->cb + trans_pcie->page_offs);
*page_ptr = hdr_page->page;
- memcpy(hdr_page->pos, skb->data + hdr_len, iv_len);
- hdr_page->pos += iv_len;
/*
- * Pull the ieee80211 header + IV to be able to use TSO core,
+ * Pull the ieee80211 header to be able to use TSO core,
* we will restore it for the tx_status flow.
*/
- skb_pull(skb, hdr_len + iv_len);
+ skb_pull(skb, hdr_len);
/*
* Remove the length of all the headers that we don't actually
@@ -364,8 +358,8 @@ static int iwl_pcie_gen2_build_amsdu(struct iwl_trans *trans,
}
}
- /* re -add the WiFi header and IV */
- skb_push(skb, hdr_len + iv_len);
+ /* re -add the WiFi header */
+ skb_push(skb, hdr_len);
return 0;
diff --git a/drivers/net/wireless/intersil/hostap/hostap_hw.c b/drivers/net/wireless/intersil/hostap/hostap_hw.c
index 158a3d762e55..e323e9a5999f 100644
--- a/drivers/net/wireless/intersil/hostap/hostap_hw.c
+++ b/drivers/net/wireless/intersil/hostap/hostap_hw.c
@@ -3041,30 +3041,6 @@ static void prism2_clear_set_tim_queue(local_info_t *local)
}
}
-
-/*
- * HostAP uses two layers of net devices, where the inner
- * layer gets called all the time from the outer layer.
- * This is a natural nesting, which needs a split lock type.
- */
-static struct lock_class_key hostap_netdev_xmit_lock_key;
-static struct lock_class_key hostap_netdev_addr_lock_key;
-
-static void prism2_set_lockdep_class_one(struct net_device *dev,
- struct netdev_queue *txq,
- void *_unused)
-{
- lockdep_set_class(&txq->_xmit_lock,
- &hostap_netdev_xmit_lock_key);
-}
-
-static void prism2_set_lockdep_class(struct net_device *dev)
-{
- lockdep_set_class(&dev->addr_list_lock,
- &hostap_netdev_addr_lock_key);
- netdev_for_each_tx_queue(dev, prism2_set_lockdep_class_one, NULL);
-}
-
static struct net_device *
prism2_init_local_data(struct prism2_helper_functions *funcs, int card_idx,
struct device *sdev)
@@ -3223,7 +3199,6 @@ while (0)
if (ret >= 0)
ret = register_netdevice(dev);
- prism2_set_lockdep_class(dev);
rtnl_unlock();
if (ret < 0) {
printk(KERN_WARNING "%s: register netdevice failed!\n",
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 45c73a6f09a1..14f562cd715c 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -4026,7 +4026,7 @@ static int __init init_mac80211_hwsim(void)
err = dev_alloc_name(hwsim_mon, hwsim_mon->name);
if (err < 0) {
rtnl_unlock();
- goto out_free_radios;
+ goto out_free_mon;
}
err = register_netdevice(hwsim_mon);
diff --git a/drivers/net/wireless/mediatek/mt76/Makefile b/drivers/net/wireless/mediatek/mt76/Makefile
index 4d03596e891f..d7a1ddc9e407 100644
--- a/drivers/net/wireless/mediatek/mt76/Makefile
+++ b/drivers/net/wireless/mediatek/mt76/Makefile
@@ -8,6 +8,8 @@ mt76-y := \
mmio.o util.o trace.o dma.o mac80211.o debugfs.o eeprom.o \
tx.o agg-rx.o mcu.o
+mt76-$(CONFIG_PCI) += pci.o
+
mt76-usb-y := usb.o usb_trace.o
CFLAGS_trace.o := -I$(src)
diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
index c747eb24581c..8f69d00bd940 100644
--- a/drivers/net/wireless/mediatek/mt76/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/dma.c
@@ -53,8 +53,10 @@ mt76_dma_add_buf(struct mt76_dev *dev, struct mt76_queue *q,
u32 ctrl;
int i, idx = -1;
- if (txwi)
+ if (txwi) {
q->entry[q->head].txwi = DMA_DUMMY_DATA;
+ q->entry[q->head].skip_buf0 = true;
+ }
for (i = 0; i < nbufs; i += 2, buf += 2) {
u32 buf0 = buf[0].addr, buf1 = 0;
@@ -97,7 +99,7 @@ mt76_dma_tx_cleanup_idx(struct mt76_dev *dev, struct mt76_queue *q, int idx,
__le32 __ctrl = READ_ONCE(q->desc[idx].ctrl);
u32 ctrl = le32_to_cpu(__ctrl);
- if (!e->txwi || !e->skb) {
+ if (!e->skip_buf0) {
__le32 addr = READ_ONCE(q->desc[idx].buf0);
u32 len = FIELD_GET(MT_DMA_CTL_SD_LEN0, ctrl);
diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 570c159515a0..8aec7ccf2d79 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -93,8 +93,9 @@ struct mt76_queue_entry {
struct urb *urb;
};
enum mt76_txq_id qid;
- bool schedule;
- bool done;
+ bool skip_buf0:1;
+ bool schedule:1;
+ bool done:1;
};
struct mt76_queue_regs {
@@ -578,6 +579,7 @@ bool __mt76_poll_msec(struct mt76_dev *dev, u32 offset, u32 mask, u32 val,
#define mt76_poll_msec(dev, ...) __mt76_poll_msec(&((dev)->mt76), __VA_ARGS__)
void mt76_mmio_init(struct mt76_dev *dev, void __iomem *regs);
+void mt76_pci_disable_aspm(struct pci_dev *pdev);
static inline u16 mt76_chip(struct mt76_dev *dev)
{
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
index 73c3104f8858..cf611d1b817c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci.c
@@ -81,6 +81,8 @@ mt76pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
/* RG_SSUSB_CDR_BR_PE1D = 0x3 */
mt76_rmw_field(dev, 0x15c58, 0x3 << 6, 0x3);
+ mt76_pci_disable_aspm(pdev);
+
return 0;
error:
diff --git a/drivers/net/wireless/mediatek/mt76/pci.c b/drivers/net/wireless/mediatek/mt76/pci.c
new file mode 100644
index 000000000000..04c5a692bc85
--- /dev/null
+++ b/drivers/net/wireless/mediatek/mt76/pci.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: ISC
+/*
+ * Copyright (C) 2019 Lorenzo Bianconi <lorenzo@kernel.org>
+ */
+
+#include <linux/pci.h>
+
+void mt76_pci_disable_aspm(struct pci_dev *pdev)
+{
+ struct pci_dev *parent = pdev->bus->self;
+ u16 aspm_conf, parent_aspm_conf = 0;
+
+ pcie_capability_read_word(pdev, PCI_EXP_LNKCTL, &aspm_conf);
+ aspm_conf &= PCI_EXP_LNKCTL_ASPMC;
+ if (parent) {
+ pcie_capability_read_word(parent, PCI_EXP_LNKCTL,
+ &parent_aspm_conf);
+ parent_aspm_conf &= PCI_EXP_LNKCTL_ASPMC;
+ }
+
+ if (!aspm_conf && (!parent || !parent_aspm_conf)) {
+ /* aspm already disabled */
+ return;
+ }
+
+ dev_info(&pdev->dev, "disabling ASPM %s %s\n",
+ (aspm_conf & PCI_EXP_LNKCTL_ASPM_L0S) ? "L0s" : "",
+ (aspm_conf & PCI_EXP_LNKCTL_ASPM_L1) ? "L1" : "");
+
+ if (IS_ENABLED(CONFIG_PCIEASPM)) {
+ int err;
+
+ err = pci_disable_link_state(pdev, aspm_conf);
+ if (!err)
+ return;
+ }
+
+ /* both device and parent should have the same ASPM setting.
+ * disable ASPM in downstream component first and then upstream.
+ */
+ pcie_capability_clear_word(pdev, PCI_EXP_LNKCTL, aspm_conf);
+ if (parent)
+ pcie_capability_clear_word(parent, PCI_EXP_LNKCTL,
+ aspm_conf);
+}
+EXPORT_SYMBOL_GPL(mt76_pci_disable_aspm);
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00.h b/drivers/net/wireless/ralink/rt2x00/rt2x00.h
index 2b216edd0c7d..a90a518b40d3 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2x00.h
+++ b/drivers/net/wireless/ralink/rt2x00/rt2x00.h
@@ -23,7 +23,6 @@
#include <linux/leds.h>
#include <linux/mutex.h>
#include <linux/etherdevice.h>
-#include <linux/input-polldev.h>
#include <linux/kfifo.h>
#include <linux/hrtimer.h>
#include <linux/average.h>
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c b/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c
index 4d4e3888ef20..f2395309ec00 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2x00debug.c
@@ -555,7 +555,7 @@ static ssize_t rt2x00debug_write_restart_hw(struct file *file,
{
struct rt2x00debug_intf *intf = file->private_data;
struct rt2x00_dev *rt2x00dev = intf->rt2x00dev;
- static unsigned long last_reset;
+ static unsigned long last_reset = INITIAL_JIFFIES;
if (!rt2x00_has_cap_restart_hw(rt2x00dev))
return -EOPNOTSUPP;
diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c
index 6087ec7a90a6..f88d26535978 100644
--- a/drivers/net/wireless/realtek/rtlwifi/pci.c
+++ b/drivers/net/wireless/realtek/rtlwifi/pci.c
@@ -822,7 +822,7 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw)
hdr = rtl_get_hdr(skb);
fc = rtl_get_fc(skb);
- if (!stats.crc && !stats.hwerror) {
+ if (!stats.crc && !stats.hwerror && (skb->len > FCS_LEN)) {
memcpy(IEEE80211_SKB_RXCB(skb), &rx_status,
sizeof(rx_status));
@@ -859,6 +859,7 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw)
_rtl_pci_rx_to_mac80211(hw, skb, rx_status);
}
} else {
+ /* drop packets with errors or those too short */
dev_kfree_skb_any(skb);
}
new_trx_end:
diff --git a/drivers/net/wireless/realtek/rtlwifi/ps.c b/drivers/net/wireless/realtek/rtlwifi/ps.c
index 70f04c2f5b17..fff8dda14023 100644
--- a/drivers/net/wireless/realtek/rtlwifi/ps.c
+++ b/drivers/net/wireless/realtek/rtlwifi/ps.c
@@ -754,6 +754,9 @@ static void rtl_p2p_noa_ie(struct ieee80211_hw *hw, void *data,
return;
} else {
noa_num = (noa_len - 2) / 13;
+ if (noa_num > P2P_MAX_NOA_NUM)
+ noa_num = P2P_MAX_NOA_NUM;
+
}
noa_index = ie[3];
if (rtlpriv->psc.p2p_ps_info.p2p_ps_mode ==
@@ -848,6 +851,9 @@ static void rtl_p2p_action_ie(struct ieee80211_hw *hw, void *data,
return;
} else {
noa_num = (noa_len - 2) / 13;
+ if (noa_num > P2P_MAX_NOA_NUM)
+ noa_num = P2P_MAX_NOA_NUM;
+
}
noa_index = ie[3];
if (rtlpriv->psc.p2p_ps_info.p2p_ps_mode ==
diff --git a/drivers/net/wireless/virt_wifi.c b/drivers/net/wireless/virt_wifi.c
index be92e1220284..7997cc6de334 100644
--- a/drivers/net/wireless/virt_wifi.c
+++ b/drivers/net/wireless/virt_wifi.c
@@ -548,6 +548,7 @@ static int virt_wifi_newlink(struct net *src_net, struct net_device *dev,
priv->is_connected = false;
priv->is_up = false;
INIT_DELAYED_WORK(&priv->connect, virt_wifi_connect_complete);
+ __module_get(THIS_MODULE);
return 0;
unregister_netdev:
@@ -578,6 +579,7 @@ static void virt_wifi_dellink(struct net_device *dev,
netdev_upper_dev_unlink(priv->lowerdev, dev);
unregister_netdevice_queue(dev, head);
+ module_put(THIS_MODULE);
/* Deleting the wiphy is handled in the module destructor. */
}
@@ -590,6 +592,42 @@ static struct rtnl_link_ops virt_wifi_link_ops = {
.priv_size = sizeof(struct virt_wifi_netdev_priv),
};
+static bool netif_is_virt_wifi_dev(const struct net_device *dev)
+{
+ return rcu_access_pointer(dev->rx_handler) == virt_wifi_rx_handler;
+}
+
+static int virt_wifi_event(struct notifier_block *this, unsigned long event,
+ void *ptr)
+{
+ struct net_device *lower_dev = netdev_notifier_info_to_dev(ptr);
+ struct virt_wifi_netdev_priv *priv;
+ struct net_device *upper_dev;
+ LIST_HEAD(list_kill);
+
+ if (!netif_is_virt_wifi_dev(lower_dev))
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETDEV_UNREGISTER:
+ priv = rtnl_dereference(lower_dev->rx_handler_data);
+ if (!priv)
+ return NOTIFY_DONE;
+
+ upper_dev = priv->upperdev;
+
+ upper_dev->rtnl_link_ops->dellink(upper_dev, &list_kill);
+ unregister_netdevice_many(&list_kill);
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block virt_wifi_notifier = {
+ .notifier_call = virt_wifi_event,
+};
+
/* Acquires and releases the rtnl lock. */
static int __init virt_wifi_init_module(void)
{
@@ -598,14 +636,25 @@ static int __init virt_wifi_init_module(void)
/* Guaranteed to be locallly-administered and not multicast. */
eth_random_addr(fake_router_bssid);
+ err = register_netdevice_notifier(&virt_wifi_notifier);
+ if (err)
+ return err;
+
+ err = -ENOMEM;
common_wiphy = virt_wifi_make_wiphy();
if (!common_wiphy)
- return -ENOMEM;
+ goto notifier;
err = rtnl_link_register(&virt_wifi_link_ops);
if (err)
- virt_wifi_destroy_wiphy(common_wiphy);
+ goto destroy_wiphy;
+ return 0;
+
+destroy_wiphy:
+ virt_wifi_destroy_wiphy(common_wiphy);
+notifier:
+ unregister_netdevice_notifier(&virt_wifi_notifier);
return err;
}
@@ -615,6 +664,7 @@ static void __exit virt_wifi_cleanup_module(void)
/* Will delete any devices that depend on the wiphy. */
rtnl_link_unregister(&virt_wifi_link_ops);
virt_wifi_destroy_wiphy(common_wiphy);
+ unregister_netdevice_notifier(&virt_wifi_notifier);
}
module_init(virt_wifi_init_module);
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 240f762b3749..103ed00775eb 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -719,7 +719,6 @@ err_unmap:
xenvif_unmap_frontend_data_rings(queue);
netif_napi_del(&queue->napi);
err:
- module_put(THIS_MODULE);
return err;
}
diff --git a/drivers/nfc/fdp/i2c.c b/drivers/nfc/fdp/i2c.c
index 1cd113c8d7cb..ad0abb1f0bae 100644
--- a/drivers/nfc/fdp/i2c.c
+++ b/drivers/nfc/fdp/i2c.c
@@ -259,7 +259,7 @@ static void fdp_nci_i2c_read_device_properties(struct device *dev,
*fw_vsc_cfg, len);
if (r) {
- devm_kfree(dev, fw_vsc_cfg);
+ devm_kfree(dev, *fw_vsc_cfg);
goto vsc_read_err;
}
} else {
diff --git a/drivers/nfc/nxp-nci/i2c.c b/drivers/nfc/nxp-nci/i2c.c
index 307bd2afbe05..4d1909aecd6c 100644
--- a/drivers/nfc/nxp-nci/i2c.c
+++ b/drivers/nfc/nxp-nci/i2c.c
@@ -220,8 +220,10 @@ static irqreturn_t nxp_nci_i2c_irq_thread_fn(int irq, void *phy_id)
if (r == -EREMOTEIO) {
phy->hard_fault = r;
- skb = NULL;
- } else if (r < 0) {
+ if (info->mode == NXP_NCI_MODE_FW)
+ nxp_nci_fw_recv_frame(phy->ndev, NULL);
+ }
+ if (r < 0) {
nfc_err(&client->dev, "Read failed with error %d\n", r);
goto exit_irq_handled;
}
diff --git a/drivers/nfc/pn533/usb.c b/drivers/nfc/pn533/usb.c
index c5289eaf17ee..e897e4d768ef 100644
--- a/drivers/nfc/pn533/usb.c
+++ b/drivers/nfc/pn533/usb.c
@@ -547,18 +547,25 @@ static int pn533_usb_probe(struct usb_interface *interface,
rc = pn533_finalize_setup(priv);
if (rc)
- goto error;
+ goto err_deregister;
usb_set_intfdata(interface, phy);
return 0;
+err_deregister:
+ pn533_unregister_device(phy->priv);
error:
+ usb_kill_urb(phy->in_urb);
+ usb_kill_urb(phy->out_urb);
+ usb_kill_urb(phy->ack_urb);
+
usb_free_urb(phy->in_urb);
usb_free_urb(phy->out_urb);
usb_free_urb(phy->ack_urb);
usb_put_dev(phy->udev);
kfree(in_buf);
+ kfree(phy->ack_buffer);
return rc;
}
diff --git a/drivers/nfc/port100.c b/drivers/nfc/port100.c
index 145ddf3f0a45..604dba4f18af 100644
--- a/drivers/nfc/port100.c
+++ b/drivers/nfc/port100.c
@@ -783,7 +783,7 @@ static int port100_send_frame_async(struct port100 *dev, struct sk_buff *out,
rc = port100_submit_urb_for_ack(dev, GFP_KERNEL);
if (rc)
- usb_unlink_urb(dev->out_urb);
+ usb_kill_urb(dev->out_urb);
exit:
mutex_unlock(&dev->out_urb_lock);
diff --git a/drivers/nfc/st21nfca/core.c b/drivers/nfc/st21nfca/core.c
index f9ac176cf257..2ce17932a073 100644
--- a/drivers/nfc/st21nfca/core.c
+++ b/drivers/nfc/st21nfca/core.c
@@ -708,6 +708,7 @@ static int st21nfca_hci_complete_target_discovered(struct nfc_hci_dev *hdev,
NFC_PROTO_FELICA_MASK;
} else {
kfree_skb(nfcid_skb);
+ nfcid_skb = NULL;
/* P2P in type A */
r = nfc_hci_get_param(hdev, ST21NFCA_RF_READER_F_GATE,
ST21NFCA_RF_READER_F_NFCID1,
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index 2b36f052bfb9..c6439638a419 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -23,6 +23,16 @@ config NVME_MULTIPATH
/dev/nvmeXnY device will show up for each NVMe namespaces,
even if it is accessible through multiple controllers.
+config NVME_HWMON
+ bool "NVMe hardware monitoring"
+ depends on (NVME_CORE=y && HWMON=y) || (NVME_CORE=m && HWMON)
+ help
+ This provides support for NVMe hardware monitoring. If enabled,
+ a hardware monitoring device will be created for each NVMe drive
+ in the system.
+
+ If unsure, say N.
+
config NVME_FABRICS
tristate
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index 8a4b671c5f0c..fc7b26be692d 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -14,6 +14,7 @@ nvme-core-$(CONFIG_TRACING) += trace.o
nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o
nvme-core-$(CONFIG_NVM) += lightnvm.o
nvme-core-$(CONFIG_FAULT_INJECTION_DEBUG_FS) += fault_inject.o
+nvme-core-$(CONFIG_NVME_HWMON) += hwmon.o
nvme-y += pci.o
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index fd7dea36c3b6..8e8527408db3 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -116,10 +116,26 @@ static void nvme_queue_scan(struct nvme_ctrl *ctrl)
/*
* Only new queue scan work when admin and IO queues are both alive
*/
- if (ctrl->state == NVME_CTRL_LIVE)
+ if (ctrl->state == NVME_CTRL_LIVE && ctrl->tagset)
queue_work(nvme_wq, &ctrl->scan_work);
}
+/*
+ * Use this function to proceed with scheduling reset_work for a controller
+ * that had previously been set to the resetting state. This is intended for
+ * code paths that can't be interrupted by other reset attempts. A hot removal
+ * may prevent this from succeeding.
+ */
+int nvme_try_sched_reset(struct nvme_ctrl *ctrl)
+{
+ if (ctrl->state != NVME_CTRL_RESETTING)
+ return -EBUSY;
+ if (!queue_work(nvme_reset_wq, &ctrl->reset_work))
+ return -EBUSY;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nvme_try_sched_reset);
+
int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
{
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
@@ -137,8 +153,7 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
ret = nvme_reset_ctrl(ctrl);
if (!ret) {
flush_work(&ctrl->reset_work);
- if (ctrl->state != NVME_CTRL_LIVE &&
- ctrl->state != NVME_CTRL_ADMIN_ONLY)
+ if (ctrl->state != NVME_CTRL_LIVE)
ret = -ENETRESET;
}
@@ -268,6 +283,8 @@ void nvme_complete_rq(struct request *req)
trace_nvme_complete_rq(req);
+ nvme_cleanup_cmd(req);
+
if (nvme_req(req)->ctrl->kas)
nvme_req(req)->ctrl->comp_seen = true;
@@ -298,7 +315,7 @@ bool nvme_cancel_request(struct request *req, void *data, bool reserved)
if (blk_mq_request_completed(req))
return true;
- nvme_req(req)->status = NVME_SC_HOST_PATH_ERROR;
+ nvme_req(req)->status = NVME_SC_HOST_ABORTED_CMD;
blk_mq_complete_request(req);
return true;
}
@@ -315,15 +332,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
old_state = ctrl->state;
switch (new_state) {
- case NVME_CTRL_ADMIN_ONLY:
- switch (old_state) {
- case NVME_CTRL_CONNECTING:
- changed = true;
- /* FALLTHRU */
- default:
- break;
- }
- break;
case NVME_CTRL_LIVE:
switch (old_state) {
case NVME_CTRL_NEW:
@@ -339,7 +347,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
switch (old_state) {
case NVME_CTRL_NEW:
case NVME_CTRL_LIVE:
- case NVME_CTRL_ADMIN_ONLY:
changed = true;
/* FALLTHRU */
default:
@@ -359,7 +366,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
case NVME_CTRL_DELETING:
switch (old_state) {
case NVME_CTRL_LIVE:
- case NVME_CTRL_ADMIN_ONLY:
case NVME_CTRL_RESETTING:
case NVME_CTRL_CONNECTING:
changed = true;
@@ -381,8 +387,10 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
break;
}
- if (changed)
+ if (changed) {
ctrl->state = new_state;
+ wake_up_all(&ctrl->state_wq);
+ }
spin_unlock_irqrestore(&ctrl->lock, flags);
if (changed && ctrl->state == NVME_CTRL_LIVE)
@@ -391,6 +399,39 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
}
EXPORT_SYMBOL_GPL(nvme_change_ctrl_state);
+/*
+ * Returns true for sink states that can't ever transition back to live.
+ */
+static bool nvme_state_terminal(struct nvme_ctrl *ctrl)
+{
+ switch (ctrl->state) {
+ case NVME_CTRL_NEW:
+ case NVME_CTRL_LIVE:
+ case NVME_CTRL_RESETTING:
+ case NVME_CTRL_CONNECTING:
+ return false;
+ case NVME_CTRL_DELETING:
+ case NVME_CTRL_DEAD:
+ return true;
+ default:
+ WARN_ONCE(1, "Unhandled ctrl state:%d", ctrl->state);
+ return true;
+ }
+}
+
+/*
+ * Waits for the controller state to be resetting, or returns false if it is
+ * not possible to ever transition to that state.
+ */
+bool nvme_wait_reset(struct nvme_ctrl *ctrl)
+{
+ wait_event(ctrl->state_wq,
+ nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING) ||
+ nvme_state_terminal(ctrl));
+ return ctrl->state == NVME_CTRL_RESETTING;
+}
+EXPORT_SYMBOL_GPL(nvme_wait_reset);
+
static void nvme_free_ns_head(struct kref *ref)
{
struct nvme_ns_head *head =
@@ -572,8 +613,14 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
struct nvme_dsm_range *range;
struct bio *bio;
- range = kmalloc_array(segments, sizeof(*range),
- GFP_ATOMIC | __GFP_NOWARN);
+ /*
+ * Some devices do not consider the DSM 'Number of Ranges' field when
+ * determining how much data to DMA. Always allocate memory for maximum
+ * number of segments to prevent device reading beyond end of buffer.
+ */
+ static const size_t alloc_size = sizeof(*range) * NVME_DSM_MAX_RANGES;
+
+ range = kzalloc(alloc_size, GFP_ATOMIC | __GFP_NOWARN);
if (!range) {
/*
* If we fail allocation our range, fallback to the controller
@@ -587,7 +634,7 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
}
__rq_for_each_bio(bio, req) {
- u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector);
+ u64 slba = nvme_sect_to_lba(ns, bio->bi_iter.bi_sector);
u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift;
if (n < segments) {
@@ -613,7 +660,7 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
req->special_vec.bv_page = virt_to_page(range);
req->special_vec.bv_offset = offset_in_page(range);
- req->special_vec.bv_len = sizeof(*range) * segments;
+ req->special_vec.bv_len = alloc_size;
req->rq_flags |= RQF_SPECIAL_PAYLOAD;
return BLK_STS_OK;
@@ -628,7 +675,7 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns,
cmnd->write_zeroes.opcode = nvme_cmd_write_zeroes;
cmnd->write_zeroes.nsid = cpu_to_le32(ns->head->ns_id);
cmnd->write_zeroes.slba =
- cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
+ cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
cmnd->write_zeroes.length =
cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
cmnd->write_zeroes.control = 0;
@@ -652,7 +699,7 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
cmnd->rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read);
cmnd->rw.nsid = cpu_to_le32(ns->head->ns_id);
- cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
+ cmnd->rw.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
if (req_op(req) == REQ_OP_WRITE && ctrl->nr_streams)
@@ -1306,8 +1353,6 @@ static void nvme_update_formats(struct nvme_ctrl *ctrl)
if (ns->disk && nvme_revalidate_disk(ns->disk))
nvme_set_queue_dying(ns);
up_read(&ctrl->namespaces_rwsem);
-
- nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL);
}
static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
@@ -1323,6 +1368,7 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
nvme_unfreeze(ctrl);
nvme_mpath_unfreeze(ctrl->subsys);
mutex_unlock(&ctrl->subsys->lock);
+ nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL);
mutex_unlock(&ctrl->scan_lock);
}
if (effects & NVME_CMD_EFFECTS_CCC)
@@ -1609,7 +1655,7 @@ static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type)
static void nvme_set_chunk_size(struct nvme_ns *ns)
{
- u32 chunk_size = (((u32)ns->noiob) << (ns->lba_shift - 9));
+ u32 chunk_size = nvme_lba_to_sect(ns, ns->noiob);
blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size));
}
@@ -1646,8 +1692,7 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns)
{
- u32 max_sectors;
- unsigned short bs = 1 << ns->lba_shift;
+ u64 max_blocks;
if (!(ns->ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) ||
(ns->ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES))
@@ -1663,11 +1708,12 @@ static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns)
* nvme_init_identify() if available.
*/
if (ns->ctrl->max_hw_sectors == UINT_MAX)
- max_sectors = ((u32)(USHRT_MAX + 1) * bs) >> 9;
+ max_blocks = (u64)USHRT_MAX + 1;
else
- max_sectors = ((u32)(ns->ctrl->max_hw_sectors + 1) * bs) >> 9;
+ max_blocks = ns->ctrl->max_hw_sectors + 1;
- blk_queue_max_write_zeroes_sectors(disk->queue, max_sectors);
+ blk_queue_max_write_zeroes_sectors(disk->queue,
+ nvme_lba_to_sect(ns, max_blocks));
}
static int nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid,
@@ -1710,7 +1756,7 @@ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b)
static void nvme_update_disk_info(struct gendisk *disk,
struct nvme_ns *ns, struct nvme_id_ns *id)
{
- sector_t capacity = le64_to_cpu(id->nsze) << (ns->lba_shift - 9);
+ sector_t capacity = nvme_lba_to_sect(ns, le64_to_cpu(id->nsze));
unsigned short bs = 1 << ns->lba_shift;
u32 atomic_bs, phys_bs, io_opt;
@@ -2758,6 +2804,9 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
ctrl->oncs = le16_to_cpu(id->oncs);
ctrl->mtfa = le16_to_cpu(id->mtfa);
ctrl->oaes = le32_to_cpu(id->oaes);
+ ctrl->wctemp = le16_to_cpu(id->wctemp);
+ ctrl->cctemp = le16_to_cpu(id->cctemp);
+
atomic_set(&ctrl->abort_limit, id->acl + 1);
ctrl->vwc = id->vwc;
if (id->mdts)
@@ -2857,6 +2906,9 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
if (ret < 0)
return ret;
+ if (!ctrl->identified)
+ nvme_hwmon_init(ctrl);
+
ctrl->identified = true;
return 0;
@@ -2874,7 +2926,6 @@ static int nvme_dev_open(struct inode *inode, struct file *file)
switch (ctrl->state) {
case NVME_CTRL_LIVE:
- case NVME_CTRL_ADMIN_ONLY:
break;
default:
return -EWOULDBLOCK;
@@ -3168,7 +3219,6 @@ static ssize_t nvme_sysfs_show_state(struct device *dev,
static const char *const state_name[] = {
[NVME_CTRL_NEW] = "new",
[NVME_CTRL_LIVE] = "live",
- [NVME_CTRL_ADMIN_ONLY] = "only-admin",
[NVME_CTRL_RESETTING] = "resetting",
[NVME_CTRL_CONNECTING] = "connecting",
[NVME_CTRL_DELETING] = "deleting",
@@ -3679,11 +3729,10 @@ static void nvme_scan_work(struct work_struct *work)
struct nvme_id_ctrl *id;
unsigned nn;
- if (ctrl->state != NVME_CTRL_LIVE)
+ /* No tagset on a live ctrl means IO queues could not created */
+ if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset)
return;
- WARN_ON_ONCE(!ctrl->tagset);
-
if (test_and_clear_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) {
dev_info(ctrl->device, "rescanning namespaces.\n");
nvme_clear_changed_ns_log(ctrl);
@@ -3844,13 +3893,13 @@ static void nvme_fw_act_work(struct work_struct *work)
if (time_after(jiffies, fw_act_timeout)) {
dev_warn(ctrl->device,
"Fw activation timeout, reset controller\n");
- nvme_reset_ctrl(ctrl);
- break;
+ nvme_try_sched_reset(ctrl);
+ return;
}
msleep(100);
}
- if (ctrl->state != NVME_CTRL_LIVE)
+ if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE))
return;
nvme_start_queues(ctrl);
@@ -3870,7 +3919,13 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
nvme_queue_scan(ctrl);
break;
case NVME_AER_NOTICE_FW_ACT_STARTING:
- queue_work(nvme_wq, &ctrl->fw_act_work);
+ /*
+ * We are (ab)using the RESETTING state to prevent subsequent
+ * recovery actions from interfering with the controller's
+ * firmware activation.
+ */
+ if (nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
+ queue_work(nvme_wq, &ctrl->fw_act_work);
break;
#ifdef CONFIG_NVME_MULTIPATH
case NVME_AER_NOTICE_ANA:
@@ -3993,6 +4048,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
INIT_WORK(&ctrl->async_event_work, nvme_async_event_work);
INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
INIT_WORK(&ctrl->delete_work, nvme_delete_ctrl_work);
+ init_waitqueue_head(&ctrl->state_wq);
INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 93f08d77c896..a0ec40ab62ee 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -182,8 +182,7 @@ bool nvmf_ip_options_match(struct nvme_ctrl *ctrl,
static inline bool nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
bool queue_live)
{
- if (likely(ctrl->state == NVME_CTRL_LIVE ||
- ctrl->state == NVME_CTRL_ADMIN_ONLY))
+ if (likely(ctrl->state == NVME_CTRL_LIVE))
return true;
return __nvmf_check_ready(ctrl, rq, queue_live);
}
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 265f89e11d8b..679a721ae229 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1224,7 +1224,7 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
lsreq->rqstlen = sizeof(*assoc_rqst);
lsreq->rspaddr = assoc_acc;
lsreq->rsplen = sizeof(*assoc_acc);
- lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC;
+ lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC;
ret = nvme_fc_send_ls_req(ctrl->rport, lsop);
if (ret)
@@ -1264,7 +1264,7 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
if (fcret) {
ret = -EBADF;
dev_err(ctrl->dev,
- "q %d connect failed: %s\n",
+ "q %d Create Association LS failed: %s\n",
queue->qnum, validation_errors[fcret]);
} else {
ctrl->association_id =
@@ -1332,7 +1332,7 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
lsreq->rqstlen = sizeof(*conn_rqst);
lsreq->rspaddr = conn_acc;
lsreq->rsplen = sizeof(*conn_acc);
- lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC;
+ lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC;
ret = nvme_fc_send_ls_req(ctrl->rport, lsop);
if (ret)
@@ -1363,7 +1363,7 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
if (fcret) {
ret = -EBADF;
dev_err(ctrl->dev,
- "q %d connect failed: %s\n",
+ "q %d Create I/O Connection LS failed: %s\n",
queue->qnum, validation_errors[fcret]);
} else {
queue->connection_id =
@@ -1376,7 +1376,7 @@ out_free_buffer:
out_no_memory:
if (ret)
dev_err(ctrl->dev,
- "queue %d connect command failed (%d).\n",
+ "queue %d connect I/O queue failed (%d).\n",
queue->qnum, ret);
return ret;
}
@@ -1413,8 +1413,8 @@ nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status)
static void
nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
{
- struct fcnvme_ls_disconnect_rqst *discon_rqst;
- struct fcnvme_ls_disconnect_acc *discon_acc;
+ struct fcnvme_ls_disconnect_assoc_rqst *discon_rqst;
+ struct fcnvme_ls_disconnect_assoc_acc *discon_acc;
struct nvmefc_ls_req_op *lsop;
struct nvmefc_ls_req *lsreq;
int ret;
@@ -1430,11 +1430,11 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
lsreq = &lsop->ls_req;
lsreq->private = (void *)&lsop[1];
- discon_rqst = (struct fcnvme_ls_disconnect_rqst *)
+ discon_rqst = (struct fcnvme_ls_disconnect_assoc_rqst *)
(lsreq->private + ctrl->lport->ops->lsrqst_priv_sz);
- discon_acc = (struct fcnvme_ls_disconnect_acc *)&discon_rqst[1];
+ discon_acc = (struct fcnvme_ls_disconnect_assoc_acc *)&discon_rqst[1];
- discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT;
+ discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT_ASSOC;
discon_rqst->desc_list_len = cpu_to_be32(
sizeof(struct fcnvme_lsdesc_assoc_id) +
sizeof(struct fcnvme_lsdesc_disconn_cmd));
@@ -1451,22 +1451,17 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
discon_rqst->discon_cmd.desc_len =
fcnvme_lsdesc_len(
sizeof(struct fcnvme_lsdesc_disconn_cmd));
- discon_rqst->discon_cmd.scope = FCNVME_DISCONN_ASSOCIATION;
- discon_rqst->discon_cmd.id = cpu_to_be64(ctrl->association_id);
lsreq->rqstaddr = discon_rqst;
lsreq->rqstlen = sizeof(*discon_rqst);
lsreq->rspaddr = discon_acc;
lsreq->rsplen = sizeof(*discon_acc);
- lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC;
+ lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC;
ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop,
nvme_fc_disconnect_assoc_done);
if (ret)
kfree(lsop);
-
- /* only meaningful part to terminating the association */
- ctrl->association_id = 0;
}
@@ -1662,7 +1657,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
(freq->rcv_rsplen / 4) ||
be32_to_cpu(op->rsp_iu.xfrd_len) !=
freq->transferred_length ||
- op->rsp_iu.status_code ||
+ op->rsp_iu.ersp_result ||
sqe->common.command_id != cqe->command_id)) {
status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1);
dev_info(ctrl->ctrl.device,
@@ -1672,7 +1667,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
ctrl->cnum, be16_to_cpu(op->rsp_iu.iu_len),
be32_to_cpu(op->rsp_iu.xfrd_len),
freq->transferred_length,
- op->rsp_iu.status_code,
+ op->rsp_iu.ersp_result,
sqe->common.command_id,
cqe->command_id);
goto done;
@@ -1731,9 +1726,14 @@ __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl,
op->rq = rq;
op->rqno = rqno;
- cmdiu->scsi_id = NVME_CMD_SCSI_ID;
+ cmdiu->format_id = NVME_CMD_FORMAT_ID;
cmdiu->fc_id = NVME_CMD_FC_ID;
cmdiu->iu_len = cpu_to_be16(sizeof(*cmdiu) / sizeof(u32));
+ if (queue->qnum)
+ cmdiu->rsv_cat = fccmnd_set_cat_css(0,
+ (NVME_CC_CSS_NVM >> NVME_CC_CSS_SHIFT));
+ else
+ cmdiu->rsv_cat = fccmnd_set_cat_admin(0);
op->fcp_req.cmddma = fc_dma_map_single(ctrl->lport->dev,
&op->cmd_iu, sizeof(op->cmd_iu), DMA_TO_DEVICE);
@@ -2173,8 +2173,6 @@ nvme_fc_unmap_data(struct nvme_fc_ctrl *ctrl, struct request *rq,
fc_dma_unmap_sg(ctrl->lport->dev, freq->sg_table.sgl, op->nents,
rq_dma_dir(rq));
- nvme_cleanup_cmd(rq);
-
sg_free_table_chained(&freq->sg_table, SG_CHUNK_SIZE);
freq->sg_cnt = 0;
@@ -2305,6 +2303,7 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
if (!(op->flags & FCOP_FLAGS_AEN))
nvme_fc_unmap_data(ctrl, op->rq, op);
+ nvme_cleanup_cmd(op->rq);
nvme_fc_ctrl_put(ctrl);
if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE &&
@@ -2695,7 +2694,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
/* warn if maxcmd is lower than queue_size */
dev_warn(ctrl->ctrl.device,
"queue_size %zu > ctrl maxcmd %u, reducing "
- "to queue_size\n",
+ "to maxcmd\n",
opts->queue_size, ctrl->ctrl.maxcmd);
opts->queue_size = ctrl->ctrl.maxcmd;
}
@@ -2703,7 +2702,8 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
if (opts->queue_size > ctrl->ctrl.sqsize + 1) {
/* warn if sqsize is lower than queue_size */
dev_warn(ctrl->ctrl.device,
- "queue_size %zu > ctrl sqsize %u, clamping down\n",
+ "queue_size %zu > ctrl sqsize %u, reducing "
+ "to sqsize\n",
opts->queue_size, ctrl->ctrl.sqsize + 1);
opts->queue_size = ctrl->ctrl.sqsize + 1;
}
@@ -2739,6 +2739,7 @@ out_term_aen_ops:
out_disconnect_admin_queue:
/* send a Disconnect(association) LS to fc-nvme target */
nvme_fc_xmt_disconnect_assoc(ctrl);
+ ctrl->association_id = 0;
out_delete_hw_queue:
__nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0);
out_free_queue:
@@ -2830,6 +2831,8 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
if (ctrl->association_id)
nvme_fc_xmt_disconnect_assoc(ctrl);
+ ctrl->association_id = 0;
+
if (ctrl->ctrl.tagset) {
nvme_fc_delete_hw_io_queues(ctrl);
nvme_fc_free_io_queues(ctrl);
diff --git a/drivers/nvme/host/hwmon.c b/drivers/nvme/host/hwmon.c
new file mode 100644
index 000000000000..a5af21f5d370
--- /dev/null
+++ b/drivers/nvme/host/hwmon.c
@@ -0,0 +1,259 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NVM Express hardware monitoring support
+ * Copyright (c) 2019, Guenter Roeck
+ */
+
+#include <linux/hwmon.h>
+#include <asm/unaligned.h>
+
+#include "nvme.h"
+
+/* These macros should be moved to linux/temperature.h */
+#define MILLICELSIUS_TO_KELVIN(t) DIV_ROUND_CLOSEST((t) + 273150, 1000)
+#define KELVIN_TO_MILLICELSIUS(t) ((t) * 1000L - 273150)
+
+struct nvme_hwmon_data {
+ struct nvme_ctrl *ctrl;
+ struct nvme_smart_log log;
+ struct mutex read_lock;
+};
+
+static int nvme_get_temp_thresh(struct nvme_ctrl *ctrl, int sensor, bool under,
+ long *temp)
+{
+ unsigned int threshold = sensor << NVME_TEMP_THRESH_SELECT_SHIFT;
+ u32 status;
+ int ret;
+
+ if (under)
+ threshold |= NVME_TEMP_THRESH_TYPE_UNDER;
+
+ ret = nvme_get_features(ctrl, NVME_FEAT_TEMP_THRESH, threshold, NULL, 0,
+ &status);
+ if (ret > 0)
+ return -EIO;
+ if (ret < 0)
+ return ret;
+ *temp = KELVIN_TO_MILLICELSIUS(status & NVME_TEMP_THRESH_MASK);
+
+ return 0;
+}
+
+static int nvme_set_temp_thresh(struct nvme_ctrl *ctrl, int sensor, bool under,
+ long temp)
+{
+ unsigned int threshold = sensor << NVME_TEMP_THRESH_SELECT_SHIFT;
+ int ret;
+
+ temp = MILLICELSIUS_TO_KELVIN(temp);
+ threshold |= clamp_val(temp, 0, NVME_TEMP_THRESH_MASK);
+
+ if (under)
+ threshold |= NVME_TEMP_THRESH_TYPE_UNDER;
+
+ ret = nvme_set_features(ctrl, NVME_FEAT_TEMP_THRESH, threshold, NULL, 0,
+ NULL);
+ if (ret > 0)
+ return -EIO;
+
+ return ret;
+}
+
+static int nvme_hwmon_get_smart_log(struct nvme_hwmon_data *data)
+{
+ int ret;
+
+ ret = nvme_get_log(data->ctrl, NVME_NSID_ALL, NVME_LOG_SMART, 0,
+ &data->log, sizeof(data->log), 0);
+
+ return ret <= 0 ? ret : -EIO;
+}
+
+static int nvme_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int channel, long *val)
+{
+ struct nvme_hwmon_data *data = dev_get_drvdata(dev);
+ struct nvme_smart_log *log = &data->log;
+ int temp;
+ int err;
+
+ /*
+ * First handle attributes which don't require us to read
+ * the smart log.
+ */
+ switch (attr) {
+ case hwmon_temp_max:
+ return nvme_get_temp_thresh(data->ctrl, channel, false, val);
+ case hwmon_temp_min:
+ return nvme_get_temp_thresh(data->ctrl, channel, true, val);
+ case hwmon_temp_crit:
+ *val = KELVIN_TO_MILLICELSIUS(data->ctrl->cctemp);
+ return 0;
+ default:
+ break;
+ }
+
+ mutex_lock(&data->read_lock);
+ err = nvme_hwmon_get_smart_log(data);
+ if (err)
+ goto unlock;
+
+ switch (attr) {
+ case hwmon_temp_input:
+ if (!channel)
+ temp = get_unaligned_le16(log->temperature);
+ else
+ temp = le16_to_cpu(log->temp_sensor[channel - 1]);
+ *val = KELVIN_TO_MILLICELSIUS(temp);
+ break;
+ case hwmon_temp_alarm:
+ *val = !!(log->critical_warning & NVME_SMART_CRIT_TEMPERATURE);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ break;
+ }
+unlock:
+ mutex_unlock(&data->read_lock);
+ return err;
+}
+
+static int nvme_hwmon_write(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int channel, long val)
+{
+ struct nvme_hwmon_data *data = dev_get_drvdata(dev);
+
+ switch (attr) {
+ case hwmon_temp_max:
+ return nvme_set_temp_thresh(data->ctrl, channel, false, val);
+ case hwmon_temp_min:
+ return nvme_set_temp_thresh(data->ctrl, channel, true, val);
+ default:
+ break;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static const char * const nvme_hwmon_sensor_names[] = {
+ "Composite",
+ "Sensor 1",
+ "Sensor 2",
+ "Sensor 3",
+ "Sensor 4",
+ "Sensor 5",
+ "Sensor 6",
+ "Sensor 7",
+ "Sensor 8",
+};
+
+static int nvme_hwmon_read_string(struct device *dev,
+ enum hwmon_sensor_types type, u32 attr,
+ int channel, const char **str)
+{
+ *str = nvme_hwmon_sensor_names[channel];
+ return 0;
+}
+
+static umode_t nvme_hwmon_is_visible(const void *_data,
+ enum hwmon_sensor_types type,
+ u32 attr, int channel)
+{
+ const struct nvme_hwmon_data *data = _data;
+
+ switch (attr) {
+ case hwmon_temp_crit:
+ if (!channel && data->ctrl->cctemp)
+ return 0444;
+ break;
+ case hwmon_temp_max:
+ case hwmon_temp_min:
+ if ((!channel && data->ctrl->wctemp) ||
+ (channel && data->log.temp_sensor[channel - 1])) {
+ if (data->ctrl->quirks &
+ NVME_QUIRK_NO_TEMP_THRESH_CHANGE)
+ return 0444;
+ return 0644;
+ }
+ break;
+ case hwmon_temp_alarm:
+ if (!channel)
+ return 0444;
+ break;
+ case hwmon_temp_input:
+ case hwmon_temp_label:
+ if (!channel || data->log.temp_sensor[channel - 1])
+ return 0444;
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static const struct hwmon_channel_info *nvme_hwmon_info[] = {
+ HWMON_CHANNEL_INFO(chip, HWMON_C_REGISTER_TZ),
+ HWMON_CHANNEL_INFO(temp,
+ HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN |
+ HWMON_T_CRIT | HWMON_T_LABEL | HWMON_T_ALARM,
+ HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN |
+ HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN |
+ HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN |
+ HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN |
+ HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN |
+ HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN |
+ HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN |
+ HWMON_T_LABEL,
+ HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN |
+ HWMON_T_LABEL),
+ NULL
+};
+
+static const struct hwmon_ops nvme_hwmon_ops = {
+ .is_visible = nvme_hwmon_is_visible,
+ .read = nvme_hwmon_read,
+ .read_string = nvme_hwmon_read_string,
+ .write = nvme_hwmon_write,
+};
+
+static const struct hwmon_chip_info nvme_hwmon_chip_info = {
+ .ops = &nvme_hwmon_ops,
+ .info = nvme_hwmon_info,
+};
+
+void nvme_hwmon_init(struct nvme_ctrl *ctrl)
+{
+ struct device *dev = ctrl->dev;
+ struct nvme_hwmon_data *data;
+ struct device *hwmon;
+ int err;
+
+ data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return;
+
+ data->ctrl = ctrl;
+ mutex_init(&data->read_lock);
+
+ err = nvme_hwmon_get_smart_log(data);
+ if (err) {
+ dev_warn(dev, "Failed to read smart log (error %d)\n", err);
+ devm_kfree(dev, data);
+ return;
+ }
+
+ hwmon = devm_hwmon_device_register_with_info(dev, "nvme", data,
+ &nvme_hwmon_chip_info,
+ NULL);
+ if (IS_ERR(hwmon)) {
+ dev_warn(dev, "Failed to instantiate hwmon device\n");
+ devm_kfree(dev, data);
+ }
+}
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 30de7efef003..797c18337d96 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -95,6 +95,7 @@ void nvme_failover_req(struct request *req)
}
break;
case NVME_SC_HOST_PATH_ERROR:
+ case NVME_SC_HOST_ABORTED_CMD:
/*
* Temporary transport disruption in talking to the controller.
* Try to send on a new path.
@@ -158,9 +159,11 @@ void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
struct nvme_ns *ns;
mutex_lock(&ctrl->scan_lock);
+ down_read(&ctrl->namespaces_rwsem);
list_for_each_entry(ns, &ctrl->namespaces, list)
if (nvme_mpath_clear_current_path(ns))
kblockd_schedule_work(&ns->head->requeue_work);
+ up_read(&ctrl->namespaces_rwsem);
mutex_unlock(&ctrl->scan_lock);
}
@@ -444,8 +447,14 @@ static int nvme_parse_ana_log(struct nvme_ctrl *ctrl, void *data,
for (i = 0; i < le16_to_cpu(ctrl->ana_log_buf->ngrps); i++) {
struct nvme_ana_group_desc *desc = base + offset;
- u32 nr_nsids = le32_to_cpu(desc->nnsids);
- size_t nsid_buf_size = nr_nsids * sizeof(__le32);
+ u32 nr_nsids;
+ size_t nsid_buf_size;
+
+ if (WARN_ON_ONCE(offset > ctrl->ana_log_size - sizeof(*desc)))
+ return -EINVAL;
+
+ nr_nsids = le32_to_cpu(desc->nnsids);
+ nsid_buf_size = nr_nsids * sizeof(__le32);
if (WARN_ON_ONCE(desc->grpid == 0))
return -EINVAL;
@@ -465,8 +474,6 @@ static int nvme_parse_ana_log(struct nvme_ctrl *ctrl, void *data,
return error;
offset += nsid_buf_size;
- if (WARN_ON_ONCE(offset > ctrl->ana_log_size - sizeof(*desc)))
- return -EINVAL;
}
return 0;
@@ -522,14 +529,13 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
return 0;
}
-static int nvme_read_ana_log(struct nvme_ctrl *ctrl, bool groups_only)
+static int nvme_read_ana_log(struct nvme_ctrl *ctrl)
{
u32 nr_change_groups = 0;
int error;
mutex_lock(&ctrl->ana_lock);
- error = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_ANA,
- groups_only ? NVME_ANA_LOG_RGO : 0,
+ error = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_ANA, 0,
ctrl->ana_log_buf, ctrl->ana_log_size, 0);
if (error) {
dev_warn(ctrl->device, "Failed to get ANA log: %d\n", error);
@@ -565,7 +571,7 @@ static void nvme_ana_work(struct work_struct *work)
{
struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, ana_work);
- nvme_read_ana_log(ctrl, false);
+ nvme_read_ana_log(ctrl);
}
static void nvme_anatt_timeout(struct timer_list *t)
@@ -715,7 +721,7 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
goto out;
}
- error = nvme_read_ana_log(ctrl, true);
+ error = nvme_read_ana_log(ctrl);
if (error)
goto out_free_ana_log_buf;
return 0;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 38a83ef5bcd3..3b9cbe0668fa 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -15,6 +15,7 @@
#include <linux/sed-opal.h>
#include <linux/fault-inject.h>
#include <linux/rcupdate.h>
+#include <linux/wait.h>
#include <trace/events/block.h>
@@ -114,6 +115,11 @@ enum nvme_quirks {
* Prevent tag overlap between queues
*/
NVME_QUIRK_SHARED_TAGS = (1 << 13),
+
+ /*
+ * Don't change the value of the temperature threshold feature
+ */
+ NVME_QUIRK_NO_TEMP_THRESH_CHANGE = (1 << 14),
};
/*
@@ -161,7 +167,6 @@ static inline u16 nvme_req_qid(struct request *req)
enum nvme_ctrl_state {
NVME_CTRL_NEW,
NVME_CTRL_LIVE,
- NVME_CTRL_ADMIN_ONLY, /* Only admin queue live */
NVME_CTRL_RESETTING,
NVME_CTRL_CONNECTING,
NVME_CTRL_DELETING,
@@ -199,6 +204,7 @@ struct nvme_ctrl {
struct cdev cdev;
struct work_struct reset_work;
struct work_struct delete_work;
+ wait_queue_head_t state_wq;
struct nvme_subsystem *subsys;
struct list_head subsys_entry;
@@ -230,6 +236,8 @@ struct nvme_ctrl {
u16 kas;
u8 npss;
u8 apsta;
+ u16 wctemp;
+ u16 cctemp;
u32 oaes;
u32 aen_result;
u32 ctratt;
@@ -418,9 +426,20 @@ static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl)
return ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65);
}
-static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector)
+/*
+ * Convert a 512B sector number to a device logical block number.
+ */
+static inline u64 nvme_sect_to_lba(struct nvme_ns *ns, sector_t sector)
+{
+ return sector >> (ns->lba_shift - SECTOR_SHIFT);
+}
+
+/*
+ * Convert a device logical block number to a 512B sector number.
+ */
+static inline sector_t nvme_lba_to_sect(struct nvme_ns *ns, u64 lba)
{
- return (sector >> (ns->lba_shift - 9));
+ return lba << (ns->lba_shift - SECTOR_SHIFT);
}
static inline void nvme_end_request(struct request *req, __le16 status,
@@ -445,10 +464,16 @@ static inline void nvme_put_ctrl(struct nvme_ctrl *ctrl)
put_device(ctrl->device);
}
+static inline bool nvme_is_aen_req(u16 qid, __u16 command_id)
+{
+ return !qid && command_id >= NVME_AQ_BLK_MQ_DEPTH;
+}
+
void nvme_complete_rq(struct request *req);
bool nvme_cancel_request(struct request *req, void *data, bool reserved);
bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
enum nvme_ctrl_state new_state);
+bool nvme_wait_reset(struct nvme_ctrl *ctrl);
int nvme_disable_ctrl(struct nvme_ctrl *ctrl);
int nvme_enable_ctrl(struct nvme_ctrl *ctrl);
int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl);
@@ -499,6 +524,7 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl);
+int nvme_try_sched_reset(struct nvme_ctrl *ctrl);
int nvme_delete_ctrl(struct nvme_ctrl *ctrl);
int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp,
@@ -649,4 +675,10 @@ static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
return dev_to_disk(dev)->private_data;
}
+#ifdef CONFIG_NVME_HWMON
+void nvme_hwmon_init(struct nvme_ctrl *ctrl);
+#else
+static inline void nvme_hwmon_init(struct nvme_ctrl *ctrl) { }
+#endif
+
#endif /* _NVME_H */
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index bb88681f4dc3..dcaad5831cee 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -773,7 +773,8 @@ static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev,
struct bio_vec *bv)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
- unsigned int first_prp_len = dev->ctrl.page_size - bv->bv_offset;
+ unsigned int offset = bv->bv_offset & (dev->ctrl.page_size - 1);
+ unsigned int first_prp_len = dev->ctrl.page_size - offset;
iod->first_dma = dma_map_bvec(dev->dev, bv, rq_dma_dir(req), 0);
if (dma_mapping_error(dev->dev, iod->first_dma))
@@ -924,7 +925,6 @@ static void nvme_pci_complete_rq(struct request *req)
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
struct nvme_dev *dev = iod->nvmeq->dev;
- nvme_cleanup_cmd(req);
if (blk_integrity_rq(req))
dma_unmap_page(dev->dev, iod->meta_dma,
rq_integrity_vec(req)->bv_len, rq_data_dir(req));
@@ -967,8 +967,7 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
* aborts. We don't even bother to allocate a struct request
* for them but rather special case them here.
*/
- if (unlikely(nvmeq->qid == 0 &&
- cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH)) {
+ if (unlikely(nvme_is_aen_req(nvmeq->qid, cqe->command_id))) {
nvme_complete_async_event(&nvmeq->dev->ctrl,
cqe->status, &cqe->result);
return;
@@ -2263,10 +2262,7 @@ static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode)
return true;
}
-/*
- * return error value only when tagset allocation failed
- */
-static int nvme_dev_add(struct nvme_dev *dev)
+static void nvme_dev_add(struct nvme_dev *dev)
{
int ret;
@@ -2296,7 +2292,7 @@ static int nvme_dev_add(struct nvme_dev *dev)
if (ret) {
dev_warn(dev->ctrl.device,
"IO queues tagset allocation failed %d\n", ret);
- return ret;
+ return;
}
dev->ctrl.tagset = &dev->tagset;
} else {
@@ -2307,7 +2303,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
}
nvme_dbbuf_set(dev);
- return 0;
}
static int nvme_pci_enable(struct nvme_dev *dev)
@@ -2467,6 +2462,14 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
mutex_unlock(&dev->shutdown_lock);
}
+static int nvme_disable_prepare_reset(struct nvme_dev *dev, bool shutdown)
+{
+ if (!nvme_wait_reset(&dev->ctrl))
+ return -EBUSY;
+ nvme_dev_disable(dev, shutdown);
+ return 0;
+}
+
static int nvme_setup_prp_pools(struct nvme_dev *dev)
{
dev->prp_page_pool = dma_pool_create("prp list page", dev->dev,
@@ -2490,14 +2493,20 @@ static void nvme_release_prp_pools(struct nvme_dev *dev)
dma_pool_destroy(dev->prp_small_pool);
}
+static void nvme_free_tagset(struct nvme_dev *dev)
+{
+ if (dev->tagset.tags)
+ blk_mq_free_tag_set(&dev->tagset);
+ dev->ctrl.tagset = NULL;
+}
+
static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
{
struct nvme_dev *dev = to_nvme_dev(ctrl);
nvme_dbbuf_dma_free(dev);
put_device(dev->dev);
- if (dev->tagset.tags)
- blk_mq_free_tag_set(&dev->tagset);
+ nvme_free_tagset(dev);
if (dev->ctrl.admin_q)
blk_put_queue(dev->ctrl.admin_q);
kfree(dev->queues);
@@ -2508,6 +2517,11 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
static void nvme_remove_dead_ctrl(struct nvme_dev *dev)
{
+ /*
+ * Set state to deleting now to avoid blocking nvme_wait_reset(), which
+ * may be holding this pci_dev's device lock.
+ */
+ nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
nvme_get_ctrl(&dev->ctrl);
nvme_dev_disable(dev, false);
nvme_kill_queues(&dev->ctrl);
@@ -2521,7 +2535,6 @@ static void nvme_reset_work(struct work_struct *work)
container_of(work, struct nvme_dev, ctrl.reset_work);
bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
int result;
- enum nvme_ctrl_state new_state = NVME_CTRL_LIVE;
if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING)) {
result = -ENODEV;
@@ -2615,13 +2628,11 @@ static void nvme_reset_work(struct work_struct *work)
dev_warn(dev->ctrl.device, "IO queues not created\n");
nvme_kill_queues(&dev->ctrl);
nvme_remove_namespaces(&dev->ctrl);
- new_state = NVME_CTRL_ADMIN_ONLY;
+ nvme_free_tagset(dev);
} else {
nvme_start_queues(&dev->ctrl);
nvme_wait_freeze(&dev->ctrl);
- /* hit this only when allocate tagset fails */
- if (nvme_dev_add(dev))
- new_state = NVME_CTRL_ADMIN_ONLY;
+ nvme_dev_add(dev);
nvme_unfreeze(&dev->ctrl);
}
@@ -2629,9 +2640,9 @@ static void nvme_reset_work(struct work_struct *work)
* If only admin queue live, keep it to do further investigation or
* recovery.
*/
- if (!nvme_change_ctrl_state(&dev->ctrl, new_state)) {
+ if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) {
dev_warn(dev->ctrl.device,
- "failed to mark controller state %d\n", new_state);
+ "failed to mark controller live state\n");
result = -ENODEV;
goto out;
}
@@ -2672,7 +2683,7 @@ static int nvme_pci_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
{
- *val = readq(to_nvme_dev(ctrl)->bar + off);
+ *val = lo_hi_readq(to_nvme_dev(ctrl)->bar + off);
return 0;
}
@@ -2836,19 +2847,28 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
static void nvme_reset_prepare(struct pci_dev *pdev)
{
struct nvme_dev *dev = pci_get_drvdata(pdev);
- nvme_dev_disable(dev, false);
+
+ /*
+ * We don't need to check the return value from waiting for the reset
+ * state as pci_dev device lock is held, making it impossible to race
+ * with ->remove().
+ */
+ nvme_disable_prepare_reset(dev, false);
+ nvme_sync_queues(&dev->ctrl);
}
static void nvme_reset_done(struct pci_dev *pdev)
{
struct nvme_dev *dev = pci_get_drvdata(pdev);
- nvme_reset_ctrl_sync(&dev->ctrl);
+
+ if (!nvme_try_sched_reset(&dev->ctrl))
+ flush_work(&dev->ctrl.reset_work);
}
static void nvme_shutdown(struct pci_dev *pdev)
{
struct nvme_dev *dev = pci_get_drvdata(pdev);
- nvme_dev_disable(dev, true);
+ nvme_disable_prepare_reset(dev, true);
}
/*
@@ -2901,7 +2921,7 @@ static int nvme_resume(struct device *dev)
if (ndev->last_ps == U32_MAX ||
nvme_set_power_state(ctrl, ndev->last_ps) != 0)
- nvme_reset_ctrl(ctrl);
+ return nvme_try_sched_reset(&ndev->ctrl);
return 0;
}
@@ -2929,17 +2949,14 @@ static int nvme_suspend(struct device *dev)
*/
if (pm_suspend_via_firmware() || !ctrl->npss ||
!pcie_aspm_enabled(pdev) ||
- (ndev->ctrl.quirks & NVME_QUIRK_SIMPLE_SUSPEND)) {
- nvme_dev_disable(ndev, true);
- return 0;
- }
+ (ndev->ctrl.quirks & NVME_QUIRK_SIMPLE_SUSPEND))
+ return nvme_disable_prepare_reset(ndev, true);
nvme_start_freeze(ctrl);
nvme_wait_freeze(ctrl);
nvme_sync_queues(ctrl);
- if (ctrl->state != NVME_CTRL_LIVE &&
- ctrl->state != NVME_CTRL_ADMIN_ONLY)
+ if (ctrl->state != NVME_CTRL_LIVE)
goto unfreeze;
ret = nvme_get_power_state(ctrl, &ndev->last_ps);
@@ -2963,11 +2980,10 @@ static int nvme_suspend(struct device *dev)
/*
* Clearing npss forces a controller reset on resume. The
- * correct value will be resdicovered then.
+ * correct value will be rediscovered then.
*/
- nvme_dev_disable(ndev, true);
+ ret = nvme_disable_prepare_reset(ndev, true);
ctrl->npss = 0;
- ret = 0;
}
unfreeze:
nvme_unfreeze(ctrl);
@@ -2977,9 +2993,7 @@ unfreeze:
static int nvme_simple_suspend(struct device *dev)
{
struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev));
-
- nvme_dev_disable(ndev, true);
- return 0;
+ return nvme_disable_prepare_reset(ndev, true);
}
static int nvme_simple_resume(struct device *dev)
@@ -2987,8 +3001,7 @@ static int nvme_simple_resume(struct device *dev)
struct pci_dev *pdev = to_pci_dev(dev);
struct nvme_dev *ndev = pci_get_drvdata(pdev);
- nvme_reset_ctrl(&ndev->ctrl);
- return 0;
+ return nvme_try_sched_reset(&ndev->ctrl);
}
static const struct dev_pm_ops nvme_dev_pm_ops = {
@@ -3067,7 +3080,8 @@ static const struct pci_device_id nvme_id_table[] = {
NVME_QUIRK_DEALLOCATE_ZEROES, },
{ PCI_VDEVICE(INTEL, 0xf1a5), /* Intel 600P/P3100 */
.driver_data = NVME_QUIRK_NO_DEEPEST_PS |
- NVME_QUIRK_MEDIUM_PRIO_SQ },
+ NVME_QUIRK_MEDIUM_PRIO_SQ |
+ NVME_QUIRK_NO_TEMP_THRESH_CHANGE },
{ PCI_VDEVICE(INTEL, 0xf1a6), /* Intel 760p/Pro 7600p */
.driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
{ PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 4d280160dd3f..dce59459ed41 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1160,8 +1160,6 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
}
ib_dma_unmap_sg(ibdev, req->sg_table.sgl, req->nents, rq_dma_dir(rq));
-
- nvme_cleanup_cmd(rq);
sg_free_table_chained(&req->sg_table, SG_CHUNK_SIZE);
}
@@ -1501,8 +1499,8 @@ static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
* aborts. We don't even bother to allocate a struct request
* for them but rather special case them here.
*/
- if (unlikely(nvme_rdma_queue_idx(queue) == 0 &&
- cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH))
+ if (unlikely(nvme_is_aen_req(nvme_rdma_queue_idx(queue),
+ cqe->command_id)))
nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status,
&cqe->result);
else
@@ -1701,6 +1699,14 @@ nvme_rdma_timeout(struct request *rq, bool reserved)
dev_warn(ctrl->ctrl.device, "I/O %d QID %d timeout\n",
rq->tag, nvme_rdma_queue_idx(queue));
+ /*
+ * Restart the timer if a controller reset is already scheduled. Any
+ * timed out commands would be handled before entering the connecting
+ * state.
+ */
+ if (ctrl->ctrl.state == NVME_CTRL_RESETTING)
+ return BLK_EH_RESET_TIMER;
+
if (ctrl->ctrl.state != NVME_CTRL_LIVE) {
/*
* Teardown immediately if controller times out while starting
@@ -1760,7 +1766,6 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
if (unlikely(err < 0)) {
dev_err(queue->ctrl->ctrl.device,
"Failed to map data (%d)\n", err);
- nvme_cleanup_cmd(rq);
goto err;
}
@@ -1771,18 +1776,19 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
req->mr ? &req->reg_wr.wr : NULL);
- if (unlikely(err)) {
- nvme_rdma_unmap_data(queue, rq);
- goto err;
- }
+ if (unlikely(err))
+ goto err_unmap;
return BLK_STS_OK;
+err_unmap:
+ nvme_rdma_unmap_data(queue, rq);
err:
if (err == -ENOMEM || err == -EAGAIN)
ret = BLK_STS_RESOURCE;
else
ret = BLK_STS_IOERR;
+ nvme_cleanup_cmd(rq);
unmap_qe:
ib_dma_unmap_single(dev, req->sqe.dma, sizeof(struct nvme_command),
DMA_TO_DEVICE);
@@ -2125,8 +2131,16 @@ err_unreg_client:
static void __exit nvme_rdma_cleanup_module(void)
{
+ struct nvme_rdma_ctrl *ctrl;
+
nvmf_unregister_transport(&nvme_rdma_transport);
ib_unregister_client(&nvme_rdma_ib_client);
+
+ mutex_lock(&nvme_rdma_ctrl_mutex);
+ list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list)
+ nvme_delete_ctrl(&ctrl->ctrl);
+ mutex_unlock(&nvme_rdma_ctrl_mutex);
+ flush_workqueue(nvme_delete_wq);
}
module_init(nvme_rdma_init_module);
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 385a5212c10f..6d43b23a0fc8 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -491,8 +491,8 @@ static int nvme_tcp_handle_comp(struct nvme_tcp_queue *queue,
* aborts. We don't even bother to allocate a struct request
* for them but rather special case them here.
*/
- if (unlikely(nvme_tcp_queue_id(queue) == 0 &&
- cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH))
+ if (unlikely(nvme_is_aen_req(nvme_tcp_queue_id(queue),
+ cqe->command_id)))
nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status,
&cqe->result);
else
@@ -1386,7 +1386,9 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
queue->sock->sk->sk_data_ready = nvme_tcp_data_ready;
queue->sock->sk->sk_state_change = nvme_tcp_state_change;
queue->sock->sk->sk_write_space = nvme_tcp_write_space;
+#ifdef CONFIG_NET_RX_BUSY_POLL
queue->sock->sk->sk_ll_usec = 1;
+#endif
write_unlock_bh(&queue->sock->sk->sk_callback_lock);
return 0;
@@ -2044,6 +2046,14 @@ nvme_tcp_timeout(struct request *rq, bool reserved)
struct nvme_tcp_ctrl *ctrl = req->queue->ctrl;
struct nvme_tcp_cmd_pdu *pdu = req->pdu;
+ /*
+ * Restart the timer if a controller reset is already scheduled. Any
+ * timed out commands would be handled before entering the connecting
+ * state.
+ */
+ if (ctrl->ctrl.state == NVME_CTRL_RESETTING)
+ return BLK_EH_RESET_TIMER;
+
dev_warn(ctrl->ctrl.device,
"queue %d: timeout request %#x type %d\n",
nvme_tcp_queue_id(req->queue), rq->tag, pdu->hdr.type);
@@ -2126,6 +2136,7 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns,
ret = nvme_tcp_map_data(queue, rq);
if (unlikely(ret)) {
+ nvme_cleanup_cmd(rq);
dev_err(queue->ctrl->ctrl.device,
"Failed to map data (%d)\n", ret);
return ret;
@@ -2208,7 +2219,7 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx)
struct nvme_tcp_queue *queue = hctx->driver_data;
struct sock *sk = queue->sock->sk;
- if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue))
+ if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue))
sk_busy_loop(sk, true);
nvme_tcp_try_recv(queue);
return queue->nr_cqe;
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 831a062d27cb..56c21b501185 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -31,7 +31,7 @@ u64 nvmet_get_log_page_offset(struct nvme_command *cmd)
static void nvmet_execute_get_log_page_noop(struct nvmet_req *req)
{
- nvmet_req_complete(req, nvmet_zero_sgl(req, 0, req->data_len));
+ nvmet_req_complete(req, nvmet_zero_sgl(req, 0, req->transfer_len));
}
static void nvmet_execute_get_log_page_error(struct nvmet_req *req)
@@ -134,7 +134,7 @@ static void nvmet_execute_get_log_page_smart(struct nvmet_req *req)
u16 status = NVME_SC_INTERNAL;
unsigned long flags;
- if (req->data_len != sizeof(*log))
+ if (req->transfer_len != sizeof(*log))
goto out;
log = kzalloc(sizeof(*log), GFP_KERNEL);
@@ -196,7 +196,7 @@ static void nvmet_execute_get_log_changed_ns(struct nvmet_req *req)
u16 status = NVME_SC_INTERNAL;
size_t len;
- if (req->data_len != NVME_MAX_CHANGED_NAMESPACES * sizeof(__le32))
+ if (req->transfer_len != NVME_MAX_CHANGED_NAMESPACES * sizeof(__le32))
goto out;
mutex_lock(&ctrl->lock);
@@ -206,7 +206,7 @@ static void nvmet_execute_get_log_changed_ns(struct nvmet_req *req)
len = ctrl->nr_changed_ns * sizeof(__le32);
status = nvmet_copy_to_sgl(req, 0, ctrl->changed_ns_list, len);
if (!status)
- status = nvmet_zero_sgl(req, len, req->data_len - len);
+ status = nvmet_zero_sgl(req, len, req->transfer_len - len);
ctrl->nr_changed_ns = 0;
nvmet_clear_aen_bit(req, NVME_AEN_BIT_NS_ATTR);
mutex_unlock(&ctrl->lock);
@@ -282,6 +282,36 @@ out:
nvmet_req_complete(req, status);
}
+static void nvmet_execute_get_log_page(struct nvmet_req *req)
+{
+ if (!nvmet_check_data_len(req, nvmet_get_log_page_len(req->cmd)))
+ return;
+
+ switch (req->cmd->get_log_page.lid) {
+ case NVME_LOG_ERROR:
+ return nvmet_execute_get_log_page_error(req);
+ case NVME_LOG_SMART:
+ return nvmet_execute_get_log_page_smart(req);
+ case NVME_LOG_FW_SLOT:
+ /*
+ * We only support a single firmware slot which always is
+ * active, so we can zero out the whole firmware slot log and
+ * still claim to fully implement this mandatory log page.
+ */
+ return nvmet_execute_get_log_page_noop(req);
+ case NVME_LOG_CHANGED_NS:
+ return nvmet_execute_get_log_changed_ns(req);
+ case NVME_LOG_CMD_EFFECTS:
+ return nvmet_execute_get_log_cmd_effects_ns(req);
+ case NVME_LOG_ANA:
+ return nvmet_execute_get_log_page_ana(req);
+ }
+ pr_err("unhandled lid %d on qid %d\n",
+ req->cmd->get_log_page.lid, req->sq->qid);
+ req->error_loc = offsetof(struct nvme_get_log_page_command, lid);
+ nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR);
+}
+
static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
@@ -565,6 +595,28 @@ out:
nvmet_req_complete(req, status);
}
+static void nvmet_execute_identify(struct nvmet_req *req)
+{
+ if (!nvmet_check_data_len(req, NVME_IDENTIFY_DATA_SIZE))
+ return;
+
+ switch (req->cmd->identify.cns) {
+ case NVME_ID_CNS_NS:
+ return nvmet_execute_identify_ns(req);
+ case NVME_ID_CNS_CTRL:
+ return nvmet_execute_identify_ctrl(req);
+ case NVME_ID_CNS_NS_ACTIVE_LIST:
+ return nvmet_execute_identify_nslist(req);
+ case NVME_ID_CNS_NS_DESC_LIST:
+ return nvmet_execute_identify_desclist(req);
+ }
+
+ pr_err("unhandled identify cns %d on qid %d\n",
+ req->cmd->identify.cns, req->sq->qid);
+ req->error_loc = offsetof(struct nvme_identify, cns);
+ nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR);
+}
+
/*
* A "minimum viable" abort implementation: the command is mandatory in the
* spec, but we are not required to do any useful work. We couldn't really
@@ -574,6 +626,8 @@ out:
*/
static void nvmet_execute_abort(struct nvmet_req *req)
{
+ if (!nvmet_check_data_len(req, 0))
+ return;
nvmet_set_result(req, 1);
nvmet_req_complete(req, 0);
}
@@ -658,6 +712,9 @@ static void nvmet_execute_set_features(struct nvmet_req *req)
u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
u16 status = 0;
+ if (!nvmet_check_data_len(req, 0))
+ return;
+
switch (cdw10 & 0xff) {
case NVME_FEAT_NUM_QUEUES:
nvmet_set_result(req,
@@ -721,6 +778,9 @@ static void nvmet_execute_get_features(struct nvmet_req *req)
u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
u16 status = 0;
+ if (!nvmet_check_data_len(req, 0))
+ return;
+
switch (cdw10 & 0xff) {
/*
* These features are mandatory in the spec, but we don't
@@ -785,6 +845,9 @@ void nvmet_execute_async_event(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
+ if (!nvmet_check_data_len(req, 0))
+ return;
+
mutex_lock(&ctrl->lock);
if (ctrl->nr_async_event_cmds >= NVMET_ASYNC_EVENTS) {
mutex_unlock(&ctrl->lock);
@@ -801,6 +864,9 @@ void nvmet_execute_keep_alive(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
+ if (!nvmet_check_data_len(req, 0))
+ return;
+
pr_debug("ctrl %d update keep-alive timer for %d secs\n",
ctrl->cntlid, ctrl->kato);
@@ -813,77 +879,36 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
struct nvme_command *cmd = req->cmd;
u16 ret;
+ if (nvme_is_fabrics(cmd))
+ return nvmet_parse_fabrics_cmd(req);
+ if (req->sq->ctrl->subsys->type == NVME_NQN_DISC)
+ return nvmet_parse_discovery_cmd(req);
+
ret = nvmet_check_ctrl_status(req, cmd);
if (unlikely(ret))
return ret;
switch (cmd->common.opcode) {
case nvme_admin_get_log_page:
- req->data_len = nvmet_get_log_page_len(cmd);
-
- switch (cmd->get_log_page.lid) {
- case NVME_LOG_ERROR:
- req->execute = nvmet_execute_get_log_page_error;
- return 0;
- case NVME_LOG_SMART:
- req->execute = nvmet_execute_get_log_page_smart;
- return 0;
- case NVME_LOG_FW_SLOT:
- /*
- * We only support a single firmware slot which always
- * is active, so we can zero out the whole firmware slot
- * log and still claim to fully implement this mandatory
- * log page.
- */
- req->execute = nvmet_execute_get_log_page_noop;
- return 0;
- case NVME_LOG_CHANGED_NS:
- req->execute = nvmet_execute_get_log_changed_ns;
- return 0;
- case NVME_LOG_CMD_EFFECTS:
- req->execute = nvmet_execute_get_log_cmd_effects_ns;
- return 0;
- case NVME_LOG_ANA:
- req->execute = nvmet_execute_get_log_page_ana;
- return 0;
- }
- break;
+ req->execute = nvmet_execute_get_log_page;
+ return 0;
case nvme_admin_identify:
- req->data_len = NVME_IDENTIFY_DATA_SIZE;
- switch (cmd->identify.cns) {
- case NVME_ID_CNS_NS:
- req->execute = nvmet_execute_identify_ns;
- return 0;
- case NVME_ID_CNS_CTRL:
- req->execute = nvmet_execute_identify_ctrl;
- return 0;
- case NVME_ID_CNS_NS_ACTIVE_LIST:
- req->execute = nvmet_execute_identify_nslist;
- return 0;
- case NVME_ID_CNS_NS_DESC_LIST:
- req->execute = nvmet_execute_identify_desclist;
- return 0;
- }
- break;
+ req->execute = nvmet_execute_identify;
+ return 0;
case nvme_admin_abort_cmd:
req->execute = nvmet_execute_abort;
- req->data_len = 0;
return 0;
case nvme_admin_set_features:
req->execute = nvmet_execute_set_features;
- req->data_len = 0;
return 0;
case nvme_admin_get_features:
req->execute = nvmet_execute_get_features;
- req->data_len = 0;
return 0;
case nvme_admin_async_event:
req->execute = nvmet_execute_async_event;
- req->data_len = 0;
return 0;
case nvme_admin_keep_alive:
req->execute = nvmet_execute_keep_alive;
- req->data_len = 0;
return 0;
}
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 3a67e244e568..28438b833c1b 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -892,14 +892,10 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
}
if (unlikely(!req->sq->ctrl))
- /* will return an error for any Non-connect command: */
+ /* will return an error for any non-connect command: */
status = nvmet_parse_connect_cmd(req);
else if (likely(req->sq->qid != 0))
status = nvmet_parse_io_cmd(req);
- else if (nvme_is_fabrics(req->cmd))
- status = nvmet_parse_fabrics_cmd(req);
- else if (req->sq->ctrl->subsys->type == NVME_NQN_DISC)
- status = nvmet_parse_discovery_cmd(req);
else
status = nvmet_parse_admin_cmd(req);
@@ -930,15 +926,17 @@ void nvmet_req_uninit(struct nvmet_req *req)
}
EXPORT_SYMBOL_GPL(nvmet_req_uninit);
-void nvmet_req_execute(struct nvmet_req *req)
+bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len)
{
- if (unlikely(req->data_len != req->transfer_len)) {
+ if (unlikely(data_len != req->transfer_len)) {
req->error_loc = offsetof(struct nvme_common_command, dptr);
nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR);
- } else
- req->execute(req);
+ return false;
+ }
+
+ return true;
}
-EXPORT_SYMBOL_GPL(nvmet_req_execute);
+EXPORT_SYMBOL_GPL(nvmet_check_data_len);
int nvmet_req_alloc_sgl(struct nvmet_req *req)
{
@@ -966,7 +964,7 @@ int nvmet_req_alloc_sgl(struct nvmet_req *req)
}
req->sg = sgl_alloc(req->transfer_len, GFP_KERNEL, &req->sg_cnt);
- if (!req->sg)
+ if (unlikely(!req->sg))
return -ENOMEM;
return 0;
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index 3764a8900850..0c2274b21e15 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -157,7 +157,7 @@ static size_t discovery_log_entries(struct nvmet_req *req)
return entries;
}
-static void nvmet_execute_get_disc_log_page(struct nvmet_req *req)
+static void nvmet_execute_disc_get_log_page(struct nvmet_req *req)
{
const int entry_size = sizeof(struct nvmf_disc_rsp_page_entry);
struct nvmet_ctrl *ctrl = req->sq->ctrl;
@@ -171,6 +171,16 @@ static void nvmet_execute_get_disc_log_page(struct nvmet_req *req)
u16 status = 0;
void *buffer;
+ if (!nvmet_check_data_len(req, data_len))
+ return;
+
+ if (req->cmd->get_log_page.lid != NVME_LOG_DISC) {
+ req->error_loc =
+ offsetof(struct nvme_get_log_page_command, lid);
+ status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ goto out;
+ }
+
/* Spec requires dword aligned offsets */
if (offset & 0x3) {
status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
@@ -227,20 +237,35 @@ out:
nvmet_req_complete(req, status);
}
-static void nvmet_execute_identify_disc_ctrl(struct nvmet_req *req)
+static void nvmet_execute_disc_identify(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
struct nvme_id_ctrl *id;
+ const char model[] = "Linux";
u16 status = 0;
+ if (!nvmet_check_data_len(req, NVME_IDENTIFY_DATA_SIZE))
+ return;
+
+ if (req->cmd->identify.cns != NVME_ID_CNS_CTRL) {
+ req->error_loc = offsetof(struct nvme_identify, cns);
+ status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
+ goto out;
+ }
+
id = kzalloc(sizeof(*id), GFP_KERNEL);
if (!id) {
status = NVME_SC_INTERNAL;
goto out;
}
+ memset(id->sn, ' ', sizeof(id->sn));
+ bin2hex(id->sn, &ctrl->subsys->serial,
+ min(sizeof(ctrl->subsys->serial), sizeof(id->sn) / 2));
memset(id->fr, ' ', sizeof(id->fr));
- strncpy((char *)id->fr, UTS_RELEASE, sizeof(id->fr));
+ memcpy_and_pad(id->mn, sizeof(id->mn), model, sizeof(model) - 1, ' ');
+ memcpy_and_pad(id->fr, sizeof(id->fr),
+ UTS_RELEASE, strlen(UTS_RELEASE), ' ');
/* no limit on data transfer sizes for now */
id->mdts = 0;
@@ -273,6 +298,9 @@ static void nvmet_execute_disc_set_features(struct nvmet_req *req)
u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
u16 stat;
+ if (!nvmet_check_data_len(req, 0))
+ return;
+
switch (cdw10 & 0xff) {
case NVME_FEAT_KATO:
stat = nvmet_set_feat_kato(req);
@@ -296,6 +324,9 @@ static void nvmet_execute_disc_get_features(struct nvmet_req *req)
u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
u16 stat = 0;
+ if (!nvmet_check_data_len(req, 0))
+ return;
+
switch (cdw10 & 0xff) {
case NVME_FEAT_KATO:
nvmet_get_feat_kato(req);
@@ -328,47 +359,22 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req)
switch (cmd->common.opcode) {
case nvme_admin_set_features:
req->execute = nvmet_execute_disc_set_features;
- req->data_len = 0;
return 0;
case nvme_admin_get_features:
req->execute = nvmet_execute_disc_get_features;
- req->data_len = 0;
return 0;
case nvme_admin_async_event:
req->execute = nvmet_execute_async_event;
- req->data_len = 0;
return 0;
case nvme_admin_keep_alive:
req->execute = nvmet_execute_keep_alive;
- req->data_len = 0;
return 0;
case nvme_admin_get_log_page:
- req->data_len = nvmet_get_log_page_len(cmd);
-
- switch (cmd->get_log_page.lid) {
- case NVME_LOG_DISC:
- req->execute = nvmet_execute_get_disc_log_page;
- return 0;
- default:
- pr_err("unsupported get_log_page lid %d\n",
- cmd->get_log_page.lid);
- req->error_loc =
- offsetof(struct nvme_get_log_page_command, lid);
- return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
- }
+ req->execute = nvmet_execute_disc_get_log_page;
+ return 0;
case nvme_admin_identify:
- req->data_len = NVME_IDENTIFY_DATA_SIZE;
- switch (cmd->identify.cns) {
- case NVME_ID_CNS_CTRL:
- req->execute =
- nvmet_execute_identify_disc_ctrl;
- return 0;
- default:
- pr_err("unsupported identify cns %d\n",
- cmd->identify.cns);
- req->error_loc = offsetof(struct nvme_identify, cns);
- return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
- }
+ req->execute = nvmet_execute_disc_identify;
+ return 0;
default:
pr_err("unhandled cmd %d\n", cmd->common.opcode);
req->error_loc = offsetof(struct nvme_common_command, opcode);
diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c
index d16b55ffe79f..f7297473d9eb 100644
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -12,6 +12,9 @@ static void nvmet_execute_prop_set(struct nvmet_req *req)
u64 val = le64_to_cpu(req->cmd->prop_set.value);
u16 status = 0;
+ if (!nvmet_check_data_len(req, 0))
+ return;
+
if (req->cmd->prop_set.attrib & 1) {
req->error_loc =
offsetof(struct nvmf_property_set_command, attrib);
@@ -38,6 +41,9 @@ static void nvmet_execute_prop_get(struct nvmet_req *req)
u16 status = 0;
u64 val = 0;
+ if (!nvmet_check_data_len(req, 0))
+ return;
+
if (req->cmd->prop_get.attrib & 1) {
switch (le32_to_cpu(req->cmd->prop_get.offset)) {
case NVME_REG_CAP:
@@ -82,11 +88,9 @@ u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req)
switch (cmd->fabrics.fctype) {
case nvme_fabrics_type_property_set:
- req->data_len = 0;
req->execute = nvmet_execute_prop_set;
break;
case nvme_fabrics_type_property_get:
- req->data_len = 0;
req->execute = nvmet_execute_prop_get;
break;
default:
@@ -147,6 +151,9 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
struct nvmet_ctrl *ctrl = NULL;
u16 status = 0;
+ if (!nvmet_check_data_len(req, sizeof(struct nvmf_connect_data)))
+ return;
+
d = kmalloc(sizeof(*d), GFP_KERNEL);
if (!d) {
status = NVME_SC_INTERNAL;
@@ -211,6 +218,9 @@ static void nvmet_execute_io_connect(struct nvmet_req *req)
u16 qid = le16_to_cpu(c->qid);
u16 status = 0;
+ if (!nvmet_check_data_len(req, sizeof(struct nvmf_connect_data)))
+ return;
+
d = kmalloc(sizeof(*d), GFP_KERNEL);
if (!d) {
status = NVME_SC_INTERNAL;
@@ -281,7 +291,6 @@ u16 nvmet_parse_connect_cmd(struct nvmet_req *req)
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
}
- req->data_len = sizeof(struct nvmf_connect_data);
if (cmd->connect.qid == 0)
req->execute = nvmet_execute_admin_connect;
else
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index ce8d819f86cc..a0db6371b43e 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -1495,20 +1495,20 @@ static void
nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
struct nvmet_fc_ls_iod *iod)
{
- struct fcnvme_ls_disconnect_rqst *rqst =
- (struct fcnvme_ls_disconnect_rqst *)iod->rqstbuf;
- struct fcnvme_ls_disconnect_acc *acc =
- (struct fcnvme_ls_disconnect_acc *)iod->rspbuf;
+ struct fcnvme_ls_disconnect_assoc_rqst *rqst =
+ (struct fcnvme_ls_disconnect_assoc_rqst *)iod->rqstbuf;
+ struct fcnvme_ls_disconnect_assoc_acc *acc =
+ (struct fcnvme_ls_disconnect_assoc_acc *)iod->rspbuf;
struct nvmet_fc_tgt_assoc *assoc;
int ret = 0;
memset(acc, 0, sizeof(*acc));
- if (iod->rqstdatalen < sizeof(struct fcnvme_ls_disconnect_rqst))
+ if (iod->rqstdatalen < sizeof(struct fcnvme_ls_disconnect_assoc_rqst))
ret = VERR_DISCONN_LEN;
else if (rqst->desc_list_len !=
fcnvme_lsdesc_len(
- sizeof(struct fcnvme_ls_disconnect_rqst)))
+ sizeof(struct fcnvme_ls_disconnect_assoc_rqst)))
ret = VERR_DISCONN_RQST_LEN;
else if (rqst->associd.desc_tag != cpu_to_be32(FCNVME_LSDESC_ASSOC_ID))
ret = VERR_ASSOC_ID;
@@ -1523,8 +1523,11 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
fcnvme_lsdesc_len(
sizeof(struct fcnvme_lsdesc_disconn_cmd)))
ret = VERR_DISCONN_CMD_LEN;
- else if ((rqst->discon_cmd.scope != FCNVME_DISCONN_ASSOCIATION) &&
- (rqst->discon_cmd.scope != FCNVME_DISCONN_CONNECTION))
+ /*
+ * As the standard changed on the LS, check if old format and scope
+ * something other than Association (e.g. 0).
+ */
+ else if (rqst->discon_cmd.rsvd8[0])
ret = VERR_DISCONN_SCOPE;
else {
/* match an active association */
@@ -1556,8 +1559,8 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
nvmet_fc_format_rsp_hdr(acc, FCNVME_LS_ACC,
fcnvme_lsdesc_len(
- sizeof(struct fcnvme_ls_disconnect_acc)),
- FCNVME_LS_DISCONNECT);
+ sizeof(struct fcnvme_ls_disconnect_assoc_acc)),
+ FCNVME_LS_DISCONNECT_ASSOC);
/* release get taken in nvmet_fc_find_target_assoc */
nvmet_fc_tgt_a_put(iod->assoc);
@@ -1632,7 +1635,7 @@ nvmet_fc_handle_ls_rqst(struct nvmet_fc_tgtport *tgtport,
/* Creates an IO Queue/Connection */
nvmet_fc_ls_create_connection(tgtport, iod);
break;
- case FCNVME_LS_DISCONNECT:
+ case FCNVME_LS_DISCONNECT_ASSOC:
/* Terminate a Queue/Connection or the Association */
nvmet_fc_ls_disconnect(tgtport, iod);
break;
@@ -2015,7 +2018,7 @@ nvmet_fc_fod_op_done(struct nvmet_fc_fcp_iod *fod)
}
/* data transfer complete, resume with nvmet layer */
- nvmet_req_execute(&fod->req);
+ fod->req.execute(&fod->req);
break;
case NVMET_FCOP_READDATA:
@@ -2231,7 +2234,7 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
* can invoke the nvmet_layer now. If read data, cmd completion will
* push the data
*/
- nvmet_req_execute(&fod->req);
+ fod->req.execute(&fod->req);
return;
transport_error:
@@ -2299,7 +2302,7 @@ nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *target_port,
/* validate iu, so the connection id can be used to find the queue */
if ((cmdiubuf_len != sizeof(*cmdiu)) ||
- (cmdiu->scsi_id != NVME_CMD_SCSI_ID) ||
+ (cmdiu->format_id != NVME_CMD_FORMAT_ID) ||
(cmdiu->fc_id != NVME_CMD_FC_ID) ||
(be16_to_cpu(cmdiu->iu_len) != (sizeof(*cmdiu)/4)))
return -EIO;
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index 32008d85172b..b6fca0e421ef 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -147,8 +147,12 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
int sg_cnt = req->sg_cnt;
struct bio *bio;
struct scatterlist *sg;
+ struct blk_plug plug;
sector_t sector;
- int op, op_flags = 0, i;
+ int op, i;
+
+ if (!nvmet_check_data_len(req, nvmet_rw_len(req)))
+ return;
if (!req->sg_cnt) {
nvmet_req_complete(req, 0);
@@ -156,21 +160,20 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
}
if (req->cmd->rw.opcode == nvme_cmd_write) {
- op = REQ_OP_WRITE;
- op_flags = REQ_SYNC | REQ_IDLE;
+ op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
- op_flags |= REQ_FUA;
+ op |= REQ_FUA;
} else {
op = REQ_OP_READ;
}
if (is_pci_p2pdma_page(sg_page(req->sg)))
- op_flags |= REQ_NOMERGE;
+ op |= REQ_NOMERGE;
sector = le64_to_cpu(req->cmd->rw.slba);
sector <<= (req->ns->blksize_shift - 9);
- if (req->data_len <= NVMET_MAX_INLINE_DATA_LEN) {
+ if (req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN) {
bio = &req->b.inline_bio;
bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
} else {
@@ -180,8 +183,9 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
bio->bi_iter.bi_sector = sector;
bio->bi_private = req;
bio->bi_end_io = nvmet_bio_done;
- bio_set_op_attrs(bio, op, op_flags);
+ bio->bi_opf = op;
+ blk_start_plug(&plug);
for_each_sg(req->sg, sg, req->sg_cnt, i) {
while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
!= sg->length) {
@@ -190,7 +194,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
bio_set_dev(bio, req->ns->bdev);
bio->bi_iter.bi_sector = sector;
- bio_set_op_attrs(bio, op, op_flags);
+ bio->bi_opf = op;
bio_chain(bio, prev);
submit_bio(prev);
@@ -201,12 +205,16 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
}
submit_bio(bio);
+ blk_finish_plug(&plug);
}
static void nvmet_bdev_execute_flush(struct nvmet_req *req)
{
struct bio *bio = &req->b.inline_bio;
+ if (!nvmet_check_data_len(req, 0))
+ return;
+
bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
bio_set_dev(bio, req->ns->bdev);
bio->bi_private = req;
@@ -261,12 +269,10 @@ static void nvmet_bdev_execute_discard(struct nvmet_req *req)
if (bio) {
bio->bi_private = req;
bio->bi_end_io = nvmet_bio_done;
- if (status) {
- bio->bi_status = BLK_STS_IOERR;
- bio_endio(bio);
- } else {
+ if (status)
+ bio_io_error(bio);
+ else
submit_bio(bio);
- }
} else {
nvmet_req_complete(req, status);
}
@@ -274,6 +280,9 @@ static void nvmet_bdev_execute_discard(struct nvmet_req *req)
static void nvmet_bdev_execute_dsm(struct nvmet_req *req)
{
+ if (!nvmet_check_data_len(req, nvmet_dsm_len(req)))
+ return;
+
switch (le32_to_cpu(req->cmd->dsm.attributes)) {
case NVME_DSMGMT_AD:
nvmet_bdev_execute_discard(req);
@@ -295,6 +304,9 @@ static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req)
sector_t nr_sector;
int ret;
+ if (!nvmet_check_data_len(req, 0))
+ return;
+
sector = le64_to_cpu(write_zeroes->slba) <<
(req->ns->blksize_shift - 9);
nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
@@ -319,20 +331,15 @@ u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
case nvme_cmd_read:
case nvme_cmd_write:
req->execute = nvmet_bdev_execute_rw;
- req->data_len = nvmet_rw_len(req);
return 0;
case nvme_cmd_flush:
req->execute = nvmet_bdev_execute_flush;
- req->data_len = 0;
return 0;
case nvme_cmd_dsm:
req->execute = nvmet_bdev_execute_dsm;
- req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) *
- sizeof(struct nvme_dsm_range);
return 0;
case nvme_cmd_write_zeroes:
req->execute = nvmet_bdev_execute_write_zeroes;
- req->data_len = 0;
return 0;
default:
pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode,
diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
index 05453f5d1448..caebfce06605 100644
--- a/drivers/nvme/target/io-cmd-file.c
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -126,7 +126,7 @@ static void nvmet_file_io_done(struct kiocb *iocb, long ret, long ret2)
mempool_free(req->f.bvec, req->ns->bvec_pool);
}
- if (unlikely(ret != req->data_len))
+ if (unlikely(ret != req->transfer_len))
status = errno_to_nvme_status(req, ret);
nvmet_req_complete(req, status);
}
@@ -146,7 +146,7 @@ static bool nvmet_file_execute_io(struct nvmet_req *req, int ki_flags)
is_sync = true;
pos = le64_to_cpu(req->cmd->rw.slba) << req->ns->blksize_shift;
- if (unlikely(pos + req->data_len > req->ns->size)) {
+ if (unlikely(pos + req->transfer_len > req->ns->size)) {
nvmet_req_complete(req, errno_to_nvme_status(req, -ENOSPC));
return true;
}
@@ -173,7 +173,7 @@ static bool nvmet_file_execute_io(struct nvmet_req *req, int ki_flags)
nr_bvec--;
}
- if (WARN_ON_ONCE(total_len != req->data_len)) {
+ if (WARN_ON_ONCE(total_len != req->transfer_len)) {
ret = -EIO;
goto complete;
}
@@ -232,6 +232,9 @@ static void nvmet_file_execute_rw(struct nvmet_req *req)
{
ssize_t nr_bvec = req->sg_cnt;
+ if (!nvmet_check_data_len(req, nvmet_rw_len(req)))
+ return;
+
if (!req->sg_cnt || !nr_bvec) {
nvmet_req_complete(req, 0);
return;
@@ -273,6 +276,8 @@ static void nvmet_file_flush_work(struct work_struct *w)
static void nvmet_file_execute_flush(struct nvmet_req *req)
{
+ if (!nvmet_check_data_len(req, 0))
+ return;
INIT_WORK(&req->f.work, nvmet_file_flush_work);
schedule_work(&req->f.work);
}
@@ -331,6 +336,8 @@ static void nvmet_file_dsm_work(struct work_struct *w)
static void nvmet_file_execute_dsm(struct nvmet_req *req)
{
+ if (!nvmet_check_data_len(req, nvmet_dsm_len(req)))
+ return;
INIT_WORK(&req->f.work, nvmet_file_dsm_work);
schedule_work(&req->f.work);
}
@@ -359,6 +366,8 @@ static void nvmet_file_write_zeroes_work(struct work_struct *w)
static void nvmet_file_execute_write_zeroes(struct nvmet_req *req)
{
+ if (!nvmet_check_data_len(req, 0))
+ return;
INIT_WORK(&req->f.work, nvmet_file_write_zeroes_work);
schedule_work(&req->f.work);
}
@@ -371,20 +380,15 @@ u16 nvmet_file_parse_io_cmd(struct nvmet_req *req)
case nvme_cmd_read:
case nvme_cmd_write:
req->execute = nvmet_file_execute_rw;
- req->data_len = nvmet_rw_len(req);
return 0;
case nvme_cmd_flush:
req->execute = nvmet_file_execute_flush;
- req->data_len = 0;
return 0;
case nvme_cmd_dsm:
req->execute = nvmet_file_execute_dsm;
- req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) *
- sizeof(struct nvme_dsm_range);
return 0;
case nvme_cmd_write_zeroes:
req->execute = nvmet_file_execute_write_zeroes;
- req->data_len = 0;
return 0;
default:
pr_err("unhandled cmd for file ns %d on qid %d\n",
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 748a39fca771..a758bb3d5dd4 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -76,7 +76,6 @@ static void nvme_loop_complete_rq(struct request *req)
{
struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
- nvme_cleanup_cmd(req);
sg_free_table_chained(&iod->sg_table, SG_CHUNK_SIZE);
nvme_complete_rq(req);
}
@@ -102,8 +101,8 @@ static void nvme_loop_queue_response(struct nvmet_req *req)
* aborts. We don't even bother to allocate a struct request
* for them but rather special case them here.
*/
- if (unlikely(nvme_loop_queue_idx(queue) == 0 &&
- cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH)) {
+ if (unlikely(nvme_is_aen_req(nvme_loop_queue_idx(queue),
+ cqe->command_id))) {
nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status,
&cqe->result);
} else {
@@ -126,7 +125,7 @@ static void nvme_loop_execute_work(struct work_struct *work)
struct nvme_loop_iod *iod =
container_of(work, struct nvme_loop_iod, work);
- nvmet_req_execute(&iod->req);
+ iod->req.execute(&iod->req);
}
static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
@@ -157,8 +156,10 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
iod->sg_table.sgl = iod->first_sgl;
if (sg_alloc_table_chained(&iod->sg_table,
blk_rq_nr_phys_segments(req),
- iod->sg_table.sgl, SG_CHUNK_SIZE))
+ iod->sg_table.sgl, SG_CHUNK_SIZE)) {
+ nvme_cleanup_cmd(req);
return BLK_STS_RESOURCE;
+ }
iod->req.sg = iod->sg_table.sgl;
iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg_table.sgl);
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index c51f8dd01dc4..46df45e837c9 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -304,8 +304,6 @@ struct nvmet_req {
} f;
};
int sg_cnt;
- /* data length as parsed from the command: */
- size_t data_len;
/* data length as parsed from the SGL descriptor: */
size_t transfer_len;
@@ -375,7 +373,7 @@ u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req);
bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops);
void nvmet_req_uninit(struct nvmet_req *req);
-void nvmet_req_execute(struct nvmet_req *req);
+bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len);
void nvmet_req_complete(struct nvmet_req *req, u16 status);
int nvmet_req_alloc_sgl(struct nvmet_req *req);
void nvmet_req_free_sgl(struct nvmet_req *req);
@@ -495,6 +493,12 @@ static inline u32 nvmet_rw_len(struct nvmet_req *req)
req->ns->blksize_shift;
}
+static inline u32 nvmet_dsm_len(struct nvmet_req *req)
+{
+ return (le32_to_cpu(req->cmd->dsm.nr) + 1) *
+ sizeof(struct nvme_dsm_range);
+}
+
u16 errno_to_nvme_status(struct nvmet_req *req, int errno);
/* Convert a 32-bit number to a 16-bit 0's based number */
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 36d906a7f70d..37d262a65877 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -603,7 +603,7 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc)
return;
}
- nvmet_req_execute(&rsp->req);
+ rsp->req.execute(&rsp->req);
}
static void nvmet_rdma_use_inline_sg(struct nvmet_rdma_rsp *rsp, u32 len,
@@ -672,13 +672,13 @@ static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp,
return 0;
ret = nvmet_req_alloc_sgl(&rsp->req);
- if (ret < 0)
+ if (unlikely(ret < 0))
goto error_out;
ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num,
rsp->req.sg, rsp->req.sg_cnt, 0, addr, key,
nvmet_data_dir(&rsp->req));
- if (ret < 0)
+ if (unlikely(ret < 0))
goto error_out;
rsp->n_rdma += ret;
@@ -746,7 +746,7 @@ static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp)
queue->cm_id->port_num, &rsp->read_cqe, NULL))
nvmet_req_complete(&rsp->req, NVME_SC_DATA_XFER_ERROR);
} else {
- nvmet_req_execute(&rsp->req);
+ rsp->req.execute(&rsp->req);
}
return true;
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index d535080b781f..af674fc0bb1e 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -320,7 +320,7 @@ static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd)
struct nvme_sgl_desc *sgl = &cmd->req.cmd->common.dptr.sgl;
u32 len = le32_to_cpu(sgl->length);
- if (!cmd->req.data_len)
+ if (!len)
return 0;
if (sgl->type == ((NVME_SGL_FMT_DATA_DESC << 4) |
@@ -813,13 +813,11 @@ free_crypto:
static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue,
struct nvmet_tcp_cmd *cmd, struct nvmet_req *req)
{
+ size_t data_len = le32_to_cpu(req->cmd->common.dptr.sgl.length);
int ret;
- /* recover the expected data transfer length */
- req->data_len = le32_to_cpu(req->cmd->common.dptr.sgl.length);
-
if (!nvme_is_write(cmd->req.cmd) ||
- req->data_len > cmd->req.port->inline_data_size) {
+ data_len > cmd->req.port->inline_data_size) {
nvmet_prepare_receive_pdu(queue);
return;
}
@@ -932,7 +930,7 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue)
goto out;
}
- nvmet_req_execute(&queue->cmd->req);
+ queue->cmd->req.execute(&queue->cmd->req);
out:
nvmet_prepare_receive_pdu(queue);
return ret;
@@ -1052,7 +1050,7 @@ static int nvmet_tcp_try_recv_data(struct nvmet_tcp_queue *queue)
nvmet_tcp_prep_recv_ddgst(cmd);
return 0;
}
- nvmet_req_execute(&cmd->req);
+ cmd->req.execute(&cmd->req);
}
nvmet_prepare_receive_pdu(queue);
@@ -1092,7 +1090,7 @@ static int nvmet_tcp_try_recv_ddgst(struct nvmet_tcp_queue *queue)
if (!(cmd->flags & NVMET_TCP_F_INIT_FAILED) &&
cmd->rbytes_done == cmd->req.transfer_len)
- nvmet_req_execute(&cmd->req);
+ cmd->req.execute(&cmd->req);
ret = 0;
out:
nvmet_prepare_receive_pdu(queue);
diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index 7989703b883c..6bd610ee2cd7 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -324,8 +324,10 @@ int of_reserved_mem_device_init_by_idx(struct device *dev,
if (!target)
return -ENODEV;
- if (!of_device_is_available(target))
+ if (!of_device_is_available(target)) {
+ of_node_put(target);
return 0;
+ }
rmem = __find_rmem(target);
of_node_put(target);
diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c
index 480a21e2ed39..92e895d86458 100644
--- a/drivers/of/unittest.c
+++ b/drivers/of/unittest.c
@@ -1207,6 +1207,7 @@ static int __init unittest_data_add(void)
of_fdt_unflatten_tree(unittest_data, NULL, &unittest_data_node);
if (!unittest_data_node) {
pr_warn("%s: No tree to attach; not running tests\n", __func__);
+ kfree(unittest_data);
return -ENODATA;
}
diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index 3b7ffd0234e9..9ff0538ee83a 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -1626,12 +1626,6 @@ struct opp_table *dev_pm_opp_set_regulators(struct device *dev,
goto free_regulators;
}
- ret = regulator_enable(reg);
- if (ret < 0) {
- regulator_put(reg);
- goto free_regulators;
- }
-
opp_table->regulators[i] = reg;
}
@@ -1645,10 +1639,8 @@ struct opp_table *dev_pm_opp_set_regulators(struct device *dev,
return opp_table;
free_regulators:
- while (i--) {
- regulator_disable(opp_table->regulators[i]);
- regulator_put(opp_table->regulators[i]);
- }
+ while (i != 0)
+ regulator_put(opp_table->regulators[--i]);
kfree(opp_table->regulators);
opp_table->regulators = NULL;
@@ -1674,10 +1666,8 @@ void dev_pm_opp_put_regulators(struct opp_table *opp_table)
/* Make sure there are no concurrent readers while updating opp_table */
WARN_ON(!list_empty(&opp_table->opp_list));
- for (i = opp_table->regulator_count - 1; i >= 0; i--) {
- regulator_disable(opp_table->regulators[i]);
+ for (i = opp_table->regulator_count - 1; i >= 0; i--)
regulator_put(opp_table->regulators[i]);
- }
_free_set_opp_data(opp_table);
diff --git a/drivers/opp/of.c b/drivers/opp/of.c
index 1813f5ad5fa2..1cbb58240b80 100644
--- a/drivers/opp/of.c
+++ b/drivers/opp/of.c
@@ -77,8 +77,6 @@ static struct dev_pm_opp *_find_opp_of_np(struct opp_table *opp_table,
{
struct dev_pm_opp *opp;
- lockdep_assert_held(&opp_table_lock);
-
mutex_lock(&opp_table->lock);
list_for_each_entry(opp, &opp_table->opp_list, node) {
@@ -665,6 +663,13 @@ static int _of_add_opp_table_v2(struct device *dev, struct opp_table *opp_table)
return 0;
}
+ /*
+ * Re-initialize list_kref every time we add static OPPs to the OPP
+ * table as the reference count may be 0 after the last tie static OPPs
+ * were removed.
+ */
+ kref_init(&opp_table->list_kref);
+
/* We have opp-table node now, iterate over it and add OPPs */
for_each_available_child_of_node(opp_table->np, np) {
opp = _opp_add_static_v2(opp_table, dev, np);
diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c
index ed50502cc65a..de8e4e347249 100644
--- a/drivers/parisc/sba_iommu.c
+++ b/drivers/parisc/sba_iommu.c
@@ -678,14 +678,6 @@ static int sba_dma_supported( struct device *dev, u64 mask)
return(0);
}
- /* Documentation/DMA-API-HOWTO.txt tells drivers to try 64-bit
- * first, then fall back to 32-bit if that fails.
- * We are just "encouraging" 32-bit DMA masks here since we can
- * never allow IOMMU bypass unless we add special support for ZX1.
- */
- if (mask > ~0U)
- return 0;
-
ioc = GET_IOC(dev);
if (!ioc)
return 0;
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index e7982af9a5d8..a97e2571a527 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -959,19 +959,6 @@ void pci_refresh_power_state(struct pci_dev *dev)
}
/**
- * pci_power_up - Put the given device into D0 forcibly
- * @dev: PCI device to power up
- */
-void pci_power_up(struct pci_dev *dev)
-{
- if (platform_pci_power_manageable(dev))
- platform_pci_set_power_state(dev, PCI_D0);
-
- pci_raw_set_power_state(dev, PCI_D0);
- pci_update_current_state(dev, PCI_D0);
-}
-
-/**
* pci_platform_power_transition - Use platform to change device power state
* @dev: PCI device to handle.
* @state: State to put the device into.
@@ -1154,6 +1141,17 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
EXPORT_SYMBOL(pci_set_power_state);
/**
+ * pci_power_up - Put the given device into D0 forcibly
+ * @dev: PCI device to power up
+ */
+void pci_power_up(struct pci_dev *dev)
+{
+ __pci_start_power_transition(dev, PCI_D0);
+ pci_raw_set_power_state(dev, PCI_D0);
+ pci_update_current_state(dev, PCI_D0);
+}
+
+/**
* pci_choose_state - Choose the power state of a PCI device
* @dev: PCI device to be suspended
* @state: target sleep state for the whole system. This is the value
diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c
index 8f8606b9bc9e..1b8e337a29ca 100644
--- a/drivers/perf/arm-cci.c
+++ b/drivers/perf/arm-cci.c
@@ -1642,7 +1642,6 @@ static struct cci_pmu *cci_pmu_alloc(struct device *dev)
static int cci_pmu_probe(struct platform_device *pdev)
{
- struct resource *res;
struct cci_pmu *cci_pmu;
int i, ret, irq;
@@ -1650,8 +1649,7 @@ static int cci_pmu_probe(struct platform_device *pdev)
if (IS_ERR(cci_pmu))
return PTR_ERR(cci_pmu);
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- cci_pmu->base = devm_ioremap_resource(&pdev->dev, res);
+ cci_pmu->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(cci_pmu->base))
return -ENOMEM;
diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c
index 6fc0273b6129..fea354d6fb29 100644
--- a/drivers/perf/arm-ccn.c
+++ b/drivers/perf/arm-ccn.c
@@ -1477,8 +1477,7 @@ static int arm_ccn_probe(struct platform_device *pdev)
ccn->dev = &pdev->dev;
platform_set_drvdata(pdev, ccn);
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- ccn->base = devm_ioremap_resource(ccn->dev, res);
+ ccn->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(ccn->base))
return PTR_ERR(ccn->base);
@@ -1537,6 +1536,7 @@ static int arm_ccn_remove(struct platform_device *pdev)
static const struct of_device_id arm_ccn_match[] = {
{ .compatible = "arm,ccn-502", },
{ .compatible = "arm,ccn-504", },
+ { .compatible = "arm,ccn-512", },
{},
};
MODULE_DEVICE_TABLE(of, arm_ccn_match);
diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
index abcf54f7d19c..773128f411f1 100644
--- a/drivers/perf/arm_smmuv3_pmu.c
+++ b/drivers/perf/arm_smmuv3_pmu.c
@@ -727,7 +727,7 @@ static void smmu_pmu_get_acpi_options(struct smmu_pmu *smmu_pmu)
static int smmu_pmu_probe(struct platform_device *pdev)
{
struct smmu_pmu *smmu_pmu;
- struct resource *res_0, *res_1;
+ struct resource *res_0;
u32 cfgr, reg_size;
u64 ceid_64[2];
int irq, err;
@@ -764,8 +764,7 @@ static int smmu_pmu_probe(struct platform_device *pdev)
/* Determine if page 1 is present */
if (cfgr & SMMU_PMCG_CFGR_RELOC_CTRS) {
- res_1 = platform_get_resource(pdev, IORESOURCE_MEM, 1);
- smmu_pmu->reloc_base = devm_ioremap_resource(dev, res_1);
+ smmu_pmu->reloc_base = devm_platform_ioremap_resource(pdev, 1);
if (IS_ERR(smmu_pmu->reloc_base))
return PTR_ERR(smmu_pmu->reloc_base);
} else {
diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c
index ce7345745b42..55083c67b2bb 100644
--- a/drivers/perf/fsl_imx8_ddr_perf.c
+++ b/drivers/perf/fsl_imx8_ddr_perf.c
@@ -45,7 +45,8 @@
static DEFINE_IDA(ddr_ida);
/* DDR Perf hardware feature */
-#define DDR_CAP_AXI_ID_FILTER 0x1 /* support AXI ID filter */
+#define DDR_CAP_AXI_ID_FILTER 0x1 /* support AXI ID filter */
+#define DDR_CAP_AXI_ID_FILTER_ENHANCED 0x3 /* support enhanced AXI ID filter */
struct fsl_ddr_devtype_data {
unsigned int quirks; /* quirks needed for different DDR Perf core */
@@ -57,9 +58,14 @@ static const struct fsl_ddr_devtype_data imx8m_devtype_data = {
.quirks = DDR_CAP_AXI_ID_FILTER,
};
+static const struct fsl_ddr_devtype_data imx8mp_devtype_data = {
+ .quirks = DDR_CAP_AXI_ID_FILTER_ENHANCED,
+};
+
static const struct of_device_id imx_ddr_pmu_dt_ids[] = {
{ .compatible = "fsl,imx8-ddr-pmu", .data = &imx8_devtype_data},
{ .compatible = "fsl,imx8m-ddr-pmu", .data = &imx8m_devtype_data},
+ { .compatible = "fsl,imx8mp-ddr-pmu", .data = &imx8mp_devtype_data},
{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, imx_ddr_pmu_dt_ids);
@@ -78,6 +84,61 @@ struct ddr_pmu {
int id;
};
+enum ddr_perf_filter_capabilities {
+ PERF_CAP_AXI_ID_FILTER = 0,
+ PERF_CAP_AXI_ID_FILTER_ENHANCED,
+ PERF_CAP_AXI_ID_FEAT_MAX,
+};
+
+static u32 ddr_perf_filter_cap_get(struct ddr_pmu *pmu, int cap)
+{
+ u32 quirks = pmu->devtype_data->quirks;
+
+ switch (cap) {
+ case PERF_CAP_AXI_ID_FILTER:
+ return !!(quirks & DDR_CAP_AXI_ID_FILTER);
+ case PERF_CAP_AXI_ID_FILTER_ENHANCED:
+ quirks &= DDR_CAP_AXI_ID_FILTER_ENHANCED;
+ return quirks == DDR_CAP_AXI_ID_FILTER_ENHANCED;
+ default:
+ WARN(1, "unknown filter cap %d\n", cap);
+ }
+
+ return 0;
+}
+
+static ssize_t ddr_perf_filter_cap_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct ddr_pmu *pmu = dev_get_drvdata(dev);
+ struct dev_ext_attribute *ea =
+ container_of(attr, struct dev_ext_attribute, attr);
+ int cap = (long)ea->var;
+
+ return snprintf(buf, PAGE_SIZE, "%u\n",
+ ddr_perf_filter_cap_get(pmu, cap));
+}
+
+#define PERF_EXT_ATTR_ENTRY(_name, _func, _var) \
+ (&((struct dev_ext_attribute) { \
+ __ATTR(_name, 0444, _func, NULL), (void *)_var \
+ }).attr.attr)
+
+#define PERF_FILTER_EXT_ATTR_ENTRY(_name, _var) \
+ PERF_EXT_ATTR_ENTRY(_name, ddr_perf_filter_cap_show, _var)
+
+static struct attribute *ddr_perf_filter_cap_attr[] = {
+ PERF_FILTER_EXT_ATTR_ENTRY(filter, PERF_CAP_AXI_ID_FILTER),
+ PERF_FILTER_EXT_ATTR_ENTRY(enhanced_filter, PERF_CAP_AXI_ID_FILTER_ENHANCED),
+ NULL,
+};
+
+static struct attribute_group ddr_perf_filter_cap_attr_group = {
+ .name = "caps",
+ .attrs = ddr_perf_filter_cap_attr,
+};
+
static ssize_t ddr_perf_cpumask_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -175,9 +236,40 @@ static const struct attribute_group *attr_groups[] = {
&ddr_perf_events_attr_group,
&ddr_perf_format_attr_group,
&ddr_perf_cpumask_attr_group,
+ &ddr_perf_filter_cap_attr_group,
NULL,
};
+static bool ddr_perf_is_filtered(struct perf_event *event)
+{
+ return event->attr.config == 0x41 || event->attr.config == 0x42;
+}
+
+static u32 ddr_perf_filter_val(struct perf_event *event)
+{
+ return event->attr.config1;
+}
+
+static bool ddr_perf_filters_compatible(struct perf_event *a,
+ struct perf_event *b)
+{
+ if (!ddr_perf_is_filtered(a))
+ return true;
+ if (!ddr_perf_is_filtered(b))
+ return true;
+ return ddr_perf_filter_val(a) == ddr_perf_filter_val(b);
+}
+
+static bool ddr_perf_is_enhanced_filtered(struct perf_event *event)
+{
+ unsigned int filt;
+ struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+
+ filt = pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER_ENHANCED;
+ return (filt == DDR_CAP_AXI_ID_FILTER_ENHANCED) &&
+ ddr_perf_is_filtered(event);
+}
+
static u32 ddr_perf_alloc_counter(struct ddr_pmu *pmu, int event)
{
int i;
@@ -209,27 +301,17 @@ static void ddr_perf_free_counter(struct ddr_pmu *pmu, int counter)
static u32 ddr_perf_read_counter(struct ddr_pmu *pmu, int counter)
{
- return readl_relaxed(pmu->base + COUNTER_READ + counter * 4);
-}
-
-static bool ddr_perf_is_filtered(struct perf_event *event)
-{
- return event->attr.config == 0x41 || event->attr.config == 0x42;
-}
+ struct perf_event *event = pmu->events[counter];
+ void __iomem *base = pmu->base;
-static u32 ddr_perf_filter_val(struct perf_event *event)
-{
- return event->attr.config1;
-}
-
-static bool ddr_perf_filters_compatible(struct perf_event *a,
- struct perf_event *b)
-{
- if (!ddr_perf_is_filtered(a))
- return true;
- if (!ddr_perf_is_filtered(b))
- return true;
- return ddr_perf_filter_val(a) == ddr_perf_filter_val(b);
+ /*
+ * return bytes instead of bursts from ddr transaction for
+ * axid-read and axid-write event if PMU core supports enhanced
+ * filter.
+ */
+ base += ddr_perf_is_enhanced_filtered(event) ? COUNTER_DPCR1 :
+ COUNTER_READ;
+ return readl_relaxed(base + counter * 4);
}
static int ddr_perf_event_init(struct perf_event *event)
diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
index e42d4464c2cf..453f1c6a16ca 100644
--- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c
@@ -243,8 +243,6 @@ MODULE_DEVICE_TABLE(acpi, hisi_ddrc_pmu_acpi_match);
static int hisi_ddrc_pmu_init_data(struct platform_device *pdev,
struct hisi_pmu *ddrc_pmu)
{
- struct resource *res;
-
/*
* Use the SCCL_ID and DDRC channel ID to identify the
* DDRC PMU, while SCCL_ID is in MPIDR[aff2].
@@ -263,8 +261,7 @@ static int hisi_ddrc_pmu_init_data(struct platform_device *pdev,
/* DDRC PMUs only share the same SCCL */
ddrc_pmu->ccl_id = -1;
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- ddrc_pmu->base = devm_ioremap_resource(&pdev->dev, res);
+ ddrc_pmu->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(ddrc_pmu->base)) {
dev_err(&pdev->dev, "ioremap failed for ddrc_pmu resource\n");
return PTR_ERR(ddrc_pmu->base);
diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
index f28063873e11..6a1dd72d8abb 100644
--- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c
@@ -234,7 +234,6 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev,
struct hisi_pmu *hha_pmu)
{
unsigned long long id;
- struct resource *res;
acpi_status status;
status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev),
@@ -256,8 +255,7 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev,
/* HHA PMUs only share the same SCCL */
hha_pmu->ccl_id = -1;
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- hha_pmu->base = devm_ioremap_resource(&pdev->dev, res);
+ hha_pmu->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(hha_pmu->base)) {
dev_err(&pdev->dev, "ioremap failed for hha_pmu resource\n");
return PTR_ERR(hha_pmu->base);
diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
index 078b8dc57250..1151e99b241c 100644
--- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c
@@ -233,7 +233,6 @@ static int hisi_l3c_pmu_init_data(struct platform_device *pdev,
struct hisi_pmu *l3c_pmu)
{
unsigned long long id;
- struct resource *res;
acpi_status status;
status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev),
@@ -259,8 +258,7 @@ static int hisi_l3c_pmu_init_data(struct platform_device *pdev,
return -EINVAL;
}
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- l3c_pmu->base = devm_ioremap_resource(&pdev->dev, res);
+ l3c_pmu->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(l3c_pmu->base)) {
dev_err(&pdev->dev, "ioremap failed for l3c_pmu resource\n");
return PTR_ERR(l3c_pmu->base);
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c
index 79f76f8dda8e..96183e31b96a 100644
--- a/drivers/perf/hisilicon/hisi_uncore_pmu.c
+++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c
@@ -15,6 +15,7 @@
#include <linux/errno.h>
#include <linux/interrupt.h>
+#include <asm/cputype.h>
#include <asm/local64.h>
#include "hisi_uncore_pmu.h"
@@ -338,8 +339,10 @@ void hisi_uncore_pmu_disable(struct pmu *pmu)
/*
* Read Super CPU cluster and CPU cluster ID from MPIDR_EL1.
- * If multi-threading is supported, CCL_ID is the low 3-bits in MPIDR[Aff2]
- * and SCCL_ID is the upper 5-bits of Aff2 field; if not, SCCL_ID
+ * If multi-threading is supported, On Huawei Kunpeng 920 SoC whose cpu
+ * core is tsv110, CCL_ID is the low 3-bits in MPIDR[Aff2] and SCCL_ID
+ * is the upper 5-bits of Aff2 field; while for other cpu types, SCCL_ID
+ * is in MPIDR[Aff3] and CCL_ID is in MPIDR[Aff2], if not, SCCL_ID
* is in MPIDR[Aff2] and CCL_ID is in MPIDR[Aff1].
*/
static void hisi_read_sccl_and_ccl_id(int *sccl_id, int *ccl_id)
@@ -347,12 +350,19 @@ static void hisi_read_sccl_and_ccl_id(int *sccl_id, int *ccl_id)
u64 mpidr = read_cpuid_mpidr();
if (mpidr & MPIDR_MT_BITMASK) {
- int aff2 = MPIDR_AFFINITY_LEVEL(mpidr, 2);
-
- if (sccl_id)
- *sccl_id = aff2 >> 3;
- if (ccl_id)
- *ccl_id = aff2 & 0x7;
+ if (read_cpuid_part_number() == HISI_CPU_PART_TSV110) {
+ int aff2 = MPIDR_AFFINITY_LEVEL(mpidr, 2);
+
+ if (sccl_id)
+ *sccl_id = aff2 >> 3;
+ if (ccl_id)
+ *ccl_id = aff2 & 0x7;
+ } else {
+ if (sccl_id)
+ *sccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 3);
+ if (ccl_id)
+ *ccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 2);
+ }
} else {
if (sccl_id)
*sccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 2);
diff --git a/drivers/perf/thunderx2_pmu.c b/drivers/perf/thunderx2_pmu.c
index 43d76c85da56..51b31d6ff2c4 100644
--- a/drivers/perf/thunderx2_pmu.c
+++ b/drivers/perf/thunderx2_pmu.c
@@ -16,23 +16,36 @@
* they need to be sampled before overflow(i.e, at every 2 seconds).
*/
-#define TX2_PMU_MAX_COUNTERS 4
+#define TX2_PMU_DMC_L3C_MAX_COUNTERS 4
+#define TX2_PMU_CCPI2_MAX_COUNTERS 8
+#define TX2_PMU_MAX_COUNTERS TX2_PMU_CCPI2_MAX_COUNTERS
+
+
#define TX2_PMU_DMC_CHANNELS 8
#define TX2_PMU_L3_TILES 16
#define TX2_PMU_HRTIMER_INTERVAL (2 * NSEC_PER_SEC)
-#define GET_EVENTID(ev) ((ev->hw.config) & 0x1f)
-#define GET_COUNTERID(ev) ((ev->hw.idx) & 0x3)
+#define GET_EVENTID(ev, mask) ((ev->hw.config) & mask)
+#define GET_COUNTERID(ev, mask) ((ev->hw.idx) & mask)
/* 1 byte per counter(4 counters).
* Event id is encoded in bits [5:1] of a byte,
*/
#define DMC_EVENT_CFG(idx, val) ((val) << (((idx) * 8) + 1))
+/* bits[3:0] to select counters, are indexed from 8 to 15. */
+#define CCPI2_COUNTER_OFFSET 8
+
#define L3C_COUNTER_CTL 0xA8
#define L3C_COUNTER_DATA 0xAC
#define DMC_COUNTER_CTL 0x234
#define DMC_COUNTER_DATA 0x240
+#define CCPI2_PERF_CTL 0x108
+#define CCPI2_COUNTER_CTL 0x10C
+#define CCPI2_COUNTER_SEL 0x12c
+#define CCPI2_COUNTER_DATA_L 0x130
+#define CCPI2_COUNTER_DATA_H 0x134
+
/* L3C event IDs */
#define L3_EVENT_READ_REQ 0xD
#define L3_EVENT_WRITEBACK_REQ 0xE
@@ -51,15 +64,28 @@
#define DMC_EVENT_READ_TXNS 0xF
#define DMC_EVENT_MAX 0x10
+#define CCPI2_EVENT_REQ_PKT_SENT 0x3D
+#define CCPI2_EVENT_SNOOP_PKT_SENT 0x65
+#define CCPI2_EVENT_DATA_PKT_SENT 0x105
+#define CCPI2_EVENT_GIC_PKT_SENT 0x12D
+#define CCPI2_EVENT_MAX 0x200
+
+#define CCPI2_PERF_CTL_ENABLE BIT(0)
+#define CCPI2_PERF_CTL_START BIT(1)
+#define CCPI2_PERF_CTL_RESET BIT(4)
+#define CCPI2_EVENT_LEVEL_RISING_EDGE BIT(10)
+#define CCPI2_EVENT_TYPE_EDGE_SENSITIVE BIT(11)
+
enum tx2_uncore_type {
PMU_TYPE_L3C,
PMU_TYPE_DMC,
+ PMU_TYPE_CCPI2,
PMU_TYPE_INVALID,
};
/*
- * pmu on each socket has 2 uncore devices(dmc and l3c),
- * each device has 4 counters.
+ * Each socket has 3 uncore devices associated with a PMU. The DMC and
+ * L3C have 4 32-bit counters and the CCPI2 has 8 64-bit counters.
*/
struct tx2_uncore_pmu {
struct hlist_node hpnode;
@@ -69,8 +95,10 @@ struct tx2_uncore_pmu {
int node;
int cpu;
u32 max_counters;
+ u32 counters_mask;
u32 prorate_factor;
u32 max_events;
+ u32 events_mask;
u64 hrtimer_interval;
void __iomem *base;
DECLARE_BITMAP(active_counters, TX2_PMU_MAX_COUNTERS);
@@ -79,6 +107,7 @@ struct tx2_uncore_pmu {
struct hrtimer hrtimer;
const struct attribute_group **attr_groups;
enum tx2_uncore_type type;
+ enum hrtimer_restart (*hrtimer_callback)(struct hrtimer *cb);
void (*init_cntr_base)(struct perf_event *event,
struct tx2_uncore_pmu *tx2_pmu);
void (*stop_event)(struct perf_event *event);
@@ -92,7 +121,21 @@ static inline struct tx2_uncore_pmu *pmu_to_tx2_pmu(struct pmu *pmu)
return container_of(pmu, struct tx2_uncore_pmu, pmu);
}
-PMU_FORMAT_ATTR(event, "config:0-4");
+#define TX2_PMU_FORMAT_ATTR(_var, _name, _format) \
+static ssize_t \
+__tx2_pmu_##_var##_show(struct device *dev, \
+ struct device_attribute *attr, \
+ char *page) \
+{ \
+ BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
+ return sprintf(page, _format "\n"); \
+} \
+ \
+static struct device_attribute format_attr_##_var = \
+ __ATTR(_name, 0444, __tx2_pmu_##_var##_show, NULL)
+
+TX2_PMU_FORMAT_ATTR(event, event, "config:0-4");
+TX2_PMU_FORMAT_ATTR(event_ccpi2, event, "config:0-9");
static struct attribute *l3c_pmu_format_attrs[] = {
&format_attr_event.attr,
@@ -104,6 +147,11 @@ static struct attribute *dmc_pmu_format_attrs[] = {
NULL,
};
+static struct attribute *ccpi2_pmu_format_attrs[] = {
+ &format_attr_event_ccpi2.attr,
+ NULL,
+};
+
static const struct attribute_group l3c_pmu_format_attr_group = {
.name = "format",
.attrs = l3c_pmu_format_attrs,
@@ -114,6 +162,11 @@ static const struct attribute_group dmc_pmu_format_attr_group = {
.attrs = dmc_pmu_format_attrs,
};
+static const struct attribute_group ccpi2_pmu_format_attr_group = {
+ .name = "format",
+ .attrs = ccpi2_pmu_format_attrs,
+};
+
/*
* sysfs event attributes
*/
@@ -164,6 +217,19 @@ static struct attribute *dmc_pmu_events_attrs[] = {
NULL,
};
+TX2_EVENT_ATTR(req_pktsent, CCPI2_EVENT_REQ_PKT_SENT);
+TX2_EVENT_ATTR(snoop_pktsent, CCPI2_EVENT_SNOOP_PKT_SENT);
+TX2_EVENT_ATTR(data_pktsent, CCPI2_EVENT_DATA_PKT_SENT);
+TX2_EVENT_ATTR(gic_pktsent, CCPI2_EVENT_GIC_PKT_SENT);
+
+static struct attribute *ccpi2_pmu_events_attrs[] = {
+ &tx2_pmu_event_attr_req_pktsent.attr.attr,
+ &tx2_pmu_event_attr_snoop_pktsent.attr.attr,
+ &tx2_pmu_event_attr_data_pktsent.attr.attr,
+ &tx2_pmu_event_attr_gic_pktsent.attr.attr,
+ NULL,
+};
+
static const struct attribute_group l3c_pmu_events_attr_group = {
.name = "events",
.attrs = l3c_pmu_events_attrs,
@@ -174,6 +240,11 @@ static const struct attribute_group dmc_pmu_events_attr_group = {
.attrs = dmc_pmu_events_attrs,
};
+static const struct attribute_group ccpi2_pmu_events_attr_group = {
+ .name = "events",
+ .attrs = ccpi2_pmu_events_attrs,
+};
+
/*
* sysfs cpumask attributes
*/
@@ -213,6 +284,13 @@ static const struct attribute_group *dmc_pmu_attr_groups[] = {
NULL
};
+static const struct attribute_group *ccpi2_pmu_attr_groups[] = {
+ &ccpi2_pmu_format_attr_group,
+ &pmu_cpumask_attr_group,
+ &ccpi2_pmu_events_attr_group,
+ NULL
+};
+
static inline u32 reg_readl(unsigned long addr)
{
return readl((void __iomem *)addr);
@@ -245,33 +323,58 @@ static void init_cntr_base_l3c(struct perf_event *event,
struct tx2_uncore_pmu *tx2_pmu)
{
struct hw_perf_event *hwc = &event->hw;
+ u32 cmask;
+
+ tx2_pmu = pmu_to_tx2_pmu(event->pmu);
+ cmask = tx2_pmu->counters_mask;
/* counter ctrl/data reg offset at 8 */
hwc->config_base = (unsigned long)tx2_pmu->base
- + L3C_COUNTER_CTL + (8 * GET_COUNTERID(event));
+ + L3C_COUNTER_CTL + (8 * GET_COUNTERID(event, cmask));
hwc->event_base = (unsigned long)tx2_pmu->base
- + L3C_COUNTER_DATA + (8 * GET_COUNTERID(event));
+ + L3C_COUNTER_DATA + (8 * GET_COUNTERID(event, cmask));
}
static void init_cntr_base_dmc(struct perf_event *event,
struct tx2_uncore_pmu *tx2_pmu)
{
struct hw_perf_event *hwc = &event->hw;
+ u32 cmask;
+
+ tx2_pmu = pmu_to_tx2_pmu(event->pmu);
+ cmask = tx2_pmu->counters_mask;
hwc->config_base = (unsigned long)tx2_pmu->base
+ DMC_COUNTER_CTL;
/* counter data reg offset at 0xc */
hwc->event_base = (unsigned long)tx2_pmu->base
- + DMC_COUNTER_DATA + (0xc * GET_COUNTERID(event));
+ + DMC_COUNTER_DATA + (0xc * GET_COUNTERID(event, cmask));
+}
+
+static void init_cntr_base_ccpi2(struct perf_event *event,
+ struct tx2_uncore_pmu *tx2_pmu)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ u32 cmask;
+
+ cmask = tx2_pmu->counters_mask;
+
+ hwc->config_base = (unsigned long)tx2_pmu->base
+ + CCPI2_COUNTER_CTL + (4 * GET_COUNTERID(event, cmask));
+ hwc->event_base = (unsigned long)tx2_pmu->base;
}
static void uncore_start_event_l3c(struct perf_event *event, int flags)
{
- u32 val;
+ u32 val, emask;
struct hw_perf_event *hwc = &event->hw;
+ struct tx2_uncore_pmu *tx2_pmu;
+
+ tx2_pmu = pmu_to_tx2_pmu(event->pmu);
+ emask = tx2_pmu->events_mask;
/* event id encoded in bits [07:03] */
- val = GET_EVENTID(event) << 3;
+ val = GET_EVENTID(event, emask) << 3;
reg_writel(val, hwc->config_base);
local64_set(&hwc->prev_count, 0);
reg_writel(0, hwc->event_base);
@@ -284,10 +387,17 @@ static inline void uncore_stop_event_l3c(struct perf_event *event)
static void uncore_start_event_dmc(struct perf_event *event, int flags)
{
- u32 val;
+ u32 val, cmask, emask;
struct hw_perf_event *hwc = &event->hw;
- int idx = GET_COUNTERID(event);
- int event_id = GET_EVENTID(event);
+ struct tx2_uncore_pmu *tx2_pmu;
+ int idx, event_id;
+
+ tx2_pmu = pmu_to_tx2_pmu(event->pmu);
+ cmask = tx2_pmu->counters_mask;
+ emask = tx2_pmu->events_mask;
+
+ idx = GET_COUNTERID(event, cmask);
+ event_id = GET_EVENTID(event, emask);
/* enable and start counters.
* 8 bits for each counter, bits[05:01] of a counter to set event type.
@@ -302,9 +412,14 @@ static void uncore_start_event_dmc(struct perf_event *event, int flags)
static void uncore_stop_event_dmc(struct perf_event *event)
{
- u32 val;
+ u32 val, cmask;
struct hw_perf_event *hwc = &event->hw;
- int idx = GET_COUNTERID(event);
+ struct tx2_uncore_pmu *tx2_pmu;
+ int idx;
+
+ tx2_pmu = pmu_to_tx2_pmu(event->pmu);
+ cmask = tx2_pmu->counters_mask;
+ idx = GET_COUNTERID(event, cmask);
/* clear event type(bits[05:01]) to stop counter */
val = reg_readl(hwc->config_base);
@@ -312,27 +427,72 @@ static void uncore_stop_event_dmc(struct perf_event *event)
reg_writel(val, hwc->config_base);
}
+static void uncore_start_event_ccpi2(struct perf_event *event, int flags)
+{
+ u32 emask;
+ struct hw_perf_event *hwc = &event->hw;
+ struct tx2_uncore_pmu *tx2_pmu;
+
+ tx2_pmu = pmu_to_tx2_pmu(event->pmu);
+ emask = tx2_pmu->events_mask;
+
+ /* Bit [09:00] to set event id.
+ * Bits [10], set level to rising edge.
+ * Bits [11], set type to edge sensitive.
+ */
+ reg_writel((CCPI2_EVENT_TYPE_EDGE_SENSITIVE |
+ CCPI2_EVENT_LEVEL_RISING_EDGE |
+ GET_EVENTID(event, emask)), hwc->config_base);
+
+ /* reset[4], enable[0] and start[1] counters */
+ reg_writel(CCPI2_PERF_CTL_RESET |
+ CCPI2_PERF_CTL_START |
+ CCPI2_PERF_CTL_ENABLE,
+ hwc->event_base + CCPI2_PERF_CTL);
+ local64_set(&event->hw.prev_count, 0ULL);
+}
+
+static void uncore_stop_event_ccpi2(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+
+ /* disable and stop counter */
+ reg_writel(0, hwc->event_base + CCPI2_PERF_CTL);
+}
+
static void tx2_uncore_event_update(struct perf_event *event)
{
- s64 prev, delta, new = 0;
+ u64 prev, delta, new = 0;
struct hw_perf_event *hwc = &event->hw;
struct tx2_uncore_pmu *tx2_pmu;
enum tx2_uncore_type type;
u32 prorate_factor;
+ u32 cmask, emask;
tx2_pmu = pmu_to_tx2_pmu(event->pmu);
type = tx2_pmu->type;
+ cmask = tx2_pmu->counters_mask;
+ emask = tx2_pmu->events_mask;
prorate_factor = tx2_pmu->prorate_factor;
-
- new = reg_readl(hwc->event_base);
- prev = local64_xchg(&hwc->prev_count, new);
-
- /* handles rollover of 32 bit counter */
- delta = (u32)(((1UL << 32) - prev) + new);
+ if (type == PMU_TYPE_CCPI2) {
+ reg_writel(CCPI2_COUNTER_OFFSET +
+ GET_COUNTERID(event, cmask),
+ hwc->event_base + CCPI2_COUNTER_SEL);
+ new = reg_readl(hwc->event_base + CCPI2_COUNTER_DATA_H);
+ new = (new << 32) +
+ reg_readl(hwc->event_base + CCPI2_COUNTER_DATA_L);
+ prev = local64_xchg(&hwc->prev_count, new);
+ delta = new - prev;
+ } else {
+ new = reg_readl(hwc->event_base);
+ prev = local64_xchg(&hwc->prev_count, new);
+ /* handles rollover of 32 bit counter */
+ delta = (u32)(((1UL << 32) - prev) + new);
+ }
/* DMC event data_transfers granularity is 16 Bytes, convert it to 64 */
if (type == PMU_TYPE_DMC &&
- GET_EVENTID(event) == DMC_EVENT_DATA_TRANSFERS)
+ GET_EVENTID(event, emask) == DMC_EVENT_DATA_TRANSFERS)
delta = delta/4;
/* L3C and DMC has 16 and 8 interleave channels respectively.
@@ -351,6 +511,7 @@ static enum tx2_uncore_type get_tx2_pmu_type(struct acpi_device *adev)
} devices[] = {
{"CAV901D", PMU_TYPE_L3C},
{"CAV901F", PMU_TYPE_DMC},
+ {"CAV901E", PMU_TYPE_CCPI2},
{"", PMU_TYPE_INVALID}
};
@@ -380,7 +541,8 @@ static bool tx2_uncore_validate_event(struct pmu *pmu,
* Make sure the group of events can be scheduled at once
* on the PMU.
*/
-static bool tx2_uncore_validate_event_group(struct perf_event *event)
+static bool tx2_uncore_validate_event_group(struct perf_event *event,
+ int max_counters)
{
struct perf_event *sibling, *leader = event->group_leader;
int counters = 0;
@@ -403,7 +565,7 @@ static bool tx2_uncore_validate_event_group(struct perf_event *event)
* If the group requires more counters than the HW has,
* it cannot ever be scheduled.
*/
- return counters <= TX2_PMU_MAX_COUNTERS;
+ return counters <= max_counters;
}
@@ -439,7 +601,7 @@ static int tx2_uncore_event_init(struct perf_event *event)
hwc->config = event->attr.config;
/* Validate the group */
- if (!tx2_uncore_validate_event_group(event))
+ if (!tx2_uncore_validate_event_group(event, tx2_pmu->max_counters))
return -EINVAL;
return 0;
@@ -456,6 +618,10 @@ static void tx2_uncore_event_start(struct perf_event *event, int flags)
tx2_pmu->start_event(event, flags);
perf_event_update_userpage(event);
+ /* No hrtimer needed for CCPI2, 64-bit counters */
+ if (!tx2_pmu->hrtimer_callback)
+ return;
+
/* Start timer for first event */
if (bitmap_weight(tx2_pmu->active_counters,
tx2_pmu->max_counters) == 1) {
@@ -510,15 +676,23 @@ static void tx2_uncore_event_del(struct perf_event *event, int flags)
{
struct tx2_uncore_pmu *tx2_pmu = pmu_to_tx2_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
+ u32 cmask;
+ cmask = tx2_pmu->counters_mask;
tx2_uncore_event_stop(event, PERF_EF_UPDATE);
/* clear the assigned counter */
- free_counter(tx2_pmu, GET_COUNTERID(event));
+ free_counter(tx2_pmu, GET_COUNTERID(event, cmask));
perf_event_update_userpage(event);
tx2_pmu->events[hwc->idx] = NULL;
hwc->idx = -1;
+
+ if (!tx2_pmu->hrtimer_callback)
+ return;
+
+ if (bitmap_empty(tx2_pmu->active_counters, tx2_pmu->max_counters))
+ hrtimer_cancel(&tx2_pmu->hrtimer);
}
static void tx2_uncore_event_read(struct perf_event *event)
@@ -580,8 +754,12 @@ static int tx2_uncore_pmu_add_dev(struct tx2_uncore_pmu *tx2_pmu)
cpu_online_mask);
tx2_pmu->cpu = cpu;
- hrtimer_init(&tx2_pmu->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- tx2_pmu->hrtimer.function = tx2_hrtimer_callback;
+
+ if (tx2_pmu->hrtimer_callback) {
+ hrtimer_init(&tx2_pmu->hrtimer,
+ CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ tx2_pmu->hrtimer.function = tx2_pmu->hrtimer_callback;
+ }
ret = tx2_uncore_pmu_register(tx2_pmu);
if (ret) {
@@ -653,10 +831,13 @@ static struct tx2_uncore_pmu *tx2_uncore_pmu_init_dev(struct device *dev,
switch (tx2_pmu->type) {
case PMU_TYPE_L3C:
- tx2_pmu->max_counters = TX2_PMU_MAX_COUNTERS;
+ tx2_pmu->max_counters = TX2_PMU_DMC_L3C_MAX_COUNTERS;
+ tx2_pmu->counters_mask = 0x3;
tx2_pmu->prorate_factor = TX2_PMU_L3_TILES;
tx2_pmu->max_events = L3_EVENT_MAX;
+ tx2_pmu->events_mask = 0x1f;
tx2_pmu->hrtimer_interval = TX2_PMU_HRTIMER_INTERVAL;
+ tx2_pmu->hrtimer_callback = tx2_hrtimer_callback;
tx2_pmu->attr_groups = l3c_pmu_attr_groups;
tx2_pmu->name = devm_kasprintf(dev, GFP_KERNEL,
"uncore_l3c_%d", tx2_pmu->node);
@@ -665,10 +846,13 @@ static struct tx2_uncore_pmu *tx2_uncore_pmu_init_dev(struct device *dev,
tx2_pmu->stop_event = uncore_stop_event_l3c;
break;
case PMU_TYPE_DMC:
- tx2_pmu->max_counters = TX2_PMU_MAX_COUNTERS;
+ tx2_pmu->max_counters = TX2_PMU_DMC_L3C_MAX_COUNTERS;
+ tx2_pmu->counters_mask = 0x3;
tx2_pmu->prorate_factor = TX2_PMU_DMC_CHANNELS;
tx2_pmu->max_events = DMC_EVENT_MAX;
+ tx2_pmu->events_mask = 0x1f;
tx2_pmu->hrtimer_interval = TX2_PMU_HRTIMER_INTERVAL;
+ tx2_pmu->hrtimer_callback = tx2_hrtimer_callback;
tx2_pmu->attr_groups = dmc_pmu_attr_groups;
tx2_pmu->name = devm_kasprintf(dev, GFP_KERNEL,
"uncore_dmc_%d", tx2_pmu->node);
@@ -676,6 +860,21 @@ static struct tx2_uncore_pmu *tx2_uncore_pmu_init_dev(struct device *dev,
tx2_pmu->start_event = uncore_start_event_dmc;
tx2_pmu->stop_event = uncore_stop_event_dmc;
break;
+ case PMU_TYPE_CCPI2:
+ /* CCPI2 has 8 counters */
+ tx2_pmu->max_counters = TX2_PMU_CCPI2_MAX_COUNTERS;
+ tx2_pmu->counters_mask = 0x7;
+ tx2_pmu->prorate_factor = 1;
+ tx2_pmu->max_events = CCPI2_EVENT_MAX;
+ tx2_pmu->events_mask = 0x1ff;
+ tx2_pmu->attr_groups = ccpi2_pmu_attr_groups;
+ tx2_pmu->name = devm_kasprintf(dev, GFP_KERNEL,
+ "uncore_ccpi2_%d", tx2_pmu->node);
+ tx2_pmu->init_cntr_base = init_cntr_base_ccpi2;
+ tx2_pmu->start_event = uncore_start_event_ccpi2;
+ tx2_pmu->stop_event = uncore_stop_event_ccpi2;
+ tx2_pmu->hrtimer_callback = NULL;
+ break;
case PMU_TYPE_INVALID:
devm_kfree(dev, tx2_pmu);
return NULL;
@@ -744,7 +943,9 @@ static int tx2_uncore_pmu_offline_cpu(unsigned int cpu,
if (cpu != tx2_pmu->cpu)
return 0;
- hrtimer_cancel(&tx2_pmu->hrtimer);
+ if (tx2_pmu->hrtimer_callback)
+ hrtimer_cancel(&tx2_pmu->hrtimer);
+
cpumask_copy(&cpu_online_mask_temp, cpu_online_mask);
cpumask_clear_cpu(cpu, &cpu_online_mask_temp);
new_cpu = cpumask_any_and(
diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c
index 7e328d6385c3..46ee6807d533 100644
--- a/drivers/perf/xgene_pmu.c
+++ b/drivers/perf/xgene_pmu.c
@@ -1282,25 +1282,21 @@ static int acpi_pmu_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu,
struct platform_device *pdev)
{
void __iomem *csw_csr, *mcba_csr, *mcbb_csr;
- struct resource *res;
unsigned int reg;
- res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
- csw_csr = devm_ioremap_resource(&pdev->dev, res);
+ csw_csr = devm_platform_ioremap_resource(pdev, 1);
if (IS_ERR(csw_csr)) {
dev_err(&pdev->dev, "ioremap failed for CSW CSR resource\n");
return PTR_ERR(csw_csr);
}
- res = platform_get_resource(pdev, IORESOURCE_MEM, 2);
- mcba_csr = devm_ioremap_resource(&pdev->dev, res);
+ mcba_csr = devm_platform_ioremap_resource(pdev, 2);
if (IS_ERR(mcba_csr)) {
dev_err(&pdev->dev, "ioremap failed for MCBA CSR resource\n");
return PTR_ERR(mcba_csr);
}
- res = platform_get_resource(pdev, IORESOURCE_MEM, 3);
- mcbb_csr = devm_ioremap_resource(&pdev->dev, res);
+ mcbb_csr = devm_platform_ioremap_resource(pdev, 3);
if (IS_ERR(mcbb_csr)) {
dev_err(&pdev->dev, "ioremap failed for MCBB CSR resource\n");
return PTR_ERR(mcbb_csr);
@@ -1332,13 +1328,11 @@ static int acpi_pmu_v3_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu,
struct platform_device *pdev)
{
void __iomem *csw_csr;
- struct resource *res;
unsigned int reg;
u32 mcb0routing;
u32 mcb1routing;
- res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
- csw_csr = devm_ioremap_resource(&pdev->dev, res);
+ csw_csr = devm_platform_ioremap_resource(pdev, 1);
if (IS_ERR(csw_csr)) {
dev_err(&pdev->dev, "ioremap failed for CSW CSR resource\n");
return PTR_ERR(csw_csr);
diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c
index 648ddb7f038a..c6800d220920 100644
--- a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c
+++ b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c
@@ -87,7 +87,7 @@ FUNC_GROUP_DECL(MACLINK3, L23);
#define K25 7
SIG_EXPR_LIST_DECL_SESG(K25, MACLINK4, MACLINK4, SIG_DESC_SET(SCU410, 7));
-SIG_EXPR_LIST_DECL_SESG(K25, SDA14, SDA14, SIG_DESC_SET(SCU4B0, 7));
+SIG_EXPR_LIST_DECL_SESG(K25, SDA14, I2C14, SIG_DESC_SET(SCU4B0, 7));
PIN_DECL_2(K25, GPIOA7, MACLINK4, SDA14);
FUNC_GROUP_DECL(MACLINK4, K25);
@@ -1262,13 +1262,13 @@ GROUP_DECL(SPI1, AB11, AC11, AA11);
#define AD11 206
SIG_EXPR_LIST_DECL_SEMG(AD11, SPI1DQ2, QSPI1, SPI1, SIG_DESC_SET(SCU438, 14));
SIG_EXPR_LIST_DECL_SEMG(AD11, TXD13, UART13G1, UART13,
- SIG_DESC_SET(SCU438, 14));
+ SIG_DESC_CLEAR(SCU4B8, 2), SIG_DESC_SET(SCU4D8, 14));
PIN_DECL_2(AD11, GPIOZ6, SPI1DQ2, TXD13);
#define AF10 207
SIG_EXPR_LIST_DECL_SEMG(AF10, SPI1DQ3, QSPI1, SPI1, SIG_DESC_SET(SCU438, 15));
SIG_EXPR_LIST_DECL_SEMG(AF10, RXD13, UART13G1, UART13,
- SIG_DESC_SET(SCU438, 15));
+ SIG_DESC_CLEAR(SCU4B8, 3), SIG_DESC_SET(SCU4D8, 15));
PIN_DECL_2(AF10, GPIOZ7, SPI1DQ3, RXD13);
GROUP_DECL(QSPI1, AB11, AC11, AA11, AD11, AF10);
@@ -1440,91 +1440,85 @@ FUNC_GROUP_DECL(RGMII2, D4, C2, C1, D3, E4, F5, D2, E3, D1, F4, E2, E1);
FUNC_GROUP_DECL(RMII2, D4, C2, C1, D3, D2, D1, F4, E2, E1);
#define AB4 232
-SIG_EXPR_LIST_DECL_SESG(AB4, SD3CLK, SD3, SIG_DESC_SET(SCU400, 24));
-PIN_DECL_1(AB4, GPIO18D0, SD3CLK);
+SIG_EXPR_LIST_DECL_SEMG(AB4, EMMCCLK, EMMCG1, EMMC, SIG_DESC_SET(SCU400, 24));
+PIN_DECL_1(AB4, GPIO18D0, EMMCCLK);
#define AA4 233
-SIG_EXPR_LIST_DECL_SESG(AA4, SD3CMD, SD3, SIG_DESC_SET(SCU400, 25));
-PIN_DECL_1(AA4, GPIO18D1, SD3CMD);
+SIG_EXPR_LIST_DECL_SEMG(AA4, EMMCCMD, EMMCG1, EMMC, SIG_DESC_SET(SCU400, 25));
+PIN_DECL_1(AA4, GPIO18D1, EMMCCMD);
#define AC4 234
-SIG_EXPR_LIST_DECL_SESG(AC4, SD3DAT0, SD3, SIG_DESC_SET(SCU400, 26));
-PIN_DECL_1(AC4, GPIO18D2, SD3DAT0);
+SIG_EXPR_LIST_DECL_SEMG(AC4, EMMCDAT0, EMMCG1, EMMC, SIG_DESC_SET(SCU400, 26));
+PIN_DECL_1(AC4, GPIO18D2, EMMCDAT0);
#define AA5 235
-SIG_EXPR_LIST_DECL_SESG(AA5, SD3DAT1, SD3, SIG_DESC_SET(SCU400, 27));
-PIN_DECL_1(AA5, GPIO18D3, SD3DAT1);
+SIG_EXPR_LIST_DECL_SEMG(AA5, EMMCDAT1, EMMCG4, EMMC, SIG_DESC_SET(SCU400, 27));
+PIN_DECL_1(AA5, GPIO18D3, EMMCDAT1);
#define Y5 236
-SIG_EXPR_LIST_DECL_SESG(Y5, SD3DAT2, SD3, SIG_DESC_SET(SCU400, 28));
-PIN_DECL_1(Y5, GPIO18D4, SD3DAT2);
+SIG_EXPR_LIST_DECL_SEMG(Y5, EMMCDAT2, EMMCG4, EMMC, SIG_DESC_SET(SCU400, 28));
+PIN_DECL_1(Y5, GPIO18D4, EMMCDAT2);
#define AB5 237
-SIG_EXPR_LIST_DECL_SESG(AB5, SD3DAT3, SD3, SIG_DESC_SET(SCU400, 29));
-PIN_DECL_1(AB5, GPIO18D5, SD3DAT3);
+SIG_EXPR_LIST_DECL_SEMG(AB5, EMMCDAT3, EMMCG4, EMMC, SIG_DESC_SET(SCU400, 29));
+PIN_DECL_1(AB5, GPIO18D5, EMMCDAT3);
#define AB6 238
-SIG_EXPR_LIST_DECL_SESG(AB6, SD3CD, SD3, SIG_DESC_SET(SCU400, 30));
-PIN_DECL_1(AB6, GPIO18D6, SD3CD);
+SIG_EXPR_LIST_DECL_SEMG(AB6, EMMCCD, EMMCG1, EMMC, SIG_DESC_SET(SCU400, 30));
+PIN_DECL_1(AB6, GPIO18D6, EMMCCD);
#define AC5 239
-SIG_EXPR_LIST_DECL_SESG(AC5, SD3WP, SD3, SIG_DESC_SET(SCU400, 31));
-PIN_DECL_1(AC5, GPIO18D7, SD3WP);
+SIG_EXPR_LIST_DECL_SEMG(AC5, EMMCWP, EMMCG1, EMMC, SIG_DESC_SET(SCU400, 31));
+PIN_DECL_1(AC5, GPIO18D7, EMMCWP);
-FUNC_GROUP_DECL(SD3, AB4, AA4, AC4, AA5, Y5, AB5, AB6, AC5);
+GROUP_DECL(EMMCG1, AB4, AA4, AC4, AB6, AC5);
+GROUP_DECL(EMMCG4, AB4, AA4, AC4, AA5, Y5, AB5, AB6, AC5);
#define Y1 240
SIG_EXPR_LIST_DECL_SEMG(Y1, FWSPIDCS, FWSPID, FWSPID, SIG_DESC_SET(SCU500, 3));
SIG_EXPR_LIST_DECL_SESG(Y1, VBCS, VB, SIG_DESC_SET(SCU500, 5));
-SIG_EXPR_LIST_DECL_SESG(Y1, SD3DAT4, SD3DAT4, SIG_DESC_SET(SCU404, 0));
-PIN_DECL_3(Y1, GPIO18E0, FWSPIDCS, VBCS, SD3DAT4);
-FUNC_GROUP_DECL(SD3DAT4, Y1);
+SIG_EXPR_LIST_DECL_SEMG(Y1, EMMCDAT4, EMMCG8, EMMC, SIG_DESC_SET(SCU404, 0));
+PIN_DECL_3(Y1, GPIO18E0, FWSPIDCS, VBCS, EMMCDAT4);
#define Y2 241
SIG_EXPR_LIST_DECL_SEMG(Y2, FWSPIDCK, FWSPID, FWSPID, SIG_DESC_SET(SCU500, 3));
SIG_EXPR_LIST_DECL_SESG(Y2, VBCK, VB, SIG_DESC_SET(SCU500, 5));
-SIG_EXPR_LIST_DECL_SESG(Y2, SD3DAT5, SD3DAT5, SIG_DESC_SET(SCU404, 1));
-PIN_DECL_3(Y2, GPIO18E1, FWSPIDCK, VBCK, SD3DAT5);
-FUNC_GROUP_DECL(SD3DAT5, Y2);
+SIG_EXPR_LIST_DECL_SEMG(Y2, EMMCDAT5, EMMCG8, EMMC, SIG_DESC_SET(SCU404, 1));
+PIN_DECL_3(Y2, GPIO18E1, FWSPIDCK, VBCK, EMMCDAT5);
#define Y3 242
SIG_EXPR_LIST_DECL_SEMG(Y3, FWSPIDMOSI, FWSPID, FWSPID,
SIG_DESC_SET(SCU500, 3));
SIG_EXPR_LIST_DECL_SESG(Y3, VBMOSI, VB, SIG_DESC_SET(SCU500, 5));
-SIG_EXPR_LIST_DECL_SESG(Y3, SD3DAT6, SD3DAT6, SIG_DESC_SET(SCU404, 2));
-PIN_DECL_3(Y3, GPIO18E2, FWSPIDMOSI, VBMOSI, SD3DAT6);
-FUNC_GROUP_DECL(SD3DAT6, Y3);
+SIG_EXPR_LIST_DECL_SEMG(Y3, EMMCDAT6, EMMCG8, EMMC, SIG_DESC_SET(SCU404, 2));
+PIN_DECL_3(Y3, GPIO18E2, FWSPIDMOSI, VBMOSI, EMMCDAT6);
#define Y4 243
SIG_EXPR_LIST_DECL_SEMG(Y4, FWSPIDMISO, FWSPID, FWSPID,
SIG_DESC_SET(SCU500, 3));
SIG_EXPR_LIST_DECL_SESG(Y4, VBMISO, VB, SIG_DESC_SET(SCU500, 5));
-SIG_EXPR_LIST_DECL_SESG(Y4, SD3DAT7, SD3DAT7, SIG_DESC_SET(SCU404, 3));
-PIN_DECL_3(Y4, GPIO18E3, FWSPIDMISO, VBMISO, SD3DAT7);
-FUNC_GROUP_DECL(SD3DAT7, Y4);
+SIG_EXPR_LIST_DECL_SEMG(Y4, EMMCDAT7, EMMCG8, EMMC, SIG_DESC_SET(SCU404, 3));
+PIN_DECL_3(Y4, GPIO18E3, FWSPIDMISO, VBMISO, EMMCDAT7);
GROUP_DECL(FWSPID, Y1, Y2, Y3, Y4);
GROUP_DECL(FWQSPID, Y1, Y2, Y3, Y4, AE12, AF12);
+GROUP_DECL(EMMCG8, AB4, AA4, AC4, AA5, Y5, AB5, AB6, AC5, Y1, Y2, Y3, Y4);
FUNC_DECL_2(FWSPID, FWSPID, FWQSPID);
FUNC_GROUP_DECL(VB, Y1, Y2, Y3, Y4);
-
+FUNC_DECL_3(EMMC, EMMCG1, EMMCG4, EMMCG8);
/*
* FIXME: Confirm bits and priorities are the right way around for the
* following 4 pins
*/
#define AF25 244
-SIG_EXPR_LIST_DECL_SEMG(AF25, I3C3SCL, I3C3, I3C3, SIG_DESC_SET(SCU438, 20),
- SIG_DESC_SET(SCU4D8, 20));
-SIG_EXPR_LIST_DECL_SESG(AF25, FSI1CLK, FSI1, SIG_DESC_CLEAR(SCU438, 20),
- SIG_DESC_SET(SCU4D8, 20));
+SIG_EXPR_LIST_DECL_SEMG(AF25, I3C3SCL, I3C3, I3C3, SIG_DESC_SET(SCU438, 20));
+SIG_EXPR_LIST_DECL_SESG(AF25, FSI1CLK, FSI1, SIG_DESC_SET(SCU4D8, 20));
PIN_DECL_(AF25, SIG_EXPR_LIST_PTR(AF25, I3C3SCL),
SIG_EXPR_LIST_PTR(AF25, FSI1CLK));
#define AE26 245
-SIG_EXPR_LIST_DECL_SEMG(AE26, I3C3SDA, I3C3, I3C3, SIG_DESC_SET(SCU438, 21),
- SIG_DESC_SET(SCU4D8, 21));
-SIG_EXPR_LIST_DECL_SESG(AE26, FSI1DATA, FSI1, SIG_DESC_CLEAR(SCU438, 21),
- SIG_DESC_SET(SCU4D8, 21));
+SIG_EXPR_LIST_DECL_SEMG(AE26, I3C3SDA, I3C3, I3C3, SIG_DESC_SET(SCU438, 21));
+SIG_EXPR_LIST_DECL_SESG(AE26, FSI1DATA, FSI1, SIG_DESC_SET(SCU4D8, 21));
PIN_DECL_(AE26, SIG_EXPR_LIST_PTR(AE26, I3C3SDA),
SIG_EXPR_LIST_PTR(AE26, FSI1DATA));
@@ -1533,18 +1527,14 @@ FUNC_DECL_2(I3C3, HVI3C3, I3C3);
FUNC_GROUP_DECL(FSI1, AF25, AE26);
#define AE25 246
-SIG_EXPR_LIST_DECL_SEMG(AE25, I3C4SCL, I3C4, I3C4, SIG_DESC_SET(SCU438, 22),
- SIG_DESC_SET(SCU4D8, 22));
-SIG_EXPR_LIST_DECL_SESG(AE25, FSI2CLK, FSI2, SIG_DESC_CLEAR(SCU438, 22),
- SIG_DESC_SET(SCU4D8, 22));
+SIG_EXPR_LIST_DECL_SEMG(AE25, I3C4SCL, I3C4, I3C4, SIG_DESC_SET(SCU438, 22));
+SIG_EXPR_LIST_DECL_SESG(AE25, FSI2CLK, FSI2, SIG_DESC_SET(SCU4D8, 22));
PIN_DECL_(AE25, SIG_EXPR_LIST_PTR(AE25, I3C4SCL),
SIG_EXPR_LIST_PTR(AE25, FSI2CLK));
#define AF24 247
-SIG_EXPR_LIST_DECL_SEMG(AF24, I3C4SDA, I3C4, I3C4, SIG_DESC_SET(SCU438, 23),
- SIG_DESC_SET(SCU4D8, 23));
-SIG_EXPR_LIST_DECL_SESG(AF24, FSI2DATA, FSI2, SIG_DESC_CLEAR(SCU438, 23),
- SIG_DESC_SET(SCU4D8, 23));
+SIG_EXPR_LIST_DECL_SEMG(AF24, I3C4SDA, I3C4, I3C4, SIG_DESC_SET(SCU438, 23));
+SIG_EXPR_LIST_DECL_SESG(AF24, FSI2DATA, FSI2, SIG_DESC_SET(SCU4D8, 23));
PIN_DECL_(AF24, SIG_EXPR_LIST_PTR(AF24, I3C4SDA),
SIG_EXPR_LIST_PTR(AF24, FSI2DATA));
@@ -1574,6 +1564,8 @@ static struct pinctrl_pin_desc aspeed_g6_pins[ASPEED_G6_NR_PINS] = {
ASPEED_PINCTRL_PIN(A3),
ASPEED_PINCTRL_PIN(AA11),
ASPEED_PINCTRL_PIN(AA12),
+ ASPEED_PINCTRL_PIN(AA16),
+ ASPEED_PINCTRL_PIN(AA17),
ASPEED_PINCTRL_PIN(AA23),
ASPEED_PINCTRL_PIN(AA24),
ASPEED_PINCTRL_PIN(AA25),
@@ -1585,6 +1577,8 @@ static struct pinctrl_pin_desc aspeed_g6_pins[ASPEED_G6_NR_PINS] = {
ASPEED_PINCTRL_PIN(AB11),
ASPEED_PINCTRL_PIN(AB12),
ASPEED_PINCTRL_PIN(AB15),
+ ASPEED_PINCTRL_PIN(AB16),
+ ASPEED_PINCTRL_PIN(AB17),
ASPEED_PINCTRL_PIN(AB18),
ASPEED_PINCTRL_PIN(AB19),
ASPEED_PINCTRL_PIN(AB22),
@@ -1602,6 +1596,7 @@ static struct pinctrl_pin_desc aspeed_g6_pins[ASPEED_G6_NR_PINS] = {
ASPEED_PINCTRL_PIN(AC11),
ASPEED_PINCTRL_PIN(AC12),
ASPEED_PINCTRL_PIN(AC15),
+ ASPEED_PINCTRL_PIN(AC16),
ASPEED_PINCTRL_PIN(AC17),
ASPEED_PINCTRL_PIN(AC18),
ASPEED_PINCTRL_PIN(AC19),
@@ -1619,6 +1614,7 @@ static struct pinctrl_pin_desc aspeed_g6_pins[ASPEED_G6_NR_PINS] = {
ASPEED_PINCTRL_PIN(AD12),
ASPEED_PINCTRL_PIN(AD14),
ASPEED_PINCTRL_PIN(AD15),
+ ASPEED_PINCTRL_PIN(AD16),
ASPEED_PINCTRL_PIN(AD19),
ASPEED_PINCTRL_PIN(AD20),
ASPEED_PINCTRL_PIN(AD22),
@@ -1634,8 +1630,11 @@ static struct pinctrl_pin_desc aspeed_g6_pins[ASPEED_G6_NR_PINS] = {
ASPEED_PINCTRL_PIN(AE12),
ASPEED_PINCTRL_PIN(AE14),
ASPEED_PINCTRL_PIN(AE15),
+ ASPEED_PINCTRL_PIN(AE16),
ASPEED_PINCTRL_PIN(AE18),
ASPEED_PINCTRL_PIN(AE19),
+ ASPEED_PINCTRL_PIN(AE25),
+ ASPEED_PINCTRL_PIN(AE26),
ASPEED_PINCTRL_PIN(AE7),
ASPEED_PINCTRL_PIN(AE8),
ASPEED_PINCTRL_PIN(AF10),
@@ -1643,6 +1642,8 @@ static struct pinctrl_pin_desc aspeed_g6_pins[ASPEED_G6_NR_PINS] = {
ASPEED_PINCTRL_PIN(AF12),
ASPEED_PINCTRL_PIN(AF14),
ASPEED_PINCTRL_PIN(AF15),
+ ASPEED_PINCTRL_PIN(AF24),
+ ASPEED_PINCTRL_PIN(AF25),
ASPEED_PINCTRL_PIN(AF7),
ASPEED_PINCTRL_PIN(AF8),
ASPEED_PINCTRL_PIN(AF9),
@@ -1792,17 +1793,6 @@ static struct pinctrl_pin_desc aspeed_g6_pins[ASPEED_G6_NR_PINS] = {
ASPEED_PINCTRL_PIN(Y3),
ASPEED_PINCTRL_PIN(Y4),
ASPEED_PINCTRL_PIN(Y5),
- ASPEED_PINCTRL_PIN(AB16),
- ASPEED_PINCTRL_PIN(AA17),
- ASPEED_PINCTRL_PIN(AB17),
- ASPEED_PINCTRL_PIN(AE16),
- ASPEED_PINCTRL_PIN(AC16),
- ASPEED_PINCTRL_PIN(AA16),
- ASPEED_PINCTRL_PIN(AD16),
- ASPEED_PINCTRL_PIN(AF25),
- ASPEED_PINCTRL_PIN(AE26),
- ASPEED_PINCTRL_PIN(AE25),
- ASPEED_PINCTRL_PIN(AF24),
};
static const struct aspeed_pin_group aspeed_g6_groups[] = {
@@ -1976,11 +1966,9 @@ static const struct aspeed_pin_group aspeed_g6_groups[] = {
ASPEED_PINCTRL_GROUP(SALT9G1),
ASPEED_PINCTRL_GROUP(SD1),
ASPEED_PINCTRL_GROUP(SD2),
- ASPEED_PINCTRL_GROUP(SD3),
- ASPEED_PINCTRL_GROUP(SD3DAT4),
- ASPEED_PINCTRL_GROUP(SD3DAT5),
- ASPEED_PINCTRL_GROUP(SD3DAT6),
- ASPEED_PINCTRL_GROUP(SD3DAT7),
+ ASPEED_PINCTRL_GROUP(EMMCG1),
+ ASPEED_PINCTRL_GROUP(EMMCG4),
+ ASPEED_PINCTRL_GROUP(EMMCG8),
ASPEED_PINCTRL_GROUP(SGPM1),
ASPEED_PINCTRL_GROUP(SGPS1),
ASPEED_PINCTRL_GROUP(SIOONCTRL),
@@ -2059,6 +2047,7 @@ static const struct aspeed_pin_function aspeed_g6_functions[] = {
ASPEED_PINCTRL_FUNC(ADC8),
ASPEED_PINCTRL_FUNC(ADC9),
ASPEED_PINCTRL_FUNC(BMCINT),
+ ASPEED_PINCTRL_FUNC(EMMC),
ASPEED_PINCTRL_FUNC(ESPI),
ASPEED_PINCTRL_FUNC(ESPIALT),
ASPEED_PINCTRL_FUNC(FSI1),
@@ -2191,11 +2180,6 @@ static const struct aspeed_pin_function aspeed_g6_functions[] = {
ASPEED_PINCTRL_FUNC(SALT9),
ASPEED_PINCTRL_FUNC(SD1),
ASPEED_PINCTRL_FUNC(SD2),
- ASPEED_PINCTRL_FUNC(SD3),
- ASPEED_PINCTRL_FUNC(SD3DAT4),
- ASPEED_PINCTRL_FUNC(SD3DAT5),
- ASPEED_PINCTRL_FUNC(SD3DAT6),
- ASPEED_PINCTRL_FUNC(SD3DAT7),
ASPEED_PINCTRL_FUNC(SGPM1),
ASPEED_PINCTRL_FUNC(SGPS1),
ASPEED_PINCTRL_FUNC(SIOONCTRL),
diff --git a/drivers/pinctrl/aspeed/pinmux-aspeed.h b/drivers/pinctrl/aspeed/pinmux-aspeed.h
index a2c0d52e4f7b..140c5ce9fbc1 100644
--- a/drivers/pinctrl/aspeed/pinmux-aspeed.h
+++ b/drivers/pinctrl/aspeed/pinmux-aspeed.h
@@ -508,7 +508,7 @@ struct aspeed_pin_desc {
* @idx: The bit index in the register
*/
#define SIG_DESC_SET(reg, idx) SIG_DESC_IP_BIT(ASPEED_IP_SCU, reg, idx, 1)
-#define SIG_DESC_CLEAR(reg, idx) SIG_DESC_IP_BIT(ASPEED_IP_SCU, reg, idx, 0)
+#define SIG_DESC_CLEAR(reg, idx) { ASPEED_IP_SCU, reg, BIT_MASK(idx), 0, 0 }
#define SIG_DESC_LIST_SYM(sig, group) sig_descs_ ## sig ## _ ## group
#define SIG_DESC_LIST_DECL(sig, group, ...) \
@@ -738,6 +738,7 @@ struct aspeed_pin_desc {
static const char *FUNC_SYM(func)[] = { __VA_ARGS__ }
#define FUNC_DECL_2(func, one, two) FUNC_DECL_(func, #one, #two)
+#define FUNC_DECL_3(func, one, two, three) FUNC_DECL_(func, #one, #two, #three)
#define FUNC_GROUP_DECL(func, ...) \
GROUP_DECL(func, __VA_ARGS__); \
diff --git a/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c b/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c
index 6f7d3a2f2e97..42f7ab383ad9 100644
--- a/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c
+++ b/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c
@@ -1,14 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2014-2017 Broadcom
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation version 2.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
*/
/*
@@ -853,7 +845,7 @@ static int iproc_gpio_probe(struct platform_device *pdev)
/* optional GPIO interrupt support */
irq = platform_get_irq(pdev, 0);
- if (irq) {
+ if (irq > 0) {
struct irq_chip *irqc;
struct gpio_irq_chip *girq;
diff --git a/drivers/pinctrl/bcm/pinctrl-ns2-mux.c b/drivers/pinctrl/bcm/pinctrl-ns2-mux.c
index 2bf6af7df7d9..9fabc451550e 100644
--- a/drivers/pinctrl/bcm/pinctrl-ns2-mux.c
+++ b/drivers/pinctrl/bcm/pinctrl-ns2-mux.c
@@ -640,8 +640,8 @@ static int ns2_pinmux_enable(struct pinctrl_dev *pctrl_dev,
const struct ns2_pin_function *func;
const struct ns2_pin_group *grp;
- if (grp_select > pinctrl->num_groups ||
- func_select > pinctrl->num_functions)
+ if (grp_select >= pinctrl->num_groups ||
+ func_select >= pinctrl->num_functions)
return -EINVAL;
func = &pinctrl->functions[func_select];
diff --git a/drivers/pinctrl/berlin/pinctrl-as370.c b/drivers/pinctrl/berlin/pinctrl-as370.c
index 44f8ccdbeeff..9dfdc275ee33 100644
--- a/drivers/pinctrl/berlin/pinctrl-as370.c
+++ b/drivers/pinctrl/berlin/pinctrl-as370.c
@@ -43,7 +43,7 @@ static const struct berlin_desc_group as370_soc_pinctrl_groups[] = {
BERLIN_PINCTRL_FUNCTION(0x0, "gpio"), /* GPIO5 */
BERLIN_PINCTRL_FUNCTION(0x1, "i2s1"), /* DO3 */
BERLIN_PINCTRL_FUNCTION(0x2, "pwm"), /* PWM5 */
- BERLIN_PINCTRL_FUNCTION(0x3, "spififib"), /* SPDIFIB */
+ BERLIN_PINCTRL_FUNCTION(0x3, "spdifib"), /* SPDIFIB */
BERLIN_PINCTRL_FUNCTION(0x4, "spdifo"), /* SPDIFO */
BERLIN_PINCTRL_FUNCTION(0x5, "phy")), /* DBG5 */
BERLIN_PINCTRL_GROUP("I2S1_MCLK", 0x0, 0x3, 0x12,
diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c
index aae51c507f59..2c419fa5d1c1 100644
--- a/drivers/pinctrl/intel/pinctrl-cherryview.c
+++ b/drivers/pinctrl/intel/pinctrl-cherryview.c
@@ -147,6 +147,7 @@ struct chv_pin_context {
* @pctldesc: Pin controller description
* @pctldev: Pointer to the pin controller device
* @chip: GPIO chip in this pin controller
+ * @irqchip: IRQ chip in this pin controller
* @regs: MMIO registers
* @intr_lines: Stores mapping between 16 HW interrupt wires and GPIO
* offset (in GPIO number space)
@@ -162,6 +163,7 @@ struct chv_pinctrl {
struct pinctrl_desc pctldesc;
struct pinctrl_dev *pctldev;
struct gpio_chip chip;
+ struct irq_chip irqchip;
void __iomem *regs;
unsigned intr_lines[16];
const struct chv_community *community;
@@ -1466,16 +1468,6 @@ static int chv_gpio_irq_type(struct irq_data *d, unsigned int type)
return 0;
}
-static struct irq_chip chv_gpio_irqchip = {
- .name = "chv-gpio",
- .irq_startup = chv_gpio_irq_startup,
- .irq_ack = chv_gpio_irq_ack,
- .irq_mask = chv_gpio_irq_mask,
- .irq_unmask = chv_gpio_irq_unmask,
- .irq_set_type = chv_gpio_irq_type,
- .flags = IRQCHIP_SKIP_SET_WAKE,
-};
-
static void chv_gpio_irq_handler(struct irq_desc *desc)
{
struct gpio_chip *gc = irq_desc_get_handler_data(desc);
@@ -1513,7 +1505,6 @@ static const struct dmi_system_id chv_no_valid_mask[] = {
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
DMI_MATCH(DMI_PRODUCT_FAMILY, "Intel_Strago"),
- DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"),
},
},
{
@@ -1521,7 +1512,6 @@ static const struct dmi_system_id chv_no_valid_mask[] = {
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "HP"),
DMI_MATCH(DMI_PRODUCT_NAME, "Setzer"),
- DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"),
},
},
{
@@ -1529,7 +1519,6 @@ static const struct dmi_system_id chv_no_valid_mask[] = {
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
DMI_MATCH(DMI_PRODUCT_NAME, "Cyan"),
- DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"),
},
},
{
@@ -1537,7 +1526,6 @@ static const struct dmi_system_id chv_no_valid_mask[] = {
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
DMI_MATCH(DMI_PRODUCT_NAME, "Celes"),
- DMI_MATCH(DMI_PRODUCT_VERSION, "1.0"),
},
},
{}
@@ -1563,7 +1551,7 @@ static void chv_init_irq_valid_mask(struct gpio_chip *chip,
intsel >>= CHV_PADCTRL0_INTSEL_SHIFT;
if (intsel >= community->nirqs)
- clear_bit(i, valid_mask);
+ clear_bit(desc->number, valid_mask);
}
}
@@ -1629,7 +1617,15 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq)
}
}
- ret = gpiochip_irqchip_add(chip, &chv_gpio_irqchip, 0,
+ pctrl->irqchip.name = "chv-gpio";
+ pctrl->irqchip.irq_startup = chv_gpio_irq_startup;
+ pctrl->irqchip.irq_ack = chv_gpio_irq_ack;
+ pctrl->irqchip.irq_mask = chv_gpio_irq_mask;
+ pctrl->irqchip.irq_unmask = chv_gpio_irq_unmask;
+ pctrl->irqchip.irq_set_type = chv_gpio_irq_type;
+ pctrl->irqchip.flags = IRQCHIP_SKIP_SET_WAKE;
+
+ ret = gpiochip_irqchip_add(chip, &pctrl->irqchip, 0,
handle_bad_irq, IRQ_TYPE_NONE);
if (ret) {
dev_err(pctrl->dev, "failed to add IRQ chip\n");
@@ -1646,7 +1642,7 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq)
}
}
- gpiochip_set_chained_irqchip(chip, &chv_gpio_irqchip, irq,
+ gpiochip_set_chained_irqchip(chip, &pctrl->irqchip, irq,
chv_gpio_irq_handler);
return 0;
}
diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c
index 1f13bcd0e4e1..83981ad66a71 100644
--- a/drivers/pinctrl/intel/pinctrl-intel.c
+++ b/drivers/pinctrl/intel/pinctrl-intel.c
@@ -52,6 +52,7 @@
#define PADCFG0_GPIROUTNMI BIT(17)
#define PADCFG0_PMODE_SHIFT 10
#define PADCFG0_PMODE_MASK GENMASK(13, 10)
+#define PADCFG0_PMODE_GPIO 0
#define PADCFG0_GPIORXDIS BIT(9)
#define PADCFG0_GPIOTXDIS BIT(8)
#define PADCFG0_GPIORXSTATE BIT(1)
@@ -96,6 +97,7 @@ struct intel_pinctrl_context {
* @pctldesc: Pin controller description
* @pctldev: Pointer to the pin controller device
* @chip: GPIO chip in this pin controller
+ * @irqchip: IRQ chip in this pin controller
* @soc: SoC/PCH specific pin configuration data
* @communities: All communities in this pin controller
* @ncommunities: Number of communities in this pin controller
@@ -108,6 +110,7 @@ struct intel_pinctrl {
struct pinctrl_desc pctldesc;
struct pinctrl_dev *pctldev;
struct gpio_chip chip;
+ struct irq_chip irqchip;
const struct intel_pinctrl_soc_data *soc;
struct intel_community *communities;
size_t ncommunities;
@@ -330,7 +333,7 @@ static void intel_pin_dbg_show(struct pinctrl_dev *pctldev, struct seq_file *s,
cfg1 = readl(intel_get_padcfg(pctrl, pin, PADCFG1));
mode = (cfg0 & PADCFG0_PMODE_MASK) >> PADCFG0_PMODE_SHIFT;
- if (!mode)
+ if (mode == PADCFG0_PMODE_GPIO)
seq_puts(s, "GPIO ");
else
seq_printf(s, "mode %d ", mode);
@@ -456,6 +459,11 @@ static void __intel_gpio_set_direction(void __iomem *padcfg0, bool input)
writel(value, padcfg0);
}
+static int intel_gpio_get_gpio_mode(void __iomem *padcfg0)
+{
+ return (readl(padcfg0) & PADCFG0_PMODE_MASK) >> PADCFG0_PMODE_SHIFT;
+}
+
static void intel_gpio_set_gpio_mode(void __iomem *padcfg0)
{
u32 value;
@@ -489,7 +497,20 @@ static int intel_gpio_request_enable(struct pinctrl_dev *pctldev,
}
padcfg0 = intel_get_padcfg(pctrl, pin, PADCFG0);
+
+ /*
+ * If pin is already configured in GPIO mode, we assume that
+ * firmware provides correct settings. In such case we avoid
+ * potential glitches on the pin. Otherwise, for the pin in
+ * alternative mode, consumer has to supply respective flags.
+ */
+ if (intel_gpio_get_gpio_mode(padcfg0) == PADCFG0_PMODE_GPIO) {
+ raw_spin_unlock_irqrestore(&pctrl->lock, flags);
+ return 0;
+ }
+
intel_gpio_set_gpio_mode(padcfg0);
+
/* Disable TX buffer and enable RX (this will be input) */
__intel_gpio_set_direction(padcfg0, true);
@@ -1139,16 +1160,6 @@ static irqreturn_t intel_gpio_irq(int irq, void *data)
return ret;
}
-static struct irq_chip intel_gpio_irqchip = {
- .name = "intel-gpio",
- .irq_ack = intel_gpio_irq_ack,
- .irq_mask = intel_gpio_irq_mask,
- .irq_unmask = intel_gpio_irq_unmask,
- .irq_set_type = intel_gpio_irq_type,
- .irq_set_wake = intel_gpio_irq_wake,
- .flags = IRQCHIP_MASK_ON_SUSPEND,
-};
-
static int intel_gpio_add_pin_ranges(struct intel_pinctrl *pctrl,
const struct intel_community *community)
{
@@ -1198,12 +1209,22 @@ static int intel_gpio_probe(struct intel_pinctrl *pctrl, int irq)
pctrl->chip = intel_gpio_chip;
+ /* Setup GPIO chip */
pctrl->chip.ngpio = intel_gpio_ngpio(pctrl);
pctrl->chip.label = dev_name(pctrl->dev);
pctrl->chip.parent = pctrl->dev;
pctrl->chip.base = -1;
pctrl->irq = irq;
+ /* Setup IRQ chip */
+ pctrl->irqchip.name = dev_name(pctrl->dev);
+ pctrl->irqchip.irq_ack = intel_gpio_irq_ack;
+ pctrl->irqchip.irq_mask = intel_gpio_irq_mask;
+ pctrl->irqchip.irq_unmask = intel_gpio_irq_unmask;
+ pctrl->irqchip.irq_set_type = intel_gpio_irq_type;
+ pctrl->irqchip.irq_set_wake = intel_gpio_irq_wake;
+ pctrl->irqchip.flags = IRQCHIP_MASK_ON_SUSPEND;
+
ret = devm_gpiochip_add_data(pctrl->dev, &pctrl->chip, pctrl);
if (ret) {
dev_err(pctrl->dev, "failed to register gpiochip\n");
@@ -1233,15 +1254,14 @@ static int intel_gpio_probe(struct intel_pinctrl *pctrl, int irq)
return ret;
}
- ret = gpiochip_irqchip_add(&pctrl->chip, &intel_gpio_irqchip, 0,
+ ret = gpiochip_irqchip_add(&pctrl->chip, &pctrl->irqchip, 0,
handle_bad_irq, IRQ_TYPE_NONE);
if (ret) {
dev_err(pctrl->dev, "failed to add irqchip\n");
return ret;
}
- gpiochip_set_chained_irqchip(&pctrl->chip, &intel_gpio_irqchip, irq,
- NULL);
+ gpiochip_set_chained_irqchip(&pctrl->chip, &pctrl->irqchip, irq, NULL);
return 0;
}
diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c
index 6462d3ca7ceb..f2f5fcd9a237 100644
--- a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c
+++ b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c
@@ -183,10 +183,10 @@ static struct armada_37xx_pin_group armada_37xx_nb_groups[] = {
PIN_GRP_EXTRA("uart2", 9, 2, BIT(1) | BIT(13) | BIT(14) | BIT(19),
BIT(1) | BIT(13) | BIT(14), BIT(1) | BIT(19),
18, 2, "gpio", "uart"),
- PIN_GRP_GPIO("led0_od", 11, 1, BIT(20), "led"),
- PIN_GRP_GPIO("led1_od", 12, 1, BIT(21), "led"),
- PIN_GRP_GPIO("led2_od", 13, 1, BIT(22), "led"),
- PIN_GRP_GPIO("led3_od", 14, 1, BIT(23), "led"),
+ PIN_GRP_GPIO_2("led0_od", 11, 1, BIT(20), BIT(20), 0, "led"),
+ PIN_GRP_GPIO_2("led1_od", 12, 1, BIT(21), BIT(21), 0, "led"),
+ PIN_GRP_GPIO_2("led2_od", 13, 1, BIT(22), BIT(22), 0, "led"),
+ PIN_GRP_GPIO_2("led3_od", 14, 1, BIT(23), BIT(23), 0, "led"),
};
@@ -221,11 +221,11 @@ static const struct armada_37xx_pin_data armada_37xx_pin_sb = {
};
static inline void armada_37xx_update_reg(unsigned int *reg,
- unsigned int offset)
+ unsigned int *offset)
{
/* We never have more than 2 registers */
- if (offset >= GPIO_PER_REG) {
- offset -= GPIO_PER_REG;
+ if (*offset >= GPIO_PER_REG) {
+ *offset -= GPIO_PER_REG;
*reg += sizeof(u32);
}
}
@@ -376,7 +376,7 @@ static inline void armada_37xx_irq_update_reg(unsigned int *reg,
{
int offset = irqd_to_hwirq(d);
- armada_37xx_update_reg(reg, offset);
+ armada_37xx_update_reg(reg, &offset);
}
static int armada_37xx_gpio_direction_input(struct gpio_chip *chip,
@@ -386,7 +386,7 @@ static int armada_37xx_gpio_direction_input(struct gpio_chip *chip,
unsigned int reg = OUTPUT_EN;
unsigned int mask;
- armada_37xx_update_reg(&reg, offset);
+ armada_37xx_update_reg(&reg, &offset);
mask = BIT(offset);
return regmap_update_bits(info->regmap, reg, mask, 0);
@@ -399,7 +399,7 @@ static int armada_37xx_gpio_get_direction(struct gpio_chip *chip,
unsigned int reg = OUTPUT_EN;
unsigned int val, mask;
- armada_37xx_update_reg(&reg, offset);
+ armada_37xx_update_reg(&reg, &offset);
mask = BIT(offset);
regmap_read(info->regmap, reg, &val);
@@ -413,7 +413,7 @@ static int armada_37xx_gpio_direction_output(struct gpio_chip *chip,
unsigned int reg = OUTPUT_EN;
unsigned int mask, val, ret;
- armada_37xx_update_reg(&reg, offset);
+ armada_37xx_update_reg(&reg, &offset);
mask = BIT(offset);
ret = regmap_update_bits(info->regmap, reg, mask, mask);
@@ -434,7 +434,7 @@ static int armada_37xx_gpio_get(struct gpio_chip *chip, unsigned int offset)
unsigned int reg = INPUT_VAL;
unsigned int val, mask;
- armada_37xx_update_reg(&reg, offset);
+ armada_37xx_update_reg(&reg, &offset);
mask = BIT(offset);
regmap_read(info->regmap, reg, &val);
@@ -449,7 +449,7 @@ static void armada_37xx_gpio_set(struct gpio_chip *chip, unsigned int offset,
unsigned int reg = OUTPUT_VAL;
unsigned int mask, val;
- armada_37xx_update_reg(&reg, offset);
+ armada_37xx_update_reg(&reg, &offset);
mask = BIT(offset);
val = value ? mask : 0;
diff --git a/drivers/pinctrl/pinctrl-stmfx.c b/drivers/pinctrl/pinctrl-stmfx.c
index 974973777395..ccdf0bb21414 100644
--- a/drivers/pinctrl/pinctrl-stmfx.c
+++ b/drivers/pinctrl/pinctrl-stmfx.c
@@ -585,19 +585,6 @@ static int stmfx_pinctrl_gpio_function_enable(struct stmfx_pinctrl *pctl)
return stmfx_function_enable(pctl->stmfx, func);
}
-static int stmfx_pinctrl_gpio_init_valid_mask(struct gpio_chip *gc,
- unsigned long *valid_mask,
- unsigned int ngpios)
-{
- struct stmfx_pinctrl *pctl = gpiochip_get_data(gc);
- u32 n;
-
- for_each_clear_bit(n, &pctl->gpio_valid_mask, ngpios)
- clear_bit(n, valid_mask);
-
- return 0;
-}
-
static int stmfx_pinctrl_probe(struct platform_device *pdev)
{
struct stmfx *stmfx = dev_get_drvdata(pdev->dev.parent);
@@ -660,7 +647,6 @@ static int stmfx_pinctrl_probe(struct platform_device *pdev)
pctl->gpio_chip.ngpio = pctl->pctl_desc.npins;
pctl->gpio_chip.can_sleep = true;
pctl->gpio_chip.of_node = np;
- pctl->gpio_chip.init_valid_mask = stmfx_pinctrl_gpio_init_valid_mask;
ret = devm_gpiochip_add_data(pctl->dev, &pctl->gpio_chip, pctl);
if (ret) {
@@ -705,7 +691,7 @@ static int stmfx_pinctrl_probe(struct platform_device *pdev)
static int stmfx_pinctrl_remove(struct platform_device *pdev)
{
- struct stmfx *stmfx = dev_get_platdata(&pdev->dev);
+ struct stmfx *stmfx = dev_get_drvdata(pdev->dev.parent);
return stmfx_function_disable(stmfx,
STMFX_FUNC_GPIO |
diff --git a/drivers/platform/x86/classmate-laptop.c b/drivers/platform/x86/classmate-laptop.c
index 86cc2cc68fb5..af063f690846 100644
--- a/drivers/platform/x86/classmate-laptop.c
+++ b/drivers/platform/x86/classmate-laptop.c
@@ -420,12 +420,6 @@ failed_sensitivity:
static int cmpc_accel_remove_v4(struct acpi_device *acpi)
{
- struct input_dev *inputdev;
- struct cmpc_accel *accel;
-
- inputdev = dev_get_drvdata(&acpi->dev);
- accel = dev_get_drvdata(&inputdev->dev);
-
device_remove_file(&acpi->dev, &cmpc_accel_sensitivity_attr_v4);
device_remove_file(&acpi->dev, &cmpc_accel_g_select_attr_v4);
return cmpc_remove_acpi_notify_device(acpi);
@@ -656,12 +650,6 @@ failed_file:
static int cmpc_accel_remove(struct acpi_device *acpi)
{
- struct input_dev *inputdev;
- struct cmpc_accel *accel;
-
- inputdev = dev_get_drvdata(&acpi->dev);
- accel = dev_get_drvdata(&inputdev->dev);
-
device_remove_file(&acpi->dev, &cmpc_accel_sensitivity_attr);
return cmpc_remove_acpi_notify_device(acpi);
}
diff --git a/drivers/platform/x86/i2c-multi-instantiate.c b/drivers/platform/x86/i2c-multi-instantiate.c
index ea68f6ed66ae..ffb8d5d1eb5f 100644
--- a/drivers/platform/x86/i2c-multi-instantiate.c
+++ b/drivers/platform/x86/i2c-multi-instantiate.c
@@ -108,6 +108,7 @@ static int i2c_multi_inst_probe(struct platform_device *pdev)
if (ret < 0) {
dev_dbg(dev, "Error requesting irq at index %d: %d\n",
inst_data[i].irq_idx, ret);
+ goto error;
}
board_info.irq = ret;
break;
diff --git a/drivers/platform/x86/intel_punit_ipc.c b/drivers/platform/x86/intel_punit_ipc.c
index ab7ae1950867..fa97834fdb78 100644
--- a/drivers/platform/x86/intel_punit_ipc.c
+++ b/drivers/platform/x86/intel_punit_ipc.c
@@ -293,9 +293,8 @@ static int intel_punit_ipc_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, punit_ipcdev);
- irq = platform_get_irq(pdev, 0);
+ irq = platform_get_irq_optional(pdev, 0);
if (irq < 0) {
- punit_ipcdev->irq = 0;
dev_warn(&pdev->dev, "Invalid IRQ, using polling mode\n");
} else {
ret = devm_request_irq(&pdev->dev, irq, intel_punit_ioc,
diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig
index 960961fb0d7c..0517272a268e 100644
--- a/drivers/ptp/Kconfig
+++ b/drivers/ptp/Kconfig
@@ -97,8 +97,8 @@ config PTP_1588_CLOCK_PCH
help
This driver adds support for using the PCH EG20T as a PTP
clock. The hardware supports time stamping of PTP packets
- when using the end-to-end delay (E2E) mechansim. The peer
- delay mechansim (P2P) is not supported.
+ when using the end-to-end delay (E2E) mechanism. The peer
+ delay mechanism (P2P) is not supported.
This clock is only useful if your PTP programs are getting
hardware time stamps on the PTP Ethernet packets using the
diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
index 67d0199840fd..9d72ab593f13 100644
--- a/drivers/ptp/ptp_chardev.c
+++ b/drivers/ptp/ptp_chardev.c
@@ -149,11 +149,21 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
err = -EFAULT;
break;
}
- if (((req.extts.flags & ~PTP_EXTTS_VALID_FLAGS) ||
- req.extts.rsv[0] || req.extts.rsv[1]) &&
- cmd == PTP_EXTTS_REQUEST2) {
- err = -EINVAL;
- break;
+ if (cmd == PTP_EXTTS_REQUEST2) {
+ /* Tell the drivers to check the flags carefully. */
+ req.extts.flags |= PTP_STRICT_FLAGS;
+ /* Make sure no reserved bit is set. */
+ if ((req.extts.flags & ~PTP_EXTTS_VALID_FLAGS) ||
+ req.extts.rsv[0] || req.extts.rsv[1]) {
+ err = -EINVAL;
+ break;
+ }
+ /* Ensure one of the rising/falling edge bits is set. */
+ if ((req.extts.flags & PTP_ENABLE_FEATURE) &&
+ (req.extts.flags & PTP_EXTTS_EDGES) == 0) {
+ err = -EINVAL;
+ break;
+ }
} else if (cmd == PTP_EXTTS_REQUEST) {
req.extts.flags &= PTP_EXTTS_V1_VALID_FLAGS;
req.extts.rsv[0] = 0;
diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index 6ad51aa60c03..f877e77d9184 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c
@@ -472,14 +472,7 @@ int pwm_apply_state(struct pwm_device *pwm, const struct pwm_state *state)
if (err)
return err;
- /*
- * .apply might have to round some values in *state, if possible
- * read the actually implemented value back.
- */
- if (chip->ops->get_state)
- chip->ops->get_state(chip, pwm, &pwm->state);
- else
- pwm->state = *state;
+ pwm->state = *state;
} else {
/*
* FIXME: restore the initial state in case of error.
diff --git a/drivers/pwm/pwm-bcm-iproc.c b/drivers/pwm/pwm-bcm-iproc.c
index 56c38cfae92c..1f829edd8ee7 100644
--- a/drivers/pwm/pwm-bcm-iproc.c
+++ b/drivers/pwm/pwm-bcm-iproc.c
@@ -187,6 +187,7 @@ static int iproc_pwmc_apply(struct pwm_chip *chip, struct pwm_device *pwm,
static const struct pwm_ops iproc_pwm_ops = {
.apply = iproc_pwmc_apply,
.get_state = iproc_pwmc_get_state,
+ .owner = THIS_MODULE,
};
static int iproc_pwmc_probe(struct platform_device *pdev)
diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index afe94470b67f..a46be221dbdc 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -5053,6 +5053,19 @@ regulator_register(const struct regulator_desc *regulator_desc,
init_data = regulator_of_get_init_data(dev, regulator_desc, config,
&rdev->dev.of_node);
+
+ /*
+ * Sometimes not all resources are probed already so we need to take
+ * that into account. This happens most the time if the ena_gpiod comes
+ * from a gpio extender or something else.
+ */
+ if (PTR_ERR(init_data) == -EPROBE_DEFER) {
+ kfree(config);
+ kfree(rdev);
+ ret = -EPROBE_DEFER;
+ goto rinse;
+ }
+
/*
* We need to keep track of any GPIO descriptor coming from the
* device tree until we have handled it over to the core. If the
diff --git a/drivers/regulator/da9062-regulator.c b/drivers/regulator/da9062-regulator.c
index 56f3f72d7707..710e67081d53 100644
--- a/drivers/regulator/da9062-regulator.c
+++ b/drivers/regulator/da9062-regulator.c
@@ -136,7 +136,6 @@ static int da9062_buck_set_mode(struct regulator_dev *rdev, unsigned mode)
static unsigned da9062_buck_get_mode(struct regulator_dev *rdev)
{
struct da9062_regulator *regl = rdev_get_drvdata(rdev);
- struct regmap_field *field;
unsigned int val, mode = 0;
int ret;
@@ -158,18 +157,7 @@ static unsigned da9062_buck_get_mode(struct regulator_dev *rdev)
return REGULATOR_MODE_NORMAL;
}
- /* Detect current regulator state */
- ret = regmap_field_read(regl->suspend, &val);
- if (ret < 0)
- return 0;
-
- /* Read regulator mode from proper register, depending on state */
- if (val)
- field = regl->suspend_sleep;
- else
- field = regl->sleep;
-
- ret = regmap_field_read(field, &val);
+ ret = regmap_field_read(regl->sleep, &val);
if (ret < 0)
return 0;
@@ -208,21 +196,9 @@ static int da9062_ldo_set_mode(struct regulator_dev *rdev, unsigned mode)
static unsigned da9062_ldo_get_mode(struct regulator_dev *rdev)
{
struct da9062_regulator *regl = rdev_get_drvdata(rdev);
- struct regmap_field *field;
int ret, val;
- /* Detect current regulator state */
- ret = regmap_field_read(regl->suspend, &val);
- if (ret < 0)
- return 0;
-
- /* Read regulator mode from proper register, depending on state */
- if (val)
- field = regl->suspend_sleep;
- else
- field = regl->sleep;
-
- ret = regmap_field_read(field, &val);
+ ret = regmap_field_read(regl->sleep, &val);
if (ret < 0)
return 0;
@@ -408,10 +384,10 @@ static const struct da9062_regulator_info local_da9061_regulator_info[] = {
__builtin_ffs((int)DA9062AA_BUCK1_MODE_MASK) - 1,
sizeof(unsigned int) * 8 -
__builtin_clz((DA9062AA_BUCK1_MODE_MASK)) - 1),
- .suspend = REG_FIELD(DA9062AA_DVC_1,
- __builtin_ffs((int)DA9062AA_VBUCK1_SEL_MASK) - 1,
+ .suspend = REG_FIELD(DA9062AA_BUCK1_CONT,
+ __builtin_ffs((int)DA9062AA_BUCK1_CONF_MASK) - 1,
sizeof(unsigned int) * 8 -
- __builtin_clz((DA9062AA_VBUCK1_SEL_MASK)) - 1),
+ __builtin_clz(DA9062AA_BUCK1_CONF_MASK) - 1),
},
{
.desc.id = DA9061_ID_BUCK2,
@@ -444,10 +420,10 @@ static const struct da9062_regulator_info local_da9061_regulator_info[] = {
__builtin_ffs((int)DA9062AA_BUCK3_MODE_MASK) - 1,
sizeof(unsigned int) * 8 -
__builtin_clz((DA9062AA_BUCK3_MODE_MASK)) - 1),
- .suspend = REG_FIELD(DA9062AA_DVC_1,
- __builtin_ffs((int)DA9062AA_VBUCK3_SEL_MASK) - 1,
+ .suspend = REG_FIELD(DA9062AA_BUCK3_CONT,
+ __builtin_ffs((int)DA9062AA_BUCK3_CONF_MASK) - 1,
sizeof(unsigned int) * 8 -
- __builtin_clz((DA9062AA_VBUCK3_SEL_MASK)) - 1),
+ __builtin_clz(DA9062AA_BUCK3_CONF_MASK) - 1),
},
{
.desc.id = DA9061_ID_BUCK3,
@@ -480,10 +456,10 @@ static const struct da9062_regulator_info local_da9061_regulator_info[] = {
__builtin_ffs((int)DA9062AA_BUCK4_MODE_MASK) - 1,
sizeof(unsigned int) * 8 -
__builtin_clz((DA9062AA_BUCK4_MODE_MASK)) - 1),
- .suspend = REG_FIELD(DA9062AA_DVC_1,
- __builtin_ffs((int)DA9062AA_VBUCK4_SEL_MASK) - 1,
+ .suspend = REG_FIELD(DA9062AA_BUCK4_CONT,
+ __builtin_ffs((int)DA9062AA_BUCK4_CONF_MASK) - 1,
sizeof(unsigned int) * 8 -
- __builtin_clz((DA9062AA_VBUCK4_SEL_MASK)) - 1),
+ __builtin_clz(DA9062AA_BUCK4_CONF_MASK) - 1),
},
{
.desc.id = DA9061_ID_LDO1,
@@ -509,10 +485,10 @@ static const struct da9062_regulator_info local_da9061_regulator_info[] = {
sizeof(unsigned int) * 8 -
__builtin_clz((DA9062AA_LDO1_SL_B_MASK)) - 1),
.suspend_vsel_reg = DA9062AA_VLDO1_B,
- .suspend = REG_FIELD(DA9062AA_DVC_1,
- __builtin_ffs((int)DA9062AA_VLDO1_SEL_MASK) - 1,
+ .suspend = REG_FIELD(DA9062AA_LDO1_CONT,
+ __builtin_ffs((int)DA9062AA_LDO1_CONF_MASK) - 1,
sizeof(unsigned int) * 8 -
- __builtin_clz((DA9062AA_VLDO1_SEL_MASK)) - 1),
+ __builtin_clz(DA9062AA_LDO1_CONF_MASK) - 1),
.oc_event = REG_FIELD(DA9062AA_STATUS_D,
__builtin_ffs((int)DA9062AA_LDO1_ILIM_MASK) - 1,
sizeof(unsigned int) * 8 -
@@ -542,10 +518,10 @@ static const struct da9062_regulator_info local_da9061_regulator_info[] = {
sizeof(unsigned int) * 8 -
__builtin_clz((DA9062AA_LDO2_SL_B_MASK)) - 1),
.suspend_vsel_reg = DA9062AA_VLDO2_B,
- .suspend = REG_FIELD(DA9062AA_DVC_1,
- __builtin_ffs((int)DA9062AA_VLDO2_SEL_MASK) - 1,
+ .suspend = REG_FIELD(DA9062AA_LDO2_CONT,
+ __builtin_ffs((int)DA9062AA_LDO2_CONF_MASK) - 1,
sizeof(unsigned int) * 8 -
- __builtin_clz((DA9062AA_VLDO2_SEL_MASK)) - 1),
+ __builtin_clz(DA9062AA_LDO2_CONF_MASK) - 1),
.oc_event = REG_FIELD(DA9062AA_STATUS_D,
__builtin_ffs((int)DA9062AA_LDO2_ILIM_MASK) - 1,
sizeof(unsigned int) * 8 -
@@ -575,10 +551,10 @@ static const struct da9062_regulator_info local_da9061_regulator_info[] = {
sizeof(unsigned int) * 8 -
__builtin_clz((DA9062AA_LDO3_SL_B_MASK)) - 1),
.suspend_vsel_reg = DA9062AA_VLDO3_B,
- .suspend = REG_FIELD(DA9062AA_DVC_1,
- __builtin_ffs((int)DA9062AA_VLDO3_SEL_MASK) - 1,
+ .suspend = REG_FIELD(DA9062AA_LDO3_CONT,
+ __builtin_ffs((int)DA9062AA_LDO3_CONF_MASK) - 1,
sizeof(unsigned int) * 8 -
- __builtin_clz((DA9062AA_VLDO3_SEL_MASK)) - 1),
+ __builtin_clz(DA9062AA_LDO3_CONF_MASK) - 1),
.oc_event = REG_FIELD(DA9062AA_STATUS_D,
__builtin_ffs((int)DA9062AA_LDO3_ILIM_MASK) - 1,
sizeof(unsigned int) * 8 -
@@ -608,10 +584,10 @@ static const struct da9062_regulator_info local_da9061_regulator_info[] = {
sizeof(unsigned int) * 8 -
__builtin_clz((DA9062AA_LDO4_SL_B_MASK)) - 1),
.suspend_vsel_reg = DA9062AA_VLDO4_B,
- .suspend = REG_FIELD(DA9062AA_DVC_1,
- __builtin_ffs((int)DA9062AA_VLDO4_SEL_MASK) - 1,
+ .suspend = REG_FIELD(DA9062AA_LDO4_CONT,
+ __builtin_ffs((int)DA9062AA_LDO4_CONF_MASK) - 1,
sizeof(unsigned int) * 8 -
- __builtin_clz((DA9062AA_VLDO4_SEL_MASK)) - 1),
+ __builtin_clz(DA9062AA_LDO4_CONF_MASK) - 1),
.oc_event = REG_FIELD(DA9062AA_STATUS_D,
__builtin_ffs((int)DA9062AA_LDO4_ILIM_MASK) - 1,
sizeof(unsigned int) * 8 -
@@ -652,10 +628,10 @@ static const struct da9062_regulator_info local_da9062_regulator_info[] = {
__builtin_ffs((int)DA9062AA_BUCK1_MODE_MASK) - 1,
sizeof(unsigned int) * 8 -
__builtin_clz((DA9062AA_BUCK1_MODE_MASK)) - 1),
- .suspend = REG_FIELD(DA9062AA_DVC_1,
- __builtin_ffs((int)DA9062AA_VBUCK1_SEL_MASK) - 1,
+ .suspend = REG_FIELD(DA9062AA_BUCK1_CONT,
+ __builtin_ffs((int)DA9062AA_BUCK1_CONF_MASK) - 1,
sizeof(unsigned int) * 8 -
- __builtin_clz((DA9062AA_VBUCK1_SEL_MASK)) - 1),
+ __builtin_clz(DA9062AA_BUCK1_CONF_MASK) - 1),
},
{
.desc.id = DA9062_ID_BUCK2,
@@ -688,10 +664,10 @@ static const struct da9062_regulator_info local_da9062_regulator_info[] = {
__builtin_ffs((int)DA9062AA_BUCK2_MODE_MASK) - 1,
sizeof(unsigned int) * 8 -
__builtin_clz((DA9062AA_BUCK2_MODE_MASK)) - 1),
- .suspend = REG_FIELD(DA9062AA_DVC_1,
- __builtin_ffs((int)DA9062AA_VBUCK2_SEL_MASK) - 1,
+ .suspend = REG_FIELD(DA9062AA_BUCK2_CONT,
+ __builtin_ffs((int)DA9062AA_BUCK2_CONF_MASK) - 1,
sizeof(unsigned int) * 8 -
- __builtin_clz((DA9062AA_VBUCK2_SEL_MASK)) - 1),
+ __builtin_clz(DA9062AA_BUCK2_CONF_MASK) - 1),
},
{
.desc.id = DA9062_ID_BUCK3,
@@ -724,10 +700,10 @@ static const struct da9062_regulator_info local_da9062_regulator_info[] = {
__builtin_ffs((int)DA9062AA_BUCK3_MODE_MASK) - 1,
sizeof(unsigned int) * 8 -
__builtin_clz((DA9062AA_BUCK3_MODE_MASK)) - 1),
- .suspend = REG_FIELD(DA9062AA_DVC_1,
- __builtin_ffs((int)DA9062AA_VBUCK3_SEL_MASK) - 1,
+ .suspend = REG_FIELD(DA9062AA_BUCK3_CONT,
+ __builtin_ffs((int)DA9062AA_BUCK3_CONF_MASK) - 1,
sizeof(unsigned int) * 8 -
- __builtin_clz((DA9062AA_VBUCK3_SEL_MASK)) - 1),
+ __builtin_clz(DA9062AA_BUCK3_CONF_MASK) - 1),
},
{
.desc.id = DA9062_ID_BUCK4,
@@ -760,10 +736,10 @@ static const struct da9062_regulator_info local_da9062_regulator_info[] = {
__builtin_ffs((int)DA9062AA_BUCK4_MODE_MASK) - 1,
sizeof(unsigned int) * 8 -
__builtin_clz((DA9062AA_BUCK4_MODE_MASK)) - 1),
- .suspend = REG_FIELD(DA9062AA_DVC_1,
- __builtin_ffs((int)DA9062AA_VBUCK4_SEL_MASK) - 1,
+ .suspend = REG_FIELD(DA9062AA_BUCK4_CONT,
+ __builtin_ffs((int)DA9062AA_BUCK4_CONF_MASK) - 1,
sizeof(unsigned int) * 8 -
- __builtin_clz((DA9062AA_VBUCK4_SEL_MASK)) - 1),
+ __builtin_clz(DA9062AA_BUCK4_CONF_MASK) - 1),
},
{
.desc.id = DA9062_ID_LDO1,
@@ -789,10 +765,10 @@ static const struct da9062_regulator_info local_da9062_regulator_info[] = {
sizeof(unsigned int) * 8 -
__builtin_clz((DA9062AA_LDO1_SL_B_MASK)) - 1),
.suspend_vsel_reg = DA9062AA_VLDO1_B,
- .suspend = REG_FIELD(DA9062AA_DVC_1,
- __builtin_ffs((int)DA9062AA_VLDO1_SEL_MASK) - 1,
+ .suspend = REG_FIELD(DA9062AA_LDO1_CONT,
+ __builtin_ffs((int)DA9062AA_LDO1_CONF_MASK) - 1,
sizeof(unsigned int) * 8 -
- __builtin_clz((DA9062AA_VLDO1_SEL_MASK)) - 1),
+ __builtin_clz(DA9062AA_LDO1_CONF_MASK) - 1),
.oc_event = REG_FIELD(DA9062AA_STATUS_D,
__builtin_ffs((int)DA9062AA_LDO1_ILIM_MASK) - 1,
sizeof(unsigned int) * 8 -
@@ -822,10 +798,10 @@ static const struct da9062_regulator_info local_da9062_regulator_info[] = {
sizeof(unsigned int) * 8 -
__builtin_clz((DA9062AA_LDO2_SL_B_MASK)) - 1),
.suspend_vsel_reg = DA9062AA_VLDO2_B,
- .suspend = REG_FIELD(DA9062AA_DVC_1,
- __builtin_ffs((int)DA9062AA_VLDO2_SEL_MASK) - 1,
+ .suspend = REG_FIELD(DA9062AA_LDO2_CONT,
+ __builtin_ffs((int)DA9062AA_LDO2_CONF_MASK) - 1,
sizeof(unsigned int) * 8 -
- __builtin_clz((DA9062AA_VLDO2_SEL_MASK)) - 1),
+ __builtin_clz(DA9062AA_LDO2_CONF_MASK) - 1),
.oc_event = REG_FIELD(DA9062AA_STATUS_D,
__builtin_ffs((int)DA9062AA_LDO2_ILIM_MASK) - 1,
sizeof(unsigned int) * 8 -
@@ -855,10 +831,10 @@ static const struct da9062_regulator_info local_da9062_regulator_info[] = {
sizeof(unsigned int) * 8 -
__builtin_clz((DA9062AA_LDO3_SL_B_MASK)) - 1),
.suspend_vsel_reg = DA9062AA_VLDO3_B,
- .suspend = REG_FIELD(DA9062AA_DVC_1,
- __builtin_ffs((int)DA9062AA_VLDO3_SEL_MASK) - 1,
+ .suspend = REG_FIELD(DA9062AA_LDO3_CONT,
+ __builtin_ffs((int)DA9062AA_LDO3_CONF_MASK) - 1,
sizeof(unsigned int) * 8 -
- __builtin_clz((DA9062AA_VLDO3_SEL_MASK)) - 1),
+ __builtin_clz(DA9062AA_LDO3_CONF_MASK) - 1),
.oc_event = REG_FIELD(DA9062AA_STATUS_D,
__builtin_ffs((int)DA9062AA_LDO3_ILIM_MASK) - 1,
sizeof(unsigned int) * 8 -
@@ -888,10 +864,10 @@ static const struct da9062_regulator_info local_da9062_regulator_info[] = {
sizeof(unsigned int) * 8 -
__builtin_clz((DA9062AA_LDO4_SL_B_MASK)) - 1),
.suspend_vsel_reg = DA9062AA_VLDO4_B,
- .suspend = REG_FIELD(DA9062AA_DVC_1,
- __builtin_ffs((int)DA9062AA_VLDO4_SEL_MASK) - 1,
+ .suspend = REG_FIELD(DA9062AA_LDO4_CONT,
+ __builtin_ffs((int)DA9062AA_LDO4_CONF_MASK) - 1,
sizeof(unsigned int) * 8 -
- __builtin_clz((DA9062AA_VLDO4_SEL_MASK)) - 1),
+ __builtin_clz(DA9062AA_LDO4_CONF_MASK) - 1),
.oc_event = REG_FIELD(DA9062AA_STATUS_D,
__builtin_ffs((int)DA9062AA_LDO4_ILIM_MASK) - 1,
sizeof(unsigned int) * 8 -
diff --git a/drivers/regulator/fixed.c b/drivers/regulator/fixed.c
index d90a6fd8cbc7..f81533070058 100644
--- a/drivers/regulator/fixed.c
+++ b/drivers/regulator/fixed.c
@@ -144,8 +144,7 @@ static int reg_fixed_voltage_probe(struct platform_device *pdev)
struct device *dev = &pdev->dev;
struct fixed_voltage_config *config;
struct fixed_voltage_data *drvdata;
- const struct fixed_dev_type *drvtype =
- of_match_device(dev->driver->of_match_table, dev)->data;
+ const struct fixed_dev_type *drvtype = of_device_get_match_data(dev);
struct regulator_config cfg = { };
enum gpiod_flags gflags;
int ret;
@@ -177,7 +176,7 @@ static int reg_fixed_voltage_probe(struct platform_device *pdev)
drvdata->desc.type = REGULATOR_VOLTAGE;
drvdata->desc.owner = THIS_MODULE;
- if (drvtype->has_enable_clock) {
+ if (drvtype && drvtype->has_enable_clock) {
drvdata->desc.ops = &fixed_voltage_clkenabled_ops;
drvdata->enable_clock = devm_clk_get(dev, NULL);
diff --git a/drivers/regulator/lochnagar-regulator.c b/drivers/regulator/lochnagar-regulator.c
index ff97cc50f2eb..9b05e03ba830 100644
--- a/drivers/regulator/lochnagar-regulator.c
+++ b/drivers/regulator/lochnagar-regulator.c
@@ -210,6 +210,7 @@ static const struct regulator_desc lochnagar_regulators[] = {
.enable_time = 3000,
.ramp_delay = 1000,
+ .off_on_delay = 15000,
.owner = THIS_MODULE,
},
diff --git a/drivers/regulator/of_regulator.c b/drivers/regulator/of_regulator.c
index afefb29ce1b0..87637eb6bcbc 100644
--- a/drivers/regulator/of_regulator.c
+++ b/drivers/regulator/of_regulator.c
@@ -231,12 +231,12 @@ static int of_get_regulation_constraints(struct device *dev,
"regulator-off-in-suspend"))
suspend_state->enabled = DISABLE_IN_SUSPEND;
- if (!of_property_read_u32(np, "regulator-suspend-min-microvolt",
- &pval))
+ if (!of_property_read_u32(suspend_np,
+ "regulator-suspend-min-microvolt", &pval))
suspend_state->min_uV = pval;
- if (!of_property_read_u32(np, "regulator-suspend-max-microvolt",
- &pval))
+ if (!of_property_read_u32(suspend_np,
+ "regulator-suspend-max-microvolt", &pval))
suspend_state->max_uV = pval;
if (!of_property_read_u32(suspend_np,
@@ -445,11 +445,20 @@ struct regulator_init_data *regulator_of_get_init_data(struct device *dev,
goto error;
}
- if (desc->of_parse_cb && desc->of_parse_cb(child, desc, config)) {
- dev_err(dev,
- "driver callback failed to parse DT for regulator %pOFn\n",
- child);
- goto error;
+ if (desc->of_parse_cb) {
+ int ret;
+
+ ret = desc->of_parse_cb(child, desc, config);
+ if (ret) {
+ if (ret == -EPROBE_DEFER) {
+ of_node_put(child);
+ return ERR_PTR(-EPROBE_DEFER);
+ }
+ dev_err(dev,
+ "driver callback failed to parse DT for regulator %pOFn\n",
+ child);
+ goto error;
+ }
}
*node = child;
diff --git a/drivers/regulator/pfuze100-regulator.c b/drivers/regulator/pfuze100-regulator.c
index df5df1c495ad..689537927f6f 100644
--- a/drivers/regulator/pfuze100-regulator.c
+++ b/drivers/regulator/pfuze100-regulator.c
@@ -788,7 +788,13 @@ static int pfuze100_regulator_probe(struct i2c_client *client,
/* SW2~SW4 high bit check and modify the voltage value table */
if (i >= sw_check_start && i <= sw_check_end) {
- regmap_read(pfuze_chip->regmap, desc->vsel_reg, &val);
+ ret = regmap_read(pfuze_chip->regmap,
+ desc->vsel_reg, &val);
+ if (ret) {
+ dev_err(&client->dev, "Fails to read from the register.\n");
+ return ret;
+ }
+
if (val & sw_hi) {
if (pfuze_chip->chip_id == PFUZE3000 ||
pfuze_chip->chip_id == PFUZE3001) {
diff --git a/drivers/regulator/qcom-rpmh-regulator.c b/drivers/regulator/qcom-rpmh-regulator.c
index db6c085da65e..0246b6f99fb5 100644
--- a/drivers/regulator/qcom-rpmh-regulator.c
+++ b/drivers/regulator/qcom-rpmh-regulator.c
@@ -735,8 +735,8 @@ static const struct rpmh_vreg_hw_data pmic5_hfsmps515 = {
static const struct rpmh_vreg_hw_data pmic5_bob = {
.regulator_type = VRM,
.ops = &rpmh_regulator_vrm_bypass_ops,
- .voltage_range = REGULATOR_LINEAR_RANGE(300000, 0, 135, 32000),
- .n_voltages = 136,
+ .voltage_range = REGULATOR_LINEAR_RANGE(3000000, 0, 31, 32000),
+ .n_voltages = 32,
.pmic_mode_map = pmic_mode_map_pmic5_bob,
.of_map_mode = rpmh_regulator_pmic4_bob_of_map_mode,
};
diff --git a/drivers/regulator/ti-abb-regulator.c b/drivers/regulator/ti-abb-regulator.c
index cced1ffb896c..89b9314d64c9 100644
--- a/drivers/regulator/ti-abb-regulator.c
+++ b/drivers/regulator/ti-abb-regulator.c
@@ -173,19 +173,14 @@ static int ti_abb_wait_txdone(struct device *dev, struct ti_abb *abb)
while (timeout++ <= abb->settling_time) {
status = ti_abb_check_txdone(abb);
if (status)
- break;
+ return 0;
udelay(1);
}
- if (timeout > abb->settling_time) {
- dev_warn_ratelimited(dev,
- "%s:TRANXDONE timeout(%duS) int=0x%08x\n",
- __func__, timeout, readl(abb->int_base));
- return -ETIMEDOUT;
- }
-
- return 0;
+ dev_warn_ratelimited(dev, "%s:TRANXDONE timeout(%duS) int=0x%08x\n",
+ __func__, timeout, readl(abb->int_base));
+ return -ETIMEDOUT;
}
/**
@@ -205,19 +200,14 @@ static int ti_abb_clear_all_txdone(struct device *dev, const struct ti_abb *abb)
status = ti_abb_check_txdone(abb);
if (!status)
- break;
+ return 0;
udelay(1);
}
- if (timeout > abb->settling_time) {
- dev_warn_ratelimited(dev,
- "%s:TRANXDONE timeout(%duS) int=0x%08x\n",
- __func__, timeout, readl(abb->int_base));
- return -ETIMEDOUT;
- }
-
- return 0;
+ dev_warn_ratelimited(dev, "%s:TRANXDONE timeout(%duS) int=0x%08x\n",
+ __func__, timeout, readl(abb->int_base));
+ return -ETIMEDOUT;
}
/**
diff --git a/drivers/reset/core.c b/drivers/reset/core.c
index 213ff40dda11..3c9a64c1b7a8 100644
--- a/drivers/reset/core.c
+++ b/drivers/reset/core.c
@@ -76,7 +76,6 @@ static const char *rcdev_name(struct reset_controller_dev *rcdev)
* of_reset_simple_xlate - translate reset_spec to the reset line number
* @rcdev: a pointer to the reset controller device
* @reset_spec: reset line specifier as found in the device tree
- * @flags: a flags pointer to fill in (optional)
*
* This simple translation function should be used for reset controllers
* with 1:1 mapping, where reset lines can be indexed by number without gaps.
@@ -748,6 +747,7 @@ static void reset_control_array_put(struct reset_control_array *resets)
for (i = 0; i < resets->num_rstcs; i++)
__reset_control_put_internal(resets->rstc[i]);
mutex_unlock(&reset_list_mutex);
+ kfree(resets);
}
/**
@@ -825,9 +825,10 @@ int __device_reset(struct device *dev, bool optional)
}
EXPORT_SYMBOL_GPL(__device_reset);
-/**
+/*
* APIs to manage an array of reset controls.
*/
+
/**
* of_reset_control_get_count - Count number of resets available with a device
*
diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c
index 5542d9eadfe0..7d079154f849 100644
--- a/drivers/s390/block/dasd_genhd.c
+++ b/drivers/s390/block/dasd_genhd.c
@@ -116,7 +116,9 @@ int dasd_scan_partitions(struct dasd_block *block)
return -ENODEV;
}
- rc = blkdev_reread_part(bdev);
+ mutex_lock(&bdev->bd_mutex);
+ rc = bdev_disk_changed(bdev, false);
+ mutex_unlock(&bdev->bd_mutex);
if (rc)
DBF_DEV_EVENT(DBF_ERR, block->base,
"scan partitions error, rc %d", rc);
diff --git a/drivers/s390/cio/cio.h b/drivers/s390/cio/cio.h
index ba7d2480613b..dcdaba689b20 100644
--- a/drivers/s390/cio/cio.h
+++ b/drivers/s390/cio/cio.h
@@ -113,6 +113,7 @@ struct subchannel {
enum sch_todo todo;
struct work_struct todo_work;
struct schib_config config;
+ u64 dma_mask;
char *driver_override; /* Driver name to force a match */
} __attribute__ ((aligned(8)));
diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c
index 1fbfb0a93f5f..831850435c23 100644
--- a/drivers/s390/cio/css.c
+++ b/drivers/s390/cio/css.c
@@ -232,7 +232,12 @@ struct subchannel *css_alloc_subchannel(struct subchannel_id schid,
* belong to a subchannel need to fit 31 bit width (e.g. ccw).
*/
sch->dev.coherent_dma_mask = DMA_BIT_MASK(31);
- sch->dev.dma_mask = &sch->dev.coherent_dma_mask;
+ /*
+ * But we don't have such restrictions imposed on the stuff that
+ * is handled by the streaming API.
+ */
+ sch->dma_mask = DMA_BIT_MASK(64);
+ sch->dev.dma_mask = &sch->dma_mask;
return sch;
err:
diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
index 131430bd48d9..0c6245fc7706 100644
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c
@@ -710,7 +710,7 @@ static struct ccw_device * io_subchannel_allocate_dev(struct subchannel *sch)
if (!cdev->private)
goto err_priv;
cdev->dev.coherent_dma_mask = sch->dev.coherent_dma_mask;
- cdev->dev.dma_mask = &cdev->dev.coherent_dma_mask;
+ cdev->dev.dma_mask = sch->dev.dma_mask;
dma_pool = cio_gp_dma_create(&cdev->dev, 1);
if (!dma_pool)
goto err_dma_pool;
diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c
index 45bdb47f84c1..9157e728a362 100644
--- a/drivers/s390/crypto/zcrypt_api.c
+++ b/drivers/s390/crypto/zcrypt_api.c
@@ -522,8 +522,7 @@ static int zcrypt_release(struct inode *inode, struct file *filp)
if (filp->f_inode->i_cdev == &zcrypt_cdev) {
struct zcdn_device *zcdndev;
- if (mutex_lock_interruptible(&ap_perms_mutex))
- return -ERESTARTSYS;
+ mutex_lock(&ap_perms_mutex);
zcdndev = find_zcdndev_by_devt(filp->f_inode->i_rdev);
mutex_unlock(&ap_perms_mutex);
if (zcdndev) {
diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index e4b55f9aa062..65e31df37b1f 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -839,6 +839,7 @@ struct qeth_card {
struct service_level qeth_service_level;
struct qdio_ssqd_desc ssqd;
debug_info_t *debug;
+ struct mutex sbp_lock;
struct mutex conf_mutex;
struct mutex discipline_mutex;
struct napi_struct napi;
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index dda274351c21..83794d7494d4 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -901,30 +901,30 @@ static int qeth_get_problem(struct qeth_card *card, struct ccw_device *cdev,
CCW_DEVID(cdev), dstat, cstat);
print_hex_dump(KERN_WARNING, "qeth: irb ", DUMP_PREFIX_OFFSET,
16, 1, irb, 64, 1);
- return 1;
+ return -EIO;
}
if (dstat & DEV_STAT_UNIT_CHECK) {
if (sense[SENSE_RESETTING_EVENT_BYTE] &
SENSE_RESETTING_EVENT_FLAG) {
QETH_CARD_TEXT(card, 2, "REVIND");
- return 1;
+ return -EIO;
}
if (sense[SENSE_COMMAND_REJECT_BYTE] &
SENSE_COMMAND_REJECT_FLAG) {
QETH_CARD_TEXT(card, 2, "CMDREJi");
- return 1;
+ return -EIO;
}
if ((sense[2] == 0xaf) && (sense[3] == 0xfe)) {
QETH_CARD_TEXT(card, 2, "AFFE");
- return 1;
+ return -EIO;
}
if ((!sense[0]) && (!sense[1]) && (!sense[2]) && (!sense[3])) {
QETH_CARD_TEXT(card, 2, "ZEROSEN");
return 0;
}
QETH_CARD_TEXT(card, 2, "DGENCHK");
- return 1;
+ return -EIO;
}
return 0;
}
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index b8799cd3e7aa..4bccdce19b5a 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -467,10 +467,14 @@ static void qeth_l2_set_promisc_mode(struct qeth_card *card)
if (card->info.promisc_mode == enable)
return;
- if (qeth_adp_supported(card, IPA_SETADP_SET_PROMISC_MODE))
+ if (qeth_adp_supported(card, IPA_SETADP_SET_PROMISC_MODE)) {
qeth_setadp_promisc_mode(card, enable);
- else if (card->options.sbp.reflect_promisc)
- qeth_l2_promisc_to_bridge(card, enable);
+ } else {
+ mutex_lock(&card->sbp_lock);
+ if (card->options.sbp.reflect_promisc)
+ qeth_l2_promisc_to_bridge(card, enable);
+ mutex_unlock(&card->sbp_lock);
+ }
}
/* New MAC address is added to the hash table and marked to be written on card
@@ -631,6 +635,7 @@ static int qeth_l2_probe_device(struct ccwgroup_device *gdev)
int rc;
qeth_l2_vnicc_set_defaults(card);
+ mutex_init(&card->sbp_lock);
if (gdev->dev.type == &qeth_generic_devtype) {
rc = qeth_l2_create_device_attributes(&gdev->dev);
@@ -804,10 +809,12 @@ static int qeth_l2_set_online(struct ccwgroup_device *gdev)
} else
card->info.hwtrap = 0;
+ mutex_lock(&card->sbp_lock);
qeth_bridgeport_query_support(card);
if (card->options.sbp.supported_funcs)
dev_info(&card->gdev->dev,
"The device represents a Bridge Capable Port\n");
+ mutex_unlock(&card->sbp_lock);
qeth_l2_register_dev_addr(card);
@@ -1162,9 +1169,9 @@ static void qeth_bridge_state_change_worker(struct work_struct *work)
/* Role should not change by itself, but if it did, */
/* information from the hardware is authoritative. */
- mutex_lock(&data->card->conf_mutex);
+ mutex_lock(&data->card->sbp_lock);
data->card->options.sbp.role = entry->role;
- mutex_unlock(&data->card->conf_mutex);
+ mutex_unlock(&data->card->sbp_lock);
snprintf(env_locrem, sizeof(env_locrem), "BRIDGEPORT=statechange");
snprintf(env_role, sizeof(env_role), "ROLE=%s",
@@ -1230,9 +1237,9 @@ static void qeth_bridge_host_event_worker(struct work_struct *work)
: (data->hostevs.lost_event_mask == 0x02)
? "Bridge port state change"
: "Unknown reason");
- mutex_lock(&data->card->conf_mutex);
+ mutex_lock(&data->card->sbp_lock);
data->card->options.sbp.hostnotification = 0;
- mutex_unlock(&data->card->conf_mutex);
+ mutex_unlock(&data->card->sbp_lock);
qeth_bridge_emit_host_event(data->card, anev_abort,
0, NULL, NULL);
} else
@@ -2021,10 +2028,10 @@ static bool qeth_l2_vnicc_recover_char(struct qeth_card *card, u32 vnicc,
static void qeth_l2_vnicc_init(struct qeth_card *card)
{
u32 *timeout = &card->options.vnicc.learning_timeout;
+ bool enable, error = false;
unsigned int chars_len, i;
unsigned long chars_tmp;
u32 sup_cmds, vnicc;
- bool enable, error;
QETH_CARD_TEXT(card, 2, "vniccini");
/* reset rx_bcast */
@@ -2045,17 +2052,24 @@ static void qeth_l2_vnicc_init(struct qeth_card *card)
chars_len = sizeof(card->options.vnicc.sup_chars) * BITS_PER_BYTE;
for_each_set_bit(i, &chars_tmp, chars_len) {
vnicc = BIT(i);
- qeth_l2_vnicc_query_cmds(card, vnicc, &sup_cmds);
- if (!(sup_cmds & IPA_VNICC_SET_TIMEOUT) ||
- !(sup_cmds & IPA_VNICC_GET_TIMEOUT))
+ if (qeth_l2_vnicc_query_cmds(card, vnicc, &sup_cmds)) {
+ sup_cmds = 0;
+ error = true;
+ }
+ if ((sup_cmds & IPA_VNICC_SET_TIMEOUT) &&
+ (sup_cmds & IPA_VNICC_GET_TIMEOUT))
+ card->options.vnicc.getset_timeout_sup |= vnicc;
+ else
card->options.vnicc.getset_timeout_sup &= ~vnicc;
- if (!(sup_cmds & IPA_VNICC_ENABLE) ||
- !(sup_cmds & IPA_VNICC_DISABLE))
+ if ((sup_cmds & IPA_VNICC_ENABLE) &&
+ (sup_cmds & IPA_VNICC_DISABLE))
+ card->options.vnicc.set_char_sup |= vnicc;
+ else
card->options.vnicc.set_char_sup &= ~vnicc;
}
/* enforce assumed default values and recover settings, if changed */
- error = qeth_l2_vnicc_recover_timeout(card, QETH_VNICC_LEARNING,
- timeout);
+ error |= qeth_l2_vnicc_recover_timeout(card, QETH_VNICC_LEARNING,
+ timeout);
chars_tmp = card->options.vnicc.wanted_chars ^ QETH_VNICC_DEFAULT;
chars_tmp |= QETH_VNICC_BRIDGE_INVISIBLE;
chars_len = sizeof(card->options.vnicc.wanted_chars) * BITS_PER_BYTE;
diff --git a/drivers/s390/net/qeth_l2_sys.c b/drivers/s390/net/qeth_l2_sys.c
index f2c3b127b1e4..e2bcb26105a3 100644
--- a/drivers/s390/net/qeth_l2_sys.c
+++ b/drivers/s390/net/qeth_l2_sys.c
@@ -24,6 +24,7 @@ static ssize_t qeth_bridge_port_role_state_show(struct device *dev,
if (qeth_l2_vnicc_is_in_use(card))
return sprintf(buf, "n/a (VNIC characteristics)\n");
+ mutex_lock(&card->sbp_lock);
if (qeth_card_hw_is_reachable(card) &&
card->options.sbp.supported_funcs)
rc = qeth_bridgeport_query_ports(card,
@@ -57,6 +58,7 @@ static ssize_t qeth_bridge_port_role_state_show(struct device *dev,
else
rc = sprintf(buf, "%s\n", word);
}
+ mutex_unlock(&card->sbp_lock);
return rc;
}
@@ -91,6 +93,7 @@ static ssize_t qeth_bridge_port_role_store(struct device *dev,
return -EINVAL;
mutex_lock(&card->conf_mutex);
+ mutex_lock(&card->sbp_lock);
if (qeth_l2_vnicc_is_in_use(card))
rc = -EBUSY;
@@ -104,6 +107,7 @@ static ssize_t qeth_bridge_port_role_store(struct device *dev,
} else
card->options.sbp.role = role;
+ mutex_unlock(&card->sbp_lock);
mutex_unlock(&card->conf_mutex);
return rc ? rc : count;
@@ -158,6 +162,7 @@ static ssize_t qeth_bridgeport_hostnotification_store(struct device *dev,
return rc;
mutex_lock(&card->conf_mutex);
+ mutex_lock(&card->sbp_lock);
if (qeth_l2_vnicc_is_in_use(card))
rc = -EBUSY;
@@ -168,6 +173,7 @@ static ssize_t qeth_bridgeport_hostnotification_store(struct device *dev,
} else
card->options.sbp.hostnotification = enable;
+ mutex_unlock(&card->sbp_lock);
mutex_unlock(&card->conf_mutex);
return rc ? rc : count;
@@ -223,6 +229,7 @@ static ssize_t qeth_bridgeport_reflect_store(struct device *dev,
return -EINVAL;
mutex_lock(&card->conf_mutex);
+ mutex_lock(&card->sbp_lock);
if (qeth_l2_vnicc_is_in_use(card))
rc = -EBUSY;
@@ -234,6 +241,7 @@ static ssize_t qeth_bridgeport_reflect_store(struct device *dev,
rc = 0;
}
+ mutex_unlock(&card->sbp_lock);
mutex_unlock(&card->conf_mutex);
return rc ? rc : count;
@@ -269,6 +277,8 @@ void qeth_l2_setup_bridgeport_attrs(struct qeth_card *card)
return;
if (!card->options.sbp.supported_funcs)
return;
+
+ mutex_lock(&card->sbp_lock);
if (card->options.sbp.role != QETH_SBP_ROLE_NONE) {
/* Conditional to avoid spurious error messages */
qeth_bridgeport_setrole(card, card->options.sbp.role);
@@ -280,8 +290,10 @@ void qeth_l2_setup_bridgeport_attrs(struct qeth_card *card)
rc = qeth_bridgeport_an_set(card, 1);
if (rc)
card->options.sbp.hostnotification = 0;
- } else
+ } else {
qeth_bridgeport_an_set(card, 0);
+ }
+ mutex_unlock(&card->sbp_lock);
}
/* VNIC CHARS support */
diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c
index 296bbc3c4606..cf63916814cc 100644
--- a/drivers/s390/scsi/zfcp_fsf.c
+++ b/drivers/s390/scsi/zfcp_fsf.c
@@ -27,6 +27,11 @@
struct kmem_cache *zfcp_fsf_qtcb_cache;
+static bool ber_stop = true;
+module_param(ber_stop, bool, 0600);
+MODULE_PARM_DESC(ber_stop,
+ "Shuts down FCP devices for FCP channels that report a bit-error count in excess of its threshold (default on)");
+
static void zfcp_fsf_request_timeout_handler(struct timer_list *t)
{
struct zfcp_fsf_req *fsf_req = from_timer(fsf_req, t, timer);
@@ -236,10 +241,15 @@ static void zfcp_fsf_status_read_handler(struct zfcp_fsf_req *req)
case FSF_STATUS_READ_SENSE_DATA_AVAIL:
break;
case FSF_STATUS_READ_BIT_ERROR_THRESHOLD:
- dev_warn(&adapter->ccw_device->dev,
- "The error threshold for checksum statistics "
- "has been exceeded\n");
zfcp_dbf_hba_bit_err("fssrh_3", req);
+ if (ber_stop) {
+ dev_warn(&adapter->ccw_device->dev,
+ "All paths over this FCP device are disused because of excessive bit errors\n");
+ zfcp_erp_adapter_shutdown(adapter, 0, "fssrh_b");
+ } else {
+ dev_warn(&adapter->ccw_device->dev,
+ "The error threshold for checksum statistics has been exceeded\n");
+ }
break;
case FSF_STATUS_READ_LINK_DOWN:
zfcp_fsf_status_read_link_down(req);
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 1b92f3c19ff3..90cf4691b8c3 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -898,7 +898,7 @@ config SCSI_SNI_53C710
config 53C700_LE_ON_BE
bool
- depends on SCSI_LASI700
+ depends on SCSI_LASI700 || SCSI_SNI_53C710
default y
config SCSI_STEX
diff --git a/drivers/scsi/ch.c b/drivers/scsi/ch.c
index 5f8153c37f77..76751d6c7f0d 100644
--- a/drivers/scsi/ch.c
+++ b/drivers/scsi/ch.c
@@ -579,7 +579,6 @@ ch_release(struct inode *inode, struct file *file)
scsi_changer *ch = file->private_data;
scsi_device_put(ch->device);
- ch->device = NULL;
file->private_data = NULL;
kref_put(&ch->ref, ch_destroy);
return 0;
diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
index 4971104b1817..f32da0ca529e 100644
--- a/drivers/scsi/device_handler/scsi_dh_alua.c
+++ b/drivers/scsi/device_handler/scsi_dh_alua.c
@@ -512,6 +512,7 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
unsigned int tpg_desc_tbl_off;
unsigned char orig_transition_tmo;
unsigned long flags;
+ bool transitioning_sense = false;
if (!pg->expiry) {
unsigned long transition_tmo = ALUA_FAILOVER_TIMEOUT * HZ;
@@ -572,13 +573,19 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
goto retry;
}
/*
- * Retry on ALUA state transition or if any
- * UNIT ATTENTION occurred.
+ * If the array returns with 'ALUA state transition'
+ * sense code here it cannot return RTPG data during
+ * transition. So set the state to 'transitioning' directly.
*/
if (sense_hdr.sense_key == NOT_READY &&
- sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a)
- err = SCSI_DH_RETRY;
- else if (sense_hdr.sense_key == UNIT_ATTENTION)
+ sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a) {
+ transitioning_sense = true;
+ goto skip_rtpg;
+ }
+ /*
+ * Retry on any other UNIT ATTENTION occurred.
+ */
+ if (sense_hdr.sense_key == UNIT_ATTENTION)
err = SCSI_DH_RETRY;
if (err == SCSI_DH_RETRY &&
pg->expiry != 0 && time_before(jiffies, pg->expiry)) {
@@ -666,7 +673,11 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
off = 8 + (desc[7] * 4);
}
+ skip_rtpg:
spin_lock_irqsave(&pg->lock, flags);
+ if (transitioning_sense)
+ pg->state = SCSI_ACCESS_STATE_TRANSITIONING;
+
sdev_printk(KERN_INFO, sdev,
"%s: port group %02x state %c %s supports %c%c%c%c%c%c%c\n",
ALUA_DH_NAME, pg->group_id, print_alua_state(pg->state),
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index ac39ed79ccaa..216e557f703e 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -5477,6 +5477,8 @@ static int hpsa_ciss_submit(struct ctlr_info *h,
return SCSI_MLQUEUE_HOST_BUSY;
}
+ c->device = dev;
+
enqueue_cmd_and_start_io(h, c);
/* the cmd'll come back via intr handler in complete_scsi_command() */
return 0;
@@ -5548,6 +5550,7 @@ static int hpsa_ioaccel_submit(struct ctlr_info *h,
hpsa_cmd_init(h, c->cmdindex, c);
c->cmd_type = CMD_SCSI;
c->scsi_cmd = cmd;
+ c->device = dev;
rc = hpsa_scsi_ioaccel_raid_map(h, c);
if (rc < 0) /* scsi_dma_map failed. */
rc = SCSI_MLQUEUE_HOST_BUSY;
@@ -5555,6 +5558,7 @@ static int hpsa_ioaccel_submit(struct ctlr_info *h,
hpsa_cmd_init(h, c->cmdindex, c);
c->cmd_type = CMD_SCSI;
c->scsi_cmd = cmd;
+ c->device = dev;
rc = hpsa_scsi_ioaccel_direct_map(h, c);
if (rc < 0) /* scsi_dma_map failed. */
rc = SCSI_MLQUEUE_HOST_BUSY;
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index e91377a4cafe..e8813d26e594 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -9055,7 +9055,6 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
}
}
-#if defined(BUILD_NVME)
/* Clear NVME stats */
if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
for (idx = 0; idx < phba->cfg_hdw_queue; idx++) {
@@ -9063,7 +9062,6 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba)
sizeof(phba->sli4_hba.hdwq[idx].nvme_cstat));
}
}
-#endif
/* Clear SCSI stats */
if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) {
diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c
index f4b879d25fe9..fc6e4546d738 100644
--- a/drivers/scsi/lpfc/lpfc_nportdisc.c
+++ b/drivers/scsi/lpfc/lpfc_nportdisc.c
@@ -851,9 +851,9 @@ lpfc_disc_set_adisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
if (!(vport->fc_flag & FC_PT2PT)) {
/* Check config parameter use-adisc or FCP-2 */
- if ((vport->cfg_use_adisc && (vport->fc_flag & FC_RSCN_MODE)) ||
+ if (vport->cfg_use_adisc && ((vport->fc_flag & FC_RSCN_MODE) ||
((ndlp->nlp_fcp_info & NLP_FCP_2_DEVICE) &&
- (ndlp->nlp_type & NLP_FCP_TARGET))) {
+ (ndlp->nlp_type & NLP_FCP_TARGET)))) {
spin_lock_irq(shost->host_lock);
ndlp->nlp_flag |= NLP_NPR_ADISC;
spin_unlock_irq(shost->host_lock);
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index fe1097666de4..6822cd9ff8f1 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -528,7 +528,6 @@ lpfc_sli4_io_xri_aborted(struct lpfc_hba *phba,
list_del_init(&psb->list);
psb->exch_busy = 0;
psb->status = IOSTAT_SUCCESS;
-#ifdef BUILD_NVME
if (psb->cur_iocbq.iocb_flag == LPFC_IO_NVME) {
qp->abts_nvme_io_bufs--;
spin_unlock(&qp->abts_io_buf_list_lock);
@@ -536,7 +535,6 @@ lpfc_sli4_io_xri_aborted(struct lpfc_hba *phba,
lpfc_sli4_nvme_xri_aborted(phba, axri, psb);
return;
}
-#endif
qp->abts_scsi_io_bufs--;
spin_unlock(&qp->abts_io_buf_list_lock);
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index a0c6945b8139..614f78dddafe 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -7866,7 +7866,7 @@ lpfc_sli4_process_missed_mbox_completions(struct lpfc_hba *phba)
if (sli4_hba->hdwq) {
for (eqidx = 0; eqidx < phba->cfg_irq_chann; eqidx++) {
eq = phba->sli4_hba.hba_eq_hdl[eqidx].eq;
- if (eq->queue_id == sli4_hba->mbx_cq->assoc_qid) {
+ if (eq && eq->queue_id == sli4_hba->mbx_cq->assoc_qid) {
fpeq = eq;
break;
}
diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index 30bafd9d21e9..7259bce85e0e 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -440,9 +440,6 @@ qla2x00_sysfs_write_optrom_ctl(struct file *filp, struct kobject *kobj,
valid = 0;
if (ha->optrom_size == OPTROM_SIZE_2300 && start == 0)
valid = 1;
- else if (start == (ha->flt_region_boot * 4) ||
- start == (ha->flt_region_fw * 4))
- valid = 1;
else if (IS_QLA24XX_TYPE(ha) || IS_QLA25XX(ha))
valid = 1;
if (!valid) {
@@ -489,8 +486,10 @@ qla2x00_sysfs_write_optrom_ctl(struct file *filp, struct kobject *kobj,
"Writing flash region -- 0x%x/0x%x.\n",
ha->optrom_region_start, ha->optrom_region_size);
- ha->isp_ops->write_optrom(vha, ha->optrom_buffer,
+ rval = ha->isp_ops->write_optrom(vha, ha->optrom_buffer,
ha->optrom_region_start, ha->optrom_region_size);
+ if (rval)
+ rval = -EIO;
break;
default:
rval = -EINVAL;
diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c
index 28d587a89ba6..99f0a1a08143 100644
--- a/drivers/scsi/qla2xxx/qla_bsg.c
+++ b/drivers/scsi/qla2xxx/qla_bsg.c
@@ -253,7 +253,7 @@ qla2x00_process_els(struct bsg_job *bsg_job)
srb_t *sp;
const char *type;
int req_sg_cnt, rsp_sg_cnt;
- int rval = (DRIVER_ERROR << 16);
+ int rval = (DID_ERROR << 16);
uint16_t nextlid = 0;
if (bsg_request->msgcode == FC_BSG_RPT_ELS) {
@@ -432,7 +432,7 @@ qla2x00_process_ct(struct bsg_job *bsg_job)
struct Scsi_Host *host = fc_bsg_to_shost(bsg_job);
scsi_qla_host_t *vha = shost_priv(host);
struct qla_hw_data *ha = vha->hw;
- int rval = (DRIVER_ERROR << 16);
+ int rval = (DID_ERROR << 16);
int req_sg_cnt, rsp_sg_cnt;
uint16_t loop_id;
struct fc_port *fcport;
@@ -1950,7 +1950,7 @@ qlafx00_mgmt_cmd(struct bsg_job *bsg_job)
struct Scsi_Host *host = fc_bsg_to_shost(bsg_job);
scsi_qla_host_t *vha = shost_priv(host);
struct qla_hw_data *ha = vha->hw;
- int rval = (DRIVER_ERROR << 16);
+ int rval = (DID_ERROR << 16);
struct qla_mt_iocb_rqst_fx00 *piocb_rqst;
srb_t *sp;
int req_sg_cnt = 0, rsp_sg_cnt = 0;
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index 4c26630c1c3e..009fd5a33fcd 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -2837,8 +2837,6 @@ qla2x00_status_cont_entry(struct rsp_que *rsp, sts_cont_entry_t *pkt)
if (sense_len == 0) {
rsp->status_srb = NULL;
sp->done(sp, cp->result);
- } else {
- WARN_ON_ONCE(true);
}
}
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index 1cc6913f76c4..4a1f21c11758 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -702,6 +702,7 @@ qla2x00_execute_fw(scsi_qla_host_t *vha, uint32_t risc_addr)
mcp->mb[2] = LSW(risc_addr);
mcp->mb[3] = 0;
mcp->mb[4] = 0;
+ mcp->mb[11] = 0;
ha->flags.using_lr_setting = 0;
if (IS_QLA25XX(ha) || IS_QLA81XX(ha) || IS_QLA83XX(ha) ||
IS_QLA27XX(ha) || IS_QLA28XX(ha)) {
@@ -746,7 +747,7 @@ qla2x00_execute_fw(scsi_qla_host_t *vha, uint32_t risc_addr)
if (ha->flags.exchoffld_enabled)
mcp->mb[4] |= ENABLE_EXCHANGE_OFFLD;
- mcp->out_mb |= MBX_4|MBX_3|MBX_2|MBX_1;
+ mcp->out_mb |= MBX_4 | MBX_3 | MBX_2 | MBX_1 | MBX_11;
mcp->in_mb |= MBX_3 | MBX_2 | MBX_1;
} else {
mcp->mb[1] = LSW(risc_addr);
diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c
index 6afad68e5ba2..238240984bc1 100644
--- a/drivers/scsi/qla2xxx/qla_mid.c
+++ b/drivers/scsi/qla2xxx/qla_mid.c
@@ -76,9 +76,11 @@ qla24xx_deallocate_vp_id(scsi_qla_host_t *vha)
* ensures no active vp_list traversal while the vport is removed
* from the queue)
*/
- for (i = 0; i < 10 && atomic_read(&vha->vref_count); i++)
- wait_event_timeout(vha->vref_waitq,
- atomic_read(&vha->vref_count), HZ);
+ for (i = 0; i < 10; i++) {
+ if (wait_event_timeout(vha->vref_waitq,
+ !atomic_read(&vha->vref_count), HZ) > 0)
+ break;
+ }
spin_lock_irqsave(&ha->vport_slock, flags);
if (atomic_read(&vha->vref_count)) {
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 3568031c6504..726ad4cbf4a6 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -1119,9 +1119,11 @@ qla2x00_wait_for_sess_deletion(scsi_qla_host_t *vha)
qla2x00_mark_all_devices_lost(vha, 0);
- for (i = 0; i < 10; i++)
- wait_event_timeout(vha->fcport_waitQ, test_fcport_count(vha),
- HZ);
+ for (i = 0; i < 10; i++) {
+ if (wait_event_timeout(vha->fcport_waitQ,
+ test_fcport_count(vha), HZ) > 0)
+ break;
+ }
flush_workqueue(vha->hw->wq);
}
@@ -3224,6 +3226,10 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
req->req_q_in, req->req_q_out, rsp->rsp_q_in, rsp->rsp_q_out);
ha->wq = alloc_workqueue("qla2xxx_wq", 0, 0);
+ if (unlikely(!ha->wq)) {
+ ret = -ENOMEM;
+ goto probe_failed;
+ }
if (ha->isp_ops->initialize_adapter(base_vha)) {
ql_log(ql_log_fatal, base_vha, 0x00d6,
@@ -3531,6 +3537,10 @@ qla2x00_shutdown(struct pci_dev *pdev)
qla2x00_try_to_stop_firmware(vha);
}
+ /* Disable timer */
+ if (vha->timer_active)
+ qla2x00_stop_timer(vha);
+
/* Turn adapter off line */
vha->flags.online = 0;
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 1c470e31ae81..ae2fa170f6ad 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -967,6 +967,7 @@ void scsi_eh_prep_cmnd(struct scsi_cmnd *scmd, struct scsi_eh_save *ses,
ses->data_direction = scmd->sc_data_direction;
ses->sdb = scmd->sdb;
ses->result = scmd->result;
+ ses->resid_len = scmd->req.resid_len;
ses->underflow = scmd->underflow;
ses->prot_op = scmd->prot_op;
ses->eh_eflags = scmd->eh_eflags;
@@ -977,6 +978,7 @@ void scsi_eh_prep_cmnd(struct scsi_cmnd *scmd, struct scsi_eh_save *ses,
memset(scmd->cmnd, 0, BLK_MAX_CDB);
memset(&scmd->sdb, 0, sizeof(scmd->sdb));
scmd->result = 0;
+ scmd->req.resid_len = 0;
if (sense_bytes) {
scmd->sdb.length = min_t(unsigned, SCSI_SENSE_BUFFERSIZE,
@@ -1029,6 +1031,7 @@ void scsi_eh_restore_cmnd(struct scsi_cmnd* scmd, struct scsi_eh_save *ses)
scmd->sc_data_direction = ses->data_direction;
scmd->sdb = ses->sdb;
scmd->result = ses->result;
+ scmd->req.resid_len = ses->resid_len;
scmd->underflow = ses->underflow;
scmd->prot_op = ses->prot_op;
scmd->eh_eflags = ses->eh_eflags;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index dc210b9d4896..91c007d26c1e 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1834,6 +1834,7 @@ static const struct blk_mq_ops scsi_mq_ops_no_commit = {
.init_request = scsi_mq_init_request,
.exit_request = scsi_mq_exit_request,
.initialize_rq_fn = scsi_initialize_rq,
+ .cleanup_rq = scsi_cleanup_rq,
.busy = scsi_mq_lld_busy,
.map_queues = scsi_map_queues,
};
@@ -1882,7 +1883,8 @@ int scsi_mq_setup_tags(struct Scsi_Host *shost)
{
unsigned int cmd_size, sgl_size;
- sgl_size = scsi_mq_inline_sgl_size(shost);
+ sgl_size = max_t(unsigned int, sizeof(struct scatterlist),
+ scsi_mq_inline_sgl_size(shost));
cmd_size = sizeof(struct scsi_cmnd) + shost->hostt->cmd_size + sgl_size;
if (scsi_host_get_prot(shost))
cmd_size += sizeof(struct scsi_data_buffer) +
@@ -1921,7 +1923,8 @@ struct scsi_device *scsi_device_from_queue(struct request_queue *q)
{
struct scsi_device *sdev = NULL;
- if (q->mq_ops == &scsi_mq_ops)
+ if (q->mq_ops == &scsi_mq_ops_no_commit ||
+ q->mq_ops == &scsi_mq_ops)
sdev = q->queuedata;
if (!sdev || !get_device(&sdev->sdev_gendev))
sdev = NULL;
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 64c96c7828ee..6d7362e7367e 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -730,6 +730,14 @@ sdev_store_delete(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
struct kernfs_node *kn;
+ struct scsi_device *sdev = to_scsi_device(dev);
+
+ /*
+ * We need to try to get module, avoiding the module been removed
+ * during delete.
+ */
+ if (scsi_device_get(sdev))
+ return -ENODEV;
kn = sysfs_break_active_protection(&dev->kobj, &attr->attr);
WARN_ON_ONCE(!kn);
@@ -744,9 +752,10 @@ sdev_store_delete(struct device *dev, struct device_attribute *attr,
* state into SDEV_DEL.
*/
device_remove_file(dev, attr);
- scsi_remove_device(to_scsi_device(dev));
+ scsi_remove_device(sdev);
if (kn)
sysfs_unbreak_active_protection(kn);
+ scsi_device_put(sdev);
return count;
};
static DEVICE_ATTR(delete, S_IWUSR, NULL, sdev_store_delete);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 50928bc266eb..470ee6dc3f7e 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1166,11 +1166,12 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd)
sector_t lba = sectors_to_logical(sdp, blk_rq_pos(rq));
sector_t threshold;
unsigned int nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq));
- bool dif, dix;
unsigned int mask = logical_to_sectors(sdp, 1) - 1;
bool write = rq_data_dir(rq) == WRITE;
unsigned char protect, fua;
blk_status_t ret;
+ unsigned int dif;
+ bool dix;
ret = scsi_init_io(cmd);
if (ret != BLK_STS_OK)
@@ -1290,9 +1291,17 @@ static blk_status_t sd_init_command(struct scsi_cmnd *cmd)
case REQ_OP_WRITE:
return sd_setup_read_write_cmnd(cmd);
case REQ_OP_ZONE_RESET:
- return sd_zbc_setup_reset_cmnd(cmd, false);
+ return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_RESET_WRITE_POINTER,
+ false);
case REQ_OP_ZONE_RESET_ALL:
- return sd_zbc_setup_reset_cmnd(cmd, true);
+ return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_RESET_WRITE_POINTER,
+ true);
+ case REQ_OP_ZONE_OPEN:
+ return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_OPEN_ZONE, false);
+ case REQ_OP_ZONE_CLOSE:
+ return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_CLOSE_ZONE, false);
+ case REQ_OP_ZONE_FINISH:
+ return sd_zbc_setup_zone_mgmt_cmnd(cmd, ZO_FINISH_ZONE, false);
default:
WARN_ON_ONCE(1);
return BLK_STS_NOTSUPP;
@@ -1654,7 +1663,8 @@ static int sd_sync_cache(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr)
/* we need to evaluate the error return */
if (scsi_sense_valid(sshdr) &&
(sshdr->asc == 0x3a || /* medium not present */
- sshdr->asc == 0x20)) /* invalid command */
+ sshdr->asc == 0x20 || /* invalid command */
+ (sshdr->asc == 0x74 && sshdr->ascq == 0x71))) /* drive is password locked */
/* this is no error here */
return 0;
@@ -1959,6 +1969,9 @@ static int sd_done(struct scsi_cmnd *SCpnt)
case REQ_OP_WRITE_SAME:
case REQ_OP_ZONE_RESET:
case REQ_OP_ZONE_RESET_ALL:
+ case REQ_OP_ZONE_OPEN:
+ case REQ_OP_ZONE_CLOSE:
+ case REQ_OP_ZONE_FINISH:
if (!result) {
good_bytes = blk_rq_bytes(req);
scsi_set_resid(SCpnt, 0);
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 1eab779f812b..42fd3f00e4a5 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -209,11 +209,12 @@ static inline int sd_is_zoned(struct scsi_disk *sdkp)
extern int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buffer);
extern void sd_zbc_print_zones(struct scsi_disk *sdkp);
-extern blk_status_t sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd, bool all);
+blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd,
+ unsigned char op, bool all);
extern void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
struct scsi_sense_hdr *sshdr);
-extern int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
- struct blk_zone *zones, unsigned int *nr_zones);
+int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
+ unsigned int nr_zones, report_zones_cb cb, void *data);
#else /* CONFIG_BLK_DEV_ZONED */
@@ -225,8 +226,9 @@ static inline int sd_zbc_read_zones(struct scsi_disk *sdkp,
static inline void sd_zbc_print_zones(struct scsi_disk *sdkp) {}
-static inline blk_status_t sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd,
- bool all)
+static inline blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd,
+ unsigned char op,
+ bool all)
{
return BLK_STS_TARGET;
}
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index de4019dc0f0b..0e5ede48f045 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -19,34 +19,27 @@
#include "sd.h"
-/**
- * sd_zbc_parse_report - Convert a zone descriptor to a struct blk_zone,
- * @sdkp: The disk the report originated from
- * @buf: Address of the report zone descriptor
- * @zone: the destination zone structure
- *
- * All LBA sized values are converted to 512B sectors unit.
- */
-static void sd_zbc_parse_report(struct scsi_disk *sdkp, u8 *buf,
- struct blk_zone *zone)
+static int sd_zbc_parse_report(struct scsi_disk *sdkp, u8 *buf,
+ unsigned int idx, report_zones_cb cb, void *data)
{
struct scsi_device *sdp = sdkp->device;
+ struct blk_zone zone = { 0 };
- memset(zone, 0, sizeof(struct blk_zone));
-
- zone->type = buf[0] & 0x0f;
- zone->cond = (buf[1] >> 4) & 0xf;
+ zone.type = buf[0] & 0x0f;
+ zone.cond = (buf[1] >> 4) & 0xf;
if (buf[1] & 0x01)
- zone->reset = 1;
+ zone.reset = 1;
if (buf[1] & 0x02)
- zone->non_seq = 1;
-
- zone->len = logical_to_sectors(sdp, get_unaligned_be64(&buf[8]));
- zone->start = logical_to_sectors(sdp, get_unaligned_be64(&buf[16]));
- zone->wp = logical_to_sectors(sdp, get_unaligned_be64(&buf[24]));
- if (zone->type != ZBC_ZONE_TYPE_CONV &&
- zone->cond == ZBC_ZONE_COND_FULL)
- zone->wp = zone->start + zone->len;
+ zone.non_seq = 1;
+
+ zone.len = logical_to_sectors(sdp, get_unaligned_be64(&buf[8]));
+ zone.start = logical_to_sectors(sdp, get_unaligned_be64(&buf[16]));
+ zone.wp = logical_to_sectors(sdp, get_unaligned_be64(&buf[24]));
+ if (zone.type != ZBC_ZONE_TYPE_CONV &&
+ zone.cond == ZBC_ZONE_COND_FULL)
+ zone.wp = zone.start + zone.len;
+
+ return cb(&zone, idx, data);
}
/**
@@ -104,11 +97,6 @@ static int sd_zbc_do_report_zones(struct scsi_disk *sdkp, unsigned char *buf,
return 0;
}
-/*
- * Maximum number of zones to get with one report zones command.
- */
-#define SD_ZBC_REPORT_MAX_ZONES 8192U
-
/**
* Allocate a buffer for report zones reply.
* @sdkp: The target disk
@@ -138,82 +126,94 @@ static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp,
* sure that the allocated buffer can always be mapped by limiting the
* number of pages allocated to the HBA max segments limit.
*/
- nr_zones = min(nr_zones, SD_ZBC_REPORT_MAX_ZONES);
- bufsize = roundup((nr_zones + 1) * 64, 512);
+ nr_zones = min(nr_zones, sdkp->nr_zones);
+ bufsize = roundup((nr_zones + 1) * 64, SECTOR_SIZE);
bufsize = min_t(size_t, bufsize,
queue_max_hw_sectors(q) << SECTOR_SHIFT);
bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT);
- buf = vzalloc(bufsize);
- if (buf)
- *buflen = bufsize;
+ while (bufsize >= SECTOR_SIZE) {
+ buf = __vmalloc(bufsize,
+ GFP_KERNEL | __GFP_ZERO | __GFP_NORETRY,
+ PAGE_KERNEL);
+ if (buf) {
+ *buflen = bufsize;
+ return buf;
+ }
+ bufsize >>= 1;
+ }
- return buf;
+ return NULL;
}
/**
- * sd_zbc_report_zones - Disk report zones operation.
- * @disk: The target disk
- * @sector: Start 512B sector of the report
- * @zones: Array of zone descriptors
- * @nr_zones: Number of descriptors in the array
- *
- * Execute a report zones command on the target disk.
+ * sd_zbc_zone_sectors - Get the device zone size in number of 512B sectors.
+ * @sdkp: The target disk
*/
+static inline sector_t sd_zbc_zone_sectors(struct scsi_disk *sdkp)
+{
+ return logical_to_sectors(sdkp->device, sdkp->zone_blocks);
+}
+
int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
- struct blk_zone *zones, unsigned int *nr_zones)
+ unsigned int nr_zones, report_zones_cb cb, void *data)
{
struct scsi_disk *sdkp = scsi_disk(disk);
- unsigned int i, nrz = *nr_zones;
+ unsigned int nr, i;
unsigned char *buf;
- size_t buflen = 0, offset = 0;
- int ret = 0;
+ size_t offset, buflen = 0;
+ int zone_idx = 0;
+ int ret;
if (!sd_is_zoned(sdkp))
/* Not a zoned device */
return -EOPNOTSUPP;
- buf = sd_zbc_alloc_report_buffer(sdkp, nrz, &buflen);
+ buf = sd_zbc_alloc_report_buffer(sdkp, nr_zones, &buflen);
if (!buf)
return -ENOMEM;
- ret = sd_zbc_do_report_zones(sdkp, buf, buflen,
- sectors_to_logical(sdkp->device, sector), true);
- if (ret)
- goto out;
+ while (zone_idx < nr_zones && sector < get_capacity(disk)) {
+ ret = sd_zbc_do_report_zones(sdkp, buf, buflen,
+ sectors_to_logical(sdkp->device, sector), true);
+ if (ret)
+ goto out;
+
+ offset = 0;
+ nr = min(nr_zones, get_unaligned_be32(&buf[0]) / 64);
+ if (!nr)
+ break;
+
+ for (i = 0; i < nr && zone_idx < nr_zones; i++) {
+ offset += 64;
+ ret = sd_zbc_parse_report(sdkp, buf + offset, zone_idx,
+ cb, data);
+ if (ret)
+ goto out;
+ zone_idx++;
+ }
- nrz = min(nrz, get_unaligned_be32(&buf[0]) / 64);
- for (i = 0; i < nrz; i++) {
- offset += 64;
- sd_zbc_parse_report(sdkp, buf + offset, zones);
- zones++;
+ sector += sd_zbc_zone_sectors(sdkp) * i;
}
- *nr_zones = nrz;
-
+ ret = zone_idx;
out:
kvfree(buf);
-
return ret;
}
/**
- * sd_zbc_zone_sectors - Get the device zone size in number of 512B sectors.
- * @sdkp: The target disk
- */
-static inline sector_t sd_zbc_zone_sectors(struct scsi_disk *sdkp)
-{
- return logical_to_sectors(sdkp->device, sdkp->zone_blocks);
-}
-
-/**
- * sd_zbc_setup_reset_cmnd - Prepare a RESET WRITE POINTER scsi command.
+ * sd_zbc_setup_zone_mgmt_cmnd - Prepare a zone ZBC_OUT command. The operations
+ * can be RESET WRITE POINTER, OPEN, CLOSE or FINISH.
* @cmd: the command to setup
- * @all: Reset all zones control.
+ * @op: Operation to be performed
+ * @all: All zones control
*
- * Called from sd_init_command() for a REQ_OP_ZONE_RESET request.
+ * Called from sd_init_command() for REQ_OP_ZONE_RESET, REQ_OP_ZONE_RESET_ALL,
+ * REQ_OP_ZONE_OPEN, REQ_OP_ZONE_CLOSE or REQ_OP_ZONE_FINISH requests.
*/
-blk_status_t sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd, bool all)
+blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd,
+ unsigned char op, bool all)
{
struct request *rq = cmd->request;
struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
@@ -234,7 +234,7 @@ blk_status_t sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd, bool all)
cmd->cmd_len = 16;
memset(cmd->cmnd, 0, cmd->cmd_len);
cmd->cmnd[0] = ZBC_OUT;
- cmd->cmnd[1] = ZO_RESET_WRITE_POINTER;
+ cmd->cmnd[1] = op;
if (all)
cmd->cmnd[14] = 0x1;
else
@@ -263,25 +263,16 @@ void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
int result = cmd->result;
struct request *rq = cmd->request;
- switch (req_op(rq)) {
- case REQ_OP_ZONE_RESET:
- case REQ_OP_ZONE_RESET_ALL:
-
- if (result &&
- sshdr->sense_key == ILLEGAL_REQUEST &&
- sshdr->asc == 0x24)
- /*
- * INVALID FIELD IN CDB error: reset of a conventional
- * zone was attempted. Nothing to worry about, so be
- * quiet about the error.
- */
- rq->rq_flags |= RQF_QUIET;
- break;
-
- case REQ_OP_WRITE:
- case REQ_OP_WRITE_ZEROES:
- case REQ_OP_WRITE_SAME:
- break;
+ if (op_is_zone_mgmt(req_op(rq)) &&
+ result &&
+ sshdr->sense_key == ILLEGAL_REQUEST &&
+ sshdr->asc == 0x24) {
+ /*
+ * INVALID FIELD IN CDB error: a zone management command was
+ * attempted on a conventional zone. Nothing to worry about,
+ * so be quiet about the error.
+ */
+ rq->rq_flags |= RQF_QUIET;
}
}
@@ -344,32 +335,18 @@ static int sd_zbc_check_zoned_characteristics(struct scsi_disk *sdkp,
* Returns the zone size in number of blocks upon success or an error code
* upon failure.
*/
-static int sd_zbc_check_zones(struct scsi_disk *sdkp, u32 *zblocks)
+static int sd_zbc_check_zones(struct scsi_disk *sdkp, unsigned char *buf,
+ u32 *zblocks)
{
- size_t bufsize, buflen;
- unsigned int noio_flag;
u64 zone_blocks = 0;
- sector_t max_lba, block = 0;
- unsigned char *buf;
+ sector_t max_lba;
unsigned char *rec;
int ret;
- u8 same;
-
- /* Do all memory allocations as if GFP_NOIO was specified */
- noio_flag = memalloc_noio_save();
-
- /* Get a buffer */
- buf = sd_zbc_alloc_report_buffer(sdkp, SD_ZBC_REPORT_MAX_ZONES,
- &bufsize);
- if (!buf) {
- ret = -ENOMEM;
- goto out;
- }
- /* Do a report zone to get max_lba and the same field */
- ret = sd_zbc_do_report_zones(sdkp, buf, bufsize, 0, false);
+ /* Do a report zone to get max_lba and the size of the first zone */
+ ret = sd_zbc_do_report_zones(sdkp, buf, SD_BUF_SIZE, 0, false);
if (ret)
- goto out_free;
+ return ret;
if (sdkp->rc_basis == 0) {
/* The max_lba field is the capacity of this device */
@@ -384,82 +361,27 @@ static int sd_zbc_check_zones(struct scsi_disk *sdkp, u32 *zblocks)
}
}
- /*
- * Check same field: for any value other than 0, we know that all zones
- * have the same size.
- */
- same = buf[4] & 0x0f;
- if (same > 0) {
- rec = &buf[64];
- zone_blocks = get_unaligned_be64(&rec[8]);
- goto out;
- }
-
- /*
- * Check the size of all zones: all zones must be of
- * equal size, except the last zone which can be smaller
- * than other zones.
- */
- do {
-
- /* Parse REPORT ZONES header */
- buflen = min_t(size_t, get_unaligned_be32(&buf[0]) + 64,
- bufsize);
- rec = buf + 64;
-
- /* Parse zone descriptors */
- while (rec < buf + buflen) {
- u64 this_zone_blocks = get_unaligned_be64(&rec[8]);
-
- if (zone_blocks == 0) {
- zone_blocks = this_zone_blocks;
- } else if (this_zone_blocks != zone_blocks &&
- (block + this_zone_blocks < sdkp->capacity
- || this_zone_blocks > zone_blocks)) {
- zone_blocks = 0;
- goto out;
- }
- block += this_zone_blocks;
- rec += 64;
- }
-
- if (block < sdkp->capacity) {
- ret = sd_zbc_do_report_zones(sdkp, buf, bufsize, block,
- true);
- if (ret)
- goto out_free;
- }
-
- } while (block < sdkp->capacity);
-
-out:
- if (!zone_blocks) {
- if (sdkp->first_scan)
- sd_printk(KERN_NOTICE, sdkp,
- "Devices with non constant zone "
- "size are not supported\n");
- ret = -ENODEV;
- } else if (!is_power_of_2(zone_blocks)) {
+ /* Parse REPORT ZONES header */
+ rec = buf + 64;
+ zone_blocks = get_unaligned_be64(&rec[8]);
+ if (!zone_blocks || !is_power_of_2(zone_blocks)) {
if (sdkp->first_scan)
sd_printk(KERN_NOTICE, sdkp,
"Devices with non power of 2 zone "
"size are not supported\n");
- ret = -ENODEV;
- } else if (logical_to_sectors(sdkp->device, zone_blocks) > UINT_MAX) {
+ return -ENODEV;
+ }
+
+ if (logical_to_sectors(sdkp->device, zone_blocks) > UINT_MAX) {
if (sdkp->first_scan)
sd_printk(KERN_NOTICE, sdkp,
"Zone size too large\n");
- ret = -EFBIG;
- } else {
- *zblocks = zone_blocks;
- ret = 0;
+ return -EFBIG;
}
-out_free:
- memalloc_noio_restore(noio_flag);
- kvfree(buf);
+ *zblocks = zone_blocks;
- return ret;
+ return 0;
}
int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
@@ -485,7 +407,7 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
* Check zone size: only devices with a constant zone size (except
* an eventual last runt zone) that is a power of 2 are supported.
*/
- ret = sd_zbc_check_zones(sdkp, &zone_blocks);
+ ret = sd_zbc_check_zones(sdkp, buf, &zone_blocks);
if (ret != 0)
goto err;
diff --git a/drivers/scsi/sni_53c710.c b/drivers/scsi/sni_53c710.c
index aef4881d8e21..a85d52b5dc32 100644
--- a/drivers/scsi/sni_53c710.c
+++ b/drivers/scsi/sni_53c710.c
@@ -66,10 +66,8 @@ static int snirm710_probe(struct platform_device *dev)
base = res->start;
hostdata = kzalloc(sizeof(*hostdata), GFP_KERNEL);
- if (!hostdata) {
- dev_printk(KERN_ERR, dev, "Failed to allocate host data\n");
+ if (!hostdata)
return -ENOMEM;
- }
hostdata->dev = &dev->dev;
dma_set_mask(&dev->dev, DMA_BIT_MASK(32));
diff --git a/drivers/scsi/ufs/ufs_bsg.c b/drivers/scsi/ufs/ufs_bsg.c
index a9344eb4e047..dc2f6d2b46ed 100644
--- a/drivers/scsi/ufs/ufs_bsg.c
+++ b/drivers/scsi/ufs/ufs_bsg.c
@@ -98,6 +98,8 @@ static int ufs_bsg_request(struct bsg_job *job)
bsg_reply->reply_payload_rcv_len = 0;
+ pm_runtime_get_sync(hba->dev);
+
msgcode = bsg_request->msgcode;
switch (msgcode) {
case UPIU_TRANSACTION_QUERY_REQ:
@@ -135,6 +137,8 @@ static int ufs_bsg_request(struct bsg_job *job)
break;
}
+ pm_runtime_put_sync(hba->dev);
+
if (!desc_buff)
goto out;
diff --git a/drivers/soc/imx/gpc.c b/drivers/soc/imx/gpc.c
index d9231bd3c691..98b9d9a902ae 100644
--- a/drivers/soc/imx/gpc.c
+++ b/drivers/soc/imx/gpc.c
@@ -249,13 +249,13 @@ static struct genpd_power_state imx6_pm_domain_pu_state = {
};
static struct imx_pm_domain imx_gpc_domains[] = {
- [GPC_PGC_DOMAIN_ARM] {
+ [GPC_PGC_DOMAIN_ARM] = {
.base = {
.name = "ARM",
.flags = GENPD_FLAG_ALWAYS_ON,
},
},
- [GPC_PGC_DOMAIN_PU] {
+ [GPC_PGC_DOMAIN_PU] = {
.base = {
.name = "PU",
.power_off = imx6_pm_domain_power_off,
@@ -266,7 +266,7 @@ static struct imx_pm_domain imx_gpc_domains[] = {
.reg_offs = 0x260,
.cntr_pdn_bit = 0,
},
- [GPC_PGC_DOMAIN_DISPLAY] {
+ [GPC_PGC_DOMAIN_DISPLAY] = {
.base = {
.name = "DISPLAY",
.power_off = imx6_pm_domain_power_off,
@@ -275,7 +275,7 @@ static struct imx_pm_domain imx_gpc_domains[] = {
.reg_offs = 0x240,
.cntr_pdn_bit = 4,
},
- [GPC_PGC_DOMAIN_PCI] {
+ [GPC_PGC_DOMAIN_PCI] = {
.base = {
.name = "PCI",
.power_off = imx6_pm_domain_power_off,
diff --git a/drivers/soc/imx/soc-imx-scu.c b/drivers/soc/imx/soc-imx-scu.c
index 50831ebf126a..c68882eb80f7 100644
--- a/drivers/soc/imx/soc-imx-scu.c
+++ b/drivers/soc/imx/soc-imx-scu.c
@@ -46,7 +46,7 @@ static ssize_t soc_uid_show(struct device *dev,
hdr->func = IMX_SC_MISC_FUNC_UNIQUE_ID;
hdr->size = 1;
- ret = imx_scu_call_rpc(soc_ipc_handle, &msg, false);
+ ret = imx_scu_call_rpc(soc_ipc_handle, &msg, true);
if (ret) {
pr_err("%s: get soc uid failed, ret %d\n", __func__, ret);
return ret;
diff --git a/drivers/soundwire/Kconfig b/drivers/soundwire/Kconfig
index f518273cfbe3..c8c80df090d1 100644
--- a/drivers/soundwire/Kconfig
+++ b/drivers/soundwire/Kconfig
@@ -5,6 +5,7 @@
menuconfig SOUNDWIRE
tristate "SoundWire support"
+ depends on ACPI || OF
help
SoundWire is a 2-Pin interface with data and clock line ratified
by the MIPI Alliance. SoundWire is used for transporting data
diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c
index f1e38a293967..13c54eac0cc3 100644
--- a/drivers/soundwire/intel.c
+++ b/drivers/soundwire/intel.c
@@ -900,7 +900,7 @@ static int intel_register_dai(struct sdw_intel *sdw)
/* Create PCM DAIs */
stream = &cdns->pcm;
- ret = intel_create_dai(cdns, dais, INTEL_PDI_IN, stream->num_in,
+ ret = intel_create_dai(cdns, dais, INTEL_PDI_IN, cdns->pcm.num_in,
off, stream->num_ch_in, true);
if (ret)
return ret;
@@ -931,7 +931,7 @@ static int intel_register_dai(struct sdw_intel *sdw)
if (ret)
return ret;
- off += cdns->pdm.num_bd;
+ off += cdns->pdm.num_out;
ret = intel_create_dai(cdns, dais, INTEL_PDI_BD, cdns->pdm.num_bd,
off, stream->num_ch_bd, false);
if (ret)
diff --git a/drivers/soundwire/slave.c b/drivers/soundwire/slave.c
index 48a63ca130d2..6473fa602f82 100644
--- a/drivers/soundwire/slave.c
+++ b/drivers/soundwire/slave.c
@@ -128,7 +128,8 @@ int sdw_of_find_slaves(struct sdw_bus *bus)
struct device_node *node;
for_each_child_of_node(bus->dev->of_node, node) {
- int link_id, sdw_version, ret, len;
+ int link_id, ret, len;
+ unsigned int sdw_version;
const char *compat = NULL;
struct sdw_slave_id id;
const __be32 *addr;
diff --git a/drivers/staging/exfat/Kconfig b/drivers/staging/exfat/Kconfig
index 290dbfc7ace1..ce32dfe33bec 100644
--- a/drivers/staging/exfat/Kconfig
+++ b/drivers/staging/exfat/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
config EXFAT_FS
tristate "exFAT fs support"
depends on BLOCK
@@ -6,7 +7,7 @@ config EXFAT_FS
This adds support for the exFAT file system.
config EXFAT_DONT_MOUNT_VFAT
- bool "Prohibit mounting of fat/vfat filesysems by exFAT"
+ bool "Prohibit mounting of fat/vfat filesystems by exFAT"
depends on EXFAT_FS
default y
help
diff --git a/drivers/staging/exfat/Makefile b/drivers/staging/exfat/Makefile
index 84944dfbae28..6c90aec83feb 100644
--- a/drivers/staging/exfat/Makefile
+++ b/drivers/staging/exfat/Makefile
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0
+# SPDX-License-Identifier: GPL-2.0-or-later
obj-$(CONFIG_EXFAT_FS) += exfat.o
diff --git a/drivers/staging/exfat/exfat.h b/drivers/staging/exfat/exfat.h
index 6c12f2d79f4d..3abab33e932c 100644
--- a/drivers/staging/exfat/exfat.h
+++ b/drivers/staging/exfat/exfat.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
*/
diff --git a/drivers/staging/exfat/exfat_blkdev.c b/drivers/staging/exfat/exfat_blkdev.c
index f086c75e7076..81d20e6241c6 100644
--- a/drivers/staging/exfat/exfat_blkdev.c
+++ b/drivers/staging/exfat/exfat_blkdev.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
*/
diff --git a/drivers/staging/exfat/exfat_cache.c b/drivers/staging/exfat/exfat_cache.c
index 1565ce65d39f..e1b001718709 100644
--- a/drivers/staging/exfat/exfat_cache.c
+++ b/drivers/staging/exfat/exfat_cache.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
*/
diff --git a/drivers/staging/exfat/exfat_core.c b/drivers/staging/exfat/exfat_core.c
index b3e9cf725cf5..79174e5c4145 100644
--- a/drivers/staging/exfat/exfat_core.c
+++ b/drivers/staging/exfat/exfat_core.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
*/
diff --git a/drivers/staging/exfat/exfat_nls.c b/drivers/staging/exfat/exfat_nls.c
index 03cb8290b5d2..a5c4b68925fb 100644
--- a/drivers/staging/exfat/exfat_nls.c
+++ b/drivers/staging/exfat/exfat_nls.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
*/
diff --git a/drivers/staging/exfat/exfat_super.c b/drivers/staging/exfat/exfat_super.c
index 5f6caee819a6..3b2b0ceb7297 100644
--- a/drivers/staging/exfat/exfat_super.c
+++ b/drivers/staging/exfat/exfat_super.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
*/
@@ -7,6 +7,7 @@
#include <linux/init.h>
#include <linux/time.h>
#include <linux/slab.h>
+#include <linux/mm.h>
#include <linux/seq_file.h>
#include <linux/pagemap.h>
#include <linux/mpage.h>
@@ -3450,7 +3451,7 @@ static void exfat_free_super(struct exfat_sb_info *sbi)
kfree(sbi->options.iocharset);
/* mutex_init is in exfat_fill_super function. only for 3.7+ */
mutex_destroy(&sbi->s_lock);
- kfree(sbi);
+ kvfree(sbi);
}
static void exfat_put_super(struct super_block *sb)
@@ -3845,7 +3846,7 @@ static int exfat_fill_super(struct super_block *sb, void *data, int silent)
* the filesystem, since we're only just about to mount
* it and have no inodes etc active!
*/
- sbi = kzalloc(sizeof(struct exfat_sb_info), GFP_KERNEL);
+ sbi = kvzalloc(sizeof(*sbi), GFP_KERNEL);
if (!sbi)
return -ENOMEM;
mutex_init(&sbi->s_lock);
diff --git a/drivers/staging/exfat/exfat_upcase.c b/drivers/staging/exfat/exfat_upcase.c
index 366082fb3dab..b91a1faa0e50 100644
--- a/drivers/staging/exfat/exfat_upcase.c
+++ b/drivers/staging/exfat/exfat_upcase.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2012-2013 Samsung Electronics Co., Ltd.
*/
diff --git a/drivers/staging/fbtft/Kconfig b/drivers/staging/fbtft/Kconfig
index 8ec524a95ec8..cb61c2a772bd 100644
--- a/drivers/staging/fbtft/Kconfig
+++ b/drivers/staging/fbtft/Kconfig
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
menuconfig FB_TFT
tristate "Support for small TFT LCD display modules"
- depends on FB && SPI
+ depends on FB && SPI && OF
depends on GPIOLIB || COMPILE_TEST
select FB_SYS_FILLRECT
select FB_SYS_COPYAREA
@@ -199,13 +199,3 @@ config FB_TFT_WATTEROTT
depends on FB_TFT
help
Generic Framebuffer support for WATTEROTT
-
-config FB_FLEX
- tristate "Generic FB driver for TFT LCD displays"
- depends on FB_TFT
- help
- Generic Framebuffer support for TFT LCD displays.
-
-config FB_TFT_FBTFT_DEVICE
- tristate "Module to for adding FBTFT devices"
- depends on FB_TFT
diff --git a/drivers/staging/fbtft/Makefile b/drivers/staging/fbtft/Makefile
index 6bc03311c9c7..27af43f32f81 100644
--- a/drivers/staging/fbtft/Makefile
+++ b/drivers/staging/fbtft/Makefile
@@ -36,7 +36,3 @@ obj-$(CONFIG_FB_TFT_UC1611) += fb_uc1611.o
obj-$(CONFIG_FB_TFT_UC1701) += fb_uc1701.o
obj-$(CONFIG_FB_TFT_UPD161704) += fb_upd161704.o
obj-$(CONFIG_FB_TFT_WATTEROTT) += fb_watterott.o
-obj-$(CONFIG_FB_FLEX) += flexfb.o
-
-# Device modules
-obj-$(CONFIG_FB_TFT_FBTFT_DEVICE) += fbtft_device.o
diff --git a/drivers/staging/fbtft/fbtft-core.c b/drivers/staging/fbtft/fbtft-core.c
index cf5700a2ea66..a0a67aa517f0 100644
--- a/drivers/staging/fbtft/fbtft-core.c
+++ b/drivers/staging/fbtft/fbtft-core.c
@@ -714,7 +714,7 @@ struct fb_info *fbtft_framebuffer_alloc(struct fbtft_display *display,
if (par->gamma.curves && gamma) {
if (fbtft_gamma_parse_str(par, par->gamma.curves, gamma,
strlen(gamma)))
- goto alloc_fail;
+ goto release_framebuf;
}
/* Transmit buffer */
@@ -731,7 +731,7 @@ struct fb_info *fbtft_framebuffer_alloc(struct fbtft_display *display,
if (txbuflen > 0) {
txbuf = devm_kzalloc(par->info->device, txbuflen, GFP_KERNEL);
if (!txbuf)
- goto alloc_fail;
+ goto release_framebuf;
par->txbuf.buf = txbuf;
par->txbuf.len = txbuflen;
}
@@ -753,6 +753,9 @@ struct fb_info *fbtft_framebuffer_alloc(struct fbtft_display *display,
return info;
+release_framebuf:
+ framebuffer_release(info);
+
alloc_fail:
vfree(vmem);
diff --git a/drivers/staging/fbtft/fbtft_device.c b/drivers/staging/fbtft/fbtft_device.c
deleted file mode 100644
index 44e1410eb3fe..000000000000
--- a/drivers/staging/fbtft/fbtft_device.c
+++ /dev/null
@@ -1,1261 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- *
- * Copyright (C) 2013, Noralf Tronnes
- */
-
-#define pr_fmt(fmt) "fbtft_device: " fmt
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/gpio/consumer.h>
-#include <linux/spi/spi.h>
-#include <video/mipi_display.h>
-
-#include "fbtft.h"
-
-#define MAX_GPIOS 32
-
-static struct spi_device *spi_device;
-static struct platform_device *p_device;
-
-static char *name;
-module_param(name, charp, 0000);
-MODULE_PARM_DESC(name,
- "Devicename (required). name=list => list all supported devices.");
-
-static unsigned int rotate;
-module_param(rotate, uint, 0000);
-MODULE_PARM_DESC(rotate,
- "Angle to rotate display counter clockwise: 0, 90, 180, 270");
-
-static unsigned int busnum;
-module_param(busnum, uint, 0000);
-MODULE_PARM_DESC(busnum, "SPI bus number (default=0)");
-
-static unsigned int cs;
-module_param(cs, uint, 0000);
-MODULE_PARM_DESC(cs, "SPI chip select (default=0)");
-
-static unsigned int speed;
-module_param(speed, uint, 0000);
-MODULE_PARM_DESC(speed, "SPI speed (override device default)");
-
-static int mode = -1;
-module_param(mode, int, 0000);
-MODULE_PARM_DESC(mode, "SPI mode (override device default)");
-
-static unsigned int fps;
-module_param(fps, uint, 0000);
-MODULE_PARM_DESC(fps, "Frames per second (override driver default)");
-
-static char *gamma;
-module_param(gamma, charp, 0000);
-MODULE_PARM_DESC(gamma,
- "String representation of Gamma Curve(s). Driver specific.");
-
-static int txbuflen;
-module_param(txbuflen, int, 0000);
-MODULE_PARM_DESC(txbuflen, "txbuflen (override driver default)");
-
-static int bgr = -1;
-module_param(bgr, int, 0000);
-MODULE_PARM_DESC(bgr,
- "BGR bit (supported by some drivers).");
-
-static unsigned int startbyte;
-module_param(startbyte, uint, 0000);
-MODULE_PARM_DESC(startbyte, "Sets the Start byte used by some SPI displays.");
-
-static bool custom;
-module_param(custom, bool, 0000);
-MODULE_PARM_DESC(custom, "Add a custom display device. Use speed= argument to make it a SPI device, else platform_device");
-
-static unsigned int width;
-module_param(width, uint, 0000);
-MODULE_PARM_DESC(width, "Display width, used with the custom argument");
-
-static unsigned int height;
-module_param(height, uint, 0000);
-MODULE_PARM_DESC(height, "Display height, used with the custom argument");
-
-static unsigned int buswidth = 8;
-module_param(buswidth, uint, 0000);
-MODULE_PARM_DESC(buswidth, "Display bus width, used with the custom argument");
-
-static s16 init[FBTFT_MAX_INIT_SEQUENCE];
-static int init_num;
-module_param_array(init, short, &init_num, 0000);
-MODULE_PARM_DESC(init, "Init sequence, used with the custom argument");
-
-static unsigned long debug;
-module_param(debug, ulong, 0000);
-MODULE_PARM_DESC(debug,
- "level: 0-7 (the remaining 29 bits is for advanced usage)");
-
-static unsigned int verbose = 3;
-module_param(verbose, uint, 0000);
-MODULE_PARM_DESC(verbose,
- "0 silent, >1 show devices, >2 show devices before (default=3)");
-
-struct fbtft_device_display {
- char *name;
- struct spi_board_info *spi;
- struct platform_device *pdev;
-};
-
-static void fbtft_device_pdev_release(struct device *dev);
-
-static int write_gpio16_wr_slow(struct fbtft_par *par, void *buf, size_t len);
-static void adafruit18_green_tab_set_addr_win(struct fbtft_par *par,
- int xs, int ys, int xe, int ye);
-
-#define ADAFRUIT18_GAMMA \
- "02 1c 07 12 37 32 29 2d 29 25 2B 39 00 01 03 10\n" \
- "03 1d 07 06 2E 2C 29 2D 2E 2E 37 3F 00 00 02 10"
-
-#define CBERRY28_GAMMA \
- "D0 00 14 15 13 2C 42 43 4E 09 16 14 18 21\n" \
- "D0 00 14 15 13 0B 43 55 53 0C 17 14 23 20"
-
-static const s16 cberry28_init_sequence[] = {
- /* turn off sleep mode */
- -1, MIPI_DCS_EXIT_SLEEP_MODE,
- -2, 120,
-
- /* set pixel format to RGB-565 */
- -1, MIPI_DCS_SET_PIXEL_FORMAT, MIPI_DCS_PIXEL_FMT_16BIT,
-
- -1, 0xB2, 0x0C, 0x0C, 0x00, 0x33, 0x33,
-
- /*
- * VGH = 13.26V
- * VGL = -10.43V
- */
- -1, 0xB7, 0x35,
-
- /*
- * VDV and VRH register values come from command write
- * (instead of NVM)
- */
- -1, 0xC2, 0x01, 0xFF,
-
- /*
- * VAP = 4.7V + (VCOM + VCOM offset + 0.5 * VDV)
- * VAN = -4.7V + (VCOM + VCOM offset + 0.5 * VDV)
- */
- -1, 0xC3, 0x17,
-
- /* VDV = 0V */
- -1, 0xC4, 0x20,
-
- /* VCOM = 0.675V */
- -1, 0xBB, 0x17,
-
- /* VCOM offset = 0V */
- -1, 0xC5, 0x20,
-
- /*
- * AVDD = 6.8V
- * AVCL = -4.8V
- * VDS = 2.3V
- */
- -1, 0xD0, 0xA4, 0xA1,
-
- -1, MIPI_DCS_SET_DISPLAY_ON,
-
- -3,
-};
-
-static const s16 hy28b_init_sequence[] = {
- -1, 0x00e7, 0x0010, -1, 0x0000, 0x0001,
- -1, 0x0001, 0x0100, -1, 0x0002, 0x0700,
- -1, 0x0003, 0x1030, -1, 0x0004, 0x0000,
- -1, 0x0008, 0x0207, -1, 0x0009, 0x0000,
- -1, 0x000a, 0x0000, -1, 0x000c, 0x0001,
- -1, 0x000d, 0x0000, -1, 0x000f, 0x0000,
- -1, 0x0010, 0x0000, -1, 0x0011, 0x0007,
- -1, 0x0012, 0x0000, -1, 0x0013, 0x0000,
- -2, 50, -1, 0x0010, 0x1590, -1, 0x0011,
- 0x0227, -2, 50, -1, 0x0012, 0x009c, -2, 50,
- -1, 0x0013, 0x1900, -1, 0x0029, 0x0023,
- -1, 0x002b, 0x000e, -2, 50,
- -1, 0x0020, 0x0000, -1, 0x0021, 0x0000,
- -2, 50, -1, 0x0050, 0x0000,
- -1, 0x0051, 0x00ef, -1, 0x0052, 0x0000,
- -1, 0x0053, 0x013f, -1, 0x0060, 0xa700,
- -1, 0x0061, 0x0001, -1, 0x006a, 0x0000,
- -1, 0x0080, 0x0000, -1, 0x0081, 0x0000,
- -1, 0x0082, 0x0000, -1, 0x0083, 0x0000,
- -1, 0x0084, 0x0000, -1, 0x0085, 0x0000,
- -1, 0x0090, 0x0010, -1, 0x0092, 0x0000,
- -1, 0x0093, 0x0003, -1, 0x0095, 0x0110,
- -1, 0x0097, 0x0000, -1, 0x0098, 0x0000,
- -1, 0x0007, 0x0133, -1, 0x0020, 0x0000,
- -1, 0x0021, 0x0000, -2, 100, -3 };
-
-#define HY28B_GAMMA \
- "04 1F 4 7 7 0 7 7 6 0\n" \
- "0F 00 1 7 4 0 0 0 6 7"
-
-static const s16 pitft_init_sequence[] = {
- -1, MIPI_DCS_SOFT_RESET,
- -2, 5,
- -1, MIPI_DCS_SET_DISPLAY_OFF,
- -1, 0xEF, 0x03, 0x80, 0x02,
- -1, 0xCF, 0x00, 0xC1, 0x30,
- -1, 0xED, 0x64, 0x03, 0x12, 0x81,
- -1, 0xE8, 0x85, 0x00, 0x78,
- -1, 0xCB, 0x39, 0x2C, 0x00, 0x34, 0x02,
- -1, 0xF7, 0x20,
- -1, 0xEA, 0x00, 0x00,
- -1, 0xC0, 0x23,
- -1, 0xC1, 0x10,
- -1, 0xC5, 0x3E, 0x28,
- -1, 0xC7, 0x86,
- -1, MIPI_DCS_SET_PIXEL_FORMAT, 0x55,
- -1, 0xB1, 0x00, 0x18,
- -1, 0xB6, 0x08, 0x82, 0x27,
- -1, 0xF2, 0x00,
- -1, MIPI_DCS_SET_GAMMA_CURVE, 0x01,
- -1, 0xE0, 0x0F, 0x31, 0x2B, 0x0C, 0x0E, 0x08, 0x4E,
- 0xF1, 0x37, 0x07, 0x10, 0x03, 0x0E, 0x09, 0x00,
- -1, 0xE1, 0x00, 0x0E, 0x14, 0x03, 0x11, 0x07, 0x31,
- 0xC1, 0x48, 0x08, 0x0F, 0x0C, 0x31, 0x36, 0x0F,
- -1, MIPI_DCS_EXIT_SLEEP_MODE,
- -2, 100,
- -1, MIPI_DCS_SET_DISPLAY_ON,
- -2, 20,
- -3
-};
-
-static const s16 waveshare32b_init_sequence[] = {
- -1, 0xCB, 0x39, 0x2C, 0x00, 0x34, 0x02,
- -1, 0xCF, 0x00, 0xC1, 0x30,
- -1, 0xE8, 0x85, 0x00, 0x78,
- -1, 0xEA, 0x00, 0x00,
- -1, 0xED, 0x64, 0x03, 0x12, 0x81,
- -1, 0xF7, 0x20,
- -1, 0xC0, 0x23,
- -1, 0xC1, 0x10,
- -1, 0xC5, 0x3E, 0x28,
- -1, 0xC7, 0x86,
- -1, MIPI_DCS_SET_ADDRESS_MODE, 0x28,
- -1, MIPI_DCS_SET_PIXEL_FORMAT, 0x55,
- -1, 0xB1, 0x00, 0x18,
- -1, 0xB6, 0x08, 0x82, 0x27,
- -1, 0xF2, 0x00,
- -1, MIPI_DCS_SET_GAMMA_CURVE, 0x01,
- -1, 0xE0, 0x0F, 0x31, 0x2B, 0x0C, 0x0E, 0x08, 0x4E,
- 0xF1, 0x37, 0x07, 0x10, 0x03, 0x0E, 0x09, 0x00,
- -1, 0xE1, 0x00, 0x0E, 0x14, 0x03, 0x11, 0x07, 0x31,
- 0xC1, 0x48, 0x08, 0x0F, 0x0C, 0x31, 0x36, 0x0F,
- -1, MIPI_DCS_EXIT_SLEEP_MODE,
- -2, 120,
- -1, MIPI_DCS_SET_DISPLAY_ON,
- -1, MIPI_DCS_WRITE_MEMORY_START,
- -3
-};
-
-#define PIOLED_GAMMA "0 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 " \
- "2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 " \
- "3 3 3 4 4 4 4 4 4 4 4 4 4 4 4"
-
-/* Supported displays in alphabetical order */
-static struct fbtft_device_display displays[] = {
- {
- .name = "adafruit18",
- .spi = &(struct spi_board_info) {
- .modalias = "fb_st7735r",
- .max_speed_hz = 32000000,
- .mode = SPI_MODE_0,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 8,
- .backlight = 1,
- },
- .gamma = ADAFRUIT18_GAMMA,
- }
- }
- }, {
- .name = "adafruit18_green",
- .spi = &(struct spi_board_info) {
- .modalias = "fb_st7735r",
- .max_speed_hz = 4000000,
- .mode = SPI_MODE_0,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 8,
- .backlight = 1,
- .fbtftops.set_addr_win =
- adafruit18_green_tab_set_addr_win,
- },
- .bgr = true,
- .gamma = ADAFRUIT18_GAMMA,
- }
- }
- }, {
- .name = "adafruit22",
- .spi = &(struct spi_board_info) {
- .modalias = "fb_hx8340bn",
- .max_speed_hz = 32000000,
- .mode = SPI_MODE_0,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 9,
- .backlight = 1,
- },
- .bgr = true,
- }
- }
- }, {
- .name = "adafruit22a",
- .spi = &(struct spi_board_info) {
- .modalias = "fb_ili9340",
- .max_speed_hz = 32000000,
- .mode = SPI_MODE_0,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 8,
- .backlight = 1,
- },
- .bgr = true,
- }
- }
- }, {
- .name = "adafruit28",
- .spi = &(struct spi_board_info) {
- .modalias = "fb_ili9341",
- .max_speed_hz = 32000000,
- .mode = SPI_MODE_0,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 8,
- .backlight = 1,
- },
- .bgr = true,
- }
- }
- }, {
- .name = "adafruit13m",
- .spi = &(struct spi_board_info) {
- .modalias = "fb_ssd1306",
- .max_speed_hz = 16000000,
- .mode = SPI_MODE_0,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 8,
- },
- }
- }
- }, {
- .name = "admatec_c-berry28",
- .spi = &(struct spi_board_info) {
- .modalias = "fb_st7789v",
- .max_speed_hz = 48000000,
- .mode = SPI_MODE_0,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 8,
- .backlight = 1,
- .init_sequence = cberry28_init_sequence,
- },
- .gamma = CBERRY28_GAMMA,
- }
- }
- }, {
- .name = "agm1264k-fl",
- .pdev = &(struct platform_device) {
- .name = "fb_agm1264k-fl",
- .id = 0,
- .dev = {
- .release = fbtft_device_pdev_release,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 8,
- .backlight = FBTFT_ONBOARD_BACKLIGHT,
- },
- },
- }
- }
- }, {
- .name = "dogs102",
- .spi = &(struct spi_board_info) {
- .modalias = "fb_uc1701",
- .max_speed_hz = 8000000,
- .mode = SPI_MODE_0,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 8,
- },
- .bgr = true,
- }
- }
- }, {
- .name = "er_tftm050_2",
- .spi = &(struct spi_board_info) {
- .modalias = "fb_ra8875",
- .max_speed_hz = 5000000,
- .mode = SPI_MODE_3,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 8,
- .backlight = 1,
- .width = 480,
- .height = 272,
- },
- .bgr = true,
- }
- }
- }, {
- .name = "er_tftm070_5",
- .spi = &(struct spi_board_info) {
- .modalias = "fb_ra8875",
- .max_speed_hz = 5000000,
- .mode = SPI_MODE_3,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 8,
- .backlight = 1,
- .width = 800,
- .height = 480,
- },
- .bgr = true,
- }
- }
- }, {
- .name = "ew24ha0",
- .spi = &(struct spi_board_info) {
- .modalias = "fb_uc1611",
- .max_speed_hz = 32000000,
- .mode = SPI_MODE_3,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 8,
- },
- }
- }
- }, {
- .name = "ew24ha0_9bit",
- .spi = &(struct spi_board_info) {
- .modalias = "fb_uc1611",
- .max_speed_hz = 32000000,
- .mode = SPI_MODE_3,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 9,
- },
- }
- }
- }, {
- .name = "flexfb",
- .spi = &(struct spi_board_info) {
- .modalias = "flexfb",
- .max_speed_hz = 32000000,
- .mode = SPI_MODE_0,
- }
- }, {
- .name = "flexpfb",
- .pdev = &(struct platform_device) {
- .name = "flexpfb",
- .id = 0,
- .dev = {
- .release = fbtft_device_pdev_release,
- }
- }
- }, {
- .name = "freetronicsoled128",
- .spi = &(struct spi_board_info) {
- .modalias = "fb_ssd1351",
- .max_speed_hz = 20000000,
- .mode = SPI_MODE_0,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 8,
- .backlight = FBTFT_ONBOARD_BACKLIGHT,
- },
- .bgr = true,
- }
- }
- }, {
- .name = "hx8353d",
- .spi = &(struct spi_board_info) {
- .modalias = "fb_hx8353d",
- .max_speed_hz = 16000000,
- .mode = SPI_MODE_0,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 8,
- .backlight = 1,
- },
- }
- }
- }, {
- .name = "hy28a",
- .spi = &(struct spi_board_info) {
- .modalias = "fb_ili9320",
- .max_speed_hz = 32000000,
- .mode = SPI_MODE_3,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 8,
- .backlight = 1,
- },
- .startbyte = 0x70,
- .bgr = true,
- }
- }
- }, {
- .name = "hy28b",
- .spi = &(struct spi_board_info) {
- .modalias = "fb_ili9325",
- .max_speed_hz = 48000000,
- .mode = SPI_MODE_3,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 8,
- .backlight = 1,
- .init_sequence = hy28b_init_sequence,
- },
- .startbyte = 0x70,
- .bgr = true,
- .fps = 50,
- .gamma = HY28B_GAMMA,
- }
- }
- }, {
- .name = "ili9481",
- .spi = &(struct spi_board_info) {
- .modalias = "fb_ili9481",
- .max_speed_hz = 32000000,
- .mode = SPI_MODE_0,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .regwidth = 16,
- .buswidth = 8,
- .backlight = 1,
- },
- .bgr = true,
- }
- }
- }, {
- .name = "itdb24",
- .pdev = &(struct platform_device) {
- .name = "fb_s6d1121",
- .id = 0,
- .dev = {
- .release = fbtft_device_pdev_release,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 8,
- .backlight = 1,
- },
- .bgr = false,
- },
- }
- }
- }, {
- .name = "itdb28",
- .pdev = &(struct platform_device) {
- .name = "fb_ili9325",
- .id = 0,
- .dev = {
- .release = fbtft_device_pdev_release,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 8,
- .backlight = 1,
- },
- .bgr = true,
- },
- }
- }
- }, {
- .name = "itdb28_spi",
- .spi = &(struct spi_board_info) {
- .modalias = "fb_ili9325",
- .max_speed_hz = 32000000,
- .mode = SPI_MODE_0,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 8,
- .backlight = 1,
- },
- .bgr = true,
- }
- }
- }, {
- .name = "mi0283qt-2",
- .spi = &(struct spi_board_info) {
- .modalias = "fb_hx8347d",
- .max_speed_hz = 32000000,
- .mode = SPI_MODE_0,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 8,
- .backlight = 1,
- },
- .startbyte = 0x70,
- .bgr = true,
- }
- }
- }, {
- .name = "mi0283qt-9a",
- .spi = &(struct spi_board_info) {
- .modalias = "fb_ili9341",
- .max_speed_hz = 32000000,
- .mode = SPI_MODE_0,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 9,
- .backlight = 1,
- },
- .bgr = true,
- }
- }
- }, {
- .name = "mi0283qt-v2",
- .spi = &(struct spi_board_info) {
- .modalias = "fb_watterott",
- .max_speed_hz = 4000000,
- .mode = SPI_MODE_3,
- .platform_data = &(struct fbtft_platform_data) {
- }
- }
- }, {
- .name = "nokia3310",
- .spi = &(struct spi_board_info) {
- .modalias = "fb_pcd8544",
- .max_speed_hz = 400000,
- .mode = SPI_MODE_0,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 8,
- },
- }
- }
- }, {
- .name = "nokia3310a",
- .spi = &(struct spi_board_info) {
- .modalias = "fb_tls8204",
- .max_speed_hz = 1000000,
- .mode = SPI_MODE_0,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 8,
- },
- }
- }
- }, {
- .name = "nokia5110",
- .spi = &(struct spi_board_info) {
- .modalias = "fb_ili9163",
- .max_speed_hz = 12000000,
- .mode = SPI_MODE_0,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 8,
- .backlight = 1,
- },
- .bgr = true,
- }
- }
- }, {
- .name = "piscreen",
- .spi = &(struct spi_board_info) {
- .modalias = "fb_ili9486",
- .max_speed_hz = 32000000,
- .mode = SPI_MODE_0,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .regwidth = 16,
- .buswidth = 8,
- .backlight = 1,
- },
- .bgr = true,
- }
- }
- }, {
- .name = "pitft",
- .spi = &(struct spi_board_info) {
- .modalias = "fb_ili9340",
- .max_speed_hz = 32000000,
- .mode = SPI_MODE_0,
- .chip_select = 0,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 8,
- .backlight = 1,
- .init_sequence = pitft_init_sequence,
- },
- .bgr = true,
- }
- }
- }, {
- .name = "pioled",
- .spi = &(struct spi_board_info) {
- .modalias = "fb_ssd1351",
- .max_speed_hz = 20000000,
- .mode = SPI_MODE_0,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 8,
- },
- .bgr = true,
- .gamma = PIOLED_GAMMA
- }
- }
- }, {
- .name = "rpi-display",
- .spi = &(struct spi_board_info) {
- .modalias = "fb_ili9341",
- .max_speed_hz = 32000000,
- .mode = SPI_MODE_0,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 8,
- .backlight = 1,
- },
- .bgr = true,
- }
- }
- }, {
- .name = "s6d02a1",
- .spi = &(struct spi_board_info) {
- .modalias = "fb_s6d02a1",
- .max_speed_hz = 32000000,
- .mode = SPI_MODE_0,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 8,
- .backlight = 1,
- },
- .bgr = true,
- }
- }
- }, {
- .name = "sainsmart18",
- .spi = &(struct spi_board_info) {
- .modalias = "fb_st7735r",
- .max_speed_hz = 32000000,
- .mode = SPI_MODE_0,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 8,
- },
- }
- }
- }, {
- .name = "sainsmart32",
- .pdev = &(struct platform_device) {
- .name = "fb_ssd1289",
- .id = 0,
- .dev = {
- .release = fbtft_device_pdev_release,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 16,
- .txbuflen = -2, /* disable buffer */
- .backlight = 1,
- .fbtftops.write = write_gpio16_wr_slow,
- },
- .bgr = true,
- },
- },
- }
- }, {
- .name = "sainsmart32_fast",
- .pdev = &(struct platform_device) {
- .name = "fb_ssd1289",
- .id = 0,
- .dev = {
- .release = fbtft_device_pdev_release,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 16,
- .txbuflen = -2, /* disable buffer */
- .backlight = 1,
- },
- .bgr = true,
- },
- },
- }
- }, {
- .name = "sainsmart32_latched",
- .pdev = &(struct platform_device) {
- .name = "fb_ssd1289",
- .id = 0,
- .dev = {
- .release = fbtft_device_pdev_release,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 16,
- .txbuflen = -2, /* disable buffer */
- .backlight = 1,
- .fbtftops.write =
- fbtft_write_gpio16_wr_latched,
- },
- .bgr = true,
- },
- },
- }
- }, {
- .name = "sainsmart32_spi",
- .spi = &(struct spi_board_info) {
- .modalias = "fb_ssd1289",
- .max_speed_hz = 16000000,
- .mode = SPI_MODE_0,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 8,
- .backlight = 1,
- },
- .bgr = true,
- }
- }
- }, {
- .name = "spidev",
- .spi = &(struct spi_board_info) {
- .modalias = "spidev",
- .max_speed_hz = 500000,
- .bus_num = 0,
- .chip_select = 0,
- .mode = SPI_MODE_0,
- .platform_data = &(struct fbtft_platform_data) {
- }
- }
- }, {
- .name = "ssd1331",
- .spi = &(struct spi_board_info) {
- .modalias = "fb_ssd1331",
- .max_speed_hz = 20000000,
- .mode = SPI_MODE_3,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 8,
- },
- }
- }
- }, {
- .name = "tinylcd35",
- .spi = &(struct spi_board_info) {
- .modalias = "fb_tinylcd",
- .max_speed_hz = 32000000,
- .mode = SPI_MODE_0,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 8,
- .backlight = 1,
- },
- .bgr = true,
- }
- }
- }, {
- .name = "tm022hdh26",
- .spi = &(struct spi_board_info) {
- .modalias = "fb_ili9341",
- .max_speed_hz = 32000000,
- .mode = SPI_MODE_0,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 8,
- .backlight = 1,
- },
- .bgr = true,
- }
- }
- }, {
- .name = "tontec35_9481", /* boards before 02 July 2014 */
- .spi = &(struct spi_board_info) {
- .modalias = "fb_ili9481",
- .max_speed_hz = 128000000,
- .mode = SPI_MODE_3,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 8,
- .backlight = 1,
- },
- .bgr = true,
- }
- }
- }, {
- .name = "tontec35_9486", /* boards after 02 July 2014 */
- .spi = &(struct spi_board_info) {
- .modalias = "fb_ili9486",
- .max_speed_hz = 128000000,
- .mode = SPI_MODE_3,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 8,
- .backlight = 1,
- },
- .bgr = true,
- }
- }
- }, {
- .name = "upd161704",
- .spi = &(struct spi_board_info) {
- .modalias = "fb_upd161704",
- .max_speed_hz = 32000000,
- .mode = SPI_MODE_0,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 8,
- },
- }
- }
- }, {
- .name = "waveshare32b",
- .spi = &(struct spi_board_info) {
- .modalias = "fb_ili9340",
- .max_speed_hz = 48000000,
- .mode = SPI_MODE_0,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 8,
- .backlight = 1,
- .init_sequence =
- waveshare32b_init_sequence,
- },
- .bgr = true,
- }
- }
- }, {
- .name = "waveshare22",
- .spi = &(struct spi_board_info) {
- .modalias = "fb_bd663474",
- .max_speed_hz = 32000000,
- .mode = SPI_MODE_3,
- .platform_data = &(struct fbtft_platform_data) {
- .display = {
- .buswidth = 8,
- },
- }
- }
- }, {
- /* This should be the last item.
- * Used with the custom argument
- */
- .name = "",
- .spi = &(struct spi_board_info) {
- .modalias = "",
- .max_speed_hz = 0,
- .mode = SPI_MODE_0,
- .platform_data = &(struct fbtft_platform_data) {
- }
- },
- .pdev = &(struct platform_device) {
- .name = "",
- .id = 0,
- .dev = {
- .release = fbtft_device_pdev_release,
- .platform_data = &(struct fbtft_platform_data) {
- },
- },
- },
- }
-};
-
-static int write_gpio16_wr_slow(struct fbtft_par *par, void *buf, size_t len)
-{
- u16 data;
- int i;
-#ifndef DO_NOT_OPTIMIZE_FBTFT_WRITE_GPIO
- static u16 prev_data;
-#endif
-
- fbtft_par_dbg_hex(DEBUG_WRITE, par, par->info->device, u8, buf, len,
- "%s(len=%zu): ", __func__, len);
-
- while (len) {
- data = *(u16 *)buf;
-
- /* Start writing by pulling down /WR */
- gpiod_set_value(par->gpio.wr, 0);
-
- /* Set data */
-#ifndef DO_NOT_OPTIMIZE_FBTFT_WRITE_GPIO
- if (data == prev_data) {
- gpiod_set_value(par->gpio.wr, 0); /* used as delay */
- } else {
- for (i = 0; i < 16; i++) {
- if ((data & 1) != (prev_data & 1))
- gpiod_set_value(par->gpio.db[i],
- data & 1);
- data >>= 1;
- prev_data >>= 1;
- }
- }
-#else
- for (i = 0; i < 16; i++) {
- gpiod_set_value(par->gpio.db[i], data & 1);
- data >>= 1;
- }
-#endif
-
- /* Pullup /WR */
- gpiod_set_value(par->gpio.wr, 1);
-
-#ifndef DO_NOT_OPTIMIZE_FBTFT_WRITE_GPIO
- prev_data = *(u16 *)buf;
-#endif
- buf += 2;
- len -= 2;
- }
-
- return 0;
-}
-
-static void adafruit18_green_tab_set_addr_win(struct fbtft_par *par,
- int xs, int ys, int xe, int ye)
-{
- write_reg(par, 0x2A, 0, xs + 2, 0, xe + 2);
- write_reg(par, 0x2B, 0, ys + 1, 0, ye + 1);
- write_reg(par, 0x2C);
-}
-
-static void fbtft_device_pdev_release(struct device *dev)
-{
-/* Needed to silence this message:
- * Device 'xxx' does not have a release() function,
- * it is broken and must be fixed
- */
-}
-
-static int spi_device_found(struct device *dev, void *data)
-{
- struct spi_device *spi = to_spi_device(dev);
-
- dev_info(dev, "%s %s %dkHz %d bits mode=0x%02X\n", spi->modalias,
- dev_name(dev), spi->max_speed_hz / 1000, spi->bits_per_word,
- spi->mode);
-
- return 0;
-}
-
-static void pr_spi_devices(void)
-{
- pr_debug("SPI devices registered:\n");
- bus_for_each_dev(&spi_bus_type, NULL, NULL, spi_device_found);
-}
-
-static int p_device_found(struct device *dev, void *data)
-{
- struct platform_device
- *pdev = to_platform_device(dev);
-
- if (strstr(pdev->name, "fb"))
- dev_info(dev, "%s id=%d pdata? %s\n", pdev->name, pdev->id,
- pdev->dev.platform_data ? "yes" : "no");
-
- return 0;
-}
-
-static void pr_p_devices(void)
-{
- pr_debug("'fb' Platform devices registered:\n");
- bus_for_each_dev(&platform_bus_type, NULL, NULL, p_device_found);
-}
-
-#ifdef MODULE
-static void fbtft_device_spi_delete(struct spi_master *master, unsigned int cs)
-{
- struct device *dev;
- char str[32];
-
- snprintf(str, sizeof(str), "%s.%u", dev_name(&master->dev), cs);
-
- dev = bus_find_device_by_name(&spi_bus_type, NULL, str);
- if (dev) {
- if (verbose)
- dev_info(dev, "Deleting %s\n", str);
- device_del(dev);
- }
-}
-
-static int fbtft_device_spi_device_register(struct spi_board_info *spi)
-{
- struct spi_master *master;
-
- master = spi_busnum_to_master(spi->bus_num);
- if (!master) {
- pr_err("spi_busnum_to_master(%d) returned NULL\n",
- spi->bus_num);
- return -EINVAL;
- }
- /* make sure it's available */
- fbtft_device_spi_delete(master, spi->chip_select);
- spi_device = spi_new_device(master, spi);
- put_device(&master->dev);
- if (!spi_device) {
- dev_err(&master->dev, "spi_new_device() returned NULL\n");
- return -EPERM;
- }
- return 0;
-}
-#else
-static int fbtft_device_spi_device_register(struct spi_board_info *spi)
-{
- return spi_register_board_info(spi, 1);
-}
-#endif
-
-static int __init fbtft_device_init(void)
-{
- struct spi_board_info *spi = NULL;
- struct fbtft_platform_data *pdata;
- bool found = false;
- int i = 0;
- int ret = 0;
-
- if (!name) {
-#ifdef MODULE
- pr_err("missing module parameter: 'name'\n");
- return -EINVAL;
-#else
- return 0;
-#endif
- }
-
- if (init_num > FBTFT_MAX_INIT_SEQUENCE) {
- pr_err("init parameter: exceeded max array size: %d\n",
- FBTFT_MAX_INIT_SEQUENCE);
- return -EINVAL;
- }
-
- if (verbose > 2) {
- pr_spi_devices(); /* print list of registered SPI devices */
- pr_p_devices(); /* print list of 'fb' platform devices */
- }
-
- pr_debug("name='%s', busnum=%d, cs=%d\n", name, busnum, cs);
-
- if (rotate > 0 && rotate < 4) {
- rotate = (4 - rotate) * 90;
- pr_warn("argument 'rotate' should be an angle. Values 1-3 is deprecated. Setting it to %d.\n",
- rotate);
- }
- if (rotate != 0 && rotate != 90 && rotate != 180 && rotate != 270) {
- pr_warn("argument 'rotate' illegal value: %d. Setting it to 0.\n",
- rotate);
- rotate = 0;
- }
-
- /* name=list lists all supported displays */
- if (strcmp(name, "list") == 0) {
- pr_info("Supported displays:\n");
-
- for (i = 0; i < ARRAY_SIZE(displays); i++)
- pr_info("%s\n", displays[i].name);
- return -ECANCELED;
- }
-
- if (custom) {
- i = ARRAY_SIZE(displays) - 1;
- displays[i].name = name;
- if (speed == 0) {
- displays[i].pdev->name = name;
- displays[i].spi = NULL;
- } else {
- size_t len;
-
- len = strlcpy(displays[i].spi->modalias, name,
- SPI_NAME_SIZE);
- if (len >= SPI_NAME_SIZE)
- pr_warn("modalias (name) truncated to: %s\n",
- displays[i].spi->modalias);
- displays[i].pdev = NULL;
- }
- }
-
- for (i = 0; i < ARRAY_SIZE(displays); i++) {
- if (strncmp(name, displays[i].name, SPI_NAME_SIZE) == 0) {
- if (displays[i].spi) {
- spi = displays[i].spi;
- spi->chip_select = cs;
- spi->bus_num = busnum;
- if (speed)
- spi->max_speed_hz = speed;
- if (mode != -1)
- spi->mode = mode;
- pdata = (void *)spi->platform_data;
- } else if (displays[i].pdev) {
- p_device = displays[i].pdev;
- pdata = p_device->dev.platform_data;
- } else {
- pr_err("broken displays array\n");
- return -EINVAL;
- }
-
- pdata->rotate = rotate;
- if (bgr == 0)
- pdata->bgr = false;
- else if (bgr == 1)
- pdata->bgr = true;
- if (startbyte)
- pdata->startbyte = startbyte;
- if (gamma)
- pdata->gamma = gamma;
- pdata->display.debug = debug;
- if (fps)
- pdata->fps = fps;
- if (txbuflen)
- pdata->txbuflen = txbuflen;
- if (init_num)
- pdata->display.init_sequence = init;
- if (custom) {
- pdata->display.width = width;
- pdata->display.height = height;
- pdata->display.buswidth = buswidth;
- pdata->display.backlight = 1;
- }
-
- if (displays[i].spi) {
- ret = fbtft_device_spi_device_register(spi);
- if (ret) {
- pr_err("failed to register SPI device\n");
- return ret;
- }
- } else {
- ret = platform_device_register(p_device);
- if (ret < 0) {
- pr_err("platform_device_register() returned %d\n",
- ret);
- return ret;
- }
- }
- found = true;
- break;
- }
- }
-
- if (!found) {
- pr_err("display not supported: '%s'\n", name);
- return -EINVAL;
- }
-
- if (spi_device && (verbose > 1))
- pr_spi_devices();
- if (p_device && (verbose > 1))
- pr_p_devices();
-
- return 0;
-}
-
-static void __exit fbtft_device_exit(void)
-{
- if (spi_device) {
- device_del(&spi_device->dev);
- kfree(spi_device);
- }
-
- if (p_device)
- platform_device_unregister(p_device);
-}
-
-arch_initcall(fbtft_device_init);
-module_exit(fbtft_device_exit);
-
-MODULE_DESCRIPTION("Add a FBTFT device.");
-MODULE_AUTHOR("Noralf Tronnes");
-MODULE_LICENSE("GPL");
diff --git a/drivers/staging/fbtft/flexfb.c b/drivers/staging/fbtft/flexfb.c
deleted file mode 100644
index 3747321011fa..000000000000
--- a/drivers/staging/fbtft/flexfb.c
+++ /dev/null
@@ -1,851 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Generic FB driver for TFT LCD displays
- *
- * Copyright (C) 2013 Noralf Tronnes
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/vmalloc.h>
-#include <linux/gpio/consumer.h>
-#include <linux/spi/spi.h>
-#include <linux/delay.h>
-
-#include "fbtft.h"
-
-#define DRVNAME "flexfb"
-
-static char *chip;
-module_param(chip, charp, 0000);
-MODULE_PARM_DESC(chip, "LCD controller");
-
-static unsigned int width;
-module_param(width, uint, 0000);
-MODULE_PARM_DESC(width, "Display width");
-
-static unsigned int height;
-module_param(height, uint, 0000);
-MODULE_PARM_DESC(height, "Display height");
-
-static s16 init[512];
-static int init_num;
-module_param_array(init, short, &init_num, 0000);
-MODULE_PARM_DESC(init, "Init sequence");
-
-static unsigned int setaddrwin;
-module_param(setaddrwin, uint, 0000);
-MODULE_PARM_DESC(setaddrwin, "Which set_addr_win() implementation to use");
-
-static unsigned int buswidth = 8;
-module_param(buswidth, uint, 0000);
-MODULE_PARM_DESC(buswidth, "Width of databus (default: 8)");
-
-static unsigned int regwidth = 8;
-module_param(regwidth, uint, 0000);
-MODULE_PARM_DESC(regwidth, "Width of controller register (default: 8)");
-
-static bool nobacklight;
-module_param(nobacklight, bool, 0000);
-MODULE_PARM_DESC(nobacklight, "Turn off backlight functionality.");
-
-static bool latched;
-module_param(latched, bool, 0000);
-MODULE_PARM_DESC(latched, "Use with latched 16-bit databus");
-
-static const s16 *initp;
-static int initp_num;
-
-/* default init sequences */
-static const s16 st7735r_init[] = {
- -1, 0x01,
- -2, 150,
- -1, 0x11,
- -2, 500,
- -1, 0xB1, 0x01, 0x2C, 0x2D,
- -1, 0xB2, 0x01, 0x2C, 0x2D,
- -1, 0xB3, 0x01, 0x2C, 0x2D, 0x01, 0x2C, 0x2D,
- -1, 0xB4, 0x07,
- -1, 0xC0, 0xA2, 0x02, 0x84,
- -1, 0xC1, 0xC5,
- -1, 0xC2, 0x0A, 0x00,
- -1, 0xC3, 0x8A, 0x2A,
- -1, 0xC4, 0x8A, 0xEE,
- -1, 0xC5, 0x0E,
- -1, 0x20,
- -1, 0x36, 0xC0,
- -1, 0x3A, 0x05,
- -1, 0xE0, 0x0f, 0x1a, 0x0f, 0x18, 0x2f, 0x28, 0x20, 0x22,
- 0x1f, 0x1b, 0x23, 0x37, 0x00, 0x07, 0x02, 0x10,
- -1, 0xE1, 0x0f, 0x1b, 0x0f, 0x17, 0x33, 0x2c, 0x29, 0x2e,
- 0x30, 0x30, 0x39, 0x3f, 0x00, 0x07, 0x03, 0x10,
- -1, 0x29,
- -2, 100,
- -1, 0x13,
- -2, 10,
- -3
-};
-
-static const s16 ssd1289_init[] = {
- -1, 0x00, 0x0001,
- -1, 0x03, 0xA8A4,
- -1, 0x0C, 0x0000,
- -1, 0x0D, 0x080C,
- -1, 0x0E, 0x2B00,
- -1, 0x1E, 0x00B7,
- -1, 0x01, 0x2B3F,
- -1, 0x02, 0x0600,
- -1, 0x10, 0x0000,
- -1, 0x11, 0x6070,
- -1, 0x05, 0x0000,
- -1, 0x06, 0x0000,
- -1, 0x16, 0xEF1C,
- -1, 0x17, 0x0003,
- -1, 0x07, 0x0233,
- -1, 0x0B, 0x0000,
- -1, 0x0F, 0x0000,
- -1, 0x41, 0x0000,
- -1, 0x42, 0x0000,
- -1, 0x48, 0x0000,
- -1, 0x49, 0x013F,
- -1, 0x4A, 0x0000,
- -1, 0x4B, 0x0000,
- -1, 0x44, 0xEF00,
- -1, 0x45, 0x0000,
- -1, 0x46, 0x013F,
- -1, 0x30, 0x0707,
- -1, 0x31, 0x0204,
- -1, 0x32, 0x0204,
- -1, 0x33, 0x0502,
- -1, 0x34, 0x0507,
- -1, 0x35, 0x0204,
- -1, 0x36, 0x0204,
- -1, 0x37, 0x0502,
- -1, 0x3A, 0x0302,
- -1, 0x3B, 0x0302,
- -1, 0x23, 0x0000,
- -1, 0x24, 0x0000,
- -1, 0x25, 0x8000,
- -1, 0x4f, 0x0000,
- -1, 0x4e, 0x0000,
- -1, 0x22,
- -3
-};
-
-static const s16 hx8340bn_init[] = {
- -1, 0xC1, 0xFF, 0x83, 0x40,
- -1, 0x11,
- -2, 150,
- -1, 0xCA, 0x70, 0x00, 0xD9,
- -1, 0xB0, 0x01, 0x11,
- -1, 0xC9, 0x90, 0x49, 0x10, 0x28, 0x28, 0x10, 0x00, 0x06,
- -2, 20,
- -1, 0xC2, 0x60, 0x71, 0x01, 0x0E, 0x05, 0x02, 0x09, 0x31, 0x0A,
- -1, 0xC3, 0x67, 0x30, 0x61, 0x17, 0x48, 0x07, 0x05, 0x33,
- -2, 10,
- -1, 0xB5, 0x35, 0x20, 0x45,
- -1, 0xB4, 0x33, 0x25, 0x4C,
- -2, 10,
- -1, 0x3A, 0x05,
- -1, 0x29,
- -2, 10,
- -3
-};
-
-static const s16 ili9225_init[] = {
- -1, 0x0001, 0x011C,
- -1, 0x0002, 0x0100,
- -1, 0x0003, 0x1030,
- -1, 0x0008, 0x0808,
- -1, 0x000C, 0x0000,
- -1, 0x000F, 0x0A01,
- -1, 0x0020, 0x0000,
- -1, 0x0021, 0x0000,
- -2, 50,
- -1, 0x0010, 0x0A00,
- -1, 0x0011, 0x1038,
- -2, 50,
- -1, 0x0012, 0x1121,
- -1, 0x0013, 0x004E,
- -1, 0x0014, 0x676F,
- -1, 0x0030, 0x0000,
- -1, 0x0031, 0x00DB,
- -1, 0x0032, 0x0000,
- -1, 0x0033, 0x0000,
- -1, 0x0034, 0x00DB,
- -1, 0x0035, 0x0000,
- -1, 0x0036, 0x00AF,
- -1, 0x0037, 0x0000,
- -1, 0x0038, 0x00DB,
- -1, 0x0039, 0x0000,
- -1, 0x0050, 0x0000,
- -1, 0x0051, 0x060A,
- -1, 0x0052, 0x0D0A,
- -1, 0x0053, 0x0303,
- -1, 0x0054, 0x0A0D,
- -1, 0x0055, 0x0A06,
- -1, 0x0056, 0x0000,
- -1, 0x0057, 0x0303,
- -1, 0x0058, 0x0000,
- -1, 0x0059, 0x0000,
- -2, 50,
- -1, 0x0007, 0x1017,
- -2, 50,
- -3
-};
-
-static const s16 ili9320_init[] = {
- -1, 0x00E5, 0x8000,
- -1, 0x0000, 0x0001,
- -1, 0x0001, 0x0100,
- -1, 0x0002, 0x0700,
- -1, 0x0003, 0x1030,
- -1, 0x0004, 0x0000,
- -1, 0x0008, 0x0202,
- -1, 0x0009, 0x0000,
- -1, 0x000A, 0x0000,
- -1, 0x000C, 0x0000,
- -1, 0x000D, 0x0000,
- -1, 0x000F, 0x0000,
- -1, 0x0010, 0x0000,
- -1, 0x0011, 0x0007,
- -1, 0x0012, 0x0000,
- -1, 0x0013, 0x0000,
- -2, 200,
- -1, 0x0010, 0x17B0,
- -1, 0x0011, 0x0031,
- -2, 50,
- -1, 0x0012, 0x0138,
- -2, 50,
- -1, 0x0013, 0x1800,
- -1, 0x0029, 0x0008,
- -2, 50,
- -1, 0x0020, 0x0000,
- -1, 0x0021, 0x0000,
- -1, 0x0030, 0x0000,
- -1, 0x0031, 0x0505,
- -1, 0x0032, 0x0004,
- -1, 0x0035, 0x0006,
- -1, 0x0036, 0x0707,
- -1, 0x0037, 0x0105,
- -1, 0x0038, 0x0002,
- -1, 0x0039, 0x0707,
- -1, 0x003C, 0x0704,
- -1, 0x003D, 0x0807,
- -1, 0x0050, 0x0000,
- -1, 0x0051, 0x00EF,
- -1, 0x0052, 0x0000,
- -1, 0x0053, 0x013F,
- -1, 0x0060, 0x2700,
- -1, 0x0061, 0x0001,
- -1, 0x006A, 0x0000,
- -1, 0x0080, 0x0000,
- -1, 0x0081, 0x0000,
- -1, 0x0082, 0x0000,
- -1, 0x0083, 0x0000,
- -1, 0x0084, 0x0000,
- -1, 0x0085, 0x0000,
- -1, 0x0090, 0x0010,
- -1, 0x0092, 0x0000,
- -1, 0x0093, 0x0003,
- -1, 0x0095, 0x0110,
- -1, 0x0097, 0x0000,
- -1, 0x0098, 0x0000,
- -1, 0x0007, 0x0173,
- -3
-};
-
-static const s16 ili9325_init[] = {
- -1, 0x00E3, 0x3008,
- -1, 0x00E7, 0x0012,
- -1, 0x00EF, 0x1231,
- -1, 0x0001, 0x0100,
- -1, 0x0002, 0x0700,
- -1, 0x0003, 0x1030,
- -1, 0x0004, 0x0000,
- -1, 0x0008, 0x0207,
- -1, 0x0009, 0x0000,
- -1, 0x000A, 0x0000,
- -1, 0x000C, 0x0000,
- -1, 0x000D, 0x0000,
- -1, 0x000F, 0x0000,
- -1, 0x0010, 0x0000,
- -1, 0x0011, 0x0007,
- -1, 0x0012, 0x0000,
- -1, 0x0013, 0x0000,
- -2, 200,
- -1, 0x0010, 0x1690,
- -1, 0x0011, 0x0223,
- -2, 50,
- -1, 0x0012, 0x000D,
- -2, 50,
- -1, 0x0013, 0x1200,
- -1, 0x0029, 0x000A,
- -1, 0x002B, 0x000C,
- -2, 50,
- -1, 0x0020, 0x0000,
- -1, 0x0021, 0x0000,
- -1, 0x0030, 0x0000,
- -1, 0x0031, 0x0506,
- -1, 0x0032, 0x0104,
- -1, 0x0035, 0x0207,
- -1, 0x0036, 0x000F,
- -1, 0x0037, 0x0306,
- -1, 0x0038, 0x0102,
- -1, 0x0039, 0x0707,
- -1, 0x003C, 0x0702,
- -1, 0x003D, 0x1604,
- -1, 0x0050, 0x0000,
- -1, 0x0051, 0x00EF,
- -1, 0x0052, 0x0000,
- -1, 0x0053, 0x013F,
- -1, 0x0060, 0xA700,
- -1, 0x0061, 0x0001,
- -1, 0x006A, 0x0000,
- -1, 0x0080, 0x0000,
- -1, 0x0081, 0x0000,
- -1, 0x0082, 0x0000,
- -1, 0x0083, 0x0000,
- -1, 0x0084, 0x0000,
- -1, 0x0085, 0x0000,
- -1, 0x0090, 0x0010,
- -1, 0x0092, 0x0600,
- -1, 0x0007, 0x0133,
- -3
-};
-
-static const s16 ili9341_init[] = {
- -1, 0x28,
- -2, 20,
- -1, 0xCF, 0x00, 0x83, 0x30,
- -1, 0xED, 0x64, 0x03, 0x12, 0x81,
- -1, 0xE8, 0x85, 0x01, 0x79,
- -1, 0xCB, 0x39, 0x2c, 0x00, 0x34, 0x02,
- -1, 0xF7, 0x20,
- -1, 0xEA, 0x00, 0x00,
- -1, 0xC0, 0x26,
- -1, 0xC1, 0x11,
- -1, 0xC5, 0x35, 0x3E,
- -1, 0xC7, 0xBE,
- -1, 0xB1, 0x00, 0x1B,
- -1, 0xB6, 0x0a, 0x82, 0x27, 0x00,
- -1, 0xB7, 0x07,
- -1, 0x3A, 0x55,
- -1, 0x36, 0x48,
- -1, 0x11,
- -2, 120,
- -1, 0x29,
- -2, 20,
- -3
-};
-
-static const s16 ssd1351_init[] = {
- -1, 0xfd, 0x12,
- -1, 0xfd, 0xb1,
- -1, 0xae,
- -1, 0xb3, 0xf1,
- -1, 0xca, 0x7f,
- -1, 0xa0, 0x74,
- -1, 0x15, 0x00, 0x7f,
- -1, 0x75, 0x00, 0x7f,
- -1, 0xa1, 0x00,
- -1, 0xa2, 0x00,
- -1, 0xb5, 0x00,
- -1, 0xab, 0x01,
- -1, 0xb1, 0x32,
- -1, 0xb4, 0xa0, 0xb5, 0x55,
- -1, 0xbb, 0x17,
- -1, 0xbe, 0x05,
- -1, 0xc1, 0xc8, 0x80, 0xc8,
- -1, 0xc7, 0x0f,
- -1, 0xb6, 0x01,
- -1, 0xa6,
- -1, 0xaf,
- -3
-};
-
-/**
- * struct flexfb_lcd_controller - Describes the LCD controller properties
- * @name: Model name of the chip
- * @width: Width of display in pixels
- * @height: Height of display in pixels
- * @setaddrwin: Which set_addr_win() implementation to use
- * @regwidth: LCD Controller Register width in bits
- * @init_seq: LCD initialization sequence
- * @init_seq_sz: Size of LCD initialization sequence
- */
-struct flexfb_lcd_controller {
- const char *name;
- unsigned int width;
- unsigned int height;
- unsigned int setaddrwin;
- unsigned int regwidth;
- const s16 *init_seq;
- int init_seq_sz;
-};
-
-static const struct flexfb_lcd_controller flexfb_chip_table[] = {
- {
- .name = "st7735r",
- .width = 120,
- .height = 160,
- .init_seq = st7735r_init,
- .init_seq_sz = ARRAY_SIZE(st7735r_init),
- },
- {
- .name = "hx8340bn",
- .width = 176,
- .height = 220,
- .init_seq = hx8340bn_init,
- .init_seq_sz = ARRAY_SIZE(hx8340bn_init),
- },
- {
- .name = "ili9225",
- .width = 176,
- .height = 220,
- .regwidth = 16,
- .init_seq = ili9225_init,
- .init_seq_sz = ARRAY_SIZE(ili9225_init),
- },
- {
- .name = "ili9320",
- .width = 240,
- .height = 320,
- .setaddrwin = 1,
- .regwidth = 16,
- .init_seq = ili9320_init,
- .init_seq_sz = ARRAY_SIZE(ili9320_init),
- },
- {
- .name = "ili9325",
- .width = 240,
- .height = 320,
- .setaddrwin = 1,
- .regwidth = 16,
- .init_seq = ili9325_init,
- .init_seq_sz = ARRAY_SIZE(ili9325_init),
- },
- {
- .name = "ili9341",
- .width = 240,
- .height = 320,
- .init_seq = ili9341_init,
- .init_seq_sz = ARRAY_SIZE(ili9341_init),
- },
- {
- .name = "ssd1289",
- .width = 240,
- .height = 320,
- .setaddrwin = 2,
- .regwidth = 16,
- .init_seq = ssd1289_init,
- .init_seq_sz = ARRAY_SIZE(ssd1289_init),
- },
- {
- .name = "ssd1351",
- .width = 128,
- .height = 128,
- .setaddrwin = 3,
- .init_seq = ssd1351_init,
- .init_seq_sz = ARRAY_SIZE(ssd1351_init),
- },
-};
-
-/* ili9320, ili9325 */
-static void flexfb_set_addr_win_1(struct fbtft_par *par,
- int xs, int ys, int xe, int ye)
-{
- switch (par->info->var.rotate) {
- /* R20h = Horizontal GRAM Start Address */
- /* R21h = Vertical GRAM Start Address */
- case 0:
- write_reg(par, 0x0020, xs);
- write_reg(par, 0x0021, ys);
- break;
- case 180:
- write_reg(par, 0x0020, width - 1 - xs);
- write_reg(par, 0x0021, height - 1 - ys);
- break;
- case 270:
- write_reg(par, 0x0020, width - 1 - ys);
- write_reg(par, 0x0021, xs);
- break;
- case 90:
- write_reg(par, 0x0020, ys);
- write_reg(par, 0x0021, height - 1 - xs);
- break;
- }
- write_reg(par, 0x0022); /* Write Data to GRAM */
-}
-
-/* ssd1289 */
-static void flexfb_set_addr_win_2(struct fbtft_par *par,
- int xs, int ys, int xe, int ye)
-{
- switch (par->info->var.rotate) {
- /* R4Eh - Set GDDRAM X address counter */
- /* R4Fh - Set GDDRAM Y address counter */
- case 0:
- write_reg(par, 0x4e, xs);
- write_reg(par, 0x4f, ys);
- break;
- case 180:
- write_reg(par, 0x4e, par->info->var.xres - 1 - xs);
- write_reg(par, 0x4f, par->info->var.yres - 1 - ys);
- break;
- case 270:
- write_reg(par, 0x4e, par->info->var.yres - 1 - ys);
- write_reg(par, 0x4f, xs);
- break;
- case 90:
- write_reg(par, 0x4e, ys);
- write_reg(par, 0x4f, par->info->var.xres - 1 - xs);
- break;
- }
-
- /* R22h - RAM data write */
- write_reg(par, 0x22, 0);
-}
-
-/* ssd1351 */
-static void set_addr_win_3(struct fbtft_par *par,
- int xs, int ys, int xe, int ye)
-{
- write_reg(par, 0x15, xs, xe);
- write_reg(par, 0x75, ys, ye);
- write_reg(par, 0x5C);
-}
-
-static int flexfb_verify_gpios_dc(struct fbtft_par *par)
-{
- fbtft_par_dbg(DEBUG_VERIFY_GPIOS, par, "%s()\n", __func__);
-
- if (!par->gpio.dc) {
- dev_err(par->info->device,
- "Missing info about 'dc' gpio. Aborting.\n");
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int flexfb_verify_gpios_db(struct fbtft_par *par)
-{
- int i;
- int num_db = buswidth;
-
- fbtft_par_dbg(DEBUG_VERIFY_GPIOS, par, "%s()\n", __func__);
-
- if (!par->gpio.dc) {
- dev_err(par->info->device, "Missing info about 'dc' gpio. Aborting.\n");
- return -EINVAL;
- }
- if (!par->gpio.wr) {
- dev_err(par->info->device, "Missing info about 'wr' gpio. Aborting.\n");
- return -EINVAL;
- }
- if (latched && !par->gpio.latch) {
- dev_err(par->info->device, "Missing info about 'latch' gpio. Aborting.\n");
- return -EINVAL;
- }
- if (latched)
- num_db = buswidth / 2;
- for (i = 0; i < num_db; i++) {
- if (!par->gpio.db[i]) {
- dev_err(par->info->device,
- "Missing info about 'db%02d' gpio. Aborting.\n",
- i);
- return -EINVAL;
- }
- }
-
- return 0;
-}
-
-static void flexfb_chip_load_param(const struct flexfb_lcd_controller *chip)
-{
- if (!width)
- width = chip->width;
- if (!height)
- height = chip->height;
- setaddrwin = chip->setaddrwin;
- if (chip->regwidth)
- regwidth = chip->regwidth;
- if (!init_num) {
- initp = chip->init_seq;
- initp_num = chip->init_seq_sz;
- }
-}
-
-static struct fbtft_display flex_display = { };
-
-static int flexfb_chip_init(const struct device *dev)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(flexfb_chip_table); i++)
- if (!strcmp(chip, flexfb_chip_table[i].name)) {
- flexfb_chip_load_param(&flexfb_chip_table[i]);
- return 0;
- }
-
- dev_err(dev, "chip=%s is not supported\n", chip);
-
- return -EINVAL;
-}
-
-static int flexfb_probe_common(struct spi_device *sdev,
- struct platform_device *pdev)
-{
- struct device *dev;
- struct fb_info *info;
- struct fbtft_par *par;
- int ret;
-
- initp = init;
- initp_num = init_num;
-
- if (sdev)
- dev = &sdev->dev;
- else
- dev = &pdev->dev;
-
- fbtft_init_dbg(dev, "%s(%s)\n", __func__,
- sdev ? "'SPI device'" : "'Platform device'");
-
- if (chip) {
- ret = flexfb_chip_init(dev);
- if (ret)
- return ret;
- }
-
- if (width == 0 || height == 0) {
- dev_err(dev, "argument(s) missing: width and height has to be set.\n");
- return -EINVAL;
- }
- flex_display.width = width;
- flex_display.height = height;
- fbtft_init_dbg(dev, "Display resolution: %dx%d\n", width, height);
- fbtft_init_dbg(dev, "chip = %s\n", chip ? chip : "not set");
- fbtft_init_dbg(dev, "setaddrwin = %d\n", setaddrwin);
- fbtft_init_dbg(dev, "regwidth = %d\n", regwidth);
- fbtft_init_dbg(dev, "buswidth = %d\n", buswidth);
-
- info = fbtft_framebuffer_alloc(&flex_display, dev, dev->platform_data);
- if (!info)
- return -ENOMEM;
-
- par = info->par;
- if (sdev)
- par->spi = sdev;
- else
- par->pdev = pdev;
- if (!par->init_sequence)
- par->init_sequence = initp;
- par->fbtftops.init_display = fbtft_init_display;
-
- /* registerwrite functions */
- switch (regwidth) {
- case 8:
- par->fbtftops.write_register = fbtft_write_reg8_bus8;
- break;
- case 16:
- par->fbtftops.write_register = fbtft_write_reg16_bus8;
- break;
- default:
- dev_err(dev,
- "argument 'regwidth': %d is not supported.\n",
- regwidth);
- return -EINVAL;
- }
-
- /* bus functions */
- if (sdev) {
- par->fbtftops.write = fbtft_write_spi;
- switch (buswidth) {
- case 8:
- par->fbtftops.write_vmem = fbtft_write_vmem16_bus8;
- if (!par->startbyte)
- par->fbtftops.verify_gpios = flexfb_verify_gpios_dc;
- break;
- case 9:
- if (regwidth == 16) {
- dev_err(dev, "argument 'regwidth': %d is not supported with buswidth=%d and SPI.\n",
- regwidth, buswidth);
- return -EINVAL;
- }
- par->fbtftops.write_register = fbtft_write_reg8_bus9;
- par->fbtftops.write_vmem = fbtft_write_vmem16_bus9;
- if (par->spi->master->bits_per_word_mask
- & SPI_BPW_MASK(9)) {
- par->spi->bits_per_word = 9;
- break;
- }
-
- dev_warn(dev,
- "9-bit SPI not available, emulating using 8-bit.\n");
- /* allocate buffer with room for dc bits */
- par->extra = devm_kzalloc(par->info->device,
- par->txbuf.len
- + (par->txbuf.len / 8) + 8,
- GFP_KERNEL);
- if (!par->extra) {
- ret = -ENOMEM;
- goto out_release;
- }
- par->fbtftops.write = fbtft_write_spi_emulate_9;
-
- break;
- default:
- dev_err(dev,
- "argument 'buswidth': %d is not supported with SPI.\n",
- buswidth);
- return -EINVAL;
- }
- } else {
- par->fbtftops.verify_gpios = flexfb_verify_gpios_db;
- switch (buswidth) {
- case 8:
- par->fbtftops.write = fbtft_write_gpio8_wr;
- par->fbtftops.write_vmem = fbtft_write_vmem16_bus8;
- break;
- case 16:
- par->fbtftops.write_register = fbtft_write_reg16_bus16;
- if (latched)
- par->fbtftops.write = fbtft_write_gpio16_wr_latched;
- else
- par->fbtftops.write = fbtft_write_gpio16_wr;
- par->fbtftops.write_vmem = fbtft_write_vmem16_bus16;
- break;
- default:
- dev_err(dev,
- "argument 'buswidth': %d is not supported with parallel.\n",
- buswidth);
- return -EINVAL;
- }
- }
-
- /* set_addr_win function */
- switch (setaddrwin) {
- case 0:
- /* use default */
- break;
- case 1:
- par->fbtftops.set_addr_win = flexfb_set_addr_win_1;
- break;
- case 2:
- par->fbtftops.set_addr_win = flexfb_set_addr_win_2;
- break;
- case 3:
- par->fbtftops.set_addr_win = set_addr_win_3;
- break;
- default:
- dev_err(dev, "argument 'setaddrwin': unknown value %d.\n",
- setaddrwin);
- return -EINVAL;
- }
-
- if (!nobacklight)
- par->fbtftops.register_backlight = fbtft_register_backlight;
-
- ret = fbtft_register_framebuffer(info);
- if (ret < 0)
- goto out_release;
-
- return 0;
-
-out_release:
- fbtft_framebuffer_release(info);
-
- return ret;
-}
-
-static int flexfb_remove_common(struct device *dev, struct fb_info *info)
-{
- struct fbtft_par *par;
-
- if (!info)
- return -EINVAL;
- par = info->par;
- if (par)
- fbtft_par_dbg(DEBUG_DRIVER_INIT_FUNCTIONS, par, "%s()\n",
- __func__);
- fbtft_unregister_framebuffer(info);
- fbtft_framebuffer_release(info);
-
- return 0;
-}
-
-static int flexfb_probe_spi(struct spi_device *spi)
-{
- return flexfb_probe_common(spi, NULL);
-}
-
-static int flexfb_remove_spi(struct spi_device *spi)
-{
- struct fb_info *info = spi_get_drvdata(spi);
-
- return flexfb_remove_common(&spi->dev, info);
-}
-
-static int flexfb_probe_pdev(struct platform_device *pdev)
-{
- return flexfb_probe_common(NULL, pdev);
-}
-
-static int flexfb_remove_pdev(struct platform_device *pdev)
-{
- struct fb_info *info = platform_get_drvdata(pdev);
-
- return flexfb_remove_common(&pdev->dev, info);
-}
-
-static struct spi_driver flexfb_spi_driver = {
- .driver = {
- .name = DRVNAME,
- },
- .probe = flexfb_probe_spi,
- .remove = flexfb_remove_spi,
-};
-
-static const struct platform_device_id flexfb_platform_ids[] = {
- { "flexpfb", 0 },
- { },
-};
-MODULE_DEVICE_TABLE(platform, flexfb_platform_ids);
-
-static struct platform_driver flexfb_platform_driver = {
- .driver = {
- .name = DRVNAME,
- },
- .id_table = flexfb_platform_ids,
- .probe = flexfb_probe_pdev,
- .remove = flexfb_remove_pdev,
-};
-
-static int __init flexfb_init(void)
-{
- int ret, ret2;
-
- ret = spi_register_driver(&flexfb_spi_driver);
- ret2 = platform_driver_register(&flexfb_platform_driver);
- if (ret < 0)
- return ret;
- return ret2;
-}
-
-static void __exit flexfb_exit(void)
-{
- spi_unregister_driver(&flexfb_spi_driver);
- platform_driver_unregister(&flexfb_platform_driver);
-}
-
-/* ------------------------------------------------------------------------- */
-
-module_init(flexfb_init);
-module_exit(flexfb_exit);
-
-MODULE_DESCRIPTION("Generic FB driver for TFT LCD displays");
-MODULE_AUTHOR("Noralf Tronnes");
-MODULE_LICENSE("GPL");
diff --git a/drivers/staging/octeon/ethernet-tx.c b/drivers/staging/octeon/ethernet-tx.c
index a62057555d1b..83469061a542 100644
--- a/drivers/staging/octeon/ethernet-tx.c
+++ b/drivers/staging/octeon/ethernet-tx.c
@@ -261,11 +261,11 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev)
/* Build the PKO buffer pointer */
hw_buffer.u64 = 0;
if (skb_shinfo(skb)->nr_frags == 0) {
- hw_buffer.s.addr = XKPHYS_TO_PHYS((u64)skb->data);
+ hw_buffer.s.addr = XKPHYS_TO_PHYS((uintptr_t)skb->data);
hw_buffer.s.pool = 0;
hw_buffer.s.size = skb->len;
} else {
- hw_buffer.s.addr = XKPHYS_TO_PHYS((u64)skb->data);
+ hw_buffer.s.addr = XKPHYS_TO_PHYS((uintptr_t)skb->data);
hw_buffer.s.pool = 0;
hw_buffer.s.size = skb_headlen(skb);
CVM_OCT_SKB_CB(skb)[0] = hw_buffer.u64;
@@ -273,11 +273,12 @@ int cvm_oct_xmit(struct sk_buff *skb, struct net_device *dev)
skb_frag_t *fs = skb_shinfo(skb)->frags + i;
hw_buffer.s.addr =
- XKPHYS_TO_PHYS((u64)skb_frag_address(fs));
+ XKPHYS_TO_PHYS((uintptr_t)skb_frag_address(fs));
hw_buffer.s.size = skb_frag_size(fs);
CVM_OCT_SKB_CB(skb)[i + 1] = hw_buffer.u64;
}
- hw_buffer.s.addr = XKPHYS_TO_PHYS((u64)CVM_OCT_SKB_CB(skb));
+ hw_buffer.s.addr =
+ XKPHYS_TO_PHYS((uintptr_t)CVM_OCT_SKB_CB(skb));
hw_buffer.s.size = skb_shinfo(skb)->nr_frags + 1;
pko_command.s.segs = skb_shinfo(skb)->nr_frags + 1;
pko_command.s.gather = 1;
diff --git a/drivers/staging/octeon/octeon-stubs.h b/drivers/staging/octeon/octeon-stubs.h
index a4ac3bfb62a8..b78ce9eaab85 100644
--- a/drivers/staging/octeon/octeon-stubs.h
+++ b/drivers/staging/octeon/octeon-stubs.h
@@ -1202,7 +1202,7 @@ static inline int cvmx_wqe_get_grp(cvmx_wqe_t *work)
static inline void *cvmx_phys_to_ptr(uint64_t physical_address)
{
- return (void *)(physical_address);
+ return (void *)(uintptr_t)(physical_address);
}
static inline uint64_t cvmx_ptr_to_phys(void *ptr)
diff --git a/drivers/staging/rtl8188eu/hal/hal8188e_rate_adaptive.c b/drivers/staging/rtl8188eu/hal/hal8188e_rate_adaptive.c
index 9ddd51685063..5792f491b59a 100644
--- a/drivers/staging/rtl8188eu/hal/hal8188e_rate_adaptive.c
+++ b/drivers/staging/rtl8188eu/hal/hal8188e_rate_adaptive.c
@@ -409,7 +409,7 @@ static int odm_ARFBRefresh_8188E(struct odm_dm_struct *dm_odm, struct odm_ra_inf
pRaInfo->PTModeSS = 3;
else if (pRaInfo->HighestRate > 0x0b)
pRaInfo->PTModeSS = 2;
- else if (pRaInfo->HighestRate > 0x0b)
+ else if (pRaInfo->HighestRate > 0x03)
pRaInfo->PTModeSS = 1;
else
pRaInfo->PTModeSS = 0;
diff --git a/drivers/staging/rtl8188eu/os_dep/usb_intf.c b/drivers/staging/rtl8188eu/os_dep/usb_intf.c
index 664d93a7f90d..4fac9dca798e 100644
--- a/drivers/staging/rtl8188eu/os_dep/usb_intf.c
+++ b/drivers/staging/rtl8188eu/os_dep/usb_intf.c
@@ -348,8 +348,10 @@ static struct adapter *rtw_usb_if1_init(struct dvobj_priv *dvobj,
}
padapter->HalData = kzalloc(sizeof(struct hal_data_8188e), GFP_KERNEL);
- if (!padapter->HalData)
- DBG_88E("cant not alloc memory for HAL DATA\n");
+ if (!padapter->HalData) {
+ DBG_88E("Failed to allocate memory for HAL data\n");
+ goto free_adapter;
+ }
/* step read_chip_version */
rtw_hal_read_chip_version(padapter);
diff --git a/drivers/staging/speakup/sysfs-driver-speakup b/drivers/staging/speakup/sysfs-driver-speakup
new file mode 100644
index 000000000000..be3f5d6962e9
--- /dev/null
+++ b/drivers/staging/speakup/sysfs-driver-speakup
@@ -0,0 +1,369 @@
+What: /sys/accessibility/speakup/attrib_bleep
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Beeps the PC speaker when there is an attribute change such as
+ foreground or background color when using speakup review
+ commands. One = on, zero = off.
+
+What: /sys/accessibility/speakup/bell_pos
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: This works much like a typewriter bell. If for example 72 is
+ echoed to bell_pos, it will beep the PC speaker when typing on
+ a line past character 72.
+
+What: /sys/accessibility/speakup/bleeps
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: This controls whether one hears beeps through the PC speaker
+ when using speakup's review commands.
+ TODO: what values does it accept?
+
+What: /sys/accessibility/speakup/bleep_time
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: This controls the duration of the PC speaker beeps speakup
+ produces.
+ TODO: What are the units? Jiffies?
+
+What: /sys/accessibility/speakup/cursor_time
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: This controls cursor delay when using arrow keys. When a
+ connection is very slow, with the default setting, when moving
+ with the arrows, or backspacing etc. speakup says the incorrect
+ characters. Set this to a higher value to adjust for the delay
+ and better synchronisation between cursor position and speech.
+
+What: /sys/accessibility/speakup/delimiters
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Delimit a word from speakup.
+ TODO: add more info
+
+What: /sys/accessibility/speakup/ex_num
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: TODO:
+
+What: /sys/accessibility/speakup/key_echo
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Controls if speakup speaks keys when they are typed. One = on,
+ zero = off or don't echo keys.
+
+What: /sys/accessibility/speakup/keymap
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Speakup keymap remaps keys to Speakup functions.
+ It uses a binary
+ format. A special program called genmap is needed to compile a
+ textual keymap into the binary format which is then loaded into
+ /sys/accessibility/speakup/keymap.
+
+What: /sys/accessibility/speakup/no_interrupt
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Controls if typing interrupts output from speakup. With
+ no_interrupt set to zero, typing on the keyboard will interrupt
+ speakup if for example
+ the say screen command is used before the
+ entire screen is read.
+ With no_interrupt set to one, if the say
+ screen command is used, and one then types on the keyboard,
+ speakup will continue to say the whole screen regardless until
+ it finishes.
+
+What: /sys/accessibility/speakup/punc_all
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: This is a list of all the punctuation speakup should speak when
+ punc_level is set to four.
+
+What: /sys/accessibility/speakup/punc_level
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Controls the level of punctuation spoken as the screen is
+ displayed, not reviewed. Levels range from zero no punctuation,
+ to four, all punctuation. One corresponds to punc_some, two
+ corresponds to punc_most, and three as well as four both
+ correspond to punc_all. Some hardware synthesizers may have
+ different levels each corresponding to three and four for
+ punc_level. Also note that if punc_level is set to zero, and
+ key_echo is set to one, typed punctuation is still spoken as it
+ is typed.
+
+What: /sys/accessibility/speakup/punc_most
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: This is a list of all the punctuation speakup should speak when
+ punc_level is set to two.
+
+What: /sys/accessibility/speakup/punc_some
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: This is a list of all the punctuation speakup should speak when
+ punc_level is set to one.
+
+What: /sys/accessibility/speakup/reading_punc
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Almost the same as punc_level, the differences being that
+ reading_punc controls the level of punctuation when reviewing
+ the screen with speakup's screen review commands. The other
+ difference is that reading_punc set to three speaks punc_all,
+ and reading_punc set to four speaks all punctuation, including
+ spaces.
+
+What: /sys/accessibility/speakup/repeats
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: A list of characters speakup repeats. Normally, when there are
+ more than three characters in a row, speakup
+ just reads three of
+ those characters. For example, "......" would be read as dot,
+ dot, dot. If a . is added to the list of characters in repeats,
+ "......" would be read as dot, dot, dot, times six.
+
+What: /sys/accessibility/speakup/say_control
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: If set to one, speakup speaks shift, alt and control when those
+ keys are pressed. If say_control is set to zero, shift, ctrl,
+ and alt are not spoken when they are pressed.
+
+What: /sys/accessibility/speakup/say_word_ctl
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: TODO:
+
+What: /sys/accessibility/speakup/silent
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: TODO:
+
+What: /sys/accessibility/speakup/spell_delay
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: This controls how fast a word is spelled
+ when speakup's say word
+ review command is pressed twice quickly to speak the current
+ word being reviewed. Zero just speaks the letters one after
+ another, while values one through four
+ seem to introduce more of
+ a pause between the spelling of each letter by speakup.
+
+What: /sys/accessibility/speakup/synth
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Gets or sets the synthesizer driver currently in use. Reading
+ synth returns the synthesizer driver currently in use. Writing
+ synth switches to the given synthesizer driver, provided it is
+ either built into the kernel, or already loaded as a module.
+
+What: /sys/accessibility/speakup/synth_direct
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Sends whatever is written to synth_direct
+ directly to the speech synthesizer in use, bypassing speakup.
+ This could be used to make the synthesizer speak
+ a string, or to
+ send control sequences to the synthesizer to change how the
+ synthesizer behaves.
+
+What: /sys/accessibility/speakup/version
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Reading version returns the version of speakup, and the version
+ of the synthesizer driver currently in use.
+
+What: /sys/accessibility/speakup/i18n/announcements
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: This file contains various general announcements, most of which
+ cannot be categorized. You will find messages such as "You
+ killed Speakup", "I'm alive", "leaving help", "parked",
+ "unparked", and others. You will also find the names of the
+ screen edges and cursor tracking modes here.
+
+What: /sys/accessibility/speakup/i18n/chartab
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: TODO
+
+What: /sys/accessibility/speakup/i18n/ctl_keys
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Here, you will find names of control keys. These are used with
+ Speakup's say_control feature.
+
+What: /sys/accessibility/speakup/i18n/function_names
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Here, you will find a list of names for Speakup functions.
+ These are used by the help system. For example, suppose that
+ you have activated help mode, and you pressed
+ keypad 3. Speakup
+ says: "keypad 3 is character, say next."
+ The message "character, say next" names a Speakup function, and
+ it comes from this function_names file.
+
+What: /sys/accessibility/speakup/i18n/states
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: This file contains names for key states.
+ Again, these are part of the help system. For instance, if you
+ had pressed speakup + keypad 3, you would hear:
+ "speakup keypad 3 is go to bottom edge."
+ The speakup key is depressed, so the name of the key state is
+ speakup.
+ This part of the message comes from the states collection.
+
+What: /sys/accessibility/speakup/i18n/characters
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Through this sys entry, Speakup gives you the ability to change
+ how Speakup pronounces a given character. You could, for
+ example, change how some punctuation characters are spoken. You
+ can even change how Speakup will pronounce certain letters. For
+ further details see '12. Changing the Pronunciation of
+ Characters' in Speakup User's Guide (file spkguide.txt in
+ source).
+
+What: /sys/accessibility/speakup/i18n/colors
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: When you use the "say attributes" function, Speakup says the
+ name of the foreground and background colors. These names come
+ from the i18n/colors file.
+
+What: /sys/accessibility/speakup/i18n/formatted
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: This group of messages contains embedded formatting codes, to
+ specify the type and width of displayed data. If you change
+ these, you must preserve all of the formatting codes, and they
+ must appear in the order used by the default messages.
+
+What: /sys/accessibility/speakup/i18n/key_names
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Again, key_names is used by Speakup's help system. In the
+ previous example, Speakup said that you pressed "keypad 3."
+ This name came from the key_names file.
+
+What: /sys/accessibility/speakup/<synth-name>/
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: In `/sys/accessibility/speakup` is a directory corresponding to
+ the synthesizer driver currently in use (E.G) `soft` for the
+ soft driver. This directory contains files which control the
+ speech synthesizer itself,
+ as opposed to controlling the speakup
+ screen reader. The parameters in this directory have the same
+ names and functions across all
+ supported synthesizers. The range
+ of values for freq, pitch, rate, and vol is the same for all
+ supported synthesizers, with the given range being internally
+ mapped by the driver to more or less fit the range of values
+ supported for a given parameter by the individual synthesizer.
+ Below is a description of values and parameters for soft
+ synthesizer, which is currently the most commonly used.
+
+What: /sys/accessibility/speakup/soft/caps_start
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: This is the string that is sent to the synthesizer to cause it
+ to start speaking uppercase letters. For the soft synthesizer
+ and most others, this causes the pitch of the voice to rise
+ above the currently set pitch.
+
+What: /sys/accessibility/speakup/soft/caps_stop
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: This is the string sent to the synthesizer to cause it to stop
+ speaking uppercase letters. In the case of the soft synthesizer
+ and most others, this returns the pitch of the voice
+ down to the
+ currently set pitch.
+
+What: /sys/accessibility/speakup/soft/delay_time
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: TODO:
+
+What: /sys/accessibility/speakup/soft/direct
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Controls if punctuation is spoken by speakup, or by the
+ synthesizer.
+ For example, speakup speaks ">" as "greater", while
+ the espeak synthesizer used by the soft driver speaks "greater
+ than". Zero lets speakup speak the punctuation. One lets the
+ synthesizer itself speak punctuation.
+
+What: /sys/accessibility/speakup/soft/freq
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Gets or sets the frequency of the speech synthesizer. Range is
+ 0-9.
+
+What: /sys/accessibility/speakup/soft/full_time
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: TODO:
+
+What: /sys/accessibility/speakup/soft/jiffy_delta
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: This controls how many jiffys the kernel gives to the
+ synthesizer. Setting this too high can make a system unstable,
+ or even crash it.
+
+What: /sys/accessibility/speakup/soft/pitch
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Gets or sets the pitch of the synthesizer. The range is 0-9.
+
+What: /sys/accessibility/speakup/soft/punct
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Gets or sets the amount of punctuation spoken by the
+ synthesizer. The range for the soft driver seems to be 0-2.
+ TODO: How is this related to speakup's punc_level, or
+ reading_punc.
+
+What: /sys/accessibility/speakup/soft/rate
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Gets or sets the rate of the synthesizer. Range is from zero
+ slowest, to nine fastest.
+
+What: /sys/accessibility/speakup/soft/tone
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Gets or sets the tone of the speech synthesizer. The range for
+ the soft driver seems to be 0-2. This seems to make no
+ difference if using espeak and the espeakup connector.
+ TODO: does espeakup support different tonalities?
+
+What: /sys/accessibility/speakup/soft/trigger_time
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: TODO:
+
+What: /sys/accessibility/speakup/soft/voice
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Gets or sets the voice used by the synthesizer if the
+ synthesizer can speak in more than one voice. The range for the
+ soft driver is 0-7. Note that while espeak supports multiple
+ voices, this parameter will not set the voice when the espeakup
+ connector is used between speakup and espeak.
+
+What: /sys/accessibility/speakup/soft/vol
+KernelVersion: 2.6
+Contact: speakup@linux-speakup.org
+Description: Gets or sets the volume of the speech synthesizer. Range is 0-9,
+ with zero being the softest, and nine being the loudest.
+
diff --git a/drivers/staging/vc04_services/bcm2835-audio/bcm2835-pcm.c b/drivers/staging/vc04_services/bcm2835-audio/bcm2835-pcm.c
index bc1eaa3a0773..826016c3431a 100644
--- a/drivers/staging/vc04_services/bcm2835-audio/bcm2835-pcm.c
+++ b/drivers/staging/vc04_services/bcm2835-audio/bcm2835-pcm.c
@@ -12,7 +12,7 @@
static const struct snd_pcm_hardware snd_bcm2835_playback_hw = {
.info = (SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER |
SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_MMAP_VALID |
- SNDRV_PCM_INFO_DRAIN_TRIGGER | SNDRV_PCM_INFO_SYNC_APPLPTR),
+ SNDRV_PCM_INFO_SYNC_APPLPTR),
.formats = SNDRV_PCM_FMTBIT_U8 | SNDRV_PCM_FMTBIT_S16_LE,
.rates = SNDRV_PCM_RATE_CONTINUOUS | SNDRV_PCM_RATE_8000_48000,
.rate_min = 8000,
@@ -29,7 +29,7 @@ static const struct snd_pcm_hardware snd_bcm2835_playback_hw = {
static const struct snd_pcm_hardware snd_bcm2835_playback_spdif_hw = {
.info = (SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BLOCK_TRANSFER |
SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_MMAP_VALID |
- SNDRV_PCM_INFO_DRAIN_TRIGGER | SNDRV_PCM_INFO_SYNC_APPLPTR),
+ SNDRV_PCM_INFO_SYNC_APPLPTR),
.formats = SNDRV_PCM_FMTBIT_S16_LE,
.rates = SNDRV_PCM_RATE_CONTINUOUS | SNDRV_PCM_RATE_44100 |
SNDRV_PCM_RATE_48000,
diff --git a/drivers/staging/vc04_services/bcm2835-audio/bcm2835-vchiq.c b/drivers/staging/vc04_services/bcm2835-audio/bcm2835-vchiq.c
index 23fba01107b9..c6f9cf1913d2 100644
--- a/drivers/staging/vc04_services/bcm2835-audio/bcm2835-vchiq.c
+++ b/drivers/staging/vc04_services/bcm2835-audio/bcm2835-vchiq.c
@@ -289,6 +289,7 @@ int bcm2835_audio_stop(struct bcm2835_alsa_stream *alsa_stream)
VC_AUDIO_MSG_TYPE_STOP, false);
}
+/* FIXME: this doesn't seem working as expected for "draining" */
int bcm2835_audio_drain(struct bcm2835_alsa_stream *alsa_stream)
{
struct vc_audio_msg m = {
diff --git a/drivers/staging/vt6655/device_main.c b/drivers/staging/vt6655/device_main.c
index c6bb4aaf9bd0..082302944c37 100644
--- a/drivers/staging/vt6655/device_main.c
+++ b/drivers/staging/vt6655/device_main.c
@@ -1748,8 +1748,10 @@ vt6655_probe(struct pci_dev *pcid, const struct pci_device_id *ent)
priv->hw->max_signal = 100;
- if (vnt_init(priv))
+ if (vnt_init(priv)) {
+ device_free_info(priv);
return -ENODEV;
+ }
device_print_info(priv);
pci_set_drvdata(pcid, priv);
diff --git a/drivers/staging/wlan-ng/cfg80211.c b/drivers/staging/wlan-ng/cfg80211.c
index eee1998c4b18..fac38c842ac5 100644
--- a/drivers/staging/wlan-ng/cfg80211.c
+++ b/drivers/staging/wlan-ng/cfg80211.c
@@ -469,10 +469,8 @@ static int prism2_connect(struct wiphy *wiphy, struct net_device *dev,
/* Set the encryption - we only support wep */
if (is_wep) {
if (sme->key) {
- if (sme->key_idx >= NUM_WEPKEYS) {
- err = -EINVAL;
- goto exit;
- }
+ if (sme->key_idx >= NUM_WEPKEYS)
+ return -EINVAL;
result = prism2_domibset_uint32(wlandev,
DIDMIB_DOT11SMT_PRIVACYTABLE_WEPDEFAULTKEYID,
diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c
index c70caf4ea490..a2b5c796bbc4 100644
--- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c
+++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c
@@ -1831,7 +1831,7 @@ static void cxgbit_fw4_ack(struct cxgbit_sock *csk, struct sk_buff *skb)
while (credits) {
struct sk_buff *p = cxgbit_sock_peek_wr(csk);
- const u32 csum = (__force u32)p->csum;
+ u32 csum;
if (unlikely(!p)) {
pr_err("csk 0x%p,%u, cr %u,%u+%u, empty.\n",
@@ -1840,6 +1840,7 @@ static void cxgbit_fw4_ack(struct cxgbit_sock *csk, struct sk_buff *skb)
break;
}
+ csum = (__force u32)p->csum;
if (unlikely(credits < csum)) {
pr_warn("csk 0x%p,%u, cr %u,%u+%u, < %u.\n",
csk, csk->tid,
diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index 04bf2acd3800..2d19f0e332b0 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -1075,27 +1075,6 @@ passthrough_parse_cdb(struct se_cmd *cmd,
unsigned int size;
/*
- * Clear a lun set in the cdb if the initiator talking to use spoke
- * and old standards version, as we can't assume the underlying device
- * won't choke up on it.
- */
- switch (cdb[0]) {
- case READ_10: /* SBC - RDProtect */
- case READ_12: /* SBC - RDProtect */
- case READ_16: /* SBC - RDProtect */
- case SEND_DIAGNOSTIC: /* SPC - SELF-TEST Code */
- case VERIFY: /* SBC - VRProtect */
- case VERIFY_16: /* SBC - VRProtect */
- case WRITE_VERIFY: /* SBC - VRProtect */
- case WRITE_VERIFY_12: /* SBC - VRProtect */
- case MAINTENANCE_IN: /* SPC - Parameter Data Format for SA RTPG */
- break;
- default:
- cdb[1] &= 0x1f; /* clear logical unit number */
- break;
- }
-
- /*
* For REPORT LUNS we always need to emulate the response, for everything
* else, pass it up.
*/
diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index 391f39776c6a..6b9865c786ba 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -88,7 +88,7 @@ struct cpufreq_cooling_device {
struct cpufreq_policy *policy;
struct list_head node;
struct time_in_idle *idle_time;
- struct dev_pm_qos_request qos_req;
+ struct freq_qos_request qos_req;
};
static DEFINE_IDA(cpufreq_ida);
@@ -331,7 +331,7 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
cpufreq_cdev->cpufreq_state = state;
- return dev_pm_qos_update_request(&cpufreq_cdev->qos_req,
+ return freq_qos_update_request(&cpufreq_cdev->qos_req,
cpufreq_cdev->freq_table[state].frequency);
}
@@ -615,9 +615,9 @@ __cpufreq_cooling_register(struct device_node *np,
cooling_ops = &cpufreq_cooling_ops;
}
- ret = dev_pm_qos_add_request(dev, &cpufreq_cdev->qos_req,
- DEV_PM_QOS_MAX_FREQUENCY,
- cpufreq_cdev->freq_table[0].frequency);
+ ret = freq_qos_add_request(&policy->constraints,
+ &cpufreq_cdev->qos_req, FREQ_QOS_MAX,
+ cpufreq_cdev->freq_table[0].frequency);
if (ret < 0) {
pr_err("%s: Failed to add freq constraint (%d)\n", __func__,
ret);
@@ -637,7 +637,7 @@ __cpufreq_cooling_register(struct device_node *np,
return cdev;
remove_qos_req:
- dev_pm_qos_remove_request(&cpufreq_cdev->qos_req);
+ freq_qos_remove_request(&cpufreq_cdev->qos_req);
remove_ida:
ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id);
free_table:
@@ -736,7 +736,7 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
mutex_unlock(&cooling_list_lock);
thermal_cooling_device_unregister(cdev);
- dev_pm_qos_remove_request(&cpufreq_cdev->qos_req);
+ freq_qos_remove_request(&cpufreq_cdev->qos_req);
ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id);
kfree(cpufreq_cdev->idle_time);
kfree(cpufreq_cdev->freq_table);
diff --git a/drivers/thunderbolt/nhi_ops.c b/drivers/thunderbolt/nhi_ops.c
index 61cd09cef943..6795851aac95 100644
--- a/drivers/thunderbolt/nhi_ops.c
+++ b/drivers/thunderbolt/nhi_ops.c
@@ -80,7 +80,6 @@ static void icl_nhi_lc_mailbox_cmd(struct tb_nhi *nhi, enum icl_lc_mailbox_cmd c
{
u32 data;
- pci_read_config_dword(nhi->pdev, VS_CAP_19, &data);
data = (cmd << VS_CAP_19_CMD_SHIFT) & VS_CAP_19_CMD_MASK;
pci_write_config_dword(nhi->pdev, VS_CAP_19, data | VS_CAP_19_VALID);
}
diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c
index 410bf1bceeee..5ea8db667e83 100644
--- a/drivers/thunderbolt/switch.c
+++ b/drivers/thunderbolt/switch.c
@@ -896,12 +896,13 @@ int tb_dp_port_set_hops(struct tb_port *port, unsigned int video,
*/
bool tb_dp_port_is_enabled(struct tb_port *port)
{
- u32 data;
+ u32 data[2];
- if (tb_port_read(port, &data, TB_CFG_PORT, port->cap_adap, 1))
+ if (tb_port_read(port, data, TB_CFG_PORT, port->cap_adap,
+ ARRAY_SIZE(data)))
return false;
- return !!(data & (TB_DP_VIDEO_EN | TB_DP_AUX_EN));
+ return !!(data[0] & (TB_DP_VIDEO_EN | TB_DP_AUX_EN));
}
/**
@@ -914,19 +915,21 @@ bool tb_dp_port_is_enabled(struct tb_port *port)
*/
int tb_dp_port_enable(struct tb_port *port, bool enable)
{
- u32 data;
+ u32 data[2];
int ret;
- ret = tb_port_read(port, &data, TB_CFG_PORT, port->cap_adap, 1);
+ ret = tb_port_read(port, data, TB_CFG_PORT, port->cap_adap,
+ ARRAY_SIZE(data));
if (ret)
return ret;
if (enable)
- data |= TB_DP_VIDEO_EN | TB_DP_AUX_EN;
+ data[0] |= TB_DP_VIDEO_EN | TB_DP_AUX_EN;
else
- data &= ~(TB_DP_VIDEO_EN | TB_DP_AUX_EN);
+ data[0] &= ~(TB_DP_VIDEO_EN | TB_DP_AUX_EN);
- return tb_port_write(port, &data, TB_CFG_PORT, port->cap_adap, 1);
+ return tb_port_write(port, data, TB_CFG_PORT, port->cap_adap,
+ ARRAY_SIZE(data));
}
/* switch utility functions */
@@ -1031,13 +1034,6 @@ static int tb_switch_set_authorized(struct tb_switch *sw, unsigned int val)
if (sw->authorized)
goto unlock;
- /*
- * Make sure there is no PCIe rescan ongoing when a new PCIe
- * tunnel is created. Otherwise the PCIe rescan code might find
- * the new tunnel too early.
- */
- pci_lock_rescan_remove();
-
switch (val) {
/* Approve switch */
case 1:
@@ -1057,8 +1053,6 @@ static int tb_switch_set_authorized(struct tb_switch *sw, unsigned int val)
break;
}
- pci_unlock_rescan_remove();
-
if (!ret) {
sw->authorized = val;
/* Notify status change to the userspace */
diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c
index e55c79eb6430..98361acd3053 100644
--- a/drivers/tty/n_hdlc.c
+++ b/drivers/tty/n_hdlc.c
@@ -968,6 +968,11 @@ static int __init n_hdlc_init(void)
} /* end of init_module() */
+#ifdef CONFIG_SPARC
+#undef __exitdata
+#define __exitdata
+#endif
+
static const char hdlc_unregister_ok[] __exitdata =
KERN_INFO "N_HDLC: line discipline unregistered\n";
static const char hdlc_unregister_fail[] __exitdata =
diff --git a/drivers/tty/serial/8250/8250_men_mcb.c b/drivers/tty/serial/8250/8250_men_mcb.c
index 02c5aff58a74..8df89e9cd254 100644
--- a/drivers/tty/serial/8250/8250_men_mcb.c
+++ b/drivers/tty/serial/8250/8250_men_mcb.c
@@ -72,8 +72,8 @@ static int serial_8250_men_mcb_probe(struct mcb_device *mdev,
{
struct serial_8250_men_mcb_data *data;
struct resource *mem;
- unsigned int num_ports;
- unsigned int i;
+ int num_ports;
+ int i;
void __iomem *membase;
mem = mcb_get_resource(mdev, IORESOURCE_MEM);
@@ -88,7 +88,7 @@ static int serial_8250_men_mcb_probe(struct mcb_device *mdev,
dev_dbg(&mdev->dev, "found a 16z%03u with %u ports\n",
mdev->id, num_ports);
- if (num_ports == 0 || num_ports > 4) {
+ if (num_ports <= 0 || num_ports > 4) {
dev_err(&mdev->dev, "unexpected number of ports: %u\n",
num_ports);
return -ENODEV;
@@ -133,7 +133,7 @@ static int serial_8250_men_mcb_probe(struct mcb_device *mdev,
static void serial_8250_men_mcb_remove(struct mcb_device *mdev)
{
- unsigned int num_ports, i;
+ int num_ports, i;
struct serial_8250_men_mcb_data *data = mcb_get_drvdata(mdev);
if (!data)
diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c
index c68e2b3a1634..836e736ae188 100644
--- a/drivers/tty/serial/8250/8250_omap.c
+++ b/drivers/tty/serial/8250/8250_omap.c
@@ -141,7 +141,7 @@ static void omap8250_set_mctrl(struct uart_port *port, unsigned int mctrl)
serial8250_do_set_mctrl(port, mctrl);
- if (!up->gpios) {
+ if (!mctrl_gpio_to_gpiod(up->gpios, UART_GPIO_RTS)) {
/*
* Turn off autoRTS if RTS is lowered and restore autoRTS
* setting if RTS is raised
@@ -456,7 +456,8 @@ static void omap_8250_set_termios(struct uart_port *port,
up->port.status &= ~(UPSTAT_AUTOCTS | UPSTAT_AUTORTS | UPSTAT_AUTOXOFF);
if (termios->c_cflag & CRTSCTS && up->port.flags & UPF_HARD_FLOW &&
- !up->gpios) {
+ !mctrl_gpio_to_gpiod(up->gpios, UART_GPIO_RTS) &&
+ !mctrl_gpio_to_gpiod(up->gpios, UART_GPIO_CTS)) {
/* Enable AUTOCTS (autoRTS is enabled when RTS is raised) */
up->port.status |= UPSTAT_AUTOCTS | UPSTAT_AUTORTS;
priv->efr |= UART_EFR_CTS;
diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index 4789b5d62f63..67a9eb3f94ce 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -1032,6 +1032,7 @@ config SERIAL_SIFIVE_CONSOLE
bool "Console on SiFive UART"
depends on SERIAL_SIFIVE=y
select SERIAL_CORE_CONSOLE
+ select SERIAL_EARLYCON
help
Select this option if you would like to use a SiFive UART as the
system console.
diff --git a/drivers/tty/serial/fsl_linflexuart.c b/drivers/tty/serial/fsl_linflexuart.c
index 68d74f2b5106..a32f0d2afd59 100644
--- a/drivers/tty/serial/fsl_linflexuart.c
+++ b/drivers/tty/serial/fsl_linflexuart.c
@@ -3,7 +3,7 @@
* Freescale linflexuart serial port driver
*
* Copyright 2012-2016 Freescale Semiconductor, Inc.
- * Copyright 2017-2018 NXP
+ * Copyright 2017-2019 NXP
*/
#if defined(CONFIG_SERIAL_FSL_LINFLEXUART_CONSOLE) && \
@@ -246,12 +246,14 @@ static irqreturn_t linflex_rxint(int irq, void *dev_id)
struct tty_port *port = &sport->state->port;
unsigned long flags, status;
unsigned char rx;
+ bool brk;
spin_lock_irqsave(&sport->lock, flags);
status = readl(sport->membase + UARTSR);
while (status & LINFLEXD_UARTSR_RMB) {
rx = readb(sport->membase + BDRM);
+ brk = false;
flg = TTY_NORMAL;
sport->icount.rx++;
@@ -261,8 +263,11 @@ static irqreturn_t linflex_rxint(int irq, void *dev_id)
status |= LINFLEXD_UARTSR_SZF;
if (status & LINFLEXD_UARTSR_BOF)
status |= LINFLEXD_UARTSR_BOF;
- if (status & LINFLEXD_UARTSR_FEF)
+ if (status & LINFLEXD_UARTSR_FEF) {
+ if (!rx)
+ brk = true;
status |= LINFLEXD_UARTSR_FEF;
+ }
if (status & LINFLEXD_UARTSR_PE)
status |= LINFLEXD_UARTSR_PE;
}
@@ -271,13 +276,15 @@ static irqreturn_t linflex_rxint(int irq, void *dev_id)
sport->membase + UARTSR);
status = readl(sport->membase + UARTSR);
- if (uart_handle_sysrq_char(sport, (unsigned char)rx))
- continue;
-
+ if (brk) {
+ uart_handle_break(sport);
+ } else {
#ifdef SUPPORT_SYSRQ
- sport->sysrq = 0;
+ if (uart_handle_sysrq_char(sport, (unsigned char)rx))
+ continue;
#endif
- tty_insert_flip_char(port, rx, flg);
+ tty_insert_flip_char(port, rx, flg);
+ }
}
spin_unlock_irqrestore(&sport->lock, flags);
diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c
index 3e17bb8a0b16..537896c4d887 100644
--- a/drivers/tty/serial/fsl_lpuart.c
+++ b/drivers/tty/serial/fsl_lpuart.c
@@ -548,7 +548,7 @@ static void lpuart_flush_buffer(struct uart_port *port)
val |= UARTFIFO_TXFLUSH | UARTFIFO_RXFLUSH;
lpuart32_write(&sport->port, val, UARTFIFO);
} else {
- val = readb(sport->port.membase + UARTPFIFO);
+ val = readb(sport->port.membase + UARTCFIFO);
val |= UARTCFIFO_TXFLUSH | UARTCFIFO_RXFLUSH;
writeb(val, sport->port.membase + UARTCFIFO);
}
diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c
index 87c58f9f6390..5e08f2657b90 100644
--- a/drivers/tty/serial/imx.c
+++ b/drivers/tty/serial/imx.c
@@ -2222,8 +2222,8 @@ static int imx_uart_probe(struct platform_device *pdev)
return PTR_ERR(base);
rxirq = platform_get_irq(pdev, 0);
- txirq = platform_get_irq(pdev, 1);
- rtsirq = platform_get_irq(pdev, 2);
+ txirq = platform_get_irq_optional(pdev, 1);
+ rtsirq = platform_get_irq_optional(pdev, 2);
sport->port.dev = &pdev->dev;
sport->port.mapbase = res->start;
diff --git a/drivers/tty/serial/owl-uart.c b/drivers/tty/serial/owl-uart.c
index 03963af77b15..d2d8b3494685 100644
--- a/drivers/tty/serial/owl-uart.c
+++ b/drivers/tty/serial/owl-uart.c
@@ -740,7 +740,7 @@ static int __init owl_uart_init(void)
return ret;
}
-static void __init owl_uart_exit(void)
+static void __exit owl_uart_exit(void)
{
platform_driver_unregister(&owl_uart_platform_driver);
uart_unregister_driver(&owl_uart_driver);
diff --git a/drivers/tty/serial/rda-uart.c b/drivers/tty/serial/rda-uart.c
index c1b0d7662ef9..ff9a27d48bca 100644
--- a/drivers/tty/serial/rda-uart.c
+++ b/drivers/tty/serial/rda-uart.c
@@ -815,7 +815,7 @@ static int __init rda_uart_init(void)
return ret;
}
-static void __init rda_uart_exit(void)
+static void __exit rda_uart_exit(void)
{
platform_driver_unregister(&rda_uart_platform_driver);
uart_unregister_driver(&rda_uart_driver);
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 6e713be1d4e9..c4a414a46c7f 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -1964,8 +1964,10 @@ uart_get_console(struct uart_port *ports, int nr, struct console *co)
* console=<name>,io|mmio|mmio16|mmio32|mmio32be|mmio32native,<addr>,<options>
*
* The optional form
+ *
* earlycon=<name>,0x<addr>,<options>
* console=<name>,0x<addr>,<options>
+ *
* is also accepted; the returned @iotype will be UPIO_MEM.
*
* Returns 0 on success or -EINVAL on failure
diff --git a/drivers/tty/serial/serial_mctrl_gpio.c b/drivers/tty/serial/serial_mctrl_gpio.c
index d9074303c88e..fb4781292d40 100644
--- a/drivers/tty/serial/serial_mctrl_gpio.c
+++ b/drivers/tty/serial/serial_mctrl_gpio.c
@@ -66,6 +66,9 @@ EXPORT_SYMBOL_GPL(mctrl_gpio_set);
struct gpio_desc *mctrl_gpio_to_gpiod(struct mctrl_gpios *gpios,
enum mctrl_gpio_idx gidx)
{
+ if (gpios == NULL)
+ return NULL;
+
return gpios->gpio[gidx];
}
EXPORT_SYMBOL_GPL(mctrl_gpio_to_gpiod);
diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index 4e754a4850e6..22e5d4e13714 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -2894,8 +2894,12 @@ static int sci_init_single(struct platform_device *dev,
port->mapbase = res->start;
sci_port->reg_size = resource_size(res);
- for (i = 0; i < ARRAY_SIZE(sci_port->irqs); ++i)
- sci_port->irqs[i] = platform_get_irq(dev, i);
+ for (i = 0; i < ARRAY_SIZE(sci_port->irqs); ++i) {
+ if (i)
+ sci_port->irqs[i] = platform_get_irq_optional(dev, i);
+ else
+ sci_port->irqs[i] = platform_get_irq(dev, i);
+ }
/* The SCI generates several interrupts. They can be muxed together or
* connected to different interrupt lines. In the muxed case only one
diff --git a/drivers/tty/serial/uartlite.c b/drivers/tty/serial/uartlite.c
index b8b912b5a8b9..06e79c11141d 100644
--- a/drivers/tty/serial/uartlite.c
+++ b/drivers/tty/serial/uartlite.c
@@ -897,7 +897,8 @@ static int __init ulite_init(void)
static void __exit ulite_exit(void)
{
platform_driver_unregister(&ulite_platform_driver);
- uart_unregister_driver(&ulite_uart_driver);
+ if (ulite_uart_driver.state)
+ uart_unregister_driver(&ulite_uart_driver);
}
module_init(ulite_init);
diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c
index da4563aaaf5c..4e55bc327a54 100644
--- a/drivers/tty/serial/xilinx_uartps.c
+++ b/drivers/tty/serial/xilinx_uartps.c
@@ -1550,7 +1550,6 @@ static int cdns_uart_probe(struct platform_device *pdev)
goto err_out_id;
}
- uartps_major = cdns_uart_uart_driver->tty_driver->major;
cdns_uart_data->cdns_uart_driver = cdns_uart_uart_driver;
/*
@@ -1680,6 +1679,7 @@ static int cdns_uart_probe(struct platform_device *pdev)
console_port = NULL;
#endif
+ uartps_major = cdns_uart_uart_driver->tty_driver->major;
cdns_uart_data->cts_override = of_property_read_bool(pdev->dev.of_node,
"cts-override");
return 0;
@@ -1741,6 +1741,12 @@ static int cdns_uart_remove(struct platform_device *pdev)
console_port = NULL;
#endif
+ /* If this is last instance major number should be initialized */
+ mutex_lock(&bitmap_lock);
+ if (bitmap_empty(bitmap, MAX_UART_INSTANCES))
+ uartps_major = 0;
+ mutex_unlock(&bitmap_lock);
+
uart_unregister_driver(cdns_uart_data->cdns_uart_driver);
return rc;
}
diff --git a/drivers/usb/cdns3/cdns3-pci-wrap.c b/drivers/usb/cdns3/cdns3-pci-wrap.c
index c41ddb61b857..b0a29efe7d31 100644
--- a/drivers/usb/cdns3/cdns3-pci-wrap.c
+++ b/drivers/usb/cdns3/cdns3-pci-wrap.c
@@ -159,8 +159,9 @@ static int cdns3_pci_probe(struct pci_dev *pdev,
wrap->plat_dev = platform_device_register_full(&plat_info);
if (IS_ERR(wrap->plat_dev)) {
pci_disable_device(pdev);
+ err = PTR_ERR(wrap->plat_dev);
kfree(wrap);
- return PTR_ERR(wrap->plat_dev);
+ return err;
}
}
diff --git a/drivers/usb/cdns3/core.c b/drivers/usb/cdns3/core.c
index 06f1e105be4e..c2123ef8d8a3 100644
--- a/drivers/usb/cdns3/core.c
+++ b/drivers/usb/cdns3/core.c
@@ -160,10 +160,30 @@ static int cdns3_core_init_role(struct cdns3 *cdns)
if (ret)
goto err;
- if (cdns->dr_mode != USB_DR_MODE_OTG) {
+ /* Initialize idle role to start with */
+ ret = cdns3_role_start(cdns, USB_ROLE_NONE);
+ if (ret)
+ goto err;
+
+ switch (cdns->dr_mode) {
+ case USB_DR_MODE_OTG:
ret = cdns3_hw_role_switch(cdns);
if (ret)
goto err;
+ break;
+ case USB_DR_MODE_PERIPHERAL:
+ ret = cdns3_role_start(cdns, USB_ROLE_DEVICE);
+ if (ret)
+ goto err;
+ break;
+ case USB_DR_MODE_HOST:
+ ret = cdns3_role_start(cdns, USB_ROLE_HOST);
+ if (ret)
+ goto err;
+ break;
+ default:
+ ret = -EINVAL;
+ goto err;
}
return ret;
diff --git a/drivers/usb/cdns3/ep0.c b/drivers/usb/cdns3/ep0.c
index 44f652e8b5a2..e71240b386b4 100644
--- a/drivers/usb/cdns3/ep0.c
+++ b/drivers/usb/cdns3/ep0.c
@@ -234,9 +234,11 @@ static int cdns3_req_ep0_set_address(struct cdns3_device *priv_dev,
static int cdns3_req_ep0_get_status(struct cdns3_device *priv_dev,
struct usb_ctrlrequest *ctrl)
{
+ struct cdns3_endpoint *priv_ep;
__le16 *response_pkt;
u16 usb_status = 0;
u32 recip;
+ u8 index;
recip = ctrl->bRequestType & USB_RECIP_MASK;
@@ -262,9 +264,13 @@ static int cdns3_req_ep0_get_status(struct cdns3_device *priv_dev,
case USB_RECIP_INTERFACE:
return cdns3_ep0_delegate_req(priv_dev, ctrl);
case USB_RECIP_ENDPOINT:
- /* check if endpoint is stalled */
+ index = cdns3_ep_addr_to_index(ctrl->wIndex);
+ priv_ep = priv_dev->eps[index];
+
+ /* check if endpoint is stalled or stall is pending */
cdns3_select_ep(priv_dev, ctrl->wIndex);
- if (EP_STS_STALL(readl(&priv_dev->regs->ep_sts)))
+ if (EP_STS_STALL(readl(&priv_dev->regs->ep_sts)) ||
+ (priv_ep->flags & EP_STALL_PENDING))
usb_status = BIT(USB_ENDPOINT_HALT);
break;
default:
@@ -332,7 +338,7 @@ static int cdns3_ep0_feature_handle_device(struct cdns3_device *priv_dev,
* for sending status stage.
* This time should be less then 3ms.
*/
- usleep_range(1000, 2000);
+ mdelay(1);
cdns3_set_register_bit(&priv_dev->regs->usb_cmd,
USB_CMD_STMODE |
USB_STS_TMODE_SEL(tmode - 1));
diff --git a/drivers/usb/cdns3/gadget.c b/drivers/usb/cdns3/gadget.c
index 228cdc4ab886..4c1e75509303 100644
--- a/drivers/usb/cdns3/gadget.c
+++ b/drivers/usb/cdns3/gadget.c
@@ -1145,6 +1145,14 @@ static void cdns3_transfer_completed(struct cdns3_device *priv_dev,
request = cdns3_next_request(&priv_ep->pending_req_list);
priv_req = to_cdns3_request(request);
+ trb = priv_ep->trb_pool + priv_ep->dequeue;
+
+ /* Request was dequeued and TRB was changed to TRB_LINK. */
+ if (TRB_FIELD_TO_TYPE(trb->control) == TRB_LINK) {
+ trace_cdns3_complete_trb(priv_ep, trb);
+ cdns3_move_deq_to_next_trb(priv_req);
+ }
+
/* Re-select endpoint. It could be changed by other CPU during
* handling usb_gadget_giveback_request.
*/
@@ -2067,6 +2075,7 @@ int cdns3_gadget_ep_dequeue(struct usb_ep *ep,
struct usb_request *req, *req_temp;
struct cdns3_request *priv_req;
struct cdns3_trb *link_trb;
+ u8 req_on_hw_ring = 0;
unsigned long flags;
int ret = 0;
@@ -2083,8 +2092,10 @@ int cdns3_gadget_ep_dequeue(struct usb_ep *ep,
list_for_each_entry_safe(req, req_temp, &priv_ep->pending_req_list,
list) {
- if (request == req)
+ if (request == req) {
+ req_on_hw_ring = 1;
goto found;
+ }
}
list_for_each_entry_safe(req, req_temp, &priv_ep->deferred_req_list,
@@ -2096,27 +2107,21 @@ int cdns3_gadget_ep_dequeue(struct usb_ep *ep,
goto not_found;
found:
-
- if (priv_ep->wa1_trb == priv_req->trb)
- cdns3_wa1_restore_cycle_bit(priv_ep);
-
link_trb = priv_req->trb;
- cdns3_move_deq_to_next_trb(priv_req);
- cdns3_gadget_giveback(priv_ep, priv_req, -ECONNRESET);
-
- /* Update ring */
- request = cdns3_next_request(&priv_ep->deferred_req_list);
- if (request) {
- priv_req = to_cdns3_request(request);
+ /* Update ring only if removed request is on pending_req_list list */
+ if (req_on_hw_ring) {
link_trb->buffer = TRB_BUFFER(priv_ep->trb_pool_dma +
(priv_req->start_trb * TRB_SIZE));
link_trb->control = (link_trb->control & TRB_CYCLE) |
- TRB_TYPE(TRB_LINK) | TRB_CHAIN | TRB_TOGGLE;
- } else {
- priv_ep->flags |= EP_UPDATE_EP_TRBADDR;
+ TRB_TYPE(TRB_LINK) | TRB_CHAIN;
+
+ if (priv_ep->wa1_trb == priv_req->trb)
+ cdns3_wa1_restore_cycle_bit(priv_ep);
}
+ cdns3_gadget_giveback(priv_ep, priv_req, -ECONNRESET);
+
not_found:
spin_unlock_irqrestore(&priv_dev->lock, flags);
return ret;
@@ -2324,8 +2329,6 @@ static void cdns3_gadget_config(struct cdns3_device *priv_dev)
writel(USB_CONF_CLK2OFFDS | USB_CONF_L1DS, &regs->usb_conf);
cdns3_configure_dmult(priv_dev, NULL);
-
- cdns3_gadget_pullup(&priv_dev->gadget, 1);
}
/**
@@ -2340,9 +2343,35 @@ static int cdns3_gadget_udc_start(struct usb_gadget *gadget,
{
struct cdns3_device *priv_dev = gadget_to_cdns3_device(gadget);
unsigned long flags;
+ enum usb_device_speed max_speed = driver->max_speed;
spin_lock_irqsave(&priv_dev->lock, flags);
priv_dev->gadget_driver = driver;
+
+ /* limit speed if necessary */
+ max_speed = min(driver->max_speed, gadget->max_speed);
+
+ switch (max_speed) {
+ case USB_SPEED_FULL:
+ writel(USB_CONF_SFORCE_FS, &priv_dev->regs->usb_conf);
+ writel(USB_CONF_USB3DIS, &priv_dev->regs->usb_conf);
+ break;
+ case USB_SPEED_HIGH:
+ writel(USB_CONF_USB3DIS, &priv_dev->regs->usb_conf);
+ break;
+ case USB_SPEED_SUPER:
+ break;
+ default:
+ dev_err(priv_dev->dev,
+ "invalid maximum_speed parameter %d\n",
+ max_speed);
+ /* fall through */
+ case USB_SPEED_UNKNOWN:
+ /* default to superspeed */
+ max_speed = USB_SPEED_SUPER;
+ break;
+ }
+
cdns3_gadget_config(priv_dev);
spin_unlock_irqrestore(&priv_dev->lock, flags);
return 0;
@@ -2376,6 +2405,8 @@ static int cdns3_gadget_udc_stop(struct usb_gadget *gadget)
writel(EP_CMD_EPRST, &priv_dev->regs->ep_cmd);
readl_poll_timeout_atomic(&priv_dev->regs->ep_cmd, val,
!(val & EP_CMD_EPRST), 1, 100);
+
+ priv_ep->flags &= ~EP_CLAIMED;
}
/* disable interrupt for device */
@@ -2570,11 +2601,7 @@ static int cdns3_gadget_start(struct cdns3 *cdns)
/* Check the maximum_speed parameter */
switch (max_speed) {
case USB_SPEED_FULL:
- writel(USB_CONF_SFORCE_FS, &priv_dev->regs->usb_conf);
- break;
case USB_SPEED_HIGH:
- writel(USB_CONF_USB3DIS, &priv_dev->regs->usb_conf);
- break;
case USB_SPEED_SUPER:
break;
default:
@@ -2662,6 +2689,13 @@ static int __cdns3_gadget_init(struct cdns3 *cdns)
{
int ret = 0;
+ /* Ensure 32-bit DMA Mask in case we switched back from Host mode */
+ ret = dma_set_mask_and_coherent(cdns->dev, DMA_BIT_MASK(32));
+ if (ret) {
+ dev_err(cdns->dev, "Failed to set dma mask: %d\n", ret);
+ return ret;
+ }
+
cdns3_drd_switch_gadget(cdns, 1);
pm_runtime_get_sync(cdns->dev);
@@ -2700,8 +2734,6 @@ static int cdns3_gadget_suspend(struct cdns3 *cdns, bool do_wakeup)
/* disable interrupt for device */
writel(0, &priv_dev->regs->usb_ien);
- cdns3_gadget_pullup(&priv_dev->gadget, 0);
-
return 0;
}
diff --git a/drivers/usb/cdns3/host-export.h b/drivers/usb/cdns3/host-export.h
index b498a170b7e8..ae11810f8826 100644
--- a/drivers/usb/cdns3/host-export.h
+++ b/drivers/usb/cdns3/host-export.h
@@ -12,7 +12,6 @@
#ifdef CONFIG_USB_CDNS3_HOST
int cdns3_host_init(struct cdns3 *cdns);
-void cdns3_host_exit(struct cdns3 *cdns);
#else
diff --git a/drivers/usb/cdns3/host.c b/drivers/usb/cdns3/host.c
index 2733a8f71fcd..ad788bf3fe4f 100644
--- a/drivers/usb/cdns3/host.c
+++ b/drivers/usb/cdns3/host.c
@@ -12,6 +12,7 @@
#include <linux/platform_device.h>
#include "core.h"
#include "drd.h"
+#include "host-export.h"
static int __cdns3_host_init(struct cdns3 *cdns)
{
diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c
index 7fea4999d352..0d8e3f3804a3 100644
--- a/drivers/usb/class/usblp.c
+++ b/drivers/usb/class/usblp.c
@@ -445,6 +445,7 @@ static void usblp_cleanup(struct usblp *usblp)
kfree(usblp->readbuf);
kfree(usblp->device_id_string);
kfree(usblp->statusbuf);
+ usb_put_intf(usblp->intf);
kfree(usblp);
}
@@ -461,10 +462,12 @@ static int usblp_release(struct inode *inode, struct file *file)
mutex_lock(&usblp_mutex);
usblp->used = 0;
- if (usblp->present) {
+ if (usblp->present)
usblp_unlink_urbs(usblp);
- usb_autopm_put_interface(usblp->intf);
- } else /* finish cleanup from disconnect */
+
+ usb_autopm_put_interface(usblp->intf);
+
+ if (!usblp->present) /* finish cleanup from disconnect */
usblp_cleanup(usblp);
mutex_unlock(&usblp_mutex);
return 0;
@@ -1111,7 +1114,7 @@ static int usblp_probe(struct usb_interface *intf,
init_waitqueue_head(&usblp->wwait);
init_usb_anchor(&usblp->urbs);
usblp->ifnum = intf->cur_altsetting->desc.bInterfaceNumber;
- usblp->intf = intf;
+ usblp->intf = usb_get_intf(intf);
/* Malloc device ID string buffer to the largest expected length,
* since we can re-query it on an ioctl and a dynamic string
@@ -1196,6 +1199,7 @@ abort:
kfree(usblp->readbuf);
kfree(usblp->statusbuf);
kfree(usblp->device_id_string);
+ usb_put_intf(usblp->intf);
kfree(usblp);
abort_ret:
return retval;
diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c
index 151a74a54386..1ac1095bfeac 100644
--- a/drivers/usb/core/config.c
+++ b/drivers/usb/core/config.c
@@ -348,6 +348,11 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum,
/* Validate the wMaxPacketSize field */
maxp = usb_endpoint_maxp(&endpoint->desc);
+ if (maxp == 0) {
+ dev_warn(ddev, "config %d interface %d altsetting %d endpoint 0x%X has wMaxPacketSize 0, skipping\n",
+ cfgno, inum, asnum, d->bEndpointAddress);
+ goto skip_to_next_endpoint_or_interface_descriptor;
+ }
/* Find the highest legal maxpacket size for this endpoint */
i = 0; /* additional transactions per microframe */
diff --git a/drivers/usb/dwc3/Kconfig b/drivers/usb/dwc3/Kconfig
index 89abc6078703..556a876c7896 100644
--- a/drivers/usb/dwc3/Kconfig
+++ b/drivers/usb/dwc3/Kconfig
@@ -102,6 +102,7 @@ config USB_DWC3_MESON_G12A
depends on ARCH_MESON || COMPILE_TEST
default USB_DWC3
select USB_ROLE_SWITCH
+ select REGMAP_MMIO
help
Support USB2/3 functionality in Amlogic G12A platforms.
Say 'Y' or 'M' if you have one such device.
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 999ce5e84d3c..97d6ae3c4df2 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -312,8 +312,7 @@ static void dwc3_frame_length_adjustment(struct dwc3 *dwc)
reg = dwc3_readl(dwc->regs, DWC3_GFLADJ);
dft = reg & DWC3_GFLADJ_30MHZ_MASK;
- if (!dev_WARN_ONCE(dwc->dev, dft == dwc->fladj,
- "request value same as default, ignoring\n")) {
+ if (dft != dwc->fladj) {
reg &= ~DWC3_GFLADJ_30MHZ_MASK;
reg |= DWC3_GFLADJ_30MHZ_SDBND_SEL | dwc->fladj;
dwc3_writel(dwc->regs, DWC3_GFLADJ, reg);
diff --git a/drivers/usb/dwc3/drd.c b/drivers/usb/dwc3/drd.c
index 726100d1ac0d..c946d64142ad 100644
--- a/drivers/usb/dwc3/drd.c
+++ b/drivers/usb/dwc3/drd.c
@@ -139,14 +139,14 @@ static int dwc3_otg_get_irq(struct dwc3 *dwc)
struct platform_device *dwc3_pdev = to_platform_device(dwc->dev);
int irq;
- irq = platform_get_irq_byname(dwc3_pdev, "otg");
+ irq = platform_get_irq_byname_optional(dwc3_pdev, "otg");
if (irq > 0)
goto out;
if (irq == -EPROBE_DEFER)
goto out;
- irq = platform_get_irq_byname(dwc3_pdev, "dwc_usb3");
+ irq = platform_get_irq_byname_optional(dwc3_pdev, "dwc_usb3");
if (irq > 0)
goto out;
@@ -157,9 +157,6 @@ static int dwc3_otg_get_irq(struct dwc3 *dwc)
if (irq > 0)
goto out;
- if (irq != -EPROBE_DEFER)
- dev_err(dwc->dev, "missing OTG IRQ\n");
-
if (!irq)
irq = -EINVAL;
diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
index 5e8e18222f92..023f0357efd7 100644
--- a/drivers/usb/dwc3/dwc3-pci.c
+++ b/drivers/usb/dwc3/dwc3-pci.c
@@ -258,7 +258,7 @@ static int dwc3_pci_probe(struct pci_dev *pci, const struct pci_device_id *id)
ret = platform_device_add_properties(dwc->dwc3, p);
if (ret < 0)
- return ret;
+ goto err;
ret = dwc3_pci_quirks(dwc);
if (ret)
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 8adb59f8e4f1..a9aba716bf80 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -707,6 +707,12 @@ static void dwc3_remove_requests(struct dwc3 *dwc, struct dwc3_ep *dep)
dwc3_gadget_giveback(dep, req, -ESHUTDOWN);
}
+
+ while (!list_empty(&dep->cancelled_list)) {
+ req = next_request(&dep->cancelled_list);
+
+ dwc3_gadget_giveback(dep, req, -ESHUTDOWN);
+ }
}
/**
@@ -3264,14 +3270,14 @@ static int dwc3_gadget_get_irq(struct dwc3 *dwc)
struct platform_device *dwc3_pdev = to_platform_device(dwc->dev);
int irq;
- irq = platform_get_irq_byname(dwc3_pdev, "peripheral");
+ irq = platform_get_irq_byname_optional(dwc3_pdev, "peripheral");
if (irq > 0)
goto out;
if (irq == -EPROBE_DEFER)
goto out;
- irq = platform_get_irq_byname(dwc3_pdev, "dwc_usb3");
+ irq = platform_get_irq_byname_optional(dwc3_pdev, "dwc_usb3");
if (irq > 0)
goto out;
@@ -3282,9 +3288,6 @@ static int dwc3_gadget_get_irq(struct dwc3 *dwc)
if (irq > 0)
goto out;
- if (irq != -EPROBE_DEFER)
- dev_err(dwc->dev, "missing peripheral IRQ\n");
-
if (!irq)
irq = -EINVAL;
diff --git a/drivers/usb/dwc3/host.c b/drivers/usb/dwc3/host.c
index 8deea8c91e03..5567ed2cddbe 100644
--- a/drivers/usb/dwc3/host.c
+++ b/drivers/usb/dwc3/host.c
@@ -16,14 +16,14 @@ static int dwc3_host_get_irq(struct dwc3 *dwc)
struct platform_device *dwc3_pdev = to_platform_device(dwc->dev);
int irq;
- irq = platform_get_irq_byname(dwc3_pdev, "host");
+ irq = platform_get_irq_byname_optional(dwc3_pdev, "host");
if (irq > 0)
goto out;
if (irq == -EPROBE_DEFER)
goto out;
- irq = platform_get_irq_byname(dwc3_pdev, "dwc_usb3");
+ irq = platform_get_irq_byname_optional(dwc3_pdev, "dwc_usb3");
if (irq > 0)
goto out;
@@ -34,9 +34,6 @@ static int dwc3_host_get_irq(struct dwc3 *dwc)
if (irq > 0)
goto out;
- if (irq != -EPROBE_DEFER)
- dev_err(dwc->dev, "missing host IRQ\n");
-
if (!irq)
irq = -EINVAL;
diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index d516e8d6cd7f..5ec54b69c29c 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -2170,14 +2170,18 @@ void composite_dev_cleanup(struct usb_composite_dev *cdev)
usb_ep_dequeue(cdev->gadget->ep0, cdev->os_desc_req);
kfree(cdev->os_desc_req->buf);
+ cdev->os_desc_req->buf = NULL;
usb_ep_free_request(cdev->gadget->ep0, cdev->os_desc_req);
+ cdev->os_desc_req = NULL;
}
if (cdev->req) {
if (cdev->setup_pending)
usb_ep_dequeue(cdev->gadget->ep0, cdev->req);
kfree(cdev->req->buf);
+ cdev->req->buf = NULL;
usb_ep_free_request(cdev->gadget->ep0, cdev->req);
+ cdev->req = NULL;
}
cdev->next_string_id = 0;
device_remove_file(&cdev->gadget->dev, &dev_attr_suspended);
diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c
index 025129942894..33852c2b29d1 100644
--- a/drivers/usb/gadget/configfs.c
+++ b/drivers/usb/gadget/configfs.c
@@ -61,6 +61,8 @@ struct gadget_info {
bool use_os_desc;
char b_vendor_code;
char qw_sign[OS_STRING_QW_SIGN_LEN];
+ spinlock_t spinlock;
+ bool unbind;
};
static inline struct gadget_info *to_gadget_info(struct config_item *item)
@@ -1244,6 +1246,7 @@ static int configfs_composite_bind(struct usb_gadget *gadget,
int ret;
/* the gi->lock is hold by the caller */
+ gi->unbind = 0;
cdev->gadget = gadget;
set_gadget_data(gadget, cdev);
ret = composite_dev_prepare(composite, cdev);
@@ -1376,31 +1379,128 @@ static void configfs_composite_unbind(struct usb_gadget *gadget)
{
struct usb_composite_dev *cdev;
struct gadget_info *gi;
+ unsigned long flags;
/* the gi->lock is hold by the caller */
cdev = get_gadget_data(gadget);
gi = container_of(cdev, struct gadget_info, cdev);
+ spin_lock_irqsave(&gi->spinlock, flags);
+ gi->unbind = 1;
+ spin_unlock_irqrestore(&gi->spinlock, flags);
kfree(otg_desc[0]);
otg_desc[0] = NULL;
purge_configs_funcs(gi);
composite_dev_cleanup(cdev);
usb_ep_autoconfig_reset(cdev->gadget);
+ spin_lock_irqsave(&gi->spinlock, flags);
cdev->gadget = NULL;
set_gadget_data(gadget, NULL);
+ spin_unlock_irqrestore(&gi->spinlock, flags);
+}
+
+static int configfs_composite_setup(struct usb_gadget *gadget,
+ const struct usb_ctrlrequest *ctrl)
+{
+ struct usb_composite_dev *cdev;
+ struct gadget_info *gi;
+ unsigned long flags;
+ int ret;
+
+ cdev = get_gadget_data(gadget);
+ if (!cdev)
+ return 0;
+
+ gi = container_of(cdev, struct gadget_info, cdev);
+ spin_lock_irqsave(&gi->spinlock, flags);
+ cdev = get_gadget_data(gadget);
+ if (!cdev || gi->unbind) {
+ spin_unlock_irqrestore(&gi->spinlock, flags);
+ return 0;
+ }
+
+ ret = composite_setup(gadget, ctrl);
+ spin_unlock_irqrestore(&gi->spinlock, flags);
+ return ret;
+}
+
+static void configfs_composite_disconnect(struct usb_gadget *gadget)
+{
+ struct usb_composite_dev *cdev;
+ struct gadget_info *gi;
+ unsigned long flags;
+
+ cdev = get_gadget_data(gadget);
+ if (!cdev)
+ return;
+
+ gi = container_of(cdev, struct gadget_info, cdev);
+ spin_lock_irqsave(&gi->spinlock, flags);
+ cdev = get_gadget_data(gadget);
+ if (!cdev || gi->unbind) {
+ spin_unlock_irqrestore(&gi->spinlock, flags);
+ return;
+ }
+
+ composite_disconnect(gadget);
+ spin_unlock_irqrestore(&gi->spinlock, flags);
+}
+
+static void configfs_composite_suspend(struct usb_gadget *gadget)
+{
+ struct usb_composite_dev *cdev;
+ struct gadget_info *gi;
+ unsigned long flags;
+
+ cdev = get_gadget_data(gadget);
+ if (!cdev)
+ return;
+
+ gi = container_of(cdev, struct gadget_info, cdev);
+ spin_lock_irqsave(&gi->spinlock, flags);
+ cdev = get_gadget_data(gadget);
+ if (!cdev || gi->unbind) {
+ spin_unlock_irqrestore(&gi->spinlock, flags);
+ return;
+ }
+
+ composite_suspend(gadget);
+ spin_unlock_irqrestore(&gi->spinlock, flags);
+}
+
+static void configfs_composite_resume(struct usb_gadget *gadget)
+{
+ struct usb_composite_dev *cdev;
+ struct gadget_info *gi;
+ unsigned long flags;
+
+ cdev = get_gadget_data(gadget);
+ if (!cdev)
+ return;
+
+ gi = container_of(cdev, struct gadget_info, cdev);
+ spin_lock_irqsave(&gi->spinlock, flags);
+ cdev = get_gadget_data(gadget);
+ if (!cdev || gi->unbind) {
+ spin_unlock_irqrestore(&gi->spinlock, flags);
+ return;
+ }
+
+ composite_resume(gadget);
+ spin_unlock_irqrestore(&gi->spinlock, flags);
}
static const struct usb_gadget_driver configfs_driver_template = {
.bind = configfs_composite_bind,
.unbind = configfs_composite_unbind,
- .setup = composite_setup,
- .reset = composite_disconnect,
- .disconnect = composite_disconnect,
+ .setup = configfs_composite_setup,
+ .reset = configfs_composite_disconnect,
+ .disconnect = configfs_composite_disconnect,
- .suspend = composite_suspend,
- .resume = composite_resume,
+ .suspend = configfs_composite_suspend,
+ .resume = configfs_composite_resume,
.max_speed = USB_SPEED_SUPER,
.driver = {
diff --git a/drivers/usb/gadget/udc/Kconfig b/drivers/usb/gadget/udc/Kconfig
index d7e611645533..d354036ff6c8 100644
--- a/drivers/usb/gadget/udc/Kconfig
+++ b/drivers/usb/gadget/udc/Kconfig
@@ -45,7 +45,7 @@ config USB_AT91
config USB_LPC32XX
tristate "LPC32XX USB Peripheral Controller"
- depends on ARCH_LPC32XX
+ depends on ARCH_LPC32XX || COMPILE_TEST
depends on I2C
select USB_ISP1301
help
diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.c b/drivers/usb/gadget/udc/atmel_usba_udc.c
index 86ffc8307864..1d0d8952a74b 100644
--- a/drivers/usb/gadget/udc/atmel_usba_udc.c
+++ b/drivers/usb/gadget/udc/atmel_usba_udc.c
@@ -449,9 +449,11 @@ static void submit_request(struct usba_ep *ep, struct usba_request *req)
next_fifo_transaction(ep, req);
if (req->last_transaction) {
usba_ep_writel(ep, CTL_DIS, USBA_TX_PK_RDY);
- usba_ep_writel(ep, CTL_ENB, USBA_TX_COMPLETE);
+ if (ep_is_control(ep))
+ usba_ep_writel(ep, CTL_ENB, USBA_TX_COMPLETE);
} else {
- usba_ep_writel(ep, CTL_DIS, USBA_TX_COMPLETE);
+ if (ep_is_control(ep))
+ usba_ep_writel(ep, CTL_DIS, USBA_TX_COMPLETE);
usba_ep_writel(ep, CTL_ENB, USBA_TX_PK_RDY);
}
}
diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
index 92af8dc98c3d..51fa614b4079 100644
--- a/drivers/usb/gadget/udc/core.c
+++ b/drivers/usb/gadget/udc/core.c
@@ -98,6 +98,17 @@ int usb_ep_enable(struct usb_ep *ep)
if (ep->enabled)
goto out;
+ /* UDC drivers can't handle endpoints with maxpacket size 0 */
+ if (usb_endpoint_maxp(ep->desc) == 0) {
+ /*
+ * We should log an error message here, but we can't call
+ * dev_err() because there's no way to find the gadget
+ * given only ep.
+ */
+ ret = -EINVAL;
+ goto out;
+ }
+
ret = ep->ops->enable(ep, ep->desc);
if (ret)
goto out;
diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c
index 8414fac74493..3d499d93c083 100644
--- a/drivers/usb/gadget/udc/dummy_hcd.c
+++ b/drivers/usb/gadget/udc/dummy_hcd.c
@@ -48,6 +48,7 @@
#define DRIVER_VERSION "02 May 2005"
#define POWER_BUDGET 500 /* in mA; use 8 for low-power port testing */
+#define POWER_BUDGET_3 900 /* in mA */
static const char driver_name[] = "dummy_hcd";
static const char driver_desc[] = "USB Host+Gadget Emulator";
@@ -2432,7 +2433,7 @@ static int dummy_start_ss(struct dummy_hcd *dum_hcd)
dum_hcd->rh_state = DUMMY_RH_RUNNING;
dum_hcd->stream_en_ep = 0;
INIT_LIST_HEAD(&dum_hcd->urbp_list);
- dummy_hcd_to_hcd(dum_hcd)->power_budget = POWER_BUDGET;
+ dummy_hcd_to_hcd(dum_hcd)->power_budget = POWER_BUDGET_3;
dummy_hcd_to_hcd(dum_hcd)->state = HC_STATE_RUNNING;
dummy_hcd_to_hcd(dum_hcd)->uses_new_polling = 1;
#ifdef CONFIG_USB_OTG
diff --git a/drivers/usb/gadget/udc/fsl_udc_core.c b/drivers/usb/gadget/udc/fsl_udc_core.c
index 20141c3096f6..9a05863b2876 100644
--- a/drivers/usb/gadget/udc/fsl_udc_core.c
+++ b/drivers/usb/gadget/udc/fsl_udc_core.c
@@ -2576,7 +2576,7 @@ static int fsl_udc_remove(struct platform_device *pdev)
dma_pool_destroy(udc_controller->td_pool);
free_irq(udc_controller->irq, udc_controller);
iounmap(dr_regs);
- if (pdata->operating_mode == FSL_USB2_DR_DEVICE)
+ if (res && (pdata->operating_mode == FSL_USB2_DR_DEVICE))
release_mem_region(res->start, resource_size(res));
/* free udc --wait for the release() finished */
diff --git a/drivers/usb/gadget/udc/lpc32xx_udc.c b/drivers/usb/gadget/udc/lpc32xx_udc.c
index b3e073fb88c6..bf6c81e2f8cc 100644
--- a/drivers/usb/gadget/udc/lpc32xx_udc.c
+++ b/drivers/usb/gadget/udc/lpc32xx_udc.c
@@ -1151,7 +1151,7 @@ static void udc_pop_fifo(struct lpc32xx_udc *udc, u8 *data, u32 bytes)
u32 *p32, tmp, cbytes;
/* Use optimal data transfer method based on source address and size */
- switch (((u32) data) & 0x3) {
+ switch (((uintptr_t) data) & 0x3) {
case 0: /* 32-bit aligned */
p32 = (u32 *) data;
cbytes = (bytes & ~0x3);
@@ -1177,11 +1177,11 @@ static void udc_pop_fifo(struct lpc32xx_udc *udc, u8 *data, u32 bytes)
tmp = readl(USBD_RXDATA(udc->udp_baseaddr));
bl = bytes - n;
- if (bl > 3)
- bl = 3;
+ if (bl > 4)
+ bl = 4;
for (i = 0; i < bl; i++)
- data[n + i] = (u8) ((tmp >> (n * 8)) & 0xFF);
+ data[n + i] = (u8) ((tmp >> (i * 8)) & 0xFF);
}
break;
@@ -1252,7 +1252,7 @@ static void udc_stuff_fifo(struct lpc32xx_udc *udc, u8 *data, u32 bytes)
u32 *p32, tmp, cbytes;
/* Use optimal data transfer method based on source address and size */
- switch (((u32) data) & 0x3) {
+ switch (((uintptr_t) data) & 0x3) {
case 0: /* 32-bit aligned */
p32 = (u32 *) data;
cbytes = (bytes & ~0x3);
diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c
index e098f16c01cb..33703140233a 100644
--- a/drivers/usb/gadget/udc/renesas_usb3.c
+++ b/drivers/usb/gadget/udc/renesas_usb3.c
@@ -1544,10 +1544,10 @@ static void usb3_set_device_address(struct renesas_usb3 *usb3, u16 addr)
static bool usb3_std_req_set_address(struct renesas_usb3 *usb3,
struct usb_ctrlrequest *ctrl)
{
- if (ctrl->wValue >= 128)
+ if (le16_to_cpu(ctrl->wValue) >= 128)
return true; /* stall */
- usb3_set_device_address(usb3, ctrl->wValue);
+ usb3_set_device_address(usb3, le16_to_cpu(ctrl->wValue));
usb3_set_p0_con_for_no_data(usb3);
return false;
@@ -1582,6 +1582,7 @@ static bool usb3_std_req_get_status(struct renesas_usb3 *usb3,
struct renesas_usb3_ep *usb3_ep;
int num;
u16 status = 0;
+ __le16 tx_data;
switch (ctrl->bRequestType & USB_RECIP_MASK) {
case USB_RECIP_DEVICE:
@@ -1604,10 +1605,10 @@ static bool usb3_std_req_get_status(struct renesas_usb3 *usb3,
}
if (!stall) {
- status = cpu_to_le16(status);
+ tx_data = cpu_to_le16(status);
dev_dbg(usb3_to_dev(usb3), "get_status: req = %p\n",
usb_req_to_usb3_req(usb3->ep0_req));
- usb3_pipe0_internal_xfer(usb3, &status, sizeof(status),
+ usb3_pipe0_internal_xfer(usb3, &tx_data, sizeof(tx_data),
usb3_pipe0_get_status_completion);
}
@@ -1772,7 +1773,7 @@ static bool usb3_std_req_set_sel(struct renesas_usb3 *usb3,
static bool usb3_std_req_set_configuration(struct renesas_usb3 *usb3,
struct usb_ctrlrequest *ctrl)
{
- if (ctrl->wValue > 0)
+ if (le16_to_cpu(ctrl->wValue) > 0)
usb3_set_bit(usb3, USB_COM_CON_CONF, USB3_USB_COM_CON);
else
usb3_clear_bit(usb3, USB_COM_CON_CONF, USB3_USB_COM_CON);
diff --git a/drivers/usb/host/xhci-debugfs.c b/drivers/usb/host/xhci-debugfs.c
index 7ba6afc7ef23..76c3f29562d2 100644
--- a/drivers/usb/host/xhci-debugfs.c
+++ b/drivers/usb/host/xhci-debugfs.c
@@ -202,10 +202,10 @@ static void xhci_ring_dump_segment(struct seq_file *s,
trb = &seg->trbs[i];
dma = seg->dma + i * sizeof(*trb);
seq_printf(s, "%pad: %s\n", &dma,
- xhci_decode_trb(trb->generic.field[0],
- trb->generic.field[1],
- trb->generic.field[2],
- trb->generic.field[3]));
+ xhci_decode_trb(le32_to_cpu(trb->generic.field[0]),
+ le32_to_cpu(trb->generic.field[1]),
+ le32_to_cpu(trb->generic.field[2]),
+ le32_to_cpu(trb->generic.field[3])));
}
}
@@ -263,10 +263,10 @@ static int xhci_slot_context_show(struct seq_file *s, void *unused)
xhci = hcd_to_xhci(bus_to_hcd(dev->udev->bus));
slot_ctx = xhci_get_slot_ctx(xhci, dev->out_ctx);
seq_printf(s, "%pad: %s\n", &dev->out_ctx->dma,
- xhci_decode_slot_context(slot_ctx->dev_info,
- slot_ctx->dev_info2,
- slot_ctx->tt_info,
- slot_ctx->dev_state));
+ xhci_decode_slot_context(le32_to_cpu(slot_ctx->dev_info),
+ le32_to_cpu(slot_ctx->dev_info2),
+ le32_to_cpu(slot_ctx->tt_info),
+ le32_to_cpu(slot_ctx->dev_state)));
return 0;
}
@@ -286,10 +286,10 @@ static int xhci_endpoint_context_show(struct seq_file *s, void *unused)
ep_ctx = xhci_get_ep_ctx(xhci, dev->out_ctx, dci);
dma = dev->out_ctx->dma + dci * CTX_SIZE(xhci->hcc_params);
seq_printf(s, "%pad: %s\n", &dma,
- xhci_decode_ep_context(ep_ctx->ep_info,
- ep_ctx->ep_info2,
- ep_ctx->deq,
- ep_ctx->tx_info));
+ xhci_decode_ep_context(le32_to_cpu(ep_ctx->ep_info),
+ le32_to_cpu(ep_ctx->ep_info2),
+ le64_to_cpu(ep_ctx->deq),
+ le32_to_cpu(ep_ctx->tx_info)));
}
return 0;
diff --git a/drivers/usb/host/xhci-ext-caps.c b/drivers/usb/host/xhci-ext-caps.c
index f498160df969..3351d07c431f 100644
--- a/drivers/usb/host/xhci-ext-caps.c
+++ b/drivers/usb/host/xhci-ext-caps.c
@@ -57,6 +57,7 @@ static int xhci_create_intel_xhci_sw_pdev(struct xhci_hcd *xhci, u32 cap_offset)
ret = platform_device_add_properties(pdev, role_switch_props);
if (ret) {
dev_err(dev, "failed to register device properties\n");
+ platform_device_put(pdev);
return ret;
}
}
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 9741cdeea9d7..e7aab31fd9a5 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -3202,10 +3202,10 @@ static int xhci_align_td(struct xhci_hcd *xhci, struct urb *urb, u32 enqd_len,
if (usb_urb_dir_out(urb)) {
len = sg_pcopy_to_buffer(urb->sg, urb->num_sgs,
seg->bounce_buf, new_buff_len, enqd_len);
- if (len != seg->bounce_len)
+ if (len != new_buff_len)
xhci_warn(xhci,
"WARN Wrong bounce buffer write length: %zu != %d\n",
- len, seg->bounce_len);
+ len, new_buff_len);
seg->bounce_dma = dma_map_single(dev, seg->bounce_buf,
max_pkt, DMA_TO_DEVICE);
} else {
@@ -3330,6 +3330,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
if (xhci_urb_suitable_for_idt(urb)) {
memcpy(&send_addr, urb->transfer_buffer,
trb_buff_len);
+ le64_to_cpus(&send_addr);
field |= TRB_IDT;
}
}
@@ -3475,6 +3476,7 @@ int xhci_queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
if (xhci_urb_suitable_for_idt(urb)) {
memcpy(&addr, urb->transfer_buffer,
urb->transfer_buffer_length);
+ le64_to_cpus(&addr);
field |= TRB_IDT;
} else {
addr = (u64) urb->transfer_dma;
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 500865975687..6c17e3fe181a 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -1032,7 +1032,7 @@ int xhci_suspend(struct xhci_hcd *xhci, bool do_wakeup)
writel(command, &xhci->op_regs->command);
xhci->broken_suspend = 0;
if (xhci_handshake(&xhci->op_regs->status,
- STS_SAVE, 0, 10 * 1000)) {
+ STS_SAVE, 0, 20 * 1000)) {
/*
* AMD SNPS xHC 3.0 occasionally does not clear the
* SSS bit of USBSTS and when driver tries to poll
@@ -1108,6 +1108,18 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated)
hibernated = true;
if (!hibernated) {
+ /*
+ * Some controllers might lose power during suspend, so wait
+ * for controller not ready bit to clear, just as in xHC init.
+ */
+ retval = xhci_handshake(&xhci->op_regs->status,
+ STS_CNR, 0, 10 * 1000 * 1000);
+ if (retval) {
+ xhci_warn(xhci, "Controller not ready at resume %d\n",
+ retval);
+ spin_unlock_irq(&xhci->lock);
+ return retval;
+ }
/* step 1: restore register */
xhci_restore_registers(xhci);
/* step 2: initialize command ring buffer */
@@ -3059,6 +3071,48 @@ void xhci_cleanup_stalled_ring(struct xhci_hcd *xhci, unsigned int ep_index,
}
}
+static void xhci_endpoint_disable(struct usb_hcd *hcd,
+ struct usb_host_endpoint *host_ep)
+{
+ struct xhci_hcd *xhci;
+ struct xhci_virt_device *vdev;
+ struct xhci_virt_ep *ep;
+ struct usb_device *udev;
+ unsigned long flags;
+ unsigned int ep_index;
+
+ xhci = hcd_to_xhci(hcd);
+rescan:
+ spin_lock_irqsave(&xhci->lock, flags);
+
+ udev = (struct usb_device *)host_ep->hcpriv;
+ if (!udev || !udev->slot_id)
+ goto done;
+
+ vdev = xhci->devs[udev->slot_id];
+ if (!vdev)
+ goto done;
+
+ ep_index = xhci_get_endpoint_index(&host_ep->desc);
+ ep = &vdev->eps[ep_index];
+ if (!ep)
+ goto done;
+
+ /* wait for hub_tt_work to finish clearing hub TT */
+ if (ep->ep_state & EP_CLEARING_TT) {
+ spin_unlock_irqrestore(&xhci->lock, flags);
+ schedule_timeout_uninterruptible(1);
+ goto rescan;
+ }
+
+ if (ep->ep_state)
+ xhci_dbg(xhci, "endpoint disable with ep_state 0x%x\n",
+ ep->ep_state);
+done:
+ host_ep->hcpriv = NULL;
+ spin_unlock_irqrestore(&xhci->lock, flags);
+}
+
/*
* Called after usb core issues a clear halt control message.
* The host side of the halt should already be cleared by a reset endpoint
@@ -3083,6 +3137,7 @@ static void xhci_endpoint_reset(struct usb_hcd *hcd,
unsigned int ep_index;
unsigned long flags;
u32 ep_flag;
+ int err;
xhci = hcd_to_xhci(hcd);
if (!host_ep->hcpriv)
@@ -3142,7 +3197,17 @@ static void xhci_endpoint_reset(struct usb_hcd *hcd,
xhci_free_command(xhci, cfg_cmd);
goto cleanup;
}
- xhci_queue_stop_endpoint(xhci, stop_cmd, udev->slot_id, ep_index, 0);
+
+ err = xhci_queue_stop_endpoint(xhci, stop_cmd, udev->slot_id,
+ ep_index, 0);
+ if (err < 0) {
+ spin_unlock_irqrestore(&xhci->lock, flags);
+ xhci_free_command(xhci, cfg_cmd);
+ xhci_dbg(xhci, "%s: Failed to queue stop ep command, %d ",
+ __func__, err);
+ goto cleanup;
+ }
+
xhci_ring_cmd_db(xhci);
spin_unlock_irqrestore(&xhci->lock, flags);
@@ -3156,8 +3221,16 @@ static void xhci_endpoint_reset(struct usb_hcd *hcd,
ctrl_ctx, ep_flag, ep_flag);
xhci_endpoint_copy(xhci, cfg_cmd->in_ctx, vdev->out_ctx, ep_index);
- xhci_queue_configure_endpoint(xhci, cfg_cmd, cfg_cmd->in_ctx->dma,
+ err = xhci_queue_configure_endpoint(xhci, cfg_cmd, cfg_cmd->in_ctx->dma,
udev->slot_id, false);
+ if (err < 0) {
+ spin_unlock_irqrestore(&xhci->lock, flags);
+ xhci_free_command(xhci, cfg_cmd);
+ xhci_dbg(xhci, "%s: Failed to queue config ep command, %d ",
+ __func__, err);
+ goto cleanup;
+ }
+
xhci_ring_cmd_db(xhci);
spin_unlock_irqrestore(&xhci->lock, flags);
@@ -4674,12 +4747,12 @@ static int xhci_update_timeout_for_endpoint(struct xhci_hcd *xhci,
alt_timeout = xhci_call_host_update_timeout_for_endpoint(xhci, udev,
desc, state, timeout);
- /* If we found we can't enable hub-initiated LPM, or
+ /* If we found we can't enable hub-initiated LPM, and
* the U1 or U2 exit latency was too high to allow
- * device-initiated LPM as well, just stop searching.
+ * device-initiated LPM as well, then we will disable LPM
+ * for this device, so stop searching any further.
*/
- if (alt_timeout == USB3_LPM_DISABLED ||
- alt_timeout == USB3_LPM_DEVICE_INITIATED) {
+ if (alt_timeout == USB3_LPM_DISABLED) {
*timeout = alt_timeout;
return -E2BIG;
}
@@ -4790,10 +4863,12 @@ static u16 xhci_calculate_lpm_timeout(struct usb_hcd *hcd,
if (intf->dev.driver) {
driver = to_usb_driver(intf->dev.driver);
if (driver && driver->disable_hub_initiated_lpm) {
- dev_dbg(&udev->dev, "Hub-initiated %s disabled "
- "at request of driver %s\n",
- state_name, driver->name);
- return xhci_get_timeout_no_hub_lpm(udev, state);
+ dev_dbg(&udev->dev, "Hub-initiated %s disabled at request of driver %s\n",
+ state_name, driver->name);
+ timeout = xhci_get_timeout_no_hub_lpm(udev,
+ state);
+ if (timeout == USB3_LPM_DISABLED)
+ return timeout;
}
}
@@ -5077,11 +5152,18 @@ int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks)
hcd->has_tt = 1;
} else {
/*
- * Some 3.1 hosts return sbrn 0x30, use xhci supported protocol
- * minor revision instead of sbrn. Minor revision is a two digit
- * BCD containing minor and sub-minor numbers, only show minor.
+ * Early xHCI 1.1 spec did not mention USB 3.1 capable hosts
+ * should return 0x31 for sbrn, or that the minor revision
+ * is a two digit BCD containig minor and sub-minor numbers.
+ * This was later clarified in xHCI 1.2.
+ *
+ * Some USB 3.1 capable hosts therefore have sbrn 0x30, and
+ * minor revision set to 0x1 instead of 0x10.
*/
- minor_rev = xhci->usb3_rhub.min_rev / 0x10;
+ if (xhci->usb3_rhub.min_rev == 0x1)
+ minor_rev = 1;
+ else
+ minor_rev = xhci->usb3_rhub.min_rev / 0x10;
switch (minor_rev) {
case 2:
@@ -5199,11 +5281,12 @@ static void xhci_clear_tt_buffer_complete(struct usb_hcd *hcd,
unsigned long flags;
xhci = hcd_to_xhci(hcd);
+
+ spin_lock_irqsave(&xhci->lock, flags);
udev = (struct usb_device *)ep->hcpriv;
slot_id = udev->slot_id;
ep_index = xhci_get_endpoint_index(&ep->desc);
- spin_lock_irqsave(&xhci->lock, flags);
xhci->devs[slot_id]->eps[ep_index].ep_state &= ~EP_CLEARING_TT;
xhci_ring_doorbell_for_active_rings(xhci, slot_id, ep_index);
spin_unlock_irqrestore(&xhci->lock, flags);
@@ -5240,6 +5323,7 @@ static const struct hc_driver xhci_hc_driver = {
.free_streams = xhci_free_streams,
.add_endpoint = xhci_add_endpoint,
.drop_endpoint = xhci_drop_endpoint,
+ .endpoint_disable = xhci_endpoint_disable,
.endpoint_reset = xhci_endpoint_reset,
.check_bandwidth = xhci_check_bandwidth,
.reset_bandwidth = xhci_reset_bandwidth,
diff --git a/drivers/usb/image/microtek.c b/drivers/usb/image/microtek.c
index 0a57c2cc8e5a..7a6b122c833f 100644
--- a/drivers/usb/image/microtek.c
+++ b/drivers/usb/image/microtek.c
@@ -716,6 +716,10 @@ static int mts_usb_probe(struct usb_interface *intf,
}
+ if (ep_in_current != &ep_in_set[2]) {
+ MTS_WARNING("couldn't find two input bulk endpoints. Bailing out.\n");
+ return -ENODEV;
+ }
if ( ep_out == -1 ) {
MTS_WARNING( "couldn't find an output bulk endpoint. Bailing out.\n" );
diff --git a/drivers/usb/misc/Kconfig b/drivers/usb/misc/Kconfig
index bdae62b2ffe0..9bce583aada3 100644
--- a/drivers/usb/misc/Kconfig
+++ b/drivers/usb/misc/Kconfig
@@ -47,16 +47,6 @@ config USB_SEVSEG
To compile this driver as a module, choose M here: the
module will be called usbsevseg.
-config USB_RIO500
- tristate "USB Diamond Rio500 support"
- help
- Say Y here if you want to connect a USB Rio500 mp3 player to your
- computer's USB port. Please read <file:Documentation/usb/rio.rst>
- for more information.
-
- To compile this driver as a module, choose M here: the
- module will be called rio500.
-
config USB_LEGOTOWER
tristate "USB Lego Infrared Tower support"
help
diff --git a/drivers/usb/misc/Makefile b/drivers/usb/misc/Makefile
index 109f54f5b9aa..0d416eb624bb 100644
--- a/drivers/usb/misc/Makefile
+++ b/drivers/usb/misc/Makefile
@@ -17,7 +17,6 @@ obj-$(CONFIG_USB_ISIGHTFW) += isight_firmware.o
obj-$(CONFIG_USB_LCD) += usblcd.o
obj-$(CONFIG_USB_LD) += ldusb.o
obj-$(CONFIG_USB_LEGOTOWER) += legousbtower.o
-obj-$(CONFIG_USB_RIO500) += rio500.o
obj-$(CONFIG_USB_TEST) += usbtest.o
obj-$(CONFIG_USB_EHSET_TEST_FIXTURE) += ehset.o
obj-$(CONFIG_USB_TRANCEVIBRATOR) += trancevibrator.o
diff --git a/drivers/usb/misc/adutux.c b/drivers/usb/misc/adutux.c
index 344d523b0502..6f5edb9fc61e 100644
--- a/drivers/usb/misc/adutux.c
+++ b/drivers/usb/misc/adutux.c
@@ -75,6 +75,7 @@ struct adu_device {
char serial_number[8];
int open_count; /* number of times this port has been opened */
+ unsigned long disconnected:1;
char *read_buffer_primary;
int read_buffer_length;
@@ -116,7 +117,7 @@ static void adu_abort_transfers(struct adu_device *dev)
{
unsigned long flags;
- if (dev->udev == NULL)
+ if (dev->disconnected)
return;
/* shutdown transfer */
@@ -148,6 +149,7 @@ static void adu_delete(struct adu_device *dev)
kfree(dev->read_buffer_secondary);
kfree(dev->interrupt_in_buffer);
kfree(dev->interrupt_out_buffer);
+ usb_put_dev(dev->udev);
kfree(dev);
}
@@ -243,7 +245,7 @@ static int adu_open(struct inode *inode, struct file *file)
}
dev = usb_get_intfdata(interface);
- if (!dev || !dev->udev) {
+ if (!dev) {
retval = -ENODEV;
goto exit_no_device;
}
@@ -326,7 +328,7 @@ static int adu_release(struct inode *inode, struct file *file)
}
adu_release_internal(dev);
- if (dev->udev == NULL) {
+ if (dev->disconnected) {
/* the device was unplugged before the file was released */
if (!dev->open_count) /* ... and we're the last user */
adu_delete(dev);
@@ -354,7 +356,7 @@ static ssize_t adu_read(struct file *file, __user char *buffer, size_t count,
return -ERESTARTSYS;
/* verify that the device wasn't unplugged */
- if (dev->udev == NULL) {
+ if (dev->disconnected) {
retval = -ENODEV;
pr_err("No device or device unplugged %d\n", retval);
goto exit;
@@ -518,7 +520,7 @@ static ssize_t adu_write(struct file *file, const __user char *buffer,
goto exit_nolock;
/* verify that the device wasn't unplugged */
- if (dev->udev == NULL) {
+ if (dev->disconnected) {
retval = -ENODEV;
pr_err("No device or device unplugged %d\n", retval);
goto exit;
@@ -663,7 +665,7 @@ static int adu_probe(struct usb_interface *interface,
mutex_init(&dev->mtx);
spin_lock_init(&dev->buflock);
- dev->udev = udev;
+ dev->udev = usb_get_dev(udev);
init_waitqueue_head(&dev->read_wait);
init_waitqueue_head(&dev->write_wait);
@@ -762,14 +764,18 @@ static void adu_disconnect(struct usb_interface *interface)
dev = usb_get_intfdata(interface);
- mutex_lock(&dev->mtx); /* not interruptible */
- dev->udev = NULL; /* poison */
usb_deregister_dev(interface, &adu_class);
- mutex_unlock(&dev->mtx);
+
+ usb_poison_urb(dev->interrupt_in_urb);
+ usb_poison_urb(dev->interrupt_out_urb);
mutex_lock(&adutux_mutex);
usb_set_intfdata(interface, NULL);
+ mutex_lock(&dev->mtx); /* not interruptible */
+ dev->disconnected = 1;
+ mutex_unlock(&dev->mtx);
+
/* if the device is not opened, then we clean up right now */
if (!dev->open_count)
adu_delete(dev);
diff --git a/drivers/usb/misc/chaoskey.c b/drivers/usb/misc/chaoskey.c
index cf5828ce927a..34e6cd6f40d3 100644
--- a/drivers/usb/misc/chaoskey.c
+++ b/drivers/usb/misc/chaoskey.c
@@ -98,6 +98,7 @@ static void chaoskey_free(struct chaoskey *dev)
usb_free_urb(dev->urb);
kfree(dev->name);
kfree(dev->buf);
+ usb_put_intf(dev->interface);
kfree(dev);
}
}
@@ -145,6 +146,8 @@ static int chaoskey_probe(struct usb_interface *interface,
if (dev == NULL)
goto out;
+ dev->interface = usb_get_intf(interface);
+
dev->buf = kmalloc(size, GFP_KERNEL);
if (dev->buf == NULL)
@@ -174,8 +177,6 @@ static int chaoskey_probe(struct usb_interface *interface,
goto out;
}
- dev->interface = interface;
-
dev->in_ep = in_ep;
if (le16_to_cpu(udev->descriptor.idVendor) != ALEA_VENDOR_ID)
diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c
index f5bed9f29e56..dce44fbf031f 100644
--- a/drivers/usb/misc/iowarrior.c
+++ b/drivers/usb/misc/iowarrior.c
@@ -54,11 +54,7 @@ MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_LICENSE("GPL");
-/* Module parameters */
-static DEFINE_MUTEX(iowarrior_mutex);
-
static struct usb_driver iowarrior_driver;
-static DEFINE_MUTEX(iowarrior_open_disc_lock);
/*--------------*/
/* data */
@@ -87,6 +83,7 @@ struct iowarrior {
char chip_serial[9]; /* the serial number string of the chip connected */
int report_size; /* number of bytes in a report */
u16 product_id;
+ struct usb_anchor submitted;
};
/*--------------*/
@@ -243,6 +240,7 @@ static inline void iowarrior_delete(struct iowarrior *dev)
kfree(dev->int_in_buffer);
usb_free_urb(dev->int_in_urb);
kfree(dev->read_queue);
+ usb_put_intf(dev->interface);
kfree(dev);
}
@@ -424,11 +422,13 @@ static ssize_t iowarrior_write(struct file *file,
retval = -EFAULT;
goto error;
}
+ usb_anchor_urb(int_out_urb, &dev->submitted);
retval = usb_submit_urb(int_out_urb, GFP_KERNEL);
if (retval) {
dev_dbg(&dev->interface->dev,
"submit error %d for urb nr.%d\n",
retval, atomic_read(&dev->write_busy));
+ usb_unanchor_urb(int_out_urb);
goto error;
}
/* submit was ok */
@@ -477,8 +477,6 @@ static long iowarrior_ioctl(struct file *file, unsigned int cmd,
if (!buffer)
return -ENOMEM;
- /* lock this object */
- mutex_lock(&iowarrior_mutex);
mutex_lock(&dev->mutex);
/* verify that the device wasn't unplugged */
@@ -571,7 +569,6 @@ static long iowarrior_ioctl(struct file *file, unsigned int cmd,
error_out:
/* unlock the device */
mutex_unlock(&dev->mutex);
- mutex_unlock(&iowarrior_mutex);
kfree(buffer);
return retval;
}
@@ -586,27 +583,20 @@ static int iowarrior_open(struct inode *inode, struct file *file)
int subminor;
int retval = 0;
- mutex_lock(&iowarrior_mutex);
subminor = iminor(inode);
interface = usb_find_interface(&iowarrior_driver, subminor);
if (!interface) {
- mutex_unlock(&iowarrior_mutex);
- printk(KERN_ERR "%s - error, can't find device for minor %d\n",
+ pr_err("%s - error, can't find device for minor %d\n",
__func__, subminor);
return -ENODEV;
}
- mutex_lock(&iowarrior_open_disc_lock);
dev = usb_get_intfdata(interface);
- if (!dev) {
- mutex_unlock(&iowarrior_open_disc_lock);
- mutex_unlock(&iowarrior_mutex);
+ if (!dev)
return -ENODEV;
- }
mutex_lock(&dev->mutex);
- mutex_unlock(&iowarrior_open_disc_lock);
/* Only one process can open each device, no sharing. */
if (dev->opened) {
@@ -628,7 +618,6 @@ static int iowarrior_open(struct inode *inode, struct file *file)
out:
mutex_unlock(&dev->mutex);
- mutex_unlock(&iowarrior_mutex);
return retval;
}
@@ -764,11 +753,13 @@ static int iowarrior_probe(struct usb_interface *interface,
init_waitqueue_head(&dev->write_wait);
dev->udev = udev;
- dev->interface = interface;
+ dev->interface = usb_get_intf(interface);
iface_desc = interface->cur_altsetting;
dev->product_id = le16_to_cpu(udev->descriptor.idProduct);
+ init_usb_anchor(&dev->submitted);
+
res = usb_find_last_int_in_endpoint(iface_desc, &dev->int_in_endpoint);
if (res) {
dev_err(&interface->dev, "no interrupt-in endpoint found\n");
@@ -836,7 +827,6 @@ static int iowarrior_probe(struct usb_interface *interface,
if (retval) {
/* something prevented us from registering this driver */
dev_err(&interface->dev, "Not able to get a minor for this device.\n");
- usb_set_intfdata(interface, NULL);
goto error;
}
@@ -860,26 +850,15 @@ error:
*/
static void iowarrior_disconnect(struct usb_interface *interface)
{
- struct iowarrior *dev;
- int minor;
-
- dev = usb_get_intfdata(interface);
- mutex_lock(&iowarrior_open_disc_lock);
- usb_set_intfdata(interface, NULL);
- /* prevent device read, write and ioctl */
- dev->present = 0;
-
- minor = dev->minor;
- mutex_unlock(&iowarrior_open_disc_lock);
- /* give back our minor - this will call close() locks need to be dropped at this point*/
+ struct iowarrior *dev = usb_get_intfdata(interface);
+ int minor = dev->minor;
usb_deregister_dev(interface, &iowarrior_class);
mutex_lock(&dev->mutex);
/* prevent device read, write and ioctl */
-
- mutex_unlock(&dev->mutex);
+ dev->present = 0;
if (dev->opened) {
/* There is a process that holds a filedescriptor to the device ,
@@ -887,10 +866,13 @@ static void iowarrior_disconnect(struct usb_interface *interface)
Deleting the device is postponed until close() was called.
*/
usb_kill_urb(dev->int_in_urb);
+ usb_kill_anchored_urbs(&dev->submitted);
wake_up_interruptible(&dev->read_wait);
wake_up_interruptible(&dev->write_wait);
+ mutex_unlock(&dev->mutex);
} else {
/* no process is using the device, cleanup now */
+ mutex_unlock(&dev->mutex);
iowarrior_delete(dev);
}
diff --git a/drivers/usb/misc/ldusb.c b/drivers/usb/misc/ldusb.c
index 6581774bdfa4..8f86b4ebca89 100644
--- a/drivers/usb/misc/ldusb.c
+++ b/drivers/usb/misc/ldusb.c
@@ -153,6 +153,7 @@ MODULE_PARM_DESC(min_interrupt_out_interval, "Minimum interrupt out interval in
struct ld_usb {
struct mutex mutex; /* locks this structure */
struct usb_interface *intf; /* save off the usb interface pointer */
+ unsigned long disconnected:1;
int open_count; /* number of times this port has been opened */
@@ -192,12 +193,10 @@ static void ld_usb_abort_transfers(struct ld_usb *dev)
/* shutdown transfer */
if (dev->interrupt_in_running) {
dev->interrupt_in_running = 0;
- if (dev->intf)
- usb_kill_urb(dev->interrupt_in_urb);
+ usb_kill_urb(dev->interrupt_in_urb);
}
if (dev->interrupt_out_busy)
- if (dev->intf)
- usb_kill_urb(dev->interrupt_out_urb);
+ usb_kill_urb(dev->interrupt_out_urb);
}
/**
@@ -205,8 +204,6 @@ static void ld_usb_abort_transfers(struct ld_usb *dev)
*/
static void ld_usb_delete(struct ld_usb *dev)
{
- ld_usb_abort_transfers(dev);
-
/* free data structures */
usb_free_urb(dev->interrupt_in_urb);
usb_free_urb(dev->interrupt_out_urb);
@@ -263,7 +260,7 @@ static void ld_usb_interrupt_in_callback(struct urb *urb)
resubmit:
/* resubmit if we're still running */
- if (dev->interrupt_in_running && !dev->buffer_overflow && dev->intf) {
+ if (dev->interrupt_in_running && !dev->buffer_overflow) {
retval = usb_submit_urb(dev->interrupt_in_urb, GFP_ATOMIC);
if (retval) {
dev_err(&dev->intf->dev,
@@ -383,16 +380,13 @@ static int ld_usb_release(struct inode *inode, struct file *file)
goto exit;
}
- if (mutex_lock_interruptible(&dev->mutex)) {
- retval = -ERESTARTSYS;
- goto exit;
- }
+ mutex_lock(&dev->mutex);
if (dev->open_count != 1) {
retval = -ENODEV;
goto unlock_exit;
}
- if (dev->intf == NULL) {
+ if (dev->disconnected) {
/* the device was unplugged before the file was released */
mutex_unlock(&dev->mutex);
/* unlock here as ld_usb_delete frees dev */
@@ -423,7 +417,7 @@ static __poll_t ld_usb_poll(struct file *file, poll_table *wait)
dev = file->private_data;
- if (!dev->intf)
+ if (dev->disconnected)
return EPOLLERR | EPOLLHUP;
poll_wait(file, &dev->read_wait, wait);
@@ -462,7 +456,7 @@ static ssize_t ld_usb_read(struct file *file, char __user *buffer, size_t count,
}
/* verify that the device wasn't unplugged */
- if (dev->intf == NULL) {
+ if (dev->disconnected) {
retval = -ENODEV;
printk(KERN_ERR "ldusb: No device or device unplugged %d\n", retval);
goto unlock_exit;
@@ -470,7 +464,7 @@ static ssize_t ld_usb_read(struct file *file, char __user *buffer, size_t count,
/* wait for data */
spin_lock_irq(&dev->rbsl);
- if (dev->ring_head == dev->ring_tail) {
+ while (dev->ring_head == dev->ring_tail) {
dev->interrupt_in_done = 0;
spin_unlock_irq(&dev->rbsl);
if (file->f_flags & O_NONBLOCK) {
@@ -480,15 +474,20 @@ static ssize_t ld_usb_read(struct file *file, char __user *buffer, size_t count,
retval = wait_event_interruptible(dev->read_wait, dev->interrupt_in_done);
if (retval < 0)
goto unlock_exit;
- } else {
- spin_unlock_irq(&dev->rbsl);
+
+ spin_lock_irq(&dev->rbsl);
}
+ spin_unlock_irq(&dev->rbsl);
/* actual_buffer contains actual_length + interrupt_in_buffer */
actual_buffer = (size_t *)(dev->ring_buffer + dev->ring_tail * (sizeof(size_t)+dev->interrupt_in_endpoint_size));
+ if (*actual_buffer > dev->interrupt_in_endpoint_size) {
+ retval = -EIO;
+ goto unlock_exit;
+ }
bytes_to_read = min(count, *actual_buffer);
if (bytes_to_read < *actual_buffer)
- dev_warn(&dev->intf->dev, "Read buffer overflow, %zd bytes dropped\n",
+ dev_warn(&dev->intf->dev, "Read buffer overflow, %zu bytes dropped\n",
*actual_buffer-bytes_to_read);
/* copy one interrupt_in_buffer from ring_buffer into userspace */
@@ -496,11 +495,11 @@ static ssize_t ld_usb_read(struct file *file, char __user *buffer, size_t count,
retval = -EFAULT;
goto unlock_exit;
}
- dev->ring_tail = (dev->ring_tail+1) % ring_buffer_size;
-
retval = bytes_to_read;
spin_lock_irq(&dev->rbsl);
+ dev->ring_tail = (dev->ring_tail + 1) % ring_buffer_size;
+
if (dev->buffer_overflow) {
dev->buffer_overflow = 0;
spin_unlock_irq(&dev->rbsl);
@@ -542,7 +541,7 @@ static ssize_t ld_usb_write(struct file *file, const char __user *buffer,
}
/* verify that the device wasn't unplugged */
- if (dev->intf == NULL) {
+ if (dev->disconnected) {
retval = -ENODEV;
printk(KERN_ERR "ldusb: No device or device unplugged %d\n", retval);
goto unlock_exit;
@@ -563,8 +562,9 @@ static ssize_t ld_usb_write(struct file *file, const char __user *buffer,
/* write the data into interrupt_out_buffer from userspace */
bytes_to_write = min(count, write_buffer_size*dev->interrupt_out_endpoint_size);
if (bytes_to_write < count)
- dev_warn(&dev->intf->dev, "Write buffer overflow, %zd bytes dropped\n", count-bytes_to_write);
- dev_dbg(&dev->intf->dev, "%s: count = %zd, bytes_to_write = %zd\n",
+ dev_warn(&dev->intf->dev, "Write buffer overflow, %zu bytes dropped\n",
+ count - bytes_to_write);
+ dev_dbg(&dev->intf->dev, "%s: count = %zu, bytes_to_write = %zu\n",
__func__, count, bytes_to_write);
if (copy_from_user(dev->interrupt_out_buffer, buffer, bytes_to_write)) {
@@ -581,7 +581,7 @@ static ssize_t ld_usb_write(struct file *file, const char __user *buffer,
1 << 8, 0,
dev->interrupt_out_buffer,
bytes_to_write,
- USB_CTRL_SET_TIMEOUT * HZ);
+ USB_CTRL_SET_TIMEOUT);
if (retval < 0)
dev_err(&dev->intf->dev,
"Couldn't submit HID_REQ_SET_REPORT %d\n",
@@ -696,10 +696,9 @@ static int ld_usb_probe(struct usb_interface *intf, const struct usb_device_id *
dev_warn(&intf->dev, "Interrupt out endpoint not found (using control endpoint instead)\n");
dev->interrupt_in_endpoint_size = usb_endpoint_maxp(dev->interrupt_in_endpoint);
- dev->ring_buffer =
- kmalloc_array(ring_buffer_size,
- sizeof(size_t) + dev->interrupt_in_endpoint_size,
- GFP_KERNEL);
+ dev->ring_buffer = kcalloc(ring_buffer_size,
+ sizeof(size_t) + dev->interrupt_in_endpoint_size,
+ GFP_KERNEL);
if (!dev->ring_buffer)
goto error;
dev->interrupt_in_buffer = kmalloc(dev->interrupt_in_endpoint_size, GFP_KERNEL);
@@ -764,6 +763,9 @@ static void ld_usb_disconnect(struct usb_interface *intf)
/* give back our minor */
usb_deregister_dev(intf, &ld_usb_class);
+ usb_poison_urb(dev->interrupt_in_urb);
+ usb_poison_urb(dev->interrupt_out_urb);
+
mutex_lock(&dev->mutex);
/* if the device is not opened, then we clean up right now */
@@ -771,7 +773,7 @@ static void ld_usb_disconnect(struct usb_interface *intf)
mutex_unlock(&dev->mutex);
ld_usb_delete(dev);
} else {
- dev->intf = NULL;
+ dev->disconnected = 1;
/* wake up pollers */
wake_up_interruptible_all(&dev->read_wait);
wake_up_interruptible_all(&dev->write_wait);
diff --git a/drivers/usb/misc/legousbtower.c b/drivers/usb/misc/legousbtower.c
index 006cf13b2199..23061f1526b4 100644
--- a/drivers/usb/misc/legousbtower.c
+++ b/drivers/usb/misc/legousbtower.c
@@ -179,7 +179,6 @@ static const struct usb_device_id tower_table[] = {
};
MODULE_DEVICE_TABLE (usb, tower_table);
-static DEFINE_MUTEX(open_disc_mutex);
#define LEGO_USB_TOWER_MINOR_BASE 160
@@ -191,6 +190,7 @@ struct lego_usb_tower {
unsigned char minor; /* the starting minor number for this device */
int open_count; /* number of times this port has been opened */
+ unsigned long disconnected:1;
char* read_buffer;
size_t read_buffer_length; /* this much came in */
@@ -290,14 +290,13 @@ static inline void lego_usb_tower_debug_data(struct device *dev,
*/
static inline void tower_delete (struct lego_usb_tower *dev)
{
- tower_abort_transfers (dev);
-
/* free data structures */
usb_free_urb(dev->interrupt_in_urb);
usb_free_urb(dev->interrupt_out_urb);
kfree (dev->read_buffer);
kfree (dev->interrupt_in_buffer);
kfree (dev->interrupt_out_buffer);
+ usb_put_dev(dev->udev);
kfree (dev);
}
@@ -332,18 +331,14 @@ static int tower_open (struct inode *inode, struct file *file)
goto exit;
}
- mutex_lock(&open_disc_mutex);
dev = usb_get_intfdata(interface);
-
if (!dev) {
- mutex_unlock(&open_disc_mutex);
retval = -ENODEV;
goto exit;
}
/* lock this device */
if (mutex_lock_interruptible(&dev->lock)) {
- mutex_unlock(&open_disc_mutex);
retval = -ERESTARTSYS;
goto exit;
}
@@ -351,12 +346,9 @@ static int tower_open (struct inode *inode, struct file *file)
/* allow opening only once */
if (dev->open_count) {
- mutex_unlock(&open_disc_mutex);
retval = -EBUSY;
goto unlock_exit;
}
- dev->open_count = 1;
- mutex_unlock(&open_disc_mutex);
/* reset the tower */
result = usb_control_msg (dev->udev,
@@ -396,13 +388,14 @@ static int tower_open (struct inode *inode, struct file *file)
dev_err(&dev->udev->dev,
"Couldn't submit interrupt_in_urb %d\n", retval);
dev->interrupt_in_running = 0;
- dev->open_count = 0;
goto unlock_exit;
}
/* save device in the file's private structure */
file->private_data = dev;
+ dev->open_count = 1;
+
unlock_exit:
mutex_unlock(&dev->lock);
@@ -423,22 +416,19 @@ static int tower_release (struct inode *inode, struct file *file)
if (dev == NULL) {
retval = -ENODEV;
- goto exit_nolock;
- }
-
- mutex_lock(&open_disc_mutex);
- if (mutex_lock_interruptible(&dev->lock)) {
- retval = -ERESTARTSYS;
goto exit;
}
+ mutex_lock(&dev->lock);
+
if (dev->open_count != 1) {
dev_dbg(&dev->udev->dev, "%s: device not opened exactly once\n",
__func__);
retval = -ENODEV;
goto unlock_exit;
}
- if (dev->udev == NULL) {
+
+ if (dev->disconnected) {
/* the device was unplugged before the file was released */
/* unlock here as tower_delete frees dev */
@@ -456,10 +446,7 @@ static int tower_release (struct inode *inode, struct file *file)
unlock_exit:
mutex_unlock(&dev->lock);
-
exit:
- mutex_unlock(&open_disc_mutex);
-exit_nolock:
return retval;
}
@@ -477,10 +464,9 @@ static void tower_abort_transfers (struct lego_usb_tower *dev)
if (dev->interrupt_in_running) {
dev->interrupt_in_running = 0;
mb();
- if (dev->udev)
- usb_kill_urb (dev->interrupt_in_urb);
+ usb_kill_urb(dev->interrupt_in_urb);
}
- if (dev->interrupt_out_busy && dev->udev)
+ if (dev->interrupt_out_busy)
usb_kill_urb(dev->interrupt_out_urb);
}
@@ -516,7 +502,7 @@ static __poll_t tower_poll (struct file *file, poll_table *wait)
dev = file->private_data;
- if (!dev->udev)
+ if (dev->disconnected)
return EPOLLERR | EPOLLHUP;
poll_wait(file, &dev->read_wait, wait);
@@ -563,7 +549,7 @@ static ssize_t tower_read (struct file *file, char __user *buffer, size_t count,
}
/* verify that the device wasn't unplugged */
- if (dev->udev == NULL) {
+ if (dev->disconnected) {
retval = -ENODEV;
pr_err("No device or device unplugged %d\n", retval);
goto unlock_exit;
@@ -649,7 +635,7 @@ static ssize_t tower_write (struct file *file, const char __user *buffer, size_t
}
/* verify that the device wasn't unplugged */
- if (dev->udev == NULL) {
+ if (dev->disconnected) {
retval = -ENODEV;
pr_err("No device or device unplugged %d\n", retval);
goto unlock_exit;
@@ -759,7 +745,7 @@ static void tower_interrupt_in_callback (struct urb *urb)
resubmit:
/* resubmit if we're still running */
- if (dev->interrupt_in_running && dev->udev) {
+ if (dev->interrupt_in_running) {
retval = usb_submit_urb (dev->interrupt_in_urb, GFP_ATOMIC);
if (retval)
dev_err(&dev->udev->dev,
@@ -822,8 +808,9 @@ static int tower_probe (struct usb_interface *interface, const struct usb_device
mutex_init(&dev->lock);
- dev->udev = udev;
+ dev->udev = usb_get_dev(udev);
dev->open_count = 0;
+ dev->disconnected = 0;
dev->read_buffer = NULL;
dev->read_buffer_length = 0;
@@ -891,8 +878,10 @@ static int tower_probe (struct usb_interface *interface, const struct usb_device
get_version_reply,
sizeof(*get_version_reply),
1000);
- if (result < 0) {
- dev_err(idev, "LEGO USB Tower get version control request failed\n");
+ if (result != sizeof(*get_version_reply)) {
+ if (result >= 0)
+ result = -EIO;
+ dev_err(idev, "get version request failed: %d\n", result);
retval = result;
goto error;
}
@@ -910,7 +899,6 @@ static int tower_probe (struct usb_interface *interface, const struct usb_device
if (retval) {
/* something prevented us from registering this driver */
dev_err(idev, "Not able to get a minor for this device.\n");
- usb_set_intfdata (interface, NULL);
goto error;
}
dev->minor = interface->minor;
@@ -942,23 +930,24 @@ static void tower_disconnect (struct usb_interface *interface)
int minor;
dev = usb_get_intfdata (interface);
- mutex_lock(&open_disc_mutex);
- usb_set_intfdata (interface, NULL);
minor = dev->minor;
- /* give back our minor */
+ /* give back our minor and prevent further open() */
usb_deregister_dev (interface, &tower_class);
+ /* stop I/O */
+ usb_poison_urb(dev->interrupt_in_urb);
+ usb_poison_urb(dev->interrupt_out_urb);
+
mutex_lock(&dev->lock);
- mutex_unlock(&open_disc_mutex);
/* if the device is not opened, then we clean up right now */
if (!dev->open_count) {
mutex_unlock(&dev->lock);
tower_delete (dev);
} else {
- dev->udev = NULL;
+ dev->disconnected = 1;
/* wake up pollers */
wake_up_interruptible_all(&dev->read_wait);
wake_up_interruptible_all(&dev->write_wait);
diff --git a/drivers/usb/misc/rio500.c b/drivers/usb/misc/rio500.c
deleted file mode 100644
index 30cae5e1954d..000000000000
--- a/drivers/usb/misc/rio500.c
+++ /dev/null
@@ -1,554 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/* -*- linux-c -*- */
-
-/*
- * Driver for USB Rio 500
- *
- * Cesar Miquel (miquel@df.uba.ar)
- *
- * based on hp_scanner.c by David E. Nelson (dnelson@jump.net)
- *
- * Based upon mouse.c (Brad Keryan) and printer.c (Michael Gee).
- *
- * Changelog:
- * 30/05/2003 replaced lock/unlock kernel with up/down
- * Daniele Bellucci bellucda@tiscali.it
- * */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/signal.h>
-#include <linux/sched/signal.h>
-#include <linux/mutex.h>
-#include <linux/errno.h>
-#include <linux/random.h>
-#include <linux/poll.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/usb.h>
-#include <linux/wait.h>
-
-#include "rio500_usb.h"
-
-#define DRIVER_AUTHOR "Cesar Miquel <miquel@df.uba.ar>"
-#define DRIVER_DESC "USB Rio 500 driver"
-
-#define RIO_MINOR 64
-
-/* stall/wait timeout for rio */
-#define NAK_TIMEOUT (HZ)
-
-#define IBUF_SIZE 0x1000
-
-/* Size of the rio buffer */
-#define OBUF_SIZE 0x10000
-
-struct rio_usb_data {
- struct usb_device *rio_dev; /* init: probe_rio */
- unsigned int ifnum; /* Interface number of the USB device */
- int isopen; /* nz if open */
- int present; /* Device is present on the bus */
- char *obuf, *ibuf; /* transfer buffers */
- char bulk_in_ep, bulk_out_ep; /* Endpoint assignments */
- wait_queue_head_t wait_q; /* for timeouts */
-};
-
-static DEFINE_MUTEX(rio500_mutex);
-static struct rio_usb_data rio_instance;
-
-static int open_rio(struct inode *inode, struct file *file)
-{
- struct rio_usb_data *rio = &rio_instance;
-
- /* against disconnect() */
- mutex_lock(&rio500_mutex);
-
- if (rio->isopen || !rio->present) {
- mutex_unlock(&rio500_mutex);
- return -EBUSY;
- }
- rio->isopen = 1;
-
- init_waitqueue_head(&rio->wait_q);
-
-
- dev_info(&rio->rio_dev->dev, "Rio opened.\n");
- mutex_unlock(&rio500_mutex);
-
- return 0;
-}
-
-static int close_rio(struct inode *inode, struct file *file)
-{
- struct rio_usb_data *rio = &rio_instance;
-
- /* against disconnect() */
- mutex_lock(&rio500_mutex);
-
- rio->isopen = 0;
- if (!rio->present) {
- /* cleanup has been delayed */
- kfree(rio->ibuf);
- kfree(rio->obuf);
- rio->ibuf = NULL;
- rio->obuf = NULL;
- } else {
- dev_info(&rio->rio_dev->dev, "Rio closed.\n");
- }
- mutex_unlock(&rio500_mutex);
- return 0;
-}
-
-static long ioctl_rio(struct file *file, unsigned int cmd, unsigned long arg)
-{
- struct RioCommand rio_cmd;
- struct rio_usb_data *rio = &rio_instance;
- void __user *data;
- unsigned char *buffer;
- int result, requesttype;
- int retries;
- int retval=0;
-
- mutex_lock(&rio500_mutex);
- /* Sanity check to make sure rio is connected, powered, etc */
- if (rio->present == 0 || rio->rio_dev == NULL) {
- retval = -ENODEV;
- goto err_out;
- }
-
- switch (cmd) {
- case RIO_RECV_COMMAND:
- data = (void __user *) arg;
- if (data == NULL)
- break;
- if (copy_from_user(&rio_cmd, data, sizeof(struct RioCommand))) {
- retval = -EFAULT;
- goto err_out;
- }
- if (rio_cmd.length < 0 || rio_cmd.length > PAGE_SIZE) {
- retval = -EINVAL;
- goto err_out;
- }
- buffer = (unsigned char *) __get_free_page(GFP_KERNEL);
- if (buffer == NULL) {
- retval = -ENOMEM;
- goto err_out;
- }
- if (copy_from_user(buffer, rio_cmd.buffer, rio_cmd.length)) {
- retval = -EFAULT;
- free_page((unsigned long) buffer);
- goto err_out;
- }
-
- requesttype = rio_cmd.requesttype | USB_DIR_IN |
- USB_TYPE_VENDOR | USB_RECIP_DEVICE;
- dev_dbg(&rio->rio_dev->dev,
- "sending command:reqtype=%0x req=%0x value=%0x index=%0x len=%0x\n",
- requesttype, rio_cmd.request, rio_cmd.value,
- rio_cmd.index, rio_cmd.length);
- /* Send rio control message */
- retries = 3;
- while (retries) {
- result = usb_control_msg(rio->rio_dev,
- usb_rcvctrlpipe(rio-> rio_dev, 0),
- rio_cmd.request,
- requesttype,
- rio_cmd.value,
- rio_cmd.index, buffer,
- rio_cmd.length,
- jiffies_to_msecs(rio_cmd.timeout));
- if (result == -ETIMEDOUT)
- retries--;
- else if (result < 0) {
- dev_err(&rio->rio_dev->dev,
- "Error executing ioctrl. code = %d\n",
- result);
- retries = 0;
- } else {
- dev_dbg(&rio->rio_dev->dev,
- "Executed ioctl. Result = %d (data=%02x)\n",
- result, buffer[0]);
- if (copy_to_user(rio_cmd.buffer, buffer,
- rio_cmd.length)) {
- free_page((unsigned long) buffer);
- retval = -EFAULT;
- goto err_out;
- }
- retries = 0;
- }
-
- /* rio_cmd.buffer contains a raw stream of single byte
- data which has been returned from rio. Data is
- interpreted at application level. For data that
- will be cast to data types longer than 1 byte, data
- will be little_endian and will potentially need to
- be swapped at the app level */
-
- }
- free_page((unsigned long) buffer);
- break;
-
- case RIO_SEND_COMMAND:
- data = (void __user *) arg;
- if (data == NULL)
- break;
- if (copy_from_user(&rio_cmd, data, sizeof(struct RioCommand))) {
- retval = -EFAULT;
- goto err_out;
- }
- if (rio_cmd.length < 0 || rio_cmd.length > PAGE_SIZE) {
- retval = -EINVAL;
- goto err_out;
- }
- buffer = (unsigned char *) __get_free_page(GFP_KERNEL);
- if (buffer == NULL) {
- retval = -ENOMEM;
- goto err_out;
- }
- if (copy_from_user(buffer, rio_cmd.buffer, rio_cmd.length)) {
- free_page((unsigned long)buffer);
- retval = -EFAULT;
- goto err_out;
- }
-
- requesttype = rio_cmd.requesttype | USB_DIR_OUT |
- USB_TYPE_VENDOR | USB_RECIP_DEVICE;
- dev_dbg(&rio->rio_dev->dev,
- "sending command: reqtype=%0x req=%0x value=%0x index=%0x len=%0x\n",
- requesttype, rio_cmd.request, rio_cmd.value,
- rio_cmd.index, rio_cmd.length);
- /* Send rio control message */
- retries = 3;
- while (retries) {
- result = usb_control_msg(rio->rio_dev,
- usb_sndctrlpipe(rio-> rio_dev, 0),
- rio_cmd.request,
- requesttype,
- rio_cmd.value,
- rio_cmd.index, buffer,
- rio_cmd.length,
- jiffies_to_msecs(rio_cmd.timeout));
- if (result == -ETIMEDOUT)
- retries--;
- else if (result < 0) {
- dev_err(&rio->rio_dev->dev,
- "Error executing ioctrl. code = %d\n",
- result);
- retries = 0;
- } else {
- dev_dbg(&rio->rio_dev->dev,
- "Executed ioctl. Result = %d\n", result);
- retries = 0;
-
- }
-
- }
- free_page((unsigned long) buffer);
- break;
-
- default:
- retval = -ENOTTY;
- break;
- }
-
-
-err_out:
- mutex_unlock(&rio500_mutex);
- return retval;
-}
-
-static ssize_t
-write_rio(struct file *file, const char __user *buffer,
- size_t count, loff_t * ppos)
-{
- DEFINE_WAIT(wait);
- struct rio_usb_data *rio = &rio_instance;
-
- unsigned long copy_size;
- unsigned long bytes_written = 0;
- unsigned int partial;
-
- int result = 0;
- int maxretry;
- int errn = 0;
- int intr;
-
- intr = mutex_lock_interruptible(&rio500_mutex);
- if (intr)
- return -EINTR;
- /* Sanity check to make sure rio is connected, powered, etc */
- if (rio->present == 0 || rio->rio_dev == NULL) {
- mutex_unlock(&rio500_mutex);
- return -ENODEV;
- }
-
-
-
- do {
- unsigned long thistime;
- char *obuf = rio->obuf;
-
- thistime = copy_size =
- (count >= OBUF_SIZE) ? OBUF_SIZE : count;
- if (copy_from_user(rio->obuf, buffer, copy_size)) {
- errn = -EFAULT;
- goto error;
- }
- maxretry = 5;
- while (thistime) {
- if (!rio->rio_dev) {
- errn = -ENODEV;
- goto error;
- }
- if (signal_pending(current)) {
- mutex_unlock(&rio500_mutex);
- return bytes_written ? bytes_written : -EINTR;
- }
-
- result = usb_bulk_msg(rio->rio_dev,
- usb_sndbulkpipe(rio->rio_dev, 2),
- obuf, thistime, &partial, 5000);
-
- dev_dbg(&rio->rio_dev->dev,
- "write stats: result:%d thistime:%lu partial:%u\n",
- result, thistime, partial);
-
- if (result == -ETIMEDOUT) { /* NAK - so hold for a while */
- if (!maxretry--) {
- errn = -ETIME;
- goto error;
- }
- prepare_to_wait(&rio->wait_q, &wait, TASK_INTERRUPTIBLE);
- schedule_timeout(NAK_TIMEOUT);
- finish_wait(&rio->wait_q, &wait);
- continue;
- } else if (!result && partial) {
- obuf += partial;
- thistime -= partial;
- } else
- break;
- }
- if (result) {
- dev_err(&rio->rio_dev->dev, "Write Whoops - %x\n",
- result);
- errn = -EIO;
- goto error;
- }
- bytes_written += copy_size;
- count -= copy_size;
- buffer += copy_size;
- } while (count > 0);
-
- mutex_unlock(&rio500_mutex);
-
- return bytes_written ? bytes_written : -EIO;
-
-error:
- mutex_unlock(&rio500_mutex);
- return errn;
-}
-
-static ssize_t
-read_rio(struct file *file, char __user *buffer, size_t count, loff_t * ppos)
-{
- DEFINE_WAIT(wait);
- struct rio_usb_data *rio = &rio_instance;
- ssize_t read_count;
- unsigned int partial;
- int this_read;
- int result;
- int maxretry = 10;
- char *ibuf;
- int intr;
-
- intr = mutex_lock_interruptible(&rio500_mutex);
- if (intr)
- return -EINTR;
- /* Sanity check to make sure rio is connected, powered, etc */
- if (rio->present == 0 || rio->rio_dev == NULL) {
- mutex_unlock(&rio500_mutex);
- return -ENODEV;
- }
-
- ibuf = rio->ibuf;
-
- read_count = 0;
-
-
- while (count > 0) {
- if (signal_pending(current)) {
- mutex_unlock(&rio500_mutex);
- return read_count ? read_count : -EINTR;
- }
- if (!rio->rio_dev) {
- mutex_unlock(&rio500_mutex);
- return -ENODEV;
- }
- this_read = (count >= IBUF_SIZE) ? IBUF_SIZE : count;
-
- result = usb_bulk_msg(rio->rio_dev,
- usb_rcvbulkpipe(rio->rio_dev, 1),
- ibuf, this_read, &partial,
- 8000);
-
- dev_dbg(&rio->rio_dev->dev,
- "read stats: result:%d this_read:%u partial:%u\n",
- result, this_read, partial);
-
- if (partial) {
- count = this_read = partial;
- } else if (result == -ETIMEDOUT || result == 15) { /* FIXME: 15 ??? */
- if (!maxretry--) {
- mutex_unlock(&rio500_mutex);
- dev_err(&rio->rio_dev->dev,
- "read_rio: maxretry timeout\n");
- return -ETIME;
- }
- prepare_to_wait(&rio->wait_q, &wait, TASK_INTERRUPTIBLE);
- schedule_timeout(NAK_TIMEOUT);
- finish_wait(&rio->wait_q, &wait);
- continue;
- } else if (result != -EREMOTEIO) {
- mutex_unlock(&rio500_mutex);
- dev_err(&rio->rio_dev->dev,
- "Read Whoops - result:%d partial:%u this_read:%u\n",
- result, partial, this_read);
- return -EIO;
- } else {
- mutex_unlock(&rio500_mutex);
- return (0);
- }
-
- if (this_read) {
- if (copy_to_user(buffer, ibuf, this_read)) {
- mutex_unlock(&rio500_mutex);
- return -EFAULT;
- }
- count -= this_read;
- read_count += this_read;
- buffer += this_read;
- }
- }
- mutex_unlock(&rio500_mutex);
- return read_count;
-}
-
-static const struct file_operations usb_rio_fops = {
- .owner = THIS_MODULE,
- .read = read_rio,
- .write = write_rio,
- .unlocked_ioctl = ioctl_rio,
- .open = open_rio,
- .release = close_rio,
- .llseek = noop_llseek,
-};
-
-static struct usb_class_driver usb_rio_class = {
- .name = "rio500%d",
- .fops = &usb_rio_fops,
- .minor_base = RIO_MINOR,
-};
-
-static int probe_rio(struct usb_interface *intf,
- const struct usb_device_id *id)
-{
- struct usb_device *dev = interface_to_usbdev(intf);
- struct rio_usb_data *rio = &rio_instance;
- int retval = -ENOMEM;
- char *ibuf, *obuf;
-
- if (rio->present) {
- dev_info(&intf->dev, "Second USB Rio at address %d refused\n", dev->devnum);
- return -EBUSY;
- }
- dev_info(&intf->dev, "USB Rio found at address %d\n", dev->devnum);
-
- obuf = kmalloc(OBUF_SIZE, GFP_KERNEL);
- if (!obuf) {
- dev_err(&dev->dev,
- "probe_rio: Not enough memory for the output buffer\n");
- goto err_obuf;
- }
- dev_dbg(&intf->dev, "obuf address: %p\n", obuf);
-
- ibuf = kmalloc(IBUF_SIZE, GFP_KERNEL);
- if (!ibuf) {
- dev_err(&dev->dev,
- "probe_rio: Not enough memory for the input buffer\n");
- goto err_ibuf;
- }
- dev_dbg(&intf->dev, "ibuf address: %p\n", ibuf);
-
- mutex_lock(&rio500_mutex);
- rio->rio_dev = dev;
- rio->ibuf = ibuf;
- rio->obuf = obuf;
- rio->present = 1;
- mutex_unlock(&rio500_mutex);
-
- retval = usb_register_dev(intf, &usb_rio_class);
- if (retval) {
- dev_err(&dev->dev,
- "Not able to get a minor for this device.\n");
- goto err_register;
- }
-
- usb_set_intfdata(intf, rio);
- return retval;
-
- err_register:
- mutex_lock(&rio500_mutex);
- rio->present = 0;
- mutex_unlock(&rio500_mutex);
- err_ibuf:
- kfree(obuf);
- err_obuf:
- return retval;
-}
-
-static void disconnect_rio(struct usb_interface *intf)
-{
- struct rio_usb_data *rio = usb_get_intfdata (intf);
-
- usb_set_intfdata (intf, NULL);
- if (rio) {
- usb_deregister_dev(intf, &usb_rio_class);
-
- mutex_lock(&rio500_mutex);
- if (rio->isopen) {
- rio->isopen = 0;
- /* better let it finish - the release will do whats needed */
- rio->rio_dev = NULL;
- mutex_unlock(&rio500_mutex);
- return;
- }
- kfree(rio->ibuf);
- kfree(rio->obuf);
-
- dev_info(&intf->dev, "USB Rio disconnected.\n");
-
- rio->present = 0;
- mutex_unlock(&rio500_mutex);
- }
-}
-
-static const struct usb_device_id rio_table[] = {
- { USB_DEVICE(0x0841, 1) }, /* Rio 500 */
- { } /* Terminating entry */
-};
-
-MODULE_DEVICE_TABLE (usb, rio_table);
-
-static struct usb_driver rio_driver = {
- .name = "rio500",
- .probe = probe_rio,
- .disconnect = disconnect_rio,
- .id_table = rio_table,
-};
-
-module_usb_driver(rio_driver);
-
-MODULE_AUTHOR( DRIVER_AUTHOR );
-MODULE_DESCRIPTION( DRIVER_DESC );
-MODULE_LICENSE("GPL");
-
diff --git a/drivers/usb/misc/rio500_usb.h b/drivers/usb/misc/rio500_usb.h
deleted file mode 100644
index 6db7a5863496..000000000000
--- a/drivers/usb/misc/rio500_usb.h
+++ /dev/null
@@ -1,20 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/* ----------------------------------------------------------------------
- Copyright (C) 2000 Cesar Miquel (miquel@df.uba.ar)
- ---------------------------------------------------------------------- */
-
-#define RIO_SEND_COMMAND 0x1
-#define RIO_RECV_COMMAND 0x2
-
-#define RIO_DIR_OUT 0x0
-#define RIO_DIR_IN 0x1
-
-struct RioCommand {
- short length;
- int request;
- int requesttype;
- int value;
- int index;
- void __user *buffer;
- int timeout;
-};
diff --git a/drivers/usb/misc/usblcd.c b/drivers/usb/misc/usblcd.c
index 9ba4a4e68d91..61e9e987fe4a 100644
--- a/drivers/usb/misc/usblcd.c
+++ b/drivers/usb/misc/usblcd.c
@@ -18,6 +18,7 @@
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/mutex.h>
+#include <linux/rwsem.h>
#include <linux/uaccess.h>
#include <linux/usb.h>
@@ -29,16 +30,12 @@
#define IOCTL_GET_DRV_VERSION 2
-static DEFINE_MUTEX(lcd_mutex);
static const struct usb_device_id id_table[] = {
{ .idVendor = 0x10D2, .match_flags = USB_DEVICE_ID_MATCH_VENDOR, },
{ },
};
MODULE_DEVICE_TABLE(usb, id_table);
-static DEFINE_MUTEX(open_disc_mutex);
-
-
struct usb_lcd {
struct usb_device *udev; /* init: probe_lcd */
struct usb_interface *interface; /* the interface for
@@ -57,6 +54,8 @@ struct usb_lcd {
using up all RAM */
struct usb_anchor submitted; /* URBs to wait for
before suspend */
+ struct rw_semaphore io_rwsem;
+ unsigned long disconnected:1;
};
#define to_lcd_dev(d) container_of(d, struct usb_lcd, kref)
@@ -81,40 +80,29 @@ static int lcd_open(struct inode *inode, struct file *file)
struct usb_interface *interface;
int subminor, r;
- mutex_lock(&lcd_mutex);
subminor = iminor(inode);
interface = usb_find_interface(&lcd_driver, subminor);
if (!interface) {
- mutex_unlock(&lcd_mutex);
- printk(KERN_ERR "USBLCD: %s - error, can't find device for minor %d\n",
+ pr_err("USBLCD: %s - error, can't find device for minor %d\n",
__func__, subminor);
return -ENODEV;
}
- mutex_lock(&open_disc_mutex);
dev = usb_get_intfdata(interface);
- if (!dev) {
- mutex_unlock(&open_disc_mutex);
- mutex_unlock(&lcd_mutex);
- return -ENODEV;
- }
/* increment our usage count for the device */
kref_get(&dev->kref);
- mutex_unlock(&open_disc_mutex);
/* grab a power reference */
r = usb_autopm_get_interface(interface);
if (r < 0) {
kref_put(&dev->kref, lcd_delete);
- mutex_unlock(&lcd_mutex);
return r;
}
/* save our object in the file's private structure */
file->private_data = dev;
- mutex_unlock(&lcd_mutex);
return 0;
}
@@ -142,6 +130,13 @@ static ssize_t lcd_read(struct file *file, char __user * buffer,
dev = file->private_data;
+ down_read(&dev->io_rwsem);
+
+ if (dev->disconnected) {
+ retval = -ENODEV;
+ goto out_up_io;
+ }
+
/* do a blocking bulk read to get data from the device */
retval = usb_bulk_msg(dev->udev,
usb_rcvbulkpipe(dev->udev,
@@ -158,6 +153,9 @@ static ssize_t lcd_read(struct file *file, char __user * buffer,
retval = bytes_read;
}
+out_up_io:
+ up_read(&dev->io_rwsem);
+
return retval;
}
@@ -173,14 +171,12 @@ static long lcd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
switch (cmd) {
case IOCTL_GET_HARD_VERSION:
- mutex_lock(&lcd_mutex);
bcdDevice = le16_to_cpu((dev->udev)->descriptor.bcdDevice);
sprintf(buf, "%1d%1d.%1d%1d",
(bcdDevice & 0xF000)>>12,
(bcdDevice & 0xF00)>>8,
(bcdDevice & 0xF0)>>4,
(bcdDevice & 0xF));
- mutex_unlock(&lcd_mutex);
if (copy_to_user((void __user *)arg, buf, strlen(buf)) != 0)
return -EFAULT;
break;
@@ -237,11 +233,18 @@ static ssize_t lcd_write(struct file *file, const char __user * user_buffer,
if (r < 0)
return -EINTR;
+ down_read(&dev->io_rwsem);
+
+ if (dev->disconnected) {
+ retval = -ENODEV;
+ goto err_up_io;
+ }
+
/* create a urb, and a buffer for it, and copy the data to the urb */
urb = usb_alloc_urb(0, GFP_KERNEL);
if (!urb) {
retval = -ENOMEM;
- goto err_no_buf;
+ goto err_up_io;
}
buf = usb_alloc_coherent(dev->udev, count, GFP_KERNEL,
@@ -278,6 +281,7 @@ static ssize_t lcd_write(struct file *file, const char __user * user_buffer,
the USB core will eventually free it entirely */
usb_free_urb(urb);
+ up_read(&dev->io_rwsem);
exit:
return count;
error_unanchor:
@@ -285,7 +289,8 @@ error_unanchor:
error:
usb_free_coherent(dev->udev, count, buf, urb->transfer_dma);
usb_free_urb(urb);
-err_no_buf:
+err_up_io:
+ up_read(&dev->io_rwsem);
up(&dev->limit_sem);
return retval;
}
@@ -325,6 +330,7 @@ static int lcd_probe(struct usb_interface *interface,
kref_init(&dev->kref);
sema_init(&dev->limit_sem, USB_LCD_CONCURRENT_WRITES);
+ init_rwsem(&dev->io_rwsem);
init_usb_anchor(&dev->submitted);
dev->udev = usb_get_dev(interface_to_usbdev(interface));
@@ -365,7 +371,6 @@ static int lcd_probe(struct usb_interface *interface,
/* something prevented us from registering this driver */
dev_err(&interface->dev,
"Not able to get a minor for this device.\n");
- usb_set_intfdata(interface, NULL);
goto error;
}
@@ -411,17 +416,18 @@ static int lcd_resume(struct usb_interface *intf)
static void lcd_disconnect(struct usb_interface *interface)
{
- struct usb_lcd *dev;
+ struct usb_lcd *dev = usb_get_intfdata(interface);
int minor = interface->minor;
- mutex_lock(&open_disc_mutex);
- dev = usb_get_intfdata(interface);
- usb_set_intfdata(interface, NULL);
- mutex_unlock(&open_disc_mutex);
-
/* give back our minor */
usb_deregister_dev(interface, &lcd_class);
+ down_write(&dev->io_rwsem);
+ dev->disconnected = 1;
+ up_write(&dev->io_rwsem);
+
+ usb_kill_anchored_urbs(&dev->submitted);
+
/* decrement our usage count */
kref_put(&dev->kref, lcd_delete);
diff --git a/drivers/usb/misc/yurex.c b/drivers/usb/misc/yurex.c
index 6715a128e6c8..be0505b8b5d4 100644
--- a/drivers/usb/misc/yurex.c
+++ b/drivers/usb/misc/yurex.c
@@ -60,6 +60,7 @@ struct usb_yurex {
struct kref kref;
struct mutex io_mutex;
+ unsigned long disconnected:1;
struct fasync_struct *async_queue;
wait_queue_head_t waitq;
@@ -107,6 +108,7 @@ static void yurex_delete(struct kref *kref)
dev->int_buffer, dev->urb->transfer_dma);
usb_free_urb(dev->urb);
}
+ usb_put_intf(dev->interface);
usb_put_dev(dev->udev);
kfree(dev);
}
@@ -132,6 +134,7 @@ static void yurex_interrupt(struct urb *urb)
switch (status) {
case 0: /*success*/
break;
+ /* The device is terminated or messed up, give up */
case -EOVERFLOW:
dev_err(&dev->interface->dev,
"%s - overflow with length %d, actual length is %d\n",
@@ -140,12 +143,13 @@ static void yurex_interrupt(struct urb *urb)
case -ENOENT:
case -ESHUTDOWN:
case -EILSEQ:
- /* The device is terminated, clean up */
+ case -EPROTO:
+ case -ETIME:
return;
default:
dev_err(&dev->interface->dev,
"%s - unknown status received: %d\n", __func__, status);
- goto exit;
+ return;
}
/* handle received message */
@@ -177,7 +181,6 @@ static void yurex_interrupt(struct urb *urb)
break;
}
-exit:
retval = usb_submit_urb(dev->urb, GFP_ATOMIC);
if (retval) {
dev_err(&dev->interface->dev, "%s - usb_submit_urb failed: %d\n",
@@ -204,7 +207,7 @@ static int yurex_probe(struct usb_interface *interface, const struct usb_device_
init_waitqueue_head(&dev->waitq);
dev->udev = usb_get_dev(interface_to_usbdev(interface));
- dev->interface = interface;
+ dev->interface = usb_get_intf(interface);
/* set up the endpoint information */
iface_desc = interface->cur_altsetting;
@@ -315,8 +318,9 @@ static void yurex_disconnect(struct usb_interface *interface)
/* prevent more I/O from starting */
usb_poison_urb(dev->urb);
+ usb_poison_urb(dev->cntl_urb);
mutex_lock(&dev->io_mutex);
- dev->interface = NULL;
+ dev->disconnected = 1;
mutex_unlock(&dev->io_mutex);
/* wakeup waiters */
@@ -404,7 +408,7 @@ static ssize_t yurex_read(struct file *file, char __user *buffer, size_t count,
dev = file->private_data;
mutex_lock(&dev->io_mutex);
- if (!dev->interface) { /* already disconnected */
+ if (dev->disconnected) { /* already disconnected */
mutex_unlock(&dev->io_mutex);
return -ENODEV;
}
@@ -439,7 +443,7 @@ static ssize_t yurex_write(struct file *file, const char __user *user_buffer,
goto error;
mutex_lock(&dev->io_mutex);
- if (!dev->interface) { /* already disconnected */
+ if (dev->disconnected) { /* already disconnected */
mutex_unlock(&dev->io_mutex);
retval = -ENODEV;
goto error;
diff --git a/drivers/usb/mtu3/mtu3_core.c b/drivers/usb/mtu3/mtu3_core.c
index c3d5c1206eec..9dd02160cca9 100644
--- a/drivers/usb/mtu3/mtu3_core.c
+++ b/drivers/usb/mtu3/mtu3_core.c
@@ -16,6 +16,7 @@
#include <linux/platform_device.h>
#include "mtu3.h"
+#include "mtu3_dr.h"
#include "mtu3_debug.h"
#include "mtu3_trace.h"
diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c
index 4c3de777ef6c..a3c30b609433 100644
--- a/drivers/usb/renesas_usbhs/common.c
+++ b/drivers/usb/renesas_usbhs/common.c
@@ -162,17 +162,17 @@ void usbhs_usbreq_get_val(struct usbhs_priv *priv, struct usb_ctrlrequest *req)
req->bRequest = (val >> 8) & 0xFF;
req->bRequestType = (val >> 0) & 0xFF;
- req->wValue = usbhs_read(priv, USBVAL);
- req->wIndex = usbhs_read(priv, USBINDX);
- req->wLength = usbhs_read(priv, USBLENG);
+ req->wValue = cpu_to_le16(usbhs_read(priv, USBVAL));
+ req->wIndex = cpu_to_le16(usbhs_read(priv, USBINDX));
+ req->wLength = cpu_to_le16(usbhs_read(priv, USBLENG));
}
void usbhs_usbreq_set_val(struct usbhs_priv *priv, struct usb_ctrlrequest *req)
{
usbhs_write(priv, USBREQ, (req->bRequest << 8) | req->bRequestType);
- usbhs_write(priv, USBVAL, req->wValue);
- usbhs_write(priv, USBINDX, req->wIndex);
- usbhs_write(priv, USBLENG, req->wLength);
+ usbhs_write(priv, USBVAL, le16_to_cpu(req->wValue));
+ usbhs_write(priv, USBINDX, le16_to_cpu(req->wIndex));
+ usbhs_write(priv, USBLENG, le16_to_cpu(req->wLength));
usbhs_bset(priv, DCPCTR, SUREQ, SUREQ);
}
diff --git a/drivers/usb/renesas_usbhs/common.h b/drivers/usb/renesas_usbhs/common.h
index d1a0a35ecfff..0824099b905e 100644
--- a/drivers/usb/renesas_usbhs/common.h
+++ b/drivers/usb/renesas_usbhs/common.h
@@ -211,6 +211,7 @@ struct usbhs_priv;
/* DCPCTR */
#define BSTS (1 << 15) /* Buffer Status */
#define SUREQ (1 << 14) /* Sending SETUP Token */
+#define INBUFM (1 << 14) /* (PIPEnCTR) Transfer Buffer Monitor */
#define CSSTS (1 << 12) /* CSSTS Status */
#define ACLRM (1 << 9) /* Buffer Auto-Clear Mode */
#define SQCLR (1 << 8) /* Toggle Bit Clear */
diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c
index 2a01ceb71641..86637cd066cf 100644
--- a/drivers/usb/renesas_usbhs/fifo.c
+++ b/drivers/usb/renesas_usbhs/fifo.c
@@ -89,7 +89,7 @@ static void __usbhsf_pkt_del(struct usbhs_pkt *pkt)
list_del_init(&pkt->node);
}
-static struct usbhs_pkt *__usbhsf_pkt_get(struct usbhs_pipe *pipe)
+struct usbhs_pkt *__usbhsf_pkt_get(struct usbhs_pipe *pipe)
{
return list_first_entry_or_null(&pipe->list, struct usbhs_pkt, node);
}
diff --git a/drivers/usb/renesas_usbhs/fifo.h b/drivers/usb/renesas_usbhs/fifo.h
index 88d1816bcda2..c3d3cc35cee0 100644
--- a/drivers/usb/renesas_usbhs/fifo.h
+++ b/drivers/usb/renesas_usbhs/fifo.h
@@ -97,5 +97,6 @@ void usbhs_pkt_push(struct usbhs_pipe *pipe, struct usbhs_pkt *pkt,
void *buf, int len, int zero, int sequence);
struct usbhs_pkt *usbhs_pkt_pop(struct usbhs_pipe *pipe, struct usbhs_pkt *pkt);
void usbhs_pkt_start(struct usbhs_pipe *pipe);
+struct usbhs_pkt *__usbhsf_pkt_get(struct usbhs_pipe *pipe);
#endif /* RENESAS_USB_FIFO_H */
diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c
index 4d571a5205e2..cd38d74b3223 100644
--- a/drivers/usb/renesas_usbhs/mod_gadget.c
+++ b/drivers/usb/renesas_usbhs/mod_gadget.c
@@ -265,7 +265,7 @@ static int usbhsg_recip_handler_std_set_device(struct usbhs_priv *priv,
case USB_DEVICE_TEST_MODE:
usbhsg_recip_handler_std_control_done(priv, uep, ctrl);
udelay(100);
- usbhs_sys_set_test_mode(priv, le16_to_cpu(ctrl->wIndex >> 8));
+ usbhs_sys_set_test_mode(priv, le16_to_cpu(ctrl->wIndex) >> 8);
break;
default:
usbhsg_recip_handler_std_control_done(priv, uep, ctrl);
@@ -315,7 +315,7 @@ static void __usbhsg_recip_send_status(struct usbhsg_gpriv *gpriv,
struct usbhs_pipe *pipe = usbhsg_uep_to_pipe(dcp);
struct device *dev = usbhsg_gpriv_to_dev(gpriv);
struct usb_request *req;
- unsigned short *buf;
+ __le16 *buf;
/* alloc new usb_request for recip */
req = usb_ep_alloc_request(&dcp->ep, GFP_ATOMIC);
@@ -722,8 +722,7 @@ static int __usbhsg_ep_set_halt_wedge(struct usb_ep *ep, int halt, int wedge)
struct usbhs_priv *priv = usbhsg_gpriv_to_priv(gpriv);
struct device *dev = usbhsg_gpriv_to_dev(gpriv);
unsigned long flags;
-
- usbhsg_pipe_disable(uep);
+ int ret = 0;
dev_dbg(dev, "set halt %d (pipe %d)\n",
halt, usbhs_pipe_number(pipe));
@@ -731,6 +730,18 @@ static int __usbhsg_ep_set_halt_wedge(struct usb_ep *ep, int halt, int wedge)
/******************** spin lock ********************/
usbhs_lock(priv, flags);
+ /*
+ * According to usb_ep_set_halt()'s description, this function should
+ * return -EAGAIN if the IN endpoint has any queue or data. Note
+ * that the usbhs_pipe_is_dir_in() returns false if the pipe is an
+ * IN endpoint in the gadget mode.
+ */
+ if (!usbhs_pipe_is_dir_in(pipe) && (__usbhsf_pkt_get(pipe) ||
+ usbhs_pipe_contains_transmittable_data(pipe))) {
+ ret = -EAGAIN;
+ goto out;
+ }
+
if (halt)
usbhs_pipe_stall(pipe);
else
@@ -741,10 +752,11 @@ static int __usbhsg_ep_set_halt_wedge(struct usb_ep *ep, int halt, int wedge)
else
usbhsg_status_clr(gpriv, USBHSG_STATUS_WEDGE);
+out:
usbhs_unlock(priv, flags);
/******************** spin unlock ******************/
- return 0;
+ return ret;
}
static int usbhsg_ep_set_halt(struct usb_ep *ep, int value)
diff --git a/drivers/usb/renesas_usbhs/pipe.c b/drivers/usb/renesas_usbhs/pipe.c
index c4922b96c93b..9e5afdde1adb 100644
--- a/drivers/usb/renesas_usbhs/pipe.c
+++ b/drivers/usb/renesas_usbhs/pipe.c
@@ -277,6 +277,21 @@ int usbhs_pipe_is_accessible(struct usbhs_pipe *pipe)
return -EBUSY;
}
+bool usbhs_pipe_contains_transmittable_data(struct usbhs_pipe *pipe)
+{
+ u16 val;
+
+ /* Do not support for DCP pipe */
+ if (usbhs_pipe_is_dcp(pipe))
+ return false;
+
+ val = usbhsp_pipectrl_get(pipe);
+ if (val & INBUFM)
+ return true;
+
+ return false;
+}
+
/*
* PID ctrl
*/
diff --git a/drivers/usb/renesas_usbhs/pipe.h b/drivers/usb/renesas_usbhs/pipe.h
index 3080423e600c..3b130529408b 100644
--- a/drivers/usb/renesas_usbhs/pipe.h
+++ b/drivers/usb/renesas_usbhs/pipe.h
@@ -83,6 +83,7 @@ void usbhs_pipe_clear(struct usbhs_pipe *pipe);
void usbhs_pipe_clear_without_sequence(struct usbhs_pipe *pipe,
int needs_bfre, int bfre_enable);
int usbhs_pipe_is_accessible(struct usbhs_pipe *pipe);
+bool usbhs_pipe_contains_transmittable_data(struct usbhs_pipe *pipe);
void usbhs_pipe_enable(struct usbhs_pipe *pipe);
void usbhs_pipe_disable(struct usbhs_pipe *pipe);
void usbhs_pipe_stall(struct usbhs_pipe *pipe);
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index f0688c44b04c..25e81faf4c24 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -1030,6 +1030,9 @@ static const struct usb_device_id id_table_combined[] = {
/* EZPrototypes devices */
{ USB_DEVICE(EZPROTOTYPES_VID, HJELMSLUND_USB485_ISO_PID) },
{ USB_DEVICE_INTERFACE_NUMBER(UNJO_VID, UNJO_ISODEBUG_V1_PID, 1) },
+ /* Sienna devices */
+ { USB_DEVICE(FTDI_VID, FTDI_SIENNA_PID) },
+ { USB_DEVICE(ECHELON_VID, ECHELON_U20_PID) },
{ } /* Terminating entry */
};
diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
index f12d806220b4..22d66217cb41 100644
--- a/drivers/usb/serial/ftdi_sio_ids.h
+++ b/drivers/usb/serial/ftdi_sio_ids.h
@@ -39,6 +39,9 @@
#define FTDI_LUMEL_PD12_PID 0x6002
+/* Sienna Serial Interface by Secyourit GmbH */
+#define FTDI_SIENNA_PID 0x8348
+
/* Cyber Cortex AV by Fabulous Silicon (http://fabuloussilicon.com) */
#define CYBER_CORTEX_AV_PID 0x8698
@@ -689,6 +692,12 @@
#define BANDB_ZZ_PROG1_USB_PID 0xBA02
/*
+ * Echelon USB Serial Interface
+ */
+#define ECHELON_VID 0x0920
+#define ECHELON_U20_PID 0x7500
+
+/*
* Intrepid Control Systems (http://www.intrepidcs.com/) ValueCAN and NeoVI
*/
#define INTREPID_VID 0x093C
diff --git a/drivers/usb/serial/keyspan.c b/drivers/usb/serial/keyspan.c
index d34779fe4a8d..e66a59ef43a1 100644
--- a/drivers/usb/serial/keyspan.c
+++ b/drivers/usb/serial/keyspan.c
@@ -1741,8 +1741,8 @@ static struct urb *keyspan_setup_urb(struct usb_serial *serial, int endpoint,
ep_desc = find_ep(serial, endpoint);
if (!ep_desc) {
- /* leak the urb, something's wrong and the callers don't care */
- return urb;
+ usb_free_urb(urb);
+ return NULL;
}
if (usb_endpoint_xfer_int(ep_desc)) {
ep_type_name = "INT";
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 38e920ac7f82..06ab016be0b6 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -419,6 +419,7 @@ static void option_instat_callback(struct urb *urb);
#define CINTERION_PRODUCT_PH8_AUDIO 0x0083
#define CINTERION_PRODUCT_AHXX_2RMNET 0x0084
#define CINTERION_PRODUCT_AHXX_AUDIO 0x0085
+#define CINTERION_PRODUCT_CLS8 0x00b0
/* Olivetti products */
#define OLIVETTI_VENDOR_ID 0x0b3c
@@ -1154,6 +1155,14 @@ static const struct usb_device_id option_ids[] = {
.driver_info = NCTRL(0) | RSVD(1) | RSVD(2) | RSVD(3) },
{ USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG5, 0xff),
.driver_info = RSVD(0) | RSVD(1) | NCTRL(2) | RSVD(3) },
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1050, 0xff), /* Telit FN980 (rmnet) */
+ .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) },
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1051, 0xff), /* Telit FN980 (MBIM) */
+ .driver_info = NCTRL(0) | RSVD(1) },
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1052, 0xff), /* Telit FN980 (RNDIS) */
+ .driver_info = NCTRL(2) | RSVD(3) },
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1053, 0xff), /* Telit FN980 (ECM) */
+ .driver_info = NCTRL(0) | RSVD(1) },
{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910),
.driver_info = NCTRL(0) | RSVD(1) | RSVD(3) },
{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM),
@@ -1847,6 +1856,8 @@ static const struct usb_device_id option_ids[] = {
.driver_info = RSVD(4) },
{ USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_AHXX_2RMNET, 0xff) },
{ USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_AHXX_AUDIO, 0xff) },
+ { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_CLS8, 0xff),
+ .driver_info = RSVD(0) | RSVD(4) },
{ USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_HC28_MDM) },
{ USB_DEVICE(CINTERION_VENDOR_ID, CINTERION_PRODUCT_HC28_MDMNET) },
{ USB_DEVICE(SIEMENS_VENDOR_ID, CINTERION_PRODUCT_HC25_MDM) },
diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c
index dd0ad67aa71e..ef23acc9b9ce 100644
--- a/drivers/usb/serial/ti_usb_3410_5052.c
+++ b/drivers/usb/serial/ti_usb_3410_5052.c
@@ -776,7 +776,6 @@ static void ti_close(struct usb_serial_port *port)
struct ti_port *tport;
int port_number;
int status;
- int do_unlock;
unsigned long flags;
tdev = usb_get_serial_data(port->serial);
@@ -800,16 +799,13 @@ static void ti_close(struct usb_serial_port *port)
"%s - cannot send close port command, %d\n"
, __func__, status);
- /* if mutex_lock is interrupted, continue anyway */
- do_unlock = !mutex_lock_interruptible(&tdev->td_open_close_lock);
- --tport->tp_tdev->td_open_port_count;
- if (tport->tp_tdev->td_open_port_count <= 0) {
+ mutex_lock(&tdev->td_open_close_lock);
+ --tdev->td_open_port_count;
+ if (tdev->td_open_port_count == 0) {
/* last port is closed, shut down interrupt urb */
usb_kill_urb(port->serial->port[0]->interrupt_in_urb);
- tport->tp_tdev->td_open_port_count = 0;
}
- if (do_unlock)
- mutex_unlock(&tdev->td_open_close_lock);
+ mutex_unlock(&tdev->td_open_close_lock);
}
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index a3179fea38c8..8f066bb55d7d 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -314,10 +314,7 @@ static void serial_cleanup(struct tty_struct *tty)
serial = port->serial;
owner = serial->type->driver.owner;
- mutex_lock(&serial->disc_mutex);
- if (!serial->disconnected)
- usb_autopm_put_interface(serial->interface);
- mutex_unlock(&serial->disc_mutex);
+ usb_autopm_put_interface(serial->interface);
usb_serial_put(serial);
module_put(owner);
diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c
index 79314d8c94a4..ca3bd58f2025 100644
--- a/drivers/usb/serial/whiteheat.c
+++ b/drivers/usb/serial/whiteheat.c
@@ -559,6 +559,10 @@ static int firm_send_command(struct usb_serial_port *port, __u8 command,
command_port = port->serial->port[COMMAND_PORT];
command_info = usb_get_serial_port_data(command_port);
+
+ if (command_port->bulk_out_size < datasize + 1)
+ return -EIO;
+
mutex_lock(&command_info->mutex);
command_info->command_finished = false;
@@ -632,6 +636,7 @@ static void firm_setup_port(struct tty_struct *tty)
struct device *dev = &port->dev;
struct whiteheat_port_settings port_settings;
unsigned int cflag = tty->termios.c_cflag;
+ speed_t baud;
port_settings.port = port->port_number + 1;
@@ -692,11 +697,13 @@ static void firm_setup_port(struct tty_struct *tty)
dev_dbg(dev, "%s - XON = %2x, XOFF = %2x\n", __func__, port_settings.xon, port_settings.xoff);
/* get the baud rate wanted */
- port_settings.baud = tty_get_baud_rate(tty);
- dev_dbg(dev, "%s - baud rate = %d\n", __func__, port_settings.baud);
+ baud = tty_get_baud_rate(tty);
+ port_settings.baud = cpu_to_le32(baud);
+ dev_dbg(dev, "%s - baud rate = %u\n", __func__, baud);
/* fixme: should set validated settings */
- tty_encode_baud_rate(tty, port_settings.baud, port_settings.baud);
+ tty_encode_baud_rate(tty, baud, baud);
+
/* handle any settings that aren't specified in the tty structure */
port_settings.lloop = 0;
diff --git a/drivers/usb/serial/whiteheat.h b/drivers/usb/serial/whiteheat.h
index 00398149cd8d..269e727a92f9 100644
--- a/drivers/usb/serial/whiteheat.h
+++ b/drivers/usb/serial/whiteheat.h
@@ -87,7 +87,7 @@ struct whiteheat_simple {
struct whiteheat_port_settings {
__u8 port; /* port number (1 to N) */
- __u32 baud; /* any value 7 - 460800, firmware calculates
+ __le32 baud; /* any value 7 - 460800, firmware calculates
best fit; arrives little endian */
__u8 bits; /* 5, 6, 7, or 8 */
__u8 stop; /* 1 or 2, default 1 (2 = 1.5 if bits = 5) */
diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c
index 6737fab94959..54a3c8195c96 100644
--- a/drivers/usb/storage/scsiglue.c
+++ b/drivers/usb/storage/scsiglue.c
@@ -68,7 +68,6 @@ static const char* host_info(struct Scsi_Host *host)
static int slave_alloc (struct scsi_device *sdev)
{
struct us_data *us = host_to_us(sdev->host);
- int maxp;
/*
* Set the INQUIRY transfer length to 36. We don't use any of
@@ -78,15 +77,6 @@ static int slave_alloc (struct scsi_device *sdev)
sdev->inquiry_len = 36;
/*
- * USB has unusual scatter-gather requirements: the length of each
- * scatterlist element except the last must be divisible by the
- * Bulk maxpacket value. Fortunately this value is always a
- * power of 2. Inform the block layer about this requirement.
- */
- maxp = usb_maxpacket(us->pusb_dev, us->recv_bulk_pipe, 0);
- blk_queue_virt_boundary(sdev->request_queue, maxp - 1);
-
- /*
* Some host controllers may have alignment requirements.
* We'll play it safe by requiring 512-byte alignment always.
*/
diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index bf80d6f81f58..34538253f12c 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -789,30 +789,10 @@ static int uas_slave_alloc(struct scsi_device *sdev)
{
struct uas_dev_info *devinfo =
(struct uas_dev_info *)sdev->host->hostdata;
- int maxp;
sdev->hostdata = devinfo;
/*
- * We have two requirements here. We must satisfy the requirements
- * of the physical HC and the demands of the protocol, as we
- * definitely want no additional memory allocation in this path
- * ruling out using bounce buffers.
- *
- * For a transmission on USB to continue we must never send
- * a package that is smaller than maxpacket. Hence the length of each
- * scatterlist element except the last must be divisible by the
- * Bulk maxpacket value.
- * If the HC does not ensure that through SG,
- * the upper layer must do that. We must assume nothing
- * about the capabilities off the HC, so we use the most
- * pessimistic requirement.
- */
-
- maxp = usb_maxpacket(devinfo->udev, devinfo->data_in_pipe, 0);
- blk_queue_virt_boundary(sdev->request_queue, maxp - 1);
-
- /*
* The protocol has no requirements on alignment in the strict sense.
* Controllers may or may not have alignment restrictions.
* As this is not exported, we use an extremely conservative guess.
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 96562744101c..5f61d9977a15 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -4409,18 +4409,20 @@ static int tcpm_fw_get_caps(struct tcpm_port *port,
/* USB data support is optional */
ret = fwnode_property_read_string(fwnode, "data-role", &cap_str);
if (ret == 0) {
- port->typec_caps.data = typec_find_port_data_role(cap_str);
- if (port->typec_caps.data < 0)
- return -EINVAL;
+ ret = typec_find_port_data_role(cap_str);
+ if (ret < 0)
+ return ret;
+ port->typec_caps.data = ret;
}
ret = fwnode_property_read_string(fwnode, "power-role", &cap_str);
if (ret < 0)
return ret;
- port->typec_caps.type = typec_find_port_power_role(cap_str);
- if (port->typec_caps.type < 0)
- return -EINVAL;
+ ret = typec_find_port_power_role(cap_str);
+ if (ret < 0)
+ return ret;
+ port->typec_caps.type = ret;
port->port_type = port->typec_caps.type;
if (port->port_type == TYPEC_PORT_SNK)
diff --git a/drivers/usb/typec/ucsi/displayport.c b/drivers/usb/typec/ucsi/displayport.c
index 6c103697c582..d99700cb4dca 100644
--- a/drivers/usb/typec/ucsi/displayport.c
+++ b/drivers/usb/typec/ucsi/displayport.c
@@ -75,6 +75,8 @@ static int ucsi_displayport_enter(struct typec_altmode *alt)
if (cur != 0xff) {
mutex_unlock(&dp->con->lock);
+ if (dp->con->port_altmode[cur] == alt)
+ return 0;
return -EBUSY;
}
diff --git a/drivers/usb/typec/ucsi/ucsi_ccg.c b/drivers/usb/typec/ucsi/ucsi_ccg.c
index 907e20e1a71e..d772fce51905 100644
--- a/drivers/usb/typec/ucsi/ucsi_ccg.c
+++ b/drivers/usb/typec/ucsi/ucsi_ccg.c
@@ -195,7 +195,6 @@ struct ucsi_ccg {
/* fw build with vendor information */
u16 fw_build;
- bool run_isr; /* flag to call ISR routine during resume */
struct work_struct pm_work;
};
@@ -224,18 +223,6 @@ static int ccg_read(struct ucsi_ccg *uc, u16 rab, u8 *data, u32 len)
if (quirks && quirks->max_read_len)
max_read_len = quirks->max_read_len;
- if (uc->fw_build == CCG_FW_BUILD_NVIDIA &&
- uc->fw_version <= CCG_OLD_FW_VERSION) {
- mutex_lock(&uc->lock);
- /*
- * Do not schedule pm_work to run ISR in
- * ucsi_ccg_runtime_resume() after pm_runtime_get_sync()
- * since we are already in ISR path.
- */
- uc->run_isr = false;
- mutex_unlock(&uc->lock);
- }
-
pm_runtime_get_sync(uc->dev);
while (rem_len > 0) {
msgs[1].buf = &data[len - rem_len];
@@ -278,18 +265,6 @@ static int ccg_write(struct ucsi_ccg *uc, u16 rab, u8 *data, u32 len)
msgs[0].len = len + sizeof(rab);
msgs[0].buf = buf;
- if (uc->fw_build == CCG_FW_BUILD_NVIDIA &&
- uc->fw_version <= CCG_OLD_FW_VERSION) {
- mutex_lock(&uc->lock);
- /*
- * Do not schedule pm_work to run ISR in
- * ucsi_ccg_runtime_resume() after pm_runtime_get_sync()
- * since we are already in ISR path.
- */
- uc->run_isr = false;
- mutex_unlock(&uc->lock);
- }
-
pm_runtime_get_sync(uc->dev);
status = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
if (status < 0) {
@@ -1130,7 +1105,6 @@ static int ucsi_ccg_probe(struct i2c_client *client,
uc->ppm.sync = ucsi_ccg_sync;
uc->dev = dev;
uc->client = client;
- uc->run_isr = true;
mutex_init(&uc->lock);
INIT_WORK(&uc->work, ccg_update_firmware);
INIT_WORK(&uc->pm_work, ccg_pm_workaround_work);
@@ -1188,6 +1162,8 @@ static int ucsi_ccg_probe(struct i2c_client *client,
pm_runtime_set_active(uc->dev);
pm_runtime_enable(uc->dev);
+ pm_runtime_use_autosuspend(uc->dev);
+ pm_runtime_set_autosuspend_delay(uc->dev, 5000);
pm_runtime_idle(uc->dev);
return 0;
@@ -1229,7 +1205,6 @@ static int ucsi_ccg_runtime_resume(struct device *dev)
{
struct i2c_client *client = to_i2c_client(dev);
struct ucsi_ccg *uc = i2c_get_clientdata(client);
- bool schedule = true;
/*
* Firmware version 3.1.10 or earlier, built for NVIDIA has known issue
@@ -1237,17 +1212,8 @@ static int ucsi_ccg_runtime_resume(struct device *dev)
* Schedule a work to call ISR as a workaround.
*/
if (uc->fw_build == CCG_FW_BUILD_NVIDIA &&
- uc->fw_version <= CCG_OLD_FW_VERSION) {
- mutex_lock(&uc->lock);
- if (!uc->run_isr) {
- uc->run_isr = true;
- schedule = false;
- }
- mutex_unlock(&uc->lock);
-
- if (schedule)
- schedule_work(&uc->pm_work);
- }
+ uc->fw_version <= CCG_OLD_FW_VERSION)
+ schedule_work(&uc->pm_work);
return 0;
}
diff --git a/drivers/usb/usb-skeleton.c b/drivers/usb/usb-skeleton.c
index c31d17d05810..2dc58766273a 100644
--- a/drivers/usb/usb-skeleton.c
+++ b/drivers/usb/usb-skeleton.c
@@ -61,6 +61,7 @@ struct usb_skel {
spinlock_t err_lock; /* lock for errors */
struct kref kref;
struct mutex io_mutex; /* synchronize I/O with disconnect */
+ unsigned long disconnected:1;
wait_queue_head_t bulk_in_wait; /* to wait for an ongoing read */
};
#define to_skel_dev(d) container_of(d, struct usb_skel, kref)
@@ -73,6 +74,7 @@ static void skel_delete(struct kref *kref)
struct usb_skel *dev = to_skel_dev(kref);
usb_free_urb(dev->bulk_in_urb);
+ usb_put_intf(dev->interface);
usb_put_dev(dev->udev);
kfree(dev->bulk_in_buffer);
kfree(dev);
@@ -124,10 +126,7 @@ static int skel_release(struct inode *inode, struct file *file)
return -ENODEV;
/* allow the device to be autosuspended */
- mutex_lock(&dev->io_mutex);
- if (dev->interface)
- usb_autopm_put_interface(dev->interface);
- mutex_unlock(&dev->io_mutex);
+ usb_autopm_put_interface(dev->interface);
/* decrement the count on our device */
kref_put(&dev->kref, skel_delete);
@@ -231,8 +230,7 @@ static ssize_t skel_read(struct file *file, char *buffer, size_t count,
dev = file->private_data;
- /* if we cannot read at all, return EOF */
- if (!dev->bulk_in_urb || !count)
+ if (!count)
return 0;
/* no concurrent readers */
@@ -240,7 +238,7 @@ static ssize_t skel_read(struct file *file, char *buffer, size_t count,
if (rv < 0)
return rv;
- if (!dev->interface) { /* disconnect() was called */
+ if (dev->disconnected) { /* disconnect() was called */
rv = -ENODEV;
goto exit;
}
@@ -422,7 +420,7 @@ static ssize_t skel_write(struct file *file, const char *user_buffer,
/* this lock makes sure we don't submit URBs to gone devices */
mutex_lock(&dev->io_mutex);
- if (!dev->interface) { /* disconnect() was called */
+ if (dev->disconnected) { /* disconnect() was called */
mutex_unlock(&dev->io_mutex);
retval = -ENODEV;
goto error;
@@ -507,7 +505,7 @@ static int skel_probe(struct usb_interface *interface,
init_waitqueue_head(&dev->bulk_in_wait);
dev->udev = usb_get_dev(interface_to_usbdev(interface));
- dev->interface = interface;
+ dev->interface = usb_get_intf(interface);
/* set up the endpoint information */
/* use only the first bulk-in and bulk-out endpoints */
@@ -573,9 +571,10 @@ static void skel_disconnect(struct usb_interface *interface)
/* prevent more I/O from starting */
mutex_lock(&dev->io_mutex);
- dev->interface = NULL;
+ dev->disconnected = 1;
mutex_unlock(&dev->io_mutex);
+ usb_kill_urb(dev->bulk_in_urb);
usb_kill_anchored_urbs(&dev->submitted);
/* decrement our usage count */
diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c
index 585a84d319bd..65850e9c7190 100644
--- a/drivers/usb/usbip/vhci_hcd.c
+++ b/drivers/usb/usbip/vhci_hcd.c
@@ -1195,12 +1195,12 @@ static int vhci_start(struct usb_hcd *hcd)
if (id == 0 && usb_hcd_is_primary_hcd(hcd)) {
err = vhci_init_attr_group();
if (err) {
- pr_err("init attr group\n");
+ dev_err(hcd_dev(hcd), "init attr group failed, err = %d\n", err);
return err;
}
err = sysfs_create_group(&hcd_dev(hcd)->kobj, &vhci_attr_group);
if (err) {
- pr_err("create sysfs files\n");
+ dev_err(hcd_dev(hcd), "create sysfs files failed, err = %d\n", err);
vhci_finish_attr_group();
return err;
}
diff --git a/drivers/usb/usbip/vhci_tx.c b/drivers/usb/usbip/vhci_tx.c
index c3803785f6ef..0ae40a13a9fe 100644
--- a/drivers/usb/usbip/vhci_tx.c
+++ b/drivers/usb/usbip/vhci_tx.c
@@ -147,7 +147,10 @@ static int vhci_send_cmd_submit(struct vhci_device *vdev)
}
kfree(iov);
+ /* This is only for isochronous case */
kfree(iso_buffer);
+ iso_buffer = NULL;
+
usbip_dbg_vhci_tx("send txdata\n");
total_size += txsize;
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 96fddc1dafc3..d864277ea16f 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -1658,7 +1658,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
struct bus_type *bus = NULL;
int ret;
bool resv_msi, msi_remap;
- phys_addr_t resv_msi_base;
+ phys_addr_t resv_msi_base = 0;
struct iommu_domain_geometry geo;
LIST_HEAD(iova_copy);
LIST_HEAD(group_resv_regions);
diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
index 7804869c6a31..056308008288 100644
--- a/drivers/vhost/test.c
+++ b/drivers/vhost/test.c
@@ -161,6 +161,7 @@ static int vhost_test_release(struct inode *inode, struct file *f)
vhost_test_stop(n, &private);
vhost_test_flush(n);
+ vhost_dev_stop(&n->dev);
vhost_dev_cleanup(&n->dev);
/* We do an extra flush before freeing memory,
* since jobs can re-queue themselves. */
@@ -237,6 +238,7 @@ static long vhost_test_reset_owner(struct vhost_test *n)
}
vhost_test_stop(n, &priv);
vhost_test_flush(n);
+ vhost_dev_stop(&n->dev);
vhost_dev_reset_owner(&n->dev, umem);
done:
mutex_unlock(&n->dev.mutex);
diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
index 08ad0d1f0476..a0a2d74967ef 100644
--- a/drivers/vhost/vringh.c
+++ b/drivers/vhost/vringh.c
@@ -852,6 +852,12 @@ static inline int xfer_kern(void *src, void *dst, size_t len)
return 0;
}
+static inline int kern_xfer(void *dst, void *src, size_t len)
+{
+ memcpy(dst, src, len);
+ return 0;
+}
+
/**
* vringh_init_kern - initialize a vringh for a kernelspace vring.
* @vrh: the vringh to initialize.
@@ -958,7 +964,7 @@ EXPORT_SYMBOL(vringh_iov_pull_kern);
ssize_t vringh_iov_push_kern(struct vringh_kiov *wiov,
const void *src, size_t len)
{
- return vringh_iov_xfer(wiov, (void *)src, len, xfer_kern);
+ return vringh_iov_xfer(wiov, (void *)src, len, kern_xfer);
}
EXPORT_SYMBOL(vringh_iov_push_kern);
diff --git a/drivers/video/fbdev/c2p_core.h b/drivers/video/fbdev/c2p_core.h
index e1035a865fb9..45a6d895a7d7 100644
--- a/drivers/video/fbdev/c2p_core.h
+++ b/drivers/video/fbdev/c2p_core.h
@@ -29,7 +29,7 @@ static inline void _transp(u32 d[], unsigned int i1, unsigned int i2,
extern void c2p_unsupported(void);
-static inline u32 get_mask(unsigned int n)
+static __always_inline u32 get_mask(unsigned int n)
{
switch (n) {
case 1:
@@ -57,7 +57,7 @@ static inline u32 get_mask(unsigned int n)
* Transpose operations on 8 32-bit words
*/
-static inline void transp8(u32 d[], unsigned int n, unsigned int m)
+static __always_inline void transp8(u32 d[], unsigned int n, unsigned int m)
{
u32 mask = get_mask(n);
@@ -99,7 +99,7 @@ static inline void transp8(u32 d[], unsigned int n, unsigned int m)
* Transpose operations on 4 32-bit words
*/
-static inline void transp4(u32 d[], unsigned int n, unsigned int m)
+static __always_inline void transp4(u32 d[], unsigned int n, unsigned int m)
{
u32 mask = get_mask(n);
@@ -126,7 +126,7 @@ static inline void transp4(u32 d[], unsigned int n, unsigned int m)
* Transpose operations on 4 32-bit words (reverse order)
*/
-static inline void transp4x(u32 d[], unsigned int n, unsigned int m)
+static __always_inline void transp4x(u32 d[], unsigned int n, unsigned int m)
{
u32 mask = get_mask(n);
diff --git a/drivers/virt/vboxguest/vboxguest_utils.c b/drivers/virt/vboxguest/vboxguest_utils.c
index 75fd140b02ff..43c391626a00 100644
--- a/drivers/virt/vboxguest/vboxguest_utils.c
+++ b/drivers/virt/vboxguest/vboxguest_utils.c
@@ -220,6 +220,8 @@ static int hgcm_call_preprocess_linaddr(
if (!bounce_buf)
return -ENOMEM;
+ *bounce_buf_ret = bounce_buf;
+
if (copy_in) {
ret = copy_from_user(bounce_buf, (void __user *)buf, len);
if (ret)
@@ -228,7 +230,6 @@ static int hgcm_call_preprocess_linaddr(
memset(bounce_buf, 0, len);
}
- *bounce_buf_ret = bounce_buf;
hgcm_call_add_pagelist_size(bounce_buf, len, extra);
return 0;
}
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 226fbb995fb0..e05679c478e2 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -772,6 +772,13 @@ static unsigned long shrink_free_pages(struct virtio_balloon *vb,
return blocks_freed << VIRTIO_BALLOON_FREE_PAGE_ORDER;
}
+static unsigned long leak_balloon_pages(struct virtio_balloon *vb,
+ unsigned long pages_to_free)
+{
+ return leak_balloon(vb, pages_to_free * VIRTIO_BALLOON_PAGES_PER_PAGE) /
+ VIRTIO_BALLOON_PAGES_PER_PAGE;
+}
+
static unsigned long shrink_balloon_pages(struct virtio_balloon *vb,
unsigned long pages_to_free)
{
@@ -782,11 +789,10 @@ static unsigned long shrink_balloon_pages(struct virtio_balloon *vb,
* VIRTIO_BALLOON_ARRAY_PFNS_MAX balloon pages, so we call it
* multiple times to deflate pages till reaching pages_to_free.
*/
- while (vb->num_pages && pages_to_free) {
- pages_freed += leak_balloon(vb, pages_to_free) /
- VIRTIO_BALLOON_PAGES_PER_PAGE;
- pages_to_free -= pages_freed;
- }
+ while (vb->num_pages && pages_freed < pages_to_free)
+ pages_freed += leak_balloon_pages(vb,
+ pages_to_free - pages_freed);
+
update_balloon_size(vb);
return pages_freed;
@@ -799,7 +805,7 @@ static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker,
struct virtio_balloon *vb = container_of(shrinker,
struct virtio_balloon, shrinker);
- pages_to_free = sc->nr_to_scan * VIRTIO_BALLOON_PAGES_PER_PAGE;
+ pages_to_free = sc->nr_to_scan;
if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_FREE_PAGE_HINT))
pages_freed = shrink_free_pages(vb, pages_to_free);
@@ -820,7 +826,7 @@ static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker,
unsigned long count;
count = vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE;
- count += vb->num_free_page_blocks >> VIRTIO_BALLOON_FREE_PAGE_ORDER;
+ count += vb->num_free_page_blocks << VIRTIO_BALLOON_FREE_PAGE_ORDER;
return count;
}
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index bdc08244a648..867c7ebd3f10 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -583,7 +583,7 @@ unmap_release:
kfree(desc);
END_USE(vq);
- return -EIO;
+ return -ENOMEM;
}
static bool virtqueue_kick_prepare_split(struct virtqueue *_vq)
@@ -1085,7 +1085,7 @@ unmap_release:
kfree(desc);
END_USE(vq);
- return -EIO;
+ return -ENOMEM;
}
static inline int virtqueue_add_packed(struct virtqueue *_vq,
@@ -1499,9 +1499,6 @@ static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
* counter first before updating event flags.
*/
virtio_wmb(vq->weak_barriers);
- } else {
- used_idx = vq->last_used_idx;
- wrap_counter = vq->packed.used_wrap_counter;
}
if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DISABLE) {
@@ -1518,7 +1515,9 @@ static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
*/
virtio_mb(vq->weak_barriers);
- if (is_used_desc_packed(vq, used_idx, wrap_counter)) {
+ if (is_used_desc_packed(vq,
+ vq->last_used_idx,
+ vq->packed.used_wrap_counter)) {
END_USE(vq);
return false;
}
diff --git a/drivers/w1/slaves/Kconfig b/drivers/w1/slaves/Kconfig
index ebed495b9e69..b7847636501d 100644
--- a/drivers/w1/slaves/Kconfig
+++ b/drivers/w1/slaves/Kconfig
@@ -103,6 +103,7 @@ config W1_SLAVE_DS2438
config W1_SLAVE_DS250X
tristate "512b/1kb/16kb EPROM family support"
+ select CRC16
help
Say Y here if you want to use a 1-wire
512b/1kb/16kb EPROM family device (DS250x).
diff --git a/drivers/watchdog/bd70528_wdt.c b/drivers/watchdog/bd70528_wdt.c
index b0152fef4fc7..bc60e036627a 100644
--- a/drivers/watchdog/bd70528_wdt.c
+++ b/drivers/watchdog/bd70528_wdt.c
@@ -288,3 +288,4 @@ module_platform_driver(bd70528_wdt);
MODULE_AUTHOR("Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>");
MODULE_DESCRIPTION("BD70528 watchdog driver");
MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:bd70528-wdt");
diff --git a/drivers/watchdog/cpwd.c b/drivers/watchdog/cpwd.c
index 9393be584e72..808eeb4779e4 100644
--- a/drivers/watchdog/cpwd.c
+++ b/drivers/watchdog/cpwd.c
@@ -26,6 +26,7 @@
#include <linux/interrupt.h>
#include <linux/ioport.h>
#include <linux/timer.h>
+#include <linux/compat.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/io.h>
@@ -473,6 +474,11 @@ static long cpwd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
return 0;
}
+static long cpwd_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ return cpwd_ioctl(file, cmd, (unsigned long)compat_ptr(arg));
+}
+
static ssize_t cpwd_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
@@ -497,7 +503,7 @@ static ssize_t cpwd_read(struct file *file, char __user *buffer,
static const struct file_operations cpwd_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = cpwd_ioctl,
- .compat_ioctl = compat_ptr_ioctl,
+ .compat_ioctl = cpwd_compat_ioctl,
.open = cpwd_open,
.write = cpwd_write,
.read = cpwd_read,
diff --git a/drivers/watchdog/imx_sc_wdt.c b/drivers/watchdog/imx_sc_wdt.c
index 7ea5cf54e94a..8ed89f032ebf 100644
--- a/drivers/watchdog/imx_sc_wdt.c
+++ b/drivers/watchdog/imx_sc_wdt.c
@@ -99,8 +99,14 @@ static int imx_sc_wdt_set_pretimeout(struct watchdog_device *wdog,
{
struct arm_smccc_res res;
+ /*
+ * SCU firmware calculates pretimeout based on current time
+ * stamp instead of watchdog timeout stamp, need to convert
+ * the pretimeout to SCU firmware's timeout value.
+ */
arm_smccc_smc(IMX_SIP_TIMER, IMX_SIP_TIMER_SET_PRETIME_WDOG,
- pretimeout * 1000, 0, 0, 0, 0, 0, &res);
+ (wdog->timeout - pretimeout) * 1000, 0, 0, 0,
+ 0, 0, &res);
if (res.a0)
return -EACCES;
diff --git a/drivers/watchdog/meson_gxbb_wdt.c b/drivers/watchdog/meson_gxbb_wdt.c
index d17c1a6ed723..5a9ca10fbcfa 100644
--- a/drivers/watchdog/meson_gxbb_wdt.c
+++ b/drivers/watchdog/meson_gxbb_wdt.c
@@ -89,8 +89,8 @@ static unsigned int meson_gxbb_wdt_get_timeleft(struct watchdog_device *wdt_dev)
reg = readl(data->reg_base + GXBB_WDT_TCNT_REG);
- return ((reg >> GXBB_WDT_TCNT_CNT_SHIFT) -
- (reg & GXBB_WDT_TCNT_SETUP_MASK)) / 1000;
+ return ((reg & GXBB_WDT_TCNT_SETUP_MASK) -
+ (reg >> GXBB_WDT_TCNT_CNT_SHIFT)) / 1000;
}
static const struct watchdog_ops meson_gxbb_wdt_ops = {
diff --git a/drivers/watchdog/pm8916_wdt.c b/drivers/watchdog/pm8916_wdt.c
index 2d3652004e39..1213179f863c 100644
--- a/drivers/watchdog/pm8916_wdt.c
+++ b/drivers/watchdog/pm8916_wdt.c
@@ -163,9 +163,17 @@ static int pm8916_wdt_probe(struct platform_device *pdev)
irq = platform_get_irq(pdev, 0);
if (irq > 0) {
- if (devm_request_irq(dev, irq, pm8916_wdt_isr, 0, "pm8916_wdt",
- wdt))
- irq = 0;
+ err = devm_request_irq(dev, irq, pm8916_wdt_isr, 0,
+ "pm8916_wdt", wdt);
+ if (err)
+ return err;
+
+ wdt->wdev.info = &pm8916_wdt_pt_ident;
+ } else {
+ if (irq == -EPROBE_DEFER)
+ return -EPROBE_DEFER;
+
+ wdt->wdev.info = &pm8916_wdt_ident;
}
/* Configure watchdog to hard-reset mode */
@@ -177,7 +185,6 @@ static int pm8916_wdt_probe(struct platform_device *pdev)
return err;
}
- wdt->wdev.info = (irq > 0) ? &pm8916_wdt_pt_ident : &pm8916_wdt_ident,
wdt->wdev.ops = &pm8916_wdt_ops,
wdt->wdev.parent = dev;
wdt->wdev.min_timeout = PM8916_WDT_MIN_TIMEOUT;
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index a446a7221e13..81401f386c9c 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -22,6 +22,7 @@
#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
+#include <linux/dma-mapping.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
@@ -34,9 +35,6 @@
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/refcount.h>
-#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
-#include <linux/of_device.h>
-#endif
#include <xen/xen.h>
#include <xen/grant_table.h>
@@ -625,14 +623,7 @@ static int gntdev_open(struct inode *inode, struct file *flip)
flip->private_data = priv;
#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
priv->dma_dev = gntdev_miscdev.this_device;
-
- /*
- * The device is not spawn from a device tree, so arch_setup_dma_ops
- * is not called, thus leaving the device with dummy DMA ops.
- * Fix this by calling of_dma_configure() with a NULL node to set
- * default DMA ops.
- */
- of_dma_configure(priv->dma_dev, NULL, true);
+ dma_coerce_mask_and_coherent(priv->dma_dev, DMA_BIT_MASK(64));
#endif
pr_debug("priv %p\n", priv);
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 7ea6fb6a2e5d..49b381e104ef 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -1363,8 +1363,7 @@ static int gnttab_setup(void)
if (xen_feature(XENFEAT_auto_translated_physmap) && gnttab_shared.addr == NULL) {
gnttab_shared.addr = xen_auto_xlat_grant_frames.vaddr;
if (gnttab_shared.addr == NULL) {
- pr_warn("gnttab share frames (addr=0x%08lx) is not mapped!\n",
- (unsigned long)xen_auto_xlat_grant_frames.vaddr);
+ pr_warn("gnttab share frames is not mapped!\n");
return -ENOMEM;
}
}
diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c
index 69a626b0e594..c57c71b7d53d 100644
--- a/drivers/xen/pvcalls-back.c
+++ b/drivers/xen/pvcalls-back.c
@@ -775,7 +775,7 @@ static int pvcalls_back_poll(struct xenbus_device *dev,
mappass->reqcopy = *req;
icsk = inet_csk(mappass->sock->sk);
queue = &icsk->icsk_accept_queue;
- data = queue->rskq_accept_head != NULL;
+ data = READ_ONCE(queue->rskq_accept_head) != NULL;
if (data) {
mappass->reqcopy.cmd = 0;
ret = 0;
diff --git a/fs/Kconfig b/fs/Kconfig
index 2501e6f1f965..7b623e9fc1b0 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -322,4 +322,7 @@ source "fs/nls/Kconfig"
source "fs/dlm/Kconfig"
source "fs/unicode/Kconfig"
+config IO_WQ
+ bool
+
endmenu
diff --git a/fs/Makefile b/fs/Makefile
index 14231b4cf383..1148c555c4d3 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -32,6 +32,7 @@ obj-$(CONFIG_EVENTFD) += eventfd.o
obj-$(CONFIG_USERFAULTFD) += userfaultfd.o
obj-$(CONFIG_AIO) += aio.o
obj-$(CONFIG_IO_URING) += io_uring.o
+obj-$(CONFIG_IO_WQ) += io-wq.o
obj-$(CONFIG_FS_DAX) += dax.o
obj-$(CONFIG_FS_ENCRYPTION) += crypto/
obj-$(CONFIG_FS_VERITY) += verity/
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index a92eb6ae2ae2..a755bef7c4c7 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -43,8 +43,8 @@ struct affs_ext_key {
*/
struct affs_inode_info {
atomic_t i_opencnt;
- struct semaphore i_link_lock; /* Protects internal inode access. */
- struct semaphore i_ext_lock; /* Protects internal inode access. */
+ struct mutex i_link_lock; /* Protects internal inode access. */
+ struct mutex i_ext_lock; /* Protects internal inode access. */
#define i_hash_lock i_ext_lock
u32 i_blkcnt; /* block count */
u32 i_extcnt; /* extended block count */
@@ -293,30 +293,30 @@ affs_adjust_bitmapchecksum(struct buffer_head *bh, u32 val)
static inline void
affs_lock_link(struct inode *inode)
{
- down(&AFFS_I(inode)->i_link_lock);
+ mutex_lock(&AFFS_I(inode)->i_link_lock);
}
static inline void
affs_unlock_link(struct inode *inode)
{
- up(&AFFS_I(inode)->i_link_lock);
+ mutex_unlock(&AFFS_I(inode)->i_link_lock);
}
static inline void
affs_lock_dir(struct inode *inode)
{
- down(&AFFS_I(inode)->i_hash_lock);
+ mutex_lock_nested(&AFFS_I(inode)->i_hash_lock, SINGLE_DEPTH_NESTING);
}
static inline void
affs_unlock_dir(struct inode *inode)
{
- up(&AFFS_I(inode)->i_hash_lock);
+ mutex_unlock(&AFFS_I(inode)->i_hash_lock);
}
static inline void
affs_lock_ext(struct inode *inode)
{
- down(&AFFS_I(inode)->i_ext_lock);
+ mutex_lock(&AFFS_I(inode)->i_ext_lock);
}
static inline void
affs_unlock_ext(struct inode *inode)
{
- up(&AFFS_I(inode)->i_ext_lock);
+ mutex_unlock(&AFFS_I(inode)->i_ext_lock);
}
diff --git a/fs/affs/super.c b/fs/affs/super.c
index cc463ae47c12..47107c6712a6 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -121,8 +121,8 @@ static void init_once(void *foo)
{
struct affs_inode_info *ei = (struct affs_inode_info *) foo;
- sema_init(&ei->i_link_lock, 1);
- sema_init(&ei->i_ext_lock, 1);
+ mutex_init(&ei->i_link_lock);
+ mutex_init(&ei->i_ext_lock);
inode_init_once(&ei->vfs_inode);
}
@@ -561,14 +561,9 @@ affs_remount(struct super_block *sb, int *flags, char *data)
int root_block;
unsigned long mount_flags;
int res = 0;
- char *new_opts;
char volume[32];
char *prefix = NULL;
- new_opts = kstrdup(data, GFP_KERNEL);
- if (data && !new_opts)
- return -ENOMEM;
-
pr_debug("%s(flags=0x%x,opts=\"%s\")\n", __func__, *flags, data);
sync_filesystem(sb);
@@ -579,7 +574,6 @@ affs_remount(struct super_block *sb, int *flags, char *data)
&blocksize, &prefix, volume,
&mount_flags)) {
kfree(prefix);
- kfree(new_opts);
return -EINVAL;
}
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 6cdd7047c809..2dca8df1a18d 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -312,7 +312,6 @@ void afs_break_callbacks(struct afs_server *server, size_t count,
_enter("%p,%zu,", server, count);
ASSERT(server != NULL);
- ASSERTCMP(count, <=, AFSCBMAX);
/* TODO: Sort the callback break list by volume ID */
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index cc12772d0a4d..497f979018c2 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -803,7 +803,12 @@ success:
continue;
if (cookie->inodes[i]) {
- afs_vnode_commit_status(&fc, AFS_FS_I(cookie->inodes[i]),
+ struct afs_vnode *iv = AFS_FS_I(cookie->inodes[i]);
+
+ if (test_bit(AFS_VNODE_UNSET, &iv->flags))
+ continue;
+
+ afs_vnode_commit_status(&fc, iv,
scb->cb_break, NULL, scb);
continue;
}
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 0e5269374ac1..61498d9f06ef 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -637,6 +637,7 @@ long afs_wait_for_call_to_complete(struct afs_call *call,
call->need_attention = false;
__set_current_state(TASK_RUNNING);
afs_deliver_to_call(call);
+ timeout = rtt2;
continue;
}
diff --git a/fs/afs/super.c b/fs/afs/super.c
index f18911e8d770..488641b1a418 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -435,6 +435,7 @@ static int afs_fill_super(struct super_block *sb, struct afs_fs_context *ctx)
/* fill in the superblock */
sb->s_blocksize = PAGE_SIZE;
sb->s_blocksize_bits = PAGE_SHIFT;
+ sb->s_maxbytes = MAX_LFS_FILESIZE;
sb->s_magic = AFS_FS_MAGIC;
sb->s_op = &afs_super_ops;
if (!as->dyn_root)
diff --git a/fs/aio.c b/fs/aio.c
index 01e0fb9ae45a..0d9a559d488c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -2179,7 +2179,7 @@ SYSCALL_DEFINE5(io_getevents_time32, __u32, ctx_id,
#ifdef CONFIG_COMPAT
struct __compat_aio_sigset {
- compat_sigset_t __user *sigmask;
+ compat_uptr_t sigmask;
compat_size_t sigsetsize;
};
@@ -2193,7 +2193,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents,
struct old_timespec32 __user *, timeout,
const struct __compat_aio_sigset __user *, usig)
{
- struct __compat_aio_sigset ksig = { NULL, };
+ struct __compat_aio_sigset ksig = { 0, };
struct timespec64 t;
bool interrupted;
int ret;
@@ -2204,7 +2204,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents,
if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
return -EFAULT;
- ret = set_compat_user_sigmask(ksig.sigmask, ksig.sigsetsize);
+ ret = set_compat_user_sigmask(compat_ptr(ksig.sigmask), ksig.sigsetsize);
if (ret)
return ret;
@@ -2228,7 +2228,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64,
struct __kernel_timespec __user *, timeout,
const struct __compat_aio_sigset __user *, usig)
{
- struct __compat_aio_sigset ksig = { NULL, };
+ struct __compat_aio_sigset ksig = { 0, };
struct timespec64 t;
bool interrupted;
int ret;
@@ -2239,7 +2239,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents_time64,
if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
return -EFAULT;
- ret = set_compat_user_sigmask(ksig.sigmask, ksig.sigsetsize);
+ ret = set_compat_user_sigmask(compat_ptr(ksig.sigmask), ksig.sigsetsize);
if (ret)
return ret;
diff --git a/fs/autofs/expire.c b/fs/autofs/expire.c
index 2866fabf497f..91f5787dae7c 100644
--- a/fs/autofs/expire.c
+++ b/fs/autofs/expire.c
@@ -459,9 +459,10 @@ static struct dentry *autofs_expire_indirect(struct super_block *sb,
*/
how &= ~AUTOFS_EXP_LEAVES;
found = should_expire(expired, mnt, timeout, how);
- if (!found || found != expired)
- /* Something has changed, continue */
+ if (found != expired) { // something has changed, continue
+ dput(found);
goto next;
+ }
if (expired != dentry)
dput(dentry);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 9c073dbdc1b0..ee63c2732fa2 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1403,11 +1403,7 @@ static void flush_disk(struct block_device *bdev, bool kill_dirty)
"resized disk %s\n",
bdev->bd_disk ? bdev->bd_disk->disk_name : "");
}
-
- if (!bdev->bd_disk)
- return;
- if (disk_part_scan_enabled(bdev->bd_disk))
- bdev->bd_invalidated = 1;
+ bdev->bd_invalidated = 1;
}
/**
@@ -1420,8 +1416,8 @@ static void flush_disk(struct block_device *bdev, bool kill_dirty)
* and adjusts it if it differs. When shrinking the bdev size, its all caches
* are freed.
*/
-void check_disk_size_change(struct gendisk *disk, struct block_device *bdev,
- bool verbose)
+static void check_disk_size_change(struct gendisk *disk,
+ struct block_device *bdev, bool verbose)
{
loff_t disk_size, bdev_size;
@@ -1437,6 +1433,7 @@ void check_disk_size_change(struct gendisk *disk, struct block_device *bdev,
if (bdev_size > disk_size)
flush_disk(bdev, false);
}
+ bdev->bd_invalidated = 0;
}
/**
@@ -1466,7 +1463,6 @@ int revalidate_disk(struct gendisk *disk)
mutex_lock(&bdev->bd_mutex);
check_disk_size_change(disk, bdev, ret == 0);
- bdev->bd_invalidated = 0;
mutex_unlock(&bdev->bd_mutex);
bdput(bdev);
}
@@ -1512,6 +1508,45 @@ EXPORT_SYMBOL(bd_set_size);
static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
+int bdev_disk_changed(struct block_device *bdev, bool invalidate)
+{
+ struct gendisk *disk = bdev->bd_disk;
+ int ret;
+
+ lockdep_assert_held(&bdev->bd_mutex);
+
+rescan:
+ ret = blk_drop_partitions(disk, bdev);
+ if (ret)
+ return ret;
+
+ if (invalidate)
+ set_capacity(disk, 0);
+ else if (disk->fops->revalidate_disk)
+ disk->fops->revalidate_disk(disk);
+
+ check_disk_size_change(disk, bdev, !invalidate);
+
+ if (get_capacity(disk)) {
+ ret = blk_add_partitions(disk, bdev);
+ if (ret == -EAGAIN)
+ goto rescan;
+ } else {
+ /*
+ * Tell userspace that the media / partition table may have
+ * changed.
+ */
+ kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
+ }
+
+ return ret;
+}
+/*
+ * Only exported for for loop and dasd for historic reasons. Don't use in new
+ * code!
+ */
+EXPORT_SYMBOL_GPL(bdev_disk_changed);
+
/*
* bd_mutex locking:
*
@@ -1594,12 +1629,9 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
* The latter is necessary to prevent ghost
* partitions on a removed medium.
*/
- if (bdev->bd_invalidated) {
- if (!ret)
- rescan_partitions(disk, bdev);
- else if (ret == -ENOMEDIUM)
- invalidate_partitions(disk, bdev);
- }
+ if (bdev->bd_invalidated &&
+ (!ret || ret == -ENOMEDIUM))
+ bdev_disk_changed(bdev, ret == -ENOMEDIUM);
if (ret)
goto out_clear;
@@ -1632,12 +1664,9 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
if (bdev->bd_disk->fops->open)
ret = bdev->bd_disk->fops->open(bdev, mode);
/* the same as first opener case, read comment there */
- if (bdev->bd_invalidated) {
- if (!ret)
- rescan_partitions(bdev->bd_disk, bdev);
- else if (ret == -ENOMEDIUM)
- invalidate_partitions(bdev->bd_disk, bdev);
- }
+ if (bdev->bd_invalidated &&
+ (!ret || ret == -ENOMEDIUM))
+ bdev_disk_changed(bdev, ret == -ENOMEDIUM);
if (ret)
goto out_unlock_bdev;
}
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 38651fae7f21..75b6d10c9845 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -5,6 +5,8 @@ config BTRFS_FS
select CRYPTO
select CRYPTO_CRC32C
select LIBCRC32C
+ select CRYPTO_XXHASH
+ select CRYPTO_SHA256
select ZLIB_INFLATE
select ZLIB_DEFLATE
select LZO_COMPRESS
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 2e9e13ffbd08..1d32a07bb2d1 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -53,24 +53,12 @@ struct btrfs_workqueue {
struct __btrfs_workqueue *high;
};
-static void normal_work_helper(struct btrfs_work *work);
-
-#define BTRFS_WORK_HELPER(name) \
-noinline_for_stack void btrfs_##name(struct work_struct *arg) \
-{ \
- struct btrfs_work *work = container_of(arg, struct btrfs_work, \
- normal_work); \
- normal_work_helper(work); \
-}
-
-struct btrfs_fs_info *
-btrfs_workqueue_owner(const struct __btrfs_workqueue *wq)
+struct btrfs_fs_info * __pure btrfs_workqueue_owner(const struct __btrfs_workqueue *wq)
{
return wq->fs_info;
}
-struct btrfs_fs_info *
-btrfs_work_owner(const struct btrfs_work *work)
+struct btrfs_fs_info * __pure btrfs_work_owner(const struct btrfs_work *work)
{
return work->wq->fs_info;
}
@@ -89,29 +77,6 @@ bool btrfs_workqueue_normal_congested(const struct btrfs_workqueue *wq)
return atomic_read(&wq->normal->pending) > wq->normal->thresh * 2;
}
-BTRFS_WORK_HELPER(worker_helper);
-BTRFS_WORK_HELPER(delalloc_helper);
-BTRFS_WORK_HELPER(flush_delalloc_helper);
-BTRFS_WORK_HELPER(cache_helper);
-BTRFS_WORK_HELPER(submit_helper);
-BTRFS_WORK_HELPER(fixup_helper);
-BTRFS_WORK_HELPER(endio_helper);
-BTRFS_WORK_HELPER(endio_meta_helper);
-BTRFS_WORK_HELPER(endio_meta_write_helper);
-BTRFS_WORK_HELPER(endio_raid56_helper);
-BTRFS_WORK_HELPER(endio_repair_helper);
-BTRFS_WORK_HELPER(rmw_helper);
-BTRFS_WORK_HELPER(endio_write_helper);
-BTRFS_WORK_HELPER(freespace_write_helper);
-BTRFS_WORK_HELPER(delayed_meta_helper);
-BTRFS_WORK_HELPER(readahead_helper);
-BTRFS_WORK_HELPER(qgroup_rescan_helper);
-BTRFS_WORK_HELPER(extent_refs_helper);
-BTRFS_WORK_HELPER(scrub_helper);
-BTRFS_WORK_HELPER(scrubwrc_helper);
-BTRFS_WORK_HELPER(scrubnc_helper);
-BTRFS_WORK_HELPER(scrubparity_helper);
-
static struct __btrfs_workqueue *
__btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info, const char *name,
unsigned int flags, int limit_active, int thresh)
@@ -252,16 +217,16 @@ out:
}
}
-static void run_ordered_work(struct __btrfs_workqueue *wq)
+static void run_ordered_work(struct __btrfs_workqueue *wq,
+ struct btrfs_work *self)
{
struct list_head *list = &wq->ordered_list;
struct btrfs_work *work;
spinlock_t *lock = &wq->list_lock;
unsigned long flags;
+ bool free_self = false;
while (1) {
- void *wtag;
-
spin_lock_irqsave(lock, flags);
if (list_empty(list))
break;
@@ -287,22 +252,53 @@ static void run_ordered_work(struct __btrfs_workqueue *wq)
list_del(&work->ordered_list);
spin_unlock_irqrestore(lock, flags);
- /*
- * We don't want to call the ordered free functions with the
- * lock held though. Save the work as tag for the trace event,
- * because the callback could free the structure.
- */
- wtag = work;
- work->ordered_free(work);
- trace_btrfs_all_work_done(wq->fs_info, wtag);
+ if (work == self) {
+ /*
+ * This is the work item that the worker is currently
+ * executing.
+ *
+ * The kernel workqueue code guarantees non-reentrancy
+ * of work items. I.e., if a work item with the same
+ * address and work function is queued twice, the second
+ * execution is blocked until the first one finishes. A
+ * work item may be freed and recycled with the same
+ * work function; the workqueue code assumes that the
+ * original work item cannot depend on the recycled work
+ * item in that case (see find_worker_executing_work()).
+ *
+ * Note that different types of Btrfs work can depend on
+ * each other, and one type of work on one Btrfs
+ * filesystem may even depend on the same type of work
+ * on another Btrfs filesystem via, e.g., a loop device.
+ * Therefore, we must not allow the current work item to
+ * be recycled until we are really done, otherwise we
+ * break the above assumption and can deadlock.
+ */
+ free_self = true;
+ } else {
+ /*
+ * We don't want to call the ordered free functions with
+ * the lock held.
+ */
+ work->ordered_free(work);
+ /* NB: work must not be dereferenced past this point. */
+ trace_btrfs_all_work_done(wq->fs_info, work);
+ }
}
spin_unlock_irqrestore(lock, flags);
+
+ if (free_self) {
+ self->ordered_free(self);
+ /* NB: self must not be dereferenced past this point. */
+ trace_btrfs_all_work_done(wq->fs_info, self);
+ }
}
-static void normal_work_helper(struct btrfs_work *work)
+static void btrfs_work_helper(struct work_struct *normal_work)
{
+ struct btrfs_work *work = container_of(normal_work, struct btrfs_work,
+ normal_work);
struct __btrfs_workqueue *wq;
- void *wtag;
int need_order = 0;
/*
@@ -316,29 +312,26 @@ static void normal_work_helper(struct btrfs_work *work)
if (work->ordered_func)
need_order = 1;
wq = work->wq;
- /* Safe for tracepoints in case work gets freed by the callback */
- wtag = work;
trace_btrfs_work_sched(work);
thresh_exec_hook(wq);
work->func(work);
if (need_order) {
set_bit(WORK_DONE_BIT, &work->flags);
- run_ordered_work(wq);
+ run_ordered_work(wq, work);
+ } else {
+ /* NB: work must not be dereferenced past this point. */
+ trace_btrfs_all_work_done(wq->fs_info, work);
}
- if (!need_order)
- trace_btrfs_all_work_done(wq->fs_info, wtag);
}
-void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t uniq_func,
- btrfs_func_t func,
- btrfs_func_t ordered_func,
- btrfs_func_t ordered_free)
+void btrfs_init_work(struct btrfs_work *work, btrfs_func_t func,
+ btrfs_func_t ordered_func, btrfs_func_t ordered_free)
{
work->func = func;
work->ordered_func = ordered_func;
work->ordered_free = ordered_free;
- INIT_WORK(&work->normal_work, uniq_func);
+ INIT_WORK(&work->normal_work, btrfs_work_helper);
INIT_LIST_HEAD(&work->ordered_list);
work->flags = 0;
}
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index 7861c9feba5f..a4434301d84d 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -29,49 +29,20 @@ struct btrfs_work {
unsigned long flags;
};
-#define BTRFS_WORK_HELPER_PROTO(name) \
-void btrfs_##name(struct work_struct *arg)
-
-BTRFS_WORK_HELPER_PROTO(worker_helper);
-BTRFS_WORK_HELPER_PROTO(delalloc_helper);
-BTRFS_WORK_HELPER_PROTO(flush_delalloc_helper);
-BTRFS_WORK_HELPER_PROTO(cache_helper);
-BTRFS_WORK_HELPER_PROTO(submit_helper);
-BTRFS_WORK_HELPER_PROTO(fixup_helper);
-BTRFS_WORK_HELPER_PROTO(endio_helper);
-BTRFS_WORK_HELPER_PROTO(endio_meta_helper);
-BTRFS_WORK_HELPER_PROTO(endio_meta_write_helper);
-BTRFS_WORK_HELPER_PROTO(endio_raid56_helper);
-BTRFS_WORK_HELPER_PROTO(endio_repair_helper);
-BTRFS_WORK_HELPER_PROTO(rmw_helper);
-BTRFS_WORK_HELPER_PROTO(endio_write_helper);
-BTRFS_WORK_HELPER_PROTO(freespace_write_helper);
-BTRFS_WORK_HELPER_PROTO(delayed_meta_helper);
-BTRFS_WORK_HELPER_PROTO(readahead_helper);
-BTRFS_WORK_HELPER_PROTO(qgroup_rescan_helper);
-BTRFS_WORK_HELPER_PROTO(extent_refs_helper);
-BTRFS_WORK_HELPER_PROTO(scrub_helper);
-BTRFS_WORK_HELPER_PROTO(scrubwrc_helper);
-BTRFS_WORK_HELPER_PROTO(scrubnc_helper);
-BTRFS_WORK_HELPER_PROTO(scrubparity_helper);
-
-
struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
const char *name,
unsigned int flags,
int limit_active,
int thresh);
-void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t helper,
- btrfs_func_t func,
- btrfs_func_t ordered_func,
- btrfs_func_t ordered_free);
+void btrfs_init_work(struct btrfs_work *work, btrfs_func_t func,
+ btrfs_func_t ordered_func, btrfs_func_t ordered_free);
void btrfs_queue_work(struct btrfs_workqueue *wq,
struct btrfs_work *work);
void btrfs_destroy_workqueue(struct btrfs_workqueue *wq);
void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max);
void btrfs_set_work_high_priority(struct btrfs_work *work);
-struct btrfs_fs_info *btrfs_work_owner(const struct btrfs_work *work);
-struct btrfs_fs_info *btrfs_workqueue_owner(const struct __btrfs_workqueue *wq);
+struct btrfs_fs_info * __pure btrfs_work_owner(const struct btrfs_work *work);
+struct btrfs_fs_info * __pure btrfs_workqueue_owner(const struct __btrfs_workqueue *wq);
bool btrfs_workqueue_normal_congested(const struct btrfs_workqueue *wq);
#endif
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index bf7e3f23bba7..6934a5b8708f 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -120,12 +120,12 @@ u64 btrfs_get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags)
return get_alloc_profile(fs_info, orig_flags);
}
-void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
+void btrfs_get_block_group(struct btrfs_block_group *cache)
{
atomic_inc(&cache->count);
}
-void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
+void btrfs_put_block_group(struct btrfs_block_group *cache)
{
if (atomic_dec_and_test(&cache->count)) {
WARN_ON(cache->pinned > 0);
@@ -149,22 +149,21 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
* This adds the block group to the fs_info rb tree for the block group cache
*/
static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
- struct btrfs_block_group_cache *block_group)
+ struct btrfs_block_group *block_group)
{
struct rb_node **p;
struct rb_node *parent = NULL;
- struct btrfs_block_group_cache *cache;
+ struct btrfs_block_group *cache;
spin_lock(&info->block_group_cache_lock);
p = &info->block_group_cache_tree.rb_node;
while (*p) {
parent = *p;
- cache = rb_entry(parent, struct btrfs_block_group_cache,
- cache_node);
- if (block_group->key.objectid < cache->key.objectid) {
+ cache = rb_entry(parent, struct btrfs_block_group, cache_node);
+ if (block_group->start < cache->start) {
p = &(*p)->rb_left;
- } else if (block_group->key.objectid > cache->key.objectid) {
+ } else if (block_group->start > cache->start) {
p = &(*p)->rb_right;
} else {
spin_unlock(&info->block_group_cache_lock);
@@ -176,8 +175,8 @@ static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
rb_insert_color(&block_group->cache_node,
&info->block_group_cache_tree);
- if (info->first_logical_byte > block_group->key.objectid)
- info->first_logical_byte = block_group->key.objectid;
+ if (info->first_logical_byte > block_group->start)
+ info->first_logical_byte = block_group->start;
spin_unlock(&info->block_group_cache_lock);
@@ -188,10 +187,10 @@ static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
* This will return the block group at or after bytenr if contains is 0, else
* it will return the block group that contains the bytenr
*/
-static struct btrfs_block_group_cache *block_group_cache_tree_search(
+static struct btrfs_block_group *block_group_cache_tree_search(
struct btrfs_fs_info *info, u64 bytenr, int contains)
{
- struct btrfs_block_group_cache *cache, *ret = NULL;
+ struct btrfs_block_group *cache, *ret = NULL;
struct rb_node *n;
u64 end, start;
@@ -199,13 +198,12 @@ static struct btrfs_block_group_cache *block_group_cache_tree_search(
n = info->block_group_cache_tree.rb_node;
while (n) {
- cache = rb_entry(n, struct btrfs_block_group_cache,
- cache_node);
- end = cache->key.objectid + cache->key.offset - 1;
- start = cache->key.objectid;
+ cache = rb_entry(n, struct btrfs_block_group, cache_node);
+ end = cache->start + cache->length - 1;
+ start = cache->start;
if (bytenr < start) {
- if (!contains && (!ret || start < ret->key.objectid))
+ if (!contains && (!ret || start < ret->start))
ret = cache;
n = n->rb_left;
} else if (bytenr > start) {
@@ -221,8 +219,8 @@ static struct btrfs_block_group_cache *block_group_cache_tree_search(
}
if (ret) {
btrfs_get_block_group(ret);
- if (bytenr == 0 && info->first_logical_byte > ret->key.objectid)
- info->first_logical_byte = ret->key.objectid;
+ if (bytenr == 0 && info->first_logical_byte > ret->start)
+ info->first_logical_byte = ret->start;
}
spin_unlock(&info->block_group_cache_lock);
@@ -232,7 +230,7 @@ static struct btrfs_block_group_cache *block_group_cache_tree_search(
/*
* Return the block group that starts at or after bytenr
*/
-struct btrfs_block_group_cache *btrfs_lookup_first_block_group(
+struct btrfs_block_group *btrfs_lookup_first_block_group(
struct btrfs_fs_info *info, u64 bytenr)
{
return block_group_cache_tree_search(info, bytenr, 0);
@@ -241,14 +239,14 @@ struct btrfs_block_group_cache *btrfs_lookup_first_block_group(
/*
* Return the block group that contains the given bytenr
*/
-struct btrfs_block_group_cache *btrfs_lookup_block_group(
+struct btrfs_block_group *btrfs_lookup_block_group(
struct btrfs_fs_info *info, u64 bytenr)
{
return block_group_cache_tree_search(info, bytenr, 1);
}
-struct btrfs_block_group_cache *btrfs_next_block_group(
- struct btrfs_block_group_cache *cache)
+struct btrfs_block_group *btrfs_next_block_group(
+ struct btrfs_block_group *cache)
{
struct btrfs_fs_info *fs_info = cache->fs_info;
struct rb_node *node;
@@ -257,7 +255,7 @@ struct btrfs_block_group_cache *btrfs_next_block_group(
/* If our block group was removed, we need a full search. */
if (RB_EMPTY_NODE(&cache->cache_node)) {
- const u64 next_bytenr = cache->key.objectid + cache->key.offset;
+ const u64 next_bytenr = cache->start + cache->length;
spin_unlock(&fs_info->block_group_cache_lock);
btrfs_put_block_group(cache);
@@ -266,8 +264,7 @@ struct btrfs_block_group_cache *btrfs_next_block_group(
node = rb_next(&cache->cache_node);
btrfs_put_block_group(cache);
if (node) {
- cache = rb_entry(node, struct btrfs_block_group_cache,
- cache_node);
+ cache = rb_entry(node, struct btrfs_block_group, cache_node);
btrfs_get_block_group(cache);
} else
cache = NULL;
@@ -277,7 +274,7 @@ struct btrfs_block_group_cache *btrfs_next_block_group(
bool btrfs_inc_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
{
- struct btrfs_block_group_cache *bg;
+ struct btrfs_block_group *bg;
bool ret = true;
bg = btrfs_lookup_block_group(fs_info, bytenr);
@@ -300,7 +297,7 @@ bool btrfs_inc_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
{
- struct btrfs_block_group_cache *bg;
+ struct btrfs_block_group *bg;
bg = btrfs_lookup_block_group(fs_info, bytenr);
ASSERT(bg);
@@ -314,7 +311,7 @@ void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr)
btrfs_put_block_group(bg);
}
-void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg)
+void btrfs_wait_nocow_writers(struct btrfs_block_group *bg)
{
wait_var_event(&bg->nocow_writers, !atomic_read(&bg->nocow_writers));
}
@@ -322,7 +319,7 @@ void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg)
void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info,
const u64 start)
{
- struct btrfs_block_group_cache *bg;
+ struct btrfs_block_group *bg;
bg = btrfs_lookup_block_group(fs_info, start);
ASSERT(bg);
@@ -331,7 +328,7 @@ void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info,
btrfs_put_block_group(bg);
}
-void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg)
+void btrfs_wait_block_group_reservations(struct btrfs_block_group *bg)
{
struct btrfs_space_info *space_info = bg->space_info;
@@ -357,7 +354,7 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg)
}
struct btrfs_caching_control *btrfs_get_caching_control(
- struct btrfs_block_group_cache *cache)
+ struct btrfs_block_group *cache)
{
struct btrfs_caching_control *ctl;
@@ -392,7 +389,7 @@ void btrfs_put_caching_control(struct btrfs_caching_control *ctl)
* Callers of this must check if cache->cached == BTRFS_CACHE_ERROR before using
* any of the information in this block group.
*/
-void btrfs_wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
+void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache,
u64 num_bytes)
{
struct btrfs_caching_control *caching_ctl;
@@ -401,13 +398,13 @@ void btrfs_wait_block_group_cache_progress(struct btrfs_block_group_cache *cache
if (!caching_ctl)
return;
- wait_event(caching_ctl->wait, btrfs_block_group_cache_done(cache) ||
+ wait_event(caching_ctl->wait, btrfs_block_group_done(cache) ||
(cache->free_space_ctl->free_space >= num_bytes));
btrfs_put_caching_control(caching_ctl);
}
-int btrfs_wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
+int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache)
{
struct btrfs_caching_control *caching_ctl;
int ret = 0;
@@ -416,7 +413,7 @@ int btrfs_wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
if (!caching_ctl)
return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0;
- wait_event(caching_ctl->wait, btrfs_block_group_cache_done(cache));
+ wait_event(caching_ctl->wait, btrfs_block_group_done(cache));
if (cache->cached == BTRFS_CACHE_ERROR)
ret = -EIO;
btrfs_put_caching_control(caching_ctl);
@@ -424,11 +421,11 @@ int btrfs_wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
}
#ifdef CONFIG_BTRFS_DEBUG
-static void fragment_free_space(struct btrfs_block_group_cache *block_group)
+static void fragment_free_space(struct btrfs_block_group *block_group)
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
- u64 start = block_group->key.objectid;
- u64 len = block_group->key.offset;
+ u64 start = block_group->start;
+ u64 len = block_group->length;
u64 chunk = block_group->flags & BTRFS_BLOCK_GROUP_METADATA ?
fs_info->nodesize : fs_info->sectorsize;
u64 step = chunk << 1;
@@ -450,8 +447,7 @@ static void fragment_free_space(struct btrfs_block_group_cache *block_group)
* used yet since their free space will be released as soon as the transaction
* commits.
*/
-u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
- u64 start, u64 end)
+u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end)
{
struct btrfs_fs_info *info = block_group->fs_info;
u64 extent_start, extent_end, size, total_added = 0;
@@ -491,7 +487,7 @@ u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl)
{
- struct btrfs_block_group_cache *block_group = caching_ctl->block_group;
+ struct btrfs_block_group *block_group = caching_ctl->block_group;
struct btrfs_fs_info *fs_info = block_group->fs_info;
struct btrfs_root *extent_root = fs_info->extent_root;
struct btrfs_path *path;
@@ -507,7 +503,7 @@ static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl)
if (!path)
return -ENOMEM;
- last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
+ last = max_t(u64, block_group->start, BTRFS_SUPER_INFO_OFFSET);
#ifdef CONFIG_BTRFS_DEBUG
/*
@@ -587,13 +583,12 @@ next:
goto next;
}
- if (key.objectid < block_group->key.objectid) {
+ if (key.objectid < block_group->start) {
path->slots[0]++;
continue;
}
- if (key.objectid >= block_group->key.objectid +
- block_group->key.offset)
+ if (key.objectid >= block_group->start + block_group->length)
break;
if (key.type == BTRFS_EXTENT_ITEM_KEY ||
@@ -617,8 +612,7 @@ next:
ret = 0;
total_found += add_new_free_space(block_group, last,
- block_group->key.objectid +
- block_group->key.offset);
+ block_group->start + block_group->length);
caching_ctl->progress = (u64)-1;
out:
@@ -628,7 +622,7 @@ out:
static noinline void caching_thread(struct btrfs_work *work)
{
- struct btrfs_block_group_cache *block_group;
+ struct btrfs_block_group *block_group;
struct btrfs_fs_info *fs_info;
struct btrfs_caching_control *caching_ctl;
int ret;
@@ -656,8 +650,7 @@ static noinline void caching_thread(struct btrfs_work *work)
spin_lock(&block_group->space_info->lock);
spin_lock(&block_group->lock);
- bytes_used = block_group->key.offset -
- btrfs_block_group_used(&block_group->item);
+ bytes_used = block_group->length - block_group->used;
block_group->space_info->bytes_used += bytes_used >> 1;
spin_unlock(&block_group->lock);
spin_unlock(&block_group->space_info->lock);
@@ -677,8 +670,7 @@ static noinline void caching_thread(struct btrfs_work *work)
btrfs_put_block_group(block_group);
}
-int btrfs_cache_block_group(struct btrfs_block_group_cache *cache,
- int load_cache_only)
+int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only)
{
DEFINE_WAIT(wait);
struct btrfs_fs_info *fs_info = cache->fs_info;
@@ -693,10 +685,9 @@ int btrfs_cache_block_group(struct btrfs_block_group_cache *cache,
mutex_init(&caching_ctl->mutex);
init_waitqueue_head(&caching_ctl->wait);
caching_ctl->block_group = cache;
- caching_ctl->progress = cache->key.objectid;
+ caching_ctl->progress = cache->start;
refcount_set(&caching_ctl->count, 1);
- btrfs_init_work(&caching_ctl->work, btrfs_cache_helper,
- caching_thread, NULL, NULL);
+ btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL);
spin_lock(&cache->lock);
/*
@@ -763,8 +754,7 @@ int btrfs_cache_block_group(struct btrfs_block_group_cache *cache,
spin_lock(&cache->space_info->lock);
spin_lock(&cache->lock);
- bytes_used = cache->key.offset -
- btrfs_block_group_used(&cache->item);
+ bytes_used = cache->length - cache->used;
cache->space_info->bytes_used += bytes_used >> 1;
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
@@ -833,27 +823,36 @@ static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
*
* - RAID56 - in case there's neither RAID5 nor RAID6 profile block group
* in the whole filesystem
+ *
+ * - RAID1C34 - same as above for RAID1C3 and RAID1C4 block groups
*/
static void clear_incompat_bg_bits(struct btrfs_fs_info *fs_info, u64 flags)
{
- if (flags & BTRFS_BLOCK_GROUP_RAID56_MASK) {
+ bool found_raid56 = false;
+ bool found_raid1c34 = false;
+
+ if ((flags & BTRFS_BLOCK_GROUP_RAID56_MASK) ||
+ (flags & BTRFS_BLOCK_GROUP_RAID1C3) ||
+ (flags & BTRFS_BLOCK_GROUP_RAID1C4)) {
struct list_head *head = &fs_info->space_info;
struct btrfs_space_info *sinfo;
list_for_each_entry_rcu(sinfo, head, list) {
- bool found = false;
-
down_read(&sinfo->groups_sem);
if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID5]))
- found = true;
+ found_raid56 = true;
if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID6]))
- found = true;
+ found_raid56 = true;
+ if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID1C3]))
+ found_raid1c34 = true;
+ if (!list_empty(&sinfo->block_groups[BTRFS_RAID_RAID1C4]))
+ found_raid1c34 = true;
up_read(&sinfo->groups_sem);
-
- if (found)
- return;
}
- btrfs_clear_fs_incompat(fs_info, RAID56);
+ if (found_raid56)
+ btrfs_clear_fs_incompat(fs_info, RAID56);
+ if (found_raid1c34)
+ btrfs_clear_fs_incompat(fs_info, RAID1C34);
}
}
@@ -863,7 +862,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *root = fs_info->extent_root;
struct btrfs_path *path;
- struct btrfs_block_group_cache *block_group;
+ struct btrfs_block_group *block_group;
struct btrfs_free_cluster *cluster;
struct btrfs_root *tree_root = fs_info->tree_root;
struct btrfs_key key;
@@ -886,10 +885,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
* remove it.
*/
btrfs_free_excluded_extents(block_group);
- btrfs_free_ref_tree_range(fs_info, block_group->key.objectid,
- block_group->key.offset);
+ btrfs_free_ref_tree_range(fs_info, block_group->start,
+ block_group->length);
- memcpy(&key, &block_group->key, sizeof(key));
index = btrfs_bg_flags_to_raid_index(block_group->flags);
factor = btrfs_bg_type_to_factor(block_group->flags);
@@ -967,8 +965,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
}
key.objectid = BTRFS_FREE_SPACE_OBJECTID;
- key.offset = block_group->key.objectid;
key.type = 0;
+ key.offset = block_group->start;
ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
if (ret < 0)
@@ -987,7 +985,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
&fs_info->block_group_cache_tree);
RB_CLEAR_NODE(&block_group->cache_node);
- if (fs_info->first_logical_byte == block_group->key.objectid)
+ if (fs_info->first_logical_byte == block_group->start)
fs_info->first_logical_byte = (u64)-1;
spin_unlock(&fs_info->block_group_cache_lock);
@@ -1048,19 +1046,21 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
if (btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
WARN_ON(block_group->space_info->total_bytes
- < block_group->key.offset);
+ < block_group->length);
WARN_ON(block_group->space_info->bytes_readonly
- < block_group->key.offset);
+ < block_group->length);
WARN_ON(block_group->space_info->disk_total
- < block_group->key.offset * factor);
+ < block_group->length * factor);
}
- block_group->space_info->total_bytes -= block_group->key.offset;
- block_group->space_info->bytes_readonly -= block_group->key.offset;
- block_group->space_info->disk_total -= block_group->key.offset * factor;
+ block_group->space_info->total_bytes -= block_group->length;
+ block_group->space_info->bytes_readonly -= block_group->length;
+ block_group->space_info->disk_total -= block_group->length * factor;
spin_unlock(&block_group->space_info->lock);
- memcpy(&key, &block_group->key, sizeof(key));
+ key.objectid = block_group->start;
+ key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+ key.offset = block_group->length;
mutex_lock(&fs_info->chunk_mutex);
spin_lock(&block_group->lock);
@@ -1180,7 +1180,7 @@ struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
* data in this block group. That check should be done by relocation routine,
* not this function.
*/
-static int inc_block_group_ro(struct btrfs_block_group_cache *cache, int force)
+static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
{
struct btrfs_space_info *sinfo = cache->space_info;
u64 num_bytes;
@@ -1209,8 +1209,8 @@ static int inc_block_group_ro(struct btrfs_block_group_cache *cache, int force)
goto out;
}
- num_bytes = cache->key.offset - cache->reserved - cache->pinned -
- cache->bytes_super - btrfs_block_group_used(&cache->item);
+ num_bytes = cache->length - cache->reserved - cache->pinned -
+ cache->bytes_super - cache->used;
sinfo_used = btrfs_space_info_used(sinfo, true);
/*
@@ -1231,8 +1231,7 @@ out:
spin_unlock(&sinfo->lock);
if (ret == -ENOSPC && btrfs_test_opt(cache->fs_info, ENOSPC_DEBUG)) {
btrfs_info(cache->fs_info,
- "unable to make block group %llu ro",
- cache->key.objectid);
+ "unable to make block group %llu ro", cache->start);
btrfs_info(cache->fs_info,
"sinfo_used=%llu bg_num_bytes=%llu min_allocable=%llu",
sinfo_used, num_bytes, min_allocable_bytes);
@@ -1247,7 +1246,7 @@ out:
*/
void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
{
- struct btrfs_block_group_cache *block_group;
+ struct btrfs_block_group *block_group;
struct btrfs_space_info *space_info;
struct btrfs_trans_handle *trans;
int ret = 0;
@@ -1261,7 +1260,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
int trimming;
block_group = list_first_entry(&fs_info->unused_bgs,
- struct btrfs_block_group_cache,
+ struct btrfs_block_group,
bg_list);
list_del_init(&block_group->bg_list);
@@ -1279,8 +1278,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
down_write(&space_info->groups_sem);
spin_lock(&block_group->lock);
if (block_group->reserved || block_group->pinned ||
- btrfs_block_group_used(&block_group->item) ||
- block_group->ro ||
+ block_group->used || block_group->ro ||
list_is_singular(&block_group->list)) {
/*
* We want to bail if we made new allocations or have
@@ -1308,7 +1306,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
* properly if we fail to join the transaction.
*/
trans = btrfs_start_trans_remove_block_group(fs_info,
- block_group->key.objectid);
+ block_group->start);
if (IS_ERR(trans)) {
btrfs_dec_block_group_ro(block_group);
ret = PTR_ERR(trans);
@@ -1319,8 +1317,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
* We could have pending pinned extents for this block group,
* just delete them, we don't care about them anymore.
*/
- start = block_group->key.objectid;
- end = start + block_group->key.offset - 1;
+ start = block_group->start;
+ end = start + block_group->length - 1;
/*
* Hold the unused_bg_unpin_mutex lock to avoid racing with
* btrfs_finish_extent_commit(). If we are at transaction N,
@@ -1375,7 +1373,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
* Btrfs_remove_chunk will abort the transaction if things go
* horribly wrong.
*/
- ret = btrfs_remove_chunk(trans, block_group->key.objectid);
+ ret = btrfs_remove_chunk(trans, block_group->start);
if (ret) {
if (trimming)
@@ -1410,7 +1408,7 @@ next:
spin_unlock(&fs_info->unused_bgs_lock);
}
-void btrfs_mark_bg_unused(struct btrfs_block_group_cache *bg)
+void btrfs_mark_bg_unused(struct btrfs_block_group *bg)
{
struct btrfs_fs_info *fs_info = bg->fs_info;
@@ -1478,7 +1476,7 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info,
read_extent_buffer(leaf, &bg,
btrfs_item_ptr_offset(leaf, slot),
sizeof(bg));
- flags = btrfs_block_group_flags(&bg) &
+ flags = btrfs_stack_block_group_flags(&bg) &
BTRFS_BLOCK_GROUP_TYPE_MASK;
if (flags != (em->map_lookup->type &
@@ -1518,7 +1516,7 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
write_sequnlock(&fs_info->profiles_lock);
}
-static int exclude_super_stripes(struct btrfs_block_group_cache *cache)
+static int exclude_super_stripes(struct btrfs_block_group *cache)
{
struct btrfs_fs_info *fs_info = cache->fs_info;
u64 bytenr;
@@ -1526,10 +1524,10 @@ static int exclude_super_stripes(struct btrfs_block_group_cache *cache)
int stripe_len;
int i, nr, ret;
- if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
- stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
+ if (cache->start < BTRFS_SUPER_INFO_OFFSET) {
+ stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->start;
cache->bytes_super += stripe_len;
- ret = btrfs_add_excluded_extent(fs_info, cache->key.objectid,
+ ret = btrfs_add_excluded_extent(fs_info, cache->start,
stripe_len);
if (ret)
return ret;
@@ -1537,7 +1535,7 @@ static int exclude_super_stripes(struct btrfs_block_group_cache *cache)
for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
bytenr = btrfs_sb_offset(i);
- ret = btrfs_rmap_block(fs_info, cache->key.objectid,
+ ret = btrfs_rmap_block(fs_info, cache->start,
bytenr, &logical, &nr, &stripe_len);
if (ret)
return ret;
@@ -1545,21 +1543,19 @@ static int exclude_super_stripes(struct btrfs_block_group_cache *cache)
while (nr--) {
u64 start, len;
- if (logical[nr] > cache->key.objectid +
- cache->key.offset)
+ if (logical[nr] > cache->start + cache->length)
continue;
- if (logical[nr] + stripe_len <= cache->key.objectid)
+ if (logical[nr] + stripe_len <= cache->start)
continue;
start = logical[nr];
- if (start < cache->key.objectid) {
- start = cache->key.objectid;
+ if (start < cache->start) {
+ start = cache->start;
len = (logical[nr] + stripe_len) - start;
} else {
len = min_t(u64, stripe_len,
- cache->key.objectid +
- cache->key.offset - start);
+ cache->start + cache->length - start);
}
cache->bytes_super += len;
@@ -1575,7 +1571,7 @@ static int exclude_super_stripes(struct btrfs_block_group_cache *cache)
return 0;
}
-static void link_block_group(struct btrfs_block_group_cache *cache)
+static void link_block_group(struct btrfs_block_group *cache)
{
struct btrfs_space_info *space_info = cache->space_info;
int index = btrfs_bg_flags_to_raid_index(cache->flags);
@@ -1591,10 +1587,10 @@ static void link_block_group(struct btrfs_block_group_cache *cache)
btrfs_sysfs_add_block_group_type(cache);
}
-static struct btrfs_block_group_cache *btrfs_create_block_group_cache(
+static struct btrfs_block_group *btrfs_create_block_group_cache(
struct btrfs_fs_info *fs_info, u64 start, u64 size)
{
- struct btrfs_block_group_cache *cache;
+ struct btrfs_block_group *cache;
cache = kzalloc(sizeof(*cache), GFP_NOFS);
if (!cache)
@@ -1607,9 +1603,8 @@ static struct btrfs_block_group_cache *btrfs_create_block_group_cache(
return NULL;
}
- cache->key.objectid = start;
- cache->key.offset = size;
- cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+ cache->start = start;
+ cache->length = size;
cache->fs_info = fs_info;
cache->full_stripe_len = btrfs_full_stripe_len(fs_info, start);
@@ -1640,7 +1635,7 @@ static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
{
struct extent_map_tree *map_tree = &fs_info->mapping_tree;
struct extent_map *em;
- struct btrfs_block_group_cache *bg;
+ struct btrfs_block_group *bg;
u64 start = 0;
int ret = 0;
@@ -1665,15 +1660,14 @@ static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
free_extent_map(em);
break;
}
- if (bg->key.objectid != em->start ||
- bg->key.offset != em->len ||
+ if (bg->start != em->start || bg->length != em->len ||
(bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) !=
(em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
btrfs_err(fs_info,
"chunk start=%llu len=%llu flags=0x%llx doesn't match block group start=%llu len=%llu flags=0x%llx",
em->start, em->len,
em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK,
- bg->key.objectid, bg->key.offset,
+ bg->start, bg->length,
bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
ret = -EUCLEAN;
free_extent_map(em);
@@ -1687,22 +1681,117 @@ static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
return ret;
}
+static int read_one_block_group(struct btrfs_fs_info *info,
+ struct btrfs_path *path,
+ const struct btrfs_key *key,
+ int need_clear)
+{
+ struct extent_buffer *leaf = path->nodes[0];
+ struct btrfs_block_group *cache;
+ struct btrfs_space_info *space_info;
+ struct btrfs_block_group_item bgi;
+ const bool mixed = btrfs_fs_incompat(info, MIXED_GROUPS);
+ int slot = path->slots[0];
+ int ret;
+
+ ASSERT(key->type == BTRFS_BLOCK_GROUP_ITEM_KEY);
+
+ cache = btrfs_create_block_group_cache(info, key->objectid, key->offset);
+ if (!cache)
+ return -ENOMEM;
+
+ if (need_clear) {
+ /*
+ * When we mount with old space cache, we need to
+ * set BTRFS_DC_CLEAR and set dirty flag.
+ *
+ * a) Setting 'BTRFS_DC_CLEAR' makes sure that we
+ * truncate the old free space cache inode and
+ * setup a new one.
+ * b) Setting 'dirty flag' makes sure that we flush
+ * the new space cache info onto disk.
+ */
+ if (btrfs_test_opt(info, SPACE_CACHE))
+ cache->disk_cache_state = BTRFS_DC_CLEAR;
+ }
+ read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
+ sizeof(bgi));
+ cache->used = btrfs_stack_block_group_used(&bgi);
+ cache->flags = btrfs_stack_block_group_flags(&bgi);
+ if (!mixed && ((cache->flags & BTRFS_BLOCK_GROUP_METADATA) &&
+ (cache->flags & BTRFS_BLOCK_GROUP_DATA))) {
+ btrfs_err(info,
+"bg %llu is a mixed block group but filesystem hasn't enabled mixed block groups",
+ cache->start);
+ ret = -EINVAL;
+ goto error;
+ }
+
+ /*
+ * We need to exclude the super stripes now so that the space info has
+ * super bytes accounted for, otherwise we'll think we have more space
+ * than we actually do.
+ */
+ ret = exclude_super_stripes(cache);
+ if (ret) {
+ /* We may have excluded something, so call this just in case. */
+ btrfs_free_excluded_extents(cache);
+ goto error;
+ }
+
+ /*
+ * Check for two cases, either we are full, and therefore don't need
+ * to bother with the caching work since we won't find any space, or we
+ * are empty, and we can just add all the space in and be done with it.
+ * This saves us _a_lot_ of time, particularly in the full case.
+ */
+ if (key->offset == cache->used) {
+ cache->last_byte_to_unpin = (u64)-1;
+ cache->cached = BTRFS_CACHE_FINISHED;
+ btrfs_free_excluded_extents(cache);
+ } else if (cache->used == 0) {
+ cache->last_byte_to_unpin = (u64)-1;
+ cache->cached = BTRFS_CACHE_FINISHED;
+ add_new_free_space(cache, key->objectid,
+ key->objectid + key->offset);
+ btrfs_free_excluded_extents(cache);
+ }
+
+ ret = btrfs_add_block_group_cache(info, cache);
+ if (ret) {
+ btrfs_remove_free_space_cache(cache);
+ goto error;
+ }
+ trace_btrfs_add_block_group(info, cache, 0);
+ btrfs_update_space_info(info, cache->flags, key->offset,
+ cache->used, cache->bytes_super, &space_info);
+
+ cache->space_info = space_info;
+
+ link_block_group(cache);
+
+ set_avail_alloc_bits(info, cache->flags);
+ if (btrfs_chunk_readonly(info, cache->start)) {
+ inc_block_group_ro(cache, 1);
+ } else if (cache->used == 0) {
+ ASSERT(list_empty(&cache->bg_list));
+ btrfs_mark_bg_unused(cache);
+ }
+ return 0;
+error:
+ btrfs_put_block_group(cache);
+ return ret;
+}
+
int btrfs_read_block_groups(struct btrfs_fs_info *info)
{
struct btrfs_path *path;
int ret;
- struct btrfs_block_group_cache *cache;
+ struct btrfs_block_group *cache;
struct btrfs_space_info *space_info;
struct btrfs_key key;
- struct btrfs_key found_key;
- struct extent_buffer *leaf;
int need_clear = 0;
u64 cache_gen;
- u64 feature;
- int mixed;
-
- feature = btrfs_super_incompat_flags(info->super_copy);
- mixed = !!(feature & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS);
key.objectid = 0;
key.offset = 0;
@@ -1726,107 +1815,13 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
if (ret != 0)
goto error;
- leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-
- cache = btrfs_create_block_group_cache(info, found_key.objectid,
- found_key.offset);
- if (!cache) {
- ret = -ENOMEM;
- goto error;
- }
-
- if (need_clear) {
- /*
- * When we mount with old space cache, we need to
- * set BTRFS_DC_CLEAR and set dirty flag.
- *
- * a) Setting 'BTRFS_DC_CLEAR' makes sure that we
- * truncate the old free space cache inode and
- * setup a new one.
- * b) Setting 'dirty flag' makes sure that we flush
- * the new space cache info onto disk.
- */
- if (btrfs_test_opt(info, SPACE_CACHE))
- cache->disk_cache_state = BTRFS_DC_CLEAR;
- }
-
- read_extent_buffer(leaf, &cache->item,
- btrfs_item_ptr_offset(leaf, path->slots[0]),
- sizeof(cache->item));
- cache->flags = btrfs_block_group_flags(&cache->item);
- if (!mixed &&
- ((cache->flags & BTRFS_BLOCK_GROUP_METADATA) &&
- (cache->flags & BTRFS_BLOCK_GROUP_DATA))) {
- btrfs_err(info,
-"bg %llu is a mixed block group but filesystem hasn't enabled mixed block groups",
- cache->key.objectid);
- ret = -EINVAL;
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+ ret = read_one_block_group(info, path, &key, need_clear);
+ if (ret < 0)
goto error;
- }
-
- key.objectid = found_key.objectid + found_key.offset;
+ key.objectid += key.offset;
+ key.offset = 0;
btrfs_release_path(path);
-
- /*
- * We need to exclude the super stripes now so that the space
- * info has super bytes accounted for, otherwise we'll think
- * we have more space than we actually do.
- */
- ret = exclude_super_stripes(cache);
- if (ret) {
- /*
- * We may have excluded something, so call this just in
- * case.
- */
- btrfs_free_excluded_extents(cache);
- btrfs_put_block_group(cache);
- goto error;
- }
-
- /*
- * Check for two cases, either we are full, and therefore
- * don't need to bother with the caching work since we won't
- * find any space, or we are empty, and we can just add all
- * the space in and be done with it. This saves us _a_lot_ of
- * time, particularly in the full case.
- */
- if (found_key.offset == btrfs_block_group_used(&cache->item)) {
- cache->last_byte_to_unpin = (u64)-1;
- cache->cached = BTRFS_CACHE_FINISHED;
- btrfs_free_excluded_extents(cache);
- } else if (btrfs_block_group_used(&cache->item) == 0) {
- cache->last_byte_to_unpin = (u64)-1;
- cache->cached = BTRFS_CACHE_FINISHED;
- add_new_free_space(cache, found_key.objectid,
- found_key.objectid +
- found_key.offset);
- btrfs_free_excluded_extents(cache);
- }
-
- ret = btrfs_add_block_group_cache(info, cache);
- if (ret) {
- btrfs_remove_free_space_cache(cache);
- btrfs_put_block_group(cache);
- goto error;
- }
-
- trace_btrfs_add_block_group(info, cache, 0);
- btrfs_update_space_info(info, cache->flags, found_key.offset,
- btrfs_block_group_used(&cache->item),
- cache->bytes_super, &space_info);
-
- cache->space_info = space_info;
-
- link_block_group(cache);
-
- set_avail_alloc_bits(info, cache->flags);
- if (btrfs_chunk_readonly(info, cache->key.objectid)) {
- inc_block_group_ro(cache, 1);
- } else if (btrfs_block_group_used(&cache->item) == 0) {
- ASSERT(list_empty(&cache->bg_list));
- btrfs_mark_bg_unused(cache);
- }
}
list_for_each_entry_rcu(space_info, &info->space_info, list) {
@@ -1860,7 +1855,7 @@ error:
void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
- struct btrfs_block_group_cache *block_group;
+ struct btrfs_block_group *block_group;
struct btrfs_root *extent_root = fs_info->extent_root;
struct btrfs_block_group_item item;
struct btrfs_key key;
@@ -1871,14 +1866,19 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
while (!list_empty(&trans->new_bgs)) {
block_group = list_first_entry(&trans->new_bgs,
- struct btrfs_block_group_cache,
+ struct btrfs_block_group,
bg_list);
if (ret)
goto next;
spin_lock(&block_group->lock);
- memcpy(&item, &block_group->item, sizeof(item));
- memcpy(&key, &block_group->key, sizeof(key));
+ btrfs_set_stack_block_group_used(&item, block_group->used);
+ btrfs_set_stack_block_group_chunk_objectid(&item,
+ BTRFS_FIRST_CHUNK_TREE_OBJECTID);
+ btrfs_set_stack_block_group_flags(&item, block_group->flags);
+ key.objectid = block_group->start;
+ key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+ key.offset = block_group->length;
spin_unlock(&block_group->lock);
ret = btrfs_insert_item(trans, extent_root, &key, &item,
@@ -1901,7 +1901,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
u64 type, u64 chunk_offset, u64 size)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
- struct btrfs_block_group_cache *cache;
+ struct btrfs_block_group *cache;
int ret;
btrfs_set_log_full_commit(trans);
@@ -1910,11 +1910,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
if (!cache)
return -ENOMEM;
- btrfs_set_block_group_used(&cache->item, bytes_used);
- btrfs_set_block_group_chunk_objectid(&cache->item,
- BTRFS_FIRST_CHUNK_TREE_OBJECTID);
- btrfs_set_block_group_flags(&cache->item, type);
-
+ cache->used = bytes_used;
cache->flags = type;
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
@@ -2021,8 +2017,17 @@ static u64 update_block_group_flags(struct btrfs_fs_info *fs_info, u64 flags)
return flags;
}
-int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache)
-
+/*
+ * Mark one block group RO, can be called several times for the same block
+ * group.
+ *
+ * @cache: the destination block group
+ * @do_chunk_alloc: whether need to do chunk pre-allocation, this is to
+ * ensure we still have some free space after marking this
+ * block group RO.
+ */
+int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
+ bool do_chunk_alloc)
{
struct btrfs_fs_info *fs_info = cache->fs_info;
struct btrfs_trans_handle *trans;
@@ -2052,25 +2057,29 @@ again:
goto again;
}
- /*
- * if we are changing raid levels, try to allocate a corresponding
- * block group with the new raid level.
- */
- alloc_flags = update_block_group_flags(fs_info, cache->flags);
- if (alloc_flags != cache->flags) {
- ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
+ if (do_chunk_alloc) {
/*
- * ENOSPC is allowed here, we may have enough space
- * already allocated at the new raid level to
- * carry on
+ * If we are changing raid levels, try to allocate a
+ * corresponding block group with the new raid level.
*/
- if (ret == -ENOSPC)
- ret = 0;
- if (ret < 0)
- goto out;
+ alloc_flags = update_block_group_flags(fs_info, cache->flags);
+ if (alloc_flags != cache->flags) {
+ ret = btrfs_chunk_alloc(trans, alloc_flags,
+ CHUNK_ALLOC_FORCE);
+ /*
+ * ENOSPC is allowed here, we may have enough space
+ * already allocated at the new raid level to carry on
+ */
+ if (ret == -ENOSPC)
+ ret = 0;
+ if (ret < 0)
+ goto out;
+ }
}
- ret = inc_block_group_ro(cache, 0);
+ ret = inc_block_group_ro(cache, !do_chunk_alloc);
+ if (!do_chunk_alloc)
+ goto unlock_out;
if (!ret)
goto out;
alloc_flags = btrfs_get_alloc_profile(fs_info, cache->space_info->flags);
@@ -2085,13 +2094,14 @@ out:
check_system_chunk(trans, alloc_flags);
mutex_unlock(&fs_info->chunk_mutex);
}
+unlock_out:
mutex_unlock(&fs_info->ro_block_group_mutex);
btrfs_end_transaction(trans);
return ret;
}
-void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache)
+void btrfs_dec_block_group_ro(struct btrfs_block_group *cache)
{
struct btrfs_space_info *sinfo = cache->space_info;
u64 num_bytes;
@@ -2101,9 +2111,8 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache)
spin_lock(&sinfo->lock);
spin_lock(&cache->lock);
if (!--cache->ro) {
- num_bytes = cache->key.offset - cache->reserved -
- cache->pinned - cache->bytes_super -
- btrfs_block_group_used(&cache->item);
+ num_bytes = cache->length - cache->reserved -
+ cache->pinned - cache->bytes_super - cache->used;
sinfo->bytes_readonly -= num_bytes;
list_del_init(&cache->ro_list);
}
@@ -2113,15 +2122,21 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache)
static int write_one_cache_group(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
- struct btrfs_block_group_cache *cache)
+ struct btrfs_block_group *cache)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
int ret;
struct btrfs_root *extent_root = fs_info->extent_root;
unsigned long bi;
struct extent_buffer *leaf;
+ struct btrfs_block_group_item bgi;
+ struct btrfs_key key;
+
+ key.objectid = cache->start;
+ key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+ key.offset = cache->length;
- ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
+ ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 1);
if (ret) {
if (ret > 0)
ret = -ENOENT;
@@ -2130,7 +2145,11 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
leaf = path->nodes[0];
bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
- write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
+ btrfs_set_stack_block_group_used(&bgi, cache->used);
+ btrfs_set_stack_block_group_chunk_objectid(&bgi,
+ BTRFS_FIRST_CHUNK_TREE_OBJECTID);
+ btrfs_set_stack_block_group_flags(&bgi, cache->flags);
+ write_extent_buffer(leaf, &bgi, bi, sizeof(bgi));
btrfs_mark_buffer_dirty(leaf);
fail:
btrfs_release_path(path);
@@ -2138,7 +2157,7 @@ fail:
}
-static int cache_save_setup(struct btrfs_block_group_cache *block_group,
+static int cache_save_setup(struct btrfs_block_group *block_group,
struct btrfs_trans_handle *trans,
struct btrfs_path *path)
{
@@ -2156,7 +2175,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
* If this block group is smaller than 100 megs don't bother caching the
* block group.
*/
- if (block_group->key.offset < (100 * SZ_1M)) {
+ if (block_group->length < (100 * SZ_1M)) {
spin_lock(&block_group->lock);
block_group->disk_cache_state = BTRFS_DC_WRITTEN;
spin_unlock(&block_group->lock);
@@ -2257,7 +2276,7 @@ again:
* taking up quite a bit since it's not folded into the other space
* cache.
*/
- num_pages = div_u64(block_group->key.offset, SZ_256M);
+ num_pages = div_u64(block_group->length, SZ_256M);
if (!num_pages)
num_pages = 1;
@@ -2302,7 +2321,7 @@ out:
int btrfs_setup_space_cache(struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
- struct btrfs_block_group_cache *cache, *tmp;
+ struct btrfs_block_group *cache, *tmp;
struct btrfs_transaction *cur_trans = trans->transaction;
struct btrfs_path *path;
@@ -2340,7 +2359,7 @@ int btrfs_setup_space_cache(struct btrfs_trans_handle *trans)
int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
- struct btrfs_block_group_cache *cache;
+ struct btrfs_block_group *cache;
struct btrfs_transaction *cur_trans = trans->transaction;
int ret = 0;
int should_put;
@@ -2377,8 +2396,7 @@ again:
while (!list_empty(&dirty)) {
bool drop_reserve = true;
- cache = list_first_entry(&dirty,
- struct btrfs_block_group_cache,
+ cache = list_first_entry(&dirty, struct btrfs_block_group,
dirty_list);
/*
* This can happen if something re-dirties a block group that
@@ -2503,7 +2521,7 @@ again:
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
- struct btrfs_block_group_cache *cache;
+ struct btrfs_block_group *cache;
struct btrfs_transaction *cur_trans = trans->transaction;
int ret = 0;
int should_put;
@@ -2533,7 +2551,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
spin_lock(&cur_trans->dirty_bgs_lock);
while (!list_empty(&cur_trans->dirty_bgs)) {
cache = list_first_entry(&cur_trans->dirty_bgs,
- struct btrfs_block_group_cache,
+ struct btrfs_block_group,
dirty_list);
/*
@@ -2615,7 +2633,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans)
* to use it without any locking
*/
while (!list_empty(io)) {
- cache = list_first_entry(io, struct btrfs_block_group_cache,
+ cache = list_first_entry(io, struct btrfs_block_group,
io_list);
list_del_init(&cache->io_list);
btrfs_wait_cache_io(trans, cache, path);
@@ -2630,7 +2648,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, int alloc)
{
struct btrfs_fs_info *info = trans->fs_info;
- struct btrfs_block_group_cache *cache = NULL;
+ struct btrfs_block_group *cache = NULL;
u64 total = num_bytes;
u64 old_val;
u64 byte_in_group;
@@ -2661,11 +2679,11 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
* is because we need the unpinning stage to actually add the
* space back to the block group, otherwise we will leak space.
*/
- if (!alloc && cache->cached == BTRFS_CACHE_NO)
+ if (!alloc && !btrfs_block_group_done(cache))
btrfs_cache_block_group(cache, 1);
- byte_in_group = bytenr - cache->key.objectid;
- WARN_ON(byte_in_group > cache->key.offset);
+ byte_in_group = bytenr - cache->start;
+ WARN_ON(byte_in_group > cache->length);
spin_lock(&cache->space_info->lock);
spin_lock(&cache->lock);
@@ -2674,11 +2692,11 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
cache->disk_cache_state < BTRFS_DC_CLEAR)
cache->disk_cache_state = BTRFS_DC_CLEAR;
- old_val = btrfs_block_group_used(&cache->item);
- num_bytes = min(total, cache->key.offset - byte_in_group);
+ old_val = cache->used;
+ num_bytes = min(total, cache->length - byte_in_group);
if (alloc) {
old_val += num_bytes;
- btrfs_set_block_group_used(&cache->item, old_val);
+ cache->used = old_val;
cache->reserved -= num_bytes;
cache->space_info->bytes_reserved -= num_bytes;
cache->space_info->bytes_used += num_bytes;
@@ -2687,7 +2705,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
spin_unlock(&cache->space_info->lock);
} else {
old_val -= num_bytes;
- btrfs_set_block_group_used(&cache->item, old_val);
+ cache->used = old_val;
cache->pinned += num_bytes;
btrfs_space_info_update_bytes_pinned(info,
cache->space_info, num_bytes);
@@ -2745,7 +2763,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
* reservation and the block group has become read only we cannot make the
* reservation and return -EAGAIN, otherwise this function always succeeds.
*/
-int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
+int btrfs_add_reserved_bytes(struct btrfs_block_group *cache,
u64 ram_bytes, u64 num_bytes, int delalloc)
{
struct btrfs_space_info *space_info = cache->space_info;
@@ -2781,7 +2799,7 @@ int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
* A and before transaction A commits you free that leaf, you call this with
* reserve set to 0 in order to clear the reservation.
*/
-void btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
+void btrfs_free_reserved_bytes(struct btrfs_block_group *cache,
u64 num_bytes, int delalloc)
{
struct btrfs_space_info *space_info = cache->space_info;
@@ -2987,9 +3005,7 @@ static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type)
}
/*
- * If @is_allocation is true, reserve space in the system space info necessary
- * for allocating a chunk, otherwise if it's false, reserve space necessary for
- * removing a chunk.
+ * Reserve space in the system space for allocating or removing a chunk
*/
void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
{
@@ -3046,7 +3062,7 @@ void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
{
- struct btrfs_block_group_cache *block_group;
+ struct btrfs_block_group *block_group;
u64 last = 0;
while (1) {
@@ -3074,7 +3090,7 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
spin_unlock(&block_group->lock);
ASSERT(block_group->io_ctl.inode == NULL);
iput(inode);
- last = block_group->key.objectid + block_group->key.offset;
+ last = block_group->start + block_group->length;
btrfs_put_block_group(block_group);
}
}
@@ -3086,7 +3102,7 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
*/
int btrfs_free_block_groups(struct btrfs_fs_info *info)
{
- struct btrfs_block_group_cache *block_group;
+ struct btrfs_block_group *block_group;
struct btrfs_space_info *space_info;
struct btrfs_caching_control *caching_ctl;
struct rb_node *n;
@@ -3103,7 +3119,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
spin_lock(&info->unused_bgs_lock);
while (!list_empty(&info->unused_bgs)) {
block_group = list_first_entry(&info->unused_bgs,
- struct btrfs_block_group_cache,
+ struct btrfs_block_group,
bg_list);
list_del_init(&block_group->bg_list);
btrfs_put_block_group(block_group);
@@ -3112,7 +3128,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
spin_lock(&info->block_group_cache_lock);
while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
- block_group = rb_entry(n, struct btrfs_block_group_cache,
+ block_group = rb_entry(n, struct btrfs_block_group,
cache_node);
rb_erase(&block_group->cache_node,
&info->block_group_cache_tree);
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index c391800388dd..9b409676c4b2 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -34,7 +34,7 @@ struct btrfs_caching_control {
struct mutex mutex;
wait_queue_head_t wait;
struct btrfs_work work;
- struct btrfs_block_group_cache *block_group;
+ struct btrfs_block_group *block_group;
u64 progress;
refcount_t count;
};
@@ -42,14 +42,15 @@ struct btrfs_caching_control {
/* Once caching_thread() finds this much free space, it will wake up waiters. */
#define CACHING_CTL_WAKE_UP SZ_2M
-struct btrfs_block_group_cache {
- struct btrfs_key key;
- struct btrfs_block_group_item item;
+struct btrfs_block_group {
struct btrfs_fs_info *fs_info;
struct inode *inode;
spinlock_t lock;
+ u64 start;
+ u64 length;
u64 pinned;
u64 reserved;
+ u64 used;
u64 delalloc_bytes;
u64 bytes_super;
u64 flags;
@@ -159,7 +160,7 @@ struct btrfs_block_group_cache {
#ifdef CONFIG_BTRFS_DEBUG
static inline int btrfs_should_fragment_free_space(
- struct btrfs_block_group_cache *block_group)
+ struct btrfs_block_group *block_group)
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
@@ -170,29 +171,29 @@ static inline int btrfs_should_fragment_free_space(
}
#endif
-struct btrfs_block_group_cache *btrfs_lookup_first_block_group(
+struct btrfs_block_group *btrfs_lookup_first_block_group(
struct btrfs_fs_info *info, u64 bytenr);
-struct btrfs_block_group_cache *btrfs_lookup_block_group(
+struct btrfs_block_group *btrfs_lookup_block_group(
struct btrfs_fs_info *info, u64 bytenr);
-struct btrfs_block_group_cache *btrfs_next_block_group(
- struct btrfs_block_group_cache *cache);
-void btrfs_get_block_group(struct btrfs_block_group_cache *cache);
-void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
+struct btrfs_block_group *btrfs_next_block_group(
+ struct btrfs_block_group *cache);
+void btrfs_get_block_group(struct btrfs_block_group *cache);
+void btrfs_put_block_group(struct btrfs_block_group *cache);
void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info,
const u64 start);
-void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg);
+void btrfs_wait_block_group_reservations(struct btrfs_block_group *bg);
bool btrfs_inc_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr);
void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr);
-void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg);
-void btrfs_wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
+void btrfs_wait_nocow_writers(struct btrfs_block_group *bg);
+void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache,
u64 num_bytes);
-int btrfs_wait_block_group_cache_done(struct btrfs_block_group_cache *cache);
-int btrfs_cache_block_group(struct btrfs_block_group_cache *cache,
+int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache);
+int btrfs_cache_block_group(struct btrfs_block_group *cache,
int load_cache_only);
void btrfs_put_caching_control(struct btrfs_caching_control *ctl);
struct btrfs_caching_control *btrfs_get_caching_control(
- struct btrfs_block_group_cache *cache);
-u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
+ struct btrfs_block_group *cache);
+u64 add_new_free_space(struct btrfs_block_group *block_group,
u64 start, u64 end);
struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
struct btrfs_fs_info *fs_info,
@@ -200,21 +201,22 @@ struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
u64 group_start, struct extent_map *em);
void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info);
-void btrfs_mark_bg_unused(struct btrfs_block_group_cache *bg);
+void btrfs_mark_bg_unused(struct btrfs_block_group *bg);
int btrfs_read_block_groups(struct btrfs_fs_info *info);
int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
u64 type, u64 chunk_offset, u64 size);
void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans);
-int btrfs_inc_block_group_ro(struct btrfs_block_group_cache *cache);
-void btrfs_dec_block_group_ro(struct btrfs_block_group_cache *cache);
+int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
+ bool do_chunk_alloc);
+void btrfs_dec_block_group_ro(struct btrfs_block_group *cache);
int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans);
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans);
int btrfs_setup_space_cache(struct btrfs_trans_handle *trans);
int btrfs_update_block_group(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes, int alloc);
-int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
+int btrfs_add_reserved_bytes(struct btrfs_block_group *cache,
u64 ram_bytes, u64 num_bytes, int delalloc);
-void btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
+void btrfs_free_reserved_bytes(struct btrfs_block_group *cache,
u64 num_bytes, int delalloc);
int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
enum btrfs_chunk_alloc_enum force);
@@ -239,8 +241,7 @@ static inline u64 btrfs_system_alloc_profile(struct btrfs_fs_info *fs_info)
return btrfs_get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
}
-static inline int btrfs_block_group_cache_done(
- struct btrfs_block_group_cache *cache)
+static inline int btrfs_block_group_done(struct btrfs_block_group *cache)
{
smp_mb();
return cache->cached == BTRFS_CACHE_FINISHED ||
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index f853835c409c..4e12a477d32e 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -63,9 +63,6 @@ struct btrfs_inode {
/* held while logging the inode in tree-log.c */
struct mutex log_mutex;
- /* held while doing delalloc reservations */
- struct mutex delalloc_mutex;
-
/* used to order data wrt metadata */
struct btrfs_ordered_inode_tree ordered_tree;
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index b05b361e2062..ee834ef7beb4 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -29,6 +29,41 @@
#include "extent_io.h"
#include "extent_map.h"
+int zlib_compress_pages(struct list_head *ws, struct address_space *mapping,
+ u64 start, struct page **pages, unsigned long *out_pages,
+ unsigned long *total_in, unsigned long *total_out);
+int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
+int zlib_decompress(struct list_head *ws, unsigned char *data_in,
+ struct page *dest_page, unsigned long start_byte, size_t srclen,
+ size_t destlen);
+struct list_head *zlib_alloc_workspace(unsigned int level);
+void zlib_free_workspace(struct list_head *ws);
+struct list_head *zlib_get_workspace(unsigned int level);
+
+int lzo_compress_pages(struct list_head *ws, struct address_space *mapping,
+ u64 start, struct page **pages, unsigned long *out_pages,
+ unsigned long *total_in, unsigned long *total_out);
+int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
+int lzo_decompress(struct list_head *ws, unsigned char *data_in,
+ struct page *dest_page, unsigned long start_byte, size_t srclen,
+ size_t destlen);
+struct list_head *lzo_alloc_workspace(unsigned int level);
+void lzo_free_workspace(struct list_head *ws);
+
+int zstd_compress_pages(struct list_head *ws, struct address_space *mapping,
+ u64 start, struct page **pages, unsigned long *out_pages,
+ unsigned long *total_in, unsigned long *total_out);
+int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
+int zstd_decompress(struct list_head *ws, unsigned char *data_in,
+ struct page *dest_page, unsigned long start_byte, size_t srclen,
+ size_t destlen);
+void zstd_init_workspace_manager(void);
+void zstd_cleanup_workspace_manager(void);
+struct list_head *zstd_alloc_workspace(unsigned int level);
+void zstd_free_workspace(struct list_head *ws);
+struct list_head *zstd_get_workspace(unsigned int level);
+void zstd_put_workspace(struct list_head *ws);
+
static const char* const btrfs_compress_types[] = { "", "zlib", "lzo", "zstd" };
const char* btrfs_compress_type2str(enum btrfs_compression_type type)
@@ -39,6 +74,8 @@ const char* btrfs_compress_type2str(enum btrfs_compression_type type)
case BTRFS_COMPRESS_ZSTD:
case BTRFS_COMPRESS_NONE:
return btrfs_compress_types[type];
+ default:
+ break;
}
return NULL;
@@ -60,6 +97,70 @@ bool btrfs_compress_is_valid_type(const char *str, size_t len)
return false;
}
+static int compression_compress_pages(int type, struct list_head *ws,
+ struct address_space *mapping, u64 start, struct page **pages,
+ unsigned long *out_pages, unsigned long *total_in,
+ unsigned long *total_out)
+{
+ switch (type) {
+ case BTRFS_COMPRESS_ZLIB:
+ return zlib_compress_pages(ws, mapping, start, pages,
+ out_pages, total_in, total_out);
+ case BTRFS_COMPRESS_LZO:
+ return lzo_compress_pages(ws, mapping, start, pages,
+ out_pages, total_in, total_out);
+ case BTRFS_COMPRESS_ZSTD:
+ return zstd_compress_pages(ws, mapping, start, pages,
+ out_pages, total_in, total_out);
+ case BTRFS_COMPRESS_NONE:
+ default:
+ /*
+ * This can't happen, the type is validated several times
+ * before we get here. As a sane fallback, return what the
+ * callers will understand as 'no compression happened'.
+ */
+ return -E2BIG;
+ }
+}
+
+static int compression_decompress_bio(int type, struct list_head *ws,
+ struct compressed_bio *cb)
+{
+ switch (type) {
+ case BTRFS_COMPRESS_ZLIB: return zlib_decompress_bio(ws, cb);
+ case BTRFS_COMPRESS_LZO: return lzo_decompress_bio(ws, cb);
+ case BTRFS_COMPRESS_ZSTD: return zstd_decompress_bio(ws, cb);
+ case BTRFS_COMPRESS_NONE:
+ default:
+ /*
+ * This can't happen, the type is validated several times
+ * before we get here.
+ */
+ BUG();
+ }
+}
+
+static int compression_decompress(int type, struct list_head *ws,
+ unsigned char *data_in, struct page *dest_page,
+ unsigned long start_byte, size_t srclen, size_t destlen)
+{
+ switch (type) {
+ case BTRFS_COMPRESS_ZLIB: return zlib_decompress(ws, data_in, dest_page,
+ start_byte, srclen, destlen);
+ case BTRFS_COMPRESS_LZO: return lzo_decompress(ws, data_in, dest_page,
+ start_byte, srclen, destlen);
+ case BTRFS_COMPRESS_ZSTD: return zstd_decompress(ws, data_in, dest_page,
+ start_byte, srclen, destlen);
+ case BTRFS_COMPRESS_NONE:
+ default:
+ /*
+ * This can't happen, the type is validated several times
+ * before we get here.
+ */
+ BUG();
+ }
+}
+
static int btrfs_decompress_bio(struct compressed_bio *cb);
static inline int compressed_bio_size(struct btrfs_fs_info *fs_info,
@@ -311,7 +412,8 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
unsigned long compressed_len,
struct page **compressed_pages,
unsigned long nr_pages,
- unsigned int write_flags)
+ unsigned int write_flags,
+ struct cgroup_subsys_state *blkcg_css)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct bio *bio = NULL;
@@ -320,7 +422,6 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
int pg_index = 0;
struct page *page;
u64 first_byte = disk_start;
- struct block_device *bdev;
blk_status_t ret;
int skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
@@ -339,13 +440,15 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
cb->orig_bio = NULL;
cb->nr_pages = nr_pages;
- bdev = fs_info->fs_devices->latest_bdev;
-
bio = btrfs_bio_alloc(first_byte);
- bio_set_dev(bio, bdev);
bio->bi_opf = REQ_OP_WRITE | write_flags;
bio->bi_private = cb;
bio->bi_end_io = end_compressed_bio_write;
+
+ if (blkcg_css) {
+ bio->bi_opf |= REQ_CGROUP_PUNT;
+ bio_associate_blkg_from_css(bio, blkcg_css);
+ }
refcount_set(&cb->pending_bios, 1);
/* create and submit bios for the compressed pages */
@@ -378,14 +481,13 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
BUG_ON(ret); /* -ENOMEM */
}
- ret = btrfs_map_bio(fs_info, bio, 0, 1);
+ ret = btrfs_map_bio(fs_info, bio, 0);
if (ret) {
bio->bi_status = ret;
bio_endio(bio);
}
bio = btrfs_bio_alloc(first_byte);
- bio_set_dev(bio, bdev);
bio->bi_opf = REQ_OP_WRITE | write_flags;
bio->bi_private = cb;
bio->bi_end_io = end_compressed_bio_write;
@@ -409,7 +511,7 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
BUG_ON(ret); /* -ENOMEM */
}
- ret = btrfs_map_bio(fs_info, bio, 0, 1);
+ ret = btrfs_map_bio(fs_info, bio, 0);
if (ret) {
bio->bi_status = ret;
bio_endio(bio);
@@ -553,7 +655,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
unsigned long nr_pages;
unsigned long pg_index;
struct page *page;
- struct block_device *bdev;
struct bio *comp_bio;
u64 cur_disk_byte = (u64)bio->bi_iter.bi_sector << 9;
u64 em_len;
@@ -604,8 +705,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
if (!cb->compressed_pages)
goto fail1;
- bdev = fs_info->fs_devices->latest_bdev;
-
for (pg_index = 0; pg_index < nr_pages; pg_index++) {
cb->compressed_pages[pg_index] = alloc_page(GFP_NOFS |
__GFP_HIGHMEM);
@@ -624,7 +723,6 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
cb->len = bio->bi_iter.bi_size;
comp_bio = btrfs_bio_alloc(cur_disk_byte);
- bio_set_dev(comp_bio, bdev);
comp_bio->bi_opf = REQ_OP_READ;
comp_bio->bi_private = cb;
comp_bio->bi_end_io = end_compressed_bio_read;
@@ -668,14 +766,13 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
fs_info->sectorsize);
sums += csum_size * nr_sectors;
- ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0);
+ ret = btrfs_map_bio(fs_info, comp_bio, mirror_num);
if (ret) {
comp_bio->bi_status = ret;
bio_endio(comp_bio);
}
comp_bio = btrfs_bio_alloc(cur_disk_byte);
- bio_set_dev(comp_bio, bdev);
comp_bio->bi_opf = REQ_OP_READ;
comp_bio->bi_private = cb;
comp_bio->bi_end_io = end_compressed_bio_read;
@@ -693,7 +790,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
BUG_ON(ret); /* -ENOMEM */
}
- ret = btrfs_map_bio(fs_info, comp_bio, mirror_num, 0);
+ ret = btrfs_map_bio(fs_info, comp_bio, mirror_num);
if (ret) {
comp_bio->bi_status = ret;
bio_endio(comp_bio);
@@ -764,26 +861,6 @@ struct heuristic_ws {
static struct workspace_manager heuristic_wsm;
-static void heuristic_init_workspace_manager(void)
-{
- btrfs_init_workspace_manager(&heuristic_wsm, &btrfs_heuristic_compress);
-}
-
-static void heuristic_cleanup_workspace_manager(void)
-{
- btrfs_cleanup_workspace_manager(&heuristic_wsm);
-}
-
-static struct list_head *heuristic_get_workspace(unsigned int level)
-{
- return btrfs_get_workspace(&heuristic_wsm, level);
-}
-
-static void heuristic_put_workspace(struct list_head *ws)
-{
- btrfs_put_workspace(&heuristic_wsm, ws);
-}
-
static void free_heuristic_ws(struct list_head *ws)
{
struct heuristic_ws *workspace;
@@ -824,12 +901,7 @@ fail:
}
const struct btrfs_compress_op btrfs_heuristic_compress = {
- .init_workspace_manager = heuristic_init_workspace_manager,
- .cleanup_workspace_manager = heuristic_cleanup_workspace_manager,
- .get_workspace = heuristic_get_workspace,
- .put_workspace = heuristic_put_workspace,
- .alloc_workspace = alloc_heuristic_ws,
- .free_workspace = free_heuristic_ws,
+ .workspace_manager = &heuristic_wsm,
};
static const struct btrfs_compress_op * const btrfs_compress_op[] = {
@@ -840,13 +912,44 @@ static const struct btrfs_compress_op * const btrfs_compress_op[] = {
&btrfs_zstd_compress,
};
-void btrfs_init_workspace_manager(struct workspace_manager *wsm,
- const struct btrfs_compress_op *ops)
+static struct list_head *alloc_workspace(int type, unsigned int level)
{
- struct list_head *workspace;
+ switch (type) {
+ case BTRFS_COMPRESS_NONE: return alloc_heuristic_ws(level);
+ case BTRFS_COMPRESS_ZLIB: return zlib_alloc_workspace(level);
+ case BTRFS_COMPRESS_LZO: return lzo_alloc_workspace(level);
+ case BTRFS_COMPRESS_ZSTD: return zstd_alloc_workspace(level);
+ default:
+ /*
+ * This can't happen, the type is validated several times
+ * before we get here.
+ */
+ BUG();
+ }
+}
- wsm->ops = ops;
+static void free_workspace(int type, struct list_head *ws)
+{
+ switch (type) {
+ case BTRFS_COMPRESS_NONE: return free_heuristic_ws(ws);
+ case BTRFS_COMPRESS_ZLIB: return zlib_free_workspace(ws);
+ case BTRFS_COMPRESS_LZO: return lzo_free_workspace(ws);
+ case BTRFS_COMPRESS_ZSTD: return zstd_free_workspace(ws);
+ default:
+ /*
+ * This can't happen, the type is validated several times
+ * before we get here.
+ */
+ BUG();
+ }
+}
+
+static void btrfs_init_workspace_manager(int type)
+{
+ struct workspace_manager *wsm;
+ struct list_head *workspace;
+ wsm = btrfs_compress_op[type]->workspace_manager;
INIT_LIST_HEAD(&wsm->idle_ws);
spin_lock_init(&wsm->ws_lock);
atomic_set(&wsm->total_ws, 0);
@@ -856,7 +959,7 @@ void btrfs_init_workspace_manager(struct workspace_manager *wsm,
* Preallocate one workspace for each compression type so we can
* guarantee forward progress in the worst case
*/
- workspace = wsm->ops->alloc_workspace(0);
+ workspace = alloc_workspace(type, 0);
if (IS_ERR(workspace)) {
pr_warn(
"BTRFS: cannot preallocate compression workspace, will try later\n");
@@ -867,14 +970,16 @@ void btrfs_init_workspace_manager(struct workspace_manager *wsm,
}
}
-void btrfs_cleanup_workspace_manager(struct workspace_manager *wsman)
+static void btrfs_cleanup_workspace_manager(int type)
{
+ struct workspace_manager *wsman;
struct list_head *ws;
+ wsman = btrfs_compress_op[type]->workspace_manager;
while (!list_empty(&wsman->idle_ws)) {
ws = wsman->idle_ws.next;
list_del(ws);
- wsman->ops->free_workspace(ws);
+ free_workspace(type, ws);
atomic_dec(&wsman->total_ws);
}
}
@@ -885,9 +990,9 @@ void btrfs_cleanup_workspace_manager(struct workspace_manager *wsman)
* Preallocation makes a forward progress guarantees and we do not return
* errors.
*/
-struct list_head *btrfs_get_workspace(struct workspace_manager *wsm,
- unsigned int level)
+struct list_head *btrfs_get_workspace(int type, unsigned int level)
{
+ struct workspace_manager *wsm;
struct list_head *workspace;
int cpus = num_online_cpus();
unsigned nofs_flag;
@@ -897,6 +1002,7 @@ struct list_head *btrfs_get_workspace(struct workspace_manager *wsm,
wait_queue_head_t *ws_wait;
int *free_ws;
+ wsm = btrfs_compress_op[type]->workspace_manager;
idle_ws = &wsm->idle_ws;
ws_lock = &wsm->ws_lock;
total_ws = &wsm->total_ws;
@@ -932,7 +1038,7 @@ again:
* context of btrfs_compress_bio/btrfs_compress_pages
*/
nofs_flag = memalloc_nofs_save();
- workspace = wsm->ops->alloc_workspace(level);
+ workspace = alloc_workspace(type, level);
memalloc_nofs_restore(nofs_flag);
if (IS_ERR(workspace)) {
@@ -965,21 +1071,34 @@ again:
static struct list_head *get_workspace(int type, int level)
{
- return btrfs_compress_op[type]->get_workspace(level);
+ switch (type) {
+ case BTRFS_COMPRESS_NONE: return btrfs_get_workspace(type, level);
+ case BTRFS_COMPRESS_ZLIB: return zlib_get_workspace(level);
+ case BTRFS_COMPRESS_LZO: return btrfs_get_workspace(type, level);
+ case BTRFS_COMPRESS_ZSTD: return zstd_get_workspace(level);
+ default:
+ /*
+ * This can't happen, the type is validated several times
+ * before we get here.
+ */
+ BUG();
+ }
}
/*
* put a workspace struct back on the list or free it if we have enough
* idle ones sitting around
*/
-void btrfs_put_workspace(struct workspace_manager *wsm, struct list_head *ws)
+void btrfs_put_workspace(int type, struct list_head *ws)
{
+ struct workspace_manager *wsm;
struct list_head *idle_ws;
spinlock_t *ws_lock;
atomic_t *total_ws;
wait_queue_head_t *ws_wait;
int *free_ws;
+ wsm = btrfs_compress_op[type]->workspace_manager;
idle_ws = &wsm->idle_ws;
ws_lock = &wsm->ws_lock;
total_ws = &wsm->total_ws;
@@ -995,7 +1114,7 @@ void btrfs_put_workspace(struct workspace_manager *wsm, struct list_head *ws)
}
spin_unlock(ws_lock);
- wsm->ops->free_workspace(ws);
+ free_workspace(type, ws);
atomic_dec(total_ws);
wake:
cond_wake_up(ws_wait);
@@ -1003,7 +1122,18 @@ wake:
static void put_workspace(int type, struct list_head *ws)
{
- return btrfs_compress_op[type]->put_workspace(ws);
+ switch (type) {
+ case BTRFS_COMPRESS_NONE: return btrfs_put_workspace(type, ws);
+ case BTRFS_COMPRESS_ZLIB: return btrfs_put_workspace(type, ws);
+ case BTRFS_COMPRESS_LZO: return btrfs_put_workspace(type, ws);
+ case BTRFS_COMPRESS_ZSTD: return zstd_put_workspace(ws);
+ default:
+ /*
+ * This can't happen, the type is validated several times
+ * before we get here.
+ */
+ BUG();
+ }
}
/*
@@ -1042,10 +1172,8 @@ int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
level = btrfs_compress_set_level(type, level);
workspace = get_workspace(type, level);
- ret = btrfs_compress_op[type]->compress_pages(workspace, mapping,
- start, pages,
- out_pages,
- total_in, total_out);
+ ret = compression_compress_pages(type, workspace, mapping, start, pages,
+ out_pages, total_in, total_out);
put_workspace(type, workspace);
return ret;
}
@@ -1071,7 +1199,7 @@ static int btrfs_decompress_bio(struct compressed_bio *cb)
int type = cb->compress_type;
workspace = get_workspace(type, 0);
- ret = btrfs_compress_op[type]->decompress_bio(workspace, cb);
+ ret = compression_decompress_bio(type, workspace, cb);
put_workspace(type, workspace);
return ret;
@@ -1089,9 +1217,8 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
int ret;
workspace = get_workspace(type, 0);
- ret = btrfs_compress_op[type]->decompress(workspace, data_in,
- dest_page, start_byte,
- srclen, destlen);
+ ret = compression_decompress(type, workspace, data_in, dest_page,
+ start_byte, srclen, destlen);
put_workspace(type, workspace);
return ret;
@@ -1099,18 +1226,18 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
void __init btrfs_init_compress(void)
{
- int i;
-
- for (i = 0; i < BTRFS_NR_WORKSPACE_MANAGERS; i++)
- btrfs_compress_op[i]->init_workspace_manager();
+ btrfs_init_workspace_manager(BTRFS_COMPRESS_NONE);
+ btrfs_init_workspace_manager(BTRFS_COMPRESS_ZLIB);
+ btrfs_init_workspace_manager(BTRFS_COMPRESS_LZO);
+ zstd_init_workspace_manager();
}
void __cold btrfs_exit_compress(void)
{
- int i;
-
- for (i = 0; i < BTRFS_NR_WORKSPACE_MANAGERS; i++)
- btrfs_compress_op[i]->cleanup_workspace_manager();
+ btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_NONE);
+ btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_ZLIB);
+ btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_LZO);
+ zstd_cleanup_workspace_manager();
}
/*
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 4cb8be9ff88b..d253f7aa8ed5 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -93,7 +93,8 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
unsigned long compressed_len,
struct page **compressed_pages,
unsigned long nr_pages,
- unsigned int write_flags);
+ unsigned int write_flags,
+ struct cgroup_subsys_state *blkcg_css);
blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
int mirror_num, unsigned long bio_flags);
@@ -104,11 +105,10 @@ enum btrfs_compression_type {
BTRFS_COMPRESS_ZLIB = 1,
BTRFS_COMPRESS_LZO = 2,
BTRFS_COMPRESS_ZSTD = 3,
- BTRFS_COMPRESS_TYPES = 3,
+ BTRFS_NR_COMPRESS_TYPES = 4,
};
struct workspace_manager {
- const struct btrfs_compress_op *ops;
struct list_head idle_ws;
spinlock_t ws_lock;
/* Number of free workspaces */
@@ -119,50 +119,18 @@ struct workspace_manager {
wait_queue_head_t ws_wait;
};
-void btrfs_init_workspace_manager(struct workspace_manager *wsm,
- const struct btrfs_compress_op *ops);
-struct list_head *btrfs_get_workspace(struct workspace_manager *wsm,
- unsigned int level);
-void btrfs_put_workspace(struct workspace_manager *wsm, struct list_head *ws);
-void btrfs_cleanup_workspace_manager(struct workspace_manager *wsm);
+struct list_head *btrfs_get_workspace(int type, unsigned int level);
+void btrfs_put_workspace(int type, struct list_head *ws);
struct btrfs_compress_op {
- void (*init_workspace_manager)(void);
-
- void (*cleanup_workspace_manager)(void);
-
- struct list_head *(*get_workspace)(unsigned int level);
-
- void (*put_workspace)(struct list_head *ws);
-
- struct list_head *(*alloc_workspace)(unsigned int level);
-
- void (*free_workspace)(struct list_head *workspace);
-
- int (*compress_pages)(struct list_head *workspace,
- struct address_space *mapping,
- u64 start,
- struct page **pages,
- unsigned long *out_pages,
- unsigned long *total_in,
- unsigned long *total_out);
-
- int (*decompress_bio)(struct list_head *workspace,
- struct compressed_bio *cb);
-
- int (*decompress)(struct list_head *workspace,
- unsigned char *data_in,
- struct page *dest_page,
- unsigned long start_byte,
- size_t srclen, size_t destlen);
-
+ struct workspace_manager *workspace_manager;
/* Maximum level supported by the compression algorithm */
unsigned int max_level;
unsigned int default_level;
};
/* The heuristic workspaces are managed via the 0th workspace manager */
-#define BTRFS_NR_WORKSPACE_MANAGERS (BTRFS_COMPRESS_TYPES + 1)
+#define BTRFS_NR_WORKSPACE_MANAGERS BTRFS_NR_COMPRESS_TYPES
extern const struct btrfs_compress_op btrfs_heuristic_compress;
extern const struct btrfs_compress_op btrfs_zlib_compress;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index e59cde204b2f..5b6e86aaf2e1 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -32,8 +32,13 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
static const struct btrfs_csums {
u16 size;
const char *name;
+ const char *driver;
} btrfs_csums[] = {
[BTRFS_CSUM_TYPE_CRC32] = { .size = 4, .name = "crc32c" },
+ [BTRFS_CSUM_TYPE_XXHASH] = { .size = 8, .name = "xxhash64" },
+ [BTRFS_CSUM_TYPE_SHA256] = { .size = 32, .name = "sha256" },
+ [BTRFS_CSUM_TYPE_BLAKE2] = { .size = 32, .name = "blake2b",
+ .driver = "blake2b-256" },
};
int btrfs_super_csum_size(const struct btrfs_super_block *s)
@@ -51,34 +56,25 @@ const char *btrfs_super_csum_name(u16 csum_type)
return btrfs_csums[csum_type].name;
}
-struct btrfs_path *btrfs_alloc_path(void)
+/*
+ * Return driver name if defined, otherwise the name that's also a valid driver
+ * name
+ */
+const char *btrfs_super_csum_driver(u16 csum_type)
{
- return kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
+ /* csum type is validated at mount time */
+ return btrfs_csums[csum_type].driver ?:
+ btrfs_csums[csum_type].name;
}
-/*
- * set all locked nodes in the path to blocking locks. This should
- * be done before scheduling
- */
-noinline void btrfs_set_path_blocking(struct btrfs_path *p)
+size_t __const btrfs_get_num_csums(void)
{
- int i;
- for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
- if (!p->nodes[i] || !p->locks[i])
- continue;
- /*
- * If we currently have a spinning reader or writer lock this
- * will bump the count of blocking holders and drop the
- * spinlock.
- */
- if (p->locks[i] == BTRFS_READ_LOCK) {
- btrfs_set_lock_blocking_read(p->nodes[i]);
- p->locks[i] = BTRFS_READ_LOCK_BLOCKING;
- } else if (p->locks[i] == BTRFS_WRITE_LOCK) {
- btrfs_set_lock_blocking_write(p->nodes[i]);
- p->locks[i] = BTRFS_WRITE_LOCK_BLOCKING;
- }
- }
+ return ARRAY_SIZE(btrfs_csums);
+}
+
+struct btrfs_path *btrfs_alloc_path(void)
+{
+ return kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
}
/* this also releases the path */
@@ -1125,7 +1121,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
parent_start = buf->start;
- extent_buffer_get(cow);
+ atomic_inc(&cow->refs);
ret = tree_mod_log_insert_root(root->node, cow, 1);
BUG_ON(ret < 0);
rcu_assign_pointer(root->node, cow);
@@ -1563,7 +1559,7 @@ static int comp_keys(const struct btrfs_disk_key *disk,
/*
* same as comp_keys only with two btrfs_key's
*/
-int btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2)
+int __pure btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2)
{
if (k1->objectid > k2->objectid)
return 1;
@@ -2036,7 +2032,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
/* update the path */
if (left) {
if (btrfs_header_nritems(left) > orig_slot) {
- extent_buffer_get(left);
+ atomic_inc(&left->refs);
/* left was locked after cow */
path->nodes[level] = left;
path->slots[level + 1] -= 1;
@@ -2379,32 +2375,6 @@ static noinline void unlock_up(struct btrfs_path *path, int level,
}
/*
- * This releases any locks held in the path starting at level and
- * going all the way up to the root.
- *
- * btrfs_search_slot will keep the lock held on higher nodes in a few
- * corner cases, such as COW of the block at slot zero in the node. This
- * ignores those rules, and it should only be called when there are no
- * more updates to be done higher up in the tree.
- */
-noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
-{
- int i;
-
- if (path->keep_locks)
- return;
-
- for (i = level; i < BTRFS_MAX_LEVEL; i++) {
- if (!path->nodes[i])
- continue;
- if (!path->locks[i])
- continue;
- btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
- path->locks[i] = 0;
- }
-}
-
-/*
* helper function for btrfs_search_slot. The goal is to find a block
* in cache without setting the path to blocking. If we find the block
* we return zero and the path is unchanged.
@@ -2652,7 +2622,7 @@ static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
} else {
b = root->commit_root;
- extent_buffer_get(b);
+ atomic_inc(&b->refs);
}
level = btrfs_header_level(b);
/*
@@ -2785,12 +2755,10 @@ again:
}
while (b) {
+ int dec = 0;
+
level = btrfs_header_level(b);
- /*
- * setup the path here so we can release it under lock
- * contention with the cow code
- */
if (cow) {
bool last_level = (level == (BTRFS_MAX_LEVEL - 1));
@@ -2861,73 +2829,7 @@ cow_done:
if (ret < 0)
goto done;
- if (level != 0) {
- int dec = 0;
- if (ret && slot > 0) {
- dec = 1;
- slot -= 1;
- }
- p->slots[level] = slot;
- err = setup_nodes_for_search(trans, root, p, b, level,
- ins_len, &write_lock_level);
- if (err == -EAGAIN)
- goto again;
- if (err) {
- ret = err;
- goto done;
- }
- b = p->nodes[level];
- slot = p->slots[level];
-
- /*
- * slot 0 is special, if we change the key
- * we have to update the parent pointer
- * which means we must have a write lock
- * on the parent
- */
- if (slot == 0 && ins_len &&
- write_lock_level < level + 1) {
- write_lock_level = level + 1;
- btrfs_release_path(p);
- goto again;
- }
-
- unlock_up(p, level, lowest_unlock,
- min_write_lock_level, &write_lock_level);
-
- if (level == lowest_level) {
- if (dec)
- p->slots[level]++;
- goto done;
- }
-
- err = read_block_for_search(root, p, &b, level,
- slot, key);
- if (err == -EAGAIN)
- goto again;
- if (err) {
- ret = err;
- goto done;
- }
-
- if (!p->skip_locking) {
- level = btrfs_header_level(b);
- if (level <= write_lock_level) {
- if (!btrfs_try_tree_write_lock(b)) {
- btrfs_set_path_blocking(p);
- btrfs_tree_lock(b);
- }
- p->locks[level] = BTRFS_WRITE_LOCK;
- } else {
- if (!btrfs_tree_read_lock_atomic(b)) {
- btrfs_set_path_blocking(p);
- btrfs_tree_read_lock(b);
- }
- p->locks[level] = BTRFS_READ_LOCK;
- }
- p->nodes[level] = b;
- }
- } else {
+ if (level == 0) {
p->slots[level] = slot;
if (ins_len > 0 &&
btrfs_leaf_free_space(b) < ins_len) {
@@ -2952,6 +2854,67 @@ cow_done:
min_write_lock_level, NULL);
goto done;
}
+ if (ret && slot > 0) {
+ dec = 1;
+ slot--;
+ }
+ p->slots[level] = slot;
+ err = setup_nodes_for_search(trans, root, p, b, level, ins_len,
+ &write_lock_level);
+ if (err == -EAGAIN)
+ goto again;
+ if (err) {
+ ret = err;
+ goto done;
+ }
+ b = p->nodes[level];
+ slot = p->slots[level];
+
+ /*
+ * Slot 0 is special, if we change the key we have to update
+ * the parent pointer which means we must have a write lock on
+ * the parent
+ */
+ if (slot == 0 && ins_len && write_lock_level < level + 1) {
+ write_lock_level = level + 1;
+ btrfs_release_path(p);
+ goto again;
+ }
+
+ unlock_up(p, level, lowest_unlock, min_write_lock_level,
+ &write_lock_level);
+
+ if (level == lowest_level) {
+ if (dec)
+ p->slots[level]++;
+ goto done;
+ }
+
+ err = read_block_for_search(root, p, &b, level, slot, key);
+ if (err == -EAGAIN)
+ goto again;
+ if (err) {
+ ret = err;
+ goto done;
+ }
+
+ if (!p->skip_locking) {
+ level = btrfs_header_level(b);
+ if (level <= write_lock_level) {
+ if (!btrfs_try_tree_write_lock(b)) {
+ btrfs_set_path_blocking(p);
+ btrfs_tree_lock(b);
+ }
+ p->locks[level] = BTRFS_WRITE_LOCK;
+ } else {
+ if (!btrfs_tree_read_lock_atomic(b)) {
+ btrfs_set_path_blocking(p);
+ btrfs_tree_read_lock(b);
+ }
+ p->locks[level] = BTRFS_READ_LOCK;
+ }
+ p->nodes[level] = b;
+ }
}
ret = 1;
done:
@@ -3008,6 +2971,8 @@ again:
p->locks[level] = BTRFS_READ_LOCK;
while (b) {
+ int dec = 0;
+
level = btrfs_header_level(b);
p->nodes[level] = b;
@@ -3028,47 +2993,45 @@ again:
if (ret < 0)
goto done;
- if (level != 0) {
- int dec = 0;
- if (ret && slot > 0) {
- dec = 1;
- slot -= 1;
- }
+ if (level == 0) {
p->slots[level] = slot;
unlock_up(p, level, lowest_unlock, 0, NULL);
+ goto done;
+ }
- if (level == lowest_level) {
- if (dec)
- p->slots[level]++;
- goto done;
- }
+ if (ret && slot > 0) {
+ dec = 1;
+ slot--;
+ }
+ p->slots[level] = slot;
+ unlock_up(p, level, lowest_unlock, 0, NULL);
- err = read_block_for_search(root, p, &b, level,
- slot, key);
- if (err == -EAGAIN)
- goto again;
- if (err) {
- ret = err;
- goto done;
- }
+ if (level == lowest_level) {
+ if (dec)
+ p->slots[level]++;
+ goto done;
+ }
- level = btrfs_header_level(b);
- if (!btrfs_tree_read_lock_atomic(b)) {
- btrfs_set_path_blocking(p);
- btrfs_tree_read_lock(b);
- }
- b = tree_mod_log_rewind(fs_info, p, b, time_seq);
- if (!b) {
- ret = -ENOMEM;
- goto done;
- }
- p->locks[level] = BTRFS_READ_LOCK;
- p->nodes[level] = b;
- } else {
- p->slots[level] = slot;
- unlock_up(p, level, lowest_unlock, 0, NULL);
+ err = read_block_for_search(root, p, &b, level, slot, key);
+ if (err == -EAGAIN)
+ goto again;
+ if (err) {
+ ret = err;
+ goto done;
+ }
+
+ level = btrfs_header_level(b);
+ if (!btrfs_tree_read_lock_atomic(b)) {
+ btrfs_set_path_blocking(p);
+ btrfs_tree_read_lock(b);
+ }
+ b = tree_mod_log_rewind(fs_info, p, b, time_seq);
+ if (!b) {
+ ret = -ENOMEM;
goto done;
}
+ p->locks[level] = BTRFS_READ_LOCK;
+ p->nodes[level] = b;
}
ret = 1;
done:
@@ -3433,7 +3396,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
free_extent_buffer(old);
add_root_to_dirty_list(root);
- extent_buffer_get(c);
+ atomic_inc(&c->refs);
path->nodes[level] = c;
path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
path->slots[level] = 0;
@@ -4966,7 +4929,7 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
root_sub_used(root, leaf->len);
- extent_buffer_get(leaf);
+ atomic_inc(&leaf->refs);
btrfs_free_tree_block(trans, root, leaf, 0, 1);
free_extent_buffer_stale(leaf);
}
@@ -5047,7 +5010,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
* for possible call to del_ptr below
*/
slot = path->slots[1];
- extent_buffer_get(leaf);
+ atomic_inc(&leaf->refs);
btrfs_set_path_blocking(path);
wret = push_leaf_left(trans, root, path, 1, 1,
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 19d669d12ca1..b2e8fd8a8e59 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -28,6 +28,7 @@
#include <linux/dynamic_debug.h>
#include <linux/refcount.h>
#include <linux/crc32c.h>
+#include "extent-io-tree.h"
#include "extent_io.h"
#include "extent_map.h"
#include "async-thread.h"
@@ -38,7 +39,7 @@ struct btrfs_transaction;
struct btrfs_pending_snapshot;
struct btrfs_delayed_ref_root;
struct btrfs_space_info;
-struct btrfs_block_group_cache;
+struct btrfs_block_group;
extern struct kmem_cache *btrfs_trans_handle_cachep;
extern struct kmem_cache *btrfs_bit_radix_cachep;
extern struct kmem_cache *btrfs_path_cachep;
@@ -56,9 +57,9 @@ struct btrfs_ref;
* filesystem data as well that can be used to read data in order to repair
* read errors on other disks.
*
- * Current value is derived from RAID1 with 2 copies.
+ * Current value is derived from RAID1C4 with 4 copies.
*/
-#define BTRFS_MAX_MIRRORS (2 + 1)
+#define BTRFS_MAX_MIRRORS (4 + 1)
#define BTRFS_MAX_LEVEL 8
@@ -291,7 +292,8 @@ struct btrfs_super_block {
BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF | \
BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA | \
BTRFS_FEATURE_INCOMPAT_NO_HOLES | \
- BTRFS_FEATURE_INCOMPAT_METADATA_UUID)
+ BTRFS_FEATURE_INCOMPAT_METADATA_UUID | \
+ BTRFS_FEATURE_INCOMPAT_RAID1C34)
#define BTRFS_FEATURE_INCOMPAT_SAFE_SET \
(BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)
@@ -413,7 +415,7 @@ struct btrfs_free_cluster {
/* We did a full search and couldn't create a cluster */
bool fragmented;
- struct btrfs_block_group_cache *block_group;
+ struct btrfs_block_group *block_group;
/*
* when a cluster is allocated from a block group, we put the
* cluster onto a list in the block group so that it can
@@ -476,8 +478,8 @@ struct btrfs_swapfile_pin {
void *ptr;
struct inode *inode;
/*
- * If true, ptr points to a struct btrfs_block_group_cache. Otherwise,
- * ptr points to a struct btrfs_device.
+ * If true, ptr points to a struct btrfs_block_group. Otherwise, ptr
+ * points to a struct btrfs_device.
*/
bool is_block_group;
};
@@ -722,7 +724,6 @@ struct btrfs_fs_info {
struct btrfs_workqueue *endio_meta_write_workers;
struct btrfs_workqueue *endio_write_workers;
struct btrfs_workqueue *endio_freespace_worker;
- struct btrfs_workqueue *submit_workers;
struct btrfs_workqueue *caching_workers;
struct btrfs_workqueue *readahead_workers;
@@ -734,8 +735,6 @@ struct btrfs_fs_info {
struct btrfs_workqueue *fixup_workers;
struct btrfs_workqueue *delayed_workers;
- /* the extent workers do delayed refs on the extent allocation tree */
- struct btrfs_workqueue *extent_workers;
struct task_struct *transaction_kthread;
struct task_struct *cleaner_kthread;
u32 thread_pool_size;
@@ -1521,18 +1520,18 @@ static inline u64 btrfs_stripe_devid_nr(struct extent_buffer *eb,
}
/* struct btrfs_block_group_item */
-BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item,
+BTRFS_SETGET_STACK_FUNCS(stack_block_group_used, struct btrfs_block_group_item,
used, 64);
-BTRFS_SETGET_FUNCS(disk_block_group_used, struct btrfs_block_group_item,
+BTRFS_SETGET_FUNCS(block_group_used, struct btrfs_block_group_item,
used, 64);
-BTRFS_SETGET_STACK_FUNCS(block_group_chunk_objectid,
+BTRFS_SETGET_STACK_FUNCS(stack_block_group_chunk_objectid,
struct btrfs_block_group_item, chunk_objectid, 64);
-BTRFS_SETGET_FUNCS(disk_block_group_chunk_objectid,
+BTRFS_SETGET_FUNCS(block_group_chunk_objectid,
struct btrfs_block_group_item, chunk_objectid, 64);
-BTRFS_SETGET_FUNCS(disk_block_group_flags,
+BTRFS_SETGET_FUNCS(block_group_flags,
struct btrfs_block_group_item, flags, 64);
-BTRFS_SETGET_STACK_FUNCS(block_group_flags,
+BTRFS_SETGET_STACK_FUNCS(stack_block_group_flags,
struct btrfs_block_group_item, flags, 64);
/* struct btrfs_free_space_info */
@@ -2165,6 +2164,9 @@ BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block,
int btrfs_super_csum_size(const struct btrfs_super_block *s);
const char *btrfs_super_csum_name(u16 csum_type);
+const char *btrfs_super_csum_driver(u16 csum_type);
+size_t __const btrfs_get_num_csums(void);
+
/*
* The leaf data grows from end-to-front in the node.
@@ -2399,7 +2401,7 @@ static inline u64 btrfs_calc_metadata_size(struct btrfs_fs_info *fs_info,
int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info,
u64 start, u64 num_bytes);
-void btrfs_free_excluded_extents(struct btrfs_block_group_cache *cache);
+void btrfs_free_excluded_extents(struct btrfs_block_group *cache);
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
unsigned long count);
void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
@@ -2455,8 +2457,8 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_ref *generic_ref);
int btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr);
-void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache);
-void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *cache);
+void btrfs_get_block_group_trimming(struct btrfs_block_group *cache);
+void btrfs_put_block_group_trimming(struct btrfs_block_group *cache);
void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
enum btrfs_reserve_flush_enum {
@@ -2489,8 +2491,7 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
int nitems, bool use_global_rsv);
void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv);
-void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
- bool qgroup_free);
+void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes);
int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes);
u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
@@ -2510,7 +2511,7 @@ void btrfs_wait_for_snapshot_creation(struct btrfs_root *root);
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
int level, int *slot);
-int btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2);
+int __pure btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2);
int btrfs_previous_item(struct btrfs_root *root,
struct btrfs_path *path, u64 min_objectid,
int type);
@@ -2570,8 +2571,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
void btrfs_release_path(struct btrfs_path *p);
struct btrfs_path *btrfs_alloc_path(void);
void btrfs_free_path(struct btrfs_path *p);
-void btrfs_set_path_blocking(struct btrfs_path *p);
-void btrfs_unlock_up_safe(struct btrfs_path *p, int level);
int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_path *path, int slot, int nr);
@@ -2873,10 +2872,9 @@ int btrfs_drop_inode(struct inode *inode);
int __init btrfs_init_cachep(void);
void __cold btrfs_destroy_cachep(void);
struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location,
- struct btrfs_root *root, int *new,
- struct btrfs_path *path);
+ struct btrfs_root *root, struct btrfs_path *path);
struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
- struct btrfs_root *root, int *was_new);
+ struct btrfs_root *root);
struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
struct page *page, size_t pg_offset,
u64 start, u64 end, int create);
@@ -2912,7 +2910,7 @@ long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
int btrfs_ioctl_get_supported_features(void __user *arg);
void btrfs_sync_inode_flags_to_i_flags(struct inode *inode);
-int btrfs_is_empty_uuid(u8 *uuid);
+int __pure btrfs_is_empty_uuid(u8 *uuid);
int btrfs_defrag_file(struct inode *inode, struct file *file,
struct btrfs_ioctl_defrag_range_args *range,
u64 newer_than, unsigned long max_pages);
@@ -3146,7 +3144,7 @@ __cold
void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function,
unsigned int line, int errno, const char *fmt, ...);
-const char *btrfs_decode_error(int errno);
+const char * __attribute_const__ btrfs_decode_error(int errno);
__cold
void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c
index d949d7d2abed..4cdac4d834f5 100644
--- a/fs/btrfs/delalloc-space.c
+++ b/fs/btrfs/delalloc-space.c
@@ -307,7 +307,6 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
unsigned nr_extents;
enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
int ret = 0;
- bool delalloc_lock = true;
/*
* If we are a free space inode we need to not flush since we will be in
@@ -320,7 +319,6 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
*/
if (btrfs_is_free_space_inode(inode)) {
flush = BTRFS_RESERVE_NO_FLUSH;
- delalloc_lock = false;
} else {
if (current->journal_info)
flush = BTRFS_RESERVE_FLUSH_LIMIT;
@@ -329,9 +327,6 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
schedule_timeout(1);
}
- if (delalloc_lock)
- mutex_lock(&inode->delalloc_mutex);
-
num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
/*
@@ -348,10 +343,12 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
&qgroup_reserve);
ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true);
if (ret)
- goto out_fail;
+ return ret;
ret = btrfs_reserve_metadata_bytes(root, block_rsv, meta_reserve, flush);
- if (ret)
- goto out_qgroup;
+ if (ret) {
+ btrfs_qgroup_free_meta_prealloc(root, qgroup_reserve);
+ return ret;
+ }
/*
* Now we need to update our outstanding extents and csum bytes _first_
@@ -375,16 +372,7 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
block_rsv->qgroup_rsv_reserved += qgroup_reserve;
spin_unlock(&block_rsv->lock);
- if (delalloc_lock)
- mutex_unlock(&inode->delalloc_mutex);
return 0;
-out_qgroup:
- btrfs_qgroup_free_meta_prealloc(root, qgroup_reserve);
-out_fail:
- btrfs_inode_rsv_release(inode, true);
- if (delalloc_lock)
- mutex_unlock(&inode->delalloc_mutex);
- return ret;
}
/**
@@ -418,7 +406,6 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
* btrfs_delalloc_release_extents - release our outstanding_extents
* @inode: the inode to balance the reservation for.
* @num_bytes: the number of bytes we originally reserved with
- * @qgroup_free: do we need to free qgroup meta reservation or convert them.
*
* When we reserve space we increase outstanding_extents for the extents we may
* add. Once we've set the range as delalloc or created our ordered extents we
@@ -426,8 +413,7 @@ void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes,
* temporarily tracked outstanding_extents. This _must_ be used in conjunction
* with btrfs_delalloc_reserve_metadata.
*/
-void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
- bool qgroup_free)
+void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
unsigned num_extents;
@@ -441,7 +427,7 @@ void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes,
if (btrfs_is_testing(fs_info))
return;
- btrfs_inode_rsv_release(inode, qgroup_free);
+ btrfs_inode_rsv_release(inode, true);
}
/**
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 1f7f39b10bd0..d3e15e1d4a91 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -12,6 +12,7 @@
#include "transaction.h"
#include "ctree.h"
#include "qgroup.h"
+#include "locking.h"
#define BTRFS_DELAYED_WRITEBACK 512
#define BTRFS_DELAYED_BACKGROUND 128
@@ -1367,8 +1368,8 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
return -ENOMEM;
async_work->delayed_root = delayed_root;
- btrfs_init_work(&async_work->work, btrfs_delayed_meta_helper,
- btrfs_async_run_delayed_root, NULL, NULL);
+ btrfs_init_work(&async_work->work, btrfs_async_run_delayed_root, NULL,
+ NULL);
async_work->nr = nr;
btrfs_queue_work(fs_info->delayed_workers, &async_work->work);
@@ -1949,12 +1950,19 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
}
inode_id = delayed_nodes[n - 1]->inode_id + 1;
-
- for (i = 0; i < n; i++)
- refcount_inc(&delayed_nodes[i]->refs);
+ for (i = 0; i < n; i++) {
+ /*
+ * Don't increase refs in case the node is dead and
+ * about to be removed from the tree in the loop below
+ */
+ if (!refcount_inc_not_zero(&delayed_nodes[i]->refs))
+ delayed_nodes[i] = NULL;
+ }
spin_unlock(&root->inode_lock);
for (i = 0; i < n; i++) {
+ if (!delayed_nodes[i])
+ continue;
__btrfs_kill_delayed_node(delayed_nodes[i]);
btrfs_release_delayed_node(delayed_nodes[i]);
}
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 48890826b5e6..f639dde2a679 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -986,7 +986,7 @@ static int btrfs_dev_replace_kthread(void *data)
return 0;
}
-int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace)
+int __pure btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace)
{
if (!dev_replace->is_valid)
return 0;
diff --git a/fs/btrfs/dev-replace.h b/fs/btrfs/dev-replace.h
index 78c5d8f1adda..60b70dacc299 100644
--- a/fs/btrfs/dev-replace.h
+++ b/fs/btrfs/dev-replace.h
@@ -17,6 +17,6 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info);
void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info);
int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info);
-int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace);
+int __pure btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace);
#endif
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 044981cf6df9..e0edfdc9c82b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -205,7 +205,6 @@ struct extent_map *btree_get_extent(struct btrfs_inode *inode,
struct page *page, size_t pg_offset, u64 start, u64 len,
int create)
{
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct extent_map_tree *em_tree = &inode->extent_tree;
struct extent_map *em;
int ret;
@@ -213,7 +212,6 @@ struct extent_map *btree_get_extent(struct btrfs_inode *inode,
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, start, len);
if (em) {
- em->bdev = fs_info->fs_devices->latest_bdev;
read_unlock(&em_tree->lock);
goto out;
}
@@ -228,7 +226,6 @@ struct extent_map *btree_get_extent(struct btrfs_inode *inode,
em->len = (u64)-1;
em->block_len = (u64)-1;
em->block_start = 0;
- em->bdev = fs_info->fs_devices->latest_bdev;
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em, 0);
@@ -352,6 +349,9 @@ static bool btrfs_supported_super_csum(u16 csum_type)
{
switch (csum_type) {
case BTRFS_CSUM_TYPE_CRC32:
+ case BTRFS_CSUM_TYPE_XXHASH:
+ case BTRFS_CSUM_TYPE_SHA256:
+ case BTRFS_CSUM_TYPE_BLAKE2:
return true;
default:
return false;
@@ -545,9 +545,11 @@ static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct page *page)
ret = btrfs_check_leaf_full(eb);
if (ret < 0) {
+ btrfs_print_tree(eb, 0);
btrfs_err(fs_info,
"block=%llu write time tree block corruption detected",
eb->start);
+ WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
return ret;
}
write_extent_buffer(eb, result, 0, csum_size);
@@ -608,7 +610,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
/* the pending IO might have been the only thing that kept this buffer
* in memory. Make sure we have a ref for all this other checks
*/
- extent_buffer_get(eb);
+ atomic_inc(&eb->refs);
reads_done = atomic_dec_and_test(&eb->io_pages);
if (!reads_done)
@@ -706,43 +708,31 @@ static void end_workqueue_bio(struct bio *bio)
struct btrfs_end_io_wq *end_io_wq = bio->bi_private;
struct btrfs_fs_info *fs_info;
struct btrfs_workqueue *wq;
- btrfs_work_func_t func;
fs_info = end_io_wq->info;
end_io_wq->status = bio->bi_status;
if (bio_op(bio) == REQ_OP_WRITE) {
- if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) {
+ if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA)
wq = fs_info->endio_meta_write_workers;
- func = btrfs_endio_meta_write_helper;
- } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) {
+ else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE)
wq = fs_info->endio_freespace_worker;
- func = btrfs_freespace_write_helper;
- } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) {
+ else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
wq = fs_info->endio_raid56_workers;
- func = btrfs_endio_raid56_helper;
- } else {
+ else
wq = fs_info->endio_write_workers;
- func = btrfs_endio_write_helper;
- }
} else {
- if (unlikely(end_io_wq->metadata ==
- BTRFS_WQ_ENDIO_DIO_REPAIR)) {
+ if (unlikely(end_io_wq->metadata == BTRFS_WQ_ENDIO_DIO_REPAIR))
wq = fs_info->endio_repair_workers;
- func = btrfs_endio_repair_helper;
- } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) {
+ else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
wq = fs_info->endio_raid56_workers;
- func = btrfs_endio_raid56_helper;
- } else if (end_io_wq->metadata) {
+ else if (end_io_wq->metadata)
wq = fs_info->endio_meta_workers;
- func = btrfs_endio_meta_helper;
- } else {
+ else
wq = fs_info->endio_workers;
- func = btrfs_endio_helper;
- }
}
- btrfs_init_work(&end_io_wq->work, func, end_workqueue_fn, NULL, NULL);
+ btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL);
btrfs_queue_work(wq, &end_io_wq->work);
}
@@ -803,8 +793,13 @@ static void run_one_async_done(struct btrfs_work *work)
return;
}
- ret = btrfs_map_bio(btrfs_sb(inode->i_sb), async->bio,
- async->mirror_num, 1);
+ /*
+ * All of the bios that pass through here are from async helpers.
+ * Use REQ_CGROUP_PUNT to issue them from the owning cgroup's context.
+ * This changes nothing when cgroups aren't in use.
+ */
+ async->bio->bi_opf |= REQ_CGROUP_PUNT;
+ ret = btrfs_map_bio(btrfs_sb(inode->i_sb), async->bio, async->mirror_num);
if (ret) {
async->bio->bi_status = ret;
bio_endio(async->bio);
@@ -835,8 +830,8 @@ blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
async->mirror_num = mirror_num;
async->submit_bio_start = submit_bio_start;
- btrfs_init_work(&async->work, btrfs_worker_helper, run_one_async_start,
- run_one_async_done, run_one_async_free);
+ btrfs_init_work(&async->work, run_one_async_start, run_one_async_done,
+ run_one_async_free);
async->bio_offset = bio_offset;
@@ -904,12 +899,12 @@ static blk_status_t btree_submit_bio_hook(struct inode *inode, struct bio *bio,
BTRFS_WQ_ENDIO_METADATA);
if (ret)
goto out_w_error;
- ret = btrfs_map_bio(fs_info, bio, mirror_num, 0);
+ ret = btrfs_map_bio(fs_info, bio, mirror_num);
} else if (!async) {
ret = btree_csum_one_bio(bio);
if (ret)
goto out_w_error;
- ret = btrfs_map_bio(fs_info, bio, mirror_num, 0);
+ ret = btrfs_map_bio(fs_info, bio, mirror_num);
} else {
/*
* kthread helpers are used to submit writes so that
@@ -1657,8 +1652,8 @@ static void end_workqueue_fn(struct btrfs_work *work)
bio->bi_status = end_io_wq->status;
bio->bi_private = end_io_wq->private;
bio->bi_end_io = end_io_wq->end_io;
- kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq);
bio_endio(bio);
+ kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq);
}
static int cleaner_kthread(void *arg)
@@ -1753,7 +1748,7 @@ static int transaction_kthread(void *arg)
}
now = ktime_get_seconds();
- if (cur->state < TRANS_STATE_BLOCKED &&
+ if (cur->state < TRANS_STATE_COMMIT_START &&
!test_bit(BTRFS_FS_NEED_ASYNC_COMMIT, &fs_info->flags) &&
(now < cur->start_time ||
now - cur->start_time < fs_info->commit_interval)) {
@@ -1792,18 +1787,18 @@ sleep:
}
/*
- * this will find the highest generation in the array of
- * root backups. The index of the highest array is returned,
- * or -1 if we can't find anything.
+ * This will find the highest generation in the array of root backups. The
+ * index of the highest array is returned, or -EINVAL if we can't find
+ * anything.
*
* We check to make sure the array is valid by comparing the
* generation of the latest root in the array with the generation
* in the super block. If they don't match we pitch it.
*/
-static int find_newest_super_backup(struct btrfs_fs_info *info, u64 newest_gen)
+static int find_newest_super_backup(struct btrfs_fs_info *info)
{
+ const u64 newest_gen = btrfs_super_generation(info->super_copy);
u64 cur;
- int newest_index = -1;
struct btrfs_root_backup *root_backup;
int i;
@@ -1811,37 +1806,10 @@ static int find_newest_super_backup(struct btrfs_fs_info *info, u64 newest_gen)
root_backup = info->super_copy->super_roots + i;
cur = btrfs_backup_tree_root_gen(root_backup);
if (cur == newest_gen)
- newest_index = i;
+ return i;
}
- /* check to see if we actually wrapped around */
- if (newest_index == BTRFS_NUM_BACKUP_ROOTS - 1) {
- root_backup = info->super_copy->super_roots;
- cur = btrfs_backup_tree_root_gen(root_backup);
- if (cur == newest_gen)
- newest_index = 0;
- }
- return newest_index;
-}
-
-
-/*
- * find the oldest backup so we know where to store new entries
- * in the backup array. This will set the backup_root_index
- * field in the fs_info struct
- */
-static void find_oldest_super_backup(struct btrfs_fs_info *info,
- u64 newest_gen)
-{
- int newest_index = -1;
-
- newest_index = find_newest_super_backup(info, newest_gen);
- /* if there was garbage in there, just move along */
- if (newest_index == -1) {
- info->backup_root_index = 0;
- } else {
- info->backup_root_index = (newest_index + 1) % BTRFS_NUM_BACKUP_ROOTS;
- }
+ return -EINVAL;
}
/*
@@ -1851,22 +1819,8 @@ static void find_oldest_super_backup(struct btrfs_fs_info *info,
*/
static void backup_super_roots(struct btrfs_fs_info *info)
{
- int next_backup;
+ const int next_backup = info->backup_root_index;
struct btrfs_root_backup *root_backup;
- int last_backup;
-
- next_backup = info->backup_root_index;
- last_backup = (next_backup + BTRFS_NUM_BACKUP_ROOTS - 1) %
- BTRFS_NUM_BACKUP_ROOTS;
-
- /*
- * just overwrite the last backup if we're at the same generation
- * this happens only at umount
- */
- root_backup = info->super_for_commit->super_roots + last_backup;
- if (btrfs_backup_tree_root_gen(root_backup) ==
- btrfs_header_generation(info->tree_root->node))
- next_backup = last_backup;
root_backup = info->super_for_commit->super_roots + next_backup;
@@ -1939,40 +1893,31 @@ static void backup_super_roots(struct btrfs_fs_info *info)
}
/*
- * this copies info out of the root backup array and back into
- * the in-memory super block. It is meant to help iterate through
- * the array, so you send it the number of backups you've already
- * tried and the last backup index you used.
+ * read_backup_root - Reads a backup root based on the passed priority. Prio 0
+ * is the newest, prio 1/2/3 are 2nd newest/3rd newest/4th (oldest) backup roots
*
- * this returns -1 when it has tried all the backups
+ * fs_info - filesystem whose backup roots need to be read
+ * priority - priority of backup root required
+ *
+ * Returns backup root index on success and -EINVAL otherwise.
*/
-static noinline int next_root_backup(struct btrfs_fs_info *info,
- struct btrfs_super_block *super,
- int *num_backups_tried, int *backup_index)
+static int read_backup_root(struct btrfs_fs_info *fs_info, u8 priority)
{
+ int backup_index = find_newest_super_backup(fs_info);
+ struct btrfs_super_block *super = fs_info->super_copy;
struct btrfs_root_backup *root_backup;
- int newest = *backup_index;
-
- if (*num_backups_tried == 0) {
- u64 gen = btrfs_super_generation(super);
- newest = find_newest_super_backup(info, gen);
- if (newest == -1)
- return -1;
+ if (priority < BTRFS_NUM_BACKUP_ROOTS && backup_index >= 0) {
+ if (priority == 0)
+ return backup_index;
- *backup_index = newest;
- *num_backups_tried = 1;
- } else if (*num_backups_tried == BTRFS_NUM_BACKUP_ROOTS) {
- /* we've tried all the backups, all done */
- return -1;
+ backup_index = backup_index + BTRFS_NUM_BACKUP_ROOTS - priority;
+ backup_index %= BTRFS_NUM_BACKUP_ROOTS;
} else {
- /* jump to the next oldest backup */
- newest = (*backup_index + BTRFS_NUM_BACKUP_ROOTS - 1) %
- BTRFS_NUM_BACKUP_ROOTS;
- *backup_index = newest;
- *num_backups_tried += 1;
+ return -EINVAL;
}
- root_backup = super->super_roots + newest;
+
+ root_backup = super->super_roots + backup_index;
btrfs_set_super_generation(super,
btrfs_backup_tree_root_gen(root_backup));
@@ -1982,12 +1927,13 @@ static noinline int next_root_backup(struct btrfs_fs_info *info,
btrfs_set_super_bytes_used(super, btrfs_backup_bytes_used(root_backup));
/*
- * fixme: the total bytes and num_devices need to match or we should
+ * Fixme: the total bytes and num_devices need to match or we should
* need a fsck
*/
btrfs_set_super_total_bytes(super, btrfs_backup_total_bytes(root_backup));
btrfs_set_super_num_devices(super, btrfs_backup_num_devices(root_backup));
- return 0;
+
+ return backup_index;
}
/* helper to cleanup workers */
@@ -2002,13 +1948,11 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
btrfs_destroy_workqueue(fs_info->rmw_workers);
btrfs_destroy_workqueue(fs_info->endio_write_workers);
btrfs_destroy_workqueue(fs_info->endio_freespace_worker);
- btrfs_destroy_workqueue(fs_info->submit_workers);
btrfs_destroy_workqueue(fs_info->delayed_workers);
btrfs_destroy_workqueue(fs_info->caching_workers);
btrfs_destroy_workqueue(fs_info->readahead_workers);
btrfs_destroy_workqueue(fs_info->flush_workers);
btrfs_destroy_workqueue(fs_info->qgroup_rescan_workers);
- btrfs_destroy_workqueue(fs_info->extent_workers);
/*
* Now that all other work queues are destroyed, we can safely destroy
* the queues used for metadata I/O, since tasks from those other work
@@ -2029,7 +1973,7 @@ static void free_root_extent_buffers(struct btrfs_root *root)
}
/* helper to cleanup tree roots */
-static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
+static void free_root_pointers(struct btrfs_fs_info *info, bool free_chunk_root)
{
free_root_extent_buffers(info->tree_root);
@@ -2038,7 +1982,7 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
free_root_extent_buffers(info->csum_root);
free_root_extent_buffers(info->quota_root);
free_root_extent_buffers(info->uuid_root);
- if (chunk_root)
+ if (free_chunk_root)
free_root_extent_buffers(info->chunk_root);
free_root_extent_buffers(info->free_space_root);
}
@@ -2168,16 +2112,6 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
fs_info->caching_workers =
btrfs_alloc_workqueue(fs_info, "cache", flags, max_active, 0);
- /*
- * a higher idle thresh on the submit workers makes it much more
- * likely that bios will be send down in a sane order to the
- * devices
- */
- fs_info->submit_workers =
- btrfs_alloc_workqueue(fs_info, "submit", flags,
- min_t(u64, fs_devices->num_devices,
- max_active), 64);
-
fs_info->fixup_workers =
btrfs_alloc_workqueue(fs_info, "fixup", flags, 1, 0);
@@ -2214,13 +2148,9 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
max_active, 2);
fs_info->qgroup_rescan_workers =
btrfs_alloc_workqueue(fs_info, "qgroup-rescan", flags, 1, 0);
- fs_info->extent_workers =
- btrfs_alloc_workqueue(fs_info, "extent-refs", flags,
- min_t(u64, fs_devices->num_devices,
- max_active), 8);
if (!(fs_info->workers && fs_info->delalloc_workers &&
- fs_info->submit_workers && fs_info->flush_workers &&
+ fs_info->flush_workers &&
fs_info->endio_workers && fs_info->endio_meta_workers &&
fs_info->endio_meta_write_workers &&
fs_info->endio_repair_workers &&
@@ -2228,7 +2158,6 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
fs_info->endio_freespace_worker && fs_info->rmw_workers &&
fs_info->caching_workers && fs_info->readahead_workers &&
fs_info->fixup_workers && fs_info->delayed_workers &&
- fs_info->extent_workers &&
fs_info->qgroup_rescan_workers)) {
return -ENOMEM;
}
@@ -2239,13 +2168,13 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
static int btrfs_init_csum_hash(struct btrfs_fs_info *fs_info, u16 csum_type)
{
struct crypto_shash *csum_shash;
- const char *csum_name = btrfs_super_csum_name(csum_type);
+ const char *csum_driver = btrfs_super_csum_driver(csum_type);
- csum_shash = crypto_alloc_shash(csum_name, 0, 0);
+ csum_shash = crypto_alloc_shash(csum_driver, 0, 0);
if (IS_ERR(csum_shash)) {
btrfs_err(fs_info, "error allocating %s hash for checksum",
- csum_name);
+ csum_driver);
return PTR_ERR(csum_shash);
}
@@ -2595,7 +2524,101 @@ out:
return ret;
}
-int open_ctree(struct super_block *sb,
+static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
+{
+ int backup_index = find_newest_super_backup(fs_info);
+ struct btrfs_super_block *sb = fs_info->super_copy;
+ struct btrfs_root *tree_root = fs_info->tree_root;
+ bool handle_error = false;
+ int ret = 0;
+ int i;
+
+ for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) {
+ u64 generation;
+ int level;
+
+ if (handle_error) {
+ if (!IS_ERR(tree_root->node))
+ free_extent_buffer(tree_root->node);
+ tree_root->node = NULL;
+
+ if (!btrfs_test_opt(fs_info, USEBACKUPROOT))
+ break;
+
+ free_root_pointers(fs_info, 0);
+
+ /*
+ * Don't use the log in recovery mode, it won't be
+ * valid
+ */
+ btrfs_set_super_log_root(sb, 0);
+
+ /* We can't trust the free space cache either */
+ btrfs_set_opt(fs_info->mount_opt, CLEAR_CACHE);
+
+ ret = read_backup_root(fs_info, i);
+ backup_index = ret;
+ if (ret < 0)
+ return ret;
+ }
+ generation = btrfs_super_generation(sb);
+ level = btrfs_super_root_level(sb);
+ tree_root->node = read_tree_block(fs_info, btrfs_super_root(sb),
+ generation, level, NULL);
+ if (IS_ERR(tree_root->node) ||
+ !extent_buffer_uptodate(tree_root->node)) {
+ handle_error = true;
+
+ if (IS_ERR(tree_root->node))
+ ret = PTR_ERR(tree_root->node);
+ else if (!extent_buffer_uptodate(tree_root->node))
+ ret = -EUCLEAN;
+
+ btrfs_warn(fs_info, "failed to read tree root");
+ continue;
+ }
+
+ btrfs_set_root_node(&tree_root->root_item, tree_root->node);
+ tree_root->commit_root = btrfs_root_node(tree_root);
+ btrfs_set_root_refs(&tree_root->root_item, 1);
+
+ /*
+ * No need to hold btrfs_root::objectid_mutex since the fs
+ * hasn't been fully initialised and we are the only user
+ */
+ ret = btrfs_find_highest_objectid(tree_root,
+ &tree_root->highest_objectid);
+ if (ret < 0) {
+ handle_error = true;
+ continue;
+ }
+
+ ASSERT(tree_root->highest_objectid <= BTRFS_LAST_FREE_OBJECTID);
+
+ ret = btrfs_read_roots(fs_info);
+ if (ret < 0) {
+ handle_error = true;
+ continue;
+ }
+
+ /* All successful */
+ fs_info->generation = generation;
+ fs_info->last_trans_committed = generation;
+
+ /* Always begin writing backup roots after the one being used */
+ if (backup_index < 0) {
+ fs_info->backup_root_index = 0;
+ } else {
+ fs_info->backup_root_index = backup_index + 1;
+ fs_info->backup_root_index %= BTRFS_NUM_BACKUP_ROOTS;
+ }
+ break;
+ }
+
+ return ret;
+}
+
+int __cold open_ctree(struct super_block *sb,
struct btrfs_fs_devices *fs_devices,
char *options)
{
@@ -2613,8 +2636,6 @@ int open_ctree(struct super_block *sb,
struct btrfs_root *chunk_root;
int ret;
int err = -EINVAL;
- int num_backups_tried = 0;
- int backup_index = 0;
int clear_free_space_tree = 0;
int level;
@@ -2885,13 +2906,6 @@ int open_ctree(struct super_block *sb,
set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
/*
- * run through our array of backup supers and setup
- * our ring pointer to the oldest one
- */
- generation = btrfs_super_generation(disk_super);
- find_oldest_super_backup(fs_info, generation);
-
- /*
* In the long term, we'll store the compression type in the super
* block, and it'll be used for per file compression control.
*/
@@ -3037,44 +3051,9 @@ int open_ctree(struct super_block *sb,
goto fail_tree_roots;
}
-retry_root_backup:
- generation = btrfs_super_generation(disk_super);
- level = btrfs_super_root_level(disk_super);
-
- tree_root->node = read_tree_block(fs_info,
- btrfs_super_root(disk_super),
- generation, level, NULL);
- if (IS_ERR(tree_root->node) ||
- !extent_buffer_uptodate(tree_root->node)) {
- btrfs_warn(fs_info, "failed to read tree root");
- if (!IS_ERR(tree_root->node))
- free_extent_buffer(tree_root->node);
- tree_root->node = NULL;
- goto recovery_tree_root;
- }
-
- btrfs_set_root_node(&tree_root->root_item, tree_root->node);
- tree_root->commit_root = btrfs_root_node(tree_root);
- btrfs_set_root_refs(&tree_root->root_item, 1);
-
- mutex_lock(&tree_root->objectid_mutex);
- ret = btrfs_find_highest_objectid(tree_root,
- &tree_root->highest_objectid);
- if (ret) {
- mutex_unlock(&tree_root->objectid_mutex);
- goto recovery_tree_root;
- }
-
- ASSERT(tree_root->highest_objectid <= BTRFS_LAST_FREE_OBJECTID);
-
- mutex_unlock(&tree_root->objectid_mutex);
-
- ret = btrfs_read_roots(fs_info);
+ ret = init_tree_roots(fs_info);
if (ret)
- goto recovery_tree_root;
-
- fs_info->generation = generation;
- fs_info->last_trans_committed = generation;
+ goto fail_tree_roots;
ret = btrfs_verify_dev_extents(fs_info);
if (ret) {
@@ -3342,7 +3321,7 @@ fail_block_groups:
btrfs_put_block_group_cache(fs_info);
fail_tree_roots:
- free_root_pointers(fs_info, 1);
+ free_root_pointers(fs_info, true);
invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
fail_sb_buffer:
@@ -3369,24 +3348,6 @@ fail:
btrfs_free_stripe_hash_table(fs_info);
btrfs_close_devices(fs_info->fs_devices);
return err;
-
-recovery_tree_root:
- if (!btrfs_test_opt(fs_info, USEBACKUPROOT))
- goto fail_tree_roots;
-
- free_root_pointers(fs_info, 0);
-
- /* don't use the log in recovery mode, it won't be valid */
- btrfs_set_super_log_root(disk_super, 0);
-
- /* we can't trust the free space cache either */
- btrfs_set_opt(fs_info->mount_opt, CLEAR_CACHE);
-
- ret = next_root_backup(fs_info, fs_info->super_copy,
- &num_backups_tried, &backup_index);
- if (ret == -1)
- goto fail_block_groups;
- goto retry_root_backup;
}
ALLOW_ERROR_INJECTION(open_ctree, ERRNO);
@@ -3980,7 +3941,7 @@ int btrfs_commit_super(struct btrfs_fs_info *fs_info)
return btrfs_commit_transaction(trans);
}
-void close_ctree(struct btrfs_fs_info *fs_info)
+void __cold close_ctree(struct btrfs_fs_info *fs_info)
{
int ret;
@@ -4068,7 +4029,7 @@ void close_ctree(struct btrfs_fs_info *fs_info)
btrfs_free_block_groups(fs_info);
clear_bit(BTRFS_FS_OPEN, &fs_info->flags);
- free_root_pointers(fs_info, 1);
+ free_root_pointers(fs_info, true);
iput(fs_info->btree_inode);
@@ -4445,7 +4406,7 @@ again:
return 0;
}
-static void btrfs_cleanup_bg_io(struct btrfs_block_group_cache *cache)
+static void btrfs_cleanup_bg_io(struct btrfs_block_group *cache)
{
struct inode *inode;
@@ -4462,12 +4423,12 @@ static void btrfs_cleanup_bg_io(struct btrfs_block_group_cache *cache)
void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *cur_trans,
struct btrfs_fs_info *fs_info)
{
- struct btrfs_block_group_cache *cache;
+ struct btrfs_block_group *cache;
spin_lock(&cur_trans->dirty_bgs_lock);
while (!list_empty(&cur_trans->dirty_bgs)) {
cache = list_first_entry(&cur_trans->dirty_bgs,
- struct btrfs_block_group_cache,
+ struct btrfs_block_group,
dirty_list);
if (!list_empty(&cache->io_list)) {
@@ -4495,7 +4456,7 @@ void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *cur_trans,
*/
while (!list_empty(&cur_trans->io_bgs)) {
cache = list_first_entry(&cur_trans->io_bgs,
- struct btrfs_block_group_cache,
+ struct btrfs_block_group,
io_list);
list_del_init(&cache->io_list);
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index a6958103d87e..76f123ebb292 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -49,10 +49,10 @@ struct extent_buffer *btrfs_find_create_tree_block(
struct btrfs_fs_info *fs_info,
u64 bytenr);
void btrfs_clean_tree_block(struct extent_buffer *buf);
-int open_ctree(struct super_block *sb,
+int __cold open_ctree(struct super_block *sb,
struct btrfs_fs_devices *fs_devices,
char *options);
-void close_ctree(struct btrfs_fs_info *fs_info);
+void __cold close_ctree(struct btrfs_fs_info *fs_info);
int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors);
struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
int btrfs_read_dev_one_super(struct block_device *bdev, int copy_num,
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index ddf28ecf17f9..72e312cae69d 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -87,7 +87,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
- inode = btrfs_iget(sb, &key, root, NULL);
+ inode = btrfs_iget(sb, &key, root);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
goto fail;
@@ -214,7 +214,7 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
- return d_obtain_alias(btrfs_iget(fs_info->sb, &key, root, NULL));
+ return d_obtain_alias(btrfs_iget(fs_info->sb, &key, root));
fail:
btrfs_free_path(path);
return ERR_PTR(ret);
diff --git a/fs/btrfs/extent-io-tree.h b/fs/btrfs/extent-io-tree.h
new file mode 100644
index 000000000000..a3febe746c79
--- /dev/null
+++ b/fs/btrfs/extent-io-tree.h
@@ -0,0 +1,248 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef BTRFS_EXTENT_IO_TREE_H
+#define BTRFS_EXTENT_IO_TREE_H
+
+struct extent_changeset;
+struct io_failure_record;
+
+/* Bits for the extent state */
+#define EXTENT_DIRTY (1U << 0)
+#define EXTENT_UPTODATE (1U << 1)
+#define EXTENT_LOCKED (1U << 2)
+#define EXTENT_NEW (1U << 3)
+#define EXTENT_DELALLOC (1U << 4)
+#define EXTENT_DEFRAG (1U << 5)
+#define EXTENT_BOUNDARY (1U << 6)
+#define EXTENT_NODATASUM (1U << 7)
+#define EXTENT_CLEAR_META_RESV (1U << 8)
+#define EXTENT_NEED_WAIT (1U << 9)
+#define EXTENT_DAMAGED (1U << 10)
+#define EXTENT_NORESERVE (1U << 11)
+#define EXTENT_QGROUP_RESERVED (1U << 12)
+#define EXTENT_CLEAR_DATA_RESV (1U << 13)
+#define EXTENT_DELALLOC_NEW (1U << 14)
+#define EXTENT_DO_ACCOUNTING (EXTENT_CLEAR_META_RESV | \
+ EXTENT_CLEAR_DATA_RESV)
+#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING)
+
+/*
+ * Redefined bits above which are used only in the device allocation tree,
+ * shouldn't be using EXTENT_LOCKED / EXTENT_BOUNDARY / EXTENT_CLEAR_META_RESV
+ * / EXTENT_CLEAR_DATA_RESV because they have special meaning to the bit
+ * manipulation functions
+ */
+#define CHUNK_ALLOCATED EXTENT_DIRTY
+#define CHUNK_TRIMMED EXTENT_DEFRAG
+
+enum {
+ IO_TREE_FS_INFO_FREED_EXTENTS0,
+ IO_TREE_FS_INFO_FREED_EXTENTS1,
+ IO_TREE_INODE_IO,
+ IO_TREE_INODE_IO_FAILURE,
+ IO_TREE_RELOC_BLOCKS,
+ IO_TREE_TRANS_DIRTY_PAGES,
+ IO_TREE_ROOT_DIRTY_LOG_PAGES,
+ IO_TREE_SELFTEST,
+};
+
+struct extent_io_tree {
+ struct rb_root state;
+ struct btrfs_fs_info *fs_info;
+ void *private_data;
+ u64 dirty_bytes;
+ bool track_uptodate;
+
+ /* Who owns this io tree, should be one of IO_TREE_* */
+ u8 owner;
+
+ spinlock_t lock;
+ const struct extent_io_ops *ops;
+};
+
+struct extent_state {
+ u64 start;
+ u64 end; /* inclusive */
+ struct rb_node rb_node;
+
+ /* ADD NEW ELEMENTS AFTER THIS */
+ wait_queue_head_t wq;
+ refcount_t refs;
+ unsigned state;
+
+ struct io_failure_record *failrec;
+
+#ifdef CONFIG_BTRFS_DEBUG
+ struct list_head leak_list;
+#endif
+};
+
+int __init extent_state_cache_init(void);
+void __cold extent_state_cache_exit(void);
+
+void extent_io_tree_init(struct btrfs_fs_info *fs_info,
+ struct extent_io_tree *tree, unsigned int owner,
+ void *private_data);
+void extent_io_tree_release(struct extent_io_tree *tree);
+
+int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
+ struct extent_state **cached);
+
+static inline int lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
+{
+ return lock_extent_bits(tree, start, end, NULL);
+}
+
+int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
+
+int __init extent_io_init(void);
+void __cold extent_io_exit(void);
+
+u64 count_range_bits(struct extent_io_tree *tree,
+ u64 *start, u64 search_end,
+ u64 max_bytes, unsigned bits, int contig);
+
+void free_extent_state(struct extent_state *state);
+int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
+ unsigned bits, int filled,
+ struct extent_state *cached_state);
+int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
+ unsigned bits, struct extent_changeset *changeset);
+int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
+ unsigned bits, int wake, int delete,
+ struct extent_state **cached);
+int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
+ unsigned bits, int wake, int delete,
+ struct extent_state **cached, gfp_t mask,
+ struct extent_changeset *changeset);
+
+static inline int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end)
+{
+ return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL);
+}
+
+static inline int unlock_extent_cached(struct extent_io_tree *tree, u64 start,
+ u64 end, struct extent_state **cached)
+{
+ return __clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
+ GFP_NOFS, NULL);
+}
+
+static inline int unlock_extent_cached_atomic(struct extent_io_tree *tree,
+ u64 start, u64 end, struct extent_state **cached)
+{
+ return __clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
+ GFP_ATOMIC, NULL);
+}
+
+static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start,
+ u64 end, unsigned bits)
+{
+ int wake = 0;
+
+ if (bits & EXTENT_LOCKED)
+ wake = 1;
+
+ return clear_extent_bit(tree, start, end, bits, wake, 0, NULL);
+}
+
+int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
+ unsigned bits, struct extent_changeset *changeset);
+int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
+ unsigned bits, u64 *failed_start,
+ struct extent_state **cached_state, gfp_t mask);
+int set_extent_bits_nowait(struct extent_io_tree *tree, u64 start, u64 end,
+ unsigned bits);
+
+static inline int set_extent_bits(struct extent_io_tree *tree, u64 start,
+ u64 end, unsigned bits)
+{
+ return set_extent_bit(tree, start, end, bits, NULL, NULL, GFP_NOFS);
+}
+
+static inline int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
+ u64 end, struct extent_state **cached_state)
+{
+ return __clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
+ cached_state, GFP_NOFS, NULL);
+}
+
+static inline int set_extent_dirty(struct extent_io_tree *tree, u64 start,
+ u64 end, gfp_t mask)
+{
+ return set_extent_bit(tree, start, end, EXTENT_DIRTY, NULL,
+ NULL, mask);
+}
+
+static inline int clear_extent_dirty(struct extent_io_tree *tree, u64 start,
+ u64 end, struct extent_state **cached)
+{
+ return clear_extent_bit(tree, start, end,
+ EXTENT_DIRTY | EXTENT_DELALLOC |
+ EXTENT_DO_ACCOUNTING, 0, 0, cached);
+}
+
+int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
+ unsigned bits, unsigned clear_bits,
+ struct extent_state **cached_state);
+
+static inline int set_extent_delalloc(struct extent_io_tree *tree, u64 start,
+ u64 end, unsigned int extra_bits,
+ struct extent_state **cached_state)
+{
+ return set_extent_bit(tree, start, end,
+ EXTENT_DELALLOC | EXTENT_UPTODATE | extra_bits,
+ NULL, cached_state, GFP_NOFS);
+}
+
+static inline int set_extent_defrag(struct extent_io_tree *tree, u64 start,
+ u64 end, struct extent_state **cached_state)
+{
+ return set_extent_bit(tree, start, end,
+ EXTENT_DELALLOC | EXTENT_UPTODATE | EXTENT_DEFRAG,
+ NULL, cached_state, GFP_NOFS);
+}
+
+static inline int set_extent_new(struct extent_io_tree *tree, u64 start,
+ u64 end)
+{
+ return set_extent_bit(tree, start, end, EXTENT_NEW, NULL, NULL,
+ GFP_NOFS);
+}
+
+static inline int set_extent_uptodate(struct extent_io_tree *tree, u64 start,
+ u64 end, struct extent_state **cached_state, gfp_t mask)
+{
+ return set_extent_bit(tree, start, end, EXTENT_UPTODATE, NULL,
+ cached_state, mask);
+}
+
+int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
+ u64 *start_ret, u64 *end_ret, unsigned bits,
+ struct extent_state **cached_state);
+void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
+ u64 *start_ret, u64 *end_ret, unsigned bits);
+int extent_invalidatepage(struct extent_io_tree *tree,
+ struct page *page, unsigned long offset);
+bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
+ u64 *end, u64 max_bytes,
+ struct extent_state **cached_state);
+
+/* This should be reworked in the future and put elsewhere. */
+int get_state_failrec(struct extent_io_tree *tree, u64 start,
+ struct io_failure_record **failrec);
+int set_state_failrec(struct extent_io_tree *tree, u64 start,
+ struct io_failure_record *failrec);
+void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start,
+ u64 end);
+int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
+ struct io_failure_record **failrec_ret);
+int free_io_failure(struct extent_io_tree *failure_tree,
+ struct extent_io_tree *io_tree,
+ struct io_failure_record *rec);
+int clean_io_failure(struct btrfs_fs_info *fs_info,
+ struct extent_io_tree *failure_tree,
+ struct extent_io_tree *io_tree, u64 start,
+ struct page *page, u64 ino, unsigned int pg_offset);
+
+#endif /* BTRFS_EXTENT_IO_TREE_H */
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 49cb26fa7c63..153f71a5bba9 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -54,7 +54,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
static int find_next_key(struct btrfs_path *path, int level,
struct btrfs_key *key);
-static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
+static int block_group_bits(struct btrfs_block_group *cache, u64 bits)
{
return (cache->flags & bits) == bits;
}
@@ -70,13 +70,13 @@ int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info,
return 0;
}
-void btrfs_free_excluded_extents(struct btrfs_block_group_cache *cache)
+void btrfs_free_excluded_extents(struct btrfs_block_group *cache)
{
struct btrfs_fs_info *fs_info = cache->fs_info;
u64 start, end;
- start = cache->key.objectid;
- end = start + cache->key.offset - 1;
+ start = cache->start;
+ end = start + cache->length - 1;
clear_extent_bits(&fs_info->freed_extents[0],
start, end, EXTENT_UPTODATE);
@@ -1306,8 +1306,10 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
u64 num_bytes, u64 *actual_bytes)
{
- int ret;
+ int ret = 0;
u64 discarded_bytes = 0;
+ u64 end = bytenr + num_bytes;
+ u64 cur = bytenr;
struct btrfs_bio *bbio = NULL;
@@ -1316,15 +1318,23 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
* associated to its stripes that don't go away while we are discarding.
*/
btrfs_bio_counter_inc_blocked(fs_info);
- /* Tell the block device(s) that the sectors can be discarded */
- ret = btrfs_map_block(fs_info, BTRFS_MAP_DISCARD, bytenr, &num_bytes,
- &bbio, 0);
- /* Error condition is -ENOMEM */
- if (!ret) {
- struct btrfs_bio_stripe *stripe = bbio->stripes;
+ while (cur < end) {
+ struct btrfs_bio_stripe *stripe;
int i;
+ num_bytes = end - cur;
+ /* Tell the block device(s) that the sectors can be discarded */
+ ret = btrfs_map_block(fs_info, BTRFS_MAP_DISCARD, cur,
+ &num_bytes, &bbio, 0);
+ /*
+ * Error can be -ENOMEM, -ENOENT (no such chunk mapping) or
+ * -EOPNOTSUPP. For any such error, @num_bytes is not updated,
+ * thus we can't continue anyway.
+ */
+ if (ret < 0)
+ goto out;
+ stripe = bbio->stripes;
for (i = 0; i < bbio->num_stripes; i++, stripe++) {
u64 bytes;
struct request_queue *req_q;
@@ -1341,10 +1351,19 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
stripe->physical,
stripe->length,
&bytes);
- if (!ret)
+ if (!ret) {
discarded_bytes += bytes;
- else if (ret != -EOPNOTSUPP)
- break; /* Logic errors or -ENOMEM, or -EIO but I don't know how that could happen JDM */
+ } else if (ret != -EOPNOTSUPP) {
+ /*
+ * Logic errors or -ENOMEM, or -EIO, but
+ * unlikely to happen.
+ *
+ * And since there are two loops, explicitly
+ * go to out to avoid confusion.
+ */
+ btrfs_put_bbio(bbio);
+ goto out;
+ }
/*
* Just in case we get back EOPNOTSUPP for some reason,
@@ -1354,7 +1373,9 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
ret = 0;
}
btrfs_put_bbio(bbio);
+ cur += num_bytes;
}
+out:
btrfs_bio_counter_dec(fs_info);
if (actual_bytes)
@@ -2516,7 +2537,7 @@ int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
int btrfs_extent_readonly(struct btrfs_fs_info *fs_info, u64 bytenr)
{
- struct btrfs_block_group_cache *block_group;
+ struct btrfs_block_group *block_group;
int readonly = 0;
block_group = btrfs_lookup_block_group(fs_info, bytenr);
@@ -2546,7 +2567,7 @@ static u64 get_alloc_profile_by_root(struct btrfs_root *root, int data)
static u64 first_logical_byte(struct btrfs_fs_info *fs_info, u64 search_start)
{
- struct btrfs_block_group_cache *cache;
+ struct btrfs_block_group *cache;
u64 bytenr;
spin_lock(&fs_info->block_group_cache_lock);
@@ -2560,13 +2581,13 @@ static u64 first_logical_byte(struct btrfs_fs_info *fs_info, u64 search_start)
if (!cache)
return 0;
- bytenr = cache->key.objectid;
+ bytenr = cache->start;
btrfs_put_block_group(cache);
return bytenr;
}
-static int pin_down_extent(struct btrfs_block_group_cache *cache,
+static int pin_down_extent(struct btrfs_block_group *cache,
u64 bytenr, u64 num_bytes, int reserved)
{
struct btrfs_fs_info *fs_info = cache->fs_info;
@@ -2590,13 +2611,12 @@ static int pin_down_extent(struct btrfs_block_group_cache *cache,
return 0;
}
-/*
- * this function must be called within transaction
- */
int btrfs_pin_extent(struct btrfs_fs_info *fs_info,
u64 bytenr, u64 num_bytes, int reserved)
{
- struct btrfs_block_group_cache *cache;
+ struct btrfs_block_group *cache;
+
+ ASSERT(fs_info->running_transaction);
cache = btrfs_lookup_block_group(fs_info, bytenr);
BUG_ON(!cache); /* Logic error */
@@ -2613,7 +2633,7 @@ int btrfs_pin_extent(struct btrfs_fs_info *fs_info,
int btrfs_pin_extent_for_log_replay(struct btrfs_fs_info *fs_info,
u64 bytenr, u64 num_bytes)
{
- struct btrfs_block_group_cache *cache;
+ struct btrfs_block_group *cache;
int ret;
cache = btrfs_lookup_block_group(fs_info, bytenr);
@@ -2640,7 +2660,7 @@ static int __exclude_logged_extent(struct btrfs_fs_info *fs_info,
u64 start, u64 num_bytes)
{
int ret;
- struct btrfs_block_group_cache *block_group;
+ struct btrfs_block_group *block_group;
struct btrfs_caching_control *caching_ctl;
block_group = btrfs_lookup_block_group(fs_info, start);
@@ -2652,7 +2672,7 @@ static int __exclude_logged_extent(struct btrfs_fs_info *fs_info,
if (!caching_ctl) {
/* Logic error */
- BUG_ON(!btrfs_block_group_cache_done(block_group));
+ BUG_ON(!btrfs_block_group_done(block_group));
ret = btrfs_remove_free_space(block_group, start, num_bytes);
} else {
mutex_lock(&caching_ctl->mutex);
@@ -2717,7 +2737,7 @@ int btrfs_exclude_logged_extents(struct extent_buffer *eb)
}
static void
-btrfs_inc_block_group_reservations(struct btrfs_block_group_cache *bg)
+btrfs_inc_block_group_reservations(struct btrfs_block_group *bg)
{
atomic_inc(&bg->reservations);
}
@@ -2726,14 +2746,14 @@ void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info)
{
struct btrfs_caching_control *next;
struct btrfs_caching_control *caching_ctl;
- struct btrfs_block_group_cache *cache;
+ struct btrfs_block_group *cache;
down_write(&fs_info->commit_root_sem);
list_for_each_entry_safe(caching_ctl, next,
&fs_info->caching_block_groups, list) {
cache = caching_ctl->block_group;
- if (btrfs_block_group_cache_done(cache)) {
+ if (btrfs_block_group_done(cache)) {
cache->last_byte_to_unpin = (u64)-1;
list_del_init(&caching_ctl->list);
btrfs_put_caching_control(caching_ctl);
@@ -2785,7 +2805,7 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
u64 start, u64 end,
const bool return_free_space)
{
- struct btrfs_block_group_cache *cache = NULL;
+ struct btrfs_block_group *cache = NULL;
struct btrfs_space_info *space_info;
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
struct btrfs_free_cluster *cluster = NULL;
@@ -2797,7 +2817,7 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
while (start <= end) {
readonly = false;
if (!cache ||
- start >= cache->key.objectid + cache->key.offset) {
+ start >= cache->start + cache->length) {
if (cache)
btrfs_put_block_group(cache);
total_unpinned = 0;
@@ -2810,7 +2830,7 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
empty_cluster <<= 1;
}
- len = cache->key.objectid + cache->key.offset - start;
+ len = cache->start + cache->length - start;
len = min(len, end + 1 - start);
if (start < cache->last_byte_to_unpin) {
@@ -2880,7 +2900,7 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
- struct btrfs_block_group_cache *block_group, *tmp;
+ struct btrfs_block_group *block_group, *tmp;
struct list_head *deleted_bgs;
struct extent_io_tree *unpin;
u64 start;
@@ -2926,8 +2946,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
ret = -EROFS;
if (!trans->aborted)
ret = btrfs_discard_extent(fs_info,
- block_group->key.objectid,
- block_group->key.offset,
+ block_group->start,
+ block_group->length,
&trimmed);
list_del_init(&block_group->bg_list);
@@ -3262,7 +3282,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
}
if (last_ref && btrfs_header_generation(buf) == trans->transid) {
- struct btrfs_block_group_cache *cache;
+ struct btrfs_block_group *cache;
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
ret = check_ref_cleanup(trans, buf->start);
@@ -3349,15 +3369,14 @@ enum btrfs_loop_type {
};
static inline void
-btrfs_lock_block_group(struct btrfs_block_group_cache *cache,
+btrfs_lock_block_group(struct btrfs_block_group *cache,
int delalloc)
{
if (delalloc)
down_read(&cache->data_rwsem);
}
-static inline void
-btrfs_grab_block_group(struct btrfs_block_group_cache *cache,
+static inline void btrfs_grab_block_group(struct btrfs_block_group *cache,
int delalloc)
{
btrfs_get_block_group(cache);
@@ -3365,12 +3384,12 @@ btrfs_grab_block_group(struct btrfs_block_group_cache *cache,
down_read(&cache->data_rwsem);
}
-static struct btrfs_block_group_cache *
-btrfs_lock_cluster(struct btrfs_block_group_cache *block_group,
+static struct btrfs_block_group *btrfs_lock_cluster(
+ struct btrfs_block_group *block_group,
struct btrfs_free_cluster *cluster,
int delalloc)
{
- struct btrfs_block_group_cache *used_bg = NULL;
+ struct btrfs_block_group *used_bg = NULL;
spin_lock(&cluster->refill_lock);
while (1) {
@@ -3404,7 +3423,7 @@ btrfs_lock_cluster(struct btrfs_block_group_cache *block_group,
}
static inline void
-btrfs_release_block_group(struct btrfs_block_group_cache *cache,
+btrfs_release_block_group(struct btrfs_block_group *cache,
int delalloc)
{
if (delalloc)
@@ -3475,12 +3494,12 @@ struct find_free_extent_ctl {
* Return >0 to inform caller that we find nothing
* Return 0 means we have found a location and set ffe_ctl->found_offset.
*/
-static int find_free_extent_clustered(struct btrfs_block_group_cache *bg,
+static int find_free_extent_clustered(struct btrfs_block_group *bg,
struct btrfs_free_cluster *last_ptr,
struct find_free_extent_ctl *ffe_ctl,
- struct btrfs_block_group_cache **cluster_bg_ret)
+ struct btrfs_block_group **cluster_bg_ret)
{
- struct btrfs_block_group_cache *cluster_bg;
+ struct btrfs_block_group *cluster_bg;
u64 aligned_cluster;
u64 offset;
int ret;
@@ -3493,7 +3512,7 @@ static int find_free_extent_clustered(struct btrfs_block_group_cache *bg,
goto release_cluster;
offset = btrfs_alloc_from_cluster(cluster_bg, last_ptr,
- ffe_ctl->num_bytes, cluster_bg->key.objectid,
+ ffe_ctl->num_bytes, cluster_bg->start,
&ffe_ctl->max_extent_size);
if (offset) {
/* We have a block, we're done */
@@ -3579,7 +3598,7 @@ refill_cluster:
* Return 0 when we found an free extent and set ffe_ctrl->found_offset
* Return -EAGAIN to inform caller that we need to re-search this block group
*/
-static int find_free_extent_unclustered(struct btrfs_block_group_cache *bg,
+static int find_free_extent_unclustered(struct btrfs_block_group *bg,
struct btrfs_free_cluster *last_ptr,
struct find_free_extent_ctl *ffe_ctl)
{
@@ -3781,7 +3800,7 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
{
int ret = 0;
struct btrfs_free_cluster *last_ptr = NULL;
- struct btrfs_block_group_cache *block_group = NULL;
+ struct btrfs_block_group *block_group = NULL;
struct find_free_extent_ctl ffe_ctl = {0};
struct btrfs_space_info *space_info;
bool use_cluster = true;
@@ -3904,7 +3923,7 @@ search:
continue;
btrfs_grab_block_group(block_group, delalloc);
- ffe_ctl.search_start = block_group->key.objectid;
+ ffe_ctl.search_start = block_group->start;
/*
* this can happen if we end up cycling through all the
@@ -3935,7 +3954,7 @@ search:
}
have_block_group:
- ffe_ctl.cached = btrfs_block_group_cache_done(block_group);
+ ffe_ctl.cached = btrfs_block_group_done(block_group);
if (unlikely(!ffe_ctl.cached)) {
ffe_ctl.have_caching_bg = true;
ret = btrfs_cache_block_group(block_group, 0);
@@ -3951,7 +3970,7 @@ have_block_group:
* lets look there
*/
if (last_ptr && use_cluster) {
- struct btrfs_block_group_cache *cluster_bg = NULL;
+ struct btrfs_block_group *cluster_bg = NULL;
ret = find_free_extent_clustered(block_group, last_ptr,
&ffe_ctl, &cluster_bg);
@@ -3984,7 +4003,7 @@ checks:
/* move on to the next group */
if (ffe_ctl.search_start + num_bytes >
- block_group->key.objectid + block_group->key.offset) {
+ block_group->start + block_group->length) {
btrfs_add_free_space(block_group, ffe_ctl.found_offset,
num_bytes);
goto loop;
@@ -4133,7 +4152,7 @@ static int __btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
u64 start, u64 len,
int pin, int delalloc)
{
- struct btrfs_block_group_cache *cache;
+ struct btrfs_block_group *cache;
int ret = 0;
cache = btrfs_lookup_block_group(fs_info, start);
@@ -4366,7 +4385,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
{
struct btrfs_fs_info *fs_info = trans->fs_info;
int ret;
- struct btrfs_block_group_cache *block_group;
+ struct btrfs_block_group *block_group;
struct btrfs_space_info *space_info;
/*
@@ -5436,7 +5455,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
btrfs_assert_tree_locked(parent);
parent_level = btrfs_header_level(parent);
- extent_buffer_get(parent);
+ atomic_inc(&parent->refs);
path->nodes[parent_level] = parent;
path->slots[parent_level] = btrfs_header_nritems(parent);
@@ -5480,7 +5499,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
*/
u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
{
- struct btrfs_block_group_cache *block_group;
+ struct btrfs_block_group *block_group;
u64 free_bytes = 0;
int factor;
@@ -5498,9 +5517,8 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
}
factor = btrfs_bg_type_to_factor(block_group->flags);
- free_bytes += (block_group->key.offset -
- btrfs_block_group_used(&block_group->item)) *
- factor;
+ free_bytes += (block_group->length -
+ block_group->used) * factor;
spin_unlock(&block_group->lock);
}
@@ -5623,7 +5641,7 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
*/
int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
{
- struct btrfs_block_group_cache *cache = NULL;
+ struct btrfs_block_group *cache = NULL;
struct btrfs_device *device;
struct list_head *devices;
u64 group_trimmed;
@@ -5647,16 +5665,16 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
cache = btrfs_lookup_first_block_group(fs_info, range->start);
for (; cache; cache = btrfs_next_block_group(cache)) {
- if (cache->key.objectid >= range_end) {
+ if (cache->start >= range_end) {
btrfs_put_block_group(cache);
break;
}
- start = max(range->start, cache->key.objectid);
- end = min(range_end, cache->key.objectid + cache->key.offset);
+ start = max(range->start, cache->start);
+ end = min(range_end, cache->start + cache->length);
if (end - start >= range->minlen) {
- if (!btrfs_block_group_cache_done(cache)) {
+ if (!btrfs_block_group_done(cache)) {
ret = btrfs_cache_block_group(cache, 0);
if (ret) {
bg_failed++;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index cceaf05aada2..eb8bd0258360 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -14,6 +14,7 @@
#include <linux/prefetch.h>
#include <linux/cleancache.h>
#include "extent_io.h"
+#include "extent-io-tree.h"
#include "extent_map.h"
#include "ctree.h"
#include "btrfs_inode.h"
@@ -59,12 +60,23 @@ void btrfs_leak_debug_del(struct list_head *entry)
spin_unlock_irqrestore(&leak_lock, flags);
}
-static inline
-void btrfs_leak_debug_check(void)
+static inline void btrfs_extent_buffer_leak_debug_check(void)
{
- struct extent_state *state;
struct extent_buffer *eb;
+ while (!list_empty(&buffers)) {
+ eb = list_entry(buffers.next, struct extent_buffer, leak_list);
+ pr_err("BTRFS: buffer leak start %llu len %lu refs %d bflags %lu\n",
+ eb->start, eb->len, atomic_read(&eb->refs), eb->bflags);
+ list_del(&eb->leak_list);
+ kmem_cache_free(extent_buffer_cache, eb);
+ }
+}
+
+static inline void btrfs_extent_state_leak_debug_check(void)
+{
+ struct extent_state *state;
+
while (!list_empty(&states)) {
state = list_entry(states.next, struct extent_state, leak_list);
pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n",
@@ -74,14 +86,6 @@ void btrfs_leak_debug_check(void)
list_del(&state->leak_list);
kmem_cache_free(extent_state_cache, state);
}
-
- while (!list_empty(&buffers)) {
- eb = list_entry(buffers.next, struct extent_buffer, leak_list);
- pr_err("BTRFS: buffer leak start %llu len %lu refs %d bflags %lu\n",
- eb->start, eb->len, atomic_read(&eb->refs), eb->bflags);
- list_del(&eb->leak_list);
- kmem_cache_free(extent_buffer_cache, eb);
- }
}
#define btrfs_debug_check_extent_io_range(tree, start, end) \
@@ -105,7 +109,8 @@ static inline void __btrfs_debug_check_extent_io_range(const char *caller,
#else
#define btrfs_leak_debug_add(new, head) do {} while (0)
#define btrfs_leak_debug_del(entry) do {} while (0)
-#define btrfs_leak_debug_check() do {} while (0)
+#define btrfs_extent_buffer_leak_debug_check() do {} while (0)
+#define btrfs_extent_state_leak_debug_check() do {} while (0)
#define btrfs_debug_check_extent_io_range(c, s, e) do {} while (0)
#endif
@@ -196,19 +201,23 @@ static int __must_check flush_write_bio(struct extent_page_data *epd)
return ret;
}
-int __init extent_io_init(void)
+int __init extent_state_cache_init(void)
{
extent_state_cache = kmem_cache_create("btrfs_extent_state",
sizeof(struct extent_state), 0,
SLAB_MEM_SPREAD, NULL);
if (!extent_state_cache)
return -ENOMEM;
+ return 0;
+}
+int __init extent_io_init(void)
+{
extent_buffer_cache = kmem_cache_create("btrfs_extent_buffer",
sizeof(struct extent_buffer), 0,
SLAB_MEM_SPREAD, NULL);
if (!extent_buffer_cache)
- goto free_state_cache;
+ return -ENOMEM;
if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE,
offsetof(struct btrfs_io_bio, bio),
@@ -226,23 +235,24 @@ free_bioset:
free_buffer_cache:
kmem_cache_destroy(extent_buffer_cache);
extent_buffer_cache = NULL;
+ return -ENOMEM;
+}
-free_state_cache:
+void __cold extent_state_cache_exit(void)
+{
+ btrfs_extent_state_leak_debug_check();
kmem_cache_destroy(extent_state_cache);
- extent_state_cache = NULL;
- return -ENOMEM;
}
void __cold extent_io_exit(void)
{
- btrfs_leak_debug_check();
+ btrfs_extent_buffer_leak_debug_check();
/*
* Make sure all delayed rcu free are flushed before we
* destroy caches.
*/
rcu_barrier();
- kmem_cache_destroy(extent_state_cache);
kmem_cache_destroy(extent_buffer_cache);
bioset_exit(&btrfs_bioset);
}
@@ -1676,9 +1686,9 @@ out:
*
* true is returned if we find something, false if nothing was in the tree
*/
-static noinline bool find_delalloc_range(struct extent_io_tree *tree,
- u64 *start, u64 *end, u64 max_bytes,
- struct extent_state **cached_state)
+bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
+ u64 *end, u64 max_bytes,
+ struct extent_state **cached_state)
{
struct rb_node *node;
struct extent_state *state;
@@ -1796,8 +1806,8 @@ again:
/* step one, find a bunch of delalloc bytes starting at start */
delalloc_start = *start;
delalloc_end = 0;
- found = find_delalloc_range(tree, &delalloc_start, &delalloc_end,
- max_bytes, &cached_state);
+ found = btrfs_find_delalloc_range(tree, &delalloc_start, &delalloc_end,
+ max_bytes, &cached_state);
if (!found || delalloc_end <= *start) {
*start = delalloc_start;
*end = delalloc_end;
@@ -1899,7 +1909,7 @@ static int __process_pages_contig(struct address_space *mapping,
if (page_ops & PAGE_SET_PRIVATE2)
SetPagePrivate2(pages[i]);
- if (pages[i] == locked_page) {
+ if (locked_page && pages[i] == locked_page) {
put_page(pages[i]);
pages_locked++;
continue;
@@ -2014,8 +2024,8 @@ out:
* set the private field for a given byte offset in the tree. If there isn't
* an extent_state there already, this does nothing.
*/
-static noinline int set_state_failrec(struct extent_io_tree *tree, u64 start,
- struct io_failure_record *failrec)
+int set_state_failrec(struct extent_io_tree *tree, u64 start,
+ struct io_failure_record *failrec)
{
struct rb_node *node;
struct extent_state *state;
@@ -2042,8 +2052,8 @@ out:
return ret;
}
-static noinline int get_state_failrec(struct extent_io_tree *tree, u64 start,
- struct io_failure_record **failrec)
+int get_state_failrec(struct extent_io_tree *tree, u64 start,
+ struct io_failure_record **failrec)
{
struct rb_node *node;
struct extent_state *state;
@@ -2534,7 +2544,6 @@ struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
bio = btrfs_io_bio_alloc(1);
bio->bi_end_io = endio_func;
bio->bi_iter.bi_sector = failrec->logical >> 9;
- bio_set_dev(bio, fs_info->fs_devices->latest_bdev);
bio->bi_iter.bi_size = 0;
bio->bi_private = data;
@@ -2920,7 +2929,6 @@ struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size)
* a contiguous page to the previous one
* @size: portion of page that we want to write
* @offset: starting offset in the page
- * @bdev: attach newly created bios to this bdev
* @bio_ret: must be valid pointer, newly allocated bio will be stored there
* @end_io_func: end_io callback for new bio
* @mirror_num: desired mirror to read/write
@@ -2931,7 +2939,6 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
struct writeback_control *wbc,
struct page *page, u64 offset,
size_t size, unsigned long pg_offset,
- struct block_device *bdev,
struct bio **bio_ret,
bio_end_io_t end_io_func,
int mirror_num,
@@ -2977,13 +2984,16 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
}
bio = btrfs_bio_alloc(offset);
- bio_set_dev(bio, bdev);
bio_add_page(bio, page, page_size, pg_offset);
bio->bi_end_io = end_io_func;
bio->bi_private = tree;
bio->bi_write_hint = page->mapping->host->i_write_hint;
bio->bi_opf = opf;
if (wbc) {
+ struct block_device *bdev;
+
+ bdev = BTRFS_I(page->mapping->host)->root->fs_info->fs_devices->latest_bdev;
+ bio_set_dev(bio, bdev);
wbc_init_bio(wbc, bio);
wbc_account_cgroup_owner(wbc, page, page_size);
}
@@ -3065,7 +3075,6 @@ static int __do_readpage(struct extent_io_tree *tree,
u64 block_start;
u64 cur_end;
struct extent_map *em;
- struct block_device *bdev;
int ret = 0;
int nr = 0;
size_t pg_offset = 0;
@@ -3142,7 +3151,6 @@ static int __do_readpage(struct extent_io_tree *tree,
offset = em->block_start + extent_offset;
disk_io_size = iosize;
}
- bdev = em->bdev;
block_start = em->block_start;
if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
block_start = EXTENT_MAP_HOLE;
@@ -3232,7 +3240,7 @@ static int __do_readpage(struct extent_io_tree *tree,
ret = submit_extent_page(REQ_OP_READ | read_flags, tree, NULL,
page, offset, disk_io_size,
- pg_offset, bdev, bio,
+ pg_offset, bio,
end_bio_extent_readpage, mirror_num,
*bio_flags,
this_bio_flag,
@@ -3409,7 +3417,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
struct extent_page_data *epd,
loff_t i_size,
unsigned long nr_written,
- unsigned int write_flags, int *nr_ret)
+ int *nr_ret)
{
struct extent_io_tree *tree = epd->tree;
u64 start = page_offset(page);
@@ -3420,11 +3428,11 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
u64 block_start;
u64 iosize;
struct extent_map *em;
- struct block_device *bdev;
size_t pg_offset = 0;
size_t blocksize;
int ret = 0;
int nr = 0;
+ const unsigned int write_flags = wbc_to_write_flags(wbc);
bool compressed;
ret = btrfs_writepage_cow_fixup(page, start, page_end);
@@ -3478,7 +3486,6 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
iosize = min(em_end - cur, end - cur + 1);
iosize = ALIGN(iosize, blocksize);
offset = em->block_start + extent_offset;
- bdev = em->bdev;
block_start = em->block_start;
compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
free_extent_map(em);
@@ -3520,7 +3527,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc,
page, offset, iosize, pg_offset,
- bdev, &epd->bio,
+ &epd->bio,
end_bio_extent_writepage,
0, 0, 0, false);
if (ret) {
@@ -3558,11 +3565,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
size_t pg_offset = 0;
loff_t i_size = i_size_read(inode);
unsigned long end_index = i_size >> PAGE_SHIFT;
- unsigned int write_flags = 0;
unsigned long nr_written = 0;
- write_flags = wbc_to_write_flags(wbc);
-
trace___extent_writepage(page, inode, wbc);
WARN_ON(!PageLocked(page));
@@ -3600,7 +3604,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
}
ret = __extent_writepage_io(inode, page, wbc, epd,
- i_size, nr_written, write_flags, &nr);
+ i_size, nr_written, &nr);
if (ret == 1)
goto done_unlocked;
@@ -3849,7 +3853,6 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
struct extent_page_data *epd)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
- struct block_device *bdev = fs_info->fs_devices->latest_bdev;
struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
u64 offset = eb->start;
u32 nritems;
@@ -3884,7 +3887,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
clear_page_dirty_for_io(p);
set_page_writeback(p);
ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc,
- p, offset, PAGE_SIZE, 0, bdev,
+ p, offset, PAGE_SIZE, 0,
&epd->bio,
end_bio_extent_buffer_writepage,
0, 0, 0, false);
@@ -4121,7 +4124,7 @@ retry:
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
- done_index = page->index;
+ done_index = page->index + 1;
/*
* At this point we hold neither the i_pages lock nor
* the page lock: the page may be truncated or
@@ -4156,16 +4159,6 @@ retry:
ret = __extent_writepage(page, wbc, epd);
if (ret < 0) {
- /*
- * done_index is set past this page,
- * so media errors will not choke
- * background writeout for the entire
- * file. This has consequences for
- * range_cyclic semantics (ie. it may
- * not be suitable for data integrity
- * writeout).
- */
- done_index = page->index + 1;
done = 1;
break;
}
@@ -4240,8 +4233,12 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
.nr_to_write = nr_pages * 2,
.range_start = start,
.range_end = end + 1,
+ /* We're called from an async helper function */
+ .punt_to_cgroup = 1,
+ .no_cgroup_owner = 1,
};
+ wbc_attach_fdatawrite_inode(&wbc_writepages, inode);
while (start <= end) {
page = find_get_page(mapping, start >> PAGE_SHIFT);
if (clear_page_dirty_for_io(page))
@@ -4256,11 +4253,12 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
}
ASSERT(ret <= 0);
- if (ret < 0) {
+ if (ret == 0)
+ ret = flush_write_bio(&epd);
+ else
end_write_bio(&epd, ret);
- return ret;
- }
- ret = flush_write_bio(&epd);
+
+ wbc_detach_inode(&wbc_writepages);
return ret;
}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index cf3424d58fec..a8551a1f56e2 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -7,35 +7,6 @@
#include <linux/refcount.h>
#include "ulist.h"
-/* bits for the extent state */
-#define EXTENT_DIRTY (1U << 0)
-#define EXTENT_UPTODATE (1U << 1)
-#define EXTENT_LOCKED (1U << 2)
-#define EXTENT_NEW (1U << 3)
-#define EXTENT_DELALLOC (1U << 4)
-#define EXTENT_DEFRAG (1U << 5)
-#define EXTENT_BOUNDARY (1U << 6)
-#define EXTENT_NODATASUM (1U << 7)
-#define EXTENT_CLEAR_META_RESV (1U << 8)
-#define EXTENT_NEED_WAIT (1U << 9)
-#define EXTENT_DAMAGED (1U << 10)
-#define EXTENT_NORESERVE (1U << 11)
-#define EXTENT_QGROUP_RESERVED (1U << 12)
-#define EXTENT_CLEAR_DATA_RESV (1U << 13)
-#define EXTENT_DELALLOC_NEW (1U << 14)
-#define EXTENT_DO_ACCOUNTING (EXTENT_CLEAR_META_RESV | \
- EXTENT_CLEAR_DATA_RESV)
-#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING)
-
-/*
- * Redefined bits above which are used only in the device allocation tree,
- * shouldn't be using EXTENT_LOCKED / EXTENT_BOUNDARY / EXTENT_CLEAR_META_RESV
- * / EXTENT_CLEAR_DATA_RESV because they have special meaning to the bit
- * manipulation functions
- */
-#define CHUNK_ALLOCATED EXTENT_DIRTY
-#define CHUNK_TRIMMED EXTENT_DEFRAG
-
/*
* flags for bio submission. The high bits indicate the compression
* type for this bio
@@ -89,12 +60,11 @@ enum {
#define BITMAP_LAST_BYTE_MASK(nbits) \
(BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))
-struct extent_state;
struct btrfs_root;
struct btrfs_inode;
struct btrfs_io_bio;
struct io_failure_record;
-
+struct extent_io_tree;
typedef blk_status_t (extent_submit_bio_start_t)(void *private_data,
struct bio *bio, u64 bio_offset);
@@ -111,47 +81,6 @@ struct extent_io_ops {
int mirror);
};
-enum {
- IO_TREE_FS_INFO_FREED_EXTENTS0,
- IO_TREE_FS_INFO_FREED_EXTENTS1,
- IO_TREE_INODE_IO,
- IO_TREE_INODE_IO_FAILURE,
- IO_TREE_RELOC_BLOCKS,
- IO_TREE_TRANS_DIRTY_PAGES,
- IO_TREE_ROOT_DIRTY_LOG_PAGES,
- IO_TREE_SELFTEST,
-};
-
-struct extent_io_tree {
- struct rb_root state;
- struct btrfs_fs_info *fs_info;
- void *private_data;
- u64 dirty_bytes;
- bool track_uptodate;
-
- /* Who owns this io tree, should be one of IO_TREE_* */
- u8 owner;
-
- spinlock_t lock;
- const struct extent_io_ops *ops;
-};
-
-struct extent_state {
- u64 start;
- u64 end; /* inclusive */
- struct rb_node rb_node;
-
- /* ADD NEW ELEMENTS AFTER THIS */
- wait_queue_head_t wq;
- refcount_t refs;
- unsigned state;
-
- struct io_failure_record *failrec;
-
-#ifdef CONFIG_BTRFS_DEBUG
- struct list_head leak_list;
-#endif
-};
#define INLINE_EXTENT_BUFFER_PAGES 16
#define MAX_INLINE_EXTENT_BUFFER_SIZE (INLINE_EXTENT_BUFFER_PAGES * PAGE_SIZE)
@@ -259,152 +188,11 @@ typedef struct extent_map *(get_extent_t)(struct btrfs_inode *inode,
u64 start, u64 len,
int create);
-void extent_io_tree_init(struct btrfs_fs_info *fs_info,
- struct extent_io_tree *tree, unsigned int owner,
- void *private_data);
-void extent_io_tree_release(struct extent_io_tree *tree);
int try_release_extent_mapping(struct page *page, gfp_t mask);
int try_release_extent_buffer(struct page *page);
-int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
- struct extent_state **cached);
-static inline int lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
-{
- return lock_extent_bits(tree, start, end, NULL);
-}
-
-int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
get_extent_t *get_extent, int mirror_num);
-int __init extent_io_init(void);
-void __cold extent_io_exit(void);
-
-u64 count_range_bits(struct extent_io_tree *tree,
- u64 *start, u64 search_end,
- u64 max_bytes, unsigned bits, int contig);
-
-void free_extent_state(struct extent_state *state);
-int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
- unsigned bits, int filled,
- struct extent_state *cached_state);
-int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
- unsigned bits, struct extent_changeset *changeset);
-int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- unsigned bits, int wake, int delete,
- struct extent_state **cached);
-int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- unsigned bits, int wake, int delete,
- struct extent_state **cached, gfp_t mask,
- struct extent_changeset *changeset);
-
-static inline int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end)
-{
- return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL);
-}
-
-static inline int unlock_extent_cached(struct extent_io_tree *tree, u64 start,
- u64 end, struct extent_state **cached)
-{
- return __clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
- GFP_NOFS, NULL);
-}
-
-static inline int unlock_extent_cached_atomic(struct extent_io_tree *tree,
- u64 start, u64 end, struct extent_state **cached)
-{
- return __clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
- GFP_ATOMIC, NULL);
-}
-
-static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start,
- u64 end, unsigned bits)
-{
- int wake = 0;
-
- if (bits & EXTENT_LOCKED)
- wake = 1;
-
- return clear_extent_bit(tree, start, end, bits, wake, 0, NULL);
-}
-
-int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
- unsigned bits, struct extent_changeset *changeset);
-int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- unsigned bits, u64 *failed_start,
- struct extent_state **cached_state, gfp_t mask);
-int set_extent_bits_nowait(struct extent_io_tree *tree, u64 start, u64 end,
- unsigned bits);
-
-static inline int set_extent_bits(struct extent_io_tree *tree, u64 start,
- u64 end, unsigned bits)
-{
- return set_extent_bit(tree, start, end, bits, NULL, NULL, GFP_NOFS);
-}
-
-static inline int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
- u64 end, struct extent_state **cached_state)
-{
- return __clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
- cached_state, GFP_NOFS, NULL);
-}
-
-static inline int set_extent_dirty(struct extent_io_tree *tree, u64 start,
- u64 end, gfp_t mask)
-{
- return set_extent_bit(tree, start, end, EXTENT_DIRTY, NULL,
- NULL, mask);
-}
-
-static inline int clear_extent_dirty(struct extent_io_tree *tree, u64 start,
- u64 end, struct extent_state **cached)
-{
- return clear_extent_bit(tree, start, end,
- EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_DO_ACCOUNTING, 0, 0, cached);
-}
-
-int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- unsigned bits, unsigned clear_bits,
- struct extent_state **cached_state);
-
-static inline int set_extent_delalloc(struct extent_io_tree *tree, u64 start,
- u64 end, unsigned int extra_bits,
- struct extent_state **cached_state)
-{
- return set_extent_bit(tree, start, end,
- EXTENT_DELALLOC | EXTENT_UPTODATE | extra_bits,
- NULL, cached_state, GFP_NOFS);
-}
-
-static inline int set_extent_defrag(struct extent_io_tree *tree, u64 start,
- u64 end, struct extent_state **cached_state)
-{
- return set_extent_bit(tree, start, end,
- EXTENT_DELALLOC | EXTENT_UPTODATE | EXTENT_DEFRAG,
- NULL, cached_state, GFP_NOFS);
-}
-
-static inline int set_extent_new(struct extent_io_tree *tree, u64 start,
- u64 end)
-{
- return set_extent_bit(tree, start, end, EXTENT_NEW, NULL, NULL,
- GFP_NOFS);
-}
-
-static inline int set_extent_uptodate(struct extent_io_tree *tree, u64 start,
- u64 end, struct extent_state **cached_state, gfp_t mask)
-{
- return set_extent_bit(tree, start, end, EXTENT_UPTODATE, NULL,
- cached_state, mask);
-}
-
-int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
- u64 *start_ret, u64 *end_ret, unsigned bits,
- struct extent_state **cached_state);
-void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
- u64 *start_ret, u64 *end_ret, unsigned bits);
-int extent_invalidatepage(struct extent_io_tree *tree,
- struct page *page, unsigned long offset);
int extent_write_full_page(struct page *page, struct writeback_control *wbc);
int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
int mode);
@@ -442,11 +230,6 @@ static inline int num_extent_pages(const struct extent_buffer *eb)
(eb->start >> PAGE_SHIFT);
}
-static inline void extent_buffer_get(struct extent_buffer *eb)
-{
- atomic_inc(&eb->refs);
-}
-
static inline int extent_buffer_uptodate(struct extent_buffer *eb)
{
return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
@@ -508,10 +291,6 @@ struct btrfs_inode;
int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
u64 length, u64 logical, struct page *page,
unsigned int pg_offset, int mirror_num);
-int clean_io_failure(struct btrfs_fs_info *fs_info,
- struct extent_io_tree *failure_tree,
- struct extent_io_tree *io_tree, u64 start,
- struct page *page, u64 ino, unsigned int pg_offset);
void end_extent_writepage(struct page *page, int err, u64 start, u64 end);
int btrfs_repair_eb_io_failure(struct extent_buffer *eb, int mirror_num);
@@ -535,19 +314,12 @@ struct io_failure_record {
};
-void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start,
- u64 end);
-int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
- struct io_failure_record **failrec_ret);
bool btrfs_check_repairable(struct inode *inode, unsigned failed_bio_pages,
struct io_failure_record *failrec, int fail_mirror);
struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
struct io_failure_record *failrec,
struct page *page, int pg_offset, int icsum,
bio_end_io_t *endio_func, void *data);
-int free_io_failure(struct extent_io_tree *failure_tree,
- struct extent_io_tree *io_tree,
- struct io_failure_record *rec);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
bool find_lock_delalloc_range(struct inode *inode,
struct page *locked_page, u64 *start,
@@ -555,5 +327,4 @@ bool find_lock_delalloc_range(struct inode *inode,
#endif
struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start);
-
#endif
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 9d30acca55e1..6f417ff68980 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -214,9 +214,13 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next)
ASSERT(next->block_start != EXTENT_MAP_DELALLOC &&
prev->block_start != EXTENT_MAP_DELALLOC);
+ if (prev->map_lookup || next->map_lookup)
+ ASSERT(test_bit(EXTENT_FLAG_FS_MAPPING, &prev->flags) &&
+ test_bit(EXTENT_FLAG_FS_MAPPING, &next->flags));
+
if (extent_map_end(prev) == next->start &&
prev->flags == next->flags &&
- prev->bdev == next->bdev &&
+ prev->map_lookup == next->map_lookup &&
((next->block_start == EXTENT_MAP_HOLE &&
prev->block_start == EXTENT_MAP_HOLE) ||
(next->block_start == EXTENT_MAP_INLINE &&
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 473f039fcd7c..8e217337dff9 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -42,15 +42,8 @@ struct extent_map {
u64 block_len;
u64 generation;
unsigned long flags;
- union {
- struct block_device *bdev;
-
- /*
- * used for chunk mappings
- * flags & EXTENT_FLAG_FS_MAPPING must be set
- */
- struct map_lookup *map_lookup;
- };
+ /* Used for chunk mappings, flag EXTENT_FLAG_FS_MAPPING must be set */
+ struct map_lookup *map_lookup;
refcount_t refs;
unsigned int compress_type;
struct list_head list;
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 1a599f50837b..3270a40b0777 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -945,7 +945,6 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
u8 type = btrfs_file_extent_type(leaf, fi);
int compress_type = btrfs_file_extent_compression(leaf, fi);
- em->bdev = fs_info->fs_devices->latest_bdev;
btrfs_item_key_to_cpu(leaf, &key, slot);
extent_start = key.offset;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 8fe4eb7e5045..0cb43b682789 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -296,7 +296,7 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
key.objectid = defrag->ino;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
- inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL);
+ inode = btrfs_iget(fs_info->sb, &key, inode_root);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
goto cleanup;
@@ -667,7 +667,6 @@ void btrfs_drop_extent_cache(struct btrfs_inode *inode, u64 start, u64 end,
}
split->generation = gen;
- split->bdev = em->bdev;
split->flags = flags;
split->compress_type = em->compress_type;
replace_extent_mapping(em_tree, em, split, modified);
@@ -680,7 +679,6 @@ void btrfs_drop_extent_cache(struct btrfs_inode *inode, u64 start, u64 end,
split->start = start + len;
split->len = em->start + em->len - (start + len);
- split->bdev = em->bdev;
split->flags = flags;
split->compress_type = em->compress_type;
split->generation = gen;
@@ -1591,7 +1589,6 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct page **pages = NULL;
- struct extent_state *cached_state = NULL;
struct extent_changeset *data_reserved = NULL;
u64 release_bytes = 0;
u64 lockstart;
@@ -1611,6 +1608,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
return -ENOMEM;
while (iov_iter_count(i) > 0) {
+ struct extent_state *cached_state = NULL;
size_t offset = offset_in_page(pos);
size_t sector_offset;
size_t write_bytes = min(iov_iter_count(i),
@@ -1636,6 +1634,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
break;
}
+ only_release_metadata = false;
sector_offset = pos & (fs_info->sectorsize - 1);
reserve_bytes = round_up(write_bytes + sector_offset,
fs_info->sectorsize);
@@ -1692,7 +1691,7 @@ again:
force_page_uptodate);
if (ret) {
btrfs_delalloc_release_extents(BTRFS_I(inode),
- reserve_bytes, true);
+ reserve_bytes);
break;
}
@@ -1704,7 +1703,7 @@ again:
if (extents_locked == -EAGAIN)
goto again;
btrfs_delalloc_release_extents(BTRFS_I(inode),
- reserve_bytes, true);
+ reserve_bytes);
ret = extents_locked;
break;
}
@@ -1758,11 +1757,21 @@ again:
if (copied > 0)
ret = btrfs_dirty_pages(inode, pages, dirty_pages,
pos, copied, &cached_state);
+
+ /*
+ * If we have not locked the extent range, because the range's
+ * start offset is >= i_size, we might still have a non-NULL
+ * cached extent state, acquired while marking the extent range
+ * as delalloc through btrfs_dirty_pages(). Therefore free any
+ * possible cached extent state to avoid a memory leak.
+ */
if (extents_locked)
unlock_extent_cached(&BTRFS_I(inode)->io_tree,
lockstart, lockend, &cached_state);
- btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes,
- true);
+ else
+ free_extent_state(cached_state);
+
+ btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
if (ret) {
btrfs_drop_pages(pages, num_pages);
break;
@@ -1781,7 +1790,6 @@ again:
set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
lockend, EXTENT_NORESERVE, NULL,
NULL, GFP_NOFS);
- only_release_metadata = false;
}
btrfs_drop_pages(pages, num_pages);
@@ -1893,9 +1901,10 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
(iocb->ki_flags & IOCB_NOWAIT))
return -EOPNOTSUPP;
- if (!inode_trylock(inode)) {
- if (iocb->ki_flags & IOCB_NOWAIT)
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (!inode_trylock(inode))
return -EAGAIN;
+ } else {
inode_lock(inode);
}
@@ -2057,25 +2066,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
struct btrfs_trans_handle *trans;
struct btrfs_log_ctx ctx;
int ret = 0, err;
- u64 len;
- /*
- * If the inode needs a full sync, make sure we use a full range to
- * avoid log tree corruption, due to hole detection racing with ordered
- * extent completion for adjacent ranges, and assertion failures during
- * hole detection.
- */
- if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
- &BTRFS_I(inode)->runtime_flags)) {
- start = 0;
- end = LLONG_MAX;
- }
-
- /*
- * The range length can be represented by u64, we have to do the typecasts
- * to avoid signed overflow if it's [0, LLONG_MAX] eg. from fsync()
- */
- len = (u64)end - (u64)start + 1;
trace_btrfs_sync_file(file, datasync);
btrfs_init_log_ctx(&ctx, inode);
@@ -2102,6 +2093,19 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
atomic_inc(&root->log_batch);
/*
+ * If the inode needs a full sync, make sure we use a full range to
+ * avoid log tree corruption, due to hole detection racing with ordered
+ * extent completion for adjacent ranges, and assertion failures during
+ * hole detection. Do this while holding the inode lock, to avoid races
+ * with other tasks.
+ */
+ if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+ &BTRFS_I(inode)->runtime_flags)) {
+ start = 0;
+ end = LLONG_MAX;
+ }
+
+ /*
* Before we acquired the inode's lock, someone may have dirtied more
* pages in the target range. We need to make sure that writeback for
* any such pages does not start while we are logging the inode, because
@@ -2128,8 +2132,11 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
/*
* We have to do this here to avoid the priority inversion of waiting on
* IO of a lower priority task while holding a transaction open.
+ *
+ * Also, the range length can be represented by u64, we have to do the
+ * typecasts to avoid signed overflow if it's [0, LLONG_MAX].
*/
- ret = btrfs_wait_ordered_range(inode, start, len);
+ ret = btrfs_wait_ordered_range(inode, start, (u64)end - (u64)start + 1);
if (ret) {
up_write(&BTRFS_I(inode)->dio_sem);
inode_unlock(inode);
@@ -2351,7 +2358,6 @@ out:
hole_em->block_start = EXTENT_MAP_HOLE;
hole_em->block_len = 0;
hole_em->orig_block_len = 0;
- hole_em->bdev = fs_info->fs_devices->latest_bdev;
hole_em->compress_type = BTRFS_COMPRESS_NONE;
hole_em->generation = trans->transid;
@@ -3342,29 +3348,30 @@ out:
return ret;
}
-static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
+static loff_t find_desired_extent(struct inode *inode, loff_t offset,
+ int whence)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct extent_map *em = NULL;
struct extent_state *cached_state = NULL;
+ loff_t i_size = inode->i_size;
u64 lockstart;
u64 lockend;
u64 start;
u64 len;
int ret = 0;
- if (inode->i_size == 0)
+ if (i_size == 0 || offset >= i_size)
return -ENXIO;
/*
- * *offset can be negative, in this case we start finding DATA/HOLE from
+ * offset can be negative, in this case we start finding DATA/HOLE from
* the very start of the file.
*/
- start = max_t(loff_t, 0, *offset);
+ start = max_t(loff_t, 0, offset);
lockstart = round_down(start, fs_info->sectorsize);
- lockend = round_up(i_size_read(inode),
- fs_info->sectorsize);
+ lockend = round_up(i_size, fs_info->sectorsize);
if (lockend <= lockstart)
lockend = lockstart + fs_info->sectorsize;
lockend--;
@@ -3373,7 +3380,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
&cached_state);
- while (start < inode->i_size) {
+ while (start < i_size) {
em = btrfs_get_extent_fiemap(BTRFS_I(inode), start, len);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
@@ -3396,46 +3403,39 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
cond_resched();
}
free_extent_map(em);
- if (!ret) {
- if (whence == SEEK_DATA && start >= inode->i_size)
- ret = -ENXIO;
- else
- *offset = min_t(loff_t, start, inode->i_size);
- }
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
&cached_state);
- return ret;
+ if (ret) {
+ offset = ret;
+ } else {
+ if (whence == SEEK_DATA && start >= i_size)
+ offset = -ENXIO;
+ else
+ offset = min_t(loff_t, start, i_size);
+ }
+
+ return offset;
}
static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
{
struct inode *inode = file->f_mapping->host;
- int ret;
- inode_lock(inode);
switch (whence) {
- case SEEK_END:
- case SEEK_CUR:
- offset = generic_file_llseek(file, offset, whence);
- goto out;
+ default:
+ return generic_file_llseek(file, offset, whence);
case SEEK_DATA:
case SEEK_HOLE:
- if (offset >= i_size_read(inode)) {
- inode_unlock(inode);
- return -ENXIO;
- }
-
- ret = find_desired_extent(inode, &offset, whence);
- if (ret) {
- inode_unlock(inode);
- return ret;
- }
+ inode_lock_shared(inode);
+ offset = find_desired_extent(inode, offset, whence);
+ inode_unlock_shared(inode);
+ break;
}
- offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
-out:
- inode_unlock(inode);
- return offset;
+ if (offset < 0)
+ return offset;
+
+ return vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
}
static int btrfs_file_open(struct inode *inode, struct file *filp)
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index d54dcd0ab230..3283da419200 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -78,7 +78,7 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
* sure NOFS is set to keep us from deadlocking.
*/
nofs_flag = memalloc_nofs_save();
- inode = btrfs_iget_path(fs_info->sb, &location, root, NULL, path);
+ inode = btrfs_iget_path(fs_info->sb, &location, root, path);
btrfs_release_path(path);
memalloc_nofs_restore(nofs_flag);
if (IS_ERR(inode))
@@ -91,8 +91,7 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
return inode;
}
-struct inode *lookup_free_space_inode(
- struct btrfs_block_group_cache *block_group,
+struct inode *lookup_free_space_inode(struct btrfs_block_group *block_group,
struct btrfs_path *path)
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
@@ -107,7 +106,7 @@ struct inode *lookup_free_space_inode(
return inode;
inode = __lookup_free_space_inode(fs_info->tree_root, path,
- block_group->key.objectid);
+ block_group->start);
if (IS_ERR(inode))
return inode;
@@ -190,7 +189,7 @@ static int __create_free_space_inode(struct btrfs_root *root,
}
int create_free_space_inode(struct btrfs_trans_handle *trans,
- struct btrfs_block_group_cache *block_group,
+ struct btrfs_block_group *block_group,
struct btrfs_path *path)
{
int ret;
@@ -201,7 +200,7 @@ int create_free_space_inode(struct btrfs_trans_handle *trans,
return ret;
return __create_free_space_inode(trans->fs_info->tree_root, trans, path,
- ino, block_group->key.objectid);
+ ino, block_group->start);
}
int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
@@ -224,7 +223,7 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
}
int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
- struct btrfs_block_group_cache *block_group,
+ struct btrfs_block_group *block_group,
struct inode *inode)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -385,6 +384,12 @@ static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, struct inode *inode
if (uptodate && !PageUptodate(page)) {
btrfs_readpage(NULL, page);
lock_page(page);
+ if (page->mapping != inode->i_mapping) {
+ btrfs_err(BTRFS_I(inode)->root->fs_info,
+ "free space cache page truncated");
+ io_ctl_drop_pages(io_ctl);
+ return -EIO;
+ }
if (!PageUptodate(page)) {
btrfs_err(BTRFS_I(inode)->root->fs_info,
"error reading free space cache");
@@ -814,7 +819,7 @@ free_cache:
goto out;
}
-int load_free_space_cache(struct btrfs_block_group_cache *block_group)
+int load_free_space_cache(struct btrfs_block_group *block_group)
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
@@ -822,7 +827,7 @@ int load_free_space_cache(struct btrfs_block_group_cache *block_group)
struct btrfs_path *path;
int ret = 0;
bool matched;
- u64 used = btrfs_block_group_used(&block_group->item);
+ u64 used = block_group->used;
/*
* If this block group has been marked to be cleared for one reason or
@@ -876,13 +881,13 @@ int load_free_space_cache(struct btrfs_block_group_cache *block_group)
spin_unlock(&block_group->lock);
ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
- path, block_group->key.objectid);
+ path, block_group->start);
btrfs_free_path(path);
if (ret <= 0)
goto out;
spin_lock(&ctl->tree_lock);
- matched = (ctl->free_space == (block_group->key.offset - used -
+ matched = (ctl->free_space == (block_group->length - used -
block_group->bytes_super));
spin_unlock(&ctl->tree_lock);
@@ -890,7 +895,7 @@ int load_free_space_cache(struct btrfs_block_group_cache *block_group)
__btrfs_remove_free_space_cache(ctl);
btrfs_warn(fs_info,
"block group %llu has wrong amount of free space",
- block_group->key.objectid);
+ block_group->start);
ret = -1;
}
out:
@@ -903,7 +908,7 @@ out:
btrfs_warn(fs_info,
"failed to load free space cache for block group %llu, rebuilding it now",
- block_group->key.objectid);
+ block_group->start);
}
iput(inode);
@@ -913,7 +918,7 @@ out:
static noinline_for_stack
int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl,
struct btrfs_free_space_ctl *ctl,
- struct btrfs_block_group_cache *block_group,
+ struct btrfs_block_group *block_group,
int *entries, int *bitmaps,
struct list_head *bitmap_list)
{
@@ -1041,7 +1046,7 @@ fail:
}
static noinline_for_stack int write_pinned_extent_entries(
- struct btrfs_block_group_cache *block_group,
+ struct btrfs_block_group *block_group,
struct btrfs_io_ctl *io_ctl,
int *entries)
{
@@ -1061,9 +1066,9 @@ static noinline_for_stack int write_pinned_extent_entries(
*/
unpin = block_group->fs_info->pinned_extents;
- start = block_group->key.objectid;
+ start = block_group->start;
- while (start < block_group->key.objectid + block_group->key.offset) {
+ while (start < block_group->start + block_group->length) {
ret = find_first_extent_bit(unpin, start,
&extent_start, &extent_end,
EXTENT_DIRTY, NULL);
@@ -1071,13 +1076,12 @@ static noinline_for_stack int write_pinned_extent_entries(
return 0;
/* This pinned extent is out of our range */
- if (extent_start >= block_group->key.objectid +
- block_group->key.offset)
+ if (extent_start >= block_group->start + block_group->length)
return 0;
extent_start = max(extent_start, start);
- extent_end = min(block_group->key.objectid +
- block_group->key.offset, extent_end + 1);
+ extent_end = min(block_group->start + block_group->length,
+ extent_end + 1);
len = extent_end - extent_start;
*entries += 1;
@@ -1141,7 +1145,7 @@ cleanup_write_cache_enospc(struct inode *inode,
static int __btrfs_wait_cache_io(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
- struct btrfs_block_group_cache *block_group,
+ struct btrfs_block_group *block_group,
struct btrfs_io_ctl *io_ctl,
struct btrfs_path *path, u64 offset)
{
@@ -1168,7 +1172,7 @@ out:
#ifdef DEBUG
btrfs_err(root->fs_info,
"failed to write free space cache for block group %llu",
- block_group->key.objectid);
+ block_group->start);
#endif
}
}
@@ -1210,12 +1214,12 @@ static int btrfs_wait_cache_io_root(struct btrfs_root *root,
}
int btrfs_wait_cache_io(struct btrfs_trans_handle *trans,
- struct btrfs_block_group_cache *block_group,
+ struct btrfs_block_group *block_group,
struct btrfs_path *path)
{
return __btrfs_wait_cache_io(block_group->fs_info->tree_root, trans,
block_group, &block_group->io_ctl,
- path, block_group->key.objectid);
+ path, block_group->start);
}
/**
@@ -1231,7 +1235,7 @@ int btrfs_wait_cache_io(struct btrfs_trans_handle *trans,
*/
static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
struct btrfs_free_space_ctl *ctl,
- struct btrfs_block_group_cache *block_group,
+ struct btrfs_block_group *block_group,
struct btrfs_io_ctl *io_ctl,
struct btrfs_trans_handle *trans)
{
@@ -1369,7 +1373,7 @@ out_unlock:
}
int btrfs_write_out_cache(struct btrfs_trans_handle *trans,
- struct btrfs_block_group_cache *block_group,
+ struct btrfs_block_group *block_group,
struct btrfs_path *path)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
@@ -1394,7 +1398,7 @@ int btrfs_write_out_cache(struct btrfs_trans_handle *trans,
#ifdef DEBUG
btrfs_err(fs_info,
"failed to write free space cache for block group %llu",
- block_group->key.objectid);
+ block_group->start);
#endif
spin_lock(&block_group->lock);
block_group->disk_cache_state = BTRFS_DC_ERROR;
@@ -1647,11 +1651,11 @@ static int link_free_space(struct btrfs_free_space_ctl *ctl,
static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
{
- struct btrfs_block_group_cache *block_group = ctl->private;
+ struct btrfs_block_group *block_group = ctl->private;
u64 max_bytes;
u64 bitmap_bytes;
u64 extent_bytes;
- u64 size = block_group->key.offset;
+ u64 size = block_group->length;
u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
u64 max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
@@ -1991,7 +1995,7 @@ static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info)
{
- struct btrfs_block_group_cache *block_group = ctl->private;
+ struct btrfs_block_group *block_group = ctl->private;
struct btrfs_fs_info *fs_info = block_group->fs_info;
bool forced = false;
@@ -2028,7 +2032,7 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
* so allow those block groups to still be allowed to have a bitmap
* entry.
*/
- if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->key.offset)
+ if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->length)
return false;
return true;
@@ -2043,7 +2047,7 @@ static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info)
{
struct btrfs_free_space *bitmap_info;
- struct btrfs_block_group_cache *block_group = NULL;
+ struct btrfs_block_group *block_group = NULL;
int added = 0;
u64 bytes, offset, bytes_added;
int ret;
@@ -2380,7 +2384,7 @@ out:
return ret;
}
-int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
+int btrfs_add_free_space(struct btrfs_block_group *block_group,
u64 bytenr, u64 size)
{
return __btrfs_add_free_space(block_group->fs_info,
@@ -2388,7 +2392,7 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
bytenr, size);
}
-int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
+int btrfs_remove_free_space(struct btrfs_block_group *block_group,
u64 offset, u64 bytes)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
@@ -2478,7 +2482,7 @@ out:
return ret;
}
-void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
+void btrfs_dump_free_space(struct btrfs_block_group *block_group,
u64 bytes)
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
@@ -2503,14 +2507,14 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
"%d blocks of free space at or bigger than bytes is", count);
}
-void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
+void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group)
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
spin_lock_init(&ctl->tree_lock);
ctl->unit = fs_info->sectorsize;
- ctl->start = block_group->key.objectid;
+ ctl->start = block_group->start;
ctl->private = block_group;
ctl->op = &free_space_op;
INIT_LIST_HEAD(&ctl->trimming_ranges);
@@ -2532,7 +2536,7 @@ void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
*/
static int
__btrfs_return_cluster_to_free_space(
- struct btrfs_block_group_cache *block_group,
+ struct btrfs_block_group *block_group,
struct btrfs_free_cluster *cluster)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
@@ -2598,7 +2602,7 @@ void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
spin_unlock(&ctl->tree_lock);
}
-void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
+void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_cluster *cluster;
@@ -2620,7 +2624,7 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
}
-u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
+u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group,
u64 offset, u64 bytes, u64 empty_size,
u64 *max_extent_size)
{
@@ -2674,7 +2678,7 @@ out:
* cluster and remove the cluster from it.
*/
int btrfs_return_cluster_to_free_space(
- struct btrfs_block_group_cache *block_group,
+ struct btrfs_block_group *block_group,
struct btrfs_free_cluster *cluster)
{
struct btrfs_free_space_ctl *ctl;
@@ -2708,7 +2712,7 @@ int btrfs_return_cluster_to_free_space(
return ret;
}
-static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
+static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group *block_group,
struct btrfs_free_cluster *cluster,
struct btrfs_free_space *entry,
u64 bytes, u64 min_start,
@@ -2741,7 +2745,7 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
* if it couldn't find anything suitably large, or a logical disk offset
* if things worked out
*/
-u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
+u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group,
struct btrfs_free_cluster *cluster, u64 bytes,
u64 min_start, u64 *max_extent_size)
{
@@ -2827,7 +2831,7 @@ out:
return ret;
}
-static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
+static int btrfs_bitmap_cluster(struct btrfs_block_group *block_group,
struct btrfs_free_space *entry,
struct btrfs_free_cluster *cluster,
u64 offset, u64 bytes,
@@ -2909,7 +2913,7 @@ again:
* extent of cont1_bytes, and other clusters of at least min_bytes.
*/
static noinline int
-setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
+setup_cluster_no_bitmap(struct btrfs_block_group *block_group,
struct btrfs_free_cluster *cluster,
struct list_head *bitmaps, u64 offset, u64 bytes,
u64 cont1_bytes, u64 min_bytes)
@@ -3000,7 +3004,7 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
* that we have already failed to find extents that will work.
*/
static noinline int
-setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
+setup_cluster_bitmap(struct btrfs_block_group *block_group,
struct btrfs_free_cluster *cluster,
struct list_head *bitmaps, u64 offset, u64 bytes,
u64 cont1_bytes, u64 min_bytes)
@@ -3050,7 +3054,7 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
* returns zero and sets up cluster if things worked out, otherwise
* it returns -enospc
*/
-int btrfs_find_space_cluster(struct btrfs_block_group_cache *block_group,
+int btrfs_find_space_cluster(struct btrfs_block_group *block_group,
struct btrfs_free_cluster *cluster,
u64 offset, u64 bytes, u64 empty_size)
{
@@ -3141,7 +3145,7 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
cluster->block_group = NULL;
}
-static int do_trimming(struct btrfs_block_group_cache *block_group,
+static int do_trimming(struct btrfs_block_group *block_group,
u64 *total_trimmed, u64 start, u64 bytes,
u64 reserved_start, u64 reserved_bytes,
struct btrfs_trim_range *trim_entry)
@@ -3186,7 +3190,7 @@ static int do_trimming(struct btrfs_block_group_cache *block_group,
return ret;
}
-static int trim_no_bitmap(struct btrfs_block_group_cache *block_group,
+static int trim_no_bitmap(struct btrfs_block_group *block_group,
u64 *total_trimmed, u64 start, u64 end, u64 minlen)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
@@ -3271,7 +3275,7 @@ out:
return ret;
}
-static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
+static int trim_bitmaps(struct btrfs_block_group *block_group,
u64 *total_trimmed, u64 start, u64 end, u64 minlen)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
@@ -3352,12 +3356,12 @@ next:
return ret;
}
-void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache)
+void btrfs_get_block_group_trimming(struct btrfs_block_group *cache)
{
atomic_inc(&cache->trimming);
}
-void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *block_group)
+void btrfs_put_block_group_trimming(struct btrfs_block_group *block_group)
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
struct extent_map_tree *em_tree;
@@ -3373,7 +3377,7 @@ void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *block_group)
mutex_lock(&fs_info->chunk_mutex);
em_tree = &fs_info->mapping_tree;
write_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, block_group->key.objectid,
+ em = lookup_extent_mapping(em_tree, block_group->start,
1);
BUG_ON(!em); /* logic error, can't happen */
remove_extent_mapping(em_tree, em);
@@ -3392,7 +3396,7 @@ void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *block_group)
}
}
-int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
+int btrfs_trim_block_group(struct btrfs_block_group *block_group,
u64 *trimmed, u64 start, u64 end, u64 minlen)
{
int ret;
@@ -3590,7 +3594,7 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
* how the free space cache loading stuff works, so you can get really weird
* configurations.
*/
-int test_add_free_space_entry(struct btrfs_block_group_cache *cache,
+int test_add_free_space_entry(struct btrfs_block_group *cache,
u64 offset, u64 bytes, bool bitmap)
{
struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
@@ -3658,7 +3662,7 @@ again:
* just used to check the absence of space, so if there is free space in the
* range at all we will return 1.
*/
-int test_check_exists(struct btrfs_block_group_cache *cache,
+int test_check_exists(struct btrfs_block_group *cache,
u64 offset, u64 bytes)
{
struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 39c32c8fc24f..ba9a23241101 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -50,24 +50,23 @@ struct btrfs_io_ctl {
unsigned check_crcs:1;
};
-struct inode *lookup_free_space_inode(
- struct btrfs_block_group_cache *block_group,
+struct inode *lookup_free_space_inode(struct btrfs_block_group *block_group,
struct btrfs_path *path);
int create_free_space_inode(struct btrfs_trans_handle *trans,
- struct btrfs_block_group_cache *block_group,
+ struct btrfs_block_group *block_group,
struct btrfs_path *path);
int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv);
int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
- struct btrfs_block_group_cache *block_group,
+ struct btrfs_block_group *block_group,
struct inode *inode);
-int load_free_space_cache(struct btrfs_block_group_cache *block_group);
+int load_free_space_cache(struct btrfs_block_group *block_group);
int btrfs_wait_cache_io(struct btrfs_trans_handle *trans,
- struct btrfs_block_group_cache *block_group,
+ struct btrfs_block_group *block_group,
struct btrfs_path *path);
int btrfs_write_out_cache(struct btrfs_trans_handle *trans,
- struct btrfs_block_group_cache *block_group,
+ struct btrfs_block_group *block_group,
struct btrfs_path *path);
struct inode *lookup_free_ino_inode(struct btrfs_root *root,
struct btrfs_path *path);
@@ -81,42 +80,40 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
struct btrfs_path *path,
struct inode *inode);
-void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group);
+void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group);
int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
struct btrfs_free_space_ctl *ctl,
u64 bytenr, u64 size);
-int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
+int btrfs_add_free_space(struct btrfs_block_group *block_group,
u64 bytenr, u64 size);
-int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
+int btrfs_remove_free_space(struct btrfs_block_group *block_group,
u64 bytenr, u64 size);
void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl);
-void btrfs_remove_free_space_cache(struct btrfs_block_group_cache
- *block_group);
-u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
+void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group);
+u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group,
u64 offset, u64 bytes, u64 empty_size,
u64 *max_extent_size);
u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root);
-void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
+void btrfs_dump_free_space(struct btrfs_block_group *block_group,
u64 bytes);
-int btrfs_find_space_cluster(struct btrfs_block_group_cache *block_group,
+int btrfs_find_space_cluster(struct btrfs_block_group *block_group,
struct btrfs_free_cluster *cluster,
u64 offset, u64 bytes, u64 empty_size);
void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster);
-u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
+u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group,
struct btrfs_free_cluster *cluster, u64 bytes,
u64 min_start, u64 *max_extent_size);
int btrfs_return_cluster_to_free_space(
- struct btrfs_block_group_cache *block_group,
+ struct btrfs_block_group *block_group,
struct btrfs_free_cluster *cluster);
-int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
+int btrfs_trim_block_group(struct btrfs_block_group *block_group,
u64 *trimmed, u64 start, u64 end, u64 minlen);
/* Support functions for running our sanity tests */
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
-int test_add_free_space_entry(struct btrfs_block_group_cache *cache,
+int test_add_free_space_entry(struct btrfs_block_group *cache,
u64 offset, u64 bytes, bool bitmap);
-int test_check_exists(struct btrfs_block_group_cache *cache,
- u64 offset, u64 bytes);
+int test_check_exists(struct btrfs_block_group *cache, u64 offset, u64 bytes);
#endif
#endif
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index 48a03f5240f5..258cb3fae17a 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -13,10 +13,10 @@
#include "block-group.h"
static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
- struct btrfs_block_group_cache *block_group,
+ struct btrfs_block_group *block_group,
struct btrfs_path *path);
-void set_free_space_tree_thresholds(struct btrfs_block_group_cache *cache)
+void set_free_space_tree_thresholds(struct btrfs_block_group *cache)
{
u32 bitmap_range;
size_t bitmap_size;
@@ -27,8 +27,7 @@ void set_free_space_tree_thresholds(struct btrfs_block_group_cache *cache)
* exceeds that required for using bitmaps.
*/
bitmap_range = cache->fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
- num_bitmaps = div_u64(cache->key.offset + bitmap_range - 1,
- bitmap_range);
+ num_bitmaps = div_u64(cache->length + bitmap_range - 1, bitmap_range);
bitmap_size = sizeof(struct btrfs_item) + BTRFS_FREE_SPACE_BITMAP_SIZE;
total_bitmap_size = num_bitmaps * bitmap_size;
cache->bitmap_high_thresh = div_u64(total_bitmap_size,
@@ -45,7 +44,7 @@ void set_free_space_tree_thresholds(struct btrfs_block_group_cache *cache)
}
static int add_new_free_space_info(struct btrfs_trans_handle *trans,
- struct btrfs_block_group_cache *block_group,
+ struct btrfs_block_group *block_group,
struct btrfs_path *path)
{
struct btrfs_root *root = trans->fs_info->free_space_root;
@@ -54,9 +53,9 @@ static int add_new_free_space_info(struct btrfs_trans_handle *trans,
struct extent_buffer *leaf;
int ret;
- key.objectid = block_group->key.objectid;
+ key.objectid = block_group->start;
key.type = BTRFS_FREE_SPACE_INFO_KEY;
- key.offset = block_group->key.offset;
+ key.offset = block_group->length;
ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info));
if (ret)
@@ -78,7 +77,7 @@ out:
EXPORT_FOR_TESTS
struct btrfs_free_space_info *search_free_space_info(
struct btrfs_trans_handle *trans,
- struct btrfs_block_group_cache *block_group,
+ struct btrfs_block_group *block_group,
struct btrfs_path *path, int cow)
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
@@ -86,16 +85,16 @@ struct btrfs_free_space_info *search_free_space_info(
struct btrfs_key key;
int ret;
- key.objectid = block_group->key.objectid;
+ key.objectid = block_group->start;
key.type = BTRFS_FREE_SPACE_INFO_KEY;
- key.offset = block_group->key.offset;
+ key.offset = block_group->length;
ret = btrfs_search_slot(trans, root, &key, path, 0, cow);
if (ret < 0)
return ERR_PTR(ret);
if (ret != 0) {
btrfs_warn(fs_info, "missing free space info for %llu",
- block_group->key.objectid);
+ block_group->start);
ASSERT(0);
return ERR_PTR(-ENOENT);
}
@@ -180,7 +179,7 @@ static void le_bitmap_set(unsigned long *map, unsigned int start, int len)
EXPORT_FOR_TESTS
int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
- struct btrfs_block_group_cache *block_group,
+ struct btrfs_block_group *block_group,
struct btrfs_path *path)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
@@ -197,7 +196,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
int done = 0, nr;
int ret;
- bitmap_size = free_space_bitmap_size(block_group->key.offset,
+ bitmap_size = free_space_bitmap_size(block_group->length,
fs_info->sectorsize);
bitmap = alloc_bitmap(bitmap_size);
if (!bitmap) {
@@ -205,8 +204,8 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
goto out;
}
- start = block_group->key.objectid;
- end = block_group->key.objectid + block_group->key.offset;
+ start = block_group->start;
+ end = block_group->start + block_group->length;
key.objectid = end - 1;
key.type = (u8)-1;
@@ -224,8 +223,8 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
- ASSERT(found_key.objectid == block_group->key.objectid);
- ASSERT(found_key.offset == block_group->key.offset);
+ ASSERT(found_key.objectid == block_group->start);
+ ASSERT(found_key.offset == block_group->length);
done = 1;
break;
} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY) {
@@ -271,7 +270,7 @@ int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
if (extent_count != expected_extent_count) {
btrfs_err(fs_info,
"incorrect extent count for %llu; counted %u, expected %u",
- block_group->key.objectid, extent_count,
+ block_group->start, extent_count,
expected_extent_count);
ASSERT(0);
ret = -EIO;
@@ -320,7 +319,7 @@ out:
EXPORT_FOR_TESTS
int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
- struct btrfs_block_group_cache *block_group,
+ struct btrfs_block_group *block_group,
struct btrfs_path *path)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
@@ -336,7 +335,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
int done = 0, nr;
int ret;
- bitmap_size = free_space_bitmap_size(block_group->key.offset,
+ bitmap_size = free_space_bitmap_size(block_group->length,
fs_info->sectorsize);
bitmap = alloc_bitmap(bitmap_size);
if (!bitmap) {
@@ -344,8 +343,8 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
goto out;
}
- start = block_group->key.objectid;
- end = block_group->key.objectid + block_group->key.offset;
+ start = block_group->start;
+ end = block_group->start + block_group->length;
key.objectid = end - 1;
key.type = (u8)-1;
@@ -363,8 +362,8 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
- ASSERT(found_key.objectid == block_group->key.objectid);
- ASSERT(found_key.offset == block_group->key.offset);
+ ASSERT(found_key.objectid == block_group->start);
+ ASSERT(found_key.offset == block_group->length);
done = 1;
break;
} else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
@@ -413,7 +412,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(path);
- nrbits = div_u64(block_group->key.offset, block_group->fs_info->sectorsize);
+ nrbits = div_u64(block_group->length, block_group->fs_info->sectorsize);
start_bit = find_next_bit_le(bitmap, nrbits, 0);
while (start_bit < nrbits) {
@@ -437,7 +436,7 @@ int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
if (extent_count != expected_extent_count) {
btrfs_err(fs_info,
"incorrect extent count for %llu; counted %u, expected %u",
- block_group->key.objectid, extent_count,
+ block_group->start, extent_count,
expected_extent_count);
ASSERT(0);
ret = -EIO;
@@ -453,7 +452,7 @@ out:
}
static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
- struct btrfs_block_group_cache *block_group,
+ struct btrfs_block_group *block_group,
struct btrfs_path *path,
int new_extents)
{
@@ -491,7 +490,7 @@ out:
}
EXPORT_FOR_TESTS
-int free_space_test_bit(struct btrfs_block_group_cache *block_group,
+int free_space_test_bit(struct btrfs_block_group *block_group,
struct btrfs_path *path, u64 offset)
{
struct extent_buffer *leaf;
@@ -513,7 +512,7 @@ int free_space_test_bit(struct btrfs_block_group_cache *block_group,
return !!extent_buffer_test_bit(leaf, ptr, i);
}
-static void free_space_set_bits(struct btrfs_block_group_cache *block_group,
+static void free_space_set_bits(struct btrfs_block_group *block_group,
struct btrfs_path *path, u64 *start, u64 *size,
int bit)
{
@@ -581,7 +580,7 @@ static int free_space_next_bitmap(struct btrfs_trans_handle *trans,
* the bitmap.
*/
static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
- struct btrfs_block_group_cache *block_group,
+ struct btrfs_block_group *block_group,
struct btrfs_path *path,
u64 start, u64 size, int remove)
{
@@ -597,7 +596,7 @@ static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
* Read the bit for the block immediately before the extent of space if
* that block is within the block group.
*/
- if (start > block_group->key.objectid) {
+ if (start > block_group->start) {
u64 prev_block = start - block_group->fs_info->sectorsize;
key.objectid = prev_block;
@@ -649,7 +648,7 @@ static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
* Read the bit for the block immediately after the extent of space if
* that block is within the block group.
*/
- if (end < block_group->key.objectid + block_group->key.offset) {
+ if (end < block_group->start + block_group->length) {
/* The next block may be in the next bitmap. */
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
if (end >= key.objectid + key.offset) {
@@ -694,7 +693,7 @@ out:
}
static int remove_free_space_extent(struct btrfs_trans_handle *trans,
- struct btrfs_block_group_cache *block_group,
+ struct btrfs_block_group *block_group,
struct btrfs_path *path,
u64 start, u64 size)
{
@@ -781,7 +780,7 @@ out:
EXPORT_FOR_TESTS
int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
- struct btrfs_block_group_cache *block_group,
+ struct btrfs_block_group *block_group,
struct btrfs_path *path, u64 start, u64 size)
{
struct btrfs_free_space_info *info;
@@ -812,7 +811,7 @@ int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
u64 start, u64 size)
{
- struct btrfs_block_group_cache *block_group;
+ struct btrfs_block_group *block_group;
struct btrfs_path *path;
int ret;
@@ -846,7 +845,7 @@ out:
}
static int add_free_space_extent(struct btrfs_trans_handle *trans,
- struct btrfs_block_group_cache *block_group,
+ struct btrfs_block_group *block_group,
struct btrfs_path *path,
u64 start, u64 size)
{
@@ -880,7 +879,7 @@ static int add_free_space_extent(struct btrfs_trans_handle *trans,
new_key.offset = size;
/* Search for a neighbor on the left. */
- if (start == block_group->key.objectid)
+ if (start == block_group->start)
goto right;
key.objectid = start - 1;
key.type = (u8)-1;
@@ -900,8 +899,8 @@ static int add_free_space_extent(struct btrfs_trans_handle *trans,
found_start = key.objectid;
found_end = key.objectid + key.offset;
- ASSERT(found_start >= block_group->key.objectid &&
- found_end > block_group->key.objectid);
+ ASSERT(found_start >= block_group->start &&
+ found_end > block_group->start);
ASSERT(found_start < start && found_end <= start);
/*
@@ -920,7 +919,7 @@ static int add_free_space_extent(struct btrfs_trans_handle *trans,
right:
/* Search for a neighbor on the right. */
- if (end == block_group->key.objectid + block_group->key.offset)
+ if (end == block_group->start + block_group->length)
goto insert;
key.objectid = end;
key.type = (u8)-1;
@@ -940,8 +939,8 @@ right:
found_start = key.objectid;
found_end = key.objectid + key.offset;
- ASSERT(found_start >= block_group->key.objectid &&
- found_end > block_group->key.objectid);
+ ASSERT(found_start >= block_group->start &&
+ found_end > block_group->start);
ASSERT((found_start < start && found_end <= start) ||
(found_start >= end && found_end > end));
@@ -974,7 +973,7 @@ out:
EXPORT_FOR_TESTS
int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
- struct btrfs_block_group_cache *block_group,
+ struct btrfs_block_group *block_group,
struct btrfs_path *path, u64 start, u64 size)
{
struct btrfs_free_space_info *info;
@@ -1005,7 +1004,7 @@ int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
int add_to_free_space_tree(struct btrfs_trans_handle *trans,
u64 start, u64 size)
{
- struct btrfs_block_group_cache *block_group;
+ struct btrfs_block_group *block_group;
struct btrfs_path *path;
int ret;
@@ -1043,7 +1042,7 @@ out:
* through the normal add/remove hooks.
*/
static int populate_free_space_tree(struct btrfs_trans_handle *trans,
- struct btrfs_block_group_cache *block_group)
+ struct btrfs_block_group *block_group)
{
struct btrfs_root *extent_root = trans->fs_info->extent_root;
struct btrfs_path *path, *path2;
@@ -1075,7 +1074,7 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans,
* BLOCK_GROUP_ITEM, so an extent may precede the block group that it's
* contained in.
*/
- key.objectid = block_group->key.objectid;
+ key.objectid = block_group->start;
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = 0;
@@ -1084,8 +1083,8 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans,
goto out_locked;
ASSERT(ret == 0);
- start = block_group->key.objectid;
- end = block_group->key.objectid + block_group->key.offset;
+ start = block_group->start;
+ end = block_group->start + block_group->length;
while (1) {
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
@@ -1109,7 +1108,7 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans,
else
start += key.offset;
} else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
- if (key.objectid != block_group->key.objectid)
+ if (key.objectid != block_group->start)
break;
}
@@ -1140,7 +1139,7 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
struct btrfs_trans_handle *trans;
struct btrfs_root *tree_root = fs_info->tree_root;
struct btrfs_root *free_space_root;
- struct btrfs_block_group_cache *block_group;
+ struct btrfs_block_group *block_group;
struct rb_node *node;
int ret;
@@ -1159,7 +1158,7 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
node = rb_first(&fs_info->block_group_cache_tree);
while (node) {
- block_group = rb_entry(node, struct btrfs_block_group_cache,
+ block_group = rb_entry(node, struct btrfs_block_group,
cache_node);
ret = populate_free_space_tree(trans, block_group);
if (ret)
@@ -1265,7 +1264,7 @@ abort:
}
static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
- struct btrfs_block_group_cache *block_group,
+ struct btrfs_block_group *block_group,
struct btrfs_path *path)
{
int ret;
@@ -1277,12 +1276,12 @@ static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
return ret;
return __add_to_free_space_tree(trans, block_group, path,
- block_group->key.objectid,
- block_group->key.offset);
+ block_group->start,
+ block_group->length);
}
int add_block_group_free_space(struct btrfs_trans_handle *trans,
- struct btrfs_block_group_cache *block_group)
+ struct btrfs_block_group *block_group)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_path *path = NULL;
@@ -1312,7 +1311,7 @@ out:
}
int remove_block_group_free_space(struct btrfs_trans_handle *trans,
- struct btrfs_block_group_cache *block_group)
+ struct btrfs_block_group *block_group)
{
struct btrfs_root *root = trans->fs_info->free_space_root;
struct btrfs_path *path;
@@ -1336,8 +1335,8 @@ int remove_block_group_free_space(struct btrfs_trans_handle *trans,
goto out;
}
- start = block_group->key.objectid;
- end = block_group->key.objectid + block_group->key.offset;
+ start = block_group->start;
+ end = block_group->start + block_group->length;
key.objectid = end - 1;
key.type = (u8)-1;
@@ -1355,8 +1354,8 @@ int remove_block_group_free_space(struct btrfs_trans_handle *trans,
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
- ASSERT(found_key.objectid == block_group->key.objectid);
- ASSERT(found_key.offset == block_group->key.offset);
+ ASSERT(found_key.objectid == block_group->start);
+ ASSERT(found_key.offset == block_group->length);
done = 1;
nr++;
path->slots[0]--;
@@ -1391,7 +1390,7 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
struct btrfs_path *path,
u32 expected_extent_count)
{
- struct btrfs_block_group_cache *block_group;
+ struct btrfs_block_group *block_group;
struct btrfs_fs_info *fs_info;
struct btrfs_root *root;
struct btrfs_key key;
@@ -1407,7 +1406,7 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
fs_info = block_group->fs_info;
root = fs_info->free_space_root;
- end = block_group->key.objectid + block_group->key.offset;
+ end = block_group->start + block_group->length;
while (1) {
ret = btrfs_next_item(root, path);
@@ -1454,7 +1453,7 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
if (extent_count != expected_extent_count) {
btrfs_err(fs_info,
"incorrect extent count for %llu; counted %u, expected %u",
- block_group->key.objectid, extent_count,
+ block_group->start, extent_count,
expected_extent_count);
ASSERT(0);
ret = -EIO;
@@ -1472,7 +1471,7 @@ static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
struct btrfs_path *path,
u32 expected_extent_count)
{
- struct btrfs_block_group_cache *block_group;
+ struct btrfs_block_group *block_group;
struct btrfs_fs_info *fs_info;
struct btrfs_root *root;
struct btrfs_key key;
@@ -1485,7 +1484,7 @@ static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
fs_info = block_group->fs_info;
root = fs_info->free_space_root;
- end = block_group->key.objectid + block_group->key.offset;
+ end = block_group->start + block_group->length;
while (1) {
ret = btrfs_next_item(root, path);
@@ -1516,7 +1515,7 @@ static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
if (extent_count != expected_extent_count) {
btrfs_err(fs_info,
"incorrect extent count for %llu; counted %u, expected %u",
- block_group->key.objectid, extent_count,
+ block_group->start, extent_count,
expected_extent_count);
ASSERT(0);
ret = -EIO;
@@ -1532,7 +1531,7 @@ out:
int load_free_space_tree(struct btrfs_caching_control *caching_ctl)
{
- struct btrfs_block_group_cache *block_group;
+ struct btrfs_block_group *block_group;
struct btrfs_free_space_info *info;
struct btrfs_path *path;
u32 extent_count, flags;
diff --git a/fs/btrfs/free-space-tree.h b/fs/btrfs/free-space-tree.h
index 360d50e1cdea..dc2463e4cfe3 100644
--- a/fs/btrfs/free-space-tree.h
+++ b/fs/btrfs/free-space-tree.h
@@ -16,14 +16,14 @@ struct btrfs_caching_control;
#define BTRFS_FREE_SPACE_BITMAP_SIZE 256
#define BTRFS_FREE_SPACE_BITMAP_BITS (BTRFS_FREE_SPACE_BITMAP_SIZE * BITS_PER_BYTE)
-void set_free_space_tree_thresholds(struct btrfs_block_group_cache *block_group);
+void set_free_space_tree_thresholds(struct btrfs_block_group *block_group);
int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info);
int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info);
int load_free_space_tree(struct btrfs_caching_control *caching_ctl);
int add_block_group_free_space(struct btrfs_trans_handle *trans,
- struct btrfs_block_group_cache *block_group);
+ struct btrfs_block_group *block_group);
int remove_block_group_free_space(struct btrfs_trans_handle *trans,
- struct btrfs_block_group_cache *block_group);
+ struct btrfs_block_group *block_group);
int add_to_free_space_tree(struct btrfs_trans_handle *trans,
u64 start, u64 size);
int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
@@ -32,21 +32,21 @@ int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
struct btrfs_free_space_info *
search_free_space_info(struct btrfs_trans_handle *trans,
- struct btrfs_block_group_cache *block_group,
+ struct btrfs_block_group *block_group,
struct btrfs_path *path, int cow);
int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
- struct btrfs_block_group_cache *block_group,
+ struct btrfs_block_group *block_group,
struct btrfs_path *path, u64 start, u64 size);
int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
- struct btrfs_block_group_cache *block_group,
+ struct btrfs_block_group *block_group,
struct btrfs_path *path, u64 start, u64 size);
int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
- struct btrfs_block_group_cache *block_group,
+ struct btrfs_block_group *block_group,
struct btrfs_path *path);
int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
- struct btrfs_block_group_cache *block_group,
+ struct btrfs_block_group *block_group,
struct btrfs_path *path);
-int free_space_test_bit(struct btrfs_block_group_cache *block_group,
+int free_space_test_bit(struct btrfs_block_group *block_group,
struct btrfs_path *path, u64 offset);
#endif
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 63cad7865d75..37345fb6191d 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -501,13 +501,13 @@ again:
ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
prealloc, prealloc, &alloc_hint);
if (ret) {
- btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc, true);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc);
btrfs_delalloc_release_metadata(BTRFS_I(inode), prealloc, true);
goto out_put;
}
ret = btrfs_write_out_ino_cache(root, trans, path, inode);
- btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc, false);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc);
out_put:
iput(inode);
out_release:
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a0546401bc0a..56032c518b26 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -368,6 +368,7 @@ struct async_chunk {
u64 end;
unsigned int write_flags;
struct list_head extents;
+ struct cgroup_subsys_state *blkcg_css;
struct btrfs_work work;
atomic_t *pending;
};
@@ -474,6 +475,7 @@ static noinline int compress_file_range(struct async_chunk *async_chunk)
u64 start = async_chunk->start;
u64 end = async_chunk->end;
u64 actual_end;
+ u64 i_size;
int ret = 0;
struct page **pages = NULL;
unsigned long nr_pages;
@@ -488,7 +490,19 @@ static noinline int compress_file_range(struct async_chunk *async_chunk)
inode_should_defrag(BTRFS_I(inode), start, end, end - start + 1,
SZ_16K);
- actual_end = min_t(u64, i_size_read(inode), end + 1);
+ /*
+ * We need to save i_size before now because it could change in between
+ * us evaluating the size and assigning it. This is because we lock and
+ * unlock the page in truncate and fallocate, and then modify the i_size
+ * later on.
+ *
+ * The barriers are to emulate READ_ONCE, remove that once i_size_read
+ * does that for us.
+ */
+ barrier();
+ i_size = i_size_read(inode);
+ barrier();
+ actual_end = min_t(u64, i_size, end + 1);
again:
will_compress = 0;
nr_pages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1;
@@ -699,10 +713,12 @@ cleanup_and_bail_uncompressed:
* to our extent and set things up for the async work queue to run
* cow_file_range to do the normal delalloc dance.
*/
- if (page_offset(async_chunk->locked_page) >= start &&
- page_offset(async_chunk->locked_page) <= end)
+ if (async_chunk->locked_page &&
+ (page_offset(async_chunk->locked_page) >= start &&
+ page_offset(async_chunk->locked_page)) <= end) {
__set_page_dirty_nobuffers(async_chunk->locked_page);
/* unlocked later on in the async handlers */
+ }
if (redirty)
extent_range_redirty_for_io(inode, start, end);
@@ -782,7 +798,7 @@ retry:
async_extent->start +
async_extent->ram_size - 1,
WB_SYNC_ALL);
- else if (ret)
+ else if (ret && async_chunk->locked_page)
unlock_page(async_chunk->locked_page);
kfree(async_extent);
cond_resched();
@@ -865,7 +881,8 @@ retry:
ins.objectid,
ins.offset, async_extent->pages,
async_extent->nr_pages,
- async_chunk->write_flags)) {
+ async_chunk->write_flags,
+ async_chunk->blkcg_css)) {
struct page *p = async_extent->pages[0];
const u64 start = async_extent->start;
const u64 end = start + async_extent->ram_size - 1;
@@ -1183,6 +1200,8 @@ static noinline void async_cow_free(struct btrfs_work *work)
async_chunk = container_of(work, struct async_chunk, work);
if (async_chunk->inode)
btrfs_add_delayed_iput(async_chunk->inode);
+ if (async_chunk->blkcg_css)
+ css_put(async_chunk->blkcg_css);
/*
* Since the pointer to 'pending' is at the beginning of the array of
* async_chunk's, freeing it ensures the whole array has been freed.
@@ -1191,12 +1210,14 @@ static noinline void async_cow_free(struct btrfs_work *work)
kvfree(async_chunk->pending);
}
-static int cow_file_range_async(struct inode *inode, struct page *locked_page,
+static int cow_file_range_async(struct inode *inode,
+ struct writeback_control *wbc,
+ struct page *locked_page,
u64 start, u64 end, int *page_started,
- unsigned long *nr_written,
- unsigned int write_flags)
+ unsigned long *nr_written)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct cgroup_subsys_state *blkcg_css = wbc_blkcg_css(wbc);
struct async_cow *ctx;
struct async_chunk *async_chunk;
unsigned long nr_pages;
@@ -1205,6 +1226,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
int i;
bool should_compress;
unsigned nofs_flag;
+ const unsigned int write_flags = wbc_to_write_flags(wbc);
unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
@@ -1251,14 +1273,45 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
async_chunk[i].inode = inode;
async_chunk[i].start = start;
async_chunk[i].end = cur_end;
- async_chunk[i].locked_page = locked_page;
async_chunk[i].write_flags = write_flags;
INIT_LIST_HEAD(&async_chunk[i].extents);
- btrfs_init_work(&async_chunk[i].work,
- btrfs_delalloc_helper,
- async_cow_start, async_cow_submit,
- async_cow_free);
+ /*
+ * The locked_page comes all the way from writepage and its
+ * the original page we were actually given. As we spread
+ * this large delalloc region across multiple async_chunk
+ * structs, only the first struct needs a pointer to locked_page
+ *
+ * This way we don't need racey decisions about who is supposed
+ * to unlock it.
+ */
+ if (locked_page) {
+ /*
+ * Depending on the compressibility, the pages might or
+ * might not go through async. We want all of them to
+ * be accounted against wbc once. Let's do it here
+ * before the paths diverge. wbc accounting is used
+ * only for foreign writeback detection and doesn't
+ * need full accuracy. Just account the whole thing
+ * against the first page.
+ */
+ wbc_account_cgroup_owner(wbc, locked_page,
+ cur_end - start);
+ async_chunk[i].locked_page = locked_page;
+ locked_page = NULL;
+ } else {
+ async_chunk[i].locked_page = NULL;
+ }
+
+ if (blkcg_css != blkcg_root_css) {
+ css_get(blkcg_css);
+ async_chunk[i].blkcg_css = blkcg_css;
+ } else {
+ async_chunk[i].blkcg_css = NULL;
+ }
+
+ btrfs_init_work(&async_chunk[i].work, async_cow_start,
+ async_cow_submit, async_cow_free);
nr_pages = DIV_ROUND_UP(cur_end - start, PAGE_SIZE);
atomic_add(nr_pages, &fs_info->async_delalloc_pages);
@@ -1684,7 +1737,6 @@ int btrfs_run_delalloc_range(struct inode *inode, struct page *locked_page,
{
int ret;
int force_cow = need_force_cow(inode, start, end);
- unsigned int write_flags = wbc_to_write_flags(wbc);
if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW && !force_cow) {
ret = run_delalloc_nocow(inode, locked_page, start, end,
@@ -1699,9 +1751,8 @@ int btrfs_run_delalloc_range(struct inode *inode, struct page *locked_page,
} else {
set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
&BTRFS_I(inode)->runtime_flags);
- ret = cow_file_range_async(inode, locked_page, start, end,
- page_started, nr_written,
- write_flags);
+ ret = cow_file_range_async(inode, wbc, locked_page, start, end,
+ page_started, nr_written);
}
if (ret)
btrfs_cleanup_ordered_extents(inode, locked_page, start,
@@ -2097,7 +2148,7 @@ static blk_status_t btrfs_submit_bio_hook(struct inode *inode, struct bio *bio,
}
mapit:
- ret = btrfs_map_bio(fs_info, bio, mirror_num, 0);
+ ret = btrfs_map_bio(fs_info, bio, mirror_num);
out:
if (ret) {
@@ -2201,12 +2252,16 @@ again:
mapping_set_error(page->mapping, ret);
end_extent_writepage(page, ret, page_start, page_end);
ClearPageChecked(page);
- goto out;
+ goto out_reserved;
}
ClearPageChecked(page);
set_page_dirty(page);
- btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, false);
+out_reserved:
+ btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
+ if (ret)
+ btrfs_delalloc_release_space(inode, data_reserved, page_start,
+ PAGE_SIZE, true);
out:
unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
&cached_state);
@@ -2247,8 +2302,7 @@ int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end)
SetPageChecked(page);
get_page(page);
- btrfs_init_work(&fixup->work, btrfs_fixup_helper,
- btrfs_writepage_fixup_worker, NULL, NULL);
+ btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL);
fixup->page = page;
btrfs_queue_work(fs_info->fixup_workers, &fixup->work);
return -EBUSY;
@@ -2662,7 +2716,7 @@ static noinline int relink_extent_backref(struct btrfs_path *path,
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
- inode = btrfs_iget(fs_info->sb, &key, root, NULL);
+ inode = btrfs_iget(fs_info->sb, &key, root);
if (IS_ERR(inode)) {
srcu_read_unlock(&fs_info->subvol_srcu, index);
return 0;
@@ -2986,7 +3040,7 @@ out_kfree:
static void btrfs_release_delalloc_bytes(struct btrfs_fs_info *fs_info,
u64 start, u64 len)
{
- struct btrfs_block_group_cache *cache;
+ struct btrfs_block_group *cache;
cache = btrfs_lookup_block_group(fs_info, start);
ASSERT(cache);
@@ -3014,7 +3068,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
int compress_type = 0;
int ret = 0;
u64 logical_len = ordered_extent->len;
- bool nolock;
+ bool freespace_inode;
bool truncated = false;
bool range_locked = false;
bool clear_new_delalloc_bytes = false;
@@ -3025,7 +3079,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
!test_bit(BTRFS_ORDERED_DIRECT, &ordered_extent->flags))
clear_new_delalloc_bytes = true;
- nolock = btrfs_is_free_space_inode(BTRFS_I(inode));
+ freespace_inode = btrfs_is_free_space_inode(BTRFS_I(inode));
if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) {
ret = -EIO;
@@ -3056,8 +3110,8 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
btrfs_qgroup_free_data(inode, NULL, ordered_extent->file_offset,
ordered_extent->len);
btrfs_ordered_update_i_size(inode, 0, ordered_extent);
- if (nolock)
- trans = btrfs_join_transaction_nolock(root);
+ if (freespace_inode)
+ trans = btrfs_join_transaction_spacecache(root);
else
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
@@ -3091,8 +3145,8 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
EXTENT_DEFRAG, 0, 0, &cached_state);
}
- if (nolock)
- trans = btrfs_join_transaction_nolock(root);
+ if (freespace_inode)
+ trans = btrfs_join_transaction_spacecache(root);
else
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
@@ -3241,7 +3295,6 @@ void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start,
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_ordered_extent *ordered_extent = NULL;
struct btrfs_workqueue *wq;
- btrfs_work_func_t func;
trace_btrfs_writepage_end_io_hook(page, start, end, uptodate);
@@ -3250,16 +3303,12 @@ void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start,
end - start + 1, uptodate))
return;
- if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
+ if (btrfs_is_free_space_inode(BTRFS_I(inode)))
wq = fs_info->endio_freespace_worker;
- func = btrfs_freespace_write_helper;
- } else {
+ else
wq = fs_info->endio_write_workers;
- func = btrfs_endio_write_helper;
- }
- btrfs_init_work(&ordered_extent->work, func, finish_ordered_fn, NULL,
- NULL);
+ btrfs_init_work(&ordered_extent->work, finish_ordered_fn, NULL, NULL);
btrfs_queue_work(wq, &ordered_extent->work);
}
@@ -3518,7 +3567,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
found_key.objectid = found_key.offset;
found_key.type = BTRFS_INODE_ITEM_KEY;
found_key.offset = 0;
- inode = btrfs_iget(fs_info->sb, &found_key, root, NULL);
+ inode = btrfs_iget(fs_info->sb, &found_key, root);
ret = PTR_ERR_OR_ZERO(inode);
if (ret && ret != -ENOENT)
goto out;
@@ -4951,7 +5000,7 @@ again:
if (!page) {
btrfs_delalloc_release_space(inode, data_reserved,
block_start, blocksize, true);
- btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, true);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
ret = -ENOMEM;
goto out;
}
@@ -5018,7 +5067,7 @@ out_unlock:
if (ret)
btrfs_delalloc_release_space(inode, data_reserved, block_start,
blocksize, true);
- btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize, (ret != 0));
+ btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
unlock_page(page);
put_page(page);
out:
@@ -5140,7 +5189,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
hole_em->block_len = 0;
hole_em->orig_block_len = 0;
hole_em->ram_bytes = hole_size;
- hole_em->bdev = fs_info->fs_devices->latest_bdev;
hole_em->compress_type = BTRFS_COMPRESS_NONE;
hole_em->generation = fs_info->generation;
@@ -5737,12 +5785,14 @@ static struct inode *btrfs_iget_locked(struct super_block *s,
return inode;
}
-/* Get an inode object given its location and corresponding root.
- * Returns in *is_new if the inode was read from disk
+/*
+ * Get an inode object given its location and corresponding root.
+ * Path can be preallocated to prevent recursing back to iget through
+ * allocator. NULL is also valid but may require an additional allocation
+ * later.
*/
struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location,
- struct btrfs_root *root, int *new,
- struct btrfs_path *path)
+ struct btrfs_root *root, struct btrfs_path *path)
{
struct inode *inode;
@@ -5757,8 +5807,6 @@ struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location,
if (!ret) {
inode_tree_add(inode);
unlock_new_inode(inode);
- if (new)
- *new = 1;
} else {
iget_failed(inode);
/*
@@ -5776,9 +5824,9 @@ struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location,
}
struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
- struct btrfs_root *root, int *new)
+ struct btrfs_root *root)
{
- return btrfs_iget_path(s, location, root, new, NULL);
+ return btrfs_iget_path(s, location, root, NULL);
}
static struct inode *new_simple_dir(struct super_block *s,
@@ -5844,7 +5892,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
return ERR_PTR(ret);
if (location.type == BTRFS_INODE_ITEM_KEY) {
- inode = btrfs_iget(dir->i_sb, &location, root, NULL);
+ inode = btrfs_iget(dir->i_sb, &location, root);
if (IS_ERR(inode))
return inode;
@@ -5869,7 +5917,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
else
inode = new_simple_dir(dir->i_sb, &location, sub_root);
} else {
- inode = btrfs_iget(dir->i_sb, &location, sub_root, NULL);
+ inode = btrfs_iget(dir->i_sb, &location, sub_root);
}
srcu_read_unlock(&fs_info->subvol_srcu, index);
@@ -6305,13 +6353,16 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
u32 sizes[2];
int nitems = name ? 2 : 1;
unsigned long ptr;
+ unsigned int nofs_flag;
int ret;
path = btrfs_alloc_path();
if (!path)
return ERR_PTR(-ENOMEM);
+ nofs_flag = memalloc_nofs_save();
inode = new_inode(fs_info->sb);
+ memalloc_nofs_restore(nofs_flag);
if (!inode) {
btrfs_free_path(path);
return ERR_PTR(-ENOMEM);
@@ -6915,8 +6966,6 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, start, len);
- if (em)
- em->bdev = fs_info->fs_devices->latest_bdev;
read_unlock(&em_tree->lock);
if (em) {
@@ -6932,7 +6981,6 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
err = -ENOMEM;
goto out;
}
- em->bdev = fs_info->fs_devices->latest_bdev;
em->start = EXTENT_MAP_HOLE;
em->orig_start = EXTENT_MAP_HOLE;
em->len = (u64)-1;
@@ -7191,7 +7239,6 @@ struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
err = -ENOMEM;
goto out;
}
- em->bdev = NULL;
ASSERT(hole_em);
/*
@@ -7551,7 +7598,6 @@ static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
{
struct extent_map_tree *em_tree;
struct extent_map *em;
- struct btrfs_root *root = BTRFS_I(inode)->root;
int ret;
ASSERT(type == BTRFS_ORDERED_PREALLOC ||
@@ -7569,7 +7615,6 @@ static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len,
em->len = len;
em->block_len = block_len;
em->block_start = block_start;
- em->bdev = root->fs_info->fs_devices->latest_bdev;
em->orig_block_len = orig_block_len;
em->ram_bytes = ram_bytes;
em->generation = -1;
@@ -7608,6 +7653,8 @@ static int btrfs_get_blocks_direct_read(struct extent_map *em,
struct inode *inode,
u64 start, u64 len)
{
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+
if (em->block_start == EXTENT_MAP_HOLE ||
test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
return -ENOENT;
@@ -7617,7 +7664,7 @@ static int btrfs_get_blocks_direct_read(struct extent_map *em,
bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
inode->i_blkbits;
bh_result->b_size = len;
- bh_result->b_bdev = em->bdev;
+ bh_result->b_bdev = fs_info->fs_devices->latest_bdev;
set_buffer_mapped(bh_result);
return 0;
@@ -7700,7 +7747,7 @@ skip_cow:
bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
inode->i_blkbits;
bh_result->b_size = len;
- bh_result->b_bdev = em->bdev;
+ bh_result->b_bdev = fs_info->fs_devices->latest_bdev;
set_buffer_mapped(bh_result);
if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
@@ -7842,7 +7889,7 @@ static inline blk_status_t submit_dio_repair_bio(struct inode *inode,
if (ret)
return ret;
- ret = btrfs_map_bio(fs_info, bio, mirror_num, 0);
+ ret = btrfs_map_bio(fs_info, bio, mirror_num);
return ret;
}
@@ -8195,18 +8242,14 @@ static void __endio_write_update_ordered(struct inode *inode,
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_ordered_extent *ordered = NULL;
struct btrfs_workqueue *wq;
- btrfs_work_func_t func;
u64 ordered_offset = offset;
u64 ordered_bytes = bytes;
u64 last_offset;
- if (btrfs_is_free_space_inode(BTRFS_I(inode))) {
+ if (btrfs_is_free_space_inode(BTRFS_I(inode)))
wq = fs_info->endio_freespace_worker;
- func = btrfs_freespace_write_helper;
- } else {
+ else
wq = fs_info->endio_write_workers;
- func = btrfs_endio_write_helper;
- }
while (ordered_offset < offset + bytes) {
last_offset = ordered_offset;
@@ -8214,9 +8257,8 @@ static void __endio_write_update_ordered(struct inode *inode,
&ordered_offset,
ordered_bytes,
uptodate)) {
- btrfs_init_work(&ordered->work, func,
- finish_ordered_fn,
- NULL, NULL);
+ btrfs_init_work(&ordered->work, finish_ordered_fn, NULL,
+ NULL);
btrfs_queue_work(wq, &ordered->work);
}
/*
@@ -8373,7 +8415,7 @@ static inline blk_status_t btrfs_submit_dio_bio(struct bio *bio,
goto err;
}
map:
- ret = btrfs_map_bio(fs_info, bio, 0, 0);
+ ret = btrfs_map_bio(fs_info, bio, 0);
err:
return ret;
}
@@ -8706,7 +8748,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
} else if (ret >= 0 && (size_t)ret < count)
btrfs_delalloc_release_space(inode, data_reserved,
offset, count - (size_t)ret, true);
- btrfs_delalloc_release_extents(BTRFS_I(inode), count, false);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), count);
}
out:
if (wakeup)
@@ -9056,7 +9098,7 @@ again:
unlock_extent_cached(io_tree, page_start, page_end, &cached_state);
if (!ret2) {
- btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, true);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
sb_end_pagefault(inode->i_sb);
extent_changeset_free(data_reserved);
return VM_FAULT_LOCKED;
@@ -9065,7 +9107,7 @@ again:
out_unlock:
unlock_page(page);
out:
- btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, (ret != 0));
+ btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
btrfs_delalloc_release_space(inode, data_reserved, page_start,
reserved_space, (ret != 0));
out_noreserve:
@@ -9305,7 +9347,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->io_failure_tree.track_uptodate = true;
atomic_set(&ei->sync_writers, 0);
mutex_init(&ei->log_mutex);
- mutex_init(&ei->delalloc_mutex);
btrfs_ordered_inode_tree_init(&ei->ordered_tree);
INIT_LIST_HEAD(&ei->delalloc_inodes);
INIT_LIST_HEAD(&ei->delayed_iput);
@@ -9534,6 +9575,9 @@ static int btrfs_rename_exchange(struct inode *old_dir,
goto out_notrans;
}
+ if (dest != root)
+ btrfs_record_root_in_trans(trans, dest);
+
/*
* We need to find a free sequence number both in the source and
* in the destination directory for the exchange.
@@ -9728,6 +9772,18 @@ out_fail:
commit_transaction = true;
}
if (commit_transaction) {
+ /*
+ * We may have set commit_transaction when logging the new name
+ * in the destination root, in which case we left the source
+ * root context in the list of log contextes. So make sure we
+ * remove it to avoid invalid memory accesses, since the context
+ * was allocated in our stack frame.
+ */
+ if (sync_log_root) {
+ mutex_lock(&root->log_mutex);
+ list_del_init(&ctx_root.list);
+ mutex_unlock(&root->log_mutex);
+ }
ret = btrfs_commit_transaction(trans);
} else {
int ret2;
@@ -9741,6 +9797,9 @@ out_notrans:
if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
up_read(&fs_info->subvol_sem);
+ ASSERT(list_empty(&ctx_root.list));
+ ASSERT(list_empty(&ctx_dest.list));
+
return ret;
}
@@ -10085,8 +10144,7 @@ static struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode
init_completion(&work->completion);
INIT_LIST_HEAD(&work->list);
work->inode = inode;
- btrfs_init_work(&work->work, btrfs_flush_delalloc_helper,
- btrfs_run_delalloc_work, NULL, NULL);
+ btrfs_init_work(&work->work, btrfs_run_delalloc_work, NULL, NULL);
return work;
}
@@ -10419,7 +10477,6 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
em->block_len = ins.offset;
em->orig_block_len = ins.offset;
em->ram_bytes = ins.offset;
- em->bdev = fs_info->fs_devices->latest_bdev;
set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
em->generation = trans->transid;
@@ -10775,7 +10832,7 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
start = 0;
while (start < isize) {
u64 logical_block_start, physical_block_start;
- struct btrfs_block_group_cache *bg;
+ struct btrfs_block_group *bg;
u64 len = isize - start;
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index de730e56d3f5..a1ee0b775e65 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -479,10 +479,9 @@ static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
return put_user(inode->i_generation, arg);
}
-static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
+static noinline int btrfs_ioctl_fitrim(struct btrfs_fs_info *fs_info,
+ void __user *arg)
{
- struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_device *device;
struct request_queue *q;
struct fstrim_range range;
@@ -541,7 +540,7 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
return 0;
}
-int btrfs_is_empty_uuid(u8 *uuid)
+int __pure btrfs_is_empty_uuid(u8 *uuid)
{
int i;
@@ -1360,8 +1359,7 @@ again:
unlock_page(pages[i]);
put_page(pages[i]);
}
- btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT,
- false);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
extent_changeset_free(data_reserved);
return i_done;
out:
@@ -1372,8 +1370,7 @@ out:
btrfs_delalloc_release_space(inode, data_reserved,
start_index << PAGE_SHIFT,
page_cnt << PAGE_SHIFT, true);
- btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT,
- true);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT);
extent_changeset_free(data_reserved);
return ret;
@@ -1411,7 +1408,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
return -EINVAL;
if (do_compress) {
- if (range->compress_type > BTRFS_COMPRESS_TYPES)
+ if (range->compress_type >= BTRFS_NR_COMPRESS_TYPES)
return -EINVAL;
if (range->compress_type)
compress_type = range->compress_type;
@@ -2464,7 +2461,7 @@ static int btrfs_search_path_in_tree_user(struct inode *inode,
goto out;
}
- temp_inode = btrfs_iget(sb, &key2, root, NULL);
+ temp_inode = btrfs_iget(sb, &key2, root);
if (IS_ERR(temp_inode)) {
ret = PTR_ERR(temp_inode);
goto out;
@@ -4034,16 +4031,15 @@ out:
static void get_block_group_info(struct list_head *groups_list,
struct btrfs_ioctl_space_info *space)
{
- struct btrfs_block_group_cache *block_group;
+ struct btrfs_block_group *block_group;
space->total_bytes = 0;
space->used_bytes = 0;
space->flags = 0;
list_for_each_entry(block_group, groups_list, list) {
space->flags = block_group->flags;
- space->total_bytes += block_group->key.offset;
- space->used_bytes +=
- btrfs_block_group_used(&block_group->item);
+ space->total_bytes += block_group->length;
+ space->used_bytes += block_group->used;
}
}
@@ -4197,9 +4193,6 @@ static noinline long btrfs_ioctl_start_sync(struct btrfs_root *root,
u64 transid;
int ret;
- btrfs_warn(root->fs_info,
- "START_SYNC ioctl is deprecated and will be removed in kernel 5.7");
-
trans = btrfs_attach_transaction_barrier(root);
if (IS_ERR(trans)) {
if (PTR_ERR(trans) != -ENOENT)
@@ -4227,9 +4220,6 @@ static noinline long btrfs_ioctl_wait_sync(struct btrfs_fs_info *fs_info,
{
u64 transid;
- btrfs_warn(fs_info,
- "WAIT_SYNC ioctl is deprecated and will be removed in kernel 5.7");
-
if (argp) {
if (copy_from_user(&transid, argp, sizeof(transid)))
return -EFAULT;
@@ -4960,10 +4950,9 @@ drop_write:
return ret;
}
-static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg)
+static long btrfs_ioctl_quota_rescan_status(struct btrfs_fs_info *fs_info,
+ void __user *arg)
{
- struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_ioctl_quota_rescan_args *qsa;
int ret = 0;
@@ -4986,11 +4975,9 @@ static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg)
return ret;
}
-static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg)
+static long btrfs_ioctl_quota_rescan_wait(struct btrfs_fs_info *fs_info,
+ void __user *arg)
{
- struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -5162,10 +5149,9 @@ out:
return ret;
}
-static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg)
+static int btrfs_ioctl_get_fslabel(struct btrfs_fs_info *fs_info,
+ void __user *arg)
{
- struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
size_t len;
int ret;
char label[BTRFS_LABEL_SIZE];
@@ -5249,10 +5235,9 @@ int btrfs_ioctl_get_supported_features(void __user *arg)
return 0;
}
-static int btrfs_ioctl_get_features(struct file *file, void __user *arg)
+static int btrfs_ioctl_get_features(struct btrfs_fs_info *fs_info,
+ void __user *arg)
{
- struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_super_block *super_block = fs_info->super_copy;
struct btrfs_ioctl_feature_flags features;
@@ -5453,11 +5438,11 @@ long btrfs_ioctl(struct file *file, unsigned int
case FS_IOC_GETVERSION:
return btrfs_ioctl_getversion(file, argp);
case FS_IOC_GETFSLABEL:
- return btrfs_ioctl_get_fslabel(file, argp);
+ return btrfs_ioctl_get_fslabel(fs_info, argp);
case FS_IOC_SETFSLABEL:
return btrfs_ioctl_set_fslabel(file, argp);
case FITRIM:
- return btrfs_ioctl_fitrim(file, argp);
+ return btrfs_ioctl_fitrim(fs_info, argp);
case BTRFS_IOC_SNAP_CREATE:
return btrfs_ioctl_snap_create(file, argp, 0);
case BTRFS_IOC_SNAP_CREATE_V2:
@@ -5562,15 +5547,15 @@ long btrfs_ioctl(struct file *file, unsigned int
case BTRFS_IOC_QUOTA_RESCAN:
return btrfs_ioctl_quota_rescan(file, argp);
case BTRFS_IOC_QUOTA_RESCAN_STATUS:
- return btrfs_ioctl_quota_rescan_status(file, argp);
+ return btrfs_ioctl_quota_rescan_status(fs_info, argp);
case BTRFS_IOC_QUOTA_RESCAN_WAIT:
- return btrfs_ioctl_quota_rescan_wait(file, argp);
+ return btrfs_ioctl_quota_rescan_wait(fs_info, argp);
case BTRFS_IOC_DEV_REPLACE:
return btrfs_ioctl_dev_replace(fs_info, argp);
case BTRFS_IOC_GET_SUPPORTED_FEATURES:
return btrfs_ioctl_get_supported_features(argp);
case BTRFS_IOC_GET_FEATURES:
- return btrfs_ioctl_get_features(file, argp);
+ return btrfs_ioctl_get_features(fs_info, argp);
case BTRFS_IOC_SET_FEATURES:
return btrfs_ioctl_set_features(file, argp);
case FS_IOC_FSGETXATTR:
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 7f9a578a1a20..571c4826c428 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -13,65 +13,164 @@
#include "extent_io.h"
#include "locking.h"
+/*
+ * Extent buffer locking
+ * =====================
+ *
+ * The locks use a custom scheme that allows to do more operations than are
+ * available fromt current locking primitives. The building blocks are still
+ * rwlock and wait queues.
+ *
+ * Required semantics:
+ *
+ * - reader/writer exclusion
+ * - writer/writer exclusion
+ * - reader/reader sharing
+ * - spinning lock semantics
+ * - blocking lock semantics
+ * - try-lock semantics for readers and writers
+ * - one level nesting, allowing read lock to be taken by the same thread that
+ * already has write lock
+ *
+ * The extent buffer locks (also called tree locks) manage access to eb data
+ * related to the storage in the b-tree (keys, items, but not the individual
+ * members of eb).
+ * We want concurrency of many readers and safe updates. The underlying locking
+ * is done by read-write spinlock and the blocking part is implemented using
+ * counters and wait queues.
+ *
+ * spinning semantics - the low-level rwlock is held so all other threads that
+ * want to take it are spinning on it.
+ *
+ * blocking semantics - the low-level rwlock is not held but the counter
+ * denotes how many times the blocking lock was held;
+ * sleeping is possible
+ *
+ * Write lock always allows only one thread to access the data.
+ *
+ *
+ * Debugging
+ * ---------
+ *
+ * There are additional state counters that are asserted in various contexts,
+ * removed from non-debug build to reduce extent_buffer size and for
+ * performance reasons.
+ *
+ *
+ * Lock nesting
+ * ------------
+ *
+ * A write operation on a tree might indirectly start a look up on the same
+ * tree. This can happen when btrfs_cow_block locks the tree and needs to
+ * lookup free extents.
+ *
+ * btrfs_cow_block
+ * ..
+ * alloc_tree_block_no_bg_flush
+ * btrfs_alloc_tree_block
+ * btrfs_reserve_extent
+ * ..
+ * load_free_space_cache
+ * ..
+ * btrfs_lookup_file_extent
+ * btrfs_search_slot
+ *
+ *
+ * Locking pattern - spinning
+ * --------------------------
+ *
+ * The simple locking scenario, the +--+ denotes the spinning section.
+ *
+ * +- btrfs_tree_lock
+ * | - extent_buffer::rwlock is held
+ * | - no heavy operations should happen, eg. IO, memory allocations, large
+ * | structure traversals
+ * +- btrfs_tree_unock
+*
+*
+ * Locking pattern - blocking
+ * --------------------------
+ *
+ * The blocking write uses the following scheme. The +--+ denotes the spinning
+ * section.
+ *
+ * +- btrfs_tree_lock
+ * |
+ * +- btrfs_set_lock_blocking_write
+ *
+ * - allowed: IO, memory allocations, etc.
+ *
+ * -- btrfs_tree_unlock - note, no explicit unblocking necessary
+ *
+ *
+ * Blocking read is similar.
+ *
+ * +- btrfs_tree_read_lock
+ * |
+ * +- btrfs_set_lock_blocking_read
+ *
+ * - heavy operations allowed
+ *
+ * +- btrfs_tree_read_unlock_blocking
+ * |
+ * +- btrfs_tree_read_unlock
+ *
+ */
+
#ifdef CONFIG_BTRFS_DEBUG
-static void btrfs_assert_spinning_writers_get(struct extent_buffer *eb)
+static inline void btrfs_assert_spinning_writers_get(struct extent_buffer *eb)
{
WARN_ON(eb->spinning_writers);
eb->spinning_writers++;
}
-static void btrfs_assert_spinning_writers_put(struct extent_buffer *eb)
+static inline void btrfs_assert_spinning_writers_put(struct extent_buffer *eb)
{
WARN_ON(eb->spinning_writers != 1);
eb->spinning_writers--;
}
-static void btrfs_assert_no_spinning_writers(struct extent_buffer *eb)
+static inline void btrfs_assert_no_spinning_writers(struct extent_buffer *eb)
{
WARN_ON(eb->spinning_writers);
}
-static void btrfs_assert_spinning_readers_get(struct extent_buffer *eb)
+static inline void btrfs_assert_spinning_readers_get(struct extent_buffer *eb)
{
atomic_inc(&eb->spinning_readers);
}
-static void btrfs_assert_spinning_readers_put(struct extent_buffer *eb)
+static inline void btrfs_assert_spinning_readers_put(struct extent_buffer *eb)
{
WARN_ON(atomic_read(&eb->spinning_readers) == 0);
atomic_dec(&eb->spinning_readers);
}
-static void btrfs_assert_tree_read_locks_get(struct extent_buffer *eb)
+static inline void btrfs_assert_tree_read_locks_get(struct extent_buffer *eb)
{
atomic_inc(&eb->read_locks);
}
-static void btrfs_assert_tree_read_locks_put(struct extent_buffer *eb)
+static inline void btrfs_assert_tree_read_locks_put(struct extent_buffer *eb)
{
atomic_dec(&eb->read_locks);
}
-static void btrfs_assert_tree_read_locked(struct extent_buffer *eb)
+static inline void btrfs_assert_tree_read_locked(struct extent_buffer *eb)
{
BUG_ON(!atomic_read(&eb->read_locks));
}
-static void btrfs_assert_tree_write_locks_get(struct extent_buffer *eb)
+static inline void btrfs_assert_tree_write_locks_get(struct extent_buffer *eb)
{
eb->write_locks++;
}
-static void btrfs_assert_tree_write_locks_put(struct extent_buffer *eb)
+static inline void btrfs_assert_tree_write_locks_put(struct extent_buffer *eb)
{
eb->write_locks--;
}
-void btrfs_assert_tree_locked(struct extent_buffer *eb)
-{
- BUG_ON(!eb->write_locks);
-}
-
#else
static void btrfs_assert_spinning_writers_get(struct extent_buffer *eb) { }
static void btrfs_assert_spinning_writers_put(struct extent_buffer *eb) { }
@@ -81,11 +180,19 @@ static void btrfs_assert_spinning_readers_get(struct extent_buffer *eb) { }
static void btrfs_assert_tree_read_locked(struct extent_buffer *eb) { }
static void btrfs_assert_tree_read_locks_get(struct extent_buffer *eb) { }
static void btrfs_assert_tree_read_locks_put(struct extent_buffer *eb) { }
-void btrfs_assert_tree_locked(struct extent_buffer *eb) { }
static void btrfs_assert_tree_write_locks_get(struct extent_buffer *eb) { }
static void btrfs_assert_tree_write_locks_put(struct extent_buffer *eb) { }
#endif
+/*
+ * Mark already held read lock as blocking. Can be nested in write lock by the
+ * same thread.
+ *
+ * Use when there are potentially long operations ahead so other thread waiting
+ * on the lock will not actively spin but sleep instead.
+ *
+ * The rwlock is released and blocking reader counter is increased.
+ */
void btrfs_set_lock_blocking_read(struct extent_buffer *eb)
{
trace_btrfs_set_lock_blocking_read(eb);
@@ -102,6 +209,14 @@ void btrfs_set_lock_blocking_read(struct extent_buffer *eb)
read_unlock(&eb->lock);
}
+/*
+ * Mark already held write lock as blocking.
+ *
+ * Use when there are potentially long operations ahead so other threads
+ * waiting on the lock will not actively spin but sleep instead.
+ *
+ * The rwlock is released and blocking writers is set.
+ */
void btrfs_set_lock_blocking_write(struct extent_buffer *eb)
{
trace_btrfs_set_lock_blocking_write(eb);
@@ -115,14 +230,19 @@ void btrfs_set_lock_blocking_write(struct extent_buffer *eb)
if (eb->blocking_writers == 0) {
btrfs_assert_spinning_writers_put(eb);
btrfs_assert_tree_locked(eb);
- eb->blocking_writers++;
+ WRITE_ONCE(eb->blocking_writers, 1);
write_unlock(&eb->lock);
}
}
/*
- * take a spinning read lock. This will wait for any blocking
- * writers
+ * Lock the extent buffer for read. Wait for any writers (spinning or blocking).
+ * Can be nested in write lock by the same thread.
+ *
+ * Use when the locked section does only lightweight actions and busy waiting
+ * would be cheaper than making other threads do the wait/wake loop.
+ *
+ * The rwlock is held upon exit.
*/
void btrfs_tree_read_lock(struct extent_buffer *eb)
{
@@ -134,23 +254,24 @@ again:
read_lock(&eb->lock);
BUG_ON(eb->blocking_writers == 0 &&
current->pid == eb->lock_owner);
- if (eb->blocking_writers && current->pid == eb->lock_owner) {
- /*
- * This extent is already write-locked by our thread. We allow
- * an additional read lock to be added because it's for the same
- * thread. btrfs_find_all_roots() depends on this as it may be
- * called on a partly (write-)locked tree.
- */
- BUG_ON(eb->lock_nested);
- eb->lock_nested = true;
- read_unlock(&eb->lock);
- trace_btrfs_tree_read_lock(eb, start_ns);
- return;
- }
if (eb->blocking_writers) {
+ if (current->pid == eb->lock_owner) {
+ /*
+ * This extent is already write-locked by our thread.
+ * We allow an additional read lock to be added because
+ * it's for the same thread. btrfs_find_all_roots()
+ * depends on this as it may be called on a partly
+ * (write-)locked tree.
+ */
+ BUG_ON(eb->lock_nested);
+ eb->lock_nested = true;
+ read_unlock(&eb->lock);
+ trace_btrfs_tree_read_lock(eb, start_ns);
+ return;
+ }
read_unlock(&eb->lock);
wait_event(eb->write_lock_wq,
- eb->blocking_writers == 0);
+ READ_ONCE(eb->blocking_writers) == 0);
goto again;
}
btrfs_assert_tree_read_locks_get(eb);
@@ -159,17 +280,19 @@ again:
}
/*
- * take a spinning read lock.
- * returns 1 if we get the read lock and 0 if we don't
- * this won't wait for blocking writers
+ * Lock extent buffer for read, optimistically expecting that there are no
+ * contending blocking writers. If there are, don't wait.
+ *
+ * Return 1 if the rwlock has been taken, 0 otherwise
*/
int btrfs_tree_read_lock_atomic(struct extent_buffer *eb)
{
- if (eb->blocking_writers)
+ if (READ_ONCE(eb->blocking_writers))
return 0;
read_lock(&eb->lock);
- if (eb->blocking_writers) {
+ /* Refetch value after lock */
+ if (READ_ONCE(eb->blocking_writers)) {
read_unlock(&eb->lock);
return 0;
}
@@ -180,18 +303,20 @@ int btrfs_tree_read_lock_atomic(struct extent_buffer *eb)
}
/*
- * returns 1 if we get the read lock and 0 if we don't
- * this won't wait for blocking writers
+ * Try-lock for read. Don't block or wait for contending writers.
+ *
+ * Retrun 1 if the rwlock has been taken, 0 otherwise
*/
int btrfs_try_tree_read_lock(struct extent_buffer *eb)
{
- if (eb->blocking_writers)
+ if (READ_ONCE(eb->blocking_writers))
return 0;
if (!read_trylock(&eb->lock))
return 0;
- if (eb->blocking_writers) {
+ /* Refetch value after lock */
+ if (READ_ONCE(eb->blocking_writers)) {
read_unlock(&eb->lock);
return 0;
}
@@ -202,16 +327,19 @@ int btrfs_try_tree_read_lock(struct extent_buffer *eb)
}
/*
- * returns 1 if we get the read lock and 0 if we don't
- * this won't wait for blocking writers or readers
+ * Try-lock for write. May block until the lock is uncontended, but does not
+ * wait until it is free.
+ *
+ * Retrun 1 if the rwlock has been taken, 0 otherwise
*/
int btrfs_try_tree_write_lock(struct extent_buffer *eb)
{
- if (eb->blocking_writers || atomic_read(&eb->blocking_readers))
+ if (READ_ONCE(eb->blocking_writers) || atomic_read(&eb->blocking_readers))
return 0;
write_lock(&eb->lock);
- if (eb->blocking_writers || atomic_read(&eb->blocking_readers)) {
+ /* Refetch value after lock */
+ if (READ_ONCE(eb->blocking_writers) || atomic_read(&eb->blocking_readers)) {
write_unlock(&eb->lock);
return 0;
}
@@ -223,7 +351,10 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb)
}
/*
- * drop a spinning read lock
+ * Release read lock. Must be used only if the lock is in spinning mode. If
+ * the read lock is nested, must pair with read lock before the write unlock.
+ *
+ * The rwlock is not held upon exit.
*/
void btrfs_tree_read_unlock(struct extent_buffer *eb)
{
@@ -245,7 +376,11 @@ void btrfs_tree_read_unlock(struct extent_buffer *eb)
}
/*
- * drop a blocking read lock
+ * Release read lock, previously set to blocking by a pairing call to
+ * btrfs_set_lock_blocking_read(). Can be nested in write lock by the same
+ * thread.
+ *
+ * State of rwlock is unchanged, last reader wakes waiting threads.
*/
void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
{
@@ -269,8 +404,10 @@ void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
}
/*
- * take a spinning write lock. This will wait for both
- * blocking readers or writers
+ * Lock for write. Wait for all blocking and spinning readers and writers. This
+ * starts context where reader lock could be nested by the same thread.
+ *
+ * The rwlock is held for write upon exit.
*/
void btrfs_tree_lock(struct extent_buffer *eb)
{
@@ -282,9 +419,11 @@ void btrfs_tree_lock(struct extent_buffer *eb)
WARN_ON(eb->lock_owner == current->pid);
again:
wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0);
- wait_event(eb->write_lock_wq, eb->blocking_writers == 0);
+ wait_event(eb->write_lock_wq, READ_ONCE(eb->blocking_writers) == 0);
write_lock(&eb->lock);
- if (atomic_read(&eb->blocking_readers) || eb->blocking_writers) {
+ /* Refetch value after lock */
+ if (atomic_read(&eb->blocking_readers) ||
+ READ_ONCE(eb->blocking_writers)) {
write_unlock(&eb->lock);
goto again;
}
@@ -295,10 +434,19 @@ again:
}
/*
- * drop a spinning or a blocking write lock.
+ * Release the write lock, either blocking or spinning (ie. there's no need
+ * for an explicit blocking unlock, like btrfs_tree_read_unlock_blocking).
+ * This also ends the context for nesting, the read lock must have been
+ * released already.
+ *
+ * Tasks blocked and waiting are woken, rwlock is not held upon exit.
*/
void btrfs_tree_unlock(struct extent_buffer *eb)
{
+ /*
+ * This is read both locked and unlocked but always by the same thread
+ * that already owns the lock so we don't need to use READ_ONCE
+ */
int blockers = eb->blocking_writers;
BUG_ON(blockers > 1);
@@ -310,7 +458,8 @@ void btrfs_tree_unlock(struct extent_buffer *eb)
if (blockers) {
btrfs_assert_no_spinning_writers(eb);
- eb->blocking_writers--;
+ /* Unlocked write */
+ WRITE_ONCE(eb->blocking_writers, 0);
/*
* We need to order modifying blocking_writers above with
* actually waking up the sleepers to ensure they see the
@@ -322,3 +471,55 @@ void btrfs_tree_unlock(struct extent_buffer *eb)
write_unlock(&eb->lock);
}
}
+
+/*
+ * Set all locked nodes in the path to blocking locks. This should be done
+ * before scheduling
+ */
+void btrfs_set_path_blocking(struct btrfs_path *p)
+{
+ int i;
+
+ for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
+ if (!p->nodes[i] || !p->locks[i])
+ continue;
+ /*
+ * If we currently have a spinning reader or writer lock this
+ * will bump the count of blocking holders and drop the
+ * spinlock.
+ */
+ if (p->locks[i] == BTRFS_READ_LOCK) {
+ btrfs_set_lock_blocking_read(p->nodes[i]);
+ p->locks[i] = BTRFS_READ_LOCK_BLOCKING;
+ } else if (p->locks[i] == BTRFS_WRITE_LOCK) {
+ btrfs_set_lock_blocking_write(p->nodes[i]);
+ p->locks[i] = BTRFS_WRITE_LOCK_BLOCKING;
+ }
+ }
+}
+
+/*
+ * This releases any locks held in the path starting at level and going all the
+ * way up to the root.
+ *
+ * btrfs_search_slot will keep the lock held on higher nodes in a few corner
+ * cases, such as COW of the block at slot zero in the node. This ignores
+ * those rules, and it should only be called when there are no more updates to
+ * be done higher up in the tree.
+ */
+void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
+{
+ int i;
+
+ if (path->keep_locks)
+ return;
+
+ for (i = level; i < BTRFS_MAX_LEVEL; i++) {
+ if (!path->nodes[i])
+ continue;
+ if (!path->locks[i])
+ continue;
+ btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
+ path->locks[i] = 0;
+ }
+}
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h
index b775a4207ed9..21a285883e89 100644
--- a/fs/btrfs/locking.h
+++ b/fs/btrfs/locking.h
@@ -6,6 +6,8 @@
#ifndef BTRFS_LOCKING_H
#define BTRFS_LOCKING_H
+#include "extent_io.h"
+
#define BTRFS_WRITE_LOCK 1
#define BTRFS_READ_LOCK 2
#define BTRFS_WRITE_LOCK_BLOCKING 3
@@ -19,11 +21,20 @@ void btrfs_tree_read_unlock(struct extent_buffer *eb);
void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb);
void btrfs_set_lock_blocking_read(struct extent_buffer *eb);
void btrfs_set_lock_blocking_write(struct extent_buffer *eb);
-void btrfs_assert_tree_locked(struct extent_buffer *eb);
int btrfs_try_tree_read_lock(struct extent_buffer *eb);
int btrfs_try_tree_write_lock(struct extent_buffer *eb);
int btrfs_tree_read_lock_atomic(struct extent_buffer *eb);
+#ifdef CONFIG_BTRFS_DEBUG
+static inline void btrfs_assert_tree_locked(struct extent_buffer *eb) {
+ BUG_ON(!eb->write_locks);
+}
+#else
+static inline void btrfs_assert_tree_locked(struct extent_buffer *eb) { }
+#endif
+
+void btrfs_set_path_blocking(struct btrfs_path *p);
+void btrfs_unlock_up_safe(struct btrfs_path *path, int level);
static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw)
{
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index acad4174f68d..aa9cd11f4b78 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -63,27 +63,7 @@ struct workspace {
static struct workspace_manager wsm;
-static void lzo_init_workspace_manager(void)
-{
- btrfs_init_workspace_manager(&wsm, &btrfs_lzo_compress);
-}
-
-static void lzo_cleanup_workspace_manager(void)
-{
- btrfs_cleanup_workspace_manager(&wsm);
-}
-
-static struct list_head *lzo_get_workspace(unsigned int level)
-{
- return btrfs_get_workspace(&wsm, level);
-}
-
-static void lzo_put_workspace(struct list_head *ws)
-{
- btrfs_put_workspace(&wsm, ws);
-}
-
-static void lzo_free_workspace(struct list_head *ws)
+void lzo_free_workspace(struct list_head *ws)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
@@ -93,7 +73,7 @@ static void lzo_free_workspace(struct list_head *ws)
kfree(workspace);
}
-static struct list_head *lzo_alloc_workspace(unsigned int level)
+struct list_head *lzo_alloc_workspace(unsigned int level)
{
struct workspace *workspace;
@@ -131,13 +111,9 @@ static inline size_t read_compress_length(const char *buf)
return le32_to_cpu(dlen);
}
-static int lzo_compress_pages(struct list_head *ws,
- struct address_space *mapping,
- u64 start,
- struct page **pages,
- unsigned long *out_pages,
- unsigned long *total_in,
- unsigned long *total_out)
+int lzo_compress_pages(struct list_head *ws, struct address_space *mapping,
+ u64 start, struct page **pages, unsigned long *out_pages,
+ unsigned long *total_in, unsigned long *total_out)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
int ret = 0;
@@ -303,7 +279,7 @@ out:
return ret;
}
-static int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
+int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
int ret = 0, ret2;
@@ -444,10 +420,9 @@ done:
return ret;
}
-static int lzo_decompress(struct list_head *ws, unsigned char *data_in,
- struct page *dest_page,
- unsigned long start_byte,
- size_t srclen, size_t destlen)
+int lzo_decompress(struct list_head *ws, unsigned char *data_in,
+ struct page *dest_page, unsigned long start_byte, size_t srclen,
+ size_t destlen)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
size_t in_len;
@@ -508,15 +483,7 @@ out:
}
const struct btrfs_compress_op btrfs_lzo_compress = {
- .init_workspace_manager = lzo_init_workspace_manager,
- .cleanup_workspace_manager = lzo_cleanup_workspace_manager,
- .get_workspace = lzo_get_workspace,
- .put_workspace = lzo_put_workspace,
- .alloc_workspace = lzo_alloc_workspace,
- .free_workspace = lzo_free_workspace,
- .compress_pages = lzo_compress_pages,
- .decompress_bio = lzo_decompress_bio,
- .decompress = lzo_decompress,
+ .workspace_manager = &wsm,
.max_level = 1,
.default_level = 1,
};
diff --git a/fs/btrfs/misc.h b/fs/btrfs/misc.h
index 7d564924dfeb..72bab64ecf60 100644
--- a/fs/btrfs/misc.h
+++ b/fs/btrfs/misc.h
@@ -47,4 +47,15 @@ static inline u64 div_factor_fine(u64 num, int factor)
return div_u64(num, 100);
}
+/* Copy of is_power_of_two that is 64bit safe */
+static inline bool is_power_of_two_u64(u64 n)
+{
+ return n != 0 && (n & (n - 1)) == 0;
+}
+
+static inline bool has_single_bit_set(u64 n)
+{
+ return is_power_of_two_u64(n);
+}
+
#endif
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 24b6c72b9a59..fb09bc2f8e4d 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -547,7 +547,6 @@ u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,
spin_unlock(&root->ordered_extent_lock);
btrfs_init_work(&ordered->flush_work,
- btrfs_flush_delalloc_helper,
btrfs_run_ordered_extent_work, NULL, NULL);
list_add_tail(&ordered->work_list, &works);
btrfs_queue_work(fs_info->flush_workers, &ordered->flush_work);
@@ -573,12 +572,11 @@ u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,
return count;
}
-u64 btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
+void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
const u64 range_start, const u64 range_len)
{
struct btrfs_root *root;
struct list_head splice;
- u64 total_done = 0;
u64 done;
INIT_LIST_HEAD(&splice);
@@ -598,7 +596,6 @@ u64 btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
done = btrfs_wait_ordered_extents(root, nr,
range_start, range_len);
btrfs_put_fs_root(root);
- total_done += done;
spin_lock(&fs_info->ordered_root_lock);
if (nr != U64_MAX) {
@@ -608,8 +605,6 @@ u64 btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
list_splice_tail(&splice, &fs_info->ordered_roots);
spin_unlock(&fs_info->ordered_root_lock);
mutex_unlock(&fs_info->ordered_operations_mutex);
-
- return total_done;
}
/*
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 5204171ea962..4eb0319a86d7 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -186,7 +186,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
u8 *sum, int len);
u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,
const u64 range_start, const u64 range_len);
-u64 btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
+void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
const u64 range_start, const u64 range_len);
void btrfs_lock_and_flush_ordered_range(struct extent_io_tree *tree,
struct btrfs_inode *inode, u64 start,
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 9cb50577d982..873b6b694107 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -266,9 +266,9 @@ void btrfs_print_leaf(struct extent_buffer *l)
struct btrfs_block_group_item);
pr_info(
"\t\tblock group used %llu chunk_objectid %llu flags %llu\n",
- btrfs_disk_block_group_used(l, bi),
- btrfs_disk_block_group_chunk_objectid(l, bi),
- btrfs_disk_block_group_flags(l, bi));
+ btrfs_block_group_used(l, bi),
+ btrfs_block_group_chunk_objectid(l, bi),
+ btrfs_block_group_flags(l, bi));
break;
case BTRFS_CHUNK_ITEM_KEY:
print_chunk(l, btrfs_item_ptr(l, i,
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
index 1e664e0b59b8..deb59e7cfcac 100644
--- a/fs/btrfs/props.c
+++ b/fs/btrfs/props.c
@@ -416,11 +416,11 @@ int btrfs_subvol_inherit_props(struct btrfs_trans_handle *trans,
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
- parent_inode = btrfs_iget(sb, &key, parent_root, NULL);
+ parent_inode = btrfs_iget(sb, &key, parent_root);
if (IS_ERR(parent_inode))
return PTR_ERR(parent_inode);
- child_inode = btrfs_iget(sb, &key, root, NULL);
+ child_inode = btrfs_iget(sb, &key, root);
if (IS_ERR(child_inode)) {
iput(parent_inode);
return PTR_ERR(child_inode);
@@ -437,8 +437,6 @@ void __init btrfs_props_init(void)
{
int i;
- hash_init(prop_handlers_ht);
-
for (i = 0; i < ARRAY_SIZE(prop_handlers); i++) {
struct prop_handler *p = &prop_handlers[i];
u64 h = btrfs_name_hash(p->xattr_name, strlen(p->xattr_name));
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index c4bb69941c77..93aeb2e539a4 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1811,7 +1811,7 @@ static int qgroup_trace_extent_swap(struct btrfs_trans_handle* trans,
btrfs_item_key_to_cpu(dst_path->nodes[dst_level], &key, 0);
/* For src_path */
- extent_buffer_get(src_eb);
+ atomic_inc(&src_eb->refs);
src_path->nodes[root_level] = src_eb;
src_path->slots[root_level] = dst_path->slots[root_level];
src_path->locks[root_level] = 0;
@@ -2067,7 +2067,7 @@ static int qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans,
goto out;
}
/* For dst_path */
- extent_buffer_get(dst_eb);
+ atomic_inc(&dst_eb->refs);
dst_path->nodes[level] = dst_eb;
dst_path->slots[level] = 0;
dst_path->locks[level] = 0;
@@ -2126,7 +2126,7 @@ int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
* walk back up the tree (adjusting slot pointers as we go)
* and restart the search process.
*/
- extent_buffer_get(root_eb); /* For path */
+ atomic_inc(&root_eb->refs); /* For path */
path->nodes[root_level] = root_eb;
path->slots[root_level] = 0;
path->locks[root_level] = 0; /* so release_path doesn't try to unlock */
@@ -3277,10 +3277,7 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
spin_unlock(&fs_info->qgroup_lock);
mutex_unlock(&fs_info->qgroup_rescan_lock);
- memset(&fs_info->qgroup_rescan_work, 0,
- sizeof(fs_info->qgroup_rescan_work));
btrfs_init_work(&fs_info->qgroup_rescan_work,
- btrfs_qgroup_rescan_helper,
btrfs_qgroup_rescan_worker, NULL, NULL);
return 0;
}
@@ -3629,7 +3626,7 @@ int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
return 0;
BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
- trace_qgroup_meta_reserve(root, type, (s64)num_bytes);
+ trace_qgroup_meta_reserve(root, (s64)num_bytes, type);
ret = qgroup_reserve(root, num_bytes, enforce, type);
if (ret < 0)
return ret;
@@ -3676,7 +3673,7 @@ void __btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes,
*/
num_bytes = sub_root_meta_rsv(root, num_bytes, type);
BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
- trace_qgroup_meta_reserve(root, type, -(s64)num_bytes);
+ trace_qgroup_meta_reserve(root, -(s64)num_bytes, type);
btrfs_qgroup_free_refroot(fs_info, root->root_key.objectid,
num_bytes, type);
}
@@ -3826,7 +3823,7 @@ out:
*/
int btrfs_qgroup_add_swapped_blocks(struct btrfs_trans_handle *trans,
struct btrfs_root *subvol_root,
- struct btrfs_block_group_cache *bg,
+ struct btrfs_block_group *bg,
struct extent_buffer *subvol_parent, int subvol_slot,
struct extent_buffer *reloc_parent, int reloc_slot,
u64 last_snapshot)
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 46ba7bd2961c..236f12224d52 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -408,7 +408,7 @@ void btrfs_qgroup_init_swapped_blocks(
void btrfs_qgroup_clean_swapped_blocks(struct btrfs_root *root);
int btrfs_qgroup_add_swapped_blocks(struct btrfs_trans_handle *trans,
struct btrfs_root *subvol_root,
- struct btrfs_block_group_cache *bg,
+ struct btrfs_block_group *bg,
struct extent_buffer *subvol_parent, int subvol_slot,
struct extent_buffer *reloc_parent, int reloc_slot,
u64 last_snapshot);
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 57a2ac721985..a8e53c8e7b01 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -190,7 +190,7 @@ static void scrub_parity_work(struct btrfs_work *work);
static void start_async_work(struct btrfs_raid_bio *rbio, btrfs_func_t work_func)
{
- btrfs_init_work(&rbio->work, btrfs_rmw_helper, work_func, NULL, NULL);
+ btrfs_init_work(&rbio->work, work_func, NULL, NULL);
btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work);
}
@@ -671,8 +671,7 @@ static struct page *rbio_qstripe_page(struct btrfs_raid_bio *rbio, int index)
*/
static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
{
- int bucket = rbio_bucket(rbio);
- struct btrfs_stripe_hash *h = rbio->fs_info->stripe_hash_table->table + bucket;
+ struct btrfs_stripe_hash *h;
struct btrfs_raid_bio *cur;
struct btrfs_raid_bio *pending;
unsigned long flags;
@@ -680,64 +679,63 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
struct btrfs_raid_bio *cache_drop = NULL;
int ret = 0;
+ h = rbio->fs_info->stripe_hash_table->table + rbio_bucket(rbio);
+
spin_lock_irqsave(&h->lock, flags);
list_for_each_entry(cur, &h->hash_list, hash_list) {
- if (cur->bbio->raid_map[0] == rbio->bbio->raid_map[0]) {
- spin_lock(&cur->bio_list_lock);
-
- /* can we steal this cached rbio's pages? */
- if (bio_list_empty(&cur->bio_list) &&
- list_empty(&cur->plug_list) &&
- test_bit(RBIO_CACHE_BIT, &cur->flags) &&
- !test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags)) {
- list_del_init(&cur->hash_list);
- refcount_dec(&cur->refs);
-
- steal_rbio(cur, rbio);
- cache_drop = cur;
- spin_unlock(&cur->bio_list_lock);
+ if (cur->bbio->raid_map[0] != rbio->bbio->raid_map[0])
+ continue;
- goto lockit;
- }
+ spin_lock(&cur->bio_list_lock);
- /* can we merge into the lock owner? */
- if (rbio_can_merge(cur, rbio)) {
- merge_rbio(cur, rbio);
- spin_unlock(&cur->bio_list_lock);
- freeit = rbio;
- ret = 1;
- goto out;
- }
+ /* Can we steal this cached rbio's pages? */
+ if (bio_list_empty(&cur->bio_list) &&
+ list_empty(&cur->plug_list) &&
+ test_bit(RBIO_CACHE_BIT, &cur->flags) &&
+ !test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags)) {
+ list_del_init(&cur->hash_list);
+ refcount_dec(&cur->refs);
+ steal_rbio(cur, rbio);
+ cache_drop = cur;
+ spin_unlock(&cur->bio_list_lock);
- /*
- * we couldn't merge with the running
- * rbio, see if we can merge with the
- * pending ones. We don't have to
- * check for rmw_locked because there
- * is no way they are inside finish_rmw
- * right now
- */
- list_for_each_entry(pending, &cur->plug_list,
- plug_list) {
- if (rbio_can_merge(pending, rbio)) {
- merge_rbio(pending, rbio);
- spin_unlock(&cur->bio_list_lock);
- freeit = rbio;
- ret = 1;
- goto out;
- }
- }
+ goto lockit;
+ }
- /* no merging, put us on the tail of the plug list,
- * our rbio will be started with the currently
- * running rbio unlocks
- */
- list_add_tail(&rbio->plug_list, &cur->plug_list);
+ /* Can we merge into the lock owner? */
+ if (rbio_can_merge(cur, rbio)) {
+ merge_rbio(cur, rbio);
spin_unlock(&cur->bio_list_lock);
+ freeit = rbio;
ret = 1;
goto out;
}
+
+
+ /*
+ * We couldn't merge with the running rbio, see if we can merge
+ * with the pending ones. We don't have to check for rmw_locked
+ * because there is no way they are inside finish_rmw right now
+ */
+ list_for_each_entry(pending, &cur->plug_list, plug_list) {
+ if (rbio_can_merge(pending, rbio)) {
+ merge_rbio(pending, rbio);
+ spin_unlock(&cur->bio_list_lock);
+ freeit = rbio;
+ ret = 1;
+ goto out;
+ }
+ }
+
+ /*
+ * No merging, put us on the tail of the plug list, our rbio
+ * will be started with the currently running rbio unlocks
+ */
+ list_add_tail(&rbio->plug_list, &cur->plug_list);
+ spin_unlock(&cur->bio_list_lock);
+ ret = 1;
+ goto out;
}
lockit:
refcount_inc(&rbio->refs);
@@ -1743,8 +1741,7 @@ static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
plug = container_of(cb, struct btrfs_plug_cb, cb);
if (from_schedule) {
- btrfs_init_work(&plug->work, btrfs_rmw_helper,
- unplug_work, NULL, NULL);
+ btrfs_init_work(&plug->work, unplug_work, NULL, NULL);
btrfs_queue_work(plug->info->rmw_workers,
&plug->work);
return;
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index ee6f60547a8d..243a2e44526e 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -227,7 +227,7 @@ static struct reada_zone *reada_find_zone(struct btrfs_device *dev, u64 logical,
struct btrfs_fs_info *fs_info = dev->fs_info;
int ret;
struct reada_zone *zone;
- struct btrfs_block_group_cache *cache = NULL;
+ struct btrfs_block_group *cache = NULL;
u64 start;
u64 end;
int i;
@@ -248,8 +248,8 @@ static struct reada_zone *reada_find_zone(struct btrfs_device *dev, u64 logical,
if (!cache)
return NULL;
- start = cache->key.objectid;
- end = start + cache->key.offset - 1;
+ start = cache->start;
+ end = start + cache->length - 1;
btrfs_put_block_group(cache);
zone = kzalloc(sizeof(*zone), GFP_KERNEL);
@@ -752,21 +752,19 @@ static int reada_start_machine_dev(struct btrfs_device *dev)
static void reada_start_machine_worker(struct btrfs_work *work)
{
struct reada_machine_work *rmw;
- struct btrfs_fs_info *fs_info;
int old_ioprio;
rmw = container_of(work, struct reada_machine_work, work);
- fs_info = rmw->fs_info;
-
- kfree(rmw);
old_ioprio = IOPRIO_PRIO_VALUE(task_nice_ioclass(current),
task_nice_ioprio(current));
set_task_ioprio(current, BTRFS_IOPRIO_READA);
- __reada_start_machine(fs_info);
+ __reada_start_machine(rmw->fs_info);
set_task_ioprio(current, old_ioprio);
- atomic_dec(&fs_info->reada_works_cnt);
+ atomic_dec(&rmw->fs_info->reada_works_cnt);
+
+ kfree(rmw);
}
static void __reada_start_machine(struct btrfs_fs_info *fs_info)
@@ -821,8 +819,7 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info)
/* FIXME we cannot handle this properly right now */
BUG();
}
- btrfs_init_work(&rmw->work, btrfs_readahead_helper,
- reada_start_machine_worker, NULL, NULL);
+ btrfs_init_work(&rmw->work, reada_start_machine_worker, NULL, NULL);
rmw->fs_info = fs_info;
btrfs_queue_work(fs_info->readahead_workers, &rmw->work);
diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c
index e87cbdad02a3..b57f3618e58e 100644
--- a/fs/btrfs/ref-verify.c
+++ b/fs/btrfs/ref-verify.c
@@ -500,7 +500,7 @@ static int process_leaf(struct btrfs_root *root,
struct btrfs_extent_data_ref *dref;
struct btrfs_shared_data_ref *sref;
u32 count;
- int i = 0, tree_block_level = 0, ret;
+ int i = 0, tree_block_level = 0, ret = 0;
struct btrfs_key key;
int nritems = btrfs_header_nritems(leaf);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 00504657b602..d897a8e5e430 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -147,7 +147,7 @@ struct file_extent_cluster {
struct reloc_control {
/* block group to relocate */
- struct btrfs_block_group_cache *block_group;
+ struct btrfs_block_group *block_group;
/* extent tree */
struct btrfs_root *extent_root;
/* inode for moving data */
@@ -1560,11 +1560,10 @@ again:
return NULL;
}
-static int in_block_group(u64 bytenr,
- struct btrfs_block_group_cache *block_group)
+static int in_block_group(u64 bytenr, struct btrfs_block_group *block_group)
{
- if (bytenr >= block_group->key.objectid &&
- bytenr < block_group->key.objectid + block_group->key.offset)
+ if (bytenr >= block_group->start &&
+ bytenr < block_group->start + block_group->length)
return 1;
return 0;
}
@@ -2246,7 +2245,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
level = btrfs_root_level(root_item);
- extent_buffer_get(reloc_root->node);
+ atomic_inc(&reloc_root->node->refs);
path->nodes[level] = reloc_root->node;
path->slots[level] = 0;
} else {
@@ -3195,7 +3194,6 @@ static noinline_for_stack
int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
u64 block_start)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
struct extent_map *em;
int ret = 0;
@@ -3208,7 +3206,6 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
em->len = end + 1 - start;
em->block_len = em->len;
em->block_start = block_start;
- em->bdev = fs_info->fs_devices->latest_bdev;
set_bit(EXTENT_FLAG_PINNED, &em->flags);
lock_extent(&BTRFS_I(inode)->io_tree, start, end);
@@ -3277,6 +3274,8 @@ static int relocate_file_extent_cluster(struct inode *inode,
if (!page) {
btrfs_delalloc_release_metadata(BTRFS_I(inode),
PAGE_SIZE, true);
+ btrfs_delalloc_release_extents(BTRFS_I(inode),
+ PAGE_SIZE);
ret = -ENOMEM;
goto out;
}
@@ -3297,7 +3296,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
btrfs_delalloc_release_metadata(BTRFS_I(inode),
PAGE_SIZE, true);
btrfs_delalloc_release_extents(BTRFS_I(inode),
- PAGE_SIZE, true);
+ PAGE_SIZE);
ret = -EIO;
goto out;
}
@@ -3326,7 +3325,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
btrfs_delalloc_release_metadata(BTRFS_I(inode),
PAGE_SIZE, true);
btrfs_delalloc_release_extents(BTRFS_I(inode),
- PAGE_SIZE, true);
+ PAGE_SIZE);
clear_extent_bits(&BTRFS_I(inode)->io_tree,
page_start, page_end,
@@ -3342,8 +3341,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
put_page(page);
index++;
- btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE,
- false);
+ btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
balance_dirty_pages_ratelimited(inode->i_mapping);
btrfs_throttle(fs_info);
}
@@ -3543,7 +3541,7 @@ static int block_use_full_backref(struct reloc_control *rc,
}
static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
- struct btrfs_block_group_cache *block_group,
+ struct btrfs_block_group *block_group,
struct inode *inode,
u64 ino)
{
@@ -3559,7 +3557,7 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
- inode = btrfs_iget(fs_info->sb, &key, root, NULL);
+ inode = btrfs_iget(fs_info->sb, &key, root);
if (IS_ERR(inode))
return -ENOENT;
@@ -3862,7 +3860,7 @@ int find_next_extent(struct reloc_control *rc, struct btrfs_path *path,
u64 start, end, last;
int ret;
- last = rc->block_group->key.objectid + rc->block_group->key.offset;
+ last = rc->block_group->start + rc->block_group->length;
while (1) {
cond_resched();
if (rc->search_start >= last) {
@@ -3979,7 +3977,7 @@ int prepare_to_relocate(struct reloc_control *rc)
return -ENOMEM;
memset(&rc->cluster, 0, sizeof(rc->cluster));
- rc->search_start = rc->block_group->key.objectid;
+ rc->search_start = rc->block_group->start;
rc->extents_found = 0;
rc->nodes_relocated = 0;
rc->merging_rsv_size = 0;
@@ -4218,7 +4216,7 @@ out:
*/
static noinline_for_stack
struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
- struct btrfs_block_group_cache *group)
+ struct btrfs_block_group *group)
{
struct inode *inode = NULL;
struct btrfs_trans_handle *trans;
@@ -4245,9 +4243,9 @@ struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
key.objectid = objectid;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
- inode = btrfs_iget(fs_info->sb, &key, root, NULL);
+ inode = btrfs_iget(fs_info->sb, &key, root);
BUG_ON(IS_ERR(inode));
- BTRFS_I(inode)->index_cnt = group->key.objectid;
+ BTRFS_I(inode)->index_cnt = group->start;
err = btrfs_orphan_add(trans, BTRFS_I(inode));
out:
@@ -4282,7 +4280,7 @@ static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info)
* Print the block group being relocated
*/
static void describe_relocation(struct btrfs_fs_info *fs_info,
- struct btrfs_block_group_cache *block_group)
+ struct btrfs_block_group *block_group)
{
char buf[128] = {'\0'};
@@ -4290,7 +4288,7 @@ static void describe_relocation(struct btrfs_fs_info *fs_info,
btrfs_info(fs_info,
"relocating block group %llu flags %s",
- block_group->key.objectid, buf);
+ block_group->start, buf);
}
/*
@@ -4298,7 +4296,7 @@ static void describe_relocation(struct btrfs_fs_info *fs_info,
*/
int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
{
- struct btrfs_block_group_cache *bg;
+ struct btrfs_block_group *bg;
struct btrfs_root *extent_root = fs_info->extent_root;
struct reloc_control *rc;
struct inode *inode;
@@ -4325,7 +4323,7 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
rc->extent_root = extent_root;
rc->block_group = bg;
- ret = btrfs_inc_block_group_ro(rc->block_group);
+ ret = btrfs_inc_block_group_ro(rc->block_group, true);
if (ret) {
err = ret;
goto out;
@@ -4363,8 +4361,8 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
btrfs_wait_block_group_reservations(rc->block_group);
btrfs_wait_nocow_writers(rc->block_group);
btrfs_wait_ordered_roots(fs_info, U64_MAX,
- rc->block_group->key.objectid,
- rc->block_group->key.offset);
+ rc->block_group->start,
+ rc->block_group->length);
while (1) {
mutex_lock(&fs_info->cleaner_mutex);
@@ -4404,7 +4402,7 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
WARN_ON(rc->block_group->pinned > 0);
WARN_ON(rc->block_group->reserved > 0);
- WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0);
+ WARN_ON(rc->block_group->used > 0);
out:
if (err && rw)
btrfs_dec_block_group_ro(rc->block_group);
@@ -4687,7 +4685,7 @@ int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
node->new_bytenr != buf->start);
drop_node_buffer(node);
- extent_buffer_get(cow);
+ atomic_inc(&cow->refs);
node->eb = cow;
node->new_bytenr = cow->start;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index f7d4e03f4c5d..21de630b0730 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -389,8 +389,7 @@ static struct full_stripe_lock *search_full_stripe_lock(
*
* Caller must ensure @cache is a RAID56 block group.
*/
-static u64 get_full_stripe_logical(struct btrfs_block_group_cache *cache,
- u64 bytenr)
+static u64 get_full_stripe_logical(struct btrfs_block_group *cache, u64 bytenr)
{
u64 ret;
@@ -404,8 +403,8 @@ static u64 get_full_stripe_logical(struct btrfs_block_group_cache *cache,
* round_down() can only handle power of 2, while RAID56 full
* stripe length can be 64KiB * n, so we need to manually round down.
*/
- ret = div64_u64(bytenr - cache->key.objectid, cache->full_stripe_len) *
- cache->full_stripe_len + cache->key.objectid;
+ ret = div64_u64(bytenr - cache->start, cache->full_stripe_len) *
+ cache->full_stripe_len + cache->start;
return ret;
}
@@ -423,7 +422,7 @@ static u64 get_full_stripe_logical(struct btrfs_block_group_cache *cache,
static int lock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr,
bool *locked_ret)
{
- struct btrfs_block_group_cache *bg_cache;
+ struct btrfs_block_group *bg_cache;
struct btrfs_full_stripe_locks_tree *locks_root;
struct full_stripe_lock *existing;
u64 fstripe_start;
@@ -470,7 +469,7 @@ out:
static int unlock_full_stripe(struct btrfs_fs_info *fs_info, u64 bytenr,
bool locked)
{
- struct btrfs_block_group_cache *bg_cache;
+ struct btrfs_block_group *bg_cache;
struct btrfs_full_stripe_locks_tree *locks_root;
struct full_stripe_lock *fstripe_lock;
u64 fstripe_start;
@@ -598,8 +597,8 @@ static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
sbio->index = i;
sbio->sctx = sctx;
sbio->page_count = 0;
- btrfs_init_work(&sbio->work, btrfs_scrub_helper,
- scrub_bio_end_io_worker, NULL, NULL);
+ btrfs_init_work(&sbio->work, scrub_bio_end_io_worker, NULL,
+ NULL);
if (i != SCRUB_BIOS_PER_SCTX - 1)
sctx->bios[i]->next_free = i + 1;
@@ -1720,8 +1719,7 @@ static void scrub_wr_bio_end_io(struct bio *bio)
sbio->status = bio->bi_status;
sbio->bio = bio;
- btrfs_init_work(&sbio->work, btrfs_scrubwrc_helper,
- scrub_wr_bio_end_io_worker, NULL, NULL);
+ btrfs_init_work(&sbio->work, scrub_wr_bio_end_io_worker, NULL, NULL);
btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work);
}
@@ -2149,14 +2147,13 @@ static void scrub_missing_raid56_worker(struct btrfs_work *work)
scrub_write_block_to_dev_replace(sblock);
}
- scrub_block_put(sblock);
-
if (sctx->is_dev_replace && sctx->flush_all_writes) {
mutex_lock(&sctx->wr_lock);
scrub_wr_submit(sctx);
mutex_unlock(&sctx->wr_lock);
}
+ scrub_block_put(sblock);
scrub_pending_bio_dec(sctx);
}
@@ -2204,8 +2201,7 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock)
raid56_add_scrub_pages(rbio, spage->page, spage->logical);
}
- btrfs_init_work(&sblock->work, btrfs_scrub_helper,
- scrub_missing_raid56_worker, NULL, NULL);
+ btrfs_init_work(&sblock->work, scrub_missing_raid56_worker, NULL, NULL);
scrub_block_get(sblock);
scrub_pending_bio_inc(sctx);
raid56_submit_missing_rbio(rbio);
@@ -2743,8 +2739,8 @@ static void scrub_parity_bio_endio(struct bio *bio)
bio_put(bio);
- btrfs_init_work(&sparity->work, btrfs_scrubparity_helper,
- scrub_parity_bio_endio_worker, NULL, NULL);
+ btrfs_init_work(&sparity->work, scrub_parity_bio_endio_worker, NULL,
+ NULL);
btrfs_queue_work(fs_info->scrub_parity_workers, &sparity->work);
}
@@ -3420,7 +3416,7 @@ static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
struct btrfs_device *scrub_dev,
u64 chunk_offset, u64 length,
u64 dev_offset,
- struct btrfs_block_group_cache *cache)
+ struct btrfs_block_group *cache)
{
struct btrfs_fs_info *fs_info = sctx->fs_info;
struct extent_map_tree *map_tree = &fs_info->mapping_tree;
@@ -3484,7 +3480,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
struct extent_buffer *l;
struct btrfs_key key;
struct btrfs_key found_key;
- struct btrfs_block_group_cache *cache;
+ struct btrfs_block_group *cache;
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
path = btrfs_alloc_path();
@@ -3563,46 +3559,26 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
* -> btrfs_scrub_pause()
*/
scrub_pause_on(fs_info);
- ret = btrfs_inc_block_group_ro(cache);
- if (!ret && sctx->is_dev_replace) {
- /*
- * If we are doing a device replace wait for any tasks
- * that started delalloc right before we set the block
- * group to RO mode, as they might have just allocated
- * an extent from it or decided they could do a nocow
- * write. And if any such tasks did that, wait for their
- * ordered extents to complete and then commit the
- * current transaction, so that we can later see the new
- * extent items in the extent tree - the ordered extents
- * create delayed data references (for cow writes) when
- * they complete, which will be run and insert the
- * corresponding extent items into the extent tree when
- * we commit the transaction they used when running
- * inode.c:btrfs_finish_ordered_io(). We later use
- * the commit root of the extent tree to find extents
- * to copy from the srcdev into the tgtdev, and we don't
- * want to miss any new extents.
- */
- btrfs_wait_block_group_reservations(cache);
- btrfs_wait_nocow_writers(cache);
- ret = btrfs_wait_ordered_roots(fs_info, U64_MAX,
- cache->key.objectid,
- cache->key.offset);
- if (ret > 0) {
- struct btrfs_trans_handle *trans;
-
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans))
- ret = PTR_ERR(trans);
- else
- ret = btrfs_commit_transaction(trans);
- if (ret) {
- scrub_pause_off(fs_info);
- btrfs_put_block_group(cache);
- break;
- }
- }
- }
+
+ /*
+ * Don't do chunk preallocation for scrub.
+ *
+ * This is especially important for SYSTEM bgs, or we can hit
+ * -EFBIG from btrfs_finish_chunk_alloc() like:
+ * 1. The only SYSTEM bg is marked RO.
+ * Since SYSTEM bg is small, that's pretty common.
+ * 2. New SYSTEM bg will be allocated
+ * Due to regular version will allocate new chunk.
+ * 3. New SYSTEM bg is empty and will get cleaned up
+ * Before cleanup really happens, it's marked RO again.
+ * 4. Empty SYSTEM bg get scrubbed
+ * We go back to 2.
+ *
+ * This can easily boost the amount of SYSTEM chunks if cleaner
+ * thread can't be triggered fast enough, and use up all space
+ * of btrfs_super_block::sys_chunk_array
+ */
+ ret = btrfs_inc_block_group_ro(cache, false);
scrub_pause_off(fs_info);
if (ret == 0) {
@@ -3623,7 +3599,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
break;
}
- down_write(&fs_info->dev_replace.rwsem);
+ down_write(&dev_replace->rwsem);
dev_replace->cursor_right = found_key.offset + length;
dev_replace->cursor_left = found_key.offset;
dev_replace->item_needs_writeback = 1;
@@ -3664,10 +3640,10 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
scrub_pause_off(fs_info);
- down_write(&fs_info->dev_replace.rwsem);
+ down_write(&dev_replace->rwsem);
dev_replace->cursor_left = dev_replace->cursor_right;
dev_replace->item_needs_writeback = 1;
- up_write(&fs_info->dev_replace.rwsem);
+ up_write(&dev_replace->rwsem);
if (ro_set)
btrfs_dec_block_group_ro(cache);
@@ -3681,7 +3657,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
*/
spin_lock(&cache->lock);
if (!cache->removed && !cache->ro && cache->reserved == 0 &&
- btrfs_block_group_used(&cache->item) == 0) {
+ cache->used == 0) {
spin_unlock(&cache->lock);
btrfs_mark_bg_unused(cache);
} else {
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index f3215028235c..ae2db5eb1549 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -25,6 +25,14 @@
#include "compression.h"
/*
+ * Maximum number of references an extent can have in order for us to attempt to
+ * issue clone operations instead of write operations. This currently exists to
+ * avoid hitting limitations of the backreference walking code (taking a lot of
+ * time and using too much memory for extents with large number of references).
+ */
+#define SEND_MAX_EXTENT_REFS 64
+
+/*
* A fs_path is a helper to dynamically build path names with unknown size.
* It reallocates the internal buffer on demand.
* It allows fast adding of path elements on the right side (normal path) and
@@ -1248,12 +1256,20 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
*/
if (found->root == bctx->sctx->send_root) {
/*
- * TODO for the moment we don't accept clones from the inode
- * that is currently send. We may change this when
- * BTRFS_IOC_CLONE_RANGE supports cloning from and to the same
- * file.
+ * If the source inode was not yet processed we can't issue a
+ * clone operation, as the source extent does not exist yet at
+ * the destination of the stream.
*/
- if (ino >= bctx->cur_objectid)
+ if (ino > bctx->cur_objectid)
+ return 0;
+ /*
+ * We clone from the inode currently being sent as long as the
+ * source extent is already processed, otherwise we could try
+ * to clone from an extent that does not exist yet at the
+ * destination of the stream.
+ */
+ if (ino == bctx->cur_objectid &&
+ offset >= bctx->sctx->cur_inode_next_write_offset)
return 0;
}
@@ -1302,6 +1318,7 @@ static int find_extent_clone(struct send_ctx *sctx,
struct clone_root *cur_clone_root;
struct btrfs_key found_key;
struct btrfs_path *tmp_path;
+ struct btrfs_extent_item *ei;
int compressed;
u32 i;
@@ -1349,7 +1366,6 @@ static int find_extent_clone(struct send_ctx *sctx,
ret = extent_from_logical(fs_info, disk_byte, tmp_path,
&found_key, &flags);
up_read(&fs_info->commit_root_sem);
- btrfs_release_path(tmp_path);
if (ret < 0)
goto out;
@@ -1358,6 +1374,21 @@ static int find_extent_clone(struct send_ctx *sctx,
goto out;
}
+ ei = btrfs_item_ptr(tmp_path->nodes[0], tmp_path->slots[0],
+ struct btrfs_extent_item);
+ /*
+ * Backreference walking (iterate_extent_inodes() below) is currently
+ * too expensive when an extent has a large number of references, both
+ * in time spent and used memory. So for now just fallback to write
+ * operations instead of clone operations when an extent has more than
+ * a certain amount of references.
+ */
+ if (btrfs_extent_refs(tmp_path->nodes[0], ei) > SEND_MAX_EXTENT_REFS) {
+ ret = -ENOENT;
+ goto out;
+ }
+ btrfs_release_path(tmp_path);
+
/*
* Setup the clone roots.
*/
@@ -4779,7 +4810,7 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
- inode = btrfs_iget(fs_info->sb, &key, root, NULL);
+ inode = btrfs_iget(fs_info->sb, &key, root);
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -5085,7 +5116,7 @@ static int clone_range(struct send_ctx *sctx,
struct btrfs_path *path;
struct btrfs_key key;
int ret;
- u64 clone_src_i_size;
+ u64 clone_src_i_size = 0;
/*
* Prevent cloning from a zero offset with a length matching the sector
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 98dc092a905e..f09aa6ee9113 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -10,7 +10,7 @@
#include "transaction.h"
#include "block-group.h"
-u64 btrfs_space_info_used(struct btrfs_space_info *s_info,
+u64 __pure btrfs_space_info_used(struct btrfs_space_info *s_info,
bool may_use_included)
{
ASSERT(s_info);
@@ -58,7 +58,6 @@ static int create_space_info(struct btrfs_fs_info *info, u64 flags)
spin_lock_init(&space_info->lock);
space_info->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
- init_waitqueue_head(&space_info->wait);
INIT_LIST_HEAD(&space_info->ro_bgs);
INIT_LIST_HEAD(&space_info->tickets);
INIT_LIST_HEAD(&space_info->priority_tickets);
@@ -285,7 +284,7 @@ void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *info, u64 bytes,
int dump_block_groups)
{
- struct btrfs_block_group_cache *cache;
+ struct btrfs_block_group *cache;
int index = 0;
spin_lock(&info->lock);
@@ -301,8 +300,7 @@ again:
spin_lock(&cache->lock);
btrfs_info(fs_info,
"block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s",
- cache->key.objectid, cache->key.offset,
- btrfs_block_group_used(&cache->item), cache->pinned,
+ cache->start, cache->length, cache->used, cache->pinned,
cache->reserved, cache->ro ? "[readonly]" : "");
btrfs_dump_free_space(cache, bytes);
spin_unlock(&cache->lock);
@@ -893,6 +891,15 @@ static void wait_reserve_ticket(struct btrfs_fs_info *fs_info,
while (ticket->bytes > 0 && ticket->error == 0) {
ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE);
if (ret) {
+ /*
+ * Delete us from the list. After we unlock the space
+ * info, we don't want the async reclaim job to reserve
+ * space for this ticket. If that would happen, then the
+ * ticket's task would not known that space was reserved
+ * despite getting an error, resulting in a space leak
+ * (bytes_may_use counter of our space_info).
+ */
+ list_del_init(&ticket->list);
ticket->error = -EINTR;
break;
}
@@ -945,12 +952,24 @@ static int handle_reserve_ticket(struct btrfs_fs_info *fs_info,
spin_lock(&space_info->lock);
ret = ticket->error;
if (ticket->bytes || ticket->error) {
+ /*
+ * Need to delete here for priority tickets. For regular tickets
+ * either the async reclaim job deletes the ticket from the list
+ * or we delete it ourselves at wait_reserve_ticket().
+ */
list_del_init(&ticket->list);
if (!ret)
ret = -ENOSPC;
}
spin_unlock(&space_info->lock);
ASSERT(list_empty(&ticket->list));
+ /*
+ * Check that we can't have an error set if the reservation succeeded,
+ * as that would confuse tasks and lead them to error out without
+ * releasing reserved space (if an error happens the expectation is that
+ * space wasn't reserved at all).
+ */
+ ASSERT(!(ticket->bytes == 0 && ticket->error));
return ret;
}
diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
index 8867e84aa33d..1a349e3f9cc1 100644
--- a/fs/btrfs/space-info.h
+++ b/fs/btrfs/space-info.h
@@ -63,7 +63,6 @@ struct btrfs_space_info {
struct rw_semaphore groups_sem;
/* for block groups in our same type */
struct list_head block_groups[BTRFS_NR_RAID_TYPES];
- wait_queue_head_t wait;
struct kobject kobj;
struct kobject *block_group_kobjs[BTRFS_NR_RAID_TYPES];
@@ -116,7 +115,7 @@ void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
struct btrfs_space_info **space_info);
struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
u64 flags);
-u64 btrfs_space_info_used(struct btrfs_space_info *s_info,
+u64 __pure btrfs_space_info_used(struct btrfs_space_info *s_info,
bool may_use_included);
void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 1b151af25772..a98c3c71fc54 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -66,7 +66,7 @@ static struct file_system_type btrfs_root_fs_type;
static int btrfs_remount(struct super_block *sb, int *flags, char *data);
-const char *btrfs_decode_error(int errno)
+const char * __attribute_const__ btrfs_decode_error(int errno)
{
char *errstr = "unknown";
@@ -187,7 +187,7 @@ static struct ratelimit_state printk_limits[] = {
RATELIMIT_STATE_INIT(printk_limits[7], DEFAULT_RATELIMIT_INTERVAL, 100),
};
-void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
+void __cold btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
{
char lvl[PRINTK_MAX_SINGLE_HEADER_LEN + 1] = "\0";
struct va_format vaf;
@@ -1219,7 +1219,7 @@ static int btrfs_fill_super(struct super_block *sb,
key.objectid = BTRFS_FIRST_FREE_OBJECTID;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
- inode = btrfs_iget(sb, &key, fs_info->fs_root, NULL);
+ inode = btrfs_iget(sb, &key, fs_info->fs_root);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
goto fail_close;
@@ -1669,7 +1669,6 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
btrfs_workqueue_set_max(fs_info->workers, new_pool_size);
btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size);
- btrfs_workqueue_set_max(fs_info->submit_workers, new_pool_size);
btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size);
btrfs_workqueue_set_max(fs_info->endio_workers, new_pool_size);
btrfs_workqueue_set_max(fs_info->endio_meta_workers, new_pool_size);
@@ -1936,6 +1935,10 @@ static inline int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
num_stripes = nr_devices;
else if (type & BTRFS_BLOCK_GROUP_RAID1)
num_stripes = 2;
+ else if (type & BTRFS_BLOCK_GROUP_RAID1C3)
+ num_stripes = 3;
+ else if (type & BTRFS_BLOCK_GROUP_RAID1C4)
+ num_stripes = 4;
else if (type & BTRFS_BLOCK_GROUP_RAID10)
num_stripes = 4;
@@ -2022,7 +2025,6 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
struct btrfs_super_block *disk_super = fs_info->super_copy;
- struct list_head *head = &fs_info->space_info;
struct btrfs_space_info *found;
u64 total_used = 0;
u64 total_free_data = 0;
@@ -2036,7 +2038,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
int mixed = 0;
rcu_read_lock();
- list_for_each_entry_rcu(found, head, list) {
+ list_for_each_entry_rcu(found, &fs_info->space_info, list) {
if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
int i;
@@ -2360,10 +2362,14 @@ static int __init init_btrfs_fs(void)
if (err)
goto free_cachep;
- err = extent_map_init();
+ err = extent_state_cache_init();
if (err)
goto free_extent_io;
+ err = extent_map_init();
+ if (err)
+ goto free_extent_state_cache;
+
err = ordered_data_init();
if (err)
goto free_extent_map;
@@ -2422,6 +2428,8 @@ free_ordered_data:
ordered_data_exit();
free_extent_map:
extent_map_exit();
+free_extent_state_cache:
+ extent_state_cache_exit();
free_extent_io:
extent_io_exit();
free_cachep:
@@ -2442,6 +2450,7 @@ static void __exit exit_btrfs_fs(void)
btrfs_prelim_ref_exit();
ordered_data_exit();
extent_map_exit();
+ extent_state_cache_exit();
extent_io_exit();
btrfs_interface_exit();
btrfs_end_io_wq_exit();
@@ -2456,3 +2465,6 @@ module_exit(exit_btrfs_fs)
MODULE_LICENSE("GPL");
MODULE_SOFTDEP("pre: crc32c");
+MODULE_SOFTDEP("pre: xxhash64");
+MODULE_SOFTDEP("pre: sha256");
+MODULE_SOFTDEP("pre: blake2b-256");
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index f6d3c80f2e28..5ebbe8a5ee76 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -9,6 +9,7 @@
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/bug.h>
+#include <crypto/hash.h>
#include "ctree.h"
#include "disk-io.h"
@@ -258,6 +259,7 @@ BTRFS_FEAT_ATTR_INCOMPAT(skinny_metadata, SKINNY_METADATA);
BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES);
BTRFS_FEAT_ATTR_INCOMPAT(metadata_uuid, METADATA_UUID);
BTRFS_FEAT_ATTR_COMPAT_RO(free_space_tree, FREE_SPACE_TREE);
+BTRFS_FEAT_ATTR_INCOMPAT(raid1c34, RAID1C34);
static struct attribute *btrfs_supported_feature_attrs[] = {
BTRFS_FEAT_ATTR_PTR(mixed_backref),
@@ -272,6 +274,7 @@ static struct attribute *btrfs_supported_feature_attrs[] = {
BTRFS_FEAT_ATTR_PTR(no_holes),
BTRFS_FEAT_ATTR_PTR(metadata_uuid),
BTRFS_FEAT_ATTR_PTR(free_space_tree),
+ BTRFS_FEAT_ATTR_PTR(raid1c34),
NULL
};
@@ -295,8 +298,30 @@ static ssize_t rmdir_subvol_show(struct kobject *kobj,
}
BTRFS_ATTR(static_feature, rmdir_subvol, rmdir_subvol_show);
+static ssize_t supported_checksums_show(struct kobject *kobj,
+ struct kobj_attribute *a, char *buf)
+{
+ ssize_t ret = 0;
+ int i;
+
+ for (i = 0; i < btrfs_get_num_csums(); i++) {
+ /*
+ * This "trick" only works as long as 'enum btrfs_csum_type' has
+ * no holes in it
+ */
+ ret += snprintf(buf + ret, PAGE_SIZE - ret, "%s%s",
+ (i == 0 ? "" : " "), btrfs_super_csum_name(i));
+
+ }
+
+ ret += snprintf(buf + ret, PAGE_SIZE - ret, "\n");
+ return ret;
+}
+BTRFS_ATTR(static_feature, supported_checksums, supported_checksums_show);
+
static struct attribute *btrfs_supported_static_feature_attrs[] = {
BTRFS_ATTR_PTR(static_feature, rmdir_subvol),
+ BTRFS_ATTR_PTR(static_feature, supported_checksums),
NULL
};
@@ -372,16 +397,16 @@ static ssize_t raid_bytes_show(struct kobject *kobj,
{
struct btrfs_space_info *sinfo = to_space_info(kobj->parent);
- struct btrfs_block_group_cache *block_group;
+ struct btrfs_block_group *block_group;
int index = btrfs_bg_flags_to_raid_index(to_raid_kobj(kobj)->flags);
u64 val = 0;
down_read(&sinfo->groups_sem);
list_for_each_entry(block_group, &sinfo->block_groups[index], list) {
if (&attr->attr == BTRFS_ATTR_PTR(raid, total_bytes))
- val += block_group->key.offset;
+ val += block_group->length;
else
- val += btrfs_block_group_used(&block_group->item);
+ val += block_group->used;
}
up_read(&sinfo->groups_sem);
return snprintf(buf, PAGE_SIZE, "%llu\n", val);
@@ -604,6 +629,19 @@ static ssize_t btrfs_metadata_uuid_show(struct kobject *kobj,
BTRFS_ATTR(, metadata_uuid, btrfs_metadata_uuid_show);
+static ssize_t btrfs_checksum_show(struct kobject *kobj,
+ struct kobj_attribute *a, char *buf)
+{
+ struct btrfs_fs_info *fs_info = to_fs_info(kobj);
+ u16 csum_type = btrfs_super_csum_type(fs_info->super_copy);
+
+ return snprintf(buf, PAGE_SIZE, "%s (%s)\n",
+ btrfs_super_csum_name(csum_type),
+ crypto_shash_driver_name(fs_info->csum_shash));
+}
+
+BTRFS_ATTR(, checksum, btrfs_checksum_show);
+
static const struct attribute *btrfs_attrs[] = {
BTRFS_ATTR_PTR(, label),
BTRFS_ATTR_PTR(, nodesize),
@@ -611,6 +649,7 @@ static const struct attribute *btrfs_attrs[] = {
BTRFS_ATTR_PTR(, clone_alignment),
BTRFS_ATTR_PTR(, quota_override),
BTRFS_ATTR_PTR(, metadata_uuid),
+ BTRFS_ATTR_PTR(, checksum),
NULL,
};
@@ -822,7 +861,7 @@ static void init_feature_attrs(void)
* Create a sysfs entry for a given block group type at path
* /sys/fs/btrfs/UUID/allocation/data/TYPE
*/
-void btrfs_sysfs_add_block_group_type(struct btrfs_block_group_cache *cache)
+void btrfs_sysfs_add_block_group_type(struct btrfs_block_group *cache)
{
struct btrfs_fs_info *fs_info = cache->fs_info;
struct btrfs_space_info *space_info = cache->space_info;
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h
index 610e9c36a94c..e10c3adfc30f 100644
--- a/fs/btrfs/sysfs.h
+++ b/fs/btrfs/sysfs.h
@@ -32,7 +32,7 @@ int __init btrfs_init_sysfs(void);
void __cold btrfs_exit_sysfs(void);
int btrfs_sysfs_add_mounted(struct btrfs_fs_info *fs_info);
void btrfs_sysfs_remove_mounted(struct btrfs_fs_info *fs_info);
-void btrfs_sysfs_add_block_group_type(struct btrfs_block_group_cache *cache);
+void btrfs_sysfs_add_block_group_type(struct btrfs_block_group *cache);
int btrfs_sysfs_add_space_info_type(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info);
void btrfs_sysfs_remove_space_info(struct btrfs_space_info *space_info);
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index 99fe9bf3fdac..a7aca4141788 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -202,11 +202,11 @@ void btrfs_free_dummy_root(struct btrfs_root *root)
kfree(root);
}
-struct btrfs_block_group_cache *
+struct btrfs_block_group *
btrfs_alloc_dummy_block_group(struct btrfs_fs_info *fs_info,
unsigned long length)
{
- struct btrfs_block_group_cache *cache;
+ struct btrfs_block_group *cache;
cache = kzalloc(sizeof(*cache), GFP_KERNEL);
if (!cache)
@@ -218,9 +218,8 @@ btrfs_alloc_dummy_block_group(struct btrfs_fs_info *fs_info,
return NULL;
}
- cache->key.objectid = 0;
- cache->key.offset = length;
- cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+ cache->start = 0;
+ cache->length = length;
cache->full_stripe_len = fs_info->sectorsize;
cache->fs_info = fs_info;
@@ -233,7 +232,7 @@ btrfs_alloc_dummy_block_group(struct btrfs_fs_info *fs_info,
return cache;
}
-void btrfs_free_dummy_block_group(struct btrfs_block_group_cache *cache)
+void btrfs_free_dummy_block_group(struct btrfs_block_group *cache)
{
if (!cache)
return;
diff --git a/fs/btrfs/tests/btrfs-tests.h b/fs/btrfs/tests/btrfs-tests.h
index ee277bbd939b..9e52527357d8 100644
--- a/fs/btrfs/tests/btrfs-tests.h
+++ b/fs/btrfs/tests/btrfs-tests.h
@@ -41,9 +41,9 @@ struct inode *btrfs_new_test_inode(void);
struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize);
void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info);
void btrfs_free_dummy_root(struct btrfs_root *root);
-struct btrfs_block_group_cache *
+struct btrfs_block_group *
btrfs_alloc_dummy_block_group(struct btrfs_fs_info *fs_info, unsigned long length);
-void btrfs_free_dummy_block_group(struct btrfs_block_group_cache *cache);
+void btrfs_free_dummy_block_group(struct btrfs_block_group *cache);
void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
#else
diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c
index 43ec7060fcd2..aebdf23f0cdd 100644
--- a/fs/btrfs/tests/free-space-tests.c
+++ b/fs/btrfs/tests/free-space-tests.c
@@ -17,7 +17,7 @@
* entry and remove space from either end and the middle, and make sure we can
* remove space that covers adjacent extent entries.
*/
-static int test_extents(struct btrfs_block_group_cache *cache)
+static int test_extents(struct btrfs_block_group *cache)
{
int ret = 0;
@@ -87,8 +87,7 @@ static int test_extents(struct btrfs_block_group_cache *cache)
return 0;
}
-static int test_bitmaps(struct btrfs_block_group_cache *cache,
- u32 sectorsize)
+static int test_bitmaps(struct btrfs_block_group *cache, u32 sectorsize)
{
u64 next_bitmap_offset;
int ret;
@@ -156,7 +155,7 @@ static int test_bitmaps(struct btrfs_block_group_cache *cache,
}
/* This is the high grade jackassery */
-static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache,
+static int test_bitmaps_and_extents(struct btrfs_block_group *cache,
u32 sectorsize)
{
u64 bitmap_offset = (u64)(BITS_PER_BITMAP * sectorsize);
@@ -331,7 +330,7 @@ static bool test_use_bitmap(struct btrfs_free_space_ctl *ctl,
/* Used by test_steal_space_from_bitmap_to_extent(). */
static int
-check_num_extents_and_bitmaps(const struct btrfs_block_group_cache *cache,
+check_num_extents_and_bitmaps(const struct btrfs_block_group *cache,
const int num_extents,
const int num_bitmaps)
{
@@ -351,7 +350,7 @@ check_num_extents_and_bitmaps(const struct btrfs_block_group_cache *cache,
}
/* Used by test_steal_space_from_bitmap_to_extent(). */
-static int check_cache_empty(struct btrfs_block_group_cache *cache)
+static int check_cache_empty(struct btrfs_block_group *cache)
{
u64 offset;
u64 max_extent_size;
@@ -393,7 +392,7 @@ static int check_cache_empty(struct btrfs_block_group_cache *cache)
* requests.
*/
static int
-test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
+test_steal_space_from_bitmap_to_extent(struct btrfs_block_group *cache,
u32 sectorsize)
{
int ret;
@@ -829,7 +828,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache,
int btrfs_test_free_space_cache(u32 sectorsize, u32 nodesize)
{
struct btrfs_fs_info *fs_info;
- struct btrfs_block_group_cache *cache;
+ struct btrfs_block_group *cache;
struct btrfs_root *root = NULL;
int ret = -ENOMEM;
diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c
index bc92df977630..1a846bf6e197 100644
--- a/fs/btrfs/tests/free-space-tree-tests.c
+++ b/fs/btrfs/tests/free-space-tree-tests.c
@@ -18,7 +18,7 @@ struct free_space_extent {
static int __check_free_space_extents(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
- struct btrfs_block_group_cache *cache,
+ struct btrfs_block_group *cache,
struct btrfs_path *path,
const struct free_space_extent * const extents,
unsigned int num_extents)
@@ -48,7 +48,7 @@ static int __check_free_space_extents(struct btrfs_trans_handle *trans,
if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
if (path->slots[0] != 0)
goto invalid;
- end = cache->key.objectid + cache->key.offset;
+ end = cache->start + cache->length;
i = 0;
while (++path->slots[0] < btrfs_header_nritems(path->nodes[0])) {
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
@@ -107,7 +107,7 @@ invalid:
static int check_free_space_extents(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
- struct btrfs_block_group_cache *cache,
+ struct btrfs_block_group *cache,
struct btrfs_path *path,
const struct free_space_extent * const extents,
unsigned int num_extents)
@@ -150,12 +150,12 @@ static int check_free_space_extents(struct btrfs_trans_handle *trans,
static int test_empty_block_group(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
- struct btrfs_block_group_cache *cache,
+ struct btrfs_block_group *cache,
struct btrfs_path *path,
u32 alignment)
{
const struct free_space_extent extents[] = {
- {cache->key.objectid, cache->key.offset},
+ {cache->start, cache->length},
};
return check_free_space_extents(trans, fs_info, cache, path,
@@ -164,7 +164,7 @@ static int test_empty_block_group(struct btrfs_trans_handle *trans,
static int test_remove_all(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
- struct btrfs_block_group_cache *cache,
+ struct btrfs_block_group *cache,
struct btrfs_path *path,
u32 alignment)
{
@@ -172,8 +172,8 @@ static int test_remove_all(struct btrfs_trans_handle *trans,
int ret;
ret = __remove_from_free_space_tree(trans, cache, path,
- cache->key.objectid,
- cache->key.offset);
+ cache->start,
+ cache->length);
if (ret) {
test_err("could not remove free space");
return ret;
@@ -185,18 +185,17 @@ static int test_remove_all(struct btrfs_trans_handle *trans,
static int test_remove_beginning(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
- struct btrfs_block_group_cache *cache,
+ struct btrfs_block_group *cache,
struct btrfs_path *path,
u32 alignment)
{
const struct free_space_extent extents[] = {
- {cache->key.objectid + alignment,
- cache->key.offset - alignment},
+ {cache->start + alignment, cache->length - alignment},
};
int ret;
ret = __remove_from_free_space_tree(trans, cache, path,
- cache->key.objectid, alignment);
+ cache->start, alignment);
if (ret) {
test_err("could not remove free space");
return ret;
@@ -209,19 +208,18 @@ static int test_remove_beginning(struct btrfs_trans_handle *trans,
static int test_remove_end(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
- struct btrfs_block_group_cache *cache,
+ struct btrfs_block_group *cache,
struct btrfs_path *path,
u32 alignment)
{
const struct free_space_extent extents[] = {
- {cache->key.objectid, cache->key.offset - alignment},
+ {cache->start, cache->length - alignment},
};
int ret;
ret = __remove_from_free_space_tree(trans, cache, path,
- cache->key.objectid +
- cache->key.offset - alignment,
- alignment);
+ cache->start + cache->length - alignment,
+ alignment);
if (ret) {
test_err("could not remove free space");
return ret;
@@ -233,19 +231,18 @@ static int test_remove_end(struct btrfs_trans_handle *trans,
static int test_remove_middle(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
- struct btrfs_block_group_cache *cache,
+ struct btrfs_block_group *cache,
struct btrfs_path *path,
u32 alignment)
{
const struct free_space_extent extents[] = {
- {cache->key.objectid, alignment},
- {cache->key.objectid + 2 * alignment,
- cache->key.offset - 2 * alignment},
+ {cache->start, alignment},
+ {cache->start + 2 * alignment, cache->length - 2 * alignment},
};
int ret;
ret = __remove_from_free_space_tree(trans, cache, path,
- cache->key.objectid + alignment,
+ cache->start + alignment,
alignment);
if (ret) {
test_err("could not remove free space");
@@ -258,24 +255,23 @@ static int test_remove_middle(struct btrfs_trans_handle *trans,
static int test_merge_left(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
- struct btrfs_block_group_cache *cache,
+ struct btrfs_block_group *cache,
struct btrfs_path *path,
u32 alignment)
{
const struct free_space_extent extents[] = {
- {cache->key.objectid, 2 * alignment},
+ {cache->start, 2 * alignment},
};
int ret;
ret = __remove_from_free_space_tree(trans, cache, path,
- cache->key.objectid,
- cache->key.offset);
+ cache->start, cache->length);
if (ret) {
test_err("could not remove free space");
return ret;
}
- ret = __add_to_free_space_tree(trans, cache, path, cache->key.objectid,
+ ret = __add_to_free_space_tree(trans, cache, path, cache->start,
alignment);
if (ret) {
test_err("could not add free space");
@@ -283,7 +279,7 @@ static int test_merge_left(struct btrfs_trans_handle *trans,
}
ret = __add_to_free_space_tree(trans, cache, path,
- cache->key.objectid + alignment,
+ cache->start + alignment,
alignment);
if (ret) {
test_err("could not add free space");
@@ -296,25 +292,24 @@ static int test_merge_left(struct btrfs_trans_handle *trans,
static int test_merge_right(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
- struct btrfs_block_group_cache *cache,
+ struct btrfs_block_group *cache,
struct btrfs_path *path,
u32 alignment)
{
const struct free_space_extent extents[] = {
- {cache->key.objectid + alignment, 2 * alignment},
+ {cache->start + alignment, 2 * alignment},
};
int ret;
ret = __remove_from_free_space_tree(trans, cache, path,
- cache->key.objectid,
- cache->key.offset);
+ cache->start, cache->length);
if (ret) {
test_err("could not remove free space");
return ret;
}
ret = __add_to_free_space_tree(trans, cache, path,
- cache->key.objectid + 2 * alignment,
+ cache->start + 2 * alignment,
alignment);
if (ret) {
test_err("could not add free space");
@@ -322,7 +317,7 @@ static int test_merge_right(struct btrfs_trans_handle *trans,
}
ret = __add_to_free_space_tree(trans, cache, path,
- cache->key.objectid + alignment,
+ cache->start + alignment,
alignment);
if (ret) {
test_err("could not add free space");
@@ -335,24 +330,23 @@ static int test_merge_right(struct btrfs_trans_handle *trans,
static int test_merge_both(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
- struct btrfs_block_group_cache *cache,
+ struct btrfs_block_group *cache,
struct btrfs_path *path,
u32 alignment)
{
const struct free_space_extent extents[] = {
- {cache->key.objectid, 3 * alignment},
+ {cache->start, 3 * alignment},
};
int ret;
ret = __remove_from_free_space_tree(trans, cache, path,
- cache->key.objectid,
- cache->key.offset);
+ cache->start, cache->length);
if (ret) {
test_err("could not remove free space");
return ret;
}
- ret = __add_to_free_space_tree(trans, cache, path, cache->key.objectid,
+ ret = __add_to_free_space_tree(trans, cache, path, cache->start,
alignment);
if (ret) {
test_err("could not add free space");
@@ -360,16 +354,14 @@ static int test_merge_both(struct btrfs_trans_handle *trans,
}
ret = __add_to_free_space_tree(trans, cache, path,
- cache->key.objectid + 2 * alignment,
- alignment);
+ cache->start + 2 * alignment, alignment);
if (ret) {
test_err("could not add free space");
return ret;
}
ret = __add_to_free_space_tree(trans, cache, path,
- cache->key.objectid + alignment,
- alignment);
+ cache->start + alignment, alignment);
if (ret) {
test_err("could not add free space");
return ret;
@@ -381,26 +373,25 @@ static int test_merge_both(struct btrfs_trans_handle *trans,
static int test_merge_none(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
- struct btrfs_block_group_cache *cache,
+ struct btrfs_block_group *cache,
struct btrfs_path *path,
u32 alignment)
{
const struct free_space_extent extents[] = {
- {cache->key.objectid, alignment},
- {cache->key.objectid + 2 * alignment, alignment},
- {cache->key.objectid + 4 * alignment, alignment},
+ {cache->start, alignment},
+ {cache->start + 2 * alignment, alignment},
+ {cache->start + 4 * alignment, alignment},
};
int ret;
ret = __remove_from_free_space_tree(trans, cache, path,
- cache->key.objectid,
- cache->key.offset);
+ cache->start, cache->length);
if (ret) {
test_err("could not remove free space");
return ret;
}
- ret = __add_to_free_space_tree(trans, cache, path, cache->key.objectid,
+ ret = __add_to_free_space_tree(trans, cache, path, cache->start,
alignment);
if (ret) {
test_err("could not add free space");
@@ -408,16 +399,14 @@ static int test_merge_none(struct btrfs_trans_handle *trans,
}
ret = __add_to_free_space_tree(trans, cache, path,
- cache->key.objectid + 4 * alignment,
- alignment);
+ cache->start + 4 * alignment, alignment);
if (ret) {
test_err("could not add free space");
return ret;
}
ret = __add_to_free_space_tree(trans, cache, path,
- cache->key.objectid + 2 * alignment,
- alignment);
+ cache->start + 2 * alignment, alignment);
if (ret) {
test_err("could not add free space");
return ret;
@@ -429,7 +418,7 @@ static int test_merge_none(struct btrfs_trans_handle *trans,
typedef int (*test_func_t)(struct btrfs_trans_handle *,
struct btrfs_fs_info *,
- struct btrfs_block_group_cache *,
+ struct btrfs_block_group *,
struct btrfs_path *,
u32 alignment);
@@ -438,7 +427,7 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize,
{
struct btrfs_fs_info *fs_info;
struct btrfs_root *root = NULL;
- struct btrfs_block_group_cache *cache = NULL;
+ struct btrfs_block_group *cache = NULL;
struct btrfs_trans_handle trans;
struct btrfs_path *path = NULL;
int ret;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 8624bdee8c5b..cfc08ef9b876 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -24,9 +24,79 @@
#define BTRFS_ROOT_TRANS_TAG 0
+/*
+ * Transaction states and transitions
+ *
+ * No running transaction (fs tree blocks are not modified)
+ * |
+ * | To next stage:
+ * | Call start_transaction() variants. Except btrfs_join_transaction_nostart().
+ * V
+ * Transaction N [[TRANS_STATE_RUNNING]]
+ * |
+ * | New trans handles can be attached to transaction N by calling all
+ * | start_transaction() variants.
+ * |
+ * | To next stage:
+ * | Call btrfs_commit_transaction() on any trans handle attached to
+ * | transaction N
+ * V
+ * Transaction N [[TRANS_STATE_COMMIT_START]]
+ * |
+ * | Will wait for previous running transaction to completely finish if there
+ * | is one
+ * |
+ * | Then one of the following happes:
+ * | - Wait for all other trans handle holders to release.
+ * | The btrfs_commit_transaction() caller will do the commit work.
+ * | - Wait for current transaction to be committed by others.
+ * | Other btrfs_commit_transaction() caller will do the commit work.
+ * |
+ * | At this stage, only btrfs_join_transaction*() variants can attach
+ * | to this running transaction.
+ * | All other variants will wait for current one to finish and attach to
+ * | transaction N+1.
+ * |
+ * | To next stage:
+ * | Caller is chosen to commit transaction N, and all other trans handle
+ * | haven been released.
+ * V
+ * Transaction N [[TRANS_STATE_COMMIT_DOING]]
+ * |
+ * | The heavy lifting transaction work is started.
+ * | From running delayed refs (modifying extent tree) to creating pending
+ * | snapshots, running qgroups.
+ * | In short, modify supporting trees to reflect modifications of subvolume
+ * | trees.
+ * |
+ * | At this stage, all start_transaction() calls will wait for this
+ * | transaction to finish and attach to transaction N+1.
+ * |
+ * | To next stage:
+ * | Until all supporting trees are updated.
+ * V
+ * Transaction N [[TRANS_STATE_UNBLOCKED]]
+ * | Transaction N+1
+ * | All needed trees are modified, thus we only [[TRANS_STATE_RUNNING]]
+ * | need to write them back to disk and update |
+ * | super blocks. |
+ * | |
+ * | At this stage, new transaction is allowed to |
+ * | start. |
+ * | All new start_transaction() calls will be |
+ * | attached to transid N+1. |
+ * | |
+ * | To next stage: |
+ * | Until all tree blocks are super blocks are |
+ * | written to block devices |
+ * V |
+ * Transaction N [[TRANS_STATE_COMPLETED]] V
+ * All tree blocks and super blocks are written. Transaction N+1
+ * This transaction is finished and all its [[TRANS_STATE_COMMIT_START]]
+ * data structures will be cleaned up. | Life goes on
+ */
static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
[TRANS_STATE_RUNNING] = 0U,
- [TRANS_STATE_BLOCKED] = __TRANS_START,
[TRANS_STATE_COMMIT_START] = (__TRANS_START | __TRANS_ATTACH),
[TRANS_STATE_COMMIT_DOING] = (__TRANS_START |
__TRANS_ATTACH |
@@ -63,10 +133,10 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
* discard the physical locations of the block groups.
*/
while (!list_empty(&transaction->deleted_bgs)) {
- struct btrfs_block_group_cache *cache;
+ struct btrfs_block_group *cache;
cache = list_first_entry(&transaction->deleted_bgs,
- struct btrfs_block_group_cache,
+ struct btrfs_block_group,
bg_list);
list_del_init(&cache->bg_list);
btrfs_put_block_group_trimming(cache);
@@ -383,7 +453,7 @@ int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
static inline int is_transaction_blocked(struct btrfs_transaction *trans)
{
- return (trans->state >= TRANS_STATE_BLOCKED &&
+ return (trans->state >= TRANS_STATE_COMMIT_START &&
trans->state < TRANS_STATE_UNBLOCKED &&
!trans->aborted);
}
@@ -570,7 +640,7 @@ again:
INIT_LIST_HEAD(&h->new_bgs);
smp_mb();
- if (cur_trans->state >= TRANS_STATE_BLOCKED &&
+ if (cur_trans->state >= TRANS_STATE_COMMIT_START &&
may_wait_transaction(fs_info, type)) {
current->journal_info = h;
btrfs_commit_transaction(h);
@@ -659,7 +729,7 @@ struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root)
true);
}
-struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root)
+struct btrfs_trans_handle *btrfs_join_transaction_spacecache(struct btrfs_root *root)
{
return start_transaction(root, 0, TRANS_JOIN_NOLOCK,
BTRFS_RESERVE_NO_FLUSH, true);
@@ -798,7 +868,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans)
struct btrfs_transaction *cur_trans = trans->transaction;
smp_mb();
- if (cur_trans->state >= TRANS_STATE_BLOCKED ||
+ if (cur_trans->state >= TRANS_STATE_COMMIT_START ||
cur_trans->delayed_refs.flushing)
return 1;
@@ -831,7 +901,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
{
struct btrfs_fs_info *info = trans->fs_info;
struct btrfs_transaction *cur_trans = trans->transaction;
- int lock = (trans->type != TRANS_JOIN_NOLOCK);
int err = 0;
if (refcount_read(&trans->use_count) > 1) {
@@ -847,13 +916,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
btrfs_trans_release_chunk_metadata(trans);
- if (lock && READ_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) {
- if (throttle)
- return btrfs_commit_transaction(trans);
- else
- wake_up_process(info->transaction_kthread);
- }
-
if (trans->type & __TRANS_FREEZABLE)
sb_end_intwrite(info->sb);
@@ -990,7 +1052,7 @@ static int __btrfs_wait_marked_extents(struct btrfs_fs_info *fs_info,
return werr;
}
-int btrfs_wait_extents(struct btrfs_fs_info *fs_info,
+static int btrfs_wait_extents(struct btrfs_fs_info *fs_info,
struct extent_io_tree *dirty_pages)
{
bool errors = false;
@@ -1875,7 +1937,7 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, int err)
static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
- struct btrfs_block_group_cache *block_group, *tmp;
+ struct btrfs_block_group *block_group, *tmp;
list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) {
btrfs_delayed_refs_rsv_release(fs_info, 1);
@@ -1949,6 +2011,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
struct btrfs_transaction *prev_trans = NULL;
int ret;
+ ASSERT(refcount_read(&trans->use_count) == 1);
+
/* Stop the commit early if ->aborted is set */
if (unlikely(READ_ONCE(cur_trans->aborted))) {
ret = cur_trans->aborted;
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 2c5a6f6e5bb0..49f7196368f5 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -13,7 +13,6 @@
enum btrfs_trans_state {
TRANS_STATE_RUNNING,
- TRANS_STATE_BLOCKED,
TRANS_STATE_COMMIT_START,
TRANS_STATE_COMMIT_DOING,
TRANS_STATE_UNBLOCKED,
@@ -184,7 +183,7 @@ struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
unsigned int num_items,
int min_factor);
struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root);
-struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root);
+struct btrfs_trans_handle *btrfs_join_transaction_spacecache(struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_join_transaction_nostart(struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_attach_transaction_barrier(
@@ -218,8 +217,6 @@ int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_write_marked_extents(struct btrfs_fs_info *fs_info,
struct extent_io_tree *dirty_pages, int mark);
-int btrfs_wait_extents(struct btrfs_fs_info *fs_info,
- struct extent_io_tree *dirty_pages);
int btrfs_wait_tree_log_extents(struct btrfs_root *root, int mark);
int btrfs_transaction_blocked(struct btrfs_fs_info *info);
int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 43e488f5d063..493d4d9e0f79 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -23,6 +23,7 @@
#include "disk-io.h"
#include "compression.h"
#include "volumes.h"
+#include "misc.h"
/*
* Error message should follow the following format:
@@ -124,6 +125,74 @@ static u64 file_extent_end(struct extent_buffer *leaf,
return end;
}
+/*
+ * Customized report for dir_item, the only new important information is
+ * key->objectid, which represents inode number
+ */
+__printf(3, 4)
+__cold
+static void dir_item_err(const struct extent_buffer *eb, int slot,
+ const char *fmt, ...)
+{
+ const struct btrfs_fs_info *fs_info = eb->fs_info;
+ struct btrfs_key key;
+ struct va_format vaf;
+ va_list args;
+
+ btrfs_item_key_to_cpu(eb, &key, slot);
+ va_start(args, fmt);
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ btrfs_crit(fs_info,
+ "corrupt %s: root=%llu block=%llu slot=%d ino=%llu, %pV",
+ btrfs_header_level(eb) == 0 ? "leaf" : "node",
+ btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
+ key.objectid, &vaf);
+ va_end(args);
+}
+
+/*
+ * This functions checks prev_key->objectid, to ensure current key and prev_key
+ * share the same objectid as inode number.
+ *
+ * This is to detect missing INODE_ITEM in subvolume trees.
+ *
+ * Return true if everything is OK or we don't need to check.
+ * Return false if anything is wrong.
+ */
+static bool check_prev_ino(struct extent_buffer *leaf,
+ struct btrfs_key *key, int slot,
+ struct btrfs_key *prev_key)
+{
+ /* No prev key, skip check */
+ if (slot == 0)
+ return true;
+
+ /* Only these key->types needs to be checked */
+ ASSERT(key->type == BTRFS_XATTR_ITEM_KEY ||
+ key->type == BTRFS_INODE_REF_KEY ||
+ key->type == BTRFS_DIR_INDEX_KEY ||
+ key->type == BTRFS_DIR_ITEM_KEY ||
+ key->type == BTRFS_EXTENT_DATA_KEY);
+
+ /*
+ * Only subvolume trees along with their reloc trees need this check.
+ * Things like log tree doesn't follow this ino requirement.
+ */
+ if (!is_fstree(btrfs_header_owner(leaf)))
+ return true;
+
+ if (key->objectid == prev_key->objectid)
+ return true;
+
+ /* Error found */
+ dir_item_err(leaf, slot,
+ "invalid previous key objectid, have %llu expect %llu",
+ prev_key->objectid, key->objectid);
+ return false;
+}
static int check_extent_data_item(struct extent_buffer *leaf,
struct btrfs_key *key, int slot,
struct btrfs_key *prev_key)
@@ -141,13 +210,33 @@ static int check_extent_data_item(struct extent_buffer *leaf,
return -EUCLEAN;
}
+ /*
+ * Previous key must have the same key->objectid (ino).
+ * It can be XATTR_ITEM, INODE_ITEM or just another EXTENT_DATA.
+ * But if objectids mismatch, it means we have a missing
+ * INODE_ITEM.
+ */
+ if (!check_prev_ino(leaf, key, slot, prev_key))
+ return -EUCLEAN;
+
fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
- if (btrfs_file_extent_type(leaf, fi) > BTRFS_FILE_EXTENT_TYPES) {
+ /*
+ * Make sure the item contains at least inline header, so the file
+ * extent type is not some garbage.
+ */
+ if (item_size < BTRFS_FILE_EXTENT_INLINE_DATA_START) {
+ file_extent_err(leaf, slot,
+ "invalid item size, have %u expect [%lu, %u)",
+ item_size, BTRFS_FILE_EXTENT_INLINE_DATA_START,
+ SZ_4K);
+ return -EUCLEAN;
+ }
+ if (btrfs_file_extent_type(leaf, fi) >= BTRFS_NR_FILE_EXTENT_TYPES) {
file_extent_err(leaf, slot,
"invalid type for file extent, have %u expect range [0, %u]",
btrfs_file_extent_type(leaf, fi),
- BTRFS_FILE_EXTENT_TYPES);
+ BTRFS_NR_FILE_EXTENT_TYPES - 1);
return -EUCLEAN;
}
@@ -155,11 +244,11 @@ static int check_extent_data_item(struct extent_buffer *leaf,
* Support for new compression/encryption must introduce incompat flag,
* and must be caught in open_ctree().
*/
- if (btrfs_file_extent_compression(leaf, fi) > BTRFS_COMPRESS_TYPES) {
+ if (btrfs_file_extent_compression(leaf, fi) >= BTRFS_NR_COMPRESS_TYPES) {
file_extent_err(leaf, slot,
"invalid compression for file extent, have %u expect range [0, %u]",
btrfs_file_extent_compression(leaf, fi),
- BTRFS_COMPRESS_TYPES);
+ BTRFS_NR_COMPRESS_TYPES - 1);
return -EUCLEAN;
}
if (btrfs_file_extent_encryption(leaf, fi)) {
@@ -270,42 +359,17 @@ static int check_csum_item(struct extent_buffer *leaf, struct btrfs_key *key,
return 0;
}
-/*
- * Customized reported for dir_item, only important new info is key->objectid,
- * which represents inode number
- */
-__printf(3, 4)
-__cold
-static void dir_item_err(const struct extent_buffer *eb, int slot,
- const char *fmt, ...)
-{
- const struct btrfs_fs_info *fs_info = eb->fs_info;
- struct btrfs_key key;
- struct va_format vaf;
- va_list args;
-
- btrfs_item_key_to_cpu(eb, &key, slot);
- va_start(args, fmt);
-
- vaf.fmt = fmt;
- vaf.va = &args;
-
- btrfs_crit(fs_info,
- "corrupt %s: root=%llu block=%llu slot=%d ino=%llu, %pV",
- btrfs_header_level(eb) == 0 ? "leaf" : "node",
- btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
- key.objectid, &vaf);
- va_end(args);
-}
-
static int check_dir_item(struct extent_buffer *leaf,
- struct btrfs_key *key, int slot)
+ struct btrfs_key *key, struct btrfs_key *prev_key,
+ int slot)
{
struct btrfs_fs_info *fs_info = leaf->fs_info;
struct btrfs_dir_item *di;
u32 item_size = btrfs_item_size_nr(leaf, slot);
u32 cur = 0;
+ if (!check_prev_ino(leaf, key, slot, prev_key))
+ return -EUCLEAN;
di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
while (cur < item_size) {
u32 name_len;
@@ -459,23 +523,23 @@ static int check_block_group_item(struct extent_buffer *leaf,
read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
sizeof(bgi));
- if (btrfs_block_group_chunk_objectid(&bgi) !=
+ if (btrfs_stack_block_group_chunk_objectid(&bgi) !=
BTRFS_FIRST_CHUNK_TREE_OBJECTID) {
block_group_err(leaf, slot,
"invalid block group chunk objectid, have %llu expect %llu",
- btrfs_block_group_chunk_objectid(&bgi),
+ btrfs_stack_block_group_chunk_objectid(&bgi),
BTRFS_FIRST_CHUNK_TREE_OBJECTID);
return -EUCLEAN;
}
- if (btrfs_block_group_used(&bgi) > key->offset) {
+ if (btrfs_stack_block_group_used(&bgi) > key->offset) {
block_group_err(leaf, slot,
"invalid block group used, have %llu expect [0, %llu)",
- btrfs_block_group_used(&bgi), key->offset);
+ btrfs_stack_block_group_used(&bgi), key->offset);
return -EUCLEAN;
}
- flags = btrfs_block_group_flags(&bgi);
+ flags = btrfs_stack_block_group_flags(&bgi);
if (hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) > 1) {
block_group_err(leaf, slot,
"invalid profile flags, have 0x%llx (%lu bits set) expect no more than 1 bit set",
@@ -609,7 +673,7 @@ int btrfs_check_chunk_valid(struct extent_buffer *leaf,
return -EUCLEAN;
}
- if (!is_power_of_2(type & BTRFS_BLOCK_GROUP_PROFILE_MASK) &&
+ if (!has_single_bit_set(type & BTRFS_BLOCK_GROUP_PROFILE_MASK) &&
(type & BTRFS_BLOCK_GROUP_PROFILE_MASK) != 0) {
chunk_err(leaf, chunk, logical,
"invalid chunk profile flag: 0x%llx, expect 0 or 1 bit set",
@@ -686,9 +750,7 @@ static void dev_item_err(const struct extent_buffer *eb, int slot,
static int check_dev_item(struct extent_buffer *leaf,
struct btrfs_key *key, int slot)
{
- struct btrfs_fs_info *fs_info = leaf->fs_info;
struct btrfs_dev_item *ditem;
- u64 max_devid = max(BTRFS_MAX_DEVS(fs_info), BTRFS_MAX_DEVS_SYS_CHUNK);
if (key->objectid != BTRFS_DEV_ITEMS_OBJECTID) {
dev_item_err(leaf, slot,
@@ -696,12 +758,6 @@ static int check_dev_item(struct extent_buffer *leaf,
key->objectid, BTRFS_DEV_ITEMS_OBJECTID);
return -EUCLEAN;
}
- if (key->offset > max_devid) {
- dev_item_err(leaf, slot,
- "invalid devid: has=%llu expect=[0, %llu]",
- key->offset, max_devid);
- return -EUCLEAN;
- }
ditem = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item);
if (btrfs_device_id(leaf, ditem) != key->offset) {
dev_item_err(leaf, slot,
@@ -793,11 +849,11 @@ static int check_inode_item(struct extent_buffer *leaf,
}
/*
- * S_IFMT is not bit mapped so we can't completely rely on is_power_of_2,
- * but is_power_of_2() can save us from checking FIFO/CHR/DIR/REG.
- * Only needs to check BLK, LNK and SOCKS
+ * S_IFMT is not bit mapped so we can't completely rely on
+ * is_power_of_2/has_single_bit_set, but it can save us from checking
+ * FIFO/CHR/DIR/REG. Only needs to check BLK, LNK and SOCKS
*/
- if (!is_power_of_2(mode & S_IFMT)) {
+ if (!has_single_bit_set(mode & S_IFMT)) {
if (!S_ISLNK(mode) && !S_ISBLK(mode) && !S_ISSOCK(mode)) {
inode_item_err(fs_info, leaf, slot,
"invalid mode: has 0%o expect valid S_IF* bit(s)",
@@ -1018,8 +1074,8 @@ static int check_extent_item(struct extent_buffer *leaf,
btrfs_super_generation(fs_info->super_copy) + 1);
return -EUCLEAN;
}
- if (!is_power_of_2(flags & (BTRFS_EXTENT_FLAG_DATA |
- BTRFS_EXTENT_FLAG_TREE_BLOCK))) {
+ if (!has_single_bit_set(flags & (BTRFS_EXTENT_FLAG_DATA |
+ BTRFS_EXTENT_FLAG_TREE_BLOCK))) {
extent_err(leaf, slot,
"invalid extent flag, have 0x%llx expect 1 bit set in 0x%llx",
flags, BTRFS_EXTENT_FLAG_DATA |
@@ -1232,6 +1288,58 @@ static int check_extent_data_ref(struct extent_buffer *leaf,
return 0;
}
+#define inode_ref_err(fs_info, eb, slot, fmt, args...) \
+ inode_item_err(fs_info, eb, slot, fmt, ##args)
+static int check_inode_ref(struct extent_buffer *leaf,
+ struct btrfs_key *key, struct btrfs_key *prev_key,
+ int slot)
+{
+ struct btrfs_inode_ref *iref;
+ unsigned long ptr;
+ unsigned long end;
+
+ if (!check_prev_ino(leaf, key, slot, prev_key))
+ return -EUCLEAN;
+ /* namelen can't be 0, so item_size == sizeof() is also invalid */
+ if (btrfs_item_size_nr(leaf, slot) <= sizeof(*iref)) {
+ inode_ref_err(fs_info, leaf, slot,
+ "invalid item size, have %u expect (%zu, %u)",
+ btrfs_item_size_nr(leaf, slot),
+ sizeof(*iref), BTRFS_LEAF_DATA_SIZE(leaf->fs_info));
+ return -EUCLEAN;
+ }
+
+ ptr = btrfs_item_ptr_offset(leaf, slot);
+ end = ptr + btrfs_item_size_nr(leaf, slot);
+ while (ptr < end) {
+ u16 namelen;
+
+ if (ptr + sizeof(iref) > end) {
+ inode_ref_err(fs_info, leaf, slot,
+ "inode ref overflow, ptr %lu end %lu inode_ref_size %zu",
+ ptr, end, sizeof(iref));
+ return -EUCLEAN;
+ }
+
+ iref = (struct btrfs_inode_ref *)ptr;
+ namelen = btrfs_inode_ref_name_len(leaf, iref);
+ if (ptr + sizeof(*iref) + namelen > end) {
+ inode_ref_err(fs_info, leaf, slot,
+ "inode ref overflow, ptr %lu end %lu namelen %u",
+ ptr, end, namelen);
+ return -EUCLEAN;
+ }
+
+ /*
+ * NOTE: In theory we should record all found index numbers
+ * to find any duplicated indexes, but that will be too time
+ * consuming for inodes with too many hard links.
+ */
+ ptr += sizeof(*iref) + namelen;
+ }
+ return 0;
+}
+
/*
* Common point to switch the item-specific validation.
*/
@@ -1252,7 +1360,10 @@ static int check_leaf_item(struct extent_buffer *leaf,
case BTRFS_DIR_ITEM_KEY:
case BTRFS_DIR_INDEX_KEY:
case BTRFS_XATTR_ITEM_KEY:
- ret = check_dir_item(leaf, key, slot);
+ ret = check_dir_item(leaf, key, prev_key, slot);
+ break;
+ case BTRFS_INODE_REF_KEY:
+ ret = check_inode_ref(leaf, key, prev_key, slot);
break;
case BTRFS_BLOCK_GROUP_ITEM_KEY:
ret = check_block_group_item(leaf, key, slot);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 29b82a795522..6f757361db53 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -559,7 +559,7 @@ static noinline struct inode *read_one_inode(struct btrfs_root *root,
key.objectid = objectid;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
- inode = btrfs_iget(root->fs_info->sb, &key, root, NULL);
+ inode = btrfs_iget(root->fs_info->sb, &key, root);
if (IS_ERR(inode))
inode = NULL;
return inode;
@@ -945,54 +945,32 @@ static noinline int backref_in_log(struct btrfs_root *log,
const char *name, int namelen)
{
struct btrfs_path *path;
- struct btrfs_inode_ref *ref;
- unsigned long ptr;
- unsigned long ptr_end;
- unsigned long name_ptr;
- int found_name_len;
- int item_size;
int ret;
- int match = 0;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
ret = btrfs_search_slot(NULL, log, key, path, 0, 0);
- if (ret != 0)
+ if (ret < 0) {
goto out;
-
- ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]);
-
- if (key->type == BTRFS_INODE_EXTREF_KEY) {
- if (btrfs_find_name_in_ext_backref(path->nodes[0],
- path->slots[0],
- ref_objectid,
- name, namelen))
- match = 1;
-
+ } else if (ret == 1) {
+ ret = 0;
goto out;
}
- item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
- ptr_end = ptr + item_size;
- while (ptr < ptr_end) {
- ref = (struct btrfs_inode_ref *)ptr;
- found_name_len = btrfs_inode_ref_name_len(path->nodes[0], ref);
- if (found_name_len == namelen) {
- name_ptr = (unsigned long)(ref + 1);
- ret = memcmp_extent_buffer(path->nodes[0], name,
- name_ptr, namelen);
- if (ret == 0) {
- match = 1;
- goto out;
- }
- }
- ptr = (unsigned long)(ref + 1) + found_name_len;
- }
+ if (key->type == BTRFS_INODE_EXTREF_KEY)
+ ret = !!btrfs_find_name_in_ext_backref(path->nodes[0],
+ path->slots[0],
+ ref_objectid,
+ name, namelen);
+ else
+ ret = !!btrfs_find_name_in_backref(path->nodes[0],
+ path->slots[0],
+ name, namelen);
out:
btrfs_free_path(path);
- return match;
+ return ret;
}
static inline int __add_inode_ref(struct btrfs_trans_handle *trans,
@@ -1050,10 +1028,13 @@ again:
(unsigned long)(victim_ref + 1),
victim_name_len);
- if (!backref_in_log(log_root, &search_key,
- parent_objectid,
- victim_name,
- victim_name_len)) {
+ ret = backref_in_log(log_root, &search_key,
+ parent_objectid, victim_name,
+ victim_name_len);
+ if (ret < 0) {
+ kfree(victim_name);
+ return ret;
+ } else if (!ret) {
inc_nlink(&inode->vfs_inode);
btrfs_release_path(path);
@@ -1115,10 +1096,12 @@ again:
search_key.offset = btrfs_extref_hash(parent_objectid,
victim_name,
victim_name_len);
- ret = 0;
- if (!backref_in_log(log_root, &search_key,
- parent_objectid, victim_name,
- victim_name_len)) {
+ ret = backref_in_log(log_root, &search_key,
+ parent_objectid, victim_name,
+ victim_name_len);
+ if (ret < 0) {
+ return ret;
+ } else if (!ret) {
ret = -ENOENT;
victim_parent = read_one_inode(root,
parent_objectid);
@@ -1885,30 +1868,6 @@ static noinline int insert_one_name(struct btrfs_trans_handle *trans,
}
/*
- * Return true if an inode reference exists in the log for the given name,
- * inode and parent inode.
- */
-static bool name_in_log_ref(struct btrfs_root *log_root,
- const char *name, const int name_len,
- const u64 dirid, const u64 ino)
-{
- struct btrfs_key search_key;
-
- search_key.objectid = ino;
- search_key.type = BTRFS_INODE_REF_KEY;
- search_key.offset = dirid;
- if (backref_in_log(log_root, &search_key, dirid, name, name_len))
- return true;
-
- search_key.type = BTRFS_INODE_EXTREF_KEY;
- search_key.offset = btrfs_extref_hash(dirid, name, name_len);
- if (backref_in_log(log_root, &search_key, dirid, name, name_len))
- return true;
-
- return false;
-}
-
-/*
* take a single entry in a log directory item and replay it into
* the subvolume.
*
@@ -2024,8 +1983,31 @@ out:
return ret;
insert:
- if (name_in_log_ref(root->log_root, name, name_len,
- key->objectid, log_key.objectid)) {
+ /*
+ * Check if the inode reference exists in the log for the given name,
+ * inode and parent inode
+ */
+ found_key.objectid = log_key.objectid;
+ found_key.type = BTRFS_INODE_REF_KEY;
+ found_key.offset = key->objectid;
+ ret = backref_in_log(root->log_root, &found_key, 0, name, name_len);
+ if (ret < 0) {
+ goto out;
+ } else if (ret) {
+ /* The dentry will be added later. */
+ ret = 0;
+ update_size = false;
+ goto out;
+ }
+
+ found_key.objectid = log_key.objectid;
+ found_key.type = BTRFS_INODE_EXTREF_KEY;
+ found_key.offset = key->objectid;
+ ret = backref_in_log(root->log_root, &found_key, key->objectid, name,
+ name_len);
+ if (ret < 0) {
+ goto out;
+ } else if (ret) {
/* The dentry will be added later. */
ret = 0;
update_size = false;
@@ -2869,7 +2851,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
level = btrfs_header_level(log->node);
orig_level = level;
path->nodes[level] = log->node;
- extent_buffer_get(log->node);
+ atomic_inc(&log->node->refs);
path->slots[level] = 0;
while (1) {
@@ -2932,7 +2914,8 @@ out:
* in the tree of log roots
*/
static int update_log_root(struct btrfs_trans_handle *trans,
- struct btrfs_root *log)
+ struct btrfs_root *log,
+ struct btrfs_root_item *root_item)
{
struct btrfs_fs_info *fs_info = log->fs_info;
int ret;
@@ -2940,10 +2923,10 @@ static int update_log_root(struct btrfs_trans_handle *trans,
if (log->log_transid == 1) {
/* insert root item on the first sync */
ret = btrfs_insert_root(trans, fs_info->log_root_tree,
- &log->root_key, &log->root_item);
+ &log->root_key, root_item);
} else {
ret = btrfs_update_root(trans, fs_info->log_root_tree,
- &log->root_key, &log->root_item);
+ &log->root_key, root_item);
}
return ret;
}
@@ -3041,6 +3024,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_root *log = root->log_root;
struct btrfs_root *log_root_tree = fs_info->log_root_tree;
+ struct btrfs_root_item new_root_item;
int log_transid = 0;
struct btrfs_log_ctx root_log_ctx;
struct blk_plug plug;
@@ -3104,18 +3088,26 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
goto out;
}
+ /*
+ * We _must_ update under the root->log_mutex in order to make sure we
+ * have a consistent view of the log root we are trying to commit at
+ * this moment.
+ *
+ * We _must_ copy this into a local copy, because we are not holding the
+ * log_root_tree->log_mutex yet. This is important because when we
+ * commit the log_root_tree we must have a consistent view of the
+ * log_root_tree when we update the super block to point at the
+ * log_root_tree bytenr. If we update the log_root_tree here we'll race
+ * with the commit and possibly point at the new block which we may not
+ * have written out.
+ */
btrfs_set_root_node(&log->root_item, log->node);
+ memcpy(&new_root_item, &log->root_item, sizeof(new_root_item));
root->log_transid++;
log->log_transid = root->log_transid;
root->log_start_pid = 0;
/*
- * Update or create log root item under the root's log_mutex to prevent
- * races with concurrent log syncs that can lead to failure to update
- * log root item because it was not created yet.
- */
- ret = update_log_root(trans, log);
- /*
* IO has been started, blocks of the log tree have WRITTEN flag set
* in their headers. new modifications of the log will be written to
* new positions. so it's safe to allow log writers to go in.
@@ -3135,6 +3127,14 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
mutex_unlock(&log_root_tree->log_mutex);
mutex_lock(&log_root_tree->log_mutex);
+
+ /*
+ * Now we are safe to update the log_root_tree because we're under the
+ * log_mutex, and we're a current writer so we're holding the commit
+ * open until we drop the log_mutex.
+ */
+ ret = update_log_root(trans, log, &new_root_item);
+
if (atomic_dec_and_test(&log_root_tree->log_writers)) {
/* atomic_dec_and_test implies a barrier */
cond_wake_up_nomb(&log_root_tree->log_writer_wait);
@@ -4965,7 +4965,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
key.objectid = ino;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
- inode = btrfs_iget(fs_info->sb, &key, root, NULL);
+ inode = btrfs_iget(fs_info->sb, &key, root);
/*
* If the other inode that had a conflicting dir entry was
* deleted in the current transaction, we need to log its parent
@@ -4975,8 +4975,7 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
ret = PTR_ERR(inode);
if (ret == -ENOENT) {
key.objectid = parent;
- inode = btrfs_iget(fs_info->sb, &key, root,
- NULL);
+ inode = btrfs_iget(fs_info->sb, &key, root);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
} else {
@@ -5681,7 +5680,7 @@ process_leaf:
continue;
btrfs_release_path(path);
- di_inode = btrfs_iget(fs_info->sb, &di_key, root, NULL);
+ di_inode = btrfs_iget(fs_info->sb, &di_key, root);
if (IS_ERR(di_inode)) {
ret = PTR_ERR(di_inode);
goto next_dir_inode;
@@ -5807,8 +5806,7 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
cur_offset = item_size;
}
- dir_inode = btrfs_iget(fs_info->sb, &inode_key,
- root, NULL);
+ dir_inode = btrfs_iget(fs_info->sb, &inode_key, root);
/*
* If the parent inode was deleted, return an error to
* fallback to a transaction commit. This is to prevent
@@ -5882,7 +5880,7 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans,
search_key.objectid = found_key.offset;
search_key.type = BTRFS_INODE_ITEM_KEY;
search_key.offset = 0;
- inode = btrfs_iget(fs_info->sb, &search_key, root, NULL);
+ inode = btrfs_iget(fs_info->sb, &search_key, root);
if (IS_ERR(inode))
return PTR_ERR(inode);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index cdd7af424033..d8e5560db285 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -58,6 +58,30 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
.bg_flag = BTRFS_BLOCK_GROUP_RAID1,
.mindev_error = BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET,
},
+ [BTRFS_RAID_RAID1C3] = {
+ .sub_stripes = 1,
+ .dev_stripes = 1,
+ .devs_max = 0,
+ .devs_min = 3,
+ .tolerated_failures = 2,
+ .devs_increment = 3,
+ .ncopies = 3,
+ .raid_name = "raid1c3",
+ .bg_flag = BTRFS_BLOCK_GROUP_RAID1C3,
+ .mindev_error = BTRFS_ERROR_DEV_RAID1C3_MIN_NOT_MET,
+ },
+ [BTRFS_RAID_RAID1C4] = {
+ .sub_stripes = 1,
+ .dev_stripes = 1,
+ .devs_max = 0,
+ .devs_min = 4,
+ .tolerated_failures = 3,
+ .devs_increment = 4,
+ .ncopies = 4,
+ .raid_name = "raid1c4",
+ .bg_flag = BTRFS_BLOCK_GROUP_RAID1C4,
+ .mindev_error = BTRFS_ERROR_DEV_RAID1C4_MIN_NOT_MET,
+ },
[BTRFS_RAID_DUP] = {
.sub_stripes = 1,
.dev_stripes = 2,
@@ -297,7 +321,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
DEFINE_MUTEX(uuid_mutex);
static LIST_HEAD(fs_uuids);
-struct list_head *btrfs_get_fs_uuids(void)
+struct list_head * __attribute_const__ btrfs_get_fs_uuids(void)
{
return &fs_uuids;
}
@@ -397,8 +421,6 @@ static struct btrfs_device *__alloc_device(void)
INIT_LIST_HEAD(&dev->dev_alloc_list);
INIT_LIST_HEAD(&dev->post_commit_list);
- spin_lock_init(&dev->io_lock);
-
atomic_set(&dev->reada_in_flight, 0);
atomic_set(&dev->dev_stats_ccnt, 0);
btrfs_device_data_ordered_init(dev);
@@ -501,212 +523,6 @@ error:
return ret;
}
-static void requeue_list(struct btrfs_pending_bios *pending_bios,
- struct bio *head, struct bio *tail)
-{
-
- struct bio *old_head;
-
- old_head = pending_bios->head;
- pending_bios->head = head;
- if (pending_bios->tail)
- tail->bi_next = old_head;
- else
- pending_bios->tail = tail;
-}
-
-/*
- * we try to collect pending bios for a device so we don't get a large
- * number of procs sending bios down to the same device. This greatly
- * improves the schedulers ability to collect and merge the bios.
- *
- * But, it also turns into a long list of bios to process and that is sure
- * to eventually make the worker thread block. The solution here is to
- * make some progress and then put this work struct back at the end of
- * the list if the block device is congested. This way, multiple devices
- * can make progress from a single worker thread.
- */
-static noinline void run_scheduled_bios(struct btrfs_device *device)
-{
- struct btrfs_fs_info *fs_info = device->fs_info;
- struct bio *pending;
- struct backing_dev_info *bdi;
- struct btrfs_pending_bios *pending_bios;
- struct bio *tail;
- struct bio *cur;
- int again = 0;
- unsigned long num_run;
- unsigned long batch_run = 0;
- unsigned long last_waited = 0;
- int force_reg = 0;
- int sync_pending = 0;
- struct blk_plug plug;
-
- /*
- * this function runs all the bios we've collected for
- * a particular device. We don't want to wander off to
- * another device without first sending all of these down.
- * So, setup a plug here and finish it off before we return
- */
- blk_start_plug(&plug);
-
- bdi = device->bdev->bd_bdi;
-
-loop:
- spin_lock(&device->io_lock);
-
-loop_lock:
- num_run = 0;
-
- /* take all the bios off the list at once and process them
- * later on (without the lock held). But, remember the
- * tail and other pointers so the bios can be properly reinserted
- * into the list if we hit congestion
- */
- if (!force_reg && device->pending_sync_bios.head) {
- pending_bios = &device->pending_sync_bios;
- force_reg = 1;
- } else {
- pending_bios = &device->pending_bios;
- force_reg = 0;
- }
-
- pending = pending_bios->head;
- tail = pending_bios->tail;
- WARN_ON(pending && !tail);
-
- /*
- * if pending was null this time around, no bios need processing
- * at all and we can stop. Otherwise it'll loop back up again
- * and do an additional check so no bios are missed.
- *
- * device->running_pending is used to synchronize with the
- * schedule_bio code.
- */
- if (device->pending_sync_bios.head == NULL &&
- device->pending_bios.head == NULL) {
- again = 0;
- device->running_pending = 0;
- } else {
- again = 1;
- device->running_pending = 1;
- }
-
- pending_bios->head = NULL;
- pending_bios->tail = NULL;
-
- spin_unlock(&device->io_lock);
-
- while (pending) {
-
- rmb();
- /* we want to work on both lists, but do more bios on the
- * sync list than the regular list
- */
- if ((num_run > 32 &&
- pending_bios != &device->pending_sync_bios &&
- device->pending_sync_bios.head) ||
- (num_run > 64 && pending_bios == &device->pending_sync_bios &&
- device->pending_bios.head)) {
- spin_lock(&device->io_lock);
- requeue_list(pending_bios, pending, tail);
- goto loop_lock;
- }
-
- cur = pending;
- pending = pending->bi_next;
- cur->bi_next = NULL;
-
- BUG_ON(atomic_read(&cur->__bi_cnt) == 0);
-
- /*
- * if we're doing the sync list, record that our
- * plug has some sync requests on it
- *
- * If we're doing the regular list and there are
- * sync requests sitting around, unplug before
- * we add more
- */
- if (pending_bios == &device->pending_sync_bios) {
- sync_pending = 1;
- } else if (sync_pending) {
- blk_finish_plug(&plug);
- blk_start_plug(&plug);
- sync_pending = 0;
- }
-
- btrfsic_submit_bio(cur);
- num_run++;
- batch_run++;
-
- cond_resched();
-
- /*
- * we made progress, there is more work to do and the bdi
- * is now congested. Back off and let other work structs
- * run instead
- */
- if (pending && bdi_write_congested(bdi) && batch_run > 8 &&
- fs_info->fs_devices->open_devices > 1) {
- struct io_context *ioc;
-
- ioc = current->io_context;
-
- /*
- * the main goal here is that we don't want to
- * block if we're going to be able to submit
- * more requests without blocking.
- *
- * This code does two great things, it pokes into
- * the elevator code from a filesystem _and_
- * it makes assumptions about how batching works.
- */
- if (ioc && ioc->nr_batch_requests > 0 &&
- time_before(jiffies, ioc->last_waited + HZ/50UL) &&
- (last_waited == 0 ||
- ioc->last_waited == last_waited)) {
- /*
- * we want to go through our batch of
- * requests and stop. So, we copy out
- * the ioc->last_waited time and test
- * against it before looping
- */
- last_waited = ioc->last_waited;
- cond_resched();
- continue;
- }
- spin_lock(&device->io_lock);
- requeue_list(pending_bios, pending, tail);
- device->running_pending = 1;
-
- spin_unlock(&device->io_lock);
- btrfs_queue_work(fs_info->submit_workers,
- &device->work);
- goto done;
- }
- }
-
- cond_resched();
- if (again)
- goto loop;
-
- spin_lock(&device->io_lock);
- if (device->pending_bios.head || device->pending_sync_bios.head)
- goto loop_lock;
- spin_unlock(&device->io_lock);
-
-done:
- blk_finish_plug(&plug);
-}
-
-static void pending_bios_fn(struct btrfs_work *work)
-{
- struct btrfs_device *device;
-
- device = container_of(work, struct btrfs_device, work);
- run_scheduled_bios(device);
-}
-
static bool device_path_matched(const char *path, struct btrfs_device *device)
{
int found;
@@ -818,7 +634,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
}
clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
- fs_devices->seeding = 1;
+ fs_devices->seeding = true;
} else {
if (bdev_read_only(bdev))
clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
@@ -828,7 +644,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
q = bdev_get_queue(bdev);
if (!blk_queue_nonrot(q))
- fs_devices->rotating = 1;
+ fs_devices->rotating = true;
device->bdev = bdev;
clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
@@ -1005,11 +821,15 @@ static noinline struct btrfs_device *device_list_add(const char *path,
*new_device_added = true;
if (disk_super->label[0])
- pr_info("BTRFS: device label %s devid %llu transid %llu %s\n",
- disk_super->label, devid, found_transid, path);
+ pr_info(
+ "BTRFS: device label %s devid %llu transid %llu %s scanned by %s (%d)\n",
+ disk_super->label, devid, found_transid, path,
+ current->comm, task_pid_nr(current));
else
- pr_info("BTRFS: device fsid %pU devid %llu transid %llu %s\n",
- disk_super->fsid, devid, found_transid, path);
+ pr_info(
+ "BTRFS: device fsid %pU devid %llu transid %llu %s scanned by %s (%d)\n",
+ disk_super->fsid, devid, found_transid, path,
+ current->comm, task_pid_nr(current));
} else if (!device->name || strcmp(device->name->str, path)) {
/*
@@ -1295,7 +1115,7 @@ static int close_fs_devices(struct btrfs_fs_devices *fs_devices)
WARN_ON(fs_devices->open_devices);
WARN_ON(fs_devices->rw_devices);
fs_devices->opened = 0;
- fs_devices->seeding = 0;
+ fs_devices->seeding = false;
return 0;
}
@@ -2048,7 +1868,7 @@ static struct btrfs_device * btrfs_find_next_active_device(
* where this function called, there should be always be another device (or
* this_dev) which is active.
*/
-void btrfs_assign_next_active_device(struct btrfs_device *device,
+void __cold btrfs_assign_next_active_device(struct btrfs_device *device,
struct btrfs_device *this_dev)
{
struct btrfs_fs_info *fs_info = device->fs_info;
@@ -2450,11 +2270,11 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list);
mutex_unlock(&fs_info->chunk_mutex);
- fs_devices->seeding = 0;
+ fs_devices->seeding = false;
fs_devices->num_devices = 0;
fs_devices->open_devices = 0;
fs_devices->missing_devices = 0;
- fs_devices->rotating = 0;
+ fs_devices->rotating = false;
fs_devices->seed = seed_devices;
generate_random_uuid(fs_devices->fsid);
@@ -2649,7 +2469,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
atomic64_add(device->total_bytes, &fs_info->free_chunk_space);
if (!blk_queue_nonrot(q))
- fs_devices->rotating = 1;
+ fs_devices->rotating = true;
orig_super_total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
btrfs_set_super_total_bytes(fs_info->super_copy,
@@ -3177,7 +2997,7 @@ error:
static int btrfs_may_alloc_data_chunk(struct btrfs_fs_info *fs_info,
u64 chunk_offset)
{
- struct btrfs_block_group_cache *cache;
+ struct btrfs_block_group *cache;
u64 bytes_used;
u64 chunk_type;
@@ -3186,27 +3006,28 @@ static int btrfs_may_alloc_data_chunk(struct btrfs_fs_info *fs_info,
chunk_type = cache->flags;
btrfs_put_block_group(cache);
- if (chunk_type & BTRFS_BLOCK_GROUP_DATA) {
- spin_lock(&fs_info->data_sinfo->lock);
- bytes_used = fs_info->data_sinfo->bytes_used;
- spin_unlock(&fs_info->data_sinfo->lock);
+ if (!(chunk_type & BTRFS_BLOCK_GROUP_DATA))
+ return 0;
- if (!bytes_used) {
- struct btrfs_trans_handle *trans;
- int ret;
+ spin_lock(&fs_info->data_sinfo->lock);
+ bytes_used = fs_info->data_sinfo->bytes_used;
+ spin_unlock(&fs_info->data_sinfo->lock);
- trans = btrfs_join_transaction(fs_info->tree_root);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
+ if (!bytes_used) {
+ struct btrfs_trans_handle *trans;
+ int ret;
- ret = btrfs_force_chunk_alloc(trans,
- BTRFS_BLOCK_GROUP_DATA);
- btrfs_end_transaction(trans);
- if (ret < 0)
- return ret;
- return 1;
- }
+ trans = btrfs_join_transaction(fs_info->tree_root);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+
+ ret = btrfs_force_chunk_alloc(trans, BTRFS_BLOCK_GROUP_DATA);
+ btrfs_end_transaction(trans);
+ if (ret < 0)
+ return ret;
+ return 1;
}
+
return 0;
}
@@ -3385,28 +3206,28 @@ static int chunk_profiles_filter(u64 chunk_type,
static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
struct btrfs_balance_args *bargs)
{
- struct btrfs_block_group_cache *cache;
+ struct btrfs_block_group *cache;
u64 chunk_used;
u64 user_thresh_min;
u64 user_thresh_max;
int ret = 1;
cache = btrfs_lookup_block_group(fs_info, chunk_offset);
- chunk_used = btrfs_block_group_used(&cache->item);
+ chunk_used = cache->used;
if (bargs->usage_min == 0)
user_thresh_min = 0;
else
- user_thresh_min = div_factor_fine(cache->key.offset,
- bargs->usage_min);
+ user_thresh_min = div_factor_fine(cache->length,
+ bargs->usage_min);
if (bargs->usage_max == 0)
user_thresh_max = 1;
else if (bargs->usage_max > 100)
- user_thresh_max = cache->key.offset;
+ user_thresh_max = cache->length;
else
- user_thresh_max = div_factor_fine(cache->key.offset,
- bargs->usage_max);
+ user_thresh_max = div_factor_fine(cache->length,
+ bargs->usage_max);
if (user_thresh_min <= chunk_used && chunk_used < user_thresh_max)
ret = 0;
@@ -3418,20 +3239,19 @@ static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info, u64 chunk_off
static int chunk_usage_filter(struct btrfs_fs_info *fs_info,
u64 chunk_offset, struct btrfs_balance_args *bargs)
{
- struct btrfs_block_group_cache *cache;
+ struct btrfs_block_group *cache;
u64 chunk_used, user_thresh;
int ret = 1;
cache = btrfs_lookup_block_group(fs_info, chunk_offset);
- chunk_used = btrfs_block_group_used(&cache->item);
+ chunk_used = cache->used;
if (bargs->usage_min == 0)
user_thresh = 1;
else if (bargs->usage > 100)
- user_thresh = cache->key.offset;
+ user_thresh = cache->length;
else
- user_thresh = div_factor_fine(cache->key.offset,
- bargs->usage);
+ user_thresh = div_factor_fine(cache->length, bargs->usage);
if (chunk_used < user_thresh)
ret = 0;
@@ -3844,8 +3664,7 @@ static int alloc_profile_is_valid(u64 flags, int extended)
if (flags == 0)
return !extended; /* "0" is valid for usual profiles */
- /* true if exactly one bit set */
- return is_power_of_2(flags);
+ return has_single_bit_set(flags);
}
static inline int balance_need_close(struct btrfs_fs_info *fs_info)
@@ -4032,7 +3851,7 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
int ret;
u64 num_devices;
unsigned seq;
- bool reducing_integrity;
+ bool reducing_redundancy;
int i;
if (btrfs_fs_closing(fs_info) ||
@@ -4115,9 +3934,9 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
(fs_info->avail_metadata_alloc_bits & allowed) &&
!(bctl->meta.target & allowed)))
- reducing_integrity = true;
+ reducing_redundancy = true;
else
- reducing_integrity = false;
+ reducing_redundancy = false;
/* if we're not converting, the target field is uninitialized */
meta_target = (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) ?
@@ -4126,13 +3945,13 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
bctl->data.target : fs_info->avail_data_alloc_bits;
} while (read_seqretry(&fs_info->profiles_lock, seq));
- if (reducing_integrity) {
+ if (reducing_redundancy) {
if (bctl->flags & BTRFS_BALANCE_FORCE) {
btrfs_info(fs_info,
- "balance: force reducing metadata integrity");
+ "balance: force reducing metadata redundancy");
} else {
btrfs_err(fs_info,
- "balance: reduces metadata integrity, use --force if you want this");
+ "balance: reduces metadata redundancy, use --force if you want this");
ret = -EINVAL;
goto out;
}
@@ -4898,6 +4717,14 @@ static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
btrfs_set_fs_incompat(info, RAID56);
}
+static void check_raid1c34_incompat_flag(struct btrfs_fs_info *info, u64 type)
+{
+ if (!(type & (BTRFS_BLOCK_GROUP_RAID1C3 | BTRFS_BLOCK_GROUP_RAID1C4)))
+ return;
+
+ btrfs_set_fs_incompat(info, RAID1C34);
+}
+
static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
u64 start, u64 type)
{
@@ -4963,6 +4790,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
} else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
max_stripe_size = SZ_32M;
max_chunk_size = 2 * max_stripe_size;
+ devs_max = min_t(int, devs_max, BTRFS_MAX_DEVS_SYS_CHUNK);
} else {
btrfs_err(info, "invalid chunk type 0x%llx requested",
type);
@@ -5043,8 +4871,11 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
btrfs_cmp_device_info, NULL);
- /* round down to number of usable stripes */
- ndevs = round_down(ndevs, devs_increment);
+ /*
+ * Round down to number of usable stripes, devs_increment can be any
+ * number so we can't use round_down()
+ */
+ ndevs -= ndevs % devs_increment;
if (ndevs < devs_min) {
ret = -ENOSPC;
@@ -5160,6 +4991,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
free_extent_map(em);
check_raid56_incompat_flag(info, type);
+ check_raid1c34_incompat_flag(info, type);
kfree(devices_info);
return 0;
@@ -5578,12 +5410,13 @@ void btrfs_put_bbio(struct btrfs_bio *bbio)
* replace.
*/
static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info,
- u64 logical, u64 length,
+ u64 logical, u64 *length_ret,
struct btrfs_bio **bbio_ret)
{
struct extent_map *em;
struct map_lookup *map;
struct btrfs_bio *bbio;
+ u64 length = *length_ret;
u64 offset;
u64 stripe_nr;
u64 stripe_nr_end;
@@ -5616,7 +5449,8 @@ static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info,
}
offset = logical - em->start;
- length = min_t(u64, em->len - offset, length);
+ length = min_t(u64, em->start + em->len - logical, length);
+ *length_ret = length;
stripe_len = map->stripe_len;
/*
@@ -6031,7 +5865,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
if (op == BTRFS_MAP_DISCARD)
return __btrfs_map_block_for_discard(fs_info, logical,
- *length, bbio_ret);
+ length, bbio_ret);
ret = btrfs_get_io_geometry(fs_info, op, logical, *length, &geom);
if (ret < 0)
@@ -6411,52 +6245,8 @@ static void btrfs_end_bio(struct bio *bio)
}
}
-/*
- * see run_scheduled_bios for a description of why bios are collected for
- * async submit.
- *
- * This will add one bio to the pending list for a device and make sure
- * the work struct is scheduled.
- */
-static noinline void btrfs_schedule_bio(struct btrfs_device *device,
- struct bio *bio)
-{
- struct btrfs_fs_info *fs_info = device->fs_info;
- int should_queue = 1;
- struct btrfs_pending_bios *pending_bios;
-
- /* don't bother with additional async steps for reads, right now */
- if (bio_op(bio) == REQ_OP_READ) {
- btrfsic_submit_bio(bio);
- return;
- }
-
- WARN_ON(bio->bi_next);
- bio->bi_next = NULL;
-
- spin_lock(&device->io_lock);
- if (op_is_sync(bio->bi_opf))
- pending_bios = &device->pending_sync_bios;
- else
- pending_bios = &device->pending_bios;
-
- if (pending_bios->tail)
- pending_bios->tail->bi_next = bio;
-
- pending_bios->tail = bio;
- if (!pending_bios->head)
- pending_bios->head = bio;
- if (device->running_pending)
- should_queue = 0;
-
- spin_unlock(&device->io_lock);
-
- if (should_queue)
- btrfs_queue_work(fs_info->submit_workers, &device->work);
-}
-
static void submit_stripe_bio(struct btrfs_bio *bbio, struct bio *bio,
- u64 physical, int dev_nr, int async)
+ u64 physical, int dev_nr)
{
struct btrfs_device *dev = bbio->stripes[dev_nr].dev;
struct btrfs_fs_info *fs_info = bbio->fs_info;
@@ -6474,10 +6264,7 @@ static void submit_stripe_bio(struct btrfs_bio *bbio, struct bio *bio,
btrfs_bio_counter_inc_noblocked(fs_info);
- if (async)
- btrfs_schedule_bio(dev, bio);
- else
- btrfsic_submit_bio(bio);
+ btrfsic_submit_bio(bio);
}
static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
@@ -6498,7 +6285,7 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
}
blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
- int mirror_num, int async_submit)
+ int mirror_num)
{
struct btrfs_device *dev;
struct bio *first_bio = bio;
@@ -6567,7 +6354,7 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
bio = first_bio;
submit_stripe_bio(bbio, bio, bbio->stripes[dev_nr].physical,
- dev_nr, async_submit);
+ dev_nr);
}
btrfs_bio_counter_dec(fs_info);
return BLK_STS_OK;
@@ -6671,9 +6458,6 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
else
generate_random_uuid(dev->uuid);
- btrfs_init_work(&dev->work, btrfs_submit_helper,
- pending_bios_fn, NULL, NULL);
-
return dev;
}
@@ -6870,7 +6654,7 @@ static struct btrfs_fs_devices *open_seed_devices(struct btrfs_fs_info *fs_info,
if (IS_ERR(fs_devices))
return fs_devices;
- fs_devices->seeding = 1;
+ fs_devices->seeding = true;
fs_devices->opened = 1;
return fs_devices;
}
@@ -7059,48 +6843,49 @@ int btrfs_read_sys_array(struct btrfs_fs_info *fs_info)
sb_array_offset += len;
cur_offset += len;
- if (key.type == BTRFS_CHUNK_ITEM_KEY) {
- chunk = (struct btrfs_chunk *)sb_array_offset;
- /*
- * At least one btrfs_chunk with one stripe must be
- * present, exact stripe count check comes afterwards
- */
- len = btrfs_chunk_item_size(1);
- if (cur_offset + len > array_size)
- goto out_short_read;
-
- num_stripes = btrfs_chunk_num_stripes(sb, chunk);
- if (!num_stripes) {
- btrfs_err(fs_info,
- "invalid number of stripes %u in sys_array at offset %u",
- num_stripes, cur_offset);
- ret = -EIO;
- break;
- }
+ if (key.type != BTRFS_CHUNK_ITEM_KEY) {
+ btrfs_err(fs_info,
+ "unexpected item type %u in sys_array at offset %u",
+ (u32)key.type, cur_offset);
+ ret = -EIO;
+ break;
+ }
- type = btrfs_chunk_type(sb, chunk);
- if ((type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) {
- btrfs_err(fs_info,
- "invalid chunk type %llu in sys_array at offset %u",
- type, cur_offset);
- ret = -EIO;
- break;
- }
+ chunk = (struct btrfs_chunk *)sb_array_offset;
+ /*
+ * At least one btrfs_chunk with one stripe must be present,
+ * exact stripe count check comes afterwards
+ */
+ len = btrfs_chunk_item_size(1);
+ if (cur_offset + len > array_size)
+ goto out_short_read;
- len = btrfs_chunk_item_size(num_stripes);
- if (cur_offset + len > array_size)
- goto out_short_read;
+ num_stripes = btrfs_chunk_num_stripes(sb, chunk);
+ if (!num_stripes) {
+ btrfs_err(fs_info,
+ "invalid number of stripes %u in sys_array at offset %u",
+ num_stripes, cur_offset);
+ ret = -EIO;
+ break;
+ }
- ret = read_one_chunk(&key, sb, chunk);
- if (ret)
- break;
- } else {
+ type = btrfs_chunk_type(sb, chunk);
+ if ((type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) {
btrfs_err(fs_info,
- "unexpected item type %u in sys_array at offset %u",
- (u32)key.type, cur_offset);
+ "invalid chunk type %llu in sys_array at offset %u",
+ type, cur_offset);
ret = -EIO;
break;
}
+
+ len = btrfs_chunk_item_size(num_stripes);
+ if (cur_offset + len > array_size)
+ goto out_short_read;
+
+ ret = read_one_chunk(&key, sb, chunk);
+ if (ret)
+ break;
+
array_ptr += len;
sb_array_offset += len;
cur_offset += len;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index a7da1f3e3627..fc1b564b9cfe 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -18,10 +18,6 @@ extern struct mutex uuid_mutex;
#define BTRFS_STRIPE_LEN SZ_64K
struct buffer_head;
-struct btrfs_pending_bios {
- struct bio *head;
- struct bio *tail;
-};
struct btrfs_io_geometry {
/* remaining bytes before crossing a stripe */
@@ -68,13 +64,6 @@ struct btrfs_device {
u64 generation;
- spinlock_t io_lock ____cacheline_aligned;
- int running_pending;
- /* regular prio bios */
- struct btrfs_pending_bios pending_bios;
- /* sync bios */
- struct btrfs_pending_bios pending_sync_bios;
-
struct block_device *bdev;
/* the mode sent to blkdev_get */
@@ -254,14 +243,14 @@ struct btrfs_fs_devices {
struct list_head alloc_list;
struct btrfs_fs_devices *seed;
- int seeding;
+ bool seeding;
int opened;
/* set when we find or add a device that doesn't have the
* nonrot flag set
*/
- int rotating;
+ bool rotating;
struct btrfs_fs_info *fs_info;
/* sysfs kobjects */
@@ -330,7 +319,6 @@ struct btrfs_bio {
u64 map_type; /* get from map_lookup->type */
bio_end_io_t *end_io;
struct bio *orig_bio;
- unsigned long flags;
void *private;
atomic_t error;
int max_errors;
@@ -436,7 +424,7 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info);
int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type);
void btrfs_mapping_tree_free(struct extent_map_tree *tree);
blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
- int mirror_num, int async_submit);
+ int mirror_num);
int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
fmode_t flags, void *holder);
struct btrfs_device *btrfs_scan_one_device(const char *path,
@@ -557,6 +545,10 @@ static inline enum btrfs_raid_types btrfs_bg_flags_to_raid_index(u64 flags)
return BTRFS_RAID_RAID10;
else if (flags & BTRFS_BLOCK_GROUP_RAID1)
return BTRFS_RAID_RAID1;
+ else if (flags & BTRFS_BLOCK_GROUP_RAID1C3)
+ return BTRFS_RAID_RAID1C3;
+ else if (flags & BTRFS_BLOCK_GROUP_RAID1C4)
+ return BTRFS_RAID_RAID1C4;
else if (flags & BTRFS_BLOCK_GROUP_DUP)
return BTRFS_RAID_DUP;
else if (flags & BTRFS_BLOCK_GROUP_RAID0)
@@ -571,7 +563,7 @@ static inline enum btrfs_raid_types btrfs_bg_flags_to_raid_index(u64 flags)
void btrfs_commit_device_sizes(struct btrfs_transaction *trans);
-struct list_head *btrfs_get_fs_uuids(void);
+struct list_head * __attribute_const__ btrfs_get_fs_uuids(void);
void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info);
void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info);
bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index df1aace5df50..a6c90a003c12 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -29,19 +29,9 @@ struct workspace {
static struct workspace_manager wsm;
-static void zlib_init_workspace_manager(void)
+struct list_head *zlib_get_workspace(unsigned int level)
{
- btrfs_init_workspace_manager(&wsm, &btrfs_zlib_compress);
-}
-
-static void zlib_cleanup_workspace_manager(void)
-{
- btrfs_cleanup_workspace_manager(&wsm);
-}
-
-static struct list_head *zlib_get_workspace(unsigned int level)
-{
- struct list_head *ws = btrfs_get_workspace(&wsm, level);
+ struct list_head *ws = btrfs_get_workspace(BTRFS_COMPRESS_ZLIB, level);
struct workspace *workspace = list_entry(ws, struct workspace, list);
workspace->level = level;
@@ -49,12 +39,7 @@ static struct list_head *zlib_get_workspace(unsigned int level)
return ws;
}
-static void zlib_put_workspace(struct list_head *ws)
-{
- btrfs_put_workspace(&wsm, ws);
-}
-
-static void zlib_free_workspace(struct list_head *ws)
+void zlib_free_workspace(struct list_head *ws)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
@@ -63,7 +48,7 @@ static void zlib_free_workspace(struct list_head *ws)
kfree(workspace);
}
-static struct list_head *zlib_alloc_workspace(unsigned int level)
+struct list_head *zlib_alloc_workspace(unsigned int level)
{
struct workspace *workspace;
int workspacesize;
@@ -88,13 +73,9 @@ fail:
return ERR_PTR(-ENOMEM);
}
-static int zlib_compress_pages(struct list_head *ws,
- struct address_space *mapping,
- u64 start,
- struct page **pages,
- unsigned long *out_pages,
- unsigned long *total_in,
- unsigned long *total_out)
+int zlib_compress_pages(struct list_head *ws, struct address_space *mapping,
+ u64 start, struct page **pages, unsigned long *out_pages,
+ unsigned long *total_in, unsigned long *total_out)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
int ret;
@@ -228,7 +209,7 @@ out:
return ret;
}
-static int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
+int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
int ret = 0, ret2;
@@ -319,10 +300,9 @@ done:
return ret;
}
-static int zlib_decompress(struct list_head *ws, unsigned char *data_in,
- struct page *dest_page,
- unsigned long start_byte,
- size_t srclen, size_t destlen)
+int zlib_decompress(struct list_head *ws, unsigned char *data_in,
+ struct page *dest_page, unsigned long start_byte, size_t srclen,
+ size_t destlen)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
int ret = 0;
@@ -419,15 +399,7 @@ next:
}
const struct btrfs_compress_op btrfs_zlib_compress = {
- .init_workspace_manager = zlib_init_workspace_manager,
- .cleanup_workspace_manager = zlib_cleanup_workspace_manager,
- .get_workspace = zlib_get_workspace,
- .put_workspace = zlib_put_workspace,
- .alloc_workspace = zlib_alloc_workspace,
- .free_workspace = zlib_free_workspace,
- .compress_pages = zlib_compress_pages,
- .decompress_bio = zlib_decompress_bio,
- .decompress = zlib_decompress,
+ .workspace_manager = &wsm,
.max_level = 9,
.default_level = BTRFS_ZLIB_DEFAULT_LEVEL,
};
diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c
index 764d47b107e5..9a4871636c6c 100644
--- a/fs/btrfs/zstd.c
+++ b/fs/btrfs/zstd.c
@@ -91,9 +91,8 @@ static inline struct workspace *list_to_workspace(struct list_head *list)
return container_of(list, struct workspace, list);
}
-static void zstd_free_workspace(struct list_head *ws);
-static struct list_head *zstd_alloc_workspace(unsigned int level);
-
+void zstd_free_workspace(struct list_head *ws);
+struct list_head *zstd_alloc_workspace(unsigned int level);
/*
* zstd_reclaim_timer_fn - reclaim timer
* @t: timer
@@ -168,7 +167,7 @@ static void zstd_calc_ws_mem_sizes(void)
}
}
-static void zstd_init_workspace_manager(void)
+void zstd_init_workspace_manager(void)
{
struct list_head *ws;
int i;
@@ -194,7 +193,7 @@ static void zstd_init_workspace_manager(void)
}
}
-static void zstd_cleanup_workspace_manager(void)
+void zstd_cleanup_workspace_manager(void)
{
struct workspace *workspace;
int i;
@@ -261,7 +260,7 @@ static struct list_head *zstd_find_workspace(unsigned int level)
* attempt to allocate a new workspace. If we fail to allocate one due to
* memory pressure, go to sleep waiting for the max level workspace to free up.
*/
-static struct list_head *zstd_get_workspace(unsigned int level)
+struct list_head *zstd_get_workspace(unsigned int level)
{
struct list_head *ws;
unsigned int nofs_flag;
@@ -302,7 +301,7 @@ again:
* isn't set, it is also set here. Only the max level workspace tries and wakes
* up waiting workspaces.
*/
-static void zstd_put_workspace(struct list_head *ws)
+void zstd_put_workspace(struct list_head *ws)
{
struct workspace *workspace = list_to_workspace(ws);
@@ -332,7 +331,7 @@ static void zstd_put_workspace(struct list_head *ws)
cond_wake_up(&wsm.wait);
}
-static void zstd_free_workspace(struct list_head *ws)
+void zstd_free_workspace(struct list_head *ws)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
@@ -341,7 +340,7 @@ static void zstd_free_workspace(struct list_head *ws)
kfree(workspace);
}
-static struct list_head *zstd_alloc_workspace(unsigned int level)
+struct list_head *zstd_alloc_workspace(unsigned int level)
{
struct workspace *workspace;
@@ -367,13 +366,9 @@ fail:
return ERR_PTR(-ENOMEM);
}
-static int zstd_compress_pages(struct list_head *ws,
- struct address_space *mapping,
- u64 start,
- struct page **pages,
- unsigned long *out_pages,
- unsigned long *total_in,
- unsigned long *total_out)
+int zstd_compress_pages(struct list_head *ws, struct address_space *mapping,
+ u64 start, struct page **pages, unsigned long *out_pages,
+ unsigned long *total_in, unsigned long *total_out)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
ZSTD_CStream *stream;
@@ -548,7 +543,7 @@ out:
return ret;
}
-static int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
+int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
struct page **pages_in = cb->compressed_pages;
@@ -626,10 +621,9 @@ done:
return ret;
}
-static int zstd_decompress(struct list_head *ws, unsigned char *data_in,
- struct page *dest_page,
- unsigned long start_byte,
- size_t srclen, size_t destlen)
+int zstd_decompress(struct list_head *ws, unsigned char *data_in,
+ struct page *dest_page, unsigned long start_byte, size_t srclen,
+ size_t destlen)
{
struct workspace *workspace = list_entry(ws, struct workspace, list);
ZSTD_DStream *stream;
@@ -712,15 +706,8 @@ finish:
}
const struct btrfs_compress_op btrfs_zstd_compress = {
- .init_workspace_manager = zstd_init_workspace_manager,
- .cleanup_workspace_manager = zstd_cleanup_workspace_manager,
- .get_workspace = zstd_get_workspace,
- .put_workspace = zstd_put_workspace,
- .alloc_workspace = zstd_alloc_workspace,
- .free_workspace = zstd_free_workspace,
- .compress_pages = zstd_compress_pages,
- .decompress_bio = zstd_decompress_bio,
- .decompress = zstd_decompress,
+ /* ZSTD uses own workspace manager */
+ .workspace_manager = NULL,
.max_level = ZSTD_BTRFS_MAX_LEVEL,
.default_level = ZSTD_BTRFS_DEFAULT_LEVEL,
};
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index d3b9c9d5c1bd..f5a38910a82b 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1058,6 +1058,11 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
+ /* remove from inode's cap rbtree, and clear auth cap */
+ rb_erase(&cap->ci_node, &ci->i_caps);
+ if (ci->i_auth_cap == cap)
+ ci->i_auth_cap = NULL;
+
/* remove from session list */
spin_lock(&session->s_cap_lock);
if (session->s_cap_iterator == cap) {
@@ -1091,11 +1096,6 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
spin_unlock(&session->s_cap_lock);
- /* remove from inode list */
- rb_erase(&cap->ci_node, &ci->i_caps);
- if (ci->i_auth_cap == cap)
- ci->i_auth_cap = NULL;
-
if (removed)
ceph_put_cap(mdsc, cap);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 4ca0b8ff9a72..d17a789fd856 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1553,36 +1553,37 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
{
int valid = 0;
struct dentry *parent;
- struct inode *dir;
+ struct inode *dir, *inode;
if (flags & LOOKUP_RCU) {
parent = READ_ONCE(dentry->d_parent);
dir = d_inode_rcu(parent);
if (!dir)
return -ECHILD;
+ inode = d_inode_rcu(dentry);
} else {
parent = dget_parent(dentry);
dir = d_inode(parent);
+ inode = d_inode(dentry);
}
dout("d_revalidate %p '%pd' inode %p offset %lld\n", dentry,
- dentry, d_inode(dentry), ceph_dentry(dentry)->offset);
+ dentry, inode, ceph_dentry(dentry)->offset);
/* always trust cached snapped dentries, snapdir dentry */
if (ceph_snap(dir) != CEPH_NOSNAP) {
dout("d_revalidate %p '%pd' inode %p is SNAPPED\n", dentry,
- dentry, d_inode(dentry));
+ dentry, inode);
valid = 1;
- } else if (d_really_is_positive(dentry) &&
- ceph_snap(d_inode(dentry)) == CEPH_SNAPDIR) {
+ } else if (inode && ceph_snap(inode) == CEPH_SNAPDIR) {
valid = 1;
} else {
valid = dentry_lease_is_valid(dentry, flags);
if (valid == -ECHILD)
return valid;
if (valid || dir_lease_is_valid(dir, dentry)) {
- if (d_really_is_positive(dentry))
- valid = ceph_is_any_caps(d_inode(dentry));
+ if (inode)
+ valid = ceph_is_any_caps(inode);
else
valid = 1;
}
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index d277f71abe0b..8de633964dc3 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -462,6 +462,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
err = ceph_security_init_secctx(dentry, mode, &as_ctx);
if (err < 0)
goto out_ctx;
+ } else if (!d_in_lookup(dentry)) {
+ /* If it's not being looked up, it's negative */
+ return -ENOENT;
}
/* do the open */
@@ -750,6 +753,9 @@ static void ceph_aio_complete(struct inode *inode,
if (!atomic_dec_and_test(&aio_req->pending_reqs))
return;
+ if (aio_req->iocb->ki_flags & IOCB_DIRECT)
+ inode_dio_end(inode);
+
ret = aio_req->error;
if (!ret)
ret = aio_req->total_len;
@@ -1088,6 +1094,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
CEPH_CAP_FILE_RD);
list_splice(&aio_req->osd_reqs, &osd_reqs);
+ inode_dio_begin(inode);
while (!list_empty(&osd_reqs)) {
req = list_first_entry(&osd_reqs,
struct ceph_osd_request,
@@ -1261,14 +1268,24 @@ again:
dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n",
inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, inode);
+ if (iocb->ki_flags & IOCB_DIRECT)
+ ceph_start_io_direct(inode);
+ else
+ ceph_start_io_read(inode);
+
if (fi->fmode & CEPH_FILE_MODE_LAZY)
want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
else
want = CEPH_CAP_FILE_CACHE;
ret = ceph_get_caps(filp, CEPH_CAP_FILE_RD, want, -1,
&got, &pinned_page);
- if (ret < 0)
+ if (ret < 0) {
+ if (iocb->ki_flags & IOCB_DIRECT)
+ ceph_end_io_direct(inode);
+ else
+ ceph_end_io_read(inode);
return ret;
+ }
if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 ||
(iocb->ki_flags & IOCB_DIRECT) ||
@@ -1280,16 +1297,12 @@ again:
if (ci->i_inline_version == CEPH_INLINE_NONE) {
if (!retry_op && (iocb->ki_flags & IOCB_DIRECT)) {
- ceph_start_io_direct(inode);
ret = ceph_direct_read_write(iocb, to,
NULL, NULL);
- ceph_end_io_direct(inode);
if (ret >= 0 && ret < len)
retry_op = CHECK_EOF;
} else {
- ceph_start_io_read(inode);
ret = ceph_sync_read(iocb, to, &retry_op);
- ceph_end_io_read(inode);
}
} else {
retry_op = READ_INLINE;
@@ -1300,11 +1313,10 @@ again:
inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
ceph_cap_string(got));
ceph_add_rw_context(fi, &rw_ctx);
- ceph_start_io_read(inode);
ret = generic_file_read_iter(iocb, to);
- ceph_end_io_read(inode);
ceph_del_rw_context(fi, &rw_ctx);
}
+
dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
if (pinned_page) {
@@ -1312,6 +1324,12 @@ again:
pinned_page = NULL;
}
ceph_put_cap_refs(ci, got);
+
+ if (iocb->ki_flags & IOCB_DIRECT)
+ ceph_end_io_direct(inode);
+ else
+ ceph_end_io_read(inode);
+
if (retry_op > HAVE_RETRIED && ret >= 0) {
int statret;
struct page *page = NULL;
@@ -1956,10 +1974,18 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
if (ceph_test_mount_opt(src_fsc, NOCOPYFROM))
return -EOPNOTSUPP;
+ /*
+ * Striped file layouts require that we copy partial objects, but the
+ * OSD copy-from operation only supports full-object copies. Limit
+ * this to non-striped file layouts for now.
+ */
if ((src_ci->i_layout.stripe_unit != dst_ci->i_layout.stripe_unit) ||
- (src_ci->i_layout.stripe_count != dst_ci->i_layout.stripe_count) ||
- (src_ci->i_layout.object_size != dst_ci->i_layout.object_size))
+ (src_ci->i_layout.stripe_count != 1) ||
+ (dst_ci->i_layout.stripe_count != 1) ||
+ (src_ci->i_layout.object_size != dst_ci->i_layout.object_size)) {
+ dout("Invalid src/dst files layout\n");
return -EOPNOTSUPP;
+ }
if (len < src_ci->i_layout.object_size)
return -EOPNOTSUPP; /* no remote copy will be done */
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 9f135624ae47..c07407586ce8 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1434,6 +1434,7 @@ retry_lookup:
dout(" final dn %p\n", dn);
} else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
req->r_op == CEPH_MDS_OP_MKSNAP) &&
+ test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags) &&
!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
struct inode *dir = req->r_parent;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index a8a8f84f3bbf..a5163296d9d9 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -384,8 +384,8 @@ static int parse_reply_info_readdir(void **p, void *end,
}
done:
- if (*p != end)
- goto bad;
+ /* Skip over any unrecognized fields */
+ *p = end;
return 0;
bad:
@@ -406,12 +406,10 @@ static int parse_reply_info_filelock(void **p, void *end,
goto bad;
info->filelock_reply = *p;
- *p += sizeof(*info->filelock_reply);
- if (unlikely(*p != end))
- goto bad;
+ /* Skip over any unrecognized fields */
+ *p = end;
return 0;
-
bad:
return -EIO;
}
@@ -425,18 +423,21 @@ static int parse_reply_info_create(void **p, void *end,
{
if (features == (u64)-1 ||
(features & CEPH_FEATURE_REPLY_CREATE_INODE)) {
+ /* Malformed reply? */
if (*p == end) {
info->has_create_ino = false;
} else {
info->has_create_ino = true;
- info->ino = ceph_decode_64(p);
+ ceph_decode_64_safe(p, end, info->ino, bad);
}
+ } else {
+ if (*p != end)
+ goto bad;
}
- if (unlikely(*p != end))
- goto bad;
+ /* Skip over any unrecognized fields */
+ *p = end;
return 0;
-
bad:
return -EIO;
}
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index edfd643a8205..b47f43fc2d68 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -268,6 +268,7 @@ static int parse_fsopt_token(char *c, void *private)
}
break;
case Opt_fscache_uniq:
+#ifdef CONFIG_CEPH_FSCACHE
kfree(fsopt->fscache_uniq);
fsopt->fscache_uniq = kstrndup(argstr[0].from,
argstr[0].to-argstr[0].from,
@@ -276,7 +277,10 @@ static int parse_fsopt_token(char *c, void *private)
return -ENOMEM;
fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE;
break;
- /* misc */
+#else
+ pr_err("fscache support is disabled\n");
+ return -EINVAL;
+#endif
case Opt_wsize:
if (intval < (int)PAGE_SIZE || intval > CEPH_MAX_WRITE_SIZE)
return -EINVAL;
@@ -353,10 +357,15 @@ static int parse_fsopt_token(char *c, void *private)
fsopt->flags &= ~CEPH_MOUNT_OPT_INO32;
break;
case Opt_fscache:
+#ifdef CONFIG_CEPH_FSCACHE
fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE;
kfree(fsopt->fscache_uniq);
fsopt->fscache_uniq = NULL;
break;
+#else
+ pr_err("fscache support is disabled\n");
+ return -EINVAL;
+#endif
case Opt_nofscache:
fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE;
kfree(fsopt->fscache_uniq);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 2e9c7f493f99..1a135d1b85bd 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -169,18 +169,32 @@ cifs_read_super(struct super_block *sb)
else
sb->s_maxbytes = MAX_NON_LFS;
- /* BB FIXME fix time_gran to be larger for LANMAN sessions */
- sb->s_time_gran = 100;
-
- if (tcon->unix_ext) {
- ts = cifs_NTtimeToUnix(0);
+ /*
+ * Some very old servers like DOS and OS/2 used 2 second granularity
+ * (while all current servers use 100ns granularity - see MS-DTYP)
+ * but 1 second is the maximum allowed granularity for the VFS
+ * so for old servers set time granularity to 1 second while for
+ * everything else (current servers) set it to 100ns.
+ */
+ if ((tcon->ses->server->vals->protocol_id == SMB10_PROT_ID) &&
+ ((tcon->ses->capabilities &
+ tcon->ses->server->vals->cap_nt_find) == 0) &&
+ !tcon->unix_ext) {
+ sb->s_time_gran = 1000000000; /* 1 second is max allowed gran */
+ ts = cnvrtDosUnixTm(cpu_to_le16(SMB_DATE_MIN), 0, 0);
sb->s_time_min = ts.tv_sec;
- ts = cifs_NTtimeToUnix(cpu_to_le64(S64_MAX));
+ ts = cnvrtDosUnixTm(cpu_to_le16(SMB_DATE_MAX),
+ cpu_to_le16(SMB_TIME_MAX), 0);
sb->s_time_max = ts.tv_sec;
} else {
- ts = cnvrtDosUnixTm(cpu_to_le16(SMB_DATE_MIN), 0, 0);
+ /*
+ * Almost every server, including all SMB2+, uses DCE TIME
+ * ie 100 nanosecond units, since 1601. See MS-DTYP and MS-FSCC
+ */
+ sb->s_time_gran = 100;
+ ts = cifs_NTtimeToUnix(0);
sb->s_time_min = ts.tv_sec;
- ts = cnvrtDosUnixTm(cpu_to_le16(SMB_DATE_MAX), cpu_to_le16(SMB_TIME_MAX), 0);
+ ts = cifs_NTtimeToUnix(cpu_to_le64(S64_MAX));
sb->s_time_max = ts.tv_sec;
}
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 2e960e1049db..d78bfcc19156 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1210,7 +1210,7 @@ struct cifs_search_info {
bool smallBuf:1; /* so we know which buf_release function to call */
};
-#define ACL_NO_MODE -1
+#define ACL_NO_MODE ((umode_t)(-1))
struct cifs_open_parms {
struct cifs_tcon *tcon;
struct cifs_sb_info *cifs_sb;
@@ -1391,6 +1391,11 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file);
struct cifsInodeInfo {
bool can_cache_brlcks;
struct list_head llist; /* locks helb by this inode */
+ /*
+ * NOTE: Some code paths call down_read(lock_sem) twice, so
+ * we must always use use cifs_down_write() instead of down_write()
+ * for this semaphore to avoid deadlocks.
+ */
struct rw_semaphore lock_sem; /* protect the fields above */
/* BB add in lists for dirty pages i.e. write caching info for oplock */
struct list_head openFileList;
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index e53e9f62b87b..fe597d3d5208 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -170,6 +170,7 @@ extern int cifs_unlock_range(struct cifsFileInfo *cfile,
struct file_lock *flock, const unsigned int xid);
extern int cifs_push_mandatory_locks(struct cifsFileInfo *cfile);
+extern void cifs_down_write(struct rw_semaphore *sem);
extern struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid,
struct file *file,
struct tcon_link *tlink,
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 2850c3ce4391..ccaa8bad336f 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -564,9 +564,11 @@ cifs_reconnect(struct TCP_Server_Info *server)
spin_lock(&GlobalMid_Lock);
list_for_each_safe(tmp, tmp2, &server->pending_mid_q) {
mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
+ kref_get(&mid_entry->refcount);
if (mid_entry->mid_state == MID_REQUEST_SUBMITTED)
mid_entry->mid_state = MID_RETRY_NEEDED;
list_move(&mid_entry->qhead, &retry_list);
+ mid_entry->mid_flags |= MID_DELETED;
}
spin_unlock(&GlobalMid_Lock);
mutex_unlock(&server->srv_mutex);
@@ -576,6 +578,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
list_del_init(&mid_entry->qhead);
mid_entry->callback(mid_entry);
+ cifs_mid_q_entry_release(mid_entry);
}
if (cifs_rdma_enabled(server)) {
@@ -895,8 +898,10 @@ dequeue_mid(struct mid_q_entry *mid, bool malformed)
if (mid->mid_flags & MID_DELETED)
printk_once(KERN_WARNING
"trying to dequeue a deleted mid\n");
- else
+ else {
list_del_init(&mid->qhead);
+ mid->mid_flags |= MID_DELETED;
+ }
spin_unlock(&GlobalMid_Lock);
}
@@ -966,8 +971,10 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
list_for_each_safe(tmp, tmp2, &server->pending_mid_q) {
mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
cifs_dbg(FYI, "Clearing mid 0x%llx\n", mid_entry->mid);
+ kref_get(&mid_entry->refcount);
mid_entry->mid_state = MID_SHUTDOWN;
list_move(&mid_entry->qhead, &dispose_list);
+ mid_entry->mid_flags |= MID_DELETED;
}
spin_unlock(&GlobalMid_Lock);
@@ -977,6 +984,7 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
cifs_dbg(FYI, "Callback mid 0x%llx\n", mid_entry->mid);
list_del_init(&mid_entry->qhead);
mid_entry->callback(mid_entry);
+ cifs_mid_q_entry_release(mid_entry);
}
/* 1/8th of sec is more than enough time for them to exit */
msleep(125);
@@ -3882,8 +3890,12 @@ generic_ip_connect(struct TCP_Server_Info *server)
rc = socket->ops->connect(socket, saddr, slen,
server->noblockcnt ? O_NONBLOCK : 0);
-
- if (rc == -EINPROGRESS)
+ /*
+ * When mounting SMB root file systems, we do not want to block in
+ * connect. Otherwise bail out and then let cifs_reconnect() perform
+ * reconnect failover - if possible.
+ */
+ if (server->noblockcnt && rc == -EINPROGRESS)
rc = 0;
if (rc < 0) {
cifs_dbg(FYI, "Error %d connecting to server\n", rc);
@@ -4264,7 +4276,7 @@ static int mount_get_conns(struct smb_vol *vol, struct cifs_sb_info *cifs_sb,
server->ops->qfs_tcon(*xid, tcon);
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RO_CACHE) {
if (tcon->fsDevInfo.DeviceCharacteristics &
- FILE_READ_ONLY_DEVICE)
+ cpu_to_le32(FILE_READ_ONLY_DEVICE))
cifs_dbg(VFS, "mounted to read only share\n");
else if ((cifs_sb->mnt_cifs_flags &
CIFS_MOUNT_RW_CACHE) == 0)
@@ -4445,7 +4457,7 @@ static int setup_dfs_tgt_conn(const char *path,
int rc;
struct dfs_info3_param ref = {0};
char *mdata = NULL, *fake_devname = NULL;
- struct smb_vol fake_vol = {0};
+ struct smb_vol fake_vol = {NULL};
cifs_dbg(FYI, "%s: dfs path: %s\n", __func__, path);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index dd5ac841aefa..7ce689d31aa2 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -738,10 +738,16 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
static int
cifs_d_revalidate(struct dentry *direntry, unsigned int flags)
{
+ struct inode *inode;
+
if (flags & LOOKUP_RCU)
return -ECHILD;
if (d_really_is_positive(direntry)) {
+ inode = d_inode(direntry);
+ if ((flags & LOOKUP_REVAL) && !CIFS_CACHE_READ(CIFS_I(inode)))
+ CIFS_I(inode)->time = 0; /* force reval */
+
if (cifs_revalidate_dentry(direntry))
return 0;
else {
@@ -752,7 +758,7 @@ cifs_d_revalidate(struct dentry *direntry, unsigned int flags)
* attributes will have been updated by
* cifs_revalidate_dentry().
*/
- if (IS_AUTOMOUNT(d_inode(direntry)) &&
+ if (IS_AUTOMOUNT(inode) &&
!(direntry->d_flags & DCACHE_NEED_AUTOMOUNT)) {
spin_lock(&direntry->d_lock);
direntry->d_flags |= DCACHE_NEED_AUTOMOUNT;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 4b95700c507c..fa7b0fa72bb3 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -253,6 +253,12 @@ cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
xid, fid);
+ if (rc) {
+ server->ops->close(xid, tcon, fid);
+ if (rc == -ESTALE)
+ rc = -EOPENSTALE;
+ }
+
out:
kfree(buf);
return rc;
@@ -275,6 +281,13 @@ cifs_has_mand_locks(struct cifsInodeInfo *cinode)
return has_locks;
}
+void
+cifs_down_write(struct rw_semaphore *sem)
+{
+ while (!down_write_trylock(sem))
+ msleep(10);
+}
+
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
struct tcon_link *tlink, __u32 oplock)
@@ -300,7 +313,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
INIT_LIST_HEAD(&fdlocks->locks);
fdlocks->cfile = cfile;
cfile->llist = fdlocks;
- down_write(&cinode->lock_sem);
+ cifs_down_write(&cinode->lock_sem);
list_add(&fdlocks->llist, &cinode->llist);
up_write(&cinode->lock_sem);
@@ -399,10 +412,11 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler)
bool oplock_break_cancelled;
spin_lock(&tcon->open_file_lock);
-
+ spin_lock(&cifsi->open_file_lock);
spin_lock(&cifs_file->file_info_lock);
if (--cifs_file->count > 0) {
spin_unlock(&cifs_file->file_info_lock);
+ spin_unlock(&cifsi->open_file_lock);
spin_unlock(&tcon->open_file_lock);
return;
}
@@ -415,9 +429,7 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler)
cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
/* remove it from the lists */
- spin_lock(&cifsi->open_file_lock);
list_del(&cifs_file->flist);
- spin_unlock(&cifsi->open_file_lock);
list_del(&cifs_file->tlist);
atomic_dec(&tcon->num_local_opens);
@@ -434,6 +446,7 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler)
cifs_set_oplock_level(cifsi, 0);
}
+ spin_unlock(&cifsi->open_file_lock);
spin_unlock(&tcon->open_file_lock);
oplock_break_cancelled = wait_oplock_handler ?
@@ -458,7 +471,7 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler)
* Delete any outstanding lock records. We'll lose them when the file
* is closed anyway.
*/
- down_write(&cifsi->lock_sem);
+ cifs_down_write(&cifsi->lock_sem);
list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
list_del(&li->llist);
cifs_del_lock_waiters(li);
@@ -1021,7 +1034,7 @@ static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
- down_write(&cinode->lock_sem);
+ cifs_down_write(&cinode->lock_sem);
list_add_tail(&lock->llist, &cfile->llist->locks);
up_write(&cinode->lock_sem);
}
@@ -1043,7 +1056,7 @@ cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
try_again:
exist = false;
- down_write(&cinode->lock_sem);
+ cifs_down_write(&cinode->lock_sem);
exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
lock->type, lock->flags, &conf_lock,
@@ -1066,7 +1079,7 @@ try_again:
(lock->blist.next == &lock->blist));
if (!rc)
goto try_again;
- down_write(&cinode->lock_sem);
+ cifs_down_write(&cinode->lock_sem);
list_del_init(&lock->blist);
}
@@ -1119,7 +1132,7 @@ cifs_posix_lock_set(struct file *file, struct file_lock *flock)
return rc;
try_again:
- down_write(&cinode->lock_sem);
+ cifs_down_write(&cinode->lock_sem);
if (!cinode->can_cache_brlcks) {
up_write(&cinode->lock_sem);
return rc;
@@ -1325,7 +1338,7 @@ cifs_push_locks(struct cifsFileInfo *cfile)
int rc = 0;
/* we are going to update can_cache_brlcks here - need a write access */
- down_write(&cinode->lock_sem);
+ cifs_down_write(&cinode->lock_sem);
if (!cinode->can_cache_brlcks) {
up_write(&cinode->lock_sem);
return rc;
@@ -1516,7 +1529,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
if (!buf)
return -ENOMEM;
- down_write(&cinode->lock_sem);
+ cifs_down_write(&cinode->lock_sem);
for (i = 0; i < 2; i++) {
cur = buf;
num = 0;
@@ -1840,13 +1853,12 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
{
struct cifsFileInfo *open_file = NULL;
struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
- struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
/* only filter by fsuid on multiuser mounts */
if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
fsuid_only = false;
- spin_lock(&tcon->open_file_lock);
+ spin_lock(&cifs_inode->open_file_lock);
/* we could simply get the first_list_entry since write-only entries
are always at the end of the list but since the first entry might
have a close pending, we go through the whole list */
@@ -1858,7 +1870,7 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
/* found a good file */
/* lock it so it will not be closed on us */
cifsFileInfo_get(open_file);
- spin_unlock(&tcon->open_file_lock);
+ spin_unlock(&cifs_inode->open_file_lock);
return open_file;
} /* else might as well continue, and look for
another, or simply have the caller reopen it
@@ -1866,7 +1878,7 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
} else /* write only file */
break; /* write only files are last so must be done */
}
- spin_unlock(&tcon->open_file_lock);
+ spin_unlock(&cifs_inode->open_file_lock);
return NULL;
}
@@ -1877,7 +1889,6 @@ cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only,
{
struct cifsFileInfo *open_file, *inv_file = NULL;
struct cifs_sb_info *cifs_sb;
- struct cifs_tcon *tcon;
bool any_available = false;
int rc = -EBADF;
unsigned int refind = 0;
@@ -1897,16 +1908,15 @@ cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only,
}
cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
- tcon = cifs_sb_master_tcon(cifs_sb);
/* only filter by fsuid on multiuser mounts */
if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
fsuid_only = false;
- spin_lock(&tcon->open_file_lock);
+ spin_lock(&cifs_inode->open_file_lock);
refind_writable:
if (refind > MAX_REOPEN_ATT) {
- spin_unlock(&tcon->open_file_lock);
+ spin_unlock(&cifs_inode->open_file_lock);
return rc;
}
list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
@@ -1918,7 +1928,7 @@ refind_writable:
if (!open_file->invalidHandle) {
/* found a good writable file */
cifsFileInfo_get(open_file);
- spin_unlock(&tcon->open_file_lock);
+ spin_unlock(&cifs_inode->open_file_lock);
*ret_file = open_file;
return 0;
} else {
@@ -1938,7 +1948,7 @@ refind_writable:
cifsFileInfo_get(inv_file);
}
- spin_unlock(&tcon->open_file_lock);
+ spin_unlock(&cifs_inode->open_file_lock);
if (inv_file) {
rc = cifs_reopen_file(inv_file, false);
@@ -1953,7 +1963,7 @@ refind_writable:
cifsFileInfo_put(inv_file);
++refind;
inv_file = NULL;
- spin_lock(&tcon->open_file_lock);
+ spin_lock(&cifs_inode->open_file_lock);
goto refind_writable;
}
@@ -4461,17 +4471,15 @@ static int cifs_readpage(struct file *file, struct page *page)
static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
{
struct cifsFileInfo *open_file;
- struct cifs_tcon *tcon =
- cifs_sb_master_tcon(CIFS_SB(cifs_inode->vfs_inode.i_sb));
- spin_lock(&tcon->open_file_lock);
+ spin_lock(&cifs_inode->open_file_lock);
list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
- spin_unlock(&tcon->open_file_lock);
+ spin_unlock(&cifs_inode->open_file_lock);
return 1;
}
}
- spin_unlock(&tcon->open_file_lock);
+ spin_unlock(&cifs_inode->open_file_lock);
return 0;
}
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 3bae2e53f0b8..df9377828e2f 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -414,6 +414,7 @@ int cifs_get_inode_info_unix(struct inode **pinode,
/* if uniqueid is different, return error */
if (unlikely(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM &&
CIFS_I(*pinode)->uniqueid != fattr.cf_uniqueid)) {
+ CIFS_I(*pinode)->time = 0; /* force reval */
rc = -ESTALE;
goto cgiiu_exit;
}
@@ -421,6 +422,7 @@ int cifs_get_inode_info_unix(struct inode **pinode,
/* if filetype is different, return error */
if (unlikely(((*pinode)->i_mode & S_IFMT) !=
(fattr.cf_mode & S_IFMT))) {
+ CIFS_I(*pinode)->time = 0; /* force reval */
rc = -ESTALE;
goto cgiiu_exit;
}
@@ -933,6 +935,7 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
/* if uniqueid is different, return error */
if (unlikely(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM &&
CIFS_I(*inode)->uniqueid != fattr.cf_uniqueid)) {
+ CIFS_I(*inode)->time = 0; /* force reval */
rc = -ESTALE;
goto cgii_exit;
}
@@ -940,6 +943,7 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
/* if filetype is different, return error */
if (unlikely(((*inode)->i_mode & S_IFMT) !=
(fattr.cf_mode & S_IFMT))) {
+ CIFS_I(*inode)->time = 0; /* force reval */
rc = -ESTALE;
goto cgii_exit;
}
@@ -2471,9 +2475,9 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
rc = tcon->ses->server->ops->flush(xid, tcon, &wfile->fid);
cifsFileInfo_put(wfile);
if (rc)
- return rc;
+ goto cifs_setattr_exit;
} else if (rc != -EBADF)
- return rc;
+ goto cifs_setattr_exit;
else
rc = 0;
}
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 49c17ee18254..9b41436fb8db 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -117,10 +117,6 @@ static const struct smb_to_posix_error mapping_table_ERRSRV[] = {
{0, 0}
};
-static const struct smb_to_posix_error mapping_table_ERRHRD[] = {
- {0, 0}
-};
-
/*
* Convert a string containing text IPv4 or IPv6 address to binary form.
*
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index b7421a096319..514810694c0f 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -171,6 +171,9 @@ cifs_get_next_mid(struct TCP_Server_Info *server)
/* we do not want to loop forever */
last_mid = cur_mid;
cur_mid++;
+ /* avoid 0xFFFF MID */
+ if (cur_mid == 0xffff)
+ cur_mid++;
/*
* This nested loop looks more expensive than it is.
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index e6a1fc72018f..8b0b512c5792 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -145,7 +145,7 @@ smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
cur = buf;
- down_write(&cinode->lock_sem);
+ cifs_down_write(&cinode->lock_sem);
list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
if (flock->fl_start > li->offset ||
(flock->fl_start + length) <
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 4c0922596467..cd55af9b7cc5 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -4084,6 +4084,7 @@ free_pages:
kfree(dw->ppages);
cifs_small_buf_release(dw->buf);
+ kfree(dw);
}
@@ -4157,7 +4158,7 @@ receive_encrypted_read(struct TCP_Server_Info *server, struct mid_q_entry **mid,
dw->server = server;
dw->ppages = pages;
dw->len = len;
- queue_work(cifsiod_wq, &dw->decrypt);
+ queue_work(decrypt_wq, &dw->decrypt);
*num_mids = 0; /* worker thread takes care of finding mid */
return -1;
}
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 85f9d614d968..05149862aea4 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -751,8 +751,8 @@ add_posix_context(struct kvec *iov, unsigned int *num_iovec, umode_t mode)
unsigned int num = *num_iovec;
iov[num].iov_base = create_posix_buf(mode);
- if (mode == -1)
- cifs_dbg(VFS, "illegal mode\n"); /* BB REMOVEME */
+ if (mode == ACL_NO_MODE)
+ cifs_dbg(FYI, "illegal mode\n");
if (iov[num].iov_base == NULL)
return -ENOMEM;
iov[num].iov_len = sizeof(struct create_posix);
@@ -2521,11 +2521,8 @@ SMB2_open_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, __u8 *oplock,
return rc;
}
- /* TODO: add handling for the mode on create */
- if (oparms->disposition == FILE_CREATE)
- cifs_dbg(VFS, "mode is 0x%x\n", oparms->mode); /* BB REMOVEME */
-
- if ((oparms->disposition == FILE_CREATE) && (oparms->mode != -1)) {
+ if ((oparms->disposition == FILE_CREATE) &&
+ (oparms->mode != ACL_NO_MODE)) {
if (n_iov > 2) {
struct create_context *ccontext =
(struct create_context *)iov[n_iov-1].iov_base;
@@ -3217,7 +3214,8 @@ SMB2_notify_init(const unsigned int xid, struct smb_rqst *rqst,
req->PersistentFileId = persistent_fid;
req->VolatileFileId = volatile_fid;
- req->OutputBufferLength = SMB2_MAX_BUFFER_SIZE - MAX_SMB2_HDR_SIZE;
+ req->OutputBufferLength =
+ cpu_to_le32(SMB2_MAX_BUFFER_SIZE - MAX_SMB2_HDR_SIZE);
req->CompletionFilter = cpu_to_le32(completion_filter);
if (watch_tree)
req->Flags = cpu_to_le16(SMB2_WATCH_TREE);
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index ea735d59c36e..0abfde6d0b05 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -838,6 +838,7 @@ struct create_durable_handle_reconnect_v2 {
struct create_context ccontext;
__u8 Name[8];
struct durable_reconnect_context_v2 dcontext;
+ __u8 Pad[4];
} __packed;
/* See MS-SMB2 2.2.13.2.5 */
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index da3a6d580808..71b2930b8e0b 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -150,6 +150,10 @@ extern int SMB2_ioctl_init(struct cifs_tcon *tcon, struct smb_rqst *rqst,
bool is_fsctl, char *in_data, u32 indatalen,
__u32 max_response_size);
extern void SMB2_ioctl_free(struct smb_rqst *rqst);
+extern int SMB2_change_notify(const unsigned int xid, struct cifs_tcon *tcon,
+ u64 persistent_fid, u64 volatile_fid, bool watch_tree,
+ u32 completion_filter);
+
extern int SMB2_close(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_file_id, u64 volatile_file_id);
extern int SMB2_close_flags(const unsigned int xid, struct cifs_tcon *tcon,
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 308ad0f495e1..ca3de62688d6 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -86,22 +86,8 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct TCP_Server_Info *server)
static void _cifs_mid_q_entry_release(struct kref *refcount)
{
- struct mid_q_entry *mid = container_of(refcount, struct mid_q_entry,
- refcount);
-
- mempool_free(mid, cifs_mid_poolp);
-}
-
-void cifs_mid_q_entry_release(struct mid_q_entry *midEntry)
-{
- spin_lock(&GlobalMid_Lock);
- kref_put(&midEntry->refcount, _cifs_mid_q_entry_release);
- spin_unlock(&GlobalMid_Lock);
-}
-
-void
-DeleteMidQEntry(struct mid_q_entry *midEntry)
-{
+ struct mid_q_entry *midEntry =
+ container_of(refcount, struct mid_q_entry, refcount);
#ifdef CONFIG_CIFS_STATS2
__le16 command = midEntry->server->vals->lock_cmd;
__u16 smb_cmd = le16_to_cpu(midEntry->command);
@@ -166,6 +152,19 @@ DeleteMidQEntry(struct mid_q_entry *midEntry)
}
}
#endif
+
+ mempool_free(midEntry, cifs_mid_poolp);
+}
+
+void cifs_mid_q_entry_release(struct mid_q_entry *midEntry)
+{
+ spin_lock(&GlobalMid_Lock);
+ kref_put(&midEntry->refcount, _cifs_mid_q_entry_release);
+ spin_unlock(&GlobalMid_Lock);
+}
+
+void DeleteMidQEntry(struct mid_q_entry *midEntry)
+{
cifs_mid_q_entry_release(midEntry);
}
@@ -173,8 +172,10 @@ void
cifs_delete_mid(struct mid_q_entry *mid)
{
spin_lock(&GlobalMid_Lock);
- list_del_init(&mid->qhead);
- mid->mid_flags |= MID_DELETED;
+ if (!(mid->mid_flags & MID_DELETED)) {
+ list_del_init(&mid->qhead);
+ mid->mid_flags |= MID_DELETED;
+ }
spin_unlock(&GlobalMid_Lock);
DeleteMidQEntry(mid);
@@ -872,7 +873,10 @@ cifs_sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server)
rc = -EHOSTDOWN;
break;
default:
- list_del_init(&mid->qhead);
+ if (!(mid->mid_flags & MID_DELETED)) {
+ list_del_init(&mid->qhead);
+ mid->mid_flags |= MID_DELETED;
+ }
cifs_server_dbg(VFS, "%s: invalid mid state mid=%llu state=%d\n",
__func__, mid->mid, mid->mid_state);
rc = -EIO;
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index dc5dbf6a81d7..cb61467478ca 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -101,7 +101,7 @@ static int create_link(struct config_item *parent_item,
}
target_sd->s_links++;
spin_unlock(&configfs_dirent_lock);
- ret = configfs_get_target_path(item, item, body);
+ ret = configfs_get_target_path(parent_item, item, body);
if (!ret)
ret = configfs_create_link(target_sd, parent_item->ci_dentry,
dentry, body);
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index d12ea28836a5..2f04024c3588 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -958,8 +958,8 @@ static int cramfs_get_tree(struct fs_context *fc)
if (IS_ENABLED(CONFIG_CRAMFS_MTD)) {
ret = get_tree_mtd(fc, cramfs_mtd_fill_super);
- if (ret < 0)
- return ret;
+ if (!ret)
+ return 0;
}
if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV))
ret = get_tree_bdev(fc, cramfs_blkdev_fill_super);
diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c
index 82da2510721f..1f4b8a277060 100644
--- a/fs/crypto/bio.c
+++ b/fs/crypto/bio.c
@@ -26,7 +26,7 @@
#include <linux/namei.h>
#include "fscrypt_private.h"
-static void __fscrypt_decrypt_bio(struct bio *bio, bool done)
+void fscrypt_decrypt_bio(struct bio *bio)
{
struct bio_vec *bv;
struct bvec_iter_all iter_all;
@@ -37,37 +37,10 @@ static void __fscrypt_decrypt_bio(struct bio *bio, bool done)
bv->bv_offset);
if (ret)
SetPageError(page);
- else if (done)
- SetPageUptodate(page);
- if (done)
- unlock_page(page);
}
}
-
-void fscrypt_decrypt_bio(struct bio *bio)
-{
- __fscrypt_decrypt_bio(bio, false);
-}
EXPORT_SYMBOL(fscrypt_decrypt_bio);
-static void completion_pages(struct work_struct *work)
-{
- struct fscrypt_ctx *ctx = container_of(work, struct fscrypt_ctx, work);
- struct bio *bio = ctx->bio;
-
- __fscrypt_decrypt_bio(bio, true);
- fscrypt_release_ctx(ctx);
- bio_put(bio);
-}
-
-void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx, struct bio *bio)
-{
- INIT_WORK(&ctx->work, completion_pages);
- ctx->bio = bio;
- fscrypt_enqueue_decrypt_work(&ctx->work);
-}
-EXPORT_SYMBOL(fscrypt_enqueue_decrypt_bio);
-
int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
sector_t pblk, unsigned int len)
{
diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
index 32a7ad0098cc..3719efa546c6 100644
--- a/fs/crypto/crypto.c
+++ b/fs/crypto/crypto.c
@@ -27,29 +27,20 @@
#include <linux/ratelimit.h>
#include <linux/dcache.h>
#include <linux/namei.h>
-#include <crypto/aes.h>
#include <crypto/skcipher.h>
#include "fscrypt_private.h"
static unsigned int num_prealloc_crypto_pages = 32;
-static unsigned int num_prealloc_crypto_ctxs = 128;
module_param(num_prealloc_crypto_pages, uint, 0444);
MODULE_PARM_DESC(num_prealloc_crypto_pages,
"Number of crypto pages to preallocate");
-module_param(num_prealloc_crypto_ctxs, uint, 0444);
-MODULE_PARM_DESC(num_prealloc_crypto_ctxs,
- "Number of crypto contexts to preallocate");
static mempool_t *fscrypt_bounce_page_pool = NULL;
-static LIST_HEAD(fscrypt_free_ctxs);
-static DEFINE_SPINLOCK(fscrypt_ctx_lock);
-
static struct workqueue_struct *fscrypt_read_workqueue;
static DEFINE_MUTEX(fscrypt_init_mutex);
-static struct kmem_cache *fscrypt_ctx_cachep;
struct kmem_cache *fscrypt_info_cachep;
void fscrypt_enqueue_decrypt_work(struct work_struct *work)
@@ -58,62 +49,6 @@ void fscrypt_enqueue_decrypt_work(struct work_struct *work)
}
EXPORT_SYMBOL(fscrypt_enqueue_decrypt_work);
-/**
- * fscrypt_release_ctx() - Release a decryption context
- * @ctx: The decryption context to release.
- *
- * If the decryption context was allocated from the pre-allocated pool, return
- * it to that pool. Else, free it.
- */
-void fscrypt_release_ctx(struct fscrypt_ctx *ctx)
-{
- unsigned long flags;
-
- if (ctx->flags & FS_CTX_REQUIRES_FREE_ENCRYPT_FL) {
- kmem_cache_free(fscrypt_ctx_cachep, ctx);
- } else {
- spin_lock_irqsave(&fscrypt_ctx_lock, flags);
- list_add(&ctx->free_list, &fscrypt_free_ctxs);
- spin_unlock_irqrestore(&fscrypt_ctx_lock, flags);
- }
-}
-EXPORT_SYMBOL(fscrypt_release_ctx);
-
-/**
- * fscrypt_get_ctx() - Get a decryption context
- * @gfp_flags: The gfp flag for memory allocation
- *
- * Allocate and initialize a decryption context.
- *
- * Return: A new decryption context on success; an ERR_PTR() otherwise.
- */
-struct fscrypt_ctx *fscrypt_get_ctx(gfp_t gfp_flags)
-{
- struct fscrypt_ctx *ctx;
- unsigned long flags;
-
- /*
- * First try getting a ctx from the free list so that we don't have to
- * call into the slab allocator.
- */
- spin_lock_irqsave(&fscrypt_ctx_lock, flags);
- ctx = list_first_entry_or_null(&fscrypt_free_ctxs,
- struct fscrypt_ctx, free_list);
- if (ctx)
- list_del(&ctx->free_list);
- spin_unlock_irqrestore(&fscrypt_ctx_lock, flags);
- if (!ctx) {
- ctx = kmem_cache_zalloc(fscrypt_ctx_cachep, gfp_flags);
- if (!ctx)
- return ERR_PTR(-ENOMEM);
- ctx->flags |= FS_CTX_REQUIRES_FREE_ENCRYPT_FL;
- } else {
- ctx->flags &= ~FS_CTX_REQUIRES_FREE_ENCRYPT_FL;
- }
- return ctx;
-}
-EXPORT_SYMBOL(fscrypt_get_ctx);
-
struct page *fscrypt_alloc_bounce_page(gfp_t gfp_flags)
{
return mempool_alloc(fscrypt_bounce_page_pool, gfp_flags);
@@ -138,14 +73,17 @@ EXPORT_SYMBOL(fscrypt_free_bounce_page);
void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num,
const struct fscrypt_info *ci)
{
+ u8 flags = fscrypt_policy_flags(&ci->ci_policy);
+
memset(iv, 0, ci->ci_mode->ivsize);
- iv->lblk_num = cpu_to_le64(lblk_num);
- if (fscrypt_is_direct_key_policy(&ci->ci_policy))
+ if (flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64) {
+ WARN_ON_ONCE((u32)lblk_num != lblk_num);
+ lblk_num |= (u64)ci->ci_inode->i_ino << 32;
+ } else if (flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY) {
memcpy(iv->nonce, ci->ci_nonce, FS_KEY_DERIVATION_NONCE_SIZE);
-
- if (ci->ci_essiv_tfm != NULL)
- crypto_cipher_encrypt_one(ci->ci_essiv_tfm, iv->raw, iv->raw);
+ }
+ iv->lblk_num = cpu_to_le64(lblk_num);
}
/* Encrypt or decrypt a single filesystem block of file contents */
@@ -396,17 +334,6 @@ const struct dentry_operations fscrypt_d_ops = {
.d_revalidate = fscrypt_d_revalidate,
};
-static void fscrypt_destroy(void)
-{
- struct fscrypt_ctx *pos, *n;
-
- list_for_each_entry_safe(pos, n, &fscrypt_free_ctxs, free_list)
- kmem_cache_free(fscrypt_ctx_cachep, pos);
- INIT_LIST_HEAD(&fscrypt_free_ctxs);
- mempool_destroy(fscrypt_bounce_page_pool);
- fscrypt_bounce_page_pool = NULL;
-}
-
/**
* fscrypt_initialize() - allocate major buffers for fs encryption.
* @cop_flags: fscrypt operations flags
@@ -414,11 +341,11 @@ static void fscrypt_destroy(void)
* We only call this when we start accessing encrypted files, since it
* results in memory getting allocated that wouldn't otherwise be used.
*
- * Return: Zero on success, non-zero otherwise.
+ * Return: 0 on success; -errno on failure
*/
int fscrypt_initialize(unsigned int cop_flags)
{
- int i, res = -ENOMEM;
+ int err = 0;
/* No need to allocate a bounce page pool if this FS won't use it. */
if (cop_flags & FS_CFLG_OWN_PAGES)
@@ -426,29 +353,18 @@ int fscrypt_initialize(unsigned int cop_flags)
mutex_lock(&fscrypt_init_mutex);
if (fscrypt_bounce_page_pool)
- goto already_initialized;
-
- for (i = 0; i < num_prealloc_crypto_ctxs; i++) {
- struct fscrypt_ctx *ctx;
-
- ctx = kmem_cache_zalloc(fscrypt_ctx_cachep, GFP_NOFS);
- if (!ctx)
- goto fail;
- list_add(&ctx->free_list, &fscrypt_free_ctxs);
- }
+ goto out_unlock;
+ err = -ENOMEM;
fscrypt_bounce_page_pool =
mempool_create_page_pool(num_prealloc_crypto_pages, 0);
if (!fscrypt_bounce_page_pool)
- goto fail;
+ goto out_unlock;
-already_initialized:
- mutex_unlock(&fscrypt_init_mutex);
- return 0;
-fail:
- fscrypt_destroy();
+ err = 0;
+out_unlock:
mutex_unlock(&fscrypt_init_mutex);
- return res;
+ return err;
}
void fscrypt_msg(const struct inode *inode, const char *level,
@@ -494,13 +410,9 @@ static int __init fscrypt_init(void)
if (!fscrypt_read_workqueue)
goto fail;
- fscrypt_ctx_cachep = KMEM_CACHE(fscrypt_ctx, SLAB_RECLAIM_ACCOUNT);
- if (!fscrypt_ctx_cachep)
- goto fail_free_queue;
-
fscrypt_info_cachep = KMEM_CACHE(fscrypt_info, SLAB_RECLAIM_ACCOUNT);
if (!fscrypt_info_cachep)
- goto fail_free_ctx;
+ goto fail_free_queue;
err = fscrypt_init_keyring();
if (err)
@@ -510,8 +422,6 @@ static int __init fscrypt_init(void)
fail_free_info:
kmem_cache_destroy(fscrypt_info_cachep);
-fail_free_ctx:
- kmem_cache_destroy(fscrypt_ctx_cachep);
fail_free_queue:
destroy_workqueue(fscrypt_read_workqueue);
fail:
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index e84efc01512e..130b50e5a011 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -163,11 +163,8 @@ struct fscrypt_info {
/* The actual crypto transform used for encryption and decryption */
struct crypto_skcipher *ci_ctfm;
- /*
- * Cipher for ESSIV IV generation. Only set for CBC contents
- * encryption, otherwise is NULL.
- */
- struct crypto_cipher *ci_essiv_tfm;
+ /* True if the key should be freed when this fscrypt_info is freed */
+ bool ci_owns_key;
/*
* Encryption mode used for this inode. It corresponds to either the
@@ -209,8 +206,6 @@ typedef enum {
FS_ENCRYPT,
} fscrypt_direction_t;
-#define FS_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001
-
static inline bool fscrypt_valid_enc_modes(u32 contents_mode,
u32 filenames_mode)
{
@@ -289,7 +284,8 @@ extern int fscrypt_init_hkdf(struct fscrypt_hkdf *hkdf, const u8 *master_key,
*/
#define HKDF_CONTEXT_KEY_IDENTIFIER 1
#define HKDF_CONTEXT_PER_FILE_KEY 2
-#define HKDF_CONTEXT_PER_MODE_KEY 3
+#define HKDF_CONTEXT_DIRECT_KEY 3
+#define HKDF_CONTEXT_IV_INO_LBLK_64_KEY 4
extern int fscrypt_hkdf_expand(struct fscrypt_hkdf *hkdf, u8 context,
const u8 *info, unsigned int infolen,
@@ -386,8 +382,14 @@ struct fscrypt_master_key {
struct list_head mk_decrypted_inodes;
spinlock_t mk_decrypted_inodes_lock;
- /* Per-mode tfms for DIRECT_KEY policies, allocated on-demand */
- struct crypto_skcipher *mk_mode_keys[__FSCRYPT_MODE_MAX + 1];
+ /* Crypto API transforms for DIRECT_KEY policies, allocated on-demand */
+ struct crypto_skcipher *mk_direct_tfms[__FSCRYPT_MODE_MAX + 1];
+
+ /*
+ * Crypto API transforms for filesystem-layer implementation of
+ * IV_INO_LBLK_64 policies, allocated on-demand.
+ */
+ struct crypto_skcipher *mk_iv_ino_lblk_64_tfms[__FSCRYPT_MODE_MAX + 1];
} __randomize_layout;
@@ -443,8 +445,7 @@ struct fscrypt_mode {
const char *cipher_str;
int keysize;
int ivsize;
- bool logged_impl_name;
- bool needs_essiv;
+ int logged_impl_name;
};
static inline bool
diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c
index c34fa7c61b43..040df1f5e1c8 100644
--- a/fs/crypto/keyring.c
+++ b/fs/crypto/keyring.c
@@ -43,8 +43,10 @@ static void free_master_key(struct fscrypt_master_key *mk)
wipe_master_key_secret(&mk->mk_secret);
- for (i = 0; i < ARRAY_SIZE(mk->mk_mode_keys); i++)
- crypto_free_skcipher(mk->mk_mode_keys[i]);
+ for (i = 0; i <= __FSCRYPT_MODE_MAX; i++) {
+ crypto_free_skcipher(mk->mk_direct_tfms[i]);
+ crypto_free_skcipher(mk->mk_iv_ino_lblk_64_tfms[i]);
+ }
key_put(mk->mk_users);
kzfree(mk);
diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c
index d71c2d6dd162..f577bb6613f9 100644
--- a/fs/crypto/keysetup.c
+++ b/fs/crypto/keysetup.c
@@ -8,15 +8,11 @@
* Heavily modified since then.
*/
-#include <crypto/aes.h>
-#include <crypto/sha.h>
#include <crypto/skcipher.h>
#include <linux/key.h>
#include "fscrypt_private.h"
-static struct crypto_shash *essiv_hash_tfm;
-
static struct fscrypt_mode available_modes[] = {
[FSCRYPT_MODE_AES_256_XTS] = {
.friendly_name = "AES-256-XTS",
@@ -31,11 +27,10 @@ static struct fscrypt_mode available_modes[] = {
.ivsize = 16,
},
[FSCRYPT_MODE_AES_128_CBC] = {
- .friendly_name = "AES-128-CBC",
- .cipher_str = "cbc(aes)",
+ .friendly_name = "AES-128-CBC-ESSIV",
+ .cipher_str = "essiv(cbc(aes),sha256)",
.keysize = 16,
.ivsize = 16,
- .needs_essiv = true,
},
[FSCRYPT_MODE_AES_128_CTS] = {
.friendly_name = "AES-128-CTS-CBC",
@@ -86,15 +81,13 @@ struct crypto_skcipher *fscrypt_allocate_skcipher(struct fscrypt_mode *mode,
mode->cipher_str, PTR_ERR(tfm));
return tfm;
}
- if (unlikely(!mode->logged_impl_name)) {
+ if (!xchg(&mode->logged_impl_name, 1)) {
/*
* fscrypt performance can vary greatly depending on which
* crypto algorithm implementation is used. Help people debug
* performance problems by logging the ->cra_driver_name the
- * first time a mode is used. Note that multiple threads can
- * race here, but it doesn't really matter.
+ * first time a mode is used.
*/
- mode->logged_impl_name = true;
pr_info("fscrypt: %s using implementation \"%s\"\n",
mode->friendly_name,
crypto_skcipher_alg(tfm)->base.cra_driver_name);
@@ -111,131 +104,64 @@ err_free_tfm:
return ERR_PTR(err);
}
-static int derive_essiv_salt(const u8 *key, int keysize, u8 *salt)
-{
- struct crypto_shash *tfm = READ_ONCE(essiv_hash_tfm);
-
- /* init hash transform on demand */
- if (unlikely(!tfm)) {
- struct crypto_shash *prev_tfm;
-
- tfm = crypto_alloc_shash("sha256", 0, 0);
- if (IS_ERR(tfm)) {
- if (PTR_ERR(tfm) == -ENOENT) {
- fscrypt_warn(NULL,
- "Missing crypto API support for SHA-256");
- return -ENOPKG;
- }
- fscrypt_err(NULL,
- "Error allocating SHA-256 transform: %ld",
- PTR_ERR(tfm));
- return PTR_ERR(tfm);
- }
- prev_tfm = cmpxchg(&essiv_hash_tfm, NULL, tfm);
- if (prev_tfm) {
- crypto_free_shash(tfm);
- tfm = prev_tfm;
- }
- }
-
- {
- SHASH_DESC_ON_STACK(desc, tfm);
- desc->tfm = tfm;
-
- return crypto_shash_digest(desc, key, keysize, salt);
- }
-}
-
-static int init_essiv_generator(struct fscrypt_info *ci, const u8 *raw_key,
- int keysize)
-{
- int err;
- struct crypto_cipher *essiv_tfm;
- u8 salt[SHA256_DIGEST_SIZE];
-
- if (WARN_ON(ci->ci_mode->ivsize != AES_BLOCK_SIZE))
- return -EINVAL;
-
- essiv_tfm = crypto_alloc_cipher("aes", 0, 0);
- if (IS_ERR(essiv_tfm))
- return PTR_ERR(essiv_tfm);
-
- ci->ci_essiv_tfm = essiv_tfm;
-
- err = derive_essiv_salt(raw_key, keysize, salt);
- if (err)
- goto out;
-
- /*
- * Using SHA256 to derive the salt/key will result in AES-256 being
- * used for IV generation. File contents encryption will still use the
- * configured keysize (AES-128) nevertheless.
- */
- err = crypto_cipher_setkey(essiv_tfm, salt, sizeof(salt));
- if (err)
- goto out;
-
-out:
- memzero_explicit(salt, sizeof(salt));
- return err;
-}
-
-/* Given the per-file key, set up the file's crypto transform object(s) */
+/* Given the per-file key, set up the file's crypto transform object */
int fscrypt_set_derived_key(struct fscrypt_info *ci, const u8 *derived_key)
{
- struct fscrypt_mode *mode = ci->ci_mode;
- struct crypto_skcipher *ctfm;
- int err;
-
- ctfm = fscrypt_allocate_skcipher(mode, derived_key, ci->ci_inode);
- if (IS_ERR(ctfm))
- return PTR_ERR(ctfm);
+ struct crypto_skcipher *tfm;
- ci->ci_ctfm = ctfm;
+ tfm = fscrypt_allocate_skcipher(ci->ci_mode, derived_key, ci->ci_inode);
+ if (IS_ERR(tfm))
+ return PTR_ERR(tfm);
- if (mode->needs_essiv) {
- err = init_essiv_generator(ci, derived_key, mode->keysize);
- if (err) {
- fscrypt_warn(ci->ci_inode,
- "Error initializing ESSIV generator: %d",
- err);
- return err;
- }
- }
+ ci->ci_ctfm = tfm;
+ ci->ci_owns_key = true;
return 0;
}
static int setup_per_mode_key(struct fscrypt_info *ci,
- struct fscrypt_master_key *mk)
+ struct fscrypt_master_key *mk,
+ struct crypto_skcipher **tfms,
+ u8 hkdf_context, bool include_fs_uuid)
{
+ const struct inode *inode = ci->ci_inode;
+ const struct super_block *sb = inode->i_sb;
struct fscrypt_mode *mode = ci->ci_mode;
u8 mode_num = mode - available_modes;
struct crypto_skcipher *tfm, *prev_tfm;
u8 mode_key[FSCRYPT_MAX_KEY_SIZE];
+ u8 hkdf_info[sizeof(mode_num) + sizeof(sb->s_uuid)];
+ unsigned int hkdf_infolen = 0;
int err;
- if (WARN_ON(mode_num >= ARRAY_SIZE(mk->mk_mode_keys)))
+ if (WARN_ON(mode_num > __FSCRYPT_MODE_MAX))
return -EINVAL;
/* pairs with cmpxchg() below */
- tfm = READ_ONCE(mk->mk_mode_keys[mode_num]);
+ tfm = READ_ONCE(tfms[mode_num]);
if (likely(tfm != NULL))
goto done;
BUILD_BUG_ON(sizeof(mode_num) != 1);
+ BUILD_BUG_ON(sizeof(sb->s_uuid) != 16);
+ BUILD_BUG_ON(sizeof(hkdf_info) != 17);
+ hkdf_info[hkdf_infolen++] = mode_num;
+ if (include_fs_uuid) {
+ memcpy(&hkdf_info[hkdf_infolen], &sb->s_uuid,
+ sizeof(sb->s_uuid));
+ hkdf_infolen += sizeof(sb->s_uuid);
+ }
err = fscrypt_hkdf_expand(&mk->mk_secret.hkdf,
- HKDF_CONTEXT_PER_MODE_KEY,
- &mode_num, sizeof(mode_num),
+ hkdf_context, hkdf_info, hkdf_infolen,
mode_key, mode->keysize);
if (err)
return err;
- tfm = fscrypt_allocate_skcipher(mode, mode_key, ci->ci_inode);
+ tfm = fscrypt_allocate_skcipher(mode, mode_key, inode);
memzero_explicit(mode_key, mode->keysize);
if (IS_ERR(tfm))
return PTR_ERR(tfm);
/* pairs with READ_ONCE() above */
- prev_tfm = cmpxchg(&mk->mk_mode_keys[mode_num], NULL, tfm);
+ prev_tfm = cmpxchg(&tfms[mode_num], NULL, tfm);
if (prev_tfm != NULL) {
crypto_free_skcipher(tfm);
tfm = prev_tfm;
@@ -266,7 +192,19 @@ static int fscrypt_setup_v2_file_key(struct fscrypt_info *ci,
ci->ci_mode->friendly_name);
return -EINVAL;
}
- return setup_per_mode_key(ci, mk);
+ return setup_per_mode_key(ci, mk, mk->mk_direct_tfms,
+ HKDF_CONTEXT_DIRECT_KEY, false);
+ } else if (ci->ci_policy.v2.flags &
+ FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64) {
+ /*
+ * IV_INO_LBLK_64: encryption keys are derived from (master_key,
+ * mode_num, filesystem_uuid), and inode number is included in
+ * the IVs. This format is optimized for use with inline
+ * encryption hardware compliant with the UFS or eMMC standards.
+ */
+ return setup_per_mode_key(ci, mk, mk->mk_iv_ino_lblk_64_tfms,
+ HKDF_CONTEXT_IV_INO_LBLK_64_KEY,
+ true);
}
err = fscrypt_hkdf_expand(&mk->mk_secret.hkdf,
@@ -388,13 +326,10 @@ static void put_crypt_info(struct fscrypt_info *ci)
if (!ci)
return;
- if (ci->ci_direct_key) {
+ if (ci->ci_direct_key)
fscrypt_put_direct_key(ci->ci_direct_key);
- } else if ((ci->ci_ctfm != NULL || ci->ci_essiv_tfm != NULL) &&
- !fscrypt_is_direct_key_policy(&ci->ci_policy)) {
+ else if (ci->ci_owns_key)
crypto_free_skcipher(ci->ci_ctfm);
- crypto_free_cipher(ci->ci_essiv_tfm);
- }
key = ci->ci_master_key;
if (key) {
@@ -415,6 +350,7 @@ static void put_crypt_info(struct fscrypt_info *ci)
key_invalidate(key);
key_put(key);
}
+ memzero_explicit(ci, sizeof(*ci));
kmem_cache_free(fscrypt_info_cachep, ci);
}
diff --git a/fs/crypto/keysetup_v1.c b/fs/crypto/keysetup_v1.c
index ad1a36c370c3..5298ef22aa85 100644
--- a/fs/crypto/keysetup_v1.c
+++ b/fs/crypto/keysetup_v1.c
@@ -270,10 +270,6 @@ static int setup_v1_file_key_direct(struct fscrypt_info *ci,
return -EINVAL;
}
- /* ESSIV implies 16-byte IVs which implies !DIRECT_KEY */
- if (WARN_ON(mode->needs_essiv))
- return -EINVAL;
-
dk = fscrypt_get_direct_key(ci, raw_master_key);
if (IS_ERR(dk))
return PTR_ERR(dk);
diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
index 4072ba644595..96f528071bed 100644
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -29,6 +29,40 @@ bool fscrypt_policies_equal(const union fscrypt_policy *policy1,
return !memcmp(policy1, policy2, fscrypt_policy_size(policy1));
}
+static bool supported_iv_ino_lblk_64_policy(
+ const struct fscrypt_policy_v2 *policy,
+ const struct inode *inode)
+{
+ struct super_block *sb = inode->i_sb;
+ int ino_bits = 64, lblk_bits = 64;
+
+ if (policy->flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY) {
+ fscrypt_warn(inode,
+ "The DIRECT_KEY and IV_INO_LBLK_64 flags are mutually exclusive");
+ return false;
+ }
+ /*
+ * It's unsafe to include inode numbers in the IVs if the filesystem can
+ * potentially renumber inodes, e.g. via filesystem shrinking.
+ */
+ if (!sb->s_cop->has_stable_inodes ||
+ !sb->s_cop->has_stable_inodes(sb)) {
+ fscrypt_warn(inode,
+ "Can't use IV_INO_LBLK_64 policy on filesystem '%s' because it doesn't have stable inode numbers",
+ sb->s_id);
+ return false;
+ }
+ if (sb->s_cop->get_ino_and_lblk_bits)
+ sb->s_cop->get_ino_and_lblk_bits(sb, &ino_bits, &lblk_bits);
+ if (ino_bits > 32 || lblk_bits > 32) {
+ fscrypt_warn(inode,
+ "Can't use IV_INO_LBLK_64 policy on filesystem '%s' because it doesn't use 32-bit inode and block numbers",
+ sb->s_id);
+ return false;
+ }
+ return true;
+}
+
/**
* fscrypt_supported_policy - check whether an encryption policy is supported
*
@@ -55,7 +89,8 @@ bool fscrypt_supported_policy(const union fscrypt_policy *policy_u,
return false;
}
- if (policy->flags & ~FSCRYPT_POLICY_FLAGS_VALID) {
+ if (policy->flags & ~(FSCRYPT_POLICY_FLAGS_PAD_MASK |
+ FSCRYPT_POLICY_FLAG_DIRECT_KEY)) {
fscrypt_warn(inode,
"Unsupported encryption flags (0x%02x)",
policy->flags);
@@ -83,6 +118,10 @@ bool fscrypt_supported_policy(const union fscrypt_policy *policy_u,
return false;
}
+ if ((policy->flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64) &&
+ !supported_iv_ino_lblk_64_policy(policy, inode))
+ return false;
+
if (memchr_inv(policy->__reserved, 0,
sizeof(policy->__reserved))) {
fscrypt_warn(inode,
diff --git a/fs/dax.c b/fs/dax.c
index 6bf81f931de3..2cc43cd914eb 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -220,10 +220,11 @@ static void *get_unlocked_entry(struct xa_state *xas, unsigned int order)
for (;;) {
entry = xas_find_conflict(xas);
+ if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
+ return entry;
if (dax_entry_order(entry) < order)
return XA_RETRY_ENTRY;
- if (!entry || WARN_ON_ONCE(!xa_is_value(entry)) ||
- !dax_is_locked(entry))
+ if (!dax_is_locked(entry))
return entry;
wq = dax_entry_waitqueue(xas, entry, &ewait.key);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index ae196784f487..9329ced91f1d 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -241,9 +241,8 @@ void dio_warn_stale_pagecache(struct file *filp)
}
}
-/**
+/*
* dio_complete() - called when all DIO BIO I/O has been completed
- * @offset: the byte offset in the file of the completed operation
*
* This drops i_dio_count, lets interested parties know that a DIO operation
* has completed, and calculates the resulting return code for the operation.
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 18426f4855f1..e23752d9a79f 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -128,13 +128,20 @@ static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry,
struct inode *inode)
{
struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
- struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir);
struct dentry *lower_dir_dentry;
+ struct inode *lower_dir_inode;
int rc;
- dget(lower_dentry);
- lower_dir_dentry = lock_parent(lower_dentry);
- rc = vfs_unlink(lower_dir_inode, lower_dentry, NULL);
+ lower_dir_dentry = ecryptfs_dentry_to_lower(dentry->d_parent);
+ lower_dir_inode = d_inode(lower_dir_dentry);
+ inode_lock_nested(lower_dir_inode, I_MUTEX_PARENT);
+ dget(lower_dentry); // don't even try to make the lower negative
+ if (lower_dentry->d_parent != lower_dir_dentry)
+ rc = -EINVAL;
+ else if (d_unhashed(lower_dentry))
+ rc = -EINVAL;
+ else
+ rc = vfs_unlink(lower_dir_inode, lower_dentry, NULL);
if (rc) {
printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc);
goto out_unlock;
@@ -142,10 +149,11 @@ static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry,
fsstack_copy_attr_times(dir, lower_dir_inode);
set_nlink(inode, ecryptfs_inode_to_lower(inode)->i_nlink);
inode->i_ctime = dir->i_ctime;
- d_drop(dentry);
out_unlock:
- unlock_dir(lower_dir_dentry);
dput(lower_dentry);
+ inode_unlock(lower_dir_inode);
+ if (!rc)
+ d_drop(dentry);
return rc;
}
@@ -311,9 +319,9 @@ static int ecryptfs_i_size_read(struct dentry *dentry, struct inode *inode)
static struct dentry *ecryptfs_lookup_interpose(struct dentry *dentry,
struct dentry *lower_dentry)
{
- struct inode *inode, *lower_inode = d_inode(lower_dentry);
+ struct path *path = ecryptfs_dentry_to_lower_path(dentry->d_parent);
+ struct inode *inode, *lower_inode;
struct ecryptfs_dentry_info *dentry_info;
- struct vfsmount *lower_mnt;
int rc = 0;
dentry_info = kmem_cache_alloc(ecryptfs_dentry_info_cache, GFP_KERNEL);
@@ -322,16 +330,23 @@ static struct dentry *ecryptfs_lookup_interpose(struct dentry *dentry,
return ERR_PTR(-ENOMEM);
}
- lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent));
fsstack_copy_attr_atime(d_inode(dentry->d_parent),
- d_inode(lower_dentry->d_parent));
+ d_inode(path->dentry));
BUG_ON(!d_count(lower_dentry));
ecryptfs_set_dentry_private(dentry, dentry_info);
- dentry_info->lower_path.mnt = lower_mnt;
+ dentry_info->lower_path.mnt = mntget(path->mnt);
dentry_info->lower_path.dentry = lower_dentry;
- if (d_really_is_negative(lower_dentry)) {
+ /*
+ * negative dentry can go positive under us here - its parent is not
+ * locked. That's OK and that could happen just as we return from
+ * ecryptfs_lookup() anyway. Just need to be careful and fetch
+ * ->d_inode only once - it's not stable here.
+ */
+ lower_inode = READ_ONCE(lower_dentry->d_inode);
+
+ if (!lower_inode) {
/* We want to add because we couldn't find in lower */
d_add(dentry, NULL);
return NULL;
@@ -512,22 +527,30 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry)
{
struct dentry *lower_dentry;
struct dentry *lower_dir_dentry;
+ struct inode *lower_dir_inode;
int rc;
lower_dentry = ecryptfs_dentry_to_lower(dentry);
- dget(dentry);
- lower_dir_dentry = lock_parent(lower_dentry);
- dget(lower_dentry);
- rc = vfs_rmdir(d_inode(lower_dir_dentry), lower_dentry);
- dput(lower_dentry);
- if (!rc && d_really_is_positive(dentry))
+ lower_dir_dentry = ecryptfs_dentry_to_lower(dentry->d_parent);
+ lower_dir_inode = d_inode(lower_dir_dentry);
+
+ inode_lock_nested(lower_dir_inode, I_MUTEX_PARENT);
+ dget(lower_dentry); // don't even try to make the lower negative
+ if (lower_dentry->d_parent != lower_dir_dentry)
+ rc = -EINVAL;
+ else if (d_unhashed(lower_dentry))
+ rc = -EINVAL;
+ else
+ rc = vfs_rmdir(lower_dir_inode, lower_dentry);
+ if (!rc) {
clear_nlink(d_inode(dentry));
- fsstack_copy_attr_times(dir, d_inode(lower_dir_dentry));
- set_nlink(dir, d_inode(lower_dir_dentry)->i_nlink);
- unlock_dir(lower_dir_dentry);
+ fsstack_copy_attr_times(dir, lower_dir_inode);
+ set_nlink(dir, lower_dir_inode->i_nlink);
+ }
+ dput(lower_dentry);
+ inode_unlock(lower_dir_inode);
if (!rc)
d_drop(dentry);
- dput(dentry);
return rc;
}
@@ -565,20 +588,22 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct dentry *lower_new_dentry;
struct dentry *lower_old_dir_dentry;
struct dentry *lower_new_dir_dentry;
- struct dentry *trap = NULL;
+ struct dentry *trap;
struct inode *target_inode;
if (flags)
return -EINVAL;
+ lower_old_dir_dentry = ecryptfs_dentry_to_lower(old_dentry->d_parent);
+ lower_new_dir_dentry = ecryptfs_dentry_to_lower(new_dentry->d_parent);
+
lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry);
lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry);
- dget(lower_old_dentry);
- dget(lower_new_dentry);
- lower_old_dir_dentry = dget_parent(lower_old_dentry);
- lower_new_dir_dentry = dget_parent(lower_new_dentry);
+
target_inode = d_inode(new_dentry);
+
trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
+ dget(lower_new_dentry);
rc = -EINVAL;
if (lower_old_dentry->d_parent != lower_old_dir_dentry)
goto out_lock;
@@ -606,11 +631,8 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (new_dir != old_dir)
fsstack_copy_attr_all(old_dir, d_inode(lower_old_dir_dentry));
out_lock:
- unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
- dput(lower_new_dir_dentry);
- dput(lower_old_dir_dentry);
dput(lower_new_dentry);
- dput(lower_old_dentry);
+ unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
return rc;
}
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 09bc68708d28..2dd55b172d57 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -519,26 +519,33 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
* inode is actually connected to the parent.
*/
err = exportfs_get_name(mnt, target_dir, nbuf, result);
- if (!err) {
- inode_lock(target_dir->d_inode);
- nresult = lookup_one_len(nbuf, target_dir,
- strlen(nbuf));
- inode_unlock(target_dir->d_inode);
- if (!IS_ERR(nresult)) {
- if (nresult->d_inode) {
- dput(result);
- result = nresult;
- } else
- dput(nresult);
- }
+ if (err) {
+ dput(target_dir);
+ goto err_result;
}
+ inode_lock(target_dir->d_inode);
+ nresult = lookup_one_len(nbuf, target_dir, strlen(nbuf));
+ if (!IS_ERR(nresult)) {
+ if (unlikely(nresult->d_inode != result->d_inode)) {
+ dput(nresult);
+ nresult = ERR_PTR(-ESTALE);
+ }
+ }
+ inode_unlock(target_dir->d_inode);
/*
* At this point we are done with the parent, but it's pinned
* by the child dentry anyway.
*/
dput(target_dir);
+ if (IS_ERR(nresult)) {
+ err = PTR_ERR(nresult);
+ goto err_result;
+ }
+ dput(result);
+ result = nresult;
+
/*
* And finally make sure the dentry is actually acceptable
* to NFSD.
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index cbb5ca830e57..ef42ab040905 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -106,3 +106,20 @@ config EXT4_DEBUG
If you select Y here, then you will be able to turn on debugging
with a command such as:
echo 1 > /sys/module/ext4/parameters/mballoc_debug
+
+config EXT4_KUNIT_TESTS
+ bool "KUnit tests for ext4"
+ select EXT4_FS
+ depends on KUNIT
+ help
+ This builds the ext4 KUnit tests.
+
+ KUnit tests run during boot and output the results to the debug log
+ in TAP format (http://testanything.org/). Only useful for kernel devs
+ running KUnit test harness and are not for inclusion into a production
+ build.
+
+ For more information on KUnit and unit tests in general please refer
+ to the KUnit documentation in Documentation/dev-tools/kunit/.
+
+ If unsure, say N.
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index b17ddc229ac5..840b91d040f1 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -13,4 +13,5 @@ ext4-y := balloc.o bitmap.o block_validity.o dir.o ext4_jbd2.o extents.o \
ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o
ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o
+ext4-$(CONFIG_EXT4_KUNIT_TESTS) += inode-test.o
ext4-$(CONFIG_FS_VERITY) += verity.o
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 03db3e71676c..b3a2cc7c0252 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1678,6 +1678,7 @@ static inline bool ext4_verity_in_progress(struct inode *inode)
#define EXT4_FEATURE_COMPAT_RESIZE_INODE 0x0010
#define EXT4_FEATURE_COMPAT_DIR_INDEX 0x0020
#define EXT4_FEATURE_COMPAT_SPARSE_SUPER2 0x0200
+#define EXT4_FEATURE_COMPAT_STABLE_INODES 0x0800
#define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001
#define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002
@@ -1779,6 +1780,7 @@ EXT4_FEATURE_COMPAT_FUNCS(xattr, EXT_ATTR)
EXT4_FEATURE_COMPAT_FUNCS(resize_inode, RESIZE_INODE)
EXT4_FEATURE_COMPAT_FUNCS(dir_index, DIR_INDEX)
EXT4_FEATURE_COMPAT_FUNCS(sparse_super2, SPARSE_SUPER2)
+EXT4_FEATURE_COMPAT_FUNCS(stable_inodes, STABLE_INODES)
EXT4_FEATURE_RO_COMPAT_FUNCS(sparse_super, SPARSE_SUPER)
EXT4_FEATURE_RO_COMPAT_FUNCS(large_file, LARGE_FILE)
diff --git a/fs/ext4/inode-test.c b/fs/ext4/inode-test.c
new file mode 100644
index 000000000000..92a9da1774aa
--- /dev/null
+++ b/fs/ext4/inode-test.c
@@ -0,0 +1,272 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KUnit test of ext4 inode that verify the seconds part of [a/c/m]
+ * timestamps in ext4 inode structs are decoded correctly.
+ */
+
+#include <kunit/test.h>
+#include <linux/kernel.h>
+#include <linux/time64.h>
+
+#include "ext4.h"
+
+/*
+ * For constructing the nonnegative timestamp lower bound value.
+ * binary: 00000000 00000000 00000000 00000000
+ */
+#define LOWER_MSB_0 0L
+/*
+ * For constructing the nonnegative timestamp upper bound value.
+ * binary: 01111111 11111111 11111111 11111111
+ *
+ */
+#define UPPER_MSB_0 0x7fffffffL
+/*
+ * For constructing the negative timestamp lower bound value.
+ * binary: 10000000 00000000 00000000 00000000
+ */
+#define LOWER_MSB_1 (-0x80000000L)
+/*
+ * For constructing the negative timestamp upper bound value.
+ * binary: 11111111 11111111 11111111 11111111
+ */
+#define UPPER_MSB_1 (-1L)
+/*
+ * Upper bound for nanoseconds value supported by the encoding.
+ * binary: 00111111 11111111 11111111 11111111
+ */
+#define MAX_NANOSECONDS ((1L << 30) - 1)
+
+#define CASE_NAME_FORMAT "%s: msb:%x lower_bound:%x extra_bits: %x"
+
+#define LOWER_BOUND_NEG_NO_EXTRA_BITS_CASE\
+ "1901-12-13 Lower bound of 32bit < 0 timestamp, no extra bits"
+#define UPPER_BOUND_NEG_NO_EXTRA_BITS_CASE\
+ "1969-12-31 Upper bound of 32bit < 0 timestamp, no extra bits"
+#define LOWER_BOUND_NONNEG_NO_EXTRA_BITS_CASE\
+ "1970-01-01 Lower bound of 32bit >=0 timestamp, no extra bits"
+#define UPPER_BOUND_NONNEG_NO_EXTRA_BITS_CASE\
+ "2038-01-19 Upper bound of 32bit >=0 timestamp, no extra bits"
+#define LOWER_BOUND_NEG_LO_1_CASE\
+ "2038-01-19 Lower bound of 32bit <0 timestamp, lo extra sec bit on"
+#define UPPER_BOUND_NEG_LO_1_CASE\
+ "2106-02-07 Upper bound of 32bit <0 timestamp, lo extra sec bit on"
+#define LOWER_BOUND_NONNEG_LO_1_CASE\
+ "2106-02-07 Lower bound of 32bit >=0 timestamp, lo extra sec bit on"
+#define UPPER_BOUND_NONNEG_LO_1_CASE\
+ "2174-02-25 Upper bound of 32bit >=0 timestamp, lo extra sec bit on"
+#define LOWER_BOUND_NEG_HI_1_CASE\
+ "2174-02-25 Lower bound of 32bit <0 timestamp, hi extra sec bit on"
+#define UPPER_BOUND_NEG_HI_1_CASE\
+ "2242-03-16 Upper bound of 32bit <0 timestamp, hi extra sec bit on"
+#define LOWER_BOUND_NONNEG_HI_1_CASE\
+ "2242-03-16 Lower bound of 32bit >=0 timestamp, hi extra sec bit on"
+#define UPPER_BOUND_NONNEG_HI_1_CASE\
+ "2310-04-04 Upper bound of 32bit >=0 timestamp, hi extra sec bit on"
+#define UPPER_BOUND_NONNEG_HI_1_NS_1_CASE\
+ "2310-04-04 Upper bound of 32bit>=0 timestamp, hi extra sec bit 1. 1 ns"
+#define LOWER_BOUND_NONNEG_HI_1_NS_MAX_CASE\
+ "2378-04-22 Lower bound of 32bit>= timestamp. Extra sec bits 1. Max ns"
+#define LOWER_BOUND_NONNEG_EXTRA_BITS_1_CASE\
+ "2378-04-22 Lower bound of 32bit >=0 timestamp. All extra sec bits on"
+#define UPPER_BOUND_NONNEG_EXTRA_BITS_1_CASE\
+ "2446-05-10 Upper bound of 32bit >=0 timestamp. All extra sec bits on"
+
+struct timestamp_expectation {
+ const char *test_case_name;
+ struct timespec64 expected;
+ u32 extra_bits;
+ bool msb_set;
+ bool lower_bound;
+};
+
+static time64_t get_32bit_time(const struct timestamp_expectation * const test)
+{
+ if (test->msb_set) {
+ if (test->lower_bound)
+ return LOWER_MSB_1;
+
+ return UPPER_MSB_1;
+ }
+
+ if (test->lower_bound)
+ return LOWER_MSB_0;
+ return UPPER_MSB_0;
+}
+
+
+/*
+ * Test data is derived from the table in the Inode Timestamps section of
+ * Documentation/filesystems/ext4/inodes.rst.
+ */
+static void inode_test_xtimestamp_decoding(struct kunit *test)
+{
+ const struct timestamp_expectation test_data[] = {
+ {
+ .test_case_name = LOWER_BOUND_NEG_NO_EXTRA_BITS_CASE,
+ .msb_set = true,
+ .lower_bound = true,
+ .extra_bits = 0,
+ .expected = {.tv_sec = -0x80000000LL, .tv_nsec = 0L},
+ },
+
+ {
+ .test_case_name = UPPER_BOUND_NEG_NO_EXTRA_BITS_CASE,
+ .msb_set = true,
+ .lower_bound = false,
+ .extra_bits = 0,
+ .expected = {.tv_sec = -1LL, .tv_nsec = 0L},
+ },
+
+ {
+ .test_case_name = LOWER_BOUND_NONNEG_NO_EXTRA_BITS_CASE,
+ .msb_set = false,
+ .lower_bound = true,
+ .extra_bits = 0,
+ .expected = {0LL, 0L},
+ },
+
+ {
+ .test_case_name = UPPER_BOUND_NONNEG_NO_EXTRA_BITS_CASE,
+ .msb_set = false,
+ .lower_bound = false,
+ .extra_bits = 0,
+ .expected = {.tv_sec = 0x7fffffffLL, .tv_nsec = 0L},
+ },
+
+ {
+ .test_case_name = LOWER_BOUND_NEG_LO_1_CASE,
+ .msb_set = true,
+ .lower_bound = true,
+ .extra_bits = 1,
+ .expected = {.tv_sec = 0x80000000LL, .tv_nsec = 0L},
+ },
+
+ {
+ .test_case_name = UPPER_BOUND_NEG_LO_1_CASE,
+ .msb_set = true,
+ .lower_bound = false,
+ .extra_bits = 1,
+ .expected = {.tv_sec = 0xffffffffLL, .tv_nsec = 0L},
+ },
+
+ {
+ .test_case_name = LOWER_BOUND_NONNEG_LO_1_CASE,
+ .msb_set = false,
+ .lower_bound = true,
+ .extra_bits = 1,
+ .expected = {.tv_sec = 0x100000000LL, .tv_nsec = 0L},
+ },
+
+ {
+ .test_case_name = UPPER_BOUND_NONNEG_LO_1_CASE,
+ .msb_set = false,
+ .lower_bound = false,
+ .extra_bits = 1,
+ .expected = {.tv_sec = 0x17fffffffLL, .tv_nsec = 0L},
+ },
+
+ {
+ .test_case_name = LOWER_BOUND_NEG_HI_1_CASE,
+ .msb_set = true,
+ .lower_bound = true,
+ .extra_bits = 2,
+ .expected = {.tv_sec = 0x180000000LL, .tv_nsec = 0L},
+ },
+
+ {
+ .test_case_name = UPPER_BOUND_NEG_HI_1_CASE,
+ .msb_set = true,
+ .lower_bound = false,
+ .extra_bits = 2,
+ .expected = {.tv_sec = 0x1ffffffffLL, .tv_nsec = 0L},
+ },
+
+ {
+ .test_case_name = LOWER_BOUND_NONNEG_HI_1_CASE,
+ .msb_set = false,
+ .lower_bound = true,
+ .extra_bits = 2,
+ .expected = {.tv_sec = 0x200000000LL, .tv_nsec = 0L},
+ },
+
+ {
+ .test_case_name = UPPER_BOUND_NONNEG_HI_1_CASE,
+ .msb_set = false,
+ .lower_bound = false,
+ .extra_bits = 2,
+ .expected = {.tv_sec = 0x27fffffffLL, .tv_nsec = 0L},
+ },
+
+ {
+ .test_case_name = UPPER_BOUND_NONNEG_HI_1_NS_1_CASE,
+ .msb_set = false,
+ .lower_bound = false,
+ .extra_bits = 6,
+ .expected = {.tv_sec = 0x27fffffffLL, .tv_nsec = 1L},
+ },
+
+ {
+ .test_case_name = LOWER_BOUND_NONNEG_HI_1_NS_MAX_CASE,
+ .msb_set = false,
+ .lower_bound = true,
+ .extra_bits = 0xFFFFFFFF,
+ .expected = {.tv_sec = 0x300000000LL,
+ .tv_nsec = MAX_NANOSECONDS},
+ },
+
+ {
+ .test_case_name = LOWER_BOUND_NONNEG_EXTRA_BITS_1_CASE,
+ .msb_set = false,
+ .lower_bound = true,
+ .extra_bits = 3,
+ .expected = {.tv_sec = 0x300000000LL, .tv_nsec = 0L},
+ },
+
+ {
+ .test_case_name = UPPER_BOUND_NONNEG_EXTRA_BITS_1_CASE,
+ .msb_set = false,
+ .lower_bound = false,
+ .extra_bits = 3,
+ .expected = {.tv_sec = 0x37fffffffLL, .tv_nsec = 0L},
+ }
+ };
+
+ struct timespec64 timestamp;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(test_data); ++i) {
+ timestamp.tv_sec = get_32bit_time(&test_data[i]);
+ ext4_decode_extra_time(&timestamp,
+ cpu_to_le32(test_data[i].extra_bits));
+
+ KUNIT_EXPECT_EQ_MSG(test,
+ test_data[i].expected.tv_sec,
+ timestamp.tv_sec,
+ CASE_NAME_FORMAT,
+ test_data[i].test_case_name,
+ test_data[i].msb_set,
+ test_data[i].lower_bound,
+ test_data[i].extra_bits);
+ KUNIT_EXPECT_EQ_MSG(test,
+ test_data[i].expected.tv_nsec,
+ timestamp.tv_nsec,
+ CASE_NAME_FORMAT,
+ test_data[i].test_case_name,
+ test_data[i].msb_set,
+ test_data[i].lower_bound,
+ test_data[i].extra_bits);
+ }
+}
+
+static struct kunit_case ext4_inode_test_cases[] = {
+ KUNIT_CASE(inode_test_xtimestamp_decoding),
+ {}
+};
+
+static struct kunit_suite ext4_inode_test_suite = {
+ .name = "ext4_inode_test",
+ .test_cases = ext4_inode_test_cases,
+};
+
+kunit_test_suite(ext4_inode_test_suite);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 516faa280ced..a7ca65177980 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5717,12 +5717,15 @@ int ext4_getattr(const struct path *path, struct kstat *stat,
stat->attributes |= STATX_ATTR_IMMUTABLE;
if (flags & EXT4_NODUMP_FL)
stat->attributes |= STATX_ATTR_NODUMP;
+ if (flags & EXT4_VERITY_FL)
+ stat->attributes |= STATX_ATTR_VERITY;
stat->attributes_mask |= (STATX_ATTR_APPEND |
STATX_ATTR_COMPRESSED |
STATX_ATTR_ENCRYPTED |
STATX_ATTR_IMMUTABLE |
- STATX_ATTR_NODUMP);
+ STATX_ATTR_NODUMP |
+ STATX_ATTR_VERITY);
generic_fillattr(inode, stat);
return 0;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index dd654e53ba3d..b3cbf8622eab 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1345,6 +1345,18 @@ static bool ext4_dummy_context(struct inode *inode)
return DUMMY_ENCRYPTION_ENABLED(EXT4_SB(inode->i_sb));
}
+static bool ext4_has_stable_inodes(struct super_block *sb)
+{
+ return ext4_has_feature_stable_inodes(sb);
+}
+
+static void ext4_get_ino_and_lblk_bits(struct super_block *sb,
+ int *ino_bits_ret, int *lblk_bits_ret)
+{
+ *ino_bits_ret = 8 * sizeof(EXT4_SB(sb)->s_es->s_inodes_count);
+ *lblk_bits_ret = 8 * sizeof(ext4_lblk_t);
+}
+
static const struct fscrypt_operations ext4_cryptops = {
.key_prefix = "ext4:",
.get_context = ext4_get_context,
@@ -1352,6 +1364,8 @@ static const struct fscrypt_operations ext4_cryptops = {
.dummy_context = ext4_dummy_context,
.empty_dir = ext4_empty_dir,
.max_namelen = EXT4_NAME_LEN,
+ .has_stable_inodes = ext4_has_stable_inodes,
+ .get_ino_and_lblk_bits = ext4_get_ino_and_lblk_bits,
};
#endif
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 29bc0a542759..6a2e5b7d8fc7 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -726,11 +726,14 @@ int f2fs_getattr(const struct path *path, struct kstat *stat,
stat->attributes |= STATX_ATTR_IMMUTABLE;
if (flags & F2FS_NODUMP_FL)
stat->attributes |= STATX_ATTR_NODUMP;
+ if (IS_VERITY(inode))
+ stat->attributes |= STATX_ATTR_VERITY;
stat->attributes_mask |= (STATX_ATTR_APPEND |
STATX_ATTR_ENCRYPTED |
STATX_ATTR_IMMUTABLE |
- STATX_ATTR_NODUMP);
+ STATX_ATTR_NODUMP |
+ STATX_ATTR_VERITY);
generic_fillattr(inode, stat);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 808709581481..2c997f94a3b2 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1771,7 +1771,8 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
return -EIO;
}
trace_f2fs_issue_reset_zone(bdev, blkstart);
- return blkdev_reset_zones(bdev, sector, nr_sects, GFP_NOFS);
+ return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
+ sector, nr_sects, GFP_NOFS);
}
/* For conventional zones, use regular discard if supported */
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 1443cee15863..197ad6b314de 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -2308,13 +2308,27 @@ static bool f2fs_dummy_context(struct inode *inode)
return DUMMY_ENCRYPTION_ENABLED(F2FS_I_SB(inode));
}
+static bool f2fs_has_stable_inodes(struct super_block *sb)
+{
+ return true;
+}
+
+static void f2fs_get_ino_and_lblk_bits(struct super_block *sb,
+ int *ino_bits_ret, int *lblk_bits_ret)
+{
+ *ino_bits_ret = 8 * sizeof(nid_t);
+ *lblk_bits_ret = 8 * sizeof(block_t);
+}
+
static const struct fscrypt_operations f2fs_cryptops = {
- .key_prefix = "f2fs:",
- .get_context = f2fs_get_context,
- .set_context = f2fs_set_context,
- .dummy_context = f2fs_dummy_context,
- .empty_dir = f2fs_empty_dir,
- .max_namelen = F2FS_NAME_LEN,
+ .key_prefix = "f2fs:",
+ .get_context = f2fs_get_context,
+ .set_context = f2fs_set_context,
+ .dummy_context = f2fs_dummy_context,
+ .empty_dir = f2fs_empty_dir,
+ .max_namelen = F2FS_NAME_LEN,
+ .has_stable_inodes = f2fs_has_stable_inodes,
+ .get_ino_and_lblk_bits = f2fs_get_ino_and_lblk_bits,
};
#endif
@@ -2857,15 +2871,21 @@ static int init_percpu_info(struct f2fs_sb_info *sbi)
}
#ifdef CONFIG_BLK_DEV_ZONED
+static int f2fs_report_zone_cb(struct blk_zone *zone, unsigned int idx,
+ void *data)
+{
+ struct f2fs_dev_info *dev = data;
+
+ if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL)
+ set_bit(idx, dev->blkz_seq);
+ return 0;
+}
+
static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
{
struct block_device *bdev = FDEV(devi).bdev;
sector_t nr_sectors = bdev->bd_part->nr_sects;
- sector_t sector = 0;
- struct blk_zone *zones;
- unsigned int i, nr_zones;
- unsigned int n = 0;
- int err = -EIO;
+ int ret;
if (!f2fs_sb_has_blkzoned(sbi))
return 0;
@@ -2890,38 +2910,13 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
if (!FDEV(devi).blkz_seq)
return -ENOMEM;
-#define F2FS_REPORT_NR_ZONES 4096
-
- zones = f2fs_kzalloc(sbi,
- array_size(F2FS_REPORT_NR_ZONES,
- sizeof(struct blk_zone)),
- GFP_KERNEL);
- if (!zones)
- return -ENOMEM;
-
/* Get block zones type */
- while (zones && sector < nr_sectors) {
-
- nr_zones = F2FS_REPORT_NR_ZONES;
- err = blkdev_report_zones(bdev, sector, zones, &nr_zones);
- if (err)
- break;
- if (!nr_zones) {
- err = -EIO;
- break;
- }
-
- for (i = 0; i < nr_zones; i++) {
- if (zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL)
- set_bit(n, FDEV(devi).blkz_seq);
- sector += zones[i].len;
- n++;
- }
- }
-
- kvfree(zones);
+ ret = blkdev_report_zones(bdev, 0, BLK_ALL_ZONES, f2fs_report_zone_cb,
+ &FDEV(devi));
+ if (ret < 0)
+ return ret;
- return err;
+ return 0;
}
#endif
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 3d40771e8e7c..41b6438bd2d9 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -261,7 +261,7 @@ static int f_getowner_uids(struct file *filp, unsigned long arg)
static bool rw_hint_valid(enum rw_hint hint)
{
switch (hint) {
- case RWF_WRITE_LIFE_NOT_SET:
+ case RWH_WRITE_LIFE_NOT_SET:
case RWH_WRITE_LIFE_NONE:
case RWH_WRITE_LIFE_SHORT:
case RWH_WRITE_LIFE_MEDIUM:
diff --git a/fs/file.c b/fs/file.c
index 3da91a112bab..b241ea7f1aa4 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -795,7 +795,7 @@ unsigned long __fdget_pos(unsigned int fd)
unsigned long v = __fdget(fd);
struct file *file = (struct file *)(v & ~3);
- if (file && (file->f_mode & FMODE_ATOMIC_POS)) {
+ if (file && !(file->f_mode & FMODE_STREAM)) {
if (file_count(file) > 1) {
v |= FDPUT_POS_UNLOCK;
mutex_lock(&file->f_pos_lock);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 8aaa7eec7b74..335607b8c5c0 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -164,8 +164,13 @@ static void finish_writeback_work(struct bdi_writeback *wb,
if (work->auto_free)
kfree(work);
- if (done && atomic_dec_and_test(&done->cnt))
- wake_up_all(done->waitq);
+ if (done) {
+ wait_queue_head_t *waitq = done->waitq;
+
+ /* @done can't be accessed after the following dec */
+ if (atomic_dec_and_test(&done->cnt))
+ wake_up_all(waitq);
+ }
}
static void wb_queue_work(struct bdi_writeback *wb,
@@ -571,10 +576,13 @@ void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
spin_unlock(&inode->i_lock);
/*
- * A dying wb indicates that the memcg-blkcg mapping has changed
- * and a new wb is already serving the memcg. Switch immediately.
+ * A dying wb indicates that either the blkcg associated with the
+ * memcg changed or the associated memcg is dying. In the first
+ * case, a replacement wb should already be available and we should
+ * refresh the wb immediately. In the second case, trying to
+ * refresh will keep failing.
*/
- if (unlikely(wb_dying(wbc->wb)))
+ if (unlikely(wb_dying(wbc->wb) && !css_is_dying(wbc->wb->memcg_css)))
inode_switch_wbs(inode, wbc->wb_id);
}
EXPORT_SYMBOL_GPL(wbc_attach_and_unlock_inode);
@@ -900,7 +908,7 @@ restart:
* cgroup_writeback_by_id - initiate cgroup writeback from bdi and memcg IDs
* @bdi_id: target bdi id
* @memcg_id: target memcg css id
- * @nr_pages: number of pages to write, 0 for best-effort dirty flushing
+ * @nr: number of pages to write, 0 for best-effort dirty flushing
* @reason: reason why some writeback work initiated
* @done: target wb_completion
*
diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
index 6419a2b3510d..3e8cebfb59b7 100644
--- a/fs/fuse/Makefile
+++ b/fs/fuse/Makefile
@@ -5,6 +5,7 @@
obj-$(CONFIG_FUSE_FS) += fuse.o
obj-$(CONFIG_CUSE) += cuse.o
-obj-$(CONFIG_VIRTIO_FS) += virtio_fs.o
+obj-$(CONFIG_VIRTIO_FS) += virtiofs.o
fuse-objs := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o
+virtiofs-y += virtio_fs.o
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index dadd617d826c..ed1abc9e33cf 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -276,10 +276,12 @@ static void flush_bg_queue(struct fuse_conn *fc)
void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req)
{
struct fuse_iqueue *fiq = &fc->iq;
- bool async = req->args->end;
+ bool async;
if (test_and_set_bit(FR_FINISHED, &req->flags))
goto put_request;
+
+ async = req->args->end;
/*
* test_and_set_bit() implies smp_mb() between bit
* changing and below intr_entry check. Pairs with
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index d572c900bb0f..54d638f9ba1c 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -405,7 +405,8 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
else
fuse_invalidate_entry_cache(entry);
- fuse_advise_use_readdirplus(dir);
+ if (inode)
+ fuse_advise_use_readdirplus(dir);
return newent;
out_iput:
@@ -1521,6 +1522,19 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
is_truncate = true;
}
+ /* Flush dirty data/metadata before non-truncate SETATTR */
+ if (is_wb && S_ISREG(inode->i_mode) &&
+ attr->ia_valid &
+ (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET |
+ ATTR_TIMES_SET)) {
+ err = write_inode_now(inode, true);
+ if (err)
+ return err;
+
+ fuse_set_nowrite(inode);
+ fuse_release_nowrite(inode);
+ }
+
if (is_truncate) {
fuse_set_nowrite(inode);
set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 0f0225686aee..db48a5cf8620 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -217,7 +217,7 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
{
struct fuse_conn *fc = get_fuse_conn(inode);
int err;
- bool lock_inode = (file->f_flags & O_TRUNC) &&
+ bool is_wb_truncate = (file->f_flags & O_TRUNC) &&
fc->atomic_o_trunc &&
fc->writeback_cache;
@@ -225,16 +225,20 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
if (err)
return err;
- if (lock_inode)
+ if (is_wb_truncate) {
inode_lock(inode);
+ fuse_set_nowrite(inode);
+ }
err = fuse_do_open(fc, get_node_id(inode), file, isdir);
if (!err)
fuse_finish_open(inode, file);
- if (lock_inode)
+ if (is_wb_truncate) {
+ fuse_release_nowrite(inode);
inode_unlock(inode);
+ }
return err;
}
@@ -1997,7 +2001,7 @@ static int fuse_writepages_fill(struct page *page,
if (!data->ff) {
err = -EIO;
- data->ff = fuse_write_file_get(fc, get_fuse_inode(inode));
+ data->ff = fuse_write_file_get(fc, fi);
if (!data->ff)
goto out_unlock;
}
@@ -2042,8 +2046,6 @@ static int fuse_writepages_fill(struct page *page,
* under writeback, so we can release the page lock.
*/
if (data->wpa == NULL) {
- struct fuse_inode *fi = get_fuse_inode(inode);
-
err = -ENOMEM;
wpa = fuse_writepage_args_alloc();
if (!wpa) {
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 956aeaf961ae..d148188cfca4 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -479,6 +479,7 @@ struct fuse_fs_context {
bool destroy:1;
bool no_control:1;
bool no_force_umount:1;
+ bool no_mount_options:1;
unsigned int max_read;
unsigned int blksize;
const char *subtype;
@@ -713,6 +714,9 @@ struct fuse_conn {
/** Do not allow MNT_FORCE umount */
unsigned int no_force_umount:1;
+ /* Do not show mount options */
+ unsigned int no_mount_options:1;
+
/** The number of requests waiting for completion */
atomic_t num_waiting;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index e040e2a2b621..16aec32f7f3d 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -558,6 +558,9 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root)
struct super_block *sb = root->d_sb;
struct fuse_conn *fc = get_fuse_conn_super(sb);
+ if (fc->no_mount_options)
+ return 0;
+
seq_printf(m, ",user_id=%u", from_kuid_munged(fc->user_ns, fc->user_id));
seq_printf(m, ",group_id=%u", from_kgid_munged(fc->user_ns, fc->group_id));
if (fc->default_permissions)
@@ -1180,6 +1183,7 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
fc->destroy = ctx->destroy;
fc->no_control = ctx->no_control;
fc->no_force_umount = ctx->no_force_umount;
+ fc->no_mount_options = ctx->no_mount_options;
err = -ENOMEM;
root = fuse_get_root_inode(sb, ctx->rootmode);
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 6af3f131e468..a5c86048b96e 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -30,6 +30,7 @@ struct virtio_fs_vq {
struct virtqueue *vq; /* protected by ->lock */
struct work_struct done_work;
struct list_head queued_reqs;
+ struct list_head end_reqs; /* End these requests */
struct delayed_work dispatch_work;
struct fuse_dev *fud;
bool connected;
@@ -54,6 +55,9 @@ struct virtio_fs_forget {
struct list_head list;
};
+static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
+ struct fuse_req *req, bool in_flight);
+
static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq)
{
struct virtio_fs *fs = vq->vdev->priv;
@@ -66,6 +70,19 @@ static inline struct fuse_pqueue *vq_to_fpq(struct virtqueue *vq)
return &vq_to_fsvq(vq)->fud->pq;
}
+/* Should be called with fsvq->lock held. */
+static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq)
+{
+ fsvq->in_flight++;
+}
+
+/* Should be called with fsvq->lock held. */
+static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq)
+{
+ WARN_ON(fsvq->in_flight <= 0);
+ fsvq->in_flight--;
+}
+
static void release_virtio_fs_obj(struct kref *ref)
{
struct virtio_fs *vfs = container_of(ref, struct virtio_fs, refcount);
@@ -109,22 +126,6 @@ static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq)
flush_delayed_work(&fsvq->dispatch_work);
}
-static inline void drain_hiprio_queued_reqs(struct virtio_fs_vq *fsvq)
-{
- struct virtio_fs_forget *forget;
-
- spin_lock(&fsvq->lock);
- while (1) {
- forget = list_first_entry_or_null(&fsvq->queued_reqs,
- struct virtio_fs_forget, list);
- if (!forget)
- break;
- list_del(&forget->list);
- kfree(forget);
- }
- spin_unlock(&fsvq->lock);
-}
-
static void virtio_fs_drain_all_queues(struct virtio_fs *fs)
{
struct virtio_fs_vq *fsvq;
@@ -132,9 +133,6 @@ static void virtio_fs_drain_all_queues(struct virtio_fs *fs)
for (i = 0; i < fs->nvqs; i++) {
fsvq = &fs->vqs[i];
- if (i == VQ_HIPRIO)
- drain_hiprio_queued_reqs(fsvq);
-
virtio_fs_drain_queue(fsvq);
}
}
@@ -253,14 +251,66 @@ static void virtio_fs_hiprio_done_work(struct work_struct *work)
while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
kfree(req);
- fsvq->in_flight--;
+ dec_in_flight_req(fsvq);
}
} while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq)));
spin_unlock(&fsvq->lock);
}
-static void virtio_fs_dummy_dispatch_work(struct work_struct *work)
+static void virtio_fs_request_dispatch_work(struct work_struct *work)
{
+ struct fuse_req *req;
+ struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq,
+ dispatch_work.work);
+ struct fuse_conn *fc = fsvq->fud->fc;
+ int ret;
+
+ pr_debug("virtio-fs: worker %s called.\n", __func__);
+ while (1) {
+ spin_lock(&fsvq->lock);
+ req = list_first_entry_or_null(&fsvq->end_reqs, struct fuse_req,
+ list);
+ if (!req) {
+ spin_unlock(&fsvq->lock);
+ break;
+ }
+
+ list_del_init(&req->list);
+ spin_unlock(&fsvq->lock);
+ fuse_request_end(fc, req);
+ }
+
+ /* Dispatch pending requests */
+ while (1) {
+ spin_lock(&fsvq->lock);
+ req = list_first_entry_or_null(&fsvq->queued_reqs,
+ struct fuse_req, list);
+ if (!req) {
+ spin_unlock(&fsvq->lock);
+ return;
+ }
+ list_del_init(&req->list);
+ spin_unlock(&fsvq->lock);
+
+ ret = virtio_fs_enqueue_req(fsvq, req, true);
+ if (ret < 0) {
+ if (ret == -ENOMEM || ret == -ENOSPC) {
+ spin_lock(&fsvq->lock);
+ list_add_tail(&req->list, &fsvq->queued_reqs);
+ schedule_delayed_work(&fsvq->dispatch_work,
+ msecs_to_jiffies(1));
+ spin_unlock(&fsvq->lock);
+ return;
+ }
+ req->out.h.error = ret;
+ spin_lock(&fsvq->lock);
+ dec_in_flight_req(fsvq);
+ spin_unlock(&fsvq->lock);
+ pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n",
+ ret);
+ fuse_request_end(fc, req);
+ }
+ }
}
static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
@@ -286,6 +336,7 @@ static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
list_del(&forget->list);
if (!fsvq->connected) {
+ dec_in_flight_req(fsvq);
spin_unlock(&fsvq->lock);
kfree(forget);
continue;
@@ -307,13 +358,13 @@ static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
} else {
pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
ret);
+ dec_in_flight_req(fsvq);
kfree(forget);
}
spin_unlock(&fsvq->lock);
return;
}
- fsvq->in_flight++;
notify = virtqueue_kick_prepare(vq);
spin_unlock(&fsvq->lock);
@@ -452,7 +503,7 @@ static void virtio_fs_requests_done_work(struct work_struct *work)
fuse_request_end(fc, req);
spin_lock(&fsvq->lock);
- fsvq->in_flight--;
+ dec_in_flight_req(fsvq);
spin_unlock(&fsvq->lock);
}
}
@@ -502,6 +553,7 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev,
names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name;
INIT_WORK(&fs->vqs[VQ_HIPRIO].done_work, virtio_fs_hiprio_done_work);
INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].queued_reqs);
+ INIT_LIST_HEAD(&fs->vqs[VQ_HIPRIO].end_reqs);
INIT_DELAYED_WORK(&fs->vqs[VQ_HIPRIO].dispatch_work,
virtio_fs_hiprio_dispatch_work);
spin_lock_init(&fs->vqs[VQ_HIPRIO].lock);
@@ -511,8 +563,9 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev,
spin_lock_init(&fs->vqs[i].lock);
INIT_WORK(&fs->vqs[i].done_work, virtio_fs_requests_done_work);
INIT_DELAYED_WORK(&fs->vqs[i].dispatch_work,
- virtio_fs_dummy_dispatch_work);
+ virtio_fs_request_dispatch_work);
INIT_LIST_HEAD(&fs->vqs[i].queued_reqs);
+ INIT_LIST_HEAD(&fs->vqs[i].end_reqs);
snprintf(fs->vqs[i].name, sizeof(fs->vqs[i].name),
"requests.%u", i - VQ_REQUEST);
callbacks[i] = virtio_fs_vq_done;
@@ -708,6 +761,7 @@ __releases(fiq->lock)
list_add_tail(&forget->list, &fsvq->queued_reqs);
schedule_delayed_work(&fsvq->dispatch_work,
msecs_to_jiffies(1));
+ inc_in_flight_req(fsvq);
} else {
pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n",
ret);
@@ -717,7 +771,7 @@ __releases(fiq->lock)
goto out;
}
- fsvq->in_flight++;
+ inc_in_flight_req(fsvq);
notify = virtqueue_kick_prepare(vq);
spin_unlock(&fsvq->lock);
@@ -819,7 +873,7 @@ static unsigned int sg_init_fuse_args(struct scatterlist *sg,
/* Add a request to a virtqueue and kick the device */
static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
- struct fuse_req *req)
+ struct fuse_req *req, bool in_flight)
{
/* requests need at least 4 elements */
struct scatterlist *stack_sgs[6];
@@ -835,6 +889,7 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
unsigned int i;
int ret;
bool notify;
+ struct fuse_pqueue *fpq;
/* Does the sglist fit on the stack? */
total_sgs = sg_count_fuse_req(req);
@@ -889,7 +944,17 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
goto out;
}
- fsvq->in_flight++;
+ /* Request successfully sent. */
+ fpq = &fsvq->fud->pq;
+ spin_lock(&fpq->lock);
+ list_add_tail(&req->list, fpq->processing);
+ spin_unlock(&fpq->lock);
+ set_bit(FR_SENT, &req->flags);
+ /* matches barrier in request_wait_answer() */
+ smp_mb__after_atomic();
+
+ if (!in_flight)
+ inc_in_flight_req(fsvq);
notify = virtqueue_kick_prepare(vq);
spin_unlock(&fsvq->lock);
@@ -915,9 +980,8 @@ __releases(fiq->lock)
{
unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */
struct virtio_fs *fs;
- struct fuse_conn *fc;
struct fuse_req *req;
- struct fuse_pqueue *fpq;
+ struct virtio_fs_vq *fsvq;
int ret;
WARN_ON(list_empty(&fiq->pending));
@@ -928,44 +992,36 @@ __releases(fiq->lock)
spin_unlock(&fiq->lock);
fs = fiq->priv;
- fc = fs->vqs[queue_id].fud->fc;
pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u\n",
__func__, req->in.h.opcode, req->in.h.unique,
req->in.h.nodeid, req->in.h.len,
fuse_len_args(req->args->out_numargs, req->args->out_args));
- fpq = &fs->vqs[queue_id].fud->pq;
- spin_lock(&fpq->lock);
- if (!fpq->connected) {
- spin_unlock(&fpq->lock);
- req->out.h.error = -ENODEV;
- pr_err("virtio-fs: %s disconnected\n", __func__);
- fuse_request_end(fc, req);
- return;
- }
- list_add_tail(&req->list, fpq->processing);
- spin_unlock(&fpq->lock);
- set_bit(FR_SENT, &req->flags);
- /* matches barrier in request_wait_answer() */
- smp_mb__after_atomic();
-
-retry:
- ret = virtio_fs_enqueue_req(&fs->vqs[queue_id], req);
+ fsvq = &fs->vqs[queue_id];
+ ret = virtio_fs_enqueue_req(fsvq, req, false);
if (ret < 0) {
if (ret == -ENOMEM || ret == -ENOSPC) {
- /* Virtqueue full. Retry submission */
- /* TODO use completion instead of timeout */
- usleep_range(20, 30);
- goto retry;
+ /*
+ * Virtqueue full. Retry submission from worker
+ * context as we might be holding fc->bg_lock.
+ */
+ spin_lock(&fsvq->lock);
+ list_add_tail(&req->list, &fsvq->queued_reqs);
+ inc_in_flight_req(fsvq);
+ schedule_delayed_work(&fsvq->dispatch_work,
+ msecs_to_jiffies(1));
+ spin_unlock(&fsvq->lock);
+ return;
}
req->out.h.error = ret;
pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret);
- spin_lock(&fpq->lock);
- clear_bit(FR_SENT, &req->flags);
- list_del_init(&req->list);
- spin_unlock(&fpq->lock);
- fuse_request_end(fc, req);
+
+ /* Can't end request in submission context. Use a worker */
+ spin_lock(&fsvq->lock);
+ list_add_tail(&req->list, &fsvq->end_reqs);
+ schedule_delayed_work(&fsvq->dispatch_work, 0);
+ spin_unlock(&fsvq->lock);
return;
}
}
@@ -992,6 +1048,7 @@ static int virtio_fs_fill_super(struct super_block *sb)
.destroy = true,
.no_control = true,
.no_force_umount = true,
+ .no_mount_options = true,
};
mutex_lock(&virtio_fs_mutex);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 681b44682b0d..18daf494abab 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1540,17 +1540,23 @@ static int gfs2_init_fs_context(struct fs_context *fc)
{
struct gfs2_args *args;
- args = kzalloc(sizeof(*args), GFP_KERNEL);
+ args = kmalloc(sizeof(*args), GFP_KERNEL);
if (args == NULL)
return -ENOMEM;
- args->ar_quota = GFS2_QUOTA_DEFAULT;
- args->ar_data = GFS2_DATA_DEFAULT;
- args->ar_commit = 30;
- args->ar_statfs_quantum = 30;
- args->ar_quota_quantum = 60;
- args->ar_errors = GFS2_ERRORS_DEFAULT;
+ if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
+ struct gfs2_sbd *sdp = fc->root->d_sb->s_fs_info;
+ *args = sdp->sd_args;
+ } else {
+ memset(args, 0, sizeof(*args));
+ args->ar_quota = GFS2_QUOTA_DEFAULT;
+ args->ar_data = GFS2_DATA_DEFAULT;
+ args->ar_commit = 30;
+ args->ar_statfs_quantum = 30;
+ args->ar_quota_quantum = 60;
+ args->ar_errors = GFS2_ERRORS_DEFAULT;
+ }
fc->fs_private = args;
fc->ops = &gfs2_context_ops;
return 0;
@@ -1600,6 +1606,7 @@ static int gfs2_meta_get_tree(struct fs_context *fc)
}
static const struct fs_context_operations gfs2_meta_context_ops = {
+ .free = gfs2_fc_free,
.get_tree = gfs2_meta_get_tree,
};
diff --git a/fs/io-wq.c b/fs/io-wq.c
new file mode 100644
index 000000000000..9174007ce107
--- /dev/null
+++ b/fs/io-wq.c
@@ -0,0 +1,1065 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Basic worker thread pool for io_uring
+ *
+ * Copyright (C) 2019 Jens Axboe
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/sched/signal.h>
+#include <linux/mm.h>
+#include <linux/mmu_context.h>
+#include <linux/sched/mm.h>
+#include <linux/percpu.h>
+#include <linux/slab.h>
+#include <linux/kthread.h>
+#include <linux/rculist_nulls.h>
+
+#include "io-wq.h"
+
+#define WORKER_IDLE_TIMEOUT (5 * HZ)
+
+enum {
+ IO_WORKER_F_UP = 1, /* up and active */
+ IO_WORKER_F_RUNNING = 2, /* account as running */
+ IO_WORKER_F_FREE = 4, /* worker on free list */
+ IO_WORKER_F_EXITING = 8, /* worker exiting */
+ IO_WORKER_F_FIXED = 16, /* static idle worker */
+ IO_WORKER_F_BOUND = 32, /* is doing bounded work */
+};
+
+enum {
+ IO_WQ_BIT_EXIT = 0, /* wq exiting */
+ IO_WQ_BIT_CANCEL = 1, /* cancel work on list */
+};
+
+enum {
+ IO_WQE_FLAG_STALLED = 1, /* stalled on hash */
+};
+
+/*
+ * One for each thread in a wqe pool
+ */
+struct io_worker {
+ refcount_t ref;
+ unsigned flags;
+ struct hlist_nulls_node nulls_node;
+ struct list_head all_list;
+ struct task_struct *task;
+ wait_queue_head_t wait;
+ struct io_wqe *wqe;
+
+ struct io_wq_work *cur_work;
+ spinlock_t lock;
+
+ struct rcu_head rcu;
+ struct mm_struct *mm;
+ struct files_struct *restore_files;
+};
+
+#if BITS_PER_LONG == 64
+#define IO_WQ_HASH_ORDER 6
+#else
+#define IO_WQ_HASH_ORDER 5
+#endif
+
+struct io_wqe_acct {
+ unsigned nr_workers;
+ unsigned max_workers;
+ atomic_t nr_running;
+};
+
+enum {
+ IO_WQ_ACCT_BOUND,
+ IO_WQ_ACCT_UNBOUND,
+};
+
+/*
+ * Per-node worker thread pool
+ */
+struct io_wqe {
+ struct {
+ spinlock_t lock;
+ struct list_head work_list;
+ unsigned long hash_map;
+ unsigned flags;
+ } ____cacheline_aligned_in_smp;
+
+ int node;
+ struct io_wqe_acct acct[2];
+
+ struct hlist_nulls_head free_list;
+ struct hlist_nulls_head busy_list;
+ struct list_head all_list;
+
+ struct io_wq *wq;
+};
+
+/*
+ * Per io_wq state
+ */
+struct io_wq {
+ struct io_wqe **wqes;
+ unsigned long state;
+ unsigned nr_wqes;
+
+ get_work_fn *get_work;
+ put_work_fn *put_work;
+
+ struct task_struct *manager;
+ struct user_struct *user;
+ struct mm_struct *mm;
+ refcount_t refs;
+ struct completion done;
+};
+
+static bool io_worker_get(struct io_worker *worker)
+{
+ return refcount_inc_not_zero(&worker->ref);
+}
+
+static void io_worker_release(struct io_worker *worker)
+{
+ if (refcount_dec_and_test(&worker->ref))
+ wake_up_process(worker->task);
+}
+
+/*
+ * Note: drops the wqe->lock if returning true! The caller must re-acquire
+ * the lock in that case. Some callers need to restart handling if this
+ * happens, so we can't just re-acquire the lock on behalf of the caller.
+ */
+static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker)
+{
+ bool dropped_lock = false;
+
+ if (current->files != worker->restore_files) {
+ __acquire(&wqe->lock);
+ spin_unlock_irq(&wqe->lock);
+ dropped_lock = true;
+
+ task_lock(current);
+ current->files = worker->restore_files;
+ task_unlock(current);
+ }
+
+ /*
+ * If we have an active mm, we need to drop the wq lock before unusing
+ * it. If we do, return true and let the caller retry the idle loop.
+ */
+ if (worker->mm) {
+ if (!dropped_lock) {
+ __acquire(&wqe->lock);
+ spin_unlock_irq(&wqe->lock);
+ dropped_lock = true;
+ }
+ __set_current_state(TASK_RUNNING);
+ set_fs(KERNEL_DS);
+ unuse_mm(worker->mm);
+ mmput(worker->mm);
+ worker->mm = NULL;
+ }
+
+ return dropped_lock;
+}
+
+static inline struct io_wqe_acct *io_work_get_acct(struct io_wqe *wqe,
+ struct io_wq_work *work)
+{
+ if (work->flags & IO_WQ_WORK_UNBOUND)
+ return &wqe->acct[IO_WQ_ACCT_UNBOUND];
+
+ return &wqe->acct[IO_WQ_ACCT_BOUND];
+}
+
+static inline struct io_wqe_acct *io_wqe_get_acct(struct io_wqe *wqe,
+ struct io_worker *worker)
+{
+ if (worker->flags & IO_WORKER_F_BOUND)
+ return &wqe->acct[IO_WQ_ACCT_BOUND];
+
+ return &wqe->acct[IO_WQ_ACCT_UNBOUND];
+}
+
+static void io_worker_exit(struct io_worker *worker)
+{
+ struct io_wqe *wqe = worker->wqe;
+ struct io_wqe_acct *acct = io_wqe_get_acct(wqe, worker);
+ unsigned nr_workers;
+
+ /*
+ * If we're not at zero, someone else is holding a brief reference
+ * to the worker. Wait for that to go away.
+ */
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (!refcount_dec_and_test(&worker->ref))
+ schedule();
+ __set_current_state(TASK_RUNNING);
+
+ preempt_disable();
+ current->flags &= ~PF_IO_WORKER;
+ if (worker->flags & IO_WORKER_F_RUNNING)
+ atomic_dec(&acct->nr_running);
+ if (!(worker->flags & IO_WORKER_F_BOUND))
+ atomic_dec(&wqe->wq->user->processes);
+ worker->flags = 0;
+ preempt_enable();
+
+ spin_lock_irq(&wqe->lock);
+ hlist_nulls_del_rcu(&worker->nulls_node);
+ list_del_rcu(&worker->all_list);
+ if (__io_worker_unuse(wqe, worker)) {
+ __release(&wqe->lock);
+ spin_lock_irq(&wqe->lock);
+ }
+ acct->nr_workers--;
+ nr_workers = wqe->acct[IO_WQ_ACCT_BOUND].nr_workers +
+ wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers;
+ spin_unlock_irq(&wqe->lock);
+
+ /* all workers gone, wq exit can proceed */
+ if (!nr_workers && refcount_dec_and_test(&wqe->wq->refs))
+ complete(&wqe->wq->done);
+
+ kfree_rcu(worker, rcu);
+}
+
+static inline bool io_wqe_run_queue(struct io_wqe *wqe)
+ __must_hold(wqe->lock)
+{
+ if (!list_empty(&wqe->work_list) && !(wqe->flags & IO_WQE_FLAG_STALLED))
+ return true;
+ return false;
+}
+
+/*
+ * Check head of free list for an available worker. If one isn't available,
+ * caller must wake up the wq manager to create one.
+ */
+static bool io_wqe_activate_free_worker(struct io_wqe *wqe)
+ __must_hold(RCU)
+{
+ struct hlist_nulls_node *n;
+ struct io_worker *worker;
+
+ n = rcu_dereference(hlist_nulls_first_rcu(&wqe->free_list));
+ if (is_a_nulls(n))
+ return false;
+
+ worker = hlist_nulls_entry(n, struct io_worker, nulls_node);
+ if (io_worker_get(worker)) {
+ wake_up(&worker->wait);
+ io_worker_release(worker);
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * We need a worker. If we find a free one, we're good. If not, and we're
+ * below the max number of workers, wake up the manager to create one.
+ */
+static void io_wqe_wake_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
+{
+ bool ret;
+
+ /*
+ * Most likely an attempt to queue unbounded work on an io_wq that
+ * wasn't setup with any unbounded workers.
+ */
+ WARN_ON_ONCE(!acct->max_workers);
+
+ rcu_read_lock();
+ ret = io_wqe_activate_free_worker(wqe);
+ rcu_read_unlock();
+
+ if (!ret && acct->nr_workers < acct->max_workers)
+ wake_up_process(wqe->wq->manager);
+}
+
+static void io_wqe_inc_running(struct io_wqe *wqe, struct io_worker *worker)
+{
+ struct io_wqe_acct *acct = io_wqe_get_acct(wqe, worker);
+
+ atomic_inc(&acct->nr_running);
+}
+
+static void io_wqe_dec_running(struct io_wqe *wqe, struct io_worker *worker)
+ __must_hold(wqe->lock)
+{
+ struct io_wqe_acct *acct = io_wqe_get_acct(wqe, worker);
+
+ if (atomic_dec_and_test(&acct->nr_running) && io_wqe_run_queue(wqe))
+ io_wqe_wake_worker(wqe, acct);
+}
+
+static void io_worker_start(struct io_wqe *wqe, struct io_worker *worker)
+{
+ allow_kernel_signal(SIGINT);
+
+ current->flags |= PF_IO_WORKER;
+
+ worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
+ worker->restore_files = current->files;
+ io_wqe_inc_running(wqe, worker);
+}
+
+/*
+ * Worker will start processing some work. Move it to the busy list, if
+ * it's currently on the freelist
+ */
+static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
+ struct io_wq_work *work)
+ __must_hold(wqe->lock)
+{
+ bool worker_bound, work_bound;
+
+ if (worker->flags & IO_WORKER_F_FREE) {
+ worker->flags &= ~IO_WORKER_F_FREE;
+ hlist_nulls_del_init_rcu(&worker->nulls_node);
+ hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->busy_list);
+ }
+
+ /*
+ * If worker is moving from bound to unbound (or vice versa), then
+ * ensure we update the running accounting.
+ */
+ worker_bound = (worker->flags & IO_WORKER_F_BOUND) != 0;
+ work_bound = (work->flags & IO_WQ_WORK_UNBOUND) == 0;
+ if (worker_bound != work_bound) {
+ io_wqe_dec_running(wqe, worker);
+ if (work_bound) {
+ worker->flags |= IO_WORKER_F_BOUND;
+ wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers--;
+ wqe->acct[IO_WQ_ACCT_BOUND].nr_workers++;
+ atomic_dec(&wqe->wq->user->processes);
+ } else {
+ worker->flags &= ~IO_WORKER_F_BOUND;
+ wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers++;
+ wqe->acct[IO_WQ_ACCT_BOUND].nr_workers--;
+ atomic_inc(&wqe->wq->user->processes);
+ }
+ io_wqe_inc_running(wqe, worker);
+ }
+}
+
+/*
+ * No work, worker going to sleep. Move to freelist, and unuse mm if we
+ * have one attached. Dropping the mm may potentially sleep, so we drop
+ * the lock in that case and return success. Since the caller has to
+ * retry the loop in that case (we changed task state), we don't regrab
+ * the lock if we return success.
+ */
+static bool __io_worker_idle(struct io_wqe *wqe, struct io_worker *worker)
+ __must_hold(wqe->lock)
+{
+ if (!(worker->flags & IO_WORKER_F_FREE)) {
+ worker->flags |= IO_WORKER_F_FREE;
+ hlist_nulls_del_init_rcu(&worker->nulls_node);
+ hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
+ }
+
+ return __io_worker_unuse(wqe, worker);
+}
+
+static struct io_wq_work *io_get_next_work(struct io_wqe *wqe, unsigned *hash)
+ __must_hold(wqe->lock)
+{
+ struct io_wq_work *work;
+
+ list_for_each_entry(work, &wqe->work_list, list) {
+ /* not hashed, can run anytime */
+ if (!(work->flags & IO_WQ_WORK_HASHED)) {
+ list_del(&work->list);
+ return work;
+ }
+
+ /* hashed, can run if not already running */
+ *hash = work->flags >> IO_WQ_HASH_SHIFT;
+ if (!(wqe->hash_map & BIT_ULL(*hash))) {
+ wqe->hash_map |= BIT_ULL(*hash);
+ list_del(&work->list);
+ return work;
+ }
+ }
+
+ return NULL;
+}
+
+static void io_worker_handle_work(struct io_worker *worker)
+ __releases(wqe->lock)
+{
+ struct io_wq_work *work, *old_work = NULL, *put_work = NULL;
+ struct io_wqe *wqe = worker->wqe;
+ struct io_wq *wq = wqe->wq;
+
+ do {
+ unsigned hash = -1U;
+
+ /*
+ * If we got some work, mark us as busy. If we didn't, but
+ * the list isn't empty, it means we stalled on hashed work.
+ * Mark us stalled so we don't keep looking for work when we
+ * can't make progress, any work completion or insertion will
+ * clear the stalled flag.
+ */
+ work = io_get_next_work(wqe, &hash);
+ if (work)
+ __io_worker_busy(wqe, worker, work);
+ else if (!list_empty(&wqe->work_list))
+ wqe->flags |= IO_WQE_FLAG_STALLED;
+
+ spin_unlock_irq(&wqe->lock);
+ if (put_work && wq->put_work)
+ wq->put_work(old_work);
+ if (!work)
+ break;
+next:
+ /* flush any pending signals before assigning new work */
+ if (signal_pending(current))
+ flush_signals(current);
+
+ spin_lock_irq(&worker->lock);
+ worker->cur_work = work;
+ spin_unlock_irq(&worker->lock);
+
+ if ((work->flags & IO_WQ_WORK_NEEDS_FILES) &&
+ current->files != work->files) {
+ task_lock(current);
+ current->files = work->files;
+ task_unlock(current);
+ }
+ if ((work->flags & IO_WQ_WORK_NEEDS_USER) && !worker->mm &&
+ wq->mm && mmget_not_zero(wq->mm)) {
+ use_mm(wq->mm);
+ set_fs(USER_DS);
+ worker->mm = wq->mm;
+ }
+ if (test_bit(IO_WQ_BIT_CANCEL, &wq->state))
+ work->flags |= IO_WQ_WORK_CANCEL;
+ if (worker->mm)
+ work->flags |= IO_WQ_WORK_HAS_MM;
+
+ if (wq->get_work && !(work->flags & IO_WQ_WORK_INTERNAL)) {
+ put_work = work;
+ wq->get_work(work);
+ }
+
+ old_work = work;
+ work->func(&work);
+
+ spin_lock_irq(&worker->lock);
+ worker->cur_work = NULL;
+ spin_unlock_irq(&worker->lock);
+
+ spin_lock_irq(&wqe->lock);
+
+ if (hash != -1U) {
+ wqe->hash_map &= ~BIT_ULL(hash);
+ wqe->flags &= ~IO_WQE_FLAG_STALLED;
+ }
+ if (work && work != old_work) {
+ spin_unlock_irq(&wqe->lock);
+
+ if (put_work && wq->put_work) {
+ wq->put_work(put_work);
+ put_work = NULL;
+ }
+
+ /* dependent work not hashed */
+ hash = -1U;
+ goto next;
+ }
+ } while (1);
+}
+
+static int io_wqe_worker(void *data)
+{
+ struct io_worker *worker = data;
+ struct io_wqe *wqe = worker->wqe;
+ struct io_wq *wq = wqe->wq;
+ DEFINE_WAIT(wait);
+
+ io_worker_start(wqe, worker);
+
+ while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
+ prepare_to_wait(&worker->wait, &wait, TASK_INTERRUPTIBLE);
+
+ spin_lock_irq(&wqe->lock);
+ if (io_wqe_run_queue(wqe)) {
+ __set_current_state(TASK_RUNNING);
+ io_worker_handle_work(worker);
+ continue;
+ }
+ /* drops the lock on success, retry */
+ if (__io_worker_idle(wqe, worker)) {
+ __release(&wqe->lock);
+ continue;
+ }
+ spin_unlock_irq(&wqe->lock);
+ if (signal_pending(current))
+ flush_signals(current);
+ if (schedule_timeout(WORKER_IDLE_TIMEOUT))
+ continue;
+ /* timed out, exit unless we're the fixed worker */
+ if (test_bit(IO_WQ_BIT_EXIT, &wq->state) ||
+ !(worker->flags & IO_WORKER_F_FIXED))
+ break;
+ }
+
+ finish_wait(&worker->wait, &wait);
+
+ if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
+ spin_lock_irq(&wqe->lock);
+ if (!list_empty(&wqe->work_list))
+ io_worker_handle_work(worker);
+ else
+ spin_unlock_irq(&wqe->lock);
+ }
+
+ io_worker_exit(worker);
+ return 0;
+}
+
+/*
+ * Called when a worker is scheduled in. Mark us as currently running.
+ */
+void io_wq_worker_running(struct task_struct *tsk)
+{
+ struct io_worker *worker = kthread_data(tsk);
+ struct io_wqe *wqe = worker->wqe;
+
+ if (!(worker->flags & IO_WORKER_F_UP))
+ return;
+ if (worker->flags & IO_WORKER_F_RUNNING)
+ return;
+ worker->flags |= IO_WORKER_F_RUNNING;
+ io_wqe_inc_running(wqe, worker);
+}
+
+/*
+ * Called when worker is going to sleep. If there are no workers currently
+ * running and we have work pending, wake up a free one or have the manager
+ * set one up.
+ */
+void io_wq_worker_sleeping(struct task_struct *tsk)
+{
+ struct io_worker *worker = kthread_data(tsk);
+ struct io_wqe *wqe = worker->wqe;
+
+ if (!(worker->flags & IO_WORKER_F_UP))
+ return;
+ if (!(worker->flags & IO_WORKER_F_RUNNING))
+ return;
+
+ worker->flags &= ~IO_WORKER_F_RUNNING;
+
+ spin_lock_irq(&wqe->lock);
+ io_wqe_dec_running(wqe, worker);
+ spin_unlock_irq(&wqe->lock);
+}
+
+static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
+{
+ struct io_wqe_acct *acct =&wqe->acct[index];
+ struct io_worker *worker;
+
+ worker = kcalloc_node(1, sizeof(*worker), GFP_KERNEL, wqe->node);
+ if (!worker)
+ return;
+
+ refcount_set(&worker->ref, 1);
+ worker->nulls_node.pprev = NULL;
+ init_waitqueue_head(&worker->wait);
+ worker->wqe = wqe;
+ spin_lock_init(&worker->lock);
+
+ worker->task = kthread_create_on_node(io_wqe_worker, worker, wqe->node,
+ "io_wqe_worker-%d/%d", index, wqe->node);
+ if (IS_ERR(worker->task)) {
+ kfree(worker);
+ return;
+ }
+
+ spin_lock_irq(&wqe->lock);
+ hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
+ list_add_tail_rcu(&worker->all_list, &wqe->all_list);
+ worker->flags |= IO_WORKER_F_FREE;
+ if (index == IO_WQ_ACCT_BOUND)
+ worker->flags |= IO_WORKER_F_BOUND;
+ if (!acct->nr_workers && (worker->flags & IO_WORKER_F_BOUND))
+ worker->flags |= IO_WORKER_F_FIXED;
+ acct->nr_workers++;
+ spin_unlock_irq(&wqe->lock);
+
+ if (index == IO_WQ_ACCT_UNBOUND)
+ atomic_inc(&wq->user->processes);
+
+ wake_up_process(worker->task);
+}
+
+static inline bool io_wqe_need_worker(struct io_wqe *wqe, int index)
+ __must_hold(wqe->lock)
+{
+ struct io_wqe_acct *acct = &wqe->acct[index];
+
+ /* always ensure we have one bounded worker */
+ if (index == IO_WQ_ACCT_BOUND && !acct->nr_workers)
+ return true;
+ /* if we have available workers or no work, no need */
+ if (!hlist_nulls_empty(&wqe->free_list) || !io_wqe_run_queue(wqe))
+ return false;
+ return acct->nr_workers < acct->max_workers;
+}
+
+/*
+ * Manager thread. Tasked with creating new workers, if we need them.
+ */
+static int io_wq_manager(void *data)
+{
+ struct io_wq *wq = data;
+
+ while (!kthread_should_stop()) {
+ int i;
+
+ for (i = 0; i < wq->nr_wqes; i++) {
+ struct io_wqe *wqe = wq->wqes[i];
+ bool fork_worker[2] = { false, false };
+
+ spin_lock_irq(&wqe->lock);
+ if (io_wqe_need_worker(wqe, IO_WQ_ACCT_BOUND))
+ fork_worker[IO_WQ_ACCT_BOUND] = true;
+ if (io_wqe_need_worker(wqe, IO_WQ_ACCT_UNBOUND))
+ fork_worker[IO_WQ_ACCT_UNBOUND] = true;
+ spin_unlock_irq(&wqe->lock);
+ if (fork_worker[IO_WQ_ACCT_BOUND])
+ create_io_worker(wq, wqe, IO_WQ_ACCT_BOUND);
+ if (fork_worker[IO_WQ_ACCT_UNBOUND])
+ create_io_worker(wq, wqe, IO_WQ_ACCT_UNBOUND);
+ }
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(HZ);
+ }
+
+ return 0;
+}
+
+static bool io_wq_can_queue(struct io_wqe *wqe, struct io_wqe_acct *acct,
+ struct io_wq_work *work)
+{
+ bool free_worker;
+
+ if (!(work->flags & IO_WQ_WORK_UNBOUND))
+ return true;
+ if (atomic_read(&acct->nr_running))
+ return true;
+
+ rcu_read_lock();
+ free_worker = !hlist_nulls_empty(&wqe->free_list);
+ rcu_read_unlock();
+ if (free_worker)
+ return true;
+
+ if (atomic_read(&wqe->wq->user->processes) >= acct->max_workers &&
+ !(capable(CAP_SYS_RESOURCE) || capable(CAP_SYS_ADMIN)))
+ return false;
+
+ return true;
+}
+
+static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
+{
+ struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
+ unsigned long flags;
+
+ /*
+ * Do early check to see if we need a new unbound worker, and if we do,
+ * if we're allowed to do so. This isn't 100% accurate as there's a
+ * gap between this check and incrementing the value, but that's OK.
+ * It's close enough to not be an issue, fork() has the same delay.
+ */
+ if (unlikely(!io_wq_can_queue(wqe, acct, work))) {
+ work->flags |= IO_WQ_WORK_CANCEL;
+ work->func(&work);
+ return;
+ }
+
+ spin_lock_irqsave(&wqe->lock, flags);
+ list_add_tail(&work->list, &wqe->work_list);
+ wqe->flags &= ~IO_WQE_FLAG_STALLED;
+ spin_unlock_irqrestore(&wqe->lock, flags);
+
+ if (!atomic_read(&acct->nr_running))
+ io_wqe_wake_worker(wqe, acct);
+}
+
+void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work)
+{
+ struct io_wqe *wqe = wq->wqes[numa_node_id()];
+
+ io_wqe_enqueue(wqe, work);
+}
+
+/*
+ * Enqueue work, hashed by some key. Work items that hash to the same value
+ * will not be done in parallel. Used to limit concurrent writes, generally
+ * hashed by inode.
+ */
+void io_wq_enqueue_hashed(struct io_wq *wq, struct io_wq_work *work, void *val)
+{
+ struct io_wqe *wqe = wq->wqes[numa_node_id()];
+ unsigned bit;
+
+
+ bit = hash_ptr(val, IO_WQ_HASH_ORDER);
+ work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT));
+ io_wqe_enqueue(wqe, work);
+}
+
+static bool io_wqe_worker_send_sig(struct io_worker *worker, void *data)
+{
+ send_sig(SIGINT, worker->task, 1);
+ return false;
+}
+
+/*
+ * Iterate the passed in list and call the specific function for each
+ * worker that isn't exiting
+ */
+static bool io_wq_for_each_worker(struct io_wqe *wqe,
+ bool (*func)(struct io_worker *, void *),
+ void *data)
+{
+ struct io_worker *worker;
+ bool ret = false;
+
+ list_for_each_entry_rcu(worker, &wqe->all_list, all_list) {
+ if (io_worker_get(worker)) {
+ ret = func(worker, data);
+ io_worker_release(worker);
+ if (ret)
+ break;
+ }
+ }
+
+ return ret;
+}
+
+void io_wq_cancel_all(struct io_wq *wq)
+{
+ int i;
+
+ set_bit(IO_WQ_BIT_CANCEL, &wq->state);
+
+ /*
+ * Browse both lists, as there's a gap between handing work off
+ * to a worker and the worker putting itself on the busy_list
+ */
+ rcu_read_lock();
+ for (i = 0; i < wq->nr_wqes; i++) {
+ struct io_wqe *wqe = wq->wqes[i];
+
+ io_wq_for_each_worker(wqe, io_wqe_worker_send_sig, NULL);
+ }
+ rcu_read_unlock();
+}
+
+struct io_cb_cancel_data {
+ struct io_wqe *wqe;
+ work_cancel_fn *cancel;
+ void *caller_data;
+};
+
+static bool io_work_cancel(struct io_worker *worker, void *cancel_data)
+{
+ struct io_cb_cancel_data *data = cancel_data;
+ unsigned long flags;
+ bool ret = false;
+
+ /*
+ * Hold the lock to avoid ->cur_work going out of scope, caller
+ * may dereference the passed in work.
+ */
+ spin_lock_irqsave(&worker->lock, flags);
+ if (worker->cur_work &&
+ data->cancel(worker->cur_work, data->caller_data)) {
+ send_sig(SIGINT, worker->task, 1);
+ ret = true;
+ }
+ spin_unlock_irqrestore(&worker->lock, flags);
+
+ return ret;
+}
+
+static enum io_wq_cancel io_wqe_cancel_cb_work(struct io_wqe *wqe,
+ work_cancel_fn *cancel,
+ void *cancel_data)
+{
+ struct io_cb_cancel_data data = {
+ .wqe = wqe,
+ .cancel = cancel,
+ .caller_data = cancel_data,
+ };
+ struct io_wq_work *work;
+ unsigned long flags;
+ bool found = false;
+
+ spin_lock_irqsave(&wqe->lock, flags);
+ list_for_each_entry(work, &wqe->work_list, list) {
+ if (cancel(work, cancel_data)) {
+ list_del(&work->list);
+ found = true;
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&wqe->lock, flags);
+
+ if (found) {
+ work->flags |= IO_WQ_WORK_CANCEL;
+ work->func(&work);
+ return IO_WQ_CANCEL_OK;
+ }
+
+ rcu_read_lock();
+ found = io_wq_for_each_worker(wqe, io_work_cancel, &data);
+ rcu_read_unlock();
+ return found ? IO_WQ_CANCEL_RUNNING : IO_WQ_CANCEL_NOTFOUND;
+}
+
+enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
+ void *data)
+{
+ enum io_wq_cancel ret = IO_WQ_CANCEL_NOTFOUND;
+ int i;
+
+ for (i = 0; i < wq->nr_wqes; i++) {
+ struct io_wqe *wqe = wq->wqes[i];
+
+ ret = io_wqe_cancel_cb_work(wqe, cancel, data);
+ if (ret != IO_WQ_CANCEL_NOTFOUND)
+ break;
+ }
+
+ return ret;
+}
+
+static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
+{
+ struct io_wq_work *work = data;
+ unsigned long flags;
+ bool ret = false;
+
+ if (worker->cur_work != work)
+ return false;
+
+ spin_lock_irqsave(&worker->lock, flags);
+ if (worker->cur_work == work) {
+ send_sig(SIGINT, worker->task, 1);
+ ret = true;
+ }
+ spin_unlock_irqrestore(&worker->lock, flags);
+
+ return ret;
+}
+
+static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe,
+ struct io_wq_work *cwork)
+{
+ struct io_wq_work *work;
+ unsigned long flags;
+ bool found = false;
+
+ cwork->flags |= IO_WQ_WORK_CANCEL;
+
+ /*
+ * First check pending list, if we're lucky we can just remove it
+ * from there. CANCEL_OK means that the work is returned as-new,
+ * no completion will be posted for it.
+ */
+ spin_lock_irqsave(&wqe->lock, flags);
+ list_for_each_entry(work, &wqe->work_list, list) {
+ if (work == cwork) {
+ list_del(&work->list);
+ found = true;
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&wqe->lock, flags);
+
+ if (found) {
+ work->flags |= IO_WQ_WORK_CANCEL;
+ work->func(&work);
+ return IO_WQ_CANCEL_OK;
+ }
+
+ /*
+ * Now check if a free (going busy) or busy worker has the work
+ * currently running. If we find it there, we'll return CANCEL_RUNNING
+ * as an indication that we attempte to signal cancellation. The
+ * completion will run normally in this case.
+ */
+ rcu_read_lock();
+ found = io_wq_for_each_worker(wqe, io_wq_worker_cancel, cwork);
+ rcu_read_unlock();
+ return found ? IO_WQ_CANCEL_RUNNING : IO_WQ_CANCEL_NOTFOUND;
+}
+
+enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork)
+{
+ enum io_wq_cancel ret = IO_WQ_CANCEL_NOTFOUND;
+ int i;
+
+ for (i = 0; i < wq->nr_wqes; i++) {
+ struct io_wqe *wqe = wq->wqes[i];
+
+ ret = io_wqe_cancel_work(wqe, cwork);
+ if (ret != IO_WQ_CANCEL_NOTFOUND)
+ break;
+ }
+
+ return ret;
+}
+
+struct io_wq_flush_data {
+ struct io_wq_work work;
+ struct completion done;
+};
+
+static void io_wq_flush_func(struct io_wq_work **workptr)
+{
+ struct io_wq_work *work = *workptr;
+ struct io_wq_flush_data *data;
+
+ data = container_of(work, struct io_wq_flush_data, work);
+ complete(&data->done);
+}
+
+/*
+ * Doesn't wait for previously queued work to finish. When this completes,
+ * it just means that previously queued work was started.
+ */
+void io_wq_flush(struct io_wq *wq)
+{
+ struct io_wq_flush_data data;
+ int i;
+
+ for (i = 0; i < wq->nr_wqes; i++) {
+ struct io_wqe *wqe = wq->wqes[i];
+
+ init_completion(&data.done);
+ INIT_IO_WORK(&data.work, io_wq_flush_func);
+ data.work.flags |= IO_WQ_WORK_INTERNAL;
+ io_wqe_enqueue(wqe, &data.work);
+ wait_for_completion(&data.done);
+ }
+}
+
+struct io_wq *io_wq_create(unsigned bounded, struct mm_struct *mm,
+ struct user_struct *user, get_work_fn *get_work,
+ put_work_fn *put_work)
+{
+ int ret = -ENOMEM, i, node;
+ struct io_wq *wq;
+
+ wq = kcalloc(1, sizeof(*wq), GFP_KERNEL);
+ if (!wq)
+ return ERR_PTR(-ENOMEM);
+
+ wq->nr_wqes = num_online_nodes();
+ wq->wqes = kcalloc(wq->nr_wqes, sizeof(struct io_wqe *), GFP_KERNEL);
+ if (!wq->wqes) {
+ kfree(wq);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ wq->get_work = get_work;
+ wq->put_work = put_work;
+
+ /* caller must already hold a reference to this */
+ wq->user = user;
+
+ i = 0;
+ refcount_set(&wq->refs, wq->nr_wqes);
+ for_each_online_node(node) {
+ struct io_wqe *wqe;
+
+ wqe = kcalloc_node(1, sizeof(struct io_wqe), GFP_KERNEL, node);
+ if (!wqe)
+ break;
+ wq->wqes[i] = wqe;
+ wqe->node = node;
+ wqe->acct[IO_WQ_ACCT_BOUND].max_workers = bounded;
+ atomic_set(&wqe->acct[IO_WQ_ACCT_BOUND].nr_running, 0);
+ if (user) {
+ wqe->acct[IO_WQ_ACCT_UNBOUND].max_workers =
+ task_rlimit(current, RLIMIT_NPROC);
+ }
+ atomic_set(&wqe->acct[IO_WQ_ACCT_UNBOUND].nr_running, 0);
+ wqe->node = node;
+ wqe->wq = wq;
+ spin_lock_init(&wqe->lock);
+ INIT_LIST_HEAD(&wqe->work_list);
+ INIT_HLIST_NULLS_HEAD(&wqe->free_list, 0);
+ INIT_HLIST_NULLS_HEAD(&wqe->busy_list, 1);
+ INIT_LIST_HEAD(&wqe->all_list);
+
+ i++;
+ }
+
+ init_completion(&wq->done);
+
+ if (i != wq->nr_wqes)
+ goto err;
+
+ /* caller must have already done mmgrab() on this mm */
+ wq->mm = mm;
+
+ wq->manager = kthread_create(io_wq_manager, wq, "io_wq_manager");
+ if (!IS_ERR(wq->manager)) {
+ wake_up_process(wq->manager);
+ return wq;
+ }
+
+ ret = PTR_ERR(wq->manager);
+ wq->manager = NULL;
+err:
+ complete(&wq->done);
+ io_wq_destroy(wq);
+ return ERR_PTR(ret);
+}
+
+static bool io_wq_worker_wake(struct io_worker *worker, void *data)
+{
+ wake_up_process(worker->task);
+ return false;
+}
+
+void io_wq_destroy(struct io_wq *wq)
+{
+ int i;
+
+ if (wq->manager) {
+ set_bit(IO_WQ_BIT_EXIT, &wq->state);
+ kthread_stop(wq->manager);
+ }
+
+ rcu_read_lock();
+ for (i = 0; i < wq->nr_wqes; i++) {
+ struct io_wqe *wqe = wq->wqes[i];
+
+ if (!wqe)
+ continue;
+ io_wq_for_each_worker(wqe, io_wq_worker_wake, NULL);
+ }
+ rcu_read_unlock();
+
+ wait_for_completion(&wq->done);
+
+ for (i = 0; i < wq->nr_wqes; i++)
+ kfree(wq->wqes[i]);
+ kfree(wq->wqes);
+ kfree(wq);
+}
diff --git a/fs/io-wq.h b/fs/io-wq.h
new file mode 100644
index 000000000000..4b29f922f80c
--- /dev/null
+++ b/fs/io-wq.h
@@ -0,0 +1,74 @@
+#ifndef INTERNAL_IO_WQ_H
+#define INTERNAL_IO_WQ_H
+
+struct io_wq;
+
+enum {
+ IO_WQ_WORK_CANCEL = 1,
+ IO_WQ_WORK_HAS_MM = 2,
+ IO_WQ_WORK_HASHED = 4,
+ IO_WQ_WORK_NEEDS_USER = 8,
+ IO_WQ_WORK_NEEDS_FILES = 16,
+ IO_WQ_WORK_UNBOUND = 32,
+ IO_WQ_WORK_INTERNAL = 64,
+
+ IO_WQ_HASH_SHIFT = 24, /* upper 8 bits are used for hash key */
+};
+
+enum io_wq_cancel {
+ IO_WQ_CANCEL_OK, /* cancelled before started */
+ IO_WQ_CANCEL_RUNNING, /* found, running, and attempted cancelled */
+ IO_WQ_CANCEL_NOTFOUND, /* work not found */
+};
+
+struct io_wq_work {
+ struct list_head list;
+ void (*func)(struct io_wq_work **);
+ unsigned flags;
+ struct files_struct *files;
+};
+
+#define INIT_IO_WORK(work, _func) \
+ do { \
+ (work)->func = _func; \
+ (work)->flags = 0; \
+ (work)->files = NULL; \
+ } while (0) \
+
+typedef void (get_work_fn)(struct io_wq_work *);
+typedef void (put_work_fn)(struct io_wq_work *);
+
+struct io_wq *io_wq_create(unsigned bounded, struct mm_struct *mm,
+ struct user_struct *user,
+ get_work_fn *get_work, put_work_fn *put_work);
+void io_wq_destroy(struct io_wq *wq);
+
+void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
+void io_wq_enqueue_hashed(struct io_wq *wq, struct io_wq_work *work, void *val);
+void io_wq_flush(struct io_wq *wq);
+
+void io_wq_cancel_all(struct io_wq *wq);
+enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork);
+
+typedef bool (work_cancel_fn)(struct io_wq_work *, void *);
+
+enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
+ void *data);
+
+#if defined(CONFIG_IO_WQ)
+extern void io_wq_worker_sleeping(struct task_struct *);
+extern void io_wq_worker_running(struct task_struct *);
+#else
+static inline void io_wq_worker_sleeping(struct task_struct *tsk)
+{
+}
+static inline void io_wq_worker_running(struct task_struct *tsk)
+{
+}
+#endif
+
+static inline bool io_wq_current_is_worker(void)
+{
+ return in_task() && (current->flags & PF_IO_WORKER);
+}
+#endif
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 8a0381f1a43b..4c030a92de79 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -56,7 +56,6 @@
#include <linux/mmu_context.h>
#include <linux/percpu.h>
#include <linux/slab.h>
-#include <linux/workqueue.h>
#include <linux/kthread.h>
#include <linux/blkdev.h>
#include <linux/bvec.h>
@@ -71,12 +70,24 @@
#include <linux/sizes.h>
#include <linux/hugetlb.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/io_uring.h>
+
#include <uapi/linux/io_uring.h>
#include "internal.h"
+#include "io-wq.h"
#define IORING_MAX_ENTRIES 32768
-#define IORING_MAX_FIXED_FILES 1024
+#define IORING_MAX_CQ_ENTRIES (2 * IORING_MAX_ENTRIES)
+
+/*
+ * Shift of 9 is 512 entries, or exactly one page on 64-bit archs
+ */
+#define IORING_FILE_TABLE_SHIFT 9
+#define IORING_MAX_FILES_TABLE (1U << IORING_FILE_TABLE_SHIFT)
+#define IORING_FILE_TABLE_MASK (IORING_MAX_FILES_TABLE - 1)
+#define IORING_MAX_FIXED_FILES (64 * IORING_MAX_FILES_TABLE)
struct io_uring {
u32 head ____cacheline_aligned_in_smp;
@@ -161,14 +172,8 @@ struct io_mapped_ubuf {
unsigned int nr_bvecs;
};
-struct async_list {
- spinlock_t lock;
- atomic_t cnt;
- struct list_head list;
-
- struct file *file;
- off_t io_start;
- size_t io_len;
+struct fixed_file_table {
+ struct file **files;
};
struct io_ring_ctx {
@@ -180,6 +185,7 @@ struct io_ring_ctx {
unsigned int flags;
bool compat;
bool account_mem;
+ bool cq_overflow_flushed;
/*
* Ring buffer of indices into array of io_uring_sqe, which is
@@ -197,37 +203,31 @@ struct io_ring_ctx {
unsigned sq_entries;
unsigned sq_mask;
unsigned sq_thread_idle;
+ unsigned cached_sq_dropped;
+ atomic_t cached_cq_overflow;
struct io_uring_sqe *sq_sqes;
struct list_head defer_list;
struct list_head timeout_list;
+ struct list_head cq_overflow_list;
+
+ wait_queue_head_t inflight_wait;
} ____cacheline_aligned_in_smp;
+ struct io_rings *rings;
+
/* IO offload */
- struct workqueue_struct *sqo_wq[2];
+ struct io_wq *io_wq;
struct task_struct *sqo_thread; /* if using sq thread polling */
struct mm_struct *sqo_mm;
wait_queue_head_t sqo_wait;
- struct completion sqo_thread_started;
-
- struct {
- unsigned cached_cq_tail;
- unsigned cq_entries;
- unsigned cq_mask;
- struct wait_queue_head cq_wait;
- struct fasync_struct *cq_fasync;
- struct eventfd_ctx *cq_ev_fd;
- atomic_t cq_timeouts;
- } ____cacheline_aligned_in_smp;
-
- struct io_rings *rings;
/*
* If used, fixed file set. Writers must ensure that ->refs is dead,
* readers must ensure that ->refs is alive as long as the file* is
* used. Only updated through io_uring_register(2).
*/
- struct file **user_files;
+ struct fixed_file_table *file_table;
unsigned nr_user_files;
/* if used, fixed mapped user buffers */
@@ -236,7 +236,25 @@ struct io_ring_ctx {
struct user_struct *user;
- struct completion ctx_done;
+ /* 0 is for ctx quiesce/reinit/free, 1 is for sqo_thread started */
+ struct completion *completions;
+
+ /* if all else fails... */
+ struct io_kiocb *fallback_req;
+
+#if defined(CONFIG_UNIX)
+ struct socket *ring_sock;
+#endif
+
+ struct {
+ unsigned cached_cq_tail;
+ unsigned cq_entries;
+ unsigned cq_mask;
+ atomic_t cq_timeouts;
+ struct wait_queue_head cq_wait;
+ struct fasync_struct *cq_fasync;
+ struct eventfd_ctx *cq_ev_fd;
+ } ____cacheline_aligned_in_smp;
struct {
struct mutex uring_lock;
@@ -253,22 +271,20 @@ struct io_ring_ctx {
* manipulate the list, hence no extra locking is needed there.
*/
struct list_head poll_list;
- struct list_head cancel_list;
- } ____cacheline_aligned_in_smp;
+ struct rb_root cancel_tree;
- struct async_list pending_async[2];
-
-#if defined(CONFIG_UNIX)
- struct socket *ring_sock;
-#endif
+ spinlock_t inflight_lock;
+ struct list_head inflight_list;
+ } ____cacheline_aligned_in_smp;
};
struct sqe_submit {
const struct io_uring_sqe *sqe;
- unsigned short index;
+ struct file *ring_file;
+ int ring_fd;
u32 sequence;
bool has_user;
- bool needs_lock;
+ bool in_async;
bool needs_fixed_file;
};
@@ -307,7 +323,10 @@ struct io_kiocb {
struct sqe_submit submit;
struct io_ring_ctx *ctx;
- struct list_head list;
+ union {
+ struct list_head list;
+ struct rb_node rb_node;
+ };
struct list_head link_list;
unsigned int flags;
refcount_t refs;
@@ -318,15 +337,22 @@ struct io_kiocb {
#define REQ_F_IO_DRAIN 16 /* drain existing IO first */
#define REQ_F_IO_DRAINED 32 /* drain done */
#define REQ_F_LINK 64 /* linked sqes */
-#define REQ_F_LINK_DONE 128 /* linked sqes done */
+#define REQ_F_LINK_TIMEOUT 128 /* has linked timeout */
#define REQ_F_FAIL_LINK 256 /* fail rest of links */
#define REQ_F_SHADOW_DRAIN 512 /* link-drain shadow req */
#define REQ_F_TIMEOUT 1024 /* timeout request */
+#define REQ_F_ISREG 2048 /* regular file */
+#define REQ_F_MUST_PUNT 4096 /* must be punted even for NONBLOCK */
+#define REQ_F_TIMEOUT_NOSEQ 8192 /* no timeout sequence */
+#define REQ_F_INFLIGHT 16384 /* on inflight list */
+#define REQ_F_COMP_LOCKED 32768 /* completion under lock */
u64 user_data;
u32 result;
u32 sequence;
- struct work_struct work;
+ struct list_head inflight_entry;
+
+ struct io_wq_work work;
};
#define IO_PLUG_THRESHOLD 2
@@ -352,10 +378,11 @@ struct io_submit_state {
unsigned int ios_left;
};
-static void io_sq_wq_submit_work(struct work_struct *work);
-static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
- long res);
+static void io_wq_submit_work(struct io_wq_work **workptr);
+static void io_cqring_fill_event(struct io_kiocb *req, long res);
static void __io_free_req(struct io_kiocb *req);
+static void io_put_req(struct io_kiocb *req);
+static void io_double_put_req(struct io_kiocb *req);
static struct kmem_cache *req_cachep;
@@ -378,64 +405,75 @@ static void io_ring_ctx_ref_free(struct percpu_ref *ref)
{
struct io_ring_ctx *ctx = container_of(ref, struct io_ring_ctx, refs);
- complete(&ctx->ctx_done);
+ complete(&ctx->completions[0]);
}
static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
{
struct io_ring_ctx *ctx;
- int i;
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return NULL;
+ ctx->fallback_req = kmem_cache_alloc(req_cachep, GFP_KERNEL);
+ if (!ctx->fallback_req)
+ goto err;
+
+ ctx->completions = kmalloc(2 * sizeof(struct completion), GFP_KERNEL);
+ if (!ctx->completions)
+ goto err;
+
if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free,
- PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
- kfree(ctx);
- return NULL;
- }
+ PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
+ goto err;
ctx->flags = p->flags;
init_waitqueue_head(&ctx->cq_wait);
- init_completion(&ctx->ctx_done);
- init_completion(&ctx->sqo_thread_started);
+ INIT_LIST_HEAD(&ctx->cq_overflow_list);
+ init_completion(&ctx->completions[0]);
+ init_completion(&ctx->completions[1]);
mutex_init(&ctx->uring_lock);
init_waitqueue_head(&ctx->wait);
- for (i = 0; i < ARRAY_SIZE(ctx->pending_async); i++) {
- spin_lock_init(&ctx->pending_async[i].lock);
- INIT_LIST_HEAD(&ctx->pending_async[i].list);
- atomic_set(&ctx->pending_async[i].cnt, 0);
- }
spin_lock_init(&ctx->completion_lock);
INIT_LIST_HEAD(&ctx->poll_list);
- INIT_LIST_HEAD(&ctx->cancel_list);
+ ctx->cancel_tree = RB_ROOT;
INIT_LIST_HEAD(&ctx->defer_list);
INIT_LIST_HEAD(&ctx->timeout_list);
+ init_waitqueue_head(&ctx->inflight_wait);
+ spin_lock_init(&ctx->inflight_lock);
+ INIT_LIST_HEAD(&ctx->inflight_list);
return ctx;
+err:
+ if (ctx->fallback_req)
+ kmem_cache_free(req_cachep, ctx->fallback_req);
+ kfree(ctx->completions);
+ kfree(ctx);
+ return NULL;
}
-static inline bool io_sequence_defer(struct io_ring_ctx *ctx,
- struct io_kiocb *req)
+static inline bool __req_need_defer(struct io_kiocb *req)
{
- /* timeout requests always honor sequence */
- if (!(req->flags & REQ_F_TIMEOUT) &&
- (req->flags & (REQ_F_IO_DRAIN|REQ_F_IO_DRAINED)) != REQ_F_IO_DRAIN)
- return false;
+ struct io_ring_ctx *ctx = req->ctx;
- return req->sequence != ctx->cached_cq_tail + ctx->rings->sq_dropped;
+ return req->sequence != ctx->cached_cq_tail + ctx->cached_sq_dropped
+ + atomic_read(&ctx->cached_cq_overflow);
}
-static struct io_kiocb *__io_get_deferred_req(struct io_ring_ctx *ctx,
- struct list_head *list)
+static inline bool req_need_defer(struct io_kiocb *req)
{
- struct io_kiocb *req;
+ if ((req->flags & (REQ_F_IO_DRAIN|REQ_F_IO_DRAINED)) == REQ_F_IO_DRAIN)
+ return __req_need_defer(req);
- if (list_empty(list))
- return NULL;
+ return false;
+}
+
+static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx)
+{
+ struct io_kiocb *req;
- req = list_first_entry(list, struct io_kiocb, list);
- if (!io_sequence_defer(ctx, req)) {
+ req = list_first_entry_or_null(&ctx->defer_list, struct io_kiocb, list);
+ if (req && !req_need_defer(req)) {
list_del_init(&req->list);
return req;
}
@@ -443,14 +481,21 @@ static struct io_kiocb *__io_get_deferred_req(struct io_ring_ctx *ctx,
return NULL;
}
-static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx)
-{
- return __io_get_deferred_req(ctx, &ctx->defer_list);
-}
-
static struct io_kiocb *io_get_timeout_req(struct io_ring_ctx *ctx)
{
- return __io_get_deferred_req(ctx, &ctx->timeout_list);
+ struct io_kiocb *req;
+
+ req = list_first_entry_or_null(&ctx->timeout_list, struct io_kiocb, list);
+ if (req) {
+ if (req->flags & REQ_F_TIMEOUT_NOSEQ)
+ return NULL;
+ if (!__req_need_defer(req)) {
+ list_del_init(&req->list);
+ return req;
+ }
+ }
+
+ return NULL;
}
static void __io_commit_cqring(struct io_ring_ctx *ctx)
@@ -468,21 +513,59 @@ static void __io_commit_cqring(struct io_ring_ctx *ctx)
}
}
-static inline void io_queue_async_work(struct io_ring_ctx *ctx,
- struct io_kiocb *req)
+static inline bool io_sqe_needs_user(const struct io_uring_sqe *sqe)
{
- int rw = 0;
+ u8 opcode = READ_ONCE(sqe->opcode);
+
+ return !(opcode == IORING_OP_READ_FIXED ||
+ opcode == IORING_OP_WRITE_FIXED);
+}
+
+static inline bool io_prep_async_work(struct io_kiocb *req)
+{
+ bool do_hashed = false;
if (req->submit.sqe) {
switch (req->submit.sqe->opcode) {
case IORING_OP_WRITEV:
case IORING_OP_WRITE_FIXED:
- rw = !(req->rw.ki_flags & IOCB_DIRECT);
+ do_hashed = true;
+ /* fall-through */
+ case IORING_OP_READV:
+ case IORING_OP_READ_FIXED:
+ case IORING_OP_SENDMSG:
+ case IORING_OP_RECVMSG:
+ case IORING_OP_ACCEPT:
+ case IORING_OP_POLL_ADD:
+ /*
+ * We know REQ_F_ISREG is not set on some of these
+ * opcodes, but this enables us to keep the check in
+ * just one place.
+ */
+ if (!(req->flags & REQ_F_ISREG))
+ req->work.flags |= IO_WQ_WORK_UNBOUND;
break;
}
+ if (io_sqe_needs_user(req->submit.sqe))
+ req->work.flags |= IO_WQ_WORK_NEEDS_USER;
}
- queue_work(ctx->sqo_wq[rw], &req->work);
+ return do_hashed;
+}
+
+static inline void io_queue_async_work(struct io_kiocb *req)
+{
+ bool do_hashed = io_prep_async_work(req);
+ struct io_ring_ctx *ctx = req->ctx;
+
+ trace_io_uring_queue_async_work(ctx, do_hashed, req, &req->work,
+ req->flags);
+ if (!do_hashed) {
+ io_wq_enqueue(ctx->io_wq, &req->work);
+ } else {
+ io_wq_enqueue_hashed(ctx->io_wq, &req->work,
+ file_inode(req->file));
+ }
}
static void io_kill_timeout(struct io_kiocb *req)
@@ -492,9 +575,9 @@ static void io_kill_timeout(struct io_kiocb *req)
ret = hrtimer_try_to_cancel(&req->timeout.timer);
if (ret != -1) {
atomic_inc(&req->ctx->cq_timeouts);
- list_del(&req->list);
- io_cqring_fill_event(req->ctx, req->user_data, 0);
- __io_free_req(req);
+ list_del_init(&req->list);
+ io_cqring_fill_event(req, 0);
+ io_put_req(req);
}
}
@@ -524,7 +607,7 @@ static void io_commit_cqring(struct io_ring_ctx *ctx)
continue;
}
req->flags |= REQ_F_IO_DRAINED;
- io_queue_async_work(ctx, req);
+ io_queue_async_work(req);
}
}
@@ -546,57 +629,122 @@ static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx)
return &rings->cqes[tail & ctx->cq_mask];
}
-static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data,
- long res)
+static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
+{
+ if (waitqueue_active(&ctx->wait))
+ wake_up(&ctx->wait);
+ if (waitqueue_active(&ctx->sqo_wait))
+ wake_up(&ctx->sqo_wait);
+ if (ctx->cq_ev_fd)
+ eventfd_signal(ctx->cq_ev_fd, 1);
+}
+
+static void io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
+{
+ struct io_rings *rings = ctx->rings;
+ struct io_uring_cqe *cqe;
+ struct io_kiocb *req;
+ unsigned long flags;
+ LIST_HEAD(list);
+
+ if (!force) {
+ if (list_empty_careful(&ctx->cq_overflow_list))
+ return;
+ if ((ctx->cached_cq_tail - READ_ONCE(rings->cq.head) ==
+ rings->cq_ring_entries))
+ return;
+ }
+
+ spin_lock_irqsave(&ctx->completion_lock, flags);
+
+ /* if force is set, the ring is going away. always drop after that */
+ if (force)
+ ctx->cq_overflow_flushed = true;
+
+ while (!list_empty(&ctx->cq_overflow_list)) {
+ cqe = io_get_cqring(ctx);
+ if (!cqe && !force)
+ break;
+
+ req = list_first_entry(&ctx->cq_overflow_list, struct io_kiocb,
+ list);
+ list_move(&req->list, &list);
+ if (cqe) {
+ WRITE_ONCE(cqe->user_data, req->user_data);
+ WRITE_ONCE(cqe->res, req->result);
+ WRITE_ONCE(cqe->flags, 0);
+ } else {
+ WRITE_ONCE(ctx->rings->cq_overflow,
+ atomic_inc_return(&ctx->cached_cq_overflow));
+ }
+ }
+
+ io_commit_cqring(ctx);
+ spin_unlock_irqrestore(&ctx->completion_lock, flags);
+ io_cqring_ev_posted(ctx);
+
+ while (!list_empty(&list)) {
+ req = list_first_entry(&list, struct io_kiocb, list);
+ list_del(&req->list);
+ io_put_req(req);
+ }
+}
+
+static void io_cqring_fill_event(struct io_kiocb *req, long res)
{
+ struct io_ring_ctx *ctx = req->ctx;
struct io_uring_cqe *cqe;
+ trace_io_uring_complete(ctx, req->user_data, res);
+
/*
* If we can't get a cq entry, userspace overflowed the
* submission (by quite a lot). Increment the overflow count in
* the ring.
*/
cqe = io_get_cqring(ctx);
- if (cqe) {
- WRITE_ONCE(cqe->user_data, ki_user_data);
+ if (likely(cqe)) {
+ WRITE_ONCE(cqe->user_data, req->user_data);
WRITE_ONCE(cqe->res, res);
WRITE_ONCE(cqe->flags, 0);
+ } else if (ctx->cq_overflow_flushed) {
+ WRITE_ONCE(ctx->rings->cq_overflow,
+ atomic_inc_return(&ctx->cached_cq_overflow));
} else {
- unsigned overflow = READ_ONCE(ctx->rings->cq_overflow);
-
- WRITE_ONCE(ctx->rings->cq_overflow, overflow + 1);
+ refcount_inc(&req->refs);
+ req->result = res;
+ list_add_tail(&req->list, &ctx->cq_overflow_list);
}
}
-static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
-{
- if (waitqueue_active(&ctx->wait))
- wake_up(&ctx->wait);
- if (waitqueue_active(&ctx->sqo_wait))
- wake_up(&ctx->sqo_wait);
- if (ctx->cq_ev_fd)
- eventfd_signal(ctx->cq_ev_fd, 1);
-}
-
-static void io_cqring_add_event(struct io_ring_ctx *ctx, u64 user_data,
- long res)
+static void io_cqring_add_event(struct io_kiocb *req, long res)
{
+ struct io_ring_ctx *ctx = req->ctx;
unsigned long flags;
spin_lock_irqsave(&ctx->completion_lock, flags);
- io_cqring_fill_event(ctx, user_data, res);
+ io_cqring_fill_event(req, res);
io_commit_cqring(ctx);
spin_unlock_irqrestore(&ctx->completion_lock, flags);
io_cqring_ev_posted(ctx);
}
-static void io_ring_drop_ctx_refs(struct io_ring_ctx *ctx, unsigned refs)
+static inline bool io_is_fallback_req(struct io_kiocb *req)
{
- percpu_ref_put_many(&ctx->refs, refs);
+ return req == (struct io_kiocb *)
+ ((unsigned long) req->ctx->fallback_req & ~1UL);
+}
- if (waitqueue_active(&ctx->wait))
- wake_up(&ctx->wait);
+static struct io_kiocb *io_get_fallback_req(struct io_ring_ctx *ctx)
+{
+ struct io_kiocb *req;
+
+ req = ctx->fallback_req;
+ if (!test_and_set_bit_lock(0, (unsigned long *) ctx->fallback_req))
+ return req;
+
+ return NULL;
}
static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
@@ -611,7 +759,7 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
if (!state) {
req = kmem_cache_alloc(req_cachep, gfp);
if (unlikely(!req))
- goto out;
+ goto fallback;
} else if (!state->free_reqs) {
size_t sz;
int ret;
@@ -626,7 +774,7 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
if (unlikely(ret <= 0)) {
state->reqs[0] = kmem_cache_alloc(req_cachep, gfp);
if (!state->reqs[0])
- goto out;
+ goto fallback;
ret = 1;
}
state->free_reqs = ret - 1;
@@ -638,15 +786,20 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
state->cur_req++;
}
+got_it:
req->file = NULL;
req->ctx = ctx;
req->flags = 0;
/* one is dropped after submission, the other at completion */
refcount_set(&req->refs, 2);
req->result = 0;
+ INIT_IO_WORK(&req->work, io_wq_submit_work);
return req;
-out:
- io_ring_drop_ctx_refs(ctx, 1);
+fallback:
+ req = io_get_fallback_req(ctx);
+ if (req)
+ goto got_it;
+ percpu_ref_put(&ctx->refs);
return NULL;
}
@@ -654,22 +807,55 @@ static void io_free_req_many(struct io_ring_ctx *ctx, void **reqs, int *nr)
{
if (*nr) {
kmem_cache_free_bulk(req_cachep, *nr, reqs);
- io_ring_drop_ctx_refs(ctx, *nr);
+ percpu_ref_put_many(&ctx->refs, *nr);
*nr = 0;
}
}
static void __io_free_req(struct io_kiocb *req)
{
+ struct io_ring_ctx *ctx = req->ctx;
+
if (req->file && !(req->flags & REQ_F_FIXED_FILE))
fput(req->file);
- io_ring_drop_ctx_refs(req->ctx, 1);
- kmem_cache_free(req_cachep, req);
+ if (req->flags & REQ_F_INFLIGHT) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&ctx->inflight_lock, flags);
+ list_del(&req->inflight_entry);
+ if (waitqueue_active(&ctx->inflight_wait))
+ wake_up(&ctx->inflight_wait);
+ spin_unlock_irqrestore(&ctx->inflight_lock, flags);
+ }
+ percpu_ref_put(&ctx->refs);
+ if (likely(!io_is_fallback_req(req)))
+ kmem_cache_free(req_cachep, req);
+ else
+ clear_bit_unlock(0, (unsigned long *) ctx->fallback_req);
+}
+
+static bool io_link_cancel_timeout(struct io_kiocb *req)
+{
+ struct io_ring_ctx *ctx = req->ctx;
+ int ret;
+
+ ret = hrtimer_try_to_cancel(&req->timeout.timer);
+ if (ret != -1) {
+ io_cqring_fill_event(req, -ECANCELED);
+ io_commit_cqring(ctx);
+ req->flags &= ~REQ_F_LINK;
+ io_put_req(req);
+ return true;
+ }
+
+ return false;
}
-static void io_req_link_next(struct io_kiocb *req)
+static void io_req_link_next(struct io_kiocb *req, struct io_kiocb **nxtptr)
{
+ struct io_ring_ctx *ctx = req->ctx;
struct io_kiocb *nxt;
+ bool wake_ev = false;
/*
* The list should never be empty when we are called here. But could
@@ -677,18 +863,35 @@ static void io_req_link_next(struct io_kiocb *req)
* safe side.
*/
nxt = list_first_entry_or_null(&req->link_list, struct io_kiocb, list);
- if (nxt) {
- list_del(&nxt->list);
+ while (nxt) {
+ list_del_init(&nxt->list);
if (!list_empty(&req->link_list)) {
INIT_LIST_HEAD(&nxt->link_list);
list_splice(&req->link_list, &nxt->link_list);
nxt->flags |= REQ_F_LINK;
}
- nxt->flags |= REQ_F_LINK_DONE;
- INIT_WORK(&nxt->work, io_sq_wq_submit_work);
- io_queue_async_work(req->ctx, nxt);
+ /*
+ * If we're in async work, we can continue processing the chain
+ * in this context instead of having to queue up new async work.
+ */
+ if (req->flags & REQ_F_LINK_TIMEOUT) {
+ wake_ev = io_link_cancel_timeout(nxt);
+
+ /* we dropped this link, get next */
+ nxt = list_first_entry_or_null(&req->link_list,
+ struct io_kiocb, list);
+ } else if (nxtptr && io_wq_current_is_worker()) {
+ *nxtptr = nxt;
+ break;
+ } else {
+ io_queue_async_work(nxt);
+ break;
+ }
}
+
+ if (wake_ev)
+ io_cqring_ev_posted(ctx);
}
/*
@@ -696,48 +899,131 @@ static void io_req_link_next(struct io_kiocb *req)
*/
static void io_fail_links(struct io_kiocb *req)
{
+ struct io_ring_ctx *ctx = req->ctx;
struct io_kiocb *link;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ctx->completion_lock, flags);
while (!list_empty(&req->link_list)) {
link = list_first_entry(&req->link_list, struct io_kiocb, list);
- list_del(&link->list);
+ list_del_init(&link->list);
+
+ trace_io_uring_fail_link(req, link);
- io_cqring_add_event(req->ctx, link->user_data, -ECANCELED);
- __io_free_req(link);
+ if ((req->flags & REQ_F_LINK_TIMEOUT) &&
+ link->submit.sqe->opcode == IORING_OP_LINK_TIMEOUT) {
+ io_link_cancel_timeout(link);
+ } else {
+ io_cqring_fill_event(link, -ECANCELED);
+ io_double_put_req(link);
+ }
}
+
+ io_commit_cqring(ctx);
+ spin_unlock_irqrestore(&ctx->completion_lock, flags);
+ io_cqring_ev_posted(ctx);
}
-static void io_free_req(struct io_kiocb *req)
+static void io_free_req_find_next(struct io_kiocb *req, struct io_kiocb **nxt)
{
+ if (likely(!(req->flags & REQ_F_LINK))) {
+ __io_free_req(req);
+ return;
+ }
+
/*
* If LINK is set, we have dependent requests in this chain. If we
* didn't fail this request, queue the first one up, moving any other
* dependencies to the next request. In case of failure, fail the rest
* of the chain.
*/
- if (req->flags & REQ_F_LINK) {
- if (req->flags & REQ_F_FAIL_LINK)
- io_fail_links(req);
- else
- io_req_link_next(req);
+ if (req->flags & REQ_F_FAIL_LINK) {
+ io_fail_links(req);
+ } else if ((req->flags & (REQ_F_LINK_TIMEOUT | REQ_F_COMP_LOCKED)) ==
+ REQ_F_LINK_TIMEOUT) {
+ struct io_ring_ctx *ctx = req->ctx;
+ unsigned long flags;
+
+ /*
+ * If this is a timeout link, we could be racing with the
+ * timeout timer. Grab the completion lock for this case to
+ * protect against that.
+ */
+ spin_lock_irqsave(&ctx->completion_lock, flags);
+ io_req_link_next(req, nxt);
+ spin_unlock_irqrestore(&ctx->completion_lock, flags);
+ } else {
+ io_req_link_next(req, nxt);
}
__io_free_req(req);
}
+static void io_free_req(struct io_kiocb *req)
+{
+ io_free_req_find_next(req, NULL);
+}
+
+/*
+ * Drop reference to request, return next in chain (if there is one) if this
+ * was the last reference to this request.
+ */
+static void io_put_req_find_next(struct io_kiocb *req, struct io_kiocb **nxtptr)
+{
+ struct io_kiocb *nxt = NULL;
+
+ if (refcount_dec_and_test(&req->refs))
+ io_free_req_find_next(req, &nxt);
+
+ if (nxt) {
+ if (nxtptr)
+ *nxtptr = nxt;
+ else
+ io_queue_async_work(nxt);
+ }
+}
+
static void io_put_req(struct io_kiocb *req)
{
if (refcount_dec_and_test(&req->refs))
io_free_req(req);
}
-static unsigned io_cqring_events(struct io_rings *rings)
+static void io_double_put_req(struct io_kiocb *req)
+{
+ /* drop both submit and complete references */
+ if (refcount_sub_and_test(2, &req->refs))
+ __io_free_req(req);
+}
+
+static unsigned io_cqring_events(struct io_ring_ctx *ctx, bool noflush)
{
+ struct io_rings *rings = ctx->rings;
+
+ /*
+ * noflush == true is from the waitqueue handler, just ensure we wake
+ * up the task, and the next invocation will flush the entries. We
+ * cannot safely to it from here.
+ */
+ if (noflush && !list_empty(&ctx->cq_overflow_list))
+ return -1U;
+
+ io_cqring_overflow_flush(ctx, false);
+
/* See comment at the top of this file */
smp_rmb();
return READ_ONCE(rings->cq.tail) - READ_ONCE(rings->cq.head);
}
+static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx)
+{
+ struct io_rings *rings = ctx->rings;
+
+ /* make sure SQ entry isn't read before tail */
+ return smp_load_acquire(&rings->sq.tail) - ctx->cached_sq_head;
+}
+
/*
* Find and free completed poll iocbs
*/
@@ -753,7 +1039,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
req = list_first_entry(done, struct io_kiocb, list);
list_del(&req->list);
- io_cqring_fill_event(ctx, req->user_data, req->result);
+ io_cqring_fill_event(req, req->result);
(*nr_events)++;
if (refcount_dec_and_test(&req->refs)) {
@@ -762,8 +1048,8 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
* completions for those, only batch free for fixed
* file and non-linked commands.
*/
- if ((req->flags & (REQ_F_FIXED_FILE|REQ_F_LINK)) ==
- REQ_F_FIXED_FILE) {
+ if (((req->flags & (REQ_F_FIXED_FILE|REQ_F_LINK)) ==
+ REQ_F_FIXED_FILE) && !io_is_fallback_req(req)) {
reqs[to_free++] = req;
if (to_free == ARRAY_SIZE(reqs))
io_free_req_many(ctx, reqs, &to_free);
@@ -867,19 +1153,11 @@ static void io_iopoll_reap_events(struct io_ring_ctx *ctx)
mutex_unlock(&ctx->uring_lock);
}
-static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
- long min)
+static int __io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
+ long min)
{
- int iters, ret = 0;
+ int iters = 0, ret = 0;
- /*
- * We disallow the app entering submit/complete with polling, but we
- * still need to lock the ring to prevent racing with polled issue
- * that got punted to a workqueue.
- */
- mutex_lock(&ctx->uring_lock);
-
- iters = 0;
do {
int tmin = 0;
@@ -888,7 +1166,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
* If we do, we can potentially be spinning for commands that
* already triggered a CQE (eg in error).
*/
- if (io_cqring_events(ctx->rings))
+ if (io_cqring_events(ctx, false))
break;
/*
@@ -915,42 +1193,76 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
ret = 0;
} while (min && !*nr_events && !need_resched());
+ return ret;
+}
+
+static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
+ long min)
+{
+ int ret;
+
+ /*
+ * We disallow the app entering submit/complete with polling, but we
+ * still need to lock the ring to prevent racing with polled issue
+ * that got punted to a workqueue.
+ */
+ mutex_lock(&ctx->uring_lock);
+ ret = __io_iopoll_check(ctx, nr_events, min);
mutex_unlock(&ctx->uring_lock);
return ret;
}
-static void kiocb_end_write(struct kiocb *kiocb)
+static void kiocb_end_write(struct io_kiocb *req)
{
- if (kiocb->ki_flags & IOCB_WRITE) {
- struct inode *inode = file_inode(kiocb->ki_filp);
+ /*
+ * Tell lockdep we inherited freeze protection from submission
+ * thread.
+ */
+ if (req->flags & REQ_F_ISREG) {
+ struct inode *inode = file_inode(req->file);
- /*
- * Tell lockdep we inherited freeze protection from submission
- * thread.
- */
- if (S_ISREG(inode->i_mode))
- __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
- file_end_write(kiocb->ki_filp);
+ __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
}
+ file_end_write(req->file);
}
-static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
+static void io_complete_rw_common(struct kiocb *kiocb, long res)
{
struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
- kiocb_end_write(kiocb);
+ if (kiocb->ki_flags & IOCB_WRITE)
+ kiocb_end_write(req);
if ((req->flags & REQ_F_LINK) && res != req->result)
req->flags |= REQ_F_FAIL_LINK;
- io_cqring_add_event(req->ctx, req->user_data, res);
+ io_cqring_add_event(req, res);
+}
+
+static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
+{
+ struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
+
+ io_complete_rw_common(kiocb, res);
io_put_req(req);
}
+static struct io_kiocb *__io_complete_rw(struct kiocb *kiocb, long res)
+{
+ struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
+ struct io_kiocb *nxt = NULL;
+
+ io_complete_rw_common(kiocb, res);
+ io_put_req_find_next(req, &nxt);
+
+ return nxt;
+}
+
static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
{
struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
- kiocb_end_write(kiocb);
+ if (kiocb->ki_flags & IOCB_WRITE)
+ kiocb_end_write(req);
if ((req->flags & REQ_F_LINK) && res != req->result)
req->flags |= REQ_F_FAIL_LINK;
@@ -1052,10 +1364,9 @@ static bool io_file_supports_async(struct file *file)
return false;
}
-static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
- bool force_nonblock)
+static int io_prep_rw(struct io_kiocb *req, bool force_nonblock)
{
- const struct io_uring_sqe *sqe = s->sqe;
+ const struct io_uring_sqe *sqe = req->submit.sqe;
struct io_ring_ctx *ctx = req->ctx;
struct kiocb *kiocb = &req->rw;
unsigned ioprio;
@@ -1064,8 +1375,17 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
if (!req->file)
return -EBADF;
- if (force_nonblock && !io_file_supports_async(req->file))
- force_nonblock = false;
+ if (S_ISREG(file_inode(req->file)->i_mode))
+ req->flags |= REQ_F_ISREG;
+
+ /*
+ * If the file doesn't support async, mark it as REQ_F_MUST_PUNT so
+ * we know to async punt it even if it was opened O_NONBLOCK
+ */
+ if (force_nonblock && !io_file_supports_async(req->file)) {
+ req->flags |= REQ_F_MUST_PUNT;
+ return -EAGAIN;
+ }
kiocb->ki_pos = READ_ONCE(sqe->off);
kiocb->ki_flags = iocb_flags(kiocb->ki_filp);
@@ -1086,7 +1406,8 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
return ret;
/* don't allow async punt if RWF_NOWAIT was requested */
- if (kiocb->ki_flags & IOCB_NOWAIT)
+ if ((kiocb->ki_flags & IOCB_NOWAIT) ||
+ (req->file->f_flags & O_NONBLOCK))
req->flags |= REQ_F_NOWAIT;
if (force_nonblock)
@@ -1099,6 +1420,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
kiocb->ki_flags |= IOCB_HIPRI;
kiocb->ki_complete = io_complete_rw_iopoll;
+ req->result = 0;
} else {
if (kiocb->ki_flags & IOCB_HIPRI)
return -EINVAL;
@@ -1128,6 +1450,15 @@ static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret)
}
}
+static void kiocb_done(struct kiocb *kiocb, ssize_t ret, struct io_kiocb **nxt,
+ bool in_async)
+{
+ if (in_async && ret >= 0 && nxt && kiocb->ki_complete == io_complete_rw)
+ *nxt = __io_complete_rw(kiocb, ret);
+ else
+ io_rw_done(kiocb, ret);
+}
+
static int io_import_fixed(struct io_ring_ctx *ctx, int rw,
const struct io_uring_sqe *sqe,
struct iov_iter *iter)
@@ -1199,7 +1530,7 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw,
}
}
- return 0;
+ return len;
}
static ssize_t io_import_iovec(struct io_ring_ctx *ctx, int rw,
@@ -1239,65 +1570,6 @@ static ssize_t io_import_iovec(struct io_ring_ctx *ctx, int rw,
return import_iovec(rw, buf, sqe_len, UIO_FASTIOV, iovec, iter);
}
-static inline bool io_should_merge(struct async_list *al, struct kiocb *kiocb)
-{
- if (al->file == kiocb->ki_filp) {
- off_t start, end;
-
- /*
- * Allow merging if we're anywhere in the range of the same
- * page. Generally this happens for sub-page reads or writes,
- * and it's beneficial to allow the first worker to bring the
- * page in and the piggy backed work can then work on the
- * cached page.
- */
- start = al->io_start & PAGE_MASK;
- end = (al->io_start + al->io_len + PAGE_SIZE - 1) & PAGE_MASK;
- if (kiocb->ki_pos >= start && kiocb->ki_pos <= end)
- return true;
- }
-
- al->file = NULL;
- return false;
-}
-
-/*
- * Make a note of the last file/offset/direction we punted to async
- * context. We'll use this information to see if we can piggy back a
- * sequential request onto the previous one, if it's still hasn't been
- * completed by the async worker.
- */
-static void io_async_list_note(int rw, struct io_kiocb *req, size_t len)
-{
- struct async_list *async_list = &req->ctx->pending_async[rw];
- struct kiocb *kiocb = &req->rw;
- struct file *filp = kiocb->ki_filp;
-
- if (io_should_merge(async_list, kiocb)) {
- unsigned long max_bytes;
-
- /* Use 8x RA size as a decent limiter for both reads/writes */
- max_bytes = filp->f_ra.ra_pages << (PAGE_SHIFT + 3);
- if (!max_bytes)
- max_bytes = VM_READAHEAD_PAGES << (PAGE_SHIFT + 3);
-
- /* If max len are exceeded, reset the state */
- if (async_list->io_len + len <= max_bytes) {
- req->flags |= REQ_F_SEQ_PREV;
- async_list->io_len += len;
- } else {
- async_list->file = NULL;
- }
- }
-
- /* New file? Reset state. */
- if (async_list->file != filp) {
- async_list->io_start = kiocb->ki_pos;
- async_list->io_len = len;
- async_list->file = filp;
- }
-}
-
/*
* For files that don't have ->read_iter() and ->write_iter(), handle them
* by looping over ->read() or ->write() manually.
@@ -1343,7 +1615,7 @@ static ssize_t loop_rw_iter(int rw, struct file *file, struct kiocb *kiocb,
return ret;
}
-static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
+static int io_read(struct io_kiocb *req, struct io_kiocb **nxt,
bool force_nonblock)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
@@ -1353,7 +1625,7 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
size_t iov_count;
ssize_t read_size, ret;
- ret = io_prep_rw(req, s, force_nonblock);
+ ret = io_prep_rw(req, force_nonblock);
if (ret)
return ret;
file = kiocb->ki_filp;
@@ -1361,7 +1633,7 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
if (unlikely(!(file->f_mode & FMODE_READ)))
return -EBADF;
- ret = io_import_iovec(req->ctx, READ, s, &iovec, &iter);
+ ret = io_import_iovec(req->ctx, READ, &req->submit, &iovec, &iter);
if (ret < 0)
return ret;
@@ -1387,26 +1659,21 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
* need async punt anyway, so it's more efficient to do it
* here.
*/
- if (force_nonblock && ret2 > 0 && ret2 < read_size)
+ if (force_nonblock && !(req->flags & REQ_F_NOWAIT) &&
+ (req->flags & REQ_F_ISREG) &&
+ ret2 > 0 && ret2 < read_size)
ret2 = -EAGAIN;
/* Catch -EAGAIN return for forced non-blocking submission */
- if (!force_nonblock || ret2 != -EAGAIN) {
- io_rw_done(kiocb, ret2);
- } else {
- /*
- * If ->needs_lock is true, we're already in async
- * context.
- */
- if (!s->needs_lock)
- io_async_list_note(READ, req, iov_count);
+ if (!force_nonblock || ret2 != -EAGAIN)
+ kiocb_done(kiocb, ret2, nxt, req->submit.in_async);
+ else
ret = -EAGAIN;
- }
}
kfree(iovec);
return ret;
}
-static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
+static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
bool force_nonblock)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
@@ -1416,7 +1683,7 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
size_t iov_count;
ssize_t ret;
- ret = io_prep_rw(req, s, force_nonblock);
+ ret = io_prep_rw(req, force_nonblock);
if (ret)
return ret;
@@ -1424,7 +1691,7 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
if (unlikely(!(file->f_mode & FMODE_WRITE)))
return -EBADF;
- ret = io_import_iovec(req->ctx, WRITE, s, &iovec, &iter);
+ ret = io_import_iovec(req->ctx, WRITE, &req->submit, &iovec, &iter);
if (ret < 0)
return ret;
@@ -1434,12 +1701,8 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
iov_count = iov_iter_count(&iter);
ret = -EAGAIN;
- if (force_nonblock && !(kiocb->ki_flags & IOCB_DIRECT)) {
- /* If ->needs_lock is true, we're already in async context. */
- if (!s->needs_lock)
- io_async_list_note(WRITE, req, iov_count);
+ if (force_nonblock && !(kiocb->ki_flags & IOCB_DIRECT))
goto out_free;
- }
ret = rw_verify_area(WRITE, file, &kiocb->ki_pos, iov_count);
if (!ret) {
@@ -1452,7 +1715,7 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
* released so that it doesn't complain about the held lock when
* we return to userspace.
*/
- if (S_ISREG(file_inode(file)->i_mode)) {
+ if (req->flags & REQ_F_ISREG) {
__sb_start_write(file_inode(file)->i_sb,
SB_FREEZE_WRITE, true);
__sb_writers_release(file_inode(file)->i_sb,
@@ -1464,17 +1727,10 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
ret2 = call_write_iter(file, kiocb, &iter);
else
ret2 = loop_rw_iter(WRITE, file, kiocb, &iter);
- if (!force_nonblock || ret2 != -EAGAIN) {
- io_rw_done(kiocb, ret2);
- } else {
- /*
- * If ->needs_lock is true, we're already in async
- * context.
- */
- if (!s->needs_lock)
- io_async_list_note(WRITE, req, iov_count);
+ if (!force_nonblock || ret2 != -EAGAIN)
+ kiocb_done(kiocb, ret2, nxt, req->submit.in_async);
+ else
ret = -EAGAIN;
- }
}
out_free:
kfree(iovec);
@@ -1484,15 +1740,14 @@ out_free:
/*
* IORING_OP_NOP just posts a completion event, nothing else.
*/
-static int io_nop(struct io_kiocb *req, u64 user_data)
+static int io_nop(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
- long err = 0;
if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- io_cqring_add_event(ctx, user_data, err);
+ io_cqring_add_event(req, 0);
io_put_req(req);
return 0;
}
@@ -1513,7 +1768,7 @@ static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
}
static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe,
- bool force_nonblock)
+ struct io_kiocb **nxt, bool force_nonblock)
{
loff_t sqe_off = READ_ONCE(sqe->off);
loff_t sqe_len = READ_ONCE(sqe->len);
@@ -1539,8 +1794,8 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (ret < 0 && (req->flags & REQ_F_LINK))
req->flags |= REQ_F_FAIL_LINK;
- io_cqring_add_event(req->ctx, sqe->user_data, ret);
- io_put_req(req);
+ io_cqring_add_event(req, ret);
+ io_put_req_find_next(req, nxt);
return 0;
}
@@ -1562,6 +1817,7 @@ static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe)
static int io_sync_file_range(struct io_kiocb *req,
const struct io_uring_sqe *sqe,
+ struct io_kiocb **nxt,
bool force_nonblock)
{
loff_t sqe_off;
@@ -1585,14 +1841,14 @@ static int io_sync_file_range(struct io_kiocb *req,
if (ret < 0 && (req->flags & REQ_F_LINK))
req->flags |= REQ_F_FAIL_LINK;
- io_cqring_add_event(req->ctx, sqe->user_data, ret);
- io_put_req(req);
+ io_cqring_add_event(req, ret);
+ io_put_req_find_next(req, nxt);
return 0;
}
#if defined(CONFIG_NET)
static int io_send_recvmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe,
- bool force_nonblock,
+ struct io_kiocb **nxt, bool force_nonblock,
long (*fn)(struct socket *, struct user_msghdr __user *,
unsigned int))
{
@@ -1621,32 +1877,80 @@ static int io_send_recvmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe,
return ret;
}
- io_cqring_add_event(req->ctx, sqe->user_data, ret);
- io_put_req(req);
+ io_cqring_add_event(req, ret);
+ if (ret < 0 && (req->flags & REQ_F_LINK))
+ req->flags |= REQ_F_FAIL_LINK;
+ io_put_req_find_next(req, nxt);
return 0;
}
#endif
static int io_sendmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe,
- bool force_nonblock)
+ struct io_kiocb **nxt, bool force_nonblock)
{
#if defined(CONFIG_NET)
- return io_send_recvmsg(req, sqe, force_nonblock, __sys_sendmsg_sock);
+ return io_send_recvmsg(req, sqe, nxt, force_nonblock,
+ __sys_sendmsg_sock);
#else
return -EOPNOTSUPP;
#endif
}
static int io_recvmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe,
- bool force_nonblock)
+ struct io_kiocb **nxt, bool force_nonblock)
+{
+#if defined(CONFIG_NET)
+ return io_send_recvmsg(req, sqe, nxt, force_nonblock,
+ __sys_recvmsg_sock);
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
+static int io_accept(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+ struct io_kiocb **nxt, bool force_nonblock)
{
#if defined(CONFIG_NET)
- return io_send_recvmsg(req, sqe, force_nonblock, __sys_recvmsg_sock);
+ struct sockaddr __user *addr;
+ int __user *addr_len;
+ unsigned file_flags;
+ int flags, ret;
+
+ if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
+ return -EINVAL;
+ if (sqe->ioprio || sqe->off || sqe->len || sqe->buf_index)
+ return -EINVAL;
+
+ addr = (struct sockaddr __user *) (unsigned long) READ_ONCE(sqe->addr);
+ addr_len = (int __user *) (unsigned long) READ_ONCE(sqe->addr2);
+ flags = READ_ONCE(sqe->accept_flags);
+ file_flags = force_nonblock ? O_NONBLOCK : 0;
+
+ ret = __sys_accept4_file(req->file, file_flags, addr, addr_len, flags);
+ if (ret == -EAGAIN && force_nonblock) {
+ req->work.flags |= IO_WQ_WORK_NEEDS_FILES;
+ return -EAGAIN;
+ }
+ if (ret == -ERESTARTSYS)
+ ret = -EINTR;
+ if (ret < 0 && (req->flags & REQ_F_LINK))
+ req->flags |= REQ_F_FAIL_LINK;
+ io_cqring_add_event(req, ret);
+ io_put_req_find_next(req, nxt);
+ return 0;
#else
return -EOPNOTSUPP;
#endif
}
+static inline void io_poll_remove_req(struct io_kiocb *req)
+{
+ if (!RB_EMPTY_NODE(&req->rb_node)) {
+ rb_erase(&req->rb_node, &req->ctx->cancel_tree);
+ RB_CLEAR_NODE(&req->rb_node);
+ }
+}
+
static void io_poll_remove_one(struct io_kiocb *req)
{
struct io_poll_iocb *poll = &req->poll;
@@ -1655,25 +1959,47 @@ static void io_poll_remove_one(struct io_kiocb *req)
WRITE_ONCE(poll->canceled, true);
if (!list_empty(&poll->wait.entry)) {
list_del_init(&poll->wait.entry);
- io_queue_async_work(req->ctx, req);
+ io_queue_async_work(req);
}
spin_unlock(&poll->head->lock);
-
- list_del_init(&req->list);
+ io_poll_remove_req(req);
}
static void io_poll_remove_all(struct io_ring_ctx *ctx)
{
+ struct rb_node *node;
struct io_kiocb *req;
spin_lock_irq(&ctx->completion_lock);
- while (!list_empty(&ctx->cancel_list)) {
- req = list_first_entry(&ctx->cancel_list, struct io_kiocb,list);
+ while ((node = rb_first(&ctx->cancel_tree)) != NULL) {
+ req = rb_entry(node, struct io_kiocb, rb_node);
io_poll_remove_one(req);
}
spin_unlock_irq(&ctx->completion_lock);
}
+static int io_poll_cancel(struct io_ring_ctx *ctx, __u64 sqe_addr)
+{
+ struct rb_node *p, *parent = NULL;
+ struct io_kiocb *req;
+
+ p = ctx->cancel_tree.rb_node;
+ while (p) {
+ parent = p;
+ req = rb_entry(parent, struct io_kiocb, rb_node);
+ if (sqe_addr < req->user_data) {
+ p = p->rb_left;
+ } else if (sqe_addr > req->user_data) {
+ p = p->rb_right;
+ } else {
+ io_poll_remove_one(req);
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+
/*
* Find a running poll command that matches one specified in sqe->addr,
* and remove it if found.
@@ -1681,8 +2007,7 @@ static void io_poll_remove_all(struct io_ring_ctx *ctx)
static int io_poll_remove(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_ring_ctx *ctx = req->ctx;
- struct io_kiocb *poll_req, *next;
- int ret = -ENOENT;
+ int ret;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
@@ -1691,36 +2016,38 @@ static int io_poll_remove(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return -EINVAL;
spin_lock_irq(&ctx->completion_lock);
- list_for_each_entry_safe(poll_req, next, &ctx->cancel_list, list) {
- if (READ_ONCE(sqe->addr) == poll_req->user_data) {
- io_poll_remove_one(poll_req);
- ret = 0;
- break;
- }
- }
+ ret = io_poll_cancel(ctx, READ_ONCE(sqe->addr));
spin_unlock_irq(&ctx->completion_lock);
- io_cqring_add_event(req->ctx, sqe->user_data, ret);
+ io_cqring_add_event(req, ret);
+ if (ret < 0 && (req->flags & REQ_F_LINK))
+ req->flags |= REQ_F_FAIL_LINK;
io_put_req(req);
return 0;
}
-static void io_poll_complete(struct io_ring_ctx *ctx, struct io_kiocb *req,
- __poll_t mask)
+static void io_poll_complete(struct io_kiocb *req, __poll_t mask)
{
+ struct io_ring_ctx *ctx = req->ctx;
+
req->poll.done = true;
- io_cqring_fill_event(ctx, req->user_data, mangle_poll(mask));
+ io_cqring_fill_event(req, mangle_poll(mask));
io_commit_cqring(ctx);
}
-static void io_poll_complete_work(struct work_struct *work)
+static void io_poll_complete_work(struct io_wq_work **workptr)
{
+ struct io_wq_work *work = *workptr;
struct io_kiocb *req = container_of(work, struct io_kiocb, work);
struct io_poll_iocb *poll = &req->poll;
struct poll_table_struct pt = { ._key = poll->events };
struct io_ring_ctx *ctx = req->ctx;
+ struct io_kiocb *nxt = NULL;
__poll_t mask = 0;
+ if (work->flags & IO_WQ_WORK_CANCEL)
+ WRITE_ONCE(poll->canceled, true);
+
if (!READ_ONCE(poll->canceled))
mask = vfs_poll(poll->file, &pt) & poll->events;
@@ -1737,12 +2064,15 @@ static void io_poll_complete_work(struct work_struct *work)
spin_unlock_irq(&ctx->completion_lock);
return;
}
- list_del_init(&req->list);
- io_poll_complete(ctx, req, mask);
+ io_poll_remove_req(req);
+ io_poll_complete(req, mask);
spin_unlock_irq(&ctx->completion_lock);
io_cqring_ev_posted(ctx);
- io_put_req(req);
+
+ io_put_req_find_next(req, &nxt);
+ if (nxt)
+ *workptr = &nxt->work;
}
static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
@@ -1761,15 +2091,22 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
list_del_init(&poll->wait.entry);
+ /*
+ * Run completion inline if we can. We're using trylock here because
+ * we are violating the completion_lock -> poll wq lock ordering.
+ * If we have a link timeout we're going to need the completion_lock
+ * for finalizing the request, mark us as having grabbed that already.
+ */
if (mask && spin_trylock_irqsave(&ctx->completion_lock, flags)) {
- list_del(&req->list);
- io_poll_complete(ctx, req, mask);
+ io_poll_remove_req(req);
+ io_poll_complete(req, mask);
+ req->flags |= REQ_F_COMP_LOCKED;
+ io_put_req(req);
spin_unlock_irqrestore(&ctx->completion_lock, flags);
io_cqring_ev_posted(ctx);
- io_put_req(req);
} else {
- io_queue_async_work(ctx, req);
+ io_queue_async_work(req);
}
return 1;
@@ -1796,7 +2133,27 @@ static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
add_wait_queue(head, &pt->req->poll.wait);
}
-static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+static void io_poll_req_insert(struct io_kiocb *req)
+{
+ struct io_ring_ctx *ctx = req->ctx;
+ struct rb_node **p = &ctx->cancel_tree.rb_node;
+ struct rb_node *parent = NULL;
+ struct io_kiocb *tmp;
+
+ while (*p) {
+ parent = *p;
+ tmp = rb_entry(parent, struct io_kiocb, rb_node);
+ if (req->user_data < tmp->user_data)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+ rb_link_node(&req->rb_node, parent, p);
+ rb_insert_color(&req->rb_node, &ctx->cancel_tree);
+}
+
+static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+ struct io_kiocb **nxt)
{
struct io_poll_iocb *poll = &req->poll;
struct io_ring_ctx *ctx = req->ctx;
@@ -1813,9 +2170,10 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return -EBADF;
req->submit.sqe = NULL;
- INIT_WORK(&req->work, io_poll_complete_work);
+ INIT_IO_WORK(&req->work, io_poll_complete_work);
events = READ_ONCE(sqe->poll_events);
poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP;
+ RB_CLEAR_NODE(&req->rb_node);
poll->head = NULL;
poll->done = false;
@@ -1848,18 +2206,18 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
else if (cancel)
WRITE_ONCE(poll->canceled, true);
else if (!poll->done) /* actually waiting for an event */
- list_add_tail(&req->list, &ctx->cancel_list);
+ io_poll_req_insert(req);
spin_unlock(&poll->head->lock);
}
if (mask) { /* no async, we'd stolen it */
ipt.error = 0;
- io_poll_complete(ctx, req, mask);
+ io_poll_complete(req, mask);
}
spin_unlock_irq(&ctx->completion_lock);
if (mask) {
io_cqring_ev_posted(ctx);
- io_put_req(req);
+ io_put_req_find_next(req, nxt);
}
return ipt.error;
}
@@ -1875,76 +2233,265 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
atomic_inc(&ctx->cq_timeouts);
spin_lock_irqsave(&ctx->completion_lock, flags);
- list_del(&req->list);
+ /*
+ * We could be racing with timeout deletion. If the list is empty,
+ * then timeout lookup already found it and will be handling it.
+ */
+ if (!list_empty(&req->list)) {
+ struct io_kiocb *prev;
- io_cqring_fill_event(ctx, req->user_data, -ETIME);
+ /*
+ * Adjust the reqs sequence before the current one because it
+ * will consume a slot in the cq_ring and the the cq_tail
+ * pointer will be increased, otherwise other timeout reqs may
+ * return in advance without waiting for enough wait_nr.
+ */
+ prev = req;
+ list_for_each_entry_continue_reverse(prev, &ctx->timeout_list, list)
+ prev->sequence++;
+ list_del_init(&req->list);
+ }
+
+ io_cqring_fill_event(req, -ETIME);
io_commit_cqring(ctx);
spin_unlock_irqrestore(&ctx->completion_lock, flags);
io_cqring_ev_posted(ctx);
-
+ if (req->flags & REQ_F_LINK)
+ req->flags |= REQ_F_FAIL_LINK;
io_put_req(req);
return HRTIMER_NORESTART;
}
+static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data)
+{
+ struct io_kiocb *req;
+ int ret = -ENOENT;
+
+ list_for_each_entry(req, &ctx->timeout_list, list) {
+ if (user_data == req->user_data) {
+ list_del_init(&req->list);
+ ret = 0;
+ break;
+ }
+ }
+
+ if (ret == -ENOENT)
+ return ret;
+
+ ret = hrtimer_try_to_cancel(&req->timeout.timer);
+ if (ret == -1)
+ return -EALREADY;
+
+ io_cqring_fill_event(req, -ECANCELED);
+ io_put_req(req);
+ return 0;
+}
+
+/*
+ * Remove or update an existing timeout command
+ */
+static int io_timeout_remove(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ struct io_ring_ctx *ctx = req->ctx;
+ unsigned flags;
+ int ret;
+
+ if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
+ return -EINVAL;
+ if (sqe->flags || sqe->ioprio || sqe->buf_index || sqe->len)
+ return -EINVAL;
+ flags = READ_ONCE(sqe->timeout_flags);
+ if (flags)
+ return -EINVAL;
+
+ spin_lock_irq(&ctx->completion_lock);
+ ret = io_timeout_cancel(ctx, READ_ONCE(sqe->addr));
+
+ io_cqring_fill_event(req, ret);
+ io_commit_cqring(ctx);
+ spin_unlock_irq(&ctx->completion_lock);
+ io_cqring_ev_posted(ctx);
+ if (ret < 0 && req->flags & REQ_F_LINK)
+ req->flags |= REQ_F_FAIL_LINK;
+ io_put_req(req);
+ return 0;
+}
+
static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- unsigned count, req_dist, tail_index;
+ unsigned count;
struct io_ring_ctx *ctx = req->ctx;
struct list_head *entry;
+ enum hrtimer_mode mode;
struct timespec64 ts;
+ unsigned span = 0;
+ unsigned flags;
if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
- if (sqe->flags || sqe->ioprio || sqe->buf_index || sqe->timeout_flags ||
- sqe->len != 1)
+ if (sqe->flags || sqe->ioprio || sqe->buf_index || sqe->len != 1)
+ return -EINVAL;
+ flags = READ_ONCE(sqe->timeout_flags);
+ if (flags & ~IORING_TIMEOUT_ABS)
return -EINVAL;
if (get_timespec64(&ts, u64_to_user_ptr(sqe->addr)))
return -EFAULT;
+ if (flags & IORING_TIMEOUT_ABS)
+ mode = HRTIMER_MODE_ABS;
+ else
+ mode = HRTIMER_MODE_REL;
+
+ hrtimer_init(&req->timeout.timer, CLOCK_MONOTONIC, mode);
+ req->flags |= REQ_F_TIMEOUT;
+
/*
* sqe->off holds how many events that need to occur for this
- * timeout event to be satisfied.
+ * timeout event to be satisfied. If it isn't set, then this is
+ * a pure timeout request, sequence isn't used.
*/
count = READ_ONCE(sqe->off);
- if (!count)
- count = 1;
+ if (!count) {
+ req->flags |= REQ_F_TIMEOUT_NOSEQ;
+ spin_lock_irq(&ctx->completion_lock);
+ entry = ctx->timeout_list.prev;
+ goto add;
+ }
req->sequence = ctx->cached_sq_head + count - 1;
- req->flags |= REQ_F_TIMEOUT;
+ /* reuse it to store the count */
+ req->submit.sequence = count;
/*
* Insertion sort, ensuring the first entry in the list is always
* the one we need first.
*/
- tail_index = ctx->cached_cq_tail - ctx->rings->sq_dropped;
- req_dist = req->sequence - tail_index;
spin_lock_irq(&ctx->completion_lock);
list_for_each_prev(entry, &ctx->timeout_list) {
struct io_kiocb *nxt = list_entry(entry, struct io_kiocb, list);
- unsigned dist;
+ unsigned nxt_sq_head;
+ long long tmp, tmp_nxt;
+
+ if (nxt->flags & REQ_F_TIMEOUT_NOSEQ)
+ continue;
+
+ /*
+ * Since cached_sq_head + count - 1 can overflow, use type long
+ * long to store it.
+ */
+ tmp = (long long)ctx->cached_sq_head + count - 1;
+ nxt_sq_head = nxt->sequence - nxt->submit.sequence + 1;
+ tmp_nxt = (long long)nxt_sq_head + nxt->submit.sequence - 1;
+
+ /*
+ * cached_sq_head may overflow, and it will never overflow twice
+ * once there is some timeout req still be valid.
+ */
+ if (ctx->cached_sq_head < nxt_sq_head)
+ tmp += UINT_MAX;
- dist = nxt->sequence - tail_index;
- if (req_dist >= dist)
+ if (tmp > tmp_nxt)
break;
+
+ /*
+ * Sequence of reqs after the insert one and itself should
+ * be adjusted because each timeout req consumes a slot.
+ */
+ span++;
+ nxt->sequence++;
}
+ req->sequence -= span;
+add:
list_add(&req->list, entry);
+ req->timeout.timer.function = io_timeout_fn;
+ hrtimer_start(&req->timeout.timer, timespec64_to_ktime(ts), mode);
spin_unlock_irq(&ctx->completion_lock);
+ return 0;
+}
- hrtimer_init(&req->timeout.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- req->timeout.timer.function = io_timeout_fn;
- hrtimer_start(&req->timeout.timer, timespec64_to_ktime(ts),
- HRTIMER_MODE_REL);
+static bool io_cancel_cb(struct io_wq_work *work, void *data)
+{
+ struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+
+ return req->user_data == (unsigned long) data;
+}
+
+static int io_async_cancel_one(struct io_ring_ctx *ctx, void *sqe_addr)
+{
+ enum io_wq_cancel cancel_ret;
+ int ret = 0;
+
+ cancel_ret = io_wq_cancel_cb(ctx->io_wq, io_cancel_cb, sqe_addr);
+ switch (cancel_ret) {
+ case IO_WQ_CANCEL_OK:
+ ret = 0;
+ break;
+ case IO_WQ_CANCEL_RUNNING:
+ ret = -EALREADY;
+ break;
+ case IO_WQ_CANCEL_NOTFOUND:
+ ret = -ENOENT;
+ break;
+ }
+
+ return ret;
+}
+
+static void io_async_find_and_cancel(struct io_ring_ctx *ctx,
+ struct io_kiocb *req, __u64 sqe_addr,
+ struct io_kiocb **nxt)
+{
+ unsigned long flags;
+ int ret;
+
+ ret = io_async_cancel_one(ctx, (void *) (unsigned long) sqe_addr);
+ if (ret != -ENOENT) {
+ spin_lock_irqsave(&ctx->completion_lock, flags);
+ goto done;
+ }
+
+ spin_lock_irqsave(&ctx->completion_lock, flags);
+ ret = io_timeout_cancel(ctx, sqe_addr);
+ if (ret != -ENOENT)
+ goto done;
+ ret = io_poll_cancel(ctx, sqe_addr);
+done:
+ io_cqring_fill_event(req, ret);
+ io_commit_cqring(ctx);
+ spin_unlock_irqrestore(&ctx->completion_lock, flags);
+ io_cqring_ev_posted(ctx);
+
+ if (ret < 0 && (req->flags & REQ_F_LINK))
+ req->flags |= REQ_F_FAIL_LINK;
+ io_put_req_find_next(req, nxt);
+}
+
+static int io_async_cancel(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+ struct io_kiocb **nxt)
+{
+ struct io_ring_ctx *ctx = req->ctx;
+
+ if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
+ return -EINVAL;
+ if (sqe->flags || sqe->ioprio || sqe->off || sqe->len ||
+ sqe->cancel_flags)
+ return -EINVAL;
+
+ io_async_find_and_cancel(ctx, req, READ_ONCE(sqe->addr), NULL);
return 0;
}
-static int io_req_defer(struct io_ring_ctx *ctx, struct io_kiocb *req,
- const struct io_uring_sqe *sqe)
+static int io_req_defer(struct io_kiocb *req)
{
+ const struct io_uring_sqe *sqe = req->submit.sqe;
struct io_uring_sqe *sqe_copy;
+ struct io_ring_ctx *ctx = req->ctx;
- if (!io_sequence_defer(ctx, req) && list_empty(&ctx->defer_list))
+ /* Still need defer if there is pending req in defer list. */
+ if (!req_need_defer(req) && list_empty(&ctx->defer_list))
return 0;
sqe_copy = kmalloc(sizeof(*sqe_copy), GFP_KERNEL);
@@ -1952,7 +2499,7 @@ static int io_req_defer(struct io_ring_ctx *ctx, struct io_kiocb *req,
return -EAGAIN;
spin_lock_irq(&ctx->completion_lock);
- if (!io_sequence_defer(ctx, req) && list_empty(&ctx->defer_list)) {
+ if (!req_need_defer(req) && list_empty(&ctx->defer_list)) {
spin_unlock_irq(&ctx->completion_lock);
kfree(sqe_copy);
return 0;
@@ -1961,64 +2508,70 @@ static int io_req_defer(struct io_ring_ctx *ctx, struct io_kiocb *req,
memcpy(sqe_copy, sqe, sizeof(*sqe_copy));
req->submit.sqe = sqe_copy;
- INIT_WORK(&req->work, io_sq_wq_submit_work);
+ trace_io_uring_defer(ctx, req, false);
list_add_tail(&req->list, &ctx->defer_list);
spin_unlock_irq(&ctx->completion_lock);
return -EIOCBQUEUED;
}
-static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
- const struct sqe_submit *s, bool force_nonblock)
+static int __io_submit_sqe(struct io_kiocb *req, struct io_kiocb **nxt,
+ bool force_nonblock)
{
int ret, opcode;
-
- req->user_data = READ_ONCE(s->sqe->user_data);
-
- if (unlikely(s->index >= ctx->sq_entries))
- return -EINVAL;
+ struct sqe_submit *s = &req->submit;
+ struct io_ring_ctx *ctx = req->ctx;
opcode = READ_ONCE(s->sqe->opcode);
switch (opcode) {
case IORING_OP_NOP:
- ret = io_nop(req, req->user_data);
+ ret = io_nop(req);
break;
case IORING_OP_READV:
if (unlikely(s->sqe->buf_index))
return -EINVAL;
- ret = io_read(req, s, force_nonblock);
+ ret = io_read(req, nxt, force_nonblock);
break;
case IORING_OP_WRITEV:
if (unlikely(s->sqe->buf_index))
return -EINVAL;
- ret = io_write(req, s, force_nonblock);
+ ret = io_write(req, nxt, force_nonblock);
break;
case IORING_OP_READ_FIXED:
- ret = io_read(req, s, force_nonblock);
+ ret = io_read(req, nxt, force_nonblock);
break;
case IORING_OP_WRITE_FIXED:
- ret = io_write(req, s, force_nonblock);
+ ret = io_write(req, nxt, force_nonblock);
break;
case IORING_OP_FSYNC:
- ret = io_fsync(req, s->sqe, force_nonblock);
+ ret = io_fsync(req, s->sqe, nxt, force_nonblock);
break;
case IORING_OP_POLL_ADD:
- ret = io_poll_add(req, s->sqe);
+ ret = io_poll_add(req, s->sqe, nxt);
break;
case IORING_OP_POLL_REMOVE:
ret = io_poll_remove(req, s->sqe);
break;
case IORING_OP_SYNC_FILE_RANGE:
- ret = io_sync_file_range(req, s->sqe, force_nonblock);
+ ret = io_sync_file_range(req, s->sqe, nxt, force_nonblock);
break;
case IORING_OP_SENDMSG:
- ret = io_sendmsg(req, s->sqe, force_nonblock);
+ ret = io_sendmsg(req, s->sqe, nxt, force_nonblock);
break;
case IORING_OP_RECVMSG:
- ret = io_recvmsg(req, s->sqe, force_nonblock);
+ ret = io_recvmsg(req, s->sqe, nxt, force_nonblock);
break;
case IORING_OP_TIMEOUT:
ret = io_timeout(req, s->sqe);
break;
+ case IORING_OP_TIMEOUT_REMOVE:
+ ret = io_timeout_remove(req, s->sqe);
+ break;
+ case IORING_OP_ACCEPT:
+ ret = io_accept(req, s->sqe, nxt, force_nonblock);
+ break;
+ case IORING_OP_ASYNC_CANCEL:
+ ret = io_async_cancel(req, s->sqe, nxt);
+ break;
default:
ret = -EINVAL;
break;
@@ -2032,187 +2585,65 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
return -EAGAIN;
/* workqueue context doesn't hold uring_lock, grab it now */
- if (s->needs_lock)
+ if (s->in_async)
mutex_lock(&ctx->uring_lock);
io_iopoll_req_issued(req);
- if (s->needs_lock)
+ if (s->in_async)
mutex_unlock(&ctx->uring_lock);
}
return 0;
}
-static struct async_list *io_async_list_from_sqe(struct io_ring_ctx *ctx,
- const struct io_uring_sqe *sqe)
-{
- switch (sqe->opcode) {
- case IORING_OP_READV:
- case IORING_OP_READ_FIXED:
- return &ctx->pending_async[READ];
- case IORING_OP_WRITEV:
- case IORING_OP_WRITE_FIXED:
- return &ctx->pending_async[WRITE];
- default:
- return NULL;
- }
-}
-
-static inline bool io_sqe_needs_user(const struct io_uring_sqe *sqe)
-{
- u8 opcode = READ_ONCE(sqe->opcode);
-
- return !(opcode == IORING_OP_READ_FIXED ||
- opcode == IORING_OP_WRITE_FIXED);
-}
-
-static void io_sq_wq_submit_work(struct work_struct *work)
+static void io_wq_submit_work(struct io_wq_work **workptr)
{
+ struct io_wq_work *work = *workptr;
struct io_kiocb *req = container_of(work, struct io_kiocb, work);
- struct io_ring_ctx *ctx = req->ctx;
- struct mm_struct *cur_mm = NULL;
- struct async_list *async_list;
- LIST_HEAD(req_list);
- mm_segment_t old_fs;
- int ret;
+ struct sqe_submit *s = &req->submit;
+ const struct io_uring_sqe *sqe = s->sqe;
+ struct io_kiocb *nxt = NULL;
+ int ret = 0;
- async_list = io_async_list_from_sqe(ctx, req->submit.sqe);
-restart:
- do {
- struct sqe_submit *s = &req->submit;
- const struct io_uring_sqe *sqe = s->sqe;
- unsigned int flags = req->flags;
+ /* Ensure we clear previously set non-block flag */
+ req->rw.ki_flags &= ~IOCB_NOWAIT;
- /* Ensure we clear previously set non-block flag */
- req->rw.ki_flags &= ~IOCB_NOWAIT;
+ if (work->flags & IO_WQ_WORK_CANCEL)
+ ret = -ECANCELED;
- ret = 0;
- if (io_sqe_needs_user(sqe) && !cur_mm) {
- if (!mmget_not_zero(ctx->sqo_mm)) {
- ret = -EFAULT;
- } else {
- cur_mm = ctx->sqo_mm;
- use_mm(cur_mm);
- old_fs = get_fs();
- set_fs(USER_DS);
- }
- }
+ if (!ret) {
+ s->has_user = (work->flags & IO_WQ_WORK_HAS_MM) != 0;
+ s->in_async = true;
+ do {
+ ret = __io_submit_sqe(req, &nxt, false);
+ /*
+ * We can get EAGAIN for polled IO even though we're
+ * forcing a sync submission from here, since we can't
+ * wait for request slots on the block side.
+ */
+ if (ret != -EAGAIN)
+ break;
+ cond_resched();
+ } while (1);
+ }
- if (!ret) {
- s->has_user = cur_mm != NULL;
- s->needs_lock = true;
- do {
- ret = __io_submit_sqe(ctx, req, s, false);
- /*
- * We can get EAGAIN for polled IO even though
- * we're forcing a sync submission from here,
- * since we can't wait for request slots on the
- * block side.
- */
- if (ret != -EAGAIN)
- break;
- cond_resched();
- } while (1);
- }
+ /* drop submission reference */
+ io_put_req(req);
- /* drop submission reference */
+ if (ret) {
+ if (req->flags & REQ_F_LINK)
+ req->flags |= REQ_F_FAIL_LINK;
+ io_cqring_add_event(req, ret);
io_put_req(req);
-
- if (ret) {
- io_cqring_add_event(ctx, sqe->user_data, ret);
- io_put_req(req);
- }
-
- /* async context always use a copy of the sqe */
- kfree(sqe);
-
- /* req from defer and link list needn't decrease async cnt */
- if (flags & (REQ_F_IO_DRAINED | REQ_F_LINK_DONE))
- goto out;
-
- if (!async_list)
- break;
- if (!list_empty(&req_list)) {
- req = list_first_entry(&req_list, struct io_kiocb,
- list);
- list_del(&req->list);
- continue;
- }
- if (list_empty(&async_list->list))
- break;
-
- req = NULL;
- spin_lock(&async_list->lock);
- if (list_empty(&async_list->list)) {
- spin_unlock(&async_list->lock);
- break;
- }
- list_splice_init(&async_list->list, &req_list);
- spin_unlock(&async_list->lock);
-
- req = list_first_entry(&req_list, struct io_kiocb, list);
- list_del(&req->list);
- } while (req);
-
- /*
- * Rare case of racing with a submitter. If we find the count has
- * dropped to zero AND we have pending work items, then restart
- * the processing. This is a tiny race window.
- */
- if (async_list) {
- ret = atomic_dec_return(&async_list->cnt);
- while (!ret && !list_empty(&async_list->list)) {
- spin_lock(&async_list->lock);
- atomic_inc(&async_list->cnt);
- list_splice_init(&async_list->list, &req_list);
- spin_unlock(&async_list->lock);
-
- if (!list_empty(&req_list)) {
- req = list_first_entry(&req_list,
- struct io_kiocb, list);
- list_del(&req->list);
- goto restart;
- }
- ret = atomic_dec_return(&async_list->cnt);
- }
}
-out:
- if (cur_mm) {
- set_fs(old_fs);
- unuse_mm(cur_mm);
- mmput(cur_mm);
- }
-}
-
-/*
- * See if we can piggy back onto previously submitted work, that is still
- * running. We currently only allow this if the new request is sequential
- * to the previous one we punted.
- */
-static bool io_add_to_prev_work(struct async_list *list, struct io_kiocb *req)
-{
- bool ret;
-
- if (!list)
- return false;
- if (!(req->flags & REQ_F_SEQ_PREV))
- return false;
- if (!atomic_read(&list->cnt))
- return false;
+ /* async context always use a copy of the sqe */
+ kfree(sqe);
- ret = true;
- spin_lock(&list->lock);
- list_add_tail(&req->list, &list->list);
- /*
- * Ensure we see a simultaneous modification from io_sq_wq_submit_work()
- */
- smp_mb();
- if (!atomic_read(&list->cnt)) {
- list_del_init(&req->list);
- ret = false;
+ /* if a dependent link is ready, pass it back */
+ if (!ret && nxt) {
+ io_prep_async_work(nxt);
+ *workptr = &nxt->work;
}
- spin_unlock(&list->lock);
- return ret;
}
static bool io_op_needs_file(const struct io_uring_sqe *sqe)
@@ -2222,15 +2653,29 @@ static bool io_op_needs_file(const struct io_uring_sqe *sqe)
switch (op) {
case IORING_OP_NOP:
case IORING_OP_POLL_REMOVE:
+ case IORING_OP_TIMEOUT:
+ case IORING_OP_TIMEOUT_REMOVE:
+ case IORING_OP_ASYNC_CANCEL:
+ case IORING_OP_LINK_TIMEOUT:
return false;
default:
return true;
}
}
-static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s,
- struct io_submit_state *state, struct io_kiocb *req)
+static inline struct file *io_file_from_index(struct io_ring_ctx *ctx,
+ int index)
{
+ struct fixed_file_table *table;
+
+ table = &ctx->file_table[index >> IORING_FILE_TABLE_SHIFT];
+ return table->files[index & IORING_FILE_TABLE_MASK];
+}
+
+static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req)
+{
+ struct sqe_submit *s = &req->submit;
+ struct io_ring_ctx *ctx = req->ctx;
unsigned flags;
int fd;
@@ -2250,14 +2695,18 @@ static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s,
return 0;
if (flags & IOSQE_FIXED_FILE) {
- if (unlikely(!ctx->user_files ||
+ if (unlikely(!ctx->file_table ||
(unsigned) fd >= ctx->nr_user_files))
return -EBADF;
- req->file = ctx->user_files[fd];
+ fd = array_index_nospec(fd, ctx->nr_user_files);
+ req->file = io_file_from_index(ctx, fd);
+ if (!req->file)
+ return -EBADF;
req->flags |= REQ_F_FIXED_FILE;
} else {
if (s->needs_fixed_file)
return -EBADF;
+ trace_io_uring_file_get(ctx, fd);
req->file = io_file_get(state, fd);
if (unlikely(!req->file))
return -EBADF;
@@ -2266,43 +2715,194 @@ static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s,
return 0;
}
-static int __io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
- struct sqe_submit *s, bool force_nonblock)
+static int io_grab_files(struct io_kiocb *req)
+{
+ int ret = -EBADF;
+ struct io_ring_ctx *ctx = req->ctx;
+
+ rcu_read_lock();
+ spin_lock_irq(&ctx->inflight_lock);
+ /*
+ * We use the f_ops->flush() handler to ensure that we can flush
+ * out work accessing these files if the fd is closed. Check if
+ * the fd has changed since we started down this path, and disallow
+ * this operation if it has.
+ */
+ if (fcheck(req->submit.ring_fd) == req->submit.ring_file) {
+ list_add(&req->inflight_entry, &ctx->inflight_list);
+ req->flags |= REQ_F_INFLIGHT;
+ req->work.files = current->files;
+ ret = 0;
+ }
+ spin_unlock_irq(&ctx->inflight_lock);
+ rcu_read_unlock();
+
+ return ret;
+}
+
+static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
+{
+ struct io_kiocb *req = container_of(timer, struct io_kiocb,
+ timeout.timer);
+ struct io_ring_ctx *ctx = req->ctx;
+ struct io_kiocb *prev = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ctx->completion_lock, flags);
+
+ /*
+ * We don't expect the list to be empty, that will only happen if we
+ * race with the completion of the linked work.
+ */
+ if (!list_empty(&req->list)) {
+ prev = list_entry(req->list.prev, struct io_kiocb, link_list);
+ if (refcount_inc_not_zero(&prev->refs))
+ list_del_init(&req->list);
+ else
+ prev = NULL;
+ }
+
+ spin_unlock_irqrestore(&ctx->completion_lock, flags);
+
+ if (prev) {
+ io_async_find_and_cancel(ctx, req, prev->user_data, NULL);
+ io_put_req(prev);
+ } else {
+ io_cqring_add_event(req, -ETIME);
+ io_put_req(req);
+ }
+ return HRTIMER_NORESTART;
+}
+
+static void io_queue_linked_timeout(struct io_kiocb *req, struct timespec64 *ts,
+ enum hrtimer_mode *mode)
+{
+ struct io_ring_ctx *ctx = req->ctx;
+
+ /*
+ * If the list is now empty, then our linked request finished before
+ * we got a chance to setup the timer
+ */
+ spin_lock_irq(&ctx->completion_lock);
+ if (!list_empty(&req->list)) {
+ req->timeout.timer.function = io_link_timeout_fn;
+ hrtimer_start(&req->timeout.timer, timespec64_to_ktime(*ts),
+ *mode);
+ }
+ spin_unlock_irq(&ctx->completion_lock);
+
+ /* drop submission reference */
+ io_put_req(req);
+}
+
+static int io_validate_link_timeout(const struct io_uring_sqe *sqe,
+ struct timespec64 *ts)
+{
+ if (sqe->ioprio || sqe->buf_index || sqe->len != 1 || sqe->off)
+ return -EINVAL;
+ if (sqe->timeout_flags & ~IORING_TIMEOUT_ABS)
+ return -EINVAL;
+ if (get_timespec64(ts, u64_to_user_ptr(sqe->addr)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req,
+ struct timespec64 *ts,
+ enum hrtimer_mode *mode)
{
+ struct io_kiocb *nxt;
int ret;
- ret = __io_submit_sqe(ctx, req, s, force_nonblock);
- if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) {
+ if (!(req->flags & REQ_F_LINK))
+ return NULL;
+
+ nxt = list_first_entry_or_null(&req->link_list, struct io_kiocb, list);
+ if (!nxt || nxt->submit.sqe->opcode != IORING_OP_LINK_TIMEOUT)
+ return NULL;
+
+ ret = io_validate_link_timeout(nxt->submit.sqe, ts);
+ if (ret) {
+ list_del_init(&nxt->list);
+ io_cqring_add_event(nxt, ret);
+ io_double_put_req(nxt);
+ return ERR_PTR(-ECANCELED);
+ }
+
+ if (nxt->submit.sqe->timeout_flags & IORING_TIMEOUT_ABS)
+ *mode = HRTIMER_MODE_ABS;
+ else
+ *mode = HRTIMER_MODE_REL;
+
+ req->flags |= REQ_F_LINK_TIMEOUT;
+ hrtimer_init(&nxt->timeout.timer, CLOCK_MONOTONIC, *mode);
+ return nxt;
+}
+
+static int __io_queue_sqe(struct io_kiocb *req)
+{
+ enum hrtimer_mode mode;
+ struct io_kiocb *nxt;
+ struct timespec64 ts;
+ int ret;
+
+ nxt = io_prep_linked_timeout(req, &ts, &mode);
+ if (IS_ERR(nxt)) {
+ ret = PTR_ERR(nxt);
+ nxt = NULL;
+ goto err;
+ }
+
+ ret = __io_submit_sqe(req, NULL, true);
+
+ /*
+ * We async punt it if the file wasn't marked NOWAIT, or if the file
+ * doesn't support non-blocking read/write attempts
+ */
+ if (ret == -EAGAIN && (!(req->flags & REQ_F_NOWAIT) ||
+ (req->flags & REQ_F_MUST_PUNT))) {
+ struct sqe_submit *s = &req->submit;
struct io_uring_sqe *sqe_copy;
sqe_copy = kmemdup(s->sqe, sizeof(*sqe_copy), GFP_KERNEL);
if (sqe_copy) {
- struct async_list *list;
-
s->sqe = sqe_copy;
- memcpy(&req->submit, s, sizeof(*s));
- list = io_async_list_from_sqe(ctx, s->sqe);
- if (!io_add_to_prev_work(list, req)) {
- if (list)
- atomic_inc(&list->cnt);
- INIT_WORK(&req->work, io_sq_wq_submit_work);
- io_queue_async_work(ctx, req);
+ if (req->work.flags & IO_WQ_WORK_NEEDS_FILES) {
+ ret = io_grab_files(req);
+ if (ret) {
+ kfree(sqe_copy);
+ goto err;
+ }
}
/*
* Queued up for async execution, worker will release
* submit reference when the iocb is actually submitted.
*/
+ io_queue_async_work(req);
+
+ if (nxt)
+ io_queue_linked_timeout(nxt, &ts, &mode);
+
return 0;
}
}
+err:
/* drop submission reference */
io_put_req(req);
+ if (nxt) {
+ if (!ret)
+ io_queue_linked_timeout(nxt, &ts, &mode);
+ else
+ io_put_req(nxt);
+ }
+
/* and drop final reference, if we failed */
if (ret) {
- io_cqring_add_event(ctx, req->user_data, ret);
+ io_cqring_add_event(req, ret);
if (req->flags & REQ_F_LINK)
req->flags |= REQ_F_FAIL_LINK;
io_put_req(req);
@@ -2311,32 +2911,30 @@ static int __io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
return ret;
}
-static int io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
- struct sqe_submit *s, bool force_nonblock)
+static int io_queue_sqe(struct io_kiocb *req)
{
int ret;
- ret = io_req_defer(ctx, req, s->sqe);
+ ret = io_req_defer(req);
if (ret) {
if (ret != -EIOCBQUEUED) {
- io_free_req(req);
- io_cqring_add_event(ctx, s->sqe->user_data, ret);
+ io_cqring_add_event(req, ret);
+ io_double_put_req(req);
}
return 0;
}
- return __io_queue_sqe(ctx, req, s, force_nonblock);
+ return __io_queue_sqe(req);
}
-static int io_queue_link_head(struct io_ring_ctx *ctx, struct io_kiocb *req,
- struct sqe_submit *s, struct io_kiocb *shadow,
- bool force_nonblock)
+static int io_queue_link_head(struct io_kiocb *req, struct io_kiocb *shadow)
{
int ret;
int need_submit = false;
+ struct io_ring_ctx *ctx = req->ctx;
if (!shadow)
- return io_queue_sqe(ctx, req, s, force_nonblock);
+ return io_queue_sqe(req);
/*
* Mark the first IO in link list as DRAIN, let all the following
@@ -2344,11 +2942,12 @@ static int io_queue_link_head(struct io_ring_ctx *ctx, struct io_kiocb *req,
* list.
*/
req->flags |= REQ_F_IO_DRAIN;
- ret = io_req_defer(ctx, req, s->sqe);
+ ret = io_req_defer(req);
if (ret) {
if (ret != -EIOCBQUEUED) {
- io_free_req(req);
- io_cqring_add_event(ctx, s->sqe->user_data, ret);
+ io_cqring_add_event(req, ret);
+ io_double_put_req(req);
+ __io_free_req(shadow);
return 0;
}
} else {
@@ -2361,43 +2960,39 @@ static int io_queue_link_head(struct io_ring_ctx *ctx, struct io_kiocb *req,
/* Insert shadow req to defer_list, blocking next IOs */
spin_lock_irq(&ctx->completion_lock);
+ trace_io_uring_defer(ctx, shadow, true);
list_add_tail(&shadow->list, &ctx->defer_list);
spin_unlock_irq(&ctx->completion_lock);
if (need_submit)
- return __io_queue_sqe(ctx, req, s, force_nonblock);
+ return __io_queue_sqe(req);
return 0;
}
#define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK)
-static void io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
- struct io_submit_state *state, struct io_kiocb **link,
- bool force_nonblock)
+static void io_submit_sqe(struct io_kiocb *req, struct io_submit_state *state,
+ struct io_kiocb **link)
{
struct io_uring_sqe *sqe_copy;
- struct io_kiocb *req;
+ struct sqe_submit *s = &req->submit;
+ struct io_ring_ctx *ctx = req->ctx;
int ret;
+ req->user_data = s->sqe->user_data;
+
/* enforce forwards compatibility on users */
if (unlikely(s->sqe->flags & ~SQE_VALID_FLAGS)) {
ret = -EINVAL;
- goto err;
- }
-
- req = io_get_req(ctx, state);
- if (unlikely(!req)) {
- ret = -EAGAIN;
- goto err;
+ goto err_req;
}
- ret = io_req_set_file(ctx, s, state, req);
+ ret = io_req_set_file(state, req);
if (unlikely(ret)) {
err_req:
- io_free_req(req);
-err:
- io_cqring_add_event(ctx, s->sqe->user_data, ret);
+ io_cqring_add_event(req, ret);
+ io_double_put_req(req);
return;
}
@@ -2418,16 +3013,19 @@ err:
}
s->sqe = sqe_copy;
- memcpy(&req->submit, s, sizeof(*s));
+ trace_io_uring_link(ctx, req, prev);
list_add_tail(&req->list, &prev->link_list);
} else if (s->sqe->flags & IOSQE_IO_LINK) {
req->flags |= REQ_F_LINK;
- memcpy(&req->submit, s, sizeof(*s));
INIT_LIST_HEAD(&req->link_list);
*link = req;
+ } else if (READ_ONCE(s->sqe->opcode) == IORING_OP_LINK_TIMEOUT) {
+ /* Only valid as a linked SQE */
+ ret = -EINVAL;
+ goto err_req;
} else {
- io_queue_sqe(ctx, req, s, force_nonblock);
+ io_queue_sqe(req);
}
}
@@ -2498,7 +3096,7 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s)
head = READ_ONCE(sq_array[head & ctx->sq_mask]);
if (head < ctx->sq_entries) {
- s->index = head;
+ s->ring_file = NULL;
s->sqe = &ctx->sq_sqes[head];
s->sequence = ctx->cached_sq_head;
ctx->cached_sq_head++;
@@ -2507,18 +3105,25 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct sqe_submit *s)
/* drop invalid entries */
ctx->cached_sq_head++;
- rings->sq_dropped++;
+ ctx->cached_sq_dropped++;
+ WRITE_ONCE(rings->sq_dropped, ctx->cached_sq_dropped);
return false;
}
-static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
- unsigned int nr, bool has_user, bool mm_fault)
+static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
+ struct file *ring_file, int ring_fd,
+ struct mm_struct **mm, bool async)
{
struct io_submit_state state, *statep = NULL;
struct io_kiocb *link = NULL;
struct io_kiocb *shadow_req = NULL;
- bool prev_was_link = false;
int i, submitted = 0;
+ bool mm_fault = false;
+
+ if (!list_empty(&ctx->cq_overflow_list)) {
+ io_cqring_overflow_flush(ctx, false);
+ return -EBUSY;
+ }
if (nr > IO_PLUG_THRESHOLD) {
io_submit_state_start(&state, ctx, nr);
@@ -2526,19 +3131,31 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
}
for (i = 0; i < nr; i++) {
- /*
- * If previous wasn't linked and we have a linked command,
- * that's the end of the chain. Submit the previous link.
- */
- if (!prev_was_link && link) {
- io_queue_link_head(ctx, link, &link->submit, shadow_req,
- true);
- link = NULL;
- shadow_req = NULL;
+ struct io_kiocb *req;
+ unsigned int sqe_flags;
+
+ req = io_get_req(ctx, statep);
+ if (unlikely(!req)) {
+ if (!submitted)
+ submitted = -EAGAIN;
+ break;
+ }
+ if (!io_get_sqring(ctx, &req->submit)) {
+ __io_free_req(req);
+ break;
+ }
+
+ if (io_sqe_needs_user(req->submit.sqe) && !*mm) {
+ mm_fault = mm_fault || !mmget_not_zero(ctx->sqo_mm);
+ if (!mm_fault) {
+ use_mm(ctx->sqo_mm);
+ *mm = ctx->sqo_mm;
+ }
}
- prev_was_link = (sqes[i].sqe->flags & IOSQE_IO_LINK) != 0;
- if (link && (sqes[i].sqe->flags & IOSQE_IO_DRAIN)) {
+ sqe_flags = req->submit.sqe->flags;
+
+ if (link && (sqe_flags & IOSQE_IO_DRAIN)) {
if (!shadow_req) {
shadow_req = io_get_req(ctx, NULL);
if (unlikely(!shadow_req))
@@ -2546,55 +3163,79 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
shadow_req->flags |= (REQ_F_IO_DRAIN | REQ_F_SHADOW_DRAIN);
refcount_dec(&shadow_req->refs);
}
- shadow_req->sequence = sqes[i].sequence;
+ shadow_req->sequence = req->submit.sequence;
}
out:
- if (unlikely(mm_fault)) {
- io_cqring_add_event(ctx, sqes[i].sqe->user_data,
- -EFAULT);
- } else {
- sqes[i].has_user = has_user;
- sqes[i].needs_lock = true;
- sqes[i].needs_fixed_file = true;
- io_submit_sqe(ctx, &sqes[i], statep, &link, true);
- submitted++;
+ req->submit.ring_file = ring_file;
+ req->submit.ring_fd = ring_fd;
+ req->submit.has_user = *mm != NULL;
+ req->submit.in_async = async;
+ req->submit.needs_fixed_file = async;
+ trace_io_uring_submit_sqe(ctx, req->submit.sqe->user_data,
+ true, async);
+ io_submit_sqe(req, statep, &link);
+ submitted++;
+
+ /*
+ * If previous wasn't linked and we have a linked command,
+ * that's the end of the chain. Submit the previous link.
+ */
+ if (!(sqe_flags & IOSQE_IO_LINK) && link) {
+ io_queue_link_head(link, shadow_req);
+ link = NULL;
+ shadow_req = NULL;
}
}
if (link)
- io_queue_link_head(ctx, link, &link->submit, shadow_req, true);
+ io_queue_link_head(link, shadow_req);
if (statep)
io_submit_state_end(&state);
+ /* Commit SQ ring head once we've consumed and submitted all SQEs */
+ io_commit_sqring(ctx);
+
return submitted;
}
static int io_sq_thread(void *data)
{
- struct sqe_submit sqes[IO_IOPOLL_BATCH];
struct io_ring_ctx *ctx = data;
struct mm_struct *cur_mm = NULL;
mm_segment_t old_fs;
DEFINE_WAIT(wait);
unsigned inflight;
unsigned long timeout;
+ int ret;
- complete(&ctx->sqo_thread_started);
+ complete(&ctx->completions[1]);
old_fs = get_fs();
set_fs(USER_DS);
- timeout = inflight = 0;
+ ret = timeout = inflight = 0;
while (!kthread_should_park()) {
- bool all_fixed, mm_fault = false;
- int i;
+ unsigned int to_submit;
if (inflight) {
unsigned nr_events = 0;
if (ctx->flags & IORING_SETUP_IOPOLL) {
- io_iopoll_check(ctx, &nr_events, 0);
+ /*
+ * inflight is the count of the maximum possible
+ * entries we submitted, but it can be smaller
+ * if we dropped some of them. If we don't have
+ * poll entries available, then we know that we
+ * have nothing left to poll for. Reset the
+ * inflight count to zero in that case.
+ */
+ mutex_lock(&ctx->uring_lock);
+ if (!list_empty(&ctx->poll_list))
+ __io_iopoll_check(ctx, &nr_events, 0);
+ else
+ inflight = 0;
+ mutex_unlock(&ctx->uring_lock);
} else {
/*
* Normal IO, just pretend everything completed.
@@ -2608,13 +3249,22 @@ static int io_sq_thread(void *data)
timeout = jiffies + ctx->sq_thread_idle;
}
- if (!io_get_sqring(ctx, &sqes[0])) {
+ to_submit = io_sqring_entries(ctx);
+
+ /*
+ * If submit got -EBUSY, flag us as needing the application
+ * to enter the kernel to reap and flush events.
+ */
+ if (!to_submit || ret == -EBUSY) {
/*
* We're polling. If we're within the defined idle
* period, then let us spin without work before going
- * to sleep.
+ * to sleep. The exception is if we got EBUSY doing
+ * more IO, we should wait for the application to
+ * reap events and wake us up.
*/
- if (inflight || !time_after(jiffies, timeout)) {
+ if (inflight ||
+ (!time_after(jiffies, timeout) && ret != -EBUSY)) {
cond_resched();
continue;
}
@@ -2639,7 +3289,8 @@ static int io_sq_thread(void *data)
/* make sure to read SQ tail after writing flags */
smp_mb();
- if (!io_get_sqring(ctx, &sqes[0])) {
+ to_submit = io_sqring_entries(ctx);
+ if (!to_submit || ret == -EBUSY) {
if (kthread_should_park()) {
finish_wait(&ctx->sqo_wait, &wait);
break;
@@ -2657,31 +3308,10 @@ static int io_sq_thread(void *data)
ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP;
}
- i = 0;
- all_fixed = true;
- do {
- if (all_fixed && io_sqe_needs_user(sqes[i].sqe))
- all_fixed = false;
-
- i++;
- if (i == ARRAY_SIZE(sqes))
- break;
- } while (io_get_sqring(ctx, &sqes[i]));
-
- /* Unless all new commands are FIXED regions, grab mm */
- if (!all_fixed && !cur_mm) {
- mm_fault = !mmget_not_zero(ctx->sqo_mm);
- if (!mm_fault) {
- use_mm(ctx->sqo_mm);
- cur_mm = ctx->sqo_mm;
- }
- }
-
- inflight += io_submit_sqes(ctx, sqes, i, cur_mm != NULL,
- mm_fault);
-
- /* Commit SQ ring head once we've consumed all SQEs */
- io_commit_sqring(ctx);
+ to_submit = min(to_submit, ctx->sq_entries);
+ ret = io_submit_sqes(ctx, to_submit, NULL, -1, &cur_mm, true);
+ if (ret > 0)
+ inflight += ret;
}
set_fs(old_fs);
@@ -2695,79 +3325,6 @@ static int io_sq_thread(void *data)
return 0;
}
-static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit,
- bool block_for_last)
-{
- struct io_submit_state state, *statep = NULL;
- struct io_kiocb *link = NULL;
- struct io_kiocb *shadow_req = NULL;
- bool prev_was_link = false;
- int i, submit = 0;
-
- if (to_submit > IO_PLUG_THRESHOLD) {
- io_submit_state_start(&state, ctx, to_submit);
- statep = &state;
- }
-
- for (i = 0; i < to_submit; i++) {
- bool force_nonblock = true;
- struct sqe_submit s;
-
- if (!io_get_sqring(ctx, &s))
- break;
-
- /*
- * If previous wasn't linked and we have a linked command,
- * that's the end of the chain. Submit the previous link.
- */
- if (!prev_was_link && link) {
- io_queue_link_head(ctx, link, &link->submit, shadow_req,
- force_nonblock);
- link = NULL;
- shadow_req = NULL;
- }
- prev_was_link = (s.sqe->flags & IOSQE_IO_LINK) != 0;
-
- if (link && (s.sqe->flags & IOSQE_IO_DRAIN)) {
- if (!shadow_req) {
- shadow_req = io_get_req(ctx, NULL);
- if (unlikely(!shadow_req))
- goto out;
- shadow_req->flags |= (REQ_F_IO_DRAIN | REQ_F_SHADOW_DRAIN);
- refcount_dec(&shadow_req->refs);
- }
- shadow_req->sequence = s.sequence;
- }
-
-out:
- s.has_user = true;
- s.needs_lock = false;
- s.needs_fixed_file = false;
- submit++;
-
- /*
- * The caller will block for events after submit, submit the
- * last IO non-blocking. This is either the only IO it's
- * submitting, or it already submitted the previous ones. This
- * improves performance by avoiding an async punt that we don't
- * need to do.
- */
- if (block_for_last && submit == to_submit)
- force_nonblock = false;
-
- io_submit_sqe(ctx, &s, statep, &link, force_nonblock);
- }
- io_commit_sqring(ctx);
-
- if (link)
- io_queue_link_head(ctx, link, &link->submit, shadow_req,
- block_for_last);
- if (statep)
- io_submit_state_end(statep);
-
- return submit;
-}
-
struct io_wait_queue {
struct wait_queue_entry wq;
struct io_ring_ctx *ctx;
@@ -2775,7 +3332,7 @@ struct io_wait_queue {
unsigned nr_timeouts;
};
-static inline bool io_should_wake(struct io_wait_queue *iowq)
+static inline bool io_should_wake(struct io_wait_queue *iowq, bool noflush)
{
struct io_ring_ctx *ctx = iowq->ctx;
@@ -2784,7 +3341,7 @@ static inline bool io_should_wake(struct io_wait_queue *iowq)
* started waiting. For timeouts, we always want to return to userspace,
* regardless of event count.
*/
- return io_cqring_events(ctx->rings) >= iowq->to_wait ||
+ return io_cqring_events(ctx, noflush) >= iowq->to_wait ||
atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts;
}
@@ -2794,7 +3351,8 @@ static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode,
struct io_wait_queue *iowq = container_of(curr, struct io_wait_queue,
wq);
- if (!io_should_wake(iowq))
+ /* use noflush == true, as we can't safely rely on locking context */
+ if (!io_should_wake(iowq, true))
return -1;
return autoremove_wake_function(curr, mode, wake_flags, key);
@@ -2817,9 +3375,9 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
.to_wait = min_events,
};
struct io_rings *rings = ctx->rings;
- int ret;
+ int ret = 0;
- if (io_cqring_events(rings) >= min_events)
+ if (io_cqring_events(ctx, false) >= min_events)
return 0;
if (sig) {
@@ -2835,24 +3393,22 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
return ret;
}
- ret = 0;
iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
+ trace_io_uring_cqring_wait(ctx, min_events);
do {
prepare_to_wait_exclusive(&ctx->wait, &iowq.wq,
TASK_INTERRUPTIBLE);
- if (io_should_wake(&iowq))
+ if (io_should_wake(&iowq, false))
break;
schedule();
if (signal_pending(current)) {
- ret = -ERESTARTSYS;
+ ret = -EINTR;
break;
}
} while (1);
finish_wait(&ctx->wait, &iowq.wq);
- restore_saved_sigmask_unless(ret == -ERESTARTSYS);
- if (ret == -ERESTARTSYS)
- ret = -EINTR;
+ restore_saved_sigmask_unless(ret == -EINTR);
return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0;
}
@@ -2870,19 +3426,29 @@ static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
#else
int i;
- for (i = 0; i < ctx->nr_user_files; i++)
- fput(ctx->user_files[i]);
+ for (i = 0; i < ctx->nr_user_files; i++) {
+ struct file *file;
+
+ file = io_file_from_index(ctx, i);
+ if (file)
+ fput(file);
+ }
#endif
}
static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
{
- if (!ctx->user_files)
+ unsigned nr_tables, i;
+
+ if (!ctx->file_table)
return -ENXIO;
__io_sqe_files_unregister(ctx);
- kfree(ctx->user_files);
- ctx->user_files = NULL;
+ nr_tables = DIV_ROUND_UP(ctx->nr_user_files, IORING_MAX_FILES_TABLE);
+ for (i = 0; i < nr_tables; i++)
+ kfree(ctx->file_table[i].files);
+ kfree(ctx->file_table);
+ ctx->file_table = NULL;
ctx->nr_user_files = 0;
return 0;
}
@@ -2890,7 +3456,7 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
static void io_sq_thread_stop(struct io_ring_ctx *ctx)
{
if (ctx->sqo_thread) {
- wait_for_completion(&ctx->sqo_thread_started);
+ wait_for_completion(&ctx->completions[1]);
/*
* The park is a bit of a work-around, without it we get
* warning spews on shutdown with SQPOLL set and affinity
@@ -2904,15 +3470,11 @@ static void io_sq_thread_stop(struct io_ring_ctx *ctx)
static void io_finish_async(struct io_ring_ctx *ctx)
{
- int i;
-
io_sq_thread_stop(ctx);
- for (i = 0; i < ARRAY_SIZE(ctx->sqo_wq); i++) {
- if (ctx->sqo_wq[i]) {
- destroy_workqueue(ctx->sqo_wq[i]);
- ctx->sqo_wq[i] = NULL;
- }
+ if (ctx->io_wq) {
+ io_wq_destroy(ctx->io_wq);
+ ctx->io_wq = NULL;
}
}
@@ -2921,7 +3483,9 @@ static void io_destruct_skb(struct sk_buff *skb)
{
struct io_ring_ctx *ctx = skb->sk->sk_user_data;
- io_finish_async(ctx);
+ if (ctx->io_wq)
+ io_wq_flush(ctx->io_wq);
+
unix_destruct_scm(skb);
}
@@ -2935,7 +3499,7 @@ static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset)
struct sock *sk = ctx->ring_sock->sk;
struct scm_fp_list *fpl;
struct sk_buff *skb;
- int i;
+ int i, nr_files;
if (!capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) {
unsigned long inflight = ctx->user->unix_inflight + nr;
@@ -2955,21 +3519,33 @@ static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset)
}
skb->sk = sk;
- skb->destructor = io_destruct_skb;
+ nr_files = 0;
fpl->user = get_uid(ctx->user);
for (i = 0; i < nr; i++) {
- fpl->fp[i] = get_file(ctx->user_files[i + offset]);
- unix_inflight(fpl->user, fpl->fp[i]);
+ struct file *file = io_file_from_index(ctx, i + offset);
+
+ if (!file)
+ continue;
+ fpl->fp[nr_files] = get_file(file);
+ unix_inflight(fpl->user, fpl->fp[nr_files]);
+ nr_files++;
}
- fpl->max = fpl->count = nr;
- UNIXCB(skb).fp = fpl;
- refcount_add(skb->truesize, &sk->sk_wmem_alloc);
- skb_queue_head(&sk->sk_receive_queue, skb);
+ if (nr_files) {
+ fpl->max = SCM_MAX_FD;
+ fpl->count = nr_files;
+ UNIXCB(skb).fp = fpl;
+ skb->destructor = io_destruct_skb;
+ refcount_add(skb->truesize, &sk->sk_wmem_alloc);
+ skb_queue_head(&sk->sk_receive_queue, skb);
- for (i = 0; i < nr; i++)
- fput(fpl->fp[i]);
+ for (i = 0; i < nr_files; i++)
+ fput(fpl->fp[i]);
+ } else {
+ kfree_skb(skb);
+ kfree(fpl);
+ }
return 0;
}
@@ -3000,7 +3576,10 @@ static int io_sqe_files_scm(struct io_ring_ctx *ctx)
return 0;
while (total < ctx->nr_user_files) {
- fput(ctx->user_files[total]);
+ struct file *file = io_file_from_index(ctx, total);
+
+ if (file)
+ fput(file);
total++;
}
@@ -3013,33 +3592,79 @@ static int io_sqe_files_scm(struct io_ring_ctx *ctx)
}
#endif
+static int io_sqe_alloc_file_tables(struct io_ring_ctx *ctx, unsigned nr_tables,
+ unsigned nr_files)
+{
+ int i;
+
+ for (i = 0; i < nr_tables; i++) {
+ struct fixed_file_table *table = &ctx->file_table[i];
+ unsigned this_files;
+
+ this_files = min(nr_files, IORING_MAX_FILES_TABLE);
+ table->files = kcalloc(this_files, sizeof(struct file *),
+ GFP_KERNEL);
+ if (!table->files)
+ break;
+ nr_files -= this_files;
+ }
+
+ if (i == nr_tables)
+ return 0;
+
+ for (i = 0; i < nr_tables; i++) {
+ struct fixed_file_table *table = &ctx->file_table[i];
+ kfree(table->files);
+ }
+ return 1;
+}
+
static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
unsigned nr_args)
{
__s32 __user *fds = (__s32 __user *) arg;
+ unsigned nr_tables;
int fd, ret = 0;
unsigned i;
- if (ctx->user_files)
+ if (ctx->file_table)
return -EBUSY;
if (!nr_args)
return -EINVAL;
if (nr_args > IORING_MAX_FIXED_FILES)
return -EMFILE;
- ctx->user_files = kcalloc(nr_args, sizeof(struct file *), GFP_KERNEL);
- if (!ctx->user_files)
+ nr_tables = DIV_ROUND_UP(nr_args, IORING_MAX_FILES_TABLE);
+ ctx->file_table = kcalloc(nr_tables, sizeof(struct fixed_file_table),
+ GFP_KERNEL);
+ if (!ctx->file_table)
return -ENOMEM;
- for (i = 0; i < nr_args; i++) {
+ if (io_sqe_alloc_file_tables(ctx, nr_tables, nr_args)) {
+ kfree(ctx->file_table);
+ ctx->file_table = NULL;
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < nr_args; i++, ctx->nr_user_files++) {
+ struct fixed_file_table *table;
+ unsigned index;
+
ret = -EFAULT;
if (copy_from_user(&fd, &fds[i], sizeof(fd)))
break;
+ /* allow sparse sets */
+ if (fd == -1) {
+ ret = 0;
+ continue;
+ }
- ctx->user_files[i] = fget(fd);
+ table = &ctx->file_table[i >> IORING_FILE_TABLE_SHIFT];
+ index = i & IORING_FILE_TABLE_MASK;
+ table->files[index] = fget(fd);
ret = -EBADF;
- if (!ctx->user_files[i])
+ if (!table->files[index])
break;
/*
* Don't allow io_uring instances to be registered. If UNIX
@@ -3048,20 +3673,26 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
* handle it just fine, but there's still no point in allowing
* a ring fd as it doesn't support regular read/write anyway.
*/
- if (ctx->user_files[i]->f_op == &io_uring_fops) {
- fput(ctx->user_files[i]);
+ if (table->files[index]->f_op == &io_uring_fops) {
+ fput(table->files[index]);
break;
}
- ctx->nr_user_files++;
ret = 0;
}
if (ret) {
- for (i = 0; i < ctx->nr_user_files; i++)
- fput(ctx->user_files[i]);
+ for (i = 0; i < ctx->nr_user_files; i++) {
+ struct file *file;
+
+ file = io_file_from_index(ctx, i);
+ if (file)
+ fput(file);
+ }
+ for (i = 0; i < nr_tables; i++)
+ kfree(ctx->file_table[i].files);
- kfree(ctx->user_files);
- ctx->user_files = NULL;
+ kfree(ctx->file_table);
+ ctx->file_table = NULL;
ctx->nr_user_files = 0;
return ret;
}
@@ -3073,9 +3704,201 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
return ret;
}
+static void io_sqe_file_unregister(struct io_ring_ctx *ctx, int index)
+{
+#if defined(CONFIG_UNIX)
+ struct file *file = io_file_from_index(ctx, index);
+ struct sock *sock = ctx->ring_sock->sk;
+ struct sk_buff_head list, *head = &sock->sk_receive_queue;
+ struct sk_buff *skb;
+ int i;
+
+ __skb_queue_head_init(&list);
+
+ /*
+ * Find the skb that holds this file in its SCM_RIGHTS. When found,
+ * remove this entry and rearrange the file array.
+ */
+ skb = skb_dequeue(head);
+ while (skb) {
+ struct scm_fp_list *fp;
+
+ fp = UNIXCB(skb).fp;
+ for (i = 0; i < fp->count; i++) {
+ int left;
+
+ if (fp->fp[i] != file)
+ continue;
+
+ unix_notinflight(fp->user, fp->fp[i]);
+ left = fp->count - 1 - i;
+ if (left) {
+ memmove(&fp->fp[i], &fp->fp[i + 1],
+ left * sizeof(struct file *));
+ }
+ fp->count--;
+ if (!fp->count) {
+ kfree_skb(skb);
+ skb = NULL;
+ } else {
+ __skb_queue_tail(&list, skb);
+ }
+ fput(file);
+ file = NULL;
+ break;
+ }
+
+ if (!file)
+ break;
+
+ __skb_queue_tail(&list, skb);
+
+ skb = skb_dequeue(head);
+ }
+
+ if (skb_peek(&list)) {
+ spin_lock_irq(&head->lock);
+ while ((skb = __skb_dequeue(&list)) != NULL)
+ __skb_queue_tail(head, skb);
+ spin_unlock_irq(&head->lock);
+ }
+#else
+ fput(io_file_from_index(ctx, index));
+#endif
+}
+
+static int io_sqe_file_register(struct io_ring_ctx *ctx, struct file *file,
+ int index)
+{
+#if defined(CONFIG_UNIX)
+ struct sock *sock = ctx->ring_sock->sk;
+ struct sk_buff_head *head = &sock->sk_receive_queue;
+ struct sk_buff *skb;
+
+ /*
+ * See if we can merge this file into an existing skb SCM_RIGHTS
+ * file set. If there's no room, fall back to allocating a new skb
+ * and filling it in.
+ */
+ spin_lock_irq(&head->lock);
+ skb = skb_peek(head);
+ if (skb) {
+ struct scm_fp_list *fpl = UNIXCB(skb).fp;
+
+ if (fpl->count < SCM_MAX_FD) {
+ __skb_unlink(skb, head);
+ spin_unlock_irq(&head->lock);
+ fpl->fp[fpl->count] = get_file(file);
+ unix_inflight(fpl->user, fpl->fp[fpl->count]);
+ fpl->count++;
+ spin_lock_irq(&head->lock);
+ __skb_queue_head(head, skb);
+ } else {
+ skb = NULL;
+ }
+ }
+ spin_unlock_irq(&head->lock);
+
+ if (skb) {
+ fput(file);
+ return 0;
+ }
+
+ return __io_sqe_files_scm(ctx, 1, index);
+#else
+ return 0;
+#endif
+}
+
+static int io_sqe_files_update(struct io_ring_ctx *ctx, void __user *arg,
+ unsigned nr_args)
+{
+ struct io_uring_files_update up;
+ __s32 __user *fds;
+ int fd, i, err;
+ __u32 done;
+
+ if (!ctx->file_table)
+ return -ENXIO;
+ if (!nr_args)
+ return -EINVAL;
+ if (copy_from_user(&up, arg, sizeof(up)))
+ return -EFAULT;
+ if (check_add_overflow(up.offset, nr_args, &done))
+ return -EOVERFLOW;
+ if (done > ctx->nr_user_files)
+ return -EINVAL;
+
+ done = 0;
+ fds = (__s32 __user *) up.fds;
+ while (nr_args) {
+ struct fixed_file_table *table;
+ unsigned index;
+
+ err = 0;
+ if (copy_from_user(&fd, &fds[done], sizeof(fd))) {
+ err = -EFAULT;
+ break;
+ }
+ i = array_index_nospec(up.offset, ctx->nr_user_files);
+ table = &ctx->file_table[i >> IORING_FILE_TABLE_SHIFT];
+ index = i & IORING_FILE_TABLE_MASK;
+ if (table->files[index]) {
+ io_sqe_file_unregister(ctx, i);
+ table->files[index] = NULL;
+ }
+ if (fd != -1) {
+ struct file *file;
+
+ file = fget(fd);
+ if (!file) {
+ err = -EBADF;
+ break;
+ }
+ /*
+ * Don't allow io_uring instances to be registered. If
+ * UNIX isn't enabled, then this causes a reference
+ * cycle and this instance can never get freed. If UNIX
+ * is enabled we'll handle it just fine, but there's
+ * still no point in allowing a ring fd as it doesn't
+ * support regular read/write anyway.
+ */
+ if (file->f_op == &io_uring_fops) {
+ fput(file);
+ err = -EBADF;
+ break;
+ }
+ table->files[index] = file;
+ err = io_sqe_file_register(ctx, file, i);
+ if (err)
+ break;
+ }
+ nr_args--;
+ done++;
+ up.offset++;
+ }
+
+ return done ? done : err;
+}
+
+static void io_put_work(struct io_wq_work *work)
+{
+ struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+
+ io_put_req(req);
+}
+
+static void io_get_work(struct io_wq_work *work)
+{
+ struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+
+ refcount_inc(&req->refs);
+}
+
static int io_sq_offload_start(struct io_ring_ctx *ctx,
struct io_uring_params *p)
{
+ unsigned concurrency;
int ret;
init_waitqueue_head(&ctx->sqo_wait);
@@ -3119,26 +3942,13 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx,
goto err;
}
- /* Do QD, or 2 * CPUS, whatever is smallest */
- ctx->sqo_wq[0] = alloc_workqueue("io_ring-wq",
- WQ_UNBOUND | WQ_FREEZABLE,
- min(ctx->sq_entries - 1, 2 * num_online_cpus()));
- if (!ctx->sqo_wq[0]) {
- ret = -ENOMEM;
- goto err;
- }
-
- /*
- * This is for buffered writes, where we want to limit the parallelism
- * due to file locking in file systems. As "normal" buffered writes
- * should parellelize on writeout quite nicely, limit us to having 2
- * pending. This avoids massive contention on the inode when doing
- * buffered async writes.
- */
- ctx->sqo_wq[1] = alloc_workqueue("io_ring-write-wq",
- WQ_UNBOUND | WQ_FREEZABLE, 2);
- if (!ctx->sqo_wq[1]) {
- ret = -ENOMEM;
+ /* Do QD, or 4 * CPUS, whatever is smallest */
+ concurrency = min(ctx->sq_entries, 4 * num_online_cpus());
+ ctx->io_wq = io_wq_create(concurrency, ctx->sqo_mm, ctx->user,
+ io_get_work, io_put_work);
+ if (IS_ERR(ctx->io_wq)) {
+ ret = PTR_ERR(ctx->io_wq);
+ ctx->io_wq = NULL;
goto err;
}
@@ -3484,6 +4294,8 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
io_unaccount_mem(ctx->user,
ring_pages(ctx->sq_entries, ctx->cq_entries));
free_uid(ctx->user);
+ kfree(ctx->completions);
+ kmem_cache_free(req_cachep, ctx->fallback_req);
kfree(ctx);
}
@@ -3522,8 +4334,15 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
io_kill_timeouts(ctx);
io_poll_remove_all(ctx);
+
+ if (ctx->io_wq)
+ io_wq_cancel_all(ctx->io_wq);
+
io_iopoll_reap_events(ctx);
- wait_for_completion(&ctx->ctx_done);
+ /* if we failed setting up the ctx, we might not have any rings */
+ if (ctx->rings)
+ io_cqring_overflow_flush(ctx, true);
+ wait_for_completion(&ctx->completions[0]);
io_ring_ctx_free(ctx);
}
@@ -3536,6 +4355,53 @@ static int io_uring_release(struct inode *inode, struct file *file)
return 0;
}
+static void io_uring_cancel_files(struct io_ring_ctx *ctx,
+ struct files_struct *files)
+{
+ struct io_kiocb *req;
+ DEFINE_WAIT(wait);
+
+ while (!list_empty_careful(&ctx->inflight_list)) {
+ struct io_kiocb *cancel_req = NULL;
+
+ spin_lock_irq(&ctx->inflight_lock);
+ list_for_each_entry(req, &ctx->inflight_list, inflight_entry) {
+ if (req->work.files != files)
+ continue;
+ /* req is being completed, ignore */
+ if (!refcount_inc_not_zero(&req->refs))
+ continue;
+ cancel_req = req;
+ break;
+ }
+ if (cancel_req)
+ prepare_to_wait(&ctx->inflight_wait, &wait,
+ TASK_UNINTERRUPTIBLE);
+ spin_unlock_irq(&ctx->inflight_lock);
+
+ /* We need to keep going until we don't find a matching req */
+ if (!cancel_req)
+ break;
+
+ io_wq_cancel_work(ctx->io_wq, &cancel_req->work);
+ io_put_req(cancel_req);
+ schedule();
+ }
+ finish_wait(&ctx->inflight_wait, &wait);
+}
+
+static int io_uring_flush(struct file *file, void *data)
+{
+ struct io_ring_ctx *ctx = file->private_data;
+
+ io_uring_cancel_files(ctx, data);
+ if (fatal_signal_pending(current) || (current->flags & PF_EXITING)) {
+ io_cqring_overflow_flush(ctx, true);
+ io_wq_cancel_all(ctx->io_wq);
+ }
+ return 0;
+}
+
static int io_uring_mmap(struct file *file, struct vm_area_struct *vma)
{
loff_t offset = (loff_t) vma->vm_pgoff << PAGE_SHIFT;
@@ -3597,25 +4463,20 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
*/
ret = 0;
if (ctx->flags & IORING_SETUP_SQPOLL) {
+ if (!list_empty_careful(&ctx->cq_overflow_list))
+ io_cqring_overflow_flush(ctx, false);
if (flags & IORING_ENTER_SQ_WAKEUP)
wake_up(&ctx->sqo_wait);
submitted = to_submit;
} else if (to_submit) {
- bool block_for_last = false;
+ struct mm_struct *cur_mm;
to_submit = min(to_submit, ctx->sq_entries);
-
- /*
- * Allow last submission to block in a series, IFF the caller
- * asked to wait for events and we don't currently have
- * enough. This potentially avoids an async punt.
- */
- if (to_submit == min_complete &&
- io_cqring_events(ctx->rings) < min_complete)
- block_for_last = true;
-
mutex_lock(&ctx->uring_lock);
- submitted = io_ring_submit(ctx, to_submit, block_for_last);
+ /* already have mm, so io_submit_sqes() won't try to grab it */
+ cur_mm = ctx->sqo_mm;
+ submitted = io_submit_sqes(ctx, to_submit, f.file, fd,
+ &cur_mm, false);
mutex_unlock(&ctx->uring_lock);
}
if (flags & IORING_ENTER_GETEVENTS) {
@@ -3630,7 +4491,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
}
}
- io_ring_drop_ctx_refs(ctx, 1);
+ percpu_ref_put(&ctx->refs);
out_fput:
fdput(f);
return submitted ? submitted : ret;
@@ -3638,6 +4499,7 @@ out_fput:
static const struct file_operations io_uring_fops = {
.release = io_uring_release,
+ .flush = io_uring_flush,
.mmap = io_uring_mmap,
.poll = io_uring_poll,
.fasync = io_uring_fasync,
@@ -3737,10 +4599,23 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p)
* Use twice as many entries for the CQ ring. It's possible for the
* application to drive a higher depth than the size of the SQ ring,
* since the sqes are only used at submission time. This allows for
- * some flexibility in overcommitting a bit.
+ * some flexibility in overcommitting a bit. If the application has
+ * set IORING_SETUP_CQSIZE, it will have passed in the desired number
+ * of CQ ring entries manually.
*/
p->sq_entries = roundup_pow_of_two(entries);
- p->cq_entries = 2 * p->sq_entries;
+ if (p->flags & IORING_SETUP_CQSIZE) {
+ /*
+ * If IORING_SETUP_CQSIZE is set, we do the same roundup
+ * to a power-of-two, if it isn't already. We do NOT impose
+ * any cq vs sq ring sizing.
+ */
+ if (p->cq_entries < p->sq_entries || p->cq_entries > IORING_MAX_CQ_ENTRIES)
+ return -EINVAL;
+ p->cq_entries = roundup_pow_of_two(p->cq_entries);
+ } else {
+ p->cq_entries = 2 * p->sq_entries;
+ }
user = get_uid(current_user());
account_mem = !capable(CAP_IPC_LOCK);
@@ -3774,10 +4649,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p)
if (ret)
goto err;
- ret = io_uring_get_fd(ctx);
- if (ret < 0)
- goto err;
-
memset(&p->sq_off, 0, sizeof(p->sq_off));
p->sq_off.head = offsetof(struct io_rings, sq.head);
p->sq_off.tail = offsetof(struct io_rings, sq.tail);
@@ -3795,7 +4666,16 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p)
p->cq_off.overflow = offsetof(struct io_rings, cq_overflow);
p->cq_off.cqes = offsetof(struct io_rings, cqes);
- p->features = IORING_FEAT_SINGLE_MMAP;
+ /*
+ * Install ring fd as the very last thing, so we don't risk someone
+ * having closed it before we finish setup
+ */
+ ret = io_uring_get_fd(ctx);
+ if (ret < 0)
+ goto err;
+
+ p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP;
+ trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags);
return ret;
err:
io_ring_ctx_wait_and_kill(ctx);
@@ -3821,7 +4701,7 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
}
if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL |
- IORING_SETUP_SQ_AFF))
+ IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE))
return -EINVAL;
ret = io_uring_create(entries, &p);
@@ -3865,7 +4745,7 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
* no new references will come in after we've killed the percpu ref.
*/
mutex_unlock(&ctx->uring_lock);
- wait_for_completion(&ctx->ctx_done);
+ wait_for_completion(&ctx->completions[0]);
mutex_lock(&ctx->uring_lock);
switch (opcode) {
@@ -3887,6 +4767,9 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
break;
ret = io_sqe_files_unregister(ctx);
break;
+ case IORING_REGISTER_FILES_UPDATE:
+ ret = io_sqe_files_update(ctx, arg, nr_args);
+ break;
case IORING_REGISTER_EVENTFD:
ret = -EINVAL;
if (nr_args != 1)
@@ -3905,7 +4788,7 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
}
/* bring the ctx back to life */
- reinit_completion(&ctx->ctx_done);
+ reinit_completion(&ctx->completions[0]);
percpu_ref_reinit(&ctx->refs);
return ret;
}
@@ -3930,6 +4813,8 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
mutex_lock(&ctx->uring_lock);
ret = __io_uring_register(ctx, opcode, arg, nr_args);
mutex_unlock(&ctx->uring_lock);
+ trace_io_uring_register(ctx, opcode, ctx->nr_user_files, ctx->nr_user_bufs,
+ ctx->cq_ev_fd != NULL, ret);
out_fput:
fdput(f);
return ret;
diff --git a/fs/libfs.c b/fs/libfs.c
index c9b2850c0f7c..1463b038ffc4 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -89,58 +89,45 @@ int dcache_dir_close(struct inode *inode, struct file *file)
EXPORT_SYMBOL(dcache_dir_close);
/* parent is locked at least shared */
-static struct dentry *next_positive(struct dentry *parent,
- struct list_head *from,
- int count)
+/*
+ * Returns an element of siblings' list.
+ * We are looking for <count>th positive after <p>; if
+ * found, dentry is grabbed and returned to caller.
+ * If no such element exists, NULL is returned.
+ */
+static struct dentry *scan_positives(struct dentry *cursor,
+ struct list_head *p,
+ loff_t count,
+ struct dentry *last)
{
- unsigned *seq = &parent->d_inode->i_dir_seq, n;
- struct dentry *res;
- struct list_head *p;
- bool skipped;
- int i;
+ struct dentry *dentry = cursor->d_parent, *found = NULL;
-retry:
- i = count;
- skipped = false;
- n = smp_load_acquire(seq) & ~1;
- res = NULL;
- rcu_read_lock();
- for (p = from->next; p != &parent->d_subdirs; p = p->next) {
+ spin_lock(&dentry->d_lock);
+ while ((p = p->next) != &dentry->d_subdirs) {
struct dentry *d = list_entry(p, struct dentry, d_child);
- if (!simple_positive(d)) {
- skipped = true;
- } else if (!--i) {
- res = d;
- break;
+ // we must at least skip cursors, to avoid livelocks
+ if (d->d_flags & DCACHE_DENTRY_CURSOR)
+ continue;
+ if (simple_positive(d) && !--count) {
+ spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
+ if (simple_positive(d))
+ found = dget_dlock(d);
+ spin_unlock(&d->d_lock);
+ if (likely(found))
+ break;
+ count = 1;
+ }
+ if (need_resched()) {
+ list_move(&cursor->d_child, p);
+ p = &cursor->d_child;
+ spin_unlock(&dentry->d_lock);
+ cond_resched();
+ spin_lock(&dentry->d_lock);
}
}
- rcu_read_unlock();
- if (skipped) {
- smp_rmb();
- if (unlikely(*seq != n))
- goto retry;
- }
- return res;
-}
-
-static void move_cursor(struct dentry *cursor, struct list_head *after)
-{
- struct dentry *parent = cursor->d_parent;
- unsigned n, *seq = &parent->d_inode->i_dir_seq;
- spin_lock(&parent->d_lock);
- for (;;) {
- n = *seq;
- if (!(n & 1) && cmpxchg(seq, n, n + 1) == n)
- break;
- cpu_relax();
- }
- __list_del(cursor->d_child.prev, cursor->d_child.next);
- if (after)
- list_add(&cursor->d_child, after);
- else
- list_add_tail(&cursor->d_child, &parent->d_subdirs);
- smp_store_release(seq, n + 2);
- spin_unlock(&parent->d_lock);
+ spin_unlock(&dentry->d_lock);
+ dput(last);
+ return found;
}
loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
@@ -158,17 +145,25 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
return -EINVAL;
}
if (offset != file->f_pos) {
+ struct dentry *cursor = file->private_data;
+ struct dentry *to = NULL;
+
+ inode_lock_shared(dentry->d_inode);
+
+ if (offset > 2)
+ to = scan_positives(cursor, &dentry->d_subdirs,
+ offset - 2, NULL);
+ spin_lock(&dentry->d_lock);
+ if (to)
+ list_move(&cursor->d_child, &to->d_child);
+ else
+ list_del_init(&cursor->d_child);
+ spin_unlock(&dentry->d_lock);
+ dput(to);
+
file->f_pos = offset;
- if (file->f_pos >= 2) {
- struct dentry *cursor = file->private_data;
- struct dentry *to;
- loff_t n = file->f_pos - 2;
-
- inode_lock_shared(dentry->d_inode);
- to = next_positive(dentry, &dentry->d_subdirs, n);
- move_cursor(cursor, to ? &to->d_child : NULL);
- inode_unlock_shared(dentry->d_inode);
- }
+
+ inode_unlock_shared(dentry->d_inode);
}
return offset;
}
@@ -190,25 +185,35 @@ int dcache_readdir(struct file *file, struct dir_context *ctx)
{
struct dentry *dentry = file->f_path.dentry;
struct dentry *cursor = file->private_data;
- struct list_head *p = &cursor->d_child;
- struct dentry *next;
- bool moved = false;
+ struct list_head *anchor = &dentry->d_subdirs;
+ struct dentry *next = NULL;
+ struct list_head *p;
if (!dir_emit_dots(file, ctx))
return 0;
if (ctx->pos == 2)
- p = &dentry->d_subdirs;
- while ((next = next_positive(dentry, p, 1)) != NULL) {
+ p = anchor;
+ else if (!list_empty(&cursor->d_child))
+ p = &cursor->d_child;
+ else
+ return 0;
+
+ while ((next = scan_positives(cursor, p, 1, next)) != NULL) {
if (!dir_emit(ctx, next->d_name.name, next->d_name.len,
d_inode(next)->i_ino, dt_type(d_inode(next))))
break;
- moved = true;
- p = &next->d_child;
ctx->pos++;
+ p = &next->d_child;
}
- if (moved)
- move_cursor(cursor, p);
+ spin_lock(&dentry->d_lock);
+ if (next)
+ list_move_tail(&cursor->d_child, &next->d_child);
+ else
+ list_del_init(&cursor->d_child);
+ spin_unlock(&dentry->d_lock);
+ dput(next);
+
return 0;
}
EXPORT_SYMBOL(dcache_readdir);
@@ -468,8 +473,7 @@ EXPORT_SYMBOL(simple_write_begin);
/**
* simple_write_end - .write_end helper for non-block-device FSes
- * @available: See .write_end of address_space_operations
- * @file: "
+ * @file: See .write_end of address_space_operations
* @mapping: "
* @pos: "
* @len: "
diff --git a/fs/namespace.c b/fs/namespace.c
index fe0e9e1410fe..2adfe7b166a3 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2478,8 +2478,10 @@ static void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount *
time64_to_tm(sb->s_time_max, 0, &tm);
- pr_warn("Mounted %s file system at %s supports timestamps until %04ld (0x%llx)\n",
- sb->s_type->name, mntpath,
+ pr_warn("%s filesystem being %s at %s supports timestamps until %04ld (0x%llx)\n",
+ sb->s_type->name,
+ is_mounted(mnt) ? "remounted" : "mounted",
+ mntpath,
tm.tm_year+1900, (unsigned long long)sb->s_time_max);
free_page((unsigned long)buf);
@@ -2764,14 +2766,11 @@ static int do_new_mount_fc(struct fs_context *fc, struct path *mountpoint,
if (IS_ERR(mnt))
return PTR_ERR(mnt);
- error = do_add_mount(real_mount(mnt), mountpoint, mnt_flags);
- if (error < 0) {
- mntput(mnt);
- return error;
- }
-
mnt_warn_timestamp_expiry(mountpoint, mnt);
+ error = do_add_mount(real_mount(mnt), mountpoint, mnt_flags);
+ if (error < 0)
+ mntput(mnt);
return error;
}
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 071b90a45933..af549d70ec50 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -53,6 +53,16 @@ nfs4_is_valid_delegation(const struct nfs_delegation *delegation,
return false;
}
+struct nfs_delegation *nfs4_get_valid_delegation(const struct inode *inode)
+{
+ struct nfs_delegation *delegation;
+
+ delegation = rcu_dereference(NFS_I(inode)->delegation);
+ if (nfs4_is_valid_delegation(delegation, 0))
+ return delegation;
+ return NULL;
+}
+
static int
nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark)
{
@@ -1181,7 +1191,7 @@ bool nfs4_refresh_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
if (delegation != NULL &&
nfs4_stateid_match_other(dst, &delegation->stateid)) {
dst->seqid = delegation->stateid.seqid;
- return ret;
+ ret = true;
}
rcu_read_unlock();
out:
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 9eb87ae4c982..8b14d441e699 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -68,6 +68,7 @@ int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state,
bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags, nfs4_stateid *dst, const struct cred **cred);
bool nfs4_refresh_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
+struct nfs_delegation *nfs4_get_valid_delegation(const struct inode *inode);
void nfs_mark_delegation_referenced(struct nfs_delegation *delegation);
int nfs4_have_delegation(struct inode *inode, fmode_t flags);
int nfs4_check_delegation(struct inode *inode, fmode_t flags);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 222d7115db71..040a50fd9bf3 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -64,13 +64,6 @@
static struct kmem_cache *nfs_direct_cachep;
-/*
- * This represents a set of asynchronous requests that we're waiting on
- */
-struct nfs_direct_mirror {
- ssize_t count;
-};
-
struct nfs_direct_req {
struct kref kref; /* release manager */
@@ -84,9 +77,6 @@ struct nfs_direct_req {
atomic_t io_count; /* i/os we're waiting for */
spinlock_t lock; /* protect completion state */
- struct nfs_direct_mirror mirrors[NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX];
- int mirror_count;
-
loff_t io_start; /* Start offset for I/O */
ssize_t count, /* bytes actually processed */
max_count, /* max expected count */
@@ -123,32 +113,42 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
}
static void
-nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr)
+nfs_direct_handle_truncated(struct nfs_direct_req *dreq,
+ const struct nfs_pgio_header *hdr,
+ ssize_t dreq_len)
{
- int i;
- ssize_t count;
+ if (!(test_bit(NFS_IOHDR_ERROR, &hdr->flags) ||
+ test_bit(NFS_IOHDR_EOF, &hdr->flags)))
+ return;
+ if (dreq->max_count >= dreq_len) {
+ dreq->max_count = dreq_len;
+ if (dreq->count > dreq_len)
+ dreq->count = dreq_len;
+
+ if (test_bit(NFS_IOHDR_ERROR, &hdr->flags))
+ dreq->error = hdr->error;
+ else /* Clear outstanding error if this is EOF */
+ dreq->error = 0;
+ }
+}
- WARN_ON_ONCE(dreq->count >= dreq->max_count);
+static void
+nfs_direct_count_bytes(struct nfs_direct_req *dreq,
+ const struct nfs_pgio_header *hdr)
+{
+ loff_t hdr_end = hdr->io_start + hdr->good_bytes;
+ ssize_t dreq_len = 0;
- if (dreq->mirror_count == 1) {
- dreq->mirrors[hdr->pgio_mirror_idx].count += hdr->good_bytes;
- dreq->count += hdr->good_bytes;
- } else {
- /* mirrored writes */
- count = dreq->mirrors[hdr->pgio_mirror_idx].count;
- if (count + dreq->io_start < hdr->io_start + hdr->good_bytes) {
- count = hdr->io_start + hdr->good_bytes - dreq->io_start;
- dreq->mirrors[hdr->pgio_mirror_idx].count = count;
- }
- /* update the dreq->count by finding the minimum agreed count from all
- * mirrors */
- count = dreq->mirrors[0].count;
+ if (hdr_end > dreq->io_start)
+ dreq_len = hdr_end - dreq->io_start;
- for (i = 1; i < dreq->mirror_count; i++)
- count = min(count, dreq->mirrors[i].count);
+ nfs_direct_handle_truncated(dreq, hdr, dreq_len);
- dreq->count = count;
- }
+ if (dreq_len > dreq->max_count)
+ dreq_len = dreq->max_count;
+
+ if (dreq->count < dreq_len)
+ dreq->count = dreq_len;
}
/*
@@ -293,18 +293,6 @@ void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
cinfo->completion_ops = &nfs_direct_commit_completion_ops;
}
-static inline void nfs_direct_setup_mirroring(struct nfs_direct_req *dreq,
- struct nfs_pageio_descriptor *pgio,
- struct nfs_page *req)
-{
- int mirror_count = 1;
-
- if (pgio->pg_ops->pg_get_mirror_count)
- mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req);
-
- dreq->mirror_count = mirror_count;
-}
-
static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
{
struct nfs_direct_req *dreq;
@@ -319,7 +307,6 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
INIT_LIST_HEAD(&dreq->mds_cinfo.list);
dreq->verf.committed = NFS_INVALID_STABLE_HOW; /* not set yet */
INIT_WORK(&dreq->work, nfs_direct_write_schedule_work);
- dreq->mirror_count = 1;
spin_lock_init(&dreq->lock);
return dreq;
@@ -402,20 +389,12 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
struct nfs_direct_req *dreq = hdr->dreq;
spin_lock(&dreq->lock);
- if (test_bit(NFS_IOHDR_ERROR, &hdr->flags))
- dreq->error = hdr->error;
-
if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) {
spin_unlock(&dreq->lock);
goto out_put;
}
- if (hdr->good_bytes != 0)
- nfs_direct_good_bytes(dreq, hdr);
-
- if (test_bit(NFS_IOHDR_EOF, &hdr->flags))
- dreq->error = 0;
-
+ nfs_direct_count_bytes(dreq, hdr);
spin_unlock(&dreq->lock);
while (!list_empty(&hdr->pages)) {
@@ -646,29 +625,22 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
LIST_HEAD(reqs);
struct nfs_commit_info cinfo;
LIST_HEAD(failed);
- int i;
nfs_init_cinfo_from_dreq(&cinfo, dreq);
nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo);
dreq->count = 0;
+ dreq->max_count = 0;
+ list_for_each_entry(req, &reqs, wb_list)
+ dreq->max_count += req->wb_bytes;
dreq->verf.committed = NFS_INVALID_STABLE_HOW;
nfs_clear_pnfs_ds_commit_verifiers(&dreq->ds_cinfo);
- for (i = 0; i < dreq->mirror_count; i++)
- dreq->mirrors[i].count = 0;
get_dreq(dreq);
nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false,
&nfs_direct_write_completion_ops);
desc.pg_dreq = dreq;
- req = nfs_list_entry(reqs.next);
- nfs_direct_setup_mirroring(dreq, &desc, req);
- if (desc.pg_error < 0) {
- list_splice_init(&reqs, &failed);
- goto out_failed;
- }
-
list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
/* Bump the transmission count */
req->wb_nio++;
@@ -686,7 +658,6 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
}
nfs_pageio_complete(&desc);
-out_failed:
while (!list_empty(&failed)) {
req = nfs_list_entry(failed.next);
nfs_list_remove_request(req);
@@ -791,17 +762,13 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
nfs_init_cinfo_from_dreq(&cinfo, dreq);
spin_lock(&dreq->lock);
-
- if (test_bit(NFS_IOHDR_ERROR, &hdr->flags))
- dreq->error = hdr->error;
-
if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) {
spin_unlock(&dreq->lock);
goto out_put;
}
+ nfs_direct_count_bytes(dreq, hdr);
if (hdr->good_bytes != 0) {
- nfs_direct_good_bytes(dreq, hdr);
if (nfs_write_need_commit(hdr)) {
if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
request_commit = true;
@@ -923,7 +890,6 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
break;
}
- nfs_direct_setup_mirroring(dreq, &desc, req);
if (desc.pg_error < 0) {
nfs_free_request(req);
result = desc.pg_error;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 11eafcfc490b..caacf5e7f5e1 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1440,8 +1440,6 @@ static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode,
return 0;
if ((delegation->type & fmode) != fmode)
return 0;
- if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
- return 0;
switch (claim) {
case NFS4_OPEN_CLAIM_NULL:
case NFS4_OPEN_CLAIM_FH:
@@ -1810,7 +1808,6 @@ static void nfs4_return_incompatible_delegation(struct inode *inode, fmode_t fmo
static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
{
struct nfs4_state *state = opendata->state;
- struct nfs_inode *nfsi = NFS_I(state->inode);
struct nfs_delegation *delegation;
int open_mode = opendata->o_arg.open_flags;
fmode_t fmode = opendata->o_arg.fmode;
@@ -1827,7 +1824,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
}
spin_unlock(&state->owner->so_lock);
rcu_read_lock();
- delegation = rcu_dereference(nfsi->delegation);
+ delegation = nfs4_get_valid_delegation(state->inode);
if (!can_open_delegated(delegation, fmode, claim)) {
rcu_read_unlock();
break;
@@ -2371,7 +2368,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
data->o_arg.open_flags, claim))
goto out_no_action;
rcu_read_lock();
- delegation = rcu_dereference(NFS_I(data->state->inode)->delegation);
+ delegation = nfs4_get_valid_delegation(data->state->inode);
if (can_open_delegated(delegation, data->o_arg.fmode, claim))
goto unlock_no_action;
rcu_read_unlock();
@@ -6106,6 +6103,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
status = nfs4_call_sync_custom(&task_setup_data);
if (setclientid.sc_cred) {
+ kfree(clp->cl_acceptor);
clp->cl_acceptor = rpcauth_stringify_acceptor(setclientid.sc_cred);
put_rpccred(setclientid.sc_cred);
}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 85ca49549b39..52cab65f91cf 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -786,7 +786,6 @@ static void nfs_inode_remove_request(struct nfs_page *req)
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_page *head;
- atomic_long_dec(&nfsi->nrequests);
if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) {
head = req->wb_head;
@@ -799,8 +798,10 @@ static void nfs_inode_remove_request(struct nfs_page *req)
spin_unlock(&mapping->private_lock);
}
- if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags))
+ if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) {
nfs_release_request(req);
+ atomic_long_dec(&nfsi->nrequests);
+ }
}
static void
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 8de1c9d644f6..9cd0a6815933 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -2049,7 +2049,8 @@ out_write_size:
inode->i_mtime = inode->i_ctime = current_time(inode);
di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
- ocfs2_update_inode_fsync_trans(handle, inode, 1);
+ if (handle)
+ ocfs2_update_inode_fsync_trans(handle, inode, 1);
}
if (handle)
ocfs2_journal_dirty(handle, wc->w_di_bh);
@@ -2146,13 +2147,30 @@ static int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock,
struct ocfs2_dio_write_ctxt *dwc = NULL;
struct buffer_head *di_bh = NULL;
u64 p_blkno;
- loff_t pos = iblock << inode->i_sb->s_blocksize_bits;
+ unsigned int i_blkbits = inode->i_sb->s_blocksize_bits;
+ loff_t pos = iblock << i_blkbits;
+ sector_t endblk = (i_size_read(inode) - 1) >> i_blkbits;
unsigned len, total_len = bh_result->b_size;
int ret = 0, first_get_block = 0;
len = osb->s_clustersize - (pos & (osb->s_clustersize - 1));
len = min(total_len, len);
+ /*
+ * bh_result->b_size is count in get_more_blocks according to write
+ * "pos" and "end", we need map twice to return different buffer state:
+ * 1. area in file size, not set NEW;
+ * 2. area out file size, set NEW.
+ *
+ * iblock endblk
+ * |--------|---------|---------|---------
+ * |<-------area in file------->|
+ */
+
+ if ((iblock <= endblk) &&
+ ((iblock + ((len - 1) >> i_blkbits)) > endblk))
+ len = (endblk - iblock + 1) << i_blkbits;
+
mlog(0, "get block of %lu at %llu:%u req %u\n",
inode->i_ino, pos, len, total_len);
@@ -2236,6 +2254,9 @@ static int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock,
if (desc->c_needs_zero)
set_buffer_new(bh_result);
+ if (iblock > endblk)
+ set_buffer_new(bh_result);
+
/* May sleep in end_io. It should not happen in a irq context. So defer
* it to dio work queue. */
set_buffer_defer_completion(bh_result);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 2e982db3e1ae..9876db52913a 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1230,6 +1230,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
transfer_to[USRQUOTA] = dqget(sb, make_kqid_uid(attr->ia_uid));
if (IS_ERR(transfer_to[USRQUOTA])) {
status = PTR_ERR(transfer_to[USRQUOTA]);
+ transfer_to[USRQUOTA] = NULL;
goto bail_unlock;
}
}
@@ -1239,6 +1240,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
transfer_to[GRPQUOTA] = dqget(sb, make_kqid_gid(attr->ia_gid));
if (IS_ERR(transfer_to[GRPQUOTA])) {
status = PTR_ERR(transfer_to[GRPQUOTA]);
+ transfer_to[GRPQUOTA] = NULL;
goto bail_unlock;
}
}
@@ -2096,53 +2098,89 @@ static int ocfs2_is_io_unaligned(struct inode *inode, size_t count, loff_t pos)
return 0;
}
-static int ocfs2_prepare_inode_for_refcount(struct inode *inode,
- struct file *file,
- loff_t pos, size_t count,
- int *meta_level)
+static int ocfs2_inode_lock_for_extent_tree(struct inode *inode,
+ struct buffer_head **di_bh,
+ int meta_level,
+ int overwrite_io,
+ int write_sem,
+ int wait)
{
- int ret;
- struct buffer_head *di_bh = NULL;
- u32 cpos = pos >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
- u32 clusters =
- ocfs2_clusters_for_bytes(inode->i_sb, pos + count) - cpos;
+ int ret = 0;
- ret = ocfs2_inode_lock(inode, &di_bh, 1);
- if (ret) {
- mlog_errno(ret);
+ if (wait)
+ ret = ocfs2_inode_lock(inode, NULL, meta_level);
+ else
+ ret = ocfs2_try_inode_lock(inode,
+ overwrite_io ? NULL : di_bh, meta_level);
+ if (ret < 0)
goto out;
+
+ if (wait) {
+ if (write_sem)
+ down_write(&OCFS2_I(inode)->ip_alloc_sem);
+ else
+ down_read(&OCFS2_I(inode)->ip_alloc_sem);
+ } else {
+ if (write_sem)
+ ret = down_write_trylock(&OCFS2_I(inode)->ip_alloc_sem);
+ else
+ ret = down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem);
+
+ if (!ret) {
+ ret = -EAGAIN;
+ goto out_unlock;
+ }
}
- *meta_level = 1;
+ return ret;
- ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX);
- if (ret)
- mlog_errno(ret);
+out_unlock:
+ brelse(*di_bh);
+ ocfs2_inode_unlock(inode, meta_level);
out:
- brelse(di_bh);
return ret;
}
+static void ocfs2_inode_unlock_for_extent_tree(struct inode *inode,
+ struct buffer_head **di_bh,
+ int meta_level,
+ int write_sem)
+{
+ if (write_sem)
+ up_write(&OCFS2_I(inode)->ip_alloc_sem);
+ else
+ up_read(&OCFS2_I(inode)->ip_alloc_sem);
+
+ brelse(*di_bh);
+ *di_bh = NULL;
+
+ if (meta_level >= 0)
+ ocfs2_inode_unlock(inode, meta_level);
+}
+
static int ocfs2_prepare_inode_for_write(struct file *file,
loff_t pos, size_t count, int wait)
{
int ret = 0, meta_level = 0, overwrite_io = 0;
+ int write_sem = 0;
struct dentry *dentry = file->f_path.dentry;
struct inode *inode = d_inode(dentry);
struct buffer_head *di_bh = NULL;
+ u32 cpos;
+ u32 clusters;
/*
* We start with a read level meta lock and only jump to an ex
* if we need to make modifications here.
*/
for(;;) {
- if (wait)
- ret = ocfs2_inode_lock(inode, NULL, meta_level);
- else
- ret = ocfs2_try_inode_lock(inode,
- overwrite_io ? NULL : &di_bh, meta_level);
+ ret = ocfs2_inode_lock_for_extent_tree(inode,
+ &di_bh,
+ meta_level,
+ overwrite_io,
+ write_sem,
+ wait);
if (ret < 0) {
- meta_level = -1;
if (ret != -EAGAIN)
mlog_errno(ret);
goto out;
@@ -2154,15 +2192,8 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
*/
if (!wait && !overwrite_io) {
overwrite_io = 1;
- if (!down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem)) {
- ret = -EAGAIN;
- goto out_unlock;
- }
ret = ocfs2_overwrite_io(inode, di_bh, pos, count);
- brelse(di_bh);
- di_bh = NULL;
- up_read(&OCFS2_I(inode)->ip_alloc_sem);
if (ret < 0) {
if (ret != -EAGAIN)
mlog_errno(ret);
@@ -2181,7 +2212,10 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
* set inode->i_size at the end of a write. */
if (should_remove_suid(dentry)) {
if (meta_level == 0) {
- ocfs2_inode_unlock(inode, meta_level);
+ ocfs2_inode_unlock_for_extent_tree(inode,
+ &di_bh,
+ meta_level,
+ write_sem);
meta_level = 1;
continue;
}
@@ -2195,18 +2229,32 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
ret = ocfs2_check_range_for_refcount(inode, pos, count);
if (ret == 1) {
- ocfs2_inode_unlock(inode, meta_level);
- meta_level = -1;
-
- ret = ocfs2_prepare_inode_for_refcount(inode,
- file,
- pos,
- count,
- &meta_level);
+ ocfs2_inode_unlock_for_extent_tree(inode,
+ &di_bh,
+ meta_level,
+ write_sem);
+ ret = ocfs2_inode_lock_for_extent_tree(inode,
+ &di_bh,
+ meta_level,
+ overwrite_io,
+ 1,
+ wait);
+ write_sem = 1;
+ if (ret < 0) {
+ if (ret != -EAGAIN)
+ mlog_errno(ret);
+ goto out;
+ }
+
+ cpos = pos >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
+ clusters =
+ ocfs2_clusters_for_bytes(inode->i_sb, pos + count) - cpos;
+ ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX);
}
if (ret < 0) {
- mlog_errno(ret);
+ if (ret != -EAGAIN)
+ mlog_errno(ret);
goto out_unlock;
}
@@ -2217,10 +2265,10 @@ out_unlock:
trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno,
pos, count, wait);
- brelse(di_bh);
-
- if (meta_level >= 0)
- ocfs2_inode_unlock(inode, meta_level);
+ ocfs2_inode_unlock_for_extent_tree(inode,
+ &di_bh,
+ meta_level,
+ write_sem);
out:
return ret;
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index d6f7b299eb23..efeea208fdeb 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -283,7 +283,7 @@ static int ocfs2_info_scan_inode_alloc(struct ocfs2_super *osb,
if (inode_alloc)
inode_lock(inode_alloc);
- if (o2info_coherent(&fi->ifi_req)) {
+ if (inode_alloc && o2info_coherent(&fi->ifi_req)) {
status = ocfs2_inode_lock(inode_alloc, &bh, 0);
if (status < 0) {
mlog_errno(status);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 930e3d388579..699a560efbb0 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -217,7 +217,8 @@ void ocfs2_recovery_exit(struct ocfs2_super *osb)
/* At this point, we know that no more recovery threads can be
* launched, so wait for any recovery completion work to
* complete. */
- flush_workqueue(osb->ocfs2_wq);
+ if (osb->ocfs2_wq)
+ flush_workqueue(osb->ocfs2_wq);
/*
* Now that recovery is shut down, and the osb is about to be
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 158e5af767fd..720e9f94957e 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -377,7 +377,8 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
struct ocfs2_dinode *alloc = NULL;
cancel_delayed_work(&osb->la_enable_wq);
- flush_workqueue(osb->ocfs2_wq);
+ if (osb->ocfs2_wq)
+ flush_workqueue(osb->ocfs2_wq);
if (osb->local_alloc_state == OCFS2_LA_UNUSED)
goto out;
diff --git a/fs/open.c b/fs/open.c
index b62f5c0923a8..5c68282ea79e 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -771,10 +771,6 @@ static int do_dentry_open(struct file *f,
f->f_mode |= FMODE_WRITER;
}
- /* POSIX.1-2008/SUSv4 Section XSI 2.9.7 */
- if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))
- f->f_mode |= FMODE_ATOMIC_POS;
-
f->f_op = fops_get(inode->i_fop);
if (WARN_ON(!f->f_op)) {
error = -ENODEV;
@@ -1256,7 +1252,7 @@ EXPORT_SYMBOL(nonseekable_open);
*/
int stream_open(struct inode *inode, struct file *filp)
{
- filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE | FMODE_ATOMIC_POS);
+ filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
filp->f_mode |= FMODE_STREAM;
return 0;
}
diff --git a/fs/pipe.c b/fs/pipe.c
index 8a2ab2f974bd..a9149199e0e7 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -793,6 +793,8 @@ int create_pipe_files(struct file **res, int flags)
}
res[0]->private_data = inode->i_pipe;
res[1] = f;
+ stream_open(inode, res[0]);
+ stream_open(inode, res[1]);
return 0;
}
@@ -931,9 +933,9 @@ static int fifo_open(struct inode *inode, struct file *filp)
__pipe_lock(pipe);
/* We can only do regular read/write on fifos */
- filp->f_mode &= (FMODE_READ | FMODE_WRITE);
+ stream_open(inode, filp);
- switch (filp->f_mode) {
+ switch (filp->f_mode & (FMODE_READ | FMODE_WRITE)) {
case FMODE_READ:
/*
* O_RDONLY
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index ac9247371871..8c1f1bb1a5ce 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -132,9 +132,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
global_node_page_state(NR_SHMEM_THPS) * HPAGE_PMD_NR);
show_val_kb(m, "ShmemPmdMapped: ",
global_node_page_state(NR_SHMEM_PMDMAPPED) * HPAGE_PMD_NR);
- show_val_kb(m, "FileHugePages: ",
+ show_val_kb(m, "FileHugePages: ",
global_node_page_state(NR_FILE_THPS) * HPAGE_PMD_NR);
- show_val_kb(m, "FilePmdMapped: ",
+ show_val_kb(m, "FilePmdMapped: ",
global_node_page_state(NR_FILE_PMDMAPPED) * HPAGE_PMD_NR);
#endif
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 544d1ee15aee..7c952ee732e6 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -42,10 +42,12 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf,
return -EINVAL;
while (count > 0) {
- if (pfn_valid(pfn))
- ppage = pfn_to_page(pfn);
- else
- ppage = NULL;
+ /*
+ * TODO: ZONE_DEVICE support requires to identify
+ * memmaps that were actually initialized.
+ */
+ ppage = pfn_to_online_page(pfn);
+
if (!ppage || PageSlab(ppage) || page_has_type(ppage))
pcount = 0;
else
@@ -216,10 +218,11 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf,
return -EINVAL;
while (count > 0) {
- if (pfn_valid(pfn))
- ppage = pfn_to_page(pfn);
- else
- ppage = NULL;
+ /*
+ * TODO: ZONE_DEVICE support requires to identify
+ * memmaps that were actually initialized.
+ */
+ ppage = pfn_to_online_page(pfn);
if (put_user(stable_page_flags(ppage), out)) {
ret = -EFAULT;
@@ -261,10 +264,11 @@ static ssize_t kpagecgroup_read(struct file *file, char __user *buf,
return -EINVAL;
while (count > 0) {
- if (pfn_valid(pfn))
- ppage = pfn_to_page(pfn);
- else
- ppage = NULL;
+ /*
+ * TODO: ZONE_DEVICE support requires to identify
+ * memmaps that were actually initialized.
+ */
+ ppage = pfn_to_online_page(pfn);
if (ppage)
ino = page_cgroup_ino(ppage);
diff --git a/fs/readdir.c b/fs/readdir.c
index 19bea591c3f1..d26d5ea4de7b 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -27,53 +27,13 @@
/*
* Note the "unsafe_put_user() semantics: we goto a
* label for errors.
- *
- * Also note how we use a "while()" loop here, even though
- * only the biggest size needs to loop. The compiler (well,
- * at least gcc) is smart enough to turn the smaller sizes
- * into just if-statements, and this way we don't need to
- * care whether 'u64' or 'u32' is the biggest size.
- */
-#define unsafe_copy_loop(dst, src, len, type, label) \
- while (len >= sizeof(type)) { \
- unsafe_put_user(get_unaligned((type *)src), \
- (type __user *)dst, label); \
- dst += sizeof(type); \
- src += sizeof(type); \
- len -= sizeof(type); \
- }
-
-/*
- * We avoid doing 64-bit copies on 32-bit architectures. They
- * might be better, but the component names are mostly small,
- * and the 64-bit cases can end up being much more complex and
- * put much more register pressure on the code, so it's likely
- * not worth the pain of unaligned accesses etc.
- *
- * So limit the copies to "unsigned long" size. I did verify
- * that at least the x86-32 case is ok without this limiting,
- * but I worry about random other legacy 32-bit cases that
- * might not do as well.
- */
-#define unsafe_copy_type(dst, src, len, type, label) do { \
- if (sizeof(type) <= sizeof(unsigned long)) \
- unsafe_copy_loop(dst, src, len, type, label); \
-} while (0)
-
-/*
- * Copy the dirent name to user space, and NUL-terminate
- * it. This should not be a function call, since we're doing
- * the copy inside a "user_access_begin/end()" section.
*/
#define unsafe_copy_dirent_name(_dst, _src, _len, label) do { \
char __user *dst = (_dst); \
const char *src = (_src); \
size_t len = (_len); \
- unsafe_copy_type(dst, src, len, u64, label); \
- unsafe_copy_type(dst, src, len, u32, label); \
- unsafe_copy_type(dst, src, len, u16, label); \
- unsafe_copy_type(dst, src, len, u8, label); \
- unsafe_put_user(0, dst, label); \
+ unsafe_put_user(0, dst+len, label); \
+ unsafe_copy_to_user(dst, src, len, label); \
} while (0)
@@ -145,9 +105,9 @@ EXPORT_SYMBOL(iterate_dir);
*/
static int verify_dirent_name(const char *name, int len)
{
- if (WARN_ON_ONCE(!len))
+ if (!len)
return -EIO;
- if (WARN_ON_ONCE(memchr(name, '/', len)))
+ if (memchr(name, '/', len))
return -EIO;
return 0;
}
diff --git a/fs/super.c b/fs/super.c
index f627b7c53d2b..cfadab2cbf35 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1300,6 +1300,7 @@ int get_tree_bdev(struct fs_context *fc,
mutex_lock(&bdev->bd_fsfreeze_mutex);
if (bdev->bd_fsfreeze_count > 0) {
mutex_unlock(&bdev->bd_fsfreeze_mutex);
+ blkdev_put(bdev, mode);
warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
return -EBUSY;
}
@@ -1308,8 +1309,10 @@ int get_tree_bdev(struct fs_context *fc,
fc->sget_key = bdev;
s = sget_fc(fc, test_bdev_super_fc, set_bdev_super_fc);
mutex_unlock(&bdev->bd_fsfreeze_mutex);
- if (IS_ERR(s))
+ if (IS_ERR(s)) {
+ blkdev_put(bdev, mode);
return PTR_ERR(s);
+ }
if (s->s_root) {
/* Don't summarily change the RO/RW state. */
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index 9fc14e38927f..0caa151cae4e 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -16,11 +16,11 @@
#include <linux/namei.h>
#include <linux/tracefs.h>
#include <linux/fsnotify.h>
+#include <linux/security.h>
#include <linux/seq_file.h>
#include <linux/parser.h>
#include <linux/magic.h>
#include <linux/slab.h>
-#include <linux/security.h>
#define TRACEFS_DEFAULT_MODE 0700
@@ -28,25 +28,6 @@ static struct vfsmount *tracefs_mount;
static int tracefs_mount_count;
static bool tracefs_registered;
-static int default_open_file(struct inode *inode, struct file *filp)
-{
- struct dentry *dentry = filp->f_path.dentry;
- struct file_operations *real_fops;
- int ret;
-
- if (!dentry)
- return -EINVAL;
-
- ret = security_locked_down(LOCKDOWN_TRACEFS);
- if (ret)
- return ret;
-
- real_fops = dentry->d_fsdata;
- if (!real_fops->open)
- return 0;
- return real_fops->open(inode, filp);
-}
-
static ssize_t default_read_file(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
@@ -241,12 +222,6 @@ static int tracefs_apply_options(struct super_block *sb)
return 0;
}
-static void tracefs_destroy_inode(struct inode *inode)
-{
- if (S_ISREG(inode->i_mode))
- kfree(inode->i_fop);
-}
-
static int tracefs_remount(struct super_block *sb, int *flags, char *data)
{
int err;
@@ -283,7 +258,6 @@ static int tracefs_show_options(struct seq_file *m, struct dentry *root)
static const struct super_operations tracefs_super_operations = {
.statfs = simple_statfs,
.remount_fs = tracefs_remount,
- .destroy_inode = tracefs_destroy_inode,
.show_options = tracefs_show_options,
};
@@ -414,10 +388,12 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode,
struct dentry *parent, void *data,
const struct file_operations *fops)
{
- struct file_operations *proxy_fops;
struct dentry *dentry;
struct inode *inode;
+ if (security_locked_down(LOCKDOWN_TRACEFS))
+ return NULL;
+
if (!(mode & S_IFMT))
mode |= S_IFREG;
BUG_ON(!S_ISREG(mode));
@@ -430,20 +406,8 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode,
if (unlikely(!inode))
return failed_creating(dentry);
- proxy_fops = kzalloc(sizeof(struct file_operations), GFP_KERNEL);
- if (unlikely(!proxy_fops)) {
- iput(inode);
- return failed_creating(dentry);
- }
-
- if (!fops)
- fops = &tracefs_file_operations;
-
- dentry->d_fsdata = (void *)fops;
- memcpy(proxy_fops, fops, sizeof(*proxy_fops));
- proxy_fops->open = default_open_file;
inode->i_mode = mode;
- inode->i_fop = proxy_fops;
+ inode->i_fop = fops ? fops : &tracefs_file_operations;
inode->i_private = data;
d_instantiate(dentry, inode);
fsnotify_create(dentry->d_parent->d_inode, dentry);
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index 5de296b34ab1..14fbdf22b7e7 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -28,12 +28,11 @@ xfs_get_aghdr_buf(
struct xfs_mount *mp,
xfs_daddr_t blkno,
size_t numblks,
- int flags,
const struct xfs_buf_ops *ops)
{
struct xfs_buf *bp;
- bp = xfs_buf_get_uncached(mp->m_ddev_targp, numblks, flags);
+ bp = xfs_buf_get_uncached(mp->m_ddev_targp, numblks, 0);
if (!bp)
return NULL;
@@ -345,7 +344,7 @@ xfs_ag_init_hdr(
{
struct xfs_buf *bp;
- bp = xfs_get_aghdr_buf(mp, id->daddr, id->numblks, 0, ops);
+ bp = xfs_get_aghdr_buf(mp, id->daddr, id->numblks, ops);
if (!bp)
return -ENOMEM;
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index b9f019603d0b..f0089e862216 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -826,32 +826,17 @@ xfs_attr_shortform_to_leaf(
sf = (xfs_attr_shortform_t *)tmpbuffer;
xfs_idata_realloc(dp, -size, XFS_ATTR_FORK);
- xfs_bmap_local_to_extents_empty(dp, XFS_ATTR_FORK);
+ xfs_bmap_local_to_extents_empty(args->trans, dp, XFS_ATTR_FORK);
bp = NULL;
error = xfs_da_grow_inode(args, &blkno);
- if (error) {
- /*
- * If we hit an IO error middle of the transaction inside
- * grow_inode(), we may have inconsistent data. Bail out.
- */
- if (error == -EIO)
- goto out;
- xfs_idata_realloc(dp, size, XFS_ATTR_FORK); /* try to put */
- memcpy(ifp->if_u1.if_data, tmpbuffer, size); /* it back */
+ if (error)
goto out;
- }
ASSERT(blkno == 0);
error = xfs_attr3_leaf_create(args, blkno, &bp);
- if (error) {
- /* xfs_attr3_leaf_create may not have instantiated a block */
- if (bp && (xfs_da_shrink_inode(args, 0, bp) != 0))
- goto out;
- xfs_idata_realloc(dp, size, XFS_ATTR_FORK); /* try to put */
- memcpy(ifp->if_u1.if_data, tmpbuffer, size); /* it back */
+ if (error)
goto out;
- }
memset((char *)&nargs, 0, sizeof(nargs));
nargs.dp = dp;
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 4edc25a2ba80..02469d59c787 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -792,6 +792,7 @@ out_root_realloc:
*/
void
xfs_bmap_local_to_extents_empty(
+ struct xfs_trans *tp,
struct xfs_inode *ip,
int whichfork)
{
@@ -808,6 +809,7 @@ xfs_bmap_local_to_extents_empty(
ifp->if_u1.if_root = NULL;
ifp->if_height = 0;
XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}
@@ -840,7 +842,7 @@ xfs_bmap_local_to_extents(
ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
if (!ifp->if_bytes) {
- xfs_bmap_local_to_extents_empty(ip, whichfork);
+ xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
flags = XFS_ILOG_CORE;
goto done;
}
@@ -887,7 +889,7 @@ xfs_bmap_local_to_extents(
/* account for the change in fork size */
xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
- xfs_bmap_local_to_extents_empty(ip, whichfork);
+ xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
flags |= XFS_ILOG_CORE;
ifp->if_u1.if_root = NULL;
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 5bb446d80542..e2798c6f3a5f 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -182,7 +182,8 @@ void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno,
xfs_filblks_t len);
int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
int xfs_bmap_set_attrforkoff(struct xfs_inode *ip, int size, int *version);
-void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork);
+void xfs_bmap_local_to_extents_empty(struct xfs_trans *tp,
+ struct xfs_inode *ip, int whichfork);
void __xfs_bmap_add_free(struct xfs_trans *tp, xfs_fsblock_t bno,
xfs_filblks_t len, const struct xfs_owner_info *oinfo,
bool skip_discard);
diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c
index 9595ced393dc..49e4bc39e7bb 100644
--- a/fs/xfs/libxfs/xfs_dir2_block.c
+++ b/fs/xfs/libxfs/xfs_dir2_block.c
@@ -1096,7 +1096,7 @@ xfs_dir2_sf_to_block(
memcpy(sfp, oldsfp, ifp->if_bytes);
xfs_idata_realloc(dp, -ifp->if_bytes, XFS_DATA_FORK);
- xfs_bmap_local_to_extents_empty(dp, XFS_DATA_FORK);
+ xfs_bmap_local_to_extents_empty(tp, dp, XFS_DATA_FORK);
dp->i_d.di_size = 0;
/*
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 39dd2b908106..e9371a8e0e26 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -366,11 +366,11 @@ struct xfs_bulkstat {
uint64_t bs_blocks; /* number of blocks */
uint64_t bs_xflags; /* extended flags */
- uint64_t bs_atime; /* access time, seconds */
- uint64_t bs_mtime; /* modify time, seconds */
+ int64_t bs_atime; /* access time, seconds */
+ int64_t bs_mtime; /* modify time, seconds */
- uint64_t bs_ctime; /* inode change time, seconds */
- uint64_t bs_btime; /* creation time, seconds */
+ int64_t bs_ctime; /* inode change time, seconds */
+ int64_t bs_btime; /* creation time, seconds */
uint32_t bs_gen; /* generation count */
uint32_t bs_uid; /* user id */
diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c
index 93b3793bc5b3..0cab11a5d390 100644
--- a/fs/xfs/scrub/refcount.c
+++ b/fs/xfs/scrub/refcount.c
@@ -341,7 +341,6 @@ xchk_refcountbt_rec(
xfs_extlen_t len;
xfs_nlink_t refcount;
bool has_cowflag;
- int error = 0;
bno = be32_to_cpu(rec->refc.rc_startblock);
len = be32_to_cpu(rec->refc.rc_blockcount);
@@ -366,7 +365,7 @@ xchk_refcountbt_rec(
xchk_refcountbt_xref(bs->sc, bno, len, refcount);
- return error;
+ return 0;
}
/* Make sure we have as many refc blocks as the rmap says. */
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 0910cb75b65d..4f443703065e 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -864,6 +864,7 @@ xfs_alloc_file_space(
xfs_filblks_t allocatesize_fsb;
xfs_extlen_t extsz, temp;
xfs_fileoff_t startoffset_fsb;
+ xfs_fileoff_t endoffset_fsb;
int nimaps;
int quota_flag;
int rt;
@@ -891,7 +892,8 @@ xfs_alloc_file_space(
imapp = &imaps[0];
nimaps = 1;
startoffset_fsb = XFS_B_TO_FSBT(mp, offset);
- allocatesize_fsb = XFS_B_TO_FSB(mp, count);
+ endoffset_fsb = XFS_B_TO_FSB(mp, offset + count);
+ allocatesize_fsb = endoffset_fsb - startoffset_fsb;
/*
* Allocate file space until done or until there is an error
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 21c243622a79..0abba171aa89 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -345,6 +345,15 @@ xfs_buf_allocate_memory(
unsigned short page_count, i;
xfs_off_t start, end;
int error;
+ xfs_km_flags_t kmflag_mask = 0;
+
+ /*
+ * assure zeroed buffer for non-read cases.
+ */
+ if (!(flags & XBF_READ)) {
+ kmflag_mask |= KM_ZERO;
+ gfp_mask |= __GFP_ZERO;
+ }
/*
* for buffers that are contained within a single page, just allocate
@@ -354,7 +363,8 @@ xfs_buf_allocate_memory(
size = BBTOB(bp->b_length);
if (size < PAGE_SIZE) {
int align_mask = xfs_buftarg_dma_alignment(bp->b_target);
- bp->b_addr = kmem_alloc_io(size, align_mask, KM_NOFS);
+ bp->b_addr = kmem_alloc_io(size, align_mask,
+ KM_NOFS | kmflag_mask);
if (!bp->b_addr) {
/* low memory - use alloc_page loop instead */
goto use_alloc_page;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index a2beee9f74da..641d07f30a27 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1443,7 +1443,7 @@ xlog_alloc_log(
prev_iclog = iclog;
iclog->ic_data = kmem_alloc_io(log->l_iclog_size, align_mask,
- KM_MAYFAIL);
+ KM_MAYFAIL | KM_ZERO);
if (!iclog->ic_data)
goto out_free_iclog;
#ifdef DEBUG
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 508319039dce..c1a514ffff55 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -127,7 +127,7 @@ xlog_alloc_buffer(
if (nbblks > 1 && log->l_sectBBsize > 1)
nbblks += log->l_sectBBsize;
nbblks = round_up(nbblks, log->l_sectBBsize);
- return kmem_alloc_io(BBTOB(nbblks), align_mask, KM_MAYFAIL);
+ return kmem_alloc_io(BBTOB(nbblks), align_mask, KM_MAYFAIL | KM_ZERO);
}
/*
diff --git a/include/Kbuild b/include/Kbuild
index ffba79483cc5..95508049ee51 100644
--- a/include/Kbuild
+++ b/include/Kbuild
@@ -65,7 +65,6 @@ header-test- += keys/asymmetric-subtype.h
header-test- += keys/asymmetric-type.h
header-test- += keys/big_key-type.h
header-test- += keys/request_key_auth-type.h
-header-test- += keys/trusted.h
header-test- += kvm/arm_arch_timer.h
header-test- += kvm/arm_pmu.h
header-test-$(CONFIG_ARM) += kvm/arm_psci.h
@@ -1028,6 +1027,7 @@ header-test- += trace/events/fsi_master_gpio.h
header-test- += trace/events/huge_memory.h
header-test- += trace/events/ib_mad.h
header-test- += trace/events/ib_umad.h
+header-test- += trace/events/io_uring.h
header-test- += trace/events/iscsi.h
header-test- += trace/events/jbd2.h
header-test- += trace/events/kvm.h
diff --git a/include/acpi/processor.h b/include/acpi/processor.h
index f936033cb9e6..47805172e73d 100644
--- a/include/acpi/processor.h
+++ b/include/acpi/processor.h
@@ -232,8 +232,8 @@ struct acpi_processor {
struct acpi_processor_limit limit;
struct thermal_cooling_device *cdev;
struct device *dev; /* Processor device. */
- struct dev_pm_qos_request perflib_req;
- struct dev_pm_qos_request thermal_req;
+ struct freq_qos_request perflib_req;
+ struct freq_qos_request thermal_req;
};
struct acpi_processor_errata {
@@ -302,8 +302,8 @@ static inline void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx
#ifdef CONFIG_CPU_FREQ
extern bool acpi_processor_cpufreq_init;
void acpi_processor_ignore_ppc_init(void);
-void acpi_processor_ppc_init(int cpu);
-void acpi_processor_ppc_exit(int cpu);
+void acpi_processor_ppc_init(struct cpufreq_policy *policy);
+void acpi_processor_ppc_exit(struct cpufreq_policy *policy);
void acpi_processor_ppc_has_changed(struct acpi_processor *pr, int event_flag);
extern int acpi_processor_get_bios_limit(int cpu, unsigned int *limit);
#else
@@ -311,11 +311,11 @@ static inline void acpi_processor_ignore_ppc_init(void)
{
return;
}
-static inline void acpi_processor_ppc_init(int cpu)
+static inline void acpi_processor_ppc_init(struct cpufreq_policy *policy)
{
return;
}
-static inline void acpi_processor_ppc_exit(int cpu)
+static inline void acpi_processor_ppc_exit(struct cpufreq_policy *policy)
{
return;
}
@@ -431,14 +431,14 @@ static inline int acpi_processor_hotplug(struct acpi_processor *pr)
int acpi_processor_get_limit_info(struct acpi_processor *pr);
extern const struct thermal_cooling_device_ops processor_cooling_ops;
#if defined(CONFIG_ACPI_CPU_FREQ_PSS) & defined(CONFIG_CPU_FREQ)
-void acpi_thermal_cpufreq_init(int cpu);
-void acpi_thermal_cpufreq_exit(int cpu);
+void acpi_thermal_cpufreq_init(struct cpufreq_policy *policy);
+void acpi_thermal_cpufreq_exit(struct cpufreq_policy *policy);
#else
-static inline void acpi_thermal_cpufreq_init(int cpu)
+static inline void acpi_thermal_cpufreq_init(struct cpufreq_policy *policy)
{
return;
}
-static inline void acpi_thermal_cpufreq_exit(int cpu)
+static inline void acpi_thermal_cpufreq_exit(struct cpufreq_policy *policy)
{
return;
}
diff --git a/include/asm-generic/vdso/vsyscall.h b/include/asm-generic/vdso/vsyscall.h
index e94b19782c92..ce4103208619 100644
--- a/include/asm-generic/vdso/vsyscall.h
+++ b/include/asm-generic/vdso/vsyscall.h
@@ -25,13 +25,6 @@ static __always_inline int __arch_get_clock_mode(struct timekeeper *tk)
}
#endif /* __arch_get_clock_mode */
-#ifndef __arch_use_vsyscall
-static __always_inline int __arch_use_vsyscall(struct vdso_data *vdata)
-{
- return 1;
-}
-#endif /* __arch_use_vsyscall */
-
#ifndef __arch_update_vsyscall
static __always_inline void __arch_update_vsyscall(struct vdso_data *vdata,
struct timekeeper *tk)
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index dae64600ccbf..a9c4e4721434 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -110,17 +110,17 @@
#endif
#ifdef CONFIG_FTRACE_MCOUNT_RECORD
-#ifdef CC_USING_PATCHABLE_FUNCTION_ENTRY
-#define MCOUNT_REC() . = ALIGN(8); \
- __start_mcount_loc = .; \
- KEEP(*(__patchable_function_entries)) \
- __stop_mcount_loc = .;
-#else
+/*
+ * The ftrace call sites are logged to a section whose name depends on the
+ * compiler option used. A given kernel image will only use one, AKA
+ * FTRACE_CALLSITE_SECTION. We capture all of them here to avoid header
+ * dependencies for FTRACE_CALLSITE_SECTION's definition.
+ */
#define MCOUNT_REC() . = ALIGN(8); \
__start_mcount_loc = .; \
KEEP(*(__mcount_loc)) \
+ KEEP(*(__patchable_function_entries)) \
__stop_mcount_loc = .;
-#endif
#else
#define MCOUNT_REC()
#endif
diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h
index 01f514521687..7865e6b5d36c 100644
--- a/include/drm/drm_gem_shmem_helper.h
+++ b/include/drm/drm_gem_shmem_helper.h
@@ -44,7 +44,20 @@ struct drm_gem_shmem_object {
*/
unsigned int pages_use_count;
+ /**
+ * @madv: State for madvise
+ *
+ * 0 is active/inuse.
+ * A negative value is the object is purged.
+ * Positive values are driver specific and not used by the helpers.
+ */
int madv;
+
+ /**
+ * @madv_list: List entry for madvise tracking
+ *
+ * Typically used by drivers to track purgeable objects
+ */
struct list_head madv_list;
/**
diff --git a/include/drm/drm_self_refresh_helper.h b/include/drm/drm_self_refresh_helper.h
index 5b79d253fb46..520235c20708 100644
--- a/include/drm/drm_self_refresh_helper.h
+++ b/include/drm/drm_self_refresh_helper.h
@@ -13,7 +13,8 @@ struct drm_crtc;
void drm_self_refresh_helper_alter_state(struct drm_atomic_state *state);
void drm_self_refresh_helper_update_avg_times(struct drm_atomic_state *state,
- unsigned int commit_time_ms);
+ unsigned int commit_time_ms,
+ unsigned int new_self_refresh_mask);
int drm_self_refresh_helper_init(struct drm_crtc *crtc);
void drm_self_refresh_helper_cleanup(struct drm_crtc *crtc);
diff --git a/include/keys/trusted.h b/include/keys/trusted_tpm.h
index 0071298b9b28..a56d8e1298f2 100644
--- a/include/keys/trusted.h
+++ b/include/keys/trusted_tpm.h
@@ -1,14 +1,13 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __TRUSTED_KEY_H
-#define __TRUSTED_KEY_H
+#ifndef __TRUSTED_TPM_H
+#define __TRUSTED_TPM_H
+
+#include <keys/trusted-type.h>
+#include <linux/tpm_command.h>
/* implementation specific TPM constants */
#define MAX_BUF_SIZE 1024
#define TPM_GETRANDOM_SIZE 14
-#define TPM_OSAP_SIZE 36
-#define TPM_OIAP_SIZE 10
-#define TPM_SEAL_SIZE 87
-#define TPM_UNSEAL_SIZE 104
#define TPM_SIZE_OFFSET 2
#define TPM_RETURN_OFFSET 6
#define TPM_DATA_OFFSET 10
@@ -17,13 +16,6 @@
#define LOAD32N(buffer, offset) (*(uint32_t *)&buffer[offset])
#define LOAD16(buffer, offset) (ntohs(*(uint16_t *)&buffer[offset]))
-struct tpm_buf {
- int len;
- unsigned char data[MAX_BUF_SIZE];
-};
-
-#define INIT_BUF(tb) (tb->len = 0)
-
struct osapsess {
uint32_t handle;
unsigned char secret[SHA1_DIGEST_SIZE];
@@ -48,6 +40,13 @@ int TSS_checkhmac1(unsigned char *buffer,
int trusted_tpm_send(unsigned char *cmd, size_t buflen);
int oiap(struct tpm_buf *tb, uint32_t *handle, unsigned char *nonce);
+int tpm2_seal_trusted(struct tpm_chip *chip,
+ struct trusted_key_payload *payload,
+ struct trusted_key_options *options);
+int tpm2_unseal_trusted(struct tpm_chip *chip,
+ struct trusted_key_payload *payload,
+ struct trusted_key_options *options);
+
#define TPM_DEBUG 0
#if TPM_DEBUG
@@ -109,28 +108,4 @@ static inline void dump_tpm_buf(unsigned char *buf)
{
}
#endif
-
-static inline void store8(struct tpm_buf *buf, const unsigned char value)
-{
- buf->data[buf->len++] = value;
-}
-
-static inline void store16(struct tpm_buf *buf, const uint16_t value)
-{
- *(uint16_t *) & buf->data[buf->len] = htons(value);
- buf->len += sizeof value;
-}
-
-static inline void store32(struct tpm_buf *buf, const uint32_t value)
-{
- *(uint32_t *) & buf->data[buf->len] = htonl(value);
- buf->len += sizeof value;
-}
-
-static inline void storebytes(struct tpm_buf *buf, const unsigned char *in,
- const int len)
-{
- memcpy(buf->data + buf->len, in, len);
- buf->len += len;
-}
#endif
diff --git a/include/kunit/assert.h b/include/kunit/assert.h
new file mode 100644
index 000000000000..db6a0fca09b4
--- /dev/null
+++ b/include/kunit/assert.h
@@ -0,0 +1,356 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Assertion and expectation serialization API.
+ *
+ * Copyright (C) 2019, Google LLC.
+ * Author: Brendan Higgins <brendanhiggins@google.com>
+ */
+
+#ifndef _KUNIT_ASSERT_H
+#define _KUNIT_ASSERT_H
+
+#include <kunit/string-stream.h>
+#include <linux/err.h>
+
+struct kunit;
+
+/**
+ * enum kunit_assert_type - Type of expectation/assertion.
+ * @KUNIT_ASSERTION: Used to denote that a kunit_assert represents an assertion.
+ * @KUNIT_EXPECTATION: Denotes that a kunit_assert represents an expectation.
+ *
+ * Used in conjunction with a &struct kunit_assert to denote whether it
+ * represents an expectation or an assertion.
+ */
+enum kunit_assert_type {
+ KUNIT_ASSERTION,
+ KUNIT_EXPECTATION,
+};
+
+/**
+ * struct kunit_assert - Data for printing a failed assertion or expectation.
+ * @test: the test case this expectation/assertion is associated with.
+ * @type: the type (either an expectation or an assertion) of this kunit_assert.
+ * @line: the source code line number that the expectation/assertion is at.
+ * @file: the file path of the source file that the expectation/assertion is in.
+ * @message: an optional message to provide additional context.
+ * @format: a function which formats the data in this kunit_assert to a string.
+ *
+ * Represents a failed expectation/assertion. Contains all the data necessary to
+ * format a string to a user reporting the failure.
+ */
+struct kunit_assert {
+ struct kunit *test;
+ enum kunit_assert_type type;
+ int line;
+ const char *file;
+ struct va_format message;
+ void (*format)(const struct kunit_assert *assert,
+ struct string_stream *stream);
+};
+
+/**
+ * KUNIT_INIT_VA_FMT_NULL - Default initializer for struct va_format.
+ *
+ * Used inside a struct initialization block to initialize struct va_format to
+ * default values where fmt and va are null.
+ */
+#define KUNIT_INIT_VA_FMT_NULL { .fmt = NULL, .va = NULL }
+
+/**
+ * KUNIT_INIT_ASSERT_STRUCT() - Initializer for a &struct kunit_assert.
+ * @kunit: The test case that this expectation/assertion is associated with.
+ * @assert_type: The type (assertion or expectation) of this kunit_assert.
+ * @fmt: The formatting function which builds a string out of this kunit_assert.
+ *
+ * The base initializer for a &struct kunit_assert.
+ */
+#define KUNIT_INIT_ASSERT_STRUCT(kunit, assert_type, fmt) { \
+ .test = kunit, \
+ .type = assert_type, \
+ .file = __FILE__, \
+ .line = __LINE__, \
+ .message = KUNIT_INIT_VA_FMT_NULL, \
+ .format = fmt \
+}
+
+void kunit_base_assert_format(const struct kunit_assert *assert,
+ struct string_stream *stream);
+
+void kunit_assert_print_msg(const struct kunit_assert *assert,
+ struct string_stream *stream);
+
+/**
+ * struct kunit_fail_assert - Represents a plain fail expectation/assertion.
+ * @assert: The parent of this type.
+ *
+ * Represents a simple KUNIT_FAIL/KUNIT_ASSERT_FAILURE that always fails.
+ */
+struct kunit_fail_assert {
+ struct kunit_assert assert;
+};
+
+void kunit_fail_assert_format(const struct kunit_assert *assert,
+ struct string_stream *stream);
+
+/**
+ * KUNIT_INIT_FAIL_ASSERT_STRUCT() - Initializer for &struct kunit_fail_assert.
+ * @test: The test case that this expectation/assertion is associated with.
+ * @type: The type (assertion or expectation) of this kunit_assert.
+ *
+ * Initializes a &struct kunit_fail_assert. Intended to be used in
+ * KUNIT_EXPECT_* and KUNIT_ASSERT_* macros.
+ */
+#define KUNIT_INIT_FAIL_ASSERT_STRUCT(test, type) { \
+ .assert = KUNIT_INIT_ASSERT_STRUCT(test, \
+ type, \
+ kunit_fail_assert_format) \
+}
+
+/**
+ * struct kunit_unary_assert - Represents a KUNIT_{EXPECT|ASSERT}_{TRUE|FALSE}
+ * @assert: The parent of this type.
+ * @condition: A string representation of a conditional expression.
+ * @expected_true: True if of type KUNIT_{EXPECT|ASSERT}_TRUE, false otherwise.
+ *
+ * Represents a simple expectation or assertion that simply asserts something is
+ * true or false. In other words, represents the expectations:
+ * KUNIT_{EXPECT|ASSERT}_{TRUE|FALSE}
+ */
+struct kunit_unary_assert {
+ struct kunit_assert assert;
+ const char *condition;
+ bool expected_true;
+};
+
+void kunit_unary_assert_format(const struct kunit_assert *assert,
+ struct string_stream *stream);
+
+/**
+ * KUNIT_INIT_UNARY_ASSERT_STRUCT() - Initializes &struct kunit_unary_assert.
+ * @test: The test case that this expectation/assertion is associated with.
+ * @type: The type (assertion or expectation) of this kunit_assert.
+ * @cond: A string representation of the expression asserted true or false.
+ * @expect_true: True if of type KUNIT_{EXPECT|ASSERT}_TRUE, false otherwise.
+ *
+ * Initializes a &struct kunit_unary_assert. Intended to be used in
+ * KUNIT_EXPECT_* and KUNIT_ASSERT_* macros.
+ */
+#define KUNIT_INIT_UNARY_ASSERT_STRUCT(test, type, cond, expect_true) { \
+ .assert = KUNIT_INIT_ASSERT_STRUCT(test, \
+ type, \
+ kunit_unary_assert_format), \
+ .condition = cond, \
+ .expected_true = expect_true \
+}
+
+/**
+ * struct kunit_ptr_not_err_assert - An expectation/assertion that a pointer is
+ * not NULL and not a -errno.
+ * @assert: The parent of this type.
+ * @text: A string representation of the expression passed to the expectation.
+ * @value: The actual evaluated pointer value of the expression.
+ *
+ * Represents an expectation/assertion that a pointer is not null and is does
+ * not contain a -errno. (See IS_ERR_OR_NULL().)
+ */
+struct kunit_ptr_not_err_assert {
+ struct kunit_assert assert;
+ const char *text;
+ const void *value;
+};
+
+void kunit_ptr_not_err_assert_format(const struct kunit_assert *assert,
+ struct string_stream *stream);
+
+/**
+ * KUNIT_INIT_PTR_NOT_ERR_ASSERT_STRUCT() - Initializes a
+ * &struct kunit_ptr_not_err_assert.
+ * @test: The test case that this expectation/assertion is associated with.
+ * @type: The type (assertion or expectation) of this kunit_assert.
+ * @txt: A string representation of the expression passed to the expectation.
+ * @val: The actual evaluated pointer value of the expression.
+ *
+ * Initializes a &struct kunit_ptr_not_err_assert. Intended to be used in
+ * KUNIT_EXPECT_* and KUNIT_ASSERT_* macros.
+ */
+#define KUNIT_INIT_PTR_NOT_ERR_STRUCT(test, type, txt, val) { \
+ .assert = KUNIT_INIT_ASSERT_STRUCT(test, \
+ type, \
+ kunit_ptr_not_err_assert_format), \
+ .text = txt, \
+ .value = val \
+}
+
+/**
+ * struct kunit_binary_assert - An expectation/assertion that compares two
+ * non-pointer values (for example, KUNIT_EXPECT_EQ(test, 1 + 1, 2)).
+ * @assert: The parent of this type.
+ * @operation: A string representation of the comparison operator (e.g. "==").
+ * @left_text: A string representation of the expression in the left slot.
+ * @left_value: The actual evaluated value of the expression in the left slot.
+ * @right_text: A string representation of the expression in the right slot.
+ * @right_value: The actual evaluated value of the expression in the right slot.
+ *
+ * Represents an expectation/assertion that compares two non-pointer values. For
+ * example, to expect that 1 + 1 == 2, you can use the expectation
+ * KUNIT_EXPECT_EQ(test, 1 + 1, 2);
+ */
+struct kunit_binary_assert {
+ struct kunit_assert assert;
+ const char *operation;
+ const char *left_text;
+ long long left_value;
+ const char *right_text;
+ long long right_value;
+};
+
+void kunit_binary_assert_format(const struct kunit_assert *assert,
+ struct string_stream *stream);
+
+/**
+ * KUNIT_INIT_BINARY_ASSERT_STRUCT() - Initializes a
+ * &struct kunit_binary_assert.
+ * @test: The test case that this expectation/assertion is associated with.
+ * @type: The type (assertion or expectation) of this kunit_assert.
+ * @op_str: A string representation of the comparison operator (e.g. "==").
+ * @left_str: A string representation of the expression in the left slot.
+ * @left_val: The actual evaluated value of the expression in the left slot.
+ * @right_str: A string representation of the expression in the right slot.
+ * @right_val: The actual evaluated value of the expression in the right slot.
+ *
+ * Initializes a &struct kunit_binary_assert. Intended to be used in
+ * KUNIT_EXPECT_* and KUNIT_ASSERT_* macros.
+ */
+#define KUNIT_INIT_BINARY_ASSERT_STRUCT(test, \
+ type, \
+ op_str, \
+ left_str, \
+ left_val, \
+ right_str, \
+ right_val) { \
+ .assert = KUNIT_INIT_ASSERT_STRUCT(test, \
+ type, \
+ kunit_binary_assert_format), \
+ .operation = op_str, \
+ .left_text = left_str, \
+ .left_value = left_val, \
+ .right_text = right_str, \
+ .right_value = right_val \
+}
+
+/**
+ * struct kunit_binary_ptr_assert - An expectation/assertion that compares two
+ * pointer values (for example, KUNIT_EXPECT_PTR_EQ(test, foo, bar)).
+ * @assert: The parent of this type.
+ * @operation: A string representation of the comparison operator (e.g. "==").
+ * @left_text: A string representation of the expression in the left slot.
+ * @left_value: The actual evaluated value of the expression in the left slot.
+ * @right_text: A string representation of the expression in the right slot.
+ * @right_value: The actual evaluated value of the expression in the right slot.
+ *
+ * Represents an expectation/assertion that compares two pointer values. For
+ * example, to expect that foo and bar point to the same thing, you can use the
+ * expectation KUNIT_EXPECT_PTR_EQ(test, foo, bar);
+ */
+struct kunit_binary_ptr_assert {
+ struct kunit_assert assert;
+ const char *operation;
+ const char *left_text;
+ const void *left_value;
+ const char *right_text;
+ const void *right_value;
+};
+
+void kunit_binary_ptr_assert_format(const struct kunit_assert *assert,
+ struct string_stream *stream);
+
+/**
+ * KUNIT_INIT_BINARY_PTR_ASSERT_STRUCT() - Initializes a
+ * &struct kunit_binary_ptr_assert.
+ * @test: The test case that this expectation/assertion is associated with.
+ * @type: The type (assertion or expectation) of this kunit_assert.
+ * @op_str: A string representation of the comparison operator (e.g. "==").
+ * @left_str: A string representation of the expression in the left slot.
+ * @left_val: The actual evaluated value of the expression in the left slot.
+ * @right_str: A string representation of the expression in the right slot.
+ * @right_val: The actual evaluated value of the expression in the right slot.
+ *
+ * Initializes a &struct kunit_binary_ptr_assert. Intended to be used in
+ * KUNIT_EXPECT_* and KUNIT_ASSERT_* macros.
+ */
+#define KUNIT_INIT_BINARY_PTR_ASSERT_STRUCT(test, \
+ type, \
+ op_str, \
+ left_str, \
+ left_val, \
+ right_str, \
+ right_val) { \
+ .assert = KUNIT_INIT_ASSERT_STRUCT(test, \
+ type, \
+ kunit_binary_ptr_assert_format), \
+ .operation = op_str, \
+ .left_text = left_str, \
+ .left_value = left_val, \
+ .right_text = right_str, \
+ .right_value = right_val \
+}
+
+/**
+ * struct kunit_binary_str_assert - An expectation/assertion that compares two
+ * string values (for example, KUNIT_EXPECT_STREQ(test, foo, "bar")).
+ * @assert: The parent of this type.
+ * @operation: A string representation of the comparison operator (e.g. "==").
+ * @left_text: A string representation of the expression in the left slot.
+ * @left_value: The actual evaluated value of the expression in the left slot.
+ * @right_text: A string representation of the expression in the right slot.
+ * @right_value: The actual evaluated value of the expression in the right slot.
+ *
+ * Represents an expectation/assertion that compares two string values. For
+ * example, to expect that the string in foo is equal to "bar", you can use the
+ * expectation KUNIT_EXPECT_STREQ(test, foo, "bar");
+ */
+struct kunit_binary_str_assert {
+ struct kunit_assert assert;
+ const char *operation;
+ const char *left_text;
+ const char *left_value;
+ const char *right_text;
+ const char *right_value;
+};
+
+void kunit_binary_str_assert_format(const struct kunit_assert *assert,
+ struct string_stream *stream);
+
+/**
+ * KUNIT_INIT_BINARY_STR_ASSERT_STRUCT() - Initializes a
+ * &struct kunit_binary_str_assert.
+ * @test: The test case that this expectation/assertion is associated with.
+ * @type: The type (assertion or expectation) of this kunit_assert.
+ * @op_str: A string representation of the comparison operator (e.g. "==").
+ * @left_str: A string representation of the expression in the left slot.
+ * @left_val: The actual evaluated value of the expression in the left slot.
+ * @right_str: A string representation of the expression in the right slot.
+ * @right_val: The actual evaluated value of the expression in the right slot.
+ *
+ * Initializes a &struct kunit_binary_str_assert. Intended to be used in
+ * KUNIT_EXPECT_* and KUNIT_ASSERT_* macros.
+ */
+#define KUNIT_INIT_BINARY_STR_ASSERT_STRUCT(test, \
+ type, \
+ op_str, \
+ left_str, \
+ left_val, \
+ right_str, \
+ right_val) { \
+ .assert = KUNIT_INIT_ASSERT_STRUCT(test, \
+ type, \
+ kunit_binary_str_assert_format), \
+ .operation = op_str, \
+ .left_text = left_str, \
+ .left_value = left_val, \
+ .right_text = right_str, \
+ .right_value = right_val \
+}
+
+#endif /* _KUNIT_ASSERT_H */
diff --git a/include/kunit/string-stream.h b/include/kunit/string-stream.h
new file mode 100644
index 000000000000..fe98a00b75a9
--- /dev/null
+++ b/include/kunit/string-stream.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * C++ stream style string builder used in KUnit for building messages.
+ *
+ * Copyright (C) 2019, Google LLC.
+ * Author: Brendan Higgins <brendanhiggins@google.com>
+ */
+
+#ifndef _KUNIT_STRING_STREAM_H
+#define _KUNIT_STRING_STREAM_H
+
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <stdarg.h>
+
+struct string_stream_fragment {
+ struct kunit *test;
+ struct list_head node;
+ char *fragment;
+};
+
+struct string_stream {
+ size_t length;
+ struct list_head fragments;
+ /* length and fragments are protected by this lock */
+ spinlock_t lock;
+ struct kunit *test;
+ gfp_t gfp;
+};
+
+struct kunit;
+
+struct string_stream *alloc_string_stream(struct kunit *test, gfp_t gfp);
+
+int __printf(2, 3) string_stream_add(struct string_stream *stream,
+ const char *fmt, ...);
+
+int string_stream_vadd(struct string_stream *stream,
+ const char *fmt,
+ va_list args);
+
+char *string_stream_get_string(struct string_stream *stream);
+
+int string_stream_append(struct string_stream *stream,
+ struct string_stream *other);
+
+bool string_stream_is_empty(struct string_stream *stream);
+
+int string_stream_destroy(struct string_stream *stream);
+
+#endif /* _KUNIT_STRING_STREAM_H */
diff --git a/include/kunit/test.h b/include/kunit/test.h
new file mode 100644
index 000000000000..dba48304b3bd
--- /dev/null
+++ b/include/kunit/test.h
@@ -0,0 +1,1490 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Base unit test (KUnit) API.
+ *
+ * Copyright (C) 2019, Google LLC.
+ * Author: Brendan Higgins <brendanhiggins@google.com>
+ */
+
+#ifndef _KUNIT_TEST_H
+#define _KUNIT_TEST_H
+
+#include <kunit/assert.h>
+#include <kunit/try-catch.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+struct kunit_resource;
+
+typedef int (*kunit_resource_init_t)(struct kunit_resource *, void *);
+typedef void (*kunit_resource_free_t)(struct kunit_resource *);
+
+/**
+ * struct kunit_resource - represents a *test managed resource*
+ * @allocation: for the user to store arbitrary data.
+ * @free: a user supplied function to free the resource. Populated by
+ * kunit_alloc_resource().
+ *
+ * Represents a *test managed resource*, a resource which will automatically be
+ * cleaned up at the end of a test case.
+ *
+ * Example:
+ *
+ * .. code-block:: c
+ *
+ * struct kunit_kmalloc_params {
+ * size_t size;
+ * gfp_t gfp;
+ * };
+ *
+ * static int kunit_kmalloc_init(struct kunit_resource *res, void *context)
+ * {
+ * struct kunit_kmalloc_params *params = context;
+ * res->allocation = kmalloc(params->size, params->gfp);
+ *
+ * if (!res->allocation)
+ * return -ENOMEM;
+ *
+ * return 0;
+ * }
+ *
+ * static void kunit_kmalloc_free(struct kunit_resource *res)
+ * {
+ * kfree(res->allocation);
+ * }
+ *
+ * void *kunit_kmalloc(struct kunit *test, size_t size, gfp_t gfp)
+ * {
+ * struct kunit_kmalloc_params params;
+ * struct kunit_resource *res;
+ *
+ * params.size = size;
+ * params.gfp = gfp;
+ *
+ * res = kunit_alloc_resource(test, kunit_kmalloc_init,
+ * kunit_kmalloc_free, &params);
+ * if (res)
+ * return res->allocation;
+ *
+ * return NULL;
+ * }
+ */
+struct kunit_resource {
+ void *allocation;
+ kunit_resource_free_t free;
+
+ /* private: internal use only. */
+ struct list_head node;
+};
+
+struct kunit;
+
+/**
+ * struct kunit_case - represents an individual test case.
+ *
+ * @run_case: the function representing the actual test case.
+ * @name: the name of the test case.
+ *
+ * A test case is a function with the signature,
+ * ``void (*)(struct kunit *)``
+ * that makes expectations and assertions (see KUNIT_EXPECT_TRUE() and
+ * KUNIT_ASSERT_TRUE()) about code under test. Each test case is associated
+ * with a &struct kunit_suite and will be run after the suite's init
+ * function and followed by the suite's exit function.
+ *
+ * A test case should be static and should only be created with the
+ * KUNIT_CASE() macro; additionally, every array of test cases should be
+ * terminated with an empty test case.
+ *
+ * Example:
+ *
+ * .. code-block:: c
+ *
+ * void add_test_basic(struct kunit *test)
+ * {
+ * KUNIT_EXPECT_EQ(test, 1, add(1, 0));
+ * KUNIT_EXPECT_EQ(test, 2, add(1, 1));
+ * KUNIT_EXPECT_EQ(test, 0, add(-1, 1));
+ * KUNIT_EXPECT_EQ(test, INT_MAX, add(0, INT_MAX));
+ * KUNIT_EXPECT_EQ(test, -1, add(INT_MAX, INT_MIN));
+ * }
+ *
+ * static struct kunit_case example_test_cases[] = {
+ * KUNIT_CASE(add_test_basic),
+ * {}
+ * };
+ *
+ */
+struct kunit_case {
+ void (*run_case)(struct kunit *test);
+ const char *name;
+
+ /* private: internal use only. */
+ bool success;
+};
+
+/**
+ * KUNIT_CASE - A helper for creating a &struct kunit_case
+ *
+ * @test_name: a reference to a test case function.
+ *
+ * Takes a symbol for a function representing a test case and creates a
+ * &struct kunit_case object from it. See the documentation for
+ * &struct kunit_case for an example on how to use it.
+ */
+#define KUNIT_CASE(test_name) { .run_case = test_name, .name = #test_name }
+
+/**
+ * struct kunit_suite - describes a related collection of &struct kunit_case
+ *
+ * @name: the name of the test. Purely informational.
+ * @init: called before every test case.
+ * @exit: called after every test case.
+ * @test_cases: a null terminated array of test cases.
+ *
+ * A kunit_suite is a collection of related &struct kunit_case s, such that
+ * @init is called before every test case and @exit is called after every
+ * test case, similar to the notion of a *test fixture* or a *test class*
+ * in other unit testing frameworks like JUnit or Googletest.
+ *
+ * Every &struct kunit_case must be associated with a kunit_suite for KUnit
+ * to run it.
+ */
+struct kunit_suite {
+ const char name[256];
+ int (*init)(struct kunit *test);
+ void (*exit)(struct kunit *test);
+ struct kunit_case *test_cases;
+};
+
+/**
+ * struct kunit - represents a running instance of a test.
+ *
+ * @priv: for user to store arbitrary data. Commonly used to pass data
+ * created in the init function (see &struct kunit_suite).
+ *
+ * Used to store information about the current context under which the test
+ * is running. Most of this data is private and should only be accessed
+ * indirectly via public functions; the one exception is @priv which can be
+ * used by the test writer to store arbitrary data.
+ */
+struct kunit {
+ void *priv;
+
+ /* private: internal use only. */
+ const char *name; /* Read only after initialization! */
+ struct kunit_try_catch try_catch;
+ /*
+ * success starts as true, and may only be set to false during a
+ * test case; thus, it is safe to update this across multiple
+ * threads using WRITE_ONCE; however, as a consequence, it may only
+ * be read after the test case finishes once all threads associated
+ * with the test case have terminated.
+ */
+ bool success; /* Read only after test_case finishes! */
+ spinlock_t lock; /* Guards all mutable test state. */
+ /*
+ * Because resources is a list that may be updated multiple times (with
+ * new resources) from any thread associated with a test case, we must
+ * protect it with some type of lock.
+ */
+ struct list_head resources; /* Protected by lock. */
+};
+
+void kunit_init_test(struct kunit *test, const char *name);
+
+int kunit_run_tests(struct kunit_suite *suite);
+
+/**
+ * kunit_test_suite() - used to register a &struct kunit_suite with KUnit.
+ *
+ * @suite: a statically allocated &struct kunit_suite.
+ *
+ * Registers @suite with the test framework. See &struct kunit_suite for
+ * more information.
+ *
+ * NOTE: Currently KUnit tests are all run as late_initcalls; this means
+ * that they cannot test anything where tests must run at a different init
+ * phase. One significant restriction resulting from this is that KUnit
+ * cannot reliably test anything that is initialize in the late_init phase;
+ * another is that KUnit is useless to test things that need to be run in
+ * an earlier init phase.
+ *
+ * TODO(brendanhiggins@google.com): Don't run all KUnit tests as
+ * late_initcalls. I have some future work planned to dispatch all KUnit
+ * tests from the same place, and at the very least to do so after
+ * everything else is definitely initialized.
+ */
+#define kunit_test_suite(suite) \
+ static int kunit_suite_init##suite(void) \
+ { \
+ return kunit_run_tests(&suite); \
+ } \
+ late_initcall(kunit_suite_init##suite)
+
+/*
+ * Like kunit_alloc_resource() below, but returns the struct kunit_resource
+ * object that contains the allocation. This is mostly for testing purposes.
+ */
+struct kunit_resource *kunit_alloc_and_get_resource(struct kunit *test,
+ kunit_resource_init_t init,
+ kunit_resource_free_t free,
+ gfp_t internal_gfp,
+ void *context);
+
+/**
+ * kunit_alloc_resource() - Allocates a *test managed resource*.
+ * @test: The test context object.
+ * @init: a user supplied function to initialize the resource.
+ * @free: a user supplied function to free the resource.
+ * @internal_gfp: gfp to use for internal allocations, if unsure, use GFP_KERNEL
+ * @context: for the user to pass in arbitrary data to the init function.
+ *
+ * Allocates a *test managed resource*, a resource which will automatically be
+ * cleaned up at the end of a test case. See &struct kunit_resource for an
+ * example.
+ *
+ * NOTE: KUnit needs to allocate memory for each kunit_resource object. You must
+ * specify an @internal_gfp that is compatible with the use context of your
+ * resource.
+ */
+static inline void *kunit_alloc_resource(struct kunit *test,
+ kunit_resource_init_t init,
+ kunit_resource_free_t free,
+ gfp_t internal_gfp,
+ void *context)
+{
+ struct kunit_resource *res;
+
+ res = kunit_alloc_and_get_resource(test, init, free, internal_gfp,
+ context);
+
+ if (res)
+ return res->allocation;
+
+ return NULL;
+}
+
+typedef bool (*kunit_resource_match_t)(struct kunit *test,
+ const void *res,
+ void *match_data);
+
+/**
+ * kunit_resource_instance_match() - Match a resource with the same instance.
+ * @test: Test case to which the resource belongs.
+ * @res: The data stored in kunit_resource->allocation.
+ * @match_data: The resource pointer to match against.
+ *
+ * An instance of kunit_resource_match_t that matches a resource whose
+ * allocation matches @match_data.
+ */
+static inline bool kunit_resource_instance_match(struct kunit *test,
+ const void *res,
+ void *match_data)
+{
+ return res == match_data;
+}
+
+/**
+ * kunit_resource_destroy() - Find a kunit_resource and destroy it.
+ * @test: Test case to which the resource belongs.
+ * @match: Match function. Returns whether a given resource matches @match_data.
+ * @free: Must match free on the kunit_resource to free.
+ * @match_data: Data passed into @match.
+ *
+ * Free the latest kunit_resource of @test for which @free matches the
+ * kunit_resource_free_t associated with the resource and for which @match
+ * returns true.
+ *
+ * RETURNS:
+ * 0 if kunit_resource is found and freed, -ENOENT if not found.
+ */
+int kunit_resource_destroy(struct kunit *test,
+ kunit_resource_match_t match,
+ kunit_resource_free_t free,
+ void *match_data);
+
+/**
+ * kunit_kmalloc() - Like kmalloc() except the allocation is *test managed*.
+ * @test: The test context object.
+ * @size: The size in bytes of the desired memory.
+ * @gfp: flags passed to underlying kmalloc().
+ *
+ * Just like `kmalloc(...)`, except the allocation is managed by the test case
+ * and is automatically cleaned up after the test case concludes. See &struct
+ * kunit_resource for more information.
+ */
+void *kunit_kmalloc(struct kunit *test, size_t size, gfp_t gfp);
+
+/**
+ * kunit_kfree() - Like kfree except for allocations managed by KUnit.
+ * @test: The test case to which the resource belongs.
+ * @ptr: The memory allocation to free.
+ */
+void kunit_kfree(struct kunit *test, const void *ptr);
+
+/**
+ * kunit_kzalloc() - Just like kunit_kmalloc(), but zeroes the allocation.
+ * @test: The test context object.
+ * @size: The size in bytes of the desired memory.
+ * @gfp: flags passed to underlying kmalloc().
+ *
+ * See kzalloc() and kunit_kmalloc() for more information.
+ */
+static inline void *kunit_kzalloc(struct kunit *test, size_t size, gfp_t gfp)
+{
+ return kunit_kmalloc(test, size, gfp | __GFP_ZERO);
+}
+
+void kunit_cleanup(struct kunit *test);
+
+#define kunit_printk(lvl, test, fmt, ...) \
+ printk(lvl "\t# %s: " fmt, (test)->name, ##__VA_ARGS__)
+
+/**
+ * kunit_info() - Prints an INFO level message associated with @test.
+ *
+ * @test: The test context object.
+ * @fmt: A printk() style format string.
+ *
+ * Prints an info level message associated with the test suite being run.
+ * Takes a variable number of format parameters just like printk().
+ */
+#define kunit_info(test, fmt, ...) \
+ kunit_printk(KERN_INFO, test, fmt, ##__VA_ARGS__)
+
+/**
+ * kunit_warn() - Prints a WARN level message associated with @test.
+ *
+ * @test: The test context object.
+ * @fmt: A printk() style format string.
+ *
+ * Prints a warning level message.
+ */
+#define kunit_warn(test, fmt, ...) \
+ kunit_printk(KERN_WARNING, test, fmt, ##__VA_ARGS__)
+
+/**
+ * kunit_err() - Prints an ERROR level message associated with @test.
+ *
+ * @test: The test context object.
+ * @fmt: A printk() style format string.
+ *
+ * Prints an error level message.
+ */
+#define kunit_err(test, fmt, ...) \
+ kunit_printk(KERN_ERR, test, fmt, ##__VA_ARGS__)
+
+/**
+ * KUNIT_SUCCEED() - A no-op expectation. Only exists for code clarity.
+ * @test: The test context object.
+ *
+ * The opposite of KUNIT_FAIL(), it is an expectation that cannot fail. In other
+ * words, it does nothing and only exists for code clarity. See
+ * KUNIT_EXPECT_TRUE() for more information.
+ */
+#define KUNIT_SUCCEED(test) do {} while (0)
+
+void kunit_do_assertion(struct kunit *test,
+ struct kunit_assert *assert,
+ bool pass,
+ const char *fmt, ...);
+
+#define KUNIT_ASSERTION(test, pass, assert_class, INITIALIZER, fmt, ...) do { \
+ struct assert_class __assertion = INITIALIZER; \
+ kunit_do_assertion(test, \
+ &__assertion.assert, \
+ pass, \
+ fmt, \
+ ##__VA_ARGS__); \
+} while (0)
+
+
+#define KUNIT_FAIL_ASSERTION(test, assert_type, fmt, ...) \
+ KUNIT_ASSERTION(test, \
+ false, \
+ kunit_fail_assert, \
+ KUNIT_INIT_FAIL_ASSERT_STRUCT(test, assert_type), \
+ fmt, \
+ ##__VA_ARGS__)
+
+/**
+ * KUNIT_FAIL() - Always causes a test to fail when evaluated.
+ * @test: The test context object.
+ * @fmt: an informational message to be printed when the assertion is made.
+ * @...: string format arguments.
+ *
+ * The opposite of KUNIT_SUCCEED(), it is an expectation that always fails. In
+ * other words, it always results in a failed expectation, and consequently
+ * always causes the test case to fail when evaluated. See KUNIT_EXPECT_TRUE()
+ * for more information.
+ */
+#define KUNIT_FAIL(test, fmt, ...) \
+ KUNIT_FAIL_ASSERTION(test, \
+ KUNIT_EXPECTATION, \
+ fmt, \
+ ##__VA_ARGS__)
+
+#define KUNIT_UNARY_ASSERTION(test, \
+ assert_type, \
+ condition, \
+ expected_true, \
+ fmt, \
+ ...) \
+ KUNIT_ASSERTION(test, \
+ !!(condition) == !!expected_true, \
+ kunit_unary_assert, \
+ KUNIT_INIT_UNARY_ASSERT_STRUCT(test, \
+ assert_type, \
+ #condition, \
+ expected_true), \
+ fmt, \
+ ##__VA_ARGS__)
+
+#define KUNIT_TRUE_MSG_ASSERTION(test, assert_type, condition, fmt, ...) \
+ KUNIT_UNARY_ASSERTION(test, \
+ assert_type, \
+ condition, \
+ true, \
+ fmt, \
+ ##__VA_ARGS__)
+
+#define KUNIT_TRUE_ASSERTION(test, assert_type, condition) \
+ KUNIT_TRUE_MSG_ASSERTION(test, assert_type, condition, NULL)
+
+#define KUNIT_FALSE_MSG_ASSERTION(test, assert_type, condition, fmt, ...) \
+ KUNIT_UNARY_ASSERTION(test, \
+ assert_type, \
+ condition, \
+ false, \
+ fmt, \
+ ##__VA_ARGS__)
+
+#define KUNIT_FALSE_ASSERTION(test, assert_type, condition) \
+ KUNIT_FALSE_MSG_ASSERTION(test, assert_type, condition, NULL)
+
+/*
+ * A factory macro for defining the assertions and expectations for the basic
+ * comparisons defined for the built in types.
+ *
+ * Unfortunately, there is no common type that all types can be promoted to for
+ * which all the binary operators behave the same way as for the actual types
+ * (for example, there is no type that long long and unsigned long long can
+ * both be cast to where the comparison result is preserved for all values). So
+ * the best we can do is do the comparison in the original types and then coerce
+ * everything to long long for printing; this way, the comparison behaves
+ * correctly and the printed out value usually makes sense without
+ * interpretation, but can always be interpreted to figure out the actual
+ * value.
+ */
+#define KUNIT_BASE_BINARY_ASSERTION(test, \
+ assert_class, \
+ ASSERT_CLASS_INIT, \
+ assert_type, \
+ left, \
+ op, \
+ right, \
+ fmt, \
+ ...) \
+do { \
+ typeof(left) __left = (left); \
+ typeof(right) __right = (right); \
+ ((void)__typecheck(__left, __right)); \
+ \
+ KUNIT_ASSERTION(test, \
+ __left op __right, \
+ assert_class, \
+ ASSERT_CLASS_INIT(test, \
+ assert_type, \
+ #op, \
+ #left, \
+ __left, \
+ #right, \
+ __right), \
+ fmt, \
+ ##__VA_ARGS__); \
+} while (0)
+
+#define KUNIT_BASE_EQ_MSG_ASSERTION(test, \
+ assert_class, \
+ ASSERT_CLASS_INIT, \
+ assert_type, \
+ left, \
+ right, \
+ fmt, \
+ ...) \
+ KUNIT_BASE_BINARY_ASSERTION(test, \
+ assert_class, \
+ ASSERT_CLASS_INIT, \
+ assert_type, \
+ left, ==, right, \
+ fmt, \
+ ##__VA_ARGS__)
+
+#define KUNIT_BASE_NE_MSG_ASSERTION(test, \
+ assert_class, \
+ ASSERT_CLASS_INIT, \
+ assert_type, \
+ left, \
+ right, \
+ fmt, \
+ ...) \
+ KUNIT_BASE_BINARY_ASSERTION(test, \
+ assert_class, \
+ ASSERT_CLASS_INIT, \
+ assert_type, \
+ left, !=, right, \
+ fmt, \
+ ##__VA_ARGS__)
+
+#define KUNIT_BASE_LT_MSG_ASSERTION(test, \
+ assert_class, \
+ ASSERT_CLASS_INIT, \
+ assert_type, \
+ left, \
+ right, \
+ fmt, \
+ ...) \
+ KUNIT_BASE_BINARY_ASSERTION(test, \
+ assert_class, \
+ ASSERT_CLASS_INIT, \
+ assert_type, \
+ left, <, right, \
+ fmt, \
+ ##__VA_ARGS__)
+
+#define KUNIT_BASE_LE_MSG_ASSERTION(test, \
+ assert_class, \
+ ASSERT_CLASS_INIT, \
+ assert_type, \
+ left, \
+ right, \
+ fmt, \
+ ...) \
+ KUNIT_BASE_BINARY_ASSERTION(test, \
+ assert_class, \
+ ASSERT_CLASS_INIT, \
+ assert_type, \
+ left, <=, right, \
+ fmt, \
+ ##__VA_ARGS__)
+
+#define KUNIT_BASE_GT_MSG_ASSERTION(test, \
+ assert_class, \
+ ASSERT_CLASS_INIT, \
+ assert_type, \
+ left, \
+ right, \
+ fmt, \
+ ...) \
+ KUNIT_BASE_BINARY_ASSERTION(test, \
+ assert_class, \
+ ASSERT_CLASS_INIT, \
+ assert_type, \
+ left, >, right, \
+ fmt, \
+ ##__VA_ARGS__)
+
+#define KUNIT_BASE_GE_MSG_ASSERTION(test, \
+ assert_class, \
+ ASSERT_CLASS_INIT, \
+ assert_type, \
+ left, \
+ right, \
+ fmt, \
+ ...) \
+ KUNIT_BASE_BINARY_ASSERTION(test, \
+ assert_class, \
+ ASSERT_CLASS_INIT, \
+ assert_type, \
+ left, >=, right, \
+ fmt, \
+ ##__VA_ARGS__)
+
+#define KUNIT_BINARY_EQ_MSG_ASSERTION(test, assert_type, left, right, fmt, ...)\
+ KUNIT_BASE_EQ_MSG_ASSERTION(test, \
+ kunit_binary_assert, \
+ KUNIT_INIT_BINARY_ASSERT_STRUCT, \
+ assert_type, \
+ left, \
+ right, \
+ fmt, \
+ ##__VA_ARGS__)
+
+#define KUNIT_BINARY_EQ_ASSERTION(test, assert_type, left, right) \
+ KUNIT_BINARY_EQ_MSG_ASSERTION(test, \
+ assert_type, \
+ left, \
+ right, \
+ NULL)
+
+#define KUNIT_BINARY_PTR_EQ_MSG_ASSERTION(test, \
+ assert_type, \
+ left, \
+ right, \
+ fmt, \
+ ...) \
+ KUNIT_BASE_EQ_MSG_ASSERTION(test, \
+ kunit_binary_ptr_assert, \
+ KUNIT_INIT_BINARY_PTR_ASSERT_STRUCT, \
+ assert_type, \
+ left, \
+ right, \
+ fmt, \
+ ##__VA_ARGS__)
+
+#define KUNIT_BINARY_PTR_EQ_ASSERTION(test, assert_type, left, right) \
+ KUNIT_BINARY_PTR_EQ_MSG_ASSERTION(test, \
+ assert_type, \
+ left, \
+ right, \
+ NULL)
+
+#define KUNIT_BINARY_NE_MSG_ASSERTION(test, assert_type, left, right, fmt, ...)\
+ KUNIT_BASE_NE_MSG_ASSERTION(test, \
+ kunit_binary_assert, \
+ KUNIT_INIT_BINARY_ASSERT_STRUCT, \
+ assert_type, \
+ left, \
+ right, \
+ fmt, \
+ ##__VA_ARGS__)
+
+#define KUNIT_BINARY_NE_ASSERTION(test, assert_type, left, right) \
+ KUNIT_BINARY_NE_MSG_ASSERTION(test, \
+ assert_type, \
+ left, \
+ right, \
+ NULL)
+
+#define KUNIT_BINARY_PTR_NE_MSG_ASSERTION(test, \
+ assert_type, \
+ left, \
+ right, \
+ fmt, \
+ ...) \
+ KUNIT_BASE_NE_MSG_ASSERTION(test, \
+ kunit_binary_ptr_assert, \
+ KUNIT_INIT_BINARY_PTR_ASSERT_STRUCT, \
+ assert_type, \
+ left, \
+ right, \
+ fmt, \
+ ##__VA_ARGS__)
+
+#define KUNIT_BINARY_PTR_NE_ASSERTION(test, assert_type, left, right) \
+ KUNIT_BINARY_PTR_NE_MSG_ASSERTION(test, \
+ assert_type, \
+ left, \
+ right, \
+ NULL)
+
+#define KUNIT_BINARY_LT_MSG_ASSERTION(test, assert_type, left, right, fmt, ...)\
+ KUNIT_BASE_LT_MSG_ASSERTION(test, \
+ kunit_binary_assert, \
+ KUNIT_INIT_BINARY_ASSERT_STRUCT, \
+ assert_type, \
+ left, \
+ right, \
+ fmt, \
+ ##__VA_ARGS__)
+
+#define KUNIT_BINARY_LT_ASSERTION(test, assert_type, left, right) \
+ KUNIT_BINARY_LT_MSG_ASSERTION(test, \
+ assert_type, \
+ left, \
+ right, \
+ NULL)
+
+#define KUNIT_BINARY_PTR_LT_MSG_ASSERTION(test, \
+ assert_type, \
+ left, \
+ right, \
+ fmt, \
+ ...) \
+ KUNIT_BASE_LT_MSG_ASSERTION(test, \
+ kunit_binary_ptr_assert, \
+ KUNIT_INIT_BINARY_PTR_ASSERT_STRUCT, \
+ assert_type, \
+ left, \
+ right, \
+ fmt, \
+ ##__VA_ARGS__)
+
+#define KUNIT_BINARY_PTR_LT_ASSERTION(test, assert_type, left, right) \
+ KUNIT_BINARY_PTR_LT_MSG_ASSERTION(test, \
+ assert_type, \
+ left, \
+ right, \
+ NULL)
+
+#define KUNIT_BINARY_LE_MSG_ASSERTION(test, assert_type, left, right, fmt, ...)\
+ KUNIT_BASE_LE_MSG_ASSERTION(test, \
+ kunit_binary_assert, \
+ KUNIT_INIT_BINARY_ASSERT_STRUCT, \
+ assert_type, \
+ left, \
+ right, \
+ fmt, \
+ ##__VA_ARGS__)
+
+#define KUNIT_BINARY_LE_ASSERTION(test, assert_type, left, right) \
+ KUNIT_BINARY_LE_MSG_ASSERTION(test, \
+ assert_type, \
+ left, \
+ right, \
+ NULL)
+
+#define KUNIT_BINARY_PTR_LE_MSG_ASSERTION(test, \
+ assert_type, \
+ left, \
+ right, \
+ fmt, \
+ ...) \
+ KUNIT_BASE_LE_MSG_ASSERTION(test, \
+ kunit_binary_ptr_assert, \
+ KUNIT_INIT_BINARY_PTR_ASSERT_STRUCT, \
+ assert_type, \
+ left, \
+ right, \
+ fmt, \
+ ##__VA_ARGS__)
+
+#define KUNIT_BINARY_PTR_LE_ASSERTION(test, assert_type, left, right) \
+ KUNIT_BINARY_PTR_LE_MSG_ASSERTION(test, \
+ assert_type, \
+ left, \
+ right, \
+ NULL)
+
+#define KUNIT_BINARY_GT_MSG_ASSERTION(test, assert_type, left, right, fmt, ...)\
+ KUNIT_BASE_GT_MSG_ASSERTION(test, \
+ kunit_binary_assert, \
+ KUNIT_INIT_BINARY_ASSERT_STRUCT, \
+ assert_type, \
+ left, \
+ right, \
+ fmt, \
+ ##__VA_ARGS__)
+
+#define KUNIT_BINARY_GT_ASSERTION(test, assert_type, left, right) \
+ KUNIT_BINARY_GT_MSG_ASSERTION(test, \
+ assert_type, \
+ left, \
+ right, \
+ NULL)
+
+#define KUNIT_BINARY_PTR_GT_MSG_ASSERTION(test, \
+ assert_type, \
+ left, \
+ right, \
+ fmt, \
+ ...) \
+ KUNIT_BASE_GT_MSG_ASSERTION(test, \
+ kunit_binary_ptr_assert, \
+ KUNIT_INIT_BINARY_PTR_ASSERT_STRUCT, \
+ assert_type, \
+ left, \
+ right, \
+ fmt, \
+ ##__VA_ARGS__)
+
+#define KUNIT_BINARY_PTR_GT_ASSERTION(test, assert_type, left, right) \
+ KUNIT_BINARY_PTR_GT_MSG_ASSERTION(test, \
+ assert_type, \
+ left, \
+ right, \
+ NULL)
+
+#define KUNIT_BINARY_GE_MSG_ASSERTION(test, assert_type, left, right, fmt, ...)\
+ KUNIT_BASE_GE_MSG_ASSERTION(test, \
+ kunit_binary_assert, \
+ KUNIT_INIT_BINARY_ASSERT_STRUCT, \
+ assert_type, \
+ left, \
+ right, \
+ fmt, \
+ ##__VA_ARGS__)
+
+#define KUNIT_BINARY_GE_ASSERTION(test, assert_type, left, right) \
+ KUNIT_BINARY_GE_MSG_ASSERTION(test, \
+ assert_type, \
+ left, \
+ right, \
+ NULL)
+
+#define KUNIT_BINARY_PTR_GE_MSG_ASSERTION(test, \
+ assert_type, \
+ left, \
+ right, \
+ fmt, \
+ ...) \
+ KUNIT_BASE_GE_MSG_ASSERTION(test, \
+ kunit_binary_ptr_assert, \
+ KUNIT_INIT_BINARY_PTR_ASSERT_STRUCT, \
+ assert_type, \
+ left, \
+ right, \
+ fmt, \
+ ##__VA_ARGS__)
+
+#define KUNIT_BINARY_PTR_GE_ASSERTION(test, assert_type, left, right) \
+ KUNIT_BINARY_PTR_GE_MSG_ASSERTION(test, \
+ assert_type, \
+ left, \
+ right, \
+ NULL)
+
+#define KUNIT_BINARY_STR_ASSERTION(test, \
+ assert_type, \
+ left, \
+ op, \
+ right, \
+ fmt, \
+ ...) \
+do { \
+ typeof(left) __left = (left); \
+ typeof(right) __right = (right); \
+ \
+ KUNIT_ASSERTION(test, \
+ strcmp(__left, __right) op 0, \
+ kunit_binary_str_assert, \
+ KUNIT_INIT_BINARY_ASSERT_STRUCT(test, \
+ assert_type, \
+ #op, \
+ #left, \
+ __left, \
+ #right, \
+ __right), \
+ fmt, \
+ ##__VA_ARGS__); \
+} while (0)
+
+#define KUNIT_BINARY_STR_EQ_MSG_ASSERTION(test, \
+ assert_type, \
+ left, \
+ right, \
+ fmt, \
+ ...) \
+ KUNIT_BINARY_STR_ASSERTION(test, \
+ assert_type, \
+ left, ==, right, \
+ fmt, \
+ ##__VA_ARGS__)
+
+#define KUNIT_BINARY_STR_EQ_ASSERTION(test, assert_type, left, right) \
+ KUNIT_BINARY_STR_EQ_MSG_ASSERTION(test, \
+ assert_type, \
+ left, \
+ right, \
+ NULL)
+
+#define KUNIT_BINARY_STR_NE_MSG_ASSERTION(test, \
+ assert_type, \
+ left, \
+ right, \
+ fmt, \
+ ...) \
+ KUNIT_BINARY_STR_ASSERTION(test, \
+ assert_type, \
+ left, !=, right, \
+ fmt, \
+ ##__VA_ARGS__)
+
+#define KUNIT_BINARY_STR_NE_ASSERTION(test, assert_type, left, right) \
+ KUNIT_BINARY_STR_NE_MSG_ASSERTION(test, \
+ assert_type, \
+ left, \
+ right, \
+ NULL)
+
+#define KUNIT_PTR_NOT_ERR_OR_NULL_MSG_ASSERTION(test, \
+ assert_type, \
+ ptr, \
+ fmt, \
+ ...) \
+do { \
+ typeof(ptr) __ptr = (ptr); \
+ \
+ KUNIT_ASSERTION(test, \
+ !IS_ERR_OR_NULL(__ptr), \
+ kunit_ptr_not_err_assert, \
+ KUNIT_INIT_PTR_NOT_ERR_STRUCT(test, \
+ assert_type, \
+ #ptr, \
+ __ptr), \
+ fmt, \
+ ##__VA_ARGS__); \
+} while (0)
+
+#define KUNIT_PTR_NOT_ERR_OR_NULL_ASSERTION(test, assert_type, ptr) \
+ KUNIT_PTR_NOT_ERR_OR_NULL_MSG_ASSERTION(test, \
+ assert_type, \
+ ptr, \
+ NULL)
+
+/**
+ * KUNIT_EXPECT_TRUE() - Causes a test failure when the expression is not true.
+ * @test: The test context object.
+ * @condition: an arbitrary boolean expression. The test fails when this does
+ * not evaluate to true.
+ *
+ * This and expectations of the form `KUNIT_EXPECT_*` will cause the test case
+ * to fail when the specified condition is not met; however, it will not prevent
+ * the test case from continuing to run; this is otherwise known as an
+ * *expectation failure*.
+ */
+#define KUNIT_EXPECT_TRUE(test, condition) \
+ KUNIT_TRUE_ASSERTION(test, KUNIT_EXPECTATION, condition)
+
+#define KUNIT_EXPECT_TRUE_MSG(test, condition, fmt, ...) \
+ KUNIT_TRUE_MSG_ASSERTION(test, \
+ KUNIT_EXPECTATION, \
+ condition, \
+ fmt, \
+ ##__VA_ARGS__)
+
+/**
+ * KUNIT_EXPECT_FALSE() - Makes a test failure when the expression is not false.
+ * @test: The test context object.
+ * @condition: an arbitrary boolean expression. The test fails when this does
+ * not evaluate to false.
+ *
+ * Sets an expectation that @condition evaluates to false. See
+ * KUNIT_EXPECT_TRUE() for more information.
+ */
+#define KUNIT_EXPECT_FALSE(test, condition) \
+ KUNIT_FALSE_ASSERTION(test, KUNIT_EXPECTATION, condition)
+
+#define KUNIT_EXPECT_FALSE_MSG(test, condition, fmt, ...) \
+ KUNIT_FALSE_MSG_ASSERTION(test, \
+ KUNIT_EXPECTATION, \
+ condition, \
+ fmt, \
+ ##__VA_ARGS__)
+
+/**
+ * KUNIT_EXPECT_EQ() - Sets an expectation that @left and @right are equal.
+ * @test: The test context object.
+ * @left: an arbitrary expression that evaluates to a primitive C type.
+ * @right: an arbitrary expression that evaluates to a primitive C type.
+ *
+ * Sets an expectation that the values that @left and @right evaluate to are
+ * equal. This is semantically equivalent to
+ * KUNIT_EXPECT_TRUE(@test, (@left) == (@right)). See KUNIT_EXPECT_TRUE() for
+ * more information.
+ */
+#define KUNIT_EXPECT_EQ(test, left, right) \
+ KUNIT_BINARY_EQ_ASSERTION(test, KUNIT_EXPECTATION, left, right)
+
+#define KUNIT_EXPECT_EQ_MSG(test, left, right, fmt, ...) \
+ KUNIT_BINARY_EQ_MSG_ASSERTION(test, \
+ KUNIT_EXPECTATION, \
+ left, \
+ right, \
+ fmt, \
+ ##__VA_ARGS__)
+
+/**
+ * KUNIT_EXPECT_PTR_EQ() - Expects that pointers @left and @right are equal.
+ * @test: The test context object.
+ * @left: an arbitrary expression that evaluates to a pointer.
+ * @right: an arbitrary expression that evaluates to a pointer.
+ *
+ * Sets an expectation that the values that @left and @right evaluate to are
+ * equal. This is semantically equivalent to
+ * KUNIT_EXPECT_TRUE(@test, (@left) == (@right)). See KUNIT_EXPECT_TRUE() for
+ * more information.
+ */
+#define KUNIT_EXPECT_PTR_EQ(test, left, right) \
+ KUNIT_BINARY_PTR_EQ_ASSERTION(test, \
+ KUNIT_EXPECTATION, \
+ left, \
+ right)
+
+#define KUNIT_EXPECT_PTR_EQ_MSG(test, left, right, fmt, ...) \
+ KUNIT_BINARY_PTR_EQ_MSG_ASSERTION(test, \
+ KUNIT_EXPECTATION, \
+ left, \
+ right, \
+ fmt, \
+ ##__VA_ARGS__)
+
+/**
+ * KUNIT_EXPECT_NE() - An expectation that @left and @right are not equal.
+ * @test: The test context object.
+ * @left: an arbitrary expression that evaluates to a primitive C type.
+ * @right: an arbitrary expression that evaluates to a primitive C type.
+ *
+ * Sets an expectation that the values that @left and @right evaluate to are not
+ * equal. This is semantically equivalent to
+ * KUNIT_EXPECT_TRUE(@test, (@left) != (@right)). See KUNIT_EXPECT_TRUE() for
+ * more information.
+ */
+#define KUNIT_EXPECT_NE(test, left, right) \
+ KUNIT_BINARY_NE_ASSERTION(test, KUNIT_EXPECTATION, left, right)
+
+#define KUNIT_EXPECT_NE_MSG(test, left, right, fmt, ...) \
+ KUNIT_BINARY_NE_MSG_ASSERTION(test, \
+ KUNIT_EXPECTATION, \
+ left, \
+ right, \
+ fmt, \
+ ##__VA_ARGS__)
+
+/**
+ * KUNIT_EXPECT_PTR_NE() - Expects that pointers @left and @right are not equal.
+ * @test: The test context object.
+ * @left: an arbitrary expression that evaluates to a pointer.
+ * @right: an arbitrary expression that evaluates to a pointer.
+ *
+ * Sets an expectation that the values that @left and @right evaluate to are not
+ * equal. This is semantically equivalent to
+ * KUNIT_EXPECT_TRUE(@test, (@left) != (@right)). See KUNIT_EXPECT_TRUE() for
+ * more information.
+ */
+#define KUNIT_EXPECT_PTR_NE(test, left, right) \
+ KUNIT_BINARY_PTR_NE_ASSERTION(test, \
+ KUNIT_EXPECTATION, \
+ left, \
+ right)
+
+#define KUNIT_EXPECT_PTR_NE_MSG(test, left, right, fmt, ...) \
+ KUNIT_BINARY_PTR_NE_MSG_ASSERTION(test, \
+ KUNIT_EXPECTATION, \
+ left, \
+ right, \
+ fmt, \
+ ##__VA_ARGS__)
+
+/**
+ * KUNIT_EXPECT_LT() - An expectation that @left is less than @right.
+ * @test: The test context object.
+ * @left: an arbitrary expression that evaluates to a primitive C type.
+ * @right: an arbitrary expression that evaluates to a primitive C type.
+ *
+ * Sets an expectation that the value that @left evaluates to is less than the
+ * value that @right evaluates to. This is semantically equivalent to
+ * KUNIT_EXPECT_TRUE(@test, (@left) < (@right)). See KUNIT_EXPECT_TRUE() for
+ * more information.
+ */
+#define KUNIT_EXPECT_LT(test, left, right) \
+ KUNIT_BINARY_LT_ASSERTION(test, KUNIT_EXPECTATION, left, right)
+
+#define KUNIT_EXPECT_LT_MSG(test, left, right, fmt, ...) \
+ KUNIT_BINARY_LT_MSG_ASSERTION(test, \
+ KUNIT_EXPECTATION, \
+ left, \
+ right, \
+ fmt, \
+ ##__VA_ARGS__)
+
+/**
+ * KUNIT_EXPECT_LE() - Expects that @left is less than or equal to @right.
+ * @test: The test context object.
+ * @left: an arbitrary expression that evaluates to a primitive C type.
+ * @right: an arbitrary expression that evaluates to a primitive C type.
+ *
+ * Sets an expectation that the value that @left evaluates to is less than or
+ * equal to the value that @right evaluates to. Semantically this is equivalent
+ * to KUNIT_EXPECT_TRUE(@test, (@left) <= (@right)). See KUNIT_EXPECT_TRUE() for
+ * more information.
+ */
+#define KUNIT_EXPECT_LE(test, left, right) \
+ KUNIT_BINARY_LE_ASSERTION(test, KUNIT_EXPECTATION, left, right)
+
+#define KUNIT_EXPECT_LE_MSG(test, left, right, fmt, ...) \
+ KUNIT_BINARY_LE_MSG_ASSERTION(test, \
+ KUNIT_EXPECTATION, \
+ left, \
+ right, \
+ fmt, \
+ ##__VA_ARGS__)
+
+/**
+ * KUNIT_EXPECT_GT() - An expectation that @left is greater than @right.
+ * @test: The test context object.
+ * @left: an arbitrary expression that evaluates to a primitive C type.
+ * @right: an arbitrary expression that evaluates to a primitive C type.
+ *
+ * Sets an expectation that the value that @left evaluates to is greater than
+ * the value that @right evaluates to. This is semantically equivalent to
+ * KUNIT_EXPECT_TRUE(@test, (@left) > (@right)). See KUNIT_EXPECT_TRUE() for
+ * more information.
+ */
+#define KUNIT_EXPECT_GT(test, left, right) \
+ KUNIT_BINARY_GT_ASSERTION(test, KUNIT_EXPECTATION, left, right)
+
+#define KUNIT_EXPECT_GT_MSG(test, left, right, fmt, ...) \
+ KUNIT_BINARY_GT_MSG_ASSERTION(test, \
+ KUNIT_EXPECTATION, \
+ left, \
+ right, \
+ fmt, \
+ ##__VA_ARGS__)
+
+/**
+ * KUNIT_EXPECT_GE() - Expects that @left is greater than or equal to @right.
+ * @test: The test context object.
+ * @left: an arbitrary expression that evaluates to a primitive C type.
+ * @right: an arbitrary expression that evaluates to a primitive C type.
+ *
+ * Sets an expectation that the value that @left evaluates to is greater than
+ * the value that @right evaluates to. This is semantically equivalent to
+ * KUNIT_EXPECT_TRUE(@test, (@left) >= (@right)). See KUNIT_EXPECT_TRUE() for
+ * more information.
+ */
+#define KUNIT_EXPECT_GE(test, left, right) \
+ KUNIT_BINARY_GE_ASSERTION(test, KUNIT_EXPECTATION, left, right)
+
+#define KUNIT_EXPECT_GE_MSG(test, left, right, fmt, ...) \
+ KUNIT_BINARY_GE_MSG_ASSERTION(test, \
+ KUNIT_EXPECTATION, \
+ left, \
+ right, \
+ fmt, \
+ ##__VA_ARGS__)
+
+/**
+ * KUNIT_EXPECT_STREQ() - Expects that strings @left and @right are equal.
+ * @test: The test context object.
+ * @left: an arbitrary expression that evaluates to a null terminated string.
+ * @right: an arbitrary expression that evaluates to a null terminated string.
+ *
+ * Sets an expectation that the values that @left and @right evaluate to are
+ * equal. This is semantically equivalent to
+ * KUNIT_EXPECT_TRUE(@test, !strcmp((@left), (@right))). See KUNIT_EXPECT_TRUE()
+ * for more information.
+ */
+#define KUNIT_EXPECT_STREQ(test, left, right) \
+ KUNIT_BINARY_STR_EQ_ASSERTION(test, KUNIT_EXPECTATION, left, right)
+
+#define KUNIT_EXPECT_STREQ_MSG(test, left, right, fmt, ...) \
+ KUNIT_BINARY_STR_EQ_MSG_ASSERTION(test, \
+ KUNIT_EXPECTATION, \
+ left, \
+ right, \
+ fmt, \
+ ##__VA_ARGS__)
+
+/**
+ * KUNIT_EXPECT_STRNEQ() - Expects that strings @left and @right are not equal.
+ * @test: The test context object.
+ * @left: an arbitrary expression that evaluates to a null terminated string.
+ * @right: an arbitrary expression that evaluates to a null terminated string.
+ *
+ * Sets an expectation that the values that @left and @right evaluate to are
+ * not equal. This is semantically equivalent to
+ * KUNIT_EXPECT_TRUE(@test, strcmp((@left), (@right))). See KUNIT_EXPECT_TRUE()
+ * for more information.
+ */
+#define KUNIT_EXPECT_STRNEQ(test, left, right) \
+ KUNIT_BINARY_STR_NE_ASSERTION(test, KUNIT_EXPECTATION, left, right)
+
+#define KUNIT_EXPECT_STRNEQ_MSG(test, left, right, fmt, ...) \
+ KUNIT_BINARY_STR_NE_MSG_ASSERTION(test, \
+ KUNIT_EXPECTATION, \
+ left, \
+ right, \
+ fmt, \
+ ##__VA_ARGS__)
+
+/**
+ * KUNIT_EXPECT_NOT_ERR_OR_NULL() - Expects that @ptr is not null and not err.
+ * @test: The test context object.
+ * @ptr: an arbitrary pointer.
+ *
+ * Sets an expectation that the value that @ptr evaluates to is not null and not
+ * an errno stored in a pointer. This is semantically equivalent to
+ * KUNIT_EXPECT_TRUE(@test, !IS_ERR_OR_NULL(@ptr)). See KUNIT_EXPECT_TRUE() for
+ * more information.
+ */
+#define KUNIT_EXPECT_NOT_ERR_OR_NULL(test, ptr) \
+ KUNIT_PTR_NOT_ERR_OR_NULL_ASSERTION(test, KUNIT_EXPECTATION, ptr)
+
+#define KUNIT_EXPECT_NOT_ERR_OR_NULL_MSG(test, ptr, fmt, ...) \
+ KUNIT_PTR_NOT_ERR_OR_NULL_MSG_ASSERTION(test, \
+ KUNIT_EXPECTATION, \
+ ptr, \
+ fmt, \
+ ##__VA_ARGS__)
+
+#define KUNIT_ASSERT_FAILURE(test, fmt, ...) \
+ KUNIT_FAIL_ASSERTION(test, KUNIT_ASSERTION, fmt, ##__VA_ARGS__)
+
+/**
+ * KUNIT_ASSERT_TRUE() - Sets an assertion that @condition is true.
+ * @test: The test context object.
+ * @condition: an arbitrary boolean expression. The test fails and aborts when
+ * this does not evaluate to true.
+ *
+ * This and assertions of the form `KUNIT_ASSERT_*` will cause the test case to
+ * fail *and immediately abort* when the specified condition is not met. Unlike
+ * an expectation failure, it will prevent the test case from continuing to run;
+ * this is otherwise known as an *assertion failure*.
+ */
+#define KUNIT_ASSERT_TRUE(test, condition) \
+ KUNIT_TRUE_ASSERTION(test, KUNIT_ASSERTION, condition)
+
+#define KUNIT_ASSERT_TRUE_MSG(test, condition, fmt, ...) \
+ KUNIT_TRUE_MSG_ASSERTION(test, \
+ KUNIT_ASSERTION, \
+ condition, \
+ fmt, \
+ ##__VA_ARGS__)
+
+/**
+ * KUNIT_ASSERT_FALSE() - Sets an assertion that @condition is false.
+ * @test: The test context object.
+ * @condition: an arbitrary boolean expression.
+ *
+ * Sets an assertion that the value that @condition evaluates to is false. This
+ * is the same as KUNIT_EXPECT_FALSE(), except it causes an assertion failure
+ * (see KUNIT_ASSERT_TRUE()) when the assertion is not met.
+ */
+#define KUNIT_ASSERT_FALSE(test, condition) \
+ KUNIT_FALSE_ASSERTION(test, KUNIT_ASSERTION, condition)
+
+#define KUNIT_ASSERT_FALSE_MSG(test, condition, fmt, ...) \
+ KUNIT_FALSE_MSG_ASSERTION(test, \
+ KUNIT_ASSERTION, \
+ condition, \
+ fmt, \
+ ##__VA_ARGS__)
+
+/**
+ * KUNIT_ASSERT_EQ() - Sets an assertion that @left and @right are equal.
+ * @test: The test context object.
+ * @left: an arbitrary expression that evaluates to a primitive C type.
+ * @right: an arbitrary expression that evaluates to a primitive C type.
+ *
+ * Sets an assertion that the values that @left and @right evaluate to are
+ * equal. This is the same as KUNIT_EXPECT_EQ(), except it causes an assertion
+ * failure (see KUNIT_ASSERT_TRUE()) when the assertion is not met.
+ */
+#define KUNIT_ASSERT_EQ(test, left, right) \
+ KUNIT_BINARY_EQ_ASSERTION(test, KUNIT_ASSERTION, left, right)
+
+#define KUNIT_ASSERT_EQ_MSG(test, left, right, fmt, ...) \
+ KUNIT_BINARY_EQ_MSG_ASSERTION(test, \
+ KUNIT_ASSERTION, \
+ left, \
+ right, \
+ fmt, \
+ ##__VA_ARGS__)
+
+/**
+ * KUNIT_ASSERT_PTR_EQ() - Asserts that pointers @left and @right are equal.
+ * @test: The test context object.
+ * @left: an arbitrary expression that evaluates to a pointer.
+ * @right: an arbitrary expression that evaluates to a pointer.
+ *
+ * Sets an assertion that the values that @left and @right evaluate to are
+ * equal. This is the same as KUNIT_EXPECT_EQ(), except it causes an assertion
+ * failure (see KUNIT_ASSERT_TRUE()) when the assertion is not met.
+ */
+#define KUNIT_ASSERT_PTR_EQ(test, left, right) \
+ KUNIT_BINARY_PTR_EQ_ASSERTION(test, KUNIT_ASSERTION, left, right)
+
+#define KUNIT_ASSERT_PTR_EQ_MSG(test, left, right, fmt, ...) \
+ KUNIT_BINARY_PTR_EQ_MSG_ASSERTION(test, \
+ KUNIT_ASSERTION, \
+ left, \
+ right, \
+ fmt, \
+ ##__VA_ARGS__)
+
+/**
+ * KUNIT_ASSERT_NE() - An assertion that @left and @right are not equal.
+ * @test: The test context object.
+ * @left: an arbitrary expression that evaluates to a primitive C type.
+ * @right: an arbitrary expression that evaluates to a primitive C type.
+ *
+ * Sets an assertion that the values that @left and @right evaluate to are not
+ * equal. This is the same as KUNIT_EXPECT_NE(), except it causes an assertion
+ * failure (see KUNIT_ASSERT_TRUE()) when the assertion is not met.
+ */
+#define KUNIT_ASSERT_NE(test, left, right) \
+ KUNIT_BINARY_NE_ASSERTION(test, KUNIT_ASSERTION, left, right)
+
+#define KUNIT_ASSERT_NE_MSG(test, left, right, fmt, ...) \
+ KUNIT_BINARY_NE_MSG_ASSERTION(test, \
+ KUNIT_ASSERTION, \
+ left, \
+ right, \
+ fmt, \
+ ##__VA_ARGS__)
+
+/**
+ * KUNIT_ASSERT_PTR_NE() - Asserts that pointers @left and @right are not equal.
+ * KUNIT_ASSERT_PTR_EQ() - Asserts that pointers @left and @right are equal.
+ * @test: The test context object.
+ * @left: an arbitrary expression that evaluates to a pointer.
+ * @right: an arbitrary expression that evaluates to a pointer.
+ *
+ * Sets an assertion that the values that @left and @right evaluate to are not
+ * equal. This is the same as KUNIT_EXPECT_NE(), except it causes an assertion
+ * failure (see KUNIT_ASSERT_TRUE()) when the assertion is not met.
+ */
+#define KUNIT_ASSERT_PTR_NE(test, left, right) \
+ KUNIT_BINARY_PTR_NE_ASSERTION(test, KUNIT_ASSERTION, left, right)
+
+#define KUNIT_ASSERT_PTR_NE_MSG(test, left, right, fmt, ...) \
+ KUNIT_BINARY_PTR_NE_MSG_ASSERTION(test, \
+ KUNIT_ASSERTION, \
+ left, \
+ right, \
+ fmt, \
+ ##__VA_ARGS__)
+/**
+ * KUNIT_ASSERT_LT() - An assertion that @left is less than @right.
+ * @test: The test context object.
+ * @left: an arbitrary expression that evaluates to a primitive C type.
+ * @right: an arbitrary expression that evaluates to a primitive C type.
+ *
+ * Sets an assertion that the value that @left evaluates to is less than the
+ * value that @right evaluates to. This is the same as KUNIT_EXPECT_LT(), except
+ * it causes an assertion failure (see KUNIT_ASSERT_TRUE()) when the assertion
+ * is not met.
+ */
+#define KUNIT_ASSERT_LT(test, left, right) \
+ KUNIT_BINARY_LT_ASSERTION(test, KUNIT_ASSERTION, left, right)
+
+#define KUNIT_ASSERT_LT_MSG(test, left, right, fmt, ...) \
+ KUNIT_BINARY_LT_MSG_ASSERTION(test, \
+ KUNIT_ASSERTION, \
+ left, \
+ right, \
+ fmt, \
+ ##__VA_ARGS__)
+/**
+ * KUNIT_ASSERT_LE() - An assertion that @left is less than or equal to @right.
+ * @test: The test context object.
+ * @left: an arbitrary expression that evaluates to a primitive C type.
+ * @right: an arbitrary expression that evaluates to a primitive C type.
+ *
+ * Sets an assertion that the value that @left evaluates to is less than or
+ * equal to the value that @right evaluates to. This is the same as
+ * KUNIT_EXPECT_LE(), except it causes an assertion failure (see
+ * KUNIT_ASSERT_TRUE()) when the assertion is not met.
+ */
+#define KUNIT_ASSERT_LE(test, left, right) \
+ KUNIT_BINARY_LE_ASSERTION(test, KUNIT_ASSERTION, left, right)
+
+#define KUNIT_ASSERT_LE_MSG(test, left, right, fmt, ...) \
+ KUNIT_BINARY_LE_MSG_ASSERTION(test, \
+ KUNIT_ASSERTION, \
+ left, \
+ right, \
+ fmt, \
+ ##__VA_ARGS__)
+
+/**
+ * KUNIT_ASSERT_GT() - An assertion that @left is greater than @right.
+ * @test: The test context object.
+ * @left: an arbitrary expression that evaluates to a primitive C type.
+ * @right: an arbitrary expression that evaluates to a primitive C type.
+ *
+ * Sets an assertion that the value that @left evaluates to is greater than the
+ * value that @right evaluates to. This is the same as KUNIT_EXPECT_GT(), except
+ * it causes an assertion failure (see KUNIT_ASSERT_TRUE()) when the assertion
+ * is not met.
+ */
+#define KUNIT_ASSERT_GT(test, left, right) \
+ KUNIT_BINARY_GT_ASSERTION(test, KUNIT_ASSERTION, left, right)
+
+#define KUNIT_ASSERT_GT_MSG(test, left, right, fmt, ...) \
+ KUNIT_BINARY_GT_MSG_ASSERTION(test, \
+ KUNIT_ASSERTION, \
+ left, \
+ right, \
+ fmt, \
+ ##__VA_ARGS__)
+
+/**
+ * KUNIT_ASSERT_GE() - Assertion that @left is greater than or equal to @right.
+ * @test: The test context object.
+ * @left: an arbitrary expression that evaluates to a primitive C type.
+ * @right: an arbitrary expression that evaluates to a primitive C type.
+ *
+ * Sets an assertion that the value that @left evaluates to is greater than the
+ * value that @right evaluates to. This is the same as KUNIT_EXPECT_GE(), except
+ * it causes an assertion failure (see KUNIT_ASSERT_TRUE()) when the assertion
+ * is not met.
+ */
+#define KUNIT_ASSERT_GE(test, left, right) \
+ KUNIT_BINARY_GE_ASSERTION(test, KUNIT_ASSERTION, left, right)
+
+#define KUNIT_ASSERT_GE_MSG(test, left, right, fmt, ...) \
+ KUNIT_BINARY_GE_MSG_ASSERTION(test, \
+ KUNIT_ASSERTION, \
+ left, \
+ right, \
+ fmt, \
+ ##__VA_ARGS__)
+
+/**
+ * KUNIT_ASSERT_STREQ() - An assertion that strings @left and @right are equal.
+ * @test: The test context object.
+ * @left: an arbitrary expression that evaluates to a null terminated string.
+ * @right: an arbitrary expression that evaluates to a null terminated string.
+ *
+ * Sets an assertion that the values that @left and @right evaluate to are
+ * equal. This is the same as KUNIT_EXPECT_STREQ(), except it causes an
+ * assertion failure (see KUNIT_ASSERT_TRUE()) when the assertion is not met.
+ */
+#define KUNIT_ASSERT_STREQ(test, left, right) \
+ KUNIT_BINARY_STR_EQ_ASSERTION(test, KUNIT_ASSERTION, left, right)
+
+#define KUNIT_ASSERT_STREQ_MSG(test, left, right, fmt, ...) \
+ KUNIT_BINARY_STR_EQ_MSG_ASSERTION(test, \
+ KUNIT_ASSERTION, \
+ left, \
+ right, \
+ fmt, \
+ ##__VA_ARGS__)
+
+/**
+ * KUNIT_ASSERT_STRNEQ() - Expects that strings @left and @right are not equal.
+ * @test: The test context object.
+ * @left: an arbitrary expression that evaluates to a null terminated string.
+ * @right: an arbitrary expression that evaluates to a null terminated string.
+ *
+ * Sets an expectation that the values that @left and @right evaluate to are
+ * not equal. This is semantically equivalent to
+ * KUNIT_ASSERT_TRUE(@test, strcmp((@left), (@right))). See KUNIT_ASSERT_TRUE()
+ * for more information.
+ */
+#define KUNIT_ASSERT_STRNEQ(test, left, right) \
+ KUNIT_BINARY_STR_NE_ASSERTION(test, KUNIT_ASSERTION, left, right)
+
+#define KUNIT_ASSERT_STRNEQ_MSG(test, left, right, fmt, ...) \
+ KUNIT_BINARY_STR_NE_MSG_ASSERTION(test, \
+ KUNIT_ASSERTION, \
+ left, \
+ right, \
+ fmt, \
+ ##__VA_ARGS__)
+
+/**
+ * KUNIT_ASSERT_NOT_ERR_OR_NULL() - Assertion that @ptr is not null and not err.
+ * @test: The test context object.
+ * @ptr: an arbitrary pointer.
+ *
+ * Sets an assertion that the value that @ptr evaluates to is not null and not
+ * an errno stored in a pointer. This is the same as
+ * KUNIT_EXPECT_NOT_ERR_OR_NULL(), except it causes an assertion failure (see
+ * KUNIT_ASSERT_TRUE()) when the assertion is not met.
+ */
+#define KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr) \
+ KUNIT_PTR_NOT_ERR_OR_NULL_ASSERTION(test, KUNIT_ASSERTION, ptr)
+
+#define KUNIT_ASSERT_NOT_ERR_OR_NULL_MSG(test, ptr, fmt, ...) \
+ KUNIT_PTR_NOT_ERR_OR_NULL_MSG_ASSERTION(test, \
+ KUNIT_ASSERTION, \
+ ptr, \
+ fmt, \
+ ##__VA_ARGS__)
+
+#endif /* _KUNIT_TEST_H */
diff --git a/include/kunit/try-catch.h b/include/kunit/try-catch.h
new file mode 100644
index 000000000000..404f336cbdc8
--- /dev/null
+++ b/include/kunit/try-catch.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * An API to allow a function, that may fail, to be executed, and recover in a
+ * controlled manner.
+ *
+ * Copyright (C) 2019, Google LLC.
+ * Author: Brendan Higgins <brendanhiggins@google.com>
+ */
+
+#ifndef _KUNIT_TRY_CATCH_H
+#define _KUNIT_TRY_CATCH_H
+
+#include <linux/types.h>
+
+typedef void (*kunit_try_catch_func_t)(void *);
+
+struct completion;
+struct kunit;
+
+/**
+ * struct kunit_try_catch - provides a generic way to run code which might fail.
+ * @test: The test case that is currently being executed.
+ * @try_completion: Completion that the control thread waits on while test runs.
+ * @try_result: Contains any errno obtained while running test case.
+ * @try: The function, the test case, to attempt to run.
+ * @catch: The function called if @try bails out.
+ * @context: used to pass user data to the try and catch functions.
+ *
+ * kunit_try_catch provides a generic, architecture independent way to execute
+ * an arbitrary function of type kunit_try_catch_func_t which may bail out by
+ * calling kunit_try_catch_throw(). If kunit_try_catch_throw() is called, @try
+ * is stopped at the site of invocation and @catch is called.
+ *
+ * struct kunit_try_catch provides a generic interface for the functionality
+ * needed to implement kunit->abort() which in turn is needed for implementing
+ * assertions. Assertions allow stating a precondition for a test simplifying
+ * how test cases are written and presented.
+ *
+ * Assertions are like expectations, except they abort (call
+ * kunit_try_catch_throw()) when the specified condition is not met. This is
+ * useful when you look at a test case as a logical statement about some piece
+ * of code, where assertions are the premises for the test case, and the
+ * conclusion is a set of predicates, rather expectations, that must all be
+ * true. If your premises are violated, it does not makes sense to continue.
+ */
+struct kunit_try_catch {
+ /* private: internal use only. */
+ struct kunit *test;
+ struct completion *try_completion;
+ int try_result;
+ kunit_try_catch_func_t try;
+ kunit_try_catch_func_t catch;
+ void *context;
+};
+
+void kunit_try_catch_init(struct kunit_try_catch *try_catch,
+ struct kunit *test,
+ kunit_try_catch_func_t try,
+ kunit_try_catch_func_t catch);
+
+void kunit_try_catch_run(struct kunit_try_catch *try_catch, void *context);
+
+void __noreturn kunit_try_catch_throw(struct kunit_try_catch *try_catch);
+
+static inline int kunit_try_catch_get_result(struct kunit_try_catch *try_catch)
+{
+ return try_catch->try_result;
+}
+
+/*
+ * Exposed for testing only.
+ */
+void kunit_generic_try_catch_init(struct kunit_try_catch *try_catch);
+
+#endif /* _KUNIT_TRY_CATCH_H */
diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index 080012a6f025..df01a8579034 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -80,6 +80,22 @@
#include <linux/linkage.h>
#include <linux/types.h>
+
+enum arm_smccc_conduit {
+ SMCCC_CONDUIT_NONE,
+ SMCCC_CONDUIT_SMC,
+ SMCCC_CONDUIT_HVC,
+};
+
+/**
+ * arm_smccc_1_1_get_conduit()
+ *
+ * Returns the conduit to be used for SMCCCv1.1 or later.
+ *
+ * When SMCCCv1.1 is not present, returns SMCCC_CONDUIT_NONE.
+ */
+enum arm_smccc_conduit arm_smccc_1_1_get_conduit(void);
+
/**
* struct arm_smccc_res - Result from SMC/HVC call
* @a0-a3 result values from registers 0 to 3
diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h
index 3305ea7f9dc7..0a241c5c911d 100644
--- a/include/linux/arm_sdei.h
+++ b/include/linux/arm_sdei.h
@@ -5,12 +5,6 @@
#include <uapi/linux/arm_sdei.h>
-enum sdei_conduit_types {
- CONDUIT_INVALID = 0,
- CONDUIT_SMC,
- CONDUIT_HVC,
-};
-
#include <acpi/ghes.h>
#ifdef CONFIG_ARM_SDE_INTERFACE
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 90528f12bdfa..29fc933df3bf 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -326,10 +326,11 @@ static inline int bitmap_equal(const unsigned long *src1,
}
/**
- * bitmap_or_equal - Check whether the or of two bitnaps is equal to a third
+ * bitmap_or_equal - Check whether the or of two bitmaps is equal to a third
* @src1: Pointer to bitmap 1
* @src2: Pointer to bitmap 2 will be or'ed with bitmap 1
* @src3: Pointer to bitmap 3. Compare to the result of *@src1 | *@src2
+ * @nbits: number of bits in each of these bitmaps
*
* Returns: True if (*@src1 | *@src2) == *@src3, false otherwise
*/
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index bed9e43f9426..19394c77ed99 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -15,7 +15,9 @@
*/
#include <linux/cgroup.h>
+#include <linux/percpu.h>
#include <linux/percpu_counter.h>
+#include <linux/u64_stats_sync.h>
#include <linux/seq_file.h>
#include <linux/radix-tree.h>
#include <linux/blkdev.h>
@@ -31,15 +33,12 @@
#ifdef CONFIG_BLK_CGROUP
-enum blkg_rwstat_type {
- BLKG_RWSTAT_READ,
- BLKG_RWSTAT_WRITE,
- BLKG_RWSTAT_SYNC,
- BLKG_RWSTAT_ASYNC,
- BLKG_RWSTAT_DISCARD,
+enum blkg_iostat_type {
+ BLKG_IOSTAT_READ,
+ BLKG_IOSTAT_WRITE,
+ BLKG_IOSTAT_DISCARD,
- BLKG_RWSTAT_NR,
- BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR,
+ BLKG_IOSTAT_NR,
};
struct blkcg_gq;
@@ -61,17 +60,15 @@ struct blkcg {
#endif
};
-/*
- * blkg_[rw]stat->aux_cnt is excluded for local stats but included for
- * recursive. Used to carry stats of dead children.
- */
-struct blkg_rwstat {
- struct percpu_counter cpu_cnt[BLKG_RWSTAT_NR];
- atomic64_t aux_cnt[BLKG_RWSTAT_NR];
+struct blkg_iostat {
+ u64 bytes[BLKG_IOSTAT_NR];
+ u64 ios[BLKG_IOSTAT_NR];
};
-struct blkg_rwstat_sample {
- u64 cnt[BLKG_RWSTAT_NR];
+struct blkg_iostat_set {
+ struct u64_stats_sync sync;
+ struct blkg_iostat cur;
+ struct blkg_iostat last;
};
/*
@@ -127,8 +124,8 @@ struct blkcg_gq {
/* is this blkg online? protected by both blkcg and q locks */
bool online;
- struct blkg_rwstat stat_bytes;
- struct blkg_rwstat stat_ios;
+ struct blkg_iostat_set __percpu *iostat_cpu;
+ struct blkg_iostat_set iostat;
struct blkg_policy_data *pd[BLKCG_MAX_POLS];
@@ -202,13 +199,6 @@ int blkcg_activate_policy(struct request_queue *q,
void blkcg_deactivate_policy(struct request_queue *q,
const struct blkcg_policy *pol);
-static inline u64 blkg_rwstat_read_counter(struct blkg_rwstat *rwstat,
- unsigned int idx)
-{
- return atomic64_read(&rwstat->aux_cnt[idx]) +
- percpu_counter_sum_positive(&rwstat->cpu_cnt[idx]);
-}
-
const char *blkg_dev_name(struct blkcg_gq *blkg);
void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
u64 (*prfill)(struct seq_file *,
@@ -216,17 +206,6 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
const struct blkcg_policy *pol, int data,
bool show_total);
u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);
-u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
- const struct blkg_rwstat_sample *rwstat);
-u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
- int off);
-int blkg_print_stat_bytes(struct seq_file *sf, void *v);
-int blkg_print_stat_ios(struct seq_file *sf, void *v);
-int blkg_print_stat_bytes_recursive(struct seq_file *sf, void *v);
-int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v);
-
-void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol,
- int off, struct blkg_rwstat_sample *sum);
struct blkg_conf_ctx {
struct gendisk *disk;
@@ -578,128 +557,6 @@ static inline void blkg_put(struct blkcg_gq *blkg)
if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \
(p_blkg)->q, false)))
-static inline int blkg_rwstat_init(struct blkg_rwstat *rwstat, gfp_t gfp)
-{
- int i, ret;
-
- for (i = 0; i < BLKG_RWSTAT_NR; i++) {
- ret = percpu_counter_init(&rwstat->cpu_cnt[i], 0, gfp);
- if (ret) {
- while (--i >= 0)
- percpu_counter_destroy(&rwstat->cpu_cnt[i]);
- return ret;
- }
- atomic64_set(&rwstat->aux_cnt[i], 0);
- }
- return 0;
-}
-
-static inline void blkg_rwstat_exit(struct blkg_rwstat *rwstat)
-{
- int i;
-
- for (i = 0; i < BLKG_RWSTAT_NR; i++)
- percpu_counter_destroy(&rwstat->cpu_cnt[i]);
-}
-
-/**
- * blkg_rwstat_add - add a value to a blkg_rwstat
- * @rwstat: target blkg_rwstat
- * @op: REQ_OP and flags
- * @val: value to add
- *
- * Add @val to @rwstat. The counters are chosen according to @rw. The
- * caller is responsible for synchronizing calls to this function.
- */
-static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
- unsigned int op, uint64_t val)
-{
- struct percpu_counter *cnt;
-
- if (op_is_discard(op))
- cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_DISCARD];
- else if (op_is_write(op))
- cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE];
- else
- cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ];
-
- percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);
-
- if (op_is_sync(op))
- cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC];
- else
- cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC];
-
- percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);
-}
-
-/**
- * blkg_rwstat_read - read the current values of a blkg_rwstat
- * @rwstat: blkg_rwstat to read
- *
- * Read the current snapshot of @rwstat and return it in the aux counts.
- */
-static inline void blkg_rwstat_read(struct blkg_rwstat *rwstat,
- struct blkg_rwstat_sample *result)
-{
- int i;
-
- for (i = 0; i < BLKG_RWSTAT_NR; i++)
- result->cnt[i] =
- percpu_counter_sum_positive(&rwstat->cpu_cnt[i]);
-}
-
-/**
- * blkg_rwstat_total - read the total count of a blkg_rwstat
- * @rwstat: blkg_rwstat to read
- *
- * Return the total count of @rwstat regardless of the IO direction. This
- * function can be called without synchronization and takes care of u64
- * atomicity.
- */
-static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat)
-{
- struct blkg_rwstat_sample tmp = { };
-
- blkg_rwstat_read(rwstat, &tmp);
- return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE];
-}
-
-/**
- * blkg_rwstat_reset - reset a blkg_rwstat
- * @rwstat: blkg_rwstat to reset
- */
-static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
-{
- int i;
-
- for (i = 0; i < BLKG_RWSTAT_NR; i++) {
- percpu_counter_set(&rwstat->cpu_cnt[i], 0);
- atomic64_set(&rwstat->aux_cnt[i], 0);
- }
-}
-
-/**
- * blkg_rwstat_add_aux - add a blkg_rwstat into another's aux count
- * @to: the destination blkg_rwstat
- * @from: the source
- *
- * Add @from's count including the aux one to @to's aux count.
- */
-static inline void blkg_rwstat_add_aux(struct blkg_rwstat *to,
- struct blkg_rwstat *from)
-{
- u64 sum[BLKG_RWSTAT_NR];
- int i;
-
- for (i = 0; i < BLKG_RWSTAT_NR; i++)
- sum[i] = percpu_counter_sum_positive(&from->cpu_cnt[i]);
-
- for (i = 0; i < BLKG_RWSTAT_NR; i++)
- atomic64_add(sum[i] + atomic64_read(&from->aux_cnt[i]),
- &to->aux_cnt[i]);
-}
-
#ifdef CONFIG_BLK_DEV_THROTTLING
extern bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
struct bio *bio);
@@ -745,15 +602,33 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
throtl = blk_throtl_bio(q, blkg, bio);
if (!throtl) {
+ struct blkg_iostat_set *bis;
+ int rwd, cpu;
+
+ if (op_is_discard(bio->bi_opf))
+ rwd = BLKG_IOSTAT_DISCARD;
+ else if (op_is_write(bio->bi_opf))
+ rwd = BLKG_IOSTAT_WRITE;
+ else
+ rwd = BLKG_IOSTAT_READ;
+
+ cpu = get_cpu();
+ bis = per_cpu_ptr(blkg->iostat_cpu, cpu);
+ u64_stats_update_begin(&bis->sync);
+
/*
* If the bio is flagged with BIO_QUEUE_ENTERED it means this
* is a split bio and we would have already accounted for the
* size of the bio.
*/
if (!bio_flagged(bio, BIO_QUEUE_ENTERED))
- blkg_rwstat_add(&blkg->stat_bytes, bio->bi_opf,
- bio->bi_iter.bi_size);
- blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1);
+ bis->cur.bytes[rwd] += bio->bi_iter.bi_size;
+ bis->cur.ios[rwd]++;
+
+ u64_stats_update_end(&bis->sync);
+ if (cgroup_subsys_on_dfl(io_cgrp_subsys))
+ cgroup_rstat_updated(blkg->blkcg->css.cgroup, cpu);
+ put_cpu();
}
blkcg_bio_issue_init(bio);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 0bf056de5cc3..11cfd6470b1a 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -10,103 +10,239 @@ struct blk_mq_tags;
struct blk_flush_queue;
/**
- * struct blk_mq_hw_ctx - State for a hardware queue facing the hardware block device
+ * struct blk_mq_hw_ctx - State for a hardware queue facing the hardware
+ * block device
*/
struct blk_mq_hw_ctx {
struct {
+ /** @lock: Protects the dispatch list. */
spinlock_t lock;
+ /**
+ * @dispatch: Used for requests that are ready to be
+ * dispatched to the hardware but for some reason (e.g. lack of
+ * resources) could not be sent to the hardware. As soon as the
+ * driver can send new requests, requests at this list will
+ * be sent first for a fairer dispatch.
+ */
struct list_head dispatch;
- unsigned long state; /* BLK_MQ_S_* flags */
+ /**
+ * @state: BLK_MQ_S_* flags. Defines the state of the hw
+ * queue (active, scheduled to restart, stopped).
+ */
+ unsigned long state;
} ____cacheline_aligned_in_smp;
+ /**
+ * @run_work: Used for scheduling a hardware queue run at a later time.
+ */
struct delayed_work run_work;
+ /** @cpumask: Map of available CPUs where this hctx can run. */
cpumask_var_t cpumask;
+ /**
+ * @next_cpu: Used by blk_mq_hctx_next_cpu() for round-robin CPU
+ * selection from @cpumask.
+ */
int next_cpu;
+ /**
+ * @next_cpu_batch: Counter of how many works left in the batch before
+ * changing to the next CPU.
+ */
int next_cpu_batch;
- unsigned long flags; /* BLK_MQ_F_* flags */
+ /** @flags: BLK_MQ_F_* flags. Defines the behaviour of the queue. */
+ unsigned long flags;
+ /**
+ * @sched_data: Pointer owned by the IO scheduler attached to a request
+ * queue. It's up to the IO scheduler how to use this pointer.
+ */
void *sched_data;
+ /**
+ * @queue: Pointer to the request queue that owns this hardware context.
+ */
struct request_queue *queue;
+ /** @fq: Queue of requests that need to perform a flush operation. */
struct blk_flush_queue *fq;
+ /**
+ * @driver_data: Pointer to data owned by the block driver that created
+ * this hctx
+ */
void *driver_data;
+ /**
+ * @ctx_map: Bitmap for each software queue. If bit is on, there is a
+ * pending request in that software queue.
+ */
struct sbitmap ctx_map;
+ /**
+ * @dispatch_from: Software queue to be used when no scheduler was
+ * selected.
+ */
struct blk_mq_ctx *dispatch_from;
+ /**
+ * @dispatch_busy: Number used by blk_mq_update_dispatch_busy() to
+ * decide if the hw_queue is busy using Exponential Weighted Moving
+ * Average algorithm.
+ */
unsigned int dispatch_busy;
+ /** @type: HCTX_TYPE_* flags. Type of hardware queue. */
unsigned short type;
+ /** @nr_ctx: Number of software queues. */
unsigned short nr_ctx;
+ /** @ctxs: Array of software queues. */
struct blk_mq_ctx **ctxs;
+ /** @dispatch_wait_lock: Lock for dispatch_wait queue. */
spinlock_t dispatch_wait_lock;
+ /**
+ * @dispatch_wait: Waitqueue to put requests when there is no tag
+ * available at the moment, to wait for another try in the future.
+ */
wait_queue_entry_t dispatch_wait;
+
+ /**
+ * @wait_index: Index of next available dispatch_wait queue to insert
+ * requests.
+ */
atomic_t wait_index;
+ /**
+ * @tags: Tags owned by the block driver. A tag at this set is only
+ * assigned when a request is dispatched from a hardware queue.
+ */
struct blk_mq_tags *tags;
+ /**
+ * @sched_tags: Tags owned by I/O scheduler. If there is an I/O
+ * scheduler associated with a request queue, a tag is assigned when
+ * that request is allocated. Else, this member is not used.
+ */
struct blk_mq_tags *sched_tags;
+ /** @queued: Number of queued requests. */
unsigned long queued;
+ /** @run: Number of dispatched requests. */
unsigned long run;
#define BLK_MQ_MAX_DISPATCH_ORDER 7
+ /** @dispatched: Number of dispatch requests by queue. */
unsigned long dispatched[BLK_MQ_MAX_DISPATCH_ORDER];
+ /** @numa_node: NUMA node the storage adapter has been connected to. */
unsigned int numa_node;
+ /** @queue_num: Index of this hardware queue. */
unsigned int queue_num;
+ /**
+ * @nr_active: Number of active requests. Only used when a tag set is
+ * shared across request queues.
+ */
atomic_t nr_active;
+ /** @cpuhp_dead: List to store request if some CPU die. */
struct hlist_node cpuhp_dead;
+ /** @kobj: Kernel object for sysfs. */
struct kobject kobj;
+ /** @poll_considered: Count times blk_poll() was called. */
unsigned long poll_considered;
+ /** @poll_invoked: Count how many requests blk_poll() polled. */
unsigned long poll_invoked;
+ /** @poll_success: Count how many polled requests were completed. */
unsigned long poll_success;
#ifdef CONFIG_BLK_DEBUG_FS
+ /**
+ * @debugfs_dir: debugfs directory for this hardware queue. Named
+ * as cpu<cpu_number>.
+ */
struct dentry *debugfs_dir;
+ /** @sched_debugfs_dir: debugfs directory for the scheduler. */
struct dentry *sched_debugfs_dir;
#endif
+ /** @hctx_list: List of all hardware queues. */
struct list_head hctx_list;
- /* Must be the last member - see also blk_mq_hw_ctx_size(). */
+ /**
+ * @srcu: Sleepable RCU. Use as lock when type of the hardware queue is
+ * blocking (BLK_MQ_F_BLOCKING). Must be the last member - see also
+ * blk_mq_hw_ctx_size().
+ */
struct srcu_struct srcu[0];
};
+/**
+ * struct blk_mq_queue_map - Map software queues to hardware queues
+ * @mq_map: CPU ID to hardware queue index map. This is an array
+ * with nr_cpu_ids elements. Each element has a value in the range
+ * [@queue_offset, @queue_offset + @nr_queues).
+ * @nr_queues: Number of hardware queues to map CPU IDs onto.
+ * @queue_offset: First hardware queue to map onto. Used by the PCIe NVMe
+ * driver to map each hardware queue type (enum hctx_type) onto a distinct
+ * set of hardware queues.
+ */
struct blk_mq_queue_map {
unsigned int *mq_map;
unsigned int nr_queues;
unsigned int queue_offset;
};
+/**
+ * enum hctx_type - Type of hardware queue
+ * @HCTX_TYPE_DEFAULT: All I/O not otherwise accounted for.
+ * @HCTX_TYPE_READ: Just for READ I/O.
+ * @HCTX_TYPE_POLL: Polled I/O of any kind.
+ * @HCTX_MAX_TYPES: Number of types of hctx.
+ */
enum hctx_type {
- HCTX_TYPE_DEFAULT, /* all I/O not otherwise accounted for */
- HCTX_TYPE_READ, /* just for READ I/O */
- HCTX_TYPE_POLL, /* polled I/O of any kind */
+ HCTX_TYPE_DEFAULT,
+ HCTX_TYPE_READ,
+ HCTX_TYPE_POLL,
HCTX_MAX_TYPES,
};
+/**
+ * struct blk_mq_tag_set - tag set that can be shared between request queues
+ * @map: One or more ctx -> hctx mappings. One map exists for each
+ * hardware queue type (enum hctx_type) that the driver wishes
+ * to support. There are no restrictions on maps being of the
+ * same size, and it's perfectly legal to share maps between
+ * types.
+ * @nr_maps: Number of elements in the @map array. A number in the range
+ * [1, HCTX_MAX_TYPES].
+ * @ops: Pointers to functions that implement block driver behavior.
+ * @nr_hw_queues: Number of hardware queues supported by the block driver that
+ * owns this data structure.
+ * @queue_depth: Number of tags per hardware queue, reserved tags included.
+ * @reserved_tags: Number of tags to set aside for BLK_MQ_REQ_RESERVED tag
+ * allocations.
+ * @cmd_size: Number of additional bytes to allocate per request. The block
+ * driver owns these additional bytes.
+ * @numa_node: NUMA node the storage adapter has been connected to.
+ * @timeout: Request processing timeout in jiffies.
+ * @flags: Zero or more BLK_MQ_F_* flags.
+ * @driver_data: Pointer to data owned by the block driver that created this
+ * tag set.
+ * @tags: Tag sets. One tag set per hardware queue. Has @nr_hw_queues
+ * elements.
+ * @tag_list_lock: Serializes tag_list accesses.
+ * @tag_list: List of the request queues that use this tag set. See also
+ * request_queue.tag_set_list.
+ */
struct blk_mq_tag_set {
- /*
- * map[] holds ctx -> hctx mappings, one map exists for each type
- * that the driver wishes to support. There are no restrictions
- * on maps being of the same size, and it's perfectly legal to
- * share maps between types.
- */
struct blk_mq_queue_map map[HCTX_MAX_TYPES];
- unsigned int nr_maps; /* nr entries in map[] */
+ unsigned int nr_maps;
const struct blk_mq_ops *ops;
- unsigned int nr_hw_queues; /* nr hw queues across maps */
- unsigned int queue_depth; /* max hw supported */
+ unsigned int nr_hw_queues;
+ unsigned int queue_depth;
unsigned int reserved_tags;
- unsigned int cmd_size; /* per-request extra data */
+ unsigned int cmd_size;
int numa_node;
unsigned int timeout;
- unsigned int flags; /* BLK_MQ_F_* */
+ unsigned int flags;
void *driver_data;
struct blk_mq_tags **tags;
@@ -115,6 +251,12 @@ struct blk_mq_tag_set {
struct list_head tag_list;
};
+/**
+ * struct blk_mq_queue_data - Data about a request inserted in a queue
+ *
+ * @rq: Request pointer.
+ * @last: If it is the last request in the queue.
+ */
struct blk_mq_queue_data {
struct request *rq;
bool last;
@@ -142,81 +284,101 @@ typedef bool (busy_fn)(struct request_queue *);
typedef void (complete_fn)(struct request *);
typedef void (cleanup_rq_fn)(struct request *);
-
+/**
+ * struct blk_mq_ops - Callback functions that implements block driver
+ * behaviour.
+ */
struct blk_mq_ops {
- /*
- * Queue request
+ /**
+ * @queue_rq: Queue a new request from block IO.
*/
queue_rq_fn *queue_rq;
- /*
- * If a driver uses bd->last to judge when to submit requests to
- * hardware, it must define this function. In case of errors that
- * make us stop issuing further requests, this hook serves the
+ /**
+ * @commit_rqs: If a driver uses bd->last to judge when to submit
+ * requests to hardware, it must define this function. In case of errors
+ * that make us stop issuing further requests, this hook serves the
* purpose of kicking the hardware (which the last request otherwise
* would have done).
*/
commit_rqs_fn *commit_rqs;
- /*
- * Reserve budget before queue request, once .queue_rq is
+ /**
+ * @get_budget: Reserve budget before queue request, once .queue_rq is
* run, it is driver's responsibility to release the
* reserved budget. Also we have to handle failure case
* of .get_budget for avoiding I/O deadlock.
*/
get_budget_fn *get_budget;
+ /**
+ * @put_budget: Release the reserved budget.
+ */
put_budget_fn *put_budget;
- /*
- * Called on request timeout
+ /**
+ * @timeout: Called on request timeout.
*/
timeout_fn *timeout;
- /*
- * Called to poll for completion of a specific tag.
+ /**
+ * @poll: Called to poll for completion of a specific tag.
*/
poll_fn *poll;
+ /**
+ * @complete: Mark the request as complete.
+ */
complete_fn *complete;
- /*
- * Called when the block layer side of a hardware queue has been
- * set up, allowing the driver to allocate/init matching structures.
- * Ditto for exit/teardown.
+ /**
+ * @init_hctx: Called when the block layer side of a hardware queue has
+ * been set up, allowing the driver to allocate/init matching
+ * structures.
*/
init_hctx_fn *init_hctx;
+ /**
+ * @exit_hctx: Ditto for exit/teardown.
+ */
exit_hctx_fn *exit_hctx;
- /*
- * Called for every command allocated by the block layer to allow
- * the driver to set up driver specific data.
+ /**
+ * @init_request: Called for every command allocated by the block layer
+ * to allow the driver to set up driver specific data.
*
* Tag greater than or equal to queue_depth is for setting up
* flush request.
- *
- * Ditto for exit/teardown.
*/
init_request_fn *init_request;
+ /**
+ * @exit_request: Ditto for exit/teardown.
+ */
exit_request_fn *exit_request;
- /* Called from inside blk_get_request() */
+
+ /**
+ * @initialize_rq_fn: Called from inside blk_get_request().
+ */
void (*initialize_rq_fn)(struct request *rq);
- /*
- * Called before freeing one request which isn't completed yet,
- * and usually for freeing the driver private data
+ /**
+ * @cleanup_rq: Called before freeing one request which isn't completed
+ * yet, and usually for freeing the driver private data.
*/
cleanup_rq_fn *cleanup_rq;
- /*
- * If set, returns whether or not this queue currently is busy
+ /**
+ * @busy: If set, returns whether or not this queue currently is busy.
*/
busy_fn *busy;
+ /**
+ * @map_queues: This allows drivers specify their own queue mapping by
+ * overriding the setup-time function that builds the mq_map.
+ */
map_queues_fn *map_queues;
#ifdef CONFIG_BLK_DEBUG_FS
- /*
- * Used by the debugfs implementation to show driver-specific
+ /**
+ * @show_rq: Used by the debugfs implementation to show driver-specific
* information about a request.
*/
void (*show_rq)(struct seq_file *m, struct request *rq);
@@ -262,7 +424,6 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set);
void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);
void blk_mq_free_request(struct request *rq);
-bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
bool blk_mq_queue_inflight(struct request_queue *q);
@@ -301,9 +462,25 @@ static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag)
return unique_tag & BLK_MQ_UNIQUE_TAG_MASK;
}
+/**
+ * blk_mq_rq_state() - read the current MQ_RQ_* state of a request
+ * @rq: target request.
+ */
+static inline enum mq_rq_state blk_mq_rq_state(struct request *rq)
+{
+ return READ_ONCE(rq->state);
+}
+
+static inline int blk_mq_request_started(struct request *rq)
+{
+ return blk_mq_rq_state(rq) != MQ_RQ_IDLE;
+}
+
+static inline int blk_mq_request_completed(struct request *rq)
+{
+ return blk_mq_rq_state(rq) == MQ_RQ_COMPLETE;
+}
-int blk_mq_request_started(struct request *rq);
-int blk_mq_request_completed(struct request *rq);
void blk_mq_start_request(struct request *rq);
void blk_mq_end_request(struct request *rq, blk_status_t error);
void __blk_mq_end_request(struct request *rq, blk_status_t error);
@@ -324,7 +501,7 @@ void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
void blk_mq_quiesce_queue(struct request_queue *q);
void blk_mq_unquiesce_queue(struct request_queue *q);
void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
-bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
+void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
void blk_mq_run_hw_queues(struct request_queue *q, bool async);
void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
busy_tag_iter_fn *fn, void *priv);
@@ -343,14 +520,29 @@ void blk_mq_quiesce_queue_nowait(struct request_queue *q);
unsigned int blk_mq_rq_cpu(struct request *rq);
-/*
+/**
+ * blk_mq_rq_from_pdu - cast a PDU to a request
+ * @pdu: the PDU (Protocol Data Unit) to be casted
+ *
+ * Return: request
+ *
* Driver command data is immediately after the request. So subtract request
- * size to get back to the original request, add request size to get the PDU.
+ * size to get back to the original request.
*/
static inline struct request *blk_mq_rq_from_pdu(void *pdu)
{
return pdu - sizeof(struct request);
}
+
+/**
+ * blk_mq_rq_to_pdu - cast a request to a PDU
+ * @rq: the request to be casted
+ *
+ * Return: pointer to the PDU
+ *
+ * Driver command data is immediately after the request. So add request to get
+ * the PDU.
+ */
static inline void *blk_mq_rq_to_pdu(struct request *rq)
{
return rq + 1;
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index d688b96d1d63..70254ae11769 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -153,10 +153,10 @@ struct bio {
unsigned short bi_write_hint;
blk_status_t bi_status;
u8 bi_partno;
+ atomic_t __bi_remaining;
struct bvec_iter bi_iter;
- atomic_t __bi_remaining;
bio_end_io_t *bi_end_io;
void *bi_private;
@@ -290,6 +290,12 @@ enum req_opf {
REQ_OP_ZONE_RESET_ALL = 8,
/* write the zero filled sector many times */
REQ_OP_WRITE_ZEROES = 9,
+ /* Open a zone */
+ REQ_OP_ZONE_OPEN = 10,
+ /* Close a zone */
+ REQ_OP_ZONE_CLOSE = 11,
+ /* Transition a zone to full */
+ REQ_OP_ZONE_FINISH = 12,
/* SCSI passthrough using struct scsi_request */
REQ_OP_SCSI_IN = 32,
@@ -371,6 +377,7 @@ enum stat_group {
STAT_READ,
STAT_WRITE,
STAT_DISCARD,
+ STAT_FLUSH,
NR_STAT_GROUPS
};
@@ -417,6 +424,25 @@ static inline bool op_is_discard(unsigned int op)
return (op & REQ_OP_MASK) == REQ_OP_DISCARD;
}
+/*
+ * Check if a bio or request operation is a zone management operation, with
+ * the exception of REQ_OP_ZONE_RESET_ALL which is treated as a special case
+ * due to its different handling in the block layer and device response in
+ * case of command failure.
+ */
+static inline bool op_is_zone_mgmt(enum req_opf op)
+{
+ switch (op & REQ_OP_MASK) {
+ case REQ_OP_ZONE_RESET:
+ case REQ_OP_ZONE_OPEN:
+ case REQ_OP_ZONE_CLOSE:
+ case REQ_OP_ZONE_FINISH:
+ return true;
+ default:
+ return false;
+ }
+}
+
static inline int op_stat_group(unsigned int op)
{
if (op_is_discard(op))
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f3ea78b0c91c..397bb9bc230b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -349,25 +349,25 @@ struct queue_limits {
enum blk_zoned_model zoned;
};
+typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx,
+ void *data);
+
#ifdef CONFIG_BLK_DEV_ZONED
-/*
- * Maximum number of zones to report with a single report zones command.
- */
-#define BLK_ZONED_REPORT_MAX_ZONES 8192U
+#define BLK_ALL_ZONES ((unsigned int)-1)
+int blkdev_report_zones(struct block_device *bdev, sector_t sector,
+ unsigned int nr_zones, report_zones_cb cb, void *data);
extern unsigned int blkdev_nr_zones(struct block_device *bdev);
-extern int blkdev_report_zones(struct block_device *bdev,
- sector_t sector, struct blk_zone *zones,
- unsigned int *nr_zones);
-extern int blkdev_reset_zones(struct block_device *bdev, sector_t sectors,
- sector_t nr_sectors, gfp_t gfp_mask);
+extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
+ sector_t sectors, sector_t nr_sectors,
+ gfp_t gfp_mask);
extern int blk_revalidate_disk_zones(struct gendisk *disk);
extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg);
-extern int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
- unsigned int cmd, unsigned long arg);
+extern int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned int cmd, unsigned long arg);
#else /* CONFIG_BLK_DEV_ZONED */
@@ -388,9 +388,9 @@ static inline int blkdev_report_zones_ioctl(struct block_device *bdev,
return -ENOTTY;
}
-static inline int blkdev_reset_zones_ioctl(struct block_device *bdev,
- fmode_t mode, unsigned int cmd,
- unsigned long arg)
+static inline int blkdev_zone_mgmt_ioctl(struct block_device *bdev,
+ fmode_t mode, unsigned int cmd,
+ unsigned long arg)
{
return -ENOTTY;
}
@@ -411,7 +411,6 @@ struct request_queue {
/* sw queues */
struct blk_mq_ctx __percpu *queue_ctx;
- unsigned int nr_queues;
unsigned int queue_depth;
@@ -1709,7 +1708,7 @@ struct block_device_operations {
/* this callback is with swap_lock and sometimes page table lock held */
void (*swap_slot_free_notify) (struct block_device *, unsigned long);
int (*report_zones)(struct gendisk *, sector_t sector,
- struct blk_zone *zones, unsigned int *nr_zones);
+ unsigned int nr_zones, report_zones_cb cb, void *data);
struct module *owner;
const struct pr_ops *pr_ops;
};
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 5b9d22338606..3bf3835d0e86 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -656,11 +656,11 @@ void bpf_map_put_with_uref(struct bpf_map *map);
void bpf_map_put(struct bpf_map *map);
int bpf_map_charge_memlock(struct bpf_map *map, u32 pages);
void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages);
-int bpf_map_charge_init(struct bpf_map_memory *mem, size_t size);
+int bpf_map_charge_init(struct bpf_map_memory *mem, u64 size);
void bpf_map_charge_finish(struct bpf_map_memory *mem);
void bpf_map_charge_move(struct bpf_map_memory *dst,
struct bpf_map_memory *src);
-void *bpf_map_area_alloc(size_t size, int numa_node);
+void *bpf_map_area_alloc(u64 size, int numa_node);
void bpf_map_area_free(void *base);
void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
diff --git a/include/linux/can/core.h b/include/linux/can/core.h
index 8339071ab08b..e20a0cd09ba5 100644
--- a/include/linux/can/core.h
+++ b/include/linux/can/core.h
@@ -65,5 +65,6 @@ extern void can_rx_unregister(struct net *net, struct net_device *dev,
void *data);
extern int can_send(struct sk_buff *skb, int loop);
+void can_sock_destruct(struct sock *sk);
#endif /* !_CAN_CORE_H */
diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h
index 6b318efd8a74..cdf016596659 100644
--- a/include/linux/compiler_attributes.h
+++ b/include/linux/compiler_attributes.h
@@ -40,6 +40,7 @@
# define __GCC4_has_attribute___noclone__ 1
# define __GCC4_has_attribute___nonstring__ 0
# define __GCC4_has_attribute___no_sanitize_address__ (__GNUC_MINOR__ >= 8)
+# define __GCC4_has_attribute___fallthrough__ 0
#endif
/*
@@ -186,6 +187,22 @@
#endif
/*
+ * Add the pseudo keyword 'fallthrough' so case statement blocks
+ * must end with any of these keywords:
+ * break;
+ * fallthrough;
+ * goto <label>;
+ * return [expression];
+ *
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Statement-Attributes.html#Statement-Attributes
+ */
+#if __has_attribute(__fallthrough__)
+# define fallthrough __attribute__((__fallthrough__))
+#else
+# define fallthrough do {} while (0) /* fallthrough */
+#endif
+
+/*
* Note the missing underscores.
*
* gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-noinline-function-attribute
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index d0633ebdaa9c..bc6c879bd110 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -59,6 +59,11 @@ extern ssize_t cpu_show_l1tf(struct device *dev,
struct device_attribute *attr, char *buf);
extern ssize_t cpu_show_mds(struct device *dev,
struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_tsx_async_abort(struct device *dev,
+ struct device_attribute *attr,
+ char *buf);
+extern ssize_t cpu_show_itlb_multihit(struct device *dev,
+ struct device_attribute *attr, char *buf);
extern __printf(4, 5)
struct device *cpu_device_create(struct device *parent, void *drvdata,
@@ -213,28 +218,7 @@ static inline int cpuhp_smt_enable(void) { return 0; }
static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { return 0; }
#endif
-/*
- * These are used for a global "mitigations=" cmdline option for toggling
- * optional CPU mitigations.
- */
-enum cpu_mitigations {
- CPU_MITIGATIONS_OFF,
- CPU_MITIGATIONS_AUTO,
- CPU_MITIGATIONS_AUTO_NOSMT,
-};
-
-extern enum cpu_mitigations cpu_mitigations;
-
-/* mitigations=off */
-static inline bool cpu_mitigations_off(void)
-{
- return cpu_mitigations == CPU_MITIGATIONS_OFF;
-}
-
-/* mitigations=auto,nosmt */
-static inline bool cpu_mitigations_auto_nosmt(void)
-{
- return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
-}
+extern bool cpu_mitigations_off(void);
+extern bool cpu_mitigations_auto_nosmt(void);
#endif /* _LINUX_CPU_H_ */
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index c57e88e85c41..92d5fdc8154e 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -13,6 +13,7 @@
#include <linux/completion.h>
#include <linux/kobject.h>
#include <linux/notifier.h>
+#include <linux/pm_qos.h>
#include <linux/spinlock.h>
#include <linux/sysfs.h>
@@ -76,8 +77,10 @@ struct cpufreq_policy {
struct work_struct update; /* if update_policy() needs to be
* called, but you're in IRQ context */
- struct dev_pm_qos_request *min_freq_req;
- struct dev_pm_qos_request *max_freq_req;
+ struct freq_constraints constraints;
+ struct freq_qos_request *min_freq_req;
+ struct freq_qos_request *max_freq_req;
+
struct cpufreq_frequency_table *freq_table;
enum cpufreq_table_sorting freq_table_sorted;
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 399ad8632356..475668c69dbc 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -17,6 +17,7 @@
struct dm_dev;
struct dm_target;
struct dm_table;
+struct dm_report_zones_args;
struct mapped_device;
struct bio_vec;
@@ -93,9 +94,9 @@ typedef int (*dm_message_fn) (struct dm_target *ti, unsigned argc, char **argv,
typedef int (*dm_prepare_ioctl_fn) (struct dm_target *ti, struct block_device **bdev);
-typedef int (*dm_report_zones_fn) (struct dm_target *ti, sector_t sector,
- struct blk_zone *zones,
- unsigned int *nr_zones);
+typedef int (*dm_report_zones_fn) (struct dm_target *ti,
+ struct dm_report_zones_args *args,
+ unsigned int nr_zones);
/*
* These iteration functions are typically used to check (and combine)
@@ -422,10 +423,23 @@ struct gendisk *dm_disk(struct mapped_device *md);
int dm_suspended(struct dm_target *ti);
int dm_noflush_suspending(struct dm_target *ti);
void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors);
-void dm_remap_zone_report(struct dm_target *ti, sector_t start,
- struct blk_zone *zones, unsigned int *nr_zones);
union map_info *dm_get_rq_mapinfo(struct request *rq);
+#ifdef CONFIG_BLK_DEV_ZONED
+struct dm_report_zones_args {
+ struct dm_target *tgt;
+ sector_t next_sector;
+
+ void *orig_data;
+ report_zones_cb orig_cb;
+ unsigned int zone_idx;
+
+ /* must be filled by ->report_zones before calling dm_report_zones_cb */
+ sector_t start;
+};
+int dm_report_zones_cb(struct blk_zone *zone, unsigned int idx, void *data);
+#endif /* CONFIG_BLK_DEV_ZONED */
+
/*
* Device mapper functions to parse and create devices specified by the
* parameter "dm-mod.create="
@@ -594,9 +608,6 @@ void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size);
*/
#define dm_round_up(n, sz) (dm_div_up((n), (sz)) * (sz))
-#define dm_array_too_big(fixed, obj, num) \
- ((num) > (UINT_MAX - (fixed)) / (obj))
-
/*
* Sector offset taken relative to the start of the target instead of
* relative to the start of the device.
diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
index adf993a3bd58..d03af3605460 100644
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -5,6 +5,8 @@
#include <linux/dma-mapping.h>
#include <linux/mem_encrypt.h>
+extern unsigned int zone_dma_bits;
+
#ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA
#include <asm/dma-direct.h>
#else
diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h
index 6c809440f319..4cf02ecd67de 100644
--- a/include/linux/dynamic_debug.h
+++ b/include/linux/dynamic_debug.h
@@ -204,6 +204,12 @@ static inline int ddebug_dyndbg_module_param_cb(char *param, char *val,
do { if (0) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); } while (0)
#define dynamic_dev_dbg(dev, fmt, ...) \
do { if (0) dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); } while (0)
+#define dynamic_hex_dump(prefix_str, prefix_type, rowsize, \
+ groupsize, buf, len, ascii) \
+ do { if (0) \
+ print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, \
+ rowsize, groupsize, buf, len, ascii); \
+ } while (0)
#endif
#endif
diff --git a/include/linux/efi.h b/include/linux/efi.h
index bd3837022307..d87acf62958e 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1579,9 +1579,22 @@ char *efi_convert_cmdline(efi_system_table_t *sys_table_arg,
efi_status_t efi_get_memory_map(efi_system_table_t *sys_table_arg,
struct efi_boot_memmap *map);
+efi_status_t efi_low_alloc_above(efi_system_table_t *sys_table_arg,
+ unsigned long size, unsigned long align,
+ unsigned long *addr, unsigned long min);
+
+static inline
efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg,
unsigned long size, unsigned long align,
- unsigned long *addr);
+ unsigned long *addr)
+{
+ /*
+ * Don't allocate at 0x0. It will confuse code that
+ * checks pointers against NULL. Skip the first 8
+ * bytes so we start at a nice even number.
+ */
+ return efi_low_alloc_above(sys_table_arg, size, align, addr, 0x8);
+}
efi_status_t efi_high_alloc(efi_system_table_t *sys_table_arg,
unsigned long size, unsigned long align,
@@ -1592,7 +1605,8 @@ efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg,
unsigned long image_size,
unsigned long alloc_size,
unsigned long preferred_addr,
- unsigned long alignment);
+ unsigned long alignment,
+ unsigned long min_addr);
efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg,
efi_loaded_image_t *image,
diff --git a/include/linux/export.h b/include/linux/export.h
index 95f55b7f83a0..941d075f03d6 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -18,8 +18,6 @@ extern struct module __this_module;
#define THIS_MODULE ((struct module *)0)
#endif
-#define NS_SEPARATOR "."
-
#ifdef CONFIG_MODVERSIONS
/* Mark the CRC weak since genksyms apparently decides not to
* generate a checksums for some symbols */
@@ -48,14 +46,14 @@ extern struct module __this_module;
* absolute relocations that require runtime processing on relocatable
* kernels.
*/
-#define __KSYMTAB_ENTRY_NS(sym, sec, ns) \
+#define __KSYMTAB_ENTRY_NS(sym, sec) \
__ADDRESSABLE(sym) \
asm(" .section \"___ksymtab" sec "+" #sym "\", \"a\" \n" \
" .balign 4 \n" \
- "__ksymtab_" #sym NS_SEPARATOR #ns ": \n" \
+ "__ksymtab_" #sym ": \n" \
" .long " #sym "- . \n" \
" .long __kstrtab_" #sym "- . \n" \
- " .long __kstrtab_ns_" #sym "- . \n" \
+ " .long __kstrtabns_" #sym "- . \n" \
" .previous \n")
#define __KSYMTAB_ENTRY(sym, sec) \
@@ -74,16 +72,14 @@ struct kernel_symbol {
int namespace_offset;
};
#else
-#define __KSYMTAB_ENTRY_NS(sym, sec, ns) \
- static const struct kernel_symbol __ksymtab_##sym##__##ns \
- asm("__ksymtab_" #sym NS_SEPARATOR #ns) \
+#define __KSYMTAB_ENTRY_NS(sym, sec) \
+ static const struct kernel_symbol __ksymtab_##sym \
__attribute__((section("___ksymtab" sec "+" #sym), used)) \
__aligned(sizeof(void *)) \
- = { (unsigned long)&sym, __kstrtab_##sym, __kstrtab_ns_##sym }
+ = { (unsigned long)&sym, __kstrtab_##sym, __kstrtabns_##sym }
#define __KSYMTAB_ENTRY(sym, sec) \
static const struct kernel_symbol __ksymtab_##sym \
- asm("__ksymtab_" #sym) \
__attribute__((section("___ksymtab" sec "+" #sym), used)) \
__aligned(sizeof(void *)) \
= { (unsigned long)&sym, __kstrtab_##sym, NULL }
@@ -112,10 +108,10 @@ struct kernel_symbol {
/* For every exported symbol, place a struct in the __ksymtab section */
#define ___EXPORT_SYMBOL_NS(sym, sec, ns) \
___export_symbol_common(sym, sec); \
- static const char __kstrtab_ns_##sym[] \
+ static const char __kstrtabns_##sym[] \
__attribute__((section("__ksymtab_strings"), used, aligned(1))) \
= #ns; \
- __KSYMTAB_ENTRY_NS(sym, sec, ns)
+ __KSYMTAB_ENTRY_NS(sym, sec)
#define ___EXPORT_SYMBOL(sym, sec) \
___export_symbol_common(sym, sec); \
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 2ce57645f3cd..0367a75f873b 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1099,7 +1099,6 @@ static inline void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
#endif /* CONFIG_BPF_JIT */
-void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp);
void bpf_prog_kallsyms_del_all(struct bpf_prog *fp);
#define BPF_ANC BIT(15)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e0d909d35763..dde6dc4492a0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -148,8 +148,6 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
/* File is opened with O_PATH; almost nothing can be done with it */
#define FMODE_PATH ((__force fmode_t)0x4000)
-/* File needs atomic accesses to f_pos */
-#define FMODE_ATOMIC_POS ((__force fmode_t)0x8000)
/* Write access to underlying fs */
#define FMODE_WRITER ((__force fmode_t)0x10000)
/* Has read method(s) */
@@ -2632,8 +2630,6 @@ extern void bd_finish_claiming(struct block_device *bdev,
extern void bd_abort_claiming(struct block_device *bdev,
struct block_device *whole, void *holder);
extern void blkdev_put(struct block_device *bdev, fmode_t mode);
-extern int __blkdev_reread_part(struct block_device *bdev);
-extern int blkdev_reread_part(struct block_device *bdev);
#ifdef CONFIG_SYSFS
extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
@@ -2703,8 +2699,6 @@ extern void make_bad_inode(struct inode *);
extern bool is_bad_inode(struct inode *);
#ifdef CONFIG_BLOCK
-extern void check_disk_size_change(struct gendisk *disk,
- struct block_device *bdev, bool verbose);
extern int revalidate_disk(struct gendisk *);
extern int check_disk_change(struct block_device *);
extern int __invalidate_device(struct block_device *, bool);
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index f622f7460ed8..1a7bffe78ed5 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -20,7 +20,6 @@
#define FS_CRYPTO_BLOCK_SIZE 16
-struct fscrypt_ctx;
struct fscrypt_info;
struct fscrypt_str {
@@ -62,18 +61,9 @@ struct fscrypt_operations {
bool (*dummy_context)(struct inode *);
bool (*empty_dir)(struct inode *);
unsigned int max_namelen;
-};
-
-/* Decryption work */
-struct fscrypt_ctx {
- union {
- struct {
- struct bio *bio;
- struct work_struct work;
- };
- struct list_head free_list; /* Free list */
- };
- u8 flags; /* Flags */
+ bool (*has_stable_inodes)(struct super_block *sb);
+ void (*get_ino_and_lblk_bits)(struct super_block *sb,
+ int *ino_bits_ret, int *lblk_bits_ret);
};
static inline bool fscrypt_has_encryption_key(const struct inode *inode)
@@ -102,8 +92,6 @@ static inline void fscrypt_handle_d_move(struct dentry *dentry)
/* crypto.c */
extern void fscrypt_enqueue_decrypt_work(struct work_struct *);
-extern struct fscrypt_ctx *fscrypt_get_ctx(gfp_t);
-extern void fscrypt_release_ctx(struct fscrypt_ctx *);
extern struct page *fscrypt_encrypt_pagecache_blocks(struct page *page,
unsigned int len,
@@ -244,8 +232,6 @@ static inline bool fscrypt_match_name(const struct fscrypt_name *fname,
/* bio.c */
extern void fscrypt_decrypt_bio(struct bio *);
-extern void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx,
- struct bio *bio);
extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t,
unsigned int);
@@ -295,16 +281,6 @@ static inline void fscrypt_enqueue_decrypt_work(struct work_struct *work)
{
}
-static inline struct fscrypt_ctx *fscrypt_get_ctx(gfp_t gfp_flags)
-{
- return ERR_PTR(-EOPNOTSUPP);
-}
-
-static inline void fscrypt_release_ctx(struct fscrypt_ctx *ctx)
-{
- return;
-}
-
static inline struct page *fscrypt_encrypt_pagecache_blocks(struct page *page,
unsigned int len,
unsigned int offs,
@@ -484,11 +460,6 @@ static inline void fscrypt_decrypt_bio(struct bio *bio)
{
}
-static inline void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx,
- struct bio *bio)
-{
-}
-
static inline int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
sector_t pblk, unsigned int len)
{
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 8a8cb3c401b2..9141f2263286 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -499,7 +499,7 @@ static inline int ftrace_disable_ftrace_graph_caller(void) { return 0; }
/**
* ftrace_make_nop - convert code into nop
* @mod: module structure if called by module load initialization
- * @rec: the mcount call site record
+ * @rec: the call site record (e.g. mcount/fentry)
* @addr: the address that the call site should be calling
*
* This is a very sensitive operation and great care needs
@@ -520,9 +520,38 @@ static inline int ftrace_disable_ftrace_graph_caller(void) { return 0; }
extern int ftrace_make_nop(struct module *mod,
struct dyn_ftrace *rec, unsigned long addr);
+
+/**
+ * ftrace_init_nop - initialize a nop call site
+ * @mod: module structure if called by module load initialization
+ * @rec: the call site record (e.g. mcount/fentry)
+ *
+ * This is a very sensitive operation and great care needs
+ * to be taken by the arch. The operation should carefully
+ * read the location, check to see if what is read is indeed
+ * what we expect it to be, and then on success of the compare,
+ * it should write to the location.
+ *
+ * The code segment at @rec->ip should contain the contents created by
+ * the compiler
+ *
+ * Return must be:
+ * 0 on success
+ * -EFAULT on error reading the location
+ * -EINVAL on a failed compare of the contents
+ * -EPERM on error writing to the location
+ * Any other value will be considered a failure.
+ */
+#ifndef ftrace_init_nop
+static inline int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
+{
+ return ftrace_make_nop(mod, rec, MCOUNT_ADDR);
+}
+#endif
+
/**
* ftrace_make_call - convert a nop call site into a call to addr
- * @rec: the mcount call site record
+ * @rec: the call site record (e.g. mcount/fentry)
* @addr: the address that the call site should call
*
* This is a very sensitive operation and great care needs
@@ -545,7 +574,7 @@ extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr);
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
/**
* ftrace_modify_call - convert from one addr to another (no nop)
- * @rec: the mcount call site record
+ * @rec: the call site record (e.g. mcount/fentry)
* @old_addr: the address expected to be currently called to
* @addr: the address to change to
*
@@ -709,6 +738,11 @@ static inline unsigned long get_lock_parent_ip(void)
#ifdef CONFIG_FTRACE_MCOUNT_RECORD
extern void ftrace_init(void);
+#ifdef CC_USING_PATCHABLE_FUNCTION_ENTRY
+#define FTRACE_CALLSITE_SECTION "__patchable_function_entries"
+#else
+#define FTRACE_CALLSITE_SECTION "__mcount_loc"
+#endif
#else
static inline void ftrace_init(void) { }
#endif
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 8b5330dd5ac0..8bb63027e4d6 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -621,9 +621,10 @@ extern void blk_invalidate_devt(dev_t devt);
extern dev_t blk_lookup_devt(const char *name, int partno);
extern char *disk_name (struct gendisk *hd, int partno, char *buf);
+int bdev_disk_changed(struct block_device *bdev, bool invalidate);
+int blk_add_partitions(struct gendisk *disk, struct block_device *bdev);
+int blk_drop_partitions(struct gendisk *disk, struct block_device *bdev);
extern int disk_expand_part_tbl(struct gendisk *disk, int target);
-extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev);
-extern int invalidate_partitions(struct gendisk *disk, struct block_device *bdev);
extern struct hd_struct * __must_check add_partition(struct gendisk *disk,
int partno, sector_t start,
sector_t len, int flags,
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index fb07b503dc45..61f2f6ff9467 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -325,6 +325,29 @@ static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
return !!(gfp_flags & __GFP_DIRECT_RECLAIM);
}
+/**
+ * gfpflags_normal_context - is gfp_flags a normal sleepable context?
+ * @gfp_flags: gfp_flags to test
+ *
+ * Test whether @gfp_flags indicates that the allocation is from the
+ * %current context and allowed to sleep.
+ *
+ * An allocation being allowed to block doesn't mean it owns the %current
+ * context. When direct reclaim path tries to allocate memory, the
+ * allocation context is nested inside whatever %current was doing at the
+ * time of the original allocation. The nested allocation may be allowed
+ * to block but modifying anything %current owns can corrupt the outer
+ * context's expectations.
+ *
+ * %true result from this function indicates that the allocation context
+ * can sleep and use anything that's associated with %current.
+ */
+static inline bool gfpflags_normal_context(const gfp_t gfp_flags)
+{
+ return (gfp_flags & (__GFP_DIRECT_RECLAIM | __GFP_MEMALLOC)) ==
+ __GFP_DIRECT_RECLAIM;
+}
+
#ifdef CONFIG_HIGHMEM
#define OPT_ZONE_HIGHMEM ZONE_HIGHMEM
#else
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index f8245d67f070..5dd9c982e2cb 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -202,6 +202,14 @@ struct gpio_irq_chip {
bool threaded;
/**
+ * @init_hw: optional routine to initialize hardware before
+ * an IRQ chip will be added. This is quite useful when
+ * a particular driver wants to clear IRQ related registers
+ * in order to avoid undesired events.
+ */
+ int (*init_hw)(struct gpio_chip *chip);
+
+ /**
* @init_valid_mask: optional routine to initialize @valid_mask, to be
* used if not all GPIO lines are valid interrupts. Sometimes some
* lines just cannot fire interrupts, and this routine, when defined,
diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h
index 04c36b7a61dd..72579168189d 100644
--- a/include/linux/hwmon.h
+++ b/include/linux/hwmon.h
@@ -235,7 +235,7 @@ enum hwmon_power_attributes {
#define HWMON_P_LABEL BIT(hwmon_power_label)
#define HWMON_P_ALARM BIT(hwmon_power_alarm)
#define HWMON_P_CAP_ALARM BIT(hwmon_power_cap_alarm)
-#define HWMON_P_MIN_ALARM BIT(hwmon_power_max_alarm)
+#define HWMON_P_MIN_ALARM BIT(hwmon_power_min_alarm)
#define HWMON_P_MAX_ALARM BIT(hwmon_power_max_alarm)
#define HWMON_P_LCRIT_ALARM BIT(hwmon_power_lcrit_alarm)
#define HWMON_P_CRIT_ALARM BIT(hwmon_power_crit_alarm)
diff --git a/include/linux/idr.h b/include/linux/idr.h
index 4ec8986e5dfb..ac6e946b6767 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -185,7 +185,7 @@ static inline void idr_preload_end(void)
* is convenient for a "not found" value.
*/
#define idr_for_each_entry(idr, entry, id) \
- for (id = 0; ((entry) = idr_get_next(idr, &(id))) != NULL; ++id)
+ for (id = 0; ((entry) = idr_get_next(idr, &(id))) != NULL; id += 1U)
/**
* idr_for_each_entry_ul() - Iterate over an IDR's elements of a given type.
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index 2e55e4cdbd8a..a367ead4bf4b 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -29,7 +29,6 @@ struct macvlan_dev {
netdev_features_t set_features;
enum macvlan_mode mode;
u16 flags;
- int nest_level;
unsigned int macaddr_count;
#ifdef CONFIG_NET_POLL_CONTROLLER
struct netpoll *netpoll;
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index 06faa066496f..ec7e4bd07f82 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -223,6 +223,7 @@ struct team {
atomic_t count_pending;
struct delayed_work dw;
} mcast_rejoin;
+ struct lock_class_key team_lock_key;
long mode_priv[TEAM_MODE_PRIV_LONGS];
};
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 244278d5c222..b05e855f1ddd 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -182,7 +182,6 @@ struct vlan_dev_priv {
#ifdef CONFIG_NET_POLL_CONTROLLER
struct netpoll *netpoll;
#endif
- unsigned int nest_level;
};
static inline struct vlan_dev_priv *vlan_dev_priv(const struct net_device *dev)
@@ -221,11 +220,6 @@ extern void vlan_vids_del_by_dev(struct net_device *dev,
extern bool vlan_uses_dev(const struct net_device *dev);
-static inline int vlan_get_encap_level(struct net_device *dev)
-{
- BUG_ON(!is_vlan_dev(dev));
- return vlan_dev_priv(dev)->nest_level;
-}
#else
static inline struct net_device *
__vlan_find_dev_deep_rcu(struct net_device *real_dev,
@@ -295,11 +289,6 @@ static inline bool vlan_uses_dev(const struct net_device *dev)
{
return false;
}
-static inline int vlan_get_encap_level(struct net_device *dev)
-{
- BUG();
- return 0;
-}
#endif
/**
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index ed11ef594378..6d8bf4bdf240 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -336,7 +336,8 @@ enum {
#define QI_DEV_IOTLB_SID(sid) ((u64)((sid) & 0xffff) << 32)
#define QI_DEV_IOTLB_QDEP(qdep) (((qdep) & 0x1f) << 16)
#define QI_DEV_IOTLB_ADDR(addr) ((u64)(addr) & VTD_PAGE_MASK)
-#define QI_DEV_IOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | ((u64)(pfsid & 0xfff) << 52))
+#define QI_DEV_IOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | \
+ ((u64)((pfsid >> 4) & 0xfff) << 52))
#define QI_DEV_IOTLB_SIZE 1
#define QI_DEV_IOTLB_MAX_INVS 32
@@ -360,7 +361,8 @@ enum {
#define QI_DEV_EIOTLB_PASID(p) (((u64)p) << 32)
#define QI_DEV_EIOTLB_SID(sid) ((u64)((sid) & 0xffff) << 16)
#define QI_DEV_EIOTLB_QDEP(qd) ((u64)((qd) & 0x1f) << 4)
-#define QI_DEV_EIOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | ((u64)(pfsid & 0xfff) << 52))
+#define QI_DEV_EIOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | \
+ ((u64)((pfsid >> 4) & 0xfff) << 52))
#define QI_DEV_EIOTLB_MAX_INVS 32
/* Page group response descriptor QW0 */
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 5cc10cf7cb3e..a0bde9e12efa 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -487,6 +487,8 @@
#define ICC_CTLR_EL1_EOImode_MASK (1 << ICC_CTLR_EL1_EOImode_SHIFT)
#define ICC_CTLR_EL1_CBPR_SHIFT 0
#define ICC_CTLR_EL1_CBPR_MASK (1 << ICC_CTLR_EL1_CBPR_SHIFT)
+#define ICC_CTLR_EL1_PMHE_SHIFT 6
+#define ICC_CTLR_EL1_PMHE_MASK (1 << ICC_CTLR_EL1_PMHE_SHIFT)
#define ICC_CTLR_EL1_PRI_BITS_SHIFT 8
#define ICC_CTLR_EL1_PRI_BITS_MASK (0x7 << ICC_CTLR_EL1_PRI_BITS_SHIFT)
#define ICC_CTLR_EL1_ID_BITS_SHIFT 11
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 719fc3e15ea4..d41c521a39da 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -966,6 +966,7 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
bool kvm_is_reserved_pfn(kvm_pfn_t pfn);
+bool kvm_is_zone_device_pfn(kvm_pfn_t pfn);
struct kvm_irq_ack_notifier {
struct hlist_node link;
@@ -1382,4 +1383,10 @@ static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
}
#endif /* CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE */
+typedef int (*kvm_vm_thread_fn_t)(struct kvm *kvm, uintptr_t data);
+
+int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn,
+ uintptr_t data, const char *name,
+ struct task_struct **thread_ptr);
+
#endif
diff --git a/include/linux/leds.h b/include/linux/leds.h
index b8df71193329..efb309dba914 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -247,7 +247,7 @@ extern void led_set_brightness(struct led_classdev *led_cdev,
/**
* led_set_brightness_sync - set LED brightness synchronously
* @led_cdev: the LED to set
- * @brightness: the brightness to set it to
+ * @value: the brightness to set it to
*
* Set an LED's brightness immediately. This function will block
* the caller for the time required for accessing device registers,
@@ -301,8 +301,7 @@ extern void led_sysfs_enable(struct led_classdev *led_cdev);
/**
* led_compose_name - compose LED class device name
* @dev: LED controller device object
- * @child: child fwnode_handle describing a LED or a group of synchronized LEDs;
- * it must be provided only for fwnode based LEDs
+ * @init_data: the LED class device initialization data
* @led_classdev_name: composed LED class device name
*
* Create LED class device name basing on the provided init_data argument.
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 207e7ee764ce..d3bbfddf616a 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -484,6 +484,7 @@ enum hsm_task_states {
};
enum ata_completion_errors {
+ AC_ERR_OK = 0, /* no error */
AC_ERR_DEV = (1 << 0), /* device reported error */
AC_ERR_HSM = (1 << 1), /* host state machine violation */
AC_ERR_TIMEOUT = (1 << 2), /* timeout */
@@ -891,9 +892,9 @@ struct ata_port_operations {
/*
* Command execution
*/
- int (*qc_defer)(struct ata_queued_cmd *qc);
- int (*check_atapi_dma)(struct ata_queued_cmd *qc);
- void (*qc_prep)(struct ata_queued_cmd *qc);
+ int (*qc_defer)(struct ata_queued_cmd *qc);
+ int (*check_atapi_dma)(struct ata_queued_cmd *qc);
+ enum ata_completion_errors (*qc_prep)(struct ata_queued_cmd *qc);
unsigned int (*qc_issue)(struct ata_queued_cmd *qc);
bool (*qc_fill_rtf)(struct ata_queued_cmd *qc);
@@ -1161,7 +1162,7 @@ extern int ata_xfer_mode2shift(unsigned long xfer_mode);
extern const char *ata_mode_string(unsigned long xfer_mask);
extern unsigned long ata_id_xfermask(const u16 *id);
extern int ata_std_qc_defer(struct ata_queued_cmd *qc);
-extern void ata_noop_qc_prep(struct ata_queued_cmd *qc);
+extern enum ata_completion_errors ata_noop_qc_prep(struct ata_queued_cmd *qc);
extern void ata_sg_init(struct ata_queued_cmd *qc, struct scatterlist *sg,
unsigned int n_elem);
extern unsigned int ata_dev_classify(const struct ata_taskfile *tf);
@@ -1893,9 +1894,9 @@ extern const struct ata_port_operations ata_bmdma_port_ops;
.sg_tablesize = LIBATA_MAX_PRD, \
.dma_boundary = ATA_DMA_BOUNDARY
-extern void ata_bmdma_qc_prep(struct ata_queued_cmd *qc);
+extern enum ata_completion_errors ata_bmdma_qc_prep(struct ata_queued_cmd *qc);
extern unsigned int ata_bmdma_qc_issue(struct ata_queued_cmd *qc);
-extern void ata_bmdma_dumb_qc_prep(struct ata_queued_cmd *qc);
+extern enum ata_completion_errors ata_bmdma_dumb_qc_prep(struct ata_queued_cmd *qc);
extern unsigned int ata_bmdma_port_intr(struct ata_port *ap,
struct ata_queued_cmd *qc);
extern irqreturn_t ata_bmdma_interrupt(int irq, void *dev_instance);
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 9b60863429cc..ae703ea3ef48 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -356,6 +356,19 @@ static inline bool mem_cgroup_disabled(void)
return !cgroup_subsys_enabled(memory_cgrp_subsys);
}
+static inline unsigned long mem_cgroup_protection(struct mem_cgroup *memcg,
+ bool in_low_reclaim)
+{
+ if (mem_cgroup_disabled())
+ return 0;
+
+ if (in_low_reclaim)
+ return READ_ONCE(memcg->memory.emin);
+
+ return max(READ_ONCE(memcg->memory.emin),
+ READ_ONCE(memcg->memory.elow));
+}
+
enum mem_cgroup_protection mem_cgroup_protected(struct mem_cgroup *root,
struct mem_cgroup *memcg);
@@ -537,6 +550,8 @@ void mem_cgroup_handle_over_high(void);
unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg);
+unsigned long mem_cgroup_size(struct mem_cgroup *memcg);
+
void mem_cgroup_print_oom_context(struct mem_cgroup *memcg,
struct task_struct *p);
@@ -829,6 +844,12 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm,
{
}
+static inline unsigned long mem_cgroup_protection(struct mem_cgroup *memcg,
+ bool in_low_reclaim)
+{
+ return 0;
+}
+
static inline enum mem_cgroup_protection mem_cgroup_protected(
struct mem_cgroup *root, struct mem_cgroup *memcg)
{
@@ -968,6 +989,11 @@ static inline unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg)
return 0;
}
+static inline unsigned long mem_cgroup_size(struct mem_cgroup *memcg)
+{
+ return 0;
+}
+
static inline void
mem_cgroup_print_oom_context(struct mem_cgroup *memcg, struct task_struct *p)
{
@@ -1264,6 +1290,9 @@ void mem_cgroup_track_foreign_dirty_slowpath(struct page *page,
static inline void mem_cgroup_track_foreign_dirty(struct page *page,
struct bdi_writeback *wb)
{
+ if (mem_cgroup_disabled())
+ return;
+
if (unlikely(&page->mem_cgroup->css != wb->memcg_css))
mem_cgroup_track_foreign_dirty_slowpath(page, wb);
}
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 0ebb105eb261..4c75dae8dd29 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -119,6 +119,7 @@ extern struct memory_block *find_memory_block(struct mem_section *);
typedef int (*walk_memory_blocks_func_t)(struct memory_block *, void *);
extern int walk_memory_blocks(unsigned long start, unsigned long size,
void *arg, walk_memory_blocks_func_t func);
+extern int for_each_memory_block(void *arg, walk_memory_blocks_func_t func);
#define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT)
#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h
index ad24554f11f9..75f880c25bb8 100644
--- a/include/linux/micrel_phy.h
+++ b/include/linux/micrel_phy.h
@@ -31,7 +31,7 @@
#define PHY_ID_KSZ886X 0x00221430
#define PHY_ID_KSZ8863 0x00221435
-#define PHY_ID_KSZ8795 0x00221550
+#define PHY_ID_KSZ87XX 0x00221550
#define PHY_ID_KSZ9477 0x00221631
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 138c50d5a353..0836fe232f97 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1545,9 +1545,8 @@ struct mlx5_ifc_extended_dest_format_bits {
};
union mlx5_ifc_dest_format_struct_flow_counter_list_auto_bits {
- struct mlx5_ifc_dest_format_struct_bits dest_format_struct;
+ struct mlx5_ifc_extended_dest_format_bits extended_dest_format;
struct mlx5_ifc_flow_counter_list_bits flow_counter_list;
- u8 reserved_at_0[0x40];
};
struct mlx5_ifc_fte_match_param_bits {
diff --git a/include/linux/mm.h b/include/linux/mm.h
index cc292273e6ba..a2adf95b3f9c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -695,11 +695,6 @@ static inline void *kvcalloc(size_t n, size_t size, gfp_t flags)
extern void kvfree(const void *addr);
-static inline atomic_t *compound_mapcount_ptr(struct page *page)
-{
- return &page[1].compound_mapcount;
-}
-
static inline int compound_mapcount(struct page *page)
{
VM_BUG_ON_PAGE(!PageCompound(page), page);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 2222fa795284..270aa8fd2800 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -221,6 +221,11 @@ struct page {
#endif
} _struct_page_alignment;
+static inline atomic_t *compound_mapcount_ptr(struct page *page)
+{
+ return &page[1].compound_mapcount;
+}
+
/*
* Used for sizing the vmemmap region on some architectures
*/
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index bda20282746b..b0a36d1580b6 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -359,33 +359,40 @@ struct per_cpu_nodestat {
#endif /* !__GENERATING_BOUNDS.H */
enum zone_type {
-#ifdef CONFIG_ZONE_DMA
/*
- * ZONE_DMA is used when there are devices that are not able
- * to do DMA to all of addressable memory (ZONE_NORMAL). Then we
- * carve out the portion of memory that is needed for these devices.
- * The range is arch specific.
+ * ZONE_DMA and ZONE_DMA32 are used when there are peripherals not able
+ * to DMA to all of the addressable memory (ZONE_NORMAL).
+ * On architectures where this area covers the whole 32 bit address
+ * space ZONE_DMA32 is used. ZONE_DMA is left for the ones with smaller
+ * DMA addressing constraints. This distinction is important as a 32bit
+ * DMA mask is assumed when ZONE_DMA32 is defined. Some 64-bit
+ * platforms may need both zones as they support peripherals with
+ * different DMA addressing limitations.
+ *
+ * Some examples:
+ *
+ * - i386 and x86_64 have a fixed 16M ZONE_DMA and ZONE_DMA32 for the
+ * rest of the lower 4G.
+ *
+ * - arm only uses ZONE_DMA, the size, up to 4G, may vary depending on
+ * the specific device.
+ *
+ * - arm64 has a fixed 1G ZONE_DMA and ZONE_DMA32 for the rest of the
+ * lower 4G.
*
- * Some examples
+ * - powerpc only uses ZONE_DMA, the size, up to 2G, may vary
+ * depending on the specific device.
*
- * Architecture Limit
- * ---------------------------
- * parisc, ia64, sparc <4G
- * s390, powerpc <2G
- * arm Various
- * alpha Unlimited or 0-16MB.
+ * - s390 uses ZONE_DMA fixed to the lower 2G.
*
- * i386, x86_64 and multiple other arches
- * <16M.
+ * - ia64 and riscv only use ZONE_DMA32.
+ *
+ * - parisc uses neither.
*/
+#ifdef CONFIG_ZONE_DMA
ZONE_DMA,
#endif
#ifdef CONFIG_ZONE_DMA32
- /*
- * x86_64 needs two ZONE_DMAs because it supports devices that are
- * only able to do DMA to the lower 16M but also 32 bit devices that
- * can only do DMA areas below 4G.
- */
ZONE_DMA32,
#endif
/*
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index fc0b4b19c900..5a4623fc586b 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -22,6 +22,7 @@
#define SNOR_MFR_INTEL CFI_MFR_INTEL
#define SNOR_MFR_ST CFI_MFR_ST /* ST Micro */
#define SNOR_MFR_MICRON CFI_MFR_MICRON /* Micron */
+#define SNOR_MFR_ISSI CFI_MFR_PMC
#define SNOR_MFR_MACRONIX CFI_MFR_MACRONIX
#define SNOR_MFR_SPANSION CFI_MFR_AMD
#define SNOR_MFR_SST CFI_MFR_SST
@@ -133,7 +134,7 @@
#define SR_E_ERR BIT(5)
#define SR_P_ERR BIT(6)
-#define SR_QUAD_EN_MX BIT(6) /* Macronix Quad I/O */
+#define SR1_QUAD_EN_BIT6 BIT(6)
/* Enhanced Volatile Configuration Register bits */
#define EVCR_QUAD_EN_MICRON BIT(7) /* Micron Quad I/O */
@@ -144,10 +145,8 @@
#define FSR_P_ERR BIT(4) /* Program operation status */
#define FSR_PT_ERR BIT(1) /* Protection error bit */
-/* Configuration Register bits. */
-#define CR_QUAD_EN_SPAN BIT(1) /* Spansion Quad I/O */
-
/* Status Register 2 bits. */
+#define SR2_QUAD_EN_BIT1 BIT(1)
#define SR2_QUAD_EN_BIT7 BIT(7)
/* Supported SPI protocols */
@@ -243,6 +242,9 @@ enum spi_nor_option_flags {
SNOR_F_4B_OPCODES = BIT(6),
SNOR_F_HAS_4BAIT = BIT(7),
SNOR_F_HAS_LOCK = BIT(8),
+ SNOR_F_HAS_16BIT_SR = BIT(9),
+ SNOR_F_NO_READ_CR = BIT(10),
+
};
/**
@@ -466,6 +468,34 @@ enum spi_nor_pp_command_index {
struct spi_nor;
/**
+ * struct spi_nor_controller_ops - SPI NOR controller driver specific
+ * operations.
+ * @prepare: [OPTIONAL] do some preparations for the
+ * read/write/erase/lock/unlock operations.
+ * @unprepare: [OPTIONAL] do some post work after the
+ * read/write/erase/lock/unlock operations.
+ * @read_reg: read out the register.
+ * @write_reg: write data to the register.
+ * @read: read data from the SPI NOR.
+ * @write: write data to the SPI NOR.
+ * @erase: erase a sector of the SPI NOR at the offset @offs; if
+ * not provided by the driver, spi-nor will send the erase
+ * opcode via write_reg().
+ */
+struct spi_nor_controller_ops {
+ int (*prepare)(struct spi_nor *nor, enum spi_nor_ops ops);
+ void (*unprepare)(struct spi_nor *nor, enum spi_nor_ops ops);
+ int (*read_reg)(struct spi_nor *nor, u8 opcode, u8 *buf, size_t len);
+ int (*write_reg)(struct spi_nor *nor, u8 opcode, const u8 *buf,
+ size_t len);
+
+ ssize_t (*read)(struct spi_nor *nor, loff_t from, size_t len, u8 *buf);
+ ssize_t (*write)(struct spi_nor *nor, loff_t to, size_t len,
+ const u8 *buf);
+ int (*erase)(struct spi_nor *nor, loff_t offs);
+};
+
+/**
* struct spi_nor_locking_ops - SPI NOR locking methods
* @lock: lock a region of the SPI NOR.
* @unlock: unlock a region of the SPI NOR.
@@ -549,19 +579,7 @@ struct flash_info;
* @read_proto: the SPI protocol for read operations
* @write_proto: the SPI protocol for write operations
* @reg_proto the SPI protocol for read_reg/write_reg/erase operations
- * @prepare: [OPTIONAL] do some preparations for the
- * read/write/erase/lock/unlock operations
- * @unprepare: [OPTIONAL] do some post work after the
- * read/write/erase/lock/unlock operations
- * @read_reg: [DRIVER-SPECIFIC] read out the register
- * @write_reg: [DRIVER-SPECIFIC] write data to the register
- * @read: [DRIVER-SPECIFIC] read data from the SPI NOR
- * @write: [DRIVER-SPECIFIC] write data to the SPI NOR
- * @erase: [DRIVER-SPECIFIC] erase a sector of the SPI NOR
- * at the offset @offs; if not provided by the driver,
- * spi-nor will send the erase opcode via write_reg()
- * @clear_sr_bp: [FLASH-SPECIFIC] clears the Block Protection Bits from
- * the SPI NOR Status Register.
+ * @controller_ops: SPI NOR controller driver specific operations.
* @params: [FLASH-SPECIFIC] SPI-NOR flash parameters and settings.
* The structure includes legacy flash parameters and
* settings that can be overwritten by the spi_nor_fixups
@@ -588,18 +606,8 @@ struct spi_nor {
bool sst_write_second;
u32 flags;
- int (*prepare)(struct spi_nor *nor, enum spi_nor_ops ops);
- void (*unprepare)(struct spi_nor *nor, enum spi_nor_ops ops);
- int (*read_reg)(struct spi_nor *nor, u8 opcode, u8 *buf, int len);
- int (*write_reg)(struct spi_nor *nor, u8 opcode, u8 *buf, int len);
-
- ssize_t (*read)(struct spi_nor *nor, loff_t from,
- size_t len, u_char *read_buf);
- ssize_t (*write)(struct spi_nor *nor, loff_t to,
- size_t len, const u_char *write_buf);
- int (*erase)(struct spi_nor *nor, loff_t offs);
+ const struct spi_nor_controller_ops *controller_ops;
- int (*clear_sr_bp)(struct spi_nor *nor);
struct spi_nor_flash_parameter params;
void *priv;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9eda1c31d1f7..c20f190b4c18 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -925,6 +925,7 @@ struct dev_ifalias {
struct devlink;
struct tlsdev_ops;
+
/*
* This structure defines the management hooks for network devices.
* The following hooks can be defined; unless noted otherwise, they are
@@ -1421,7 +1422,6 @@ struct net_device_ops {
void (*ndo_dfwd_del_station)(struct net_device *pdev,
void *priv);
- int (*ndo_get_lock_subclass)(struct net_device *dev);
int (*ndo_set_tx_maxrate)(struct net_device *dev,
int queue_index,
u32 maxrate);
@@ -1649,6 +1649,8 @@ enum netdev_priv_flags {
* @perm_addr: Permanent hw address
* @addr_assign_type: Hw address assignment type
* @addr_len: Hardware address length
+ * @upper_level: Maximum depth level of upper devices.
+ * @lower_level: Maximum depth level of lower devices.
* @neigh_priv_len: Used in neigh_alloc()
* @dev_id: Used to differentiate devices that share
* the same link layer address
@@ -1758,9 +1760,13 @@ enum netdev_priv_flags {
* @phydev: Physical device may attach itself
* for hardware timestamping
* @sfp_bus: attached &struct sfp_bus structure.
- *
- * @qdisc_tx_busylock: lockdep class annotating Qdisc->busylock spinlock
- * @qdisc_running_key: lockdep class annotating Qdisc->running seqcount
+ * @qdisc_tx_busylock_key: lockdep class annotating Qdisc->busylock
+ spinlock
+ * @qdisc_running_key: lockdep class annotating Qdisc->running seqcount
+ * @qdisc_xmit_lock_key: lockdep class annotating
+ * netdev_queue->_xmit_lock spinlock
+ * @addr_list_lock_key: lockdep class annotating
+ * net_device->addr_list_lock spinlock
*
* @proto_down: protocol port state information can be sent to the
* switch driver and used to set the phys state of the
@@ -1875,6 +1881,8 @@ struct net_device {
unsigned char perm_addr[MAX_ADDR_LEN];
unsigned char addr_assign_type;
unsigned char addr_len;
+ unsigned char upper_level;
+ unsigned char lower_level;
unsigned short neigh_priv_len;
unsigned short dev_id;
unsigned short dev_port;
@@ -2045,8 +2053,10 @@ struct net_device {
#endif
struct phy_device *phydev;
struct sfp_bus *sfp_bus;
- struct lock_class_key *qdisc_tx_busylock;
- struct lock_class_key *qdisc_running_key;
+ struct lock_class_key qdisc_tx_busylock_key;
+ struct lock_class_key qdisc_running_key;
+ struct lock_class_key qdisc_xmit_lock_key;
+ struct lock_class_key addr_list_lock_key;
bool proto_down;
unsigned wol_enabled:1;
};
@@ -2124,23 +2134,6 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev,
f(dev, &dev->_tx[i], arg);
}
-#define netdev_lockdep_set_classes(dev) \
-{ \
- static struct lock_class_key qdisc_tx_busylock_key; \
- static struct lock_class_key qdisc_running_key; \
- static struct lock_class_key qdisc_xmit_lock_key; \
- static struct lock_class_key dev_addr_list_lock_key; \
- unsigned int i; \
- \
- (dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key; \
- (dev)->qdisc_running_key = &qdisc_running_key; \
- lockdep_set_class(&(dev)->addr_list_lock, \
- &dev_addr_list_lock_key); \
- for (i = 0; i < (dev)->num_tx_queues; i++) \
- lockdep_set_class(&(dev)->_tx[i]._xmit_lock, \
- &qdisc_xmit_lock_key); \
-}
-
u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb,
struct net_device *sb_dev);
struct netdev_queue *netdev_core_pick_tx(struct net_device *dev,
@@ -3139,6 +3132,7 @@ static inline void netif_stop_queue(struct net_device *dev)
}
void netif_tx_stop_all_queues(struct net_device *dev);
+void netdev_update_lockdep_key(struct net_device *dev);
static inline bool netif_tx_queue_stopped(const struct netdev_queue *dev_queue)
{
@@ -4056,16 +4050,6 @@ static inline void netif_addr_lock(struct net_device *dev)
spin_lock(&dev->addr_list_lock);
}
-static inline void netif_addr_lock_nested(struct net_device *dev)
-{
- int subclass = SINGLE_DEPTH_NESTING;
-
- if (dev->netdev_ops->ndo_get_lock_subclass)
- subclass = dev->netdev_ops->ndo_get_lock_subclass(dev);
-
- spin_lock_nested(&dev->addr_list_lock, subclass);
-}
-
static inline void netif_addr_lock_bh(struct net_device *dev)
{
spin_lock_bh(&dev->addr_list_lock);
@@ -4329,6 +4313,16 @@ int netdev_master_upper_dev_link(struct net_device *dev,
struct netlink_ext_ack *extack);
void netdev_upper_dev_unlink(struct net_device *dev,
struct net_device *upper_dev);
+int netdev_adjacent_change_prepare(struct net_device *old_dev,
+ struct net_device *new_dev,
+ struct net_device *dev,
+ struct netlink_ext_ack *extack);
+void netdev_adjacent_change_commit(struct net_device *old_dev,
+ struct net_device *new_dev,
+ struct net_device *dev);
+void netdev_adjacent_change_abort(struct net_device *old_dev,
+ struct net_device *new_dev,
+ struct net_device *dev);
void netdev_adjacent_rename_links(struct net_device *dev, char *oldname);
void *netdev_lower_dev_get_private(struct net_device *dev,
struct net_device *lower_dev);
@@ -4340,7 +4334,6 @@ void netdev_lower_state_changed(struct net_device *lower_dev,
extern u8 netdev_rss_key[NETDEV_RSS_KEY_LEN] __read_mostly;
void netdev_rss_key_fill(void *buffer, size_t len);
-int dev_get_nest_level(struct net_device *dev);
int skb_checksum_help(struct sk_buff *skb);
int skb_crc32c_csum_help(struct sk_buff *skb);
int skb_csum_hwoffload_help(struct sk_buff *skb,
diff --git a/include/linux/nvme-fc.h b/include/linux/nvme-fc.h
index 067c9fea64fe..e8c30b39bb27 100644
--- a/include/linux/nvme-fc.h
+++ b/include/linux/nvme-fc.h
@@ -4,33 +4,60 @@
*/
/*
- * This file contains definitions relative to FC-NVME r1.14 (16-020vB).
- * The fcnvme_lsdesc_cr_assoc_cmd struct reflects expected r1.16 content.
+ * This file contains definitions relative to FC-NVME-2 r1.06
+ * (T11-2019-00210-v001).
*/
#ifndef _NVME_FC_H
#define _NVME_FC_H 1
+#include <uapi/scsi/fc/fc_fs.h>
-#define NVME_CMD_SCSI_ID 0xFD
+#define NVME_CMD_FORMAT_ID 0xFD
#define NVME_CMD_FC_ID FC_TYPE_NVME
/* FC-NVME Cmd IU Flags */
-#define FCNVME_CMD_FLAGS_DIRMASK 0x03
-#define FCNVME_CMD_FLAGS_WRITE 0x01
-#define FCNVME_CMD_FLAGS_READ 0x02
+enum {
+ FCNVME_CMD_FLAGS_DIRMASK = 0x03,
+ FCNVME_CMD_FLAGS_WRITE = (1 << 0),
+ FCNVME_CMD_FLAGS_READ = (1 << 1),
+
+ FCNVME_CMD_FLAGS_PICWP = (1 << 2),
+};
+
+enum {
+ FCNVME_CMD_CAT_MASK = 0x0F,
+ FCNVME_CMD_CAT_ADMINQ = 0x01,
+ FCNVME_CMD_CAT_CSSMASK = 0x07,
+ FCNVME_CMD_CAT_CSSFLAG = 0x08,
+};
+
+static inline __u8 fccmnd_set_cat_admin(__u8 rsv_cat)
+{
+ return (rsv_cat & ~FCNVME_CMD_CAT_MASK) | FCNVME_CMD_CAT_ADMINQ;
+}
+
+static inline __u8 fccmnd_set_cat_css(__u8 rsv_cat, __u8 css)
+{
+ return (rsv_cat & ~FCNVME_CMD_CAT_MASK) | FCNVME_CMD_CAT_CSSFLAG |
+ (css & FCNVME_CMD_CAT_CSSMASK);
+}
struct nvme_fc_cmd_iu {
- __u8 scsi_id;
+ __u8 format_id;
__u8 fc_id;
__be16 iu_len;
- __u8 rsvd4[3];
+ __u8 rsvd4[2];
+ __u8 rsv_cat;
__u8 flags;
__be64 connection_id;
__be32 csn;
__be32 data_len;
struct nvme_command sqe;
- __be32 rsvd88[2];
+ __u8 dps;
+ __u8 lbads;
+ __be16 ms;
+ __be32 rsvd92;
};
#define NVME_FC_SIZEOF_ZEROS_RSP 12
@@ -38,11 +65,12 @@ struct nvme_fc_cmd_iu {
enum {
FCNVME_SC_SUCCESS = 0,
FCNVME_SC_INVALID_FIELD = 1,
- FCNVME_SC_INVALID_CONNID = 2,
+ /* reserved 2 */
+ FCNVME_SC_ILL_CONN_PARAMS = 3,
};
struct nvme_fc_ersp_iu {
- __u8 status_code;
+ __u8 ersp_result;
__u8 rsvd1;
__be16 iu_len;
__be32 rsn;
@@ -53,14 +81,44 @@ struct nvme_fc_ersp_iu {
};
-/* FC-NVME Link Services */
+#define FCNVME_NVME_SR_OPCODE 0x01
+
+struct nvme_fc_nvme_sr_iu {
+ __u8 fc_id;
+ __u8 opcode;
+ __u8 rsvd2;
+ __u8 retry_rctl;
+ __be32 rsvd4;
+};
+
+
+enum {
+ FCNVME_SRSTAT_ACC = 0x0,
+ FCNVME_SRSTAT_INV_FCID = 0x1,
+ /* reserved 0x2 */
+ FCNVME_SRSTAT_LOGICAL_ERR = 0x3,
+ FCNVME_SRSTAT_INV_QUALIF = 0x4,
+ FCNVME_SRSTAT_UNABL2PERFORM = 0x9,
+};
+
+struct nvme_fc_nvme_sr_rsp_iu {
+ __u8 fc_id;
+ __u8 opcode;
+ __u8 rsvd2;
+ __u8 status;
+ __be32 rsvd4;
+};
+
+
+/* FC-NVME Link Services - LS cmd values (w0 bits 31:24) */
enum {
FCNVME_LS_RSVD = 0,
FCNVME_LS_RJT = 1,
FCNVME_LS_ACC = 2,
- FCNVME_LS_CREATE_ASSOCIATION = 3,
- FCNVME_LS_CREATE_CONNECTION = 4,
- FCNVME_LS_DISCONNECT = 5,
+ FCNVME_LS_CREATE_ASSOCIATION = 3, /* Create Association */
+ FCNVME_LS_CREATE_CONNECTION = 4, /* Create I/O Connection */
+ FCNVME_LS_DISCONNECT_ASSOC = 5, /* Disconnect Association */
+ FCNVME_LS_DISCONNECT_CONN = 6, /* Disconnect Connection */
};
/* FC-NVME Link Service Descriptors */
@@ -117,14 +175,17 @@ enum fcnvme_ls_rjt_reason {
FCNVME_RJT_RC_UNSUP = 0x0b,
/* command not supported */
- FCNVME_RJT_RC_INPROG = 0x0e,
- /* command already in progress */
-
FCNVME_RJT_RC_INV_ASSOC = 0x40,
- /* Invalid Association ID*/
+ /* Invalid Association ID */
FCNVME_RJT_RC_INV_CONN = 0x41,
- /* Invalid Connection ID*/
+ /* Invalid Connection ID */
+
+ FCNVME_RJT_RC_INV_PARAM = 0x42,
+ /* Invalid Parameters */
+
+ FCNVME_RJT_RC_INSUF_RES = 0x43,
+ /* Insufficient Resources */
FCNVME_RJT_RC_VENDOR = 0xff,
/* vendor specific error */
@@ -138,14 +199,32 @@ enum fcnvme_ls_rjt_explan {
FCNVME_RJT_EXP_OXID_RXID = 0x17,
/* invalid OX_ID-RX_ID combination */
- FCNVME_RJT_EXP_INSUF_RES = 0x29,
- /* insufficient resources */
-
FCNVME_RJT_EXP_UNAB_DATA = 0x2a,
/* unable to supply requested data */
FCNVME_RJT_EXP_INV_LEN = 0x2d,
/* Invalid payload length */
+
+ FCNVME_RJT_EXP_INV_ERSP_RAT = 0x40,
+ /* Invalid NVMe_ERSP Ratio */
+
+ FCNVME_RJT_EXP_INV_CTLR_ID = 0x41,
+ /* Invalid Controller ID */
+
+ FCNVME_RJT_EXP_INV_QUEUE_ID = 0x42,
+ /* Invalid Queue ID */
+
+ FCNVME_RJT_EXP_INV_SQSIZE = 0x43,
+ /* Invalid Submission Queue Size */
+
+ FCNVME_RJT_EXP_INV_HOSTID = 0x44,
+ /* Invalid HOST ID */
+
+ FCNVME_RJT_EXP_INV_HOSTNQN = 0x45,
+ /* Invalid HOSTNQN */
+
+ FCNVME_RJT_EXP_INV_SUBNQN = 0x46,
+ /* Invalid SUBNQN */
};
/* FCNVME_LSDESC_RJT */
@@ -209,21 +288,11 @@ struct fcnvme_lsdesc_cr_conn_cmd {
__be32 rsvd52;
};
-/* Disconnect Scope Values */
-enum {
- FCNVME_DISCONN_ASSOCIATION = 0,
- FCNVME_DISCONN_CONNECTION = 1,
-};
-
/* FCNVME_LSDESC_DISCONN_CMD */
struct fcnvme_lsdesc_disconn_cmd {
__be32 desc_tag; /* FCNVME_LSDESC_xxx */
__be32 desc_len;
- u8 rsvd8[3];
- /* note: scope is really a 1 bit field */
- u8 scope; /* FCNVME_DISCONN_xxx */
- __be32 rsvd12;
- __be64 id;
+ __be32 rsvd8[4];
};
/* FCNVME_LSDESC_CONN_ID */
@@ -242,9 +311,14 @@ struct fcnvme_lsdesc_assoc_id {
/* r_ctl values */
enum {
- FCNVME_RS_RCTL_DATA = 1,
- FCNVME_RS_RCTL_XFER_RDY = 5,
- FCNVME_RS_RCTL_RSP = 8,
+ FCNVME_RS_RCTL_CMND = 0x6,
+ FCNVME_RS_RCTL_DATA = 0x1,
+ FCNVME_RS_RCTL_CONF = 0x3,
+ FCNVME_RS_RCTL_SR = 0x9,
+ FCNVME_RS_RCTL_XFER_RDY = 0x5,
+ FCNVME_RS_RCTL_RSP = 0x7,
+ FCNVME_RS_RCTL_ERSP = 0x8,
+ FCNVME_RS_RCTL_SR_RSP = 0xA,
};
@@ -264,7 +338,10 @@ struct fcnvme_ls_acc_hdr {
struct fcnvme_ls_rqst_w0 w0;
__be32 desc_list_len;
struct fcnvme_lsdesc_rqst rqst;
- /* Followed by cmd-specific ACC descriptors, see next definitions */
+ /*
+ * Followed by cmd-specific ACCEPT descriptors, see xxx_acc
+ * definitions below
+ */
};
/* FCNVME_LS_CREATE_ASSOCIATION */
@@ -302,25 +379,39 @@ struct fcnvme_ls_cr_conn_acc {
struct fcnvme_lsdesc_conn_id connectid;
};
-/* FCNVME_LS_DISCONNECT */
-struct fcnvme_ls_disconnect_rqst {
+/* FCNVME_LS_DISCONNECT_ASSOC */
+struct fcnvme_ls_disconnect_assoc_rqst {
struct fcnvme_ls_rqst_w0 w0;
__be32 desc_list_len;
struct fcnvme_lsdesc_assoc_id associd;
struct fcnvme_lsdesc_disconn_cmd discon_cmd;
};
-struct fcnvme_ls_disconnect_acc {
+struct fcnvme_ls_disconnect_assoc_acc {
+ struct fcnvme_ls_acc_hdr hdr;
+};
+
+
+/* FCNVME_LS_DISCONNECT_CONN */
+struct fcnvme_ls_disconnect_conn_rqst {
+ struct fcnvme_ls_rqst_w0 w0;
+ __be32 desc_list_len;
+ struct fcnvme_lsdesc_assoc_id associd;
+ struct fcnvme_lsdesc_disconn_cmd connectid;
+};
+
+struct fcnvme_ls_disconnect_conn_acc {
struct fcnvme_ls_acc_hdr hdr;
};
/*
- * Yet to be defined in FC-NVME:
+ * Default R_A_TOV is pulled in from fc_fs.h but needs conversion
+ * from ms to seconds for our use.
*/
-#define NVME_FC_CONNECT_TIMEOUT_SEC 2 /* 2 seconds */
-#define NVME_FC_LS_TIMEOUT_SEC 2 /* 2 seconds */
-#define NVME_FC_TGTOP_TIMEOUT_SEC 2 /* 2 seconds */
+#define FC_TWO_TIMES_R_A_TOV (2 * (FC_DEF_R_A_TOV / 1000))
+#define NVME_FC_LS_TIMEOUT_SEC FC_TWO_TIMES_R_A_TOV
+#define NVME_FC_TGTOP_TIMEOUT_SEC FC_TWO_TIMES_R_A_TOV
/*
* TRADDR string must be of form "nn-<16hexdigits>:pn-<16hexdigits>"
@@ -328,6 +419,7 @@ struct fcnvme_ls_disconnect_acc {
* infront of the <16hexdigits>. Without is considered the "min" string
* and with is considered the "max" string. The hexdigits may be upper
* or lower case.
+ * Note: FC-NVME-2 standard requires a "0x" prefix.
*/
#define NVME_FC_TRADDR_NNLEN 3 /* "?n-" */
#define NVME_FC_TRADDR_OXNNLEN 5 /* "?n-0x" */
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index f61d6906e59d..3d5189f46cb1 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -107,8 +107,22 @@ enum {
NVME_REG_AQA = 0x0024, /* Admin Queue Attributes */
NVME_REG_ASQ = 0x0028, /* Admin SQ Base Address */
NVME_REG_ACQ = 0x0030, /* Admin CQ Base Address */
- NVME_REG_CMBLOC = 0x0038, /* Controller Memory Buffer Location */
+ NVME_REG_CMBLOC = 0x0038, /* Controller Memory Buffer Location */
NVME_REG_CMBSZ = 0x003c, /* Controller Memory Buffer Size */
+ NVME_REG_BPINFO = 0x0040, /* Boot Partition Information */
+ NVME_REG_BPRSEL = 0x0044, /* Boot Partition Read Select */
+ NVME_REG_BPMBL = 0x0048, /* Boot Partition Memory Buffer
+ * Location
+ */
+ NVME_REG_PMRCAP = 0x0e00, /* Persistent Memory Capabilities */
+ NVME_REG_PMRCTL = 0x0e04, /* Persistent Memory Region Control */
+ NVME_REG_PMRSTS = 0x0e08, /* Persistent Memory Region Status */
+ NVME_REG_PMREBS = 0x0e0c, /* Persistent Memory Region Elasticity
+ * Buffer Size
+ */
+ NVME_REG_PMRSWTP = 0x0e10, /* Persistent Memory Region Sustained
+ * Write Throughput
+ */
NVME_REG_DBS = 0x1000, /* SQ 0 Tail Doorbell */
};
@@ -295,6 +309,14 @@ enum {
NVME_CTRL_OACS_DIRECTIVES = 1 << 5,
NVME_CTRL_OACS_DBBUF_SUPP = 1 << 8,
NVME_CTRL_LPA_CMD_EFFECTS_LOG = 1 << 1,
+ NVME_CTRL_CTRATT_128_ID = 1 << 0,
+ NVME_CTRL_CTRATT_NON_OP_PSP = 1 << 1,
+ NVME_CTRL_CTRATT_NVM_SETS = 1 << 2,
+ NVME_CTRL_CTRATT_READ_RECV_LVLS = 1 << 3,
+ NVME_CTRL_CTRATT_ENDURANCE_GROUPS = 1 << 4,
+ NVME_CTRL_CTRATT_PREDICTABLE_LAT = 1 << 5,
+ NVME_CTRL_CTRATT_NAMESPACE_GRANULARITY = 1 << 7,
+ NVME_CTRL_CTRATT_UUID_LIST = 1 << 9,
};
struct nvme_lbaf {
@@ -352,6 +374,9 @@ enum {
NVME_ID_CNS_NS_PRESENT = 0x11,
NVME_ID_CNS_CTRL_NS_LIST = 0x12,
NVME_ID_CNS_CTRL_LIST = 0x13,
+ NVME_ID_CNS_SCNDRY_CTRL_LIST = 0x15,
+ NVME_ID_CNS_NS_GRANULARITY = 0x16,
+ NVME_ID_CNS_UUID_LIST = 0x17,
};
enum {
@@ -409,7 +434,8 @@ struct nvme_smart_log {
__u8 avail_spare;
__u8 spare_thresh;
__u8 percent_used;
- __u8 rsvd6[26];
+ __u8 endu_grp_crit_warn_sumry;
+ __u8 rsvd7[25];
__u8 data_units_read[16];
__u8 data_units_written[16];
__u8 host_reads[16];
@@ -423,7 +449,11 @@ struct nvme_smart_log {
__le32 warning_temp_time;
__le32 critical_comp_time;
__le16 temp_sensor[8];
- __u8 rsvd216[296];
+ __le32 thm_temp1_trans_count;
+ __le32 thm_temp2_trans_count;
+ __le32 thm_temp1_total_time;
+ __le32 thm_temp2_total_time;
+ __u8 rsvd232[280];
};
struct nvme_fw_slot_info_log {
@@ -440,6 +470,7 @@ enum {
NVME_CMD_EFFECTS_NIC = 1 << 3,
NVME_CMD_EFFECTS_CCC = 1 << 4,
NVME_CMD_EFFECTS_CSE_MASK = 3 << 16,
+ NVME_CMD_EFFECTS_UUID_SEL = 1 << 19,
};
struct nvme_effects_log {
@@ -563,6 +594,7 @@ enum nvme_opcode {
nvme_cmd_compare = 0x05,
nvme_cmd_write_zeroes = 0x08,
nvme_cmd_dsm = 0x09,
+ nvme_cmd_verify = 0x0c,
nvme_cmd_resv_register = 0x0d,
nvme_cmd_resv_report = 0x0e,
nvme_cmd_resv_acquire = 0x11,
@@ -772,6 +804,12 @@ struct nvme_write_zeroes_cmd {
/* Features */
+enum {
+ NVME_TEMP_THRESH_MASK = 0xffff,
+ NVME_TEMP_THRESH_SELECT_SHIFT = 16,
+ NVME_TEMP_THRESH_TYPE_UNDER = 0x100000,
+};
+
struct nvme_feat_auto_pst {
__le64 entries[32];
};
@@ -806,10 +844,14 @@ enum nvme_admin_opcode {
nvme_admin_ns_mgmt = 0x0d,
nvme_admin_activate_fw = 0x10,
nvme_admin_download_fw = 0x11,
+ nvme_admin_dev_self_test = 0x14,
nvme_admin_ns_attach = 0x15,
nvme_admin_keep_alive = 0x18,
nvme_admin_directive_send = 0x19,
nvme_admin_directive_recv = 0x1a,
+ nvme_admin_virtual_mgmt = 0x1c,
+ nvme_admin_nvme_mi_send = 0x1d,
+ nvme_admin_nvme_mi_recv = 0x1e,
nvme_admin_dbbuf = 0x7C,
nvme_admin_format_nvm = 0x80,
nvme_admin_security_send = 0x81,
@@ -873,6 +915,7 @@ enum {
NVME_FEAT_PLM_CONFIG = 0x13,
NVME_FEAT_PLM_WINDOW = 0x14,
NVME_FEAT_HOST_BEHAVIOR = 0x16,
+ NVME_FEAT_SANITIZE = 0x17,
NVME_FEAT_SW_PROGRESS = 0x80,
NVME_FEAT_HOST_ID = 0x81,
NVME_FEAT_RESV_MASK = 0x82,
@@ -883,6 +926,10 @@ enum {
NVME_LOG_FW_SLOT = 0x03,
NVME_LOG_CHANGED_NS = 0x04,
NVME_LOG_CMD_EFFECTS = 0x05,
+ NVME_LOG_DEVICE_SELF_TEST = 0x06,
+ NVME_LOG_TELEMETRY_HOST = 0x07,
+ NVME_LOG_TELEMETRY_CTRL = 0x08,
+ NVME_LOG_ENDURANCE_GROUP = 0x09,
NVME_LOG_ANA = 0x0c,
NVME_LOG_DISC = 0x70,
NVME_LOG_RESERVATION = 0x80,
@@ -1290,7 +1337,11 @@ enum {
NVME_SC_SGL_INVALID_OFFSET = 0x16,
NVME_SC_SGL_INVALID_SUBTYPE = 0x17,
+ NVME_SC_SANITIZE_FAILED = 0x1C,
+ NVME_SC_SANITIZE_IN_PROGRESS = 0x1D,
+
NVME_SC_NS_WRITE_PROTECTED = 0x20,
+ NVME_SC_CMD_INTERRUPTED = 0x21,
NVME_SC_LBA_RANGE = 0x80,
NVME_SC_CAP_EXCEEDED = 0x81,
@@ -1328,6 +1379,8 @@ enum {
NVME_SC_NS_NOT_ATTACHED = 0x11a,
NVME_SC_THIN_PROV_NOT_SUPP = 0x11b,
NVME_SC_CTRL_LIST_INVALID = 0x11c,
+ NVME_SC_BP_WRITE_PROHIBITED = 0x11e,
+ NVME_SC_PMR_SAN_PROHIBITED = 0x123,
/*
* I/O Command Set Specific - NVM commands:
@@ -1368,6 +1421,7 @@ enum {
NVME_SC_ANA_INACCESSIBLE = 0x302,
NVME_SC_ANA_TRANSITION = 0x303,
NVME_SC_HOST_PATH_ERROR = 0x370,
+ NVME_SC_HOST_ABORTED_CMD = 0x371,
NVME_SC_CRD = 0x1800,
NVME_SC_DNR = 0x4000,
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index f91cb8898ff0..1bf83c8fcaa7 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -622,12 +622,28 @@ static inline int PageTransCompound(struct page *page)
*
* Unlike PageTransCompound, this is safe to be called only while
* split_huge_pmd() cannot run from under us, like if protected by the
- * MMU notifier, otherwise it may result in page->_mapcount < 0 false
+ * MMU notifier, otherwise it may result in page->_mapcount check false
* positives.
+ *
+ * We have to treat page cache THP differently since every subpage of it
+ * would get _mapcount inc'ed once it is PMD mapped. But, it may be PTE
+ * mapped in the current process so comparing subpage's _mapcount to
+ * compound_mapcount to filter out PTE mapped case.
*/
static inline int PageTransCompoundMap(struct page *page)
{
- return PageTransCompound(page) && atomic_read(&page->_mapcount) < 0;
+ struct page *head;
+
+ if (!PageTransCompound(page))
+ return 0;
+
+ if (PageAnon(page))
+ return atomic_read(&page->_mapcount) < 0;
+
+ head = compound_head(page);
+ /* File THP is PMD mapped and not PTE mapped */
+ return atomic_read(&page->_mapcount) ==
+ atomic_read(compound_mapcount_ptr(head));
}
/*
diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h
index 682fd465df06..cfce186f0c4e 100644
--- a/include/linux/page_ext.h
+++ b/include/linux/page_ext.h
@@ -18,7 +18,7 @@ struct page_ext_operations {
enum page_ext_flags {
PAGE_EXT_OWNER,
- PAGE_EXT_OWNER_ACTIVE,
+ PAGE_EXT_OWNER_ALLOCATED,
#if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT)
PAGE_EXT_YOUNG,
PAGE_EXT_IDLE,
@@ -36,6 +36,7 @@ struct page_ext {
unsigned long flags;
};
+extern unsigned long page_ext_size;
extern void pgdat_page_ext_init(struct pglist_data *pgdat);
#ifdef CONFIG_SPARSEMEM
@@ -52,6 +53,13 @@ static inline void page_ext_init(void)
struct page_ext *lookup_page_ext(const struct page *page);
+static inline struct page_ext *page_ext_next(struct page_ext *curr)
+{
+ void *next = curr;
+ next += page_ext_size;
+ return next;
+}
+
#else /* !CONFIG_PAGE_EXTENSION */
struct page_ext;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 61448c19a132..68ccc5b1913b 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -292,7 +292,7 @@ struct pmu {
* -EBUSY -- @event is for this PMU but PMU temporarily unavailable
* -EINVAL -- @event is for this PMU but @event is not valid
* -EOPNOTSUPP -- @event is for this PMU, @event is valid, but not supported
- * -EACCESS -- @event is for this PMU, @event is valid, but no privilidges
+ * -EACCES -- @event is for this PMU, @event is valid, but no privileges
*
* 0 -- @event is for this PMU and valid
*
diff --git a/include/linux/platform_data/dma-imx-sdma.h b/include/linux/platform_data/dma-imx-sdma.h
index 6eaa53cef0bd..30e676b36b24 100644
--- a/include/linux/platform_data/dma-imx-sdma.h
+++ b/include/linux/platform_data/dma-imx-sdma.h
@@ -51,7 +51,10 @@ struct sdma_script_start_addrs {
/* End of v2 array */
s32 zcanfd_2_mcu_addr;
s32 zqspi_2_mcu_addr;
+ s32 mcu_2_ecspi_addr;
/* End of v3 array */
+ s32 mcu_2_zqspi_addr;
+ /* End of v4 array */
};
/**
diff --git a/include/linux/platform_data/intel-spi.h b/include/linux/platform_data/intel-spi.h
index ebb4f332588b..7f53a5c6f35e 100644
--- a/include/linux/platform_data/intel-spi.h
+++ b/include/linux/platform_data/intel-spi.h
@@ -13,6 +13,7 @@ enum intel_spi_type {
INTEL_SPI_BYT = 1,
INTEL_SPI_LPT,
INTEL_SPI_BXT,
+ INTEL_SPI_CNL,
};
/**
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index 1b5cec067533..f2688404d1cd 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -64,6 +64,8 @@ extern struct resource *platform_get_resource_byname(struct platform_device *,
unsigned int,
const char *);
extern int platform_get_irq_byname(struct platform_device *, const char *);
+extern int platform_get_irq_byname_optional(struct platform_device *dev,
+ const char *name);
extern int platform_add_devices(struct platform_device **, int);
struct platform_device_info {
diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h
index 222c3e01397c..ebf5ef17cc2a 100644
--- a/include/linux/pm_qos.h
+++ b/include/linux/pm_qos.h
@@ -34,8 +34,6 @@ enum pm_qos_flags_status {
#define PM_QOS_RESUME_LATENCY_NO_CONSTRAINT PM_QOS_LATENCY_ANY
#define PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS PM_QOS_LATENCY_ANY_NS
#define PM_QOS_LATENCY_TOLERANCE_DEFAULT_VALUE 0
-#define PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE 0
-#define PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE (-1)
#define PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT (-1)
#define PM_QOS_FLAG_NO_POWER_OFF (1 << 0)
@@ -54,8 +52,6 @@ struct pm_qos_flags_request {
enum dev_pm_qos_req_type {
DEV_PM_QOS_RESUME_LATENCY = 1,
DEV_PM_QOS_LATENCY_TOLERANCE,
- DEV_PM_QOS_MIN_FREQUENCY,
- DEV_PM_QOS_MAX_FREQUENCY,
DEV_PM_QOS_FLAGS,
};
@@ -97,14 +93,10 @@ struct pm_qos_flags {
struct dev_pm_qos {
struct pm_qos_constraints resume_latency;
struct pm_qos_constraints latency_tolerance;
- struct pm_qos_constraints min_frequency;
- struct pm_qos_constraints max_frequency;
struct pm_qos_flags flags;
struct dev_pm_qos_request *resume_latency_req;
struct dev_pm_qos_request *latency_tolerance_req;
struct dev_pm_qos_request *flags_req;
- struct dev_pm_qos_request *min_frequency_req;
- struct dev_pm_qos_request *max_frequency_req;
};
/* Action requested to pm_qos_update_target */
@@ -199,10 +191,6 @@ static inline s32 dev_pm_qos_read_value(struct device *dev,
switch (type) {
case DEV_PM_QOS_RESUME_LATENCY:
return PM_QOS_RESUME_LATENCY_NO_CONSTRAINT;
- case DEV_PM_QOS_MIN_FREQUENCY:
- return PM_QOS_MIN_FREQUENCY_DEFAULT_VALUE;
- case DEV_PM_QOS_MAX_FREQUENCY:
- return PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE;
default:
WARN_ON(1);
return 0;
@@ -267,4 +255,48 @@ static inline s32 dev_pm_qos_raw_resume_latency(struct device *dev)
}
#endif
+#define FREQ_QOS_MIN_DEFAULT_VALUE 0
+#define FREQ_QOS_MAX_DEFAULT_VALUE (-1)
+
+enum freq_qos_req_type {
+ FREQ_QOS_MIN = 1,
+ FREQ_QOS_MAX,
+};
+
+struct freq_constraints {
+ struct pm_qos_constraints min_freq;
+ struct blocking_notifier_head min_freq_notifiers;
+ struct pm_qos_constraints max_freq;
+ struct blocking_notifier_head max_freq_notifiers;
+};
+
+struct freq_qos_request {
+ enum freq_qos_req_type type;
+ struct plist_node pnode;
+ struct freq_constraints *qos;
+};
+
+static inline int freq_qos_request_active(struct freq_qos_request *req)
+{
+ return !IS_ERR_OR_NULL(req->qos);
+}
+
+void freq_constraints_init(struct freq_constraints *qos);
+
+s32 freq_qos_read_value(struct freq_constraints *qos,
+ enum freq_qos_req_type type);
+
+int freq_qos_add_request(struct freq_constraints *qos,
+ struct freq_qos_request *req,
+ enum freq_qos_req_type type, s32 value);
+int freq_qos_update_request(struct freq_qos_request *req, s32 new_value);
+int freq_qos_remove_request(struct freq_qos_request *req);
+
+int freq_qos_add_notifier(struct freq_constraints *qos,
+ enum freq_qos_req_type type,
+ struct notifier_block *notifier);
+int freq_qos_remove_notifier(struct freq_constraints *qos,
+ enum freq_qos_req_type type,
+ struct notifier_block *notifier);
+
#endif
diff --git a/include/linux/psci.h b/include/linux/psci.h
index e2bacc6fd2f2..ebe0a881d13d 100644
--- a/include/linux/psci.h
+++ b/include/linux/psci.h
@@ -7,6 +7,7 @@
#ifndef __LINUX_PSCI_H
#define __LINUX_PSCI_H
+#include <linux/arm-smccc.h>
#include <linux/init.h>
#include <linux/types.h>
@@ -18,12 +19,6 @@ bool psci_tos_resident_on(int cpu);
int psci_cpu_suspend_enter(u32 state);
bool psci_power_state_is_valid(u32 state);
-enum psci_conduit {
- PSCI_CONDUIT_NONE,
- PSCI_CONDUIT_SMC,
- PSCI_CONDUIT_HVC,
-};
-
enum smccc_version {
SMCCC_VERSION_1_0,
SMCCC_VERSION_1_1,
@@ -38,7 +33,7 @@ struct psci_operations {
int (*affinity_info)(unsigned long target_affinity,
unsigned long lowest_affinity_level);
int (*migrate_info_type)(void);
- enum psci_conduit conduit;
+ enum arm_smccc_conduit conduit;
enum smccc_version smccc_version;
};
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index b5116013f27e..63e62372443a 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -316,24 +316,6 @@ radix_tree_iter_lookup(const struct radix_tree_root *root,
}
/**
- * radix_tree_iter_find - find a present entry
- * @root: radix tree root
- * @iter: iterator state
- * @index: start location
- *
- * This function returns the slot containing the entry with the lowest index
- * which is at least @index. If @index is larger than any present entry, this
- * function returns NULL. The @iter is updated to describe the entry found.
- */
-static inline void __rcu **
-radix_tree_iter_find(const struct radix_tree_root *root,
- struct radix_tree_iter *iter, unsigned long index)
-{
- radix_tree_iter_init(iter, index);
- return radix_tree_next_chunk(root, iter, 0);
-}
-
-/**
* radix_tree_iter_retry - retry this chunk of the iteration
* @iter: iterator state
*
diff --git a/include/linux/reset-controller.h b/include/linux/reset-controller.h
index 9326d671b6e6..eaae6b4e9f24 100644
--- a/include/linux/reset-controller.h
+++ b/include/linux/reset-controller.h
@@ -7,7 +7,7 @@
struct reset_controller_dev;
/**
- * struct reset_control_ops
+ * struct reset_control_ops - reset controller driver callbacks
*
* @reset: for self-deasserting resets, does all necessary
* things to reset the device
@@ -33,7 +33,7 @@ struct of_phandle_args;
* @provider: name of the reset controller device controlling this reset line
* @index: ID of the reset controller in the reset controller device
* @dev_id: name of the device associated with this reset line
- * @con_id name of the reset line (can be NULL)
+ * @con_id: name of the reset line (can be NULL)
*/
struct reset_control_lookup {
struct list_head list;
diff --git a/include/linux/reset.h b/include/linux/reset.h
index e7793fc0fa93..eb597e8aa430 100644
--- a/include/linux/reset.h
+++ b/include/linux/reset.h
@@ -143,7 +143,7 @@ static inline int device_reset_optional(struct device *dev)
* If this function is called more than once for the same reset_control it will
* return -EBUSY.
*
- * See reset_control_get_shared for details on shared references to
+ * See reset_control_get_shared() for details on shared references to
* reset-controls.
*
* Use of id names is optional.
diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index a986ac12a848..e40d019c3d9d 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -216,15 +216,6 @@ int sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint,
*/
bool sbitmap_any_bit_set(const struct sbitmap *sb);
-/**
- * sbitmap_any_bit_clear() - Check for an unset bit in a &struct
- * sbitmap.
- * @sb: Bitmap to check.
- *
- * Return: true if any bit in the bitmap is clear, false otherwise.
- */
-bool sbitmap_any_bit_clear(const struct sbitmap *sb);
-
#define SB_NR_TO_INDEX(sb, bitnr) ((bitnr) >> (sb)->shift)
#define SB_NR_TO_BIT(sb, bitnr) ((bitnr) & ((1U << (sb)->shift) - 1U))
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2c2e56bd8913..6666e25606b7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -223,6 +223,7 @@ extern long schedule_timeout_uninterruptible(long timeout);
extern long schedule_timeout_idle(long timeout);
asmlinkage void schedule(void);
extern void schedule_preempt_disabled(void);
+asmlinkage void preempt_schedule_irq(void);
extern int __must_check io_schedule_prepare(void);
extern void io_schedule_finish(int token);
@@ -1467,6 +1468,7 @@ extern struct pid *cad_pid;
#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */
#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
#define PF_MEMALLOC_NOCMA 0x10000000 /* All allocation request will have _GFP_MOVABLE cleared */
+#define PF_IO_WORKER 0x20000000 /* Task is an IO worker */
#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */
#define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */
diff --git a/include/linux/security.h b/include/linux/security.h
index a8d59d612d27..9df7547afc0c 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -105,6 +105,7 @@ enum lockdown_reason {
LOCKDOWN_NONE,
LOCKDOWN_MODULE_SIGNATURE,
LOCKDOWN_DEV_MEM,
+ LOCKDOWN_EFI_TEST,
LOCKDOWN_KEXEC,
LOCKDOWN_HIBERNATION,
LOCKDOWN_PCI_ACCESS,
diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h
index 53c28d750a45..1ac0d712a9c3 100644
--- a/include/linux/sed-opal.h
+++ b/include/linux/sed-opal.h
@@ -42,6 +42,7 @@ static inline bool is_sed_ioctl(unsigned int cmd)
case IOC_OPAL_PSID_REVERT_TPR:
case IOC_OPAL_MBR_DONE:
case IOC_OPAL_WRITE_SHADOW_MBR:
+ case IOC_OPAL_GENERIC_TABLE_RW:
return true;
}
return false;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 4351577b14d7..8688f7adfda7 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1354,7 +1354,8 @@ static inline __u32 skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6
return skb->hash;
}
-__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb);
+__u32 skb_get_hash_perturb(const struct sk_buff *skb,
+ const siphash_key_t *perturb);
static inline __u32 skb_get_hash_raw(const struct sk_buff *skb)
{
@@ -1495,6 +1496,19 @@ static inline int skb_queue_empty(const struct sk_buff_head *list)
}
/**
+ * skb_queue_empty_lockless - check if a queue is empty
+ * @list: queue head
+ *
+ * Returns true if the queue is empty, false otherwise.
+ * This variant can be used in lockless contexts.
+ */
+static inline bool skb_queue_empty_lockless(const struct sk_buff_head *list)
+{
+ return READ_ONCE(list->next) == (const struct sk_buff *) list;
+}
+
+
+/**
* skb_queue_is_last - check if skb is the last entry in the queue
* @list: queue head
* @skb: buffer
@@ -1847,9 +1861,11 @@ static inline void __skb_insert(struct sk_buff *newsk,
struct sk_buff *prev, struct sk_buff *next,
struct sk_buff_head *list)
{
- newsk->next = next;
- newsk->prev = prev;
- next->prev = prev->next = newsk;
+ /* see skb_queue_empty_lockless() for the opposite READ_ONCE() */
+ WRITE_ONCE(newsk->next, next);
+ WRITE_ONCE(newsk->prev, prev);
+ WRITE_ONCE(next->prev, newsk);
+ WRITE_ONCE(prev->next, newsk);
list->qlen++;
}
@@ -1860,11 +1876,11 @@ static inline void __skb_queue_splice(const struct sk_buff_head *list,
struct sk_buff *first = list->next;
struct sk_buff *last = list->prev;
- first->prev = prev;
- prev->next = first;
+ WRITE_ONCE(first->prev, prev);
+ WRITE_ONCE(prev->next, first);
- last->next = next;
- next->prev = last;
+ WRITE_ONCE(last->next, next);
+ WRITE_ONCE(next->prev, last);
}
/**
@@ -2005,8 +2021,8 @@ static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
next = skb->next;
prev = skb->prev;
skb->next = skb->prev = NULL;
- next->prev = prev;
- prev->next = next;
+ WRITE_ONCE(next->prev, prev);
+ WRITE_ONCE(prev->next, next);
}
/**
@@ -3510,8 +3526,9 @@ int skb_ensure_writable(struct sk_buff *skb, int write_len);
int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci);
int skb_vlan_pop(struct sk_buff *skb);
int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci);
-int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto);
-int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto);
+int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto,
+ int mac_len);
+int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len);
int skb_mpls_update_lse(struct sk_buff *skb, __be32 mpls_lse);
int skb_mpls_dec_ttl(struct sk_buff *skb);
struct sk_buff *pskb_extract(struct sk_buff *skb, int off, int to_copy,
@@ -4152,12 +4169,18 @@ static inline void skb_ext_reset(struct sk_buff *skb)
skb->active_extensions = 0;
}
}
+
+static inline bool skb_has_extensions(struct sk_buff *skb)
+{
+ return unlikely(skb->active_extensions);
+}
#else
static inline void skb_ext_put(struct sk_buff *skb) {}
static inline void skb_ext_reset(struct sk_buff *skb) {}
static inline void skb_ext_del(struct sk_buff *skb, int unused) {}
static inline void __skb_ext_copy(struct sk_buff *d, const struct sk_buff *s) {}
static inline void skb_ext_copy(struct sk_buff *dst, const struct sk_buff *s) {}
+static inline bool skb_has_extensions(struct sk_buff *skb) { return false; }
#endif /* CONFIG_SKB_EXTENSIONS */
static inline void nf_reset_ct(struct sk_buff *skb)
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index e4b3fb4bb77c..ce7055259877 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -139,6 +139,11 @@ static inline void sk_msg_apply_bytes(struct sk_psock *psock, u32 bytes)
}
}
+static inline u32 sk_msg_iter_dist(u32 start, u32 end)
+{
+ return end >= start ? end - start : end + (MAX_MSG_FRAGS - start);
+}
+
#define sk_msg_iter_var_prev(var) \
do { \
if (var == 0) \
@@ -198,9 +203,7 @@ static inline u32 sk_msg_elem_used(const struct sk_msg *msg)
if (sk_msg_full(msg))
return MAX_MSG_FRAGS;
- return msg->sg.end >= msg->sg.start ?
- msg->sg.end - msg->sg.start :
- msg->sg.end + (MAX_MSG_FRAGS - msg->sg.start);
+ return sk_msg_iter_dist(msg->sg.start, msg->sg.end);
}
static inline struct scatterlist *sk_msg_elem(struct sk_msg *msg, int which)
diff --git a/include/linux/slab.h b/include/linux/slab.h
index ab2b98ad76e1..4d2a2fa55ed5 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -493,6 +493,10 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
* kmalloc is the normal method of allocating memory
* for objects smaller than page size in the kernel.
*
+ * The allocated object address is aligned to at least ARCH_KMALLOC_MINALIGN
+ * bytes. For @size of power of two bytes, the alignment is also guaranteed
+ * to be at least to the size.
+ *
* The @flags argument may be one of the GFP flags defined at
* include/linux/gfp.h and described at
* :ref:`Documentation/core-api/mm-api.rst <mm-api-gfp-flags>`
diff --git a/include/linux/socket.h b/include/linux/socket.h
index fc0bed59fc84..09c32a21555b 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -263,7 +263,7 @@ struct ucred {
#define PF_MAX AF_MAX
/* Maximum queue length specifiable by listen. */
-#define SOMAXCONN 128
+#define SOMAXCONN 4096
/* Flags we can use with send/ and recv.
Added those for 1003.1g not all are supported yet
@@ -392,6 +392,9 @@ extern int __sys_recvfrom(int fd, void __user *ubuf, size_t size,
extern int __sys_sendto(int fd, void __user *buff, size_t len,
unsigned int flags, struct sockaddr __user *addr,
int addr_len);
+extern int __sys_accept4_file(struct file *file, unsigned file_flags,
+ struct sockaddr __user *upeer_sockaddr,
+ int __user *upeer_addrlen, int flags);
extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags);
extern int __sys_socket(int family, int type, int protocol);
diff --git a/include/linux/stat.h b/include/linux/stat.h
index 765573dc17d6..528c4baad091 100644
--- a/include/linux/stat.h
+++ b/include/linux/stat.h
@@ -33,7 +33,8 @@ struct kstat {
STATX_ATTR_IMMUTABLE | \
STATX_ATTR_APPEND | \
STATX_ATTR_NODUMP | \
- STATX_ATTR_ENCRYPTED \
+ STATX_ATTR_ENCRYPTED | \
+ STATX_ATTR_VERITY \
)/* Attrs corresponding to FS_*_FL flags */
u64 ino;
dev_t dev;
diff --git a/include/linux/string.h b/include/linux/string.h
index b2f9df7f0761..b6ccdc2c7f02 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -227,7 +227,26 @@ static inline bool strstarts(const char *str, const char *prefix)
}
size_t memweight(const void *ptr, size_t bytes);
-void memzero_explicit(void *s, size_t count);
+
+/**
+ * memzero_explicit - Fill a region of memory (e.g. sensitive
+ * keying data) with 0s.
+ * @s: Pointer to the start of the area.
+ * @count: The size of the area.
+ *
+ * Note: usually using memset() is just fine (!), but in cases
+ * where clearing out _local_ data at the end of a scope is
+ * necessary, memzero_explicit() should be used instead in
+ * order to prevent the compiler from optimising away zeroing.
+ *
+ * memzero_explicit() doesn't need an arch-specific version as
+ * it just invokes the one of memset() implicitly.
+ */
+static inline void memzero_explicit(void *s, size_t count)
+{
+ memset(s, 0, count);
+ barrier_data(s);
+}
/**
* kbasename - return the last part of a pathname.
diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h
index 87d27e13d885..d796058cdff2 100644
--- a/include/linux/sunrpc/bc_xprt.h
+++ b/include/linux/sunrpc/bc_xprt.h
@@ -64,6 +64,11 @@ static inline int xprt_setup_backchannel(struct rpc_xprt *xprt,
return 0;
}
+static inline void xprt_destroy_backchannel(struct rpc_xprt *xprt,
+ unsigned int max_reqs)
+{
+}
+
static inline bool svc_is_backchannel(const struct svc_rqst *rqstp)
{
return false;
diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h
index 7638dbe7bc50..a940de03808d 100644
--- a/include/linux/sunrpc/xprtsock.h
+++ b/include/linux/sunrpc/xprtsock.h
@@ -61,6 +61,7 @@ struct sock_xprt {
struct mutex recv_mutex;
struct sockaddr_storage srcaddr;
unsigned short srcport;
+ int xprt_err;
/*
* UDP socket buffer size parameters
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 5420817ed317..fa7ee503fb76 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -196,9 +196,9 @@ struct bin_attribute {
.size = _size, \
}
-#define __BIN_ATTR_WO(_name) { \
+#define __BIN_ATTR_WO(_name, _size) { \
.attr = { .name = __stringify(_name), .mode = 0200 }, \
- .store = _name##_store, \
+ .write = _name##_write, \
.size = _size, \
}
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 99617e528ea2..668e25a76d69 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -393,7 +393,7 @@ struct tcp_sock {
/* fastopen_rsk points to request_sock that resulted in this big
* socket. Used to retransmit SYNACKs etc.
*/
- struct request_sock *fastopen_rsk;
+ struct request_sock __rcu *fastopen_rsk;
u32 *saved_syn;
};
@@ -447,8 +447,8 @@ static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk)
static inline bool tcp_passive_fastopen(const struct sock *sk)
{
- return (sk->sk_state == TCP_SYN_RECV &&
- tcp_sk(sk)->fastopen_rsk != NULL);
+ return sk->sk_state == TCP_SYN_RECV &&
+ rcu_access_pointer(tcp_sk(sk)->fastopen_rsk) != NULL;
}
static inline void fastopen_queue_tune(struct sock *sk, int backlog)
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index 53c0ea9ec9df..0d6e949ba315 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -21,6 +21,7 @@
#include <linux/acpi.h>
#include <linux/cdev.h>
#include <linux/fs.h>
+#include <linux/highmem.h>
#include <crypto/hash_info.h>
#define TPM_DIGEST_SIZE 20 /* Max TPM v1.2 PCR size */
@@ -67,6 +68,8 @@ struct tpm_class_ops {
u8 (*status) (struct tpm_chip *chip);
void (*update_timeouts)(struct tpm_chip *chip,
unsigned long *timeout_cap);
+ void (*update_durations)(struct tpm_chip *chip,
+ unsigned long *duration_cap);
int (*go_idle)(struct tpm_chip *chip);
int (*cmd_ready)(struct tpm_chip *chip);
int (*request_locality)(struct tpm_chip *chip, int loc);
@@ -161,6 +164,235 @@ struct tpm_chip {
int locality;
};
+#define TPM_HEADER_SIZE 10
+
+enum tpm2_const {
+ TPM2_PLATFORM_PCR = 24,
+ TPM2_PCR_SELECT_MIN = ((TPM2_PLATFORM_PCR + 7) / 8),
+};
+
+enum tpm2_timeouts {
+ TPM2_TIMEOUT_A = 750,
+ TPM2_TIMEOUT_B = 2000,
+ TPM2_TIMEOUT_C = 200,
+ TPM2_TIMEOUT_D = 30,
+ TPM2_DURATION_SHORT = 20,
+ TPM2_DURATION_MEDIUM = 750,
+ TPM2_DURATION_LONG = 2000,
+ TPM2_DURATION_LONG_LONG = 300000,
+ TPM2_DURATION_DEFAULT = 120000,
+};
+
+enum tpm2_structures {
+ TPM2_ST_NO_SESSIONS = 0x8001,
+ TPM2_ST_SESSIONS = 0x8002,
+};
+
+/* Indicates from what layer of the software stack the error comes from */
+#define TSS2_RC_LAYER_SHIFT 16
+#define TSS2_RESMGR_TPM_RC_LAYER (11 << TSS2_RC_LAYER_SHIFT)
+
+enum tpm2_return_codes {
+ TPM2_RC_SUCCESS = 0x0000,
+ TPM2_RC_HASH = 0x0083, /* RC_FMT1 */
+ TPM2_RC_HANDLE = 0x008B,
+ TPM2_RC_INITIALIZE = 0x0100, /* RC_VER1 */
+ TPM2_RC_FAILURE = 0x0101,
+ TPM2_RC_DISABLED = 0x0120,
+ TPM2_RC_COMMAND_CODE = 0x0143,
+ TPM2_RC_TESTING = 0x090A, /* RC_WARN */
+ TPM2_RC_REFERENCE_H0 = 0x0910,
+ TPM2_RC_RETRY = 0x0922,
+};
+
+enum tpm2_command_codes {
+ TPM2_CC_FIRST = 0x011F,
+ TPM2_CC_HIERARCHY_CONTROL = 0x0121,
+ TPM2_CC_HIERARCHY_CHANGE_AUTH = 0x0129,
+ TPM2_CC_CREATE_PRIMARY = 0x0131,
+ TPM2_CC_SEQUENCE_COMPLETE = 0x013E,
+ TPM2_CC_SELF_TEST = 0x0143,
+ TPM2_CC_STARTUP = 0x0144,
+ TPM2_CC_SHUTDOWN = 0x0145,
+ TPM2_CC_NV_READ = 0x014E,
+ TPM2_CC_CREATE = 0x0153,
+ TPM2_CC_LOAD = 0x0157,
+ TPM2_CC_SEQUENCE_UPDATE = 0x015C,
+ TPM2_CC_UNSEAL = 0x015E,
+ TPM2_CC_CONTEXT_LOAD = 0x0161,
+ TPM2_CC_CONTEXT_SAVE = 0x0162,
+ TPM2_CC_FLUSH_CONTEXT = 0x0165,
+ TPM2_CC_VERIFY_SIGNATURE = 0x0177,
+ TPM2_CC_GET_CAPABILITY = 0x017A,
+ TPM2_CC_GET_RANDOM = 0x017B,
+ TPM2_CC_PCR_READ = 0x017E,
+ TPM2_CC_PCR_EXTEND = 0x0182,
+ TPM2_CC_EVENT_SEQUENCE_COMPLETE = 0x0185,
+ TPM2_CC_HASH_SEQUENCE_START = 0x0186,
+ TPM2_CC_CREATE_LOADED = 0x0191,
+ TPM2_CC_LAST = 0x0193, /* Spec 1.36 */
+};
+
+enum tpm2_permanent_handles {
+ TPM2_RS_PW = 0x40000009,
+};
+
+enum tpm2_capabilities {
+ TPM2_CAP_HANDLES = 1,
+ TPM2_CAP_COMMANDS = 2,
+ TPM2_CAP_PCRS = 5,
+ TPM2_CAP_TPM_PROPERTIES = 6,
+};
+
+enum tpm2_properties {
+ TPM_PT_TOTAL_COMMANDS = 0x0129,
+};
+
+enum tpm2_startup_types {
+ TPM2_SU_CLEAR = 0x0000,
+ TPM2_SU_STATE = 0x0001,
+};
+
+enum tpm2_cc_attrs {
+ TPM2_CC_ATTR_CHANDLES = 25,
+ TPM2_CC_ATTR_RHANDLE = 28,
+};
+
+#define TPM_VID_INTEL 0x8086
+#define TPM_VID_WINBOND 0x1050
+#define TPM_VID_STM 0x104A
+
+enum tpm_chip_flags {
+ TPM_CHIP_FLAG_TPM2 = BIT(1),
+ TPM_CHIP_FLAG_IRQ = BIT(2),
+ TPM_CHIP_FLAG_VIRTUAL = BIT(3),
+ TPM_CHIP_FLAG_HAVE_TIMEOUTS = BIT(4),
+ TPM_CHIP_FLAG_ALWAYS_POWERED = BIT(5),
+ TPM_CHIP_FLAG_FIRMWARE_POWER_MANAGED = BIT(6),
+};
+
+#define to_tpm_chip(d) container_of(d, struct tpm_chip, dev)
+
+struct tpm_header {
+ __be16 tag;
+ __be32 length;
+ union {
+ __be32 ordinal;
+ __be32 return_code;
+ };
+} __packed;
+
+/* A string buffer type for constructing TPM commands. This is based on the
+ * ideas of string buffer code in security/keys/trusted.h but is heap based
+ * in order to keep the stack usage minimal.
+ */
+
+enum tpm_buf_flags {
+ TPM_BUF_OVERFLOW = BIT(0),
+};
+
+struct tpm_buf {
+ unsigned int flags;
+ u8 *data;
+};
+
+enum tpm2_object_attributes {
+ TPM2_OA_USER_WITH_AUTH = BIT(6),
+};
+
+enum tpm2_session_attributes {
+ TPM2_SA_CONTINUE_SESSION = BIT(0),
+};
+
+struct tpm2_hash {
+ unsigned int crypto_id;
+ unsigned int tpm_id;
+};
+
+static inline void tpm_buf_reset(struct tpm_buf *buf, u16 tag, u32 ordinal)
+{
+ struct tpm_header *head = (struct tpm_header *)buf->data;
+
+ head->tag = cpu_to_be16(tag);
+ head->length = cpu_to_be32(sizeof(*head));
+ head->ordinal = cpu_to_be32(ordinal);
+}
+
+static inline int tpm_buf_init(struct tpm_buf *buf, u16 tag, u32 ordinal)
+{
+ buf->data = (u8 *)__get_free_page(GFP_KERNEL);
+ if (!buf->data)
+ return -ENOMEM;
+
+ buf->flags = 0;
+ tpm_buf_reset(buf, tag, ordinal);
+ return 0;
+}
+
+static inline void tpm_buf_destroy(struct tpm_buf *buf)
+{
+ free_page((unsigned long)buf->data);
+}
+
+static inline u32 tpm_buf_length(struct tpm_buf *buf)
+{
+ struct tpm_header *head = (struct tpm_header *)buf->data;
+
+ return be32_to_cpu(head->length);
+}
+
+static inline u16 tpm_buf_tag(struct tpm_buf *buf)
+{
+ struct tpm_header *head = (struct tpm_header *)buf->data;
+
+ return be16_to_cpu(head->tag);
+}
+
+static inline void tpm_buf_append(struct tpm_buf *buf,
+ const unsigned char *new_data,
+ unsigned int new_len)
+{
+ struct tpm_header *head = (struct tpm_header *)buf->data;
+ u32 len = tpm_buf_length(buf);
+
+ /* Return silently if overflow has already happened. */
+ if (buf->flags & TPM_BUF_OVERFLOW)
+ return;
+
+ if ((len + new_len) > PAGE_SIZE) {
+ WARN(1, "tpm_buf: overflow\n");
+ buf->flags |= TPM_BUF_OVERFLOW;
+ return;
+ }
+
+ memcpy(&buf->data[len], new_data, new_len);
+ head->length = cpu_to_be32(len + new_len);
+}
+
+static inline void tpm_buf_append_u8(struct tpm_buf *buf, const u8 value)
+{
+ tpm_buf_append(buf, &value, 1);
+}
+
+static inline void tpm_buf_append_u16(struct tpm_buf *buf, const u16 value)
+{
+ __be16 value2 = cpu_to_be16(value);
+
+ tpm_buf_append(buf, (u8 *) &value2, 2);
+}
+
+static inline void tpm_buf_append_u32(struct tpm_buf *buf, const u32 value)
+{
+ __be32 value2 = cpu_to_be32(value);
+
+ tpm_buf_append(buf, (u8 *) &value2, 4);
+}
+
+static inline u32 tpm2_rc_value(u32 rc)
+{
+ return (rc & BIT(7)) ? rc & 0xff : rc;
+}
+
#if defined(CONFIG_TCG_TPM) || defined(CONFIG_TCG_TPM_MODULE)
extern int tpm_is_tpm2(struct tpm_chip *chip);
@@ -170,12 +402,6 @@ extern int tpm_pcr_extend(struct tpm_chip *chip, u32 pcr_idx,
struct tpm_digest *digests);
extern int tpm_send(struct tpm_chip *chip, void *cmd, size_t buflen);
extern int tpm_get_random(struct tpm_chip *chip, u8 *data, size_t max);
-extern int tpm_seal_trusted(struct tpm_chip *chip,
- struct trusted_key_payload *payload,
- struct trusted_key_options *options);
-extern int tpm_unseal_trusted(struct tpm_chip *chip,
- struct trusted_key_payload *payload,
- struct trusted_key_options *options);
extern struct tpm_chip *tpm_default_chip(void);
#else
static inline int tpm_is_tpm2(struct tpm_chip *chip)
@@ -204,18 +430,6 @@ static inline int tpm_get_random(struct tpm_chip *chip, u8 *data, size_t max)
return -ENODEV;
}
-static inline int tpm_seal_trusted(struct tpm_chip *chip,
- struct trusted_key_payload *payload,
- struct trusted_key_options *options)
-{
- return -ENODEV;
-}
-static inline int tpm_unseal_trusted(struct tpm_chip *chip,
- struct trusted_key_payload *payload,
- struct trusted_key_options *options)
-{
- return -ENODEV;
-}
static inline struct tpm_chip *tpm_default_chip(void)
{
return NULL;
diff --git a/include/linux/tpm_eventlog.h b/include/linux/tpm_eventlog.h
index 63238c84dc0b..131ea1bad458 100644
--- a/include/linux/tpm_eventlog.h
+++ b/include/linux/tpm_eventlog.h
@@ -152,7 +152,7 @@ struct tcg_algorithm_info {
* total. Once we've done this we know the offset of the data length field,
* and can calculate the total size of the event.
*
- * Return: size of the event on success, <0 on failure
+ * Return: size of the event on success, 0 on failure
*/
static inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event,
@@ -170,6 +170,7 @@ static inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event,
u16 halg;
int i;
int j;
+ u32 count, event_type;
marker = event;
marker_start = marker;
@@ -190,16 +191,22 @@ static inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event,
}
event = (struct tcg_pcr_event2_head *)mapping;
+ /*
+ * The loop below will unmap these fields if the log is larger than
+ * one page, so save them here for reference:
+ */
+ count = READ_ONCE(event->count);
+ event_type = READ_ONCE(event->event_type);
efispecid = (struct tcg_efi_specid_event_head *)event_header->event;
/* Check if event is malformed. */
- if (event->count > efispecid->num_algs) {
+ if (count > efispecid->num_algs) {
size = 0;
goto out;
}
- for (i = 0; i < event->count; i++) {
+ for (i = 0; i < count; i++) {
halg_size = sizeof(event->digests[i].alg_id);
/* Map the digest's algorithm identifier */
@@ -256,8 +263,9 @@ static inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event,
+ event_field->event_size;
size = marker - marker_start;
- if ((event->event_type == 0) && (event_field->event_size == 0))
+ if (event_type == 0 && event_field->event_size == 0)
size = 0;
+
out:
if (do_mapping)
TPM_MEMUNMAP(mapping, mapping_size);
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index e47d0522a1f4..d4ee6e942562 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -355,8 +355,10 @@ extern long strnlen_unsafe_user(const void __user *unsafe_addr, long count);
#ifndef user_access_begin
#define user_access_begin(ptr,len) access_ok(ptr, len)
#define user_access_end() do { } while (0)
-#define unsafe_get_user(x, ptr, err) do { if (unlikely(__get_user(x, ptr))) goto err; } while (0)
-#define unsafe_put_user(x, ptr, err) do { if (unlikely(__put_user(x, ptr))) goto err; } while (0)
+#define unsafe_op_wrap(op, err) do { if (unlikely(op)) goto err; } while (0)
+#define unsafe_get_user(x,p,e) unsafe_op_wrap(__get_user(x,p),e)
+#define unsafe_put_user(x,p,e) unsafe_op_wrap(__put_user(x,p),e)
+#define unsafe_copy_to_user(d,s,l,e) unsafe_op_wrap(__copy_to_user(d,s,l),e)
static inline unsigned long user_access_save(void) { return 0UL; }
static inline void user_access_restore(unsigned long flags) { }
#endif
diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
index 4c7781f4b29b..07875ccc7bb5 100644
--- a/include/linux/virtio_vsock.h
+++ b/include/linux/virtio_vsock.h
@@ -48,7 +48,6 @@ struct virtio_vsock_sock {
struct virtio_vsock_pkt {
struct virtio_vsock_hdr hdr;
- struct work_struct work;
struct list_head list;
/* socket refcnt not held, only use for cancellation */
struct vsock_sock *vsk;
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 5921599b6dc4..86eecbd98e84 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -230,8 +230,8 @@ static inline int xa_err(void *entry)
* This structure is used either directly or via the XA_LIMIT() macro
* to communicate the range of IDs that are valid for allocation.
* Two common ranges are predefined for you:
- * * xa_limit_32b - [0 - UINT_MAX]
- * * xa_limit_31b - [0 - INT_MAX]
+ * * xa_limit_32b - [0 - UINT_MAX]
+ * * xa_limit_31b - [0 - INT_MAX]
*/
struct xa_limit {
u32 max;
diff --git a/include/net/bonding.h b/include/net/bonding.h
index f7fe45689142..3d56b026bb9e 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -159,7 +159,6 @@ struct slave {
unsigned long target_last_arp_rx[BOND_MAX_ARP_TARGETS];
s8 link; /* one of BOND_LINK_XXXX */
s8 link_new_state; /* one of BOND_LINK_XXXX */
- s8 new_link;
u8 backup:1, /* indicates backup slave. Value corresponds with
BOND_STATE_ACTIVE and BOND_STATE_BACKUP */
inactive:1, /* indicates inactive slave */
@@ -203,7 +202,6 @@ struct bonding {
struct slave __rcu *primary_slave;
struct bond_up_slave __rcu *slave_arr; /* Array of usable slaves */
bool force_primary;
- u32 nest_level;
s32 slave_cnt; /* never change this value outside the attach/detach wrappers */
int (*recv_probe)(const struct sk_buff *, struct bonding *,
struct slave *);
@@ -239,6 +237,7 @@ struct bonding {
struct dentry *debug_dir;
#endif /* CONFIG_DEBUG_FS */
struct rtnl_link_stats64 bond_stats;
+ struct lock_class_key stats_lock_key;
};
#define bond_slave_get_rcu(dev) \
@@ -549,7 +548,7 @@ static inline void bond_propose_link_state(struct slave *slave, int state)
static inline void bond_commit_link_state(struct slave *slave, bool notify)
{
- if (slave->link == slave->link_new_state)
+ if (slave->link_new_state == BOND_LINK_NOCHANGE)
return;
slave->link = slave->link_new_state;
diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index 127a5c4e3699..86e028388bad 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -122,7 +122,7 @@ static inline void skb_mark_napi_id(struct sk_buff *skb,
static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
- sk->sk_napi_id = skb->napi_id;
+ WRITE_ONCE(sk->sk_napi_id, skb->napi_id);
#endif
sk_rx_queue_set(sk, skb);
}
@@ -132,8 +132,8 @@ static inline void sk_mark_napi_id_once(struct sock *sk,
const struct sk_buff *skb)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
- if (!sk->sk_napi_id)
- sk->sk_napi_id = skb->napi_id;
+ if (!READ_ONCE(sk->sk_napi_id))
+ WRITE_ONCE(sk->sk_napi_id, skb->napi_id);
#endif
}
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index ff45c3e1abff..4ab2c49423dc 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -5550,6 +5550,14 @@ const struct ieee80211_reg_rule *freq_reg_info(struct wiphy *wiphy,
const char *reg_initiator_name(enum nl80211_reg_initiator initiator);
/**
+ * regulatory_pre_cac_allowed - check if pre-CAC allowed in the current regdom
+ * @wiphy: wiphy for which pre-CAC capability is checked.
+ *
+ * Pre-CAC is allowed only in some regdomains (notable ETSI).
+ */
+bool regulatory_pre_cac_allowed(struct wiphy *wiphy);
+
+/**
* DOC: Internal regulatory db functions
*
*/
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 23e4b65ec9df..2116c88663a1 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -38,7 +38,8 @@ struct devlink {
struct device *dev;
possible_net_t _net;
struct mutex lock;
- bool reload_failed;
+ u8 reload_failed:1,
+ reload_enabled:1;
char priv[0] __aligned(NETDEV_ALIGN);
};
@@ -774,6 +775,8 @@ struct ib_device;
struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size);
int devlink_register(struct devlink *devlink, struct device *dev);
void devlink_unregister(struct devlink *devlink);
+void devlink_reload_enable(struct devlink *devlink);
+void devlink_reload_disable(struct devlink *devlink);
void devlink_free(struct devlink *devlink);
int devlink_port_register(struct devlink *devlink,
struct devlink_port *devlink_port,
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 90bd210be060..5cd12276ae21 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -4,6 +4,7 @@
#include <linux/types.h>
#include <linux/in6.h>
+#include <linux/siphash.h>
#include <uapi/linux/if_ether.h>
/**
@@ -276,7 +277,7 @@ struct flow_keys_basic {
struct flow_keys {
struct flow_dissector_key_control control;
#define FLOW_KEYS_HASH_START_FIELD basic
- struct flow_dissector_key_basic basic;
+ struct flow_dissector_key_basic basic __aligned(SIPHASH_ALIGNMENT);
struct flow_dissector_key_tags tags;
struct flow_dissector_key_vlan vlan;
struct flow_dissector_key_vlan cvlan;
diff --git a/include/net/fq.h b/include/net/fq.h
index d126b5d20261..2ad85e683041 100644
--- a/include/net/fq.h
+++ b/include/net/fq.h
@@ -69,7 +69,7 @@ struct fq {
struct list_head backlogs;
spinlock_t lock;
u32 flows_cnt;
- u32 perturbation;
+ siphash_key_t perturbation;
u32 limit;
u32 memory_limit;
u32 memory_usage;
diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h
index be40a4b327e3..38a9a3d1222b 100644
--- a/include/net/fq_impl.h
+++ b/include/net/fq_impl.h
@@ -108,7 +108,7 @@ begin:
static u32 fq_flow_idx(struct fq *fq, struct sk_buff *skb)
{
- u32 hash = skb_get_hash_perturb(skb, fq->perturbation);
+ u32 hash = skb_get_hash_perturb(skb, &fq->perturbation);
return reciprocal_scale(hash, fq->flows_cnt);
}
@@ -308,12 +308,12 @@ static int fq_init(struct fq *fq, int flows_cnt)
INIT_LIST_HEAD(&fq->backlogs);
spin_lock_init(&fq->lock);
fq->flows_cnt = max_t(u32, flows_cnt, 1);
- fq->perturbation = prandom_u32();
+ get_random_bytes(&fq->perturbation, sizeof(fq->perturbation));
fq->quantum = 300;
fq->limit = 8192;
fq->memory_limit = 16 << 20; /* 16 MBytes */
- fq->flows = kcalloc(fq->flows_cnt, sizeof(fq->flows[0]), GFP_KERNEL);
+ fq->flows = kvcalloc(fq->flows_cnt, sizeof(fq->flows[0]), GFP_KERNEL);
if (!fq->flows)
return -ENOMEM;
@@ -331,7 +331,7 @@ static void fq_reset(struct fq *fq,
for (i = 0; i < fq->flows_cnt; i++)
fq_flow_reset(fq, &fq->flows[i], free_func);
- kfree(fq->flows);
+ kvfree(fq->flows);
fq->flows = NULL;
}
diff --git a/include/net/hwbm.h b/include/net/hwbm.h
index 81643cf8a1c4..c81444611a22 100644
--- a/include/net/hwbm.h
+++ b/include/net/hwbm.h
@@ -21,9 +21,13 @@ void hwbm_buf_free(struct hwbm_pool *bm_pool, void *buf);
int hwbm_pool_refill(struct hwbm_pool *bm_pool, gfp_t gfp);
int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num);
#else
-void hwbm_buf_free(struct hwbm_pool *bm_pool, void *buf) {}
-int hwbm_pool_refill(struct hwbm_pool *bm_pool, gfp_t gfp) { return 0; }
-int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num)
+static inline void hwbm_buf_free(struct hwbm_pool *bm_pool, void *buf) {}
+
+static inline int hwbm_pool_refill(struct hwbm_pool *bm_pool, gfp_t gfp)
+{ return 0; }
+
+static inline int hwbm_pool_add(struct hwbm_pool *bm_pool,
+ unsigned int buf_num)
{ return 0; }
#endif /* CONFIG_HWBM */
#endif /* _HWBM_H */
diff --git a/include/net/ip.h b/include/net/ip.h
index 95bb77f95bcc..a2c61c36dc4a 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -185,7 +185,7 @@ static inline struct sk_buff *ip_fraglist_next(struct ip_fraglist_iter *iter)
}
struct ip_frag_state {
- struct iphdr *iph;
+ bool DF;
unsigned int hlen;
unsigned int ll_rs;
unsigned int mtu;
@@ -196,7 +196,7 @@ struct ip_frag_state {
};
void ip_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int ll_rs,
- unsigned int mtu, struct ip_frag_state *state);
+ unsigned int mtu, bool DF, struct ip_frag_state *state);
struct sk_buff *ip_frag_next(struct sk_buff *skb,
struct ip_frag_state *state);
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 3759167f91f5..078887c8c586 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -889,6 +889,7 @@ struct netns_ipvs {
struct delayed_work defense_work; /* Work handler */
int drop_rate;
int drop_counter;
+ int old_secure_tcp;
atomic_t dropentry;
/* locks in ctl.c */
spinlock_t dropentry_lock; /* drop entry handling */
diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h
index df528a623548..ea985aa7a6c5 100644
--- a/include/net/llc_conn.h
+++ b/include/net/llc_conn.h
@@ -104,7 +104,7 @@ void llc_sk_reset(struct sock *sk);
/* Access to a connection */
int llc_conn_state_process(struct sock *sk, struct sk_buff *skb);
-int llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb);
+void llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb);
void llc_conn_rtn_pdu(struct sock *sk, struct sk_buff *skb);
void llc_conn_resend_i_pdu_as_cmd(struct sock *sk, u8 nr, u8 first_p_bit);
void llc_conn_resend_i_pdu_as_rsp(struct sock *sk, u8 nr, u8 first_f_bit);
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 50a67bd6a434..b8452cc0e059 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -439,8 +439,8 @@ static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
unsigned long now = jiffies;
- if (neigh->used != now)
- neigh->used = now;
+ if (READ_ONCE(neigh->used) != now)
+ WRITE_ONCE(neigh->used, now);
if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE)))
return __neigh_event_send(neigh, skb);
return 0;
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index f8712bbeb2e0..c7e15a213ef2 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -52,6 +52,9 @@ struct bpf_prog;
#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
struct net {
+ /* First cache line can be often dirtied.
+ * Do not place here read-mostly fields.
+ */
refcount_t passive; /* To decide when the network
* namespace should be freed.
*/
@@ -60,7 +63,13 @@ struct net {
*/
spinlock_t rules_mod_lock;
- u32 hash_mix;
+ unsigned int dev_unreg_count;
+
+ unsigned int dev_base_seq; /* protected by rtnl_mutex */
+ int ifindex;
+
+ spinlock_t nsid_lock;
+ atomic_t fnhe_genid;
struct list_head list; /* list of network namespaces */
struct list_head exit_list; /* To linked to call pernet exit
@@ -76,11 +85,11 @@ struct net {
#endif
struct user_namespace *user_ns; /* Owning user namespace */
struct ucounts *ucounts;
- spinlock_t nsid_lock;
struct idr netns_ids;
struct ns_common ns;
+ struct list_head dev_base_head;
struct proc_dir_entry *proc_net;
struct proc_dir_entry *proc_net_stat;
@@ -93,17 +102,18 @@ struct net {
struct uevent_sock *uevent_sock; /* uevent socket */
- struct list_head dev_base_head;
struct hlist_head *dev_name_head;
struct hlist_head *dev_index_head;
- unsigned int dev_base_seq; /* protected by rtnl_mutex */
- int ifindex;
- unsigned int dev_unreg_count;
+ /* Note that @hash_mix can be read millions times per second,
+ * it is critical that it is on a read_mostly cache line.
+ */
+ u32 hash_mix;
+
+ struct net_device *loopback_dev; /* The loopback */
/* core fib_rules */
struct list_head rules_ops;
- struct net_device *loopback_dev; /* The loopback */
struct netns_core core;
struct netns_mib mib;
struct netns_packet packet;
@@ -171,7 +181,6 @@ struct net {
struct sock *crypto_nlsk;
#endif
struct sock *diag_nlsk;
- atomic_t fnhe_genid;
} __randomize_layout;
#include <linux/seq_file_net.h>
@@ -333,7 +342,7 @@ static inline struct net *read_pnet(const possible_net_t *pnet)
#define __net_initconst __initconst
#endif
-int peernet2id_alloc(struct net *net, struct net *peer);
+int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp);
int peernet2id(struct net *net, struct net *peer);
bool peernet_has_id(struct net *net, struct net *peer);
struct net *get_net_ns_by_id(struct net *net, int id);
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 001d294edf57..2d0275f13bbf 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -820,7 +820,8 @@ struct nft_expr_ops {
*/
struct nft_expr {
const struct nft_expr_ops *ops;
- unsigned char data[];
+ unsigned char data[]
+ __attribute__((aligned(__alignof__(u64))));
};
static inline void *nft_expr_priv(const struct nft_expr *expr)
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index fd178d58fa84..cf8b33213bbc 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -185,7 +185,7 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
static inline bool reqsk_queue_empty(const struct request_sock_queue *queue)
{
- return queue->rskq_accept_head == NULL;
+ return READ_ONCE(queue->rskq_accept_head) == NULL;
}
static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue *queue,
@@ -197,7 +197,7 @@ static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue
req = queue->rskq_accept_head;
if (req) {
sk_acceptq_removed(parent);
- queue->rskq_accept_head = req->dl_next;
+ WRITE_ONCE(queue->rskq_accept_head, req->dl_next);
if (queue->rskq_accept_head == NULL)
queue->rskq_accept_tail = NULL;
}
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 637548d54b3e..d80acda231ae 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -15,6 +15,7 @@
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/atomic.h>
+#include <linux/hashtable.h>
#include <net/gen_stats.h>
#include <net/rtnetlink.h>
#include <net/flow_offload.h>
@@ -362,6 +363,7 @@ struct tcf_proto {
bool deleting;
refcount_t refcnt;
struct rcu_head rcu;
+ struct hlist_node destroy_ht_node;
};
struct qdisc_skb_cb {
@@ -414,6 +416,8 @@ struct tcf_block {
struct list_head filter_chain_list;
} chain0;
struct rcu_head rcu;
+ DECLARE_HASHTABLE(proto_destroy_ht, 7);
+ struct mutex proto_destroy_lock; /* Lock for proto_destroy hashtable. */
};
#ifdef CONFIG_PROVE_LOCKING
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 5d60f13d2347..3ab5c6bbb90b 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -610,4 +610,9 @@ static inline __u32 sctp_min_frag_point(struct sctp_sock *sp, __u16 datasize)
return sctp_mtu_payload(sp, SCTP_DEFAULT_MINSEGMENT, datasize);
}
+static inline bool sctp_newsk_ready(const struct sock *sk)
+{
+ return sock_flag(sk, SOCK_DEAD) || sk->sk_socket;
+}
+
#endif /* __net_sctp_h__ */
diff --git a/include/net/sock.h b/include/net/sock.h
index 2c53f1a1d905..718e62fbe869 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -878,12 +878,17 @@ static inline bool sk_acceptq_is_full(const struct sock *sk)
*/
static inline int sk_stream_min_wspace(const struct sock *sk)
{
- return sk->sk_wmem_queued >> 1;
+ return READ_ONCE(sk->sk_wmem_queued) >> 1;
}
static inline int sk_stream_wspace(const struct sock *sk)
{
- return sk->sk_sndbuf - sk->sk_wmem_queued;
+ return READ_ONCE(sk->sk_sndbuf) - READ_ONCE(sk->sk_wmem_queued);
+}
+
+static inline void sk_wmem_queued_add(struct sock *sk, int val)
+{
+ WRITE_ONCE(sk->sk_wmem_queued, sk->sk_wmem_queued + val);
}
void sk_stream_write_space(struct sock *sk);
@@ -949,8 +954,8 @@ static inline void sk_incoming_cpu_update(struct sock *sk)
{
int cpu = raw_smp_processor_id();
- if (unlikely(sk->sk_incoming_cpu != cpu))
- sk->sk_incoming_cpu = cpu;
+ if (unlikely(READ_ONCE(sk->sk_incoming_cpu) != cpu))
+ WRITE_ONCE(sk->sk_incoming_cpu, cpu);
}
static inline void sock_rps_record_flow_hash(__u32 hash)
@@ -1207,7 +1212,7 @@ static inline void sk_refcnt_debug_release(const struct sock *sk)
static inline bool __sk_stream_memory_free(const struct sock *sk, int wake)
{
- if (sk->sk_wmem_queued >= sk->sk_sndbuf)
+ if (READ_ONCE(sk->sk_wmem_queued) >= READ_ONCE(sk->sk_sndbuf))
return false;
return sk->sk_prot->stream_memory_free ?
@@ -1467,7 +1472,7 @@ DECLARE_STATIC_KEY_FALSE(tcp_tx_skb_cache_key);
static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
{
sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
- sk->sk_wmem_queued -= skb->truesize;
+ sk_wmem_queued_add(sk, -skb->truesize);
sk_mem_uncharge(sk, skb->truesize);
if (static_branch_unlikely(&tcp_tx_skb_cache_key) &&
!sk->sk_tx_skb_cache && !skb_cloned(skb)) {
@@ -2014,7 +2019,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro
skb->len += copy;
skb->data_len += copy;
skb->truesize += copy;
- sk->sk_wmem_queued += copy;
+ sk_wmem_queued_add(sk, copy);
sk_mem_charge(sk, copy);
return 0;
}
@@ -2220,10 +2225,14 @@ static inline void sk_wake_async(const struct sock *sk, int how, int band)
static inline void sk_stream_moderate_sndbuf(struct sock *sk)
{
- if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK)) {
- sk->sk_sndbuf = min(sk->sk_sndbuf, sk->sk_wmem_queued >> 1);
- sk->sk_sndbuf = max_t(u32, sk->sk_sndbuf, SOCK_MIN_SNDBUF);
- }
+ u32 val;
+
+ if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
+ return;
+
+ val = min(sk->sk_sndbuf, sk->sk_wmem_queued >> 1);
+
+ WRITE_ONCE(sk->sk_sndbuf, max_t(u32, val, SOCK_MIN_SNDBUF));
}
struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
@@ -2233,12 +2242,17 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
* sk_page_frag - return an appropriate page_frag
* @sk: socket
*
- * If socket allocation mode allows current thread to sleep, it means its
- * safe to use the per task page_frag instead of the per socket one.
+ * Use the per task page_frag instead of the per socket one for
+ * optimization when we know that we're in the normal context and owns
+ * everything that's associated with %current.
+ *
+ * gfpflags_allow_blocking() isn't enough here as direct reclaim may nest
+ * inside other socket operations and end up recursing into sk_page_frag()
+ * while it's already in use.
*/
static inline struct page_frag *sk_page_frag(struct sock *sk)
{
- if (gfpflags_allow_blocking(sk->sk_allocation))
+ if (gfpflags_normal_context(sk->sk_allocation))
return &current->task_frag;
return &sk->sk_frag;
@@ -2251,7 +2265,7 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag);
*/
static inline bool sock_writeable(const struct sock *sk)
{
- return refcount_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1);
+ return refcount_read(&sk->sk_wmem_alloc) < (READ_ONCE(sk->sk_sndbuf) >> 1);
}
static inline gfp_t gfp_any(void)
@@ -2271,7 +2285,9 @@ static inline long sock_sndtimeo(const struct sock *sk, bool noblock)
static inline int sock_rcvlowat(const struct sock *sk, int waitall, int len)
{
- return (waitall ? len : min_t(int, sk->sk_rcvlowat, len)) ? : 1;
+ int v = waitall ? len : min_t(int, READ_ONCE(sk->sk_rcvlowat), len);
+
+ return v ?: 1;
}
/* Alas, with timeout socket operations are not restartable.
@@ -2326,7 +2342,7 @@ static inline ktime_t sock_read_timestamp(struct sock *sk)
return kt;
#else
- return sk->sk_stamp;
+ return READ_ONCE(sk->sk_stamp);
#endif
}
@@ -2337,7 +2353,7 @@ static inline void sock_write_timestamp(struct sock *sk, ktime_t kt)
sk->sk_stamp = kt;
write_sequnlock(&sk->sk_stamp_seq);
#else
- sk->sk_stamp = kt;
+ WRITE_ONCE(sk->sk_stamp, kt);
#endif
}
diff --git a/include/net/tcp.h b/include/net/tcp.h
index c9a3f9688223..ab4eb5eb5d07 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -258,7 +258,7 @@ static inline bool tcp_under_memory_pressure(const struct sock *sk)
mem_cgroup_under_socket_pressure(sk->sk_memcg))
return true;
- return tcp_memory_pressure;
+ return READ_ONCE(tcp_memory_pressure);
}
/*
* The next routines deal with comparing 32 bit unsigned ints
@@ -1380,13 +1380,14 @@ static inline int tcp_win_from_space(const struct sock *sk, int space)
/* Note: caller must be prepared to deal with negative returns */
static inline int tcp_space(const struct sock *sk)
{
- return tcp_win_from_space(sk, sk->sk_rcvbuf - sk->sk_backlog.len -
+ return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) -
+ READ_ONCE(sk->sk_backlog.len) -
atomic_read(&sk->sk_rmem_alloc));
}
static inline int tcp_full_space(const struct sock *sk)
{
- return tcp_win_from_space(sk, sk->sk_rcvbuf);
+ return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf));
}
extern void tcp_openreq_init_rwin(struct request_sock *req,
@@ -1916,7 +1917,8 @@ static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp)
static inline bool tcp_stream_memory_free(const struct sock *sk, int wake)
{
const struct tcp_sock *tp = tcp_sk(sk);
- u32 notsent_bytes = tp->write_seq - tp->snd_nxt;
+ u32 notsent_bytes = READ_ONCE(tp->write_seq) -
+ READ_ONCE(tp->snd_nxt);
return (notsent_bytes << wake) < tcp_notsent_lowat(tp);
}
diff --git a/include/net/tls.h b/include/net/tls.h
index c664e6dba0d1..f4ad831eaa02 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -40,6 +40,7 @@
#include <linux/socket.h>
#include <linux/tcp.h>
#include <linux/skmsg.h>
+#include <linux/mutex.h>
#include <linux/netdevice.h>
#include <linux/rcupdate.h>
@@ -269,6 +270,10 @@ struct tls_context {
bool in_tcp_sendpages;
bool pending_open_record_frags;
+
+ struct mutex tx_lock; /* protects partially_sent_* fields and
+ * per-type TX fields
+ */
unsigned long flags;
/* cache cold stuff */
@@ -351,6 +356,8 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx);
void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx);
void tls_sw_strparser_done(struct tls_context *tls_ctx);
int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
+int tls_sw_sendpage_locked(struct sock *sk, struct page *page,
+ int offset, size_t size, int flags);
int tls_sw_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags);
void tls_sw_cancel_work_tx(struct tls_context *tls_ctx);
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 335283dbe9b3..373aadcfea21 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -197,6 +197,7 @@ struct vxlan_rdst {
u8 offloaded:1;
__be32 remote_vni;
u32 remote_ifindex;
+ struct net_device *remote_dev;
struct list_head list;
struct rcu_head rcu;
struct dst_cache dst_cache;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 6a47ba85c54c..e7e733add99f 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -366,7 +366,7 @@ struct ib_tm_caps {
struct ib_cq_init_attr {
unsigned int cqe;
- int comp_vector;
+ u32 comp_vector;
u32 flags;
};
diff --git a/include/scsi/scsi_eh.h b/include/scsi/scsi_eh.h
index 3810b340551c..6bd5ed695a5e 100644
--- a/include/scsi/scsi_eh.h
+++ b/include/scsi/scsi_eh.h
@@ -32,6 +32,7 @@ extern int scsi_ioctl_reset(struct scsi_device *, int __user *);
struct scsi_eh_save {
/* saved state */
int result;
+ unsigned int resid_len;
int eh_eflags;
enum dma_data_direction data_direction;
unsigned underflow;
diff --git a/include/sound/hda_register.h b/include/sound/hda_register.h
index 0fd39295b426..057d2a2d0bd0 100644
--- a/include/sound/hda_register.h
+++ b/include/sound/hda_register.h
@@ -264,6 +264,9 @@ enum { SDI0, SDI1, SDI2, SDI3, SDO0, SDO1, SDO2, SDO3 };
#define AZX_REG_ML_LOUTPAY 0x20
#define AZX_REG_ML_LINPAY 0x30
+/* bit0 is reserved, with BIT(1) mapping to stream1 */
+#define ML_LOSIDV_STREAM_MASK 0xFFFE
+
#define ML_LCTL_SCF_MASK 0xF
#define AZX_MLCTL_SPA (0x1 << 16)
#define AZX_MLCTL_CPA (0x1 << 23)
diff --git a/include/sound/simple_card_utils.h b/include/sound/simple_card_utils.h
index 985a5f583de4..31f76b6abf71 100644
--- a/include/sound/simple_card_utils.h
+++ b/include/sound/simple_card_utils.h
@@ -135,9 +135,9 @@ int asoc_simple_init_priv(struct asoc_simple_priv *priv,
struct link_info *li);
#ifdef DEBUG
-inline void asoc_simple_debug_dai(struct asoc_simple_priv *priv,
- char *name,
- struct asoc_simple_dai *dai)
+static inline void asoc_simple_debug_dai(struct asoc_simple_priv *priv,
+ char *name,
+ struct asoc_simple_dai *dai)
{
struct device *dev = simple_priv_to_dev(priv);
@@ -167,7 +167,7 @@ inline void asoc_simple_debug_dai(struct asoc_simple_priv *priv,
dev_dbg(dev, "%s clk %luHz\n", name, clk_get_rate(dai->clk));
}
-inline void asoc_simple_debug_info(struct asoc_simple_priv *priv)
+static inline void asoc_simple_debug_info(struct asoc_simple_priv *priv)
{
struct snd_soc_card *card = simple_priv_to_card(priv);
struct device *dev = simple_priv_to_dev(priv);
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 5df604de4f11..620bf1b38fba 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -19,7 +19,7 @@ struct btrfs_delayed_ref_node;
struct btrfs_delayed_tree_ref;
struct btrfs_delayed_data_ref;
struct btrfs_delayed_ref_head;
-struct btrfs_block_group_cache;
+struct btrfs_block_group;
struct btrfs_free_cluster;
struct map_lookup;
struct extent_buffer;
@@ -170,7 +170,7 @@ DECLARE_EVENT_CLASS(btrfs__inode,
TP_STRUCT__entry_btrfs(
__field( u64, ino )
- __field( blkcnt_t, blocks )
+ __field( u64, blocks )
__field( u64, disk_i_size )
__field( u64, generation )
__field( u64, last_trans )
@@ -194,7 +194,7 @@ DECLARE_EVENT_CLASS(btrfs__inode,
show_root_type(__entry->root_objectid),
__entry->generation,
__entry->ino,
- (unsigned long long)__entry->blocks,
+ __entry->blocks,
__entry->disk_i_size,
__entry->last_trans,
__entry->logged_trans)
@@ -292,7 +292,7 @@ TRACE_EVENT_CONDITION(btrfs_get_extent,
TRACE_EVENT(btrfs_handle_em_exist,
- TP_PROTO(struct btrfs_fs_info *fs_info,
+ TP_PROTO(const struct btrfs_fs_info *fs_info,
const struct extent_map *existing, const struct extent_map *map,
u64 start, u64 len),
@@ -330,8 +330,8 @@ TRACE_EVENT(btrfs_handle_em_exist,
/* file extent item */
DECLARE_EVENT_CLASS(btrfs__file_extent_item_regular,
- TP_PROTO(struct btrfs_inode *bi, struct extent_buffer *l,
- struct btrfs_file_extent_item *fi, u64 start),
+ TP_PROTO(const struct btrfs_inode *bi, const struct extent_buffer *l,
+ const struct btrfs_file_extent_item *fi, u64 start),
TP_ARGS(bi, l, fi, start),
@@ -385,8 +385,8 @@ DECLARE_EVENT_CLASS(btrfs__file_extent_item_regular,
DECLARE_EVENT_CLASS(
btrfs__file_extent_item_inline,
- TP_PROTO(struct btrfs_inode *bi, struct extent_buffer *l,
- struct btrfs_file_extent_item *fi, int slot, u64 start),
+ TP_PROTO(const struct btrfs_inode *bi, const struct extent_buffer *l,
+ const struct btrfs_file_extent_item *fi, int slot, u64 start),
TP_ARGS(bi, l, fi, slot, start),
@@ -426,8 +426,8 @@ DECLARE_EVENT_CLASS(
DEFINE_EVENT(
btrfs__file_extent_item_regular, btrfs_get_extent_show_fi_regular,
- TP_PROTO(struct btrfs_inode *bi, struct extent_buffer *l,
- struct btrfs_file_extent_item *fi, u64 start),
+ TP_PROTO(const struct btrfs_inode *bi, const struct extent_buffer *l,
+ const struct btrfs_file_extent_item *fi, u64 start),
TP_ARGS(bi, l, fi, start)
);
@@ -435,8 +435,8 @@ DEFINE_EVENT(
DEFINE_EVENT(
btrfs__file_extent_item_regular, btrfs_truncate_show_fi_regular,
- TP_PROTO(struct btrfs_inode *bi, struct extent_buffer *l,
- struct btrfs_file_extent_item *fi, u64 start),
+ TP_PROTO(const struct btrfs_inode *bi, const struct extent_buffer *l,
+ const struct btrfs_file_extent_item *fi, u64 start),
TP_ARGS(bi, l, fi, start)
);
@@ -444,8 +444,8 @@ DEFINE_EVENT(
DEFINE_EVENT(
btrfs__file_extent_item_inline, btrfs_get_extent_show_fi_inline,
- TP_PROTO(struct btrfs_inode *bi, struct extent_buffer *l,
- struct btrfs_file_extent_item *fi, int slot, u64 start),
+ TP_PROTO(const struct btrfs_inode *bi, const struct extent_buffer *l,
+ const struct btrfs_file_extent_item *fi, int slot, u64 start),
TP_ARGS(bi, l, fi, slot, start)
);
@@ -453,8 +453,8 @@ DEFINE_EVENT(
DEFINE_EVENT(
btrfs__file_extent_item_inline, btrfs_truncate_show_fi_inline,
- TP_PROTO(struct btrfs_inode *bi, struct extent_buffer *l,
- struct btrfs_file_extent_item *fi, int slot, u64 start),
+ TP_PROTO(const struct btrfs_inode *bi, const struct extent_buffer *l,
+ const struct btrfs_file_extent_item *fi, int slot, u64 start),
TP_ARGS(bi, l, fi, slot, start)
);
@@ -574,7 +574,7 @@ DECLARE_EVENT_CLASS(btrfs__writepage,
__field( char, for_kupdate )
__field( char, for_reclaim )
__field( char, range_cyclic )
- __field( pgoff_t, writeback_index )
+ __field( unsigned long, writeback_index )
__field( u64, root_objectid )
),
@@ -603,7 +603,7 @@ DECLARE_EVENT_CLASS(btrfs__writepage,
__entry->range_start, __entry->range_end,
__entry->for_kupdate,
__entry->for_reclaim, __entry->range_cyclic,
- (unsigned long)__entry->writeback_index)
+ __entry->writeback_index)
);
DEFINE_EVENT(btrfs__writepage, __extent_writepage,
@@ -622,7 +622,7 @@ TRACE_EVENT(btrfs_writepage_end_io_hook,
TP_STRUCT__entry_btrfs(
__field( u64, ino )
- __field( pgoff_t, index )
+ __field( unsigned long, index )
__field( u64, start )
__field( u64, end )
__field( int, uptodate )
@@ -642,7 +642,7 @@ TRACE_EVENT(btrfs_writepage_end_io_hook,
TP_printk_btrfs("root=%llu(%s) ino=%llu page_index=%lu start=%llu "
"end=%llu uptodate=%d",
show_root_type(__entry->root_objectid),
- __entry->ino, (unsigned long)__entry->index,
+ __entry->ino, __entry->index,
__entry->start,
__entry->end, __entry->uptodate)
);
@@ -699,7 +699,7 @@ TRACE_EVENT(btrfs_sync_fs,
TRACE_EVENT(btrfs_add_block_group,
TP_PROTO(const struct btrfs_fs_info *fs_info,
- const struct btrfs_block_group_cache *block_group, int create),
+ const struct btrfs_block_group *block_group, int create),
TP_ARGS(fs_info, block_group, create),
@@ -713,11 +713,10 @@ TRACE_EVENT(btrfs_add_block_group,
),
TP_fast_assign_btrfs(fs_info,
- __entry->offset = block_group->key.objectid;
- __entry->size = block_group->key.offset;
+ __entry->offset = block_group->start;
+ __entry->size = block_group->length;
__entry->flags = block_group->flags;
- __entry->bytes_used =
- btrfs_block_group_used(&block_group->item);
+ __entry->bytes_used = block_group->used;
__entry->bytes_super = block_group->bytes_super;
__entry->create = create;
),
@@ -1018,7 +1017,7 @@ TRACE_EVENT(btrfs_cow_block,
TRACE_EVENT(btrfs_space_reservation,
- TP_PROTO(const struct btrfs_fs_info *fs_info, char *type, u64 val,
+ TP_PROTO(const struct btrfs_fs_info *fs_info, const char *type, u64 val,
u64 bytes, int reserve),
TP_ARGS(fs_info, type, val, bytes, reserve),
@@ -1051,7 +1050,7 @@ TRACE_EVENT(btrfs_space_reservation,
TRACE_EVENT(btrfs_trigger_flush,
TP_PROTO(const struct btrfs_fs_info *fs_info, u64 flags, u64 bytes,
- int flush, char *reason),
+ int flush, const char *reason),
TP_ARGS(fs_info, flags, bytes, flush, reason),
@@ -1185,7 +1184,7 @@ TRACE_EVENT(find_free_extent,
DECLARE_EVENT_CLASS(btrfs__reserve_extent,
- TP_PROTO(const struct btrfs_block_group_cache *block_group, u64 start,
+ TP_PROTO(const struct btrfs_block_group *block_group, u64 start,
u64 len),
TP_ARGS(block_group, start, len),
@@ -1198,7 +1197,7 @@ DECLARE_EVENT_CLASS(btrfs__reserve_extent,
),
TP_fast_assign_btrfs(block_group->fs_info,
- __entry->bg_objectid = block_group->key.objectid;
+ __entry->bg_objectid = block_group->start;
__entry->flags = block_group->flags;
__entry->start = start;
__entry->len = len;
@@ -1215,7 +1214,7 @@ DECLARE_EVENT_CLASS(btrfs__reserve_extent,
DEFINE_EVENT(btrfs__reserve_extent, btrfs_reserve_extent,
- TP_PROTO(const struct btrfs_block_group_cache *block_group, u64 start,
+ TP_PROTO(const struct btrfs_block_group *block_group, u64 start,
u64 len),
TP_ARGS(block_group, start, len)
@@ -1223,7 +1222,7 @@ DEFINE_EVENT(btrfs__reserve_extent, btrfs_reserve_extent,
DEFINE_EVENT(btrfs__reserve_extent, btrfs_reserve_extent_cluster,
- TP_PROTO(const struct btrfs_block_group_cache *block_group, u64 start,
+ TP_PROTO(const struct btrfs_block_group *block_group, u64 start,
u64 len),
TP_ARGS(block_group, start, len)
@@ -1231,7 +1230,7 @@ DEFINE_EVENT(btrfs__reserve_extent, btrfs_reserve_extent_cluster,
TRACE_EVENT(btrfs_find_cluster,
- TP_PROTO(const struct btrfs_block_group_cache *block_group, u64 start,
+ TP_PROTO(const struct btrfs_block_group *block_group, u64 start,
u64 bytes, u64 empty_size, u64 min_bytes),
TP_ARGS(block_group, start, bytes, empty_size, min_bytes),
@@ -1246,7 +1245,7 @@ TRACE_EVENT(btrfs_find_cluster,
),
TP_fast_assign_btrfs(block_group->fs_info,
- __entry->bg_objectid = block_group->key.objectid;
+ __entry->bg_objectid = block_group->start;
__entry->flags = block_group->flags;
__entry->start = start;
__entry->bytes = bytes;
@@ -1264,7 +1263,7 @@ TRACE_EVENT(btrfs_find_cluster,
TRACE_EVENT(btrfs_failed_cluster_setup,
- TP_PROTO(const struct btrfs_block_group_cache *block_group),
+ TP_PROTO(const struct btrfs_block_group *block_group),
TP_ARGS(block_group),
@@ -1273,7 +1272,7 @@ TRACE_EVENT(btrfs_failed_cluster_setup,
),
TP_fast_assign_btrfs(block_group->fs_info,
- __entry->bg_objectid = block_group->key.objectid;
+ __entry->bg_objectid = block_group->start;
),
TP_printk_btrfs("block_group=%llu", __entry->bg_objectid)
@@ -1281,7 +1280,7 @@ TRACE_EVENT(btrfs_failed_cluster_setup,
TRACE_EVENT(btrfs_setup_cluster,
- TP_PROTO(const struct btrfs_block_group_cache *block_group,
+ TP_PROTO(const struct btrfs_block_group *block_group,
const struct btrfs_free_cluster *cluster,
u64 size, int bitmap),
@@ -1297,7 +1296,7 @@ TRACE_EVENT(btrfs_setup_cluster,
),
TP_fast_assign_btrfs(block_group->fs_info,
- __entry->bg_objectid = block_group->key.objectid;
+ __entry->bg_objectid = block_group->start;
__entry->flags = block_group->flags;
__entry->start = cluster->window_start;
__entry->max_size = cluster->max_size;
@@ -1325,17 +1324,17 @@ TRACE_EVENT(alloc_extent_state,
TP_STRUCT__entry(
__field(const struct extent_state *, state)
__field(gfp_t, mask)
- __field(unsigned long, ip)
+ __field(const void*, ip)
),
TP_fast_assign(
__entry->state = state,
__entry->mask = mask,
- __entry->ip = IP
+ __entry->ip = (const void *)IP
),
TP_printk("state=%p mask=%s caller=%pS", __entry->state,
- show_gfp_flags(__entry->mask), (const void *)__entry->ip)
+ show_gfp_flags(__entry->mask), __entry->ip)
);
TRACE_EVENT(free_extent_state,
@@ -1346,16 +1345,15 @@ TRACE_EVENT(free_extent_state,
TP_STRUCT__entry(
__field(const struct extent_state *, state)
- __field(unsigned long, ip)
+ __field(const void*, ip)
),
TP_fast_assign(
__entry->state = state,
- __entry->ip = IP
+ __entry->ip = (const void *)IP
),
- TP_printk("state=%p caller=%pS", __entry->state,
- (const void *)__entry->ip)
+ TP_printk("state=%p caller=%pS", __entry->state, __entry->ip)
);
DECLARE_EVENT_CLASS(btrfs__work,
@@ -1389,9 +1387,9 @@ DECLARE_EVENT_CLASS(btrfs__work,
);
/*
- * For situiations when the work is freed, we pass fs_info and a tag that that
- * matches address of the work structure so it can be paired with the
- * scheduling event.
+ * For situations when the work is freed, we pass fs_info and a tag that matches
+ * the address of the work structure so it can be paired with the scheduling
+ * event. DO NOT add anything here that dereferences wtag.
*/
DECLARE_EVENT_CLASS(btrfs__work__done,
@@ -1567,8 +1565,7 @@ DECLARE_EVENT_CLASS(btrfs_qgroup_extent,
),
TP_printk_btrfs("bytenr=%llu num_bytes=%llu",
- (unsigned long long)__entry->bytenr,
- (unsigned long long)__entry->num_bytes)
+ __entry->bytenr, __entry->num_bytes)
);
DEFINE_EVENT(btrfs_qgroup_extent, btrfs_qgroup_account_extents,
@@ -1644,7 +1641,7 @@ TRACE_EVENT(btrfs_qgroup_account_extent,
TRACE_EVENT(qgroup_update_counters,
TP_PROTO(const struct btrfs_fs_info *fs_info,
- struct btrfs_qgroup *qgroup,
+ const struct btrfs_qgroup *qgroup,
u64 cur_old_count, u64 cur_new_count),
TP_ARGS(fs_info, qgroup, cur_old_count, cur_new_count),
@@ -1688,6 +1685,7 @@ TRACE_EVENT(qgroup_update_reserve,
__entry->qgid = qgroup->qgroupid;
__entry->cur_reserved = qgroup->rsv.values[type];
__entry->diff = diff;
+ __entry->type = type;
),
TP_printk_btrfs("qgid=%llu type=%s cur_reserved=%llu diff=%lld",
@@ -1710,6 +1708,7 @@ TRACE_EVENT(qgroup_meta_reserve,
TP_fast_assign_btrfs(root->fs_info,
__entry->refroot = root->root_key.objectid;
__entry->diff = diff;
+ __entry->type = type;
),
TP_printk_btrfs("refroot=%llu(%s) type=%s diff=%lld",
@@ -1726,7 +1725,6 @@ TRACE_EVENT(qgroup_meta_convert,
TP_STRUCT__entry_btrfs(
__field( u64, refroot )
__field( s64, diff )
- __field( int, type )
),
TP_fast_assign_btrfs(root->fs_info,
@@ -1824,7 +1822,7 @@ DEFINE_EVENT(btrfs__prelim_ref, btrfs_prelim_ref_insert,
);
TRACE_EVENT(btrfs_inode_mod_outstanding_extents,
- TP_PROTO(struct btrfs_root *root, u64 ino, int mod),
+ TP_PROTO(const struct btrfs_root *root, u64 ino, int mod),
TP_ARGS(root, ino, mod),
@@ -1846,7 +1844,7 @@ TRACE_EVENT(btrfs_inode_mod_outstanding_extents,
);
DECLARE_EVENT_CLASS(btrfs__block_group,
- TP_PROTO(const struct btrfs_block_group_cache *bg_cache),
+ TP_PROTO(const struct btrfs_block_group *bg_cache),
TP_ARGS(bg_cache),
@@ -1858,9 +1856,9 @@ DECLARE_EVENT_CLASS(btrfs__block_group,
),
TP_fast_assign_btrfs(bg_cache->fs_info,
- __entry->bytenr = bg_cache->key.objectid,
- __entry->len = bg_cache->key.offset,
- __entry->used = btrfs_block_group_used(&bg_cache->item);
+ __entry->bytenr = bg_cache->start,
+ __entry->len = bg_cache->length,
+ __entry->used = bg_cache->used;
__entry->flags = bg_cache->flags;
),
@@ -1870,19 +1868,19 @@ DECLARE_EVENT_CLASS(btrfs__block_group,
);
DEFINE_EVENT(btrfs__block_group, btrfs_remove_block_group,
- TP_PROTO(const struct btrfs_block_group_cache *bg_cache),
+ TP_PROTO(const struct btrfs_block_group *bg_cache),
TP_ARGS(bg_cache)
);
DEFINE_EVENT(btrfs__block_group, btrfs_add_unused_block_group,
- TP_PROTO(const struct btrfs_block_group_cache *bg_cache),
+ TP_PROTO(const struct btrfs_block_group *bg_cache),
TP_ARGS(bg_cache)
);
DEFINE_EVENT(btrfs__block_group, btrfs_skip_unused_block_group,
- TP_PROTO(const struct btrfs_block_group_cache *bg_cache),
+ TP_PROTO(const struct btrfs_block_group *bg_cache),
TP_ARGS(bg_cache)
);
@@ -1905,7 +1903,7 @@ TRACE_EVENT(btrfs_set_extent_bit,
TP_fast_assign_btrfs(tree->fs_info,
__entry->owner = tree->owner;
if (tree->private_data) {
- struct inode *inode = tree->private_data;
+ const struct inode *inode = tree->private_data;
__entry->ino = btrfs_ino(BTRFS_I(inode));
__entry->rootid =
@@ -1944,7 +1942,7 @@ TRACE_EVENT(btrfs_clear_extent_bit,
TP_fast_assign_btrfs(tree->fs_info,
__entry->owner = tree->owner;
if (tree->private_data) {
- struct inode *inode = tree->private_data;
+ const struct inode *inode = tree->private_data;
__entry->ino = btrfs_ino(BTRFS_I(inode));
__entry->rootid =
@@ -1984,7 +1982,7 @@ TRACE_EVENT(btrfs_convert_extent_bit,
TP_fast_assign_btrfs(tree->fs_info,
__entry->owner = tree->owner;
if (tree->private_data) {
- struct inode *inode = tree->private_data;
+ const struct inode *inode = tree->private_data;
__entry->ino = btrfs_ino(BTRFS_I(inode));
__entry->rootid =
@@ -2093,8 +2091,8 @@ DEFINE_BTRFS_LOCK_EVENT(btrfs_tree_read_lock_atomic);
DECLARE_EVENT_CLASS(btrfs__space_info_update,
- TP_PROTO(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *sinfo, u64 old, s64 diff),
+ TP_PROTO(const struct btrfs_fs_info *fs_info,
+ const struct btrfs_space_info *sinfo, u64 old, s64 diff),
TP_ARGS(fs_info, sinfo, old, diff),
@@ -2116,16 +2114,16 @@ DECLARE_EVENT_CLASS(btrfs__space_info_update,
DEFINE_EVENT(btrfs__space_info_update, update_bytes_may_use,
- TP_PROTO(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *sinfo, u64 old, s64 diff),
+ TP_PROTO(const struct btrfs_fs_info *fs_info,
+ const struct btrfs_space_info *sinfo, u64 old, s64 diff),
TP_ARGS(fs_info, sinfo, old, diff)
);
DEFINE_EVENT(btrfs__space_info_update, update_bytes_pinned,
- TP_PROTO(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *sinfo, u64 old, s64 diff),
+ TP_PROTO(const struct btrfs_fs_info *fs_info,
+ const struct btrfs_space_info *sinfo, u64 old, s64 diff),
TP_ARGS(fs_info, sinfo, old, diff)
);
diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h
new file mode 100644
index 000000000000..72a4d0174b02
--- /dev/null
+++ b/include/trace/events/io_uring.h
@@ -0,0 +1,358 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM io_uring
+
+#if !defined(_TRACE_IO_URING_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_IO_URING_H
+
+#include <linux/tracepoint.h>
+
+struct io_wq_work;
+
+/**
+ * io_uring_create - called after a new io_uring context was prepared
+ *
+ * @fd: corresponding file descriptor
+ * @ctx: pointer to a ring context structure
+ * @sq_entries: actual SQ size
+ * @cq_entries: actual CQ size
+ * @flags: SQ ring flags, provided to io_uring_setup(2)
+ *
+ * Allows to trace io_uring creation and provide pointer to a context, that can
+ * be used later to find correlated events.
+ */
+TRACE_EVENT(io_uring_create,
+
+ TP_PROTO(int fd, void *ctx, u32 sq_entries, u32 cq_entries, u32 flags),
+
+ TP_ARGS(fd, ctx, sq_entries, cq_entries, flags),
+
+ TP_STRUCT__entry (
+ __field( int, fd )
+ __field( void *, ctx )
+ __field( u32, sq_entries )
+ __field( u32, cq_entries )
+ __field( u32, flags )
+ ),
+
+ TP_fast_assign(
+ __entry->fd = fd;
+ __entry->ctx = ctx;
+ __entry->sq_entries = sq_entries;
+ __entry->cq_entries = cq_entries;
+ __entry->flags = flags;
+ ),
+
+ TP_printk("ring %p, fd %d sq size %d, cq size %d, flags %d",
+ __entry->ctx, __entry->fd, __entry->sq_entries,
+ __entry->cq_entries, __entry->flags)
+);
+
+/**
+ * io_uring_register - called after a buffer/file/eventfd was succesfully
+ * registered for a ring
+ *
+ * @ctx: pointer to a ring context structure
+ * @opcode: describes which operation to perform
+ * @nr_user_files: number of registered files
+ * @nr_user_bufs: number of registered buffers
+ * @cq_ev_fd: whether eventfs registered or not
+ * @ret: return code
+ *
+ * Allows to trace fixed files/buffers/eventfds, that could be registered to
+ * avoid an overhead of getting references to them for every operation. This
+ * event, together with io_uring_file_get, can provide a full picture of how
+ * much overhead one can reduce via fixing.
+ */
+TRACE_EVENT(io_uring_register,
+
+ TP_PROTO(void *ctx, unsigned opcode, unsigned nr_files,
+ unsigned nr_bufs, bool eventfd, long ret),
+
+ TP_ARGS(ctx, opcode, nr_files, nr_bufs, eventfd, ret),
+
+ TP_STRUCT__entry (
+ __field( void *, ctx )
+ __field( unsigned, opcode )
+ __field( unsigned, nr_files )
+ __field( unsigned, nr_bufs )
+ __field( bool, eventfd )
+ __field( long, ret )
+ ),
+
+ TP_fast_assign(
+ __entry->ctx = ctx;
+ __entry->opcode = opcode;
+ __entry->nr_files = nr_files;
+ __entry->nr_bufs = nr_bufs;
+ __entry->eventfd = eventfd;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("ring %p, opcode %d, nr_user_files %d, nr_user_bufs %d, "
+ "eventfd %d, ret %ld",
+ __entry->ctx, __entry->opcode, __entry->nr_files,
+ __entry->nr_bufs, __entry->eventfd, __entry->ret)
+);
+
+/**
+ * io_uring_file_get - called before getting references to an SQE file
+ *
+ * @ctx: pointer to a ring context structure
+ * @fd: SQE file descriptor
+ *
+ * Allows to trace out how often an SQE file reference is obtained, which can
+ * help figuring out if it makes sense to use fixed files, or check that fixed
+ * files are used correctly.
+ */
+TRACE_EVENT(io_uring_file_get,
+
+ TP_PROTO(void *ctx, int fd),
+
+ TP_ARGS(ctx, fd),
+
+ TP_STRUCT__entry (
+ __field( void *, ctx )
+ __field( int, fd )
+ ),
+
+ TP_fast_assign(
+ __entry->ctx = ctx;
+ __entry->fd = fd;
+ ),
+
+ TP_printk("ring %p, fd %d", __entry->ctx, __entry->fd)
+);
+
+/**
+ * io_uring_queue_async_work - called before submitting a new async work
+ *
+ * @ctx: pointer to a ring context structure
+ * @hashed: type of workqueue, hashed or normal
+ * @req: pointer to a submitted request
+ * @work: pointer to a submitted io_wq_work
+ *
+ * Allows to trace asynchronous work submission.
+ */
+TRACE_EVENT(io_uring_queue_async_work,
+
+ TP_PROTO(void *ctx, int rw, void * req, struct io_wq_work *work,
+ unsigned int flags),
+
+ TP_ARGS(ctx, rw, req, work, flags),
+
+ TP_STRUCT__entry (
+ __field( void *, ctx )
+ __field( int, rw )
+ __field( void *, req )
+ __field( struct io_wq_work *, work )
+ __field( unsigned int, flags )
+ ),
+
+ TP_fast_assign(
+ __entry->ctx = ctx;
+ __entry->rw = rw;
+ __entry->req = req;
+ __entry->work = work;
+ __entry->flags = flags;
+ ),
+
+ TP_printk("ring %p, request %p, flags %d, %s queue, work %p",
+ __entry->ctx, __entry->req, __entry->flags,
+ __entry->rw ? "hashed" : "normal", __entry->work)
+);
+
+/**
+ * io_uring_defer_list - called before the io_uring work added into defer_list
+ *
+ * @ctx: pointer to a ring context structure
+ * @req: pointer to a deferred request
+ * @shadow: whether request is shadow or not
+ *
+ * Allows to track deferred requests, to get an insight about what requests are
+ * not started immediately.
+ */
+TRACE_EVENT(io_uring_defer,
+
+ TP_PROTO(void *ctx, void *req, bool shadow),
+
+ TP_ARGS(ctx, req, shadow),
+
+ TP_STRUCT__entry (
+ __field( void *, ctx )
+ __field( void *, req )
+ __field( bool, shadow )
+ ),
+
+ TP_fast_assign(
+ __entry->ctx = ctx;
+ __entry->req = req;
+ __entry->shadow = shadow;
+ ),
+
+ TP_printk("ring %p, request %p%s", __entry->ctx, __entry->req,
+ __entry->shadow ? ", shadow": "")
+);
+
+/**
+ * io_uring_link - called before the io_uring request added into link_list of
+ * another request
+ *
+ * @ctx: pointer to a ring context structure
+ * @req: pointer to a linked request
+ * @target_req: pointer to a previous request, that would contain @req
+ *
+ * Allows to track linked requests, to understand dependencies between requests
+ * and how does it influence their execution flow.
+ */
+TRACE_EVENT(io_uring_link,
+
+ TP_PROTO(void *ctx, void *req, void *target_req),
+
+ TP_ARGS(ctx, req, target_req),
+
+ TP_STRUCT__entry (
+ __field( void *, ctx )
+ __field( void *, req )
+ __field( void *, target_req )
+ ),
+
+ TP_fast_assign(
+ __entry->ctx = ctx;
+ __entry->req = req;
+ __entry->target_req = target_req;
+ ),
+
+ TP_printk("ring %p, request %p linked after %p",
+ __entry->ctx, __entry->req, __entry->target_req)
+);
+
+/**
+ * io_uring_cqring_wait - called before start waiting for an available CQE
+ *
+ * @ctx: pointer to a ring context structure
+ * @min_events: minimal number of events to wait for
+ *
+ * Allows to track waiting for CQE, so that we can e.g. troubleshoot
+ * situations, when an application wants to wait for an event, that never
+ * comes.
+ */
+TRACE_EVENT(io_uring_cqring_wait,
+
+ TP_PROTO(void *ctx, int min_events),
+
+ TP_ARGS(ctx, min_events),
+
+ TP_STRUCT__entry (
+ __field( void *, ctx )
+ __field( int, min_events )
+ ),
+
+ TP_fast_assign(
+ __entry->ctx = ctx;
+ __entry->min_events = min_events;
+ ),
+
+ TP_printk("ring %p, min_events %d", __entry->ctx, __entry->min_events)
+);
+
+/**
+ * io_uring_fail_link - called before failing a linked request
+ *
+ * @req: request, which links were cancelled
+ * @link: cancelled link
+ *
+ * Allows to track linked requests cancellation, to see not only that some work
+ * was cancelled, but also which request was the reason.
+ */
+TRACE_EVENT(io_uring_fail_link,
+
+ TP_PROTO(void *req, void *link),
+
+ TP_ARGS(req, link),
+
+ TP_STRUCT__entry (
+ __field( void *, req )
+ __field( void *, link )
+ ),
+
+ TP_fast_assign(
+ __entry->req = req;
+ __entry->link = link;
+ ),
+
+ TP_printk("request %p, link %p", __entry->req, __entry->link)
+);
+
+/**
+ * io_uring_complete - called when completing an SQE
+ *
+ * @ctx: pointer to a ring context structure
+ * @user_data: user data associated with the request
+ * @res: result of the request
+ *
+ */
+TRACE_EVENT(io_uring_complete,
+
+ TP_PROTO(void *ctx, u64 user_data, long res),
+
+ TP_ARGS(ctx, user_data, res),
+
+ TP_STRUCT__entry (
+ __field( void *, ctx )
+ __field( u64, user_data )
+ __field( long, res )
+ ),
+
+ TP_fast_assign(
+ __entry->ctx = ctx;
+ __entry->user_data = user_data;
+ __entry->res = res;
+ ),
+
+ TP_printk("ring %p, user_data 0x%llx, result %ld",
+ __entry->ctx, (unsigned long long)__entry->user_data,
+ __entry->res)
+);
+
+
+/**
+ * io_uring_submit_sqe - called before submitting one SQE
+ *
+ * @ctx: pointer to a ring context structure
+ * @user_data: user data associated with the request
+ * @force_nonblock: whether a context blocking or not
+ * @sq_thread: true if sq_thread has submitted this SQE
+ *
+ * Allows to track SQE submitting, to understand what was the source of it, SQ
+ * thread or io_uring_enter call.
+ */
+TRACE_EVENT(io_uring_submit_sqe,
+
+ TP_PROTO(void *ctx, u64 user_data, bool force_nonblock, bool sq_thread),
+
+ TP_ARGS(ctx, user_data, force_nonblock, sq_thread),
+
+ TP_STRUCT__entry (
+ __field( void *, ctx )
+ __field( u64, user_data )
+ __field( bool, force_nonblock )
+ __field( bool, sq_thread )
+ ),
+
+ TP_fast_assign(
+ __entry->ctx = ctx;
+ __entry->user_data = user_data;
+ __entry->force_nonblock = force_nonblock;
+ __entry->sq_thread = sq_thread;
+ ),
+
+ TP_printk("ring %p, user data 0x%llx, non block %d, sq_thread %d",
+ __entry->ctx, (unsigned long long) __entry->user_data,
+ __entry->force_nonblock, __entry->sq_thread)
+);
+
+#endif /* _TRACE_IO_URING_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index edc5c887a44c..191fe447f990 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -519,10 +519,10 @@ TRACE_EVENT(rxrpc_local,
);
TRACE_EVENT(rxrpc_peer,
- TP_PROTO(struct rxrpc_peer *peer, enum rxrpc_peer_trace op,
+ TP_PROTO(unsigned int peer_debug_id, enum rxrpc_peer_trace op,
int usage, const void *where),
- TP_ARGS(peer, op, usage, where),
+ TP_ARGS(peer_debug_id, op, usage, where),
TP_STRUCT__entry(
__field(unsigned int, peer )
@@ -532,7 +532,7 @@ TRACE_EVENT(rxrpc_peer,
),
TP_fast_assign(
- __entry->peer = peer->debug_id;
+ __entry->peer = peer_debug_id;
__entry->op = op;
__entry->usage = usage;
__entry->where = where;
@@ -546,10 +546,10 @@ TRACE_EVENT(rxrpc_peer,
);
TRACE_EVENT(rxrpc_conn,
- TP_PROTO(struct rxrpc_connection *conn, enum rxrpc_conn_trace op,
+ TP_PROTO(unsigned int conn_debug_id, enum rxrpc_conn_trace op,
int usage, const void *where),
- TP_ARGS(conn, op, usage, where),
+ TP_ARGS(conn_debug_id, op, usage, where),
TP_STRUCT__entry(
__field(unsigned int, conn )
@@ -559,7 +559,7 @@ TRACE_EVENT(rxrpc_conn,
),
TP_fast_assign(
- __entry->conn = conn->debug_id;
+ __entry->conn = conn_debug_id;
__entry->op = op;
__entry->usage = usage;
__entry->where = where;
@@ -606,10 +606,10 @@ TRACE_EVENT(rxrpc_client,
);
TRACE_EVENT(rxrpc_call,
- TP_PROTO(struct rxrpc_call *call, enum rxrpc_call_trace op,
+ TP_PROTO(unsigned int call_debug_id, enum rxrpc_call_trace op,
int usage, const void *where, const void *aux),
- TP_ARGS(call, op, usage, where, aux),
+ TP_ARGS(call_debug_id, op, usage, where, aux),
TP_STRUCT__entry(
__field(unsigned int, call )
@@ -620,7 +620,7 @@ TRACE_EVENT(rxrpc_call,
),
TP_fast_assign(
- __entry->call = call->debug_id;
+ __entry->call = call_debug_id;
__entry->op = op;
__entry->usage = usage;
__entry->where = where;
diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h
index a0c4b8a30966..51fe9f6719eb 100644
--- a/include/trace/events/sock.h
+++ b/include/trace/events/sock.h
@@ -82,7 +82,7 @@ TRACE_EVENT(sock_rcvqueue_full,
TP_fast_assign(
__entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc);
__entry->truesize = skb->truesize;
- __entry->sk_rcvbuf = sk->sk_rcvbuf;
+ __entry->sk_rcvbuf = READ_ONCE(sk->sk_rcvbuf);
),
TP_printk("rmem_alloc=%d truesize=%u sk_rcvbuf=%d",
@@ -115,7 +115,7 @@ TRACE_EVENT(sock_exceed_buf_limit,
__entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc);
__entry->sysctl_wmem = sk_get_wmem0(sk, prot);
__entry->wmem_alloc = refcount_read(&sk->sk_wmem_alloc);
- __entry->wmem_queued = sk->sk_wmem_queued;
+ __entry->wmem_queued = READ_ONCE(sk->sk_wmem_queued);
__entry->kind = kind;
),
diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 2bc9960a31aa..cf97f6339acb 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -86,7 +86,7 @@ DECLARE_EVENT_CLASS(tcp_event_sk_skb,
sk->sk_v6_rcv_saddr, sk->sk_v6_daddr);
),
- TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c state=%s\n",
+ TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c state=%s",
__entry->sport, __entry->dport, __entry->saddr, __entry->daddr,
__entry->saddr_v6, __entry->daddr_v6,
show_tcp_state_name(__entry->state))
diff --git a/include/trace/events/wbt.h b/include/trace/events/wbt.h
index b048694070e2..37342a13c9cb 100644
--- a/include/trace/events/wbt.h
+++ b/include/trace/events/wbt.h
@@ -33,7 +33,8 @@ TRACE_EVENT(wbt_stat,
),
TP_fast_assign(
- strncpy(__entry->name, dev_name(bdi->dev), 32);
+ strlcpy(__entry->name, dev_name(bdi->dev),
+ ARRAY_SIZE(__entry->name));
__entry->rmean = stat[0].mean;
__entry->rmin = stat[0].min;
__entry->rmax = stat[0].max;
@@ -67,7 +68,8 @@ TRACE_EVENT(wbt_lat,
),
TP_fast_assign(
- strncpy(__entry->name, dev_name(bdi->dev), 32);
+ strlcpy(__entry->name, dev_name(bdi->dev),
+ ARRAY_SIZE(__entry->name));
__entry->lat = div_u64(lat, 1000);
),
@@ -103,7 +105,8 @@ TRACE_EVENT(wbt_step,
),
TP_fast_assign(
- strncpy(__entry->name, dev_name(bdi->dev), 32);
+ strlcpy(__entry->name, dev_name(bdi->dev),
+ ARRAY_SIZE(__entry->name));
__entry->msg = msg;
__entry->step = step;
__entry->window = div_u64(window, 1000);
@@ -138,7 +141,8 @@ TRACE_EVENT(wbt_timer,
),
TP_fast_assign(
- strncpy(__entry->name, dev_name(bdi->dev), 32);
+ strlcpy(__entry->name, dev_name(bdi->dev),
+ ARRAY_SIZE(__entry->name));
__entry->status = status;
__entry->step = step;
__entry->inflight = inflight;
diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h
index 498eec813494..0cdef67135f0 100644
--- a/include/uapi/linux/blkzoned.h
+++ b/include/uapi/linux/blkzoned.h
@@ -120,9 +120,11 @@ struct blk_zone_report {
};
/**
- * struct blk_zone_range - BLKRESETZONE ioctl request
- * @sector: starting sector of the first zone to issue reset write pointer
- * @nr_sectors: Total number of sectors of 1 or more zones to reset
+ * struct blk_zone_range - BLKRESETZONE/BLKOPENZONE/
+ * BLKCLOSEZONE/BLKFINISHZONE ioctl
+ * requests
+ * @sector: Starting sector of the first zone to operate on.
+ * @nr_sectors: Total number of sectors of all zones to operate on.
*/
struct blk_zone_range {
__u64 sector;
@@ -139,10 +141,19 @@ struct blk_zone_range {
* sector range. The sector range must be zone aligned.
* @BLKGETZONESZ: Get the device zone size in number of 512 B sectors.
* @BLKGETNRZONES: Get the total number of zones of the device.
+ * @BLKOPENZONE: Open the zones in the specified sector range.
+ * The 512 B sector range must be zone aligned.
+ * @BLKCLOSEZONE: Close the zones in the specified sector range.
+ * The 512 B sector range must be zone aligned.
+ * @BLKFINISHZONE: Mark the zones as full in the specified sector range.
+ * The 512 B sector range must be zone aligned.
*/
#define BLKREPORTZONE _IOWR(0x12, 130, struct blk_zone_report)
#define BLKRESETZONE _IOW(0x12, 131, struct blk_zone_range)
#define BLKGETZONESZ _IOR(0x12, 132, __u32)
#define BLKGETNRZONES _IOR(0x12, 133, __u32)
+#define BLKOPENZONE _IOW(0x12, 134, struct blk_zone_range)
+#define BLKCLOSEZONE _IOW(0x12, 135, struct blk_zone_range)
+#define BLKFINISHZONE _IOW(0x12, 136, struct blk_zone_range)
#endif /* _UAPI_BLKZONED_H */
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 3ee0678c0a83..7a8bc8b920f5 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -270,6 +270,7 @@ struct btrfs_ioctl_fs_info_args {
#define BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA (1ULL << 8)
#define BTRFS_FEATURE_INCOMPAT_NO_HOLES (1ULL << 9)
#define BTRFS_FEATURE_INCOMPAT_METADATA_UUID (1ULL << 10)
+#define BTRFS_FEATURE_INCOMPAT_RAID1C34 (1ULL << 11)
struct btrfs_ioctl_feature_flags {
__u64 compat_flags;
@@ -831,7 +832,9 @@ enum btrfs_err_code {
BTRFS_ERROR_DEV_TGT_REPLACE,
BTRFS_ERROR_DEV_MISSING_NOT_FOUND,
BTRFS_ERROR_DEV_ONLY_WRITABLE,
- BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS
+ BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS,
+ BTRFS_ERROR_DEV_RAID1C3_MIN_NOT_MET,
+ BTRFS_ERROR_DEV_RAID1C4_MIN_NOT_MET,
};
#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
index b65c7ee75bc7..8e322e2c7e78 100644
--- a/include/uapi/linux/btrfs_tree.h
+++ b/include/uapi/linux/btrfs_tree.h
@@ -302,6 +302,9 @@
/* csum types */
enum btrfs_csum_type {
BTRFS_CSUM_TYPE_CRC32 = 0,
+ BTRFS_CSUM_TYPE_XXHASH = 1,
+ BTRFS_CSUM_TYPE_SHA256 = 2,
+ BTRFS_CSUM_TYPE_BLAKE2 = 3,
};
/*
@@ -737,10 +740,12 @@ struct btrfs_balance_item {
__le64 unused[4];
} __attribute__ ((__packed__));
-#define BTRFS_FILE_EXTENT_INLINE 0
-#define BTRFS_FILE_EXTENT_REG 1
-#define BTRFS_FILE_EXTENT_PREALLOC 2
-#define BTRFS_FILE_EXTENT_TYPES 2
+enum {
+ BTRFS_FILE_EXTENT_INLINE = 0,
+ BTRFS_FILE_EXTENT_REG = 1,
+ BTRFS_FILE_EXTENT_PREALLOC = 2,
+ BTRFS_NR_FILE_EXTENT_TYPES = 3,
+};
struct btrfs_file_extent_item {
/*
@@ -836,6 +841,8 @@ struct btrfs_dev_replace_item {
#define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6)
#define BTRFS_BLOCK_GROUP_RAID5 (1ULL << 7)
#define BTRFS_BLOCK_GROUP_RAID6 (1ULL << 8)
+#define BTRFS_BLOCK_GROUP_RAID1C3 (1ULL << 9)
+#define BTRFS_BLOCK_GROUP_RAID1C4 (1ULL << 10)
#define BTRFS_BLOCK_GROUP_RESERVED (BTRFS_AVAIL_ALLOC_BIT_SINGLE | \
BTRFS_SPACE_INFO_GLOBAL_RSV)
@@ -847,6 +854,8 @@ enum btrfs_raid_types {
BTRFS_RAID_SINGLE,
BTRFS_RAID_RAID5,
BTRFS_RAID_RAID6,
+ BTRFS_RAID_RAID1C3,
+ BTRFS_RAID_RAID1C4,
BTRFS_NR_RAID_TYPES
};
@@ -856,6 +865,8 @@ enum btrfs_raid_types {
#define BTRFS_BLOCK_GROUP_PROFILE_MASK (BTRFS_BLOCK_GROUP_RAID0 | \
BTRFS_BLOCK_GROUP_RAID1 | \
+ BTRFS_BLOCK_GROUP_RAID1C3 | \
+ BTRFS_BLOCK_GROUP_RAID1C4 | \
BTRFS_BLOCK_GROUP_RAID5 | \
BTRFS_BLOCK_GROUP_RAID6 | \
BTRFS_BLOCK_GROUP_DUP | \
@@ -863,7 +874,9 @@ enum btrfs_raid_types {
#define BTRFS_BLOCK_GROUP_RAID56_MASK (BTRFS_BLOCK_GROUP_RAID5 | \
BTRFS_BLOCK_GROUP_RAID6)
-#define BTRFS_BLOCK_GROUP_RAID1_MASK (BTRFS_BLOCK_GROUP_RAID1)
+#define BTRFS_BLOCK_GROUP_RAID1_MASK (BTRFS_BLOCK_GROUP_RAID1 | \
+ BTRFS_BLOCK_GROUP_RAID1C3 | \
+ BTRFS_BLOCK_GROUP_RAID1C4)
/*
* We need a bit for restriper to be able to tell when chunks of type
diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h
index 1e988fdeba34..6a6d2c7655ff 100644
--- a/include/uapi/linux/can.h
+++ b/include/uapi/linux/can.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0-only WITH Linux-syscall-note) OR BSD-3-Clause) */
/*
* linux/can.h
*
diff --git a/include/uapi/linux/can/bcm.h b/include/uapi/linux/can/bcm.h
index 0fb328d93148..dd2b925b09ac 100644
--- a/include/uapi/linux/can/bcm.h
+++ b/include/uapi/linux/can/bcm.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0-only WITH Linux-syscall-note) OR BSD-3-Clause) */
/*
* linux/can/bcm.h
*
diff --git a/include/uapi/linux/can/error.h b/include/uapi/linux/can/error.h
index bfc4b5d22a5e..34633283de64 100644
--- a/include/uapi/linux/can/error.h
+++ b/include/uapi/linux/can/error.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0-only WITH Linux-syscall-note) OR BSD-3-Clause) */
/*
* linux/can/error.h
*
diff --git a/include/uapi/linux/can/gw.h b/include/uapi/linux/can/gw.h
index 3aea5388c8e4..c2190bbe21d8 100644
--- a/include/uapi/linux/can/gw.h
+++ b/include/uapi/linux/can/gw.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0-only WITH Linux-syscall-note) OR BSD-3-Clause) */
/*
* linux/can/gw.h
*
diff --git a/include/uapi/linux/can/j1939.h b/include/uapi/linux/can/j1939.h
index c32325342d30..df6e821075c1 100644
--- a/include/uapi/linux/can/j1939.h
+++ b/include/uapi/linux/can/j1939.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
/*
* j1939.h
*
diff --git a/include/uapi/linux/can/netlink.h b/include/uapi/linux/can/netlink.h
index 1bc70d3a4d39..6f598b73839e 100644
--- a/include/uapi/linux/can/netlink.h
+++ b/include/uapi/linux/can/netlink.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
/*
* linux/can/netlink.h
*
diff --git a/include/uapi/linux/can/raw.h b/include/uapi/linux/can/raw.h
index be3b36e7ff61..6a11d308eb5c 100644
--- a/include/uapi/linux/can/raw.h
+++ b/include/uapi/linux/can/raw.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0-only WITH Linux-syscall-note) OR BSD-3-Clause) */
/*
* linux/can/raw.h
*
diff --git a/include/uapi/linux/can/vxcan.h b/include/uapi/linux/can/vxcan.h
index 066812d118a2..4fa9d8777a07 100644
--- a/include/uapi/linux/can/vxcan.h
+++ b/include/uapi/linux/can/vxcan.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
#ifndef _UAPI_CAN_VXCAN_H
#define _UAPI_CAN_VXCAN_H
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 580b7a2e40e1..a8a2174db030 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -421,6 +421,7 @@ enum devlink_attr {
DEVLINK_ATTR_RELOAD_FAILED, /* u8 0 or 1 */
+ DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS, /* u64 */
/* add new attributes above here, update the policy in devlink.c */
__DEVLINK_ATTR_MAX,
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index 1d338357df8a..1f97b33c840e 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -58,7 +58,7 @@
* Valid hint values for F_{GET,SET}_RW_HINT. 0 is "not set", or can be
* used to clear any hints previously set.
*/
-#define RWF_WRITE_LIFE_NOT_SET 0
+#define RWH_WRITE_LIFE_NOT_SET 0
#define RWH_WRITE_LIFE_NONE 1
#define RWH_WRITE_LIFE_SHORT 2
#define RWH_WRITE_LIFE_MEDIUM 3
@@ -66,6 +66,13 @@
#define RWH_WRITE_LIFE_EXTREME 5
/*
+ * The originally introduced spelling is remained from the first
+ * versions of the patch set that introduced the feature, see commit
+ * v4.13-rc1~212^2~51.
+ */
+#define RWF_WRITE_LIFE_NOT_SET RWH_WRITE_LIFE_NOT_SET
+
+/*
* Types of directory notifications that may be requested.
*/
#define DN_ACCESS 0x00000001 /* File accessed */
diff --git a/include/uapi/linux/fscrypt.h b/include/uapi/linux/fscrypt.h
index 39ccfe9311c3..1beb174ad950 100644
--- a/include/uapi/linux/fscrypt.h
+++ b/include/uapi/linux/fscrypt.h
@@ -17,7 +17,8 @@
#define FSCRYPT_POLICY_FLAGS_PAD_32 0x03
#define FSCRYPT_POLICY_FLAGS_PAD_MASK 0x03
#define FSCRYPT_POLICY_FLAG_DIRECT_KEY 0x04
-#define FSCRYPT_POLICY_FLAGS_VALID 0x07
+#define FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64 0x08
+#define FSCRYPT_POLICY_FLAGS_VALID 0x0F
/* Encryption algorithms */
#define FSCRYPT_MODE_AES_256_XTS 1
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 802b0377a49e..373cada89815 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -38,6 +38,43 @@
*
* Protocol changelog:
*
+ * 7.1:
+ * - add the following messages:
+ * FUSE_SETATTR, FUSE_SYMLINK, FUSE_MKNOD, FUSE_MKDIR, FUSE_UNLINK,
+ * FUSE_RMDIR, FUSE_RENAME, FUSE_LINK, FUSE_OPEN, FUSE_READ, FUSE_WRITE,
+ * FUSE_RELEASE, FUSE_FSYNC, FUSE_FLUSH, FUSE_SETXATTR, FUSE_GETXATTR,
+ * FUSE_LISTXATTR, FUSE_REMOVEXATTR, FUSE_OPENDIR, FUSE_READDIR,
+ * FUSE_RELEASEDIR
+ * - add padding to messages to accommodate 32-bit servers on 64-bit kernels
+ *
+ * 7.2:
+ * - add FOPEN_DIRECT_IO and FOPEN_KEEP_CACHE flags
+ * - add FUSE_FSYNCDIR message
+ *
+ * 7.3:
+ * - add FUSE_ACCESS message
+ * - add FUSE_CREATE message
+ * - add filehandle to fuse_setattr_in
+ *
+ * 7.4:
+ * - add frsize to fuse_kstatfs
+ * - clean up request size limit checking
+ *
+ * 7.5:
+ * - add flags and max_write to fuse_init_out
+ *
+ * 7.6:
+ * - add max_readahead to fuse_init_in and fuse_init_out
+ *
+ * 7.7:
+ * - add FUSE_INTERRUPT message
+ * - add POSIX file lock support
+ *
+ * 7.8:
+ * - add lock_owner and flags fields to fuse_release_in
+ * - add FUSE_BMAP message
+ * - add FUSE_DESTROY message
+ *
* 7.9:
* - new fuse_getattr_in input argument of GETATTR
* - add lk_flags in fuse_lk_in
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index ea57526a5b89..2a1569211d87 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -19,7 +19,10 @@ struct io_uring_sqe {
__u8 flags; /* IOSQE_ flags */
__u16 ioprio; /* ioprio for the request */
__s32 fd; /* file descriptor to do IO on */
- __u64 off; /* offset into file */
+ union {
+ __u64 off; /* offset into file */
+ __u64 addr2;
+ };
__u64 addr; /* pointer to buffer or iovecs */
__u32 len; /* buffer size or number of iovecs */
union {
@@ -29,6 +32,8 @@ struct io_uring_sqe {
__u32 sync_range_flags;
__u32 msg_flags;
__u32 timeout_flags;
+ __u32 accept_flags;
+ __u32 cancel_flags;
};
__u64 user_data; /* data to be passed back at completion time */
union {
@@ -50,6 +55,7 @@ struct io_uring_sqe {
#define IORING_SETUP_IOPOLL (1U << 0) /* io_context is polled */
#define IORING_SETUP_SQPOLL (1U << 1) /* SQ poll thread */
#define IORING_SETUP_SQ_AFF (1U << 2) /* sq_thread_cpu is valid */
+#define IORING_SETUP_CQSIZE (1U << 3) /* app defines CQ size */
#define IORING_OP_NOP 0
#define IORING_OP_READV 1
@@ -63,6 +69,10 @@ struct io_uring_sqe {
#define IORING_OP_SENDMSG 9
#define IORING_OP_RECVMSG 10
#define IORING_OP_TIMEOUT 11
+#define IORING_OP_TIMEOUT_REMOVE 12
+#define IORING_OP_ACCEPT 13
+#define IORING_OP_ASYNC_CANCEL 14
+#define IORING_OP_LINK_TIMEOUT 15
/*
* sqe->fsync_flags
@@ -70,6 +80,11 @@ struct io_uring_sqe {
#define IORING_FSYNC_DATASYNC (1U << 0)
/*
+ * sqe->timeout_flags
+ */
+#define IORING_TIMEOUT_ABS (1U << 0)
+
+/*
* IO completion data structure (Completion Queue Entry)
*/
struct io_uring_cqe {
@@ -140,6 +155,7 @@ struct io_uring_params {
* io_uring_params->features flags
*/
#define IORING_FEAT_SINGLE_MMAP (1U << 0)
+#define IORING_FEAT_NODROP (1U << 1)
/*
* io_uring_register(2) opcodes and arguments
@@ -150,5 +166,11 @@ struct io_uring_params {
#define IORING_UNREGISTER_FILES 3
#define IORING_REGISTER_EVENTFD 4
#define IORING_UNREGISTER_EVENTFD 5
+#define IORING_REGISTER_FILES_UPDATE 6
+
+struct io_uring_files_update {
+ __u32 offset;
+ __s32 *fds;
+};
#endif
diff --git a/include/uapi/linux/nvme_ioctl.h b/include/uapi/linux/nvme_ioctl.h
index e168dc59e9a0..d99b5a772698 100644
--- a/include/uapi/linux/nvme_ioctl.h
+++ b/include/uapi/linux/nvme_ioctl.h
@@ -63,6 +63,7 @@ struct nvme_passthru_cmd64 {
__u32 cdw14;
__u32 cdw15;
__u32 timeout_ms;
+ __u32 rsvd2;
__u64 result;
};
diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h
index 59e89a1bc3bb..9dc9d0079e98 100644
--- a/include/uapi/linux/ptp_clock.h
+++ b/include/uapi/linux/ptp_clock.h
@@ -31,13 +31,16 @@
#define PTP_ENABLE_FEATURE (1<<0)
#define PTP_RISING_EDGE (1<<1)
#define PTP_FALLING_EDGE (1<<2)
+#define PTP_STRICT_FLAGS (1<<3)
+#define PTP_EXTTS_EDGES (PTP_RISING_EDGE | PTP_FALLING_EDGE)
/*
* flag fields valid for the new PTP_EXTTS_REQUEST2 ioctl.
*/
#define PTP_EXTTS_VALID_FLAGS (PTP_ENABLE_FEATURE | \
PTP_RISING_EDGE | \
- PTP_FALLING_EDGE)
+ PTP_FALLING_EDGE | \
+ PTP_STRICT_FLAGS)
/*
* flag fields valid for the original PTP_EXTTS_REQUEST ioctl.
diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
index 99335e1f4a27..25b4fa00bad1 100644
--- a/include/uapi/linux/sched.h
+++ b/include/uapi/linux/sched.h
@@ -51,6 +51,10 @@
* sent when the child exits.
* @stack: Specify the location of the stack for the
* child process.
+ * Note, @stack is expected to point to the
+ * lowest address. The stack direction will be
+ * determined by the kernel and set up
+ * appropriately based on @stack_size.
* @stack_size: The size of the stack for the child process.
* @tls: If CLONE_SETTLS is set, the tls descriptor
* is set to tls.
diff --git a/include/uapi/linux/sed-opal.h b/include/uapi/linux/sed-opal.h
index c6d035fa1b6c..6f5af1a84213 100644
--- a/include/uapi/linux/sed-opal.h
+++ b/include/uapi/linux/sed-opal.h
@@ -113,6 +113,25 @@ struct opal_shadow_mbr {
__u64 size;
};
+/* Opal table operations */
+enum opal_table_ops {
+ OPAL_READ_TABLE,
+ OPAL_WRITE_TABLE,
+};
+
+#define OPAL_UID_LENGTH 8
+struct opal_read_write_table {
+ struct opal_key key;
+ const __u64 data;
+ const __u8 table_uid[OPAL_UID_LENGTH];
+ __u64 offset;
+ __u64 size;
+#define OPAL_TABLE_READ (1 << OPAL_READ_TABLE)
+#define OPAL_TABLE_WRITE (1 << OPAL_WRITE_TABLE)
+ __u64 flags;
+ __u64 priv;
+};
+
#define IOC_OPAL_SAVE _IOW('p', 220, struct opal_lock_unlock)
#define IOC_OPAL_LOCK_UNLOCK _IOW('p', 221, struct opal_lock_unlock)
#define IOC_OPAL_TAKE_OWNERSHIP _IOW('p', 222, struct opal_key)
@@ -128,5 +147,6 @@ struct opal_shadow_mbr {
#define IOC_OPAL_PSID_REVERT_TPR _IOW('p', 232, struct opal_key)
#define IOC_OPAL_MBR_DONE _IOW('p', 233, struct opal_mbr_done)
#define IOC_OPAL_WRITE_SHADOW_MBR _IOW('p', 234, struct opal_shadow_mbr)
+#define IOC_OPAL_GENERIC_TABLE_RW _IOW('p', 235, struct opal_read_write_table)
#endif /* _UAPI_SED_OPAL_H */
diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h
index 0f4f87a6fd54..e7fe550b6038 100644
--- a/include/uapi/linux/serial_core.h
+++ b/include/uapi/linux/serial_core.h
@@ -291,6 +291,6 @@
#define PORT_SUNIX 121
/* Freescale Linflex UART */
-#define PORT_LINFLEXUART 121
+#define PORT_LINFLEXUART 122
#endif /* _UAPILINUX_SERIAL_CORE_H */
diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h
index 7b35e98d3c58..ad80a5c885d5 100644
--- a/include/uapi/linux/stat.h
+++ b/include/uapi/linux/stat.h
@@ -167,8 +167,8 @@ struct statx {
#define STATX_ATTR_APPEND 0x00000020 /* [I] File is append-only */
#define STATX_ATTR_NODUMP 0x00000040 /* [I] File is not to be dumped */
#define STATX_ATTR_ENCRYPTED 0x00000800 /* [I] File requires key to decrypt in fs */
-
#define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */
+#define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */
#endif /* _UAPI_LINUX_STAT_H */
diff --git a/init/Kconfig b/init/Kconfig
index b4daad2bac23..4d8d145c41d2 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1548,6 +1548,7 @@ config AIO
config IO_URING
bool "Enable IO uring support" if EXPERT
select ANON_INODES
+ select IO_WQ
default y
help
This option enables support for the io_uring interface, enabling
diff --git a/init/initramfs.c b/init/initramfs.c
index c47dad0884f7..8ec1be4d7d51 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -10,6 +10,7 @@
#include <linux/syscalls.h>
#include <linux/utime.h>
#include <linux/file.h>
+#include <linux/memblock.h>
static ssize_t __init xwrite(int fd, const char *p, size_t count)
{
@@ -529,6 +530,13 @@ extern unsigned long __initramfs_size;
void __weak free_initrd_mem(unsigned long start, unsigned long end)
{
+#ifdef CONFIG_ARCH_KEEP_MEMBLOCK
+ unsigned long aligned_start = ALIGN_DOWN(start, PAGE_SIZE);
+ unsigned long aligned_end = ALIGN(end, PAGE_SIZE);
+
+ memblock_free(__pa(aligned_start), aligned_end - aligned_start);
+#endif
+
free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM,
"initrd");
}
diff --git a/kernel/Makefile b/kernel/Makefile
index daad787fb795..f0902a7bd1b3 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -115,6 +115,8 @@ obj-$(CONFIG_TORTURE_TEST) += torture.o
obj-$(CONFIG_HAS_IOMEM) += iomem.o
obj-$(CONFIG_RSEQ) += rseq.o
+obj-$(CONFIG_SYSCTL_KUNIT_TEST) += sysctl-test.o
+
obj-$(CONFIG_GCC_PLUGIN_STACKLEAK) += stackleak.o
KASAN_SANITIZE_stackleak.o := n
KCOV_INSTRUMENT_stackleak.o := n
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 1f31c2f1e6fc..4508d5e0cf69 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -351,12 +351,12 @@ static int audit_get_nd(struct audit_watch *watch, struct path *parent)
struct dentry *d = kern_path_locked(watch->path, parent);
if (IS_ERR(d))
return PTR_ERR(d);
- inode_unlock(d_backing_inode(parent->dentry));
if (d_is_positive(d)) {
/* update watch filter fields */
watch->dev = d->d_sb->s_dev;
watch->ino = d_backing_inode(d)->i_ino;
}
+ inode_unlock(d_backing_inode(parent->dentry));
dput(d);
return 0;
}
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index ddd8addcdb5c..a3eaf08e7dd3 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -1311,12 +1311,12 @@ static bool sysctl_is_valid_access(int off, int size, enum bpf_access_type type,
return false;
switch (off) {
- case offsetof(struct bpf_sysctl, write):
+ case bpf_ctx_range(struct bpf_sysctl, write):
if (type != BPF_READ)
return false;
bpf_ctx_record_field_size(info, size_default);
return bpf_ctx_narrow_access_ok(off, size, size_default);
- case offsetof(struct bpf_sysctl, file_pos):
+ case bpf_ctx_range(struct bpf_sysctl, file_pos):
if (type == BPF_READ) {
bpf_ctx_record_field_size(info, size_default);
return bpf_ctx_narrow_access_ok(off, size, size_default);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 66088a9e9b9e..ef0e1e3e66f4 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -502,7 +502,7 @@ int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt)
return WARN_ON_ONCE(bpf_adj_branches(prog, off, off + cnt, off, false));
}
-void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp)
+static void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp)
{
int i;
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index d27f3b60ff6d..3867864cdc2f 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -128,7 +128,7 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
if (!dtab->n_buckets) /* Overflow check */
return -EINVAL;
- cost += sizeof(struct hlist_head) * dtab->n_buckets;
+ cost += (u64) sizeof(struct hlist_head) * dtab->n_buckets;
}
/* if map size is larger than memlock limit, reject it */
@@ -719,6 +719,32 @@ const struct bpf_map_ops dev_map_hash_ops = {
.map_check_btf = map_check_no_btf,
};
+static void dev_map_hash_remove_netdev(struct bpf_dtab *dtab,
+ struct net_device *netdev)
+{
+ unsigned long flags;
+ u32 i;
+
+ spin_lock_irqsave(&dtab->index_lock, flags);
+ for (i = 0; i < dtab->n_buckets; i++) {
+ struct bpf_dtab_netdev *dev;
+ struct hlist_head *head;
+ struct hlist_node *next;
+
+ head = dev_map_index_hash(dtab, i);
+
+ hlist_for_each_entry_safe(dev, next, head, index_hlist) {
+ if (netdev != dev->dev)
+ continue;
+
+ dtab->items--;
+ hlist_del_rcu(&dev->index_hlist);
+ call_rcu(&dev->rcu, __dev_map_entry_free);
+ }
+ }
+ spin_unlock_irqrestore(&dtab->index_lock, flags);
+}
+
static int dev_map_notification(struct notifier_block *notifier,
ulong event, void *ptr)
{
@@ -735,6 +761,11 @@ static int dev_map_notification(struct notifier_block *notifier,
*/
rcu_read_lock();
list_for_each_entry_rcu(dtab, &dev_map_list, list) {
+ if (dtab->map.map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
+ dev_map_hash_remove_netdev(dtab, netdev);
+ continue;
+ }
+
for (i = 0; i < dtab->map.max_entries; i++) {
struct bpf_dtab_netdev *dev, *odev;
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index ba635209ae9a..5b9da0954a27 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -678,8 +678,10 @@ bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops, void *priv)
down_write(&bpf_devs_lock);
if (!offdevs_inited) {
err = rhashtable_init(&offdevs, &offdevs_params);
- if (err)
+ if (err) {
+ up_write(&bpf_devs_lock);
return ERR_PTR(err);
+ }
offdevs_inited = true;
}
up_write(&bpf_devs_lock);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 82eabd4e38ad..ace1cfaa24b6 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -126,7 +126,7 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
return map;
}
-void *bpf_map_area_alloc(size_t size, int numa_node)
+void *bpf_map_area_alloc(u64 size, int numa_node)
{
/* We really just want to fail instead of triggering OOM killer
* under memory pressure, therefore we set __GFP_NORETRY to kmalloc,
@@ -141,6 +141,9 @@ void *bpf_map_area_alloc(size_t size, int numa_node)
const gfp_t flags = __GFP_NOWARN | __GFP_ZERO;
void *area;
+ if (size >= SIZE_MAX)
+ return NULL;
+
if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
area = kmalloc_node(size, GFP_USER | __GFP_NORETRY | flags,
numa_node);
@@ -197,7 +200,7 @@ static void bpf_uncharge_memlock(struct user_struct *user, u32 pages)
atomic_long_sub(pages, &user->locked_vm);
}
-int bpf_map_charge_init(struct bpf_map_memory *mem, size_t size)
+int bpf_map_charge_init(struct bpf_map_memory *mem, u64 size)
{
u32 pages = round_up(size, PAGE_SIZE) >> PAGE_SHIFT;
struct user_struct *user;
@@ -1326,24 +1329,32 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu)
{
struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);
+ kvfree(aux->func_info);
free_used_maps(aux);
bpf_prog_uncharge_memlock(aux->prog);
security_bpf_prog_free(aux);
bpf_prog_free(aux->prog);
}
+static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred)
+{
+ bpf_prog_kallsyms_del_all(prog);
+ btf_put(prog->aux->btf);
+ bpf_prog_free_linfo(prog);
+
+ if (deferred)
+ call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
+ else
+ __bpf_prog_put_rcu(&prog->aux->rcu);
+}
+
static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
{
if (atomic_dec_and_test(&prog->aux->refcnt)) {
perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0);
/* bpf_prog_free_id() must be called first */
bpf_prog_free_id(prog, do_idr_lock);
- bpf_prog_kallsyms_del_all(prog);
- btf_put(prog->aux->btf);
- kvfree(prog->aux->func_info);
- bpf_prog_free_linfo(prog);
-
- call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
+ __bpf_prog_put_noref(prog, true);
}
}
@@ -1741,11 +1752,12 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
return err;
free_used_maps:
- bpf_prog_free_linfo(prog);
- kvfree(prog->aux->func_info);
- btf_put(prog->aux->btf);
- bpf_prog_kallsyms_del_subprogs(prog);
- free_used_maps(prog->aux);
+ /* In case we have subprogs, we need to wait for a grace
+ * period before we can tear down JIT memory since symbols
+ * are already exposed under kallsyms.
+ */
+ __bpf_prog_put_noref(prog, prog->aux->func_cnt);
+ return err;
free_prog:
bpf_prog_uncharge_memlock(prog);
free_prog_sec:
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 080561bb8a4b..ef4242e5d4bc 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -2119,11 +2119,12 @@ int cgroup_do_get_tree(struct fs_context *fc)
nsdentry = kernfs_node_dentry(cgrp->kn, sb);
dput(fc->root);
- fc->root = nsdentry;
if (IS_ERR(nsdentry)) {
- ret = PTR_ERR(nsdentry);
deactivate_locked_super(sb);
+ ret = PTR_ERR(nsdentry);
+ nsdentry = NULL;
}
+ fc->root = nsdentry;
}
if (!ctx->kfc.new_sb_created)
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index c52bc91f882b..c87ee6412b36 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -798,7 +798,8 @@ static int generate_sched_domains(cpumask_var_t **domains,
cpumask_subset(cp->cpus_allowed, top_cpuset.effective_cpus))
continue;
- if (is_sched_load_balance(cp))
+ if (is_sched_load_balance(cp) &&
+ !cpumask_empty(cp->effective_cpus))
csa[csn++] = cp;
/* skip @cp's subtree if not a partition root */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index fc28e17940e0..e2cad3ee2ead 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -2373,7 +2373,18 @@ void __init boot_cpu_hotplug_init(void)
this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
}
-enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO;
+/*
+ * These are used for a global "mitigations=" cmdline option for toggling
+ * optional CPU mitigations.
+ */
+enum cpu_mitigations {
+ CPU_MITIGATIONS_OFF,
+ CPU_MITIGATIONS_AUTO,
+ CPU_MITIGATIONS_AUTO_NOSMT,
+};
+
+static enum cpu_mitigations cpu_mitigations __ro_after_init =
+ CPU_MITIGATIONS_AUTO;
static int __init mitigations_parse_cmdline(char *arg)
{
@@ -2390,3 +2401,17 @@ static int __init mitigations_parse_cmdline(char *arg)
return 0;
}
early_param("mitigations", mitigations_parse_cmdline);
+
+/* mitigations=off */
+bool cpu_mitigations_off(void)
+{
+ return cpu_mitigations == CPU_MITIGATIONS_OFF;
+}
+EXPORT_SYMBOL_GPL(cpu_mitigations_off);
+
+/* mitigations=auto,nosmt */
+bool cpu_mitigations_auto_nosmt(void)
+{
+ return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
+}
+EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt);
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 8402b29c280f..0b67c04e531b 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -16,12 +16,11 @@
#include <linux/swiotlb.h>
/*
- * Most architectures use ZONE_DMA for the first 16 Megabytes, but
- * some use it for entirely different regions:
+ * Most architectures use ZONE_DMA for the first 16 Megabytes, but some use it
+ * it for entirely different regions. In that case the arch code needs to
+ * override the variable below for dma-direct to work properly.
*/
-#ifndef ARCH_ZONE_DMA_BITS
-#define ARCH_ZONE_DMA_BITS 24
-#endif
+unsigned int zone_dma_bits __ro_after_init = 24;
static void report_addr(struct device *dev, dma_addr_t dma_addr, size_t size)
{
@@ -69,7 +68,7 @@ static gfp_t __dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
* Note that GFP_DMA32 and GFP_DMA are no ops without the corresponding
* zones.
*/
- if (*phys_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
+ if (*phys_mask <= DMA_BIT_MASK(zone_dma_bits))
return GFP_DMA;
if (*phys_mask <= DMA_BIT_MASK(32))
return GFP_DMA32;
@@ -395,7 +394,7 @@ int dma_direct_supported(struct device *dev, u64 mask)
u64 min_mask;
if (IS_ENABLED(CONFIG_ZONE_DMA))
- min_mask = DMA_BIT_MASK(ARCH_ZONE_DMA_BITS);
+ min_mask = DMA_BIT_MASK(zone_dma_bits);
else
min_mask = DMA_BIT_MASK(32);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3f0cb82e4fbc..00a014670ed0 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1031,7 +1031,7 @@ perf_cgroup_set_timestamp(struct task_struct *task,
{
}
-void
+static inline void
perf_cgroup_switch(struct task_struct *task, struct task_struct *next)
{
}
@@ -3779,11 +3779,23 @@ static void rotate_ctx(struct perf_event_context *ctx, struct perf_event *event)
perf_event_groups_insert(&ctx->flexible_groups, event);
}
+/* pick an event from the flexible_groups to rotate */
static inline struct perf_event *
-ctx_first_active(struct perf_event_context *ctx)
+ctx_event_to_rotate(struct perf_event_context *ctx)
{
- return list_first_entry_or_null(&ctx->flexible_active,
- struct perf_event, active_list);
+ struct perf_event *event;
+
+ /* pick the first active flexible event */
+ event = list_first_entry_or_null(&ctx->flexible_active,
+ struct perf_event, active_list);
+
+ /* if no active flexible event, pick the first event */
+ if (!event) {
+ event = rb_entry_safe(rb_first(&ctx->flexible_groups.tree),
+ typeof(*event), group_node);
+ }
+
+ return event;
}
static bool perf_rotate_context(struct perf_cpu_context *cpuctx)
@@ -3808,9 +3820,9 @@ static bool perf_rotate_context(struct perf_cpu_context *cpuctx)
perf_pmu_disable(cpuctx->ctx.pmu);
if (task_rotate)
- task_event = ctx_first_active(task_ctx);
+ task_event = ctx_event_to_rotate(task_ctx);
if (cpu_rotate)
- cpu_event = ctx_first_active(&cpuctx->ctx);
+ cpu_event = ctx_event_to_rotate(&cpuctx->ctx);
/*
* As per the order given at ctx_resched() first 'pop' task flexible
@@ -5595,8 +5607,10 @@ static void perf_mmap_close(struct vm_area_struct *vma)
perf_pmu_output_stop(event);
/* now it's safe to free the pages */
- atomic_long_sub(rb->aux_nr_pages, &mmap_user->locked_vm);
- atomic64_sub(rb->aux_mmap_locked, &vma->vm_mm->pinned_vm);
+ if (!rb->aux_mmap_locked)
+ atomic_long_sub(rb->aux_nr_pages, &mmap_user->locked_vm);
+ else
+ atomic64_sub(rb->aux_mmap_locked, &vma->vm_mm->pinned_vm);
/* this has to be the last one */
rb_free_aux(rb);
@@ -5668,7 +5682,8 @@ again:
* undo the VM accounting.
*/
- atomic_long_sub((size >> PAGE_SHIFT) + 1, &mmap_user->locked_vm);
+ atomic_long_sub((size >> PAGE_SHIFT) + 1 - mmap_locked,
+ &mmap_user->locked_vm);
atomic64_sub(mmap_locked, &vma->vm_mm->pinned_vm);
free_uid(mmap_user);
@@ -5812,8 +5827,20 @@ accounting:
user_locked = atomic_long_read(&user->locked_vm) + user_extra;
- if (user_locked > user_lock_limit)
+ if (user_locked <= user_lock_limit) {
+ /* charge all to locked_vm */
+ } else if (atomic_long_read(&user->locked_vm) >= user_lock_limit) {
+ /* charge all to pinned_vm */
+ extra = user_extra;
+ user_extra = 0;
+ } else {
+ /*
+ * charge locked_vm until it hits user_lock_limit;
+ * charge the rest from pinned_vm
+ */
extra = user_locked - user_lock_limit;
+ user_extra -= extra;
+ }
lock_limit = rlimit(RLIMIT_MEMLOCK);
lock_limit >>= PAGE_SHIFT;
@@ -6922,7 +6949,7 @@ static void __perf_event_output_stop(struct perf_event *event, void *data)
static int __perf_pmu_output_stop(void *info)
{
struct perf_event *event = info;
- struct pmu *pmu = event->pmu;
+ struct pmu *pmu = event->ctx->pmu;
struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
struct remote_output ro = {
.rb = event->rb,
@@ -10508,6 +10535,15 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
goto err_ns;
}
+ /*
+ * Disallow uncore-cgroup events, they don't make sense as the cgroup will
+ * be different on other CPUs in the uncore mask.
+ */
+ if (pmu->task_ctx_nr == perf_invalid_context && cgroup_fd != -1) {
+ err = -EINVAL;
+ goto err_pmu;
+ }
+
if (event->attr.aux_output &&
!(pmu->capabilities & PERF_PMU_CAP_AUX_OUTPUT)) {
err = -EOPNOTSUPP;
@@ -10608,7 +10644,7 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
attr->size = size;
- if (attr->__reserved_1)
+ if (attr->__reserved_1 || attr->__reserved_2)
return -EINVAL;
if (attr->sample_type & ~(PERF_SAMPLE_MAX-1))
@@ -11296,8 +11332,11 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
int err;
/*
- * Get the target context (task or percpu):
+ * Grouping is not supported for kernel events, neither is 'AUX',
+ * make sure the caller's intentions are adjusted.
*/
+ if (attr->aux_output)
+ return ERR_PTR(-EINVAL);
event = perf_event_alloc(attr, cpu, task, NULL, NULL,
overflow_handler, context, -1);
@@ -11309,6 +11348,9 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
/* Mark owner so we could distinguish it from user events. */
event->owner = TASK_TOMBSTONE;
+ /*
+ * Get the target context (task or percpu):
+ */
ctx = find_get_context(event->pmu, task, event);
if (IS_ERR(ctx)) {
err = PTR_ERR(ctx);
@@ -11760,7 +11802,7 @@ inherit_event(struct perf_event *parent_event,
GFP_KERNEL);
if (!child_ctx->task_ctx_data) {
free_event(child_event);
- return NULL;
+ return ERR_PTR(-ENOMEM);
}
}
@@ -11862,6 +11904,10 @@ static int inherit_group(struct perf_event *parent_event,
child, leader, child_ctx);
if (IS_ERR(child_ctr))
return PTR_ERR(child_ctr);
+
+ if (sub->aux_event == parent_event && child_ctr &&
+ !perf_get_aux_event(child_ctr, leader))
+ return -EINVAL;
}
return 0;
}
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 94d38a39d72e..c74761004ee5 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -474,14 +474,17 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
struct vm_area_struct *vma;
int ret, is_register, ref_ctr_updated = 0;
bool orig_page_huge = false;
+ unsigned int gup_flags = FOLL_FORCE;
is_register = is_swbp_insn(&opcode);
uprobe = container_of(auprobe, struct uprobe, arch);
retry:
+ if (is_register)
+ gup_flags |= FOLL_SPLIT_PMD;
/* Read the page with vaddr into memory */
- ret = get_user_pages_remote(NULL, mm, vaddr, 1,
- FOLL_FORCE | FOLL_SPLIT_PMD, &old_page, &vma, NULL);
+ ret = get_user_pages_remote(NULL, mm, vaddr, 1, gup_flags,
+ &old_page, &vma, NULL);
if (ret <= 0)
return ret;
@@ -489,6 +492,12 @@ retry:
if (ret <= 0)
goto put_old;
+ if (WARN(!is_register && PageCompound(old_page),
+ "uprobe unregister should never work on compound page\n")) {
+ ret = -EINVAL;
+ goto put_old;
+ }
+
/* We are going to replace instruction, update ref_ctr. */
if (!ref_ctr_updated && uprobe->ref_ctr_offset) {
ret = update_ref_ctr(uprobe, mm, is_register ? 1 : -1);
diff --git a/kernel/fork.c b/kernel/fork.c
index 1f6c45f6a734..13b38794efb5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1708,11 +1708,11 @@ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f)
/*
* Poll support for process exit notification.
*/
-static unsigned int pidfd_poll(struct file *file, struct poll_table_struct *pts)
+static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
{
struct task_struct *task;
struct pid *pid = file->private_data;
- int poll_flags = 0;
+ __poll_t poll_flags = 0;
poll_wait(file, &pid->wait_pidfd, pts);
@@ -1724,7 +1724,7 @@ static unsigned int pidfd_poll(struct file *file, struct poll_table_struct *pts)
* group, then poll(2) should block, similar to the wait(2) family.
*/
if (!task || (task->exit_state && thread_group_empty(task)))
- poll_flags = POLLIN | POLLRDNORM;
+ poll_flags = EPOLLIN | EPOLLRDNORM;
rcu_read_unlock();
return poll_flags;
@@ -2561,7 +2561,35 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs,
return 0;
}
-static bool clone3_args_valid(const struct kernel_clone_args *kargs)
+/**
+ * clone3_stack_valid - check and prepare stack
+ * @kargs: kernel clone args
+ *
+ * Verify that the stack arguments userspace gave us are sane.
+ * In addition, set the stack direction for userspace since it's easy for us to
+ * determine.
+ */
+static inline bool clone3_stack_valid(struct kernel_clone_args *kargs)
+{
+ if (kargs->stack == 0) {
+ if (kargs->stack_size > 0)
+ return false;
+ } else {
+ if (kargs->stack_size == 0)
+ return false;
+
+ if (!access_ok((void __user *)kargs->stack, kargs->stack_size))
+ return false;
+
+#if !defined(CONFIG_STACK_GROWSUP) && !defined(CONFIG_IA64)
+ kargs->stack += kargs->stack_size;
+#endif
+ }
+
+ return true;
+}
+
+static bool clone3_args_valid(struct kernel_clone_args *kargs)
{
/*
* All lower bits of the flag word are taken.
@@ -2581,6 +2609,9 @@ static bool clone3_args_valid(const struct kernel_clone_args *kargs)
kargs->exit_signal)
return false;
+ if (!clone3_stack_valid(kargs))
+ return false;
+
return true;
}
@@ -2925,7 +2956,7 @@ int sysctl_max_threads(struct ctl_table *table, int write,
struct ctl_table t;
int ret;
int threads = max_threads;
- int min = MIN_THREADS;
+ int min = 1;
int max = MAX_THREADS;
t = *table;
@@ -2937,7 +2968,7 @@ int sysctl_max_threads(struct ctl_table *table, int write,
if (ret || !write)
return ret;
- set_max_threads(threads);
+ max_threads = threads;
return 0;
}
diff --git a/kernel/freezer.c b/kernel/freezer.c
index c0738424bb43..dc520f01f99d 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -22,12 +22,6 @@ EXPORT_SYMBOL(system_freezing_cnt);
bool pm_freezing;
bool pm_nosig_freezing;
-/*
- * Temporary export for the deadlock workaround in ata_scsi_hotplug().
- * Remove once the hack becomes unnecessary.
- */
-EXPORT_SYMBOL_GPL(pm_freezing);
-
/* protects freezing and frozen transitions */
static DEFINE_SPINLOCK(freezer_lock);
diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh
index aff79e461fc9..5a0fc0b0403a 100755
--- a/kernel/gen_kheaders.sh
+++ b/kernel/gen_kheaders.sh
@@ -71,10 +71,13 @@ done | cpio --quiet -pd $cpio_dir >/dev/null 2>&1
find $cpio_dir -type f -print0 |
xargs -0 -P8 -n1 perl -pi -e 'BEGIN {undef $/;}; s/\/\*((?!SPDX).)*?\*\///smg;'
-# Create archive and try to normalize metadata for reproducibility
-tar "${KBUILD_BUILD_TIMESTAMP:+--mtime=$KBUILD_BUILD_TIMESTAMP}" \
- --owner=0 --group=0 --sort=name --numeric-owner \
- -Jcf $tarfile -C $cpio_dir/ . > /dev/null
+# Create archive and try to normalize metadata for reproducibility.
+# For compatibility with older versions of tar, files are fed to tar
+# pre-sorted, as --sort=name might not be available.
+find $cpio_dir -printf "./%P\n" | LC_ALL=C sort | \
+ tar "${KBUILD_BUILD_TIMESTAMP:+--mtime=$KBUILD_BUILD_TIMESTAMP}" \
+ --owner=0 --group=0 --numeric-owner --no-recursion \
+ -Jcf $tarfile -C $cpio_dir/ -T - > /dev/null
echo "$src_files_md5" > kernel/kheaders.md5
echo "$obj_files_md5" >> kernel/kheaders.md5
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 132672b74e4b..dd822fd8a7d5 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -51,7 +51,7 @@ EXPORT_SYMBOL_GPL(irqchip_fwnode_ops);
* @type: Type of irqchip_fwnode. See linux/irqdomain.h
* @name: Optional user provided domain name
* @id: Optional user provided id if name != NULL
- * @data: Optional user-provided data
+ * @pa: Optional user-provided physical address
*
* Allocate a struct irqchip_fwid, and return a poiner to the embedded
* fwnode_handle (or NULL on failure).
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 621467c33fef..b262f47046ca 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -866,9 +866,9 @@ void kthread_delayed_work_timer_fn(struct timer_list *t)
}
EXPORT_SYMBOL(kthread_delayed_work_timer_fn);
-void __kthread_queue_delayed_work(struct kthread_worker *worker,
- struct kthread_delayed_work *dwork,
- unsigned long delay)
+static void __kthread_queue_delayed_work(struct kthread_worker *worker,
+ struct kthread_delayed_work *dwork,
+ unsigned long delay)
{
struct timer_list *timer = &dwork->timer;
struct kthread_work *work = &dwork->work;
diff --git a/kernel/module.c b/kernel/module.c
index ff2d7359a418..acf7962936c4 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3222,7 +3222,7 @@ static int find_module_sections(struct module *mod, struct load_info *info)
#endif
#ifdef CONFIG_FTRACE_MCOUNT_RECORD
/* sechdrs[0].sh_size is always zero */
- mod->ftrace_callsites = section_objs(info, "__mcount_loc",
+ mod->ftrace_callsites = section_objs(info, FTRACE_CALLSITE_SECTION,
sizeof(*mod->ftrace_callsites),
&mod->num_ftrace_callsites);
#endif
diff --git a/kernel/panic.c b/kernel/panic.c
index 47e8ebccc22b..f470a038b05b 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -180,6 +180,7 @@ void panic(const char *fmt, ...)
* after setting panic_cpu) from invoking panic() again.
*/
local_irq_disable();
+ preempt_disable_notrace();
/*
* It's possible to come here directly from a panic-assertion and
diff --git a/kernel/power/main.c b/kernel/power/main.c
index e8710d179b35..e26de7af520b 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -15,6 +15,7 @@
#include <linux/seq_file.h>
#include <linux/suspend.h>
#include <linux/syscalls.h>
+#include <linux/pm_runtime.h>
#include "power.h"
diff --git a/kernel/power/qos.c b/kernel/power/qos.c
index 9568a2fe7c11..a45cba7df0ae 100644
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -650,3 +650,249 @@ static int __init pm_qos_power_init(void)
}
late_initcall(pm_qos_power_init);
+
+/* Definitions related to the frequency QoS below. */
+
+/**
+ * freq_constraints_init - Initialize frequency QoS constraints.
+ * @qos: Frequency QoS constraints to initialize.
+ */
+void freq_constraints_init(struct freq_constraints *qos)
+{
+ struct pm_qos_constraints *c;
+
+ c = &qos->min_freq;
+ plist_head_init(&c->list);
+ c->target_value = FREQ_QOS_MIN_DEFAULT_VALUE;
+ c->default_value = FREQ_QOS_MIN_DEFAULT_VALUE;
+ c->no_constraint_value = FREQ_QOS_MIN_DEFAULT_VALUE;
+ c->type = PM_QOS_MAX;
+ c->notifiers = &qos->min_freq_notifiers;
+ BLOCKING_INIT_NOTIFIER_HEAD(c->notifiers);
+
+ c = &qos->max_freq;
+ plist_head_init(&c->list);
+ c->target_value = FREQ_QOS_MAX_DEFAULT_VALUE;
+ c->default_value = FREQ_QOS_MAX_DEFAULT_VALUE;
+ c->no_constraint_value = FREQ_QOS_MAX_DEFAULT_VALUE;
+ c->type = PM_QOS_MIN;
+ c->notifiers = &qos->max_freq_notifiers;
+ BLOCKING_INIT_NOTIFIER_HEAD(c->notifiers);
+}
+
+/**
+ * freq_qos_read_value - Get frequency QoS constraint for a given list.
+ * @qos: Constraints to evaluate.
+ * @type: QoS request type.
+ */
+s32 freq_qos_read_value(struct freq_constraints *qos,
+ enum freq_qos_req_type type)
+{
+ s32 ret;
+
+ switch (type) {
+ case FREQ_QOS_MIN:
+ ret = IS_ERR_OR_NULL(qos) ?
+ FREQ_QOS_MIN_DEFAULT_VALUE :
+ pm_qos_read_value(&qos->min_freq);
+ break;
+ case FREQ_QOS_MAX:
+ ret = IS_ERR_OR_NULL(qos) ?
+ FREQ_QOS_MAX_DEFAULT_VALUE :
+ pm_qos_read_value(&qos->max_freq);
+ break;
+ default:
+ WARN_ON(1);
+ ret = 0;
+ }
+
+ return ret;
+}
+
+/**
+ * freq_qos_apply - Add/modify/remove frequency QoS request.
+ * @req: Constraint request to apply.
+ * @action: Action to perform (add/update/remove).
+ * @value: Value to assign to the QoS request.
+ */
+static int freq_qos_apply(struct freq_qos_request *req,
+ enum pm_qos_req_action action, s32 value)
+{
+ int ret;
+
+ switch(req->type) {
+ case FREQ_QOS_MIN:
+ ret = pm_qos_update_target(&req->qos->min_freq, &req->pnode,
+ action, value);
+ break;
+ case FREQ_QOS_MAX:
+ ret = pm_qos_update_target(&req->qos->max_freq, &req->pnode,
+ action, value);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+/**
+ * freq_qos_add_request - Insert new frequency QoS request into a given list.
+ * @qos: Constraints to update.
+ * @req: Preallocated request object.
+ * @type: Request type.
+ * @value: Request value.
+ *
+ * Insert a new entry into the @qos list of requests, recompute the effective
+ * QoS constraint value for that list and initialize the @req object. The
+ * caller needs to save that object for later use in updates and removal.
+ *
+ * Return 1 if the effective constraint value has changed, 0 if the effective
+ * constraint value has not changed, or a negative error code on failures.
+ */
+int freq_qos_add_request(struct freq_constraints *qos,
+ struct freq_qos_request *req,
+ enum freq_qos_req_type type, s32 value)
+{
+ int ret;
+
+ if (IS_ERR_OR_NULL(qos) || !req)
+ return -EINVAL;
+
+ if (WARN(freq_qos_request_active(req),
+ "%s() called for active request\n", __func__))
+ return -EINVAL;
+
+ req->qos = qos;
+ req->type = type;
+ ret = freq_qos_apply(req, PM_QOS_ADD_REQ, value);
+ if (ret < 0) {
+ req->qos = NULL;
+ req->type = 0;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(freq_qos_add_request);
+
+/**
+ * freq_qos_update_request - Modify existing frequency QoS request.
+ * @req: Request to modify.
+ * @new_value: New request value.
+ *
+ * Update an existing frequency QoS request along with the effective constraint
+ * value for the list of requests it belongs to.
+ *
+ * Return 1 if the effective constraint value has changed, 0 if the effective
+ * constraint value has not changed, or a negative error code on failures.
+ */
+int freq_qos_update_request(struct freq_qos_request *req, s32 new_value)
+{
+ if (!req)
+ return -EINVAL;
+
+ if (WARN(!freq_qos_request_active(req),
+ "%s() called for unknown object\n", __func__))
+ return -EINVAL;
+
+ if (req->pnode.prio == new_value)
+ return 0;
+
+ return freq_qos_apply(req, PM_QOS_UPDATE_REQ, new_value);
+}
+EXPORT_SYMBOL_GPL(freq_qos_update_request);
+
+/**
+ * freq_qos_remove_request - Remove frequency QoS request from its list.
+ * @req: Request to remove.
+ *
+ * Remove the given frequency QoS request from the list of constraints it
+ * belongs to and recompute the effective constraint value for that list.
+ *
+ * Return 1 if the effective constraint value has changed, 0 if the effective
+ * constraint value has not changed, or a negative error code on failures.
+ */
+int freq_qos_remove_request(struct freq_qos_request *req)
+{
+ int ret;
+
+ if (!req)
+ return -EINVAL;
+
+ if (WARN(!freq_qos_request_active(req),
+ "%s() called for unknown object\n", __func__))
+ return -EINVAL;
+
+ ret = freq_qos_apply(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE);
+ req->qos = NULL;
+ req->type = 0;
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(freq_qos_remove_request);
+
+/**
+ * freq_qos_add_notifier - Add frequency QoS change notifier.
+ * @qos: List of requests to add the notifier to.
+ * @type: Request type.
+ * @notifier: Notifier block to add.
+ */
+int freq_qos_add_notifier(struct freq_constraints *qos,
+ enum freq_qos_req_type type,
+ struct notifier_block *notifier)
+{
+ int ret;
+
+ if (IS_ERR_OR_NULL(qos) || !notifier)
+ return -EINVAL;
+
+ switch (type) {
+ case FREQ_QOS_MIN:
+ ret = blocking_notifier_chain_register(qos->min_freq.notifiers,
+ notifier);
+ break;
+ case FREQ_QOS_MAX:
+ ret = blocking_notifier_chain_register(qos->max_freq.notifiers,
+ notifier);
+ break;
+ default:
+ WARN_ON(1);
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(freq_qos_add_notifier);
+
+/**
+ * freq_qos_remove_notifier - Remove frequency QoS change notifier.
+ * @qos: List of requests to remove the notifier from.
+ * @type: Request type.
+ * @notifier: Notifier block to remove.
+ */
+int freq_qos_remove_notifier(struct freq_constraints *qos,
+ enum freq_qos_req_type type,
+ struct notifier_block *notifier)
+{
+ int ret;
+
+ if (IS_ERR_OR_NULL(qos) || !notifier)
+ return -EINVAL;
+
+ switch (type) {
+ case FREQ_QOS_MIN:
+ ret = blocking_notifier_chain_unregister(qos->min_freq.notifiers,
+ notifier);
+ break;
+ case FREQ_QOS_MAX:
+ ret = blocking_notifier_chain_unregister(qos->max_freq.notifiers,
+ notifier);
+ break;
+ default:
+ WARN_ON(1);
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(freq_qos_remove_notifier);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index dd05a378631a..80b60ca7767f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -16,6 +16,7 @@
#include <asm/tlb.h>
#include "../workqueue_internal.h"
+#include "../../fs/io-wq.h"
#include "../smpboot.h"
#include "pelt.h"
@@ -1065,7 +1066,7 @@ uclamp_update_active(struct task_struct *p, enum uclamp_id clamp_id)
* affecting a valid clamp bucket, the next time it's enqueued,
* it will already see the updated clamp bucket value.
*/
- if (!p->uclamp[clamp_id].active) {
+ if (p->uclamp[clamp_id].active) {
uclamp_rq_dec_id(rq, p, clamp_id);
uclamp_rq_inc_id(rq, p, clamp_id);
}
@@ -1073,6 +1074,7 @@ uclamp_update_active(struct task_struct *p, enum uclamp_id clamp_id)
task_rq_unlock(rq, p, &rf);
}
+#ifdef CONFIG_UCLAMP_TASK_GROUP
static inline void
uclamp_update_active_tasks(struct cgroup_subsys_state *css,
unsigned int clamps)
@@ -1091,7 +1093,6 @@ uclamp_update_active_tasks(struct cgroup_subsys_state *css,
css_task_iter_end(&it);
}
-#ifdef CONFIG_UCLAMP_TASK_GROUP
static void cpu_util_update_eff(struct cgroup_subsys_state *css);
static void uclamp_update_root_tg(void)
{
@@ -3929,13 +3930,22 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
}
restart:
+#ifdef CONFIG_SMP
/*
- * Ensure that we put DL/RT tasks before the pick loop, such that they
- * can PULL higher prio tasks when we lower the RQ 'priority'.
+ * We must do the balancing pass before put_next_task(), such
+ * that when we release the rq->lock the task is in the same
+ * state as before we took rq->lock.
+ *
+ * We can terminate the balance pass as soon as we know there is
+ * a runnable task of @class priority or higher.
*/
- prev->sched_class->put_prev_task(rq, prev, rf);
- if (!rq->nr_running)
- newidle_balance(rq, rf);
+ for_class_range(class, prev->sched_class, &idle_sched_class) {
+ if (class->balance(rq, prev, rf))
+ break;
+ }
+#endif
+
+ put_prev_task(rq, prev);
for_each_class(class) {
p = class->pick_next_task(rq, NULL, NULL);
@@ -4103,9 +4113,12 @@ static inline void sched_submit_work(struct task_struct *tsk)
* we disable preemption to avoid it calling schedule() again
* in the possible wakeup of a kworker.
*/
- if (tsk->flags & PF_WQ_WORKER) {
+ if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) {
preempt_disable();
- wq_worker_sleeping(tsk);
+ if (tsk->flags & PF_WQ_WORKER)
+ wq_worker_sleeping(tsk);
+ else
+ io_wq_worker_sleeping(tsk);
preempt_enable_no_resched();
}
@@ -4122,8 +4135,12 @@ static inline void sched_submit_work(struct task_struct *tsk)
static void sched_update_worker(struct task_struct *tsk)
{
- if (tsk->flags & PF_WQ_WORKER)
- wq_worker_running(tsk);
+ if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) {
+ if (tsk->flags & PF_WQ_WORKER)
+ wq_worker_running(tsk);
+ else
+ io_wq_worker_running(tsk);
+ }
}
asmlinkage __visible void __sched schedule(void)
@@ -6010,10 +6027,11 @@ void init_idle(struct task_struct *idle, int cpu)
struct rq *rq = cpu_rq(cpu);
unsigned long flags;
+ __sched_fork(0, idle);
+
raw_spin_lock_irqsave(&idle->pi_lock, flags);
raw_spin_lock(&rq->lock);
- __sched_fork(0, idle);
idle->state = TASK_RUNNING;
idle->se.exec_start = sched_clock();
idle->flags |= PF_IDLE;
@@ -6201,7 +6219,7 @@ static struct task_struct *__pick_migrate_task(struct rq *rq)
for_each_class(class) {
next = class->pick_next_task(rq, NULL, NULL);
if (next) {
- next->sched_class->put_prev_task(rq, next, NULL);
+ next->sched_class->put_prev_task(rq, next);
return next;
}
}
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 2305ce89a26c..46ed4e1383e2 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -740,7 +740,7 @@ void vtime_account_system(struct task_struct *tsk)
write_seqcount_begin(&vtime->seqcount);
/* We might have scheduled out from guest path */
- if (current->flags & PF_VCPU)
+ if (tsk->flags & PF_VCPU)
vtime_account_guest(tsk, vtime);
else
__vtime_account_system(tsk, vtime);
@@ -783,7 +783,7 @@ void vtime_guest_enter(struct task_struct *tsk)
*/
write_seqcount_begin(&vtime->seqcount);
__vtime_account_system(tsk, vtime);
- current->flags |= PF_VCPU;
+ tsk->flags |= PF_VCPU;
write_seqcount_end(&vtime->seqcount);
}
EXPORT_SYMBOL_GPL(vtime_guest_enter);
@@ -794,7 +794,7 @@ void vtime_guest_exit(struct task_struct *tsk)
write_seqcount_begin(&vtime->seqcount);
vtime_account_guest(tsk, vtime);
- current->flags &= ~PF_VCPU;
+ tsk->flags &= ~PF_VCPU;
write_seqcount_end(&vtime->seqcount);
}
EXPORT_SYMBOL_GPL(vtime_guest_exit);
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 2dc48720f189..a8a08030a8f7 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1691,6 +1691,22 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
resched_curr(rq);
}
+static int balance_dl(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
+{
+ if (!on_dl_rq(&p->dl) && need_pull_dl_task(rq, p)) {
+ /*
+ * This is OK, because current is on_cpu, which avoids it being
+ * picked for load-balance and preemption/IRQs are still
+ * disabled avoiding further scheduler activity on it and we've
+ * not yet started the picking loop.
+ */
+ rq_unpin_lock(rq, rf);
+ pull_dl_task(rq);
+ rq_repin_lock(rq, rf);
+ }
+
+ return sched_stop_runnable(rq) || sched_dl_runnable(rq);
+}
#endif /* CONFIG_SMP */
/*
@@ -1758,45 +1774,28 @@ static struct task_struct *
pick_next_task_dl(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
{
struct sched_dl_entity *dl_se;
+ struct dl_rq *dl_rq = &rq->dl;
struct task_struct *p;
- struct dl_rq *dl_rq;
WARN_ON_ONCE(prev || rf);
- dl_rq = &rq->dl;
-
- if (unlikely(!dl_rq->dl_nr_running))
+ if (!sched_dl_runnable(rq))
return NULL;
dl_se = pick_next_dl_entity(rq, dl_rq);
BUG_ON(!dl_se);
-
p = dl_task_of(dl_se);
-
set_next_task_dl(rq, p);
-
return p;
}
-static void put_prev_task_dl(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
+static void put_prev_task_dl(struct rq *rq, struct task_struct *p)
{
update_curr_dl(rq);
update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 1);
if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1)
enqueue_pushable_dl_task(rq, p);
-
- if (rf && !on_dl_rq(&p->dl) && need_pull_dl_task(rq, p)) {
- /*
- * This is OK, because current is on_cpu, which avoids it being
- * picked for load-balance and preemption/IRQs are still
- * disabled avoiding further scheduler activity on it and we've
- * not yet started the picking loop.
- */
- rq_unpin_lock(rq, rf);
- pull_dl_task(rq);
- rq_repin_lock(rq, rf);
- }
}
/*
@@ -2442,6 +2441,7 @@ const struct sched_class dl_sched_class = {
.set_next_task = set_next_task_dl,
#ifdef CONFIG_SMP
+ .balance = balance_dl,
.select_task_rq = select_task_rq_dl,
.migrate_task_rq = migrate_task_rq_dl,
.set_cpus_allowed = set_cpus_allowed_dl,
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 83ab35e2374f..69a81a5709ff 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4926,20 +4926,28 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
if (++count > 3) {
u64 new, old = ktime_to_ns(cfs_b->period);
- new = (old * 147) / 128; /* ~115% */
- new = min(new, max_cfs_quota_period);
-
- cfs_b->period = ns_to_ktime(new);
-
- /* since max is 1s, this is limited to 1e9^2, which fits in u64 */
- cfs_b->quota *= new;
- cfs_b->quota = div64_u64(cfs_b->quota, old);
-
- pr_warn_ratelimited(
- "cfs_period_timer[cpu%d]: period too short, scaling up (new cfs_period_us %lld, cfs_quota_us = %lld)\n",
- smp_processor_id(),
- div_u64(new, NSEC_PER_USEC),
- div_u64(cfs_b->quota, NSEC_PER_USEC));
+ /*
+ * Grow period by a factor of 2 to avoid losing precision.
+ * Precision loss in the quota/period ratio can cause __cfs_schedulable
+ * to fail.
+ */
+ new = old * 2;
+ if (new < max_cfs_quota_period) {
+ cfs_b->period = ns_to_ktime(new);
+ cfs_b->quota *= 2;
+
+ pr_warn_ratelimited(
+ "cfs_period_timer[cpu%d]: period too short, scaling up (new cfs_period_us = %lld, cfs_quota_us = %lld)\n",
+ smp_processor_id(),
+ div_u64(new, NSEC_PER_USEC),
+ div_u64(cfs_b->quota, NSEC_PER_USEC));
+ } else {
+ pr_warn_ratelimited(
+ "cfs_period_timer[cpu%d]: period too short, but cannot scale up without losing precision (cfs_period_us = %lld, cfs_quota_us = %lld)\n",
+ smp_processor_id(),
+ div_u64(old, NSEC_PER_USEC),
+ div_u64(cfs_b->quota, NSEC_PER_USEC));
+ }
/* reset count so we don't come right back in here */
count = 0;
@@ -6562,6 +6570,15 @@ static void task_dead_fair(struct task_struct *p)
{
remove_entity_load_avg(&p->se);
}
+
+static int
+balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+{
+ if (rq->nr_running)
+ return 1;
+
+ return newidle_balance(rq, rf) != 0;
+}
#endif /* CONFIG_SMP */
static unsigned long wakeup_gran(struct sched_entity *se)
@@ -6738,7 +6755,7 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
int new_tasks;
again:
- if (!cfs_rq->nr_running)
+ if (!sched_fair_runnable(rq))
goto idle;
#ifdef CONFIG_FAIR_GROUP_SCHED
@@ -6876,7 +6893,7 @@ idle:
/*
* Account for a descheduled task:
*/
-static void put_prev_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+static void put_prev_task_fair(struct rq *rq, struct task_struct *prev)
{
struct sched_entity *se = &prev->se;
struct cfs_rq *cfs_rq;
@@ -7531,6 +7548,19 @@ static void update_blocked_averages(int cpu)
update_rq_clock(rq);
/*
+ * update_cfs_rq_load_avg() can call cpufreq_update_util(). Make sure
+ * that RT, DL and IRQ signals have been updated before updating CFS.
+ */
+ curr_class = rq->curr->sched_class;
+ update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class);
+ update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class);
+ update_irq_load_avg(rq, 0);
+
+ /* Don't need periodic decay once load/util_avg are null */
+ if (others_have_blocked(rq))
+ done = false;
+
+ /*
* Iterates the task_group tree in a bottom up fashion, see
* list_add_leaf_cfs_rq() for details.
*/
@@ -7557,14 +7587,6 @@ static void update_blocked_averages(int cpu)
done = false;
}
- curr_class = rq->curr->sched_class;
- update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class);
- update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class);
- update_irq_load_avg(rq, 0);
- /* Don't need periodic decay once load/util_avg are null */
- if (others_have_blocked(rq))
- done = false;
-
update_blocked_load_status(rq, !done);
rq_unlock_irqrestore(rq, &rf);
}
@@ -7625,12 +7647,18 @@ static inline void update_blocked_averages(int cpu)
rq_lock_irqsave(rq, &rf);
update_rq_clock(rq);
- update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq);
+ /*
+ * update_cfs_rq_load_avg() can call cpufreq_update_util(). Make sure
+ * that RT, DL and IRQ signals have been updated before updating CFS.
+ */
curr_class = rq->curr->sched_class;
update_rt_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &rt_sched_class);
update_dl_rq_load_avg(rq_clock_pelt(rq), rq, curr_class == &dl_sched_class);
update_irq_load_avg(rq, 0);
+
+ update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq);
+
update_blocked_load_status(rq, cfs_rq_has_blocked(cfs_rq) || others_have_blocked(rq));
rq_unlock_irqrestore(rq, &rf);
}
@@ -10406,11 +10434,11 @@ const struct sched_class fair_sched_class = {
.check_preempt_curr = check_preempt_wakeup,
.pick_next_task = pick_next_task_fair,
-
.put_prev_task = put_prev_task_fair,
.set_next_task = set_next_task_fair,
#ifdef CONFIG_SMP
+ .balance = balance_fair,
.select_task_rq = select_task_rq_fair,
.migrate_task_rq = migrate_task_rq_fair,
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 8dad5aa600ea..f65ef1e2f204 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -365,6 +365,12 @@ select_task_rq_idle(struct task_struct *p, int cpu, int sd_flag, int flags)
{
return task_cpu(p); /* IDLE tasks as never migrated */
}
+
+static int
+balance_idle(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+{
+ return WARN_ON_ONCE(1);
+}
#endif
/*
@@ -375,7 +381,7 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int fl
resched_curr(rq);
}
-static void put_prev_task_idle(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
{
}
@@ -460,6 +466,7 @@ const struct sched_class idle_sched_class = {
.set_next_task = set_next_task_idle,
#ifdef CONFIG_SMP
+ .balance = balance_idle,
.select_task_rq = select_task_rq_idle,
.set_cpus_allowed = set_cpus_allowed_common,
#endif
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index ebaa4e619684..9b8adc01be3d 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1469,6 +1469,22 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
resched_curr(rq);
}
+static int balance_rt(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
+{
+ if (!on_rt_rq(&p->rt) && need_pull_rt_task(rq, p)) {
+ /*
+ * This is OK, because current is on_cpu, which avoids it being
+ * picked for load-balance and preemption/IRQs are still
+ * disabled avoiding further scheduler activity on it and we've
+ * not yet started the picking loop.
+ */
+ rq_unpin_lock(rq, rf);
+ pull_rt_task(rq);
+ rq_repin_lock(rq, rf);
+ }
+
+ return sched_stop_runnable(rq) || sched_dl_runnable(rq) || sched_rt_runnable(rq);
+}
#endif /* CONFIG_SMP */
/*
@@ -1552,21 +1568,18 @@ static struct task_struct *
pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
{
struct task_struct *p;
- struct rt_rq *rt_rq = &rq->rt;
WARN_ON_ONCE(prev || rf);
- if (!rt_rq->rt_queued)
+ if (!sched_rt_runnable(rq))
return NULL;
p = _pick_next_task_rt(rq);
-
set_next_task_rt(rq, p);
-
return p;
}
-static void put_prev_task_rt(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
+static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
{
update_curr_rt(rq);
@@ -1578,18 +1591,6 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p, struct rq_fla
*/
if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
enqueue_pushable_task(rq, p);
-
- if (rf && !on_rt_rq(&p->rt) && need_pull_rt_task(rq, p)) {
- /*
- * This is OK, because current is on_cpu, which avoids it being
- * picked for load-balance and preemption/IRQs are still
- * disabled avoiding further scheduler activity on it and we've
- * not yet started the picking loop.
- */
- rq_unpin_lock(rq, rf);
- pull_rt_task(rq);
- rq_repin_lock(rq, rf);
- }
}
#ifdef CONFIG_SMP
@@ -2366,8 +2367,8 @@ const struct sched_class rt_sched_class = {
.set_next_task = set_next_task_rt,
#ifdef CONFIG_SMP
+ .balance = balance_rt,
.select_task_rq = select_task_rq_rt,
-
.set_cpus_allowed = set_cpus_allowed_common,
.rq_online = rq_online_rt,
.rq_offline = rq_offline_rt,
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 0db2c1b3361e..c8870c5bd7df 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1727,10 +1727,11 @@ struct sched_class {
struct task_struct * (*pick_next_task)(struct rq *rq,
struct task_struct *prev,
struct rq_flags *rf);
- void (*put_prev_task)(struct rq *rq, struct task_struct *p, struct rq_flags *rf);
+ void (*put_prev_task)(struct rq *rq, struct task_struct *p);
void (*set_next_task)(struct rq *rq, struct task_struct *p);
#ifdef CONFIG_SMP
+ int (*balance)(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
void (*migrate_task_rq)(struct task_struct *p, int new_cpu);
@@ -1773,7 +1774,7 @@ struct sched_class {
static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
{
WARN_ON_ONCE(rq->curr != prev);
- prev->sched_class->put_prev_task(rq, prev, NULL);
+ prev->sched_class->put_prev_task(rq, prev);
}
static inline void set_next_task(struct rq *rq, struct task_struct *next)
@@ -1787,8 +1788,12 @@ static inline void set_next_task(struct rq *rq, struct task_struct *next)
#else
#define sched_class_highest (&dl_sched_class)
#endif
+
+#define for_class_range(class, _from, _to) \
+ for (class = (_from); class != (_to); class = class->next)
+
#define for_each_class(class) \
- for (class = sched_class_highest; class; class = class->next)
+ for_class_range(class, sched_class_highest, NULL)
extern const struct sched_class stop_sched_class;
extern const struct sched_class dl_sched_class;
@@ -1796,6 +1801,25 @@ extern const struct sched_class rt_sched_class;
extern const struct sched_class fair_sched_class;
extern const struct sched_class idle_sched_class;
+static inline bool sched_stop_runnable(struct rq *rq)
+{
+ return rq->stop && task_on_rq_queued(rq->stop);
+}
+
+static inline bool sched_dl_runnable(struct rq *rq)
+{
+ return rq->dl.dl_nr_running > 0;
+}
+
+static inline bool sched_rt_runnable(struct rq *rq)
+{
+ return rq->rt.rt_queued > 0;
+}
+
+static inline bool sched_fair_runnable(struct rq *rq)
+{
+ return rq->cfs.nr_running > 0;
+}
#ifdef CONFIG_SMP
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index 7e1cee4e65b2..c0640739e05e 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -15,6 +15,12 @@ select_task_rq_stop(struct task_struct *p, int cpu, int sd_flag, int flags)
{
return task_cpu(p); /* stop tasks as never migrate */
}
+
+static int
+balance_stop(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+{
+ return sched_stop_runnable(rq);
+}
#endif /* CONFIG_SMP */
static void
@@ -31,16 +37,13 @@ static void set_next_task_stop(struct rq *rq, struct task_struct *stop)
static struct task_struct *
pick_next_task_stop(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
{
- struct task_struct *stop = rq->stop;
-
WARN_ON_ONCE(prev || rf);
- if (!stop || !task_on_rq_queued(stop))
+ if (!sched_stop_runnable(rq))
return NULL;
- set_next_task_stop(rq, stop);
-
- return stop;
+ set_next_task_stop(rq, rq->stop);
+ return rq->stop;
}
static void
@@ -60,7 +63,7 @@ static void yield_task_stop(struct rq *rq)
BUG(); /* the stop task should never yield, its pointless. */
}
-static void put_prev_task_stop(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
+static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
{
struct task_struct *curr = rq->curr;
u64 delta_exec;
@@ -129,6 +132,7 @@ const struct sched_class stop_sched_class = {
.set_next_task = set_next_task_stop,
#ifdef CONFIG_SMP
+ .balance = balance_stop,
.select_task_rq = select_task_rq_stop,
.set_cpus_allowed = set_cpus_allowed_common,
#endif
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index b5667a273bf6..49b835f1305f 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1948,7 +1948,7 @@ next_level:
static int
build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *attr)
{
- enum s_alloc alloc_state;
+ enum s_alloc alloc_state = sa_none;
struct sched_domain *sd;
struct s_data d;
struct rq *rq = NULL;
@@ -1956,6 +1956,9 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
struct sched_domain_topology_level *tl_asym;
bool has_asym = false;
+ if (WARN_ON(cpumask_empty(cpu_map)))
+ goto error;
+
alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
if (alloc_state != sa_rootdomain)
goto error;
@@ -2026,7 +2029,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
rcu_read_unlock();
if (has_asym)
- static_branch_enable_cpuslocked(&sched_asym_cpucapacity);
+ static_branch_inc_cpuslocked(&sched_asym_cpucapacity);
if (rq && sched_debug_enabled) {
pr_info("root domain span: %*pbl (max cpu_capacity = %lu)\n",
@@ -2121,8 +2124,12 @@ int sched_init_domains(const struct cpumask *cpu_map)
*/
static void detach_destroy_domains(const struct cpumask *cpu_map)
{
+ unsigned int cpu = cpumask_any(cpu_map);
int i;
+ if (rcu_access_pointer(per_cpu(sd_asym_cpucapacity, cpu)))
+ static_branch_dec_cpuslocked(&sched_asym_cpucapacity);
+
rcu_read_lock();
for_each_cpu(i, cpu_map)
cpu_attach_domain(NULL, &def_root_domain, i);
diff --git a/kernel/signal.c b/kernel/signal.c
index c4da1ef56fdf..bcd46f547db3 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2205,8 +2205,8 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t
*/
preempt_disable();
read_unlock(&tasklist_lock);
- preempt_enable_no_resched();
cgroup_enter_frozen();
+ preempt_enable_no_resched();
freezable_schedule();
cgroup_leave_frozen(true);
} else {
diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
index 6d1f68b7e528..c9ea7eb2cb1a 100644
--- a/kernel/stacktrace.c
+++ b/kernel/stacktrace.c
@@ -141,7 +141,8 @@ unsigned int stack_trace_save_tsk(struct task_struct *tsk, unsigned long *store,
struct stacktrace_cookie c = {
.store = store,
.size = size,
- .skip = skipnr + 1,
+ /* skip this function if they are tracing us */
+ .skip = skipnr + !!(current == tsk),
};
if (!try_get_task_stack(tsk))
@@ -298,7 +299,8 @@ unsigned int stack_trace_save_tsk(struct task_struct *task,
struct stack_trace trace = {
.entries = store,
.max_entries = size,
- .skip = skipnr + 1,
+ /* skip this function if they are tracing us */
+ .skip = skipnr + !!(current == task),
};
save_stack_trace_tsk(task, &trace);
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index c7031a22aa7b..998d50ee2d9b 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -7,6 +7,7 @@
* Copyright (C) 2010 SUSE Linux Products GmbH
* Copyright (C) 2010 Tejun Heo <tj@kernel.org>
*/
+#include <linux/compiler.h>
#include <linux/completion.h>
#include <linux/cpu.h>
#include <linux/init.h>
@@ -167,7 +168,7 @@ static void set_state(struct multi_stop_data *msdata,
/* Reset ack counter. */
atomic_set(&msdata->thread_ack, msdata->num_threads);
smp_wmb();
- msdata->state = newstate;
+ WRITE_ONCE(msdata->state, newstate);
}
/* Last one to ack a state moves to the next state. */
@@ -186,7 +187,7 @@ void __weak stop_machine_yield(const struct cpumask *cpumask)
static int multi_cpu_stop(void *data)
{
struct multi_stop_data *msdata = data;
- enum multi_stop_state curstate = MULTI_STOP_NONE;
+ enum multi_stop_state newstate, curstate = MULTI_STOP_NONE;
int cpu = smp_processor_id(), err = 0;
const struct cpumask *cpumask;
unsigned long flags;
@@ -210,8 +211,9 @@ static int multi_cpu_stop(void *data)
do {
/* Chill out and ensure we re-read multi_stop_state. */
stop_machine_yield(cpumask);
- if (msdata->state != curstate) {
- curstate = msdata->state;
+ newstate = READ_ONCE(msdata->state);
+ if (newstate != curstate) {
+ curstate = newstate;
switch (curstate) {
case MULTI_STOP_DISABLE_IRQ:
local_irq_disable();
diff --git a/kernel/sysctl-test.c b/kernel/sysctl-test.c
new file mode 100644
index 000000000000..2a63241a8453
--- /dev/null
+++ b/kernel/sysctl-test.c
@@ -0,0 +1,392 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KUnit test of proc sysctl.
+ */
+
+#include <kunit/test.h>
+#include <linux/sysctl.h>
+
+#define KUNIT_PROC_READ 0
+#define KUNIT_PROC_WRITE 1
+
+static int i_zero;
+static int i_one_hundred = 100;
+
+/*
+ * Test that proc_dointvec will not try to use a NULL .data field even when the
+ * length is non-zero.
+ */
+static void sysctl_test_api_dointvec_null_tbl_data(struct kunit *test)
+{
+ struct ctl_table null_data_table = {
+ .procname = "foo",
+ /*
+ * Here we are testing that proc_dointvec behaves correctly when
+ * we give it a NULL .data field. Normally this would point to a
+ * piece of memory where the value would be stored.
+ */
+ .data = NULL,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ .extra1 = &i_zero,
+ .extra2 = &i_one_hundred,
+ };
+ /*
+ * proc_dointvec expects a buffer in user space, so we allocate one. We
+ * also need to cast it to __user so sparse doesn't get mad.
+ */
+ void __user *buffer = (void __user *)kunit_kzalloc(test, sizeof(int),
+ GFP_USER);
+ size_t len;
+ loff_t pos;
+
+ /*
+ * We don't care what the starting length is since proc_dointvec should
+ * not try to read because .data is NULL.
+ */
+ len = 1234;
+ KUNIT_EXPECT_EQ(test, 0, proc_dointvec(&null_data_table,
+ KUNIT_PROC_READ, buffer, &len,
+ &pos));
+ KUNIT_EXPECT_EQ(test, (size_t)0, len);
+
+ /*
+ * See above.
+ */
+ len = 1234;
+ KUNIT_EXPECT_EQ(test, 0, proc_dointvec(&null_data_table,
+ KUNIT_PROC_WRITE, buffer, &len,
+ &pos));
+ KUNIT_EXPECT_EQ(test, (size_t)0, len);
+}
+
+/*
+ * Similar to the previous test, we create a struct ctrl_table that has a .data
+ * field that proc_dointvec cannot do anything with; however, this time it is
+ * because we tell proc_dointvec that the size is 0.
+ */
+static void sysctl_test_api_dointvec_table_maxlen_unset(struct kunit *test)
+{
+ int data = 0;
+ struct ctl_table data_maxlen_unset_table = {
+ .procname = "foo",
+ .data = &data,
+ /*
+ * So .data is no longer NULL, but we tell proc_dointvec its
+ * length is 0, so it still shouldn't try to use it.
+ */
+ .maxlen = 0,
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ .extra1 = &i_zero,
+ .extra2 = &i_one_hundred,
+ };
+ void __user *buffer = (void __user *)kunit_kzalloc(test, sizeof(int),
+ GFP_USER);
+ size_t len;
+ loff_t pos;
+
+ /*
+ * As before, we don't care what buffer length is because proc_dointvec
+ * cannot do anything because its internal .data buffer has zero length.
+ */
+ len = 1234;
+ KUNIT_EXPECT_EQ(test, 0, proc_dointvec(&data_maxlen_unset_table,
+ KUNIT_PROC_READ, buffer, &len,
+ &pos));
+ KUNIT_EXPECT_EQ(test, (size_t)0, len);
+
+ /*
+ * See previous comment.
+ */
+ len = 1234;
+ KUNIT_EXPECT_EQ(test, 0, proc_dointvec(&data_maxlen_unset_table,
+ KUNIT_PROC_WRITE, buffer, &len,
+ &pos));
+ KUNIT_EXPECT_EQ(test, (size_t)0, len);
+}
+
+/*
+ * Here we provide a valid struct ctl_table, but we try to read and write from
+ * it using a buffer of zero length, so it should still fail in a similar way as
+ * before.
+ */
+static void sysctl_test_api_dointvec_table_len_is_zero(struct kunit *test)
+{
+ int data = 0;
+ /* Good table. */
+ struct ctl_table table = {
+ .procname = "foo",
+ .data = &data,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ .extra1 = &i_zero,
+ .extra2 = &i_one_hundred,
+ };
+ void __user *buffer = (void __user *)kunit_kzalloc(test, sizeof(int),
+ GFP_USER);
+ /*
+ * However, now our read/write buffer has zero length.
+ */
+ size_t len = 0;
+ loff_t pos;
+
+ KUNIT_EXPECT_EQ(test, 0, proc_dointvec(&table, KUNIT_PROC_READ, buffer,
+ &len, &pos));
+ KUNIT_EXPECT_EQ(test, (size_t)0, len);
+
+ KUNIT_EXPECT_EQ(test, 0, proc_dointvec(&table, KUNIT_PROC_WRITE, buffer,
+ &len, &pos));
+ KUNIT_EXPECT_EQ(test, (size_t)0, len);
+}
+
+/*
+ * Test that proc_dointvec refuses to read when the file position is non-zero.
+ */
+static void sysctl_test_api_dointvec_table_read_but_position_set(
+ struct kunit *test)
+{
+ int data = 0;
+ /* Good table. */
+ struct ctl_table table = {
+ .procname = "foo",
+ .data = &data,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ .extra1 = &i_zero,
+ .extra2 = &i_one_hundred,
+ };
+ void __user *buffer = (void __user *)kunit_kzalloc(test, sizeof(int),
+ GFP_USER);
+ /*
+ * We don't care about our buffer length because we start off with a
+ * non-zero file position.
+ */
+ size_t len = 1234;
+ /*
+ * proc_dointvec should refuse to read into the buffer since the file
+ * pos is non-zero.
+ */
+ loff_t pos = 1;
+
+ KUNIT_EXPECT_EQ(test, 0, proc_dointvec(&table, KUNIT_PROC_READ, buffer,
+ &len, &pos));
+ KUNIT_EXPECT_EQ(test, (size_t)0, len);
+}
+
+/*
+ * Test that we can read a two digit number in a sufficiently size buffer.
+ * Nothing fancy.
+ */
+static void sysctl_test_dointvec_read_happy_single_positive(struct kunit *test)
+{
+ int data = 0;
+ /* Good table. */
+ struct ctl_table table = {
+ .procname = "foo",
+ .data = &data,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ .extra1 = &i_zero,
+ .extra2 = &i_one_hundred,
+ };
+ size_t len = 4;
+ loff_t pos = 0;
+ char *buffer = kunit_kzalloc(test, len, GFP_USER);
+ char __user *user_buffer = (char __user *)buffer;
+ /* Store 13 in the data field. */
+ *((int *)table.data) = 13;
+
+ KUNIT_EXPECT_EQ(test, 0, proc_dointvec(&table, KUNIT_PROC_READ,
+ user_buffer, &len, &pos));
+ KUNIT_ASSERT_EQ(test, (size_t)3, len);
+ buffer[len] = '\0';
+ /* And we read 13 back out. */
+ KUNIT_EXPECT_STREQ(test, "13\n", buffer);
+}
+
+/*
+ * Same as previous test, just now with negative numbers.
+ */
+static void sysctl_test_dointvec_read_happy_single_negative(struct kunit *test)
+{
+ int data = 0;
+ /* Good table. */
+ struct ctl_table table = {
+ .procname = "foo",
+ .data = &data,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ .extra1 = &i_zero,
+ .extra2 = &i_one_hundred,
+ };
+ size_t len = 5;
+ loff_t pos = 0;
+ char *buffer = kunit_kzalloc(test, len, GFP_USER);
+ char __user *user_buffer = (char __user *)buffer;
+ *((int *)table.data) = -16;
+
+ KUNIT_EXPECT_EQ(test, 0, proc_dointvec(&table, KUNIT_PROC_READ,
+ user_buffer, &len, &pos));
+ KUNIT_ASSERT_EQ(test, (size_t)4, len);
+ buffer[len] = '\0';
+ KUNIT_EXPECT_STREQ(test, "-16\n", (char *)buffer);
+}
+
+/*
+ * Test that a simple positive write works.
+ */
+static void sysctl_test_dointvec_write_happy_single_positive(struct kunit *test)
+{
+ int data = 0;
+ /* Good table. */
+ struct ctl_table table = {
+ .procname = "foo",
+ .data = &data,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ .extra1 = &i_zero,
+ .extra2 = &i_one_hundred,
+ };
+ char input[] = "9";
+ size_t len = sizeof(input) - 1;
+ loff_t pos = 0;
+ char *buffer = kunit_kzalloc(test, len, GFP_USER);
+ char __user *user_buffer = (char __user *)buffer;
+
+ memcpy(buffer, input, len);
+
+ KUNIT_EXPECT_EQ(test, 0, proc_dointvec(&table, KUNIT_PROC_WRITE,
+ user_buffer, &len, &pos));
+ KUNIT_EXPECT_EQ(test, sizeof(input) - 1, len);
+ KUNIT_EXPECT_EQ(test, sizeof(input) - 1, (size_t)pos);
+ KUNIT_EXPECT_EQ(test, 9, *((int *)table.data));
+}
+
+/*
+ * Same as previous test, but now with negative numbers.
+ */
+static void sysctl_test_dointvec_write_happy_single_negative(struct kunit *test)
+{
+ int data = 0;
+ struct ctl_table table = {
+ .procname = "foo",
+ .data = &data,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ .extra1 = &i_zero,
+ .extra2 = &i_one_hundred,
+ };
+ char input[] = "-9";
+ size_t len = sizeof(input) - 1;
+ loff_t pos = 0;
+ char *buffer = kunit_kzalloc(test, len, GFP_USER);
+ char __user *user_buffer = (char __user *)buffer;
+
+ memcpy(buffer, input, len);
+
+ KUNIT_EXPECT_EQ(test, 0, proc_dointvec(&table, KUNIT_PROC_WRITE,
+ user_buffer, &len, &pos));
+ KUNIT_EXPECT_EQ(test, sizeof(input) - 1, len);
+ KUNIT_EXPECT_EQ(test, sizeof(input) - 1, (size_t)pos);
+ KUNIT_EXPECT_EQ(test, -9, *((int *)table.data));
+}
+
+/*
+ * Test that writing a value smaller than the minimum possible value is not
+ * allowed.
+ */
+static void sysctl_test_api_dointvec_write_single_less_int_min(
+ struct kunit *test)
+{
+ int data = 0;
+ struct ctl_table table = {
+ .procname = "foo",
+ .data = &data,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ .extra1 = &i_zero,
+ .extra2 = &i_one_hundred,
+ };
+ size_t max_len = 32, len = max_len;
+ loff_t pos = 0;
+ char *buffer = kunit_kzalloc(test, max_len, GFP_USER);
+ char __user *user_buffer = (char __user *)buffer;
+ unsigned long abs_of_less_than_min = (unsigned long)INT_MAX
+ - (INT_MAX + INT_MIN) + 1;
+
+ /*
+ * We use this rigmarole to create a string that contains a value one
+ * less than the minimum accepted value.
+ */
+ KUNIT_ASSERT_LT(test,
+ (size_t)snprintf(buffer, max_len, "-%lu",
+ abs_of_less_than_min),
+ max_len);
+
+ KUNIT_EXPECT_EQ(test, -EINVAL, proc_dointvec(&table, KUNIT_PROC_WRITE,
+ user_buffer, &len, &pos));
+ KUNIT_EXPECT_EQ(test, max_len, len);
+ KUNIT_EXPECT_EQ(test, 0, *((int *)table.data));
+}
+
+/*
+ * Test that writing the maximum possible value works.
+ */
+static void sysctl_test_api_dointvec_write_single_greater_int_max(
+ struct kunit *test)
+{
+ int data = 0;
+ struct ctl_table table = {
+ .procname = "foo",
+ .data = &data,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ .extra1 = &i_zero,
+ .extra2 = &i_one_hundred,
+ };
+ size_t max_len = 32, len = max_len;
+ loff_t pos = 0;
+ char *buffer = kunit_kzalloc(test, max_len, GFP_USER);
+ char __user *user_buffer = (char __user *)buffer;
+ unsigned long greater_than_max = (unsigned long)INT_MAX + 1;
+
+ KUNIT_ASSERT_GT(test, greater_than_max, (unsigned long)INT_MAX);
+ KUNIT_ASSERT_LT(test, (size_t)snprintf(buffer, max_len, "%lu",
+ greater_than_max),
+ max_len);
+ KUNIT_EXPECT_EQ(test, -EINVAL, proc_dointvec(&table, KUNIT_PROC_WRITE,
+ user_buffer, &len, &pos));
+ KUNIT_ASSERT_EQ(test, max_len, len);
+ KUNIT_EXPECT_EQ(test, 0, *((int *)table.data));
+}
+
+static struct kunit_case sysctl_test_cases[] = {
+ KUNIT_CASE(sysctl_test_api_dointvec_null_tbl_data),
+ KUNIT_CASE(sysctl_test_api_dointvec_table_maxlen_unset),
+ KUNIT_CASE(sysctl_test_api_dointvec_table_len_is_zero),
+ KUNIT_CASE(sysctl_test_api_dointvec_table_read_but_position_set),
+ KUNIT_CASE(sysctl_test_dointvec_read_happy_single_positive),
+ KUNIT_CASE(sysctl_test_dointvec_read_happy_single_negative),
+ KUNIT_CASE(sysctl_test_dointvec_write_happy_single_positive),
+ KUNIT_CASE(sysctl_test_dointvec_write_happy_single_negative),
+ KUNIT_CASE(sysctl_test_api_dointvec_write_single_less_int_min),
+ KUNIT_CASE(sysctl_test_api_dointvec_write_single_greater_int_max),
+ {}
+};
+
+static struct kunit_suite sysctl_test_suite = {
+ .name = "sysctl_test",
+ .test_cases = sysctl_test_cases,
+};
+
+kunit_test_suite(sysctl_test_suite);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 00fcea236eba..b6f2f35d0bcf 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -163,7 +163,7 @@ static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
#ifdef CONFIG_SPARC
#endif
-#ifdef __hppa__
+#ifdef CONFIG_PARISC
extern int pwrsw_enabled;
#endif
@@ -620,7 +620,7 @@ static struct ctl_table kern_table[] = {
.proc_handler = proc_dointvec,
},
#endif
-#ifdef __hppa__
+#ifdef CONFIG_PARISC
{
.procname = "soft-power",
.data = &pwrsw_enabled,
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 0d4dc241c0fb..65605530ee34 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -164,7 +164,7 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
struct hrtimer_clock_base *base;
for (;;) {
- base = timer->base;
+ base = READ_ONCE(timer->base);
if (likely(base != &migration_base)) {
raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
if (likely(base == timer->base))
@@ -244,7 +244,7 @@ again:
return base;
/* See the comment in lock_hrtimer_base() */
- timer->base = &migration_base;
+ WRITE_ONCE(timer->base, &migration_base);
raw_spin_unlock(&base->cpu_base->lock);
raw_spin_lock(&new_base->cpu_base->lock);
@@ -253,10 +253,10 @@ again:
raw_spin_unlock(&new_base->cpu_base->lock);
raw_spin_lock(&base->cpu_base->lock);
new_cpu_base = this_cpu_base;
- timer->base = base;
+ WRITE_ONCE(timer->base, base);
goto again;
}
- timer->base = new_base;
+ WRITE_ONCE(timer->base, new_base);
} else {
if (new_cpu_base != this_cpu_base &&
hrtimer_check_target(timer, new_base)) {
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 65eb796610dc..069ca78fb0bf 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -771,7 +771,7 @@ int __do_adjtimex(struct __kernel_timex *txc, const struct timespec64 *ts,
/* fill PPS status fields */
pps_fill_timex(txc);
- txc->time.tv_sec = (time_t)ts->tv_sec;
+ txc->time.tv_sec = ts->tv_sec;
txc->time.tv_usec = ts->tv_nsec;
if (!(time_status & STA_NANO))
txc->time.tv_usec = ts->tv_nsec / NSEC_PER_USEC;
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 92a431981b1c..42d512fcfda2 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -266,7 +266,7 @@ static void update_gt_cputime(struct task_cputime_atomic *cputime_atomic,
/**
* thread_group_sample_cputime - Sample cputime for a given task
* @tsk: Task for which cputime needs to be started
- * @iimes: Storage for time samples
+ * @samples: Storage for time samples
*
* Called from sys_getitimer() to calculate the expiry time of an active
* timer. That means group cputime accounting is already active. Called
@@ -1038,12 +1038,12 @@ unlock:
* member of @pct->bases[CLK].nextevt. False otherwise
*/
static inline bool
-task_cputimers_expired(const u64 *sample, struct posix_cputimers *pct)
+task_cputimers_expired(const u64 *samples, struct posix_cputimers *pct)
{
int i;
for (i = 0; i < CPUCLOCK_MAX; i++) {
- if (sample[i] >= pct->bases[i].nextevt)
+ if (samples[i] >= pct->bases[i].nextevt)
return true;
}
return false;
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 142b07619918..dbd69052eaa6 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -17,6 +17,8 @@
#include <linux/seqlock.h>
#include <linux/bitops.h>
+#include "timekeeping.h"
+
/**
* struct clock_read_data - data required to read from sched_clock()
*
diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c
index 4bc37ac3bb05..5ee0f7709410 100644
--- a/kernel/time/vsyscall.c
+++ b/kernel/time/vsyscall.c
@@ -110,8 +110,7 @@ void update_vsyscall(struct timekeeper *tk)
nsec = nsec + tk->wall_to_monotonic.tv_nsec;
vdso_ts->sec += __iter_div_u64_rem(nsec, NSEC_PER_SEC, &vdso_ts->nsec);
- if (__arch_use_vsyscall(vdata))
- update_vdso_data(vdata, tk);
+ update_vdso_data(vdata, tk);
__arch_update_vsyscall(vdata, tk);
@@ -124,10 +123,8 @@ void update_vsyscall_tz(void)
{
struct vdso_data *vdata = __arch_get_k_vdso_data();
- if (__arch_use_vsyscall(vdata)) {
- vdata[CS_HRES_COARSE].tz_minuteswest = sys_tz.tz_minuteswest;
- vdata[CS_HRES_COARSE].tz_dsttime = sys_tz.tz_dsttime;
- }
+ vdata[CS_HRES_COARSE].tz_minuteswest = sys_tz.tz_minuteswest;
+ vdata[CS_HRES_COARSE].tz_dsttime = sys_tz.tz_dsttime;
__arch_sync_vdso_data(vdata);
}
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 62a50bf399d6..5259d4dea675 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -18,6 +18,7 @@
#include <linux/clocksource.h>
#include <linux/sched/task.h>
#include <linux/kallsyms.h>
+#include <linux/security.h>
#include <linux/seq_file.h>
#include <linux/tracefs.h>
#include <linux/hardirq.h>
@@ -2493,14 +2494,14 @@ struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter)
}
static int
-ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec)
+ftrace_nop_initialize(struct module *mod, struct dyn_ftrace *rec)
{
int ret;
if (unlikely(ftrace_disabled))
return 0;
- ret = ftrace_make_nop(mod, rec, MCOUNT_ADDR);
+ ret = ftrace_init_nop(mod, rec);
if (ret) {
ftrace_bug_type = FTRACE_BUG_INIT;
ftrace_bug(ret, rec);
@@ -2942,7 +2943,7 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs)
* to the NOP instructions.
*/
if (!__is_defined(CC_USING_NOP_MCOUNT) &&
- !ftrace_code_disable(mod, p))
+ !ftrace_nop_initialize(mod, p))
break;
update_cnt++;
@@ -3486,6 +3487,11 @@ static int
ftrace_avail_open(struct inode *inode, struct file *file)
{
struct ftrace_iterator *iter;
+ int ret;
+
+ ret = security_locked_down(LOCKDOWN_TRACEFS);
+ if (ret)
+ return ret;
if (unlikely(ftrace_disabled))
return -ENODEV;
@@ -3505,6 +3511,15 @@ ftrace_enabled_open(struct inode *inode, struct file *file)
{
struct ftrace_iterator *iter;
+ /*
+ * This shows us what functions are currently being
+ * traced and by what. Not sure if we want lockdown
+ * to hide such critical information for an admin.
+ * Although, perhaps it can show information we don't
+ * want people to see, but if something is tracing
+ * something, we probably want to know about it.
+ */
+
iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter));
if (!iter)
return -ENOMEM;
@@ -3540,21 +3555,22 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag,
struct ftrace_hash *hash;
struct list_head *mod_head;
struct trace_array *tr = ops->private;
- int ret = 0;
+ int ret = -ENOMEM;
ftrace_ops_init(ops);
if (unlikely(ftrace_disabled))
return -ENODEV;
+ if (tracing_check_open_get_tr(tr))
+ return -ENODEV;
+
iter = kzalloc(sizeof(*iter), GFP_KERNEL);
if (!iter)
- return -ENOMEM;
+ goto out;
- if (trace_parser_get_init(&iter->parser, FTRACE_BUFF_MAX)) {
- kfree(iter);
- return -ENOMEM;
- }
+ if (trace_parser_get_init(&iter->parser, FTRACE_BUFF_MAX))
+ goto out;
iter->ops = ops;
iter->flags = flag;
@@ -3584,13 +3600,13 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag,
if (!iter->hash) {
trace_parser_put(&iter->parser);
- kfree(iter);
- ret = -ENOMEM;
goto out_unlock;
}
} else
iter->hash = hash;
+ ret = 0;
+
if (file->f_mode & FMODE_READ) {
iter->pg = ftrace_pages_start;
@@ -3602,7 +3618,6 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag,
/* Failed */
free_ftrace_hash(iter->hash);
trace_parser_put(&iter->parser);
- kfree(iter);
}
} else
file->private_data = iter;
@@ -3610,6 +3625,13 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag,
out_unlock:
mutex_unlock(&ops->func_hash->regex_lock);
+ out:
+ if (ret) {
+ kfree(iter);
+ if (tr)
+ trace_array_put(tr);
+ }
+
return ret;
}
@@ -3618,6 +3640,7 @@ ftrace_filter_open(struct inode *inode, struct file *file)
{
struct ftrace_ops *ops = inode->i_private;
+ /* Checks for tracefs lockdown */
return ftrace_regex_open(ops,
FTRACE_ITER_FILTER | FTRACE_ITER_DO_PROBES,
inode, file);
@@ -3628,6 +3651,7 @@ ftrace_notrace_open(struct inode *inode, struct file *file)
{
struct ftrace_ops *ops = inode->i_private;
+ /* Checks for tracefs lockdown */
return ftrace_regex_open(ops, FTRACE_ITER_NOTRACE,
inode, file);
}
@@ -5037,6 +5061,8 @@ int ftrace_regex_release(struct inode *inode, struct file *file)
mutex_unlock(&iter->ops->func_hash->regex_lock);
free_ftrace_hash(iter->hash);
+ if (iter->tr)
+ trace_array_put(iter->tr);
kfree(iter);
return 0;
@@ -5194,9 +5220,13 @@ static int
__ftrace_graph_open(struct inode *inode, struct file *file,
struct ftrace_graph_data *fgd)
{
- int ret = 0;
+ int ret;
struct ftrace_hash *new_hash = NULL;
+ ret = security_locked_down(LOCKDOWN_TRACEFS);
+ if (ret)
+ return ret;
+
if (file->f_mode & FMODE_WRITE) {
const int size_bits = FTRACE_HASH_DEFAULT_BITS;
@@ -6537,8 +6567,9 @@ ftrace_pid_open(struct inode *inode, struct file *file)
struct seq_file *m;
int ret = 0;
- if (trace_array_get(tr) < 0)
- return -ENODEV;
+ ret = tracing_check_open_get_tr(tr);
+ if (ret)
+ return ret;
if ((file->f_mode & FMODE_WRITE) &&
(file->f_flags & O_TRUNC))
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 252f79c435f8..6a0ee9178365 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -17,6 +17,7 @@
#include <linux/stacktrace.h>
#include <linux/writeback.h>
#include <linux/kallsyms.h>
+#include <linux/security.h>
#include <linux/seq_file.h>
#include <linux/notifier.h>
#include <linux/irqflags.h>
@@ -304,6 +305,23 @@ void trace_array_put(struct trace_array *this_tr)
mutex_unlock(&trace_types_lock);
}
+int tracing_check_open_get_tr(struct trace_array *tr)
+{
+ int ret;
+
+ ret = security_locked_down(LOCKDOWN_TRACEFS);
+ if (ret)
+ return ret;
+
+ if (tracing_disabled)
+ return -ENODEV;
+
+ if (tr && trace_array_get(tr) < 0)
+ return -ENODEV;
+
+ return 0;
+}
+
int call_filter_check_discard(struct trace_event_call *call, void *rec,
struct ring_buffer *buffer,
struct ring_buffer_event *event)
@@ -4140,8 +4158,11 @@ release:
int tracing_open_generic(struct inode *inode, struct file *filp)
{
- if (tracing_disabled)
- return -ENODEV;
+ int ret;
+
+ ret = tracing_check_open_get_tr(NULL);
+ if (ret)
+ return ret;
filp->private_data = inode->i_private;
return 0;
@@ -4156,15 +4177,14 @@ bool tracing_is_disabled(void)
* Open and update trace_array ref count.
* Must have the current trace_array passed to it.
*/
-static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
+int tracing_open_generic_tr(struct inode *inode, struct file *filp)
{
struct trace_array *tr = inode->i_private;
+ int ret;
- if (tracing_disabled)
- return -ENODEV;
-
- if (trace_array_get(tr) < 0)
- return -ENODEV;
+ ret = tracing_check_open_get_tr(tr);
+ if (ret)
+ return ret;
filp->private_data = inode->i_private;
@@ -4233,10 +4253,11 @@ static int tracing_open(struct inode *inode, struct file *file)
{
struct trace_array *tr = inode->i_private;
struct trace_iterator *iter;
- int ret = 0;
+ int ret;
- if (trace_array_get(tr) < 0)
- return -ENODEV;
+ ret = tracing_check_open_get_tr(tr);
+ if (ret)
+ return ret;
/* If this file was open for write, then erase contents */
if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
@@ -4352,12 +4373,15 @@ static int show_traces_open(struct inode *inode, struct file *file)
struct seq_file *m;
int ret;
- if (tracing_disabled)
- return -ENODEV;
+ ret = tracing_check_open_get_tr(tr);
+ if (ret)
+ return ret;
ret = seq_open(file, &show_traces_seq_ops);
- if (ret)
+ if (ret) {
+ trace_array_put(tr);
return ret;
+ }
m = file->private_data;
m->private = tr;
@@ -4365,6 +4389,14 @@ static int show_traces_open(struct inode *inode, struct file *file)
return 0;
}
+static int show_traces_release(struct inode *inode, struct file *file)
+{
+ struct trace_array *tr = inode->i_private;
+
+ trace_array_put(tr);
+ return seq_release(inode, file);
+}
+
static ssize_t
tracing_write_stub(struct file *filp, const char __user *ubuf,
size_t count, loff_t *ppos)
@@ -4395,8 +4427,8 @@ static const struct file_operations tracing_fops = {
static const struct file_operations show_traces_fops = {
.open = show_traces_open,
.read = seq_read,
- .release = seq_release,
.llseek = seq_lseek,
+ .release = show_traces_release,
};
static ssize_t
@@ -4697,11 +4729,9 @@ static int tracing_trace_options_open(struct inode *inode, struct file *file)
struct trace_array *tr = inode->i_private;
int ret;
- if (tracing_disabled)
- return -ENODEV;
-
- if (trace_array_get(tr) < 0)
- return -ENODEV;
+ ret = tracing_check_open_get_tr(tr);
+ if (ret)
+ return ret;
ret = single_open(file, tracing_trace_options_show, inode->i_private);
if (ret < 0)
@@ -5038,8 +5068,11 @@ static const struct seq_operations tracing_saved_tgids_seq_ops = {
static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
{
- if (tracing_disabled)
- return -ENODEV;
+ int ret;
+
+ ret = tracing_check_open_get_tr(NULL);
+ if (ret)
+ return ret;
return seq_open(filp, &tracing_saved_tgids_seq_ops);
}
@@ -5115,8 +5148,11 @@ static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
{
- if (tracing_disabled)
- return -ENODEV;
+ int ret;
+
+ ret = tracing_check_open_get_tr(NULL);
+ if (ret)
+ return ret;
return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
}
@@ -5280,8 +5316,11 @@ static const struct seq_operations tracing_eval_map_seq_ops = {
static int tracing_eval_map_open(struct inode *inode, struct file *filp)
{
- if (tracing_disabled)
- return -ENODEV;
+ int ret;
+
+ ret = tracing_check_open_get_tr(NULL);
+ if (ret)
+ return ret;
return seq_open(filp, &tracing_eval_map_seq_ops);
}
@@ -5804,13 +5843,11 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
{
struct trace_array *tr = inode->i_private;
struct trace_iterator *iter;
- int ret = 0;
-
- if (tracing_disabled)
- return -ENODEV;
+ int ret;
- if (trace_array_get(tr) < 0)
- return -ENODEV;
+ ret = tracing_check_open_get_tr(tr);
+ if (ret)
+ return ret;
mutex_lock(&trace_types_lock);
@@ -5999,6 +6036,7 @@ waitagain:
sizeof(struct trace_iterator) -
offsetof(struct trace_iterator, seq));
cpumask_clear(iter->started);
+ trace_seq_init(&iter->seq);
iter->pos = -1;
trace_event_read_lock();
@@ -6547,11 +6585,9 @@ static int tracing_clock_open(struct inode *inode, struct file *file)
struct trace_array *tr = inode->i_private;
int ret;
- if (tracing_disabled)
- return -ENODEV;
-
- if (trace_array_get(tr))
- return -ENODEV;
+ ret = tracing_check_open_get_tr(tr);
+ if (ret)
+ return ret;
ret = single_open(file, tracing_clock_show, inode->i_private);
if (ret < 0)
@@ -6581,11 +6617,9 @@ static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
struct trace_array *tr = inode->i_private;
int ret;
- if (tracing_disabled)
- return -ENODEV;
-
- if (trace_array_get(tr))
- return -ENODEV;
+ ret = tracing_check_open_get_tr(tr);
+ if (ret)
+ return ret;
ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
if (ret < 0)
@@ -6638,10 +6672,11 @@ static int tracing_snapshot_open(struct inode *inode, struct file *file)
struct trace_array *tr = inode->i_private;
struct trace_iterator *iter;
struct seq_file *m;
- int ret = 0;
+ int ret;
- if (trace_array_get(tr) < 0)
- return -ENODEV;
+ ret = tracing_check_open_get_tr(tr);
+ if (ret)
+ return ret;
if (file->f_mode & FMODE_READ) {
iter = __tracing_open(inode, file, true);
@@ -6786,6 +6821,7 @@ static int snapshot_raw_open(struct inode *inode, struct file *filp)
struct ftrace_buffer_info *info;
int ret;
+ /* The following checks for tracefs lockdown */
ret = tracing_buffers_open(inode, filp);
if (ret < 0)
return ret;
@@ -7105,8 +7141,9 @@ static int tracing_err_log_open(struct inode *inode, struct file *file)
struct trace_array *tr = inode->i_private;
int ret = 0;
- if (trace_array_get(tr) < 0)
- return -ENODEV;
+ ret = tracing_check_open_get_tr(tr);
+ if (ret)
+ return ret;
/* If this file was opened for write, then erase contents */
if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
@@ -7157,11 +7194,9 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp)
struct ftrace_buffer_info *info;
int ret;
- if (tracing_disabled)
- return -ENODEV;
-
- if (trace_array_get(tr) < 0)
- return -ENODEV;
+ ret = tracing_check_open_get_tr(tr);
+ if (ret)
+ return ret;
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info) {
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f801d154ff6a..d685c61085c0 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -338,6 +338,7 @@ extern struct mutex trace_types_lock;
extern int trace_array_get(struct trace_array *tr);
extern void trace_array_put(struct trace_array *tr);
+extern int tracing_check_open_get_tr(struct trace_array *tr);
extern int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs);
extern int tracing_set_clock(struct trace_array *tr, const char *clockstr);
@@ -681,6 +682,7 @@ void tracing_reset_online_cpus(struct trace_buffer *buf);
void tracing_reset_current(int cpu);
void tracing_reset_all_online_cpus(void);
int tracing_open_generic(struct inode *inode, struct file *filp);
+int tracing_open_generic_tr(struct inode *inode, struct file *filp);
bool tracing_is_disabled(void);
bool tracer_tracing_is_on(struct trace_array *tr);
void tracer_tracing_on(struct trace_array *tr);
diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c
index a41fed46c285..89779eb84a07 100644
--- a/kernel/trace/trace_dynevent.c
+++ b/kernel/trace/trace_dynevent.c
@@ -174,6 +174,10 @@ static int dyn_event_open(struct inode *inode, struct file *file)
{
int ret;
+ ret = tracing_check_open_get_tr(NULL);
+ if (ret)
+ return ret;
+
if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
ret = dyn_events_release_all(NULL);
if (ret < 0)
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 0892e38ed6fb..a9dfa04ffa44 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -272,9 +272,11 @@ int perf_kprobe_init(struct perf_event *p_event, bool is_retprobe)
goto out;
}
+ mutex_lock(&event_mutex);
ret = perf_trace_event_init(tp_event, p_event);
if (ret)
destroy_local_trace_kprobe(tp_event);
+ mutex_unlock(&event_mutex);
out:
kfree(func);
return ret;
@@ -282,8 +284,10 @@ out:
void perf_kprobe_destroy(struct perf_event *p_event)
{
+ mutex_lock(&event_mutex);
perf_trace_event_close(p_event);
perf_trace_event_unreg(p_event);
+ mutex_unlock(&event_mutex);
destroy_local_trace_kprobe(p_event->tp_event);
}
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index b89cdfe20bc1..fba87d10f0c1 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -12,6 +12,7 @@
#define pr_fmt(fmt) fmt
#include <linux/workqueue.h>
+#include <linux/security.h>
#include <linux/spinlock.h>
#include <linux/kthread.h>
#include <linux/tracefs.h>
@@ -1294,6 +1295,8 @@ static int trace_format_open(struct inode *inode, struct file *file)
struct seq_file *m;
int ret;
+ /* Do we want to hide event format files on tracefs lockdown? */
+
ret = seq_open(file, &trace_format_seq_ops);
if (ret < 0)
return ret;
@@ -1440,28 +1443,17 @@ static int system_tr_open(struct inode *inode, struct file *filp)
struct trace_array *tr = inode->i_private;
int ret;
- if (tracing_is_disabled())
- return -ENODEV;
-
- if (trace_array_get(tr) < 0)
- return -ENODEV;
-
/* Make a temporary dir that has no system but points to tr */
dir = kzalloc(sizeof(*dir), GFP_KERNEL);
- if (!dir) {
- trace_array_put(tr);
+ if (!dir)
return -ENOMEM;
- }
- dir->tr = tr;
-
- ret = tracing_open_generic(inode, filp);
+ ret = tracing_open_generic_tr(inode, filp);
if (ret < 0) {
- trace_array_put(tr);
kfree(dir);
return ret;
}
-
+ dir->tr = tr;
filp->private_data = dir;
return 0;
@@ -1771,6 +1763,10 @@ ftrace_event_open(struct inode *inode, struct file *file,
struct seq_file *m;
int ret;
+ ret = security_locked_down(LOCKDOWN_TRACEFS);
+ if (ret)
+ return ret;
+
ret = seq_open(file, seq_ops);
if (ret < 0)
return ret;
@@ -1795,6 +1791,7 @@ ftrace_event_avail_open(struct inode *inode, struct file *file)
{
const struct seq_operations *seq_ops = &show_event_seq_ops;
+ /* Checks for tracefs lockdown */
return ftrace_event_open(inode, file, seq_ops);
}
@@ -1805,8 +1802,9 @@ ftrace_event_set_open(struct inode *inode, struct file *file)
struct trace_array *tr = inode->i_private;
int ret;
- if (trace_array_get(tr) < 0)
- return -ENODEV;
+ ret = tracing_check_open_get_tr(tr);
+ if (ret)
+ return ret;
if ((file->f_mode & FMODE_WRITE) &&
(file->f_flags & O_TRUNC))
@@ -1825,8 +1823,9 @@ ftrace_event_set_pid_open(struct inode *inode, struct file *file)
struct trace_array *tr = inode->i_private;
int ret;
- if (trace_array_get(tr) < 0)
- return -ENODEV;
+ ret = tracing_check_open_get_tr(tr);
+ if (ret)
+ return ret;
if ((file->f_mode & FMODE_WRITE) &&
(file->f_flags & O_TRUNC))
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 9468bd8d44a2..7482a1466ebf 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -7,6 +7,7 @@
#include <linux/module.h>
#include <linux/kallsyms.h>
+#include <linux/security.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/stacktrace.h>
@@ -678,6 +679,8 @@ static bool synth_field_signed(char *type)
{
if (str_has_prefix(type, "u"))
return false;
+ if (strcmp(type, "gfp_t") == 0)
+ return false;
return true;
}
@@ -1448,6 +1451,10 @@ static int synth_events_open(struct inode *inode, struct file *file)
{
int ret;
+ ret = security_locked_down(LOCKDOWN_TRACEFS);
+ if (ret)
+ return ret;
+
if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
ret = dyn_events_release_all(&synth_event_ops);
if (ret < 0)
@@ -1680,7 +1687,7 @@ static int save_hist_vars(struct hist_trigger_data *hist_data)
if (var_data)
return 0;
- if (trace_array_get(tr) < 0)
+ if (tracing_check_open_get_tr(tr))
return -ENODEV;
var_data = kzalloc(sizeof(*var_data), GFP_KERNEL);
@@ -5515,6 +5522,12 @@ static int hist_show(struct seq_file *m, void *v)
static int event_hist_open(struct inode *inode, struct file *file)
{
+ int ret;
+
+ ret = security_locked_down(LOCKDOWN_TRACEFS);
+ if (ret)
+ return ret;
+
return single_open(file, hist_show, file);
}
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 2a2912cb4533..2cd53ca21b51 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -5,6 +5,7 @@
* Copyright (C) 2013 Tom Zanussi <tom.zanussi@linux.intel.com>
*/
+#include <linux/security.h>
#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/mutex.h>
@@ -173,7 +174,11 @@ static const struct seq_operations event_triggers_seq_ops = {
static int event_trigger_regex_open(struct inode *inode, struct file *file)
{
- int ret = 0;
+ int ret;
+
+ ret = security_locked_down(LOCKDOWN_TRACEFS);
+ if (ret)
+ return ret;
mutex_lock(&event_mutex);
@@ -292,6 +297,7 @@ event_trigger_write(struct file *filp, const char __user *ubuf,
static int
event_trigger_open(struct inode *inode, struct file *filp)
{
+ /* Checks for tracefs lockdown */
return event_trigger_regex_open(inode, filp);
}
diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c
index fa95139445b2..862f4b0139fc 100644
--- a/kernel/trace/trace_hwlat.c
+++ b/kernel/trace/trace_hwlat.c
@@ -150,7 +150,7 @@ void trace_hwlat_callback(bool enter)
if (enter)
nmi_ts_start = time_get();
else
- nmi_total_ts = time_get() - nmi_ts_start;
+ nmi_total_ts += time_get() - nmi_ts_start;
}
if (enter)
@@ -256,6 +256,8 @@ static int get_sample(void)
/* Keep a running maximum ever recorded hardware latency */
if (sample > tr->max_latency)
tr->max_latency = sample;
+ if (outer_sample > tr->max_latency)
+ tr->max_latency = outer_sample;
}
out:
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 324ffbea3556..1552a95c743b 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -7,11 +7,11 @@
*/
#define pr_fmt(fmt) "trace_kprobe: " fmt
+#include <linux/security.h>
#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/rculist.h>
#include <linux/error-injection.h>
-#include <linux/security.h>
#include <asm/setup.h> /* for COMMAND_LINE_SIZE */
@@ -936,6 +936,10 @@ static int probes_open(struct inode *inode, struct file *file)
{
int ret;
+ ret = security_locked_down(LOCKDOWN_TRACEFS);
+ if (ret)
+ return ret;
+
if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
ret = dyn_events_release_all(&trace_kprobe_ops);
if (ret < 0)
@@ -988,6 +992,12 @@ static const struct seq_operations profile_seq_op = {
static int profile_open(struct inode *inode, struct file *file)
{
+ int ret;
+
+ ret = security_locked_down(LOCKDOWN_TRACEFS);
+ if (ret)
+ return ret;
+
return seq_open(file, &profile_seq_op);
}
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index c3fd849d4a8f..d4e31e969206 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -6,6 +6,7 @@
*
*/
#include <linux/seq_file.h>
+#include <linux/security.h>
#include <linux/uaccess.h>
#include <linux/kernel.h>
#include <linux/ftrace.h>
@@ -348,6 +349,12 @@ static const struct seq_operations show_format_seq_ops = {
static int
ftrace_formats_open(struct inode *inode, struct file *file)
{
+ int ret;
+
+ ret = security_locked_down(LOCKDOWN_TRACEFS);
+ if (ret)
+ return ret;
+
return seq_open(file, &show_format_seq_ops);
}
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index ec9a34a97129..4df9a209f7ca 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -5,6 +5,7 @@
*/
#include <linux/sched/task_stack.h>
#include <linux/stacktrace.h>
+#include <linux/security.h>
#include <linux/kallsyms.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>
@@ -470,6 +471,12 @@ static const struct seq_operations stack_trace_seq_ops = {
static int stack_trace_open(struct inode *inode, struct file *file)
{
+ int ret;
+
+ ret = security_locked_down(LOCKDOWN_TRACEFS);
+ if (ret)
+ return ret;
+
return seq_open(file, &stack_trace_seq_ops);
}
@@ -487,6 +494,7 @@ stack_trace_filter_open(struct inode *inode, struct file *file)
{
struct ftrace_ops *ops = inode->i_private;
+ /* Checks for tracefs lockdown */
return ftrace_regex_open(ops, FTRACE_ITER_FILTER,
inode, file);
}
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index 75bf1bcb4a8a..9ab0a1a7ad5e 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -9,7 +9,7 @@
*
*/
-
+#include <linux/security.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/rbtree.h>
@@ -238,6 +238,10 @@ static int tracing_stat_open(struct inode *inode, struct file *file)
struct seq_file *m;
struct stat_session *session = inode->i_private;
+ ret = security_locked_down(LOCKDOWN_TRACEFS);
+ if (ret)
+ return ret;
+
ret = stat_seq_init(session);
if (ret)
return ret;
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index dd884341f5c5..352073d36585 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -7,6 +7,7 @@
*/
#define pr_fmt(fmt) "trace_uprobe: " fmt
+#include <linux/security.h>
#include <linux/ctype.h>
#include <linux/module.h>
#include <linux/uaccess.h>
@@ -769,6 +770,10 @@ static int probes_open(struct inode *inode, struct file *file)
{
int ret;
+ ret = security_locked_down(LOCKDOWN_TRACEFS);
+ if (ret)
+ return ret;
+
if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
ret = dyn_events_release_all(&trace_uprobe_ops);
if (ret)
@@ -818,6 +823,12 @@ static const struct seq_operations profile_seq_op = {
static int profile_open(struct inode *inode, struct file *file)
{
+ int ret;
+
+ ret = security_locked_down(LOCKDOWN_TRACEFS);
+ if (ret)
+ return ret;
+
return seq_open(file, &profile_seq_op);
}
diff --git a/lib/Kconfig b/lib/Kconfig
index 183f92a297ca..3321d04dfa5a 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -447,7 +447,6 @@ config ASSOCIATIVE_ARRAY
config HAS_IOMEM
bool
depends on !NO_IOMEM
- select GENERIC_IO
default y
config HAS_IOPORT_MAP
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 93d97f9b0157..6c1be6181e38 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1664,6 +1664,8 @@ config PROVIDE_OHCI1394_DMA_INIT
See Documentation/debugging-via-ohci1394.txt for more information.
+source "lib/kunit/Kconfig"
+
menuconfig RUNTIME_TESTING_MENU
bool "Runtime Testing"
def_bool y
@@ -1948,6 +1950,35 @@ config TEST_SYSCTL
If unsure, say N.
+config SYSCTL_KUNIT_TEST
+ bool "KUnit test for sysctl"
+ depends on KUNIT
+ help
+ This builds the proc sysctl unit test, which runs on boot.
+ Tests the API contract and implementation correctness of sysctl.
+ For more information on KUnit and unit tests in general please refer
+ to the KUnit documentation in Documentation/dev-tools/kunit/.
+
+ If unsure, say N.
+
+config LIST_KUNIT_TEST
+ bool "KUnit Test for Kernel Linked-list structures"
+ depends on KUNIT
+ help
+ This builds the linked list KUnit test suite.
+ It tests that the API and basic functionality of the list_head type
+ and associated macros.
+
+ KUnit tests run during boot and output the results to the debug log
+ in TAP format (http://testanything.org/). Only useful for kernel devs
+ running the KUnit test harness, and not intended for inclusion into a
+ production build.
+
+ For more information on KUnit and unit tests in general please refer
+ to the KUnit documentation in Documentation/dev-tools/kunit/.
+
+ If unsure, say N.
+
config TEST_UDELAY
tristate "udelay test driver"
help
diff --git a/lib/Makefile b/lib/Makefile
index c5892807e06f..890e581d00c4 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -92,6 +92,8 @@ obj-$(CONFIG_TEST_MEMINIT) += test_meminit.o
obj-$(CONFIG_TEST_LIVEPATCH) += livepatch/
+obj-$(CONFIG_KUNIT) += kunit/
+
ifeq ($(CONFIG_DEBUG_KOBJECT),y)
CFLAGS_kobject.o += -DDEBUG
CFLAGS_kobject_uevent.o += -DDEBUG
@@ -290,3 +292,6 @@ obj-$(CONFIG_GENERIC_LIB_MULDI3) += muldi3.o
obj-$(CONFIG_GENERIC_LIB_CMPDI2) += cmpdi2.o
obj-$(CONFIG_GENERIC_LIB_UCMPDI2) += ucmpdi2.o
obj-$(CONFIG_OBJAGG) += objagg.o
+
+# KUnit tests
+obj-$(CONFIG_LIST_KUNIT_TEST) += list-test.o
diff --git a/lib/dump_stack.c b/lib/dump_stack.c
index 5cff72f18c4a..33ffbf308853 100644
--- a/lib/dump_stack.c
+++ b/lib/dump_stack.c
@@ -106,7 +106,12 @@ retry:
was_locked = 1;
} else {
local_irq_restore(flags);
- cpu_relax();
+ /*
+ * Wait for the lock to release before jumping to
+ * atomic_cmpxchg() in order to mitigate the thundering herd
+ * problem.
+ */
+ do { cpu_relax(); } while (atomic_read(&dump_lock) != -1);
goto retry;
}
diff --git a/lib/generic-radix-tree.c b/lib/generic-radix-tree.c
index ae25e2fa2187..f25eb111c051 100644
--- a/lib/generic-radix-tree.c
+++ b/lib/generic-radix-tree.c
@@ -2,6 +2,7 @@
#include <linux/export.h>
#include <linux/generic-radix-tree.h>
#include <linux/gfp.h>
+#include <linux/kmemleak.h>
#define GENRADIX_ARY (PAGE_SIZE / sizeof(struct genradix_node *))
#define GENRADIX_ARY_SHIFT ilog2(GENRADIX_ARY)
@@ -75,6 +76,27 @@ void *__genradix_ptr(struct __genradix *radix, size_t offset)
}
EXPORT_SYMBOL(__genradix_ptr);
+static inline struct genradix_node *genradix_alloc_node(gfp_t gfp_mask)
+{
+ struct genradix_node *node;
+
+ node = (struct genradix_node *)__get_free_page(gfp_mask|__GFP_ZERO);
+
+ /*
+ * We're using pages (not slab allocations) directly for kernel data
+ * structures, so we need to explicitly inform kmemleak of them in order
+ * to avoid false positive memory leak reports.
+ */
+ kmemleak_alloc(node, PAGE_SIZE, 1, gfp_mask);
+ return node;
+}
+
+static inline void genradix_free_node(struct genradix_node *node)
+{
+ kmemleak_free(node);
+ free_page((unsigned long)node);
+}
+
/*
* Returns pointer to the specified byte @offset within @radix, allocating it if
* necessary - newly allocated slots are always zeroed out:
@@ -97,8 +119,7 @@ void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset,
break;
if (!new_node) {
- new_node = (void *)
- __get_free_page(gfp_mask|__GFP_ZERO);
+ new_node = genradix_alloc_node(gfp_mask);
if (!new_node)
return NULL;
}
@@ -121,8 +142,7 @@ void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset,
n = READ_ONCE(*p);
if (!n) {
if (!new_node) {
- new_node = (void *)
- __get_free_page(gfp_mask|__GFP_ZERO);
+ new_node = genradix_alloc_node(gfp_mask);
if (!new_node)
return NULL;
}
@@ -133,7 +153,7 @@ void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset,
}
if (new_node)
- free_page((unsigned long) new_node);
+ genradix_free_node(new_node);
return &n->data[offset];
}
@@ -191,7 +211,7 @@ static void genradix_free_recurse(struct genradix_node *n, unsigned level)
genradix_free_recurse(n->children[i], level - 1);
}
- free_page((unsigned long) n);
+ genradix_free_node(n);
}
int __genradix_prealloc(struct __genradix *radix, size_t size,
diff --git a/lib/idr.c b/lib/idr.c
index 66a374892482..c2cf2c52bbde 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -215,7 +215,7 @@ int idr_for_each(const struct idr *idr,
EXPORT_SYMBOL(idr_for_each);
/**
- * idr_get_next() - Find next populated entry.
+ * idr_get_next_ul() - Find next populated entry.
* @idr: IDR handle.
* @nextid: Pointer to an ID.
*
@@ -224,7 +224,7 @@ EXPORT_SYMBOL(idr_for_each);
* to the ID of the found value. To use in a loop, the value pointed to by
* nextid must be incremented by the user.
*/
-void *idr_get_next(struct idr *idr, int *nextid)
+void *idr_get_next_ul(struct idr *idr, unsigned long *nextid)
{
struct radix_tree_iter iter;
void __rcu **slot;
@@ -245,18 +245,14 @@ void *idr_get_next(struct idr *idr, int *nextid)
}
if (!slot)
return NULL;
- id = iter.index + base;
-
- if (WARN_ON_ONCE(id > INT_MAX))
- return NULL;
- *nextid = id;
+ *nextid = iter.index + base;
return entry;
}
-EXPORT_SYMBOL(idr_get_next);
+EXPORT_SYMBOL(idr_get_next_ul);
/**
- * idr_get_next_ul() - Find next populated entry.
+ * idr_get_next() - Find next populated entry.
* @idr: IDR handle.
* @nextid: Pointer to an ID.
*
@@ -265,22 +261,17 @@ EXPORT_SYMBOL(idr_get_next);
* to the ID of the found value. To use in a loop, the value pointed to by
* nextid must be incremented by the user.
*/
-void *idr_get_next_ul(struct idr *idr, unsigned long *nextid)
+void *idr_get_next(struct idr *idr, int *nextid)
{
- struct radix_tree_iter iter;
- void __rcu **slot;
- unsigned long base = idr->idr_base;
unsigned long id = *nextid;
+ void *entry = idr_get_next_ul(idr, &id);
- id = (id < base) ? 0 : id - base;
- slot = radix_tree_iter_find(&idr->idr_rt, &iter, id);
- if (!slot)
+ if (WARN_ON_ONCE(id > INT_MAX))
return NULL;
-
- *nextid = iter.index + base;
- return rcu_dereference_raw(*slot);
+ *nextid = id;
+ return entry;
}
-EXPORT_SYMBOL(idr_get_next_ul);
+EXPORT_SYMBOL(idr_get_next);
/**
* idr_replace() - replace pointer for given ID.
diff --git a/lib/kunit/Kconfig b/lib/kunit/Kconfig
new file mode 100644
index 000000000000..af37016bfdd4
--- /dev/null
+++ b/lib/kunit/Kconfig
@@ -0,0 +1,36 @@
+#
+# KUnit base configuration
+#
+
+menuconfig KUNIT
+ bool "KUnit - Enable support for unit tests"
+ help
+ Enables support for kernel unit tests (KUnit), a lightweight unit
+ testing and mocking framework for the Linux kernel. These tests are
+ able to be run locally on a developer's workstation without a VM or
+ special hardware when using UML. Can also be used on most other
+ architectures. For more information, please see
+ Documentation/dev-tools/kunit/.
+
+if KUNIT
+
+config KUNIT_TEST
+ bool "KUnit test for KUnit"
+ help
+ Enables the unit tests for the KUnit test framework. These tests test
+ the KUnit test framework itself; the tests are both written using
+ KUnit and test KUnit. This option should only be enabled for testing
+ purposes by developers interested in testing that KUnit works as
+ expected.
+
+config KUNIT_EXAMPLE_TEST
+ bool "Example test for KUnit"
+ help
+ Enables an example unit test that illustrates some of the basic
+ features of KUnit. This test only exists to help new users understand
+ what KUnit is and how it is used. Please refer to the example test
+ itself, lib/kunit/example-test.c, for more information. This option
+ is intended for curious hackers who would like to understand how to
+ use KUnit for kernel development.
+
+endif # KUNIT
diff --git a/lib/kunit/Makefile b/lib/kunit/Makefile
new file mode 100644
index 000000000000..769d9402b5d3
--- /dev/null
+++ b/lib/kunit/Makefile
@@ -0,0 +1,9 @@
+obj-$(CONFIG_KUNIT) += test.o \
+ string-stream.o \
+ assert.o \
+ try-catch.o
+
+obj-$(CONFIG_KUNIT_TEST) += test-test.o \
+ string-stream-test.o
+
+obj-$(CONFIG_KUNIT_EXAMPLE_TEST) += example-test.o
diff --git a/lib/kunit/assert.c b/lib/kunit/assert.c
new file mode 100644
index 000000000000..86013d4cf891
--- /dev/null
+++ b/lib/kunit/assert.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Assertion and expectation serialization API.
+ *
+ * Copyright (C) 2019, Google LLC.
+ * Author: Brendan Higgins <brendanhiggins@google.com>
+ */
+#include <kunit/assert.h>
+
+void kunit_base_assert_format(const struct kunit_assert *assert,
+ struct string_stream *stream)
+{
+ const char *expect_or_assert = NULL;
+
+ switch (assert->type) {
+ case KUNIT_EXPECTATION:
+ expect_or_assert = "EXPECTATION";
+ break;
+ case KUNIT_ASSERTION:
+ expect_or_assert = "ASSERTION";
+ break;
+ }
+
+ string_stream_add(stream, "%s FAILED at %s:%d\n",
+ expect_or_assert, assert->file, assert->line);
+}
+
+void kunit_assert_print_msg(const struct kunit_assert *assert,
+ struct string_stream *stream)
+{
+ if (assert->message.fmt)
+ string_stream_add(stream, "\n%pV", &assert->message);
+}
+
+void kunit_fail_assert_format(const struct kunit_assert *assert,
+ struct string_stream *stream)
+{
+ kunit_base_assert_format(assert, stream);
+ string_stream_add(stream, "%pV", &assert->message);
+}
+
+void kunit_unary_assert_format(const struct kunit_assert *assert,
+ struct string_stream *stream)
+{
+ struct kunit_unary_assert *unary_assert = container_of(
+ assert, struct kunit_unary_assert, assert);
+
+ kunit_base_assert_format(assert, stream);
+ if (unary_assert->expected_true)
+ string_stream_add(stream,
+ "\tExpected %s to be true, but is false\n",
+ unary_assert->condition);
+ else
+ string_stream_add(stream,
+ "\tExpected %s to be false, but is true\n",
+ unary_assert->condition);
+ kunit_assert_print_msg(assert, stream);
+}
+
+void kunit_ptr_not_err_assert_format(const struct kunit_assert *assert,
+ struct string_stream *stream)
+{
+ struct kunit_ptr_not_err_assert *ptr_assert = container_of(
+ assert, struct kunit_ptr_not_err_assert, assert);
+
+ kunit_base_assert_format(assert, stream);
+ if (!ptr_assert->value) {
+ string_stream_add(stream,
+ "\tExpected %s is not null, but is\n",
+ ptr_assert->text);
+ } else if (IS_ERR(ptr_assert->value)) {
+ string_stream_add(stream,
+ "\tExpected %s is not error, but is: %ld\n",
+ ptr_assert->text,
+ PTR_ERR(ptr_assert->value));
+ }
+ kunit_assert_print_msg(assert, stream);
+}
+
+void kunit_binary_assert_format(const struct kunit_assert *assert,
+ struct string_stream *stream)
+{
+ struct kunit_binary_assert *binary_assert = container_of(
+ assert, struct kunit_binary_assert, assert);
+
+ kunit_base_assert_format(assert, stream);
+ string_stream_add(stream,
+ "\tExpected %s %s %s, but\n",
+ binary_assert->left_text,
+ binary_assert->operation,
+ binary_assert->right_text);
+ string_stream_add(stream, "\t\t%s == %lld\n",
+ binary_assert->left_text,
+ binary_assert->left_value);
+ string_stream_add(stream, "\t\t%s == %lld",
+ binary_assert->right_text,
+ binary_assert->right_value);
+ kunit_assert_print_msg(assert, stream);
+}
+
+void kunit_binary_ptr_assert_format(const struct kunit_assert *assert,
+ struct string_stream *stream)
+{
+ struct kunit_binary_ptr_assert *binary_assert = container_of(
+ assert, struct kunit_binary_ptr_assert, assert);
+
+ kunit_base_assert_format(assert, stream);
+ string_stream_add(stream,
+ "\tExpected %s %s %s, but\n",
+ binary_assert->left_text,
+ binary_assert->operation,
+ binary_assert->right_text);
+ string_stream_add(stream, "\t\t%s == %pK\n",
+ binary_assert->left_text,
+ binary_assert->left_value);
+ string_stream_add(stream, "\t\t%s == %pK",
+ binary_assert->right_text,
+ binary_assert->right_value);
+ kunit_assert_print_msg(assert, stream);
+}
+
+void kunit_binary_str_assert_format(const struct kunit_assert *assert,
+ struct string_stream *stream)
+{
+ struct kunit_binary_str_assert *binary_assert = container_of(
+ assert, struct kunit_binary_str_assert, assert);
+
+ kunit_base_assert_format(assert, stream);
+ string_stream_add(stream,
+ "\tExpected %s %s %s, but\n",
+ binary_assert->left_text,
+ binary_assert->operation,
+ binary_assert->right_text);
+ string_stream_add(stream, "\t\t%s == %s\n",
+ binary_assert->left_text,
+ binary_assert->left_value);
+ string_stream_add(stream, "\t\t%s == %s",
+ binary_assert->right_text,
+ binary_assert->right_value);
+ kunit_assert_print_msg(assert, stream);
+}
diff --git a/lib/kunit/example-test.c b/lib/kunit/example-test.c
new file mode 100644
index 000000000000..f64a829aa441
--- /dev/null
+++ b/lib/kunit/example-test.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Example KUnit test to show how to use KUnit.
+ *
+ * Copyright (C) 2019, Google LLC.
+ * Author: Brendan Higgins <brendanhiggins@google.com>
+ */
+
+#include <kunit/test.h>
+
+/*
+ * This is the most fundamental element of KUnit, the test case. A test case
+ * makes a set EXPECTATIONs and ASSERTIONs about the behavior of some code; if
+ * any expectations or assertions are not met, the test fails; otherwise, the
+ * test passes.
+ *
+ * In KUnit, a test case is just a function with the signature
+ * `void (*)(struct kunit *)`. `struct kunit` is a context object that stores
+ * information about the current test.
+ */
+static void example_simple_test(struct kunit *test)
+{
+ /*
+ * This is an EXPECTATION; it is how KUnit tests things. When you want
+ * to test a piece of code, you set some expectations about what the
+ * code should do. KUnit then runs the test and verifies that the code's
+ * behavior matched what was expected.
+ */
+ KUNIT_EXPECT_EQ(test, 1 + 1, 2);
+}
+
+/*
+ * This is run once before each test case, see the comment on
+ * example_test_suite for more information.
+ */
+static int example_test_init(struct kunit *test)
+{
+ kunit_info(test, "initializing\n");
+
+ return 0;
+}
+
+/*
+ * Here we make a list of all the test cases we want to add to the test suite
+ * below.
+ */
+static struct kunit_case example_test_cases[] = {
+ /*
+ * This is a helper to create a test case object from a test case
+ * function; its exact function is not important to understand how to
+ * use KUnit, just know that this is how you associate test cases with a
+ * test suite.
+ */
+ KUNIT_CASE(example_simple_test),
+ {}
+};
+
+/*
+ * This defines a suite or grouping of tests.
+ *
+ * Test cases are defined as belonging to the suite by adding them to
+ * `kunit_cases`.
+ *
+ * Often it is desirable to run some function which will set up things which
+ * will be used by every test; this is accomplished with an `init` function
+ * which runs before each test case is invoked. Similarly, an `exit` function
+ * may be specified which runs after every test case and can be used to for
+ * cleanup. For clarity, running tests in a test suite would behave as follows:
+ *
+ * suite.init(test);
+ * suite.test_case[0](test);
+ * suite.exit(test);
+ * suite.init(test);
+ * suite.test_case[1](test);
+ * suite.exit(test);
+ * ...;
+ */
+static struct kunit_suite example_test_suite = {
+ .name = "example",
+ .init = example_test_init,
+ .test_cases = example_test_cases,
+};
+
+/*
+ * This registers the above test suite telling KUnit that this is a suite of
+ * tests that need to be run.
+ */
+kunit_test_suite(example_test_suite);
diff --git a/lib/kunit/string-stream-test.c b/lib/kunit/string-stream-test.c
new file mode 100644
index 000000000000..76cc05eb00ed
--- /dev/null
+++ b/lib/kunit/string-stream-test.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KUnit test for struct string_stream.
+ *
+ * Copyright (C) 2019, Google LLC.
+ * Author: Brendan Higgins <brendanhiggins@google.com>
+ */
+
+#include <kunit/string-stream.h>
+#include <kunit/test.h>
+#include <linux/slab.h>
+
+static void string_stream_test_empty_on_creation(struct kunit *test)
+{
+ struct string_stream *stream = alloc_string_stream(test, GFP_KERNEL);
+
+ KUNIT_EXPECT_TRUE(test, string_stream_is_empty(stream));
+}
+
+static void string_stream_test_not_empty_after_add(struct kunit *test)
+{
+ struct string_stream *stream = alloc_string_stream(test, GFP_KERNEL);
+
+ string_stream_add(stream, "Foo");
+
+ KUNIT_EXPECT_FALSE(test, string_stream_is_empty(stream));
+}
+
+static void string_stream_test_get_string(struct kunit *test)
+{
+ struct string_stream *stream = alloc_string_stream(test, GFP_KERNEL);
+ char *output;
+
+ string_stream_add(stream, "Foo");
+ string_stream_add(stream, " %s", "bar");
+
+ output = string_stream_get_string(stream);
+ KUNIT_ASSERT_STREQ(test, output, "Foo bar");
+}
+
+static struct kunit_case string_stream_test_cases[] = {
+ KUNIT_CASE(string_stream_test_empty_on_creation),
+ KUNIT_CASE(string_stream_test_not_empty_after_add),
+ KUNIT_CASE(string_stream_test_get_string),
+ {}
+};
+
+static struct kunit_suite string_stream_test_suite = {
+ .name = "string-stream-test",
+ .test_cases = string_stream_test_cases
+};
+kunit_test_suite(string_stream_test_suite);
diff --git a/lib/kunit/string-stream.c b/lib/kunit/string-stream.c
new file mode 100644
index 000000000000..e6d17aacca30
--- /dev/null
+++ b/lib/kunit/string-stream.c
@@ -0,0 +1,217 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * C++ stream style string builder used in KUnit for building messages.
+ *
+ * Copyright (C) 2019, Google LLC.
+ * Author: Brendan Higgins <brendanhiggins@google.com>
+ */
+
+#include <kunit/string-stream.h>
+#include <kunit/test.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+
+struct string_stream_fragment_alloc_context {
+ struct kunit *test;
+ int len;
+ gfp_t gfp;
+};
+
+static int string_stream_fragment_init(struct kunit_resource *res,
+ void *context)
+{
+ struct string_stream_fragment_alloc_context *ctx = context;
+ struct string_stream_fragment *frag;
+
+ frag = kunit_kzalloc(ctx->test, sizeof(*frag), ctx->gfp);
+ if (!frag)
+ return -ENOMEM;
+
+ frag->test = ctx->test;
+ frag->fragment = kunit_kmalloc(ctx->test, ctx->len, ctx->gfp);
+ if (!frag->fragment)
+ return -ENOMEM;
+
+ res->allocation = frag;
+
+ return 0;
+}
+
+static void string_stream_fragment_free(struct kunit_resource *res)
+{
+ struct string_stream_fragment *frag = res->allocation;
+
+ list_del(&frag->node);
+ kunit_kfree(frag->test, frag->fragment);
+ kunit_kfree(frag->test, frag);
+}
+
+static struct string_stream_fragment *alloc_string_stream_fragment(
+ struct kunit *test, int len, gfp_t gfp)
+{
+ struct string_stream_fragment_alloc_context context = {
+ .test = test,
+ .len = len,
+ .gfp = gfp
+ };
+
+ return kunit_alloc_resource(test,
+ string_stream_fragment_init,
+ string_stream_fragment_free,
+ gfp,
+ &context);
+}
+
+static int string_stream_fragment_destroy(struct string_stream_fragment *frag)
+{
+ return kunit_resource_destroy(frag->test,
+ kunit_resource_instance_match,
+ string_stream_fragment_free,
+ frag);
+}
+
+int string_stream_vadd(struct string_stream *stream,
+ const char *fmt,
+ va_list args)
+{
+ struct string_stream_fragment *frag_container;
+ int len;
+ va_list args_for_counting;
+
+ /* Make a copy because `vsnprintf` could change it */
+ va_copy(args_for_counting, args);
+
+ /* Need space for null byte. */
+ len = vsnprintf(NULL, 0, fmt, args_for_counting) + 1;
+
+ va_end(args_for_counting);
+
+ frag_container = alloc_string_stream_fragment(stream->test,
+ len,
+ stream->gfp);
+ if (!frag_container)
+ return -ENOMEM;
+
+ len = vsnprintf(frag_container->fragment, len, fmt, args);
+ spin_lock(&stream->lock);
+ stream->length += len;
+ list_add_tail(&frag_container->node, &stream->fragments);
+ spin_unlock(&stream->lock);
+
+ return 0;
+}
+
+int string_stream_add(struct string_stream *stream, const char *fmt, ...)
+{
+ va_list args;
+ int result;
+
+ va_start(args, fmt);
+ result = string_stream_vadd(stream, fmt, args);
+ va_end(args);
+
+ return result;
+}
+
+static void string_stream_clear(struct string_stream *stream)
+{
+ struct string_stream_fragment *frag_container, *frag_container_safe;
+
+ spin_lock(&stream->lock);
+ list_for_each_entry_safe(frag_container,
+ frag_container_safe,
+ &stream->fragments,
+ node) {
+ string_stream_fragment_destroy(frag_container);
+ }
+ stream->length = 0;
+ spin_unlock(&stream->lock);
+}
+
+char *string_stream_get_string(struct string_stream *stream)
+{
+ struct string_stream_fragment *frag_container;
+ size_t buf_len = stream->length + 1; /* +1 for null byte. */
+ char *buf;
+
+ buf = kunit_kzalloc(stream->test, buf_len, stream->gfp);
+ if (!buf)
+ return NULL;
+
+ spin_lock(&stream->lock);
+ list_for_each_entry(frag_container, &stream->fragments, node)
+ strlcat(buf, frag_container->fragment, buf_len);
+ spin_unlock(&stream->lock);
+
+ return buf;
+}
+
+int string_stream_append(struct string_stream *stream,
+ struct string_stream *other)
+{
+ const char *other_content;
+
+ other_content = string_stream_get_string(other);
+
+ if (!other_content)
+ return -ENOMEM;
+
+ return string_stream_add(stream, other_content);
+}
+
+bool string_stream_is_empty(struct string_stream *stream)
+{
+ return list_empty(&stream->fragments);
+}
+
+struct string_stream_alloc_context {
+ struct kunit *test;
+ gfp_t gfp;
+};
+
+static int string_stream_init(struct kunit_resource *res, void *context)
+{
+ struct string_stream *stream;
+ struct string_stream_alloc_context *ctx = context;
+
+ stream = kunit_kzalloc(ctx->test, sizeof(*stream), ctx->gfp);
+ if (!stream)
+ return -ENOMEM;
+
+ res->allocation = stream;
+ stream->gfp = ctx->gfp;
+ stream->test = ctx->test;
+ INIT_LIST_HEAD(&stream->fragments);
+ spin_lock_init(&stream->lock);
+
+ return 0;
+}
+
+static void string_stream_free(struct kunit_resource *res)
+{
+ struct string_stream *stream = res->allocation;
+
+ string_stream_clear(stream);
+}
+
+struct string_stream *alloc_string_stream(struct kunit *test, gfp_t gfp)
+{
+ struct string_stream_alloc_context context = {
+ .test = test,
+ .gfp = gfp
+ };
+
+ return kunit_alloc_resource(test,
+ string_stream_init,
+ string_stream_free,
+ gfp,
+ &context);
+}
+
+int string_stream_destroy(struct string_stream *stream)
+{
+ return kunit_resource_destroy(stream->test,
+ kunit_resource_instance_match,
+ string_stream_free,
+ stream);
+}
diff --git a/lib/kunit/test-test.c b/lib/kunit/test-test.c
new file mode 100644
index 000000000000..5ebe059d16e2
--- /dev/null
+++ b/lib/kunit/test-test.c
@@ -0,0 +1,331 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KUnit test for core test infrastructure.
+ *
+ * Copyright (C) 2019, Google LLC.
+ * Author: Brendan Higgins <brendanhiggins@google.com>
+ */
+#include <kunit/test.h>
+
+struct kunit_try_catch_test_context {
+ struct kunit_try_catch *try_catch;
+ bool function_called;
+};
+
+static void kunit_test_successful_try(void *data)
+{
+ struct kunit *test = data;
+ struct kunit_try_catch_test_context *ctx = test->priv;
+
+ ctx->function_called = true;
+}
+
+static void kunit_test_no_catch(void *data)
+{
+ struct kunit *test = data;
+
+ KUNIT_FAIL(test, "Catch should not be called\n");
+}
+
+static void kunit_test_try_catch_successful_try_no_catch(struct kunit *test)
+{
+ struct kunit_try_catch_test_context *ctx = test->priv;
+ struct kunit_try_catch *try_catch = ctx->try_catch;
+
+ kunit_try_catch_init(try_catch,
+ test,
+ kunit_test_successful_try,
+ kunit_test_no_catch);
+ kunit_try_catch_run(try_catch, test);
+
+ KUNIT_EXPECT_TRUE(test, ctx->function_called);
+}
+
+static void kunit_test_unsuccessful_try(void *data)
+{
+ struct kunit *test = data;
+ struct kunit_try_catch_test_context *ctx = test->priv;
+ struct kunit_try_catch *try_catch = ctx->try_catch;
+
+ kunit_try_catch_throw(try_catch);
+ KUNIT_FAIL(test, "This line should never be reached\n");
+}
+
+static void kunit_test_catch(void *data)
+{
+ struct kunit *test = data;
+ struct kunit_try_catch_test_context *ctx = test->priv;
+
+ ctx->function_called = true;
+}
+
+static void kunit_test_try_catch_unsuccessful_try_does_catch(struct kunit *test)
+{
+ struct kunit_try_catch_test_context *ctx = test->priv;
+ struct kunit_try_catch *try_catch = ctx->try_catch;
+
+ kunit_try_catch_init(try_catch,
+ test,
+ kunit_test_unsuccessful_try,
+ kunit_test_catch);
+ kunit_try_catch_run(try_catch, test);
+
+ KUNIT_EXPECT_TRUE(test, ctx->function_called);
+}
+
+static int kunit_try_catch_test_init(struct kunit *test)
+{
+ struct kunit_try_catch_test_context *ctx;
+
+ ctx = kunit_kzalloc(test, sizeof(*ctx), GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx);
+ test->priv = ctx;
+
+ ctx->try_catch = kunit_kmalloc(test,
+ sizeof(*ctx->try_catch),
+ GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx->try_catch);
+
+ return 0;
+}
+
+static struct kunit_case kunit_try_catch_test_cases[] = {
+ KUNIT_CASE(kunit_test_try_catch_successful_try_no_catch),
+ KUNIT_CASE(kunit_test_try_catch_unsuccessful_try_does_catch),
+ {}
+};
+
+static struct kunit_suite kunit_try_catch_test_suite = {
+ .name = "kunit-try-catch-test",
+ .init = kunit_try_catch_test_init,
+ .test_cases = kunit_try_catch_test_cases,
+};
+kunit_test_suite(kunit_try_catch_test_suite);
+
+/*
+ * Context for testing test managed resources
+ * is_resource_initialized is used to test arbitrary resources
+ */
+struct kunit_test_resource_context {
+ struct kunit test;
+ bool is_resource_initialized;
+ int allocate_order[2];
+ int free_order[2];
+};
+
+static int fake_resource_init(struct kunit_resource *res, void *context)
+{
+ struct kunit_test_resource_context *ctx = context;
+
+ res->allocation = &ctx->is_resource_initialized;
+ ctx->is_resource_initialized = true;
+ return 0;
+}
+
+static void fake_resource_free(struct kunit_resource *res)
+{
+ bool *is_resource_initialized = res->allocation;
+
+ *is_resource_initialized = false;
+}
+
+static void kunit_resource_test_init_resources(struct kunit *test)
+{
+ struct kunit_test_resource_context *ctx = test->priv;
+
+ kunit_init_test(&ctx->test, "testing_test_init_test");
+
+ KUNIT_EXPECT_TRUE(test, list_empty(&ctx->test.resources));
+}
+
+static void kunit_resource_test_alloc_resource(struct kunit *test)
+{
+ struct kunit_test_resource_context *ctx = test->priv;
+ struct kunit_resource *res;
+ kunit_resource_free_t free = fake_resource_free;
+
+ res = kunit_alloc_and_get_resource(&ctx->test,
+ fake_resource_init,
+ fake_resource_free,
+ GFP_KERNEL,
+ ctx);
+
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, res);
+ KUNIT_EXPECT_PTR_EQ(test,
+ &ctx->is_resource_initialized,
+ (bool *) res->allocation);
+ KUNIT_EXPECT_TRUE(test, list_is_last(&res->node, &ctx->test.resources));
+ KUNIT_EXPECT_PTR_EQ(test, free, res->free);
+}
+
+static void kunit_resource_test_destroy_resource(struct kunit *test)
+{
+ struct kunit_test_resource_context *ctx = test->priv;
+ struct kunit_resource *res = kunit_alloc_and_get_resource(
+ &ctx->test,
+ fake_resource_init,
+ fake_resource_free,
+ GFP_KERNEL,
+ ctx);
+
+ KUNIT_ASSERT_FALSE(test,
+ kunit_resource_destroy(&ctx->test,
+ kunit_resource_instance_match,
+ res->free,
+ res->allocation));
+
+ KUNIT_EXPECT_FALSE(test, ctx->is_resource_initialized);
+ KUNIT_EXPECT_TRUE(test, list_empty(&ctx->test.resources));
+}
+
+static void kunit_resource_test_cleanup_resources(struct kunit *test)
+{
+ int i;
+ struct kunit_test_resource_context *ctx = test->priv;
+ struct kunit_resource *resources[5];
+
+ for (i = 0; i < ARRAY_SIZE(resources); i++) {
+ resources[i] = kunit_alloc_and_get_resource(&ctx->test,
+ fake_resource_init,
+ fake_resource_free,
+ GFP_KERNEL,
+ ctx);
+ }
+
+ kunit_cleanup(&ctx->test);
+
+ KUNIT_EXPECT_TRUE(test, list_empty(&ctx->test.resources));
+}
+
+static void kunit_resource_test_mark_order(int order_array[],
+ size_t order_size,
+ int key)
+{
+ int i;
+
+ for (i = 0; i < order_size && order_array[i]; i++)
+ ;
+
+ order_array[i] = key;
+}
+
+#define KUNIT_RESOURCE_TEST_MARK_ORDER(ctx, order_field, key) \
+ kunit_resource_test_mark_order(ctx->order_field, \
+ ARRAY_SIZE(ctx->order_field), \
+ key)
+
+static int fake_resource_2_init(struct kunit_resource *res, void *context)
+{
+ struct kunit_test_resource_context *ctx = context;
+
+ KUNIT_RESOURCE_TEST_MARK_ORDER(ctx, allocate_order, 2);
+
+ res->allocation = ctx;
+
+ return 0;
+}
+
+static void fake_resource_2_free(struct kunit_resource *res)
+{
+ struct kunit_test_resource_context *ctx = res->allocation;
+
+ KUNIT_RESOURCE_TEST_MARK_ORDER(ctx, free_order, 2);
+}
+
+static int fake_resource_1_init(struct kunit_resource *res, void *context)
+{
+ struct kunit_test_resource_context *ctx = context;
+
+ kunit_alloc_and_get_resource(&ctx->test,
+ fake_resource_2_init,
+ fake_resource_2_free,
+ GFP_KERNEL,
+ ctx);
+
+ KUNIT_RESOURCE_TEST_MARK_ORDER(ctx, allocate_order, 1);
+
+ res->allocation = ctx;
+
+ return 0;
+}
+
+static void fake_resource_1_free(struct kunit_resource *res)
+{
+ struct kunit_test_resource_context *ctx = res->allocation;
+
+ KUNIT_RESOURCE_TEST_MARK_ORDER(ctx, free_order, 1);
+}
+
+/*
+ * TODO(brendanhiggins@google.com): replace the arrays that keep track of the
+ * order of allocation and freeing with strict mocks using the IN_SEQUENCE macro
+ * to assert allocation and freeing order when the feature becomes available.
+ */
+static void kunit_resource_test_proper_free_ordering(struct kunit *test)
+{
+ struct kunit_test_resource_context *ctx = test->priv;
+
+ /* fake_resource_1 allocates a fake_resource_2 in its init. */
+ kunit_alloc_and_get_resource(&ctx->test,
+ fake_resource_1_init,
+ fake_resource_1_free,
+ GFP_KERNEL,
+ ctx);
+
+ /*
+ * Since fake_resource_2_init calls KUNIT_RESOURCE_TEST_MARK_ORDER
+ * before returning to fake_resource_1_init, it should be the first to
+ * put its key in the allocate_order array.
+ */
+ KUNIT_EXPECT_EQ(test, ctx->allocate_order[0], 2);
+ KUNIT_EXPECT_EQ(test, ctx->allocate_order[1], 1);
+
+ kunit_cleanup(&ctx->test);
+
+ /*
+ * Because fake_resource_2 finishes allocation before fake_resource_1,
+ * fake_resource_1 should be freed first since it could depend on
+ * fake_resource_2.
+ */
+ KUNIT_EXPECT_EQ(test, ctx->free_order[0], 1);
+ KUNIT_EXPECT_EQ(test, ctx->free_order[1], 2);
+}
+
+static int kunit_resource_test_init(struct kunit *test)
+{
+ struct kunit_test_resource_context *ctx =
+ kzalloc(sizeof(*ctx), GFP_KERNEL);
+
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx);
+
+ test->priv = ctx;
+
+ kunit_init_test(&ctx->test, "test_test_context");
+
+ return 0;
+}
+
+static void kunit_resource_test_exit(struct kunit *test)
+{
+ struct kunit_test_resource_context *ctx = test->priv;
+
+ kunit_cleanup(&ctx->test);
+ kfree(ctx);
+}
+
+static struct kunit_case kunit_resource_test_cases[] = {
+ KUNIT_CASE(kunit_resource_test_init_resources),
+ KUNIT_CASE(kunit_resource_test_alloc_resource),
+ KUNIT_CASE(kunit_resource_test_destroy_resource),
+ KUNIT_CASE(kunit_resource_test_cleanup_resources),
+ KUNIT_CASE(kunit_resource_test_proper_free_ordering),
+ {}
+};
+
+static struct kunit_suite kunit_resource_test_suite = {
+ .name = "kunit-resource-test",
+ .init = kunit_resource_test_init,
+ .exit = kunit_resource_test_exit,
+ .test_cases = kunit_resource_test_cases,
+};
+kunit_test_suite(kunit_resource_test_suite);
diff --git a/lib/kunit/test.c b/lib/kunit/test.c
new file mode 100644
index 000000000000..c83c0fa59cbd
--- /dev/null
+++ b/lib/kunit/test.c
@@ -0,0 +1,478 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Base unit test (KUnit) API.
+ *
+ * Copyright (C) 2019, Google LLC.
+ * Author: Brendan Higgins <brendanhiggins@google.com>
+ */
+
+#include <kunit/test.h>
+#include <kunit/try-catch.h>
+#include <linux/kernel.h>
+#include <linux/sched/debug.h>
+
+static void kunit_set_failure(struct kunit *test)
+{
+ WRITE_ONCE(test->success, false);
+}
+
+static void kunit_print_tap_version(void)
+{
+ static bool kunit_has_printed_tap_version;
+
+ if (!kunit_has_printed_tap_version) {
+ pr_info("TAP version 14\n");
+ kunit_has_printed_tap_version = true;
+ }
+}
+
+static size_t kunit_test_cases_len(struct kunit_case *test_cases)
+{
+ struct kunit_case *test_case;
+ size_t len = 0;
+
+ for (test_case = test_cases; test_case->run_case; test_case++)
+ len++;
+
+ return len;
+}
+
+static void kunit_print_subtest_start(struct kunit_suite *suite)
+{
+ kunit_print_tap_version();
+ pr_info("\t# Subtest: %s\n", suite->name);
+ pr_info("\t1..%zd\n", kunit_test_cases_len(suite->test_cases));
+}
+
+static void kunit_print_ok_not_ok(bool should_indent,
+ bool is_ok,
+ size_t test_number,
+ const char *description)
+{
+ const char *indent, *ok_not_ok;
+
+ if (should_indent)
+ indent = "\t";
+ else
+ indent = "";
+
+ if (is_ok)
+ ok_not_ok = "ok";
+ else
+ ok_not_ok = "not ok";
+
+ pr_info("%s%s %zd - %s\n", indent, ok_not_ok, test_number, description);
+}
+
+static bool kunit_suite_has_succeeded(struct kunit_suite *suite)
+{
+ const struct kunit_case *test_case;
+
+ for (test_case = suite->test_cases; test_case->run_case; test_case++)
+ if (!test_case->success)
+ return false;
+
+ return true;
+}
+
+static void kunit_print_subtest_end(struct kunit_suite *suite)
+{
+ static size_t kunit_suite_counter = 1;
+
+ kunit_print_ok_not_ok(false,
+ kunit_suite_has_succeeded(suite),
+ kunit_suite_counter++,
+ suite->name);
+}
+
+static void kunit_print_test_case_ok_not_ok(struct kunit_case *test_case,
+ size_t test_number)
+{
+ kunit_print_ok_not_ok(true,
+ test_case->success,
+ test_number,
+ test_case->name);
+}
+
+static void kunit_print_string_stream(struct kunit *test,
+ struct string_stream *stream)
+{
+ struct string_stream_fragment *fragment;
+ char *buf;
+
+ buf = string_stream_get_string(stream);
+ if (!buf) {
+ kunit_err(test,
+ "Could not allocate buffer, dumping stream:\n");
+ list_for_each_entry(fragment, &stream->fragments, node) {
+ kunit_err(test, "%s", fragment->fragment);
+ }
+ kunit_err(test, "\n");
+ } else {
+ kunit_err(test, "%s", buf);
+ kunit_kfree(test, buf);
+ }
+}
+
+static void kunit_fail(struct kunit *test, struct kunit_assert *assert)
+{
+ struct string_stream *stream;
+
+ kunit_set_failure(test);
+
+ stream = alloc_string_stream(test, GFP_KERNEL);
+ if (!stream) {
+ WARN(true,
+ "Could not allocate stream to print failed assertion in %s:%d\n",
+ assert->file,
+ assert->line);
+ return;
+ }
+
+ assert->format(assert, stream);
+
+ kunit_print_string_stream(test, stream);
+
+ WARN_ON(string_stream_destroy(stream));
+}
+
+static void __noreturn kunit_abort(struct kunit *test)
+{
+ kunit_try_catch_throw(&test->try_catch); /* Does not return. */
+
+ /*
+ * Throw could not abort from test.
+ *
+ * XXX: we should never reach this line! As kunit_try_catch_throw is
+ * marked __noreturn.
+ */
+ WARN_ONCE(true, "Throw could not abort from test!\n");
+}
+
+void kunit_do_assertion(struct kunit *test,
+ struct kunit_assert *assert,
+ bool pass,
+ const char *fmt, ...)
+{
+ va_list args;
+
+ if (pass)
+ return;
+
+ va_start(args, fmt);
+
+ assert->message.fmt = fmt;
+ assert->message.va = &args;
+
+ kunit_fail(test, assert);
+
+ va_end(args);
+
+ if (assert->type == KUNIT_ASSERTION)
+ kunit_abort(test);
+}
+
+void kunit_init_test(struct kunit *test, const char *name)
+{
+ spin_lock_init(&test->lock);
+ INIT_LIST_HEAD(&test->resources);
+ test->name = name;
+ test->success = true;
+}
+
+/*
+ * Initializes and runs test case. Does not clean up or do post validations.
+ */
+static void kunit_run_case_internal(struct kunit *test,
+ struct kunit_suite *suite,
+ struct kunit_case *test_case)
+{
+ if (suite->init) {
+ int ret;
+
+ ret = suite->init(test);
+ if (ret) {
+ kunit_err(test, "failed to initialize: %d\n", ret);
+ kunit_set_failure(test);
+ return;
+ }
+ }
+
+ test_case->run_case(test);
+}
+
+static void kunit_case_internal_cleanup(struct kunit *test)
+{
+ kunit_cleanup(test);
+}
+
+/*
+ * Performs post validations and cleanup after a test case was run.
+ * XXX: Should ONLY BE CALLED AFTER kunit_run_case_internal!
+ */
+static void kunit_run_case_cleanup(struct kunit *test,
+ struct kunit_suite *suite)
+{
+ if (suite->exit)
+ suite->exit(test);
+
+ kunit_case_internal_cleanup(test);
+}
+
+struct kunit_try_catch_context {
+ struct kunit *test;
+ struct kunit_suite *suite;
+ struct kunit_case *test_case;
+};
+
+static void kunit_try_run_case(void *data)
+{
+ struct kunit_try_catch_context *ctx = data;
+ struct kunit *test = ctx->test;
+ struct kunit_suite *suite = ctx->suite;
+ struct kunit_case *test_case = ctx->test_case;
+
+ /*
+ * kunit_run_case_internal may encounter a fatal error; if it does,
+ * abort will be called, this thread will exit, and finally the parent
+ * thread will resume control and handle any necessary clean up.
+ */
+ kunit_run_case_internal(test, suite, test_case);
+ /* This line may never be reached. */
+ kunit_run_case_cleanup(test, suite);
+}
+
+static void kunit_catch_run_case(void *data)
+{
+ struct kunit_try_catch_context *ctx = data;
+ struct kunit *test = ctx->test;
+ struct kunit_suite *suite = ctx->suite;
+ int try_exit_code = kunit_try_catch_get_result(&test->try_catch);
+
+ if (try_exit_code) {
+ kunit_set_failure(test);
+ /*
+ * Test case could not finish, we have no idea what state it is
+ * in, so don't do clean up.
+ */
+ if (try_exit_code == -ETIMEDOUT) {
+ kunit_err(test, "test case timed out\n");
+ /*
+ * Unknown internal error occurred preventing test case from
+ * running, so there is nothing to clean up.
+ */
+ } else {
+ kunit_err(test, "internal error occurred preventing test case from running: %d\n",
+ try_exit_code);
+ }
+ return;
+ }
+
+ /*
+ * Test case was run, but aborted. It is the test case's business as to
+ * whether it failed or not, we just need to clean up.
+ */
+ kunit_run_case_cleanup(test, suite);
+}
+
+/*
+ * Performs all logic to run a test case. It also catches most errors that
+ * occur in a test case and reports them as failures.
+ */
+static void kunit_run_case_catch_errors(struct kunit_suite *suite,
+ struct kunit_case *test_case)
+{
+ struct kunit_try_catch_context context;
+ struct kunit_try_catch *try_catch;
+ struct kunit test;
+
+ kunit_init_test(&test, test_case->name);
+ try_catch = &test.try_catch;
+
+ kunit_try_catch_init(try_catch,
+ &test,
+ kunit_try_run_case,
+ kunit_catch_run_case);
+ context.test = &test;
+ context.suite = suite;
+ context.test_case = test_case;
+ kunit_try_catch_run(try_catch, &context);
+
+ test_case->success = test.success;
+}
+
+int kunit_run_tests(struct kunit_suite *suite)
+{
+ struct kunit_case *test_case;
+ size_t test_case_count = 1;
+
+ kunit_print_subtest_start(suite);
+
+ for (test_case = suite->test_cases; test_case->run_case; test_case++) {
+ kunit_run_case_catch_errors(suite, test_case);
+ kunit_print_test_case_ok_not_ok(test_case, test_case_count++);
+ }
+
+ kunit_print_subtest_end(suite);
+
+ return 0;
+}
+
+struct kunit_resource *kunit_alloc_and_get_resource(struct kunit *test,
+ kunit_resource_init_t init,
+ kunit_resource_free_t free,
+ gfp_t internal_gfp,
+ void *context)
+{
+ struct kunit_resource *res;
+ int ret;
+
+ res = kzalloc(sizeof(*res), internal_gfp);
+ if (!res)
+ return NULL;
+
+ ret = init(res, context);
+ if (ret)
+ return NULL;
+
+ res->free = free;
+ spin_lock(&test->lock);
+ list_add_tail(&res->node, &test->resources);
+ spin_unlock(&test->lock);
+
+ return res;
+}
+
+static void kunit_resource_free(struct kunit *test, struct kunit_resource *res)
+{
+ res->free(res);
+ kfree(res);
+}
+
+static struct kunit_resource *kunit_resource_find(struct kunit *test,
+ kunit_resource_match_t match,
+ kunit_resource_free_t free,
+ void *match_data)
+{
+ struct kunit_resource *resource;
+
+ lockdep_assert_held(&test->lock);
+
+ list_for_each_entry_reverse(resource, &test->resources, node) {
+ if (resource->free != free)
+ continue;
+ if (match(test, resource->allocation, match_data))
+ return resource;
+ }
+
+ return NULL;
+}
+
+static struct kunit_resource *kunit_resource_remove(
+ struct kunit *test,
+ kunit_resource_match_t match,
+ kunit_resource_free_t free,
+ void *match_data)
+{
+ struct kunit_resource *resource;
+
+ spin_lock(&test->lock);
+ resource = kunit_resource_find(test, match, free, match_data);
+ if (resource)
+ list_del(&resource->node);
+ spin_unlock(&test->lock);
+
+ return resource;
+}
+
+int kunit_resource_destroy(struct kunit *test,
+ kunit_resource_match_t match,
+ kunit_resource_free_t free,
+ void *match_data)
+{
+ struct kunit_resource *resource;
+
+ resource = kunit_resource_remove(test, match, free, match_data);
+
+ if (!resource)
+ return -ENOENT;
+
+ kunit_resource_free(test, resource);
+ return 0;
+}
+
+struct kunit_kmalloc_params {
+ size_t size;
+ gfp_t gfp;
+};
+
+static int kunit_kmalloc_init(struct kunit_resource *res, void *context)
+{
+ struct kunit_kmalloc_params *params = context;
+
+ res->allocation = kmalloc(params->size, params->gfp);
+ if (!res->allocation)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void kunit_kmalloc_free(struct kunit_resource *res)
+{
+ kfree(res->allocation);
+}
+
+void *kunit_kmalloc(struct kunit *test, size_t size, gfp_t gfp)
+{
+ struct kunit_kmalloc_params params = {
+ .size = size,
+ .gfp = gfp
+ };
+
+ return kunit_alloc_resource(test,
+ kunit_kmalloc_init,
+ kunit_kmalloc_free,
+ gfp,
+ &params);
+}
+
+void kunit_kfree(struct kunit *test, const void *ptr)
+{
+ int rc;
+
+ rc = kunit_resource_destroy(test,
+ kunit_resource_instance_match,
+ kunit_kmalloc_free,
+ (void *)ptr);
+
+ WARN_ON(rc);
+}
+
+void kunit_cleanup(struct kunit *test)
+{
+ struct kunit_resource *resource;
+
+ /*
+ * test->resources is a stack - each allocation must be freed in the
+ * reverse order from which it was added since one resource may depend
+ * on another for its entire lifetime.
+ * Also, we cannot use the normal list_for_each constructs, even the
+ * safe ones because *arbitrary* nodes may be deleted when
+ * kunit_resource_free is called; the list_for_each_safe variants only
+ * protect against the current node being deleted, not the next.
+ */
+ while (true) {
+ spin_lock(&test->lock);
+ if (list_empty(&test->resources)) {
+ spin_unlock(&test->lock);
+ break;
+ }
+ resource = list_last_entry(&test->resources,
+ struct kunit_resource,
+ node);
+ list_del(&resource->node);
+ spin_unlock(&test->lock);
+
+ kunit_resource_free(test, resource);
+ }
+}
diff --git a/lib/kunit/try-catch.c b/lib/kunit/try-catch.c
new file mode 100644
index 000000000000..55686839eb61
--- /dev/null
+++ b/lib/kunit/try-catch.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * An API to allow a function, that may fail, to be executed, and recover in a
+ * controlled manner.
+ *
+ * Copyright (C) 2019, Google LLC.
+ * Author: Brendan Higgins <brendanhiggins@google.com>
+ */
+
+#include <kunit/test.h>
+#include <kunit/try-catch.h>
+#include <linux/completion.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/sched/sysctl.h>
+
+void __noreturn kunit_try_catch_throw(struct kunit_try_catch *try_catch)
+{
+ try_catch->try_result = -EFAULT;
+ complete_and_exit(try_catch->try_completion, -EFAULT);
+}
+
+static int kunit_generic_run_threadfn_adapter(void *data)
+{
+ struct kunit_try_catch *try_catch = data;
+
+ try_catch->try(try_catch->context);
+
+ complete_and_exit(try_catch->try_completion, 0);
+}
+
+static unsigned long kunit_test_timeout(void)
+{
+ unsigned long timeout_msecs;
+
+ /*
+ * TODO(brendanhiggins@google.com): We should probably have some type of
+ * variable timeout here. The only question is what that timeout value
+ * should be.
+ *
+ * The intention has always been, at some point, to be able to label
+ * tests with some type of size bucket (unit/small, integration/medium,
+ * large/system/end-to-end, etc), where each size bucket would get a
+ * default timeout value kind of like what Bazel does:
+ * https://docs.bazel.build/versions/master/be/common-definitions.html#test.size
+ * There is still some debate to be had on exactly how we do this. (For
+ * one, we probably want to have some sort of test runner level
+ * timeout.)
+ *
+ * For more background on this topic, see:
+ * https://mike-bland.com/2011/11/01/small-medium-large.html
+ */
+ if (sysctl_hung_task_timeout_secs) {
+ /*
+ * If sysctl_hung_task is active, just set the timeout to some
+ * value less than that.
+ *
+ * In regards to the above TODO, if we decide on variable
+ * timeouts, this logic will likely need to change.
+ */
+ timeout_msecs = (sysctl_hung_task_timeout_secs - 1) *
+ MSEC_PER_SEC;
+ } else {
+ timeout_msecs = 300 * MSEC_PER_SEC; /* 5 min */
+ }
+
+ return timeout_msecs;
+}
+
+void kunit_try_catch_run(struct kunit_try_catch *try_catch, void *context)
+{
+ DECLARE_COMPLETION_ONSTACK(try_completion);
+ struct kunit *test = try_catch->test;
+ struct task_struct *task_struct;
+ int exit_code, time_remaining;
+
+ try_catch->context = context;
+ try_catch->try_completion = &try_completion;
+ try_catch->try_result = 0;
+ task_struct = kthread_run(kunit_generic_run_threadfn_adapter,
+ try_catch,
+ "kunit_try_catch_thread");
+ if (IS_ERR(task_struct)) {
+ try_catch->catch(try_catch->context);
+ return;
+ }
+
+ time_remaining = wait_for_completion_timeout(&try_completion,
+ kunit_test_timeout());
+ if (time_remaining == 0) {
+ kunit_err(test, "try timed out\n");
+ try_catch->try_result = -ETIMEDOUT;
+ }
+
+ exit_code = try_catch->try_result;
+
+ if (!exit_code)
+ return;
+
+ if (exit_code == -EFAULT)
+ try_catch->try_result = 0;
+ else if (exit_code == -EINTR)
+ kunit_err(test, "wake_up_process() was never called\n");
+ else if (exit_code)
+ kunit_err(test, "Unknown error: %d\n", exit_code);
+
+ try_catch->catch(try_catch->context);
+}
+
+void kunit_try_catch_init(struct kunit_try_catch *try_catch,
+ struct kunit *test,
+ kunit_try_catch_func_t try,
+ kunit_try_catch_func_t catch)
+{
+ try_catch->test = test;
+ try_catch->try = try;
+ try_catch->catch = catch;
+}
diff --git a/lib/list-test.c b/lib/list-test.c
new file mode 100644
index 000000000000..363c600491c3
--- /dev/null
+++ b/lib/list-test.c
@@ -0,0 +1,746 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KUnit test for the Kernel Linked-list structures.
+ *
+ * Copyright (C) 2019, Google LLC.
+ * Author: David Gow <davidgow@google.com>
+ */
+#include <kunit/test.h>
+
+#include <linux/list.h>
+
+struct list_test_struct {
+ int data;
+ struct list_head list;
+};
+
+static void list_test_list_init(struct kunit *test)
+{
+ /* Test the different ways of initialising a list. */
+ struct list_head list1 = LIST_HEAD_INIT(list1);
+ struct list_head list2;
+ LIST_HEAD(list3);
+ struct list_head *list4;
+ struct list_head *list5;
+
+ INIT_LIST_HEAD(&list2);
+
+ list4 = kzalloc(sizeof(*list4), GFP_KERNEL | __GFP_NOFAIL);
+ INIT_LIST_HEAD(list4);
+
+ list5 = kmalloc(sizeof(*list5), GFP_KERNEL | __GFP_NOFAIL);
+ memset(list5, 0xFF, sizeof(*list5));
+ INIT_LIST_HEAD(list5);
+
+ /* list_empty_careful() checks both next and prev. */
+ KUNIT_EXPECT_TRUE(test, list_empty_careful(&list1));
+ KUNIT_EXPECT_TRUE(test, list_empty_careful(&list2));
+ KUNIT_EXPECT_TRUE(test, list_empty_careful(&list3));
+ KUNIT_EXPECT_TRUE(test, list_empty_careful(list4));
+ KUNIT_EXPECT_TRUE(test, list_empty_careful(list5));
+
+ kfree(list4);
+ kfree(list5);
+}
+
+static void list_test_list_add(struct kunit *test)
+{
+ struct list_head a, b;
+ LIST_HEAD(list);
+
+ list_add(&a, &list);
+ list_add(&b, &list);
+
+ /* should be [list] -> b -> a */
+ KUNIT_EXPECT_PTR_EQ(test, list.next, &b);
+ KUNIT_EXPECT_PTR_EQ(test, b.prev, &list);
+ KUNIT_EXPECT_PTR_EQ(test, b.next, &a);
+}
+
+static void list_test_list_add_tail(struct kunit *test)
+{
+ struct list_head a, b;
+ LIST_HEAD(list);
+
+ list_add_tail(&a, &list);
+ list_add_tail(&b, &list);
+
+ /* should be [list] -> a -> b */
+ KUNIT_EXPECT_PTR_EQ(test, list.next, &a);
+ KUNIT_EXPECT_PTR_EQ(test, a.prev, &list);
+ KUNIT_EXPECT_PTR_EQ(test, a.next, &b);
+}
+
+static void list_test_list_del(struct kunit *test)
+{
+ struct list_head a, b;
+ LIST_HEAD(list);
+
+ list_add_tail(&a, &list);
+ list_add_tail(&b, &list);
+
+ /* before: [list] -> a -> b */
+ list_del(&a);
+
+ /* now: [list] -> b */
+ KUNIT_EXPECT_PTR_EQ(test, list.next, &b);
+ KUNIT_EXPECT_PTR_EQ(test, b.prev, &list);
+}
+
+static void list_test_list_replace(struct kunit *test)
+{
+ struct list_head a_old, a_new, b;
+ LIST_HEAD(list);
+
+ list_add_tail(&a_old, &list);
+ list_add_tail(&b, &list);
+
+ /* before: [list] -> a_old -> b */
+ list_replace(&a_old, &a_new);
+
+ /* now: [list] -> a_new -> b */
+ KUNIT_EXPECT_PTR_EQ(test, list.next, &a_new);
+ KUNIT_EXPECT_PTR_EQ(test, b.prev, &a_new);
+}
+
+static void list_test_list_replace_init(struct kunit *test)
+{
+ struct list_head a_old, a_new, b;
+ LIST_HEAD(list);
+
+ list_add_tail(&a_old, &list);
+ list_add_tail(&b, &list);
+
+ /* before: [list] -> a_old -> b */
+ list_replace_init(&a_old, &a_new);
+
+ /* now: [list] -> a_new -> b */
+ KUNIT_EXPECT_PTR_EQ(test, list.next, &a_new);
+ KUNIT_EXPECT_PTR_EQ(test, b.prev, &a_new);
+
+ /* check a_old is empty (initialized) */
+ KUNIT_EXPECT_TRUE(test, list_empty_careful(&a_old));
+}
+
+static void list_test_list_swap(struct kunit *test)
+{
+ struct list_head a, b;
+ LIST_HEAD(list);
+
+ list_add_tail(&a, &list);
+ list_add_tail(&b, &list);
+
+ /* before: [list] -> a -> b */
+ list_swap(&a, &b);
+
+ /* after: [list] -> b -> a */
+ KUNIT_EXPECT_PTR_EQ(test, &b, list.next);
+ KUNIT_EXPECT_PTR_EQ(test, &a, list.prev);
+
+ KUNIT_EXPECT_PTR_EQ(test, &a, b.next);
+ KUNIT_EXPECT_PTR_EQ(test, &list, b.prev);
+
+ KUNIT_EXPECT_PTR_EQ(test, &list, a.next);
+ KUNIT_EXPECT_PTR_EQ(test, &b, a.prev);
+}
+
+static void list_test_list_del_init(struct kunit *test)
+{
+ struct list_head a, b;
+ LIST_HEAD(list);
+
+ list_add_tail(&a, &list);
+ list_add_tail(&b, &list);
+
+ /* before: [list] -> a -> b */
+ list_del_init(&a);
+ /* after: [list] -> b, a initialised */
+
+ KUNIT_EXPECT_PTR_EQ(test, list.next, &b);
+ KUNIT_EXPECT_PTR_EQ(test, b.prev, &list);
+ KUNIT_EXPECT_TRUE(test, list_empty_careful(&a));
+}
+
+static void list_test_list_move(struct kunit *test)
+{
+ struct list_head a, b;
+ LIST_HEAD(list1);
+ LIST_HEAD(list2);
+
+ list_add_tail(&a, &list1);
+ list_add_tail(&b, &list2);
+
+ /* before: [list1] -> a, [list2] -> b */
+ list_move(&a, &list2);
+ /* after: [list1] empty, [list2] -> a -> b */
+
+ KUNIT_EXPECT_TRUE(test, list_empty(&list1));
+
+ KUNIT_EXPECT_PTR_EQ(test, &a, list2.next);
+ KUNIT_EXPECT_PTR_EQ(test, &b, a.next);
+}
+
+static void list_test_list_move_tail(struct kunit *test)
+{
+ struct list_head a, b;
+ LIST_HEAD(list1);
+ LIST_HEAD(list2);
+
+ list_add_tail(&a, &list1);
+ list_add_tail(&b, &list2);
+
+ /* before: [list1] -> a, [list2] -> b */
+ list_move_tail(&a, &list2);
+ /* after: [list1] empty, [list2] -> b -> a */
+
+ KUNIT_EXPECT_TRUE(test, list_empty(&list1));
+
+ KUNIT_EXPECT_PTR_EQ(test, &b, list2.next);
+ KUNIT_EXPECT_PTR_EQ(test, &a, b.next);
+}
+
+static void list_test_list_bulk_move_tail(struct kunit *test)
+{
+ struct list_head a, b, c, d, x, y;
+ struct list_head *list1_values[] = { &x, &b, &c, &y };
+ struct list_head *list2_values[] = { &a, &d };
+ struct list_head *ptr;
+ LIST_HEAD(list1);
+ LIST_HEAD(list2);
+ int i = 0;
+
+ list_add_tail(&x, &list1);
+ list_add_tail(&y, &list1);
+
+ list_add_tail(&a, &list2);
+ list_add_tail(&b, &list2);
+ list_add_tail(&c, &list2);
+ list_add_tail(&d, &list2);
+
+ /* before: [list1] -> x -> y, [list2] -> a -> b -> c -> d */
+ list_bulk_move_tail(&y, &b, &c);
+ /* after: [list1] -> x -> b -> c -> y, [list2] -> a -> d */
+
+ list_for_each(ptr, &list1) {
+ KUNIT_EXPECT_PTR_EQ(test, ptr, list1_values[i]);
+ i++;
+ }
+ KUNIT_EXPECT_EQ(test, i, 4);
+ i = 0;
+ list_for_each(ptr, &list2) {
+ KUNIT_EXPECT_PTR_EQ(test, ptr, list2_values[i]);
+ i++;
+ }
+ KUNIT_EXPECT_EQ(test, i, 2);
+}
+
+static void list_test_list_is_first(struct kunit *test)
+{
+ struct list_head a, b;
+ LIST_HEAD(list);
+
+ list_add_tail(&a, &list);
+ list_add_tail(&b, &list);
+
+ KUNIT_EXPECT_TRUE(test, list_is_first(&a, &list));
+ KUNIT_EXPECT_FALSE(test, list_is_first(&b, &list));
+}
+
+static void list_test_list_is_last(struct kunit *test)
+{
+ struct list_head a, b;
+ LIST_HEAD(list);
+
+ list_add_tail(&a, &list);
+ list_add_tail(&b, &list);
+
+ KUNIT_EXPECT_FALSE(test, list_is_last(&a, &list));
+ KUNIT_EXPECT_TRUE(test, list_is_last(&b, &list));
+}
+
+static void list_test_list_empty(struct kunit *test)
+{
+ struct list_head a;
+ LIST_HEAD(list1);
+ LIST_HEAD(list2);
+
+ list_add_tail(&a, &list1);
+
+ KUNIT_EXPECT_FALSE(test, list_empty(&list1));
+ KUNIT_EXPECT_TRUE(test, list_empty(&list2));
+}
+
+static void list_test_list_empty_careful(struct kunit *test)
+{
+ /* This test doesn't check correctness under concurrent access */
+ struct list_head a;
+ LIST_HEAD(list1);
+ LIST_HEAD(list2);
+
+ list_add_tail(&a, &list1);
+
+ KUNIT_EXPECT_FALSE(test, list_empty_careful(&list1));
+ KUNIT_EXPECT_TRUE(test, list_empty_careful(&list2));
+}
+
+static void list_test_list_rotate_left(struct kunit *test)
+{
+ struct list_head a, b;
+ LIST_HEAD(list);
+
+ list_add_tail(&a, &list);
+ list_add_tail(&b, &list);
+
+ /* before: [list] -> a -> b */
+ list_rotate_left(&list);
+ /* after: [list] -> b -> a */
+
+ KUNIT_EXPECT_PTR_EQ(test, list.next, &b);
+ KUNIT_EXPECT_PTR_EQ(test, b.prev, &list);
+ KUNIT_EXPECT_PTR_EQ(test, b.next, &a);
+}
+
+static void list_test_list_rotate_to_front(struct kunit *test)
+{
+ struct list_head a, b, c, d;
+ struct list_head *list_values[] = { &c, &d, &a, &b };
+ struct list_head *ptr;
+ LIST_HEAD(list);
+ int i = 0;
+
+ list_add_tail(&a, &list);
+ list_add_tail(&b, &list);
+ list_add_tail(&c, &list);
+ list_add_tail(&d, &list);
+
+ /* before: [list] -> a -> b -> c -> d */
+ list_rotate_to_front(&c, &list);
+ /* after: [list] -> c -> d -> a -> b */
+
+ list_for_each(ptr, &list) {
+ KUNIT_EXPECT_PTR_EQ(test, ptr, list_values[i]);
+ i++;
+ }
+ KUNIT_EXPECT_EQ(test, i, 4);
+}
+
+static void list_test_list_is_singular(struct kunit *test)
+{
+ struct list_head a, b;
+ LIST_HEAD(list);
+
+ /* [list] empty */
+ KUNIT_EXPECT_FALSE(test, list_is_singular(&list));
+
+ list_add_tail(&a, &list);
+
+ /* [list] -> a */
+ KUNIT_EXPECT_TRUE(test, list_is_singular(&list));
+
+ list_add_tail(&b, &list);
+
+ /* [list] -> a -> b */
+ KUNIT_EXPECT_FALSE(test, list_is_singular(&list));
+}
+
+static void list_test_list_cut_position(struct kunit *test)
+{
+ struct list_head entries[3], *cur;
+ LIST_HEAD(list1);
+ LIST_HEAD(list2);
+ int i = 0;
+
+ list_add_tail(&entries[0], &list1);
+ list_add_tail(&entries[1], &list1);
+ list_add_tail(&entries[2], &list1);
+
+ /* before: [list1] -> entries[0] -> entries[1] -> entries[2] */
+ list_cut_position(&list2, &list1, &entries[1]);
+ /* after: [list2] -> entries[0] -> entries[1], [list1] -> entries[2] */
+
+ list_for_each(cur, &list2) {
+ KUNIT_EXPECT_PTR_EQ(test, cur, &entries[i]);
+ i++;
+ }
+
+ KUNIT_EXPECT_EQ(test, i, 2);
+
+ list_for_each(cur, &list1) {
+ KUNIT_EXPECT_PTR_EQ(test, cur, &entries[i]);
+ i++;
+ }
+}
+
+static void list_test_list_cut_before(struct kunit *test)
+{
+ struct list_head entries[3], *cur;
+ LIST_HEAD(list1);
+ LIST_HEAD(list2);
+ int i = 0;
+
+ list_add_tail(&entries[0], &list1);
+ list_add_tail(&entries[1], &list1);
+ list_add_tail(&entries[2], &list1);
+
+ /* before: [list1] -> entries[0] -> entries[1] -> entries[2] */
+ list_cut_before(&list2, &list1, &entries[1]);
+ /* after: [list2] -> entries[0], [list1] -> entries[1] -> entries[2] */
+
+ list_for_each(cur, &list2) {
+ KUNIT_EXPECT_PTR_EQ(test, cur, &entries[i]);
+ i++;
+ }
+
+ KUNIT_EXPECT_EQ(test, i, 1);
+
+ list_for_each(cur, &list1) {
+ KUNIT_EXPECT_PTR_EQ(test, cur, &entries[i]);
+ i++;
+ }
+}
+
+static void list_test_list_splice(struct kunit *test)
+{
+ struct list_head entries[5], *cur;
+ LIST_HEAD(list1);
+ LIST_HEAD(list2);
+ int i = 0;
+
+ list_add_tail(&entries[0], &list1);
+ list_add_tail(&entries[1], &list1);
+ list_add_tail(&entries[2], &list2);
+ list_add_tail(&entries[3], &list2);
+ list_add_tail(&entries[4], &list1);
+
+ /* before: [list1]->e[0]->e[1]->e[4], [list2]->e[2]->e[3] */
+ list_splice(&list2, &entries[1]);
+ /* after: [list1]->e[0]->e[1]->e[2]->e[3]->e[4], [list2] uninit */
+
+ list_for_each(cur, &list1) {
+ KUNIT_EXPECT_PTR_EQ(test, cur, &entries[i]);
+ i++;
+ }
+
+ KUNIT_EXPECT_EQ(test, i, 5);
+}
+
+static void list_test_list_splice_tail(struct kunit *test)
+{
+ struct list_head entries[5], *cur;
+ LIST_HEAD(list1);
+ LIST_HEAD(list2);
+ int i = 0;
+
+ list_add_tail(&entries[0], &list1);
+ list_add_tail(&entries[1], &list1);
+ list_add_tail(&entries[2], &list2);
+ list_add_tail(&entries[3], &list2);
+ list_add_tail(&entries[4], &list1);
+
+ /* before: [list1]->e[0]->e[1]->e[4], [list2]->e[2]->e[3] */
+ list_splice_tail(&list2, &entries[4]);
+ /* after: [list1]->e[0]->e[1]->e[2]->e[3]->e[4], [list2] uninit */
+
+ list_for_each(cur, &list1) {
+ KUNIT_EXPECT_PTR_EQ(test, cur, &entries[i]);
+ i++;
+ }
+
+ KUNIT_EXPECT_EQ(test, i, 5);
+}
+
+static void list_test_list_splice_init(struct kunit *test)
+{
+ struct list_head entries[5], *cur;
+ LIST_HEAD(list1);
+ LIST_HEAD(list2);
+ int i = 0;
+
+ list_add_tail(&entries[0], &list1);
+ list_add_tail(&entries[1], &list1);
+ list_add_tail(&entries[2], &list2);
+ list_add_tail(&entries[3], &list2);
+ list_add_tail(&entries[4], &list1);
+
+ /* before: [list1]->e[0]->e[1]->e[4], [list2]->e[2]->e[3] */
+ list_splice_init(&list2, &entries[1]);
+ /* after: [list1]->e[0]->e[1]->e[2]->e[3]->e[4], [list2] empty */
+
+ list_for_each(cur, &list1) {
+ KUNIT_EXPECT_PTR_EQ(test, cur, &entries[i]);
+ i++;
+ }
+
+ KUNIT_EXPECT_EQ(test, i, 5);
+
+ KUNIT_EXPECT_TRUE(test, list_empty_careful(&list2));
+}
+
+static void list_test_list_splice_tail_init(struct kunit *test)
+{
+ struct list_head entries[5], *cur;
+ LIST_HEAD(list1);
+ LIST_HEAD(list2);
+ int i = 0;
+
+ list_add_tail(&entries[0], &list1);
+ list_add_tail(&entries[1], &list1);
+ list_add_tail(&entries[2], &list2);
+ list_add_tail(&entries[3], &list2);
+ list_add_tail(&entries[4], &list1);
+
+ /* before: [list1]->e[0]->e[1]->e[4], [list2]->e[2]->e[3] */
+ list_splice_tail_init(&list2, &entries[4]);
+ /* after: [list1]->e[0]->e[1]->e[2]->e[3]->e[4], [list2] empty */
+
+ list_for_each(cur, &list1) {
+ KUNIT_EXPECT_PTR_EQ(test, cur, &entries[i]);
+ i++;
+ }
+
+ KUNIT_EXPECT_EQ(test, i, 5);
+
+ KUNIT_EXPECT_TRUE(test, list_empty_careful(&list2));
+}
+
+static void list_test_list_entry(struct kunit *test)
+{
+ struct list_test_struct test_struct;
+
+ KUNIT_EXPECT_PTR_EQ(test, &test_struct, list_entry(&(test_struct.list),
+ struct list_test_struct, list));
+}
+
+static void list_test_list_first_entry(struct kunit *test)
+{
+ struct list_test_struct test_struct1, test_struct2;
+ LIST_HEAD(list);
+
+ list_add_tail(&test_struct1.list, &list);
+ list_add_tail(&test_struct2.list, &list);
+
+
+ KUNIT_EXPECT_PTR_EQ(test, &test_struct1, list_first_entry(&list,
+ struct list_test_struct, list));
+}
+
+static void list_test_list_last_entry(struct kunit *test)
+{
+ struct list_test_struct test_struct1, test_struct2;
+ LIST_HEAD(list);
+
+ list_add_tail(&test_struct1.list, &list);
+ list_add_tail(&test_struct2.list, &list);
+
+
+ KUNIT_EXPECT_PTR_EQ(test, &test_struct2, list_last_entry(&list,
+ struct list_test_struct, list));
+}
+
+static void list_test_list_first_entry_or_null(struct kunit *test)
+{
+ struct list_test_struct test_struct1, test_struct2;
+ LIST_HEAD(list);
+
+ KUNIT_EXPECT_FALSE(test, list_first_entry_or_null(&list,
+ struct list_test_struct, list));
+
+ list_add_tail(&test_struct1.list, &list);
+ list_add_tail(&test_struct2.list, &list);
+
+ KUNIT_EXPECT_PTR_EQ(test, &test_struct1,
+ list_first_entry_or_null(&list,
+ struct list_test_struct, list));
+}
+
+static void list_test_list_next_entry(struct kunit *test)
+{
+ struct list_test_struct test_struct1, test_struct2;
+ LIST_HEAD(list);
+
+ list_add_tail(&test_struct1.list, &list);
+ list_add_tail(&test_struct2.list, &list);
+
+
+ KUNIT_EXPECT_PTR_EQ(test, &test_struct2, list_next_entry(&test_struct1,
+ list));
+}
+
+static void list_test_list_prev_entry(struct kunit *test)
+{
+ struct list_test_struct test_struct1, test_struct2;
+ LIST_HEAD(list);
+
+ list_add_tail(&test_struct1.list, &list);
+ list_add_tail(&test_struct2.list, &list);
+
+
+ KUNIT_EXPECT_PTR_EQ(test, &test_struct1, list_prev_entry(&test_struct2,
+ list));
+}
+
+static void list_test_list_for_each(struct kunit *test)
+{
+ struct list_head entries[3], *cur;
+ LIST_HEAD(list);
+ int i = 0;
+
+ list_add_tail(&entries[0], &list);
+ list_add_tail(&entries[1], &list);
+ list_add_tail(&entries[2], &list);
+
+ list_for_each(cur, &list) {
+ KUNIT_EXPECT_PTR_EQ(test, cur, &entries[i]);
+ i++;
+ }
+
+ KUNIT_EXPECT_EQ(test, i, 3);
+}
+
+static void list_test_list_for_each_prev(struct kunit *test)
+{
+ struct list_head entries[3], *cur;
+ LIST_HEAD(list);
+ int i = 2;
+
+ list_add_tail(&entries[0], &list);
+ list_add_tail(&entries[1], &list);
+ list_add_tail(&entries[2], &list);
+
+ list_for_each_prev(cur, &list) {
+ KUNIT_EXPECT_PTR_EQ(test, cur, &entries[i]);
+ i--;
+ }
+
+ KUNIT_EXPECT_EQ(test, i, -1);
+}
+
+static void list_test_list_for_each_safe(struct kunit *test)
+{
+ struct list_head entries[3], *cur, *n;
+ LIST_HEAD(list);
+ int i = 0;
+
+
+ list_add_tail(&entries[0], &list);
+ list_add_tail(&entries[1], &list);
+ list_add_tail(&entries[2], &list);
+
+ list_for_each_safe(cur, n, &list) {
+ KUNIT_EXPECT_PTR_EQ(test, cur, &entries[i]);
+ list_del(&entries[i]);
+ i++;
+ }
+
+ KUNIT_EXPECT_EQ(test, i, 3);
+ KUNIT_EXPECT_TRUE(test, list_empty(&list));
+}
+
+static void list_test_list_for_each_prev_safe(struct kunit *test)
+{
+ struct list_head entries[3], *cur, *n;
+ LIST_HEAD(list);
+ int i = 2;
+
+ list_add_tail(&entries[0], &list);
+ list_add_tail(&entries[1], &list);
+ list_add_tail(&entries[2], &list);
+
+ list_for_each_prev_safe(cur, n, &list) {
+ KUNIT_EXPECT_PTR_EQ(test, cur, &entries[i]);
+ list_del(&entries[i]);
+ i--;
+ }
+
+ KUNIT_EXPECT_EQ(test, i, -1);
+ KUNIT_EXPECT_TRUE(test, list_empty(&list));
+}
+
+static void list_test_list_for_each_entry(struct kunit *test)
+{
+ struct list_test_struct entries[5], *cur;
+ static LIST_HEAD(list);
+ int i = 0;
+
+ for (i = 0; i < 5; ++i) {
+ entries[i].data = i;
+ list_add_tail(&entries[i].list, &list);
+ }
+
+ i = 0;
+
+ list_for_each_entry(cur, &list, list) {
+ KUNIT_EXPECT_EQ(test, cur->data, i);
+ i++;
+ }
+
+ KUNIT_EXPECT_EQ(test, i, 5);
+}
+
+static void list_test_list_for_each_entry_reverse(struct kunit *test)
+{
+ struct list_test_struct entries[5], *cur;
+ static LIST_HEAD(list);
+ int i = 0;
+
+ for (i = 0; i < 5; ++i) {
+ entries[i].data = i;
+ list_add_tail(&entries[i].list, &list);
+ }
+
+ i = 4;
+
+ list_for_each_entry_reverse(cur, &list, list) {
+ KUNIT_EXPECT_EQ(test, cur->data, i);
+ i--;
+ }
+
+ KUNIT_EXPECT_EQ(test, i, -1);
+}
+
+static struct kunit_case list_test_cases[] = {
+ KUNIT_CASE(list_test_list_init),
+ KUNIT_CASE(list_test_list_add),
+ KUNIT_CASE(list_test_list_add_tail),
+ KUNIT_CASE(list_test_list_del),
+ KUNIT_CASE(list_test_list_replace),
+ KUNIT_CASE(list_test_list_replace_init),
+ KUNIT_CASE(list_test_list_swap),
+ KUNIT_CASE(list_test_list_del_init),
+ KUNIT_CASE(list_test_list_move),
+ KUNIT_CASE(list_test_list_move_tail),
+ KUNIT_CASE(list_test_list_bulk_move_tail),
+ KUNIT_CASE(list_test_list_is_first),
+ KUNIT_CASE(list_test_list_is_last),
+ KUNIT_CASE(list_test_list_empty),
+ KUNIT_CASE(list_test_list_empty_careful),
+ KUNIT_CASE(list_test_list_rotate_left),
+ KUNIT_CASE(list_test_list_rotate_to_front),
+ KUNIT_CASE(list_test_list_is_singular),
+ KUNIT_CASE(list_test_list_cut_position),
+ KUNIT_CASE(list_test_list_cut_before),
+ KUNIT_CASE(list_test_list_splice),
+ KUNIT_CASE(list_test_list_splice_tail),
+ KUNIT_CASE(list_test_list_splice_init),
+ KUNIT_CASE(list_test_list_splice_tail_init),
+ KUNIT_CASE(list_test_list_entry),
+ KUNIT_CASE(list_test_list_first_entry),
+ KUNIT_CASE(list_test_list_last_entry),
+ KUNIT_CASE(list_test_list_first_entry_or_null),
+ KUNIT_CASE(list_test_list_next_entry),
+ KUNIT_CASE(list_test_list_prev_entry),
+ KUNIT_CASE(list_test_list_for_each),
+ KUNIT_CASE(list_test_list_for_each_prev),
+ KUNIT_CASE(list_test_list_for_each_safe),
+ KUNIT_CASE(list_test_list_for_each_prev_safe),
+ KUNIT_CASE(list_test_list_for_each_entry),
+ KUNIT_CASE(list_test_list_for_each_entry_reverse),
+ {},
+};
+
+static struct kunit_suite list_test_module = {
+ .name = "list-kunit-test",
+ .test_cases = list_test_cases,
+};
+
+kunit_test_suite(list_test_module);
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 18c1dfbb1765..c8fa1d274530 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -1529,7 +1529,7 @@ void __rcu **idr_get_free(struct radix_tree_root *root,
offset = radix_tree_find_next_bit(node, IDR_FREE,
offset + 1);
start = next_index(start, node, offset);
- if (start > max)
+ if (start > max || start == 0)
return ERR_PTR(-ENOSPC);
while (offset == RADIX_TREE_MAP_SIZE) {
offset = node->offset + 1;
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index 969e5400a615..33feec8989f1 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -236,23 +236,6 @@ bool sbitmap_any_bit_set(const struct sbitmap *sb)
}
EXPORT_SYMBOL_GPL(sbitmap_any_bit_set);
-bool sbitmap_any_bit_clear(const struct sbitmap *sb)
-{
- unsigned int i;
-
- for (i = 0; i < sb->map_nr; i++) {
- const struct sbitmap_word *word = &sb->map[i];
- unsigned long mask = word->word & ~word->cleared;
- unsigned long ret;
-
- ret = find_first_zero_bit(&mask, word->depth);
- if (ret < word->depth)
- return true;
- }
- return false;
-}
-EXPORT_SYMBOL_GPL(sbitmap_any_bit_clear);
-
static unsigned int __sbitmap_weight(const struct sbitmap *sb, bool set)
{
unsigned int i, weight = 0;
diff --git a/lib/string.c b/lib/string.c
index cd7a10c19210..08ec58cc673b 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -748,27 +748,6 @@ void *memset(void *s, int c, size_t count)
EXPORT_SYMBOL(memset);
#endif
-/**
- * memzero_explicit - Fill a region of memory (e.g. sensitive
- * keying data) with 0s.
- * @s: Pointer to the start of the area.
- * @count: The size of the area.
- *
- * Note: usually using memset() is just fine (!), but in cases
- * where clearing out _local_ data at the end of a scope is
- * necessary, memzero_explicit() should be used instead in
- * order to prevent the compiler from optimising away zeroing.
- *
- * memzero_explicit() doesn't need an arch-specific version as
- * it just invokes the one of memset() implicitly.
- */
-void memzero_explicit(void *s, size_t count)
-{
- memset(s, 0, count);
- barrier_data(s);
-}
-EXPORT_SYMBOL(memzero_explicit);
-
#ifndef __HAVE_ARCH_MEMSET16
/**
* memset16() - Fill a memory area with a uint16_t
diff --git a/lib/test_meminit.c b/lib/test_meminit.c
index 9729f271d150..9742e5cb853a 100644
--- a/lib/test_meminit.c
+++ b/lib/test_meminit.c
@@ -297,6 +297,32 @@ out:
return 1;
}
+static int __init do_kmem_cache_size_bulk(int size, int *total_failures)
+{
+ struct kmem_cache *c;
+ int i, iter, maxiter = 1024;
+ int num, bytes;
+ bool fail = false;
+ void *objects[10];
+
+ c = kmem_cache_create("test_cache", size, size, 0, NULL);
+ for (iter = 0; (iter < maxiter) && !fail; iter++) {
+ num = kmem_cache_alloc_bulk(c, GFP_KERNEL, ARRAY_SIZE(objects),
+ objects);
+ for (i = 0; i < num; i++) {
+ bytes = count_nonzero_bytes(objects[i], size);
+ if (bytes)
+ fail = true;
+ fill_with_garbage(objects[i], size);
+ }
+
+ if (num)
+ kmem_cache_free_bulk(c, num, objects);
+ }
+ *total_failures += fail;
+ return 1;
+}
+
/*
* Test kmem_cache allocation by creating caches of different sizes, with and
* without constructors, with and without SLAB_TYPESAFE_BY_RCU.
@@ -318,6 +344,7 @@ static int __init test_kmemcache(int *total_failures)
num_tests += do_kmem_cache_size(size, ctor, rcu, zero,
&failures);
}
+ num_tests += do_kmem_cache_size_bulk(size, &failures);
}
REPORT_FAILURES_IN_FN();
*total_failures += failures;
diff --git a/lib/test_user_copy.c b/lib/test_user_copy.c
index e365ace06538..5ff04d8fe971 100644
--- a/lib/test_user_copy.c
+++ b/lib/test_user_copy.c
@@ -47,18 +47,35 @@ static bool is_zeroed(void *from, size_t size)
static int test_check_nonzero_user(char *kmem, char __user *umem, size_t size)
{
int ret = 0;
- size_t start, end, i;
- size_t zero_start = size / 4;
- size_t zero_end = size - zero_start;
+ size_t start, end, i, zero_start, zero_end;
+
+ if (test(size < 2 * PAGE_SIZE, "buffer too small"))
+ return -EINVAL;
+
+ /*
+ * We want to cross a page boundary to exercise the code more
+ * effectively. We also don't want to make the size we scan too large,
+ * otherwise the test can take a long time and cause soft lockups. So
+ * scan a 1024 byte region across the page boundary.
+ */
+ size = 1024;
+ start = PAGE_SIZE - (size / 2);
+
+ kmem += start;
+ umem += start;
+
+ zero_start = size / 4;
+ zero_end = size - zero_start;
/*
- * We conduct a series of check_nonzero_user() tests on a block of memory
- * with the following byte-pattern (trying every possible [start,end]
- * pair):
+ * We conduct a series of check_nonzero_user() tests on a block of
+ * memory with the following byte-pattern (trying every possible
+ * [start,end] pair):
*
* [ 00 ff 00 ff ... 00 00 00 00 ... ff 00 ff 00 ]
*
- * And we verify that check_nonzero_user() acts identically to memchr_inv().
+ * And we verify that check_nonzero_user() acts identically to
+ * memchr_inv().
*/
memset(kmem, 0x0, size);
@@ -93,11 +110,13 @@ static int test_copy_struct_from_user(char *kmem, char __user *umem,
size_t ksize, usize;
umem_src = kmalloc(size, GFP_KERNEL);
- if ((ret |= test(umem_src == NULL, "kmalloc failed")))
+ ret = test(umem_src == NULL, "kmalloc failed");
+ if (ret)
goto out_free;
expected = kmalloc(size, GFP_KERNEL);
- if ((ret |= test(expected == NULL, "kmalloc failed")))
+ ret = test(expected == NULL, "kmalloc failed");
+ if (ret)
goto out_free;
/* Fill umem with a fixed byte pattern. */
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index 9d631a7b6a70..7df4f7f395bf 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -1110,6 +1110,28 @@ static noinline void check_find_entry(struct xarray *xa)
XA_BUG_ON(xa, !xa_empty(xa));
}
+static noinline void check_move_tiny(struct xarray *xa)
+{
+ XA_STATE(xas, xa, 0);
+
+ XA_BUG_ON(xa, !xa_empty(xa));
+ rcu_read_lock();
+ XA_BUG_ON(xa, xas_next(&xas) != NULL);
+ XA_BUG_ON(xa, xas_next(&xas) != NULL);
+ rcu_read_unlock();
+ xa_store_index(xa, 0, GFP_KERNEL);
+ rcu_read_lock();
+ xas_set(&xas, 0);
+ XA_BUG_ON(xa, xas_next(&xas) != xa_mk_index(0));
+ XA_BUG_ON(xa, xas_next(&xas) != NULL);
+ xas_set(&xas, 0);
+ XA_BUG_ON(xa, xas_prev(&xas) != xa_mk_index(0));
+ XA_BUG_ON(xa, xas_prev(&xas) != NULL);
+ rcu_read_unlock();
+ xa_erase_index(xa, 0);
+ XA_BUG_ON(xa, !xa_empty(xa));
+}
+
static noinline void check_move_small(struct xarray *xa, unsigned long idx)
{
XA_STATE(xas, xa, 0);
@@ -1217,6 +1239,8 @@ static noinline void check_move(struct xarray *xa)
xa_destroy(xa);
+ check_move_tiny(xa);
+
for (i = 0; i < 16; i++)
check_move_small(xa, 1UL << i);
diff --git a/lib/vdso/Kconfig b/lib/vdso/Kconfig
index cc00364bd2c2..9fe698ff62ec 100644
--- a/lib/vdso/Kconfig
+++ b/lib/vdso/Kconfig
@@ -24,13 +24,4 @@ config GENERIC_COMPAT_VDSO
help
This config option enables the compat VDSO layer.
-config CROSS_COMPILE_COMPAT_VDSO
- string "32 bit Toolchain prefix for compat vDSO"
- default ""
- depends on GENERIC_COMPAT_VDSO
- help
- Defines the cross-compiler prefix for compiling compat vDSO.
- If a 64 bit compiler (i.e. x86_64) can compile the VDSO for
- 32 bit, it does not need to define this parameter.
-
endif
diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c
index e630e7ff57f1..45f57fd2db64 100644
--- a/lib/vdso/gettimeofday.c
+++ b/lib/vdso/gettimeofday.c
@@ -214,9 +214,10 @@ int __cvdso_clock_getres_common(clockid_t clock, struct __kernel_timespec *res)
return -1;
}
- res->tv_sec = 0;
- res->tv_nsec = ns;
-
+ if (likely(res)) {
+ res->tv_sec = 0;
+ res->tv_nsec = ns;
+ }
return 0;
}
@@ -245,7 +246,7 @@ __cvdso_clock_getres_time32(clockid_t clock, struct old_timespec32 *res)
ret = clock_getres_fallback(clock, &ts);
#endif
- if (likely(!ret)) {
+ if (likely(!ret && res)) {
res->tv_sec = ts.tv_sec;
res->tv_nsec = ts.tv_nsec;
}
diff --git a/lib/xarray.c b/lib/xarray.c
index 446b956c9188..1237c213f52b 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -994,6 +994,8 @@ void *__xas_prev(struct xa_state *xas)
if (!xas_frozen(xas->xa_node))
xas->xa_index--;
+ if (!xas->xa_node)
+ return set_bounds(xas);
if (xas_not_node(xas->xa_node))
return xas_load(xas);
@@ -1031,6 +1033,8 @@ void *__xas_next(struct xa_state *xas)
if (!xas_frozen(xas->xa_node))
xas->xa_index++;
+ if (!xas->xa_node)
+ return set_bounds(xas);
if (xas_not_node(xas->xa_node))
return xas_load(xas);
diff --git a/lib/xz/xz_dec_lzma2.c b/lib/xz/xz_dec_lzma2.c
index 08c3c8049998..156f26fdc4c9 100644
--- a/lib/xz/xz_dec_lzma2.c
+++ b/lib/xz/xz_dec_lzma2.c
@@ -1146,6 +1146,7 @@ XZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props)
if (DEC_IS_DYNALLOC(s->dict.mode)) {
if (s->dict.allocated < s->dict.size) {
+ s->dict.allocated = s->dict.size;
vfree(s->dict.buf);
s->dict.buf = vmalloc(s->dict.size);
if (s->dict.buf == NULL) {
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index d9daa3e422d0..c360f6a6c844 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -239,8 +239,8 @@ static int __init default_bdi_init(void)
{
int err;
- bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_FREEZABLE |
- WQ_UNBOUND | WQ_SYSFS, 0);
+ bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_UNBOUND |
+ WQ_SYSFS, 0);
if (!bdi_wq)
return -ENOMEM;
diff --git a/mm/compaction.c b/mm/compaction.c
index ce08b39d85d4..672d3c78c6ab 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -270,14 +270,15 @@ __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source,
/* Ensure the start of the pageblock or zone is online and valid */
block_pfn = pageblock_start_pfn(pfn);
- block_page = pfn_to_online_page(max(block_pfn, zone->zone_start_pfn));
+ block_pfn = max(block_pfn, zone->zone_start_pfn);
+ block_page = pfn_to_online_page(block_pfn);
if (block_page) {
page = block_page;
pfn = block_pfn;
}
/* Ensure the end of the pageblock or zone is online and valid */
- block_pfn += pageblock_nr_pages;
+ block_pfn = pageblock_end_pfn(pfn) - 1;
block_pfn = min(block_pfn, zone_end_pfn(zone) - 1);
end_page = pfn_to_online_page(block_pfn);
if (!end_page)
@@ -303,7 +304,7 @@ __reset_isolation_pfn(struct zone *zone, unsigned long pfn, bool check_source,
page += (1 << PAGE_ALLOC_COSTLY_ORDER);
pfn += (1 << PAGE_ALLOC_COSTLY_ORDER);
- } while (page < end_page);
+ } while (page <= end_page);
return false;
}
diff --git a/mm/debug.c b/mm/debug.c
index 8345bb6e4769..0461df1207cb 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -67,28 +67,31 @@ void __dump_page(struct page *page, const char *reason)
*/
mapcount = PageSlab(page) ? 0 : page_mapcount(page);
- pr_warn("page:%px refcount:%d mapcount:%d mapping:%px index:%#lx",
- page, page_ref_count(page), mapcount,
- page->mapping, page_to_pgoff(page));
if (PageCompound(page))
- pr_cont(" compound_mapcount: %d", compound_mapcount(page));
- pr_cont("\n");
- if (PageAnon(page))
- pr_warn("anon ");
- else if (PageKsm(page))
- pr_warn("ksm ");
+ pr_warn("page:%px refcount:%d mapcount:%d mapping:%px "
+ "index:%#lx compound_mapcount: %d\n",
+ page, page_ref_count(page), mapcount,
+ page->mapping, page_to_pgoff(page),
+ compound_mapcount(page));
+ else
+ pr_warn("page:%px refcount:%d mapcount:%d mapping:%px index:%#lx\n",
+ page, page_ref_count(page), mapcount,
+ page->mapping, page_to_pgoff(page));
+ if (PageKsm(page))
+ pr_warn("ksm flags: %#lx(%pGp)\n", page->flags, &page->flags);
+ else if (PageAnon(page))
+ pr_warn("anon flags: %#lx(%pGp)\n", page->flags, &page->flags);
else if (mapping) {
- pr_warn("%ps ", mapping->a_ops);
if (mapping->host && mapping->host->i_dentry.first) {
struct dentry *dentry;
dentry = container_of(mapping->host->i_dentry.first, struct dentry, d_u.d_alias);
- pr_warn("name:\"%pd\" ", dentry);
- }
+ pr_warn("%ps name:\"%pd\"\n", mapping->a_ops, dentry);
+ } else
+ pr_warn("%ps\n", mapping->a_ops);
+ pr_warn("flags: %#lx(%pGp)\n", page->flags, &page->flags);
}
BUILD_BUG_ON(ARRAY_SIZE(pageflag_names) != __NR_PAGEFLAGS + 1);
- pr_warn("flags: %#lx(%pGp)\n", page->flags, &page->flags);
-
hex_only:
print_hex_dump(KERN_WARNING, "raw: ", DUMP_PREFIX_NONE, 32,
sizeof(unsigned long), page,
diff --git a/mm/filemap.c b/mm/filemap.c
index 1146fcfa3215..85b7d087eb45 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -40,6 +40,7 @@
#include <linux/rmap.h>
#include <linux/delayacct.h>
#include <linux/psi.h>
+#include <linux/ramfs.h>
#include "internal.h"
#define CREATE_TRACE_POINTS
diff --git a/mm/gup.c b/mm/gup.c
index 23a9f9c9d377..8f236a335ae9 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1973,7 +1973,8 @@ static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
}
static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr)
+ unsigned long end, unsigned int flags,
+ struct page **pages, int *nr)
{
unsigned long pte_end;
struct page *head, *page;
@@ -1986,7 +1987,7 @@ static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
pte = READ_ONCE(*ptep);
- if (!pte_access_permitted(pte, write))
+ if (!pte_access_permitted(pte, flags & FOLL_WRITE))
return 0;
/* hugepages are never "special" */
@@ -2023,7 +2024,7 @@ static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
}
static int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
- unsigned int pdshift, unsigned long end, int write,
+ unsigned int pdshift, unsigned long end, unsigned int flags,
struct page **pages, int *nr)
{
pte_t *ptep;
@@ -2033,7 +2034,7 @@ static int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
ptep = hugepte_offset(hugepd, addr, pdshift);
do {
next = hugepte_addr_end(addr, end, sz);
- if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr))
+ if (!gup_hugepte(ptep, sz, addr, end, flags, pages, nr))
return 0;
} while (ptep++, addr = next, addr != end);
@@ -2041,7 +2042,7 @@ static int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
}
#else
static inline int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
- unsigned pdshift, unsigned long end, int write,
+ unsigned int pdshift, unsigned long end, unsigned int flags,
struct page **pages, int *nr)
{
return 0;
@@ -2049,7 +2050,8 @@ static inline int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
#endif /* CONFIG_ARCH_HAS_HUGEPD */
static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
- unsigned long end, unsigned int flags, struct page **pages, int *nr)
+ unsigned long end, unsigned int flags,
+ struct page **pages, int *nr)
{
struct page *head, *page;
int refs;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index c5cb6dcd6c69..13cc93785006 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2789,8 +2789,13 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
ds_queue->split_queue_len--;
list_del(page_deferred_list(head));
}
- if (mapping)
- __dec_node_page_state(page, NR_SHMEM_THPS);
+ if (mapping) {
+ if (PageSwapBacked(page))
+ __dec_node_page_state(page, NR_SHMEM_THPS);
+ else
+ __dec_node_page_state(page, NR_FILE_THPS);
+ }
+
spin_unlock(&ds_queue->split_queue_lock);
__split_huge_page(page, list, end, flags);
if (PageSwapCache(head)) {
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ef37c85423a5..b45a95363a84 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1084,11 +1084,10 @@ static bool pfn_range_valid_gigantic(struct zone *z,
struct page *page;
for (i = start_pfn; i < end_pfn; i++) {
- if (!pfn_valid(i))
+ page = pfn_to_online_page(i);
+ if (!page)
return false;
- page = pfn_to_page(i);
-
if (page_zone(page) != z)
return false;
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index f1930fa0b445..2ac38bdc18a1 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -196,7 +196,7 @@ int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
again:
rcu_read_lock();
h_cg = hugetlb_cgroup_from_task(current);
- if (!css_tryget_online(&h_cg->css)) {
+ if (!css_tryget(&h_cg->css)) {
rcu_read_unlock();
goto again;
}
diff --git a/mm/init-mm.c b/mm/init-mm.c
index fb1e15028ef0..19603302a77f 100644
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -5,6 +5,7 @@
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/cpumask.h>
+#include <linux/mman.h>
#include <linux/atomic.h>
#include <linux/user_namespace.h>
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 0a1b4b484ac5..a8a57bebb5fa 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1028,12 +1028,13 @@ static void collapse_huge_page(struct mm_struct *mm,
anon_vma_lock_write(vma->anon_vma);
- pte = pte_offset_map(pmd, address);
- pte_ptl = pte_lockptr(mm, pmd);
-
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL, mm,
address, address + HPAGE_PMD_SIZE);
mmu_notifier_invalidate_range_start(&range);
+
+ pte = pte_offset_map(pmd, address);
+ pte_ptl = pte_lockptr(mm, pmd);
+
pmd_ptl = pmd_lock(mm, pmd); /* probably unnecessary */
/*
* After this gup_fast can't run anymore. This also removes
@@ -1601,17 +1602,6 @@ static void collapse_file(struct mm_struct *mm,
result = SCAN_FAIL;
goto xa_unlocked;
}
- } else if (!PageUptodate(page)) {
- xas_unlock_irq(&xas);
- wait_on_page_locked(page);
- if (!trylock_page(page)) {
- result = SCAN_PAGE_LOCK;
- goto xa_unlocked;
- }
- get_page(page);
- } else if (PageDirty(page)) {
- result = SCAN_FAIL;
- goto xa_locked;
} else if (trylock_page(page)) {
get_page(page);
xas_unlock_irq(&xas);
@@ -1626,7 +1616,12 @@ static void collapse_file(struct mm_struct *mm,
* without racing with truncate.
*/
VM_BUG_ON_PAGE(!PageLocked(page), page);
- VM_BUG_ON_PAGE(!PageUptodate(page), page);
+
+ /* make sure the page is up to date */
+ if (unlikely(!PageUptodate(page))) {
+ result = SCAN_FAIL;
+ goto out_unlock;
+ }
/*
* If file was truncated then extended, or hole-punched, before
@@ -1642,6 +1637,16 @@ static void collapse_file(struct mm_struct *mm,
goto out_unlock;
}
+ if (!is_shmem && PageDirty(page)) {
+ /*
+ * khugepaged only works on read-only fd, so this
+ * page is dirty because it hasn't been flushed
+ * since first write.
+ */
+ result = SCAN_FAIL;
+ goto out_unlock;
+ }
+
if (isolate_lru_page(page)) {
result = SCAN_DEL_PAGE_LRU;
goto out_unlock;
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 03a8d84badad..244607663363 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -527,6 +527,16 @@ static struct kmemleak_object *find_and_get_object(unsigned long ptr, int alias)
}
/*
+ * Remove an object from the object_tree_root and object_list. Must be called
+ * with the kmemleak_lock held _if_ kmemleak is still enabled.
+ */
+static void __remove_object(struct kmemleak_object *object)
+{
+ rb_erase(&object->rb_node, &object_tree_root);
+ list_del_rcu(&object->object_list);
+}
+
+/*
* Look up an object in the object search tree and remove it from both
* object_tree_root and object_list. The returned object's use_count should be
* at least 1, as initially set by create_object().
@@ -538,10 +548,8 @@ static struct kmemleak_object *find_and_remove_object(unsigned long ptr, int ali
write_lock_irqsave(&kmemleak_lock, flags);
object = lookup_object(ptr, alias);
- if (object) {
- rb_erase(&object->rb_node, &object_tree_root);
- list_del_rcu(&object->object_list);
- }
+ if (object)
+ __remove_object(object);
write_unlock_irqrestore(&kmemleak_lock, flags);
return object;
@@ -1834,12 +1842,16 @@ static const struct file_operations kmemleak_fops = {
static void __kmemleak_do_cleanup(void)
{
- struct kmemleak_object *object;
+ struct kmemleak_object *object, *tmp;
- rcu_read_lock();
- list_for_each_entry_rcu(object, &object_list, object_list)
- delete_object_full(object->pointer);
- rcu_read_unlock();
+ /*
+ * Kmemleak has already been disabled, no need for RCU list traversal
+ * or kmemleak_lock held.
+ */
+ list_for_each_entry_safe(object, tmp, &object_list, object_list) {
+ __remove_object(object);
+ __delete_object(object);
+ }
}
/*
diff --git a/mm/ksm.c b/mm/ksm.c
index dbee2eb4dd05..7905934cd3ad 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -885,13 +885,13 @@ static int remove_stable_node(struct stable_node *stable_node)
return 0;
}
- if (WARN_ON_ONCE(page_mapped(page))) {
- /*
- * This should not happen: but if it does, just refuse to let
- * merge_across_nodes be switched - there is no need to panic.
- */
- err = -EBUSY;
- } else {
+ /*
+ * Page could be still mapped if this races with __mmput() running in
+ * between ksm_exit() and exit_mmap(). Just refuse to let
+ * merge_across_nodes/max_page_sharing be switched.
+ */
+ err = -EBUSY;
+ if (!page_mapped(page)) {
/*
* The stable node did not yet appear stale to get_ksm_page(),
* since that allows for an unmapped ksm page to be recognized
diff --git a/mm/madvise.c b/mm/madvise.c
index 2be9f3fdb05e..94c343b4c968 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -363,8 +363,12 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
ClearPageReferenced(page);
test_and_clear_page_young(page);
if (pageout) {
- if (!isolate_lru_page(page))
- list_add(&page->lru, &page_list);
+ if (!isolate_lru_page(page)) {
+ if (PageUnevictable(page))
+ putback_lru_page(page);
+ else
+ list_add(&page->lru, &page_list);
+ }
} else
deactivate_page(page);
huge_unlock:
@@ -441,8 +445,12 @@ regular_page:
ClearPageReferenced(page);
test_and_clear_page_young(page);
if (pageout) {
- if (!isolate_lru_page(page))
- list_add(&page->lru, &page_list);
+ if (!isolate_lru_page(page)) {
+ if (PageUnevictable(page))
+ putback_lru_page(page);
+ else
+ list_add(&page->lru, &page_list);
+ }
} else
deactivate_page(page);
}
diff --git a/mm/memblock.c b/mm/memblock.c
index 7d4f61ae666a..c4b16cae2bc9 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1356,9 +1356,6 @@ static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
align = SMP_CACHE_BYTES;
}
- if (end > memblock.current_limit)
- end = memblock.current_limit;
-
again:
found = memblock_find_in_range_node(size, align, start, end, nid,
flags);
@@ -1469,6 +1466,9 @@ static void * __init memblock_alloc_internal(
if (WARN_ON_ONCE(slab_is_available()))
return kzalloc_node(size, GFP_NOWAIT, nid);
+ if (max_addr > memblock.current_limit)
+ max_addr = memblock.current_limit;
+
alloc = memblock_alloc_range_nid(size, align, min_addr, max_addr, nid);
/* retry allocation without lower limit */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c313c49074ca..46ad252e6d6a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -484,7 +484,7 @@ ino_t page_cgroup_ino(struct page *page)
unsigned long ino = 0;
rcu_read_lock();
- if (PageHead(page) && PageSlab(page))
+ if (PageSlab(page) && !PageTail(page))
memcg = memcg_from_slab_page(page);
else
memcg = READ_ONCE(page->mem_cgroup);
@@ -960,7 +960,7 @@ struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
if (unlikely(!memcg))
memcg = root_mem_cgroup;
}
- } while (!css_tryget_online(&memcg->css));
+ } while (!css_tryget(&memcg->css));
rcu_read_unlock();
return memcg;
}
@@ -1567,6 +1567,11 @@ unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg)
return max;
}
+unsigned long mem_cgroup_size(struct mem_cgroup *memcg)
+{
+ return page_counter_read(&memcg->memory);
+}
+
static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
int order)
{
@@ -2530,6 +2535,15 @@ retry:
}
/*
+ * Memcg doesn't have a dedicated reserve for atomic
+ * allocations. But like the global atomic pool, we need to
+ * put the burden of reclaim on regular allocation requests
+ * and let these go through as privileged allocations.
+ */
+ if (gfp_mask & __GFP_ATOMIC)
+ goto force;
+
+ /*
* Unlike in global OOM situations, memcg is not in a physical
* memory shortage. Allow dying and OOM-killed tasks to
* bypass the last charges so that they can exit quickly and
@@ -5009,12 +5023,6 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
{
int node;
- /*
- * Flush percpu vmstats and vmevents to guarantee the value correctness
- * on parent's and all ancestor levels.
- */
- memcg_flush_percpu_vmstats(memcg, false);
- memcg_flush_percpu_vmevents(memcg);
for_each_node(node)
free_mem_cgroup_per_node_info(memcg, node);
free_percpu(memcg->vmstats_percpu);
@@ -5025,6 +5033,12 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
static void mem_cgroup_free(struct mem_cgroup *memcg)
{
memcg_wb_domain_exit(memcg);
+ /*
+ * Flush percpu vmstats and vmevents to guarantee the value correctness
+ * on parent's and all ancestor levels.
+ */
+ memcg_flush_percpu_vmstats(memcg, false);
+ memcg_flush_percpu_vmevents(memcg);
__mem_cgroup_free(memcg);
}
@@ -5415,6 +5429,8 @@ static int mem_cgroup_move_account(struct page *page,
struct mem_cgroup *from,
struct mem_cgroup *to)
{
+ struct lruvec *from_vec, *to_vec;
+ struct pglist_data *pgdat;
unsigned long flags;
unsigned int nr_pages = compound ? hpage_nr_pages(page) : 1;
int ret;
@@ -5438,11 +5454,15 @@ static int mem_cgroup_move_account(struct page *page,
anon = PageAnon(page);
+ pgdat = page_pgdat(page);
+ from_vec = mem_cgroup_lruvec(pgdat, from);
+ to_vec = mem_cgroup_lruvec(pgdat, to);
+
spin_lock_irqsave(&from->move_lock, flags);
if (!anon && page_mapped(page)) {
- __mod_memcg_state(from, NR_FILE_MAPPED, -nr_pages);
- __mod_memcg_state(to, NR_FILE_MAPPED, nr_pages);
+ __mod_lruvec_state(from_vec, NR_FILE_MAPPED, -nr_pages);
+ __mod_lruvec_state(to_vec, NR_FILE_MAPPED, nr_pages);
}
/*
@@ -5454,14 +5474,14 @@ static int mem_cgroup_move_account(struct page *page,
struct address_space *mapping = page_mapping(page);
if (mapping_cap_account_dirty(mapping)) {
- __mod_memcg_state(from, NR_FILE_DIRTY, -nr_pages);
- __mod_memcg_state(to, NR_FILE_DIRTY, nr_pages);
+ __mod_lruvec_state(from_vec, NR_FILE_DIRTY, -nr_pages);
+ __mod_lruvec_state(to_vec, NR_FILE_DIRTY, nr_pages);
}
}
if (PageWriteback(page)) {
- __mod_memcg_state(from, NR_WRITEBACK, -nr_pages);
- __mod_memcg_state(to, NR_WRITEBACK, nr_pages);
+ __mod_lruvec_state(from_vec, NR_WRITEBACK, -nr_pages);
+ __mod_lruvec_state(to_vec, NR_WRITEBACK, nr_pages);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 7ef849da8278..3151c87dff73 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -199,7 +199,6 @@ struct to_kill {
struct task_struct *tsk;
unsigned long addr;
short size_shift;
- char addr_valid;
};
/*
@@ -324,22 +323,27 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
}
}
tk->addr = page_address_in_vma(p, vma);
- tk->addr_valid = 1;
if (is_zone_device_page(p))
tk->size_shift = dev_pagemap_mapping_shift(p, vma);
else
tk->size_shift = compound_order(compound_head(p)) + PAGE_SHIFT;
/*
- * In theory we don't have to kill when the page was
- * munmaped. But it could be also a mremap. Since that's
- * likely very rare kill anyways just out of paranoia, but use
- * a SIGKILL because the error is not contained anymore.
+ * Send SIGKILL if "tk->addr == -EFAULT". Also, as
+ * "tk->size_shift" is always non-zero for !is_zone_device_page(),
+ * so "tk->size_shift == 0" effectively checks no mapping on
+ * ZONE_DEVICE. Indeed, when a devdax page is mmapped N times
+ * to a process' address space, it's possible not all N VMAs
+ * contain mappings for the page, but at least one VMA does.
+ * Only deliver SIGBUS with payload derived from the VMA that
+ * has a mapping for the page.
*/
- if (tk->addr == -EFAULT || tk->size_shift == 0) {
+ if (tk->addr == -EFAULT) {
pr_info("Memory failure: Unable to find user space address %lx in %s\n",
page_to_pfn(p), tsk->comm);
- tk->addr_valid = 0;
+ } else if (tk->size_shift == 0) {
+ kfree(tk);
+ return;
}
get_task_struct(tsk);
tk->tsk = tsk;
@@ -366,7 +370,7 @@ static void kill_procs(struct list_head *to_kill, int forcekill, bool fail,
* make sure the process doesn't catch the
* signal and then access the memory. Just kill it.
*/
- if (fail || tk->addr_valid == 0) {
+ if (fail || tk->addr == -EFAULT) {
pr_err("Memory failure: %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
pfn, tk->tsk->comm, tk->tsk->pid);
do_send_sig_info(SIGKILL, SEND_SIG_PRIV,
@@ -1253,17 +1257,19 @@ int memory_failure(unsigned long pfn, int flags)
if (!sysctl_memory_failure_recovery)
panic("Memory failure on page %lx", pfn);
- if (!pfn_valid(pfn)) {
+ p = pfn_to_online_page(pfn);
+ if (!p) {
+ if (pfn_valid(pfn)) {
+ pgmap = get_dev_pagemap(pfn, NULL);
+ if (pgmap)
+ return memory_failure_dev_pagemap(pfn, flags,
+ pgmap);
+ }
pr_err("Memory failure: %#lx: memory outside kernel control\n",
pfn);
return -ENXIO;
}
- pgmap = get_dev_pagemap(pfn, NULL);
- if (pgmap)
- return memory_failure_dev_pagemap(pfn, flags, pgmap);
-
- p = pfn_to_page(pfn);
if (PageHuge(p))
return memory_failure_hugetlb(pfn, flags);
if (TestSetPageHWPoison(p)) {
diff --git a/mm/memory.c b/mm/memory.c
index b1ca51a079f2..b6a5d6a08438 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -118,6 +118,18 @@ int randomize_va_space __read_mostly =
2;
#endif
+#ifndef arch_faults_on_old_pte
+static inline bool arch_faults_on_old_pte(void)
+{
+ /*
+ * Those arches which don't have hw access flag feature need to
+ * implement their own helper. By default, "true" means pagefault
+ * will be hit on old pte.
+ */
+ return true;
+}
+#endif
+
static int __init disable_randmaps(char *s)
{
randomize_va_space = 0;
@@ -2145,32 +2157,82 @@ static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
return same;
}
-static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va, struct vm_area_struct *vma)
+static inline bool cow_user_page(struct page *dst, struct page *src,
+ struct vm_fault *vmf)
{
+ bool ret;
+ void *kaddr;
+ void __user *uaddr;
+ bool force_mkyoung;
+ struct vm_area_struct *vma = vmf->vma;
+ struct mm_struct *mm = vma->vm_mm;
+ unsigned long addr = vmf->address;
+
debug_dma_assert_idle(src);
+ if (likely(src)) {
+ copy_user_highpage(dst, src, addr, vma);
+ return true;
+ }
+
/*
* If the source page was a PFN mapping, we don't have
* a "struct page" for it. We do a best-effort copy by
* just copying from the original user address. If that
* fails, we just zero-fill it. Live with it.
*/
- if (unlikely(!src)) {
- void *kaddr = kmap_atomic(dst);
- void __user *uaddr = (void __user *)(va & PAGE_MASK);
+ kaddr = kmap_atomic(dst);
+ uaddr = (void __user *)(addr & PAGE_MASK);
+
+ /*
+ * On architectures with software "accessed" bits, we would
+ * take a double page fault, so mark it accessed here.
+ */
+ force_mkyoung = arch_faults_on_old_pte() && !pte_young(vmf->orig_pte);
+ if (force_mkyoung) {
+ pte_t entry;
+
+ vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl);
+ if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) {
+ /*
+ * Other thread has already handled the fault
+ * and we don't need to do anything. If it's
+ * not the case, the fault will be triggered
+ * again on the same address.
+ */
+ ret = false;
+ goto pte_unlock;
+ }
+ entry = pte_mkyoung(vmf->orig_pte);
+ if (ptep_set_access_flags(vma, addr, vmf->pte, entry, 0))
+ update_mmu_cache(vma, addr, vmf->pte);
+ }
+
+ /*
+ * This really shouldn't fail, because the page is there
+ * in the page tables. But it might just be unreadable,
+ * in which case we just give up and fill the result with
+ * zeroes.
+ */
+ if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) {
/*
- * This really shouldn't fail, because the page is there
- * in the page tables. But it might just be unreadable,
- * in which case we just give up and fill the result with
- * zeroes.
+ * Give a warn in case there can be some obscure
+ * use-case
*/
- if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE))
- clear_page(kaddr);
- kunmap_atomic(kaddr);
- flush_dcache_page(dst);
- } else
- copy_user_highpage(dst, src, va, vma);
+ WARN_ON_ONCE(1);
+ clear_page(kaddr);
+ }
+
+ ret = true;
+
+pte_unlock:
+ if (force_mkyoung)
+ pte_unmap_unlock(vmf->pte, vmf->ptl);
+ kunmap_atomic(kaddr);
+ flush_dcache_page(dst);
+
+ return ret;
}
static gfp_t __get_fault_gfp_mask(struct vm_area_struct *vma)
@@ -2327,7 +2389,19 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
vmf->address);
if (!new_page)
goto oom;
- cow_user_page(new_page, old_page, vmf->address, vma);
+
+ if (!cow_user_page(new_page, old_page, vmf)) {
+ /*
+ * COW failed, if the fault was solved by other,
+ * it's fine. If not, userspace would re-fault on
+ * the same address and we will handle the fault
+ * from the second attempt.
+ */
+ put_page(new_page);
+ if (old_page)
+ put_page(old_page);
+ return 0;
+ }
}
if (mem_cgroup_try_charge_delay(new_page, mm, GFP_KERNEL, &memcg, false))
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index b1be791f772d..f307bd82d750 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -331,7 +331,7 @@ static unsigned long find_smallest_section_pfn(int nid, struct zone *zone,
unsigned long end_pfn)
{
for (; start_pfn < end_pfn; start_pfn += PAGES_PER_SUBSECTION) {
- if (unlikely(!pfn_valid(start_pfn)))
+ if (unlikely(!pfn_to_online_page(start_pfn)))
continue;
if (unlikely(pfn_to_nid(start_pfn) != nid))
@@ -356,7 +356,7 @@ static unsigned long find_biggest_section_pfn(int nid, struct zone *zone,
/* pfn is the end pfn of a memory section. */
pfn = end_pfn - 1;
for (; pfn >= start_pfn; pfn -= PAGES_PER_SUBSECTION) {
- if (unlikely(!pfn_valid(pfn)))
+ if (unlikely(!pfn_to_online_page(pfn)))
continue;
if (unlikely(pfn_to_nid(pfn) != nid))
@@ -415,7 +415,7 @@ static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
*/
pfn = zone_start_pfn;
for (; pfn < zone_end_pfn; pfn += PAGES_PER_SUBSECTION) {
- if (unlikely(!pfn_valid(pfn)))
+ if (unlikely(!pfn_to_online_page(pfn)))
continue;
if (page_zone(pfn_to_page(pfn)) != zone)
@@ -436,67 +436,33 @@ static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
zone_span_writeunlock(zone);
}
-static void shrink_pgdat_span(struct pglist_data *pgdat,
- unsigned long start_pfn, unsigned long end_pfn)
+static void update_pgdat_span(struct pglist_data *pgdat)
{
- unsigned long pgdat_start_pfn = pgdat->node_start_pfn;
- unsigned long p = pgdat_end_pfn(pgdat); /* pgdat_end_pfn namespace clash */
- unsigned long pgdat_end_pfn = p;
- unsigned long pfn;
- int nid = pgdat->node_id;
-
- if (pgdat_start_pfn == start_pfn) {
- /*
- * If the section is smallest section in the pgdat, it need
- * shrink pgdat->node_start_pfn and pgdat->node_spanned_pages.
- * In this case, we find second smallest valid mem_section
- * for shrinking zone.
- */
- pfn = find_smallest_section_pfn(nid, NULL, end_pfn,
- pgdat_end_pfn);
- if (pfn) {
- pgdat->node_start_pfn = pfn;
- pgdat->node_spanned_pages = pgdat_end_pfn - pfn;
- }
- } else if (pgdat_end_pfn == end_pfn) {
- /*
- * If the section is biggest section in the pgdat, it need
- * shrink pgdat->node_spanned_pages.
- * In this case, we find second biggest valid mem_section for
- * shrinking zone.
- */
- pfn = find_biggest_section_pfn(nid, NULL, pgdat_start_pfn,
- start_pfn);
- if (pfn)
- pgdat->node_spanned_pages = pfn - pgdat_start_pfn + 1;
- }
+ unsigned long node_start_pfn = 0, node_end_pfn = 0;
+ struct zone *zone;
- /*
- * If the section is not biggest or smallest mem_section in the pgdat,
- * it only creates a hole in the pgdat. So in this case, we need not
- * change the pgdat.
- * But perhaps, the pgdat has only hole data. Thus it check the pgdat
- * has only hole or not.
- */
- pfn = pgdat_start_pfn;
- for (; pfn < pgdat_end_pfn; pfn += PAGES_PER_SUBSECTION) {
- if (unlikely(!pfn_valid(pfn)))
- continue;
+ for (zone = pgdat->node_zones;
+ zone < pgdat->node_zones + MAX_NR_ZONES; zone++) {
+ unsigned long zone_end_pfn = zone->zone_start_pfn +
+ zone->spanned_pages;
- if (pfn_to_nid(pfn) != nid)
+ /* No need to lock the zones, they can't change. */
+ if (!zone->spanned_pages)
continue;
-
- /* Skip range to be removed */
- if (pfn >= start_pfn && pfn < end_pfn)
+ if (!node_end_pfn) {
+ node_start_pfn = zone->zone_start_pfn;
+ node_end_pfn = zone_end_pfn;
continue;
+ }
- /* If we find valid section, we have nothing to do */
- return;
+ if (zone_end_pfn > node_end_pfn)
+ node_end_pfn = zone_end_pfn;
+ if (zone->zone_start_pfn < node_start_pfn)
+ node_start_pfn = zone->zone_start_pfn;
}
- /* The pgdat has no valid section */
- pgdat->node_start_pfn = 0;
- pgdat->node_spanned_pages = 0;
+ pgdat->node_start_pfn = node_start_pfn;
+ pgdat->node_spanned_pages = node_end_pfn - node_start_pfn;
}
static void __remove_zone(struct zone *zone, unsigned long start_pfn,
@@ -505,9 +471,19 @@ static void __remove_zone(struct zone *zone, unsigned long start_pfn,
struct pglist_data *pgdat = zone->zone_pgdat;
unsigned long flags;
+#ifdef CONFIG_ZONE_DEVICE
+ /*
+ * Zone shrinking code cannot properly deal with ZONE_DEVICE. So
+ * we will not try to shrink the zones - which is okay as
+ * set_zone_contiguous() cannot deal with ZONE_DEVICE either way.
+ */
+ if (zone_idx(zone) == ZONE_DEVICE)
+ return;
+#endif
+
pgdat_resize_lock(zone->zone_pgdat, &flags);
shrink_zone_span(zone, start_pfn, start_pfn + nr_pages);
- shrink_pgdat_span(pgdat, start_pfn, start_pfn + nr_pages);
+ update_pgdat_span(pgdat);
pgdat_resize_unlock(zone->zone_pgdat, &flags);
}
@@ -1680,6 +1656,18 @@ static int check_cpu_on_node(pg_data_t *pgdat)
return 0;
}
+static int check_no_memblock_for_node_cb(struct memory_block *mem, void *arg)
+{
+ int nid = *(int *)arg;
+
+ /*
+ * If a memory block belongs to multiple nodes, the stored nid is not
+ * reliable. However, such blocks are always online (e.g., cannot get
+ * offlined) and, therefore, are still spanned by the node.
+ */
+ return mem->nid == nid ? -EEXIST : 0;
+}
+
/**
* try_offline_node
* @nid: the node ID
@@ -1692,25 +1680,24 @@ static int check_cpu_on_node(pg_data_t *pgdat)
void try_offline_node(int nid)
{
pg_data_t *pgdat = NODE_DATA(nid);
- unsigned long start_pfn = pgdat->node_start_pfn;
- unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages;
- unsigned long pfn;
-
- for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
- unsigned long section_nr = pfn_to_section_nr(pfn);
-
- if (!present_section_nr(section_nr))
- continue;
+ int rc;
- if (pfn_to_nid(pfn) != nid)
- continue;
+ /*
+ * If the node still spans pages (especially ZONE_DEVICE), don't
+ * offline it. A node spans memory after move_pfn_range_to_zone(),
+ * e.g., after the memory block was onlined.
+ */
+ if (pgdat->node_spanned_pages)
+ return;
- /*
- * some memory sections of this node are not removed, and we
- * can't offline node now.
- */
+ /*
+ * Especially offline memory blocks might not be spanned by the
+ * node. They will get spanned by the node once they get onlined.
+ * However, they link to the node in sysfs and can get onlined later.
+ */
+ rc = for_each_memory_block(&nid, check_no_memblock_for_node_cb);
+ if (rc)
return;
- }
if (check_cpu_on_node(pgdat))
return;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 4ae967bcf954..e08c94170ae4 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -672,7 +672,9 @@ static const struct mm_walk_ops queue_pages_walk_ops = {
* 1 - there is unmovable page, but MPOL_MF_MOVE* & MPOL_MF_STRICT were
* specified.
* 0 - queue pages successfully or no misplaced page.
- * -EIO - there is misplaced page and only MPOL_MF_STRICT was specified.
+ * errno - i.e. misplaced pages with MPOL_MF_STRICT specified (-EIO) or
+ * memory range specified by nodemask and maxnode points outside
+ * your accessible address space (-EFAULT)
*/
static int
queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
@@ -1286,7 +1288,7 @@ static long do_mbind(unsigned long start, unsigned long len,
flags | MPOL_MF_INVERT, &pagelist);
if (ret < 0) {
- err = -EIO;
+ err = ret;
goto up_out;
}
@@ -1305,10 +1307,12 @@ static long do_mbind(unsigned long start, unsigned long len,
if ((ret > 0) || (nr_failed && (flags & MPOL_MF_STRICT)))
err = -EIO;
- } else
- putback_movable_pages(&pagelist);
-
+ } else {
up_out:
+ if (!list_empty(&pagelist))
+ putback_movable_pages(&pagelist);
+ }
+
up_write(&mm->mmap_sem);
mpol_out:
mpol_put(new);
diff --git a/mm/memremap.c b/mm/memremap.c
index 32c79b51af86..03ccbdfeb697 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -13,8 +13,6 @@
#include <linux/xarray.h>
static DEFINE_XARRAY(pgmap_array);
-#define SECTION_MASK ~((1UL << PA_SECTION_SHIFT) - 1)
-#define SECTION_SIZE (1UL << PA_SECTION_SHIFT)
#ifdef CONFIG_DEV_PAGEMAP_OPS
DEFINE_STATIC_KEY_FALSE(devmap_managed_key);
@@ -105,6 +103,7 @@ static void dev_pagemap_cleanup(struct dev_pagemap *pgmap)
void memunmap_pages(struct dev_pagemap *pgmap)
{
struct resource *res = &pgmap->res;
+ struct page *first_page;
unsigned long pfn;
int nid;
@@ -113,14 +112,16 @@ void memunmap_pages(struct dev_pagemap *pgmap)
put_page(pfn_to_page(pfn));
dev_pagemap_cleanup(pgmap);
+ /* make sure to access a memmap that was actually initialized */
+ first_page = pfn_to_page(pfn_first(pgmap));
+
/* pages are dead and unused, undo the arch mapping */
- nid = page_to_nid(pfn_to_page(PHYS_PFN(res->start)));
+ nid = page_to_nid(first_page);
mem_hotplug_begin();
if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
- pfn = PHYS_PFN(res->start);
- __remove_pages(page_zone(pfn_to_page(pfn)), pfn,
- PHYS_PFN(resource_size(res)), NULL);
+ __remove_pages(page_zone(first_page), PHYS_PFN(res->start),
+ PHYS_PFN(resource_size(res)), NULL);
} else {
arch_remove_memory(nid, res->start, resource_size(res),
pgmap_altmap(pgmap));
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 7fde88695f35..9a889e456168 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -180,7 +180,7 @@ int __mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range)
mn->ops->invalidate_range_start, _ret,
!mmu_notifier_range_blockable(range) ? "non-" : "");
WARN_ON(mmu_notifier_range_blockable(range) ||
- ret != -EAGAIN);
+ _ret != -EAGAIN);
ret = _ret;
}
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 15c2050c629b..f391c0c4ed1d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1175,11 +1175,17 @@ static __always_inline bool free_pages_prepare(struct page *page,
debug_check_no_obj_freed(page_address(page),
PAGE_SIZE << order);
}
- arch_free_page(page, order);
if (want_init_on_free())
kernel_init_free_pages(page, 1 << order);
kernel_poison_pages(page, 1 << order, 0);
+ /*
+ * arch_free_page() can make the page's contents inaccessible. s390
+ * does this. So nothing which can access the page's contents should
+ * happen after this.
+ */
+ arch_free_page(page, order);
+
if (debug_pagealloc_enabled())
kernel_map_pages(page, 1 << order, 0);
@@ -1942,6 +1948,14 @@ void __init page_alloc_init_late(void)
wait_for_completion(&pgdat_init_all_done_comp);
/*
+ * The number of managed pages has changed due to the initialisation
+ * so the pcpu batch and high limits needs to be updated or the limits
+ * will be artificially small.
+ */
+ for_each_populated_zone(zone)
+ zone_pcp_update(zone);
+
+ /*
* We initialized the rest of the deferred pages. Permanently disable
* on-demand struct page initialization.
*/
@@ -3714,10 +3728,6 @@ try_this_zone:
static void warn_alloc_show_mem(gfp_t gfp_mask, nodemask_t *nodemask)
{
unsigned int filter = SHOW_MEM_FILTER_NODES;
- static DEFINE_RATELIMIT_STATE(show_mem_rs, HZ, 1);
-
- if (!__ratelimit(&show_mem_rs))
- return;
/*
* This documents exceptions given to allocations in certain
@@ -3738,8 +3748,7 @@ void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...)
{
struct va_format vaf;
va_list args;
- static DEFINE_RATELIMIT_STATE(nopage_rs, DEFAULT_RATELIMIT_INTERVAL,
- DEFAULT_RATELIMIT_BURST);
+ static DEFINE_RATELIMIT_STATE(nopage_rs, 10*HZ, 1);
if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs))
return;
@@ -4467,12 +4476,14 @@ retry_cpuset:
if (page)
goto got_pg;
- if (order >= pageblock_order && (gfp_mask & __GFP_IO)) {
+ if (order >= pageblock_order && (gfp_mask & __GFP_IO) &&
+ !(gfp_mask & __GFP_RETRY_MAYFAIL)) {
/*
* If allocating entire pageblock(s) and compaction
* failed because all zones are below low watermarks
* or is prohibited because it recently failed at this
- * order, fail immediately.
+ * order, fail immediately unless the allocator has
+ * requested compaction and reclaim retry.
*
* Reclaim is
* - potentially very expensive because zones are far
@@ -8506,7 +8517,6 @@ void free_contig_range(unsigned long pfn, unsigned int nr_pages)
WARN(count != 0, "%d pages are still in use!\n", count);
}
-#ifdef CONFIG_MEMORY_HOTPLUG
/*
* The zone indicated has a new number of managed_pages; batch sizes and percpu
* page high values need to be recalulated.
@@ -8520,7 +8530,6 @@ void __meminit zone_pcp_update(struct zone *zone)
per_cpu_ptr(zone->pageset, cpu));
mutex_unlock(&pcp_batch_high_lock);
}
-#endif
void zone_pcp_reset(struct zone *zone)
{
diff --git a/mm/page_ext.c b/mm/page_ext.c
index 5f5769c7db3b..4ade843ff588 100644
--- a/mm/page_ext.c
+++ b/mm/page_ext.c
@@ -67,8 +67,9 @@ static struct page_ext_operations *page_ext_ops[] = {
#endif
};
+unsigned long page_ext_size = sizeof(struct page_ext);
+
static unsigned long total_usage;
-static unsigned long extra_mem;
static bool __init invoke_need_callbacks(void)
{
@@ -78,9 +79,8 @@ static bool __init invoke_need_callbacks(void)
for (i = 0; i < entries; i++) {
if (page_ext_ops[i]->need && page_ext_ops[i]->need()) {
- page_ext_ops[i]->offset = sizeof(struct page_ext) +
- extra_mem;
- extra_mem += page_ext_ops[i]->size;
+ page_ext_ops[i]->offset = page_ext_size;
+ page_ext_size += page_ext_ops[i]->size;
need = true;
}
}
@@ -99,14 +99,9 @@ static void __init invoke_init_callbacks(void)
}
}
-static unsigned long get_entry_size(void)
-{
- return sizeof(struct page_ext) + extra_mem;
-}
-
static inline struct page_ext *get_entry(void *base, unsigned long index)
{
- return base + get_entry_size() * index;
+ return base + page_ext_size * index;
}
#if !defined(CONFIG_SPARSEMEM)
@@ -156,7 +151,7 @@ static int __init alloc_node_page_ext(int nid)
!IS_ALIGNED(node_end_pfn(nid), MAX_ORDER_NR_PAGES))
nr_pages += MAX_ORDER_NR_PAGES;
- table_size = get_entry_size() * nr_pages;
+ table_size = page_ext_size * nr_pages;
base = memblock_alloc_try_nid(
table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
@@ -234,7 +229,7 @@ static int __meminit init_section_page_ext(unsigned long pfn, int nid)
if (section->page_ext)
return 0;
- table_size = get_entry_size() * PAGES_PER_SECTION;
+ table_size = page_ext_size * PAGES_PER_SECTION;
base = alloc_page_ext(table_size, nid);
/*
@@ -254,7 +249,7 @@ static int __meminit init_section_page_ext(unsigned long pfn, int nid)
* we need to apply a mask.
*/
pfn &= PAGE_SECTION_MASK;
- section->page_ext = (void *)base - get_entry_size() * pfn;
+ section->page_ext = (void *)base - page_ext_size * pfn;
total_usage += table_size;
return 0;
}
@@ -267,7 +262,7 @@ static void free_page_ext(void *addr)
struct page *page = virt_to_page(addr);
size_t table_size;
- table_size = get_entry_size() * PAGES_PER_SECTION;
+ table_size = page_ext_size * PAGES_PER_SECTION;
BUG_ON(PageReserved(page));
kmemleak_free(addr);
diff --git a/mm/page_io.c b/mm/page_io.c
index 24ee600f9131..60a66a58b9bf 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -73,6 +73,7 @@ static void swap_slot_free_notify(struct page *page)
{
struct swap_info_struct *sis;
struct gendisk *disk;
+ swp_entry_t entry;
/*
* There is no guarantee that the page is in swap cache - the software
@@ -104,11 +105,10 @@ static void swap_slot_free_notify(struct page *page)
* we again wish to reclaim it.
*/
disk = sis->bdev->bd_disk;
- if (disk->fops->swap_slot_free_notify) {
- swp_entry_t entry;
+ entry.val = page_private(page);
+ if (disk->fops->swap_slot_free_notify && __swap_count(entry) == 1) {
unsigned long offset;
- entry.val = page_private(page);
offset = swp_offset(entry);
SetPageDirty(page);
diff --git a/mm/page_owner.c b/mm/page_owner.c
index dee931184788..18ecde9f45b2 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -24,12 +24,10 @@ struct page_owner {
short last_migrate_reason;
gfp_t gfp_mask;
depot_stack_handle_t handle;
-#ifdef CONFIG_DEBUG_PAGEALLOC
depot_stack_handle_t free_handle;
-#endif
};
-static bool page_owner_disabled = true;
+static bool page_owner_enabled = false;
DEFINE_STATIC_KEY_FALSE(page_owner_inited);
static depot_stack_handle_t dummy_handle;
@@ -44,7 +42,7 @@ static int __init early_page_owner_param(char *buf)
return -EINVAL;
if (strcmp(buf, "on") == 0)
- page_owner_disabled = false;
+ page_owner_enabled = true;
return 0;
}
@@ -52,10 +50,7 @@ early_param("page_owner", early_page_owner_param);
static bool need_page_owner(void)
{
- if (page_owner_disabled)
- return false;
-
- return true;
+ return page_owner_enabled;
}
static __always_inline depot_stack_handle_t create_dummy_stack(void)
@@ -84,7 +79,7 @@ static noinline void register_early_stack(void)
static void init_page_owner(void)
{
- if (page_owner_disabled)
+ if (!page_owner_enabled)
return;
register_dummy_stack();
@@ -148,25 +143,19 @@ void __reset_page_owner(struct page *page, unsigned int order)
{
int i;
struct page_ext *page_ext;
-#ifdef CONFIG_DEBUG_PAGEALLOC
depot_stack_handle_t handle = 0;
struct page_owner *page_owner;
- if (debug_pagealloc_enabled())
- handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
-#endif
+ handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
+ page_ext = lookup_page_ext(page);
+ if (unlikely(!page_ext))
+ return;
for (i = 0; i < (1 << order); i++) {
- page_ext = lookup_page_ext(page + i);
- if (unlikely(!page_ext))
- continue;
- __clear_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags);
-#ifdef CONFIG_DEBUG_PAGEALLOC
- if (debug_pagealloc_enabled()) {
- page_owner = get_page_owner(page_ext);
- page_owner->free_handle = handle;
- }
-#endif
+ __clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
+ page_owner = get_page_owner(page_ext);
+ page_owner->free_handle = handle;
+ page_ext = page_ext_next(page_ext);
}
}
@@ -184,9 +173,9 @@ static inline void __set_page_owner_handle(struct page *page,
page_owner->gfp_mask = gfp_mask;
page_owner->last_migrate_reason = -1;
__set_bit(PAGE_EXT_OWNER, &page_ext->flags);
- __set_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags);
+ __set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
- page_ext = lookup_page_ext(page + i);
+ page_ext = page_ext_next(page_ext);
}
}
@@ -224,12 +213,10 @@ void __split_page_owner(struct page *page, unsigned int order)
if (unlikely(!page_ext))
return;
- page_owner = get_page_owner(page_ext);
- page_owner->order = 0;
- for (i = 1; i < (1 << order); i++) {
- page_ext = lookup_page_ext(page + i);
+ for (i = 0; i < (1 << order); i++) {
page_owner = get_page_owner(page_ext);
page_owner->order = 0;
+ page_ext = page_ext_next(page_ext);
}
}
@@ -260,7 +247,7 @@ void __copy_page_owner(struct page *oldpage, struct page *newpage)
* the new page, which will be freed.
*/
__set_bit(PAGE_EXT_OWNER, &new_ext->flags);
- __set_bit(PAGE_EXT_OWNER_ACTIVE, &new_ext->flags);
+ __set_bit(PAGE_EXT_OWNER_ALLOCATED, &new_ext->flags);
}
void pagetypeinfo_showmixedcount_print(struct seq_file *m,
@@ -284,7 +271,8 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m,
* not matter as the mixed block count will still be correct
*/
for (; pfn < end_pfn; ) {
- if (!pfn_valid(pfn)) {
+ page = pfn_to_online_page(pfn);
+ if (!page) {
pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES);
continue;
}
@@ -292,13 +280,13 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m,
block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
block_end_pfn = min(block_end_pfn, end_pfn);
- page = pfn_to_page(pfn);
pageblock_mt = get_pageblock_migratetype(page);
for (; pfn < block_end_pfn; pfn++) {
if (!pfn_valid_within(pfn))
continue;
+ /* The pageblock is online, no need to recheck. */
page = pfn_to_page(pfn);
if (page_zone(page) != zone)
@@ -320,7 +308,7 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m,
if (unlikely(!page_ext))
continue;
- if (!test_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags))
+ if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
continue;
page_owner = get_page_owner(page_ext);
@@ -435,7 +423,7 @@ void __dump_page_owner(struct page *page)
return;
}
- if (test_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags))
+ if (test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
pr_alert("page_owner tracks the page as allocated\n");
else
pr_alert("page_owner tracks the page as freed\n");
@@ -451,7 +439,6 @@ void __dump_page_owner(struct page *page)
stack_trace_print(entries, nr_entries, 0);
}
-#ifdef CONFIG_DEBUG_PAGEALLOC
handle = READ_ONCE(page_owner->free_handle);
if (!handle) {
pr_alert("page_owner free stack trace missing\n");
@@ -460,7 +447,6 @@ void __dump_page_owner(struct page *page)
pr_alert("page last free stack trace:\n");
stack_trace_print(entries, nr_entries, 0);
}
-#endif
if (page_owner->last_migrate_reason != -1)
pr_alert("page has been migrated, last migrate reason: %s\n",
@@ -527,7 +513,7 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
* Although we do have the info about past allocation of free
* pages, it's not relevant for current memory usage.
*/
- if (!test_bit(PAGE_EXT_OWNER_ACTIVE, &page_ext->flags))
+ if (!test_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags))
continue;
page_owner = get_page_owner(page_ext);
diff --git a/mm/rmap.c b/mm/rmap.c
index d9a23bb773bf..0c7b2a9400d4 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -61,6 +61,7 @@
#include <linux/mmu_notifier.h>
#include <linux/migrate.h>
#include <linux/hugetlb.h>
+#include <linux/huge_mm.h>
#include <linux/backing-dev.h>
#include <linux/page_idle.h>
#include <linux/memremap.h>
diff --git a/mm/shmem.c b/mm/shmem.c
index cd570cc79c76..220be9fa2c41 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3482,6 +3482,12 @@ static int shmem_parse_options(struct fs_context *fc, void *data)
{
char *options = data;
+ if (options) {
+ int err = security_sb_eat_lsm_opts(options, &fc->security);
+ if (err)
+ return err;
+ }
+
while (options != NULL) {
char *this_char = options;
for (;;) {
diff --git a/mm/shuffle.c b/mm/shuffle.c
index 3ce12481b1dc..b3fe97fd6654 100644
--- a/mm/shuffle.c
+++ b/mm/shuffle.c
@@ -33,7 +33,7 @@ __meminit void page_alloc_shuffle(enum mm_shuffle_ctl ctl)
}
static bool shuffle_param;
-extern int shuffle_show(char *buffer, const struct kernel_param *kp)
+static int shuffle_show(char *buffer, const struct kernel_param *kp)
{
return sprintf(buffer, "%c\n", test_bit(SHUFFLE_ENABLE, &shuffle_state)
? 'Y' : 'N');
diff --git a/mm/slab.c b/mm/slab.c
index 9df370558e5d..66e5d8032bae 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -4206,9 +4206,12 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
/**
* __ksize -- Uninstrumented ksize.
+ * @objp: pointer to the object
*
* Unlike ksize(), __ksize() is uninstrumented, and does not provide the same
* safety checks as ksize() with KASAN instrumentation enabled.
+ *
+ * Return: size of the actual memory used by @objp in bytes
*/
size_t __ksize(const void *objp)
{
diff --git a/mm/slab.h b/mm/slab.h
index 68e455f2b698..b2b01694dc43 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -323,8 +323,8 @@ static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s)
* Expects a pointer to a slab page. Please note, that PageSlab() check
* isn't sufficient, as it returns true also for tail compound slab pages,
* which do not have slab_cache pointer set.
- * So this function assumes that the page can pass PageHead() and PageSlab()
- * checks.
+ * So this function assumes that the page can pass PageSlab() && !PageTail()
+ * check.
*
* The kmem_cache can be reparented asynchronously. The caller must ensure
* the memcg lifetime, e.g. by taking rcu_read_lock() or cgroup_mutex.
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 6491c3a41805..f9fb27b4c843 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -178,10 +178,13 @@ static int init_memcg_params(struct kmem_cache *s,
static void destroy_memcg_params(struct kmem_cache *s)
{
- if (is_root_cache(s))
+ if (is_root_cache(s)) {
kvfree(rcu_access_pointer(s->memcg_params.memcg_caches));
- else
+ } else {
+ mem_cgroup_put(s->memcg_params.memcg);
+ WRITE_ONCE(s->memcg_params.memcg, NULL);
percpu_ref_exit(&s->memcg_params.refcnt);
+ }
}
static void free_memcg_params(struct rcu_head *rcu)
@@ -253,8 +256,6 @@ static void memcg_unlink_cache(struct kmem_cache *s)
} else {
list_del(&s->memcg_params.children_node);
list_del(&s->memcg_params.kmem_caches_node);
- mem_cgroup_put(s->memcg_params.memcg);
- WRITE_ONCE(s->memcg_params.memcg, NULL);
}
}
#else
@@ -1030,10 +1031,19 @@ void __init create_boot_cache(struct kmem_cache *s, const char *name,
unsigned int useroffset, unsigned int usersize)
{
int err;
+ unsigned int align = ARCH_KMALLOC_MINALIGN;
s->name = name;
s->size = s->object_size = size;
- s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);
+
+ /*
+ * For power of two sizes, guarantee natural alignment for kmalloc
+ * caches, regardless of SL*B debugging options.
+ */
+ if (is_power_of_2(size))
+ align = max(align, size);
+ s->align = calculate_alignment(flags, align, size);
+
s->useroffset = useroffset;
s->usersize = usersize;
@@ -1287,12 +1297,16 @@ void __init create_kmalloc_caches(slab_flags_t flags)
*/
void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
{
- void *ret;
+ void *ret = NULL;
struct page *page;
flags |= __GFP_COMP;
page = alloc_pages(flags, order);
- ret = page ? page_address(page) : NULL;
+ if (likely(page)) {
+ ret = page_address(page);
+ mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE,
+ 1 << order);
+ }
ret = kasan_kmalloc_large(ret, size, flags);
/* As ret might get tagged, call kmemleak hook after KASAN. */
kmemleak_alloc(ret, size, 1, flags);
diff --git a/mm/slob.c b/mm/slob.c
index cf377beab962..fa53e9f73893 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -190,7 +190,7 @@ static int slob_last(slob_t *s)
static void *slob_new_pages(gfp_t gfp, int order, int node)
{
- void *page;
+ struct page *page;
#ifdef CONFIG_NUMA
if (node != NUMA_NO_NODE)
@@ -202,14 +202,21 @@ static void *slob_new_pages(gfp_t gfp, int order, int node)
if (!page)
return NULL;
+ mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE,
+ 1 << order);
return page_address(page);
}
static void slob_free_pages(void *b, int order)
{
+ struct page *sp = virt_to_page(b);
+
if (current->reclaim_state)
current->reclaim_state->reclaimed_slab += 1 << order;
- free_pages((unsigned long)b, order);
+
+ mod_node_page_state(page_pgdat(sp), NR_SLAB_UNRECLAIMABLE,
+ -(1 << order));
+ __free_pages(sp, order);
}
/*
@@ -217,6 +224,7 @@ static void slob_free_pages(void *b, int order)
* @sp: Page to look in.
* @size: Size of the allocation.
* @align: Allocation alignment.
+ * @align_offset: Offset in the allocated block that will be aligned.
* @page_removed_from_list: Return parameter.
*
* Tries to find a chunk of memory at least @size bytes big within @page.
@@ -227,7 +235,7 @@ static void slob_free_pages(void *b, int order)
* true (set to false otherwise).
*/
static void *slob_page_alloc(struct page *sp, size_t size, int align,
- bool *page_removed_from_list)
+ int align_offset, bool *page_removed_from_list)
{
slob_t *prev, *cur, *aligned = NULL;
int delta = 0, units = SLOB_UNITS(size);
@@ -236,8 +244,17 @@ static void *slob_page_alloc(struct page *sp, size_t size, int align,
for (prev = NULL, cur = sp->freelist; ; prev = cur, cur = slob_next(cur)) {
slobidx_t avail = slob_units(cur);
+ /*
+ * 'aligned' will hold the address of the slob block so that the
+ * address 'aligned'+'align_offset' is aligned according to the
+ * 'align' parameter. This is for kmalloc() which prepends the
+ * allocated block with its size, so that the block itself is
+ * aligned when needed.
+ */
if (align) {
- aligned = (slob_t *)ALIGN((unsigned long)cur, align);
+ aligned = (slob_t *)
+ (ALIGN((unsigned long)cur + align_offset, align)
+ - align_offset);
delta = aligned - cur;
}
if (avail >= units + delta) { /* room enough? */
@@ -281,7 +298,8 @@ static void *slob_page_alloc(struct page *sp, size_t size, int align,
/*
* slob_alloc: entry point into the slob allocator.
*/
-static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)
+static void *slob_alloc(size_t size, gfp_t gfp, int align, int node,
+ int align_offset)
{
struct page *sp;
struct list_head *slob_list;
@@ -312,7 +330,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)
if (sp->units < SLOB_UNITS(size))
continue;
- b = slob_page_alloc(sp, size, align, &page_removed_from_list);
+ b = slob_page_alloc(sp, size, align, align_offset, &page_removed_from_list);
if (!b)
continue;
@@ -349,7 +367,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node)
INIT_LIST_HEAD(&sp->slab_list);
set_slob(b, SLOB_UNITS(PAGE_SIZE), b + SLOB_UNITS(PAGE_SIZE));
set_slob_page_free(sp, slob_list);
- b = slob_page_alloc(sp, size, align, &_unused);
+ b = slob_page_alloc(sp, size, align, align_offset, &_unused);
BUG_ON(!b);
spin_unlock_irqrestore(&slob_lock, flags);
}
@@ -451,7 +469,7 @@ static __always_inline void *
__do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
{
unsigned int *m;
- int align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
+ int minalign = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
void *ret;
gfp &= gfp_allowed_mask;
@@ -459,19 +477,28 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
fs_reclaim_acquire(gfp);
fs_reclaim_release(gfp);
- if (size < PAGE_SIZE - align) {
+ if (size < PAGE_SIZE - minalign) {
+ int align = minalign;
+
+ /*
+ * For power of two sizes, guarantee natural alignment for
+ * kmalloc()'d objects.
+ */
+ if (is_power_of_2(size))
+ align = max(minalign, (int) size);
+
if (!size)
return ZERO_SIZE_PTR;
- m = slob_alloc(size + align, gfp, align, node);
+ m = slob_alloc(size + minalign, gfp, align, node, minalign);
if (!m)
return NULL;
*m = size;
- ret = (void *)m + align;
+ ret = (void *)m + minalign;
trace_kmalloc_node(caller, ret,
- size, size + align, gfp, node);
+ size, size + minalign, gfp, node);
} else {
unsigned int order = get_order(size);
@@ -521,8 +548,13 @@ void kfree(const void *block)
int align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
unsigned int *m = (unsigned int *)(block - align);
slob_free(m, *m + align);
- } else
- __free_pages(sp, compound_order(sp));
+ } else {
+ unsigned int order = compound_order(sp);
+ mod_node_page_state(page_pgdat(sp), NR_SLAB_UNRECLAIMABLE,
+ -(1 << order));
+ __free_pages(sp, order);
+
+ }
}
EXPORT_SYMBOL(kfree);
@@ -567,7 +599,7 @@ static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
fs_reclaim_release(flags);
if (c->size < PAGE_SIZE) {
- b = slob_alloc(c->size, flags, c->align, node);
+ b = slob_alloc(c->size, flags, c->align, node, 0);
trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
SLOB_UNITS(c->size) * SLOB_UNIT,
flags, node);
diff --git a/mm/slub.c b/mm/slub.c
index 42c1b3af3c98..e72e802fc569 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1433,12 +1433,15 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
void *old_tail = *tail ? *tail : *head;
int rsize;
- if (slab_want_init_on_free(s)) {
- void *p = NULL;
+ /* Head and tail of the reconstructed freelist */
+ *head = NULL;
+ *tail = NULL;
- do {
- object = next;
- next = get_freepointer(s, object);
+ do {
+ object = next;
+ next = get_freepointer(s, object);
+
+ if (slab_want_init_on_free(s)) {
/*
* Clear the object and the metadata, but don't touch
* the redzone.
@@ -1448,29 +1451,8 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
: 0;
memset((char *)object + s->inuse, 0,
s->size - s->inuse - rsize);
- set_freepointer(s, object, p);
- p = object;
- } while (object != old_tail);
- }
-/*
- * Compiler cannot detect this function can be removed if slab_free_hook()
- * evaluates to nothing. Thus, catch all relevant config debug options here.
- */
-#if defined(CONFIG_LOCKDEP) || \
- defined(CONFIG_DEBUG_KMEMLEAK) || \
- defined(CONFIG_DEBUG_OBJECTS_FREE) || \
- defined(CONFIG_KASAN)
-
- next = *head;
-
- /* Head and tail of the reconstructed freelist */
- *head = NULL;
- *tail = NULL;
-
- do {
- object = next;
- next = get_freepointer(s, object);
+ }
/* If object's reuse doesn't have to be delayed */
if (!slab_free_hook(s, object)) {
/* Move object to the new freelist */
@@ -1485,9 +1467,6 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
*tail = NULL;
return *head != NULL;
-#else
- return true;
-#endif
}
static void *setup_object(struct kmem_cache *s, struct page *page,
@@ -2672,6 +2651,17 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
}
/*
+ * If the object has been wiped upon free, make sure it's fully initialized by
+ * zeroing out freelist pointer.
+ */
+static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
+ void *obj)
+{
+ if (unlikely(slab_want_init_on_free(s)) && obj)
+ memset((void *)((char *)obj + s->offset), 0, sizeof(void *));
+}
+
+/*
* Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc)
* have the fastpath folded into their functions. So no function call
* overhead for requests that can be satisfied on the fastpath.
@@ -2759,12 +2749,8 @@ redo:
prefetch_freepointer(s, next_object);
stat(s, ALLOC_FASTPATH);
}
- /*
- * If the object has been wiped upon free, make sure it's fully
- * initialized by zeroing out freelist pointer.
- */
- if (unlikely(slab_want_init_on_free(s)) && object)
- memset(object + s->offset, 0, sizeof(void *));
+
+ maybe_wipe_obj_freeptr(s, object);
if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object)
memset(object, 0, s->object_size);
@@ -3178,10 +3164,13 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
goto error;
c = this_cpu_ptr(s->cpu_slab);
+ maybe_wipe_obj_freeptr(s, p[i]);
+
continue; /* goto for-loop */
}
c->freelist = get_freepointer(s, object);
p[i] = object;
+ maybe_wipe_obj_freeptr(s, p[i]);
}
c->tid = next_tid(c->tid);
local_irq_enable();
@@ -3821,11 +3810,15 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
{
struct page *page;
void *ptr = NULL;
+ unsigned int order = get_order(size);
flags |= __GFP_COMP;
- page = alloc_pages_node(node, flags, get_order(size));
- if (page)
+ page = alloc_pages_node(node, flags, order);
+ if (page) {
ptr = page_address(page);
+ mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE,
+ 1 << order);
+ }
return kmalloc_large_node_hook(ptr, size, flags);
}
@@ -3951,9 +3944,13 @@ void kfree(const void *x)
page = virt_to_head_page(x);
if (unlikely(!PageSlab(page))) {
+ unsigned int order = compound_order(page);
+
BUG_ON(!PageCompound(page));
kfree_hook(object);
- __free_pages(page, compound_order(page));
+ mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE,
+ -(1 << order));
+ __free_pages(page, order);
return;
}
slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_);
@@ -4838,7 +4835,17 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
}
}
- get_online_mems();
+ /*
+ * It is impossible to take "mem_hotplug_lock" here with "kernfs_mutex"
+ * already held which will conflict with an existing lock order:
+ *
+ * mem_hotplug_lock->slab_mutex->kernfs_mutex
+ *
+ * We don't really need mem_hotplug_lock (to hold off
+ * slab_mem_going_offline_callback) here because slab's memory hot
+ * unplug code doesn't destroy the kmem_cache->node[] data.
+ */
+
#ifdef CONFIG_SLUB_DEBUG
if (flags & SO_ALL) {
struct kmem_cache_node *n;
@@ -4879,7 +4886,6 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
x += sprintf(buf + x, " N%d=%lu",
node, nodes[node]);
#endif
- put_online_mems();
kfree(nodes);
return x + sprintf(buf + x, "\n");
}
diff --git a/mm/sparse.c b/mm/sparse.c
index bf32de9e666b..f6891c1992b1 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -219,7 +219,7 @@ static inline unsigned long first_present_section_nr(void)
return next_present_section_nr(-1);
}
-void subsection_mask_set(unsigned long *map, unsigned long pfn,
+static void subsection_mask_set(unsigned long *map, unsigned long pfn,
unsigned long nr_pages)
{
int idx = subsection_map_index(pfn);
diff --git a/mm/truncate.c b/mm/truncate.c
index 8563339041f6..dd9ebc1da356 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -592,6 +592,16 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
unlock_page(page);
continue;
}
+
+ /* Take a pin outside pagevec */
+ get_page(page);
+
+ /*
+ * Drop extra pins before trying to invalidate
+ * the huge page.
+ */
+ pagevec_remove_exceptionals(&pvec);
+ pagevec_release(&pvec);
}
ret = invalidate_inode_page(page);
@@ -602,6 +612,8 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
*/
if (!ret)
deactivate_file_page(page);
+ if (PageTransHuge(page))
+ put_page(page);
count += ret;
}
pagevec_remove_exceptionals(&pvec);
diff --git a/mm/vmpressure.c b/mm/vmpressure.c
index f3b50811497a..4bac22fe1aa2 100644
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c
@@ -355,6 +355,9 @@ void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio)
* "hierarchy" or "local").
*
* To be used as memcg event method.
+ *
+ * Return: 0 on success, -ENOMEM on memory failure or -EINVAL if @args could
+ * not be parsed.
*/
int vmpressure_register_event(struct mem_cgroup *memcg,
struct eventfd_ctx *eventfd, const char *args)
@@ -362,7 +365,7 @@ int vmpressure_register_event(struct mem_cgroup *memcg,
struct vmpressure *vmpr = memcg_to_vmpressure(memcg);
struct vmpressure_event *ev;
enum vmpressure_modes mode = VMPRESSURE_NO_PASSTHROUGH;
- enum vmpressure_levels level = -1;
+ enum vmpressure_levels level;
char *spec, *spec_orig;
char *token;
int ret = 0;
@@ -375,20 +378,18 @@ int vmpressure_register_event(struct mem_cgroup *memcg,
/* Find required level */
token = strsep(&spec, ",");
- level = match_string(vmpressure_str_levels, VMPRESSURE_NUM_LEVELS, token);
- if (level < 0) {
- ret = level;
+ ret = match_string(vmpressure_str_levels, VMPRESSURE_NUM_LEVELS, token);
+ if (ret < 0)
goto out;
- }
+ level = ret;
/* Find optional mode */
token = strsep(&spec, ",");
if (token) {
- mode = match_string(vmpressure_str_modes, VMPRESSURE_NUM_MODES, token);
- if (mode < 0) {
- ret = mode;
+ ret = match_string(vmpressure_str_modes, VMPRESSURE_NUM_MODES, token);
+ if (ret < 0)
goto out;
- }
+ mode = ret;
}
ev = kzalloc(sizeof(*ev), GFP_KERNEL);
@@ -404,6 +405,7 @@ int vmpressure_register_event(struct mem_cgroup *memcg,
mutex_lock(&vmpr->events_lock);
list_add(&ev->node, &vmpr->events);
mutex_unlock(&vmpr->events_lock);
+ ret = 0;
out:
kfree(spec_orig);
return ret;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e5d52d6a24af..ee4eecc7e1c2 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -351,12 +351,13 @@ unsigned long zone_reclaimable_pages(struct zone *zone)
*/
unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx)
{
- unsigned long lru_size;
+ unsigned long lru_size = 0;
int zid;
- if (!mem_cgroup_disabled())
- lru_size = lruvec_page_state_local(lruvec, NR_LRU_BASE + lru);
- else
+ if (!mem_cgroup_disabled()) {
+ for (zid = 0; zid < MAX_NR_ZONES; zid++)
+ lru_size += mem_cgroup_get_zone_lru_size(lruvec, lru, zid);
+ } else
lru_size = node_page_state(lruvec_pgdat(lruvec), NR_LRU_BASE + lru);
for (zid = zone_idx + 1; zid < MAX_NR_ZONES; zid++) {
@@ -932,10 +933,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
* Note that if SetPageDirty is always performed via set_page_dirty,
* and thus under the i_pages lock, then this ordering is not required.
*/
- if (unlikely(PageTransHuge(page)) && PageSwapCache(page))
- refcount = 1 + HPAGE_PMD_NR;
- else
- refcount = 2;
+ refcount = 1 + compound_nr(page);
if (!page_ref_freeze(page, refcount))
goto cannot_free;
/* note: atomic_cmpxchg in page_ref_freeze provides the smp_rmb */
@@ -2459,17 +2457,70 @@ out:
*lru_pages = 0;
for_each_evictable_lru(lru) {
int file = is_file_lru(lru);
- unsigned long size;
+ unsigned long lruvec_size;
unsigned long scan;
+ unsigned long protection;
+
+ lruvec_size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx);
+ protection = mem_cgroup_protection(memcg,
+ sc->memcg_low_reclaim);
+
+ if (protection) {
+ /*
+ * Scale a cgroup's reclaim pressure by proportioning
+ * its current usage to its memory.low or memory.min
+ * setting.
+ *
+ * This is important, as otherwise scanning aggression
+ * becomes extremely binary -- from nothing as we
+ * approach the memory protection threshold, to totally
+ * nominal as we exceed it. This results in requiring
+ * setting extremely liberal protection thresholds. It
+ * also means we simply get no protection at all if we
+ * set it too low, which is not ideal.
+ *
+ * If there is any protection in place, we reduce scan
+ * pressure by how much of the total memory used is
+ * within protection thresholds.
+ *
+ * There is one special case: in the first reclaim pass,
+ * we skip over all groups that are within their low
+ * protection. If that fails to reclaim enough pages to
+ * satisfy the reclaim goal, we come back and override
+ * the best-effort low protection. However, we still
+ * ideally want to honor how well-behaved groups are in
+ * that case instead of simply punishing them all
+ * equally. As such, we reclaim them based on how much
+ * memory they are using, reducing the scan pressure
+ * again by how much of the total memory used is under
+ * hard protection.
+ */
+ unsigned long cgroup_size = mem_cgroup_size(memcg);
+
+ /* Avoid TOCTOU with earlier protection check */
+ cgroup_size = max(cgroup_size, protection);
+
+ scan = lruvec_size - lruvec_size * protection /
+ cgroup_size;
+
+ /*
+ * Minimally target SWAP_CLUSTER_MAX pages to keep
+ * reclaim moving forwards, avoiding decremeting
+ * sc->priority further than desirable.
+ */
+ scan = max(scan, SWAP_CLUSTER_MAX);
+ } else {
+ scan = lruvec_size;
+ }
+
+ scan >>= sc->priority;
- size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx);
- scan = size >> sc->priority;
/*
* If the cgroup's already been deleted, make sure to
* scrape out the remaining cache.
*/
if (!scan && !mem_cgroup_online(memcg))
- scan = min(size, SWAP_CLUSTER_MAX);
+ scan = min(lruvec_size, SWAP_CLUSTER_MAX);
switch (scan_balance) {
case SCAN_EQUAL:
@@ -2489,7 +2540,7 @@ out:
case SCAN_ANON:
/* Scan one type exclusively */
if ((scan_balance == SCAN_FILE) != file) {
- size = 0;
+ lruvec_size = 0;
scan = 0;
}
break;
@@ -2498,7 +2549,7 @@ out:
BUG();
}
- *lru_pages += size;
+ *lru_pages += lruvec_size;
nr[lru] = scan;
}
}
@@ -2742,6 +2793,13 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
memcg_memory_event(memcg, MEMCG_LOW);
break;
case MEMCG_PROT_NONE:
+ /*
+ * All protection thresholds breached. We may
+ * still choose to vary the scan pressure
+ * applied based on by how much the cgroup in
+ * question has exceeded its protection
+ * thresholds (see get_scan_count).
+ */
break;
}
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 6afc892a148a..a8222041bd44 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1383,12 +1383,29 @@ static void pagetypeinfo_showfree_print(struct seq_file *m,
unsigned long freecount = 0;
struct free_area *area;
struct list_head *curr;
+ bool overflow = false;
area = &(zone->free_area[order]);
- list_for_each(curr, &area->free_list[mtype])
- freecount++;
- seq_printf(m, "%6lu ", freecount);
+ list_for_each(curr, &area->free_list[mtype]) {
+ /*
+ * Cap the free_list iteration because it might
+ * be really large and we are under a spinlock
+ * so a long time spent here could trigger a
+ * hard lockup detector. Anyway this is a
+ * debugging tool so knowing there is a handful
+ * of pages of this order should be more than
+ * sufficient.
+ */
+ if (++freecount >= 100000) {
+ overflow = true;
+ break;
+ }
+ }
+ seq_printf(m, "%s%6lu ", overflow ? ">" : "", freecount);
+ spin_unlock_irq(&zone->lock);
+ cond_resched();
+ spin_lock_irq(&zone->lock);
}
seq_putc(m, '\n');
}
@@ -1972,7 +1989,7 @@ void __init init_mm_internals(void)
#endif
#ifdef CONFIG_PROC_FS
proc_create_seq("buddyinfo", 0444, NULL, &fragmentation_op);
- proc_create_seq("pagetypeinfo", 0444, NULL, &pagetypeinfo_op);
+ proc_create_seq("pagetypeinfo", 0400, NULL, &pagetypeinfo_op);
proc_create_seq("vmstat", 0444, NULL, &vmstat_op);
proc_create_seq("zoneinfo", 0444, NULL, &zoneinfo_op);
#endif
diff --git a/mm/z3fold.c b/mm/z3fold.c
index 05bdf90646e7..6d3d3f698ebb 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -998,9 +998,11 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
struct z3fold_header *zhdr;
struct page *page;
enum buddy bud;
+ bool page_claimed;
zhdr = handle_to_z3fold_header(handle);
page = virt_to_page(zhdr);
+ page_claimed = test_and_set_bit(PAGE_CLAIMED, &page->private);
if (test_bit(PAGE_HEADLESS, &page->private)) {
/* if a headless page is under reclaim, just leave.
@@ -1008,7 +1010,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
* has not been set before, we release this page
* immediately so we don't care about its value any more.
*/
- if (!test_and_set_bit(PAGE_CLAIMED, &page->private)) {
+ if (!page_claimed) {
spin_lock(&pool->lock);
list_del(&page->lru);
spin_unlock(&pool->lock);
@@ -1044,13 +1046,15 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
atomic64_dec(&pool->pages_nr);
return;
}
- if (test_bit(PAGE_CLAIMED, &page->private)) {
+ if (page_claimed) {
+ /* the page has not been claimed by us */
z3fold_page_unlock(zhdr);
return;
}
if (unlikely(PageIsolated(page)) ||
test_and_set_bit(NEEDS_COMPACTING, &page->private)) {
z3fold_page_unlock(zhdr);
+ clear_bit(PAGE_CLAIMED, &page->private);
return;
}
if (zhdr->cpu < 0 || !cpu_online(zhdr->cpu)) {
@@ -1060,10 +1064,12 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
zhdr->cpu = -1;
kref_get(&zhdr->refcount);
do_compact_page(zhdr, true);
+ clear_bit(PAGE_CLAIMED, &page->private);
return;
}
kref_get(&zhdr->refcount);
queue_work_on(zhdr->cpu, pool->compact_wq, &zhdr->work);
+ clear_bit(PAGE_CLAIMED, &page->private);
z3fold_page_unlock(zhdr);
}
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 54728d2eda18..d4bcfd8f95bf 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -172,7 +172,6 @@ int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack)
if (err < 0)
goto out_uninit_mvrp;
- vlan->nest_level = dev_get_nest_level(real_dev) + 1;
err = register_netdevice(dev);
if (err < 0)
goto out_uninit_mvrp;
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 93eadf179123..e5bff5cc6f97 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -489,36 +489,6 @@ static void vlan_dev_set_rx_mode(struct net_device *vlan_dev)
dev_uc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev);
}
-/*
- * vlan network devices have devices nesting below it, and are a special
- * "super class" of normal network devices; split their locks off into a
- * separate class since they always nest.
- */
-static struct lock_class_key vlan_netdev_xmit_lock_key;
-static struct lock_class_key vlan_netdev_addr_lock_key;
-
-static void vlan_dev_set_lockdep_one(struct net_device *dev,
- struct netdev_queue *txq,
- void *_subclass)
-{
- lockdep_set_class_and_subclass(&txq->_xmit_lock,
- &vlan_netdev_xmit_lock_key,
- *(int *)_subclass);
-}
-
-static void vlan_dev_set_lockdep_class(struct net_device *dev, int subclass)
-{
- lockdep_set_class_and_subclass(&dev->addr_list_lock,
- &vlan_netdev_addr_lock_key,
- subclass);
- netdev_for_each_tx_queue(dev, vlan_dev_set_lockdep_one, &subclass);
-}
-
-static int vlan_dev_get_lock_subclass(struct net_device *dev)
-{
- return vlan_dev_priv(dev)->nest_level;
-}
-
static const struct header_ops vlan_header_ops = {
.create = vlan_dev_hard_header,
.parse = eth_header_parse,
@@ -609,8 +579,6 @@ static int vlan_dev_init(struct net_device *dev)
SET_NETDEV_DEVTYPE(dev, &vlan_type);
- vlan_dev_set_lockdep_class(dev, vlan_dev_get_lock_subclass(dev));
-
vlan->vlan_pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats);
if (!vlan->vlan_pcpu_stats)
return -ENOMEM;
@@ -812,7 +780,6 @@ static const struct net_device_ops vlan_netdev_ops = {
.ndo_netpoll_cleanup = vlan_dev_netpoll_cleanup,
#endif
.ndo_fix_features = vlan_dev_fix_features,
- .ndo_get_lock_subclass = vlan_dev_get_lock_subclass,
.ndo_get_iflink = vlan_dev_get_iflink,
};
diff --git a/net/atm/clip.c b/net/atm/clip.c
index a7972da7235d..294cb9efe3d3 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -89,7 +89,7 @@ static void unlink_clip_vcc(struct clip_vcc *clip_vcc)
struct clip_vcc **walk;
if (!entry) {
- pr_crit("!clip_vcc->entry (clip_vcc %p)\n", clip_vcc);
+ pr_err("!clip_vcc->entry (clip_vcc %p)\n", clip_vcc);
return;
}
netif_tx_lock_bh(entry->neigh->dev); /* block clip_start_xmit() */
@@ -109,10 +109,10 @@ static void unlink_clip_vcc(struct clip_vcc *clip_vcc)
error = neigh_update(entry->neigh, NULL, NUD_NONE,
NEIGH_UPDATE_F_ADMIN, 0);
if (error)
- pr_crit("neigh_update failed with %d\n", error);
+ pr_err("neigh_update failed with %d\n", error);
goto out;
}
- pr_crit("ATMARP: failed (entry %p, vcc 0x%p)\n", entry, clip_vcc);
+ pr_err("ATMARP: failed (entry %p, vcc 0x%p)\n", entry, clip_vcc);
out:
netif_tx_unlock_bh(entry->neigh->dev);
}
diff --git a/net/atm/common.c b/net/atm/common.c
index b7528e77997c..0ce530af534d 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -668,7 +668,7 @@ __poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait)
mask |= EPOLLHUP;
/* readable? */
- if (!skb_queue_empty(&sk->sk_receive_queue))
+ if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
mask |= EPOLLIN | EPOLLRDNORM;
/* writable? */
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index d78938e3e008..5b0b20e6da95 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -22,6 +22,8 @@
#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
+#include <linux/lockdep.h>
+#include <linux/mutex.h>
#include <linux/netdevice.h>
#include <linux/netlink.h>
#include <linux/pkt_sched.h>
@@ -193,14 +195,18 @@ static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface)
unsigned char *ogm_buff;
u32 random_seqno;
+ mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex);
+
/* randomize initial seqno to avoid collision */
get_random_bytes(&random_seqno, sizeof(random_seqno));
atomic_set(&hard_iface->bat_iv.ogm_seqno, random_seqno);
hard_iface->bat_iv.ogm_buff_len = BATADV_OGM_HLEN;
ogm_buff = kmalloc(hard_iface->bat_iv.ogm_buff_len, GFP_ATOMIC);
- if (!ogm_buff)
+ if (!ogm_buff) {
+ mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex);
return -ENOMEM;
+ }
hard_iface->bat_iv.ogm_buff = ogm_buff;
@@ -212,35 +218,59 @@ static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface)
batadv_ogm_packet->reserved = 0;
batadv_ogm_packet->tq = BATADV_TQ_MAX_VALUE;
+ mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex);
+
return 0;
}
static void batadv_iv_ogm_iface_disable(struct batadv_hard_iface *hard_iface)
{
+ mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex);
+
kfree(hard_iface->bat_iv.ogm_buff);
hard_iface->bat_iv.ogm_buff = NULL;
+
+ mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex);
}
static void batadv_iv_ogm_iface_update_mac(struct batadv_hard_iface *hard_iface)
{
struct batadv_ogm_packet *batadv_ogm_packet;
- unsigned char *ogm_buff = hard_iface->bat_iv.ogm_buff;
+ void *ogm_buff;
- batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff;
+ mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex);
+
+ ogm_buff = hard_iface->bat_iv.ogm_buff;
+ if (!ogm_buff)
+ goto unlock;
+
+ batadv_ogm_packet = ogm_buff;
ether_addr_copy(batadv_ogm_packet->orig,
hard_iface->net_dev->dev_addr);
ether_addr_copy(batadv_ogm_packet->prev_sender,
hard_iface->net_dev->dev_addr);
+
+unlock:
+ mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex);
}
static void
batadv_iv_ogm_primary_iface_set(struct batadv_hard_iface *hard_iface)
{
struct batadv_ogm_packet *batadv_ogm_packet;
- unsigned char *ogm_buff = hard_iface->bat_iv.ogm_buff;
+ void *ogm_buff;
- batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff;
+ mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex);
+
+ ogm_buff = hard_iface->bat_iv.ogm_buff;
+ if (!ogm_buff)
+ goto unlock;
+
+ batadv_ogm_packet = ogm_buff;
batadv_ogm_packet->ttl = BATADV_TTL;
+
+unlock:
+ mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex);
}
/* when do we schedule our own ogm to be sent */
@@ -742,7 +772,11 @@ batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface)
}
}
-static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
+/**
+ * batadv_iv_ogm_schedule_buff() - schedule submission of hardif ogm buffer
+ * @hard_iface: interface whose ogm buffer should be transmitted
+ */
+static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface)
{
struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
unsigned char **ogm_buff = &hard_iface->bat_iv.ogm_buff;
@@ -753,9 +787,7 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
u16 tvlv_len = 0;
unsigned long send_time;
- if (hard_iface->if_status == BATADV_IF_NOT_IN_USE ||
- hard_iface->if_status == BATADV_IF_TO_BE_REMOVED)
- return;
+ lockdep_assert_held(&hard_iface->bat_iv.ogm_buff_mutex);
/* the interface gets activated here to avoid race conditions between
* the moment of activating the interface in
@@ -823,6 +855,17 @@ out:
batadv_hardif_put(primary_if);
}
+static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
+{
+ if (hard_iface->if_status == BATADV_IF_NOT_IN_USE ||
+ hard_iface->if_status == BATADV_IF_TO_BE_REMOVED)
+ return;
+
+ mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex);
+ batadv_iv_ogm_schedule_buff(hard_iface);
+ mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex);
+}
+
/**
* batadv_iv_orig_ifinfo_sum() - Get bcast_own sum for originator over iterface
* @orig_node: originator which reproadcasted the OGMs directly
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index dc4f7430cb5a..8033f24f506c 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -18,6 +18,7 @@
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
+#include <linux/mutex.h>
#include <linux/netdevice.h>
#include <linux/random.h>
#include <linux/rculist.h>
@@ -256,14 +257,12 @@ static void batadv_v_ogm_queue_on_if(struct sk_buff *skb,
}
/**
- * batadv_v_ogm_send() - periodic worker broadcasting the own OGM
- * @work: work queue item
+ * batadv_v_ogm_send_softif() - periodic worker broadcasting the own OGM
+ * @bat_priv: the bat priv with all the soft interface information
*/
-static void batadv_v_ogm_send(struct work_struct *work)
+static void batadv_v_ogm_send_softif(struct batadv_priv *bat_priv)
{
struct batadv_hard_iface *hard_iface;
- struct batadv_priv_bat_v *bat_v;
- struct batadv_priv *bat_priv;
struct batadv_ogm2_packet *ogm_packet;
struct sk_buff *skb, *skb_tmp;
unsigned char *ogm_buff;
@@ -271,8 +270,7 @@ static void batadv_v_ogm_send(struct work_struct *work)
u16 tvlv_len = 0;
int ret;
- bat_v = container_of(work, struct batadv_priv_bat_v, ogm_wq.work);
- bat_priv = container_of(bat_v, struct batadv_priv, bat_v);
+ lockdep_assert_held(&bat_priv->bat_v.ogm_buff_mutex);
if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING)
goto out;
@@ -364,6 +362,23 @@ out:
}
/**
+ * batadv_v_ogm_send() - periodic worker broadcasting the own OGM
+ * @work: work queue item
+ */
+static void batadv_v_ogm_send(struct work_struct *work)
+{
+ struct batadv_priv_bat_v *bat_v;
+ struct batadv_priv *bat_priv;
+
+ bat_v = container_of(work, struct batadv_priv_bat_v, ogm_wq.work);
+ bat_priv = container_of(bat_v, struct batadv_priv, bat_v);
+
+ mutex_lock(&bat_priv->bat_v.ogm_buff_mutex);
+ batadv_v_ogm_send_softif(bat_priv);
+ mutex_unlock(&bat_priv->bat_v.ogm_buff_mutex);
+}
+
+/**
* batadv_v_ogm_aggr_work() - OGM queue periodic task per interface
* @work: work queue item
*
@@ -424,11 +439,15 @@ void batadv_v_ogm_primary_iface_set(struct batadv_hard_iface *primary_iface)
struct batadv_priv *bat_priv = netdev_priv(primary_iface->soft_iface);
struct batadv_ogm2_packet *ogm_packet;
+ mutex_lock(&bat_priv->bat_v.ogm_buff_mutex);
if (!bat_priv->bat_v.ogm_buff)
- return;
+ goto unlock;
ogm_packet = (struct batadv_ogm2_packet *)bat_priv->bat_v.ogm_buff;
ether_addr_copy(ogm_packet->orig, primary_iface->net_dev->dev_addr);
+
+unlock:
+ mutex_unlock(&bat_priv->bat_v.ogm_buff_mutex);
}
/**
@@ -1050,6 +1069,8 @@ int batadv_v_ogm_init(struct batadv_priv *bat_priv)
atomic_set(&bat_priv->bat_v.ogm_seqno, random_seqno);
INIT_DELAYED_WORK(&bat_priv->bat_v.ogm_wq, batadv_v_ogm_send);
+ mutex_init(&bat_priv->bat_v.ogm_buff_mutex);
+
return 0;
}
@@ -1061,7 +1082,11 @@ void batadv_v_ogm_free(struct batadv_priv *bat_priv)
{
cancel_delayed_work_sync(&bat_priv->bat_v.ogm_wq);
+ mutex_lock(&bat_priv->bat_v.ogm_buff_mutex);
+
kfree(bat_priv->bat_v.ogm_buff);
bat_priv->bat_v.ogm_buff = NULL;
bat_priv->bat_v.ogm_buff_len = 0;
+
+ mutex_unlock(&bat_priv->bat_v.ogm_buff_mutex);
}
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index c90e47342bb0..afb52282d5bd 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -18,6 +18,7 @@
#include <linux/kref.h>
#include <linux/limits.h>
#include <linux/list.h>
+#include <linux/mutex.h>
#include <linux/netdevice.h>
#include <linux/printk.h>
#include <linux/rculist.h>
@@ -929,6 +930,7 @@ batadv_hardif_add_interface(struct net_device *net_dev)
INIT_LIST_HEAD(&hard_iface->list);
INIT_HLIST_HEAD(&hard_iface->neigh_list);
+ mutex_init(&hard_iface->bat_iv.ogm_buff_mutex);
spin_lock_init(&hard_iface->neigh_list_lock);
kref_init(&hard_iface->refcount);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 9cbed6f5a85a..5ee8e9a100f9 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -740,36 +740,6 @@ static int batadv_interface_kill_vid(struct net_device *dev, __be16 proto,
return 0;
}
-/* batman-adv network devices have devices nesting below it and are a special
- * "super class" of normal network devices; split their locks off into a
- * separate class since they always nest.
- */
-static struct lock_class_key batadv_netdev_xmit_lock_key;
-static struct lock_class_key batadv_netdev_addr_lock_key;
-
-/**
- * batadv_set_lockdep_class_one() - Set lockdep class for a single tx queue
- * @dev: device which owns the tx queue
- * @txq: tx queue to modify
- * @_unused: always NULL
- */
-static void batadv_set_lockdep_class_one(struct net_device *dev,
- struct netdev_queue *txq,
- void *_unused)
-{
- lockdep_set_class(&txq->_xmit_lock, &batadv_netdev_xmit_lock_key);
-}
-
-/**
- * batadv_set_lockdep_class() - Set txq and addr_list lockdep class
- * @dev: network device to modify
- */
-static void batadv_set_lockdep_class(struct net_device *dev)
-{
- lockdep_set_class(&dev->addr_list_lock, &batadv_netdev_addr_lock_key);
- netdev_for_each_tx_queue(dev, batadv_set_lockdep_class_one, NULL);
-}
-
/**
* batadv_softif_init_late() - late stage initialization of soft interface
* @dev: registered network device to modify
@@ -783,8 +753,6 @@ static int batadv_softif_init_late(struct net_device *dev)
int ret;
size_t cnt_len = sizeof(u64) * BATADV_CNT_NUM;
- batadv_set_lockdep_class(dev);
-
bat_priv = netdev_priv(dev);
bat_priv->soft_iface = dev;
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index be7c02aa91e2..4d7f1baee7b7 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -17,6 +17,7 @@
#include <linux/if.h>
#include <linux/if_ether.h>
#include <linux/kref.h>
+#include <linux/mutex.h>
#include <linux/netdevice.h>
#include <linux/netlink.h>
#include <linux/sched.h> /* for linux/wait.h */
@@ -81,6 +82,9 @@ struct batadv_hard_iface_bat_iv {
/** @ogm_seqno: OGM sequence number - used to identify each OGM */
atomic_t ogm_seqno;
+
+ /** @ogm_buff_mutex: lock protecting ogm_buff and ogm_buff_len */
+ struct mutex ogm_buff_mutex;
};
/**
@@ -1539,6 +1543,9 @@ struct batadv_priv_bat_v {
/** @ogm_seqno: OGM sequence number - used to identify each OGM */
atomic_t ogm_seqno;
+ /** @ogm_buff_mutex: lock protecting ogm_buff and ogm_buff_len */
+ struct mutex ogm_buff_mutex;
+
/** @ogm_wq: workqueue used to schedule OGM transmissions */
struct delayed_work ogm_wq;
};
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index bb55d92691b0..4febc82a7c76 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -571,15 +571,7 @@ static netdev_tx_t bt_xmit(struct sk_buff *skb, struct net_device *netdev)
return err < 0 ? NET_XMIT_DROP : err;
}
-static int bt_dev_init(struct net_device *dev)
-{
- netdev_lockdep_set_classes(dev);
-
- return 0;
-}
-
static const struct net_device_ops netdev_ops = {
- .ndo_init = bt_dev_init,
.ndo_start_xmit = bt_xmit,
};
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 94ddf19998c7..5f508c50649d 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -460,7 +460,7 @@ __poll_t bt_sock_poll(struct file *file, struct socket *sock,
if (sk->sk_state == BT_LISTEN)
return bt_accept_poll(sk);
- if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+ if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
mask |= EPOLLERR |
(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
@@ -470,7 +470,7 @@ __poll_t bt_sock_poll(struct file *file, struct socket *sock,
if (sk->sk_shutdown == SHUTDOWN_MASK)
mask |= EPOLLHUP;
- if (!skb_queue_empty(&sk->sk_receive_queue))
+ if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
mask |= EPOLLIN | EPOLLRDNORM;
if (sk->sk_state == BT_CLOSED)
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 681b72862c16..e804a3016902 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -24,8 +24,6 @@
const struct nf_br_ops __rcu *nf_br_ops __read_mostly;
EXPORT_SYMBOL_GPL(nf_br_ops);
-static struct lock_class_key bridge_netdev_addr_lock_key;
-
/* net device transmit always called with BH disabled */
netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
{
@@ -108,11 +106,6 @@ out:
return NETDEV_TX_OK;
}
-static void br_set_lockdep_class(struct net_device *dev)
-{
- lockdep_set_class(&dev->addr_list_lock, &bridge_netdev_addr_lock_key);
-}
-
static int br_dev_init(struct net_device *dev)
{
struct net_bridge *br = netdev_priv(dev);
@@ -150,7 +143,6 @@ static int br_dev_init(struct net_device *dev)
br_mdb_hash_fini(br);
br_fdb_hash_fini(br);
}
- br_set_lockdep_class(dev);
return err;
}
diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c
index ed91ea31978a..12a4f4d93681 100644
--- a/net/bridge/netfilter/ebt_dnat.c
+++ b/net/bridge/netfilter/ebt_dnat.c
@@ -20,7 +20,6 @@ static unsigned int
ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct ebt_nat_info *info = par->targinfo;
- struct net_device *dev;
if (skb_ensure_writable(skb, ETH_ALEN))
return EBT_DROP;
@@ -33,10 +32,22 @@ ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par)
else
skb->pkt_type = PACKET_MULTICAST;
} else {
- if (xt_hooknum(par) != NF_BR_BROUTING)
- dev = br_port_get_rcu(xt_in(par))->br->dev;
- else
+ const struct net_device *dev;
+
+ switch (xt_hooknum(par)) {
+ case NF_BR_BROUTING:
dev = xt_in(par);
+ break;
+ case NF_BR_PRE_ROUTING:
+ dev = br_port_get_rcu(xt_in(par))->br->dev;
+ break;
+ default:
+ dev = NULL;
+ break;
+ }
+
+ if (!dev) /* NF_BR_LOCAL_OUT */
+ return info->target;
if (ether_addr_equal(info->mac, dev->dev_addr))
skb->pkt_type = PACKET_HOST;
diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c
index 8842798c29e6..809673222382 100644
--- a/net/bridge/netfilter/nf_conntrack_bridge.c
+++ b/net/bridge/netfilter/nf_conntrack_bridge.c
@@ -33,6 +33,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk,
{
int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
unsigned int hlen, ll_rs, mtu;
+ ktime_t tstamp = skb->tstamp;
struct ip_frag_state state;
struct iphdr *iph;
int err;
@@ -80,6 +81,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk,
if (iter.frag)
ip_fraglist_prepare(skb, &iter);
+ skb->tstamp = tstamp;
err = output(net, sk, data, skb);
if (err || !iter.frag)
break;
@@ -93,7 +95,7 @@ slow_path:
* This may also be a clone skbuff, we could preserve the geometry for
* the copies but probably not worth the effort.
*/
- ip_frag_init(skb, hlen, ll_rs, frag_max_size, &state);
+ ip_frag_init(skb, hlen, ll_rs, frag_max_size, false, &state);
while (state.left > 0) {
struct sk_buff *skb2;
@@ -104,6 +106,7 @@ slow_path:
goto blackhole;
}
+ skb2->tstamp = tstamp;
err = output(net, sk, data, skb2);
if (err)
goto blackhole;
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 13ea920600ae..ef14da50a981 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -953,7 +953,7 @@ static __poll_t caif_poll(struct file *file,
mask |= EPOLLRDHUP;
/* readable? */
- if (!skb_queue_empty(&sk->sk_receive_queue) ||
+ if (!skb_queue_empty_lockless(&sk->sk_receive_queue) ||
(sk->sk_shutdown & RCV_SHUTDOWN))
mask |= EPOLLIN | EPOLLRDNORM;
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 5518a7d9eed9..128d37a4c2e0 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -86,11 +86,12 @@ static atomic_t skbcounter = ATOMIC_INIT(0);
/* af_can socket functions */
-static void can_sock_destruct(struct sock *sk)
+void can_sock_destruct(struct sock *sk)
{
skb_queue_purge(&sk->sk_receive_queue);
skb_queue_purge(&sk->sk_error_queue);
}
+EXPORT_SYMBOL(can_sock_destruct);
static const struct can_proto *can_get_proto(int protocol)
{
diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c
index def2f813ffce..137054bff9ec 100644
--- a/net/can/j1939/main.c
+++ b/net/can/j1939/main.c
@@ -51,6 +51,7 @@ static void j1939_can_recv(struct sk_buff *iskb, void *data)
if (!skb)
return;
+ j1939_priv_get(priv);
can_skb_set_owner(skb, iskb->sk);
/* get a pointer to the header of the skb
@@ -104,6 +105,7 @@ static void j1939_can_recv(struct sk_buff *iskb, void *data)
j1939_simple_recv(priv, skb);
j1939_sk_recv(priv, skb);
done:
+ j1939_priv_put(priv);
kfree_skb(skb);
}
@@ -150,6 +152,10 @@ static void __j1939_priv_release(struct kref *kref)
netdev_dbg(priv->ndev, "%s: 0x%p\n", __func__, priv);
+ WARN_ON_ONCE(!list_empty(&priv->active_session_list));
+ WARN_ON_ONCE(!list_empty(&priv->ecus));
+ WARN_ON_ONCE(!list_empty(&priv->j1939_socks));
+
dev_put(ndev);
kfree(priv);
}
@@ -207,6 +213,9 @@ static inline struct j1939_priv *j1939_ndev_to_priv(struct net_device *ndev)
{
struct can_ml_priv *can_ml_priv = ndev->ml_priv;
+ if (!can_ml_priv)
+ return NULL;
+
return can_ml_priv->j1939_priv;
}
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index 37c1040bcb9c..de09b0a65791 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -78,7 +78,6 @@ static void j1939_jsk_add(struct j1939_priv *priv, struct j1939_sock *jsk)
{
jsk->state |= J1939_SOCK_BOUND;
j1939_priv_get(priv);
- jsk->priv = priv;
spin_lock_bh(&priv->j1939_socks_lock);
list_add_tail(&jsk->list, &priv->j1939_socks);
@@ -91,7 +90,6 @@ static void j1939_jsk_del(struct j1939_priv *priv, struct j1939_sock *jsk)
list_del_init(&jsk->list);
spin_unlock_bh(&priv->j1939_socks_lock);
- jsk->priv = NULL;
j1939_priv_put(priv);
jsk->state &= ~J1939_SOCK_BOUND;
}
@@ -349,6 +347,34 @@ void j1939_sk_recv(struct j1939_priv *priv, struct sk_buff *skb)
spin_unlock_bh(&priv->j1939_socks_lock);
}
+static void j1939_sk_sock_destruct(struct sock *sk)
+{
+ struct j1939_sock *jsk = j1939_sk(sk);
+
+ /* This function will be call by the generic networking code, when then
+ * the socket is ultimately closed (sk->sk_destruct).
+ *
+ * The race between
+ * - processing a received CAN frame
+ * (can_receive -> j1939_can_recv)
+ * and accessing j1939_priv
+ * ... and ...
+ * - closing a socket
+ * (j1939_can_rx_unregister -> can_rx_unregister)
+ * and calling the final j1939_priv_put()
+ *
+ * is avoided by calling the final j1939_priv_put() from this
+ * RCU deferred cleanup call.
+ */
+ if (jsk->priv) {
+ j1939_priv_put(jsk->priv);
+ jsk->priv = NULL;
+ }
+
+ /* call generic CAN sock destruct */
+ can_sock_destruct(sk);
+}
+
static int j1939_sk_init(struct sock *sk)
{
struct j1939_sock *jsk = j1939_sk(sk);
@@ -371,6 +397,7 @@ static int j1939_sk_init(struct sock *sk)
atomic_set(&jsk->skb_pending, 0);
spin_lock_init(&jsk->sk_session_queue_lock);
INIT_LIST_HEAD(&jsk->sk_session_queue);
+ sk->sk_destruct = j1939_sk_sock_destruct;
return 0;
}
@@ -443,6 +470,12 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len)
}
jsk->ifindex = addr->can_ifindex;
+
+ /* the corresponding j1939_priv_put() is called via
+ * sk->sk_destruct, which points to j1939_sk_sock_destruct()
+ */
+ j1939_priv_get(priv);
+ jsk->priv = priv;
}
/* set default transmit pgn */
@@ -560,8 +593,8 @@ static int j1939_sk_release(struct socket *sock)
if (!sk)
return 0;
- jsk = j1939_sk(sk);
lock_sock(sk);
+ jsk = j1939_sk(sk);
if (jsk->state & J1939_SOCK_BOUND) {
struct j1939_priv *priv = jsk->priv;
@@ -580,6 +613,7 @@ static int j1939_sk_release(struct socket *sock)
j1939_netdev_stop(priv);
}
+ kfree(jsk->filters);
sock_orphan(sk);
sock->sk = NULL;
@@ -909,8 +943,10 @@ void j1939_sk_errqueue(struct j1939_session *session,
memset(serr, 0, sizeof(*serr));
switch (type) {
case J1939_ERRQUEUE_ACK:
- if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK))
+ if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)) {
+ kfree_skb(skb);
return;
+ }
serr->ee.ee_errno = ENOMSG;
serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
@@ -918,8 +954,10 @@ void j1939_sk_errqueue(struct j1939_session *session,
state = "ACK";
break;
case J1939_ERRQUEUE_SCHED:
- if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED))
+ if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)) {
+ kfree_skb(skb);
return;
+ }
serr->ee.ee_errno = ENOMSG;
serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
@@ -1054,51 +1092,72 @@ static int j1939_sk_sendmsg(struct socket *sock, struct msghdr *msg,
{
struct sock *sk = sock->sk;
struct j1939_sock *jsk = j1939_sk(sk);
- struct j1939_priv *priv = jsk->priv;
+ struct j1939_priv *priv;
int ifindex;
int ret;
+ lock_sock(sock->sk);
/* various socket state tests */
- if (!(jsk->state & J1939_SOCK_BOUND))
- return -EBADFD;
+ if (!(jsk->state & J1939_SOCK_BOUND)) {
+ ret = -EBADFD;
+ goto sendmsg_done;
+ }
+ priv = jsk->priv;
ifindex = jsk->ifindex;
- if (!jsk->addr.src_name && jsk->addr.sa == J1939_NO_ADDR)
+ if (!jsk->addr.src_name && jsk->addr.sa == J1939_NO_ADDR) {
/* no source address assigned yet */
- return -EBADFD;
+ ret = -EBADFD;
+ goto sendmsg_done;
+ }
/* deal with provided destination address info */
if (msg->msg_name) {
struct sockaddr_can *addr = msg->msg_name;
- if (msg->msg_namelen < J1939_MIN_NAMELEN)
- return -EINVAL;
+ if (msg->msg_namelen < J1939_MIN_NAMELEN) {
+ ret = -EINVAL;
+ goto sendmsg_done;
+ }
- if (addr->can_family != AF_CAN)
- return -EINVAL;
+ if (addr->can_family != AF_CAN) {
+ ret = -EINVAL;
+ goto sendmsg_done;
+ }
- if (addr->can_ifindex && addr->can_ifindex != ifindex)
- return -EBADFD;
+ if (addr->can_ifindex && addr->can_ifindex != ifindex) {
+ ret = -EBADFD;
+ goto sendmsg_done;
+ }
if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn) &&
- !j1939_pgn_is_clean_pdu(addr->can_addr.j1939.pgn))
- return -EINVAL;
+ !j1939_pgn_is_clean_pdu(addr->can_addr.j1939.pgn)) {
+ ret = -EINVAL;
+ goto sendmsg_done;
+ }
if (!addr->can_addr.j1939.name &&
addr->can_addr.j1939.addr == J1939_NO_ADDR &&
- !sock_flag(sk, SOCK_BROADCAST))
+ !sock_flag(sk, SOCK_BROADCAST)) {
/* broadcast, but SO_BROADCAST not set */
- return -EACCES;
+ ret = -EACCES;
+ goto sendmsg_done;
+ }
} else {
if (!jsk->addr.dst_name && jsk->addr.da == J1939_NO_ADDR &&
- !sock_flag(sk, SOCK_BROADCAST))
+ !sock_flag(sk, SOCK_BROADCAST)) {
/* broadcast, but SO_BROADCAST not set */
- return -EACCES;
+ ret = -EACCES;
+ goto sendmsg_done;
+ }
}
ret = j1939_sk_send_loop(priv, sk, msg, size);
+sendmsg_done:
+ release_sock(sock->sk);
+
return ret;
}
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index fe000ea757ea..9f99af5b0b11 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -255,6 +255,7 @@ static void __j1939_session_drop(struct j1939_session *session)
return;
j1939_sock_pending_del(session->sk);
+ sock_put(session->sk);
}
static void j1939_session_destroy(struct j1939_session *session)
@@ -266,6 +267,9 @@ static void j1939_session_destroy(struct j1939_session *session)
netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
+ WARN_ON_ONCE(!list_empty(&session->sk_session_queue_entry));
+ WARN_ON_ONCE(!list_empty(&session->active_session_list_entry));
+
skb_queue_purge(&session->skb_queue);
__j1939_session_drop(session);
j1939_priv_put(session->priv);
@@ -1042,12 +1046,13 @@ j1939_session_deactivate_activate_next(struct j1939_session *session)
j1939_sk_queue_activate_next(session);
}
-static void j1939_session_cancel(struct j1939_session *session,
+static void __j1939_session_cancel(struct j1939_session *session,
enum j1939_xtp_abort err)
{
struct j1939_priv *priv = session->priv;
WARN_ON_ONCE(!err);
+ lockdep_assert_held(&session->priv->active_session_list_lock);
session->err = j1939_xtp_abort_to_errno(priv, err);
/* do not send aborts on incoming broadcasts */
@@ -1062,6 +1067,20 @@ static void j1939_session_cancel(struct j1939_session *session,
j1939_sk_send_loop_abort(session->sk, session->err);
}
+static void j1939_session_cancel(struct j1939_session *session,
+ enum j1939_xtp_abort err)
+{
+ j1939_session_list_lock(session->priv);
+
+ if (session->state >= J1939_SESSION_ACTIVE &&
+ session->state < J1939_SESSION_WAITING_ABORT) {
+ j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS);
+ __j1939_session_cancel(session, err);
+ }
+
+ j1939_session_list_unlock(session->priv);
+}
+
static enum hrtimer_restart j1939_tp_txtimer(struct hrtimer *hrtimer)
{
struct j1939_session *session =
@@ -1108,8 +1127,6 @@ static enum hrtimer_restart j1939_tp_txtimer(struct hrtimer *hrtimer)
netdev_alert(priv->ndev, "%s: 0x%p: tx aborted with unknown reason: %i\n",
__func__, session, ret);
if (session->skcb.addr.type != J1939_SIMPLE) {
- j1939_tp_set_rxtimeout(session,
- J1939_XTP_ABORT_TIMEOUT_MS);
j1939_session_cancel(session, J1939_XTP_ABORT_OTHER);
} else {
session->err = ret;
@@ -1169,7 +1186,7 @@ static enum hrtimer_restart j1939_tp_rxtimer(struct hrtimer *hrtimer)
hrtimer_start(&session->rxtimer,
ms_to_ktime(J1939_XTP_ABORT_TIMEOUT_MS),
HRTIMER_MODE_REL_SOFT);
- j1939_session_cancel(session, J1939_XTP_ABORT_TIMEOUT);
+ __j1939_session_cancel(session, J1939_XTP_ABORT_TIMEOUT);
}
j1939_session_list_unlock(session->priv);
}
@@ -1273,9 +1290,27 @@ j1939_xtp_rx_abort(struct j1939_priv *priv, struct sk_buff *skb,
static void
j1939_xtp_rx_eoma_one(struct j1939_session *session, struct sk_buff *skb)
{
+ struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb);
+ const u8 *dat;
+ int len;
+
if (j1939_xtp_rx_cmd_bad_pgn(session, skb))
return;
+ dat = skb->data;
+
+ if (skcb->addr.type == J1939_ETP)
+ len = j1939_etp_ctl_to_size(dat);
+ else
+ len = j1939_tp_ctl_to_size(dat);
+
+ if (session->total_message_size != len) {
+ netdev_warn_once(session->priv->ndev,
+ "%s: 0x%p: Incorrect size. Expected: %i; got: %i.\n",
+ __func__, session, session->total_message_size,
+ len);
+ }
+
netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session);
session->pkt.tx_acked = session->pkt.total;
@@ -1357,7 +1392,6 @@ j1939_xtp_rx_cts_one(struct j1939_session *session, struct sk_buff *skb)
out_session_cancel:
j1939_session_timers_cancel(session);
- j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS);
j1939_session_cancel(session, err);
}
@@ -1432,7 +1466,7 @@ j1939_session *j1939_session_fresh_new(struct j1939_priv *priv,
skcb = j1939_skb_to_cb(skb);
memcpy(skcb, rel_skcb, sizeof(*skcb));
- session = j1939_session_new(priv, skb, skb->len);
+ session = j1939_session_new(priv, skb, size);
if (!session) {
kfree_skb(skb);
return NULL;
@@ -1554,7 +1588,6 @@ static int j1939_xtp_rx_rts_session_active(struct j1939_session *session,
/* RTS on active session */
j1939_session_timers_cancel(session);
- j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS);
j1939_session_cancel(session, J1939_XTP_ABORT_BUSY);
}
@@ -1565,7 +1598,6 @@ static int j1939_xtp_rx_rts_session_active(struct j1939_session *session,
session->last_cmd);
j1939_session_timers_cancel(session);
- j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS);
j1939_session_cancel(session, J1939_XTP_ABORT_BUSY);
return -EBUSY;
@@ -1767,7 +1799,6 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session,
out_session_cancel:
j1939_session_timers_cancel(session);
- j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS);
j1939_session_cancel(session, J1939_XTP_ABORT_FAULT);
j1939_session_put(session);
}
@@ -1848,6 +1879,7 @@ struct j1939_session *j1939_tp_send(struct j1939_priv *priv,
return ERR_PTR(-ENOMEM);
/* skb is recounted in j1939_session_new() */
+ sock_hold(skb->sk);
session->sk = skb->sk;
session->transmission = true;
session->pkt.total = (size + 6) / 7;
@@ -2010,7 +2042,11 @@ int j1939_cancel_active_session(struct j1939_priv *priv, struct sock *sk)
&priv->active_session_list,
active_session_list_entry) {
if (!sk || sk == session->sk) {
- j1939_session_timers_cancel(session);
+ if (hrtimer_try_to_cancel(&session->txtimer) == 1)
+ j1939_session_put(session);
+ if (hrtimer_try_to_cancel(&session->rxtimer) == 1)
+ j1939_session_put(session);
+
session->err = ESHUTDOWN;
j1939_session_deactivate_locked(session);
}
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 4cc8dc5db2b7..da3c24ed129c 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -97,7 +97,7 @@ int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
if (error)
goto out_err;
- if (sk->sk_receive_queue.prev != skb)
+ if (READ_ONCE(sk->sk_receive_queue.prev) != skb)
goto out;
/* Socket shut down? */
@@ -278,7 +278,7 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
break;
sk_busy_loop(sk, flags & MSG_DONTWAIT);
- } while (sk->sk_receive_queue.prev != *last);
+ } while (READ_ONCE(sk->sk_receive_queue.prev) != *last);
error = -EAGAIN;
@@ -640,7 +640,7 @@ int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
skb->len += copied;
skb->truesize += truesize;
if (sk && sk->sk_type == SOCK_STREAM) {
- sk->sk_wmem_queued += truesize;
+ sk_wmem_queued_add(sk, truesize);
sk_mem_charge(sk, truesize);
} else {
refcount_add(truesize, &skb->sk->sk_wmem_alloc);
@@ -767,7 +767,7 @@ __poll_t datagram_poll(struct file *file, struct socket *sock,
mask = 0;
/* exceptional events? */
- if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+ if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
mask |= EPOLLERR |
(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
@@ -777,7 +777,7 @@ __poll_t datagram_poll(struct file *file, struct socket *sock,
mask |= EPOLLHUP;
/* readable? */
- if (!skb_queue_empty(&sk->sk_receive_queue))
+ if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
mask |= EPOLLIN | EPOLLRDNORM;
/* Connection-based need to check for termination and startup */
diff --git a/net/core/dev.c b/net/core/dev.c
index bf3ed413abaf..99ac84ff398f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -146,6 +146,7 @@
#include "net-sysfs.h"
#define MAX_GRO_SKBS 8
+#define MAX_NEST_DEV 8
/* This should be increased if a protocol with a bigger head is added. */
#define GRO_MAX_HEAD (MAX_HEADER + 128)
@@ -276,88 +277,6 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
EXPORT_PER_CPU_SYMBOL(softnet_data);
-#ifdef CONFIG_LOCKDEP
-/*
- * register_netdevice() inits txq->_xmit_lock and sets lockdep class
- * according to dev->type
- */
-static const unsigned short netdev_lock_type[] = {
- ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
- ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
- ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
- ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
- ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
- ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
- ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
- ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
- ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
- ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
- ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
- ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
- ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM,
- ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE,
- ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE};
-
-static const char *const netdev_lock_name[] = {
- "_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
- "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
- "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
- "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
- "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
- "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
- "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
- "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
- "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
- "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
- "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
- "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
- "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM",
- "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE",
- "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"};
-
-static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
-static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
-
-static inline unsigned short netdev_lock_pos(unsigned short dev_type)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
- if (netdev_lock_type[i] == dev_type)
- return i;
- /* the last key is used by default */
- return ARRAY_SIZE(netdev_lock_type) - 1;
-}
-
-static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
- unsigned short dev_type)
-{
- int i;
-
- i = netdev_lock_pos(dev_type);
- lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
- netdev_lock_name[i]);
-}
-
-static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
-{
- int i;
-
- i = netdev_lock_pos(dev->type);
- lockdep_set_class_and_name(&dev->addr_list_lock,
- &netdev_addr_lock_key[i],
- netdev_lock_name[i]);
-}
-#else
-static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
- unsigned short dev_type)
-{
-}
-static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
-{
-}
-#endif
-
/*******************************************************************************
*
* Protocol management and registration routines
@@ -6489,6 +6408,9 @@ struct netdev_adjacent {
/* upper master flag, there can only be one master device per list */
bool master;
+ /* lookup ignore flag */
+ bool ignore;
+
/* counter for the number of times this device was added to us */
u16 ref_nr;
@@ -6511,7 +6433,7 @@ static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev,
return NULL;
}
-static int __netdev_has_upper_dev(struct net_device *upper_dev, void *data)
+static int ____netdev_has_upper_dev(struct net_device *upper_dev, void *data)
{
struct net_device *dev = data;
@@ -6532,7 +6454,7 @@ bool netdev_has_upper_dev(struct net_device *dev,
{
ASSERT_RTNL();
- return netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev,
+ return netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev,
upper_dev);
}
EXPORT_SYMBOL(netdev_has_upper_dev);
@@ -6550,7 +6472,7 @@ EXPORT_SYMBOL(netdev_has_upper_dev);
bool netdev_has_upper_dev_all_rcu(struct net_device *dev,
struct net_device *upper_dev)
{
- return !!netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev,
+ return !!netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev,
upper_dev);
}
EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu);
@@ -6594,6 +6516,22 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
}
EXPORT_SYMBOL(netdev_master_upper_dev_get);
+static struct net_device *__netdev_master_upper_dev_get(struct net_device *dev)
+{
+ struct netdev_adjacent *upper;
+
+ ASSERT_RTNL();
+
+ if (list_empty(&dev->adj_list.upper))
+ return NULL;
+
+ upper = list_first_entry(&dev->adj_list.upper,
+ struct netdev_adjacent, list);
+ if (likely(upper->master) && !upper->ignore)
+ return upper->dev;
+ return NULL;
+}
+
/**
* netdev_has_any_lower_dev - Check if device is linked to some device
* @dev: device
@@ -6644,6 +6582,23 @@ struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
}
EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
+static struct net_device *__netdev_next_upper_dev(struct net_device *dev,
+ struct list_head **iter,
+ bool *ignore)
+{
+ struct netdev_adjacent *upper;
+
+ upper = list_entry((*iter)->next, struct netdev_adjacent, list);
+
+ if (&upper->list == &dev->adj_list.upper)
+ return NULL;
+
+ *iter = &upper->list;
+ *ignore = upper->ignore;
+
+ return upper->dev;
+}
+
static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev,
struct list_head **iter)
{
@@ -6661,34 +6616,111 @@ static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev,
return upper->dev;
}
+static int __netdev_walk_all_upper_dev(struct net_device *dev,
+ int (*fn)(struct net_device *dev,
+ void *data),
+ void *data)
+{
+ struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
+ struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
+ int ret, cur = 0;
+ bool ignore;
+
+ now = dev;
+ iter = &dev->adj_list.upper;
+
+ while (1) {
+ if (now != dev) {
+ ret = fn(now, data);
+ if (ret)
+ return ret;
+ }
+
+ next = NULL;
+ while (1) {
+ udev = __netdev_next_upper_dev(now, &iter, &ignore);
+ if (!udev)
+ break;
+ if (ignore)
+ continue;
+
+ next = udev;
+ niter = &udev->adj_list.upper;
+ dev_stack[cur] = now;
+ iter_stack[cur++] = iter;
+ break;
+ }
+
+ if (!next) {
+ if (!cur)
+ return 0;
+ next = dev_stack[--cur];
+ niter = iter_stack[cur];
+ }
+
+ now = next;
+ iter = niter;
+ }
+
+ return 0;
+}
+
int netdev_walk_all_upper_dev_rcu(struct net_device *dev,
int (*fn)(struct net_device *dev,
void *data),
void *data)
{
- struct net_device *udev;
- struct list_head *iter;
- int ret;
+ struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
+ struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
+ int ret, cur = 0;
- for (iter = &dev->adj_list.upper,
- udev = netdev_next_upper_dev_rcu(dev, &iter);
- udev;
- udev = netdev_next_upper_dev_rcu(dev, &iter)) {
- /* first is the upper device itself */
- ret = fn(udev, data);
- if (ret)
- return ret;
+ now = dev;
+ iter = &dev->adj_list.upper;
- /* then look at all of its upper devices */
- ret = netdev_walk_all_upper_dev_rcu(udev, fn, data);
- if (ret)
- return ret;
+ while (1) {
+ if (now != dev) {
+ ret = fn(now, data);
+ if (ret)
+ return ret;
+ }
+
+ next = NULL;
+ while (1) {
+ udev = netdev_next_upper_dev_rcu(now, &iter);
+ if (!udev)
+ break;
+
+ next = udev;
+ niter = &udev->adj_list.upper;
+ dev_stack[cur] = now;
+ iter_stack[cur++] = iter;
+ break;
+ }
+
+ if (!next) {
+ if (!cur)
+ return 0;
+ next = dev_stack[--cur];
+ niter = iter_stack[cur];
+ }
+
+ now = next;
+ iter = niter;
}
return 0;
}
EXPORT_SYMBOL_GPL(netdev_walk_all_upper_dev_rcu);
+static bool __netdev_has_upper_dev(struct net_device *dev,
+ struct net_device *upper_dev)
+{
+ ASSERT_RTNL();
+
+ return __netdev_walk_all_upper_dev(dev, ____netdev_has_upper_dev,
+ upper_dev);
+}
+
/**
* netdev_lower_get_next_private - Get the next ->private from the
* lower neighbour list
@@ -6785,34 +6817,119 @@ static struct net_device *netdev_next_lower_dev(struct net_device *dev,
return lower->dev;
}
+static struct net_device *__netdev_next_lower_dev(struct net_device *dev,
+ struct list_head **iter,
+ bool *ignore)
+{
+ struct netdev_adjacent *lower;
+
+ lower = list_entry((*iter)->next, struct netdev_adjacent, list);
+
+ if (&lower->list == &dev->adj_list.lower)
+ return NULL;
+
+ *iter = &lower->list;
+ *ignore = lower->ignore;
+
+ return lower->dev;
+}
+
int netdev_walk_all_lower_dev(struct net_device *dev,
int (*fn)(struct net_device *dev,
void *data),
void *data)
{
- struct net_device *ldev;
- struct list_head *iter;
- int ret;
+ struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
+ struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
+ int ret, cur = 0;
- for (iter = &dev->adj_list.lower,
- ldev = netdev_next_lower_dev(dev, &iter);
- ldev;
- ldev = netdev_next_lower_dev(dev, &iter)) {
- /* first is the lower device itself */
- ret = fn(ldev, data);
- if (ret)
- return ret;
+ now = dev;
+ iter = &dev->adj_list.lower;
- /* then look at all of its lower devices */
- ret = netdev_walk_all_lower_dev(ldev, fn, data);
- if (ret)
- return ret;
+ while (1) {
+ if (now != dev) {
+ ret = fn(now, data);
+ if (ret)
+ return ret;
+ }
+
+ next = NULL;
+ while (1) {
+ ldev = netdev_next_lower_dev(now, &iter);
+ if (!ldev)
+ break;
+
+ next = ldev;
+ niter = &ldev->adj_list.lower;
+ dev_stack[cur] = now;
+ iter_stack[cur++] = iter;
+ break;
+ }
+
+ if (!next) {
+ if (!cur)
+ return 0;
+ next = dev_stack[--cur];
+ niter = iter_stack[cur];
+ }
+
+ now = next;
+ iter = niter;
}
return 0;
}
EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev);
+static int __netdev_walk_all_lower_dev(struct net_device *dev,
+ int (*fn)(struct net_device *dev,
+ void *data),
+ void *data)
+{
+ struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
+ struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
+ int ret, cur = 0;
+ bool ignore;
+
+ now = dev;
+ iter = &dev->adj_list.lower;
+
+ while (1) {
+ if (now != dev) {
+ ret = fn(now, data);
+ if (ret)
+ return ret;
+ }
+
+ next = NULL;
+ while (1) {
+ ldev = __netdev_next_lower_dev(now, &iter, &ignore);
+ if (!ldev)
+ break;
+ if (ignore)
+ continue;
+
+ next = ldev;
+ niter = &ldev->adj_list.lower;
+ dev_stack[cur] = now;
+ iter_stack[cur++] = iter;
+ break;
+ }
+
+ if (!next) {
+ if (!cur)
+ return 0;
+ next = dev_stack[--cur];
+ niter = iter_stack[cur];
+ }
+
+ now = next;
+ iter = niter;
+ }
+
+ return 0;
+}
+
static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
struct list_head **iter)
{
@@ -6827,28 +6944,99 @@ static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
return lower->dev;
}
-int netdev_walk_all_lower_dev_rcu(struct net_device *dev,
- int (*fn)(struct net_device *dev,
- void *data),
- void *data)
+static u8 __netdev_upper_depth(struct net_device *dev)
+{
+ struct net_device *udev;
+ struct list_head *iter;
+ u8 max_depth = 0;
+ bool ignore;
+
+ for (iter = &dev->adj_list.upper,
+ udev = __netdev_next_upper_dev(dev, &iter, &ignore);
+ udev;
+ udev = __netdev_next_upper_dev(dev, &iter, &ignore)) {
+ if (ignore)
+ continue;
+ if (max_depth < udev->upper_level)
+ max_depth = udev->upper_level;
+ }
+
+ return max_depth;
+}
+
+static u8 __netdev_lower_depth(struct net_device *dev)
{
struct net_device *ldev;
struct list_head *iter;
- int ret;
+ u8 max_depth = 0;
+ bool ignore;
for (iter = &dev->adj_list.lower,
- ldev = netdev_next_lower_dev_rcu(dev, &iter);
+ ldev = __netdev_next_lower_dev(dev, &iter, &ignore);
ldev;
- ldev = netdev_next_lower_dev_rcu(dev, &iter)) {
- /* first is the lower device itself */
- ret = fn(ldev, data);
- if (ret)
- return ret;
+ ldev = __netdev_next_lower_dev(dev, &iter, &ignore)) {
+ if (ignore)
+ continue;
+ if (max_depth < ldev->lower_level)
+ max_depth = ldev->lower_level;
+ }
- /* then look at all of its lower devices */
- ret = netdev_walk_all_lower_dev_rcu(ldev, fn, data);
- if (ret)
- return ret;
+ return max_depth;
+}
+
+static int __netdev_update_upper_level(struct net_device *dev, void *data)
+{
+ dev->upper_level = __netdev_upper_depth(dev) + 1;
+ return 0;
+}
+
+static int __netdev_update_lower_level(struct net_device *dev, void *data)
+{
+ dev->lower_level = __netdev_lower_depth(dev) + 1;
+ return 0;
+}
+
+int netdev_walk_all_lower_dev_rcu(struct net_device *dev,
+ int (*fn)(struct net_device *dev,
+ void *data),
+ void *data)
+{
+ struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
+ struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
+ int ret, cur = 0;
+
+ now = dev;
+ iter = &dev->adj_list.lower;
+
+ while (1) {
+ if (now != dev) {
+ ret = fn(now, data);
+ if (ret)
+ return ret;
+ }
+
+ next = NULL;
+ while (1) {
+ ldev = netdev_next_lower_dev_rcu(now, &iter);
+ if (!ldev)
+ break;
+
+ next = ldev;
+ niter = &ldev->adj_list.lower;
+ dev_stack[cur] = now;
+ iter_stack[cur++] = iter;
+ break;
+ }
+
+ if (!next) {
+ if (!cur)
+ return 0;
+ next = dev_stack[--cur];
+ niter = iter_stack[cur];
+ }
+
+ now = next;
+ iter = niter;
}
return 0;
@@ -6952,6 +7140,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
adj->master = master;
adj->ref_nr = 1;
adj->private = private;
+ adj->ignore = false;
dev_hold(adj_dev);
pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d; dev_hold on %s\n",
@@ -7102,14 +7291,17 @@ static int __netdev_upper_dev_link(struct net_device *dev,
return -EBUSY;
/* To prevent loops, check if dev is not upper device to upper_dev. */
- if (netdev_has_upper_dev(upper_dev, dev))
+ if (__netdev_has_upper_dev(upper_dev, dev))
return -EBUSY;
+ if ((dev->lower_level + upper_dev->upper_level) > MAX_NEST_DEV)
+ return -EMLINK;
+
if (!master) {
- if (netdev_has_upper_dev(dev, upper_dev))
+ if (__netdev_has_upper_dev(dev, upper_dev))
return -EEXIST;
} else {
- master_dev = netdev_master_upper_dev_get(dev);
+ master_dev = __netdev_master_upper_dev_get(dev);
if (master_dev)
return master_dev == upper_dev ? -EEXIST : -EBUSY;
}
@@ -7131,6 +7323,13 @@ static int __netdev_upper_dev_link(struct net_device *dev,
if (ret)
goto rollback;
+ __netdev_update_upper_level(dev, NULL);
+ __netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL);
+
+ __netdev_update_lower_level(upper_dev, NULL);
+ __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level,
+ NULL);
+
return 0;
rollback:
@@ -7213,9 +7412,96 @@ void netdev_upper_dev_unlink(struct net_device *dev,
call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
&changeupper_info.info);
+
+ __netdev_update_upper_level(dev, NULL);
+ __netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL);
+
+ __netdev_update_lower_level(upper_dev, NULL);
+ __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level,
+ NULL);
}
EXPORT_SYMBOL(netdev_upper_dev_unlink);
+static void __netdev_adjacent_dev_set(struct net_device *upper_dev,
+ struct net_device *lower_dev,
+ bool val)
+{
+ struct netdev_adjacent *adj;
+
+ adj = __netdev_find_adj(lower_dev, &upper_dev->adj_list.lower);
+ if (adj)
+ adj->ignore = val;
+
+ adj = __netdev_find_adj(upper_dev, &lower_dev->adj_list.upper);
+ if (adj)
+ adj->ignore = val;
+}
+
+static void netdev_adjacent_dev_disable(struct net_device *upper_dev,
+ struct net_device *lower_dev)
+{
+ __netdev_adjacent_dev_set(upper_dev, lower_dev, true);
+}
+
+static void netdev_adjacent_dev_enable(struct net_device *upper_dev,
+ struct net_device *lower_dev)
+{
+ __netdev_adjacent_dev_set(upper_dev, lower_dev, false);
+}
+
+int netdev_adjacent_change_prepare(struct net_device *old_dev,
+ struct net_device *new_dev,
+ struct net_device *dev,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ if (!new_dev)
+ return 0;
+
+ if (old_dev && new_dev != old_dev)
+ netdev_adjacent_dev_disable(dev, old_dev);
+
+ err = netdev_upper_dev_link(new_dev, dev, extack);
+ if (err) {
+ if (old_dev && new_dev != old_dev)
+ netdev_adjacent_dev_enable(dev, old_dev);
+ return err;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(netdev_adjacent_change_prepare);
+
+void netdev_adjacent_change_commit(struct net_device *old_dev,
+ struct net_device *new_dev,
+ struct net_device *dev)
+{
+ if (!new_dev || !old_dev)
+ return;
+
+ if (new_dev == old_dev)
+ return;
+
+ netdev_adjacent_dev_enable(dev, old_dev);
+ netdev_upper_dev_unlink(old_dev, dev);
+}
+EXPORT_SYMBOL(netdev_adjacent_change_commit);
+
+void netdev_adjacent_change_abort(struct net_device *old_dev,
+ struct net_device *new_dev,
+ struct net_device *dev)
+{
+ if (!new_dev)
+ return;
+
+ if (old_dev && new_dev != old_dev)
+ netdev_adjacent_dev_enable(dev, old_dev);
+
+ netdev_upper_dev_unlink(new_dev, dev);
+}
+EXPORT_SYMBOL(netdev_adjacent_change_abort);
+
/**
* netdev_bonding_info_change - Dispatch event about slave change
* @dev: device
@@ -7329,25 +7615,6 @@ void *netdev_lower_dev_get_private(struct net_device *dev,
EXPORT_SYMBOL(netdev_lower_dev_get_private);
-int dev_get_nest_level(struct net_device *dev)
-{
- struct net_device *lower = NULL;
- struct list_head *iter;
- int max_nest = -1;
- int nest;
-
- ASSERT_RTNL();
-
- netdev_for_each_lower_dev(dev, lower, iter) {
- nest = dev_get_nest_level(lower);
- if (max_nest < nest)
- max_nest = nest;
- }
-
- return max_nest + 1;
-}
-EXPORT_SYMBOL(dev_get_nest_level);
-
/**
* netdev_lower_change - Dispatch event about lower device state change
* @lower_dev: device
@@ -8154,7 +8421,8 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
return -EINVAL;
}
- if (prog->aux->id == prog_id) {
+ /* prog->aux->id may be 0 for orphaned device-bound progs */
+ if (prog->aux->id && prog->aux->id == prog_id) {
bpf_prog_put(prog);
return 0;
}
@@ -8619,7 +8887,7 @@ static void netdev_init_one_queue(struct net_device *dev,
{
/* Initialize queue lock */
spin_lock_init(&queue->_xmit_lock);
- netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
+ lockdep_set_class(&queue->_xmit_lock, &dev->qdisc_xmit_lock_key);
queue->xmit_lock_owner = -1;
netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
queue->dev = dev;
@@ -8666,6 +8934,43 @@ void netif_tx_stop_all_queues(struct net_device *dev)
}
EXPORT_SYMBOL(netif_tx_stop_all_queues);
+static void netdev_register_lockdep_key(struct net_device *dev)
+{
+ lockdep_register_key(&dev->qdisc_tx_busylock_key);
+ lockdep_register_key(&dev->qdisc_running_key);
+ lockdep_register_key(&dev->qdisc_xmit_lock_key);
+ lockdep_register_key(&dev->addr_list_lock_key);
+}
+
+static void netdev_unregister_lockdep_key(struct net_device *dev)
+{
+ lockdep_unregister_key(&dev->qdisc_tx_busylock_key);
+ lockdep_unregister_key(&dev->qdisc_running_key);
+ lockdep_unregister_key(&dev->qdisc_xmit_lock_key);
+ lockdep_unregister_key(&dev->addr_list_lock_key);
+}
+
+void netdev_update_lockdep_key(struct net_device *dev)
+{
+ struct netdev_queue *queue;
+ int i;
+
+ lockdep_unregister_key(&dev->qdisc_xmit_lock_key);
+ lockdep_unregister_key(&dev->addr_list_lock_key);
+
+ lockdep_register_key(&dev->qdisc_xmit_lock_key);
+ lockdep_register_key(&dev->addr_list_lock_key);
+
+ lockdep_set_class(&dev->addr_list_lock, &dev->addr_list_lock_key);
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ queue = netdev_get_tx_queue(dev, i);
+
+ lockdep_set_class(&queue->_xmit_lock,
+ &dev->qdisc_xmit_lock_key);
+ }
+}
+EXPORT_SYMBOL(netdev_update_lockdep_key);
+
/**
* register_netdevice - register a network device
* @dev: device to register
@@ -8700,7 +9005,7 @@ int register_netdevice(struct net_device *dev)
BUG_ON(!net);
spin_lock_init(&dev->addr_list_lock);
- netdev_set_addr_lockdep_class(dev);
+ lockdep_set_class(&dev->addr_list_lock, &dev->addr_list_lock_key);
ret = dev_get_valid_name(net, dev, dev->name);
if (ret < 0)
@@ -9210,8 +9515,12 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev_net_set(dev, &init_net);
+ netdev_register_lockdep_key(dev);
+
dev->gso_max_size = GSO_MAX_SIZE;
dev->gso_max_segs = GSO_MAX_SEGS;
+ dev->upper_level = 1;
+ dev->lower_level = 1;
INIT_LIST_HEAD(&dev->napi_list);
INIT_LIST_HEAD(&dev->unreg_list);
@@ -9292,6 +9601,8 @@ void free_netdev(struct net_device *dev)
free_percpu(dev->pcpu_refcnt);
dev->pcpu_refcnt = NULL;
+ netdev_unregister_lockdep_key(dev);
+
/* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED) {
netdev_freemem(dev);
@@ -9460,7 +9771,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
rcu_barrier();
- new_nsid = peernet2id_alloc(dev_net(dev), net);
+ new_nsid = peernet2id_alloc(dev_net(dev), net, GFP_KERNEL);
/* If there is an ifindex conflict assign a new one */
if (__dev_get_by_index(net, dev->ifindex))
new_ifindex = dev_new_index(net);
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 6393ba930097..2f949b5a1eb9 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -637,7 +637,7 @@ int dev_uc_sync(struct net_device *to, struct net_device *from)
if (to->addr_len != from->addr_len)
return -EINVAL;
- netif_addr_lock_nested(to);
+ netif_addr_lock(to);
err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
if (!err)
__dev_set_rx_mode(to);
@@ -667,7 +667,7 @@ int dev_uc_sync_multiple(struct net_device *to, struct net_device *from)
if (to->addr_len != from->addr_len)
return -EINVAL;
- netif_addr_lock_nested(to);
+ netif_addr_lock(to);
err = __hw_addr_sync_multiple(&to->uc, &from->uc, to->addr_len);
if (!err)
__dev_set_rx_mode(to);
@@ -691,7 +691,7 @@ void dev_uc_unsync(struct net_device *to, struct net_device *from)
return;
netif_addr_lock_bh(from);
- netif_addr_lock_nested(to);
+ netif_addr_lock(to);
__hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
__dev_set_rx_mode(to);
netif_addr_unlock(to);
@@ -858,7 +858,7 @@ int dev_mc_sync(struct net_device *to, struct net_device *from)
if (to->addr_len != from->addr_len)
return -EINVAL;
- netif_addr_lock_nested(to);
+ netif_addr_lock(to);
err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len);
if (!err)
__dev_set_rx_mode(to);
@@ -888,7 +888,7 @@ int dev_mc_sync_multiple(struct net_device *to, struct net_device *from)
if (to->addr_len != from->addr_len)
return -EINVAL;
- netif_addr_lock_nested(to);
+ netif_addr_lock(to);
err = __hw_addr_sync_multiple(&to->mc, &from->mc, to->addr_len);
if (!err)
__dev_set_rx_mode(to);
@@ -912,7 +912,7 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from)
return;
netif_addr_lock_bh(from);
- netif_addr_lock_nested(to);
+ netif_addr_lock(to);
__hw_addr_unsync(&to->mc, &from->mc, to->addr_len);
__dev_set_rx_mode(to);
netif_addr_unlock(to);
diff --git a/net/core/devlink.c b/net/core/devlink.c
index f80151eeaf51..93905dc7c179 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -2699,7 +2699,7 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
struct devlink *devlink = info->user_ptr[0];
int err;
- if (!devlink_reload_supported(devlink))
+ if (!devlink_reload_supported(devlink) || !devlink->reload_enabled)
return -EOPNOTSUPP;
err = devlink_resources_validate(devlink, NULL, info);
@@ -4618,6 +4618,7 @@ struct devlink_health_reporter {
bool auto_recover;
u8 health_state;
u64 dump_ts;
+ u64 dump_real_ts;
u64 error_count;
u64 recovery_count;
u64 last_recovery_ts;
@@ -4790,6 +4791,7 @@ static int devlink_health_do_dump(struct devlink_health_reporter *reporter,
goto dump_err;
reporter->dump_ts = jiffies;
+ reporter->dump_real_ts = ktime_get_real_ns();
return 0;
@@ -4952,6 +4954,10 @@ devlink_nl_health_reporter_fill(struct sk_buff *msg,
jiffies_to_msecs(reporter->dump_ts),
DEVLINK_ATTR_PAD))
goto reporter_nest_cancel;
+ if (reporter->dump_fmsg &&
+ nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS,
+ reporter->dump_real_ts, DEVLINK_ATTR_PAD))
+ goto reporter_nest_cancel;
nla_nest_end(msg, reporter_attr);
genlmsg_end(msg, hdr);
@@ -6196,6 +6202,8 @@ EXPORT_SYMBOL_GPL(devlink_register);
void devlink_unregister(struct devlink *devlink)
{
mutex_lock(&devlink_mutex);
+ WARN_ON(devlink_reload_supported(devlink) &&
+ devlink->reload_enabled);
devlink_notify(devlink, DEVLINK_CMD_DEL);
list_del(&devlink->list);
mutex_unlock(&devlink_mutex);
@@ -6203,6 +6211,41 @@ void devlink_unregister(struct devlink *devlink)
EXPORT_SYMBOL_GPL(devlink_unregister);
/**
+ * devlink_reload_enable - Enable reload of devlink instance
+ *
+ * @devlink: devlink
+ *
+ * Should be called at end of device initialization
+ * process when reload operation is supported.
+ */
+void devlink_reload_enable(struct devlink *devlink)
+{
+ mutex_lock(&devlink_mutex);
+ devlink->reload_enabled = true;
+ mutex_unlock(&devlink_mutex);
+}
+EXPORT_SYMBOL_GPL(devlink_reload_enable);
+
+/**
+ * devlink_reload_disable - Disable reload of devlink instance
+ *
+ * @devlink: devlink
+ *
+ * Should be called at the beginning of device cleanup
+ * process when reload operation is supported.
+ */
+void devlink_reload_disable(struct devlink *devlink)
+{
+ mutex_lock(&devlink_mutex);
+ /* Mutex is taken which ensures that no reload operation is in
+ * progress while setting up forbidded flag.
+ */
+ devlink->reload_enabled = false;
+ mutex_unlock(&devlink_mutex);
+}
+EXPORT_SYMBOL_GPL(devlink_reload_disable);
+
+/**
* devlink_free - Free devlink instance resources
*
* @devlink: devlink
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index c763106c73fc..cd9bc67381b2 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1396,11 +1396,13 @@ static int ethtool_reset(struct net_device *dev, char __user *useraddr)
static int ethtool_get_wol(struct net_device *dev, char __user *useraddr)
{
- struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL };
+ struct ethtool_wolinfo wol;
if (!dev->ethtool_ops->get_wol)
return -EOPNOTSUPP;
+ memset(&wol, 0, sizeof(struct ethtool_wolinfo));
+ wol.cmd = ETHTOOL_GWOL;
dev->ethtool_ops->get_wol(dev, &wol);
if (copy_to_user(useraddr, &wol, sizeof(wol)))
diff --git a/net/core/filter.c b/net/core/filter.c
index ed6563622ce3..3fed5755494b 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4252,12 +4252,14 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
case SO_RCVBUF:
val = min_t(u32, val, sysctl_rmem_max);
sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
- sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
+ WRITE_ONCE(sk->sk_rcvbuf,
+ max_t(int, val * 2, SOCK_MIN_RCVBUF));
break;
case SO_SNDBUF:
val = min_t(u32, val, sysctl_wmem_max);
sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
- sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
+ WRITE_ONCE(sk->sk_sndbuf,
+ max_t(int, val * 2, SOCK_MIN_SNDBUF));
break;
case SO_MAX_PACING_RATE: /* 32bit version */
if (val != ~0U)
@@ -4274,7 +4276,7 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
case SO_RCVLOWAT:
if (val < 0)
val = INT_MAX;
- sk->sk_rcvlowat = val ? : 1;
+ WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
break;
case SO_MARK:
if (sk->sk_mark != val) {
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 7c09d87d3269..68eda10d0680 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -1350,30 +1350,21 @@ out_bad:
}
EXPORT_SYMBOL(__skb_flow_dissect);
-static u32 hashrnd __read_mostly;
+static siphash_key_t hashrnd __read_mostly;
static __always_inline void __flow_hash_secret_init(void)
{
net_get_random_once(&hashrnd, sizeof(hashrnd));
}
-static __always_inline u32 __flow_hash_words(const u32 *words, u32 length,
- u32 keyval)
+static const void *flow_keys_hash_start(const struct flow_keys *flow)
{
- return jhash2(words, length, keyval);
-}
-
-static inline const u32 *flow_keys_hash_start(const struct flow_keys *flow)
-{
- const void *p = flow;
-
- BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % sizeof(u32));
- return (const u32 *)(p + FLOW_KEYS_HASH_OFFSET);
+ BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % SIPHASH_ALIGNMENT);
+ return &flow->FLOW_KEYS_HASH_START_FIELD;
}
static inline size_t flow_keys_hash_length(const struct flow_keys *flow)
{
size_t diff = FLOW_KEYS_HASH_OFFSET + sizeof(flow->addrs);
- BUILD_BUG_ON((sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) % sizeof(u32));
BUILD_BUG_ON(offsetof(typeof(*flow), addrs) !=
sizeof(*flow) - sizeof(flow->addrs));
@@ -1388,7 +1379,7 @@ static inline size_t flow_keys_hash_length(const struct flow_keys *flow)
diff -= sizeof(flow->addrs.tipckey);
break;
}
- return (sizeof(*flow) - diff) / sizeof(u32);
+ return sizeof(*flow) - diff;
}
__be32 flow_get_u32_src(const struct flow_keys *flow)
@@ -1454,14 +1445,15 @@ static inline void __flow_hash_consistentify(struct flow_keys *keys)
}
}
-static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval)
+static inline u32 __flow_hash_from_keys(struct flow_keys *keys,
+ const siphash_key_t *keyval)
{
u32 hash;
__flow_hash_consistentify(keys);
- hash = __flow_hash_words(flow_keys_hash_start(keys),
- flow_keys_hash_length(keys), keyval);
+ hash = siphash(flow_keys_hash_start(keys),
+ flow_keys_hash_length(keys), keyval);
if (!hash)
hash = 1;
@@ -1471,12 +1463,13 @@ static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval)
u32 flow_hash_from_keys(struct flow_keys *keys)
{
__flow_hash_secret_init();
- return __flow_hash_from_keys(keys, hashrnd);
+ return __flow_hash_from_keys(keys, &hashrnd);
}
EXPORT_SYMBOL(flow_hash_from_keys);
static inline u32 ___skb_get_hash(const struct sk_buff *skb,
- struct flow_keys *keys, u32 keyval)
+ struct flow_keys *keys,
+ const siphash_key_t *keyval)
{
skb_flow_dissect_flow_keys(skb, keys,
FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
@@ -1524,7 +1517,7 @@ u32 __skb_get_hash_symmetric(const struct sk_buff *skb)
&keys, NULL, 0, 0, 0,
FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
- return __flow_hash_from_keys(&keys, hashrnd);
+ return __flow_hash_from_keys(&keys, &hashrnd);
}
EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric);
@@ -1544,13 +1537,14 @@ void __skb_get_hash(struct sk_buff *skb)
__flow_hash_secret_init();
- hash = ___skb_get_hash(skb, &keys, hashrnd);
+ hash = ___skb_get_hash(skb, &keys, &hashrnd);
__skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys));
}
EXPORT_SYMBOL(__skb_get_hash);
-__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb)
+__u32 skb_get_hash_perturb(const struct sk_buff *skb,
+ const siphash_key_t *perturb)
{
struct flow_keys keys;
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index f93785e5833c..74cfb8b5ab33 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -88,11 +88,16 @@ static int bpf_lwt_input_reroute(struct sk_buff *skb)
int err = -EINVAL;
if (skb->protocol == htons(ETH_P_IP)) {
+ struct net_device *dev = skb_dst(skb)->dev;
struct iphdr *iph = ip_hdr(skb);
+ dev_hold(dev);
+ skb_dst_drop(skb);
err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
- iph->tos, skb_dst(skb)->dev);
+ iph->tos, dev);
+ dev_put(dev);
} else if (skb->protocol == htons(ETH_P_IPV6)) {
+ skb_dst_drop(skb);
err = ipv6_stub->ipv6_route_input(skb);
} else {
err = -EAFNOSUPPORT;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 865ba6ca16eb..ae3bcb1540ec 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -923,21 +923,23 @@ static int rx_queue_add_kobject(struct net_device *dev, int index)
error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL,
"rx-%u", index);
if (error)
- return error;
+ goto err;
dev_hold(queue->dev);
if (dev->sysfs_rx_queue_group) {
error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group);
- if (error) {
- kobject_put(kobj);
- return error;
- }
+ if (error)
+ goto err;
}
kobject_uevent(kobj, KOBJ_ADD);
return error;
+
+err:
+ kobject_put(kobj);
+ return error;
}
#endif /* CONFIG_SYSFS */
@@ -1461,21 +1463,22 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index)
error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL,
"tx-%u", index);
if (error)
- return error;
+ goto err;
dev_hold(queue->dev);
#ifdef CONFIG_BQL
error = sysfs_create_group(kobj, &dql_group);
- if (error) {
- kobject_put(kobj);
- return error;
- }
+ if (error)
+ goto err;
#endif
kobject_uevent(kobj, KOBJ_ADD);
-
return 0;
+
+err:
+ kobject_put(kobj);
+ return error;
}
#endif /* CONFIG_SYSFS */
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index a0e0d298c991..39402840025e 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -245,11 +245,12 @@ static int __peernet2id(struct net *net, struct net *peer)
return __peernet2id_alloc(net, peer, &no);
}
-static void rtnl_net_notifyid(struct net *net, int cmd, int id);
+static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid,
+ struct nlmsghdr *nlh, gfp_t gfp);
/* This function returns the id of a peer netns. If no id is assigned, one will
* be allocated and returned.
*/
-int peernet2id_alloc(struct net *net, struct net *peer)
+int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp)
{
bool alloc = false, alive = false;
int id;
@@ -268,7 +269,7 @@ int peernet2id_alloc(struct net *net, struct net *peer)
id = __peernet2id_alloc(net, peer, &alloc);
spin_unlock_bh(&net->nsid_lock);
if (alloc && id >= 0)
- rtnl_net_notifyid(net, RTM_NEWNSID, id);
+ rtnl_net_notifyid(net, RTM_NEWNSID, id, 0, NULL, gfp);
if (alive)
put_net(peer);
return id;
@@ -478,6 +479,7 @@ struct net *copy_net_ns(unsigned long flags,
if (rv < 0) {
put_userns:
+ key_remove_domain(net->key_domain);
put_user_ns(user_ns);
net_drop_ns(net);
dec_ucounts:
@@ -532,7 +534,8 @@ static void unhash_nsid(struct net *net, struct net *last)
idr_remove(&tmp->netns_ids, id);
spin_unlock_bh(&tmp->nsid_lock);
if (id >= 0)
- rtnl_net_notifyid(tmp, RTM_DELNSID, id);
+ rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL,
+ GFP_KERNEL);
if (tmp == last)
break;
}
@@ -764,7 +767,8 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh,
err = alloc_netid(net, peer, nsid);
spin_unlock_bh(&net->nsid_lock);
if (err >= 0) {
- rtnl_net_notifyid(net, RTM_NEWNSID, err);
+ rtnl_net_notifyid(net, RTM_NEWNSID, err, NETLINK_CB(skb).portid,
+ nlh, GFP_KERNEL);
err = 0;
} else if (err == -ENOSPC && nsid >= 0) {
err = -EEXIST;
@@ -1051,16 +1055,19 @@ end:
return err < 0 ? err : skb->len;
}
-static void rtnl_net_notifyid(struct net *net, int cmd, int id)
+static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid,
+ struct nlmsghdr *nlh, gfp_t gfp)
{
struct net_fill_args fillargs = {
+ .portid = portid,
+ .seq = nlh ? nlh->nlmsg_seq : 0,
.cmd = cmd,
.nsid = id,
};
struct sk_buff *msg;
int err = -ENOMEM;
- msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
+ msg = nlmsg_new(rtnl_net_get_size(), gfp);
if (!msg)
goto out;
@@ -1068,7 +1075,7 @@ static void rtnl_net_notifyid(struct net *net, int cmd, int id)
if (err < 0)
goto err_out;
- rtnl_notify(msg, net, 0, RTNLGRP_NSID, NULL, 0);
+ rtnl_notify(msg, net, portid, RTNLGRP_NSID, nlh, gfp);
return;
err_out:
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index c9bb00008528..f35c2e998406 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -96,7 +96,7 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
fastopenq = &inet_csk(lsk)->icsk_accept_queue.fastopenq;
- tcp_sk(sk)->fastopen_rsk = NULL;
+ RCU_INIT_POINTER(tcp_sk(sk)->fastopen_rsk, NULL);
spin_lock_bh(&fastopenq->lock);
fastopenq->qlen--;
tcp_rsk(req)->tfo_listener = false;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 1ee6460f8275..e4ec575c1fba 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1523,7 +1523,7 @@ static noinline_for_stack int nla_put_ifalias(struct sk_buff *skb,
static int rtnl_fill_link_netnsid(struct sk_buff *skb,
const struct net_device *dev,
- struct net *src_net)
+ struct net *src_net, gfp_t gfp)
{
bool put_iflink = false;
@@ -1531,7 +1531,7 @@ static int rtnl_fill_link_netnsid(struct sk_buff *skb,
struct net *link_net = dev->rtnl_link_ops->get_link_net(dev);
if (!net_eq(dev_net(dev), link_net)) {
- int id = peernet2id_alloc(src_net, link_net);
+ int id = peernet2id_alloc(src_net, link_net, gfp);
if (nla_put_s32(skb, IFLA_LINK_NETNSID, id))
return -EMSGSIZE;
@@ -1589,7 +1589,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
int type, u32 pid, u32 seq, u32 change,
unsigned int flags, u32 ext_filter_mask,
u32 event, int *new_nsid, int new_ifindex,
- int tgt_netnsid)
+ int tgt_netnsid, gfp_t gfp)
{
struct ifinfomsg *ifm;
struct nlmsghdr *nlh;
@@ -1681,7 +1681,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
goto nla_put_failure;
}
- if (rtnl_fill_link_netnsid(skb, dev, src_net))
+ if (rtnl_fill_link_netnsid(skb, dev, src_net, gfp))
goto nla_put_failure;
if (new_nsid &&
@@ -2001,7 +2001,7 @@ walk_entries:
NETLINK_CB(cb->skb).portid,
nlh->nlmsg_seq, 0, flags,
ext_filter_mask, 0, NULL, 0,
- netnsid);
+ netnsid, GFP_KERNEL);
if (err < 0) {
if (likely(skb->len))
@@ -2195,6 +2195,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
if (tb[IFLA_VF_MAC]) {
struct ifla_vf_mac *ivm = nla_data(tb[IFLA_VF_MAC]);
+ if (ivm->vf >= INT_MAX)
+ return -EINVAL;
err = -EOPNOTSUPP;
if (ops->ndo_set_vf_mac)
err = ops->ndo_set_vf_mac(dev, ivm->vf,
@@ -2206,6 +2208,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
if (tb[IFLA_VF_VLAN]) {
struct ifla_vf_vlan *ivv = nla_data(tb[IFLA_VF_VLAN]);
+ if (ivv->vf >= INT_MAX)
+ return -EINVAL;
err = -EOPNOTSUPP;
if (ops->ndo_set_vf_vlan)
err = ops->ndo_set_vf_vlan(dev, ivv->vf, ivv->vlan,
@@ -2238,6 +2242,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
if (len == 0)
return -EINVAL;
+ if (ivvl[0]->vf >= INT_MAX)
+ return -EINVAL;
err = ops->ndo_set_vf_vlan(dev, ivvl[0]->vf, ivvl[0]->vlan,
ivvl[0]->qos, ivvl[0]->vlan_proto);
if (err < 0)
@@ -2248,6 +2254,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
struct ifla_vf_tx_rate *ivt = nla_data(tb[IFLA_VF_TX_RATE]);
struct ifla_vf_info ivf;
+ if (ivt->vf >= INT_MAX)
+ return -EINVAL;
err = -EOPNOTSUPP;
if (ops->ndo_get_vf_config)
err = ops->ndo_get_vf_config(dev, ivt->vf, &ivf);
@@ -2266,6 +2274,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
if (tb[IFLA_VF_RATE]) {
struct ifla_vf_rate *ivt = nla_data(tb[IFLA_VF_RATE]);
+ if (ivt->vf >= INT_MAX)
+ return -EINVAL;
err = -EOPNOTSUPP;
if (ops->ndo_set_vf_rate)
err = ops->ndo_set_vf_rate(dev, ivt->vf,
@@ -2278,6 +2288,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
if (tb[IFLA_VF_SPOOFCHK]) {
struct ifla_vf_spoofchk *ivs = nla_data(tb[IFLA_VF_SPOOFCHK]);
+ if (ivs->vf >= INT_MAX)
+ return -EINVAL;
err = -EOPNOTSUPP;
if (ops->ndo_set_vf_spoofchk)
err = ops->ndo_set_vf_spoofchk(dev, ivs->vf,
@@ -2289,6 +2301,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
if (tb[IFLA_VF_LINK_STATE]) {
struct ifla_vf_link_state *ivl = nla_data(tb[IFLA_VF_LINK_STATE]);
+ if (ivl->vf >= INT_MAX)
+ return -EINVAL;
err = -EOPNOTSUPP;
if (ops->ndo_set_vf_link_state)
err = ops->ndo_set_vf_link_state(dev, ivl->vf,
@@ -2302,6 +2316,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
err = -EOPNOTSUPP;
ivrssq_en = nla_data(tb[IFLA_VF_RSS_QUERY_EN]);
+ if (ivrssq_en->vf >= INT_MAX)
+ return -EINVAL;
if (ops->ndo_set_vf_rss_query_en)
err = ops->ndo_set_vf_rss_query_en(dev, ivrssq_en->vf,
ivrssq_en->setting);
@@ -2312,6 +2328,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
if (tb[IFLA_VF_TRUST]) {
struct ifla_vf_trust *ivt = nla_data(tb[IFLA_VF_TRUST]);
+ if (ivt->vf >= INT_MAX)
+ return -EINVAL;
err = -EOPNOTSUPP;
if (ops->ndo_set_vf_trust)
err = ops->ndo_set_vf_trust(dev, ivt->vf, ivt->setting);
@@ -2322,15 +2340,18 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
if (tb[IFLA_VF_IB_NODE_GUID]) {
struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_NODE_GUID]);
+ if (ivt->vf >= INT_MAX)
+ return -EINVAL;
if (!ops->ndo_set_vf_guid)
return -EOPNOTSUPP;
-
return handle_vf_guid(dev, ivt, IFLA_VF_IB_NODE_GUID);
}
if (tb[IFLA_VF_IB_PORT_GUID]) {
struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_PORT_GUID]);
+ if (ivt->vf >= INT_MAX)
+ return -EINVAL;
if (!ops->ndo_set_vf_guid)
return -EOPNOTSUPP;
@@ -2355,6 +2376,7 @@ static int do_set_master(struct net_device *dev, int ifindex,
err = ops->ndo_del_slave(upper_dev, dev);
if (err)
return err;
+ netdev_update_lockdep_key(dev);
} else {
return -EOPNOTSUPP;
}
@@ -3359,7 +3381,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
err = rtnl_fill_ifinfo(nskb, dev, net,
RTM_NEWLINK, NETLINK_CB(skb).portid,
nlh->nlmsg_seq, 0, 0, ext_filter_mask,
- 0, NULL, 0, netnsid);
+ 0, NULL, 0, netnsid, GFP_KERNEL);
if (err < 0) {
/* -EMSGSIZE implies BUG in if_nlmsg_size */
WARN_ON(err == -EMSGSIZE);
@@ -3471,7 +3493,7 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
err = rtnl_fill_ifinfo(skb, dev, dev_net(dev),
type, 0, 0, change, 0, 0, event,
- new_nsid, new_ifindex, -1);
+ new_nsid, new_ifindex, -1, flags);
if (err < 0) {
/* -EMSGSIZE implies BUG in if_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
@@ -3916,7 +3938,7 @@ static int valid_fdb_dump_strict(const struct nlmsghdr *nlh,
ndm = nlmsg_data(nlh);
if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state ||
ndm->ndm_flags || ndm->ndm_type) {
- NL_SET_ERR_MSG(extack, "Invalid values in header for fbd dump request");
+ NL_SET_ERR_MSG(extack, "Invalid values in header for fdb dump request");
return -EINVAL;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 529133611ea2..867e61df00db 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4415,7 +4415,7 @@ static void skb_set_err_queue(struct sk_buff *skb)
int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
{
if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
- (unsigned int)sk->sk_rcvbuf)
+ (unsigned int)READ_ONCE(sk->sk_rcvbuf))
return -ENOMEM;
skb_orphan(skb);
@@ -5477,12 +5477,14 @@ static void skb_mod_eth_type(struct sk_buff *skb, struct ethhdr *hdr,
* @skb: buffer
* @mpls_lse: MPLS label stack entry to push
* @mpls_proto: ethertype of the new MPLS header (expects 0x8847 or 0x8848)
+ * @mac_len: length of the MAC header
*
* Expects skb->data at mac header.
*
* Returns 0 on success, -errno otherwise.
*/
-int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto)
+int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto,
+ int mac_len)
{
struct mpls_shim_hdr *lse;
int err;
@@ -5499,15 +5501,15 @@ int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto)
return err;
if (!skb->inner_protocol) {
- skb_set_inner_network_header(skb, skb->mac_len);
+ skb_set_inner_network_header(skb, mac_len);
skb_set_inner_protocol(skb, skb->protocol);
}
skb_push(skb, MPLS_HLEN);
memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
- skb->mac_len);
+ mac_len);
skb_reset_mac_header(skb);
- skb_set_network_header(skb, skb->mac_len);
+ skb_set_network_header(skb, mac_len);
lse = mpls_hdr(skb);
lse->label_stack_entry = mpls_lse;
@@ -5526,29 +5528,30 @@ EXPORT_SYMBOL_GPL(skb_mpls_push);
*
* @skb: buffer
* @next_proto: ethertype of header after popped MPLS header
+ * @mac_len: length of the MAC header
*
* Expects skb->data at mac header.
*
* Returns 0 on success, -errno otherwise.
*/
-int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto)
+int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len)
{
int err;
if (unlikely(!eth_p_mpls(skb->protocol)))
- return -EINVAL;
+ return 0;
- err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
+ err = skb_ensure_writable(skb, mac_len + MPLS_HLEN);
if (unlikely(err))
return err;
skb_postpull_rcsum(skb, mpls_hdr(skb), MPLS_HLEN);
memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
- skb->mac_len);
+ mac_len);
__skb_pull(skb, MPLS_HLEN);
skb_reset_mac_header(skb);
- skb_set_network_header(skb, skb->mac_len);
+ skb_set_network_header(skb, mac_len);
if (skb->dev && skb->dev->type == ARPHRD_ETHER) {
struct ethhdr *hdr;
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index cf390e0aa73d..ad31e4e53d0a 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -270,18 +270,28 @@ void sk_msg_trim(struct sock *sk, struct sk_msg *msg, int len)
msg->sg.data[i].length -= trim;
sk_mem_uncharge(sk, trim);
+ /* Adjust copybreak if it falls into the trimmed part of last buf */
+ if (msg->sg.curr == i && msg->sg.copybreak > msg->sg.data[i].length)
+ msg->sg.copybreak = msg->sg.data[i].length;
out:
- /* If we trim data before curr pointer update copybreak and current
- * so that any future copy operations start at new copy location.
+ sk_msg_iter_var_next(i);
+ msg->sg.end = i;
+
+ /* If we trim data a full sg elem before curr pointer update
+ * copybreak and current so that any future copy operations
+ * start at new copy location.
* However trimed data that has not yet been used in a copy op
* does not require an update.
*/
- if (msg->sg.curr >= i) {
+ if (!msg->sg.size) {
+ msg->sg.curr = msg->sg.start;
+ msg->sg.copybreak = 0;
+ } else if (sk_msg_iter_dist(msg->sg.start, msg->sg.curr) >=
+ sk_msg_iter_dist(msg->sg.start, msg->sg.end)) {
+ sk_msg_iter_var_prev(i);
msg->sg.curr = i;
msg->sg.copybreak = msg->sg.data[i].length;
}
- sk_msg_iter_var_next(i);
- msg->sg.end = i;
}
EXPORT_SYMBOL_GPL(sk_msg_trim);
diff --git a/net/core/sock.c b/net/core/sock.c
index fac2b4d80de5..ac78a570e43a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -522,7 +522,7 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
rc = sk_backlog_rcv(sk, skb);
mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
- } else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
+ } else if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf))) {
bh_unlock_sock(sk);
atomic_inc(&sk->sk_drops);
goto discard_and_relse;
@@ -785,7 +785,8 @@ set_sndbuf:
*/
val = min_t(int, val, INT_MAX / 2);
sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
- sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
+ WRITE_ONCE(sk->sk_sndbuf,
+ max_t(int, val * 2, SOCK_MIN_SNDBUF));
/* Wake up sending tasks if we upped the value. */
sk->sk_write_space(sk);
break;
@@ -831,7 +832,8 @@ set_rcvbuf:
* returning the value we actually used in getsockopt
* is the most desirable behavior.
*/
- sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
+ WRITE_ONCE(sk->sk_rcvbuf,
+ max_t(int, val * 2, SOCK_MIN_RCVBUF));
break;
case SO_RCVBUFFORCE:
@@ -974,7 +976,7 @@ set_rcvbuf:
if (sock->ops->set_rcvlowat)
ret = sock->ops->set_rcvlowat(sk, val);
else
- sk->sk_rcvlowat = val ? : 1;
+ WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
break;
case SO_RCVTIMEO_OLD:
@@ -1125,7 +1127,7 @@ set_rcvbuf:
break;
}
case SO_INCOMING_CPU:
- sk->sk_incoming_cpu = val;
+ WRITE_ONCE(sk->sk_incoming_cpu, val);
break;
case SO_CNX_ADVICE:
@@ -1474,7 +1476,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
break;
case SO_INCOMING_CPU:
- v.val = sk->sk_incoming_cpu;
+ v.val = READ_ONCE(sk->sk_incoming_cpu);
break;
case SO_MEMINFO:
@@ -2088,8 +2090,10 @@ EXPORT_SYMBOL(sock_i_ino);
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
gfp_t priority)
{
- if (force || refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
+ if (force ||
+ refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf)) {
struct sk_buff *skb = alloc_skb(size, priority);
+
if (skb) {
skb_set_owner_w(skb, sk);
return skb;
@@ -2190,7 +2194,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
break;
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- if (refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
+ if (refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf))
break;
if (sk->sk_shutdown & SEND_SHUTDOWN)
break;
@@ -2225,7 +2229,7 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
if (sk->sk_shutdown & SEND_SHUTDOWN)
goto failure;
- if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf)
+ if (sk_wmem_alloc_get(sk) < READ_ONCE(sk->sk_sndbuf))
break;
sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
@@ -2334,8 +2338,8 @@ static void sk_leave_memory_pressure(struct sock *sk)
} else {
unsigned long *memory_pressure = sk->sk_prot->memory_pressure;
- if (memory_pressure && *memory_pressure)
- *memory_pressure = 0;
+ if (memory_pressure && READ_ONCE(*memory_pressure))
+ WRITE_ONCE(*memory_pressure, 0);
}
}
@@ -2806,7 +2810,7 @@ static void sock_def_write_space(struct sock *sk)
/* Do not wake up a writer until he can make "significant"
* progress. --DaveM
*/
- if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
+ if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= READ_ONCE(sk->sk_sndbuf)) {
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
@@ -3204,13 +3208,13 @@ void sk_get_meminfo(const struct sock *sk, u32 *mem)
memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS);
mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
- mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
+ mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf);
mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
- mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf;
+ mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf);
mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
- mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
+ mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued);
mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
- mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len;
+ mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len);
mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
}
@@ -3596,7 +3600,7 @@ bool sk_busy_loop_end(void *p, unsigned long start_time)
{
struct sock *sk = p;
- return !skb_queue_empty(&sk->sk_receive_queue) ||
+ return !skb_queue_empty_lockless(&sk->sk_receive_queue) ||
sk_busy_loop_timeout(sk, start_time);
}
EXPORT_SYMBOL(sk_busy_loop_end);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index d9b4200ed12d..d19557c6d04b 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -117,7 +117,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_daddr,
inet->inet_sport,
inet->inet_dport);
- inet->inet_id = dp->dccps_iss ^ jiffies;
+ inet->inet_id = prandom_u32();
err = dccp_connect(sk);
rt = NULL;
@@ -416,7 +416,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt));
newinet->mc_index = inet_iif(skb);
newinet->mc_ttl = ip_hdr(skb)->ttl;
- newinet->inet_id = jiffies;
+ newinet->inet_id = prandom_u32();
if (dst == NULL && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL)
goto put_and_exit;
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 0ea75286abf4..3349ea81f901 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1205,7 +1205,7 @@ static __poll_t dn_poll(struct file *file, struct socket *sock, poll_table *wai
struct dn_scp *scp = DN_SK(sk);
__poll_t mask = datagram_poll(file, sock, wait);
- if (!skb_queue_empty(&scp->other_receive_queue))
+ if (!skb_queue_empty_lockless(&scp->other_receive_queue))
mask |= EPOLLRDBAND;
return mask;
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 73002022c9d8..716d265ba8ca 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -46,7 +46,7 @@ static struct dsa_switch_tree *dsa_tree_alloc(int index)
dst->index = index;
INIT_LIST_HEAD(&dst->list);
- list_add_tail(&dsa_tree_list, &dst->list);
+ list_add_tail(&dst->list, &dsa_tree_list);
kref_init(&dst->refcount);
diff --git a/net/dsa/master.c b/net/dsa/master.c
index a8e52c9967f4..3255dfc97f86 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -310,8 +310,6 @@ static void dsa_master_reset_mtu(struct net_device *dev)
rtnl_unlock();
}
-static struct lock_class_key dsa_master_addr_list_lock_key;
-
int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
{
int ret;
@@ -325,9 +323,6 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp)
wmb();
dev->dsa_ptr = cpu_dp;
- lockdep_set_class(&dev->addr_list_lock,
- &dsa_master_addr_list_lock_key);
-
ret = dsa_master_ethtool_setup(dev);
if (ret)
return ret;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 75d58229a4bd..028e65f4b5ba 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1341,15 +1341,6 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev)
return ret;
}
-static struct lock_class_key dsa_slave_netdev_xmit_lock_key;
-static void dsa_slave_set_lockdep_class_one(struct net_device *dev,
- struct netdev_queue *txq,
- void *_unused)
-{
- lockdep_set_class(&txq->_xmit_lock,
- &dsa_slave_netdev_xmit_lock_key);
-}
-
int dsa_slave_suspend(struct net_device *slave_dev)
{
struct dsa_port *dp = dsa_slave_to_port(slave_dev);
@@ -1433,9 +1424,6 @@ int dsa_slave_create(struct dsa_port *port)
slave_dev->max_mtu = ETH_MAX_MTU;
SET_NETDEV_DEVTYPE(slave_dev, &dsa_type);
- netdev_for_each_tx_queue(slave_dev, dsa_slave_set_lockdep_class_one,
- NULL);
-
SET_NETDEV_DEV(slave_dev, port->ds->dev);
slave_dev->dev.of_node = port->dn;
slave_dev->vlan_features = master->vlan_features;
diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c
index 9c1cc2482b68..9e5a883a9f0c 100644
--- a/net/dsa/tag_8021q.c
+++ b/net/dsa/tag_8021q.c
@@ -106,7 +106,7 @@ static int dsa_8021q_restore_pvid(struct dsa_switch *ds, int port)
slave = ds->ports[port].slave;
err = br_vlan_get_pvid(slave, &pvid);
- if (err < 0)
+ if (!pvid || err < 0)
/* There is no pvid on the bridge for this port, which is
* perfectly valid. Nothing to restore, bye-bye!
*/
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index 3297e7fa9945..c0b107cdd715 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -58,13 +58,6 @@ static const struct header_ops lowpan_header_ops = {
.create = lowpan_header_create,
};
-static int lowpan_dev_init(struct net_device *ldev)
-{
- netdev_lockdep_set_classes(ldev);
-
- return 0;
-}
-
static int lowpan_open(struct net_device *dev)
{
if (!open_count)
@@ -96,7 +89,6 @@ static int lowpan_get_iflink(const struct net_device *dev)
}
static const struct net_device_ops lowpan_netdev_ops = {
- .ndo_init = lowpan_dev_init,
.ndo_start_xmit = lowpan_xmit,
.ndo_open = lowpan_open,
.ndo_stop = lowpan_stop,
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 9a0fe0c2fa02..4a8550c49202 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -73,7 +73,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
reuseport_has_conns(sk, true);
sk->sk_state = TCP_ESTABLISHED;
sk_set_txhash(sk);
- inet->inet_id = jiffies;
+ inet->inet_id = prandom_u32();
sk_dst_set(sk, &rt->dst);
err = 0;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index dde77f72e03e..71c78d223dfd 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1148,7 +1148,7 @@ void fib_modify_prefix_metric(struct in_ifaddr *ifa, u32 new_metric)
if (!(dev->flags & IFF_UP) ||
ifa->ifa_flags & (IFA_F_SECONDARY | IFA_F_NOPREFIXROUTE) ||
ipv4_is_zeronet(prefix) ||
- prefix == ifa->ifa_local || ifa->ifa_prefixlen == 32)
+ (prefix == ifa->ifa_local && ifa->ifa_prefixlen == 32))
return;
/* add the new */
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 0913a090b2bf..f1888c683426 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1814,8 +1814,8 @@ int fib_sync_down_addr(struct net_device *dev, __be32 local)
int ret = 0;
unsigned int hash = fib_laddr_hashfn(local);
struct hlist_head *head = &fib_info_laddrhash[hash];
+ int tb_id = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
struct net *net = dev_net(dev);
- int tb_id = l3mdev_fib_table(dev);
struct fib_info *fi;
if (!fib_info_laddrhash || local == 0)
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index a9183543ca30..eb30fc1770de 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -906,7 +906,7 @@ static void inet_child_forget(struct sock *sk, struct request_sock *req,
percpu_counter_inc(sk->sk_prot->orphan_count);
if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) {
- BUG_ON(tcp_sk(child)->fastopen_rsk != req);
+ BUG_ON(rcu_access_pointer(tcp_sk(child)->fastopen_rsk) != req);
BUG_ON(sk != req->rsk_listener);
/* Paranoid, to prevent race condition if
@@ -915,7 +915,7 @@ static void inet_child_forget(struct sock *sk, struct request_sock *req,
* Also to satisfy an assertion in
* tcp_v4_destroy_sock().
*/
- tcp_sk(child)->fastopen_rsk = NULL;
+ RCU_INIT_POINTER(tcp_sk(child)->fastopen_rsk, NULL);
}
inet_csk_destroy_sock(child);
}
@@ -934,7 +934,7 @@ struct sock *inet_csk_reqsk_queue_add(struct sock *sk,
req->sk = child;
req->dl_next = NULL;
if (queue->rskq_accept_head == NULL)
- queue->rskq_accept_head = req;
+ WRITE_ONCE(queue->rskq_accept_head, req);
else
queue->rskq_accept_tail->dl_next = req;
queue->rskq_accept_tail = req;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index bbb005eb5218..7dc79b973e6e 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -193,7 +193,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
if (ext & (1 << (INET_DIAG_MEMINFO - 1))) {
struct inet_diag_meminfo minfo = {
.idiag_rmem = sk_rmem_alloc_get(sk),
- .idiag_wmem = sk->sk_wmem_queued,
+ .idiag_wmem = READ_ONCE(sk->sk_wmem_queued),
.idiag_fmem = sk->sk_forward_alloc,
.idiag_tmem = sk_wmem_alloc_get(sk),
};
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 97824864e40d..83fb00153018 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -240,7 +240,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
return -1;
score = sk->sk_family == PF_INET ? 2 : 1;
- if (sk->sk_incoming_cpu == raw_smp_processor_id())
+ if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
score++;
}
return score;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 52690bb3e40f..10636fb6093e 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -509,9 +509,9 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
key = &tun_info->key;
if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
goto err_free_skb;
- md = ip_tunnel_info_opts(tun_info);
- if (!md)
+ if (tun_info->options_len < sizeof(*md))
goto err_free_skb;
+ md = ip_tunnel_info_opts(tun_info);
/* ERSPAN has fixed 8 byte GRE header */
version = md->version;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 28fca408812c..3d8baaaf7086 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -645,11 +645,12 @@ void ip_fraglist_prepare(struct sk_buff *skb, struct ip_fraglist_iter *iter)
EXPORT_SYMBOL(ip_fraglist_prepare);
void ip_frag_init(struct sk_buff *skb, unsigned int hlen,
- unsigned int ll_rs, unsigned int mtu,
+ unsigned int ll_rs, unsigned int mtu, bool DF,
struct ip_frag_state *state)
{
struct iphdr *iph = ip_hdr(skb);
+ state->DF = DF;
state->hlen = hlen;
state->ll_rs = ll_rs;
state->mtu = mtu;
@@ -668,9 +669,6 @@ static void ip_frag_ipcb(struct sk_buff *from, struct sk_buff *to,
/* Copy the flags to each fragment. */
IPCB(to)->flags = IPCB(from)->flags;
- if (IPCB(from)->flags & IPSKB_FRAG_PMTU)
- state->iph->frag_off |= htons(IP_DF);
-
/* ANK: dirty, but effective trick. Upgrade options only if
* the segment to be fragmented was THE FIRST (otherwise,
* options are already fixed) and make it ONCE
@@ -738,6 +736,8 @@ struct sk_buff *ip_frag_next(struct sk_buff *skb, struct ip_frag_state *state)
*/
iph = ip_hdr(skb2);
iph->frag_off = htons((state->offset >> 3));
+ if (state->DF)
+ iph->frag_off |= htons(IP_DF);
/*
* Added AC : If we are fragmenting a fragment that's not the
@@ -771,6 +771,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
struct rtable *rt = skb_rtable(skb);
unsigned int mtu, hlen, ll_rs;
struct ip_fraglist_iter iter;
+ ktime_t tstamp = skb->tstamp;
struct ip_frag_state state;
int err = 0;
@@ -846,6 +847,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
ip_fraglist_prepare(skb, &iter);
}
+ skb->tstamp = tstamp;
err = output(net, sk, skb);
if (!err)
@@ -881,7 +883,8 @@ slow_path:
* Fragment the datagram.
*/
- ip_frag_init(skb, hlen, ll_rs, mtu, &state);
+ ip_frag_init(skb, hlen, ll_rs, mtu, IPCB(skb)->flags & IPSKB_FRAG_PMTU,
+ &state);
/*
* Keep copying data until we run out.
@@ -900,6 +903,7 @@ slow_path:
/*
* Put this fragment into the sending queue.
*/
+ skb2->tstamp = tstamp;
err = output(net, sk, skb2);
if (err)
goto fail;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 716d5472c022..58007439cffd 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -2289,7 +2289,8 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
rcu_read_unlock();
return -ENODEV;
}
- skb2 = skb_clone(skb, GFP_ATOMIC);
+
+ skb2 = skb_realloc_headroom(skb, sizeof(struct iphdr));
if (!skb2) {
read_unlock(&mrt_lock);
rcu_read_unlock();
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 14654876127e..621f83434b24 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1482,7 +1482,7 @@ static bool rt_cache_route(struct fib_nh_common *nhc, struct rtable *rt)
prev = cmpxchg(p, orig, rt);
if (prev == orig) {
if (orig) {
- dst_dev_put(&orig->dst);
+ rt_add_uncached_list(orig);
dst_release(&orig->dst);
}
} else {
@@ -2470,14 +2470,17 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
int orig_oif = fl4->flowi4_oif;
unsigned int flags = 0;
struct rtable *rth;
- int err = -ENETUNREACH;
+ int err;
if (fl4->saddr) {
- rth = ERR_PTR(-EINVAL);
if (ipv4_is_multicast(fl4->saddr) ||
ipv4_is_lbcast(fl4->saddr) ||
- ipv4_is_zeronet(fl4->saddr))
+ ipv4_is_zeronet(fl4->saddr)) {
+ rth = ERR_PTR(-EINVAL);
goto out;
+ }
+
+ rth = ERR_PTR(-ENETUNREACH);
/* I removed check for oif == dev_out->oif here.
It was wrong for two reasons:
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 59ded25acd04..0902cb32bbad 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -1037,7 +1037,7 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_fib_multipath_hash_policy,
.extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
+ .extra2 = &two,
},
#endif
{
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f98a1882e537..d8876f0e9672 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -326,7 +326,7 @@ void tcp_enter_memory_pressure(struct sock *sk)
{
unsigned long val;
- if (tcp_memory_pressure)
+ if (READ_ONCE(tcp_memory_pressure))
return;
val = jiffies;
@@ -341,7 +341,7 @@ void tcp_leave_memory_pressure(struct sock *sk)
{
unsigned long val;
- if (!tcp_memory_pressure)
+ if (!READ_ONCE(tcp_memory_pressure))
return;
val = xchg(&tcp_memory_pressure, 0);
if (val)
@@ -450,8 +450,8 @@ void tcp_init_sock(struct sock *sk)
icsk->icsk_sync_mss = tcp_sync_mss;
- sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1];
- sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
+ WRITE_ONCE(sk->sk_sndbuf, sock_net(sk)->ipv4.sysctl_tcp_wmem[1]);
+ WRITE_ONCE(sk->sk_rcvbuf, sock_net(sk)->ipv4.sysctl_tcp_rmem[1]);
sk_sockets_allocated_inc(sk);
sk->sk_route_forced_caps = NETIF_F_GSO;
@@ -477,7 +477,7 @@ static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
static inline bool tcp_stream_is_readable(const struct tcp_sock *tp,
int target, struct sock *sk)
{
- return (tp->rcv_nxt - tp->copied_seq >= target) ||
+ return (READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq) >= target) ||
(sk->sk_prot->stream_memory_read ?
sk->sk_prot->stream_memory_read(sk) : false);
}
@@ -543,10 +543,10 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
/* Connected or passive Fast Open socket? */
if (state != TCP_SYN_SENT &&
- (state != TCP_SYN_RECV || tp->fastopen_rsk)) {
+ (state != TCP_SYN_RECV || rcu_access_pointer(tp->fastopen_rsk))) {
int target = sock_rcvlowat(sk, 0, INT_MAX);
- if (tp->urg_seq == tp->copied_seq &&
+ if (READ_ONCE(tp->urg_seq) == READ_ONCE(tp->copied_seq) &&
!sock_flag(sk, SOCK_URGINLINE) &&
tp->urg_data)
target++;
@@ -584,7 +584,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
}
/* This barrier is coupled with smp_wmb() in tcp_reset() */
smp_rmb();
- if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+ if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
mask |= EPOLLERR;
return mask;
@@ -607,7 +607,8 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
unlock_sock_fast(sk, slow);
break;
case SIOCATMARK:
- answ = tp->urg_data && tp->urg_seq == tp->copied_seq;
+ answ = tp->urg_data &&
+ READ_ONCE(tp->urg_seq) == READ_ONCE(tp->copied_seq);
break;
case SIOCOUTQ:
if (sk->sk_state == TCP_LISTEN)
@@ -616,7 +617,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
answ = 0;
else
- answ = tp->write_seq - tp->snd_una;
+ answ = READ_ONCE(tp->write_seq) - tp->snd_una;
break;
case SIOCOUTQNSD:
if (sk->sk_state == TCP_LISTEN)
@@ -625,7 +626,8 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
answ = 0;
else
- answ = tp->write_seq - tp->snd_nxt;
+ answ = READ_ONCE(tp->write_seq) -
+ READ_ONCE(tp->snd_nxt);
break;
default:
return -ENOIOCTLCMD;
@@ -657,7 +659,7 @@ static void skb_entail(struct sock *sk, struct sk_buff *skb)
tcb->sacked = 0;
__skb_header_release(skb);
tcp_add_write_queue_tail(sk, skb);
- sk->sk_wmem_queued += skb->truesize;
+ sk_wmem_queued_add(sk, skb->truesize);
sk_mem_charge(sk, skb->truesize);
if (tp->nonagle & TCP_NAGLE_PUSH)
tp->nonagle &= ~TCP_NAGLE_PUSH;
@@ -1032,10 +1034,10 @@ new_segment:
skb->len += copy;
skb->data_len += copy;
skb->truesize += copy;
- sk->sk_wmem_queued += copy;
+ sk_wmem_queued_add(sk, copy);
sk_mem_charge(sk, copy);
skb->ip_summed = CHECKSUM_PARTIAL;
- tp->write_seq += copy;
+ WRITE_ONCE(tp->write_seq, tp->write_seq + copy);
TCP_SKB_CB(skb)->end_seq += copy;
tcp_skb_pcount_set(skb, 0);
@@ -1362,7 +1364,7 @@ new_segment:
if (!copied)
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
- tp->write_seq += copy;
+ WRITE_ONCE(tp->write_seq, tp->write_seq + copy);
TCP_SKB_CB(skb)->end_seq += copy;
tcp_skb_pcount_set(skb, 0);
@@ -1668,9 +1670,9 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
sk_eat_skb(sk, skb);
if (!desc->count)
break;
- tp->copied_seq = seq;
+ WRITE_ONCE(tp->copied_seq, seq);
}
- tp->copied_seq = seq;
+ WRITE_ONCE(tp->copied_seq, seq);
tcp_rcv_space_adjust(sk);
@@ -1699,7 +1701,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
else
cap = sock_net(sk)->ipv4.sysctl_tcp_rmem[2] >> 1;
val = min(val, cap);
- sk->sk_rcvlowat = val ? : 1;
+ WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
/* Check if we need to signal EPOLLIN right now */
tcp_data_ready(sk);
@@ -1709,7 +1711,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
val <<= 1;
if (val > sk->sk_rcvbuf) {
- sk->sk_rcvbuf = val;
+ WRITE_ONCE(sk->sk_rcvbuf, val);
tcp_sk(sk)->window_clamp = tcp_win_from_space(sk, val);
}
return 0;
@@ -1819,7 +1821,7 @@ static int tcp_zerocopy_receive(struct sock *sk,
out:
up_read(&current->mm->mmap_sem);
if (length) {
- tp->copied_seq = seq;
+ WRITE_ONCE(tp->copied_seq, seq);
tcp_rcv_space_adjust(sk);
/* Clean up data we have read: This will do ACK frames. */
@@ -1962,7 +1964,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
if (unlikely(flags & MSG_ERRQUEUE))
return inet_recv_error(sk, msg, len, addr_len);
- if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue) &&
+ if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue) &&
(sk->sk_state == TCP_ESTABLISHED))
sk_busy_loop(sk, nonblock);
@@ -2117,7 +2119,7 @@ found_ok_skb:
if (urg_offset < used) {
if (!urg_offset) {
if (!sock_flag(sk, SOCK_URGINLINE)) {
- ++*seq;
+ WRITE_ONCE(*seq, *seq + 1);
urg_hole++;
offset++;
used--;
@@ -2139,7 +2141,7 @@ found_ok_skb:
}
}
- *seq += used;
+ WRITE_ONCE(*seq, *seq + used);
copied += used;
len -= used;
@@ -2166,7 +2168,7 @@ skip_copy:
found_fin_ok:
/* Process the FIN. */
- ++*seq;
+ WRITE_ONCE(*seq, *seq + 1);
if (!(flags & MSG_PEEK))
sk_eat_skb(sk, skb);
break;
@@ -2487,7 +2489,10 @@ adjudge_to_death:
}
if (sk->sk_state == TCP_CLOSE) {
- struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
+ struct request_sock *req;
+
+ req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk,
+ lockdep_sock_is_held(sk));
/* We could get here with a non-NULL req if the socket is
* aborted (e.g., closed with unread data) before 3WHS
* finishes.
@@ -2559,6 +2564,7 @@ int tcp_disconnect(struct sock *sk, int flags)
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
int old_state = sk->sk_state;
+ u32 seq;
if (old_state != TCP_CLOSE)
tcp_set_state(sk, TCP_CLOSE);
@@ -2585,7 +2591,7 @@ int tcp_disconnect(struct sock *sk, int flags)
__kfree_skb(sk->sk_rx_skb_cache);
sk->sk_rx_skb_cache = NULL;
}
- tp->copied_seq = tp->rcv_nxt;
+ WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
tp->urg_data = 0;
tcp_write_queue_purge(sk);
tcp_fastopen_active_disable_ofo_check(sk);
@@ -2601,9 +2607,12 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->srtt_us = 0;
tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
tp->rcv_rtt_last_tsecr = 0;
- tp->write_seq += tp->max_window + 2;
- if (tp->write_seq == 0)
- tp->write_seq = 1;
+
+ seq = tp->write_seq + tp->max_window + 2;
+ if (!seq)
+ seq = 1;
+ WRITE_ONCE(tp->write_seq, seq);
+
icsk->icsk_backoff = 0;
tp->snd_cwnd = 2;
icsk->icsk_probes_out = 0;
@@ -2930,9 +2939,9 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
if (sk->sk_state != TCP_CLOSE)
err = -EPERM;
else if (tp->repair_queue == TCP_SEND_QUEUE)
- tp->write_seq = val;
+ WRITE_ONCE(tp->write_seq, val);
else if (tp->repair_queue == TCP_RECV_QUEUE)
- tp->rcv_nxt = val;
+ WRITE_ONCE(tp->rcv_nxt, val);
else
err = -EINVAL;
break;
@@ -3831,7 +3840,13 @@ EXPORT_SYMBOL(tcp_md5_hash_key);
void tcp_done(struct sock *sk)
{
- struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
+ struct request_sock *req;
+
+ /* We might be called with a new socket, after
+ * inet_csk_prepare_forced_close() has been called
+ * so we can not use lockdep_sock_is_held(sk)
+ */
+ req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk, 1);
if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 81a8221d650a..549506162dde 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -26,8 +26,9 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
} else if (sk->sk_type == SOCK_STREAM) {
const struct tcp_sock *tp = tcp_sk(sk);
- r->idiag_rqueue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
- r->idiag_wqueue = tp->write_seq - tp->snd_una;
+ r->idiag_rqueue = max_t(int, READ_ONCE(tp->rcv_nxt) -
+ READ_ONCE(tp->copied_seq), 0);
+ r->idiag_wqueue = READ_ONCE(tp->write_seq) - tp->snd_una;
}
if (info)
tcp_get_info(sk, info);
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 3fd451271a70..a915ade0c818 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -253,7 +253,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
*/
tp = tcp_sk(child);
- tp->fastopen_rsk = req;
+ rcu_assign_pointer(tp->fastopen_rsk, req);
tcp_rsk(req)->tfo_listener = true;
/* RFC1323: The window in SYN & SYN/ACK segments is never
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3578357abe30..a2e52ad7cdab 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -359,7 +359,8 @@ static void tcp_sndbuf_expand(struct sock *sk)
sndmem *= nr_segs * per_mss;
if (sk->sk_sndbuf < sndmem)
- sk->sk_sndbuf = min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]);
+ WRITE_ONCE(sk->sk_sndbuf,
+ min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]));
}
/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
@@ -483,8 +484,9 @@ static void tcp_clamp_window(struct sock *sk)
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
!tcp_under_memory_pressure(sk) &&
sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
- sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
- net->ipv4.sysctl_tcp_rmem[2]);
+ WRITE_ONCE(sk->sk_rcvbuf,
+ min(atomic_read(&sk->sk_rmem_alloc),
+ net->ipv4.sysctl_tcp_rmem[2]));
}
if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
@@ -648,7 +650,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
rcvbuf = min_t(u64, rcvwin * rcvmem,
sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
if (rcvbuf > sk->sk_rcvbuf) {
- sk->sk_rcvbuf = rcvbuf;
+ WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
/* Make the window clamp follow along. */
tp->window_clamp = tcp_win_from_space(sk, rcvbuf);
@@ -2666,7 +2668,7 @@ static void tcp_process_loss(struct sock *sk, int flag, int num_dupack,
struct tcp_sock *tp = tcp_sk(sk);
bool recovered = !before(tp->snd_una, tp->high_seq);
- if ((flag & FLAG_SND_UNA_ADVANCED || tp->fastopen_rsk) &&
+ if ((flag & FLAG_SND_UNA_ADVANCED || rcu_access_pointer(tp->fastopen_rsk)) &&
tcp_try_undo_loss(sk, false))
return;
@@ -2990,7 +2992,7 @@ void tcp_rearm_rto(struct sock *sk)
/* If the retrans timer is currently being used by Fast Open
* for SYN-ACK retrans purpose, stay put.
*/
- if (tp->fastopen_rsk)
+ if (rcu_access_pointer(tp->fastopen_rsk))
return;
if (!tp->packets_out) {
@@ -3362,7 +3364,7 @@ static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
sock_owned_by_me((struct sock *)tp);
tp->bytes_received += delta;
- tp->rcv_nxt = seq;
+ WRITE_ONCE(tp->rcv_nxt, seq);
}
/* Update our send window.
@@ -5356,7 +5358,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
}
tp->urg_data = TCP_URG_NOTYET;
- tp->urg_seq = ptr;
+ WRITE_ONCE(tp->urg_seq, ptr);
/* Disable header prediction. */
tp->pred_flags = 0;
@@ -5932,7 +5934,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
/* Ok.. it's good. Set up sequence numbers and
* move to established.
*/
- tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
+ WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1);
tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
/* RFC1323: The window in SYN & SYN/ACK segments is
@@ -5961,7 +5963,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
/* Remember, tcp_poll() does not lock socket!
* Change state from SYN-SENT only after copied_seq
* is initialized. */
- tp->copied_seq = tp->rcv_nxt;
+ WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
smc_check_reset_syn(tp);
@@ -6035,8 +6037,8 @@ discard:
tp->tcp_header_len = sizeof(struct tcphdr);
}
- tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
- tp->copied_seq = tp->rcv_nxt;
+ WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1);
+ WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
/* RFC1323: The window in SYN & SYN/ACK segments is
@@ -6087,6 +6089,8 @@ reset_and_undo:
static void tcp_rcv_synrecv_state_fastopen(struct sock *sk)
{
+ struct request_sock *req;
+
tcp_try_undo_loss(sk, false);
/* Reset rtx states to prevent spurious retransmits_timed_out() */
@@ -6096,7 +6100,9 @@ static void tcp_rcv_synrecv_state_fastopen(struct sock *sk)
/* Once we leave TCP_SYN_RECV or TCP_FIN_WAIT_1,
* we no longer need req so release it.
*/
- reqsk_fastopen_remove(sk, tcp_sk(sk)->fastopen_rsk, false);
+ req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk,
+ lockdep_sock_is_held(sk));
+ reqsk_fastopen_remove(sk, req, false);
/* Re-arm the timer because data may have been sent out.
* This is similar to the regular data transmission case
@@ -6171,7 +6177,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
tcp_mstamp_refresh(tp);
tp->rx_opt.saw_tstamp = 0;
- req = tp->fastopen_rsk;
+ req = rcu_dereference_protected(tp->fastopen_rsk,
+ lockdep_sock_is_held(sk));
if (req) {
bool req_stolen;
@@ -6211,7 +6218,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
tcp_try_undo_spurious_syn(sk);
tp->retrans_stamp = 0;
tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
- tp->copied_seq = tp->rcv_nxt;
+ WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
}
smp_mb();
tcp_set_state(sk, TCP_ESTABLISHED);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index bf124b1742df..67b2dc7a1727 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -164,9 +164,11 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
* without appearing to create any others.
*/
if (likely(!tp->repair)) {
- tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
- if (tp->write_seq == 0)
- tp->write_seq = 1;
+ u32 seq = tcptw->tw_snd_nxt + 65535 + 2;
+
+ if (!seq)
+ seq = 1;
+ WRITE_ONCE(tp->write_seq, seq);
tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
}
@@ -253,7 +255,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
tp->rx_opt.ts_recent = 0;
tp->rx_opt.ts_recent_stamp = 0;
if (likely(!tp->repair))
- tp->write_seq = 0;
+ WRITE_ONCE(tp->write_seq, 0);
}
inet->inet_dport = usin->sin_port;
@@ -291,16 +293,17 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (likely(!tp->repair)) {
if (!tp->write_seq)
- tp->write_seq = secure_tcp_seq(inet->inet_saddr,
- inet->inet_daddr,
- inet->inet_sport,
- usin->sin_port);
+ WRITE_ONCE(tp->write_seq,
+ secure_tcp_seq(inet->inet_saddr,
+ inet->inet_daddr,
+ inet->inet_sport,
+ usin->sin_port));
tp->tsoffset = secure_tcp_ts_off(sock_net(sk),
inet->inet_saddr,
inet->inet_daddr);
}
- inet->inet_id = tp->write_seq ^ jiffies;
+ inet->inet_id = prandom_u32();
if (tcp_fastopen_defer_connect(sk, &err))
return err;
@@ -478,7 +481,7 @@ int tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
icsk = inet_csk(sk);
tp = tcp_sk(sk);
/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
- fastopen = tp->fastopen_rsk;
+ fastopen = rcu_dereference(tp->fastopen_rsk);
snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
if (sk->sk_state != TCP_LISTEN &&
!between(seq, snd_una, tp->snd_nxt)) {
@@ -1447,7 +1450,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
inet_csk(newsk)->icsk_ext_hdr_len = 0;
if (inet_opt)
inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
- newinet->inet_id = newtp->write_seq ^ jiffies;
+ newinet->inet_id = prandom_u32();
if (!dst) {
dst = inet_csk_route_child_sock(sk, newsk, req);
@@ -1644,7 +1647,7 @@ int tcp_v4_early_demux(struct sk_buff *skb)
bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
{
- u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf;
+ u32 limit = READ_ONCE(sk->sk_rcvbuf) + READ_ONCE(sk->sk_sndbuf);
struct skb_shared_info *shinfo;
const struct tcphdr *th;
struct tcphdr *thtail;
@@ -2121,7 +2124,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
if (inet_csk(sk)->icsk_bind_hash)
inet_put_port(sk);
- BUG_ON(tp->fastopen_rsk);
+ BUG_ON(rcu_access_pointer(tp->fastopen_rsk));
/* If socket is aborted during connect operation */
tcp_free_fastopen_req(tp);
@@ -2455,12 +2458,13 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
/* Because we don't lock the socket,
* we might find a transient negative value.
*/
- rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
+ rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
+ READ_ONCE(tp->copied_seq), 0);
seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
"%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
i, src, srcp, dest, destp, state,
- tp->write_seq - tp->snd_una,
+ READ_ONCE(tp->write_seq) - tp->snd_una,
rx_queue,
timer_active,
jiffies_delta_to_clock_t(timer_expires - jiffies),
@@ -2677,7 +2681,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.tcp_death_row.sysctl_max_tw_buckets = cnt / 2;
net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo;
- net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 256);
+ net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 128);
net->ipv4.sysctl_tcp_sack = 1;
net->ipv4.sysctl_tcp_window_scaling = 1;
net->ipv4.sysctl_tcp_timestamps = 1;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index bb140a5db8c0..c802bc80c400 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -462,6 +462,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
struct tcp_request_sock *treq = tcp_rsk(req);
struct inet_connection_sock *newicsk;
struct tcp_sock *oldtp, *newtp;
+ u32 seq;
if (!newsk)
return NULL;
@@ -475,12 +476,16 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
/* Now setup tcp_sock */
newtp->pred_flags = 0;
- newtp->rcv_wup = newtp->copied_seq =
- newtp->rcv_nxt = treq->rcv_isn + 1;
+ seq = treq->rcv_isn + 1;
+ newtp->rcv_wup = seq;
+ WRITE_ONCE(newtp->copied_seq, seq);
+ WRITE_ONCE(newtp->rcv_nxt, seq);
newtp->segs_in = 1;
- newtp->snd_sml = newtp->snd_una =
- newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
+ seq = treq->snt_isn + 1;
+ newtp->snd_sml = newtp->snd_una = seq;
+ WRITE_ONCE(newtp->snd_nxt, seq);
+ newtp->snd_up = seq;
INIT_LIST_HEAD(&newtp->tsq_node);
INIT_LIST_HEAD(&newtp->tsorted_sent_queue);
@@ -495,7 +500,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->total_retrans = req->num_retrans;
tcp_init_xmit_timers(newsk);
- newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1;
+ WRITE_ONCE(newtp->write_seq, newtp->pushed_seq = treq->snt_isn + 1);
if (sock_flag(newsk, SOCK_KEEPOPEN))
inet_csk_reset_keepalive_timer(newsk,
@@ -541,7 +546,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->rx_opt.mss_clamp = req->mss;
tcp_ecn_openreq_child(newtp, req);
newtp->fastopen_req = NULL;
- newtp->fastopen_rsk = NULL;
+ RCU_INIT_POINTER(newtp->fastopen_rsk, NULL);
__TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index fec6d67bfd14..0488607c5cd3 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -67,7 +67,7 @@ static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
struct tcp_sock *tp = tcp_sk(sk);
unsigned int prior_packets = tp->packets_out;
- tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
+ WRITE_ONCE(tp->snd_nxt, TCP_SKB_CB(skb)->end_seq);
__skb_unlink(skb, &sk->sk_write_queue);
tcp_rbtree_insert(&sk->tcp_rtx_queue, skb);
@@ -1196,10 +1196,10 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
struct tcp_sock *tp = tcp_sk(sk);
/* Advance write_seq and place onto the write_queue. */
- tp->write_seq = TCP_SKB_CB(skb)->end_seq;
+ WRITE_ONCE(tp->write_seq, TCP_SKB_CB(skb)->end_seq);
__skb_header_release(skb);
tcp_add_write_queue_tail(sk, skb);
- sk->sk_wmem_queued += skb->truesize;
+ sk_wmem_queued_add(sk, skb->truesize);
sk_mem_charge(sk, skb->truesize);
}
@@ -1333,7 +1333,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
return -ENOMEM; /* We'll just try again later. */
skb_copy_decrypted(buff, skb);
- sk->sk_wmem_queued += buff->truesize;
+ sk_wmem_queued_add(sk, buff->truesize);
sk_mem_charge(sk, buff->truesize);
nlen = skb->len - len - nsize;
buff->truesize += nlen;
@@ -1443,7 +1443,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
if (delta_truesize) {
skb->truesize -= delta_truesize;
- sk->sk_wmem_queued -= delta_truesize;
+ sk_wmem_queued_add(sk, -delta_truesize);
sk_mem_uncharge(sk, delta_truesize);
sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
}
@@ -1888,7 +1888,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
return -ENOMEM;
skb_copy_decrypted(buff, skb);
- sk->sk_wmem_queued += buff->truesize;
+ sk_wmem_queued_add(sk, buff->truesize);
sk_mem_charge(sk, buff->truesize);
buff->truesize += nlen;
skb->truesize -= nlen;
@@ -2152,7 +2152,7 @@ static int tcp_mtu_probe(struct sock *sk)
nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false);
if (!nskb)
return -1;
- sk->sk_wmem_queued += nskb->truesize;
+ sk_wmem_queued_add(sk, nskb->truesize);
sk_mem_charge(sk, nskb->truesize);
skb = tcp_send_head(sk);
@@ -2482,7 +2482,7 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
/* Don't do any loss probe on a Fast Open connection before 3WHS
* finishes.
*/
- if (tp->fastopen_rsk)
+ if (rcu_access_pointer(tp->fastopen_rsk))
return false;
early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans;
@@ -3142,7 +3142,7 @@ void tcp_send_fin(struct sock *sk)
* if FIN had been sent. This is because retransmit path
* does not change tp->snd_nxt.
*/
- tp->snd_nxt++;
+ WRITE_ONCE(tp->snd_nxt, tp->snd_nxt + 1);
return;
}
} else {
@@ -3222,7 +3222,7 @@ int tcp_send_synack(struct sock *sk)
tcp_rtx_queue_unlink_and_free(skb, sk);
__skb_header_release(nskb);
tcp_rbtree_insert(&sk->tcp_rtx_queue, nskb);
- sk->sk_wmem_queued += nskb->truesize;
+ sk_wmem_queued_add(sk, nskb->truesize);
sk_mem_charge(sk, nskb->truesize);
skb = nskb;
}
@@ -3426,14 +3426,14 @@ static void tcp_connect_init(struct sock *sk)
tp->snd_una = tp->write_seq;
tp->snd_sml = tp->write_seq;
tp->snd_up = tp->write_seq;
- tp->snd_nxt = tp->write_seq;
+ WRITE_ONCE(tp->snd_nxt, tp->write_seq);
if (likely(!tp->repair))
tp->rcv_nxt = 0;
else
tp->rcv_tstamp = tcp_jiffies32;
tp->rcv_wup = tp->rcv_nxt;
- tp->copied_seq = tp->rcv_nxt;
+ WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
inet_csk(sk)->icsk_rto = tcp_timeout_init(sk);
inet_csk(sk)->icsk_retransmits = 0;
@@ -3447,9 +3447,9 @@ static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
tcb->end_seq += skb->len;
__skb_header_release(skb);
- sk->sk_wmem_queued += skb->truesize;
+ sk_wmem_queued_add(sk, skb->truesize);
sk_mem_charge(sk, skb->truesize);
- tp->write_seq = tcb->end_seq;
+ WRITE_ONCE(tp->write_seq, tcb->end_seq);
tp->packets_out += tcp_skb_pcount(skb);
}
@@ -3586,11 +3586,11 @@ int tcp_connect(struct sock *sk)
/* We change tp->snd_nxt after the tcp_transmit_skb() call
* in order to make this packet get counted in tcpOutSegs.
*/
- tp->snd_nxt = tp->write_seq;
+ WRITE_ONCE(tp->snd_nxt, tp->write_seq);
tp->pushed_seq = tp->write_seq;
buff = tcp_send_head(sk);
if (unlikely(buff)) {
- tp->snd_nxt = TCP_SKB_CB(buff)->seq;
+ WRITE_ONCE(tp->snd_nxt, TCP_SKB_CB(buff)->seq);
tp->pushed_seq = TCP_SKB_CB(buff)->seq;
}
TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 05be564414e9..dd5a6317a801 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -386,15 +386,13 @@ abort: tcp_write_err(sk);
* Timer for Fast Open socket to retransmit SYNACK. Note that the
* sk here is the child socket, not the parent (listener) socket.
*/
-static void tcp_fastopen_synack_timer(struct sock *sk)
+static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req)
{
struct inet_connection_sock *icsk = inet_csk(sk);
int max_retries = icsk->icsk_syn_retries ? :
sock_net(sk)->ipv4.sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */
struct tcp_sock *tp = tcp_sk(sk);
- struct request_sock *req;
- req = tcp_sk(sk)->fastopen_rsk;
req->rsk_ops->syn_ack_timeout(req);
if (req->num_timeout >= max_retries) {
@@ -435,11 +433,14 @@ void tcp_retransmit_timer(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
struct net *net = sock_net(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
+ struct request_sock *req;
- if (tp->fastopen_rsk) {
+ req = rcu_dereference_protected(tp->fastopen_rsk,
+ lockdep_sock_is_held(sk));
+ if (req) {
WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
sk->sk_state != TCP_FIN_WAIT1);
- tcp_fastopen_synack_timer(sk);
+ tcp_fastopen_synack_timer(sk, req);
/* Before we receive ACK to our SYN-ACK don't retransmit
* anything else (e.g., data or FIN segments).
*/
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 14bc654b6842..447defbfccdd 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -388,7 +388,7 @@ static int compute_score(struct sock *sk, struct net *net,
return -1;
score += 4;
- if (sk->sk_incoming_cpu == raw_smp_processor_id())
+ if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
score++;
return score;
}
@@ -1297,6 +1297,27 @@ out:
#define UDP_SKB_IS_STATELESS 0x80000000
+/* all head states (dst, sk, nf conntrack) except skb extensions are
+ * cleared by udp_rcv().
+ *
+ * We need to preserve secpath, if present, to eventually process
+ * IP_CMSG_PASSSEC at recvmsg() time.
+ *
+ * Other extensions can be cleared.
+ */
+static bool udp_try_make_stateless(struct sk_buff *skb)
+{
+ if (!skb_has_extensions(skb))
+ return true;
+
+ if (!secpath_exists(skb)) {
+ skb_ext_reset(skb);
+ return true;
+ }
+
+ return false;
+}
+
static void udp_set_dev_scratch(struct sk_buff *skb)
{
struct udp_dev_scratch *scratch = udp_skb_scratch(skb);
@@ -1308,14 +1329,24 @@ static void udp_set_dev_scratch(struct sk_buff *skb)
scratch->csum_unnecessary = !!skb_csum_unnecessary(skb);
scratch->is_linear = !skb_is_nonlinear(skb);
#endif
- /* all head states execept sp (dst, sk, nf) are always cleared by
- * udp_rcv() and we need to preserve secpath, if present, to eventually
- * process IP_CMSG_PASSSEC at recvmsg() time
- */
- if (likely(!skb_sec_path(skb)))
+ if (udp_try_make_stateless(skb))
scratch->_tsize_state |= UDP_SKB_IS_STATELESS;
}
+static void udp_skb_csum_unnecessary_set(struct sk_buff *skb)
+{
+ /* We come here after udp_lib_checksum_complete() returned 0.
+ * This means that __skb_checksum_complete() might have
+ * set skb->csum_valid to 1.
+ * On 64bit platforms, we can set csum_unnecessary
+ * to true, but only if the skb is not shared.
+ */
+#if BITS_PER_LONG == 64
+ if (!skb_shared(skb))
+ udp_skb_scratch(skb)->csum_unnecessary = true;
+#endif
+}
+
static int udp_skb_truesize(struct sk_buff *skb)
{
return udp_skb_scratch(skb)->_tsize_state & ~UDP_SKB_IS_STATELESS;
@@ -1550,10 +1581,7 @@ static struct sk_buff *__first_packet_length(struct sock *sk,
*total += skb->truesize;
kfree_skb(skb);
} else {
- /* the csum related bits could be changed, refresh
- * the scratch area
- */
- udp_set_dev_scratch(skb);
+ udp_skb_csum_unnecessary_set(skb);
break;
}
}
@@ -1577,7 +1605,7 @@ static int first_packet_length(struct sock *sk)
spin_lock_bh(&rcvq->lock);
skb = __first_packet_length(sk, rcvq, &total);
- if (!skb && !skb_queue_empty(sk_queue)) {
+ if (!skb && !skb_queue_empty_lockless(sk_queue)) {
spin_lock(&sk_queue->lock);
skb_queue_splice_tail_init(sk_queue, rcvq);
spin_unlock(&sk_queue->lock);
@@ -1650,7 +1678,7 @@ struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
return skb;
}
- if (skb_queue_empty(sk_queue)) {
+ if (skb_queue_empty_lockless(sk_queue)) {
spin_unlock_bh(&queue->lock);
goto busy_check;
}
@@ -1676,7 +1704,7 @@ busy_check:
break;
sk_busy_loop(sk, flags & MSG_DONTWAIT);
- } while (!skb_queue_empty(sk_queue));
+ } while (!skb_queue_empty_lockless(sk_queue));
/* sk_queue is empty, reader_queue may contain peeked packets */
} while (timeo &&
@@ -2712,7 +2740,7 @@ __poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait)
__poll_t mask = datagram_poll(file, sock, wait);
struct sock *sk = sock->sk;
- if (!skb_queue_empty(&udp_sk(sk)->reader_queue))
+ if (!skb_queue_empty_lockless(&udp_sk(sk)->reader_queue))
mask |= EPOLLIN | EPOLLRDNORM;
/* Check for false positives due to checksum errors */
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 783f3c1466da..2fc079284ca4 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -7,6 +7,7 @@
#include <linux/export.h>
#include <net/ipv6.h>
#include <net/ipv6_stubs.h>
+#include <net/addrconf.h>
#include <net/ip.h>
/* if ipv6 module registers this function is used by xfrm to force all
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index cf60fae9533b..fbe9d4295eac 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -105,7 +105,7 @@ static inline int compute_score(struct sock *sk, struct net *net,
return -1;
score = 1;
- if (sk->sk_incoming_cpu == raw_smp_processor_id())
+ if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
score++;
}
return score;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index d5779d6a6065..923034c52ce4 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -980,9 +980,9 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
dsfield = key->tos;
if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
goto tx_err;
- md = ip_tunnel_info_opts(tun_info);
- if (!md)
+ if (tun_info->options_len < sizeof(*md))
goto tx_err;
+ md = ip_tunnel_info_opts(tun_info);
tun_id = tunnel_id_to_key32(key->tun_id);
if (md->version == 1) {
@@ -2192,6 +2192,7 @@ static void ip6erspan_tap_setup(struct net_device *dev)
{
ether_setup(dev);
+ dev->max_mtu = 0;
dev->netdev_ops = &ip6erspan_netdev_ops;
dev->needs_free_netdev = true;
dev->priv_destructor = ip6gre_dev_free;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index edadee4a7e76..71827b56c006 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -768,6 +768,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
inet6_sk(skb->sk) : NULL;
struct ip6_frag_state state;
unsigned int mtu, hlen, nexthdr_offset;
+ ktime_t tstamp = skb->tstamp;
int hroom, err = 0;
__be32 frag_id;
u8 *prevhdr, nexthdr = 0;
@@ -855,6 +856,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
if (iter.frag)
ip6_fraglist_prepare(skb, &iter);
+ skb->tstamp = tstamp;
err = output(net, sk, skb);
if (!err)
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
@@ -913,6 +915,7 @@ slow_path:
/*
* Put this fragment into the sending queue.
*/
+ frag->tstamp = tstamp;
err = output(net, sk, frag);
if (err)
goto fail;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 264c292e7dcc..79fc012dd2ca 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -363,8 +363,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
break;
case IPV6_TRANSPARENT:
- if (valbool && !ns_capable(net->user_ns, CAP_NET_ADMIN) &&
- !ns_capable(net->user_ns, CAP_NET_RAW)) {
+ if (valbool && !ns_capable(net->user_ns, CAP_NET_RAW) &&
+ !ns_capable(net->user_ns, CAP_NET_ADMIN)) {
retv = -EPERM;
break;
}
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index a9bff556d3b2..409e79b84a83 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -119,6 +119,7 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
struct sk_buff *))
{
int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
+ ktime_t tstamp = skb->tstamp;
struct ip6_frag_state state;
u8 *prevhdr, nexthdr = 0;
unsigned int mtu, hlen;
@@ -183,6 +184,7 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
if (iter.frag)
ip6_fraglist_prepare(skb, &iter);
+ skb->tstamp = tstamp;
err = output(net, sk, data, skb);
if (err || !iter.frag)
break;
@@ -215,6 +217,7 @@ slow_path:
goto blackhole;
}
+ skb2->tstamp = tstamp;
err = output(net, sk, data, skb2);
if (err)
goto blackhole;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a63ff85fe141..3f83ea851ebf 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -621,6 +621,7 @@ static void rt6_probe(struct fib6_nh *fib6_nh)
{
struct __rt6_probe_work *work = NULL;
const struct in6_addr *nh_gw;
+ unsigned long last_probe;
struct neighbour *neigh;
struct net_device *dev;
struct inet6_dev *idev;
@@ -633,12 +634,13 @@ static void rt6_probe(struct fib6_nh *fib6_nh)
* Router Reachability Probe MUST be rate-limited
* to no more than one per minute.
*/
- if (fib6_nh->fib_nh_gw_family)
+ if (!fib6_nh->fib_nh_gw_family)
return;
nh_gw = &fib6_nh->fib_nh_gw6;
dev = fib6_nh->fib_nh_dev;
rcu_read_lock_bh();
+ last_probe = READ_ONCE(fib6_nh->last_probe);
idev = __in6_dev_get(dev);
neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
if (neigh) {
@@ -654,13 +656,15 @@ static void rt6_probe(struct fib6_nh *fib6_nh)
__neigh_set_probe_once(neigh);
}
write_unlock(&neigh->lock);
- } else if (time_after(jiffies, fib6_nh->last_probe +
+ } else if (time_after(jiffies, last_probe +
idev->cnf.rtr_probe_interval)) {
work = kmalloc(sizeof(*work), GFP_ATOMIC);
}
- if (work) {
- fib6_nh->last_probe = jiffies;
+ if (!work || cmpxchg(&fib6_nh->last_probe,
+ last_probe, jiffies) != last_probe) {
+ kfree(work);
+ } else {
INIT_WORK(&work->work, rt6_probe_deferred);
work->target = *nh_gw;
dev_hold(dev);
@@ -3383,6 +3387,9 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
int err;
fib6_nh->fib_nh_family = AF_INET6;
+#ifdef CONFIG_IPV6_ROUTER_PREF
+ fib6_nh->last_probe = jiffies;
+#endif
err = -ENODEV;
if (cfg->fc_ifindex) {
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 9d4f75e0d33a..e70567446f28 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -81,6 +81,11 @@ static struct ipv6_sr_hdr *get_srh(struct sk_buff *skb)
if (!pskb_may_pull(skb, srhoff + len))
return NULL;
+ /* note that pskb_may_pull may change pointers in header;
+ * for this reason it is necessary to reload them when needed.
+ */
+ srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
+
if (!seg6_validate_srh(srh, len))
return NULL;
@@ -336,6 +341,8 @@ static int input_action_end_dx6(struct sk_buff *skb,
if (!ipv6_addr_any(&slwt->nh6))
nhaddr = &slwt->nh6;
+ skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+
seg6_lookup_nexthop(skb, nhaddr, 0);
return dst_input(skb);
@@ -365,6 +372,8 @@ static int input_action_end_dx4(struct sk_buff *skb,
skb_dst_drop(skb);
+ skb_set_transport_header(skb, sizeof(struct iphdr));
+
err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
if (err)
goto drop;
@@ -385,6 +394,8 @@ static int input_action_end_dt6(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto drop;
+ skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+
seg6_lookup_nexthop(skb, NULL, slwt->table);
return dst_input(skb);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index e3d9f4559c99..4804b6dc5e65 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -215,7 +215,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
!ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
tp->rx_opt.ts_recent = 0;
tp->rx_opt.ts_recent_stamp = 0;
- tp->write_seq = 0;
+ WRITE_ONCE(tp->write_seq, 0);
}
sk->sk_v6_daddr = usin->sin6_addr;
@@ -311,10 +311,11 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
if (likely(!tp->repair)) {
if (!tp->write_seq)
- tp->write_seq = secure_tcpv6_seq(np->saddr.s6_addr32,
- sk->sk_v6_daddr.s6_addr32,
- inet->inet_sport,
- inet->inet_dport);
+ WRITE_ONCE(tp->write_seq,
+ secure_tcpv6_seq(np->saddr.s6_addr32,
+ sk->sk_v6_daddr.s6_addr32,
+ inet->inet_sport,
+ inet->inet_dport));
tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
np->saddr.s6_addr32,
sk->sk_v6_daddr.s6_addr32);
@@ -406,7 +407,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
tp = tcp_sk(sk);
/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
- fastopen = tp->fastopen_rsk;
+ fastopen = rcu_dereference(tp->fastopen_rsk);
snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
if (sk->sk_state != TCP_LISTEN &&
!between(seq, snd_una, tp->snd_nxt)) {
@@ -1895,7 +1896,8 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
/* Because we don't lock the socket,
* we might find a transient negative value.
*/
- rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
+ rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
+ READ_ONCE(tp->copied_seq), 0);
seq_printf(seq,
"%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
@@ -1906,7 +1908,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
dest->s6_addr32[0], dest->s6_addr32[1],
dest->s6_addr32[2], dest->s6_addr32[3], destp,
state,
- tp->write_seq - tp->snd_una,
+ READ_ONCE(tp->write_seq) - tp->snd_una,
rx_queue,
timer_active,
jiffies_delta_to_clock_t(timer_expires - jiffies),
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 6324d3a8cb53..9fec580c968e 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -135,7 +135,7 @@ static int compute_score(struct sock *sk, struct net *net,
return -1;
score++;
- if (sk->sk_incoming_cpu == raw_smp_processor_id())
+ if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
score++;
return score;
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index fd5ac2788e45..d3b520b9b2c9 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -56,7 +56,6 @@ static int l2tp_eth_dev_init(struct net_device *dev)
{
eth_hw_addr_random(dev);
eth_broadcast_addr(dev->broadcast);
- netdev_lockdep_set_classes(dev);
return 0;
}
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 2017b7d780f5..c74f44dfaa22 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -113,22 +113,26 @@ static inline u8 llc_ui_header_len(struct sock *sk, struct sockaddr_llc *addr)
*
* Send data via reliable llc2 connection.
* Returns 0 upon success, non-zero if action did not succeed.
+ *
+ * This function always consumes a reference to the skb.
*/
static int llc_ui_send_data(struct sock* sk, struct sk_buff *skb, int noblock)
{
struct llc_sock* llc = llc_sk(sk);
- int rc = 0;
if (unlikely(llc_data_accept_state(llc->state) ||
llc->remote_busy_flag ||
llc->p_flag)) {
long timeout = sock_sndtimeo(sk, noblock);
+ int rc;
rc = llc_ui_wait_for_busy_core(sk, timeout);
+ if (rc) {
+ kfree_skb(skb);
+ return rc;
+ }
}
- if (unlikely(!rc))
- rc = llc_build_and_send_pkt(sk, skb);
- return rc;
+ return llc_build_and_send_pkt(sk, skb);
}
static void llc_ui_sk_init(struct socket *sock, struct sock *sk)
@@ -899,7 +903,7 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
DECLARE_SOCKADDR(struct sockaddr_llc *, addr, msg->msg_name);
int flags = msg->msg_flags;
int noblock = flags & MSG_DONTWAIT;
- struct sk_buff *skb;
+ struct sk_buff *skb = NULL;
size_t size = 0;
int rc = -EINVAL, copied = 0, hdrlen;
@@ -908,10 +912,10 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
lock_sock(sk);
if (addr) {
if (msg->msg_namelen < sizeof(*addr))
- goto release;
+ goto out;
} else {
if (llc_ui_addr_null(&llc->addr))
- goto release;
+ goto out;
addr = &llc->addr;
}
/* must bind connection to sap if user hasn't done it. */
@@ -919,7 +923,7 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
/* bind to sap with null dev, exclusive. */
rc = llc_ui_autobind(sock, addr);
if (rc)
- goto release;
+ goto out;
}
hdrlen = llc->dev->hard_header_len + llc_ui_header_len(sk, addr);
size = hdrlen + len;
@@ -928,12 +932,12 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
copied = size - hdrlen;
rc = -EINVAL;
if (copied < 0)
- goto release;
+ goto out;
release_sock(sk);
skb = sock_alloc_send_skb(sk, size, noblock, &rc);
lock_sock(sk);
if (!skb)
- goto release;
+ goto out;
skb->dev = llc->dev;
skb->protocol = llc_proto_type(addr->sllc_arphrd);
skb_reserve(skb, hdrlen);
@@ -943,29 +947,31 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
if (sk->sk_type == SOCK_DGRAM || addr->sllc_ua) {
llc_build_and_send_ui_pkt(llc->sap, skb, addr->sllc_mac,
addr->sllc_sap);
+ skb = NULL;
goto out;
}
if (addr->sllc_test) {
llc_build_and_send_test_pkt(llc->sap, skb, addr->sllc_mac,
addr->sllc_sap);
+ skb = NULL;
goto out;
}
if (addr->sllc_xid) {
llc_build_and_send_xid_pkt(llc->sap, skb, addr->sllc_mac,
addr->sllc_sap);
+ skb = NULL;
goto out;
}
rc = -ENOPROTOOPT;
if (!(sk->sk_type == SOCK_STREAM && !addr->sllc_ua))
goto out;
rc = llc_ui_send_data(sk, skb, noblock);
+ skb = NULL;
out:
- if (rc) {
- kfree_skb(skb);
-release:
+ kfree_skb(skb);
+ if (rc)
dprintk("%s: failed sending from %02X to %02X: %d\n",
__func__, llc->laddr.lsap, llc->daddr.lsap, rc);
- }
release_sock(sk);
return rc ? : copied;
}
diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c
index 4d78375f9872..647c0554d04c 100644
--- a/net/llc/llc_c_ac.c
+++ b/net/llc/llc_c_ac.c
@@ -372,6 +372,7 @@ int llc_conn_ac_send_i_cmd_p_set_1(struct sock *sk, struct sk_buff *skb)
llc_pdu_init_as_i_cmd(skb, 1, llc->vS, llc->vR);
rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac);
if (likely(!rc)) {
+ skb_get(skb);
llc_conn_send_pdu(sk, skb);
llc_conn_ac_inc_vs_by_1(sk, skb);
}
@@ -389,7 +390,8 @@ static int llc_conn_ac_send_i_cmd_p_set_0(struct sock *sk, struct sk_buff *skb)
llc_pdu_init_as_i_cmd(skb, 0, llc->vS, llc->vR);
rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac);
if (likely(!rc)) {
- rc = llc_conn_send_pdu(sk, skb);
+ skb_get(skb);
+ llc_conn_send_pdu(sk, skb);
llc_conn_ac_inc_vs_by_1(sk, skb);
}
return rc;
@@ -406,6 +408,7 @@ int llc_conn_ac_send_i_xxx_x_set_0(struct sock *sk, struct sk_buff *skb)
llc_pdu_init_as_i_cmd(skb, 0, llc->vS, llc->vR);
rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac);
if (likely(!rc)) {
+ skb_get(skb);
llc_conn_send_pdu(sk, skb);
llc_conn_ac_inc_vs_by_1(sk, skb);
}
@@ -916,7 +919,8 @@ static int llc_conn_ac_send_i_rsp_f_set_ackpf(struct sock *sk,
llc_pdu_init_as_i_cmd(skb, llc->ack_pf, llc->vS, llc->vR);
rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac);
if (likely(!rc)) {
- rc = llc_conn_send_pdu(sk, skb);
+ skb_get(skb);
+ llc_conn_send_pdu(sk, skb);
llc_conn_ac_inc_vs_by_1(sk, skb);
}
return rc;
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 4ff89cb7c86f..7b620acaca9e 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -30,7 +30,7 @@
#endif
static int llc_find_offset(int state, int ev_type);
-static int llc_conn_send_pdus(struct sock *sk, struct sk_buff *skb);
+static void llc_conn_send_pdus(struct sock *sk);
static int llc_conn_service(struct sock *sk, struct sk_buff *skb);
static int llc_exec_conn_trans_actions(struct sock *sk,
struct llc_conn_state_trans *trans,
@@ -55,6 +55,8 @@ int sysctl_llc2_busy_timeout = LLC2_BUSY_TIME * HZ;
* (executing it's actions and changing state), upper layer will be
* indicated or confirmed, if needed. Returns 0 for success, 1 for
* failure. The socket lock has to be held before calling this function.
+ *
+ * This function always consumes a reference to the skb.
*/
int llc_conn_state_process(struct sock *sk, struct sk_buff *skb)
{
@@ -62,12 +64,6 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb)
struct llc_sock *llc = llc_sk(skb->sk);
struct llc_conn_state_ev *ev = llc_conn_ev(skb);
- /*
- * We have to hold the skb, because llc_conn_service will kfree it in
- * the sending path and we need to look at the skb->cb, where we encode
- * llc_conn_state_ev.
- */
- skb_get(skb);
ev->ind_prim = ev->cfm_prim = 0;
/*
* Send event to state machine
@@ -75,21 +71,12 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb)
rc = llc_conn_service(skb->sk, skb);
if (unlikely(rc != 0)) {
printk(KERN_ERR "%s: llc_conn_service failed\n", __func__);
- goto out_kfree_skb;
- }
-
- if (unlikely(!ev->ind_prim && !ev->cfm_prim)) {
- /* indicate or confirm not required */
- if (!skb->next)
- goto out_kfree_skb;
goto out_skb_put;
}
- if (unlikely(ev->ind_prim && ev->cfm_prim)) /* Paranoia */
- skb_get(skb);
-
switch (ev->ind_prim) {
case LLC_DATA_PRIM:
+ skb_get(skb);
llc_save_primitive(sk, skb, LLC_DATA_PRIM);
if (unlikely(sock_queue_rcv_skb(sk, skb))) {
/*
@@ -106,6 +93,7 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb)
* skb->sk pointing to the newly created struct sock in
* llc_conn_handler. -acme
*/
+ skb_get(skb);
skb_queue_tail(&sk->sk_receive_queue, skb);
sk->sk_state_change(sk);
break;
@@ -121,7 +109,6 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb)
sk->sk_state_change(sk);
}
}
- kfree_skb(skb);
sock_put(sk);
break;
case LLC_RESET_PRIM:
@@ -130,14 +117,11 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb)
* RESET is not being notified to upper layers for now
*/
printk(KERN_INFO "%s: received a reset ind!\n", __func__);
- kfree_skb(skb);
break;
default:
- if (ev->ind_prim) {
+ if (ev->ind_prim)
printk(KERN_INFO "%s: received unknown %d prim!\n",
__func__, ev->ind_prim);
- kfree_skb(skb);
- }
/* No indication */
break;
}
@@ -179,25 +163,22 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb)
printk(KERN_INFO "%s: received a reset conf!\n", __func__);
break;
default:
- if (ev->cfm_prim) {
+ if (ev->cfm_prim)
printk(KERN_INFO "%s: received unknown %d prim!\n",
__func__, ev->cfm_prim);
- break;
- }
- goto out_skb_put; /* No confirmation */
+ /* No confirmation */
+ break;
}
-out_kfree_skb:
- kfree_skb(skb);
out_skb_put:
kfree_skb(skb);
return rc;
}
-int llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb)
+void llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb)
{
/* queue PDU to send to MAC layer */
skb_queue_tail(&sk->sk_write_queue, skb);
- return llc_conn_send_pdus(sk, skb);
+ llc_conn_send_pdus(sk);
}
/**
@@ -255,7 +236,7 @@ void llc_conn_resend_i_pdu_as_cmd(struct sock *sk, u8 nr, u8 first_p_bit)
if (howmany_resend > 0)
llc->vS = (llc->vS + 1) % LLC_2_SEQ_NBR_MODULO;
/* any PDUs to re-send are queued up; start sending to MAC */
- llc_conn_send_pdus(sk, NULL);
+ llc_conn_send_pdus(sk);
out:;
}
@@ -296,7 +277,7 @@ void llc_conn_resend_i_pdu_as_rsp(struct sock *sk, u8 nr, u8 first_f_bit)
if (howmany_resend > 0)
llc->vS = (llc->vS + 1) % LLC_2_SEQ_NBR_MODULO;
/* any PDUs to re-send are queued up; start sending to MAC */
- llc_conn_send_pdus(sk, NULL);
+ llc_conn_send_pdus(sk);
out:;
}
@@ -340,16 +321,12 @@ out:
/**
* llc_conn_send_pdus - Sends queued PDUs
* @sk: active connection
- * @hold_skb: the skb held by caller, or NULL if does not care
*
- * Sends queued pdus to MAC layer for transmission. When @hold_skb is
- * NULL, always return 0. Otherwise, return 0 if @hold_skb is sent
- * successfully, or 1 for failure.
+ * Sends queued pdus to MAC layer for transmission.
*/
-static int llc_conn_send_pdus(struct sock *sk, struct sk_buff *hold_skb)
+static void llc_conn_send_pdus(struct sock *sk)
{
struct sk_buff *skb;
- int ret = 0;
while ((skb = skb_dequeue(&sk->sk_write_queue)) != NULL) {
struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb);
@@ -361,20 +338,10 @@ static int llc_conn_send_pdus(struct sock *sk, struct sk_buff *hold_skb)
skb_queue_tail(&llc_sk(sk)->pdu_unack_q, skb);
if (!skb2)
break;
- dev_queue_xmit(skb2);
- } else {
- bool is_target = skb == hold_skb;
- int rc;
-
- if (is_target)
- skb_get(skb);
- rc = dev_queue_xmit(skb);
- if (is_target)
- ret = rc;
+ skb = skb2;
}
+ dev_queue_xmit(skb);
}
-
- return ret;
}
/**
@@ -846,7 +813,7 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb)
else {
dprintk("%s: adding to backlog...\n", __func__);
llc_set_backlog_type(skb, LLC_PACKET);
- if (sk_add_backlog(sk, skb, sk->sk_rcvbuf))
+ if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf)))
goto drop_unlock;
}
out:
diff --git a/net/llc/llc_if.c b/net/llc/llc_if.c
index 8db03c2d5440..ad6547736c21 100644
--- a/net/llc/llc_if.c
+++ b/net/llc/llc_if.c
@@ -38,6 +38,8 @@
* closed and -EBUSY when sending data is not permitted in this state or
* LLC has send an I pdu with p bit set to 1 and is waiting for it's
* response.
+ *
+ * This function always consumes a reference to the skb.
*/
int llc_build_and_send_pkt(struct sock *sk, struct sk_buff *skb)
{
@@ -46,20 +48,22 @@ int llc_build_and_send_pkt(struct sock *sk, struct sk_buff *skb)
struct llc_sock *llc = llc_sk(sk);
if (unlikely(llc->state == LLC_CONN_STATE_ADM))
- goto out;
+ goto out_free;
rc = -EBUSY;
if (unlikely(llc_data_accept_state(llc->state) || /* data_conn_refuse */
llc->p_flag)) {
llc->failed_data_req = 1;
- goto out;
+ goto out_free;
}
ev = llc_conn_ev(skb);
ev->type = LLC_CONN_EV_TYPE_PRIM;
ev->prim = LLC_DATA_PRIM;
ev->prim_type = LLC_PRIM_TYPE_REQ;
skb->dev = llc->dev;
- rc = llc_conn_state_process(sk, skb);
-out:
+ return llc_conn_state_process(sk, skb);
+
+out_free:
+ kfree_skb(skb);
return rc;
}
diff --git a/net/llc/llc_s_ac.c b/net/llc/llc_s_ac.c
index a94bd56bcac6..7ae4cc684d3a 100644
--- a/net/llc/llc_s_ac.c
+++ b/net/llc/llc_s_ac.c
@@ -58,8 +58,10 @@ int llc_sap_action_send_ui(struct llc_sap *sap, struct sk_buff *skb)
ev->daddr.lsap, LLC_PDU_CMD);
llc_pdu_init_as_ui_cmd(skb);
rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac);
- if (likely(!rc))
+ if (likely(!rc)) {
+ skb_get(skb);
rc = dev_queue_xmit(skb);
+ }
return rc;
}
@@ -81,8 +83,10 @@ int llc_sap_action_send_xid_c(struct llc_sap *sap, struct sk_buff *skb)
ev->daddr.lsap, LLC_PDU_CMD);
llc_pdu_init_as_xid_cmd(skb, LLC_XID_NULL_CLASS_2, 0);
rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac);
- if (likely(!rc))
+ if (likely(!rc)) {
+ skb_get(skb);
rc = dev_queue_xmit(skb);
+ }
return rc;
}
@@ -135,8 +139,10 @@ int llc_sap_action_send_test_c(struct llc_sap *sap, struct sk_buff *skb)
ev->daddr.lsap, LLC_PDU_CMD);
llc_pdu_init_as_test_cmd(skb);
rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac);
- if (likely(!rc))
+ if (likely(!rc)) {
+ skb_get(skb);
rc = dev_queue_xmit(skb);
+ }
return rc;
}
diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
index a7f7b8ff4729..be419062e19a 100644
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -197,29 +197,22 @@ out:
* After executing actions of the event, upper layer will be indicated
* if needed(on receiving an UI frame). sk can be null for the
* datalink_proto case.
+ *
+ * This function always consumes a reference to the skb.
*/
static void llc_sap_state_process(struct llc_sap *sap, struct sk_buff *skb)
{
struct llc_sap_state_ev *ev = llc_sap_ev(skb);
- /*
- * We have to hold the skb, because llc_sap_next_state
- * will kfree it in the sending path and we need to
- * look at the skb->cb, where we encode llc_sap_state_ev.
- */
- skb_get(skb);
ev->ind_cfm_flag = 0;
llc_sap_next_state(sap, skb);
- if (ev->ind_cfm_flag == LLC_IND) {
- if (skb->sk->sk_state == TCP_LISTEN)
- kfree_skb(skb);
- else {
- llc_save_primitive(skb->sk, skb, ev->prim);
- /* queue skb to the user. */
- if (sock_queue_rcv_skb(skb->sk, skb))
- kfree_skb(skb);
- }
+ if (ev->ind_cfm_flag == LLC_IND && skb->sk->sk_state != TCP_LISTEN) {
+ llc_save_primitive(skb->sk, skb, ev->prim);
+
+ /* queue skb to the user. */
+ if (sock_queue_rcv_skb(skb->sk, skb) == 0)
+ return;
}
kfree_skb(skb);
}
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index aba094b4ccfc..2d05c4cfaf6d 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1292,8 +1292,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
ieee80211_remove_interfaces(local);
fail_rate:
rtnl_unlock();
- ieee80211_led_exit(local);
fail_flows:
+ ieee80211_led_exit(local);
destroy_workqueue(local->workqueue);
fail_workqueue:
wiphy_unregister(local->hw.wiphy);
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 26a2f49208b6..54dd8849d1cc 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2633,7 +2633,8 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw,
rcu_read_lock();
ssid = ieee80211_bss_get_ie(cbss, WLAN_EID_SSID);
- if (WARN_ON_ONCE(ssid == NULL))
+ if (WARN_ONCE(!ssid || ssid[1] > IEEE80211_MAX_SSID_LEN,
+ "invalid SSID element (len=%d)", ssid ? ssid[1] : -1))
ssid_len = 0;
else
ssid_len = ssid[1];
@@ -5233,7 +5234,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
rcu_read_lock();
ssidie = ieee80211_bss_get_ie(req->bss, WLAN_EID_SSID);
- if (!ssidie) {
+ if (!ssidie || ssidie[1] > sizeof(assoc_data->ssid)) {
rcu_read_unlock();
kfree(assoc_data);
return -EINVAL;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 768d14c9a716..0e05ff037672 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -3467,9 +3467,18 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx)
case cpu_to_le16(IEEE80211_STYPE_PROBE_RESP):
/* process for all: mesh, mlme, ibss */
break;
+ case cpu_to_le16(IEEE80211_STYPE_DEAUTH):
+ if (is_multicast_ether_addr(mgmt->da) &&
+ !is_broadcast_ether_addr(mgmt->da))
+ return RX_DROP_MONITOR;
+
+ /* process only for station/IBSS */
+ if (sdata->vif.type != NL80211_IFTYPE_STATION &&
+ sdata->vif.type != NL80211_IFTYPE_ADHOC)
+ return RX_DROP_MONITOR;
+ break;
case cpu_to_le16(IEEE80211_STYPE_ASSOC_RESP):
case cpu_to_le16(IEEE80211_STYPE_REASSOC_RESP):
- case cpu_to_le16(IEEE80211_STYPE_DEAUTH):
case cpu_to_le16(IEEE80211_STYPE_DISASSOC):
if (is_multicast_ether_addr(mgmt->da) &&
!is_broadcast_ether_addr(mgmt->da))
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index adf94ba1ed77..4d31d9688dc2 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -520,10 +520,33 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local,
return 0;
}
+static bool __ieee80211_can_leave_ch(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_sub_if_data *sdata_iter;
+
+ if (!ieee80211_is_radar_required(local))
+ return true;
+
+ if (!regulatory_pre_cac_allowed(local->hw.wiphy))
+ return false;
+
+ mutex_lock(&local->iflist_mtx);
+ list_for_each_entry(sdata_iter, &local->interfaces, list) {
+ if (sdata_iter->wdev.cac_started) {
+ mutex_unlock(&local->iflist_mtx);
+ return false;
+ }
+ }
+ mutex_unlock(&local->iflist_mtx);
+
+ return true;
+}
+
static bool ieee80211_can_scan(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata)
{
- if (ieee80211_is_radar_required(local))
+ if (!__ieee80211_can_leave_ch(sdata))
return false;
if (!list_empty(&local->roc_list))
@@ -630,7 +653,10 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
lockdep_assert_held(&local->mtx);
- if (local->scan_req || ieee80211_is_radar_required(local))
+ if (local->scan_req)
+ return -EBUSY;
+
+ if (!__ieee80211_can_leave_ch(sdata))
return -EBUSY;
if (!ieee80211_can_scan(local, sdata)) {
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index bd11fef2139f..8d3a2389b055 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -2457,7 +2457,8 @@ unsigned long ieee80211_sta_last_active(struct sta_info *sta)
{
struct ieee80211_sta_rx_stats *stats = sta_get_last_rx_stats(sta);
- if (time_after(stats->last_rx, sta->status_stats.last_ack))
+ if (!sta->status_stats.last_ack ||
+ time_after(stats->last_rx, sta->status_stats.last_ack))
return stats->last_rx;
return sta->status_stats.last_ack;
}
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index e64d5f9a89dd..d73d1828216a 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -296,7 +296,8 @@ ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr)
if (unlikely(!flag_nested(nla)))
return -IPSET_ERR_PROTOCOL;
- if (nla_parse_nested_deprecated(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy, NULL))
+ if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla,
+ ipaddr_policy, NULL))
return -IPSET_ERR_PROTOCOL;
if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV4)))
return -IPSET_ERR_PROTOCOL;
@@ -314,7 +315,8 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
if (unlikely(!flag_nested(nla)))
return -IPSET_ERR_PROTOCOL;
- if (nla_parse_nested_deprecated(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy, NULL))
+ if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla,
+ ipaddr_policy, NULL))
return -IPSET_ERR_PROTOCOL;
if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV6)))
return -IPSET_ERR_PROTOCOL;
@@ -934,7 +936,8 @@ static int ip_set_create(struct net *net, struct sock *ctnl,
/* Without holding any locks, create private part. */
if (attr[IPSET_ATTR_DATA] &&
- nla_parse_nested_deprecated(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA], set->type->create_policy, NULL)) {
+ nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA],
+ set->type->create_policy, NULL)) {
ret = -IPSET_ERR_PROTOCOL;
goto put_out;
}
@@ -1281,6 +1284,14 @@ dump_attrs(struct nlmsghdr *nlh)
}
}
+static const struct nla_policy
+ip_set_dump_policy[IPSET_ATTR_CMD_MAX + 1] = {
+ [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
+ [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING,
+ .len = IPSET_MAXNAMELEN - 1 },
+ [IPSET_ATTR_FLAGS] = { .type = NLA_U32 },
+};
+
static int
dump_init(struct netlink_callback *cb, struct ip_set_net *inst)
{
@@ -1292,9 +1303,9 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst)
ip_set_id_t index;
int ret;
- ret = nla_parse_deprecated(cda, IPSET_ATTR_CMD_MAX, attr,
- nlh->nlmsg_len - min_len,
- ip_set_setname_policy, NULL);
+ ret = nla_parse(cda, IPSET_ATTR_CMD_MAX, attr,
+ nlh->nlmsg_len - min_len,
+ ip_set_dump_policy, NULL);
if (ret)
return ret;
@@ -1543,9 +1554,9 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
memcpy(&errmsg->msg, nlh, nlh->nlmsg_len);
cmdattr = (void *)&errmsg->msg + min_len;
- ret = nla_parse_deprecated(cda, IPSET_ATTR_CMD_MAX, cmdattr,
- nlh->nlmsg_len - min_len,
- ip_set_adt_policy, NULL);
+ ret = nla_parse(cda, IPSET_ATTR_CMD_MAX, cmdattr,
+ nlh->nlmsg_len - min_len, ip_set_adt_policy,
+ NULL);
if (ret) {
nlmsg_free(skb2);
@@ -1596,7 +1607,9 @@ static int ip_set_ad(struct net *net, struct sock *ctnl,
use_lineno = !!attr[IPSET_ATTR_LINENO];
if (attr[IPSET_ATTR_DATA]) {
- if (nla_parse_nested_deprecated(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], set->type->adt_policy, NULL))
+ if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX,
+ attr[IPSET_ATTR_DATA],
+ set->type->adt_policy, NULL))
return -IPSET_ERR_PROTOCOL;
ret = call_ad(ctnl, skb, set, tb, adt, flags,
use_lineno);
@@ -1606,7 +1619,8 @@ static int ip_set_ad(struct net *net, struct sock *ctnl,
nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) {
if (nla_type(nla) != IPSET_ATTR_DATA ||
!flag_nested(nla) ||
- nla_parse_nested_deprecated(tb, IPSET_ATTR_ADT_MAX, nla, set->type->adt_policy, NULL))
+ nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla,
+ set->type->adt_policy, NULL))
return -IPSET_ERR_PROTOCOL;
ret = call_ad(ctnl, skb, set, tb, adt,
flags, use_lineno);
@@ -1655,7 +1669,8 @@ static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb,
if (!set)
return -ENOENT;
- if (nla_parse_nested_deprecated(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], set->type->adt_policy, NULL))
+ if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA],
+ set->type->adt_policy, NULL))
return -IPSET_ERR_PROTOCOL;
rcu_read_lock_bh();
@@ -1961,7 +1976,7 @@ static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = {
[IPSET_CMD_LIST] = {
.call = ip_set_dump,
.attr_count = IPSET_ATTR_CMD_MAX,
- .policy = ip_set_setname_policy,
+ .policy = ip_set_dump_policy,
},
[IPSET_CMD_SAVE] = {
.call = ip_set_dump,
@@ -2069,8 +2084,9 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
}
req_version->version = IPSET_PROTOCOL;
- ret = copy_to_user(user, req_version,
- sizeof(struct ip_set_req_version));
+ if (copy_to_user(user, req_version,
+ sizeof(struct ip_set_req_version)))
+ ret = -EFAULT;
goto done;
}
case IP_SET_OP_GET_BYNAME: {
@@ -2129,7 +2145,8 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
} /* end of switch(op) */
copy:
- ret = copy_to_user(user, data, copylen);
+ if (copy_to_user(user, data, copylen))
+ ret = -EFAULT;
done:
vfree(data);
diff --git a/net/netfilter/ipset/ip_set_hash_ipmac.c b/net/netfilter/ipset/ip_set_hash_ipmac.c
index 24d8f4df4230..4ce563eb927d 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmac.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmac.c
@@ -209,7 +209,7 @@ hash_ipmac6_kadt(struct ip_set *set, const struct sk_buff *skb,
(skb_mac_header(skb) + ETH_HLEN) > skb->data)
return -EINVAL;
- if (opt->flags & IPSET_DIM_ONE_SRC)
+ if (opt->flags & IPSET_DIM_TWO_SRC)
ether_addr_copy(e.ether, eth_hdr(skb)->h_source);
else
ether_addr_copy(e.ether, eth_hdr(skb)->h_dest);
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index c259cbc3ef45..3d932de0ad29 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -368,6 +368,7 @@ static struct ip_set_type hash_net_type __read_mostly = {
[IPSET_ATTR_IP_TO] = { .type = NLA_NESTED },
[IPSET_ATTR_CIDR] = { .type = NLA_U8 },
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
[IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index a3ae69bfee66..4398322fad59 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -476,6 +476,7 @@ static struct ip_set_type hash_netnet_type __read_mostly = {
[IPSET_ATTR_CIDR] = { .type = NLA_U8 },
[IPSET_ATTR_CIDR2] = { .type = NLA_U8 },
[IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 },
+ [IPSET_ATTR_LINENO] = { .type = NLA_U32 },
[IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 },
[IPSET_ATTR_BYTES] = { .type = NLA_U64 },
[IPSET_ATTR_PACKETS] = { .type = NLA_U64 },
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 4515056ef1c2..f9b16f2b2219 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -193,21 +193,29 @@ struct ip_vs_app *register_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *
mutex_lock(&__ip_vs_app_mutex);
+ /* increase the module use count */
+ if (!ip_vs_use_count_inc()) {
+ err = -ENOENT;
+ goto out_unlock;
+ }
+
list_for_each_entry(a, &ipvs->app_list, a_list) {
if (!strcmp(app->name, a->name)) {
err = -EEXIST;
+ /* decrease the module use count */
+ ip_vs_use_count_dec();
goto out_unlock;
}
}
a = kmemdup(app, sizeof(*app), GFP_KERNEL);
if (!a) {
err = -ENOMEM;
+ /* decrease the module use count */
+ ip_vs_use_count_dec();
goto out_unlock;
}
INIT_LIST_HEAD(&a->incs_list);
list_add(&a->a_list, &ipvs->app_list);
- /* increase the module use count */
- ip_vs_use_count_inc();
out_unlock:
mutex_unlock(&__ip_vs_app_mutex);
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 8b48e7ce1c2c..3cccc88ef817 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -93,7 +93,6 @@ static bool __ip_vs_addr_is_local_v6(struct net *net,
static void update_defense_level(struct netns_ipvs *ipvs)
{
struct sysinfo i;
- static int old_secure_tcp = 0;
int availmem;
int nomem;
int to_change = -1;
@@ -174,35 +173,35 @@ static void update_defense_level(struct netns_ipvs *ipvs)
spin_lock(&ipvs->securetcp_lock);
switch (ipvs->sysctl_secure_tcp) {
case 0:
- if (old_secure_tcp >= 2)
+ if (ipvs->old_secure_tcp >= 2)
to_change = 0;
break;
case 1:
if (nomem) {
- if (old_secure_tcp < 2)
+ if (ipvs->old_secure_tcp < 2)
to_change = 1;
ipvs->sysctl_secure_tcp = 2;
} else {
- if (old_secure_tcp >= 2)
+ if (ipvs->old_secure_tcp >= 2)
to_change = 0;
}
break;
case 2:
if (nomem) {
- if (old_secure_tcp < 2)
+ if (ipvs->old_secure_tcp < 2)
to_change = 1;
} else {
- if (old_secure_tcp >= 2)
+ if (ipvs->old_secure_tcp >= 2)
to_change = 0;
ipvs->sysctl_secure_tcp = 1;
}
break;
case 3:
- if (old_secure_tcp < 2)
+ if (ipvs->old_secure_tcp < 2)
to_change = 1;
break;
}
- old_secure_tcp = ipvs->sysctl_secure_tcp;
+ ipvs->old_secure_tcp = ipvs->sysctl_secure_tcp;
if (to_change >= 0)
ip_vs_protocol_timeout_change(ipvs,
ipvs->sysctl_secure_tcp > 1);
@@ -1275,7 +1274,8 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
struct ip_vs_service *svc = NULL;
/* increase the module use count */
- ip_vs_use_count_inc();
+ if (!ip_vs_use_count_inc())
+ return -ENOPROTOOPT;
/* Lookup the scheduler by 'u->sched_name' */
if (strcmp(u->sched_name, "none")) {
@@ -2435,9 +2435,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
if (copy_from_user(arg, user, len) != 0)
return -EFAULT;
- /* increase the module use count */
- ip_vs_use_count_inc();
-
/* Handle daemons since they have another lock */
if (cmd == IP_VS_SO_SET_STARTDAEMON ||
cmd == IP_VS_SO_SET_STOPDAEMON) {
@@ -2450,13 +2447,13 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
ret = -EINVAL;
if (strscpy(cfg.mcast_ifn, dm->mcast_ifn,
sizeof(cfg.mcast_ifn)) <= 0)
- goto out_dec;
+ return ret;
cfg.syncid = dm->syncid;
ret = start_sync_thread(ipvs, &cfg, dm->state);
} else {
ret = stop_sync_thread(ipvs, dm->state);
}
- goto out_dec;
+ return ret;
}
mutex_lock(&__ip_vs_mutex);
@@ -2551,10 +2548,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
out_unlock:
mutex_unlock(&__ip_vs_mutex);
- out_dec:
- /* decrease the module use count */
- ip_vs_use_count_dec();
-
return ret;
}
diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c
index 8e104dff7abc..166c669f0763 100644
--- a/net/netfilter/ipvs/ip_vs_pe.c
+++ b/net/netfilter/ipvs/ip_vs_pe.c
@@ -68,7 +68,8 @@ int register_ip_vs_pe(struct ip_vs_pe *pe)
struct ip_vs_pe *tmp;
/* increase the module use count */
- ip_vs_use_count_inc();
+ if (!ip_vs_use_count_inc())
+ return -ENOENT;
mutex_lock(&ip_vs_pe_mutex);
/* Make sure that the pe with this name doesn't exist
diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c
index 2f9d5cd5daee..d4903723be7e 100644
--- a/net/netfilter/ipvs/ip_vs_sched.c
+++ b/net/netfilter/ipvs/ip_vs_sched.c
@@ -179,7 +179,8 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
}
/* increase the module use count */
- ip_vs_use_count_inc();
+ if (!ip_vs_use_count_inc())
+ return -ENOENT;
mutex_lock(&ip_vs_sched_mutex);
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index a4a78c4b06de..8dc892a9dc91 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1762,6 +1762,10 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %zd bytes\n",
sizeof(struct ip_vs_sync_conn_v0));
+ /* increase the module use count */
+ if (!ip_vs_use_count_inc())
+ return -ENOPROTOOPT;
+
/* Do not hold one mutex and then to block on another */
for (;;) {
rtnl_lock();
@@ -1892,9 +1896,6 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
mutex_unlock(&ipvs->sync_mutex);
rtnl_unlock();
- /* increase the module use count */
- ip_vs_use_count_inc();
-
return 0;
out:
@@ -1924,11 +1925,17 @@ out:
}
kfree(ti);
}
+
+ /* decrease the module use count */
+ ip_vs_use_count_dec();
return result;
out_early:
mutex_unlock(&ipvs->sync_mutex);
rtnl_unlock();
+
+ /* decrease the module use count */
+ ip_vs_use_count_dec();
return result;
}
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 0c63120b2db2..5cd610b547e0 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1792,8 +1792,8 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
if (nf_ct_is_confirmed(ct))
extra_jiffies += nfct_time_stamp;
- if (ct->timeout != extra_jiffies)
- ct->timeout = extra_jiffies;
+ if (READ_ONCE(ct->timeout) != extra_jiffies)
+ WRITE_ONCE(ct->timeout, extra_jiffies);
acct:
if (do_acct)
nf_ct_acct_update(ct, ctinfo, skb->len);
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 132f5228b431..128245efe84a 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -202,6 +202,8 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
{
int err;
+ flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
+
err = rhashtable_insert_fast(&flow_table->rhashtable,
&flow->tuplehash[0].node,
nf_flow_offload_rhash_params);
@@ -218,7 +220,6 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
return err;
}
- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
return 0;
}
EXPORT_SYMBOL_GPL(flow_offload_add);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index d481f9baca2f..712a428509ad 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1922,6 +1922,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
if (nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
+ flags |= chain->flags & NFT_BASE_CHAIN;
return nf_tables_updchain(&ctx, genmask, policy, flags);
}
@@ -5143,9 +5144,6 @@ static int nf_tables_updobj(const struct nft_ctx *ctx,
struct nft_trans *trans;
int err;
- if (!obj->ops->update)
- return -EOPNOTSUPP;
-
trans = nft_trans_alloc(ctx, NFT_MSG_NEWOBJ,
sizeof(struct nft_trans_obj));
if (!trans)
@@ -6499,7 +6497,8 @@ static void nft_obj_commit_update(struct nft_trans *trans)
obj = nft_trans_obj(trans);
newobj = nft_trans_obj_newobj(trans);
- obj->ops->update(obj, newobj);
+ if (obj->ops->update)
+ obj->ops->update(obj, newobj);
kfree(newobj);
}
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index e546f759b7a7..e25dab8128db 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -334,7 +334,8 @@ int nft_flow_rule_offload_commit(struct net *net)
switch (trans->msg_type) {
case NFT_MSG_NEWCHAIN:
- if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
+ if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) ||
+ nft_trans_chain_update(trans))
continue;
policy = nft_trans_chain_policy(trans);
@@ -347,7 +348,7 @@ int nft_flow_rule_offload_commit(struct net *net)
policy = nft_trans_chain_policy(trans);
err = nft_flow_offload_chain(trans->ctx.chain, &policy,
- FLOW_BLOCK_BIND);
+ FLOW_BLOCK_UNBIND);
break;
case NFT_MSG_NEWRULE:
if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
index 974300178fa9..02afa752dd2e 100644
--- a/net/netfilter/nft_bitwise.c
+++ b/net/netfilter/nft_bitwise.c
@@ -134,12 +134,13 @@ static int nft_bitwise_offload(struct nft_offload_ctx *ctx,
const struct nft_expr *expr)
{
const struct nft_bitwise *priv = nft_expr_priv(expr);
+ struct nft_offload_reg *reg = &ctx->regs[priv->dreg];
if (memcmp(&priv->xor, &zero, sizeof(priv->xor)) ||
- priv->sreg != priv->dreg)
+ priv->sreg != priv->dreg || priv->len != reg->len)
return -EOPNOTSUPP;
- memcpy(&ctx->regs[priv->dreg].mask, &priv->mask, sizeof(priv->mask));
+ memcpy(&reg->mask, &priv->mask, sizeof(priv->mask));
return 0;
}
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index bd173b1824c6..0744b2bb46da 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -116,7 +116,7 @@ static int __nft_cmp_offload(struct nft_offload_ctx *ctx,
u8 *mask = (u8 *)&flow->match.mask;
u8 *key = (u8 *)&flow->match.key;
- if (priv->op != NFT_CMP_EQ)
+ if (priv->op != NFT_CMP_EQ || reg->len != priv->len)
return -EOPNOTSUPP;
memcpy(key + reg->offset, &priv->data, priv->len);
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 22a80eb60222..5cb2d8908d2a 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -161,13 +161,21 @@ static int nft_payload_offload_ll(struct nft_offload_ctx *ctx,
switch (priv->offset) {
case offsetof(struct ethhdr, h_source):
+ if (priv->len != ETH_ALEN)
+ return -EOPNOTSUPP;
+
NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_ETH_ADDRS, eth_addrs,
src, ETH_ALEN, reg);
break;
case offsetof(struct ethhdr, h_dest):
+ if (priv->len != ETH_ALEN)
+ return -EOPNOTSUPP;
+
NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_ETH_ADDRS, eth_addrs,
dst, ETH_ALEN, reg);
break;
+ default:
+ return -EOPNOTSUPP;
}
return 0;
@@ -181,14 +189,23 @@ static int nft_payload_offload_ip(struct nft_offload_ctx *ctx,
switch (priv->offset) {
case offsetof(struct iphdr, saddr):
+ if (priv->len != sizeof(struct in_addr))
+ return -EOPNOTSUPP;
+
NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4, src,
sizeof(struct in_addr), reg);
break;
case offsetof(struct iphdr, daddr):
+ if (priv->len != sizeof(struct in_addr))
+ return -EOPNOTSUPP;
+
NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4, dst,
sizeof(struct in_addr), reg);
break;
case offsetof(struct iphdr, protocol):
+ if (priv->len != sizeof(__u8))
+ return -EOPNOTSUPP;
+
NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto,
sizeof(__u8), reg);
nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_TRANSPORT);
@@ -208,14 +225,23 @@ static int nft_payload_offload_ip6(struct nft_offload_ctx *ctx,
switch (priv->offset) {
case offsetof(struct ipv6hdr, saddr):
+ if (priv->len != sizeof(struct in6_addr))
+ return -EOPNOTSUPP;
+
NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6, src,
sizeof(struct in6_addr), reg);
break;
case offsetof(struct ipv6hdr, daddr):
+ if (priv->len != sizeof(struct in6_addr))
+ return -EOPNOTSUPP;
+
NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6, dst,
sizeof(struct in6_addr), reg);
break;
case offsetof(struct ipv6hdr, nexthdr):
+ if (priv->len != sizeof(__u8))
+ return -EOPNOTSUPP;
+
NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic, ip_proto,
sizeof(__u8), reg);
nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_TRANSPORT);
@@ -255,10 +281,16 @@ static int nft_payload_offload_tcp(struct nft_offload_ctx *ctx,
switch (priv->offset) {
case offsetof(struct tcphdr, source):
+ if (priv->len != sizeof(__be16))
+ return -EOPNOTSUPP;
+
NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, src,
sizeof(__be16), reg);
break;
case offsetof(struct tcphdr, dest):
+ if (priv->len != sizeof(__be16))
+ return -EOPNOTSUPP;
+
NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, dst,
sizeof(__be16), reg);
break;
@@ -277,10 +309,16 @@ static int nft_payload_offload_udp(struct nft_offload_ctx *ctx,
switch (priv->offset) {
case offsetof(struct udphdr, source):
+ if (priv->len != sizeof(__be16))
+ return -EOPNOTSUPP;
+
NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, src,
sizeof(__be16), reg);
break;
case offsetof(struct udphdr, dest):
+ if (priv->len != sizeof(__be16))
+ return -EOPNOTSUPP;
+
NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_PORTS, tp, dst,
sizeof(__be16), reg);
break;
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index c4f54ad2b98a..58d5373c513c 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -64,28 +64,6 @@ static DEFINE_SPINLOCK(nr_list_lock);
static const struct proto_ops nr_proto_ops;
/*
- * NETROM network devices are virtual network devices encapsulating NETROM
- * frames into AX.25 which will be sent through an AX.25 device, so form a
- * special "super class" of normal net devices; split their locks off into a
- * separate class since they always nest.
- */
-static struct lock_class_key nr_netdev_xmit_lock_key;
-static struct lock_class_key nr_netdev_addr_lock_key;
-
-static void nr_set_lockdep_one(struct net_device *dev,
- struct netdev_queue *txq,
- void *_unused)
-{
- lockdep_set_class(&txq->_xmit_lock, &nr_netdev_xmit_lock_key);
-}
-
-static void nr_set_lockdep_key(struct net_device *dev)
-{
- lockdep_set_class(&dev->addr_list_lock, &nr_netdev_addr_lock_key);
- netdev_for_each_tx_queue(dev, nr_set_lockdep_one, NULL);
-}
-
-/*
* Socket removal during an interrupt is now safe.
*/
static void nr_remove_socket(struct sock *sk)
@@ -1414,7 +1392,6 @@ static int __init nr_proto_init(void)
free_netdev(dev);
goto fail;
}
- nr_set_lockdep_key(dev);
dev_nr[i] = dev;
}
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index ccdd790e163a..28604414dec1 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -554,11 +554,11 @@ static __poll_t llcp_sock_poll(struct file *file, struct socket *sock,
if (sk->sk_state == LLCP_LISTEN)
return llcp_accept_poll(sk);
- if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+ if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
mask |= EPOLLERR |
(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
- if (!skb_queue_empty(&sk->sk_receive_queue))
+ if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
mask |= EPOLLIN | EPOLLRDNORM;
if (sk->sk_state == LLCP_CLOSED)
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 17e6ca62f1be..afde0d763039 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -1099,7 +1099,6 @@ static int nfc_genl_llc_set_params(struct sk_buff *skb, struct genl_info *info)
local = nfc_llcp_find_local(dev);
if (!local) {
- nfc_put_device(dev);
rc = -ENODEV;
goto exit;
}
@@ -1159,7 +1158,6 @@ static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info)
local = nfc_llcp_find_local(dev);
if (!local) {
- nfc_put_device(dev);
rc = -ENODEV;
goto exit;
}
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 3572e11b6f21..1c77f520f474 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -165,7 +165,8 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
{
int err;
- err = skb_mpls_push(skb, mpls->mpls_lse, mpls->mpls_ethertype);
+ err = skb_mpls_push(skb, mpls->mpls_lse, mpls->mpls_ethertype,
+ skb->mac_len);
if (err)
return err;
@@ -178,7 +179,7 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
{
int err;
- err = skb_mpls_pop(skb, ethertype);
+ err = skb_mpls_pop(skb, ethertype, skb->mac_len);
if (err)
return err;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index f30e406fbec5..d8c364d637b1 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1881,7 +1881,7 @@ static struct genl_family dp_datapath_genl_family __ro_after_init = {
/* Called with ovs_mutex or RCU read lock. */
static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
struct net *net, u32 portid, u32 seq,
- u32 flags, u8 cmd)
+ u32 flags, u8 cmd, gfp_t gfp)
{
struct ovs_header *ovs_header;
struct ovs_vport_stats vport_stats;
@@ -1902,7 +1902,7 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
goto nla_put_failure;
if (!net_eq(net, dev_net(vport->dev))) {
- int id = peernet2id_alloc(net, dev_net(vport->dev));
+ int id = peernet2id_alloc(net, dev_net(vport->dev), gfp);
if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
goto nla_put_failure;
@@ -1943,11 +1943,12 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
struct sk_buff *skb;
int retval;
- skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+ skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!skb)
return ERR_PTR(-ENOMEM);
- retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd);
+ retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd,
+ GFP_KERNEL);
BUG_ON(retval < 0);
return skb;
@@ -2089,7 +2090,7 @@ restart:
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
info->snd_portid, info->snd_seq, 0,
- OVS_VPORT_CMD_NEW);
+ OVS_VPORT_CMD_NEW, GFP_KERNEL);
new_headroom = netdev_get_fwd_headroom(vport->dev);
@@ -2150,7 +2151,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
info->snd_portid, info->snd_seq, 0,
- OVS_VPORT_CMD_SET);
+ OVS_VPORT_CMD_SET, GFP_KERNEL);
BUG_ON(err < 0);
ovs_unlock();
@@ -2190,7 +2191,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
info->snd_portid, info->snd_seq, 0,
- OVS_VPORT_CMD_DEL);
+ OVS_VPORT_CMD_DEL, GFP_KERNEL);
BUG_ON(err < 0);
/* the vport deletion may trigger dp headroom update */
@@ -2237,7 +2238,7 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
goto exit_unlock_free;
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
info->snd_portid, info->snd_seq, 0,
- OVS_VPORT_CMD_GET);
+ OVS_VPORT_CMD_GET, GFP_ATOMIC);
BUG_ON(err < 0);
rcu_read_unlock();
@@ -2273,7 +2274,8 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NLM_F_MULTI,
- OVS_VPORT_CMD_GET) < 0)
+ OVS_VPORT_CMD_GET,
+ GFP_ATOMIC) < 0)
goto out;
j++;
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 21c90d3a7ebf..58a7b8312c28 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -137,7 +137,7 @@ static void do_setup(struct net_device *netdev)
netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH |
IFF_NO_QUEUE;
netdev->needs_free_netdev = true;
- netdev->priv_destructor = internal_dev_destructor;
+ netdev->priv_destructor = NULL;
netdev->ethtool_ops = &internal_dev_ethtool_ops;
netdev->rtnl_link_ops = &internal_dev_link_ops;
@@ -159,7 +159,6 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
struct internal_dev *internal_dev;
struct net_device *dev;
int err;
- bool free_vport = true;
vport = ovs_vport_alloc(0, &ovs_internal_vport_ops, parms);
if (IS_ERR(vport)) {
@@ -190,10 +189,9 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
rtnl_lock();
err = register_netdevice(vport->dev);
- if (err) {
- free_vport = false;
+ if (err)
goto error_unlock;
- }
+ vport->dev->priv_destructor = internal_dev_destructor;
dev_set_promiscuity(vport->dev, 1);
rtnl_unlock();
@@ -207,8 +205,7 @@ error_unlock:
error_free_netdev:
free_netdev(dev);
error_free_vport:
- if (free_vport)
- ovs_vport_free(vport);
+ ovs_vport_free(vport);
error:
return ERR_PTR(err);
}
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 96ea9f254ae9..76d499f6af9a 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -338,9 +338,9 @@ static __poll_t pn_socket_poll(struct file *file, struct socket *sock,
if (sk->sk_state == TCP_CLOSE)
return EPOLLERR;
- if (!skb_queue_empty(&sk->sk_receive_queue))
+ if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
mask |= EPOLLIN | EPOLLRDNORM;
- if (!skb_queue_empty(&pn->ctrlreq_queue))
+ if (!skb_queue_empty_lockless(&pn->ctrlreq_queue))
mask |= EPOLLPRI;
if (!mask && sk->sk_state == TCP_CLOSE_WAIT)
return EPOLLHUP;
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 233f1368162b..18c6fac6ead9 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -450,6 +450,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
struct ib_qp_init_attr attr;
struct ib_cq_init_attr cq_attr = {};
struct rds_ib_device *rds_ibdev;
+ unsigned long max_wrs;
int ret, fr_queue_space;
/*
@@ -469,10 +470,15 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
/* add the conn now so that connection establishment has the dev */
rds_ib_add_conn(rds_ibdev, conn);
- if (rds_ibdev->max_wrs < ic->i_send_ring.w_nr + 1)
- rds_ib_ring_resize(&ic->i_send_ring, rds_ibdev->max_wrs - 1);
- if (rds_ibdev->max_wrs < ic->i_recv_ring.w_nr + 1)
- rds_ib_ring_resize(&ic->i_recv_ring, rds_ibdev->max_wrs - 1);
+ max_wrs = rds_ibdev->max_wrs < rds_ib_sysctl_max_send_wr + 1 ?
+ rds_ibdev->max_wrs - 1 : rds_ib_sysctl_max_send_wr;
+ if (ic->i_send_ring.w_nr != max_wrs)
+ rds_ib_ring_resize(&ic->i_send_ring, max_wrs);
+
+ max_wrs = rds_ibdev->max_wrs < rds_ib_sysctl_max_recv_wr + 1 ?
+ rds_ibdev->max_wrs - 1 : rds_ib_sysctl_max_recv_wr;
+ if (ic->i_recv_ring.w_nr != max_wrs)
+ rds_ib_ring_resize(&ic->i_recv_ring, max_wrs);
/* Protection domain and memory range */
ic->i_pd = rds_ibdev->pd;
@@ -1099,8 +1105,9 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp)
ic->i_flowctl = 0;
atomic_set(&ic->i_credits, 0);
- rds_ib_ring_init(&ic->i_send_ring, rds_ib_sysctl_max_send_wr);
- rds_ib_ring_init(&ic->i_recv_ring, rds_ib_sysctl_max_recv_wr);
+ /* Re-init rings, but retain sizes. */
+ rds_ib_ring_init(&ic->i_send_ring, ic->i_send_ring.w_nr);
+ rds_ib_ring_init(&ic->i_recv_ring, ic->i_recv_ring.w_nr);
if (ic->i_ibinc) {
rds_inc_put(&ic->i_ibinc->ii_inc);
@@ -1147,8 +1154,8 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
* rds_ib_conn_shutdown() waits for these to be emptied so they
* must be initialized before it can be called.
*/
- rds_ib_ring_init(&ic->i_send_ring, rds_ib_sysctl_max_send_wr);
- rds_ib_ring_init(&ic->i_recv_ring, rds_ib_sysctl_max_recv_wr);
+ rds_ib_ring_init(&ic->i_send_ring, 0);
+ rds_ib_ring_init(&ic->i_recv_ring, 0);
ic->conn = conn;
conn->c_transport_data = ic;
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index f0e9ccf472a9..6a0df7c8a939 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -65,28 +65,6 @@ static const struct proto_ops rose_proto_ops;
ax25_address rose_callsign;
/*
- * ROSE network devices are virtual network devices encapsulating ROSE
- * frames into AX.25 which will be sent through an AX.25 device, so form a
- * special "super class" of normal net devices; split their locks off into a
- * separate class since they always nest.
- */
-static struct lock_class_key rose_netdev_xmit_lock_key;
-static struct lock_class_key rose_netdev_addr_lock_key;
-
-static void rose_set_lockdep_one(struct net_device *dev,
- struct netdev_queue *txq,
- void *_unused)
-{
- lockdep_set_class(&txq->_xmit_lock, &rose_netdev_xmit_lock_key);
-}
-
-static void rose_set_lockdep_key(struct net_device *dev)
-{
- lockdep_set_class(&dev->addr_list_lock, &rose_netdev_addr_lock_key);
- netdev_for_each_tx_queue(dev, rose_set_lockdep_one, NULL);
-}
-
-/*
* Convert a ROSE address into text.
*/
char *rose2asc(char *buf, const rose_address *addr)
@@ -1533,7 +1511,6 @@ static int __init rose_proto_init(void)
free_netdev(dev);
goto fail;
}
- rose_set_lockdep_key(dev);
dev_rose[i] = dev;
}
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 1091bf35a199..7c7d10f2e0c1 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -556,6 +556,7 @@ struct rxrpc_call {
struct rxrpc_peer *peer; /* Peer record for remote address */
struct rxrpc_sock __rcu *socket; /* socket responsible */
struct rxrpc_net *rxnet; /* Network namespace to which call belongs */
+ const struct rxrpc_security *security; /* applied security module */
struct mutex user_mutex; /* User access mutex */
unsigned long ack_at; /* When deferred ACK needs to happen */
unsigned long ack_lost_at; /* When ACK is figured as lost */
@@ -600,6 +601,7 @@ struct rxrpc_call {
int debug_id; /* debug ID for printks */
unsigned short rx_pkt_offset; /* Current recvmsg packet offset */
unsigned short rx_pkt_len; /* Current recvmsg packet len */
+ bool rx_pkt_last; /* Current recvmsg packet is last */
/* Rx/Tx circular buffer, depending on phase.
*
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 00c095d74145..135bf5cd8dd5 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -84,7 +84,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
smp_store_release(&b->conn_backlog_head,
(head + 1) & (size - 1));
- trace_rxrpc_conn(conn, rxrpc_conn_new_service,
+ trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_service,
atomic_read(&conn->usage), here);
}
@@ -97,7 +97,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
call->flags |= (1 << RXRPC_CALL_IS_SERVICE);
call->state = RXRPC_CALL_SERVER_PREALLOC;
- trace_rxrpc_call(call, rxrpc_call_new_service,
+ trace_rxrpc_call(call->debug_id, rxrpc_call_new_service,
atomic_read(&call->usage),
here, (const void *)user_call_ID);
@@ -307,6 +307,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
rxrpc_see_call(call);
call->conn = conn;
+ call->security = conn->security;
call->peer = rxrpc_get_peer(conn->params.peer);
call->cong_cwnd = call->peer->cong_cwnd;
return call;
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 32d8dc677142..a31c18c09894 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -240,7 +240,8 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
if (p->intr)
__set_bit(RXRPC_CALL_IS_INTR, &call->flags);
call->tx_total_len = p->tx_total_len;
- trace_rxrpc_call(call, rxrpc_call_new_client, atomic_read(&call->usage),
+ trace_rxrpc_call(call->debug_id, rxrpc_call_new_client,
+ atomic_read(&call->usage),
here, (const void *)p->user_call_ID);
/* We need to protect a partially set up call against the user as we
@@ -290,8 +291,8 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
if (ret < 0)
goto error;
- trace_rxrpc_call(call, rxrpc_call_connected, atomic_read(&call->usage),
- here, NULL);
+ trace_rxrpc_call(call->debug_id, rxrpc_call_connected,
+ atomic_read(&call->usage), here, NULL);
rxrpc_start_call_timer(call);
@@ -313,8 +314,8 @@ error_dup_user_ID:
error:
__rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
RX_CALL_DEAD, ret);
- trace_rxrpc_call(call, rxrpc_call_error, atomic_read(&call->usage),
- here, ERR_PTR(ret));
+ trace_rxrpc_call(call->debug_id, rxrpc_call_error,
+ atomic_read(&call->usage), here, ERR_PTR(ret));
rxrpc_release_call(rx, call);
mutex_unlock(&call->user_mutex);
rxrpc_put_call(call, rxrpc_call_put);
@@ -376,7 +377,8 @@ bool rxrpc_queue_call(struct rxrpc_call *call)
if (n == 0)
return false;
if (rxrpc_queue_work(&call->processor))
- trace_rxrpc_call(call, rxrpc_call_queued, n + 1, here, NULL);
+ trace_rxrpc_call(call->debug_id, rxrpc_call_queued, n + 1,
+ here, NULL);
else
rxrpc_put_call(call, rxrpc_call_put_noqueue);
return true;
@@ -391,7 +393,8 @@ bool __rxrpc_queue_call(struct rxrpc_call *call)
int n = atomic_read(&call->usage);
ASSERTCMP(n, >=, 1);
if (rxrpc_queue_work(&call->processor))
- trace_rxrpc_call(call, rxrpc_call_queued_ref, n, here, NULL);
+ trace_rxrpc_call(call->debug_id, rxrpc_call_queued_ref, n,
+ here, NULL);
else
rxrpc_put_call(call, rxrpc_call_put_noqueue);
return true;
@@ -406,7 +409,8 @@ void rxrpc_see_call(struct rxrpc_call *call)
if (call) {
int n = atomic_read(&call->usage);
- trace_rxrpc_call(call, rxrpc_call_seen, n, here, NULL);
+ trace_rxrpc_call(call->debug_id, rxrpc_call_seen, n,
+ here, NULL);
}
}
@@ -418,7 +422,7 @@ void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
const void *here = __builtin_return_address(0);
int n = atomic_inc_return(&call->usage);
- trace_rxrpc_call(call, op, n, here, NULL);
+ trace_rxrpc_call(call->debug_id, op, n, here, NULL);
}
/*
@@ -445,7 +449,8 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
_enter("{%d,%d}", call->debug_id, atomic_read(&call->usage));
- trace_rxrpc_call(call, rxrpc_call_release, atomic_read(&call->usage),
+ trace_rxrpc_call(call->debug_id, rxrpc_call_release,
+ atomic_read(&call->usage),
here, (const void *)call->flags);
ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
@@ -488,10 +493,10 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
_debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn);
- if (conn) {
+ if (conn)
rxrpc_disconnect_call(call);
- conn->security->free_call_crypto(call);
- }
+ if (call->security)
+ call->security->free_call_crypto(call);
rxrpc_cleanup_ring(call);
_leave("");
@@ -534,12 +539,13 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
{
struct rxrpc_net *rxnet = call->rxnet;
const void *here = __builtin_return_address(0);
+ unsigned int debug_id = call->debug_id;
int n;
ASSERT(call != NULL);
n = atomic_dec_return(&call->usage);
- trace_rxrpc_call(call, op, n, here, NULL);
+ trace_rxrpc_call(debug_id, op, n, here, NULL);
ASSERTCMP(n, >=, 0);
if (n == 0) {
_debug("call %d dead", call->debug_id);
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 3f1da1b49f69..376370cd9285 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -212,7 +212,8 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp)
rxrpc_get_local(conn->params.local);
key_get(conn->params.key);
- trace_rxrpc_conn(conn, rxrpc_conn_new_client, atomic_read(&conn->usage),
+ trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_client,
+ atomic_read(&conn->usage),
__builtin_return_address(0));
trace_rxrpc_client(conn, -1, rxrpc_client_alloc);
_leave(" = %p", conn);
@@ -352,6 +353,7 @@ static int rxrpc_get_client_conn(struct rxrpc_sock *rx,
if (cp->exclusive) {
call->conn = candidate;
+ call->security = candidate->security;
call->security_ix = candidate->security_ix;
call->service_id = candidate->service_id;
_leave(" = 0 [exclusive %d]", candidate->debug_id);
@@ -403,6 +405,7 @@ static int rxrpc_get_client_conn(struct rxrpc_sock *rx,
candidate_published:
set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags);
call->conn = candidate;
+ call->security = candidate->security;
call->security_ix = candidate->security_ix;
call->service_id = candidate->service_id;
spin_unlock(&local->client_conns_lock);
@@ -425,6 +428,7 @@ found_extant_conn:
spin_lock(&conn->channel_lock);
call->conn = conn;
+ call->security = conn->security;
call->security_ix = conn->security_ix;
call->service_id = conn->service_id;
list_add_tail(&call->chan_wait_link, &conn->waiting_calls);
@@ -985,11 +989,12 @@ rxrpc_put_one_client_conn(struct rxrpc_connection *conn)
void rxrpc_put_client_conn(struct rxrpc_connection *conn)
{
const void *here = __builtin_return_address(0);
+ unsigned int debug_id = conn->debug_id;
int n;
do {
n = atomic_dec_return(&conn->usage);
- trace_rxrpc_conn(conn, rxrpc_conn_put_client, n, here);
+ trace_rxrpc_conn(debug_id, rxrpc_conn_put_client, n, here);
if (n > 0)
return;
ASSERTCMP(n, >=, 0);
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index ed05b6922132..38d718e90dc6 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -269,7 +269,7 @@ bool rxrpc_queue_conn(struct rxrpc_connection *conn)
if (n == 0)
return false;
if (rxrpc_queue_work(&conn->processor))
- trace_rxrpc_conn(conn, rxrpc_conn_queued, n + 1, here);
+ trace_rxrpc_conn(conn->debug_id, rxrpc_conn_queued, n + 1, here);
else
rxrpc_put_connection(conn);
return true;
@@ -284,7 +284,7 @@ void rxrpc_see_connection(struct rxrpc_connection *conn)
if (conn) {
int n = atomic_read(&conn->usage);
- trace_rxrpc_conn(conn, rxrpc_conn_seen, n, here);
+ trace_rxrpc_conn(conn->debug_id, rxrpc_conn_seen, n, here);
}
}
@@ -296,7 +296,7 @@ void rxrpc_get_connection(struct rxrpc_connection *conn)
const void *here = __builtin_return_address(0);
int n = atomic_inc_return(&conn->usage);
- trace_rxrpc_conn(conn, rxrpc_conn_got, n, here);
+ trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, n, here);
}
/*
@@ -310,7 +310,7 @@ rxrpc_get_connection_maybe(struct rxrpc_connection *conn)
if (conn) {
int n = atomic_fetch_add_unless(&conn->usage, 1, 0);
if (n > 0)
- trace_rxrpc_conn(conn, rxrpc_conn_got, n + 1, here);
+ trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, n + 1, here);
else
conn = NULL;
}
@@ -333,10 +333,11 @@ static void rxrpc_set_service_reap_timer(struct rxrpc_net *rxnet,
void rxrpc_put_service_conn(struct rxrpc_connection *conn)
{
const void *here = __builtin_return_address(0);
+ unsigned int debug_id = conn->debug_id;
int n;
n = atomic_dec_return(&conn->usage);
- trace_rxrpc_conn(conn, rxrpc_conn_put_service, n, here);
+ trace_rxrpc_conn(debug_id, rxrpc_conn_put_service, n, here);
ASSERTCMP(n, >=, 0);
if (n == 1)
rxrpc_set_service_reap_timer(conn->params.local->rxnet,
@@ -420,7 +421,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work)
*/
if (atomic_cmpxchg(&conn->usage, 1, 0) != 1)
continue;
- trace_rxrpc_conn(conn, rxrpc_conn_reap_service, 0, NULL);
+ trace_rxrpc_conn(conn->debug_id, rxrpc_conn_reap_service, 0, NULL);
if (rxrpc_conn_is_client(conn))
BUG();
diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c
index b30e13f6d95f..123d6ceab15c 100644
--- a/net/rxrpc/conn_service.c
+++ b/net/rxrpc/conn_service.c
@@ -134,7 +134,7 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn
list_add_tail(&conn->proc_link, &rxnet->conn_proc_list);
write_unlock(&rxnet->conn_lock);
- trace_rxrpc_conn(conn, rxrpc_conn_new_service,
+ trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_service,
atomic_read(&conn->usage),
__builtin_return_address(0));
}
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index c97ebdc043e4..48f67a9b1037 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -147,10 +147,16 @@ void rxrpc_error_report(struct sock *sk)
{
struct sock_exterr_skb *serr;
struct sockaddr_rxrpc srx;
- struct rxrpc_local *local = sk->sk_user_data;
+ struct rxrpc_local *local;
struct rxrpc_peer *peer;
struct sk_buff *skb;
+ rcu_read_lock();
+ local = rcu_dereference_sk_user_data(sk);
+ if (unlikely(!local)) {
+ rcu_read_unlock();
+ return;
+ }
_enter("%p{%d}", sk, local->debug_id);
/* Clear the outstanding error value on the socket so that it doesn't
@@ -160,6 +166,7 @@ void rxrpc_error_report(struct sock *sk)
skb = sock_dequeue_err_skb(sk);
if (!skb) {
+ rcu_read_unlock();
_leave("UDP socket errqueue empty");
return;
}
@@ -167,11 +174,11 @@ void rxrpc_error_report(struct sock *sk)
serr = SKB_EXT_ERR(skb);
if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) {
_leave("UDP empty message");
+ rcu_read_unlock();
rxrpc_free_skb(skb, rxrpc_skb_freed);
return;
}
- rcu_read_lock();
peer = rxrpc_lookup_peer_icmp_rcu(local, skb, &srx);
if (peer && !rxrpc_get_peer_maybe(peer))
peer = NULL;
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 9c3ac96f71cb..64830d8c1fdb 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -216,7 +216,7 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp)
peer = kzalloc(sizeof(struct rxrpc_peer), gfp);
if (peer) {
atomic_set(&peer->usage, 1);
- peer->local = local;
+ peer->local = rxrpc_get_local(local);
INIT_HLIST_HEAD(&peer->error_targets);
peer->service_conns = RB_ROOT;
seqlock_init(&peer->service_conn_lock);
@@ -307,7 +307,6 @@ void rxrpc_new_incoming_peer(struct rxrpc_sock *rx, struct rxrpc_local *local,
unsigned long hash_key;
hash_key = rxrpc_peer_hash_key(local, &peer->srx);
- peer->local = local;
rxrpc_init_peer(rx, peer, hash_key);
spin_lock(&rxnet->peer_hash_lock);
@@ -382,7 +381,7 @@ struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer)
int n;
n = atomic_inc_return(&peer->usage);
- trace_rxrpc_peer(peer, rxrpc_peer_got, n, here);
+ trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, n, here);
return peer;
}
@@ -396,7 +395,7 @@ struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *peer)
if (peer) {
int n = atomic_fetch_add_unless(&peer->usage, 1, 0);
if (n > 0)
- trace_rxrpc_peer(peer, rxrpc_peer_got, n + 1, here);
+ trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, n + 1, here);
else
peer = NULL;
}
@@ -417,6 +416,7 @@ static void __rxrpc_put_peer(struct rxrpc_peer *peer)
list_del_init(&peer->keepalive_link);
spin_unlock_bh(&rxnet->peer_hash_lock);
+ rxrpc_put_local(peer->local);
kfree_rcu(peer, rcu);
}
@@ -426,11 +426,13 @@ static void __rxrpc_put_peer(struct rxrpc_peer *peer)
void rxrpc_put_peer(struct rxrpc_peer *peer)
{
const void *here = __builtin_return_address(0);
+ unsigned int debug_id;
int n;
if (peer) {
+ debug_id = peer->debug_id;
n = atomic_dec_return(&peer->usage);
- trace_rxrpc_peer(peer, rxrpc_peer_put, n, here);
+ trace_rxrpc_peer(debug_id, rxrpc_peer_put, n, here);
if (n == 0)
__rxrpc_put_peer(peer);
}
@@ -443,13 +445,15 @@ void rxrpc_put_peer(struct rxrpc_peer *peer)
void rxrpc_put_peer_locked(struct rxrpc_peer *peer)
{
const void *here = __builtin_return_address(0);
+ unsigned int debug_id = peer->debug_id;
int n;
n = atomic_dec_return(&peer->usage);
- trace_rxrpc_peer(peer, rxrpc_peer_put, n, here);
+ trace_rxrpc_peer(debug_id, rxrpc_peer_put, n, here);
if (n == 0) {
hash_del_rcu(&peer->hash_link);
list_del_init(&peer->keepalive_link);
+ rxrpc_put_local(peer->local);
kfree_rcu(peer, rcu);
}
}
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 3b0becb12041..8578c39ec839 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -251,8 +251,8 @@ static int rxrpc_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
seq += subpacket;
}
- return call->conn->security->verify_packet(call, skb, offset, len,
- seq, cksum);
+ return call->security->verify_packet(call, skb, offset, len,
+ seq, cksum);
}
/*
@@ -267,11 +267,13 @@ static int rxrpc_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
*/
static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb,
u8 *_annotation,
- unsigned int *_offset, unsigned int *_len)
+ unsigned int *_offset, unsigned int *_len,
+ bool *_last)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
unsigned int offset = sizeof(struct rxrpc_wire_header);
unsigned int len;
+ bool last = false;
int ret;
u8 annotation = *_annotation;
u8 subpacket = annotation & RXRPC_RX_ANNO_SUBPACKET;
@@ -281,6 +283,8 @@ static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb,
len = skb->len - offset;
if (subpacket < sp->nr_subpackets - 1)
len = RXRPC_JUMBO_DATALEN;
+ else if (sp->rx_flags & RXRPC_SKB_INCL_LAST)
+ last = true;
if (!(annotation & RXRPC_RX_ANNO_VERIFIED)) {
ret = rxrpc_verify_packet(call, skb, annotation, offset, len);
@@ -291,7 +295,8 @@ static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb,
*_offset = offset;
*_len = len;
- call->conn->security->locate_data(call, skb, _offset, _len);
+ *_last = last;
+ call->security->locate_data(call, skb, _offset, _len);
return 0;
}
@@ -309,7 +314,7 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
rxrpc_serial_t serial;
rxrpc_seq_t hard_ack, top, seq;
size_t remain;
- bool last;
+ bool rx_pkt_last;
unsigned int rx_pkt_offset, rx_pkt_len;
int ix, copy, ret = -EAGAIN, ret2;
@@ -319,6 +324,7 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
rx_pkt_offset = call->rx_pkt_offset;
rx_pkt_len = call->rx_pkt_len;
+ rx_pkt_last = call->rx_pkt_last;
if (call->state >= RXRPC_CALL_SERVER_ACK_REQUEST) {
seq = call->rx_hard_ack;
@@ -329,6 +335,7 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
/* Barriers against rxrpc_input_data(). */
hard_ack = call->rx_hard_ack;
seq = hard_ack + 1;
+
while (top = smp_load_acquire(&call->rx_top),
before_eq(seq, top)
) {
@@ -356,7 +363,8 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
if (rx_pkt_offset == 0) {
ret2 = rxrpc_locate_data(call, skb,
&call->rxtx_annotations[ix],
- &rx_pkt_offset, &rx_pkt_len);
+ &rx_pkt_offset, &rx_pkt_len,
+ &rx_pkt_last);
trace_rxrpc_recvmsg(call, rxrpc_recvmsg_next, seq,
rx_pkt_offset, rx_pkt_len, ret2);
if (ret2 < 0) {
@@ -396,13 +404,12 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
}
/* The whole packet has been transferred. */
- last = sp->hdr.flags & RXRPC_LAST_PACKET;
if (!(flags & MSG_PEEK))
rxrpc_rotate_rx_window(call);
rx_pkt_offset = 0;
rx_pkt_len = 0;
- if (last) {
+ if (rx_pkt_last) {
ASSERTCMP(seq, ==, READ_ONCE(call->rx_top));
ret = 1;
goto out;
@@ -415,6 +422,7 @@ out:
if (!(flags & MSG_PEEK)) {
call->rx_pkt_offset = rx_pkt_offset;
call->rx_pkt_len = rx_pkt_len;
+ call->rx_pkt_last = rx_pkt_last;
}
done:
trace_rxrpc_recvmsg(call, rxrpc_recvmsg_data_return, seq,
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 6a1547b270fe..813fd6888142 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -419,7 +419,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
call->tx_winsize)
sp->hdr.flags |= RXRPC_MORE_PACKETS;
- ret = conn->security->secure_packet(
+ ret = call->security->secure_packet(
call, skb, skb->mark, skb->head);
if (ret < 0)
goto out;
@@ -661,6 +661,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
case RXRPC_CALL_SERVER_PREALLOC:
case RXRPC_CALL_SERVER_SECURING:
case RXRPC_CALL_SERVER_ACCEPTING:
+ rxrpc_put_call(call, rxrpc_call_put);
ret = -EBUSY;
goto error_release_sock;
default:
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 2558f00f6b3e..69d4676a402f 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -832,8 +832,7 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb)
}
static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = {
- [TCA_ACT_KIND] = { .type = NLA_NUL_STRING,
- .len = IFNAMSIZ - 1 },
+ [TCA_ACT_KIND] = { .type = NLA_STRING },
[TCA_ACT_INDEX] = { .type = NLA_U32 },
[TCA_ACT_COOKIE] = { .type = NLA_BINARY,
.len = TC_COOKIE_MAX_SIZE },
@@ -865,8 +864,10 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
NL_SET_ERR_MSG(extack, "TC action kind must be specified");
goto err_out;
}
- nla_strlcpy(act_name, kind, IFNAMSIZ);
-
+ if (nla_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ) {
+ NL_SET_ERR_MSG(extack, "TC action name too long");
+ goto err_out;
+ }
if (tb[TCA_ACT_COOKIE]) {
cookie = nla_memdup_cookie(tb);
if (!cookie) {
@@ -1352,11 +1353,16 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
struct netlink_ext_ack *extack)
{
size_t attr_size = 0;
- int ret = 0;
+ int loop, ret;
struct tc_action *actions[TCA_ACT_MAX_PRIO] = {};
- ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, actions,
- &attr_size, true, extack);
+ for (loop = 0; loop < 10; loop++) {
+ ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0,
+ actions, &attr_size, true, extack);
+ if (ret != -EAGAIN)
+ break;
+ }
+
if (ret < 0)
return ret;
ret = tcf_add_notify(net, n, actions, portid, attr_size, extack);
@@ -1406,11 +1412,8 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
*/
if (n->nlmsg_flags & NLM_F_REPLACE)
ovr = 1;
-replay:
ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr,
extack);
- if (ret == -EAGAIN)
- goto replay;
break;
case RTM_DELACTION:
ret = tca_action_gd(net, tca[TCA_ACT_TAB], n,
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 9ce073a05414..08923b21e566 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -484,7 +484,11 @@ static int __init mirred_init_module(void)
return err;
pr_info("Mirror/redirect action on\n");
- return tcf_register_action(&act_mirred_ops, &mirred_net_ops);
+ err = tcf_register_action(&act_mirred_ops, &mirred_net_ops);
+ if (err)
+ unregister_netdevice_notifier(&mirred_device_notifier);
+
+ return err;
}
static void __exit mirred_cleanup_module(void)
diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
index e168df0e008a..4cf6c553bb0b 100644
--- a/net/sched/act_mpls.c
+++ b/net/sched/act_mpls.c
@@ -55,7 +55,7 @@ static int tcf_mpls_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_mpls *m = to_mpls(a);
struct tcf_mpls_params *p;
__be32 new_lse;
- int ret;
+ int ret, mac_len;
tcf_lastuse_update(&m->tcf_tm);
bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
@@ -63,8 +63,12 @@ static int tcf_mpls_act(struct sk_buff *skb, const struct tc_action *a,
/* Ensure 'data' points at mac_header prior calling mpls manipulating
* functions.
*/
- if (skb_at_tc_ingress(skb))
+ if (skb_at_tc_ingress(skb)) {
skb_push_rcsum(skb, skb->mac_len);
+ mac_len = skb->mac_len;
+ } else {
+ mac_len = skb_network_header(skb) - skb_mac_header(skb);
+ }
ret = READ_ONCE(m->tcf_action);
@@ -72,12 +76,12 @@ static int tcf_mpls_act(struct sk_buff *skb, const struct tc_action *a,
switch (p->tcfm_action) {
case TCA_MPLS_ACT_POP:
- if (skb_mpls_pop(skb, p->tcfm_proto))
+ if (skb_mpls_pop(skb, p->tcfm_proto, mac_len))
goto drop;
break;
case TCA_MPLS_ACT_PUSH:
new_lse = tcf_mpls_get_lse(NULL, p, !eth_p_mpls(skb->protocol));
- if (skb_mpls_push(skb, new_lse, p->tcfm_proto))
+ if (skb_mpls_push(skb, new_lse, p->tcfm_proto, mac_len))
goto drop;
break;
case TCA_MPLS_ACT_MODIFY:
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index cdfaa79382a2..b5bc631b96b7 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -43,7 +43,7 @@ static struct tcf_pedit_key_ex *tcf_pedit_keys_ex_parse(struct nlattr *nla,
int err = -EINVAL;
int rem;
- if (!nla || !n)
+ if (!nla)
return NULL;
keys_ex = kcalloc(n, sizeof(*k), GFP_KERNEL);
@@ -170,6 +170,10 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
}
parm = nla_data(pattr);
+ if (!parm->nkeys) {
+ NL_SET_ERR_MSG_MOD(extack, "Pedit requires keys to be passed");
+ return -EINVAL;
+ }
ksize = parm->nkeys * sizeof(struct tc_pedit_key);
if (nla_len(pattr) < sizeof(*parm) + ksize) {
NL_SET_ERR_MSG_ATTR(extack, pattr, "Length of TCA_PEDIT_PARMS or TCA_PEDIT_PARMS_EX pedit attribute is invalid");
@@ -183,12 +187,6 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
index = parm->index;
err = tcf_idr_check_alloc(tn, &index, a, bind);
if (!err) {
- if (!parm->nkeys) {
- tcf_idr_cleanup(tn, index);
- NL_SET_ERR_MSG_MOD(extack, "Pedit requires keys to be passed");
- ret = -EINVAL;
- goto out_free;
- }
ret = tcf_idr_create(tn, index, est, a,
&act_pedit_ops, bind, false);
if (ret) {
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 2f83a79f76aa..d55669e14741 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -135,6 +135,10 @@ static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst,
if (opt_len < 0)
return opt_len;
opts_len += opt_len;
+ if (opts_len > IP_TUNNEL_OPTS_MAX) {
+ NL_SET_ERR_MSG(extack, "Tunnel options exceeds max size");
+ return -EINVAL;
+ }
if (dst) {
dst_len -= opt_len;
dst += opt_len;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 64584a1df425..20d60b8fcb70 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -21,6 +21,7 @@
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/rhashtable.h>
+#include <linux/jhash.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
@@ -47,6 +48,62 @@ static LIST_HEAD(tcf_proto_base);
/* Protects list of registered TC modules. It is pure SMP lock. */
static DEFINE_RWLOCK(cls_mod_lock);
+static u32 destroy_obj_hashfn(const struct tcf_proto *tp)
+{
+ return jhash_3words(tp->chain->index, tp->prio,
+ (__force __u32)tp->protocol, 0);
+}
+
+static void tcf_proto_signal_destroying(struct tcf_chain *chain,
+ struct tcf_proto *tp)
+{
+ struct tcf_block *block = chain->block;
+
+ mutex_lock(&block->proto_destroy_lock);
+ hash_add_rcu(block->proto_destroy_ht, &tp->destroy_ht_node,
+ destroy_obj_hashfn(tp));
+ mutex_unlock(&block->proto_destroy_lock);
+}
+
+static bool tcf_proto_cmp(const struct tcf_proto *tp1,
+ const struct tcf_proto *tp2)
+{
+ return tp1->chain->index == tp2->chain->index &&
+ tp1->prio == tp2->prio &&
+ tp1->protocol == tp2->protocol;
+}
+
+static bool tcf_proto_exists_destroying(struct tcf_chain *chain,
+ struct tcf_proto *tp)
+{
+ u32 hash = destroy_obj_hashfn(tp);
+ struct tcf_proto *iter;
+ bool found = false;
+
+ rcu_read_lock();
+ hash_for_each_possible_rcu(chain->block->proto_destroy_ht, iter,
+ destroy_ht_node, hash) {
+ if (tcf_proto_cmp(tp, iter)) {
+ found = true;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return found;
+}
+
+static void
+tcf_proto_signal_destroyed(struct tcf_chain *chain, struct tcf_proto *tp)
+{
+ struct tcf_block *block = chain->block;
+
+ mutex_lock(&block->proto_destroy_lock);
+ if (hash_hashed(&tp->destroy_ht_node))
+ hash_del_rcu(&tp->destroy_ht_node);
+ mutex_unlock(&block->proto_destroy_lock);
+}
+
/* Find classifier type by string name */
static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind)
@@ -162,11 +219,22 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp)
return TC_H_MAJ(first);
}
+static bool tcf_proto_check_kind(struct nlattr *kind, char *name)
+{
+ if (kind)
+ return nla_strlcpy(name, kind, IFNAMSIZ) >= IFNAMSIZ;
+ memset(name, 0, IFNAMSIZ);
+ return false;
+}
+
static bool tcf_proto_is_unlocked(const char *kind)
{
const struct tcf_proto_ops *ops;
bool ret;
+ if (strlen(kind) == 0)
+ return false;
+
ops = tcf_proto_lookup_ops(kind, false, NULL);
/* On error return false to take rtnl lock. Proto lookup/create
* functions will perform lookup again and properly handle errors.
@@ -223,9 +291,11 @@ static void tcf_proto_get(struct tcf_proto *tp)
static void tcf_chain_put(struct tcf_chain *chain);
static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held,
- struct netlink_ext_ack *extack)
+ bool sig_destroy, struct netlink_ext_ack *extack)
{
tp->ops->destroy(tp, rtnl_held, extack);
+ if (sig_destroy)
+ tcf_proto_signal_destroyed(tp->chain, tp);
tcf_chain_put(tp->chain);
module_put(tp->ops->owner);
kfree_rcu(tp, rcu);
@@ -235,7 +305,7 @@ static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held,
struct netlink_ext_ack *extack)
{
if (refcount_dec_and_test(&tp->refcnt))
- tcf_proto_destroy(tp, rtnl_held, extack);
+ tcf_proto_destroy(tp, rtnl_held, true, extack);
}
static int walker_check_empty(struct tcf_proto *tp, void *fh,
@@ -359,6 +429,7 @@ static bool tcf_chain_detach(struct tcf_chain *chain)
static void tcf_block_destroy(struct tcf_block *block)
{
mutex_destroy(&block->lock);
+ mutex_destroy(&block->proto_destroy_lock);
kfree_rcu(block, rcu);
}
@@ -534,6 +605,12 @@ static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held)
mutex_lock(&chain->filter_chain_lock);
tp = tcf_chain_dereference(chain->filter_chain, chain);
+ while (tp) {
+ tp_next = rcu_dereference_protected(tp->next, 1);
+ tcf_proto_signal_destroying(chain, tp);
+ tp = tp_next;
+ }
+ tp = tcf_chain_dereference(chain->filter_chain, chain);
RCU_INIT_POINTER(chain->filter_chain, NULL);
tcf_chain0_head_change(chain, NULL);
chain->flushing = true;
@@ -833,6 +910,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
return ERR_PTR(-ENOMEM);
}
mutex_init(&block->lock);
+ mutex_init(&block->proto_destroy_lock);
init_rwsem(&block->cb_lock);
flow_block_init(&block->flow_block);
INIT_LIST_HEAD(&block->chain_list);
@@ -1610,6 +1688,12 @@ static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain,
mutex_lock(&chain->filter_chain_lock);
+ if (tcf_proto_exists_destroying(chain, tp_new)) {
+ mutex_unlock(&chain->filter_chain_lock);
+ tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
+ return ERR_PTR(-EAGAIN);
+ }
+
tp = tcf_chain_tp_find(chain, &chain_info,
protocol, prio, false);
if (!tp)
@@ -1617,10 +1701,10 @@ static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain,
mutex_unlock(&chain->filter_chain_lock);
if (tp) {
- tcf_proto_destroy(tp_new, rtnl_held, NULL);
+ tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
tp_new = tp;
} else if (err) {
- tcf_proto_destroy(tp_new, rtnl_held, NULL);
+ tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
tp_new = ERR_PTR(err);
}
@@ -1658,6 +1742,7 @@ static void tcf_chain_tp_delete_empty(struct tcf_chain *chain,
return;
}
+ tcf_proto_signal_destroying(chain, tp);
next = tcf_chain_dereference(chain_info.next, chain);
if (tp == chain->filter_chain)
tcf_chain0_head_change(chain, next);
@@ -1843,6 +1928,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
{
struct net *net = sock_net(skb->sk);
struct nlattr *tca[TCA_MAX + 1];
+ char name[IFNAMSIZ];
struct tcmsg *t;
u32 protocol;
u32 prio;
@@ -1899,13 +1985,19 @@ replay:
if (err)
return err;
+ if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
+ NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
+ err = -EINVAL;
+ goto errout;
+ }
+
/* Take rtnl mutex if rtnl_held was set to true on previous iteration,
* block is shared (no qdisc found), qdisc is not unlocked, classifier
* type is not specified, classifier is not unlocked.
*/
if (rtnl_held ||
(q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
- !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) {
+ !tcf_proto_is_unlocked(name)) {
rtnl_held = true;
rtnl_lock();
}
@@ -2063,6 +2155,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
{
struct net *net = sock_net(skb->sk);
struct nlattr *tca[TCA_MAX + 1];
+ char name[IFNAMSIZ];
struct tcmsg *t;
u32 protocol;
u32 prio;
@@ -2102,13 +2195,18 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
if (err)
return err;
+ if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
+ NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
+ err = -EINVAL;
+ goto errout;
+ }
/* Take rtnl mutex if flushing whole chain, block is shared (no qdisc
* found), qdisc is not unlocked, classifier type is not specified,
* classifier is not unlocked.
*/
if (!prio ||
(q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
- !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) {
+ !tcf_proto_is_unlocked(name)) {
rtnl_held = true;
rtnl_lock();
}
@@ -2164,6 +2262,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
err = -EINVAL;
goto errout_locked;
} else if (t->tcm_handle == 0) {
+ tcf_proto_signal_destroying(chain, tp);
tcf_chain_tp_remove(chain, &chain_info, tp);
mutex_unlock(&chain->filter_chain_lock);
@@ -2216,6 +2315,7 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
{
struct net *net = sock_net(skb->sk);
struct nlattr *tca[TCA_MAX + 1];
+ char name[IFNAMSIZ];
struct tcmsg *t;
u32 protocol;
u32 prio;
@@ -2252,12 +2352,17 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
if (err)
return err;
+ if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
+ NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
+ err = -EINVAL;
+ goto errout;
+ }
/* Take rtnl mutex if block is shared (no qdisc found), qdisc is not
* unlocked, classifier type is not specified, classifier is not
* unlocked.
*/
if ((q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
- !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) {
+ !tcf_proto_is_unlocked(name)) {
rtnl_held = true;
rtnl_lock();
}
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index bf10bdaf5012..8229ed4a67be 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -162,16 +162,20 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
cls_bpf.name = obj->bpf_name;
cls_bpf.exts_integrated = obj->exts_integrated;
- if (oldprog)
+ if (oldprog && prog)
err = tc_setup_cb_replace(block, tp, TC_SETUP_CLSBPF, &cls_bpf,
skip_sw, &oldprog->gen_flags,
&oldprog->in_hw_count,
&prog->gen_flags, &prog->in_hw_count,
true);
- else
+ else if (prog)
err = tc_setup_cb_add(block, tp, TC_SETUP_CLSBPF, &cls_bpf,
skip_sw, &prog->gen_flags,
&prog->in_hw_count, true);
+ else
+ err = tc_setup_cb_destroy(block, tp, TC_SETUP_CLSBPF, &cls_bpf,
+ skip_sw, &oldprog->gen_flags,
+ &oldprog->in_hw_count, true);
if (prog && err) {
cls_bpf_offload_cmd(tp, oldprog, prog, extack);
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 82bd14e7ac93..3177dcb17316 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -446,7 +446,7 @@ META_COLLECTOR(int_sk_wmem_queued)
*err = -1;
return;
}
- dst->value = sk->sk_wmem_queued;
+ dst->value = READ_ONCE(sk->sk_wmem_queued);
}
META_COLLECTOR(int_sk_fwd_alloc)
@@ -554,7 +554,7 @@ META_COLLECTOR(int_sk_rcvlowat)
*err = -1;
return;
}
- dst->value = sk->sk_rcvlowat;
+ dst->value = READ_ONCE(sk->sk_rcvlowat);
}
META_COLLECTOR(int_sk_rcvtimeo)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 81d58b280612..1047825d9f48 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1390,8 +1390,7 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
}
const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
- [TCA_KIND] = { .type = NLA_NUL_STRING,
- .len = IFNAMSIZ - 1 },
+ [TCA_KIND] = { .type = NLA_STRING },
[TCA_RATE] = { .type = NLA_BINARY,
.len = sizeof(struct tc_estimator) },
[TCA_STAB] = { .type = NLA_NESTED },
diff --git a/net/sched/sch_etf.c b/net/sched/sch_etf.c
index cebfb65d8556..b1da5589a0c6 100644
--- a/net/sched/sch_etf.c
+++ b/net/sched/sch_etf.c
@@ -177,7 +177,7 @@ static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch,
parent = *p;
skb = rb_to_skb(parent);
- if (ktime_after(txtime, skb->tstamp)) {
+ if (ktime_compare(txtime, skb->tstamp) >= 0) {
p = &parent->rb_right;
leftmost = false;
} else {
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 17bd8f539bc7..8769b4b8807d 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -799,9 +799,6 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
};
EXPORT_SYMBOL(pfifo_fast_ops);
-static struct lock_class_key qdisc_tx_busylock;
-static struct lock_class_key qdisc_running_key;
-
struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
const struct Qdisc_ops *ops,
struct netlink_ext_ack *extack)
@@ -854,17 +851,9 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
}
spin_lock_init(&sch->busylock);
- lockdep_set_class(&sch->busylock,
- dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
-
/* seqlock has the same scope of busylock, for NOLOCK qdisc */
spin_lock_init(&sch->seqlock);
- lockdep_set_class(&sch->busylock,
- dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
-
seqcount_init(&sch->running);
- lockdep_set_class(&sch->running,
- dev->qdisc_running_key ?: &qdisc_running_key);
sch->ops = ops;
sch->flags = ops->static_flags;
@@ -875,6 +864,12 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
dev_hold(dev);
refcount_set(&sch->refcnt, 1);
+ if (sch != &noop_qdisc) {
+ lockdep_set_class(&sch->busylock, &dev->qdisc_tx_busylock_key);
+ lockdep_set_class(&sch->seqlock, &dev->qdisc_tx_busylock_key);
+ lockdep_set_class(&sch->running, &dev->qdisc_running_key);
+ }
+
return sch;
errout1:
kfree(p);
@@ -1043,6 +1038,8 @@ static void attach_one_default_qdisc(struct net_device *dev,
if (dev->priv_flags & IFF_NO_QUEUE)
ops = &noqueue_qdisc_ops;
+ else if(dev->type == ARPHRD_CAN)
+ ops = &pfifo_fast_ops;
qdisc = qdisc_create_dflt(dev_queue, ops, TC_H_ROOT, NULL);
if (!qdisc) {
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 23cd1c873a2c..be35f03b657b 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -5,11 +5,11 @@
* Copyright (C) 2013 Nandita Dukkipati <nanditad@google.com>
*/
-#include <linux/jhash.h>
#include <linux/jiffies.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
+#include <linux/siphash.h>
#include <net/pkt_sched.h>
#include <net/sock.h>
@@ -126,7 +126,7 @@ struct wdrr_bucket {
struct hhf_sched_data {
struct wdrr_bucket buckets[WDRR_BUCKET_CNT];
- u32 perturbation; /* hash perturbation */
+ siphash_key_t perturbation; /* hash perturbation */
u32 quantum; /* psched_mtu(qdisc_dev(sch)); */
u32 drop_overlimit; /* number of times max qdisc packet
* limit was hit
@@ -264,7 +264,7 @@ static enum wdrr_bucket_idx hhf_classify(struct sk_buff *skb, struct Qdisc *sch)
}
/* Get hashed flow-id of the skb. */
- hash = skb_get_hash_perturb(skb, q->perturbation);
+ hash = skb_get_hash_perturb(skb, &q->perturbation);
/* Check if this packet belongs to an already established HH flow. */
flow_pos = hash & HHF_BIT_MASK;
@@ -582,7 +582,7 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt,
sch->limit = 1000;
q->quantum = psched_mtu(qdisc_dev(sch));
- q->perturbation = prandom_u32();
+ get_random_bytes(&q->perturbation, sizeof(q->perturbation));
INIT_LIST_HEAD(&q->new_buckets);
INIT_LIST_HEAD(&q->old_buckets);
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 0e44039e729c..42e557d48e4e 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -509,6 +509,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (skb->ip_summed == CHECKSUM_PARTIAL &&
skb_checksum_help(skb)) {
qdisc_drop(skb, sch, to_free);
+ skb = NULL;
goto finish_segs;
}
@@ -593,9 +594,10 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
finish_segs:
if (segs) {
unsigned int len, last_len;
- int nb = 0;
+ int nb;
- len = skb->len;
+ len = skb ? skb->len : 0;
+ nb = skb ? 1 : 0;
while (segs) {
skb2 = segs->next;
@@ -612,7 +614,10 @@ finish_segs:
}
segs = skb2;
}
- qdisc_tree_reduce_backlog(sch, -nb, prev_len - len);
+ /* Parent qdiscs accounted for 1 skb of size @prev_len */
+ qdisc_tree_reduce_backlog(sch, -(nb - 1), -(len - prev_len));
+ } else if (!skb) {
+ return NET_XMIT_DROP;
}
return NET_XMIT_SUCCESS;
}
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index d448fe3068e5..4074c50ac3d7 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -18,7 +18,7 @@
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/random.h>
-#include <linux/jhash.h>
+#include <linux/siphash.h>
#include <net/ip.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
@@ -45,7 +45,7 @@ struct sfb_bucket {
* (Section 4.4 of SFB reference : moving hash functions)
*/
struct sfb_bins {
- u32 perturbation; /* jhash perturbation */
+ siphash_key_t perturbation; /* siphash key */
struct sfb_bucket bins[SFB_LEVELS][SFB_NUMBUCKETS];
};
@@ -217,7 +217,8 @@ static u32 sfb_compute_qlen(u32 *prob_r, u32 *avgpm_r, const struct sfb_sched_da
static void sfb_init_perturbation(u32 slot, struct sfb_sched_data *q)
{
- q->bins[slot].perturbation = prandom_u32();
+ get_random_bytes(&q->bins[slot].perturbation,
+ sizeof(q->bins[slot].perturbation));
}
static void sfb_swap_slot(struct sfb_sched_data *q)
@@ -314,9 +315,9 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
/* If using external classifiers, get result and record it. */
if (!sfb_classify(skb, fl, &ret, &salt))
goto other_drop;
- sfbhash = jhash_1word(salt, q->bins[slot].perturbation);
+ sfbhash = siphash_1u32(salt, &q->bins[slot].perturbation);
} else {
- sfbhash = skb_get_hash_perturb(skb, q->bins[slot].perturbation);
+ sfbhash = skb_get_hash_perturb(skb, &q->bins[slot].perturbation);
}
@@ -352,7 +353,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
/* Inelastic flow */
if (q->double_buffering) {
sfbhash = skb_get_hash_perturb(skb,
- q->bins[slot].perturbation);
+ &q->bins[slot].perturbation);
if (!sfbhash)
sfbhash = 1;
sfb_skb_cb(skb)->hashes[slot] = sfbhash;
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 68404a9d2ce4..c787d4d46017 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -14,7 +14,7 @@
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/skbuff.h>
-#include <linux/jhash.h>
+#include <linux/siphash.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <net/netlink.h>
@@ -117,7 +117,7 @@ struct sfq_sched_data {
u8 headdrop;
u8 maxdepth; /* limit of packets per flow */
- u32 perturbation;
+ siphash_key_t perturbation;
u8 cur_depth; /* depth of longest slot */
u8 flags;
unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
@@ -157,7 +157,7 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index
static unsigned int sfq_hash(const struct sfq_sched_data *q,
const struct sk_buff *skb)
{
- return skb_get_hash_perturb(skb, q->perturbation) & (q->divisor - 1);
+ return skb_get_hash_perturb(skb, &q->perturbation) & (q->divisor - 1);
}
static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
@@ -607,9 +607,11 @@ static void sfq_perturbation(struct timer_list *t)
struct sfq_sched_data *q = from_timer(q, t, perturb_timer);
struct Qdisc *sch = q->sch;
spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
+ siphash_key_t nkey;
+ get_random_bytes(&nkey, sizeof(nkey));
spin_lock(root_lock);
- q->perturbation = prandom_u32();
+ q->perturbation = nkey;
if (!q->filter_list && q->tail)
sfq_rehash(sch);
spin_unlock(root_lock);
@@ -688,7 +690,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
del_timer(&q->perturb_timer);
if (q->perturb_period) {
mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
- q->perturbation = prandom_u32();
+ get_random_bytes(&q->perturbation, sizeof(q->perturbation));
}
sch_tree_unlock(sch);
kfree(p);
@@ -745,7 +747,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt,
q->quantum = psched_mtu(qdisc_dev(sch));
q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
q->perturb_period = 0;
- q->perturbation = prandom_u32();
+ get_random_bytes(&q->perturbation, sizeof(q->perturbation));
if (opt) {
int err = sfq_change(sch, opt);
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 68b543f85a96..c609373c8661 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -922,7 +922,7 @@ static int taprio_parse_mqprio_opt(struct net_device *dev,
}
/* Verify priority mapping uses valid tcs */
- for (i = 0; i < TC_BITMASK + 1; i++) {
+ for (i = 0; i <= TC_BITMASK; i++) {
if (qopt->prio_tc_map[i] >= qopt->num_tc) {
NL_SET_ERR_MSG(extack, "Invalid traffic class in priority to traffic class mapping");
return -EINVAL;
@@ -1152,7 +1152,7 @@ EXPORT_SYMBOL_GPL(taprio_offload_free);
* offload state (PENDING, ACTIVE, INACTIVE) so it can be visible in dump().
* This is left as TODO.
*/
-void taprio_offload_config_changed(struct taprio_sched *q)
+static void taprio_offload_config_changed(struct taprio_sched *q)
{
struct sched_gate_list *oper, *admin;
@@ -1224,8 +1224,6 @@ static int taprio_enable_offload(struct net_device *dev,
goto done;
}
- taprio_offload_config_changed(q);
-
done:
taprio_offload_free(offload);
@@ -1341,10 +1339,34 @@ static int taprio_parse_clockid(struct Qdisc *sch, struct nlattr **tb,
NL_SET_ERR_MSG(extack, "Specifying a 'clockid' is mandatory");
goto out;
}
+
+ /* Everything went ok, return success. */
+ err = 0;
+
out:
return err;
}
+static int taprio_mqprio_cmp(const struct net_device *dev,
+ const struct tc_mqprio_qopt *mqprio)
+{
+ int i;
+
+ if (!mqprio || mqprio->num_tc != dev->num_tc)
+ return -1;
+
+ for (i = 0; i < mqprio->num_tc; i++)
+ if (dev->tc_to_txq[i].count != mqprio->count[i] ||
+ dev->tc_to_txq[i].offset != mqprio->offset[i])
+ return -1;
+
+ for (i = 0; i <= TC_BITMASK; i++)
+ if (dev->prio_tc_map[i] != mqprio->prio_tc_map[i])
+ return -1;
+
+ return 0;
+}
+
static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
@@ -1396,6 +1418,10 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
admin = rcu_dereference(q->admin_sched);
rcu_read_unlock();
+ /* no changes - no new mqprio settings */
+ if (!taprio_mqprio_cmp(dev, mqprio))
+ mqprio = NULL;
+
if (mqprio && (oper || admin)) {
NL_SET_ERR_MSG(extack, "Changing the traffic mapping of a running schedule is not supported");
err = -ENOTSUPP;
@@ -1453,7 +1479,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
mqprio->offset[i]);
/* Always use supplied priority mappings */
- for (i = 0; i < TC_BITMASK + 1; i++)
+ for (i = 0; i <= TC_BITMASK; i++)
netdev_set_prio_tc_map(dev, i,
mqprio->prio_tc_map[i]);
}
@@ -1501,6 +1527,9 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
call_rcu(&admin->rcu, taprio_free_sched_cb);
spin_unlock_irqrestore(&q->current_entry_lock, flags);
+
+ if (FULL_OFFLOAD_IS_ENABLED(taprio_flags))
+ taprio_offload_config_changed(q);
}
new_admin = NULL;
diff --git a/net/sctp/diag.c b/net/sctp/diag.c
index fc9a4c6629ce..0851166b9175 100644
--- a/net/sctp/diag.c
+++ b/net/sctp/diag.c
@@ -175,7 +175,7 @@ static int inet_sctp_diag_fill(struct sock *sk, struct sctp_association *asoc,
mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
- mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len;
+ mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len);
mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
if (nla_put(skb, INET_DIAG_SKMEMINFO, sizeof(mem), &mem) < 0)
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 5a070fb5b278..2277981559d0 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -243,7 +243,7 @@ int sctp_rcv(struct sk_buff *skb)
bh_lock_sock(sk);
}
- if (sock_owned_by_user(sk)) {
+ if (sock_owned_by_user(sk) || !sctp_newsk_ready(sk)) {
if (sctp_add_backlog(sk, skb)) {
bh_unlock_sock(sk);
sctp_chunk_free(chunk);
@@ -321,8 +321,8 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb)
local_bh_disable();
bh_lock_sock(sk);
- if (sock_owned_by_user(sk)) {
- if (sk_add_backlog(sk, skb, sk->sk_rcvbuf))
+ if (sock_owned_by_user(sk) || !sctp_newsk_ready(sk)) {
+ if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf)))
sctp_chunk_free(chunk);
else
backloged = 1;
@@ -336,7 +336,13 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb)
if (backloged)
return 0;
} else {
- sctp_inq_push(inqueue, chunk);
+ if (!sctp_newsk_ready(sk)) {
+ if (!sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf)))
+ return 0;
+ sctp_chunk_free(chunk);
+ } else {
+ sctp_inq_push(inqueue, chunk);
+ }
}
done:
@@ -358,7 +364,7 @@ static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb)
struct sctp_ep_common *rcvr = chunk->rcvr;
int ret;
- ret = sk_add_backlog(sk, skb, sk->sk_rcvbuf);
+ ret = sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf));
if (!ret) {
/* Hold the assoc/ep while hanging on the backlog queue.
* This way, we know structures we need will not disappear
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index e41ed2e0ae7d..48d63956a68c 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2155,7 +2155,7 @@ static enum sctp_ierror sctp_verify_param(struct net *net,
case SCTP_PARAM_SET_PRIMARY:
if (ep->asconf_enable)
break;
- goto fallthrough;
+ goto unhandled;
case SCTP_PARAM_HOST_NAME_ADDRESS:
/* Tell the peer, we won't support this param. */
@@ -2166,11 +2166,11 @@ static enum sctp_ierror sctp_verify_param(struct net *net,
case SCTP_PARAM_FWD_TSN_SUPPORT:
if (ep->prsctp_enable)
break;
- goto fallthrough;
+ goto unhandled;
case SCTP_PARAM_RANDOM:
if (!ep->auth_enable)
- goto fallthrough;
+ goto unhandled;
/* SCTP-AUTH: Secion 6.1
* If the random number is not 32 byte long the association
@@ -2187,7 +2187,7 @@ static enum sctp_ierror sctp_verify_param(struct net *net,
case SCTP_PARAM_CHUNKS:
if (!ep->auth_enable)
- goto fallthrough;
+ goto unhandled;
/* SCTP-AUTH: Section 3.2
* The CHUNKS parameter MUST be included once in the INIT or
@@ -2203,7 +2203,7 @@ static enum sctp_ierror sctp_verify_param(struct net *net,
case SCTP_PARAM_HMAC_ALGO:
if (!ep->auth_enable)
- goto fallthrough;
+ goto unhandled;
hmacs = (struct sctp_hmac_algo_param *)param.p;
n_elt = (ntohs(param.p->length) -
@@ -2226,7 +2226,7 @@ static enum sctp_ierror sctp_verify_param(struct net *net,
retval = SCTP_IERROR_ABORT;
}
break;
-fallthrough:
+unhandled:
default:
pr_debug("%s: unrecognized param:%d for chunk:%d\n",
__func__, ntohs(param.p->type), cid);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 939b8d2595bc..ffd3262b7a41 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -8476,7 +8476,7 @@ __poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
mask = 0;
/* Is there any exceptional events? */
- if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+ if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
mask |= EPOLLERR |
(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
if (sk->sk_shutdown & RCV_SHUTDOWN)
@@ -8485,7 +8485,7 @@ __poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
mask |= EPOLLHUP;
/* Is it readable? Reconsider this code with TCP-style support. */
- if (!skb_queue_empty(&sk->sk_receive_queue))
+ if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
mask |= EPOLLIN | EPOLLRDNORM;
/* The association is either gone or not ready. */
@@ -8871,7 +8871,7 @@ struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags,
if (sk_can_busy_loop(sk)) {
sk_busy_loop(sk, noblock);
- if (!skb_queue_empty(&sk->sk_receive_queue))
+ if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
continue;
}
@@ -9306,7 +9306,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
newinet->inet_rcv_saddr = inet->inet_rcv_saddr;
newinet->inet_dport = htons(asoc->peer.port);
newinet->pmtudisc = inet->pmtudisc;
- newinet->inet_id = asoc->next_tsn ^ jiffies;
+ newinet->inet_id = prandom_u32();
newinet->uc_ttl = inet->uc_ttl;
newinet->mc_loop = 1;
@@ -9500,7 +9500,7 @@ struct proto sctp_prot = {
.backlog_rcv = sctp_backlog_rcv,
.hash = sctp_hash,
.unhash = sctp_unhash,
- .get_port = sctp_get_port,
+ .no_autobind = true,
.obj_size = sizeof(struct sctp_sock),
.useroffset = offsetof(struct sctp_sock, subscribe),
.usersize = offsetof(struct sctp_sock, initmsg) -
@@ -9542,7 +9542,7 @@ struct proto sctpv6_prot = {
.backlog_rcv = sctp_backlog_rcv,
.hash = sctp_hash,
.unhash = sctp_unhash,
- .get_port = sctp_get_port,
+ .no_autobind = true,
.obj_size = sizeof(struct sctp6_sock),
.useroffset = offsetof(struct sctp6_sock, sctp.subscribe),
.usersize = offsetof(struct sctp6_sock, sctp.initmsg) -
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 5b932583e407..737b49909a7a 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -123,6 +123,12 @@ struct proto smc_proto6 = {
};
EXPORT_SYMBOL_GPL(smc_proto6);
+static void smc_restore_fallback_changes(struct smc_sock *smc)
+{
+ smc->clcsock->file->private_data = smc->sk.sk_socket;
+ smc->clcsock->file = NULL;
+}
+
static int __smc_release(struct smc_sock *smc)
{
struct sock *sk = &smc->sk;
@@ -141,6 +147,7 @@ static int __smc_release(struct smc_sock *smc)
}
sk->sk_state = SMC_CLOSED;
sk->sk_state_change(sk);
+ smc_restore_fallback_changes(smc);
}
sk->sk_prot->unhash(sk);
@@ -700,8 +707,6 @@ static int __smc_connect(struct smc_sock *smc)
int smc_type;
int rc = 0;
- sock_hold(&smc->sk); /* sock put in passive closing */
-
if (smc->use_fallback)
return smc_connect_fallback(smc, smc->fallback_rsn);
@@ -791,6 +796,7 @@ static void smc_connect_work(struct work_struct *work)
smc->sk.sk_err = EPIPE;
else if (signal_pending(current))
smc->sk.sk_err = -sock_intr_errno(timeo);
+ sock_put(&smc->sk); /* passive closing */
goto out;
}
@@ -846,6 +852,8 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
rc = kernel_connect(smc->clcsock, addr, alen, flags);
if (rc && rc != -EINPROGRESS)
goto out;
+
+ sock_hold(&smc->sk); /* sock put in passive closing */
if (flags & O_NONBLOCK) {
if (schedule_work(&smc->connect_work))
smc->connect_nonblock = 1;
@@ -1291,8 +1299,8 @@ static void smc_listen_work(struct work_struct *work)
/* check if RDMA is available */
if (!ism_supported) { /* SMC_TYPE_R or SMC_TYPE_B */
/* prepare RDMA check */
- memset(&ini, 0, sizeof(ini));
ini.is_smcd = false;
+ ini.ism_dev = NULL;
ini.ib_lcl = &pclc->lcl;
rc = smc_find_rdma_device(new_smc, &ini);
if (rc) {
@@ -1724,7 +1732,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
case TCP_FASTOPEN_KEY:
case TCP_FASTOPEN_NO_COOKIE:
/* option not supported by SMC */
- if (sk->sk_state == SMC_INIT) {
+ if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
smc_switch_to_fallback(smc);
smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
} else {
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 4ca50ddf8d16..2ba97ff325a5 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -213,7 +213,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
if (!lgr) {
rc = SMC_CLC_DECL_MEM;
- goto out;
+ goto ism_put_vlan;
}
lgr->is_smcd = ini->is_smcd;
lgr->sync_err = 0;
@@ -289,6 +289,9 @@ clear_llc_lnk:
smc_llc_link_clear(lnk);
free_lgr:
kfree(lgr);
+ism_put_vlan:
+ if (ini->is_smcd && ini->vlan_id)
+ smc_ism_put_vlan(ini->ism_dev, ini->vlan_id);
out:
if (rc < 0) {
if (rc == -ENOMEM)
@@ -558,7 +561,7 @@ int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
}
rtnl_lock();
- nest_lvl = dev_get_nest_level(ndev);
+ nest_lvl = ndev->lower_level;
for (i = 0; i < nest_lvl; i++) {
struct list_head *lower = &ndev->adj_list.lower;
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index bab2da8cf17a..571e6d84da3b 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -376,8 +376,6 @@ static int smc_pnet_fill_entry(struct net *net,
return 0;
error:
- if (pnetelem->ndev)
- dev_put(pnetelem->ndev);
return rc;
}
@@ -718,7 +716,7 @@ static struct net_device *pnet_find_base_ndev(struct net_device *ndev)
int i, nest_lvl;
rtnl_lock();
- nest_lvl = dev_get_nest_level(ndev);
+ nest_lvl = ndev->lower_level;
for (i = 0; i < nest_lvl; i++) {
struct list_head *lower = &ndev->adj_list.lower;
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index 413a6abf227e..97e8369002d7 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -211,8 +211,7 @@ int smc_rx_wait(struct smc_sock *smc, long *timeo,
rc = sk_wait_event(sk, timeo,
sk->sk_err ||
sk->sk_shutdown & RCV_SHUTDOWN ||
- fcrit(conn) ||
- smc_cdc_rxed_any_close_or_senddone(conn),
+ fcrit(conn),
&wait);
remove_wait_queue(sk_sleep(sk), &wait);
sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
@@ -262,6 +261,18 @@ static int smc_rx_recv_urg(struct smc_sock *smc, struct msghdr *msg, int len,
return -EAGAIN;
}
+static bool smc_rx_recvmsg_data_available(struct smc_sock *smc)
+{
+ struct smc_connection *conn = &smc->conn;
+
+ if (smc_rx_data_available(conn))
+ return true;
+ else if (conn->urg_state == SMC_URG_VALID)
+ /* we received a single urgent Byte - skip */
+ smc_rx_update_cons(smc, 0);
+ return false;
+}
+
/* smc_rx_recvmsg - receive data from RMBE
* @msg: copy data to receive buffer
* @pipe: copy data to pipe if set - indicates splice() call
@@ -303,16 +314,18 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,
if (read_done >= target || (pipe && read_done))
break;
- if (atomic_read(&conn->bytes_to_rcv))
+ if (smc_rx_recvmsg_data_available(smc))
goto copy;
- else if (conn->urg_state == SMC_URG_VALID)
- /* we received a single urgent Byte - skip */
- smc_rx_update_cons(smc, 0);
if (sk->sk_shutdown & RCV_SHUTDOWN ||
- smc_cdc_rxed_any_close_or_senddone(conn) ||
- conn->local_tx_ctrl.conn_state_flags.peer_conn_abort)
+ conn->local_tx_ctrl.conn_state_flags.peer_conn_abort) {
+ /* smc_cdc_msg_recv_action() could have run after
+ * above smc_rx_recvmsg_data_available()
+ */
+ if (smc_rx_recvmsg_data_available(smc))
+ goto copy;
break;
+ }
if (read_done) {
if (sk->sk_err ||
diff --git a/net/socket.c b/net/socket.c
index 6a9ab7a8b1d2..17bc1eee198a 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -404,6 +404,7 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
sock->file = file;
file->private_data = sock;
+ stream_open(SOCK_INODE(sock), file);
return file;
}
EXPORT_SYMBOL(sock_alloc_file);
@@ -1690,24 +1691,13 @@ SYSCALL_DEFINE2(listen, int, fd, int, backlog)
return __sys_listen(fd, backlog);
}
-/*
- * For accept, we attempt to create a new socket, set up the link
- * with the client, wake up the client, then return the new
- * connected fd. We collect the address of the connector in kernel
- * space and move it to user at the very end. This is unclean because
- * we open the socket then return an error.
- *
- * 1003.1g adds the ability to recvmsg() to query connection pending
- * status to recvmsg. We need to add that support in a way thats
- * clean when we restructure accept also.
- */
-
-int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
- int __user *upeer_addrlen, int flags)
+int __sys_accept4_file(struct file *file, unsigned file_flags,
+ struct sockaddr __user *upeer_sockaddr,
+ int __user *upeer_addrlen, int flags)
{
struct socket *sock, *newsock;
struct file *newfile;
- int err, len, newfd, fput_needed;
+ int err, len, newfd;
struct sockaddr_storage address;
if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
@@ -1716,14 +1706,14 @@ int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
- sock = sockfd_lookup_light(fd, &err, &fput_needed);
+ sock = sock_from_file(file, &err);
if (!sock)
goto out;
err = -ENFILE;
newsock = sock_alloc();
if (!newsock)
- goto out_put;
+ goto out;
newsock->type = sock->type;
newsock->ops = sock->ops;
@@ -1738,20 +1728,21 @@ int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
if (unlikely(newfd < 0)) {
err = newfd;
sock_release(newsock);
- goto out_put;
+ goto out;
}
newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
if (IS_ERR(newfile)) {
err = PTR_ERR(newfile);
put_unused_fd(newfd);
- goto out_put;
+ goto out;
}
err = security_socket_accept(sock, newsock);
if (err)
goto out_fd;
- err = sock->ops->accept(sock, newsock, sock->file->f_flags, false);
+ err = sock->ops->accept(sock, newsock, sock->file->f_flags | file_flags,
+ false);
if (err < 0)
goto out_fd;
@@ -1772,15 +1763,42 @@ int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
fd_install(newfd, newfile);
err = newfd;
-
-out_put:
- fput_light(sock->file, fput_needed);
out:
return err;
out_fd:
fput(newfile);
put_unused_fd(newfd);
- goto out_put;
+ goto out;
+
+}
+
+/*
+ * For accept, we attempt to create a new socket, set up the link
+ * with the client, wake up the client, then return the new
+ * connected fd. We collect the address of the connector in kernel
+ * space and move it to user at the very end. This is unclean because
+ * we open the socket then return an error.
+ *
+ * 1003.1g adds the ability to recvmsg() to query connection pending
+ * status to recvmsg. We need to add that support in a way thats
+ * clean when we restructure accept also.
+ */
+
+int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
+ int __user *upeer_addrlen, int flags)
+{
+ int ret = -EBADF;
+ struct fd f;
+
+ f = fdget(fd);
+ if (f.file) {
+ ret = __sys_accept4_file(f.file, 0, upeer_sockaddr,
+ upeer_addrlen, flags);
+ if (f.flags)
+ fput(f.file);
+ }
+
+ return ret;
}
SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index 339e8c077c2d..195b40c5dae4 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -220,7 +220,7 @@ void xprt_destroy_bc(struct rpc_xprt *xprt, unsigned int max_reqs)
goto out;
spin_lock_bh(&xprt->bc_pa_lock);
- xprt->bc_alloc_max -= max_reqs;
+ xprt->bc_alloc_max -= min(max_reqs, xprt->bc_alloc_max);
list_for_each_entry_safe(req, tmp, &xprt->bc_pa_list, rq_bc_pa_list) {
dprintk("RPC: req=%p\n", req);
list_del(&req->rq_bc_pa_list);
@@ -307,8 +307,8 @@ void xprt_free_bc_rqst(struct rpc_rqst *req)
*/
dprintk("RPC: Last session removed req=%p\n", req);
xprt_free_allocation(req);
- return;
}
+ xprt_put(xprt);
}
/*
@@ -339,7 +339,7 @@ found:
spin_unlock(&xprt->bc_pa_lock);
if (new) {
if (req != new)
- xprt_free_bc_rqst(new);
+ xprt_free_allocation(new);
break;
} else if (req)
break;
@@ -368,6 +368,7 @@ void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied)
set_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);
dprintk("RPC: add callback request to list\n");
+ xprt_get(xprt);
spin_lock(&bc_serv->sv_cb_lock);
list_add(&req->rq_bc_list, &bc_serv->sv_cb_list);
wake_up(&bc_serv->sv_cb_waitq);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 8a45b3ccc313..41df4c507193 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1943,6 +1943,11 @@ static void xprt_destroy_cb(struct work_struct *work)
rpc_destroy_wait_queue(&xprt->backlog);
kfree(xprt->servername);
/*
+ * Destroy any existing back channel
+ */
+ xprt_destroy_backchannel(xprt, UINT_MAX);
+
+ /*
* Tear down transport state and free the rpc_xprt
*/
xprt->ops->destroy(xprt);
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 50e075fcdd8f..b458bf53ca69 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -163,6 +163,7 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
spin_lock(&xprt->bc_pa_lock);
list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
spin_unlock(&xprt->bc_pa_lock);
+ xprt_put(xprt);
}
static struct rpc_rqst *rpcrdma_bc_rqst_get(struct rpcrdma_xprt *r_xprt)
@@ -259,6 +260,7 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
/* Queue rqst for ULP's callback service */
bc_serv = xprt->bc_serv;
+ xprt_get(xprt);
spin_lock(&bc_serv->sv_cb_lock);
list_add(&rqst->rq_bc_list, &bc_serv->sv_cb_list);
spin_unlock(&bc_serv->sv_cb_lock);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 9ac88722fa83..70e52f567b2a 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1249,19 +1249,21 @@ static void xs_error_report(struct sock *sk)
{
struct sock_xprt *transport;
struct rpc_xprt *xprt;
- int err;
read_lock_bh(&sk->sk_callback_lock);
if (!(xprt = xprt_from_sock(sk)))
goto out;
transport = container_of(xprt, struct sock_xprt, xprt);
- err = -sk->sk_err;
- if (err == 0)
+ transport->xprt_err = -sk->sk_err;
+ if (transport->xprt_err == 0)
goto out;
dprintk("RPC: xs_error_report client %p, error=%d...\n",
- xprt, -err);
- trace_rpc_socket_error(xprt, sk->sk_socket, err);
+ xprt, -transport->xprt_err);
+ trace_rpc_socket_error(xprt, sk->sk_socket, transport->xprt_err);
+
+ /* barrier ensures xprt_err is set before XPRT_SOCK_WAKE_ERROR */
+ smp_mb__before_atomic();
xs_run_error_worker(transport, XPRT_SOCK_WAKE_ERROR);
out:
read_unlock_bh(&sk->sk_callback_lock);
@@ -2476,7 +2478,6 @@ static void xs_wake_write(struct sock_xprt *transport)
static void xs_wake_error(struct sock_xprt *transport)
{
int sockerr;
- int sockerr_len = sizeof(sockerr);
if (!test_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state))
return;
@@ -2485,9 +2486,7 @@ static void xs_wake_error(struct sock_xprt *transport)
goto out;
if (!test_and_clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state))
goto out;
- if (kernel_getsockopt(transport->sock, SOL_SOCKET, SO_ERROR,
- (char *)&sockerr, &sockerr_len) != 0)
- goto out;
+ sockerr = xchg(&transport->xprt_err, 0);
if (sockerr < 0)
xprt_wake_pending_tasks(&transport->xprt, sockerr);
out:
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 23cb379a93d6..8f35060a24e1 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -34,8 +34,6 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
#include "core.h"
#include "name_table.h"
#include "subscr.h"
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 60d829581068..3042f654e0af 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -60,6 +60,12 @@
#include <linux/rhashtable.h>
#include <net/genetlink.h>
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
struct tipc_node;
struct tipc_bearer;
struct tipc_bc_base;
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 3b9f8cc328f5..4b92b196cfa6 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -740,7 +740,7 @@ static __poll_t tipc_poll(struct file *file, struct socket *sock,
/* fall through */
case TIPC_LISTEN:
case TIPC_CONNECTING:
- if (!skb_queue_empty(&sk->sk_receive_queue))
+ if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
revents |= EPOLLIN | EPOLLRDNORM;
break;
case TIPC_OPEN:
@@ -748,7 +748,7 @@ static __poll_t tipc_poll(struct file *file, struct socket *sock,
revents |= EPOLLOUT;
if (!tipc_sk_type_connectionless(sk))
break;
- if (skb_queue_empty(&sk->sk_receive_queue))
+ if (skb_queue_empty_lockless(&sk->sk_receive_queue))
break;
revents |= EPOLLIN | EPOLLRDNORM;
break;
@@ -2119,13 +2119,13 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
struct tipc_msg *hdr = buf_msg(skb);
if (unlikely(msg_in_group(hdr)))
- return sk->sk_rcvbuf;
+ return READ_ONCE(sk->sk_rcvbuf);
if (unlikely(!msg_connected(hdr)))
- return sk->sk_rcvbuf << msg_importance(hdr);
+ return READ_ONCE(sk->sk_rcvbuf) << msg_importance(hdr);
if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL))
- return sk->sk_rcvbuf;
+ return READ_ONCE(sk->sk_rcvbuf);
return FLOWCTL_MSG_LIM;
}
@@ -3790,7 +3790,7 @@ int tipc_sk_dump(struct sock *sk, u16 dqueues, char *buf)
i += scnprintf(buf + i, sz - i, " %d", sk->sk_sndbuf);
i += scnprintf(buf + i, sz - i, " | %d", sk_rmem_alloc_get(sk));
i += scnprintf(buf + i, sz - i, " %d", sk->sk_rcvbuf);
- i += scnprintf(buf + i, sz - i, " | %d\n", sk->sk_backlog.len);
+ i += scnprintf(buf + i, sz - i, " | %d\n", READ_ONCE(sk->sk_backlog.len));
if (dqueues & TIPC_DUMP_SK_SNDQ) {
i += scnprintf(buf + i, sz - i, "sk_write_queue: ");
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index f959487c5cd1..683d00837693 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -523,8 +523,10 @@ last_record:
int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
{
unsigned char record_type = TLS_RECORD_TYPE_DATA;
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
int rc;
+ mutex_lock(&tls_ctx->tx_lock);
lock_sock(sk);
if (unlikely(msg->msg_controllen)) {
@@ -538,12 +540,14 @@ int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
out:
release_sock(sk);
+ mutex_unlock(&tls_ctx->tx_lock);
return rc;
}
int tls_device_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags)
{
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
struct iov_iter msg_iter;
char *kaddr = kmap(page);
struct kvec iov;
@@ -552,6 +556,7 @@ int tls_device_sendpage(struct sock *sk, struct page *page,
if (flags & MSG_SENDPAGE_NOTLAST)
flags |= MSG_MORE;
+ mutex_lock(&tls_ctx->tx_lock);
lock_sock(sk);
if (flags & MSG_OOB) {
@@ -568,6 +573,7 @@ int tls_device_sendpage(struct sock *sk, struct page *page,
out:
release_sock(sk);
+ mutex_unlock(&tls_ctx->tx_lock);
return rc;
}
@@ -623,9 +629,11 @@ static int tls_device_push_pending_record(struct sock *sk, int flags)
void tls_device_write_space(struct sock *sk, struct tls_context *ctx)
{
- if (!sk->sk_write_pending && tls_is_partially_sent_record(ctx)) {
+ if (tls_is_partially_sent_record(ctx)) {
gfp_t sk_allocation = sk->sk_allocation;
+ WARN_ON_ONCE(sk->sk_write_pending);
+
sk->sk_allocation = GFP_ATOMIC;
tls_push_partial_record(sk, ctx,
MSG_DONTWAIT | MSG_NOSIGNAL |
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index ac88877dcade..f874cc0da45d 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -267,6 +267,7 @@ void tls_ctx_free(struct sock *sk, struct tls_context *ctx)
memzero_explicit(&ctx->crypto_send, sizeof(ctx->crypto_send));
memzero_explicit(&ctx->crypto_recv, sizeof(ctx->crypto_recv));
+ mutex_destroy(&ctx->tx_lock);
if (sk)
kfree_rcu(ctx, rcu);
@@ -612,6 +613,7 @@ static struct tls_context *create_ctx(struct sock *sk)
if (!ctx)
return NULL;
+ mutex_init(&ctx->tx_lock);
rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
ctx->sk_proto = sk->sk_prot;
return ctx;
@@ -906,6 +908,7 @@ static int __init tls_register(void)
{
tls_sw_proto_ops = inet_stream_ops;
tls_sw_proto_ops.splice_read = tls_sw_splice_read;
+ tls_sw_proto_ops.sendpage_locked = tls_sw_sendpage_locked,
tls_device_init();
tcp_register_ulp(&tcp_tls_ulp_ops);
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index c2b5e0d2ba1a..319735d5c084 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -897,15 +897,9 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
return -ENOTSUPP;
+ mutex_lock(&tls_ctx->tx_lock);
lock_sock(sk);
- /* Wait till there is any pending write on socket */
- if (unlikely(sk->sk_write_pending)) {
- ret = wait_on_pending_writer(sk, &timeo);
- if (unlikely(ret))
- goto send_end;
- }
-
if (unlikely(msg->msg_controllen)) {
ret = tls_proccess_cmsg(sk, msg, &record_type);
if (ret) {
@@ -1091,6 +1085,7 @@ send_end:
ret = sk_stream_error(sk, msg->msg_flags, ret);
release_sock(sk);
+ mutex_unlock(&tls_ctx->tx_lock);
return copied ? copied : ret;
}
@@ -1114,13 +1109,6 @@ static int tls_sw_do_sendpage(struct sock *sk, struct page *page,
eor = !(flags & (MSG_MORE | MSG_SENDPAGE_NOTLAST));
sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
- /* Wait till there is any pending write on socket */
- if (unlikely(sk->sk_write_pending)) {
- ret = wait_on_pending_writer(sk, &timeo);
- if (unlikely(ret))
- goto sendpage_end;
- }
-
/* Call the sk_stream functions to manage the sndbuf mem. */
while (size > 0) {
size_t copy, required_size;
@@ -1216,18 +1204,32 @@ sendpage_end:
return copied ? copied : ret;
}
+int tls_sw_sendpage_locked(struct sock *sk, struct page *page,
+ int offset, size_t size, int flags)
+{
+ if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
+ MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY |
+ MSG_NO_SHARED_FRAGS))
+ return -ENOTSUPP;
+
+ return tls_sw_do_sendpage(sk, page, offset, size, flags);
+}
+
int tls_sw_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags)
{
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
int ret;
if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY))
return -ENOTSUPP;
+ mutex_lock(&tls_ctx->tx_lock);
lock_sock(sk);
ret = tls_sw_do_sendpage(sk, page, offset, size, flags);
release_sock(sk);
+ mutex_unlock(&tls_ctx->tx_lock);
return ret;
}
@@ -2170,9 +2172,11 @@ static void tx_work_handler(struct work_struct *work)
if (!test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask))
return;
+ mutex_lock(&tls_ctx->tx_lock);
lock_sock(sk);
tls_tx_records(sk, -1);
release_sock(sk);
+ mutex_unlock(&tls_ctx->tx_lock);
}
void tls_sw_write_space(struct sock *sk, struct tls_context *ctx)
@@ -2180,12 +2184,9 @@ void tls_sw_write_space(struct sock *sk, struct tls_context *ctx)
struct tls_sw_context_tx *tx_ctx = tls_sw_ctx_tx(ctx);
/* Schedule the transmission if tx list is ready */
- if (is_tx_ready(tx_ctx) && !sk->sk_write_pending) {
- /* Schedule the transmission */
- if (!test_and_set_bit(BIT_TX_SCHEDULED,
- &tx_ctx->tx_bitmask))
- schedule_delayed_work(&tx_ctx->tx_work.work, 0);
- }
+ if (is_tx_ready(tx_ctx) &&
+ !test_and_set_bit(BIT_TX_SCHEDULED, &tx_ctx->tx_bitmask))
+ schedule_delayed_work(&tx_ctx->tx_work.work, 0);
}
void tls_sw_strparser_arm(struct sock *sk, struct tls_context *tls_ctx)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 67e87db5877f..0d8da809bea2 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2599,7 +2599,7 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa
mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
/* readable? */
- if (!skb_queue_empty(&sk->sk_receive_queue))
+ if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
mask |= EPOLLIN | EPOLLRDNORM;
/* Connection-based need to check for termination and startup */
@@ -2628,7 +2628,7 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
mask = 0;
/* exceptional events? */
- if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+ if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
mask |= EPOLLERR |
(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
@@ -2638,7 +2638,7 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
mask |= EPOLLHUP;
/* readable? */
- if (!skb_queue_empty(&sk->sk_receive_queue))
+ if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
mask |= EPOLLIN | EPOLLRDNORM;
/* Connection-based need to check for termination and startup */
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 2ab43b2bba31..582a3e4dfce2 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -870,7 +870,7 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock,
* the queue and write as long as the socket isn't shutdown for
* sending.
*/
- if (!skb_queue_empty(&sk->sk_receive_queue) ||
+ if (!skb_queue_empty_lockless(&sk->sk_receive_queue) ||
(sk->sk_shutdown & RCV_SHUTDOWN)) {
mask |= EPOLLIN | EPOLLRDNORM;
}
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index a666ef8fc54e..fb2060dffb0a 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -204,10 +204,14 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
return virtio_transport_get_ops()->send_pkt(pkt);
}
-static void virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
+static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
struct virtio_vsock_pkt *pkt)
{
+ if (vvs->rx_bytes + pkt->len > vvs->buf_alloc)
+ return false;
+
vvs->rx_bytes += pkt->len;
+ return true;
}
static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs,
@@ -458,6 +462,9 @@ void virtio_transport_set_buffer_size(struct vsock_sock *vsk, u64 val)
vvs->buf_size_max = val;
vvs->buf_size = val;
vvs->buf_alloc = val;
+
+ virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_STREAM,
+ NULL);
}
EXPORT_SYMBOL_GPL(virtio_transport_set_buffer_size);
@@ -876,14 +883,18 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk,
struct virtio_vsock_pkt *pkt)
{
struct virtio_vsock_sock *vvs = vsk->trans;
- bool free_pkt = false;
+ bool can_enqueue, free_pkt = false;
pkt->len = le32_to_cpu(pkt->hdr.len);
pkt->off = 0;
spin_lock_bh(&vvs->rx_lock);
- virtio_transport_inc_rx_pkt(vvs, pkt);
+ can_enqueue = virtio_transport_inc_rx_pkt(vvs, pkt);
+ if (!can_enqueue) {
+ free_pkt = true;
+ goto out;
+ }
/* Try to copy small packets into the buffer of last packet queued,
* to avoid wasting memory queueing the entire buffer with a small
@@ -936,9 +947,11 @@ virtio_transport_recv_connected(struct sock *sk,
if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_SEND)
vsk->peer_shutdown |= SEND_SHUTDOWN;
if (vsk->peer_shutdown == SHUTDOWN_MASK &&
- vsock_stream_has_data(vsk) <= 0) {
- sock_set_flag(sk, SOCK_DONE);
- sk->sk_state = TCP_CLOSING;
+ vsock_stream_has_data(vsk) <= 0 &&
+ !sock_flag(sk, SOCK_DONE)) {
+ (void)virtio_transport_reset(vsk, NULL);
+
+ virtio_transport_do_close(vsk, true);
}
if (le32_to_cpu(pkt->hdr.flags))
sk->sk_state_change(sk);
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index e851cafd8e2f..fcac5c6366e1 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -204,6 +204,11 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef)
return false;
}
+ /* channel 14 is only for IEEE 802.11b */
+ if (chandef->center_freq1 == 2484 &&
+ chandef->width != NL80211_CHAN_WIDTH_20_NOHT)
+ return false;
+
if (cfg80211_chandef_is_edmg(chandef) &&
!cfg80211_edmg_chandef_valid(chandef))
return false;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 141cdb171665..7b72286922f7 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -393,7 +393,7 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_MNTR_FLAGS] = { /* NLA_NESTED can't be empty */ },
[NL80211_ATTR_MESH_ID] = { .type = NLA_BINARY,
.len = IEEE80211_MAX_MESH_ID_LEN },
- [NL80211_ATTR_MPATH_NEXT_HOP] = { .type = NLA_U32 },
+ [NL80211_ATTR_MPATH_NEXT_HOP] = NLA_POLICY_ETH_ADDR_COMPAT,
[NL80211_ATTR_REG_ALPHA2] = { .type = NLA_STRING, .len = 2 },
[NL80211_ATTR_REG_RULES] = { .type = NLA_NESTED },
@@ -13682,7 +13682,7 @@ static int nl80211_get_ftm_responder_stats(struct sk_buff *skb,
hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
NL80211_CMD_GET_FTM_RESPONDER_STATS);
if (!hdr)
- return -ENOBUFS;
+ goto nla_put_failure;
if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex))
goto nla_put_failure;
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 420c4207ab59..446c76d44e65 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -3883,6 +3883,7 @@ bool regulatory_pre_cac_allowed(struct wiphy *wiphy)
return pre_cac_allowed;
}
+EXPORT_SYMBOL(regulatory_pre_cac_allowed);
void regulatory_propagate_dfs_state(struct wiphy *wiphy,
struct cfg80211_chan_def *chandef,
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index 504133d76de4..dc8f689bd469 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h
@@ -156,14 +156,6 @@ bool regulatory_indoor_allowed(void);
#define REG_PRE_CAC_EXPIRY_GRACE_MS 2000
/**
- * regulatory_pre_cac_allowed - if pre-CAC allowed in the current dfs domain
- * @wiphy: wiphy for which pre-CAC capability is checked.
-
- * Pre-CAC is allowed only in ETSI domain.
- */
-bool regulatory_pre_cac_allowed(struct wiphy *wiphy);
-
-/**
* regulatory_propagate_dfs_state - Propagate DFS channel state to other wiphys
* @wiphy - wiphy on which radar is detected and the event will be propagated
* to other available wiphys having the same DFS domain
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index ff1016607f0b..aef240fdf8df 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -1703,8 +1703,7 @@ cfg80211_parse_mbssid_frame_data(struct wiphy *wiphy,
static void
cfg80211_update_notlisted_nontrans(struct wiphy *wiphy,
struct cfg80211_bss *nontrans_bss,
- struct ieee80211_mgmt *mgmt, size_t len,
- gfp_t gfp)
+ struct ieee80211_mgmt *mgmt, size_t len)
{
u8 *ie, *new_ie, *pos;
const u8 *nontrans_ssid, *trans_ssid, *mbssid;
@@ -1715,6 +1714,8 @@ cfg80211_update_notlisted_nontrans(struct wiphy *wiphy,
const struct cfg80211_bss_ies *old;
u8 cpy_len;
+ lockdep_assert_held(&wiphy_to_rdev(wiphy)->bss_lock);
+
ie = mgmt->u.probe_resp.variable;
new_ie_len = ielen;
@@ -1731,23 +1732,22 @@ cfg80211_update_notlisted_nontrans(struct wiphy *wiphy,
if (!mbssid || mbssid < trans_ssid)
return;
new_ie_len -= mbssid[1];
- rcu_read_lock();
+
nontrans_ssid = ieee80211_bss_get_ie(nontrans_bss, WLAN_EID_SSID);
- if (!nontrans_ssid) {
- rcu_read_unlock();
+ if (!nontrans_ssid)
return;
- }
+
new_ie_len += nontrans_ssid[1];
- rcu_read_unlock();
/* generate new ie for nontrans BSS
* 1. replace SSID with nontrans BSS' SSID
* 2. skip MBSSID IE
*/
- new_ie = kzalloc(new_ie_len, gfp);
+ new_ie = kzalloc(new_ie_len, GFP_ATOMIC);
if (!new_ie)
return;
- new_ies = kzalloc(sizeof(*new_ies) + new_ie_len, gfp);
+
+ new_ies = kzalloc(sizeof(*new_ies) + new_ie_len, GFP_ATOMIC);
if (!new_ies)
goto out_free;
@@ -1901,6 +1901,8 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
cfg80211_parse_mbssid_frame_data(wiphy, data, mgmt, len,
&non_tx_data, gfp);
+ spin_lock_bh(&wiphy_to_rdev(wiphy)->bss_lock);
+
/* check if the res has other nontransmitting bss which is not
* in MBSSID IE
*/
@@ -1915,8 +1917,9 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
ies2 = rcu_access_pointer(tmp_bss->ies);
if (ies2->tsf < ies1->tsf)
cfg80211_update_notlisted_nontrans(wiphy, tmp_bss,
- mgmt, len, gfp);
+ mgmt, len);
}
+ spin_unlock_bh(&wiphy_to_rdev(wiphy)->bss_lock);
return res;
}
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 419eb12c1e93..5b4ed5bbc542 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1559,7 +1559,8 @@ bool ieee80211_chandef_to_operating_class(struct cfg80211_chan_def *chandef,
}
if (freq == 2484) {
- if (chandef->width > NL80211_CHAN_WIDTH_40)
+ /* channel 14 is only for IEEE 802.11b */
+ if (chandef->width != NL80211_CHAN_WIDTH_20_NOHT)
return false;
*op_class = 82; /* channel 14 */
diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c
index c67d7a82ab13..73fd0eae08ca 100644
--- a/net/wireless/wext-sme.c
+++ b/net/wireless/wext-sme.c
@@ -202,6 +202,7 @@ int cfg80211_mgd_wext_giwessid(struct net_device *dev,
struct iw_point *data, char *ssid)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
+ int ret = 0;
/* call only for station! */
if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION))
@@ -219,7 +220,10 @@ int cfg80211_mgd_wext_giwessid(struct net_device *dev,
if (ie) {
data->flags = 1;
data->length = ie[1];
- memcpy(ssid, ie + 2, data->length);
+ if (data->length > IW_ESSID_MAX_SIZE)
+ ret = -EINVAL;
+ else
+ memcpy(ssid, ie + 2, data->length);
}
rcu_read_unlock();
} else if (wdev->wext.connect.ssid && wdev->wext.connect.ssid_len) {
@@ -229,7 +233,7 @@ int cfg80211_mgd_wext_giwessid(struct net_device *dev,
}
wdev_unlock(wdev);
- return 0;
+ return ret;
}
int cfg80211_mgd_wext_siwap(struct net_device *dev,
diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c
index 5c111bc3c8ea..00e782335cb0 100644
--- a/net/x25/x25_dev.c
+++ b/net/x25/x25_dev.c
@@ -55,7 +55,7 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb)
if (!sock_owned_by_user(sk)) {
queued = x25_process_rx_frame(sk, skb);
} else {
- queued = !sk_add_backlog(sk, skb, sk->sk_rcvbuf);
+ queued = !sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf));
}
bh_unlock_sock(sk);
sock_put(sk);
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 16d5f353163a..3049af269fbf 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -27,6 +27,9 @@ void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
{
unsigned long flags;
+ if (!xs->tx)
+ return;
+
spin_lock_irqsave(&umem->xsk_list_lock, flags);
list_add_rcu(&xs->list, &umem->xsk_list);
spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
@@ -36,6 +39,9 @@ void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
{
unsigned long flags;
+ if (!xs->tx)
+ return;
+
spin_lock_irqsave(&umem->xsk_list_lock, flags);
list_del_rcu(&xs->list);
spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index fa8fbb8fa3c8..9044073fbf22 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -305,9 +305,8 @@ out:
}
EXPORT_SYMBOL(xsk_umem_consume_tx);
-static int xsk_zc_xmit(struct sock *sk)
+static int xsk_zc_xmit(struct xdp_sock *xs)
{
- struct xdp_sock *xs = xdp_sk(sk);
struct net_device *dev = xs->dev;
return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id,
@@ -327,11 +326,10 @@ static void xsk_destruct_skb(struct sk_buff *skb)
sock_wfree(skb);
}
-static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
- size_t total_len)
+static int xsk_generic_xmit(struct sock *sk)
{
- u32 max_batch = TX_BATCH_SIZE;
struct xdp_sock *xs = xdp_sk(sk);
+ u32 max_batch = TX_BATCH_SIZE;
bool sent_frame = false;
struct xdp_desc desc;
struct sk_buff *skb;
@@ -394,6 +392,18 @@ out:
return err;
}
+static int __xsk_sendmsg(struct sock *sk)
+{
+ struct xdp_sock *xs = xdp_sk(sk);
+
+ if (unlikely(!(xs->dev->flags & IFF_UP)))
+ return -ENETDOWN;
+ if (unlikely(!xs->tx))
+ return -ENOBUFS;
+
+ return xs->zc ? xsk_zc_xmit(xs) : xsk_generic_xmit(sk);
+}
+
static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
{
bool need_wait = !(m->msg_flags & MSG_DONTWAIT);
@@ -402,21 +412,18 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
if (unlikely(!xsk_is_bound(xs)))
return -ENXIO;
- if (unlikely(!(xs->dev->flags & IFF_UP)))
- return -ENETDOWN;
- if (unlikely(!xs->tx))
- return -ENOBUFS;
- if (need_wait)
+ if (unlikely(need_wait))
return -EOPNOTSUPP;
- return (xs->zc) ? xsk_zc_xmit(sk) : xsk_generic_xmit(sk, m, total_len);
+ return __xsk_sendmsg(sk);
}
static unsigned int xsk_poll(struct file *file, struct socket *sock,
struct poll_table_struct *wait)
{
unsigned int mask = datagram_poll(file, sock, wait);
- struct xdp_sock *xs = xdp_sk(sock->sk);
+ struct sock *sk = sock->sk;
+ struct xdp_sock *xs = xdp_sk(sk);
struct net_device *dev;
struct xdp_umem *umem;
@@ -426,9 +433,14 @@ static unsigned int xsk_poll(struct file *file, struct socket *sock,
dev = xs->dev;
umem = xs->umem;
- if (umem->need_wakeup)
- dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id,
- umem->need_wakeup);
+ if (umem->need_wakeup) {
+ if (dev->netdev_ops->ndo_xsk_wakeup)
+ dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id,
+ umem->need_wakeup);
+ else
+ /* Poll needs to drive Tx also in copy mode */
+ __xsk_sendmsg(sk);
+ }
if (xs->rx && !xskq_empty_desc(xs->rx))
mask |= POLLIN | POLLRDNORM;
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 9b599ed66d97..2c86a2fc3915 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -480,6 +480,9 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
else
XFRM_INC_STATS(net,
LINUX_MIB_XFRMINSTATEINVALID);
+
+ if (encap_type == -1)
+ dev_put(skb->dev);
goto drop;
}
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index c6f3c4a1bd99..f3423562d933 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -495,6 +495,8 @@ static void ___xfrm_state_destroy(struct xfrm_state *x)
x->type->destructor(x);
xfrm_put_type(x->type);
}
+ if (x->xfrag.page)
+ put_page(x->xfrag.page);
xfrm_dev_state_free(x);
security_xfrm_state_free(x);
xfrm_state_free(x);
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 1d9be26b4edd..42b571cde177 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -176,6 +176,7 @@ KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/bpf/
KBUILD_HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/
KBUILD_HOSTCFLAGS += -I$(srctree)/tools/lib/ -I$(srctree)/tools/include
KBUILD_HOSTCFLAGS += -I$(srctree)/tools/perf
+KBUILD_HOSTCFLAGS += -DHAVE_ATTR_TEST=0
HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable
diff --git a/samples/bpf/asm_goto_workaround.h b/samples/bpf/asm_goto_workaround.h
index 7409722727ca..7048bb3594d6 100644
--- a/samples/bpf/asm_goto_workaround.h
+++ b/samples/bpf/asm_goto_workaround.h
@@ -3,7 +3,8 @@
#ifndef __ASM_GOTO_WORKAROUND_H
#define __ASM_GOTO_WORKAROUND_H
-/* this will bring in asm_volatile_goto macro definition
+/*
+ * This will bring in asm_volatile_goto and asm_inline macro definitions
* if enabled by compiler and config options.
*/
#include <linux/types.h>
@@ -13,5 +14,15 @@
#define asm_volatile_goto(x...) asm volatile("invalid use of asm_volatile_goto")
#endif
+/*
+ * asm_inline is defined as asm __inline in "include/linux/compiler_types.h"
+ * if supported by the kernel's CC (i.e CONFIG_CC_HAS_ASM_INLINE) which is not
+ * supported by CLANG.
+ */
+#ifdef asm_inline
+#undef asm_inline
+#define asm_inline asm
+#endif
+
#define volatile(x...) volatile("")
#endif
diff --git a/samples/bpf/task_fd_query_user.c b/samples/bpf/task_fd_query_user.c
index e39938058223..4c31b305e6ef 100644
--- a/samples/bpf/task_fd_query_user.c
+++ b/samples/bpf/task_fd_query_user.c
@@ -13,6 +13,7 @@
#include <sys/resource.h>
#include <sys/types.h>
#include <sys/stat.h>
+#include <linux/perf_event.h>
#include "libbpf.h"
#include "bpf_load.h"
diff --git a/scripts/coccinelle/api/devm_platform_ioremap_resource.cocci b/scripts/coccinelle/api/devm_platform_ioremap_resource.cocci
deleted file mode 100644
index 56a2e261d61d..000000000000
--- a/scripts/coccinelle/api/devm_platform_ioremap_resource.cocci
+++ /dev/null
@@ -1,60 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/// Use devm_platform_ioremap_resource helper which wraps
-/// platform_get_resource() and devm_ioremap_resource() together.
-///
-// Confidence: High
-// Copyright: (C) 2019 Himanshu Jha GPLv2.
-// Copyright: (C) 2019 Julia Lawall, Inria/LIP6. GPLv2.
-// Keywords: platform_get_resource, devm_ioremap_resource,
-// Keywords: devm_platform_ioremap_resource
-
-virtual patch
-virtual report
-
-@r depends on patch && !report@
-expression e1, e2, arg1, arg2, arg3;
-identifier id;
-@@
-
-(
-- id = platform_get_resource(arg1, IORESOURCE_MEM, arg2);
-|
-- struct resource *id = platform_get_resource(arg1, IORESOURCE_MEM, arg2);
-)
- ... when != id
-- e1 = devm_ioremap_resource(arg3, id);
-+ e1 = devm_platform_ioremap_resource(arg1, arg2);
- ... when != id
-? id = e2
-
-@r1 depends on patch && !report@
-identifier r.id;
-type T;
-@@
-
-- T *id;
- ...when != id
-
-@r2 depends on report && !patch@
-identifier id;
-expression e1, e2, arg1, arg2, arg3;
-position j0;
-@@
-
-(
- id = platform_get_resource(arg1, IORESOURCE_MEM, arg2);
-|
- struct resource *id = platform_get_resource(arg1, IORESOURCE_MEM, arg2);
-)
- ... when != id
- e1@j0 = devm_ioremap_resource(arg3, id);
- ... when != id
-? id = e2
-
-@script:python depends on report && !patch@
-e1 << r2.e1;
-j0 << r2.j0;
-@@
-
-msg = "WARNING: Use devm_platform_ioremap_resource for %s" % (e1)
-coccilib.report.print_report(j0[0], msg)
diff --git a/scripts/coccinelle/misc/add_namespace.cocci b/scripts/coccinelle/misc/add_namespace.cocci
index c832bb6445a8..99e93a6c2e24 100644
--- a/scripts/coccinelle/misc/add_namespace.cocci
+++ b/scripts/coccinelle/misc/add_namespace.cocci
@@ -6,6 +6,8 @@
/// add a missing namespace tag to a module source file.
///
+virtual report
+
@has_ns_import@
declarer name MODULE_IMPORT_NS;
identifier virtual.ns;
diff --git a/scripts/gdb/linux/dmesg.py b/scripts/gdb/linux/dmesg.py
index 6d2e09a2ad2f..2fa7bb83885f 100644
--- a/scripts/gdb/linux/dmesg.py
+++ b/scripts/gdb/linux/dmesg.py
@@ -16,6 +16,8 @@ import sys
from linux import utils
+printk_log_type = utils.CachedType("struct printk_log")
+
class LxDmesg(gdb.Command):
"""Print Linux kernel log buffer."""
@@ -42,9 +44,14 @@ class LxDmesg(gdb.Command):
b = utils.read_memoryview(inf, log_buf_addr, log_next_idx)
log_buf = a.tobytes() + b.tobytes()
+ length_offset = printk_log_type.get_type()['len'].bitpos // 8
+ text_len_offset = printk_log_type.get_type()['text_len'].bitpos // 8
+ time_stamp_offset = printk_log_type.get_type()['ts_nsec'].bitpos // 8
+ text_offset = printk_log_type.get_type().sizeof
+
pos = 0
while pos < log_buf.__len__():
- length = utils.read_u16(log_buf[pos + 8:pos + 10])
+ length = utils.read_u16(log_buf, pos + length_offset)
if length == 0:
if log_buf_2nd_half == -1:
gdb.write("Corrupted log buffer!\n")
@@ -52,10 +59,11 @@ class LxDmesg(gdb.Command):
pos = log_buf_2nd_half
continue
- text_len = utils.read_u16(log_buf[pos + 10:pos + 12])
- text = log_buf[pos + 16:pos + 16 + text_len].decode(
+ text_len = utils.read_u16(log_buf, pos + text_len_offset)
+ text_start = pos + text_offset
+ text = log_buf[text_start:text_start + text_len].decode(
encoding='utf8', errors='replace')
- time_stamp = utils.read_u64(log_buf[pos:pos + 8])
+ time_stamp = utils.read_u64(log_buf, pos + time_stamp_offset)
for line in text.splitlines():
msg = u"[{time:12.6f}] {line}\n".format(
diff --git a/scripts/gdb/linux/symbols.py b/scripts/gdb/linux/symbols.py
index 34e40e96dee2..be984aa29b75 100644
--- a/scripts/gdb/linux/symbols.py
+++ b/scripts/gdb/linux/symbols.py
@@ -15,7 +15,7 @@ import gdb
import os
import re
-from linux import modules
+from linux import modules, utils
if hasattr(gdb, 'Breakpoint'):
@@ -99,7 +99,8 @@ lx-symbols command."""
attrs[n]['name'].string(): attrs[n]['address']
for n in range(int(sect_attrs['nsections']))}
args = []
- for section_name in [".data", ".data..read_mostly", ".rodata", ".bss"]:
+ for section_name in [".data", ".data..read_mostly", ".rodata", ".bss",
+ ".text", ".text.hot", ".text.unlikely"]:
address = section_name_to_address.get(section_name)
if address:
args.append(" -s {name} {addr}".format(
@@ -116,6 +117,12 @@ lx-symbols command."""
module_file = self._get_module_file(module_name)
if module_file:
+ if utils.is_target_arch('s390'):
+ # Module text is preceded by PLT stubs on s390.
+ module_arch = module['arch']
+ plt_offset = int(module_arch['plt_offset'])
+ plt_size = int(module_arch['plt_size'])
+ module_addr = hex(int(module_addr, 0) + plt_offset + plt_size)
gdb.write("loading @{addr}: {filename}\n".format(
addr=module_addr, filename=module_file))
cmdline = "add-symbol-file {filename} {addr}{sections}".format(
diff --git a/scripts/gdb/linux/utils.py b/scripts/gdb/linux/utils.py
index bc67126118c4..ea94221dbd39 100644
--- a/scripts/gdb/linux/utils.py
+++ b/scripts/gdb/linux/utils.py
@@ -92,15 +92,16 @@ def read_memoryview(inf, start, length):
return memoryview(inf.read_memory(start, length))
-def read_u16(buffer):
+def read_u16(buffer, offset):
+ buffer_val = buffer[offset:offset + 2]
value = [0, 0]
- if type(buffer[0]) is str:
- value[0] = ord(buffer[0])
- value[1] = ord(buffer[1])
+ if type(buffer_val[0]) is str:
+ value[0] = ord(buffer_val[0])
+ value[1] = ord(buffer_val[1])
else:
- value[0] = buffer[0]
- value[1] = buffer[1]
+ value[0] = buffer_val[0]
+ value[1] = buffer_val[1]
if get_target_endianness() == LITTLE_ENDIAN:
return value[0] + (value[1] << 8)
@@ -108,18 +109,18 @@ def read_u16(buffer):
return value[1] + (value[0] << 8)
-def read_u32(buffer):
+def read_u32(buffer, offset):
if get_target_endianness() == LITTLE_ENDIAN:
- return read_u16(buffer[0:2]) + (read_u16(buffer[2:4]) << 16)
+ return read_u16(buffer, offset) + (read_u16(buffer, offset + 2) << 16)
else:
- return read_u16(buffer[2:4]) + (read_u16(buffer[0:2]) << 16)
+ return read_u16(buffer, offset + 2) + (read_u16(buffer, offset) << 16)
-def read_u64(buffer):
+def read_u64(buffer, offset):
if get_target_endianness() == LITTLE_ENDIAN:
- return read_u32(buffer[0:4]) + (read_u32(buffer[4:8]) << 32)
+ return read_u32(buffer, offset) + (read_u32(buffer, offset + 4) << 32)
else:
- return read_u32(buffer[4:8]) + (read_u32(buffer[0:4]) << 32)
+ return read_u32(buffer, offset + 4) + (read_u32(buffer, offset) << 32)
target_arch = None
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 442d5e2ad688..d2a30a7b3f07 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -166,7 +166,7 @@ struct symbol {
struct module *module;
unsigned int crc;
int crc_valid;
- const char *namespace;
+ char *namespace;
unsigned int weak:1;
unsigned int vmlinux:1; /* 1 if symbol is defined in vmlinux */
unsigned int kernel:1; /* 1 if symbol is from kernel
@@ -348,34 +348,43 @@ static enum export export_from_sec(struct elf_info *elf, unsigned int sec)
return export_unknown;
}
-static const char *sym_extract_namespace(const char **symname)
+static const char *namespace_from_kstrtabns(struct elf_info *info,
+ Elf_Sym *kstrtabns)
+{
+ char *value = info->ksymtab_strings + kstrtabns->st_value;
+ return value[0] ? value : NULL;
+}
+
+static void sym_update_namespace(const char *symname, const char *namespace)
{
- size_t n;
- char *dupsymname;
+ struct symbol *s = find_symbol(symname);
- n = strcspn(*symname, ".");
- if (n < strlen(*symname) - 1) {
- dupsymname = NOFAIL(strdup(*symname));
- dupsymname[n] = '\0';
- *symname = dupsymname;
- return dupsymname + n + 1;
+ /*
+ * That symbol should have been created earlier and thus this is
+ * actually an assertion.
+ */
+ if (!s) {
+ merror("Could not update namespace(%s) for symbol %s\n",
+ namespace, symname);
+ return;
}
- return NULL;
+ free(s->namespace);
+ s->namespace =
+ namespace && namespace[0] ? NOFAIL(strdup(namespace)) : NULL;
}
/**
* Add an exported symbol - it may have already been added without a
* CRC, in this case just update the CRC
**/
-static struct symbol *sym_add_exported(const char *name, const char *namespace,
- struct module *mod, enum export export)
+static struct symbol *sym_add_exported(const char *name, struct module *mod,
+ enum export export)
{
struct symbol *s = find_symbol(name);
if (!s) {
s = new_symbol(name, mod, export);
- s->namespace = namespace;
} else {
if (!s->preloaded) {
warn("%s: '%s' exported twice. Previous export was in %s%s\n",
@@ -584,6 +593,10 @@ static int parse_elf(struct elf_info *info, const char *filename)
info->export_unused_gpl_sec = i;
else if (strcmp(secname, "__ksymtab_gpl_future") == 0)
info->export_gpl_future_sec = i;
+ else if (strcmp(secname, "__ksymtab_strings") == 0)
+ info->ksymtab_strings = (void *)hdr +
+ sechdrs[i].sh_offset -
+ sechdrs[i].sh_addr;
if (sechdrs[i].sh_type == SHT_SYMTAB) {
unsigned int sh_link_idx;
@@ -672,7 +685,7 @@ static void handle_modversions(struct module *mod, struct elf_info *info,
unsigned int crc;
enum export export;
bool is_crc = false;
- const char *name, *namespace;
+ const char *name;
if ((!is_vmlinux(mod->name) || mod->is_dot_o) &&
strstarts(symname, "__ksymtab"))
@@ -745,8 +758,7 @@ static void handle_modversions(struct module *mod, struct elf_info *info,
/* All exported symbols */
if (strstarts(symname, "__ksymtab_")) {
name = symname + strlen("__ksymtab_");
- namespace = sym_extract_namespace(&name);
- sym_add_exported(name, namespace, mod, export);
+ sym_add_exported(name, mod, export);
}
if (strcmp(symname, "init_module") == 0)
mod->has_init = 1;
@@ -2042,6 +2054,16 @@ static void read_symbols(const char *modname)
handle_moddevtable(mod, &info, sym, symname);
}
+ /* Apply symbol namespaces from __kstrtabns_<symbol> entries. */
+ for (sym = info.symtab_start; sym < info.symtab_stop; sym++) {
+ symname = remove_dot(info.strtab + sym->st_name);
+
+ if (strstarts(symname, "__kstrtabns_"))
+ sym_update_namespace(symname + strlen("__kstrtabns_"),
+ namespace_from_kstrtabns(&info,
+ sym));
+ }
+
// check for static EXPORT_SYMBOL_* functions && global vars
for (sym = info.symtab_start; sym < info.symtab_stop; sym++) {
unsigned char bind = ELF_ST_BIND(sym->st_info);
@@ -2453,12 +2475,12 @@ static void read_dump(const char *fname, unsigned int kernel)
mod = new_module(modname);
mod->skip = 1;
}
- s = sym_add_exported(symname, namespace, mod,
- export_no(export));
+ s = sym_add_exported(symname, mod, export_no(export));
s->kernel = kernel;
s->preloaded = 1;
s->is_static = 0;
sym_update_crc(symname, mod, crc, export_no(export));
+ sym_update_namespace(symname, namespace);
}
release_file(file, size);
return;
diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
index 92a926d375d2..ad271bc6c313 100644
--- a/scripts/mod/modpost.h
+++ b/scripts/mod/modpost.h
@@ -143,6 +143,7 @@ struct elf_info {
Elf_Section export_gpl_sec;
Elf_Section export_unused_gpl_sec;
Elf_Section export_gpl_future_sec;
+ char *ksymtab_strings;
char *strtab;
char *modinfo;
unsigned int modinfo_len;
diff --git a/scripts/nsdeps b/scripts/nsdeps
index ac2b6031dd13..04cea0921673 100644
--- a/scripts/nsdeps
+++ b/scripts/nsdeps
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# Linux kernel symbol namespace import generator
#
@@ -31,17 +31,17 @@ generate_deps() {
local mod_file=`echo $@ | sed -e 's/\.ko/\.mod/'`
local ns_deps_file=`echo $@ | sed -e 's/\.ko/\.ns_deps/'`
if [ ! -f "$ns_deps_file" ]; then return; fi
- local mod_source_files=`cat $mod_file | sed -n 1p \
+ local mod_source_files="`cat $mod_file | sed -n 1p \
| sed -e 's/\.o/\.c/g' \
- | sed "s/[^ ]* */${srctree}\/&/g"`
+ | sed "s|[^ ]* *|${srctree}/&|g"`"
for ns in `cat $ns_deps_file`; do
echo "Adding namespace $ns to module $mod_name (if needed)."
- generate_deps_for_ns $ns $mod_source_files
+ generate_deps_for_ns $ns "$mod_source_files"
# sort the imports
for source_file in $mod_source_files; do
sed '/MODULE_IMPORT_NS/Q' $source_file > ${source_file}.tmp
offset=$(wc -l ${source_file}.tmp | awk '{print $1;}')
- cat $source_file | grep MODULE_IMPORT_NS | sort -u >> ${source_file}.tmp
+ cat $source_file | grep MODULE_IMPORT_NS | LANG=C sort -u >> ${source_file}.tmp
tail -n +$((offset +1)) ${source_file} | grep -v MODULE_IMPORT_NS >> ${source_file}.tmp
if ! diff -q ${source_file} ${source_file}.tmp; then
mv ${source_file}.tmp ${source_file}
diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h
index 8f0a278ce0af..74eab03e31d4 100644
--- a/scripts/recordmcount.h
+++ b/scripts/recordmcount.h
@@ -389,11 +389,8 @@ static int nop_mcount(Elf_Shdr const *const relhdr,
mcountsym = get_mcountsym(sym0, relp, str0);
if (mcountsym == Elf_r_sym(relp) && !is_fake_mcount(relp)) {
- if (make_nop) {
+ if (make_nop)
ret = make_nop((void *)ehdr, _w(shdr->sh_offset) + _w(relp->r_offset));
- if (ret < 0)
- return -1;
- }
if (warn_on_notrace_sect && !once) {
printf("Section %s has mcount callers being ignored\n",
txtname);
diff --git a/scripts/setlocalversion b/scripts/setlocalversion
index 220dae0db3f1..a2998b118ef9 100755
--- a/scripts/setlocalversion
+++ b/scripts/setlocalversion
@@ -93,7 +93,7 @@ scm_version()
# Check for mercurial and a mercurial repo.
if test -d .hg && hgid=`hg id 2>/dev/null`; then
# Do we have an tagged version? If so, latesttagdistance == 1
- if [ "`hg log -r . --template '{latesttagdistance}'`" == "1" ]; then
+ if [ "`hg log -r . --template '{latesttagdistance}'`" = "1" ]; then
id=`hg log -r . --template '{latesttag}'`
printf '%s%s' -hg "$id"
else
diff --git a/scripts/tools-support-relr.sh b/scripts/tools-support-relr.sh
index 97a2c844a95e..45e8aa360b45 100755
--- a/scripts/tools-support-relr.sh
+++ b/scripts/tools-support-relr.sh
@@ -4,13 +4,13 @@
tmp_file=$(mktemp)
trap "rm -f $tmp_file.o $tmp_file $tmp_file.bin" EXIT
-cat << "END" | "$CC" -c -x c - -o $tmp_file.o >/dev/null 2>&1
+cat << "END" | $CC -c -x c - -o $tmp_file.o >/dev/null 2>&1
void *p = &p;
END
-"$LD" $tmp_file.o -shared -Bsymbolic --pack-dyn-relocs=relr -o $tmp_file
+$LD $tmp_file.o -shared -Bsymbolic --pack-dyn-relocs=relr -o $tmp_file
# Despite printing an error message, GNU nm still exits with exit code 0 if it
# sees a relr section. So we need to check that nothing is printed to stderr.
-test -z "$("$NM" $tmp_file 2>&1 >/dev/null)"
+test -z "$($NM $tmp_file 2>&1 >/dev/null)"
-"$OBJCOPY" -O binary $tmp_file $tmp_file.bin
+$OBJCOPY -O binary $tmp_file $tmp_file.bin
diff --git a/security/keys/Makefile b/security/keys/Makefile
index 9cef54064f60..074f27538f55 100644
--- a/security/keys/Makefile
+++ b/security/keys/Makefile
@@ -28,5 +28,5 @@ obj-$(CONFIG_ASYMMETRIC_KEY_TYPE) += keyctl_pkey.o
# Key types
#
obj-$(CONFIG_BIG_KEYS) += big_key.o
-obj-$(CONFIG_TRUSTED_KEYS) += trusted.o
+obj-$(CONFIG_TRUSTED_KEYS) += trusted-keys/
obj-$(CONFIG_ENCRYPTED_KEYS) += encrypted-keys/
diff --git a/security/keys/trusted-keys/Makefile b/security/keys/trusted-keys/Makefile
new file mode 100644
index 000000000000..7b73cebbb378
--- /dev/null
+++ b/security/keys/trusted-keys/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for trusted keys
+#
+
+obj-$(CONFIG_TRUSTED_KEYS) += trusted.o
+trusted-y += trusted_tpm1.o
+trusted-y += trusted_tpm2.o
diff --git a/security/keys/trusted.c b/security/keys/trusted-keys/trusted_tpm1.c
index 1fbd77816610..d2c5ec1e040b 100644
--- a/security/keys/trusted.c
+++ b/security/keys/trusted-keys/trusted_tpm1.c
@@ -27,7 +27,7 @@
#include <linux/tpm.h>
#include <linux/tpm_command.h>
-#include <keys/trusted.h>
+#include <keys/trusted_tpm.h>
static const char hmac_alg[] = "hmac(sha1)";
static const char hash_alg[] = "sha1";
@@ -406,13 +406,10 @@ static int osap(struct tpm_buf *tb, struct osapsess *s,
if (ret != TPM_NONCE_SIZE)
return ret;
- INIT_BUF(tb);
- store16(tb, TPM_TAG_RQU_COMMAND);
- store32(tb, TPM_OSAP_SIZE);
- store32(tb, TPM_ORD_OSAP);
- store16(tb, type);
- store32(tb, handle);
- storebytes(tb, ononce, TPM_NONCE_SIZE);
+ tpm_buf_reset(tb, TPM_TAG_RQU_COMMAND, TPM_ORD_OSAP);
+ tpm_buf_append_u16(tb, type);
+ tpm_buf_append_u32(tb, handle);
+ tpm_buf_append(tb, ononce, TPM_NONCE_SIZE);
ret = trusted_tpm_send(tb->data, MAX_BUF_SIZE);
if (ret < 0)
@@ -437,10 +434,7 @@ int oiap(struct tpm_buf *tb, uint32_t *handle, unsigned char *nonce)
if (!chip)
return -ENODEV;
- INIT_BUF(tb);
- store16(tb, TPM_TAG_RQU_COMMAND);
- store32(tb, TPM_OIAP_SIZE);
- store32(tb, TPM_ORD_OIAP);
+ tpm_buf_reset(tb, TPM_TAG_RQU_COMMAND, TPM_ORD_OIAP);
ret = trusted_tpm_send(tb->data, MAX_BUF_SIZE);
if (ret < 0)
return ret;
@@ -535,20 +529,17 @@ static int tpm_seal(struct tpm_buf *tb, uint16_t keytype,
goto out;
/* build and send the TPM request packet */
- INIT_BUF(tb);
- store16(tb, TPM_TAG_RQU_AUTH1_COMMAND);
- store32(tb, TPM_SEAL_SIZE + pcrinfosize + datalen);
- store32(tb, TPM_ORD_SEAL);
- store32(tb, keyhandle);
- storebytes(tb, td->encauth, SHA1_DIGEST_SIZE);
- store32(tb, pcrinfosize);
- storebytes(tb, pcrinfo, pcrinfosize);
- store32(tb, datalen);
- storebytes(tb, data, datalen);
- store32(tb, sess.handle);
- storebytes(tb, td->nonceodd, TPM_NONCE_SIZE);
- store8(tb, cont);
- storebytes(tb, td->pubauth, SHA1_DIGEST_SIZE);
+ tpm_buf_reset(tb, TPM_TAG_RQU_AUTH1_COMMAND, TPM_ORD_SEAL);
+ tpm_buf_append_u32(tb, keyhandle);
+ tpm_buf_append(tb, td->encauth, SHA1_DIGEST_SIZE);
+ tpm_buf_append_u32(tb, pcrinfosize);
+ tpm_buf_append(tb, pcrinfo, pcrinfosize);
+ tpm_buf_append_u32(tb, datalen);
+ tpm_buf_append(tb, data, datalen);
+ tpm_buf_append_u32(tb, sess.handle);
+ tpm_buf_append(tb, td->nonceodd, TPM_NONCE_SIZE);
+ tpm_buf_append_u8(tb, cont);
+ tpm_buf_append(tb, td->pubauth, SHA1_DIGEST_SIZE);
ret = trusted_tpm_send(tb->data, MAX_BUF_SIZE);
if (ret < 0)
@@ -594,7 +585,6 @@ static int tpm_unseal(struct tpm_buf *tb,
uint32_t authhandle2 = 0;
unsigned char cont = 0;
uint32_t ordinal;
- uint32_t keyhndl;
int ret;
/* sessions for unsealing key and data */
@@ -610,7 +600,6 @@ static int tpm_unseal(struct tpm_buf *tb,
}
ordinal = htonl(TPM_ORD_UNSEAL);
- keyhndl = htonl(SRKHANDLE);
ret = tpm_get_random(chip, nonceodd, TPM_NONCE_SIZE);
if (ret != TPM_NONCE_SIZE) {
pr_info("trusted_key: tpm_get_random failed (%d)\n", ret);
@@ -628,20 +617,17 @@ static int tpm_unseal(struct tpm_buf *tb,
return ret;
/* build and send TPM request packet */
- INIT_BUF(tb);
- store16(tb, TPM_TAG_RQU_AUTH2_COMMAND);
- store32(tb, TPM_UNSEAL_SIZE + bloblen);
- store32(tb, TPM_ORD_UNSEAL);
- store32(tb, keyhandle);
- storebytes(tb, blob, bloblen);
- store32(tb, authhandle1);
- storebytes(tb, nonceodd, TPM_NONCE_SIZE);
- store8(tb, cont);
- storebytes(tb, authdata1, SHA1_DIGEST_SIZE);
- store32(tb, authhandle2);
- storebytes(tb, nonceodd, TPM_NONCE_SIZE);
- store8(tb, cont);
- storebytes(tb, authdata2, SHA1_DIGEST_SIZE);
+ tpm_buf_reset(tb, TPM_TAG_RQU_AUTH2_COMMAND, TPM_ORD_UNSEAL);
+ tpm_buf_append_u32(tb, keyhandle);
+ tpm_buf_append(tb, blob, bloblen);
+ tpm_buf_append_u32(tb, authhandle1);
+ tpm_buf_append(tb, nonceodd, TPM_NONCE_SIZE);
+ tpm_buf_append_u8(tb, cont);
+ tpm_buf_append(tb, authdata1, SHA1_DIGEST_SIZE);
+ tpm_buf_append_u32(tb, authhandle2);
+ tpm_buf_append(tb, nonceodd, TPM_NONCE_SIZE);
+ tpm_buf_append_u8(tb, cont);
+ tpm_buf_append(tb, authdata2, SHA1_DIGEST_SIZE);
ret = trusted_tpm_send(tb->data, MAX_BUF_SIZE);
if (ret < 0) {
@@ -670,23 +656,23 @@ static int tpm_unseal(struct tpm_buf *tb,
static int key_seal(struct trusted_key_payload *p,
struct trusted_key_options *o)
{
- struct tpm_buf *tb;
+ struct tpm_buf tb;
int ret;
- tb = kzalloc(sizeof *tb, GFP_KERNEL);
- if (!tb)
- return -ENOMEM;
+ ret = tpm_buf_init(&tb, 0, 0);
+ if (ret)
+ return ret;
/* include migratable flag at end of sealed key */
p->key[p->key_len] = p->migratable;
- ret = tpm_seal(tb, o->keytype, o->keyhandle, o->keyauth,
+ ret = tpm_seal(&tb, o->keytype, o->keyhandle, o->keyauth,
p->key, p->key_len + 1, p->blob, &p->blob_len,
o->blobauth, o->pcrinfo, o->pcrinfo_len);
if (ret < 0)
pr_info("trusted_key: srkseal failed (%d)\n", ret);
- kzfree(tb);
+ tpm_buf_destroy(&tb);
return ret;
}
@@ -696,14 +682,14 @@ static int key_seal(struct trusted_key_payload *p,
static int key_unseal(struct trusted_key_payload *p,
struct trusted_key_options *o)
{
- struct tpm_buf *tb;
+ struct tpm_buf tb;
int ret;
- tb = kzalloc(sizeof *tb, GFP_KERNEL);
- if (!tb)
- return -ENOMEM;
+ ret = tpm_buf_init(&tb, 0, 0);
+ if (ret)
+ return ret;
- ret = tpm_unseal(tb, o->keyhandle, o->keyauth, p->blob, p->blob_len,
+ ret = tpm_unseal(&tb, o->keyhandle, o->keyauth, p->blob, p->blob_len,
o->blobauth, p->key, &p->key_len);
if (ret < 0)
pr_info("trusted_key: srkunseal failed (%d)\n", ret);
@@ -711,7 +697,7 @@ static int key_unseal(struct trusted_key_payload *p,
/* pull migratable flag out of sealed key */
p->migratable = p->key[--p->key_len];
- kzfree(tb);
+ tpm_buf_destroy(&tb);
return ret;
}
@@ -1016,7 +1002,7 @@ static int trusted_instantiate(struct key *key,
switch (key_cmd) {
case Opt_load:
if (tpm2)
- ret = tpm_unseal_trusted(chip, payload, options);
+ ret = tpm2_unseal_trusted(chip, payload, options);
else
ret = key_unseal(payload, options);
dump_payload(payload);
@@ -1032,7 +1018,7 @@ static int trusted_instantiate(struct key *key,
goto out;
}
if (tpm2)
- ret = tpm_seal_trusted(chip, payload, options);
+ ret = tpm2_seal_trusted(chip, payload, options);
else
ret = key_seal(payload, options);
if (ret < 0)
diff --git a/security/keys/trusted-keys/trusted_tpm2.c b/security/keys/trusted-keys/trusted_tpm2.c
new file mode 100644
index 000000000000..a9810ac2776f
--- /dev/null
+++ b/security/keys/trusted-keys/trusted_tpm2.c
@@ -0,0 +1,314 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2004 IBM Corporation
+ * Copyright (C) 2014 Intel Corporation
+ */
+
+#include <linux/string.h>
+#include <linux/err.h>
+#include <linux/tpm.h>
+#include <linux/tpm_command.h>
+
+#include <keys/trusted-type.h>
+#include <keys/trusted_tpm.h>
+
+static struct tpm2_hash tpm2_hash_map[] = {
+ {HASH_ALGO_SHA1, TPM_ALG_SHA1},
+ {HASH_ALGO_SHA256, TPM_ALG_SHA256},
+ {HASH_ALGO_SHA384, TPM_ALG_SHA384},
+ {HASH_ALGO_SHA512, TPM_ALG_SHA512},
+ {HASH_ALGO_SM3_256, TPM_ALG_SM3_256},
+};
+
+/**
+ * tpm_buf_append_auth() - append TPMS_AUTH_COMMAND to the buffer.
+ *
+ * @buf: an allocated tpm_buf instance
+ * @session_handle: session handle
+ * @nonce: the session nonce, may be NULL if not used
+ * @nonce_len: the session nonce length, may be 0 if not used
+ * @attributes: the session attributes
+ * @hmac: the session HMAC or password, may be NULL if not used
+ * @hmac_len: the session HMAC or password length, maybe 0 if not used
+ */
+static void tpm2_buf_append_auth(struct tpm_buf *buf, u32 session_handle,
+ const u8 *nonce, u16 nonce_len,
+ u8 attributes,
+ const u8 *hmac, u16 hmac_len)
+{
+ tpm_buf_append_u32(buf, 9 + nonce_len + hmac_len);
+ tpm_buf_append_u32(buf, session_handle);
+ tpm_buf_append_u16(buf, nonce_len);
+
+ if (nonce && nonce_len)
+ tpm_buf_append(buf, nonce, nonce_len);
+
+ tpm_buf_append_u8(buf, attributes);
+ tpm_buf_append_u16(buf, hmac_len);
+
+ if (hmac && hmac_len)
+ tpm_buf_append(buf, hmac, hmac_len);
+}
+
+/**
+ * tpm2_seal_trusted() - seal the payload of a trusted key
+ *
+ * @chip: TPM chip to use
+ * @payload: the key data in clear and encrypted form
+ * @options: authentication values and other options
+ *
+ * Return: < 0 on error and 0 on success.
+ */
+int tpm2_seal_trusted(struct tpm_chip *chip,
+ struct trusted_key_payload *payload,
+ struct trusted_key_options *options)
+{
+ unsigned int blob_len;
+ struct tpm_buf buf;
+ u32 hash;
+ int i;
+ int rc;
+
+ for (i = 0; i < ARRAY_SIZE(tpm2_hash_map); i++) {
+ if (options->hash == tpm2_hash_map[i].crypto_id) {
+ hash = tpm2_hash_map[i].tpm_id;
+ break;
+ }
+ }
+
+ if (i == ARRAY_SIZE(tpm2_hash_map))
+ return -EINVAL;
+
+ rc = tpm_buf_init(&buf, TPM2_ST_SESSIONS, TPM2_CC_CREATE);
+ if (rc)
+ return rc;
+
+ tpm_buf_append_u32(&buf, options->keyhandle);
+ tpm2_buf_append_auth(&buf, TPM2_RS_PW,
+ NULL /* nonce */, 0,
+ 0 /* session_attributes */,
+ options->keyauth /* hmac */,
+ TPM_DIGEST_SIZE);
+
+ /* sensitive */
+ tpm_buf_append_u16(&buf, 4 + TPM_DIGEST_SIZE + payload->key_len + 1);
+
+ tpm_buf_append_u16(&buf, TPM_DIGEST_SIZE);
+ tpm_buf_append(&buf, options->blobauth, TPM_DIGEST_SIZE);
+ tpm_buf_append_u16(&buf, payload->key_len + 1);
+ tpm_buf_append(&buf, payload->key, payload->key_len);
+ tpm_buf_append_u8(&buf, payload->migratable);
+
+ /* public */
+ tpm_buf_append_u16(&buf, 14 + options->policydigest_len);
+ tpm_buf_append_u16(&buf, TPM_ALG_KEYEDHASH);
+ tpm_buf_append_u16(&buf, hash);
+
+ /* policy */
+ if (options->policydigest_len) {
+ tpm_buf_append_u32(&buf, 0);
+ tpm_buf_append_u16(&buf, options->policydigest_len);
+ tpm_buf_append(&buf, options->policydigest,
+ options->policydigest_len);
+ } else {
+ tpm_buf_append_u32(&buf, TPM2_OA_USER_WITH_AUTH);
+ tpm_buf_append_u16(&buf, 0);
+ }
+
+ /* public parameters */
+ tpm_buf_append_u16(&buf, TPM_ALG_NULL);
+ tpm_buf_append_u16(&buf, 0);
+
+ /* outside info */
+ tpm_buf_append_u16(&buf, 0);
+
+ /* creation PCR */
+ tpm_buf_append_u32(&buf, 0);
+
+ if (buf.flags & TPM_BUF_OVERFLOW) {
+ rc = -E2BIG;
+ goto out;
+ }
+
+ rc = tpm_send(chip, buf.data, tpm_buf_length(&buf));
+ if (rc)
+ goto out;
+
+ blob_len = be32_to_cpup((__be32 *) &buf.data[TPM_HEADER_SIZE]);
+ if (blob_len > MAX_BLOB_SIZE) {
+ rc = -E2BIG;
+ goto out;
+ }
+ if (tpm_buf_length(&buf) < TPM_HEADER_SIZE + 4 + blob_len) {
+ rc = -EFAULT;
+ goto out;
+ }
+
+ memcpy(payload->blob, &buf.data[TPM_HEADER_SIZE + 4], blob_len);
+ payload->blob_len = blob_len;
+
+out:
+ tpm_buf_destroy(&buf);
+
+ if (rc > 0) {
+ if (tpm2_rc_value(rc) == TPM2_RC_HASH)
+ rc = -EINVAL;
+ else
+ rc = -EPERM;
+ }
+
+ return rc;
+}
+
+/**
+ * tpm2_load_cmd() - execute a TPM2_Load command
+ *
+ * @chip: TPM chip to use
+ * @payload: the key data in clear and encrypted form
+ * @options: authentication values and other options
+ * @blob_handle: returned blob handle
+ *
+ * Return: 0 on success.
+ * -E2BIG on wrong payload size.
+ * -EPERM on tpm error status.
+ * < 0 error from tpm_send.
+ */
+static int tpm2_load_cmd(struct tpm_chip *chip,
+ struct trusted_key_payload *payload,
+ struct trusted_key_options *options,
+ u32 *blob_handle)
+{
+ struct tpm_buf buf;
+ unsigned int private_len;
+ unsigned int public_len;
+ unsigned int blob_len;
+ int rc;
+
+ private_len = be16_to_cpup((__be16 *) &payload->blob[0]);
+ if (private_len > (payload->blob_len - 2))
+ return -E2BIG;
+
+ public_len = be16_to_cpup((__be16 *) &payload->blob[2 + private_len]);
+ blob_len = private_len + public_len + 4;
+ if (blob_len > payload->blob_len)
+ return -E2BIG;
+
+ rc = tpm_buf_init(&buf, TPM2_ST_SESSIONS, TPM2_CC_LOAD);
+ if (rc)
+ return rc;
+
+ tpm_buf_append_u32(&buf, options->keyhandle);
+ tpm2_buf_append_auth(&buf, TPM2_RS_PW,
+ NULL /* nonce */, 0,
+ 0 /* session_attributes */,
+ options->keyauth /* hmac */,
+ TPM_DIGEST_SIZE);
+
+ tpm_buf_append(&buf, payload->blob, blob_len);
+
+ if (buf.flags & TPM_BUF_OVERFLOW) {
+ rc = -E2BIG;
+ goto out;
+ }
+
+ rc = tpm_send(chip, buf.data, tpm_buf_length(&buf));
+ if (!rc)
+ *blob_handle = be32_to_cpup(
+ (__be32 *) &buf.data[TPM_HEADER_SIZE]);
+
+out:
+ tpm_buf_destroy(&buf);
+
+ if (rc > 0)
+ rc = -EPERM;
+
+ return rc;
+}
+
+/**
+ * tpm2_unseal_cmd() - execute a TPM2_Unload command
+ *
+ * @chip: TPM chip to use
+ * @payload: the key data in clear and encrypted form
+ * @options: authentication values and other options
+ * @blob_handle: blob handle
+ *
+ * Return: 0 on success
+ * -EPERM on tpm error status
+ * < 0 error from tpm_send
+ */
+static int tpm2_unseal_cmd(struct tpm_chip *chip,
+ struct trusted_key_payload *payload,
+ struct trusted_key_options *options,
+ u32 blob_handle)
+{
+ struct tpm_buf buf;
+ u16 data_len;
+ u8 *data;
+ int rc;
+
+ rc = tpm_buf_init(&buf, TPM2_ST_SESSIONS, TPM2_CC_UNSEAL);
+ if (rc)
+ return rc;
+
+ tpm_buf_append_u32(&buf, blob_handle);
+ tpm2_buf_append_auth(&buf,
+ options->policyhandle ?
+ options->policyhandle : TPM2_RS_PW,
+ NULL /* nonce */, 0,
+ TPM2_SA_CONTINUE_SESSION,
+ options->blobauth /* hmac */,
+ TPM_DIGEST_SIZE);
+
+ rc = tpm_send(chip, buf.data, tpm_buf_length(&buf));
+ if (rc > 0)
+ rc = -EPERM;
+
+ if (!rc) {
+ data_len = be16_to_cpup(
+ (__be16 *) &buf.data[TPM_HEADER_SIZE + 4]);
+ if (data_len < MIN_KEY_SIZE || data_len > MAX_KEY_SIZE + 1) {
+ rc = -EFAULT;
+ goto out;
+ }
+
+ if (tpm_buf_length(&buf) < TPM_HEADER_SIZE + 6 + data_len) {
+ rc = -EFAULT;
+ goto out;
+ }
+ data = &buf.data[TPM_HEADER_SIZE + 6];
+
+ memcpy(payload->key, data, data_len - 1);
+ payload->key_len = data_len - 1;
+ payload->migratable = data[data_len - 1];
+ }
+
+out:
+ tpm_buf_destroy(&buf);
+ return rc;
+}
+
+/**
+ * tpm2_unseal_trusted() - unseal the payload of a trusted key
+ *
+ * @chip: TPM chip to use
+ * @payload: the key data in clear and encrypted form
+ * @options: authentication values and other options
+ *
+ * Return: Same as with tpm_send.
+ */
+int tpm2_unseal_trusted(struct tpm_chip *chip,
+ struct trusted_key_payload *payload,
+ struct trusted_key_options *options)
+{
+ u32 blob_handle;
+ int rc;
+
+ rc = tpm2_load_cmd(chip, payload, options, &blob_handle);
+ if (rc)
+ return rc;
+
+ rc = tpm2_unseal_cmd(chip, payload, options, blob_handle);
+
+ return rc;
+}
diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c
index 8a10b43daf74..40b790536def 100644
--- a/security/lockdown/lockdown.c
+++ b/security/lockdown/lockdown.c
@@ -20,6 +20,7 @@ static const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
[LOCKDOWN_NONE] = "none",
[LOCKDOWN_MODULE_SIGNATURE] = "unsigned module loading",
[LOCKDOWN_DEV_MEM] = "/dev/mem,kmem,port",
+ [LOCKDOWN_EFI_TEST] = "/dev/efi_test access",
[LOCKDOWN_KEXEC] = "kexec of unsigned images",
[LOCKDOWN_HIBERNATION] = "hibernation",
[LOCKDOWN_PCI_ACCESS] = "direct PCI access",
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 3a29e7c24ba9..a5813c7629c1 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -1946,7 +1946,14 @@ static int convert_context(struct context *oldc, struct context *newc, void *p)
rc = string_to_context_struct(args->newp, NULL, s,
newc, SECSID_NULL);
if (rc == -EINVAL) {
- /* Retain string representation for later mapping. */
+ /*
+ * Retain string representation for later mapping.
+ *
+ * IMPORTANT: We need to copy the contents of oldc->str
+ * back into s again because string_to_context_struct()
+ * may have garbled it.
+ */
+ memcpy(s, oldc->str, oldc->len);
context_init(newc);
newc->str = s;
newc->len = oldc->len;
diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c
index 41905afada63..f34ce564d92c 100644
--- a/sound/core/compress_offload.c
+++ b/sound/core/compress_offload.c
@@ -528,7 +528,7 @@ static int snd_compress_check_input(struct snd_compr_params *params)
{
/* first let's check the buffer parameter's */
if (params->buffer.fragment_size == 0 ||
- params->buffer.fragments > INT_MAX / params->buffer.fragment_size ||
+ params->buffer.fragments > U32_MAX / params->buffer.fragment_size ||
params->buffer.fragments == 0)
return -EINVAL;
diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c
index d80041ea4e01..2236b5e0c1f2 100644
--- a/sound/core/pcm_lib.c
+++ b/sound/core/pcm_lib.c
@@ -1782,11 +1782,14 @@ void snd_pcm_period_elapsed(struct snd_pcm_substream *substream)
struct snd_pcm_runtime *runtime;
unsigned long flags;
- if (PCM_RUNTIME_CHECK(substream))
+ if (snd_BUG_ON(!substream))
return;
- runtime = substream->runtime;
snd_pcm_stream_lock_irqsave(substream, flags);
+ if (PCM_RUNTIME_CHECK(substream))
+ goto _unlock;
+ runtime = substream->runtime;
+
if (!snd_pcm_running(substream) ||
snd_pcm_update_hw_ptr0(substream, 1) < 0)
goto _end;
@@ -1797,6 +1800,7 @@ void snd_pcm_period_elapsed(struct snd_pcm_substream *substream)
#endif
_end:
kill_fasync(&runtime->fasync, SIGIO, POLL_IN);
+ _unlock:
snd_pcm_stream_unlock_irqrestore(substream, flags);
}
EXPORT_SYMBOL(snd_pcm_period_elapsed);
diff --git a/sound/core/timer.c b/sound/core/timer.c
index 5c9fbf3f4340..59ae21b0bb93 100644
--- a/sound/core/timer.c
+++ b/sound/core/timer.c
@@ -226,7 +226,8 @@ static int snd_timer_check_master(struct snd_timer_instance *master)
return 0;
}
-static int snd_timer_close_locked(struct snd_timer_instance *timeri);
+static int snd_timer_close_locked(struct snd_timer_instance *timeri,
+ struct device **card_devp_to_put);
/*
* open a timer instance
@@ -238,6 +239,7 @@ int snd_timer_open(struct snd_timer_instance **ti,
{
struct snd_timer *timer;
struct snd_timer_instance *timeri = NULL;
+ struct device *card_dev_to_put = NULL;
int err;
mutex_lock(&register_mutex);
@@ -261,7 +263,7 @@ int snd_timer_open(struct snd_timer_instance **ti,
list_add_tail(&timeri->open_list, &snd_timer_slave_list);
err = snd_timer_check_slave(timeri);
if (err < 0) {
- snd_timer_close_locked(timeri);
+ snd_timer_close_locked(timeri, &card_dev_to_put);
timeri = NULL;
}
goto unlock;
@@ -282,11 +284,11 @@ int snd_timer_open(struct snd_timer_instance **ti,
goto unlock;
}
if (!list_empty(&timer->open_list_head)) {
- timeri = list_entry(timer->open_list_head.next,
+ struct snd_timer_instance *t =
+ list_entry(timer->open_list_head.next,
struct snd_timer_instance, open_list);
- if (timeri->flags & SNDRV_TIMER_IFLG_EXCLUSIVE) {
+ if (t->flags & SNDRV_TIMER_IFLG_EXCLUSIVE) {
err = -EBUSY;
- timeri = NULL;
goto unlock;
}
}
@@ -313,7 +315,7 @@ int snd_timer_open(struct snd_timer_instance **ti,
timeri = NULL;
if (timer->card)
- put_device(&timer->card->card_dev);
+ card_dev_to_put = &timer->card->card_dev;
module_put(timer->module);
goto unlock;
}
@@ -323,12 +325,15 @@ int snd_timer_open(struct snd_timer_instance **ti,
timer->num_instances++;
err = snd_timer_check_master(timeri);
if (err < 0) {
- snd_timer_close_locked(timeri);
+ snd_timer_close_locked(timeri, &card_dev_to_put);
timeri = NULL;
}
unlock:
mutex_unlock(&register_mutex);
+ /* put_device() is called after unlock for avoiding deadlock */
+ if (card_dev_to_put)
+ put_device(card_dev_to_put);
*ti = timeri;
return err;
}
@@ -338,7 +343,8 @@ EXPORT_SYMBOL(snd_timer_open);
* close a timer instance
* call this with register_mutex down.
*/
-static int snd_timer_close_locked(struct snd_timer_instance *timeri)
+static int snd_timer_close_locked(struct snd_timer_instance *timeri,
+ struct device **card_devp_to_put)
{
struct snd_timer *timer = timeri->timer;
struct snd_timer_instance *slave, *tmp;
@@ -395,7 +401,7 @@ static int snd_timer_close_locked(struct snd_timer_instance *timeri)
timer->hw.close(timer);
/* release a card refcount for safe disconnection */
if (timer->card)
- put_device(&timer->card->card_dev);
+ *card_devp_to_put = &timer->card->card_dev;
module_put(timer->module);
}
@@ -407,14 +413,18 @@ static int snd_timer_close_locked(struct snd_timer_instance *timeri)
*/
int snd_timer_close(struct snd_timer_instance *timeri)
{
+ struct device *card_dev_to_put = NULL;
int err;
if (snd_BUG_ON(!timeri))
return -ENXIO;
mutex_lock(&register_mutex);
- err = snd_timer_close_locked(timeri);
+ err = snd_timer_close_locked(timeri, &card_dev_to_put);
mutex_unlock(&register_mutex);
+ /* put_device() is called after unlock for avoiding deadlock */
+ if (card_dev_to_put)
+ put_device(card_dev_to_put);
return err;
}
EXPORT_SYMBOL(snd_timer_close);
diff --git a/sound/firewire/bebob/bebob_focusrite.c b/sound/firewire/bebob/bebob_focusrite.c
index 32b864bee25f..06d6a37cd853 100644
--- a/sound/firewire/bebob/bebob_focusrite.c
+++ b/sound/firewire/bebob/bebob_focusrite.c
@@ -27,6 +27,8 @@
#define SAFFIRE_CLOCK_SOURCE_SPDIF 1
/* clock sources as returned from register of Saffire Pro 10 and 26 */
+#define SAFFIREPRO_CLOCK_SOURCE_SELECT_MASK 0x000000ff
+#define SAFFIREPRO_CLOCK_SOURCE_DETECT_MASK 0x0000ff00
#define SAFFIREPRO_CLOCK_SOURCE_INTERNAL 0
#define SAFFIREPRO_CLOCK_SOURCE_SKIP 1 /* never used on hardware */
#define SAFFIREPRO_CLOCK_SOURCE_SPDIF 2
@@ -189,6 +191,7 @@ saffirepro_both_clk_src_get(struct snd_bebob *bebob, unsigned int *id)
map = saffirepro_clk_maps[1];
/* In a case that this driver cannot handle the value of register. */
+ value &= SAFFIREPRO_CLOCK_SOURCE_SELECT_MASK;
if (value >= SAFFIREPRO_CLOCK_SOURCE_COUNT || map[value] < 0) {
err = -EIO;
goto end;
diff --git a/sound/firewire/bebob/bebob_stream.c b/sound/firewire/bebob/bebob_stream.c
index 73fee991bd75..6c1497d9f52b 100644
--- a/sound/firewire/bebob/bebob_stream.c
+++ b/sound/firewire/bebob/bebob_stream.c
@@ -252,8 +252,7 @@ end:
return err;
}
-static unsigned int
-map_data_channels(struct snd_bebob *bebob, struct amdtp_stream *s)
+static int map_data_channels(struct snd_bebob *bebob, struct amdtp_stream *s)
{
unsigned int sec, sections, ch, channels;
unsigned int pcm, midi, location;
diff --git a/sound/hda/ext/hdac_ext_controller.c b/sound/hda/ext/hdac_ext_controller.c
index 211ca85acd8c..cfab60d88c92 100644
--- a/sound/hda/ext/hdac_ext_controller.c
+++ b/sound/hda/ext/hdac_ext_controller.c
@@ -271,6 +271,11 @@ int snd_hdac_ext_bus_link_get(struct hdac_bus *bus,
ret = snd_hdac_ext_bus_link_power_up(link);
/*
+ * clear the register to invalidate all the output streams
+ */
+ snd_hdac_updatew(link->ml_addr, AZX_REG_ML_LOSIDV,
+ ML_LOSIDV_STREAM_MASK, 0);
+ /*
* wait for 521usec for codec to report status
* HDA spec section 4.3 - Codec Discovery
*/
diff --git a/sound/hda/hdac_controller.c b/sound/hda/hdac_controller.c
index d3999e7b0705..7e7be8e4dcf9 100644
--- a/sound/hda/hdac_controller.c
+++ b/sound/hda/hdac_controller.c
@@ -447,8 +447,6 @@ static void azx_int_disable(struct hdac_bus *bus)
list_for_each_entry(azx_dev, &bus->stream_list, list)
snd_hdac_stream_updateb(azx_dev, SD_CTL, SD_INT_MASK, 0);
- synchronize_irq(bus->irq);
-
/* disable SIE for all streams */
snd_hdac_chip_writeb(bus, INTCTL, 0);
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 240f4ca76391..c52419376c74 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -1348,9 +1348,9 @@ static int azx_free(struct azx *chip)
}
if (bus->chip_init) {
- azx_stop_chip(chip);
azx_clear_irq_pending(chip);
azx_stop_all_streams(chip);
+ azx_stop_chip(chip);
}
if (bus->irq >= 0)
@@ -2396,9 +2396,18 @@ static const struct pci_device_id azx_ids[] = {
/* CometLake-H */
{ PCI_DEVICE(0x8086, 0x06C8),
.driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
+ /* CometLake-S */
+ { PCI_DEVICE(0x8086, 0xa3f0),
+ .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
/* Icelake */
{ PCI_DEVICE(0x8086, 0x34c8),
.driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
+ /* Jasperlake */
+ { PCI_DEVICE(0x8086, 0x38c8),
+ .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
+ /* Tigerlake */
+ { PCI_DEVICE(0x8086, 0xa0c8),
+ .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
/* Elkhart Lake */
{ PCI_DEVICE(0x8086, 0x4b55),
.driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c
index 6d1fb7c11f17..b7a1abb3e231 100644
--- a/sound/pci/hda/patch_ca0132.c
+++ b/sound/pci/hda/patch_ca0132.c
@@ -7604,7 +7604,7 @@ static void hp_callback(struct hda_codec *codec, struct hda_jack_callback *cb)
/* Delay enabling the HP amp, to let the mic-detection
* state machine run.
*/
- cancel_delayed_work_sync(&spec->unsol_hp_work);
+ cancel_delayed_work(&spec->unsol_hp_work);
schedule_delayed_work(&spec->unsol_hp_work, msecs_to_jiffies(500));
tbl = snd_hda_jack_tbl_get(codec, cb->nid);
if (tbl)
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index bca5de78e9ad..78bd2e3722c7 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -46,10 +46,12 @@ MODULE_PARM_DESC(static_hdmi_pcm, "Don't restrict PCM parameters per ELD info");
((codec)->core.vendor_id == 0x80862800))
#define is_cannonlake(codec) ((codec)->core.vendor_id == 0x8086280c)
#define is_icelake(codec) ((codec)->core.vendor_id == 0x8086280f)
+#define is_tigerlake(codec) ((codec)->core.vendor_id == 0x80862812)
#define is_haswell_plus(codec) (is_haswell(codec) || is_broadwell(codec) \
|| is_skylake(codec) || is_broxton(codec) \
|| is_kabylake(codec) || is_geminilake(codec) \
- || is_cannonlake(codec) || is_icelake(codec))
+ || is_cannonlake(codec) || is_icelake(codec) \
+ || is_tigerlake(codec))
#define is_valleyview(codec) ((codec)->core.vendor_id == 0x80862882)
#define is_cherryview(codec) ((codec)->core.vendor_id == 0x80862883)
#define is_valleyview_plus(codec) (is_valleyview(codec) || is_cherryview(codec))
@@ -145,6 +147,7 @@ struct hdmi_spec {
struct snd_array pins; /* struct hdmi_spec_per_pin */
struct hdmi_pcm pcm_rec[16];
struct mutex pcm_lock;
+ struct mutex bind_lock; /* for audio component binding */
/* pcm_bitmap means which pcms have been assigned to pins*/
unsigned long pcm_bitmap;
int pcm_used; /* counter of pcm_rec[] */
@@ -2258,7 +2261,7 @@ static int generic_hdmi_init(struct hda_codec *codec)
struct hdmi_spec *spec = codec->spec;
int pin_idx;
- mutex_lock(&spec->pcm_lock);
+ mutex_lock(&spec->bind_lock);
spec->use_jack_detect = !codec->jackpoll_interval;
for (pin_idx = 0; pin_idx < spec->num_pins; pin_idx++) {
struct hdmi_spec_per_pin *per_pin = get_pin(spec, pin_idx);
@@ -2275,7 +2278,7 @@ static int generic_hdmi_init(struct hda_codec *codec)
snd_hda_jack_detect_enable_callback(codec, pin_nid,
jack_callback);
}
- mutex_unlock(&spec->pcm_lock);
+ mutex_unlock(&spec->bind_lock);
return 0;
}
@@ -2382,6 +2385,7 @@ static int alloc_generic_hdmi(struct hda_codec *codec)
spec->ops = generic_standard_hdmi_ops;
spec->dev_num = 1; /* initialize to 1 */
mutex_init(&spec->pcm_lock);
+ mutex_init(&spec->bind_lock);
snd_hdac_register_chmap_ops(&codec->core, &spec->chmap);
spec->chmap.ops.get_chmap = hdmi_get_chmap;
@@ -2451,7 +2455,7 @@ static void generic_acomp_notifier_set(struct drm_audio_component *acomp,
int i;
spec = container_of(acomp->audio_ops, struct hdmi_spec, drm_audio_ops);
- mutex_lock(&spec->pcm_lock);
+ mutex_lock(&spec->bind_lock);
spec->use_acomp_notifier = use_acomp;
spec->codec->relaxed_resume = use_acomp;
/* reprogram each jack detection logic depending on the notifier */
@@ -2461,7 +2465,7 @@ static void generic_acomp_notifier_set(struct drm_audio_component *acomp,
get_pin(spec, i)->pin_nid,
use_acomp);
}
- mutex_unlock(&spec->pcm_lock);
+ mutex_unlock(&spec->bind_lock);
}
/* enable / disable the notifier via master bind / unbind */
@@ -2849,6 +2853,18 @@ static int patch_i915_icl_hdmi(struct hda_codec *codec)
return intel_hsw_common_init(codec, 0x02, map, ARRAY_SIZE(map));
}
+static int patch_i915_tgl_hdmi(struct hda_codec *codec)
+{
+ /*
+ * pin to port mapping table where the value indicate the pin number and
+ * the index indicate the port number with 1 base.
+ */
+ static const int map[] = {0x4, 0x6, 0x8, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf};
+
+ return intel_hsw_common_init(codec, 0x02, map, ARRAY_SIZE(map));
+}
+
+
/* Intel Baytrail and Braswell; with eld notifier */
static int patch_i915_byt_hdmi(struct hda_codec *codec)
{
@@ -3474,6 +3490,8 @@ static int patch_nvhdmi(struct hda_codec *codec)
nvhdmi_chmap_cea_alloc_validate_get_type;
spec->chmap.ops.chmap_validate = nvhdmi_chmap_validate;
+ codec->link_down_at_suspend = 1;
+
generic_acomp_init(codec, &nvhdmi_audio_ops, nvhdmi_port2pin);
return 0;
@@ -4149,6 +4167,7 @@ HDA_CODEC_ENTRY(0x8086280b, "Kabylake HDMI", patch_i915_hsw_hdmi),
HDA_CODEC_ENTRY(0x8086280c, "Cannonlake HDMI", patch_i915_glk_hdmi),
HDA_CODEC_ENTRY(0x8086280d, "Geminilake HDMI", patch_i915_glk_hdmi),
HDA_CODEC_ENTRY(0x8086280f, "Icelake HDMI", patch_i915_icl_hdmi),
+HDA_CODEC_ENTRY(0x80862812, "Tigerlake HDMI", patch_i915_tgl_hdmi),
HDA_CODEC_ENTRY(0x80862880, "CedarTrail HDMI", patch_generic_hdmi),
HDA_CODEC_ENTRY(0x80862882, "Valleyview2 HDMI", patch_i915_byt_hdmi),
HDA_CODEC_ENTRY(0x80862883, "Braswell HDMI", patch_i915_byt_hdmi),
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index b000b36ac3c6..80f66ba85f87 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -393,6 +393,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
case 0x10ec0700:
case 0x10ec0701:
case 0x10ec0703:
+ case 0x10ec0711:
alc_update_coef_idx(codec, 0x10, 1<<15, 0);
break;
case 0x10ec0662:
@@ -408,6 +409,9 @@ static void alc_fill_eapd_coef(struct hda_codec *codec)
case 0x10ec0672:
alc_update_coef_idx(codec, 0xd, 0, 1<<14); /* EAPD Ctrl */
break;
+ case 0x10ec0623:
+ alc_update_coef_idx(codec, 0x19, 1<<13, 0);
+ break;
case 0x10ec0668:
alc_update_coef_idx(codec, 0x7, 3<<13, 0);
break;
@@ -2919,6 +2923,7 @@ enum {
ALC269_TYPE_ALC225,
ALC269_TYPE_ALC294,
ALC269_TYPE_ALC300,
+ ALC269_TYPE_ALC623,
ALC269_TYPE_ALC700,
};
@@ -2954,6 +2959,7 @@ static int alc269_parse_auto_config(struct hda_codec *codec)
case ALC269_TYPE_ALC225:
case ALC269_TYPE_ALC294:
case ALC269_TYPE_ALC300:
+ case ALC269_TYPE_ALC623:
case ALC269_TYPE_ALC700:
ssids = alc269_ssids;
break;
@@ -5358,6 +5364,17 @@ static void alc271_hp_gate_mic_jack(struct hda_codec *codec,
}
}
+static void alc256_fixup_dell_xps_13_headphone_noise2(struct hda_codec *codec,
+ const struct hda_fixup *fix,
+ int action)
+{
+ if (action != HDA_FIXUP_ACT_PRE_PROBE)
+ return;
+
+ snd_hda_codec_amp_stereo(codec, 0x1a, HDA_INPUT, 0, HDA_AMP_VOLMASK, 1);
+ snd_hda_override_wcaps(codec, 0x1a, get_wcaps(codec, 0x1a) & ~AC_WCAP_IN_AMP);
+}
+
static void alc269_fixup_limit_int_mic_boost(struct hda_codec *codec,
const struct hda_fixup *fix,
int action)
@@ -5822,6 +5839,7 @@ enum {
ALC298_FIXUP_DELL_AIO_MIC_NO_PRESENCE,
ALC275_FIXUP_DELL_XPS,
ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE,
+ ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE2,
ALC293_FIXUP_LENOVO_SPK_NOISE,
ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY,
ALC255_FIXUP_DELL_SPK_NOISE,
@@ -5869,6 +5887,7 @@ enum {
ALC225_FIXUP_WYSE_AUTO_MUTE,
ALC225_FIXUP_WYSE_DISABLE_MIC_VREF,
ALC286_FIXUP_ACER_AIO_HEADSET_MIC,
+ ALC256_FIXUP_ASUS_HEADSET_MIC,
ALC256_FIXUP_ASUS_MIC_NO_PRESENCE,
ALC299_FIXUP_PREDATOR_SPK,
ALC294_FIXUP_ASUS_INTSPK_HEADSET_MIC,
@@ -6558,6 +6577,12 @@ static const struct hda_fixup alc269_fixups[] = {
.chained = true,
.chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE
},
+ [ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE2] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc256_fixup_dell_xps_13_headphone_noise2,
+ .chained = true,
+ .chain_id = ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE
+ },
[ALC293_FIXUP_LENOVO_SPK_NOISE] = {
.type = HDA_FIXUP_FUNC,
.v.func = alc_fixup_disable_aamix,
@@ -6912,6 +6937,15 @@ static const struct hda_fixup alc269_fixups[] = {
.chained = true,
.chain_id = ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE
},
+ [ALC256_FIXUP_ASUS_HEADSET_MIC] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x19, 0x03a11020 }, /* headset mic with jack detect */
+ { }
+ },
+ .chained = true,
+ .chain_id = ALC256_FIXUP_ASUS_HEADSET_MODE
+ },
[ALC256_FIXUP_ASUS_MIC_NO_PRESENCE] = {
.type = HDA_FIXUP_PINS,
.v.pins = (const struct hda_pintbl[]) {
@@ -7001,17 +7035,17 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1028, 0x06de, "Dell", ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK),
SND_PCI_QUIRK(0x1028, 0x06df, "Dell", ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK),
SND_PCI_QUIRK(0x1028, 0x06e0, "Dell", ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK),
- SND_PCI_QUIRK(0x1028, 0x0704, "Dell XPS 13 9350", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
+ SND_PCI_QUIRK(0x1028, 0x0704, "Dell XPS 13 9350", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE2),
SND_PCI_QUIRK(0x1028, 0x0706, "Dell Inspiron 7559", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER),
SND_PCI_QUIRK(0x1028, 0x0725, "Dell Inspiron 3162", ALC255_FIXUP_DELL_SPK_NOISE),
SND_PCI_QUIRK(0x1028, 0x0738, "Dell Precision 5820", ALC269_FIXUP_NO_SHUTUP),
- SND_PCI_QUIRK(0x1028, 0x075b, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
+ SND_PCI_QUIRK(0x1028, 0x075b, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE2),
SND_PCI_QUIRK(0x1028, 0x075c, "Dell XPS 27 7760", ALC298_FIXUP_SPK_VOLUME),
SND_PCI_QUIRK(0x1028, 0x075d, "Dell AIO", ALC298_FIXUP_SPK_VOLUME),
SND_PCI_QUIRK(0x1028, 0x07b0, "Dell Precision 7520", ALC295_FIXUP_DISABLE_DAC3),
SND_PCI_QUIRK(0x1028, 0x0798, "Dell Inspiron 17 7000 Gaming", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER),
SND_PCI_QUIRK(0x1028, 0x080c, "Dell WYSE", ALC225_FIXUP_DELL_WYSE_MIC_NO_PRESENCE),
- SND_PCI_QUIRK(0x1028, 0x082a, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
+ SND_PCI_QUIRK(0x1028, 0x082a, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE2),
SND_PCI_QUIRK(0x1028, 0x084b, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB),
SND_PCI_QUIRK(0x1028, 0x084e, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB),
SND_PCI_QUIRK(0x1028, 0x0871, "Dell Precision 3630", ALC255_FIXUP_DELL_HEADSET_MIC),
@@ -7108,6 +7142,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1043, 0x1517, "Asus Zenbook UX31A", ALC269VB_FIXUP_ASUS_ZENBOOK_UX31A),
SND_PCI_QUIRK(0x1043, 0x16e3, "ASUS UX50", ALC269_FIXUP_STEREO_DMIC),
SND_PCI_QUIRK(0x1043, 0x17d1, "ASUS UX431FL", ALC294_FIXUP_ASUS_INTSPK_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1043, 0x18b1, "Asus MJ401TA", ALC256_FIXUP_ASUS_HEADSET_MIC),
SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW),
SND_PCI_QUIRK(0x1043, 0x1a30, "ASUS X705UD", ALC256_FIXUP_ASUS_MIC),
SND_PCI_QUIRK(0x1043, 0x1b13, "Asus U41SV", ALC269_FIXUP_INV_DMIC),
@@ -7186,6 +7221,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x17aa, 0x312f, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
SND_PCI_QUIRK(0x17aa, 0x313c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
SND_PCI_QUIRK(0x17aa, 0x3151, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC),
+ SND_PCI_QUIRK(0x17aa, 0x3176, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC),
+ SND_PCI_QUIRK(0x17aa, 0x3178, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC),
SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI),
SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC),
SND_PCI_QUIRK(0x17aa, 0x3978, "Lenovo B50-70", ALC269_FIXUP_DMIC_THINKPAD_ACPI),
@@ -7987,9 +8024,13 @@ static int patch_alc269(struct hda_codec *codec)
spec->codec_variant = ALC269_TYPE_ALC300;
spec->gen.mixer_nid = 0; /* no loopback on ALC300 */
break;
+ case 0x10ec0623:
+ spec->codec_variant = ALC269_TYPE_ALC623;
+ break;
case 0x10ec0700:
case 0x10ec0701:
case 0x10ec0703:
+ case 0x10ec0711:
spec->codec_variant = ALC269_TYPE_ALC700;
spec->gen.mixer_nid = 0; /* ALC700 does not have any loopback mixer path */
alc_update_coef_idx(codec, 0x4a, 1 << 15, 0); /* Combo jack auto trigger control */
@@ -9187,6 +9228,7 @@ static const struct hda_device_id snd_hda_id_realtek[] = {
HDA_CODEC_ENTRY(0x10ec0298, "ALC298", patch_alc269),
HDA_CODEC_ENTRY(0x10ec0299, "ALC299", patch_alc269),
HDA_CODEC_ENTRY(0x10ec0300, "ALC300", patch_alc269),
+ HDA_CODEC_ENTRY(0x10ec0623, "ALC623", patch_alc269),
HDA_CODEC_REV_ENTRY(0x10ec0861, 0x100340, "ALC660", patch_alc861),
HDA_CODEC_ENTRY(0x10ec0660, "ALC660-VD", patch_alc861vd),
HDA_CODEC_ENTRY(0x10ec0861, "ALC861", patch_alc861),
@@ -9204,6 +9246,7 @@ static const struct hda_device_id snd_hda_id_realtek[] = {
HDA_CODEC_ENTRY(0x10ec0700, "ALC700", patch_alc269),
HDA_CODEC_ENTRY(0x10ec0701, "ALC701", patch_alc269),
HDA_CODEC_ENTRY(0x10ec0703, "ALC703", patch_alc269),
+ HDA_CODEC_ENTRY(0x10ec0711, "ALC711", patch_alc269),
HDA_CODEC_ENTRY(0x10ec0867, "ALC891", patch_alc662),
HDA_CODEC_ENTRY(0x10ec0880, "ALC880", patch_alc880),
HDA_CODEC_ENTRY(0x10ec0882, "ALC882", patch_alc882),
diff --git a/sound/soc/codecs/hdac_hda.c b/sound/soc/codecs/hdac_hda.c
index 91242b6f8ea7..4570f662fb48 100644
--- a/sound/soc/codecs/hdac_hda.c
+++ b/sound/soc/codecs/hdac_hda.c
@@ -410,8 +410,8 @@ static void hdac_hda_codec_remove(struct snd_soc_component *component)
return;
}
- snd_hdac_ext_bus_link_put(hdev->bus, hlink);
pm_runtime_disable(&hdev->dev);
+ snd_hdac_ext_bus_link_put(hdev->bus, hlink);
}
static const struct snd_soc_dapm_route hdac_hda_dapm_routes[] = {
diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c
index b5fd8f08726e..f8b5b960e597 100644
--- a/sound/soc/codecs/hdmi-codec.c
+++ b/sound/soc/codecs/hdmi-codec.c
@@ -274,7 +274,7 @@ struct hdmi_codec_priv {
uint8_t eld[MAX_ELD_BYTES];
struct snd_pcm_chmap *chmap_info;
unsigned int chmap_idx;
- struct mutex lock;
+ unsigned long busy;
struct snd_soc_jack *jack;
unsigned int jack_status;
};
@@ -390,8 +390,8 @@ static int hdmi_codec_startup(struct snd_pcm_substream *substream,
struct hdmi_codec_priv *hcp = snd_soc_dai_get_drvdata(dai);
int ret = 0;
- ret = mutex_trylock(&hcp->lock);
- if (!ret) {
+ ret = test_and_set_bit(0, &hcp->busy);
+ if (ret) {
dev_err(dai->dev, "Only one simultaneous stream supported!\n");
return -EINVAL;
}
@@ -419,7 +419,7 @@ static int hdmi_codec_startup(struct snd_pcm_substream *substream,
err:
/* Release the exclusive lock on error */
- mutex_unlock(&hcp->lock);
+ clear_bit(0, &hcp->busy);
return ret;
}
@@ -431,7 +431,7 @@ static void hdmi_codec_shutdown(struct snd_pcm_substream *substream,
hcp->chmap_idx = HDMI_CODEC_CHMAP_IDX_UNKNOWN;
hcp->hcd.ops->audio_shutdown(dai->dev->parent, hcp->hcd.data);
- mutex_unlock(&hcp->lock);
+ clear_bit(0, &hcp->busy);
}
static int hdmi_codec_hw_params(struct snd_pcm_substream *substream,
@@ -811,8 +811,6 @@ static int hdmi_codec_probe(struct platform_device *pdev)
return -ENOMEM;
hcp->hcd = *hcd;
- mutex_init(&hcp->lock);
-
daidrv = devm_kcalloc(dev, dai_count, sizeof(*daidrv), GFP_KERNEL);
if (!daidrv)
return -ENOMEM;
diff --git a/sound/soc/codecs/max98373.c b/sound/soc/codecs/max98373.c
index e609abcf3220..cae1def8902d 100644
--- a/sound/soc/codecs/max98373.c
+++ b/sound/soc/codecs/max98373.c
@@ -901,16 +901,20 @@ static void max98373_slot_config(struct i2c_client *i2c,
max98373->i_slot = value & 0xF;
else
max98373->i_slot = 1;
-
- max98373->reset_gpio = of_get_named_gpio(dev->of_node,
+ if (dev->of_node) {
+ max98373->reset_gpio = of_get_named_gpio(dev->of_node,
"maxim,reset-gpio", 0);
- if (!gpio_is_valid(max98373->reset_gpio)) {
- dev_err(dev, "Looking up %s property in node %s failed %d\n",
- "maxim,reset-gpio", dev->of_node->full_name,
- max98373->reset_gpio);
+ if (!gpio_is_valid(max98373->reset_gpio)) {
+ dev_err(dev, "Looking up %s property in node %s failed %d\n",
+ "maxim,reset-gpio", dev->of_node->full_name,
+ max98373->reset_gpio);
+ } else {
+ dev_dbg(dev, "maxim,reset-gpio=%d",
+ max98373->reset_gpio);
+ }
} else {
- dev_dbg(dev, "maxim,reset-gpio=%d",
- max98373->reset_gpio);
+ /* this makes reset_gpio as invalid */
+ max98373->reset_gpio = -1;
}
if (!device_property_read_u32(dev, "maxim,spkfb-slot-no", &value))
@@ -956,11 +960,11 @@ static int max98373_i2c_probe(struct i2c_client *i2c,
/* Power on device */
if (gpio_is_valid(max98373->reset_gpio)) {
- ret = gpio_request(max98373->reset_gpio, "MAX98373_RESET");
+ ret = devm_gpio_request(&i2c->dev, max98373->reset_gpio,
+ "MAX98373_RESET");
if (ret) {
dev_err(&i2c->dev, "%s: Failed to request gpio %d\n",
__func__, max98373->reset_gpio);
- gpio_free(max98373->reset_gpio);
return -EINVAL;
}
gpio_direction_output(max98373->reset_gpio, 0);
diff --git a/sound/soc/codecs/msm8916-wcd-analog.c b/sound/soc/codecs/msm8916-wcd-analog.c
index 667e9f73aba3..e3d311fb510e 100644
--- a/sound/soc/codecs/msm8916-wcd-analog.c
+++ b/sound/soc/codecs/msm8916-wcd-analog.c
@@ -306,7 +306,7 @@ struct pm8916_wcd_analog_priv {
};
static const char *const adc2_mux_text[] = { "ZERO", "INP2", "INP3" };
-static const char *const rdac2_mux_text[] = { "ZERO", "RX2", "RX1" };
+static const char *const rdac2_mux_text[] = { "RX1", "RX2" };
static const char *const hph_text[] = { "ZERO", "Switch", };
static const struct soc_enum hph_enum = SOC_ENUM_SINGLE_VIRT(
@@ -321,7 +321,7 @@ static const struct soc_enum adc2_enum = SOC_ENUM_SINGLE_VIRT(
/* RDAC2 MUX */
static const struct soc_enum rdac2_mux_enum = SOC_ENUM_SINGLE(
- CDC_D_CDC_CONN_HPHR_DAC_CTL, 0, 3, rdac2_mux_text);
+ CDC_D_CDC_CONN_HPHR_DAC_CTL, 0, 2, rdac2_mux_text);
static const struct snd_kcontrol_new spkr_switch[] = {
SOC_DAPM_SINGLE("Switch", CDC_A_SPKR_DAC_CTL, 7, 1, 0)
diff --git a/sound/soc/codecs/msm8916-wcd-digital.c b/sound/soc/codecs/msm8916-wcd-digital.c
index 9fa5d44fdc79..58b2468fb2a7 100644
--- a/sound/soc/codecs/msm8916-wcd-digital.c
+++ b/sound/soc/codecs/msm8916-wcd-digital.c
@@ -243,6 +243,10 @@ static const char *const rx_mix1_text[] = {
"ZERO", "IIR1", "IIR2", "RX1", "RX2", "RX3"
};
+static const char * const rx_mix2_text[] = {
+ "ZERO", "IIR1", "IIR2"
+};
+
static const char *const dec_mux_text[] = {
"ZERO", "ADC1", "ADC2", "ADC3", "DMIC1", "DMIC2"
};
@@ -270,6 +274,16 @@ static const struct soc_enum rx3_mix1_inp_enum[] = {
SOC_ENUM_SINGLE(LPASS_CDC_CONN_RX3_B2_CTL, 0, 6, rx_mix1_text),
};
+/* RX1 MIX2 */
+static const struct soc_enum rx_mix2_inp1_chain_enum =
+ SOC_ENUM_SINGLE(LPASS_CDC_CONN_RX1_B3_CTL,
+ 0, 3, rx_mix2_text);
+
+/* RX2 MIX2 */
+static const struct soc_enum rx2_mix2_inp1_chain_enum =
+ SOC_ENUM_SINGLE(LPASS_CDC_CONN_RX2_B3_CTL,
+ 0, 3, rx_mix2_text);
+
/* DEC */
static const struct soc_enum dec1_mux_enum = SOC_ENUM_SINGLE(
LPASS_CDC_CONN_TX_B1_CTL, 0, 6, dec_mux_text);
@@ -309,6 +323,10 @@ static const struct snd_kcontrol_new rx3_mix1_inp2_mux = SOC_DAPM_ENUM(
"RX3 MIX1 INP2 Mux", rx3_mix1_inp_enum[1]);
static const struct snd_kcontrol_new rx3_mix1_inp3_mux = SOC_DAPM_ENUM(
"RX3 MIX1 INP3 Mux", rx3_mix1_inp_enum[2]);
+static const struct snd_kcontrol_new rx1_mix2_inp1_mux = SOC_DAPM_ENUM(
+ "RX1 MIX2 INP1 Mux", rx_mix2_inp1_chain_enum);
+static const struct snd_kcontrol_new rx2_mix2_inp1_mux = SOC_DAPM_ENUM(
+ "RX2 MIX2 INP1 Mux", rx2_mix2_inp1_chain_enum);
/* Digital Gain control -38.4 dB to +38.4 dB in 0.3 dB steps */
static const DECLARE_TLV_DB_SCALE(digital_gain, -3840, 30, 0);
@@ -740,6 +758,10 @@ static const struct snd_soc_dapm_widget msm8916_wcd_digital_dapm_widgets[] = {
&rx3_mix1_inp2_mux),
SND_SOC_DAPM_MUX("RX3 MIX1 INP3", SND_SOC_NOPM, 0, 0,
&rx3_mix1_inp3_mux),
+ SND_SOC_DAPM_MUX("RX1 MIX2 INP1", SND_SOC_NOPM, 0, 0,
+ &rx1_mix2_inp1_mux),
+ SND_SOC_DAPM_MUX("RX2 MIX2 INP1", SND_SOC_NOPM, 0, 0,
+ &rx2_mix2_inp1_mux),
SND_SOC_DAPM_MUX("CIC1 MUX", SND_SOC_NOPM, 0, 0, &cic1_mux),
SND_SOC_DAPM_MUX("CIC2 MUX", SND_SOC_NOPM, 0, 0, &cic2_mux),
diff --git a/sound/soc/codecs/rt5651.c b/sound/soc/codecs/rt5651.c
index 762595de956c..c506c9305043 100644
--- a/sound/soc/codecs/rt5651.c
+++ b/sound/soc/codecs/rt5651.c
@@ -1770,6 +1770,9 @@ static int rt5651_detect_headset(struct snd_soc_component *component)
static bool rt5651_support_button_press(struct rt5651_priv *rt5651)
{
+ if (!rt5651->hp_jack)
+ return false;
+
/* Button press support only works with internal jack-detection */
return (rt5651->hp_jack->status & SND_JACK_MICROPHONE) &&
rt5651->gpiod_hp_det == NULL;
diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c
index 1ef470700ed5..c50b75ce82e0 100644
--- a/sound/soc/codecs/rt5682.c
+++ b/sound/soc/codecs/rt5682.c
@@ -995,6 +995,16 @@ static int rt5682_set_jack_detect(struct snd_soc_component *component,
{
struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component);
+ rt5682->hs_jack = hs_jack;
+
+ if (!hs_jack) {
+ regmap_update_bits(rt5682->regmap, RT5682_IRQ_CTRL_2,
+ RT5682_JD1_EN_MASK, RT5682_JD1_DIS);
+ regmap_update_bits(rt5682->regmap, RT5682_RC_CLK_CTRL,
+ RT5682_POW_JDH | RT5682_POW_JDL, 0);
+ return 0;
+ }
+
switch (rt5682->pdata.jd_src) {
case RT5682_JD1:
snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_2,
@@ -1032,8 +1042,6 @@ static int rt5682_set_jack_detect(struct snd_soc_component *component,
break;
}
- rt5682->hs_jack = hs_jack;
-
return 0;
}
diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c
index c3d06e8bc54f..d5fb7f5dd551 100644
--- a/sound/soc/codecs/wm8994.c
+++ b/sound/soc/codecs/wm8994.c
@@ -533,13 +533,10 @@ static SOC_ENUM_SINGLE_DECL(dac_osr,
static SOC_ENUM_SINGLE_DECL(adc_osr,
WM8994_OVERSAMPLING, 1, osr_text);
-static const struct snd_kcontrol_new wm8994_snd_controls[] = {
+static const struct snd_kcontrol_new wm8994_common_snd_controls[] = {
SOC_DOUBLE_R_TLV("AIF1ADC1 Volume", WM8994_AIF1_ADC1_LEFT_VOLUME,
WM8994_AIF1_ADC1_RIGHT_VOLUME,
1, 119, 0, digital_tlv),
-SOC_DOUBLE_R_TLV("AIF1ADC2 Volume", WM8994_AIF1_ADC2_LEFT_VOLUME,
- WM8994_AIF1_ADC2_RIGHT_VOLUME,
- 1, 119, 0, digital_tlv),
SOC_DOUBLE_R_TLV("AIF2ADC Volume", WM8994_AIF2_ADC_LEFT_VOLUME,
WM8994_AIF2_ADC_RIGHT_VOLUME,
1, 119, 0, digital_tlv),
@@ -556,8 +553,6 @@ SOC_ENUM("AIF2DACR Source", aif2dacr_src),
SOC_DOUBLE_R_TLV("AIF1DAC1 Volume", WM8994_AIF1_DAC1_LEFT_VOLUME,
WM8994_AIF1_DAC1_RIGHT_VOLUME, 1, 96, 0, digital_tlv),
-SOC_DOUBLE_R_TLV("AIF1DAC2 Volume", WM8994_AIF1_DAC2_LEFT_VOLUME,
- WM8994_AIF1_DAC2_RIGHT_VOLUME, 1, 96, 0, digital_tlv),
SOC_DOUBLE_R_TLV("AIF2DAC Volume", WM8994_AIF2_DAC_LEFT_VOLUME,
WM8994_AIF2_DAC_RIGHT_VOLUME, 1, 96, 0, digital_tlv),
@@ -565,17 +560,12 @@ SOC_SINGLE_TLV("AIF1 Boost Volume", WM8994_AIF1_CONTROL_2, 10, 3, 0, aif_tlv),
SOC_SINGLE_TLV("AIF2 Boost Volume", WM8994_AIF2_CONTROL_2, 10, 3, 0, aif_tlv),
SOC_SINGLE("AIF1DAC1 EQ Switch", WM8994_AIF1_DAC1_EQ_GAINS_1, 0, 1, 0),
-SOC_SINGLE("AIF1DAC2 EQ Switch", WM8994_AIF1_DAC2_EQ_GAINS_1, 0, 1, 0),
SOC_SINGLE("AIF2 EQ Switch", WM8994_AIF2_EQ_GAINS_1, 0, 1, 0),
WM8994_DRC_SWITCH("AIF1DAC1 DRC Switch", WM8994_AIF1_DRC1_1, 2),
WM8994_DRC_SWITCH("AIF1ADC1L DRC Switch", WM8994_AIF1_DRC1_1, 1),
WM8994_DRC_SWITCH("AIF1ADC1R DRC Switch", WM8994_AIF1_DRC1_1, 0),
-WM8994_DRC_SWITCH("AIF1DAC2 DRC Switch", WM8994_AIF1_DRC2_1, 2),
-WM8994_DRC_SWITCH("AIF1ADC2L DRC Switch", WM8994_AIF1_DRC2_1, 1),
-WM8994_DRC_SWITCH("AIF1ADC2R DRC Switch", WM8994_AIF1_DRC2_1, 0),
-
WM8994_DRC_SWITCH("AIF2DAC DRC Switch", WM8994_AIF2_DRC_1, 2),
WM8994_DRC_SWITCH("AIF2ADCL DRC Switch", WM8994_AIF2_DRC_1, 1),
WM8994_DRC_SWITCH("AIF2ADCR DRC Switch", WM8994_AIF2_DRC_1, 0),
@@ -594,9 +584,6 @@ SOC_SINGLE("Sidetone HPF Switch", WM8994_SIDETONE, 6, 1, 0),
SOC_ENUM("AIF1ADC1 HPF Mode", aif1adc1_hpf),
SOC_DOUBLE("AIF1ADC1 HPF Switch", WM8994_AIF1_ADC1_FILTERS, 12, 11, 1, 0),
-SOC_ENUM("AIF1ADC2 HPF Mode", aif1adc2_hpf),
-SOC_DOUBLE("AIF1ADC2 HPF Switch", WM8994_AIF1_ADC2_FILTERS, 12, 11, 1, 0),
-
SOC_ENUM("AIF2ADC HPF Mode", aif2adc_hpf),
SOC_DOUBLE("AIF2ADC HPF Switch", WM8994_AIF2_ADC_FILTERS, 12, 11, 1, 0),
@@ -637,6 +624,24 @@ SOC_SINGLE("AIF2DAC 3D Stereo Switch", WM8994_AIF2_DAC_FILTERS_2,
8, 1, 0),
};
+/* Controls not available on WM1811 */
+static const struct snd_kcontrol_new wm8994_snd_controls[] = {
+SOC_DOUBLE_R_TLV("AIF1ADC2 Volume", WM8994_AIF1_ADC2_LEFT_VOLUME,
+ WM8994_AIF1_ADC2_RIGHT_VOLUME,
+ 1, 119, 0, digital_tlv),
+SOC_DOUBLE_R_TLV("AIF1DAC2 Volume", WM8994_AIF1_DAC2_LEFT_VOLUME,
+ WM8994_AIF1_DAC2_RIGHT_VOLUME, 1, 96, 0, digital_tlv),
+
+SOC_SINGLE("AIF1DAC2 EQ Switch", WM8994_AIF1_DAC2_EQ_GAINS_1, 0, 1, 0),
+
+WM8994_DRC_SWITCH("AIF1DAC2 DRC Switch", WM8994_AIF1_DRC2_1, 2),
+WM8994_DRC_SWITCH("AIF1ADC2L DRC Switch", WM8994_AIF1_DRC2_1, 1),
+WM8994_DRC_SWITCH("AIF1ADC2R DRC Switch", WM8994_AIF1_DRC2_1, 0),
+
+SOC_ENUM("AIF1ADC2 HPF Mode", aif1adc2_hpf),
+SOC_DOUBLE("AIF1ADC2 HPF Switch", WM8994_AIF1_ADC2_FILTERS, 12, 11, 1, 0),
+};
+
static const struct snd_kcontrol_new wm8994_eq_controls[] = {
SOC_SINGLE_TLV("AIF1DAC1 EQ1 Volume", WM8994_AIF1_DAC1_EQ_GAINS_1, 11, 31, 0,
eq_tlv),
@@ -4258,13 +4263,15 @@ static int wm8994_component_probe(struct snd_soc_component *component)
wm8994_handle_pdata(wm8994);
wm_hubs_add_analogue_controls(component);
- snd_soc_add_component_controls(component, wm8994_snd_controls,
- ARRAY_SIZE(wm8994_snd_controls));
+ snd_soc_add_component_controls(component, wm8994_common_snd_controls,
+ ARRAY_SIZE(wm8994_common_snd_controls));
snd_soc_dapm_new_controls(dapm, wm8994_dapm_widgets,
ARRAY_SIZE(wm8994_dapm_widgets));
switch (control->type) {
case WM8994:
+ snd_soc_add_component_controls(component, wm8994_snd_controls,
+ ARRAY_SIZE(wm8994_snd_controls));
snd_soc_dapm_new_controls(dapm, wm8994_specific_dapm_widgets,
ARRAY_SIZE(wm8994_specific_dapm_widgets));
if (control->revision < 4) {
@@ -4284,8 +4291,10 @@ static int wm8994_component_probe(struct snd_soc_component *component)
}
break;
case WM8958:
+ snd_soc_add_component_controls(component, wm8994_snd_controls,
+ ARRAY_SIZE(wm8994_snd_controls));
snd_soc_add_component_controls(component, wm8958_snd_controls,
- ARRAY_SIZE(wm8958_snd_controls));
+ ARRAY_SIZE(wm8958_snd_controls));
snd_soc_dapm_new_controls(dapm, wm8958_dapm_widgets,
ARRAY_SIZE(wm8958_dapm_widgets));
if (control->revision < 1) {
diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c
index ae28d9907c30..9b8bb7bbe945 100644
--- a/sound/soc/codecs/wm_adsp.c
+++ b/sound/soc/codecs/wm_adsp.c
@@ -1259,8 +1259,7 @@ static unsigned int wmfw_convert_flags(unsigned int in, unsigned int len)
}
if (in) {
- if (in & WMFW_CTL_FLAG_READABLE)
- out |= rd;
+ out |= rd;
if (in & WMFW_CTL_FLAG_WRITEABLE)
out |= wr;
if (in & WMFW_CTL_FLAG_VOLATILE)
@@ -3697,11 +3696,16 @@ static int wm_adsp_buffer_parse_legacy(struct wm_adsp *dsp)
u32 xmalg, addr, magic;
int i, ret;
+ alg_region = wm_adsp_find_alg_region(dsp, WMFW_ADSP2_XM, dsp->fw_id);
+ if (!alg_region) {
+ adsp_err(dsp, "No algorithm region found\n");
+ return -EINVAL;
+ }
+
buf = wm_adsp_buffer_alloc(dsp);
if (!buf)
return -ENOMEM;
- alg_region = wm_adsp_find_alg_region(dsp, WMFW_ADSP2_XM, dsp->fw_id);
xmalg = dsp->ops->sys_config_size / sizeof(__be32);
addr = alg_region->base + xmalg + ALG_XM_FIELD(magic);
diff --git a/sound/soc/intel/boards/sof_rt5682.c b/sound/soc/intel/boards/sof_rt5682.c
index a437567b8cee..4f6e58c3954a 100644
--- a/sound/soc/intel/boards/sof_rt5682.c
+++ b/sound/soc/intel/boards/sof_rt5682.c
@@ -308,6 +308,9 @@ static const struct snd_soc_dapm_widget sof_widgets[] = {
SND_SOC_DAPM_HP("Headphone Jack", NULL),
SND_SOC_DAPM_MIC("Headset Mic", NULL),
SND_SOC_DAPM_SPK("Spk", NULL),
+};
+
+static const struct snd_soc_dapm_widget dmic_widgets[] = {
SND_SOC_DAPM_MIC("SoC DMIC", NULL),
};
@@ -318,10 +321,6 @@ static const struct snd_soc_dapm_route sof_map[] = {
/* other jacks */
{ "IN1P", NULL, "Headset Mic" },
-
- /* digital mics */
- {"DMic", NULL, "SoC DMIC"},
-
};
static const struct snd_soc_dapm_route speaker_map[] = {
@@ -329,6 +328,11 @@ static const struct snd_soc_dapm_route speaker_map[] = {
{ "Spk", NULL, "Speaker" },
};
+static const struct snd_soc_dapm_route dmic_map[] = {
+ /* digital mics */
+ {"DMic", NULL, "SoC DMIC"},
+};
+
static int speaker_codec_init(struct snd_soc_pcm_runtime *rtd)
{
struct snd_soc_card *card = rtd->card;
@@ -342,6 +346,28 @@ static int speaker_codec_init(struct snd_soc_pcm_runtime *rtd)
return ret;
}
+static int dmic_init(struct snd_soc_pcm_runtime *rtd)
+{
+ struct snd_soc_card *card = rtd->card;
+ int ret;
+
+ ret = snd_soc_dapm_new_controls(&card->dapm, dmic_widgets,
+ ARRAY_SIZE(dmic_widgets));
+ if (ret) {
+ dev_err(card->dev, "DMic widget addition failed: %d\n", ret);
+ /* Don't need to add routes if widget addition failed */
+ return ret;
+ }
+
+ ret = snd_soc_dapm_add_routes(&card->dapm, dmic_map,
+ ARRAY_SIZE(dmic_map));
+
+ if (ret)
+ dev_err(card->dev, "DMic map addition failed: %d\n", ret);
+
+ return ret;
+}
+
/* sof audio machine driver for rt5682 codec */
static struct snd_soc_card sof_audio_card_rt5682 = {
.name = "sof_rt5682",
@@ -445,6 +471,7 @@ static struct snd_soc_dai_link *sof_card_dai_links_create(struct device *dev,
links[id].name = "dmic01";
links[id].cpus = &cpus[id];
links[id].cpus->dai_name = "DMIC01 Pin";
+ links[id].init = dmic_init;
if (dmic_be_num > 1) {
/* set up 2 BE links at most */
links[id + 1].name = "dmic16k";
@@ -576,6 +603,15 @@ static int sof_audio_probe(struct platform_device *pdev)
/* need to get main clock from pmc */
if (sof_rt5682_quirk & SOF_RT5682_MCLK_BYTCHT_EN) {
ctx->mclk = devm_clk_get(&pdev->dev, "pmc_plt_clk_3");
+ if (IS_ERR(ctx->mclk)) {
+ ret = PTR_ERR(ctx->mclk);
+
+ dev_err(&pdev->dev,
+ "Failed to get MCLK from pmc_plt_clk_3: %d\n",
+ ret);
+ return ret;
+ }
+
ret = clk_prepare_enable(ctx->mclk);
if (ret < 0) {
dev_err(&pdev->dev,
@@ -621,8 +657,24 @@ static int sof_audio_probe(struct platform_device *pdev)
&sof_audio_card_rt5682);
}
+static int sof_rt5682_remove(struct platform_device *pdev)
+{
+ struct snd_soc_card *card = platform_get_drvdata(pdev);
+ struct snd_soc_component *component = NULL;
+
+ for_each_card_components(card, component) {
+ if (!strcmp(component->name, rt5682_component[0].name)) {
+ snd_soc_component_set_jack(component, NULL, NULL);
+ break;
+ }
+ }
+
+ return 0;
+}
+
static struct platform_driver sof_audio = {
.probe = sof_audio_probe,
+ .remove = sof_rt5682_remove,
.driver = {
.name = "sof_rt5682",
.pm = &snd_soc_pm_ops,
diff --git a/sound/soc/kirkwood/kirkwood-i2s.c b/sound/soc/kirkwood/kirkwood-i2s.c
index 61226fefe1c4..2a4ffe945177 100644
--- a/sound/soc/kirkwood/kirkwood-i2s.c
+++ b/sound/soc/kirkwood/kirkwood-i2s.c
@@ -555,10 +555,6 @@ static int kirkwood_i2s_dev_probe(struct platform_device *pdev)
return PTR_ERR(priv->clk);
}
- err = clk_prepare_enable(priv->clk);
- if (err < 0)
- return err;
-
priv->extclk = devm_clk_get(&pdev->dev, "extclk");
if (IS_ERR(priv->extclk)) {
if (PTR_ERR(priv->extclk) == -EPROBE_DEFER)
@@ -574,6 +570,10 @@ static int kirkwood_i2s_dev_probe(struct platform_device *pdev)
}
}
+ err = clk_prepare_enable(priv->clk);
+ if (err < 0)
+ return err;
+
/* Some sensible defaults - this reflects the powerup values */
priv->ctl_play = KIRKWOOD_PLAYCTL_SIZE_24;
priv->ctl_rec = KIRKWOOD_RECCTL_SIZE_24;
@@ -587,7 +587,7 @@ static int kirkwood_i2s_dev_probe(struct platform_device *pdev)
priv->ctl_rec |= KIRKWOOD_RECCTL_BURST_128;
}
- err = devm_snd_soc_register_component(&pdev->dev, &kirkwood_soc_component,
+ err = snd_soc_register_component(&pdev->dev, &kirkwood_soc_component,
soc_dai, 2);
if (err) {
dev_err(&pdev->dev, "snd_soc_register_component failed\n");
@@ -610,6 +610,7 @@ static int kirkwood_i2s_dev_remove(struct platform_device *pdev)
{
struct kirkwood_dma_data *priv = dev_get_drvdata(&pdev->dev);
+ snd_soc_unregister_component(&pdev->dev);
if (!IS_ERR(priv->extclk))
clk_disable_unprepare(priv->extclk);
clk_disable_unprepare(priv->clk);
diff --git a/sound/soc/rockchip/rockchip_i2s.c b/sound/soc/rockchip/rockchip_i2s.c
index af2d5a6124c8..61c984f10d8e 100644
--- a/sound/soc/rockchip/rockchip_i2s.c
+++ b/sound/soc/rockchip/rockchip_i2s.c
@@ -677,7 +677,7 @@ static int rockchip_i2s_probe(struct platform_device *pdev)
ret = rockchip_pcm_platform_register(&pdev->dev);
if (ret) {
dev_err(&pdev->dev, "Could not register PCM\n");
- return ret;
+ goto err_suspend;
}
return 0;
diff --git a/sound/soc/rockchip/rockchip_max98090.c b/sound/soc/rockchip/rockchip_max98090.c
index 0097df1fae66..e80b09143b63 100644
--- a/sound/soc/rockchip/rockchip_max98090.c
+++ b/sound/soc/rockchip/rockchip_max98090.c
@@ -66,10 +66,13 @@ static int rk_jack_event(struct notifier_block *nb, unsigned long event,
struct snd_soc_jack *jack = (struct snd_soc_jack *)data;
struct snd_soc_dapm_context *dapm = &jack->card->dapm;
- if (event & SND_JACK_MICROPHONE)
+ if (event & SND_JACK_MICROPHONE) {
snd_soc_dapm_force_enable_pin(dapm, "MICBIAS");
- else
+ snd_soc_dapm_force_enable_pin(dapm, "SHDN");
+ } else {
snd_soc_dapm_disable_pin(dapm, "MICBIAS");
+ snd_soc_dapm_disable_pin(dapm, "SHDN");
+ }
snd_soc_dapm_sync(dapm);
diff --git a/sound/soc/samsung/arndale_rt5631.c b/sound/soc/samsung/arndale_rt5631.c
index c213913eb984..fd8c6642fb0d 100644
--- a/sound/soc/samsung/arndale_rt5631.c
+++ b/sound/soc/samsung/arndale_rt5631.c
@@ -5,6 +5,7 @@
// Author: Claude <claude@insginal.co.kr>
#include <linux/module.h>
+#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/clk.h>
@@ -74,6 +75,17 @@ static struct snd_soc_card arndale_rt5631 = {
.num_links = ARRAY_SIZE(arndale_rt5631_dai),
};
+static void arndale_put_of_nodes(struct snd_soc_card *card)
+{
+ struct snd_soc_dai_link *dai_link;
+ int i;
+
+ for_each_card_prelinks(card, i, dai_link) {
+ of_node_put(dai_link->cpus->of_node);
+ of_node_put(dai_link->codecs->of_node);
+ }
+}
+
static int arndale_audio_probe(struct platform_device *pdev)
{
int n, ret;
@@ -103,18 +115,31 @@ static int arndale_audio_probe(struct platform_device *pdev)
if (!arndale_rt5631_dai[0].codecs->of_node) {
dev_err(&pdev->dev,
"Property 'samsung,audio-codec' missing or invalid\n");
- return -EINVAL;
+ ret = -EINVAL;
+ goto err_put_of_nodes;
}
}
ret = devm_snd_soc_register_card(card->dev, card);
+ if (ret) {
+ dev_err(&pdev->dev, "snd_soc_register_card() failed: %d\n", ret);
+ goto err_put_of_nodes;
+ }
+ return 0;
- if (ret)
- dev_err(&pdev->dev, "snd_soc_register_card() failed:%d\n", ret);
-
+err_put_of_nodes:
+ arndale_put_of_nodes(card);
return ret;
}
+static int arndale_audio_remove(struct platform_device *pdev)
+{
+ struct snd_soc_card *card = platform_get_drvdata(pdev);
+
+ arndale_put_of_nodes(card);
+ return 0;
+}
+
static const struct of_device_id samsung_arndale_rt5631_of_match[] __maybe_unused = {
{ .compatible = "samsung,arndale-rt5631", },
{ .compatible = "samsung,arndale-alc5631", },
@@ -129,6 +154,7 @@ static struct platform_driver arndale_audio_driver = {
.of_match_table = of_match_ptr(samsung_arndale_rt5631_of_match),
},
.probe = arndale_audio_probe,
+ .remove = arndale_audio_remove,
};
module_platform_driver(arndale_audio_driver);
diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c
index bda5b958d0dc..e9596c2096cd 100644
--- a/sound/soc/sh/rcar/core.c
+++ b/sound/soc/sh/rcar/core.c
@@ -761,6 +761,7 @@ static int rsnd_soc_dai_set_fmt(struct snd_soc_dai *dai, unsigned int fmt)
}
/* set format */
+ rdai->bit_clk_inv = 0;
switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
case SND_SOC_DAIFMT_I2S:
rdai->sys_delay = 0;
diff --git a/sound/soc/sh/rcar/dma.c b/sound/soc/sh/rcar/dma.c
index 0324a5c39619..28f65eba2bb4 100644
--- a/sound/soc/sh/rcar/dma.c
+++ b/sound/soc/sh/rcar/dma.c
@@ -508,10 +508,10 @@ static struct rsnd_mod_ops rsnd_dmapp_ops = {
#define RDMA_SSI_I_N(addr, i) (addr ##_reg - 0x00300000 + (0x40 * i) + 0x8)
#define RDMA_SSI_O_N(addr, i) (addr ##_reg - 0x00300000 + (0x40 * i) + 0xc)
-#define RDMA_SSIU_I_N(addr, i, j) (addr ##_reg - 0x00441000 + (0x1000 * (i)) + (((j) / 4) * 0xA000) + (((j) % 4) * 0x400))
+#define RDMA_SSIU_I_N(addr, i, j) (addr ##_reg - 0x00441000 + (0x1000 * (i)) + (((j) / 4) * 0xA000) + (((j) % 4) * 0x400) - (0x4000 * ((i) / 9) * ((j) / 4)))
#define RDMA_SSIU_O_N(addr, i, j) RDMA_SSIU_I_N(addr, i, j)
-#define RDMA_SSIU_I_P(addr, i, j) (addr ##_reg - 0x00141000 + (0x1000 * (i)) + (((j) / 4) * 0xA000) + (((j) % 4) * 0x400))
+#define RDMA_SSIU_I_P(addr, i, j) (addr ##_reg - 0x00141000 + (0x1000 * (i)) + (((j) / 4) * 0xA000) + (((j) % 4) * 0x400) - (0x4000 * ((i) / 9) * ((j) / 4)))
#define RDMA_SSIU_O_P(addr, i, j) RDMA_SSIU_I_P(addr, i, j)
#define RDMA_SRC_I_N(addr, i) (addr ##_reg - 0x00500000 + (0x400 * i))
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index e163dde5eab1..b600d3eaaf5c 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -1070,7 +1070,7 @@ static int soc_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
return ret;
}
- snd_soc_dai_trigger(cpu_dai, substream, cmd);
+ ret = snd_soc_dai_trigger(cpu_dai, substream, cmd);
if (ret < 0)
return ret;
@@ -1097,7 +1097,7 @@ static int soc_pcm_bespoke_trigger(struct snd_pcm_substream *substream,
return ret;
}
- snd_soc_dai_bespoke_trigger(cpu_dai, substream, cmd);
+ ret = snd_soc_dai_bespoke_trigger(cpu_dai, substream, cmd);
if (ret < 0)
return ret;
@@ -1146,6 +1146,7 @@ static int dpcm_be_connect(struct snd_soc_pcm_runtime *fe,
{
struct snd_soc_dpcm *dpcm;
unsigned long flags;
+ char *name;
/* only add new dpcms */
for_each_dpcm_be(fe, stream, dpcm) {
@@ -1171,9 +1172,15 @@ static int dpcm_be_connect(struct snd_soc_pcm_runtime *fe,
stream ? "<-" : "->", be->dai_link->name);
#ifdef CONFIG_DEBUG_FS
- dpcm->debugfs_state = debugfs_create_dir(be->dai_link->name,
- fe->debugfs_dpcm_root);
- debugfs_create_u32("state", 0644, dpcm->debugfs_state, &dpcm->state);
+ name = kasprintf(GFP_KERNEL, "%s:%s", be->dai_link->name,
+ stream ? "capture" : "playback");
+ if (name) {
+ dpcm->debugfs_state = debugfs_create_dir(name,
+ fe->debugfs_dpcm_root);
+ debugfs_create_u32("state", 0644, dpcm->debugfs_state,
+ &dpcm->state);
+ kfree(name);
+ }
#endif
return 1;
}
diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c
index aa9a1fca46fa..0fd032914a31 100644
--- a/sound/soc/soc-topology.c
+++ b/sound/soc/soc-topology.c
@@ -1582,7 +1582,7 @@ static int soc_tplg_dapm_widget_create(struct soc_tplg *tplg,
/* map user to kernel widget ID */
template.id = get_widget_id(le32_to_cpu(w->id));
- if (template.id < 0)
+ if ((int)template.id < 0)
return template.id;
/* strings are allocated here, but used and freed by the widget */
diff --git a/sound/soc/sof/control.c b/sound/soc/sof/control.c
index a4983f90ff5b..2b8711eda362 100644
--- a/sound/soc/sof/control.c
+++ b/sound/soc/sof/control.c
@@ -60,13 +60,16 @@ int snd_sof_volume_put(struct snd_kcontrol *kcontrol,
struct snd_sof_dev *sdev = scontrol->sdev;
struct sof_ipc_ctrl_data *cdata = scontrol->control_data;
unsigned int i, channels = scontrol->num_channels;
+ bool change = false;
+ u32 value;
/* update each channel */
for (i = 0; i < channels; i++) {
- cdata->chanv[i].value =
- mixer_to_ipc(ucontrol->value.integer.value[i],
+ value = mixer_to_ipc(ucontrol->value.integer.value[i],
scontrol->volume_table, sm->max + 1);
+ change = change || (value != cdata->chanv[i].value);
cdata->chanv[i].channel = i;
+ cdata->chanv[i].value = value;
}
/* notify DSP of mixer updates */
@@ -76,8 +79,7 @@ int snd_sof_volume_put(struct snd_kcontrol *kcontrol,
SOF_CTRL_TYPE_VALUE_CHAN_GET,
SOF_CTRL_CMD_VOLUME,
true);
-
- return 0;
+ return change;
}
int snd_sof_switch_get(struct snd_kcontrol *kcontrol,
@@ -105,11 +107,15 @@ int snd_sof_switch_put(struct snd_kcontrol *kcontrol,
struct snd_sof_dev *sdev = scontrol->sdev;
struct sof_ipc_ctrl_data *cdata = scontrol->control_data;
unsigned int i, channels = scontrol->num_channels;
+ bool change = false;
+ u32 value;
/* update each channel */
for (i = 0; i < channels; i++) {
- cdata->chanv[i].value = ucontrol->value.integer.value[i];
+ value = ucontrol->value.integer.value[i];
+ change = change || (value != cdata->chanv[i].value);
cdata->chanv[i].channel = i;
+ cdata->chanv[i].value = value;
}
/* notify DSP of mixer updates */
@@ -120,7 +126,7 @@ int snd_sof_switch_put(struct snd_kcontrol *kcontrol,
SOF_CTRL_CMD_SWITCH,
true);
- return 0;
+ return change;
}
int snd_sof_enum_get(struct snd_kcontrol *kcontrol,
@@ -148,11 +154,15 @@ int snd_sof_enum_put(struct snd_kcontrol *kcontrol,
struct snd_sof_dev *sdev = scontrol->sdev;
struct sof_ipc_ctrl_data *cdata = scontrol->control_data;
unsigned int i, channels = scontrol->num_channels;
+ bool change = false;
+ u32 value;
/* update each channel */
for (i = 0; i < channels; i++) {
- cdata->chanv[i].value = ucontrol->value.enumerated.item[i];
+ value = ucontrol->value.enumerated.item[i];
+ change = change || (value != cdata->chanv[i].value);
cdata->chanv[i].channel = i;
+ cdata->chanv[i].value = value;
}
/* notify DSP of enum updates */
@@ -163,7 +173,7 @@ int snd_sof_enum_put(struct snd_kcontrol *kcontrol,
SOF_CTRL_CMD_ENUM,
true);
- return 0;
+ return change;
}
int snd_sof_bytes_get(struct snd_kcontrol *kcontrol,
diff --git a/sound/soc/sof/debug.c b/sound/soc/sof/debug.c
index 54cd431faab7..5529e8eeca46 100644
--- a/sound/soc/sof/debug.c
+++ b/sound/soc/sof/debug.c
@@ -152,8 +152,10 @@ static ssize_t sof_dfsentry_write(struct file *file, const char __user *buffer,
*/
dentry = file->f_path.dentry;
if (strcmp(dentry->d_name.name, "ipc_flood_count") &&
- strcmp(dentry->d_name.name, "ipc_flood_duration_ms"))
- return -EINVAL;
+ strcmp(dentry->d_name.name, "ipc_flood_duration_ms")) {
+ ret = -EINVAL;
+ goto out;
+ }
if (!strcmp(dentry->d_name.name, "ipc_flood_duration_ms"))
flood_duration_test = true;
diff --git a/sound/soc/sof/intel/Kconfig b/sound/soc/sof/intel/Kconfig
index 479ba249e219..d62f51d33be1 100644
--- a/sound/soc/sof/intel/Kconfig
+++ b/sound/soc/sof/intel/Kconfig
@@ -273,6 +273,16 @@ config SND_SOC_SOF_HDA_AUDIO_CODEC
Say Y if you want to enable HDAudio codecs with SOF.
If unsure select "N".
+config SND_SOC_SOF_HDA_ALWAYS_ENABLE_DMI_L1
+ bool "SOF enable DMI Link L1"
+ help
+ This option enables DMI L1 for both playback and capture
+ and disables known workarounds for specific HDaudio platforms.
+ Only use to look into power optimizations on platforms not
+ affected by DMI L1 issues. This option is not recommended.
+ Say Y if you want to enable DMI Link L1
+ If unsure, select "N".
+
endif ## SND_SOC_SOF_HDA_COMMON
config SND_SOC_SOF_HDA_LINK_BASELINE
diff --git a/sound/soc/sof/intel/bdw.c b/sound/soc/sof/intel/bdw.c
index e282179263e8..80e2826fb447 100644
--- a/sound/soc/sof/intel/bdw.c
+++ b/sound/soc/sof/intel/bdw.c
@@ -37,6 +37,7 @@
#define MBOX_SIZE 0x1000
#define MBOX_DUMP_SIZE 0x30
#define EXCEPT_OFFSET 0x800
+#define EXCEPT_MAX_HDR_SIZE 0x400
/* DSP peripherals */
#define DMAC0_OFFSET 0xFE000
@@ -228,6 +229,11 @@ static void bdw_get_registers(struct snd_sof_dev *sdev,
/* note: variable AR register array is not read */
/* then get panic info */
+ if (xoops->arch_hdr.totalsize > EXCEPT_MAX_HDR_SIZE) {
+ dev_err(sdev->dev, "invalid header size 0x%x. FW oops is bogus\n",
+ xoops->arch_hdr.totalsize);
+ return;
+ }
offset += xoops->arch_hdr.totalsize;
sof_mailbox_read(sdev, offset, panic_info, sizeof(*panic_info));
@@ -451,6 +457,7 @@ static int bdw_probe(struct snd_sof_dev *sdev)
/* TODO: add offsets */
sdev->mmio_bar = BDW_DSP_BAR;
sdev->mailbox_bar = BDW_DSP_BAR;
+ sdev->dsp_oops_offset = MBOX_OFFSET;
/* PCI base */
mmio = platform_get_resource(pdev, IORESOURCE_MEM,
diff --git a/sound/soc/sof/intel/byt.c b/sound/soc/sof/intel/byt.c
index 5e7a6aaa627a..a1e514f71739 100644
--- a/sound/soc/sof/intel/byt.c
+++ b/sound/soc/sof/intel/byt.c
@@ -28,6 +28,7 @@
#define MBOX_OFFSET 0x144000
#define MBOX_SIZE 0x1000
#define EXCEPT_OFFSET 0x800
+#define EXCEPT_MAX_HDR_SIZE 0x400
/* DSP peripherals */
#define DMAC0_OFFSET 0x098000
@@ -126,6 +127,11 @@ static void byt_get_registers(struct snd_sof_dev *sdev,
/* note: variable AR register array is not read */
/* then get panic info */
+ if (xoops->arch_hdr.totalsize > EXCEPT_MAX_HDR_SIZE) {
+ dev_err(sdev->dev, "invalid header size 0x%x. FW oops is bogus\n",
+ xoops->arch_hdr.totalsize);
+ return;
+ }
offset += xoops->arch_hdr.totalsize;
sof_mailbox_read(sdev, offset, panic_info, sizeof(*panic_info));
diff --git a/sound/soc/sof/intel/hda-ctrl.c b/sound/soc/sof/intel/hda-ctrl.c
index bc41028a7a01..df1909e1d950 100644
--- a/sound/soc/sof/intel/hda-ctrl.c
+++ b/sound/soc/sof/intel/hda-ctrl.c
@@ -139,20 +139,16 @@ void hda_dsp_ctrl_misc_clock_gating(struct snd_sof_dev *sdev, bool enable)
*/
int hda_dsp_ctrl_clock_power_gating(struct snd_sof_dev *sdev, bool enable)
{
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA)
- struct hdac_bus *bus = sof_to_bus(sdev);
-#endif
u32 val;
/* enable/disable audio dsp clock gating */
val = enable ? PCI_CGCTL_ADSPDCGE : 0;
snd_sof_pci_update_bits(sdev, PCI_CGCTL, PCI_CGCTL_ADSPDCGE, val);
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA)
- /* enable/disable L1 support */
- val = enable ? SOF_HDA_VS_EM2_L1SEN : 0;
- snd_hdac_chip_updatel(bus, VS_EM2, SOF_HDA_VS_EM2_L1SEN, val);
-#endif
+ /* enable/disable DMI Link L1 support */
+ val = enable ? HDA_VS_INTEL_EM2_L1SEN : 0;
+ snd_sof_dsp_update_bits(sdev, HDA_DSP_HDA_BAR, HDA_VS_INTEL_EM2,
+ HDA_VS_INTEL_EM2_L1SEN, val);
/* enable/disable audio dsp power gating */
val = enable ? 0 : PCI_PGCTL_ADSPPGD;
diff --git a/sound/soc/sof/intel/hda-loader.c b/sound/soc/sof/intel/hda-loader.c
index 6427f0b3a2f1..65c2af3fcaab 100644
--- a/sound/soc/sof/intel/hda-loader.c
+++ b/sound/soc/sof/intel/hda-loader.c
@@ -44,6 +44,7 @@ static int cl_stream_prepare(struct snd_sof_dev *sdev, unsigned int format,
return -ENODEV;
}
hstream = &dsp_stream->hstream;
+ hstream->substream = NULL;
/* allocate DMA buffer */
ret = snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV_SG, &pci->dev, size, dmab);
diff --git a/sound/soc/sof/intel/hda-stream.c b/sound/soc/sof/intel/hda-stream.c
index ad8d41f22e92..0c11fceb28a7 100644
--- a/sound/soc/sof/intel/hda-stream.c
+++ b/sound/soc/sof/intel/hda-stream.c
@@ -185,6 +185,17 @@ hda_dsp_stream_get(struct snd_sof_dev *sdev, int direction)
direction == SNDRV_PCM_STREAM_PLAYBACK ?
"playback" : "capture");
+ /*
+ * Disable DMI Link L1 entry when capture stream is opened.
+ * Workaround to address a known issue with host DMA that results
+ * in xruns during pause/release in capture scenarios.
+ */
+ if (!IS_ENABLED(CONFIG_SND_SOC_SOF_HDA_ALWAYS_ENABLE_DMI_L1))
+ if (stream && direction == SNDRV_PCM_STREAM_CAPTURE)
+ snd_sof_dsp_update_bits(sdev, HDA_DSP_HDA_BAR,
+ HDA_VS_INTEL_EM2,
+ HDA_VS_INTEL_EM2_L1SEN, 0);
+
return stream;
}
@@ -193,23 +204,43 @@ int hda_dsp_stream_put(struct snd_sof_dev *sdev, int direction, int stream_tag)
{
struct hdac_bus *bus = sof_to_bus(sdev);
struct hdac_stream *s;
+ bool active_capture_stream = false;
+ bool found = false;
spin_lock_irq(&bus->reg_lock);
- /* find used stream */
+ /*
+ * close stream matching the stream tag
+ * and check if there are any open capture streams.
+ */
list_for_each_entry(s, &bus->stream_list, list) {
- if (s->direction == direction &&
- s->opened && s->stream_tag == stream_tag) {
+ if (!s->opened)
+ continue;
+
+ if (s->direction == direction && s->stream_tag == stream_tag) {
s->opened = false;
- spin_unlock_irq(&bus->reg_lock);
- return 0;
+ found = true;
+ } else if (s->direction == SNDRV_PCM_STREAM_CAPTURE) {
+ active_capture_stream = true;
}
}
spin_unlock_irq(&bus->reg_lock);
- dev_dbg(sdev->dev, "stream_tag %d not opened!\n", stream_tag);
- return -ENODEV;
+ /* Enable DMI L1 entry if there are no capture streams open */
+ if (!IS_ENABLED(CONFIG_SND_SOC_SOF_HDA_ALWAYS_ENABLE_DMI_L1))
+ if (!active_capture_stream)
+ snd_sof_dsp_update_bits(sdev, HDA_DSP_HDA_BAR,
+ HDA_VS_INTEL_EM2,
+ HDA_VS_INTEL_EM2_L1SEN,
+ HDA_VS_INTEL_EM2_L1SEN);
+
+ if (!found) {
+ dev_dbg(sdev->dev, "stream_tag %d not opened!\n", stream_tag);
+ return -ENODEV;
+ }
+
+ return 0;
}
int hda_dsp_stream_trigger(struct snd_sof_dev *sdev,
diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c
index c72e9a09eee1..06e84679087b 100644
--- a/sound/soc/sof/intel/hda.c
+++ b/sound/soc/sof/intel/hda.c
@@ -35,6 +35,8 @@
#define IS_CFL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa348)
#define IS_CNL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9dc8)
+#define EXCEPT_MAX_HDR_SIZE 0x400
+
/*
* Debug
*/
@@ -131,6 +133,11 @@ static void hda_dsp_get_registers(struct snd_sof_dev *sdev,
/* note: variable AR register array is not read */
/* then get panic info */
+ if (xoops->arch_hdr.totalsize > EXCEPT_MAX_HDR_SIZE) {
+ dev_err(sdev->dev, "invalid header size 0x%x. FW oops is bogus\n",
+ xoops->arch_hdr.totalsize);
+ return;
+ }
offset += xoops->arch_hdr.totalsize;
sof_block_read(sdev, sdev->mmio_bar, offset,
panic_info, sizeof(*panic_info));
diff --git a/sound/soc/sof/intel/hda.h b/sound/soc/sof/intel/hda.h
index 5591841a1b6f..23e430d3e056 100644
--- a/sound/soc/sof/intel/hda.h
+++ b/sound/soc/sof/intel/hda.h
@@ -39,7 +39,6 @@
#define SOF_HDA_WAKESTS 0x0E
#define SOF_HDA_WAKESTS_INT_MASK ((1 << 8) - 1)
#define SOF_HDA_RIRBSTS 0x5d
-#define SOF_HDA_VS_EM2_L1SEN BIT(13)
/* SOF_HDA_GCTL register bist */
#define SOF_HDA_GCTL_RESET BIT(0)
@@ -228,6 +227,10 @@
#define HDA_DSP_REG_HIPCIE (HDA_DSP_IPC_BASE + 0x0C)
#define HDA_DSP_REG_HIPCCTL (HDA_DSP_IPC_BASE + 0x10)
+/* Intel Vendor Specific Registers */
+#define HDA_VS_INTEL_EM2 0x1030
+#define HDA_VS_INTEL_EM2_L1SEN BIT(13)
+
/* HIPCI */
#define HDA_DSP_REG_HIPCI_BUSY BIT(31)
#define HDA_DSP_REG_HIPCI_MSG_MASK 0x7FFFFFFF
diff --git a/sound/soc/sof/ipc.c b/sound/soc/sof/ipc.c
index b2f359d2f7e5..086eeeab8679 100644
--- a/sound/soc/sof/ipc.c
+++ b/sound/soc/sof/ipc.c
@@ -572,8 +572,10 @@ static int sof_set_get_large_ctrl_data(struct snd_sof_dev *sdev,
else
err = sof_get_ctrl_copy_params(cdata->type, partdata, cdata,
sparams);
- if (err < 0)
+ if (err < 0) {
+ kfree(partdata);
return err;
+ }
msg_bytes = sparams->msg_bytes;
pl_size = sparams->pl_size;
diff --git a/sound/soc/sof/loader.c b/sound/soc/sof/loader.c
index d7f32745fefe..9a9a381a908d 100644
--- a/sound/soc/sof/loader.c
+++ b/sound/soc/sof/loader.c
@@ -546,10 +546,10 @@ int snd_sof_run_firmware(struct snd_sof_dev *sdev)
msecs_to_jiffies(sdev->boot_timeout));
if (ret == 0) {
dev_err(sdev->dev, "error: firmware boot failure\n");
- /* after this point FW_READY msg should be ignored */
- sdev->boot_complete = true;
snd_sof_dsp_dbg_dump(sdev, SOF_DBG_REGS | SOF_DBG_MBOX |
SOF_DBG_TEXT | SOF_DBG_PCI);
+ /* after this point FW_READY msg should be ignored */
+ sdev->boot_complete = true;
return -EIO;
}
diff --git a/sound/soc/sof/pcm.c b/sound/soc/sof/pcm.c
index e3f6a6dc0f36..2b876d497447 100644
--- a/sound/soc/sof/pcm.c
+++ b/sound/soc/sof/pcm.c
@@ -244,7 +244,7 @@ static int sof_pcm_hw_free(struct snd_pcm_substream *substream)
snd_soc_rtdcom_lookup(rtd, DRV_NAME);
struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(component);
struct snd_sof_pcm *spcm;
- int ret;
+ int ret, err = 0;
/* nothing to do for BE */
if (rtd->dai_link->no_pcm)
@@ -254,26 +254,26 @@ static int sof_pcm_hw_free(struct snd_pcm_substream *substream)
if (!spcm)
return -EINVAL;
- if (!spcm->prepared[substream->stream])
- return 0;
-
dev_dbg(sdev->dev, "pcm: free stream %d dir %d\n", spcm->pcm.pcm_id,
substream->stream);
- ret = sof_pcm_dsp_pcm_free(substream, sdev, spcm);
+ if (spcm->prepared[substream->stream]) {
+ ret = sof_pcm_dsp_pcm_free(substream, sdev, spcm);
+ if (ret < 0)
+ err = ret;
+ }
snd_pcm_lib_free_pages(substream);
cancel_work_sync(&spcm->stream[substream->stream].period_elapsed_work);
- if (ret < 0)
- return ret;
-
ret = snd_sof_pcm_platform_hw_free(sdev, substream);
- if (ret < 0)
+ if (ret < 0) {
dev_err(sdev->dev, "error: platform hw free failed\n");
+ err = ret;
+ }
- return ret;
+ return err;
}
static int sof_pcm_prepare(struct snd_pcm_substream *substream)
@@ -323,6 +323,7 @@ static int sof_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
struct sof_ipc_stream stream;
struct sof_ipc_reply reply;
bool reset_hw_params = false;
+ bool ipc_first = false;
int ret;
/* nothing to do for BE */
@@ -343,6 +344,7 @@ static int sof_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
switch (cmd) {
case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
stream.hdr.cmd |= SOF_IPC_STREAM_TRIG_PAUSE;
+ ipc_first = true;
break;
case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
stream.hdr.cmd |= SOF_IPC_STREAM_TRIG_RELEASE;
@@ -363,6 +365,7 @@ static int sof_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
case SNDRV_PCM_TRIGGER_SUSPEND:
case SNDRV_PCM_TRIGGER_STOP:
stream.hdr.cmd |= SOF_IPC_STREAM_TRIG_STOP;
+ ipc_first = true;
reset_hw_params = true;
break;
default:
@@ -370,12 +373,22 @@ static int sof_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
return -EINVAL;
}
- snd_sof_pcm_platform_trigger(sdev, substream, cmd);
+ /*
+ * DMA and IPC sequence is different for start and stop. Need to send
+ * STOP IPC before stop DMA
+ */
+ if (!ipc_first)
+ snd_sof_pcm_platform_trigger(sdev, substream, cmd);
/* send IPC to the DSP */
ret = sof_ipc_tx_message(sdev->ipc, stream.hdr.cmd, &stream,
sizeof(stream), &reply, sizeof(reply));
+ /* need to STOP DMA even if STOP IPC failed */
+ if (ipc_first)
+ snd_sof_pcm_platform_trigger(sdev, substream, cmd);
+
+ /* free PCM if reset_hw_params is set and the STOP IPC is successful */
if (!ret && reset_hw_params)
ret = sof_pcm_dsp_pcm_free(substream, sdev, spcm);
diff --git a/sound/soc/sof/topology.c b/sound/soc/sof/topology.c
index fc85efbad378..4452594c2e17 100644
--- a/sound/soc/sof/topology.c
+++ b/sound/soc/sof/topology.c
@@ -543,15 +543,16 @@ static int sof_control_load_bytes(struct snd_soc_component *scomp,
struct soc_bytes_ext *sbe = (struct soc_bytes_ext *)kc->private_value;
int max_size = sbe->max;
- if (le32_to_cpu(control->priv.size) > max_size) {
+ /* init the get/put bytes data */
+ scontrol->size = sizeof(struct sof_ipc_ctrl_data) +
+ le32_to_cpu(control->priv.size);
+
+ if (scontrol->size > max_size) {
dev_err(sdev->dev, "err: bytes data size %d exceeds max %d.\n",
- control->priv.size, max_size);
+ scontrol->size, max_size);
return -EINVAL;
}
- /* init the get/put bytes data */
- scontrol->size = sizeof(struct sof_ipc_ctrl_data) +
- le32_to_cpu(control->priv.size);
scontrol->control_data = kzalloc(max_size, GFP_KERNEL);
cdata = scontrol->control_data;
if (!scontrol->control_data)
@@ -920,7 +921,9 @@ static void sof_parse_word_tokens(struct snd_soc_component *scomp,
for (j = 0; j < count; j++) {
/* match token type */
if (!(tokens[j].type == SND_SOC_TPLG_TUPLE_TYPE_WORD ||
- tokens[j].type == SND_SOC_TPLG_TUPLE_TYPE_SHORT))
+ tokens[j].type == SND_SOC_TPLG_TUPLE_TYPE_SHORT ||
+ tokens[j].type == SND_SOC_TPLG_TUPLE_TYPE_BYTE ||
+ tokens[j].type == SND_SOC_TPLG_TUPLE_TYPE_BOOL))
continue;
/* match token id */
diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c
index d7501f88aaa6..48e629ac2d88 100644
--- a/sound/soc/stm/stm32_sai_sub.c
+++ b/sound/soc/stm/stm32_sai_sub.c
@@ -505,10 +505,20 @@ static int stm32_sai_set_sysclk(struct snd_soc_dai *cpu_dai,
if (dir == SND_SOC_CLOCK_OUT && sai->sai_mclk) {
ret = regmap_update_bits(sai->regmap, STM_SAI_CR1_REGX,
SAI_XCR1_NODIV,
- (unsigned int)~SAI_XCR1_NODIV);
+ freq ? 0 : SAI_XCR1_NODIV);
if (ret < 0)
return ret;
+ /* Assume shutdown if requested frequency is 0Hz */
+ if (!freq) {
+ /* Release mclk rate only if rate was actually set */
+ if (sai->mclk_rate) {
+ clk_rate_exclusive_put(sai->sai_mclk);
+ sai->mclk_rate = 0;
+ }
+ return 0;
+ }
+
/* If master clock is used, set parent clock now */
ret = stm32_sai_set_parent_clock(sai, freq);
if (ret)
@@ -1093,15 +1103,6 @@ static void stm32_sai_shutdown(struct snd_pcm_substream *substream,
regmap_update_bits(sai->regmap, STM_SAI_IMR_REGX, SAI_XIMR_MASK, 0);
- regmap_update_bits(sai->regmap, STM_SAI_CR1_REGX, SAI_XCR1_NODIV,
- SAI_XCR1_NODIV);
-
- /* Release mclk rate only if rate was actually set */
- if (sai->mclk_rate) {
- clk_rate_exclusive_put(sai->sai_mclk);
- sai->mclk_rate = 0;
- }
-
clk_disable_unprepare(sai->sai_ck);
spin_lock_irqsave(&sai->irq_lock, flags);
@@ -1217,6 +1218,16 @@ static int stm32_sai_pcm_process_spdif(struct snd_pcm_substream *substream,
return 0;
}
+/* No support of mmap in S/PDIF mode */
+static const struct snd_pcm_hardware stm32_sai_pcm_hw_spdif = {
+ .info = SNDRV_PCM_INFO_INTERLEAVED,
+ .buffer_bytes_max = 8 * PAGE_SIZE,
+ .period_bytes_min = 1024,
+ .period_bytes_max = PAGE_SIZE,
+ .periods_min = 2,
+ .periods_max = 8,
+};
+
static const struct snd_pcm_hardware stm32_sai_pcm_hw = {
.info = SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_MMAP,
.buffer_bytes_max = 8 * PAGE_SIZE,
@@ -1269,7 +1280,7 @@ static const struct snd_dmaengine_pcm_config stm32_sai_pcm_config = {
};
static const struct snd_dmaengine_pcm_config stm32_sai_pcm_config_spdif = {
- .pcm_hardware = &stm32_sai_pcm_hw,
+ .pcm_hardware = &stm32_sai_pcm_hw_spdif,
.prepare_slave_config = snd_dmaengine_pcm_prepare_slave_config,
.process = stm32_sai_pcm_process_spdif,
};
diff --git a/sound/soc/ti/sdma-pcm.c b/sound/soc/ti/sdma-pcm.c
index a236350beb10..2b0bc234e1b6 100644
--- a/sound/soc/ti/sdma-pcm.c
+++ b/sound/soc/ti/sdma-pcm.c
@@ -62,7 +62,7 @@ int sdma_pcm_platform_register(struct device *dev,
config->chan_names[0] = txdmachan;
config->chan_names[1] = rxdmachan;
- return devm_snd_dmaengine_pcm_register(dev, config, 0);
+ return devm_snd_dmaengine_pcm_register(dev, config, flags);
}
EXPORT_SYMBOL_GPL(sdma_pcm_platform_register);
diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c
index a2ab8e8d3a93..4a9a2f6ef5a4 100644
--- a/sound/usb/endpoint.c
+++ b/sound/usb/endpoint.c
@@ -388,6 +388,9 @@ static void snd_complete_urb(struct urb *urb)
}
prepare_outbound_urb(ep, ctx);
+ /* can be stopped during prepare callback */
+ if (unlikely(!test_bit(EP_FLAG_RUNNING, &ep->flags)))
+ goto exit_clear;
} else {
retire_inbound_urb(ep, ctx);
/* can be stopped during retire callback */
diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
index 3fd1d1749edf..45eee5cc312e 100644
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -1229,7 +1229,8 @@ static int get_min_max_with_quirks(struct usb_mixer_elem_info *cval,
if (cval->min + cval->res < cval->max) {
int last_valid_res = cval->res;
int saved, test, check;
- get_cur_mix_raw(cval, minchn, &saved);
+ if (get_cur_mix_raw(cval, minchn, &saved) < 0)
+ goto no_res_check;
for (;;) {
test = saved;
if (test < cval->max)
@@ -1249,6 +1250,7 @@ static int get_min_max_with_quirks(struct usb_mixer_elem_info *cval,
snd_usb_set_cur_mix_value(cval, minchn, 0, saved);
}
+no_res_check:
cval->initialized = 1;
}
diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c
index 33cd26763c0e..ff5ab24f3bd1 100644
--- a/sound/usb/pcm.c
+++ b/sound/usb/pcm.c
@@ -348,6 +348,9 @@ static int set_sync_ep_implicit_fb_quirk(struct snd_usb_substream *subs,
ep = 0x84;
ifnum = 0;
goto add_sync_ep_from_ifnum;
+ case USB_ID(0x0582, 0x01d8): /* BOSS Katana */
+ /* BOSS Katana amplifiers do not need quirks */
+ return 0;
}
if (attr == USB_ENDPOINT_SYNC_ASYNC &&
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index fbfde996fee7..349e1e52996d 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -248,8 +248,8 @@ static int create_yamaha_midi_quirk(struct snd_usb_audio *chip,
NULL, USB_MS_MIDI_OUT_JACK);
if (!injd && !outjd)
return -ENODEV;
- if (!(injd && snd_usb_validate_midi_desc(injd)) ||
- !(outjd && snd_usb_validate_midi_desc(outjd)))
+ if ((injd && !snd_usb_validate_midi_desc(injd)) ||
+ (outjd && !snd_usb_validate_midi_desc(outjd)))
return -ENODEV;
if (injd && (injd->bLength < 5 ||
(injd->bJackType != USB_MS_EMBEDDED &&
@@ -1657,6 +1657,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
case 0x23ba: /* Playback Designs */
case 0x25ce: /* Mytek devices */
case 0x278b: /* Rotel? */
+ case 0x292b: /* Gustard/Ess based devices */
case 0x2ab6: /* T+A devices */
case 0x3842: /* EVGA */
case 0xc502: /* HiBy devices */
diff --git a/sound/usb/validate.c b/sound/usb/validate.c
index 3c8f73a0eb12..389e8657434a 100644
--- a/sound/usb/validate.c
+++ b/sound/usb/validate.c
@@ -75,15 +75,15 @@ static bool validate_processing_unit(const void *p,
if (d->bLength < sizeof(*d))
return false;
- len = d->bLength < sizeof(*d) + d->bNrInPins;
+ len = sizeof(*d) + d->bNrInPins;
if (d->bLength < len)
return false;
switch (v->protocol) {
case UAC_VERSION_1:
default:
- /* bNrChannels, wChannelConfig, iChannelNames, bControlSize */
- len += 1 + 2 + 1 + 1;
- if (d->bLength < len) /* bControlSize */
+ /* bNrChannels, wChannelConfig, iChannelNames */
+ len += 1 + 2 + 1;
+ if (d->bLength < len + 1) /* bControlSize */
return false;
m = hdr[len];
len += 1 + m + 1; /* bControlSize, bmControls, iProcessing */
diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h
index a4217c1a5d01..2769360f195c 100644
--- a/tools/arch/arm/include/uapi/asm/kvm.h
+++ b/tools/arch/arm/include/uapi/asm/kvm.h
@@ -266,8 +266,10 @@ struct kvm_vcpu_events {
#define KVM_DEV_ARM_ITS_CTRL_RESET 4
/* KVM_IRQ_LINE irq field index values */
+#define KVM_ARM_IRQ_VCPU2_SHIFT 28
+#define KVM_ARM_IRQ_VCPU2_MASK 0xf
#define KVM_ARM_IRQ_TYPE_SHIFT 24
-#define KVM_ARM_IRQ_TYPE_MASK 0xff
+#define KVM_ARM_IRQ_TYPE_MASK 0xf
#define KVM_ARM_IRQ_VCPU_SHIFT 16
#define KVM_ARM_IRQ_VCPU_MASK 0xff
#define KVM_ARM_IRQ_NUM_SHIFT 0
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
index 9a507716ae2f..67c21f9bdbad 100644
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -325,8 +325,10 @@ struct kvm_vcpu_events {
#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1
/* KVM_IRQ_LINE irq field index values */
+#define KVM_ARM_IRQ_VCPU2_SHIFT 28
+#define KVM_ARM_IRQ_VCPU2_MASK 0xf
#define KVM_ARM_IRQ_TYPE_SHIFT 24
-#define KVM_ARM_IRQ_TYPE_MASK 0xff
+#define KVM_ARM_IRQ_TYPE_MASK 0xf
#define KVM_ARM_IRQ_VCPU_SHIFT 16
#define KVM_ARM_IRQ_VCPU_MASK 0xff
#define KVM_ARM_IRQ_NUM_SHIFT 0
diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h
index 47104e5b47fd..436ec7636927 100644
--- a/tools/arch/s390/include/uapi/asm/kvm.h
+++ b/tools/arch/s390/include/uapi/asm/kvm.h
@@ -231,6 +231,12 @@ struct kvm_guest_debug_arch {
#define KVM_SYNC_GSCB (1UL << 9)
#define KVM_SYNC_BPBC (1UL << 10)
#define KVM_SYNC_ETOKEN (1UL << 11)
+
+#define KVM_SYNC_S390_VALID_FIELDS \
+ (KVM_SYNC_PREFIX | KVM_SYNC_GPRS | KVM_SYNC_ACRS | KVM_SYNC_CRS | \
+ KVM_SYNC_ARCH0 | KVM_SYNC_PFAULT | KVM_SYNC_VRS | KVM_SYNC_RICCB | \
+ KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN)
+
/* length and alignment of the sdnx as a power of two */
#define SDNXC 8
#define SDNXL (1UL << SDNXC)
diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h
index a9731f8a480f..2e8a30f06c74 100644
--- a/tools/arch/x86/include/uapi/asm/svm.h
+++ b/tools/arch/x86/include/uapi/asm/svm.h
@@ -75,6 +75,7 @@
#define SVM_EXIT_MWAIT 0x08b
#define SVM_EXIT_MWAIT_COND 0x08c
#define SVM_EXIT_XSETBV 0x08d
+#define SVM_EXIT_RDPRU 0x08e
#define SVM_EXIT_NPF 0x400
#define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401
#define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402
diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h
index f0b0c90dd398..3eb8411ab60e 100644
--- a/tools/arch/x86/include/uapi/asm/vmx.h
+++ b/tools/arch/x86/include/uapi/asm/vmx.h
@@ -31,6 +31,7 @@
#define EXIT_REASON_EXCEPTION_NMI 0
#define EXIT_REASON_EXTERNAL_INTERRUPT 1
#define EXIT_REASON_TRIPLE_FAULT 2
+#define EXIT_REASON_INIT_SIGNAL 3
#define EXIT_REASON_PENDING_INTERRUPT 7
#define EXIT_REASON_NMI_WINDOW 8
@@ -85,11 +86,14 @@
#define EXIT_REASON_PML_FULL 62
#define EXIT_REASON_XSAVES 63
#define EXIT_REASON_XRSTORS 64
+#define EXIT_REASON_UMWAIT 67
+#define EXIT_REASON_TPAUSE 68
#define VMX_EXIT_REASONS \
{ EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
{ EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \
{ EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \
+ { EXIT_REASON_INIT_SIGNAL, "INIT_SIGNAL" }, \
{ EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \
{ EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \
{ EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \
@@ -142,7 +146,9 @@
{ EXIT_REASON_RDSEED, "RDSEED" }, \
{ EXIT_REASON_PML_FULL, "PML_FULL" }, \
{ EXIT_REASON_XSAVES, "XSAVES" }, \
- { EXIT_REASON_XRSTORS, "XRSTORS" }
+ { EXIT_REASON_XRSTORS, "XRSTORS" }, \
+ { EXIT_REASON_UMWAIT, "UMWAIT" }, \
+ { EXIT_REASON_TPAUSE, "TPAUSE" }
#define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1
#define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2
diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
index fbf5e4a0cb9c..5d1995fd369c 100644
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -12,7 +12,11 @@ INSTALL ?= install
CFLAGS += -Wall -O2
CFLAGS += -D__EXPORTED_HEADERS__ -I$(srctree)/include/uapi -I$(srctree)/include
-ifeq ($(srctree),)
+# This will work when bpf is built in tools env. where srctree
+# isn't set and when invoked from selftests build, where srctree
+# is set to ".". building_out_of_srctree is undefined for in srctree
+# builds
+ifndef building_out_of_srctree
srctree := $(patsubst %/,%,$(dir $(CURDIR)))
srctree := $(patsubst %/,%,$(dir $(srctree)))
endif
diff --git a/tools/gpio/Build b/tools/gpio/Build
index 620c1937d957..4141f35837db 100644
--- a/tools/gpio/Build
+++ b/tools/gpio/Build
@@ -1,3 +1,4 @@
+gpio-utils-y += gpio-utils.o
lsgpio-y += lsgpio.o gpio-utils.o
gpio-hammer-y += gpio-hammer.o gpio-utils.o
gpio-event-mon-y += gpio-event-mon.o gpio-utils.o
diff --git a/tools/gpio/Makefile b/tools/gpio/Makefile
index 6ecdd1067826..6080de58861f 100644
--- a/tools/gpio/Makefile
+++ b/tools/gpio/Makefile
@@ -3,7 +3,11 @@ include ../scripts/Makefile.include
bindir ?= /usr/bin
-ifeq ($(srctree),)
+# This will work when gpio is built in tools env. where srctree
+# isn't set and when invoked from selftests build, where srctree
+# is set to ".". building_out_of_srctree is undefined for in srctree
+# builds
+ifndef building_out_of_srctree
srctree := $(patsubst %/,%,$(dir $(CURDIR)))
srctree := $(patsubst %/,%,$(dir $(srctree)))
endif
@@ -31,11 +35,15 @@ $(OUTPUT)include/linux/gpio.h: ../../include/uapi/linux/gpio.h
prepare: $(OUTPUT)include/linux/gpio.h
+GPIO_UTILS_IN := $(output)gpio-utils-in.o
+$(GPIO_UTILS_IN): prepare FORCE
+ $(Q)$(MAKE) $(build)=gpio-utils
+
#
# lsgpio
#
LSGPIO_IN := $(OUTPUT)lsgpio-in.o
-$(LSGPIO_IN): prepare FORCE
+$(LSGPIO_IN): prepare FORCE $(OUTPUT)gpio-utils-in.o
$(Q)$(MAKE) $(build)=lsgpio
$(OUTPUT)lsgpio: $(LSGPIO_IN)
$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
@@ -44,7 +52,7 @@ $(OUTPUT)lsgpio: $(LSGPIO_IN)
# gpio-hammer
#
GPIO_HAMMER_IN := $(OUTPUT)gpio-hammer-in.o
-$(GPIO_HAMMER_IN): prepare FORCE
+$(GPIO_HAMMER_IN): prepare FORCE $(OUTPUT)gpio-utils-in.o
$(Q)$(MAKE) $(build)=gpio-hammer
$(OUTPUT)gpio-hammer: $(GPIO_HAMMER_IN)
$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
@@ -53,7 +61,7 @@ $(OUTPUT)gpio-hammer: $(GPIO_HAMMER_IN)
# gpio-event-mon
#
GPIO_EVENT_MON_IN := $(OUTPUT)gpio-event-mon-in.o
-$(GPIO_EVENT_MON_IN): prepare FORCE
+$(GPIO_EVENT_MON_IN): prepare FORCE $(OUTPUT)gpio-utils-in.o
$(Q)$(MAKE) $(build)=gpio-event-mon
$(OUTPUT)gpio-event-mon: $(GPIO_EVENT_MON_IN)
$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h
index 63b1f506ea67..c160a5354eb6 100644
--- a/tools/include/uapi/asm-generic/mman-common.h
+++ b/tools/include/uapi/asm-generic/mman-common.h
@@ -67,6 +67,9 @@
#define MADV_WIPEONFORK 18 /* Zero memory on fork, child only */
#define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */
+#define MADV_COLD 20 /* deactivate these pages */
+#define MADV_PAGEOUT 21 /* reclaim these pages */
+
/* compatibility flags */
#define MAP_FILE 0
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index 328d05e77d9f..469dc512cca3 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -521,6 +521,7 @@ typedef struct drm_i915_irq_wait {
#define I915_SCHEDULER_CAP_PRIORITY (1ul << 1)
#define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2)
#define I915_SCHEDULER_CAP_SEMAPHORES (1ul << 3)
+#define I915_SCHEDULER_CAP_ENGINE_BUSY_STATS (1ul << 4)
#define I915_PARAM_HUC_STATUS 42
diff --git a/tools/include/uapi/linux/fcntl.h b/tools/include/uapi/linux/fcntl.h
index 1d338357df8a..1f97b33c840e 100644
--- a/tools/include/uapi/linux/fcntl.h
+++ b/tools/include/uapi/linux/fcntl.h
@@ -58,7 +58,7 @@
* Valid hint values for F_{GET,SET}_RW_HINT. 0 is "not set", or can be
* used to clear any hints previously set.
*/
-#define RWF_WRITE_LIFE_NOT_SET 0
+#define RWH_WRITE_LIFE_NOT_SET 0
#define RWH_WRITE_LIFE_NONE 1
#define RWH_WRITE_LIFE_SHORT 2
#define RWH_WRITE_LIFE_MEDIUM 3
@@ -66,6 +66,13 @@
#define RWH_WRITE_LIFE_EXTREME 5
/*
+ * The originally introduced spelling is remained from the first
+ * versions of the patch set that introduced the feature, see commit
+ * v4.13-rc1~212^2~51.
+ */
+#define RWF_WRITE_LIFE_NOT_SET RWH_WRITE_LIFE_NOT_SET
+
+/*
* Types of directory notifications that may be requested.
*/
#define DN_ACCESS 0x00000001 /* File accessed */
diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h
index 2a616aa3f686..379a612f8f1d 100644
--- a/tools/include/uapi/linux/fs.h
+++ b/tools/include/uapi/linux/fs.h
@@ -13,6 +13,9 @@
#include <linux/limits.h>
#include <linux/ioctl.h>
#include <linux/types.h>
+#ifndef __KERNEL__
+#include <linux/fscrypt.h>
+#endif
/* Use of MS_* flags within the kernel is restricted to core mount(2) code. */
#if !defined(__KERNEL__)
@@ -213,57 +216,6 @@ struct fsxattr {
#define FS_IOC_SETFSLABEL _IOW(0x94, 50, char[FSLABEL_MAX])
/*
- * File system encryption support
- */
-/* Policy provided via an ioctl on the topmost directory */
-#define FS_KEY_DESCRIPTOR_SIZE 8
-
-#define FS_POLICY_FLAGS_PAD_4 0x00
-#define FS_POLICY_FLAGS_PAD_8 0x01
-#define FS_POLICY_FLAGS_PAD_16 0x02
-#define FS_POLICY_FLAGS_PAD_32 0x03
-#define FS_POLICY_FLAGS_PAD_MASK 0x03
-#define FS_POLICY_FLAG_DIRECT_KEY 0x04 /* use master key directly */
-#define FS_POLICY_FLAGS_VALID 0x07
-
-/* Encryption algorithms */
-#define FS_ENCRYPTION_MODE_INVALID 0
-#define FS_ENCRYPTION_MODE_AES_256_XTS 1
-#define FS_ENCRYPTION_MODE_AES_256_GCM 2
-#define FS_ENCRYPTION_MODE_AES_256_CBC 3
-#define FS_ENCRYPTION_MODE_AES_256_CTS 4
-#define FS_ENCRYPTION_MODE_AES_128_CBC 5
-#define FS_ENCRYPTION_MODE_AES_128_CTS 6
-#define FS_ENCRYPTION_MODE_SPECK128_256_XTS 7 /* Removed, do not use. */
-#define FS_ENCRYPTION_MODE_SPECK128_256_CTS 8 /* Removed, do not use. */
-#define FS_ENCRYPTION_MODE_ADIANTUM 9
-
-struct fscrypt_policy {
- __u8 version;
- __u8 contents_encryption_mode;
- __u8 filenames_encryption_mode;
- __u8 flags;
- __u8 master_key_descriptor[FS_KEY_DESCRIPTOR_SIZE];
-};
-
-#define FS_IOC_SET_ENCRYPTION_POLICY _IOR('f', 19, struct fscrypt_policy)
-#define FS_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16])
-#define FS_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct fscrypt_policy)
-
-/* Parameters for passing an encryption key into the kernel keyring */
-#define FS_KEY_DESC_PREFIX "fscrypt:"
-#define FS_KEY_DESC_PREFIX_SIZE 8
-
-/* Structure that userspace passes to the kernel keyring */
-#define FS_MAX_KEY_SIZE 64
-
-struct fscrypt_key {
- __u32 mode;
- __u8 raw[FS_MAX_KEY_SIZE];
- __u32 size;
-};
-
-/*
* Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
*
* Note: for historical reasons, these flags were originally used and
@@ -306,6 +258,7 @@ struct fscrypt_key {
#define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
#define FS_HUGE_FILE_FL 0x00040000 /* Reserved for ext4 */
#define FS_EXTENT_FL 0x00080000 /* Extents */
+#define FS_VERITY_FL 0x00100000 /* Verity protected inode */
#define FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */
#define FS_EOFBLOCKS_FL 0x00400000 /* Reserved for ext4 */
#define FS_NOCOW_FL 0x00800000 /* Do not cow file */
diff --git a/tools/include/uapi/linux/fscrypt.h b/tools/include/uapi/linux/fscrypt.h
new file mode 100644
index 000000000000..39ccfe9311c3
--- /dev/null
+++ b/tools/include/uapi/linux/fscrypt.h
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * fscrypt user API
+ *
+ * These ioctls can be used on filesystems that support fscrypt. See the
+ * "User API" section of Documentation/filesystems/fscrypt.rst.
+ */
+#ifndef _UAPI_LINUX_FSCRYPT_H
+#define _UAPI_LINUX_FSCRYPT_H
+
+#include <linux/types.h>
+
+/* Encryption policy flags */
+#define FSCRYPT_POLICY_FLAGS_PAD_4 0x00
+#define FSCRYPT_POLICY_FLAGS_PAD_8 0x01
+#define FSCRYPT_POLICY_FLAGS_PAD_16 0x02
+#define FSCRYPT_POLICY_FLAGS_PAD_32 0x03
+#define FSCRYPT_POLICY_FLAGS_PAD_MASK 0x03
+#define FSCRYPT_POLICY_FLAG_DIRECT_KEY 0x04
+#define FSCRYPT_POLICY_FLAGS_VALID 0x07
+
+/* Encryption algorithms */
+#define FSCRYPT_MODE_AES_256_XTS 1
+#define FSCRYPT_MODE_AES_256_CTS 4
+#define FSCRYPT_MODE_AES_128_CBC 5
+#define FSCRYPT_MODE_AES_128_CTS 6
+#define FSCRYPT_MODE_ADIANTUM 9
+#define __FSCRYPT_MODE_MAX 9
+
+/*
+ * Legacy policy version; ad-hoc KDF and no key verification.
+ * For new encrypted directories, use fscrypt_policy_v2 instead.
+ *
+ * Careful: the .version field for this is actually 0, not 1.
+ */
+#define FSCRYPT_POLICY_V1 0
+#define FSCRYPT_KEY_DESCRIPTOR_SIZE 8
+struct fscrypt_policy_v1 {
+ __u8 version;
+ __u8 contents_encryption_mode;
+ __u8 filenames_encryption_mode;
+ __u8 flags;
+ __u8 master_key_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE];
+};
+#define fscrypt_policy fscrypt_policy_v1
+
+/*
+ * Process-subscribed "logon" key description prefix and payload format.
+ * Deprecated; prefer FS_IOC_ADD_ENCRYPTION_KEY instead.
+ */
+#define FSCRYPT_KEY_DESC_PREFIX "fscrypt:"
+#define FSCRYPT_KEY_DESC_PREFIX_SIZE 8
+#define FSCRYPT_MAX_KEY_SIZE 64
+struct fscrypt_key {
+ __u32 mode;
+ __u8 raw[FSCRYPT_MAX_KEY_SIZE];
+ __u32 size;
+};
+
+/*
+ * New policy version with HKDF and key verification (recommended).
+ */
+#define FSCRYPT_POLICY_V2 2
+#define FSCRYPT_KEY_IDENTIFIER_SIZE 16
+struct fscrypt_policy_v2 {
+ __u8 version;
+ __u8 contents_encryption_mode;
+ __u8 filenames_encryption_mode;
+ __u8 flags;
+ __u8 __reserved[4];
+ __u8 master_key_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE];
+};
+
+/* Struct passed to FS_IOC_GET_ENCRYPTION_POLICY_EX */
+struct fscrypt_get_policy_ex_arg {
+ __u64 policy_size; /* input/output */
+ union {
+ __u8 version;
+ struct fscrypt_policy_v1 v1;
+ struct fscrypt_policy_v2 v2;
+ } policy; /* output */
+};
+
+/*
+ * v1 policy keys are specified by an arbitrary 8-byte key "descriptor",
+ * matching fscrypt_policy_v1::master_key_descriptor.
+ */
+#define FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR 1
+
+/*
+ * v2 policy keys are specified by a 16-byte key "identifier" which the kernel
+ * calculates as a cryptographic hash of the key itself,
+ * matching fscrypt_policy_v2::master_key_identifier.
+ */
+#define FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER 2
+
+/*
+ * Specifies a key, either for v1 or v2 policies. This doesn't contain the
+ * actual key itself; this is just the "name" of the key.
+ */
+struct fscrypt_key_specifier {
+ __u32 type; /* one of FSCRYPT_KEY_SPEC_TYPE_* */
+ __u32 __reserved;
+ union {
+ __u8 __reserved[32]; /* reserve some extra space */
+ __u8 descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE];
+ __u8 identifier[FSCRYPT_KEY_IDENTIFIER_SIZE];
+ } u;
+};
+
+/* Struct passed to FS_IOC_ADD_ENCRYPTION_KEY */
+struct fscrypt_add_key_arg {
+ struct fscrypt_key_specifier key_spec;
+ __u32 raw_size;
+ __u32 __reserved[9];
+ __u8 raw[];
+};
+
+/* Struct passed to FS_IOC_REMOVE_ENCRYPTION_KEY */
+struct fscrypt_remove_key_arg {
+ struct fscrypt_key_specifier key_spec;
+#define FSCRYPT_KEY_REMOVAL_STATUS_FLAG_FILES_BUSY 0x00000001
+#define FSCRYPT_KEY_REMOVAL_STATUS_FLAG_OTHER_USERS 0x00000002
+ __u32 removal_status_flags; /* output */
+ __u32 __reserved[5];
+};
+
+/* Struct passed to FS_IOC_GET_ENCRYPTION_KEY_STATUS */
+struct fscrypt_get_key_status_arg {
+ /* input */
+ struct fscrypt_key_specifier key_spec;
+ __u32 __reserved[6];
+
+ /* output */
+#define FSCRYPT_KEY_STATUS_ABSENT 1
+#define FSCRYPT_KEY_STATUS_PRESENT 2
+#define FSCRYPT_KEY_STATUS_INCOMPLETELY_REMOVED 3
+ __u32 status;
+#define FSCRYPT_KEY_STATUS_FLAG_ADDED_BY_SELF 0x00000001
+ __u32 status_flags;
+ __u32 user_count;
+ __u32 __out_reserved[13];
+};
+
+#define FS_IOC_SET_ENCRYPTION_POLICY _IOR('f', 19, struct fscrypt_policy)
+#define FS_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16])
+#define FS_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct fscrypt_policy)
+#define FS_IOC_GET_ENCRYPTION_POLICY_EX _IOWR('f', 22, __u8[9]) /* size + version */
+#define FS_IOC_ADD_ENCRYPTION_KEY _IOWR('f', 23, struct fscrypt_add_key_arg)
+#define FS_IOC_REMOVE_ENCRYPTION_KEY _IOWR('f', 24, struct fscrypt_remove_key_arg)
+#define FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS _IOWR('f', 25, struct fscrypt_remove_key_arg)
+#define FS_IOC_GET_ENCRYPTION_KEY_STATUS _IOWR('f', 26, struct fscrypt_get_key_status_arg)
+
+/**********************************************************************/
+
+/* old names; don't add anything new here! */
+#ifndef __KERNEL__
+#define FS_KEY_DESCRIPTOR_SIZE FSCRYPT_KEY_DESCRIPTOR_SIZE
+#define FS_POLICY_FLAGS_PAD_4 FSCRYPT_POLICY_FLAGS_PAD_4
+#define FS_POLICY_FLAGS_PAD_8 FSCRYPT_POLICY_FLAGS_PAD_8
+#define FS_POLICY_FLAGS_PAD_16 FSCRYPT_POLICY_FLAGS_PAD_16
+#define FS_POLICY_FLAGS_PAD_32 FSCRYPT_POLICY_FLAGS_PAD_32
+#define FS_POLICY_FLAGS_PAD_MASK FSCRYPT_POLICY_FLAGS_PAD_MASK
+#define FS_POLICY_FLAG_DIRECT_KEY FSCRYPT_POLICY_FLAG_DIRECT_KEY
+#define FS_POLICY_FLAGS_VALID FSCRYPT_POLICY_FLAGS_VALID
+#define FS_ENCRYPTION_MODE_INVALID 0 /* never used */
+#define FS_ENCRYPTION_MODE_AES_256_XTS FSCRYPT_MODE_AES_256_XTS
+#define FS_ENCRYPTION_MODE_AES_256_GCM 2 /* never used */
+#define FS_ENCRYPTION_MODE_AES_256_CBC 3 /* never used */
+#define FS_ENCRYPTION_MODE_AES_256_CTS FSCRYPT_MODE_AES_256_CTS
+#define FS_ENCRYPTION_MODE_AES_128_CBC FSCRYPT_MODE_AES_128_CBC
+#define FS_ENCRYPTION_MODE_AES_128_CTS FSCRYPT_MODE_AES_128_CTS
+#define FS_ENCRYPTION_MODE_SPECK128_256_XTS 7 /* removed */
+#define FS_ENCRYPTION_MODE_SPECK128_256_CTS 8 /* removed */
+#define FS_ENCRYPTION_MODE_ADIANTUM FSCRYPT_MODE_ADIANTUM
+#define FS_KEY_DESC_PREFIX FSCRYPT_KEY_DESC_PREFIX
+#define FS_KEY_DESC_PREFIX_SIZE FSCRYPT_KEY_DESC_PREFIX_SIZE
+#define FS_MAX_KEY_SIZE FSCRYPT_MAX_KEY_SIZE
+#endif /* !__KERNEL__ */
+
+#endif /* _UAPI_LINUX_FSCRYPT_H */
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 5e3f12d5359e..52641d8ca9e8 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -243,6 +243,8 @@ struct kvm_hyperv_exit {
#define KVM_INTERNAL_ERROR_SIMUL_EX 2
/* Encounter unexpected vm-exit due to delivery event. */
#define KVM_INTERNAL_ERROR_DELIVERY_EV 3
+/* Encounter unexpected vm-exit reason */
+#define KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON 4
/* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
struct kvm_run {
@@ -996,6 +998,8 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_ARM_PTRAUTH_ADDRESS 171
#define KVM_CAP_ARM_PTRAUTH_GENERIC 172
#define KVM_CAP_PMU_EVENT_FILTER 173
+#define KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 174
+#define KVM_CAP_HYPERV_DIRECT_TLBFLUSH 175
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1142,6 +1146,7 @@ struct kvm_dirty_tlb {
#define KVM_REG_S390 0x5000000000000000ULL
#define KVM_REG_ARM64 0x6000000000000000ULL
#define KVM_REG_MIPS 0x7000000000000000ULL
+#define KVM_REG_RISCV 0x8000000000000000ULL
#define KVM_REG_SIZE_SHIFT 52
#define KVM_REG_SIZE_MASK 0x00f0000000000000ULL
diff --git a/tools/include/uapi/linux/sched.h b/tools/include/uapi/linux/sched.h
index b3105ac1381a..99335e1f4a27 100644
--- a/tools/include/uapi/linux/sched.h
+++ b/tools/include/uapi/linux/sched.h
@@ -33,8 +33,31 @@
#define CLONE_NEWNET 0x40000000 /* New network namespace */
#define CLONE_IO 0x80000000 /* Clone io context */
-/*
- * Arguments for the clone3 syscall
+#ifndef __ASSEMBLY__
+/**
+ * struct clone_args - arguments for the clone3 syscall
+ * @flags: Flags for the new process as listed above.
+ * All flags are valid except for CSIGNAL and
+ * CLONE_DETACHED.
+ * @pidfd: If CLONE_PIDFD is set, a pidfd will be
+ * returned in this argument.
+ * @child_tid: If CLONE_CHILD_SETTID is set, the TID of the
+ * child process will be returned in the child's
+ * memory.
+ * @parent_tid: If CLONE_PARENT_SETTID is set, the TID of
+ * the child process will be returned in the
+ * parent's memory.
+ * @exit_signal: The exit_signal the parent process will be
+ * sent when the child exits.
+ * @stack: Specify the location of the stack for the
+ * child process.
+ * @stack_size: The size of the stack for the child process.
+ * @tls: If CLONE_SETTLS is set, the tls descriptor
+ * is set to tls.
+ *
+ * The structure is versioned by size and thus extensible.
+ * New struct members must go at the end of the struct and
+ * must be properly 64bit aligned.
*/
struct clone_args {
__aligned_u64 flags;
@@ -46,6 +69,9 @@ struct clone_args {
__aligned_u64 stack_size;
__aligned_u64 tls;
};
+#endif
+
+#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */
/*
* Scheduling policies
diff --git a/tools/include/uapi/linux/usbdevice_fs.h b/tools/include/uapi/linux/usbdevice_fs.h
index 78efe870c2b7..cf525cddeb94 100644
--- a/tools/include/uapi/linux/usbdevice_fs.h
+++ b/tools/include/uapi/linux/usbdevice_fs.h
@@ -158,6 +158,7 @@ struct usbdevfs_hub_portinfo {
#define USBDEVFS_CAP_MMAP 0x20
#define USBDEVFS_CAP_DROP_PRIVILEGES 0x40
#define USBDEVFS_CAP_CONNINFO_EX 0x80
+#define USBDEVFS_CAP_SUSPEND 0x100
/* USBDEVFS_DISCONNECT_CLAIM flags & struct */
@@ -223,5 +224,8 @@ struct usbdevfs_streams {
* extending size of the data returned.
*/
#define USBDEVFS_CONNINFO_EX(len) _IOC(_IOC_READ, 'U', 32, len)
+#define USBDEVFS_FORBID_SUSPEND _IO('U', 33)
+#define USBDEVFS_ALLOW_SUSPEND _IO('U', 34)
+#define USBDEVFS_WAIT_FOR_RESUME _IO('U', 35)
#endif /* _UAPI_LINUX_USBDEVICE_FS_H */
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index c6f94cffe06e..56ce6292071b 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -8,7 +8,11 @@ LIBBPF_MAJOR_VERSION := $(firstword $(subst ., ,$(LIBBPF_VERSION)))
MAKEFLAGS += --no-print-directory
-ifeq ($(srctree),)
+# This will work when bpf is built in tools env. where srctree
+# isn't set and when invoked from selftests build, where srctree
+# is a ".". building_out_of_srctree is undefined for in srctree
+# builds
+ifndef building_out_of_srctree
srctree := $(patsubst %/,%,$(dir $(CURDIR)))
srctree := $(patsubst %/,%,$(dir $(srctree)))
srctree := $(patsubst %/,%,$(dir $(srctree)))
@@ -110,6 +114,9 @@ override CFLAGS += $(INCLUDES)
override CFLAGS += -fvisibility=hidden
override CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
+# flags specific for shared library
+SHLIB_FLAGS := -DSHARED
+
ifeq ($(VERBOSE),1)
Q =
else
@@ -126,14 +133,17 @@ all:
export srctree OUTPUT CC LD CFLAGS V
include $(srctree)/tools/build/Makefile.include
-BPF_IN := $(OUTPUT)libbpf-in.o
+SHARED_OBJDIR := $(OUTPUT)sharedobjs/
+STATIC_OBJDIR := $(OUTPUT)staticobjs/
+BPF_IN_SHARED := $(SHARED_OBJDIR)libbpf-in.o
+BPF_IN_STATIC := $(STATIC_OBJDIR)libbpf-in.o
VERSION_SCRIPT := libbpf.map
LIB_TARGET := $(addprefix $(OUTPUT),$(LIB_TARGET))
LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE))
PC_FILE := $(addprefix $(OUTPUT),$(PC_FILE))
-GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN) | \
+GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \
cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' | \
awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$8}' | \
sort -u | wc -l)
@@ -155,7 +165,7 @@ all: fixdep
all_cmd: $(CMD_TARGETS) check
-$(BPF_IN): force elfdep bpfdep
+$(BPF_IN_SHARED): force elfdep bpfdep
@(test -f ../../include/uapi/linux/bpf.h -a -f ../../../include/uapi/linux/bpf.h && ( \
(diff -B ../../include/uapi/linux/bpf.h ../../../include/uapi/linux/bpf.h >/dev/null) || \
echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf.h' differs from latest version at 'include/uapi/linux/bpf.h'" >&2 )) || true
@@ -171,17 +181,20 @@ $(BPF_IN): force elfdep bpfdep
@(test -f ../../include/uapi/linux/if_xdp.h -a -f ../../../include/uapi/linux/if_xdp.h && ( \
(diff -B ../../include/uapi/linux/if_xdp.h ../../../include/uapi/linux/if_xdp.h >/dev/null) || \
echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' differs from latest version at 'include/uapi/linux/if_xdp.h'" >&2 )) || true
- $(Q)$(MAKE) $(build)=libbpf
+ $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(SHARED_OBJDIR) CFLAGS="$(CFLAGS) $(SHLIB_FLAGS)"
+
+$(BPF_IN_STATIC): force elfdep bpfdep
+ $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR)
$(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION)
-$(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN)
+$(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED)
$(QUIET_LINK)$(CC) --shared -Wl,-soname,libbpf.so.$(LIBBPF_MAJOR_VERSION) \
-Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -o $@
@ln -sf $(@F) $(OUTPUT)libbpf.so
@ln -sf $(@F) $(OUTPUT)libbpf.so.$(LIBBPF_MAJOR_VERSION)
-$(OUTPUT)libbpf.a: $(BPF_IN)
+$(OUTPUT)libbpf.a: $(BPF_IN_STATIC)
$(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^
$(OUTPUT)test_libbpf: test_libbpf.cpp $(OUTPUT)libbpf.a
@@ -197,7 +210,7 @@ check: check_abi
check_abi: $(OUTPUT)libbpf.so
@if [ "$(GLOBAL_SYM_COUNT)" != "$(VERSIONED_SYM_COUNT)" ]; then \
- echo "Warning: Num of global symbols in $(BPF_IN)" \
+ echo "Warning: Num of global symbols in $(BPF_IN_SHARED)" \
"($(GLOBAL_SYM_COUNT)) does NOT match with num of" \
"versioned symbols in $^ ($(VERSIONED_SYM_COUNT))." \
"Please make sure all LIBBPF_API symbols are" \
@@ -255,9 +268,9 @@ config-clean:
$(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null
clean:
- $(call QUIET_CLEAN, libbpf) $(RM) $(TARGETS) $(CXX_TEST_TARGET) \
+ $(call QUIET_CLEAN, libbpf) $(RM) -rf $(TARGETS) $(CXX_TEST_TARGET) \
*.o *~ *.a *.so *.so.$(LIBBPF_MAJOR_VERSION) .*.d .*.cmd \
- *.pc LIBBPF-CFLAGS
+ *.pc LIBBPF-CFLAGS $(SHARED_OBJDIR) $(STATIC_OBJDIR)
$(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 2e83a34f8c79..98216a69c32f 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -34,6 +34,22 @@
(offsetof(TYPE, FIELD) + sizeof(((TYPE *)0)->FIELD))
#endif
+/* Symbol versioning is different between static and shared library.
+ * Properly versioned symbols are needed for shared library, but
+ * only the symbol of the new version is needed for static library.
+ */
+#ifdef SHARED
+# define COMPAT_VERSION(internal_name, api_name, version) \
+ asm(".symver " #internal_name "," #api_name "@" #version);
+# define DEFAULT_VERSION(internal_name, api_name, version) \
+ asm(".symver " #internal_name "," #api_name "@@" #version);
+#else
+# define COMPAT_VERSION(internal_name, api_name, version)
+# define DEFAULT_VERSION(internal_name, api_name, version) \
+ extern typeof(internal_name) api_name \
+ __attribute__((alias(#internal_name)));
+#endif
+
extern void libbpf_print(enum libbpf_print_level level,
const char *format, ...)
__attribute__((format(printf, 2, 3)));
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index 24fa313524fb..a902838f9fcc 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -261,8 +261,8 @@ int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area,
return xsk_umem__create_v0_0_4(umem_ptr, umem_area, size, fill, comp,
&config);
}
-asm(".symver xsk_umem__create_v0_0_2, xsk_umem__create@LIBBPF_0.0.2");
-asm(".symver xsk_umem__create_v0_0_4, xsk_umem__create@@LIBBPF_0.0.4");
+COMPAT_VERSION(xsk_umem__create_v0_0_2, xsk_umem__create, LIBBPF_0.0.2)
+DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4)
static int xsk_load_xdp_prog(struct xsk_socket *xsk)
{
diff --git a/tools/lib/subcmd/Makefile b/tools/lib/subcmd/Makefile
index ed61fb3a46c0..5b2cd5e58df0 100644
--- a/tools/lib/subcmd/Makefile
+++ b/tools/lib/subcmd/Makefile
@@ -20,7 +20,13 @@ MAKEFLAGS += --no-print-directory
LIBFILE = $(OUTPUT)libsubcmd.a
CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
-CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -fPIC
+CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -fPIC
+
+ifeq ($(DEBUG),0)
+ ifeq ($(feature-fortify-source), 1)
+ CFLAGS += -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2
+ endif
+endif
ifeq ($(CC_NO_CLANG), 0)
CFLAGS += -O3
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 044c9a3cb247..543c068096b1 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -144,6 +144,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
"usercopy_abort",
"machine_real_restart",
"rewind_stack_do_exit",
+ "kunit_try_catch_throw",
};
if (!func)
diff --git a/tools/perf/Documentation/asciidoc.conf b/tools/perf/Documentation/asciidoc.conf
index 356b23a40339..2b62ba1e72b7 100644
--- a/tools/perf/Documentation/asciidoc.conf
+++ b/tools/perf/Documentation/asciidoc.conf
@@ -71,6 +71,9 @@ ifdef::backend-docbook[]
[header]
template::[header-declarations]
<refentry>
+ifdef::perf_date[]
+<refentryinfo><date>{perf_date}</date></refentryinfo>
+endif::perf_date[]
<refmeta>
<refentrytitle>{mantitle}</refentrytitle>
<manvolnum>{manvolnum}</manvolnum>
diff --git a/tools/perf/Documentation/jitdump-specification.txt b/tools/perf/Documentation/jitdump-specification.txt
index 4c62b0713651..52152d156ad9 100644
--- a/tools/perf/Documentation/jitdump-specification.txt
+++ b/tools/perf/Documentation/jitdump-specification.txt
@@ -36,8 +36,8 @@ III/ Jitdump file header format
Each jitdump file starts with a fixed size header containing the following fields in order:
-* uint32_t magic : a magic number tagging the file type. The value is 4-byte long and represents the string "JiTD" in ASCII form. It is 0x4A695444 or 0x4454694a depending on the endianness. The field can be used to detect the endianness of the file
-* uint32_t version : a 4-byte value representing the format version. It is currently set to 2
+* uint32_t magic : a magic number tagging the file type. The value is 4-byte long and represents the string "JiTD" in ASCII form. It written is as 0x4A695444. The reader will detect an endian mismatch when it reads 0x4454694a.
+* uint32_t version : a 4-byte value representing the format version. It is currently set to 1
* uint32_t total_size: size in bytes of file header
* uint32_t elf_mach : ELF architecture encoding (ELF e_machine value as specified in /usr/include/elf.h)
* uint32_t pad1 : padding. Reserved for future use
diff --git a/tools/perf/arch/arm/annotate/instructions.c b/tools/perf/arch/arm/annotate/instructions.c
index e1d4b484cc4b..2ff6cedeb9c5 100644
--- a/tools/perf/arch/arm/annotate/instructions.c
+++ b/tools/perf/arch/arm/annotate/instructions.c
@@ -37,7 +37,7 @@ static int arm__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
arm = zalloc(sizeof(*arm));
if (!arm)
- return -1;
+ return ENOMEM;
#define ARM_CONDS "(cc|cs|eq|ge|gt|hi|le|ls|lt|mi|ne|pl|vc|vs)"
err = regcomp(&arm->call_insn, "^blx?" ARM_CONDS "?$", REG_EXTENDED);
@@ -59,5 +59,5 @@ out_free_call:
regfree(&arm->call_insn);
out_free_arm:
free(arm);
- return -1;
+ return SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP;
}
diff --git a/tools/perf/arch/arm64/annotate/instructions.c b/tools/perf/arch/arm64/annotate/instructions.c
index 43aa93ed8414..037e292ecd8e 100644
--- a/tools/perf/arch/arm64/annotate/instructions.c
+++ b/tools/perf/arch/arm64/annotate/instructions.c
@@ -95,7 +95,7 @@ static int arm64__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
arm = zalloc(sizeof(*arm));
if (!arm)
- return -1;
+ return ENOMEM;
/* bl, blr */
err = regcomp(&arm->call_insn, "^blr?$", REG_EXTENDED);
@@ -118,5 +118,5 @@ out_free_call:
regfree(&arm->call_insn);
out_free_arm:
free(arm);
- return -1;
+ return SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP;
}
diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c
index b6b7bc7e31a1..3b4cdfc5efd6 100644
--- a/tools/perf/arch/powerpc/util/header.c
+++ b/tools/perf/arch/powerpc/util/header.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <sys/types.h>
+#include <errno.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
@@ -30,7 +31,7 @@ get_cpuid(char *buffer, size_t sz)
buffer[nb-1] = '\0';
return 0;
}
- return -1;
+ return ENOBUFS;
}
char *
diff --git a/tools/perf/arch/s390/annotate/instructions.c b/tools/perf/arch/s390/annotate/instructions.c
index 89bb8f2c54ce..a50e70baf918 100644
--- a/tools/perf/arch/s390/annotate/instructions.c
+++ b/tools/perf/arch/s390/annotate/instructions.c
@@ -164,8 +164,10 @@ static int s390__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
if (!arch->initialized) {
arch->initialized = true;
arch->associate_instruction_ops = s390__associate_ins_ops;
- if (cpuid)
- err = s390__cpuid_parse(arch, cpuid);
+ if (cpuid) {
+ if (s390__cpuid_parse(arch, cpuid))
+ err = SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING;
+ }
}
return err;
diff --git a/tools/perf/arch/s390/util/header.c b/tools/perf/arch/s390/util/header.c
index 8b0b018d896a..7933f6871c81 100644
--- a/tools/perf/arch/s390/util/header.c
+++ b/tools/perf/arch/s390/util/header.c
@@ -8,6 +8,7 @@
*/
#include <sys/types.h>
+#include <errno.h>
#include <unistd.h>
#include <stdio.h>
#include <string.h>
@@ -54,7 +55,7 @@ int get_cpuid(char *buffer, size_t sz)
sysinfo = fopen(SYSINFO, "r");
if (sysinfo == NULL)
- return -1;
+ return errno;
while ((read = getline(&line, &line_sz, sysinfo)) != -1) {
if (!strncmp(line, SYSINFO_MANU, strlen(SYSINFO_MANU))) {
@@ -89,7 +90,7 @@ int get_cpuid(char *buffer, size_t sz)
/* Missing manufacturer, type or model information should not happen */
if (!manufacturer[0] || !type[0] || !model[0])
- return -1;
+ return EINVAL;
/*
* Scan /proc/service_levels and return the CPU-MF counter facility
@@ -133,14 +134,14 @@ skip_sysinfo:
else
nbytes = snprintf(buffer, sz, "%s,%s,%s", manufacturer, type,
model);
- return (nbytes >= sz) ? -1 : 0;
+ return (nbytes >= sz) ? ENOBUFS : 0;
}
char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
{
char *buf = malloc(128);
- if (buf && get_cpuid(buf, 128) < 0)
+ if (buf && get_cpuid(buf, 128))
zfree(&buf);
return buf;
}
diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c
index 44f5aba78210..7eb5621c021d 100644
--- a/tools/perf/arch/x86/annotate/instructions.c
+++ b/tools/perf/arch/x86/annotate/instructions.c
@@ -196,8 +196,10 @@ static int x86__annotate_init(struct arch *arch, char *cpuid)
if (arch->initialized)
return 0;
- if (cpuid)
- err = x86__cpuid_parse(arch, cpuid);
+ if (cpuid) {
+ if (x86__cpuid_parse(arch, cpuid))
+ err = SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING;
+ }
arch->initialized = true;
return err;
diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c
index 662ecf84a421..aa6deb463bf3 100644
--- a/tools/perf/arch/x86/util/header.c
+++ b/tools/perf/arch/x86/util/header.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <sys/types.h>
+#include <errno.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
@@ -58,7 +59,7 @@ __get_cpuid(char *buffer, size_t sz, const char *fmt)
buffer[nb-1] = '\0';
return 0;
}
- return -1;
+ return ENOBUFS;
}
int
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 3542b6ab9813..e69f44941aad 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -2635,6 +2635,7 @@ static int build_cl_output(char *cl_sort, bool no_source)
bool add_sym = false;
bool add_dso = false;
bool add_src = false;
+ int ret = 0;
if (!buf)
return -ENOMEM;
@@ -2653,7 +2654,8 @@ static int build_cl_output(char *cl_sort, bool no_source)
add_dso = true;
} else if (strcmp(tok, "offset")) {
pr_err("unrecognized sort token: %s\n", tok);
- return -EINVAL;
+ ret = -EINVAL;
+ goto err;
}
}
@@ -2676,13 +2678,15 @@ static int build_cl_output(char *cl_sort, bool no_source)
add_sym ? "symbol," : "",
add_dso ? "dso," : "",
add_src ? "cl_srcline," : "",
- "node") < 0)
- return -ENOMEM;
+ "node") < 0) {
+ ret = -ENOMEM;
+ goto err;
+ }
c2c.show_src = add_src;
-
+err:
free(buf);
- return 0;
+ return ret;
}
static int setup_coalesce(const char *coalesce, bool no_source)
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 1e61e353f579..9661671cc26e 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -691,6 +691,7 @@ static char *compact_gfp_flags(char *gfp_flags)
new = realloc(new_flags, len + strlen(cpt) + 2);
if (new == NULL) {
free(new_flags);
+ free(orig_flags);
return NULL;
}
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 2227e2f42c09..58a9e0989491 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -705,14 +705,15 @@ static int process_sample_event(struct perf_tool *tool,
static int cpu_isa_config(struct perf_kvm_stat *kvm)
{
- char buf[64], *cpuid;
+ char buf[128], *cpuid;
int err;
if (kvm->live) {
err = get_cpuid(buf, sizeof(buf));
if (err != 0) {
- pr_err("Failed to look up CPU type\n");
- return err;
+ pr_err("Failed to look up CPU type: %s\n",
+ str_error_r(err, buf, sizeof(buf)));
+ return -err;
}
cpuid = buf;
} else
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 286fc70d7402..67be8d31afab 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -1063,7 +1063,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
continue;
insn = 0;
- for (off = 0;; off += ilen) {
+ for (off = 0; off < (unsigned)len; off += ilen) {
uint64_t ip = start + off;
printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp);
@@ -1074,6 +1074,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
printed += print_srccode(thread, x.cpumode, ip);
break;
} else {
+ ilen = 0;
printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", ip,
dump_insn(&x, ip, buffer + off, len - off, &ilen));
if (ilen == 0)
@@ -1083,6 +1084,8 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
insn++;
}
}
+ if (off != (unsigned)len)
+ printed += fprintf(fp, "\tmismatch of LBR data and executable\n");
}
/*
@@ -1123,6 +1126,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
goto out;
}
for (off = 0; off <= end - start; off += ilen) {
+ ilen = 0;
printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", start + off,
dump_insn(&x, start + off, buffer + off, len - off, &ilen));
if (ilen == 0)
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index e2e0f06c97d0..cea13cb987d0 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -8,6 +8,7 @@ include/uapi/drm/i915_drm.h
include/uapi/linux/fadvise.h
include/uapi/linux/fcntl.h
include/uapi/linux/fs.h
+include/uapi/linux/fscrypt.h
include/uapi/linux/kcmp.h
include/uapi/linux/kvm.h
include/uapi/linux/in.h
diff --git a/tools/perf/jvmti/Build b/tools/perf/jvmti/Build
index 1e148bbdf820..202cadaaf097 100644
--- a/tools/perf/jvmti/Build
+++ b/tools/perf/jvmti/Build
@@ -2,7 +2,7 @@ jvmti-y += libjvmti.o
jvmti-y += jvmti_agent.o
# For strlcpy
-jvmti-y += libstring.o
+jvmti-y += libstring.o libctype.o
CFLAGS_jvmti = -fPIC -DPIC -I$(JDIR)/include -I$(JDIR)/include/linux
CFLAGS_REMOVE_jvmti = -Wmissing-declarations
@@ -15,3 +15,7 @@ CFLAGS_libstring.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PE
$(OUTPUT)jvmti/libstring.o: ../lib/string.c FORCE
$(call rule_mkdir)
$(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)jvmti/libctype.o: ../lib/ctype.c FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,cc_o_c)
diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h
index 63e4349a772a..15e458e150bd 100644
--- a/tools/perf/perf-sys.h
+++ b/tools/perf/perf-sys.h
@@ -15,7 +15,9 @@ void test_attr__init(void);
void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu,
int fd, int group_fd, unsigned long flags);
-#define HAVE_ATTR_TEST
+#ifndef HAVE_ATTR_TEST
+#define HAVE_ATTR_TEST 1
+#endif
static inline int
sys_perf_event_open(struct perf_event_attr *attr,
@@ -27,7 +29,7 @@ sys_perf_event_open(struct perf_event_attr *attr,
fd = syscall(__NR_perf_event_open, attr, pid, cpu,
group_fd, flags);
-#ifdef HAVE_ATTR_TEST
+#if HAVE_ATTR_TEST
if (unlikely(test_attr__enabled))
test_attr__open(attr, pid, cpu, fd, group_fd, flags);
#endif
diff --git a/tools/perf/pmu-events/arch/s390/cf_m8561/basic.json b/tools/perf/pmu-events/arch/s390/cf_z15/basic.json
index 17fb5241928b..17fb5241928b 100644
--- a/tools/perf/pmu-events/arch/s390/cf_m8561/basic.json
+++ b/tools/perf/pmu-events/arch/s390/cf_z15/basic.json
diff --git a/tools/perf/pmu-events/arch/s390/cf_m8561/crypto.json b/tools/perf/pmu-events/arch/s390/cf_z15/crypto.json
index db286f19e7b6..db286f19e7b6 100644
--- a/tools/perf/pmu-events/arch/s390/cf_m8561/crypto.json
+++ b/tools/perf/pmu-events/arch/s390/cf_z15/crypto.json
diff --git a/tools/perf/pmu-events/arch/s390/cf_m8561/crypto6.json b/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json
index 5e36bc2468d0..5e36bc2468d0 100644
--- a/tools/perf/pmu-events/arch/s390/cf_m8561/crypto6.json
+++ b/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json
diff --git a/tools/perf/pmu-events/arch/s390/cf_m8561/extended.json b/tools/perf/pmu-events/arch/s390/cf_z15/extended.json
index 89e070727e1b..89e070727e1b 100644
--- a/tools/perf/pmu-events/arch/s390/cf_m8561/extended.json
+++ b/tools/perf/pmu-events/arch/s390/cf_z15/extended.json
diff --git a/tools/perf/pmu-events/arch/s390/cf_z15/transaction.json b/tools/perf/pmu-events/arch/s390/cf_z15/transaction.json
new file mode 100644
index 000000000000..1a0034f79f73
--- /dev/null
+++ b/tools/perf/pmu-events/arch/s390/cf_z15/transaction.json
@@ -0,0 +1,7 @@
+[
+ {
+ "BriefDescription": "Transaction count",
+ "MetricName": "transaction",
+ "MetricExpr": "TX_C_TEND + TX_NC_TEND + TX_NC_TABORT + TX_C_TABORT_SPECIAL + TX_C_TABORT_NO_SPECIAL"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/s390/mapfile.csv b/tools/perf/pmu-events/arch/s390/mapfile.csv
index bd3fc577139c..61641a3480e0 100644
--- a/tools/perf/pmu-events/arch/s390/mapfile.csv
+++ b/tools/perf/pmu-events/arch/s390/mapfile.csv
@@ -4,4 +4,4 @@ Family-model,Version,Filename,EventType
^IBM.282[78].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_zec12,core
^IBM.296[45].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_z13,core
^IBM.390[67].*[13]\.[1-5].[[:xdigit:]]+$,3,cf_z14,core
-^IBM.856[12].*3\.6.[[:xdigit:]]+$,3,cf_m8561,core
+^IBM.856[12].*3\.6.[[:xdigit:]]+$,3,cf_z15,core
diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index 9e37287da924..e2837260ca4d 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -450,12 +450,12 @@ static struct fixed {
const char *name;
const char *event;
} fixed[] = {
- { "inst_retired.any", "event=0xc0" },
- { "inst_retired.any_p", "event=0xc0" },
- { "cpu_clk_unhalted.ref", "event=0x0,umask=0x03" },
- { "cpu_clk_unhalted.thread", "event=0x3c" },
- { "cpu_clk_unhalted.core", "event=0x3c" },
- { "cpu_clk_unhalted.thread_any", "event=0x3c,any=1" },
+ { "inst_retired.any", "event=0xc0,period=2000003" },
+ { "inst_retired.any_p", "event=0xc0,period=2000003" },
+ { "cpu_clk_unhalted.ref", "event=0x0,umask=0x03,period=2000003" },
+ { "cpu_clk_unhalted.thread", "event=0x3c,period=2000003" },
+ { "cpu_clk_unhalted.core", "event=0x3c,period=2000003" },
+ { "cpu_clk_unhalted.thread_any", "event=0x3c,any=1,period=2000003" },
{ NULL, NULL},
};
diff --git a/tools/perf/tests/perf-hooks.c b/tools/perf/tests/perf-hooks.c
index dbc27199c65e..dd865e0bea12 100644
--- a/tools/perf/tests/perf-hooks.c
+++ b/tools/perf/tests/perf-hooks.c
@@ -19,12 +19,11 @@ static void sigsegv_handler(int sig __maybe_unused)
static void the_hook(void *_hook_flags)
{
int *hook_flags = _hook_flags;
- int *p = NULL;
*hook_flags = 1234;
/* Generate a segfault, test perf_hooks__recover */
- *p = 0;
+ raise(SIGSEGV);
}
int test__perf_hooks(struct test *test __maybe_unused, int subtest __maybe_unused)
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index e830eadfca2a..e42bf572358c 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -1631,6 +1631,19 @@ int symbol__strerror_disassemble(struct symbol *sym __maybe_unused, struct map *
case SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF:
scnprintf(buf, buflen, "Please link with binutils's libopcode to enable BPF annotation");
break;
+ case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP:
+ scnprintf(buf, buflen, "Problems with arch specific instruction name regular expressions.");
+ break;
+ case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING:
+ scnprintf(buf, buflen, "Problems while parsing the CPUID in the arch specific initialization.");
+ break;
+ case SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE:
+ scnprintf(buf, buflen, "Invalid BPF file: %s.", dso->long_name);
+ break;
+ case SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF:
+ scnprintf(buf, buflen, "The %s BPF file has no BTF section, compile with -g or use pahole -J.",
+ dso->long_name);
+ break;
default:
scnprintf(buf, buflen, "Internal error: Invalid %d error code\n", errnum);
break;
@@ -1662,7 +1675,7 @@ static int dso__disassemble_filename(struct dso *dso, char *filename, size_t fil
build_id_path = strdup(filename);
if (!build_id_path)
- return -1;
+ return ENOMEM;
/*
* old style build-id cache has name of XX/XXXXXXX.. while
@@ -1713,13 +1726,13 @@ static int symbol__disassemble_bpf(struct symbol *sym,
char tpath[PATH_MAX];
size_t buf_size;
int nr_skip = 0;
- int ret = -1;
char *buf;
bfd *bfdf;
+ int ret;
FILE *s;
if (dso->binary_type != DSO_BINARY_TYPE__BPF_PROG_INFO)
- return -1;
+ return SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE;
pr_debug("%s: handling sym %s addr %" PRIx64 " len %" PRIx64 "\n", __func__,
sym->name, sym->start, sym->end - sym->start);
@@ -1732,8 +1745,10 @@ static int symbol__disassemble_bpf(struct symbol *sym,
assert(bfd_check_format(bfdf, bfd_object));
s = open_memstream(&buf, &buf_size);
- if (!s)
+ if (!s) {
+ ret = errno;
goto out;
+ }
init_disassemble_info(&info, s,
(fprintf_ftype) fprintf);
@@ -1742,8 +1757,10 @@ static int symbol__disassemble_bpf(struct symbol *sym,
info_node = perf_env__find_bpf_prog_info(dso->bpf_prog.env,
dso->bpf_prog.id);
- if (!info_node)
+ if (!info_node) {
+ ret = SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF;
goto out;
+ }
info_linear = info_node->info_linear;
sub_id = dso->bpf_prog.sub_id;
@@ -2071,11 +2088,11 @@ int symbol__annotate(struct symbol *sym, struct map *map,
int err;
if (!arch_name)
- return -1;
+ return errno;
args.arch = arch = arch__find(arch_name);
if (arch == NULL)
- return -ENOTSUP;
+ return ENOTSUP;
if (parch)
*parch = arch;
@@ -2971,7 +2988,7 @@ int symbol__annotate2(struct symbol *sym, struct map *map, struct evsel *evsel,
notes->offsets = zalloc(size * sizeof(struct annotation_line *));
if (notes->offsets == NULL)
- return -1;
+ return ENOMEM;
if (perf_evsel__is_group_event(evsel))
nr_pcnt = evsel->core.nr_members;
@@ -2997,7 +3014,7 @@ int symbol__annotate2(struct symbol *sym, struct map *map, struct evsel *evsel,
out_free_offsets:
zfree(&notes->offsets);
- return -1;
+ return err;
}
#define ANNOTATION__CFG(n) \
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index d94be9140e31..d76fd0e81f46 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -370,6 +370,10 @@ enum symbol_disassemble_errno {
SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX = __SYMBOL_ANNOTATE_ERRNO__START,
SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF,
+ SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING,
+ SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP,
+ SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE,
+ SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF,
__SYMBOL_ANNOTATE_ERRNO__END,
};
diff --git a/tools/perf/util/copyfile.c b/tools/perf/util/copyfile.c
index 3fa0db136667..47e03de7c235 100644
--- a/tools/perf/util/copyfile.c
+++ b/tools/perf/util/copyfile.c
@@ -101,14 +101,16 @@ static int copyfile_mode_ns(const char *from, const char *to, mode_t mode,
if (tofd < 0)
goto out;
- if (fchmod(tofd, mode))
- goto out_close_to;
-
if (st.st_size == 0) { /* /proc? do it slowly... */
err = slow_copyfile(from, tmp, nsi);
+ if (!err && fchmod(tofd, mode))
+ err = -1;
goto out_close_to;
}
+ if (fchmod(tofd, mode))
+ goto out_close_to;
+
nsinfo__mountns_enter(nsi, &nsc);
fromfd = open(from, O_RDONLY);
nsinfo__mountns_exit(&nsc);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index d277a98e62df..de79c735e441 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1659,7 +1659,7 @@ struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
is_open = false;
if (c2->leader == leader) {
if (is_open)
- perf_evsel__close(&evsel->core);
+ perf_evsel__close(&c2->core);
c2->leader = c2;
c2->core.nr_members = 0;
}
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 5591af81a070..abc7fda4a0fe 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -30,6 +30,7 @@
#include "counts.h"
#include "event.h"
#include "evsel.h"
+#include "util/env.h"
#include "util/evsel_config.h"
#include "util/evsel_fprintf.h"
#include "evlist.h"
@@ -2512,7 +2513,7 @@ struct perf_env *perf_evsel__env(struct evsel *evsel)
{
if (evsel && evsel->evlist)
return evsel->evlist->env;
- return NULL;
+ return &perf_env;
}
static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist)
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 86d9396cb131..becc2d109423 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1296,8 +1296,10 @@ static int build_mem_topology(struct memory_node *nodes, u64 size, u64 *cntp)
continue;
if (WARN_ONCE(cnt >= size,
- "failed to write MEM_TOPOLOGY, way too many nodes\n"))
+ "failed to write MEM_TOPOLOGY, way too many nodes\n")) {
+ closedir(dir);
return -1;
+ }
ret = memory_node__read(&nodes[cnt++], idx);
}
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 679a1d75090c..7b6eaf5e0bda 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -1625,7 +1625,7 @@ int hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
return 0;
}
-static int hist_entry__sort(struct hist_entry *a, struct hist_entry *b)
+static int64_t hist_entry__sort(struct hist_entry *a, struct hist_entry *b)
{
struct hists *hists = a->hists;
struct perf_hpp_fmt *fmt;
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index 1bdf4c6ea3e5..e3ccb0ce1938 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -395,7 +395,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr)
size_t size;
u16 idr_size;
const char *sym;
- uint32_t count;
+ uint64_t count;
int ret, csize, usize;
pid_t pid, tid;
struct {
@@ -418,7 +418,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr)
return -1;
filename = event->mmap2.filename;
- size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%u.so",
+ size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%" PRIu64 ".so",
jd->dir,
pid,
count);
@@ -529,7 +529,7 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr)
return -1;
filename = event->mmap2.filename;
- size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%"PRIu64,
+ size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%" PRIu64 ".so",
jd->dir,
pid,
jr->move.code_index);
diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c
index 8d04e3d070b1..8b14e4a7f1dc 100644
--- a/tools/perf/util/llvm-utils.c
+++ b/tools/perf/util/llvm-utils.c
@@ -233,14 +233,14 @@ static int detect_kbuild_dir(char **kbuild_dir)
const char *prefix_dir = "";
const char *suffix_dir = "";
+ /* _UTSNAME_LENGTH is 65 */
+ char release[128];
+
char *autoconf_path;
int err;
if (!test_dir) {
- /* _UTSNAME_LENGTH is 65 */
- char release[128];
-
err = fetch_kernel_version(NULL, release,
sizeof(release));
if (err)
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 5b83ed1ebbd6..eec9b282c047 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include "symbol.h"
+#include <assert.h>
#include <errno.h>
#include <inttypes.h>
#include <limits.h>
@@ -850,6 +851,8 @@ static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp
}
after->start = map->end;
+ after->pgoff += map->end - pos->start;
+ assert(pos->map_ip(pos, map->end) == after->map_ip(after, map->end));
__map_groups__insert(pos->groups, after);
if (verbose >= 2 && !use_browser)
map__fprintf(after, fp);
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 53f31053a27a..02460362256d 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -14,6 +14,7 @@
#include "thread_map.h"
#include "trace-event.h"
#include "mmap.h"
+#include "util/env.h"
#include <internal/lib.h>
#include "../perf-sys.h"
@@ -54,6 +55,11 @@ int parse_callchain_record(const char *arg __maybe_unused,
}
/*
+ * Add this one here not to drag util/env.c
+ */
+struct perf_env perf_env;
+
+/*
* Support debug printing even though util/debug.c is not linked. That means
* implementing 'verbose' and 'eprintf'.
*/
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index 15961854ba67..741f040648b5 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -539,10 +539,11 @@ static int perl_stop_script(void)
static int perl_generate_script(struct tep_handle *pevent, const char *outfile)
{
+ int i, not_first, count, nr_events;
+ struct tep_event **all_events;
struct tep_event *event = NULL;
struct tep_format_field *f;
char fname[PATH_MAX];
- int not_first, count;
FILE *ofp;
sprintf(fname, "%s.pl", outfile);
@@ -603,8 +604,11 @@ sub print_backtrace\n\
}\n\n\
");
+ nr_events = tep_get_events_count(pevent);
+ all_events = tep_list_events(pevent, TEP_EVENT_SORT_ID);
- while ((event = trace_find_next_event(pevent, event))) {
+ for (i = 0; all_events && i < nr_events; i++) {
+ event = all_events[i];
fprintf(ofp, "sub %s::%s\n{\n", event->system, event->name);
fprintf(ofp, "\tmy (");
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 5d341efc3237..93c03b39cd9c 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -1687,10 +1687,11 @@ static int python_stop_script(void)
static int python_generate_script(struct tep_handle *pevent, const char *outfile)
{
+ int i, not_first, count, nr_events;
+ struct tep_event **all_events;
struct tep_event *event = NULL;
struct tep_format_field *f;
char fname[PATH_MAX];
- int not_first, count;
FILE *ofp;
sprintf(fname, "%s.py", outfile);
@@ -1735,7 +1736,11 @@ static int python_generate_script(struct tep_handle *pevent, const char *outfile
fprintf(ofp, "def trace_end():\n");
fprintf(ofp, "\tprint(\"in trace_end\")\n\n");
- while ((event = trace_find_next_event(pevent, event))) {
+ nr_events = tep_get_events_count(pevent);
+ all_events = tep_list_events(pevent, TEP_EVENT_SORT_ID);
+
+ for (i = 0; all_events && i < nr_events; i++) {
+ event = all_events[i];
fprintf(ofp, "def %s__%s(", event->system, event->name);
fprintf(ofp, "event_name, ");
fprintf(ofp, "context, ");
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 5d6bfc70b210..9634f0ae57be 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -173,37 +173,6 @@ int parse_event_file(struct tep_handle *pevent,
return tep_parse_event(pevent, buf, size, sys);
}
-struct tep_event *trace_find_next_event(struct tep_handle *pevent,
- struct tep_event *event)
-{
- static int idx;
- int events_count;
- struct tep_event *all_events;
-
- all_events = tep_get_first_event(pevent);
- events_count = tep_get_events_count(pevent);
- if (!pevent || !all_events || events_count < 1)
- return NULL;
-
- if (!event) {
- idx = 0;
- return all_events;
- }
-
- if (idx < events_count && event == (all_events + idx)) {
- idx++;
- if (idx == events_count)
- return NULL;
- return (all_events + idx);
- }
-
- for (idx = 1; idx < events_count; idx++) {
- if (event == (all_events + (idx - 1)))
- return (all_events + idx);
- }
- return NULL;
-}
-
struct flag {
const char *name;
unsigned long long value;
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index 2e158387b3d7..72fdf2a3577c 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -47,8 +47,6 @@ void parse_saved_cmdline(struct tep_handle *pevent, char *file, unsigned int siz
ssize_t trace_report(int fd, struct trace_event *tevent, bool repipe);
-struct tep_event *trace_find_next_event(struct tep_handle *pevent,
- struct tep_event *event);
unsigned long long read_size(struct tep_event *event, void *ptr, int size);
unsigned long long eval_flag(const char *flag);
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 5eda6e19c947..ae56c766eda1 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -154,8 +154,10 @@ static int rm_rf_depth_pat(const char *path, int depth, const char **pat)
if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, ".."))
continue;
- if (!match_pat(d->d_name, pat))
- return -2;
+ if (!match_pat(d->d_name, pat)) {
+ ret = -2;
+ break;
+ }
scnprintf(namebuf, sizeof(namebuf), "%s/%s",
path, d->d_name);
diff --git a/tools/testing/kunit/.gitignore b/tools/testing/kunit/.gitignore
new file mode 100644
index 000000000000..c791ff59a37a
--- /dev/null
+++ b/tools/testing/kunit/.gitignore
@@ -0,0 +1,3 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod] \ No newline at end of file
diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config
new file mode 100644
index 000000000000..9235b7d42d38
--- /dev/null
+++ b/tools/testing/kunit/configs/all_tests.config
@@ -0,0 +1,3 @@
+CONFIG_KUNIT=y
+CONFIG_KUNIT_TEST=y
+CONFIG_KUNIT_EXAMPLE_TEST=y
diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py
new file mode 100755
index 000000000000..efe06d621983
--- /dev/null
+++ b/tools/testing/kunit/kunit.py
@@ -0,0 +1,138 @@
+#!/usr/bin/python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# A thin wrapper on top of the KUnit Kernel
+#
+# Copyright (C) 2019, Google LLC.
+# Author: Felix Guo <felixguoxiuping@gmail.com>
+# Author: Brendan Higgins <brendanhiggins@google.com>
+
+import argparse
+import sys
+import os
+import time
+import shutil
+
+from collections import namedtuple
+from enum import Enum, auto
+
+import kunit_config
+import kunit_kernel
+import kunit_parser
+
+KunitResult = namedtuple('KunitResult', ['status','result'])
+
+KunitRequest = namedtuple('KunitRequest', ['raw_output','timeout', 'jobs', 'build_dir', 'defconfig'])
+
+class KunitStatus(Enum):
+ SUCCESS = auto()
+ CONFIG_FAILURE = auto()
+ BUILD_FAILURE = auto()
+ TEST_FAILURE = auto()
+
+def create_default_kunitconfig():
+ if not os.path.exists(kunit_kernel.KUNITCONFIG_PATH):
+ shutil.copyfile('arch/um/configs/kunit_defconfig',
+ kunit_kernel.KUNITCONFIG_PATH)
+
+def run_tests(linux: kunit_kernel.LinuxSourceTree,
+ request: KunitRequest) -> KunitResult:
+ if request.defconfig:
+ create_default_kunitconfig()
+
+ config_start = time.time()
+ success = linux.build_reconfig(request.build_dir)
+ config_end = time.time()
+ if not success:
+ return KunitResult(KunitStatus.CONFIG_FAILURE, 'could not configure kernel')
+
+ kunit_parser.print_with_timestamp('Building KUnit Kernel ...')
+
+ build_start = time.time()
+ success = linux.build_um_kernel(request.jobs, request.build_dir)
+ build_end = time.time()
+ if not success:
+ return KunitResult(KunitStatus.BUILD_FAILURE, 'could not build kernel')
+
+ kunit_parser.print_with_timestamp('Starting KUnit Kernel ...')
+ test_start = time.time()
+
+ test_result = kunit_parser.TestResult(kunit_parser.TestStatus.SUCCESS,
+ [],
+ 'Tests not Parsed.')
+ if request.raw_output:
+ kunit_parser.raw_output(
+ linux.run_kernel(timeout=request.timeout,
+ build_dir=request.build_dir))
+ else:
+ kunit_output = linux.run_kernel(timeout=request.timeout,
+ build_dir=request.build_dir)
+ test_result = kunit_parser.parse_run_tests(kunit_output)
+ test_end = time.time()
+
+ kunit_parser.print_with_timestamp((
+ 'Elapsed time: %.3fs total, %.3fs configuring, %.3fs ' +
+ 'building, %.3fs running\n') % (
+ test_end - config_start,
+ config_end - config_start,
+ build_end - build_start,
+ test_end - test_start))
+
+ if test_result.status != kunit_parser.TestStatus.SUCCESS:
+ return KunitResult(KunitStatus.TEST_FAILURE, test_result)
+ else:
+ return KunitResult(KunitStatus.SUCCESS, test_result)
+
+def main(argv, linux=None):
+ parser = argparse.ArgumentParser(
+ description='Helps writing and running KUnit tests.')
+ subparser = parser.add_subparsers(dest='subcommand')
+
+ run_parser = subparser.add_parser('run', help='Runs KUnit tests.')
+ run_parser.add_argument('--raw_output', help='don\'t format output from kernel',
+ action='store_true')
+
+ run_parser.add_argument('--timeout',
+ help='maximum number of seconds to allow for all tests '
+ 'to run. This does not include time taken to build the '
+ 'tests.',
+ type=int,
+ default=300,
+ metavar='timeout')
+
+ run_parser.add_argument('--jobs',
+ help='As in the make command, "Specifies the number of '
+ 'jobs (commands) to run simultaneously."',
+ type=int, default=8, metavar='jobs')
+
+ run_parser.add_argument('--build_dir',
+ help='As in the make command, it specifies the build '
+ 'directory.',
+ type=str, default=None, metavar='build_dir')
+
+ run_parser.add_argument('--defconfig',
+ help='Uses a default kunitconfig.',
+ action='store_true')
+
+ cli_args = parser.parse_args(argv)
+
+ if cli_args.subcommand == 'run':
+ if cli_args.defconfig:
+ create_default_kunitconfig()
+
+ if not linux:
+ linux = kunit_kernel.LinuxSourceTree()
+
+ request = KunitRequest(cli_args.raw_output,
+ cli_args.timeout,
+ cli_args.jobs,
+ cli_args.build_dir,
+ cli_args.defconfig)
+ result = run_tests(linux, request)
+ if result.status != KunitStatus.SUCCESS:
+ sys.exit(1)
+ else:
+ parser.print_help()
+
+if __name__ == '__main__':
+ main(sys.argv[1:])
diff --git a/tools/testing/kunit/kunit_config.py b/tools/testing/kunit/kunit_config.py
new file mode 100644
index 000000000000..ebf3942b23f5
--- /dev/null
+++ b/tools/testing/kunit/kunit_config.py
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Builds a .config from a kunitconfig.
+#
+# Copyright (C) 2019, Google LLC.
+# Author: Felix Guo <felixguoxiuping@gmail.com>
+# Author: Brendan Higgins <brendanhiggins@google.com>
+
+import collections
+import re
+
+CONFIG_IS_NOT_SET_PATTERN = r'^# CONFIG_\w+ is not set$'
+CONFIG_PATTERN = r'^CONFIG_\w+=\S+$'
+
+KconfigEntryBase = collections.namedtuple('KconfigEntry', ['raw_entry'])
+
+
+class KconfigEntry(KconfigEntryBase):
+
+ def __str__(self) -> str:
+ return self.raw_entry
+
+
+class KconfigParseError(Exception):
+ """Error parsing Kconfig defconfig or .config."""
+
+
+class Kconfig(object):
+ """Represents defconfig or .config specified using the Kconfig language."""
+
+ def __init__(self):
+ self._entries = []
+
+ def entries(self):
+ return set(self._entries)
+
+ def add_entry(self, entry: KconfigEntry) -> None:
+ self._entries.append(entry)
+
+ def is_subset_of(self, other: 'Kconfig') -> bool:
+ return self.entries().issubset(other.entries())
+
+ def write_to_file(self, path: str) -> None:
+ with open(path, 'w') as f:
+ for entry in self.entries():
+ f.write(str(entry) + '\n')
+
+ def parse_from_string(self, blob: str) -> None:
+ """Parses a string containing KconfigEntrys and populates this Kconfig."""
+ self._entries = []
+ is_not_set_matcher = re.compile(CONFIG_IS_NOT_SET_PATTERN)
+ config_matcher = re.compile(CONFIG_PATTERN)
+ for line in blob.split('\n'):
+ line = line.strip()
+ if not line:
+ continue
+ elif config_matcher.match(line) or is_not_set_matcher.match(line):
+ self._entries.append(KconfigEntry(line))
+ elif line[0] == '#':
+ continue
+ else:
+ raise KconfigParseError('Failed to parse: ' + line)
+
+ def read_from_file(self, path: str) -> None:
+ with open(path, 'r') as f:
+ self.parse_from_string(f.read())
diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
new file mode 100644
index 000000000000..bf3876835331
--- /dev/null
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -0,0 +1,149 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Runs UML kernel, collects output, and handles errors.
+#
+# Copyright (C) 2019, Google LLC.
+# Author: Felix Guo <felixguoxiuping@gmail.com>
+# Author: Brendan Higgins <brendanhiggins@google.com>
+
+
+import logging
+import subprocess
+import os
+
+import kunit_config
+
+KCONFIG_PATH = '.config'
+KUNITCONFIG_PATH = 'kunitconfig'
+
+class ConfigError(Exception):
+ """Represents an error trying to configure the Linux kernel."""
+
+
+class BuildError(Exception):
+ """Represents an error trying to build the Linux kernel."""
+
+
+class LinuxSourceTreeOperations(object):
+ """An abstraction over command line operations performed on a source tree."""
+
+ def make_mrproper(self):
+ try:
+ subprocess.check_output(['make', 'mrproper'])
+ except OSError as e:
+ raise ConfigError('Could not call make command: ' + e)
+ except subprocess.CalledProcessError as e:
+ raise ConfigError(e.output)
+
+ def make_olddefconfig(self, build_dir):
+ command = ['make', 'ARCH=um', 'olddefconfig']
+ if build_dir:
+ command += ['O=' + build_dir]
+ try:
+ subprocess.check_output(command)
+ except OSError as e:
+ raise ConfigError('Could not call make command: ' + e)
+ except subprocess.CalledProcessError as e:
+ raise ConfigError(e.output)
+
+ def make(self, jobs, build_dir):
+ command = ['make', 'ARCH=um', '--jobs=' + str(jobs)]
+ if build_dir:
+ command += ['O=' + build_dir]
+ try:
+ subprocess.check_output(command)
+ except OSError as e:
+ raise BuildError('Could not call execute make: ' + e)
+ except subprocess.CalledProcessError as e:
+ raise BuildError(e.output)
+
+ def linux_bin(self, params, timeout, build_dir):
+ """Runs the Linux UML binary. Must be named 'linux'."""
+ linux_bin = './linux'
+ if build_dir:
+ linux_bin = os.path.join(build_dir, 'linux')
+ process = subprocess.Popen(
+ [linux_bin] + params,
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ process.wait(timeout=timeout)
+ return process
+
+
+def get_kconfig_path(build_dir):
+ kconfig_path = KCONFIG_PATH
+ if build_dir:
+ kconfig_path = os.path.join(build_dir, KCONFIG_PATH)
+ return kconfig_path
+
+class LinuxSourceTree(object):
+ """Represents a Linux kernel source tree with KUnit tests."""
+
+ def __init__(self):
+ self._kconfig = kunit_config.Kconfig()
+ self._kconfig.read_from_file(KUNITCONFIG_PATH)
+ self._ops = LinuxSourceTreeOperations()
+
+ def clean(self):
+ try:
+ self._ops.make_mrproper()
+ except ConfigError as e:
+ logging.error(e)
+ return False
+ return True
+
+ def build_config(self, build_dir):
+ kconfig_path = get_kconfig_path(build_dir)
+ if build_dir and not os.path.exists(build_dir):
+ os.mkdir(build_dir)
+ self._kconfig.write_to_file(kconfig_path)
+ try:
+ self._ops.make_olddefconfig(build_dir)
+ except ConfigError as e:
+ logging.error(e)
+ return False
+ validated_kconfig = kunit_config.Kconfig()
+ validated_kconfig.read_from_file(kconfig_path)
+ if not self._kconfig.is_subset_of(validated_kconfig):
+ logging.error('Provided Kconfig is not contained in validated .config!')
+ return False
+ return True
+
+ def build_reconfig(self, build_dir):
+ """Creates a new .config if it is not a subset of the kunitconfig."""
+ kconfig_path = get_kconfig_path(build_dir)
+ if os.path.exists(kconfig_path):
+ existing_kconfig = kunit_config.Kconfig()
+ existing_kconfig.read_from_file(kconfig_path)
+ if not self._kconfig.is_subset_of(existing_kconfig):
+ print('Regenerating .config ...')
+ os.remove(kconfig_path)
+ return self.build_config(build_dir)
+ else:
+ return True
+ else:
+ print('Generating .config ...')
+ return self.build_config(build_dir)
+
+ def build_um_kernel(self, jobs, build_dir):
+ try:
+ self._ops.make_olddefconfig(build_dir)
+ self._ops.make(jobs, build_dir)
+ except (ConfigError, BuildError) as e:
+ logging.error(e)
+ return False
+ used_kconfig = kunit_config.Kconfig()
+ used_kconfig.read_from_file(get_kconfig_path(build_dir))
+ if not self._kconfig.is_subset_of(used_kconfig):
+ logging.error('Provided Kconfig is not contained in final config!')
+ return False
+ return True
+
+ def run_kernel(self, args=[], timeout=None, build_dir=None):
+ args.extend(['mem=256M'])
+ process = self._ops.linux_bin(args, timeout, build_dir)
+ with open('test.log', 'w') as f:
+ for line in process.stdout:
+ f.write(line.rstrip().decode('ascii') + '\n')
+ yield line.rstrip().decode('ascii')
diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py
new file mode 100644
index 000000000000..4ffbae0f6732
--- /dev/null
+++ b/tools/testing/kunit/kunit_parser.py
@@ -0,0 +1,310 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Parses test results from a kernel dmesg log.
+#
+# Copyright (C) 2019, Google LLC.
+# Author: Felix Guo <felixguoxiuping@gmail.com>
+# Author: Brendan Higgins <brendanhiggins@google.com>
+
+import re
+
+from collections import namedtuple
+from datetime import datetime
+from enum import Enum, auto
+from functools import reduce
+from typing import List
+
+TestResult = namedtuple('TestResult', ['status','suites','log'])
+
+class TestSuite(object):
+ def __init__(self):
+ self.status = None
+ self.name = None
+ self.cases = []
+
+ def __str__(self):
+ return 'TestSuite(' + self.status + ',' + self.name + ',' + str(self.cases) + ')'
+
+ def __repr__(self):
+ return str(self)
+
+class TestCase(object):
+ def __init__(self):
+ self.status = None
+ self.name = ''
+ self.log = []
+
+ def __str__(self):
+ return 'TestCase(' + self.status + ',' + self.name + ',' + str(self.log) + ')'
+
+ def __repr__(self):
+ return str(self)
+
+class TestStatus(Enum):
+ SUCCESS = auto()
+ FAILURE = auto()
+ TEST_CRASHED = auto()
+ NO_TESTS = auto()
+
+kunit_start_re = re.compile(r'^TAP version [0-9]+$')
+kunit_end_re = re.compile('List of all partitions:')
+
+def isolate_kunit_output(kernel_output):
+ started = False
+ for line in kernel_output:
+ if kunit_start_re.match(line):
+ started = True
+ yield line
+ elif kunit_end_re.match(line):
+ break
+ elif started:
+ yield line
+
+def raw_output(kernel_output):
+ for line in kernel_output:
+ print(line)
+
+DIVIDER = '=' * 60
+
+RESET = '\033[0;0m'
+
+def red(text):
+ return '\033[1;31m' + text + RESET
+
+def yellow(text):
+ return '\033[1;33m' + text + RESET
+
+def green(text):
+ return '\033[1;32m' + text + RESET
+
+def print_with_timestamp(message):
+ print('[%s] %s' % (datetime.now().strftime('%H:%M:%S'), message))
+
+def format_suite_divider(message):
+ return '======== ' + message + ' ========'
+
+def print_suite_divider(message):
+ print_with_timestamp(DIVIDER)
+ print_with_timestamp(format_suite_divider(message))
+
+def print_log(log):
+ for m in log:
+ print_with_timestamp(m)
+
+TAP_ENTRIES = re.compile(r'^(TAP|\t?ok|\t?not ok|\t?[0-9]+\.\.[0-9]+|\t?#).*$')
+
+def consume_non_diagnositic(lines: List[str]) -> None:
+ while lines and not TAP_ENTRIES.match(lines[0]):
+ lines.pop(0)
+
+def save_non_diagnositic(lines: List[str], test_case: TestCase) -> None:
+ while lines and not TAP_ENTRIES.match(lines[0]):
+ test_case.log.append(lines[0])
+ lines.pop(0)
+
+OkNotOkResult = namedtuple('OkNotOkResult', ['is_ok','description', 'text'])
+
+OK_NOT_OK_SUBTEST = re.compile(r'^\t(ok|not ok) [0-9]+ - (.*)$')
+
+OK_NOT_OK_MODULE = re.compile(r'^(ok|not ok) [0-9]+ - (.*)$')
+
+def parse_ok_not_ok_test_case(lines: List[str],
+ test_case: TestCase,
+ expecting_test_case: bool) -> bool:
+ save_non_diagnositic(lines, test_case)
+ if not lines:
+ if expecting_test_case:
+ test_case.status = TestStatus.TEST_CRASHED
+ return True
+ else:
+ return False
+ line = lines[0]
+ match = OK_NOT_OK_SUBTEST.match(line)
+ if match:
+ test_case.log.append(lines.pop(0))
+ test_case.name = match.group(2)
+ if test_case.status == TestStatus.TEST_CRASHED:
+ return True
+ if match.group(1) == 'ok':
+ test_case.status = TestStatus.SUCCESS
+ else:
+ test_case.status = TestStatus.FAILURE
+ return True
+ else:
+ return False
+
+SUBTEST_DIAGNOSTIC = re.compile(r'^\t# .*?: (.*)$')
+DIAGNOSTIC_CRASH_MESSAGE = 'kunit test case crashed!'
+
+def parse_diagnostic(lines: List[str], test_case: TestCase) -> bool:
+ save_non_diagnositic(lines, test_case)
+ if not lines:
+ return False
+ line = lines[0]
+ match = SUBTEST_DIAGNOSTIC.match(line)
+ if match:
+ test_case.log.append(lines.pop(0))
+ if match.group(1) == DIAGNOSTIC_CRASH_MESSAGE:
+ test_case.status = TestStatus.TEST_CRASHED
+ return True
+ else:
+ return False
+
+def parse_test_case(lines: List[str], expecting_test_case: bool) -> TestCase:
+ test_case = TestCase()
+ save_non_diagnositic(lines, test_case)
+ while parse_diagnostic(lines, test_case):
+ pass
+ if parse_ok_not_ok_test_case(lines, test_case, expecting_test_case):
+ return test_case
+ else:
+ return None
+
+SUBTEST_HEADER = re.compile(r'^\t# Subtest: (.*)$')
+
+def parse_subtest_header(lines: List[str]) -> str:
+ consume_non_diagnositic(lines)
+ if not lines:
+ return None
+ match = SUBTEST_HEADER.match(lines[0])
+ if match:
+ lines.pop(0)
+ return match.group(1)
+ else:
+ return None
+
+SUBTEST_PLAN = re.compile(r'\t[0-9]+\.\.([0-9]+)')
+
+def parse_subtest_plan(lines: List[str]) -> int:
+ consume_non_diagnositic(lines)
+ match = SUBTEST_PLAN.match(lines[0])
+ if match:
+ lines.pop(0)
+ return int(match.group(1))
+ else:
+ return None
+
+def max_status(left: TestStatus, right: TestStatus) -> TestStatus:
+ if left == TestStatus.TEST_CRASHED or right == TestStatus.TEST_CRASHED:
+ return TestStatus.TEST_CRASHED
+ elif left == TestStatus.FAILURE or right == TestStatus.FAILURE:
+ return TestStatus.FAILURE
+ elif left != TestStatus.SUCCESS:
+ return left
+ elif right != TestStatus.SUCCESS:
+ return right
+ else:
+ return TestStatus.SUCCESS
+
+def parse_ok_not_ok_test_suite(lines: List[str], test_suite: TestSuite) -> bool:
+ consume_non_diagnositic(lines)
+ if not lines:
+ test_suite.status = TestStatus.TEST_CRASHED
+ return False
+ line = lines[0]
+ match = OK_NOT_OK_MODULE.match(line)
+ if match:
+ lines.pop(0)
+ if match.group(1) == 'ok':
+ test_suite.status = TestStatus.SUCCESS
+ else:
+ test_suite.status = TestStatus.FAILURE
+ return True
+ else:
+ return False
+
+def bubble_up_errors(to_status, status_container_list) -> TestStatus:
+ status_list = map(to_status, status_container_list)
+ return reduce(max_status, status_list, TestStatus.SUCCESS)
+
+def bubble_up_test_case_errors(test_suite: TestSuite) -> TestStatus:
+ max_test_case_status = bubble_up_errors(lambda x: x.status, test_suite.cases)
+ return max_status(max_test_case_status, test_suite.status)
+
+def parse_test_suite(lines: List[str]) -> TestSuite:
+ if not lines:
+ return None
+ consume_non_diagnositic(lines)
+ test_suite = TestSuite()
+ test_suite.status = TestStatus.SUCCESS
+ name = parse_subtest_header(lines)
+ if not name:
+ return None
+ test_suite.name = name
+ expected_test_case_num = parse_subtest_plan(lines)
+ if not expected_test_case_num:
+ return None
+ test_case = parse_test_case(lines, expected_test_case_num > 0)
+ expected_test_case_num -= 1
+ while test_case:
+ test_suite.cases.append(test_case)
+ test_case = parse_test_case(lines, expected_test_case_num > 0)
+ expected_test_case_num -= 1
+ if parse_ok_not_ok_test_suite(lines, test_suite):
+ test_suite.status = bubble_up_test_case_errors(test_suite)
+ return test_suite
+ elif not lines:
+ print_with_timestamp(red('[ERROR] ') + 'ran out of lines before end token')
+ return test_suite
+ else:
+ print('failed to parse end of suite' + lines[0])
+ return None
+
+TAP_HEADER = re.compile(r'^TAP version 14$')
+
+def parse_tap_header(lines: List[str]) -> bool:
+ consume_non_diagnositic(lines)
+ if TAP_HEADER.match(lines[0]):
+ lines.pop(0)
+ return True
+ else:
+ return False
+
+def bubble_up_suite_errors(test_suite_list: List[TestSuite]) -> TestStatus:
+ return bubble_up_errors(lambda x: x.status, test_suite_list)
+
+def parse_test_result(lines: List[str]) -> TestResult:
+ if not lines:
+ return TestResult(TestStatus.NO_TESTS, [], lines)
+ consume_non_diagnositic(lines)
+ if not parse_tap_header(lines):
+ return None
+ test_suites = []
+ test_suite = parse_test_suite(lines)
+ while test_suite:
+ test_suites.append(test_suite)
+ test_suite = parse_test_suite(lines)
+ return TestResult(bubble_up_suite_errors(test_suites), test_suites, lines)
+
+def parse_run_tests(kernel_output) -> TestResult:
+ total_tests = 0
+ failed_tests = 0
+ crashed_tests = 0
+ test_result = parse_test_result(list(isolate_kunit_output(kernel_output)))
+ for test_suite in test_result.suites:
+ if test_suite.status == TestStatus.SUCCESS:
+ print_suite_divider(green('[PASSED] ') + test_suite.name)
+ elif test_suite.status == TestStatus.TEST_CRASHED:
+ print_suite_divider(red('[CRASHED] ' + test_suite.name))
+ else:
+ print_suite_divider(red('[FAILED] ') + test_suite.name)
+ for test_case in test_suite.cases:
+ total_tests += 1
+ if test_case.status == TestStatus.SUCCESS:
+ print_with_timestamp(green('[PASSED] ') + test_case.name)
+ elif test_case.status == TestStatus.TEST_CRASHED:
+ crashed_tests += 1
+ print_with_timestamp(red('[CRASHED] ' + test_case.name))
+ print_log(map(yellow, test_case.log))
+ print_with_timestamp('')
+ else:
+ failed_tests += 1
+ print_with_timestamp(red('[FAILED] ') + test_case.name)
+ print_log(map(yellow, test_case.log))
+ print_with_timestamp('')
+ print_with_timestamp(DIVIDER)
+ fmt = green if test_result.status == TestStatus.SUCCESS else red
+ print_with_timestamp(
+ fmt('Testing complete. %d tests run. %d failed. %d crashed.' %
+ (total_tests, failed_tests, crashed_tests)))
+ return test_result
diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
new file mode 100755
index 000000000000..4a12baa0cd4e
--- /dev/null
+++ b/tools/testing/kunit/kunit_tool_test.py
@@ -0,0 +1,206 @@
+#!/usr/bin/python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# A collection of tests for tools/testing/kunit/kunit.py
+#
+# Copyright (C) 2019, Google LLC.
+# Author: Brendan Higgins <brendanhiggins@google.com>
+
+import unittest
+from unittest import mock
+
+import tempfile, shutil # Handling test_tmpdir
+
+import os
+
+import kunit_config
+import kunit_parser
+import kunit_kernel
+import kunit
+
+test_tmpdir = ''
+
+def setUpModule():
+ global test_tmpdir
+ test_tmpdir = tempfile.mkdtemp()
+
+def tearDownModule():
+ shutil.rmtree(test_tmpdir)
+
+def get_absolute_path(path):
+ return os.path.join(os.path.dirname(__file__), path)
+
+class KconfigTest(unittest.TestCase):
+
+ def test_is_subset_of(self):
+ kconfig0 = kunit_config.Kconfig()
+ self.assertTrue(kconfig0.is_subset_of(kconfig0))
+
+ kconfig1 = kunit_config.Kconfig()
+ kconfig1.add_entry(kunit_config.KconfigEntry('CONFIG_TEST=y'))
+ self.assertTrue(kconfig1.is_subset_of(kconfig1))
+ self.assertTrue(kconfig0.is_subset_of(kconfig1))
+ self.assertFalse(kconfig1.is_subset_of(kconfig0))
+
+ def test_read_from_file(self):
+ kconfig = kunit_config.Kconfig()
+ kconfig_path = get_absolute_path(
+ 'test_data/test_read_from_file.kconfig')
+
+ kconfig.read_from_file(kconfig_path)
+
+ expected_kconfig = kunit_config.Kconfig()
+ expected_kconfig.add_entry(
+ kunit_config.KconfigEntry('CONFIG_UML=y'))
+ expected_kconfig.add_entry(
+ kunit_config.KconfigEntry('CONFIG_MMU=y'))
+ expected_kconfig.add_entry(
+ kunit_config.KconfigEntry('CONFIG_TEST=y'))
+ expected_kconfig.add_entry(
+ kunit_config.KconfigEntry('CONFIG_EXAMPLE_TEST=y'))
+ expected_kconfig.add_entry(
+ kunit_config.KconfigEntry('# CONFIG_MK8 is not set'))
+
+ self.assertEqual(kconfig.entries(), expected_kconfig.entries())
+
+ def test_write_to_file(self):
+ kconfig_path = os.path.join(test_tmpdir, '.config')
+
+ expected_kconfig = kunit_config.Kconfig()
+ expected_kconfig.add_entry(
+ kunit_config.KconfigEntry('CONFIG_UML=y'))
+ expected_kconfig.add_entry(
+ kunit_config.KconfigEntry('CONFIG_MMU=y'))
+ expected_kconfig.add_entry(
+ kunit_config.KconfigEntry('CONFIG_TEST=y'))
+ expected_kconfig.add_entry(
+ kunit_config.KconfigEntry('CONFIG_EXAMPLE_TEST=y'))
+ expected_kconfig.add_entry(
+ kunit_config.KconfigEntry('# CONFIG_MK8 is not set'))
+
+ expected_kconfig.write_to_file(kconfig_path)
+
+ actual_kconfig = kunit_config.Kconfig()
+ actual_kconfig.read_from_file(kconfig_path)
+
+ self.assertEqual(actual_kconfig.entries(),
+ expected_kconfig.entries())
+
+class KUnitParserTest(unittest.TestCase):
+
+ def assertContains(self, needle, haystack):
+ for line in haystack:
+ if needle in line:
+ return
+ raise AssertionError('"' +
+ str(needle) + '" not found in "' + str(haystack) + '"!')
+
+ def test_output_isolated_correctly(self):
+ log_path = get_absolute_path(
+ 'test_data/test_output_isolated_correctly.log')
+ file = open(log_path)
+ result = kunit_parser.isolate_kunit_output(file.readlines())
+ self.assertContains('TAP version 14\n', result)
+ self.assertContains(' # Subtest: example', result)
+ self.assertContains(' 1..2', result)
+ self.assertContains(' ok 1 - example_simple_test', result)
+ self.assertContains(' ok 2 - example_mock_test', result)
+ self.assertContains('ok 1 - example', result)
+ file.close()
+
+ def test_parse_successful_test_log(self):
+ all_passed_log = get_absolute_path(
+ 'test_data/test_is_test_passed-all_passed.log')
+ file = open(all_passed_log)
+ result = kunit_parser.parse_run_tests(file.readlines())
+ self.assertEqual(
+ kunit_parser.TestStatus.SUCCESS,
+ result.status)
+ file.close()
+
+ def test_parse_failed_test_log(self):
+ failed_log = get_absolute_path(
+ 'test_data/test_is_test_passed-failure.log')
+ file = open(failed_log)
+ result = kunit_parser.parse_run_tests(file.readlines())
+ self.assertEqual(
+ kunit_parser.TestStatus.FAILURE,
+ result.status)
+ file.close()
+
+ def test_no_tests(self):
+ empty_log = get_absolute_path(
+ 'test_data/test_is_test_passed-no_tests_run.log')
+ file = open(empty_log)
+ result = kunit_parser.parse_run_tests(
+ kunit_parser.isolate_kunit_output(file.readlines()))
+ self.assertEqual(0, len(result.suites))
+ self.assertEqual(
+ kunit_parser.TestStatus.NO_TESTS,
+ result.status)
+ file.close()
+
+ def test_crashed_test(self):
+ crashed_log = get_absolute_path(
+ 'test_data/test_is_test_passed-crash.log')
+ file = open(crashed_log)
+ result = kunit_parser.parse_run_tests(file.readlines())
+ self.assertEqual(
+ kunit_parser.TestStatus.TEST_CRASHED,
+ result.status)
+ file.close()
+
+class StrContains(str):
+ def __eq__(self, other):
+ return self in other
+
+class KUnitMainTest(unittest.TestCase):
+ def setUp(self):
+ path = get_absolute_path('test_data/test_is_test_passed-all_passed.log')
+ file = open(path)
+ all_passed_log = file.readlines()
+ self.print_patch = mock.patch('builtins.print')
+ self.print_mock = self.print_patch.start()
+ self.linux_source_mock = mock.Mock()
+ self.linux_source_mock.build_reconfig = mock.Mock(return_value=True)
+ self.linux_source_mock.build_um_kernel = mock.Mock(return_value=True)
+ self.linux_source_mock.run_kernel = mock.Mock(return_value=all_passed_log)
+
+ def tearDown(self):
+ self.print_patch.stop()
+ pass
+
+ def test_run_passes_args_pass(self):
+ kunit.main(['run'], self.linux_source_mock)
+ assert self.linux_source_mock.build_reconfig.call_count == 1
+ assert self.linux_source_mock.run_kernel.call_count == 1
+ self.print_mock.assert_any_call(StrContains('Testing complete.'))
+
+ def test_run_passes_args_fail(self):
+ self.linux_source_mock.run_kernel = mock.Mock(return_value=[])
+ with self.assertRaises(SystemExit) as e:
+ kunit.main(['run'], self.linux_source_mock)
+ assert type(e.exception) == SystemExit
+ assert e.exception.code == 1
+ assert self.linux_source_mock.build_reconfig.call_count == 1
+ assert self.linux_source_mock.run_kernel.call_count == 1
+ self.print_mock.assert_any_call(StrContains(' 0 tests run'))
+
+ def test_run_raw_output(self):
+ self.linux_source_mock.run_kernel = mock.Mock(return_value=[])
+ kunit.main(['run', '--raw_output'], self.linux_source_mock)
+ assert self.linux_source_mock.build_reconfig.call_count == 1
+ assert self.linux_source_mock.run_kernel.call_count == 1
+ for kall in self.print_mock.call_args_list:
+ assert kall != mock.call(StrContains('Testing complete.'))
+ assert kall != mock.call(StrContains(' 0 tests run'))
+
+ def test_run_timeout(self):
+ timeout = 3453
+ kunit.main(['run', '--timeout', str(timeout)], self.linux_source_mock)
+ assert self.linux_source_mock.build_reconfig.call_count == 1
+ self.linux_source_mock.run_kernel.assert_called_once_with(timeout=timeout)
+ self.print_mock.assert_any_call(StrContains('Testing complete.'))
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tools/testing/kunit/test_data/test_is_test_passed-all_passed.log b/tools/testing/kunit/test_data/test_is_test_passed-all_passed.log
new file mode 100644
index 000000000000..62ebc0288355
--- /dev/null
+++ b/tools/testing/kunit/test_data/test_is_test_passed-all_passed.log
@@ -0,0 +1,32 @@
+TAP version 14
+ # Subtest: sysctl_test
+ 1..8
+ # sysctl_test_dointvec_null_tbl_data: sysctl_test_dointvec_null_tbl_data passed
+ ok 1 - sysctl_test_dointvec_null_tbl_data
+ # sysctl_test_dointvec_table_maxlen_unset: sysctl_test_dointvec_table_maxlen_unset passed
+ ok 2 - sysctl_test_dointvec_table_maxlen_unset
+ # sysctl_test_dointvec_table_len_is_zero: sysctl_test_dointvec_table_len_is_zero passed
+ ok 3 - sysctl_test_dointvec_table_len_is_zero
+ # sysctl_test_dointvec_table_read_but_position_set: sysctl_test_dointvec_table_read_but_position_set passed
+ ok 4 - sysctl_test_dointvec_table_read_but_position_set
+ # sysctl_test_dointvec_happy_single_positive: sysctl_test_dointvec_happy_single_positive passed
+ ok 5 - sysctl_test_dointvec_happy_single_positive
+ # sysctl_test_dointvec_happy_single_negative: sysctl_test_dointvec_happy_single_negative passed
+ ok 6 - sysctl_test_dointvec_happy_single_negative
+ # sysctl_test_dointvec_single_less_int_min: sysctl_test_dointvec_single_less_int_min passed
+ ok 7 - sysctl_test_dointvec_single_less_int_min
+ # sysctl_test_dointvec_single_greater_int_max: sysctl_test_dointvec_single_greater_int_max passed
+ ok 8 - sysctl_test_dointvec_single_greater_int_max
+kunit sysctl_test: all tests passed
+ok 1 - sysctl_test
+ # Subtest: example
+ 1..2
+init_suite
+ # example_simple_test: initializing
+ # example_simple_test: example_simple_test passed
+ ok 1 - example_simple_test
+ # example_mock_test: initializing
+ # example_mock_test: example_mock_test passed
+ ok 2 - example_mock_test
+kunit example: all tests passed
+ok 2 - example
diff --git a/tools/testing/kunit/test_data/test_is_test_passed-crash.log b/tools/testing/kunit/test_data/test_is_test_passed-crash.log
new file mode 100644
index 000000000000..0b249870c8be
--- /dev/null
+++ b/tools/testing/kunit/test_data/test_is_test_passed-crash.log
@@ -0,0 +1,69 @@
+printk: console [tty0] enabled
+printk: console [mc-1] enabled
+TAP version 14
+ # Subtest: sysctl_test
+ 1..8
+ # sysctl_test_dointvec_null_tbl_data: sysctl_test_dointvec_null_tbl_data passed
+ ok 1 - sysctl_test_dointvec_null_tbl_data
+ # sysctl_test_dointvec_table_maxlen_unset: sysctl_test_dointvec_table_maxlen_unset passed
+ ok 2 - sysctl_test_dointvec_table_maxlen_unset
+ # sysctl_test_dointvec_table_len_is_zero: sysctl_test_dointvec_table_len_is_zero passed
+ ok 3 - sysctl_test_dointvec_table_len_is_zero
+ # sysctl_test_dointvec_table_read_but_position_set: sysctl_test_dointvec_table_read_but_position_set passed
+ ok 4 - sysctl_test_dointvec_table_read_but_position_set
+ # sysctl_test_dointvec_happy_single_positive: sysctl_test_dointvec_happy_single_positive passed
+ ok 5 - sysctl_test_dointvec_happy_single_positive
+ # sysctl_test_dointvec_happy_single_negative: sysctl_test_dointvec_happy_single_negative passed
+ ok 6 - sysctl_test_dointvec_happy_single_negative
+ # sysctl_test_dointvec_single_less_int_min: sysctl_test_dointvec_single_less_int_min passed
+ ok 7 - sysctl_test_dointvec_single_less_int_min
+ # sysctl_test_dointvec_single_greater_int_max: sysctl_test_dointvec_single_greater_int_max passed
+ ok 8 - sysctl_test_dointvec_single_greater_int_max
+kunit sysctl_test: all tests passed
+ok 1 - sysctl_test
+ # Subtest: example
+ 1..2
+init_suite
+ # example_simple_test: initializing
+Stack:
+ 6016f7db 6f81bd30 6f81bdd0 60021450
+ 6024b0e8 60021440 60018bbe 16f81bdc0
+ 00000001 6f81bd30 6f81bd20 6f81bdd0
+Call Trace:
+ [<6016f7db>] ? kunit_try_run_case+0xab/0xf0
+ [<60021450>] ? set_signals+0x0/0x60
+ [<60021440>] ? get_signals+0x0/0x10
+ [<60018bbe>] ? kunit_um_run_try_catch+0x5e/0xc0
+ [<60021450>] ? set_signals+0x0/0x60
+ [<60021440>] ? get_signals+0x0/0x10
+ [<60018bb3>] ? kunit_um_run_try_catch+0x53/0xc0
+ [<6016f321>] ? kunit_run_case_catch_errors+0x121/0x1a0
+ [<60018b60>] ? kunit_um_run_try_catch+0x0/0xc0
+ [<600189e0>] ? kunit_um_throw+0x0/0x180
+ [<6016f730>] ? kunit_try_run_case+0x0/0xf0
+ [<6016f600>] ? kunit_catch_run_case+0x0/0x130
+ [<6016edd0>] ? kunit_vprintk+0x0/0x30
+ [<6016ece0>] ? kunit_fail+0x0/0x40
+ [<6016eca0>] ? kunit_abort+0x0/0x40
+ [<6016ed20>] ? kunit_printk_emit+0x0/0xb0
+ [<6016f200>] ? kunit_run_case_catch_errors+0x0/0x1a0
+ [<6016f46e>] ? kunit_run_tests+0xce/0x260
+ [<6005b390>] ? unregister_console+0x0/0x190
+ [<60175b70>] ? suite_kunit_initexample_test_suite+0x0/0x20
+ [<60001cbb>] ? do_one_initcall+0x0/0x197
+ [<60001d47>] ? do_one_initcall+0x8c/0x197
+ [<6005cd20>] ? irq_to_desc+0x0/0x30
+ [<60002005>] ? kernel_init_freeable+0x1b3/0x272
+ [<6005c5ec>] ? printk+0x0/0x9b
+ [<601c0086>] ? kernel_init+0x26/0x160
+ [<60014442>] ? new_thread_handler+0x82/0xc0
+
+ # example_simple_test: kunit test case crashed!
+ # example_simple_test: example_simple_test failed
+ not ok 1 - example_simple_test
+ # example_mock_test: initializing
+ # example_mock_test: example_mock_test passed
+ ok 2 - example_mock_test
+kunit example: one or more tests failed
+not ok 2 - example
+List of all partitions:
diff --git a/tools/testing/kunit/test_data/test_is_test_passed-failure.log b/tools/testing/kunit/test_data/test_is_test_passed-failure.log
new file mode 100644
index 000000000000..9e89d32d5667
--- /dev/null
+++ b/tools/testing/kunit/test_data/test_is_test_passed-failure.log
@@ -0,0 +1,36 @@
+TAP version 14
+ # Subtest: sysctl_test
+ 1..8
+ # sysctl_test_dointvec_null_tbl_data: sysctl_test_dointvec_null_tbl_data passed
+ ok 1 - sysctl_test_dointvec_null_tbl_data
+ # sysctl_test_dointvec_table_maxlen_unset: sysctl_test_dointvec_table_maxlen_unset passed
+ ok 2 - sysctl_test_dointvec_table_maxlen_unset
+ # sysctl_test_dointvec_table_len_is_zero: sysctl_test_dointvec_table_len_is_zero passed
+ ok 3 - sysctl_test_dointvec_table_len_is_zero
+ # sysctl_test_dointvec_table_read_but_position_set: sysctl_test_dointvec_table_read_but_position_set passed
+ ok 4 - sysctl_test_dointvec_table_read_but_position_set
+ # sysctl_test_dointvec_happy_single_positive: sysctl_test_dointvec_happy_single_positive passed
+ ok 5 - sysctl_test_dointvec_happy_single_positive
+ # sysctl_test_dointvec_happy_single_negative: sysctl_test_dointvec_happy_single_negative passed
+ ok 6 - sysctl_test_dointvec_happy_single_negative
+ # sysctl_test_dointvec_single_less_int_min: sysctl_test_dointvec_single_less_int_min passed
+ ok 7 - sysctl_test_dointvec_single_less_int_min
+ # sysctl_test_dointvec_single_greater_int_max: sysctl_test_dointvec_single_greater_int_max passed
+ ok 8 - sysctl_test_dointvec_single_greater_int_max
+kunit sysctl_test: all tests passed
+ok 1 - sysctl_test
+ # Subtest: example
+ 1..2
+init_suite
+ # example_simple_test: initializing
+ # example_simple_test: EXPECTATION FAILED at lib/kunit/example-test.c:30
+ Expected 1 + 1 == 3, but
+ 1 + 1 == 2
+ 3 == 3
+ # example_simple_test: example_simple_test failed
+ not ok 1 - example_simple_test
+ # example_mock_test: initializing
+ # example_mock_test: example_mock_test passed
+ ok 2 - example_mock_test
+kunit example: one or more tests failed
+not ok 2 - example
diff --git a/tools/testing/kunit/test_data/test_is_test_passed-no_tests_run.log b/tools/testing/kunit/test_data/test_is_test_passed-no_tests_run.log
new file mode 100644
index 000000000000..ba69f5c94b75
--- /dev/null
+++ b/tools/testing/kunit/test_data/test_is_test_passed-no_tests_run.log
@@ -0,0 +1,75 @@
+Core dump limits :
+ soft - 0
+ hard - NONE
+Checking environment variables for a tempdir...none found
+Checking if /dev/shm is on tmpfs...OK
+Checking PROT_EXEC mmap in /dev/shm...OK
+Adding 24743936 bytes to physical memory to account for exec-shield gap
+Linux version 4.12.0-rc3-00010-g7319eb35f493-dirty (brendanhiggins@mactruck.svl.corp.google.com) (gcc version 7.3.0 (Debian 7.3.0-5) ) #29 Thu Mar 15 14:57:19 PDT 2018
+Built 1 zonelists in Zone order, mobility grouping on. Total pages: 14038
+Kernel command line: root=98:0
+PID hash table entries: 256 (order: -1, 2048 bytes)
+Dentry cache hash table entries: 8192 (order: 4, 65536 bytes)
+Inode-cache hash table entries: 4096 (order: 3, 32768 bytes)
+Memory: 27868K/56932K available (1681K kernel code, 480K rwdata, 400K rodata, 89K init, 205K bss, 29064K reserved, 0K cma-reserved)
+SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=1, Nodes=1
+NR_IRQS:15
+clocksource: timer: mask: 0xffffffffffffffff max_cycles: 0x1cd42e205, max_idle_ns: 881590404426 ns
+Calibrating delay loop... 7384.26 BogoMIPS (lpj=36921344)
+pid_max: default: 32768 minimum: 301
+Mount-cache hash table entries: 512 (order: 0, 4096 bytes)
+Mountpoint-cache hash table entries: 512 (order: 0, 4096 bytes)
+Checking that host ptys support output SIGIO...Yes
+Checking that host ptys support SIGIO on close...No, enabling workaround
+Using 2.6 host AIO
+clocksource: jiffies: mask: 0xffffffff max_cycles: 0xffffffff, max_idle_ns: 19112604462750000 ns
+futex hash table entries: 256 (order: 0, 6144 bytes)
+clocksource: Switched to clocksource timer
+console [stderr0] disabled
+mconsole (version 2) initialized on /usr/local/google/home/brendanhiggins/.uml/6Ijecl/mconsole
+Checking host MADV_REMOVE support...OK
+workingset: timestamp_bits=62 max_order=13 bucket_order=0
+Block layer SCSI generic (bsg) driver version 0.4 loaded (major 254)
+io scheduler noop registered
+io scheduler deadline registered
+io scheduler cfq registered (default)
+io scheduler mq-deadline registered
+io scheduler kyber registered
+Initialized stdio console driver
+Using a channel type which is configured out of UML
+setup_one_line failed for device 1 : Configuration failed
+Using a channel type which is configured out of UML
+setup_one_line failed for device 2 : Configuration failed
+Using a channel type which is configured out of UML
+setup_one_line failed for device 3 : Configuration failed
+Using a channel type which is configured out of UML
+setup_one_line failed for device 4 : Configuration failed
+Using a channel type which is configured out of UML
+setup_one_line failed for device 5 : Configuration failed
+Using a channel type which is configured out of UML
+setup_one_line failed for device 6 : Configuration failed
+Using a channel type which is configured out of UML
+setup_one_line failed for device 7 : Configuration failed
+Using a channel type which is configured out of UML
+setup_one_line failed for device 8 : Configuration failed
+Using a channel type which is configured out of UML
+setup_one_line failed for device 9 : Configuration failed
+Using a channel type which is configured out of UML
+setup_one_line failed for device 10 : Configuration failed
+Using a channel type which is configured out of UML
+setup_one_line failed for device 11 : Configuration failed
+Using a channel type which is configured out of UML
+setup_one_line failed for device 12 : Configuration failed
+Using a channel type which is configured out of UML
+setup_one_line failed for device 13 : Configuration failed
+Using a channel type which is configured out of UML
+setup_one_line failed for device 14 : Configuration failed
+Using a channel type which is configured out of UML
+setup_one_line failed for device 15 : Configuration failed
+Console initialized on /dev/tty0
+console [tty0] enabled
+console [mc-1] enabled
+List of all partitions:
+No filesystem could mount root, tried:
+
+Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(98,0)
diff --git a/tools/testing/kunit/test_data/test_output_isolated_correctly.log b/tools/testing/kunit/test_data/test_output_isolated_correctly.log
new file mode 100644
index 000000000000..94a6b3aeaa92
--- /dev/null
+++ b/tools/testing/kunit/test_data/test_output_isolated_correctly.log
@@ -0,0 +1,106 @@
+Linux version 5.1.0-rc7-00061-g04652f1cb4aa0 (brendanhiggins@mactruck.svl.corp.google.com) (gcc version 7.3.0 (Debian 7.3.0-18)) #163 Wed May 8 16:18:20 PDT 2019
+Built 1 zonelists, mobility grouping on. Total pages: 69906
+Kernel command line: mem=256M root=98:0
+Dentry cache hash table entries: 65536 (order: 7, 524288 bytes)
+Inode-cache hash table entries: 32768 (order: 6, 262144 bytes)
+Memory: 254468K/283500K available (1734K kernel code, 489K rwdata, 396K rodata, 85K init, 216K bss, 29032K reserved, 0K cma-reserved)
+SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=1, Nodes=1
+NR_IRQS: 15
+clocksource: timer: mask: 0xffffffffffffffff max_cycles: 0x1cd42e205, max_idle_ns: 881590404426 ns
+------------[ cut here ]------------
+WARNING: CPU: 0 PID: 0 at kernel/time/clockevents.c:458 clockevents_register_device+0x143/0x160
+posix-timer cpumask == cpu_all_mask, using cpu_possible_mask instead
+CPU: 0 PID: 0 Comm: swapper Not tainted 5.1.0-rc7-00061-g04652f1cb4aa0 #163
+Stack:
+ 6005cc00 60233e18 60233e60 60233e18
+ 60233e60 00000009 00000000 6002a1b4
+ 1ca00000000 60071c23 60233e78 100000000000062
+Call Trace:
+ [<600214c5>] ? os_is_signal_stack+0x15/0x30
+ [<6005c5ec>] ? printk+0x0/0x9b
+ [<6001597e>] ? show_stack+0xbe/0x1c0
+ [<6005cc00>] ? __printk_safe_exit+0x0/0x40
+ [<6002a1b4>] ? __warn+0x144/0x170
+ [<60071c23>] ? clockevents_register_device+0x143/0x160
+ [<60021440>] ? get_signals+0x0/0x10
+ [<6005c5ec>] ? printk+0x0/0x9b
+ [<6002a27b>] ? warn_slowpath_fmt+0x9b/0xb0
+ [<6005c5ec>] ? printk+0x0/0x9b
+ [<6002a1e0>] ? warn_slowpath_fmt+0x0/0xb0
+ [<6005c5ec>] ? printk+0x0/0x9b
+ [<60021440>] ? get_signals+0x0/0x10
+ [<600213f0>] ? block_signals+0x0/0x20
+ [<60071c23>] ? clockevents_register_device+0x143/0x160
+ [<60021440>] ? get_signals+0x0/0x10
+ [<600213f0>] ? block_signals+0x0/0x20
+ [<6005c5ec>] ? printk+0x0/0x9b
+ [<60001bc8>] ? start_kernel+0x477/0x56a
+ [<600036f1>] ? start_kernel_proc+0x46/0x4d
+ [<60014442>] ? new_thread_handler+0x82/0xc0
+
+random: get_random_bytes called from print_oops_end_marker+0x4c/0x60 with crng_init=0
+---[ end trace c83434852b3702d3 ]---
+Calibrating delay loop... 6958.28 BogoMIPS (lpj=34791424)
+pid_max: default: 32768 minimum: 301
+Mount-cache hash table entries: 1024 (order: 1, 8192 bytes)
+Mountpoint-cache hash table entries: 1024 (order: 1, 8192 bytes)
+*** VALIDATE proc ***
+Checking that host ptys support output SIGIO...Yes
+Checking that host ptys support SIGIO on close...No, enabling workaround
+clocksource: jiffies: mask: 0xffffffff max_cycles: 0xffffffff, max_idle_ns: 19112604462750000 ns
+futex hash table entries: 256 (order: 0, 6144 bytes)
+clocksource: Switched to clocksource timer
+printk: console [stderr0] disabled
+mconsole (version 2) initialized on /usr/local/google/home/brendanhiggins/.uml/VZ2qMm/mconsole
+Checking host MADV_REMOVE support...OK
+workingset: timestamp_bits=62 max_order=16 bucket_order=0
+Block layer SCSI generic (bsg) driver version 0.4 loaded (major 254)
+io scheduler mq-deadline registered
+io scheduler kyber registered
+Initialized stdio console driver
+Using a channel type which is configured out of UML
+setup_one_line failed for device 1 : Configuration failed
+Using a channel type which is configured out of UML
+setup_one_line failed for device 2 : Configuration failed
+Using a channel type which is configured out of UML
+setup_one_line failed for device 3 : Configuration failed
+Using a channel type which is configured out of UML
+setup_one_line failed for device 4 : Configuration failed
+Using a channel type which is configured out of UML
+setup_one_line failed for device 5 : Configuration failed
+Using a channel type which is configured out of UML
+setup_one_line failed for device 6 : Configuration failed
+Using a channel type which is configured out of UML
+setup_one_line failed for device 7 : Configuration failed
+Using a channel type which is configured out of UML
+setup_one_line failed for device 8 : Configuration failed
+Using a channel type which is configured out of UML
+setup_one_line failed for device 9 : Configuration failed
+Using a channel type which is configured out of UML
+setup_one_line failed for device 10 : Configuration failed
+Using a channel type which is configured out of UML
+setup_one_line failed for device 11 : Configuration failed
+Using a channel type which is configured out of UML
+setup_one_line failed for device 12 : Configuration failed
+Using a channel type which is configured out of UML
+setup_one_line failed for device 13 : Configuration failed
+Using a channel type which is configured out of UML
+setup_one_line failed for device 14 : Configuration failed
+Using a channel type which is configured out of UML
+setup_one_line failed for device 15 : Configuration failed
+Console initialized on /dev/tty0
+printk: console [tty0] enabled
+printk: console [mc-1] enabled
+TAP version 14
+ # Subtest: example
+ 1..2
+init_suite
+ # example_simple_test: initializing
+ # example_simple_test: example_simple_test passed
+ ok 1 - example_simple_test
+ # example_mock_test: initializing
+ # example_mock_test: example_mock_test passed
+ ok 2 - example_mock_test
+kunit example: all tests passed
+ok 1 - example
+List of all partitions:
diff --git a/tools/testing/kunit/test_data/test_read_from_file.kconfig b/tools/testing/kunit/test_data/test_read_from_file.kconfig
new file mode 100644
index 000000000000..d2a4928ac773
--- /dev/null
+++ b/tools/testing/kunit/test_data/test_read_from_file.kconfig
@@ -0,0 +1,17 @@
+#
+# Automatically generated file; DO NOT EDIT.
+# User Mode Linux/x86 4.12.0-rc3 Kernel Configuration
+#
+CONFIG_UML=y
+CONFIG_MMU=y
+
+#
+# UML-specific options
+#
+
+#
+# Host processor type and features
+#
+# CONFIG_MK8 is not set
+CONFIG_TEST=y
+CONFIG_EXAMPLE_TEST=y
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index c3feccb99ff5..0b6b81ba4421 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
TARGETS = android
+TARGETS += arm64
TARGETS += bpf
TARGETS += breakpoints
TARGETS += capabilities
@@ -63,6 +64,13 @@ TARGETS += zram
TARGETS_HOTPLUG = cpu-hotplug
TARGETS_HOTPLUG += memory-hotplug
+# User can optionally provide a TARGETS skiplist.
+SKIP_TARGETS ?=
+ifneq ($(SKIP_TARGETS),)
+ TMP := $(filter-out $(SKIP_TARGETS), $(TARGETS))
+ override TARGETS := $(TMP)
+endif
+
# Clear LDFLAGS and MAKEFLAGS if called from main
# Makefile to avoid test build failures when test
# Makefile doesn't have explicit build rules.
@@ -79,10 +87,10 @@ override LDFLAGS =
endif
ifneq ($(O),)
- BUILD := $(O)
+ BUILD := $(abs_objtree)
else
ifneq ($(KBUILD_OUTPUT),)
- BUILD := $(KBUILD_OUTPUT)/kselftest
+ BUILD := $(abs_objtree)/kselftest
else
BUILD := $(shell pwd)
DEFAULT_INSTALL_HDR_PATH := 1
@@ -95,6 +103,7 @@ include $(top_srcdir)/scripts/subarch.include
ARCH ?= $(SUBARCH)
export KSFT_KHDR_INSTALL_DONE := 1
export BUILD
+#$(info abd_objtree = $(abs_objtree) BUILD = $(BUILD))
# build and run gpio when output directory is the src dir.
# gpio has dependency on tools/gpio and builds tools/gpio
@@ -171,15 +180,19 @@ run_pstore_crash:
# 1. output_dir=kernel_src
# 2. a separate output directory is specified using O= KBUILD_OUTPUT
# 3. a separate output directory is specified using KBUILD_OUTPUT
+# Avoid conflict with INSTALL_PATH set by the main Makefile
#
-INSTALL_PATH ?= $(BUILD)/install
-INSTALL_PATH := $(abspath $(INSTALL_PATH))
+KSFT_INSTALL_PATH ?= $(BUILD)/kselftest_install
+KSFT_INSTALL_PATH := $(abspath $(KSFT_INSTALL_PATH))
+# Avoid changing the rest of the logic here and lib.mk.
+INSTALL_PATH := $(KSFT_INSTALL_PATH)
ALL_SCRIPT := $(INSTALL_PATH)/run_kselftest.sh
install: all
ifdef INSTALL_PATH
@# Ask all targets to install their files
mkdir -p $(INSTALL_PATH)/kselftest
+ install -m 744 kselftest/module.sh $(INSTALL_PATH)/kselftest/
install -m 744 kselftest/runner.sh $(INSTALL_PATH)/kselftest/
install -m 744 kselftest/prefix.pl $(INSTALL_PATH)/kselftest/
@for TARGET in $(TARGETS); do \
@@ -198,11 +211,16 @@ ifdef INSTALL_PATH
echo " cat /dev/null > \$$logfile" >> $(ALL_SCRIPT)
echo "fi" >> $(ALL_SCRIPT)
+ @# While building run_kselftest.sh skip also non-existent TARGET dirs:
+ @# they could be the result of a build failure and should NOT be
+ @# included in the generated runlist.
for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
+ [ ! -d $(INSTALL_PATH)/$$TARGET ] && echo "Skipping non-existent dir: $$TARGET" && continue; \
echo "[ -w /dev/kmsg ] && echo \"kselftest: Running tests in $$TARGET\" >> /dev/kmsg" >> $(ALL_SCRIPT); \
echo "cd $$TARGET" >> $(ALL_SCRIPT); \
echo -n "run_many" >> $(ALL_SCRIPT); \
+ echo -n "Emit Tests for $$TARGET\n"; \
$(MAKE) -s --no-print-directory OUTPUT=$$BUILD_TARGET -C $$TARGET emit_tests >> $(ALL_SCRIPT); \
echo "" >> $(ALL_SCRIPT); \
echo "cd \$$ROOT" >> $(ALL_SCRIPT); \
diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile
index f9f79fb272f0..93b567d23c8b 100644
--- a/tools/testing/selftests/arm64/Makefile
+++ b/tools/testing/selftests/arm64/Makefile
@@ -1,12 +1,66 @@
# SPDX-License-Identifier: GPL-2.0
-# ARCH can be overridden by the user for cross compiling
+# When ARCH not overridden for crosscompiling, lookup machine
ARCH ?= $(shell uname -m 2>/dev/null || echo not)
ifneq (,$(filter $(ARCH),aarch64 arm64))
-CFLAGS += -I../../../../usr/include/
-TEST_GEN_PROGS := tags_test
-TEST_PROGS := run_tags_test.sh
+ARM64_SUBTARGETS ?= tags signal
+else
+ARM64_SUBTARGETS :=
endif
-include ../lib.mk
+CFLAGS := -Wall -O2 -g
+
+# A proper top_srcdir is needed by KSFT(lib.mk)
+top_srcdir = $(realpath ../../../../)
+
+# Additional include paths needed by kselftest.h and local headers
+CFLAGS += -I$(top_srcdir)/tools/testing/selftests/
+
+# Guessing where the Kernel headers could have been installed
+# depending on ENV config
+ifeq ($(KBUILD_OUTPUT),)
+khdr_dir = $(top_srcdir)/usr/include
+else
+# the KSFT preferred location when KBUILD_OUTPUT is set
+khdr_dir = $(KBUILD_OUTPUT)/kselftest/usr/include
+endif
+
+CFLAGS += -I$(khdr_dir)
+
+export CFLAGS
+export top_srcdir
+
+all:
+ @for DIR in $(ARM64_SUBTARGETS); do \
+ BUILD_TARGET=$(OUTPUT)/$$DIR; \
+ mkdir -p $$BUILD_TARGET; \
+ make OUTPUT=$$BUILD_TARGET -C $$DIR $@; \
+ done
+
+install: all
+ @for DIR in $(ARM64_SUBTARGETS); do \
+ BUILD_TARGET=$(OUTPUT)/$$DIR; \
+ make OUTPUT=$$BUILD_TARGET -C $$DIR $@; \
+ done
+
+run_tests: all
+ @for DIR in $(ARM64_SUBTARGETS); do \
+ BUILD_TARGET=$(OUTPUT)/$$DIR; \
+ make OUTPUT=$$BUILD_TARGET -C $$DIR $@; \
+ done
+
+# Avoid any output on non arm64 on emit_tests
+emit_tests: all
+ @for DIR in $(ARM64_SUBTARGETS); do \
+ BUILD_TARGET=$(OUTPUT)/$$DIR; \
+ make OUTPUT=$$BUILD_TARGET -C $$DIR $@; \
+ done
+
+clean:
+ @for DIR in $(ARM64_SUBTARGETS); do \
+ BUILD_TARGET=$(OUTPUT)/$$DIR; \
+ make OUTPUT=$$BUILD_TARGET -C $$DIR $@; \
+ done
+
+.PHONY: all clean install run_tests emit_tests
diff --git a/tools/testing/selftests/arm64/README b/tools/testing/selftests/arm64/README
new file mode 100644
index 000000000000..a1badd882102
--- /dev/null
+++ b/tools/testing/selftests/arm64/README
@@ -0,0 +1,25 @@
+KSelfTest ARM64
+===============
+
+- These tests are arm64 specific and so not built or run but just skipped
+ completely when env-variable ARCH is found to be different than 'arm64'
+ and `uname -m` reports other than 'aarch64'.
+
+- Holding true the above, ARM64 KSFT tests can be run within the KSelfTest
+ framework using standard Linux top-level-makefile targets:
+
+ $ make TARGETS=arm64 kselftest-clean
+ $ make TARGETS=arm64 kselftest
+
+ or
+
+ $ make -C tools/testing/selftests TARGETS=arm64 \
+ INSTALL_PATH=<your-installation-path> install
+
+ or, alternatively, only specific arm64/ subtargets can be picked:
+
+ $ make -C tools/testing/selftests TARGETS=arm64 ARM64_SUBTARGETS="tags signal" \
+ INSTALL_PATH=<your-installation-path> install
+
+ Further details on building and running KFST can be found in:
+ Documentation/dev-tools/kselftest.rst
diff --git a/tools/testing/selftests/arm64/signal/.gitignore b/tools/testing/selftests/arm64/signal/.gitignore
new file mode 100644
index 000000000000..3c5b4e8ff894
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/.gitignore
@@ -0,0 +1,3 @@
+mangle_*
+fake_sigreturn_*
+!*.[ch]
diff --git a/tools/testing/selftests/arm64/signal/Makefile b/tools/testing/selftests/arm64/signal/Makefile
new file mode 100644
index 000000000000..b497cfea4643
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/Makefile
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2019 ARM Limited
+
+# Additional include paths needed by kselftest.h and local headers
+CFLAGS += -D_GNU_SOURCE -std=gnu99 -I.
+
+SRCS := $(filter-out testcases/testcases.c,$(wildcard testcases/*.c))
+PROGS := $(patsubst %.c,%,$(SRCS))
+
+# Generated binaries to be installed by top KSFT script
+TEST_GEN_PROGS := $(notdir $(PROGS))
+
+# Get Kernel headers installed and use them.
+KSFT_KHDR_INSTALL := 1
+
+# Including KSFT lib.mk here will also mangle the TEST_GEN_PROGS list
+# to account for any OUTPUT target-dirs optionally provided by
+# the toplevel makefile
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): $(PROGS)
+ cp $(PROGS) $(OUTPUT)/
+
+clean:
+ $(CLEAN)
+ rm -f $(PROGS)
+
+# Common test-unit targets to build common-layout test-cases executables
+# Needs secondary expansion to properly include the testcase c-file in pre-reqs
+.SECONDEXPANSION:
+$(PROGS): test_signals.c test_signals_utils.c testcases/testcases.c signals.S $$@.c test_signals.h test_signals_utils.h testcases/testcases.h
+ $(CC) $(CFLAGS) $^ -o $@
diff --git a/tools/testing/selftests/arm64/signal/README b/tools/testing/selftests/arm64/signal/README
new file mode 100644
index 000000000000..967a531b245c
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/README
@@ -0,0 +1,59 @@
+KSelfTest arm64/signal/
+=======================
+
+Signals Tests
++++++++++++++
+
+- Tests are built around a common main compilation unit: such shared main
+ enforces a standard sequence of operations needed to perform a single
+ signal-test (setup/trigger/run/result/cleanup)
+
+- The above mentioned ops are configurable on a test-by-test basis: each test
+ is described (and configured) using the descriptor signals.h::struct tdescr
+
+- Each signal testcase is compiled into its own executable: a separate
+ executable is used for each test since many tests complete successfully
+ by receiving some kind of fatal signal from the Kernel, so it's safer
+ to run each test unit in its own standalone process, so as to start each
+ test from a clean slate.
+
+- New tests can be simply defined in testcases/ dir providing a proper struct
+ tdescr overriding all the defaults we wish to change (as of now providing a
+ custom run method is mandatory though)
+
+- Signals' test-cases hereafter defined belong currently to two
+ principal families:
+
+ - 'mangle_' tests: a real signal (SIGUSR1) is raised and used as a trigger
+ and then the test case code modifies the signal frame from inside the
+ signal handler itself.
+
+ - 'fake_sigreturn_' tests: a brand new custom artificial sigframe structure
+ is placed on the stack and a sigreturn syscall is called to simulate a
+ real signal return. This kind of tests does not use a trigger usually and
+ they are just fired using some simple included assembly trampoline code.
+
+ - Most of these tests are successfully passing if the process gets killed by
+ some fatal signal: usually SIGSEGV or SIGBUS. Since while writing this
+ kind of tests it is extremely easy in fact to end-up injecting other
+ unrelated SEGV bugs in the testcases, it becomes extremely tricky to
+ be really sure that the tests are really addressing what they are meant
+ to address and they are not instead falling apart due to unplanned bugs
+ in the test code.
+ In order to alleviate the misery of the life of such test-developer, a few
+ helpers are provided:
+
+ - a couple of ASSERT_BAD/GOOD_CONTEXT() macros to easily parse a ucontext_t
+ and verify if it is indeed GOOD or BAD (depending on what we were
+ expecting), using the same logic/perspective as in the arm64 Kernel signals
+ routines.
+
+ - a sanity mechanism to be used in 'fake_sigreturn_'-alike tests: enabled by
+ default it takes care to verify that the test-execution had at least
+ successfully progressed up to the stage of triggering the fake sigreturn
+ call.
+
+ In both cases test results are expected in terms of:
+ - some fatal signal sent by the Kernel to the test process
+ or
+ - analyzing some final regs state
diff --git a/tools/testing/selftests/arm64/signal/signals.S b/tools/testing/selftests/arm64/signal/signals.S
new file mode 100644
index 000000000000..9f8c1aefc3b9
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/signals.S
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019 ARM Limited */
+
+#include <asm/unistd.h>
+
+.section .rodata, "a"
+call_fmt:
+ .asciz "Calling sigreturn with fake sigframe sized:%zd at SP @%08lX\n"
+
+.text
+
+.globl fake_sigreturn
+
+/* fake_sigreturn x0:&sigframe, x1:sigframe_size, x2:misalign_bytes */
+fake_sigreturn:
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+
+ mov x20, x0
+ mov x21, x1
+ mov x22, x2
+
+ /* create space on the stack for fake sigframe 16 bytes-aligned */
+ add x0, x21, x22
+ add x0, x0, #15
+ bic x0, x0, #15 /* round_up(sigframe_size + misalign_bytes, 16) */
+ sub sp, sp, x0
+ add x23, sp, x22 /* new sigframe base with misaligment if any */
+
+ ldr x0, =call_fmt
+ mov x1, x21
+ mov x2, x23
+ bl printf
+
+ /* memcpy the provided content, while still keeping SP aligned */
+ mov x0, x23
+ mov x1, x20
+ mov x2, x21
+ bl memcpy
+
+ /*
+ * Here saving a last minute SP to current->token acts as a marker:
+ * if we got here, we are successfully faking a sigreturn; in other
+ * words we are sure no bad fatal signal has been raised till now
+ * for unrelated reasons, so we should consider the possibly observed
+ * fatal signal like SEGV coming from Kernel restore_sigframe() and
+ * triggered as expected from our test-case.
+ * For simplicity this assumes that current field 'token' is laid out
+ * as first in struct tdescr
+ */
+ ldr x0, current
+ str x23, [x0]
+ /* finally move SP to misaligned address...if any requested */
+ mov sp, x23
+
+ mov x8, #__NR_rt_sigreturn
+ svc #0
+
+ /*
+ * Above sigreturn should not return...looping here leads to a timeout
+ * and ensure proper and clean test failure, instead of jumping around
+ * on a potentially corrupted stack.
+ */
+ b .
diff --git a/tools/testing/selftests/arm64/signal/test_signals.c b/tools/testing/selftests/arm64/signal/test_signals.c
new file mode 100644
index 000000000000..416b1ff43199
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/test_signals.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 ARM Limited
+ *
+ * Generic test wrapper for arm64 signal tests.
+ *
+ * Each test provides its own tde struct tdescr descriptor to link with
+ * this wrapper. Framework provides common helpers.
+ */
+#include <kselftest.h>
+
+#include "test_signals.h"
+#include "test_signals_utils.h"
+
+struct tdescr *current;
+
+int main(int argc, char *argv[])
+{
+ current = &tde;
+
+ ksft_print_msg("%s :: %s\n", current->name, current->descr);
+ if (test_setup(current) && test_init(current)) {
+ test_run(current);
+ test_cleanup(current);
+ }
+ test_result(current);
+
+ return current->result;
+}
diff --git a/tools/testing/selftests/arm64/signal/test_signals.h b/tools/testing/selftests/arm64/signal/test_signals.h
new file mode 100644
index 000000000000..f96baf1cef1a
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/test_signals.h
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019 ARM Limited */
+
+#ifndef __TEST_SIGNALS_H__
+#define __TEST_SIGNALS_H__
+
+#include <signal.h>
+#include <stdbool.h>
+#include <ucontext.h>
+
+/*
+ * Using ARCH specific and sanitized Kernel headers installed by KSFT
+ * framework since we asked for it by setting flag KSFT_KHDR_INSTALL
+ * in our Makefile.
+ */
+#include <asm/ptrace.h>
+#include <asm/hwcap.h>
+
+#define __stringify_1(x...) #x
+#define __stringify(x...) __stringify_1(x)
+
+#define get_regval(regname, out) \
+{ \
+ asm volatile("mrs %0, " __stringify(regname) \
+ : "=r" (out) \
+ : \
+ : "memory"); \
+}
+
+/*
+ * Feature flags used in tdescr.feats_required to specify
+ * any feature by the test
+ */
+enum {
+ FSSBS_BIT,
+ FMAX_END
+};
+
+#define FEAT_SSBS (1UL << FSSBS_BIT)
+
+/*
+ * A descriptor used to describe and configure a test case.
+ * Fields with a non-trivial meaning are described inline in the following.
+ */
+struct tdescr {
+ /* KEEP THIS FIELD FIRST for easier lookup from assembly */
+ void *token;
+ /* when disabled token based sanity checking is skipped in handler */
+ bool sanity_disabled;
+ /* just a name for the test-case; manadatory field */
+ char *name;
+ char *descr;
+ unsigned long feats_required;
+ /* bitmask of effectively supported feats: populated at run-time */
+ unsigned long feats_supported;
+ bool initialized;
+ unsigned int minsigstksz;
+ /* signum used as a test trigger. Zero if no trigger-signal is used */
+ int sig_trig;
+ /*
+ * signum considered as a successful test completion.
+ * Zero when no signal is expected on success
+ */
+ int sig_ok;
+ /* signum expected on unsupported CPU features. */
+ int sig_unsupp;
+ /* a timeout in second for test completion */
+ unsigned int timeout;
+ bool triggered;
+ bool pass;
+ unsigned int result;
+ /* optional sa_flags for the installed handler */
+ int sa_flags;
+ ucontext_t saved_uc;
+ /* used by get_current_ctx() */
+ size_t live_sz;
+ ucontext_t *live_uc;
+ volatile sig_atomic_t live_uc_valid;
+ /* optional test private data */
+ void *priv;
+
+ /* a custom setup: called alternatively to default_setup */
+ int (*setup)(struct tdescr *td);
+ /* a custom init: called by default test init after test_setup */
+ bool (*init)(struct tdescr *td);
+ /* a custom cleanup function called before test exits */
+ void (*cleanup)(struct tdescr *td);
+ /* an optional function to be used as a trigger for starting test */
+ int (*trigger)(struct tdescr *td);
+ /*
+ * the actual test-core: invoked differently depending on the
+ * presence of the trigger function above; this is mandatory
+ */
+ int (*run)(struct tdescr *td, siginfo_t *si, ucontext_t *uc);
+ /* an optional function for custom results' processing */
+ void (*check_result)(struct tdescr *td);
+};
+
+extern struct tdescr tde;
+#endif
diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c
new file mode 100644
index 000000000000..2de6e5ed5e25
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c
@@ -0,0 +1,328 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2019 ARM Limited */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <sys/auxv.h>
+#include <linux/auxvec.h>
+#include <ucontext.h>
+
+#include <asm/unistd.h>
+
+#include <kselftest.h>
+
+#include "test_signals.h"
+#include "test_signals_utils.h"
+#include "testcases/testcases.h"
+
+
+extern struct tdescr *current;
+
+static int sig_copyctx = SIGTRAP;
+
+static char const *const feats_names[FMAX_END] = {
+ " SSBS ",
+};
+
+#define MAX_FEATS_SZ 128
+static char feats_string[MAX_FEATS_SZ];
+
+static inline char *feats_to_string(unsigned long feats)
+{
+ size_t flen = MAX_FEATS_SZ - 1;
+
+ for (int i = 0; i < FMAX_END; i++) {
+ if (feats & (1UL << i)) {
+ size_t tlen = strlen(feats_names[i]);
+
+ assert(flen > tlen);
+ flen -= tlen;
+ strncat(feats_string, feats_names[i], flen);
+ }
+ }
+
+ return feats_string;
+}
+
+static void unblock_signal(int signum)
+{
+ sigset_t sset;
+
+ sigemptyset(&sset);
+ sigaddset(&sset, signum);
+ sigprocmask(SIG_UNBLOCK, &sset, NULL);
+}
+
+static void default_result(struct tdescr *td, bool force_exit)
+{
+ if (td->result == KSFT_SKIP) {
+ fprintf(stderr, "==>> completed. SKIP.\n");
+ } else if (td->pass) {
+ fprintf(stderr, "==>> completed. PASS(1)\n");
+ td->result = KSFT_PASS;
+ } else {
+ fprintf(stdout, "==>> completed. FAIL(0)\n");
+ td->result = KSFT_FAIL;
+ }
+
+ if (force_exit)
+ exit(td->result);
+}
+
+/*
+ * The following handle_signal_* helpers are used by main default_handler
+ * and are meant to return true when signal is handled successfully:
+ * when false is returned instead, it means that the signal was somehow
+ * unexpected in that context and it was NOT handled; default_handler will
+ * take care of such unexpected situations.
+ */
+
+static bool handle_signal_unsupported(struct tdescr *td,
+ siginfo_t *si, void *uc)
+{
+ if (feats_ok(td))
+ return false;
+
+ /* Mangling PC to avoid loops on original SIGILL */
+ ((ucontext_t *)uc)->uc_mcontext.pc += 4;
+
+ if (!td->initialized) {
+ fprintf(stderr,
+ "Got SIG_UNSUPP @test_init. Ignore.\n");
+ } else {
+ fprintf(stderr,
+ "-- RX SIG_UNSUPP on unsupported feat...OK\n");
+ td->pass = 1;
+ default_result(current, 1);
+ }
+
+ return true;
+}
+
+static bool handle_signal_trigger(struct tdescr *td,
+ siginfo_t *si, void *uc)
+{
+ td->triggered = 1;
+ /* ->run was asserted NON-NULL in test_setup() already */
+ td->run(td, si, uc);
+
+ return true;
+}
+
+static bool handle_signal_ok(struct tdescr *td,
+ siginfo_t *si, void *uc)
+{
+ /*
+ * it's a bug in the test code when this assert fail:
+ * if sig_trig was defined, it must have been used before getting here.
+ */
+ assert(!td->sig_trig || td->triggered);
+ fprintf(stderr,
+ "SIG_OK -- SP:0x%llX si_addr@:%p si_code:%d token@:%p offset:%ld\n",
+ ((ucontext_t *)uc)->uc_mcontext.sp,
+ si->si_addr, si->si_code, td->token, td->token - si->si_addr);
+ /*
+ * fake_sigreturn tests, which have sanity_enabled=1, set, at the very
+ * last time, the token field to the SP address used to place the fake
+ * sigframe: so token==0 means we never made it to the end,
+ * segfaulting well-before, and the test is possibly broken.
+ */
+ if (!td->sanity_disabled && !td->token) {
+ fprintf(stdout,
+ "current->token ZEROED...test is probably broken!\n");
+ abort();
+ }
+ /*
+ * Trying to narrow down the SEGV to the ones generated by Kernel itself
+ * via arm64_notify_segfault(). This is a best-effort check anyway, and
+ * the si_code check may need to change if this aspect of the kernel
+ * ABI changes.
+ */
+ if (td->sig_ok == SIGSEGV && si->si_code != SEGV_ACCERR) {
+ fprintf(stdout,
+ "si_code != SEGV_ACCERR...test is probably broken!\n");
+ abort();
+ }
+ td->pass = 1;
+ /*
+ * Some tests can lead to SEGV loops: in such a case we want to
+ * terminate immediately exiting straight away; some others are not
+ * supposed to outlive the signal handler code, due to the content of
+ * the fake sigframe which caused the signal itself.
+ */
+ default_result(current, 1);
+
+ return true;
+}
+
+static bool handle_signal_copyctx(struct tdescr *td,
+ siginfo_t *si, void *uc)
+{
+ /* Mangling PC to avoid loops on original BRK instr */
+ ((ucontext_t *)uc)->uc_mcontext.pc += 4;
+ memcpy(td->live_uc, uc, td->live_sz);
+ ASSERT_GOOD_CONTEXT(td->live_uc);
+ td->live_uc_valid = 1;
+ fprintf(stderr,
+ "GOOD CONTEXT grabbed from sig_copyctx handler\n");
+
+ return true;
+}
+
+static void default_handler(int signum, siginfo_t *si, void *uc)
+{
+ if (current->sig_unsupp && signum == current->sig_unsupp &&
+ handle_signal_unsupported(current, si, uc)) {
+ fprintf(stderr, "Handled SIG_UNSUPP\n");
+ } else if (current->sig_trig && signum == current->sig_trig &&
+ handle_signal_trigger(current, si, uc)) {
+ fprintf(stderr, "Handled SIG_TRIG\n");
+ } else if (current->sig_ok && signum == current->sig_ok &&
+ handle_signal_ok(current, si, uc)) {
+ fprintf(stderr, "Handled SIG_OK\n");
+ } else if (signum == sig_copyctx && current->live_uc &&
+ handle_signal_copyctx(current, si, uc)) {
+ fprintf(stderr, "Handled SIG_COPYCTX\n");
+ } else {
+ if (signum == SIGALRM && current->timeout) {
+ fprintf(stderr, "-- Timeout !\n");
+ } else {
+ fprintf(stderr,
+ "-- RX UNEXPECTED SIGNAL: %d\n", signum);
+ }
+ default_result(current, 1);
+ }
+}
+
+static int default_setup(struct tdescr *td)
+{
+ struct sigaction sa;
+
+ sa.sa_sigaction = default_handler;
+ sa.sa_flags = SA_SIGINFO | SA_RESTART;
+ sa.sa_flags |= td->sa_flags;
+ sigemptyset(&sa.sa_mask);
+ /* uncatchable signals naturally skipped ... */
+ for (int sig = 1; sig < 32; sig++)
+ sigaction(sig, &sa, NULL);
+ /*
+ * RT Signals default disposition is Term but they cannot be
+ * generated by the Kernel in response to our tests; so just catch
+ * them all and report them as UNEXPECTED signals.
+ */
+ for (int sig = SIGRTMIN; sig <= SIGRTMAX; sig++)
+ sigaction(sig, &sa, NULL);
+
+ /* just in case...unblock explicitly all we need */
+ if (td->sig_trig)
+ unblock_signal(td->sig_trig);
+ if (td->sig_ok)
+ unblock_signal(td->sig_ok);
+ if (td->sig_unsupp)
+ unblock_signal(td->sig_unsupp);
+
+ if (td->timeout) {
+ unblock_signal(SIGALRM);
+ alarm(td->timeout);
+ }
+ fprintf(stderr, "Registered handlers for all signals.\n");
+
+ return 1;
+}
+
+static inline int default_trigger(struct tdescr *td)
+{
+ return !raise(td->sig_trig);
+}
+
+int test_init(struct tdescr *td)
+{
+ if (td->sig_trig == sig_copyctx) {
+ fprintf(stdout,
+ "Signal %d is RESERVED, cannot be used as a trigger. Aborting\n",
+ sig_copyctx);
+ return 0;
+ }
+ /* just in case */
+ unblock_signal(sig_copyctx);
+
+ td->minsigstksz = getauxval(AT_MINSIGSTKSZ);
+ if (!td->minsigstksz)
+ td->minsigstksz = MINSIGSTKSZ;
+ fprintf(stderr, "Detected MINSTKSIGSZ:%d\n", td->minsigstksz);
+
+ if (td->feats_required) {
+ td->feats_supported = 0;
+ /*
+ * Checking for CPU required features using both the
+ * auxval and the arm64 MRS Emulation to read sysregs.
+ */
+ if (getauxval(AT_HWCAP) & HWCAP_SSBS)
+ td->feats_supported |= FEAT_SSBS;
+ if (feats_ok(td))
+ fprintf(stderr,
+ "Required Features: [%s] supported\n",
+ feats_to_string(td->feats_required &
+ td->feats_supported));
+ else
+ fprintf(stderr,
+ "Required Features: [%s] NOT supported\n",
+ feats_to_string(td->feats_required &
+ ~td->feats_supported));
+ }
+
+ /* Perform test specific additional initialization */
+ if (td->init && !td->init(td)) {
+ fprintf(stderr, "FAILED Testcase initialization.\n");
+ return 0;
+ }
+ td->initialized = 1;
+ fprintf(stderr, "Testcase initialized.\n");
+
+ return 1;
+}
+
+int test_setup(struct tdescr *td)
+{
+ /* assert core invariants symptom of a rotten testcase */
+ assert(current);
+ assert(td);
+ assert(td->name);
+ assert(td->run);
+
+ /* Default result is FAIL if test setup fails */
+ td->result = KSFT_FAIL;
+ if (td->setup)
+ return td->setup(td);
+ else
+ return default_setup(td);
+}
+
+int test_run(struct tdescr *td)
+{
+ if (td->sig_trig) {
+ if (td->trigger)
+ return td->trigger(td);
+ else
+ return default_trigger(td);
+ } else {
+ return td->run(td, NULL, NULL);
+ }
+}
+
+void test_result(struct tdescr *td)
+{
+ if (td->initialized && td->result != KSFT_SKIP && td->check_result)
+ td->check_result(td);
+ default_result(td, 0);
+}
+
+void test_cleanup(struct tdescr *td)
+{
+ if (td->cleanup)
+ td->cleanup(td);
+}
diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.h b/tools/testing/selftests/arm64/signal/test_signals_utils.h
new file mode 100644
index 000000000000..6772b5c8d274
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/test_signals_utils.h
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019 ARM Limited */
+
+#ifndef __TEST_SIGNALS_UTILS_H__
+#define __TEST_SIGNALS_UTILS_H__
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "test_signals.h"
+
+int test_init(struct tdescr *td);
+int test_setup(struct tdescr *td);
+void test_cleanup(struct tdescr *td);
+int test_run(struct tdescr *td);
+void test_result(struct tdescr *td);
+
+static inline bool feats_ok(struct tdescr *td)
+{
+ return (td->feats_required & td->feats_supported) == td->feats_required;
+}
+
+/*
+ * Obtaining a valid and full-blown ucontext_t from userspace is tricky:
+ * libc getcontext does() not save all the regs and messes with some of
+ * them (pstate value in particular is not reliable).
+ *
+ * Here we use a service signal to grab the ucontext_t from inside a
+ * dedicated signal handler, since there, it is populated by Kernel
+ * itself in setup_sigframe(). The grabbed context is then stored and
+ * made available in td->live_uc.
+ *
+ * As service-signal is used a SIGTRAP induced by a 'brk' instruction,
+ * because here we have to avoid syscalls to trigger the signal since
+ * they would cause any SVE sigframe content (if any) to be removed.
+ *
+ * Anyway this function really serves a dual purpose:
+ *
+ * 1. grab a valid sigcontext into td->live_uc for result analysis: in
+ * such case it returns 1.
+ *
+ * 2. detect if, somehow, a previously grabbed live_uc context has been
+ * used actively with a sigreturn: in such a case the execution would have
+ * magically resumed in the middle of this function itself (seen_already==1):
+ * in such a case return 0, since in fact we have not just simply grabbed
+ * the context.
+ *
+ * This latter case is useful to detect when a fake_sigreturn test-case has
+ * unexpectedly survived without hitting a SEGV.
+ *
+ * Note that the case of runtime dynamically sized sigframes (like in SVE
+ * context) is still NOT addressed: sigframe size is supposed to be fixed
+ * at sizeof(ucontext_t).
+ */
+static __always_inline bool get_current_context(struct tdescr *td,
+ ucontext_t *dest_uc)
+{
+ static volatile bool seen_already;
+
+ assert(td && dest_uc);
+ /* it's a genuine invocation..reinit */
+ seen_already = 0;
+ td->live_uc_valid = 0;
+ td->live_sz = sizeof(*dest_uc);
+ memset(dest_uc, 0x00, td->live_sz);
+ td->live_uc = dest_uc;
+ /*
+ * Grab ucontext_t triggering a SIGTRAP.
+ *
+ * Note that:
+ * - live_uc_valid is declared volatile sig_atomic_t in
+ * struct tdescr since it will be changed inside the
+ * sig_copyctx handler
+ * - the additional 'memory' clobber is there to avoid possible
+ * compiler's assumption on live_uc_valid and the content
+ * pointed by dest_uc, which are all changed inside the signal
+ * handler
+ * - BRK causes a debug exception which is handled by the Kernel
+ * and finally causes the SIGTRAP signal to be delivered to this
+ * test thread. Since such delivery happens on the ret_to_user()
+ * /do_notify_resume() debug exception return-path, we are sure
+ * that the registered SIGTRAP handler has been run to completion
+ * before the execution path is restored here: as a consequence
+ * we can be sure that the volatile sig_atomic_t live_uc_valid
+ * carries a meaningful result. Being in a single thread context
+ * we'll also be sure that any access to memory modified by the
+ * handler (namely ucontext_t) will be visible once returned.
+ * - note that since we are using a breakpoint instruction here
+ * to cause a SIGTRAP, the ucontext_t grabbed from the signal
+ * handler would naturally contain a PC pointing exactly to this
+ * BRK line, which means that, on return from the signal handler,
+ * or if we place the ucontext_t on the stack to fake a sigreturn,
+ * we'll end up in an infinite loop of BRK-SIGTRAP-handler.
+ * For this reason we take care to artificially move forward the
+ * PC to the next instruction while inside the signal handler.
+ */
+ asm volatile ("brk #666"
+ : "+m" (*dest_uc)
+ :
+ : "memory");
+
+ /*
+ * If we get here with seen_already==1 it implies the td->live_uc
+ * context has been used to get back here....this probably means
+ * a test has failed to cause a SEGV...anyway live_uc does not
+ * point to a just acquired copy of ucontext_t...so return 0
+ */
+ if (seen_already) {
+ fprintf(stdout,
+ "Unexpected successful sigreturn detected: live_uc is stale !\n");
+ return 0;
+ }
+ seen_already = 1;
+
+ return td->live_uc_valid;
+}
+
+int fake_sigreturn(void *sigframe, size_t sz, int misalign_bytes);
+#endif
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c
new file mode 100644
index 000000000000..8dc600a7d4fd
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 ARM Limited
+ *
+ * Place a fake sigframe on the stack including a BAD Unknown magic
+ * record: on sigreturn Kernel must spot this attempt and the test
+ * case is expected to be terminated via SEGV.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+
+static int fake_sigreturn_bad_magic_run(struct tdescr *td,
+ siginfo_t *si, ucontext_t *uc)
+{
+ struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head;
+
+ /* just to fill the ucontext_t with something real */
+ if (!get_current_context(td, &sf.uc))
+ return 1;
+
+ /* need at least 2*HDR_SZ space: KSFT_BAD_MAGIC + terminator. */
+ head = get_starting_head(shead, HDR_SZ * 2, GET_SF_RESV_SIZE(sf), NULL);
+ if (!head)
+ return 0;
+
+ /*
+ * use a well known NON existent bad magic...something
+ * we should pretty sure won't be ever defined in Kernel
+ */
+ head->magic = KSFT_BAD_MAGIC;
+ head->size = HDR_SZ;
+ write_terminator_record(GET_RESV_NEXT_HEAD(head));
+
+ ASSERT_BAD_CONTEXT(&sf.uc);
+ fake_sigreturn(&sf, sizeof(sf), 0);
+
+ return 1;
+}
+
+struct tdescr tde = {
+ .name = "FAKE_SIGRETURN_BAD_MAGIC",
+ .descr = "Trigger a sigreturn with a sigframe with a bad magic",
+ .sig_ok = SIGSEGV,
+ .timeout = 3,
+ .run = fake_sigreturn_bad_magic_run,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c
new file mode 100644
index 000000000000..b3c362100666
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 ARM Limited
+ *
+ * Place a fake sigframe on the stack including a bad record overflowing
+ * the __reserved space: on sigreturn Kernel must spot this attempt and
+ * the test case is expected to be terminated via SEGV.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+
+#define MIN_SZ_ALIGN 16
+
+static int fake_sigreturn_bad_size_run(struct tdescr *td,
+ siginfo_t *si, ucontext_t *uc)
+{
+ size_t resv_sz, need_sz, offset;
+ struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head;
+
+ /* just to fill the ucontext_t with something real */
+ if (!get_current_context(td, &sf.uc))
+ return 1;
+
+ resv_sz = GET_SF_RESV_SIZE(sf);
+ /* at least HDR_SZ + bad sized esr_context needed */
+ need_sz = sizeof(struct esr_context) + HDR_SZ;
+ head = get_starting_head(shead, need_sz, resv_sz, &offset);
+ if (!head)
+ return 0;
+
+ /*
+ * Use an esr_context to build a fake header with a
+ * size greater then the free __reserved area minus HDR_SZ;
+ * using ESR_MAGIC here since it is not checked for size nor
+ * is limited to one instance.
+ *
+ * At first inject an additional normal esr_context
+ */
+ head->magic = ESR_MAGIC;
+ head->size = sizeof(struct esr_context);
+ /* and terminate properly */
+ write_terminator_record(GET_RESV_NEXT_HEAD(head));
+ ASSERT_GOOD_CONTEXT(&sf.uc);
+
+ /*
+ * now mess with fake esr_context size: leaving less space than
+ * needed while keeping size value 16-aligned
+ *
+ * It must trigger a SEGV from Kernel on:
+ *
+ * resv_sz - offset < sizeof(*head)
+ */
+ /* at first set the maximum good 16-aligned size */
+ head->size = (resv_sz - offset - need_sz + MIN_SZ_ALIGN) & ~0xfUL;
+ /* plus a bit more of 16-aligned sized stuff */
+ head->size += MIN_SZ_ALIGN;
+ /* and terminate properly */
+ write_terminator_record(GET_RESV_NEXT_HEAD(head));
+ ASSERT_BAD_CONTEXT(&sf.uc);
+ fake_sigreturn(&sf, sizeof(sf), 0);
+
+ return 1;
+}
+
+struct tdescr tde = {
+ .name = "FAKE_SIGRETURN_BAD_SIZE",
+ .descr = "Triggers a sigreturn with a overrun __reserved area",
+ .sig_ok = SIGSEGV,
+ .timeout = 3,
+ .run = fake_sigreturn_bad_size_run,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c
new file mode 100644
index 000000000000..a44b88bfc81a
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 ARM Limited
+ *
+ * Place a fake sigframe on the stack including a badly sized terminator
+ * record: on sigreturn Kernel must spot this attempt and the test case
+ * is expected to be terminated via SEGV.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+
+static int fake_sigreturn_bad_size_for_magic0_run(struct tdescr *td,
+ siginfo_t *si, ucontext_t *uc)
+{
+ struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head;
+
+ /* just to fill the ucontext_t with something real */
+ if (!get_current_context(td, &sf.uc))
+ return 1;
+
+ /* at least HDR_SZ for the badly sized terminator. */
+ head = get_starting_head(shead, HDR_SZ, GET_SF_RESV_SIZE(sf), NULL);
+ if (!head)
+ return 0;
+
+ head->magic = 0;
+ head->size = HDR_SZ;
+ ASSERT_BAD_CONTEXT(&sf.uc);
+ fake_sigreturn(&sf, sizeof(sf), 0);
+
+ return 1;
+}
+
+struct tdescr tde = {
+ .name = "FAKE_SIGRETURN_BAD_SIZE_FOR_TERMINATOR",
+ .descr = "Trigger a sigreturn using non-zero size terminator",
+ .sig_ok = SIGSEGV,
+ .timeout = 3,
+ .run = fake_sigreturn_bad_size_for_magic0_run,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c
new file mode 100644
index 000000000000..afe8915f0998
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 ARM Limited
+ *
+ * Place a fake sigframe on the stack including an additional FPSIMD
+ * record: on sigreturn Kernel must spot this attempt and the test
+ * case is expected to be terminated via SEGV.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+
+static int fake_sigreturn_duplicated_fpsimd_run(struct tdescr *td,
+ siginfo_t *si, ucontext_t *uc)
+{
+ struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head;
+
+ /* just to fill the ucontext_t with something real */
+ if (!get_current_context(td, &sf.uc))
+ return 1;
+
+ head = get_starting_head(shead, sizeof(struct fpsimd_context) + HDR_SZ,
+ GET_SF_RESV_SIZE(sf), NULL);
+ if (!head)
+ return 0;
+
+ /* Add a spurious fpsimd_context */
+ head->magic = FPSIMD_MAGIC;
+ head->size = sizeof(struct fpsimd_context);
+ /* and terminate */
+ write_terminator_record(GET_RESV_NEXT_HEAD(head));
+
+ ASSERT_BAD_CONTEXT(&sf.uc);
+ fake_sigreturn(&sf, sizeof(sf), 0);
+
+ return 1;
+}
+
+struct tdescr tde = {
+ .name = "FAKE_SIGRETURN_DUPLICATED_FPSIMD",
+ .descr = "Triggers a sigreturn including two fpsimd_context",
+ .sig_ok = SIGSEGV,
+ .timeout = 3,
+ .run = fake_sigreturn_duplicated_fpsimd_run,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c
new file mode 100644
index 000000000000..1e089e66f9f3
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 ARM Limited
+ *
+ * Place a fake sigframe on the stack at a misaligned SP: on sigreturn
+ * Kernel must spot this attempt and the test case is expected to be
+ * terminated via SEGV.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+
+static int fake_sigreturn_misaligned_run(struct tdescr *td,
+ siginfo_t *si, ucontext_t *uc)
+{
+ /* just to fill the ucontext_t with something real */
+ if (!get_current_context(td, &sf.uc))
+ return 1;
+
+ /* Forcing sigframe on misaligned SP (16 + 3) */
+ fake_sigreturn(&sf, sizeof(sf), 3);
+
+ return 1;
+}
+
+struct tdescr tde = {
+ .name = "FAKE_SIGRETURN_MISALIGNED_SP",
+ .descr = "Triggers a sigreturn with a misaligned sigframe",
+ .sig_ok = SIGSEGV,
+ .timeout = 3,
+ .run = fake_sigreturn_misaligned_run,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c
new file mode 100644
index 000000000000..08ecd8073a1a
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 ARM Limited
+ *
+ * Place a fake sigframe on the stack missing the mandatory FPSIMD
+ * record: on sigreturn Kernel must spot this attempt and the test
+ * case is expected to be terminated via SEGV.
+ */
+
+#include <stdio.h>
+#include <signal.h>
+#include <ucontext.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+
+static int fake_sigreturn_missing_fpsimd_run(struct tdescr *td,
+ siginfo_t *si, ucontext_t *uc)
+{
+ size_t resv_sz, offset;
+ struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
+
+ /* just to fill the ucontext_t with something real */
+ if (!get_current_context(td, &sf.uc))
+ return 1;
+
+ resv_sz = GET_SF_RESV_SIZE(sf);
+ head = get_header(head, FPSIMD_MAGIC, resv_sz, &offset);
+ if (head && resv_sz - offset >= HDR_SZ) {
+ fprintf(stderr, "Mangling template header. Spare space:%zd\n",
+ resv_sz - offset);
+ /* Just overwrite fpsmid_context */
+ write_terminator_record(head);
+
+ ASSERT_BAD_CONTEXT(&sf.uc);
+ fake_sigreturn(&sf, sizeof(sf), 0);
+ }
+
+ return 1;
+}
+
+struct tdescr tde = {
+ .name = "FAKE_SIGRETURN_MISSING_FPSIMD",
+ .descr = "Triggers a sigreturn with a missing fpsimd_context",
+ .sig_ok = SIGSEGV,
+ .timeout = 3,
+ .run = fake_sigreturn_missing_fpsimd_run,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_compat_toggle.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_compat_toggle.c
new file mode 100644
index 000000000000..2cb118b0ba05
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_compat_toggle.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 ARM Limited
+ *
+ * Try to mangle the ucontext from inside a signal handler, toggling
+ * the execution state bit: this attempt must be spotted by Kernel and
+ * the test case is expected to be terminated via SEGV.
+ */
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+static int mangle_invalid_pstate_run(struct tdescr *td, siginfo_t *si,
+ ucontext_t *uc)
+{
+ ASSERT_GOOD_CONTEXT(uc);
+
+ /* This config should trigger a SIGSEGV by Kernel */
+ uc->uc_mcontext.pstate ^= PSR_MODE32_BIT;
+
+ return 1;
+}
+
+struct tdescr tde = {
+ .sanity_disabled = true,
+ .name = "MANGLE_PSTATE_INVALID_STATE_TOGGLE",
+ .descr = "Mangling uc_mcontext with INVALID STATE_TOGGLE",
+ .sig_trig = SIGUSR1,
+ .sig_ok = SIGSEGV,
+ .run = mangle_invalid_pstate_run,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_daif_bits.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_daif_bits.c
new file mode 100644
index 000000000000..434b82597007
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_daif_bits.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 ARM Limited
+ *
+ * Try to mangle the ucontext from inside a signal handler, mangling the
+ * DAIF bits in an illegal manner: this attempt must be spotted by Kernel
+ * and the test case is expected to be terminated via SEGV.
+ *
+ */
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+static int mangle_invalid_pstate_run(struct tdescr *td, siginfo_t *si,
+ ucontext_t *uc)
+{
+ ASSERT_GOOD_CONTEXT(uc);
+
+ /*
+ * This config should trigger a SIGSEGV by Kernel when it checks
+ * the sigframe consistency in valid_user_regs() routine.
+ */
+ uc->uc_mcontext.pstate |= PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT;
+
+ return 1;
+}
+
+struct tdescr tde = {
+ .sanity_disabled = true,
+ .name = "MANGLE_PSTATE_INVALID_DAIF_BITS",
+ .descr = "Mangling uc_mcontext with INVALID DAIF_BITS",
+ .sig_trig = SIGUSR1,
+ .sig_ok = SIGSEGV,
+ .run = mangle_invalid_pstate_run,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el1h.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el1h.c
new file mode 100644
index 000000000000..95f821abdf46
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el1h.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 ARM Limited
+ *
+ * Try to mangle the ucontext from inside a signal handler, toggling
+ * the mode bit to escalate exception level: this attempt must be spotted
+ * by Kernel and the test case is expected to be termninated via SEGV.
+ */
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+#include "mangle_pstate_invalid_mode_template.h"
+
+DEFINE_TESTCASE_MANGLE_PSTATE_INVALID_MODE(1h);
diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el1t.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el1t.c
new file mode 100644
index 000000000000..cc222d8a618a
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el1t.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 ARM Limited
+ *
+ * Try to mangle the ucontext from inside a signal handler, toggling
+ * the mode bit to escalate exception level: this attempt must be spotted
+ * by Kernel and the test case is expected to be termninated via SEGV.
+ */
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+#include "mangle_pstate_invalid_mode_template.h"
+
+DEFINE_TESTCASE_MANGLE_PSTATE_INVALID_MODE(1t);
diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el2h.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el2h.c
new file mode 100644
index 000000000000..2188add7d28c
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el2h.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 ARM Limited
+ *
+ * Try to mangle the ucontext from inside a signal handler, toggling
+ * the mode bit to escalate exception level: this attempt must be spotted
+ * by Kernel and the test case is expected to be termninated via SEGV.
+ */
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+#include "mangle_pstate_invalid_mode_template.h"
+
+DEFINE_TESTCASE_MANGLE_PSTATE_INVALID_MODE(2h);
diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el2t.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el2t.c
new file mode 100644
index 000000000000..df32dd5a479c
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el2t.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 ARM Limited
+ *
+ * Try to mangle the ucontext from inside a signal handler, toggling
+ * the mode bit to escalate exception level: this attempt must be spotted
+ * by Kernel and the test case is expected to be termninated via SEGV.
+ */
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+#include "mangle_pstate_invalid_mode_template.h"
+
+DEFINE_TESTCASE_MANGLE_PSTATE_INVALID_MODE(2t);
diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el3h.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el3h.c
new file mode 100644
index 000000000000..9e6829b7e5db
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el3h.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 ARM Limited
+ *
+ * Try to mangle the ucontext from inside a signal handler, toggling
+ * the mode bit to escalate exception level: this attempt must be spotted
+ * by Kernel and the test case is expected to be termninated via SEGV.
+ */
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+#include "mangle_pstate_invalid_mode_template.h"
+
+DEFINE_TESTCASE_MANGLE_PSTATE_INVALID_MODE(3h);
diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el3t.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el3t.c
new file mode 100644
index 000000000000..5685a4f10d06
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el3t.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 ARM Limited
+ *
+ * Try to mangle the ucontext from inside a signal handler, toggling
+ * the mode bit to escalate exception level: this attempt must be spotted
+ * by Kernel and the test case is expected to be termninated via SEGV.
+ */
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+#include "mangle_pstate_invalid_mode_template.h"
+
+DEFINE_TESTCASE_MANGLE_PSTATE_INVALID_MODE(3t);
diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_template.h b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_template.h
new file mode 100644
index 000000000000..f5bf1804d858
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_template.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 ARM Limited
+ *
+ * Utility macro to ease definition of testcases toggling mode EL
+ */
+
+#define DEFINE_TESTCASE_MANGLE_PSTATE_INVALID_MODE(_mode) \
+ \
+static int mangle_invalid_pstate_run(struct tdescr *td, siginfo_t *si, \
+ ucontext_t *uc) \
+{ \
+ ASSERT_GOOD_CONTEXT(uc); \
+ \
+ uc->uc_mcontext.pstate &= ~PSR_MODE_MASK; \
+ uc->uc_mcontext.pstate |= PSR_MODE_EL ## _mode; \
+ \
+ return 1; \
+} \
+ \
+struct tdescr tde = { \
+ .sanity_disabled = true, \
+ .name = "MANGLE_PSTATE_INVALID_MODE_EL"#_mode, \
+ .descr = "Mangling uc_mcontext INVALID MODE EL"#_mode, \
+ .sig_trig = SIGUSR1, \
+ .sig_ok = SIGSEGV, \
+ .run = mangle_invalid_pstate_run, \
+}
diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.c b/tools/testing/selftests/arm64/signal/testcases/testcases.c
new file mode 100644
index 000000000000..61ebcdf63831
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/testcases.c
@@ -0,0 +1,196 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2019 ARM Limited */
+#include "testcases.h"
+
+struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic,
+ size_t resv_sz, size_t *offset)
+{
+ size_t offs = 0;
+ struct _aarch64_ctx *found = NULL;
+
+ if (!head || resv_sz < HDR_SZ)
+ return found;
+
+ while (offs <= resv_sz - HDR_SZ &&
+ head->magic != magic && head->magic) {
+ offs += head->size;
+ head = GET_RESV_NEXT_HEAD(head);
+ }
+ if (head->magic == magic) {
+ found = head;
+ if (offset)
+ *offset = offs;
+ }
+
+ return found;
+}
+
+bool validate_extra_context(struct extra_context *extra, char **err)
+{
+ struct _aarch64_ctx *term;
+
+ if (!extra || !err)
+ return false;
+
+ fprintf(stderr, "Validating EXTRA...\n");
+ term = GET_RESV_NEXT_HEAD(extra);
+ if (!term || term->magic || term->size) {
+ *err = "Missing terminator after EXTRA context";
+ return false;
+ }
+ if (extra->datap & 0x0fUL)
+ *err = "Extra DATAP misaligned";
+ else if (extra->size & 0x0fUL)
+ *err = "Extra SIZE misaligned";
+ else if (extra->datap != (uint64_t)term + sizeof(*term))
+ *err = "Extra DATAP misplaced (not contiguous)";
+ if (*err)
+ return false;
+
+ return true;
+}
+
+bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
+{
+ bool terminated = false;
+ size_t offs = 0;
+ int flags = 0;
+ struct extra_context *extra = NULL;
+ struct _aarch64_ctx *head =
+ (struct _aarch64_ctx *)uc->uc_mcontext.__reserved;
+
+ if (!err)
+ return false;
+ /* Walk till the end terminator verifying __reserved contents */
+ while (head && !terminated && offs < resv_sz) {
+ if ((uint64_t)head & 0x0fUL) {
+ *err = "Misaligned HEAD";
+ return false;
+ }
+
+ switch (head->magic) {
+ case 0:
+ if (head->size)
+ *err = "Bad size for terminator";
+ else
+ terminated = true;
+ break;
+ case FPSIMD_MAGIC:
+ if (flags & FPSIMD_CTX)
+ *err = "Multiple FPSIMD_MAGIC";
+ else if (head->size !=
+ sizeof(struct fpsimd_context))
+ *err = "Bad size for fpsimd_context";
+ flags |= FPSIMD_CTX;
+ break;
+ case ESR_MAGIC:
+ if (head->size != sizeof(struct esr_context))
+ *err = "Bad size for esr_context";
+ break;
+ case SVE_MAGIC:
+ if (flags & SVE_CTX)
+ *err = "Multiple SVE_MAGIC";
+ else if (head->size !=
+ sizeof(struct sve_context))
+ *err = "Bad size for sve_context";
+ flags |= SVE_CTX;
+ break;
+ case EXTRA_MAGIC:
+ if (flags & EXTRA_CTX)
+ *err = "Multiple EXTRA_MAGIC";
+ else if (head->size !=
+ sizeof(struct extra_context))
+ *err = "Bad size for extra_context";
+ flags |= EXTRA_CTX;
+ extra = (struct extra_context *)head;
+ break;
+ case KSFT_BAD_MAGIC:
+ /*
+ * This is a BAD magic header defined
+ * artificially by a testcase and surely
+ * unknown to the Kernel parse_user_sigframe().
+ * It MUST cause a Kernel induced SEGV
+ */
+ *err = "BAD MAGIC !";
+ break;
+ default:
+ /*
+ * A still unknown Magic: potentially freshly added
+ * to the Kernel code and still unknown to the
+ * tests.
+ */
+ fprintf(stdout,
+ "SKIP Unknown MAGIC: 0x%X - Is KSFT arm64/signal up to date ?\n",
+ head->magic);
+ break;
+ }
+
+ if (*err)
+ return false;
+
+ offs += head->size;
+ if (resv_sz < offs + sizeof(*head)) {
+ *err = "HEAD Overrun";
+ return false;
+ }
+
+ if (flags & EXTRA_CTX)
+ if (!validate_extra_context(extra, err))
+ return false;
+
+ head = GET_RESV_NEXT_HEAD(head);
+ }
+
+ if (terminated && !(flags & FPSIMD_CTX)) {
+ *err = "Missing FPSIMD";
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * This function walks through the records inside the provided reserved area
+ * trying to find enough space to fit @need_sz bytes: if not enough space is
+ * available and an extra_context record is present, it throws away the
+ * extra_context record.
+ *
+ * It returns a pointer to a new header where it is possible to start storing
+ * our need_sz bytes.
+ *
+ * @shead: points to the start of reserved area
+ * @need_sz: needed bytes
+ * @resv_sz: reserved area size in bytes
+ * @offset: if not null, this will be filled with the offset of the return
+ * head pointer from @shead
+ *
+ * @return: pointer to a new head where to start storing need_sz bytes, or
+ * NULL if space could not be made available.
+ */
+struct _aarch64_ctx *get_starting_head(struct _aarch64_ctx *shead,
+ size_t need_sz, size_t resv_sz,
+ size_t *offset)
+{
+ size_t offs = 0;
+ struct _aarch64_ctx *head;
+
+ head = get_terminator(shead, resv_sz, &offs);
+ /* not found a terminator...no need to update offset if any */
+ if (!head)
+ return head;
+ if (resv_sz - offs < need_sz) {
+ fprintf(stderr, "Low on space:%zd. Discarding extra_context.\n",
+ resv_sz - offs);
+ head = get_header(shead, EXTRA_MAGIC, resv_sz, &offs);
+ if (!head || resv_sz - offs < need_sz) {
+ fprintf(stderr,
+ "Failed to reclaim space on sigframe.\n");
+ return NULL;
+ }
+ }
+
+ fprintf(stderr, "Available space:%zd\n", resv_sz - offs);
+ if (offset)
+ *offset = offs;
+ return head;
+}
diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.h b/tools/testing/selftests/arm64/signal/testcases/testcases.h
new file mode 100644
index 000000000000..ad884c135314
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/testcases.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019 ARM Limited */
+#ifndef __TESTCASES_H__
+#define __TESTCASES_H__
+
+#include <stddef.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <ucontext.h>
+#include <signal.h>
+
+/* Architecture specific sigframe definitions */
+#include <asm/sigcontext.h>
+
+#define FPSIMD_CTX (1 << 0)
+#define SVE_CTX (1 << 1)
+#define EXTRA_CTX (1 << 2)
+
+#define KSFT_BAD_MAGIC 0xdeadbeef
+
+#define HDR_SZ \
+ sizeof(struct _aarch64_ctx)
+
+#define GET_SF_RESV_HEAD(sf) \
+ (struct _aarch64_ctx *)(&(sf).uc.uc_mcontext.__reserved)
+
+#define GET_SF_RESV_SIZE(sf) \
+ sizeof((sf).uc.uc_mcontext.__reserved)
+
+#define GET_UCP_RESV_SIZE(ucp) \
+ sizeof((ucp)->uc_mcontext.__reserved)
+
+#define ASSERT_BAD_CONTEXT(uc) do { \
+ char *err = NULL; \
+ if (!validate_reserved((uc), GET_UCP_RESV_SIZE((uc)), &err)) { \
+ if (err) \
+ fprintf(stderr, \
+ "Using badly built context - ERR: %s\n",\
+ err); \
+ } else { \
+ abort(); \
+ } \
+} while (0)
+
+#define ASSERT_GOOD_CONTEXT(uc) do { \
+ char *err = NULL; \
+ if (!validate_reserved((uc), GET_UCP_RESV_SIZE((uc)), &err)) { \
+ if (err) \
+ fprintf(stderr, \
+ "Detected BAD context - ERR: %s\n", err);\
+ abort(); \
+ } else { \
+ fprintf(stderr, "uc context validated.\n"); \
+ } \
+} while (0)
+
+/*
+ * A simple record-walker for __reserved area: it walks through assuming
+ * only to find a proper struct __aarch64_ctx header descriptor.
+ *
+ * Instead it makes no assumptions on the content and ordering of the
+ * records, any needed bounds checking must be enforced by the caller
+ * if wanted: this way can be used by caller on any maliciously built bad
+ * contexts.
+ *
+ * head->size accounts both for payload and header _aarch64_ctx size !
+ */
+#define GET_RESV_NEXT_HEAD(h) \
+ (struct _aarch64_ctx *)((char *)(h) + (h)->size)
+
+struct fake_sigframe {
+ siginfo_t info;
+ ucontext_t uc;
+};
+
+
+bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err);
+
+bool validate_extra_context(struct extra_context *extra, char **err);
+
+struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic,
+ size_t resv_sz, size_t *offset);
+
+static inline struct _aarch64_ctx *get_terminator(struct _aarch64_ctx *head,
+ size_t resv_sz,
+ size_t *offset)
+{
+ return get_header(head, 0, resv_sz, offset);
+}
+
+static inline void write_terminator_record(struct _aarch64_ctx *tail)
+{
+ if (tail) {
+ tail->magic = 0;
+ tail->size = 0;
+ }
+}
+
+struct _aarch64_ctx *get_starting_head(struct _aarch64_ctx *shead,
+ size_t need_sz, size_t resv_sz,
+ size_t *offset);
+#endif
diff --git a/tools/testing/selftests/arm64/.gitignore b/tools/testing/selftests/arm64/tags/.gitignore
index e8fae8d61ed6..e8fae8d61ed6 100644
--- a/tools/testing/selftests/arm64/.gitignore
+++ b/tools/testing/selftests/arm64/tags/.gitignore
diff --git a/tools/testing/selftests/arm64/tags/Makefile b/tools/testing/selftests/arm64/tags/Makefile
new file mode 100644
index 000000000000..41cb75070511
--- /dev/null
+++ b/tools/testing/selftests/arm64/tags/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CFLAGS += -I../../../../../usr/include/
+TEST_GEN_PROGS := tags_test
+TEST_PROGS := run_tags_test.sh
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/arm64/run_tags_test.sh b/tools/testing/selftests/arm64/tags/run_tags_test.sh
index 745f11379930..745f11379930 100755
--- a/tools/testing/selftests/arm64/run_tags_test.sh
+++ b/tools/testing/selftests/arm64/tags/run_tags_test.sh
diff --git a/tools/testing/selftests/arm64/tags_test.c b/tools/testing/selftests/arm64/tags/tags_test.c
index 5701163460ef..5701163460ef 100644
--- a/tools/testing/selftests/arm64/tags_test.c
+++ b/tools/testing/selftests/arm64/tags/tags_test.c
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
index 6cbeea7b4bf1..8547ecbdc61f 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
@@ -195,7 +195,7 @@ static void run_test(int cgroup_fd)
if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread,
(void *)&server_fd)))
- goto close_bpf_object;
+ goto close_server_fd;
pthread_mutex_lock(&server_started_mtx);
pthread_cond_wait(&server_started, &server_started_mtx);
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
index a82da555b1b0..f4cd60d6fba2 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
@@ -260,13 +260,14 @@ void test_tcp_rtt(void)
if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread,
(void *)&server_fd)))
- goto close_cgroup_fd;
+ goto close_server_fd;
pthread_mutex_lock(&server_started_mtx);
pthread_cond_wait(&server_started, &server_started_mtx);
pthread_mutex_unlock(&server_started_mtx);
CHECK_FAIL(run_test(cgroup_fd, server_fd));
+close_server_fd:
close(server_fd);
close_cgroup_fd:
close(cgroup_fd);
diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh
index d23d4da66b83..e2d06191bd35 100755
--- a/tools/testing/selftests/bpf/test_flow_dissector.sh
+++ b/tools/testing/selftests/bpf/test_flow_dissector.sh
@@ -63,6 +63,9 @@ fi
# Setup
tc qdisc add dev lo ingress
+echo 0 > /proc/sys/net/ipv4/conf/default/rp_filter
+echo 0 > /proc/sys/net/ipv4/conf/all/rp_filter
+echo 0 > /proc/sys/net/ipv4/conf/lo/rp_filter
echo "Testing IPv4..."
# Drops all IP/UDP packets coming from port 9
diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
index acf7a74f97cd..59ea56945e6c 100755
--- a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
+++ b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
@@ -314,15 +314,15 @@ test_gso()
command -v nc >/dev/null 2>&1 || \
{ echo >&2 "nc is not available: skipping TSO tests"; return; }
- # listen on IPv*_DST, capture TCP into $TMPFILE
+ # listen on port 9000, capture TCP into $TMPFILE
if [ "${PROTO}" == "IPv4" ] ; then
IP_DST=${IPv4_DST}
ip netns exec ${NS3} bash -c \
- "nc -4 -l -s ${IPv4_DST} -p 9000 > ${TMPFILE} &"
+ "nc -4 -l -p 9000 > ${TMPFILE} &"
elif [ "${PROTO}" == "IPv6" ] ; then
IP_DST=${IPv6_DST}
ip netns exec ${NS3} bash -c \
- "nc -6 -l -s ${IPv6_DST} -p 9000 > ${TMPFILE} &"
+ "nc -6 -l -p 9000 > ${TMPFILE} &"
RET=$?
else
echo " test_gso: unknown PROTO: ${PROTO}"
diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index 15a666329a34..1afa22c88e42 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -22,6 +22,7 @@ import os
import pprint
import random
import re
+import stat
import string
import struct
import subprocess
@@ -311,7 +312,11 @@ class DebugfsDir:
for f in out.split():
if f == "ports":
continue
+
p = os.path.join(path, f)
+ if not os.stat(p).st_mode & stat.S_IRUSR:
+ continue
+
if os.path.isfile(p):
_, out = cmd('cat %s/%s' % (path, f))
dfs[f] = out.strip()
diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c
index a320e3844b17..7c6e5b173f33 100644
--- a/tools/testing/selftests/bpf/test_sysctl.c
+++ b/tools/testing/selftests/bpf/test_sysctl.c
@@ -161,9 +161,14 @@ static struct sysctl_test tests[] = {
.descr = "ctx:file_pos sysctl:read read ok narrow",
.insns = {
/* If (file_pos == X) */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1,
offsetof(struct bpf_sysctl, file_pos)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 2),
+#else
+ BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1,
+ offsetof(struct bpf_sysctl, file_pos) + 3),
+#endif
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 4, 2),
/* return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
@@ -176,6 +181,7 @@ static struct sysctl_test tests[] = {
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "kernel/ostype",
.open_flags = O_RDONLY,
+ .seek = 4,
.result = SUCCESS,
},
{
diff --git a/tools/testing/selftests/bpf/test_tc_edt.sh b/tools/testing/selftests/bpf/test_tc_edt.sh
index f38567ef694b..daa7d1b8d309 100755
--- a/tools/testing/selftests/bpf/test_tc_edt.sh
+++ b/tools/testing/selftests/bpf/test_tc_edt.sh
@@ -59,7 +59,7 @@ ip netns exec ${NS_SRC} tc filter add dev veth_src egress \
# start the listener
ip netns exec ${NS_DST} bash -c \
- "nc -4 -l -s ${IP_DST} -p 9000 >/dev/null &"
+ "nc -4 -l -p 9000 >/dev/null &"
declare -i NC_PID=$!
sleep 1
diff --git a/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c b/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c
index 58ed5eeab709..ad41ea69001b 100644
--- a/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c
+++ b/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c
@@ -109,7 +109,7 @@ static bool set_watchpoint(pid_t pid, int size, int wp)
return false;
}
-static bool arun_test(int wr_size, int wp_size, int wr, int wp)
+static bool run_test(int wr_size, int wp_size, int wr, int wp)
{
int status;
siginfo_t siginfo;
diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh
index ae6146ec5afd..4632f51af7ab 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh
@@ -112,14 +112,16 @@ sanitization_single_dev_mcast_group_test()
RET=0
ip link add dev br0 type bridge mcast_snooping 0
+ ip link add name dummy1 up type dummy
ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
ttl 20 tos inherit local 198.51.100.1 dstport 4789 \
- dev $swp2 group 239.0.0.1
+ dev dummy1 group 239.0.0.1
sanitization_single_dev_test_fail
ip link del dev vxlan0
+ ip link del dev dummy1
ip link del dev br0
log_test "vxlan device with a multicast group"
@@ -181,13 +183,15 @@ sanitization_single_dev_local_interface_test()
RET=0
ip link add dev br0 type bridge mcast_snooping 0
+ ip link add name dummy1 up type dummy
ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789 dev $swp2
+ ttl 20 tos inherit local 198.51.100.1 dstport 4789 dev dummy1
sanitization_single_dev_test_fail
ip link del dev vxlan0
+ ip link del dev dummy1
ip link del dev br0
log_test "vxlan device with local interface"
diff --git a/tools/testing/selftests/gen_kselftest_tar.sh b/tools/testing/selftests/gen_kselftest_tar.sh
index a27e2eec3586..8b2b6088540d 100755
--- a/tools/testing/selftests/gen_kselftest_tar.sh
+++ b/tools/testing/selftests/gen_kselftest_tar.sh
@@ -38,16 +38,21 @@ main()
esac
fi
- install_dir=./kselftest
+ # Create working directory.
+ dest=`pwd`
+ install_work="$dest"/kselftest_install
+ install_name=kselftest
+ install_dir="$install_work"/"$install_name"
+ mkdir -p "$install_dir"
-# Run install using INSTALL_KSFT_PATH override to generate install
-# directory
-./kselftest_install.sh
-tar $copts kselftest${ext} $install_dir
-echo "Kselftest archive kselftest${ext} created!"
+ # Run install using INSTALL_KSFT_PATH override to generate install
+ # directory
+ ./kselftest_install.sh "$install_dir"
+ (cd "$install_work"; tar $copts "$dest"/kselftest${ext} $install_name)
+ echo "Kselftest archive kselftest${ext} created!"
-# clean up install directory
-rm -rf kselftest
+ # clean up top-level install work directory
+ rm -rf "$install_work"
}
main "$@"
diff --git a/tools/testing/selftests/kselftest_module.sh b/tools/testing/selftests/kselftest/module.sh
index 18e1c7992d30..18e1c7992d30 100755
--- a/tools/testing/selftests/kselftest_module.sh
+++ b/tools/testing/selftests/kselftest/module.sh
diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh
index 00c9020bdda8..84de7bc74f2c 100644
--- a/tools/testing/selftests/kselftest/runner.sh
+++ b/tools/testing/selftests/kselftest/runner.sh
@@ -3,9 +3,14 @@
#
# Runs a set of tests in a given subdirectory.
export skip_rc=4
+export timeout_rc=124
export logfile=/dev/stdout
export per_test_logging=
+# Defaults for "settings" file fields:
+# "timeout" how many seconds to let each test run before failing.
+export kselftest_default_timeout=45
+
# There isn't a shell-agnostic way to find the path of a sourced file,
# so we must rely on BASE_DIR being set to find other tools.
if [ -z "$BASE_DIR" ]; then
@@ -24,6 +29,16 @@ tap_prefix()
fi
}
+tap_timeout()
+{
+ # Make sure tests will time out if utility is available.
+ if [ -x /usr/bin/timeout ] ; then
+ /usr/bin/timeout "$kselftest_timeout" "$1"
+ else
+ "$1"
+ fi
+}
+
run_one()
{
DIR="$1"
@@ -32,6 +47,18 @@ run_one()
BASENAME_TEST=$(basename $TEST)
+ # Reset any "settings"-file variables.
+ export kselftest_timeout="$kselftest_default_timeout"
+ # Load per-test-directory kselftest "settings" file.
+ settings="$BASE_DIR/$DIR/settings"
+ if [ -r "$settings" ] ; then
+ while read line ; do
+ field=$(echo "$line" | cut -d= -f1)
+ value=$(echo "$line" | cut -d= -f2-)
+ eval "kselftest_$field"="$value"
+ done < "$settings"
+ fi
+
TEST_HDR_MSG="selftests: $DIR: $BASENAME_TEST"
echo "# $TEST_HDR_MSG"
if [ ! -x "$TEST" ]; then
@@ -44,14 +71,17 @@ run_one()
echo "not ok $test_num $TEST_HDR_MSG"
else
cd `dirname $TEST` > /dev/null
- (((((./$BASENAME_TEST 2>&1; echo $? >&3) |
+ ((((( tap_timeout ./$BASENAME_TEST 2>&1; echo $? >&3) |
tap_prefix >&4) 3>&1) |
(read xs; exit $xs)) 4>>"$logfile" &&
echo "ok $test_num $TEST_HDR_MSG") ||
- (if [ $? -eq $skip_rc ]; then \
+ (rc=$?; \
+ if [ $rc -eq $skip_rc ]; then \
echo "not ok $test_num $TEST_HDR_MSG # SKIP"
+ elif [ $rc -eq $timeout_rc ]; then \
+ echo "not ok $test_num $TEST_HDR_MSG # TIMEOUT"
else
- echo "not ok $test_num $TEST_HDR_MSG"
+ echo "not ok $test_num $TEST_HDR_MSG # exit=$rc"
fi)
cd - >/dev/null
fi
diff --git a/tools/testing/selftests/kselftest_install.sh b/tools/testing/selftests/kselftest_install.sh
index ec304463883c..407af7da7037 100755
--- a/tools/testing/selftests/kselftest_install.sh
+++ b/tools/testing/selftests/kselftest_install.sh
@@ -6,30 +6,30 @@
# Author: Shuah Khan <shuahkh@osg.samsung.com>
# Copyright (C) 2015 Samsung Electronics Co., Ltd.
-install_loc=`pwd`
-
main()
{
- if [ $(basename $install_loc) != "selftests" ]; then
+ base_dir=`pwd`
+ install_dir="$base_dir"/kselftest_install
+
+ # Make sure we're in the selftests top-level directory.
+ if [ $(basename "$base_dir") != "selftests" ]; then
echo "$0: Please run it in selftests directory ..."
exit 1;
fi
+
+ # Only allow installation into an existing location.
if [ "$#" -eq 0 ]; then
- echo "$0: Installing in default location - $install_loc ..."
+ echo "$0: Installing in default location - $install_dir ..."
elif [ ! -d "$1" ]; then
echo "$0: $1 doesn't exist!!"
exit 1;
else
- install_loc=$1
- echo "$0: Installing in specified location - $install_loc ..."
+ install_dir="$1"
+ echo "$0: Installing in specified location - $install_dir ..."
fi
- install_dir=$install_loc/kselftest
-
-# Create install directory
- mkdir -p $install_dir
-# Build tests
- INSTALL_PATH=$install_dir make install
+ # Build tests
+ KSFT_INSTALL_PATH="$install_dir" make install
}
main "$@"
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index b35da375530a..409c1fa75e03 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -1,4 +1,5 @@
/s390x/sync_regs_test
+/s390x/memop
/x86_64/cr4_cpuid_sync_test
/x86_64/evmcs_test
/x86_64/hyperv_cpuid
@@ -9,6 +10,7 @@
/x86_64/state_test
/x86_64/sync_regs_test
/x86_64/vmx_close_while_nested_test
+/x86_64/vmx_dirty_log_test
/x86_64/vmx_set_nested_state_test
/x86_64/vmx_tsc_adjust_test
/clear_dirty_log_test
diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h
index 6ae5a47fe067..f52e0ba84fed 100644
--- a/tools/testing/selftests/kvm/include/x86_64/vmx.h
+++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h
@@ -580,6 +580,8 @@ bool prepare_for_vmx_operation(struct vmx_pages *vmx);
void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp);
bool load_vmcs(struct vmx_pages *vmx);
+void nested_vmx_check_supported(void);
+
void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot);
void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c
index 4911fc77d0f6..d1cf9f6e0e6b 100644
--- a/tools/testing/selftests/kvm/lib/assert.c
+++ b/tools/testing/selftests/kvm/lib/assert.c
@@ -55,7 +55,7 @@ static void test_dump_stack(void)
#pragma GCC diagnostic pop
}
-static pid_t gettid(void)
+static pid_t _gettid(void)
{
return syscall(SYS_gettid);
}
@@ -72,7 +72,7 @@ test_assert(bool exp, const char *exp_str,
fprintf(stderr, "==== Test Assertion Failure ====\n"
" %s:%u: %s\n"
" pid=%d tid=%d - %s\n",
- file, line, exp_str, getpid(), gettid(),
+ file, line, exp_str, getpid(), _gettid(),
strerror(errno));
test_dump_stack();
if (fmt) {
diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
index fab8f6b0bf52..f6ec97b7eaef 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
@@ -376,6 +376,16 @@ void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp)
init_vmcs_guest_state(guest_rip, guest_rsp);
}
+void nested_vmx_check_supported(void)
+{
+ struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
+
+ if (!(entry->ecx & CPUID_VMX)) {
+ fprintf(stderr, "nested VMX not enabled, skipping test\n");
+ exit(KSFT_SKIP);
+ }
+}
+
void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot)
{
diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
index 11c2a70a7b87..5c8224256294 100644
--- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
@@ -22,18 +22,19 @@
#define VCPU_ID 5
+#define UCALL_PIO_PORT ((uint16_t)0x1000)
+
+/*
+ * ucall is embedded here to protect against compiler reshuffling registers
+ * before calling a function. In this test we only need to get KVM_EXIT_IO
+ * vmexit and preserve RBX, no additional information is needed.
+ */
void guest_code(void)
{
- /*
- * use a callee-save register, otherwise the compiler
- * saves it around the call to GUEST_SYNC.
- */
- register u32 stage asm("rbx");
- for (;;) {
- GUEST_SYNC(0);
- stage++;
- asm volatile ("" : : "r" (stage));
- }
+ asm volatile("1: in %[port], %%al\n"
+ "add $0x1, %%rbx\n"
+ "jmp 1b"
+ : : [port] "d" (UCALL_PIO_PORT) : "rax", "rbx");
}
static void compare_regs(struct kvm_regs *left, struct kvm_regs *right)
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
index 3b0ffe01dacd..5dfb53546a26 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
@@ -53,12 +53,8 @@ static void l1_guest_code(struct vmx_pages *vmx_pages)
int main(int argc, char *argv[])
{
vm_vaddr_t vmx_pages_gva;
- struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
- if (!(entry->ecx & CPUID_VMX)) {
- fprintf(stderr, "nested VMX not enabled, skipping test\n");
- exit(KSFT_SKIP);
- }
+ nested_vmx_check_supported();
vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
index 0bca1cfe2c1e..a223a6401258 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
@@ -78,6 +78,8 @@ int main(int argc, char *argv[])
struct ucall uc;
bool done = false;
+ nested_vmx_check_supported();
+
/* Create VM */
vm = vm_create_default(VCPU_ID, 0, l1_guest_code);
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
index 853e370e8a39..9ef7fab39d48 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
@@ -224,7 +224,6 @@ int main(int argc, char *argv[])
{
struct kvm_vm *vm;
struct kvm_nested_state state;
- struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
have_evmcs = kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS);
@@ -237,10 +236,7 @@ int main(int argc, char *argv[])
* AMD currently does not implement set_nested_state, so for now we
* just early out.
*/
- if (!(entry->ecx & CPUID_VMX)) {
- fprintf(stderr, "nested VMX not enabled, skipping test\n");
- exit(KSFT_SKIP);
- }
+ nested_vmx_check_supported();
vm = vm_create_default(VCPU_ID, 0, 0);
@@ -271,12 +267,7 @@ int main(int argc, char *argv[])
state.flags = KVM_STATE_NESTED_RUN_PENDING;
test_nested_state_expect_einval(vm, &state);
- /*
- * TODO: When SVM support is added for KVM_SET_NESTED_STATE
- * add tests here to support it like VMX.
- */
- if (entry->ecx & CPUID_VMX)
- test_vmx_nested_state(vm);
+ test_vmx_nested_state(vm);
kvm_vm_free(vm);
return 0;
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
index f36c10eba71e..5590fd2bcf87 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
@@ -128,12 +128,8 @@ static void report(int64_t val)
int main(int argc, char *argv[])
{
vm_vaddr_t vmx_pages_gva;
- struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
- if (!(entry->ecx & CPUID_VMX)) {
- fprintf(stderr, "nested VMX not enabled, skipping test\n");
- exit(KSFT_SKIP);
- }
+ nested_vmx_check_supported();
vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
diff --git a/tools/testing/selftests/lib/bitmap.sh b/tools/testing/selftests/lib/bitmap.sh
index 5511dddc5c2d..00a416fbc0ef 100755
--- a/tools/testing/selftests/lib/bitmap.sh
+++ b/tools/testing/selftests/lib/bitmap.sh
@@ -1,3 +1,3 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
-$(dirname $0)/../kselftest_module.sh "bitmap" test_bitmap
+$(dirname $0)/../kselftest/module.sh "bitmap" test_bitmap
diff --git a/tools/testing/selftests/lib/prime_numbers.sh b/tools/testing/selftests/lib/prime_numbers.sh
index 43b28f24e453..370b79a9cb2e 100755
--- a/tools/testing/selftests/lib/prime_numbers.sh
+++ b/tools/testing/selftests/lib/prime_numbers.sh
@@ -1,4 +1,4 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# Checks fast/slow prime_number generation for inconsistencies
-$(dirname $0)/../kselftest_module.sh "prime numbers" prime_numbers selftest=65536
+$(dirname $0)/../kselftest/module.sh "prime numbers" prime_numbers selftest=65536
diff --git a/tools/testing/selftests/lib/printf.sh b/tools/testing/selftests/lib/printf.sh
index 2ffa61da0296..05f4544e87f9 100755
--- a/tools/testing/selftests/lib/printf.sh
+++ b/tools/testing/selftests/lib/printf.sh
@@ -1,4 +1,4 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# Tests the printf infrastructure using test_printf kernel module.
-$(dirname $0)/../kselftest_module.sh "printf" test_printf
+$(dirname $0)/../kselftest/module.sh "printf" test_printf
diff --git a/tools/testing/selftests/lib/strscpy.sh b/tools/testing/selftests/lib/strscpy.sh
index 71f2be6afba6..be60ef6e1a7f 100755
--- a/tools/testing/selftests/lib/strscpy.sh
+++ b/tools/testing/selftests/lib/strscpy.sh
@@ -1,3 +1,3 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0+
-$(dirname $0)/../kselftest_module.sh "strscpy*" test_strscpy
+$(dirname $0)/../kselftest/module.sh "strscpy*" test_strscpy
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index c4ba0ff4a53f..76c1897e6352 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -1438,6 +1438,27 @@ ipv4_addr_metric_test()
fi
log_test $rc 0 "Prefix route with metric on link up"
+ # explicitly check for metric changes on edge scenarios
+ run_cmd "$IP addr flush dev dummy2"
+ run_cmd "$IP addr add dev dummy2 172.16.104.0/24 metric 259"
+ run_cmd "$IP addr change dev dummy2 172.16.104.0/24 metric 260"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.0 metric 260"
+ rc=$?
+ fi
+ log_test $rc 0 "Modify metric of .0/24 address"
+
+ run_cmd "$IP addr flush dev dummy2"
+ run_cmd "$IP addr add dev dummy2 172.16.104.1/32 peer 172.16.104.2 metric 260"
+ run_cmd "$IP addr change dev dummy2 172.16.104.1/32 peer 172.16.104.2 metric 261"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route "172.16.104.2 dev dummy2 proto kernel scope link src 172.16.104.1 metric 261"
+ rc=$?
+ fi
+ log_test $rc 0 "Modify metric of address with peer route"
+
$IP li del dummy1
$IP li del dummy2
cleanup
diff --git a/tools/testing/selftests/net/l2tp.sh b/tools/testing/selftests/net/l2tp.sh
index 5782433886fc..5782433886fc 100644..100755
--- a/tools/testing/selftests/net/l2tp.sh
+++ b/tools/testing/selftests/net/l2tp.sh
diff --git a/tools/testing/selftests/net/reuseport_dualstack.c b/tools/testing/selftests/net/reuseport_dualstack.c
index fe3230c55986..fb7a59ed759e 100644
--- a/tools/testing/selftests/net/reuseport_dualstack.c
+++ b/tools/testing/selftests/net/reuseport_dualstack.c
@@ -129,7 +129,7 @@ static void test(int *rcv_fds, int count, int proto)
{
struct epoll_event ev;
int epfd, i, test_fd;
- uint16_t test_family;
+ int test_family;
socklen_t len;
epfd = epoll_create(1);
@@ -146,6 +146,7 @@ static void test(int *rcv_fds, int count, int proto)
send_from_v4(proto);
test_fd = receive_once(epfd, proto);
+ len = sizeof(test_family);
if (getsockopt(test_fd, SOL_SOCKET, SO_DOMAIN, &test_family, &len))
error(1, errno, "failed to read socket domain");
if (test_family != AF_INET)
diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c
index 53f598f06647..34df4c8882af 100644
--- a/tools/testing/selftests/net/so_txtime.c
+++ b/tools/testing/selftests/net/so_txtime.c
@@ -105,8 +105,8 @@ static void do_recv_one(int fdr, struct timed_send *ts)
tstop = (gettime_ns() - glob_tstart) / 1000;
texpect = ts->delay_us >= 0 ? ts->delay_us : 0;
- fprintf(stderr, "payload:%c delay:%ld expected:%ld (us)\n",
- rbuf[0], tstop, texpect);
+ fprintf(stderr, "payload:%c delay:%lld expected:%lld (us)\n",
+ rbuf[0], (long long)tstop, (long long)texpect);
if (rbuf[0] != ts->data)
error(1, 0, "payload mismatch. expected %c", ts->data);
diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c
index 31ced79f4f25..33035d1b3f6d 100644
--- a/tools/testing/selftests/net/tcp_mmap.c
+++ b/tools/testing/selftests/net/tcp_mmap.c
@@ -71,7 +71,7 @@
#define MSG_ZEROCOPY 0x4000000
#endif
-#define FILE_SZ (1UL << 35)
+#define FILE_SZ (1ULL << 35)
static int cfg_family = AF_INET6;
static socklen_t cfg_alen = sizeof(struct sockaddr_in6);
static int cfg_port = 8787;
@@ -155,7 +155,7 @@ void *child_thread(void *arg)
socklen_t zc_len = sizeof(zc);
int res;
- zc.address = (__u64)addr;
+ zc.address = (__u64)((unsigned long)addr);
zc.length = chunk_size;
zc.recv_skip_hint = 0;
res = getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE,
@@ -302,7 +302,7 @@ int main(int argc, char *argv[])
{
struct sockaddr_storage listenaddr, addr;
unsigned int max_pacing_rate = 0;
- unsigned long total = 0;
+ size_t total = 0;
char *host = NULL;
int fd, c, on = 1;
char *buffer;
@@ -417,7 +417,7 @@ int main(int argc, char *argv[])
zflg = 0;
}
while (total < FILE_SZ) {
- long wr = FILE_SZ - total;
+ ssize_t wr = FILE_SZ - total;
if (wr > chunk_size)
wr = chunk_size;
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 4c285b6e1db8..1c8f194d6556 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -898,6 +898,114 @@ TEST_F(tls, nonblocking)
}
}
+static void
+test_mutliproc(struct __test_metadata *_metadata, struct _test_data_tls *self,
+ bool sendpg, unsigned int n_readers, unsigned int n_writers)
+{
+ const unsigned int n_children = n_readers + n_writers;
+ const size_t data = 6 * 1000 * 1000;
+ const size_t file_sz = data / 100;
+ size_t read_bias, write_bias;
+ int i, fd, child_id;
+ char buf[file_sz];
+ pid_t pid;
+
+ /* Only allow multiples for simplicity */
+ ASSERT_EQ(!(n_readers % n_writers) || !(n_writers % n_readers), true);
+ read_bias = n_writers / n_readers ?: 1;
+ write_bias = n_readers / n_writers ?: 1;
+
+ /* prep a file to send */
+ fd = open("/tmp/", O_TMPFILE | O_RDWR, 0600);
+ ASSERT_GE(fd, 0);
+
+ memset(buf, 0xac, file_sz);
+ ASSERT_EQ(write(fd, buf, file_sz), file_sz);
+
+ /* spawn children */
+ for (child_id = 0; child_id < n_children; child_id++) {
+ pid = fork();
+ ASSERT_NE(pid, -1);
+ if (!pid)
+ break;
+ }
+
+ /* parent waits for all children */
+ if (pid) {
+ for (i = 0; i < n_children; i++) {
+ int status;
+
+ wait(&status);
+ EXPECT_EQ(status, 0);
+ }
+
+ return;
+ }
+
+ /* Split threads for reading and writing */
+ if (child_id < n_readers) {
+ size_t left = data * read_bias;
+ char rb[8001];
+
+ while (left) {
+ int res;
+
+ res = recv(self->cfd, rb,
+ left > sizeof(rb) ? sizeof(rb) : left, 0);
+
+ EXPECT_GE(res, 0);
+ left -= res;
+ }
+ } else {
+ size_t left = data * write_bias;
+
+ while (left) {
+ int res;
+
+ ASSERT_EQ(lseek(fd, 0, SEEK_SET), 0);
+ if (sendpg)
+ res = sendfile(self->fd, fd, NULL,
+ left > file_sz ? file_sz : left);
+ else
+ res = send(self->fd, buf,
+ left > file_sz ? file_sz : left, 0);
+
+ EXPECT_GE(res, 0);
+ left -= res;
+ }
+ }
+}
+
+TEST_F(tls, mutliproc_even)
+{
+ test_mutliproc(_metadata, self, false, 6, 6);
+}
+
+TEST_F(tls, mutliproc_readers)
+{
+ test_mutliproc(_metadata, self, false, 4, 12);
+}
+
+TEST_F(tls, mutliproc_writers)
+{
+ test_mutliproc(_metadata, self, false, 10, 2);
+}
+
+TEST_F(tls, mutliproc_sendpage_even)
+{
+ test_mutliproc(_metadata, self, true, 6, 6);
+}
+
+TEST_F(tls, mutliproc_sendpage_readers)
+{
+ test_mutliproc(_metadata, self, true, 4, 12);
+}
+
+TEST_F(tls, mutliproc_sendpage_writers)
+{
+ test_mutliproc(_metadata, self, true, 10, 2);
+}
+
TEST_F(tls, control_msg)
{
if (self->notls)
diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c
index 614b31aad168..c66da6ffd6d8 100644
--- a/tools/testing/selftests/net/udpgso.c
+++ b/tools/testing/selftests/net/udpgso.c
@@ -440,7 +440,8 @@ static bool __send_one(int fd, struct msghdr *msg, int flags)
if (ret == -1)
error(1, errno, "sendmsg");
if (ret != msg->msg_iov->iov_len)
- error(1, 0, "sendto: %d != %lu", ret, msg->msg_iov->iov_len);
+ error(1, 0, "sendto: %d != %llu", ret,
+ (unsigned long long)msg->msg_iov->iov_len);
if (msg->msg_flags)
error(1, 0, "sendmsg: return flags 0x%x\n", msg->msg_flags);
diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c
index ada99496634a..17512a43885e 100644
--- a/tools/testing/selftests/net/udpgso_bench_tx.c
+++ b/tools/testing/selftests/net/udpgso_bench_tx.c
@@ -405,7 +405,8 @@ static int send_udp_segment(int fd, char *data)
if (ret == -1)
error(1, errno, "sendmsg");
if (ret != iov.iov_len)
- error(1, 0, "sendmsg: %u != %lu\n", ret, iov.iov_len);
+ error(1, 0, "sendmsg: %u != %llu\n", ret,
+ (unsigned long long)iov.iov_len);
return 1;
}
diff --git a/tools/testing/selftests/powerpc/mm/tlbie_test.c b/tools/testing/selftests/powerpc/mm/tlbie_test.c
index 9868a5ddd847..f85a0938ab25 100644
--- a/tools/testing/selftests/powerpc/mm/tlbie_test.c
+++ b/tools/testing/selftests/powerpc/mm/tlbie_test.c
@@ -636,7 +636,7 @@ int main(int argc, char *argv[])
nrthreads = strtoul(optarg, NULL, 10);
break;
case 'l':
- strncpy(logdir, optarg, LOGDIR_NAME_SIZE);
+ strncpy(logdir, optarg, LOGDIR_NAME_SIZE - 1);
break;
case 't':
run_time = strtoul(optarg, NULL, 10);
diff --git a/tools/testing/selftests/proc/proc-self-map-files-002.c b/tools/testing/selftests/proc/proc-self-map-files-002.c
index 47b7473dedef..e6aa00a183bc 100644
--- a/tools/testing/selftests/proc/proc-self-map-files-002.c
+++ b/tools/testing/selftests/proc/proc-self-map-files-002.c
@@ -47,7 +47,11 @@ static void fail(const char *fmt, unsigned long a, unsigned long b)
int main(void)
{
const int PAGE_SIZE = sysconf(_SC_PAGESIZE);
- const unsigned long va_max = 1UL << 32;
+ /*
+ * va_max must be enough bigger than vm.mmap_min_addr, which is
+ * 64KB/32KB by default. (depends on CONFIG_LSM_MMAP_MIN_ADDR)
+ */
+ const unsigned long va_max = 1UL << 20;
unsigned long va;
void *p;
int fd;
diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c
index bd4a7247b44f..c0dd10257df5 100644
--- a/tools/testing/selftests/ptp/testptp.c
+++ b/tools/testing/selftests/ptp/testptp.c
@@ -44,6 +44,46 @@ static int clock_adjtime(clockid_t id, struct timex *tx)
}
#endif
+static void show_flag_test(int rq_index, unsigned int flags, int err)
+{
+ printf("PTP_EXTTS_REQUEST%c flags 0x%08x : (%d) %s\n",
+ rq_index ? '1' + rq_index : ' ',
+ flags, err, strerror(errno));
+ /* sigh, uClibc ... */
+ errno = 0;
+}
+
+static void do_flag_test(int fd, unsigned int index)
+{
+ struct ptp_extts_request extts_request;
+ unsigned long request[2] = {
+ PTP_EXTTS_REQUEST,
+ PTP_EXTTS_REQUEST2,
+ };
+ unsigned int enable_flags[5] = {
+ PTP_ENABLE_FEATURE,
+ PTP_ENABLE_FEATURE | PTP_RISING_EDGE,
+ PTP_ENABLE_FEATURE | PTP_FALLING_EDGE,
+ PTP_ENABLE_FEATURE | PTP_RISING_EDGE | PTP_FALLING_EDGE,
+ PTP_ENABLE_FEATURE | (PTP_EXTTS_VALID_FLAGS + 1),
+ };
+ int err, i, j;
+
+ memset(&extts_request, 0, sizeof(extts_request));
+ extts_request.index = index;
+
+ for (i = 0; i < 2; i++) {
+ for (j = 0; j < 5; j++) {
+ extts_request.flags = enable_flags[j];
+ err = ioctl(fd, request[i], &extts_request);
+ show_flag_test(i, extts_request.flags, err);
+
+ extts_request.flags = 0;
+ err = ioctl(fd, request[i], &extts_request);
+ }
+ }
+}
+
static clockid_t get_clockid(int fd)
{
#define CLOCKFD 3
@@ -96,7 +136,8 @@ static void usage(char *progname)
" -s set the ptp clock time from the system time\n"
" -S set the system time from the ptp clock time\n"
" -t val shift the ptp clock time by 'val' seconds\n"
- " -T val set the ptp clock time to 'val' seconds\n",
+ " -T val set the ptp clock time to 'val' seconds\n"
+ " -z test combinations of rising/falling external time stamp flags\n",
progname);
}
@@ -122,6 +163,7 @@ int main(int argc, char *argv[])
int adjtime = 0;
int capabilities = 0;
int extts = 0;
+ int flagtest = 0;
int gettime = 0;
int index = 0;
int list_pins = 0;
@@ -138,7 +180,7 @@ int main(int argc, char *argv[])
progname = strrchr(argv[0], '/');
progname = progname ? 1+progname : argv[0];
- while (EOF != (c = getopt(argc, argv, "cd:e:f:ghi:k:lL:p:P:sSt:T:v"))) {
+ while (EOF != (c = getopt(argc, argv, "cd:e:f:ghi:k:lL:p:P:sSt:T:z"))) {
switch (c) {
case 'c':
capabilities = 1;
@@ -191,6 +233,9 @@ int main(int argc, char *argv[])
settime = 3;
seconds = atoi(optarg);
break;
+ case 'z':
+ flagtest = 1;
+ break;
case 'h':
usage(progname);
return 0;
@@ -322,6 +367,10 @@ int main(int argc, char *argv[])
}
}
+ if (flagtest) {
+ do_flag_test(fd, index);
+ }
+
if (list_pins) {
int n_pins = 0;
if (ioctl(fd, PTP_CLOCK_GETCAPS, &caps)) {
diff --git a/tools/testing/selftests/rtc/settings b/tools/testing/selftests/rtc/settings
new file mode 100644
index 000000000000..ba4d85f74cd6
--- /dev/null
+++ b/tools/testing/selftests/rtc/settings
@@ -0,0 +1 @@
+timeout=90
diff --git a/tools/testing/selftests/sync/sync.c b/tools/testing/selftests/sync/sync.c
index f3d599f249b9..7741c0518d18 100644
--- a/tools/testing/selftests/sync/sync.c
+++ b/tools/testing/selftests/sync/sync.c
@@ -109,7 +109,7 @@ static struct sync_file_info *sync_file_info(int fd)
return NULL;
}
- info->sync_fence_info = (uint64_t)fence_info;
+ info->sync_fence_info = (uint64_t)(unsigned long)fence_info;
err = ioctl(fd, SYNC_IOC_FILE_INFO, info);
if (err < 0) {
@@ -124,7 +124,7 @@ static struct sync_file_info *sync_file_info(int fd)
static void sync_file_info_free(struct sync_file_info *info)
{
- free((void *)info->sync_fence_info);
+ free((void *)(unsigned long)info->sync_fence_info);
free(info);
}
@@ -152,7 +152,7 @@ int sync_fence_count_with_status(int fd, int status)
if (!info)
return -1;
- fence_info = (struct sync_fence_info *)info->sync_fence_info;
+ fence_info = (struct sync_fence_info *)(unsigned long)info->sync_fence_info;
for (i = 0 ; i < info->num_fences ; i++) {
if (fence_info[i].status == status)
count++;
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 9534dc2bc929..7f9a8a8c31da 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -1,5 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for vm selftests
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/')
CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS)
LDLIBS = -lrt
@@ -16,8 +18,11 @@ TEST_GEN_FILES += on-fault-limit
TEST_GEN_FILES += thuge-gen
TEST_GEN_FILES += transhuge-stress
TEST_GEN_FILES += userfaultfd
+
+ifneq (,$(filter $(ARCH),arm64 ia64 mips64 parisc64 ppc64 riscv64 s390x sh64 sparc64 x86_64))
TEST_GEN_FILES += va_128TBswitch
TEST_GEN_FILES += virtual_address_range
+endif
TEST_PROGS := run_vmtests
diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c
index c0534e298b51..485cf06ef013 100644
--- a/tools/testing/selftests/vm/gup_benchmark.c
+++ b/tools/testing/selftests/vm/gup_benchmark.c
@@ -37,7 +37,7 @@ int main(int argc, char **argv)
char *file = "/dev/zero";
char *p;
- while ((opt = getopt(argc, argv, "m:r:n:f:tTLUSH")) != -1) {
+ while ((opt = getopt(argc, argv, "m:r:n:f:tTLUwSH")) != -1) {
switch (opt) {
case 'm':
size = atoi(optarg) * MB;
@@ -71,7 +71,7 @@ int main(int argc, char **argv)
flags |= MAP_SHARED;
break;
case 'H':
- flags |= MAP_HUGETLB;
+ flags |= (MAP_HUGETLB | MAP_ANONYMOUS);
break;
default:
return -1;
diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests
index 951c507a27f7..a692ea828317 100755
--- a/tools/testing/selftests/vm/run_vmtests
+++ b/tools/testing/selftests/vm/run_vmtests
@@ -58,6 +58,14 @@ else
exit 1
fi
+#filter 64bit architectures
+ARCH64STR="arm64 ia64 mips64 parisc64 ppc64 riscv64 s390x sh64 sparc64 x86_64"
+if [ -z $ARCH ]; then
+ ARCH=`uname -m 2>/dev/null | sed -e 's/aarch64.*/arm64/'`
+fi
+VADDR64=0
+echo "$ARCH64STR" | grep $ARCH && VADDR64=1
+
mkdir $mnt
mount -t hugetlbfs none $mnt
@@ -189,6 +197,7 @@ else
echo "[PASS]"
fi
+if [ $VADDR64 -ne 0 ]; then
echo "-----------------------------"
echo "running virtual_address_range"
echo "-----------------------------"
@@ -210,6 +219,7 @@ if [ $? -ne 0 ]; then
else
echo "[PASS]"
fi
+fi # VADDR64
echo "------------------------------------"
echo "running vmalloc stability smoke test"
diff --git a/tools/testing/selftests/watchdog/watchdog-test.c b/tools/testing/selftests/watchdog/watchdog-test.c
index afff120c7be6..f45e510500c0 100644
--- a/tools/testing/selftests/watchdog/watchdog-test.c
+++ b/tools/testing/selftests/watchdog/watchdog-test.c
@@ -19,7 +19,7 @@
int fd;
const char v = 'V';
-static const char sopts[] = "bdehp:t:Tn:NLf:";
+static const char sopts[] = "bdehp:t:Tn:NLf:i";
static const struct option lopts[] = {
{"bootstatus", no_argument, NULL, 'b'},
{"disable", no_argument, NULL, 'd'},
@@ -32,6 +32,7 @@ static const struct option lopts[] = {
{"getpretimeout", no_argument, NULL, 'N'},
{"gettimeleft", no_argument, NULL, 'L'},
{"file", required_argument, NULL, 'f'},
+ {"info", no_argument, NULL, 'i'},
{NULL, no_argument, NULL, 0x0}
};
@@ -72,6 +73,7 @@ static void usage(char *progname)
printf("Usage: %s [options]\n", progname);
printf(" -f, --file\t\tOpen watchdog device file\n");
printf("\t\t\tDefault is /dev/watchdog\n");
+ printf(" -i, --info\t\tShow watchdog_info\n");
printf(" -b, --bootstatus\tGet last boot status (Watchdog/POR)\n");
printf(" -d, --disable\t\tTurn off the watchdog timer\n");
printf(" -e, --enable\t\tTurn on the watchdog timer\n");
@@ -97,6 +99,7 @@ int main(int argc, char *argv[])
int c;
int oneshot = 0;
char *file = "/dev/watchdog";
+ struct watchdog_info info;
setbuf(stdout, NULL);
@@ -118,6 +121,16 @@ int main(int argc, char *argv[])
exit(-1);
}
+ /*
+ * Validate that `file` is a watchdog device
+ */
+ ret = ioctl(fd, WDIOC_GETSUPPORT, &info);
+ if (ret) {
+ printf("WDIOC_GETSUPPORT error '%s'\n", strerror(errno));
+ close(fd);
+ exit(ret);
+ }
+
optind = 0;
while ((c = getopt_long(argc, argv, sopts, lopts, NULL)) != -1) {
@@ -205,6 +218,18 @@ int main(int argc, char *argv[])
case 'f':
/* Handled above */
break;
+ case 'i':
+ /*
+ * watchdog_info was obtained as part of file open
+ * validation. So we just show it here.
+ */
+ oneshot = 1;
+ printf("watchdog_info:\n");
+ printf(" identity:\t\t%s\n", info.identity);
+ printf(" firmware_version:\t%u\n",
+ info.firmware_version);
+ printf(" options:\t\t%08x\n", info.options);
+ break;
default:
usage(argv[0]);
diff --git a/tools/usb/usbip/libsrc/usbip_device_driver.c b/tools/usb/usbip/libsrc/usbip_device_driver.c
index 051d7d3f443b..927a151fa9aa 100644
--- a/tools/usb/usbip/libsrc/usbip_device_driver.c
+++ b/tools/usb/usbip/libsrc/usbip_device_driver.c
@@ -69,7 +69,7 @@ int read_usb_vudc_device(struct udev_device *sdev, struct usbip_usb_device *dev)
FILE *fd = NULL;
struct udev_device *plat;
const char *speed;
- int ret = 0;
+ size_t ret;
plat = udev_device_get_parent(sdev);
path = udev_device_get_syspath(plat);
@@ -79,8 +79,10 @@ int read_usb_vudc_device(struct udev_device *sdev, struct usbip_usb_device *dev)
if (!fd)
return -1;
ret = fread((char *) &descr, sizeof(descr), 1, fd);
- if (ret < 0)
+ if (ret != 1) {
+ err("Cannot read vudc device descr file: %s", strerror(errno));
goto err;
+ }
fclose(fd);
copy_descr_attr(dev, &descr, bDeviceClass);
diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/tools/virtio/crypto/hash.h
index e69de29bb2d1..e69de29bb2d1 100644
--- a/arch/arm64/kernel/vdso/gettimeofday.S
+++ b/tools/virtio/crypto/hash.h
diff --git a/tools/virtio/linux/dma-mapping.h b/tools/virtio/linux/dma-mapping.h
index f91aeb5fe571..8f41cd6bd5c0 100644
--- a/tools/virtio/linux/dma-mapping.h
+++ b/tools/virtio/linux/dma-mapping.h
@@ -29,4 +29,6 @@ enum dma_data_direction {
#define dma_unmap_single(...) do { } while (0)
#define dma_unmap_page(...) do { } while (0)
+#define dma_max_mapping_size(...) SIZE_MAX
+
#endif
diff --git a/tools/virtio/xen/xen.h b/tools/virtio/xen/xen.h
new file mode 100644
index 000000000000..f569387d1403
--- /dev/null
+++ b/tools/virtio/xen/xen.h
@@ -0,0 +1,6 @@
+#ifndef XEN_XEN_STUB_H
+#define XEN_XEN_STUB_H
+
+#define xen_domain() 0
+
+#endif
diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
index 362a01886bab..8731dfeced8b 100644
--- a/virt/kvm/arm/pmu.c
+++ b/virt/kvm/arm/pmu.c
@@ -8,6 +8,7 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/perf_event.h>
+#include <linux/perf/arm_pmu.h>
#include <linux/uaccess.h>
#include <asm/kvm_emulate.h>
#include <kvm/arm_pmu.h>
@@ -146,8 +147,7 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
if (kvm_pmu_pmc_is_chained(pmc) &&
kvm_pmu_idx_is_high_counter(select_idx))
counter = upper_32_bits(counter);
-
- else if (!kvm_pmu_idx_is_64bit(vcpu, select_idx))
+ else if (select_idx != ARMV8_PMU_CYCLE_IDX)
counter = lower_32_bits(counter);
return counter;
@@ -193,7 +193,7 @@ static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
*/
static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
{
- u64 counter, reg;
+ u64 counter, reg, val;
pmc = kvm_pmu_get_canonical_pmc(pmc);
if (!pmc->perf_event)
@@ -201,16 +201,19 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
- if (kvm_pmu_pmc_is_chained(pmc)) {
- reg = PMEVCNTR0_EL0 + pmc->idx;
- __vcpu_sys_reg(vcpu, reg) = lower_32_bits(counter);
- __vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);
+ if (pmc->idx == ARMV8_PMU_CYCLE_IDX) {
+ reg = PMCCNTR_EL0;
+ val = counter;
} else {
- reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
- ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
- __vcpu_sys_reg(vcpu, reg) = lower_32_bits(counter);
+ reg = PMEVCNTR0_EL0 + pmc->idx;
+ val = lower_32_bits(counter);
}
+ __vcpu_sys_reg(vcpu, reg) = val;
+
+ if (kvm_pmu_pmc_is_chained(pmc))
+ __vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);
+
kvm_pmu_release_perf_event(pmc);
}
@@ -440,8 +443,25 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
struct pt_regs *regs)
{
struct kvm_pmc *pmc = perf_event->overflow_handler_context;
+ struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu);
struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
int idx = pmc->idx;
+ u64 period;
+
+ cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE);
+
+ /*
+ * Reset the sample period to the architectural limit,
+ * i.e. the point where the counter overflows.
+ */
+ period = -(local64_read(&perf_event->count));
+
+ if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
+ period &= GENMASK(31, 0);
+
+ local64_set(&perf_event->hw.period_left, 0);
+ perf_event->attr.sample_period = period;
+ perf_event->hw.sample_period = period;
__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);
@@ -449,6 +469,8 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
kvm_vcpu_kick(vcpu);
}
+
+ cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD);
}
/**
@@ -567,12 +589,12 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
* high counter.
*/
attr.sample_period = (-counter) & GENMASK(63, 0);
+ if (kvm_pmu_counter_is_enabled(vcpu, pmc->idx + 1))
+ attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;
+
event = perf_event_create_kernel_counter(&attr, -1, current,
kvm_pmu_perf_overflow,
pmc + 1);
-
- if (kvm_pmu_counter_is_enabled(vcpu, pmc->idx + 1))
- attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;
} else {
/* The initial sample period (overflow count) of an event. */
if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index fd68fbe0a75d..13efc291b1c7 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -50,6 +50,7 @@
#include <linux/bsearch.h>
#include <linux/io.h>
#include <linux/lockdep.h>
+#include <linux/kthread.h>
#include <asm/processor.h>
#include <asm/ioctl.h>
@@ -121,9 +122,22 @@ static long kvm_vcpu_compat_ioctl(struct file *file, unsigned int ioctl,
unsigned long arg);
#define KVM_COMPAT(c) .compat_ioctl = (c)
#else
+/*
+ * For architectures that don't implement a compat infrastructure,
+ * adopt a double line of defense:
+ * - Prevent a compat task from opening /dev/kvm
+ * - If the open has been done by a 64bit task, and the KVM fd
+ * passed to a compat task, let the ioctls fail.
+ */
static long kvm_no_compat_ioctl(struct file *file, unsigned int ioctl,
unsigned long arg) { return -EINVAL; }
-#define KVM_COMPAT(c) .compat_ioctl = kvm_no_compat_ioctl
+
+static int kvm_no_compat_open(struct inode *inode, struct file *file)
+{
+ return is_compat_task() ? -ENODEV : 0;
+}
+#define KVM_COMPAT(c) .compat_ioctl = kvm_no_compat_ioctl, \
+ .open = kvm_no_compat_open
#endif
static int hardware_enable_all(void);
static void hardware_disable_all(void);
@@ -149,10 +163,30 @@ __weak int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
return 0;
}
+bool kvm_is_zone_device_pfn(kvm_pfn_t pfn)
+{
+ /*
+ * The metadata used by is_zone_device_page() to determine whether or
+ * not a page is ZONE_DEVICE is guaranteed to be valid if and only if
+ * the device has been pinned, e.g. by get_user_pages(). WARN if the
+ * page_count() is zero to help detect bad usage of this helper.
+ */
+ if (!pfn_valid(pfn) || WARN_ON_ONCE(!page_count(pfn_to_page(pfn))))
+ return false;
+
+ return is_zone_device_page(pfn_to_page(pfn));
+}
+
bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
{
+ /*
+ * ZONE_DEVICE pages currently set PG_reserved, but from a refcounting
+ * perspective they are "normal" pages, albeit with slightly different
+ * usage rules.
+ */
if (pfn_valid(pfn))
- return PageReserved(pfn_to_page(pfn));
+ return PageReserved(pfn_to_page(pfn)) &&
+ !kvm_is_zone_device_pfn(pfn);
return true;
}
@@ -625,10 +659,28 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
return 0;
}
+/*
+ * Called after the VM is otherwise initialized, but just before adding it to
+ * the vm_list.
+ */
+int __weak kvm_arch_post_init_vm(struct kvm *kvm)
+{
+ return 0;
+}
+
+/*
+ * Called just after removing the VM from the vm_list, but before doing any
+ * other destruction.
+ */
+void __weak kvm_arch_pre_destroy_vm(struct kvm *kvm)
+{
+}
+
static struct kvm *kvm_create_vm(unsigned long type)
{
- int r, i;
struct kvm *kvm = kvm_arch_alloc_vm();
+ int r = -ENOMEM;
+ int i;
if (!kvm)
return ERR_PTR(-ENOMEM);
@@ -640,46 +692,51 @@ static struct kvm *kvm_create_vm(unsigned long type)
mutex_init(&kvm->lock);
mutex_init(&kvm->irq_lock);
mutex_init(&kvm->slots_lock);
- refcount_set(&kvm->users_count, 1);
INIT_LIST_HEAD(&kvm->devices);
- r = kvm_arch_init_vm(kvm, type);
- if (r)
- goto out_err_no_disable;
-
- r = hardware_enable_all();
- if (r)
- goto out_err_no_disable;
-
-#ifdef CONFIG_HAVE_KVM_IRQFD
- INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
-#endif
-
BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);
- r = -ENOMEM;
+ if (init_srcu_struct(&kvm->srcu))
+ goto out_err_no_srcu;
+ if (init_srcu_struct(&kvm->irq_srcu))
+ goto out_err_no_irq_srcu;
+
+ refcount_set(&kvm->users_count, 1);
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
struct kvm_memslots *slots = kvm_alloc_memslots();
+
if (!slots)
- goto out_err_no_srcu;
+ goto out_err_no_arch_destroy_vm;
/* Generations must be different for each address space. */
slots->generation = i;
rcu_assign_pointer(kvm->memslots[i], slots);
}
- if (init_srcu_struct(&kvm->srcu))
- goto out_err_no_srcu;
- if (init_srcu_struct(&kvm->irq_srcu))
- goto out_err_no_irq_srcu;
for (i = 0; i < KVM_NR_BUSES; i++) {
rcu_assign_pointer(kvm->buses[i],
kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL_ACCOUNT));
if (!kvm->buses[i])
- goto out_err;
+ goto out_err_no_arch_destroy_vm;
}
+ r = kvm_arch_init_vm(kvm, type);
+ if (r)
+ goto out_err_no_arch_destroy_vm;
+
+ r = hardware_enable_all();
+ if (r)
+ goto out_err_no_disable;
+
+#ifdef CONFIG_HAVE_KVM_IRQFD
+ INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
+#endif
+
r = kvm_init_mmu_notifier(kvm);
if (r)
+ goto out_err_no_mmu_notifier;
+
+ r = kvm_arch_post_init_vm(kvm);
+ if (r)
goto out_err;
mutex_lock(&kvm_lock);
@@ -691,17 +748,24 @@ static struct kvm *kvm_create_vm(unsigned long type)
return kvm;
out_err:
- cleanup_srcu_struct(&kvm->irq_srcu);
-out_err_no_irq_srcu:
- cleanup_srcu_struct(&kvm->srcu);
-out_err_no_srcu:
+#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
+ if (kvm->mmu_notifier.ops)
+ mmu_notifier_unregister(&kvm->mmu_notifier, current->mm);
+#endif
+out_err_no_mmu_notifier:
hardware_disable_all();
out_err_no_disable:
- refcount_set(&kvm->users_count, 0);
+ kvm_arch_destroy_vm(kvm);
+out_err_no_arch_destroy_vm:
+ WARN_ON_ONCE(!refcount_dec_and_test(&kvm->users_count));
for (i = 0; i < KVM_NR_BUSES; i++)
kfree(kvm_get_bus(kvm, i));
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
kvm_free_memslots(kvm, __kvm_memslots(kvm, i));
+ cleanup_srcu_struct(&kvm->irq_srcu);
+out_err_no_irq_srcu:
+ cleanup_srcu_struct(&kvm->srcu);
+out_err_no_srcu:
kvm_arch_free_vm(kvm);
mmdrop(current->mm);
return ERR_PTR(r);
@@ -733,6 +797,8 @@ static void kvm_destroy_vm(struct kvm *kvm)
mutex_lock(&kvm_lock);
list_del(&kvm->vm_list);
mutex_unlock(&kvm_lock);
+ kvm_arch_pre_destroy_vm(kvm);
+
kvm_free_irq_routing(kvm);
for (i = 0; i < KVM_NR_BUSES; i++) {
struct kvm_io_bus *bus = kvm_get_bus(kvm, i);
@@ -1853,7 +1919,7 @@ EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty);
void kvm_set_pfn_dirty(kvm_pfn_t pfn)
{
- if (!kvm_is_reserved_pfn(pfn)) {
+ if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn)) {
struct page *page = pfn_to_page(pfn);
SetPageDirty(page);
@@ -1863,7 +1929,7 @@ EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty);
void kvm_set_pfn_accessed(kvm_pfn_t pfn)
{
- if (!kvm_is_reserved_pfn(pfn))
+ if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn))
mark_page_accessed(pfn_to_page(pfn));
}
EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
@@ -2360,20 +2426,23 @@ out:
kvm_arch_vcpu_unblocking(vcpu);
block_ns = ktime_to_ns(cur) - ktime_to_ns(start);
- if (!vcpu_valid_wakeup(vcpu))
- shrink_halt_poll_ns(vcpu);
- else if (halt_poll_ns) {
- if (block_ns <= vcpu->halt_poll_ns)
- ;
- /* we had a long block, shrink polling */
- else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns)
+ if (!kvm_arch_no_poll(vcpu)) {
+ if (!vcpu_valid_wakeup(vcpu)) {
shrink_halt_poll_ns(vcpu);
- /* we had a short halt and our poll time is too small */
- else if (vcpu->halt_poll_ns < halt_poll_ns &&
- block_ns < halt_poll_ns)
- grow_halt_poll_ns(vcpu);
- } else
- vcpu->halt_poll_ns = 0;
+ } else if (halt_poll_ns) {
+ if (block_ns <= vcpu->halt_poll_ns)
+ ;
+ /* we had a long block, shrink polling */
+ else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns)
+ shrink_halt_poll_ns(vcpu);
+ /* we had a short halt and our poll time is too small */
+ else if (vcpu->halt_poll_ns < halt_poll_ns &&
+ block_ns < halt_poll_ns)
+ grow_halt_poll_ns(vcpu);
+ } else {
+ vcpu->halt_poll_ns = 0;
+ }
+ }
trace_kvm_vcpu_wakeup(block_ns, waited, vcpu_valid_wakeup(vcpu));
kvm_arch_vcpu_block_finish(vcpu);
@@ -4364,3 +4433,86 @@ void kvm_exit(void)
kvm_vfio_ops_exit();
}
EXPORT_SYMBOL_GPL(kvm_exit);
+
+struct kvm_vm_worker_thread_context {
+ struct kvm *kvm;
+ struct task_struct *parent;
+ struct completion init_done;
+ kvm_vm_thread_fn_t thread_fn;
+ uintptr_t data;
+ int err;
+};
+
+static int kvm_vm_worker_thread(void *context)
+{
+ /*
+ * The init_context is allocated on the stack of the parent thread, so
+ * we have to locally copy anything that is needed beyond initialization
+ */
+ struct kvm_vm_worker_thread_context *init_context = context;
+ struct kvm *kvm = init_context->kvm;
+ kvm_vm_thread_fn_t thread_fn = init_context->thread_fn;
+ uintptr_t data = init_context->data;
+ int err;
+
+ err = kthread_park(current);
+ /* kthread_park(current) is never supposed to return an error */
+ WARN_ON(err != 0);
+ if (err)
+ goto init_complete;
+
+ err = cgroup_attach_task_all(init_context->parent, current);
+ if (err) {
+ kvm_err("%s: cgroup_attach_task_all failed with err %d\n",
+ __func__, err);
+ goto init_complete;
+ }
+
+ set_user_nice(current, task_nice(init_context->parent));
+
+init_complete:
+ init_context->err = err;
+ complete(&init_context->init_done);
+ init_context = NULL;
+
+ if (err)
+ return err;
+
+ /* Wait to be woken up by the spawner before proceeding. */
+ kthread_parkme();
+
+ if (!kthread_should_stop())
+ err = thread_fn(kvm, data);
+
+ return err;
+}
+
+int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn,
+ uintptr_t data, const char *name,
+ struct task_struct **thread_ptr)
+{
+ struct kvm_vm_worker_thread_context init_context = {};
+ struct task_struct *thread;
+
+ *thread_ptr = NULL;
+ init_context.kvm = kvm;
+ init_context.parent = current;
+ init_context.thread_fn = thread_fn;
+ init_context.data = data;
+ init_completion(&init_context.init_done);
+
+ thread = kthread_run(kvm_vm_worker_thread, &init_context,
+ "%s-%d", name, task_pid_nr(current));
+ if (IS_ERR(thread))
+ return PTR_ERR(thread);
+
+ /* kthread_run is never supposed to return NULL */
+ WARN_ON(thread == NULL);
+
+ wait_for_completion(&init_context.init_done);
+
+ if (!init_context.err)
+ *thread_ptr = thread;
+
+ return init_context.err;
+}