aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--.mailmap1
-rw-r--r--Documentation/ABI/testing/configfs-rdma_cm22
-rw-r--r--Documentation/ABI/testing/sysfs-class-infiniband16
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/microchip,pic32-evic.txt67
-rw-r--r--Documentation/devicetree/bindings/mips/pic32/microchip,pic32mzda.txt31
-rw-r--r--Documentation/devicetree/bindings/net/mediatek,mt7620-gsw.txt26
-rw-r--r--Documentation/devicetree/bindings/net/ralink,rt2880-net.txt61
-rw-r--r--Documentation/devicetree/bindings/net/ralink,rt3050-esw.txt32
-rw-r--r--Documentation/devicetree/bindings/phy/phy-ath79-usb.txt18
-rw-r--r--Documentation/devicetree/bindings/thermal/rockchip-thermal.txt2
-rw-r--r--Documentation/infiniband/core_locking.txt2
-rw-r--r--Documentation/kernel-parameters.txt35
-rw-r--r--Documentation/kernel-per-CPU-kthreads.txt2
-rw-r--r--Documentation/virtual/kvm/api.txt2
-rw-r--r--MAINTAINERS68
-rw-r--r--Makefile4
-rw-r--r--arch/arm/mm/dma-mapping.c11
-rw-r--r--arch/arm64/Makefile2
-rw-r--r--arch/arm64/configs/defconfig42
-rw-r--r--arch/arm64/include/asm/pgtable.h21
-rw-r--r--arch/arm64/kernel/head.S5
-rw-r--r--arch/arm64/kernel/image.h40
-rw-r--r--arch/arm64/mm/dump.c2
-rw-r--r--arch/arm64/mm/kasan_init.c9
-rw-r--r--arch/arm64/mm/pageattr.c3
-rw-r--r--arch/arm64/mm/proc-macros.S12
-rw-r--r--arch/arm64/mm/proc.S4
-rw-r--r--arch/mips/Kbuild.platforms1
-rw-r--r--arch/mips/Kconfig17
-rw-r--r--arch/mips/Makefile10
-rw-r--r--arch/mips/alchemy/common/gpiolib.c6
-rw-r--r--arch/mips/ar7/gpio.c2
-rw-r--r--arch/mips/ath79/common.h1
-rw-r--r--arch/mips/ath79/irq.c61
-rw-r--r--arch/mips/ath79/setup.c11
-rw-r--r--arch/mips/bcm47xx/sprom.c12
-rw-r--r--arch/mips/bcm63xx/nvram.c46
-rw-r--r--arch/mips/bmips/setup.c1
-rw-r--r--arch/mips/boot/compressed/Makefile15
-rw-r--r--arch/mips/boot/compressed/uart-prom.c7
-rw-r--r--arch/mips/boot/dts/Makefile1
-rw-r--r--arch/mips/boot/dts/brcm/bcm6328.dtsi9
-rw-r--r--arch/mips/boot/dts/brcm/bcm6368.dtsi22
-rw-r--r--arch/mips/boot/dts/ingenic/ci20.dts64
-rw-r--r--arch/mips/boot/dts/ingenic/jz4780.dtsi26
-rw-r--r--arch/mips/boot/dts/pic32/Makefile12
-rw-r--r--arch/mips/boot/dts/pic32/pic32mzda-clk.dtsi236
-rw-r--r--arch/mips/boot/dts/pic32/pic32mzda.dtsi281
-rw-r--r--arch/mips/boot/dts/pic32/pic32mzda_sk.dts151
-rw-r--r--arch/mips/boot/dts/qca/ar9132.dtsi26
-rw-r--r--arch/mips/boot/dts/qca/ar9132_tl_wr1043nd_v1.dts8
-rw-r--r--arch/mips/configs/pic32mzda_defconfig89
-rw-r--r--arch/mips/include/asm/cacheops.h106
-rw-r--r--arch/mips/include/asm/cpu-features.h7
-rw-r--r--arch/mips/include/asm/cpu.h2
-rw-r--r--arch/mips/include/asm/elf.h15
-rw-r--r--arch/mips/include/asm/fpu_emulator.h2
-rw-r--r--arch/mips/include/asm/io.h1
-rw-r--r--arch/mips/include/asm/irqflags.h30
-rw-r--r--arch/mips/include/asm/kvm_host.h39
-rw-r--r--arch/mips/include/asm/mach-ath79/ath79.h1
-rw-r--r--arch/mips/include/asm/mach-bcm63xx/bcm63xx_nvram.h2
-rw-r--r--arch/mips/include/asm/mach-pic32/cpu-feature-overrides.h32
-rw-r--r--arch/mips/include/asm/mach-pic32/irq.h22
-rw-r--r--arch/mips/include/asm/mach-pic32/pic32.h44
-rw-r--r--arch/mips/include/asm/mach-pic32/spaces.h24
-rw-r--r--arch/mips/include/asm/mach-ralink/irq.h9
-rw-r--r--arch/mips/include/asm/mach-ralink/mt7621.h38
-rw-r--r--arch/mips/include/asm/mach-ralink/mt7621/cpu-feature-overrides.h65
-rw-r--r--arch/mips/include/asm/mips-cm.h4
-rw-r--r--arch/mips/include/asm/mips-r2-to-r6-emul.h2
-rw-r--r--arch/mips/include/asm/mipsregs.h34
-rw-r--r--arch/mips/include/asm/page.h2
-rw-r--r--arch/mips/include/asm/pgtable.h4
-rw-r--r--arch/mips/include/uapi/asm/inst.h15
-rw-r--r--arch/mips/kernel/cpu-bugs64.c8
-rw-r--r--arch/mips/kernel/cpu-probe.c161
-rw-r--r--arch/mips/kernel/elf.c102
-rw-r--r--arch/mips/kernel/gpio_txx9.c2
-rw-r--r--arch/mips/kernel/ptrace.c3
-rw-r--r--arch/mips/kernel/setup.c2
-rw-r--r--arch/mips/kernel/smp-cps.c3
-rw-r--r--arch/mips/kernel/sync-r4k.c32
-rw-r--r--arch/mips/kernel/traps.c52
-rw-r--r--arch/mips/kvm/callback.c2
-rw-r--r--arch/mips/kvm/dyntrans.c10
-rw-r--r--arch/mips/kvm/emulate.c116
-rw-r--r--arch/mips/kvm/interrupt.c8
-rw-r--r--arch/mips/kvm/locore.S12
-rw-r--r--arch/mips/kvm/mips.c38
-rw-r--r--arch/mips/kvm/opcode.h22
-rw-r--r--arch/mips/kvm/tlb.c77
-rw-r--r--arch/mips/kvm/trap_emul.c1
-rw-r--r--arch/mips/lib/mips-atomic.c30
-rw-r--r--arch/mips/loongson64/Platform21
-rw-r--r--arch/mips/loongson64/loongson-3/hpet.c10
-rw-r--r--arch/mips/loongson64/loongson-3/smp.c20
-rw-r--r--arch/mips/math-emu/cp1emu.c4
-rw-r--r--arch/mips/math-emu/dp_simple.c38
-rw-r--r--arch/mips/math-emu/dp_tint.c9
-rw-r--r--arch/mips/math-emu/dp_tlong.c9
-rw-r--r--arch/mips/math-emu/dsemul.c77
-rw-r--r--arch/mips/math-emu/ieee754.c6
-rw-r--r--arch/mips/math-emu/ieee754.h42
-rw-r--r--arch/mips/math-emu/ieee754dp.c12
-rw-r--r--arch/mips/math-emu/ieee754int.h12
-rw-r--r--arch/mips/math-emu/ieee754sp.c12
-rw-r--r--arch/mips/math-emu/sp_fdp.c13
-rw-r--r--arch/mips/math-emu/sp_simple.c38
-rw-r--r--arch/mips/math-emu/sp_tint.c9
-rw-r--r--arch/mips/math-emu/sp_tlong.c9
-rw-r--r--arch/mips/mm/tlbex.c2
-rw-r--r--arch/mips/pci/Makefile1
-rw-r--r--arch/mips/pci/pci-mt7620.c426
-rw-r--r--arch/mips/pic32/Kconfig51
-rw-r--r--arch/mips/pic32/Makefile6
-rw-r--r--arch/mips/pic32/Platform7
-rw-r--r--arch/mips/pic32/common/Makefile5
-rw-r--r--arch/mips/pic32/common/irq.c21
-rw-r--r--arch/mips/pic32/common/reset.c62
-rw-r--r--arch/mips/pic32/pic32mzda/Makefile9
-rw-r--r--arch/mips/pic32/pic32mzda/config.c126
-rw-r--r--arch/mips/pic32/pic32mzda/early_clk.c106
-rw-r--r--arch/mips/pic32/pic32mzda/early_console.c171
-rw-r--r--arch/mips/pic32/pic32mzda/early_pin.c275
-rw-r--r--arch/mips/pic32/pic32mzda/early_pin.h241
-rw-r--r--arch/mips/pic32/pic32mzda/init.c156
-rw-r--r--arch/mips/pic32/pic32mzda/pic32mzda.h29
-rw-r--r--arch/mips/pic32/pic32mzda/time.c73
-rw-r--r--arch/mips/ralink/Kconfig16
-rw-r--r--arch/mips/ralink/Makefile10
-rw-r--r--arch/mips/ralink/Platform5
-rw-r--r--arch/mips/ralink/irq-gic.c25
-rw-r--r--arch/mips/ralink/mt7620.c87
-rw-r--r--arch/mips/ralink/mt7621.c226
-rw-r--r--arch/mips/ralink/rt288x.c2
-rw-r--r--arch/mips/ralink/rt305x.c1
-rw-r--r--arch/mips/ralink/rt3883.c1
-rw-r--r--arch/mips/ralink/timer-gic.c24
-rw-r--r--arch/mips/rb532/gpio.c2
-rw-r--r--arch/mips/txx9/generic/setup.c2
-rw-r--r--arch/powerpc/include/asm/kvm_host.h3
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu.c3
-rw-r--r--arch/powerpc/kvm/book3s_hv.c18
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S4
-rw-r--r--arch/powerpc/kvm/powerpc.c20
-rw-r--r--arch/powerpc/platforms/cell/spufs/file.c4
-rw-r--r--arch/powerpc/platforms/cell/spufs/inode.c12
-rw-r--r--arch/s390/hypfs/inode.c8
-rw-r--r--arch/s390/include/asm/kvm_host.h1
-rw-r--r--arch/s390/kvm/Kconfig1
-rw-r--r--arch/s390/kvm/Makefile2
-rw-r--r--arch/s390/kvm/kvm-s390.c121
-rw-r--r--arch/sh/include/asm/barrier.h1
-rw-r--r--arch/x86/include/asm/pmem.h11
-rw-r--r--block/Makefile2
-rw-r--r--block/ioctl.c4
-rw-r--r--drivers/acpi/apei/erst.c6
-rw-r--r--drivers/base/devtmpfs.c12
-rw-r--r--drivers/block/aoe/aoecmd.c4
-rw-r--r--drivers/block/drbd/drbd_bitmap.c26
-rw-r--r--drivers/block/drbd/drbd_debugfs.c4
-rw-r--r--drivers/block/drbd/drbd_int.h3
-rw-r--r--drivers/block/rbd.c3
-rw-r--r--drivers/char/mem.c4
-rw-r--r--drivers/char/mspec.c15
-rw-r--r--drivers/char/ps3flash.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h44
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c25
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c23
-rw-r--r--drivers/gpu/drm/amd/amdgpu/tonga_dpm.c17
-rw-r--r--drivers/gpu/drm/amd/powerplay/amd_powerplay.c5
-rw-r--r--drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c41
-rw-r--r--drivers/gpu/drm/drm_atomic_helper.c24
-rw-r--r--drivers/gpu/drm/drm_hashtab.c5
-rw-r--r--drivers/gpu/drm/etnaviv/common.xml.h59
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_drv.c1
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_drv.h2
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_dump.c6
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_gem.c36
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_gem.h1
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c10
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_gpu.c189
-rw-r--r--drivers/gpu/drm/etnaviv/etnaviv_gpu.h9
-rw-r--r--drivers/gpu/drm/etnaviv/state_hi.xml.h26
-rw-r--r--drivers/gpu/drm/radeon/dce6_afmt.c12
-rw-r--r--drivers/gpu/drm/radeon/evergreen_hdmi.c10
-rw-r--r--drivers/gpu/drm/radeon/evergreend.h5
-rw-r--r--drivers/gpu/drm/radeon/radeon.h2
-rw-r--r--drivers/gpu/drm/radeon/radeon_atombios.c49
-rw-r--r--drivers/gpu/drm/radeon/radeon_audio.c20
-rw-r--r--drivers/gpu/drm/radeon/radeon_audio.h1
-rw-r--r--drivers/gpu/drm/radeon/radeon_display.c6
-rw-r--r--drivers/gpu/drm/radeon/radeon_gem.c1
-rw-r--r--drivers/gpu/drm/radeon/vce_v1_0.c12
-rw-r--r--drivers/gpu/drm/rockchip/Makefile8
-rw-r--r--drivers/gpu/drm/rockchip/dw-mipi-dsi.c3
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_drv.c4
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_fb.c24
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_fbdev.h11
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_gem.c9
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_vop.c16
-rw-r--r--drivers/gpu/drm/vc4/vc4_v3d.c17
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_drv.c7
-rw-r--r--drivers/infiniband/Kconfig10
-rw-r--r--drivers/infiniband/core/Makefile4
-rw-r--r--drivers/infiniband/core/addr.c194
-rw-r--r--drivers/infiniband/core/cache.c345
-rw-r--r--drivers/infiniband/core/cm.c49
-rw-r--r--drivers/infiniband/core/cma.c265
-rw-r--r--drivers/infiniband/core/cma_configfs.c321
-rw-r--r--drivers/infiniband/core/core_priv.h45
-rw-r--r--drivers/infiniband/core/cq.c209
-rw-r--r--drivers/infiniband/core/device.c51
-rw-r--r--drivers/infiniband/core/fmr_pool.c20
-rw-r--r--drivers/infiniband/core/mad.c162
-rw-r--r--drivers/infiniband/core/mad_priv.h2
-rw-r--r--drivers/infiniband/core/multicast.c17
-rw-r--r--drivers/infiniband/core/roce_gid_mgmt.c81
-rw-r--r--drivers/infiniband/core/sa_query.c91
-rw-r--r--drivers/infiniband/core/sysfs.c377
-rw-r--r--drivers/infiniband/core/ud_header.c155
-rw-r--r--drivers/infiniband/core/umem_odp.c2
-rw-r--r--drivers/infiniband/core/user_mad.c1
-rw-r--r--drivers/infiniband/core/uverbs.h2
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c38
-rw-r--r--drivers/infiniband/core/uverbs_main.c13
-rw-r--r--drivers/infiniband/core/uverbs_marshall.c1
-rw-r--r--drivers/infiniband/core/verbs.c238
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.c4
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cq.c4
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_mem.c102
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c146
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.h15
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_qp.c82
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c14
-rw-r--r--drivers/infiniband/hw/cxgb4/cq.c3
-rw-r--r--drivers/infiniband/hw/cxgb4/device.c57
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h13
-rw-r--r--drivers/infiniband/hw/cxgb4/mem.c251
-rw-r--r--drivers/infiniband/hw/cxgb4/provider.c3
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c5
-rw-r--r--drivers/infiniband/hw/cxgb4/t4.h7
-rw-r--r--drivers/infiniband/hw/cxgb4/user.h2
-rw-r--r--drivers/infiniband/hw/mlx4/ah.c3
-rw-r--r--drivers/infiniband/hw/mlx4/cq.c3
-rw-r--r--drivers/infiniband/hw/mlx4/main.c102
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h10
-rw-r--r--drivers/infiniband/hw/mlx4/mr.c22
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c318
-rw-r--r--drivers/infiniband/hw/mlx4/srq.c3
-rw-r--r--drivers/infiniband/hw/mlx5/ah.c32
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c31
-rw-r--r--drivers/infiniband/hw/mlx5/main.c447
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h121
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c29
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c1014
-rw-r--r--drivers/infiniband/hw/mlx5/srq.c41
-rw-r--r--drivers/infiniband/hw/mlx5/user.h63
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cq.c3
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c84
-rw-r--r--drivers/infiniband/hw/mthca/mthca_qp.c2
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.c17
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.h2
-rw-r--r--drivers/infiniband/hw/nes/nes_utils.c2
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.c216
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.h4
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_ah.c7
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_main.c1
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.c163
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.h3
-rw-r--r--drivers/infiniband/hw/qib/qib_fs.c12
-rw-r--r--drivers/infiniband/hw/qib/qib_mr.c51
-rw-r--r--drivers/infiniband/hw/qib/qib_qp.c46
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.c12
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.h4
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs_mcast.c35
-rw-r--r--drivers/infiniband/hw/usnic/usnic_debugfs.c5
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c4
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_verbs.c24
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_verbs.h2
-rw-r--r--drivers/infiniband/hw/usnic/usnic_vnic.c54
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h6
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c21
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ethtool.c14
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c40
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c45
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.c13
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.h156
-rw-r--r--drivers/infiniband/ulp/iser/iser_initiator.c323
-rw-r--r--drivers/infiniband/ulp/iser/iser_memory.c179
-rw-r--r--drivers/infiniband/ulp/iser/iser_verbs.c337
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.c118
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.h41
-rw-r--r--drivers/infiniband/ulp/isert/isert_proto.h47
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c205
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.h7
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c443
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.h53
-rw-r--r--drivers/irqchip/Kconfig5
-rw-r--r--drivers/irqchip/Makefile1
-rw-r--r--drivers/irqchip/irq-pic32-evic.c324
-rw-r--r--drivers/mtd/bcm63xxpart.c8
-rw-r--r--drivers/mtd/ubi/cdev.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/fw.c39
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/port.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/qp.c26
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eq.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c61
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/qp.c233
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/srq.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/transobj.c50
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/vport.c116
-rw-r--r--drivers/ntb/hw/Kconfig1
-rw-r--r--drivers/ntb/hw/Makefile1
-rw-r--r--drivers/ntb/hw/amd/Kconfig7
-rw-r--r--drivers/ntb/hw/amd/Makefile1
-rw-r--r--drivers/ntb/hw/amd/ntb_hw_amd.c1143
-rw-r--r--drivers/ntb/hw/amd/ntb_hw_amd.h217
-rw-r--r--drivers/ntb/hw/intel/ntb_hw_intel.c4
-rw-r--r--drivers/ntb/hw/intel/ntb_hw_intel.h8
-rw-r--r--drivers/ntb/ntb_transport.c50
-rw-r--r--drivers/ntb/test/Kconfig8
-rw-r--r--drivers/ntb/test/Makefile1
-rw-r--r--drivers/ntb/test/ntb_perf.c748
-rw-r--r--drivers/oprofile/oprofilefs.c16
-rw-r--r--drivers/platform/x86/ideapad-laptop.c7
-rw-r--r--drivers/platform/x86/intel_telemetry_debugfs.c2
-rw-r--r--drivers/scsi/Kconfig1
-rw-r--r--drivers/scsi/be2iscsi/Kconfig1
-rw-r--r--drivers/scsi/be2iscsi/be.h4
-rw-r--r--drivers/scsi/be2iscsi/be_iscsi.c4
-rw-r--r--drivers/scsi/be2iscsi/be_main.c20
-rw-r--r--drivers/scsi/ipr.c25
-rw-r--r--drivers/scsi/ipr.h4
-rw-r--r--drivers/staging/lustre/include/linux/libcfs/libcfs_private.h8
-rw-r--r--drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c21
-rw-r--r--drivers/staging/lustre/lustre/llite/dir.c4
-rw-r--r--drivers/staging/lustre/lustre/llite/file.c16
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_internal.h2
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_lib.c4
-rw-r--r--drivers/staging/lustre/lustre/llite/llite_nfs.c4
-rw-r--r--drivers/staging/lustre/lustre/llite/lloop.c4
-rw-r--r--drivers/staging/lustre/lustre/llite/rw.c4
-rw-r--r--drivers/staging/lustre/lustre/llite/rw26.c4
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_io.c4
-rw-r--r--drivers/staging/lustre/lustre/llite/vvp_page.c10
-rw-r--r--drivers/staging/rdma/amso1100/c2_cq.c3
-rw-r--r--drivers/staging/rdma/amso1100/c2_provider.c72
-rw-r--r--drivers/staging/rdma/ehca/ehca_classes.h5
-rw-r--r--drivers/staging/rdma/ehca/ehca_iverbs.h16
-rw-r--r--drivers/staging/rdma/ehca/ehca_main.c4
-rw-r--r--drivers/staging/rdma/ehca/ehca_mrmw.c477
-rw-r--r--drivers/staging/rdma/ehca/ehca_mrmw.h5
-rw-r--r--drivers/staging/rdma/ehca/ehca_reqs.c1
-rw-r--r--drivers/staging/rdma/hfi1/mr.c51
-rw-r--r--drivers/staging/rdma/hfi1/verbs.c1
-rw-r--r--drivers/staging/rdma/hfi1/verbs.h4
-rw-r--r--drivers/staging/rdma/ipath/ipath_fs.c8
-rw-r--r--drivers/staging/rdma/ipath/ipath_mr.c55
-rw-r--r--drivers/staging/rdma/ipath/ipath_verbs.c1
-rw-r--r--drivers/staging/rdma/ipath/ipath_verbs.h4
-rw-r--r--drivers/thermal/int340x_thermal/processor_thermal_device.c10
-rw-r--r--drivers/thermal/intel_pch_thermal.c2
-rw-r--r--drivers/thermal/rcar_thermal.c53
-rw-r--r--drivers/thermal/rockchip_thermal.c172
-rw-r--r--drivers/thermal/step_wise.c17
-rw-r--r--drivers/thermal/thermal_core.c81
-rw-r--r--drivers/thermal/thermal_core.h1
-rw-r--r--drivers/usb/gadget/function/f_printer.c4
-rw-r--r--drivers/usb/gadget/legacy/inode.c4
-rw-r--r--drivers/usb/gadget/udc/atmel_usba_udc.c12
-rw-r--r--drivers/video/fbdev/core/fb_defio.c4
-rw-r--r--drivers/virtio/virtio_pci_common.c2
-rw-r--r--fs/9p/vfs_file.c8
-rw-r--r--fs/affs/file.c8
-rw-r--r--fs/afs/flock.c4
-rw-r--r--fs/afs/write.c4
-rw-r--r--fs/attr.c2
-rw-r--r--fs/binfmt_elf.c6
-rw-r--r--fs/binfmt_misc.c12
-rw-r--r--fs/block_dev.c22
-rw-r--r--fs/btrfs/file.c42
-rw-r--r--fs/btrfs/inode.c4
-rw-r--r--fs/btrfs/ioctl.c38
-rw-r--r--fs/btrfs/relocation.c4
-rw-r--r--fs/btrfs/scrub.c4
-rw-r--r--fs/btrfs/xattr.c2
-rw-r--r--fs/cachefiles/interface.c4
-rw-r--r--fs/cachefiles/namei.c40
-rw-r--r--fs/ceph/addr.c14
-rw-r--r--fs/ceph/cache.c12
-rw-r--r--fs/ceph/caps.c4
-rw-r--r--fs/ceph/dir.c4
-rw-r--r--fs/ceph/export.c4
-rw-r--r--fs/ceph/file.c527
-rw-r--r--fs/ceph/inode.c8
-rw-r--r--fs/cifs/cifs_debug.c2
-rw-r--r--fs/cifs/cifs_debug.h9
-rw-r--r--fs/cifs/cifsfs.c21
-rw-r--r--fs/cifs/cifsglob.h16
-rw-r--r--fs/cifs/cifsproto.h5
-rw-r--r--fs/cifs/connect.c36
-rw-r--r--fs/cifs/file.c12
-rw-r--r--fs/cifs/inode.c24
-rw-r--r--fs/cifs/misc.c2
-rw-r--r--fs/cifs/readdir.c1
-rw-r--r--fs/cifs/smb2misc.c36
-rw-r--r--fs/cifs/smb2ops.c13
-rw-r--r--fs/cifs/smb2pdu.c10
-rw-r--r--fs/cifs/smb2pdu.h8
-rw-r--r--fs/cifs/smb2proto.h3
-rw-r--r--fs/cifs/smb2transport.c102
-rw-r--r--fs/cifs/transport.c6
-rw-r--r--fs/coda/coda_linux.h3
-rw-r--r--fs/coda/dir.c4
-rw-r--r--fs/coda/file.c8
-rw-r--r--fs/configfs/dir.c58
-rw-r--r--fs/configfs/file.c8
-rw-r--r--fs/configfs/inode.c4
-rw-r--r--fs/dax.c280
-rw-r--r--fs/dcache.c4
-rw-r--r--fs/debugfs/inode.c22
-rw-r--r--fs/devpts/inode.c12
-rw-r--r--fs/direct-io.c8
-rw-r--r--fs/ecryptfs/inode.c32
-rw-r--r--fs/ecryptfs/mmap.c4
-rw-r--r--fs/efivarfs/file.c4
-rw-r--r--fs/efivarfs/super.c4
-rw-r--r--fs/exec.c4
-rw-r--r--fs/exofs/file.c4
-rw-r--r--fs/exportfs/expfs.c12
-rw-r--r--fs/ext2/file.c4
-rw-r--r--fs/ext2/ioctl.c12
-rw-r--r--fs/ext4/ext4.h2
-rw-r--r--fs/ext4/extents.c20
-rw-r--r--fs/ext4/file.c22
-rw-r--r--fs/ext4/inode.c12
-rw-r--r--fs/ext4/ioctl.c20
-rw-r--r--fs/ext4/namei.c4
-rw-r--r--fs/ext4/super.c4
-rw-r--r--fs/f2fs/data.c4
-rw-r--r--fs/f2fs/file.c20
-rw-r--r--fs/fat/dir.c4
-rw-r--r--fs/fat/file.c12
-rw-r--r--fs/fuse/dir.c10
-rw-r--r--fs/fuse/file.c36
-rw-r--r--fs/gfs2/file.c4
-rw-r--r--fs/gfs2/inode.c4
-rw-r--r--fs/gfs2/quota.c8
-rw-r--r--fs/hfs/dir.c4
-rw-r--r--fs/hfs/inode.c8
-rw-r--r--fs/hfsplus/dir.c4
-rw-r--r--fs/hfsplus/inode.c8
-rw-r--r--fs/hfsplus/ioctl.c4
-rw-r--r--fs/hostfs/hostfs_kern.c4
-rw-r--r--fs/hpfs/dir.c6
-rw-r--r--fs/hugetlbfs/inode.c12
-rw-r--r--fs/inode.c10
-rw-r--r--fs/ioctl.c4
-rw-r--r--fs/jffs2/build.c8
-rw-r--r--fs/jffs2/file.c4
-rw-r--r--fs/jffs2/fs.c5
-rw-r--r--fs/jffs2/super.c5
-rw-r--r--fs/jfs/file.c6
-rw-r--r--fs/jfs/ioctl.c6
-rw-r--r--fs/jfs/super.c6
-rw-r--r--fs/kernfs/dir.c4
-rw-r--r--fs/libfs.c10
-rw-r--r--fs/locks.c6
-rw-r--r--fs/logfs/file.c8
-rw-r--r--fs/namei.c74
-rw-r--r--fs/namespace.c10
-rw-r--r--fs/ncpfs/dir.c8
-rw-r--r--fs/ncpfs/file.c4
-rw-r--r--fs/nfs/dir.c8
-rw-r--r--fs/nfs/direct.c12
-rw-r--r--fs/nfs/file.c4
-rw-r--r--fs/nfs/filelayout/filelayout.c2
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c6
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayoutdev.c99
-rw-r--r--fs/nfs/inode.c8
-rw-r--r--fs/nfs/internal.h2
-rw-r--r--fs/nfs/nfs42proc.c8
-rw-r--r--fs/nfs/nfs4file.c24
-rw-r--r--fs/nfs/write.c7
-rw-r--r--fs/nfsd/nfs4proc.c4
-rw-r--r--fs/nfsd/nfs4recover.c12
-rw-r--r--fs/nfsd/nfsfh.h4
-rw-r--r--fs/nfsd/vfs.c4
-rw-r--r--fs/nilfs2/inode.c4
-rw-r--r--fs/nilfs2/ioctl.c4
-rw-r--r--fs/ntfs/dir.c4
-rw-r--r--fs/ntfs/file.c8
-rw-r--r--fs/ntfs/quota.c6
-rw-r--r--fs/ntfs/super.c12
-rw-r--r--fs/ocfs2/alloc.c32
-rw-r--r--fs/ocfs2/aops.c4
-rw-r--r--fs/ocfs2/dir.c4
-rw-r--r--fs/ocfs2/file.c12
-rw-r--r--fs/ocfs2/inode.c12
-rw-r--r--fs/ocfs2/ioctl.c12
-rw-r--r--fs/ocfs2/journal.c8
-rw-r--r--fs/ocfs2/localalloc.c16
-rw-r--r--fs/ocfs2/move_extents.c16
-rw-r--r--fs/ocfs2/namei.c28
-rw-r--r--fs/ocfs2/quota_global.c4
-rw-r--r--fs/ocfs2/refcounttree.c12
-rw-r--r--fs/ocfs2/resize.c8
-rw-r--r--fs/ocfs2/suballoc.c12
-rw-r--r--fs/ocfs2/xattr.c14
-rw-r--r--fs/open.c12
-rw-r--r--fs/overlayfs/copy_up.c4
-rw-r--r--fs/overlayfs/dir.c12
-rw-r--r--fs/overlayfs/inode.c4
-rw-r--r--fs/overlayfs/readdir.c20
-rw-r--r--fs/overlayfs/super.c14
-rw-r--r--fs/proc/kcore.c4
-rw-r--r--fs/proc/self.c4
-rw-r--r--fs/proc/thread_self.c4
-rw-r--r--fs/pstore/inode.c6
-rw-r--r--fs/quota/dquot.c20
-rw-r--r--fs/read_write.c7
-rw-r--r--fs/readdir.c2
-rw-r--r--fs/reiserfs/dir.c4
-rw-r--r--fs/reiserfs/file.c4
-rw-r--r--fs/reiserfs/ioctl.c2
-rw-r--r--fs/reiserfs/xattr.c64
-rw-r--r--fs/tracefs/inode.c34
-rw-r--r--fs/ubifs/dir.c18
-rw-r--r--fs/ubifs/file.c4
-rw-r--r--fs/ubifs/xattr.c4
-rw-r--r--fs/udf/file.c10
-rw-r--r--fs/udf/inode.c2
-rw-r--r--fs/udf/super.c7
-rw-r--r--fs/utimes.c4
-rw-r--r--fs/xattr.c8
-rw-r--r--fs/xfs/xfs_file.c13
-rw-r--r--fs/xfs/xfs_pnfs.c4
-rw-r--r--include/drm/drm_atomic_helper.h4
-rw-r--r--include/linux/bcm963xx_nvram.h112
-rw-r--r--include/linux/bcm963xx_tag.h (renamed from arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h)24
-rw-r--r--include/linux/blk-iopoll.h46
-rw-r--r--include/linux/ceph/ceph_frag.h37
-rw-r--r--include/linux/ceph/messenger.h2
-rw-r--r--include/linux/dax.h7
-rw-r--r--include/linux/fs.h32
-rw-r--r--include/linux/ftrace.h1
-rw-r--r--include/linux/interrupt.h2
-rw-r--r--include/linux/irq_poll.h25
-rw-r--r--include/linux/mlx4/cmd.h3
-rw-r--r--include/linux/mlx4/device.h15
-rw-r--r--include/linux/mlx4/qp.h15
-rw-r--r--include/linux/mlx5/device.h40
-rw-r--r--include/linux/mlx5/driver.h20
-rw-r--r--include/linux/mlx5/mlx5_ifc.h48
-rw-r--r--include/linux/mlx5/qp.h46
-rw-r--r--include/linux/mlx5/transobj.h (renamed from drivers/net/ethernet/mellanox/mlx5/core/transobj.h)10
-rw-r--r--include/linux/mlx5/vport.h8
-rw-r--r--include/linux/pagemap.h3
-rw-r--r--include/linux/platform_data/sdhci-pic32.h22
-rw-r--r--include/linux/pmem.h22
-rw-r--r--include/linux/radix-tree.h9
-rw-r--r--include/linux/shmem_fs.h5
-rw-r--r--include/linux/sunrpc/svc_rdma.h39
-rw-r--r--include/linux/thermal.h5
-rw-r--r--include/rdma/ib_addr.h16
-rw-r--r--include/rdma/ib_cache.h4
-rw-r--r--include/rdma/ib_mad.h2
-rw-r--r--include/rdma/ib_pack.h45
-rw-r--r--include/rdma/ib_pma.h1
-rw-r--r--include/rdma/ib_sa.h3
-rw-r--r--include/rdma/ib_verbs.h356
-rw-r--r--include/scsi/iser.h78
-rw-r--r--include/trace/events/fence.h2
-rw-r--r--include/trace/events/irq.h2
-rw-r--r--include/uapi/drm/etnaviv_drm.h3
-rw-r--r--ipc/mqueue.c8
-rw-r--r--ipc/sem.c2
-rw-r--r--ipc/util.c11
-rw-r--r--ipc/util.h2
-rw-r--r--kernel/audit_fsnotify.c2
-rw-r--r--kernel/audit_watch.c2
-rw-r--r--kernel/events/core.c4
-rw-r--r--kernel/relay.c4
-rw-r--r--kernel/sched/core.c4
-rw-r--r--kernel/seccomp.c22
-rw-r--r--kernel/trace/trace.c2
-rw-r--r--lib/Kconfig5
-rw-r--r--lib/Makefile1
-rw-r--r--lib/irq_poll.c (renamed from block/blk-iopoll.c)108
-rw-r--r--mm/filemap.c95
-rw-r--r--mm/percpu.c18
-rw-r--r--mm/shmem.c21
-rw-r--r--mm/swapfile.c12
-rw-r--r--mm/truncate.c69
-rw-r--r--mm/vmscan.c9
-rw-r--r--mm/vmstat.c12
-rw-r--r--mm/workingset.c4
-rw-r--r--net/9p/trans_fd.c88
-rw-r--r--net/9p/trans_virtio.c2
-rw-r--r--net/ceph/auth_x.c49
-rw-r--r--net/ceph/auth_x.h2
-rw-r--r--net/ceph/messenger.c105
-rw-r--r--net/ceph/mon_client.c4
-rw-r--r--net/ipv4/fib_trie.c4
-rw-r--r--net/rds/ib.c34
-rw-r--r--net/rds/iw.c23
-rw-r--r--net/sunrpc/cache.c10
-rw-r--r--net/sunrpc/rpc_pipe.c60
-rw-r--r--net/sunrpc/xprt.c1
-rw-r--r--net/sunrpc/xprtrdma/Makefile2
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c7
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma.c41
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c371
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c56
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c33
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c360
-rw-r--r--net/sunrpc/xprtrdma/transport.c30
-rw-r--r--net/sunrpc/xprtrdma/verbs.c24
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h21
-rw-r--r--security/inode.c10
-rw-r--r--security/integrity/ima/ima_main.c8
-rw-r--r--security/keys/key.c3
-rw-r--r--security/selinux/selinuxfs.c4
-rw-r--r--sound/core/Kconfig6
-rw-r--r--sound/core/compress_offload.c11
-rw-r--r--sound/core/seq/oss/seq_oss_init.c2
-rw-r--r--sound/core/seq/oss/seq_oss_synth.c2
-rw-r--r--sound/drivers/dummy.c2
-rw-r--r--sound/firewire/bebob/bebob_stream.c14
-rw-r--r--sound/isa/Kconfig4
-rw-r--r--sound/pci/Kconfig3
-rw-r--r--sound/pci/hda/hda_intel.c13
-rw-r--r--sound/pci/hda/patch_hdmi.c1
-rw-r--r--sound/sparc/Kconfig1
-rw-r--r--sound/usb/quirks.c14
-rw-r--r--tools/lib/traceevent/event-parse.c2
-rw-r--r--tools/perf/util/trace-event-parse.c2
-rw-r--r--tools/virtio/asm/barrier.h22
-rw-r--r--tools/virtio/linux/compiler.h9
-rw-r--r--tools/virtio/linux/kernel.h1
-rw-r--r--tools/virtio/ringtest/Makefile22
-rw-r--r--tools/virtio/ringtest/README2
-rw-r--r--tools/virtio/ringtest/main.c366
-rw-r--r--tools/virtio/ringtest/main.h119
-rw-r--r--tools/virtio/ringtest/ring.c272
-rwxr-xr-xtools/virtio/ringtest/run-on-all.sh24
-rw-r--r--tools/virtio/ringtest/virtio_ring_0_9.c316
-rw-r--r--tools/virtio/ringtest/virtio_ring_poll.c2
658 files changed, 18705 insertions, 7290 deletions
diff --git a/.mailmap b/.mailmap
index b1e9a97653dc..7e6c5334c337 100644
--- a/.mailmap
+++ b/.mailmap
@@ -21,6 +21,7 @@ Andrey Ryabinin <ryabinin.a.a@gmail.com> <a.ryabinin@samsung.com>
Andrew Morton <akpm@linux-foundation.org>
Andrew Vasquez <andrew.vasquez@qlogic.com>
Andy Adamson <andros@citi.umich.edu>
+Antonio Ospite <ao2@ao2.it> <ao2@amarulasolutions.com>
Archit Taneja <archit@ti.com>
Arnaud Patard <arnaud.patard@rtp-net.org>
Arnd Bergmann <arnd@arndb.de>
diff --git a/Documentation/ABI/testing/configfs-rdma_cm b/Documentation/ABI/testing/configfs-rdma_cm
new file mode 100644
index 000000000000..5c389aaf5291
--- /dev/null
+++ b/Documentation/ABI/testing/configfs-rdma_cm
@@ -0,0 +1,22 @@
+What: /config/rdma_cm
+Date: November 29, 2015
+KernelVersion: 4.4.0
+Description: Interface is used to configure RDMA-cable HCAs in respect to
+ RDMA-CM attributes.
+
+ Attributes are visible only when configfs is mounted. To mount
+ configfs in /config directory use:
+ # mount -t configfs none /config/
+
+ In order to set parameters related to a specific HCA, a directory
+ for this HCA has to be created:
+ mkdir -p /config/rdma_cm/<hca>
+
+
+What: /config/rdma_cm/<hca>/ports/<port-num>/default_roce_mode
+Date: November 29, 2015
+KernelVersion: 4.4.0
+Description: RDMA-CM based connections from HCA <hca> at port <port-num>
+ will be initiated with this RoCE type as default.
+ The possible RoCE types are either "IB/RoCE v1" or "RoCE v2".
+ This parameter has RW access.
diff --git a/Documentation/ABI/testing/sysfs-class-infiniband b/Documentation/ABI/testing/sysfs-class-infiniband
new file mode 100644
index 000000000000..a86abe66a316
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-infiniband
@@ -0,0 +1,16 @@
+What: /sys/class/infiniband/<hca>/ports/<port-number>/gid_attrs/ndevs/<gid-index>
+Date: November 29, 2015
+KernelVersion: 4.4.0
+Contact: linux-rdma@vger.kernel.org
+Description: The net-device's name associated with the GID resides
+ at index <gid-index>.
+
+What: /sys/class/infiniband/<hca>/ports/<port-number>/gid_attrs/types/<gid-index>
+Date: November 29, 2015
+KernelVersion: 4.4.0
+Contact: linux-rdma@vger.kernel.org
+Description: The RoCE type of the associated GID resides at index <gid-index>.
+ This could either be "IB/RoCE v1" for IB and RoCE v1 based GODs
+ or "RoCE v2" for RoCE v2 based GIDs.
+
+
diff --git a/Documentation/devicetree/bindings/interrupt-controller/microchip,pic32-evic.txt b/Documentation/devicetree/bindings/interrupt-controller/microchip,pic32-evic.txt
new file mode 100644
index 000000000000..c3a1b37c4c35
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/microchip,pic32-evic.txt
@@ -0,0 +1,67 @@
+Microchip PIC32 Interrupt Controller
+====================================
+
+The Microchip PIC32 contains an Enhanced Vectored Interrupt Controller (EVIC).
+It handles all internal and external interrupts. This controller exists outside
+of the CPU and is the arbitrator of all interrupts (including interrupts from
+the CPU itself) before they are presented to the CPU.
+
+External interrupts have a software configurable edge polarity. Non external
+interrupts have a type and polarity that is determined by the source of the
+interrupt.
+
+Required properties
+-------------------
+
+- compatible: Should be "microchip,pic32mzda-evic"
+- reg: Specifies physical base address and size of register range.
+- interrupt-controller: Identifies the node as an interrupt controller.
+- #interrupt cells: Specifies the number of cells used to encode an interrupt
+ source connected to this controller. The value shall be 2 and interrupt
+ descriptor shall have the following format:
+
+ <hw_irq irq_type>
+
+ hw_irq - represents the hardware interrupt number as in the data sheet.
+ irq_type - is used to describe the type and polarity of an interrupt. For
+ internal interrupts use IRQ_TYPE_EDGE_RISING for non persistent interrupts and
+ IRQ_TYPE_LEVEL_HIGH for persistent interrupts. For external interrupts use
+ IRQ_TYPE_EDGE_RISING or IRQ_TYPE_EDGE_FALLING to select the desired polarity.
+
+Optional properties
+-------------------
+- microchip,external-irqs: u32 array of external interrupts with software
+ polarity configuration. This array corresponds to the bits in the INTCON
+ SFR.
+
+Example
+-------
+
+evic: interrupt-controller@1f810000 {
+ compatible = "microchip,pic32mzda-evic";
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ reg = <0x1f810000 0x1000>;
+ microchip,external-irqs = <3 8 13 18 23>;
+};
+
+Each device/peripheral must request its interrupt line with the associated type
+and polarity.
+
+Internal interrupt DTS snippet
+------------------------------
+
+device@1f800000 {
+ ...
+ interrupts = <113 IRQ_TYPE_LEVEL_HIGH>;
+ ...
+};
+
+External interrupt DTS snippet
+------------------------------
+
+device@1f800000 {
+ ...
+ interrupts = <3 IRQ_TYPE_EDGE_RISING>;
+ ...
+};
diff --git a/Documentation/devicetree/bindings/mips/pic32/microchip,pic32mzda.txt b/Documentation/devicetree/bindings/mips/pic32/microchip,pic32mzda.txt
new file mode 100644
index 000000000000..1c8dbc45feec
--- /dev/null
+++ b/Documentation/devicetree/bindings/mips/pic32/microchip,pic32mzda.txt
@@ -0,0 +1,31 @@
+* Microchip PIC32MZDA Platforms
+
+PIC32MZDA Starter Kit
+Required root node properties:
+ - compatible = "microchip,pic32mzda-sk", "microchip,pic32mzda"
+
+CPU nodes:
+----------
+A "cpus" node is required. Required properties:
+ - #address-cells: Must be 1.
+ - #size-cells: Must be 0.
+A CPU sub-node is also required. Required properties:
+ - device_type: Must be "cpu".
+ - compatible: Must be "mti,mips14KEc".
+Example:
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu0: cpu@0 {
+ device_type = "cpu";
+ compatible = "mti,mips14KEc";
+ };
+ };
+
+Boot protocol
+--------------
+In accordance with Unified Hosting Interface Reference Manual (MD01069), the
+bootloader must pass the following arguments to the kernel:
+ - $a0: -2.
+ - $a1: KSEG0 address of the flattened device-tree blob.
diff --git a/Documentation/devicetree/bindings/net/mediatek,mt7620-gsw.txt b/Documentation/devicetree/bindings/net/mediatek,mt7620-gsw.txt
new file mode 100644
index 000000000000..aa6313024176
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/mediatek,mt7620-gsw.txt
@@ -0,0 +1,26 @@
+Mediatek Gigabit Switch
+=======================
+
+The mediatek gigabit switch can be found on Mediatek SoCs (mt7620, mt7621).
+
+Required properties:
+- compatible: Should be "mediatek,mt7620-gsw" or "mediatek,mt7621-gsw"
+- reg: Address and length of the register set for the device
+- interrupt-parent: Should be the phandle for the interrupt controller
+ that services interrupts for this device
+- interrupts: Should contain the gigabit switches interrupt
+- resets: Should contain the gigabit switches resets
+- reset-names: Should contain the reset names "gsw"
+
+Example:
+
+gsw@10110000 {
+ compatible = "ralink,mt7620-gsw";
+ reg = <0x10110000 8000>;
+
+ resets = <&rstctrl 23>;
+ reset-names = "gsw";
+
+ interrupt-parent = <&intc>;
+ interrupts = <17>;
+};
diff --git a/Documentation/devicetree/bindings/net/ralink,rt2880-net.txt b/Documentation/devicetree/bindings/net/ralink,rt2880-net.txt
new file mode 100644
index 000000000000..88b095d1f13b
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/ralink,rt2880-net.txt
@@ -0,0 +1,61 @@
+Ralink Frame Engine Ethernet controller
+=======================================
+
+The Ralink frame engine ethernet controller can be found on Ralink and
+Mediatek SoCs (RT288x, RT3x5x, RT366x, RT388x, rt5350, mt7620, mt7621, mt76x8).
+
+Depending on the SoC, there is a number of ports connected to the CPU port
+directly and/or via a (gigabit-)switch.
+
+* Ethernet controller node
+
+Required properties:
+- compatible: Should be one of "ralink,rt2880-eth", "ralink,rt3050-eth",
+ "ralink,rt3050-eth", "ralink,rt3883-eth", "ralink,rt5350-eth",
+ "mediatek,mt7620-eth", "mediatek,mt7621-eth"
+- reg: Address and length of the register set for the device
+- interrupt-parent: Should be the phandle for the interrupt controller
+ that services interrupts for this device
+- interrupts: Should contain the frame engines interrupt
+- resets: Should contain the frame engines resets
+- reset-names: Should contain the reset names "fe". If a switch is present
+ "esw" is also required.
+
+
+* Ethernet port node
+
+Required properties:
+- compatible: Should be "ralink,eth-port"
+- reg: The number of the physical port
+- phy-handle: reference to the node describing the phy
+
+Example:
+
+mdio-bus {
+ ...
+ phy0: ethernet-phy@0 {
+ phy-mode = "mii";
+ reg = <0>;
+ };
+};
+
+ethernet@400000 {
+ compatible = "ralink,rt2880-eth";
+ reg = <0x00400000 10000>;
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ resets = <&rstctrl 18>;
+ reset-names = "fe";
+
+ interrupt-parent = <&cpuintc>;
+ interrupts = <5>;
+
+ port@0 {
+ compatible = "ralink,eth-port";
+ reg = <0>;
+ phy-handle = <&phy0>;
+ };
+
+};
diff --git a/Documentation/devicetree/bindings/net/ralink,rt3050-esw.txt b/Documentation/devicetree/bindings/net/ralink,rt3050-esw.txt
new file mode 100644
index 000000000000..2e79bd376c56
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/ralink,rt3050-esw.txt
@@ -0,0 +1,32 @@
+Ralink Fast Ethernet Embedded Switch
+====================================
+
+The ralink fast ethernet embedded switch can be found on Ralink and Mediatek
+SoCs (RT3x5x, RT5350, MT76x8).
+
+Required properties:
+- compatible: Should be "ralink,rt3050-esw"
+- reg: Address and length of the register set for the device
+- interrupt-parent: Should be the phandle for the interrupt controller
+ that services interrupts for this device
+- interrupts: Should contain the embedded switches interrupt
+- resets: Should contain the embedded switches resets
+- reset-names: Should contain the reset names "esw"
+
+Optional properties:
+- ralink,portmap: can be used to choose if the default switch setup is
+ llllw or wllll
+- ralink,led_polarity: override the active high/low settings of the leds
+
+Example:
+
+esw@10110000 {
+ compatible = "ralink,rt3050-esw";
+ reg = <0x10110000 8000>;
+
+ resets = <&rstctrl 23>;
+ reset-names = "esw";
+
+ interrupt-parent = <&intc>;
+ interrupts = <17>;
+};
diff --git a/Documentation/devicetree/bindings/phy/phy-ath79-usb.txt b/Documentation/devicetree/bindings/phy/phy-ath79-usb.txt
new file mode 100644
index 000000000000..cafe2197dad9
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/phy-ath79-usb.txt
@@ -0,0 +1,18 @@
+* Atheros AR71XX/9XXX USB PHY
+
+Required properties:
+- compatible: "qca,ar7100-usb-phy"
+- #phys-cells: should be 0
+- reset-names: "usb-phy"[, "usb-suspend-override"]
+- resets: references to the reset controllers
+
+Example:
+
+ usb-phy {
+ compatible = "qca,ar7100-usb-phy";
+
+ reset-names = "usb-phy", "usb-suspend-override";
+ resets = <&rst 4>, <&rst 3>;
+
+ #phy-cells = <0>;
+ };
diff --git a/Documentation/devicetree/bindings/thermal/rockchip-thermal.txt b/Documentation/devicetree/bindings/thermal/rockchip-thermal.txt
index 0dfa60d88dd3..08efe6bc2193 100644
--- a/Documentation/devicetree/bindings/thermal/rockchip-thermal.txt
+++ b/Documentation/devicetree/bindings/thermal/rockchip-thermal.txt
@@ -2,8 +2,10 @@
Required properties:
- compatible : should be "rockchip,<name>-tsadc"
+ "rockchip,rk3228-tsadc": found on RK3228 SoCs
"rockchip,rk3288-tsadc": found on RK3288 SoCs
"rockchip,rk3368-tsadc": found on RK3368 SoCs
+ "rockchip,rk3399-tsadc": found on RK3399 SoCs
- reg : physical base address of the controller and length of memory mapped
region.
- interrupts : The interrupt number to the cpu. The interrupt specifier format
diff --git a/Documentation/infiniband/core_locking.txt b/Documentation/infiniband/core_locking.txt
index e1678542279a..4b1f36b6ada0 100644
--- a/Documentation/infiniband/core_locking.txt
+++ b/Documentation/infiniband/core_locking.txt
@@ -15,7 +15,6 @@ Sleeping and interrupt context
modify_ah
query_ah
destroy_ah
- bind_mw
post_send
post_recv
poll_cq
@@ -31,7 +30,6 @@ Sleeping and interrupt context
ib_modify_ah
ib_query_ah
ib_destroy_ah
- ib_bind_mw
ib_post_send
ib_post_recv
ib_req_notify_cq
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index cfb2c0f1a4a8..87d40a72f6a1 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1454,6 +1454,41 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
In such case C2/C3 won't be used again.
idle=nomwait: Disable mwait for CPU C-states
+ ieee754= [MIPS] Select IEEE Std 754 conformance mode
+ Format: { strict | legacy | 2008 | relaxed }
+ Default: strict
+
+ Choose which programs will be accepted for execution
+ based on the IEEE 754 NaN encoding(s) supported by
+ the FPU and the NaN encoding requested with the value
+ of an ELF file header flag individually set by each
+ binary. Hardware implementations are permitted to
+ support either or both of the legacy and the 2008 NaN
+ encoding mode.
+
+ Available settings are as follows:
+ strict accept binaries that request a NaN encoding
+ supported by the FPU
+ legacy only accept legacy-NaN binaries, if supported
+ by the FPU
+ 2008 only accept 2008-NaN binaries, if supported
+ by the FPU
+ relaxed accept any binaries regardless of whether
+ supported by the FPU
+
+ The FPU emulator is always able to support both NaN
+ encodings, so if no FPU hardware is present or it has
+ been disabled with 'nofpu', then the settings of
+ 'legacy' and '2008' strap the emulator accordingly,
+ 'relaxed' straps the emulator for both legacy-NaN and
+ 2008-NaN, whereas 'strict' enables legacy-NaN only on
+ legacy processors and both NaN encodings on MIPS32 or
+ MIPS64 CPUs.
+
+ The setting for ABS.fmt/NEG.fmt instruction execution
+ mode generally follows that for the NaN encoding,
+ except where unsupported by hardware.
+
ignore_loglevel [KNL]
Ignore loglevel setting - this will print /all/
kernel messages to the console. Useful for debugging.
diff --git a/Documentation/kernel-per-CPU-kthreads.txt b/Documentation/kernel-per-CPU-kthreads.txt
index f4cbfe0ba108..edec3a3e648d 100644
--- a/Documentation/kernel-per-CPU-kthreads.txt
+++ b/Documentation/kernel-per-CPU-kthreads.txt
@@ -90,7 +90,7 @@ BLOCK_SOFTIRQ: Do all of the following:
from being initiated from tasks that might run on the CPU to
be de-jittered. (It is OK to force this CPU offline and then
bring it back online before you start your application.)
-BLOCK_IOPOLL_SOFTIRQ: Do all of the following:
+IRQ_POLL_SOFTIRQ: Do all of the following:
1. Force block-device interrupts onto some other CPU.
2. Initiate any block I/O and block-I/O polling on other CPUs.
3. Once your application has started, prevent CPU-hotplug operations
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 053f613fc9a9..07e4cdf02407 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3025,7 +3025,7 @@ len must be a multiple of sizeof(struct kvm_s390_irq). It must be > 0
and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq),
which is the maximum number of possibly pending cpu-local interrupts.
-4.90 KVM_SMI
+4.96 KVM_SMI
Capability: KVM_CAP_X86_SMM
Architectures: x86
diff --git a/MAINTAINERS b/MAINTAINERS
index b8a717c4f863..30aca4aa5467 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2420,6 +2420,8 @@ F: arch/mips/kernel/*bmips*
F: arch/mips/boot/dts/brcm/bcm*.dts*
F: drivers/irqchip/irq-bcm7*
F: drivers/irqchip/irq-brcmstb*
+F: include/linux/bcm963xx_nvram.h
+F: include/linux/bcm963xx_tag.h
BROADCOM TG3 GIGABIT ETHERNET DRIVER
M: Prashant Sreedharan <prashant@broadcom.com>
@@ -5780,10 +5782,8 @@ INTEL TELEMETRY DRIVER
M: Souvik Kumar Chakravarty <souvik.k.chakravarty@intel.com>
L: platform-driver-x86@vger.kernel.org
S: Maintained
-F: drivers/platform/x86/intel_telemetry_core.c
F: arch/x86/include/asm/intel_telemetry.h
-F: drivers/platform/x86/intel_telemetry_pltdrv.c
-F: drivers/platform/x86/intel_telemetry_debugfs.c
+F: drivers/platform/x86/intel_telemetry*
IOC3 ETHERNET DRIVER
M: Ralf Baechle <ralf@linux-mips.org>
@@ -6218,6 +6218,14 @@ F: arch/arm64/include/uapi/asm/kvm*
F: arch/arm64/include/asm/kvm*
F: arch/arm64/kvm/
+KERNEL VIRTUAL MACHINE FOR MIPS (KVM/mips)
+M: James Hogan <james.hogan@imgtec.com>
+L: linux-mips@linux-mips.org
+S: Supported
+F: arch/mips/include/uapi/asm/kvm*
+F: arch/mips/include/asm/kvm*
+F: arch/mips/kvm/
+
KEXEC
M: Eric Biederman <ebiederm@xmission.com>
W: http://kernel.org/pub/linux/utils/kernel/kexec/
@@ -6315,6 +6323,12 @@ S: Maintained
F: net/l3mdev
F: include/net/l3mdev.h
+LANTIQ MIPS ARCHITECTURE
+M: John Crispin <blogic@openwrt.org>
+L: linux-mips@linux-mips.org
+S: Maintained
+F: arch/mips/lantiq
+
LAPB module
L: linux-x25@vger.kernel.org
S: Orphan
@@ -7151,27 +7165,45 @@ W: https://linuxtv.org
S: Odd Fixes
F: drivers/media/radio/radio-miropcm20*
-Mellanox MLX5 core VPI driver
-M: Eli Cohen <eli@mellanox.com>
+MELLANOX MLX4 core VPI driver
+M: Yishai Hadas <yishaih@mellanox.com>
L: netdev@vger.kernel.org
L: linux-rdma@vger.kernel.org
W: http://www.mellanox.com
Q: http://patchwork.ozlabs.org/project/netdev/list/
+S: Supported
+F: drivers/net/ethernet/mellanox/mlx4/
+F: include/linux/mlx4/
+
+MELLANOX MLX4 IB driver
+M: Yishai Hadas <yishaih@mellanox.com>
+L: linux-rdma@vger.kernel.org
+W: http://www.mellanox.com
Q: http://patchwork.kernel.org/project/linux-rdma/list/
-T: git git://openfabrics.org/~eli/connect-ib.git
+S: Supported
+F: drivers/infiniband/hw/mlx4/
+F: include/linux/mlx4/
+
+MELLANOX MLX5 core VPI driver
+M: Matan Barak <matanb@mellanox.com>
+M: Leon Romanovsky <leonro@mellanox.com>
+L: netdev@vger.kernel.org
+L: linux-rdma@vger.kernel.org
+W: http://www.mellanox.com
+Q: http://patchwork.ozlabs.org/project/netdev/list/
S: Supported
F: drivers/net/ethernet/mellanox/mlx5/core/
F: include/linux/mlx5/
-Mellanox MLX5 IB driver
-M: Eli Cohen <eli@mellanox.com>
+MELLANOX MLX5 IB driver
+M: Matan Barak <matanb@mellanox.com>
+M: Leon Romanovsky <leonro@mellanox.com>
L: linux-rdma@vger.kernel.org
W: http://www.mellanox.com
Q: http://patchwork.kernel.org/project/linux-rdma/list/
-T: git git://openfabrics.org/~eli/connect-ib.git
S: Supported
-F: include/linux/mlx5/
F: drivers/infiniband/hw/mlx5/
+F: include/linux/mlx5/
MELEXIS MLX90614 DRIVER
M: Crt Mori <cmo@melexis.com>
@@ -7702,6 +7734,12 @@ W: https://github.com/jonmason/ntb/wiki
T: git git://github.com/jonmason/ntb.git
F: drivers/ntb/hw/intel/
+NTB AMD DRIVER
+M: Xiangliang Yu <Xiangliang.Yu@amd.com>
+L: linux-ntb@googlegroups.com
+S: Supported
+F: drivers/ntb/hw/amd/
+
NTFS FILESYSTEM
M: Anton Altaparmakov <anton@tuxera.com>
L: linux-ntfs-dev@lists.sourceforge.net
@@ -8975,6 +9013,12 @@ L: linux-fbdev@vger.kernel.org
S: Maintained
F: drivers/video/fbdev/aty/aty128fb.c
+RALINK MIPS ARCHITECTURE
+M: John Crispin <blogic@openwrt.org>
+L: linux-mips@linux-mips.org
+S: Maintained
+F: arch/mips/ralink
+
RALINK RT2X00 WIRELESS LAN DRIVER
P: rt2x00 project
M: Stanislaw Gruszka <sgruszka@redhat.com>
@@ -10453,9 +10497,11 @@ S: Maintained
F: drivers/net/ethernet/dlink/sundance.c
SUPERH
+M: Yoshinori Sato <ysato@users.sourceforge.jp>
+M: Rich Felker <dalias@libc.org>
L: linux-sh@vger.kernel.org
Q: http://patchwork.kernel.org/project/linux-sh/list/
-S: Orphan
+S: Maintained
F: Documentation/sh/
F: arch/sh/
F: drivers/sh/
diff --git a/Makefile b/Makefile
index abfb3e8eb0b1..c65fe37c99e5 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
VERSION = 4
-PATCHLEVEL = 4
+PATCHLEVEL = 5
SUBLEVEL = 0
-EXTRAVERSION =
+EXTRAVERSION = -rc1
NAME = Blurry Fish Butt
# *DOCUMENTATION*
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 534a60ae282e..0eca3812527e 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -1200,10 +1200,7 @@ error:
while (i--)
if (pages[i])
__free_pages(pages[i], 0);
- if (array_size <= PAGE_SIZE)
- kfree(pages);
- else
- vfree(pages);
+ kvfree(pages);
return NULL;
}
@@ -1211,7 +1208,6 @@ static int __iommu_free_buffer(struct device *dev, struct page **pages,
size_t size, struct dma_attrs *attrs)
{
int count = size >> PAGE_SHIFT;
- int array_size = count * sizeof(struct page *);
int i;
if (dma_get_attr(DMA_ATTR_FORCE_CONTIGUOUS, attrs)) {
@@ -1222,10 +1218,7 @@ static int __iommu_free_buffer(struct device *dev, struct page **pages,
__free_pages(pages[i], 0);
}
- if (array_size <= PAGE_SIZE)
- kfree(pages);
- else
- vfree(pages);
+ kvfree(pages);
return 0;
}
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index cd822d8454c0..307237cfe728 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -27,6 +27,8 @@ $(warning LSE atomics not supported by binutils)
endif
KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr)
+KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
+KBUILD_CFLAGS += $(call cc-option, -mpc-relative-literal-loads)
KBUILD_AFLAGS += $(lseinstr)
ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 18ca9fb9e65f..86581f793e39 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -16,7 +16,6 @@ CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_MEMCG=y
CONFIG_MEMCG_SWAP=y
-CONFIG_MEMCG_KMEM=y
CONFIG_CGROUP_HUGETLB=y
# CONFIG_UTS_NS is not set
# CONFIG_IPC_NS is not set
@@ -37,15 +36,13 @@ CONFIG_ARCH_EXYNOS7=y
CONFIG_ARCH_LAYERSCAPE=y
CONFIG_ARCH_HISI=y
CONFIG_ARCH_MEDIATEK=y
+CONFIG_ARCH_QCOM=y
CONFIG_ARCH_ROCKCHIP=y
CONFIG_ARCH_SEATTLE=y
CONFIG_ARCH_RENESAS=y
CONFIG_ARCH_R8A7795=y
CONFIG_ARCH_STRATIX10=y
CONFIG_ARCH_TEGRA=y
-CONFIG_ARCH_TEGRA_132_SOC=y
-CONFIG_ARCH_TEGRA_210_SOC=y
-CONFIG_ARCH_QCOM=y
CONFIG_ARCH_SPRD=y
CONFIG_ARCH_THUNDER=y
CONFIG_ARCH_UNIPHIER=y
@@ -54,14 +51,19 @@ CONFIG_ARCH_XGENE=y
CONFIG_ARCH_ZYNQMP=y
CONFIG_PCI=y
CONFIG_PCI_MSI=y
+CONFIG_PCI_IOV=y
+CONFIG_PCI_RCAR_GEN2_PCIE=y
CONFIG_PCI_HOST_GENERIC=y
CONFIG_PCI_XGENE=y
-CONFIG_SMP=y
+CONFIG_PCI_LAYERSCAPE=y
+CONFIG_PCI_HISI=y
+CONFIG_PCIE_QCOM=y
CONFIG_SCHED_MC=y
CONFIG_PREEMPT=y
CONFIG_KSM=y
CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_CMA=y
+CONFIG_XEN=y
CONFIG_CMDLINE="console=ttyAMA0"
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_COMPAT=y
@@ -100,7 +102,11 @@ CONFIG_PATA_OF_PLATFORM=y
CONFIG_NETDEVICES=y
CONFIG_TUN=y
CONFIG_VIRTIO_NET=y
+CONFIG_AMD_XGBE=y
CONFIG_NET_XGENE=y
+CONFIG_E1000E=y
+CONFIG_IGB=y
+CONFIG_IGBVF=y
CONFIG_SKY2=y
CONFIG_RAVB=y
CONFIG_SMC91X=y
@@ -117,25 +123,23 @@ CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_8250_DW=y
CONFIG_SERIAL_8250_MT6577=y
CONFIG_SERIAL_8250_UNIPHIER=y
+CONFIG_SERIAL_OF_PLATFORM=y
CONFIG_SERIAL_AMBA_PL011=y
CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
CONFIG_SERIAL_SAMSUNG=y
-CONFIG_SERIAL_SAMSUNG_UARTS_4=y
-CONFIG_SERIAL_SAMSUNG_UARTS=4
CONFIG_SERIAL_SAMSUNG_CONSOLE=y
+CONFIG_SERIAL_TEGRA=y
CONFIG_SERIAL_SH_SCI=y
CONFIG_SERIAL_SH_SCI_NR_UARTS=11
CONFIG_SERIAL_SH_SCI_CONSOLE=y
-CONFIG_SERIAL_TEGRA=y
CONFIG_SERIAL_MSM=y
CONFIG_SERIAL_MSM_CONSOLE=y
-CONFIG_SERIAL_OF_PLATFORM=y
CONFIG_SERIAL_XILINX_PS_UART=y
CONFIG_SERIAL_XILINX_PS_UART_CONSOLE=y
CONFIG_VIRTIO_CONSOLE=y
# CONFIG_HW_RANDOM is not set
-CONFIG_I2C=y
CONFIG_I2C_QUP=y
+CONFIG_I2C_UNIPHIER_F=y
CONFIG_I2C_RCAR=y
CONFIG_SPI=y
CONFIG_SPI_PL022=y
@@ -176,8 +180,6 @@ CONFIG_MMC_SDHCI_PLTFM=y
CONFIG_MMC_SDHCI_TEGRA=y
CONFIG_MMC_SPI=y
CONFIG_MMC_DW=y
-CONFIG_MMC_DW_IDMAC=y
-CONFIG_MMC_DW_PLTFM=y
CONFIG_MMC_DW_EXYNOS=y
CONFIG_NEW_LEDS=y
CONFIG_LEDS_CLASS=y
@@ -187,28 +189,33 @@ CONFIG_LEDS_TRIGGER_HEARTBEAT=y
CONFIG_LEDS_TRIGGER_CPU=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_EFI=y
+CONFIG_RTC_DRV_PL031=y
CONFIG_RTC_DRV_XGENE=y
CONFIG_DMADEVICES=y
-CONFIG_RCAR_DMAC=y
CONFIG_QCOM_BAM_DMA=y
CONFIG_TEGRA20_APB_DMA=y
+CONFIG_RCAR_DMAC=y
+CONFIG_VFIO=y
+CONFIG_VFIO_PCI=y
CONFIG_VIRTIO_PCI=y
CONFIG_VIRTIO_BALLOON=y
CONFIG_VIRTIO_MMIO=y
+CONFIG_XEN_GNTDEV=y
+CONFIG_XEN_GRANT_DEV_ALLOC=y
CONFIG_COMMON_CLK_CS2000_CP=y
CONFIG_COMMON_CLK_QCOM=y
CONFIG_MSM_GCC_8916=y
CONFIG_HWSPINLOCK_QCOM=y
-# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_ARM_SMMU=y
CONFIG_QCOM_SMEM=y
CONFIG_QCOM_SMD=y
CONFIG_QCOM_SMD_RPM=y
+CONFIG_ARCH_TEGRA_132_SOC=y
+CONFIG_ARCH_TEGRA_210_SOC=y
+CONFIG_HISILICON_IRQ_MBIGEN=y
CONFIG_PHY_XGENE=y
CONFIG_EXT2_FS=y
CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-# CONFIG_EXT3_FS_XATTR is not set
-CONFIG_EXT4_FS=y
CONFIG_FANOTIFY=y
CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
CONFIG_QUOTA=y
@@ -239,6 +246,7 @@ CONFIG_LOCKUP_DETECTOR=y
# CONFIG_FTRACE is not set
CONFIG_MEMTEST=y
CONFIG_SECURITY=y
+CONFIG_CRYPTO_ECHAINIV=y
CONFIG_CRYPTO_ANSI_CPRNG=y
CONFIG_ARM64_CRYPTO=y
CONFIG_CRYPTO_SHA1_ARM64_CE=y
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 2d545d7aa80b..bf464de33f52 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -67,11 +67,11 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
-#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
-#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE))
-#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_NC))
-#define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_WT))
-#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL))
+#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
+#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
+#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC))
+#define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT))
+#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL))
#define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
#define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
@@ -81,7 +81,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
#define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
#define PAGE_KERNEL_RO __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
-#define PAGE_KERNEL_ROX __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
+#define PAGE_KERNEL_ROX __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
#define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
#define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
@@ -153,6 +153,7 @@ extern struct page *empty_zero_page;
#define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE))
#define pte_exec(pte) (!(pte_val(pte) & PTE_UXN))
#define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT))
+#define pte_user(pte) (!!(pte_val(pte) & PTE_USER))
#ifdef CONFIG_ARM64_HW_AFDBM
#define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
@@ -163,8 +164,6 @@ extern struct page *empty_zero_page;
#define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte))
#define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID))
-#define pte_valid_user(pte) \
- ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
#define pte_valid_not_user(pte) \
((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
#define pte_valid_young(pte) \
@@ -278,13 +277,13 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
- if (pte_valid_user(pte)) {
- if (!pte_special(pte) && pte_exec(pte))
- __sync_icache_dcache(pte, addr);
+ if (pte_valid(pte)) {
if (pte_sw_dirty(pte) && pte_write(pte))
pte_val(pte) &= ~PTE_RDONLY;
else
pte_val(pte) |= PTE_RDONLY;
+ if (pte_user(pte) && pte_exec(pte) && !pte_special(pte))
+ __sync_icache_dcache(pte, addr);
}
/*
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index ffe9c2b6431b..917d98108b3f 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -514,9 +514,14 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems
#endif
/* EL2 debug */
+ mrs x0, id_aa64dfr0_el1 // Check ID_AA64DFR0_EL1 PMUVer
+ sbfx x0, x0, #8, #4
+ cmp x0, #1
+ b.lt 4f // Skip if no PMU present
mrs x0, pmcr_el0 // Disable debug access traps
ubfx x0, x0, #11, #5 // to EL2 and allow access to
msr mdcr_el2, x0 // all PMU counters from EL1
+4:
/* Stage-2 translation */
msr vttbr_el2, xzr
diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h
index bc2abb8b1599..999633bd7294 100644
--- a/arch/arm64/kernel/image.h
+++ b/arch/arm64/kernel/image.h
@@ -65,6 +65,16 @@
#ifdef CONFIG_EFI
/*
+ * Prevent the symbol aliases below from being emitted into the kallsyms
+ * table, by forcing them to be absolute symbols (which are conveniently
+ * ignored by scripts/kallsyms) rather than section relative symbols.
+ * The distinction is only relevant for partial linking, and only for symbols
+ * that are defined within a section declaration (which is not the case for
+ * the definitions below) so the resulting values will be identical.
+ */
+#define KALLSYMS_HIDE(sym) ABSOLUTE(sym)
+
+/*
* The EFI stub has its own symbol namespace prefixed by __efistub_, to
* isolate it from the kernel proper. The following symbols are legally
* accessed by the stub, so provide some aliases to make them accessible.
@@ -73,25 +83,25 @@
* linked at. The routines below are all implemented in assembler in a
* position independent manner
*/
-__efistub_memcmp = __pi_memcmp;
-__efistub_memchr = __pi_memchr;
-__efistub_memcpy = __pi_memcpy;
-__efistub_memmove = __pi_memmove;
-__efistub_memset = __pi_memset;
-__efistub_strlen = __pi_strlen;
-__efistub_strcmp = __pi_strcmp;
-__efistub_strncmp = __pi_strncmp;
-__efistub___flush_dcache_area = __pi___flush_dcache_area;
+__efistub_memcmp = KALLSYMS_HIDE(__pi_memcmp);
+__efistub_memchr = KALLSYMS_HIDE(__pi_memchr);
+__efistub_memcpy = KALLSYMS_HIDE(__pi_memcpy);
+__efistub_memmove = KALLSYMS_HIDE(__pi_memmove);
+__efistub_memset = KALLSYMS_HIDE(__pi_memset);
+__efistub_strlen = KALLSYMS_HIDE(__pi_strlen);
+__efistub_strcmp = KALLSYMS_HIDE(__pi_strcmp);
+__efistub_strncmp = KALLSYMS_HIDE(__pi_strncmp);
+__efistub___flush_dcache_area = KALLSYMS_HIDE(__pi___flush_dcache_area);
#ifdef CONFIG_KASAN
-__efistub___memcpy = __pi_memcpy;
-__efistub___memmove = __pi_memmove;
-__efistub___memset = __pi_memset;
+__efistub___memcpy = KALLSYMS_HIDE(__pi_memcpy);
+__efistub___memmove = KALLSYMS_HIDE(__pi_memmove);
+__efistub___memset = KALLSYMS_HIDE(__pi_memset);
#endif
-__efistub__text = _text;
-__efistub__end = _end;
-__efistub__edata = _edata;
+__efistub__text = KALLSYMS_HIDE(_text);
+__efistub__end = KALLSYMS_HIDE(_end);
+__efistub__edata = KALLSYMS_HIDE(_edata);
#endif
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index 5a22a119a74c..0adbebbc2803 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -46,7 +46,7 @@ enum address_markers_idx {
PCI_START_NR,
PCI_END_NR,
MODULES_START_NR,
- MODUELS_END_NR,
+ MODULES_END_NR,
KERNEL_SPACE_NR,
};
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index cf038c7d9fa9..cab7a5be40aa 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -120,6 +120,7 @@ static void __init cpu_set_ttbr1(unsigned long ttbr1)
void __init kasan_init(void)
{
struct memblock_region *reg;
+ int i;
/*
* We are going to perform proper setup of shadow memory.
@@ -155,6 +156,14 @@ void __init kasan_init(void)
pfn_to_nid(virt_to_pfn(start)));
}
+ /*
+ * KAsan may reuse the contents of kasan_zero_pte directly, so we
+ * should make sure that it maps the zero page read-only.
+ */
+ for (i = 0; i < PTRS_PER_PTE; i++)
+ set_pte(&kasan_zero_pte[i],
+ pfn_pte(virt_to_pfn(kasan_zero_page), PAGE_KERNEL_RO));
+
memset(kasan_zero_page, 0, PAGE_SIZE);
cpu_set_ttbr1(__pa(swapper_pg_dir));
flush_tlb_all();
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index 3571c7309c5e..cf6240741134 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -57,6 +57,9 @@ static int change_memory_common(unsigned long addr, int numpages,
if (end < MODULES_VADDR || end >= MODULES_END)
return -EINVAL;
+ if (!numpages)
+ return 0;
+
data.set_mask = set_mask;
data.clear_mask = clear_mask;
diff --git a/arch/arm64/mm/proc-macros.S b/arch/arm64/mm/proc-macros.S
index 146bd99a7532..e6a30e1268a8 100644
--- a/arch/arm64/mm/proc-macros.S
+++ b/arch/arm64/mm/proc-macros.S
@@ -84,3 +84,15 @@
b.lo 9998b
dsb \domain
.endm
+
+/*
+ * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present
+ */
+ .macro reset_pmuserenr_el0, tmpreg
+ mrs \tmpreg, id_aa64dfr0_el1 // Check ID_AA64DFR0_EL1 PMUVer
+ sbfx \tmpreg, \tmpreg, #8, #4
+ cmp \tmpreg, #1 // Skip if no PMU present
+ b.lt 9000f
+ msr pmuserenr_el0, xzr // Disable PMU access from EL0
+9000:
+ .endm
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index a3d867e723b4..c164d2cb35c0 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -117,7 +117,7 @@ ENTRY(cpu_do_resume)
*/
ubfx x11, x11, #1, #1
msr oslar_el1, x11
- msr pmuserenr_el0, xzr // Disable PMU access from EL0
+ reset_pmuserenr_el0 x0 // Disable PMU access from EL0
mov x0, x12
dsb nsh // Make sure local tlb invalidation completed
isb
@@ -154,7 +154,7 @@ ENTRY(__cpu_setup)
msr cpacr_el1, x0 // Enable FP/ASIMD
mov x0, #1 << 12 // Reset mdscr_el1 and disable
msr mdscr_el1, x0 // access to the DCC from EL0
- msr pmuserenr_el0, xzr // Disable PMU access from EL0
+ reset_pmuserenr_el0 x0 // Disable PMU access from EL0
/*
* Memory region attributes for LPAE:
*
diff --git a/arch/mips/Kbuild.platforms b/arch/mips/Kbuild.platforms
index a96c81d1d22e..c5cd63a4b6d5 100644
--- a/arch/mips/Kbuild.platforms
+++ b/arch/mips/Kbuild.platforms
@@ -21,6 +21,7 @@ platforms += mti-malta
platforms += mti-sead3
platforms += netlogic
platforms += paravirt
+platforms += pic32
platforms += pistachio
platforms += pmcs-msp71xx
platforms += pnx833x
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index fbf3f6670b69..57a945e832f4 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -169,6 +169,7 @@ config BMIPS_GENERIC
select USB_EHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
select USB_OHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN
select USB_OHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
+ select ARCH_WANT_OPTIONAL_GPIOLIB
help
Build a generic DT-based kernel image that boots on select
BCM33xx cable modem chips, BCM63xx DSL chips, and BCM7xxx set-top
@@ -480,6 +481,14 @@ config MIPS_MALTA
This enables support for the MIPS Technologies Malta evaluation
board.
+config MACH_PIC32
+ bool "Microchip PIC32 Family"
+ help
+ This enables support for the Microchip PIC32 family of platforms.
+
+ Microchip PIC32 is a family of general-purpose 32 bit MIPS core
+ microcontrollers.
+
config MIPS_SEAD3
bool "MIPS SEAD3 board"
select BOOT_ELF32
@@ -979,6 +988,7 @@ source "arch/mips/jazz/Kconfig"
source "arch/mips/jz4740/Kconfig"
source "arch/mips/lantiq/Kconfig"
source "arch/mips/lasat/Kconfig"
+source "arch/mips/pic32/Kconfig"
source "arch/mips/pistachio/Kconfig"
source "arch/mips/pmcs-msp71xx/Kconfig"
source "arch/mips/ralink/Kconfig"
@@ -1755,6 +1765,10 @@ config SYS_SUPPORTS_ZBOOT_UART16550
bool
select SYS_SUPPORTS_ZBOOT
+config SYS_SUPPORTS_ZBOOT_UART_PROM
+ bool
+ select SYS_SUPPORTS_ZBOOT
+
config CPU_LOONGSON2
bool
select CPU_SUPPORTS_32BIT_KERNEL
@@ -2017,7 +2031,8 @@ config KVM_GUEST
bool "KVM Guest Kernel"
depends on BROKEN_ON_SMP
help
- Select this option if building a guest kernel for KVM (Trap & Emulate) mode
+ Select this option if building a guest kernel for KVM (Trap & Emulate)
+ mode.
config KVM_GUEST_TIMER_FREQ
int "Count/Compare Timer Frequency (MHz)"
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 3f70ba54ae21..e78d60dbdffd 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -166,16 +166,6 @@ cflags-$(CONFIG_CPU_CAVIUM_OCTEON) += -Wa,-march=octeon
endif
cflags-$(CONFIG_CAVIUM_CN63XXP1) += -Wa,-mfix-cn63xxp1
cflags-$(CONFIG_CPU_BMIPS) += -march=mips32 -Wa,-mips32 -Wa,--trap
-#
-# binutils from v2.25 on and gcc starting from v4.9.0 treat -march=loongson3a
-# as MIPS64 R1; older versions as just R1. This leaves the possibility open
-# that GCC might generate R2 code for -march=loongson3a which then is rejected
-# by GAS. The cc-option can't probe for this behaviour so -march=loongson3a
-# can't easily be used safely within the kbuild framework.
-#
-cflags-$(CONFIG_CPU_LOONGSON3) += \
- $(call cc-option,-march=mips64r2,-mips64r2 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64) \
- -Wa,-mips64r2 -Wa,--trap
cflags-$(CONFIG_CPU_R4000_WORKAROUNDS) += $(call cc-option,-mfix-r4000,)
cflags-$(CONFIG_CPU_R4400_WORKAROUNDS) += $(call cc-option,-mfix-r4400,)
diff --git a/arch/mips/alchemy/common/gpiolib.c b/arch/mips/alchemy/common/gpiolib.c
index f9bc4f520440..84548f704035 100644
--- a/arch/mips/alchemy/common/gpiolib.c
+++ b/arch/mips/alchemy/common/gpiolib.c
@@ -40,7 +40,7 @@
static int gpio2_get(struct gpio_chip *chip, unsigned offset)
{
- return alchemy_gpio2_get_value(offset + ALCHEMY_GPIO2_BASE);
+ return !!alchemy_gpio2_get_value(offset + ALCHEMY_GPIO2_BASE);
}
static void gpio2_set(struct gpio_chip *chip, unsigned offset, int value)
@@ -68,7 +68,7 @@ static int gpio2_to_irq(struct gpio_chip *chip, unsigned offset)
static int gpio1_get(struct gpio_chip *chip, unsigned offset)
{
- return alchemy_gpio1_get_value(offset + ALCHEMY_GPIO1_BASE);
+ return !!alchemy_gpio1_get_value(offset + ALCHEMY_GPIO1_BASE);
}
static void gpio1_set(struct gpio_chip *chip,
@@ -119,7 +119,7 @@ struct gpio_chip alchemy_gpio_chip[] = {
static int alchemy_gpic_get(struct gpio_chip *chip, unsigned int off)
{
- return au1300_gpio_get_value(off + AU1300_GPIO_BASE);
+ return !!au1300_gpio_get_value(off + AU1300_GPIO_BASE);
}
static void alchemy_gpic_set(struct gpio_chip *chip, unsigned int off, int v)
diff --git a/arch/mips/ar7/gpio.c b/arch/mips/ar7/gpio.c
index f4930456eb8e..f969f583c68c 100644
--- a/arch/mips/ar7/gpio.c
+++ b/arch/mips/ar7/gpio.c
@@ -37,7 +37,7 @@ static int ar7_gpio_get_value(struct gpio_chip *chip, unsigned gpio)
container_of(chip, struct ar7_gpio_chip, chip);
void __iomem *gpio_in = gpch->regs + AR7_GPIO_INPUT;
- return readl(gpio_in) & (1 << gpio);
+ return !!(readl(gpio_in) & (1 << gpio));
}
static int titan_gpio_get_value(struct gpio_chip *chip, unsigned gpio)
diff --git a/arch/mips/ath79/common.h b/arch/mips/ath79/common.h
index ca7cc19adfea..870c6b2e97e8 100644
--- a/arch/mips/ath79/common.h
+++ b/arch/mips/ath79/common.h
@@ -23,7 +23,6 @@ void ath79_clocks_init(void);
unsigned long ath79_get_sys_clk_rate(const char *id);
void ath79_ddr_ctrl_init(void);
-void ath79_ddr_wb_flush(unsigned int reg);
void ath79_gpio_init(void);
diff --git a/arch/mips/ath79/irq.c b/arch/mips/ath79/irq.c
index eeb3953ed8ac..511c06560dc1 100644
--- a/arch/mips/ath79/irq.c
+++ b/arch/mips/ath79/irq.c
@@ -26,9 +26,13 @@
#include "common.h"
#include "machtypes.h"
+static void __init ath79_misc_intc_domain_init(
+ struct device_node *node, int irq);
+
static void ath79_misc_irq_handler(struct irq_desc *desc)
{
- void __iomem *base = ath79_reset_base;
+ struct irq_domain *domain = irq_desc_get_handler_data(desc);
+ void __iomem *base = domain->host_data;
u32 pending;
pending = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS) &
@@ -42,15 +46,15 @@ static void ath79_misc_irq_handler(struct irq_desc *desc)
while (pending) {
int bit = __ffs(pending);
- generic_handle_irq(ATH79_MISC_IRQ(bit));
+ generic_handle_irq(irq_linear_revmap(domain, bit));
pending &= ~BIT(bit);
}
}
static void ar71xx_misc_irq_unmask(struct irq_data *d)
{
- unsigned int irq = d->irq - ATH79_MISC_IRQ_BASE;
- void __iomem *base = ath79_reset_base;
+ void __iomem *base = irq_data_get_irq_chip_data(d);
+ unsigned int irq = d->hwirq;
u32 t;
t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
@@ -62,8 +66,8 @@ static void ar71xx_misc_irq_unmask(struct irq_data *d)
static void ar71xx_misc_irq_mask(struct irq_data *d)
{
- unsigned int irq = d->irq - ATH79_MISC_IRQ_BASE;
- void __iomem *base = ath79_reset_base;
+ void __iomem *base = irq_data_get_irq_chip_data(d);
+ unsigned int irq = d->hwirq;
u32 t;
t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_ENABLE);
@@ -75,8 +79,8 @@ static void ar71xx_misc_irq_mask(struct irq_data *d)
static void ar724x_misc_irq_ack(struct irq_data *d)
{
- unsigned int irq = d->irq - ATH79_MISC_IRQ_BASE;
- void __iomem *base = ath79_reset_base;
+ void __iomem *base = irq_data_get_irq_chip_data(d);
+ unsigned int irq = d->hwirq;
u32 t;
t = __raw_readl(base + AR71XX_RESET_REG_MISC_INT_STATUS);
@@ -94,12 +98,6 @@ static struct irq_chip ath79_misc_irq_chip = {
static void __init ath79_misc_irq_init(void)
{
- void __iomem *base = ath79_reset_base;
- int i;
-
- __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_ENABLE);
- __raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_STATUS);
-
if (soc_is_ar71xx() || soc_is_ar913x())
ath79_misc_irq_chip.irq_mask_ack = ar71xx_misc_irq_mask;
else if (soc_is_ar724x() ||
@@ -110,13 +108,7 @@ static void __init ath79_misc_irq_init(void)
else
BUG();
- for (i = ATH79_MISC_IRQ_BASE;
- i < ATH79_MISC_IRQ_BASE + ATH79_MISC_IRQ_COUNT; i++) {
- irq_set_chip_and_handler(i, &ath79_misc_irq_chip,
- handle_level_irq);
- }
-
- irq_set_chained_handler(ATH79_CPU_IRQ(6), ath79_misc_irq_handler);
+ ath79_misc_intc_domain_init(NULL, ATH79_CPU_IRQ(6));
}
static void ar934x_ip2_irq_dispatch(struct irq_desc *desc)
@@ -256,10 +248,10 @@ asmlinkage void plat_irq_dispatch(void)
}
}
-#ifdef CONFIG_IRQCHIP
static int misc_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw)
{
irq_set_chip_and_handler(irq, &ath79_misc_irq_chip, handle_level_irq);
+ irq_set_chip_data(irq, d->host_data);
return 0;
}
@@ -268,19 +260,14 @@ static const struct irq_domain_ops misc_irq_domain_ops = {
.map = misc_map,
};
-static int __init ath79_misc_intc_of_init(
- struct device_node *node, struct device_node *parent)
+static void __init ath79_misc_intc_domain_init(
+ struct device_node *node, int irq)
{
void __iomem *base = ath79_reset_base;
struct irq_domain *domain;
- int irq;
-
- irq = irq_of_parse_and_map(node, 0);
- if (!irq)
- panic("Failed to get MISC IRQ");
domain = irq_domain_add_legacy(node, ATH79_MISC_IRQ_COUNT,
- ATH79_MISC_IRQ_BASE, 0, &misc_irq_domain_ops, NULL);
+ ATH79_MISC_IRQ_BASE, 0, &misc_irq_domain_ops, base);
if (!domain)
panic("Failed to add MISC irqdomain");
@@ -288,9 +275,19 @@ static int __init ath79_misc_intc_of_init(
__raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_ENABLE);
__raw_writel(0, base + AR71XX_RESET_REG_MISC_INT_STATUS);
+ irq_set_chained_handler_and_data(irq, ath79_misc_irq_handler, domain);
+}
- irq_set_chained_handler(irq, ath79_misc_irq_handler);
+static int __init ath79_misc_intc_of_init(
+ struct device_node *node, struct device_node *parent)
+{
+ int irq;
+ irq = irq_of_parse_and_map(node, 0);
+ if (!irq)
+ panic("Failed to get MISC IRQ");
+
+ ath79_misc_intc_domain_init(node, irq);
return 0;
}
@@ -349,8 +346,6 @@ static int __init ar79_cpu_intc_of_init(
IRQCHIP_DECLARE(ar79_cpu_intc, "qca,ar7100-cpu-intc",
ar79_cpu_intc_of_init);
-#endif
-
void __init arch_init_irq(void)
{
if (mips_machtype == ATH79_MACH_GENERIC_OF) {
diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c
index 8755d618e116..be451ee4a5ea 100644
--- a/arch/mips/ath79/setup.c
+++ b/arch/mips/ath79/setup.c
@@ -36,10 +36,6 @@
#define ATH79_SYS_TYPE_LEN 64
-#define AR71XX_BASE_FREQ 40000000
-#define AR724X_BASE_FREQ 5000000
-#define AR913X_BASE_FREQ 5000000
-
static char ath79_sys_type[ATH79_SYS_TYPE_LEN];
static void ath79_restart(char *command)
@@ -272,15 +268,10 @@ void __init device_tree_init(void)
unflatten_and_copy_device_tree();
}
-static void __init ath79_generic_init(void)
-{
- /* Nothing to do */
-}
-
MIPS_MACHINE(ATH79_MACH_GENERIC,
"Generic",
"Generic AR71XX/AR724X/AR913X based board",
- ath79_generic_init);
+ NULL);
MIPS_MACHINE(ATH79_MACH_GENERIC_OF,
"DTB",
diff --git a/arch/mips/bcm47xx/sprom.c b/arch/mips/bcm47xx/sprom.c
index a7e569c7968e..959c145a0a2c 100644
--- a/arch/mips/bcm47xx/sprom.c
+++ b/arch/mips/bcm47xx/sprom.c
@@ -666,9 +666,15 @@ static int bcm47xx_get_sprom_bcma(struct bcma_bus *bus, struct ssb_sprom *out)
switch (bus->hosttype) {
case BCMA_HOSTTYPE_PCI:
memset(out, 0, sizeof(struct ssb_sprom));
- snprintf(buf, sizeof(buf), "pci/%u/%u/",
- bus->host_pci->bus->number + 1,
- PCI_SLOT(bus->host_pci->devfn));
+ /* On BCM47XX all PCI buses share the same domain */
+ if (config_enabled(CONFIG_BCM47XX))
+ snprintf(buf, sizeof(buf), "pci/%u/%u/",
+ bus->host_pci->bus->number + 1,
+ PCI_SLOT(bus->host_pci->devfn));
+ else
+ snprintf(buf, sizeof(buf), "pci/%u/%u/",
+ pci_domain_nr(bus->host_pci->bus) + 1,
+ bus->host_pci->bus->number);
bcm47xx_sprom_apply_prefix_alias(buf, sizeof(buf));
prefix = buf;
break;
diff --git a/arch/mips/bcm63xx/nvram.c b/arch/mips/bcm63xx/nvram.c
index 4b50d40f7451..5f2bc1e10eae 100644
--- a/arch/mips/bcm63xx/nvram.c
+++ b/arch/mips/bcm63xx/nvram.c
@@ -10,6 +10,7 @@
#define pr_fmt(fmt) "bcm63xx_nvram: " fmt
+#include <linux/bcm963xx_nvram.h>
#include <linux/init.h>
#include <linux/crc32.h>
#include <linux/export.h>
@@ -18,51 +19,19 @@
#include <bcm63xx_nvram.h>
-/*
- * nvram structure
- */
-struct bcm963xx_nvram {
- u32 version;
- u8 reserved1[256];
- u8 name[16];
- u32 main_tp_number;
- u32 psi_size;
- u32 mac_addr_count;
- u8 mac_addr_base[ETH_ALEN];
- u8 reserved2[2];
- u32 checksum_old;
- u8 reserved3[720];
- u32 checksum_high;
-};
-
-#define BCM63XX_DEFAULT_PSI_SIZE 64
-
static struct bcm963xx_nvram nvram;
static int mac_addr_used;
void __init bcm63xx_nvram_init(void *addr)
{
- unsigned int check_len;
u32 crc, expected_crc;
u8 hcs_mac_addr[ETH_ALEN] = { 0x00, 0x10, 0x18, 0xff, 0xff, 0xff };
/* extract nvram data */
- memcpy(&nvram, addr, sizeof(nvram));
+ memcpy(&nvram, addr, BCM963XX_NVRAM_V5_SIZE);
/* check checksum before using data */
- if (nvram.version <= 4) {
- check_len = offsetof(struct bcm963xx_nvram, reserved3);
- expected_crc = nvram.checksum_old;
- nvram.checksum_old = 0;
- } else {
- check_len = sizeof(nvram);
- expected_crc = nvram.checksum_high;
- nvram.checksum_high = 0;
- }
-
- crc = crc32_le(~0, (u8 *)&nvram, check_len);
-
- if (crc != expected_crc)
+ if (bcm963xx_nvram_checksum(&nvram, &expected_crc, &crc))
pr_warn("nvram checksum failed, contents may be invalid (expected %08x, got %08x)\n",
expected_crc, crc);
@@ -116,12 +85,3 @@ int bcm63xx_nvram_get_mac_address(u8 *mac)
return 0;
}
EXPORT_SYMBOL(bcm63xx_nvram_get_mac_address);
-
-int bcm63xx_nvram_get_psi_size(void)
-{
- if (nvram.psi_size > 0)
- return nvram.psi_size;
-
- return BCM63XX_DEFAULT_PSI_SIZE;
-}
-EXPORT_SYMBOL(bcm63xx_nvram_get_psi_size);
diff --git a/arch/mips/bmips/setup.c b/arch/mips/bmips/setup.c
index 5b16d2955fbb..35535284b39e 100644
--- a/arch/mips/bmips/setup.c
+++ b/arch/mips/bmips/setup.c
@@ -105,6 +105,7 @@ static const struct bmips_quirk bmips_quirk_list[] = {
{ "brcm,bcm33843-viper", &bcm3384_viper_quirks },
{ "brcm,bcm6328", &bcm6328_quirks },
{ "brcm,bcm6368", &bcm6368_quirks },
+ { "brcm,bcm63168", &bcm6368_quirks },
{ },
};
diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile
index d5bdee115f22..4eff1ef02eff 100644
--- a/arch/mips/boot/compressed/Makefile
+++ b/arch/mips/boot/compressed/Makefile
@@ -29,20 +29,23 @@ KBUILD_AFLAGS := $(LINUXINCLUDE) $(KBUILD_AFLAGS) -D__ASSEMBLY__ \
-DBOOT_HEAP_SIZE=$(BOOT_HEAP_SIZE) \
-DKERNEL_ENTRY=$(VMLINUX_ENTRY_ADDRESS)
-targets := head.o decompress.o string.o dbg.o uart-16550.o uart-alchemy.o
-
# decompressor objects (linked with vmlinuz)
vmlinuzobjs-y := $(obj)/head.o $(obj)/decompress.o $(obj)/string.o
ifdef CONFIG_DEBUG_ZBOOT
vmlinuzobjs-$(CONFIG_DEBUG_ZBOOT) += $(obj)/dbg.o
vmlinuzobjs-$(CONFIG_SYS_SUPPORTS_ZBOOT_UART16550) += $(obj)/uart-16550.o
+vmlinuzobjs-$(CONFIG_SYS_SUPPORTS_ZBOOT_UART_PROM) += $(obj)/uart-prom.o
vmlinuzobjs-$(CONFIG_MIPS_ALCHEMY) += $(obj)/uart-alchemy.o
endif
-ifdef CONFIG_KERNEL_XZ
-vmlinuzobjs-y += $(obj)/../../lib/ashldi3.o
-endif
+vmlinuzobjs-$(CONFIG_KERNEL_XZ) += $(obj)/ashldi3.o
+
+$(obj)/ashldi3.o: KBUILD_CFLAGS += -I$(srctree)/arch/mips/lib
+$(obj)/ashldi3.c: $(srctree)/arch/mips/lib/ashldi3.c
+ $(call cmd,shipped)
+
+targets := $(notdir $(vmlinuzobjs-y))
targets += vmlinux.bin
OBJCOPYFLAGS_vmlinux.bin := $(OBJCOPYFLAGS) -O binary -R .comment -S
@@ -60,7 +63,7 @@ targets += vmlinux.bin.z
$(obj)/vmlinux.bin.z: $(obj)/vmlinux.bin FORCE
$(call if_changed,$(tool_y))
-targets += piggy.o
+targets += piggy.o dummy.o
OBJCOPYFLAGS_piggy.o := --add-section=.image=$(obj)/vmlinux.bin.z \
--set-section-flags=.image=contents,alloc,load,readonly,data
$(obj)/piggy.o: $(obj)/dummy.o $(obj)/vmlinux.bin.z FORCE
diff --git a/arch/mips/boot/compressed/uart-prom.c b/arch/mips/boot/compressed/uart-prom.c
new file mode 100644
index 000000000000..1c3d51bc90bb
--- /dev/null
+++ b/arch/mips/boot/compressed/uart-prom.c
@@ -0,0 +1,7 @@
+
+extern void prom_putchar(unsigned char ch);
+
+void putc(char c)
+{
+ prom_putchar(c);
+}
diff --git a/arch/mips/boot/dts/Makefile b/arch/mips/boot/dts/Makefile
index a0bf516ec394..fc7a0a98e9bf 100644
--- a/arch/mips/boot/dts/Makefile
+++ b/arch/mips/boot/dts/Makefile
@@ -4,6 +4,7 @@ dts-dirs += ingenic
dts-dirs += lantiq
dts-dirs += mti
dts-dirs += netlogic
+dts-dirs += pic32
dts-dirs += qca
dts-dirs += ralink
dts-dirs += xilfpga
diff --git a/arch/mips/boot/dts/brcm/bcm6328.dtsi b/arch/mips/boot/dts/brcm/bcm6328.dtsi
index d52ce3d07f16..459b9b252c3b 100644
--- a/arch/mips/boot/dts/brcm/bcm6328.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm6328.dtsi
@@ -31,6 +31,7 @@
};
aliases {
+ leds0 = &leds0;
uart0 = &uart0;
};
@@ -81,5 +82,13 @@
offset = <0x28>;
mask = <0x1>;
};
+
+ leds0: led-controller@10000800 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "brcm,bcm6328-leds";
+ reg = <0x10000800 0x24>;
+ status = "disabled";
+ };
};
};
diff --git a/arch/mips/boot/dts/brcm/bcm6368.dtsi b/arch/mips/boot/dts/brcm/bcm6368.dtsi
index 45152bc22117..9c8d3fe28b31 100644
--- a/arch/mips/boot/dts/brcm/bcm6368.dtsi
+++ b/arch/mips/boot/dts/brcm/bcm6368.dtsi
@@ -32,6 +32,7 @@
};
aliases {
+ leds0 = &leds0;
uart0 = &uart0;
};
@@ -50,6 +51,19 @@
compatible = "simple-bus";
ranges;
+ periph_cntl: syscon@10000000 {
+ compatible = "syscon";
+ reg = <0x10000000 0x14>;
+ little-endian;
+ };
+
+ reboot: syscon-reboot@10000008 {
+ compatible = "syscon-reboot";
+ regmap = <&periph_cntl>;
+ offset = <0x8>;
+ mask = <0x1>;
+ };
+
periph_intc: periph_intc@10000020 {
compatible = "brcm,bcm3380-l2-intc";
reg = <0x10000024 0x4 0x1000002c 0x4>,
@@ -62,6 +76,14 @@
interrupts = <2>;
};
+ leds0: led-controller@100000d0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "brcm,bcm6358-leds";
+ reg = <0x100000d0 0x8>;
+ status = "disabled";
+ };
+
uart0: serial@10000100 {
compatible = "brcm,bcm6345-uart";
reg = <0x10000100 0x18>;
diff --git a/arch/mips/boot/dts/ingenic/ci20.dts b/arch/mips/boot/dts/ingenic/ci20.dts
index 9fcb9e7d1f57..1652d8d60b1e 100644
--- a/arch/mips/boot/dts/ingenic/ci20.dts
+++ b/arch/mips/boot/dts/ingenic/ci20.dts
@@ -42,3 +42,67 @@
&uart4 {
status = "okay";
};
+
+&nemc {
+ status = "okay";
+
+ nandc: nand-controller@1 {
+ compatible = "ingenic,jz4780-nand";
+ reg = <1 0 0x1000000>;
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ ingenic,bch-controller = <&bch>;
+
+ ingenic,nemc-tAS = <10>;
+ ingenic,nemc-tAH = <5>;
+ ingenic,nemc-tBP = <10>;
+ ingenic,nemc-tAW = <15>;
+ ingenic,nemc-tSTRV = <100>;
+
+ nand@1 {
+ reg = <1>;
+
+ nand-ecc-step-size = <1024>;
+ nand-ecc-strength = <24>;
+ nand-ecc-mode = "hw";
+ nand-on-flash-bbt;
+
+ partitions {
+ compatible = "fixed-partitions";
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ partition@0 {
+ label = "u-boot-spl";
+ reg = <0x0 0x0 0x0 0x800000>;
+ };
+
+ partition@0x800000 {
+ label = "u-boot";
+ reg = <0x0 0x800000 0x0 0x200000>;
+ };
+
+ partition@0xa00000 {
+ label = "u-boot-env";
+ reg = <0x0 0xa00000 0x0 0x200000>;
+ };
+
+ partition@0xc00000 {
+ label = "boot";
+ reg = <0x0 0xc00000 0x0 0x4000000>;
+ };
+
+ partition@0x8c00000 {
+ label = "system";
+ reg = <0x0 0x4c00000 0x1 0xfb400000>;
+ };
+ };
+ };
+ };
+};
+
+&bch {
+ status = "okay";
+};
diff --git a/arch/mips/boot/dts/ingenic/jz4780.dtsi b/arch/mips/boot/dts/ingenic/jz4780.dtsi
index 65389f602733..b868b429add2 100644
--- a/arch/mips/boot/dts/ingenic/jz4780.dtsi
+++ b/arch/mips/boot/dts/ingenic/jz4780.dtsi
@@ -108,4 +108,30 @@
status = "disabled";
};
+
+ nemc: nemc@13410000 {
+ compatible = "ingenic,jz4780-nemc";
+ reg = <0x13410000 0x10000>;
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges = <1 0 0x1b000000 0x1000000
+ 2 0 0x1a000000 0x1000000
+ 3 0 0x19000000 0x1000000
+ 4 0 0x18000000 0x1000000
+ 5 0 0x17000000 0x1000000
+ 6 0 0x16000000 0x1000000>;
+
+ clocks = <&cgu JZ4780_CLK_NEMC>;
+
+ status = "disabled";
+ };
+
+ bch: bch@134d0000 {
+ compatible = "ingenic,jz4780-bch";
+ reg = <0x134d0000 0x10000>;
+
+ clocks = <&cgu JZ4780_CLK_BCH>;
+
+ status = "disabled";
+ };
};
diff --git a/arch/mips/boot/dts/pic32/Makefile b/arch/mips/boot/dts/pic32/Makefile
new file mode 100644
index 000000000000..7ac790551ec9
--- /dev/null
+++ b/arch/mips/boot/dts/pic32/Makefile
@@ -0,0 +1,12 @@
+dtb-$(CONFIG_DTB_PIC32_MZDA_SK) += pic32mzda_sk.dtb
+
+dtb-$(CONFIG_DTB_PIC32_NONE) += \
+ pic32mzda_sk.dtb
+
+obj-y += $(patsubst %.dtb, %.dtb.o, $(dtb-y))
+
+# Force kbuild to make empty built-in.o if necessary
+obj- += dummy.o
+
+always := $(dtb-y)
+clean-files := *.dtb *.dtb.S
diff --git a/arch/mips/boot/dts/pic32/pic32mzda-clk.dtsi b/arch/mips/boot/dts/pic32/pic32mzda-clk.dtsi
new file mode 100644
index 000000000000..ef1335012f43
--- /dev/null
+++ b/arch/mips/boot/dts/pic32/pic32mzda-clk.dtsi
@@ -0,0 +1,236 @@
+/*
+ * Device Tree Source for PIC32MZDA clock data
+ *
+ * Purna Chandra Mandal <purna.mandal@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc. All rights reserved.
+ *
+ * Licensed under GPLv2 or later.
+ */
+
+/* all fixed rate clocks */
+
+/ {
+ POSC:posc_clk { /* On-chip primary oscillator */
+ #clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <24000000>;
+ };
+
+ FRC:frc_clk { /* internal FRC oscillator */
+ #clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <8000000>;
+ };
+
+ BFRC:bfrc_clk { /* internal backup FRC oscillator */
+ #clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <8000000>;
+ };
+
+ LPRC:lprc_clk { /* internal low-power FRC oscillator */
+ #clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <32000>;
+ };
+
+ /* UPLL provides clock to USBCORE */
+ UPLL:usb_phy_clk {
+ #clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <24000000>;
+ clock-output-names = "usbphy_clk";
+ };
+
+ TxCKI:txcki_clk { /* external clock input on TxCLKI pin */
+ #clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <4000000>;
+ status = "disabled";
+ };
+
+ /* external clock input on REFCLKIx pin */
+ REFIx:refix_clk {
+ #clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <24000000>;
+ status = "disabled";
+ };
+
+ /* PIC32 specific clks */
+ pic32_clktree {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0x1f801200 0x200>;
+ compatible = "microchip,pic32mzda-clk";
+ ranges = <0 0x1f801200 0x200>;
+
+ /* secondary oscillator; external input on SOSCI pin */
+ SOSC:sosc_clk@0 {
+ #clock-cells = <0>;
+ compatible = "microchip,pic32mzda-sosc";
+ clock-frequency = <32768>;
+ reg = <0x000 0x10>, /* enable reg */
+ <0x1d0 0x10>; /* status reg */
+ microchip,bit-mask = <0x02>; /* enable mask */
+ microchip,status-bit-mask = <0x10>; /* status-mask*/
+ };
+
+ FRCDIV:frcdiv_clk {
+ #clock-cells = <0>;
+ compatible = "microchip,pic32mzda-frcdivclk";
+ clocks = <&FRC>;
+ clock-output-names = "frcdiv_clk";
+ };
+
+ /* System PLL clock */
+ SYSPLL:spll_clk@020 {
+ #clock-cells = <0>;
+ compatible = "microchip,pic32mzda-syspll";
+ reg = <0x020 0x10>, /* SPLL register */
+ <0x1d0 0x10>; /* CLKSTAT register */
+ clocks = <&POSC>, <&FRC>;
+ clock-output-names = "sys_pll";
+ microchip,status-bit-mask = <0x80>; /* SPLLRDY */
+ };
+
+ /* system clock; mux with postdiv & slew */
+ SYSCLK:sys_clk@1c0 {
+ #clock-cells = <0>;
+ compatible = "microchip,pic32mzda-sysclk-v2";
+ reg = <0x1c0 0x04>; /* SLEWCON */
+ clocks = <&FRCDIV>, <&SYSPLL>, <&POSC>, <&SOSC>,
+ <&LPRC>, <&FRCDIV>;
+ microchip,clock-indices = <0>, <1>, <2>, <4>,
+ <5>, <7>;
+ clock-output-names = "sys_clk";
+ };
+
+ /* Peripheral bus1 clock */
+ PBCLK1:pb1_clk@140 {
+ reg = <0x140 0x10>;
+ #clock-cells = <0>;
+ compatible = "microchip,pic32mzda-pbclk";
+ clocks = <&SYSCLK>;
+ clock-output-names = "pb1_clk";
+ /* used by system modules, not gateable */
+ microchip,ignore-unused;
+ };
+
+ /* Peripheral bus2 clock */
+ PBCLK2:pb2_clk@150 {
+ reg = <0x150 0x10>;
+ #clock-cells = <0>;
+ compatible = "microchip,pic32mzda-pbclk";
+ clocks = <&SYSCLK>;
+ clock-output-names = "pb2_clk";
+ /* avoid gating even if unused */
+ microchip,ignore-unused;
+ };
+
+ /* Peripheral bus3 clock */
+ PBCLK3:pb3_clk@160 {
+ reg = <0x160 0x10>;
+ #clock-cells = <0>;
+ compatible = "microchip,pic32mzda-pbclk";
+ clocks = <&SYSCLK>;
+ clock-output-names = "pb3_clk";
+ };
+
+ /* Peripheral bus4 clock(I/O ports, GPIO) */
+ PBCLK4:pb4_clk@170 {
+ reg = <0x170 0x10>;
+ #clock-cells = <0>;
+ compatible = "microchip,pic32mzda-pbclk";
+ clocks = <&SYSCLK>;
+ clock-output-names = "pb4_clk";
+ };
+
+ /* Peripheral bus clock */
+ PBCLK5:pb5_clk@180 {
+ reg = <0x180 0x10>;
+ #clock-cells = <0>;
+ compatible = "microchip,pic32mzda-pbclk";
+ clocks = <&SYSCLK>;
+ clock-output-names = "pb5_clk";
+ };
+
+ /* Peripheral Bus6 clock; */
+ PBCLK6:pb6_clk@190 {
+ reg = <0x190 0x10>;
+ compatible = "microchip,pic32mzda-pbclk";
+ clocks = <&SYSCLK>;
+ #clock-cells = <0>;
+ };
+
+ /* Peripheral bus7 clock */
+ PBCLK7:pb7_clk@1a0 {
+ reg = <0x1a0 0x10>;
+ #clock-cells = <0>;
+ compatible = "microchip,pic32mzda-pbclk";
+ /* CPU is driven by this clock; so named */
+ clock-output-names = "cpu_clk";
+ clocks = <&SYSCLK>;
+ };
+
+ /* Reference Oscillator clock for SPI/I2S */
+ REFCLKO1:refo1_clk@80 {
+ reg = <0x080 0x20>;
+ #clock-cells = <0>;
+ compatible = "microchip,pic32mzda-refoclk";
+ clocks = <&SYSCLK>, <&PBCLK1>, <&POSC>, <&FRC>, <&LPRC>,
+ <&SOSC>, <&SYSPLL>, <&REFIx>, <&BFRC>;
+ microchip,clock-indices = <0>, <1>, <2>, <3>, <4>,
+ <5>, <7>, <8>, <9>;
+ clock-output-names = "refo1_clk";
+ };
+
+ /* Reference Oscillator clock for SQI */
+ REFCLKO2:refo2_clk@a0 {
+ reg = <0x0a0 0x20>;
+ #clock-cells = <0>;
+ compatible = "microchip,pic32mzda-refoclk";
+ clocks = <&SYSCLK>, <&PBCLK1>, <&POSC>, <&FRC>, <&LPRC>,
+ <&SOSC>, <&SYSPLL>, <&REFIx>, <&BFRC>;
+ microchip,clock-indices = <0>, <1>, <2>, <3>, <4>,
+ <5>, <7>, <8>, <9>;
+ clock-output-names = "refo2_clk";
+ };
+
+ /* Reference Oscillator clock, ADC */
+ REFCLKO3:refo3_clk@c0 {
+ reg = <0x0c0 0x20>;
+ compatible = "microchip,pic32mzda-refoclk";
+ clocks = <&SYSCLK>, <&PBCLK1>, <&POSC>, <&FRC>, <&LPRC>,
+ <&SOSC>, <&SYSPLL>, <&REFIx>, <&BFRC>;
+ microchip,clock-indices = <0>, <1>, <2>, <3>, <4>,
+ <5>, <7>, <8>, <9>;
+ #clock-cells = <0>;
+ clock-output-names = "refo3_clk";
+ };
+
+ /* Reference Oscillator clock */
+ REFCLKO4:refo4_clk@e0 {
+ reg = <0x0e0 0x20>;
+ compatible = "microchip,pic32mzda-refoclk";
+ clocks = <&SYSCLK>, <&PBCLK1>, <&POSC>, <&FRC>, <&LPRC>,
+ <&SOSC>, <&SYSPLL>, <&REFIx>, <&BFRC>;
+ microchip,clock-indices = <0>, <1>, <2>, <3>, <4>,
+ <5>, <7>, <8>, <9>;
+ #clock-cells = <0>;
+ clock-output-names = "refo4_clk";
+ };
+
+ /* Reference Oscillator clock, LCD */
+ REFCLKO5:refo5_clk@100 {
+ reg = <0x100 0x20>;
+ compatible = "microchip,pic32mzda-refoclk";
+ clocks = <&SYSCLK>,<&PBCLK1>,<&POSC>,<&FRC>,<&LPRC>,
+ <&SOSC>,<&SYSPLL>,<&REFIx>,<&BFRC>;
+ microchip,clock-indices = <0>, <1>, <2>, <3>, <4>,
+ <5>, <7>, <8>, <9>;
+ #clock-cells = <0>;
+ clock-output-names = "refo5_clk";
+ };
+ };
+};
diff --git a/arch/mips/boot/dts/pic32/pic32mzda.dtsi b/arch/mips/boot/dts/pic32/pic32mzda.dtsi
new file mode 100644
index 000000000000..ad9e3318c2ce
--- /dev/null
+++ b/arch/mips/boot/dts/pic32/pic32mzda.dtsi
@@ -0,0 +1,281 @@
+/*
+ * Copyright (C) 2015 Microchip Technology Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <dt-bindings/interrupt-controller/irq.h>
+
+#include "pic32mzda-clk.dtsi"
+
+/ {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ interrupt-parent = <&evic>;
+
+ aliases {
+ gpio0 = &gpio0;
+ gpio1 = &gpio1;
+ gpio2 = &gpio2;
+ gpio3 = &gpio3;
+ gpio4 = &gpio4;
+ gpio5 = &gpio5;
+ gpio6 = &gpio6;
+ gpio7 = &gpio7;
+ gpio8 = &gpio8;
+ gpio9 = &gpio9;
+ serial0 = &uart1;
+ serial1 = &uart2;
+ serial2 = &uart3;
+ serial3 = &uart4;
+ serial4 = &uart5;
+ serial5 = &uart6;
+ };
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ compatible = "mti,mips14KEc";
+ device_type = "cpu";
+ };
+ };
+
+ soc {
+ compatible = "microchip,pic32mzda-infra";
+ interrupts = <0 IRQ_TYPE_EDGE_RISING>;
+ };
+
+ evic: interrupt-controller@1f810000 {
+ compatible = "microchip,pic32mzda-evic";
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ reg = <0x1f810000 0x1000>;
+ microchip,external-irqs = <3 8 13 18 23>;
+ };
+
+ pic32_pinctrl: pinctrl@1f801400{
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "microchip,pic32mzda-pinctrl";
+ reg = <0x1f801400 0x400>;
+ clocks = <&PBCLK1>;
+ };
+
+ /* PORTA */
+ gpio0: gpio0@1f860000 {
+ compatible = "microchip,pic32mzda-gpio";
+ reg = <0x1f860000 0x100>;
+ interrupts = <118 IRQ_TYPE_LEVEL_HIGH>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ clocks = <&PBCLK4>;
+ microchip,gpio-bank = <0>;
+ gpio-ranges = <&pic32_pinctrl 0 0 16>;
+ };
+
+ /* PORTB */
+ gpio1: gpio1@1f860100 {
+ compatible = "microchip,pic32mzda-gpio";
+ reg = <0x1f860100 0x100>;
+ interrupts = <119 IRQ_TYPE_LEVEL_HIGH>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ clocks = <&PBCLK4>;
+ microchip,gpio-bank = <1>;
+ gpio-ranges = <&pic32_pinctrl 0 16 16>;
+ };
+
+ /* PORTC */
+ gpio2: gpio2@1f860200 {
+ compatible = "microchip,pic32mzda-gpio";
+ reg = <0x1f860200 0x100>;
+ interrupts = <120 IRQ_TYPE_LEVEL_HIGH>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ clocks = <&PBCLK4>;
+ microchip,gpio-bank = <2>;
+ gpio-ranges = <&pic32_pinctrl 0 32 16>;
+ };
+
+ /* PORTD */
+ gpio3: gpio3@1f860300 {
+ compatible = "microchip,pic32mzda-gpio";
+ reg = <0x1f860300 0x100>;
+ interrupts = <121 IRQ_TYPE_LEVEL_HIGH>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ clocks = <&PBCLK4>;
+ microchip,gpio-bank = <3>;
+ gpio-ranges = <&pic32_pinctrl 0 48 16>;
+ };
+
+ /* PORTE */
+ gpio4: gpio4@1f860400 {
+ compatible = "microchip,pic32mzda-gpio";
+ reg = <0x1f860400 0x100>;
+ interrupts = <122 IRQ_TYPE_LEVEL_HIGH>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ clocks = <&PBCLK4>;
+ microchip,gpio-bank = <4>;
+ gpio-ranges = <&pic32_pinctrl 0 64 16>;
+ };
+
+ /* PORTF */
+ gpio5: gpio5@1f860500 {
+ compatible = "microchip,pic32mzda-gpio";
+ reg = <0x1f860500 0x100>;
+ interrupts = <123 IRQ_TYPE_LEVEL_HIGH>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ clocks = <&PBCLK4>;
+ microchip,gpio-bank = <5>;
+ gpio-ranges = <&pic32_pinctrl 0 80 16>;
+ };
+
+ /* PORTG */
+ gpio6: gpio6@1f860600 {
+ compatible = "microchip,pic32mzda-gpio";
+ reg = <0x1f860600 0x100>;
+ interrupts = <124 IRQ_TYPE_LEVEL_HIGH>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ clocks = <&PBCLK4>;
+ microchip,gpio-bank = <6>;
+ gpio-ranges = <&pic32_pinctrl 0 96 16>;
+ };
+
+ /* PORTH */
+ gpio7: gpio7@1f860700 {
+ compatible = "microchip,pic32mzda-gpio";
+ reg = <0x1f860700 0x100>;
+ interrupts = <125 IRQ_TYPE_LEVEL_HIGH>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ clocks = <&PBCLK4>;
+ microchip,gpio-bank = <7>;
+ gpio-ranges = <&pic32_pinctrl 0 112 16>;
+ };
+
+ /* PORTI does not exist */
+
+ /* PORTJ */
+ gpio8: gpio8@1f860800 {
+ compatible = "microchip,pic32mzda-gpio";
+ reg = <0x1f860800 0x100>;
+ interrupts = <126 IRQ_TYPE_LEVEL_HIGH>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ clocks = <&PBCLK4>;
+ microchip,gpio-bank = <8>;
+ gpio-ranges = <&pic32_pinctrl 0 128 16>;
+ };
+
+ /* PORTK */
+ gpio9: gpio9@1f860900 {
+ compatible = "microchip,pic32mzda-gpio";
+ reg = <0x1f860900 0x100>;
+ interrupts = <127 IRQ_TYPE_LEVEL_HIGH>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ clocks = <&PBCLK4>;
+ microchip,gpio-bank = <9>;
+ gpio-ranges = <&pic32_pinctrl 0 144 16>;
+ };
+
+ sdhci: sdhci@1f8ec000 {
+ compatible = "microchip,pic32mzda-sdhci";
+ reg = <0x1f8ec000 0x100>;
+ interrupts = <191 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&REFCLKO4>, <&PBCLK5>;
+ clock-names = "base_clk", "sys_clk";
+ bus-width = <4>;
+ cap-sd-highspeed;
+ status = "disabled";
+ };
+
+ uart1: serial@1f822000 {
+ compatible = "microchip,pic32mzda-uart";
+ reg = <0x1f822000 0x50>;
+ interrupts = <112 IRQ_TYPE_LEVEL_HIGH>,
+ <113 IRQ_TYPE_LEVEL_HIGH>,
+ <114 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&PBCLK2>;
+ status = "disabled";
+ };
+
+ uart2: serial@1f822200 {
+ compatible = "microchip,pic32mzda-uart";
+ reg = <0x1f822200 0x50>;
+ interrupts = <145 IRQ_TYPE_LEVEL_HIGH>,
+ <146 IRQ_TYPE_LEVEL_HIGH>,
+ <147 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&PBCLK2>;
+ status = "disabled";
+ };
+
+ uart3: serial@1f822400 {
+ compatible = "microchip,pic32mzda-uart";
+ reg = <0x1f822400 0x50>;
+ interrupts = <157 IRQ_TYPE_LEVEL_HIGH>,
+ <158 IRQ_TYPE_LEVEL_HIGH>,
+ <159 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&PBCLK2>;
+ status = "disabled";
+ };
+
+ uart4: serial@1f822600 {
+ compatible = "microchip,pic32mzda-uart";
+ reg = <0x1f822600 0x50>;
+ interrupts = <170 IRQ_TYPE_LEVEL_HIGH>,
+ <171 IRQ_TYPE_LEVEL_HIGH>,
+ <172 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&PBCLK2>;
+ status = "disabled";
+ };
+
+ uart5: serial@1f822800 {
+ compatible = "microchip,pic32mzda-uart";
+ reg = <0x1f822800 0x50>;
+ interrupts = <179 IRQ_TYPE_LEVEL_HIGH>,
+ <180 IRQ_TYPE_LEVEL_HIGH>,
+ <181 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&PBCLK2>;
+ status = "disabled";
+ };
+
+ uart6: serial@1f822A00 {
+ compatible = "microchip,pic32mzda-uart";
+ reg = <0x1f822A00 0x50>;
+ interrupts = <188 IRQ_TYPE_LEVEL_HIGH>,
+ <189 IRQ_TYPE_LEVEL_HIGH>,
+ <190 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&PBCLK2>;
+ status = "disabled";
+ };
+};
diff --git a/arch/mips/boot/dts/pic32/pic32mzda_sk.dts b/arch/mips/boot/dts/pic32/pic32mzda_sk.dts
new file mode 100644
index 000000000000..5d434a50e85b
--- /dev/null
+++ b/arch/mips/boot/dts/pic32/pic32mzda_sk.dts
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2015 Microchip Technology Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+/dts-v1/;
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/interrupt-controller/irq.h>
+
+#include "pic32mzda.dtsi"
+
+/ {
+ compatible = "microchip,pic32mzda-sk", "microchip,pic32mzda";
+ model = "Microchip PIC32MZDA Starter Kit";
+
+ memory {
+ device_type = "memory";
+ reg = <0x08000000 0x08000000>;
+ };
+
+ chosen {
+ bootargs = "earlyprintk=ttyPIC1,115200n8r console=ttyPIC1,115200n8";
+ };
+
+ leds0 {
+ compatible = "gpio-leds";
+ pinctrl-names = "default";
+ pinctrl-0 = <&user_leds_s0>;
+
+ led@1 {
+ label = "pic32mzda_sk:red:led1";
+ gpios = <&gpio7 0 GPIO_ACTIVE_HIGH>;
+ linux,default-trigger = "heartbeat";
+ };
+
+ led@2 {
+ label = "pic32mzda_sk:yellow:led2";
+ gpios = <&gpio7 1 GPIO_ACTIVE_HIGH>;
+ linux,default-trigger = "mmc0";
+ };
+
+ led@3 {
+ label = "pic32mzda_sk:green:led3";
+ gpios = <&gpio7 2 GPIO_ACTIVE_HIGH>;
+ default-state = "on";
+ };
+ };
+
+ keys0 {
+ compatible = "gpio-keys";
+ pinctrl-0 = <&user_buttons_s0>;
+ pinctrl-names = "default";
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ button@sw1 {
+ label = "ESC";
+ linux,code = <1>;
+ gpios = <&gpio1 12 0>;
+ };
+
+ button@sw2 {
+ label = "Home";
+ linux,code = <102>;
+ gpios = <&gpio1 13 0>;
+ };
+
+ button@sw3 {
+ label = "Menu";
+ linux,code = <139>;
+ gpios = <&gpio1 14 0>;
+ };
+ };
+};
+
+&uart2 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_uart2>;
+ status = "okay";
+};
+
+&uart4 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_uart4>;
+ status = "okay";
+};
+
+&sdhci {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_sdhc1>;
+ status = "okay";
+ assigned-clocks = <&REFCLKO2>,<&REFCLKO4>,<&REFCLKO5>;
+ assigned-clock-rates = <50000000>,<25000000>,<40000000>;
+};
+
+&pic32_pinctrl {
+
+ pinctrl_sdhc1: sdhc1_pins0 {
+ pins = "A6", "D4", "G13", "G12", "G14", "A7", "A0";
+ microchip,digital;
+ };
+
+ user_leds_s0: user_leds_s0 {
+ pins = "H0", "H1", "H2";
+ output-low;
+ microchip,digital;
+ };
+
+ user_buttons_s0: user_buttons_s0 {
+ pins = "B12", "B13", "B14";
+ microchip,digital;
+ input-enable;
+ bias-pull-up;
+ };
+
+ pinctrl_uart2: pinctrl_uart2 {
+ uart2-tx {
+ pins = "G9";
+ function = "U2TX";
+ microchip,digital;
+ output-high;
+ };
+ uart2-rx {
+ pins = "B0";
+ function = "U2RX";
+ microchip,digital;
+ input-enable;
+ };
+ };
+
+ pinctrl_uart4: uart4-0 {
+ uart4-tx {
+ pins = "C3";
+ function = "U4TX";
+ microchip,digital;
+ output-high;
+ };
+ uart4-rx {
+ pins = "E8";
+ function = "U4RX";
+ microchip,digital;
+ input-enable;
+ };
+ };
+};
diff --git a/arch/mips/boot/dts/qca/ar9132.dtsi b/arch/mips/boot/dts/qca/ar9132.dtsi
index 13d0439496a9..3ad4ba9b12fd 100644
--- a/arch/mips/boot/dts/qca/ar9132.dtsi
+++ b/arch/mips/boot/dts/qca/ar9132.dtsi
@@ -125,6 +125,21 @@
};
};
+ usb@1b000100 {
+ compatible = "qca,ar7100-ehci", "generic-ehci";
+ reg = <0x1b000100 0x100>;
+
+ interrupts = <3>;
+ resets = <&rst 5>;
+
+ has-transaction-translator;
+
+ phy-names = "usb";
+ phys = <&usb_phy>;
+
+ status = "disabled";
+ };
+
spi@1f000000 {
compatible = "qca,ar9132-spi", "qca,ar7100-spi";
reg = <0x1f000000 0x10>;
@@ -138,4 +153,15 @@
#size-cells = <0>;
};
};
+
+ usb_phy: usb-phy {
+ compatible = "qca,ar7100-usb-phy";
+
+ reset-names = "usb-phy", "usb-suspend-override";
+ resets = <&rst 4>, <&rst 3>;
+
+ #phy-cells = <0>;
+
+ status = "disabled";
+ };
};
diff --git a/arch/mips/boot/dts/qca/ar9132_tl_wr1043nd_v1.dts b/arch/mips/boot/dts/qca/ar9132_tl_wr1043nd_v1.dts
index 003015ab34e7..e535ee3c26a4 100644
--- a/arch/mips/boot/dts/qca/ar9132_tl_wr1043nd_v1.dts
+++ b/arch/mips/boot/dts/qca/ar9132_tl_wr1043nd_v1.dts
@@ -35,6 +35,10 @@
};
};
+ usb@1b000100 {
+ status = "okay";
+ };
+
spi@1f000000 {
status = "okay";
num-cs = <1>;
@@ -65,6 +69,10 @@
};
};
+ usb-phy {
+ status = "okay";
+ };
+
gpio-keys {
compatible = "gpio-keys-polled";
#address-cells = <1>;
diff --git a/arch/mips/configs/pic32mzda_defconfig b/arch/mips/configs/pic32mzda_defconfig
new file mode 100644
index 000000000000..52192c632ae8
--- /dev/null
+++ b/arch/mips/configs/pic32mzda_defconfig
@@ -0,0 +1,89 @@
+CONFIG_MACH_PIC32=y
+CONFIG_DTB_PIC32_MZDA_SK=y
+CONFIG_HZ_100=y
+CONFIG_PREEMPT_VOLUNTARY=y
+# CONFIG_SECCOMP is not set
+CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_RELAY=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_EMBEDDED=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_SLAB=y
+CONFIG_JUMP_LABEL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_BLK_DEV_BSGLIB=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_SGI_PARTITION=y
+CONFIG_BINFMT_MISC=m
+# CONFIG_SUSPEND is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+# CONFIG_ALLOW_DEV_COREDUMP is not set
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_SCAN_ASYNC=y
+# CONFIG_SCSI_LOWLEVEL is not set
+CONFIG_INPUT_LEDS=m
+CONFIG_INPUT_POLLDEV=y
+CONFIG_INPUT_MOUSEDEV=m
+CONFIG_INPUT_EVDEV=y
+CONFIG_INPUT_EVBUG=m
+# CONFIG_KEYBOARD_ATKBD is not set
+CONFIG_KEYBOARD_GPIO=m
+CONFIG_KEYBOARD_GPIO_POLLED=m
+# CONFIG_MOUSE_PS2 is not set
+# CONFIG_SERIO is not set
+CONFIG_SERIAL_PIC32=y
+CONFIG_SERIAL_PIC32_CONSOLE=y
+CONFIG_HW_RANDOM=y
+CONFIG_RAW_DRIVER=m
+CONFIG_GPIO_SYSFS=y
+# CONFIG_HWMON is not set
+CONFIG_HIDRAW=y
+# CONFIG_USB_SUPPORT is not set
+CONFIG_MMC=y
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
+CONFIG_MMC_SDHCI_MICROCHIP_PIC32=y
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
+CONFIG_LEDS_GPIO=y
+CONFIG_LEDS_TRIGGERS=y
+CONFIG_LEDS_TRIGGER_TIMER=m
+CONFIG_LEDS_TRIGGER_ONESHOT=m
+CONFIG_LEDS_TRIGGER_HEARTBEAT=y
+CONFIG_LEDS_TRIGGER_GPIO=m
+CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
+# CONFIG_MIPS_PLATFORM_DEVICES is not set
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_AUTOFS4_FS=m
+CONFIG_FUSE_FS=m
+CONFIG_FSCACHE=m
+CONFIG_ISO9660_FS=m
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_SQUASHFS=m
+CONFIG_SQUASHFS_XATTR=y
+CONFIG_SQUASHFS_LZ4=y
+CONFIG_SQUASHFS_LZO=y
+CONFIG_SQUASHFS_XZ=y
diff --git a/arch/mips/include/asm/cacheops.h b/arch/mips/include/asm/cacheops.h
index 06b9bc7ea14b..c3212ff26723 100644
--- a/arch/mips/include/asm/cacheops.h
+++ b/arch/mips/include/asm/cacheops.h
@@ -12,54 +12,76 @@
#define __ASM_CACHEOPS_H
/*
+ * Most cache ops are split into a 2 bit field identifying the cache, and a 3
+ * bit field identifying the cache operation.
+ */
+#define CacheOp_Cache 0x03
+#define CacheOp_Op 0x1c
+
+#define Cache_I 0x00
+#define Cache_D 0x01
+#define Cache_T 0x02
+#define Cache_S 0x03
+
+#define Index_Writeback_Inv 0x00
+#define Index_Load_Tag 0x04
+#define Index_Store_Tag 0x08
+#define Hit_Invalidate 0x10
+#define Hit_Writeback_Inv 0x14 /* not with Cache_I though */
+#define Hit_Writeback 0x18
+
+/*
* Cache Operations available on all MIPS processors with R4000-style caches
*/
-#define Index_Invalidate_I 0x00
-#define Index_Writeback_Inv_D 0x01
-#define Index_Load_Tag_I 0x04
-#define Index_Load_Tag_D 0x05
-#define Index_Store_Tag_I 0x08
-#define Index_Store_Tag_D 0x09
-#define Hit_Invalidate_I 0x10
-#define Hit_Invalidate_D 0x11
-#define Hit_Writeback_Inv_D 0x15
+#define Index_Invalidate_I (Cache_I | Index_Writeback_Inv)
+#define Index_Writeback_Inv_D (Cache_D | Index_Writeback_Inv)
+#define Index_Load_Tag_I (Cache_I | Index_Load_Tag)
+#define Index_Load_Tag_D (Cache_D | Index_Load_Tag)
+#define Index_Store_Tag_I (Cache_I | Index_Store_Tag)
+#define Index_Store_Tag_D (Cache_D | Index_Store_Tag)
+#define Hit_Invalidate_I (Cache_I | Hit_Invalidate)
+#define Hit_Invalidate_D (Cache_D | Hit_Invalidate)
+#define Hit_Writeback_Inv_D (Cache_D | Hit_Writeback_Inv)
/*
* R4000-specific cacheops
*/
-#define Create_Dirty_Excl_D 0x0d
-#define Fill 0x14
-#define Hit_Writeback_I 0x18
-#define Hit_Writeback_D 0x19
+#define Create_Dirty_Excl_D (Cache_D | 0x0c)
+#define Fill (Cache_I | 0x14)
+#define Hit_Writeback_I (Cache_I | Hit_Writeback)
+#define Hit_Writeback_D (Cache_D | Hit_Writeback)
/*
* R4000SC and R4400SC-specific cacheops
*/
-#define Index_Invalidate_SI 0x02
-#define Index_Writeback_Inv_SD 0x03
-#define Index_Load_Tag_SI 0x06
-#define Index_Load_Tag_SD 0x07
-#define Index_Store_Tag_SI 0x0A
-#define Index_Store_Tag_SD 0x0B
-#define Create_Dirty_Excl_SD 0x0f
-#define Hit_Invalidate_SI 0x12
-#define Hit_Invalidate_SD 0x13
-#define Hit_Writeback_Inv_SD 0x17
-#define Hit_Writeback_SD 0x1b
-#define Hit_Set_Virtual_SI 0x1e
-#define Hit_Set_Virtual_SD 0x1f
+#define Cache_SI 0x02
+#define Cache_SD 0x03
+
+#define Index_Invalidate_SI (Cache_SI | Index_Writeback_Inv)
+#define Index_Writeback_Inv_SD (Cache_SD | Index_Writeback_Inv)
+#define Index_Load_Tag_SI (Cache_SI | Index_Load_Tag)
+#define Index_Load_Tag_SD (Cache_SD | Index_Load_Tag)
+#define Index_Store_Tag_SI (Cache_SI | Index_Store_Tag)
+#define Index_Store_Tag_SD (Cache_SD | Index_Store_Tag)
+#define Create_Dirty_Excl_SD (Cache_SD | 0x0c)
+#define Hit_Invalidate_SI (Cache_SI | Hit_Invalidate)
+#define Hit_Invalidate_SD (Cache_SD | Hit_Invalidate)
+#define Hit_Writeback_Inv_SD (Cache_SD | Hit_Writeback_Inv)
+#define Hit_Writeback_SD (Cache_SD | Hit_Writeback)
+#define Hit_Set_Virtual_SI (Cache_SI | 0x1c)
+#define Hit_Set_Virtual_SD (Cache_SD | 0x1c)
/*
* R5000-specific cacheops
*/
-#define R5K_Page_Invalidate_S 0x17
+#define R5K_Page_Invalidate_S (Cache_S | 0x14)
/*
* RM7000-specific cacheops
*/
-#define Page_Invalidate_T 0x16
-#define Index_Store_Tag_T 0x0a
-#define Index_Load_Tag_T 0x06
+#define Page_Invalidate_T (Cache_T | 0x14)
+#define Index_Store_Tag_T (Cache_T | Index_Store_Tag)
+#define Index_Load_Tag_T (Cache_T | Index_Load_Tag)
/*
* R10000-specific cacheops
@@ -67,22 +89,22 @@
* Cacheops 0x02, 0x06, 0x0a, 0x0c-0x0e, 0x16, 0x1a and 0x1e are unused.
* Most of the _S cacheops are identical to the R4000SC _SD cacheops.
*/
-#define Index_Writeback_Inv_S 0x03
-#define Index_Load_Tag_S 0x07
-#define Index_Store_Tag_S 0x0B
-#define Hit_Invalidate_S 0x13
+#define Index_Writeback_Inv_S (Cache_S | Index_Writeback_Inv)
+#define Index_Load_Tag_S (Cache_S | Index_Load_Tag)
+#define Index_Store_Tag_S (Cache_S | Index_Store_Tag)
+#define Hit_Invalidate_S (Cache_S | Hit_Invalidate)
#define Cache_Barrier 0x14
-#define Hit_Writeback_Inv_S 0x17
-#define Index_Load_Data_I 0x18
-#define Index_Load_Data_D 0x19
-#define Index_Load_Data_S 0x1b
-#define Index_Store_Data_I 0x1c
-#define Index_Store_Data_D 0x1d
-#define Index_Store_Data_S 0x1f
+#define Hit_Writeback_Inv_S (Cache_S | Hit_Writeback_Inv)
+#define Index_Load_Data_I (Cache_I | 0x18)
+#define Index_Load_Data_D (Cache_D | 0x18)
+#define Index_Load_Data_S (Cache_S | 0x18)
+#define Index_Store_Data_I (Cache_I | 0x1c)
+#define Index_Store_Data_D (Cache_D | 0x1c)
+#define Index_Store_Data_S (Cache_S | 0x1c)
/*
* Loongson2-specific cacheops
*/
-#define Hit_Invalidate_I_Loongson2 0x00
+#define Hit_Invalidate_I_Loongson2 (Cache_I | 0x00)
#endif /* __ASM_CACHEOPS_H */
diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h
index d1e04c943f5f..eeec8c8e2da2 100644
--- a/arch/mips/include/asm/cpu-features.h
+++ b/arch/mips/include/asm/cpu-features.h
@@ -414,4 +414,11 @@
# define cpu_has_small_pages (cpu_data[0].options & MIPS_CPU_SP)
#endif
+#ifndef cpu_has_nan_legacy
+#define cpu_has_nan_legacy (cpu_data[0].options & MIPS_CPU_NAN_LEGACY)
+#endif
+#ifndef cpu_has_nan_2008
+#define cpu_has_nan_2008 (cpu_data[0].options & MIPS_CPU_NAN_2008)
+#endif
+
#endif /* __ASM_CPU_FEATURES_H */
diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
index 82ad15f11049..a97ca97285ec 100644
--- a/arch/mips/include/asm/cpu.h
+++ b/arch/mips/include/asm/cpu.h
@@ -386,6 +386,8 @@ enum cpu_type_enum {
#define MIPS_CPU_BP_GHIST 0x8000000000ull /* R12K+ Branch Prediction Global History */
#define MIPS_CPU_SP 0x10000000000ull /* Small (1KB) page support */
#define MIPS_CPU_FTLB 0x20000000000ull /* CPU has Fixed-page-size TLB */
+#define MIPS_CPU_NAN_LEGACY 0x40000000000ull /* Legacy NaN implemented */
+#define MIPS_CPU_NAN_2008 0x80000000000ull /* 2008 NaN implemented */
/*
* CPU ASE encodings
diff --git a/arch/mips/include/asm/elf.h b/arch/mips/include/asm/elf.h
index b01a6ff468e0..cefb7a596878 100644
--- a/arch/mips/include/asm/elf.h
+++ b/arch/mips/include/asm/elf.h
@@ -12,7 +12,6 @@
#include <linux/fs.h>
#include <uapi/linux/elf.h>
-#include <asm/cpu-info.h>
#include <asm/current.h>
/* ELF header e_flags defines. */
@@ -44,6 +43,7 @@
#define EF_MIPS_OPTIONS_FIRST 0x00000080
#define EF_MIPS_32BITMODE 0x00000100
#define EF_MIPS_FP64 0x00000200
+#define EF_MIPS_NAN2008 0x00000400
#define EF_MIPS_ABI 0x0000f000
#define EF_MIPS_ARCH 0xf0000000
@@ -305,7 +305,7 @@ do { \
\
current->thread.abi = &mips_abi; \
\
- current->thread.fpu.fcr31 = boot_cpu_data.fpu_csr31; \
+ mips_set_personality_nan(state); \
} while (0)
#endif /* CONFIG_32BIT */
@@ -367,7 +367,7 @@ do { \
else \
current->thread.abi = &mips_abi; \
\
- current->thread.fpu.fcr31 = boot_cpu_data.fpu_csr31; \
+ mips_set_personality_nan(state); \
\
p = personality(current->personality); \
if (p != PER_LINUX32 && p != PER_LINUX) \
@@ -432,6 +432,7 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm,
int uses_interp);
struct arch_elf_state {
+ int nan_2008;
int fp_abi;
int interp_fp_abi;
int overall_fp_mode;
@@ -440,17 +441,23 @@ struct arch_elf_state {
#define MIPS_ABI_FP_UNKNOWN (-1) /* Unknown FP ABI (kernel internal) */
#define INIT_ARCH_ELF_STATE { \
+ .nan_2008 = -1, \
.fp_abi = MIPS_ABI_FP_UNKNOWN, \
.interp_fp_abi = MIPS_ABI_FP_UNKNOWN, \
.overall_fp_mode = -1, \
}
+/* Whether to accept legacy-NaN and 2008-NaN user binaries. */
+extern bool mips_use_nan_legacy;
+extern bool mips_use_nan_2008;
+
extern int arch_elf_pt_proc(void *ehdr, void *phdr, struct file *elf,
bool is_interp, struct arch_elf_state *state);
-extern int arch_check_elf(void *ehdr, bool has_interpreter,
+extern int arch_check_elf(void *ehdr, bool has_interpreter, void *interp_ehdr,
struct arch_elf_state *state);
+extern void mips_set_personality_nan(struct arch_elf_state *state);
extern void mips_set_personality_fp(struct arch_elf_state *state);
#endif /* _ASM_ELF_H */
diff --git a/arch/mips/include/asm/fpu_emulator.h b/arch/mips/include/asm/fpu_emulator.h
index 2f021cdfba4f..3225c3c0724b 100644
--- a/arch/mips/include/asm/fpu_emulator.h
+++ b/arch/mips/include/asm/fpu_emulator.h
@@ -79,7 +79,7 @@ int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
/*
* Break instruction with special math emu break code set
*/
-#define BREAK_MATH (0x0000000d | (BRK_MEMU << 16))
+#define BREAK_MATH(micromips) (((micromips) ? 0x7 : 0xd) | (BRK_MEMU << 16))
#define SIGNALLING_NAN 0x7ff800007ff80000LL
diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h
index d10fd80dbb7e..2b4dc7ad53b8 100644
--- a/arch/mips/include/asm/io.h
+++ b/arch/mips/include/asm/io.h
@@ -275,6 +275,7 @@ static inline void __iomem * __ioremap_mode(phys_addr_t offset, unsigned long si
*/
#define ioremap_cachable(offset, size) \
__ioremap_mode((offset), (size), _page_cachable_default)
+#define ioremap_cache ioremap_cachable
/*
* These two are MIPS specific ioremap variant. ioremap_cacheable_cow
diff --git a/arch/mips/include/asm/irqflags.h b/arch/mips/include/asm/irqflags.h
index e7b138b4b3d3..65c351e328cc 100644
--- a/arch/mips/include/asm/irqflags.h
+++ b/arch/mips/include/asm/irqflags.h
@@ -84,41 +84,11 @@ static inline void arch_local_irq_restore(unsigned long flags)
: "memory");
}
-static inline void __arch_local_irq_restore(unsigned long flags)
-{
- __asm__ __volatile__(
- " .set push \n"
- " .set noreorder \n"
- " .set noat \n"
-#if defined(CONFIG_IRQ_MIPS_CPU)
- /*
- * Slow, but doesn't suffer from a relatively unlikely race
- * condition we're having since days 1.
- */
- " beqz %[flags], 1f \n"
- " di \n"
- " ei \n"
- "1: \n"
-#else
- /*
- * Fast, dangerous. Life is fun, life is good.
- */
- " mfc0 $1, $12 \n"
- " ins $1, %[flags], 0, 1 \n"
- " mtc0 $1, $12 \n"
-#endif
- " " __stringify(__irq_disable_hazard) " \n"
- " .set pop \n"
- : [flags] "=r" (flags)
- : "0" (flags)
- : "memory");
-}
#else
/* Functions that require preempt_{dis,en}able() are in mips-atomic.c */
void arch_local_irq_disable(void);
unsigned long arch_local_irq_save(void);
void arch_local_irq_restore(unsigned long flags);
-void __arch_local_irq_restore(unsigned long flags);
#endif /* CONFIG_CPU_MIPSR2 || CONFIG_CPU_MIPSR6 */
static inline void arch_local_irq_enable(void)
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 7c191443c7ea..f6b12790716c 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -58,7 +58,7 @@
#define KVM_MAX_VCPUS 1
#define KVM_USER_MEM_SLOTS 8
/* memory slots that does not exposed to userspace */
-#define KVM_PRIVATE_MEM_SLOTS 0
+#define KVM_PRIVATE_MEM_SLOTS 0
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
#define KVM_HALT_POLL_NS_DEFAULT 500000
@@ -92,14 +92,6 @@
#define KVM_INVALID_INST 0xdeadbeef
#define KVM_INVALID_ADDR 0xdeadbeef
-#define KVM_MALTA_GUEST_RTC_ADDR 0xb8000070UL
-
-#define GUEST_TICKS_PER_JIFFY (40000000/HZ)
-#define MS_TO_NS(x) (x * 1E6L)
-
-#define CAUSEB_DC 27
-#define CAUSEF_DC (_ULCAST_(1) << 27)
-
extern atomic_t kvm_mips_instance;
extern kvm_pfn_t (*kvm_mips_gfn_to_pfn)(struct kvm *kvm, gfn_t gfn);
extern void (*kvm_mips_release_pfn_clean)(kvm_pfn_t pfn);
@@ -289,34 +281,6 @@ enum mips_mmu_types {
MMU_TYPE_R8000
};
-/*
- * Trap codes
- */
-#define T_INT 0 /* Interrupt pending */
-#define T_TLB_MOD 1 /* TLB modified fault */
-#define T_TLB_LD_MISS 2 /* TLB miss on load or ifetch */
-#define T_TLB_ST_MISS 3 /* TLB miss on a store */
-#define T_ADDR_ERR_LD 4 /* Address error on a load or ifetch */
-#define T_ADDR_ERR_ST 5 /* Address error on a store */
-#define T_BUS_ERR_IFETCH 6 /* Bus error on an ifetch */
-#define T_BUS_ERR_LD_ST 7 /* Bus error on a load or store */
-#define T_SYSCALL 8 /* System call */
-#define T_BREAK 9 /* Breakpoint */
-#define T_RES_INST 10 /* Reserved instruction exception */
-#define T_COP_UNUSABLE 11 /* Coprocessor unusable */
-#define T_OVFLOW 12 /* Arithmetic overflow */
-
-/*
- * Trap definitions added for r4000 port.
- */
-#define T_TRAP 13 /* Trap instruction */
-#define T_VCEI 14 /* Virtual coherency exception */
-#define T_MSAFPE 14 /* MSA floating point exception */
-#define T_FPE 15 /* Floating point exception */
-#define T_MSADIS 21 /* MSA disabled exception */
-#define T_WATCH 23 /* Watch address reference */
-#define T_VCED 31 /* Virtual coherency data */
-
/* Resume Flags */
#define RESUME_FLAG_DR (1<<0) /* Reload guest nonvolatile state? */
#define RESUME_FLAG_HOST (1<<1) /* Resume host? */
@@ -686,7 +650,6 @@ extern void kvm_mips_dump_host_tlbs(void);
extern void kvm_mips_dump_guest_tlbs(struct kvm_vcpu *vcpu);
extern void kvm_mips_flush_host_tlb(int skip_kseg0);
extern int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long entryhi);
-extern int kvm_mips_host_tlb_inv_index(struct kvm_vcpu *vcpu, int index);
extern int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu,
unsigned long entryhi);
diff --git a/arch/mips/include/asm/mach-ath79/ath79.h b/arch/mips/include/asm/mach-ath79/ath79.h
index 4eee221b0cf0..2b3487213d1e 100644
--- a/arch/mips/include/asm/mach-ath79/ath79.h
+++ b/arch/mips/include/asm/mach-ath79/ath79.h
@@ -115,6 +115,7 @@ static inline int soc_is_qca955x(void)
return soc_is_qca9556() || soc_is_qca9558();
}
+void ath79_ddr_wb_flush(unsigned int reg);
void ath79_ddr_set_pci_windows(void);
extern void __iomem *ath79_pll_base;
diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_nvram.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_nvram.h
index 348df49dcc9f..4e0b6bc1165e 100644
--- a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_nvram.h
+++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_nvram.h
@@ -30,6 +30,4 @@ u8 *bcm63xx_nvram_get_name(void);
*/
int bcm63xx_nvram_get_mac_address(u8 *mac);
-int bcm63xx_nvram_get_psi_size(void);
-
#endif /* BCM63XX_NVRAM_H */
diff --git a/arch/mips/include/asm/mach-pic32/cpu-feature-overrides.h b/arch/mips/include/asm/mach-pic32/cpu-feature-overrides.h
new file mode 100644
index 000000000000..468230834e2f
--- /dev/null
+++ b/arch/mips/include/asm/mach-pic32/cpu-feature-overrides.h
@@ -0,0 +1,32 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc. All rights reserved.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#ifndef __ASM_MACH_PIC32_CPU_FEATURE_OVERRIDES_H
+#define __ASM_MACH_PIC32_CPU_FEATURE_OVERRIDES_H
+
+/*
+ * CPU feature overrides for PIC32 boards
+ */
+#ifdef CONFIG_CPU_MIPS32
+#define cpu_has_vint 1
+#define cpu_has_veic 0
+#define cpu_has_tlb 1
+#define cpu_has_4kex 1
+#define cpu_has_4k_cache 1
+#define cpu_has_fpu 0
+#define cpu_has_counter 1
+#define cpu_has_llsc 1
+#define cpu_has_nofpuex 0
+#define cpu_icache_snoops_remote_store 1
+#endif
+
+#ifdef CONFIG_CPU_MIPS64
+#error This platform does not support 64bit.
+#endif
+
+#endif /* __ASM_MACH_PIC32_CPU_FEATURE_OVERRIDES_H */
diff --git a/arch/mips/include/asm/mach-pic32/irq.h b/arch/mips/include/asm/mach-pic32/irq.h
new file mode 100644
index 000000000000..864330ce8838
--- /dev/null
+++ b/arch/mips/include/asm/mach-pic32/irq.h
@@ -0,0 +1,22 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc. All rights reserved.
+ *
+ * This program is free software; you can distribute it and/or modify it
+ * under the terms of the GNU General Public License (Version 2) as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+#ifndef __ASM_MACH_PIC32_IRQ_H
+#define __ASM_MACH_PIC32_IRQ_H
+
+#define NR_IRQS 256
+#define MIPS_CPU_IRQ_BASE 0
+
+#include_next <irq.h>
+
+#endif /* __ASM_MACH_PIC32_IRQ_H */
diff --git a/arch/mips/include/asm/mach-pic32/pic32.h b/arch/mips/include/asm/mach-pic32/pic32.h
new file mode 100644
index 000000000000..ce52e918daae
--- /dev/null
+++ b/arch/mips/include/asm/mach-pic32/pic32.h
@@ -0,0 +1,44 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc. All rights reserved.
+ *
+ * This program is free software; you can distribute it and/or modify it
+ * under the terms of the GNU General Public License (Version 2) as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+#ifndef _ASM_MACH_PIC32_H
+#define _ASM_MACH_PIC32_H
+
+#include <linux/io.h>
+
+/*
+ * PIC32 register offsets for SET/CLR/INV where supported.
+ */
+#define PIC32_CLR(_reg) ((_reg) + 0x04)
+#define PIC32_SET(_reg) ((_reg) + 0x08)
+#define PIC32_INV(_reg) ((_reg) + 0x0C)
+
+/*
+ * PIC32 Base Register Offsets
+ */
+#define PIC32_BASE_CONFIG 0x1f800000
+#define PIC32_BASE_OSC 0x1f801200
+#define PIC32_BASE_RESET 0x1f801240
+#define PIC32_BASE_PPS 0x1f801400
+#define PIC32_BASE_UART 0x1f822000
+#define PIC32_BASE_PORT 0x1f860000
+#define PIC32_BASE_DEVCFG2 0x1fc4ff44
+
+/*
+ * Register unlock sequence required for some register access.
+ */
+void pic32_syskey_unlock_debug(const char *fn, const ulong ln);
+#define pic32_syskey_unlock() \
+ pic32_syskey_unlock_debug(__func__, __LINE__)
+
+#endif /* _ASM_MACH_PIC32_H */
diff --git a/arch/mips/include/asm/mach-pic32/spaces.h b/arch/mips/include/asm/mach-pic32/spaces.h
new file mode 100644
index 000000000000..046a0a9aa8b3
--- /dev/null
+++ b/arch/mips/include/asm/mach-pic32/spaces.h
@@ -0,0 +1,24 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc. All rights reserved.
+ *
+ * This program is free software; you can distribute it and/or modify it
+ * under the terms of the GNU General Public License (Version 2) as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+#ifndef _ASM_MACH_PIC32_SPACES_H
+#define _ASM_MACH_PIC32_SPACES_H
+
+#ifdef CONFIG_PIC32MZDA
+#define PHYS_OFFSET _AC(0x08000000, UL)
+#define UNCAC_BASE _AC(0xa8000000, UL)
+#endif
+
+#include <asm/mach-generic/spaces.h>
+
+#endif /* __ASM_MACH_PIC32_SPACES_H */
diff --git a/arch/mips/include/asm/mach-ralink/irq.h b/arch/mips/include/asm/mach-ralink/irq.h
new file mode 100644
index 000000000000..4321865e04b9
--- /dev/null
+++ b/arch/mips/include/asm/mach-ralink/irq.h
@@ -0,0 +1,9 @@
+#ifndef __ASM_MACH_RALINK_IRQ_H
+#define __ASM_MACH_RALINK_IRQ_H
+
+#define GIC_NUM_INTRS 64
+#define NR_IRQS 256
+
+#include_next <irq.h>
+
+#endif
diff --git a/arch/mips/include/asm/mach-ralink/mt7621.h b/arch/mips/include/asm/mach-ralink/mt7621.h
new file mode 100644
index 000000000000..610b61e3f9df
--- /dev/null
+++ b/arch/mips/include/asm/mach-ralink/mt7621.h
@@ -0,0 +1,38 @@
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Copyright (C) 2015 John Crispin <blogic@openwrt.org>
+ */
+
+#ifndef _MT7621_REGS_H_
+#define _MT7621_REGS_H_
+
+#define MT7621_PALMBUS_BASE 0x1C000000
+#define MT7621_PALMBUS_SIZE 0x03FFFFFF
+
+#define MT7621_SYSC_BASE 0x1E000000
+
+#define SYSC_REG_CHIP_NAME0 0x00
+#define SYSC_REG_CHIP_NAME1 0x04
+#define SYSC_REG_CHIP_REV 0x0c
+#define SYSC_REG_SYSTEM_CONFIG0 0x10
+#define SYSC_REG_SYSTEM_CONFIG1 0x14
+
+#define CHIP_REV_PKG_MASK 0x1
+#define CHIP_REV_PKG_SHIFT 16
+#define CHIP_REV_VER_MASK 0xf
+#define CHIP_REV_VER_SHIFT 8
+#define CHIP_REV_ECO_MASK 0xf
+
+#define MT7621_DRAM_BASE 0x0
+#define MT7621_DDR2_SIZE_MIN 32
+#define MT7621_DDR2_SIZE_MAX 256
+
+#define MT7621_CHIP_NAME0 0x3637544D
+#define MT7621_CHIP_NAME1 0x20203132
+
+#define MIPS_GIC_IRQ_BASE (MIPS_CPU_IRQ_BASE + 8)
+
+#endif
diff --git a/arch/mips/include/asm/mach-ralink/mt7621/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ralink/mt7621/cpu-feature-overrides.h
new file mode 100644
index 000000000000..15db1b330fe8
--- /dev/null
+++ b/arch/mips/include/asm/mach-ralink/mt7621/cpu-feature-overrides.h
@@ -0,0 +1,65 @@
+/*
+ * Ralink MT7621 specific CPU feature overrides
+ *
+ * Copyright (C) 2008-2009 Gabor Juhos <juhosg@openwrt.org>
+ * Copyright (C) 2008 Imre Kaloz <kaloz@openwrt.org>
+ * Copyright (C) 2015 Felix Fietkau <nbd@openwrt.org>
+ *
+ * This file was derived from: include/asm-mips/cpu-features.h
+ * Copyright (C) 2003, 2004 Ralf Baechle
+ * Copyright (C) 2004 Maciej W. Rozycki
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ */
+#ifndef _MT7621_CPU_FEATURE_OVERRIDES_H
+#define _MT7621_CPU_FEATURE_OVERRIDES_H
+
+#define cpu_has_tlb 1
+#define cpu_has_4kex 1
+#define cpu_has_3k_cache 0
+#define cpu_has_4k_cache 1
+#define cpu_has_tx39_cache 0
+#define cpu_has_sb1_cache 0
+#define cpu_has_fpu 0
+#define cpu_has_32fpr 0
+#define cpu_has_counter 1
+#define cpu_has_watch 1
+#define cpu_has_divec 1
+
+#define cpu_has_prefetch 1
+#define cpu_has_ejtag 1
+#define cpu_has_llsc 1
+
+#define cpu_has_mips16 1
+#define cpu_has_mdmx 0
+#define cpu_has_mips3d 0
+#define cpu_has_smartmips 0
+
+#define cpu_has_mips32r1 1
+#define cpu_has_mips32r2 1
+#define cpu_has_mips64r1 0
+#define cpu_has_mips64r2 0
+
+#define cpu_has_dsp 1
+#define cpu_has_dsp2 0
+#define cpu_has_mipsmt 1
+
+#define cpu_has_64bits 0
+#define cpu_has_64bit_zero_reg 0
+#define cpu_has_64bit_gp_regs 0
+#define cpu_has_64bit_addresses 0
+
+#define cpu_dcache_line_size() 32
+#define cpu_icache_line_size() 32
+
+#define cpu_has_dc_aliases 0
+#define cpu_has_vtag_icache 0
+
+#define cpu_has_rixi 0
+#define cpu_has_tlbinv 0
+#define cpu_has_userlocal 1
+
+#endif /* _MT7621_CPU_FEATURE_OVERRIDES_H */
diff --git a/arch/mips/include/asm/mips-cm.h b/arch/mips/include/asm/mips-cm.h
index 6516e9da5133..b196825a1de9 100644
--- a/arch/mips/include/asm/mips-cm.h
+++ b/arch/mips/include/asm/mips-cm.h
@@ -243,6 +243,10 @@ BUILD_CM_Cx_R_(tcid_8_priority, 0x80)
#define CM_GCR_BASE_CMDEFTGT_IOCU0 2
#define CM_GCR_BASE_CMDEFTGT_IOCU1 3
+/* GCR_RESET_EXT_BASE register fields */
+#define CM_GCR_RESET_EXT_BASE_EVARESET BIT(31)
+#define CM_GCR_RESET_EXT_BASE_UEB BIT(30)
+
/* GCR_ACCESS register fields */
#define CM_GCR_ACCESS_ACCESSEN_SHF 0
#define CM_GCR_ACCESS_ACCESSEN_MSK (_ULCAST_(0xff) << 0)
diff --git a/arch/mips/include/asm/mips-r2-to-r6-emul.h b/arch/mips/include/asm/mips-r2-to-r6-emul.h
index 4b89f28047f7..1f6ea8352ca9 100644
--- a/arch/mips/include/asm/mips-r2-to-r6-emul.h
+++ b/arch/mips/include/asm/mips-r2-to-r6-emul.h
@@ -52,7 +52,7 @@ do { \
__this_cpu_inc(mipsr2emustats.M); \
err = __get_user(nir, (u32 __user *)regs->cp0_epc); \
if (!err) { \
- if (nir == BREAK_MATH) \
+ if (nir == BREAK_MATH(0)) \
__this_cpu_inc(mipsr2bdemustats.M); \
} \
preempt_enable(); \
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index e43aca183c99..3ad19ad04d8a 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -394,6 +394,8 @@
#define CAUSEF_IV (_ULCAST_(1) << 23)
#define CAUSEB_PCI 26
#define CAUSEF_PCI (_ULCAST_(1) << 26)
+#define CAUSEB_DC 27
+#define CAUSEF_DC (_ULCAST_(1) << 27)
#define CAUSEB_CE 28
#define CAUSEF_CE (_ULCAST_(3) << 28)
#define CAUSEB_TI 30
@@ -402,6 +404,38 @@
#define CAUSEF_BD (_ULCAST_(1) << 31)
/*
+ * Cause.ExcCode trap codes.
+ */
+#define EXCCODE_INT 0 /* Interrupt pending */
+#define EXCCODE_MOD 1 /* TLB modified fault */
+#define EXCCODE_TLBL 2 /* TLB miss on load or ifetch */
+#define EXCCODE_TLBS 3 /* TLB miss on a store */
+#define EXCCODE_ADEL 4 /* Address error on a load or ifetch */
+#define EXCCODE_ADES 5 /* Address error on a store */
+#define EXCCODE_IBE 6 /* Bus error on an ifetch */
+#define EXCCODE_DBE 7 /* Bus error on a load or store */
+#define EXCCODE_SYS 8 /* System call */
+#define EXCCODE_BP 9 /* Breakpoint */
+#define EXCCODE_RI 10 /* Reserved instruction exception */
+#define EXCCODE_CPU 11 /* Coprocessor unusable */
+#define EXCCODE_OV 12 /* Arithmetic overflow */
+#define EXCCODE_TR 13 /* Trap instruction */
+#define EXCCODE_MSAFPE 14 /* MSA floating point exception */
+#define EXCCODE_FPE 15 /* Floating point exception */
+#define EXCCODE_TLBRI 19 /* TLB Read-Inhibit exception */
+#define EXCCODE_TLBXI 20 /* TLB Execution-Inhibit exception */
+#define EXCCODE_MSADIS 21 /* MSA disabled exception */
+#define EXCCODE_MDMX 22 /* MDMX unusable exception */
+#define EXCCODE_WATCH 23 /* Watch address reference */
+#define EXCCODE_MCHECK 24 /* Machine check */
+#define EXCCODE_THREAD 25 /* Thread exceptions (MT) */
+#define EXCCODE_DSPDIS 26 /* DSP disabled exception */
+#define EXCCODE_GE 27 /* Virtualized guest exception (VZ) */
+
+/* Implementation specific trap codes used by MIPS cores */
+#define MIPS_EXCCODE_TLBPAR 16 /* TLB parity error exception */
+
+/*
* Bits in the coprocessor 0 config register.
*/
/* Generic bits. */
diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h
index 2046c0230224..21ed7150fec3 100644
--- a/arch/mips/include/asm/page.h
+++ b/arch/mips/include/asm/page.h
@@ -33,7 +33,7 @@
#define PAGE_SHIFT 16
#endif
#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE - 1))
+#define PAGE_MASK (~((1 << PAGE_SHIFT) - 1))
/*
* This is used for calculating the real page sizes
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 6995b4a02e23..9a4fe0133ff1 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -353,7 +353,7 @@ static inline pte_t pte_mkdirty(pte_t pte)
static inline pte_t pte_mkyoung(pte_t pte)
{
pte_val(pte) |= _PAGE_ACCESSED;
-#ifdef CONFIG_CPU_MIPSR2
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
if (!(pte_val(pte) & _PAGE_NO_READ))
pte_val(pte) |= _PAGE_SILENT_READ;
else
@@ -542,7 +542,7 @@ static inline pmd_t pmd_mkyoung(pmd_t pmd)
{
pmd_val(pmd) |= _PAGE_ACCESSED;
-#ifdef CONFIG_CPU_MIPSR2
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
if (!(pmd_val(pmd) & _PAGE_NO_READ))
pmd_val(pmd) |= _PAGE_SILENT_READ;
else
diff --git a/arch/mips/include/uapi/asm/inst.h b/arch/mips/include/uapi/asm/inst.h
index 9b44d5a816fa..ddea53e3a9bb 100644
--- a/arch/mips/include/uapi/asm/inst.h
+++ b/arch/mips/include/uapi/asm/inst.h
@@ -116,7 +116,8 @@ enum cop_op {
dmtc_op = 0x05, ctc_op = 0x06,
mthc0_op = 0x06, mthc_op = 0x07,
bc_op = 0x08, bc1eqz_op = 0x09,
- bc1nez_op = 0x0d, cop_op = 0x10,
+ mfmc0_op = 0x0b, bc1nez_op = 0x0d,
+ wrpgpr_op = 0x0e, cop_op = 0x10,
copm_op = 0x18
};
@@ -529,7 +530,7 @@ enum MIPS6e_i8_func {
};
/*
- * (microMIPS & MIPS16e) NOP instruction.
+ * (microMIPS) NOP instruction.
*/
#define MM_NOP16 0x0c00
@@ -679,7 +680,7 @@ struct fp0_format { /* FPU multiply and add format (MIPS32) */
;))))))
};
-struct mm_fp0_format { /* FPU multipy and add format (microMIPS) */
+struct mm_fp0_format { /* FPU multiply and add format (microMIPS) */
__BITFIELD_FIELD(unsigned int opcode : 6,
__BITFIELD_FIELD(unsigned int ft : 5,
__BITFIELD_FIELD(unsigned int fs : 5,
@@ -799,6 +800,13 @@ struct mm_x_format { /* Scaled indexed load format (microMIPS) */
;)))))
};
+struct mm_a_format { /* ADDIUPC format (microMIPS) */
+ __BITFIELD_FIELD(unsigned int opcode : 6,
+ __BITFIELD_FIELD(unsigned int rs : 3,
+ __BITFIELD_FIELD(signed int simmediate : 23,
+ ;)))
+};
+
/*
* microMIPS instruction formats (16-bit length)
*/
@@ -940,6 +948,7 @@ union mips_instruction {
struct mm_i_format mm_i_format;
struct mm_m_format mm_m_format;
struct mm_x_format mm_x_format;
+ struct mm_a_format mm_a_format;
struct mm_b0_format mm_b0_format;
struct mm_b1_format mm_b1_format;
struct mm16_m_format mm16_m_format ;
diff --git a/arch/mips/kernel/cpu-bugs64.c b/arch/mips/kernel/cpu-bugs64.c
index 09f4034f239f..6392dbe504fb 100644
--- a/arch/mips/kernel/cpu-bugs64.c
+++ b/arch/mips/kernel/cpu-bugs64.c
@@ -190,7 +190,7 @@ static inline void check_daddi(void)
printk("Checking for the daddi bug... ");
local_irq_save(flags);
- handler = set_except_vector(12, handle_daddi_ov);
+ handler = set_except_vector(EXCCODE_OV, handle_daddi_ov);
/*
* The following code fails to trigger an overflow exception
* when executed on R4000 rev. 2.2 or 3.0 (PRId 00000422 or
@@ -214,7 +214,7 @@ static inline void check_daddi(void)
".set pop"
: "=r" (v), "=&r" (tmp)
: "I" (0xffffffffffffdb9aUL), "I" (0x1234));
- set_except_vector(12, handler);
+ set_except_vector(EXCCODE_OV, handler);
local_irq_restore(flags);
if (daddi_ov) {
@@ -225,14 +225,14 @@ static inline void check_daddi(void)
printk("yes, workaround... ");
local_irq_save(flags);
- handler = set_except_vector(12, handle_daddi_ov);
+ handler = set_except_vector(EXCCODE_OV, handle_daddi_ov);
asm volatile(
"addiu %1, $0, %2\n\t"
"dsrl %1, %1, 1\n\t"
"daddi %0, %1, %3"
: "=r" (v), "=&r" (tmp)
: "I" (0xffffffffffffdb9aUL), "I" (0x1234));
- set_except_vector(12, handler);
+ set_except_vector(EXCCODE_OV, handler);
local_irq_restore(flags);
if (daddi_ov) {
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 6b9064499bd3..b725b713b9f8 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -99,6 +99,161 @@ static inline void cpu_set_fpu_fcsr_mask(struct cpuinfo_mips *c)
}
/*
+ * Determine the IEEE 754 NaN encodings and ABS.fmt/NEG.fmt execution modes
+ * supported by FPU hardware.
+ */
+static void cpu_set_fpu_2008(struct cpuinfo_mips *c)
+{
+ if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 |
+ MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 |
+ MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) {
+ unsigned long sr, fir, fcsr, fcsr0, fcsr1;
+
+ sr = read_c0_status();
+ __enable_fpu(FPU_AS_IS);
+
+ fir = read_32bit_cp1_register(CP1_REVISION);
+ if (fir & MIPS_FPIR_HAS2008) {
+ fcsr = read_32bit_cp1_register(CP1_STATUS);
+
+ fcsr0 = fcsr & ~(FPU_CSR_ABS2008 | FPU_CSR_NAN2008);
+ write_32bit_cp1_register(CP1_STATUS, fcsr0);
+ fcsr0 = read_32bit_cp1_register(CP1_STATUS);
+
+ fcsr1 = fcsr | FPU_CSR_ABS2008 | FPU_CSR_NAN2008;
+ write_32bit_cp1_register(CP1_STATUS, fcsr1);
+ fcsr1 = read_32bit_cp1_register(CP1_STATUS);
+
+ write_32bit_cp1_register(CP1_STATUS, fcsr);
+
+ if (!(fcsr0 & FPU_CSR_NAN2008))
+ c->options |= MIPS_CPU_NAN_LEGACY;
+ if (fcsr1 & FPU_CSR_NAN2008)
+ c->options |= MIPS_CPU_NAN_2008;
+
+ if ((fcsr0 ^ fcsr1) & FPU_CSR_ABS2008)
+ c->fpu_msk31 &= ~FPU_CSR_ABS2008;
+ else
+ c->fpu_csr31 |= fcsr & FPU_CSR_ABS2008;
+
+ if ((fcsr0 ^ fcsr1) & FPU_CSR_NAN2008)
+ c->fpu_msk31 &= ~FPU_CSR_NAN2008;
+ else
+ c->fpu_csr31 |= fcsr & FPU_CSR_NAN2008;
+ } else {
+ c->options |= MIPS_CPU_NAN_LEGACY;
+ }
+
+ write_c0_status(sr);
+ } else {
+ c->options |= MIPS_CPU_NAN_LEGACY;
+ }
+}
+
+/*
+ * IEEE 754 conformance mode to use. Affects the NaN encoding and the
+ * ABS.fmt/NEG.fmt execution mode.
+ */
+static enum { STRICT, LEGACY, STD2008, RELAXED } ieee754 = STRICT;
+
+/*
+ * Set the IEEE 754 NaN encodings and the ABS.fmt/NEG.fmt execution modes
+ * to support by the FPU emulator according to the IEEE 754 conformance
+ * mode selected. Note that "relaxed" straps the emulator so that it
+ * allows 2008-NaN binaries even for legacy processors.
+ */
+static void cpu_set_nofpu_2008(struct cpuinfo_mips *c)
+{
+ c->options &= ~(MIPS_CPU_NAN_2008 | MIPS_CPU_NAN_LEGACY);
+ c->fpu_csr31 &= ~(FPU_CSR_ABS2008 | FPU_CSR_NAN2008);
+ c->fpu_msk31 &= ~(FPU_CSR_ABS2008 | FPU_CSR_NAN2008);
+
+ switch (ieee754) {
+ case STRICT:
+ if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 |
+ MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 |
+ MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) {
+ c->options |= MIPS_CPU_NAN_2008 | MIPS_CPU_NAN_LEGACY;
+ } else {
+ c->options |= MIPS_CPU_NAN_LEGACY;
+ c->fpu_msk31 |= FPU_CSR_ABS2008 | FPU_CSR_NAN2008;
+ }
+ break;
+ case LEGACY:
+ c->options |= MIPS_CPU_NAN_LEGACY;
+ c->fpu_msk31 |= FPU_CSR_ABS2008 | FPU_CSR_NAN2008;
+ break;
+ case STD2008:
+ c->options |= MIPS_CPU_NAN_2008;
+ c->fpu_csr31 |= FPU_CSR_ABS2008 | FPU_CSR_NAN2008;
+ c->fpu_msk31 |= FPU_CSR_ABS2008 | FPU_CSR_NAN2008;
+ break;
+ case RELAXED:
+ c->options |= MIPS_CPU_NAN_2008 | MIPS_CPU_NAN_LEGACY;
+ break;
+ }
+}
+
+/*
+ * Override the IEEE 754 NaN encoding and ABS.fmt/NEG.fmt execution mode
+ * according to the "ieee754=" parameter.
+ */
+static void cpu_set_nan_2008(struct cpuinfo_mips *c)
+{
+ switch (ieee754) {
+ case STRICT:
+ mips_use_nan_legacy = !!cpu_has_nan_legacy;
+ mips_use_nan_2008 = !!cpu_has_nan_2008;
+ break;
+ case LEGACY:
+ mips_use_nan_legacy = !!cpu_has_nan_legacy;
+ mips_use_nan_2008 = !cpu_has_nan_legacy;
+ break;
+ case STD2008:
+ mips_use_nan_legacy = !cpu_has_nan_2008;
+ mips_use_nan_2008 = !!cpu_has_nan_2008;
+ break;
+ case RELAXED:
+ mips_use_nan_legacy = true;
+ mips_use_nan_2008 = true;
+ break;
+ }
+}
+
+/*
+ * IEEE 754 NaN encoding and ABS.fmt/NEG.fmt execution mode override
+ * settings:
+ *
+ * strict: accept binaries that request a NaN encoding supported by the FPU
+ * legacy: only accept legacy-NaN binaries
+ * 2008: only accept 2008-NaN binaries
+ * relaxed: accept any binaries regardless of whether supported by the FPU
+ */
+static int __init ieee754_setup(char *s)
+{
+ if (!s)
+ return -1;
+ else if (!strcmp(s, "strict"))
+ ieee754 = STRICT;
+ else if (!strcmp(s, "legacy"))
+ ieee754 = LEGACY;
+ else if (!strcmp(s, "2008"))
+ ieee754 = STD2008;
+ else if (!strcmp(s, "relaxed"))
+ ieee754 = RELAXED;
+ else
+ return -1;
+
+ if (!(boot_cpu_data.options & MIPS_CPU_FPU))
+ cpu_set_nofpu_2008(&boot_cpu_data);
+ cpu_set_nan_2008(&boot_cpu_data);
+
+ return 0;
+}
+
+early_param("ieee754", ieee754_setup);
+
+/*
* Set the FIR feature flags for the FPU emulator.
*/
static void cpu_set_nofpu_id(struct cpuinfo_mips *c)
@@ -113,6 +268,8 @@ static void cpu_set_nofpu_id(struct cpuinfo_mips *c)
if (c->isa_level & (MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 |
MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6))
value |= MIPS_FPIR_F64 | MIPS_FPIR_L | MIPS_FPIR_W;
+ if (c->options & MIPS_CPU_NAN_2008)
+ value |= MIPS_FPIR_HAS2008;
c->fpu_id = value;
}
@@ -137,6 +294,8 @@ static void cpu_set_fpu_opts(struct cpuinfo_mips *c)
}
cpu_set_fpu_fcsr_mask(c);
+ cpu_set_fpu_2008(c);
+ cpu_set_nan_2008(c);
}
/*
@@ -147,6 +306,8 @@ static void cpu_set_nofpu_opts(struct cpuinfo_mips *c)
c->options &= ~MIPS_CPU_FPU;
c->fpu_msk31 = mips_nofpu_msk31;
+ cpu_set_nofpu_2008(c);
+ cpu_set_nan_2008(c);
cpu_set_nofpu_id(c);
}
diff --git a/arch/mips/kernel/elf.c b/arch/mips/kernel/elf.c
index 4a4d9e067c89..c3c234dc0c07 100644
--- a/arch/mips/kernel/elf.c
+++ b/arch/mips/kernel/elf.c
@@ -11,6 +11,12 @@
#include <linux/elf.h>
#include <linux/sched.h>
+#include <asm/cpu-info.h>
+
+/* Whether to accept legacy-NaN and 2008-NaN user binaries. */
+bool mips_use_nan_legacy;
+bool mips_use_nan_2008;
+
/* FPU modes */
enum {
FP_FRE,
@@ -68,15 +74,23 @@ static struct mode_req none_req = { true, true, false, true, true };
int arch_elf_pt_proc(void *_ehdr, void *_phdr, struct file *elf,
bool is_interp, struct arch_elf_state *state)
{
- struct elf32_hdr *ehdr32 = _ehdr;
+ union {
+ struct elf32_hdr e32;
+ struct elf64_hdr e64;
+ } *ehdr = _ehdr;
struct elf32_phdr *phdr32 = _phdr;
struct elf64_phdr *phdr64 = _phdr;
struct mips_elf_abiflags_v0 abiflags;
+ bool elf32;
+ u32 flags;
int ret;
+ elf32 = ehdr->e32.e_ident[EI_CLASS] == ELFCLASS32;
+ flags = elf32 ? ehdr->e32.e_flags : ehdr->e64.e_flags;
+
/* Lets see if this is an O32 ELF */
- if (ehdr32->e_ident[EI_CLASS] == ELFCLASS32) {
- if (ehdr32->e_flags & EF_MIPS_FP64) {
+ if (elf32) {
+ if (flags & EF_MIPS_FP64) {
/*
* Set MIPS_ABI_FP_OLD_64 for EF_MIPS_FP64. We will override it
* later if needed
@@ -120,13 +134,50 @@ int arch_elf_pt_proc(void *_ehdr, void *_phdr, struct file *elf,
return 0;
}
-int arch_check_elf(void *_ehdr, bool has_interpreter,
+int arch_check_elf(void *_ehdr, bool has_interpreter, void *_interp_ehdr,
struct arch_elf_state *state)
{
- struct elf32_hdr *ehdr = _ehdr;
+ union {
+ struct elf32_hdr e32;
+ struct elf64_hdr e64;
+ } *ehdr = _ehdr;
+ union {
+ struct elf32_hdr e32;
+ struct elf64_hdr e64;
+ } *iehdr = _interp_ehdr;
struct mode_req prog_req, interp_req;
int fp_abi, interp_fp_abi, abi0, abi1, max_abi;
- bool is_mips64;
+ bool elf32;
+ u32 flags;
+
+ elf32 = ehdr->e32.e_ident[EI_CLASS] == ELFCLASS32;
+ flags = elf32 ? ehdr->e32.e_flags : ehdr->e64.e_flags;
+
+ /*
+ * Determine the NaN personality, reject the binary if not allowed.
+ * Also ensure that any interpreter matches the executable.
+ */
+ if (flags & EF_MIPS_NAN2008) {
+ if (mips_use_nan_2008)
+ state->nan_2008 = 1;
+ else
+ return -ENOEXEC;
+ } else {
+ if (mips_use_nan_legacy)
+ state->nan_2008 = 0;
+ else
+ return -ENOEXEC;
+ }
+ if (has_interpreter) {
+ bool ielf32;
+ u32 iflags;
+
+ ielf32 = iehdr->e32.e_ident[EI_CLASS] == ELFCLASS32;
+ iflags = ielf32 ? iehdr->e32.e_flags : iehdr->e64.e_flags;
+
+ if ((flags ^ iflags) & EF_MIPS_NAN2008)
+ return -ELIBBAD;
+ }
if (!config_enabled(CONFIG_MIPS_O32_FP64_SUPPORT))
return 0;
@@ -142,21 +193,18 @@ int arch_check_elf(void *_ehdr, bool has_interpreter,
abi0 = abi1 = fp_abi;
}
- is_mips64 = (ehdr->e_ident[EI_CLASS] == ELFCLASS64) ||
- (ehdr->e_flags & EF_MIPS_ABI2);
+ if (elf32 && !(flags & EF_MIPS_ABI2)) {
+ /* Default to a mode capable of running code expecting FR=0 */
+ state->overall_fp_mode = cpu_has_mips_r6 ? FP_FRE : FP_FR0;
- if (is_mips64) {
+ /* Allow all ABIs we know about */
+ max_abi = MIPS_ABI_FP_64A;
+ } else {
/* MIPS64 code always uses FR=1, thus the default is easy */
state->overall_fp_mode = FP_FR1;
/* Disallow access to the various FPXX & FP64 ABIs */
max_abi = MIPS_ABI_FP_SOFT;
- } else {
- /* Default to a mode capable of running code expecting FR=0 */
- state->overall_fp_mode = cpu_has_mips_r6 ? FP_FRE : FP_FR0;
-
- /* Allow all ABIs we know about */
- max_abi = MIPS_ABI_FP_64A;
}
if ((abi0 > max_abi && abi0 != MIPS_ABI_FP_UNKNOWN) ||
@@ -254,3 +302,27 @@ void mips_set_personality_fp(struct arch_elf_state *state)
BUG();
}
}
+
+/*
+ * Select the IEEE 754 NaN encoding and ABS.fmt/NEG.fmt execution mode
+ * in FCSR according to the ELF NaN personality.
+ */
+void mips_set_personality_nan(struct arch_elf_state *state)
+{
+ struct cpuinfo_mips *c = &boot_cpu_data;
+ struct task_struct *t = current;
+
+ t->thread.fpu.fcr31 = c->fpu_csr31;
+ switch (state->nan_2008) {
+ case 0:
+ break;
+ case 1:
+ if (!(c->fpu_msk31 & FPU_CSR_NAN2008))
+ t->thread.fpu.fcr31 |= FPU_CSR_NAN2008;
+ if (!(c->fpu_msk31 & FPU_CSR_ABS2008))
+ t->thread.fpu.fcr31 |= FPU_CSR_ABS2008;
+ break;
+ default:
+ BUG();
+ }
+}
diff --git a/arch/mips/kernel/gpio_txx9.c b/arch/mips/kernel/gpio_txx9.c
index c6854d9df926..705be43c3533 100644
--- a/arch/mips/kernel/gpio_txx9.c
+++ b/arch/mips/kernel/gpio_txx9.c
@@ -21,7 +21,7 @@ static struct txx9_pio_reg __iomem *txx9_pioptr;
static int txx9_gpio_get(struct gpio_chip *chip, unsigned int offset)
{
- return __raw_readl(&txx9_pioptr->din) & (1 << offset);
+ return !!(__raw_readl(&txx9_pioptr->din) & (1 << offset));
}
static void txx9_gpio_set_raw(unsigned int offset, int value)
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 4f0ac78d17f1..a5279b2f3198 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -548,9 +548,6 @@ static const struct pt_regs_offset regoffset_table[] = {
REG_OFFSET_NAME(c0_badvaddr, cp0_badvaddr),
REG_OFFSET_NAME(c0_cause, cp0_cause),
REG_OFFSET_NAME(c0_epc, cp0_epc),
-#ifdef CONFIG_MIPS_MT_SMTC
- REG_OFFSET_NAME(c0_tcstatus, cp0_tcstatus),
-#endif
#ifdef CONFIG_CPU_CAVIUM_OCTEON
REG_OFFSET_NAME(mpl0, mpl[0]),
REG_OFFSET_NAME(mpl1, mpl[1]),
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 66aac55df349..569a7d5242dd 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -623,7 +623,7 @@ static void __init request_crashkernel(struct resource *res)
#define USE_PROM_CMDLINE IS_ENABLED(CONFIG_MIPS_CMDLINE_FROM_BOOTLOADER)
#define USE_DTB_CMDLINE IS_ENABLED(CONFIG_MIPS_CMDLINE_FROM_DTB)
-#define EXTEND_WITH_PROM IS_ENABLED(CONFIG_MIPS_CMDLINE_EXTEND)
+#define EXTEND_WITH_PROM IS_ENABLED(CONFIG_MIPS_CMDLINE_DTB_EXTEND)
static void __init arch_mem_init(char **cmdline_p)
{
diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
index e04c8057b882..2ad4e4c96d61 100644
--- a/arch/mips/kernel/smp-cps.c
+++ b/arch/mips/kernel/smp-cps.c
@@ -202,6 +202,9 @@ static void boot_core(unsigned core)
/* Ensure its coherency is disabled */
write_gcr_co_coherence(0);
+ /* Start it with the legacy memory map and exception base */
+ write_gcr_co_reset_ext_base(CM_GCR_RESET_EXT_BASE_UEB);
+
/* Ensure the core can access the GCRs */
access = read_gcr_access();
access |= 1 << (CM_GCR_ACCESS_ACCESSEN_SHF + core);
diff --git a/arch/mips/kernel/sync-r4k.c b/arch/mips/kernel/sync-r4k.c
index 2242bdd4370e..4472a7f98577 100644
--- a/arch/mips/kernel/sync-r4k.c
+++ b/arch/mips/kernel/sync-r4k.c
@@ -17,35 +17,23 @@
#include <asm/barrier.h>
#include <asm/mipsregs.h>
-static atomic_t count_start_flag = ATOMIC_INIT(0);
+static unsigned int initcount = 0;
static atomic_t count_count_start = ATOMIC_INIT(0);
static atomic_t count_count_stop = ATOMIC_INIT(0);
-static atomic_t count_reference = ATOMIC_INIT(0);
#define COUNTON 100
-#define NR_LOOPS 5
+#define NR_LOOPS 3
void synchronise_count_master(int cpu)
{
int i;
unsigned long flags;
- unsigned int initcount;
printk(KERN_INFO "Synchronize counters for CPU %u: ", cpu);
local_irq_save(flags);
/*
- * Notify the slaves that it's time to start
- */
- atomic_set(&count_reference, read_c0_count());
- atomic_set(&count_start_flag, cpu);
- smp_wmb();
-
- /* Count will be initialised to current timer for all CPU's */
- initcount = read_c0_count();
-
- /*
* We loop a few times to get a primed instruction cache,
* then the last pass is more or less synchronised and
* the master and slaves each set their cycle counters to a known
@@ -63,9 +51,13 @@ void synchronise_count_master(int cpu)
atomic_set(&count_count_stop, 0);
smp_wmb();
- /* this lets the slaves write their count register */
+ /* Let the slave writes its count register */
atomic_inc(&count_count_start);
+ /* Count will be initialised to current timer */
+ if (i == 1)
+ initcount = read_c0_count();
+
/*
* Everyone initialises count in the last loop:
*/
@@ -73,7 +65,7 @@ void synchronise_count_master(int cpu)
write_c0_count(initcount);
/*
- * Wait for all slaves to leave the synchronization point:
+ * Wait for slave to leave the synchronization point:
*/
while (atomic_read(&count_count_stop) != 1)
mb();
@@ -83,7 +75,6 @@ void synchronise_count_master(int cpu)
}
/* Arrange for an interrupt in a short while */
write_c0_compare(read_c0_count() + COUNTON);
- atomic_set(&count_start_flag, 0);
local_irq_restore(flags);
@@ -98,19 +89,12 @@ void synchronise_count_master(int cpu)
void synchronise_count_slave(int cpu)
{
int i;
- unsigned int initcount;
/*
* Not every cpu is online at the time this gets called,
* so we first wait for the master to say everyone is ready
*/
- while (atomic_read(&count_start_flag) != cpu)
- mb();
-
- /* Count will be initialised to next expire for all CPU's */
- initcount = atomic_read(&count_reference);
-
for (i = 0; i < NR_LOOPS; i++) {
atomic_inc(&count_count_start);
while (atomic_read(&count_count_start) != 2)
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 886cb1976e90..bafcb7ad5c85 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -2250,7 +2250,7 @@ void __init trap_init(void)
* Only some CPUs have the watch exceptions.
*/
if (cpu_has_watch)
- set_except_vector(23, handle_watch);
+ set_except_vector(EXCCODE_WATCH, handle_watch);
/*
* Initialise interrupt handlers
@@ -2277,27 +2277,27 @@ void __init trap_init(void)
if (board_be_init)
board_be_init();
- set_except_vector(0, using_rollback_handler() ? rollback_handle_int
- : handle_int);
- set_except_vector(1, handle_tlbm);
- set_except_vector(2, handle_tlbl);
- set_except_vector(3, handle_tlbs);
+ set_except_vector(EXCCODE_INT, using_rollback_handler() ?
+ rollback_handle_int : handle_int);
+ set_except_vector(EXCCODE_MOD, handle_tlbm);
+ set_except_vector(EXCCODE_TLBL, handle_tlbl);
+ set_except_vector(EXCCODE_TLBS, handle_tlbs);
- set_except_vector(4, handle_adel);
- set_except_vector(5, handle_ades);
+ set_except_vector(EXCCODE_ADEL, handle_adel);
+ set_except_vector(EXCCODE_ADES, handle_ades);
- set_except_vector(6, handle_ibe);
- set_except_vector(7, handle_dbe);
+ set_except_vector(EXCCODE_IBE, handle_ibe);
+ set_except_vector(EXCCODE_DBE, handle_dbe);
- set_except_vector(8, handle_sys);
- set_except_vector(9, handle_bp);
- set_except_vector(10, rdhwr_noopt ? handle_ri :
+ set_except_vector(EXCCODE_SYS, handle_sys);
+ set_except_vector(EXCCODE_BP, handle_bp);
+ set_except_vector(EXCCODE_RI, rdhwr_noopt ? handle_ri :
(cpu_has_vtag_icache ?
handle_ri_rdhwr_vivt : handle_ri_rdhwr));
- set_except_vector(11, handle_cpu);
- set_except_vector(12, handle_ov);
- set_except_vector(13, handle_tr);
- set_except_vector(14, handle_msa_fpe);
+ set_except_vector(EXCCODE_CPU, handle_cpu);
+ set_except_vector(EXCCODE_OV, handle_ov);
+ set_except_vector(EXCCODE_TR, handle_tr);
+ set_except_vector(EXCCODE_MSAFPE, handle_msa_fpe);
if (current_cpu_type() == CPU_R6000 ||
current_cpu_type() == CPU_R6000A) {
@@ -2318,25 +2318,25 @@ void __init trap_init(void)
board_nmi_handler_setup();
if (cpu_has_fpu && !cpu_has_nofpuex)
- set_except_vector(15, handle_fpe);
+ set_except_vector(EXCCODE_FPE, handle_fpe);
- set_except_vector(16, handle_ftlb);
+ set_except_vector(MIPS_EXCCODE_TLBPAR, handle_ftlb);
if (cpu_has_rixiex) {
- set_except_vector(19, tlb_do_page_fault_0);
- set_except_vector(20, tlb_do_page_fault_0);
+ set_except_vector(EXCCODE_TLBRI, tlb_do_page_fault_0);
+ set_except_vector(EXCCODE_TLBXI, tlb_do_page_fault_0);
}
- set_except_vector(21, handle_msa);
- set_except_vector(22, handle_mdmx);
+ set_except_vector(EXCCODE_MSADIS, handle_msa);
+ set_except_vector(EXCCODE_MDMX, handle_mdmx);
if (cpu_has_mcheck)
- set_except_vector(24, handle_mcheck);
+ set_except_vector(EXCCODE_MCHECK, handle_mcheck);
if (cpu_has_mipsmt)
- set_except_vector(25, handle_mt);
+ set_except_vector(EXCCODE_THREAD, handle_mt);
- set_except_vector(26, handle_dsp);
+ set_except_vector(EXCCODE_DSPDIS, handle_dsp);
if (board_cache_error_setup)
board_cache_error_setup();
diff --git a/arch/mips/kvm/callback.c b/arch/mips/kvm/callback.c
index 313c2e37b978..d88aa2173fb0 100644
--- a/arch/mips/kvm/callback.c
+++ b/arch/mips/kvm/callback.c
@@ -11,4 +11,4 @@
#include <linux/kvm_host.h>
struct kvm_mips_callbacks *kvm_mips_callbacks;
-EXPORT_SYMBOL(kvm_mips_callbacks);
+EXPORT_SYMBOL_GPL(kvm_mips_callbacks);
diff --git a/arch/mips/kvm/dyntrans.c b/arch/mips/kvm/dyntrans.c
index 521121bdebff..f1527a465c1b 100644
--- a/arch/mips/kvm/dyntrans.c
+++ b/arch/mips/kvm/dyntrans.c
@@ -86,10 +86,8 @@ int kvm_mips_trans_mfc0(uint32_t inst, uint32_t *opc, struct kvm_vcpu *vcpu)
} else {
mfc0_inst = LW_TEMPLATE;
mfc0_inst |= ((rt & 0x1f) << 16);
- mfc0_inst |=
- offsetof(struct mips_coproc,
- reg[rd][sel]) + offsetof(struct kvm_mips_commpage,
- cop0);
+ mfc0_inst |= offsetof(struct kvm_mips_commpage,
+ cop0.reg[rd][sel]);
}
if (KVM_GUEST_KSEGX(opc) == KVM_GUEST_KSEG0) {
@@ -123,9 +121,7 @@ int kvm_mips_trans_mtc0(uint32_t inst, uint32_t *opc, struct kvm_vcpu *vcpu)
sel = inst & 0x7;
mtc0_inst |= ((rt & 0x1f) << 16);
- mtc0_inst |=
- offsetof(struct mips_coproc,
- reg[rd][sel]) + offsetof(struct kvm_mips_commpage, cop0);
+ mtc0_inst |= offsetof(struct kvm_mips_commpage, cop0.reg[rd][sel]);
if (KVM_GUEST_KSEGX(opc) == KVM_GUEST_KSEG0) {
kseg0_opc =
diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c
index 1b675c7ce89f..b37954cc880d 100644
--- a/arch/mips/kvm/emulate.c
+++ b/arch/mips/kvm/emulate.c
@@ -20,6 +20,7 @@
#include <linux/random.h>
#include <asm/page.h>
#include <asm/cacheflush.h>
+#include <asm/cacheops.h>
#include <asm/cpu-info.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
@@ -29,7 +30,6 @@
#include <asm/r4kcache.h>
#define CONFIG_MIPS_MT
-#include "opcode.h"
#include "interrupt.h"
#include "commpage.h"
@@ -1239,21 +1239,20 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc,
er = EMULATE_FAIL;
break;
- case mfmcz_op:
+ case mfmc0_op:
#ifdef KVM_MIPS_DEBUG_COP0_COUNTERS
cop0->stat[MIPS_CP0_STATUS][0]++;
#endif
- if (rt != 0) {
+ if (rt != 0)
vcpu->arch.gprs[rt] =
kvm_read_c0_guest_status(cop0);
- }
/* EI */
if (inst & 0x20) {
- kvm_debug("[%#lx] mfmcz_op: EI\n",
+ kvm_debug("[%#lx] mfmc0_op: EI\n",
vcpu->arch.pc);
kvm_set_c0_guest_status(cop0, ST0_IE);
} else {
- kvm_debug("[%#lx] mfmcz_op: DI\n",
+ kvm_debug("[%#lx] mfmc0_op: DI\n",
vcpu->arch.pc);
kvm_clear_c0_guest_status(cop0, ST0_IE);
}
@@ -1545,19 +1544,6 @@ int kvm_mips_sync_icache(unsigned long va, struct kvm_vcpu *vcpu)
return 0;
}
-#define MIPS_CACHE_OP_INDEX_INV 0x0
-#define MIPS_CACHE_OP_INDEX_LD_TAG 0x1
-#define MIPS_CACHE_OP_INDEX_ST_TAG 0x2
-#define MIPS_CACHE_OP_IMP 0x3
-#define MIPS_CACHE_OP_HIT_INV 0x4
-#define MIPS_CACHE_OP_FILL_WB_INV 0x5
-#define MIPS_CACHE_OP_HIT_HB 0x6
-#define MIPS_CACHE_OP_FETCH_LOCK 0x7
-
-#define MIPS_CACHE_ICACHE 0x0
-#define MIPS_CACHE_DCACHE 0x1
-#define MIPS_CACHE_SEC 0x3
-
enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc,
uint32_t cause,
struct kvm_run *run,
@@ -1582,8 +1568,8 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc,
base = (inst >> 21) & 0x1f;
op_inst = (inst >> 16) & 0x1f;
offset = (int16_t)inst;
- cache = (inst >> 16) & 0x3;
- op = (inst >> 18) & 0x7;
+ cache = op_inst & CacheOp_Cache;
+ op = op_inst & CacheOp_Op;
va = arch->gprs[base] + offset;
@@ -1595,14 +1581,14 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc,
* invalidate the caches entirely by stepping through all the
* ways/indexes
*/
- if (op == MIPS_CACHE_OP_INDEX_INV) {
+ if (op == Index_Writeback_Inv) {
kvm_debug("@ %#lx/%#lx CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n",
vcpu->arch.pc, vcpu->arch.gprs[31], cache, op, base,
arch->gprs[base], offset);
- if (cache == MIPS_CACHE_DCACHE)
+ if (cache == Cache_D)
r4k_blast_dcache();
- else if (cache == MIPS_CACHE_ICACHE)
+ else if (cache == Cache_I)
r4k_blast_icache();
else {
kvm_err("%s: unsupported CACHE INDEX operation\n",
@@ -1675,9 +1661,7 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc,
skip_fault:
/* XXXKYMA: Only a subset of cache ops are supported, used by Linux */
- if (cache == MIPS_CACHE_DCACHE
- && (op == MIPS_CACHE_OP_FILL_WB_INV
- || op == MIPS_CACHE_OP_HIT_INV)) {
+ if (op_inst == Hit_Writeback_Inv_D || op_inst == Hit_Invalidate_D) {
flush_dcache_line(va);
#ifdef CONFIG_KVM_MIPS_DYN_TRANS
@@ -1687,7 +1671,7 @@ skip_fault:
*/
kvm_mips_trans_cache_va(inst, opc, vcpu);
#endif
- } else if (op == MIPS_CACHE_OP_HIT_INV && cache == MIPS_CACHE_ICACHE) {
+ } else if (op_inst == Hit_Invalidate_I) {
flush_dcache_line(va);
flush_icache_line(va);
@@ -1781,7 +1765,7 @@ enum emulation_result kvm_mips_emulate_syscall(unsigned long cause,
kvm_debug("Delivering SYSCALL @ pc %#lx\n", arch->pc);
kvm_change_c0_guest_cause(cop0, (0xff),
- (T_SYSCALL << CAUSEB_EXCCODE));
+ (EXCCODE_SYS << CAUSEB_EXCCODE));
/* Set PC to the exception entry point */
arch->pc = KVM_GUEST_KSEG0 + 0x180;
@@ -1828,7 +1812,7 @@ enum emulation_result kvm_mips_emulate_tlbmiss_ld(unsigned long cause,
}
kvm_change_c0_guest_cause(cop0, (0xff),
- (T_TLB_LD_MISS << CAUSEB_EXCCODE));
+ (EXCCODE_TLBL << CAUSEB_EXCCODE));
/* setup badvaddr, context and entryhi registers for the guest */
kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr);
@@ -1874,7 +1858,7 @@ enum emulation_result kvm_mips_emulate_tlbinv_ld(unsigned long cause,
}
kvm_change_c0_guest_cause(cop0, (0xff),
- (T_TLB_LD_MISS << CAUSEB_EXCCODE));
+ (EXCCODE_TLBL << CAUSEB_EXCCODE));
/* setup badvaddr, context and entryhi registers for the guest */
kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr);
@@ -1918,7 +1902,7 @@ enum emulation_result kvm_mips_emulate_tlbmiss_st(unsigned long cause,
}
kvm_change_c0_guest_cause(cop0, (0xff),
- (T_TLB_ST_MISS << CAUSEB_EXCCODE));
+ (EXCCODE_TLBS << CAUSEB_EXCCODE));
/* setup badvaddr, context and entryhi registers for the guest */
kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr);
@@ -1962,7 +1946,7 @@ enum emulation_result kvm_mips_emulate_tlbinv_st(unsigned long cause,
}
kvm_change_c0_guest_cause(cop0, (0xff),
- (T_TLB_ST_MISS << CAUSEB_EXCCODE));
+ (EXCCODE_TLBS << CAUSEB_EXCCODE));
/* setup badvaddr, context and entryhi registers for the guest */
kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr);
@@ -2033,7 +2017,8 @@ enum emulation_result kvm_mips_emulate_tlbmod(unsigned long cause,
arch->pc = KVM_GUEST_KSEG0 + 0x180;
}
- kvm_change_c0_guest_cause(cop0, (0xff), (T_TLB_MOD << CAUSEB_EXCCODE));
+ kvm_change_c0_guest_cause(cop0, (0xff),
+ (EXCCODE_MOD << CAUSEB_EXCCODE));
/* setup badvaddr, context and entryhi registers for the guest */
kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr);
@@ -2068,7 +2053,7 @@ enum emulation_result kvm_mips_emulate_fpu_exc(unsigned long cause,
arch->pc = KVM_GUEST_KSEG0 + 0x180;
kvm_change_c0_guest_cause(cop0, (0xff),
- (T_COP_UNUSABLE << CAUSEB_EXCCODE));
+ (EXCCODE_CPU << CAUSEB_EXCCODE));
kvm_change_c0_guest_cause(cop0, (CAUSEF_CE), (0x1 << CAUSEB_CE));
return EMULATE_DONE;
@@ -2096,7 +2081,7 @@ enum emulation_result kvm_mips_emulate_ri_exc(unsigned long cause,
kvm_debug("Delivering RI @ pc %#lx\n", arch->pc);
kvm_change_c0_guest_cause(cop0, (0xff),
- (T_RES_INST << CAUSEB_EXCCODE));
+ (EXCCODE_RI << CAUSEB_EXCCODE));
/* Set PC to the exception entry point */
arch->pc = KVM_GUEST_KSEG0 + 0x180;
@@ -2131,7 +2116,7 @@ enum emulation_result kvm_mips_emulate_bp_exc(unsigned long cause,
kvm_debug("Delivering BP @ pc %#lx\n", arch->pc);
kvm_change_c0_guest_cause(cop0, (0xff),
- (T_BREAK << CAUSEB_EXCCODE));
+ (EXCCODE_BP << CAUSEB_EXCCODE));
/* Set PC to the exception entry point */
arch->pc = KVM_GUEST_KSEG0 + 0x180;
@@ -2166,7 +2151,7 @@ enum emulation_result kvm_mips_emulate_trap_exc(unsigned long cause,
kvm_debug("Delivering TRAP @ pc %#lx\n", arch->pc);
kvm_change_c0_guest_cause(cop0, (0xff),
- (T_TRAP << CAUSEB_EXCCODE));
+ (EXCCODE_TR << CAUSEB_EXCCODE));
/* Set PC to the exception entry point */
arch->pc = KVM_GUEST_KSEG0 + 0x180;
@@ -2201,7 +2186,7 @@ enum emulation_result kvm_mips_emulate_msafpe_exc(unsigned long cause,
kvm_debug("Delivering MSAFPE @ pc %#lx\n", arch->pc);
kvm_change_c0_guest_cause(cop0, (0xff),
- (T_MSAFPE << CAUSEB_EXCCODE));
+ (EXCCODE_MSAFPE << CAUSEB_EXCCODE));
/* Set PC to the exception entry point */
arch->pc = KVM_GUEST_KSEG0 + 0x180;
@@ -2236,7 +2221,7 @@ enum emulation_result kvm_mips_emulate_fpe_exc(unsigned long cause,
kvm_debug("Delivering FPE @ pc %#lx\n", arch->pc);
kvm_change_c0_guest_cause(cop0, (0xff),
- (T_FPE << CAUSEB_EXCCODE));
+ (EXCCODE_FPE << CAUSEB_EXCCODE));
/* Set PC to the exception entry point */
arch->pc = KVM_GUEST_KSEG0 + 0x180;
@@ -2271,7 +2256,7 @@ enum emulation_result kvm_mips_emulate_msadis_exc(unsigned long cause,
kvm_debug("Delivering MSADIS @ pc %#lx\n", arch->pc);
kvm_change_c0_guest_cause(cop0, (0xff),
- (T_MSADIS << CAUSEB_EXCCODE));
+ (EXCCODE_MSADIS << CAUSEB_EXCCODE));
/* Set PC to the exception entry point */
arch->pc = KVM_GUEST_KSEG0 + 0x180;
@@ -2480,25 +2465,25 @@ enum emulation_result kvm_mips_check_privilege(unsigned long cause,
if (usermode) {
switch (exccode) {
- case T_INT:
- case T_SYSCALL:
- case T_BREAK:
- case T_RES_INST:
- case T_TRAP:
- case T_MSAFPE:
- case T_FPE:
- case T_MSADIS:
+ case EXCCODE_INT:
+ case EXCCODE_SYS:
+ case EXCCODE_BP:
+ case EXCCODE_RI:
+ case EXCCODE_TR:
+ case EXCCODE_MSAFPE:
+ case EXCCODE_FPE:
+ case EXCCODE_MSADIS:
break;
- case T_COP_UNUSABLE:
+ case EXCCODE_CPU:
if (((cause & CAUSEF_CE) >> CAUSEB_CE) == 0)
er = EMULATE_PRIV_FAIL;
break;
- case T_TLB_MOD:
+ case EXCCODE_MOD:
break;
- case T_TLB_LD_MISS:
+ case EXCCODE_TLBL:
/*
* We we are accessing Guest kernel space, then send an
* address error exception to the guest
@@ -2507,12 +2492,12 @@ enum emulation_result kvm_mips_check_privilege(unsigned long cause,
kvm_debug("%s: LD MISS @ %#lx\n", __func__,
badvaddr);
cause &= ~0xff;
- cause |= (T_ADDR_ERR_LD << CAUSEB_EXCCODE);
+ cause |= (EXCCODE_ADEL << CAUSEB_EXCCODE);
er = EMULATE_PRIV_FAIL;
}
break;
- case T_TLB_ST_MISS:
+ case EXCCODE_TLBS:
/*
* We we are accessing Guest kernel space, then send an
* address error exception to the guest
@@ -2521,26 +2506,26 @@ enum emulation_result kvm_mips_check_privilege(unsigned long cause,
kvm_debug("%s: ST MISS @ %#lx\n", __func__,
badvaddr);
cause &= ~0xff;
- cause |= (T_ADDR_ERR_ST << CAUSEB_EXCCODE);
+ cause |= (EXCCODE_ADES << CAUSEB_EXCCODE);
er = EMULATE_PRIV_FAIL;
}
break;
- case T_ADDR_ERR_ST:
+ case EXCCODE_ADES:
kvm_debug("%s: address error ST @ %#lx\n", __func__,
badvaddr);
if ((badvaddr & PAGE_MASK) == KVM_GUEST_COMMPAGE_ADDR) {
cause &= ~0xff;
- cause |= (T_TLB_ST_MISS << CAUSEB_EXCCODE);
+ cause |= (EXCCODE_TLBS << CAUSEB_EXCCODE);
}
er = EMULATE_PRIV_FAIL;
break;
- case T_ADDR_ERR_LD:
+ case EXCCODE_ADEL:
kvm_debug("%s: address error LD @ %#lx\n", __func__,
badvaddr);
if ((badvaddr & PAGE_MASK) == KVM_GUEST_COMMPAGE_ADDR) {
cause &= ~0xff;
- cause |= (T_TLB_LD_MISS << CAUSEB_EXCCODE);
+ cause |= (EXCCODE_TLBL << CAUSEB_EXCCODE);
}
er = EMULATE_PRIV_FAIL;
break;
@@ -2583,13 +2568,12 @@ enum emulation_result kvm_mips_handle_tlbmiss(unsigned long cause,
* an entry into the guest TLB.
*/
index = kvm_mips_guest_tlb_lookup(vcpu,
- (va & VPN2_MASK) |
- (kvm_read_c0_guest_entryhi
- (vcpu->arch.cop0) & ASID_MASK));
+ (va & VPN2_MASK) |
+ (kvm_read_c0_guest_entryhi(vcpu->arch.cop0) & ASID_MASK));
if (index < 0) {
- if (exccode == T_TLB_LD_MISS) {
+ if (exccode == EXCCODE_TLBL) {
er = kvm_mips_emulate_tlbmiss_ld(cause, opc, run, vcpu);
- } else if (exccode == T_TLB_ST_MISS) {
+ } else if (exccode == EXCCODE_TLBS) {
er = kvm_mips_emulate_tlbmiss_st(cause, opc, run, vcpu);
} else {
kvm_err("%s: invalid exc code: %d\n", __func__,
@@ -2604,10 +2588,10 @@ enum emulation_result kvm_mips_handle_tlbmiss(unsigned long cause,
* exception to the guest
*/
if (!TLB_IS_VALID(*tlb, va)) {
- if (exccode == T_TLB_LD_MISS) {
+ if (exccode == EXCCODE_TLBL) {
er = kvm_mips_emulate_tlbinv_ld(cause, opc, run,
vcpu);
- } else if (exccode == T_TLB_ST_MISS) {
+ } else if (exccode == EXCCODE_TLBS) {
er = kvm_mips_emulate_tlbinv_st(cause, opc, run,
vcpu);
} else {
diff --git a/arch/mips/kvm/interrupt.c b/arch/mips/kvm/interrupt.c
index 9b4445940c2b..95f790663b0c 100644
--- a/arch/mips/kvm/interrupt.c
+++ b/arch/mips/kvm/interrupt.c
@@ -128,7 +128,7 @@ int kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority,
&& (!(kvm_read_c0_guest_status(cop0) & (ST0_EXL | ST0_ERL)))
&& (kvm_read_c0_guest_status(cop0) & IE_IRQ5)) {
allowed = 1;
- exccode = T_INT;
+ exccode = EXCCODE_INT;
}
break;
@@ -137,7 +137,7 @@ int kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority,
&& (!(kvm_read_c0_guest_status(cop0) & (ST0_EXL | ST0_ERL)))
&& (kvm_read_c0_guest_status(cop0) & IE_IRQ0)) {
allowed = 1;
- exccode = T_INT;
+ exccode = EXCCODE_INT;
}
break;
@@ -146,7 +146,7 @@ int kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority,
&& (!(kvm_read_c0_guest_status(cop0) & (ST0_EXL | ST0_ERL)))
&& (kvm_read_c0_guest_status(cop0) & IE_IRQ1)) {
allowed = 1;
- exccode = T_INT;
+ exccode = EXCCODE_INT;
}
break;
@@ -155,7 +155,7 @@ int kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority,
&& (!(kvm_read_c0_guest_status(cop0) & (ST0_EXL | ST0_ERL)))
&& (kvm_read_c0_guest_status(cop0) & IE_IRQ2)) {
allowed = 1;
- exccode = T_INT;
+ exccode = EXCCODE_INT;
}
break;
diff --git a/arch/mips/kvm/locore.S b/arch/mips/kvm/locore.S
index 7e2210846b8b..81687ab1b523 100644
--- a/arch/mips/kvm/locore.S
+++ b/arch/mips/kvm/locore.S
@@ -335,7 +335,7 @@ NESTED (MIPSX(GuestException), CALLFRAME_SIZ, ra)
/* Now restore the host state just enough to run the handlers */
- /* Swtich EBASE to the one used by Linux */
+ /* Switch EBASE to the one used by Linux */
/* load up the host EBASE */
mfc0 v0, CP0_STATUS
@@ -490,11 +490,11 @@ __kvm_mips_return_to_guest:
REG_ADDU t3, t1, t2
LONG_L k0, (t3)
andi k0, k0, 0xff
- mtc0 k0,CP0_ENTRYHI
+ mtc0 k0, CP0_ENTRYHI
ehb
/* Disable RDHWR access */
- mtc0 zero, CP0_HWRENA
+ mtc0 zero, CP0_HWRENA
/* load the guest context from VCPU and return */
LONG_L $0, VCPU_R0(k1)
@@ -606,11 +606,11 @@ __kvm_mips_return_to_host:
/* Restore RDHWR access */
PTR_LI k0, 0x2000000F
- mtc0 k0, CP0_HWRENA
+ mtc0 k0, CP0_HWRENA
/* Restore RA, which is the address we will return to */
- LONG_L ra, PT_R31(k1)
- j ra
+ LONG_L ra, PT_R31(k1)
+ j ra
nop
VECTOR_END(MIPSX(GuestExceptionEnd))
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index b9b803facdbf..8bc3977576e6 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -229,7 +229,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
kzalloc(npages * sizeof(unsigned long), GFP_KERNEL);
if (!kvm->arch.guest_pmap) {
- kvm_err("Failed to allocate guest PMAP");
+ kvm_err("Failed to allocate guest PMAP\n");
return;
}
@@ -1264,8 +1264,8 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
}
switch (exccode) {
- case T_INT:
- kvm_debug("[%d]T_INT @ %p\n", vcpu->vcpu_id, opc);
+ case EXCCODE_INT:
+ kvm_debug("[%d]EXCCODE_INT @ %p\n", vcpu->vcpu_id, opc);
++vcpu->stat.int_exits;
trace_kvm_exit(vcpu, INT_EXITS);
@@ -1276,8 +1276,8 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
ret = RESUME_GUEST;
break;
- case T_COP_UNUSABLE:
- kvm_debug("T_COP_UNUSABLE: @ PC: %p\n", opc);
+ case EXCCODE_CPU:
+ kvm_debug("EXCCODE_CPU: @ PC: %p\n", opc);
++vcpu->stat.cop_unusable_exits;
trace_kvm_exit(vcpu, COP_UNUSABLE_EXITS);
@@ -1287,13 +1287,13 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
ret = RESUME_HOST;
break;
- case T_TLB_MOD:
+ case EXCCODE_MOD:
++vcpu->stat.tlbmod_exits;
trace_kvm_exit(vcpu, TLBMOD_EXITS);
ret = kvm_mips_callbacks->handle_tlb_mod(vcpu);
break;
- case T_TLB_ST_MISS:
+ case EXCCODE_TLBS:
kvm_debug("TLB ST fault: cause %#x, status %#lx, PC: %p, BadVaddr: %#lx\n",
cause, kvm_read_c0_guest_status(vcpu->arch.cop0), opc,
badvaddr);
@@ -1303,7 +1303,7 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
ret = kvm_mips_callbacks->handle_tlb_st_miss(vcpu);
break;
- case T_TLB_LD_MISS:
+ case EXCCODE_TLBL:
kvm_debug("TLB LD fault: cause %#x, PC: %p, BadVaddr: %#lx\n",
cause, opc, badvaddr);
@@ -1312,55 +1312,55 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
ret = kvm_mips_callbacks->handle_tlb_ld_miss(vcpu);
break;
- case T_ADDR_ERR_ST:
+ case EXCCODE_ADES:
++vcpu->stat.addrerr_st_exits;
trace_kvm_exit(vcpu, ADDRERR_ST_EXITS);
ret = kvm_mips_callbacks->handle_addr_err_st(vcpu);
break;
- case T_ADDR_ERR_LD:
+ case EXCCODE_ADEL:
++vcpu->stat.addrerr_ld_exits;
trace_kvm_exit(vcpu, ADDRERR_LD_EXITS);
ret = kvm_mips_callbacks->handle_addr_err_ld(vcpu);
break;
- case T_SYSCALL:
+ case EXCCODE_SYS:
++vcpu->stat.syscall_exits;
trace_kvm_exit(vcpu, SYSCALL_EXITS);
ret = kvm_mips_callbacks->handle_syscall(vcpu);
break;
- case T_RES_INST:
+ case EXCCODE_RI:
++vcpu->stat.resvd_inst_exits;
trace_kvm_exit(vcpu, RESVD_INST_EXITS);
ret = kvm_mips_callbacks->handle_res_inst(vcpu);
break;
- case T_BREAK:
+ case EXCCODE_BP:
++vcpu->stat.break_inst_exits;
trace_kvm_exit(vcpu, BREAK_INST_EXITS);
ret = kvm_mips_callbacks->handle_break(vcpu);
break;
- case T_TRAP:
+ case EXCCODE_TR:
++vcpu->stat.trap_inst_exits;
trace_kvm_exit(vcpu, TRAP_INST_EXITS);
ret = kvm_mips_callbacks->handle_trap(vcpu);
break;
- case T_MSAFPE:
+ case EXCCODE_MSAFPE:
++vcpu->stat.msa_fpe_exits;
trace_kvm_exit(vcpu, MSA_FPE_EXITS);
ret = kvm_mips_callbacks->handle_msa_fpe(vcpu);
break;
- case T_FPE:
+ case EXCCODE_FPE:
++vcpu->stat.fpe_exits;
trace_kvm_exit(vcpu, FPE_EXITS);
ret = kvm_mips_callbacks->handle_fpe(vcpu);
break;
- case T_MSADIS:
+ case EXCCODE_MSADIS:
++vcpu->stat.msa_disabled_exits;
trace_kvm_exit(vcpu, MSA_DISABLED_EXITS);
ret = kvm_mips_callbacks->handle_msa_disabled(vcpu);
@@ -1620,7 +1620,7 @@ static struct notifier_block kvm_mips_csr_die_notifier = {
.notifier_call = kvm_mips_csr_die_notify,
};
-int __init kvm_mips_init(void)
+static int __init kvm_mips_init(void)
{
int ret;
@@ -1646,7 +1646,7 @@ int __init kvm_mips_init(void)
return 0;
}
-void __exit kvm_mips_exit(void)
+static void __exit kvm_mips_exit(void)
{
kvm_exit();
diff --git a/arch/mips/kvm/opcode.h b/arch/mips/kvm/opcode.h
deleted file mode 100644
index 03a6ae84c7df..000000000000
--- a/arch/mips/kvm/opcode.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved.
- * Authors: Sanjay Lal <sanjayl@kymasys.com>
- */
-
-/* Define opcode values not defined in <asm/isnt.h> */
-
-#ifndef __KVM_MIPS_OPCODE_H__
-#define __KVM_MIPS_OPCODE_H__
-
-/* COP0 Ops */
-#define mfmcz_op 0x0b /* 01011 */
-#define wrpgpr_op 0x0e /* 01110 */
-
-/* COP0 opcodes (only if COP0 and CO=1): */
-#define wait_op 0x20 /* 100000 */
-
-#endif /* __KVM_MIPS_OPCODE_H__ */
diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c
index 570479c03bdc..a08c43946247 100644
--- a/arch/mips/kvm/tlb.c
+++ b/arch/mips/kvm/tlb.c
@@ -35,17 +35,17 @@
#define PRIx64 "llx"
atomic_t kvm_mips_instance;
-EXPORT_SYMBOL(kvm_mips_instance);
+EXPORT_SYMBOL_GPL(kvm_mips_instance);
/* These function pointers are initialized once the KVM module is loaded */
kvm_pfn_t (*kvm_mips_gfn_to_pfn)(struct kvm *kvm, gfn_t gfn);
-EXPORT_SYMBOL(kvm_mips_gfn_to_pfn);
+EXPORT_SYMBOL_GPL(kvm_mips_gfn_to_pfn);
void (*kvm_mips_release_pfn_clean)(kvm_pfn_t pfn);
-EXPORT_SYMBOL(kvm_mips_release_pfn_clean);
+EXPORT_SYMBOL_GPL(kvm_mips_release_pfn_clean);
bool (*kvm_mips_is_error_pfn)(kvm_pfn_t pfn);
-EXPORT_SYMBOL(kvm_mips_is_error_pfn);
+EXPORT_SYMBOL_GPL(kvm_mips_is_error_pfn);
uint32_t kvm_mips_get_kernel_asid(struct kvm_vcpu *vcpu)
{
@@ -111,7 +111,7 @@ void kvm_mips_dump_host_tlbs(void)
mtc0_tlbw_hazard();
local_irq_restore(flags);
}
-EXPORT_SYMBOL(kvm_mips_dump_host_tlbs);
+EXPORT_SYMBOL_GPL(kvm_mips_dump_host_tlbs);
void kvm_mips_dump_guest_tlbs(struct kvm_vcpu *vcpu)
{
@@ -139,7 +139,7 @@ void kvm_mips_dump_guest_tlbs(struct kvm_vcpu *vcpu)
(tlb.tlb_lo1 >> 3) & 7, tlb.tlb_mask);
}
}
-EXPORT_SYMBOL(kvm_mips_dump_guest_tlbs);
+EXPORT_SYMBOL_GPL(kvm_mips_dump_guest_tlbs);
static int kvm_mips_map_page(struct kvm *kvm, gfn_t gfn)
{
@@ -191,7 +191,7 @@ unsigned long kvm_mips_translate_guest_kseg0_to_hpa(struct kvm_vcpu *vcpu,
return (kvm->arch.guest_pmap[gfn] << PAGE_SHIFT) + offset;
}
-EXPORT_SYMBOL(kvm_mips_translate_guest_kseg0_to_hpa);
+EXPORT_SYMBOL_GPL(kvm_mips_translate_guest_kseg0_to_hpa);
/* XXXKYMA: Must be called with interrupts disabled */
/* set flush_dcache_mask == 0 if no dcache flush required */
@@ -308,7 +308,7 @@ int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr,
return kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1,
flush_dcache_mask);
}
-EXPORT_SYMBOL(kvm_mips_handle_kseg0_tlb_fault);
+EXPORT_SYMBOL_GPL(kvm_mips_handle_kseg0_tlb_fault);
int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr,
struct kvm_vcpu *vcpu)
@@ -351,7 +351,7 @@ int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr,
return 0;
}
-EXPORT_SYMBOL(kvm_mips_handle_commpage_tlb_fault);
+EXPORT_SYMBOL_GPL(kvm_mips_handle_commpage_tlb_fault);
int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu,
struct kvm_mips_tlb *tlb,
@@ -401,7 +401,7 @@ int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu,
return kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1,
tlb->tlb_mask);
}
-EXPORT_SYMBOL(kvm_mips_handle_mapped_seg_tlb_fault);
+EXPORT_SYMBOL_GPL(kvm_mips_handle_mapped_seg_tlb_fault);
int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long entryhi)
{
@@ -422,7 +422,7 @@ int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long entryhi)
return index;
}
-EXPORT_SYMBOL(kvm_mips_guest_tlb_lookup);
+EXPORT_SYMBOL_GPL(kvm_mips_guest_tlb_lookup);
int kvm_mips_host_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long vaddr)
{
@@ -458,7 +458,7 @@ int kvm_mips_host_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long vaddr)
return idx;
}
-EXPORT_SYMBOL(kvm_mips_host_tlb_lookup);
+EXPORT_SYMBOL_GPL(kvm_mips_host_tlb_lookup);
int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va)
{
@@ -505,44 +505,7 @@ int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va)
return 0;
}
-EXPORT_SYMBOL(kvm_mips_host_tlb_inv);
-
-/* XXXKYMA: Fix Guest USER/KERNEL no longer share the same ASID */
-int kvm_mips_host_tlb_inv_index(struct kvm_vcpu *vcpu, int index)
-{
- unsigned long flags, old_entryhi;
-
- if (index >= current_cpu_data.tlbsize)
- BUG();
-
- local_irq_save(flags);
-
- old_entryhi = read_c0_entryhi();
-
- write_c0_entryhi(UNIQUE_ENTRYHI(index));
- mtc0_tlbw_hazard();
-
- write_c0_index(index);
- mtc0_tlbw_hazard();
-
- write_c0_entrylo0(0);
- mtc0_tlbw_hazard();
-
- write_c0_entrylo1(0);
- mtc0_tlbw_hazard();
-
- tlb_write_indexed();
- mtc0_tlbw_hazard();
- tlbw_use_hazard();
-
- write_c0_entryhi(old_entryhi);
- mtc0_tlbw_hazard();
- tlbw_use_hazard();
-
- local_irq_restore(flags);
-
- return 0;
-}
+EXPORT_SYMBOL_GPL(kvm_mips_host_tlb_inv);
void kvm_mips_flush_host_tlb(int skip_kseg0)
{
@@ -594,7 +557,7 @@ void kvm_mips_flush_host_tlb(int skip_kseg0)
local_irq_restore(flags);
}
-EXPORT_SYMBOL(kvm_mips_flush_host_tlb);
+EXPORT_SYMBOL_GPL(kvm_mips_flush_host_tlb);
void kvm_get_new_mmu_context(struct mm_struct *mm, unsigned long cpu,
struct kvm_vcpu *vcpu)
@@ -642,7 +605,7 @@ void kvm_local_flush_tlb_all(void)
local_irq_restore(flags);
}
-EXPORT_SYMBOL(kvm_local_flush_tlb_all);
+EXPORT_SYMBOL_GPL(kvm_local_flush_tlb_all);
/**
* kvm_mips_migrate_count() - Migrate timer.
@@ -673,8 +636,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
local_irq_save(flags);
- if (((vcpu->arch.
- guest_kernel_asid[cpu] ^ asid_cache(cpu)) & ASID_VERSION_MASK)) {
+ if ((vcpu->arch.guest_kernel_asid[cpu] ^ asid_cache(cpu)) &
+ ASID_VERSION_MASK) {
kvm_get_new_mmu_context(&vcpu->arch.guest_kernel_mm, cpu, vcpu);
vcpu->arch.guest_kernel_asid[cpu] =
vcpu->arch.guest_kernel_mm.context.asid[cpu];
@@ -739,7 +702,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
local_irq_restore(flags);
}
-EXPORT_SYMBOL(kvm_arch_vcpu_load);
+EXPORT_SYMBOL_GPL(kvm_arch_vcpu_load);
/* ASID can change if another task is scheduled during preemption */
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -768,7 +731,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
local_irq_restore(flags);
}
-EXPORT_SYMBOL(kvm_arch_vcpu_put);
+EXPORT_SYMBOL_GPL(kvm_arch_vcpu_put);
uint32_t kvm_get_inst(uint32_t *opc, struct kvm_vcpu *vcpu)
{
@@ -813,4 +776,4 @@ uint32_t kvm_get_inst(uint32_t *opc, struct kvm_vcpu *vcpu)
return inst;
}
-EXPORT_SYMBOL(kvm_get_inst);
+EXPORT_SYMBOL_GPL(kvm_get_inst);
diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c
index d836ed5b0bc7..ad988000563f 100644
--- a/arch/mips/kvm/trap_emul.c
+++ b/arch/mips/kvm/trap_emul.c
@@ -16,7 +16,6 @@
#include <linux/kvm_host.h>
-#include "opcode.h"
#include "interrupt.h"
static gpa_t kvm_trap_emul_gva_to_gpa_cb(gva_t gva)
diff --git a/arch/mips/lib/mips-atomic.c b/arch/mips/lib/mips-atomic.c
index 272af8ac2425..5530070e0d05 100644
--- a/arch/mips/lib/mips-atomic.c
+++ b/arch/mips/lib/mips-atomic.c
@@ -57,7 +57,6 @@ notrace void arch_local_irq_disable(void)
}
EXPORT_SYMBOL(arch_local_irq_disable);
-
notrace unsigned long arch_local_irq_save(void)
{
unsigned long flags;
@@ -111,31 +110,4 @@ notrace void arch_local_irq_restore(unsigned long flags)
}
EXPORT_SYMBOL(arch_local_irq_restore);
-
-notrace void __arch_local_irq_restore(unsigned long flags)
-{
- unsigned long __tmp1;
-
- preempt_disable();
-
- __asm__ __volatile__(
- " .set push \n"
- " .set noreorder \n"
- " .set noat \n"
- " mfc0 $1, $12 \n"
- " andi %[flags], 1 \n"
- " ori $1, 0x1f \n"
- " xori $1, 0x1f \n"
- " or %[flags], $1 \n"
- " mtc0 %[flags], $12 \n"
- " " __stringify(__irq_disable_hazard) " \n"
- " .set pop \n"
- : [flags] "=r" (__tmp1)
- : "0" (flags)
- : "memory");
-
- preempt_enable();
-}
-EXPORT_SYMBOL(__arch_local_irq_restore);
-
-#endif /* !CONFIG_CPU_MIPSR2 */
+#endif /* !CONFIG_CPU_MIPSR2 && !CONFIG_CPU_MIPSR6 */
diff --git a/arch/mips/loongson64/Platform b/arch/mips/loongson64/Platform
index 2e48e83d5524..85d808924c94 100644
--- a/arch/mips/loongson64/Platform
+++ b/arch/mips/loongson64/Platform
@@ -22,6 +22,27 @@ ifdef CONFIG_CPU_LOONGSON2F_WORKAROUNDS
endif
endif
+cflags-$(CONFIG_CPU_LOONGSON3) += -Wa,--trap
+#
+# binutils from v2.25 on and gcc starting from v4.9.0 treat -march=loongson3a
+# as MIPS64 R2; older versions as just R1. This leaves the possibility open
+# that GCC might generate R2 code for -march=loongson3a which then is rejected
+# by GAS. The cc-option can't probe for this behaviour so -march=loongson3a
+# can't easily be used safely within the kbuild framework.
+#
+ifeq ($(call cc-ifversion, -ge, 0409, y), y)
+ ifeq ($(call ld-ifversion, -ge, 22500000, y), y)
+ cflags-$(CONFIG_CPU_LOONGSON3) += \
+ $(call cc-option,-march=loongson3a -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64)
+ else
+ cflags-$(CONFIG_CPU_LOONGSON3) += \
+ $(call cc-option,-march=mips64r2,-mips64r2 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64)
+ endif
+else
+ cflags-$(CONFIG_CPU_LOONGSON3) += \
+ $(call cc-option,-march=mips64r2,-mips64r2 -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64)
+endif
+
#
# Loongson Machines' Support
#
diff --git a/arch/mips/loongson64/loongson-3/hpet.c b/arch/mips/loongson64/loongson-3/hpet.c
index bf9f1a77f0e5..a2631a52ca99 100644
--- a/arch/mips/loongson64/loongson-3/hpet.c
+++ b/arch/mips/loongson64/loongson-3/hpet.c
@@ -13,6 +13,9 @@
#define SMBUS_PCI_REG64 0x64
#define SMBUS_PCI_REGB4 0xb4
+#define HPET_MIN_CYCLES 64
+#define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1))
+
static DEFINE_SPINLOCK(hpet_lock);
DEFINE_PER_CPU(struct clock_event_device, hpet_clockevent_device);
@@ -161,8 +164,9 @@ static int hpet_next_event(unsigned long delta,
cnt += delta;
hpet_write(HPET_T0_CMP, cnt);
- res = ((int)(hpet_read(HPET_COUNTER) - cnt) > 0) ? -ETIME : 0;
- return res;
+ res = (int)(cnt - hpet_read(HPET_COUNTER));
+
+ return res < HPET_MIN_CYCLES ? -ETIME : 0;
}
static irqreturn_t hpet_irq_handler(int irq, void *data)
@@ -237,7 +241,7 @@ void __init setup_hpet_timer(void)
cd->cpumask = cpumask_of(cpu);
clockevent_set_clock(cd, HPET_FREQ);
cd->max_delta_ns = clockevent_delta2ns(0x7fffffff, cd);
- cd->min_delta_ns = 5000;
+ cd->min_delta_ns = clockevent_delta2ns(HPET_MIN_PROG_DELTA, cd);
clockevents_register_device(cd);
setup_irq(HPET_T0_IRQ, &hpet_irq);
diff --git a/arch/mips/loongson64/loongson-3/smp.c b/arch/mips/loongson64/loongson-3/smp.c
index 1a4738a8f2d3..509832a9836c 100644
--- a/arch/mips/loongson64/loongson-3/smp.c
+++ b/arch/mips/loongson64/loongson-3/smp.c
@@ -30,13 +30,13 @@
#include "smp.h"
DEFINE_PER_CPU(int, cpu_state);
-DEFINE_PER_CPU(uint32_t, core0_c0count);
static void *ipi_set0_regs[16];
static void *ipi_clear0_regs[16];
static void *ipi_status0_regs[16];
static void *ipi_en0_regs[16];
static void *ipi_mailbox_buf[16];
+static uint32_t core0_c0count[NR_CPUS];
/* read a 32bit value from ipi register */
#define loongson3_ipi_read32(addr) readl(addr)
@@ -275,12 +275,14 @@ void loongson3_ipi_interrupt(struct pt_regs *regs)
if (action & SMP_ASK_C0COUNT) {
BUG_ON(cpu != 0);
c0count = read_c0_count();
- for (i = 1; i < num_possible_cpus(); i++)
- per_cpu(core0_c0count, i) = c0count;
+ c0count = c0count ? c0count : 1;
+ for (i = 1; i < nr_cpu_ids; i++)
+ core0_c0count[i] = c0count;
+ __wbflush(); /* Let others see the result ASAP */
}
}
-#define MAX_LOOPS 1111
+#define MAX_LOOPS 800
/*
* SMP init and finish on secondary CPUs
*/
@@ -305,16 +307,20 @@ static void loongson3_init_secondary(void)
cpu_logical_map(cpu) / loongson_sysconf.cores_per_package;
i = 0;
- __this_cpu_write(core0_c0count, 0);
+ core0_c0count[cpu] = 0;
loongson3_send_ipi_single(0, SMP_ASK_C0COUNT);
- while (!__this_cpu_read(core0_c0count)) {
+ while (!core0_c0count[cpu]) {
i++;
cpu_relax();
}
if (i > MAX_LOOPS)
i = MAX_LOOPS;
- initcount = __this_cpu_read(core0_c0count) + i;
+ if (cpu_data[cpu].package)
+ initcount = core0_c0count[cpu] + i;
+ else /* Local access is faster for loops */
+ initcount = core0_c0count[cpu] + i/2;
+
write_c0_count(initcount);
}
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index 32f0e19a0d7f..cdfd44ffa51c 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -1266,6 +1266,8 @@ branch_common:
*/
sig = mips_dsemul(xcp, ir,
contpc);
+ if (sig < 0)
+ break;
if (sig)
xcp->cp0_epc = bcpc;
/*
@@ -1319,6 +1321,8 @@ branch_common:
* instruction in the dslot
*/
sig = mips_dsemul(xcp, ir, contpc);
+ if (sig < 0)
+ break;
if (sig)
xcp->cp0_epc = bcpc;
/* SIGILL forces out of the emulation loop. */
diff --git a/arch/mips/math-emu/dp_simple.c b/arch/mips/math-emu/dp_simple.c
index 926d56bf37f2..eb96485ed939 100644
--- a/arch/mips/math-emu/dp_simple.c
+++ b/arch/mips/math-emu/dp_simple.c
@@ -23,27 +23,39 @@
union ieee754dp ieee754dp_neg(union ieee754dp x)
{
- unsigned int oldrm;
union ieee754dp y;
- oldrm = ieee754_csr.rm;
- ieee754_csr.rm = FPU_CSR_RD;
- y = ieee754dp_sub(ieee754dp_zero(0), x);
- ieee754_csr.rm = oldrm;
+ if (ieee754_csr.abs2008) {
+ y = x;
+ DPSIGN(y) = !DPSIGN(x);
+ } else {
+ unsigned int oldrm;
+
+ oldrm = ieee754_csr.rm;
+ ieee754_csr.rm = FPU_CSR_RD;
+ y = ieee754dp_sub(ieee754dp_zero(0), x);
+ ieee754_csr.rm = oldrm;
+ }
return y;
}
union ieee754dp ieee754dp_abs(union ieee754dp x)
{
- unsigned int oldrm;
union ieee754dp y;
- oldrm = ieee754_csr.rm;
- ieee754_csr.rm = FPU_CSR_RD;
- if (DPSIGN(x))
- y = ieee754dp_sub(ieee754dp_zero(0), x);
- else
- y = ieee754dp_add(ieee754dp_zero(0), x);
- ieee754_csr.rm = oldrm;
+ if (ieee754_csr.abs2008) {
+ y = x;
+ DPSIGN(y) = 0;
+ } else {
+ unsigned int oldrm;
+
+ oldrm = ieee754_csr.rm;
+ ieee754_csr.rm = FPU_CSR_RD;
+ if (DPSIGN(x))
+ y = ieee754dp_sub(ieee754dp_zero(0), x);
+ else
+ y = ieee754dp_add(ieee754dp_zero(0), x);
+ ieee754_csr.rm = oldrm;
+ }
return y;
}
diff --git a/arch/mips/math-emu/dp_tint.c b/arch/mips/math-emu/dp_tint.c
index 6ffc336c530e..f3985617ce31 100644
--- a/arch/mips/math-emu/dp_tint.c
+++ b/arch/mips/math-emu/dp_tint.c
@@ -38,10 +38,13 @@ int ieee754dp_tint(union ieee754dp x)
switch (xc) {
case IEEE754_CLASS_SNAN:
case IEEE754_CLASS_QNAN:
- case IEEE754_CLASS_INF:
ieee754_setcx(IEEE754_INVALID_OPERATION);
return ieee754si_indef();
+ case IEEE754_CLASS_INF:
+ ieee754_setcx(IEEE754_INVALID_OPERATION);
+ return ieee754si_overflow(xs);
+
case IEEE754_CLASS_ZERO:
return 0;
@@ -53,7 +56,7 @@ int ieee754dp_tint(union ieee754dp x)
/* Set invalid. We will only use overflow for floating
point overflow */
ieee754_setcx(IEEE754_INVALID_OPERATION);
- return ieee754si_indef();
+ return ieee754si_overflow(xs);
}
/* oh gawd */
if (xe > DP_FBITS) {
@@ -93,7 +96,7 @@ int ieee754dp_tint(union ieee754dp x)
if ((xm >> 31) != 0 && (xs == 0 || xm != 0x80000000)) {
/* This can happen after rounding */
ieee754_setcx(IEEE754_INVALID_OPERATION);
- return ieee754si_indef();
+ return ieee754si_overflow(xs);
}
if (round || sticky)
ieee754_setcx(IEEE754_INEXACT);
diff --git a/arch/mips/math-emu/dp_tlong.c b/arch/mips/math-emu/dp_tlong.c
index 9cdc145b75e0..748fa10ed4cf 100644
--- a/arch/mips/math-emu/dp_tlong.c
+++ b/arch/mips/math-emu/dp_tlong.c
@@ -38,10 +38,13 @@ s64 ieee754dp_tlong(union ieee754dp x)
switch (xc) {
case IEEE754_CLASS_SNAN:
case IEEE754_CLASS_QNAN:
- case IEEE754_CLASS_INF:
ieee754_setcx(IEEE754_INVALID_OPERATION);
return ieee754di_indef();
+ case IEEE754_CLASS_INF:
+ ieee754_setcx(IEEE754_INVALID_OPERATION);
+ return ieee754di_overflow(xs);
+
case IEEE754_CLASS_ZERO:
return 0;
@@ -56,7 +59,7 @@ s64 ieee754dp_tlong(union ieee754dp x)
/* Set invalid. We will only use overflow for floating
point overflow */
ieee754_setcx(IEEE754_INVALID_OPERATION);
- return ieee754di_indef();
+ return ieee754di_overflow(xs);
}
/* oh gawd */
if (xe > DP_FBITS) {
@@ -97,7 +100,7 @@ s64 ieee754dp_tlong(union ieee754dp x)
if ((xm >> 63) != 0) {
/* This can happen after rounding */
ieee754_setcx(IEEE754_INVALID_OPERATION);
- return ieee754di_indef();
+ return ieee754di_overflow(xs);
}
if (round || sticky)
ieee754_setcx(IEEE754_INEXACT);
diff --git a/arch/mips/math-emu/dsemul.c b/arch/mips/math-emu/dsemul.c
index cbb36c14b155..46b964d2b79c 100644
--- a/arch/mips/math-emu/dsemul.c
+++ b/arch/mips/math-emu/dsemul.c
@@ -31,17 +31,41 @@ struct emuframe {
unsigned long epc;
};
+/*
+ * Set up an emulation frame for instruction IR, from a delay slot of
+ * a branch jumping to CPC. Return 0 if successful, -1 if no emulation
+ * required, otherwise a signal number causing a frame setup failure.
+ */
int mips_dsemul(struct pt_regs *regs, mips_instruction ir, unsigned long cpc)
{
+ int isa16 = get_isa16_mode(regs->cp0_epc);
+ mips_instruction break_math;
struct emuframe __user *fr;
int err;
- if ((get_isa16_mode(regs->cp0_epc) && ((ir >> 16) == MM_NOP16)) ||
- (ir == 0)) {
- /* NOP is easy */
- regs->cp0_epc = cpc;
- clear_delay_slot(regs);
- return 0;
+ /* NOP is easy */
+ if (ir == 0)
+ return -1;
+
+ /* microMIPS instructions */
+ if (isa16) {
+ union mips_instruction insn = { .word = ir };
+
+ /* NOP16 aka MOVE16 $0, $0 */
+ if ((ir >> 16) == MM_NOP16)
+ return -1;
+
+ /* ADDIUPC */
+ if (insn.mm_a_format.opcode == mm_addiupc_op) {
+ unsigned int rs;
+ s32 v;
+
+ rs = (((insn.mm_a_format.rs + 0x1e) & 0xf) + 2);
+ v = regs->cp0_epc & ~3;
+ v += insn.mm_a_format.simmediate << 2;
+ regs->regs[rs] = (long)v;
+ return -1;
+ }
}
pr_debug("dsemul %lx %lx\n", regs->cp0_epc, cpc);
@@ -55,14 +79,10 @@ int mips_dsemul(struct pt_regs *regs, mips_instruction ir, unsigned long cpc)
* Algorithmics used a system call instruction, and
* borrowed that vector. MIPS/Linux version is a bit
* more heavyweight in the interests of portability and
- * multiprocessor support. For Linux we generate a
- * an unaligned access and force an address error exception.
- *
- * For embedded systems (stand-alone) we prefer to use a
- * non-existing CP1 instruction. This prevents us from emulating
- * branches, but gives us a cleaner interface to the exception
- * handler (single entry point).
+ * multiprocessor support. For Linux we use a BREAK 514
+ * instruction causing a breakpoint exception.
*/
+ break_math = BREAK_MATH(isa16);
/* Ensure that the two instructions are in the same cache line */
fr = (struct emuframe __user *)
@@ -72,14 +92,18 @@ int mips_dsemul(struct pt_regs *regs, mips_instruction ir, unsigned long cpc)
if (unlikely(!access_ok(VERIFY_WRITE, fr, sizeof(struct emuframe))))
return SIGBUS;
- if (get_isa16_mode(regs->cp0_epc)) {
- err = __put_user(ir >> 16, (u16 __user *)(&fr->emul));
- err |= __put_user(ir & 0xffff, (u16 __user *)((long)(&fr->emul) + 2));
- err |= __put_user(BREAK_MATH >> 16, (u16 __user *)(&fr->badinst));
- err |= __put_user(BREAK_MATH & 0xffff, (u16 __user *)((long)(&fr->badinst) + 2));
+ if (isa16) {
+ err = __put_user(ir >> 16,
+ (u16 __user *)(&fr->emul));
+ err |= __put_user(ir & 0xffff,
+ (u16 __user *)((long)(&fr->emul) + 2));
+ err |= __put_user(break_math >> 16,
+ (u16 __user *)(&fr->badinst));
+ err |= __put_user(break_math & 0xffff,
+ (u16 __user *)((long)(&fr->badinst) + 2));
} else {
err = __put_user(ir, &fr->emul);
- err |= __put_user((mips_instruction)BREAK_MATH, &fr->badinst);
+ err |= __put_user(break_math, &fr->badinst);
}
err |= __put_user((mips_instruction)BD_COOKIE, &fr->cookie);
@@ -90,8 +114,7 @@ int mips_dsemul(struct pt_regs *regs, mips_instruction ir, unsigned long cpc)
return SIGBUS;
}
- regs->cp0_epc = ((unsigned long) &fr->emul) |
- get_isa16_mode(regs->cp0_epc);
+ regs->cp0_epc = (unsigned long)&fr->emul | isa16;
flush_cache_sigtramp((unsigned long)&fr->emul);
@@ -100,6 +123,7 @@ int mips_dsemul(struct pt_regs *regs, mips_instruction ir, unsigned long cpc)
int do_dsemulret(struct pt_regs *xcp)
{
+ int isa16 = get_isa16_mode(xcp->cp0_epc);
struct emuframe __user *fr;
unsigned long epc;
u32 insn, cookie;
@@ -122,16 +146,19 @@ int do_dsemulret(struct pt_regs *xcp)
* - Is the instruction pointed to by the EPC an BREAK_MATH?
* - Is the following memory word the BD_COOKIE?
*/
- if (get_isa16_mode(xcp->cp0_epc)) {
- err = __get_user(instr[0], (u16 __user *)(&fr->badinst));
- err |= __get_user(instr[1], (u16 __user *)((long)(&fr->badinst) + 2));
+ if (isa16) {
+ err = __get_user(instr[0],
+ (u16 __user *)(&fr->badinst));
+ err |= __get_user(instr[1],
+ (u16 __user *)((long)(&fr->badinst) + 2));
insn = (instr[0] << 16) | instr[1];
} else {
err = __get_user(insn, &fr->badinst);
}
err |= __get_user(cookie, &fr->cookie);
- if (unlikely(err || (insn != BREAK_MATH) || (cookie != BD_COOKIE))) {
+ if (unlikely(err ||
+ insn != BREAK_MATH(isa16) || cookie != BD_COOKIE)) {
MIPS_FPU_EMU_INC_STATS(errors);
return 0;
}
diff --git a/arch/mips/math-emu/ieee754.c b/arch/mips/math-emu/ieee754.c
index 8e97acbbe22c..e16ae7b75dbb 100644
--- a/arch/mips/math-emu/ieee754.c
+++ b/arch/mips/math-emu/ieee754.c
@@ -59,7 +59,8 @@ const union ieee754dp __ieee754dp_spcvals[] = {
DPCNST(1, 3, 0x4000000000000ULL), /* - 10.0 */
DPCNST(0, DP_EMAX + 1, 0x0000000000000ULL), /* + infinity */
DPCNST(1, DP_EMAX + 1, 0x0000000000000ULL), /* - infinity */
- DPCNST(0, DP_EMAX + 1, 0x7FFFFFFFFFFFFULL), /* + indef quiet Nan */
+ DPCNST(0, DP_EMAX + 1, 0x7FFFFFFFFFFFFULL), /* + ind legacy qNaN */
+ DPCNST(0, DP_EMAX + 1, 0x8000000000000ULL), /* + indef 2008 qNaN */
DPCNST(0, DP_EMAX, 0xFFFFFFFFFFFFFULL), /* + max */
DPCNST(1, DP_EMAX, 0xFFFFFFFFFFFFFULL), /* - max */
DPCNST(0, DP_EMIN, 0x0000000000000ULL), /* + min normal */
@@ -82,7 +83,8 @@ const union ieee754sp __ieee754sp_spcvals[] = {
SPCNST(1, 3, 0x200000), /* - 10.0 */
SPCNST(0, SP_EMAX + 1, 0x000000), /* + infinity */
SPCNST(1, SP_EMAX + 1, 0x000000), /* - infinity */
- SPCNST(0, SP_EMAX + 1, 0x3FFFFF), /* + indef quiet Nan */
+ SPCNST(0, SP_EMAX + 1, 0x3FFFFF), /* + indef legacy quiet NaN */
+ SPCNST(0, SP_EMAX + 1, 0x400000), /* + indef 2008 quiet NaN */
SPCNST(0, SP_EMAX, 0x7FFFFF), /* + max normal */
SPCNST(1, SP_EMAX, 0x7FFFFF), /* - max normal */
SPCNST(0, SP_EMIN, 0x000000), /* + min normal */
diff --git a/arch/mips/math-emu/ieee754.h b/arch/mips/math-emu/ieee754.h
index df94720714c7..d3be351aed15 100644
--- a/arch/mips/math-emu/ieee754.h
+++ b/arch/mips/math-emu/ieee754.h
@@ -221,15 +221,16 @@ union ieee754dp ieee754dp_dump(char *s, union ieee754dp x);
#define IEEE754_SPCVAL_NTEN 5 /* -10.0 */
#define IEEE754_SPCVAL_PINFINITY 6 /* +inf */
#define IEEE754_SPCVAL_NINFINITY 7 /* -inf */
-#define IEEE754_SPCVAL_INDEF 8 /* quiet NaN */
-#define IEEE754_SPCVAL_PMAX 9 /* +max norm */
-#define IEEE754_SPCVAL_NMAX 10 /* -max norm */
-#define IEEE754_SPCVAL_PMIN 11 /* +min norm */
-#define IEEE754_SPCVAL_NMIN 12 /* -min norm */
-#define IEEE754_SPCVAL_PMIND 13 /* +min denorm */
-#define IEEE754_SPCVAL_NMIND 14 /* -min denorm */
-#define IEEE754_SPCVAL_P1E31 15 /* + 1.0e31 */
-#define IEEE754_SPCVAL_P1E63 16 /* + 1.0e63 */
+#define IEEE754_SPCVAL_INDEF_LEG 8 /* legacy quiet NaN */
+#define IEEE754_SPCVAL_INDEF_2008 9 /* IEEE 754-2008 quiet NaN */
+#define IEEE754_SPCVAL_PMAX 10 /* +max norm */
+#define IEEE754_SPCVAL_NMAX 11 /* -max norm */
+#define IEEE754_SPCVAL_PMIN 12 /* +min norm */
+#define IEEE754_SPCVAL_NMIN 13 /* -min norm */
+#define IEEE754_SPCVAL_PMIND 14 /* +min denorm */
+#define IEEE754_SPCVAL_NMIND 15 /* -min denorm */
+#define IEEE754_SPCVAL_P1E31 16 /* + 1.0e31 */
+#define IEEE754_SPCVAL_P1E63 17 /* + 1.0e63 */
extern const union ieee754dp __ieee754dp_spcvals[];
extern const union ieee754sp __ieee754sp_spcvals[];
@@ -243,7 +244,8 @@ extern const union ieee754sp __ieee754sp_spcvals[];
#define ieee754dp_zero(sn) (ieee754dp_spcvals[IEEE754_SPCVAL_PZERO+(sn)])
#define ieee754dp_one(sn) (ieee754dp_spcvals[IEEE754_SPCVAL_PONE+(sn)])
#define ieee754dp_ten(sn) (ieee754dp_spcvals[IEEE754_SPCVAL_PTEN+(sn)])
-#define ieee754dp_indef() (ieee754dp_spcvals[IEEE754_SPCVAL_INDEF])
+#define ieee754dp_indef() (ieee754dp_spcvals[IEEE754_SPCVAL_INDEF_LEG + \
+ ieee754_csr.nan2008])
#define ieee754dp_max(sn) (ieee754dp_spcvals[IEEE754_SPCVAL_PMAX+(sn)])
#define ieee754dp_min(sn) (ieee754dp_spcvals[IEEE754_SPCVAL_PMIN+(sn)])
#define ieee754dp_mind(sn) (ieee754dp_spcvals[IEEE754_SPCVAL_PMIND+(sn)])
@@ -254,7 +256,8 @@ extern const union ieee754sp __ieee754sp_spcvals[];
#define ieee754sp_zero(sn) (ieee754sp_spcvals[IEEE754_SPCVAL_PZERO+(sn)])
#define ieee754sp_one(sn) (ieee754sp_spcvals[IEEE754_SPCVAL_PONE+(sn)])
#define ieee754sp_ten(sn) (ieee754sp_spcvals[IEEE754_SPCVAL_PTEN+(sn)])
-#define ieee754sp_indef() (ieee754sp_spcvals[IEEE754_SPCVAL_INDEF])
+#define ieee754sp_indef() (ieee754sp_spcvals[IEEE754_SPCVAL_INDEF_LEG + \
+ ieee754_csr.nan2008])
#define ieee754sp_max(sn) (ieee754sp_spcvals[IEEE754_SPCVAL_PMAX+(sn)])
#define ieee754sp_min(sn) (ieee754sp_spcvals[IEEE754_SPCVAL_PMIN+(sn)])
#define ieee754sp_mind(sn) (ieee754sp_spcvals[IEEE754_SPCVAL_PMIND+(sn)])
@@ -266,12 +269,25 @@ extern const union ieee754sp __ieee754sp_spcvals[];
*/
static inline int ieee754si_indef(void)
{
- return INT_MAX;
+ return ieee754_csr.nan2008 ? 0 : INT_MAX;
}
static inline s64 ieee754di_indef(void)
{
- return S64_MAX;
+ return ieee754_csr.nan2008 ? 0 : S64_MAX;
+}
+
+/*
+ * Overflow integer value
+ */
+static inline int ieee754si_overflow(int xs)
+{
+ return ieee754_csr.nan2008 && xs ? INT_MIN : INT_MAX;
+}
+
+static inline s64 ieee754di_overflow(int xs)
+{
+ return ieee754_csr.nan2008 && xs ? S64_MIN : S64_MAX;
}
/* result types for xctx.rt */
diff --git a/arch/mips/math-emu/ieee754dp.c b/arch/mips/math-emu/ieee754dp.c
index 522d843f2ffd..ad3c73436777 100644
--- a/arch/mips/math-emu/ieee754dp.c
+++ b/arch/mips/math-emu/ieee754dp.c
@@ -37,8 +37,11 @@ static inline int ieee754dp_isnan(union ieee754dp x)
static inline int ieee754dp_issnan(union ieee754dp x)
{
+ int qbit;
+
assert(ieee754dp_isnan(x));
- return (DPMANT(x) & DP_MBIT(DP_FBITS - 1)) == DP_MBIT(DP_FBITS - 1);
+ qbit = (DPMANT(x) & DP_MBIT(DP_FBITS - 1)) == DP_MBIT(DP_FBITS - 1);
+ return ieee754_csr.nan2008 ^ qbit;
}
@@ -51,7 +54,12 @@ union ieee754dp __cold ieee754dp_nanxcpt(union ieee754dp r)
assert(ieee754dp_issnan(r));
ieee754_setcx(IEEE754_INVALID_OPERATION);
- return ieee754dp_indef();
+ if (ieee754_csr.nan2008)
+ DPMANT(r) |= DP_MBIT(DP_FBITS - 1);
+ else
+ r = ieee754dp_indef();
+
+ return r;
}
static u64 ieee754dp_get_rounding(int sn, u64 xm)
diff --git a/arch/mips/math-emu/ieee754int.h b/arch/mips/math-emu/ieee754int.h
index 6383e2c5c1ad..ed7bb277b3e0 100644
--- a/arch/mips/math-emu/ieee754int.h
+++ b/arch/mips/math-emu/ieee754int.h
@@ -63,10 +63,10 @@ static inline int ieee754_class_nan(int xc)
if (ve == SP_EMAX+1+SP_EBIAS) { \
if (vm == 0) \
vc = IEEE754_CLASS_INF; \
- else if (vm & SP_MBIT(SP_FBITS-1)) \
- vc = IEEE754_CLASS_SNAN; \
- else \
+ else if (ieee754_csr.nan2008 ^ !(vm & SP_MBIT(SP_FBITS - 1))) \
vc = IEEE754_CLASS_QNAN; \
+ else \
+ vc = IEEE754_CLASS_SNAN; \
} else if (ve == SP_EMIN-1+SP_EBIAS) { \
if (vm) { \
ve = SP_EMIN; \
@@ -97,10 +97,10 @@ static inline int ieee754_class_nan(int xc)
if (ve == DP_EMAX+1+DP_EBIAS) { \
if (vm == 0) \
vc = IEEE754_CLASS_INF; \
- else if (vm & DP_MBIT(DP_FBITS-1)) \
- vc = IEEE754_CLASS_SNAN; \
- else \
+ else if (ieee754_csr.nan2008 ^ !(vm & DP_MBIT(DP_FBITS - 1))) \
vc = IEEE754_CLASS_QNAN; \
+ else \
+ vc = IEEE754_CLASS_SNAN; \
} else if (ve == DP_EMIN-1+DP_EBIAS) { \
if (vm) { \
ve = DP_EMIN; \
diff --git a/arch/mips/math-emu/ieee754sp.c b/arch/mips/math-emu/ieee754sp.c
index ca8e35e33bf7..def00ffc50fc 100644
--- a/arch/mips/math-emu/ieee754sp.c
+++ b/arch/mips/math-emu/ieee754sp.c
@@ -37,8 +37,11 @@ static inline int ieee754sp_isnan(union ieee754sp x)
static inline int ieee754sp_issnan(union ieee754sp x)
{
+ int qbit;
+
assert(ieee754sp_isnan(x));
- return SPMANT(x) & SP_MBIT(SP_FBITS - 1);
+ qbit = (SPMANT(x) & SP_MBIT(SP_FBITS - 1)) == SP_MBIT(SP_FBITS - 1);
+ return ieee754_csr.nan2008 ^ qbit;
}
@@ -51,7 +54,12 @@ union ieee754sp __cold ieee754sp_nanxcpt(union ieee754sp r)
assert(ieee754sp_issnan(r));
ieee754_setcx(IEEE754_INVALID_OPERATION);
- return ieee754sp_indef();
+ if (ieee754_csr.nan2008)
+ SPMANT(r) |= SP_MBIT(SP_FBITS - 1);
+ else
+ r = ieee754sp_indef();
+
+ return r;
}
static unsigned ieee754sp_get_rounding(int sn, unsigned xm)
diff --git a/arch/mips/math-emu/sp_fdp.c b/arch/mips/math-emu/sp_fdp.c
index 3797148893ad..5060e8fdcb0b 100644
--- a/arch/mips/math-emu/sp_fdp.c
+++ b/arch/mips/math-emu/sp_fdp.c
@@ -44,13 +44,16 @@ union ieee754sp ieee754sp_fdp(union ieee754dp x)
switch (xc) {
case IEEE754_CLASS_SNAN:
- return ieee754sp_nanxcpt(ieee754sp_nan_fdp(xs, xm));
-
+ x = ieee754dp_nanxcpt(x);
+ EXPLODEXDP;
+ /* Fall through. */
case IEEE754_CLASS_QNAN:
y = ieee754sp_nan_fdp(xs, xm);
- EXPLODEYSP;
- if (!ieee754_class_nan(yc))
- y = ieee754sp_indef();
+ if (!ieee754_csr.nan2008) {
+ EXPLODEYSP;
+ if (!ieee754_class_nan(yc))
+ y = ieee754sp_indef();
+ }
return y;
case IEEE754_CLASS_INF:
diff --git a/arch/mips/math-emu/sp_simple.c b/arch/mips/math-emu/sp_simple.c
index c50e9451f2d2..756c9cf2dfd2 100644
--- a/arch/mips/math-emu/sp_simple.c
+++ b/arch/mips/math-emu/sp_simple.c
@@ -23,27 +23,39 @@
union ieee754sp ieee754sp_neg(union ieee754sp x)
{
- unsigned int oldrm;
union ieee754sp y;
- oldrm = ieee754_csr.rm;
- ieee754_csr.rm = FPU_CSR_RD;
- y = ieee754sp_sub(ieee754sp_zero(0), x);
- ieee754_csr.rm = oldrm;
+ if (ieee754_csr.abs2008) {
+ y = x;
+ SPSIGN(y) = !SPSIGN(x);
+ } else {
+ unsigned int oldrm;
+
+ oldrm = ieee754_csr.rm;
+ ieee754_csr.rm = FPU_CSR_RD;
+ y = ieee754sp_sub(ieee754sp_zero(0), x);
+ ieee754_csr.rm = oldrm;
+ }
return y;
}
union ieee754sp ieee754sp_abs(union ieee754sp x)
{
- unsigned int oldrm;
union ieee754sp y;
- oldrm = ieee754_csr.rm;
- ieee754_csr.rm = FPU_CSR_RD;
- if (SPSIGN(x))
- y = ieee754sp_sub(ieee754sp_zero(0), x);
- else
- y = ieee754sp_add(ieee754sp_zero(0), x);
- ieee754_csr.rm = oldrm;
+ if (ieee754_csr.abs2008) {
+ y = x;
+ SPSIGN(y) = 0;
+ } else {
+ unsigned int oldrm;
+
+ oldrm = ieee754_csr.rm;
+ ieee754_csr.rm = FPU_CSR_RD;
+ if (SPSIGN(x))
+ y = ieee754sp_sub(ieee754sp_zero(0), x);
+ else
+ y = ieee754sp_add(ieee754sp_zero(0), x);
+ ieee754_csr.rm = oldrm;
+ }
return y;
}
diff --git a/arch/mips/math-emu/sp_tint.c b/arch/mips/math-emu/sp_tint.c
index 091299a31798..f4b4cabfe2e1 100644
--- a/arch/mips/math-emu/sp_tint.c
+++ b/arch/mips/math-emu/sp_tint.c
@@ -38,10 +38,13 @@ int ieee754sp_tint(union ieee754sp x)
switch (xc) {
case IEEE754_CLASS_SNAN:
case IEEE754_CLASS_QNAN:
- case IEEE754_CLASS_INF:
ieee754_setcx(IEEE754_INVALID_OPERATION);
return ieee754si_indef();
+ case IEEE754_CLASS_INF:
+ ieee754_setcx(IEEE754_INVALID_OPERATION);
+ return ieee754si_overflow(xs);
+
case IEEE754_CLASS_ZERO:
return 0;
@@ -56,7 +59,7 @@ int ieee754sp_tint(union ieee754sp x)
/* Set invalid. We will only use overflow for floating
point overflow */
ieee754_setcx(IEEE754_INVALID_OPERATION);
- return ieee754si_indef();
+ return ieee754si_overflow(xs);
}
/* oh gawd */
if (xe > SP_FBITS) {
@@ -97,7 +100,7 @@ int ieee754sp_tint(union ieee754sp x)
if ((xm >> 31) != 0) {
/* This can happen after rounding */
ieee754_setcx(IEEE754_INVALID_OPERATION);
- return ieee754si_indef();
+ return ieee754si_overflow(xs);
}
if (round || sticky)
ieee754_setcx(IEEE754_INEXACT);
diff --git a/arch/mips/math-emu/sp_tlong.c b/arch/mips/math-emu/sp_tlong.c
index 9f3c742c1cea..a2450c7e452a 100644
--- a/arch/mips/math-emu/sp_tlong.c
+++ b/arch/mips/math-emu/sp_tlong.c
@@ -39,10 +39,13 @@ s64 ieee754sp_tlong(union ieee754sp x)
switch (xc) {
case IEEE754_CLASS_SNAN:
case IEEE754_CLASS_QNAN:
- case IEEE754_CLASS_INF:
ieee754_setcx(IEEE754_INVALID_OPERATION);
return ieee754di_indef();
+ case IEEE754_CLASS_INF:
+ ieee754_setcx(IEEE754_INVALID_OPERATION);
+ return ieee754di_overflow(xs);
+
case IEEE754_CLASS_ZERO:
return 0;
@@ -57,7 +60,7 @@ s64 ieee754sp_tlong(union ieee754sp x)
/* Set invalid. We will only use overflow for floating
point overflow */
ieee754_setcx(IEEE754_INVALID_OPERATION);
- return ieee754di_indef();
+ return ieee754di_overflow(xs);
}
/* oh gawd */
if (xe > SP_FBITS) {
@@ -94,7 +97,7 @@ s64 ieee754sp_tlong(union ieee754sp x)
if ((xm >> 63) != 0) {
/* This can happen after rounding */
ieee754_setcx(IEEE754_INVALID_OPERATION);
- return ieee754di_indef();
+ return ieee754di_overflow(xs);
}
if (round || sticky)
ieee754_setcx(IEEE754_INEXACT);
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 482192cc8f2b..5a04b6f5c6fb 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -241,7 +241,7 @@ static void output_pgtable_bits_defines(void)
#ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
pr_define("_PAGE_HUGE_SHIFT %d\n", _PAGE_HUGE_SHIFT);
#endif
-#ifdef CONFIG_CPU_MIPSR2
+#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)
if (cpu_has_rixi) {
#ifdef _PAGE_NO_EXEC_SHIFT
pr_define("_PAGE_NO_EXEC_SHIFT %d\n", _PAGE_NO_EXEC_SHIFT);
diff --git a/arch/mips/pci/Makefile b/arch/mips/pci/Makefile
index 2eda01e6e08f..139ad1d7ab5e 100644
--- a/arch/mips/pci/Makefile
+++ b/arch/mips/pci/Makefile
@@ -43,6 +43,7 @@ obj-$(CONFIG_SIBYTE_BCM1x80) += pci-bcm1480.o pci-bcm1480ht.o
obj-$(CONFIG_SNI_RM) += fixup-sni.o ops-sni.o
obj-$(CONFIG_LANTIQ) += fixup-lantiq.o
obj-$(CONFIG_PCI_LANTIQ) += pci-lantiq.o ops-lantiq.o
+obj-$(CONFIG_SOC_MT7620) += pci-mt7620.o
obj-$(CONFIG_SOC_RT288X) += pci-rt2880.o
obj-$(CONFIG_SOC_RT3883) += pci-rt3883.o
obj-$(CONFIG_TANBAC_TB0219) += fixup-tb0219.o
diff --git a/arch/mips/pci/pci-mt7620.c b/arch/mips/pci/pci-mt7620.c
new file mode 100644
index 000000000000..a009ee458934
--- /dev/null
+++ b/arch/mips/pci/pci-mt7620.c
@@ -0,0 +1,426 @@
+/*
+ * Ralink MT7620A SoC PCI support
+ *
+ * Copyright (C) 2007-2013 Bruce Chang (Mediatek)
+ * Copyright (C) 2013-2016 John Crispin <blogic@openwrt.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/io.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/of_pci.h>
+#include <linux/reset.h>
+#include <linux/platform_device.h>
+
+#include <asm/mach-ralink/ralink_regs.h>
+#include <asm/mach-ralink/mt7620.h>
+
+#define RALINK_PCI_IO_MAP_BASE 0x10160000
+#define RALINK_PCI_MEMORY_BASE 0x0
+
+#define RALINK_INT_PCIE0 4
+
+#define RALINK_CLKCFG1 0x30
+#define RALINK_GPIOMODE 0x60
+
+#define PPLL_CFG1 0x9c
+#define PDRV_SW_SET BIT(23)
+
+#define PPLL_DRV 0xa0
+#define PDRV_SW_SET (1<<31)
+#define LC_CKDRVPD (1<<19)
+#define LC_CKDRVOHZ (1<<18)
+#define LC_CKDRVHZ (1<<17)
+#define LC_CKTEST (1<<16)
+
+/* PCI Bridge registers */
+#define RALINK_PCI_PCICFG_ADDR 0x00
+#define PCIRST BIT(1)
+
+#define RALINK_PCI_PCIENA 0x0C
+#define PCIINT2 BIT(20)
+
+#define RALINK_PCI_CONFIG_ADDR 0x20
+#define RALINK_PCI_CONFIG_DATA_VIRT_REG 0x24
+#define RALINK_PCI_MEMBASE 0x28
+#define RALINK_PCI_IOBASE 0x2C
+
+/* PCI RC registers */
+#define RALINK_PCI0_BAR0SETUP_ADDR 0x10
+#define RALINK_PCI0_IMBASEBAR0_ADDR 0x18
+#define RALINK_PCI0_ID 0x30
+#define RALINK_PCI0_CLASS 0x34
+#define RALINK_PCI0_SUBID 0x38
+#define RALINK_PCI0_STATUS 0x50
+#define PCIE_LINK_UP_ST BIT(0)
+
+#define PCIEPHY0_CFG 0x90
+
+#define RALINK_PCIEPHY_P0_CTL_OFFSET 0x7498
+#define RALINK_PCIE0_CLK_EN (1 << 26)
+
+#define BUSY 0x80000000
+#define WAITRETRY_MAX 10
+#define WRITE_MODE (1UL << 23)
+#define DATA_SHIFT 0
+#define ADDR_SHIFT 8
+
+
+static void __iomem *bridge_base;
+static void __iomem *pcie_base;
+
+static struct reset_control *rstpcie0;
+
+static inline void bridge_w32(u32 val, unsigned reg)
+{
+ iowrite32(val, bridge_base + reg);
+}
+
+static inline u32 bridge_r32(unsigned reg)
+{
+ return ioread32(bridge_base + reg);
+}
+
+static inline void pcie_w32(u32 val, unsigned reg)
+{
+ iowrite32(val, pcie_base + reg);
+}
+
+static inline u32 pcie_r32(unsigned reg)
+{
+ return ioread32(pcie_base + reg);
+}
+
+static inline void pcie_m32(u32 clr, u32 set, unsigned reg)
+{
+ u32 val = pcie_r32(reg);
+
+ val &= ~clr;
+ val |= set;
+ pcie_w32(val, reg);
+}
+
+static int wait_pciephy_busy(void)
+{
+ unsigned long reg_value = 0x0, retry = 0;
+
+ while (1) {
+ reg_value = pcie_r32(PCIEPHY0_CFG);
+
+ if (reg_value & BUSY)
+ mdelay(100);
+ else
+ break;
+ if (retry++ > WAITRETRY_MAX) {
+ printk(KERN_WARN "PCIE-PHY retry failed.\n");
+ return -1;
+ }
+ }
+ return 0;
+}
+
+static void pcie_phy(unsigned long addr, unsigned long val)
+{
+ wait_pciephy_busy();
+ pcie_w32(WRITE_MODE | (val << DATA_SHIFT) | (addr << ADDR_SHIFT),
+ PCIEPHY0_CFG);
+ mdelay(1);
+ wait_pciephy_busy();
+}
+
+static int pci_config_read(struct pci_bus *bus, unsigned int devfn, int where,
+ int size, u32 *val)
+{
+ unsigned int slot = PCI_SLOT(devfn);
+ u8 func = PCI_FUNC(devfn);
+ u32 address;
+ u32 data;
+ u32 num = 0;
+
+ if (bus)
+ num = bus->number;
+
+ address = (((where & 0xF00) >> 8) << 24) | (num << 16) | (slot << 11) |
+ (func << 8) | (where & 0xfc) | 0x80000000;
+ bridge_w32(address, RALINK_PCI_CONFIG_ADDR);
+ data = bridge_r32(RALINK_PCI_CONFIG_DATA_VIRT_REG);
+
+ switch (size) {
+ case 1:
+ *val = (data >> ((where & 3) << 3)) & 0xff;
+ break;
+ case 2:
+ *val = (data >> ((where & 3) << 3)) & 0xffff;
+ break;
+ case 4:
+ *val = data;
+ break;
+ }
+
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static int pci_config_write(struct pci_bus *bus, unsigned int devfn, int where,
+ int size, u32 val)
+{
+ unsigned int slot = PCI_SLOT(devfn);
+ u8 func = PCI_FUNC(devfn);
+ u32 address;
+ u32 data;
+ u32 num = 0;
+
+ if (bus)
+ num = bus->number;
+
+ address = (((where & 0xF00) >> 8) << 24) | (num << 16) | (slot << 11) |
+ (func << 8) | (where & 0xfc) | 0x80000000;
+ bridge_w32(address, RALINK_PCI_CONFIG_ADDR);
+ data = bridge_r32(RALINK_PCI_CONFIG_DATA_VIRT_REG);
+
+ switch (size) {
+ case 1:
+ data = (data & ~(0xff << ((where & 3) << 3))) |
+ (val << ((where & 3) << 3));
+ break;
+ case 2:
+ data = (data & ~(0xffff << ((where & 3) << 3))) |
+ (val << ((where & 3) << 3));
+ break;
+ case 4:
+ data = val;
+ break;
+ }
+
+ bridge_w32(data, RALINK_PCI_CONFIG_DATA_VIRT_REG);
+
+ return PCIBIOS_SUCCESSFUL;
+}
+
+struct pci_ops mt7620_pci_ops = {
+ .read = pci_config_read,
+ .write = pci_config_write,
+};
+
+static struct resource mt7620_res_pci_mem1;
+static struct resource mt7620_res_pci_io1;
+struct pci_controller mt7620_controller = {
+ .pci_ops = &mt7620_pci_ops,
+ .mem_resource = &mt7620_res_pci_mem1,
+ .mem_offset = 0x00000000UL,
+ .io_resource = &mt7620_res_pci_io1,
+ .io_offset = 0x00000000UL,
+ .io_map_base = 0xa0000000,
+};
+
+static int mt7620_pci_hw_init(struct platform_device *pdev)
+{
+ /* bypass PCIe DLL */
+ pcie_phy(0x0, 0x80);
+ pcie_phy(0x1, 0x04);
+
+ /* Elastic buffer control */
+ pcie_phy(0x68, 0xB4);
+
+ /* put core into reset */
+ pcie_m32(0, PCIRST, RALINK_PCI_PCICFG_ADDR);
+ reset_control_assert(rstpcie0);
+
+ /* disable power and all clocks */
+ rt_sysc_m32(RALINK_PCIE0_CLK_EN, 0, RALINK_CLKCFG1);
+ rt_sysc_m32(LC_CKDRVPD, PDRV_SW_SET, PPLL_DRV);
+
+ /* bring core out of reset */
+ reset_control_deassert(rstpcie0);
+ rt_sysc_m32(0, RALINK_PCIE0_CLK_EN, RALINK_CLKCFG1);
+ mdelay(100);
+
+ if (!(rt_sysc_r32(PPLL_CFG1) & PDRV_SW_SET)) {
+ dev_err(&pdev->dev, "MT7620 PPLL unlock\n");
+ reset_control_assert(rstpcie0);
+ rt_sysc_m32(RALINK_PCIE0_CLK_EN, 0, RALINK_CLKCFG1);
+ return -1;
+ }
+
+ /* power up the bus */
+ rt_sysc_m32(LC_CKDRVHZ | LC_CKDRVOHZ, LC_CKDRVPD | PDRV_SW_SET,
+ PPLL_DRV);
+
+ return 0;
+}
+
+static int mt7628_pci_hw_init(struct platform_device *pdev)
+{
+ u32 val = 0;
+
+ /* bring the core out of reset */
+ rt_sysc_m32(BIT(16), 0, RALINK_GPIOMODE);
+ reset_control_deassert(rstpcie0);
+
+ /* enable the pci clk */
+ rt_sysc_m32(0, RALINK_PCIE0_CLK_EN, RALINK_CLKCFG1);
+ mdelay(100);
+
+ /* voodoo from the SDK driver */
+ pcie_m32(~0xff, 0x5, RALINK_PCIEPHY_P0_CTL_OFFSET);
+
+ pci_config_read(NULL, 0, 0x70c, 4, &val);
+ val &= ~(0xff) << 8;
+ val |= 0x50 << 8;
+ pci_config_write(NULL, 0, 0x70c, 4, val);
+
+ pci_config_read(NULL, 0, 0x70c, 4, &val);
+ dev_err(&pdev->dev, "Port 0 N_FTS = %x\n", (unsigned int) val);
+
+ return 0;
+}
+
+static int mt7620_pci_probe(struct platform_device *pdev)
+{
+ struct resource *bridge_res = platform_get_resource(pdev,
+ IORESOURCE_MEM, 0);
+ struct resource *pcie_res = platform_get_resource(pdev,
+ IORESOURCE_MEM, 1);
+ u32 val = 0;
+
+ rstpcie0 = devm_reset_control_get(&pdev->dev, "pcie0");
+ if (IS_ERR(rstpcie0))
+ return PTR_ERR(rstpcie0);
+
+ bridge_base = devm_ioremap_resource(&pdev->dev, bridge_res);
+ if (!bridge_base)
+ return -ENOMEM;
+
+ pcie_base = devm_ioremap_resource(&pdev->dev, pcie_res);
+ if (!pcie_base)
+ return -ENOMEM;
+
+ iomem_resource.start = 0;
+ iomem_resource.end = ~0;
+ ioport_resource.start = 0;
+ ioport_resource.end = ~0;
+
+ /* bring up the pci core */
+ switch (ralink_soc) {
+ case MT762X_SOC_MT7620A:
+ if (mt7620_pci_hw_init(pdev))
+ return -1;
+ break;
+
+ case MT762X_SOC_MT7628AN:
+ if (mt7628_pci_hw_init(pdev))
+ return -1;
+ break;
+
+ default:
+ dev_err(&pdev->dev, "pcie is not supported on this hardware\n");
+ return -1;
+ }
+ mdelay(50);
+
+ /* enable write access */
+ pcie_m32(PCIRST, 0, RALINK_PCI_PCICFG_ADDR);
+ mdelay(100);
+
+ /* check if there is a card present */
+ if ((pcie_r32(RALINK_PCI0_STATUS) & PCIE_LINK_UP_ST) == 0) {
+ reset_control_assert(rstpcie0);
+ rt_sysc_m32(RALINK_PCIE0_CLK_EN, 0, RALINK_CLKCFG1);
+ if (ralink_soc == MT762X_SOC_MT7620A)
+ rt_sysc_m32(LC_CKDRVPD, PDRV_SW_SET, PPLL_DRV);
+ dev_err(&pdev->dev, "PCIE0 no card, disable it(RST&CLK)\n");
+ return -1;
+ }
+
+ /* setup ranges */
+ bridge_w32(0xffffffff, RALINK_PCI_MEMBASE);
+ bridge_w32(RALINK_PCI_IO_MAP_BASE, RALINK_PCI_IOBASE);
+
+ pcie_w32(0x7FFF0001, RALINK_PCI0_BAR0SETUP_ADDR);
+ pcie_w32(RALINK_PCI_MEMORY_BASE, RALINK_PCI0_IMBASEBAR0_ADDR);
+ pcie_w32(0x06040001, RALINK_PCI0_CLASS);
+
+ /* enable interrupts */
+ pcie_m32(0, PCIINT2, RALINK_PCI_PCIENA);
+
+ /* voodoo from the SDK driver */
+ pci_config_read(NULL, 0, 4, 4, &val);
+ pci_config_write(NULL, 0, 4, 4, val | 0x7);
+
+ pci_load_of_ranges(&mt7620_controller, pdev->dev.of_node);
+ register_pci_controller(&mt7620_controller);
+
+ return 0;
+}
+
+int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+{
+ u16 cmd;
+ u32 val;
+ int irq = 0;
+
+ if ((dev->bus->number == 0) && (slot == 0)) {
+ pcie_w32(0x7FFF0001, RALINK_PCI0_BAR0SETUP_ADDR);
+ pci_config_write(dev->bus, 0, PCI_BASE_ADDRESS_0, 4,
+ RALINK_PCI_MEMORY_BASE);
+ pci_config_read(dev->bus, 0, PCI_BASE_ADDRESS_0, 4, &val);
+ } else if ((dev->bus->number == 1) && (slot == 0x0)) {
+ irq = RALINK_INT_PCIE0;
+ } else {
+ dev_err(&dev->dev, "no irq found - bus=0x%x, slot = 0x%x\n",
+ dev->bus->number, slot);
+ return 0;
+ }
+ dev_err(&dev->dev, "card - bus=0x%x, slot = 0x%x irq=%d\n",
+ dev->bus->number, slot, irq);
+
+ /* configure the cache line size to 0x14 */
+ pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, 0x14);
+
+ /* configure latency timer to 0xff */
+ pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0xff);
+ pci_read_config_word(dev, PCI_COMMAND, &cmd);
+
+ /* setup the slot */
+ cmd = cmd | PCI_COMMAND_MASTER | PCI_COMMAND_IO | PCI_COMMAND_MEMORY;
+ pci_write_config_word(dev, PCI_COMMAND, cmd);
+ pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq);
+
+ return irq;
+}
+
+int pcibios_plat_dev_init(struct pci_dev *dev)
+{
+ return 0;
+}
+
+static const struct of_device_id mt7620_pci_ids[] = {
+ { .compatible = "mediatek,mt7620-pci" },
+ {},
+};
+MODULE_DEVICE_TABLE(of, mt7620_pci_ids);
+
+static struct platform_driver mt7620_pci_driver = {
+ .probe = mt7620_pci_probe,
+ .driver = {
+ .name = "mt7620-pci",
+ .owner = THIS_MODULE,
+ .of_match_table = of_match_ptr(mt7620_pci_ids),
+ },
+};
+
+static int __init mt7620_pci_init(void)
+{
+ return platform_driver_register(&mt7620_pci_driver);
+}
+
+arch_initcall(mt7620_pci_init);
diff --git a/arch/mips/pic32/Kconfig b/arch/mips/pic32/Kconfig
new file mode 100644
index 000000000000..fde56a8b85ca
--- /dev/null
+++ b/arch/mips/pic32/Kconfig
@@ -0,0 +1,51 @@
+if MACH_PIC32
+
+choice
+ prompt "Machine Type"
+
+config PIC32MZDA
+ bool "Microchip PIC32MZDA Platform"
+ select BOOT_ELF32
+ select BOOT_RAW
+ select CEVT_R4K
+ select CSRC_R4K
+ select DMA_NONCOHERENT
+ select SYS_HAS_CPU_MIPS32_R2
+ select SYS_HAS_EARLY_PRINTK
+ select SYS_SUPPORTS_32BIT_KERNEL
+ select SYS_SUPPORTS_LITTLE_ENDIAN
+ select ARCH_REQUIRE_GPIOLIB
+ select HAVE_MACH_CLKDEV
+ select COMMON_CLK
+ select CLKDEV_LOOKUP
+ select LIBFDT
+ select USE_OF
+ select PINCTRL
+ select PIC32_EVIC
+ help
+ Support for the Microchip PIC32MZDA microcontroller.
+
+ This is a 32-bit microcontroller with support for external or
+ internally packaged DDR2 memory up to 128MB.
+
+ For more information, see <http://www.microchip.com/>.
+
+endchoice
+
+choice
+ prompt "Devicetree selection"
+ default DTB_PIC32_NONE
+ help
+ Select the devicetree.
+
+config DTB_PIC32_NONE
+ bool "None"
+
+config DTB_PIC32_MZDA_SK
+ bool "PIC32MZDA Starter Kit"
+ depends on PIC32MZDA
+ select BUILTIN_DTB
+
+endchoice
+
+endif # MACH_PIC32
diff --git a/arch/mips/pic32/Makefile b/arch/mips/pic32/Makefile
new file mode 100644
index 000000000000..fd357f49ac6c
--- /dev/null
+++ b/arch/mips/pic32/Makefile
@@ -0,0 +1,6 @@
+#
+# Joshua Henderson, <joshua.henderson@microchip.com>
+# Copyright (C) 2015 Microchip Technology, Inc. All rights reserved.
+#
+obj-$(CONFIG_MACH_PIC32) += common/
+obj-$(CONFIG_PIC32MZDA) += pic32mzda/
diff --git a/arch/mips/pic32/Platform b/arch/mips/pic32/Platform
new file mode 100644
index 000000000000..cd2084f44507
--- /dev/null
+++ b/arch/mips/pic32/Platform
@@ -0,0 +1,7 @@
+#
+# PIC32MZDA
+#
+platform-$(CONFIG_PIC32MZDA) += pic32/
+cflags-$(CONFIG_PIC32MZDA) += -I$(srctree)/arch/mips/include/asm/mach-pic32
+load-$(CONFIG_PIC32MZDA) += 0xffffffff88000000
+all-$(CONFIG_PIC32MZDA) := $(COMPRESSION_FNAME).bin
diff --git a/arch/mips/pic32/common/Makefile b/arch/mips/pic32/common/Makefile
new file mode 100644
index 000000000000..be1909cc0467
--- /dev/null
+++ b/arch/mips/pic32/common/Makefile
@@ -0,0 +1,5 @@
+#
+# Joshua Henderson, <joshua.henderson@microchip.com>
+# Copyright (C) 2015 Microchip Technology, Inc. All rights reserved.
+#
+obj-y = reset.o irq.o
diff --git a/arch/mips/pic32/common/irq.c b/arch/mips/pic32/common/irq.c
new file mode 100644
index 000000000000..6df347e36036
--- /dev/null
+++ b/arch/mips/pic32/common/irq.c
@@ -0,0 +1,21 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc. All rights reserved.
+ *
+ * This program is free software; you can distribute it and/or modify it
+ * under the terms of the GNU General Public License (Version 2) as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+#include <linux/init.h>
+#include <linux/irqchip.h>
+#include <asm/irq.h>
+
+void __init arch_init_irq(void)
+{
+ irqchip_init();
+}
diff --git a/arch/mips/pic32/common/reset.c b/arch/mips/pic32/common/reset.c
new file mode 100644
index 000000000000..83345757be5f
--- /dev/null
+++ b/arch/mips/pic32/common/reset.c
@@ -0,0 +1,62 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc. All rights reserved.
+ *
+ * This program is free software; you can distribute it and/or modify it
+ * under the terms of the GNU General Public License (Version 2) as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+#include <linux/init.h>
+#include <linux/pm.h>
+#include <asm/reboot.h>
+#include <asm/mach-pic32/pic32.h>
+
+#define PIC32_RSWRST 0x10
+
+static void pic32_halt(void)
+{
+ while (1) {
+ __asm__(".set push;\n"
+ ".set arch=r4000;\n"
+ "wait;\n"
+ ".set pop;\n"
+ );
+ }
+}
+
+static void pic32_machine_restart(char *command)
+{
+ void __iomem *reg =
+ ioremap(PIC32_BASE_RESET + PIC32_RSWRST, sizeof(u32));
+
+ pic32_syskey_unlock();
+
+ /* magic write/read */
+ __raw_writel(1, reg);
+ (void)__raw_readl(reg);
+
+ pic32_halt();
+}
+
+static void pic32_machine_halt(void)
+{
+ local_irq_disable();
+
+ pic32_halt();
+}
+
+static int __init mips_reboot_setup(void)
+{
+ _machine_restart = pic32_machine_restart;
+ _machine_halt = pic32_machine_halt;
+ pm_power_off = pic32_machine_halt;
+
+ return 0;
+}
+
+arch_initcall(mips_reboot_setup);
diff --git a/arch/mips/pic32/pic32mzda/Makefile b/arch/mips/pic32/pic32mzda/Makefile
new file mode 100644
index 000000000000..4a4c2728c027
--- /dev/null
+++ b/arch/mips/pic32/pic32mzda/Makefile
@@ -0,0 +1,9 @@
+#
+# Joshua Henderson, <joshua.henderson@microchip.com>
+# Copyright (C) 2015 Microchip Technology, Inc. All rights reserved.
+#
+obj-y := init.o time.o config.o
+
+obj-$(CONFIG_EARLY_PRINTK) += early_console.o \
+ early_pin.o \
+ early_clk.o
diff --git a/arch/mips/pic32/pic32mzda/config.c b/arch/mips/pic32/pic32mzda/config.c
new file mode 100644
index 000000000000..fe293a070003
--- /dev/null
+++ b/arch/mips/pic32/pic32mzda/config.c
@@ -0,0 +1,126 @@
+/*
+ * Purna Chandra Mandal, purna.mandal@microchip.com
+ * Copyright (C) 2015 Microchip Technology Inc. All rights reserved.
+ *
+ * This program is free software; you can distribute it and/or modify it
+ * under the terms of the GNU General Public License (Version 2) as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/of_platform.h>
+
+#include <asm/mach-pic32/pic32.h>
+
+#include "pic32mzda.h"
+
+#define PIC32_CFGCON 0x0000
+#define PIC32_DEVID 0x0020
+#define PIC32_SYSKEY 0x0030
+#define PIC32_CFGEBIA 0x00c0
+#define PIC32_CFGEBIC 0x00d0
+#define PIC32_CFGCON2 0x00f0
+#define PIC32_RCON 0x1240
+
+static void __iomem *pic32_conf_base;
+static DEFINE_SPINLOCK(config_lock);
+static u32 pic32_reset_status;
+
+static u32 pic32_conf_get_reg_field(u32 offset, u32 rshift, u32 mask)
+{
+ u32 v;
+
+ v = readl(pic32_conf_base + offset);
+ v >>= rshift;
+ v &= mask;
+
+ return v;
+}
+
+static u32 pic32_conf_modify_atomic(u32 offset, u32 mask, u32 set)
+{
+ u32 v;
+ unsigned long flags;
+
+ spin_lock_irqsave(&config_lock, flags);
+ v = readl(pic32_conf_base + offset);
+ v &= ~mask;
+ v |= (set & mask);
+ writel(v, pic32_conf_base + offset);
+ spin_unlock_irqrestore(&config_lock, flags);
+
+ return 0;
+}
+
+int pic32_enable_lcd(void)
+{
+ return pic32_conf_modify_atomic(PIC32_CFGCON2, BIT(31), BIT(31));
+}
+
+int pic32_disable_lcd(void)
+{
+ return pic32_conf_modify_atomic(PIC32_CFGCON2, BIT(31), 0);
+}
+
+int pic32_set_lcd_mode(int mode)
+{
+ u32 mask = mode ? BIT(30) : 0;
+
+ return pic32_conf_modify_atomic(PIC32_CFGCON2, BIT(30), mask);
+}
+
+int pic32_set_sdhci_adma_fifo_threshold(u32 rthrsh, u32 wthrsh)
+{
+ u32 clr, set;
+
+ clr = (0x3ff << 4) | (0x3ff << 16);
+ set = (rthrsh << 4) | (wthrsh << 16);
+ return pic32_conf_modify_atomic(PIC32_CFGCON2, clr, set);
+}
+
+void pic32_syskey_unlock_debug(const char *func, const ulong line)
+{
+ void __iomem *syskey = pic32_conf_base + PIC32_SYSKEY;
+
+ pr_debug("%s: called from %s:%lu\n", __func__, func, line);
+ writel(0x00000000, syskey);
+ writel(0xAA996655, syskey);
+ writel(0x556699AA, syskey);
+}
+
+static u32 pic32_get_device_id(void)
+{
+ return pic32_conf_get_reg_field(PIC32_DEVID, 0, 0x0fffffff);
+}
+
+static u32 pic32_get_device_version(void)
+{
+ return pic32_conf_get_reg_field(PIC32_DEVID, 28, 0xf);
+}
+
+u32 pic32_get_boot_status(void)
+{
+ return pic32_reset_status;
+}
+EXPORT_SYMBOL(pic32_get_boot_status);
+
+void __init pic32_config_init(void)
+{
+ pic32_conf_base = ioremap(PIC32_BASE_CONFIG, 0x110);
+ if (!pic32_conf_base)
+ panic("pic32: config base not mapped");
+
+ /* Boot Status */
+ pic32_reset_status = readl(pic32_conf_base + PIC32_RCON);
+ writel(-1, PIC32_CLR(pic32_conf_base + PIC32_RCON));
+
+ /* Device Inforation */
+ pr_info("Device Id: 0x%08x, Device Ver: 0x%04x\n",
+ pic32_get_device_id(),
+ pic32_get_device_version());
+}
diff --git a/arch/mips/pic32/pic32mzda/early_clk.c b/arch/mips/pic32/pic32mzda/early_clk.c
new file mode 100644
index 000000000000..96c090e9d637
--- /dev/null
+++ b/arch/mips/pic32/pic32mzda/early_clk.c
@@ -0,0 +1,106 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc. All rights reserved.
+ *
+ * This program is free software; you can distribute it and/or modify it
+ * under the terms of the GNU General Public License (Version 2) as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+#include <asm/mach-pic32/pic32.h>
+
+#include "pic32mzda.h"
+
+/* Oscillators, PLL & clocks */
+#define ICLK_MASK 0x00000080
+#define PLLDIV_MASK 0x00000007
+#define CUROSC_MASK 0x00000007
+#define PLLMUL_MASK 0x0000007F
+#define PB_MASK 0x00000007
+#define FRC1 0
+#define FRC2 7
+#define SPLL 1
+#define POSC 2
+#define FRC_CLK 8000000
+
+#define PIC32_POSC_FREQ 24000000
+
+#define OSCCON 0x0000
+#define SPLLCON 0x0020
+#define PB1DIV 0x0140
+
+u32 pic32_get_sysclk(void)
+{
+ u32 osc_freq = 0;
+ u32 pllclk;
+ u32 frcdivn;
+ u32 osccon;
+ u32 spllcon;
+ int curr_osc;
+
+ u32 plliclk;
+ u32 pllidiv;
+ u32 pllodiv;
+ u32 pllmult;
+ u32 frcdiv;
+
+ void __iomem *osc_base = ioremap(PIC32_BASE_OSC, 0x200);
+
+ osccon = __raw_readl(osc_base + OSCCON);
+ spllcon = __raw_readl(osc_base + SPLLCON);
+
+ plliclk = (spllcon & ICLK_MASK);
+ pllidiv = ((spllcon >> 8) & PLLDIV_MASK) + 1;
+ pllodiv = ((spllcon >> 24) & PLLDIV_MASK);
+ pllmult = ((spllcon >> 16) & PLLMUL_MASK) + 1;
+ frcdiv = ((osccon >> 24) & PLLDIV_MASK);
+
+ pllclk = plliclk ? FRC_CLK : PIC32_POSC_FREQ;
+ frcdivn = ((1 << frcdiv) + 1) + (128 * (frcdiv == 7));
+
+ if (pllodiv < 2)
+ pllodiv = 2;
+ else if (pllodiv < 5)
+ pllodiv = (1 << pllodiv);
+ else
+ pllodiv = 32;
+
+ curr_osc = (int)((osccon >> 12) & CUROSC_MASK);
+
+ switch (curr_osc) {
+ case FRC1:
+ case FRC2:
+ osc_freq = FRC_CLK / frcdivn;
+ break;
+ case SPLL:
+ osc_freq = ((pllclk / pllidiv) * pllmult) / pllodiv;
+ break;
+ case POSC:
+ osc_freq = PIC32_POSC_FREQ;
+ break;
+ default:
+ break;
+ }
+
+ iounmap(osc_base);
+
+ return osc_freq;
+}
+
+u32 pic32_get_pbclk(int bus)
+{
+ u32 clk_freq;
+ void __iomem *osc_base = ioremap(PIC32_BASE_OSC, 0x200);
+ u32 pbxdiv = PB1DIV + ((bus - 1) * 0x10);
+ u32 pbdiv = (__raw_readl(osc_base + pbxdiv) & PB_MASK) + 1;
+
+ iounmap(osc_base);
+
+ clk_freq = pic32_get_sysclk();
+
+ return clk_freq / pbdiv;
+}
diff --git a/arch/mips/pic32/pic32mzda/early_console.c b/arch/mips/pic32/pic32mzda/early_console.c
new file mode 100644
index 000000000000..d7b783463fac
--- /dev/null
+++ b/arch/mips/pic32/pic32mzda/early_console.c
@@ -0,0 +1,171 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc. All rights reserved.
+ *
+ * This program is free software; you can distribute it and/or modify it
+ * under the terms of the GNU General Public License (Version 2) as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+#include <asm/mach-pic32/pic32.h>
+#include <asm/fw/fw.h>
+
+#include "pic32mzda.h"
+#include "early_pin.h"
+
+/* Default early console parameters */
+#define EARLY_CONSOLE_PORT 1
+#define EARLY_CONSOLE_BAUDRATE 115200
+
+#define UART_ENABLE BIT(15)
+#define UART_ENABLE_RX BIT(12)
+#define UART_ENABLE_TX BIT(10)
+#define UART_TX_FULL BIT(9)
+
+/* UART1(x == 0) - UART6(x == 5) */
+#define UART_BASE(x) ((x) * 0x0200)
+#define U_MODE(x) UART_BASE(x)
+#define U_STA(x) (UART_BASE(x) + 0x10)
+#define U_TXR(x) (UART_BASE(x) + 0x20)
+#define U_BRG(x) (UART_BASE(x) + 0x40)
+
+static void __iomem *uart_base;
+static char console_port = -1;
+
+static int __init configure_uart_pins(int port)
+{
+ switch (port) {
+ case 1:
+ pic32_pps_input(IN_FUNC_U2RX, IN_RPB0);
+ pic32_pps_output(OUT_FUNC_U2TX, OUT_RPG9);
+ break;
+ case 5:
+ pic32_pps_input(IN_FUNC_U6RX, IN_RPD0);
+ pic32_pps_output(OUT_FUNC_U6TX, OUT_RPB8);
+ break;
+ default:
+ return -1;
+ }
+
+ return 0;
+}
+
+static void __init configure_uart(char port, int baud)
+{
+ u32 pbclk;
+
+ pbclk = pic32_get_pbclk(2);
+
+ __raw_writel(0, uart_base + U_MODE(port));
+ __raw_writel(((pbclk / baud) / 16) - 1, uart_base + U_BRG(port));
+ __raw_writel(UART_ENABLE, uart_base + U_MODE(port));
+ __raw_writel(UART_ENABLE_TX | UART_ENABLE_RX,
+ uart_base + PIC32_SET(U_STA(port)));
+}
+
+static void __init setup_early_console(char port, int baud)
+{
+ if (configure_uart_pins(port))
+ return;
+
+ console_port = port;
+ configure_uart(console_port, baud);
+}
+
+static char * __init pic32_getcmdline(void)
+{
+ /*
+ * arch_mem_init() has not been called yet, so we don't have a real
+ * command line setup if using CONFIG_CMDLINE_BOOL.
+ */
+#ifdef CONFIG_CMDLINE_OVERRIDE
+ return CONFIG_CMDLINE;
+#else
+ return fw_getcmdline();
+#endif
+}
+
+static int __init get_port_from_cmdline(char *arch_cmdline)
+{
+ char *s;
+ int port = -1;
+
+ if (!arch_cmdline || *arch_cmdline == '\0')
+ goto _out;
+
+ s = strstr(arch_cmdline, "earlyprintk=");
+ if (s) {
+ s = strstr(s, "ttyS");
+ if (s)
+ s += 4;
+ else
+ goto _out;
+
+ port = (*s) - '0';
+ }
+
+_out:
+ return port;
+}
+
+static int __init get_baud_from_cmdline(char *arch_cmdline)
+{
+ char *s;
+ int baud = -1;
+
+ if (!arch_cmdline || *arch_cmdline == '\0')
+ goto _out;
+
+ s = strstr(arch_cmdline, "earlyprintk=");
+ if (s) {
+ s = strstr(s, "ttyS");
+ if (s)
+ s += 6;
+ else
+ goto _out;
+
+ baud = 0;
+ while (*s >= '0' && *s <= '9')
+ baud = baud * 10 + *s++ - '0';
+ }
+
+_out:
+ return baud;
+}
+
+void __init fw_init_early_console(char port)
+{
+ char *arch_cmdline = pic32_getcmdline();
+ int baud = -1;
+
+ uart_base = ioremap_nocache(PIC32_BASE_UART, 0xc00);
+
+ baud = get_baud_from_cmdline(arch_cmdline);
+ if (port == -1)
+ port = get_port_from_cmdline(arch_cmdline);
+
+ if (port == -1)
+ port = EARLY_CONSOLE_PORT;
+
+ if (baud == -1)
+ baud = EARLY_CONSOLE_BAUDRATE;
+
+ setup_early_console(port, baud);
+}
+
+int prom_putchar(char c)
+{
+ if (console_port >= 0) {
+ while (__raw_readl(
+ uart_base + U_STA(console_port)) & UART_TX_FULL)
+ ;
+
+ __raw_writel(c, uart_base + U_TXR(console_port));
+ }
+
+ return 1;
+}
diff --git a/arch/mips/pic32/pic32mzda/early_pin.c b/arch/mips/pic32/pic32mzda/early_pin.c
new file mode 100644
index 000000000000..aa673f8023a8
--- /dev/null
+++ b/arch/mips/pic32/pic32mzda/early_pin.c
@@ -0,0 +1,275 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc. All rights reserved.
+ *
+ * This program is free software; you can distribute it and/or modify it
+ * under the terms of the GNU General Public License (Version 2) as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+#include <asm/io.h>
+
+#include "early_pin.h"
+
+#define PPS_BASE 0x1f800000
+
+/* Input PPS Registers */
+#define INT1R 0x1404
+#define INT2R 0x1408
+#define INT3R 0x140C
+#define INT4R 0x1410
+#define T2CKR 0x1418
+#define T3CKR 0x141C
+#define T4CKR 0x1420
+#define T5CKR 0x1424
+#define T6CKR 0x1428
+#define T7CKR 0x142C
+#define T8CKR 0x1430
+#define T9CKR 0x1434
+#define IC1R 0x1438
+#define IC2R 0x143C
+#define IC3R 0x1440
+#define IC4R 0x1444
+#define IC5R 0x1448
+#define IC6R 0x144C
+#define IC7R 0x1450
+#define IC8R 0x1454
+#define IC9R 0x1458
+#define OCFAR 0x1460
+#define U1RXR 0x1468
+#define U1CTSR 0x146C
+#define U2RXR 0x1470
+#define U2CTSR 0x1474
+#define U3RXR 0x1478
+#define U3CTSR 0x147C
+#define U4RXR 0x1480
+#define U4CTSR 0x1484
+#define U5RXR 0x1488
+#define U5CTSR 0x148C
+#define U6RXR 0x1490
+#define U6CTSR 0x1494
+#define SDI1R 0x149C
+#define SS1R 0x14A0
+#define SDI2R 0x14A8
+#define SS2R 0x14AC
+#define SDI3R 0x14B4
+#define SS3R 0x14B8
+#define SDI4R 0x14C0
+#define SS4R 0x14C4
+#define SDI5R 0x14CC
+#define SS5R 0x14D0
+#define SDI6R 0x14D8
+#define SS6R 0x14DC
+#define C1RXR 0x14E0
+#define C2RXR 0x14E4
+#define REFCLKI1R 0x14E8
+#define REFCLKI3R 0x14F0
+#define REFCLKI4R 0x14F4
+
+static const struct
+{
+ int function;
+ int reg;
+} input_pin_reg[] = {
+ { IN_FUNC_INT3, INT3R },
+ { IN_FUNC_T2CK, T2CKR },
+ { IN_FUNC_T6CK, T6CKR },
+ { IN_FUNC_IC3, IC3R },
+ { IN_FUNC_IC7, IC7R },
+ { IN_FUNC_U1RX, U1RXR },
+ { IN_FUNC_U2CTS, U2CTSR },
+ { IN_FUNC_U5RX, U5RXR },
+ { IN_FUNC_U6CTS, U6CTSR },
+ { IN_FUNC_SDI1, SDI1R },
+ { IN_FUNC_SDI3, SDI3R },
+ { IN_FUNC_SDI5, SDI5R },
+ { IN_FUNC_SS6, SS6R },
+ { IN_FUNC_REFCLKI1, REFCLKI1R },
+ { IN_FUNC_INT4, INT4R },
+ { IN_FUNC_T5CK, T5CKR },
+ { IN_FUNC_T7CK, T7CKR },
+ { IN_FUNC_IC4, IC4R },
+ { IN_FUNC_IC8, IC8R },
+ { IN_FUNC_U3RX, U3RXR },
+ { IN_FUNC_U4CTS, U4CTSR },
+ { IN_FUNC_SDI2, SDI2R },
+ { IN_FUNC_SDI4, SDI4R },
+ { IN_FUNC_C1RX, C1RXR },
+ { IN_FUNC_REFCLKI4, REFCLKI4R },
+ { IN_FUNC_INT2, INT2R },
+ { IN_FUNC_T3CK, T3CKR },
+ { IN_FUNC_T8CK, T8CKR },
+ { IN_FUNC_IC2, IC2R },
+ { IN_FUNC_IC5, IC5R },
+ { IN_FUNC_IC9, IC9R },
+ { IN_FUNC_U1CTS, U1CTSR },
+ { IN_FUNC_U2RX, U2RXR },
+ { IN_FUNC_U5CTS, U5CTSR },
+ { IN_FUNC_SS1, SS1R },
+ { IN_FUNC_SS3, SS3R },
+ { IN_FUNC_SS4, SS4R },
+ { IN_FUNC_SS5, SS5R },
+ { IN_FUNC_C2RX, C2RXR },
+ { IN_FUNC_INT1, INT1R },
+ { IN_FUNC_T4CK, T4CKR },
+ { IN_FUNC_T9CK, T9CKR },
+ { IN_FUNC_IC1, IC1R },
+ { IN_FUNC_IC6, IC6R },
+ { IN_FUNC_U3CTS, U3CTSR },
+ { IN_FUNC_U4RX, U4RXR },
+ { IN_FUNC_U6RX, U6RXR },
+ { IN_FUNC_SS2, SS2R },
+ { IN_FUNC_SDI6, SDI6R },
+ { IN_FUNC_OCFA, OCFAR },
+ { IN_FUNC_REFCLKI3, REFCLKI3R },
+};
+
+void pic32_pps_input(int function, int pin)
+{
+ void __iomem *pps_base = ioremap_nocache(PPS_BASE, 0xF4);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(input_pin_reg); i++) {
+ if (input_pin_reg[i].function == function) {
+ __raw_writel(pin, pps_base + input_pin_reg[i].reg);
+ return;
+ }
+ }
+
+ iounmap(pps_base);
+}
+
+/* Output PPS Registers */
+#define RPA14R 0x1538
+#define RPA15R 0x153C
+#define RPB0R 0x1540
+#define RPB1R 0x1544
+#define RPB2R 0x1548
+#define RPB3R 0x154C
+#define RPB5R 0x1554
+#define RPB6R 0x1558
+#define RPB7R 0x155C
+#define RPB8R 0x1560
+#define RPB9R 0x1564
+#define RPB10R 0x1568
+#define RPB14R 0x1578
+#define RPB15R 0x157C
+#define RPC1R 0x1584
+#define RPC2R 0x1588
+#define RPC3R 0x158C
+#define RPC4R 0x1590
+#define RPC13R 0x15B4
+#define RPC14R 0x15B8
+#define RPD0R 0x15C0
+#define RPD1R 0x15C4
+#define RPD2R 0x15C8
+#define RPD3R 0x15CC
+#define RPD4R 0x15D0
+#define RPD5R 0x15D4
+#define RPD6R 0x15D8
+#define RPD7R 0x15DC
+#define RPD9R 0x15E4
+#define RPD10R 0x15E8
+#define RPD11R 0x15EC
+#define RPD12R 0x15F0
+#define RPD14R 0x15F8
+#define RPD15R 0x15FC
+#define RPE3R 0x160C
+#define RPE5R 0x1614
+#define RPE8R 0x1620
+#define RPE9R 0x1624
+#define RPF0R 0x1640
+#define RPF1R 0x1644
+#define RPF2R 0x1648
+#define RPF3R 0x164C
+#define RPF4R 0x1650
+#define RPF5R 0x1654
+#define RPF8R 0x1660
+#define RPF12R 0x1670
+#define RPF13R 0x1674
+#define RPG0R 0x1680
+#define RPG1R 0x1684
+#define RPG6R 0x1698
+#define RPG7R 0x169C
+#define RPG8R 0x16A0
+#define RPG9R 0x16A4
+
+static const struct
+{
+ int pin;
+ int reg;
+} output_pin_reg[] = {
+ { OUT_RPD2, RPD2R },
+ { OUT_RPG8, RPG8R },
+ { OUT_RPF4, RPF4R },
+ { OUT_RPD10, RPD10R },
+ { OUT_RPF1, RPF1R },
+ { OUT_RPB9, RPB9R },
+ { OUT_RPB10, RPB10R },
+ { OUT_RPC14, RPC14R },
+ { OUT_RPB5, RPB5R },
+ { OUT_RPC1, RPC1R },
+ { OUT_RPD14, RPD14R },
+ { OUT_RPG1, RPG1R },
+ { OUT_RPA14, RPA14R },
+ { OUT_RPD6, RPD6R },
+ { OUT_RPD3, RPD3R },
+ { OUT_RPG7, RPG7R },
+ { OUT_RPF5, RPF5R },
+ { OUT_RPD11, RPD11R },
+ { OUT_RPF0, RPF0R },
+ { OUT_RPB1, RPB1R },
+ { OUT_RPE5, RPE5R },
+ { OUT_RPC13, RPC13R },
+ { OUT_RPB3, RPB3R },
+ { OUT_RPC4, RPC4R },
+ { OUT_RPD15, RPD15R },
+ { OUT_RPG0, RPG0R },
+ { OUT_RPA15, RPA15R },
+ { OUT_RPD7, RPD7R },
+ { OUT_RPD9, RPD9R },
+ { OUT_RPG6, RPG6R },
+ { OUT_RPB8, RPB8R },
+ { OUT_RPB15, RPB15R },
+ { OUT_RPD4, RPD4R },
+ { OUT_RPB0, RPB0R },
+ { OUT_RPE3, RPE3R },
+ { OUT_RPB7, RPB7R },
+ { OUT_RPF12, RPF12R },
+ { OUT_RPD12, RPD12R },
+ { OUT_RPF8, RPF8R },
+ { OUT_RPC3, RPC3R },
+ { OUT_RPE9, RPE9R },
+ { OUT_RPD1, RPD1R },
+ { OUT_RPG9, RPG9R },
+ { OUT_RPB14, RPB14R },
+ { OUT_RPD0, RPD0R },
+ { OUT_RPB6, RPB6R },
+ { OUT_RPD5, RPD5R },
+ { OUT_RPB2, RPB2R },
+ { OUT_RPF3, RPF3R },
+ { OUT_RPF13, RPF13R },
+ { OUT_RPC2, RPC2R },
+ { OUT_RPE8, RPE8R },
+ { OUT_RPF2, RPF2R },
+};
+
+void pic32_pps_output(int function, int pin)
+{
+ void __iomem *pps_base = ioremap_nocache(PPS_BASE, 0x170);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(output_pin_reg); i++) {
+ if (output_pin_reg[i].pin == pin) {
+ __raw_writel(function,
+ pps_base + output_pin_reg[i].reg);
+ return;
+ }
+ }
+
+ iounmap(pps_base);
+}
diff --git a/arch/mips/pic32/pic32mzda/early_pin.h b/arch/mips/pic32/pic32mzda/early_pin.h
new file mode 100644
index 000000000000..417fae9a9627
--- /dev/null
+++ b/arch/mips/pic32/pic32mzda/early_pin.h
@@ -0,0 +1,241 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc. All rights reserved.
+ *
+ * This program is free software; you can distribute it and/or modify it
+ * under the terms of the GNU General Public License (Version 2) as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+#ifndef _PIC32MZDA_EARLY_PIN_H
+#define _PIC32MZDA_EARLY_PIN_H
+
+/*
+ * This is a complete, yet overly simplistic and unoptimized, PIC32MZDA PPS
+ * configuration only useful before we have full pinctrl initialized.
+ */
+
+/* Input PPS Functions */
+enum {
+ IN_FUNC_INT3,
+ IN_FUNC_T2CK,
+ IN_FUNC_T6CK,
+ IN_FUNC_IC3,
+ IN_FUNC_IC7,
+ IN_FUNC_U1RX,
+ IN_FUNC_U2CTS,
+ IN_FUNC_U5RX,
+ IN_FUNC_U6CTS,
+ IN_FUNC_SDI1,
+ IN_FUNC_SDI3,
+ IN_FUNC_SDI5,
+ IN_FUNC_SS6,
+ IN_FUNC_REFCLKI1,
+ IN_FUNC_INT4,
+ IN_FUNC_T5CK,
+ IN_FUNC_T7CK,
+ IN_FUNC_IC4,
+ IN_FUNC_IC8,
+ IN_FUNC_U3RX,
+ IN_FUNC_U4CTS,
+ IN_FUNC_SDI2,
+ IN_FUNC_SDI4,
+ IN_FUNC_C1RX,
+ IN_FUNC_REFCLKI4,
+ IN_FUNC_INT2,
+ IN_FUNC_T3CK,
+ IN_FUNC_T8CK,
+ IN_FUNC_IC2,
+ IN_FUNC_IC5,
+ IN_FUNC_IC9,
+ IN_FUNC_U1CTS,
+ IN_FUNC_U2RX,
+ IN_FUNC_U5CTS,
+ IN_FUNC_SS1,
+ IN_FUNC_SS3,
+ IN_FUNC_SS4,
+ IN_FUNC_SS5,
+ IN_FUNC_C2RX,
+ IN_FUNC_INT1,
+ IN_FUNC_T4CK,
+ IN_FUNC_T9CK,
+ IN_FUNC_IC1,
+ IN_FUNC_IC6,
+ IN_FUNC_U3CTS,
+ IN_FUNC_U4RX,
+ IN_FUNC_U6RX,
+ IN_FUNC_SS2,
+ IN_FUNC_SDI6,
+ IN_FUNC_OCFA,
+ IN_FUNC_REFCLKI3,
+};
+
+/* Input PPS Pins */
+#define IN_RPD2 0x00
+#define IN_RPG8 0x01
+#define IN_RPF4 0x02
+#define IN_RPD10 0x03
+#define IN_RPF1 0x04
+#define IN_RPB9 0x05
+#define IN_RPB10 0x06
+#define IN_RPC14 0x07
+#define IN_RPB5 0x08
+#define IN_RPC1 0x0A
+#define IN_RPD14 0x0B
+#define IN_RPG1 0x0C
+#define IN_RPA14 0x0D
+#define IN_RPD6 0x0E
+#define IN_RPD3 0x00
+#define IN_RPG7 0x01
+#define IN_RPF5 0x02
+#define IN_RPD11 0x03
+#define IN_RPF0 0x04
+#define IN_RPB1 0x05
+#define IN_RPE5 0x06
+#define IN_RPC13 0x07
+#define IN_RPB3 0x08
+#define IN_RPC4 0x0A
+#define IN_RPD15 0x0B
+#define IN_RPG0 0x0C
+#define IN_RPA15 0x0D
+#define IN_RPD7 0x0E
+#define IN_RPD9 0x00
+#define IN_RPG6 0x01
+#define IN_RPB8 0x02
+#define IN_RPB15 0x03
+#define IN_RPD4 0x04
+#define IN_RPB0 0x05
+#define IN_RPE3 0x06
+#define IN_RPB7 0x07
+#define IN_RPF12 0x09
+#define IN_RPD12 0x0A
+#define IN_RPF8 0x0B
+#define IN_RPC3 0x0C
+#define IN_RPE9 0x0D
+#define IN_RPD1 0x00
+#define IN_RPG9 0x01
+#define IN_RPB14 0x02
+#define IN_RPD0 0x03
+#define IN_RPB6 0x05
+#define IN_RPD5 0x06
+#define IN_RPB2 0x07
+#define IN_RPF3 0x08
+#define IN_RPF13 0x09
+#define IN_RPF2 0x0B
+#define IN_RPC2 0x0C
+#define IN_RPE8 0x0D
+
+/* Output PPS Pins */
+enum {
+ OUT_RPD2,
+ OUT_RPG8,
+ OUT_RPF4,
+ OUT_RPD10,
+ OUT_RPF1,
+ OUT_RPB9,
+ OUT_RPB10,
+ OUT_RPC14,
+ OUT_RPB5,
+ OUT_RPC1,
+ OUT_RPD14,
+ OUT_RPG1,
+ OUT_RPA14,
+ OUT_RPD6,
+ OUT_RPD3,
+ OUT_RPG7,
+ OUT_RPF5,
+ OUT_RPD11,
+ OUT_RPF0,
+ OUT_RPB1,
+ OUT_RPE5,
+ OUT_RPC13,
+ OUT_RPB3,
+ OUT_RPC4,
+ OUT_RPD15,
+ OUT_RPG0,
+ OUT_RPA15,
+ OUT_RPD7,
+ OUT_RPD9,
+ OUT_RPG6,
+ OUT_RPB8,
+ OUT_RPB15,
+ OUT_RPD4,
+ OUT_RPB0,
+ OUT_RPE3,
+ OUT_RPB7,
+ OUT_RPF12,
+ OUT_RPD12,
+ OUT_RPF8,
+ OUT_RPC3,
+ OUT_RPE9,
+ OUT_RPD1,
+ OUT_RPG9,
+ OUT_RPB14,
+ OUT_RPD0,
+ OUT_RPB6,
+ OUT_RPD5,
+ OUT_RPB2,
+ OUT_RPF3,
+ OUT_RPF13,
+ OUT_RPC2,
+ OUT_RPE8,
+ OUT_RPF2,
+};
+
+/* Output PPS Functions */
+#define OUT_FUNC_U3TX 0x01
+#define OUT_FUNC_U4RTS 0x02
+#define OUT_FUNC_SDO1 0x05
+#define OUT_FUNC_SDO2 0x06
+#define OUT_FUNC_SDO3 0x07
+#define OUT_FUNC_SDO5 0x09
+#define OUT_FUNC_SS6 0x0A
+#define OUT_FUNC_OC3 0x0B
+#define OUT_FUNC_OC6 0x0C
+#define OUT_FUNC_REFCLKO4 0x0D
+#define OUT_FUNC_C2OUT 0x0E
+#define OUT_FUNC_C1TX 0x0F
+#define OUT_FUNC_U1TX 0x01
+#define OUT_FUNC_U2RTS 0x02
+#define OUT_FUNC_U5TX 0x03
+#define OUT_FUNC_U6RTS 0x04
+#define OUT_FUNC_SDO1 0x05
+#define OUT_FUNC_SDO2 0x06
+#define OUT_FUNC_SDO3 0x07
+#define OUT_FUNC_SDO4 0x08
+#define OUT_FUNC_SDO5 0x09
+#define OUT_FUNC_OC4 0x0B
+#define OUT_FUNC_OC7 0x0C
+#define OUT_FUNC_REFCLKO1 0x0F
+#define OUT_FUNC_U3RTS 0x01
+#define OUT_FUNC_U4TX 0x02
+#define OUT_FUNC_U6TX 0x04
+#define OUT_FUNC_SS1 0x05
+#define OUT_FUNC_SS3 0x07
+#define OUT_FUNC_SS4 0x08
+#define OUT_FUNC_SS5 0x09
+#define OUT_FUNC_SDO6 0x0A
+#define OUT_FUNC_OC5 0x0B
+#define OUT_FUNC_OC8 0x0C
+#define OUT_FUNC_C1OUT 0x0E
+#define OUT_FUNC_REFCLKO3 0x0F
+#define OUT_FUNC_U1RTS 0x01
+#define OUT_FUNC_U2TX 0x02
+#define OUT_FUNC_U5RTS 0x03
+#define OUT_FUNC_U6TX 0x04
+#define OUT_FUNC_SS2 0x06
+#define OUT_FUNC_SDO4 0x08
+#define OUT_FUNC_SDO6 0x0A
+#define OUT_FUNC_OC2 0x0B
+#define OUT_FUNC_OC1 0x0C
+#define OUT_FUNC_OC9 0x0D
+#define OUT_FUNC_C2TX 0x0F
+
+void pic32_pps_input(int function, int pin);
+void pic32_pps_output(int function, int pin);
+
+#endif
diff --git a/arch/mips/pic32/pic32mzda/init.c b/arch/mips/pic32/pic32mzda/init.c
new file mode 100644
index 000000000000..775ff90a9962
--- /dev/null
+++ b/arch/mips/pic32/pic32mzda/init.c
@@ -0,0 +1,156 @@
+/*
+ * Joshua Henderson, joshua.henderson@microchip.com
+ * Copyright (C) 2015 Microchip Technology Inc. All rights reserved.
+ *
+ * This program is free software; you can distribute it and/or modify it
+ * under the terms of the GNU General Public License (Version 2) as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/of_address.h>
+#include <linux/of_fdt.h>
+#include <linux/of_platform.h>
+#include <linux/platform_data/sdhci-pic32.h>
+
+#include <asm/fw/fw.h>
+#include <asm/mips-boards/generic.h>
+#include <asm/prom.h>
+
+#include "pic32mzda.h"
+
+const char *get_system_type(void)
+{
+ return "PIC32MZDA";
+}
+
+static ulong get_fdtaddr(void)
+{
+ ulong ftaddr = 0;
+
+ if ((fw_arg0 == -2) && fw_arg1 && !fw_arg2 && !fw_arg3)
+ return (ulong)fw_arg1;
+
+ if (__dtb_start < __dtb_end)
+ ftaddr = (ulong)__dtb_start;
+
+ return ftaddr;
+}
+
+void __init plat_mem_setup(void)
+{
+ void *dtb;
+
+ dtb = (void *)get_fdtaddr();
+ if (!dtb) {
+ pr_err("pic32: no DTB found.\n");
+ return;
+ }
+
+ /*
+ * Load the builtin device tree. This causes the chosen node to be
+ * parsed resulting in our memory appearing.
+ */
+ __dt_setup_arch(dtb);
+
+ pr_info("Found following command lines\n");
+ pr_info(" boot_command_line: %s\n", boot_command_line);
+ pr_info(" arcs_cmdline : %s\n", arcs_cmdline);
+#ifdef CONFIG_CMDLINE_BOOL
+ pr_info(" builtin_cmdline : %s\n", CONFIG_CMDLINE);
+#endif
+ if (dtb != __dtb_start)
+ strlcpy(arcs_cmdline, boot_command_line, COMMAND_LINE_SIZE);
+
+#ifdef CONFIG_EARLY_PRINTK
+ fw_init_early_console(-1);
+#endif
+ pic32_config_init();
+}
+
+static __init void pic32_init_cmdline(int argc, char *argv[])
+{
+ unsigned int count = COMMAND_LINE_SIZE - 1;
+ int i;
+ char *dst = &(arcs_cmdline[0]);
+ char *src;
+
+ for (i = 1; i < argc && count; ++i) {
+ src = argv[i];
+ while (*src && count) {
+ *dst++ = *src++;
+ --count;
+ }
+ *dst++ = ' ';
+ }
+ if (i > 1)
+ --dst;
+
+ *dst = 0;
+}
+
+void __init prom_init(void)
+{
+ pic32_init_cmdline((int)fw_arg0, (char **)fw_arg1);
+}
+
+void __init prom_free_prom_memory(void)
+{
+}
+
+void __init device_tree_init(void)
+{
+ if (!initial_boot_params)
+ return;
+
+ unflatten_and_copy_device_tree();
+}
+
+static struct pic32_sdhci_platform_data sdhci_data = {
+ .setup_dma = pic32_set_sdhci_adma_fifo_threshold,
+};
+
+static struct of_dev_auxdata pic32_auxdata_lookup[] __initdata = {
+ OF_DEV_AUXDATA("microchip,pic32mzda-sdhci", 0, "sdhci", &sdhci_data),
+ { /* sentinel */}
+};
+
+static int __init pic32_of_prepare_platform_data(struct of_dev_auxdata *lookup)
+{
+ struct device_node *root, *np;
+ struct resource res;
+
+ root = of_find_node_by_path("/");
+
+ for (; lookup->compatible; lookup++) {
+ np = of_find_compatible_node(NULL, NULL, lookup->compatible);
+ if (np) {
+ lookup->name = (char *)np->name;
+ if (lookup->phys_addr)
+ continue;
+ if (!of_address_to_resource(np, 0, &res))
+ lookup->phys_addr = res.start;
+ }
+ }
+
+ return 0;
+}
+
+static int __init plat_of_setup(void)
+{
+ if (!of_have_populated_dt())
+ panic("Device tree not present");
+
+ pic32_of_prepare_platform_data(pic32_auxdata_lookup);
+ if (of_platform_populate(NULL, of_default_bus_match_table,
+ pic32_auxdata_lookup, NULL))
+ panic("Failed to populate DT");
+
+ return 0;
+}
+arch_initcall(plat_of_setup);
diff --git a/arch/mips/pic32/pic32mzda/pic32mzda.h b/arch/mips/pic32/pic32mzda/pic32mzda.h
new file mode 100644
index 000000000000..96d10e2af475
--- /dev/null
+++ b/arch/mips/pic32/pic32mzda/pic32mzda.h
@@ -0,0 +1,29 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc. All rights reserved.
+ *
+ * This program is free software; you can distribute it and/or modify it
+ * under the terms of the GNU General Public License (Version 2) as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+#ifndef PIC32MZDA_COMMON_H
+#define PIC32MZDA_COMMON_H
+
+/* early clock */
+u32 pic32_get_pbclk(int bus);
+u32 pic32_get_sysclk(void);
+
+/* Device configuration */
+void __init pic32_config_init(void);
+int pic32_set_lcd_mode(int mode);
+int pic32_set_sdhci_adma_fifo_threshold(u32 rthrs, u32 wthrs);
+u32 pic32_get_boot_status(void);
+int pic32_disable_lcd(void);
+int pic32_enable_lcd(void);
+
+#endif
diff --git a/arch/mips/pic32/pic32mzda/time.c b/arch/mips/pic32/pic32mzda/time.c
new file mode 100644
index 000000000000..ca6a62bb10db
--- /dev/null
+++ b/arch/mips/pic32/pic32mzda/time.c
@@ -0,0 +1,73 @@
+/*
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2015 Microchip Technology Inc. All rights reserved.
+ *
+ * This program is free software; you can distribute it and/or modify it
+ * under the terms of the GNU General Public License (Version 2) as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/clocksource.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/irqdomain.h>
+
+#include <asm/time.h>
+
+#include "pic32mzda.h"
+
+static const struct of_device_id pic32_infra_match[] = {
+ { .compatible = "microchip,pic32mzda-infra", },
+ { },
+};
+
+#define DEFAULT_CORE_TIMER_INTERRUPT 0
+
+static unsigned int pic32_xlate_core_timer_irq(void)
+{
+ static struct device_node *node;
+ unsigned int irq;
+
+ node = of_find_matching_node(NULL, pic32_infra_match);
+
+ if (WARN_ON(!node))
+ goto default_map;
+
+ irq = irq_of_parse_and_map(node, 0);
+ if (!irq)
+ goto default_map;
+
+ return irq;
+
+default_map:
+
+ return irq_create_mapping(NULL, DEFAULT_CORE_TIMER_INTERRUPT);
+}
+
+unsigned int get_c0_compare_int(void)
+{
+ return pic32_xlate_core_timer_irq();
+}
+
+void __init plat_time_init(void)
+{
+ struct clk *clk;
+
+ of_clk_init(NULL);
+ clk = clk_get_sys("cpu_clk", NULL);
+ if (IS_ERR(clk))
+ panic("unable to get CPU clock, err=%ld", PTR_ERR(clk));
+
+ clk_prepare_enable(clk);
+ pr_info("CPU Clock: %ldMHz\n", clk_get_rate(clk) / 1000000);
+ mips_hpt_frequency = clk_get_rate(clk) / 2;
+
+ clocksource_probe();
+}
diff --git a/arch/mips/ralink/Kconfig b/arch/mips/ralink/Kconfig
index e9bc8c96174e..813826a456ca 100644
--- a/arch/mips/ralink/Kconfig
+++ b/arch/mips/ralink/Kconfig
@@ -12,6 +12,11 @@ config RALINK_ILL_ACC
depends on SOC_RT305X
default y
+config IRQ_INTC
+ bool
+ default y
+ depends on !SOC_MT7621
+
choice
prompt "Ralink SoC selection"
default SOC_RT305X
@@ -33,7 +38,18 @@ choice
config SOC_MT7620
bool "MT7620/8"
+ select HW_HAS_PCI
+ config SOC_MT7621
+ bool "MT7621"
+ select MIPS_CPU_SCACHE
+ select SYS_SUPPORTS_MULTITHREADING
+ select SYS_SUPPORTS_SMP
+ select SYS_SUPPORTS_MIPS_CPS
+ select MIPS_GIC
+ select COMMON_CLK
+ select CLKSRC_MIPS_GIC
+ select HW_HAS_PCI
endchoice
choice
diff --git a/arch/mips/ralink/Makefile b/arch/mips/ralink/Makefile
index a6c9d0061326..0d1795a0321e 100644
--- a/arch/mips/ralink/Makefile
+++ b/arch/mips/ralink/Makefile
@@ -6,16 +6,24 @@
# Copyright (C) 2009-2011 Gabor Juhos <juhosg@openwrt.org>
# Copyright (C) 2013 John Crispin <blogic@openwrt.org>
-obj-y := prom.o of.o reset.o clk.o irq.o timer.o
+obj-y := prom.o of.o reset.o
+
+ifndef CONFIG_MIPS_GIC
+ obj-y += clk.o timer.o
+endif
obj-$(CONFIG_CLKEVT_RT3352) += cevt-rt3352.o
obj-$(CONFIG_RALINK_ILL_ACC) += ill_acc.o
+obj-$(CONFIG_IRQ_INTC) += irq.o
+obj-$(CONFIG_MIPS_GIC) += irq-gic.o timer-gic.o
+
obj-$(CONFIG_SOC_RT288X) += rt288x.o
obj-$(CONFIG_SOC_RT305X) += rt305x.o
obj-$(CONFIG_SOC_RT3883) += rt3883.o
obj-$(CONFIG_SOC_MT7620) += mt7620.o
+obj-$(CONFIG_SOC_MT7621) += mt7621.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
diff --git a/arch/mips/ralink/Platform b/arch/mips/ralink/Platform
index 6d9c8c499f98..6095fcc334f4 100644
--- a/arch/mips/ralink/Platform
+++ b/arch/mips/ralink/Platform
@@ -27,3 +27,8 @@ cflags-$(CONFIG_SOC_RT3883) += -I$(srctree)/arch/mips/include/asm/mach-ralink/rt
#
load-$(CONFIG_SOC_MT7620) += 0xffffffff80000000
cflags-$(CONFIG_SOC_MT7620) += -I$(srctree)/arch/mips/include/asm/mach-ralink/mt7620
+
+# Ralink MT7621
+#
+load-$(CONFIG_SOC_MT7621) += 0xffffffff80001000
+cflags-$(CONFIG_SOC_MT7621) += -I$(srctree)/arch/mips/include/asm/mach-ralink/mt7621
diff --git a/arch/mips/ralink/irq-gic.c b/arch/mips/ralink/irq-gic.c
new file mode 100644
index 000000000000..50d6c55ab1de
--- /dev/null
+++ b/arch/mips/ralink/irq-gic.c
@@ -0,0 +1,25 @@
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Copyright (C) 2015 Nikolay Martynov <mar.kolya@gmail.com>
+ * Copyright (C) 2015 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/init.h>
+
+#include <linux/of.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/mips-gic.h>
+
+int get_c0_perfcount_int(void)
+{
+ return gic_get_c0_perfcount_int();
+}
+EXPORT_SYMBOL_GPL(get_c0_perfcount_int);
+
+void __init arch_init_irq(void)
+{
+ irqchip_init();
+}
diff --git a/arch/mips/ralink/mt7620.c b/arch/mips/ralink/mt7620.c
index dfb04fcedb04..0d3d1a97895f 100644
--- a/arch/mips/ralink/mt7620.c
+++ b/arch/mips/ralink/mt7620.c
@@ -107,31 +107,31 @@ static struct rt2880_pmx_group mt7620a_pinmux_data[] = {
};
static struct rt2880_pmx_func pwm1_grp_mt7628[] = {
- FUNC("sdcx", 3, 19, 1),
+ FUNC("sdxc d6", 3, 19, 1),
FUNC("utif", 2, 19, 1),
FUNC("gpio", 1, 19, 1),
- FUNC("pwm", 0, 19, 1),
+ FUNC("pwm1", 0, 19, 1),
};
static struct rt2880_pmx_func pwm0_grp_mt7628[] = {
- FUNC("sdcx", 3, 18, 1),
+ FUNC("sdxc d7", 3, 18, 1),
FUNC("utif", 2, 18, 1),
FUNC("gpio", 1, 18, 1),
- FUNC("pwm", 0, 18, 1),
+ FUNC("pwm0", 0, 18, 1),
};
static struct rt2880_pmx_func uart2_grp_mt7628[] = {
- FUNC("sdcx", 3, 20, 2),
+ FUNC("sdxc d5 d4", 3, 20, 2),
FUNC("pwm", 2, 20, 2),
FUNC("gpio", 1, 20, 2),
- FUNC("uart", 0, 20, 2),
+ FUNC("uart2", 0, 20, 2),
};
static struct rt2880_pmx_func uart1_grp_mt7628[] = {
- FUNC("sdcx", 3, 45, 2),
+ FUNC("sw_r", 3, 45, 2),
FUNC("pwm", 2, 45, 2),
FUNC("gpio", 1, 45, 2),
- FUNC("uart", 0, 45, 2),
+ FUNC("uart1", 0, 45, 2),
};
static struct rt2880_pmx_func i2c_grp_mt7628[] = {
@@ -143,21 +143,21 @@ static struct rt2880_pmx_func i2c_grp_mt7628[] = {
static struct rt2880_pmx_func refclk_grp_mt7628[] = { FUNC("reclk", 0, 36, 1) };
static struct rt2880_pmx_func perst_grp_mt7628[] = { FUNC("perst", 0, 37, 1) };
-static struct rt2880_pmx_func wdt_grp_mt7628[] = { FUNC("wdt", 0, 15, 38) };
+static struct rt2880_pmx_func wdt_grp_mt7628[] = { FUNC("wdt", 0, 38, 1) };
static struct rt2880_pmx_func spi_grp_mt7628[] = { FUNC("spi", 0, 7, 4) };
static struct rt2880_pmx_func sd_mode_grp_mt7628[] = {
FUNC("jtag", 3, 22, 8),
FUNC("utif", 2, 22, 8),
FUNC("gpio", 1, 22, 8),
- FUNC("sdcx", 0, 22, 8),
+ FUNC("sdxc", 0, 22, 8),
};
static struct rt2880_pmx_func uart0_grp_mt7628[] = {
FUNC("-", 3, 12, 2),
FUNC("-", 2, 12, 2),
FUNC("gpio", 1, 12, 2),
- FUNC("uart", 0, 12, 2),
+ FUNC("uart0", 0, 12, 2),
};
static struct rt2880_pmx_func i2s_grp_mt7628[] = {
@@ -171,7 +171,7 @@ static struct rt2880_pmx_func spi_cs1_grp_mt7628[] = {
FUNC("-", 3, 6, 1),
FUNC("refclk", 2, 6, 1),
FUNC("gpio", 1, 6, 1),
- FUNC("spi", 0, 6, 1),
+ FUNC("spi cs1", 0, 6, 1),
};
static struct rt2880_pmx_func spis_grp_mt7628[] = {
@@ -188,28 +188,44 @@ static struct rt2880_pmx_func gpio_grp_mt7628[] = {
FUNC("gpio", 0, 11, 1),
};
-#define MT7628_GPIO_MODE_MASK 0x3
-
-#define MT7628_GPIO_MODE_PWM1 30
-#define MT7628_GPIO_MODE_PWM0 28
-#define MT7628_GPIO_MODE_UART2 26
-#define MT7628_GPIO_MODE_UART1 24
-#define MT7628_GPIO_MODE_I2C 20
-#define MT7628_GPIO_MODE_REFCLK 18
-#define MT7628_GPIO_MODE_PERST 16
-#define MT7628_GPIO_MODE_WDT 14
-#define MT7628_GPIO_MODE_SPI 12
-#define MT7628_GPIO_MODE_SDMODE 10
-#define MT7628_GPIO_MODE_UART0 8
-#define MT7628_GPIO_MODE_I2S 6
-#define MT7628_GPIO_MODE_CS1 4
-#define MT7628_GPIO_MODE_SPIS 2
-#define MT7628_GPIO_MODE_GPIO 0
+static struct rt2880_pmx_func wled_kn_grp_mt7628[] = {
+ FUNC("rsvd", 3, 35, 1),
+ FUNC("rsvd", 2, 35, 1),
+ FUNC("gpio", 1, 35, 1),
+ FUNC("wled_kn", 0, 35, 1),
+};
+
+static struct rt2880_pmx_func wled_an_grp_mt7628[] = {
+ FUNC("rsvd", 3, 35, 1),
+ FUNC("rsvd", 2, 35, 1),
+ FUNC("gpio", 1, 35, 1),
+ FUNC("wled_an", 0, 35, 1),
+};
+
+#define MT7628_GPIO_MODE_MASK 0x3
+
+#define MT7628_GPIO_MODE_WLED_KN 48
+#define MT7628_GPIO_MODE_WLED_AN 32
+#define MT7628_GPIO_MODE_PWM1 30
+#define MT7628_GPIO_MODE_PWM0 28
+#define MT7628_GPIO_MODE_UART2 26
+#define MT7628_GPIO_MODE_UART1 24
+#define MT7628_GPIO_MODE_I2C 20
+#define MT7628_GPIO_MODE_REFCLK 18
+#define MT7628_GPIO_MODE_PERST 16
+#define MT7628_GPIO_MODE_WDT 14
+#define MT7628_GPIO_MODE_SPI 12
+#define MT7628_GPIO_MODE_SDMODE 10
+#define MT7628_GPIO_MODE_UART0 8
+#define MT7628_GPIO_MODE_I2S 6
+#define MT7628_GPIO_MODE_CS1 4
+#define MT7628_GPIO_MODE_SPIS 2
+#define MT7628_GPIO_MODE_GPIO 0
static struct rt2880_pmx_group mt7628an_pinmux_data[] = {
GRP_G("pmw1", pwm1_grp_mt7628, MT7628_GPIO_MODE_MASK,
1, MT7628_GPIO_MODE_PWM1),
- GRP_G("pmw1", pwm0_grp_mt7628, MT7628_GPIO_MODE_MASK,
+ GRP_G("pmw0", pwm0_grp_mt7628, MT7628_GPIO_MODE_MASK,
1, MT7628_GPIO_MODE_PWM0),
GRP_G("uart2", uart2_grp_mt7628, MT7628_GPIO_MODE_MASK,
1, MT7628_GPIO_MODE_UART2),
@@ -233,6 +249,10 @@ static struct rt2880_pmx_group mt7628an_pinmux_data[] = {
1, MT7628_GPIO_MODE_SPIS),
GRP_G("gpio", gpio_grp_mt7628, MT7628_GPIO_MODE_MASK,
1, MT7628_GPIO_MODE_GPIO),
+ GRP_G("wled_an", wled_an_grp_mt7628, MT7628_GPIO_MODE_MASK,
+ 1, MT7628_GPIO_MODE_WLED_AN),
+ GRP_G("wled_kn", wled_kn_grp_mt7628, MT7628_GPIO_MODE_MASK,
+ 1, MT7628_GPIO_MODE_WLED_KN),
{ 0 }
};
@@ -436,10 +456,13 @@ void __init ralink_clk_init(void)
ralink_clk_add("10000100.timer", periph_rate);
ralink_clk_add("10000120.watchdog", periph_rate);
ralink_clk_add("10000b00.spi", sys_rate);
+ ralink_clk_add("10000b40.spi", sys_rate);
ralink_clk_add("10000c00.uartlite", periph_rate);
+ ralink_clk_add("10000d00.uart1", periph_rate);
+ ralink_clk_add("10000e00.uart2", periph_rate);
ralink_clk_add("10180000.wmac", xtal_rate);
- if (IS_ENABLED(CONFIG_USB) && is_mt76x8()) {
+ if (IS_ENABLED(CONFIG_USB) && !is_mt76x8()) {
/*
* When the CPU goes into sleep mode, the BUS clock will be
* too low for USB to function properly. Adjust the busses
@@ -552,7 +575,7 @@ void prom_soc_init(struct ralink_soc_info *soc_info)
}
snprintf(soc_info->sys_type, RAMIPS_SYS_TYPE_LEN,
- "Ralink %s ver:%u eco:%u",
+ "MediaTek %s ver:%u eco:%u",
name,
(rev >> CHIP_REV_VER_SHIFT) & CHIP_REV_VER_MASK,
(rev & CHIP_REV_ECO_MASK));
diff --git a/arch/mips/ralink/mt7621.c b/arch/mips/ralink/mt7621.c
new file mode 100644
index 000000000000..e9b9fa3e1e51
--- /dev/null
+++ b/arch/mips/ralink/mt7621.c
@@ -0,0 +1,226 @@
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Copyright (C) 2015 Nikolay Martynov <mar.kolya@gmail.com>
+ * Copyright (C) 2015 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+
+#include <asm/mipsregs.h>
+#include <asm/smp-ops.h>
+#include <asm/mips-cm.h>
+#include <asm/mips-cpc.h>
+#include <asm/mach-ralink/ralink_regs.h>
+#include <asm/mach-ralink/mt7621.h>
+
+#include <pinmux.h>
+
+#include "common.h"
+
+#define SYSC_REG_SYSCFG 0x10
+#define SYSC_REG_CPLL_CLKCFG0 0x2c
+#define SYSC_REG_CUR_CLK_STS 0x44
+#define CPU_CLK_SEL (BIT(30) | BIT(31))
+
+#define MT7621_GPIO_MODE_UART1 1
+#define MT7621_GPIO_MODE_I2C 2
+#define MT7621_GPIO_MODE_UART3_MASK 0x3
+#define MT7621_GPIO_MODE_UART3_SHIFT 3
+#define MT7621_GPIO_MODE_UART3_GPIO 1
+#define MT7621_GPIO_MODE_UART2_MASK 0x3
+#define MT7621_GPIO_MODE_UART2_SHIFT 5
+#define MT7621_GPIO_MODE_UART2_GPIO 1
+#define MT7621_GPIO_MODE_JTAG 7
+#define MT7621_GPIO_MODE_WDT_MASK 0x3
+#define MT7621_GPIO_MODE_WDT_SHIFT 8
+#define MT7621_GPIO_MODE_WDT_GPIO 1
+#define MT7621_GPIO_MODE_PCIE_RST 0
+#define MT7621_GPIO_MODE_PCIE_REF 2
+#define MT7621_GPIO_MODE_PCIE_MASK 0x3
+#define MT7621_GPIO_MODE_PCIE_SHIFT 10
+#define MT7621_GPIO_MODE_PCIE_GPIO 1
+#define MT7621_GPIO_MODE_MDIO_MASK 0x3
+#define MT7621_GPIO_MODE_MDIO_SHIFT 12
+#define MT7621_GPIO_MODE_MDIO_GPIO 1
+#define MT7621_GPIO_MODE_RGMII1 14
+#define MT7621_GPIO_MODE_RGMII2 15
+#define MT7621_GPIO_MODE_SPI_MASK 0x3
+#define MT7621_GPIO_MODE_SPI_SHIFT 16
+#define MT7621_GPIO_MODE_SPI_GPIO 1
+#define MT7621_GPIO_MODE_SDHCI_MASK 0x3
+#define MT7621_GPIO_MODE_SDHCI_SHIFT 18
+#define MT7621_GPIO_MODE_SDHCI_GPIO 1
+
+static struct rt2880_pmx_func uart1_grp[] = { FUNC("uart1", 0, 1, 2) };
+static struct rt2880_pmx_func i2c_grp[] = { FUNC("i2c", 0, 3, 2) };
+static struct rt2880_pmx_func uart3_grp[] = {
+ FUNC("uart3", 0, 5, 4),
+ FUNC("i2s", 2, 5, 4),
+ FUNC("spdif3", 3, 5, 4),
+};
+static struct rt2880_pmx_func uart2_grp[] = {
+ FUNC("uart2", 0, 9, 4),
+ FUNC("pcm", 2, 9, 4),
+ FUNC("spdif2", 3, 9, 4),
+};
+static struct rt2880_pmx_func jtag_grp[] = { FUNC("jtag", 0, 13, 5) };
+static struct rt2880_pmx_func wdt_grp[] = {
+ FUNC("wdt rst", 0, 18, 1),
+ FUNC("wdt refclk", 2, 18, 1),
+};
+static struct rt2880_pmx_func pcie_rst_grp[] = {
+ FUNC("pcie rst", MT7621_GPIO_MODE_PCIE_RST, 19, 1),
+ FUNC("pcie refclk", MT7621_GPIO_MODE_PCIE_REF, 19, 1)
+};
+static struct rt2880_pmx_func mdio_grp[] = { FUNC("mdio", 0, 20, 2) };
+static struct rt2880_pmx_func rgmii2_grp[] = { FUNC("rgmii2", 0, 22, 12) };
+static struct rt2880_pmx_func spi_grp[] = {
+ FUNC("spi", 0, 34, 7),
+ FUNC("nand1", 2, 34, 7),
+};
+static struct rt2880_pmx_func sdhci_grp[] = {
+ FUNC("sdhci", 0, 41, 8),
+ FUNC("nand2", 2, 41, 8),
+};
+static struct rt2880_pmx_func rgmii1_grp[] = { FUNC("rgmii1", 0, 49, 12) };
+
+static struct rt2880_pmx_group mt7621_pinmux_data[] = {
+ GRP("uart1", uart1_grp, 1, MT7621_GPIO_MODE_UART1),
+ GRP("i2c", i2c_grp, 1, MT7621_GPIO_MODE_I2C),
+ GRP_G("uart3", uart3_grp, MT7621_GPIO_MODE_UART3_MASK,
+ MT7621_GPIO_MODE_UART3_GPIO, MT7621_GPIO_MODE_UART3_SHIFT),
+ GRP_G("uart2", uart2_grp, MT7621_GPIO_MODE_UART2_MASK,
+ MT7621_GPIO_MODE_UART2_GPIO, MT7621_GPIO_MODE_UART2_SHIFT),
+ GRP("jtag", jtag_grp, 1, MT7621_GPIO_MODE_JTAG),
+ GRP_G("wdt", wdt_grp, MT7621_GPIO_MODE_WDT_MASK,
+ MT7621_GPIO_MODE_WDT_GPIO, MT7621_GPIO_MODE_WDT_SHIFT),
+ GRP_G("pcie", pcie_rst_grp, MT7621_GPIO_MODE_PCIE_MASK,
+ MT7621_GPIO_MODE_PCIE_GPIO, MT7621_GPIO_MODE_PCIE_SHIFT),
+ GRP_G("mdio", mdio_grp, MT7621_GPIO_MODE_MDIO_MASK,
+ MT7621_GPIO_MODE_MDIO_GPIO, MT7621_GPIO_MODE_MDIO_SHIFT),
+ GRP("rgmii2", rgmii2_grp, 1, MT7621_GPIO_MODE_RGMII2),
+ GRP_G("spi", spi_grp, MT7621_GPIO_MODE_SPI_MASK,
+ MT7621_GPIO_MODE_SPI_GPIO, MT7621_GPIO_MODE_SPI_SHIFT),
+ GRP_G("sdhci", sdhci_grp, MT7621_GPIO_MODE_SDHCI_MASK,
+ MT7621_GPIO_MODE_SDHCI_GPIO, MT7621_GPIO_MODE_SDHCI_SHIFT),
+ GRP("rgmii1", rgmii1_grp, 1, MT7621_GPIO_MODE_RGMII1),
+ { 0 }
+};
+
+phys_addr_t mips_cpc_default_phys_base(void)
+{
+ panic("Cannot detect cpc address");
+}
+
+void __init ralink_clk_init(void)
+{
+ int cpu_fdiv = 0;
+ int cpu_ffrac = 0;
+ int fbdiv = 0;
+ u32 clk_sts, syscfg;
+ u8 clk_sel = 0, xtal_mode;
+ u32 cpu_clk;
+
+ if ((rt_sysc_r32(SYSC_REG_CPLL_CLKCFG0) & CPU_CLK_SEL) != 0)
+ clk_sel = 1;
+
+ switch (clk_sel) {
+ case 0:
+ clk_sts = rt_sysc_r32(SYSC_REG_CUR_CLK_STS);
+ cpu_fdiv = ((clk_sts >> 8) & 0x1F);
+ cpu_ffrac = (clk_sts & 0x1F);
+ cpu_clk = (500 * cpu_ffrac / cpu_fdiv) * 1000 * 1000;
+ break;
+
+ case 1:
+ fbdiv = ((rt_sysc_r32(0x648) >> 4) & 0x7F) + 1;
+ syscfg = rt_sysc_r32(SYSC_REG_SYSCFG);
+ xtal_mode = (syscfg >> 6) & 0x7;
+ if (xtal_mode >= 6) {
+ /* 25Mhz Xtal */
+ cpu_clk = 25 * fbdiv * 1000 * 1000;
+ } else if (xtal_mode >= 3) {
+ /* 40Mhz Xtal */
+ cpu_clk = 40 * fbdiv * 1000 * 1000;
+ } else {
+ /* 20Mhz Xtal */
+ cpu_clk = 20 * fbdiv * 1000 * 1000;
+ }
+ break;
+ }
+}
+
+void __init ralink_of_remap(void)
+{
+ rt_sysc_membase = plat_of_remap_node("mtk,mt7621-sysc");
+ rt_memc_membase = plat_of_remap_node("mtk,mt7621-memc");
+
+ if (!rt_sysc_membase || !rt_memc_membase)
+ panic("Failed to remap core resources");
+}
+
+void prom_soc_init(struct ralink_soc_info *soc_info)
+{
+ void __iomem *sysc = (void __iomem *) KSEG1ADDR(MT7621_SYSC_BASE);
+ unsigned char *name = NULL;
+ u32 n0;
+ u32 n1;
+ u32 rev;
+
+ n0 = __raw_readl(sysc + SYSC_REG_CHIP_NAME0);
+ n1 = __raw_readl(sysc + SYSC_REG_CHIP_NAME1);
+
+ if (n0 == MT7621_CHIP_NAME0 && n1 == MT7621_CHIP_NAME1) {
+ name = "MT7621";
+ soc_info->compatible = "mtk,mt7621-soc";
+ } else {
+ panic("mt7621: unknown SoC, n0:%08x n1:%08x\n", n0, n1);
+ }
+
+ rev = __raw_readl(sysc + SYSC_REG_CHIP_REV);
+
+ snprintf(soc_info->sys_type, RAMIPS_SYS_TYPE_LEN,
+ "MediaTek %s ver:%u eco:%u",
+ name,
+ (rev >> CHIP_REV_VER_SHIFT) & CHIP_REV_VER_MASK,
+ (rev & CHIP_REV_ECO_MASK));
+
+ soc_info->mem_size_min = MT7621_DDR2_SIZE_MIN;
+ soc_info->mem_size_max = MT7621_DDR2_SIZE_MAX;
+ soc_info->mem_base = MT7621_DRAM_BASE;
+
+ rt2880_pinmux_data = mt7621_pinmux_data;
+
+ /* Early detection of CMP support */
+ mips_cm_probe();
+ mips_cpc_probe();
+
+ if (mips_cm_numiocu()) {
+ /*
+ * mips_cm_probe() wipes out bootloader
+ * config for CM regions and we have to configure them
+ * again. This SoC cannot talk to pamlbus devices
+ * witout proper iocu region set up.
+ *
+ * FIXME: it would be better to do this with values
+ * from DT, but we need this very early because
+ * without this we cannot talk to pretty much anything
+ * including serial.
+ */
+ write_gcr_reg0_base(MT7621_PALMBUS_BASE);
+ write_gcr_reg0_mask(~MT7621_PALMBUS_SIZE |
+ CM_GCR_REGn_MASK_CMTGT_IOCU0);
+ }
+
+ if (!register_cps_smp_ops())
+ return;
+ if (!register_cmp_smp_ops())
+ return;
+ if (!register_vsmp_smp_ops())
+ return;
+}
diff --git a/arch/mips/ralink/rt288x.c b/arch/mips/ralink/rt288x.c
index 844f5cd55c8f..3c84166ebcb7 100644
--- a/arch/mips/ralink/rt288x.c
+++ b/arch/mips/ralink/rt288x.c
@@ -119,5 +119,5 @@ void prom_soc_init(struct ralink_soc_info *soc_info)
soc_info->mem_size_max = RT2880_MEM_SIZE_MAX;
rt2880_pinmux_data = rt2880_pinmux_data_act;
- ralink_soc == RT2880_SOC;
+ ralink_soc = RT2880_SOC;
}
diff --git a/arch/mips/ralink/rt305x.c b/arch/mips/ralink/rt305x.c
index 9e4572592065..d7c4ba43a428 100644
--- a/arch/mips/ralink/rt305x.c
+++ b/arch/mips/ralink/rt305x.c
@@ -201,6 +201,7 @@ void __init ralink_clk_init(void)
ralink_clk_add("cpu", cpu_rate);
ralink_clk_add("sys", sys_rate);
ralink_clk_add("10000b00.spi", sys_rate);
+ ralink_clk_add("10000b40.spi", sys_rate);
ralink_clk_add("10000100.timer", wdt_rate);
ralink_clk_add("10000120.watchdog", wdt_rate);
ralink_clk_add("10000500.uart", uart_rate);
diff --git a/arch/mips/ralink/rt3883.c b/arch/mips/ralink/rt3883.c
index 582995aaaf4e..fafec947b27d 100644
--- a/arch/mips/ralink/rt3883.c
+++ b/arch/mips/ralink/rt3883.c
@@ -109,6 +109,7 @@ void __init ralink_clk_init(void)
ralink_clk_add("10000120.watchdog", sys_rate);
ralink_clk_add("10000500.uart", 40000000);
ralink_clk_add("10000b00.spi", sys_rate);
+ ralink_clk_add("10000b40.spi", sys_rate);
ralink_clk_add("10000c00.uartlite", 40000000);
ralink_clk_add("10100000.ethernet", sys_rate);
ralink_clk_add("10180000.wmac", 40000000);
diff --git a/arch/mips/ralink/timer-gic.c b/arch/mips/ralink/timer-gic.c
new file mode 100644
index 000000000000..5b4f186bcf95
--- /dev/null
+++ b/arch/mips/ralink/timer-gic.c
@@ -0,0 +1,24 @@
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Copyright (C) 2015 Nikolay Martynov <mar.kolya@gmail.com>
+ * Copyright (C) 2015 John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/init.h>
+
+#include <linux/of.h>
+#include <linux/clk-provider.h>
+#include <linux/clocksource.h>
+
+#include "common.h"
+
+void __init plat_time_init(void)
+{
+ ralink_of_remap();
+
+ of_clk_init(NULL);
+ clocksource_probe();
+}
diff --git a/arch/mips/rb532/gpio.c b/arch/mips/rb532/gpio.c
index 650d5d39f34d..fd1108543a71 100644
--- a/arch/mips/rb532/gpio.c
+++ b/arch/mips/rb532/gpio.c
@@ -89,7 +89,7 @@ static int rb532_gpio_get(struct gpio_chip *chip, unsigned offset)
struct rb532_gpio_chip *gpch;
gpch = container_of(chip, struct rb532_gpio_chip, chip);
- return rb532_get_bit(offset, gpch->regbase + GPIOD);
+ return !!rb532_get_bit(offset, gpch->regbase + GPIOD);
}
/*
diff --git a/arch/mips/txx9/generic/setup.c b/arch/mips/txx9/generic/setup.c
index 9d9962ab7d25..2fd350f31f4b 100644
--- a/arch/mips/txx9/generic/setup.c
+++ b/arch/mips/txx9/generic/setup.c
@@ -689,7 +689,7 @@ static int txx9_iocled_get(struct gpio_chip *chip, unsigned int offset)
{
struct txx9_iocled_data *data =
container_of(chip, struct txx9_iocled_data, chip);
- return data->cur_val & (1 << offset);
+ return !!(data->cur_val & (1 << offset));
}
static void txx9_iocled_set(struct gpio_chip *chip, unsigned int offset,
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 271fefbbe521..9d08d8cbed1a 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -38,8 +38,7 @@
#define KVM_MAX_VCPUS NR_CPUS
#define KVM_MAX_VCORES NR_CPUS
-#define KVM_USER_MEM_SLOTS 32
-#define KVM_MEM_SLOTS_NUM KVM_USER_MEM_SLOTS
+#define KVM_USER_MEM_SLOTS 512
#ifdef CONFIG_KVM_MMIO
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 774a253ca4e1..9bf7031a67ff 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -377,15 +377,12 @@ no_seg_found:
static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb)
{
- struct kvmppc_vcpu_book3s *vcpu_book3s;
u64 esid, esid_1t;
int slb_nr;
struct kvmppc_slb *slbe;
dprintk("KVM MMU: slbmte(0x%llx, 0x%llx)\n", rs, rb);
- vcpu_book3s = to_book3s(vcpu);
-
esid = GET_ESID(rb);
esid_1t = GET_ESID_1T(rb);
slb_nr = rb & 0xfff;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index cff207b72c46..baeddb06811d 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -833,6 +833,24 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
vcpu->stat.sum_exits++;
+ /*
+ * This can happen if an interrupt occurs in the last stages
+ * of guest entry or the first stages of guest exit (i.e. after
+ * setting paca->kvm_hstate.in_guest to KVM_GUEST_MODE_GUEST_HV
+ * and before setting it to KVM_GUEST_MODE_HOST_HV).
+ * That can happen due to a bug, or due to a machine check
+ * occurring at just the wrong time.
+ */
+ if (vcpu->arch.shregs.msr & MSR_HV) {
+ printk(KERN_EMERG "KVM trap in HV mode!\n");
+ printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
+ vcpu->arch.trap, kvmppc_get_pc(vcpu),
+ vcpu->arch.shregs.msr);
+ kvmppc_dump_regs(vcpu);
+ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ run->hw.hardware_exit_reason = vcpu->arch.trap;
+ return RESUME_HOST;
+ }
run->exit_reason = KVM_EXIT_UNKNOWN;
run->ready_for_interrupt_injection = 1;
switch (vcpu->arch.trap) {
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 3c6badcd53ef..6ee26de9a1de 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -2153,7 +2153,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
/* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */
2: rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW
- rlwimi r5, r4, 1, DAWRX_WT
+ rlwimi r5, r4, 2, DAWRX_WT
clrrdi r4, r4, 3
std r4, VCPU_DAWR(r3)
std r5, VCPU_DAWRX(r3)
@@ -2404,6 +2404,8 @@ machine_check_realmode:
* guest as machine check causing guest to crash.
*/
ld r11, VCPU_MSR(r9)
+ rldicl. r0, r11, 64-MSR_HV_LG, 63 /* check if it happened in HV mode */
+ bne mc_cont /* if so, exit to host */
andi. r10, r11, MSR_RI /* check for unrecoverable exception */
beq 1f /* Deliver a machine check to guest */
ld r10, VCPU_PC(r9)
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 6fd2405c7f4a..a3b182dcb823 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -919,21 +919,17 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
r = -ENXIO;
break;
}
- vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
+ val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0];
break;
case KVM_REG_PPC_VSCR:
if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
r = -ENXIO;
break;
}
- vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val);
+ val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]);
break;
case KVM_REG_PPC_VRSAVE:
- if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
- r = -ENXIO;
- break;
- }
- vcpu->arch.vrsave = set_reg_val(reg->id, val);
+ val = get_reg_val(reg->id, vcpu->arch.vrsave);
break;
#endif /* CONFIG_ALTIVEC */
default:
@@ -974,17 +970,21 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
r = -ENXIO;
break;
}
- val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0];
+ vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
break;
case KVM_REG_PPC_VSCR:
if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
r = -ENXIO;
break;
}
- val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]);
+ vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val);
break;
case KVM_REG_PPC_VRSAVE:
- val = get_reg_val(reg->id, vcpu->arch.vrsave);
+ if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ r = -ENXIO;
+ break;
+ }
+ vcpu->arch.vrsave = set_reg_val(reg->id, val);
break;
#endif /* CONFIG_ALTIVEC */
default:
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index 5038fd578e65..2936a0044c04 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -1799,9 +1799,9 @@ static int spufs_mfc_fsync(struct file *file, loff_t start, loff_t end, int data
struct inode *inode = file_inode(file);
int err = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (!err) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
err = spufs_mfc_flush(file, NULL);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
return err;
}
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index ad4840f86be1..dfa863876778 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -163,7 +163,7 @@ static void spufs_prune_dir(struct dentry *dir)
{
struct dentry *dentry, *tmp;
- mutex_lock(&d_inode(dir)->i_mutex);
+ inode_lock(d_inode(dir));
list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_child) {
spin_lock(&dentry->d_lock);
if (simple_positive(dentry)) {
@@ -180,7 +180,7 @@ static void spufs_prune_dir(struct dentry *dir)
}
}
shrink_dcache_parent(dir);
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
}
/* Caller must hold parent->i_mutex */
@@ -225,9 +225,9 @@ static int spufs_dir_close(struct inode *inode, struct file *file)
parent = d_inode(dir->d_parent);
ctx = SPUFS_I(d_inode(dir))->i_ctx;
- mutex_lock_nested(&parent->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(parent, I_MUTEX_PARENT);
ret = spufs_rmdir(parent, dir);
- mutex_unlock(&parent->i_mutex);
+ inode_unlock(parent);
WARN_ON(ret);
return dcache_dir_close(inode, file);
@@ -270,7 +270,7 @@ spufs_mkdir(struct inode *dir, struct dentry *dentry, unsigned int flags,
inode->i_op = &simple_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
dget(dentry);
inc_nlink(dir);
@@ -291,7 +291,7 @@ spufs_mkdir(struct inode *dir, struct dentry *dentry, unsigned int flags,
if (ret)
spufs_rmdir(dir, dentry);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index b2e5902bd8f4..0f3da2cb2bd6 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -67,7 +67,7 @@ static void hypfs_remove(struct dentry *dentry)
struct dentry *parent;
parent = dentry->d_parent;
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
if (simple_positive(dentry)) {
if (d_is_dir(dentry))
simple_rmdir(d_inode(parent), dentry);
@@ -76,7 +76,7 @@ static void hypfs_remove(struct dentry *dentry)
}
d_delete(dentry);
dput(dentry);
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
}
static void hypfs_delete_tree(struct dentry *root)
@@ -331,7 +331,7 @@ static struct dentry *hypfs_create_file(struct dentry *parent, const char *name,
struct dentry *dentry;
struct inode *inode;
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
dentry = lookup_one_len(name, parent, strlen(name));
if (IS_ERR(dentry)) {
dentry = ERR_PTR(-ENOMEM);
@@ -359,7 +359,7 @@ static struct dentry *hypfs_create_file(struct dentry *parent, const char *name,
d_instantiate(dentry, inode);
dget(dentry);
fail:
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
return dentry;
}
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 6742414dbd6f..8959ebb6d2c9 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -546,7 +546,6 @@ struct kvm_vcpu_arch {
struct kvm_s390_sie_block *sie_block;
unsigned int host_acrs[NUM_ACRS];
struct fpu host_fpregs;
- struct fpu guest_fpregs;
struct kvm_s390_local_interrupt local_int;
struct hrtimer ckc_timer;
struct kvm_s390_pgm_info pgm;
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 5fce52cf0e57..5ea5af3c7db7 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -29,6 +29,7 @@ config KVM
select HAVE_KVM_IRQFD
select HAVE_KVM_IRQ_ROUTING
select SRCU
+ select KVM_VFIO
---help---
Support hosting paravirtualized guest machines using the SIE
virtualization capability on the mainframe. This should work
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index b3b553469650..d42fa38c2429 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -7,7 +7,7 @@
# as published by the Free Software Foundation.
KVM := ../../../virt/kvm
-common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqchip.o
+common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqchip.o $(KVM)/vfio.o
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 835d60bedb54..4af21c771f9b 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1423,44 +1423,18 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
return 0;
}
-/*
- * Backs up the current FP/VX register save area on a particular
- * destination. Used to switch between different register save
- * areas.
- */
-static inline void save_fpu_to(struct fpu *dst)
-{
- dst->fpc = current->thread.fpu.fpc;
- dst->regs = current->thread.fpu.regs;
-}
-
-/*
- * Switches the FP/VX register save area from which to lazy
- * restore register contents.
- */
-static inline void load_fpu_from(struct fpu *from)
-{
- current->thread.fpu.fpc = from->fpc;
- current->thread.fpu.regs = from->regs;
-}
-
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
/* Save host register state */
save_fpu_regs();
- save_fpu_to(&vcpu->arch.host_fpregs);
-
- if (test_kvm_facility(vcpu->kvm, 129)) {
- current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
- /*
- * Use the register save area in the SIE-control block
- * for register restore and save in kvm_arch_vcpu_put()
- */
- current->thread.fpu.vxrs =
- (__vector128 *)&vcpu->run->s.regs.vrs;
- } else
- load_fpu_from(&vcpu->arch.guest_fpregs);
+ vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
+ vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
+ /* Depending on MACHINE_HAS_VX, data stored to vrs either
+ * has vector register or floating point register format.
+ */
+ current->thread.fpu.regs = vcpu->run->s.regs.vrs;
+ current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
if (test_fp_ctl(current->thread.fpu.fpc))
/* User space provided an invalid FPC, let's clear it */
current->thread.fpu.fpc = 0;
@@ -1476,19 +1450,13 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
gmap_disable(vcpu->arch.gmap);
+ /* Save guest register state */
save_fpu_regs();
+ vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
- if (test_kvm_facility(vcpu->kvm, 129))
- /*
- * kvm_arch_vcpu_load() set up the register save area to
- * the &vcpu->run->s.regs.vrs and, thus, the vector registers
- * are already saved. Only the floating-point control must be
- * copied.
- */
- vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
- else
- save_fpu_to(&vcpu->arch.guest_fpregs);
- load_fpu_from(&vcpu->arch.host_fpregs);
+ /* Restore host register state */
+ current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
+ current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
save_access_regs(vcpu->run->s.regs.acrs);
restore_access_regs(vcpu->arch.host_acrs);
@@ -1506,8 +1474,9 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
vcpu->arch.sie_block->gcr[0] = 0xE0UL;
vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
- vcpu->arch.guest_fpregs.fpc = 0;
- asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
+ /* make sure the new fpc will be lazily loaded */
+ save_fpu_regs();
+ current->thread.fpu.fpc = 0;
vcpu->arch.sie_block->gbea = 1;
vcpu->arch.sie_block->pp = 0;
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
@@ -1648,17 +1617,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
vcpu->arch.local_int.wq = &vcpu->wq;
vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
- /*
- * Allocate a save area for floating-point registers. If the vector
- * extension is available, register contents are saved in the SIE
- * control block. The allocated save area is still required in
- * particular places, for example, in kvm_s390_vcpu_store_status().
- */
- vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
- GFP_KERNEL);
- if (!vcpu->arch.guest_fpregs.fprs)
- goto out_free_sie_block;
-
rc = kvm_vcpu_init(vcpu, kvm, id);
if (rc)
goto out_free_sie_block;
@@ -1879,19 +1837,27 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
+ /* make sure the new values will be lazily loaded */
+ save_fpu_regs();
if (test_fp_ctl(fpu->fpc))
return -EINVAL;
- memcpy(vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
- vcpu->arch.guest_fpregs.fpc = fpu->fpc;
- save_fpu_regs();
- load_fpu_from(&vcpu->arch.guest_fpregs);
+ current->thread.fpu.fpc = fpu->fpc;
+ if (MACHINE_HAS_VX)
+ convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
+ else
+ memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
return 0;
}
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
- memcpy(&fpu->fprs, vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
- fpu->fpc = vcpu->arch.guest_fpregs.fpc;
+ /* make sure we have the latest values */
+ save_fpu_regs();
+ if (MACHINE_HAS_VX)
+ convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
+ else
+ memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
+ fpu->fpc = current->thread.fpu.fpc;
return 0;
}
@@ -2396,6 +2362,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
{
unsigned char archmode = 1;
+ freg_t fprs[NUM_FPRS];
unsigned int px;
u64 clkcomp;
int rc;
@@ -2411,8 +2378,16 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
gpa = px;
} else
gpa -= __LC_FPREGS_SAVE_AREA;
- rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
- vcpu->arch.guest_fpregs.fprs, 128);
+
+ /* manually convert vector registers if necessary */
+ if (MACHINE_HAS_VX) {
+ convert_vx_to_fp(fprs, current->thread.fpu.vxrs);
+ rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
+ fprs, 128);
+ } else {
+ rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
+ vcpu->run->s.regs.vrs, 128);
+ }
rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
vcpu->run->s.regs.gprs, 128);
rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
@@ -2420,7 +2395,7 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
&px, 4);
rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
- &vcpu->arch.guest_fpregs.fpc, 4);
+ &vcpu->run->s.regs.fpc, 4);
rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
&vcpu->arch.sie_block->todpr, 4);
rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
@@ -2443,19 +2418,7 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
* it into the save area
*/
save_fpu_regs();
- if (test_kvm_facility(vcpu->kvm, 129)) {
- /*
- * If the vector extension is available, the vector registers
- * which overlaps with floating-point registers are saved in
- * the SIE-control block. Hence, extract the floating-point
- * registers and the FPC value and store them in the
- * guest_fpregs structure.
- */
- vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
- convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
- current->thread.fpu.vxrs);
- } else
- save_fpu_to(&vcpu->arch.guest_fpregs);
+ vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
save_access_regs(vcpu->run->s.regs.acrs);
return kvm_s390_store_status_unloaded(vcpu, addr);
diff --git a/arch/sh/include/asm/barrier.h b/arch/sh/include/asm/barrier.h
index f887c6465a82..8a84e05adb2e 100644
--- a/arch/sh/include/asm/barrier.h
+++ b/arch/sh/include/asm/barrier.h
@@ -33,7 +33,6 @@
#endif
#define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
-#define smp_store_mb(var, value) __smp_store_mb(var, value)
#include <asm-generic/barrier.h>
diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
index 1544fabcd7f9..c57fd1ea9689 100644
--- a/arch/x86/include/asm/pmem.h
+++ b/arch/x86/include/asm/pmem.h
@@ -67,18 +67,19 @@ static inline void arch_wmb_pmem(void)
}
/**
- * __arch_wb_cache_pmem - write back a cache range with CLWB
+ * arch_wb_cache_pmem - write back a cache range with CLWB
* @vaddr: virtual start address
* @size: number of bytes to write back
*
* Write back a cache range using the CLWB (cache line write back)
* instruction. This function requires explicit ordering with an
- * arch_wmb_pmem() call. This API is internal to the x86 PMEM implementation.
+ * arch_wmb_pmem() call.
*/
-static inline void __arch_wb_cache_pmem(void *vaddr, size_t size)
+static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size)
{
u16 x86_clflush_size = boot_cpu_data.x86_clflush_size;
unsigned long clflush_mask = x86_clflush_size - 1;
+ void *vaddr = (void __force *)addr;
void *vend = vaddr + size;
void *p;
@@ -115,7 +116,7 @@ static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
len = copy_from_iter_nocache(vaddr, bytes, i);
if (__iter_needs_pmem_wb(i))
- __arch_wb_cache_pmem(vaddr, bytes);
+ arch_wb_cache_pmem(addr, bytes);
return len;
}
@@ -133,7 +134,7 @@ static inline void arch_clear_pmem(void __pmem *addr, size_t size)
void *vaddr = (void __force *)addr;
memset(vaddr, 0, size);
- __arch_wb_cache_pmem(vaddr, size);
+ arch_wb_cache_pmem(addr, size);
}
static inline bool __arch_has_wmb_pmem(void)
diff --git a/block/Makefile b/block/Makefile
index db5f622c9d67..9eda2322b2d4 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -5,7 +5,7 @@
obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
- blk-iopoll.o blk-lib.o blk-mq.o blk-mq-tag.o \
+ blk-lib.o blk-mq.o blk-mq-tag.o \
blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \
genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
badblocks.o partitions/
diff --git a/block/ioctl.c b/block/ioctl.c
index 2c84683aada5..77f5d17779d6 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -455,12 +455,12 @@ static int blkdev_daxset(struct block_device *bdev, unsigned long argp)
if (arg && !blkdev_dax_capable(bdev))
return -ENOTTY;
- mutex_lock(&bdev->bd_inode->i_mutex);
+ inode_lock(bdev->bd_inode);
if (bdev->bd_map_count == 0)
inode_set_flags(bdev->bd_inode, arg, S_DAX);
else
rc = -EBUSY;
- mutex_unlock(&bdev->bd_inode->i_mutex);
+ inode_unlock(bdev->bd_inode);
return rc;
}
#else
diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index 6682c5daf742..6e6bc1059301 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -32,6 +32,7 @@
#include <linux/hardirq.h>
#include <linux/pstore.h>
#include <linux/vmalloc.h>
+#include <linux/mm.h> /* kvfree() */
#include <acpi/apei.h>
#include "apei-internal.h"
@@ -532,10 +533,7 @@ retry:
return -ENOMEM;
memcpy(new_entries, entries,
erst_record_id_cache.len * sizeof(entries[0]));
- if (erst_record_id_cache.size < PAGE_SIZE)
- kfree(entries);
- else
- vfree(entries);
+ kvfree(entries);
erst_record_id_cache.entries = entries = new_entries;
erst_record_id_cache.size = new_size;
}
diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index 68f03141e432..44a74cf1372c 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -215,9 +215,9 @@ static int handle_create(const char *nodename, umode_t mode, kuid_t uid,
newattrs.ia_uid = uid;
newattrs.ia_gid = gid;
newattrs.ia_valid = ATTR_MODE|ATTR_UID|ATTR_GID;
- mutex_lock(&d_inode(dentry)->i_mutex);
+ inode_lock(d_inode(dentry));
notify_change(dentry, &newattrs, NULL);
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
/* mark as kernel-created inode */
d_inode(dentry)->i_private = &thread;
@@ -244,7 +244,7 @@ static int dev_rmdir(const char *name)
err = -ENOENT;
}
dput(dentry);
- mutex_unlock(&d_inode(parent.dentry)->i_mutex);
+ inode_unlock(d_inode(parent.dentry));
path_put(&parent);
return err;
}
@@ -321,9 +321,9 @@ static int handle_remove(const char *nodename, struct device *dev)
newattrs.ia_mode = stat.mode & ~0777;
newattrs.ia_valid =
ATTR_UID|ATTR_GID|ATTR_MODE;
- mutex_lock(&d_inode(dentry)->i_mutex);
+ inode_lock(d_inode(dentry));
notify_change(dentry, &newattrs, NULL);
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
err = vfs_unlink(d_inode(parent.dentry), dentry, NULL);
if (!err || err == -ENOENT)
deleted = 1;
@@ -332,7 +332,7 @@ static int handle_remove(const char *nodename, struct device *dev)
err = -ENOENT;
}
dput(dentry);
- mutex_unlock(&d_inode(parent.dentry)->i_mutex);
+ inode_unlock(d_inode(parent.dentry));
path_put(&parent);
if (deleted && strchr(nodename, '/'))
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index ad80c85e0857..d048d2009e89 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -964,9 +964,9 @@ aoecmd_sleepwork(struct work_struct *work)
ssize = get_capacity(d->gd);
bd = bdget_disk(d->gd, 0);
if (bd) {
- mutex_lock(&bd->bd_inode->i_mutex);
+ inode_lock(bd->bd_inode);
i_size_write(bd->bd_inode, (loff_t)ssize<<9);
- mutex_unlock(&bd->bd_inode->i_mutex);
+ inode_unlock(bd->bd_inode);
bdput(bd);
}
spin_lock_irq(&d->lock);
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 0dabc9b93725..92d6fc020a65 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -364,12 +364,9 @@ static void bm_free_pages(struct page **pages, unsigned long number)
}
}
-static void bm_vk_free(void *ptr, int v)
+static inline void bm_vk_free(void *ptr)
{
- if (v)
- vfree(ptr);
- else
- kfree(ptr);
+ kvfree(ptr);
}
/*
@@ -379,7 +376,7 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
{
struct page **old_pages = b->bm_pages;
struct page **new_pages, *page;
- unsigned int i, bytes, vmalloced = 0;
+ unsigned int i, bytes;
unsigned long have = b->bm_number_of_pages;
BUG_ON(have == 0 && old_pages != NULL);
@@ -401,7 +398,6 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
PAGE_KERNEL);
if (!new_pages)
return NULL;
- vmalloced = 1;
}
if (want >= have) {
@@ -411,7 +407,7 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
if (!page) {
bm_free_pages(new_pages + have, i - have);
- bm_vk_free(new_pages, vmalloced);
+ bm_vk_free(new_pages);
return NULL;
}
/* we want to know which page it is
@@ -427,11 +423,6 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
*/
}
- if (vmalloced)
- b->bm_flags |= BM_P_VMALLOCED;
- else
- b->bm_flags &= ~BM_P_VMALLOCED;
-
return new_pages;
}
@@ -469,7 +460,7 @@ void drbd_bm_cleanup(struct drbd_device *device)
if (!expect(device->bitmap))
return;
bm_free_pages(device->bitmap->bm_pages, device->bitmap->bm_number_of_pages);
- bm_vk_free(device->bitmap->bm_pages, (BM_P_VMALLOCED & device->bitmap->bm_flags));
+ bm_vk_free(device->bitmap->bm_pages);
kfree(device->bitmap);
device->bitmap = NULL;
}
@@ -643,7 +634,6 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi
unsigned long want, have, onpages; /* number of pages */
struct page **npages, **opages = NULL;
int err = 0, growing;
- int opages_vmalloced;
if (!expect(b))
return -ENOMEM;
@@ -656,8 +646,6 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi
if (capacity == b->bm_dev_capacity)
goto out;
- opages_vmalloced = (BM_P_VMALLOCED & b->bm_flags);
-
if (capacity == 0) {
spin_lock_irq(&b->bm_lock);
opages = b->bm_pages;
@@ -671,7 +659,7 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi
b->bm_dev_capacity = 0;
spin_unlock_irq(&b->bm_lock);
bm_free_pages(opages, onpages);
- bm_vk_free(opages, opages_vmalloced);
+ bm_vk_free(opages);
goto out;
}
bits = BM_SECT_TO_BIT(ALIGN(capacity, BM_SECT_PER_BIT));
@@ -744,7 +732,7 @@ int drbd_bm_resize(struct drbd_device *device, sector_t capacity, int set_new_bi
spin_unlock_irq(&b->bm_lock);
if (opages != npages)
- bm_vk_free(opages, opages_vmalloced);
+ bm_vk_free(opages);
if (!growing)
b->bm_set = bm_count_bits(b);
drbd_info(device, "resync bitmap: bits=%lu words=%lu pages=%lu\n", bits, words, want);
diff --git a/drivers/block/drbd/drbd_debugfs.c b/drivers/block/drbd/drbd_debugfs.c
index 96a0107a72ea..4de95bbff486 100644
--- a/drivers/block/drbd/drbd_debugfs.c
+++ b/drivers/block/drbd/drbd_debugfs.c
@@ -434,12 +434,12 @@ static int drbd_single_open(struct file *file, int (*show)(struct seq_file *, vo
if (!parent || d_really_is_negative(parent))
goto out;
/* serialize with d_delete() */
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
/* Make sure the object is still alive */
if (simple_positive(file->f_path.dentry)
&& kref_get_unless_zero(kref))
ret = 0;
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
if (!ret) {
ret = single_open(file, show, data);
if (ret)
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index b6844feb9f9b..34bc84efc29e 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -536,9 +536,6 @@ struct drbd_bitmap; /* opaque for drbd_device */
/* definition of bits in bm_flags to be used in drbd_bm_lock
* and drbd_bitmap_io and friends. */
enum bm_flag {
- /* do we need to kfree, or vfree bm_pages? */
- BM_P_VMALLOCED = 0x10000, /* internal use only, will be masked out */
-
/* currently locked for bulk operation */
BM_LOCKED_MASK = 0xf,
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 81ea69fee7ca..4a876785b68c 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -5185,8 +5185,7 @@ static int rbd_dev_probe_parent(struct rbd_device *rbd_dev, int depth)
out_err:
rbd_dev_unparent(rbd_dev);
- if (parent)
- rbd_dev_destroy(parent);
+ rbd_dev_destroy(parent);
return ret;
}
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 6b1721f978c2..4f6f94c43412 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -689,7 +689,7 @@ static loff_t memory_lseek(struct file *file, loff_t offset, int orig)
{
loff_t ret;
- mutex_lock(&file_inode(file)->i_mutex);
+ inode_lock(file_inode(file));
switch (orig) {
case SEEK_CUR:
offset += file->f_pos;
@@ -706,7 +706,7 @@ static loff_t memory_lseek(struct file *file, loff_t offset, int orig)
default:
ret = -EINVAL;
}
- mutex_unlock(&file_inode(file)->i_mutex);
+ inode_unlock(file_inode(file));
return ret;
}
diff --git a/drivers/char/mspec.c b/drivers/char/mspec.c
index f1d7fa45c275..f3f92d5fcda0 100644
--- a/drivers/char/mspec.c
+++ b/drivers/char/mspec.c
@@ -93,14 +93,11 @@ struct vma_data {
spinlock_t lock; /* Serialize access to this structure. */
int count; /* Number of pages allocated. */
enum mspec_page_type type; /* Type of pages allocated. */
- int flags; /* See VMD_xxx below. */
unsigned long vm_start; /* Original (unsplit) base. */
unsigned long vm_end; /* Original (unsplit) end. */
unsigned long maddr[0]; /* Array of MSPEC addresses. */
};
-#define VMD_VMALLOCED 0x1 /* vmalloc'd rather than kmalloc'd */
-
/* used on shub2 to clear FOP cache in the HUB */
static unsigned long scratch_page[MAX_NUMNODES];
#define SH2_AMO_CACHE_ENTRIES 4
@@ -185,10 +182,7 @@ mspec_close(struct vm_area_struct *vma)
"failed to zero page %ld\n", my_page);
}
- if (vdata->flags & VMD_VMALLOCED)
- vfree(vdata);
- else
- kfree(vdata);
+ kvfree(vdata);
}
/*
@@ -256,7 +250,7 @@ mspec_mmap(struct file *file, struct vm_area_struct *vma,
enum mspec_page_type type)
{
struct vma_data *vdata;
- int pages, vdata_size, flags = 0;
+ int pages, vdata_size;
if (vma->vm_pgoff != 0)
return -EINVAL;
@@ -271,16 +265,13 @@ mspec_mmap(struct file *file, struct vm_area_struct *vma,
vdata_size = sizeof(struct vma_data) + pages * sizeof(long);
if (vdata_size <= PAGE_SIZE)
vdata = kzalloc(vdata_size, GFP_KERNEL);
- else {
+ else
vdata = vzalloc(vdata_size);
- flags = VMD_VMALLOCED;
- }
if (!vdata)
return -ENOMEM;
vdata->vm_start = vma->vm_start;
vdata->vm_end = vma->vm_end;
- vdata->flags = flags;
vdata->type = type;
spin_lock_init(&vdata->lock);
atomic_set(&vdata->refcnt, 1);
diff --git a/drivers/char/ps3flash.c b/drivers/char/ps3flash.c
index 0b311fa277ef..b526dc15c271 100644
--- a/drivers/char/ps3flash.c
+++ b/drivers/char/ps3flash.c
@@ -290,9 +290,9 @@ static int ps3flash_fsync(struct file *file, loff_t start, loff_t end, int datas
{
struct inode *inode = file_inode(file);
int err;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
err = ps3flash_writeback(ps3flash_dev);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return err;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 313b0cc8d676..82edf95b7740 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -2278,60 +2278,60 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
#define amdgpu_dpm_enable_bapm(adev, e) (adev)->pm.funcs->enable_bapm((adev), (e))
#define amdgpu_dpm_get_temperature(adev) \
- (adev)->pp_enabled ? \
+ ((adev)->pp_enabled ? \
(adev)->powerplay.pp_funcs->get_temperature((adev)->powerplay.pp_handle) : \
- (adev)->pm.funcs->get_temperature((adev))
+ (adev)->pm.funcs->get_temperature((adev)))
#define amdgpu_dpm_set_fan_control_mode(adev, m) \
- (adev)->pp_enabled ? \
+ ((adev)->pp_enabled ? \
(adev)->powerplay.pp_funcs->set_fan_control_mode((adev)->powerplay.pp_handle, (m)) : \
- (adev)->pm.funcs->set_fan_control_mode((adev), (m))
+ (adev)->pm.funcs->set_fan_control_mode((adev), (m)))
#define amdgpu_dpm_get_fan_control_mode(adev) \
- (adev)->pp_enabled ? \
+ ((adev)->pp_enabled ? \
(adev)->powerplay.pp_funcs->get_fan_control_mode((adev)->powerplay.pp_handle) : \
- (adev)->pm.funcs->get_fan_control_mode((adev))
+ (adev)->pm.funcs->get_fan_control_mode((adev)))
#define amdgpu_dpm_set_fan_speed_percent(adev, s) \
- (adev)->pp_enabled ? \
+ ((adev)->pp_enabled ? \
(adev)->powerplay.pp_funcs->set_fan_speed_percent((adev)->powerplay.pp_handle, (s)) : \
- (adev)->pm.funcs->set_fan_speed_percent((adev), (s))
+ (adev)->pm.funcs->set_fan_speed_percent((adev), (s)))
#define amdgpu_dpm_get_fan_speed_percent(adev, s) \
- (adev)->pp_enabled ? \
+ ((adev)->pp_enabled ? \
(adev)->powerplay.pp_funcs->get_fan_speed_percent((adev)->powerplay.pp_handle, (s)) : \
- (adev)->pm.funcs->get_fan_speed_percent((adev), (s))
+ (adev)->pm.funcs->get_fan_speed_percent((adev), (s)))
#define amdgpu_dpm_get_sclk(adev, l) \
- (adev)->pp_enabled ? \
+ ((adev)->pp_enabled ? \
(adev)->powerplay.pp_funcs->get_sclk((adev)->powerplay.pp_handle, (l)) : \
- (adev)->pm.funcs->get_sclk((adev), (l))
+ (adev)->pm.funcs->get_sclk((adev), (l)))
#define amdgpu_dpm_get_mclk(adev, l) \
- (adev)->pp_enabled ? \
+ ((adev)->pp_enabled ? \
(adev)->powerplay.pp_funcs->get_mclk((adev)->powerplay.pp_handle, (l)) : \
- (adev)->pm.funcs->get_mclk((adev), (l))
+ (adev)->pm.funcs->get_mclk((adev), (l)))
#define amdgpu_dpm_force_performance_level(adev, l) \
- (adev)->pp_enabled ? \
+ ((adev)->pp_enabled ? \
(adev)->powerplay.pp_funcs->force_performance_level((adev)->powerplay.pp_handle, (l)) : \
- (adev)->pm.funcs->force_performance_level((adev), (l))
+ (adev)->pm.funcs->force_performance_level((adev), (l)))
#define amdgpu_dpm_powergate_uvd(adev, g) \
- (adev)->pp_enabled ? \
+ ((adev)->pp_enabled ? \
(adev)->powerplay.pp_funcs->powergate_uvd((adev)->powerplay.pp_handle, (g)) : \
- (adev)->pm.funcs->powergate_uvd((adev), (g))
+ (adev)->pm.funcs->powergate_uvd((adev), (g)))
#define amdgpu_dpm_powergate_vce(adev, g) \
- (adev)->pp_enabled ? \
+ ((adev)->pp_enabled ? \
(adev)->powerplay.pp_funcs->powergate_vce((adev)->powerplay.pp_handle, (g)) : \
- (adev)->pm.funcs->powergate_vce((adev), (g))
+ (adev)->pm.funcs->powergate_vce((adev), (g)))
#define amdgpu_dpm_debugfs_print_current_performance_level(adev, m) \
- (adev)->pp_enabled ? \
+ ((adev)->pp_enabled ? \
(adev)->powerplay.pp_funcs->print_current_performance_level((adev)->powerplay.pp_handle, (m)) : \
- (adev)->pm.funcs->debugfs_print_current_performance_level((adev), (m))
+ (adev)->pm.funcs->debugfs_print_current_performance_level((adev), (m)))
#define amdgpu_dpm_get_current_power_state(adev) \
(adev)->powerplay.pp_funcs->get_current_power_state((adev)->powerplay.pp_handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 6f89f8e034d0..b882e8175615 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -478,9 +478,9 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
unsigned i;
- amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);
-
if (!error) {
+ amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);
+
/* Sort the buffer list from the smallest to largest buffer,
* which affects the order of buffers in the LRU list.
* This assures that the smallest buffers are added first
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index cfb6caad2a73..919146780a15 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -333,6 +333,10 @@ int amdgpu_fbdev_init(struct amdgpu_device *adev)
if (!adev->mode_info.mode_config_initialized)
return 0;
+ /* don't init fbdev if there are no connectors */
+ if (list_empty(&adev->ddev->mode_config.connector_list))
+ return 0;
+
/* select 8 bpp console on low vram cards */
if (adev->mc.real_vram_size <= (32*1024*1024))
bpp_sel = 8;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index c3ce103b6a33..a2a16acee34d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -399,7 +399,8 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
}
if (fpfn > bo->placements[i].fpfn)
bo->placements[i].fpfn = fpfn;
- if (lpfn && lpfn < bo->placements[i].lpfn)
+ if (!bo->placements[i].lpfn ||
+ (lpfn && lpfn < bo->placements[i].lpfn))
bo->placements[i].lpfn = lpfn;
bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
index 5ee9a0690278..b9d0d55f6b47 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
@@ -99,13 +99,24 @@ static int amdgpu_pp_early_init(void *handle)
#ifdef CONFIG_DRM_AMD_POWERPLAY
switch (adev->asic_type) {
- case CHIP_TONGA:
- case CHIP_FIJI:
- adev->pp_enabled = (amdgpu_powerplay > 0) ? true : false;
- break;
- default:
- adev->pp_enabled = (amdgpu_powerplay > 0) ? true : false;
- break;
+ case CHIP_TONGA:
+ case CHIP_FIJI:
+ adev->pp_enabled = (amdgpu_powerplay == 0) ? false : true;
+ break;
+ case CHIP_CARRIZO:
+ case CHIP_STONEY:
+ adev->pp_enabled = (amdgpu_powerplay > 0) ? true : false;
+ break;
+ /* These chips don't have powerplay implemenations */
+ case CHIP_BONAIRE:
+ case CHIP_HAWAII:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ case CHIP_KAVERI:
+ case CHIP_TOPAZ:
+ default:
+ adev->pp_enabled = false;
+ break;
}
#else
adev->pp_enabled = false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 78e9b0f14661..d1f234dd2126 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -487,7 +487,7 @@ static int amdgpu_debugfs_ring_info(struct seq_file *m, void *data)
seq_printf(m, "rptr: 0x%08x [%5d]\n",
rptr, rptr);
- rptr_next = ~0;
+ rptr_next = le32_to_cpu(*ring->next_rptr_cpu_addr);
seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n",
ring->wptr, ring->wptr);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index aefc668e6b5d..9599f7559b3d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1282,7 +1282,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
AMDGPU_VM_PTE_COUNT * 8);
- unsigned pd_size, pd_entries, pts_size;
+ unsigned pd_size, pd_entries;
int i, r;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
@@ -1300,8 +1300,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
pd_entries = amdgpu_vm_num_pdes(adev);
/* allocate page table array */
- pts_size = pd_entries * sizeof(struct amdgpu_vm_pt);
- vm->page_tables = kzalloc(pts_size, GFP_KERNEL);
+ vm->page_tables = drm_calloc_large(pd_entries, sizeof(struct amdgpu_vm_pt));
if (vm->page_tables == NULL) {
DRM_ERROR("Cannot allocate memory for page table array\n");
return -ENOMEM;
@@ -1361,7 +1360,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
for (i = 0; i < amdgpu_vm_num_pdes(adev); i++)
amdgpu_bo_unref(&vm->page_tables[i].entry.robj);
- kfree(vm->page_tables);
+ drm_free_large(vm->page_tables);
amdgpu_bo_unref(&vm->page_directory);
fence_put(vm->page_directory_fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 13235d84e5a6..95c0cdfbd1b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4186,7 +4186,18 @@ static int gfx_v8_0_soft_reset(void *handle)
gfx_v8_0_cp_gfx_enable(adev, false);
/* Disable MEC parsing/prefetching */
- /* XXX todo */
+ gfx_v8_0_cp_compute_enable(adev, false);
+
+ if (grbm_soft_reset || srbm_soft_reset) {
+ tmp = RREG32(mmGMCON_DEBUG);
+ tmp = REG_SET_FIELD(tmp,
+ GMCON_DEBUG, GFX_STALL, 1);
+ tmp = REG_SET_FIELD(tmp,
+ GMCON_DEBUG, GFX_CLEAR, 1);
+ WREG32(mmGMCON_DEBUG, tmp);
+
+ udelay(50);
+ }
if (grbm_soft_reset) {
tmp = RREG32(mmGRBM_SOFT_RESET);
@@ -4215,6 +4226,16 @@ static int gfx_v8_0_soft_reset(void *handle)
WREG32(mmSRBM_SOFT_RESET, tmp);
tmp = RREG32(mmSRBM_SOFT_RESET);
}
+
+ if (grbm_soft_reset || srbm_soft_reset) {
+ tmp = RREG32(mmGMCON_DEBUG);
+ tmp = REG_SET_FIELD(tmp,
+ GMCON_DEBUG, GFX_STALL, 0);
+ tmp = REG_SET_FIELD(tmp,
+ GMCON_DEBUG, GFX_CLEAR, 0);
+ WREG32(mmGMCON_DEBUG, tmp);
+ }
+
/* Wait a little for things to settle down */
udelay(50);
gfx_v8_0_print_status((void *)adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c b/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c
index f4a1346525fe..0497784b3652 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c
@@ -122,25 +122,12 @@ static int tonga_dpm_hw_fini(void *handle)
static int tonga_dpm_suspend(void *handle)
{
- return 0;
+ return tonga_dpm_hw_fini(handle);
}
static int tonga_dpm_resume(void *handle)
{
- int ret;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- mutex_lock(&adev->pm.mutex);
-
- ret = tonga_smu_start(adev);
- if (ret) {
- DRM_ERROR("SMU start failed\n");
- goto fail;
- }
-
-fail:
- mutex_unlock(&adev->pm.mutex);
- return ret;
+ return tonga_dpm_hw_init(handle);
}
static int tonga_dpm_set_clockgating_state(void *handle,
diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
index 8f5d5edcf193..aa67244a77ae 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
@@ -64,6 +64,11 @@ static int pp_sw_init(void *handle)
if (ret == 0)
ret = hwmgr->hwmgr_func->backend_init(hwmgr);
+ if (ret)
+ printk("amdgpu: powerplay initialization failed\n");
+ else
+ printk("amdgpu: powerplay initialized\n");
+
return ret;
}
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c
index 873a8d264d5c..ec222c665602 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/cz_smumgr.c
@@ -272,6 +272,9 @@ static int cz_start_smu(struct pp_smumgr *smumgr)
UCODE_ID_CP_MEC_JT1_MASK |
UCODE_ID_CP_MEC_JT2_MASK;
+ if (smumgr->chip_id == CHIP_STONEY)
+ fw_to_check &= ~(UCODE_ID_SDMA1_MASK | UCODE_ID_CP_MEC_JT2_MASK);
+
cz_request_smu_load_fw(smumgr);
cz_check_fw_load_finish(smumgr, fw_to_check);
@@ -282,7 +285,7 @@ static int cz_start_smu(struct pp_smumgr *smumgr)
return ret;
}
-static uint8_t cz_translate_firmware_enum_to_arg(
+static uint8_t cz_translate_firmware_enum_to_arg(struct pp_smumgr *smumgr,
enum cz_scratch_entry firmware_enum)
{
uint8_t ret = 0;
@@ -292,7 +295,10 @@ static uint8_t cz_translate_firmware_enum_to_arg(
ret = UCODE_ID_SDMA0;
break;
case CZ_SCRATCH_ENTRY_UCODE_ID_SDMA1:
- ret = UCODE_ID_SDMA1;
+ if (smumgr->chip_id == CHIP_STONEY)
+ ret = UCODE_ID_SDMA0;
+ else
+ ret = UCODE_ID_SDMA1;
break;
case CZ_SCRATCH_ENTRY_UCODE_ID_CP_CE:
ret = UCODE_ID_CP_CE;
@@ -307,7 +313,10 @@ static uint8_t cz_translate_firmware_enum_to_arg(
ret = UCODE_ID_CP_MEC_JT1;
break;
case CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT2:
- ret = UCODE_ID_CP_MEC_JT2;
+ if (smumgr->chip_id == CHIP_STONEY)
+ ret = UCODE_ID_CP_MEC_JT1;
+ else
+ ret = UCODE_ID_CP_MEC_JT2;
break;
case CZ_SCRATCH_ENTRY_UCODE_ID_GMCON_RENG:
ret = UCODE_ID_GMCON_RENG;
@@ -396,7 +405,7 @@ static int cz_smu_populate_single_scratch_task(
struct SMU_Task *task = &toc->tasks[cz_smu->toc_entry_used_count++];
task->type = type;
- task->arg = cz_translate_firmware_enum_to_arg(fw_enum);
+ task->arg = cz_translate_firmware_enum_to_arg(smumgr, fw_enum);
task->next = is_last ? END_OF_TASK_LIST : cz_smu->toc_entry_used_count;
for (i = 0; i < cz_smu->scratch_buffer_length; i++)
@@ -433,7 +442,7 @@ static int cz_smu_populate_single_ucode_load_task(
struct SMU_Task *task = &toc->tasks[cz_smu->toc_entry_used_count++];
task->type = TASK_TYPE_UCODE_LOAD;
- task->arg = cz_translate_firmware_enum_to_arg(fw_enum);
+ task->arg = cz_translate_firmware_enum_to_arg(smumgr, fw_enum);
task->next = is_last ? END_OF_TASK_LIST : cz_smu->toc_entry_used_count;
for (i = 0; i < cz_smu->driver_buffer_length; i++)
@@ -509,8 +518,14 @@ static int cz_smu_construct_toc_for_vddgfx_exit(struct pp_smumgr *smumgr)
CZ_SCRATCH_ENTRY_UCODE_ID_CP_ME, false);
cz_smu_populate_single_ucode_load_task(smumgr,
CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
- cz_smu_populate_single_ucode_load_task(smumgr,
+
+ if (smumgr->chip_id == CHIP_STONEY)
+ cz_smu_populate_single_ucode_load_task(smumgr,
+ CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
+ else
+ cz_smu_populate_single_ucode_load_task(smumgr,
CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT2, false);
+
cz_smu_populate_single_ucode_load_task(smumgr,
CZ_SCRATCH_ENTRY_UCODE_ID_RLC_G, false);
@@ -551,7 +566,11 @@ static int cz_smu_construct_toc_for_bootup(struct pp_smumgr *smumgr)
cz_smu_populate_single_ucode_load_task(smumgr,
CZ_SCRATCH_ENTRY_UCODE_ID_SDMA0, false);
- cz_smu_populate_single_ucode_load_task(smumgr,
+ if (smumgr->chip_id == CHIP_STONEY)
+ cz_smu_populate_single_ucode_load_task(smumgr,
+ CZ_SCRATCH_ENTRY_UCODE_ID_SDMA0, false);
+ else
+ cz_smu_populate_single_ucode_load_task(smumgr,
CZ_SCRATCH_ENTRY_UCODE_ID_SDMA1, false);
cz_smu_populate_single_ucode_load_task(smumgr,
CZ_SCRATCH_ENTRY_UCODE_ID_CP_CE, false);
@@ -561,7 +580,11 @@ static int cz_smu_construct_toc_for_bootup(struct pp_smumgr *smumgr)
CZ_SCRATCH_ENTRY_UCODE_ID_CP_ME, false);
cz_smu_populate_single_ucode_load_task(smumgr,
CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
- cz_smu_populate_single_ucode_load_task(smumgr,
+ if (smumgr->chip_id == CHIP_STONEY)
+ cz_smu_populate_single_ucode_load_task(smumgr,
+ CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT1, false);
+ else
+ cz_smu_populate_single_ucode_load_task(smumgr,
CZ_SCRATCH_ENTRY_UCODE_ID_CP_MEC_JT2, false);
cz_smu_populate_single_ucode_load_task(smumgr,
CZ_SCRATCH_ENTRY_UCODE_ID_RLC_G, true);
@@ -618,7 +641,7 @@ static int cz_smu_populate_firmware_entries(struct pp_smumgr *smumgr)
for (i = 0; i < sizeof(firmware_list)/sizeof(*firmware_list); i++) {
- firmware_type = cz_translate_firmware_enum_to_arg(
+ firmware_type = cz_translate_firmware_enum_to_arg(smumgr,
firmware_list[i]);
ucode_id = cz_convert_fw_type_to_cgs(firmware_type);
diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index 57cccd68ca52..7c523060a076 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -946,9 +946,23 @@ static void wait_for_fences(struct drm_device *dev,
}
}
-static bool framebuffer_changed(struct drm_device *dev,
- struct drm_atomic_state *old_state,
- struct drm_crtc *crtc)
+/**
+ * drm_atomic_helper_framebuffer_changed - check if framebuffer has changed
+ * @dev: DRM device
+ * @old_state: atomic state object with old state structures
+ * @crtc: DRM crtc
+ *
+ * Checks whether the framebuffer used for this CRTC changes as a result of
+ * the atomic update. This is useful for drivers which cannot use
+ * drm_atomic_helper_wait_for_vblanks() and need to reimplement its
+ * functionality.
+ *
+ * Returns:
+ * true if the framebuffer changed.
+ */
+bool drm_atomic_helper_framebuffer_changed(struct drm_device *dev,
+ struct drm_atomic_state *old_state,
+ struct drm_crtc *crtc)
{
struct drm_plane *plane;
struct drm_plane_state *old_plane_state;
@@ -965,6 +979,7 @@ static bool framebuffer_changed(struct drm_device *dev,
return false;
}
+EXPORT_SYMBOL(drm_atomic_helper_framebuffer_changed);
/**
* drm_atomic_helper_wait_for_vblanks - wait for vblank on crtcs
@@ -999,7 +1014,8 @@ drm_atomic_helper_wait_for_vblanks(struct drm_device *dev,
if (old_state->legacy_cursor_update)
continue;
- if (!framebuffer_changed(dev, old_state, crtc))
+ if (!drm_atomic_helper_framebuffer_changed(dev,
+ old_state, crtc))
continue;
ret = drm_crtc_vblank_get(crtc);
diff --git a/drivers/gpu/drm/drm_hashtab.c b/drivers/gpu/drm/drm_hashtab.c
index c3b80fd65d62..7b30b307674b 100644
--- a/drivers/gpu/drm/drm_hashtab.c
+++ b/drivers/gpu/drm/drm_hashtab.c
@@ -198,10 +198,7 @@ EXPORT_SYMBOL(drm_ht_remove_item);
void drm_ht_remove(struct drm_open_hash *ht)
{
if (ht->table) {
- if ((PAGE_SIZE / sizeof(*ht->table)) >> ht->order)
- kfree(ht->table);
- else
- vfree(ht->table);
+ kvfree(ht->table);
ht->table = NULL;
}
}
diff --git a/drivers/gpu/drm/etnaviv/common.xml.h b/drivers/gpu/drm/etnaviv/common.xml.h
index 9e585d51fb78..e881482b5971 100644
--- a/drivers/gpu/drm/etnaviv/common.xml.h
+++ b/drivers/gpu/drm/etnaviv/common.xml.h
@@ -8,8 +8,8 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng
git clone git://0x04.net/rules-ng-ng
The rules-ng-ng source files this header was generated from are:
-- state_vg.xml ( 5973 bytes, from 2015-03-25 11:26:01)
-- common.xml ( 18437 bytes, from 2015-03-25 11:27:41)
+- state_hi.xml ( 24309 bytes, from 2015-12-12 09:02:53)
+- common.xml ( 18379 bytes, from 2015-12-12 09:02:53)
Copyright (C) 2015
*/
@@ -30,15 +30,19 @@ Copyright (C) 2015
#define ENDIAN_MODE_NO_SWAP 0x00000000
#define ENDIAN_MODE_SWAP_16 0x00000001
#define ENDIAN_MODE_SWAP_32 0x00000002
+#define chipModel_GC200 0x00000200
#define chipModel_GC300 0x00000300
#define chipModel_GC320 0x00000320
+#define chipModel_GC328 0x00000328
#define chipModel_GC350 0x00000350
#define chipModel_GC355 0x00000355
#define chipModel_GC400 0x00000400
#define chipModel_GC410 0x00000410
#define chipModel_GC420 0x00000420
+#define chipModel_GC428 0x00000428
#define chipModel_GC450 0x00000450
#define chipModel_GC500 0x00000500
+#define chipModel_GC520 0x00000520
#define chipModel_GC530 0x00000530
#define chipModel_GC600 0x00000600
#define chipModel_GC700 0x00000700
@@ -46,9 +50,16 @@ Copyright (C) 2015
#define chipModel_GC860 0x00000860
#define chipModel_GC880 0x00000880
#define chipModel_GC1000 0x00001000
+#define chipModel_GC1500 0x00001500
#define chipModel_GC2000 0x00002000
#define chipModel_GC2100 0x00002100
+#define chipModel_GC2200 0x00002200
+#define chipModel_GC2500 0x00002500
+#define chipModel_GC3000 0x00003000
#define chipModel_GC4000 0x00004000
+#define chipModel_GC5000 0x00005000
+#define chipModel_GC5200 0x00005200
+#define chipModel_GC6400 0x00006400
#define RGBA_BITS_R 0x00000001
#define RGBA_BITS_G 0x00000002
#define RGBA_BITS_B 0x00000004
@@ -160,7 +171,7 @@ Copyright (C) 2015
#define chipMinorFeatures2_UNK8 0x00000100
#define chipMinorFeatures2_UNK9 0x00000200
#define chipMinorFeatures2_UNK10 0x00000400
-#define chipMinorFeatures2_SAMPLERBASE_16 0x00000800
+#define chipMinorFeatures2_HALTI1 0x00000800
#define chipMinorFeatures2_UNK12 0x00001000
#define chipMinorFeatures2_UNK13 0x00002000
#define chipMinorFeatures2_UNK14 0x00004000
@@ -189,7 +200,7 @@ Copyright (C) 2015
#define chipMinorFeatures3_UNK5 0x00000020
#define chipMinorFeatures3_UNK6 0x00000040
#define chipMinorFeatures3_UNK7 0x00000080
-#define chipMinorFeatures3_UNK8 0x00000100
+#define chipMinorFeatures3_FAST_MSAA 0x00000100
#define chipMinorFeatures3_UNK9 0x00000200
#define chipMinorFeatures3_BUG_FIXES10 0x00000400
#define chipMinorFeatures3_UNK11 0x00000800
@@ -199,7 +210,7 @@ Copyright (C) 2015
#define chipMinorFeatures3_UNK15 0x00008000
#define chipMinorFeatures3_UNK16 0x00010000
#define chipMinorFeatures3_UNK17 0x00020000
-#define chipMinorFeatures3_UNK18 0x00040000
+#define chipMinorFeatures3_ACE 0x00040000
#define chipMinorFeatures3_UNK19 0x00080000
#define chipMinorFeatures3_UNK20 0x00100000
#define chipMinorFeatures3_UNK21 0x00200000
@@ -207,7 +218,7 @@ Copyright (C) 2015
#define chipMinorFeatures3_UNK23 0x00800000
#define chipMinorFeatures3_UNK24 0x01000000
#define chipMinorFeatures3_UNK25 0x02000000
-#define chipMinorFeatures3_UNK26 0x04000000
+#define chipMinorFeatures3_NEW_HZ 0x04000000
#define chipMinorFeatures3_UNK27 0x08000000
#define chipMinorFeatures3_UNK28 0x10000000
#define chipMinorFeatures3_UNK29 0x20000000
@@ -229,9 +240,9 @@ Copyright (C) 2015
#define chipMinorFeatures4_UNK13 0x00002000
#define chipMinorFeatures4_UNK14 0x00004000
#define chipMinorFeatures4_UNK15 0x00008000
-#define chipMinorFeatures4_UNK16 0x00010000
+#define chipMinorFeatures4_HALTI2 0x00010000
#define chipMinorFeatures4_UNK17 0x00020000
-#define chipMinorFeatures4_UNK18 0x00040000
+#define chipMinorFeatures4_SMALL_MSAA 0x00040000
#define chipMinorFeatures4_UNK19 0x00080000
#define chipMinorFeatures4_UNK20 0x00100000
#define chipMinorFeatures4_UNK21 0x00200000
@@ -245,5 +256,37 @@ Copyright (C) 2015
#define chipMinorFeatures4_UNK29 0x20000000
#define chipMinorFeatures4_UNK30 0x40000000
#define chipMinorFeatures4_UNK31 0x80000000
+#define chipMinorFeatures5_UNK0 0x00000001
+#define chipMinorFeatures5_UNK1 0x00000002
+#define chipMinorFeatures5_UNK2 0x00000004
+#define chipMinorFeatures5_UNK3 0x00000008
+#define chipMinorFeatures5_UNK4 0x00000010
+#define chipMinorFeatures5_UNK5 0x00000020
+#define chipMinorFeatures5_UNK6 0x00000040
+#define chipMinorFeatures5_UNK7 0x00000080
+#define chipMinorFeatures5_UNK8 0x00000100
+#define chipMinorFeatures5_HALTI3 0x00000200
+#define chipMinorFeatures5_UNK10 0x00000400
+#define chipMinorFeatures5_UNK11 0x00000800
+#define chipMinorFeatures5_UNK12 0x00001000
+#define chipMinorFeatures5_UNK13 0x00002000
+#define chipMinorFeatures5_UNK14 0x00004000
+#define chipMinorFeatures5_UNK15 0x00008000
+#define chipMinorFeatures5_UNK16 0x00010000
+#define chipMinorFeatures5_UNK17 0x00020000
+#define chipMinorFeatures5_UNK18 0x00040000
+#define chipMinorFeatures5_UNK19 0x00080000
+#define chipMinorFeatures5_UNK20 0x00100000
+#define chipMinorFeatures5_UNK21 0x00200000
+#define chipMinorFeatures5_UNK22 0x00400000
+#define chipMinorFeatures5_UNK23 0x00800000
+#define chipMinorFeatures5_UNK24 0x01000000
+#define chipMinorFeatures5_UNK25 0x02000000
+#define chipMinorFeatures5_UNK26 0x04000000
+#define chipMinorFeatures5_UNK27 0x08000000
+#define chipMinorFeatures5_UNK28 0x10000000
+#define chipMinorFeatures5_UNK29 0x20000000
+#define chipMinorFeatures5_UNK30 0x40000000
+#define chipMinorFeatures5_UNK31 0x80000000
#endif /* COMMON_XML */
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
index 5c89ebb52fd2..e8858985f01e 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@@ -668,7 +668,6 @@ static struct platform_driver etnaviv_platform_driver = {
.probe = etnaviv_pdev_probe,
.remove = etnaviv_pdev_remove,
.driver = {
- .owner = THIS_MODULE,
.name = "etnaviv",
.of_match_table = dt_match,
},
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.h b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
index d6bd438bd5be..1cd6046e76b1 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
@@ -85,7 +85,7 @@ struct drm_gem_object *etnaviv_gem_prime_import_sg_table(struct drm_device *dev,
struct dma_buf_attachment *attach, struct sg_table *sg);
int etnaviv_gem_prime_pin(struct drm_gem_object *obj);
void etnaviv_gem_prime_unpin(struct drm_gem_object *obj);
-void *etnaviv_gem_vaddr(struct drm_gem_object *obj);
+void *etnaviv_gem_vmap(struct drm_gem_object *obj);
int etnaviv_gem_cpu_prep(struct drm_gem_object *obj, u32 op,
struct timespec *timeout);
int etnaviv_gem_cpu_fini(struct drm_gem_object *obj);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
index bf8fa859e8be..4a29eeadbf1e 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
@@ -201,7 +201,9 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
obj = vram->object;
+ mutex_lock(&obj->lock);
pages = etnaviv_gem_get_pages(obj);
+ mutex_unlock(&obj->lock);
if (pages) {
int j;
@@ -213,8 +215,8 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
iter.hdr->iova = cpu_to_le64(vram->iova);
- vaddr = etnaviv_gem_vaddr(&obj->base);
- if (vaddr && !IS_ERR(vaddr))
+ vaddr = etnaviv_gem_vmap(&obj->base);
+ if (vaddr)
memcpy(iter.data, vaddr, obj->base.size);
etnaviv_core_dump_header(&iter, ETDUMP_BUF_BO, iter.data +
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
index 9f77c3b94cc6..4b519e4309b2 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@@ -353,25 +353,39 @@ void etnaviv_gem_put_iova(struct etnaviv_gpu *gpu, struct drm_gem_object *obj)
drm_gem_object_unreference_unlocked(obj);
}
-void *etnaviv_gem_vaddr(struct drm_gem_object *obj)
+void *etnaviv_gem_vmap(struct drm_gem_object *obj)
{
struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
- mutex_lock(&etnaviv_obj->lock);
- if (!etnaviv_obj->vaddr) {
- struct page **pages = etnaviv_gem_get_pages(etnaviv_obj);
-
- if (IS_ERR(pages))
- return ERR_CAST(pages);
+ if (etnaviv_obj->vaddr)
+ return etnaviv_obj->vaddr;
- etnaviv_obj->vaddr = vmap(pages, obj->size >> PAGE_SHIFT,
- VM_MAP, pgprot_writecombine(PAGE_KERNEL));
- }
+ mutex_lock(&etnaviv_obj->lock);
+ /*
+ * Need to check again, as we might have raced with another thread
+ * while waiting for the mutex.
+ */
+ if (!etnaviv_obj->vaddr)
+ etnaviv_obj->vaddr = etnaviv_obj->ops->vmap(etnaviv_obj);
mutex_unlock(&etnaviv_obj->lock);
return etnaviv_obj->vaddr;
}
+static void *etnaviv_gem_vmap_impl(struct etnaviv_gem_object *obj)
+{
+ struct page **pages;
+
+ lockdep_assert_held(&obj->lock);
+
+ pages = etnaviv_gem_get_pages(obj);
+ if (IS_ERR(pages))
+ return NULL;
+
+ return vmap(pages, obj->base.size >> PAGE_SHIFT,
+ VM_MAP, pgprot_writecombine(PAGE_KERNEL));
+}
+
static inline enum dma_data_direction etnaviv_op_to_dma_dir(u32 op)
{
if (op & ETNA_PREP_READ)
@@ -522,6 +536,7 @@ static void etnaviv_gem_shmem_release(struct etnaviv_gem_object *etnaviv_obj)
static const struct etnaviv_gem_ops etnaviv_gem_shmem_ops = {
.get_pages = etnaviv_gem_shmem_get_pages,
.release = etnaviv_gem_shmem_release,
+ .vmap = etnaviv_gem_vmap_impl,
};
void etnaviv_gem_free_object(struct drm_gem_object *obj)
@@ -866,6 +881,7 @@ static void etnaviv_gem_userptr_release(struct etnaviv_gem_object *etnaviv_obj)
static const struct etnaviv_gem_ops etnaviv_gem_userptr_ops = {
.get_pages = etnaviv_gem_userptr_get_pages,
.release = etnaviv_gem_userptr_release,
+ .vmap = etnaviv_gem_vmap_impl,
};
int etnaviv_gem_new_userptr(struct drm_device *dev, struct drm_file *file,
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.h b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
index a300b4b3d545..ab5df8147a5f 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
@@ -78,6 +78,7 @@ struct etnaviv_gem_object *to_etnaviv_bo(struct drm_gem_object *obj)
struct etnaviv_gem_ops {
int (*get_pages)(struct etnaviv_gem_object *);
void (*release)(struct etnaviv_gem_object *);
+ void *(*vmap)(struct etnaviv_gem_object *);
};
static inline bool is_active(struct etnaviv_gem_object *etnaviv_obj)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
index e94db4f95770..4e67395f5fa1 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
@@ -31,7 +31,7 @@ struct sg_table *etnaviv_gem_prime_get_sg_table(struct drm_gem_object *obj)
void *etnaviv_gem_prime_vmap(struct drm_gem_object *obj)
{
- return etnaviv_gem_vaddr(obj);
+ return etnaviv_gem_vmap(obj);
}
void etnaviv_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
@@ -77,9 +77,17 @@ static void etnaviv_gem_prime_release(struct etnaviv_gem_object *etnaviv_obj)
drm_prime_gem_destroy(&etnaviv_obj->base, etnaviv_obj->sgt);
}
+static void *etnaviv_gem_prime_vmap_impl(struct etnaviv_gem_object *etnaviv_obj)
+{
+ lockdep_assert_held(&etnaviv_obj->lock);
+
+ return dma_buf_vmap(etnaviv_obj->base.import_attach->dmabuf);
+}
+
static const struct etnaviv_gem_ops etnaviv_gem_prime_ops = {
/* .get_pages should never be called */
.release = etnaviv_gem_prime_release,
+ .vmap = etnaviv_gem_prime_vmap_impl,
};
struct drm_gem_object *etnaviv_gem_prime_import_sg_table(struct drm_device *dev,
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index 056a72e6ed26..a33162cf4f4c 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -72,6 +72,14 @@ int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value)
*value = gpu->identity.minor_features3;
break;
+ case ETNAVIV_PARAM_GPU_FEATURES_5:
+ *value = gpu->identity.minor_features4;
+ break;
+
+ case ETNAVIV_PARAM_GPU_FEATURES_6:
+ *value = gpu->identity.minor_features5;
+ break;
+
case ETNAVIV_PARAM_GPU_STREAM_COUNT:
*value = gpu->identity.stream_count;
break;
@@ -112,6 +120,10 @@ int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value)
*value = gpu->identity.num_constants;
break;
+ case ETNAVIV_PARAM_GPU_NUM_VARYINGS:
+ *value = gpu->identity.varyings_count;
+ break;
+
default:
DBG("%s: invalid param: %u", dev_name(gpu->dev), param);
return -EINVAL;
@@ -120,46 +132,56 @@ int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value)
return 0;
}
+
+#define etnaviv_is_model_rev(gpu, mod, rev) \
+ ((gpu)->identity.model == chipModel_##mod && \
+ (gpu)->identity.revision == rev)
+#define etnaviv_field(val, field) \
+ (((val) & field##__MASK) >> field##__SHIFT)
+
static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
{
if (gpu->identity.minor_features0 &
chipMinorFeatures0_MORE_MINOR_FEATURES) {
- u32 specs[2];
+ u32 specs[4];
+ unsigned int streams;
specs[0] = gpu_read(gpu, VIVS_HI_CHIP_SPECS);
specs[1] = gpu_read(gpu, VIVS_HI_CHIP_SPECS_2);
-
- gpu->identity.stream_count =
- (specs[0] & VIVS_HI_CHIP_SPECS_STREAM_COUNT__MASK)
- >> VIVS_HI_CHIP_SPECS_STREAM_COUNT__SHIFT;
- gpu->identity.register_max =
- (specs[0] & VIVS_HI_CHIP_SPECS_REGISTER_MAX__MASK)
- >> VIVS_HI_CHIP_SPECS_REGISTER_MAX__SHIFT;
- gpu->identity.thread_count =
- (specs[0] & VIVS_HI_CHIP_SPECS_THREAD_COUNT__MASK)
- >> VIVS_HI_CHIP_SPECS_THREAD_COUNT__SHIFT;
- gpu->identity.vertex_cache_size =
- (specs[0] & VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE__MASK)
- >> VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE__SHIFT;
- gpu->identity.shader_core_count =
- (specs[0] & VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT__MASK)
- >> VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT__SHIFT;
- gpu->identity.pixel_pipes =
- (specs[0] & VIVS_HI_CHIP_SPECS_PIXEL_PIPES__MASK)
- >> VIVS_HI_CHIP_SPECS_PIXEL_PIPES__SHIFT;
+ specs[2] = gpu_read(gpu, VIVS_HI_CHIP_SPECS_3);
+ specs[3] = gpu_read(gpu, VIVS_HI_CHIP_SPECS_4);
+
+ gpu->identity.stream_count = etnaviv_field(specs[0],
+ VIVS_HI_CHIP_SPECS_STREAM_COUNT);
+ gpu->identity.register_max = etnaviv_field(specs[0],
+ VIVS_HI_CHIP_SPECS_REGISTER_MAX);
+ gpu->identity.thread_count = etnaviv_field(specs[0],
+ VIVS_HI_CHIP_SPECS_THREAD_COUNT);
+ gpu->identity.vertex_cache_size = etnaviv_field(specs[0],
+ VIVS_HI_CHIP_SPECS_VERTEX_CACHE_SIZE);
+ gpu->identity.shader_core_count = etnaviv_field(specs[0],
+ VIVS_HI_CHIP_SPECS_SHADER_CORE_COUNT);
+ gpu->identity.pixel_pipes = etnaviv_field(specs[0],
+ VIVS_HI_CHIP_SPECS_PIXEL_PIPES);
gpu->identity.vertex_output_buffer_size =
- (specs[0] & VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE__MASK)
- >> VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE__SHIFT;
-
- gpu->identity.buffer_size =
- (specs[1] & VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE__MASK)
- >> VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE__SHIFT;
- gpu->identity.instruction_count =
- (specs[1] & VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT__MASK)
- >> VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT__SHIFT;
- gpu->identity.num_constants =
- (specs[1] & VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS__MASK)
- >> VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS__SHIFT;
+ etnaviv_field(specs[0],
+ VIVS_HI_CHIP_SPECS_VERTEX_OUTPUT_BUFFER_SIZE);
+
+ gpu->identity.buffer_size = etnaviv_field(specs[1],
+ VIVS_HI_CHIP_SPECS_2_BUFFER_SIZE);
+ gpu->identity.instruction_count = etnaviv_field(specs[1],
+ VIVS_HI_CHIP_SPECS_2_INSTRUCTION_COUNT);
+ gpu->identity.num_constants = etnaviv_field(specs[1],
+ VIVS_HI_CHIP_SPECS_2_NUM_CONSTANTS);
+
+ gpu->identity.varyings_count = etnaviv_field(specs[2],
+ VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT);
+
+ /* This overrides the value from older register if non-zero */
+ streams = etnaviv_field(specs[3],
+ VIVS_HI_CHIP_SPECS_4_STREAM_COUNT);
+ if (streams)
+ gpu->identity.stream_count = streams;
}
/* Fill in the stream count if not specified */
@@ -173,7 +195,7 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
/* Convert the register max value */
if (gpu->identity.register_max)
gpu->identity.register_max = 1 << gpu->identity.register_max;
- else if (gpu->identity.model == 0x0400)
+ else if (gpu->identity.model == chipModel_GC400)
gpu->identity.register_max = 32;
else
gpu->identity.register_max = 64;
@@ -181,10 +203,10 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
/* Convert thread count */
if (gpu->identity.thread_count)
gpu->identity.thread_count = 1 << gpu->identity.thread_count;
- else if (gpu->identity.model == 0x0400)
+ else if (gpu->identity.model == chipModel_GC400)
gpu->identity.thread_count = 64;
- else if (gpu->identity.model == 0x0500 ||
- gpu->identity.model == 0x0530)
+ else if (gpu->identity.model == chipModel_GC500 ||
+ gpu->identity.model == chipModel_GC530)
gpu->identity.thread_count = 128;
else
gpu->identity.thread_count = 256;
@@ -206,7 +228,7 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
if (gpu->identity.vertex_output_buffer_size) {
gpu->identity.vertex_output_buffer_size =
1 << gpu->identity.vertex_output_buffer_size;
- } else if (gpu->identity.model == 0x0400) {
+ } else if (gpu->identity.model == chipModel_GC400) {
if (gpu->identity.revision < 0x4000)
gpu->identity.vertex_output_buffer_size = 512;
else if (gpu->identity.revision < 0x4200)
@@ -219,9 +241,8 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
switch (gpu->identity.instruction_count) {
case 0:
- if ((gpu->identity.model == 0x2000 &&
- gpu->identity.revision == 0x5108) ||
- gpu->identity.model == 0x880)
+ if (etnaviv_is_model_rev(gpu, GC2000, 0x5108) ||
+ gpu->identity.model == chipModel_GC880)
gpu->identity.instruction_count = 512;
else
gpu->identity.instruction_count = 256;
@@ -242,6 +263,30 @@ static void etnaviv_hw_specs(struct etnaviv_gpu *gpu)
if (gpu->identity.num_constants == 0)
gpu->identity.num_constants = 168;
+
+ if (gpu->identity.varyings_count == 0) {
+ if (gpu->identity.minor_features1 & chipMinorFeatures1_HALTI0)
+ gpu->identity.varyings_count = 12;
+ else
+ gpu->identity.varyings_count = 8;
+ }
+
+ /*
+ * For some cores, two varyings are consumed for position, so the
+ * maximum varying count needs to be reduced by one.
+ */
+ if (etnaviv_is_model_rev(gpu, GC5000, 0x5434) ||
+ etnaviv_is_model_rev(gpu, GC4000, 0x5222) ||
+ etnaviv_is_model_rev(gpu, GC4000, 0x5245) ||
+ etnaviv_is_model_rev(gpu, GC4000, 0x5208) ||
+ etnaviv_is_model_rev(gpu, GC3000, 0x5435) ||
+ etnaviv_is_model_rev(gpu, GC2200, 0x5244) ||
+ etnaviv_is_model_rev(gpu, GC2100, 0x5108) ||
+ etnaviv_is_model_rev(gpu, GC2000, 0x5108) ||
+ etnaviv_is_model_rev(gpu, GC1500, 0x5246) ||
+ etnaviv_is_model_rev(gpu, GC880, 0x5107) ||
+ etnaviv_is_model_rev(gpu, GC880, 0x5106))
+ gpu->identity.varyings_count -= 1;
}
static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
@@ -251,12 +296,10 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
chipIdentity = gpu_read(gpu, VIVS_HI_CHIP_IDENTITY);
/* Special case for older graphic cores. */
- if (((chipIdentity & VIVS_HI_CHIP_IDENTITY_FAMILY__MASK)
- >> VIVS_HI_CHIP_IDENTITY_FAMILY__SHIFT) == 0x01) {
- gpu->identity.model = 0x500; /* gc500 */
- gpu->identity.revision =
- (chipIdentity & VIVS_HI_CHIP_IDENTITY_REVISION__MASK)
- >> VIVS_HI_CHIP_IDENTITY_REVISION__SHIFT;
+ if (etnaviv_field(chipIdentity, VIVS_HI_CHIP_IDENTITY_FAMILY) == 0x01) {
+ gpu->identity.model = chipModel_GC500;
+ gpu->identity.revision = etnaviv_field(chipIdentity,
+ VIVS_HI_CHIP_IDENTITY_REVISION);
} else {
gpu->identity.model = gpu_read(gpu, VIVS_HI_CHIP_MODEL);
@@ -269,13 +312,12 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
* same. Only for GC400 family.
*/
if ((gpu->identity.model & 0xff00) == 0x0400 &&
- gpu->identity.model != 0x0420) {
+ gpu->identity.model != chipModel_GC420) {
gpu->identity.model = gpu->identity.model & 0x0400;
}
/* Another special case */
- if (gpu->identity.model == 0x300 &&
- gpu->identity.revision == 0x2201) {
+ if (etnaviv_is_model_rev(gpu, GC300, 0x2201)) {
u32 chipDate = gpu_read(gpu, VIVS_HI_CHIP_DATE);
u32 chipTime = gpu_read(gpu, VIVS_HI_CHIP_TIME);
@@ -295,11 +337,13 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
gpu->identity.features = gpu_read(gpu, VIVS_HI_CHIP_FEATURE);
/* Disable fast clear on GC700. */
- if (gpu->identity.model == 0x700)
+ if (gpu->identity.model == chipModel_GC700)
gpu->identity.features &= ~chipFeatures_FAST_CLEAR;
- if ((gpu->identity.model == 0x500 && gpu->identity.revision < 2) ||
- (gpu->identity.model == 0x300 && gpu->identity.revision < 0x2000)) {
+ if ((gpu->identity.model == chipModel_GC500 &&
+ gpu->identity.revision < 2) ||
+ (gpu->identity.model == chipModel_GC300 &&
+ gpu->identity.revision < 0x2000)) {
/*
* GC500 rev 1.x and GC300 rev < 2.0 doesn't have these
@@ -309,6 +353,8 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
gpu->identity.minor_features1 = 0;
gpu->identity.minor_features2 = 0;
gpu->identity.minor_features3 = 0;
+ gpu->identity.minor_features4 = 0;
+ gpu->identity.minor_features5 = 0;
} else
gpu->identity.minor_features0 =
gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_0);
@@ -321,6 +367,10 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_2);
gpu->identity.minor_features3 =
gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_3);
+ gpu->identity.minor_features4 =
+ gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_4);
+ gpu->identity.minor_features5 =
+ gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_5);
}
/* GC600 idle register reports zero bits where modules aren't present */
@@ -441,10 +491,9 @@ static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu)
{
u16 prefetch;
- if (gpu->identity.model == chipModel_GC320 &&
- gpu_read(gpu, VIVS_HI_CHIP_TIME) != 0x2062400 &&
- (gpu->identity.revision == 0x5007 ||
- gpu->identity.revision == 0x5220)) {
+ if ((etnaviv_is_model_rev(gpu, GC320, 0x5007) ||
+ etnaviv_is_model_rev(gpu, GC320, 0x5220)) &&
+ gpu_read(gpu, VIVS_HI_CHIP_TIME) != 0x2062400) {
u32 mc_memory_debug;
mc_memory_debug = gpu_read(gpu, VIVS_MC_DEBUG_MEMORY) & ~0xff;
@@ -466,7 +515,7 @@ static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu)
VIVS_HI_AXI_CONFIG_ARCACHE(2));
/* GC2000 rev 5108 needs a special bus config */
- if (gpu->identity.model == 0x2000 && gpu->identity.revision == 0x5108) {
+ if (etnaviv_is_model_rev(gpu, GC2000, 0x5108)) {
u32 bus_config = gpu_read(gpu, VIVS_MC_BUS_CONFIG);
bus_config &= ~(VIVS_MC_BUS_CONFIG_FE_BUS_CONFIG__MASK |
VIVS_MC_BUS_CONFIG_TX_BUS_CONFIG__MASK);
@@ -511,8 +560,16 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
if (gpu->identity.model == 0) {
dev_err(gpu->dev, "Unknown GPU model\n");
- pm_runtime_put_autosuspend(gpu->dev);
- return -ENXIO;
+ ret = -ENXIO;
+ goto fail;
+ }
+
+ /* Exclude VG cores with FE2.0 */
+ if (gpu->identity.features & chipFeatures_PIPE_VG &&
+ gpu->identity.features & chipFeatures_FE20) {
+ dev_info(gpu->dev, "Ignoring GPU with VG and FE2.0\n");
+ ret = -ENXIO;
+ goto fail;
}
ret = etnaviv_hw_reset(gpu);
@@ -539,10 +596,9 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
goto fail;
}
- /* TODO: we will leak here memory - fix it! */
-
gpu->mmu = etnaviv_iommu_new(gpu, iommu, version);
if (!gpu->mmu) {
+ iommu_domain_free(iommu);
ret = -ENOMEM;
goto fail;
}
@@ -552,7 +608,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
if (!gpu->buffer) {
ret = -ENOMEM;
dev_err(gpu->dev, "could not create command buffer\n");
- goto fail;
+ goto destroy_iommu;
}
if (gpu->buffer->paddr - gpu->memory_base > 0x80000000) {
ret = -EINVAL;
@@ -582,6 +638,9 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
free_buffer:
etnaviv_gpu_cmdbuf_free(gpu->buffer);
gpu->buffer = NULL;
+destroy_iommu:
+ etnaviv_iommu_destroy(gpu->mmu);
+ gpu->mmu = NULL;
fail:
pm_runtime_mark_last_busy(gpu->dev);
pm_runtime_put_autosuspend(gpu->dev);
@@ -642,6 +701,10 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m)
gpu->identity.minor_features2);
seq_printf(m, "\t minor_features3: 0x%08x\n",
gpu->identity.minor_features3);
+ seq_printf(m, "\t minor_features4: 0x%08x\n",
+ gpu->identity.minor_features4);
+ seq_printf(m, "\t minor_features5: 0x%08x\n",
+ gpu->identity.minor_features5);
seq_puts(m, "\tspecs\n");
seq_printf(m, "\t stream_count: %d\n",
@@ -664,6 +727,8 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m)
gpu->identity.instruction_count);
seq_printf(m, "\t num_constants: %d\n",
gpu->identity.num_constants);
+ seq_printf(m, "\t varyings_count: %d\n",
+ gpu->identity.varyings_count);
seq_printf(m, "\taxi: 0x%08x\n", axi);
seq_printf(m, "\tidle: 0x%08x\n", idle);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
index c75d50359ab0..f233ac4c7c1c 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
@@ -46,6 +46,12 @@ struct etnaviv_chip_identity {
/* Supported minor feature 3 fields. */
u32 minor_features3;
+ /* Supported minor feature 4 fields. */
+ u32 minor_features4;
+
+ /* Supported minor feature 5 fields. */
+ u32 minor_features5;
+
/* Number of streams supported. */
u32 stream_count;
@@ -75,6 +81,9 @@ struct etnaviv_chip_identity {
/* Buffer size */
u32 buffer_size;
+
+ /* Number of varyings */
+ u8 varyings_count;
};
struct etnaviv_event {
diff --git a/drivers/gpu/drm/etnaviv/state_hi.xml.h b/drivers/gpu/drm/etnaviv/state_hi.xml.h
index 0064f2640396..6a7de5f1454a 100644
--- a/drivers/gpu/drm/etnaviv/state_hi.xml.h
+++ b/drivers/gpu/drm/etnaviv/state_hi.xml.h
@@ -8,8 +8,8 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng
git clone git://0x04.net/rules-ng-ng
The rules-ng-ng source files this header was generated from are:
-- state_hi.xml ( 23420 bytes, from 2015-03-25 11:47:21)
-- common.xml ( 18437 bytes, from 2015-03-25 11:27:41)
+- state_hi.xml ( 24309 bytes, from 2015-12-12 09:02:53)
+- common.xml ( 18437 bytes, from 2015-12-12 09:02:53)
Copyright (C) 2015
*/
@@ -182,8 +182,25 @@ Copyright (C) 2015
#define VIVS_HI_CHIP_MINOR_FEATURE_3 0x00000088
+#define VIVS_HI_CHIP_SPECS_3 0x0000008c
+#define VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__MASK 0x000001f0
+#define VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__SHIFT 4
+#define VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT(x) (((x) << VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__SHIFT) & VIVS_HI_CHIP_SPECS_3_VARYINGS_COUNT__MASK)
+#define VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__MASK 0x00000007
+#define VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__SHIFT 0
+#define VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT(x) (((x) << VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__SHIFT) & VIVS_HI_CHIP_SPECS_3_GPU_CORE_COUNT__MASK)
+
#define VIVS_HI_CHIP_MINOR_FEATURE_4 0x00000094
+#define VIVS_HI_CHIP_SPECS_4 0x0000009c
+#define VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__MASK 0x0001f000
+#define VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__SHIFT 12
+#define VIVS_HI_CHIP_SPECS_4_STREAM_COUNT(x) (((x) << VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__SHIFT) & VIVS_HI_CHIP_SPECS_4_STREAM_COUNT__MASK)
+
+#define VIVS_HI_CHIP_MINOR_FEATURE_5 0x000000a0
+
+#define VIVS_HI_CHIP_PRODUCT_ID 0x000000a8
+
#define VIVS_PM 0x00000000
#define VIVS_PM_POWER_CONTROLS 0x00000100
@@ -206,6 +223,11 @@ Copyright (C) 2015
#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_FE 0x00000001
#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_DE 0x00000002
#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_PE 0x00000004
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_SH 0x00000008
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_PA 0x00000010
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_SE 0x00000020
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_RA 0x00000040
+#define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_TX 0x00000080
#define VIVS_PM_PULSE_EATER 0x0000010c
diff --git a/drivers/gpu/drm/radeon/dce6_afmt.c b/drivers/gpu/drm/radeon/dce6_afmt.c
index 6bfc46369db1..367a916f364e 100644
--- a/drivers/gpu/drm/radeon/dce6_afmt.c
+++ b/drivers/gpu/drm/radeon/dce6_afmt.c
@@ -304,18 +304,10 @@ void dce6_dp_audio_set_dto(struct radeon_device *rdev,
unsigned int div = (RREG32(DENTIST_DISPCLK_CNTL) &
DENTIST_DPREFCLK_WDIVIDER_MASK) >>
DENTIST_DPREFCLK_WDIVIDER_SHIFT;
-
- if (div < 128 && div >= 96)
- div -= 64;
- else if (div >= 64)
- div = div / 2 - 16;
- else if (div >= 8)
- div /= 4;
- else
- div = 0;
+ div = radeon_audio_decode_dfs_div(div);
if (div)
- clock = rdev->clock.gpupll_outputfreq * 10 / div;
+ clock = clock * 100 / div;
WREG32(DCE8_DCCG_AUDIO_DTO1_PHASE, 24000);
WREG32(DCE8_DCCG_AUDIO_DTO1_MODULE, clock);
diff --git a/drivers/gpu/drm/radeon/evergreen_hdmi.c b/drivers/gpu/drm/radeon/evergreen_hdmi.c
index 9953356fe263..3cf04a2f44bb 100644
--- a/drivers/gpu/drm/radeon/evergreen_hdmi.c
+++ b/drivers/gpu/drm/radeon/evergreen_hdmi.c
@@ -289,6 +289,16 @@ void dce4_dp_audio_set_dto(struct radeon_device *rdev,
* number (coefficient of two integer numbers. DCCG_AUDIO_DTOx_PHASE
* is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator
*/
+ if (ASIC_IS_DCE41(rdev)) {
+ unsigned int div = (RREG32(DCE41_DENTIST_DISPCLK_CNTL) &
+ DENTIST_DPREFCLK_WDIVIDER_MASK) >>
+ DENTIST_DPREFCLK_WDIVIDER_SHIFT;
+ div = radeon_audio_decode_dfs_div(div);
+
+ if (div)
+ clock = 100 * clock / div;
+ }
+
WREG32(DCCG_AUDIO_DTO1_PHASE, 24000);
WREG32(DCCG_AUDIO_DTO1_MODULE, clock);
}
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index 4aa5f755572b..13b6029d65cc 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -511,6 +511,11 @@
#define DCCG_AUDIO_DTO1_CNTL 0x05cc
# define DCCG_AUDIO_DTO1_USE_512FBR_DTO (1 << 3)
+#define DCE41_DENTIST_DISPCLK_CNTL 0x049c
+# define DENTIST_DPREFCLK_WDIVIDER(x) (((x) & 0x7f) << 24)
+# define DENTIST_DPREFCLK_WDIVIDER_MASK (0x7f << 24)
+# define DENTIST_DPREFCLK_WDIVIDER_SHIFT 24
+
/* DCE 4.0 AFMT */
#define HDMI_CONTROL 0x7030
# define HDMI_KEEPOUT_MODE (1 << 0)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 5ae6db98aa4d..78a51b3eda10 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -268,7 +268,7 @@ struct radeon_clock {
uint32_t current_dispclk;
uint32_t dp_extclk;
uint32_t max_pixel_clock;
- uint32_t gpupll_outputfreq;
+ uint32_t vco_freq;
};
/*
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index 08fc1b5effa8..de9a2ffcf5f7 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -1106,6 +1106,31 @@ union firmware_info {
ATOM_FIRMWARE_INFO_V2_2 info_22;
};
+union igp_info {
+ struct _ATOM_INTEGRATED_SYSTEM_INFO info;
+ struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 info_2;
+ struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 info_6;
+ struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_7 info_7;
+ struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_8 info_8;
+};
+
+static void radeon_atombios_get_dentist_vco_freq(struct radeon_device *rdev)
+{
+ struct radeon_mode_info *mode_info = &rdev->mode_info;
+ int index = GetIndexIntoMasterTable(DATA, IntegratedSystemInfo);
+ union igp_info *igp_info;
+ u8 frev, crev;
+ u16 data_offset;
+
+ if (atom_parse_data_header(mode_info->atom_context, index, NULL,
+ &frev, &crev, &data_offset)) {
+ igp_info = (union igp_info *)(mode_info->atom_context->bios +
+ data_offset);
+ rdev->clock.vco_freq =
+ le32_to_cpu(igp_info->info_6.ulDentistVCOFreq);
+ }
+}
+
bool radeon_atom_get_clock_info(struct drm_device *dev)
{
struct radeon_device *rdev = dev->dev_private;
@@ -1257,12 +1282,18 @@ bool radeon_atom_get_clock_info(struct drm_device *dev)
rdev->mode_info.firmware_flags =
le16_to_cpu(firmware_info->info.usFirmwareCapability.susAccess);
- if (ASIC_IS_DCE8(rdev)) {
- rdev->clock.gpupll_outputfreq =
+ if (ASIC_IS_DCE8(rdev))
+ rdev->clock.vco_freq =
le32_to_cpu(firmware_info->info_22.ulGPUPLL_OutputFreq);
- if (rdev->clock.gpupll_outputfreq == 0)
- rdev->clock.gpupll_outputfreq = 360000; /* 3.6 GHz */
- }
+ else if (ASIC_IS_DCE5(rdev))
+ rdev->clock.vco_freq = rdev->clock.current_dispclk;
+ else if (ASIC_IS_DCE41(rdev))
+ radeon_atombios_get_dentist_vco_freq(rdev);
+ else
+ rdev->clock.vco_freq = rdev->clock.current_dispclk;
+
+ if (rdev->clock.vco_freq == 0)
+ rdev->clock.vco_freq = 360000; /* 3.6 GHz */
return true;
}
@@ -1270,14 +1301,6 @@ bool radeon_atom_get_clock_info(struct drm_device *dev)
return false;
}
-union igp_info {
- struct _ATOM_INTEGRATED_SYSTEM_INFO info;
- struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 info_2;
- struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 info_6;
- struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_7 info_7;
- struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_8 info_8;
-};
-
bool radeon_atombios_sideport_present(struct radeon_device *rdev)
{
struct radeon_mode_info *mode_info = &rdev->mode_info;
diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c
index 2c02e99b5f95..b214663b370d 100644
--- a/drivers/gpu/drm/radeon/radeon_audio.c
+++ b/drivers/gpu/drm/radeon/radeon_audio.c
@@ -739,9 +739,6 @@ static void radeon_audio_dp_mode_set(struct drm_encoder *encoder,
struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv;
struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
- struct radeon_connector *radeon_connector = to_radeon_connector(connector);
- struct radeon_connector_atom_dig *dig_connector =
- radeon_connector->con_priv;
if (!dig || !dig->afmt)
return;
@@ -753,10 +750,7 @@ static void radeon_audio_dp_mode_set(struct drm_encoder *encoder,
radeon_audio_write_speaker_allocation(encoder);
radeon_audio_write_sad_regs(encoder);
radeon_audio_write_latency_fields(encoder, mode);
- if (rdev->clock.dp_extclk || ASIC_IS_DCE5(rdev))
- radeon_audio_set_dto(encoder, rdev->clock.default_dispclk * 10);
- else
- radeon_audio_set_dto(encoder, dig_connector->dp_clock);
+ radeon_audio_set_dto(encoder, rdev->clock.vco_freq * 10);
radeon_audio_set_audio_packet(encoder);
radeon_audio_select_pin(encoder);
@@ -781,3 +775,15 @@ void radeon_audio_dpms(struct drm_encoder *encoder, int mode)
if (radeon_encoder->audio && radeon_encoder->audio->dpms)
radeon_encoder->audio->dpms(encoder, mode == DRM_MODE_DPMS_ON);
}
+
+unsigned int radeon_audio_decode_dfs_div(unsigned int div)
+{
+ if (div >= 8 && div < 64)
+ return (div - 8) * 25 + 200;
+ else if (div >= 64 && div < 96)
+ return (div - 64) * 50 + 1600;
+ else if (div >= 96 && div < 128)
+ return (div - 96) * 100 + 3200;
+ else
+ return 0;
+}
diff --git a/drivers/gpu/drm/radeon/radeon_audio.h b/drivers/gpu/drm/radeon/radeon_audio.h
index 059cc3012062..5c70cceaa4a6 100644
--- a/drivers/gpu/drm/radeon/radeon_audio.h
+++ b/drivers/gpu/drm/radeon/radeon_audio.h
@@ -79,5 +79,6 @@ void radeon_audio_fini(struct radeon_device *rdev);
void radeon_audio_mode_set(struct drm_encoder *encoder,
struct drm_display_mode *mode);
void radeon_audio_dpms(struct drm_encoder *encoder, int mode);
+unsigned int radeon_audio_decode_dfs_div(unsigned int div);
#endif
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index b3bb92368ae0..298ea1c453c3 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -1670,8 +1670,10 @@ int radeon_modeset_init(struct radeon_device *rdev)
/* setup afmt */
radeon_afmt_init(rdev);
- radeon_fbdev_init(rdev);
- drm_kms_helper_poll_init(rdev->ddev);
+ if (!list_empty(&rdev->ddev->mode_config.connector_list)) {
+ radeon_fbdev_init(rdev);
+ drm_kms_helper_poll_init(rdev->ddev);
+ }
/* do pm late init */
ret = radeon_pm_late_init(rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index 3dcc5733ff69..e26c963f2e93 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -663,6 +663,7 @@ int radeon_gem_va_ioctl(struct drm_device *dev, void *data,
bo_va = radeon_vm_bo_find(&fpriv->vm, rbo);
if (!bo_va) {
args->operation = RADEON_VA_RESULT_ERROR;
+ radeon_bo_unreserve(rbo);
drm_gem_object_unreference_unlocked(gobj);
return -ENOENT;
}
diff --git a/drivers/gpu/drm/radeon/vce_v1_0.c b/drivers/gpu/drm/radeon/vce_v1_0.c
index 07a0d378e122..a01efe39a820 100644
--- a/drivers/gpu/drm/radeon/vce_v1_0.c
+++ b/drivers/gpu/drm/radeon/vce_v1_0.c
@@ -178,12 +178,12 @@ int vce_v1_0_load_fw(struct radeon_device *rdev, uint32_t *data)
return -EINVAL;
}
- for (i = 0; i < sign->num; ++i) {
- if (sign->val[i].chip_id == chip_id)
+ for (i = 0; i < le32_to_cpu(sign->num); ++i) {
+ if (le32_to_cpu(sign->val[i].chip_id) == chip_id)
break;
}
- if (i == sign->num)
+ if (i == le32_to_cpu(sign->num))
return -EINVAL;
data += (256 - 64) / 4;
@@ -191,18 +191,18 @@ int vce_v1_0_load_fw(struct radeon_device *rdev, uint32_t *data)
data[1] = sign->val[i].nonce[1];
data[2] = sign->val[i].nonce[2];
data[3] = sign->val[i].nonce[3];
- data[4] = sign->len + 64;
+ data[4] = cpu_to_le32(le32_to_cpu(sign->len) + 64);
memset(&data[5], 0, 44);
memcpy(&data[16], &sign[1], rdev->vce_fw->size - sizeof(*sign));
- data += data[4] / 4;
+ data += le32_to_cpu(data[4]) / 4;
data[0] = sign->val[i].sigval[0];
data[1] = sign->val[i].sigval[1];
data[2] = sign->val[i].sigval[2];
data[3] = sign->val[i].sigval[3];
- rdev->vce.keyselect = sign->val[i].keyselect;
+ rdev->vce.keyselect = le32_to_cpu(sign->val[i].keyselect);
return 0;
}
diff --git a/drivers/gpu/drm/rockchip/Makefile b/drivers/gpu/drm/rockchip/Makefile
index d1dc0f7b01db..f6a809afceec 100644
--- a/drivers/gpu/drm/rockchip/Makefile
+++ b/drivers/gpu/drm/rockchip/Makefile
@@ -2,11 +2,11 @@
# Makefile for the drm device driver. This driver provides support for the
# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
-rockchipdrm-y := rockchip_drm_drv.o rockchip_drm_fb.o rockchip_drm_fbdev.o \
- rockchip_drm_gem.o
+rockchipdrm-y := rockchip_drm_drv.o rockchip_drm_fb.o \
+ rockchip_drm_gem.o rockchip_drm_vop.o
+rockchipdrm-$(CONFIG_DRM_FBDEV_EMULATION) += rockchip_drm_fbdev.o
obj-$(CONFIG_ROCKCHIP_DW_HDMI) += dw_hdmi-rockchip.o
obj-$(CONFIG_ROCKCHIP_DW_MIPI_DSI) += dw-mipi-dsi.o
-obj-$(CONFIG_DRM_ROCKCHIP) += rockchipdrm.o rockchip_drm_vop.o \
- rockchip_vop_reg.o
+obj-$(CONFIG_DRM_ROCKCHIP) += rockchipdrm.o rockchip_vop_reg.o
diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi.c
index 7bfe243c6173..f8f8f29fb7c3 100644
--- a/drivers/gpu/drm/rockchip/dw-mipi-dsi.c
+++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi.c
@@ -461,10 +461,11 @@ static int dw_mipi_dsi_phy_init(struct dw_mipi_dsi *dsi)
static int dw_mipi_dsi_get_lane_bps(struct dw_mipi_dsi *dsi)
{
- unsigned int bpp, i, pre;
+ unsigned int i, pre;
unsigned long mpclk, pllref, tmp;
unsigned int m = 1, n = 1, target_mbps = 1000;
unsigned int max_mbps = dptdin_map[ARRAY_SIZE(dptdin_map) - 1].max_mbps;
+ int bpp;
bpp = mipi_dsi_pixel_format_to_bpp(dsi->format);
if (bpp < 0) {
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
index 8397d1b62ef9..a0d51ccb6ea4 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
@@ -55,14 +55,12 @@ int rockchip_drm_dma_attach_device(struct drm_device *drm_dev,
return arm_iommu_attach_device(dev, mapping);
}
-EXPORT_SYMBOL_GPL(rockchip_drm_dma_attach_device);
void rockchip_drm_dma_detach_device(struct drm_device *drm_dev,
struct device *dev)
{
arm_iommu_detach_device(dev);
}
-EXPORT_SYMBOL_GPL(rockchip_drm_dma_detach_device);
int rockchip_register_crtc_funcs(struct drm_crtc *crtc,
const struct rockchip_crtc_funcs *crtc_funcs)
@@ -77,7 +75,6 @@ int rockchip_register_crtc_funcs(struct drm_crtc *crtc,
return 0;
}
-EXPORT_SYMBOL_GPL(rockchip_register_crtc_funcs);
void rockchip_unregister_crtc_funcs(struct drm_crtc *crtc)
{
@@ -89,7 +86,6 @@ void rockchip_unregister_crtc_funcs(struct drm_crtc *crtc)
priv->crtc_funcs[pipe] = NULL;
}
-EXPORT_SYMBOL_GPL(rockchip_unregister_crtc_funcs);
static struct drm_crtc *rockchip_crtc_from_pipe(struct drm_device *drm,
int pipe)
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
index f7844883cb76..3b8f652698f8 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
@@ -39,7 +39,6 @@ struct drm_gem_object *rockchip_fb_get_gem_obj(struct drm_framebuffer *fb,
return rk_fb->obj[plane];
}
-EXPORT_SYMBOL_GPL(rockchip_fb_get_gem_obj);
static void rockchip_drm_fb_destroy(struct drm_framebuffer *fb)
{
@@ -177,8 +176,23 @@ static void rockchip_crtc_wait_for_update(struct drm_crtc *crtc)
crtc_funcs->wait_for_update(crtc);
}
+/*
+ * We can't use drm_atomic_helper_wait_for_vblanks() because rk3288 and rk3066
+ * have hardware counters for neither vblanks nor scanlines, which results in
+ * a race where:
+ * | <-- HW vsync irq and reg take effect
+ * plane_commit --> |
+ * get_vblank and wait --> |
+ * | <-- handle_vblank, vblank->count + 1
+ * cleanup_fb --> |
+ * iommu crash --> |
+ * | <-- HW vsync irq and reg take effect
+ *
+ * This function is equivalent but uses rockchip_crtc_wait_for_update() instead
+ * of waiting for vblank_count to change.
+ */
static void
-rockchip_atomic_wait_for_complete(struct drm_atomic_state *old_state)
+rockchip_atomic_wait_for_complete(struct drm_device *dev, struct drm_atomic_state *old_state)
{
struct drm_crtc_state *old_crtc_state;
struct drm_crtc *crtc;
@@ -194,6 +208,10 @@ rockchip_atomic_wait_for_complete(struct drm_atomic_state *old_state)
if (!crtc->state->active)
continue;
+ if (!drm_atomic_helper_framebuffer_changed(dev,
+ old_state, crtc))
+ continue;
+
ret = drm_crtc_vblank_get(crtc);
if (ret != 0)
continue;
@@ -241,7 +259,7 @@ rockchip_atomic_commit_complete(struct rockchip_atomic_commit *commit)
drm_atomic_helper_commit_planes(dev, state, true);
- rockchip_atomic_wait_for_complete(state);
+ rockchip_atomic_wait_for_complete(dev, state);
drm_atomic_helper_cleanup_planes(dev, state);
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.h b/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.h
index 50432e9b5b37..73718c5f5bbf 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.h
@@ -15,7 +15,18 @@
#ifndef _ROCKCHIP_DRM_FBDEV_H
#define _ROCKCHIP_DRM_FBDEV_H
+#ifdef CONFIG_DRM_FBDEV_EMULATION
int rockchip_drm_fbdev_init(struct drm_device *dev);
void rockchip_drm_fbdev_fini(struct drm_device *dev);
+#else
+static inline int rockchip_drm_fbdev_init(struct drm_device *dev)
+{
+ return 0;
+}
+
+static inline void rockchip_drm_fbdev_fini(struct drm_device *dev)
+{
+}
+#endif
#endif /* _ROCKCHIP_DRM_FBDEV_H */
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
index d908321b94ce..18e07338c6e5 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
@@ -234,13 +234,8 @@ int rockchip_gem_dumb_create(struct drm_file *file_priv,
/*
* align to 64 bytes since Mali requires it.
*/
- min_pitch = ALIGN(min_pitch, 64);
-
- if (args->pitch < min_pitch)
- args->pitch = min_pitch;
-
- if (args->size < args->pitch * args->height)
- args->size = args->pitch * args->height;
+ args->pitch = ALIGN(min_pitch, 64);
+ args->size = args->pitch * args->height;
rk_obj = rockchip_gem_create_with_handle(file_priv, dev, args->size,
&args->handle);
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
index 46c2a8dfd8aa..fd370548d7d7 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
@@ -43,8 +43,8 @@
#define REG_SET(x, base, reg, v, mode) \
__REG_SET_##mode(x, base + reg.offset, reg.mask, reg.shift, v)
-#define REG_SET_MASK(x, base, reg, v, mode) \
- __REG_SET_##mode(x, base + reg.offset, reg.mask, reg.shift, v)
+#define REG_SET_MASK(x, base, reg, mask, v, mode) \
+ __REG_SET_##mode(x, base + reg.offset, mask, reg.shift, v)
#define VOP_WIN_SET(x, win, name, v) \
REG_SET(x, win->base, win->phy->name, v, RELAXED)
@@ -58,16 +58,18 @@
#define VOP_INTR_GET(vop, name) \
vop_read_reg(vop, 0, &vop->data->ctrl->name)
-#define VOP_INTR_SET(vop, name, v) \
- REG_SET(vop, 0, vop->data->intr->name, v, NORMAL)
+#define VOP_INTR_SET(vop, name, mask, v) \
+ REG_SET_MASK(vop, 0, vop->data->intr->name, mask, v, NORMAL)
#define VOP_INTR_SET_TYPE(vop, name, type, v) \
do { \
- int i, reg = 0; \
+ int i, reg = 0, mask = 0; \
for (i = 0; i < vop->data->intr->nintrs; i++) { \
- if (vop->data->intr->intrs[i] & type) \
+ if (vop->data->intr->intrs[i] & type) { \
reg |= (v) << i; \
+ mask |= 1 << i; \
+ } \
} \
- VOP_INTR_SET(vop, name, reg); \
+ VOP_INTR_SET(vop, name, mask, reg); \
} while (0)
#define VOP_INTR_GET_TYPE(vop, name, type) \
vop_get_intr_type(vop, &vop->data->intr->name, type)
diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c
index 424d515ffcda..314ff71db978 100644
--- a/drivers/gpu/drm/vc4/vc4_v3d.c
+++ b/drivers/gpu/drm/vc4/vc4_v3d.c
@@ -144,19 +144,16 @@ int vc4_v3d_debugfs_ident(struct seq_file *m, void *unused)
}
#endif /* CONFIG_DEBUG_FS */
-/*
- * Asks the firmware to turn on power to the V3D engine.
- *
- * This may be doable with just the clocks interface, though this
- * packet does some other register setup from the firmware, too.
- */
int
vc4_v3d_set_power(struct vc4_dev *vc4, bool on)
{
- if (on)
- return pm_generic_poweroff(&vc4->v3d->pdev->dev);
- else
- return pm_generic_resume(&vc4->v3d->pdev->dev);
+ /* XXX: This interface is needed for GPU reset, and the way to
+ * do it is to turn our power domain off and back on. We
+ * can't just reset from within the driver, because the reset
+ * bits are in the power domain's register area, and get set
+ * during the poweron process.
+ */
+ return 0;
}
static void vc4_v3d_init_hw(struct drm_device *dev)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index c49812b80dd0..24fb348a44e1 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -25,6 +25,7 @@
*
**************************************************************************/
#include <linux/module.h>
+#include <linux/console.h>
#include <drm/drmP.h>
#include "vmwgfx_drv.h"
@@ -1538,6 +1539,12 @@ static int vmw_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
static int __init vmwgfx_init(void)
{
int ret;
+
+#ifdef CONFIG_VGA_CONSOLE
+ if (vgacon_text_force())
+ return -EINVAL;
+#endif
+
ret = drm_pci_init(&driver, &vmw_pci_driver);
if (ret)
DRM_ERROR("Failed initializing DRM.\n");
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index aa26f3c3416b..8a8440c0eed1 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -5,6 +5,7 @@ menuconfig INFINIBAND
depends on NET
depends on INET
depends on m || IPV6 != m
+ select IRQ_POLL
---help---
Core support for InfiniBand (IB). Make sure to also select
any protocols you wish to use as well as drivers for your
@@ -54,6 +55,15 @@ config INFINIBAND_ADDR_TRANS
depends on INFINIBAND
default y
+config INFINIBAND_ADDR_TRANS_CONFIGFS
+ bool
+ depends on INFINIBAND_ADDR_TRANS && CONFIGFS_FS && !(INFINIBAND=y && CONFIGFS_FS=m)
+ default y
+ ---help---
+ ConfigFS support for RDMA communication manager (CM).
+ This allows the user to config the default GID type that the CM
+ uses for each device, when initiaing new connections.
+
source "drivers/infiniband/hw/mthca/Kconfig"
source "drivers/infiniband/hw/qib/Kconfig"
source "drivers/infiniband/hw/cxgb3/Kconfig"
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index d43a8994ac5c..f818538a7f4e 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -8,7 +8,7 @@ obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o
obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
$(user_access-y)
-ib_core-y := packer.o ud_header.o verbs.o sysfs.o \
+ib_core-y := packer.o ud_header.o verbs.o cq.o sysfs.o \
device.o fmr_pool.o cache.o netlink.o \
roce_gid_mgmt.o
ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
@@ -24,6 +24,8 @@ iw_cm-y := iwcm.o iwpm_util.o iwpm_msg.o
rdma_cm-y := cma.o
+rdma_cm-$(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS) += cma_configfs.o
+
rdma_ucm-y := ucma.o
ib_addr-y := addr.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 34b1adad07aa..337353d86cfa 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -121,7 +121,8 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
}
EXPORT_SYMBOL(rdma_copy_addr);
-int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
+int rdma_translate_ip(const struct sockaddr *addr,
+ struct rdma_dev_addr *dev_addr,
u16 *vlan_id)
{
struct net_device *dev;
@@ -139,7 +140,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
switch (addr->sa_family) {
case AF_INET:
dev = ip_dev_find(dev_addr->net,
- ((struct sockaddr_in *) addr)->sin_addr.s_addr);
+ ((const struct sockaddr_in *)addr)->sin_addr.s_addr);
if (!dev)
return ret;
@@ -154,7 +155,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
rcu_read_lock();
for_each_netdev_rcu(dev_addr->net, dev) {
if (ipv6_chk_addr(dev_addr->net,
- &((struct sockaddr_in6 *) addr)->sin6_addr,
+ &((const struct sockaddr_in6 *)addr)->sin6_addr,
dev, 1)) {
ret = rdma_copy_addr(dev_addr, dev, NULL);
if (vlan_id)
@@ -198,7 +199,8 @@ static void queue_req(struct addr_req *req)
mutex_unlock(&lock);
}
-static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, void *daddr)
+static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
+ const void *daddr)
{
struct neighbour *n;
int ret;
@@ -222,8 +224,9 @@ static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, v
}
static int addr4_resolve(struct sockaddr_in *src_in,
- struct sockaddr_in *dst_in,
- struct rdma_dev_addr *addr)
+ const struct sockaddr_in *dst_in,
+ struct rdma_dev_addr *addr,
+ struct rtable **prt)
{
__be32 src_ip = src_in->sin_addr.s_addr;
__be32 dst_ip = dst_in->sin_addr.s_addr;
@@ -243,33 +246,29 @@ static int addr4_resolve(struct sockaddr_in *src_in,
src_in->sin_family = AF_INET;
src_in->sin_addr.s_addr = fl4.saddr;
- if (rt->dst.dev->flags & IFF_LOOPBACK) {
- ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);
- if (!ret)
- memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
- goto put;
- }
+ /* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't
+ * routable) and we could set the network type accordingly.
+ */
+ if (rt->rt_uses_gateway)
+ addr->network = RDMA_NETWORK_IPV4;
- /* If the device does ARP internally, return 'done' */
- if (rt->dst.dev->flags & IFF_NOARP) {
- ret = rdma_copy_addr(addr, rt->dst.dev, NULL);
- goto put;
- }
+ addr->hoplimit = ip4_dst_hoplimit(&rt->dst);
- ret = dst_fetch_ha(&rt->dst, addr, &fl4.daddr);
-put:
- ip_rt_put(rt);
+ *prt = rt;
+ return 0;
out:
return ret;
}
#if IS_ENABLED(CONFIG_IPV6)
static int addr6_resolve(struct sockaddr_in6 *src_in,
- struct sockaddr_in6 *dst_in,
- struct rdma_dev_addr *addr)
+ const struct sockaddr_in6 *dst_in,
+ struct rdma_dev_addr *addr,
+ struct dst_entry **pdst)
{
struct flowi6 fl6;
struct dst_entry *dst;
+ struct rt6_info *rt;
int ret;
memset(&fl6, 0, sizeof fl6);
@@ -281,6 +280,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
if ((ret = dst->error))
goto put;
+ rt = (struct rt6_info *)dst;
if (ipv6_addr_any(&fl6.saddr)) {
ret = ipv6_dev_get_saddr(addr->net, ip6_dst_idev(dst)->dev,
&fl6.daddr, 0, &fl6.saddr);
@@ -291,43 +291,111 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
src_in->sin6_addr = fl6.saddr;
}
- if (dst->dev->flags & IFF_LOOPBACK) {
- ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);
- if (!ret)
- memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
- goto put;
- }
+ /* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't
+ * routable) and we could set the network type accordingly.
+ */
+ if (rt->rt6i_flags & RTF_GATEWAY)
+ addr->network = RDMA_NETWORK_IPV6;
- /* If the device does ARP internally, return 'done' */
- if (dst->dev->flags & IFF_NOARP) {
- ret = rdma_copy_addr(addr, dst->dev, NULL);
- goto put;
- }
+ addr->hoplimit = ip6_dst_hoplimit(dst);
- ret = dst_fetch_ha(dst, addr, &fl6.daddr);
+ *pdst = dst;
+ return 0;
put:
dst_release(dst);
return ret;
}
#else
static int addr6_resolve(struct sockaddr_in6 *src_in,
- struct sockaddr_in6 *dst_in,
- struct rdma_dev_addr *addr)
+ const struct sockaddr_in6 *dst_in,
+ struct rdma_dev_addr *addr,
+ struct dst_entry **pdst)
{
return -EADDRNOTAVAIL;
}
#endif
+static int addr_resolve_neigh(struct dst_entry *dst,
+ const struct sockaddr *dst_in,
+ struct rdma_dev_addr *addr)
+{
+ if (dst->dev->flags & IFF_LOOPBACK) {
+ int ret;
+
+ ret = rdma_translate_ip(dst_in, addr, NULL);
+ if (!ret)
+ memcpy(addr->dst_dev_addr, addr->src_dev_addr,
+ MAX_ADDR_LEN);
+
+ return ret;
+ }
+
+ /* If the device doesn't do ARP internally */
+ if (!(dst->dev->flags & IFF_NOARP)) {
+ const struct sockaddr_in *dst_in4 =
+ (const struct sockaddr_in *)dst_in;
+ const struct sockaddr_in6 *dst_in6 =
+ (const struct sockaddr_in6 *)dst_in;
+
+ return dst_fetch_ha(dst, addr,
+ dst_in->sa_family == AF_INET ?
+ (const void *)&dst_in4->sin_addr.s_addr :
+ (const void *)&dst_in6->sin6_addr);
+ }
+
+ return rdma_copy_addr(addr, dst->dev, NULL);
+}
+
static int addr_resolve(struct sockaddr *src_in,
- struct sockaddr *dst_in,
- struct rdma_dev_addr *addr)
+ const struct sockaddr *dst_in,
+ struct rdma_dev_addr *addr,
+ bool resolve_neigh)
{
+ struct net_device *ndev;
+ struct dst_entry *dst;
+ int ret;
+
if (src_in->sa_family == AF_INET) {
- return addr4_resolve((struct sockaddr_in *) src_in,
- (struct sockaddr_in *) dst_in, addr);
- } else
- return addr6_resolve((struct sockaddr_in6 *) src_in,
- (struct sockaddr_in6 *) dst_in, addr);
+ struct rtable *rt = NULL;
+ const struct sockaddr_in *dst_in4 =
+ (const struct sockaddr_in *)dst_in;
+
+ ret = addr4_resolve((struct sockaddr_in *)src_in,
+ dst_in4, addr, &rt);
+ if (ret)
+ return ret;
+
+ if (resolve_neigh)
+ ret = addr_resolve_neigh(&rt->dst, dst_in, addr);
+
+ ndev = rt->dst.dev;
+ dev_hold(ndev);
+
+ ip_rt_put(rt);
+ } else {
+ const struct sockaddr_in6 *dst_in6 =
+ (const struct sockaddr_in6 *)dst_in;
+
+ ret = addr6_resolve((struct sockaddr_in6 *)src_in,
+ dst_in6, addr,
+ &dst);
+ if (ret)
+ return ret;
+
+ if (resolve_neigh)
+ ret = addr_resolve_neigh(dst, dst_in, addr);
+
+ ndev = dst->dev;
+ dev_hold(ndev);
+
+ dst_release(dst);
+ }
+
+ addr->bound_dev_if = ndev->ifindex;
+ addr->net = dev_net(ndev);
+ dev_put(ndev);
+
+ return ret;
}
static void process_req(struct work_struct *work)
@@ -343,7 +411,8 @@ static void process_req(struct work_struct *work)
if (req->status == -ENODATA) {
src_in = (struct sockaddr *) &req->src_addr;
dst_in = (struct sockaddr *) &req->dst_addr;
- req->status = addr_resolve(src_in, dst_in, req->addr);
+ req->status = addr_resolve(src_in, dst_in, req->addr,
+ true);
if (req->status && time_after_eq(jiffies, req->timeout))
req->status = -ETIMEDOUT;
else if (req->status == -ENODATA)
@@ -403,7 +472,7 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
req->client = client;
atomic_inc(&client->refcount);
- req->status = addr_resolve(src_in, dst_in, addr);
+ req->status = addr_resolve(src_in, dst_in, addr, true);
switch (req->status) {
case 0:
req->timeout = jiffies;
@@ -425,6 +494,26 @@ err:
}
EXPORT_SYMBOL(rdma_resolve_ip);
+int rdma_resolve_ip_route(struct sockaddr *src_addr,
+ const struct sockaddr *dst_addr,
+ struct rdma_dev_addr *addr)
+{
+ struct sockaddr_storage ssrc_addr = {};
+ struct sockaddr *src_in = (struct sockaddr *)&ssrc_addr;
+
+ if (src_addr) {
+ if (src_addr->sa_family != dst_addr->sa_family)
+ return -EINVAL;
+
+ memcpy(src_in, src_addr, rdma_addr_size(src_addr));
+ } else {
+ src_in->sa_family = dst_addr->sa_family;
+ }
+
+ return addr_resolve(src_in, dst_addr, addr, false);
+}
+EXPORT_SYMBOL(rdma_resolve_ip_route);
+
void rdma_addr_cancel(struct rdma_dev_addr *addr)
{
struct addr_req *req, *temp_req;
@@ -456,8 +545,10 @@ static void resolve_cb(int status, struct sockaddr *src_addr,
complete(&((struct resolve_cb_context *)context)->comp);
}
-int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgid,
- u8 *dmac, u16 *vlan_id, int if_index)
+int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
+ const union ib_gid *dgid,
+ u8 *dmac, u16 *vlan_id, int *if_index,
+ int *hoplimit)
{
int ret = 0;
struct rdma_dev_addr dev_addr;
@@ -475,7 +566,8 @@ int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgi
rdma_gid2ip(&dgid_addr._sockaddr, dgid);
memset(&dev_addr, 0, sizeof(dev_addr));
- dev_addr.bound_dev_if = if_index;
+ if (if_index)
+ dev_addr.bound_dev_if = *if_index;
dev_addr.net = &init_net;
ctx.addr = &dev_addr;
@@ -491,12 +583,16 @@ int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgi
dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
if (!dev)
return -ENODEV;
+ if (if_index)
+ *if_index = dev_addr.bound_dev_if;
if (vlan_id)
*vlan_id = rdma_vlan_dev_vlan_id(dev);
+ if (hoplimit)
+ *hoplimit = dev_addr.hoplimit;
dev_put(dev);
return ret;
}
-EXPORT_SYMBOL(rdma_addr_find_dmac_by_grh);
+EXPORT_SYMBOL(rdma_addr_find_l2_eth_by_grh);
int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
{
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 89bebeada38b..53343ffbff7a 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -64,6 +64,7 @@ enum gid_attr_find_mask {
GID_ATTR_FIND_MASK_GID = 1UL << 0,
GID_ATTR_FIND_MASK_NETDEV = 1UL << 1,
GID_ATTR_FIND_MASK_DEFAULT = 1UL << 2,
+ GID_ATTR_FIND_MASK_GID_TYPE = 1UL << 3,
};
enum gid_table_entry_props {
@@ -81,10 +82,6 @@ enum gid_table_write_action {
};
struct ib_gid_table_entry {
- /* This lock protects an entry from being
- * read and written simultaneously.
- */
- rwlock_t lock;
unsigned long props;
union ib_gid gid;
struct ib_gid_attr attr;
@@ -109,28 +106,86 @@ struct ib_gid_table {
* are locked by this lock.
**/
struct mutex lock;
+ /* This lock protects the table entries from being
+ * read and written simultaneously.
+ */
+ rwlock_t rwlock;
struct ib_gid_table_entry *data_vec;
};
+static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
+{
+ if (rdma_cap_roce_gid_table(ib_dev, port)) {
+ struct ib_event event;
+
+ event.device = ib_dev;
+ event.element.port_num = port;
+ event.event = IB_EVENT_GID_CHANGE;
+
+ ib_dispatch_event(&event);
+ }
+}
+
+static const char * const gid_type_str[] = {
+ [IB_GID_TYPE_IB] = "IB/RoCE v1",
+ [IB_GID_TYPE_ROCE_UDP_ENCAP] = "RoCE v2",
+};
+
+const char *ib_cache_gid_type_str(enum ib_gid_type gid_type)
+{
+ if (gid_type < ARRAY_SIZE(gid_type_str) && gid_type_str[gid_type])
+ return gid_type_str[gid_type];
+
+ return "Invalid GID type";
+}
+EXPORT_SYMBOL(ib_cache_gid_type_str);
+
+int ib_cache_gid_parse_type_str(const char *buf)
+{
+ unsigned int i;
+ size_t len;
+ int err = -EINVAL;
+
+ len = strlen(buf);
+ if (len == 0)
+ return -EINVAL;
+
+ if (buf[len - 1] == '\n')
+ len--;
+
+ for (i = 0; i < ARRAY_SIZE(gid_type_str); ++i)
+ if (gid_type_str[i] && !strncmp(buf, gid_type_str[i], len) &&
+ len == strlen(gid_type_str[i])) {
+ err = i;
+ break;
+ }
+
+ return err;
+}
+EXPORT_SYMBOL(ib_cache_gid_parse_type_str);
+
+/* This function expects that rwlock will be write locked in all
+ * scenarios and that lock will be locked in sleep-able (RoCE)
+ * scenarios.
+ */
static int write_gid(struct ib_device *ib_dev, u8 port,
struct ib_gid_table *table, int ix,
const union ib_gid *gid,
const struct ib_gid_attr *attr,
enum gid_table_write_action action,
bool default_gid)
+ __releases(&table->rwlock) __acquires(&table->rwlock)
{
int ret = 0;
struct net_device *old_net_dev;
- unsigned long flags;
/* in rdma_cap_roce_gid_table, this funciton should be protected by a
* sleep-able lock.
*/
- write_lock_irqsave(&table->data_vec[ix].lock, flags);
if (rdma_cap_roce_gid_table(ib_dev, port)) {
table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
- write_unlock_irqrestore(&table->data_vec[ix].lock, flags);
+ write_unlock_irq(&table->rwlock);
/* GID_TABLE_WRITE_ACTION_MODIFY currently isn't supported by
* RoCE providers and thus only updates the cache.
*/
@@ -140,7 +195,7 @@ static int write_gid(struct ib_device *ib_dev, u8 port,
else if (action == GID_TABLE_WRITE_ACTION_DEL)
ret = ib_dev->del_gid(ib_dev, port, ix,
&table->data_vec[ix].context);
- write_lock_irqsave(&table->data_vec[ix].lock, flags);
+ write_lock_irq(&table->rwlock);
}
old_net_dev = table->data_vec[ix].attr.ndev;
@@ -162,17 +217,6 @@ static int write_gid(struct ib_device *ib_dev, u8 port,
table->data_vec[ix].props &= ~GID_TABLE_ENTRY_INVALID;
- write_unlock_irqrestore(&table->data_vec[ix].lock, flags);
-
- if (!ret && rdma_cap_roce_gid_table(ib_dev, port)) {
- struct ib_event event;
-
- event.device = ib_dev;
- event.element.port_num = port;
- event.event = IB_EVENT_GID_CHANGE;
-
- ib_dispatch_event(&event);
- }
return ret;
}
@@ -201,41 +245,58 @@ static int del_gid(struct ib_device *ib_dev, u8 port,
GID_TABLE_WRITE_ACTION_DEL, default_gid);
}
+/* rwlock should be read locked */
static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
const struct ib_gid_attr *val, bool default_gid,
- unsigned long mask)
+ unsigned long mask, int *pempty)
{
- int i;
+ int i = 0;
+ int found = -1;
+ int empty = pempty ? -1 : 0;
- for (i = 0; i < table->sz; i++) {
- unsigned long flags;
- struct ib_gid_attr *attr = &table->data_vec[i].attr;
+ while (i < table->sz && (found < 0 || empty < 0)) {
+ struct ib_gid_table_entry *data = &table->data_vec[i];
+ struct ib_gid_attr *attr = &data->attr;
+ int curr_index = i;
- read_lock_irqsave(&table->data_vec[i].lock, flags);
+ i++;
- if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
- goto next;
+ if (data->props & GID_TABLE_ENTRY_INVALID)
+ continue;
+
+ if (empty < 0)
+ if (!memcmp(&data->gid, &zgid, sizeof(*gid)) &&
+ !memcmp(attr, &zattr, sizeof(*attr)) &&
+ !data->props)
+ empty = curr_index;
+
+ if (found >= 0)
+ continue;
+
+ if (mask & GID_ATTR_FIND_MASK_GID_TYPE &&
+ attr->gid_type != val->gid_type)
+ continue;
if (mask & GID_ATTR_FIND_MASK_GID &&
- memcmp(gid, &table->data_vec[i].gid, sizeof(*gid)))
- goto next;
+ memcmp(gid, &data->gid, sizeof(*gid)))
+ continue;
if (mask & GID_ATTR_FIND_MASK_NETDEV &&
attr->ndev != val->ndev)
- goto next;
+ continue;
if (mask & GID_ATTR_FIND_MASK_DEFAULT &&
- !!(table->data_vec[i].props & GID_TABLE_ENTRY_DEFAULT) !=
+ !!(data->props & GID_TABLE_ENTRY_DEFAULT) !=
default_gid)
- goto next;
+ continue;
- read_unlock_irqrestore(&table->data_vec[i].lock, flags);
- return i;
-next:
- read_unlock_irqrestore(&table->data_vec[i].lock, flags);
+ found = curr_index;
}
- return -1;
+ if (pempty)
+ *pempty = empty;
+
+ return found;
}
static void make_default_gid(struct net_device *dev, union ib_gid *gid)
@@ -252,6 +313,7 @@ int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
int ix;
int ret = 0;
struct net_device *idev;
+ int empty;
table = ports_table[port - rdma_start_port(ib_dev)];
@@ -275,22 +337,25 @@ int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
}
mutex_lock(&table->lock);
+ write_lock_irq(&table->rwlock);
ix = find_gid(table, gid, attr, false, GID_ATTR_FIND_MASK_GID |
- GID_ATTR_FIND_MASK_NETDEV);
+ GID_ATTR_FIND_MASK_GID_TYPE |
+ GID_ATTR_FIND_MASK_NETDEV, &empty);
if (ix >= 0)
goto out_unlock;
- ix = find_gid(table, &zgid, NULL, false, GID_ATTR_FIND_MASK_GID |
- GID_ATTR_FIND_MASK_DEFAULT);
- if (ix < 0) {
+ if (empty < 0) {
ret = -ENOSPC;
goto out_unlock;
}
- add_gid(ib_dev, port, table, ix, gid, attr, false);
+ ret = add_gid(ib_dev, port, table, empty, gid, attr, false);
+ if (!ret)
+ dispatch_gid_change_event(ib_dev, port);
out_unlock:
+ write_unlock_irq(&table->rwlock);
mutex_unlock(&table->lock);
return ret;
}
@@ -305,17 +370,22 @@ int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
table = ports_table[port - rdma_start_port(ib_dev)];
mutex_lock(&table->lock);
+ write_lock_irq(&table->rwlock);
ix = find_gid(table, gid, attr, false,
GID_ATTR_FIND_MASK_GID |
+ GID_ATTR_FIND_MASK_GID_TYPE |
GID_ATTR_FIND_MASK_NETDEV |
- GID_ATTR_FIND_MASK_DEFAULT);
+ GID_ATTR_FIND_MASK_DEFAULT,
+ NULL);
if (ix < 0)
goto out_unlock;
- del_gid(ib_dev, port, table, ix, false);
+ if (!del_gid(ib_dev, port, table, ix, false))
+ dispatch_gid_change_event(ib_dev, port);
out_unlock:
+ write_unlock_irq(&table->rwlock);
mutex_unlock(&table->lock);
return 0;
}
@@ -326,16 +396,24 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
int ix;
+ bool deleted = false;
table = ports_table[port - rdma_start_port(ib_dev)];
mutex_lock(&table->lock);
+ write_lock_irq(&table->rwlock);
for (ix = 0; ix < table->sz; ix++)
if (table->data_vec[ix].attr.ndev == ndev)
- del_gid(ib_dev, port, table, ix, false);
+ if (!del_gid(ib_dev, port, table, ix, false))
+ deleted = true;
+ write_unlock_irq(&table->rwlock);
mutex_unlock(&table->lock);
+
+ if (deleted)
+ dispatch_gid_change_event(ib_dev, port);
+
return 0;
}
@@ -344,18 +422,14 @@ static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
{
struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
- unsigned long flags;
table = ports_table[port - rdma_start_port(ib_dev)];
if (index < 0 || index >= table->sz)
return -EINVAL;
- read_lock_irqsave(&table->data_vec[index].lock, flags);
- if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID) {
- read_unlock_irqrestore(&table->data_vec[index].lock, flags);
+ if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID)
return -EAGAIN;
- }
memcpy(gid, &table->data_vec[index].gid, sizeof(*gid));
if (attr) {
@@ -364,7 +438,6 @@ static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
dev_hold(attr->ndev);
}
- read_unlock_irqrestore(&table->data_vec[index].lock, flags);
return 0;
}
@@ -378,17 +451,21 @@ static int _ib_cache_gid_table_find(struct ib_device *ib_dev,
struct ib_gid_table *table;
u8 p;
int local_index;
+ unsigned long flags;
for (p = 0; p < ib_dev->phys_port_cnt; p++) {
table = ports_table[p];
- local_index = find_gid(table, gid, val, false, mask);
+ read_lock_irqsave(&table->rwlock, flags);
+ local_index = find_gid(table, gid, val, false, mask, NULL);
if (local_index >= 0) {
if (index)
*index = local_index;
if (port)
*port = p + rdma_start_port(ib_dev);
+ read_unlock_irqrestore(&table->rwlock, flags);
return 0;
}
+ read_unlock_irqrestore(&table->rwlock, flags);
}
return -ENOENT;
@@ -396,11 +473,13 @@ static int _ib_cache_gid_table_find(struct ib_device *ib_dev,
static int ib_cache_gid_find(struct ib_device *ib_dev,
const union ib_gid *gid,
+ enum ib_gid_type gid_type,
struct net_device *ndev, u8 *port,
u16 *index)
{
- unsigned long mask = GID_ATTR_FIND_MASK_GID;
- struct ib_gid_attr gid_attr_val = {.ndev = ndev};
+ unsigned long mask = GID_ATTR_FIND_MASK_GID |
+ GID_ATTR_FIND_MASK_GID_TYPE;
+ struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};
if (ndev)
mask |= GID_ATTR_FIND_MASK_NETDEV;
@@ -411,14 +490,17 @@ static int ib_cache_gid_find(struct ib_device *ib_dev,
int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
const union ib_gid *gid,
+ enum ib_gid_type gid_type,
u8 port, struct net_device *ndev,
u16 *index)
{
int local_index;
struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
- unsigned long mask = GID_ATTR_FIND_MASK_GID;
- struct ib_gid_attr val = {.ndev = ndev};
+ unsigned long mask = GID_ATTR_FIND_MASK_GID |
+ GID_ATTR_FIND_MASK_GID_TYPE;
+ struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type};
+ unsigned long flags;
if (port < rdma_start_port(ib_dev) ||
port > rdma_end_port(ib_dev))
@@ -429,13 +511,16 @@ int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
if (ndev)
mask |= GID_ATTR_FIND_MASK_NETDEV;
- local_index = find_gid(table, gid, &val, false, mask);
+ read_lock_irqsave(&table->rwlock, flags);
+ local_index = find_gid(table, gid, &val, false, mask, NULL);
if (local_index >= 0) {
if (index)
*index = local_index;
+ read_unlock_irqrestore(&table->rwlock, flags);
return 0;
}
+ read_unlock_irqrestore(&table->rwlock, flags);
return -ENOENT;
}
EXPORT_SYMBOL(ib_find_cached_gid_by_port);
@@ -472,6 +557,7 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
unsigned int i;
+ unsigned long flags;
bool found = false;
if (!ports_table)
@@ -484,11 +570,10 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
table = ports_table[port - rdma_start_port(ib_dev)];
+ read_lock_irqsave(&table->rwlock, flags);
for (i = 0; i < table->sz; i++) {
struct ib_gid_attr attr;
- unsigned long flags;
- read_lock_irqsave(&table->data_vec[i].lock, flags);
if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
goto next;
@@ -501,11 +586,10 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
found = true;
next:
- read_unlock_irqrestore(&table->data_vec[i].lock, flags);
-
if (found)
break;
}
+ read_unlock_irqrestore(&table->rwlock, flags);
if (!found)
return -ENOENT;
@@ -517,9 +601,9 @@ next:
static struct ib_gid_table *alloc_gid_table(int sz)
{
- unsigned int i;
struct ib_gid_table *table =
kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL);
+
if (!table)
return NULL;
@@ -530,9 +614,7 @@ static struct ib_gid_table *alloc_gid_table(int sz)
mutex_init(&table->lock);
table->sz = sz;
-
- for (i = 0; i < sz; i++)
- rwlock_init(&table->data_vec[i].lock);
+ rwlock_init(&table->rwlock);
return table;
@@ -553,30 +635,37 @@ static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
struct ib_gid_table *table)
{
int i;
+ bool deleted = false;
if (!table)
return;
+ write_lock_irq(&table->rwlock);
for (i = 0; i < table->sz; ++i) {
if (memcmp(&table->data_vec[i].gid, &zgid,
sizeof(table->data_vec[i].gid)))
- del_gid(ib_dev, port, table, i,
- table->data_vec[i].props &
- GID_ATTR_FIND_MASK_DEFAULT);
+ if (!del_gid(ib_dev, port, table, i,
+ table->data_vec[i].props &
+ GID_ATTR_FIND_MASK_DEFAULT))
+ deleted = true;
}
+ write_unlock_irq(&table->rwlock);
+
+ if (deleted)
+ dispatch_gid_change_event(ib_dev, port);
}
void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
struct net_device *ndev,
+ unsigned long gid_type_mask,
enum ib_cache_gid_default_mode mode)
{
struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
union ib_gid gid;
struct ib_gid_attr gid_attr;
+ struct ib_gid_attr zattr_type = zattr;
struct ib_gid_table *table;
- int ix;
- union ib_gid current_gid;
- struct ib_gid_attr current_gid_attr = {};
+ unsigned int gid_type;
table = ports_table[port - rdma_start_port(ib_dev)];
@@ -584,46 +673,82 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
memset(&gid_attr, 0, sizeof(gid_attr));
gid_attr.ndev = ndev;
- mutex_lock(&table->lock);
- ix = find_gid(table, NULL, NULL, true, GID_ATTR_FIND_MASK_DEFAULT);
-
- /* Coudn't find default GID location */
- WARN_ON(ix < 0);
-
- if (!__ib_cache_gid_get(ib_dev, port, ix,
- &current_gid, &current_gid_attr) &&
- mode == IB_CACHE_GID_DEFAULT_MODE_SET &&
- !memcmp(&gid, &current_gid, sizeof(gid)) &&
- !memcmp(&gid_attr, &current_gid_attr, sizeof(gid_attr)))
- goto unlock;
-
- if ((memcmp(&current_gid, &zgid, sizeof(current_gid)) ||
- memcmp(&current_gid_attr, &zattr,
- sizeof(current_gid_attr))) &&
- del_gid(ib_dev, port, table, ix, true)) {
- pr_warn("ib_cache_gid: can't delete index %d for default gid %pI6\n",
- ix, gid.raw);
- goto unlock;
- }
+ for (gid_type = 0; gid_type < IB_GID_TYPE_SIZE; ++gid_type) {
+ int ix;
+ union ib_gid current_gid;
+ struct ib_gid_attr current_gid_attr = {};
+
+ if (1UL << gid_type & ~gid_type_mask)
+ continue;
+
+ gid_attr.gid_type = gid_type;
+
+ mutex_lock(&table->lock);
+ write_lock_irq(&table->rwlock);
+ ix = find_gid(table, NULL, &gid_attr, true,
+ GID_ATTR_FIND_MASK_GID_TYPE |
+ GID_ATTR_FIND_MASK_DEFAULT,
+ NULL);
+
+ /* Coudn't find default GID location */
+ WARN_ON(ix < 0);
+
+ zattr_type.gid_type = gid_type;
+
+ if (!__ib_cache_gid_get(ib_dev, port, ix,
+ &current_gid, &current_gid_attr) &&
+ mode == IB_CACHE_GID_DEFAULT_MODE_SET &&
+ !memcmp(&gid, &current_gid, sizeof(gid)) &&
+ !memcmp(&gid_attr, &current_gid_attr, sizeof(gid_attr)))
+ goto release;
+
+ if (memcmp(&current_gid, &zgid, sizeof(current_gid)) ||
+ memcmp(&current_gid_attr, &zattr_type,
+ sizeof(current_gid_attr))) {
+ if (del_gid(ib_dev, port, table, ix, true)) {
+ pr_warn("ib_cache_gid: can't delete index %d for default gid %pI6\n",
+ ix, gid.raw);
+ goto release;
+ } else {
+ dispatch_gid_change_event(ib_dev, port);
+ }
+ }
- if (mode == IB_CACHE_GID_DEFAULT_MODE_SET)
- if (add_gid(ib_dev, port, table, ix, &gid, &gid_attr, true))
- pr_warn("ib_cache_gid: unable to add default gid %pI6\n",
- gid.raw);
+ if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) {
+ if (add_gid(ib_dev, port, table, ix, &gid, &gid_attr, true))
+ pr_warn("ib_cache_gid: unable to add default gid %pI6\n",
+ gid.raw);
+ else
+ dispatch_gid_change_event(ib_dev, port);
+ }
-unlock:
- if (current_gid_attr.ndev)
- dev_put(current_gid_attr.ndev);
- mutex_unlock(&table->lock);
+release:
+ if (current_gid_attr.ndev)
+ dev_put(current_gid_attr.ndev);
+ write_unlock_irq(&table->rwlock);
+ mutex_unlock(&table->lock);
+ }
}
static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
struct ib_gid_table *table)
{
- if (rdma_protocol_roce(ib_dev, port)) {
- struct ib_gid_table_entry *entry = &table->data_vec[0];
+ unsigned int i;
+ unsigned long roce_gid_type_mask;
+ unsigned int num_default_gids;
+ unsigned int current_gid = 0;
+
+ roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
+ num_default_gids = hweight_long(roce_gid_type_mask);
+ for (i = 0; i < num_default_gids && i < table->sz; i++) {
+ struct ib_gid_table_entry *entry =
+ &table->data_vec[i];
entry->props |= GID_TABLE_ENTRY_DEFAULT;
+ current_gid = find_next_bit(&roce_gid_type_mask,
+ BITS_PER_LONG,
+ current_gid);
+ entry->attr.gid_type = current_gid++;
}
return 0;
@@ -728,20 +853,30 @@ int ib_get_cached_gid(struct ib_device *device,
union ib_gid *gid,
struct ib_gid_attr *gid_attr)
{
+ int res;
+ unsigned long flags;
+ struct ib_gid_table **ports_table = device->cache.gid_cache;
+ struct ib_gid_table *table = ports_table[port_num - rdma_start_port(device)];
+
if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
return -EINVAL;
- return __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
+ read_lock_irqsave(&table->rwlock, flags);
+ res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
+ read_unlock_irqrestore(&table->rwlock, flags);
+
+ return res;
}
EXPORT_SYMBOL(ib_get_cached_gid);
int ib_find_cached_gid(struct ib_device *device,
const union ib_gid *gid,
+ enum ib_gid_type gid_type,
struct net_device *ndev,
u8 *port_num,
u16 *index)
{
- return ib_cache_gid_find(device, gid, ndev, port_num, index);
+ return ib_cache_gid_find(device, gid, gid_type, ndev, port_num, index);
}
EXPORT_SYMBOL(ib_find_cached_gid);
@@ -956,10 +1091,12 @@ static void ib_cache_update(struct ib_device *device,
device->cache.pkey_cache[port - rdma_start_port(device)] = pkey_cache;
if (!use_roce_gid_table) {
+ write_lock(&table->rwlock);
for (i = 0; i < gid_cache->table_len; i++) {
modify_gid(device, port, table, i, gid_cache->table + i,
&zattr, false);
}
+ write_unlock(&table->rwlock);
}
device->cache.lmc_cache[port - rdma_start_port(device)] = tprops->lmc;
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 0a26dd6d9b19..1d92e091e22e 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -364,7 +364,7 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
read_lock_irqsave(&cm.device_lock, flags);
list_for_each_entry(cm_dev, &cm.device_list, list) {
if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
- ndev, &p, NULL)) {
+ path->gid_type, ndev, &p, NULL)) {
port = cm_dev->port[p-1];
break;
}
@@ -782,11 +782,11 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
wait_time = cm_convert_to_ms(cm_id_priv->av.timeout);
/* Check if the device started its remove_one */
- spin_lock_irq(&cm.lock);
+ spin_lock_irqsave(&cm.lock, flags);
if (!cm_dev->going_down)
queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
msecs_to_jiffies(wait_time));
- spin_unlock_irq(&cm.lock);
+ spin_unlock_irqrestore(&cm.lock, flags);
cm_id_priv->timewait_info = NULL;
}
@@ -1600,6 +1600,8 @@ static int cm_req_handler(struct cm_work *work)
struct ib_cm_id *cm_id;
struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
struct cm_req_msg *req_msg;
+ union ib_gid gid;
+ struct ib_gid_attr gid_attr;
int ret;
req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -1639,11 +1641,31 @@ static int cm_req_handler(struct cm_work *work)
cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN);
- ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
+ work->path[0].hop_limit = cm_id_priv->av.ah_attr.grh.hop_limit;
+ ret = ib_get_cached_gid(work->port->cm_dev->ib_device,
+ work->port->port_num,
+ cm_id_priv->av.ah_attr.grh.sgid_index,
+ &gid, &gid_attr);
+ if (!ret) {
+ if (gid_attr.ndev) {
+ work->path[0].ifindex = gid_attr.ndev->ifindex;
+ work->path[0].net = dev_net(gid_attr.ndev);
+ dev_put(gid_attr.ndev);
+ }
+ work->path[0].gid_type = gid_attr.gid_type;
+ ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
+ }
if (ret) {
- ib_get_cached_gid(work->port->cm_dev->ib_device,
- work->port->port_num, 0, &work->path[0].sgid,
- NULL);
+ int err = ib_get_cached_gid(work->port->cm_dev->ib_device,
+ work->port->port_num, 0,
+ &work->path[0].sgid,
+ &gid_attr);
+ if (!err && gid_attr.ndev) {
+ work->path[0].ifindex = gid_attr.ndev->ifindex;
+ work->path[0].net = dev_net(gid_attr.ndev);
+ dev_put(gid_attr.ndev);
+ }
+ work->path[0].gid_type = gid_attr.gid_type;
ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
&work->path[0].sgid, sizeof work->path[0].sgid,
NULL, 0);
@@ -3482,6 +3504,7 @@ int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event)
EXPORT_SYMBOL(ib_cm_notify);
static void cm_recv_handler(struct ib_mad_agent *mad_agent,
+ struct ib_mad_send_buf *send_buf,
struct ib_mad_recv_wc *mad_recv_wc)
{
struct cm_port *port = mad_agent->context;
@@ -3731,16 +3754,6 @@ int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
}
EXPORT_SYMBOL(ib_cm_init_qp_attr);
-static void cm_get_ack_delay(struct cm_device *cm_dev)
-{
- struct ib_device_attr attr;
-
- if (ib_query_device(cm_dev->ib_device, &attr))
- cm_dev->ack_delay = 0; /* acks will rely on packet life time */
- else
- cm_dev->ack_delay = attr.local_ca_ack_delay;
-}
-
static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr,
char *buf)
{
@@ -3852,7 +3865,7 @@ static void cm_add_one(struct ib_device *ib_device)
return;
cm_dev->ib_device = ib_device;
- cm_get_ack_delay(cm_dev);
+ cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay;
cm_dev->going_down = 0;
cm_dev->device = device_create(&cm_class, &ib_device->dev,
MKDEV(0, 0), NULL,
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 2d762a2ecd81..9729639df407 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -38,6 +38,7 @@
#include <linux/in6.h>
#include <linux/mutex.h>
#include <linux/random.h>
+#include <linux/igmp.h>
#include <linux/idr.h>
#include <linux/inetdevice.h>
#include <linux/slab.h>
@@ -60,6 +61,8 @@
#include <rdma/ib_sa.h>
#include <rdma/iw_cm.h>
+#include "core_priv.h"
+
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("Generic RDMA CM Agent");
MODULE_LICENSE("Dual BSD/GPL");
@@ -150,6 +153,7 @@ struct cma_device {
struct completion comp;
atomic_t refcount;
struct list_head id_list;
+ enum ib_gid_type *default_gid_type;
};
struct rdma_bind_list {
@@ -185,6 +189,67 @@ enum {
CMA_OPTION_AFONLY,
};
+void cma_ref_dev(struct cma_device *cma_dev)
+{
+ atomic_inc(&cma_dev->refcount);
+}
+
+struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
+ void *cookie)
+{
+ struct cma_device *cma_dev;
+ struct cma_device *found_cma_dev = NULL;
+
+ mutex_lock(&lock);
+
+ list_for_each_entry(cma_dev, &dev_list, list)
+ if (filter(cma_dev->device, cookie)) {
+ found_cma_dev = cma_dev;
+ break;
+ }
+
+ if (found_cma_dev)
+ cma_ref_dev(found_cma_dev);
+ mutex_unlock(&lock);
+ return found_cma_dev;
+}
+
+int cma_get_default_gid_type(struct cma_device *cma_dev,
+ unsigned int port)
+{
+ if (port < rdma_start_port(cma_dev->device) ||
+ port > rdma_end_port(cma_dev->device))
+ return -EINVAL;
+
+ return cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)];
+}
+
+int cma_set_default_gid_type(struct cma_device *cma_dev,
+ unsigned int port,
+ enum ib_gid_type default_gid_type)
+{
+ unsigned long supported_gids;
+
+ if (port < rdma_start_port(cma_dev->device) ||
+ port > rdma_end_port(cma_dev->device))
+ return -EINVAL;
+
+ supported_gids = roce_gid_type_mask_support(cma_dev->device, port);
+
+ if (!(supported_gids & 1 << default_gid_type))
+ return -EINVAL;
+
+ cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)] =
+ default_gid_type;
+
+ return 0;
+}
+
+struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
+{
+ return cma_dev->device;
+}
+
/*
* Device removal can occur at anytime, so we need extra handling to
* serialize notifying the user of device removal with other callbacks.
@@ -228,6 +293,7 @@ struct rdma_id_private {
u8 tos;
u8 reuseaddr;
u8 afonly;
+ enum ib_gid_type gid_type;
};
struct cma_multicast {
@@ -239,6 +305,7 @@ struct cma_multicast {
void *context;
struct sockaddr_storage addr;
struct kref mcref;
+ bool igmp_joined;
};
struct cma_work {
@@ -335,18 +402,48 @@ static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
}
-static void cma_attach_to_dev(struct rdma_id_private *id_priv,
- struct cma_device *cma_dev)
+static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
{
- atomic_inc(&cma_dev->refcount);
+ struct in_device *in_dev = NULL;
+
+ if (ndev) {
+ rtnl_lock();
+ in_dev = __in_dev_get_rtnl(ndev);
+ if (in_dev) {
+ if (join)
+ ip_mc_inc_group(in_dev,
+ *(__be32 *)(mgid->raw + 12));
+ else
+ ip_mc_dec_group(in_dev,
+ *(__be32 *)(mgid->raw + 12));
+ }
+ rtnl_unlock();
+ }
+ return (in_dev) ? 0 : -ENODEV;
+}
+
+static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
+ struct cma_device *cma_dev)
+{
+ cma_ref_dev(cma_dev);
id_priv->cma_dev = cma_dev;
+ id_priv->gid_type = 0;
id_priv->id.device = cma_dev->device;
id_priv->id.route.addr.dev_addr.transport =
rdma_node_get_transport(cma_dev->device->node_type);
list_add_tail(&id_priv->list, &cma_dev->id_list);
}
-static inline void cma_deref_dev(struct cma_device *cma_dev)
+static void cma_attach_to_dev(struct rdma_id_private *id_priv,
+ struct cma_device *cma_dev)
+{
+ _cma_attach_to_dev(id_priv, cma_dev);
+ id_priv->gid_type =
+ cma_dev->default_gid_type[id_priv->id.port_num -
+ rdma_start_port(cma_dev->device)];
+}
+
+void cma_deref_dev(struct cma_device *cma_dev)
{
if (atomic_dec_and_test(&cma_dev->refcount))
complete(&cma_dev->comp);
@@ -441,6 +538,7 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
}
static inline int cma_validate_port(struct ib_device *device, u8 port,
+ enum ib_gid_type gid_type,
union ib_gid *gid, int dev_type,
int bound_if_index)
{
@@ -453,10 +551,25 @@ static inline int cma_validate_port(struct ib_device *device, u8 port,
if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
return ret;
- if (dev_type == ARPHRD_ETHER)
+ if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
ndev = dev_get_by_index(&init_net, bound_if_index);
+ if (ndev && ndev->flags & IFF_LOOPBACK) {
+ pr_info("detected loopback device\n");
+ dev_put(ndev);
- ret = ib_find_cached_gid_by_port(device, gid, port, ndev, NULL);
+ if (!device->get_netdev)
+ return -EOPNOTSUPP;
+
+ ndev = device->get_netdev(device, port);
+ if (!ndev)
+ return -ENODEV;
+ }
+ } else {
+ gid_type = IB_GID_TYPE_IB;
+ }
+
+ ret = ib_find_cached_gid_by_port(device, gid, gid_type, port,
+ ndev, NULL);
if (ndev)
dev_put(ndev);
@@ -490,7 +603,10 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
gidp = rdma_protocol_roce(cma_dev->device, port) ?
&iboe_gid : &gid;
- ret = cma_validate_port(cma_dev->device, port, gidp,
+ ret = cma_validate_port(cma_dev->device, port,
+ rdma_protocol_ib(cma_dev->device, port) ?
+ IB_GID_TYPE_IB :
+ listen_id_priv->gid_type, gidp,
dev_addr->dev_type,
dev_addr->bound_dev_if);
if (!ret) {
@@ -509,8 +625,11 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
gidp = rdma_protocol_roce(cma_dev->device, port) ?
&iboe_gid : &gid;
- ret = cma_validate_port(cma_dev->device, port, gidp,
- dev_addr->dev_type,
+ ret = cma_validate_port(cma_dev->device, port,
+ rdma_protocol_ib(cma_dev->device, port) ?
+ IB_GID_TYPE_IB :
+ cma_dev->default_gid_type[port - 1],
+ gidp, dev_addr->dev_type,
dev_addr->bound_dev_if);
if (!ret) {
id_priv->id.port_num = port;
@@ -1437,8 +1556,24 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
id_priv->id.port_num)) {
ib_sa_free_multicast(mc->multicast.ib);
kfree(mc);
- } else
+ } else {
+ if (mc->igmp_joined) {
+ struct rdma_dev_addr *dev_addr =
+ &id_priv->id.route.addr.dev_addr;
+ struct net_device *ndev = NULL;
+
+ if (dev_addr->bound_dev_if)
+ ndev = dev_get_by_index(&init_net,
+ dev_addr->bound_dev_if);
+ if (ndev) {
+ cma_igmp_send(ndev,
+ &mc->multicast.ib->rec.mgid,
+ false);
+ dev_put(ndev);
+ }
+ }
kref_put(&mc->mcref, release_mc);
+ }
}
}
@@ -1896,7 +2031,6 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
struct rdma_id_private *listen_id, *conn_id;
struct rdma_cm_event event;
int ret;
- struct ib_device_attr attr;
struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
@@ -1938,13 +2072,6 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr));
memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr));
- ret = ib_query_device(conn_id->id.device, &attr);
- if (ret) {
- mutex_unlock(&conn_id->handler_mutex);
- rdma_destroy_id(new_cm_id);
- goto out;
- }
-
memset(&event, 0, sizeof event);
event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
event.param.conn.private_data = iw_event->private_data;
@@ -2051,7 +2178,7 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv),
rdma_addr_size(cma_src_addr(id_priv)));
- cma_attach_to_dev(dev_id_priv, cma_dev);
+ _cma_attach_to_dev(dev_id_priv, cma_dev);
list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
atomic_inc(&id_priv->refcount);
dev_id_priv->internal_id = 1;
@@ -2321,8 +2448,23 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
if (addr->dev_addr.bound_dev_if) {
ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
+ if (!ndev)
+ return -ENODEV;
+
+ if (ndev->flags & IFF_LOOPBACK) {
+ dev_put(ndev);
+ if (!id_priv->id.device->get_netdev)
+ return -EOPNOTSUPP;
+
+ ndev = id_priv->id.device->get_netdev(id_priv->id.device,
+ id_priv->id.port_num);
+ if (!ndev)
+ return -ENODEV;
+ }
+
route->path_rec->net = &init_net;
- route->path_rec->ifindex = addr->dev_addr.bound_dev_if;
+ route->path_rec->ifindex = ndev->ifindex;
+ route->path_rec->gid_type = id_priv->gid_type;
}
if (!ndev) {
ret = -ENODEV;
@@ -2336,7 +2478,14 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr,
&route->path_rec->dgid);
- route->path_rec->hop_limit = 1;
+ /* Use the hint from IP Stack to select GID Type */
+ if (route->path_rec->gid_type < ib_network_to_gid_type(addr->dev_addr.network))
+ route->path_rec->gid_type = ib_network_to_gid_type(addr->dev_addr.network);
+ if (((struct sockaddr *)&id_priv->id.route.addr.dst_addr)->sa_family != AF_IB)
+ /* TODO: get the hoplimit from the inet/inet6 device */
+ route->path_rec->hop_limit = addr->dev_addr.hoplimit;
+ else
+ route->path_rec->hop_limit = 1;
route->path_rec->reversible = 1;
route->path_rec->pkey = cpu_to_be16(0xffff);
route->path_rec->mtu_selector = IB_SA_EQ;
@@ -3534,12 +3683,23 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
event.status = status;
event.param.ud.private_data = mc->context;
if (!status) {
+ struct rdma_dev_addr *dev_addr =
+ &id_priv->id.route.addr.dev_addr;
+ struct net_device *ndev =
+ dev_get_by_index(&init_net, dev_addr->bound_dev_if);
+ enum ib_gid_type gid_type =
+ id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
+ rdma_start_port(id_priv->cma_dev->device)];
+
event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
ib_init_ah_from_mcmember(id_priv->id.device,
id_priv->id.port_num, &multicast->rec,
+ ndev, gid_type,
&event.param.ud.ah_attr);
event.param.ud.qp_num = 0xFFFFFF;
event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
+ if (ndev)
+ dev_put(ndev);
} else
event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
@@ -3672,9 +3832,10 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
{
struct iboe_mcast_work *work;
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
- int err;
+ int err = 0;
struct sockaddr *addr = (struct sockaddr *)&mc->addr;
struct net_device *ndev = NULL;
+ enum ib_gid_type gid_type;
if (cma_zero_addr((struct sockaddr *)&mc->addr))
return -EINVAL;
@@ -3704,9 +3865,25 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
mc->multicast.ib->rec.hop_limit = 1;
mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);
+
+ gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
+ rdma_start_port(id_priv->cma_dev->device)];
+ if (addr->sa_family == AF_INET) {
+ if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+ err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
+ true);
+ if (!err) {
+ mc->igmp_joined = true;
+ mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
+ }
+ } else {
+ if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+ err = -ENOTSUPP;
+ }
dev_put(ndev);
- if (!mc->multicast.ib->rec.mtu) {
- err = -EINVAL;
+ if (err || !mc->multicast.ib->rec.mtu) {
+ if (!err)
+ err = -EINVAL;
goto out2;
}
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
@@ -3745,7 +3922,7 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
memcpy(&mc->addr, addr, rdma_addr_size(addr));
mc->context = context;
mc->id_priv = id_priv;
-
+ mc->igmp_joined = false;
spin_lock(&id_priv->lock);
list_add(&mc->list, &id_priv->mc_list);
spin_unlock(&id_priv->lock);
@@ -3790,9 +3967,25 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
if (rdma_cap_ib_mcast(id->device, id->port_num)) {
ib_sa_free_multicast(mc->multicast.ib);
kfree(mc);
- } else if (rdma_protocol_roce(id->device, id->port_num))
+ } else if (rdma_protocol_roce(id->device, id->port_num)) {
+ if (mc->igmp_joined) {
+ struct rdma_dev_addr *dev_addr =
+ &id->route.addr.dev_addr;
+ struct net_device *ndev = NULL;
+
+ if (dev_addr->bound_dev_if)
+ ndev = dev_get_by_index(&init_net,
+ dev_addr->bound_dev_if);
+ if (ndev) {
+ cma_igmp_send(ndev,
+ &mc->multicast.ib->rec.mgid,
+ false);
+ dev_put(ndev);
+ }
+ mc->igmp_joined = false;
+ }
kref_put(&mc->mcref, release_mc);
-
+ }
return;
}
}
@@ -3861,12 +4054,27 @@ static void cma_add_one(struct ib_device *device)
{
struct cma_device *cma_dev;
struct rdma_id_private *id_priv;
+ unsigned int i;
+ unsigned long supported_gids = 0;
cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
if (!cma_dev)
return;
cma_dev->device = device;
+ cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
+ sizeof(*cma_dev->default_gid_type),
+ GFP_KERNEL);
+ if (!cma_dev->default_gid_type) {
+ kfree(cma_dev);
+ return;
+ }
+ for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
+ supported_gids = roce_gid_type_mask_support(device, i);
+ WARN_ON(!supported_gids);
+ cma_dev->default_gid_type[i - rdma_start_port(device)] =
+ find_first_bit(&supported_gids, BITS_PER_LONG);
+ }
init_completion(&cma_dev->comp);
atomic_set(&cma_dev->refcount, 1);
@@ -3946,6 +4154,7 @@ static void cma_remove_one(struct ib_device *device, void *client_data)
mutex_unlock(&lock);
cma_process_remove(cma_dev);
+ kfree(cma_dev->default_gid_type);
kfree(cma_dev);
}
@@ -4079,6 +4288,7 @@ static int __init cma_init(void)
if (ibnl_add_client(RDMA_NL_RDMA_CM, RDMA_NL_RDMA_CM_NUM_OPS, cma_cb_table))
printk(KERN_WARNING "RDMA CMA: failed to add netlink callback\n");
+ cma_configfs_init();
return 0;
@@ -4093,6 +4303,7 @@ err_wq:
static void __exit cma_cleanup(void)
{
+ cma_configfs_exit();
ibnl_remove_client(RDMA_NL_RDMA_CM);
ib_unregister_client(&cma_client);
unregister_netdevice_notifier(&cma_nb);
diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c
new file mode 100644
index 000000000000..18b112aa577e
--- /dev/null
+++ b/drivers/infiniband/core/cma_configfs.c
@@ -0,0 +1,321 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/configfs.h>
+#include <rdma/ib_verbs.h>
+#include "core_priv.h"
+
+struct cma_device;
+
+struct cma_dev_group;
+
+struct cma_dev_port_group {
+ unsigned int port_num;
+ struct cma_dev_group *cma_dev_group;
+ struct config_group group;
+};
+
+struct cma_dev_group {
+ char name[IB_DEVICE_NAME_MAX];
+ struct config_group device_group;
+ struct config_group ports_group;
+ struct config_group *default_dev_group[2];
+ struct config_group **default_ports_group;
+ struct cma_dev_port_group *ports;
+};
+
+static struct cma_dev_port_group *to_dev_port_group(struct config_item *item)
+{
+ struct config_group *group;
+
+ if (!item)
+ return NULL;
+
+ group = container_of(item, struct config_group, cg_item);
+ return container_of(group, struct cma_dev_port_group, group);
+}
+
+static bool filter_by_name(struct ib_device *ib_dev, void *cookie)
+{
+ return !strcmp(ib_dev->name, cookie);
+}
+
+static int cma_configfs_params_get(struct config_item *item,
+ struct cma_device **pcma_dev,
+ struct cma_dev_port_group **pgroup)
+{
+ struct cma_dev_port_group *group = to_dev_port_group(item);
+ struct cma_device *cma_dev;
+
+ if (!group)
+ return -ENODEV;
+
+ cma_dev = cma_enum_devices_by_ibdev(filter_by_name,
+ group->cma_dev_group->name);
+ if (!cma_dev)
+ return -ENODEV;
+
+ *pcma_dev = cma_dev;
+ *pgroup = group;
+
+ return 0;
+}
+
+static void cma_configfs_params_put(struct cma_device *cma_dev)
+{
+ cma_deref_dev(cma_dev);
+}
+
+static ssize_t default_roce_mode_show(struct config_item *item,
+ char *buf)
+{
+ struct cma_device *cma_dev;
+ struct cma_dev_port_group *group;
+ int gid_type;
+ ssize_t ret;
+
+ ret = cma_configfs_params_get(item, &cma_dev, &group);
+ if (ret)
+ return ret;
+
+ gid_type = cma_get_default_gid_type(cma_dev, group->port_num);
+ cma_configfs_params_put(cma_dev);
+
+ if (gid_type < 0)
+ return gid_type;
+
+ return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_type));
+}
+
+static ssize_t default_roce_mode_store(struct config_item *item,
+ const char *buf, size_t count)
+{
+ struct cma_device *cma_dev;
+ struct cma_dev_port_group *group;
+ int gid_type = ib_cache_gid_parse_type_str(buf);
+ ssize_t ret;
+
+ if (gid_type < 0)
+ return -EINVAL;
+
+ ret = cma_configfs_params_get(item, &cma_dev, &group);
+ if (ret)
+ return ret;
+
+ ret = cma_set_default_gid_type(cma_dev, group->port_num, gid_type);
+
+ cma_configfs_params_put(cma_dev);
+
+ return !ret ? strnlen(buf, count) : ret;
+}
+
+CONFIGFS_ATTR(, default_roce_mode);
+
+static struct configfs_attribute *cma_configfs_attributes[] = {
+ &attr_default_roce_mode,
+ NULL,
+};
+
+static struct config_item_type cma_port_group_type = {
+ .ct_attrs = cma_configfs_attributes,
+ .ct_owner = THIS_MODULE
+};
+
+static int make_cma_ports(struct cma_dev_group *cma_dev_group,
+ struct cma_device *cma_dev)
+{
+ struct ib_device *ibdev;
+ unsigned int i;
+ unsigned int ports_num;
+ struct cma_dev_port_group *ports;
+ struct config_group **ports_group;
+ int err;
+
+ ibdev = cma_get_ib_dev(cma_dev);
+
+ if (!ibdev)
+ return -ENODEV;
+
+ ports_num = ibdev->phys_port_cnt;
+ ports = kcalloc(ports_num, sizeof(*cma_dev_group->ports),
+ GFP_KERNEL);
+ ports_group = kcalloc(ports_num + 1, sizeof(*ports_group), GFP_KERNEL);
+
+ if (!ports || !ports_group) {
+ err = -ENOMEM;
+ goto free;
+ }
+
+ for (i = 0; i < ports_num; i++) {
+ char port_str[10];
+
+ ports[i].port_num = i + 1;
+ snprintf(port_str, sizeof(port_str), "%u", i + 1);
+ ports[i].cma_dev_group = cma_dev_group;
+ config_group_init_type_name(&ports[i].group,
+ port_str,
+ &cma_port_group_type);
+ ports_group[i] = &ports[i].group;
+ }
+ ports_group[i] = NULL;
+ cma_dev_group->default_ports_group = ports_group;
+ cma_dev_group->ports = ports;
+
+ return 0;
+free:
+ kfree(ports);
+ kfree(ports_group);
+ cma_dev_group->ports = NULL;
+ cma_dev_group->default_ports_group = NULL;
+ return err;
+}
+
+static void release_cma_dev(struct config_item *item)
+{
+ struct config_group *group = container_of(item, struct config_group,
+ cg_item);
+ struct cma_dev_group *cma_dev_group = container_of(group,
+ struct cma_dev_group,
+ device_group);
+
+ kfree(cma_dev_group);
+};
+
+static void release_cma_ports_group(struct config_item *item)
+{
+ struct config_group *group = container_of(item, struct config_group,
+ cg_item);
+ struct cma_dev_group *cma_dev_group = container_of(group,
+ struct cma_dev_group,
+ ports_group);
+
+ kfree(cma_dev_group->ports);
+ kfree(cma_dev_group->default_ports_group);
+ cma_dev_group->ports = NULL;
+ cma_dev_group->default_ports_group = NULL;
+};
+
+static struct configfs_item_operations cma_ports_item_ops = {
+ .release = release_cma_ports_group
+};
+
+static struct config_item_type cma_ports_group_type = {
+ .ct_item_ops = &cma_ports_item_ops,
+ .ct_owner = THIS_MODULE
+};
+
+static struct configfs_item_operations cma_device_item_ops = {
+ .release = release_cma_dev
+};
+
+static struct config_item_type cma_device_group_type = {
+ .ct_item_ops = &cma_device_item_ops,
+ .ct_owner = THIS_MODULE
+};
+
+static struct config_group *make_cma_dev(struct config_group *group,
+ const char *name)
+{
+ int err = -ENODEV;
+ struct cma_device *cma_dev = cma_enum_devices_by_ibdev(filter_by_name,
+ (void *)name);
+ struct cma_dev_group *cma_dev_group = NULL;
+
+ if (!cma_dev)
+ goto fail;
+
+ cma_dev_group = kzalloc(sizeof(*cma_dev_group), GFP_KERNEL);
+
+ if (!cma_dev_group) {
+ err = -ENOMEM;
+ goto fail;
+ }
+
+ strncpy(cma_dev_group->name, name, sizeof(cma_dev_group->name));
+
+ err = make_cma_ports(cma_dev_group, cma_dev);
+ if (err)
+ goto fail;
+
+ cma_dev_group->ports_group.default_groups =
+ cma_dev_group->default_ports_group;
+ config_group_init_type_name(&cma_dev_group->ports_group, "ports",
+ &cma_ports_group_type);
+
+ cma_dev_group->device_group.default_groups
+ = cma_dev_group->default_dev_group;
+ cma_dev_group->default_dev_group[0] = &cma_dev_group->ports_group;
+ cma_dev_group->default_dev_group[1] = NULL;
+
+ config_group_init_type_name(&cma_dev_group->device_group, name,
+ &cma_device_group_type);
+
+ cma_deref_dev(cma_dev);
+ return &cma_dev_group->device_group;
+
+fail:
+ if (cma_dev)
+ cma_deref_dev(cma_dev);
+ kfree(cma_dev_group);
+ return ERR_PTR(err);
+}
+
+static struct configfs_group_operations cma_subsys_group_ops = {
+ .make_group = make_cma_dev,
+};
+
+static struct config_item_type cma_subsys_type = {
+ .ct_group_ops = &cma_subsys_group_ops,
+ .ct_owner = THIS_MODULE,
+};
+
+static struct configfs_subsystem cma_subsys = {
+ .su_group = {
+ .cg_item = {
+ .ci_namebuf = "rdma_cm",
+ .ci_type = &cma_subsys_type,
+ },
+ },
+};
+
+int __init cma_configfs_init(void)
+{
+ config_group_init(&cma_subsys.su_group);
+ mutex_init(&cma_subsys.su_mutex);
+ return configfs_register_subsystem(&cma_subsys);
+}
+
+void __exit cma_configfs_exit(void)
+{
+ configfs_unregister_subsystem(&cma_subsys);
+}
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 5cf6eb716f00..eab32215756b 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -38,6 +38,32 @@
#include <rdma/ib_verbs.h>
+#if IS_ENABLED(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS)
+int cma_configfs_init(void);
+void cma_configfs_exit(void);
+#else
+static inline int cma_configfs_init(void)
+{
+ return 0;
+}
+
+static inline void cma_configfs_exit(void)
+{
+}
+#endif
+struct cma_device;
+void cma_ref_dev(struct cma_device *cma_dev);
+void cma_deref_dev(struct cma_device *cma_dev);
+typedef bool (*cma_device_filter)(struct ib_device *, void *);
+struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
+ void *cookie);
+int cma_get_default_gid_type(struct cma_device *cma_dev,
+ unsigned int port);
+int cma_set_default_gid_type(struct cma_device *cma_dev,
+ unsigned int port,
+ enum ib_gid_type default_gid_type);
+struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev);
+
int ib_device_register_sysfs(struct ib_device *device,
int (*port_callback)(struct ib_device *,
u8, struct kobject *));
@@ -70,8 +96,13 @@ enum ib_cache_gid_default_mode {
IB_CACHE_GID_DEFAULT_MODE_DELETE
};
+int ib_cache_gid_parse_type_str(const char *buf);
+
+const char *ib_cache_gid_type_str(enum ib_gid_type gid_type);
+
void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
struct net_device *ndev,
+ unsigned long gid_type_mask,
enum ib_cache_gid_default_mode mode);
int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
@@ -87,9 +118,23 @@ int roce_gid_mgmt_init(void);
void roce_gid_mgmt_cleanup(void);
int roce_rescan_device(struct ib_device *ib_dev);
+unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port);
int ib_cache_setup_one(struct ib_device *device);
void ib_cache_cleanup_one(struct ib_device *device);
void ib_cache_release_one(struct ib_device *device);
+static inline bool rdma_is_upper_dev_rcu(struct net_device *dev,
+ struct net_device *upper)
+{
+ struct net_device *_upper = NULL;
+ struct list_head *iter;
+
+ netdev_for_each_all_upper_dev_rcu(dev, _upper, iter)
+ if (_upper == upper)
+ break;
+
+ return _upper == upper;
+}
+
#endif /* _CORE_PRIV_H */
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
new file mode 100644
index 000000000000..a754fc727de5
--- /dev/null
+++ b/drivers/infiniband/core/cq.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2015 HGST, a Western Digital Company.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#include <linux/module.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <rdma/ib_verbs.h>
+
+/* # of WCs to poll for with a single call to ib_poll_cq */
+#define IB_POLL_BATCH 16
+
+/* # of WCs to iterate over before yielding */
+#define IB_POLL_BUDGET_IRQ 256
+#define IB_POLL_BUDGET_WORKQUEUE 65536
+
+#define IB_POLL_FLAGS \
+ (IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS)
+
+static int __ib_process_cq(struct ib_cq *cq, int budget)
+{
+ int i, n, completed = 0;
+
+ while ((n = ib_poll_cq(cq, IB_POLL_BATCH, cq->wc)) > 0) {
+ for (i = 0; i < n; i++) {
+ struct ib_wc *wc = &cq->wc[i];
+
+ if (wc->wr_cqe)
+ wc->wr_cqe->done(cq, wc);
+ else
+ WARN_ON_ONCE(wc->status == IB_WC_SUCCESS);
+ }
+
+ completed += n;
+
+ if (n != IB_POLL_BATCH ||
+ (budget != -1 && completed >= budget))
+ break;
+ }
+
+ return completed;
+}
+
+/**
+ * ib_process_direct_cq - process a CQ in caller context
+ * @cq: CQ to process
+ * @budget: number of CQEs to poll for
+ *
+ * This function is used to process all outstanding CQ entries on a
+ * %IB_POLL_DIRECT CQ. It does not offload CQ processing to a different
+ * context and does not ask for completion interrupts from the HCA.
+ *
+ * Note: for compatibility reasons -1 can be passed in %budget for unlimited
+ * polling. Do not use this feature in new code, it will be removed soon.
+ */
+int ib_process_cq_direct(struct ib_cq *cq, int budget)
+{
+ WARN_ON_ONCE(cq->poll_ctx != IB_POLL_DIRECT);
+
+ return __ib_process_cq(cq, budget);
+}
+EXPORT_SYMBOL(ib_process_cq_direct);
+
+static void ib_cq_completion_direct(struct ib_cq *cq, void *private)
+{
+ WARN_ONCE(1, "got unsolicited completion for CQ 0x%p\n", cq);
+}
+
+static int ib_poll_handler(struct irq_poll *iop, int budget)
+{
+ struct ib_cq *cq = container_of(iop, struct ib_cq, iop);
+ int completed;
+
+ completed = __ib_process_cq(cq, budget);
+ if (completed < budget) {
+ irq_poll_complete(&cq->iop);
+ if (ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
+ irq_poll_sched(&cq->iop);
+ }
+
+ return completed;
+}
+
+static void ib_cq_completion_softirq(struct ib_cq *cq, void *private)
+{
+ irq_poll_sched(&cq->iop);
+}
+
+static void ib_cq_poll_work(struct work_struct *work)
+{
+ struct ib_cq *cq = container_of(work, struct ib_cq, work);
+ int completed;
+
+ completed = __ib_process_cq(cq, IB_POLL_BUDGET_WORKQUEUE);
+ if (completed >= IB_POLL_BUDGET_WORKQUEUE ||
+ ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
+ queue_work(ib_comp_wq, &cq->work);
+}
+
+static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
+{
+ queue_work(ib_comp_wq, &cq->work);
+}
+
+/**
+ * ib_alloc_cq - allocate a completion queue
+ * @dev: device to allocate the CQ for
+ * @private: driver private data, accessible from cq->cq_context
+ * @nr_cqe: number of CQEs to allocate
+ * @comp_vector: HCA completion vectors for this CQ
+ * @poll_ctx: context to poll the CQ from.
+ *
+ * This is the proper interface to allocate a CQ for in-kernel users. A
+ * CQ allocated with this interface will automatically be polled from the
+ * specified context. The ULP needs must use wr->wr_cqe instead of wr->wr_id
+ * to use this CQ abstraction.
+ */
+struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
+ int nr_cqe, int comp_vector, enum ib_poll_context poll_ctx)
+{
+ struct ib_cq_init_attr cq_attr = {
+ .cqe = nr_cqe,
+ .comp_vector = comp_vector,
+ };
+ struct ib_cq *cq;
+ int ret = -ENOMEM;
+
+ cq = dev->create_cq(dev, &cq_attr, NULL, NULL);
+ if (IS_ERR(cq))
+ return cq;
+
+ cq->device = dev;
+ cq->uobject = NULL;
+ cq->event_handler = NULL;
+ cq->cq_context = private;
+ cq->poll_ctx = poll_ctx;
+ atomic_set(&cq->usecnt, 0);
+
+ cq->wc = kmalloc_array(IB_POLL_BATCH, sizeof(*cq->wc), GFP_KERNEL);
+ if (!cq->wc)
+ goto out_destroy_cq;
+
+ switch (cq->poll_ctx) {
+ case IB_POLL_DIRECT:
+ cq->comp_handler = ib_cq_completion_direct;
+ break;
+ case IB_POLL_SOFTIRQ:
+ cq->comp_handler = ib_cq_completion_softirq;
+
+ irq_poll_init(&cq->iop, IB_POLL_BUDGET_IRQ, ib_poll_handler);
+ ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+ break;
+ case IB_POLL_WORKQUEUE:
+ cq->comp_handler = ib_cq_completion_workqueue;
+ INIT_WORK(&cq->work, ib_cq_poll_work);
+ ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+ break;
+ default:
+ ret = -EINVAL;
+ goto out_free_wc;
+ }
+
+ return cq;
+
+out_free_wc:
+ kfree(cq->wc);
+out_destroy_cq:
+ cq->device->destroy_cq(cq);
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(ib_alloc_cq);
+
+/**
+ * ib_free_cq - free a completion queue
+ * @cq: completion queue to free.
+ */
+void ib_free_cq(struct ib_cq *cq)
+{
+ int ret;
+
+ if (WARN_ON_ONCE(atomic_read(&cq->usecnt)))
+ return;
+
+ switch (cq->poll_ctx) {
+ case IB_POLL_DIRECT:
+ break;
+ case IB_POLL_SOFTIRQ:
+ irq_poll_disable(&cq->iop);
+ break;
+ case IB_POLL_WORKQUEUE:
+ flush_work(&cq->work);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ }
+
+ kfree(cq->wc);
+ ret = cq->device->destroy_cq(cq);
+ WARN_ON_ONCE(ret);
+}
+EXPORT_SYMBOL(ib_free_cq);
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 179e8134d57f..00da80e02154 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -58,6 +58,7 @@ struct ib_client_data {
bool going_down;
};
+struct workqueue_struct *ib_comp_wq;
struct workqueue_struct *ib_wq;
EXPORT_SYMBOL_GPL(ib_wq);
@@ -325,6 +326,7 @@ int ib_register_device(struct ib_device *device,
{
int ret;
struct ib_client *client;
+ struct ib_udata uhw = {.outlen = 0, .inlen = 0};
mutex_lock(&device_mutex);
@@ -352,6 +354,13 @@ int ib_register_device(struct ib_device *device,
goto out;
}
+ memset(&device->attrs, 0, sizeof(device->attrs));
+ ret = device->query_device(device, &device->attrs, &uhw);
+ if (ret) {
+ printk(KERN_WARNING "Couldn't query the device attributes\n");
+ goto out;
+ }
+
ret = ib_device_register_sysfs(device, port_callback);
if (ret) {
printk(KERN_WARNING "Couldn't register device %s with driver model\n",
@@ -628,25 +637,6 @@ void ib_dispatch_event(struct ib_event *event)
EXPORT_SYMBOL(ib_dispatch_event);
/**
- * ib_query_device - Query IB device attributes
- * @device:Device to query
- * @device_attr:Device attributes
- *
- * ib_query_device() returns the attributes of a device through the
- * @device_attr pointer.
- */
-int ib_query_device(struct ib_device *device,
- struct ib_device_attr *device_attr)
-{
- struct ib_udata uhw = {.outlen = 0, .inlen = 0};
-
- memset(device_attr, 0, sizeof(*device_attr));
-
- return device->query_device(device, device_attr, &uhw);
-}
-EXPORT_SYMBOL(ib_query_device);
-
-/**
* ib_query_port - Query IB port attributes
* @device:Device to query
* @port_num:Port number to query
@@ -825,26 +815,31 @@ EXPORT_SYMBOL(ib_modify_port);
* a specified GID value occurs.
* @device: The device to query.
* @gid: The GID value to search for.
+ * @gid_type: Type of GID.
* @ndev: The ndev related to the GID to search for.
* @port_num: The port number of the device where the GID value was found.
* @index: The index into the GID table where the GID was found. This
* parameter may be NULL.
*/
int ib_find_gid(struct ib_device *device, union ib_gid *gid,
- struct net_device *ndev, u8 *port_num, u16 *index)
+ enum ib_gid_type gid_type, struct net_device *ndev,
+ u8 *port_num, u16 *index)
{
union ib_gid tmp_gid;
int ret, port, i;
for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) {
if (rdma_cap_roce_gid_table(device, port)) {
- if (!ib_find_cached_gid_by_port(device, gid, port,
+ if (!ib_find_cached_gid_by_port(device, gid, gid_type, port,
ndev, index)) {
*port_num = port;
return 0;
}
}
+ if (gid_type != IB_GID_TYPE_IB)
+ continue;
+
for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) {
ret = ib_query_gid(device, port, i, &tmp_gid, NULL);
if (ret)
@@ -954,10 +949,18 @@ static int __init ib_core_init(void)
if (!ib_wq)
return -ENOMEM;
+ ib_comp_wq = alloc_workqueue("ib-comp-wq",
+ WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM,
+ WQ_UNBOUND_MAX_ACTIVE);
+ if (!ib_comp_wq) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
ret = class_register(&ib_class);
if (ret) {
printk(KERN_WARNING "Couldn't create InfiniBand device class\n");
- goto err;
+ goto err_comp;
}
ret = ibnl_init();
@@ -972,7 +975,8 @@ static int __init ib_core_init(void)
err_sysfs:
class_unregister(&ib_class);
-
+err_comp:
+ destroy_workqueue(ib_comp_wq);
err:
destroy_workqueue(ib_wq);
return ret;
@@ -983,6 +987,7 @@ static void __exit ib_core_cleanup(void)
ib_cache_cleanup();
ibnl_cleanup();
class_unregister(&ib_class);
+ destroy_workqueue(ib_comp_wq);
/* Make sure that any pending umem accounting work is done. */
destroy_workqueue(ib_wq);
}
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index 9f5ad7cc33c8..6ac3683c144b 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -212,7 +212,6 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
{
struct ib_device *device;
struct ib_fmr_pool *pool;
- struct ib_device_attr *attr;
int i;
int ret;
int max_remaps;
@@ -228,25 +227,10 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
return ERR_PTR(-ENOSYS);
}
- attr = kmalloc(sizeof *attr, GFP_KERNEL);
- if (!attr) {
- printk(KERN_WARNING PFX "couldn't allocate device attr struct\n");
- return ERR_PTR(-ENOMEM);
- }
-
- ret = ib_query_device(device, attr);
- if (ret) {
- printk(KERN_WARNING PFX "couldn't query device: %d\n", ret);
- kfree(attr);
- return ERR_PTR(ret);
- }
-
- if (!attr->max_map_per_fmr)
+ if (!device->attrs.max_map_per_fmr)
max_remaps = IB_FMR_MAX_REMAPS;
else
- max_remaps = attr->max_map_per_fmr;
-
- kfree(attr);
+ max_remaps = device->attrs.max_map_per_fmr;
pool = kmalloc(sizeof *pool, GFP_KERNEL);
if (!pool) {
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 2281de122038..9fa5bf33f5a3 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -84,6 +84,9 @@ static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
u8 mgmt_class);
static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
struct ib_mad_agent_private *agent_priv);
+static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
+ struct ib_wc *wc);
+static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc);
/*
* Returns a ib_mad_port_private structure or NULL for a device/port
@@ -681,7 +684,7 @@ static void snoop_recv(struct ib_mad_qp_info *qp_info,
atomic_inc(&mad_snoop_priv->refcount);
spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
- mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent,
+ mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent, NULL,
mad_recv_wc);
deref_snoop_agent(mad_snoop_priv);
spin_lock_irqsave(&qp_info->snoop_lock, flags);
@@ -689,12 +692,11 @@ static void snoop_recv(struct ib_mad_qp_info *qp_info,
spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
}
-static void build_smp_wc(struct ib_qp *qp,
- u64 wr_id, u16 slid, u16 pkey_index, u8 port_num,
- struct ib_wc *wc)
+static void build_smp_wc(struct ib_qp *qp, struct ib_cqe *cqe, u16 slid,
+ u16 pkey_index, u8 port_num, struct ib_wc *wc)
{
memset(wc, 0, sizeof *wc);
- wc->wr_id = wr_id;
+ wc->wr_cqe = cqe;
wc->status = IB_WC_SUCCESS;
wc->opcode = IB_WC_RECV;
wc->pkey_index = pkey_index;
@@ -832,7 +834,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
}
build_smp_wc(mad_agent_priv->agent.qp,
- send_wr->wr.wr_id, drslid,
+ send_wr->wr.wr_cqe, drslid,
send_wr->pkey_index,
send_wr->port_num, &mad_wc);
@@ -1039,7 +1041,9 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
mad_send_wr->sg_list[1].lkey = mad_agent->qp->pd->local_dma_lkey;
- mad_send_wr->send_wr.wr.wr_id = (unsigned long) mad_send_wr;
+ mad_send_wr->mad_list.cqe.done = ib_mad_send_done;
+
+ mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe;
mad_send_wr->send_wr.wr.sg_list = mad_send_wr->sg_list;
mad_send_wr->send_wr.wr.num_sge = 2;
mad_send_wr->send_wr.wr.opcode = IB_WR_SEND;
@@ -1151,8 +1155,9 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
/* Set WR ID to find mad_send_wr upon completion */
qp_info = mad_send_wr->mad_agent_priv->qp_info;
- mad_send_wr->send_wr.wr.wr_id = (unsigned long)&mad_send_wr->mad_list;
mad_send_wr->mad_list.mad_queue = &qp_info->send_queue;
+ mad_send_wr->mad_list.cqe.done = ib_mad_send_done;
+ mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe;
mad_agent = mad_send_wr->send_buf.mad_agent;
sge = mad_send_wr->sg_list;
@@ -1982,9 +1987,9 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
/* user rmpp is in effect
* and this is an active RMPP MAD
*/
- mad_recv_wc->wc->wr_id = 0;
- mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
- mad_recv_wc);
+ mad_agent_priv->agent.recv_handler(
+ &mad_agent_priv->agent, NULL,
+ mad_recv_wc);
atomic_dec(&mad_agent_priv->refcount);
} else {
/* not user rmpp, revert to normal behavior and
@@ -1998,9 +2003,10 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
/* Defined behavior is to complete response before request */
- mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf;
- mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
- mad_recv_wc);
+ mad_agent_priv->agent.recv_handler(
+ &mad_agent_priv->agent,
+ &mad_send_wr->send_buf,
+ mad_recv_wc);
atomic_dec(&mad_agent_priv->refcount);
mad_send_wc.status = IB_WC_SUCCESS;
@@ -2009,7 +2015,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
}
} else {
- mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
+ mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, NULL,
mad_recv_wc);
deref_mad_agent(mad_agent_priv);
}
@@ -2172,13 +2178,14 @@ handle_smi(struct ib_mad_port_private *port_priv,
return handle_ib_smi(port_priv, qp_info, wc, port_num, recv, response);
}
-static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
- struct ib_wc *wc)
+static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
+ struct ib_mad_port_private *port_priv = cq->cq_context;
+ struct ib_mad_list_head *mad_list =
+ container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
struct ib_mad_qp_info *qp_info;
struct ib_mad_private_header *mad_priv_hdr;
struct ib_mad_private *recv, *response = NULL;
- struct ib_mad_list_head *mad_list;
struct ib_mad_agent_private *mad_agent;
int port_num;
int ret = IB_MAD_RESULT_SUCCESS;
@@ -2186,7 +2193,17 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
u16 resp_mad_pkey_index = 0;
bool opa;
- mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
+ if (list_empty_careful(&port_priv->port_list))
+ return;
+
+ if (wc->status != IB_WC_SUCCESS) {
+ /*
+ * Receive errors indicate that the QP has entered the error
+ * state - error handling/shutdown code will cleanup
+ */
+ return;
+ }
+
qp_info = mad_list->mad_queue->qp_info;
dequeue_mad(mad_list);
@@ -2227,7 +2244,7 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
response = alloc_mad_private(mad_size, GFP_KERNEL);
if (!response) {
dev_err(&port_priv->device->dev,
- "ib_mad_recv_done_handler no memory for response buffer\n");
+ "%s: no memory for response buffer\n", __func__);
goto out;
}
@@ -2413,11 +2430,12 @@ done:
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
}
-static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
- struct ib_wc *wc)
+static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc)
{
+ struct ib_mad_port_private *port_priv = cq->cq_context;
+ struct ib_mad_list_head *mad_list =
+ container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
struct ib_mad_send_wr_private *mad_send_wr, *queued_send_wr;
- struct ib_mad_list_head *mad_list;
struct ib_mad_qp_info *qp_info;
struct ib_mad_queue *send_queue;
struct ib_send_wr *bad_send_wr;
@@ -2425,7 +2443,14 @@ static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
unsigned long flags;
int ret;
- mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
+ if (list_empty_careful(&port_priv->port_list))
+ return;
+
+ if (wc->status != IB_WC_SUCCESS) {
+ if (!ib_mad_send_error(port_priv, wc))
+ return;
+ }
+
mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
mad_list);
send_queue = mad_list->mad_queue;
@@ -2490,24 +2515,15 @@ static void mark_sends_for_retry(struct ib_mad_qp_info *qp_info)
spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
}
-static void mad_error_handler(struct ib_mad_port_private *port_priv,
- struct ib_wc *wc)
+static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
+ struct ib_wc *wc)
{
- struct ib_mad_list_head *mad_list;
- struct ib_mad_qp_info *qp_info;
+ struct ib_mad_list_head *mad_list =
+ container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
+ struct ib_mad_qp_info *qp_info = mad_list->mad_queue->qp_info;
struct ib_mad_send_wr_private *mad_send_wr;
int ret;
- /* Determine if failure was a send or receive */
- mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
- qp_info = mad_list->mad_queue->qp_info;
- if (mad_list->mad_queue == &qp_info->recv_queue)
- /*
- * Receive errors indicate that the QP has entered the error
- * state - error handling/shutdown code will cleanup
- */
- return;
-
/*
* Send errors will transition the QP to SQE - move
* QP to RTS and repost flushed work requests
@@ -2522,10 +2538,9 @@ static void mad_error_handler(struct ib_mad_port_private *port_priv,
mad_send_wr->retry = 0;
ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr,
&bad_send_wr);
- if (ret)
- ib_mad_send_done_handler(port_priv, wc);
- } else
- ib_mad_send_done_handler(port_priv, wc);
+ if (!ret)
+ return false;
+ }
} else {
struct ib_qp_attr *attr;
@@ -2539,42 +2554,14 @@ static void mad_error_handler(struct ib_mad_port_private *port_priv,
kfree(attr);
if (ret)
dev_err(&port_priv->device->dev,
- "mad_error_handler - ib_modify_qp to RTS : %d\n",
- ret);
+ "%s - ib_modify_qp to RTS: %d\n",
+ __func__, ret);
else
mark_sends_for_retry(qp_info);
}
- ib_mad_send_done_handler(port_priv, wc);
}
-}
-/*
- * IB MAD completion callback
- */
-static void ib_mad_completion_handler(struct work_struct *work)
-{
- struct ib_mad_port_private *port_priv;
- struct ib_wc wc;
-
- port_priv = container_of(work, struct ib_mad_port_private, work);
- ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
-
- while (ib_poll_cq(port_priv->cq, 1, &wc) == 1) {
- if (wc.status == IB_WC_SUCCESS) {
- switch (wc.opcode) {
- case IB_WC_SEND:
- ib_mad_send_done_handler(port_priv, &wc);
- break;
- case IB_WC_RECV:
- ib_mad_recv_done_handler(port_priv, &wc);
- break;
- default:
- BUG_ON(1);
- break;
- }
- } else
- mad_error_handler(port_priv, &wc);
- }
+ return true;
}
static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
@@ -2716,7 +2703,7 @@ static void local_completions(struct work_struct *work)
* before request
*/
build_smp_wc(recv_mad_agent->agent.qp,
- (unsigned long) local->mad_send_wr,
+ local->mad_send_wr->send_wr.wr.wr_cqe,
be16_to_cpu(IB_LID_PERMISSIVE),
local->mad_send_wr->send_wr.pkey_index,
recv_mad_agent->agent.port_num, &wc);
@@ -2744,6 +2731,7 @@ static void local_completions(struct work_struct *work)
IB_MAD_SNOOP_RECVS);
recv_mad_agent->agent.recv_handler(
&recv_mad_agent->agent,
+ &local->mad_send_wr->send_buf,
&local->mad_priv->header.recv_wc);
spin_lock_irqsave(&recv_mad_agent->lock, flags);
atomic_dec(&recv_mad_agent->refcount);
@@ -2855,17 +2843,6 @@ static void timeout_sends(struct work_struct *work)
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
}
-static void ib_mad_thread_completion_handler(struct ib_cq *cq, void *arg)
-{
- struct ib_mad_port_private *port_priv = cq->cq_context;
- unsigned long flags;
-
- spin_lock_irqsave(&ib_mad_port_list_lock, flags);
- if (!list_empty(&port_priv->port_list))
- queue_work(port_priv->wq, &port_priv->work);
- spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
-}
-
/*
* Allocate receive MADs and post receive WRs for them
*/
@@ -2913,8 +2890,9 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
break;
}
mad_priv->header.mapping = sg_list.addr;
- recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;
mad_priv->header.mad_list.mad_queue = recv_queue;
+ mad_priv->header.mad_list.cqe.done = ib_mad_recv_done;
+ recv_wr.wr_cqe = &mad_priv->header.mad_list.cqe;
/* Post receive WR */
spin_lock_irqsave(&recv_queue->lock, flags);
@@ -3151,7 +3129,6 @@ static int ib_mad_port_open(struct ib_device *device,
unsigned long flags;
char name[sizeof "ib_mad123"];
int has_smi;
- struct ib_cq_init_attr cq_attr = {};
if (WARN_ON(rdma_max_mad_size(device, port_num) < IB_MGMT_MAD_SIZE))
return -EFAULT;
@@ -3179,10 +3156,8 @@ static int ib_mad_port_open(struct ib_device *device,
if (has_smi)
cq_size *= 2;
- cq_attr.cqe = cq_size;
- port_priv->cq = ib_create_cq(port_priv->device,
- ib_mad_thread_completion_handler,
- NULL, port_priv, &cq_attr);
+ port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0,
+ IB_POLL_WORKQUEUE);
if (IS_ERR(port_priv->cq)) {
dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
ret = PTR_ERR(port_priv->cq);
@@ -3211,7 +3186,6 @@ static int ib_mad_port_open(struct ib_device *device,
ret = -ENOMEM;
goto error8;
}
- INIT_WORK(&port_priv->work, ib_mad_completion_handler);
spin_lock_irqsave(&ib_mad_port_list_lock, flags);
list_add_tail(&port_priv->port_list, &ib_mad_port_list);
@@ -3238,7 +3212,7 @@ error7:
error6:
ib_dealloc_pd(port_priv->pd);
error4:
- ib_destroy_cq(port_priv->cq);
+ ib_free_cq(port_priv->cq);
cleanup_recv_queue(&port_priv->qp_info[1]);
cleanup_recv_queue(&port_priv->qp_info[0]);
error3:
@@ -3271,7 +3245,7 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
destroy_mad_qp(&port_priv->qp_info[1]);
destroy_mad_qp(&port_priv->qp_info[0]);
ib_dealloc_pd(port_priv->pd);
- ib_destroy_cq(port_priv->cq);
+ ib_free_cq(port_priv->cq);
cleanup_recv_queue(&port_priv->qp_info[1]);
cleanup_recv_queue(&port_priv->qp_info[0]);
/* XXX: Handle deallocation of MAD registration tables */
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 990698a6ab4b..28669f6419e1 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -64,6 +64,7 @@
struct ib_mad_list_head {
struct list_head list;
+ struct ib_cqe cqe;
struct ib_mad_queue *mad_queue;
};
@@ -204,7 +205,6 @@ struct ib_mad_port_private {
struct ib_mad_mgmt_version_table version[MAX_MGMT_VERSION];
struct list_head agent_list;
struct workqueue_struct *wq;
- struct work_struct work;
struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE];
};
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index bb6685fb08c6..250937cb9a1a 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -723,14 +723,27 @@ EXPORT_SYMBOL(ib_sa_get_mcmember_rec);
int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
struct ib_sa_mcmember_rec *rec,
+ struct net_device *ndev,
+ enum ib_gid_type gid_type,
struct ib_ah_attr *ah_attr)
{
int ret;
u16 gid_index;
u8 p;
- ret = ib_find_cached_gid(device, &rec->port_gid,
- NULL, &p, &gid_index);
+ if (rdma_protocol_roce(device, port_num)) {
+ ret = ib_find_cached_gid_by_port(device, &rec->port_gid,
+ gid_type, port_num,
+ ndev,
+ &gid_index);
+ } else if (rdma_protocol_ib(device, port_num)) {
+ ret = ib_find_cached_gid(device, &rec->port_gid,
+ IB_GID_TYPE_IB, NULL, &p,
+ &gid_index);
+ } else {
+ ret = -EINVAL;
+ }
+
if (ret)
return ret;
diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c
index 178f98482e13..06556c34606d 100644
--- a/drivers/infiniband/core/roce_gid_mgmt.c
+++ b/drivers/infiniband/core/roce_gid_mgmt.c
@@ -67,17 +67,53 @@ struct netdev_event_work {
struct netdev_event_work_cmd cmds[ROCE_NETDEV_CALLBACK_SZ];
};
+static const struct {
+ bool (*is_supported)(const struct ib_device *device, u8 port_num);
+ enum ib_gid_type gid_type;
+} PORT_CAP_TO_GID_TYPE[] = {
+ {rdma_protocol_roce_eth_encap, IB_GID_TYPE_ROCE},
+ {rdma_protocol_roce_udp_encap, IB_GID_TYPE_ROCE_UDP_ENCAP},
+};
+
+#define CAP_TO_GID_TABLE_SIZE ARRAY_SIZE(PORT_CAP_TO_GID_TYPE)
+
+unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port)
+{
+ int i;
+ unsigned int ret_flags = 0;
+
+ if (!rdma_protocol_roce(ib_dev, port))
+ return 1UL << IB_GID_TYPE_IB;
+
+ for (i = 0; i < CAP_TO_GID_TABLE_SIZE; i++)
+ if (PORT_CAP_TO_GID_TYPE[i].is_supported(ib_dev, port))
+ ret_flags |= 1UL << PORT_CAP_TO_GID_TYPE[i].gid_type;
+
+ return ret_flags;
+}
+EXPORT_SYMBOL(roce_gid_type_mask_support);
+
static void update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev,
u8 port, union ib_gid *gid,
struct ib_gid_attr *gid_attr)
{
- switch (gid_op) {
- case GID_ADD:
- ib_cache_gid_add(ib_dev, port, gid, gid_attr);
- break;
- case GID_DEL:
- ib_cache_gid_del(ib_dev, port, gid, gid_attr);
- break;
+ int i;
+ unsigned long gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
+
+ for (i = 0; i < IB_GID_TYPE_SIZE; i++) {
+ if ((1UL << i) & gid_type_mask) {
+ gid_attr->gid_type = i;
+ switch (gid_op) {
+ case GID_ADD:
+ ib_cache_gid_add(ib_dev, port,
+ gid, gid_attr);
+ break;
+ case GID_DEL:
+ ib_cache_gid_del(ib_dev, port,
+ gid, gid_attr);
+ break;
+ }
+ }
}
}
@@ -103,18 +139,6 @@ static enum bonding_slave_state is_eth_active_slave_of_bonding_rcu(struct net_de
return BONDING_SLAVE_STATE_NA;
}
-static bool is_upper_dev_rcu(struct net_device *dev, struct net_device *upper)
-{
- struct net_device *_upper = NULL;
- struct list_head *iter;
-
- netdev_for_each_all_upper_dev_rcu(dev, _upper, iter)
- if (_upper == upper)
- break;
-
- return _upper == upper;
-}
-
#define REQUIRED_BOND_STATES (BONDING_SLAVE_STATE_ACTIVE | \
BONDING_SLAVE_STATE_NA)
static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port,
@@ -132,7 +156,7 @@ static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port,
if (!real_dev)
real_dev = event_ndev;
- res = ((is_upper_dev_rcu(rdma_ndev, event_ndev) &&
+ res = ((rdma_is_upper_dev_rcu(rdma_ndev, event_ndev) &&
(is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) &
REQUIRED_BOND_STATES)) ||
real_dev == rdma_ndev);
@@ -178,7 +202,7 @@ static int upper_device_filter(struct ib_device *ib_dev, u8 port,
return 1;
rcu_read_lock();
- res = is_upper_dev_rcu(rdma_ndev, event_ndev);
+ res = rdma_is_upper_dev_rcu(rdma_ndev, event_ndev);
rcu_read_unlock();
return res;
@@ -203,10 +227,12 @@ static void enum_netdev_default_gids(struct ib_device *ib_dev,
u8 port, struct net_device *event_ndev,
struct net_device *rdma_ndev)
{
+ unsigned long gid_type_mask;
+
rcu_read_lock();
if (!rdma_ndev ||
((rdma_ndev != event_ndev &&
- !is_upper_dev_rcu(rdma_ndev, event_ndev)) ||
+ !rdma_is_upper_dev_rcu(rdma_ndev, event_ndev)) ||
is_eth_active_slave_of_bonding_rcu(rdma_ndev,
netdev_master_upper_dev_get_rcu(rdma_ndev)) ==
BONDING_SLAVE_STATE_INACTIVE)) {
@@ -215,7 +241,9 @@ static void enum_netdev_default_gids(struct ib_device *ib_dev,
}
rcu_read_unlock();
- ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev,
+ gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
+
+ ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev, gid_type_mask,
IB_CACHE_GID_DEFAULT_MODE_SET);
}
@@ -234,12 +262,17 @@ static void bond_delete_netdev_default_gids(struct ib_device *ib_dev,
rcu_read_lock();
- if (is_upper_dev_rcu(rdma_ndev, event_ndev) &&
+ if (rdma_is_upper_dev_rcu(rdma_ndev, event_ndev) &&
is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) ==
BONDING_SLAVE_STATE_INACTIVE) {
+ unsigned long gid_type_mask;
+
rcu_read_unlock();
+ gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
+
ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev,
+ gid_type_mask,
IB_CACHE_GID_DEFAULT_MODE_DELETE);
} else {
rcu_read_unlock();
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index a95a32ba596e..f334090bb612 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -49,7 +49,9 @@
#include <net/netlink.h>
#include <uapi/rdma/ib_user_sa.h>
#include <rdma/ib_marshall.h>
+#include <rdma/ib_addr.h>
#include "sa.h"
+#include "core_priv.h"
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand subnet administration query support");
@@ -715,7 +717,9 @@ static int ib_nl_handle_set_timeout(struct sk_buff *skb,
struct nlattr *tb[LS_NLA_TYPE_MAX];
int ret;
- if (!netlink_capable(skb, CAP_NET_ADMIN))
+ if (!(nlh->nlmsg_flags & NLM_F_REQUEST) ||
+ !(NETLINK_CB(skb).sk) ||
+ !netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
@@ -789,7 +793,9 @@ static int ib_nl_handle_resolve_resp(struct sk_buff *skb,
int found = 0;
int ret;
- if (!netlink_capable(skb, CAP_NET_ADMIN))
+ if ((nlh->nlmsg_flags & NLM_F_REQUEST) ||
+ !(NETLINK_CB(skb).sk) ||
+ !netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
spin_lock_irqsave(&ib_nl_request_lock, flags);
@@ -996,7 +1002,8 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
{
int ret;
u16 gid_index;
- int force_grh;
+ int use_roce;
+ struct net_device *ndev = NULL;
memset(ah_attr, 0, sizeof *ah_attr);
ah_attr->dlid = be16_to_cpu(rec->dlid);
@@ -1006,16 +1013,71 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
ah_attr->port_num = port_num;
ah_attr->static_rate = rec->rate;
- force_grh = rdma_cap_eth_ah(device, port_num);
+ use_roce = rdma_cap_eth_ah(device, port_num);
+
+ if (use_roce) {
+ struct net_device *idev;
+ struct net_device *resolved_dev;
+ struct rdma_dev_addr dev_addr = {.bound_dev_if = rec->ifindex,
+ .net = rec->net ? rec->net :
+ &init_net};
+ union {
+ struct sockaddr _sockaddr;
+ struct sockaddr_in _sockaddr_in;
+ struct sockaddr_in6 _sockaddr_in6;
+ } sgid_addr, dgid_addr;
+
+ if (!device->get_netdev)
+ return -EOPNOTSUPP;
+
+ rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid);
+ rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid);
+
+ /* validate the route */
+ ret = rdma_resolve_ip_route(&sgid_addr._sockaddr,
+ &dgid_addr._sockaddr, &dev_addr);
+ if (ret)
+ return ret;
- if (rec->hop_limit > 1 || force_grh) {
- struct net_device *ndev = ib_get_ndev_from_path(rec);
+ if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
+ dev_addr.network == RDMA_NETWORK_IPV6) &&
+ rec->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
+ return -EINVAL;
+
+ idev = device->get_netdev(device, port_num);
+ if (!idev)
+ return -ENODEV;
+
+ resolved_dev = dev_get_by_index(dev_addr.net,
+ dev_addr.bound_dev_if);
+ if (resolved_dev->flags & IFF_LOOPBACK) {
+ dev_put(resolved_dev);
+ resolved_dev = idev;
+ dev_hold(resolved_dev);
+ }
+ ndev = ib_get_ndev_from_path(rec);
+ rcu_read_lock();
+ if ((ndev && ndev != resolved_dev) ||
+ (resolved_dev != idev &&
+ !rdma_is_upper_dev_rcu(idev, resolved_dev)))
+ ret = -EHOSTUNREACH;
+ rcu_read_unlock();
+ dev_put(idev);
+ dev_put(resolved_dev);
+ if (ret) {
+ if (ndev)
+ dev_put(ndev);
+ return ret;
+ }
+ }
+ if (rec->hop_limit > 1 || use_roce) {
ah_attr->ah_flags = IB_AH_GRH;
ah_attr->grh.dgid = rec->dgid;
- ret = ib_find_cached_gid(device, &rec->sgid, ndev, &port_num,
- &gid_index);
+ ret = ib_find_cached_gid_by_port(device, &rec->sgid,
+ rec->gid_type, port_num, ndev,
+ &gid_index);
if (ret) {
if (ndev)
dev_put(ndev);
@@ -1029,9 +1091,10 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
if (ndev)
dev_put(ndev);
}
- if (force_grh) {
+
+ if (use_roce)
memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN);
- }
+
return 0;
}
EXPORT_SYMBOL(ib_init_ah_from_path);
@@ -1157,6 +1220,7 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
mad->data, &rec);
rec.net = NULL;
rec.ifindex = 0;
+ rec.gid_type = IB_GID_TYPE_IB;
memset(rec.dmac, 0, ETH_ALEN);
query->callback(status, &rec, query->context);
} else
@@ -1609,14 +1673,15 @@ static void send_handler(struct ib_mad_agent *agent,
}
static void recv_handler(struct ib_mad_agent *mad_agent,
+ struct ib_mad_send_buf *send_buf,
struct ib_mad_recv_wc *mad_recv_wc)
{
struct ib_sa_query *query;
- struct ib_mad_send_buf *mad_buf;
- mad_buf = (void *) (unsigned long) mad_recv_wc->wc->wr_id;
- query = mad_buf->context[0];
+ if (!send_buf)
+ return;
+ query = send_buf->context[0];
if (query->callback) {
if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
query->callback(query,
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index b1f37d4095fa..3de93517efe4 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -37,15 +37,27 @@
#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/string.h>
+#include <linux/netdevice.h>
#include <rdma/ib_mad.h>
+#include <rdma/ib_pma.h>
+struct ib_port;
+
+struct gid_attr_group {
+ struct ib_port *port;
+ struct kobject kobj;
+ struct attribute_group ndev;
+ struct attribute_group type;
+};
struct ib_port {
struct kobject kobj;
struct ib_device *ibdev;
+ struct gid_attr_group *gid_attr_group;
struct attribute_group gid_group;
struct attribute_group pkey_group;
u8 port_num;
+ struct attribute_group *pma_table;
};
struct port_attribute {
@@ -65,6 +77,7 @@ struct port_table_attribute {
struct port_attribute attr;
char name[8];
int index;
+ __be16 attr_id;
};
static ssize_t port_attr_show(struct kobject *kobj,
@@ -84,6 +97,24 @@ static const struct sysfs_ops port_sysfs_ops = {
.show = port_attr_show
};
+static ssize_t gid_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct port_attribute *port_attr =
+ container_of(attr, struct port_attribute, attr);
+ struct ib_port *p = container_of(kobj, struct gid_attr_group,
+ kobj)->port;
+
+ if (!port_attr->show)
+ return -EIO;
+
+ return port_attr->show(p, port_attr, buf);
+}
+
+static const struct sysfs_ops gid_attr_sysfs_ops = {
+ .show = gid_attr_show
+};
+
static ssize_t state_show(struct ib_port *p, struct port_attribute *unused,
char *buf)
{
@@ -281,6 +312,46 @@ static struct attribute *port_default_attrs[] = {
NULL
};
+static size_t print_ndev(struct ib_gid_attr *gid_attr, char *buf)
+{
+ if (!gid_attr->ndev)
+ return -EINVAL;
+
+ return sprintf(buf, "%s\n", gid_attr->ndev->name);
+}
+
+static size_t print_gid_type(struct ib_gid_attr *gid_attr, char *buf)
+{
+ return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_attr->gid_type));
+}
+
+static ssize_t _show_port_gid_attr(struct ib_port *p,
+ struct port_attribute *attr,
+ char *buf,
+ size_t (*print)(struct ib_gid_attr *gid_attr,
+ char *buf))
+{
+ struct port_table_attribute *tab_attr =
+ container_of(attr, struct port_table_attribute, attr);
+ union ib_gid gid;
+ struct ib_gid_attr gid_attr = {};
+ ssize_t ret;
+ va_list args;
+
+ ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid,
+ &gid_attr);
+ if (ret)
+ goto err;
+
+ ret = print(&gid_attr, buf);
+
+err:
+ if (gid_attr.ndev)
+ dev_put(gid_attr.ndev);
+ va_end(args);
+ return ret;
+}
+
static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
char *buf)
{
@@ -296,6 +367,19 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
return sprintf(buf, "%pI6\n", gid.raw);
}
+static ssize_t show_port_gid_attr_ndev(struct ib_port *p,
+ struct port_attribute *attr, char *buf)
+{
+ return _show_port_gid_attr(p, attr, buf, print_ndev);
+}
+
+static ssize_t show_port_gid_attr_gid_type(struct ib_port *p,
+ struct port_attribute *attr,
+ char *buf)
+{
+ return _show_port_gid_attr(p, attr, buf, print_gid_type);
+}
+
static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr,
char *buf)
{
@@ -314,24 +398,32 @@ static ssize_t show_port_pkey(struct ib_port *p, struct port_attribute *attr,
#define PORT_PMA_ATTR(_name, _counter, _width, _offset) \
struct port_table_attribute port_pma_attr_##_name = { \
.attr = __ATTR(_name, S_IRUGO, show_pma_counter, NULL), \
- .index = (_offset) | ((_width) << 16) | ((_counter) << 24) \
+ .index = (_offset) | ((_width) << 16) | ((_counter) << 24), \
+ .attr_id = IB_PMA_PORT_COUNTERS , \
}
-static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
- char *buf)
+#define PORT_PMA_ATTR_EXT(_name, _width, _offset) \
+struct port_table_attribute port_pma_attr_ext_##_name = { \
+ .attr = __ATTR(_name, S_IRUGO, show_pma_counter, NULL), \
+ .index = (_offset) | ((_width) << 16), \
+ .attr_id = IB_PMA_PORT_COUNTERS_EXT , \
+}
+
+/*
+ * Get a Perfmgmt MAD block of data.
+ * Returns error code or the number of bytes retrieved.
+ */
+static int get_perf_mad(struct ib_device *dev, int port_num, __be16 attr,
+ void *data, int offset, size_t size)
{
- struct port_table_attribute *tab_attr =
- container_of(attr, struct port_table_attribute, attr);
- int offset = tab_attr->index & 0xffff;
- int width = (tab_attr->index >> 16) & 0xff;
- struct ib_mad *in_mad = NULL;
- struct ib_mad *out_mad = NULL;
+ struct ib_mad *in_mad;
+ struct ib_mad *out_mad;
size_t mad_size = sizeof(*out_mad);
u16 out_mad_pkey_index = 0;
ssize_t ret;
- if (!p->ibdev->process_mad)
- return sprintf(buf, "N/A (no PMA)\n");
+ if (!dev->process_mad)
+ return -ENOSYS;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
@@ -344,12 +436,13 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
in_mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_PERF_MGMT;
in_mad->mad_hdr.class_version = 1;
in_mad->mad_hdr.method = IB_MGMT_METHOD_GET;
- in_mad->mad_hdr.attr_id = cpu_to_be16(0x12); /* PortCounters */
+ in_mad->mad_hdr.attr_id = attr;
- in_mad->data[41] = p->port_num; /* PortSelect field */
+ if (attr != IB_PMA_CLASS_PORT_INFO)
+ in_mad->data[41] = port_num; /* PortSelect field */
- if ((p->ibdev->process_mad(p->ibdev, IB_MAD_IGNORE_MKEY,
- p->port_num, NULL, NULL,
+ if ((dev->process_mad(dev, IB_MAD_IGNORE_MKEY,
+ port_num, NULL, NULL,
(const struct ib_mad_hdr *)in_mad, mad_size,
(struct ib_mad_hdr *)out_mad, &mad_size,
&out_mad_pkey_index) &
@@ -358,31 +451,54 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
ret = -EINVAL;
goto out;
}
+ memcpy(data, out_mad->data + offset, size);
+ ret = size;
+out:
+ kfree(in_mad);
+ kfree(out_mad);
+ return ret;
+}
+
+static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
+ char *buf)
+{
+ struct port_table_attribute *tab_attr =
+ container_of(attr, struct port_table_attribute, attr);
+ int offset = tab_attr->index & 0xffff;
+ int width = (tab_attr->index >> 16) & 0xff;
+ ssize_t ret;
+ u8 data[8];
+
+ ret = get_perf_mad(p->ibdev, p->port_num, tab_attr->attr_id, &data,
+ 40 + offset / 8, sizeof(data));
+ if (ret < 0)
+ return sprintf(buf, "N/A (no PMA)\n");
switch (width) {
case 4:
- ret = sprintf(buf, "%u\n", (out_mad->data[40 + offset / 8] >>
+ ret = sprintf(buf, "%u\n", (*data >>
(4 - (offset % 8))) & 0xf);
break;
case 8:
- ret = sprintf(buf, "%u\n", out_mad->data[40 + offset / 8]);
+ ret = sprintf(buf, "%u\n", *data);
break;
case 16:
ret = sprintf(buf, "%u\n",
- be16_to_cpup((__be16 *)(out_mad->data + 40 + offset / 8)));
+ be16_to_cpup((__be16 *)data));
break;
case 32:
ret = sprintf(buf, "%u\n",
- be32_to_cpup((__be32 *)(out_mad->data + 40 + offset / 8)));
+ be32_to_cpup((__be32 *)data));
+ break;
+ case 64:
+ ret = sprintf(buf, "%llu\n",
+ be64_to_cpup((__be64 *)data));
break;
+
default:
ret = 0;
}
-out:
- kfree(in_mad);
- kfree(out_mad);
-
return ret;
}
@@ -403,6 +519,18 @@ static PORT_PMA_ATTR(port_rcv_data , 13, 32, 224);
static PORT_PMA_ATTR(port_xmit_packets , 14, 32, 256);
static PORT_PMA_ATTR(port_rcv_packets , 15, 32, 288);
+/*
+ * Counters added by extended set
+ */
+static PORT_PMA_ATTR_EXT(port_xmit_data , 64, 64);
+static PORT_PMA_ATTR_EXT(port_rcv_data , 64, 128);
+static PORT_PMA_ATTR_EXT(port_xmit_packets , 64, 192);
+static PORT_PMA_ATTR_EXT(port_rcv_packets , 64, 256);
+static PORT_PMA_ATTR_EXT(unicast_xmit_packets , 64, 320);
+static PORT_PMA_ATTR_EXT(unicast_rcv_packets , 64, 384);
+static PORT_PMA_ATTR_EXT(multicast_xmit_packets , 64, 448);
+static PORT_PMA_ATTR_EXT(multicast_rcv_packets , 64, 512);
+
static struct attribute *pma_attrs[] = {
&port_pma_attr_symbol_error.attr.attr,
&port_pma_attr_link_error_recovery.attr.attr,
@@ -423,11 +551,65 @@ static struct attribute *pma_attrs[] = {
NULL
};
+static struct attribute *pma_attrs_ext[] = {
+ &port_pma_attr_symbol_error.attr.attr,
+ &port_pma_attr_link_error_recovery.attr.attr,
+ &port_pma_attr_link_downed.attr.attr,
+ &port_pma_attr_port_rcv_errors.attr.attr,
+ &port_pma_attr_port_rcv_remote_physical_errors.attr.attr,
+ &port_pma_attr_port_rcv_switch_relay_errors.attr.attr,
+ &port_pma_attr_port_xmit_discards.attr.attr,
+ &port_pma_attr_port_xmit_constraint_errors.attr.attr,
+ &port_pma_attr_port_rcv_constraint_errors.attr.attr,
+ &port_pma_attr_local_link_integrity_errors.attr.attr,
+ &port_pma_attr_excessive_buffer_overrun_errors.attr.attr,
+ &port_pma_attr_VL15_dropped.attr.attr,
+ &port_pma_attr_ext_port_xmit_data.attr.attr,
+ &port_pma_attr_ext_port_rcv_data.attr.attr,
+ &port_pma_attr_ext_port_xmit_packets.attr.attr,
+ &port_pma_attr_ext_port_rcv_packets.attr.attr,
+ &port_pma_attr_ext_unicast_rcv_packets.attr.attr,
+ &port_pma_attr_ext_unicast_xmit_packets.attr.attr,
+ &port_pma_attr_ext_multicast_rcv_packets.attr.attr,
+ &port_pma_attr_ext_multicast_xmit_packets.attr.attr,
+ NULL
+};
+
+static struct attribute *pma_attrs_noietf[] = {
+ &port_pma_attr_symbol_error.attr.attr,
+ &port_pma_attr_link_error_recovery.attr.attr,
+ &port_pma_attr_link_downed.attr.attr,
+ &port_pma_attr_port_rcv_errors.attr.attr,
+ &port_pma_attr_port_rcv_remote_physical_errors.attr.attr,
+ &port_pma_attr_port_rcv_switch_relay_errors.attr.attr,
+ &port_pma_attr_port_xmit_discards.attr.attr,
+ &port_pma_attr_port_xmit_constraint_errors.attr.attr,
+ &port_pma_attr_port_rcv_constraint_errors.attr.attr,
+ &port_pma_attr_local_link_integrity_errors.attr.attr,
+ &port_pma_attr_excessive_buffer_overrun_errors.attr.attr,
+ &port_pma_attr_VL15_dropped.attr.attr,
+ &port_pma_attr_ext_port_xmit_data.attr.attr,
+ &port_pma_attr_ext_port_rcv_data.attr.attr,
+ &port_pma_attr_ext_port_xmit_packets.attr.attr,
+ &port_pma_attr_ext_port_rcv_packets.attr.attr,
+ NULL
+};
+
static struct attribute_group pma_group = {
.name = "counters",
.attrs = pma_attrs
};
+static struct attribute_group pma_group_ext = {
+ .name = "counters",
+ .attrs = pma_attrs_ext
+};
+
+static struct attribute_group pma_group_noietf = {
+ .name = "counters",
+ .attrs = pma_attrs_noietf
+};
+
static void ib_port_release(struct kobject *kobj)
{
struct ib_port *p = container_of(kobj, struct ib_port, kobj);
@@ -451,12 +633,41 @@ static void ib_port_release(struct kobject *kobj)
kfree(p);
}
+static void ib_port_gid_attr_release(struct kobject *kobj)
+{
+ struct gid_attr_group *g = container_of(kobj, struct gid_attr_group,
+ kobj);
+ struct attribute *a;
+ int i;
+
+ if (g->ndev.attrs) {
+ for (i = 0; (a = g->ndev.attrs[i]); ++i)
+ kfree(a);
+
+ kfree(g->ndev.attrs);
+ }
+
+ if (g->type.attrs) {
+ for (i = 0; (a = g->type.attrs[i]); ++i)
+ kfree(a);
+
+ kfree(g->type.attrs);
+ }
+
+ kfree(g);
+}
+
static struct kobj_type port_type = {
.release = ib_port_release,
.sysfs_ops = &port_sysfs_ops,
.default_attrs = port_default_attrs
};
+static struct kobj_type gid_attr_type = {
+ .sysfs_ops = &gid_attr_sysfs_ops,
+ .release = ib_port_gid_attr_release
+};
+
static struct attribute **
alloc_group_attrs(ssize_t (*show)(struct ib_port *,
struct port_attribute *, char *buf),
@@ -500,6 +711,31 @@ err:
return NULL;
}
+/*
+ * Figure out which counter table to use depending on
+ * the device capabilities.
+ */
+static struct attribute_group *get_counter_table(struct ib_device *dev,
+ int port_num)
+{
+ struct ib_class_port_info cpi;
+
+ if (get_perf_mad(dev, port_num, IB_PMA_CLASS_PORT_INFO,
+ &cpi, 40, sizeof(cpi)) >= 0) {
+
+ if (cpi.capability_mask && IB_PMA_CLASS_CAP_EXT_WIDTH)
+ /* We have extended counters */
+ return &pma_group_ext;
+
+ if (cpi.capability_mask && IB_PMA_CLASS_CAP_EXT_WIDTH_NOIETF)
+ /* But not the IETF ones */
+ return &pma_group_noietf;
+ }
+
+ /* Fall back to normal counters */
+ return &pma_group;
+}
+
static int add_port(struct ib_device *device, int port_num,
int (*port_callback)(struct ib_device *,
u8, struct kobject *))
@@ -528,9 +764,24 @@ static int add_port(struct ib_device *device, int port_num,
return ret;
}
- ret = sysfs_create_group(&p->kobj, &pma_group);
- if (ret)
+ p->gid_attr_group = kzalloc(sizeof(*p->gid_attr_group), GFP_KERNEL);
+ if (!p->gid_attr_group) {
+ ret = -ENOMEM;
goto err_put;
+ }
+
+ p->gid_attr_group->port = p;
+ ret = kobject_init_and_add(&p->gid_attr_group->kobj, &gid_attr_type,
+ &p->kobj, "gid_attrs");
+ if (ret) {
+ kfree(p->gid_attr_group);
+ goto err_put;
+ }
+
+ p->pma_table = get_counter_table(device, port_num);
+ ret = sysfs_create_group(&p->kobj, p->pma_table);
+ if (ret)
+ goto err_put_gid_attrs;
p->gid_group.name = "gids";
p->gid_group.attrs = alloc_group_attrs(show_port_gid, attr.gid_tbl_len);
@@ -543,12 +794,38 @@ static int add_port(struct ib_device *device, int port_num,
if (ret)
goto err_free_gid;
+ p->gid_attr_group->ndev.name = "ndevs";
+ p->gid_attr_group->ndev.attrs = alloc_group_attrs(show_port_gid_attr_ndev,
+ attr.gid_tbl_len);
+ if (!p->gid_attr_group->ndev.attrs) {
+ ret = -ENOMEM;
+ goto err_remove_gid;
+ }
+
+ ret = sysfs_create_group(&p->gid_attr_group->kobj,
+ &p->gid_attr_group->ndev);
+ if (ret)
+ goto err_free_gid_ndev;
+
+ p->gid_attr_group->type.name = "types";
+ p->gid_attr_group->type.attrs = alloc_group_attrs(show_port_gid_attr_gid_type,
+ attr.gid_tbl_len);
+ if (!p->gid_attr_group->type.attrs) {
+ ret = -ENOMEM;
+ goto err_remove_gid_ndev;
+ }
+
+ ret = sysfs_create_group(&p->gid_attr_group->kobj,
+ &p->gid_attr_group->type);
+ if (ret)
+ goto err_free_gid_type;
+
p->pkey_group.name = "pkeys";
p->pkey_group.attrs = alloc_group_attrs(show_port_pkey,
attr.pkey_tbl_len);
if (!p->pkey_group.attrs) {
ret = -ENOMEM;
- goto err_remove_gid;
+ goto err_remove_gid_type;
}
ret = sysfs_create_group(&p->kobj, &p->pkey_group);
@@ -576,6 +853,28 @@ err_free_pkey:
kfree(p->pkey_group.attrs);
p->pkey_group.attrs = NULL;
+err_remove_gid_type:
+ sysfs_remove_group(&p->gid_attr_group->kobj,
+ &p->gid_attr_group->type);
+
+err_free_gid_type:
+ for (i = 0; i < attr.gid_tbl_len; ++i)
+ kfree(p->gid_attr_group->type.attrs[i]);
+
+ kfree(p->gid_attr_group->type.attrs);
+ p->gid_attr_group->type.attrs = NULL;
+
+err_remove_gid_ndev:
+ sysfs_remove_group(&p->gid_attr_group->kobj,
+ &p->gid_attr_group->ndev);
+
+err_free_gid_ndev:
+ for (i = 0; i < attr.gid_tbl_len; ++i)
+ kfree(p->gid_attr_group->ndev.attrs[i]);
+
+ kfree(p->gid_attr_group->ndev.attrs);
+ p->gid_attr_group->ndev.attrs = NULL;
+
err_remove_gid:
sysfs_remove_group(&p->kobj, &p->gid_group);
@@ -587,7 +886,10 @@ err_free_gid:
p->gid_group.attrs = NULL;
err_remove_pma:
- sysfs_remove_group(&p->kobj, &pma_group);
+ sysfs_remove_group(&p->kobj, p->pma_table);
+
+err_put_gid_attrs:
+ kobject_put(&p->gid_attr_group->kobj);
err_put:
kobject_put(&p->kobj);
@@ -614,18 +916,12 @@ static ssize_t show_sys_image_guid(struct device *device,
struct device_attribute *dev_attr, char *buf)
{
struct ib_device *dev = container_of(device, struct ib_device, dev);
- struct ib_device_attr attr;
- ssize_t ret;
-
- ret = ib_query_device(dev, &attr);
- if (ret)
- return ret;
return sprintf(buf, "%04x:%04x:%04x:%04x\n",
- be16_to_cpu(((__be16 *) &attr.sys_image_guid)[0]),
- be16_to_cpu(((__be16 *) &attr.sys_image_guid)[1]),
- be16_to_cpu(((__be16 *) &attr.sys_image_guid)[2]),
- be16_to_cpu(((__be16 *) &attr.sys_image_guid)[3]));
+ be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[0]),
+ be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[1]),
+ be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[2]),
+ be16_to_cpu(((__be16 *) &dev->attrs.sys_image_guid)[3]));
}
static ssize_t show_node_guid(struct device *device,
@@ -800,9 +1096,14 @@ static void free_port_list_attributes(struct ib_device *device)
list_for_each_entry_safe(p, t, &device->port_list, entry) {
struct ib_port *port = container_of(p, struct ib_port, kobj);
list_del(&p->entry);
- sysfs_remove_group(p, &pma_group);
+ sysfs_remove_group(p, port->pma_table);
sysfs_remove_group(p, &port->pkey_group);
sysfs_remove_group(p, &port->gid_group);
+ sysfs_remove_group(&port->gid_attr_group->kobj,
+ &port->gid_attr_group->ndev);
+ sysfs_remove_group(&port->gid_attr_group->kobj,
+ &port->gid_attr_group->type);
+ kobject_put(&port->gid_attr_group->kobj);
kobject_put(p);
}
diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c
index 72feee620ebf..19837d270278 100644
--- a/drivers/infiniband/core/ud_header.c
+++ b/drivers/infiniband/core/ud_header.c
@@ -35,6 +35,7 @@
#include <linux/string.h>
#include <linux/export.h>
#include <linux/if_ether.h>
+#include <linux/ip.h>
#include <rdma/ib_pack.h>
@@ -116,6 +117,72 @@ static const struct ib_field vlan_table[] = {
.size_bits = 16 }
};
+static const struct ib_field ip4_table[] = {
+ { STRUCT_FIELD(ip4, ver),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 4 },
+ { STRUCT_FIELD(ip4, hdr_len),
+ .offset_words = 0,
+ .offset_bits = 4,
+ .size_bits = 4 },
+ { STRUCT_FIELD(ip4, tos),
+ .offset_words = 0,
+ .offset_bits = 8,
+ .size_bits = 8 },
+ { STRUCT_FIELD(ip4, tot_len),
+ .offset_words = 0,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { STRUCT_FIELD(ip4, id),
+ .offset_words = 1,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { STRUCT_FIELD(ip4, frag_off),
+ .offset_words = 1,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { STRUCT_FIELD(ip4, ttl),
+ .offset_words = 2,
+ .offset_bits = 0,
+ .size_bits = 8 },
+ { STRUCT_FIELD(ip4, protocol),
+ .offset_words = 2,
+ .offset_bits = 8,
+ .size_bits = 8 },
+ { STRUCT_FIELD(ip4, check),
+ .offset_words = 2,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { STRUCT_FIELD(ip4, saddr),
+ .offset_words = 3,
+ .offset_bits = 0,
+ .size_bits = 32 },
+ { STRUCT_FIELD(ip4, daddr),
+ .offset_words = 4,
+ .offset_bits = 0,
+ .size_bits = 32 }
+};
+
+static const struct ib_field udp_table[] = {
+ { STRUCT_FIELD(udp, sport),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { STRUCT_FIELD(udp, dport),
+ .offset_words = 0,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { STRUCT_FIELD(udp, length),
+ .offset_words = 1,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { STRUCT_FIELD(udp, csum),
+ .offset_words = 1,
+ .offset_bits = 16,
+ .size_bits = 16 }
+};
+
static const struct ib_field grh_table[] = {
{ STRUCT_FIELD(grh, ip_version),
.offset_words = 0,
@@ -213,26 +280,57 @@ static const struct ib_field deth_table[] = {
.size_bits = 24 }
};
+__sum16 ib_ud_ip4_csum(struct ib_ud_header *header)
+{
+ struct iphdr iph;
+
+ iph.ihl = 5;
+ iph.version = 4;
+ iph.tos = header->ip4.tos;
+ iph.tot_len = header->ip4.tot_len;
+ iph.id = header->ip4.id;
+ iph.frag_off = header->ip4.frag_off;
+ iph.ttl = header->ip4.ttl;
+ iph.protocol = header->ip4.protocol;
+ iph.check = 0;
+ iph.saddr = header->ip4.saddr;
+ iph.daddr = header->ip4.daddr;
+
+ return ip_fast_csum((u8 *)&iph, iph.ihl);
+}
+EXPORT_SYMBOL(ib_ud_ip4_csum);
+
/**
* ib_ud_header_init - Initialize UD header structure
* @payload_bytes:Length of packet payload
* @lrh_present: specify if LRH is present
* @eth_present: specify if Eth header is present
* @vlan_present: packet is tagged vlan
- * @grh_present:GRH flag (if non-zero, GRH will be included)
+ * @grh_present: GRH flag (if non-zero, GRH will be included)
+ * @ip_version: if non-zero, IP header, V4 or V6, will be included
+ * @udp_present :if non-zero, UDP header will be included
* @immediate_present: specify if immediate data is present
* @header:Structure to initialize
*/
-void ib_ud_header_init(int payload_bytes,
- int lrh_present,
- int eth_present,
- int vlan_present,
- int grh_present,
- int immediate_present,
- struct ib_ud_header *header)
+int ib_ud_header_init(int payload_bytes,
+ int lrh_present,
+ int eth_present,
+ int vlan_present,
+ int grh_present,
+ int ip_version,
+ int udp_present,
+ int immediate_present,
+ struct ib_ud_header *header)
{
+ grh_present = grh_present && !ip_version;
memset(header, 0, sizeof *header);
+ /*
+ * UDP header without IP header doesn't make sense
+ */
+ if (udp_present && ip_version != 4 && ip_version != 6)
+ return -EINVAL;
+
if (lrh_present) {
u16 packet_length;
@@ -252,7 +350,7 @@ void ib_ud_header_init(int payload_bytes,
if (vlan_present)
header->eth.type = cpu_to_be16(ETH_P_8021Q);
- if (grh_present) {
+ if (ip_version == 6 || grh_present) {
header->grh.ip_version = 6;
header->grh.payload_length =
cpu_to_be16((IB_BTH_BYTES +
@@ -260,8 +358,30 @@ void ib_ud_header_init(int payload_bytes,
payload_bytes +
4 + /* ICRC */
3) & ~3); /* round up */
- header->grh.next_header = 0x1b;
+ header->grh.next_header = udp_present ? IPPROTO_UDP : 0x1b;
+ }
+
+ if (ip_version == 4) {
+ int udp_bytes = udp_present ? IB_UDP_BYTES : 0;
+
+ header->ip4.ver = 4; /* version 4 */
+ header->ip4.hdr_len = 5; /* 5 words */
+ header->ip4.tot_len =
+ cpu_to_be16(IB_IP4_BYTES +
+ udp_bytes +
+ IB_BTH_BYTES +
+ IB_DETH_BYTES +
+ payload_bytes +
+ 4); /* ICRC */
+ header->ip4.protocol = IPPROTO_UDP;
}
+ if (udp_present && ip_version)
+ header->udp.length =
+ cpu_to_be16(IB_UDP_BYTES +
+ IB_BTH_BYTES +
+ IB_DETH_BYTES +
+ payload_bytes +
+ 4); /* ICRC */
if (immediate_present)
header->bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
@@ -273,8 +393,11 @@ void ib_ud_header_init(int payload_bytes,
header->lrh_present = lrh_present;
header->eth_present = eth_present;
header->vlan_present = vlan_present;
- header->grh_present = grh_present;
+ header->grh_present = grh_present || (ip_version == 6);
+ header->ipv4_present = ip_version == 4;
+ header->udp_present = udp_present;
header->immediate_present = immediate_present;
+ return 0;
}
EXPORT_SYMBOL(ib_ud_header_init);
@@ -311,6 +434,16 @@ int ib_ud_header_pack(struct ib_ud_header *header,
&header->grh, buf + len);
len += IB_GRH_BYTES;
}
+ if (header->ipv4_present) {
+ ib_pack(ip4_table, ARRAY_SIZE(ip4_table),
+ &header->ip4, buf + len);
+ len += IB_IP4_BYTES;
+ }
+ if (header->udp_present) {
+ ib_pack(udp_table, ARRAY_SIZE(udp_table),
+ &header->udp, buf + len);
+ len += IB_UDP_BYTES;
+ }
ib_pack(bth_table, ARRAY_SIZE(bth_table),
&header->bth, buf + len);
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 40becdb3196e..e69bf266049d 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -232,7 +232,7 @@ static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn,
ib_ucontext_notifier_end_account(context);
}
-static struct mmu_notifier_ops ib_umem_notifiers = {
+static const struct mmu_notifier_ops ib_umem_notifiers = {
.release = ib_umem_notifier_release,
.invalidate_page = ib_umem_notifier_invalidate_page,
.invalidate_range_start = ib_umem_notifier_invalidate_range_start,
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 57f281f8d686..415a3185cde7 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -210,6 +210,7 @@ static void send_handler(struct ib_mad_agent *agent,
}
static void recv_handler(struct ib_mad_agent *agent,
+ struct ib_mad_send_buf *send_buf,
struct ib_mad_recv_wc *mad_recv_wc)
{
struct ib_umad_file *file = agent->context;
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 94bbd8c155fc..612ccfd39bf9 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -204,6 +204,8 @@ void ib_uverbs_event_handler(struct ib_event_handler *handler,
struct ib_event *event);
void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd);
+int uverbs_dealloc_mw(struct ib_mw *mw);
+
struct ib_uverbs_flow_spec {
union {
union {
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 1c02deab068f..6ffc9c4e93af 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -291,9 +291,6 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
struct ib_uverbs_get_context cmd;
struct ib_uverbs_get_context_resp resp;
struct ib_udata udata;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- struct ib_device_attr dev_attr;
-#endif
struct ib_ucontext *ucontext;
struct file *filp;
int ret;
@@ -342,10 +339,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
ucontext->odp_mrs_count = 0;
INIT_LIST_HEAD(&ucontext->no_private_counters);
- ret = ib_query_device(ib_dev, &dev_attr);
- if (ret)
- goto err_free;
- if (!(dev_attr.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
+ if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
ucontext->invalidate_range = NULL;
#endif
@@ -447,8 +441,6 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
{
struct ib_uverbs_query_device cmd;
struct ib_uverbs_query_device_resp resp;
- struct ib_device_attr attr;
- int ret;
if (out_len < sizeof resp)
return -ENOSPC;
@@ -456,12 +448,8 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- ret = ib_query_device(ib_dev, &attr);
- if (ret)
- return ret;
-
memset(&resp, 0, sizeof resp);
- copy_query_dev_fields(file, ib_dev, &resp, &attr);
+ copy_query_dev_fields(file, ib_dev, &resp, &ib_dev->attrs);
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp))
@@ -986,11 +974,8 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
}
if (cmd.access_flags & IB_ACCESS_ON_DEMAND) {
- struct ib_device_attr attr;
-
- ret = ib_query_device(pd->device, &attr);
- if (ret || !(attr.device_cap_flags &
- IB_DEVICE_ON_DEMAND_PAGING)) {
+ if (!(pd->device->attrs.device_cap_flags &
+ IB_DEVICE_ON_DEMAND_PAGING)) {
pr_debug("ODP support not available\n");
ret = -EINVAL;
goto err_put;
@@ -1008,7 +993,6 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
mr->pd = pd;
mr->uobject = uobj;
atomic_inc(&pd->usecnt);
- atomic_set(&mr->usecnt, 0);
uobj->object = mr;
ret = idr_add_uobj(&ib_uverbs_mr_idr, uobj);
@@ -1106,11 +1090,6 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
}
}
- if (atomic_read(&mr->usecnt)) {
- ret = -EBUSY;
- goto put_uobj_pd;
- }
-
old_pd = mr->pd;
ret = mr->device->rereg_user_mr(mr, cmd.flags, cmd.start,
cmd.length, cmd.hca_va,
@@ -1258,7 +1237,7 @@ err_copy:
idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
err_unalloc:
- ib_dealloc_mw(mw);
+ uverbs_dealloc_mw(mw);
err_put:
put_pd_read(pd);
@@ -1287,7 +1266,7 @@ ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
mw = uobj->object;
- ret = ib_dealloc_mw(mw);
+ ret = uverbs_dealloc_mw(mw);
if (!ret)
uobj->live = 0;
@@ -1845,7 +1824,10 @@ static int create_qp(struct ib_uverbs_file *file,
sizeof(cmd->create_flags))
attr.create_flags = cmd->create_flags;
- if (attr.create_flags & ~IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
+ if (attr.create_flags & ~(IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
+ IB_QP_CREATE_CROSS_CHANNEL |
+ IB_QP_CREATE_MANAGED_SEND |
+ IB_QP_CREATE_MANAGED_RECV)) {
ret = -EINVAL;
goto err_put;
}
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index e3ef28861be6..39680aed99dd 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -133,6 +133,17 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
static void ib_uverbs_add_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
+int uverbs_dealloc_mw(struct ib_mw *mw)
+{
+ struct ib_pd *pd = mw->pd;
+ int ret;
+
+ ret = mw->device->dealloc_mw(mw);
+ if (!ret)
+ atomic_dec(&pd->usecnt);
+ return ret;
+}
+
static void ib_uverbs_release_dev(struct kobject *kobj)
{
struct ib_uverbs_device *dev =
@@ -224,7 +235,7 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
struct ib_mw *mw = uobj->object;
idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
- ib_dealloc_mw(mw);
+ uverbs_dealloc_mw(mw);
kfree(uobj);
}
diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c
index 7d2f14c9bbef..af020f80d50f 100644
--- a/drivers/infiniband/core/uverbs_marshall.c
+++ b/drivers/infiniband/core/uverbs_marshall.c
@@ -144,5 +144,6 @@ void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst,
memset(dst->dmac, 0, sizeof(dst->dmac));
dst->net = NULL;
dst->ifindex = 0;
+ dst->gid_type = IB_GID_TYPE_IB;
}
EXPORT_SYMBOL(ib_copy_path_rec_from_user);
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 545906dec26d..5af6d024e053 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -229,12 +229,6 @@ EXPORT_SYMBOL(rdma_port_get_link_layer);
struct ib_pd *ib_alloc_pd(struct ib_device *device)
{
struct ib_pd *pd;
- struct ib_device_attr devattr;
- int rc;
-
- rc = ib_query_device(device, &devattr);
- if (rc)
- return ERR_PTR(rc);
pd = device->alloc_pd(device, NULL, NULL);
if (IS_ERR(pd))
@@ -245,7 +239,7 @@ struct ib_pd *ib_alloc_pd(struct ib_device *device)
pd->local_mr = NULL;
atomic_set(&pd->usecnt, 0);
- if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
+ if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
pd->local_dma_lkey = device->local_dma_lkey;
else {
struct ib_mr *mr;
@@ -311,8 +305,61 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
}
EXPORT_SYMBOL(ib_create_ah);
+static int ib_get_header_version(const union rdma_network_hdr *hdr)
+{
+ const struct iphdr *ip4h = (struct iphdr *)&hdr->roce4grh;
+ struct iphdr ip4h_checked;
+ const struct ipv6hdr *ip6h = (struct ipv6hdr *)&hdr->ibgrh;
+
+ /* If it's IPv6, the version must be 6, otherwise, the first
+ * 20 bytes (before the IPv4 header) are garbled.
+ */
+ if (ip6h->version != 6)
+ return (ip4h->version == 4) ? 4 : 0;
+ /* version may be 6 or 4 because the first 20 bytes could be garbled */
+
+ /* RoCE v2 requires no options, thus header length
+ * must be 5 words
+ */
+ if (ip4h->ihl != 5)
+ return 6;
+
+ /* Verify checksum.
+ * We can't write on scattered buffers so we need to copy to
+ * temp buffer.
+ */
+ memcpy(&ip4h_checked, ip4h, sizeof(ip4h_checked));
+ ip4h_checked.check = 0;
+ ip4h_checked.check = ip_fast_csum((u8 *)&ip4h_checked, 5);
+ /* if IPv4 header checksum is OK, believe it */
+ if (ip4h->check == ip4h_checked.check)
+ return 4;
+ return 6;
+}
+
+static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device,
+ u8 port_num,
+ const struct ib_grh *grh)
+{
+ int grh_version;
+
+ if (rdma_protocol_ib(device, port_num))
+ return RDMA_NETWORK_IB;
+
+ grh_version = ib_get_header_version((union rdma_network_hdr *)grh);
+
+ if (grh_version == 4)
+ return RDMA_NETWORK_IPV4;
+
+ if (grh->next_hdr == IPPROTO_UDP)
+ return RDMA_NETWORK_IPV6;
+
+ return RDMA_NETWORK_ROCE_V1;
+}
+
struct find_gid_index_context {
u16 vlan_id;
+ enum ib_gid_type gid_type;
};
static bool find_gid_index(const union ib_gid *gid,
@@ -322,6 +369,9 @@ static bool find_gid_index(const union ib_gid *gid,
struct find_gid_index_context *ctx =
(struct find_gid_index_context *)context;
+ if (ctx->gid_type != gid_attr->gid_type)
+ return false;
+
if ((!!(ctx->vlan_id != 0xffff) == !is_vlan_dev(gid_attr->ndev)) ||
(is_vlan_dev(gid_attr->ndev) &&
vlan_dev_vlan_id(gid_attr->ndev) != ctx->vlan_id))
@@ -332,14 +382,49 @@ static bool find_gid_index(const union ib_gid *gid,
static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num,
u16 vlan_id, const union ib_gid *sgid,
+ enum ib_gid_type gid_type,
u16 *gid_index)
{
- struct find_gid_index_context context = {.vlan_id = vlan_id};
+ struct find_gid_index_context context = {.vlan_id = vlan_id,
+ .gid_type = gid_type};
return ib_find_gid_by_filter(device, sgid, port_num, find_gid_index,
&context, gid_index);
}
+static int get_gids_from_rdma_hdr(union rdma_network_hdr *hdr,
+ enum rdma_network_type net_type,
+ union ib_gid *sgid, union ib_gid *dgid)
+{
+ struct sockaddr_in src_in;
+ struct sockaddr_in dst_in;
+ __be32 src_saddr, dst_saddr;
+
+ if (!sgid || !dgid)
+ return -EINVAL;
+
+ if (net_type == RDMA_NETWORK_IPV4) {
+ memcpy(&src_in.sin_addr.s_addr,
+ &hdr->roce4grh.saddr, 4);
+ memcpy(&dst_in.sin_addr.s_addr,
+ &hdr->roce4grh.daddr, 4);
+ src_saddr = src_in.sin_addr.s_addr;
+ dst_saddr = dst_in.sin_addr.s_addr;
+ ipv6_addr_set_v4mapped(src_saddr,
+ (struct in6_addr *)sgid);
+ ipv6_addr_set_v4mapped(dst_saddr,
+ (struct in6_addr *)dgid);
+ return 0;
+ } else if (net_type == RDMA_NETWORK_IPV6 ||
+ net_type == RDMA_NETWORK_IB) {
+ *dgid = hdr->ibgrh.dgid;
+ *sgid = hdr->ibgrh.sgid;
+ return 0;
+ } else {
+ return -EINVAL;
+ }
+}
+
int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
const struct ib_wc *wc, const struct ib_grh *grh,
struct ib_ah_attr *ah_attr)
@@ -347,33 +432,72 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
u32 flow_class;
u16 gid_index;
int ret;
+ enum rdma_network_type net_type = RDMA_NETWORK_IB;
+ enum ib_gid_type gid_type = IB_GID_TYPE_IB;
+ int hoplimit = 0xff;
+ union ib_gid dgid;
+ union ib_gid sgid;
memset(ah_attr, 0, sizeof *ah_attr);
if (rdma_cap_eth_ah(device, port_num)) {
+ if (wc->wc_flags & IB_WC_WITH_NETWORK_HDR_TYPE)
+ net_type = wc->network_hdr_type;
+ else
+ net_type = ib_get_net_type_by_grh(device, port_num, grh);
+ gid_type = ib_network_to_gid_type(net_type);
+ }
+ ret = get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type,
+ &sgid, &dgid);
+ if (ret)
+ return ret;
+
+ if (rdma_protocol_roce(device, port_num)) {
+ int if_index = 0;
u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ?
wc->vlan_id : 0xffff;
+ struct net_device *idev;
+ struct net_device *resolved_dev;
if (!(wc->wc_flags & IB_WC_GRH))
return -EPROTOTYPE;
- if (!(wc->wc_flags & IB_WC_WITH_SMAC) ||
- !(wc->wc_flags & IB_WC_WITH_VLAN)) {
- ret = rdma_addr_find_dmac_by_grh(&grh->dgid, &grh->sgid,
- ah_attr->dmac,
- wc->wc_flags & IB_WC_WITH_VLAN ?
- NULL : &vlan_id,
- 0);
- if (ret)
- return ret;
+ if (!device->get_netdev)
+ return -EOPNOTSUPP;
+
+ idev = device->get_netdev(device, port_num);
+ if (!idev)
+ return -ENODEV;
+
+ ret = rdma_addr_find_l2_eth_by_grh(&dgid, &sgid,
+ ah_attr->dmac,
+ wc->wc_flags & IB_WC_WITH_VLAN ?
+ NULL : &vlan_id,
+ &if_index, &hoplimit);
+ if (ret) {
+ dev_put(idev);
+ return ret;
}
- ret = get_sgid_index_from_eth(device, port_num, vlan_id,
- &grh->dgid, &gid_index);
+ resolved_dev = dev_get_by_index(&init_net, if_index);
+ if (resolved_dev->flags & IFF_LOOPBACK) {
+ dev_put(resolved_dev);
+ resolved_dev = idev;
+ dev_hold(resolved_dev);
+ }
+ rcu_read_lock();
+ if (resolved_dev != idev && !rdma_is_upper_dev_rcu(idev,
+ resolved_dev))
+ ret = -EHOSTUNREACH;
+ rcu_read_unlock();
+ dev_put(idev);
+ dev_put(resolved_dev);
if (ret)
return ret;
- if (wc->wc_flags & IB_WC_WITH_SMAC)
- memcpy(ah_attr->dmac, wc->smac, ETH_ALEN);
+ ret = get_sgid_index_from_eth(device, port_num, vlan_id,
+ &dgid, gid_type, &gid_index);
+ if (ret)
+ return ret;
}
ah_attr->dlid = wc->slid;
@@ -383,10 +507,11 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
if (wc->wc_flags & IB_WC_GRH) {
ah_attr->ah_flags = IB_AH_GRH;
- ah_attr->grh.dgid = grh->sgid;
+ ah_attr->grh.dgid = sgid;
if (!rdma_cap_eth_ah(device, port_num)) {
- ret = ib_find_cached_gid_by_port(device, &grh->dgid,
+ ret = ib_find_cached_gid_by_port(device, &dgid,
+ IB_GID_TYPE_IB,
port_num, NULL,
&gid_index);
if (ret)
@@ -396,7 +521,7 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
ah_attr->grh.sgid_index = (u8) gid_index;
flow_class = be32_to_cpu(grh->version_tclass_flow);
ah_attr->grh.flow_label = flow_class & 0xFFFFF;
- ah_attr->grh.hop_limit = 0xFF;
+ ah_attr->grh.hop_limit = hoplimit;
ah_attr->grh.traffic_class = (flow_class >> 20) & 0xFF;
}
return 0;
@@ -1014,6 +1139,7 @@ int ib_resolve_eth_dmac(struct ib_qp *qp,
union ib_gid sgid;
struct ib_gid_attr sgid_attr;
int ifindex;
+ int hop_limit;
ret = ib_query_gid(qp->device,
qp_attr->ah_attr.port_num,
@@ -1028,12 +1154,14 @@ int ib_resolve_eth_dmac(struct ib_qp *qp,
ifindex = sgid_attr.ndev->ifindex;
- ret = rdma_addr_find_dmac_by_grh(&sgid,
- &qp_attr->ah_attr.grh.dgid,
- qp_attr->ah_attr.dmac,
- NULL, ifindex);
+ ret = rdma_addr_find_l2_eth_by_grh(&sgid,
+ &qp_attr->ah_attr.grh.dgid,
+ qp_attr->ah_attr.dmac,
+ NULL, &ifindex, &hop_limit);
dev_put(sgid_attr.ndev);
+
+ qp_attr->ah_attr.grh.hop_limit = hop_limit;
}
}
out:
@@ -1215,29 +1343,17 @@ struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags)
mr->pd = pd;
mr->uobject = NULL;
atomic_inc(&pd->usecnt);
- atomic_set(&mr->usecnt, 0);
}
return mr;
}
EXPORT_SYMBOL(ib_get_dma_mr);
-int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
-{
- return mr->device->query_mr ?
- mr->device->query_mr(mr, mr_attr) : -ENOSYS;
-}
-EXPORT_SYMBOL(ib_query_mr);
-
int ib_dereg_mr(struct ib_mr *mr)
{
- struct ib_pd *pd;
+ struct ib_pd *pd = mr->pd;
int ret;
- if (atomic_read(&mr->usecnt))
- return -EBUSY;
-
- pd = mr->pd;
ret = mr->device->dereg_mr(mr);
if (!ret)
atomic_dec(&pd->usecnt);
@@ -1273,49 +1389,12 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
mr->pd = pd;
mr->uobject = NULL;
atomic_inc(&pd->usecnt);
- atomic_set(&mr->usecnt, 0);
}
return mr;
}
EXPORT_SYMBOL(ib_alloc_mr);
-/* Memory windows */
-
-struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
-{
- struct ib_mw *mw;
-
- if (!pd->device->alloc_mw)
- return ERR_PTR(-ENOSYS);
-
- mw = pd->device->alloc_mw(pd, type);
- if (!IS_ERR(mw)) {
- mw->device = pd->device;
- mw->pd = pd;
- mw->uobject = NULL;
- mw->type = type;
- atomic_inc(&pd->usecnt);
- }
-
- return mw;
-}
-EXPORT_SYMBOL(ib_alloc_mw);
-
-int ib_dealloc_mw(struct ib_mw *mw)
-{
- struct ib_pd *pd;
- int ret;
-
- pd = mw->pd;
- ret = mw->device->dealloc_mw(mw);
- if (!ret)
- atomic_dec(&pd->usecnt);
-
- return ret;
-}
-EXPORT_SYMBOL(ib_dealloc_mw);
-
/* "Fast" memory regions */
struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd,
@@ -1530,7 +1609,7 @@ int ib_sg_to_pages(struct ib_mr *mr,
int (*set_page)(struct ib_mr *, u64))
{
struct scatterlist *sg;
- u64 last_end_dma_addr = 0, last_page_addr = 0;
+ u64 last_end_dma_addr = 0;
unsigned int last_page_off = 0;
u64 page_mask = ~((u64)mr->page_size - 1);
int i, ret;
@@ -1572,7 +1651,6 @@ next_page:
mr->length += dma_len;
last_end_dma_addr = end_dma_addr;
- last_page_addr = end_dma_addr & page_mask;
last_page_off = end_dma_addr & ~page_mask;
}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index cb78b1e9bcd9..f504ba73e5dc 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -149,7 +149,7 @@ static int iwch_l2t_send(struct t3cdev *tdev, struct sk_buff *skb, struct l2t_en
error = l2t_send(tdev, skb, l2e);
if (error < 0)
kfree_skb(skb);
- return error;
+ return error < 0 ? error : 0;
}
int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb)
@@ -165,7 +165,7 @@ int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb)
error = cxgb3_ofld_send(tdev, skb);
if (error < 0)
kfree_skb(skb);
- return error;
+ return error < 0 ? error : 0;
}
static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb)
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c
index cfe404925a39..97fbfd2c298e 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cq.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c
@@ -115,10 +115,6 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
case T3_SEND_WITH_SE_INV:
wc->opcode = IB_WC_SEND;
break;
- case T3_BIND_MW:
- wc->opcode = IB_WC_BIND_MW;
- break;
-
case T3_LOCAL_INV:
wc->opcode = IB_WC_LOCAL_INV;
break;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c
index 5c36ee2809ac..1d04c872c9d5 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_mem.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c
@@ -75,37 +75,6 @@ int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
return ret;
}
-int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
- struct iwch_mr *mhp,
- int shift,
- int npages)
-{
- u32 stag;
- int ret;
-
- /* We could support this... */
- if (npages > mhp->attr.pbl_size)
- return -ENOMEM;
-
- stag = mhp->attr.stag;
- if (cxio_reregister_phys_mem(&rhp->rdev,
- &stag, mhp->attr.pdid,
- mhp->attr.perms,
- mhp->attr.zbva,
- mhp->attr.va_fbo,
- mhp->attr.len,
- shift - 12,
- mhp->attr.pbl_size, mhp->attr.pbl_addr))
- return -ENOMEM;
-
- ret = iwch_finish_mem_reg(mhp, stag);
- if (ret)
- cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
- mhp->attr.pbl_addr);
-
- return ret;
-}
-
int iwch_alloc_pbl(struct iwch_mr *mhp, int npages)
{
mhp->attr.pbl_addr = cxio_hal_pblpool_alloc(&mhp->rhp->rdev,
@@ -130,74 +99,3 @@ int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset)
return cxio_write_pbl(&mhp->rhp->rdev, pages,
mhp->attr.pbl_addr + (offset << 3), npages);
}
-
-int build_phys_page_list(struct ib_phys_buf *buffer_list,
- int num_phys_buf,
- u64 *iova_start,
- u64 *total_size,
- int *npages,
- int *shift,
- __be64 **page_list)
-{
- u64 mask;
- int i, j, n;
-
- mask = 0;
- *total_size = 0;
- for (i = 0; i < num_phys_buf; ++i) {
- if (i != 0 && buffer_list[i].addr & ~PAGE_MASK)
- return -EINVAL;
- if (i != 0 && i != num_phys_buf - 1 &&
- (buffer_list[i].size & ~PAGE_MASK))
- return -EINVAL;
- *total_size += buffer_list[i].size;
- if (i > 0)
- mask |= buffer_list[i].addr;
- else
- mask |= buffer_list[i].addr & PAGE_MASK;
- if (i != num_phys_buf - 1)
- mask |= buffer_list[i].addr + buffer_list[i].size;
- else
- mask |= (buffer_list[i].addr + buffer_list[i].size +
- PAGE_SIZE - 1) & PAGE_MASK;
- }
-
- if (*total_size > 0xFFFFFFFFULL)
- return -ENOMEM;
-
- /* Find largest page shift we can use to cover buffers */
- for (*shift = PAGE_SHIFT; *shift < 27; ++(*shift))
- if ((1ULL << *shift) & mask)
- break;
-
- buffer_list[0].size += buffer_list[0].addr & ((1ULL << *shift) - 1);
- buffer_list[0].addr &= ~0ull << *shift;
-
- *npages = 0;
- for (i = 0; i < num_phys_buf; ++i)
- *npages += (buffer_list[i].size +
- (1ULL << *shift) - 1) >> *shift;
-
- if (!*npages)
- return -EINVAL;
-
- *page_list = kmalloc(sizeof(u64) * *npages, GFP_KERNEL);
- if (!*page_list)
- return -ENOMEM;
-
- n = 0;
- for (i = 0; i < num_phys_buf; ++i)
- for (j = 0;
- j < (buffer_list[i].size + (1ULL << *shift) - 1) >> *shift;
- ++j)
- (*page_list)[n++] = cpu_to_be64(buffer_list[i].addr +
- ((u64) j << *shift));
-
- PDBG("%s va 0x%llx mask 0x%llx shift %d len %lld pbl_size %d\n",
- __func__, (unsigned long long) *iova_start,
- (unsigned long long) mask, *shift, (unsigned long long) *total_size,
- *npages);
-
- return 0;
-
-}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index c34725ca0bb4..2734820d291b 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -458,9 +458,6 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
u32 mmid;
PDBG("%s ib_mr %p\n", __func__, ib_mr);
- /* There can be no memory windows */
- if (atomic_read(&ib_mr->usecnt))
- return -EINVAL;
mhp = to_iwch_mr(ib_mr);
kfree(mhp->pages);
@@ -479,24 +476,25 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
return 0;
}
-static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf,
- int acc,
- u64 *iova_start)
+static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc)
{
- __be64 *page_list;
- int shift;
- u64 total_size;
- int npages;
- struct iwch_dev *rhp;
- struct iwch_pd *php;
+ const u64 total_size = 0xffffffff;
+ const u64 mask = (total_size + PAGE_SIZE - 1) & PAGE_MASK;
+ struct iwch_pd *php = to_iwch_pd(pd);
+ struct iwch_dev *rhp = php->rhp;
struct iwch_mr *mhp;
- int ret;
+ __be64 *page_list;
+ int shift = 26, npages, ret, i;
PDBG("%s ib_pd %p\n", __func__, pd);
- php = to_iwch_pd(pd);
- rhp = php->rhp;
+
+ /*
+ * T3 only supports 32 bits of size.
+ */
+ if (sizeof(phys_addr_t) > 4) {
+ pr_warn_once(MOD "Cannot support dma_mrs on this platform.\n");
+ return ERR_PTR(-ENOTSUPP);
+ }
mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
if (!mhp)
@@ -504,22 +502,23 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
mhp->rhp = rhp;
- /* First check that we have enough alignment */
- if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) {
+ npages = (total_size + (1ULL << shift) - 1) >> shift;
+ if (!npages) {
ret = -EINVAL;
goto err;
}
- if (num_phys_buf > 1 &&
- ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK)) {
- ret = -EINVAL;
+ page_list = kmalloc_array(npages, sizeof(u64), GFP_KERNEL);
+ if (!page_list) {
+ ret = -ENOMEM;
goto err;
}
- ret = build_phys_page_list(buffer_list, num_phys_buf, iova_start,
- &total_size, &npages, &shift, &page_list);
- if (ret)
- goto err;
+ for (i = 0; i < npages; i++)
+ page_list[i] = cpu_to_be64((u64)i << shift);
+
+ PDBG("%s mask 0x%llx shift %d len %lld pbl_size %d\n",
+ __func__, mask, shift, total_size, npages);
ret = iwch_alloc_pbl(mhp, npages);
if (ret) {
@@ -536,7 +535,7 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
mhp->attr.zbva = 0;
mhp->attr.perms = iwch_ib_to_tpt_access(acc);
- mhp->attr.va_fbo = *iova_start;
+ mhp->attr.va_fbo = 0;
mhp->attr.page_size = shift - 12;
mhp->attr.len = (u32) total_size;
@@ -553,76 +552,8 @@ err_pbl:
err:
kfree(mhp);
return ERR_PTR(ret);
-
-}
-
-static int iwch_reregister_phys_mem(struct ib_mr *mr,
- int mr_rereg_mask,
- struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf,
- int acc, u64 * iova_start)
-{
-
- struct iwch_mr mh, *mhp;
- struct iwch_pd *php;
- struct iwch_dev *rhp;
- __be64 *page_list = NULL;
- int shift = 0;
- u64 total_size;
- int npages = 0;
- int ret;
-
- PDBG("%s ib_mr %p ib_pd %p\n", __func__, mr, pd);
-
- /* There can be no memory windows */
- if (atomic_read(&mr->usecnt))
- return -EINVAL;
-
- mhp = to_iwch_mr(mr);
- rhp = mhp->rhp;
- php = to_iwch_pd(mr->pd);
-
- /* make sure we are on the same adapter */
- if (rhp != php->rhp)
- return -EINVAL;
-
- memcpy(&mh, mhp, sizeof *mhp);
-
- if (mr_rereg_mask & IB_MR_REREG_PD)
- php = to_iwch_pd(pd);
- if (mr_rereg_mask & IB_MR_REREG_ACCESS)
- mh.attr.perms = iwch_ib_to_tpt_access(acc);
- if (mr_rereg_mask & IB_MR_REREG_TRANS) {
- ret = build_phys_page_list(buffer_list, num_phys_buf,
- iova_start,
- &total_size, &npages,
- &shift, &page_list);
- if (ret)
- return ret;
- }
-
- ret = iwch_reregister_mem(rhp, php, &mh, shift, npages);
- kfree(page_list);
- if (ret) {
- return ret;
- }
- if (mr_rereg_mask & IB_MR_REREG_PD)
- mhp->attr.pdid = php->pdid;
- if (mr_rereg_mask & IB_MR_REREG_ACCESS)
- mhp->attr.perms = iwch_ib_to_tpt_access(acc);
- if (mr_rereg_mask & IB_MR_REREG_TRANS) {
- mhp->attr.zbva = 0;
- mhp->attr.va_fbo = *iova_start;
- mhp->attr.page_size = shift - 12;
- mhp->attr.len = (u32) total_size;
- mhp->attr.pbl_size = npages;
- }
-
- return 0;
}
-
static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt, int acc, struct ib_udata *udata)
{
@@ -726,28 +657,6 @@ err:
return ERR_PTR(err);
}
-static struct ib_mr *iwch_get_dma_mr(struct ib_pd *pd, int acc)
-{
- struct ib_phys_buf bl;
- u64 kva;
- struct ib_mr *ibmr;
-
- PDBG("%s ib_pd %p\n", __func__, pd);
-
- /*
- * T3 only supports 32 bits of size.
- */
- if (sizeof(phys_addr_t) > 4) {
- pr_warn_once(MOD "Cannot support dma_mrs on this platform.\n");
- return ERR_PTR(-ENOTSUPP);
- }
- bl.size = 0xffffffff;
- bl.addr = 0;
- kva = 0;
- ibmr = iwch_register_phys_mem(pd, &bl, 1, acc, &kva);
- return ibmr;
-}
-
static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
{
struct iwch_dev *rhp;
@@ -1452,12 +1361,9 @@ int iwch_register_device(struct iwch_dev *dev)
dev->ibdev.resize_cq = iwch_resize_cq;
dev->ibdev.poll_cq = iwch_poll_cq;
dev->ibdev.get_dma_mr = iwch_get_dma_mr;
- dev->ibdev.reg_phys_mr = iwch_register_phys_mem;
- dev->ibdev.rereg_phys_mr = iwch_reregister_phys_mem;
dev->ibdev.reg_user_mr = iwch_reg_user_mr;
dev->ibdev.dereg_mr = iwch_dereg_mr;
dev->ibdev.alloc_mw = iwch_alloc_mw;
- dev->ibdev.bind_mw = iwch_bind_mw;
dev->ibdev.dealloc_mw = iwch_dealloc_mw;
dev->ibdev.alloc_mr = iwch_alloc_mr;
dev->ibdev.map_mr_sg = iwch_map_mr_sg;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
index 2ac85b86a680..252c464a09f6 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -330,9 +330,6 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr);
int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr);
-int iwch_bind_mw(struct ib_qp *qp,
- struct ib_mw *mw,
- struct ib_mw_bind *mw_bind);
int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg);
int iwch_post_zb_read(struct iwch_ep *ep);
@@ -341,21 +338,9 @@ void iwch_unregister_device(struct iwch_dev *dev);
void stop_read_rep_timer(struct iwch_qp *qhp);
int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
struct iwch_mr *mhp, int shift);
-int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
- struct iwch_mr *mhp,
- int shift,
- int npages);
int iwch_alloc_pbl(struct iwch_mr *mhp, int npages);
void iwch_free_pbl(struct iwch_mr *mhp);
int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset);
-int build_phys_page_list(struct ib_phys_buf *buffer_list,
- int num_phys_buf,
- u64 *iova_start,
- u64 *total_size,
- int *npages,
- int *shift,
- __be64 **page_list);
-
#define IWCH_NODE_DESC "cxgb3 Chelsio Communications"
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index d0548fc6395e..d939980a708f 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -526,88 +526,6 @@ out:
return err;
}
-int iwch_bind_mw(struct ib_qp *qp,
- struct ib_mw *mw,
- struct ib_mw_bind *mw_bind)
-{
- struct iwch_dev *rhp;
- struct iwch_mw *mhp;
- struct iwch_qp *qhp;
- union t3_wr *wqe;
- u32 pbl_addr;
- u8 page_size;
- u32 num_wrs;
- unsigned long flag;
- struct ib_sge sgl;
- int err=0;
- enum t3_wr_flags t3_wr_flags;
- u32 idx;
- struct t3_swsq *sqp;
-
- qhp = to_iwch_qp(qp);
- mhp = to_iwch_mw(mw);
- rhp = qhp->rhp;
-
- spin_lock_irqsave(&qhp->lock, flag);
- if (qhp->attr.state > IWCH_QP_STATE_RTS) {
- spin_unlock_irqrestore(&qhp->lock, flag);
- return -EINVAL;
- }
- num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
- qhp->wq.sq_size_log2);
- if (num_wrs == 0) {
- spin_unlock_irqrestore(&qhp->lock, flag);
- return -ENOMEM;
- }
- idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
- PDBG("%s: idx 0x%0x, mw 0x%p, mw_bind 0x%p\n", __func__, idx,
- mw, mw_bind);
- wqe = (union t3_wr *) (qhp->wq.queue + idx);
-
- t3_wr_flags = 0;
- if (mw_bind->send_flags & IB_SEND_SIGNALED)
- t3_wr_flags = T3_COMPLETION_FLAG;
-
- sgl.addr = mw_bind->bind_info.addr;
- sgl.lkey = mw_bind->bind_info.mr->lkey;
- sgl.length = mw_bind->bind_info.length;
- wqe->bind.reserved = 0;
- wqe->bind.type = TPT_VATO;
-
- /* TBD: check perms */
- wqe->bind.perms = iwch_ib_to_tpt_bind_access(
- mw_bind->bind_info.mw_access_flags);
- wqe->bind.mr_stag = cpu_to_be32(mw_bind->bind_info.mr->lkey);
- wqe->bind.mw_stag = cpu_to_be32(mw->rkey);
- wqe->bind.mw_len = cpu_to_be32(mw_bind->bind_info.length);
- wqe->bind.mw_va = cpu_to_be64(mw_bind->bind_info.addr);
- err = iwch_sgl2pbl_map(rhp, &sgl, 1, &pbl_addr, &page_size);
- if (err) {
- spin_unlock_irqrestore(&qhp->lock, flag);
- return err;
- }
- wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
- sqp = qhp->wq.sq + Q_PTR2IDX(qhp->wq.sq_wptr, qhp->wq.sq_size_log2);
- sqp->wr_id = mw_bind->wr_id;
- sqp->opcode = T3_BIND_MW;
- sqp->sq_wptr = qhp->wq.sq_wptr;
- sqp->complete = 0;
- sqp->signaled = (mw_bind->send_flags & IB_SEND_SIGNALED);
- wqe->bind.mr_pbl_addr = cpu_to_be32(pbl_addr);
- wqe->bind.mr_pagesz = page_size;
- build_fw_riwrh((void *)wqe, T3_WR_BIND, t3_wr_flags,
- Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2), 0,
- sizeof(struct t3_bind_mw_wr) >> 3, T3_SOPEOP);
- ++(qhp->wq.wptr);
- ++(qhp->wq.sq_wptr);
- spin_unlock_irqrestore(&qhp->lock, flag);
-
- if (cxio_wq_db_enabled(&qhp->wq))
- ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
-
- return err;
-}
-
static inline void build_term_codes(struct respQ_msg_t *rsp_msg,
u8 *layer_type, u8 *ecode)
{
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 326d07d823a5..cd2ff5f9518a 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -3271,6 +3271,12 @@ static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)
&ep->com.mapped_local_addr;
+ if (ipv6_addr_type(&sin6->sin6_addr) != IPV6_ADDR_ANY) {
+ err = cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0],
+ (const u32 *)&sin6->sin6_addr.s6_addr, 1);
+ if (err)
+ return err;
+ }
c4iw_init_wr_wait(&ep->com.wr_wait);
err = cxgb4_create_server6(ep->com.dev->rdev.lldi.ports[0],
ep->stid, &sin6->sin6_addr,
@@ -3282,13 +3288,13 @@ static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
0, 0, __func__);
else if (err > 0)
err = net_xmit_errno(err);
- if (err)
+ if (err) {
+ cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
+ (const u32 *)&sin6->sin6_addr.s6_addr, 1);
pr_err("cxgb4_create_server6/filter failed err %d stid %d laddr %pI6 lport %d\n",
err, ep->stid,
sin6->sin6_addr.s6_addr, ntohs(sin6->sin6_port));
- else
- cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0],
- (const u32 *)&sin6->sin6_addr.s6_addr, 1);
+ }
return err;
}
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index de9cd6901752..cf21df4a8bf5 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -744,9 +744,6 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
case FW_RI_SEND_WITH_SE:
wc->opcode = IB_WC_SEND;
break;
- case FW_RI_BIND_MW:
- wc->opcode = IB_WC_BIND_MW;
- break;
case FW_RI_LOCAL_INV:
wc->opcode = IB_WC_LOCAL_INV;
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 58fce1742b8d..8024ea4417b8 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -315,14 +315,12 @@ static int qp_release(struct inode *inode, struct file *file)
static int qp_open(struct inode *inode, struct file *file)
{
struct c4iw_debugfs_data *qpd;
- int ret = 0;
int count = 1;
qpd = kmalloc(sizeof *qpd, GFP_KERNEL);
- if (!qpd) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!qpd)
+ return -ENOMEM;
+
qpd->devp = inode->i_private;
qpd->pos = 0;
@@ -333,8 +331,8 @@ static int qp_open(struct inode *inode, struct file *file)
qpd->bufsize = count * 128;
qpd->buf = vmalloc(qpd->bufsize);
if (!qpd->buf) {
- ret = -ENOMEM;
- goto err1;
+ kfree(qpd);
+ return -ENOMEM;
}
spin_lock_irq(&qpd->devp->lock);
@@ -343,11 +341,7 @@ static int qp_open(struct inode *inode, struct file *file)
qpd->buf[qpd->pos++] = 0;
file->private_data = qpd;
- goto out;
-err1:
- kfree(qpd);
-out:
- return ret;
+ return 0;
}
static const struct file_operations qp_debugfs_fops = {
@@ -781,8 +775,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
pr_err(MOD "%s: unsupported udb/ucq densities %u/%u\n",
pci_name(rdev->lldi.pdev), rdev->lldi.udb_density,
rdev->lldi.ucq_density);
- err = -EINVAL;
- goto err1;
+ return -EINVAL;
}
if (rdev->lldi.vr->qp.start != rdev->lldi.vr->cq.start ||
rdev->lldi.vr->qp.size != rdev->lldi.vr->cq.size) {
@@ -791,8 +784,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
pci_name(rdev->lldi.pdev), rdev->lldi.vr->qp.start,
rdev->lldi.vr->qp.size, rdev->lldi.vr->cq.size,
rdev->lldi.vr->cq.size);
- err = -EINVAL;
- goto err1;
+ return -EINVAL;
}
rdev->qpmask = rdev->lldi.udb_density - 1;
@@ -816,10 +808,8 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
rdev->lldi.db_reg, rdev->lldi.gts_reg,
rdev->qpmask, rdev->cqmask);
- if (c4iw_num_stags(rdev) == 0) {
- err = -EINVAL;
- goto err1;
- }
+ if (c4iw_num_stags(rdev) == 0)
+ return -EINVAL;
rdev->stats.pd.total = T4_MAX_NUM_PD;
rdev->stats.stag.total = rdev->lldi.vr->stag.size;
@@ -831,29 +821,31 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD);
if (err) {
printk(KERN_ERR MOD "error %d initializing resources\n", err);
- goto err1;
+ return err;
}
err = c4iw_pblpool_create(rdev);
if (err) {
printk(KERN_ERR MOD "error %d initializing pbl pool\n", err);
- goto err2;
+ goto destroy_resource;
}
err = c4iw_rqtpool_create(rdev);
if (err) {
printk(KERN_ERR MOD "error %d initializing rqt pool\n", err);
- goto err3;
+ goto destroy_pblpool;
}
err = c4iw_ocqp_pool_create(rdev);
if (err) {
printk(KERN_ERR MOD "error %d initializing ocqp pool\n", err);
- goto err4;
+ goto destroy_rqtpool;
}
rdev->status_page = (struct t4_dev_status_page *)
__get_free_page(GFP_KERNEL);
- if (!rdev->status_page) {
- pr_err(MOD "error allocating status page\n");
- goto err4;
- }
+ if (!rdev->status_page)
+ goto destroy_ocqp_pool;
+ rdev->status_page->qp_start = rdev->lldi.vr->qp.start;
+ rdev->status_page->qp_size = rdev->lldi.vr->qp.size;
+ rdev->status_page->cq_start = rdev->lldi.vr->cq.start;
+ rdev->status_page->cq_size = rdev->lldi.vr->cq.size;
if (c4iw_wr_log) {
rdev->wr_log = kzalloc((1 << c4iw_wr_log_size_order) *
@@ -869,13 +861,14 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
rdev->status_page->db_off = 0;
return 0;
-err4:
+destroy_ocqp_pool:
+ c4iw_ocqp_pool_destroy(rdev);
+destroy_rqtpool:
c4iw_rqtpool_destroy(rdev);
-err3:
+destroy_pblpool:
c4iw_pblpool_destroy(rdev);
-err2:
+destroy_resource:
c4iw_destroy_resource(&rdev->resource);
-err1:
return err;
}
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 00e55faa086a..fb2de75a0392 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -947,8 +947,6 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr);
int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr);
-int c4iw_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
- struct ib_mw_bind *mw_bind);
int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog);
int c4iw_destroy_listen(struct iw_cm_id *cm_id);
@@ -968,17 +966,6 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start,
u64 length, u64 virt, int acc,
struct ib_udata *udata);
struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc);
-struct ib_mr *c4iw_register_phys_mem(struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf,
- int acc,
- u64 *iova_start);
-int c4iw_reregister_phys_mem(struct ib_mr *mr,
- int mr_rereg_mask,
- struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf,
- int acc, u64 *iova_start);
int c4iw_dereg_mr(struct ib_mr *ib_mr);
int c4iw_destroy_cq(struct ib_cq *ib_cq);
struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index e1629ab58db7..7849890c4781 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -392,32 +392,6 @@ static int register_mem(struct c4iw_dev *rhp, struct c4iw_pd *php,
return ret;
}
-static int reregister_mem(struct c4iw_dev *rhp, struct c4iw_pd *php,
- struct c4iw_mr *mhp, int shift, int npages)
-{
- u32 stag;
- int ret;
-
- if (npages > mhp->attr.pbl_size)
- return -ENOMEM;
-
- stag = mhp->attr.stag;
- ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, mhp->attr.pdid,
- FW_RI_STAG_NSMR, mhp->attr.perms,
- mhp->attr.mw_bind_enable, mhp->attr.zbva,
- mhp->attr.va_fbo, mhp->attr.len, shift - 12,
- mhp->attr.pbl_size, mhp->attr.pbl_addr);
- if (ret)
- return ret;
-
- ret = finish_mem_reg(mhp, stag);
- if (ret)
- dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
- mhp->attr.pbl_addr);
-
- return ret;
-}
-
static int alloc_pbl(struct c4iw_mr *mhp, int npages)
{
mhp->attr.pbl_addr = c4iw_pblpool_alloc(&mhp->rhp->rdev,
@@ -431,228 +405,6 @@ static int alloc_pbl(struct c4iw_mr *mhp, int npages)
return 0;
}
-static int build_phys_page_list(struct ib_phys_buf *buffer_list,
- int num_phys_buf, u64 *iova_start,
- u64 *total_size, int *npages,
- int *shift, __be64 **page_list)
-{
- u64 mask;
- int i, j, n;
-
- mask = 0;
- *total_size = 0;
- for (i = 0; i < num_phys_buf; ++i) {
- if (i != 0 && buffer_list[i].addr & ~PAGE_MASK)
- return -EINVAL;
- if (i != 0 && i != num_phys_buf - 1 &&
- (buffer_list[i].size & ~PAGE_MASK))
- return -EINVAL;
- *total_size += buffer_list[i].size;
- if (i > 0)
- mask |= buffer_list[i].addr;
- else
- mask |= buffer_list[i].addr & PAGE_MASK;
- if (i != num_phys_buf - 1)
- mask |= buffer_list[i].addr + buffer_list[i].size;
- else
- mask |= (buffer_list[i].addr + buffer_list[i].size +
- PAGE_SIZE - 1) & PAGE_MASK;
- }
-
- if (*total_size > 0xFFFFFFFFULL)
- return -ENOMEM;
-
- /* Find largest page shift we can use to cover buffers */
- for (*shift = PAGE_SHIFT; *shift < 27; ++(*shift))
- if ((1ULL << *shift) & mask)
- break;
-
- buffer_list[0].size += buffer_list[0].addr & ((1ULL << *shift) - 1);
- buffer_list[0].addr &= ~0ull << *shift;
-
- *npages = 0;
- for (i = 0; i < num_phys_buf; ++i)
- *npages += (buffer_list[i].size +
- (1ULL << *shift) - 1) >> *shift;
-
- if (!*npages)
- return -EINVAL;
-
- *page_list = kmalloc(sizeof(u64) * *npages, GFP_KERNEL);
- if (!*page_list)
- return -ENOMEM;
-
- n = 0;
- for (i = 0; i < num_phys_buf; ++i)
- for (j = 0;
- j < (buffer_list[i].size + (1ULL << *shift) - 1) >> *shift;
- ++j)
- (*page_list)[n++] = cpu_to_be64(buffer_list[i].addr +
- ((u64) j << *shift));
-
- PDBG("%s va 0x%llx mask 0x%llx shift %d len %lld pbl_size %d\n",
- __func__, (unsigned long long)*iova_start,
- (unsigned long long)mask, *shift, (unsigned long long)*total_size,
- *npages);
-
- return 0;
-
-}
-
-int c4iw_reregister_phys_mem(struct ib_mr *mr, int mr_rereg_mask,
- struct ib_pd *pd, struct ib_phys_buf *buffer_list,
- int num_phys_buf, int acc, u64 *iova_start)
-{
-
- struct c4iw_mr mh, *mhp;
- struct c4iw_pd *php;
- struct c4iw_dev *rhp;
- __be64 *page_list = NULL;
- int shift = 0;
- u64 total_size;
- int npages;
- int ret;
-
- PDBG("%s ib_mr %p ib_pd %p\n", __func__, mr, pd);
-
- /* There can be no memory windows */
- if (atomic_read(&mr->usecnt))
- return -EINVAL;
-
- mhp = to_c4iw_mr(mr);
- rhp = mhp->rhp;
- php = to_c4iw_pd(mr->pd);
-
- /* make sure we are on the same adapter */
- if (rhp != php->rhp)
- return -EINVAL;
-
- memcpy(&mh, mhp, sizeof *mhp);
-
- if (mr_rereg_mask & IB_MR_REREG_PD)
- php = to_c4iw_pd(pd);
- if (mr_rereg_mask & IB_MR_REREG_ACCESS) {
- mh.attr.perms = c4iw_ib_to_tpt_access(acc);
- mh.attr.mw_bind_enable = (acc & IB_ACCESS_MW_BIND) ==
- IB_ACCESS_MW_BIND;
- }
- if (mr_rereg_mask & IB_MR_REREG_TRANS) {
- ret = build_phys_page_list(buffer_list, num_phys_buf,
- iova_start,
- &total_size, &npages,
- &shift, &page_list);
- if (ret)
- return ret;
- }
-
- if (mr_exceeds_hw_limits(rhp, total_size)) {
- kfree(page_list);
- return -EINVAL;
- }
-
- ret = reregister_mem(rhp, php, &mh, shift, npages);
- kfree(page_list);
- if (ret)
- return ret;
- if (mr_rereg_mask & IB_MR_REREG_PD)
- mhp->attr.pdid = php->pdid;
- if (mr_rereg_mask & IB_MR_REREG_ACCESS)
- mhp->attr.perms = c4iw_ib_to_tpt_access(acc);
- if (mr_rereg_mask & IB_MR_REREG_TRANS) {
- mhp->attr.zbva = 0;
- mhp->attr.va_fbo = *iova_start;
- mhp->attr.page_size = shift - 12;
- mhp->attr.len = (u32) total_size;
- mhp->attr.pbl_size = npages;
- }
-
- return 0;
-}
-
-struct ib_mr *c4iw_register_phys_mem(struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf, int acc, u64 *iova_start)
-{
- __be64 *page_list;
- int shift;
- u64 total_size;
- int npages;
- struct c4iw_dev *rhp;
- struct c4iw_pd *php;
- struct c4iw_mr *mhp;
- int ret;
-
- PDBG("%s ib_pd %p\n", __func__, pd);
- php = to_c4iw_pd(pd);
- rhp = php->rhp;
-
- mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
- if (!mhp)
- return ERR_PTR(-ENOMEM);
-
- mhp->rhp = rhp;
-
- /* First check that we have enough alignment */
- if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) {
- ret = -EINVAL;
- goto err;
- }
-
- if (num_phys_buf > 1 &&
- ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK)) {
- ret = -EINVAL;
- goto err;
- }
-
- ret = build_phys_page_list(buffer_list, num_phys_buf, iova_start,
- &total_size, &npages, &shift,
- &page_list);
- if (ret)
- goto err;
-
- if (mr_exceeds_hw_limits(rhp, total_size)) {
- kfree(page_list);
- ret = -EINVAL;
- goto err;
- }
-
- ret = alloc_pbl(mhp, npages);
- if (ret) {
- kfree(page_list);
- goto err;
- }
-
- ret = write_pbl(&mhp->rhp->rdev, page_list, mhp->attr.pbl_addr,
- npages);
- kfree(page_list);
- if (ret)
- goto err_pbl;
-
- mhp->attr.pdid = php->pdid;
- mhp->attr.zbva = 0;
-
- mhp->attr.perms = c4iw_ib_to_tpt_access(acc);
- mhp->attr.va_fbo = *iova_start;
- mhp->attr.page_size = shift - 12;
-
- mhp->attr.len = (u32) total_size;
- mhp->attr.pbl_size = npages;
- ret = register_mem(rhp, php, mhp, shift);
- if (ret)
- goto err_pbl;
-
- return &mhp->ibmr;
-
-err_pbl:
- c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
- mhp->attr.pbl_size << 3);
-
-err:
- kfree(mhp);
- return ERR_PTR(ret);
-
-}
-
struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc)
{
struct c4iw_dev *rhp;
@@ -952,9 +704,6 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr)
u32 mmid;
PDBG("%s ib_mr %p\n", __func__, ib_mr);
- /* There can be no memory windows */
- if (atomic_read(&ib_mr->usecnt))
- return -EINVAL;
mhp = to_c4iw_mr(ib_mr);
rhp = mhp->rhp;
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 0a7d99818b17..ec04272fbdc2 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -549,12 +549,9 @@ int c4iw_register_device(struct c4iw_dev *dev)
dev->ibdev.resize_cq = c4iw_resize_cq;
dev->ibdev.poll_cq = c4iw_poll_cq;
dev->ibdev.get_dma_mr = c4iw_get_dma_mr;
- dev->ibdev.reg_phys_mr = c4iw_register_phys_mem;
- dev->ibdev.rereg_phys_mr = c4iw_reregister_phys_mem;
dev->ibdev.reg_user_mr = c4iw_reg_user_mr;
dev->ibdev.dereg_mr = c4iw_dereg_mr;
dev->ibdev.alloc_mw = c4iw_alloc_mw;
- dev->ibdev.bind_mw = c4iw_bind_mw;
dev->ibdev.dealloc_mw = c4iw_dealloc_mw;
dev->ibdev.alloc_mr = c4iw_alloc_mr;
dev->ibdev.map_mr_sg = c4iw_map_mr_sg;
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index aa515afee724..e99345eb875a 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -933,11 +933,6 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
return err;
}
-int c4iw_bind_mw(struct ib_qp *qp, struct ib_mw *mw, struct ib_mw_bind *mw_bind)
-{
- return -ENOSYS;
-}
-
static inline void build_term_codes(struct t4_cqe *err_cqe, u8 *layer_type,
u8 *ecode)
{
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index 1092a2d1f607..6126bbe36095 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -699,4 +699,11 @@ static inline void t4_set_cq_in_error(struct t4_cq *cq)
struct t4_dev_status_page {
u8 db_off;
+ u8 pad1;
+ u16 pad2;
+ u32 pad3;
+ u64 qp_start;
+ u64 qp_size;
+ u64 cq_start;
+ u64 cq_size;
};
diff --git a/drivers/infiniband/hw/cxgb4/user.h b/drivers/infiniband/hw/cxgb4/user.h
index cbd0ce170728..295f422b9a3a 100644
--- a/drivers/infiniband/hw/cxgb4/user.h
+++ b/drivers/infiniband/hw/cxgb4/user.h
@@ -32,7 +32,7 @@
#ifndef __C4IW_USER_H__
#define __C4IW_USER_H__
-#define C4IW_UVERBS_ABI_VERSION 2
+#define C4IW_UVERBS_ABI_VERSION 3
/*
* Make sure that all structs defined in this file remain laid out so
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index 86af71351d9a..105246fba2e7 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -92,7 +92,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
ah_attr->grh.sgid_index, &sgid, &gid_attr);
if (ret)
return ERR_PTR(ret);
- memset(ah->av.eth.s_mac, 0, ETH_ALEN);
+ eth_zero_addr(ah->av.eth.s_mac);
if (gid_attr.ndev) {
if (is_vlan_dev(gid_attr.ndev))
vlan_tag = vlan_dev_vlan_id(gid_attr.ndev);
@@ -104,6 +104,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
ah->av.eth.gid_index = mlx4_ib_gid_index_to_real_index(ibdev, ah_attr->port_num, ah_attr->grh.sgid_index);
ah->av.eth.vlan = cpu_to_be16(vlan_tag);
+ ah->av.eth.hop_limit = ah_attr->grh.hop_limit;
if (ah_attr->static_rate) {
ah->av.eth.stat_rate = ah_attr->static_rate + MLX4_STAT_RATE_OFFSET;
while (ah->av.eth.stat_rate > IB_RATE_2_5_GBPS + MLX4_STAT_RATE_OFFSET &&
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index b88fc8f5ab18..9f8b516eb2b0 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -811,9 +811,6 @@ repoll:
wc->opcode = IB_WC_MASKED_FETCH_ADD;
wc->byte_len = 8;
break;
- case MLX4_OPCODE_BIND_MW:
- wc->opcode = IB_WC_BIND_MW;
- break;
case MLX4_OPCODE_LSO:
wc->opcode = IB_WC_LSO;
break;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 97d6878f9938..1c7ab6cabbb8 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -154,9 +154,9 @@ static struct net_device *mlx4_ib_get_netdev(struct ib_device *device, u8 port_n
return dev;
}
-static int mlx4_ib_update_gids(struct gid_entry *gids,
- struct mlx4_ib_dev *ibdev,
- u8 port_num)
+static int mlx4_ib_update_gids_v1(struct gid_entry *gids,
+ struct mlx4_ib_dev *ibdev,
+ u8 port_num)
{
struct mlx4_cmd_mailbox *mailbox;
int err;
@@ -187,6 +187,63 @@ static int mlx4_ib_update_gids(struct gid_entry *gids,
return err;
}
+static int mlx4_ib_update_gids_v1_v2(struct gid_entry *gids,
+ struct mlx4_ib_dev *ibdev,
+ u8 port_num)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ int err;
+ struct mlx4_dev *dev = ibdev->dev;
+ int i;
+ struct {
+ union ib_gid gid;
+ __be32 rsrvd1[2];
+ __be16 rsrvd2;
+ u8 type;
+ u8 version;
+ __be32 rsrvd3;
+ } *gid_tbl;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return -ENOMEM;
+
+ gid_tbl = mailbox->buf;
+ for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
+ memcpy(&gid_tbl[i].gid, &gids[i].gid, sizeof(union ib_gid));
+ if (gids[i].gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
+ gid_tbl[i].version = 2;
+ if (!ipv6_addr_v4mapped((struct in6_addr *)&gids[i].gid))
+ gid_tbl[i].type = 1;
+ else
+ memset(&gid_tbl[i].gid, 0, 12);
+ }
+ }
+
+ err = mlx4_cmd(dev, mailbox->dma,
+ MLX4_SET_PORT_ROCE_ADDR << 8 | port_num,
+ 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+ if (mlx4_is_bonded(dev))
+ err += mlx4_cmd(dev, mailbox->dma,
+ MLX4_SET_PORT_ROCE_ADDR << 8 | 2,
+ 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+ MLX4_CMD_WRAPPED);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return err;
+}
+
+static int mlx4_ib_update_gids(struct gid_entry *gids,
+ struct mlx4_ib_dev *ibdev,
+ u8 port_num)
+{
+ if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
+ return mlx4_ib_update_gids_v1_v2(gids, ibdev, port_num);
+
+ return mlx4_ib_update_gids_v1(gids, ibdev, port_num);
+}
+
static int mlx4_ib_add_gid(struct ib_device *device,
u8 port_num,
unsigned int index,
@@ -215,7 +272,8 @@ static int mlx4_ib_add_gid(struct ib_device *device,
port_gid_table = &iboe->gids[port_num - 1];
spin_lock_bh(&iboe->lock);
for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
- if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid))) {
+ if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid)) &&
+ (port_gid_table->gids[i].gid_type == attr->gid_type)) {
found = i;
break;
}
@@ -233,6 +291,7 @@ static int mlx4_ib_add_gid(struct ib_device *device,
} else {
*context = port_gid_table->gids[free].ctx;
memcpy(&port_gid_table->gids[free].gid, gid, sizeof(*gid));
+ port_gid_table->gids[free].gid_type = attr->gid_type;
port_gid_table->gids[free].ctx->real_index = free;
port_gid_table->gids[free].ctx->refcount = 1;
hw_update = 1;
@@ -248,8 +307,10 @@ static int mlx4_ib_add_gid(struct ib_device *device,
if (!gids) {
ret = -ENOMEM;
} else {
- for (i = 0; i < MLX4_MAX_PORT_GIDS; i++)
+ for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) {
memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid));
+ gids[i].gid_type = port_gid_table->gids[i].gid_type;
+ }
}
}
spin_unlock_bh(&iboe->lock);
@@ -325,6 +386,7 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
int i;
int ret;
unsigned long flags;
+ struct ib_gid_attr attr;
if (port_num > MLX4_MAX_PORTS)
return -EINVAL;
@@ -335,10 +397,13 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num))
return index;
- ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid, NULL);
+ ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid, &attr);
if (ret)
return ret;
+ if (attr.ndev)
+ dev_put(attr.ndev);
+
if (!memcmp(&gid, &zgid, sizeof(gid)))
return -EINVAL;
@@ -346,7 +411,8 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
port_gid_table = &iboe->gids[port_num - 1];
for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
- if (!memcmp(&port_gid_table->gids[i].gid, &gid, sizeof(gid))) {
+ if (!memcmp(&port_gid_table->gids[i].gid, &gid, sizeof(gid)) &&
+ attr.gid_type == port_gid_table->gids[i].gid_type) {
ctx = port_gid_table->gids[i].ctx;
break;
}
@@ -2119,6 +2185,7 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_immutable *immutable)
{
struct ib_port_attr attr;
+ struct mlx4_ib_dev *mdev = to_mdev(ibdev);
int err;
err = mlx4_ib_query_port(ibdev, port_num, &attr);
@@ -2128,10 +2195,15 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND)
+ if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) {
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
- else
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+ } else {
+ if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+ if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
+ RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+ }
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
@@ -2283,7 +2355,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
ibdev->ib_dev.alloc_mw = mlx4_ib_alloc_mw;
- ibdev->ib_dev.bind_mw = mlx4_ib_bind_mw;
ibdev->ib_dev.dealloc_mw = mlx4_ib_dealloc_mw;
ibdev->ib_dev.uverbs_cmd_mask |=
@@ -2423,7 +2494,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
if (mlx4_ib_init_sriov(ibdev))
goto err_mad;
- if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) {
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE ||
+ dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
if (!iboe->nb.notifier_call) {
iboe->nb.notifier_call = mlx4_ib_netdev_event;
err = register_netdevice_notifier(&iboe->nb);
@@ -2432,6 +2504,12 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
goto err_notif;
}
}
+ if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
+ err = mlx4_config_roce_v2_port(dev, ROCE_V2_UDP_DPORT);
+ if (err) {
+ goto err_notif;
+ }
+ }
}
for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 1caa11edac03..52ce7b000044 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -177,11 +177,18 @@ struct mlx4_ib_wq {
unsigned tail;
};
+enum {
+ MLX4_IB_QP_CREATE_ROCE_V2_GSI = IB_QP_CREATE_RESERVED_START
+};
+
enum mlx4_ib_qp_flags {
MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP,
MLX4_IB_QP_CREATE_USE_GFP_NOIO = IB_QP_CREATE_USE_GFP_NOIO,
+
+ /* Mellanox specific flags start from IB_QP_CREATE_RESERVED_START */
+ MLX4_IB_ROCE_V2_GSI_QP = MLX4_IB_QP_CREATE_ROCE_V2_GSI,
MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30,
MLX4_IB_SRIOV_SQP = 1 << 31,
};
@@ -478,6 +485,7 @@ struct gid_cache_context {
struct gid_entry {
union ib_gid gid;
+ enum ib_gid_type gid_type;
struct gid_cache_context *ctx;
};
@@ -704,8 +712,6 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
struct ib_udata *udata);
int mlx4_ib_dereg_mr(struct ib_mr *mr);
struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
-int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
- struct ib_mw_bind *mw_bind);
int mlx4_ib_dealloc_mw(struct ib_mw *mw);
struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
enum ib_mr_type mr_type,
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 4d1e1c632603..242b94ec105b 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -366,28 +366,6 @@ err_free:
return ERR_PTR(err);
}
-int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
- struct ib_mw_bind *mw_bind)
-{
- struct ib_bind_mw_wr wr;
- struct ib_send_wr *bad_wr;
- int ret;
-
- memset(&wr, 0, sizeof(wr));
- wr.wr.opcode = IB_WR_BIND_MW;
- wr.wr.wr_id = mw_bind->wr_id;
- wr.wr.send_flags = mw_bind->send_flags;
- wr.mw = mw;
- wr.bind_info = mw_bind->bind_info;
- wr.rkey = ib_inc_rkey(mw->rkey);
-
- ret = mlx4_ib_post_send(qp, &wr.wr, &bad_wr);
- if (!ret)
- mw->rkey = wr.rkey;
-
- return ret;
-}
-
int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
{
struct mlx4_ib_mw *mw = to_mmw(ibmw);
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 13eaaf45288f..bc5536f00b6c 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -32,6 +32,8 @@
*/
#include <linux/log2.h>
+#include <linux/etherdevice.h>
+#include <net/ip.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/vmalloc.h>
@@ -85,6 +87,7 @@ struct mlx4_ib_sqp {
u32 send_psn;
struct ib_ud_header ud_header;
u8 header_buf[MLX4_IB_UD_HEADER_SIZE];
+ struct ib_qp *roce_v2_gsi;
};
enum {
@@ -115,7 +118,6 @@ static const __be32 mlx4_ib_opcode[] = {
[IB_WR_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR),
[IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS),
[IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA),
- [IB_WR_BIND_MW] = cpu_to_be32(MLX4_OPCODE_BIND_MW),
};
static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
@@ -154,7 +156,10 @@ static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
}
}
}
- return proxy_sqp;
+ if (proxy_sqp)
+ return 1;
+
+ return !!(qp->flags & MLX4_IB_ROCE_V2_GSI_QP);
}
/* used for INIT/CLOSE port logic */
@@ -796,11 +801,13 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (err)
goto err_mtt;
- qp->sq.wrid = kmalloc(qp->sq.wqe_cnt * sizeof(u64), gfp);
+ qp->sq.wrid = kmalloc_array(qp->sq.wqe_cnt, sizeof(u64),
+ gfp | __GFP_NOWARN);
if (!qp->sq.wrid)
qp->sq.wrid = __vmalloc(qp->sq.wqe_cnt * sizeof(u64),
gfp, PAGE_KERNEL);
- qp->rq.wrid = kmalloc(qp->rq.wqe_cnt * sizeof(u64), gfp);
+ qp->rq.wrid = kmalloc_array(qp->rq.wqe_cnt, sizeof(u64),
+ gfp | __GFP_NOWARN);
if (!qp->rq.wrid)
qp->rq.wrid = __vmalloc(qp->rq.wqe_cnt * sizeof(u64),
gfp, PAGE_KERNEL);
@@ -1099,9 +1106,9 @@ static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr)
return dev->dev->caps.qp1_proxy[attr->port_num - 1];
}
-struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
- struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata)
+static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
{
struct mlx4_ib_qp *qp = NULL;
int err;
@@ -1120,6 +1127,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
MLX4_IB_SRIOV_TUNNEL_QP |
MLX4_IB_SRIOV_SQP |
MLX4_IB_QP_NETIF |
+ MLX4_IB_QP_CREATE_ROCE_V2_GSI |
MLX4_IB_QP_CREATE_USE_GFP_NOIO))
return ERR_PTR(-EINVAL);
@@ -1128,15 +1136,21 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
return ERR_PTR(-EINVAL);
}
- if (init_attr->create_flags &&
- ((udata && init_attr->create_flags & ~(sup_u_create_flags)) ||
- ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP |
- MLX4_IB_QP_CREATE_USE_GFP_NOIO |
- MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK)) &&
- init_attr->qp_type != IB_QPT_UD) ||
- ((init_attr->create_flags & MLX4_IB_SRIOV_SQP) &&
- init_attr->qp_type > IB_QPT_GSI)))
- return ERR_PTR(-EINVAL);
+ if (init_attr->create_flags) {
+ if (udata && init_attr->create_flags & ~(sup_u_create_flags))
+ return ERR_PTR(-EINVAL);
+
+ if ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP |
+ MLX4_IB_QP_CREATE_USE_GFP_NOIO |
+ MLX4_IB_QP_CREATE_ROCE_V2_GSI |
+ MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) &&
+ init_attr->qp_type != IB_QPT_UD) ||
+ (init_attr->create_flags & MLX4_IB_SRIOV_SQP &&
+ init_attr->qp_type > IB_QPT_GSI) ||
+ (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI &&
+ init_attr->qp_type != IB_QPT_GSI))
+ return ERR_PTR(-EINVAL);
+ }
switch (init_attr->qp_type) {
case IB_QPT_XRC_TGT:
@@ -1173,19 +1187,29 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
case IB_QPT_SMI:
case IB_QPT_GSI:
{
+ int sqpn;
+
/* Userspace is not allowed to create special QPs: */
if (udata)
return ERR_PTR(-EINVAL);
+ if (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI) {
+ int res = mlx4_qp_reserve_range(to_mdev(pd->device)->dev, 1, 1, &sqpn, 0);
+
+ if (res)
+ return ERR_PTR(res);
+ } else {
+ sqpn = get_sqp_num(to_mdev(pd->device), init_attr);
+ }
err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata,
- get_sqp_num(to_mdev(pd->device), init_attr),
+ sqpn,
&qp, gfp);
if (err)
return ERR_PTR(err);
qp->port = init_attr->port_num;
- qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
-
+ qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 :
+ init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI ? sqpn : 1;
break;
}
default:
@@ -1196,7 +1220,41 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
return &qp->ibqp;
}
-int mlx4_ib_destroy_qp(struct ib_qp *qp)
+struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata) {
+ struct ib_device *device = pd ? pd->device : init_attr->xrcd->device;
+ struct ib_qp *ibqp;
+ struct mlx4_ib_dev *dev = to_mdev(device);
+
+ ibqp = _mlx4_ib_create_qp(pd, init_attr, udata);
+
+ if (!IS_ERR(ibqp) &&
+ (init_attr->qp_type == IB_QPT_GSI) &&
+ !(init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI)) {
+ struct mlx4_ib_sqp *sqp = to_msqp((to_mqp(ibqp)));
+ int is_eth = rdma_cap_eth_ah(&dev->ib_dev, init_attr->port_num);
+
+ if (is_eth &&
+ dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
+ init_attr->create_flags |= MLX4_IB_QP_CREATE_ROCE_V2_GSI;
+ sqp->roce_v2_gsi = ib_create_qp(pd, init_attr);
+
+ if (IS_ERR(sqp->roce_v2_gsi)) {
+ pr_err("Failed to create GSI QP for RoCEv2 (%ld)\n", PTR_ERR(sqp->roce_v2_gsi));
+ sqp->roce_v2_gsi = NULL;
+ } else {
+ sqp = to_msqp(to_mqp(sqp->roce_v2_gsi));
+ sqp->qp.flags |= MLX4_IB_ROCE_V2_GSI_QP;
+ }
+
+ init_attr->create_flags &= ~MLX4_IB_QP_CREATE_ROCE_V2_GSI;
+ }
+ }
+ return ibqp;
+}
+
+static int _mlx4_ib_destroy_qp(struct ib_qp *qp)
{
struct mlx4_ib_dev *dev = to_mdev(qp->device);
struct mlx4_ib_qp *mqp = to_mqp(qp);
@@ -1225,6 +1283,20 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp)
return 0;
}
+int mlx4_ib_destroy_qp(struct ib_qp *qp)
+{
+ struct mlx4_ib_qp *mqp = to_mqp(qp);
+
+ if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
+ struct mlx4_ib_sqp *sqp = to_msqp(mqp);
+
+ if (sqp->roce_v2_gsi)
+ ib_destroy_qp(sqp->roce_v2_gsi);
+ }
+
+ return _mlx4_ib_destroy_qp(qp);
+}
+
static int to_mlx4_st(struct mlx4_ib_dev *dev, enum mlx4_ib_qp_type type)
{
switch (type) {
@@ -1507,6 +1579,24 @@ static int create_qp_lb_counter(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
return 0;
}
+enum {
+ MLX4_QPC_ROCE_MODE_1 = 0,
+ MLX4_QPC_ROCE_MODE_2 = 2,
+ MLX4_QPC_ROCE_MODE_UNDEFINED = 0xff
+};
+
+static u8 gid_type_to_qpc(enum ib_gid_type gid_type)
+{
+ switch (gid_type) {
+ case IB_GID_TYPE_ROCE:
+ return MLX4_QPC_ROCE_MODE_1;
+ case IB_GID_TYPE_ROCE_UDP_ENCAP:
+ return MLX4_QPC_ROCE_MODE_2;
+ default:
+ return MLX4_QPC_ROCE_MODE_UNDEFINED;
+ }
+}
+
static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
const struct ib_qp_attr *attr, int attr_mask,
enum ib_qp_state cur_state, enum ib_qp_state new_state)
@@ -1633,6 +1723,14 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
mlx4_ib_steer_qp_reg(dev, qp, 1);
steer_qp = 1;
}
+
+ if (ibqp->qp_type == IB_QPT_GSI) {
+ enum ib_gid_type gid_type = qp->flags & MLX4_IB_ROCE_V2_GSI_QP ?
+ IB_GID_TYPE_ROCE_UDP_ENCAP : IB_GID_TYPE_ROCE;
+ u8 qpc_roce_mode = gid_type_to_qpc(gid_type);
+
+ context->rlkey_roce_mode |= (qpc_roce_mode << 6);
+ }
}
if (attr_mask & IB_QP_PKEY_INDEX) {
@@ -1650,9 +1748,10 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
u16 vlan = 0xffff;
u8 smac[ETH_ALEN];
int status = 0;
+ int is_eth = rdma_cap_eth_ah(&dev->ib_dev, port_num) &&
+ attr->ah_attr.ah_flags & IB_AH_GRH;
- if (rdma_cap_eth_ah(&dev->ib_dev, port_num) &&
- attr->ah_attr.ah_flags & IB_AH_GRH) {
+ if (is_eth) {
int index = attr->ah_attr.grh.sgid_index;
status = ib_get_cached_gid(ibqp->device, port_num,
@@ -1674,6 +1773,18 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |
MLX4_QP_OPTPAR_SCHED_QUEUE);
+
+ if (is_eth &&
+ (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR)) {
+ u8 qpc_roce_mode = gid_type_to_qpc(gid_attr.gid_type);
+
+ if (qpc_roce_mode == MLX4_QPC_ROCE_MODE_UNDEFINED) {
+ err = -EINVAL;
+ goto out;
+ }
+ context->rlkey_roce_mode |= (qpc_roce_mode << 6);
+ }
+
}
if (attr_mask & IB_QP_TIMEOUT) {
@@ -1845,7 +1956,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
sqd_event = 0;
if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
- context->rlkey |= (1 << 4);
+ context->rlkey_roce_mode |= (1 << 4);
/*
* Before passing a kernel QP to the HW, make sure that the
@@ -2022,8 +2133,8 @@ out:
return err;
}
-int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
- int attr_mask, struct ib_udata *udata)
+static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
{
struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
struct mlx4_ib_qp *qp = to_mqp(ibqp);
@@ -2126,6 +2237,27 @@ out:
return err;
}
+int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ struct mlx4_ib_qp *mqp = to_mqp(ibqp);
+ int ret;
+
+ ret = _mlx4_ib_modify_qp(ibqp, attr, attr_mask, udata);
+
+ if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
+ struct mlx4_ib_sqp *sqp = to_msqp(mqp);
+ int err = 0;
+
+ if (sqp->roce_v2_gsi)
+ err = ib_modify_qp(sqp->roce_v2_gsi, attr, attr_mask);
+ if (err)
+ pr_err("Failed to modify GSI QP for RoCEv2 (%d)\n",
+ err);
+ }
+ return ret;
+}
+
static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey)
{
int i;
@@ -2168,7 +2300,7 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER)
send_size += sizeof (struct mlx4_ib_tunnel_header);
- ib_ud_header_init(send_size, 1, 0, 0, 0, 0, &sqp->ud_header);
+ ib_ud_header_init(send_size, 1, 0, 0, 0, 0, 0, 0, &sqp->ud_header);
if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) {
sqp->ud_header.lrh.service_level =
@@ -2252,16 +2384,7 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
return 0;
}
-static void mlx4_u64_to_smac(u8 *dst_mac, u64 src_mac)
-{
- int i;
-
- for (i = ETH_ALEN; i; i--) {
- dst_mac[i - 1] = src_mac & 0xff;
- src_mac >>= 8;
- }
-}
-
+#define MLX4_ROCEV2_QP1_SPORT 0xC000
static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
void *wqe, unsigned *mlx_seg_len)
{
@@ -2281,6 +2404,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
bool is_eth;
bool is_vlan = false;
bool is_grh;
+ bool is_udp = false;
+ int ip_version = 0;
send_size = 0;
for (i = 0; i < wr->wr.num_sge; ++i)
@@ -2289,6 +2414,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
is_grh = mlx4_ib_ah_grh_present(ah);
if (is_eth) {
+ struct ib_gid_attr gid_attr;
+
if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
/* When multi-function is enabled, the ib_core gid
* indexes don't necessarily match the hw ones, so
@@ -2302,19 +2429,35 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
err = ib_get_cached_gid(ib_dev,
be32_to_cpu(ah->av.ib.port_pd) >> 24,
ah->av.ib.gid_index, &sgid,
- NULL);
- if (!err && !memcmp(&sgid, &zgid, sizeof(sgid)))
- err = -ENOENT;
- if (err)
+ &gid_attr);
+ if (!err) {
+ if (gid_attr.ndev)
+ dev_put(gid_attr.ndev);
+ if (!memcmp(&sgid, &zgid, sizeof(sgid)))
+ err = -ENOENT;
+ }
+ if (!err) {
+ is_udp = gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
+ if (is_udp) {
+ if (ipv6_addr_v4mapped((struct in6_addr *)&sgid))
+ ip_version = 4;
+ else
+ ip_version = 6;
+ is_grh = false;
+ }
+ } else {
return err;
+ }
}
-
if (ah->av.eth.vlan != cpu_to_be16(0xffff)) {
vlan = be16_to_cpu(ah->av.eth.vlan) & 0x0fff;
is_vlan = 1;
}
}
- ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, 0, &sqp->ud_header);
+ err = ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh,
+ ip_version, is_udp, 0, &sqp->ud_header);
+ if (err)
+ return err;
if (!is_eth) {
sqp->ud_header.lrh.service_level =
@@ -2323,7 +2466,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
sqp->ud_header.lrh.source_lid = cpu_to_be16(ah->av.ib.g_slid & 0x7f);
}
- if (is_grh) {
+ if (is_grh || (ip_version == 6)) {
sqp->ud_header.grh.traffic_class =
(be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff;
sqp->ud_header.grh.flow_label =
@@ -2352,6 +2495,25 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
ah->av.ib.dgid, 16);
}
+ if (ip_version == 4) {
+ sqp->ud_header.ip4.tos =
+ (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 20) & 0xff;
+ sqp->ud_header.ip4.id = 0;
+ sqp->ud_header.ip4.frag_off = htons(IP_DF);
+ sqp->ud_header.ip4.ttl = ah->av.eth.hop_limit;
+
+ memcpy(&sqp->ud_header.ip4.saddr,
+ sgid.raw + 12, 4);
+ memcpy(&sqp->ud_header.ip4.daddr, ah->av.ib.dgid + 12, 4);
+ sqp->ud_header.ip4.check = ib_ud_ip4_csum(&sqp->ud_header);
+ }
+
+ if (is_udp) {
+ sqp->ud_header.udp.dport = htons(ROCE_V2_UDP_DPORT);
+ sqp->ud_header.udp.sport = htons(MLX4_ROCEV2_QP1_SPORT);
+ sqp->ud_header.udp.csum = 0;
+ }
+
mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
if (!is_eth) {
@@ -2380,34 +2542,27 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
if (is_eth) {
struct in6_addr in6;
-
+ u16 ether_type;
u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13;
+ ether_type = (!is_udp) ? MLX4_IB_IBOE_ETHERTYPE :
+ (ip_version == 4 ? ETH_P_IP : ETH_P_IPV6);
+
mlx->sched_prio = cpu_to_be16(pcp);
+ ether_addr_copy(sqp->ud_header.eth.smac_h, ah->av.eth.s_mac);
memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6);
- /* FIXME: cache smac value? */
memcpy(&ctrl->srcrb_flags16[0], ah->av.eth.mac, 2);
memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4);
memcpy(&in6, sgid.raw, sizeof(in6));
- if (!mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
- u64 mac = atomic64_read(&to_mdev(ib_dev)->iboe.mac[sqp->qp.port - 1]);
- u8 smac[ETH_ALEN];
-
- mlx4_u64_to_smac(smac, mac);
- memcpy(sqp->ud_header.eth.smac_h, smac, ETH_ALEN);
- } else {
- /* use the src mac of the tunnel */
- memcpy(sqp->ud_header.eth.smac_h, ah->av.eth.s_mac, ETH_ALEN);
- }
if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6))
mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK);
if (!is_vlan) {
- sqp->ud_header.eth.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE);
+ sqp->ud_header.eth.type = cpu_to_be16(ether_type);
} else {
- sqp->ud_header.vlan.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE);
+ sqp->ud_header.vlan.type = cpu_to_be16(ether_type);
sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp);
}
} else {
@@ -2528,25 +2683,6 @@ static void set_reg_seg(struct mlx4_wqe_fmr_seg *fseg,
fseg->reserved[1] = 0;
}
-static void set_bind_seg(struct mlx4_wqe_bind_seg *bseg,
- struct ib_bind_mw_wr *wr)
-{
- bseg->flags1 =
- convert_access(wr->bind_info.mw_access_flags) &
- cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ |
- MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE |
- MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC);
- bseg->flags2 = 0;
- if (wr->mw->type == IB_MW_TYPE_2)
- bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_TYPE_2);
- if (wr->bind_info.mw_access_flags & IB_ZERO_BASED)
- bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_ZERO_BASED);
- bseg->new_rkey = cpu_to_be32(wr->rkey);
- bseg->lkey = cpu_to_be32(wr->bind_info.mr->lkey);
- bseg->addr = cpu_to_be64(wr->bind_info.addr);
- bseg->length = cpu_to_be64(wr->bind_info.length);
-}
-
static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey)
{
memset(iseg, 0, sizeof(*iseg));
@@ -2766,6 +2902,29 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
int i;
struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
+ if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) {
+ struct mlx4_ib_sqp *sqp = to_msqp(qp);
+
+ if (sqp->roce_v2_gsi) {
+ struct mlx4_ib_ah *ah = to_mah(ud_wr(wr)->ah);
+ struct ib_gid_attr gid_attr;
+ union ib_gid gid;
+
+ if (!ib_get_cached_gid(ibqp->device,
+ be32_to_cpu(ah->av.ib.port_pd) >> 24,
+ ah->av.ib.gid_index, &gid,
+ &gid_attr)) {
+ if (gid_attr.ndev)
+ dev_put(gid_attr.ndev);
+ qp = (gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ?
+ to_mqp(sqp->roce_v2_gsi) : qp;
+ } else {
+ pr_err("Failed to get gid at index %d. RoCEv2 will not work properly\n",
+ ah->av.ib.gid_index);
+ }
+ }
+ }
+
spin_lock_irqsave(&qp->sq.lock, flags);
if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
err = -EIO;
@@ -2867,13 +3026,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
size += sizeof(struct mlx4_wqe_fmr_seg) / 16;
break;
- case IB_WR_BIND_MW:
- ctrl->srcrb_flags |=
- cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
- set_bind_seg(wqe, bind_mw_wr(wr));
- wqe += sizeof(struct mlx4_wqe_bind_seg);
- size += sizeof(struct mlx4_wqe_bind_seg) / 16;
- break;
default:
/* No extra segments required for sends */
break;
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index c394376ebe06..0597f3eef5d0 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -171,7 +171,8 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
if (err)
goto err_mtt;
- srq->wrid = kmalloc(srq->msrq.max * sizeof (u64), GFP_KERNEL);
+ srq->wrid = kmalloc_array(srq->msrq.max, sizeof(u64),
+ GFP_KERNEL | __GFP_NOWARN);
if (!srq->wrid) {
srq->wrid = __vmalloc(srq->msrq.max * sizeof(u64),
GFP_KERNEL, PAGE_KERNEL);
diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
index 66080580e24d..745efa4cfc71 100644
--- a/drivers/infiniband/hw/mlx5/ah.c
+++ b/drivers/infiniband/hw/mlx5/ah.c
@@ -32,8 +32,10 @@
#include "mlx5_ib.h"
-struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
- struct mlx5_ib_ah *ah)
+static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_ah *ah,
+ struct ib_ah_attr *ah_attr,
+ enum rdma_link_layer ll)
{
if (ah_attr->ah_flags & IB_AH_GRH) {
memcpy(ah->av.rgid, &ah_attr->grh.dgid, 16);
@@ -44,9 +46,20 @@ struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
ah->av.tclass = ah_attr->grh.traffic_class;
}
- ah->av.rlid = cpu_to_be16(ah_attr->dlid);
- ah->av.fl_mlid = ah_attr->src_path_bits & 0x7f;
- ah->av.stat_rate_sl = (ah_attr->static_rate << 4) | (ah_attr->sl & 0xf);
+ ah->av.stat_rate_sl = (ah_attr->static_rate << 4);
+
+ if (ll == IB_LINK_LAYER_ETHERNET) {
+ memcpy(ah->av.rmac, ah_attr->dmac, sizeof(ah_attr->dmac));
+ ah->av.udp_sport =
+ mlx5_get_roce_udp_sport(dev,
+ ah_attr->port_num,
+ ah_attr->grh.sgid_index);
+ ah->av.stat_rate_sl |= (ah_attr->sl & 0x7) << 1;
+ } else {
+ ah->av.rlid = cpu_to_be16(ah_attr->dlid);
+ ah->av.fl_mlid = ah_attr->src_path_bits & 0x7f;
+ ah->av.stat_rate_sl |= (ah_attr->sl & 0xf);
+ }
return &ah->ibah;
}
@@ -54,12 +67,19 @@ struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
{
struct mlx5_ib_ah *ah;
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ enum rdma_link_layer ll;
+
+ ll = pd->device->get_link_layer(pd->device, ah_attr->port_num);
+
+ if (ll == IB_LINK_LAYER_ETHERNET && !(ah_attr->ah_flags & IB_AH_GRH))
+ return ERR_PTR(-EINVAL);
ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
if (!ah)
return ERR_PTR(-ENOMEM);
- return create_ib_ah(ah_attr, ah); /* never fails */
+ return create_ib_ah(dev, ah, ah_attr, ll); /* never fails */
}
int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 92ddae101ecc..fd1de31e0611 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -154,9 +154,6 @@ static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
wc->opcode = IB_WC_MASKED_FETCH_ADD;
wc->byte_len = 8;
break;
- case MLX5_OPCODE_BIND_MW:
- wc->opcode = IB_WC_BIND_MW;
- break;
case MLX5_OPCODE_UMR:
wc->opcode = get_umr_comp(wq, idx);
break;
@@ -171,6 +168,7 @@ enum {
static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
struct mlx5_ib_qp *qp)
{
+ enum rdma_link_layer ll = rdma_port_get_link_layer(qp->ibqp.device, 1);
struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
struct mlx5_ib_srq *srq;
struct mlx5_ib_wq *wq;
@@ -236,6 +234,22 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
} else {
wc->pkey_index = 0;
}
+
+ if (ll != IB_LINK_LAYER_ETHERNET)
+ return;
+
+ switch (wc->sl & 0x3) {
+ case MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH:
+ wc->network_hdr_type = RDMA_NETWORK_IB;
+ break;
+ case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6:
+ wc->network_hdr_type = RDMA_NETWORK_IPV6;
+ break;
+ case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV4:
+ wc->network_hdr_type = RDMA_NETWORK_IPV4;
+ break;
+ }
+ wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
}
static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)
@@ -760,12 +774,12 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
int eqn;
int err;
- if (attr->flags)
- return ERR_PTR(-EINVAL);
-
if (entries < 0)
return ERR_PTR(-EINVAL);
+ if (check_cq_create_flags(attr->flags))
+ return ERR_PTR(-EOPNOTSUPP);
+
entries = roundup_pow_of_two(entries + 1);
if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)))
return ERR_PTR(-EINVAL);
@@ -779,6 +793,7 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
spin_lock_init(&cq->lock);
cq->resize_buf = NULL;
cq->resize_umem = NULL;
+ cq->create_flags = attr->flags;
if (context) {
err = create_cq_user(dev, udata, context, cq, entries,
@@ -796,6 +811,10 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
cq->cqe_size = cqe_size;
cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
+
+ if (cq->create_flags & IB_CQ_FLAGS_IGNORE_OVERRUN)
+ cqb->ctx.cqe_sz_flags |= (1 << 1);
+
cqb->ctx.log_sz_usr_page = cpu_to_be32((ilog2(entries) << 24) | index);
err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn);
if (err)
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index b0ec175cc6ba..ec737e2287fe 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -40,6 +40,8 @@
#include <linux/io-mapping.h>
#include <linux/sched.h>
#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_cache.h>
#include <linux/mlx5/vport.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
@@ -66,12 +68,14 @@ static char mlx5_version[] =
DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
+enum {
+ MLX5_ATOMIC_SIZE_QP_8BYTES = 1 << 3,
+};
+
static enum rdma_link_layer
-mlx5_ib_port_link_layer(struct ib_device *device)
+mlx5_port_type_cap_to_rdma_ll(int port_type_cap)
{
- struct mlx5_ib_dev *dev = to_mdev(device);
-
- switch (MLX5_CAP_GEN(dev->mdev, port_type)) {
+ switch (port_type_cap) {
case MLX5_CAP_PORT_TYPE_IB:
return IB_LINK_LAYER_INFINIBAND;
case MLX5_CAP_PORT_TYPE_ETH:
@@ -81,6 +85,202 @@ mlx5_ib_port_link_layer(struct ib_device *device)
}
}
+static enum rdma_link_layer
+mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
+{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+ int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type);
+
+ return mlx5_port_type_cap_to_rdma_ll(port_type_cap);
+}
+
+static int mlx5_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
+ struct mlx5_ib_dev *ibdev = container_of(this, struct mlx5_ib_dev,
+ roce.nb);
+
+ if ((event != NETDEV_UNREGISTER) && (event != NETDEV_REGISTER))
+ return NOTIFY_DONE;
+
+ write_lock(&ibdev->roce.netdev_lock);
+ if (ndev->dev.parent == &ibdev->mdev->pdev->dev)
+ ibdev->roce.netdev = (event == NETDEV_UNREGISTER) ? NULL : ndev;
+ write_unlock(&ibdev->roce.netdev_lock);
+
+ return NOTIFY_DONE;
+}
+
+static struct net_device *mlx5_ib_get_netdev(struct ib_device *device,
+ u8 port_num)
+{
+ struct mlx5_ib_dev *ibdev = to_mdev(device);
+ struct net_device *ndev;
+
+ /* Ensure ndev does not disappear before we invoke dev_hold()
+ */
+ read_lock(&ibdev->roce.netdev_lock);
+ ndev = ibdev->roce.netdev;
+ if (ndev)
+ dev_hold(ndev);
+ read_unlock(&ibdev->roce.netdev_lock);
+
+ return ndev;
+}
+
+static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
+ struct ib_port_attr *props)
+{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+ struct net_device *ndev;
+ enum ib_mtu ndev_ib_mtu;
+ u16 qkey_viol_cntr;
+
+ memset(props, 0, sizeof(*props));
+
+ props->port_cap_flags |= IB_PORT_CM_SUP;
+ props->port_cap_flags |= IB_PORT_IP_BASED_GIDS;
+
+ props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev,
+ roce_address_table_size);
+ props->max_mtu = IB_MTU_4096;
+ props->max_msg_sz = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg);
+ props->pkey_tbl_len = 1;
+ props->state = IB_PORT_DOWN;
+ props->phys_state = 3;
+
+ mlx5_query_nic_vport_qkey_viol_cntr(dev->mdev, &qkey_viol_cntr);
+ props->qkey_viol_cntr = qkey_viol_cntr;
+
+ ndev = mlx5_ib_get_netdev(device, port_num);
+ if (!ndev)
+ return 0;
+
+ if (netif_running(ndev) && netif_carrier_ok(ndev)) {
+ props->state = IB_PORT_ACTIVE;
+ props->phys_state = 5;
+ }
+
+ ndev_ib_mtu = iboe_get_mtu(ndev->mtu);
+
+ dev_put(ndev);
+
+ props->active_mtu = min(props->max_mtu, ndev_ib_mtu);
+
+ props->active_width = IB_WIDTH_4X; /* TODO */
+ props->active_speed = IB_SPEED_QDR; /* TODO */
+
+ return 0;
+}
+
+static void ib_gid_to_mlx5_roce_addr(const union ib_gid *gid,
+ const struct ib_gid_attr *attr,
+ void *mlx5_addr)
+{
+#define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v)
+ char *mlx5_addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
+ source_l3_address);
+ void *mlx5_addr_mac = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr,
+ source_mac_47_32);
+
+ if (!gid)
+ return;
+
+ ether_addr_copy(mlx5_addr_mac, attr->ndev->dev_addr);
+
+ if (is_vlan_dev(attr->ndev)) {
+ MLX5_SET_RA(mlx5_addr, vlan_valid, 1);
+ MLX5_SET_RA(mlx5_addr, vlan_id, vlan_dev_vlan_id(attr->ndev));
+ }
+
+ switch (attr->gid_type) {
+ case IB_GID_TYPE_IB:
+ MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_1);
+ break;
+ case IB_GID_TYPE_ROCE_UDP_ENCAP:
+ MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_2);
+ break;
+
+ default:
+ WARN_ON(true);
+ }
+
+ if (attr->gid_type != IB_GID_TYPE_IB) {
+ if (ipv6_addr_v4mapped((void *)gid))
+ MLX5_SET_RA(mlx5_addr, roce_l3_type,
+ MLX5_ROCE_L3_TYPE_IPV4);
+ else
+ MLX5_SET_RA(mlx5_addr, roce_l3_type,
+ MLX5_ROCE_L3_TYPE_IPV6);
+ }
+
+ if ((attr->gid_type == IB_GID_TYPE_IB) ||
+ !ipv6_addr_v4mapped((void *)gid))
+ memcpy(mlx5_addr_l3_addr, gid, sizeof(*gid));
+ else
+ memcpy(&mlx5_addr_l3_addr[12], &gid->raw[12], 4);
+}
+
+static int set_roce_addr(struct ib_device *device, u8 port_num,
+ unsigned int index,
+ const union ib_gid *gid,
+ const struct ib_gid_attr *attr)
+{
+ struct mlx5_ib_dev *dev = to_mdev(device);
+ u32 in[MLX5_ST_SZ_DW(set_roce_address_in)];
+ u32 out[MLX5_ST_SZ_DW(set_roce_address_out)];
+ void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address);
+ enum rdma_link_layer ll = mlx5_ib_port_link_layer(device, port_num);
+
+ if (ll != IB_LINK_LAYER_ETHERNET)
+ return -EINVAL;
+
+ memset(in, 0, sizeof(in));
+
+ ib_gid_to_mlx5_roce_addr(gid, attr, in_addr);
+
+ MLX5_SET(set_roce_address_in, in, roce_address_index, index);
+ MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS);
+
+ memset(out, 0, sizeof(out));
+ return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num,
+ unsigned int index, const union ib_gid *gid,
+ const struct ib_gid_attr *attr,
+ __always_unused void **context)
+{
+ return set_roce_addr(device, port_num, index, gid, attr);
+}
+
+static int mlx5_ib_del_gid(struct ib_device *device, u8 port_num,
+ unsigned int index, __always_unused void **context)
+{
+ return set_roce_addr(device, port_num, index, NULL, NULL);
+}
+
+__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
+ int index)
+{
+ struct ib_gid_attr attr;
+ union ib_gid gid;
+
+ if (ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr))
+ return 0;
+
+ if (!attr.ndev)
+ return 0;
+
+ dev_put(attr.ndev);
+
+ if (attr.gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
+ return 0;
+
+ return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port));
+}
+
static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
{
return !dev->mdev->issi;
@@ -97,13 +297,35 @@ static int mlx5_get_vport_access_method(struct ib_device *ibdev)
if (mlx5_use_mad_ifc(to_mdev(ibdev)))
return MLX5_VPORT_ACCESS_METHOD_MAD;
- if (mlx5_ib_port_link_layer(ibdev) ==
+ if (mlx5_ib_port_link_layer(ibdev, 1) ==
IB_LINK_LAYER_ETHERNET)
return MLX5_VPORT_ACCESS_METHOD_NIC;
return MLX5_VPORT_ACCESS_METHOD_HCA;
}
+static void get_atomic_caps(struct mlx5_ib_dev *dev,
+ struct ib_device_attr *props)
+{
+ u8 tmp;
+ u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
+ u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
+ u8 atomic_req_8B_endianness_mode =
+ MLX5_CAP_ATOMIC(dev->mdev, atomic_req_8B_endianess_mode);
+
+ /* Check if HW supports 8 bytes standard atomic operations and capable
+ * of host endianness respond
+ */
+ tmp = MLX5_ATOMIC_OPS_CMP_SWAP | MLX5_ATOMIC_OPS_FETCH_ADD;
+ if (((atomic_operations & tmp) == tmp) &&
+ (atomic_size_qp & MLX5_ATOMIC_SIZE_QP_8BYTES) &&
+ (atomic_req_8B_endianness_mode)) {
+ props->atomic_cap = IB_ATOMIC_HCA;
+ } else {
+ props->atomic_cap = IB_ATOMIC_NONE;
+ }
+}
+
static int mlx5_query_system_image_guid(struct ib_device *ibdev,
__be64 *sys_image_guid)
{
@@ -119,13 +341,21 @@ static int mlx5_query_system_image_guid(struct ib_device *ibdev,
case MLX5_VPORT_ACCESS_METHOD_HCA:
err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
- if (!err)
- *sys_image_guid = cpu_to_be64(tmp);
- return err;
+ break;
+
+ case MLX5_VPORT_ACCESS_METHOD_NIC:
+ err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp);
+ break;
default:
return -EINVAL;
}
+
+ if (!err)
+ *sys_image_guid = cpu_to_be64(tmp);
+
+ return err;
+
}
static int mlx5_query_max_pkeys(struct ib_device *ibdev,
@@ -179,13 +409,20 @@ static int mlx5_query_node_guid(struct mlx5_ib_dev *dev,
case MLX5_VPORT_ACCESS_METHOD_HCA:
err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp);
- if (!err)
- *node_guid = cpu_to_be64(tmp);
- return err;
+ break;
+
+ case MLX5_VPORT_ACCESS_METHOD_NIC:
+ err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp);
+ break;
default:
return -EINVAL;
}
+
+ if (!err)
+ *node_guid = cpu_to_be64(tmp);
+
+ return err;
}
struct mlx5_reg_node_desc {
@@ -263,6 +500,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
if (MLX5_CAP_GEN(mdev, block_lb_mc))
props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
+ if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
+ (MLX5_CAP_ETH(dev->mdev, csum_cap)))
+ props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
+
props->vendor_part_id = mdev->pdev->device;
props->hw_ver = mdev->pdev->revision;
@@ -278,7 +519,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->max_sge = min(max_rq_sg, max_sq_sg);
props->max_sge_rd = props->max_sge;
props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
- props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_eq_sz)) - 1;
+ props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
props->max_pd = 1 << MLX5_CAP_GEN(mdev, log_max_pd);
props->max_qp_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp);
@@ -289,13 +530,15 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
props->max_srq_sge = max_rq_sg - 1;
props->max_fast_reg_page_list_len = (unsigned int)-1;
- props->atomic_cap = IB_ATOMIC_NONE;
+ get_atomic_caps(dev, props);
props->masked_atomic_cap = IB_ATOMIC_NONE;
props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg);
props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
props->max_mcast_grp;
props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
+ props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz);
+ props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
if (MLX5_CAP_GEN(mdev, pg))
@@ -303,6 +546,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->odp_caps = dev->odp_caps;
#endif
+ if (MLX5_CAP_GEN(mdev, cd))
+ props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL;
+
return 0;
}
@@ -483,6 +729,9 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
case MLX5_VPORT_ACCESS_METHOD_HCA:
return mlx5_query_hca_port(ibdev, port, props);
+ case MLX5_VPORT_ACCESS_METHOD_NIC:
+ return mlx5_query_port_roce(ibdev, port, props);
+
default:
return -EINVAL;
}
@@ -583,8 +832,8 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
- struct mlx5_ib_alloc_ucontext_req_v2 req;
- struct mlx5_ib_alloc_ucontext_resp resp;
+ struct mlx5_ib_alloc_ucontext_req_v2 req = {};
+ struct mlx5_ib_alloc_ucontext_resp resp = {};
struct mlx5_ib_ucontext *context;
struct mlx5_uuar_info *uuari;
struct mlx5_uar *uars;
@@ -599,20 +848,22 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
if (!dev->ib_active)
return ERR_PTR(-EAGAIN);
- memset(&req, 0, sizeof(req));
+ if (udata->inlen < sizeof(struct ib_uverbs_cmd_hdr))
+ return ERR_PTR(-EINVAL);
+
reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
ver = 0;
- else if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req_v2))
+ else if (reqlen >= sizeof(struct mlx5_ib_alloc_ucontext_req_v2))
ver = 2;
else
return ERR_PTR(-EINVAL);
- err = ib_copy_from_udata(&req, udata, reqlen);
+ err = ib_copy_from_udata(&req, udata, min(reqlen, sizeof(req)));
if (err)
return ERR_PTR(err);
- if (req.flags || req.reserved)
+ if (req.flags)
return ERR_PTR(-EINVAL);
if (req.total_num_uuars > MLX5_MAX_UUARS)
@@ -621,6 +872,14 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
if (req.total_num_uuars == 0)
return ERR_PTR(-EINVAL);
+ if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (reqlen > sizeof(req) &&
+ !ib_is_udata_cleared(udata, sizeof(req),
+ reqlen - sizeof(req)))
+ return ERR_PTR(-EOPNOTSUPP);
+
req.total_num_uuars = ALIGN(req.total_num_uuars,
MLX5_NON_FP_BF_REGS_PER_PAGE);
if (req.num_low_latency_uuars > req.total_num_uuars - 1)
@@ -636,6 +895,11 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
+ resp.cqe_version = min_t(__u8,
+ (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version),
+ req.max_cqe_version);
+ resp.response_length = min(offsetof(typeof(resp), response_length) +
+ sizeof(resp.response_length), udata->outlen);
context = kzalloc(sizeof(*context), GFP_KERNEL);
if (!context)
@@ -681,22 +945,49 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
#endif
+ if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) {
+ err = mlx5_core_alloc_transport_domain(dev->mdev,
+ &context->tdn);
+ if (err)
+ goto out_uars;
+ }
+
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
resp.tot_uuars = req.total_num_uuars;
resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports);
- err = ib_copy_to_udata(udata, &resp,
- sizeof(resp) - sizeof(resp.reserved));
+
+ if (field_avail(typeof(resp), cqe_version, udata->outlen))
+ resp.response_length += sizeof(resp.cqe_version);
+
+ if (field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) {
+ resp.comp_mask |=
+ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
+ resp.hca_core_clock_offset =
+ offsetof(struct mlx5_init_seg, internal_timer_h) %
+ PAGE_SIZE;
+ resp.response_length += sizeof(resp.hca_core_clock_offset) +
+ sizeof(resp.reserved2) +
+ sizeof(resp.reserved3);
+ }
+
+ err = ib_copy_to_udata(udata, &resp, resp.response_length);
if (err)
- goto out_uars;
+ goto out_td;
uuari->ver = ver;
uuari->num_low_latency_uuars = req.num_low_latency_uuars;
uuari->uars = uars;
uuari->num_uars = num_uars;
+ context->cqe_version = resp.cqe_version;
+
return &context->ibucontext;
+out_td:
+ if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
+ mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
+
out_uars:
for (i--; i >= 0; i--)
mlx5_cmd_free_uar(dev->mdev, uars[i].index);
@@ -721,6 +1012,9 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
struct mlx5_uuar_info *uuari = &context->uuari;
int i;
+ if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
+ mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
+
for (i = 0; i < uuari->num_uars; i++) {
if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index))
mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
@@ -790,6 +1084,30 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
return -ENOSYS;
+ case MLX5_IB_MMAP_CORE_CLOCK:
+ if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+ return -EINVAL;
+
+ if (vma->vm_flags & (VM_WRITE | VM_EXEC))
+ return -EPERM;
+
+ /* Don't expose to user-space information it shouldn't have */
+ if (PAGE_SIZE > 4096)
+ return -EOPNOTSUPP;
+
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ pfn = (dev->mdev->iseg_base +
+ offsetof(struct mlx5_init_seg, internal_timer_h)) >>
+ PAGE_SHIFT;
+ if (io_remap_pfn_range(vma, vma->vm_start, pfn,
+ PAGE_SIZE, vma->vm_page_prot))
+ return -EAGAIN;
+
+ mlx5_ib_dbg(dev, "mapped internal timer at 0x%lx, PA 0x%llx\n",
+ vma->vm_start,
+ (unsigned long long)pfn << PAGE_SHIFT);
+ break;
+
default:
return -EINVAL;
}
@@ -1758,6 +2076,32 @@ static void destroy_dev_resources(struct mlx5_ib_resources *devr)
mlx5_ib_dealloc_pd(devr->p0);
}
+static u32 get_core_cap_flags(struct ib_device *ibdev)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1);
+ u8 l3_type_cap = MLX5_CAP_ROCE(dev->mdev, l3_type);
+ u8 roce_version_cap = MLX5_CAP_ROCE(dev->mdev, roce_version);
+ u32 ret = 0;
+
+ if (ll == IB_LINK_LAYER_INFINIBAND)
+ return RDMA_CORE_PORT_IBA_IB;
+
+ if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
+ return 0;
+
+ if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP))
+ return 0;
+
+ if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP)
+ ret |= RDMA_CORE_PORT_IBA_ROCE;
+
+ if (roce_version_cap & MLX5_ROCE_VERSION_2_CAP)
+ ret |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+
+ return ret;
+}
+
static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_immutable *immutable)
{
@@ -1770,20 +2114,50 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
+ immutable->core_cap_flags = get_core_cap_flags(ibdev);
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
}
+static int mlx5_enable_roce(struct mlx5_ib_dev *dev)
+{
+ int err;
+
+ dev->roce.nb.notifier_call = mlx5_netdev_event;
+ err = register_netdevice_notifier(&dev->roce.nb);
+ if (err)
+ return err;
+
+ err = mlx5_nic_vport_enable_roce(dev->mdev);
+ if (err)
+ goto err_unregister_netdevice_notifier;
+
+ return 0;
+
+err_unregister_netdevice_notifier:
+ unregister_netdevice_notifier(&dev->roce.nb);
+ return err;
+}
+
+static void mlx5_disable_roce(struct mlx5_ib_dev *dev)
+{
+ mlx5_nic_vport_disable_roce(dev->mdev);
+ unregister_netdevice_notifier(&dev->roce.nb);
+}
+
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
{
struct mlx5_ib_dev *dev;
+ enum rdma_link_layer ll;
+ int port_type_cap;
int err;
int i;
- /* don't create IB instance over Eth ports, no RoCE yet! */
- if (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH)
+ port_type_cap = MLX5_CAP_GEN(mdev, port_type);
+ ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
+
+ if ((ll == IB_LINK_LAYER_ETHERNET) && !MLX5_CAP_GEN(mdev, roce))
return NULL;
printk_once(KERN_INFO "%s", mlx5_version);
@@ -1794,6 +2168,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->mdev = mdev;
+ rwlock_init(&dev->roce.netdev_lock);
err = get_port_caps(dev);
if (err)
goto err_dealloc;
@@ -1843,7 +2218,12 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->ib_dev.query_device = mlx5_ib_query_device;
dev->ib_dev.query_port = mlx5_ib_query_port;
+ dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer;
+ if (ll == IB_LINK_LAYER_ETHERNET)
+ dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
dev->ib_dev.query_gid = mlx5_ib_query_gid;
+ dev->ib_dev.add_gid = mlx5_ib_add_gid;
+ dev->ib_dev.del_gid = mlx5_ib_del_gid;
dev->ib_dev.query_pkey = mlx5_ib_query_pkey;
dev->ib_dev.modify_device = mlx5_ib_modify_device;
dev->ib_dev.modify_port = mlx5_ib_modify_port;
@@ -1893,7 +2273,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
}
- if (mlx5_ib_port_link_layer(&dev->ib_dev) ==
+ if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
IB_LINK_LAYER_ETHERNET) {
dev->ib_dev.create_flow = mlx5_ib_create_flow;
dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
@@ -1908,9 +2288,15 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
mutex_init(&dev->flow_db.lock);
mutex_init(&dev->cap_mask_mutex);
+ if (ll == IB_LINK_LAYER_ETHERNET) {
+ err = mlx5_enable_roce(dev);
+ if (err)
+ goto err_dealloc;
+ }
+
err = create_dev_resources(&dev->devr);
if (err)
- goto err_dealloc;
+ goto err_disable_roce;
err = mlx5_ib_odp_init_one(dev);
if (err)
@@ -1947,6 +2333,10 @@ err_odp:
err_rsrc:
destroy_dev_resources(&dev->devr);
+err_disable_roce:
+ if (ll == IB_LINK_LAYER_ETHERNET)
+ mlx5_disable_roce(dev);
+
err_dealloc:
ib_dealloc_device((struct ib_device *)dev);
@@ -1956,11 +2346,14 @@ err_dealloc:
static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
{
struct mlx5_ib_dev *dev = context;
+ enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1);
ib_unregister_device(&dev->ib_dev);
destroy_umrc_res(dev);
mlx5_ib_odp_remove_one(dev);
destroy_dev_resources(&dev->devr);
+ if (ll == IB_LINK_LAYER_ETHERNET)
+ mlx5_disable_roce(dev);
ib_dealloc_device(&dev->ib_dev);
}
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 1474cccd1e0f..d2b9737baa36 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -42,6 +42,7 @@
#include <linux/mlx5/qp.h>
#include <linux/mlx5/srq.h>
#include <linux/types.h>
+#include <linux/mlx5/transobj.h>
#define mlx5_ib_dbg(dev, format, arg...) \
pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
@@ -55,6 +56,11 @@ pr_err("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
pr_warn("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
__LINE__, current->pid, ##arg)
+#define field_avail(type, fld, sz) (offsetof(type, fld) + \
+ sizeof(((type *)0)->fld) <= (sz))
+#define MLX5_IB_DEFAULT_UIDX 0xffffff
+#define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index)
+
enum {
MLX5_IB_MMAP_CMD_SHIFT = 8,
MLX5_IB_MMAP_CMD_MASK = 0xff,
@@ -62,7 +68,9 @@ enum {
enum mlx5_ib_mmap_cmd {
MLX5_IB_MMAP_REGULAR_PAGE = 0,
- MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES = 1, /* always last */
+ MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES = 1,
+ /* 5 is chosen in order to be compatible with old versions of libmlx5 */
+ MLX5_IB_MMAP_CORE_CLOCK = 5,
};
enum {
@@ -85,6 +93,15 @@ enum mlx5_ib_mad_ifc_flags {
MLX5_MAD_IFC_NET_VIEW = 4,
};
+enum {
+ MLX5_CROSS_CHANNEL_UUAR = 0,
+};
+
+enum {
+ MLX5_CQE_VERSION_V0,
+ MLX5_CQE_VERSION_V1,
+};
+
struct mlx5_ib_ucontext {
struct ib_ucontext ibucontext;
struct list_head db_page_list;
@@ -93,6 +110,9 @@ struct mlx5_ib_ucontext {
*/
struct mutex db_page_mutex;
struct mlx5_uuar_info uuari;
+ u8 cqe_version;
+ /* Transport Domain number */
+ u32 tdn;
};
static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
@@ -201,47 +221,70 @@ struct mlx5_ib_pfault {
struct mlx5_pagefault mpfault;
};
+struct mlx5_ib_ubuffer {
+ struct ib_umem *umem;
+ int buf_size;
+ u64 buf_addr;
+};
+
+struct mlx5_ib_qp_base {
+ struct mlx5_ib_qp *container_mibqp;
+ struct mlx5_core_qp mqp;
+ struct mlx5_ib_ubuffer ubuffer;
+};
+
+struct mlx5_ib_qp_trans {
+ struct mlx5_ib_qp_base base;
+ u16 xrcdn;
+ u8 alt_port;
+ u8 atomic_rd_en;
+ u8 resp_depth;
+};
+
struct mlx5_ib_rq {
+ struct mlx5_ib_qp_base base;
+ struct mlx5_ib_wq *rq;
+ struct mlx5_ib_ubuffer ubuffer;
+ struct mlx5_db *doorbell;
u32 tirn;
+ u8 state;
+};
+
+struct mlx5_ib_sq {
+ struct mlx5_ib_qp_base base;
+ struct mlx5_ib_wq *sq;
+ struct mlx5_ib_ubuffer ubuffer;
+ struct mlx5_db *doorbell;
+ u32 tisn;
+ u8 state;
};
struct mlx5_ib_raw_packet_qp {
+ struct mlx5_ib_sq sq;
struct mlx5_ib_rq rq;
};
struct mlx5_ib_qp {
struct ib_qp ibqp;
union {
- struct mlx5_core_qp mqp;
- struct mlx5_ib_raw_packet_qp raw_packet_qp;
+ struct mlx5_ib_qp_trans trans_qp;
+ struct mlx5_ib_raw_packet_qp raw_packet_qp;
};
-
struct mlx5_buf buf;
struct mlx5_db db;
struct mlx5_ib_wq rq;
- u32 doorbell_qpn;
u8 sq_signal_bits;
u8 fm_cache;
- int sq_max_wqes_per_wr;
- int sq_spare_wqes;
struct mlx5_ib_wq sq;
- struct ib_umem *umem;
- int buf_size;
-
/* serialize qp state modifications
*/
struct mutex mutex;
- u16 xrcdn;
u32 flags;
u8 port;
- u8 alt_port;
- u8 atomic_rd_en;
- u8 resp_depth;
u8 state;
- int mlx_type;
int wq_sig;
int scat_cqe;
int max_inline_data;
@@ -284,6 +327,9 @@ struct mlx5_ib_cq_buf {
enum mlx5_ib_qp_flags {
MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 0,
MLX5_IB_QP_SIGNATURE_HANDLING = 1 << 1,
+ MLX5_IB_QP_CROSS_CHANNEL = 1 << 2,
+ MLX5_IB_QP_MANAGED_SEND = 1 << 3,
+ MLX5_IB_QP_MANAGED_RECV = 1 << 4,
};
struct mlx5_umr_wr {
@@ -326,6 +372,7 @@ struct mlx5_ib_cq {
struct mlx5_ib_cq_buf *resize_buf;
struct ib_umem *resize_umem;
int cqe_size;
+ u32 create_flags;
};
struct mlx5_ib_srq {
@@ -449,9 +496,19 @@ struct mlx5_ib_resources {
struct ib_srq *s1;
};
+struct mlx5_roce {
+ /* Protect mlx5_ib_get_netdev from invoking dev_hold() with a NULL
+ * netdev pointer
+ */
+ rwlock_t netdev_lock;
+ struct net_device *netdev;
+ struct notifier_block nb;
+};
+
struct mlx5_ib_dev {
struct ib_device ib_dev;
struct mlx5_core_dev *mdev;
+ struct mlx5_roce roce;
MLX5_DECLARE_DOORBELL_LOCK(uar_lock);
int num_ports;
/* serialize update of capability mask
@@ -498,7 +555,7 @@ static inline struct mlx5_ib_cq *to_mcq(struct ib_cq *ibcq)
static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp)
{
- return container_of(mqp, struct mlx5_ib_qp, mqp);
+ return container_of(mqp, struct mlx5_ib_qp_base, mqp)->container_mibqp;
}
static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mr *mmr)
@@ -550,8 +607,6 @@ void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index);
int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
const void *in_mad, void *response_mad);
-struct ib_ah *create_ib_ah(struct ib_ah_attr *ah_attr,
- struct mlx5_ib_ah *ah);
struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
int mlx5_ib_destroy_ah(struct ib_ah *ah);
@@ -578,7 +633,8 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr);
void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n);
int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
- void *buffer, u32 length);
+ void *buffer, u32 length,
+ struct mlx5_ib_qp_base *base);
struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
const struct ib_cq_init_attr *attr,
struct ib_ucontext *context,
@@ -680,6 +736,9 @@ static inline void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp) {}
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
+ int index);
+
static inline void init_query_mad(struct ib_smp *mad)
{
mad->base_version = 1;
@@ -705,4 +764,28 @@ static inline int is_qp1(enum ib_qp_type qp_type)
#define MLX5_MAX_UMR_SHIFT 16
#define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT)
+static inline u32 check_cq_create_flags(u32 flags)
+{
+ /*
+ * It returns non-zero value for unsupported CQ
+ * create flags, otherwise it returns zero.
+ */
+ return (flags & ~(IB_CQ_FLAGS_IGNORE_OVERRUN |
+ IB_CQ_FLAGS_TIMESTAMP_COMPLETION));
+}
+
+static inline int verify_assign_uidx(u8 cqe_version, u32 cmd_uidx,
+ u32 *user_index)
+{
+ if (cqe_version) {
+ if ((cmd_uidx == MLX5_IB_DEFAULT_UIDX) ||
+ (cmd_uidx & ~MLX5_USER_ASSIGNED_UIDX_MASK))
+ return -EINVAL;
+ *user_index = cmd_uidx;
+ } else {
+ *user_index = MLX5_IB_DEFAULT_UIDX;
+ }
+
+ return 0;
+}
#endif /* MLX5_IB_H */
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index aa8391e75385..b8d76361a48d 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -153,14 +153,16 @@ static struct mlx5_ib_mr *mlx5_ib_odp_find_mr_lkey(struct mlx5_ib_dev *dev,
static void mlx5_ib_page_fault_resume(struct mlx5_ib_qp *qp,
struct mlx5_ib_pfault *pfault,
- int error) {
+ int error)
+{
struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
- int ret = mlx5_core_page_fault_resume(dev->mdev, qp->mqp.qpn,
+ u32 qpn = qp->trans_qp.base.mqp.qpn;
+ int ret = mlx5_core_page_fault_resume(dev->mdev,
+ qpn,
pfault->mpfault.flags,
error);
if (ret)
- pr_err("Failed to resolve the page fault on QP 0x%x\n",
- qp->mqp.qpn);
+ pr_err("Failed to resolve the page fault on QP 0x%x\n", qpn);
}
/*
@@ -391,6 +393,7 @@ static int mlx5_ib_mr_initiator_pfault_handler(
#if defined(DEBUG)
u32 ctrl_wqe_index, ctrl_qpn;
#endif
+ u32 qpn = qp->trans_qp.base.mqp.qpn;
ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
if (ds * MLX5_WQE_DS_UNITS > wqe_length) {
@@ -401,7 +404,7 @@ static int mlx5_ib_mr_initiator_pfault_handler(
if (ds == 0) {
mlx5_ib_err(dev, "Got WQE with zero DS. wqe_index=%x, qpn=%x\n",
- wqe_index, qp->mqp.qpn);
+ wqe_index, qpn);
return -EFAULT;
}
@@ -411,16 +414,16 @@ static int mlx5_ib_mr_initiator_pfault_handler(
MLX5_WQE_CTRL_WQE_INDEX_SHIFT;
if (wqe_index != ctrl_wqe_index) {
mlx5_ib_err(dev, "Got WQE with invalid wqe_index. wqe_index=0x%x, qpn=0x%x ctrl->wqe_index=0x%x\n",
- wqe_index, qp->mqp.qpn,
+ wqe_index, qpn,
ctrl_wqe_index);
return -EFAULT;
}
ctrl_qpn = (be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_QPN_MASK) >>
MLX5_WQE_CTRL_QPN_SHIFT;
- if (qp->mqp.qpn != ctrl_qpn) {
+ if (qpn != ctrl_qpn) {
mlx5_ib_err(dev, "Got WQE with incorrect QP number. wqe_index=0x%x, qpn=0x%x ctrl->qpn=0x%x\n",
- wqe_index, qp->mqp.qpn,
+ wqe_index, qpn,
ctrl_qpn);
return -EFAULT;
}
@@ -537,6 +540,7 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_qp *qp,
int resume_with_error = 0;
u16 wqe_index = pfault->mpfault.wqe.wqe_index;
int requestor = pfault->mpfault.flags & MLX5_PFAULT_REQUESTOR;
+ u32 qpn = qp->trans_qp.base.mqp.qpn;
buffer = (char *)__get_free_page(GFP_KERNEL);
if (!buffer) {
@@ -546,10 +550,10 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_qp *qp,
}
ret = mlx5_ib_read_user_wqe(qp, requestor, wqe_index, buffer,
- PAGE_SIZE);
+ PAGE_SIZE, &qp->trans_qp.base);
if (ret < 0) {
mlx5_ib_err(dev, "Failed reading a WQE following page fault, error=%x, wqe_index=%x, qpn=%x\n",
- -ret, wqe_index, qp->mqp.qpn);
+ -ret, wqe_index, qpn);
resume_with_error = 1;
goto resolve_page_fault;
}
@@ -586,7 +590,8 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_qp *qp,
resolve_page_fault:
mlx5_ib_page_fault_resume(qp, pfault, resume_with_error);
mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, flags: 0x%x\n",
- qp->mqp.qpn, resume_with_error, pfault->mpfault.flags);
+ qpn, resume_with_error,
+ pfault->mpfault.flags);
free_page((unsigned long)buffer);
}
@@ -753,7 +758,7 @@ void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp)
qp->disable_page_faults = 1;
spin_lock_init(&qp->disable_page_faults_lock);
- qp->mqp.pfault_handler = mlx5_ib_pfault_handler;
+ qp->trans_qp.base.mqp.pfault_handler = mlx5_ib_pfault_handler;
for (i = 0; i < MLX5_IB_PAGEFAULT_CONTEXTS; ++i)
INIT_WORK(&qp->pagefaults[i].work, mlx5_ib_qp_pfault_action);
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 307bdbca8938..8fb9c27485e1 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -32,6 +32,8 @@
#include <linux/module.h>
#include <rdma/ib_umem.h>
+#include <rdma/ib_cache.h>
+#include <rdma/ib_user_verbs.h>
#include "mlx5_ib.h"
#include "user.h"
@@ -114,14 +116,15 @@ void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n)
* Return: the number of bytes copied, or an error code.
*/
int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
- void *buffer, u32 length)
+ void *buffer, u32 length,
+ struct mlx5_ib_qp_base *base)
{
struct ib_device *ibdev = qp->ibqp.device;
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_ib_wq *wq = send ? &qp->sq : &qp->rq;
size_t offset;
size_t wq_end;
- struct ib_umem *umem = qp->umem;
+ struct ib_umem *umem = base->ubuffer.umem;
u32 first_copy_length;
int wqe_length;
int ret;
@@ -172,8 +175,10 @@ static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type)
struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
struct ib_event event;
- if (type == MLX5_EVENT_TYPE_PATH_MIG)
- to_mibqp(qp)->port = to_mibqp(qp)->alt_port;
+ if (type == MLX5_EVENT_TYPE_PATH_MIG) {
+ /* This event is only valid for trans_qps */
+ to_mibqp(qp)->port = to_mibqp(qp)->trans_qp.alt_port;
+ }
if (ibqp->event_handler) {
event.device = ibqp->device;
@@ -366,7 +371,9 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
static int set_user_buf_size(struct mlx5_ib_dev *dev,
struct mlx5_ib_qp *qp,
- struct mlx5_ib_create_qp *ucmd)
+ struct mlx5_ib_create_qp *ucmd,
+ struct mlx5_ib_qp_base *base,
+ struct ib_qp_init_attr *attr)
{
int desc_sz = 1 << qp->sq.wqe_shift;
@@ -391,8 +398,13 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev,
return -EINVAL;
}
- qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
- (qp->sq.wqe_cnt << 6);
+ if (attr->qp_type == IB_QPT_RAW_PACKET) {
+ base->ubuffer.buf_size = qp->rq.wqe_cnt << qp->rq.wqe_shift;
+ qp->raw_packet_qp.sq.ubuffer.buf_size = qp->sq.wqe_cnt << 6;
+ } else {
+ base->ubuffer.buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
+ (qp->sq.wqe_cnt << 6);
+ }
return 0;
}
@@ -578,8 +590,8 @@ static int to_mlx5_st(enum ib_qp_type type)
case IB_QPT_SMI: return MLX5_QP_ST_QP0;
case IB_QPT_GSI: return MLX5_QP_ST_QP1;
case IB_QPT_RAW_IPV6: return MLX5_QP_ST_RAW_IPV6;
- case IB_QPT_RAW_ETHERTYPE: return MLX5_QP_ST_RAW_ETHERTYPE;
case IB_QPT_RAW_PACKET:
+ case IB_QPT_RAW_ETHERTYPE: return MLX5_QP_ST_RAW_ETHERTYPE;
case IB_QPT_MAX:
default: return -EINVAL;
}
@@ -590,13 +602,51 @@ static int uuarn_to_uar_index(struct mlx5_uuar_info *uuari, int uuarn)
return uuari->uars[uuarn / MLX5_BF_REGS_PER_PAGE].index;
}
+static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev,
+ struct ib_pd *pd,
+ unsigned long addr, size_t size,
+ struct ib_umem **umem,
+ int *npages, int *page_shift, int *ncont,
+ u32 *offset)
+{
+ int err;
+
+ *umem = ib_umem_get(pd->uobject->context, addr, size, 0, 0);
+ if (IS_ERR(*umem)) {
+ mlx5_ib_dbg(dev, "umem_get failed\n");
+ return PTR_ERR(*umem);
+ }
+
+ mlx5_ib_cont_pages(*umem, addr, npages, page_shift, ncont, NULL);
+
+ err = mlx5_ib_get_buf_offset(addr, *page_shift, offset);
+ if (err) {
+ mlx5_ib_warn(dev, "bad offset\n");
+ goto err_umem;
+ }
+
+ mlx5_ib_dbg(dev, "addr 0x%lx, size %zu, npages %d, page_shift %d, ncont %d, offset %d\n",
+ addr, size, *npages, *page_shift, *ncont, *offset);
+
+ return 0;
+
+err_umem:
+ ib_umem_release(*umem);
+ *umem = NULL;
+
+ return err;
+}
+
static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
struct mlx5_ib_qp *qp, struct ib_udata *udata,
+ struct ib_qp_init_attr *attr,
struct mlx5_create_qp_mbox_in **in,
- struct mlx5_ib_create_qp_resp *resp, int *inlen)
+ struct mlx5_ib_create_qp_resp *resp, int *inlen,
+ struct mlx5_ib_qp_base *base)
{
struct mlx5_ib_ucontext *context;
struct mlx5_ib_create_qp ucmd;
+ struct mlx5_ib_ubuffer *ubuffer = &base->ubuffer;
int page_shift = 0;
int uar_index;
int npages;
@@ -615,18 +665,23 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
/*
* TBD: should come from the verbs when we have the API
*/
- uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
- if (uuarn < 0) {
- mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
- mlx5_ib_dbg(dev, "reverting to medium latency\n");
- uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM);
+ if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
+ /* In CROSS_CHANNEL CQ and QP must use the same UAR */
+ uuarn = MLX5_CROSS_CHANNEL_UUAR;
+ else {
+ uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
if (uuarn < 0) {
- mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n");
- mlx5_ib_dbg(dev, "reverting to high latency\n");
- uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
+ mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
+ mlx5_ib_dbg(dev, "reverting to medium latency\n");
+ uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM);
if (uuarn < 0) {
- mlx5_ib_warn(dev, "uuar allocation failed\n");
- return uuarn;
+ mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n");
+ mlx5_ib_dbg(dev, "reverting to high latency\n");
+ uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
+ if (uuarn < 0) {
+ mlx5_ib_warn(dev, "uuar allocation failed\n");
+ return uuarn;
+ }
}
}
}
@@ -638,32 +693,20 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
- err = set_user_buf_size(dev, qp, &ucmd);
+ err = set_user_buf_size(dev, qp, &ucmd, base, attr);
if (err)
goto err_uuar;
- if (ucmd.buf_addr && qp->buf_size) {
- qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
- qp->buf_size, 0, 0);
- if (IS_ERR(qp->umem)) {
- mlx5_ib_dbg(dev, "umem_get failed\n");
- err = PTR_ERR(qp->umem);
+ if (ucmd.buf_addr && ubuffer->buf_size) {
+ ubuffer->buf_addr = ucmd.buf_addr;
+ err = mlx5_ib_umem_get(dev, pd, ubuffer->buf_addr,
+ ubuffer->buf_size,
+ &ubuffer->umem, &npages, &page_shift,
+ &ncont, &offset);
+ if (err)
goto err_uuar;
- }
} else {
- qp->umem = NULL;
- }
-
- if (qp->umem) {
- mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift,
- &ncont, NULL);
- err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset);
- if (err) {
- mlx5_ib_warn(dev, "bad offset\n");
- goto err_umem;
- }
- mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n",
- ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset);
+ ubuffer->umem = NULL;
}
*inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
@@ -672,8 +715,9 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
err = -ENOMEM;
goto err_umem;
}
- if (qp->umem)
- mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
+ if (ubuffer->umem)
+ mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift,
+ (*in)->pas, 0);
(*in)->ctx.log_pg_sz_remote_qpn =
cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
(*in)->ctx.params2 = cpu_to_be32(offset << 6);
@@ -704,29 +748,31 @@ err_free:
kvfree(*in);
err_umem:
- if (qp->umem)
- ib_umem_release(qp->umem);
+ if (ubuffer->umem)
+ ib_umem_release(ubuffer->umem);
err_uuar:
free_uuar(&context->uuari, uuarn);
return err;
}
-static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp)
+static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp,
+ struct mlx5_ib_qp_base *base)
{
struct mlx5_ib_ucontext *context;
context = to_mucontext(pd->uobject->context);
mlx5_ib_db_unmap_user(context, &qp->db);
- if (qp->umem)
- ib_umem_release(qp->umem);
+ if (base->ubuffer.umem)
+ ib_umem_release(base->ubuffer.umem);
free_uuar(&context->uuari, qp->uuarn);
}
static int create_kernel_qp(struct mlx5_ib_dev *dev,
struct ib_qp_init_attr *init_attr,
struct mlx5_ib_qp *qp,
- struct mlx5_create_qp_mbox_in **in, int *inlen)
+ struct mlx5_create_qp_mbox_in **in, int *inlen,
+ struct mlx5_ib_qp_base *base)
{
enum mlx5_ib_latency_class lc = MLX5_IB_LATENCY_CLASS_LOW;
struct mlx5_uuar_info *uuari;
@@ -758,9 +804,9 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
qp->rq.offset = 0;
qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
- qp->buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift);
+ base->ubuffer.buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift);
- err = mlx5_buf_alloc(dev->mdev, qp->buf_size, &qp->buf);
+ err = mlx5_buf_alloc(dev->mdev, base->ubuffer.buf_size, &qp->buf);
if (err) {
mlx5_ib_dbg(dev, "err %d\n", err);
goto err_uuar;
@@ -853,19 +899,304 @@ static int is_connected(enum ib_qp_type qp_type)
return 0;
}
+static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq, u32 tdn)
+{
+ u32 in[MLX5_ST_SZ_DW(create_tis_in)];
+ void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
+
+ memset(in, 0, sizeof(in));
+
+ MLX5_SET(tisc, tisc, transport_domain, tdn);
+
+ return mlx5_core_create_tis(dev->mdev, in, sizeof(in), &sq->tisn);
+}
+
+static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq)
+{
+ mlx5_core_destroy_tis(dev->mdev, sq->tisn);
+}
+
+static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq, void *qpin,
+ struct ib_pd *pd)
+{
+ struct mlx5_ib_ubuffer *ubuffer = &sq->ubuffer;
+ __be64 *pas;
+ void *in;
+ void *sqc;
+ void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc);
+ void *wq;
+ int inlen;
+ int err;
+ int page_shift = 0;
+ int npages;
+ int ncont = 0;
+ u32 offset = 0;
+
+ err = mlx5_ib_umem_get(dev, pd, ubuffer->buf_addr, ubuffer->buf_size,
+ &sq->ubuffer.umem, &npages, &page_shift,
+ &ncont, &offset);
+ if (err)
+ return err;
+
+ inlen = MLX5_ST_SZ_BYTES(create_sq_in) + sizeof(u64) * ncont;
+ in = mlx5_vzalloc(inlen);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_umem;
+ }
+
+ sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
+ MLX5_SET(sqc, sqc, flush_in_error_en, 1);
+ MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
+ MLX5_SET(sqc, sqc, user_index, MLX5_GET(qpc, qpc, user_index));
+ MLX5_SET(sqc, sqc, cqn, MLX5_GET(qpc, qpc, cqn_snd));
+ MLX5_SET(sqc, sqc, tis_lst_sz, 1);
+ MLX5_SET(sqc, sqc, tis_num_0, sq->tisn);
+
+ wq = MLX5_ADDR_OF(sqc, sqc, wq);
+ MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
+ MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd));
+ MLX5_SET(wq, wq, uar_page, MLX5_GET(qpc, qpc, uar_page));
+ MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr));
+ MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
+ MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_sq_size));
+ MLX5_SET(wq, wq, log_wq_pg_sz, page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET(wq, wq, page_offset, offset);
+
+ pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
+ mlx5_ib_populate_pas(dev, sq->ubuffer.umem, page_shift, pas, 0);
+
+ err = mlx5_core_create_sq_tracked(dev->mdev, in, inlen, &sq->base.mqp);
+
+ kvfree(in);
+
+ if (err)
+ goto err_umem;
+
+ return 0;
+
+err_umem:
+ ib_umem_release(sq->ubuffer.umem);
+ sq->ubuffer.umem = NULL;
+
+ return err;
+}
+
+static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq)
+{
+ mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp);
+ ib_umem_release(sq->ubuffer.umem);
+}
+
+static int get_rq_pas_size(void *qpc)
+{
+ u32 log_page_size = MLX5_GET(qpc, qpc, log_page_size) + 12;
+ u32 log_rq_stride = MLX5_GET(qpc, qpc, log_rq_stride);
+ u32 log_rq_size = MLX5_GET(qpc, qpc, log_rq_size);
+ u32 page_offset = MLX5_GET(qpc, qpc, page_offset);
+ u32 po_quanta = 1 << (log_page_size - 6);
+ u32 rq_sz = 1 << (log_rq_size + 4 + log_rq_stride);
+ u32 page_size = 1 << log_page_size;
+ u32 rq_sz_po = rq_sz + (page_offset * po_quanta);
+ u32 rq_num_pas = (rq_sz_po + page_size - 1) / page_size;
+
+ return rq_num_pas * sizeof(u64);
+}
+
+static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_rq *rq, void *qpin)
+{
+ __be64 *pas;
+ __be64 *qp_pas;
+ void *in;
+ void *rqc;
+ void *wq;
+ void *qpc = MLX5_ADDR_OF(create_qp_in, qpin, qpc);
+ int inlen;
+ int err;
+ u32 rq_pas_size = get_rq_pas_size(qpc);
+
+ inlen = MLX5_ST_SZ_BYTES(create_rq_in) + rq_pas_size;
+ in = mlx5_vzalloc(inlen);
+ if (!in)
+ return -ENOMEM;
+
+ rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
+ MLX5_SET(rqc, rqc, vsd, 1);
+ MLX5_SET(rqc, rqc, mem_rq_type, MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE);
+ MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
+ MLX5_SET(rqc, rqc, flush_in_error_en, 1);
+ MLX5_SET(rqc, rqc, user_index, MLX5_GET(qpc, qpc, user_index));
+ MLX5_SET(rqc, rqc, cqn, MLX5_GET(qpc, qpc, cqn_rcv));
+
+ wq = MLX5_ADDR_OF(rqc, rqc, wq);
+ MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
+ MLX5_SET(wq, wq, end_padding_mode,
+ MLX5_GET64(qpc, qpc, end_padding_mode));
+ MLX5_SET(wq, wq, page_offset, MLX5_GET(qpc, qpc, page_offset));
+ MLX5_SET(wq, wq, pd, MLX5_GET(qpc, qpc, pd));
+ MLX5_SET64(wq, wq, dbr_addr, MLX5_GET64(qpc, qpc, dbr_addr));
+ MLX5_SET(wq, wq, log_wq_stride, MLX5_GET(qpc, qpc, log_rq_stride) + 4);
+ MLX5_SET(wq, wq, log_wq_pg_sz, MLX5_GET(qpc, qpc, log_page_size));
+ MLX5_SET(wq, wq, log_wq_sz, MLX5_GET(qpc, qpc, log_rq_size));
+
+ pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
+ qp_pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, qpin, pas);
+ memcpy(pas, qp_pas, rq_pas_size);
+
+ err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rq->base.mqp);
+
+ kvfree(in);
+
+ return err;
+}
+
+static void destroy_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_rq *rq)
+{
+ mlx5_core_destroy_rq_tracked(dev->mdev, &rq->base.mqp);
+}
+
+static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_rq *rq, u32 tdn)
+{
+ u32 *in;
+ void *tirc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(create_tir_in);
+ in = mlx5_vzalloc(inlen);
+ if (!in)
+ return -ENOMEM;
+
+ tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
+ MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
+ MLX5_SET(tirc, tirc, inline_rqn, rq->base.mqp.qpn);
+ MLX5_SET(tirc, tirc, transport_domain, tdn);
+
+ err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn);
+
+ kvfree(in);
+
+ return err;
+}
+
+static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_rq *rq)
+{
+ mlx5_core_destroy_tir(dev->mdev, rq->tirn);
+}
+
+static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ struct mlx5_create_qp_mbox_in *in,
+ struct ib_pd *pd)
+{
+ struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
+ struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+ struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+ struct ib_uobject *uobj = pd->uobject;
+ struct ib_ucontext *ucontext = uobj->context;
+ struct mlx5_ib_ucontext *mucontext = to_mucontext(ucontext);
+ int err;
+ u32 tdn = mucontext->tdn;
+
+ if (qp->sq.wqe_cnt) {
+ err = create_raw_packet_qp_tis(dev, sq, tdn);
+ if (err)
+ return err;
+
+ err = create_raw_packet_qp_sq(dev, sq, in, pd);
+ if (err)
+ goto err_destroy_tis;
+
+ sq->base.container_mibqp = qp;
+ }
+
+ if (qp->rq.wqe_cnt) {
+ err = create_raw_packet_qp_rq(dev, rq, in);
+ if (err)
+ goto err_destroy_sq;
+
+ rq->base.container_mibqp = qp;
+
+ err = create_raw_packet_qp_tir(dev, rq, tdn);
+ if (err)
+ goto err_destroy_rq;
+ }
+
+ qp->trans_qp.base.mqp.qpn = qp->sq.wqe_cnt ? sq->base.mqp.qpn :
+ rq->base.mqp.qpn;
+
+ return 0;
+
+err_destroy_rq:
+ destroy_raw_packet_qp_rq(dev, rq);
+err_destroy_sq:
+ if (!qp->sq.wqe_cnt)
+ return err;
+ destroy_raw_packet_qp_sq(dev, sq);
+err_destroy_tis:
+ destroy_raw_packet_qp_tis(dev, sq);
+
+ return err;
+}
+
+static void destroy_raw_packet_qp(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_qp *qp)
+{
+ struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
+ struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+ struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+
+ if (qp->rq.wqe_cnt) {
+ destroy_raw_packet_qp_tir(dev, rq);
+ destroy_raw_packet_qp_rq(dev, rq);
+ }
+
+ if (qp->sq.wqe_cnt) {
+ destroy_raw_packet_qp_sq(dev, sq);
+ destroy_raw_packet_qp_tis(dev, sq);
+ }
+}
+
+static void raw_packet_qp_copy_info(struct mlx5_ib_qp *qp,
+ struct mlx5_ib_raw_packet_qp *raw_packet_qp)
+{
+ struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+ struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+
+ sq->sq = &qp->sq;
+ rq->rq = &qp->rq;
+ sq->doorbell = &qp->db;
+ rq->doorbell = &qp->db;
+}
+
static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata, struct mlx5_ib_qp *qp)
{
struct mlx5_ib_resources *devr = &dev->devr;
struct mlx5_core_dev *mdev = dev->mdev;
+ struct mlx5_ib_qp_base *base;
struct mlx5_ib_create_qp_resp resp;
struct mlx5_create_qp_mbox_in *in;
struct mlx5_ib_create_qp ucmd;
int inlen = sizeof(*in);
int err;
+ u32 uidx = MLX5_IB_DEFAULT_UIDX;
+ void *qpc;
+
+ base = init_attr->qp_type == IB_QPT_RAW_PACKET ?
+ &qp->raw_packet_qp.rq.base :
+ &qp->trans_qp.base;
- mlx5_ib_odp_create_qp(qp);
+ if (init_attr->qp_type != IB_QPT_RAW_PACKET)
+ mlx5_ib_odp_create_qp(qp);
mutex_init(&qp->mutex);
spin_lock_init(&qp->sq.lock);
@@ -880,6 +1211,21 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
}
}
+ if (init_attr->create_flags &
+ (IB_QP_CREATE_CROSS_CHANNEL |
+ IB_QP_CREATE_MANAGED_SEND |
+ IB_QP_CREATE_MANAGED_RECV)) {
+ if (!MLX5_CAP_GEN(mdev, cd)) {
+ mlx5_ib_dbg(dev, "cross-channel isn't supported\n");
+ return -EINVAL;
+ }
+ if (init_attr->create_flags & IB_QP_CREATE_CROSS_CHANNEL)
+ qp->flags |= MLX5_IB_QP_CROSS_CHANNEL;
+ if (init_attr->create_flags & IB_QP_CREATE_MANAGED_SEND)
+ qp->flags |= MLX5_IB_QP_MANAGED_SEND;
+ if (init_attr->create_flags & IB_QP_CREATE_MANAGED_RECV)
+ qp->flags |= MLX5_IB_QP_MANAGED_RECV;
+ }
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
@@ -889,6 +1235,11 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
return -EFAULT;
}
+ err = get_qp_user_index(to_mucontext(pd->uobject->context),
+ &ucmd, udata->inlen, &uidx);
+ if (err)
+ return err;
+
qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE);
qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE);
} else {
@@ -918,11 +1269,13 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
ucmd.sq_wqe_count, max_wqes);
return -EINVAL;
}
- err = create_user_qp(dev, pd, qp, udata, &in, &resp, &inlen);
+ err = create_user_qp(dev, pd, qp, udata, init_attr, &in,
+ &resp, &inlen, base);
if (err)
mlx5_ib_dbg(dev, "err %d\n", err);
} else {
- err = create_kernel_qp(dev, init_attr, qp, &in, &inlen);
+ err = create_kernel_qp(dev, init_attr, qp, &in, &inlen,
+ base);
if (err)
mlx5_ib_dbg(dev, "err %d\n", err);
}
@@ -954,6 +1307,13 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_BLOCK_MCAST);
+ if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
+ in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_MASTER);
+ if (qp->flags & MLX5_IB_QP_MANAGED_SEND)
+ in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_SLAVE_SEND);
+ if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
+ in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_SLAVE_RECV);
+
if (qp->scat_cqe && is_connected(init_attr->qp_type)) {
int rcqe_sz;
int scqe_sz;
@@ -1018,26 +1378,35 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
in->ctx.db_rec_addr = cpu_to_be64(qp->db.dma);
- err = mlx5_core_create_qp(dev->mdev, &qp->mqp, in, inlen);
+ if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1) {
+ qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+ /* 0xffffff means we ask to work with cqe version 0 */
+ MLX5_SET(qpc, qpc, user_index, uidx);
+ }
+
+ if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
+ qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr;
+ raw_packet_qp_copy_info(qp, &qp->raw_packet_qp);
+ err = create_raw_packet_qp(dev, qp, in, pd);
+ } else {
+ err = mlx5_core_create_qp(dev->mdev, &base->mqp, in, inlen);
+ }
+
if (err) {
mlx5_ib_dbg(dev, "create qp failed\n");
goto err_create;
}
kvfree(in);
- /* Hardware wants QPN written in big-endian order (after
- * shifting) for send doorbell. Precompute this value to save
- * a little bit when posting sends.
- */
- qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
- qp->mqp.event = mlx5_ib_qp_event;
+ base->container_mibqp = qp;
+ base->mqp.event = mlx5_ib_qp_event;
return 0;
err_create:
if (qp->create_type == MLX5_QP_USER)
- destroy_qp_user(pd, qp);
+ destroy_qp_user(pd, qp, base);
else if (qp->create_type == MLX5_QP_KERNEL)
destroy_qp_kernel(dev, qp);
@@ -1129,11 +1498,11 @@ static void get_cqs(struct mlx5_ib_qp *qp,
case IB_QPT_UD:
case IB_QPT_RAW_IPV6:
case IB_QPT_RAW_ETHERTYPE:
+ case IB_QPT_RAW_PACKET:
*send_cq = to_mcq(qp->ibqp.send_cq);
*recv_cq = to_mcq(qp->ibqp.recv_cq);
break;
- case IB_QPT_RAW_PACKET:
case IB_QPT_MAX:
default:
*send_cq = NULL;
@@ -1142,45 +1511,66 @@ static void get_cqs(struct mlx5_ib_qp *qp,
}
}
+static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ u16 operation);
+
static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
{
struct mlx5_ib_cq *send_cq, *recv_cq;
+ struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
struct mlx5_modify_qp_mbox_in *in;
int err;
+ base = qp->ibqp.qp_type == IB_QPT_RAW_PACKET ?
+ &qp->raw_packet_qp.rq.base :
+ &qp->trans_qp.base;
+
in = kzalloc(sizeof(*in), GFP_KERNEL);
if (!in)
return;
if (qp->state != IB_QPS_RESET) {
- mlx5_ib_qp_disable_pagefaults(qp);
- if (mlx5_core_qp_modify(dev->mdev, to_mlx5_state(qp->state),
- MLX5_QP_STATE_RST, in, 0, &qp->mqp))
- mlx5_ib_warn(dev, "mlx5_ib: modify QP %06x to RESET failed\n",
- qp->mqp.qpn);
+ if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) {
+ mlx5_ib_qp_disable_pagefaults(qp);
+ err = mlx5_core_qp_modify(dev->mdev,
+ MLX5_CMD_OP_2RST_QP, in, 0,
+ &base->mqp);
+ } else {
+ err = modify_raw_packet_qp(dev, qp,
+ MLX5_CMD_OP_2RST_QP);
+ }
+ if (err)
+ mlx5_ib_warn(dev, "mlx5_ib: modify QP 0x%06x to RESET failed\n",
+ base->mqp.qpn);
}
get_cqs(qp, &send_cq, &recv_cq);
if (qp->create_type == MLX5_QP_KERNEL) {
mlx5_ib_lock_cqs(send_cq, recv_cq);
- __mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn,
+ __mlx5_ib_cq_clean(recv_cq, base->mqp.qpn,
qp->ibqp.srq ? to_msrq(qp->ibqp.srq) : NULL);
if (send_cq != recv_cq)
- __mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
+ __mlx5_ib_cq_clean(send_cq, base->mqp.qpn,
+ NULL);
mlx5_ib_unlock_cqs(send_cq, recv_cq);
}
- err = mlx5_core_destroy_qp(dev->mdev, &qp->mqp);
- if (err)
- mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n", qp->mqp.qpn);
- kfree(in);
+ if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
+ destroy_raw_packet_qp(dev, qp);
+ } else {
+ err = mlx5_core_destroy_qp(dev->mdev, &base->mqp);
+ if (err)
+ mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n",
+ base->mqp.qpn);
+ }
+ kfree(in);
if (qp->create_type == MLX5_QP_KERNEL)
destroy_qp_kernel(dev, qp);
else if (qp->create_type == MLX5_QP_USER)
- destroy_qp_user(&get_pd(qp)->ibpd, qp);
+ destroy_qp_user(&get_pd(qp)->ibpd, qp, base);
}
static const char *ib_qp_type_str(enum ib_qp_type type)
@@ -1234,6 +1624,16 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
return ERR_PTR(-EINVAL);
}
dev = to_mdev(to_mxrcd(init_attr->xrcd)->ibxrcd.device);
+
+ if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
+ if (!pd->uobject) {
+ mlx5_ib_dbg(dev, "Raw Packet QP is not supported for kernel consumers\n");
+ return ERR_PTR(-EINVAL);
+ } else if (!to_mucontext(pd->uobject->context)->cqe_version) {
+ mlx5_ib_dbg(dev, "Raw Packet QP is only supported for CQE version > 0\n");
+ return ERR_PTR(-EINVAL);
+ }
+ }
}
switch (init_attr->qp_type) {
@@ -1250,6 +1650,7 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
}
/* fall through */
+ case IB_QPT_RAW_PACKET:
case IB_QPT_RC:
case IB_QPT_UC:
case IB_QPT_UD:
@@ -1272,19 +1673,19 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
else if (is_qp1(init_attr->qp_type))
qp->ibqp.qp_num = 1;
else
- qp->ibqp.qp_num = qp->mqp.qpn;
+ qp->ibqp.qp_num = qp->trans_qp.base.mqp.qpn;
mlx5_ib_dbg(dev, "ib qpnum 0x%x, mlx qpn 0x%x, rcqn 0x%x, scqn 0x%x\n",
- qp->ibqp.qp_num, qp->mqp.qpn, to_mcq(init_attr->recv_cq)->mcq.cqn,
+ qp->ibqp.qp_num, qp->trans_qp.base.mqp.qpn,
+ to_mcq(init_attr->recv_cq)->mcq.cqn,
to_mcq(init_attr->send_cq)->mcq.cqn);
- qp->xrcdn = xrcdn;
+ qp->trans_qp.xrcdn = xrcdn;
break;
case IB_QPT_RAW_IPV6:
case IB_QPT_RAW_ETHERTYPE:
- case IB_QPT_RAW_PACKET:
case IB_QPT_MAX:
default:
mlx5_ib_dbg(dev, "unsupported qp type %d\n",
@@ -1318,12 +1719,12 @@ static __be32 to_mlx5_access_flags(struct mlx5_ib_qp *qp, const struct ib_qp_att
if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
dest_rd_atomic = attr->max_dest_rd_atomic;
else
- dest_rd_atomic = qp->resp_depth;
+ dest_rd_atomic = qp->trans_qp.resp_depth;
if (attr_mask & IB_QP_ACCESS_FLAGS)
access_flags = attr->qp_access_flags;
else
- access_flags = qp->atomic_rd_en;
+ access_flags = qp->trans_qp.atomic_rd_en;
if (!dest_rd_atomic)
access_flags &= IB_ACCESS_REMOTE_WRITE;
@@ -1360,21 +1761,42 @@ static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
return rate + MLX5_STAT_RATE_OFFSET;
}
-static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
+static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
+ struct mlx5_ib_sq *sq, u8 sl)
+{
+ void *in;
+ void *tisc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_tis_in);
+ in = mlx5_vzalloc(inlen);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_tis_in, in, bitmask.prio, 1);
+
+ tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx);
+ MLX5_SET(tisc, tisc, prio, ((sl & 0x7) << 1));
+
+ err = mlx5_core_modify_tis(dev, sq->tisn, in, inlen);
+
+ kvfree(in);
+
+ return err;
+}
+
+static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ const struct ib_ah_attr *ah,
struct mlx5_qp_path *path, u8 port, int attr_mask,
u32 path_flags, const struct ib_qp_attr *attr)
{
+ enum rdma_link_layer ll = rdma_port_get_link_layer(&dev->ib_dev, port);
int err;
- path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
- path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 : 0;
-
if (attr_mask & IB_QP_PKEY_INDEX)
path->pkey_index = attr->pkey_index;
- path->grh_mlid = ah->src_path_bits & 0x7f;
- path->rlid = cpu_to_be16(ah->dlid);
-
if (ah->ah_flags & IB_AH_GRH) {
if (ah->grh.sgid_index >=
dev->mdev->port_caps[port - 1].gid_table_len) {
@@ -1383,7 +1805,27 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
dev->mdev->port_caps[port - 1].gid_table_len);
return -EINVAL;
}
- path->grh_mlid |= 1 << 7;
+ }
+
+ if (ll == IB_LINK_LAYER_ETHERNET) {
+ if (!(ah->ah_flags & IB_AH_GRH))
+ return -EINVAL;
+ memcpy(path->rmac, ah->dmac, sizeof(ah->dmac));
+ path->udp_sport = mlx5_get_roce_udp_sport(dev, port,
+ ah->grh.sgid_index);
+ path->dci_cfi_prio_sl = (ah->sl & 0x7) << 4;
+ } else {
+ path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
+ path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 :
+ 0;
+ path->rlid = cpu_to_be16(ah->dlid);
+ path->grh_mlid = ah->src_path_bits & 0x7f;
+ if (ah->ah_flags & IB_AH_GRH)
+ path->grh_mlid |= 1 << 7;
+ path->dci_cfi_prio_sl = ah->sl & 0xf;
+ }
+
+ if (ah->ah_flags & IB_AH_GRH) {
path->mgid_index = ah->grh.sgid_index;
path->hop_limit = ah->grh.hop_limit;
path->tclass_flowlabel =
@@ -1401,7 +1843,10 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, const struct ib_ah_attr *ah,
if (attr_mask & IB_QP_TIMEOUT)
path->ackto_lt = attr->timeout << 3;
- path->sl = ah->sl & 0xf;
+ if ((qp->ibqp.qp_type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt)
+ return modify_raw_packet_eth_prio(dev->mdev,
+ &qp->raw_packet_qp.sq,
+ ah->sl & 0xf);
return 0;
}
@@ -1549,12 +1994,154 @@ static int ib_mask_to_mlx5_opt(int ib_mask)
return result;
}
+static int modify_raw_packet_qp_rq(struct mlx5_core_dev *dev,
+ struct mlx5_ib_rq *rq, int new_state)
+{
+ void *in;
+ void *rqc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
+ in = mlx5_vzalloc(inlen);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_rq_in, in, rq_state, rq->state);
+
+ rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
+ MLX5_SET(rqc, rqc, state, new_state);
+
+ err = mlx5_core_modify_rq(dev, rq->base.mqp.qpn, in, inlen);
+ if (err)
+ goto out;
+
+ rq->state = new_state;
+
+out:
+ kvfree(in);
+ return err;
+}
+
+static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev,
+ struct mlx5_ib_sq *sq, int new_state)
+{
+ void *in;
+ void *sqc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
+ in = mlx5_vzalloc(inlen);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_sq_in, in, sq_state, sq->state);
+
+ sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
+ MLX5_SET(sqc, sqc, state, new_state);
+
+ err = mlx5_core_modify_sq(dev, sq->base.mqp.qpn, in, inlen);
+ if (err)
+ goto out;
+
+ sq->state = new_state;
+
+out:
+ kvfree(in);
+ return err;
+}
+
+static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ u16 operation)
+{
+ struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
+ struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+ struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+ int rq_state;
+ int sq_state;
+ int err;
+
+ switch (operation) {
+ case MLX5_CMD_OP_RST2INIT_QP:
+ rq_state = MLX5_RQC_STATE_RDY;
+ sq_state = MLX5_SQC_STATE_RDY;
+ break;
+ case MLX5_CMD_OP_2ERR_QP:
+ rq_state = MLX5_RQC_STATE_ERR;
+ sq_state = MLX5_SQC_STATE_ERR;
+ break;
+ case MLX5_CMD_OP_2RST_QP:
+ rq_state = MLX5_RQC_STATE_RST;
+ sq_state = MLX5_SQC_STATE_RST;
+ break;
+ case MLX5_CMD_OP_INIT2INIT_QP:
+ case MLX5_CMD_OP_INIT2RTR_QP:
+ case MLX5_CMD_OP_RTR2RTS_QP:
+ case MLX5_CMD_OP_RTS2RTS_QP:
+ /* Nothing to do here... */
+ return 0;
+ default:
+ WARN_ON(1);
+ return -EINVAL;
+ }
+
+ if (qp->rq.wqe_cnt) {
+ err = modify_raw_packet_qp_rq(dev->mdev, rq, rq_state);
+ if (err)
+ return err;
+ }
+
+ if (qp->sq.wqe_cnt)
+ return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state);
+
+ return 0;
+}
+
static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
const struct ib_qp_attr *attr, int attr_mask,
enum ib_qp_state cur_state, enum ib_qp_state new_state)
{
+ static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = {
+ [MLX5_QP_STATE_RST] = {
+ [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
+ [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
+ [MLX5_QP_STATE_INIT] = MLX5_CMD_OP_RST2INIT_QP,
+ },
+ [MLX5_QP_STATE_INIT] = {
+ [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
+ [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
+ [MLX5_QP_STATE_INIT] = MLX5_CMD_OP_INIT2INIT_QP,
+ [MLX5_QP_STATE_RTR] = MLX5_CMD_OP_INIT2RTR_QP,
+ },
+ [MLX5_QP_STATE_RTR] = {
+ [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
+ [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
+ [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_RTR2RTS_QP,
+ },
+ [MLX5_QP_STATE_RTS] = {
+ [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
+ [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
+ [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_RTS2RTS_QP,
+ },
+ [MLX5_QP_STATE_SQD] = {
+ [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
+ [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
+ },
+ [MLX5_QP_STATE_SQER] = {
+ [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
+ [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
+ [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_SQERR2RTS_QP,
+ },
+ [MLX5_QP_STATE_ERR] = {
+ [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
+ [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
+ }
+ };
+
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
struct mlx5_ib_cq *send_cq, *recv_cq;
struct mlx5_qp_context *context;
struct mlx5_modify_qp_mbox_in *in;
@@ -1564,6 +2151,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
int sqd_event;
int mlx5_st;
int err;
+ u16 op;
in = kzalloc(sizeof(*in), GFP_KERNEL);
if (!in)
@@ -1623,7 +2211,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
context->pri_path.port = attr->port_num;
if (attr_mask & IB_QP_AV) {
- err = mlx5_set_path(dev, &attr->ah_attr, &context->pri_path,
+ err = mlx5_set_path(dev, qp, &attr->ah_attr, &context->pri_path,
attr_mask & IB_QP_PORT ? attr->port_num : qp->port,
attr_mask, 0, attr);
if (err)
@@ -1634,7 +2222,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
context->pri_path.ackto_lt |= attr->timeout << 3;
if (attr_mask & IB_QP_ALT_PATH) {
- err = mlx5_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
+ err = mlx5_set_path(dev, qp, &attr->alt_ah_attr,
+ &context->alt_path,
attr->alt_port_num, attr_mask, 0, attr);
if (err)
goto out;
@@ -1706,41 +2295,51 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
* again to RTS, and may cause the driver and the device to get out of
* sync. */
if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR &&
- (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR))
+ (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR) &&
+ (qp->ibqp.qp_type != IB_QPT_RAW_PACKET))
mlx5_ib_qp_disable_pagefaults(qp);
+ if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE ||
+ !optab[mlx5_cur][mlx5_new])
+ goto out;
+
+ op = optab[mlx5_cur][mlx5_new];
optpar = ib_mask_to_mlx5_opt(attr_mask);
optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
in->optparam = cpu_to_be32(optpar);
- err = mlx5_core_qp_modify(dev->mdev, to_mlx5_state(cur_state),
- to_mlx5_state(new_state), in, sqd_event,
- &qp->mqp);
+
+ if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET)
+ err = modify_raw_packet_qp(dev, qp, op);
+ else
+ err = mlx5_core_qp_modify(dev->mdev, op, in, sqd_event,
+ &base->mqp);
if (err)
goto out;
- if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+ if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT &&
+ (qp->ibqp.qp_type != IB_QPT_RAW_PACKET))
mlx5_ib_qp_enable_pagefaults(qp);
qp->state = new_state;
if (attr_mask & IB_QP_ACCESS_FLAGS)
- qp->atomic_rd_en = attr->qp_access_flags;
+ qp->trans_qp.atomic_rd_en = attr->qp_access_flags;
if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
- qp->resp_depth = attr->max_dest_rd_atomic;
+ qp->trans_qp.resp_depth = attr->max_dest_rd_atomic;
if (attr_mask & IB_QP_PORT)
qp->port = attr->port_num;
if (attr_mask & IB_QP_ALT_PATH)
- qp->alt_port = attr->alt_port_num;
+ qp->trans_qp.alt_port = attr->alt_port_num;
/*
* If we moved a kernel QP to RESET, clean up all old CQ
* entries and reinitialize the QP.
*/
if (new_state == IB_QPS_RESET && !ibqp->uobject) {
- mlx5_ib_cq_clean(recv_cq, qp->mqp.qpn,
+ mlx5_ib_cq_clean(recv_cq, base->mqp.qpn,
ibqp->srq ? to_msrq(ibqp->srq) : NULL);
if (send_cq != recv_cq)
- mlx5_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
+ mlx5_ib_cq_clean(send_cq, base->mqp.qpn, NULL);
qp->rq.head = 0;
qp->rq.tail = 0;
@@ -1765,15 +2364,21 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
enum ib_qp_state cur_state, new_state;
int err = -EINVAL;
int port;
+ enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED;
mutex_lock(&qp->mutex);
cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
+ if (!(cur_state == new_state && cur_state == IB_QPS_RESET)) {
+ port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
+ ll = dev->ib_dev.get_link_layer(&dev->ib_dev, port);
+ }
+
if (ibqp->qp_type != MLX5_IB_QPT_REG_UMR &&
!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
- IB_LINK_LAYER_UNSPECIFIED))
+ ll))
goto out;
if ((attr_mask & IB_QP_PORT) &&
@@ -2570,7 +3175,7 @@ static void finish_wqe(struct mlx5_ib_qp *qp,
ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) |
mlx5_opcode | ((u32)opmod << 24));
- ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8));
+ ctrl->qpn_ds = cpu_to_be32(size | (qp->trans_qp.base.mqp.qpn << 8));
ctrl->fm_ce_se |= fence;
qp->fm_cache = next_fence;
if (unlikely(qp->wq_sig))
@@ -3003,7 +3608,7 @@ static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_at
ib_ah_attr->port_num > MLX5_CAP_GEN(dev, num_ports))
return;
- ib_ah_attr->sl = path->sl & 0xf;
+ ib_ah_attr->sl = path->dci_cfi_prio_sl & 0xf;
ib_ah_attr->dlid = be16_to_cpu(path->rlid);
ib_ah_attr->src_path_bits = path->grh_mlid & 0x7f;
@@ -3021,39 +3626,153 @@ static void to_ib_ah_attr(struct mlx5_ib_dev *ibdev, struct ib_ah_attr *ib_ah_at
}
}
-int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
- struct ib_qp_init_attr *qp_init_attr)
+static int query_raw_packet_qp_sq_state(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_sq *sq,
+ u8 *sq_state)
+{
+ void *out;
+ void *sqc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(query_sq_out);
+ out = mlx5_vzalloc(inlen);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5_core_query_sq(dev->mdev, sq->base.mqp.qpn, out);
+ if (err)
+ goto out;
+
+ sqc = MLX5_ADDR_OF(query_sq_out, out, sq_context);
+ *sq_state = MLX5_GET(sqc, sqc, state);
+ sq->state = *sq_state;
+
+out:
+ kvfree(out);
+ return err;
+}
+
+static int query_raw_packet_qp_rq_state(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_rq *rq,
+ u8 *rq_state)
+{
+ void *out;
+ void *rqc;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(query_rq_out);
+ out = mlx5_vzalloc(inlen);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5_core_query_rq(dev->mdev, rq->base.mqp.qpn, out);
+ if (err)
+ goto out;
+
+ rqc = MLX5_ADDR_OF(query_rq_out, out, rq_context);
+ *rq_state = MLX5_GET(rqc, rqc, state);
+ rq->state = *rq_state;
+
+out:
+ kvfree(out);
+ return err;
+}
+
+static int sqrq_state_to_qp_state(u8 sq_state, u8 rq_state,
+ struct mlx5_ib_qp *qp, u8 *qp_state)
+{
+ static const u8 sqrq_trans[MLX5_RQ_NUM_STATE][MLX5_SQ_NUM_STATE] = {
+ [MLX5_RQC_STATE_RST] = {
+ [MLX5_SQC_STATE_RST] = IB_QPS_RESET,
+ [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE_BAD,
+ [MLX5_SQC_STATE_ERR] = MLX5_QP_STATE_BAD,
+ [MLX5_SQ_STATE_NA] = IB_QPS_RESET,
+ },
+ [MLX5_RQC_STATE_RDY] = {
+ [MLX5_SQC_STATE_RST] = MLX5_QP_STATE_BAD,
+ [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE,
+ [MLX5_SQC_STATE_ERR] = IB_QPS_SQE,
+ [MLX5_SQ_STATE_NA] = MLX5_QP_STATE,
+ },
+ [MLX5_RQC_STATE_ERR] = {
+ [MLX5_SQC_STATE_RST] = MLX5_QP_STATE_BAD,
+ [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE_BAD,
+ [MLX5_SQC_STATE_ERR] = IB_QPS_ERR,
+ [MLX5_SQ_STATE_NA] = IB_QPS_ERR,
+ },
+ [MLX5_RQ_STATE_NA] = {
+ [MLX5_SQC_STATE_RST] = IB_QPS_RESET,
+ [MLX5_SQC_STATE_RDY] = MLX5_QP_STATE,
+ [MLX5_SQC_STATE_ERR] = MLX5_QP_STATE,
+ [MLX5_SQ_STATE_NA] = MLX5_QP_STATE_BAD,
+ },
+ };
+
+ *qp_state = sqrq_trans[rq_state][sq_state];
+
+ if (*qp_state == MLX5_QP_STATE_BAD) {
+ WARN(1, "Buggy Raw Packet QP state, SQ 0x%x state: 0x%x, RQ 0x%x state: 0x%x",
+ qp->raw_packet_qp.sq.base.mqp.qpn, sq_state,
+ qp->raw_packet_qp.rq.base.mqp.qpn, rq_state);
+ return -EINVAL;
+ }
+
+ if (*qp_state == MLX5_QP_STATE)
+ *qp_state = qp->state;
+
+ return 0;
+}
+
+static int query_raw_packet_qp_state(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_qp *qp,
+ u8 *raw_packet_qp_state)
+{
+ struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
+ struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+ struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+ int err;
+ u8 sq_state = MLX5_SQ_STATE_NA;
+ u8 rq_state = MLX5_RQ_STATE_NA;
+
+ if (qp->sq.wqe_cnt) {
+ err = query_raw_packet_qp_sq_state(dev, sq, &sq_state);
+ if (err)
+ return err;
+ }
+
+ if (qp->rq.wqe_cnt) {
+ err = query_raw_packet_qp_rq_state(dev, rq, &rq_state);
+ if (err)
+ return err;
+ }
+
+ return sqrq_state_to_qp_state(sq_state, rq_state, qp,
+ raw_packet_qp_state);
+}
+
+static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
+ struct ib_qp_attr *qp_attr)
{
- struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
- struct mlx5_ib_qp *qp = to_mqp(ibqp);
struct mlx5_query_qp_mbox_out *outb;
struct mlx5_qp_context *context;
int mlx5_state;
int err = 0;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- /*
- * Wait for any outstanding page faults, in case the user frees memory
- * based upon this query's result.
- */
- flush_workqueue(mlx5_ib_page_fault_wq);
-#endif
-
- mutex_lock(&qp->mutex);
outb = kzalloc(sizeof(*outb), GFP_KERNEL);
- if (!outb) {
- err = -ENOMEM;
- goto out;
- }
+ if (!outb)
+ return -ENOMEM;
+
context = &outb->ctx;
- err = mlx5_core_qp_query(dev->mdev, &qp->mqp, outb, sizeof(*outb));
+ err = mlx5_core_qp_query(dev->mdev, &qp->trans_qp.base.mqp, outb,
+ sizeof(*outb));
if (err)
- goto out_free;
+ goto out;
mlx5_state = be32_to_cpu(context->flags) >> 28;
qp->state = to_ib_qp_state(mlx5_state);
- qp_attr->qp_state = qp->state;
qp_attr->path_mtu = context->mtu_msgmax >> 5;
qp_attr->path_mig_state =
to_ib_mig_state((be32_to_cpu(context->flags) >> 11) & 0x3);
@@ -3087,6 +3806,43 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
qp_attr->retry_cnt = (be32_to_cpu(context->params1) >> 16) & 0x7;
qp_attr->rnr_retry = (be32_to_cpu(context->params1) >> 13) & 0x7;
qp_attr->alt_timeout = context->alt_path.ackto_lt >> 3;
+
+out:
+ kfree(outb);
+ return err;
+}
+
+int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+ struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ int err = 0;
+ u8 raw_packet_qp_state;
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ /*
+ * Wait for any outstanding page faults, in case the user frees memory
+ * based upon this query's result.
+ */
+ flush_workqueue(mlx5_ib_page_fault_wq);
+#endif
+
+ mutex_lock(&qp->mutex);
+
+ if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
+ err = query_raw_packet_qp_state(dev, qp, &raw_packet_qp_state);
+ if (err)
+ goto out;
+ qp->state = raw_packet_qp_state;
+ qp_attr->port_num = 1;
+ } else {
+ err = query_qp_attr(dev, qp, qp_attr);
+ if (err)
+ goto out;
+ }
+
+ qp_attr->qp_state = qp->state;
qp_attr->cur_qp_state = qp_attr->qp_state;
qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt;
qp_attr->cap.max_recv_sge = qp->rq.max_gs;
@@ -3110,12 +3866,16 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
qp_init_attr->create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
+ if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
+ qp_init_attr->create_flags |= IB_QP_CREATE_CROSS_CHANNEL;
+ if (qp->flags & MLX5_IB_QP_MANAGED_SEND)
+ qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_SEND;
+ if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
+ qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV;
+
qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ?
IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
-out_free:
- kfree(outb);
-
out:
mutex_unlock(&qp->mutex);
return err;
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index e008505e96e9..4659256cd95e 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -78,28 +78,41 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
struct ib_udata *udata, int buf_size, int *inlen)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
- struct mlx5_ib_create_srq ucmd;
+ struct mlx5_ib_create_srq ucmd = {};
size_t ucmdlen;
+ void *xsrqc;
int err;
int npages;
int page_shift;
int ncont;
u32 offset;
+ u32 uidx = MLX5_IB_DEFAULT_UIDX;
+ int drv_data = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
- ucmdlen =
- (udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) <
- sizeof(ucmd)) ? (sizeof(ucmd) -
- sizeof(ucmd.reserved)) : sizeof(ucmd);
+ if (drv_data < 0)
+ return -EINVAL;
+
+ ucmdlen = (drv_data < sizeof(ucmd)) ?
+ drv_data : sizeof(ucmd);
if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) {
mlx5_ib_dbg(dev, "failed copy udata\n");
return -EFAULT;
}
- if (ucmdlen == sizeof(ucmd) &&
- ucmd.reserved != 0)
+ if (ucmd.reserved0 || ucmd.reserved1)
return -EINVAL;
+ if (drv_data > sizeof(ucmd) &&
+ !ib_is_udata_cleared(udata, sizeof(ucmd),
+ drv_data - sizeof(ucmd)))
+ return -EINVAL;
+
+ err = get_srq_user_index(to_mucontext(pd->uobject->context),
+ &ucmd, udata->inlen, &uidx);
+ if (err)
+ return err;
+
srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE);
srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, buf_size,
@@ -138,6 +151,12 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
(*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
(*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);
+ if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) {
+ xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in,
+ xrc_srq_context_entry);
+ MLX5_SET(xrc_srqc, xsrqc, user_index, uidx);
+ }
+
return 0;
err_in:
@@ -158,6 +177,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
struct mlx5_wqe_srq_next_seg *next;
int page_shift;
int npages;
+ void *xsrqc;
err = mlx5_db_alloc(dev->mdev, &srq->db);
if (err) {
@@ -204,6 +224,13 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
(*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+ if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) {
+ xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in,
+ xrc_srq_context_entry);
+ /* 0xffffff means we ask to work with cqe version 0 */
+ MLX5_SET(xrc_srqc, xsrqc, user_index, MLX5_IB_DEFAULT_UIDX);
+ }
+
return 0;
err_in:
diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h
index 76fb7b927d37..b94a55404a59 100644
--- a/drivers/infiniband/hw/mlx5/user.h
+++ b/drivers/infiniband/hw/mlx5/user.h
@@ -35,6 +35,8 @@
#include <linux/types.h>
+#include "mlx5_ib.h"
+
enum {
MLX5_QP_FLAG_SIGNATURE = 1 << 0,
MLX5_QP_FLAG_SCATTER_CQE = 1 << 1,
@@ -66,7 +68,15 @@ struct mlx5_ib_alloc_ucontext_req_v2 {
__u32 total_num_uuars;
__u32 num_low_latency_uuars;
__u32 flags;
- __u32 reserved;
+ __u32 comp_mask;
+ __u8 max_cqe_version;
+ __u8 reserved0;
+ __u16 reserved1;
+ __u32 reserved2;
+};
+
+enum mlx5_ib_alloc_ucontext_resp_mask {
+ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0,
};
struct mlx5_ib_alloc_ucontext_resp {
@@ -80,7 +90,13 @@ struct mlx5_ib_alloc_ucontext_resp {
__u32 max_recv_wr;
__u32 max_srq_recv_wr;
__u16 num_ports;
- __u16 reserved;
+ __u16 reserved1;
+ __u32 comp_mask;
+ __u32 response_length;
+ __u8 cqe_version;
+ __u8 reserved2;
+ __u16 reserved3;
+ __u64 hca_core_clock_offset;
};
struct mlx5_ib_alloc_pd_resp {
@@ -110,7 +126,9 @@ struct mlx5_ib_create_srq {
__u64 buf_addr;
__u64 db_addr;
__u32 flags;
- __u32 reserved; /* explicit padding (optional on i386) */
+ __u32 reserved0; /* explicit padding (optional on i386) */
+ __u32 uidx;
+ __u32 reserved1;
};
struct mlx5_ib_create_srq_resp {
@@ -125,9 +143,48 @@ struct mlx5_ib_create_qp {
__u32 rq_wqe_count;
__u32 rq_wqe_shift;
__u32 flags;
+ __u32 uidx;
+ __u32 reserved0;
+ __u64 sq_buf_addr;
};
struct mlx5_ib_create_qp_resp {
__u32 uuar_index;
};
+
+static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext,
+ struct mlx5_ib_create_qp *ucmd,
+ int inlen,
+ u32 *user_index)
+{
+ u8 cqe_version = ucontext->cqe_version;
+
+ if (field_avail(struct mlx5_ib_create_qp, uidx, inlen) &&
+ !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX))
+ return 0;
+
+ if (!!(field_avail(struct mlx5_ib_create_qp, uidx, inlen) !=
+ !!cqe_version))
+ return -EINVAL;
+
+ return verify_assign_uidx(cqe_version, ucmd->uidx, user_index);
+}
+
+static inline int get_srq_user_index(struct mlx5_ib_ucontext *ucontext,
+ struct mlx5_ib_create_srq *ucmd,
+ int inlen,
+ u32 *user_index)
+{
+ u8 cqe_version = ucontext->cqe_version;
+
+ if (field_avail(struct mlx5_ib_create_srq, uidx, inlen) &&
+ !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX))
+ return 0;
+
+ if (!!(field_avail(struct mlx5_ib_create_srq, uidx, inlen) !=
+ !!cqe_version))
+ return -EINVAL;
+
+ return verify_assign_uidx(cqe_version, ucmd->uidx, user_index);
+}
#endif /* MLX5_IB_USER_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
index 40ba83338155..a6531ffe29a6 100644
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -608,9 +608,6 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
entry->opcode = IB_WC_FETCH_ADD;
entry->byte_len = MTHCA_ATOMIC_BYTE_LEN;
break;
- case MTHCA_OPCODE_BIND_MW:
- entry->opcode = IB_WC_BIND_MW;
- break;
default:
entry->opcode = MTHCA_OPCODE_INVALID;
break;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index dc2d48c59e62..9866c35cc977 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -898,89 +898,6 @@ static struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc)
return &mr->ibmr;
}
-static struct ib_mr *mthca_reg_phys_mr(struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf,
- int acc,
- u64 *iova_start)
-{
- struct mthca_mr *mr;
- u64 *page_list;
- u64 total_size;
- unsigned long mask;
- int shift;
- int npages;
- int err;
- int i, j, n;
-
- mask = buffer_list[0].addr ^ *iova_start;
- total_size = 0;
- for (i = 0; i < num_phys_buf; ++i) {
- if (i != 0)
- mask |= buffer_list[i].addr;
- if (i != num_phys_buf - 1)
- mask |= buffer_list[i].addr + buffer_list[i].size;
-
- total_size += buffer_list[i].size;
- }
-
- if (mask & ~PAGE_MASK)
- return ERR_PTR(-EINVAL);
-
- shift = __ffs(mask | 1 << 31);
-
- buffer_list[0].size += buffer_list[0].addr & ((1ULL << shift) - 1);
- buffer_list[0].addr &= ~0ull << shift;
-
- mr = kmalloc(sizeof *mr, GFP_KERNEL);
- if (!mr)
- return ERR_PTR(-ENOMEM);
-
- npages = 0;
- for (i = 0; i < num_phys_buf; ++i)
- npages += (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
-
- if (!npages)
- return &mr->ibmr;
-
- page_list = kmalloc(npages * sizeof *page_list, GFP_KERNEL);
- if (!page_list) {
- kfree(mr);
- return ERR_PTR(-ENOMEM);
- }
-
- n = 0;
- for (i = 0; i < num_phys_buf; ++i)
- for (j = 0;
- j < (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
- ++j)
- page_list[n++] = buffer_list[i].addr + ((u64) j << shift);
-
- mthca_dbg(to_mdev(pd->device), "Registering memory at %llx (iova %llx) "
- "in PD %x; shift %d, npages %d.\n",
- (unsigned long long) buffer_list[0].addr,
- (unsigned long long) *iova_start,
- to_mpd(pd)->pd_num,
- shift, npages);
-
- err = mthca_mr_alloc_phys(to_mdev(pd->device),
- to_mpd(pd)->pd_num,
- page_list, shift, npages,
- *iova_start, total_size,
- convert_access(acc), mr);
-
- if (err) {
- kfree(page_list);
- kfree(mr);
- return ERR_PTR(err);
- }
-
- kfree(page_list);
- mr->umem = NULL;
-
- return &mr->ibmr;
-}
-
static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt, int acc, struct ib_udata *udata)
{
@@ -1346,7 +1263,6 @@ int mthca_register_device(struct mthca_dev *dev)
dev->ib_dev.destroy_cq = mthca_destroy_cq;
dev->ib_dev.poll_cq = mthca_poll_cq;
dev->ib_dev.get_dma_mr = mthca_get_dma_mr;
- dev->ib_dev.reg_phys_mr = mthca_reg_phys_mr;
dev->ib_dev.reg_user_mr = mthca_reg_user_mr;
dev->ib_dev.dereg_mr = mthca_dereg_mr;
dev->ib_dev.get_port_immutable = mthca_port_immutable;
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 35fe506e2cfa..96e5fb91fb48 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -1485,7 +1485,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
u16 pkey;
ib_ud_header_init(256, /* assume a MAD */ 1, 0, 0,
- mthca_ah_grh_present(to_mah(wr->ah)), 0,
+ mthca_ah_grh_present(to_mah(wr->ah)), 0, 0, 0,
&sqp->ud_header);
err = mthca_read_ah(dev, to_mah(wr->ah), &sqp->ud_header);
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 8a3ad170d790..cb9f0f27308d 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -134,7 +134,7 @@ static void record_ird_ord(struct nes_cm_node *, u16, u16);
/* External CM API Interface */
/* instance of function pointers for client API */
/* set address of this instance to cm_core->cm_ops at cm_core alloc */
-static struct nes_cm_ops nes_cm_api = {
+static const struct nes_cm_ops nes_cm_api = {
mini_cm_accelerated,
mini_cm_listen,
mini_cm_del_listen,
@@ -3232,7 +3232,6 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
int passive_state;
struct nes_ib_device *nesibdev;
struct ib_mr *ibmr = NULL;
- struct ib_phys_buf ibphysbuf;
struct nes_pd *nespd;
u64 tagged_offset;
u8 mpa_frame_offset = 0;
@@ -3316,21 +3315,19 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
u64temp = (unsigned long)nesqp;
nesibdev = nesvnic->nesibdev;
nespd = nesqp->nespd;
- ibphysbuf.addr = nesqp->ietf_frame_pbase + mpa_frame_offset;
- ibphysbuf.size = buff_len;
tagged_offset = (u64)(unsigned long)*start_buff;
- ibmr = nesibdev->ibdev.reg_phys_mr((struct ib_pd *)nespd,
- &ibphysbuf, 1,
- IB_ACCESS_LOCAL_WRITE,
- &tagged_offset);
- if (!ibmr) {
+ ibmr = nes_reg_phys_mr(&nespd->ibpd,
+ nesqp->ietf_frame_pbase + mpa_frame_offset,
+ buff_len, IB_ACCESS_LOCAL_WRITE,
+ &tagged_offset);
+ if (IS_ERR(ibmr)) {
nes_debug(NES_DBG_CM, "Unable to register memory region"
"for lSMM for cm_node = %p \n",
cm_node);
pci_free_consistent(nesdev->pcidev,
nesqp->private_data_len + nesqp->ietf_frame_size,
nesqp->ietf_frame, nesqp->ietf_frame_pbase);
- return -ENOMEM;
+ return PTR_ERR(ibmr);
}
ibmr->pd = &nespd->ibpd;
diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h
index 32a6420c2940..147c2c884227 100644
--- a/drivers/infiniband/hw/nes/nes_cm.h
+++ b/drivers/infiniband/hw/nes/nes_cm.h
@@ -423,7 +423,7 @@ struct nes_cm_core {
struct timer_list tcp_timer;
- struct nes_cm_ops *api;
+ const struct nes_cm_ops *api;
int (*post_event)(struct nes_cm_event *event);
atomic_t events_posted;
diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c
index 2042c0f29759..6d3a169c049b 100644
--- a/drivers/infiniband/hw/nes/nes_utils.c
+++ b/drivers/infiniband/hw/nes/nes_utils.c
@@ -727,7 +727,7 @@ int nes_arp_table(struct nes_device *nesdev, u32 ip_addr, u8 *mac_addr, u32 acti
if (action == NES_ARP_DELETE) {
nes_debug(NES_DBG_NETDEV, "DELETE, arp_index=%d\n", arp_index);
nesadapter->arp_table[arp_index].ip_addr = 0;
- memset(nesadapter->arp_table[arp_index].mac_addr, 0x00, ETH_ALEN);
+ eth_zero_addr(nesadapter->arp_table[arp_index].mac_addr);
nes_free_resource(nesadapter, nesadapter->allocated_arps, arp_index);
return arp_index;
}
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 137880a19ebe..8c4daf7f22ec 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -206,80 +206,6 @@ static int nes_dealloc_mw(struct ib_mw *ibmw)
}
-/**
- * nes_bind_mw
- */
-static int nes_bind_mw(struct ib_qp *ibqp, struct ib_mw *ibmw,
- struct ib_mw_bind *ibmw_bind)
-{
- u64 u64temp;
- struct nes_vnic *nesvnic = to_nesvnic(ibqp->device);
- struct nes_device *nesdev = nesvnic->nesdev;
- /* struct nes_mr *nesmr = to_nesmw(ibmw); */
- struct nes_qp *nesqp = to_nesqp(ibqp);
- struct nes_hw_qp_wqe *wqe;
- unsigned long flags = 0;
- u32 head;
- u32 wqe_misc = 0;
- u32 qsize;
-
- if (nesqp->ibqp_state > IB_QPS_RTS)
- return -EINVAL;
-
- spin_lock_irqsave(&nesqp->lock, flags);
-
- head = nesqp->hwqp.sq_head;
- qsize = nesqp->hwqp.sq_tail;
-
- /* Check for SQ overflow */
- if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) {
- spin_unlock_irqrestore(&nesqp->lock, flags);
- return -ENOMEM;
- }
-
- wqe = &nesqp->hwqp.sq_vbase[head];
- /* nes_debug(NES_DBG_MR, "processing sq wqe at %p, head = %u.\n", wqe, head); */
- nes_fill_init_qp_wqe(wqe, nesqp, head);
- u64temp = ibmw_bind->wr_id;
- set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX, u64temp);
- wqe_misc = NES_IWARP_SQ_OP_BIND;
-
- wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
-
- if (ibmw_bind->send_flags & IB_SEND_SIGNALED)
- wqe_misc |= NES_IWARP_SQ_WQE_SIGNALED_COMPL;
-
- if (ibmw_bind->bind_info.mw_access_flags & IB_ACCESS_REMOTE_WRITE)
- wqe_misc |= NES_CQP_STAG_RIGHTS_REMOTE_WRITE;
- if (ibmw_bind->bind_info.mw_access_flags & IB_ACCESS_REMOTE_READ)
- wqe_misc |= NES_CQP_STAG_RIGHTS_REMOTE_READ;
-
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_MISC_IDX, wqe_misc);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MR_IDX,
- ibmw_bind->bind_info.mr->lkey);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_MW_IDX, ibmw->rkey);
- set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_LENGTH_LOW_IDX,
- ibmw_bind->bind_info.length);
- wqe->wqe_words[NES_IWARP_SQ_BIND_WQE_LENGTH_HIGH_IDX] = 0;
- u64temp = (u64)ibmw_bind->bind_info.addr;
- set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_BIND_WQE_VA_FBO_LOW_IDX, u64temp);
-
- head++;
- if (head >= qsize)
- head = 0;
-
- nesqp->hwqp.sq_head = head;
- barrier();
-
- nes_write32(nesdev->regs+NES_WQE_ALLOC,
- (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id);
-
- spin_unlock_irqrestore(&nesqp->lock, flags);
-
- return 0;
-}
-
-
/*
* nes_alloc_fast_mr
*/
@@ -2074,9 +2000,8 @@ static int nes_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd,
/**
* nes_reg_phys_mr
*/
-static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
- struct ib_phys_buf *buffer_list, int num_phys_buf, int acc,
- u64 * iova_start)
+struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd, u64 addr, u64 size,
+ int acc, u64 *iova_start)
{
u64 region_length;
struct nes_pd *nespd = to_nespd(ib_pd);
@@ -2088,13 +2013,10 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
struct nes_vpbl vpbl;
struct nes_root_vpbl root_vpbl;
u32 stag;
- u32 i;
unsigned long mask;
u32 stag_index = 0;
u32 next_stag_index = 0;
u32 driver_key = 0;
- u32 root_pbl_index = 0;
- u32 cur_pbl_index = 0;
int err = 0;
int ret = 0;
u16 pbl_count = 0;
@@ -2113,11 +2035,8 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
next_stag_index >>= 8;
next_stag_index %= nesadapter->max_mr;
- if (num_phys_buf > (1024*512)) {
- return ERR_PTR(-E2BIG);
- }
- if ((buffer_list[0].addr ^ *iova_start) & ~PAGE_MASK)
+ if ((addr ^ *iova_start) & ~PAGE_MASK)
return ERR_PTR(-EINVAL);
err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, nesadapter->max_mr,
@@ -2132,84 +2051,33 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
return ERR_PTR(-ENOMEM);
}
- for (i = 0; i < num_phys_buf; i++) {
+ /* Allocate a 4K buffer for the PBL */
+ vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
+ &vpbl.pbl_pbase);
+ nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%016lX\n",
+ vpbl.pbl_vbase, (unsigned long)vpbl.pbl_pbase);
+ if (!vpbl.pbl_vbase) {
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+ ibmr = ERR_PTR(-ENOMEM);
+ kfree(nesmr);
+ goto reg_phys_err;
+ }
- if ((i & 0x01FF) == 0) {
- if (root_pbl_index == 1) {
- /* Allocate the root PBL */
- root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 8192,
- &root_vpbl.pbl_pbase);
- nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n",
- root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase);
- if (!root_vpbl.pbl_vbase) {
- pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
- vpbl.pbl_pbase);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- kfree(nesmr);
- return ERR_PTR(-ENOMEM);
- }
- root_vpbl.leaf_vpbl = kzalloc(sizeof(*root_vpbl.leaf_vpbl)*1024, GFP_KERNEL);
- if (!root_vpbl.leaf_vpbl) {
- pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase,
- root_vpbl.pbl_pbase);
- pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase,
- vpbl.pbl_pbase);
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- kfree(nesmr);
- return ERR_PTR(-ENOMEM);
- }
- root_vpbl.pbl_vbase[0].pa_low = cpu_to_le32((u32)vpbl.pbl_pbase);
- root_vpbl.pbl_vbase[0].pa_high =
- cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32)));
- root_vpbl.leaf_vpbl[0] = vpbl;
- }
- /* Allocate a 4K buffer for the PBL */
- vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
- &vpbl.pbl_pbase);
- nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%016lX\n",
- vpbl.pbl_vbase, (unsigned long)vpbl.pbl_pbase);
- if (!vpbl.pbl_vbase) {
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- ibmr = ERR_PTR(-ENOMEM);
- kfree(nesmr);
- goto reg_phys_err;
- }
- /* Fill in the root table */
- if (1 <= root_pbl_index) {
- root_vpbl.pbl_vbase[root_pbl_index].pa_low =
- cpu_to_le32((u32)vpbl.pbl_pbase);
- root_vpbl.pbl_vbase[root_pbl_index].pa_high =
- cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32)));
- root_vpbl.leaf_vpbl[root_pbl_index] = vpbl;
- }
- root_pbl_index++;
- cur_pbl_index = 0;
- }
- mask = !buffer_list[i].size;
- if (i != 0)
- mask |= buffer_list[i].addr;
- if (i != num_phys_buf - 1)
- mask |= buffer_list[i].addr + buffer_list[i].size;
-
- if (mask & ~PAGE_MASK) {
- nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
- nes_debug(NES_DBG_MR, "Invalid buffer addr or size\n");
- ibmr = ERR_PTR(-EINVAL);
- kfree(nesmr);
- goto reg_phys_err;
- }
+ mask = !size;
- region_length += buffer_list[i].size;
- if ((i != 0) && (single_page)) {
- if ((buffer_list[i-1].addr+PAGE_SIZE) != buffer_list[i].addr)
- single_page = 0;
- }
- vpbl.pbl_vbase[cur_pbl_index].pa_low = cpu_to_le32((u32)buffer_list[i].addr & PAGE_MASK);
- vpbl.pbl_vbase[cur_pbl_index++].pa_high =
- cpu_to_le32((u32)((((u64)buffer_list[i].addr) >> 32)));
+ if (mask & ~PAGE_MASK) {
+ nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+ nes_debug(NES_DBG_MR, "Invalid buffer addr or size\n");
+ ibmr = ERR_PTR(-EINVAL);
+ kfree(nesmr);
+ goto reg_phys_err;
}
+ region_length += size;
+ vpbl.pbl_vbase[0].pa_low = cpu_to_le32((u32)addr & PAGE_MASK);
+ vpbl.pbl_vbase[0].pa_high = cpu_to_le32((u32)((((u64)addr) >> 32)));
+
stag = stag_index << 8;
stag |= driver_key;
stag += (u32)stag_key;
@@ -2219,17 +2087,15 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
stag, (unsigned long)*iova_start, (unsigned long)region_length, stag_index);
/* Make the leaf PBL the root if only one PBL */
- if (root_pbl_index == 1) {
- root_vpbl.pbl_pbase = vpbl.pbl_pbase;
- }
+ root_vpbl.pbl_pbase = vpbl.pbl_pbase;
if (single_page) {
pbl_count = 0;
} else {
- pbl_count = root_pbl_index;
+ pbl_count = 1;
}
ret = nes_reg_mr(nesdev, nespd, stag, region_length, &root_vpbl,
- buffer_list[0].addr, pbl_count, (u16)cur_pbl_index, acc, iova_start,
+ addr, pbl_count, 1, acc, iova_start,
&nesmr->pbls_used, &nesmr->pbl_4k);
if (ret == 0) {
@@ -2242,21 +2108,9 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
ibmr = ERR_PTR(-ENOMEM);
}
- reg_phys_err:
- /* free the resources */
- if (root_pbl_index == 1) {
- /* single PBL case */
- pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, vpbl.pbl_pbase);
- } else {
- for (i=0; i<root_pbl_index; i++) {
- pci_free_consistent(nesdev->pcidev, 4096, root_vpbl.leaf_vpbl[i].pbl_vbase,
- root_vpbl.leaf_vpbl[i].pbl_pbase);
- }
- kfree(root_vpbl.leaf_vpbl);
- pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase,
- root_vpbl.pbl_pbase);
- }
-
+reg_phys_err:
+ /* single PBL case */
+ pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, vpbl.pbl_pbase);
return ibmr;
}
@@ -2266,17 +2120,13 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
*/
static struct ib_mr *nes_get_dma_mr(struct ib_pd *pd, int acc)
{
- struct ib_phys_buf bl;
u64 kva = 0;
nes_debug(NES_DBG_MR, "\n");
- bl.size = (u64)0xffffffffffULL;
- bl.addr = 0;
- return nes_reg_phys_mr(pd, &bl, 1, acc, &kva);
+ return nes_reg_phys_mr(pd, 0, 0xffffffffffULL, acc, &kva);
}
-
/**
* nes_reg_user_mr
*/
@@ -3888,12 +3738,10 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
nesibdev->ibdev.destroy_cq = nes_destroy_cq;
nesibdev->ibdev.poll_cq = nes_poll_cq;
nesibdev->ibdev.get_dma_mr = nes_get_dma_mr;
- nesibdev->ibdev.reg_phys_mr = nes_reg_phys_mr;
nesibdev->ibdev.reg_user_mr = nes_reg_user_mr;
nesibdev->ibdev.dereg_mr = nes_dereg_mr;
nesibdev->ibdev.alloc_mw = nes_alloc_mw;
nesibdev->ibdev.dealloc_mw = nes_dealloc_mw;
- nesibdev->ibdev.bind_mw = nes_bind_mw;
nesibdev->ibdev.alloc_mr = nes_alloc_mr;
nesibdev->ibdev.map_mr_sg = nes_map_mr_sg;
diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h
index a204b677af22..70290883d067 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.h
+++ b/drivers/infiniband/hw/nes/nes_verbs.h
@@ -190,4 +190,8 @@ struct nes_qp {
u8 pau_state;
__u64 nesuqp_addr;
};
+
+struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
+ u64 addr, u64 size, int acc, u64 *iova_start);
+
#endif /* NES_VERBS_H */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index 9820074be59d..3790771f2baa 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -152,9 +152,10 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
if ((pd->uctx) &&
(!rdma_is_multicast_addr((struct in6_addr *)attr->grh.dgid.raw)) &&
(!rdma_link_local_addr((struct in6_addr *)attr->grh.dgid.raw))) {
- status = rdma_addr_find_dmac_by_grh(&sgid, &attr->grh.dgid,
- attr->dmac, &vlan_tag,
- sgid_attr.ndev->ifindex);
+ status = rdma_addr_find_l2_eth_by_grh(&sgid, &attr->grh.dgid,
+ attr->dmac, &vlan_tag,
+ &sgid_attr.ndev->ifindex,
+ NULL);
if (status) {
pr_err("%s(): Failed to resolve dmac from gid."
"status = %d\n", __func__, status);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 3afb40b85159..573849354cb9 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -175,7 +175,6 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
dev->ibdev.req_notify_cq = ocrdma_arm_cq;
dev->ibdev.get_dma_mr = ocrdma_get_dma_mr;
- dev->ibdev.reg_phys_mr = ocrdma_reg_kernel_mr;
dev->ibdev.dereg_mr = ocrdma_dereg_mr;
dev->ibdev.reg_user_mr = ocrdma_reg_user_mr;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 76e96f97b3f6..d4c687b548d8 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -3066,169 +3066,6 @@ pl_err:
return ERR_PTR(-ENOMEM);
}
-#define MAX_KERNEL_PBE_SIZE 65536
-static inline int count_kernel_pbes(struct ib_phys_buf *buf_list,
- int buf_cnt, u32 *pbe_size)
-{
- u64 total_size = 0;
- u64 buf_size = 0;
- int i;
- *pbe_size = roundup(buf_list[0].size, PAGE_SIZE);
- *pbe_size = roundup_pow_of_two(*pbe_size);
-
- /* find the smallest PBE size that we can have */
- for (i = 0; i < buf_cnt; i++) {
- /* first addr may not be page aligned, so ignore checking */
- if ((i != 0) && ((buf_list[i].addr & ~PAGE_MASK) ||
- (buf_list[i].size & ~PAGE_MASK))) {
- return 0;
- }
-
- /* if configured PBE size is greater then the chosen one,
- * reduce the PBE size.
- */
- buf_size = roundup(buf_list[i].size, PAGE_SIZE);
- /* pbe_size has to be even multiple of 4K 1,2,4,8...*/
- buf_size = roundup_pow_of_two(buf_size);
- if (*pbe_size > buf_size)
- *pbe_size = buf_size;
-
- total_size += buf_size;
- }
- *pbe_size = *pbe_size > MAX_KERNEL_PBE_SIZE ?
- (MAX_KERNEL_PBE_SIZE) : (*pbe_size);
-
- /* num_pbes = total_size / (*pbe_size); this is implemented below. */
-
- return total_size >> ilog2(*pbe_size);
-}
-
-static void build_kernel_pbes(struct ib_phys_buf *buf_list, int ib_buf_cnt,
- u32 pbe_size, struct ocrdma_pbl *pbl_tbl,
- struct ocrdma_hw_mr *hwmr)
-{
- int i;
- int idx;
- int pbes_per_buf = 0;
- u64 buf_addr = 0;
- int num_pbes;
- struct ocrdma_pbe *pbe;
- int total_num_pbes = 0;
-
- if (!hwmr->num_pbes)
- return;
-
- pbe = (struct ocrdma_pbe *)pbl_tbl->va;
- num_pbes = 0;
-
- /* go through the OS phy regions & fill hw pbe entries into pbls. */
- for (i = 0; i < ib_buf_cnt; i++) {
- buf_addr = buf_list[i].addr;
- pbes_per_buf =
- roundup_pow_of_two(roundup(buf_list[i].size, PAGE_SIZE)) /
- pbe_size;
- hwmr->len += buf_list[i].size;
- /* number of pbes can be more for one OS buf, when
- * buffers are of different sizes.
- * split the ib_buf to one or more pbes.
- */
- for (idx = 0; idx < pbes_per_buf; idx++) {
- /* we program always page aligned addresses,
- * first unaligned address is taken care by fbo.
- */
- if (i == 0) {
- /* for non zero fbo, assign the
- * start of the page.
- */
- pbe->pa_lo =
- cpu_to_le32((u32) (buf_addr & PAGE_MASK));
- pbe->pa_hi =
- cpu_to_le32((u32) upper_32_bits(buf_addr));
- } else {
- pbe->pa_lo =
- cpu_to_le32((u32) (buf_addr & 0xffffffff));
- pbe->pa_hi =
- cpu_to_le32((u32) upper_32_bits(buf_addr));
- }
- buf_addr += pbe_size;
- num_pbes += 1;
- total_num_pbes += 1;
- pbe++;
-
- if (total_num_pbes == hwmr->num_pbes)
- goto mr_tbl_done;
- /* if the pbl is full storing the pbes,
- * move to next pbl.
- */
- if (num_pbes == (hwmr->pbl_size/sizeof(u64))) {
- pbl_tbl++;
- pbe = (struct ocrdma_pbe *)pbl_tbl->va;
- num_pbes = 0;
- }
- }
- }
-mr_tbl_done:
- return;
-}
-
-struct ib_mr *ocrdma_reg_kernel_mr(struct ib_pd *ibpd,
- struct ib_phys_buf *buf_list,
- int buf_cnt, int acc, u64 *iova_start)
-{
- int status = -ENOMEM;
- struct ocrdma_mr *mr;
- struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
- struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
- u32 num_pbes;
- u32 pbe_size = 0;
-
- if ((acc & IB_ACCESS_REMOTE_WRITE) && !(acc & IB_ACCESS_LOCAL_WRITE))
- return ERR_PTR(-EINVAL);
-
- mr = kzalloc(sizeof(*mr), GFP_KERNEL);
- if (!mr)
- return ERR_PTR(status);
-
- num_pbes = count_kernel_pbes(buf_list, buf_cnt, &pbe_size);
- if (num_pbes == 0) {
- status = -EINVAL;
- goto pbl_err;
- }
- status = ocrdma_get_pbl_info(dev, mr, num_pbes);
- if (status)
- goto pbl_err;
-
- mr->hwmr.pbe_size = pbe_size;
- mr->hwmr.fbo = *iova_start - (buf_list[0].addr & PAGE_MASK);
- mr->hwmr.va = *iova_start;
- mr->hwmr.local_rd = 1;
- mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
- mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
- mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
- mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
- mr->hwmr.mw_bind = (acc & IB_ACCESS_MW_BIND) ? 1 : 0;
-
- status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
- if (status)
- goto pbl_err;
- build_kernel_pbes(buf_list, buf_cnt, pbe_size, mr->hwmr.pbl_table,
- &mr->hwmr);
- status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc);
- if (status)
- goto mbx_err;
-
- mr->ibmr.lkey = mr->hwmr.lkey;
- if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
- mr->ibmr.rkey = mr->hwmr.lkey;
- return &mr->ibmr;
-
-mbx_err:
- ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
-pbl_err:
- kfree(mr);
- return ERR_PTR(status);
-}
-
static int ocrdma_set_page(struct ib_mr *ibmr, u64 addr)
{
struct ocrdma_mr *mr = get_ocrdma_mr(ibmr);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
index a2f3b4dc20b0..8b517fd36779 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
@@ -117,9 +117,6 @@ int ocrdma_post_srq_recv(struct ib_srq *, struct ib_recv_wr *,
int ocrdma_dereg_mr(struct ib_mr *);
struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *, int acc);
-struct ib_mr *ocrdma_reg_kernel_mr(struct ib_pd *,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf, int acc, u64 *iova_start);
struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *, u64 start, u64 length,
u64 virt, int acc, struct ib_udata *);
struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd,
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index 13ef22bd9459..fcdf37913a26 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -89,14 +89,14 @@ static int create_file(const char *name, umode_t mode,
{
int error;
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
*dentry = lookup_one_len(name, parent, strlen(name));
if (!IS_ERR(*dentry))
error = qibfs_mknod(d_inode(parent), *dentry,
mode, fops, data);
else
error = PTR_ERR(*dentry);
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
return error;
}
@@ -481,7 +481,7 @@ static int remove_device_files(struct super_block *sb,
int ret, i;
root = dget(sb->s_root);
- mutex_lock(&d_inode(root)->i_mutex);
+ inode_lock(d_inode(root));
snprintf(unit, sizeof(unit), "%u", dd->unit);
dir = lookup_one_len(unit, root, strlen(unit));
@@ -491,7 +491,7 @@ static int remove_device_files(struct super_block *sb,
goto bail;
}
- mutex_lock(&d_inode(dir)->i_mutex);
+ inode_lock(d_inode(dir));
remove_file(dir, "counters");
remove_file(dir, "counter_names");
remove_file(dir, "portcounter_names");
@@ -506,13 +506,13 @@ static int remove_device_files(struct super_block *sb,
}
}
remove_file(dir, "flash");
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
ret = simple_rmdir(d_inode(root), dir);
d_delete(dir);
dput(dir);
bail:
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
dput(root);
return ret;
}
diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c
index 294f5c706be9..5f53304e8a9b 100644
--- a/drivers/infiniband/hw/qib/qib_mr.c
+++ b/drivers/infiniband/hw/qib/qib_mr.c
@@ -150,10 +150,7 @@ static struct qib_mr *alloc_mr(int count, struct ib_pd *pd)
rval = init_qib_mregion(&mr->mr, pd, count);
if (rval)
goto bail;
- /*
- * ib_reg_phys_mr() will initialize mr->ibmr except for
- * lkey and rkey.
- */
+
rval = qib_alloc_lkey(&mr->mr, 0);
if (rval)
goto bail_mregion;
@@ -171,52 +168,6 @@ bail:
}
/**
- * qib_reg_phys_mr - register a physical memory region
- * @pd: protection domain for this memory region
- * @buffer_list: pointer to the list of physical buffers to register
- * @num_phys_buf: the number of physical buffers to register
- * @iova_start: the starting address passed over IB which maps to this MR
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_mr *qib_reg_phys_mr(struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf, int acc, u64 *iova_start)
-{
- struct qib_mr *mr;
- int n, m, i;
- struct ib_mr *ret;
-
- mr = alloc_mr(num_phys_buf, pd);
- if (IS_ERR(mr)) {
- ret = (struct ib_mr *)mr;
- goto bail;
- }
-
- mr->mr.user_base = *iova_start;
- mr->mr.iova = *iova_start;
- mr->mr.access_flags = acc;
-
- m = 0;
- n = 0;
- for (i = 0; i < num_phys_buf; i++) {
- mr->mr.map[m]->segs[n].vaddr = (void *) buffer_list[i].addr;
- mr->mr.map[m]->segs[n].length = buffer_list[i].size;
- mr->mr.length += buffer_list[i].size;
- n++;
- if (n == QIB_SEGSZ) {
- m++;
- n = 0;
- }
- }
-
- ret = &mr->ibmr;
-
-bail:
- return ret;
-}
-
-/**
* qib_reg_user_mr - register a userspace memory region
* @pd: protection domain for this memory region
* @start: starting userspace address
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index 40f85bb3e0d3..3eff35c2d453 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -100,9 +100,10 @@ static u32 credit_table[31] = {
32768 /* 1E */
};
-static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map)
+static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map,
+ gfp_t gfp)
{
- unsigned long page = get_zeroed_page(GFP_KERNEL);
+ unsigned long page = get_zeroed_page(gfp);
/*
* Free the page if someone raced with us installing it.
@@ -121,7 +122,7 @@ static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map)
* zero/one for QP type IB_QPT_SMI/IB_QPT_GSI.
*/
static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
- enum ib_qp_type type, u8 port)
+ enum ib_qp_type type, u8 port, gfp_t gfp)
{
u32 i, offset, max_scan, qpn;
struct qpn_map *map;
@@ -151,7 +152,7 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
max_scan = qpt->nmaps - !offset;
for (i = 0;;) {
if (unlikely(!map->page)) {
- get_map_page(qpt, map);
+ get_map_page(qpt, map, gfp);
if (unlikely(!map->page))
break;
}
@@ -983,13 +984,21 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
size_t sz;
size_t sg_list_sz;
struct ib_qp *ret;
+ gfp_t gfp;
+
if (init_attr->cap.max_send_sge > ib_qib_max_sges ||
init_attr->cap.max_send_wr > ib_qib_max_qp_wrs ||
- init_attr->create_flags) {
- ret = ERR_PTR(-EINVAL);
- goto bail;
- }
+ init_attr->create_flags & ~(IB_QP_CREATE_USE_GFP_NOIO))
+ return ERR_PTR(-EINVAL);
+
+ /* GFP_NOIO is applicable in RC QPs only */
+ if (init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO &&
+ init_attr->qp_type != IB_QPT_RC)
+ return ERR_PTR(-EINVAL);
+
+ gfp = init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO ?
+ GFP_NOIO : GFP_KERNEL;
/* Check receive queue parameters if no SRQ is specified. */
if (!init_attr->srq) {
@@ -1021,7 +1030,8 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
sz = sizeof(struct qib_sge) *
init_attr->cap.max_send_sge +
sizeof(struct qib_swqe);
- swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz);
+ swq = __vmalloc((init_attr->cap.max_send_wr + 1) * sz,
+ gfp, PAGE_KERNEL);
if (swq == NULL) {
ret = ERR_PTR(-ENOMEM);
goto bail;
@@ -1037,13 +1047,13 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
} else if (init_attr->cap.max_recv_sge > 1)
sg_list_sz = sizeof(*qp->r_sg_list) *
(init_attr->cap.max_recv_sge - 1);
- qp = kzalloc(sz + sg_list_sz, GFP_KERNEL);
+ qp = kzalloc(sz + sg_list_sz, gfp);
if (!qp) {
ret = ERR_PTR(-ENOMEM);
goto bail_swq;
}
RCU_INIT_POINTER(qp->next, NULL);
- qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), GFP_KERNEL);
+ qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), gfp);
if (!qp->s_hdr) {
ret = ERR_PTR(-ENOMEM);
goto bail_qp;
@@ -1058,8 +1068,16 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
sizeof(struct qib_rwqe);
- qp->r_rq.wq = vmalloc_user(sizeof(struct qib_rwq) +
- qp->r_rq.size * sz);
+ if (gfp != GFP_NOIO)
+ qp->r_rq.wq = vmalloc_user(
+ sizeof(struct qib_rwq) +
+ qp->r_rq.size * sz);
+ else
+ qp->r_rq.wq = __vmalloc(
+ sizeof(struct qib_rwq) +
+ qp->r_rq.size * sz,
+ gfp, PAGE_KERNEL);
+
if (!qp->r_rq.wq) {
ret = ERR_PTR(-ENOMEM);
goto bail_qp;
@@ -1090,7 +1108,7 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
dev = to_idev(ibpd->device);
dd = dd_from_dev(dev);
err = alloc_qpn(dd, &dev->qpn_table, init_attr->qp_type,
- init_attr->port_num);
+ init_attr->port_num, gfp);
if (err < 0) {
ret = ERR_PTR(err);
vfree(qp->r_rq.wq);
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index de6cb6fcda8d..baf1e42b6896 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -346,6 +346,7 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr,
unsigned long flags;
struct qib_lkey_table *rkt;
struct qib_pd *pd;
+ int avoid_schedule = 0;
spin_lock_irqsave(&qp->s_lock, flags);
@@ -438,11 +439,15 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr,
qp->ibqp.qp_type == IB_QPT_RC) {
if (wqe->length > 0x80000000U)
goto bail_inval_free;
+ if (wqe->length <= qp->pmtu)
+ avoid_schedule = 1;
} else if (wqe->length > (dd_from_ibdev(qp->ibqp.device)->pport +
- qp->port_num - 1)->ibmtu)
+ qp->port_num - 1)->ibmtu) {
goto bail_inval_free;
- else
+ } else {
atomic_inc(&to_iah(ud_wr(wr)->ah)->refcount);
+ avoid_schedule = 1;
+ }
wqe->ssn = qp->s_ssn++;
qp->s_head = next;
@@ -458,7 +463,7 @@ bail_inval_free:
bail_inval:
ret = -EINVAL;
bail:
- if (!ret && !wr->next &&
+ if (!ret && !wr->next && !avoid_schedule &&
!qib_sdma_empty(
dd_from_ibdev(qp->ibqp.device)->pport + qp->port_num - 1)) {
qib_schedule_send(qp);
@@ -2256,7 +2261,6 @@ int qib_register_ib_device(struct qib_devdata *dd)
ibdev->poll_cq = qib_poll_cq;
ibdev->req_notify_cq = qib_req_notify_cq;
ibdev->get_dma_mr = qib_get_dma_mr;
- ibdev->reg_phys_mr = qib_reg_phys_mr;
ibdev->reg_user_mr = qib_reg_user_mr;
ibdev->dereg_mr = qib_dereg_mr;
ibdev->alloc_mr = qib_alloc_mr;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index bc803f33d5f6..6c5e77753d85 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -1032,10 +1032,6 @@ int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc);
-struct ib_mr *qib_reg_phys_mr(struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf, int acc, u64 *iova_start);
-
struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int mr_access_flags,
struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/qib/qib_verbs_mcast.c b/drivers/infiniband/hw/qib/qib_verbs_mcast.c
index f8ea069a3eaf..b2fb5286dbd9 100644
--- a/drivers/infiniband/hw/qib/qib_verbs_mcast.c
+++ b/drivers/infiniband/hw/qib/qib_verbs_mcast.c
@@ -286,15 +286,13 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
struct qib_ibdev *dev = to_idev(ibqp->device);
struct qib_ibport *ibp = to_iport(ibqp->device, qp->port_num);
struct qib_mcast *mcast = NULL;
- struct qib_mcast_qp *p, *tmp;
+ struct qib_mcast_qp *p, *tmp, *delp = NULL;
struct rb_node *n;
int last = 0;
int ret;
- if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) {
- ret = -EINVAL;
- goto bail;
- }
+ if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET)
+ return -EINVAL;
spin_lock_irq(&ibp->lock);
@@ -303,8 +301,7 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
while (1) {
if (n == NULL) {
spin_unlock_irq(&ibp->lock);
- ret = -EINVAL;
- goto bail;
+ return -EINVAL;
}
mcast = rb_entry(n, struct qib_mcast, rb_node);
@@ -328,6 +325,7 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
*/
list_del_rcu(&p->list);
mcast->n_attached--;
+ delp = p;
/* If this was the last attached QP, remove the GID too. */
if (list_empty(&mcast->qp_list)) {
@@ -338,15 +336,16 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
}
spin_unlock_irq(&ibp->lock);
+ /* QP not attached */
+ if (!delp)
+ return -EINVAL;
+ /*
+ * Wait for any list walkers to finish before freeing the
+ * list element.
+ */
+ wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
+ qib_mcast_qp_free(delp);
- if (p) {
- /*
- * Wait for any list walkers to finish before freeing the
- * list element.
- */
- wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
- qib_mcast_qp_free(p);
- }
if (last) {
atomic_dec(&mcast->refcount);
wait_event(mcast->wait, !atomic_read(&mcast->refcount));
@@ -355,11 +354,7 @@ int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
dev->n_mcast_grps_allocated--;
spin_unlock_irq(&dev->n_mcast_grps_lock);
}
-
- ret = 0;
-
-bail:
- return ret;
+ return 0;
}
int qib_mcast_tree_empty(struct qib_ibport *ibp)
diff --git a/drivers/infiniband/hw/usnic/usnic_debugfs.c b/drivers/infiniband/hw/usnic/usnic_debugfs.c
index 5e55b8bc6fe4..92dc66cc2d50 100644
--- a/drivers/infiniband/hw/usnic/usnic_debugfs.c
+++ b/drivers/infiniband/hw/usnic/usnic_debugfs.c
@@ -157,8 +157,9 @@ void usnic_debugfs_flow_add(struct usnic_ib_qp_grp_flow *qp_flow)
qp_flow,
&flowinfo_ops);
if (IS_ERR_OR_NULL(qp_flow->dbgfs_dentry)) {
- usnic_err("Failed to create dbg fs entry for flow %u\n",
- qp_flow->flow->flow_id);
+ usnic_err("Failed to create dbg fs entry for flow %u with error %ld\n",
+ qp_flow->flow->flow_id,
+ PTR_ERR(qp_flow->dbgfs_dentry));
}
}
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
index fcea3a24d3eb..5f44b66ccb86 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
@@ -521,7 +521,7 @@ int usnic_ib_qp_grp_modify(struct usnic_ib_qp_grp *qp_grp,
if (!status) {
qp_grp->state = new_state;
- usnic_info("Transistioned %u from %s to %s",
+ usnic_info("Transitioned %u from %s to %s",
qp_grp->grp_id,
usnic_ib_qp_grp_state_to_string(old_state),
usnic_ib_qp_grp_state_to_string(new_state));
@@ -575,7 +575,7 @@ alloc_res_chunk_list(struct usnic_vnic *vnic,
return res_chunk_list;
out_free_res:
- for (i--; i > 0; i--)
+ for (i--; i >= 0; i--)
usnic_vnic_put_resources(res_chunk_list[i]);
kfree(res_chunk_list);
return ERR_PTR(err);
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index f8e3211689a3..6cdb4d23f78f 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -51,7 +51,7 @@
static void usnic_ib_fw_string_to_u64(char *fw_ver_str, u64 *fw_ver)
{
- *fw_ver = (u64) *fw_ver_str;
+ *fw_ver = *((u64 *)fw_ver_str);
}
static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp,
@@ -571,20 +571,20 @@ int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
qp_grp = to_uqp_grp(ibqp);
- /* TODO: Future Support All States */
mutex_lock(&qp_grp->vf->pf->usdev_lock);
- if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_INIT) {
- status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_INIT, NULL);
- } else if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_RTR) {
- status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RTR, NULL);
- } else if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_RTS) {
- status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RTS, NULL);
+ if ((attr_mask & IB_QP_PORT) && attr->port_num != 1) {
+ /* usnic devices only have one port */
+ status = -EINVAL;
+ goto out_unlock;
+ }
+ if (attr_mask & IB_QP_STATE) {
+ status = usnic_ib_qp_grp_modify(qp_grp, attr->qp_state, NULL);
} else {
- usnic_err("Unexpected combination mask: %u state: %u\n",
- attr_mask & IB_QP_STATE, attr->qp_state);
+ usnic_err("Unhandled request, attr_mask=0x%x\n", attr_mask);
status = -EINVAL;
}
+out_unlock:
mutex_unlock(&qp_grp->vf->pf->usdev_lock);
return status;
}
@@ -625,8 +625,8 @@ struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length,
virt_addr, length);
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
- if (IS_ERR_OR_NULL(mr))
- return ERR_PTR(mr ? PTR_ERR(mr) : -ENOMEM);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
mr->umem = usnic_uiom_reg_get(to_upd(pd)->umem_pd, start, length,
access_flags, 0);
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
index 414eaa566bd9..0d9d2e6a14d5 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
@@ -43,8 +43,6 @@ int usnic_ib_query_device(struct ib_device *ibdev,
struct ib_udata *uhw);
int usnic_ib_query_port(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props);
-enum rdma_protocol_type
-usnic_ib_query_protocol(struct ib_device *device, u8 port_num);
int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
int qp_attr_mask,
struct ib_qp_init_attr *qp_init_attr);
diff --git a/drivers/infiniband/hw/usnic/usnic_vnic.c b/drivers/infiniband/hw/usnic/usnic_vnic.c
index 66de93fb8ea9..887510718690 100644
--- a/drivers/infiniband/hw/usnic/usnic_vnic.c
+++ b/drivers/infiniband/hw/usnic/usnic_vnic.c
@@ -237,7 +237,7 @@ usnic_vnic_get_resources(struct usnic_vnic *vnic, enum usnic_vnic_res_type type,
struct usnic_vnic_res *res;
int i;
- if (usnic_vnic_res_free_cnt(vnic, type) < cnt || cnt < 1 || !owner)
+ if (usnic_vnic_res_free_cnt(vnic, type) < cnt || cnt < 0 || !owner)
return ERR_PTR(-EINVAL);
ret = kzalloc(sizeof(*ret), GFP_ATOMIC);
@@ -247,26 +247,28 @@ usnic_vnic_get_resources(struct usnic_vnic *vnic, enum usnic_vnic_res_type type,
return ERR_PTR(-ENOMEM);
}
- ret->res = kzalloc(sizeof(*(ret->res))*cnt, GFP_ATOMIC);
- if (!ret->res) {
- usnic_err("Failed to allocate resources for %s. Out of memory\n",
- usnic_vnic_pci_name(vnic));
- kfree(ret);
- return ERR_PTR(-ENOMEM);
- }
+ if (cnt > 0) {
+ ret->res = kcalloc(cnt, sizeof(*(ret->res)), GFP_ATOMIC);
+ if (!ret->res) {
+ usnic_err("Failed to allocate resources for %s. Out of memory\n",
+ usnic_vnic_pci_name(vnic));
+ kfree(ret);
+ return ERR_PTR(-ENOMEM);
+ }
- spin_lock(&vnic->res_lock);
- src = &vnic->chunks[type];
- for (i = 0; i < src->cnt && ret->cnt < cnt; i++) {
- res = src->res[i];
- if (!res->owner) {
- src->free_cnt--;
- res->owner = owner;
- ret->res[ret->cnt++] = res;
+ spin_lock(&vnic->res_lock);
+ src = &vnic->chunks[type];
+ for (i = 0; i < src->cnt && ret->cnt < cnt; i++) {
+ res = src->res[i];
+ if (!res->owner) {
+ src->free_cnt--;
+ res->owner = owner;
+ ret->res[ret->cnt++] = res;
+ }
}
- }
- spin_unlock(&vnic->res_lock);
+ spin_unlock(&vnic->res_lock);
+ }
ret->type = type;
ret->vnic = vnic;
WARN_ON(ret->cnt != cnt);
@@ -281,14 +283,16 @@ void usnic_vnic_put_resources(struct usnic_vnic_res_chunk *chunk)
int i;
struct usnic_vnic *vnic = chunk->vnic;
- spin_lock(&vnic->res_lock);
- while ((i = --chunk->cnt) >= 0) {
- res = chunk->res[i];
- chunk->res[i] = NULL;
- res->owner = NULL;
- vnic->chunks[res->type].free_cnt++;
+ if (chunk->cnt > 0) {
+ spin_lock(&vnic->res_lock);
+ while ((i = --chunk->cnt) >= 0) {
+ res = chunk->res[i];
+ chunk->res[i] = NULL;
+ res->owner = NULL;
+ vnic->chunks[res->type].free_cnt++;
+ }
+ spin_unlock(&vnic->res_lock);
}
- spin_unlock(&vnic->res_lock);
kfree(chunk->res);
kfree(chunk);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 3ede10309754..a6f3eab0f350 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -495,7 +495,6 @@ void ipoib_dev_cleanup(struct net_device *dev);
void ipoib_mcast_join_task(struct work_struct *work);
void ipoib_mcast_carrier_on_task(struct work_struct *work);
void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb);
-void ipoib_mcast_free(struct ipoib_mcast *mc);
void ipoib_mcast_restart_task(struct work_struct *work);
int ipoib_mcast_start_thread(struct net_device *dev);
@@ -549,8 +548,9 @@ void ipoib_path_iter_read(struct ipoib_path_iter *iter,
int ipoib_mcast_attach(struct net_device *dev, u16 mlid,
union ib_gid *mgid, int set_qkey);
-int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast);
-struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid);
+void ipoib_mcast_remove_list(struct list_head *remove_list);
+void ipoib_check_and_add_mcast_sendonly(struct ipoib_dev_priv *priv, u8 *mgid,
+ struct list_head *remove_list);
int ipoib_init_qp(struct net_device *dev);
int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 3ae9726efb98..917e46ea3bf6 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -70,7 +70,6 @@ static struct ib_qp_attr ipoib_cm_err_attr = {
#define IPOIB_CM_RX_DRAIN_WRID 0xffffffff
static struct ib_send_wr ipoib_cm_rx_drain_wr = {
- .wr_id = IPOIB_CM_RX_DRAIN_WRID,
.opcode = IB_WR_SEND,
};
@@ -223,6 +222,7 @@ static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv)
* error" WC will be immediately generated for each WR we post.
*/
p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list);
+ ipoib_cm_rx_drain_wr.wr_id = IPOIB_CM_RX_DRAIN_WRID;
if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, &bad_wr))
ipoib_warn(priv, "failed to post drain wr\n");
@@ -1522,8 +1522,7 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
int ipoib_cm_dev_init(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
- int i, ret;
- struct ib_device_attr attr;
+ int max_srq_sge, i;
INIT_LIST_HEAD(&priv->cm.passive_ids);
INIT_LIST_HEAD(&priv->cm.reap_list);
@@ -1540,19 +1539,13 @@ int ipoib_cm_dev_init(struct net_device *dev)
skb_queue_head_init(&priv->cm.skb_queue);
- ret = ib_query_device(priv->ca, &attr);
- if (ret) {
- printk(KERN_WARNING "ib_query_device() failed with %d\n", ret);
- return ret;
- }
-
- ipoib_dbg(priv, "max_srq_sge=%d\n", attr.max_srq_sge);
+ ipoib_dbg(priv, "max_srq_sge=%d\n", priv->ca->attrs.max_srq_sge);
- attr.max_srq_sge = min_t(int, IPOIB_CM_RX_SG, attr.max_srq_sge);
- ipoib_cm_create_srq(dev, attr.max_srq_sge);
+ max_srq_sge = min_t(int, IPOIB_CM_RX_SG, priv->ca->attrs.max_srq_sge);
+ ipoib_cm_create_srq(dev, max_srq_sge);
if (ipoib_cm_has_srq(dev)) {
- priv->cm.max_cm_mtu = attr.max_srq_sge * PAGE_SIZE - 0x10;
- priv->cm.num_frags = attr.max_srq_sge;
+ priv->cm.max_cm_mtu = max_srq_sge * PAGE_SIZE - 0x10;
+ priv->cm.num_frags = max_srq_sge;
ipoib_dbg(priv, "max_cm_mtu = 0x%x, num_frags=%d\n",
priv->cm.max_cm_mtu, priv->cm.num_frags);
} else {
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index 078cadd6c797..a53fa5fc0dec 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -40,15 +40,11 @@ static void ipoib_get_drvinfo(struct net_device *netdev,
struct ethtool_drvinfo *drvinfo)
{
struct ipoib_dev_priv *priv = netdev_priv(netdev);
- struct ib_device_attr *attr;
-
- attr = kmalloc(sizeof(*attr), GFP_KERNEL);
- if (attr && !ib_query_device(priv->ca, attr))
- snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
- "%d.%d.%d", (int)(attr->fw_ver >> 32),
- (int)(attr->fw_ver >> 16) & 0xffff,
- (int)attr->fw_ver & 0xffff);
- kfree(attr);
+
+ snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+ "%d.%d.%d", (int)(priv->ca->attrs.fw_ver >> 32),
+ (int)(priv->ca->attrs.fw_ver >> 16) & 0xffff,
+ (int)priv->ca->attrs.fw_ver & 0xffff);
strlcpy(drvinfo->bus_info, dev_name(priv->ca->dma_device),
sizeof(drvinfo->bus_info));
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 7d3281866ffc..25509bbd4a05 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1150,8 +1150,6 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
unsigned long flags;
int i;
LIST_HEAD(remove_list);
- struct ipoib_mcast *mcast, *tmcast;
- struct net_device *dev = priv->dev;
if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
return;
@@ -1179,18 +1177,8 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
lockdep_is_held(&priv->lock))) != NULL) {
/* was the neigh idle for two GC periods */
if (time_after(neigh_obsolete, neigh->alive)) {
- u8 *mgid = neigh->daddr + 4;
- /* Is this multicast ? */
- if (*mgid == 0xff) {
- mcast = __ipoib_mcast_find(dev, mgid);
-
- if (mcast && test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
- list_del(&mcast->list);
- rb_erase(&mcast->rb_node, &priv->multicast_tree);
- list_add_tail(&mcast->list, &remove_list);
- }
- }
+ ipoib_check_and_add_mcast_sendonly(priv, neigh->daddr + 4, &remove_list);
rcu_assign_pointer(*np,
rcu_dereference_protected(neigh->hnext,
@@ -1207,10 +1195,7 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
out_unlock:
spin_unlock_irqrestore(&priv->lock, flags);
- list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
- ipoib_mcast_leave(dev, mcast);
- ipoib_mcast_free(mcast);
- }
+ ipoib_mcast_remove_list(&remove_list);
}
static void ipoib_reap_neigh(struct work_struct *work)
@@ -1777,26 +1762,7 @@ int ipoib_add_pkey_attr(struct net_device *dev)
int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
{
- struct ib_device_attr *device_attr;
- int result = -ENOMEM;
-
- device_attr = kmalloc(sizeof *device_attr, GFP_KERNEL);
- if (!device_attr) {
- printk(KERN_WARNING "%s: allocation of %zu bytes failed\n",
- hca->name, sizeof *device_attr);
- return result;
- }
-
- result = ib_query_device(hca, device_attr);
- if (result) {
- printk(KERN_WARNING "%s: ib_query_device failed (ret = %d)\n",
- hca->name, result);
- kfree(device_attr);
- return result;
- }
- priv->hca_caps = device_attr->device_cap_flags;
-
- kfree(device_attr);
+ priv->hca_caps = hca->attrs.device_cap_flags;
if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
priv->dev->hw_features = NETIF_F_SG |
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index f357ca67a41c..050dfa175d16 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -106,7 +106,7 @@ static void __ipoib_mcast_schedule_join_thread(struct ipoib_dev_priv *priv,
queue_delayed_work(priv->wq, &priv->mcast_task, 0);
}
-void ipoib_mcast_free(struct ipoib_mcast *mcast)
+static void ipoib_mcast_free(struct ipoib_mcast *mcast)
{
struct net_device *dev = mcast->dev;
int tx_dropped = 0;
@@ -153,7 +153,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev,
return mcast;
}
-struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid)
+static struct ipoib_mcast *__ipoib_mcast_find(struct net_device *dev, void *mgid)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct rb_node *n = priv->multicast_tree.rb_node;
@@ -677,7 +677,7 @@ int ipoib_mcast_stop_thread(struct net_device *dev)
return 0;
}
-int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
+static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
int ret = 0;
@@ -704,6 +704,35 @@ int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
return 0;
}
+/*
+ * Check if the multicast group is sendonly. If so remove it from the maps
+ * and add to the remove list
+ */
+void ipoib_check_and_add_mcast_sendonly(struct ipoib_dev_priv *priv, u8 *mgid,
+ struct list_head *remove_list)
+{
+ /* Is this multicast ? */
+ if (*mgid == 0xff) {
+ struct ipoib_mcast *mcast = __ipoib_mcast_find(priv->dev, mgid);
+
+ if (mcast && test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
+ list_del(&mcast->list);
+ rb_erase(&mcast->rb_node, &priv->multicast_tree);
+ list_add_tail(&mcast->list, remove_list);
+ }
+ }
+}
+
+void ipoib_mcast_remove_list(struct list_head *remove_list)
+{
+ struct ipoib_mcast *mcast, *tmcast;
+
+ list_for_each_entry_safe(mcast, tmcast, remove_list, list) {
+ ipoib_mcast_leave(mcast->dev, mcast);
+ ipoib_mcast_free(mcast);
+ }
+}
+
void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -810,10 +839,7 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
wait_for_completion(&mcast->done);
- list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
- ipoib_mcast_leave(dev, mcast);
- ipoib_mcast_free(mcast);
- }
+ ipoib_mcast_remove_list(&remove_list);
}
static int ipoib_mcast_addr_is_valid(const u8 *addr, const u8 *broadcast)
@@ -939,10 +965,7 @@ void ipoib_mcast_restart_task(struct work_struct *work)
if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
wait_for_completion(&mcast->done);
- list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
- ipoib_mcast_leave(mcast->dev, mcast);
- ipoib_mcast_free(mcast);
- }
+ ipoib_mcast_remove_list(&remove_list);
/*
* Double check that we are still up
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 9080161e01af..c827c93f46c5 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -644,7 +644,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
ib_conn = &iser_conn->ib_conn;
if (ib_conn->pi_support) {
- u32 sig_caps = ib_conn->device->dev_attr.sig_prot_cap;
+ u32 sig_caps = ib_conn->device->ib_device->attrs.sig_prot_cap;
scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps));
scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP |
@@ -656,7 +656,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
* max fastreg page list length.
*/
shost->sg_tablesize = min_t(unsigned short, shost->sg_tablesize,
- ib_conn->device->dev_attr.max_fast_reg_page_list_len);
+ ib_conn->device->ib_device->attrs.max_fast_reg_page_list_len);
shost->max_sectors = min_t(unsigned int,
1024, (shost->sg_tablesize * PAGE_SIZE) >> 9);
@@ -1059,7 +1059,8 @@ static int __init iser_init(void)
release_wq = alloc_workqueue("release workqueue", 0, 0);
if (!release_wq) {
iser_err("failed to allocate release workqueue\n");
- return -ENOMEM;
+ err = -ENOMEM;
+ goto err_alloc_wq;
}
iscsi_iser_scsi_transport = iscsi_register_transport(
@@ -1067,12 +1068,14 @@ static int __init iser_init(void)
if (!iscsi_iser_scsi_transport) {
iser_err("iscsi_register_transport failed\n");
err = -EINVAL;
- goto register_transport_failure;
+ goto err_reg;
}
return 0;
-register_transport_failure:
+err_reg:
+ destroy_workqueue(release_wq);
+err_alloc_wq:
kmem_cache_destroy(ig.desc_cache);
return err;
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 8a5998e6a407..95f0a64e076b 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -48,6 +48,7 @@
#include <scsi/scsi_transport_iscsi.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_device.h>
+#include <scsi/iser.h>
#include <linux/interrupt.h>
#include <linux/wait.h>
@@ -151,46 +152,10 @@
- ISER_MAX_RX_MISC_PDUS) / \
(1 + ISER_INFLIGHT_DATAOUTS))
-#define ISER_WC_BATCH_COUNT 16
#define ISER_SIGNAL_CMD_COUNT 32
-#define ISER_VER 0x10
-#define ISER_WSV 0x08
-#define ISER_RSV 0x04
-
-#define ISER_FASTREG_LI_WRID 0xffffffffffffffffULL
-#define ISER_BEACON_WRID 0xfffffffffffffffeULL
-
-/**
- * struct iser_hdr - iSER header
- *
- * @flags: flags support (zbva, remote_inv)
- * @rsvd: reserved
- * @write_stag: write rkey
- * @write_va: write virtual address
- * @reaf_stag: read rkey
- * @read_va: read virtual address
- */
-struct iser_hdr {
- u8 flags;
- u8 rsvd[3];
- __be32 write_stag;
- __be64 write_va;
- __be32 read_stag;
- __be64 read_va;
-} __attribute__((packed));
-
-
-#define ISER_ZBVA_NOT_SUPPORTED 0x80
-#define ISER_SEND_W_INV_NOT_SUPPORTED 0x40
-
-struct iser_cm_hdr {
- u8 flags;
- u8 rsvd[3];
-} __packed;
-
/* Constant PDU lengths calculations */
-#define ISER_HEADERS_LEN (sizeof(struct iser_hdr) + sizeof(struct iscsi_hdr))
+#define ISER_HEADERS_LEN (sizeof(struct iser_ctrl) + sizeof(struct iscsi_hdr))
#define ISER_RECV_DATA_SEG_LEN 128
#define ISER_RX_PAYLOAD_SIZE (ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN)
@@ -269,7 +234,7 @@ enum iser_desc_type {
#define ISER_MAX_WRS 7
/**
- * struct iser_tx_desc - iSER TX descriptor (for send wr_id)
+ * struct iser_tx_desc - iSER TX descriptor
*
* @iser_header: iser header
* @iscsi_header: iscsi header
@@ -287,12 +252,13 @@ enum iser_desc_type {
* @sig_attrs: Signature attributes
*/
struct iser_tx_desc {
- struct iser_hdr iser_header;
+ struct iser_ctrl iser_header;
struct iscsi_hdr iscsi_header;
enum iser_desc_type type;
u64 dma_addr;
struct ib_sge tx_sg[2];
int num_sge;
+ struct ib_cqe cqe;
bool mapped;
u8 wr_idx;
union iser_wr {
@@ -306,9 +272,10 @@ struct iser_tx_desc {
};
#define ISER_RX_PAD_SIZE (256 - (ISER_RX_PAYLOAD_SIZE + \
- sizeof(u64) + sizeof(struct ib_sge)))
+ sizeof(u64) + sizeof(struct ib_sge) + \
+ sizeof(struct ib_cqe)))
/**
- * struct iser_rx_desc - iSER RX descriptor (for recv wr_id)
+ * struct iser_rx_desc - iSER RX descriptor
*
* @iser_header: iser header
* @iscsi_header: iscsi header
@@ -318,12 +285,32 @@ struct iser_tx_desc {
* @pad: for sense data TODO: Modify to maximum sense length supported
*/
struct iser_rx_desc {
- struct iser_hdr iser_header;
+ struct iser_ctrl iser_header;
struct iscsi_hdr iscsi_header;
char data[ISER_RECV_DATA_SEG_LEN];
u64 dma_addr;
struct ib_sge rx_sg;
+ struct ib_cqe cqe;
char pad[ISER_RX_PAD_SIZE];
+} __packed;
+
+/**
+ * struct iser_login_desc - iSER login descriptor
+ *
+ * @req: pointer to login request buffer
+ * @resp: pointer to login response buffer
+ * @req_dma: DMA address of login request buffer
+ * @rsp_dma: DMA address of login response buffer
+ * @sge: IB sge for login post recv
+ * @cqe: completion handler
+ */
+struct iser_login_desc {
+ void *req;
+ void *rsp;
+ u64 req_dma;
+ u64 rsp_dma;
+ struct ib_sge sge;
+ struct ib_cqe cqe;
} __attribute__((packed));
struct iser_conn;
@@ -333,18 +320,12 @@ struct iscsi_iser_task;
/**
* struct iser_comp - iSER completion context
*
- * @device: pointer to device handle
* @cq: completion queue
- * @wcs: work completion array
- * @tasklet: Tasklet handle
* @active_qps: Number of active QPs attached
* to completion context
*/
struct iser_comp {
- struct iser_device *device;
struct ib_cq *cq;
- struct ib_wc wcs[ISER_WC_BATCH_COUNT];
- struct tasklet_struct tasklet;
int active_qps;
};
@@ -380,7 +361,6 @@ struct iser_reg_ops {
*
* @ib_device: RDMA device
* @pd: Protection Domain for this device
- * @dev_attr: Device attributes container
* @mr: Global DMA memory region
* @event_handler: IB events handle routine
* @ig_list: entry in devices list
@@ -389,18 +369,19 @@ struct iser_reg_ops {
* cpus and device max completion vectors
* @comps: Dinamically allocated array of completion handlers
* @reg_ops: Registration ops
+ * @remote_inv_sup: Remote invalidate is supported on this device
*/
struct iser_device {
struct ib_device *ib_device;
struct ib_pd *pd;
- struct ib_device_attr dev_attr;
struct ib_mr *mr;
struct ib_event_handler event_handler;
struct list_head ig_list;
int refcount;
int comps_used;
struct iser_comp *comps;
- struct iser_reg_ops *reg_ops;
+ const struct iser_reg_ops *reg_ops;
+ bool remote_inv_sup;
};
#define ISER_CHECK_GUARD 0xc0
@@ -475,10 +456,11 @@ struct iser_fr_pool {
* @rx_wr: receive work request for batch posts
* @device: reference to iser device
* @comp: iser completion context
- * @pi_support: Indicate device T10-PI support
- * @beacon: beacon send wr to signal all flush errors were drained
- * @flush_comp: completes when all connection completions consumed
* @fr_pool: connection fast registration poool
+ * @pi_support: Indicate device T10-PI support
+ * @last: last send wr to signal all flush errors were drained
+ * @last_cqe: cqe handler for last wr
+ * @last_comp: completes when all connection completions consumed
*/
struct ib_conn {
struct rdma_cm_id *cma_id;
@@ -488,10 +470,12 @@ struct ib_conn {
struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX];
struct iser_device *device;
struct iser_comp *comp;
- bool pi_support;
- struct ib_send_wr beacon;
- struct completion flush_comp;
struct iser_fr_pool fr_pool;
+ bool pi_support;
+ struct ib_send_wr last;
+ struct ib_cqe last_cqe;
+ struct ib_cqe reg_cqe;
+ struct completion last_comp;
};
/**
@@ -514,11 +498,7 @@ struct ib_conn {
* @up_completion: connection establishment completed
* (state is ISER_CONN_UP)
* @conn_list: entry in ig conn list
- * @login_buf: login data buffer (stores login parameters)
- * @login_req_buf: login request buffer
- * @login_req_dma: login request buffer dma address
- * @login_resp_buf: login response buffer
- * @login_resp_dma: login response buffer dma address
+ * @login_desc: login descriptor
* @rx_desc_head: head of rx_descs cyclic buffer
* @rx_descs: rx buffers array (cyclic buffer)
* @num_rx_descs: number of rx descriptors
@@ -541,15 +521,13 @@ struct iser_conn {
struct completion ib_completion;
struct completion up_completion;
struct list_head conn_list;
-
- char *login_buf;
- char *login_req_buf, *login_resp_buf;
- u64 login_req_dma, login_resp_dma;
+ struct iser_login_desc login_desc;
unsigned int rx_desc_head;
struct iser_rx_desc *rx_descs;
u32 num_rx_descs;
unsigned short scsi_sg_tablesize;
unsigned int scsi_max_sectors;
+ bool snd_w_inv;
};
/**
@@ -579,9 +557,8 @@ struct iscsi_iser_task {
struct iser_page_vec {
u64 *pages;
- int length;
- int offset;
- int data_size;
+ int npages;
+ struct ib_mr fake_mr;
};
/**
@@ -633,12 +610,14 @@ int iser_conn_terminate(struct iser_conn *iser_conn);
void iser_release_work(struct work_struct *work);
-void iser_rcv_completion(struct iser_rx_desc *desc,
- unsigned long dto_xfer_len,
- struct ib_conn *ib_conn);
-
-void iser_snd_completion(struct iser_tx_desc *desc,
- struct ib_conn *ib_conn);
+void iser_err_comp(struct ib_wc *wc, const char *type);
+void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_task_rsp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_cmd_comp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_ctrl_comp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_dataout_comp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_reg_comp(struct ib_cq *cq, struct ib_wc *wc);
+void iser_last_comp(struct ib_cq *cq, struct ib_wc *wc);
void iser_task_rdma_init(struct iscsi_iser_task *task);
@@ -651,7 +630,8 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir);
int iser_reg_rdma_mem(struct iscsi_iser_task *task,
- enum iser_data_dir dir);
+ enum iser_data_dir dir,
+ bool all_imm);
void iser_unreg_rdma_mem(struct iscsi_iser_task *task,
enum iser_data_dir dir);
@@ -719,4 +699,28 @@ iser_tx_next_wr(struct iser_tx_desc *tx_desc)
return cur_wr;
}
+static inline struct iser_conn *
+to_iser_conn(struct ib_conn *ib_conn)
+{
+ return container_of(ib_conn, struct iser_conn, ib_conn);
+}
+
+static inline struct iser_rx_desc *
+iser_rx(struct ib_cqe *cqe)
+{
+ return container_of(cqe, struct iser_rx_desc, cqe);
+}
+
+static inline struct iser_tx_desc *
+iser_tx(struct ib_cqe *cqe)
+{
+ return container_of(cqe, struct iser_tx_desc, cqe);
+}
+
+static inline struct iser_login_desc *
+iser_login(struct ib_cqe *cqe)
+{
+ return container_of(cqe, struct iser_login_desc, cqe);
+}
+
#endif
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index ffd00c420729..ed54b388e7ad 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -51,7 +51,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task)
struct iscsi_iser_task *iser_task = task->dd_data;
struct iser_mem_reg *mem_reg;
int err;
- struct iser_hdr *hdr = &iser_task->desc.iser_header;
+ struct iser_ctrl *hdr = &iser_task->desc.iser_header;
struct iser_data_buf *buf_in = &iser_task->data[ISER_DIR_IN];
err = iser_dma_map_task_data(iser_task,
@@ -72,7 +72,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task)
return err;
}
- err = iser_reg_rdma_mem(iser_task, ISER_DIR_IN);
+ err = iser_reg_rdma_mem(iser_task, ISER_DIR_IN, false);
if (err) {
iser_err("Failed to set up Data-IN RDMA\n");
return err;
@@ -104,7 +104,7 @@ iser_prepare_write_cmd(struct iscsi_task *task,
struct iscsi_iser_task *iser_task = task->dd_data;
struct iser_mem_reg *mem_reg;
int err;
- struct iser_hdr *hdr = &iser_task->desc.iser_header;
+ struct iser_ctrl *hdr = &iser_task->desc.iser_header;
struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT];
struct ib_sge *tx_dsg = &iser_task->desc.tx_sg[1];
@@ -126,7 +126,8 @@ iser_prepare_write_cmd(struct iscsi_task *task,
return err;
}
- err = iser_reg_rdma_mem(iser_task, ISER_DIR_OUT);
+ err = iser_reg_rdma_mem(iser_task, ISER_DIR_OUT,
+ buf_out->data_len == imm_sz);
if (err != 0) {
iser_err("Failed to register write cmd RDMA mem\n");
return err;
@@ -166,7 +167,7 @@ static void iser_create_send_desc(struct iser_conn *iser_conn,
ib_dma_sync_single_for_cpu(device->ib_device,
tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);
- memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
+ memset(&tx_desc->iser_header, 0, sizeof(struct iser_ctrl));
tx_desc->iser_header.flags = ISER_VER;
tx_desc->num_sge = 1;
}
@@ -174,73 +175,63 @@ static void iser_create_send_desc(struct iser_conn *iser_conn,
static void iser_free_login_buf(struct iser_conn *iser_conn)
{
struct iser_device *device = iser_conn->ib_conn.device;
+ struct iser_login_desc *desc = &iser_conn->login_desc;
- if (!iser_conn->login_buf)
+ if (!desc->req)
return;
- if (iser_conn->login_req_dma)
- ib_dma_unmap_single(device->ib_device,
- iser_conn->login_req_dma,
- ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
+ ib_dma_unmap_single(device->ib_device, desc->req_dma,
+ ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
- if (iser_conn->login_resp_dma)
- ib_dma_unmap_single(device->ib_device,
- iser_conn->login_resp_dma,
- ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
+ ib_dma_unmap_single(device->ib_device, desc->rsp_dma,
+ ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
- kfree(iser_conn->login_buf);
+ kfree(desc->req);
+ kfree(desc->rsp);
/* make sure we never redo any unmapping */
- iser_conn->login_req_dma = 0;
- iser_conn->login_resp_dma = 0;
- iser_conn->login_buf = NULL;
+ desc->req = NULL;
+ desc->rsp = NULL;
}
static int iser_alloc_login_buf(struct iser_conn *iser_conn)
{
struct iser_device *device = iser_conn->ib_conn.device;
- int req_err, resp_err;
-
- BUG_ON(device == NULL);
-
- iser_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
- ISER_RX_LOGIN_SIZE, GFP_KERNEL);
- if (!iser_conn->login_buf)
- goto out_err;
-
- iser_conn->login_req_buf = iser_conn->login_buf;
- iser_conn->login_resp_buf = iser_conn->login_buf +
- ISCSI_DEF_MAX_RECV_SEG_LEN;
-
- iser_conn->login_req_dma = ib_dma_map_single(device->ib_device,
- iser_conn->login_req_buf,
- ISCSI_DEF_MAX_RECV_SEG_LEN,
- DMA_TO_DEVICE);
-
- iser_conn->login_resp_dma = ib_dma_map_single(device->ib_device,
- iser_conn->login_resp_buf,
- ISER_RX_LOGIN_SIZE,
- DMA_FROM_DEVICE);
-
- req_err = ib_dma_mapping_error(device->ib_device,
- iser_conn->login_req_dma);
- resp_err = ib_dma_mapping_error(device->ib_device,
- iser_conn->login_resp_dma);
-
- if (req_err || resp_err) {
- if (req_err)
- iser_conn->login_req_dma = 0;
- if (resp_err)
- iser_conn->login_resp_dma = 0;
- goto free_login_buf;
- }
+ struct iser_login_desc *desc = &iser_conn->login_desc;
+
+ desc->req = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN, GFP_KERNEL);
+ if (!desc->req)
+ return -ENOMEM;
+
+ desc->req_dma = ib_dma_map_single(device->ib_device, desc->req,
+ ISCSI_DEF_MAX_RECV_SEG_LEN,
+ DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(device->ib_device,
+ desc->req_dma))
+ goto free_req;
+
+ desc->rsp = kmalloc(ISER_RX_LOGIN_SIZE, GFP_KERNEL);
+ if (!desc->rsp)
+ goto unmap_req;
+
+ desc->rsp_dma = ib_dma_map_single(device->ib_device, desc->rsp,
+ ISER_RX_LOGIN_SIZE,
+ DMA_FROM_DEVICE);
+ if (ib_dma_mapping_error(device->ib_device,
+ desc->rsp_dma))
+ goto free_rsp;
+
return 0;
-free_login_buf:
- iser_free_login_buf(iser_conn);
+free_rsp:
+ kfree(desc->rsp);
+unmap_req:
+ ib_dma_unmap_single(device->ib_device, desc->req_dma,
+ ISCSI_DEF_MAX_RECV_SEG_LEN,
+ DMA_TO_DEVICE);
+free_req:
+ kfree(desc->req);
-out_err:
- iser_err("unable to alloc or map login buf\n");
return -ENOMEM;
}
@@ -280,11 +271,11 @@ int iser_alloc_rx_descriptors(struct iser_conn *iser_conn,
goto rx_desc_dma_map_failed;
rx_desc->dma_addr = dma_addr;
-
+ rx_desc->cqe.done = iser_task_rsp;
rx_sg = &rx_desc->rx_sg;
- rx_sg->addr = rx_desc->dma_addr;
+ rx_sg->addr = rx_desc->dma_addr;
rx_sg->length = ISER_RX_PAYLOAD_SIZE;
- rx_sg->lkey = device->pd->local_dma_lkey;
+ rx_sg->lkey = device->pd->local_dma_lkey;
}
iser_conn->rx_desc_head = 0;
@@ -383,6 +374,7 @@ int iser_send_command(struct iscsi_conn *conn,
/* build the tx desc regd header and add it to the tx desc dto */
tx_desc->type = ISCSI_TX_SCSI_COMMAND;
+ tx_desc->cqe.done = iser_cmd_comp;
iser_create_send_desc(iser_conn, tx_desc);
if (hdr->flags & ISCSI_FLAG_CMD_READ) {
@@ -464,6 +456,7 @@ int iser_send_data_out(struct iscsi_conn *conn,
}
tx_desc->type = ISCSI_TX_DATAOUT;
+ tx_desc->cqe.done = iser_dataout_comp;
tx_desc->iser_header.flags = ISER_VER;
memcpy(&tx_desc->iscsi_header, hdr, sizeof(struct iscsi_hdr));
@@ -513,6 +506,7 @@ int iser_send_control(struct iscsi_conn *conn,
/* build the tx desc regd header and add it to the tx desc dto */
mdesc->type = ISCSI_TX_CONTROL;
+ mdesc->cqe.done = iser_ctrl_comp;
iser_create_send_desc(iser_conn, mdesc);
device = iser_conn->ib_conn.device;
@@ -520,25 +514,25 @@ int iser_send_control(struct iscsi_conn *conn,
data_seg_len = ntoh24(task->hdr->dlength);
if (data_seg_len > 0) {
+ struct iser_login_desc *desc = &iser_conn->login_desc;
struct ib_sge *tx_dsg = &mdesc->tx_sg[1];
+
if (task != conn->login_task) {
iser_err("data present on non login task!!!\n");
goto send_control_error;
}
- ib_dma_sync_single_for_cpu(device->ib_device,
- iser_conn->login_req_dma, task->data_count,
- DMA_TO_DEVICE);
+ ib_dma_sync_single_for_cpu(device->ib_device, desc->req_dma,
+ task->data_count, DMA_TO_DEVICE);
- memcpy(iser_conn->login_req_buf, task->data, task->data_count);
+ memcpy(desc->req, task->data, task->data_count);
- ib_dma_sync_single_for_device(device->ib_device,
- iser_conn->login_req_dma, task->data_count,
- DMA_TO_DEVICE);
+ ib_dma_sync_single_for_device(device->ib_device, desc->req_dma,
+ task->data_count, DMA_TO_DEVICE);
- tx_dsg->addr = iser_conn->login_req_dma;
- tx_dsg->length = task->data_count;
- tx_dsg->lkey = device->pd->local_dma_lkey;
+ tx_dsg->addr = desc->req_dma;
+ tx_dsg->length = task->data_count;
+ tx_dsg->lkey = device->pd->local_dma_lkey;
mdesc->num_sge = 2;
}
@@ -562,41 +556,126 @@ send_control_error:
return err;
}
-/**
- * iser_rcv_dto_completion - recv DTO completion
- */
-void iser_rcv_completion(struct iser_rx_desc *rx_desc,
- unsigned long rx_xfer_len,
- struct ib_conn *ib_conn)
+void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc)
{
- struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
- ib_conn);
+ struct ib_conn *ib_conn = wc->qp->qp_context;
+ struct iser_conn *iser_conn = to_iser_conn(ib_conn);
+ struct iser_login_desc *desc = iser_login(wc->wr_cqe);
struct iscsi_hdr *hdr;
- u64 rx_dma;
- int rx_buflen, outstanding, count, err;
+ char *data;
+ int length;
- /* differentiate between login to all other PDUs */
- if ((char *)rx_desc == iser_conn->login_resp_buf) {
- rx_dma = iser_conn->login_resp_dma;
- rx_buflen = ISER_RX_LOGIN_SIZE;
- } else {
- rx_dma = rx_desc->dma_addr;
- rx_buflen = ISER_RX_PAYLOAD_SIZE;
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ iser_err_comp(wc, "login_rsp");
+ return;
+ }
+
+ ib_dma_sync_single_for_cpu(ib_conn->device->ib_device,
+ desc->rsp_dma, ISER_RX_LOGIN_SIZE,
+ DMA_FROM_DEVICE);
+
+ hdr = desc->rsp + sizeof(struct iser_ctrl);
+ data = desc->rsp + ISER_HEADERS_LEN;
+ length = wc->byte_len - ISER_HEADERS_LEN;
+
+ iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
+ hdr->itt, length);
+
+ iscsi_iser_recv(iser_conn->iscsi_conn, hdr, data, length);
+
+ ib_dma_sync_single_for_device(ib_conn->device->ib_device,
+ desc->rsp_dma, ISER_RX_LOGIN_SIZE,
+ DMA_FROM_DEVICE);
+
+ ib_conn->post_recv_buf_count--;
+}
+
+static inline void
+iser_inv_desc(struct iser_fr_desc *desc, u32 rkey)
+{
+ if (likely(rkey == desc->rsc.mr->rkey))
+ desc->rsc.mr_valid = 0;
+ else if (likely(rkey == desc->pi_ctx->sig_mr->rkey))
+ desc->pi_ctx->sig_mr_valid = 0;
+}
+
+static int
+iser_check_remote_inv(struct iser_conn *iser_conn,
+ struct ib_wc *wc,
+ struct iscsi_hdr *hdr)
+{
+ if (wc->wc_flags & IB_WC_WITH_INVALIDATE) {
+ struct iscsi_task *task;
+ u32 rkey = wc->ex.invalidate_rkey;
+
+ iser_dbg("conn %p: remote invalidation for rkey %#x\n",
+ iser_conn, rkey);
+
+ if (unlikely(!iser_conn->snd_w_inv)) {
+ iser_err("conn %p: unexepected remote invalidation, "
+ "terminating connection\n", iser_conn);
+ return -EPROTO;
+ }
+
+ task = iscsi_itt_to_ctask(iser_conn->iscsi_conn, hdr->itt);
+ if (likely(task)) {
+ struct iscsi_iser_task *iser_task = task->dd_data;
+ struct iser_fr_desc *desc;
+
+ if (iser_task->dir[ISER_DIR_IN]) {
+ desc = iser_task->rdma_reg[ISER_DIR_IN].mem_h;
+ iser_inv_desc(desc, rkey);
+ }
+
+ if (iser_task->dir[ISER_DIR_OUT]) {
+ desc = iser_task->rdma_reg[ISER_DIR_OUT].mem_h;
+ iser_inv_desc(desc, rkey);
+ }
+ } else {
+ iser_err("failed to get task for itt=%d\n", hdr->itt);
+ return -EINVAL;
+ }
}
- ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma,
- rx_buflen, DMA_FROM_DEVICE);
+ return 0;
+}
- hdr = &rx_desc->iscsi_header;
+
+void iser_task_rsp(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct ib_conn *ib_conn = wc->qp->qp_context;
+ struct iser_conn *iser_conn = to_iser_conn(ib_conn);
+ struct iser_rx_desc *desc = iser_rx(wc->wr_cqe);
+ struct iscsi_hdr *hdr;
+ int length;
+ int outstanding, count, err;
+
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ iser_err_comp(wc, "task_rsp");
+ return;
+ }
+
+ ib_dma_sync_single_for_cpu(ib_conn->device->ib_device,
+ desc->dma_addr, ISER_RX_PAYLOAD_SIZE,
+ DMA_FROM_DEVICE);
+
+ hdr = &desc->iscsi_header;
+ length = wc->byte_len - ISER_HEADERS_LEN;
iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
- hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN));
+ hdr->itt, length);
+
+ if (iser_check_remote_inv(iser_conn, wc, hdr)) {
+ iscsi_conn_failure(iser_conn->iscsi_conn,
+ ISCSI_ERR_CONN_FAILED);
+ return;
+ }
- iscsi_iser_recv(iser_conn->iscsi_conn, hdr, rx_desc->data,
- rx_xfer_len - ISER_HEADERS_LEN);
+ iscsi_iser_recv(iser_conn->iscsi_conn, hdr, desc->data, length);
- ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma,
- rx_buflen, DMA_FROM_DEVICE);
+ ib_dma_sync_single_for_device(ib_conn->device->ib_device,
+ desc->dma_addr, ISER_RX_PAYLOAD_SIZE,
+ DMA_FROM_DEVICE);
/* decrementing conn->post_recv_buf_count only --after-- freeing the *
* task eliminates the need to worry on tasks which are completed in *
@@ -604,9 +683,6 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,
* for the posted rx bufs refcount to become zero handles everything */
ib_conn->post_recv_buf_count--;
- if (rx_dma == iser_conn->login_resp_dma)
- return;
-
outstanding = ib_conn->post_recv_buf_count;
if (outstanding + iser_conn->min_posted_rx <= iser_conn->qp_max_recv_dtos) {
count = min(iser_conn->qp_max_recv_dtos - outstanding,
@@ -617,26 +693,47 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,
}
}
-void iser_snd_completion(struct iser_tx_desc *tx_desc,
- struct ib_conn *ib_conn)
+void iser_cmd_comp(struct ib_cq *cq, struct ib_wc *wc)
{
+ if (unlikely(wc->status != IB_WC_SUCCESS))
+ iser_err_comp(wc, "command");
+}
+
+void iser_ctrl_comp(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct iser_tx_desc *desc = iser_tx(wc->wr_cqe);
struct iscsi_task *task;
- struct iser_device *device = ib_conn->device;
- if (tx_desc->type == ISCSI_TX_DATAOUT) {
- ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
- ISER_HEADERS_LEN, DMA_TO_DEVICE);
- kmem_cache_free(ig.desc_cache, tx_desc);
- tx_desc = NULL;
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ iser_err_comp(wc, "control");
+ return;
}
- if (tx_desc && tx_desc->type == ISCSI_TX_CONTROL) {
- /* this arithmetic is legal by libiscsi dd_data allocation */
- task = (void *) ((long)(void *)tx_desc -
- sizeof(struct iscsi_task));
- if (task->hdr->itt == RESERVED_ITT)
- iscsi_put_task(task);
- }
+ /* this arithmetic is legal by libiscsi dd_data allocation */
+ task = (void *)desc - sizeof(struct iscsi_task);
+ if (task->hdr->itt == RESERVED_ITT)
+ iscsi_put_task(task);
+}
+
+void iser_dataout_comp(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct iser_tx_desc *desc = iser_tx(wc->wr_cqe);
+ struct ib_conn *ib_conn = wc->qp->qp_context;
+ struct iser_device *device = ib_conn->device;
+
+ if (unlikely(wc->status != IB_WC_SUCCESS))
+ iser_err_comp(wc, "dataout");
+
+ ib_dma_unmap_single(device->ib_device, desc->dma_addr,
+ ISER_HEADERS_LEN, DMA_TO_DEVICE);
+ kmem_cache_free(ig.desc_cache, desc);
+}
+
+void iser_last_comp(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct ib_conn *ib_conn = wc->qp->qp_context;
+
+ complete(&ib_conn->last_comp);
}
void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index ea765fb9664d..9a391cc5b9b3 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -49,7 +49,7 @@ int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
struct iser_reg_resources *rsc,
struct iser_mem_reg *mem_reg);
-static struct iser_reg_ops fastreg_ops = {
+static const struct iser_reg_ops fastreg_ops = {
.alloc_reg_res = iser_alloc_fastreg_pool,
.free_reg_res = iser_free_fastreg_pool,
.reg_mem = iser_fast_reg_mr,
@@ -58,7 +58,7 @@ static struct iser_reg_ops fastreg_ops = {
.reg_desc_put = iser_reg_desc_put_fr,
};
-static struct iser_reg_ops fmr_ops = {
+static const struct iser_reg_ops fmr_ops = {
.alloc_reg_res = iser_alloc_fmr_pool,
.free_reg_res = iser_free_fmr_pool,
.reg_mem = iser_fast_reg_fmr,
@@ -67,19 +67,24 @@ static struct iser_reg_ops fmr_ops = {
.reg_desc_put = iser_reg_desc_put_fmr,
};
+void iser_reg_comp(struct ib_cq *cq, struct ib_wc *wc)
+{
+ iser_err_comp(wc, "memreg");
+}
+
int iser_assign_reg_ops(struct iser_device *device)
{
- struct ib_device_attr *dev_attr = &device->dev_attr;
+ struct ib_device *ib_dev = device->ib_device;
/* Assign function handles - based on FMR support */
- if (device->ib_device->alloc_fmr && device->ib_device->dealloc_fmr &&
- device->ib_device->map_phys_fmr && device->ib_device->unmap_fmr) {
+ if (ib_dev->alloc_fmr && ib_dev->dealloc_fmr &&
+ ib_dev->map_phys_fmr && ib_dev->unmap_fmr) {
iser_info("FMR supported, using FMR for registration\n");
device->reg_ops = &fmr_ops;
- } else
- if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
+ } else if (ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
iser_info("FastReg supported, using FastReg for registration\n");
device->reg_ops = &fastreg_ops;
+ device->remote_inv_sup = iser_always_reg;
} else {
iser_err("IB device does not support FMRs nor FastRegs, can't register memory\n");
return -1;
@@ -131,67 +136,6 @@ iser_reg_desc_put_fmr(struct ib_conn *ib_conn,
{
}
-#define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & ~MASK_4K) == 0)
-
-/**
- * iser_sg_to_page_vec - Translates scatterlist entries to physical addresses
- * and returns the length of resulting physical address array (may be less than
- * the original due to possible compaction).
- *
- * we build a "page vec" under the assumption that the SG meets the RDMA
- * alignment requirements. Other then the first and last SG elements, all
- * the "internal" elements can be compacted into a list whose elements are
- * dma addresses of physical pages. The code supports also the weird case
- * where --few fragments of the same page-- are present in the SG as
- * consecutive elements. Also, it handles one entry SG.
- */
-
-static int iser_sg_to_page_vec(struct iser_data_buf *data,
- struct ib_device *ibdev, u64 *pages,
- int *offset, int *data_size)
-{
- struct scatterlist *sg, *sgl = data->sg;
- u64 start_addr, end_addr, page, chunk_start = 0;
- unsigned long total_sz = 0;
- unsigned int dma_len;
- int i, new_chunk, cur_page, last_ent = data->dma_nents - 1;
-
- /* compute the offset of first element */
- *offset = (u64) sgl[0].offset & ~MASK_4K;
-
- new_chunk = 1;
- cur_page = 0;
- for_each_sg(sgl, sg, data->dma_nents, i) {
- start_addr = ib_sg_dma_address(ibdev, sg);
- if (new_chunk)
- chunk_start = start_addr;
- dma_len = ib_sg_dma_len(ibdev, sg);
- end_addr = start_addr + dma_len;
- total_sz += dma_len;
-
- /* collect page fragments until aligned or end of SG list */
- if (!IS_4K_ALIGNED(end_addr) && i < last_ent) {
- new_chunk = 0;
- continue;
- }
- new_chunk = 1;
-
- /* address of the first page in the contiguous chunk;
- masking relevant for the very first SG entry,
- which might be unaligned */
- page = chunk_start & MASK_4K;
- do {
- pages[cur_page++] = page;
- page += SIZE_4K;
- } while (page < end_addr);
- }
-
- *data_size = total_sz;
- iser_dbg("page_vec->data_size:%d cur_page %d\n",
- *data_size, cur_page);
- return cur_page;
-}
-
static void iser_data_buf_dump(struct iser_data_buf *data,
struct ib_device *ibdev)
{
@@ -210,10 +154,10 @@ static void iser_dump_page_vec(struct iser_page_vec *page_vec)
{
int i;
- iser_err("page vec length %d data size %d\n",
- page_vec->length, page_vec->data_size);
- for (i = 0; i < page_vec->length; i++)
- iser_err("%d %lx\n",i,(unsigned long)page_vec->pages[i]);
+ iser_err("page vec npages %d data length %d\n",
+ page_vec->npages, page_vec->fake_mr.length);
+ for (i = 0; i < page_vec->npages; i++)
+ iser_err("vec[%d]: %llx\n", i, page_vec->pages[i]);
}
int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
@@ -251,7 +195,11 @@ iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
struct scatterlist *sg = mem->sg;
reg->sge.lkey = device->pd->local_dma_lkey;
- reg->rkey = device->mr->rkey;
+ /*
+ * FIXME: rework the registration code path to differentiate
+ * rkey/lkey use cases
+ */
+ reg->rkey = device->mr ? device->mr->rkey : 0;
reg->sge.addr = ib_sg_dma_address(device->ib_device, &sg[0]);
reg->sge.length = ib_sg_dma_len(device->ib_device, &sg[0]);
@@ -262,11 +210,16 @@ iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
return 0;
}
-/**
- * iser_reg_page_vec - Register physical memory
- *
- * returns: 0 on success, errno code on failure
- */
+static int iser_set_page(struct ib_mr *mr, u64 addr)
+{
+ struct iser_page_vec *page_vec =
+ container_of(mr, struct iser_page_vec, fake_mr);
+
+ page_vec->pages[page_vec->npages++] = addr;
+
+ return 0;
+}
+
static
int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task,
struct iser_data_buf *mem,
@@ -280,22 +233,19 @@ int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task,
struct ib_pool_fmr *fmr;
int ret, plen;
- plen = iser_sg_to_page_vec(mem, device->ib_device,
- page_vec->pages,
- &page_vec->offset,
- &page_vec->data_size);
- page_vec->length = plen;
- if (plen * SIZE_4K < page_vec->data_size) {
+ page_vec->npages = 0;
+ page_vec->fake_mr.page_size = SIZE_4K;
+ plen = ib_sg_to_pages(&page_vec->fake_mr, mem->sg,
+ mem->size, iser_set_page);
+ if (unlikely(plen < mem->size)) {
iser_err("page vec too short to hold this SG\n");
iser_data_buf_dump(mem, device->ib_device);
iser_dump_page_vec(page_vec);
return -EINVAL;
}
- fmr = ib_fmr_pool_map_phys(fmr_pool,
- page_vec->pages,
- page_vec->length,
- page_vec->pages[0]);
+ fmr = ib_fmr_pool_map_phys(fmr_pool, page_vec->pages,
+ page_vec->npages, page_vec->pages[0]);
if (IS_ERR(fmr)) {
ret = PTR_ERR(fmr);
iser_err("ib_fmr_pool_map_phys failed: %d\n", ret);
@@ -304,8 +254,8 @@ int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task,
reg->sge.lkey = fmr->fmr->lkey;
reg->rkey = fmr->fmr->rkey;
- reg->sge.addr = page_vec->pages[0] + page_vec->offset;
- reg->sge.length = page_vec->data_size;
+ reg->sge.addr = page_vec->fake_mr.iova;
+ reg->sge.length = page_vec->fake_mr.length;
reg->mem_h = fmr;
iser_dbg("fmr reg: lkey=0x%x, rkey=0x%x, addr=0x%llx,"
@@ -413,19 +363,16 @@ iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask)
*mask |= ISER_CHECK_GUARD;
}
-static void
-iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
+static inline void
+iser_inv_rkey(struct ib_send_wr *inv_wr,
+ struct ib_mr *mr,
+ struct ib_cqe *cqe)
{
- u32 rkey;
-
inv_wr->opcode = IB_WR_LOCAL_INV;
- inv_wr->wr_id = ISER_FASTREG_LI_WRID;
+ inv_wr->wr_cqe = cqe;
inv_wr->ex.invalidate_rkey = mr->rkey;
inv_wr->send_flags = 0;
inv_wr->num_sge = 0;
-
- rkey = ib_inc_rkey(mr->rkey);
- ib_update_fast_reg_key(mr, rkey);
}
static int
@@ -437,7 +384,9 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
{
struct iser_tx_desc *tx_desc = &iser_task->desc;
struct ib_sig_attrs *sig_attrs = &tx_desc->sig_attrs;
+ struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe;
struct ib_sig_handover_wr *wr;
+ struct ib_mr *mr = pi_ctx->sig_mr;
int ret;
memset(sig_attrs, 0, sizeof(*sig_attrs));
@@ -447,17 +396,19 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
iser_set_prot_checks(iser_task->sc, &sig_attrs->check_mask);
- if (!pi_ctx->sig_mr_valid)
- iser_inv_rkey(iser_tx_next_wr(tx_desc), pi_ctx->sig_mr);
+ if (pi_ctx->sig_mr_valid)
+ iser_inv_rkey(iser_tx_next_wr(tx_desc), mr, cqe);
+
+ ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));
wr = sig_handover_wr(iser_tx_next_wr(tx_desc));
wr->wr.opcode = IB_WR_REG_SIG_MR;
- wr->wr.wr_id = ISER_FASTREG_LI_WRID;
+ wr->wr.wr_cqe = cqe;
wr->wr.sg_list = &data_reg->sge;
wr->wr.num_sge = 1;
wr->wr.send_flags = 0;
wr->sig_attrs = sig_attrs;
- wr->sig_mr = pi_ctx->sig_mr;
+ wr->sig_mr = mr;
if (scsi_prot_sg_count(iser_task->sc))
wr->prot = &prot_reg->sge;
else
@@ -465,10 +416,10 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
wr->access_flags = IB_ACCESS_LOCAL_WRITE |
IB_ACCESS_REMOTE_READ |
IB_ACCESS_REMOTE_WRITE;
- pi_ctx->sig_mr_valid = 0;
+ pi_ctx->sig_mr_valid = 1;
- sig_reg->sge.lkey = pi_ctx->sig_mr->lkey;
- sig_reg->rkey = pi_ctx->sig_mr->rkey;
+ sig_reg->sge.lkey = mr->lkey;
+ sig_reg->rkey = mr->rkey;
sig_reg->sge.addr = 0;
sig_reg->sge.length = scsi_transfer_length(iser_task->sc);
@@ -485,12 +436,15 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
struct iser_mem_reg *reg)
{
struct iser_tx_desc *tx_desc = &iser_task->desc;
+ struct ib_cqe *cqe = &iser_task->iser_conn->ib_conn.reg_cqe;
struct ib_mr *mr = rsc->mr;
struct ib_reg_wr *wr;
int n;
- if (!rsc->mr_valid)
- iser_inv_rkey(iser_tx_next_wr(tx_desc), mr);
+ if (rsc->mr_valid)
+ iser_inv_rkey(iser_tx_next_wr(tx_desc), mr, cqe);
+
+ ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));
n = ib_map_mr_sg(mr, mem->sg, mem->size, SIZE_4K);
if (unlikely(n != mem->size)) {
@@ -501,7 +455,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
wr = reg_wr(iser_tx_next_wr(tx_desc));
wr->wr.opcode = IB_WR_REG_MR;
- wr->wr.wr_id = ISER_FASTREG_LI_WRID;
+ wr->wr.wr_cqe = cqe;
wr->wr.send_flags = 0;
wr->wr.num_sge = 0;
wr->mr = mr;
@@ -510,7 +464,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
IB_ACCESS_REMOTE_WRITE |
IB_ACCESS_REMOTE_READ;
- rsc->mr_valid = 0;
+ rsc->mr_valid = 1;
reg->sge.lkey = mr->lkey;
reg->rkey = mr->rkey;
@@ -554,7 +508,8 @@ iser_reg_data_sg(struct iscsi_iser_task *task,
}
int iser_reg_rdma_mem(struct iscsi_iser_task *task,
- enum iser_data_dir dir)
+ enum iser_data_dir dir,
+ bool all_imm)
{
struct ib_conn *ib_conn = &task->iser_conn->ib_conn;
struct iser_device *device = ib_conn->device;
@@ -565,8 +520,8 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task,
bool use_dma_key;
int err;
- use_dma_key = (mem->dma_nents == 1 && !iser_always_reg &&
- scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL);
+ use_dma_key = mem->dma_nents == 1 && (all_imm || !iser_always_reg) &&
+ scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL;
if (!use_dma_key) {
desc = device->reg_ops->reg_desc_get(ib_conn);
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 42f4da620f2e..40c0f4978e2f 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -44,17 +44,6 @@
#define ISER_MAX_CQ_LEN (ISER_MAX_RX_LEN + ISER_MAX_TX_LEN + \
ISCSI_ISER_MAX_CONN)
-static int iser_cq_poll_limit = 512;
-
-static void iser_cq_tasklet_fn(unsigned long data);
-static void iser_cq_callback(struct ib_cq *cq, void *cq_context);
-
-static void iser_cq_event_callback(struct ib_event *cause, void *context)
-{
- iser_err("cq event %s (%d)\n",
- ib_event_msg(cause->event), cause->event);
-}
-
static void iser_qp_event_callback(struct ib_event *cause, void *context)
{
iser_err("qp event %s (%d)\n",
@@ -78,59 +67,40 @@ static void iser_event_handler(struct ib_event_handler *handler,
*/
static int iser_create_device_ib_res(struct iser_device *device)
{
- struct ib_device_attr *dev_attr = &device->dev_attr;
+ struct ib_device *ib_dev = device->ib_device;
int ret, i, max_cqe;
- ret = ib_query_device(device->ib_device, dev_attr);
- if (ret) {
- pr_warn("Query device failed for %s\n", device->ib_device->name);
- return ret;
- }
-
ret = iser_assign_reg_ops(device);
if (ret)
return ret;
device->comps_used = min_t(int, num_online_cpus(),
- device->ib_device->num_comp_vectors);
+ ib_dev->num_comp_vectors);
device->comps = kcalloc(device->comps_used, sizeof(*device->comps),
GFP_KERNEL);
if (!device->comps)
goto comps_err;
- max_cqe = min(ISER_MAX_CQ_LEN, dev_attr->max_cqe);
+ max_cqe = min(ISER_MAX_CQ_LEN, ib_dev->attrs.max_cqe);
iser_info("using %d CQs, device %s supports %d vectors max_cqe %d\n",
- device->comps_used, device->ib_device->name,
- device->ib_device->num_comp_vectors, max_cqe);
+ device->comps_used, ib_dev->name,
+ ib_dev->num_comp_vectors, max_cqe);
- device->pd = ib_alloc_pd(device->ib_device);
+ device->pd = ib_alloc_pd(ib_dev);
if (IS_ERR(device->pd))
goto pd_err;
for (i = 0; i < device->comps_used; i++) {
- struct ib_cq_init_attr cq_attr = {};
struct iser_comp *comp = &device->comps[i];
- comp->device = device;
- cq_attr.cqe = max_cqe;
- cq_attr.comp_vector = i;
- comp->cq = ib_create_cq(device->ib_device,
- iser_cq_callback,
- iser_cq_event_callback,
- (void *)comp,
- &cq_attr);
+ comp->cq = ib_alloc_cq(ib_dev, comp, max_cqe, i,
+ IB_POLL_SOFTIRQ);
if (IS_ERR(comp->cq)) {
comp->cq = NULL;
goto cq_err;
}
-
- if (ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP))
- goto cq_err;
-
- tasklet_init(&comp->tasklet, iser_cq_tasklet_fn,
- (unsigned long)comp);
}
if (!iser_always_reg) {
@@ -140,11 +110,11 @@ static int iser_create_device_ib_res(struct iser_device *device)
device->mr = ib_get_dma_mr(device->pd, access);
if (IS_ERR(device->mr))
- goto dma_mr_err;
+ goto cq_err;
}
- INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device,
- iser_event_handler);
+ INIT_IB_EVENT_HANDLER(&device->event_handler, ib_dev,
+ iser_event_handler);
if (ib_register_event_handler(&device->event_handler))
goto handler_err;
@@ -153,15 +123,12 @@ static int iser_create_device_ib_res(struct iser_device *device)
handler_err:
if (device->mr)
ib_dereg_mr(device->mr);
-dma_mr_err:
- for (i = 0; i < device->comps_used; i++)
- tasklet_kill(&device->comps[i].tasklet);
cq_err:
for (i = 0; i < device->comps_used; i++) {
struct iser_comp *comp = &device->comps[i];
if (comp->cq)
- ib_destroy_cq(comp->cq);
+ ib_free_cq(comp->cq);
}
ib_dealloc_pd(device->pd);
pd_err:
@@ -182,8 +149,7 @@ static void iser_free_device_ib_res(struct iser_device *device)
for (i = 0; i < device->comps_used; i++) {
struct iser_comp *comp = &device->comps[i];
- tasklet_kill(&comp->tasklet);
- ib_destroy_cq(comp->cq);
+ ib_free_cq(comp->cq);
comp->cq = NULL;
}
@@ -299,7 +265,7 @@ iser_alloc_reg_res(struct ib_device *ib_device,
iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret);
return ret;
}
- res->mr_valid = 1;
+ res->mr_valid = 0;
return 0;
}
@@ -336,7 +302,7 @@ iser_alloc_pi_ctx(struct ib_device *ib_device,
ret = PTR_ERR(pi_ctx->sig_mr);
goto sig_mr_failure;
}
- pi_ctx->sig_mr_valid = 1;
+ pi_ctx->sig_mr_valid = 0;
desc->pi_ctx->sig_protected = 0;
return 0;
@@ -461,10 +427,9 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn)
*/
static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
{
- struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
- ib_conn);
+ struct iser_conn *iser_conn = to_iser_conn(ib_conn);
struct iser_device *device;
- struct ib_device_attr *dev_attr;
+ struct ib_device *ib_dev;
struct ib_qp_init_attr init_attr;
int ret = -ENOMEM;
int index, min_index = 0;
@@ -472,7 +437,7 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
BUG_ON(ib_conn->device == NULL);
device = ib_conn->device;
- dev_attr = &device->dev_attr;
+ ib_dev = device->ib_device;
memset(&init_attr, 0, sizeof init_attr);
@@ -503,16 +468,16 @@ static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
iser_conn->max_cmds =
ISER_GET_MAX_XMIT_CMDS(ISER_QP_SIG_MAX_REQ_DTOS);
} else {
- if (dev_attr->max_qp_wr > ISER_QP_MAX_REQ_DTOS) {
+ if (ib_dev->attrs.max_qp_wr > ISER_QP_MAX_REQ_DTOS) {
init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS + 1;
iser_conn->max_cmds =
ISER_GET_MAX_XMIT_CMDS(ISER_QP_MAX_REQ_DTOS);
} else {
- init_attr.cap.max_send_wr = dev_attr->max_qp_wr;
+ init_attr.cap.max_send_wr = ib_dev->attrs.max_qp_wr;
iser_conn->max_cmds =
- ISER_GET_MAX_XMIT_CMDS(dev_attr->max_qp_wr);
+ ISER_GET_MAX_XMIT_CMDS(ib_dev->attrs.max_qp_wr);
iser_dbg("device %s supports max_send_wr %d\n",
- device->ib_device->name, dev_attr->max_qp_wr);
+ device->ib_device->name, ib_dev->attrs.max_qp_wr);
}
}
@@ -724,13 +689,13 @@ int iser_conn_terminate(struct iser_conn *iser_conn)
iser_conn, err);
/* post an indication that all flush errors were consumed */
- err = ib_post_send(ib_conn->qp, &ib_conn->beacon, &bad_wr);
+ err = ib_post_send(ib_conn->qp, &ib_conn->last, &bad_wr);
if (err) {
- iser_err("conn %p failed to post beacon", ib_conn);
+ iser_err("conn %p failed to post last wr", ib_conn);
return 1;
}
- wait_for_completion(&ib_conn->flush_comp);
+ wait_for_completion(&ib_conn->last_comp);
}
return 1;
@@ -756,7 +721,7 @@ iser_calc_scsi_params(struct iser_conn *iser_conn,
sg_tablesize = DIV_ROUND_UP(max_sectors * 512, SIZE_4K);
sup_sg_tablesize = min_t(unsigned, ISCSI_ISER_MAX_SG_TABLESIZE,
- device->dev_attr.max_fast_reg_page_list_len);
+ device->ib_device->attrs.max_fast_reg_page_list_len);
if (sg_tablesize > sup_sg_tablesize) {
sg_tablesize = sup_sg_tablesize;
@@ -799,7 +764,7 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id)
/* connection T10-PI support */
if (iser_pi_enable) {
- if (!(device->dev_attr.device_cap_flags &
+ if (!(device->ib_device->attrs.device_cap_flags &
IB_DEVICE_SIGNATURE_HANDOVER)) {
iser_warn("T10-PI requested but not supported on %s, "
"continue without T10-PI\n",
@@ -841,16 +806,17 @@ static void iser_route_handler(struct rdma_cm_id *cma_id)
goto failure;
memset(&conn_param, 0, sizeof conn_param);
- conn_param.responder_resources = device->dev_attr.max_qp_rd_atom;
+ conn_param.responder_resources = device->ib_device->attrs.max_qp_rd_atom;
conn_param.initiator_depth = 1;
conn_param.retry_count = 7;
conn_param.rnr_retry_count = 6;
memset(&req_hdr, 0, sizeof(req_hdr));
- req_hdr.flags = (ISER_ZBVA_NOT_SUPPORTED |
- ISER_SEND_W_INV_NOT_SUPPORTED);
- conn_param.private_data = (void *)&req_hdr;
- conn_param.private_data_len = sizeof(struct iser_cm_hdr);
+ req_hdr.flags = ISER_ZBVA_NOT_SUP;
+ if (!device->remote_inv_sup)
+ req_hdr.flags |= ISER_SEND_W_INV_NOT_SUP;
+ conn_param.private_data = (void *)&req_hdr;
+ conn_param.private_data_len = sizeof(struct iser_cm_hdr);
ret = rdma_connect(cma_id, &conn_param);
if (ret) {
@@ -863,7 +829,8 @@ failure:
iser_connect_error(cma_id);
}
-static void iser_connected_handler(struct rdma_cm_id *cma_id)
+static void iser_connected_handler(struct rdma_cm_id *cma_id,
+ const void *private_data)
{
struct iser_conn *iser_conn;
struct ib_qp_attr attr;
@@ -877,6 +844,15 @@ static void iser_connected_handler(struct rdma_cm_id *cma_id)
(void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr);
iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num);
+ if (private_data) {
+ u8 flags = *(u8 *)private_data;
+
+ iser_conn->snd_w_inv = !(flags & ISER_SEND_W_INV_NOT_SUP);
+ }
+
+ iser_info("conn %p: negotiated %s invalidation\n",
+ iser_conn, iser_conn->snd_w_inv ? "remote" : "local");
+
iser_conn->state = ISER_CONN_UP;
complete(&iser_conn->up_completion);
}
@@ -928,7 +904,7 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve
iser_route_handler(cma_id);
break;
case RDMA_CM_EVENT_ESTABLISHED:
- iser_connected_handler(cma_id);
+ iser_connected_handler(cma_id, event->param.conn.private_data);
break;
case RDMA_CM_EVENT_ADDR_ERROR:
case RDMA_CM_EVENT_ROUTE_ERROR:
@@ -967,14 +943,21 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve
void iser_conn_init(struct iser_conn *iser_conn)
{
+ struct ib_conn *ib_conn = &iser_conn->ib_conn;
+
iser_conn->state = ISER_CONN_INIT;
- iser_conn->ib_conn.post_recv_buf_count = 0;
- init_completion(&iser_conn->ib_conn.flush_comp);
init_completion(&iser_conn->stop_completion);
init_completion(&iser_conn->ib_completion);
init_completion(&iser_conn->up_completion);
INIT_LIST_HEAD(&iser_conn->conn_list);
mutex_init(&iser_conn->state_mutex);
+
+ ib_conn->post_recv_buf_count = 0;
+ ib_conn->reg_cqe.done = iser_reg_comp;
+ ib_conn->last_cqe.done = iser_last_comp;
+ ib_conn->last.wr_cqe = &ib_conn->last_cqe;
+ ib_conn->last.opcode = IB_WR_SEND;
+ init_completion(&ib_conn->last_comp);
}
/**
@@ -1000,9 +983,6 @@ int iser_connect(struct iser_conn *iser_conn,
iser_conn->state = ISER_CONN_PENDING;
- ib_conn->beacon.wr_id = ISER_BEACON_WRID;
- ib_conn->beacon.opcode = IB_WR_SEND;
-
ib_conn->cma_id = rdma_create_id(&init_net, iser_cma_handler,
(void *)iser_conn,
RDMA_PS_TCP, IB_QPT_RC);
@@ -1045,56 +1025,60 @@ connect_failure:
int iser_post_recvl(struct iser_conn *iser_conn)
{
- struct ib_recv_wr rx_wr, *rx_wr_failed;
struct ib_conn *ib_conn = &iser_conn->ib_conn;
- struct ib_sge sge;
+ struct iser_login_desc *desc = &iser_conn->login_desc;
+ struct ib_recv_wr wr, *wr_failed;
int ib_ret;
- sge.addr = iser_conn->login_resp_dma;
- sge.length = ISER_RX_LOGIN_SIZE;
- sge.lkey = ib_conn->device->pd->local_dma_lkey;
+ desc->sge.addr = desc->rsp_dma;
+ desc->sge.length = ISER_RX_LOGIN_SIZE;
+ desc->sge.lkey = ib_conn->device->pd->local_dma_lkey;
- rx_wr.wr_id = (uintptr_t)iser_conn->login_resp_buf;
- rx_wr.sg_list = &sge;
- rx_wr.num_sge = 1;
- rx_wr.next = NULL;
+ desc->cqe.done = iser_login_rsp;
+ wr.wr_cqe = &desc->cqe;
+ wr.sg_list = &desc->sge;
+ wr.num_sge = 1;
+ wr.next = NULL;
ib_conn->post_recv_buf_count++;
- ib_ret = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed);
+ ib_ret = ib_post_recv(ib_conn->qp, &wr, &wr_failed);
if (ib_ret) {
iser_err("ib_post_recv failed ret=%d\n", ib_ret);
ib_conn->post_recv_buf_count--;
}
+
return ib_ret;
}
int iser_post_recvm(struct iser_conn *iser_conn, int count)
{
- struct ib_recv_wr *rx_wr, *rx_wr_failed;
- int i, ib_ret;
struct ib_conn *ib_conn = &iser_conn->ib_conn;
unsigned int my_rx_head = iser_conn->rx_desc_head;
struct iser_rx_desc *rx_desc;
+ struct ib_recv_wr *wr, *wr_failed;
+ int i, ib_ret;
- for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
- rx_desc = &iser_conn->rx_descs[my_rx_head];
- rx_wr->wr_id = (uintptr_t)rx_desc;
- rx_wr->sg_list = &rx_desc->rx_sg;
- rx_wr->num_sge = 1;
- rx_wr->next = rx_wr + 1;
+ for (wr = ib_conn->rx_wr, i = 0; i < count; i++, wr++) {
+ rx_desc = &iser_conn->rx_descs[my_rx_head];
+ rx_desc->cqe.done = iser_task_rsp;
+ wr->wr_cqe = &rx_desc->cqe;
+ wr->sg_list = &rx_desc->rx_sg;
+ wr->num_sge = 1;
+ wr->next = wr + 1;
my_rx_head = (my_rx_head + 1) & iser_conn->qp_max_recv_dtos_mask;
}
- rx_wr--;
- rx_wr->next = NULL; /* mark end of work requests list */
+ wr--;
+ wr->next = NULL; /* mark end of work requests list */
ib_conn->post_recv_buf_count += count;
- ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed);
+ ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &wr_failed);
if (ib_ret) {
iser_err("ib_post_recv failed ret=%d\n", ib_ret);
ib_conn->post_recv_buf_count -= count;
} else
iser_conn->rx_desc_head = my_rx_head;
+
return ib_ret;
}
@@ -1115,7 +1099,7 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
DMA_TO_DEVICE);
wr->next = NULL;
- wr->wr_id = (uintptr_t)tx_desc;
+ wr->wr_cqe = &tx_desc->cqe;
wr->sg_list = tx_desc->tx_sg;
wr->num_sge = tx_desc->num_sge;
wr->opcode = IB_WR_SEND;
@@ -1129,149 +1113,6 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
return ib_ret;
}
-/**
- * is_iser_tx_desc - Indicate if the completion wr_id
- * is a TX descriptor or not.
- * @iser_conn: iser connection
- * @wr_id: completion WR identifier
- *
- * Since we cannot rely on wc opcode in FLUSH errors
- * we must work around it by checking if the wr_id address
- * falls in the iser connection rx_descs buffer. If so
- * it is an RX descriptor, otherwize it is a TX.
- */
-static inline bool
-is_iser_tx_desc(struct iser_conn *iser_conn, void *wr_id)
-{
- void *start = iser_conn->rx_descs;
- int len = iser_conn->num_rx_descs * sizeof(*iser_conn->rx_descs);
-
- if (wr_id >= start && wr_id < start + len)
- return false;
-
- return true;
-}
-
-/**
- * iser_handle_comp_error() - Handle error completion
- * @ib_conn: connection RDMA resources
- * @wc: work completion
- *
- * Notes: We may handle a FLUSH error completion and in this case
- * we only cleanup in case TX type was DATAOUT. For non-FLUSH
- * error completion we should also notify iscsi layer that
- * connection is failed (in case we passed bind stage).
- */
-static void
-iser_handle_comp_error(struct ib_conn *ib_conn,
- struct ib_wc *wc)
-{
- void *wr_id = (void *)(uintptr_t)wc->wr_id;
- struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
- ib_conn);
-
- if (wc->status != IB_WC_WR_FLUSH_ERR)
- if (iser_conn->iscsi_conn)
- iscsi_conn_failure(iser_conn->iscsi_conn,
- ISCSI_ERR_CONN_FAILED);
-
- if (wc->wr_id == ISER_FASTREG_LI_WRID)
- return;
-
- if (is_iser_tx_desc(iser_conn, wr_id)) {
- struct iser_tx_desc *desc = wr_id;
-
- if (desc->type == ISCSI_TX_DATAOUT)
- kmem_cache_free(ig.desc_cache, desc);
- } else {
- ib_conn->post_recv_buf_count--;
- }
-}
-
-/**
- * iser_handle_wc - handle a single work completion
- * @wc: work completion
- *
- * Soft-IRQ context, work completion can be either
- * SEND or RECV, and can turn out successful or
- * with error (or flush error).
- */
-static void iser_handle_wc(struct ib_wc *wc)
-{
- struct ib_conn *ib_conn;
- struct iser_tx_desc *tx_desc;
- struct iser_rx_desc *rx_desc;
-
- ib_conn = wc->qp->qp_context;
- if (likely(wc->status == IB_WC_SUCCESS)) {
- if (wc->opcode == IB_WC_RECV) {
- rx_desc = (struct iser_rx_desc *)(uintptr_t)wc->wr_id;
- iser_rcv_completion(rx_desc, wc->byte_len,
- ib_conn);
- } else
- if (wc->opcode == IB_WC_SEND) {
- tx_desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id;
- iser_snd_completion(tx_desc, ib_conn);
- } else {
- iser_err("Unknown wc opcode %d\n", wc->opcode);
- }
- } else {
- if (wc->status != IB_WC_WR_FLUSH_ERR)
- iser_err("%s (%d): wr id %llx vend_err %x\n",
- ib_wc_status_msg(wc->status), wc->status,
- wc->wr_id, wc->vendor_err);
- else
- iser_dbg("%s (%d): wr id %llx\n",
- ib_wc_status_msg(wc->status), wc->status,
- wc->wr_id);
-
- if (wc->wr_id == ISER_BEACON_WRID)
- /* all flush errors were consumed */
- complete(&ib_conn->flush_comp);
- else
- iser_handle_comp_error(ib_conn, wc);
- }
-}
-
-/**
- * iser_cq_tasklet_fn - iSER completion polling loop
- * @data: iSER completion context
- *
- * Soft-IRQ context, polling connection CQ until
- * either CQ was empty or we exausted polling budget
- */
-static void iser_cq_tasklet_fn(unsigned long data)
-{
- struct iser_comp *comp = (struct iser_comp *)data;
- struct ib_cq *cq = comp->cq;
- struct ib_wc *const wcs = comp->wcs;
- int i, n, completed = 0;
-
- while ((n = ib_poll_cq(cq, ARRAY_SIZE(comp->wcs), wcs)) > 0) {
- for (i = 0; i < n; i++)
- iser_handle_wc(&wcs[i]);
-
- completed += n;
- if (completed >= iser_cq_poll_limit)
- break;
- }
-
- /*
- * It is assumed here that arming CQ only once its empty
- * would not cause interrupts to be missed.
- */
- ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
-
- iser_dbg("got %d completions\n", completed);
-}
-
-static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
-{
- struct iser_comp *comp = cq_context;
-
- tasklet_schedule(&comp->tasklet);
-}
-
u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir, sector_t *sector)
{
@@ -1319,3 +1160,21 @@ err:
/* Not alot we can do here, return ambiguous guard error */
return 0x1;
}
+
+void iser_err_comp(struct ib_wc *wc, const char *type)
+{
+ if (wc->status != IB_WC_WR_FLUSH_ERR) {
+ struct iser_conn *iser_conn = to_iser_conn(wc->qp->qp_context);
+
+ iser_err("%s failure: %s (%d) vend_err %x\n", type,
+ ib_wc_status_msg(wc->status), wc->status,
+ wc->vendor_err);
+
+ if (iser_conn->iscsi_conn)
+ iscsi_conn_failure(iser_conn->iscsi_conn,
+ ISCSI_ERR_CONN_FAILED);
+ } else {
+ iser_dbg("%s failure: %s (%d)\n", type,
+ ib_wc_status_msg(wc->status), wc->status);
+ }
+}
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 468c5e132563..f121e6129339 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -29,7 +29,6 @@
#include <target/iscsi/iscsi_transport.h>
#include <linux/semaphore.h>
-#include "isert_proto.h"
#include "ib_isert.h"
#define ISERT_MAX_CONN 8
@@ -95,22 +94,6 @@ isert_qp_event_callback(struct ib_event *e, void *context)
}
}
-static int
-isert_query_device(struct ib_device *ib_dev, struct ib_device_attr *devattr)
-{
- int ret;
-
- ret = ib_query_device(ib_dev, devattr);
- if (ret) {
- isert_err("ib_query_device() failed: %d\n", ret);
- return ret;
- }
- isert_dbg("devattr->max_sge: %d\n", devattr->max_sge);
- isert_dbg("devattr->max_sge_rd: %d\n", devattr->max_sge_rd);
-
- return 0;
-}
-
static struct isert_comp *
isert_comp_get(struct isert_conn *isert_conn)
{
@@ -157,9 +140,9 @@ isert_create_qp(struct isert_conn *isert_conn,
attr.recv_cq = comp->cq;
attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS;
attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1;
- attr.cap.max_send_sge = device->dev_attr.max_sge;
- isert_conn->max_sge = min(device->dev_attr.max_sge,
- device->dev_attr.max_sge_rd);
+ attr.cap.max_send_sge = device->ib_device->attrs.max_sge;
+ isert_conn->max_sge = min(device->ib_device->attrs.max_sge,
+ device->ib_device->attrs.max_sge_rd);
attr.cap.max_recv_sge = 1;
attr.sq_sig_type = IB_SIGNAL_REQ_WR;
attr.qp_type = IB_QPT_RC;
@@ -287,8 +270,7 @@ isert_free_comps(struct isert_device *device)
}
static int
-isert_alloc_comps(struct isert_device *device,
- struct ib_device_attr *attr)
+isert_alloc_comps(struct isert_device *device)
{
int i, max_cqe, ret = 0;
@@ -308,7 +290,7 @@ isert_alloc_comps(struct isert_device *device,
return -ENOMEM;
}
- max_cqe = min(ISER_MAX_CQ_LEN, attr->max_cqe);
+ max_cqe = min(ISER_MAX_CQ_LEN, device->ib_device->attrs.max_cqe);
for (i = 0; i < device->comps_used; i++) {
struct ib_cq_init_attr cq_attr = {};
@@ -344,17 +326,15 @@ out_cq:
static int
isert_create_device_ib_res(struct isert_device *device)
{
- struct ib_device_attr *dev_attr;
+ struct ib_device *ib_dev = device->ib_device;
int ret;
- dev_attr = &device->dev_attr;
- ret = isert_query_device(device->ib_device, dev_attr);
- if (ret)
- goto out;
+ isert_dbg("devattr->max_sge: %d\n", ib_dev->attrs.max_sge);
+ isert_dbg("devattr->max_sge_rd: %d\n", ib_dev->attrs.max_sge_rd);
/* asign function handlers */
- if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS &&
- dev_attr->device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER) {
+ if (ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS &&
+ ib_dev->attrs.device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER) {
device->use_fastreg = 1;
device->reg_rdma_mem = isert_reg_rdma;
device->unreg_rdma_mem = isert_unreg_rdma;
@@ -364,11 +344,11 @@ isert_create_device_ib_res(struct isert_device *device)
device->unreg_rdma_mem = isert_unmap_cmd;
}
- ret = isert_alloc_comps(device, dev_attr);
+ ret = isert_alloc_comps(device);
if (ret)
goto out;
- device->pd = ib_alloc_pd(device->ib_device);
+ device->pd = ib_alloc_pd(ib_dev);
if (IS_ERR(device->pd)) {
ret = PTR_ERR(device->pd);
isert_err("failed to allocate pd, device %p, ret=%d\n",
@@ -377,7 +357,7 @@ isert_create_device_ib_res(struct isert_device *device)
}
/* Check signature cap */
- device->pi_capable = dev_attr->device_cap_flags &
+ device->pi_capable = ib_dev->attrs.device_cap_flags &
IB_DEVICE_SIGNATURE_HANDOVER ? true : false;
return 0;
@@ -676,6 +656,32 @@ out_login_buf:
return ret;
}
+static void
+isert_set_nego_params(struct isert_conn *isert_conn,
+ struct rdma_conn_param *param)
+{
+ struct ib_device_attr *attr = &isert_conn->device->ib_device->attrs;
+
+ /* Set max inflight RDMA READ requests */
+ isert_conn->initiator_depth = min_t(u8, param->initiator_depth,
+ attr->max_qp_init_rd_atom);
+ isert_dbg("Using initiator_depth: %u\n", isert_conn->initiator_depth);
+
+ if (param->private_data) {
+ u8 flags = *(u8 *)param->private_data;
+
+ /*
+ * use remote invalidation if the both initiator
+ * and the HCA support it
+ */
+ isert_conn->snd_w_inv = !(flags & ISER_SEND_W_INV_NOT_SUP) &&
+ (attr->device_cap_flags &
+ IB_DEVICE_MEM_MGT_EXTENSIONS);
+ if (isert_conn->snd_w_inv)
+ isert_info("Using remote invalidation\n");
+ }
+}
+
static int
isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
{
@@ -714,11 +720,7 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
}
isert_conn->device = device;
- /* Set max inflight RDMA READ requests */
- isert_conn->initiator_depth = min_t(u8,
- event->param.conn.initiator_depth,
- device->dev_attr.max_qp_init_rd_atom);
- isert_dbg("Using initiator_depth: %u\n", isert_conn->initiator_depth);
+ isert_set_nego_params(isert_conn, &event->param.conn);
ret = isert_conn_setup_qp(isert_conn, cma_id);
if (ret)
@@ -1050,8 +1052,8 @@ isert_create_send_desc(struct isert_conn *isert_conn,
ib_dma_sync_single_for_cpu(ib_dev, tx_desc->dma_addr,
ISER_HEADERS_LEN, DMA_TO_DEVICE);
- memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
- tx_desc->iser_header.flags = ISER_VER;
+ memset(&tx_desc->iser_header, 0, sizeof(struct iser_ctrl));
+ tx_desc->iser_header.flags = ISCSI_CTRL;
tx_desc->num_sge = 1;
tx_desc->isert_cmd = isert_cmd;
@@ -1097,7 +1099,14 @@ isert_init_send_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
isert_cmd->rdma_wr.iser_ib_op = ISER_IB_SEND;
send_wr->wr_id = (uintptr_t)&isert_cmd->tx_desc;
- send_wr->opcode = IB_WR_SEND;
+
+ if (isert_conn->snd_w_inv && isert_cmd->inv_rkey) {
+ send_wr->opcode = IB_WR_SEND_WITH_INV;
+ send_wr->ex.invalidate_rkey = isert_cmd->inv_rkey;
+ } else {
+ send_wr->opcode = IB_WR_SEND;
+ }
+
send_wr->sg_list = &tx_desc->tx_sg[0];
send_wr->num_sge = isert_cmd->tx_desc.num_sge;
send_wr->send_flags = IB_SEND_SIGNALED;
@@ -1486,6 +1495,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
isert_cmd->read_va = read_va;
isert_cmd->write_stag = write_stag;
isert_cmd->write_va = write_va;
+ isert_cmd->inv_rkey = read_stag ? read_stag : write_stag;
ret = isert_handle_scsi_cmd(isert_conn, isert_cmd, cmd,
rx_desc, (unsigned char *)hdr);
@@ -1543,21 +1553,21 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
static void
isert_rx_do_work(struct iser_rx_desc *rx_desc, struct isert_conn *isert_conn)
{
- struct iser_hdr *iser_hdr = &rx_desc->iser_header;
+ struct iser_ctrl *iser_ctrl = &rx_desc->iser_header;
uint64_t read_va = 0, write_va = 0;
uint32_t read_stag = 0, write_stag = 0;
- switch (iser_hdr->flags & 0xF0) {
+ switch (iser_ctrl->flags & 0xF0) {
case ISCSI_CTRL:
- if (iser_hdr->flags & ISER_RSV) {
- read_stag = be32_to_cpu(iser_hdr->read_stag);
- read_va = be64_to_cpu(iser_hdr->read_va);
+ if (iser_ctrl->flags & ISER_RSV) {
+ read_stag = be32_to_cpu(iser_ctrl->read_stag);
+ read_va = be64_to_cpu(iser_ctrl->read_va);
isert_dbg("ISER_RSV: read_stag: 0x%x read_va: 0x%llx\n",
read_stag, (unsigned long long)read_va);
}
- if (iser_hdr->flags & ISER_WSV) {
- write_stag = be32_to_cpu(iser_hdr->write_stag);
- write_va = be64_to_cpu(iser_hdr->write_va);
+ if (iser_ctrl->flags & ISER_WSV) {
+ write_stag = be32_to_cpu(iser_ctrl->write_stag);
+ write_va = be64_to_cpu(iser_ctrl->write_va);
isert_dbg("ISER_WSV: write_stag: 0x%x write_va: 0x%llx\n",
write_stag, (unsigned long long)write_va);
}
@@ -1568,7 +1578,7 @@ isert_rx_do_work(struct iser_rx_desc *rx_desc, struct isert_conn *isert_conn)
isert_err("iSER Hello message\n");
break;
default:
- isert_warn("Unknown iSER hdr flags: 0x%02x\n", iser_hdr->flags);
+ isert_warn("Unknown iSER hdr flags: 0x%02x\n", iser_ctrl->flags);
break;
}
@@ -3095,12 +3105,20 @@ isert_rdma_accept(struct isert_conn *isert_conn)
struct rdma_cm_id *cm_id = isert_conn->cm_id;
struct rdma_conn_param cp;
int ret;
+ struct iser_cm_hdr rsp_hdr;
memset(&cp, 0, sizeof(struct rdma_conn_param));
cp.initiator_depth = isert_conn->initiator_depth;
cp.retry_count = 7;
cp.rnr_retry_count = 7;
+ memset(&rsp_hdr, 0, sizeof(rsp_hdr));
+ rsp_hdr.flags = ISERT_ZBVA_NOT_USED;
+ if (!isert_conn->snd_w_inv)
+ rsp_hdr.flags = rsp_hdr.flags | ISERT_SEND_W_INV_NOT_USED;
+ cp.private_data = (void *)&rsp_hdr;
+ cp.private_data_len = sizeof(rsp_hdr);
+
ret = rdma_accept(cm_id, &cp);
if (ret) {
isert_err("rdma_accept() failed with: %d\n", ret);
diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h
index 3d7fbc47c343..8d50453eef66 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.h
+++ b/drivers/infiniband/ulp/isert/ib_isert.h
@@ -3,6 +3,8 @@
#include <linux/in6.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
+#include <scsi/iser.h>
+
#define DRV_NAME "isert"
#define PFX DRV_NAME ": "
@@ -31,6 +33,38 @@
#define isert_err(fmt, arg...) \
pr_err(PFX "%s: " fmt, __func__ , ## arg)
+/* Constant PDU lengths calculations */
+#define ISER_HEADERS_LEN (sizeof(struct iser_ctrl) + \
+ sizeof(struct iscsi_hdr))
+#define ISER_RECV_DATA_SEG_LEN 8192
+#define ISER_RX_PAYLOAD_SIZE (ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN)
+#define ISER_RX_LOGIN_SIZE (ISER_HEADERS_LEN + ISCSI_DEF_MAX_RECV_SEG_LEN)
+
+/* QP settings */
+/* Maximal bounds on received asynchronous PDUs */
+#define ISERT_MAX_TX_MISC_PDUS 4 /* NOOP_IN(2) , ASYNC_EVENT(2) */
+
+#define ISERT_MAX_RX_MISC_PDUS 6 /*
+ * NOOP_OUT(2), TEXT(1),
+ * SCSI_TMFUNC(2), LOGOUT(1)
+ */
+
+#define ISCSI_DEF_XMIT_CMDS_MAX 128 /* from libiscsi.h, must be power of 2 */
+
+#define ISERT_QP_MAX_RECV_DTOS (ISCSI_DEF_XMIT_CMDS_MAX)
+
+#define ISERT_MIN_POSTED_RX (ISCSI_DEF_XMIT_CMDS_MAX >> 2)
+
+#define ISERT_INFLIGHT_DATAOUTS 8
+
+#define ISERT_QP_MAX_REQ_DTOS (ISCSI_DEF_XMIT_CMDS_MAX * \
+ (1 + ISERT_INFLIGHT_DATAOUTS) + \
+ ISERT_MAX_TX_MISC_PDUS + \
+ ISERT_MAX_RX_MISC_PDUS)
+
+#define ISER_RX_PAD_SIZE (ISER_RECV_DATA_SEG_LEN + 4096 - \
+ (ISER_RX_PAYLOAD_SIZE + sizeof(u64) + sizeof(struct ib_sge)))
+
#define ISCSI_ISER_SG_TABLESIZE 256
#define ISER_FASTREG_LI_WRID 0xffffffffffffffffULL
#define ISER_BEACON_WRID 0xfffffffffffffffeULL
@@ -56,7 +90,7 @@ enum iser_conn_state {
};
struct iser_rx_desc {
- struct iser_hdr iser_header;
+ struct iser_ctrl iser_header;
struct iscsi_hdr iscsi_header;
char data[ISER_RECV_DATA_SEG_LEN];
u64 dma_addr;
@@ -65,7 +99,7 @@ struct iser_rx_desc {
} __packed;
struct iser_tx_desc {
- struct iser_hdr iser_header;
+ struct iser_ctrl iser_header;
struct iscsi_hdr iscsi_header;
enum isert_desc_type type;
u64 dma_addr;
@@ -129,6 +163,7 @@ struct isert_cmd {
uint32_t write_stag;
uint64_t read_va;
uint64_t write_va;
+ uint32_t inv_rkey;
u64 pdu_buf_dma;
u32 pdu_buf_len;
struct isert_conn *conn;
@@ -176,6 +211,7 @@ struct isert_conn {
struct work_struct release_work;
struct ib_recv_wr beacon;
bool logout_posted;
+ bool snd_w_inv;
};
#define ISERT_MAX_CQ 64
@@ -207,7 +243,6 @@ struct isert_device {
struct isert_comp *comps;
int comps_used;
struct list_head dev_node;
- struct ib_device_attr dev_attr;
int (*reg_rdma_mem)(struct iscsi_conn *conn,
struct iscsi_cmd *cmd,
struct isert_rdma_wr *wr);
diff --git a/drivers/infiniband/ulp/isert/isert_proto.h b/drivers/infiniband/ulp/isert/isert_proto.h
deleted file mode 100644
index 4dccd313b777..000000000000
--- a/drivers/infiniband/ulp/isert/isert_proto.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/* From iscsi_iser.h */
-
-struct iser_hdr {
- u8 flags;
- u8 rsvd[3];
- __be32 write_stag; /* write rkey */
- __be64 write_va;
- __be32 read_stag; /* read rkey */
- __be64 read_va;
-} __packed;
-
-/*Constant PDU lengths calculations */
-#define ISER_HEADERS_LEN (sizeof(struct iser_hdr) + sizeof(struct iscsi_hdr))
-
-#define ISER_RECV_DATA_SEG_LEN 8192
-#define ISER_RX_PAYLOAD_SIZE (ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN)
-#define ISER_RX_LOGIN_SIZE (ISER_HEADERS_LEN + ISCSI_DEF_MAX_RECV_SEG_LEN)
-
-/* QP settings */
-/* Maximal bounds on received asynchronous PDUs */
-#define ISERT_MAX_TX_MISC_PDUS 4 /* NOOP_IN(2) , ASYNC_EVENT(2) */
-
-#define ISERT_MAX_RX_MISC_PDUS 6 /* NOOP_OUT(2), TEXT(1), *
- * SCSI_TMFUNC(2), LOGOUT(1) */
-
-#define ISCSI_DEF_XMIT_CMDS_MAX 128 /* from libiscsi.h, must be power of 2 */
-
-#define ISERT_QP_MAX_RECV_DTOS (ISCSI_DEF_XMIT_CMDS_MAX)
-
-#define ISERT_MIN_POSTED_RX (ISCSI_DEF_XMIT_CMDS_MAX >> 2)
-
-#define ISERT_INFLIGHT_DATAOUTS 8
-
-#define ISERT_QP_MAX_REQ_DTOS (ISCSI_DEF_XMIT_CMDS_MAX * \
- (1 + ISERT_INFLIGHT_DATAOUTS) + \
- ISERT_MAX_TX_MISC_PDUS + \
- ISERT_MAX_RX_MISC_PDUS)
-
-#define ISER_RX_PAD_SIZE (ISER_RECV_DATA_SEG_LEN + 4096 - \
- (ISER_RX_PAYLOAD_SIZE + sizeof(u64) + sizeof(struct ib_sge)))
-
-#define ISER_VER 0x10
-#define ISER_WSV 0x08
-#define ISER_RSV 0x04
-#define ISCSI_CTRL 0x10
-#define ISER_HELLO 0x20
-#define ISER_HELLORPLY 0x30
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 3db9a659719b..03022f6420d7 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -132,8 +132,9 @@ MODULE_PARM_DESC(ch_count,
static void srp_add_one(struct ib_device *device);
static void srp_remove_one(struct ib_device *device, void *client_data);
-static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr);
-static void srp_send_completion(struct ib_cq *cq, void *ch_ptr);
+static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
+static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
+ const char *opname);
static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
static struct scsi_transport_template *ib_srp_transport_template;
@@ -445,6 +446,17 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
dev->max_pages_per_mr);
}
+static void srp_drain_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct srp_rdma_ch *ch = cq->cq_context;
+
+ complete(&ch->done);
+}
+
+static struct ib_cqe srp_drain_cqe = {
+ .done = srp_drain_done,
+};
+
/**
* srp_destroy_qp() - destroy an RDMA queue pair
* @ch: SRP RDMA channel.
@@ -457,10 +469,11 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
static void srp_destroy_qp(struct srp_rdma_ch *ch)
{
static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
- static struct ib_recv_wr wr = { .wr_id = SRP_LAST_WR_ID };
+ static struct ib_recv_wr wr = { 0 };
struct ib_recv_wr *bad_wr;
int ret;
+ wr.wr_cqe = &srp_drain_cqe;
/* Destroying a QP and reusing ch->done is only safe if not connected */
WARN_ON_ONCE(ch->connected);
@@ -489,34 +502,27 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
struct ib_fmr_pool *fmr_pool = NULL;
struct srp_fr_pool *fr_pool = NULL;
const int m = dev->use_fast_reg ? 3 : 1;
- struct ib_cq_init_attr cq_attr = {};
int ret;
init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
if (!init_attr)
return -ENOMEM;
- /* + 1 for SRP_LAST_WR_ID */
- cq_attr.cqe = target->queue_size + 1;
- cq_attr.comp_vector = ch->comp_vector;
- recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, ch,
- &cq_attr);
+ /* queue_size + 1 for ib_drain_qp */
+ recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
+ ch->comp_vector, IB_POLL_SOFTIRQ);
if (IS_ERR(recv_cq)) {
ret = PTR_ERR(recv_cq);
goto err;
}
- cq_attr.cqe = m * target->queue_size;
- cq_attr.comp_vector = ch->comp_vector;
- send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, ch,
- &cq_attr);
+ send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size,
+ ch->comp_vector, IB_POLL_DIRECT);
if (IS_ERR(send_cq)) {
ret = PTR_ERR(send_cq);
goto err_recv_cq;
}
- ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);
-
init_attr->event_handler = srp_qp_event;
init_attr->cap.max_send_wr = m * target->queue_size;
init_attr->cap.max_recv_wr = target->queue_size + 1;
@@ -558,9 +564,9 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
if (ch->qp)
srp_destroy_qp(ch);
if (ch->recv_cq)
- ib_destroy_cq(ch->recv_cq);
+ ib_free_cq(ch->recv_cq);
if (ch->send_cq)
- ib_destroy_cq(ch->send_cq);
+ ib_free_cq(ch->send_cq);
ch->qp = qp;
ch->recv_cq = recv_cq;
@@ -580,13 +586,13 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
return 0;
err_qp:
- ib_destroy_qp(qp);
+ srp_destroy_qp(ch);
err_send_cq:
- ib_destroy_cq(send_cq);
+ ib_free_cq(send_cq);
err_recv_cq:
- ib_destroy_cq(recv_cq);
+ ib_free_cq(recv_cq);
err:
kfree(init_attr);
@@ -622,9 +628,10 @@ static void srp_free_ch_ib(struct srp_target_port *target,
if (ch->fmr_pool)
ib_destroy_fmr_pool(ch->fmr_pool);
}
+
srp_destroy_qp(ch);
- ib_destroy_cq(ch->send_cq);
- ib_destroy_cq(ch->recv_cq);
+ ib_free_cq(ch->send_cq);
+ ib_free_cq(ch->recv_cq);
/*
* Avoid that the SCSI error handler tries to use this channel after
@@ -1041,18 +1048,25 @@ out:
return ret <= 0 ? ret : -ENODEV;
}
-static int srp_inv_rkey(struct srp_rdma_ch *ch, u32 rkey)
+static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ srp_handle_qp_err(cq, wc, "INV RKEY");
+}
+
+static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch,
+ u32 rkey)
{
struct ib_send_wr *bad_wr;
struct ib_send_wr wr = {
.opcode = IB_WR_LOCAL_INV,
- .wr_id = LOCAL_INV_WR_ID_MASK,
.next = NULL,
.num_sge = 0,
.send_flags = 0,
.ex.invalidate_rkey = rkey,
};
+ wr.wr_cqe = &req->reg_cqe;
+ req->reg_cqe.done = srp_inv_rkey_err_done;
return ib_post_send(ch->qp, &wr, &bad_wr);
}
@@ -1074,7 +1088,7 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,
struct srp_fr_desc **pfr;
for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
- res = srp_inv_rkey(ch, (*pfr)->mr->rkey);
+ res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey);
if (res < 0) {
shost_printk(KERN_ERR, target->scsi_host, PFX
"Queueing INV WR for rkey %#x failed (%d)\n",
@@ -1312,7 +1326,13 @@ reset_state:
return 0;
}
+static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ srp_handle_qp_err(cq, wc, "FAST REG");
+}
+
static int srp_map_finish_fr(struct srp_map_state *state,
+ struct srp_request *req,
struct srp_rdma_ch *ch, int sg_nents)
{
struct srp_target_port *target = ch->target;
@@ -1349,9 +1369,11 @@ static int srp_map_finish_fr(struct srp_map_state *state,
if (unlikely(n < 0))
return n;
+ req->reg_cqe.done = srp_reg_mr_err_done;
+
wr.wr.next = NULL;
wr.wr.opcode = IB_WR_REG_MR;
- wr.wr.wr_id = FAST_REG_WR_ID_MASK;
+ wr.wr.wr_cqe = &req->reg_cqe;
wr.wr.num_sge = 0;
wr.wr.send_flags = 0;
wr.mr = desc->mr;
@@ -1455,7 +1477,7 @@ static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
while (count) {
int i, n;
- n = srp_map_finish_fr(state, ch, count);
+ n = srp_map_finish_fr(state, req, ch, count);
if (unlikely(n < 0))
return n;
@@ -1524,7 +1546,7 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
#ifdef CONFIG_NEED_SG_DMA_LENGTH
idb_sg->dma_length = idb_sg->length; /* hack^2 */
#endif
- ret = srp_map_finish_fr(&state, ch, 1);
+ ret = srp_map_finish_fr(&state, req, ch, 1);
if (ret < 0)
return ret;
} else if (dev->use_fmr) {
@@ -1719,7 +1741,7 @@ static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
struct srp_iu *iu;
- srp_send_completion(ch->send_cq, ch);
+ ib_process_cq_direct(ch->send_cq, -1);
if (list_empty(&ch->free_tx))
return NULL;
@@ -1739,6 +1761,19 @@ static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
return iu;
}
+static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
+ struct srp_rdma_ch *ch = cq->cq_context;
+
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ srp_handle_qp_err(cq, wc, "SEND");
+ return;
+ }
+
+ list_add(&iu->list, &ch->free_tx);
+}
+
static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
{
struct srp_target_port *target = ch->target;
@@ -1749,8 +1784,10 @@ static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
list.length = len;
list.lkey = target->lkey;
+ iu->cqe.done = srp_send_done;
+
wr.next = NULL;
- wr.wr_id = (uintptr_t) iu;
+ wr.wr_cqe = &iu->cqe;
wr.sg_list = &list;
wr.num_sge = 1;
wr.opcode = IB_WR_SEND;
@@ -1769,8 +1806,10 @@ static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
list.length = iu->size;
list.lkey = target->lkey;
+ iu->cqe.done = srp_recv_done;
+
wr.next = NULL;
- wr.wr_id = (uintptr_t) iu;
+ wr.wr_cqe = &iu->cqe;
wr.sg_list = &list;
wr.num_sge = 1;
@@ -1902,14 +1941,20 @@ static void srp_process_aer_req(struct srp_rdma_ch *ch,
"problems processing SRP_AER_REQ\n");
}
-static void srp_handle_recv(struct srp_rdma_ch *ch, struct ib_wc *wc)
+static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
+ struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
+ struct srp_rdma_ch *ch = cq->cq_context;
struct srp_target_port *target = ch->target;
struct ib_device *dev = target->srp_host->srp_dev->dev;
- struct srp_iu *iu = (struct srp_iu *) (uintptr_t) wc->wr_id;
int res;
u8 opcode;
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ srp_handle_qp_err(cq, wc, "RECV");
+ return;
+ }
+
ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
DMA_FROM_DEVICE);
@@ -1972,68 +2017,22 @@ static void srp_tl_err_work(struct work_struct *work)
srp_start_tl_fail_timers(target->rport);
}
-static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status,
- bool send_err, struct srp_rdma_ch *ch)
+static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
+ const char *opname)
{
+ struct srp_rdma_ch *ch = cq->cq_context;
struct srp_target_port *target = ch->target;
- if (wr_id == SRP_LAST_WR_ID) {
- complete(&ch->done);
- return;
- }
-
if (ch->connected && !target->qp_in_error) {
- if (wr_id & LOCAL_INV_WR_ID_MASK) {
- shost_printk(KERN_ERR, target->scsi_host, PFX
- "LOCAL_INV failed with status %s (%d)\n",
- ib_wc_status_msg(wc_status), wc_status);
- } else if (wr_id & FAST_REG_WR_ID_MASK) {
- shost_printk(KERN_ERR, target->scsi_host, PFX
- "FAST_REG_MR failed status %s (%d)\n",
- ib_wc_status_msg(wc_status), wc_status);
- } else {
- shost_printk(KERN_ERR, target->scsi_host,
- PFX "failed %s status %s (%d) for iu %p\n",
- send_err ? "send" : "receive",
- ib_wc_status_msg(wc_status), wc_status,
- (void *)(uintptr_t)wr_id);
- }
+ shost_printk(KERN_ERR, target->scsi_host,
+ PFX "failed %s status %s (%d) for CQE %p\n",
+ opname, ib_wc_status_msg(wc->status), wc->status,
+ wc->wr_cqe);
queue_work(system_long_wq, &target->tl_err_work);
}
target->qp_in_error = true;
}
-static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr)
-{
- struct srp_rdma_ch *ch = ch_ptr;
- struct ib_wc wc;
-
- ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
- while (ib_poll_cq(cq, 1, &wc) > 0) {
- if (likely(wc.status == IB_WC_SUCCESS)) {
- srp_handle_recv(ch, &wc);
- } else {
- srp_handle_qp_err(wc.wr_id, wc.status, false, ch);
- }
- }
-}
-
-static void srp_send_completion(struct ib_cq *cq, void *ch_ptr)
-{
- struct srp_rdma_ch *ch = ch_ptr;
- struct ib_wc wc;
- struct srp_iu *iu;
-
- while (ib_poll_cq(cq, 1, &wc) > 0) {
- if (likely(wc.status == IB_WC_SUCCESS)) {
- iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
- list_add(&iu->list, &ch->free_tx);
- } else {
- srp_handle_qp_err(wc.wr_id, wc.status, true, ch);
- }
- }
-}
-
static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
{
struct srp_target_port *target = host_to_target(shost);
@@ -3439,27 +3438,17 @@ free_host:
static void srp_add_one(struct ib_device *device)
{
struct srp_device *srp_dev;
- struct ib_device_attr *dev_attr;
struct srp_host *host;
int mr_page_shift, p;
u64 max_pages_per_mr;
- dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
- if (!dev_attr)
- return;
-
- if (ib_query_device(device, dev_attr)) {
- pr_warn("Query device failed for %s\n", device->name);
- goto free_attr;
- }
-
srp_dev = kmalloc(sizeof *srp_dev, GFP_KERNEL);
if (!srp_dev)
- goto free_attr;
+ return;
srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
device->map_phys_fmr && device->unmap_fmr);
- srp_dev->has_fr = (dev_attr->device_cap_flags &
+ srp_dev->has_fr = (device->attrs.device_cap_flags &
IB_DEVICE_MEM_MGT_EXTENSIONS);
if (!srp_dev->has_fmr && !srp_dev->has_fr)
dev_warn(&device->dev, "neither FMR nor FR is supported\n");
@@ -3473,23 +3462,23 @@ static void srp_add_one(struct ib_device *device)
* minimum of 4096 bytes. We're unlikely to build large sglists
* out of smaller entries.
*/
- mr_page_shift = max(12, ffs(dev_attr->page_size_cap) - 1);
+ mr_page_shift = max(12, ffs(device->attrs.page_size_cap) - 1);
srp_dev->mr_page_size = 1 << mr_page_shift;
srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1);
- max_pages_per_mr = dev_attr->max_mr_size;
+ max_pages_per_mr = device->attrs.max_mr_size;
do_div(max_pages_per_mr, srp_dev->mr_page_size);
srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
max_pages_per_mr);
if (srp_dev->use_fast_reg) {
srp_dev->max_pages_per_mr =
min_t(u32, srp_dev->max_pages_per_mr,
- dev_attr->max_fast_reg_page_list_len);
+ device->attrs.max_fast_reg_page_list_len);
}
srp_dev->mr_max_size = srp_dev->mr_page_size *
srp_dev->max_pages_per_mr;
- pr_debug("%s: mr_page_shift = %d, dev_attr->max_mr_size = %#llx, dev_attr->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
- device->name, mr_page_shift, dev_attr->max_mr_size,
- dev_attr->max_fast_reg_page_list_len,
+ pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
+ device->name, mr_page_shift, device->attrs.max_mr_size,
+ device->attrs.max_fast_reg_page_list_len,
srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
INIT_LIST_HEAD(&srp_dev->dev_list);
@@ -3517,17 +3506,13 @@ static void srp_add_one(struct ib_device *device)
}
ib_set_client_data(device, &srp_client, srp_dev);
-
- goto free_attr;
+ return;
err_pd:
ib_dealloc_pd(srp_dev->pd);
free_dev:
kfree(srp_dev);
-
-free_attr:
- kfree(dev_attr);
}
static void srp_remove_one(struct ib_device *device, void *client_data)
@@ -3587,8 +3572,6 @@ static int __init srp_init_module(void)
{
int ret;
- BUILD_BUG_ON(FIELD_SIZEOF(struct ib_wc, wr_id) < sizeof(void *));
-
if (srp_sg_tablesize) {
pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
if (!cmd_sg_entries)
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index f6af531f9f32..9e05ce4a04fd 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -66,11 +66,6 @@ enum {
SRP_TAG_TSK_MGMT = 1U << 31,
SRP_MAX_PAGES_PER_MR = 512,
-
- LOCAL_INV_WR_ID_MASK = 1,
- FAST_REG_WR_ID_MASK = 2,
-
- SRP_LAST_WR_ID = 0xfffffffcU,
};
enum srp_target_state {
@@ -128,6 +123,7 @@ struct srp_request {
struct srp_direct_buf *indirect_desc;
dma_addr_t indirect_dma_addr;
short nmdesc;
+ struct ib_cqe reg_cqe;
};
/**
@@ -231,6 +227,7 @@ struct srp_iu {
void *buf;
size_t size;
enum dma_data_direction direction;
+ struct ib_cqe cqe;
};
/**
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index bc5470c43d26..0c37fee363b1 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -93,6 +93,8 @@ MODULE_PARM_DESC(srpt_service_guid,
static struct ib_client srpt_client;
static void srpt_release_channel(struct srpt_rdma_ch *ch);
static int srpt_queue_status(struct se_cmd *cmd);
+static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc);
+static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc);
/**
* opposite_dma_dir() - Swap DMA_TO_DEVICE and DMA_FROM_DEVICE.
@@ -341,10 +343,10 @@ static void srpt_get_ioc(struct srpt_port *sport, u32 slot,
memset(iocp, 0, sizeof *iocp);
strcpy(iocp->id_string, SRPT_ID_STRING);
iocp->guid = cpu_to_be64(srpt_service_guid);
- iocp->vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
- iocp->device_id = cpu_to_be32(sdev->dev_attr.vendor_part_id);
- iocp->device_version = cpu_to_be16(sdev->dev_attr.hw_ver);
- iocp->subsys_vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
+ iocp->vendor_id = cpu_to_be32(sdev->device->attrs.vendor_id);
+ iocp->device_id = cpu_to_be32(sdev->device->attrs.vendor_part_id);
+ iocp->device_version = cpu_to_be16(sdev->device->attrs.hw_ver);
+ iocp->subsys_vendor_id = cpu_to_be32(sdev->device->attrs.vendor_id);
iocp->subsys_device_id = 0x0;
iocp->io_class = cpu_to_be16(SRP_REV16A_IB_IO_CLASS);
iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS);
@@ -453,6 +455,7 @@ static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent,
* srpt_mad_recv_handler() - MAD reception callback function.
*/
static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
+ struct ib_mad_send_buf *send_buf,
struct ib_mad_recv_wc *mad_wc)
{
struct srpt_port *sport = (struct srpt_port *)mad_agent->context;
@@ -778,12 +781,12 @@ static int srpt_post_recv(struct srpt_device *sdev,
struct ib_recv_wr wr, *bad_wr;
BUG_ON(!sdev);
- wr.wr_id = encode_wr_id(SRPT_RECV, ioctx->ioctx.index);
-
list.addr = ioctx->ioctx.dma;
list.length = srp_max_req_size;
list.lkey = sdev->pd->local_dma_lkey;
+ ioctx->ioctx.cqe.done = srpt_recv_done;
+ wr.wr_cqe = &ioctx->ioctx.cqe;
wr.next = NULL;
wr.sg_list = &list;
wr.num_sge = 1;
@@ -819,8 +822,9 @@ static int srpt_post_send(struct srpt_rdma_ch *ch,
list.length = len;
list.lkey = sdev->pd->local_dma_lkey;
+ ioctx->ioctx.cqe.done = srpt_send_done;
wr.next = NULL;
- wr.wr_id = encode_wr_id(SRPT_SEND, ioctx->ioctx.index);
+ wr.wr_cqe = &ioctx->ioctx.cqe;
wr.sg_list = &list;
wr.num_sge = 1;
wr.opcode = IB_WR_SEND;
@@ -1052,13 +1056,13 @@ static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch,
BUG_ON(!ch);
BUG_ON(!ioctx);
- BUG_ON(ioctx->n_rdma && !ioctx->rdma_ius);
+ BUG_ON(ioctx->n_rdma && !ioctx->rdma_wrs);
while (ioctx->n_rdma)
- kfree(ioctx->rdma_ius[--ioctx->n_rdma].sge);
+ kfree(ioctx->rdma_wrs[--ioctx->n_rdma].wr.sg_list);
- kfree(ioctx->rdma_ius);
- ioctx->rdma_ius = NULL;
+ kfree(ioctx->rdma_wrs);
+ ioctx->rdma_wrs = NULL;
if (ioctx->mapped_sg_count) {
sg = ioctx->sg;
@@ -1082,7 +1086,7 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
struct scatterlist *sg, *sg_orig;
int sg_cnt;
enum dma_data_direction dir;
- struct rdma_iu *riu;
+ struct ib_rdma_wr *riu;
struct srp_direct_buf *db;
dma_addr_t dma_addr;
struct ib_sge *sge;
@@ -1109,23 +1113,24 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
ioctx->mapped_sg_count = count;
- if (ioctx->rdma_ius && ioctx->n_rdma_ius)
- nrdma = ioctx->n_rdma_ius;
+ if (ioctx->rdma_wrs && ioctx->n_rdma_wrs)
+ nrdma = ioctx->n_rdma_wrs;
else {
nrdma = (count + SRPT_DEF_SG_PER_WQE - 1) / SRPT_DEF_SG_PER_WQE
+ ioctx->n_rbuf;
- ioctx->rdma_ius = kzalloc(nrdma * sizeof *riu, GFP_KERNEL);
- if (!ioctx->rdma_ius)
+ ioctx->rdma_wrs = kcalloc(nrdma, sizeof(*ioctx->rdma_wrs),
+ GFP_KERNEL);
+ if (!ioctx->rdma_wrs)
goto free_mem;
- ioctx->n_rdma_ius = nrdma;
+ ioctx->n_rdma_wrs = nrdma;
}
db = ioctx->rbufs;
tsize = cmd->data_length;
dma_len = ib_sg_dma_len(dev, &sg[0]);
- riu = ioctx->rdma_ius;
+ riu = ioctx->rdma_wrs;
/*
* For each remote desc - calculate the #ib_sge.
@@ -1139,9 +1144,9 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
rsize = be32_to_cpu(db->len);
raddr = be64_to_cpu(db->va);
- riu->raddr = raddr;
+ riu->remote_addr = raddr;
riu->rkey = be32_to_cpu(db->key);
- riu->sge_cnt = 0;
+ riu->wr.num_sge = 0;
/* calculate how many sge required for this remote_buf */
while (rsize > 0 && tsize > 0) {
@@ -1165,33 +1170,35 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
rsize = 0;
}
- ++riu->sge_cnt;
+ ++riu->wr.num_sge;
- if (rsize > 0 && riu->sge_cnt == SRPT_DEF_SG_PER_WQE) {
+ if (rsize > 0 &&
+ riu->wr.num_sge == SRPT_DEF_SG_PER_WQE) {
++ioctx->n_rdma;
- riu->sge =
- kmalloc(riu->sge_cnt * sizeof *riu->sge,
- GFP_KERNEL);
- if (!riu->sge)
+ riu->wr.sg_list = kmalloc_array(riu->wr.num_sge,
+ sizeof(*riu->wr.sg_list),
+ GFP_KERNEL);
+ if (!riu->wr.sg_list)
goto free_mem;
++riu;
- riu->sge_cnt = 0;
- riu->raddr = raddr;
+ riu->wr.num_sge = 0;
+ riu->remote_addr = raddr;
riu->rkey = be32_to_cpu(db->key);
}
}
++ioctx->n_rdma;
- riu->sge = kmalloc(riu->sge_cnt * sizeof *riu->sge,
- GFP_KERNEL);
- if (!riu->sge)
+ riu->wr.sg_list = kmalloc_array(riu->wr.num_sge,
+ sizeof(*riu->wr.sg_list),
+ GFP_KERNEL);
+ if (!riu->wr.sg_list)
goto free_mem;
}
db = ioctx->rbufs;
tsize = cmd->data_length;
- riu = ioctx->rdma_ius;
+ riu = ioctx->rdma_wrs;
sg = sg_orig;
dma_len = ib_sg_dma_len(dev, &sg[0]);
dma_addr = ib_sg_dma_address(dev, &sg[0]);
@@ -1200,7 +1207,7 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
for (i = 0, j = 0;
j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
rsize = be32_to_cpu(db->len);
- sge = riu->sge;
+ sge = riu->wr.sg_list;
k = 0;
while (rsize > 0 && tsize > 0) {
@@ -1232,9 +1239,9 @@ static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
}
++k;
- if (k == riu->sge_cnt && rsize > 0 && tsize > 0) {
+ if (k == riu->wr.num_sge && rsize > 0 && tsize > 0) {
++riu;
- sge = riu->sge;
+ sge = riu->wr.sg_list;
k = 0;
} else if (rsize > 0 && tsize > 0)
++sge;
@@ -1277,8 +1284,8 @@ static struct srpt_send_ioctx *srpt_get_send_ioctx(struct srpt_rdma_ch *ch)
ioctx->n_rbuf = 0;
ioctx->rbufs = NULL;
ioctx->n_rdma = 0;
- ioctx->n_rdma_ius = 0;
- ioctx->rdma_ius = NULL;
+ ioctx->n_rdma_wrs = 0;
+ ioctx->rdma_wrs = NULL;
ioctx->mapped_sg_count = 0;
init_completion(&ioctx->tx_done);
ioctx->queue_status_only = false;
@@ -1380,118 +1387,44 @@ out:
}
/**
- * srpt_handle_send_err_comp() - Process an IB_WC_SEND error completion.
- */
-static void srpt_handle_send_err_comp(struct srpt_rdma_ch *ch, u64 wr_id)
-{
- struct srpt_send_ioctx *ioctx;
- enum srpt_command_state state;
- u32 index;
-
- atomic_inc(&ch->sq_wr_avail);
-
- index = idx_from_wr_id(wr_id);
- ioctx = ch->ioctx_ring[index];
- state = srpt_get_cmd_state(ioctx);
-
- WARN_ON(state != SRPT_STATE_CMD_RSP_SENT
- && state != SRPT_STATE_MGMT_RSP_SENT
- && state != SRPT_STATE_NEED_DATA
- && state != SRPT_STATE_DONE);
-
- /* If SRP_RSP sending failed, undo the ch->req_lim change. */
- if (state == SRPT_STATE_CMD_RSP_SENT
- || state == SRPT_STATE_MGMT_RSP_SENT)
- atomic_dec(&ch->req_lim);
-
- srpt_abort_cmd(ioctx);
-}
-
-/**
- * srpt_handle_send_comp() - Process an IB send completion notification.
- */
-static void srpt_handle_send_comp(struct srpt_rdma_ch *ch,
- struct srpt_send_ioctx *ioctx)
-{
- enum srpt_command_state state;
-
- atomic_inc(&ch->sq_wr_avail);
-
- state = srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
-
- if (WARN_ON(state != SRPT_STATE_CMD_RSP_SENT
- && state != SRPT_STATE_MGMT_RSP_SENT
- && state != SRPT_STATE_DONE))
- pr_debug("state = %d\n", state);
-
- if (state != SRPT_STATE_DONE) {
- srpt_unmap_sg_to_ib_sge(ch, ioctx);
- transport_generic_free_cmd(&ioctx->cmd, 0);
- } else {
- pr_err("IB completion has been received too late for"
- " wr_id = %u.\n", ioctx->ioctx.index);
- }
-}
-
-/**
- * srpt_handle_rdma_comp() - Process an IB RDMA completion notification.
- *
* XXX: what is now target_execute_cmd used to be asynchronous, and unmapping
* the data that has been transferred via IB RDMA had to be postponed until the
* check_stop_free() callback. None of this is necessary anymore and needs to
* be cleaned up.
*/
-static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch,
- struct srpt_send_ioctx *ioctx,
- enum srpt_opcode opcode)
+static void srpt_rdma_read_done(struct ib_cq *cq, struct ib_wc *wc)
{
+ struct srpt_rdma_ch *ch = cq->cq_context;
+ struct srpt_send_ioctx *ioctx =
+ container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe);
+
WARN_ON(ioctx->n_rdma <= 0);
atomic_add(ioctx->n_rdma, &ch->sq_wr_avail);
- if (opcode == SRPT_RDMA_READ_LAST) {
- if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA,
- SRPT_STATE_DATA_IN))
- target_execute_cmd(&ioctx->cmd);
- else
- pr_err("%s[%d]: wrong state = %d\n", __func__,
- __LINE__, srpt_get_cmd_state(ioctx));
- } else if (opcode == SRPT_RDMA_ABORT) {
- ioctx->rdma_aborted = true;
- } else {
- WARN(true, "unexpected opcode %d\n", opcode);
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ pr_info("RDMA_READ for ioctx 0x%p failed with status %d\n",
+ ioctx, wc->status);
+ srpt_abort_cmd(ioctx);
+ return;
}
+
+ if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA,
+ SRPT_STATE_DATA_IN))
+ target_execute_cmd(&ioctx->cmd);
+ else
+ pr_err("%s[%d]: wrong state = %d\n", __func__,
+ __LINE__, srpt_get_cmd_state(ioctx));
}
-/**
- * srpt_handle_rdma_err_comp() - Process an IB RDMA error completion.
- */
-static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch,
- struct srpt_send_ioctx *ioctx,
- enum srpt_opcode opcode)
+static void srpt_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
{
- enum srpt_command_state state;
+ struct srpt_send_ioctx *ioctx =
+ container_of(wc->wr_cqe, struct srpt_send_ioctx, rdma_cqe);
- state = srpt_get_cmd_state(ioctx);
- switch (opcode) {
- case SRPT_RDMA_READ_LAST:
- if (ioctx->n_rdma <= 0) {
- pr_err("Received invalid RDMA read"
- " error completion with idx %d\n",
- ioctx->ioctx.index);
- break;
- }
- atomic_add(ioctx->n_rdma, &ch->sq_wr_avail);
- if (state == SRPT_STATE_NEED_DATA)
- srpt_abort_cmd(ioctx);
- else
- pr_err("%s[%d]: wrong state = %d\n",
- __func__, __LINE__, state);
- break;
- case SRPT_RDMA_WRITE_LAST:
- break;
- default:
- pr_err("%s[%d]: opcode = %u\n", __func__, __LINE__, opcode);
- break;
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ pr_info("RDMA_WRITE for ioctx 0x%p failed with status %d\n",
+ ioctx, wc->status);
+ srpt_abort_cmd(ioctx);
}
}
@@ -1926,32 +1859,26 @@ out:
return;
}
-static void srpt_process_rcv_completion(struct ib_cq *cq,
- struct srpt_rdma_ch *ch,
- struct ib_wc *wc)
+static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
- struct srpt_device *sdev = ch->sport->sdev;
- struct srpt_recv_ioctx *ioctx;
- u32 index;
+ struct srpt_rdma_ch *ch = cq->cq_context;
+ struct srpt_recv_ioctx *ioctx =
+ container_of(wc->wr_cqe, struct srpt_recv_ioctx, ioctx.cqe);
- index = idx_from_wr_id(wc->wr_id);
if (wc->status == IB_WC_SUCCESS) {
int req_lim;
req_lim = atomic_dec_return(&ch->req_lim);
if (unlikely(req_lim < 0))
pr_err("req_lim = %d < 0\n", req_lim);
- ioctx = sdev->ioctx_ring[index];
srpt_handle_new_iu(ch, ioctx, NULL);
} else {
- pr_info("receiving failed for idx %u with status %d\n",
- index, wc->status);
+ pr_info("receiving failed for ioctx %p with status %d\n",
+ ioctx, wc->status);
}
}
/**
- * srpt_process_send_completion() - Process an IB send completion.
- *
* Note: Although this has not yet been observed during tests, at least in
* theory it is possible that the srpt_get_send_ioctx() call invoked by
* srpt_handle_new_iu() fails. This is possible because the req_lim_delta
@@ -1964,109 +1891,52 @@ static void srpt_process_rcv_completion(struct ib_cq *cq,
* are queued on cmd_wait_list. The code below processes these delayed
* requests one at a time.
*/
-static void srpt_process_send_completion(struct ib_cq *cq,
- struct srpt_rdma_ch *ch,
- struct ib_wc *wc)
+static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc)
{
- struct srpt_send_ioctx *send_ioctx;
- uint32_t index;
- enum srpt_opcode opcode;
+ struct srpt_rdma_ch *ch = cq->cq_context;
+ struct srpt_send_ioctx *ioctx =
+ container_of(wc->wr_cqe, struct srpt_send_ioctx, ioctx.cqe);
+ enum srpt_command_state state;
- index = idx_from_wr_id(wc->wr_id);
- opcode = opcode_from_wr_id(wc->wr_id);
- send_ioctx = ch->ioctx_ring[index];
- if (wc->status == IB_WC_SUCCESS) {
- if (opcode == SRPT_SEND)
- srpt_handle_send_comp(ch, send_ioctx);
- else {
- WARN_ON(opcode != SRPT_RDMA_ABORT &&
- wc->opcode != IB_WC_RDMA_READ);
- srpt_handle_rdma_comp(ch, send_ioctx, opcode);
- }
+ state = srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
+
+ WARN_ON(state != SRPT_STATE_CMD_RSP_SENT &&
+ state != SRPT_STATE_MGMT_RSP_SENT);
+
+ atomic_inc(&ch->sq_wr_avail);
+
+ if (wc->status != IB_WC_SUCCESS) {
+ pr_info("sending response for ioctx 0x%p failed"
+ " with status %d\n", ioctx, wc->status);
+
+ atomic_dec(&ch->req_lim);
+ srpt_abort_cmd(ioctx);
+ goto out;
+ }
+
+ if (state != SRPT_STATE_DONE) {
+ srpt_unmap_sg_to_ib_sge(ch, ioctx);
+ transport_generic_free_cmd(&ioctx->cmd, 0);
} else {
- if (opcode == SRPT_SEND) {
- pr_info("sending response for idx %u failed"
- " with status %d\n", index, wc->status);
- srpt_handle_send_err_comp(ch, wc->wr_id);
- } else if (opcode != SRPT_RDMA_MID) {
- pr_info("RDMA t %d for idx %u failed with"
- " status %d\n", opcode, index, wc->status);
- srpt_handle_rdma_err_comp(ch, send_ioctx, opcode);
- }
+ pr_err("IB completion has been received too late for"
+ " wr_id = %u.\n", ioctx->ioctx.index);
}
- while (unlikely(opcode == SRPT_SEND
- && !list_empty(&ch->cmd_wait_list)
- && srpt_get_ch_state(ch) == CH_LIVE
- && (send_ioctx = srpt_get_send_ioctx(ch)) != NULL)) {
+out:
+ while (!list_empty(&ch->cmd_wait_list) &&
+ srpt_get_ch_state(ch) == CH_LIVE &&
+ (ioctx = srpt_get_send_ioctx(ch)) != NULL) {
struct srpt_recv_ioctx *recv_ioctx;
recv_ioctx = list_first_entry(&ch->cmd_wait_list,
struct srpt_recv_ioctx,
wait_list);
list_del(&recv_ioctx->wait_list);
- srpt_handle_new_iu(ch, recv_ioctx, send_ioctx);
- }
-}
-
-static void srpt_process_completion(struct ib_cq *cq, struct srpt_rdma_ch *ch)
-{
- struct ib_wc *const wc = ch->wc;
- int i, n;
-
- WARN_ON(cq != ch->cq);
-
- ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
- while ((n = ib_poll_cq(cq, ARRAY_SIZE(ch->wc), wc)) > 0) {
- for (i = 0; i < n; i++) {
- if (opcode_from_wr_id(wc[i].wr_id) == SRPT_RECV)
- srpt_process_rcv_completion(cq, ch, &wc[i]);
- else
- srpt_process_send_completion(cq, ch, &wc[i]);
- }
+ srpt_handle_new_iu(ch, recv_ioctx, ioctx);
}
}
/**
- * srpt_completion() - IB completion queue callback function.
- *
- * Notes:
- * - It is guaranteed that a completion handler will never be invoked
- * concurrently on two different CPUs for the same completion queue. See also
- * Documentation/infiniband/core_locking.txt and the implementation of
- * handle_edge_irq() in kernel/irq/chip.c.
- * - When threaded IRQs are enabled, completion handlers are invoked in thread
- * context instead of interrupt context.
- */
-static void srpt_completion(struct ib_cq *cq, void *ctx)
-{
- struct srpt_rdma_ch *ch = ctx;
-
- wake_up_interruptible(&ch->wait_queue);
-}
-
-static int srpt_compl_thread(void *arg)
-{
- struct srpt_rdma_ch *ch;
-
- /* Hibernation / freezing of the SRPT kernel thread is not supported. */
- current->flags |= PF_NOFREEZE;
-
- ch = arg;
- BUG_ON(!ch);
- pr_info("Session %s: kernel thread %s (PID %d) started\n",
- ch->sess_name, ch->thread->comm, current->pid);
- while (!kthread_should_stop()) {
- wait_event_interruptible(ch->wait_queue,
- (srpt_process_completion(ch->cq, ch),
- kthread_should_stop()));
- }
- pr_info("Session %s: kernel thread %s (PID %d) stopped\n",
- ch->sess_name, ch->thread->comm, current->pid);
- return 0;
-}
-
-/**
* srpt_create_ch_ib() - Create receive and send completion queues.
*/
static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
@@ -2075,7 +1945,6 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
struct srpt_port *sport = ch->sport;
struct srpt_device *sdev = sport->sdev;
u32 srp_sq_size = sport->port_attrib.srp_sq_size;
- struct ib_cq_init_attr cq_attr = {};
int ret;
WARN_ON(ch->rq_size < 1);
@@ -2086,9 +1955,8 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
goto out;
retry:
- cq_attr.cqe = ch->rq_size + srp_sq_size;
- ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch,
- &cq_attr);
+ ch->cq = ib_alloc_cq(sdev->device, ch, ch->rq_size + srp_sq_size,
+ 0 /* XXX: spread CQs */, IB_POLL_WORKQUEUE);
if (IS_ERR(ch->cq)) {
ret = PTR_ERR(ch->cq);
pr_err("failed to create CQ cqe= %d ret= %d\n",
@@ -2131,18 +1999,6 @@ retry:
if (ret)
goto err_destroy_qp;
- init_waitqueue_head(&ch->wait_queue);
-
- pr_debug("creating thread for session %s\n", ch->sess_name);
-
- ch->thread = kthread_run(srpt_compl_thread, ch, "ib_srpt_compl");
- if (IS_ERR(ch->thread)) {
- pr_err("failed to create kernel thread %ld\n",
- PTR_ERR(ch->thread));
- ch->thread = NULL;
- goto err_destroy_qp;
- }
-
out:
kfree(qp_init);
return ret;
@@ -2150,17 +2006,14 @@ out:
err_destroy_qp:
ib_destroy_qp(ch->qp);
err_destroy_cq:
- ib_destroy_cq(ch->cq);
+ ib_free_cq(ch->cq);
goto out;
}
static void srpt_destroy_ch_ib(struct srpt_rdma_ch *ch)
{
- if (ch->thread)
- kthread_stop(ch->thread);
-
ib_destroy_qp(ch->qp);
- ib_destroy_cq(ch->cq);
+ ib_free_cq(ch->cq);
}
/**
@@ -2808,12 +2661,8 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
struct srpt_send_ioctx *ioctx)
{
- struct ib_rdma_wr wr;
struct ib_send_wr *bad_wr;
- struct rdma_iu *riu;
- int i;
- int ret;
- int sq_wr_avail;
+ int sq_wr_avail, ret, i;
enum dma_data_direction dir;
const int n_rdma = ioctx->n_rdma;
@@ -2829,59 +2678,32 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
}
}
- ioctx->rdma_aborted = false;
- ret = 0;
- riu = ioctx->rdma_ius;
- memset(&wr, 0, sizeof wr);
-
- for (i = 0; i < n_rdma; ++i, ++riu) {
- if (dir == DMA_FROM_DEVICE) {
- wr.wr.opcode = IB_WR_RDMA_WRITE;
- wr.wr.wr_id = encode_wr_id(i == n_rdma - 1 ?
- SRPT_RDMA_WRITE_LAST :
- SRPT_RDMA_MID,
- ioctx->ioctx.index);
- } else {
- wr.wr.opcode = IB_WR_RDMA_READ;
- wr.wr.wr_id = encode_wr_id(i == n_rdma - 1 ?
- SRPT_RDMA_READ_LAST :
- SRPT_RDMA_MID,
- ioctx->ioctx.index);
- }
- wr.wr.next = NULL;
- wr.remote_addr = riu->raddr;
- wr.rkey = riu->rkey;
- wr.wr.num_sge = riu->sge_cnt;
- wr.wr.sg_list = riu->sge;
+ for (i = 0; i < n_rdma; i++) {
+ struct ib_send_wr *wr = &ioctx->rdma_wrs[i].wr;
- /* only get completion event for the last rdma write */
- if (i == (n_rdma - 1) && dir == DMA_TO_DEVICE)
- wr.wr.send_flags = IB_SEND_SIGNALED;
+ wr->opcode = (dir == DMA_FROM_DEVICE) ?
+ IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
- ret = ib_post_send(ch->qp, &wr.wr, &bad_wr);
- if (ret)
- break;
+ if (i == n_rdma - 1) {
+ /* only get completion event for the last rdma read */
+ if (dir == DMA_TO_DEVICE) {
+ wr->send_flags = IB_SEND_SIGNALED;
+ ioctx->rdma_cqe.done = srpt_rdma_read_done;
+ } else {
+ ioctx->rdma_cqe.done = srpt_rdma_write_done;
+ }
+ wr->wr_cqe = &ioctx->rdma_cqe;
+ wr->next = NULL;
+ } else {
+ wr->wr_cqe = NULL;
+ wr->next = &ioctx->rdma_wrs[i + 1].wr;
+ }
}
+ ret = ib_post_send(ch->qp, &ioctx->rdma_wrs->wr, &bad_wr);
if (ret)
pr_err("%s[%d]: ib_post_send() returned %d for %d/%d\n",
__func__, __LINE__, ret, i, n_rdma);
- if (ret && i > 0) {
- wr.wr.num_sge = 0;
- wr.wr.wr_id = encode_wr_id(SRPT_RDMA_ABORT, ioctx->ioctx.index);
- wr.wr.send_flags = IB_SEND_SIGNALED;
- while (ch->state == CH_LIVE &&
- ib_post_send(ch->qp, &wr.wr, &bad_wr) != 0) {
- pr_info("Trying to abort failed RDMA transfer [%d]\n",
- ioctx->ioctx.index);
- msleep(1000);
- }
- while (ch->state != CH_RELEASING && !ioctx->rdma_aborted) {
- pr_info("Waiting until RDMA abort finished [%d]\n",
- ioctx->ioctx.index);
- msleep(1000);
- }
- }
out:
if (unlikely(dir == DMA_TO_DEVICE && ret < 0))
atomic_add(n_rdma, &ch->sq_wr_avail);
@@ -3190,14 +3012,11 @@ static void srpt_add_one(struct ib_device *device)
init_waitqueue_head(&sdev->ch_releaseQ);
spin_lock_init(&sdev->spinlock);
- if (ib_query_device(device, &sdev->dev_attr))
- goto free_dev;
-
sdev->pd = ib_alloc_pd(device);
if (IS_ERR(sdev->pd))
goto free_dev;
- sdev->srq_size = min(srpt_srq_size, sdev->dev_attr.max_srq_wr);
+ sdev->srq_size = min(srpt_srq_size, sdev->device->attrs.max_srq_wr);
srq_attr.event_handler = srpt_srq_event;
srq_attr.srq_context = (void *)sdev;
@@ -3211,7 +3030,7 @@ static void srpt_add_one(struct ib_device *device)
goto err_pd;
pr_debug("%s: create SRQ #wr= %d max_allow=%d dev= %s\n",
- __func__, sdev->srq_size, sdev->dev_attr.max_srq_wr,
+ __func__, sdev->srq_size, sdev->device->attrs.max_srq_wr,
device->name);
if (!srpt_service_guid)
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h
index 5366e0a9fd6d..09037f2b0b51 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.h
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.h
@@ -128,36 +128,6 @@ enum {
DEFAULT_MAX_RDMA_SIZE = 65536,
};
-enum srpt_opcode {
- SRPT_RECV,
- SRPT_SEND,
- SRPT_RDMA_MID,
- SRPT_RDMA_ABORT,
- SRPT_RDMA_READ_LAST,
- SRPT_RDMA_WRITE_LAST,
-};
-
-static inline u64 encode_wr_id(u8 opcode, u32 idx)
-{
- return ((u64)opcode << 32) | idx;
-}
-static inline enum srpt_opcode opcode_from_wr_id(u64 wr_id)
-{
- return wr_id >> 32;
-}
-static inline u32 idx_from_wr_id(u64 wr_id)
-{
- return (u32)wr_id;
-}
-
-struct rdma_iu {
- u64 raddr;
- u32 rkey;
- struct ib_sge *sge;
- u32 sge_cnt;
- int mem_id;
-};
-
/**
* enum srpt_command_state - SCSI command state managed by SRPT.
* @SRPT_STATE_NEW: New command arrived and is being processed.
@@ -189,6 +159,7 @@ enum srpt_command_state {
* @index: Index of the I/O context in its ioctx_ring array.
*/
struct srpt_ioctx {
+ struct ib_cqe cqe;
void *buf;
dma_addr_t dma;
uint32_t index;
@@ -215,32 +186,30 @@ struct srpt_recv_ioctx {
* @sg: Pointer to sg-list associated with this I/O context.
* @sg_cnt: SG-list size.
* @mapped_sg_count: ib_dma_map_sg() return value.
- * @n_rdma_ius: Number of elements in the rdma_ius array.
- * @rdma_ius: Array with information about the RDMA mapping.
+ * @n_rdma_wrs: Number of elements in the rdma_wrs array.
+ * @rdma_wrs: Array with information about the RDMA mapping.
* @tag: Tag of the received SRP information unit.
* @spinlock: Protects 'state'.
* @state: I/O context state.
- * @rdma_aborted: If initiating a multipart RDMA transfer failed, whether
- * the already initiated transfers have finished.
* @cmd: Target core command data structure.
* @sense_data: SCSI sense data.
*/
struct srpt_send_ioctx {
struct srpt_ioctx ioctx;
struct srpt_rdma_ch *ch;
- struct rdma_iu *rdma_ius;
+ struct ib_rdma_wr *rdma_wrs;
+ struct ib_cqe rdma_cqe;
struct srp_direct_buf *rbufs;
struct srp_direct_buf single_rbuf;
struct scatterlist *sg;
struct list_head free_list;
spinlock_t spinlock;
enum srpt_command_state state;
- bool rdma_aborted;
struct se_cmd cmd;
struct completion tx_done;
int sg_cnt;
int mapped_sg_count;
- u16 n_rdma_ius;
+ u16 n_rdma_wrs;
u8 n_rdma;
u8 n_rbuf;
bool queue_status_only;
@@ -267,9 +236,6 @@ enum rdma_ch_state {
/**
* struct srpt_rdma_ch - RDMA channel.
- * @wait_queue: Allows the kernel thread to wait for more work.
- * @thread: Kernel thread that processes the IB queues associated with
- * the channel.
* @cm_id: IB CM ID associated with the channel.
* @qp: IB queue pair used for communicating over this channel.
* @cq: IB completion queue for this channel.
@@ -288,7 +254,6 @@ enum rdma_ch_state {
* @free_list: Head of list with free send I/O contexts.
* @state: channel state. See also enum rdma_ch_state.
* @ioctx_ring: Send ring.
- * @wc: IB work completion array for srpt_process_completion().
* @list: Node for insertion in the srpt_device.rch_list list.
* @cmd_wait_list: List of SCSI commands that arrived before the RTU event. This
* list contains struct srpt_ioctx elements and is protected
@@ -299,8 +264,6 @@ enum rdma_ch_state {
* @release_done: Enables waiting for srpt_release_channel() completion.
*/
struct srpt_rdma_ch {
- wait_queue_head_t wait_queue;
- struct task_struct *thread;
struct ib_cm_id *cm_id;
struct ib_qp *qp;
struct ib_cq *cq;
@@ -317,7 +280,6 @@ struct srpt_rdma_ch {
struct list_head free_list;
enum rdma_ch_state state;
struct srpt_send_ioctx **ioctx_ring;
- struct ib_wc wc[16];
struct list_head list;
struct list_head cmd_wait_list;
struct se_session *sess;
@@ -377,8 +339,6 @@ struct srpt_port {
* @mr: L_Key (local key) with write access to all local memory.
* @srq: Per-HCA SRQ (shared receive queue).
* @cm_id: Connection identifier.
- * @dev_attr: Attributes of the InfiniBand device as obtained during the
- * ib_client.add() callback.
* @srq_size: SRQ size.
* @ioctx_ring: Per-HCA SRQ.
* @rch_list: Per-device channel list -- see also srpt_rdma_ch.list.
@@ -393,7 +353,6 @@ struct srpt_device {
struct ib_pd *pd;
struct ib_srq *srq;
struct ib_cm_id *cm_id;
- struct ib_device_attr dev_attr;
int srq_size;
struct srpt_recv_ioctx **ioctx_ring;
struct list_head rch_list;
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 11fc2a27fa2e..715923d5236c 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -130,6 +130,11 @@ config ORION_IRQCHIP
select IRQ_DOMAIN
select MULTI_IRQ_HANDLER
+config PIC32_EVIC
+ bool
+ select GENERIC_IRQ_CHIP
+ select IRQ_DOMAIN
+
config RENESAS_INTC_IRQPIN
bool
select IRQ_DOMAIN
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index d4c2e4ebc308..18caacb60d58 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -58,3 +58,4 @@ obj-$(CONFIG_RENESAS_H8S_INTC) += irq-renesas-h8s.o
obj-$(CONFIG_ARCH_SA1100) += irq-sa11x0.o
obj-$(CONFIG_INGENIC_IRQ) += irq-ingenic.o
obj-$(CONFIG_IMX_GPCV2) += irq-imx-gpcv2.o
+obj-$(CONFIG_PIC32_EVIC) += irq-pic32-evic.o
diff --git a/drivers/irqchip/irq-pic32-evic.c b/drivers/irqchip/irq-pic32-evic.c
new file mode 100644
index 000000000000..e7155db01d55
--- /dev/null
+++ b/drivers/irqchip/irq-pic32-evic.c
@@ -0,0 +1,324 @@
+/*
+ * Cristian Birsan <cristian.birsan@microchip.com>
+ * Joshua Henderson <joshua.henderson@microchip.com>
+ * Copyright (C) 2016 Microchip Technology Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/of_address.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/irqchip.h>
+#include <linux/irq.h>
+
+#include <asm/irq.h>
+#include <asm/traps.h>
+#include <asm/mach-pic32/pic32.h>
+
+#define REG_INTCON 0x0000
+#define REG_INTSTAT 0x0020
+#define REG_IFS_OFFSET 0x0040
+#define REG_IEC_OFFSET 0x00C0
+#define REG_IPC_OFFSET 0x0140
+#define REG_OFF_OFFSET 0x0540
+
+#define MAJPRI_MASK 0x07
+#define SUBPRI_MASK 0x03
+#define PRIORITY_MASK 0x1F
+
+#define PIC32_INT_PRI(pri, subpri) \
+ ((((pri) & MAJPRI_MASK) << 2) | ((subpri) & SUBPRI_MASK))
+
+struct evic_chip_data {
+ u32 irq_types[NR_IRQS];
+ u32 ext_irqs[8];
+};
+
+static struct irq_domain *evic_irq_domain;
+static void __iomem *evic_base;
+
+asmlinkage void __weak plat_irq_dispatch(void)
+{
+ unsigned int irq, hwirq;
+
+ hwirq = readl(evic_base + REG_INTSTAT) & 0xFF;
+ irq = irq_linear_revmap(evic_irq_domain, hwirq);
+ do_IRQ(irq);
+}
+
+static struct evic_chip_data *irqd_to_priv(struct irq_data *data)
+{
+ return (struct evic_chip_data *)data->domain->host_data;
+}
+
+static int pic32_set_ext_polarity(int bit, u32 type)
+{
+ /*
+ * External interrupts can be either edge rising or edge falling,
+ * but not both.
+ */
+ switch (type) {
+ case IRQ_TYPE_EDGE_RISING:
+ writel(BIT(bit), evic_base + PIC32_SET(REG_INTCON));
+ break;
+ case IRQ_TYPE_EDGE_FALLING:
+ writel(BIT(bit), evic_base + PIC32_CLR(REG_INTCON));
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int pic32_set_type_edge(struct irq_data *data,
+ unsigned int flow_type)
+{
+ struct evic_chip_data *priv = irqd_to_priv(data);
+ int ret;
+ int i;
+
+ if (!(flow_type & IRQ_TYPE_EDGE_BOTH))
+ return -EBADR;
+
+ /* set polarity for external interrupts only */
+ for (i = 0; i < ARRAY_SIZE(priv->ext_irqs); i++) {
+ if (priv->ext_irqs[i] == data->hwirq) {
+ ret = pic32_set_ext_polarity(i + 1, flow_type);
+ if (ret)
+ return ret;
+ }
+ }
+
+ irqd_set_trigger_type(data, flow_type);
+
+ return IRQ_SET_MASK_OK;
+}
+
+static void pic32_bind_evic_interrupt(int irq, int set)
+{
+ writel(set, evic_base + REG_OFF_OFFSET + irq * 4);
+}
+
+static void pic32_set_irq_priority(int irq, int priority)
+{
+ u32 reg, shift;
+
+ reg = irq / 4;
+ shift = (irq % 4) * 8;
+
+ writel(PRIORITY_MASK << shift,
+ evic_base + PIC32_CLR(REG_IPC_OFFSET + reg * 0x10));
+ writel(priority << shift,
+ evic_base + PIC32_SET(REG_IPC_OFFSET + reg * 0x10));
+}
+
+#define IRQ_REG_MASK(_hwirq, _reg, _mask) \
+ do { \
+ _reg = _hwirq / 32; \
+ _mask = 1 << (_hwirq % 32); \
+ } while (0)
+
+static int pic32_irq_domain_map(struct irq_domain *d, unsigned int virq,
+ irq_hw_number_t hw)
+{
+ struct evic_chip_data *priv = d->host_data;
+ struct irq_data *data;
+ int ret;
+ u32 iecclr, ifsclr;
+ u32 reg, mask;
+
+ ret = irq_map_generic_chip(d, virq, hw);
+ if (ret)
+ return ret;
+
+ /*
+ * Piggyback on xlate function to move to an alternate chip as necessary
+ * at time of mapping instead of allowing the flow handler/chip to be
+ * changed later. This requires all interrupts to be configured through
+ * DT.
+ */
+ if (priv->irq_types[hw] & IRQ_TYPE_SENSE_MASK) {
+ data = irq_domain_get_irq_data(d, virq);
+ irqd_set_trigger_type(data, priv->irq_types[hw]);
+ irq_setup_alt_chip(data, priv->irq_types[hw]);
+ }
+
+ IRQ_REG_MASK(hw, reg, mask);
+
+ iecclr = PIC32_CLR(REG_IEC_OFFSET + reg * 0x10);
+ ifsclr = PIC32_CLR(REG_IFS_OFFSET + reg * 0x10);
+
+ /* mask and clear flag */
+ writel(mask, evic_base + iecclr);
+ writel(mask, evic_base + ifsclr);
+
+ /* default priority is required */
+ pic32_set_irq_priority(hw, PIC32_INT_PRI(2, 0));
+
+ return ret;
+}
+
+int pic32_irq_domain_xlate(struct irq_domain *d, struct device_node *ctrlr,
+ const u32 *intspec, unsigned int intsize,
+ irq_hw_number_t *out_hwirq, unsigned int *out_type)
+{
+ struct evic_chip_data *priv = d->host_data;
+
+ if (WARN_ON(intsize < 2))
+ return -EINVAL;
+
+ if (WARN_ON(intspec[0] >= NR_IRQS))
+ return -EINVAL;
+
+ *out_hwirq = intspec[0];
+ *out_type = intspec[1] & IRQ_TYPE_SENSE_MASK;
+
+ priv->irq_types[intspec[0]] = intspec[1] & IRQ_TYPE_SENSE_MASK;
+
+ return 0;
+}
+
+static const struct irq_domain_ops pic32_irq_domain_ops = {
+ .map = pic32_irq_domain_map,
+ .xlate = pic32_irq_domain_xlate,
+};
+
+static void __init pic32_ext_irq_of_init(struct irq_domain *domain)
+{
+ struct device_node *node = irq_domain_get_of_node(domain);
+ struct evic_chip_data *priv = domain->host_data;
+ struct property *prop;
+ const __le32 *p;
+ u32 hwirq;
+ int i = 0;
+ const char *pname = "microchip,external-irqs";
+
+ of_property_for_each_u32(node, pname, prop, p, hwirq) {
+ if (i >= ARRAY_SIZE(priv->ext_irqs)) {
+ pr_warn("More than %d external irq, skip rest\n",
+ ARRAY_SIZE(priv->ext_irqs));
+ break;
+ }
+
+ priv->ext_irqs[i] = hwirq;
+ i++;
+ }
+}
+
+static int __init pic32_of_init(struct device_node *node,
+ struct device_node *parent)
+{
+ struct irq_chip_generic *gc;
+ struct evic_chip_data *priv;
+ unsigned int clr = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN;
+ int nchips, ret;
+ int i;
+
+ nchips = DIV_ROUND_UP(NR_IRQS, 32);
+
+ evic_base = of_iomap(node, 0);
+ if (!evic_base)
+ return -ENOMEM;
+
+ priv = kcalloc(nchips, sizeof(*priv), GFP_KERNEL);
+ if (!priv) {
+ ret = -ENOMEM;
+ goto err_iounmap;
+ }
+
+ evic_irq_domain = irq_domain_add_linear(node, nchips * 32,
+ &pic32_irq_domain_ops,
+ priv);
+ if (!evic_irq_domain) {
+ ret = -ENOMEM;
+ goto err_free_priv;
+ }
+
+ /*
+ * The PIC32 EVIC has a linear list of irqs and the type of each
+ * irq is determined by the hardware peripheral the EVIC is arbitrating.
+ * These irq types are defined in the datasheet as "persistent" and
+ * "non-persistent" which are mapped here to level and edge
+ * respectively. To manage the different flow handler requirements of
+ * each irq type, different chip_types are used.
+ */
+ ret = irq_alloc_domain_generic_chips(evic_irq_domain, 32, 2,
+ "evic-level", handle_level_irq,
+ clr, 0, 0);
+ if (ret)
+ goto err_domain_remove;
+
+ board_bind_eic_interrupt = &pic32_bind_evic_interrupt;
+
+ for (i = 0; i < nchips; i++) {
+ u32 ifsclr = PIC32_CLR(REG_IFS_OFFSET + (i * 0x10));
+ u32 iec = REG_IEC_OFFSET + (i * 0x10);
+
+ gc = irq_get_domain_generic_chip(evic_irq_domain, i * 32);
+
+ gc->reg_base = evic_base;
+ gc->unused = 0;
+
+ /*
+ * Level/persistent interrupts have a special requirement that
+ * the condition generating the interrupt be cleared before the
+ * interrupt flag (ifs) can be cleared. chip.irq_eoi is used to
+ * complete the interrupt with an ack.
+ */
+ gc->chip_types[0].type = IRQ_TYPE_LEVEL_MASK;
+ gc->chip_types[0].handler = handle_fasteoi_irq;
+ gc->chip_types[0].regs.ack = ifsclr;
+ gc->chip_types[0].regs.mask = iec;
+ gc->chip_types[0].chip.name = "evic-level";
+ gc->chip_types[0].chip.irq_eoi = irq_gc_ack_set_bit;
+ gc->chip_types[0].chip.irq_mask = irq_gc_mask_clr_bit;
+ gc->chip_types[0].chip.irq_unmask = irq_gc_mask_set_bit;
+ gc->chip_types[0].chip.flags = IRQCHIP_SKIP_SET_WAKE;
+
+ /* Edge interrupts */
+ gc->chip_types[1].type = IRQ_TYPE_EDGE_BOTH;
+ gc->chip_types[1].handler = handle_edge_irq;
+ gc->chip_types[1].regs.ack = ifsclr;
+ gc->chip_types[1].regs.mask = iec;
+ gc->chip_types[1].chip.name = "evic-edge";
+ gc->chip_types[1].chip.irq_ack = irq_gc_ack_set_bit;
+ gc->chip_types[1].chip.irq_mask = irq_gc_mask_clr_bit;
+ gc->chip_types[1].chip.irq_unmask = irq_gc_mask_set_bit;
+ gc->chip_types[1].chip.irq_set_type = pic32_set_type_edge;
+ gc->chip_types[1].chip.flags = IRQCHIP_SKIP_SET_WAKE;
+
+ gc->private = &priv[i];
+ }
+
+ irq_set_default_host(evic_irq_domain);
+
+ /*
+ * External interrupts have software configurable edge polarity. These
+ * interrupts are defined in DT allowing polarity to be configured only
+ * for these interrupts when requested.
+ */
+ pic32_ext_irq_of_init(evic_irq_domain);
+
+ return 0;
+
+err_domain_remove:
+ irq_domain_remove(evic_irq_domain);
+
+err_free_priv:
+ kfree(priv);
+
+err_iounmap:
+ iounmap(evic_base);
+
+ return ret;
+}
+
+IRQCHIP_DECLARE(pic32_evic, "microchip,pic32mzda-evic", pic32_of_init);
diff --git a/drivers/mtd/bcm63xxpart.c b/drivers/mtd/bcm63xxpart.c
index 440936998593..cec3188a170d 100644
--- a/drivers/mtd/bcm63xxpart.c
+++ b/drivers/mtd/bcm63xxpart.c
@@ -24,6 +24,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/bcm963xx_tag.h>
#include <linux/crc32.h>
#include <linux/module.h>
#include <linux/kernel.h>
@@ -34,11 +35,8 @@
#include <linux/mtd/partitions.h>
#include <asm/mach-bcm63xx/bcm63xx_nvram.h>
-#include <asm/mach-bcm63xx/bcm963xx_tag.h>
#include <asm/mach-bcm63xx/board_bcm963xx.h>
-#define BCM63XX_EXTENDED_SIZE 0xBFC00000 /* Extended flash address */
-
#define BCM63XX_CFE_BLOCK_SIZE SZ_64K /* always at least 64KiB */
#define BCM63XX_CFE_MAGIC_OFFSET 0x4e0
@@ -123,8 +121,8 @@ static int bcm63xx_parse_cfe_partitions(struct mtd_info *master,
pr_info("CFE boot tag found with version %s and board type %s\n",
tagversion, boardid);
- kerneladdr = kerneladdr - BCM63XX_EXTENDED_SIZE;
- rootfsaddr = rootfsaddr - BCM63XX_EXTENDED_SIZE;
+ kerneladdr = kerneladdr - BCM963XX_EXTENDED_SIZE;
+ rootfsaddr = rootfsaddr - BCM963XX_EXTENDED_SIZE;
spareaddr = roundup(totallen, master->erasesize) + cfelen;
if (rootfsaddr < kerneladdr) {
diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c
index 54e056d3be02..ee2b74d1d1b5 100644
--- a/drivers/mtd/ubi/cdev.c
+++ b/drivers/mtd/ubi/cdev.c
@@ -174,9 +174,9 @@ static int vol_cdev_fsync(struct file *file, loff_t start, loff_t end,
struct ubi_device *ubi = desc->vol->ubi;
struct inode *inode = file_inode(file);
int err;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
err = ubi_sync(ubi->ubi_num);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 2c2baab9d880..d66c690a8597 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -157,6 +157,7 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
[29] = "802.1ad offload support",
[31] = "Modifying loopback source checks using UPDATE_QP support",
[32] = "Loopback source checks support",
+ [33] = "RoCEv2 support"
};
int i;
@@ -626,6 +627,8 @@ out:
return err;
}
+static void disable_unsupported_roce_caps(void *buf);
+
int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
{
struct mlx4_cmd_mailbox *mailbox;
@@ -738,6 +741,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
if (err)
goto out;
+ if (mlx4_is_mfunc(dev))
+ disable_unsupported_roce_caps(outbox);
MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_QP_OFFSET);
dev_cap->reserved_qps = 1 << (field & 0xf);
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_OFFSET);
@@ -905,6 +910,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
MLX4_GET(dev_cap->bmme_flags, outbox,
QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
+ if (dev_cap->bmme_flags & MLX4_FLAG_ROCE_V1_V2)
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ROCE_V1_V2;
if (dev_cap->bmme_flags & MLX4_FLAG_PORT_REMAP)
dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PORT_REMAP;
MLX4_GET(field, outbox, QUERY_DEV_CAP_CONFIG_DEV_OFFSET);
@@ -1161,6 +1168,7 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
if (err)
return err;
+ disable_unsupported_roce_caps(outbox->buf);
/* add port mng change event capability and disable mw type 1
* unconditionally to slaves
*/
@@ -1258,6 +1266,21 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
return 0;
}
+static void disable_unsupported_roce_caps(void *buf)
+{
+ u32 flags;
+
+ MLX4_GET(flags, buf, QUERY_DEV_CAP_EXT_FLAGS_OFFSET);
+ flags &= ~(1UL << 31);
+ MLX4_PUT(buf, flags, QUERY_DEV_CAP_EXT_FLAGS_OFFSET);
+ MLX4_GET(flags, buf, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
+ flags &= ~(1UL << 24);
+ MLX4_PUT(buf, flags, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
+ MLX4_GET(flags, buf, QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
+ flags &= ~(MLX4_FLAG_ROCE_V1_V2);
+ MLX4_PUT(buf, flags, QUERY_DEV_CAP_BMME_FLAGS_OFFSET);
+}
+
int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
@@ -2239,7 +2262,8 @@ struct mlx4_config_dev {
__be32 rsvd1[3];
__be16 vxlan_udp_dport;
__be16 rsvd2;
- __be32 rsvd3;
+ __be16 roce_v2_entropy;
+ __be16 roce_v2_udp_dport;
__be32 roce_flags;
__be32 rsvd4[25];
__be16 rsvd5;
@@ -2248,6 +2272,7 @@ struct mlx4_config_dev {
};
#define MLX4_VXLAN_UDP_DPORT (1 << 0)
+#define MLX4_ROCE_V2_UDP_DPORT BIT(3)
#define MLX4_DISABLE_RX_PORT BIT(18)
static int mlx4_CONFIG_DEV_set(struct mlx4_dev *dev, struct mlx4_config_dev *config_dev)
@@ -2365,6 +2390,18 @@ int mlx4_disable_rx_port_check(struct mlx4_dev *dev, bool dis)
return mlx4_CONFIG_DEV_set(dev, &config_dev);
}
+int mlx4_config_roce_v2_port(struct mlx4_dev *dev, u16 udp_port)
+{
+ struct mlx4_config_dev config_dev;
+
+ memset(&config_dev, 0, sizeof(config_dev));
+ config_dev.update_flags = cpu_to_be32(MLX4_ROCE_V2_UDP_DPORT);
+ config_dev.roce_v2_udp_dport = cpu_to_be16(udp_port);
+
+ return mlx4_CONFIG_DEV_set(dev, &config_dev);
+}
+EXPORT_SYMBOL_GPL(mlx4_config_roce_v2_port);
+
int mlx4_virt2phy_port_map(struct mlx4_dev *dev, u32 port1, u32 port2)
{
struct mlx4_cmd_mailbox *mailbox;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 2404c22ad2b2..7baef52db6b7 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -780,7 +780,10 @@ struct mlx4_set_port_general_context {
u16 reserved1;
u8 v_ignore_fcs;
u8 flags;
- u8 ignore_fcs;
+ union {
+ u8 ignore_fcs;
+ u8 roce_mode;
+ };
u8 reserved2;
__be16 mtu;
u8 pptx;
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index f2550425c251..787b7bb54d52 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -1520,6 +1520,8 @@ int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port, int pkey_tbl_sz)
return err;
}
+#define SET_PORT_ROCE_2_FLAGS 0x10
+#define MLX4_SET_PORT_ROCE_V1_V2 0x2
int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx)
{
@@ -1539,6 +1541,11 @@ int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
context->pprx = (pprx * (!pfcrx)) << 7;
context->pfcrx = pfcrx;
+ if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
+ context->flags |= SET_PORT_ROCE_2_FLAGS;
+ context->roce_mode |=
+ MLX4_SET_PORT_ROCE_V1_V2 << 4;
+ }
in_mod = MLX4_SET_PORT_GENERAL << 8 | port;
err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE,
MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index 168823dde79f..d1cd9c32a9ae 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -167,6 +167,12 @@ static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
context->log_page_size = mtt->page_shift - MLX4_ICM_PAGE_SHIFT;
}
+ if ((cur_state == MLX4_QP_STATE_RTR) &&
+ (new_state == MLX4_QP_STATE_RTS) &&
+ dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
+ context->roce_entropy =
+ cpu_to_be16(mlx4_qp_roce_entropy(dev, qp->qpn));
+
*(__be32 *) mailbox->buf = cpu_to_be32(optpar);
memcpy(mailbox->buf + 8, context, sizeof *context);
@@ -921,3 +927,23 @@ int mlx4_qp_to_ready(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
return 0;
}
EXPORT_SYMBOL_GPL(mlx4_qp_to_ready);
+
+u16 mlx4_qp_roce_entropy(struct mlx4_dev *dev, u32 qpn)
+{
+ struct mlx4_qp_context context;
+ struct mlx4_qp qp;
+ int err;
+
+ qp.qpn = qpn;
+ err = mlx4_qp_query(dev, &qp, &context);
+ if (!err) {
+ u32 dest_qpn = be32_to_cpu(context.remote_qpn) & 0xffffff;
+ u16 folded_dst = folded_qp(dest_qpn);
+ u16 folded_src = folded_qp(qpn);
+
+ return (dest_qpn != qpn) ?
+ ((folded_dst ^ folded_src) | 0xC000) :
+ folded_src | 0xC000;
+ }
+ return 0xdead;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 9ea49a893323..aac071a7e830 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -39,8 +39,8 @@
#include <linux/mlx5/qp.h>
#include <linux/mlx5/cq.h>
#include <linux/mlx5/vport.h>
+#include <linux/mlx5/transobj.h>
#include "wq.h"
-#include "transobj.h"
#include "mlx5_core.h"
#define MLX5E_MAX_NUM_TC 8
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index c56d91a2812b..6a3e430f1062 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2241,7 +2241,7 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev)
goto err_unmap_free_uar;
}
- err = mlx5_alloc_transport_domain(mdev, &priv->tdn);
+ err = mlx5_core_alloc_transport_domain(mdev, &priv->tdn);
if (err) {
mlx5_core_err(mdev, "alloc td failed, %d\n", err);
goto err_dealloc_pd;
@@ -2324,7 +2324,7 @@ err_destroy_mkey:
mlx5_core_destroy_mkey(mdev, &priv->mr);
err_dealloc_transport_domain:
- mlx5_dealloc_transport_domain(mdev, priv->tdn);
+ mlx5_core_dealloc_transport_domain(mdev, priv->tdn);
err_dealloc_pd:
mlx5_core_dealloc_pd(mdev, priv->pdn);
@@ -2356,7 +2356,7 @@ static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv)
mlx5e_close_drop_rq(priv);
mlx5e_destroy_tises(priv);
mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
- mlx5_dealloc_transport_domain(priv->mdev, priv->tdn);
+ mlx5_core_dealloc_transport_domain(priv->mdev, priv->tdn);
mlx5_core_dealloc_pd(priv->mdev, priv->pdn);
mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar);
free_netdev(netdev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 23c244a7e5d7..647a3ca2c2a9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -230,6 +230,7 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
+ rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN);
mlx5_core_dbg(dev, "event %s(%d) arrived on resource 0x%x\n",
eqe_type_str(eqe->type), eqe->type, rsn);
mlx5_rsc_event(dev, rsn, eqe->type);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index b37749a3730e..1545a944c309 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -78,6 +78,11 @@ struct mlx5_device_context {
void *context;
};
+enum {
+ MLX5_ATOMIC_REQ_MODE_BE = 0x0,
+ MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1,
+};
+
static struct mlx5_profile profile[] = {
[0] = {
.mask = 0,
@@ -387,7 +392,7 @@ query_ex:
return err;
}
-static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz)
+static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz, int opmod)
{
u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)];
int err;
@@ -395,6 +400,7 @@ static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz)
memset(out, 0, sizeof(out));
MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP);
+ MLX5_SET(set_hca_cap_in, in, op_mod, opmod << 1);
err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
if (err)
return err;
@@ -404,6 +410,46 @@ static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz)
return err;
}
+static int handle_hca_cap_atomic(struct mlx5_core_dev *dev)
+{
+ void *set_ctx;
+ void *set_hca_cap;
+ int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
+ int req_endianness;
+ int err;
+
+ if (MLX5_CAP_GEN(dev, atomic)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC,
+ HCA_CAP_OPMOD_GET_CUR);
+ if (err)
+ return err;
+ } else {
+ return 0;
+ }
+
+ req_endianness =
+ MLX5_CAP_ATOMIC(dev,
+ supported_atomic_req_8B_endianess_mode_1);
+
+ if (req_endianness != MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS)
+ return 0;
+
+ set_ctx = kzalloc(set_sz, GFP_KERNEL);
+ if (!set_ctx)
+ return -ENOMEM;
+
+ set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
+
+ /* Set requestor to host endianness */
+ MLX5_SET(atomic_caps, set_hca_cap, atomic_req_8B_endianess_mode,
+ MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS);
+
+ err = set_caps(dev, set_ctx, set_sz, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC);
+
+ kfree(set_ctx);
+ return err;
+}
+
static int handle_hca_cap(struct mlx5_core_dev *dev)
{
void *set_ctx = NULL;
@@ -445,7 +491,8 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12);
- err = set_caps(dev, set_ctx, set_sz);
+ err = set_caps(dev, set_ctx, set_sz,
+ MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
query_ex:
kfree(set_ctx);
@@ -667,7 +714,6 @@ clean:
return err;
}
-#ifdef CONFIG_MLX5_CORE_EN
static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
{
u32 query_in[MLX5_ST_SZ_DW(query_issi_in)];
@@ -720,7 +766,6 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
return -ENOTSUPP;
}
-#endif
static int map_bf_area(struct mlx5_core_dev *dev)
{
@@ -966,13 +1011,11 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
goto err_pagealloc_cleanup;
}
-#ifdef CONFIG_MLX5_CORE_EN
err = mlx5_core_set_issi(dev);
if (err) {
dev_err(&pdev->dev, "failed to set issi\n");
goto err_disable_hca;
}
-#endif
err = mlx5_satisfy_startup_pages(dev, 1);
if (err) {
@@ -992,6 +1035,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
goto reclaim_boot_pages;
}
+ err = handle_hca_cap_atomic(dev);
+ if (err) {
+ dev_err(&pdev->dev, "handle_hca_cap_atomic failed\n");
+ goto reclaim_boot_pages;
+ }
+
err = mlx5_satisfy_startup_pages(dev, 0);
if (err) {
dev_err(&pdev->dev, "failed to allocate init pages\n");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index 30e2ba3f5f16..def289375ecb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -36,6 +36,7 @@
#include <linux/mlx5/cmd.h>
#include <linux/mlx5/qp.h>
#include <linux/mlx5/driver.h>
+#include <linux/mlx5/transobj.h>
#include "mlx5_core.h"
@@ -67,6 +68,52 @@ void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common)
complete(&common->free);
}
+static u64 qp_allowed_event_types(void)
+{
+ u64 mask;
+
+ mask = BIT(MLX5_EVENT_TYPE_PATH_MIG) |
+ BIT(MLX5_EVENT_TYPE_COMM_EST) |
+ BIT(MLX5_EVENT_TYPE_SQ_DRAINED) |
+ BIT(MLX5_EVENT_TYPE_SRQ_LAST_WQE) |
+ BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR) |
+ BIT(MLX5_EVENT_TYPE_PATH_MIG_FAILED) |
+ BIT(MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR) |
+ BIT(MLX5_EVENT_TYPE_WQ_ACCESS_ERROR);
+
+ return mask;
+}
+
+static u64 rq_allowed_event_types(void)
+{
+ u64 mask;
+
+ mask = BIT(MLX5_EVENT_TYPE_SRQ_LAST_WQE) |
+ BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR);
+
+ return mask;
+}
+
+static u64 sq_allowed_event_types(void)
+{
+ return BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR);
+}
+
+static bool is_event_type_allowed(int rsc_type, int event_type)
+{
+ switch (rsc_type) {
+ case MLX5_EVENT_QUEUE_TYPE_QP:
+ return BIT(event_type) & qp_allowed_event_types();
+ case MLX5_EVENT_QUEUE_TYPE_RQ:
+ return BIT(event_type) & rq_allowed_event_types();
+ case MLX5_EVENT_QUEUE_TYPE_SQ:
+ return BIT(event_type) & sq_allowed_event_types();
+ default:
+ WARN(1, "Event arrived for unknown resource type");
+ return false;
+ }
+}
+
void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
{
struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, rsn);
@@ -75,8 +122,16 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
if (!common)
return;
+ if (!is_event_type_allowed((rsn >> MLX5_USER_INDEX_LEN), event_type)) {
+ mlx5_core_warn(dev, "event 0x%.2x is not allowed on resource 0x%.8x\n",
+ event_type, rsn);
+ return;
+ }
+
switch (common->res) {
case MLX5_RES_QP:
+ case MLX5_RES_RQ:
+ case MLX5_RES_SQ:
qp = (struct mlx5_core_qp *)common;
qp->event(qp, event_type);
break;
@@ -177,27 +232,56 @@ void mlx5_eq_pagefault(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
}
#endif
+static int create_qprqsq_common(struct mlx5_core_dev *dev,
+ struct mlx5_core_qp *qp,
+ int rsc_type)
+{
+ struct mlx5_qp_table *table = &dev->priv.qp_table;
+ int err;
+
+ qp->common.res = rsc_type;
+ spin_lock_irq(&table->lock);
+ err = radix_tree_insert(&table->tree,
+ qp->qpn | (rsc_type << MLX5_USER_INDEX_LEN),
+ qp);
+ spin_unlock_irq(&table->lock);
+ if (err)
+ return err;
+
+ atomic_set(&qp->common.refcount, 1);
+ init_completion(&qp->common.free);
+ qp->pid = current->pid;
+
+ return 0;
+}
+
+static void destroy_qprqsq_common(struct mlx5_core_dev *dev,
+ struct mlx5_core_qp *qp)
+{
+ struct mlx5_qp_table *table = &dev->priv.qp_table;
+ unsigned long flags;
+
+ spin_lock_irqsave(&table->lock, flags);
+ radix_tree_delete(&table->tree,
+ qp->qpn | (qp->common.res << MLX5_USER_INDEX_LEN));
+ spin_unlock_irqrestore(&table->lock, flags);
+ mlx5_core_put_rsc((struct mlx5_core_rsc_common *)qp);
+ wait_for_completion(&qp->common.free);
+}
+
int mlx5_core_create_qp(struct mlx5_core_dev *dev,
struct mlx5_core_qp *qp,
struct mlx5_create_qp_mbox_in *in,
int inlen)
{
- struct mlx5_qp_table *table = &dev->priv.qp_table;
struct mlx5_create_qp_mbox_out out;
struct mlx5_destroy_qp_mbox_in din;
struct mlx5_destroy_qp_mbox_out dout;
int err;
- void *qpc;
memset(&out, 0, sizeof(out));
in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_QP);
- if (dev->issi) {
- qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
- /* 0xffffff means we ask to work with cqe version 0 */
- MLX5_SET(qpc, qpc, user_index, 0xffffff);
- }
-
err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
if (err) {
mlx5_core_warn(dev, "ret %d\n", err);
@@ -213,24 +297,16 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
qp->qpn = be32_to_cpu(out.qpn) & 0xffffff;
mlx5_core_dbg(dev, "qpn = 0x%x\n", qp->qpn);
- qp->common.res = MLX5_RES_QP;
- spin_lock_irq(&table->lock);
- err = radix_tree_insert(&table->tree, qp->qpn, qp);
- spin_unlock_irq(&table->lock);
- if (err) {
- mlx5_core_warn(dev, "err %d\n", err);
+ err = create_qprqsq_common(dev, qp, MLX5_RES_QP);
+ if (err)
goto err_cmd;
- }
err = mlx5_debug_qp_add(dev, qp);
if (err)
mlx5_core_dbg(dev, "failed adding QP 0x%x to debug file system\n",
qp->qpn);
- qp->pid = current->pid;
- atomic_set(&qp->common.refcount, 1);
atomic_inc(&dev->num_qps);
- init_completion(&qp->common.free);
return 0;
@@ -250,18 +326,11 @@ int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
{
struct mlx5_destroy_qp_mbox_in in;
struct mlx5_destroy_qp_mbox_out out;
- struct mlx5_qp_table *table = &dev->priv.qp_table;
- unsigned long flags;
int err;
mlx5_debug_qp_remove(dev, qp);
- spin_lock_irqsave(&table->lock, flags);
- radix_tree_delete(&table->tree, qp->qpn);
- spin_unlock_irqrestore(&table->lock, flags);
-
- mlx5_core_put_rsc((struct mlx5_core_rsc_common *)qp);
- wait_for_completion(&qp->common.free);
+ destroy_qprqsq_common(dev, qp);
memset(&in, 0, sizeof(in));
memset(&out, 0, sizeof(out));
@@ -279,59 +348,15 @@ int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
}
EXPORT_SYMBOL_GPL(mlx5_core_destroy_qp);
-int mlx5_core_qp_modify(struct mlx5_core_dev *dev, enum mlx5_qp_state cur_state,
- enum mlx5_qp_state new_state,
+int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 operation,
struct mlx5_modify_qp_mbox_in *in, int sqd_event,
struct mlx5_core_qp *qp)
{
- static const u16 optab[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE] = {
- [MLX5_QP_STATE_RST] = {
- [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
- [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
- [MLX5_QP_STATE_INIT] = MLX5_CMD_OP_RST2INIT_QP,
- },
- [MLX5_QP_STATE_INIT] = {
- [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
- [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
- [MLX5_QP_STATE_INIT] = MLX5_CMD_OP_INIT2INIT_QP,
- [MLX5_QP_STATE_RTR] = MLX5_CMD_OP_INIT2RTR_QP,
- },
- [MLX5_QP_STATE_RTR] = {
- [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
- [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
- [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_RTR2RTS_QP,
- },
- [MLX5_QP_STATE_RTS] = {
- [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
- [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
- [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_RTS2RTS_QP,
- },
- [MLX5_QP_STATE_SQD] = {
- [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
- [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
- },
- [MLX5_QP_STATE_SQER] = {
- [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
- [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
- [MLX5_QP_STATE_RTS] = MLX5_CMD_OP_SQERR2RTS_QP,
- },
- [MLX5_QP_STATE_ERR] = {
- [MLX5_QP_STATE_RST] = MLX5_CMD_OP_2RST_QP,
- [MLX5_QP_STATE_ERR] = MLX5_CMD_OP_2ERR_QP,
- }
- };
-
struct mlx5_modify_qp_mbox_out out;
int err = 0;
- u16 op;
-
- if (cur_state >= MLX5_QP_NUM_STATE || new_state >= MLX5_QP_NUM_STATE ||
- !optab[cur_state][new_state])
- return -EINVAL;
memset(&out, 0, sizeof(out));
- op = optab[cur_state][new_state];
- in->hdr.opcode = cpu_to_be16(op);
+ in->hdr.opcode = cpu_to_be16(operation);
in->qpn = cpu_to_be32(qp->qpn);
err = mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out));
if (err)
@@ -449,3 +474,67 @@ int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn,
}
EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume);
#endif
+
+int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
+ struct mlx5_core_qp *rq)
+{
+ int err;
+ u32 rqn;
+
+ err = mlx5_core_create_rq(dev, in, inlen, &rqn);
+ if (err)
+ return err;
+
+ rq->qpn = rqn;
+ err = create_qprqsq_common(dev, rq, MLX5_RES_RQ);
+ if (err)
+ goto err_destroy_rq;
+
+ return 0;
+
+err_destroy_rq:
+ mlx5_core_destroy_rq(dev, rq->qpn);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_rq_tracked);
+
+void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev,
+ struct mlx5_core_qp *rq)
+{
+ destroy_qprqsq_common(dev, rq);
+ mlx5_core_destroy_rq(dev, rq->qpn);
+}
+EXPORT_SYMBOL(mlx5_core_destroy_rq_tracked);
+
+int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
+ struct mlx5_core_qp *sq)
+{
+ int err;
+ u32 sqn;
+
+ err = mlx5_core_create_sq(dev, in, inlen, &sqn);
+ if (err)
+ return err;
+
+ sq->qpn = sqn;
+ err = create_qprqsq_common(dev, sq, MLX5_RES_SQ);
+ if (err)
+ goto err_destroy_sq;
+
+ return 0;
+
+err_destroy_sq:
+ mlx5_core_destroy_sq(dev, sq->qpn);
+
+ return err;
+}
+EXPORT_SYMBOL(mlx5_core_create_sq_tracked);
+
+void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev,
+ struct mlx5_core_qp *sq)
+{
+ destroy_qprqsq_common(dev, sq);
+ mlx5_core_destroy_sq(dev, sq->qpn);
+}
+EXPORT_SYMBOL(mlx5_core_destroy_sq_tracked);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c
index ffada801976b..04bc522605a0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c
@@ -37,7 +37,7 @@
#include <linux/mlx5/srq.h>
#include <rdma/ib_verbs.h>
#include "mlx5_core.h"
-#include "transobj.h"
+#include <linux/mlx5/transobj.h>
void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type)
{
@@ -241,8 +241,6 @@ static int create_xrc_srq_cmd(struct mlx5_core_dev *dev,
memcpy(xrc_srqc, srqc, MLX5_ST_SZ_BYTES(srqc));
memcpy(pas, in->pas, pas_size);
- /* 0xffffff means we ask to work with cqe version 0 */
- MLX5_SET(xrc_srqc, xrc_srqc, user_index, 0xffffff);
MLX5_SET(create_xrc_srq_in, create_in, opcode,
MLX5_CMD_OP_CREATE_XRC_SRQ);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
index d7068f54e800..03a5093ffeb7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
@@ -32,9 +32,9 @@
#include <linux/mlx5/driver.h>
#include "mlx5_core.h"
-#include "transobj.h"
+#include <linux/mlx5/transobj.h>
-int mlx5_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn)
+int mlx5_core_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn)
{
u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)];
u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)];
@@ -53,8 +53,9 @@ int mlx5_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn)
return err;
}
+EXPORT_SYMBOL(mlx5_core_alloc_transport_domain);
-void mlx5_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn)
+void mlx5_core_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn)
{
u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)];
u32 out[MLX5_ST_SZ_DW(dealloc_transport_domain_out)];
@@ -68,6 +69,7 @@ void mlx5_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn)
mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
}
+EXPORT_SYMBOL(mlx5_core_dealloc_transport_domain);
int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqn)
{
@@ -94,6 +96,7 @@ int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen)
memset(out, 0, sizeof(out));
return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
}
+EXPORT_SYMBOL(mlx5_core_modify_rq);
void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn)
{
@@ -108,6 +111,18 @@ void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn)
mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
}
+int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out)
+{
+ u32 in[MLX5_ST_SZ_DW(query_rq_in)] = {0};
+ int outlen = MLX5_ST_SZ_BYTES(query_rq_out);
+
+ MLX5_SET(query_rq_in, in, opcode, MLX5_CMD_OP_QUERY_RQ);
+ MLX5_SET(query_rq_in, in, rqn, rqn);
+
+ return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, outlen);
+}
+EXPORT_SYMBOL(mlx5_core_query_rq);
+
int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *sqn)
{
u32 out[MLX5_ST_SZ_DW(create_sq_out)];
@@ -133,6 +148,7 @@ int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen)
memset(out, 0, sizeof(out));
return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
}
+EXPORT_SYMBOL(mlx5_core_modify_sq);
void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn)
{
@@ -147,6 +163,18 @@ void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn)
mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
}
+int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out)
+{
+ u32 in[MLX5_ST_SZ_DW(query_sq_in)] = {0};
+ int outlen = MLX5_ST_SZ_BYTES(query_sq_out);
+
+ MLX5_SET(query_sq_in, in, opcode, MLX5_CMD_OP_QUERY_SQ);
+ MLX5_SET(query_sq_in, in, sqn, sqn);
+
+ return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, outlen);
+}
+EXPORT_SYMBOL(mlx5_core_query_sq);
+
int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
u32 *tirn)
{
@@ -162,6 +190,7 @@ int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
return err;
}
+EXPORT_SYMBOL(mlx5_core_create_tir);
int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in,
int inlen)
@@ -187,6 +216,7 @@ void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn)
mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
}
+EXPORT_SYMBOL(mlx5_core_destroy_tir);
int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen,
u32 *tisn)
@@ -203,6 +233,19 @@ int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen,
return err;
}
+EXPORT_SYMBOL(mlx5_core_create_tis);
+
+int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in,
+ int inlen)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_tis_out)] = {0};
+
+ MLX5_SET(modify_tis_in, in, tisn, tisn);
+ MLX5_SET(modify_tis_in, in, opcode, MLX5_CMD_OP_MODIFY_TIS);
+
+ return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_core_modify_tis);
void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn)
{
@@ -216,6 +259,7 @@ void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn)
mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
}
+EXPORT_SYMBOL(mlx5_core_destroy_tis);
int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen,
u32 *rmpn)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index 076197efea9b..c7398b95aecd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -76,7 +76,7 @@ u8 mlx5_query_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
return MLX5_GET(query_vport_state_out, out, admin_state);
}
-EXPORT_SYMBOL(mlx5_query_vport_admin_state);
+EXPORT_SYMBOL_GPL(mlx5_query_vport_admin_state);
int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
u16 vport, u8 state)
@@ -104,7 +104,7 @@ int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
return err;
}
-EXPORT_SYMBOL(mlx5_modify_vport_admin_state);
+EXPORT_SYMBOL_GPL(mlx5_modify_vport_admin_state);
static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u16 vport,
u32 *out, int outlen)
@@ -151,12 +151,9 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
nic_vport_context.permanent_address);
err = mlx5_query_nic_vport_context(mdev, vport, out, outlen);
- if (err)
- goto out;
-
- ether_addr_copy(addr, &out_addr[2]);
+ if (!err)
+ ether_addr_copy(addr, &out_addr[2]);
-out:
kvfree(out);
return err;
}
@@ -197,7 +194,7 @@ int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev,
return err;
}
-EXPORT_SYMBOL(mlx5_modify_nic_vport_mac_address);
+EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_mac_address);
int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev,
u32 vport,
@@ -430,6 +427,68 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev,
}
EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_vlans);
+int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev,
+ u64 *system_image_guid)
+{
+ u32 *out;
+ int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+
+ out = mlx5_vzalloc(outlen);
+ if (!out)
+ return -ENOMEM;
+
+ mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+
+ *system_image_guid = MLX5_GET64(query_nic_vport_context_out, out,
+ nic_vport_context.system_image_guid);
+
+ kfree(out);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_system_image_guid);
+
+int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid)
+{
+ u32 *out;
+ int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+
+ out = mlx5_vzalloc(outlen);
+ if (!out)
+ return -ENOMEM;
+
+ mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+
+ *node_guid = MLX5_GET64(query_nic_vport_context_out, out,
+ nic_vport_context.node_guid);
+
+ kfree(out);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_node_guid);
+
+int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev,
+ u16 *qkey_viol_cntr)
+{
+ u32 *out;
+ int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+
+ out = mlx5_vzalloc(outlen);
+ if (!out)
+ return -ENOMEM;
+
+ mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+
+ *qkey_viol_cntr = MLX5_GET(query_nic_vport_context_out, out,
+ nic_vport_context.qkey_violation_counter);
+
+ kfree(out);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_qkey_viol_cntr);
+
int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport,
u8 port_num, u16 vf_num, u16 gid_index,
union ib_gid *gid)
@@ -750,3 +809,44 @@ int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev,
return err;
}
EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_promisc);
+
+enum mlx5_vport_roce_state {
+ MLX5_VPORT_ROCE_DISABLED = 0,
+ MLX5_VPORT_ROCE_ENABLED = 1,
+};
+
+static int mlx5_nic_vport_update_roce_state(struct mlx5_core_dev *mdev,
+ enum mlx5_vport_roce_state state)
+{
+ void *in;
+ int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+ int err;
+
+ in = mlx5_vzalloc(inlen);
+ if (!in) {
+ mlx5_core_warn(mdev, "failed to allocate inbox\n");
+ return -ENOMEM;
+ }
+
+ MLX5_SET(modify_nic_vport_context_in, in, field_select.roce_en, 1);
+ MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.roce_en,
+ state);
+
+ err = mlx5_modify_nic_vport_context(mdev, in, inlen);
+
+ kvfree(in);
+
+ return err;
+}
+
+int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev)
+{
+ return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_ENABLED);
+}
+EXPORT_SYMBOL_GPL(mlx5_nic_vport_enable_roce);
+
+int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev)
+{
+ return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_DISABLED);
+}
+EXPORT_SYMBOL_GPL(mlx5_nic_vport_disable_roce);
diff --git a/drivers/ntb/hw/Kconfig b/drivers/ntb/hw/Kconfig
index 4d5535c4cddf..7116472b4625 100644
--- a/drivers/ntb/hw/Kconfig
+++ b/drivers/ntb/hw/Kconfig
@@ -1 +1,2 @@
+source "drivers/ntb/hw/amd/Kconfig"
source "drivers/ntb/hw/intel/Kconfig"
diff --git a/drivers/ntb/hw/Makefile b/drivers/ntb/hw/Makefile
index 175d7c92a569..532e0859b4a1 100644
--- a/drivers/ntb/hw/Makefile
+++ b/drivers/ntb/hw/Makefile
@@ -1 +1,2 @@
+obj-$(CONFIG_NTB_AMD) += amd/
obj-$(CONFIG_NTB_INTEL) += intel/
diff --git a/drivers/ntb/hw/amd/Kconfig b/drivers/ntb/hw/amd/Kconfig
new file mode 100644
index 000000000000..cfe903cd9514
--- /dev/null
+++ b/drivers/ntb/hw/amd/Kconfig
@@ -0,0 +1,7 @@
+config NTB_AMD
+ tristate "AMD Non-Transparent Bridge support"
+ depends on X86_64
+ help
+ This driver supports AMD NTB on capable Zeppelin hardware.
+
+ If unsure, say N.
diff --git a/drivers/ntb/hw/amd/Makefile b/drivers/ntb/hw/amd/Makefile
new file mode 100644
index 000000000000..ad54da917563
--- /dev/null
+++ b/drivers/ntb/hw/amd/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_NTB_AMD) += ntb_hw_amd.o
diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c
new file mode 100644
index 000000000000..588803ad6847
--- /dev/null
+++ b/drivers/ntb/hw/amd/ntb_hw_amd.c
@@ -0,0 +1,1143 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * BSD LICENSE
+ *
+ * Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copy
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of AMD Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * AMD PCIe NTB Linux driver
+ *
+ * Contact Information:
+ * Xiangliang Yu <Xiangliang.Yu@amd.com>
+ */
+
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/acpi.h>
+#include <linux/pci.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/ntb.h>
+
+#include "ntb_hw_amd.h"
+
+#define NTB_NAME "ntb_hw_amd"
+#define NTB_DESC "AMD(R) PCI-E Non-Transparent Bridge Driver"
+#define NTB_VER "1.0"
+
+MODULE_DESCRIPTION(NTB_DESC);
+MODULE_VERSION(NTB_VER);
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("AMD Inc.");
+
+static const struct file_operations amd_ntb_debugfs_info;
+static struct dentry *debugfs_dir;
+
+static int ndev_mw_to_bar(struct amd_ntb_dev *ndev, int idx)
+{
+ if (idx < 0 || idx > ndev->mw_count)
+ return -EINVAL;
+
+ return 1 << idx;
+}
+
+static int amd_ntb_mw_count(struct ntb_dev *ntb)
+{
+ return ntb_ndev(ntb)->mw_count;
+}
+
+static int amd_ntb_mw_get_range(struct ntb_dev *ntb, int idx,
+ phys_addr_t *base,
+ resource_size_t *size,
+ resource_size_t *align,
+ resource_size_t *align_size)
+{
+ struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+ int bar;
+
+ bar = ndev_mw_to_bar(ndev, idx);
+ if (bar < 0)
+ return bar;
+
+ if (base)
+ *base = pci_resource_start(ndev->ntb.pdev, bar);
+
+ if (size)
+ *size = pci_resource_len(ndev->ntb.pdev, bar);
+
+ if (align)
+ *align = SZ_4K;
+
+ if (align_size)
+ *align_size = 1;
+
+ return 0;
+}
+
+static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
+ dma_addr_t addr, resource_size_t size)
+{
+ struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+ unsigned long xlat_reg, limit_reg = 0;
+ resource_size_t mw_size;
+ void __iomem *mmio, *peer_mmio;
+ u64 base_addr, limit, reg_val;
+ int bar;
+
+ bar = ndev_mw_to_bar(ndev, idx);
+ if (bar < 0)
+ return bar;
+
+ mw_size = pci_resource_len(ndev->ntb.pdev, bar);
+
+ /* make sure the range fits in the usable mw size */
+ if (size > mw_size)
+ return -EINVAL;
+
+ mmio = ndev->self_mmio;
+ peer_mmio = ndev->peer_mmio;
+
+ base_addr = pci_resource_start(ndev->ntb.pdev, bar);
+
+ if (bar != 1) {
+ xlat_reg = AMD_BAR23XLAT_OFFSET + ((bar - 2) << 3);
+ limit_reg = AMD_BAR23LMT_OFFSET + ((bar - 2) << 3);
+
+ /* Set the limit if supported */
+ limit = base_addr + size;
+
+ /* set and verify setting the translation address */
+ write64(addr, peer_mmio + xlat_reg);
+ reg_val = read64(peer_mmio + xlat_reg);
+ if (reg_val != addr) {
+ write64(0, peer_mmio + xlat_reg);
+ return -EIO;
+ }
+
+ /* set and verify setting the limit */
+ write64(limit, mmio + limit_reg);
+ reg_val = read64(mmio + limit_reg);
+ if (reg_val != limit) {
+ write64(base_addr, mmio + limit_reg);
+ write64(0, peer_mmio + xlat_reg);
+ return -EIO;
+ }
+ } else {
+ xlat_reg = AMD_BAR1XLAT_OFFSET;
+ limit_reg = AMD_BAR1LMT_OFFSET;
+
+ /* split bar addr range must all be 32 bit */
+ if (addr & (~0ull << 32))
+ return -EINVAL;
+ if ((addr + size) & (~0ull << 32))
+ return -EINVAL;
+
+ /* Set the limit if supported */
+ limit = base_addr + size;
+
+ /* set and verify setting the translation address */
+ write64(addr, peer_mmio + xlat_reg);
+ reg_val = read64(peer_mmio + xlat_reg);
+ if (reg_val != addr) {
+ write64(0, peer_mmio + xlat_reg);
+ return -EIO;
+ }
+
+ /* set and verify setting the limit */
+ writel(limit, mmio + limit_reg);
+ reg_val = readl(mmio + limit_reg);
+ if (reg_val != limit) {
+ writel(base_addr, mmio + limit_reg);
+ writel(0, peer_mmio + xlat_reg);
+ return -EIO;
+ }
+ }
+
+ return 0;
+}
+
+static int amd_link_is_up(struct amd_ntb_dev *ndev)
+{
+ if (!ndev->peer_sta)
+ return NTB_LNK_STA_ACTIVE(ndev->cntl_sta);
+
+ /* If peer_sta is reset or D0 event, the ISR has
+ * started a timer to check link status of hardware.
+ * So here just clear status bit. And if peer_sta is
+ * D3 or PME_TO, D0/reset event will be happened when
+ * system wakeup/poweron, so do nothing here.
+ */
+ if (ndev->peer_sta & AMD_PEER_RESET_EVENT)
+ ndev->peer_sta &= ~AMD_PEER_RESET_EVENT;
+ else if (ndev->peer_sta & AMD_PEER_D0_EVENT)
+ ndev->peer_sta = 0;
+
+ return 0;
+}
+
+static int amd_ntb_link_is_up(struct ntb_dev *ntb,
+ enum ntb_speed *speed,
+ enum ntb_width *width)
+{
+ struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+ int ret = 0;
+
+ if (amd_link_is_up(ndev)) {
+ if (speed)
+ *speed = NTB_LNK_STA_SPEED(ndev->lnk_sta);
+ if (width)
+ *width = NTB_LNK_STA_WIDTH(ndev->lnk_sta);
+
+ dev_dbg(ndev_dev(ndev), "link is up.\n");
+
+ ret = 1;
+ } else {
+ if (speed)
+ *speed = NTB_SPEED_NONE;
+ if (width)
+ *width = NTB_WIDTH_NONE;
+
+ dev_dbg(ndev_dev(ndev), "link is down.\n");
+ }
+
+ return ret;
+}
+
+static int amd_ntb_link_enable(struct ntb_dev *ntb,
+ enum ntb_speed max_speed,
+ enum ntb_width max_width)
+{
+ struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+ void __iomem *mmio = ndev->self_mmio;
+ u32 ntb_ctl;
+
+ /* Enable event interrupt */
+ ndev->int_mask &= ~AMD_EVENT_INTMASK;
+ writel(ndev->int_mask, mmio + AMD_INTMASK_OFFSET);
+
+ if (ndev->ntb.topo == NTB_TOPO_SEC)
+ return -EINVAL;
+ dev_dbg(ndev_dev(ndev), "Enabling Link.\n");
+
+ ntb_ctl = readl(mmio + AMD_CNTL_OFFSET);
+ ntb_ctl |= (PMM_REG_CTL | SMM_REG_CTL);
+ writel(ntb_ctl, mmio + AMD_CNTL_OFFSET);
+
+ return 0;
+}
+
+static int amd_ntb_link_disable(struct ntb_dev *ntb)
+{
+ struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+ void __iomem *mmio = ndev->self_mmio;
+ u32 ntb_ctl;
+
+ /* Disable event interrupt */
+ ndev->int_mask |= AMD_EVENT_INTMASK;
+ writel(ndev->int_mask, mmio + AMD_INTMASK_OFFSET);
+
+ if (ndev->ntb.topo == NTB_TOPO_SEC)
+ return -EINVAL;
+ dev_dbg(ndev_dev(ndev), "Enabling Link.\n");
+
+ ntb_ctl = readl(mmio + AMD_CNTL_OFFSET);
+ ntb_ctl &= ~(PMM_REG_CTL | SMM_REG_CTL);
+ writel(ntb_ctl, mmio + AMD_CNTL_OFFSET);
+
+ return 0;
+}
+
+static u64 amd_ntb_db_valid_mask(struct ntb_dev *ntb)
+{
+ return ntb_ndev(ntb)->db_valid_mask;
+}
+
+static int amd_ntb_db_vector_count(struct ntb_dev *ntb)
+{
+ return ntb_ndev(ntb)->db_count;
+}
+
+static u64 amd_ntb_db_vector_mask(struct ntb_dev *ntb, int db_vector)
+{
+ struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+ if (db_vector < 0 || db_vector > ndev->db_count)
+ return 0;
+
+ return ntb_ndev(ntb)->db_valid_mask & (1 << db_vector);
+}
+
+static u64 amd_ntb_db_read(struct ntb_dev *ntb)
+{
+ struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+ void __iomem *mmio = ndev->self_mmio;
+
+ return (u64)readw(mmio + AMD_DBSTAT_OFFSET);
+}
+
+static int amd_ntb_db_clear(struct ntb_dev *ntb, u64 db_bits)
+{
+ struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+ void __iomem *mmio = ndev->self_mmio;
+
+ writew((u16)db_bits, mmio + AMD_DBSTAT_OFFSET);
+
+ return 0;
+}
+
+static int amd_ntb_db_set_mask(struct ntb_dev *ntb, u64 db_bits)
+{
+ struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+ void __iomem *mmio = ndev->self_mmio;
+ unsigned long flags;
+
+ if (db_bits & ~ndev->db_valid_mask)
+ return -EINVAL;
+
+ spin_lock_irqsave(&ndev->db_mask_lock, flags);
+ ndev->db_mask |= db_bits;
+ writew((u16)ndev->db_mask, mmio + AMD_DBMASK_OFFSET);
+ spin_unlock_irqrestore(&ndev->db_mask_lock, flags);
+
+ return 0;
+}
+
+static int amd_ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits)
+{
+ struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+ void __iomem *mmio = ndev->self_mmio;
+ unsigned long flags;
+
+ if (db_bits & ~ndev->db_valid_mask)
+ return -EINVAL;
+
+ spin_lock_irqsave(&ndev->db_mask_lock, flags);
+ ndev->db_mask &= ~db_bits;
+ writew((u16)ndev->db_mask, mmio + AMD_DBMASK_OFFSET);
+ spin_unlock_irqrestore(&ndev->db_mask_lock, flags);
+
+ return 0;
+}
+
+static int amd_ntb_peer_db_addr(struct ntb_dev *ntb,
+ phys_addr_t *db_addr,
+ resource_size_t *db_size)
+{
+ struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+ if (db_addr)
+ *db_addr = (phys_addr_t)(ndev->peer_mmio + AMD_DBREQ_OFFSET);
+ if (db_size)
+ *db_size = sizeof(u32);
+
+ return 0;
+}
+
+static int amd_ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits)
+{
+ struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+ void __iomem *mmio = ndev->self_mmio;
+
+ writew((u16)db_bits, mmio + AMD_DBREQ_OFFSET);
+
+ return 0;
+}
+
+static int amd_ntb_spad_count(struct ntb_dev *ntb)
+{
+ return ntb_ndev(ntb)->spad_count;
+}
+
+static u32 amd_ntb_spad_read(struct ntb_dev *ntb, int idx)
+{
+ struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+ void __iomem *mmio = ndev->self_mmio;
+ u32 offset;
+
+ if (idx < 0 || idx >= ndev->spad_count)
+ return 0;
+
+ offset = ndev->self_spad + (idx << 2);
+ return readl(mmio + AMD_SPAD_OFFSET + offset);
+}
+
+static int amd_ntb_spad_write(struct ntb_dev *ntb,
+ int idx, u32 val)
+{
+ struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+ void __iomem *mmio = ndev->self_mmio;
+ u32 offset;
+
+ if (idx < 0 || idx >= ndev->spad_count)
+ return -EINVAL;
+
+ offset = ndev->self_spad + (idx << 2);
+ writel(val, mmio + AMD_SPAD_OFFSET + offset);
+
+ return 0;
+}
+
+static int amd_ntb_peer_spad_addr(struct ntb_dev *ntb, int idx,
+ phys_addr_t *spad_addr)
+{
+ struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+
+ if (idx < 0 || idx >= ndev->spad_count)
+ return -EINVAL;
+
+ if (spad_addr)
+ *spad_addr = (phys_addr_t)(ndev->self_mmio + AMD_SPAD_OFFSET +
+ ndev->peer_spad + (idx << 2));
+ return 0;
+}
+
+static u32 amd_ntb_peer_spad_read(struct ntb_dev *ntb, int idx)
+{
+ struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+ void __iomem *mmio = ndev->self_mmio;
+ u32 offset;
+
+ if (idx < 0 || idx >= ndev->spad_count)
+ return -EINVAL;
+
+ offset = ndev->peer_spad + (idx << 2);
+ return readl(mmio + AMD_SPAD_OFFSET + offset);
+}
+
+static int amd_ntb_peer_spad_write(struct ntb_dev *ntb,
+ int idx, u32 val)
+{
+ struct amd_ntb_dev *ndev = ntb_ndev(ntb);
+ void __iomem *mmio = ndev->self_mmio;
+ u32 offset;
+
+ if (idx < 0 || idx >= ndev->spad_count)
+ return -EINVAL;
+
+ offset = ndev->peer_spad + (idx << 2);
+ writel(val, mmio + AMD_SPAD_OFFSET + offset);
+
+ return 0;
+}
+
+static const struct ntb_dev_ops amd_ntb_ops = {
+ .mw_count = amd_ntb_mw_count,
+ .mw_get_range = amd_ntb_mw_get_range,
+ .mw_set_trans = amd_ntb_mw_set_trans,
+ .link_is_up = amd_ntb_link_is_up,
+ .link_enable = amd_ntb_link_enable,
+ .link_disable = amd_ntb_link_disable,
+ .db_valid_mask = amd_ntb_db_valid_mask,
+ .db_vector_count = amd_ntb_db_vector_count,
+ .db_vector_mask = amd_ntb_db_vector_mask,
+ .db_read = amd_ntb_db_read,
+ .db_clear = amd_ntb_db_clear,
+ .db_set_mask = amd_ntb_db_set_mask,
+ .db_clear_mask = amd_ntb_db_clear_mask,
+ .peer_db_addr = amd_ntb_peer_db_addr,
+ .peer_db_set = amd_ntb_peer_db_set,
+ .spad_count = amd_ntb_spad_count,
+ .spad_read = amd_ntb_spad_read,
+ .spad_write = amd_ntb_spad_write,
+ .peer_spad_addr = amd_ntb_peer_spad_addr,
+ .peer_spad_read = amd_ntb_peer_spad_read,
+ .peer_spad_write = amd_ntb_peer_spad_write,
+};
+
+static void amd_ack_smu(struct amd_ntb_dev *ndev, u32 bit)
+{
+ void __iomem *mmio = ndev->self_mmio;
+ int reg;
+
+ reg = readl(mmio + AMD_SMUACK_OFFSET);
+ reg |= bit;
+ writel(reg, mmio + AMD_SMUACK_OFFSET);
+
+ ndev->peer_sta |= bit;
+}
+
+static void amd_handle_event(struct amd_ntb_dev *ndev, int vec)
+{
+ void __iomem *mmio = ndev->self_mmio;
+ u32 status;
+
+ status = readl(mmio + AMD_INTSTAT_OFFSET);
+ if (!(status & AMD_EVENT_INTMASK))
+ return;
+
+ dev_dbg(ndev_dev(ndev), "status = 0x%x and vec = %d\n", status, vec);
+
+ status &= AMD_EVENT_INTMASK;
+ switch (status) {
+ case AMD_PEER_FLUSH_EVENT:
+ dev_info(ndev_dev(ndev), "Flush is done.\n");
+ break;
+ case AMD_PEER_RESET_EVENT:
+ amd_ack_smu(ndev, AMD_PEER_RESET_EVENT);
+
+ /* link down first */
+ ntb_link_event(&ndev->ntb);
+ /* polling peer status */
+ schedule_delayed_work(&ndev->hb_timer, AMD_LINK_HB_TIMEOUT);
+
+ break;
+ case AMD_PEER_D3_EVENT:
+ case AMD_PEER_PMETO_EVENT:
+ amd_ack_smu(ndev, status);
+
+ /* link down */
+ ntb_link_event(&ndev->ntb);
+
+ break;
+ case AMD_PEER_D0_EVENT:
+ mmio = ndev->peer_mmio;
+ status = readl(mmio + AMD_PMESTAT_OFFSET);
+ /* check if this is WAKEUP event */
+ if (status & 0x1)
+ dev_info(ndev_dev(ndev), "Wakeup is done.\n");
+
+ amd_ack_smu(ndev, AMD_PEER_D0_EVENT);
+
+ /* start a timer to poll link status */
+ schedule_delayed_work(&ndev->hb_timer,
+ AMD_LINK_HB_TIMEOUT);
+ break;
+ default:
+ dev_info(ndev_dev(ndev), "event status = 0x%x.\n", status);
+ break;
+ }
+}
+
+static irqreturn_t ndev_interrupt(struct amd_ntb_dev *ndev, int vec)
+{
+ dev_dbg(ndev_dev(ndev), "vec %d\n", vec);
+
+ if (vec > (AMD_DB_CNT - 1) || (ndev->msix_vec_count == 1))
+ amd_handle_event(ndev, vec);
+
+ if (vec < AMD_DB_CNT)
+ ntb_db_event(&ndev->ntb, vec);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t ndev_vec_isr(int irq, void *dev)
+{
+ struct amd_ntb_vec *nvec = dev;
+
+ return ndev_interrupt(nvec->ndev, nvec->num);
+}
+
+static irqreturn_t ndev_irq_isr(int irq, void *dev)
+{
+ struct amd_ntb_dev *ndev = dev;
+
+ return ndev_interrupt(ndev, irq - ndev_pdev(ndev)->irq);
+}
+
+static int ndev_init_isr(struct amd_ntb_dev *ndev,
+ int msix_min, int msix_max)
+{
+ struct pci_dev *pdev;
+ int rc, i, msix_count, node;
+
+ pdev = ndev_pdev(ndev);
+
+ node = dev_to_node(&pdev->dev);
+
+ ndev->db_mask = ndev->db_valid_mask;
+
+ /* Try to set up msix irq */
+ ndev->vec = kzalloc_node(msix_max * sizeof(*ndev->vec),
+ GFP_KERNEL, node);
+ if (!ndev->vec)
+ goto err_msix_vec_alloc;
+
+ ndev->msix = kzalloc_node(msix_max * sizeof(*ndev->msix),
+ GFP_KERNEL, node);
+ if (!ndev->msix)
+ goto err_msix_alloc;
+
+ for (i = 0; i < msix_max; ++i)
+ ndev->msix[i].entry = i;
+
+ msix_count = pci_enable_msix_range(pdev, ndev->msix,
+ msix_min, msix_max);
+ if (msix_count < 0)
+ goto err_msix_enable;
+
+ /* NOTE: Disable MSIX if msix count is less than 16 because of
+ * hardware limitation.
+ */
+ if (msix_count < msix_min) {
+ pci_disable_msix(pdev);
+ goto err_msix_enable;
+ }
+
+ for (i = 0; i < msix_count; ++i) {
+ ndev->vec[i].ndev = ndev;
+ ndev->vec[i].num = i;
+ rc = request_irq(ndev->msix[i].vector, ndev_vec_isr, 0,
+ "ndev_vec_isr", &ndev->vec[i]);
+ if (rc)
+ goto err_msix_request;
+ }
+
+ dev_dbg(ndev_dev(ndev), "Using msix interrupts\n");
+ ndev->db_count = msix_min;
+ ndev->msix_vec_count = msix_max;
+ return 0;
+
+err_msix_request:
+ while (i-- > 0)
+ free_irq(ndev->msix[i].vector, ndev);
+ pci_disable_msix(pdev);
+err_msix_enable:
+ kfree(ndev->msix);
+err_msix_alloc:
+ kfree(ndev->vec);
+err_msix_vec_alloc:
+ ndev->msix = NULL;
+ ndev->vec = NULL;
+
+ /* Try to set up msi irq */
+ rc = pci_enable_msi(pdev);
+ if (rc)
+ goto err_msi_enable;
+
+ rc = request_irq(pdev->irq, ndev_irq_isr, 0,
+ "ndev_irq_isr", ndev);
+ if (rc)
+ goto err_msi_request;
+
+ dev_dbg(ndev_dev(ndev), "Using msi interrupts\n");
+ ndev->db_count = 1;
+ ndev->msix_vec_count = 1;
+ return 0;
+
+err_msi_request:
+ pci_disable_msi(pdev);
+err_msi_enable:
+
+ /* Try to set up intx irq */
+ pci_intx(pdev, 1);
+
+ rc = request_irq(pdev->irq, ndev_irq_isr, IRQF_SHARED,
+ "ndev_irq_isr", ndev);
+ if (rc)
+ goto err_intx_request;
+
+ dev_dbg(ndev_dev(ndev), "Using intx interrupts\n");
+ ndev->db_count = 1;
+ ndev->msix_vec_count = 1;
+ return 0;
+
+err_intx_request:
+ return rc;
+}
+
+static void ndev_deinit_isr(struct amd_ntb_dev *ndev)
+{
+ struct pci_dev *pdev;
+ void __iomem *mmio = ndev->self_mmio;
+ int i;
+
+ pdev = ndev_pdev(ndev);
+
+ /* Mask all doorbell interrupts */
+ ndev->db_mask = ndev->db_valid_mask;
+ writel(ndev->db_mask, mmio + AMD_DBMASK_OFFSET);
+
+ if (ndev->msix) {
+ i = ndev->msix_vec_count;
+ while (i--)
+ free_irq(ndev->msix[i].vector, &ndev->vec[i]);
+ pci_disable_msix(pdev);
+ kfree(ndev->msix);
+ kfree(ndev->vec);
+ } else {
+ free_irq(pdev->irq, ndev);
+ if (pci_dev_msi_enabled(pdev))
+ pci_disable_msi(pdev);
+ else
+ pci_intx(pdev, 0);
+ }
+}
+
+static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf,
+ size_t count, loff_t *offp)
+{
+ struct amd_ntb_dev *ndev;
+ void __iomem *mmio;
+ char *buf;
+ size_t buf_size;
+ ssize_t ret, off;
+ union { u64 v64; u32 v32; u16 v16; } u;
+
+ ndev = filp->private_data;
+ mmio = ndev->self_mmio;
+
+ buf_size = min(count, 0x800ul);
+
+ buf = kmalloc(buf_size, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ off = 0;
+
+ off += scnprintf(buf + off, buf_size - off,
+ "NTB Device Information:\n");
+
+ off += scnprintf(buf + off, buf_size - off,
+ "Connection Topology -\t%s\n",
+ ntb_topo_string(ndev->ntb.topo));
+
+ off += scnprintf(buf + off, buf_size - off,
+ "LNK STA -\t\t%#06x\n", ndev->lnk_sta);
+
+ if (!amd_link_is_up(ndev)) {
+ off += scnprintf(buf + off, buf_size - off,
+ "Link Status -\t\tDown\n");
+ } else {
+ off += scnprintf(buf + off, buf_size - off,
+ "Link Status -\t\tUp\n");
+ off += scnprintf(buf + off, buf_size - off,
+ "Link Speed -\t\tPCI-E Gen %u\n",
+ NTB_LNK_STA_SPEED(ndev->lnk_sta));
+ off += scnprintf(buf + off, buf_size - off,
+ "Link Width -\t\tx%u\n",
+ NTB_LNK_STA_WIDTH(ndev->lnk_sta));
+ }
+
+ off += scnprintf(buf + off, buf_size - off,
+ "Memory Window Count -\t%u\n", ndev->mw_count);
+ off += scnprintf(buf + off, buf_size - off,
+ "Scratchpad Count -\t%u\n", ndev->spad_count);
+ off += scnprintf(buf + off, buf_size - off,
+ "Doorbell Count -\t%u\n", ndev->db_count);
+ off += scnprintf(buf + off, buf_size - off,
+ "MSIX Vector Count -\t%u\n", ndev->msix_vec_count);
+
+ off += scnprintf(buf + off, buf_size - off,
+ "Doorbell Valid Mask -\t%#llx\n", ndev->db_valid_mask);
+
+ u.v32 = readl(ndev->self_mmio + AMD_DBMASK_OFFSET);
+ off += scnprintf(buf + off, buf_size - off,
+ "Doorbell Mask -\t\t\t%#06x\n", u.v32);
+
+ u.v32 = readl(mmio + AMD_DBSTAT_OFFSET);
+ off += scnprintf(buf + off, buf_size - off,
+ "Doorbell Bell -\t\t\t%#06x\n", u.v32);
+
+ off += scnprintf(buf + off, buf_size - off,
+ "\nNTB Incoming XLAT:\n");
+
+ u.v64 = read64(mmio + AMD_BAR1XLAT_OFFSET);
+ off += scnprintf(buf + off, buf_size - off,
+ "XLAT1 -\t\t%#018llx\n", u.v64);
+
+ u.v64 = read64(ndev->self_mmio + AMD_BAR23XLAT_OFFSET);
+ off += scnprintf(buf + off, buf_size - off,
+ "XLAT23 -\t\t%#018llx\n", u.v64);
+
+ u.v64 = read64(ndev->self_mmio + AMD_BAR45XLAT_OFFSET);
+ off += scnprintf(buf + off, buf_size - off,
+ "XLAT45 -\t\t%#018llx\n", u.v64);
+
+ u.v32 = readl(mmio + AMD_BAR1LMT_OFFSET);
+ off += scnprintf(buf + off, buf_size - off,
+ "LMT1 -\t\t\t%#06x\n", u.v32);
+
+ u.v64 = read64(ndev->self_mmio + AMD_BAR23LMT_OFFSET);
+ off += scnprintf(buf + off, buf_size - off,
+ "LMT23 -\t\t\t%#018llx\n", u.v64);
+
+ u.v64 = read64(ndev->self_mmio + AMD_BAR45LMT_OFFSET);
+ off += scnprintf(buf + off, buf_size - off,
+ "LMT45 -\t\t\t%#018llx\n", u.v64);
+
+ ret = simple_read_from_buffer(ubuf, count, offp, buf, off);
+ kfree(buf);
+ return ret;
+}
+
+static void ndev_init_debugfs(struct amd_ntb_dev *ndev)
+{
+ if (!debugfs_dir) {
+ ndev->debugfs_dir = NULL;
+ ndev->debugfs_info = NULL;
+ } else {
+ ndev->debugfs_dir =
+ debugfs_create_dir(ndev_name(ndev), debugfs_dir);
+ if (!ndev->debugfs_dir)
+ ndev->debugfs_info = NULL;
+ else
+ ndev->debugfs_info =
+ debugfs_create_file("info", S_IRUSR,
+ ndev->debugfs_dir, ndev,
+ &amd_ntb_debugfs_info);
+ }
+}
+
+static void ndev_deinit_debugfs(struct amd_ntb_dev *ndev)
+{
+ debugfs_remove_recursive(ndev->debugfs_dir);
+}
+
+static inline void ndev_init_struct(struct amd_ntb_dev *ndev,
+ struct pci_dev *pdev)
+{
+ ndev->ntb.pdev = pdev;
+ ndev->ntb.topo = NTB_TOPO_NONE;
+ ndev->ntb.ops = &amd_ntb_ops;
+ ndev->int_mask = AMD_EVENT_INTMASK;
+ spin_lock_init(&ndev->db_mask_lock);
+}
+
+static int amd_poll_link(struct amd_ntb_dev *ndev)
+{
+ void __iomem *mmio = ndev->peer_mmio;
+ u32 reg, stat;
+ int rc;
+
+ reg = readl(mmio + AMD_SIDEINFO_OFFSET);
+ reg &= NTB_LIN_STA_ACTIVE_BIT;
+
+ dev_dbg(ndev_dev(ndev), "%s: reg_val = 0x%x.\n", __func__, reg);
+
+ if (reg == ndev->cntl_sta)
+ return 0;
+
+ ndev->cntl_sta = reg;
+
+ rc = pci_read_config_dword(ndev->ntb.pdev,
+ AMD_LINK_STATUS_OFFSET, &stat);
+ if (rc)
+ return 0;
+ ndev->lnk_sta = stat;
+
+ return 1;
+}
+
+static void amd_link_hb(struct work_struct *work)
+{
+ struct amd_ntb_dev *ndev = hb_ndev(work);
+
+ if (amd_poll_link(ndev))
+ ntb_link_event(&ndev->ntb);
+
+ if (!amd_link_is_up(ndev))
+ schedule_delayed_work(&ndev->hb_timer, AMD_LINK_HB_TIMEOUT);
+}
+
+static int amd_init_isr(struct amd_ntb_dev *ndev)
+{
+ return ndev_init_isr(ndev, AMD_DB_CNT, AMD_MSIX_VECTOR_CNT);
+}
+
+static void amd_init_side_info(struct amd_ntb_dev *ndev)
+{
+ void __iomem *mmio = ndev->self_mmio;
+ unsigned int reg;
+
+ reg = readl(mmio + AMD_SIDEINFO_OFFSET);
+ if (!(reg & AMD_SIDE_READY)) {
+ reg |= AMD_SIDE_READY;
+ writel(reg, mmio + AMD_SIDEINFO_OFFSET);
+ }
+}
+
+static void amd_deinit_side_info(struct amd_ntb_dev *ndev)
+{
+ void __iomem *mmio = ndev->self_mmio;
+ unsigned int reg;
+
+ reg = readl(mmio + AMD_SIDEINFO_OFFSET);
+ if (reg & AMD_SIDE_READY) {
+ reg &= ~AMD_SIDE_READY;
+ writel(reg, mmio + AMD_SIDEINFO_OFFSET);
+ readl(mmio + AMD_SIDEINFO_OFFSET);
+ }
+}
+
+static int amd_init_ntb(struct amd_ntb_dev *ndev)
+{
+ void __iomem *mmio = ndev->self_mmio;
+
+ ndev->mw_count = AMD_MW_CNT;
+ ndev->spad_count = AMD_SPADS_CNT;
+ ndev->db_count = AMD_DB_CNT;
+
+ switch (ndev->ntb.topo) {
+ case NTB_TOPO_PRI:
+ case NTB_TOPO_SEC:
+ ndev->spad_count >>= 1;
+ if (ndev->ntb.topo == NTB_TOPO_PRI) {
+ ndev->self_spad = 0;
+ ndev->peer_spad = 0x20;
+ } else {
+ ndev->self_spad = 0x20;
+ ndev->peer_spad = 0;
+ }
+
+ INIT_DELAYED_WORK(&ndev->hb_timer, amd_link_hb);
+ schedule_delayed_work(&ndev->hb_timer, AMD_LINK_HB_TIMEOUT);
+
+ break;
+ default:
+ dev_err(ndev_dev(ndev), "AMD NTB does not support B2B mode.\n");
+ return -EINVAL;
+ }
+
+ ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1;
+
+ /* Mask event interrupts */
+ writel(ndev->int_mask, mmio + AMD_INTMASK_OFFSET);
+
+ return 0;
+}
+
+static enum ntb_topo amd_get_topo(struct amd_ntb_dev *ndev)
+{
+ void __iomem *mmio = ndev->self_mmio;
+ u32 info;
+
+ info = readl(mmio + AMD_SIDEINFO_OFFSET);
+ if (info & AMD_SIDE_MASK)
+ return NTB_TOPO_SEC;
+ else
+ return NTB_TOPO_PRI;
+}
+
+static int amd_init_dev(struct amd_ntb_dev *ndev)
+{
+ struct pci_dev *pdev;
+ int rc = 0;
+
+ pdev = ndev_pdev(ndev);
+
+ ndev->ntb.topo = amd_get_topo(ndev);
+ dev_dbg(ndev_dev(ndev), "AMD NTB topo is %s\n",
+ ntb_topo_string(ndev->ntb.topo));
+
+ rc = amd_init_ntb(ndev);
+ if (rc)
+ return rc;
+
+ rc = amd_init_isr(ndev);
+ if (rc) {
+ dev_err(ndev_dev(ndev), "fail to init isr.\n");
+ return rc;
+ }
+
+ ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1;
+
+ return 0;
+}
+
+static void amd_deinit_dev(struct amd_ntb_dev *ndev)
+{
+ cancel_delayed_work_sync(&ndev->hb_timer);
+
+ ndev_deinit_isr(ndev);
+}
+
+static int amd_ntb_init_pci(struct amd_ntb_dev *ndev,
+ struct pci_dev *pdev)
+{
+ int rc;
+
+ pci_set_drvdata(pdev, ndev);
+
+ rc = pci_enable_device(pdev);
+ if (rc)
+ goto err_pci_enable;
+
+ rc = pci_request_regions(pdev, NTB_NAME);
+ if (rc)
+ goto err_pci_regions;
+
+ pci_set_master(pdev);
+
+ rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+ if (rc) {
+ rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ if (rc)
+ goto err_dma_mask;
+ dev_warn(ndev_dev(ndev), "Cannot DMA highmem\n");
+ }
+
+ rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+ if (rc) {
+ rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+ if (rc)
+ goto err_dma_mask;
+ dev_warn(ndev_dev(ndev), "Cannot DMA consistent highmem\n");
+ }
+
+ ndev->self_mmio = pci_iomap(pdev, 0, 0);
+ if (!ndev->self_mmio) {
+ rc = -EIO;
+ goto err_dma_mask;
+ }
+ ndev->peer_mmio = ndev->self_mmio + AMD_PEER_OFFSET;
+
+ return 0;
+
+err_dma_mask:
+ pci_clear_master(pdev);
+err_pci_regions:
+ pci_disable_device(pdev);
+err_pci_enable:
+ pci_set_drvdata(pdev, NULL);
+ return rc;
+}
+
+static void amd_ntb_deinit_pci(struct amd_ntb_dev *ndev)
+{
+ struct pci_dev *pdev = ndev_pdev(ndev);
+
+ pci_iounmap(pdev, ndev->self_mmio);
+
+ pci_clear_master(pdev);
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+ pci_set_drvdata(pdev, NULL);
+}
+
+static int amd_ntb_pci_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+{
+ struct amd_ntb_dev *ndev;
+ int rc, node;
+
+ node = dev_to_node(&pdev->dev);
+
+ ndev = kzalloc_node(sizeof(*ndev), GFP_KERNEL, node);
+ if (!ndev) {
+ rc = -ENOMEM;
+ goto err_ndev;
+ }
+
+ ndev_init_struct(ndev, pdev);
+
+ rc = amd_ntb_init_pci(ndev, pdev);
+ if (rc)
+ goto err_init_pci;
+
+ rc = amd_init_dev(ndev);
+ if (rc)
+ goto err_init_dev;
+
+ /* write side info */
+ amd_init_side_info(ndev);
+
+ amd_poll_link(ndev);
+
+ ndev_init_debugfs(ndev);
+
+ rc = ntb_register_device(&ndev->ntb);
+ if (rc)
+ goto err_register;
+
+ dev_info(&pdev->dev, "NTB device registered.\n");
+
+ return 0;
+
+err_register:
+ ndev_deinit_debugfs(ndev);
+ amd_deinit_dev(ndev);
+err_init_dev:
+ amd_ntb_deinit_pci(ndev);
+err_init_pci:
+ kfree(ndev);
+err_ndev:
+ return rc;
+}
+
+static void amd_ntb_pci_remove(struct pci_dev *pdev)
+{
+ struct amd_ntb_dev *ndev = pci_get_drvdata(pdev);
+
+ ntb_unregister_device(&ndev->ntb);
+ ndev_deinit_debugfs(ndev);
+ amd_deinit_side_info(ndev);
+ amd_deinit_dev(ndev);
+ amd_ntb_deinit_pci(ndev);
+ kfree(ndev);
+}
+
+static const struct file_operations amd_ntb_debugfs_info = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = ndev_debugfs_read,
+};
+
+static const struct pci_device_id amd_ntb_pci_tbl[] = {
+ {PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_NTB)},
+ {0}
+};
+MODULE_DEVICE_TABLE(pci, amd_ntb_pci_tbl);
+
+static struct pci_driver amd_ntb_pci_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = amd_ntb_pci_tbl,
+ .probe = amd_ntb_pci_probe,
+ .remove = amd_ntb_pci_remove,
+};
+
+static int __init amd_ntb_pci_driver_init(void)
+{
+ pr_info("%s %s\n", NTB_DESC, NTB_VER);
+
+ if (debugfs_initialized())
+ debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+
+ return pci_register_driver(&amd_ntb_pci_driver);
+}
+module_init(amd_ntb_pci_driver_init);
+
+static void __exit amd_ntb_pci_driver_exit(void)
+{
+ pci_unregister_driver(&amd_ntb_pci_driver);
+ debugfs_remove_recursive(debugfs_dir);
+}
+module_exit(amd_ntb_pci_driver_exit);
diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.h b/drivers/ntb/hw/amd/ntb_hw_amd.h
new file mode 100644
index 000000000000..2eac3cd3e646
--- /dev/null
+++ b/drivers/ntb/hw/amd/ntb_hw_amd.h
@@ -0,0 +1,217 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * BSD LICENSE
+ *
+ * Copyright (C) 2016 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copy
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of AMD Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * AMD PCIe NTB Linux driver
+ *
+ * Contact Information:
+ * Xiangliang Yu <Xiangliang.Yu@amd.com>
+ */
+
+#ifndef NTB_HW_AMD_H
+#define NTB_HW_AMD_H
+
+#include <linux/ntb.h>
+#include <linux/pci.h>
+
+#define PCI_DEVICE_ID_AMD_NTB 0x145B
+#define AMD_LINK_HB_TIMEOUT msecs_to_jiffies(1000)
+#define AMD_LINK_STATUS_OFFSET 0x68
+#define NTB_LIN_STA_ACTIVE_BIT 0x00000002
+#define NTB_LNK_STA_SPEED_MASK 0x000F0000
+#define NTB_LNK_STA_WIDTH_MASK 0x03F00000
+#define NTB_LNK_STA_ACTIVE(x) (!!((x) & NTB_LIN_STA_ACTIVE_BIT))
+#define NTB_LNK_STA_SPEED(x) (((x) & NTB_LNK_STA_SPEED_MASK) >> 16)
+#define NTB_LNK_STA_WIDTH(x) (((x) & NTB_LNK_STA_WIDTH_MASK) >> 20)
+
+#ifndef read64
+#ifdef readq
+#define read64 readq
+#else
+#define read64 _read64
+static inline u64 _read64(void __iomem *mmio)
+{
+ u64 low, high;
+
+ low = readl(mmio);
+ high = readl(mmio + sizeof(u32));
+ return low | (high << 32);
+}
+#endif
+#endif
+
+#ifndef write64
+#ifdef writeq
+#define write64 writeq
+#else
+#define write64 _write64
+static inline void _write64(u64 val, void __iomem *mmio)
+{
+ writel(val, mmio);
+ writel(val >> 32, mmio + sizeof(u32));
+}
+#endif
+#endif
+
+enum {
+ /* AMD NTB Capability */
+ AMD_MW_CNT = 3,
+ AMD_DB_CNT = 16,
+ AMD_MSIX_VECTOR_CNT = 24,
+ AMD_SPADS_CNT = 16,
+
+ /* AMD NTB register offset */
+ AMD_CNTL_OFFSET = 0x200,
+
+ /* NTB control register bits */
+ PMM_REG_CTL = BIT(21),
+ SMM_REG_CTL = BIT(20),
+ SMM_REG_ACC_PATH = BIT(18),
+ PMM_REG_ACC_PATH = BIT(17),
+ NTB_CLK_EN = BIT(16),
+
+ AMD_STA_OFFSET = 0x204,
+ AMD_PGSLV_OFFSET = 0x208,
+ AMD_SPAD_MUX_OFFSET = 0x20C,
+ AMD_SPAD_OFFSET = 0x210,
+ AMD_RSMU_HCID = 0x250,
+ AMD_RSMU_SIID = 0x254,
+ AMD_PSION_OFFSET = 0x300,
+ AMD_SSION_OFFSET = 0x330,
+ AMD_MMINDEX_OFFSET = 0x400,
+ AMD_MMDATA_OFFSET = 0x404,
+ AMD_SIDEINFO_OFFSET = 0x408,
+
+ AMD_SIDE_MASK = BIT(0),
+ AMD_SIDE_READY = BIT(1),
+
+ /* limit register */
+ AMD_ROMBARLMT_OFFSET = 0x410,
+ AMD_BAR1LMT_OFFSET = 0x414,
+ AMD_BAR23LMT_OFFSET = 0x418,
+ AMD_BAR45LMT_OFFSET = 0x420,
+ /* xlat address */
+ AMD_POMBARXLAT_OFFSET = 0x428,
+ AMD_BAR1XLAT_OFFSET = 0x430,
+ AMD_BAR23XLAT_OFFSET = 0x438,
+ AMD_BAR45XLAT_OFFSET = 0x440,
+ /* doorbell and interrupt */
+ AMD_DBFM_OFFSET = 0x450,
+ AMD_DBREQ_OFFSET = 0x454,
+ AMD_MIRRDBSTAT_OFFSET = 0x458,
+ AMD_DBMASK_OFFSET = 0x45C,
+ AMD_DBSTAT_OFFSET = 0x460,
+ AMD_INTMASK_OFFSET = 0x470,
+ AMD_INTSTAT_OFFSET = 0x474,
+
+ /* event type */
+ AMD_PEER_FLUSH_EVENT = BIT(0),
+ AMD_PEER_RESET_EVENT = BIT(1),
+ AMD_PEER_D3_EVENT = BIT(2),
+ AMD_PEER_PMETO_EVENT = BIT(3),
+ AMD_PEER_D0_EVENT = BIT(4),
+ AMD_EVENT_INTMASK = (AMD_PEER_FLUSH_EVENT |
+ AMD_PEER_RESET_EVENT | AMD_PEER_D3_EVENT |
+ AMD_PEER_PMETO_EVENT | AMD_PEER_D0_EVENT),
+
+ AMD_PMESTAT_OFFSET = 0x480,
+ AMD_PMSGTRIG_OFFSET = 0x490,
+ AMD_LTRLATENCY_OFFSET = 0x494,
+ AMD_FLUSHTRIG_OFFSET = 0x498,
+
+ /* SMU register*/
+ AMD_SMUACK_OFFSET = 0x4A0,
+ AMD_SINRST_OFFSET = 0x4A4,
+ AMD_RSPNUM_OFFSET = 0x4A8,
+ AMD_SMU_SPADMUTEX = 0x4B0,
+ AMD_SMU_SPADOFFSET = 0x4B4,
+
+ AMD_PEER_OFFSET = 0x400,
+};
+
+struct amd_ntb_dev;
+
+struct amd_ntb_vec {
+ struct amd_ntb_dev *ndev;
+ int num;
+};
+
+struct amd_ntb_dev {
+ struct ntb_dev ntb;
+
+ u32 ntb_side;
+ u32 lnk_sta;
+ u32 cntl_sta;
+ u32 peer_sta;
+
+ unsigned char mw_count;
+ unsigned char spad_count;
+ unsigned char db_count;
+ unsigned char msix_vec_count;
+
+ u64 db_valid_mask;
+ u64 db_mask;
+ u32 int_mask;
+
+ struct msix_entry *msix;
+ struct amd_ntb_vec *vec;
+
+ /* synchronize rmw access of db_mask and hw reg */
+ spinlock_t db_mask_lock;
+
+ void __iomem *self_mmio;
+ void __iomem *peer_mmio;
+ unsigned int self_spad;
+ unsigned int peer_spad;
+
+ struct delayed_work hb_timer;
+
+ struct dentry *debugfs_dir;
+ struct dentry *debugfs_info;
+};
+
+#define ndev_pdev(ndev) ((ndev)->ntb.pdev)
+#define ndev_name(ndev) pci_name(ndev_pdev(ndev))
+#define ndev_dev(ndev) (&ndev_pdev(ndev)->dev)
+#define ntb_ndev(__ntb) container_of(__ntb, struct amd_ntb_dev, ntb)
+#define hb_ndev(__work) container_of(__work, struct amd_ntb_dev, hb_timer.work)
+
+#endif
diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c
index a198f8298258..40d04ef5da9e 100644
--- a/drivers/ntb/hw/intel/ntb_hw_intel.c
+++ b/drivers/ntb/hw/intel/ntb_hw_intel.c
@@ -875,7 +875,7 @@ static int intel_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
limit_reg = bar2_off(ndev->xlat_reg->bar2_limit, bar);
if (bar < 4 || !ndev->bar4_split) {
- base = ioread64(mmio + base_reg);
+ base = ioread64(mmio + base_reg) & NTB_BAR_MASK_64;
/* Set the limit if supported, if size is not mw_size */
if (limit_reg && size != mw_size)
@@ -906,7 +906,7 @@ static int intel_ntb_mw_set_trans(struct ntb_dev *ntb, int idx,
if ((addr + size) & (~0ull << 32))
return -EINVAL;
- base = ioread32(mmio + base_reg);
+ base = ioread32(mmio + base_reg) & NTB_BAR_MASK_32;
/* Set the limit if supported, if size is not mw_size */
if (limit_reg && size != mw_size)
diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.h b/drivers/ntb/hw/intel/ntb_hw_intel.h
index 2eb4addd10d0..3ec149cf6562 100644
--- a/drivers/ntb/hw/intel/ntb_hw_intel.h
+++ b/drivers/ntb/hw/intel/ntb_hw_intel.h
@@ -245,6 +245,9 @@
#define NTB_UNSAFE_DB BIT_ULL(0)
#define NTB_UNSAFE_SPAD BIT_ULL(1)
+#define NTB_BAR_MASK_64 ~(0xfull)
+#define NTB_BAR_MASK_32 ~(0xfu)
+
struct intel_ntb_dev;
struct intel_ntb_reg {
@@ -334,7 +337,8 @@ struct intel_ntb_dev {
#define ndev_pdev(ndev) ((ndev)->ntb.pdev)
#define ndev_name(ndev) pci_name(ndev_pdev(ndev))
#define ndev_dev(ndev) (&ndev_pdev(ndev)->dev)
-#define ntb_ndev(ntb) container_of(ntb, struct intel_ntb_dev, ntb)
-#define hb_ndev(work) container_of(work, struct intel_ntb_dev, hb_timer.work)
+#define ntb_ndev(__ntb) container_of(__ntb, struct intel_ntb_dev, ntb)
+#define hb_ndev(__work) container_of(__work, struct intel_ntb_dev, \
+ hb_timer.work)
#endif
diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index 60654d524858..ec4775f0ec16 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -171,12 +171,14 @@ struct ntb_transport_qp {
u64 rx_err_ver;
u64 rx_memcpy;
u64 rx_async;
+ u64 dma_rx_prep_err;
u64 tx_bytes;
u64 tx_pkts;
u64 tx_ring_full;
u64 tx_err_no_buf;
u64 tx_memcpy;
u64 tx_async;
+ u64 dma_tx_prep_err;
};
struct ntb_transport_mw {
@@ -249,6 +251,8 @@ enum {
#define QP_TO_MW(nt, qp) ((qp) % nt->mw_count)
#define NTB_QP_DEF_NUM_ENTRIES 100
#define NTB_LINK_DOWN_TIMEOUT 10
+#define DMA_RETRIES 20
+#define DMA_OUT_RESOURCE_TO 50
static void ntb_transport_rxc_db(unsigned long data);
static const struct ntb_ctx_ops ntb_transport_ops;
@@ -501,6 +505,12 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count,
out_offset += snprintf(buf + out_offset, out_count - out_offset,
"free tx - \t%u\n",
ntb_transport_tx_free_entry(qp));
+ out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ "DMA tx prep err - \t%llu\n",
+ qp->dma_tx_prep_err);
+ out_offset += snprintf(buf + out_offset, out_count - out_offset,
+ "DMA rx prep err - \t%llu\n",
+ qp->dma_rx_prep_err);
out_offset += snprintf(buf + out_offset, out_count - out_offset,
"\n");
@@ -726,6 +736,8 @@ static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp)
qp->tx_err_no_buf = 0;
qp->tx_memcpy = 0;
qp->tx_async = 0;
+ qp->dma_tx_prep_err = 0;
+ qp->dma_rx_prep_err = 0;
}
static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp)
@@ -1228,6 +1240,7 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset)
struct dmaengine_unmap_data *unmap;
dma_cookie_t cookie;
void *buf = entry->buf;
+ int retries = 0;
len = entry->len;
@@ -1263,11 +1276,21 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset)
unmap->from_cnt = 1;
- txd = device->device_prep_dma_memcpy(chan, unmap->addr[1],
- unmap->addr[0], len,
- DMA_PREP_INTERRUPT);
- if (!txd)
+ for (retries = 0; retries < DMA_RETRIES; retries++) {
+ txd = device->device_prep_dma_memcpy(chan, unmap->addr[1],
+ unmap->addr[0], len,
+ DMA_PREP_INTERRUPT);
+ if (txd)
+ break;
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(DMA_OUT_RESOURCE_TO);
+ }
+
+ if (!txd) {
+ qp->dma_rx_prep_err++;
goto err_get_unmap;
+ }
txd->callback = ntb_rx_copy_callback;
txd->callback_param = entry;
@@ -1460,6 +1483,7 @@ static void ntb_async_tx(struct ntb_transport_qp *qp,
void __iomem *offset;
size_t len = entry->len;
void *buf = entry->buf;
+ int retries = 0;
offset = qp->tx_mw + qp->tx_max_frame * qp->tx_index;
hdr = offset + qp->tx_max_frame - sizeof(struct ntb_payload_header);
@@ -1494,10 +1518,20 @@ static void ntb_async_tx(struct ntb_transport_qp *qp,
unmap->to_cnt = 1;
- txd = device->device_prep_dma_memcpy(chan, dest, unmap->addr[0], len,
- DMA_PREP_INTERRUPT);
- if (!txd)
+ for (retries = 0; retries < DMA_RETRIES; retries++) {
+ txd = device->device_prep_dma_memcpy(chan, dest, unmap->addr[0],
+ len, DMA_PREP_INTERRUPT);
+ if (txd)
+ break;
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(DMA_OUT_RESOURCE_TO);
+ }
+
+ if (!txd) {
+ qp->dma_tx_prep_err++;
goto err_get_unmap;
+ }
txd->callback = ntb_tx_copy_callback;
txd->callback_param = entry;
@@ -1532,7 +1566,7 @@ static int ntb_process_tx(struct ntb_transport_qp *qp,
if (entry->len > qp->tx_max_frame - sizeof(struct ntb_payload_header)) {
if (qp->tx_handler)
- qp->tx_handler(qp->cb_data, qp, NULL, -EIO);
+ qp->tx_handler(qp, qp->cb_data, NULL, -EIO);
ntb_list_add(&qp->ntb_tx_free_q_lock, &entry->entry,
&qp->tx_free_q);
diff --git a/drivers/ntb/test/Kconfig b/drivers/ntb/test/Kconfig
index 01852f98a843..a5d0eda44438 100644
--- a/drivers/ntb/test/Kconfig
+++ b/drivers/ntb/test/Kconfig
@@ -17,3 +17,11 @@ config NTB_TOOL
functioning at a basic level.
If unsure, say N.
+
+config NTB_PERF
+ tristate "NTB RAW Perf Measuring Tool"
+ help
+ This is a tool to measure raw NTB performance by transferring data
+ to and from the window without additional software interaction.
+
+ If unsure, say N.
diff --git a/drivers/ntb/test/Makefile b/drivers/ntb/test/Makefile
index 0ea32a324b6c..9e77e0b761c2 100644
--- a/drivers/ntb/test/Makefile
+++ b/drivers/ntb/test/Makefile
@@ -1,2 +1,3 @@
obj-$(CONFIG_NTB_PINGPONG) += ntb_pingpong.o
obj-$(CONFIG_NTB_TOOL) += ntb_tool.o
+obj-$(CONFIG_NTB_PERF) += ntb_perf.o
diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c
new file mode 100644
index 000000000000..c8a37ba4b4f9
--- /dev/null
+++ b/drivers/ntb/test/ntb_perf.c
@@ -0,0 +1,748 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copy
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * PCIe NTB Perf Linux driver
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/time.h>
+#include <linux/timer.h>
+#include <linux/dma-mapping.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/debugfs.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/sizes.h>
+#include <linux/ntb.h>
+
+#define DRIVER_NAME "ntb_perf"
+#define DRIVER_DESCRIPTION "PCIe NTB Performance Measurement Tool"
+
+#define DRIVER_LICENSE "Dual BSD/GPL"
+#define DRIVER_VERSION "1.0"
+#define DRIVER_AUTHOR "Dave Jiang <dave.jiang@intel.com>"
+
+#define PERF_LINK_DOWN_TIMEOUT 10
+#define PERF_VERSION 0xffff0001
+#define MAX_THREADS 32
+#define MAX_TEST_SIZE SZ_1M
+#define MAX_SRCS 32
+#define DMA_OUT_RESOURCE_TO 50
+#define DMA_RETRIES 20
+#define SZ_4G (1ULL << 32)
+#define MAX_SEG_ORDER 20 /* no larger than 1M for kmalloc buffer */
+
+MODULE_LICENSE(DRIVER_LICENSE);
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESCRIPTION);
+
+static struct dentry *perf_debugfs_dir;
+
+static unsigned int seg_order = 19; /* 512K */
+module_param(seg_order, uint, 0644);
+MODULE_PARM_DESC(seg_order, "size order [n^2] of buffer segment for testing");
+
+static unsigned int run_order = 32; /* 4G */
+module_param(run_order, uint, 0644);
+MODULE_PARM_DESC(run_order, "size order [n^2] of total data to transfer");
+
+static bool use_dma; /* default to 0 */
+module_param(use_dma, bool, 0644);
+MODULE_PARM_DESC(use_dma, "Using DMA engine to measure performance");
+
+struct perf_mw {
+ phys_addr_t phys_addr;
+ resource_size_t phys_size;
+ resource_size_t xlat_align;
+ resource_size_t xlat_align_size;
+ void __iomem *vbase;
+ size_t xlat_size;
+ size_t buf_size;
+ void *virt_addr;
+ dma_addr_t dma_addr;
+};
+
+struct perf_ctx;
+
+struct pthr_ctx {
+ struct task_struct *thread;
+ struct perf_ctx *perf;
+ atomic_t dma_sync;
+ struct dma_chan *dma_chan;
+ int dma_prep_err;
+ int src_idx;
+ void *srcs[MAX_SRCS];
+};
+
+struct perf_ctx {
+ struct ntb_dev *ntb;
+ spinlock_t db_lock;
+ struct perf_mw mw;
+ bool link_is_up;
+ struct work_struct link_cleanup;
+ struct delayed_work link_work;
+ struct dentry *debugfs_node_dir;
+ struct dentry *debugfs_run;
+ struct dentry *debugfs_threads;
+ u8 perf_threads;
+ bool run;
+ struct pthr_ctx pthr_ctx[MAX_THREADS];
+ atomic_t tsync;
+};
+
+enum {
+ VERSION = 0,
+ MW_SZ_HIGH,
+ MW_SZ_LOW,
+ SPAD_MSG,
+ SPAD_ACK,
+ MAX_SPAD
+};
+
+static void perf_link_event(void *ctx)
+{
+ struct perf_ctx *perf = ctx;
+
+ if (ntb_link_is_up(perf->ntb, NULL, NULL) == 1)
+ schedule_delayed_work(&perf->link_work, 2*HZ);
+ else
+ schedule_work(&perf->link_cleanup);
+}
+
+static void perf_db_event(void *ctx, int vec)
+{
+ struct perf_ctx *perf = ctx;
+ u64 db_bits, db_mask;
+
+ db_mask = ntb_db_vector_mask(perf->ntb, vec);
+ db_bits = ntb_db_read(perf->ntb);
+
+ dev_dbg(&perf->ntb->dev, "doorbell vec %d mask %#llx bits %#llx\n",
+ vec, db_mask, db_bits);
+}
+
+static const struct ntb_ctx_ops perf_ops = {
+ .link_event = perf_link_event,
+ .db_event = perf_db_event,
+};
+
+static void perf_copy_callback(void *data)
+{
+ struct pthr_ctx *pctx = data;
+
+ atomic_dec(&pctx->dma_sync);
+}
+
+static ssize_t perf_copy(struct pthr_ctx *pctx, char *dst,
+ char *src, size_t size)
+{
+ struct perf_ctx *perf = pctx->perf;
+ struct dma_async_tx_descriptor *txd;
+ struct dma_chan *chan = pctx->dma_chan;
+ struct dma_device *device;
+ struct dmaengine_unmap_data *unmap;
+ dma_cookie_t cookie;
+ size_t src_off, dst_off;
+ struct perf_mw *mw = &perf->mw;
+ u64 vbase, dst_vaddr;
+ dma_addr_t dst_phys;
+ int retries = 0;
+
+ if (!use_dma) {
+ memcpy_toio(dst, src, size);
+ return size;
+ }
+
+ if (!chan) {
+ dev_err(&perf->ntb->dev, "DMA engine does not exist\n");
+ return -EINVAL;
+ }
+
+ device = chan->device;
+ src_off = (size_t)src & ~PAGE_MASK;
+ dst_off = (size_t)dst & ~PAGE_MASK;
+
+ if (!is_dma_copy_aligned(device, src_off, dst_off, size))
+ return -ENODEV;
+
+ vbase = (u64)(u64 *)mw->vbase;
+ dst_vaddr = (u64)(u64 *)dst;
+ dst_phys = mw->phys_addr + (dst_vaddr - vbase);
+
+ unmap = dmaengine_get_unmap_data(device->dev, 1, GFP_NOWAIT);
+ if (!unmap)
+ return -ENOMEM;
+
+ unmap->len = size;
+ unmap->addr[0] = dma_map_page(device->dev, virt_to_page(src),
+ src_off, size, DMA_TO_DEVICE);
+ if (dma_mapping_error(device->dev, unmap->addr[0]))
+ goto err_get_unmap;
+
+ unmap->to_cnt = 1;
+
+ do {
+ txd = device->device_prep_dma_memcpy(chan, dst_phys,
+ unmap->addr[0],
+ size, DMA_PREP_INTERRUPT);
+ if (!txd) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(DMA_OUT_RESOURCE_TO);
+ }
+ } while (!txd && (++retries < DMA_RETRIES));
+
+ if (!txd) {
+ pctx->dma_prep_err++;
+ goto err_get_unmap;
+ }
+
+ txd->callback = perf_copy_callback;
+ txd->callback_param = pctx;
+ dma_set_unmap(txd, unmap);
+
+ cookie = dmaengine_submit(txd);
+ if (dma_submit_error(cookie))
+ goto err_set_unmap;
+
+ atomic_inc(&pctx->dma_sync);
+ dma_async_issue_pending(chan);
+
+ return size;
+
+err_set_unmap:
+ dmaengine_unmap_put(unmap);
+err_get_unmap:
+ dmaengine_unmap_put(unmap);
+ return 0;
+}
+
+static int perf_move_data(struct pthr_ctx *pctx, char *dst, char *src,
+ u64 buf_size, u64 win_size, u64 total)
+{
+ int chunks, total_chunks, i;
+ int copied_chunks = 0;
+ u64 copied = 0, result;
+ char *tmp = dst;
+ u64 perf, diff_us;
+ ktime_t kstart, kstop, kdiff;
+
+ chunks = div64_u64(win_size, buf_size);
+ total_chunks = div64_u64(total, buf_size);
+ kstart = ktime_get();
+
+ for (i = 0; i < total_chunks; i++) {
+ result = perf_copy(pctx, tmp, src, buf_size);
+ copied += result;
+ copied_chunks++;
+ if (copied_chunks == chunks) {
+ tmp = dst;
+ copied_chunks = 0;
+ } else
+ tmp += buf_size;
+
+ /* Probably should schedule every 4GB to prevent soft hang. */
+ if (((copied % SZ_4G) == 0) && !use_dma) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(1);
+ }
+ }
+
+ if (use_dma) {
+ pr_info("%s: All DMA descriptors submitted\n", current->comm);
+ while (atomic_read(&pctx->dma_sync) != 0)
+ msleep(20);
+ }
+
+ kstop = ktime_get();
+ kdiff = ktime_sub(kstop, kstart);
+ diff_us = ktime_to_us(kdiff);
+
+ pr_info("%s: copied %llu bytes\n", current->comm, copied);
+
+ pr_info("%s: lasted %llu usecs\n", current->comm, diff_us);
+
+ perf = div64_u64(copied, diff_us);
+
+ pr_info("%s: MBytes/s: %llu\n", current->comm, perf);
+
+ return 0;
+}
+
+static bool perf_dma_filter_fn(struct dma_chan *chan, void *node)
+{
+ return dev_to_node(&chan->dev->device) == (int)(unsigned long)node;
+}
+
+static int ntb_perf_thread(void *data)
+{
+ struct pthr_ctx *pctx = data;
+ struct perf_ctx *perf = pctx->perf;
+ struct pci_dev *pdev = perf->ntb->pdev;
+ struct perf_mw *mw = &perf->mw;
+ char *dst;
+ u64 win_size, buf_size, total;
+ void *src;
+ int rc, node, i;
+ struct dma_chan *dma_chan = NULL;
+
+ pr_info("kthread %s starting...\n", current->comm);
+
+ node = dev_to_node(&pdev->dev);
+
+ if (use_dma && !pctx->dma_chan) {
+ dma_cap_mask_t dma_mask;
+
+ dma_cap_zero(dma_mask);
+ dma_cap_set(DMA_MEMCPY, dma_mask);
+ dma_chan = dma_request_channel(dma_mask, perf_dma_filter_fn,
+ (void *)(unsigned long)node);
+ if (!dma_chan) {
+ pr_warn("%s: cannot acquire DMA channel, quitting\n",
+ current->comm);
+ return -ENODEV;
+ }
+ pctx->dma_chan = dma_chan;
+ }
+
+ for (i = 0; i < MAX_SRCS; i++) {
+ pctx->srcs[i] = kmalloc_node(MAX_TEST_SIZE, GFP_KERNEL, node);
+ if (!pctx->srcs[i]) {
+ rc = -ENOMEM;
+ goto err;
+ }
+ }
+
+ win_size = mw->phys_size;
+ buf_size = 1ULL << seg_order;
+ total = 1ULL << run_order;
+
+ if (buf_size > MAX_TEST_SIZE)
+ buf_size = MAX_TEST_SIZE;
+
+ dst = (char *)mw->vbase;
+
+ atomic_inc(&perf->tsync);
+ while (atomic_read(&perf->tsync) != perf->perf_threads)
+ schedule();
+
+ src = pctx->srcs[pctx->src_idx];
+ pctx->src_idx = (pctx->src_idx + 1) & (MAX_SRCS - 1);
+
+ rc = perf_move_data(pctx, dst, src, buf_size, win_size, total);
+
+ atomic_dec(&perf->tsync);
+
+ if (rc < 0) {
+ pr_err("%s: failed\n", current->comm);
+ rc = -ENXIO;
+ goto err;
+ }
+
+ for (i = 0; i < MAX_SRCS; i++) {
+ kfree(pctx->srcs[i]);
+ pctx->srcs[i] = NULL;
+ }
+
+ return 0;
+
+err:
+ for (i = 0; i < MAX_SRCS; i++) {
+ kfree(pctx->srcs[i]);
+ pctx->srcs[i] = NULL;
+ }
+
+ if (dma_chan) {
+ dma_release_channel(dma_chan);
+ pctx->dma_chan = NULL;
+ }
+
+ return rc;
+}
+
+static void perf_free_mw(struct perf_ctx *perf)
+{
+ struct perf_mw *mw = &perf->mw;
+ struct pci_dev *pdev = perf->ntb->pdev;
+
+ if (!mw->virt_addr)
+ return;
+
+ ntb_mw_clear_trans(perf->ntb, 0);
+ dma_free_coherent(&pdev->dev, mw->buf_size,
+ mw->virt_addr, mw->dma_addr);
+ mw->xlat_size = 0;
+ mw->buf_size = 0;
+ mw->virt_addr = NULL;
+}
+
+static int perf_set_mw(struct perf_ctx *perf, resource_size_t size)
+{
+ struct perf_mw *mw = &perf->mw;
+ size_t xlat_size, buf_size;
+
+ if (!size)
+ return -EINVAL;
+
+ xlat_size = round_up(size, mw->xlat_align_size);
+ buf_size = round_up(size, mw->xlat_align);
+
+ if (mw->xlat_size == xlat_size)
+ return 0;
+
+ if (mw->buf_size)
+ perf_free_mw(perf);
+
+ mw->xlat_size = xlat_size;
+ mw->buf_size = buf_size;
+
+ mw->virt_addr = dma_alloc_coherent(&perf->ntb->pdev->dev, buf_size,
+ &mw->dma_addr, GFP_KERNEL);
+ if (!mw->virt_addr) {
+ mw->xlat_size = 0;
+ mw->buf_size = 0;
+ }
+
+ return 0;
+}
+
+static void perf_link_work(struct work_struct *work)
+{
+ struct perf_ctx *perf =
+ container_of(work, struct perf_ctx, link_work.work);
+ struct ntb_dev *ndev = perf->ntb;
+ struct pci_dev *pdev = ndev->pdev;
+ u32 val;
+ u64 size;
+ int rc;
+
+ dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__);
+
+ size = perf->mw.phys_size;
+ ntb_peer_spad_write(ndev, MW_SZ_HIGH, upper_32_bits(size));
+ ntb_peer_spad_write(ndev, MW_SZ_LOW, lower_32_bits(size));
+ ntb_peer_spad_write(ndev, VERSION, PERF_VERSION);
+
+ /* now read what peer wrote */
+ val = ntb_spad_read(ndev, VERSION);
+ if (val != PERF_VERSION) {
+ dev_dbg(&pdev->dev, "Remote version = %#x\n", val);
+ goto out;
+ }
+
+ val = ntb_spad_read(ndev, MW_SZ_HIGH);
+ size = (u64)val << 32;
+
+ val = ntb_spad_read(ndev, MW_SZ_LOW);
+ size |= val;
+
+ dev_dbg(&pdev->dev, "Remote MW size = %#llx\n", size);
+
+ rc = perf_set_mw(perf, size);
+ if (rc)
+ goto out1;
+
+ perf->link_is_up = true;
+
+ return;
+
+out1:
+ perf_free_mw(perf);
+
+out:
+ if (ntb_link_is_up(ndev, NULL, NULL) == 1)
+ schedule_delayed_work(&perf->link_work,
+ msecs_to_jiffies(PERF_LINK_DOWN_TIMEOUT));
+}
+
+static void perf_link_cleanup(struct work_struct *work)
+{
+ struct perf_ctx *perf = container_of(work,
+ struct perf_ctx,
+ link_cleanup);
+
+ dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__);
+
+ if (!perf->link_is_up)
+ cancel_delayed_work_sync(&perf->link_work);
+}
+
+static int perf_setup_mw(struct ntb_dev *ntb, struct perf_ctx *perf)
+{
+ struct perf_mw *mw;
+ int rc;
+
+ mw = &perf->mw;
+
+ rc = ntb_mw_get_range(ntb, 0, &mw->phys_addr, &mw->phys_size,
+ &mw->xlat_align, &mw->xlat_align_size);
+ if (rc)
+ return rc;
+
+ perf->mw.vbase = ioremap_wc(mw->phys_addr, mw->phys_size);
+ if (!mw->vbase)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf,
+ size_t count, loff_t *offp)
+{
+ struct perf_ctx *perf = filp->private_data;
+ char *buf;
+ ssize_t ret, out_offset;
+
+ if (!perf)
+ return 0;
+
+ buf = kmalloc(64, GFP_KERNEL);
+ out_offset = snprintf(buf, 64, "%d\n", perf->run);
+ ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset);
+ kfree(buf);
+
+ return ret;
+}
+
+static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf,
+ size_t count, loff_t *offp)
+{
+ struct perf_ctx *perf = filp->private_data;
+ int node, i;
+
+ if (!perf->link_is_up)
+ return 0;
+
+ if (perf->perf_threads == 0)
+ return 0;
+
+ if (atomic_read(&perf->tsync) == 0)
+ perf->run = false;
+
+ if (perf->run) {
+ /* lets stop the threads */
+ perf->run = false;
+ for (i = 0; i < MAX_THREADS; i++) {
+ if (perf->pthr_ctx[i].thread) {
+ kthread_stop(perf->pthr_ctx[i].thread);
+ perf->pthr_ctx[i].thread = NULL;
+ } else
+ break;
+ }
+ } else {
+ perf->run = true;
+
+ if (perf->perf_threads > MAX_THREADS) {
+ perf->perf_threads = MAX_THREADS;
+ pr_info("Reset total threads to: %u\n", MAX_THREADS);
+ }
+
+ /* no greater than 1M */
+ if (seg_order > MAX_SEG_ORDER) {
+ seg_order = MAX_SEG_ORDER;
+ pr_info("Fix seg_order to %u\n", seg_order);
+ }
+
+ if (run_order < seg_order) {
+ run_order = seg_order;
+ pr_info("Fix run_order to %u\n", run_order);
+ }
+
+ node = dev_to_node(&perf->ntb->pdev->dev);
+ /* launch kernel thread */
+ for (i = 0; i < perf->perf_threads; i++) {
+ struct pthr_ctx *pctx;
+
+ pctx = &perf->pthr_ctx[i];
+ atomic_set(&pctx->dma_sync, 0);
+ pctx->perf = perf;
+ pctx->thread =
+ kthread_create_on_node(ntb_perf_thread,
+ (void *)pctx,
+ node, "ntb_perf %d", i);
+ if (pctx->thread)
+ wake_up_process(pctx->thread);
+ else {
+ perf->run = false;
+ for (i = 0; i < MAX_THREADS; i++) {
+ if (pctx->thread) {
+ kthread_stop(pctx->thread);
+ pctx->thread = NULL;
+ }
+ }
+ }
+
+ if (perf->run == false)
+ return -ENXIO;
+ }
+
+ }
+
+ return count;
+}
+
+static const struct file_operations ntb_perf_debugfs_run = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = debugfs_run_read,
+ .write = debugfs_run_write,
+};
+
+static int perf_debugfs_setup(struct perf_ctx *perf)
+{
+ struct pci_dev *pdev = perf->ntb->pdev;
+
+ if (!debugfs_initialized())
+ return -ENODEV;
+
+ if (!perf_debugfs_dir) {
+ perf_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+ if (!perf_debugfs_dir)
+ return -ENODEV;
+ }
+
+ perf->debugfs_node_dir = debugfs_create_dir(pci_name(pdev),
+ perf_debugfs_dir);
+ if (!perf->debugfs_node_dir)
+ return -ENODEV;
+
+ perf->debugfs_run = debugfs_create_file("run", S_IRUSR | S_IWUSR,
+ perf->debugfs_node_dir, perf,
+ &ntb_perf_debugfs_run);
+ if (!perf->debugfs_run)
+ return -ENODEV;
+
+ perf->debugfs_threads = debugfs_create_u8("threads", S_IRUSR | S_IWUSR,
+ perf->debugfs_node_dir,
+ &perf->perf_threads);
+ if (!perf->debugfs_threads)
+ return -ENODEV;
+
+ return 0;
+}
+
+static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
+{
+ struct pci_dev *pdev = ntb->pdev;
+ struct perf_ctx *perf;
+ int node;
+ int rc = 0;
+
+ node = dev_to_node(&pdev->dev);
+
+ perf = kzalloc_node(sizeof(*perf), GFP_KERNEL, node);
+ if (!perf) {
+ rc = -ENOMEM;
+ goto err_perf;
+ }
+
+ perf->ntb = ntb;
+ perf->perf_threads = 1;
+ atomic_set(&perf->tsync, 0);
+ perf->run = false;
+ spin_lock_init(&perf->db_lock);
+ perf_setup_mw(ntb, perf);
+ INIT_DELAYED_WORK(&perf->link_work, perf_link_work);
+ INIT_WORK(&perf->link_cleanup, perf_link_cleanup);
+
+ rc = ntb_set_ctx(ntb, perf, &perf_ops);
+ if (rc)
+ goto err_ctx;
+
+ perf->link_is_up = false;
+ ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
+ ntb_link_event(ntb);
+
+ rc = perf_debugfs_setup(perf);
+ if (rc)
+ goto err_ctx;
+
+ return 0;
+
+err_ctx:
+ cancel_delayed_work_sync(&perf->link_work);
+ cancel_work_sync(&perf->link_cleanup);
+ kfree(perf);
+err_perf:
+ return rc;
+}
+
+static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb)
+{
+ struct perf_ctx *perf = ntb->ctx;
+ int i;
+
+ dev_dbg(&perf->ntb->dev, "%s called\n", __func__);
+
+ cancel_delayed_work_sync(&perf->link_work);
+ cancel_work_sync(&perf->link_cleanup);
+
+ ntb_clear_ctx(ntb);
+ ntb_link_disable(ntb);
+
+ debugfs_remove_recursive(perf_debugfs_dir);
+ perf_debugfs_dir = NULL;
+
+ if (use_dma) {
+ for (i = 0; i < MAX_THREADS; i++) {
+ struct pthr_ctx *pctx = &perf->pthr_ctx[i];
+
+ if (pctx->dma_chan)
+ dma_release_channel(pctx->dma_chan);
+ }
+ }
+
+ kfree(perf);
+}
+
+static struct ntb_client perf_client = {
+ .ops = {
+ .probe = perf_probe,
+ .remove = perf_remove,
+ },
+};
+module_ntb_client(perf_client);
diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c
index dd92c5edf219..b48ac6300c79 100644
--- a/drivers/oprofile/oprofilefs.c
+++ b/drivers/oprofile/oprofilefs.c
@@ -138,22 +138,22 @@ static int __oprofilefs_create_file(struct dentry *root, char const *name,
struct dentry *dentry;
struct inode *inode;
- mutex_lock(&d_inode(root)->i_mutex);
+ inode_lock(d_inode(root));
dentry = d_alloc_name(root, name);
if (!dentry) {
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
return -ENOMEM;
}
inode = oprofilefs_get_inode(root->d_sb, S_IFREG | perm);
if (!inode) {
dput(dentry);
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
return -ENOMEM;
}
inode->i_fop = fops;
inode->i_private = priv;
d_add(dentry, inode);
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
return 0;
}
@@ -215,22 +215,22 @@ struct dentry *oprofilefs_mkdir(struct dentry *parent, char const *name)
struct dentry *dentry;
struct inode *inode;
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
dentry = d_alloc_name(parent, name);
if (!dentry) {
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
return NULL;
}
inode = oprofilefs_get_inode(parent->d_sb, S_IFDIR | 0755);
if (!inode) {
dput(dentry);
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
return NULL;
}
inode->i_op = &simple_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
d_add(dentry, inode);
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
return dentry;
}
diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index d28db0e793df..d78ee151c9e4 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c
@@ -900,6 +900,13 @@ static const struct dmi_system_id no_hw_rfkill_list[] = {
},
},
{
+ .ident = "Lenovo Yoga 700",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo YOGA 700"),
+ },
+ },
+ {
.ident = "Lenovo Yoga 900",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
diff --git a/drivers/platform/x86/intel_telemetry_debugfs.c b/drivers/platform/x86/intel_telemetry_debugfs.c
index 5b31d1548c07..f5134acd6ff0 100644
--- a/drivers/platform/x86/intel_telemetry_debugfs.c
+++ b/drivers/platform/x86/intel_telemetry_debugfs.c
@@ -96,9 +96,11 @@
} \
}
+#ifdef CONFIG_PM_SLEEP
static u8 suspend_prep_ok;
static u32 suspend_shlw_ctr_temp, suspend_deep_ctr_temp;
static u64 suspend_shlw_res_temp, suspend_deep_res_temp;
+#endif
struct telemetry_susp_stats {
u32 shlw_swake_ctr;
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 23d862dfdde3..e2f31c93717d 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -1106,6 +1106,7 @@ config SCSI_IPR
tristate "IBM Power Linux RAID adapter support"
depends on PCI && SCSI && ATA
select FW_LOADER
+ select IRQ_POLL
---help---
This driver supports the IBM Power Linux family RAID adapters.
This includes IBM pSeries 5712, 5703, 5709, and 570A, as well
diff --git a/drivers/scsi/be2iscsi/Kconfig b/drivers/scsi/be2iscsi/Kconfig
index 4e7cad272469..bad5f32e1f67 100644
--- a/drivers/scsi/be2iscsi/Kconfig
+++ b/drivers/scsi/be2iscsi/Kconfig
@@ -3,6 +3,7 @@ config BE2ISCSI
depends on PCI && SCSI && NET
select SCSI_ISCSI_ATTRS
select ISCSI_BOOT_SYSFS
+ select IRQ_POLL
help
This driver implements the iSCSI functionality for Emulex
diff --git a/drivers/scsi/be2iscsi/be.h b/drivers/scsi/be2iscsi/be.h
index 77f992e74726..a41c6432f444 100644
--- a/drivers/scsi/be2iscsi/be.h
+++ b/drivers/scsi/be2iscsi/be.h
@@ -20,7 +20,7 @@
#include <linux/pci.h>
#include <linux/if_vlan.h>
-#include <linux/blk-iopoll.h>
+#include <linux/irq_poll.h>
#define FW_VER_LEN 32
#define MCC_Q_LEN 128
#define MCC_CQ_LEN 256
@@ -101,7 +101,7 @@ struct be_eq_obj {
struct beiscsi_hba *phba;
struct be_queue_info *cq;
struct work_struct work_cqs; /* Work Item */
- struct blk_iopoll iopoll;
+ struct irq_poll iopoll;
};
struct be_mcc_obj {
diff --git a/drivers/scsi/be2iscsi/be_iscsi.c b/drivers/scsi/be2iscsi/be_iscsi.c
index b7087ba69d8d..022e87b62e40 100644
--- a/drivers/scsi/be2iscsi/be_iscsi.c
+++ b/drivers/scsi/be2iscsi/be_iscsi.c
@@ -1292,9 +1292,9 @@ static void beiscsi_flush_cq(struct beiscsi_hba *phba)
for (i = 0; i < phba->num_cpus; i++) {
pbe_eq = &phwi_context->be_eq[i];
- blk_iopoll_disable(&pbe_eq->iopoll);
+ irq_poll_disable(&pbe_eq->iopoll);
beiscsi_process_cq(pbe_eq);
- blk_iopoll_enable(&pbe_eq->iopoll);
+ irq_poll_enable(&pbe_eq->iopoll);
}
}
diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c
index fe0c5143f8e6..cb9072a841be 100644
--- a/drivers/scsi/be2iscsi/be_main.c
+++ b/drivers/scsi/be2iscsi/be_main.c
@@ -910,8 +910,7 @@ static irqreturn_t be_isr_msix(int irq, void *dev_id)
num_eq_processed = 0;
while (eqe->dw[offsetof(struct amap_eq_entry, valid) / 32]
& EQE_VALID_MASK) {
- if (!blk_iopoll_sched_prep(&pbe_eq->iopoll))
- blk_iopoll_sched(&pbe_eq->iopoll);
+ irq_poll_sched(&pbe_eq->iopoll);
AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0);
queue_tail_inc(eq);
@@ -972,8 +971,7 @@ static irqreturn_t be_isr(int irq, void *dev_id)
spin_unlock_irqrestore(&phba->isr_lock, flags);
num_mcceq_processed++;
} else {
- if (!blk_iopoll_sched_prep(&pbe_eq->iopoll))
- blk_iopoll_sched(&pbe_eq->iopoll);
+ irq_poll_sched(&pbe_eq->iopoll);
num_ioeq_processed++;
}
AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0);
@@ -2295,7 +2293,7 @@ void beiscsi_process_all_cqs(struct work_struct *work)
hwi_ring_eq_db(phba, pbe_eq->q.id, 0, 0, 1, 1);
}
-static int be_iopoll(struct blk_iopoll *iop, int budget)
+static int be_iopoll(struct irq_poll *iop, int budget)
{
unsigned int ret;
struct beiscsi_hba *phba;
@@ -2306,7 +2304,7 @@ static int be_iopoll(struct blk_iopoll *iop, int budget)
pbe_eq->cq_count += ret;
if (ret < budget) {
phba = pbe_eq->phba;
- blk_iopoll_complete(iop);
+ irq_poll_complete(iop);
beiscsi_log(phba, KERN_INFO,
BEISCSI_LOG_CONFIG | BEISCSI_LOG_IO,
"BM_%d : rearm pbe_eq->q.id =%d\n",
@@ -5293,7 +5291,7 @@ static void beiscsi_quiesce(struct beiscsi_hba *phba,
for (i = 0; i < phba->num_cpus; i++) {
pbe_eq = &phwi_context->be_eq[i];
- blk_iopoll_disable(&pbe_eq->iopoll);
+ irq_poll_disable(&pbe_eq->iopoll);
}
if (unload_state == BEISCSI_CLEAN_UNLOAD) {
@@ -5579,9 +5577,8 @@ static void beiscsi_eeh_resume(struct pci_dev *pdev)
for (i = 0; i < phba->num_cpus; i++) {
pbe_eq = &phwi_context->be_eq[i];
- blk_iopoll_init(&pbe_eq->iopoll, be_iopoll_budget,
+ irq_poll_init(&pbe_eq->iopoll, be_iopoll_budget,
be_iopoll);
- blk_iopoll_enable(&pbe_eq->iopoll);
}
i = (phba->msix_enabled) ? i : 0;
@@ -5752,9 +5749,8 @@ static int beiscsi_dev_probe(struct pci_dev *pcidev,
for (i = 0; i < phba->num_cpus; i++) {
pbe_eq = &phwi_context->be_eq[i];
- blk_iopoll_init(&pbe_eq->iopoll, be_iopoll_budget,
+ irq_poll_init(&pbe_eq->iopoll, be_iopoll_budget,
be_iopoll);
- blk_iopoll_enable(&pbe_eq->iopoll);
}
i = (phba->msix_enabled) ? i : 0;
@@ -5795,7 +5791,7 @@ free_blkenbld:
destroy_workqueue(phba->wq);
for (i = 0; i < phba->num_cpus; i++) {
pbe_eq = &phwi_context->be_eq[i];
- blk_iopoll_disable(&pbe_eq->iopoll);
+ irq_poll_disable(&pbe_eq->iopoll);
}
free_twq:
beiscsi_clean_port(phba);
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 1c3759bab80b..3b3e0998fa6e 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -3638,7 +3638,7 @@ static struct device_attribute ipr_ioa_reset_attr = {
.store = ipr_store_reset_adapter
};
-static int ipr_iopoll(struct blk_iopoll *iop, int budget);
+static int ipr_iopoll(struct irq_poll *iop, int budget);
/**
* ipr_show_iopoll_weight - Show ipr polling mode
* @dev: class device struct
@@ -3681,34 +3681,33 @@ static ssize_t ipr_store_iopoll_weight(struct device *dev,
int i;
if (!ioa_cfg->sis64) {
- dev_info(&ioa_cfg->pdev->dev, "blk-iopoll not supported on this adapter\n");
+ dev_info(&ioa_cfg->pdev->dev, "irq_poll not supported on this adapter\n");
return -EINVAL;
}
if (kstrtoul(buf, 10, &user_iopoll_weight))
return -EINVAL;
if (user_iopoll_weight > 256) {
- dev_info(&ioa_cfg->pdev->dev, "Invalid blk-iopoll weight. It must be less than 256\n");
+ dev_info(&ioa_cfg->pdev->dev, "Invalid irq_poll weight. It must be less than 256\n");
return -EINVAL;
}
if (user_iopoll_weight == ioa_cfg->iopoll_weight) {
- dev_info(&ioa_cfg->pdev->dev, "Current blk-iopoll weight has the same weight\n");
+ dev_info(&ioa_cfg->pdev->dev, "Current irq_poll weight has the same weight\n");
return strlen(buf);
}
if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
for (i = 1; i < ioa_cfg->hrrq_num; i++)
- blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll);
+ irq_poll_disable(&ioa_cfg->hrrq[i].iopoll);
}
spin_lock_irqsave(shost->host_lock, lock_flags);
ioa_cfg->iopoll_weight = user_iopoll_weight;
if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
for (i = 1; i < ioa_cfg->hrrq_num; i++) {
- blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll,
+ irq_poll_init(&ioa_cfg->hrrq[i].iopoll,
ioa_cfg->iopoll_weight, ipr_iopoll);
- blk_iopoll_enable(&ioa_cfg->hrrq[i].iopoll);
}
}
spin_unlock_irqrestore(shost->host_lock, lock_flags);
@@ -5568,7 +5567,7 @@ static int ipr_process_hrrq(struct ipr_hrr_queue *hrr_queue, int budget,
return num_hrrq;
}
-static int ipr_iopoll(struct blk_iopoll *iop, int budget)
+static int ipr_iopoll(struct irq_poll *iop, int budget)
{
struct ipr_ioa_cfg *ioa_cfg;
struct ipr_hrr_queue *hrrq;
@@ -5584,7 +5583,7 @@ static int ipr_iopoll(struct blk_iopoll *iop, int budget)
completed_ops = ipr_process_hrrq(hrrq, budget, &doneq);
if (completed_ops < budget)
- blk_iopoll_complete(iop);
+ irq_poll_complete(iop);
spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
list_for_each_entry_safe(ipr_cmd, temp, &doneq, queue) {
@@ -5692,8 +5691,7 @@ static irqreturn_t ipr_isr_mhrrq(int irq, void *devp)
if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
if ((be32_to_cpu(*hrrq->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) ==
hrrq->toggle_bit) {
- if (!blk_iopoll_sched_prep(&hrrq->iopoll))
- blk_iopoll_sched(&hrrq->iopoll);
+ irq_poll_sched(&hrrq->iopoll);
spin_unlock_irqrestore(hrrq->lock, hrrq_flags);
return IRQ_HANDLED;
}
@@ -10404,9 +10402,8 @@ static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id)
if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
for (i = 1; i < ioa_cfg->hrrq_num; i++) {
- blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll,
+ irq_poll_init(&ioa_cfg->hrrq[i].iopoll,
ioa_cfg->iopoll_weight, ipr_iopoll);
- blk_iopoll_enable(&ioa_cfg->hrrq[i].iopoll);
}
}
@@ -10435,7 +10432,7 @@ static void ipr_shutdown(struct pci_dev *pdev)
if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
ioa_cfg->iopoll_weight = 0;
for (i = 1; i < ioa_cfg->hrrq_num; i++)
- blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll);
+ irq_poll_disable(&ioa_cfg->hrrq[i].iopoll);
}
while (ioa_cfg->in_reset_reload) {
diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h
index a34c7a5a995e..56c57068300a 100644
--- a/drivers/scsi/ipr.h
+++ b/drivers/scsi/ipr.h
@@ -32,7 +32,7 @@
#include <linux/libata.h>
#include <linux/list.h>
#include <linux/kref.h>
-#include <linux/blk-iopoll.h>
+#include <linux/irq_poll.h>
#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>
@@ -517,7 +517,7 @@ struct ipr_hrr_queue {
u8 allow_cmds:1;
u8 removing_ioa:1;
- struct blk_iopoll iopoll;
+ struct irq_poll iopoll;
};
/* Command packet structure */
diff --git a/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h b/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
index d6273e143324..a80d993b882e 100644
--- a/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
+++ b/drivers/staging/lustre/include/linux/libcfs/libcfs_private.h
@@ -151,16 +151,12 @@ do { \
#define LIBCFS_FREE(ptr, size) \
do { \
- int s = (size); \
if (unlikely((ptr) == NULL)) { \
CERROR("LIBCFS: free NULL '" #ptr "' (%d bytes) at " \
- "%s:%d\n", s, __FILE__, __LINE__); \
+ "%s:%d\n", (int)(size), __FILE__, __LINE__); \
break; \
} \
- if (unlikely(s > LIBCFS_VMALLOC_SIZE)) \
- vfree(ptr); \
- else \
- kfree(ptr); \
+ kvfree(ptr); \
} while (0)
/******************************************************************************/
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
index 72af486b65df..cb74ae731b95 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c
@@ -2070,32 +2070,13 @@ static int kiblnd_net_init_pools(kib_net_t *net, __u32 *cpts, int ncpts)
static int kiblnd_hdev_get_attr(kib_hca_dev_t *hdev)
{
- struct ib_device_attr *attr;
- int rc;
-
/* It's safe to assume a HCA can handle a page size
* matching that of the native system */
hdev->ibh_page_shift = PAGE_SHIFT;
hdev->ibh_page_size = 1 << PAGE_SHIFT;
hdev->ibh_page_mask = ~((__u64)hdev->ibh_page_size - 1);
- LIBCFS_ALLOC(attr, sizeof(*attr));
- if (attr == NULL) {
- CERROR("Out of memory\n");
- return -ENOMEM;
- }
-
- rc = ib_query_device(hdev->ibh_ibdev, attr);
- if (rc == 0)
- hdev->ibh_mr_size = attr->max_mr_size;
-
- LIBCFS_FREE(attr, sizeof(*attr));
-
- if (rc != 0) {
- CERROR("Failed to query IB device: %d\n", rc);
- return rc;
- }
-
+ hdev->ibh_mr_size = hdev->ibh_ibdev->attrs.max_mr_size;
if (hdev->ibh_mr_size == ~0ULL) {
hdev->ibh_mr_shift = 64;
return 0;
diff --git a/drivers/staging/lustre/lustre/llite/dir.c b/drivers/staging/lustre/lustre/llite/dir.c
index 7b355319079c..8982f7d1b374 100644
--- a/drivers/staging/lustre/lustre/llite/dir.c
+++ b/drivers/staging/lustre/lustre/llite/dir.c
@@ -1858,7 +1858,7 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin)
int api32 = ll_need_32bit_api(sbi);
loff_t ret = -EINVAL;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
switch (origin) {
case SEEK_SET:
break;
@@ -1896,7 +1896,7 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin)
goto out;
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c
index c92d58b770ec..39e2ffd5f97f 100644
--- a/drivers/staging/lustre/lustre/llite/file.c
+++ b/drivers/staging/lustre/lustre/llite/file.c
@@ -2082,17 +2082,17 @@ putgl:
/* update time if requested */
rc = 0;
if (llss->ia2.ia_valid != 0) {
- mutex_lock(&llss->inode1->i_mutex);
+ inode_lock(llss->inode1);
rc = ll_setattr(file1->f_path.dentry, &llss->ia2);
- mutex_unlock(&llss->inode1->i_mutex);
+ inode_unlock(llss->inode1);
}
if (llss->ia1.ia_valid != 0) {
int rc1;
- mutex_lock(&llss->inode2->i_mutex);
+ inode_lock(llss->inode2);
rc1 = ll_setattr(file2->f_path.dentry, &llss->ia1);
- mutex_unlock(&llss->inode2->i_mutex);
+ inode_unlock(llss->inode2);
if (rc == 0)
rc = rc1;
}
@@ -2179,13 +2179,13 @@ static int ll_hsm_import(struct inode *inode, struct file *file,
ATTR_MTIME | ATTR_MTIME_SET |
ATTR_ATIME | ATTR_ATIME_SET;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
rc = ll_setattr_raw(file->f_path.dentry, attr, true);
if (rc == -ENODATA)
rc = 0;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
kfree(attr);
free_hss:
@@ -2609,7 +2609,7 @@ int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FSYNC, 1);
rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* catch async errors that were recorded back when async writeback
* failed for pages in this mapping. */
@@ -2641,7 +2641,7 @@ int ll_fsync(struct file *file, loff_t start, loff_t end, int datasync)
fd->fd_write_failed = false;
}
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return rc;
}
diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h
index ee8a1d67d191..845e992ca5fc 100644
--- a/drivers/staging/lustre/lustre/llite/llite_internal.h
+++ b/drivers/staging/lustre/lustre/llite/llite_internal.h
@@ -631,8 +631,6 @@ struct ll_file_data {
struct lov_stripe_md;
-extern spinlock_t inode_lock;
-
extern struct dentry *llite_root;
extern struct kset *llite_kset;
diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c
index 1db93af62bad..b2fc5b3786ee 100644
--- a/drivers/staging/lustre/lustre/llite/llite_lib.c
+++ b/drivers/staging/lustre/lustre/llite/llite_lib.c
@@ -1277,7 +1277,7 @@ int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
return -ENOMEM;
if (!S_ISDIR(inode->i_mode))
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
memcpy(&op_data->op_attr, attr, sizeof(*attr));
@@ -1358,7 +1358,7 @@ out:
ll_finish_md_op_data(op_data);
if (!S_ISDIR(inode->i_mode)) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if ((attr->ia_valid & ATTR_SIZE) && !hsm_import)
inode_dio_wait(inode);
}
diff --git a/drivers/staging/lustre/lustre/llite/llite_nfs.c b/drivers/staging/lustre/lustre/llite/llite_nfs.c
index e578a1130ad1..18aab25f9cd9 100644
--- a/drivers/staging/lustre/lustre/llite/llite_nfs.c
+++ b/drivers/staging/lustre/lustre/llite/llite_nfs.c
@@ -245,9 +245,9 @@ static int ll_get_name(struct dentry *dentry, char *name,
goto out;
}
- mutex_lock(&dir->i_mutex);
+ inode_lock(dir);
rc = ll_dir_read(dir, &lgd.ctx);
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
if (!rc && !lgd.lgd_found)
rc = -ENOENT;
out:
diff --git a/drivers/staging/lustre/lustre/llite/lloop.c b/drivers/staging/lustre/lustre/llite/lloop.c
index 420d39123877..871924b3f2e7 100644
--- a/drivers/staging/lustre/lustre/llite/lloop.c
+++ b/drivers/staging/lustre/lustre/llite/lloop.c
@@ -257,9 +257,9 @@ static int do_bio_lustrebacked(struct lloop_device *lo, struct bio *head)
* be asked to write less pages once, this purely depends on
* implementation. Anyway, we should be careful to avoid deadlocking.
*/
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
bytes = ll_direct_rw_pages(env, io, rw, inode, pvec);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
cl_io_fini(env, io);
return (bytes == pvec->ldp_size) ? 0 : (int)bytes;
}
diff --git a/drivers/staging/lustre/lustre/llite/rw.c b/drivers/staging/lustre/lustre/llite/rw.c
index 95cdb0c58b04..f355474967d6 100644
--- a/drivers/staging/lustre/lustre/llite/rw.c
+++ b/drivers/staging/lustre/lustre/llite/rw.c
@@ -115,8 +115,8 @@ static struct ll_cl_context *ll_cl_init(struct file *file,
struct inode *inode = vmpage->mapping->host;
loff_t pos;
- if (mutex_trylock(&inode->i_mutex)) {
- mutex_unlock(&(inode)->i_mutex);
+ if (inode_trylock(inode)) {
+ inode_unlock((inode));
/* this is too bad. Someone is trying to write the
* page w/o holding inode mutex. This means we can
diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c
index 39fa13b74cbd..711fda93a58d 100644
--- a/drivers/staging/lustre/lustre/llite/rw26.c
+++ b/drivers/staging/lustre/lustre/llite/rw26.c
@@ -403,7 +403,7 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter,
* 1. Need inode mutex to operate transient pages.
*/
if (iov_iter_rw(iter) == READ)
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
LASSERT(obj->cob_transient_pages == 0);
while (iov_iter_count(iter)) {
@@ -454,7 +454,7 @@ static ssize_t ll_direct_IO_26(struct kiocb *iocb, struct iov_iter *iter,
out:
LASSERT(obj->cob_transient_pages == 0);
if (iov_iter_rw(iter) == READ)
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (tot_bytes > 0) {
if (iov_iter_rw(iter) == WRITE) {
diff --git a/drivers/staging/lustre/lustre/llite/vvp_io.c b/drivers/staging/lustre/lustre/llite/vvp_io.c
index f68e972886ca..0920ac6b3003 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_io.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_io.c
@@ -439,7 +439,7 @@ static int vvp_io_setattr_start(const struct lu_env *env,
struct inode *inode = ccc_object_inode(io->ci_obj);
int result = 0;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (cl_io_is_trunc(io))
result = vvp_io_setattr_trunc(env, ios, inode,
io->u.ci_setattr.sa_attr.lvb_size);
@@ -459,7 +459,7 @@ static void vvp_io_setattr_end(const struct lu_env *env,
* because osc has already notified to destroy osc_extents. */
vvp_do_vmtruncate(inode, io->u.ci_setattr.sa_attr.lvb_size);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
static void vvp_io_setattr_fini(const struct lu_env *env,
diff --git a/drivers/staging/lustre/lustre/llite/vvp_page.c b/drivers/staging/lustre/lustre/llite/vvp_page.c
index 99c0d7aee921..a133475a7c74 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_page.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_page.c
@@ -428,7 +428,7 @@ static void vvp_transient_page_verify(const struct cl_page *page)
{
struct inode *inode = ccc_object_inode(page->cp_obj);
- LASSERT(!mutex_trylock(&inode->i_mutex));
+ LASSERT(!inode_trylock(inode));
}
static int vvp_transient_page_own(const struct lu_env *env,
@@ -480,9 +480,9 @@ static int vvp_transient_page_is_vmlocked(const struct lu_env *env,
struct inode *inode = ccc_object_inode(slice->cpl_obj);
int locked;
- locked = !mutex_trylock(&inode->i_mutex);
+ locked = !inode_trylock(inode);
if (!locked)
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return locked ? -EBUSY : -ENODATA;
}
@@ -502,7 +502,7 @@ static void vvp_transient_page_fini(const struct lu_env *env,
struct ccc_object *clobj = cl2ccc(clp->cp_obj);
vvp_page_fini_common(cp);
- LASSERT(!mutex_trylock(&clobj->cob_inode->i_mutex));
+ LASSERT(!inode_trylock(clobj->cob_inode));
clobj->cob_transient_pages--;
}
@@ -548,7 +548,7 @@ int vvp_page_init(const struct lu_env *env, struct cl_object *obj,
} else {
struct ccc_object *clobj = cl2ccc(obj);
- LASSERT(!mutex_trylock(&clobj->cob_inode->i_mutex));
+ LASSERT(!inode_trylock(clobj->cob_inode));
cl_page_slice_add(page, &cpg->cpg_cl, obj,
&vvp_transient_page_ops);
clobj->cob_transient_pages++;
diff --git a/drivers/staging/rdma/amso1100/c2_cq.c b/drivers/staging/rdma/amso1100/c2_cq.c
index 3ef881f2da0f..7ad0c082485a 100644
--- a/drivers/staging/rdma/amso1100/c2_cq.c
+++ b/drivers/staging/rdma/amso1100/c2_cq.c
@@ -173,9 +173,6 @@ static inline int c2_poll_one(struct c2_dev *c2dev,
case C2_WR_TYPE_RDMA_READ:
entry->opcode = IB_WC_RDMA_READ;
break;
- case C2_WR_TYPE_BIND_MW:
- entry->opcode = IB_WC_BIND_MW;
- break;
case C2_WR_TYPE_RECV:
entry->byte_len = be32_to_cpu(ce->bytes_rcvd);
entry->opcode = IB_WC_RECV;
diff --git a/drivers/staging/rdma/amso1100/c2_provider.c b/drivers/staging/rdma/amso1100/c2_provider.c
index a092ac743c72..de8d10e1bde3 100644
--- a/drivers/staging/rdma/amso1100/c2_provider.c
+++ b/drivers/staging/rdma/amso1100/c2_provider.c
@@ -337,43 +337,21 @@ static inline u32 c2_convert_access(int acc)
C2_ACF_LOCAL_READ | C2_ACF_WINDOW_BIND;
}
-static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf, int acc, u64 * iova_start)
+static struct ib_mr *c2_get_dma_mr(struct ib_pd *pd, int acc)
{
struct c2_mr *mr;
u64 *page_list;
- u32 total_len;
- int err, i, j, k, page_shift, pbl_depth;
+ const u32 total_len = 0xffffffff; /* AMSO1100 limit */
+ int err, page_shift, pbl_depth, i;
+ u64 kva = 0;
- pbl_depth = 0;
- total_len = 0;
+ pr_debug("%s:%u\n", __func__, __LINE__);
- page_shift = PAGE_SHIFT;
/*
- * If there is only 1 buffer we assume this could
- * be a map of all phy mem...use a 32k page_shift.
+ * This is a map of all phy mem...use a 32k page_shift.
*/
- if (num_phys_buf == 1)
- page_shift += 3;
-
- for (i = 0; i < num_phys_buf; i++) {
-
- if (offset_in_page(buffer_list[i].addr)) {
- pr_debug("Unaligned Memory Buffer: 0x%x\n",
- (unsigned int) buffer_list[i].addr);
- return ERR_PTR(-EINVAL);
- }
-
- if (!buffer_list[i].size) {
- pr_debug("Invalid Buffer Size\n");
- return ERR_PTR(-EINVAL);
- }
-
- total_len += buffer_list[i].size;
- pbl_depth += ALIGN(buffer_list[i].size,
- BIT(page_shift)) >> page_shift;
- }
+ page_shift = PAGE_SHIFT + 3;
+ pbl_depth = ALIGN(total_len, BIT(page_shift)) >> page_shift;
page_list = vmalloc(sizeof(u64) * pbl_depth);
if (!page_list) {
@@ -382,16 +360,8 @@ static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd,
return ERR_PTR(-ENOMEM);
}
- for (i = 0, j = 0; i < num_phys_buf; i++) {
-
- int naddrs;
-
- naddrs = ALIGN(buffer_list[i].size,
- BIT(page_shift)) >> page_shift;
- for (k = 0; k < naddrs; k++)
- page_list[j++] = (buffer_list[i].addr +
- (k << page_shift));
- }
+ for (i = 0; i < pbl_depth; i++)
+ page_list[i] = (i << page_shift);
mr = kmalloc(sizeof(*mr), GFP_KERNEL);
if (!mr) {
@@ -399,17 +369,17 @@ static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd,
return ERR_PTR(-ENOMEM);
}
- mr->pd = to_c2pd(ib_pd);
+ mr->pd = to_c2pd(pd);
mr->umem = NULL;
pr_debug("%s - page shift %d, pbl_depth %d, total_len %u, "
"*iova_start %llx, first pa %llx, last pa %llx\n",
__func__, page_shift, pbl_depth, total_len,
- (unsigned long long) *iova_start,
+ (unsigned long long) kva,
(unsigned long long) page_list[0],
(unsigned long long) page_list[pbl_depth-1]);
- err = c2_nsmr_register_phys_kern(to_c2dev(ib_pd->device), page_list,
+ err = c2_nsmr_register_phys_kern(to_c2dev(pd->device), page_list,
BIT(page_shift), pbl_depth,
- total_len, 0, iova_start,
+ total_len, 0, &kva,
c2_convert_access(acc), mr);
vfree(page_list);
if (err) {
@@ -420,19 +390,6 @@ static struct ib_mr *c2_reg_phys_mr(struct ib_pd *ib_pd,
return &mr->ibmr;
}
-static struct ib_mr *c2_get_dma_mr(struct ib_pd *pd, int acc)
-{
- struct ib_phys_buf bl;
- u64 kva = 0;
-
- pr_debug("%s:%u\n", __func__, __LINE__);
-
- /* AMSO1100 limit */
- bl.size = 0xffffffff;
- bl.addr = 0;
- return c2_reg_phys_mr(pd, &bl, 1, acc, &kva);
-}
-
static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt, int acc, struct ib_udata *udata)
{
@@ -840,7 +797,6 @@ int c2_register_device(struct c2_dev *dev)
dev->ibdev.destroy_cq = c2_destroy_cq;
dev->ibdev.poll_cq = c2_poll_cq;
dev->ibdev.get_dma_mr = c2_get_dma_mr;
- dev->ibdev.reg_phys_mr = c2_reg_phys_mr;
dev->ibdev.reg_user_mr = c2_reg_user_mr;
dev->ibdev.dereg_mr = c2_dereg_mr;
dev->ibdev.get_port_immutable = c2_port_immutable;
diff --git a/drivers/staging/rdma/ehca/ehca_classes.h b/drivers/staging/rdma/ehca/ehca_classes.h
index bd45e0f3923f..e8c3387d7aaa 100644
--- a/drivers/staging/rdma/ehca/ehca_classes.h
+++ b/drivers/staging/rdma/ehca/ehca_classes.h
@@ -316,9 +316,8 @@ struct ehca_mr_pginfo {
union {
struct { /* type EHCA_MR_PGI_PHYS section */
- int num_phys_buf;
- struct ib_phys_buf *phys_buf_array;
- u64 next_buf;
+ u64 addr;
+ u16 size;
} phy;
struct { /* type EHCA_MR_PGI_USER section */
struct ib_umem *region;
diff --git a/drivers/staging/rdma/ehca/ehca_iverbs.h b/drivers/staging/rdma/ehca/ehca_iverbs.h
index 80e6a3d5df3e..cca5933fcda6 100644
--- a/drivers/staging/rdma/ehca/ehca_iverbs.h
+++ b/drivers/staging/rdma/ehca/ehca_iverbs.h
@@ -80,30 +80,14 @@ int ehca_destroy_ah(struct ib_ah *ah);
struct ib_mr *ehca_get_dma_mr(struct ib_pd *pd, int mr_access_flags);
-struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
- struct ib_phys_buf *phys_buf_array,
- int num_phys_buf,
- int mr_access_flags, u64 *iova_start);
-
struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt, int mr_access_flags,
struct ib_udata *udata);
-int ehca_rereg_phys_mr(struct ib_mr *mr,
- int mr_rereg_mask,
- struct ib_pd *pd,
- struct ib_phys_buf *phys_buf_array,
- int num_phys_buf, int mr_access_flags, u64 *iova_start);
-
-int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
-
int ehca_dereg_mr(struct ib_mr *mr);
struct ib_mw *ehca_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
-int ehca_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
- struct ib_mw_bind *mw_bind);
-
int ehca_dealloc_mw(struct ib_mw *mw);
struct ib_fmr *ehca_alloc_fmr(struct ib_pd *pd,
diff --git a/drivers/staging/rdma/ehca/ehca_main.c b/drivers/staging/rdma/ehca/ehca_main.c
index 860b974e9faa..832f22f40862 100644
--- a/drivers/staging/rdma/ehca/ehca_main.c
+++ b/drivers/staging/rdma/ehca/ehca_main.c
@@ -511,13 +511,9 @@ static int ehca_init_device(struct ehca_shca *shca)
shca->ib_device.req_notify_cq = ehca_req_notify_cq;
/* shca->ib_device.req_ncomp_notif = ehca_req_ncomp_notif; */
shca->ib_device.get_dma_mr = ehca_get_dma_mr;
- shca->ib_device.reg_phys_mr = ehca_reg_phys_mr;
shca->ib_device.reg_user_mr = ehca_reg_user_mr;
- shca->ib_device.query_mr = ehca_query_mr;
shca->ib_device.dereg_mr = ehca_dereg_mr;
- shca->ib_device.rereg_phys_mr = ehca_rereg_phys_mr;
shca->ib_device.alloc_mw = ehca_alloc_mw;
- shca->ib_device.bind_mw = ehca_bind_mw;
shca->ib_device.dealloc_mw = ehca_dealloc_mw;
shca->ib_device.alloc_fmr = ehca_alloc_fmr;
shca->ib_device.map_phys_fmr = ehca_map_phys_fmr;
diff --git a/drivers/staging/rdma/ehca/ehca_mrmw.c b/drivers/staging/rdma/ehca/ehca_mrmw.c
index 553e883a5718..3367205e3160 100644
--- a/drivers/staging/rdma/ehca/ehca_mrmw.c
+++ b/drivers/staging/rdma/ehca/ehca_mrmw.c
@@ -196,120 +196,6 @@ get_dma_mr_exit0:
/*----------------------------------------------------------------------*/
-struct ib_mr *ehca_reg_phys_mr(struct ib_pd *pd,
- struct ib_phys_buf *phys_buf_array,
- int num_phys_buf,
- int mr_access_flags,
- u64 *iova_start)
-{
- struct ib_mr *ib_mr;
- int ret;
- struct ehca_mr *e_mr;
- struct ehca_shca *shca =
- container_of(pd->device, struct ehca_shca, ib_device);
- struct ehca_pd *e_pd = container_of(pd, struct ehca_pd, ib_pd);
-
- u64 size;
-
- if ((num_phys_buf <= 0) || !phys_buf_array) {
- ehca_err(pd->device, "bad input values: num_phys_buf=%x "
- "phys_buf_array=%p", num_phys_buf, phys_buf_array);
- ib_mr = ERR_PTR(-EINVAL);
- goto reg_phys_mr_exit0;
- }
- if (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
- !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
- ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
- !(mr_access_flags & IB_ACCESS_LOCAL_WRITE))) {
- /*
- * Remote Write Access requires Local Write Access
- * Remote Atomic Access requires Local Write Access
- */
- ehca_err(pd->device, "bad input values: mr_access_flags=%x",
- mr_access_flags);
- ib_mr = ERR_PTR(-EINVAL);
- goto reg_phys_mr_exit0;
- }
-
- /* check physical buffer list and calculate size */
- ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array, num_phys_buf,
- iova_start, &size);
- if (ret) {
- ib_mr = ERR_PTR(ret);
- goto reg_phys_mr_exit0;
- }
- if ((size == 0) ||
- (((u64)iova_start + size) < (u64)iova_start)) {
- ehca_err(pd->device, "bad input values: size=%llx iova_start=%p",
- size, iova_start);
- ib_mr = ERR_PTR(-EINVAL);
- goto reg_phys_mr_exit0;
- }
-
- e_mr = ehca_mr_new();
- if (!e_mr) {
- ehca_err(pd->device, "out of memory");
- ib_mr = ERR_PTR(-ENOMEM);
- goto reg_phys_mr_exit0;
- }
-
- /* register MR on HCA */
- if (ehca_mr_is_maxmr(size, iova_start)) {
- e_mr->flags |= EHCA_MR_FLAG_MAXMR;
- ret = ehca_reg_maxmr(shca, e_mr, iova_start, mr_access_flags,
- e_pd, &e_mr->ib.ib_mr.lkey,
- &e_mr->ib.ib_mr.rkey);
- if (ret) {
- ib_mr = ERR_PTR(ret);
- goto reg_phys_mr_exit1;
- }
- } else {
- struct ehca_mr_pginfo pginfo;
- u32 num_kpages;
- u32 num_hwpages;
- u64 hw_pgsize;
-
- num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size,
- PAGE_SIZE);
- /* for kernel space we try most possible pgsize */
- hw_pgsize = ehca_get_max_hwpage_size(shca);
- num_hwpages = NUM_CHUNKS(((u64)iova_start % hw_pgsize) + size,
- hw_pgsize);
- memset(&pginfo, 0, sizeof(pginfo));
- pginfo.type = EHCA_MR_PGI_PHYS;
- pginfo.num_kpages = num_kpages;
- pginfo.hwpage_size = hw_pgsize;
- pginfo.num_hwpages = num_hwpages;
- pginfo.u.phy.num_phys_buf = num_phys_buf;
- pginfo.u.phy.phys_buf_array = phys_buf_array;
- pginfo.next_hwpage =
- ((u64)iova_start & ~PAGE_MASK) / hw_pgsize;
-
- ret = ehca_reg_mr(shca, e_mr, iova_start, size, mr_access_flags,
- e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,
- &e_mr->ib.ib_mr.rkey, EHCA_REG_MR);
- if (ret) {
- ib_mr = ERR_PTR(ret);
- goto reg_phys_mr_exit1;
- }
- }
-
- /* successful registration of all pages */
- return &e_mr->ib.ib_mr;
-
-reg_phys_mr_exit1:
- ehca_mr_delete(e_mr);
-reg_phys_mr_exit0:
- if (IS_ERR(ib_mr))
- ehca_err(pd->device, "h_ret=%li pd=%p phys_buf_array=%p "
- "num_phys_buf=%x mr_access_flags=%x iova_start=%p",
- PTR_ERR(ib_mr), pd, phys_buf_array,
- num_phys_buf, mr_access_flags, iova_start);
- return ib_mr;
-} /* end ehca_reg_phys_mr() */
-
-/*----------------------------------------------------------------------*/
-
struct ib_mr *ehca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt, int mr_access_flags,
struct ib_udata *udata)
@@ -437,207 +323,6 @@ reg_user_mr_exit0:
/*----------------------------------------------------------------------*/
-int ehca_rereg_phys_mr(struct ib_mr *mr,
- int mr_rereg_mask,
- struct ib_pd *pd,
- struct ib_phys_buf *phys_buf_array,
- int num_phys_buf,
- int mr_access_flags,
- u64 *iova_start)
-{
- int ret;
-
- struct ehca_shca *shca =
- container_of(mr->device, struct ehca_shca, ib_device);
- struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
- u64 new_size;
- u64 *new_start;
- u32 new_acl;
- struct ehca_pd *new_pd;
- u32 tmp_lkey, tmp_rkey;
- unsigned long sl_flags;
- u32 num_kpages = 0;
- u32 num_hwpages = 0;
- struct ehca_mr_pginfo pginfo;
-
- if (!(mr_rereg_mask & IB_MR_REREG_TRANS)) {
- /* TODO not supported, because PHYP rereg hCall needs pages */
- ehca_err(mr->device, "rereg without IB_MR_REREG_TRANS not "
- "supported yet, mr_rereg_mask=%x", mr_rereg_mask);
- ret = -EINVAL;
- goto rereg_phys_mr_exit0;
- }
-
- if (mr_rereg_mask & IB_MR_REREG_PD) {
- if (!pd) {
- ehca_err(mr->device, "rereg with bad pd, pd=%p "
- "mr_rereg_mask=%x", pd, mr_rereg_mask);
- ret = -EINVAL;
- goto rereg_phys_mr_exit0;
- }
- }
-
- if ((mr_rereg_mask &
- ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) ||
- (mr_rereg_mask == 0)) {
- ret = -EINVAL;
- goto rereg_phys_mr_exit0;
- }
-
- /* check other parameters */
- if (e_mr == shca->maxmr) {
- /* should be impossible, however reject to be sure */
- ehca_err(mr->device, "rereg internal max-MR impossible, mr=%p "
- "shca->maxmr=%p mr->lkey=%x",
- mr, shca->maxmr, mr->lkey);
- ret = -EINVAL;
- goto rereg_phys_mr_exit0;
- }
- if (mr_rereg_mask & IB_MR_REREG_TRANS) { /* transl., i.e. addr/size */
- if (e_mr->flags & EHCA_MR_FLAG_FMR) {
- ehca_err(mr->device, "not supported for FMR, mr=%p "
- "flags=%x", mr, e_mr->flags);
- ret = -EINVAL;
- goto rereg_phys_mr_exit0;
- }
- if (!phys_buf_array || num_phys_buf <= 0) {
- ehca_err(mr->device, "bad input values mr_rereg_mask=%x"
- " phys_buf_array=%p num_phys_buf=%x",
- mr_rereg_mask, phys_buf_array, num_phys_buf);
- ret = -EINVAL;
- goto rereg_phys_mr_exit0;
- }
- }
- if ((mr_rereg_mask & IB_MR_REREG_ACCESS) && /* change ACL */
- (((mr_access_flags & IB_ACCESS_REMOTE_WRITE) &&
- !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)) ||
- ((mr_access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
- !(mr_access_flags & IB_ACCESS_LOCAL_WRITE)))) {
- /*
- * Remote Write Access requires Local Write Access
- * Remote Atomic Access requires Local Write Access
- */
- ehca_err(mr->device, "bad input values: mr_rereg_mask=%x "
- "mr_access_flags=%x", mr_rereg_mask, mr_access_flags);
- ret = -EINVAL;
- goto rereg_phys_mr_exit0;
- }
-
- /* set requested values dependent on rereg request */
- spin_lock_irqsave(&e_mr->mrlock, sl_flags);
- new_start = e_mr->start;
- new_size = e_mr->size;
- new_acl = e_mr->acl;
- new_pd = container_of(mr->pd, struct ehca_pd, ib_pd);
-
- if (mr_rereg_mask & IB_MR_REREG_TRANS) {
- u64 hw_pgsize = ehca_get_max_hwpage_size(shca);
-
- new_start = iova_start; /* change address */
- /* check physical buffer list and calculate size */
- ret = ehca_mr_chk_buf_and_calc_size(phys_buf_array,
- num_phys_buf, iova_start,
- &new_size);
- if (ret)
- goto rereg_phys_mr_exit1;
- if ((new_size == 0) ||
- (((u64)iova_start + new_size) < (u64)iova_start)) {
- ehca_err(mr->device, "bad input values: new_size=%llx "
- "iova_start=%p", new_size, iova_start);
- ret = -EINVAL;
- goto rereg_phys_mr_exit1;
- }
- num_kpages = NUM_CHUNKS(((u64)new_start % PAGE_SIZE) +
- new_size, PAGE_SIZE);
- num_hwpages = NUM_CHUNKS(((u64)new_start % hw_pgsize) +
- new_size, hw_pgsize);
- memset(&pginfo, 0, sizeof(pginfo));
- pginfo.type = EHCA_MR_PGI_PHYS;
- pginfo.num_kpages = num_kpages;
- pginfo.hwpage_size = hw_pgsize;
- pginfo.num_hwpages = num_hwpages;
- pginfo.u.phy.num_phys_buf = num_phys_buf;
- pginfo.u.phy.phys_buf_array = phys_buf_array;
- pginfo.next_hwpage =
- ((u64)iova_start & ~PAGE_MASK) / hw_pgsize;
- }
- if (mr_rereg_mask & IB_MR_REREG_ACCESS)
- new_acl = mr_access_flags;
- if (mr_rereg_mask & IB_MR_REREG_PD)
- new_pd = container_of(pd, struct ehca_pd, ib_pd);
-
- ret = ehca_rereg_mr(shca, e_mr, new_start, new_size, new_acl,
- new_pd, &pginfo, &tmp_lkey, &tmp_rkey);
- if (ret)
- goto rereg_phys_mr_exit1;
-
- /* successful reregistration */
- if (mr_rereg_mask & IB_MR_REREG_PD)
- mr->pd = pd;
- mr->lkey = tmp_lkey;
- mr->rkey = tmp_rkey;
-
-rereg_phys_mr_exit1:
- spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
-rereg_phys_mr_exit0:
- if (ret)
- ehca_err(mr->device, "ret=%i mr=%p mr_rereg_mask=%x pd=%p "
- "phys_buf_array=%p num_phys_buf=%x mr_access_flags=%x "
- "iova_start=%p",
- ret, mr, mr_rereg_mask, pd, phys_buf_array,
- num_phys_buf, mr_access_flags, iova_start);
- return ret;
-} /* end ehca_rereg_phys_mr() */
-
-/*----------------------------------------------------------------------*/
-
-int ehca_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr)
-{
- int ret = 0;
- u64 h_ret;
- struct ehca_shca *shca =
- container_of(mr->device, struct ehca_shca, ib_device);
- struct ehca_mr *e_mr = container_of(mr, struct ehca_mr, ib.ib_mr);
- unsigned long sl_flags;
- struct ehca_mr_hipzout_parms hipzout;
-
- if ((e_mr->flags & EHCA_MR_FLAG_FMR)) {
- ehca_err(mr->device, "not supported for FMR, mr=%p e_mr=%p "
- "e_mr->flags=%x", mr, e_mr, e_mr->flags);
- ret = -EINVAL;
- goto query_mr_exit0;
- }
-
- memset(mr_attr, 0, sizeof(struct ib_mr_attr));
- spin_lock_irqsave(&e_mr->mrlock, sl_flags);
-
- h_ret = hipz_h_query_mr(shca->ipz_hca_handle, e_mr, &hipzout);
- if (h_ret != H_SUCCESS) {
- ehca_err(mr->device, "hipz_mr_query failed, h_ret=%lli mr=%p "
- "hca_hndl=%llx mr_hndl=%llx lkey=%x",
- h_ret, mr, shca->ipz_hca_handle.handle,
- e_mr->ipz_mr_handle.handle, mr->lkey);
- ret = ehca2ib_return_code(h_ret);
- goto query_mr_exit1;
- }
- mr_attr->pd = mr->pd;
- mr_attr->device_virt_addr = hipzout.vaddr;
- mr_attr->size = hipzout.len;
- mr_attr->lkey = hipzout.lkey;
- mr_attr->rkey = hipzout.rkey;
- ehca_mrmw_reverse_map_acl(&hipzout.acl, &mr_attr->mr_access_flags);
-
-query_mr_exit1:
- spin_unlock_irqrestore(&e_mr->mrlock, sl_flags);
-query_mr_exit0:
- if (ret)
- ehca_err(mr->device, "ret=%i mr=%p mr_attr=%p",
- ret, mr, mr_attr);
- return ret;
-} /* end ehca_query_mr() */
-
-/*----------------------------------------------------------------------*/
-
int ehca_dereg_mr(struct ib_mr *mr)
{
int ret = 0;
@@ -728,18 +413,6 @@ alloc_mw_exit0:
/*----------------------------------------------------------------------*/
-int ehca_bind_mw(struct ib_qp *qp,
- struct ib_mw *mw,
- struct ib_mw_bind *mw_bind)
-{
- /* TODO: not supported up to now */
- ehca_gen_err("bind MW currently not supported by HCAD");
-
- return -EPERM;
-} /* end ehca_bind_mw() */
-
-/*----------------------------------------------------------------------*/
-
int ehca_dealloc_mw(struct ib_mw *mw)
{
u64 h_ret;
@@ -1616,7 +1289,6 @@ int ehca_reg_internal_maxmr(
u64 *iova_start;
u64 size_maxmr;
struct ehca_mr_pginfo pginfo;
- struct ib_phys_buf ib_pbuf;
u32 num_kpages;
u32 num_hwpages;
u64 hw_pgsize;
@@ -1637,8 +1309,6 @@ int ehca_reg_internal_maxmr(
/* register internal max-MR on HCA */
size_maxmr = ehca_mr_len;
iova_start = (u64 *)ehca_map_vaddr((void *)(KERNELBASE + PHYSICAL_START));
- ib_pbuf.addr = 0;
- ib_pbuf.size = size_maxmr;
num_kpages = NUM_CHUNKS(((u64)iova_start % PAGE_SIZE) + size_maxmr,
PAGE_SIZE);
hw_pgsize = ehca_get_max_hwpage_size(shca);
@@ -1650,8 +1320,8 @@ int ehca_reg_internal_maxmr(
pginfo.num_kpages = num_kpages;
pginfo.num_hwpages = num_hwpages;
pginfo.hwpage_size = hw_pgsize;
- pginfo.u.phy.num_phys_buf = 1;
- pginfo.u.phy.phys_buf_array = &ib_pbuf;
+ pginfo.u.phy.addr = 0;
+ pginfo.u.phy.size = size_maxmr;
ret = ehca_reg_mr(shca, e_mr, iova_start, size_maxmr, 0, e_pd,
&pginfo, &e_mr->ib.ib_mr.lkey,
@@ -1669,7 +1339,6 @@ int ehca_reg_internal_maxmr(
e_mr->ib.ib_mr.pd = &e_pd->ib_pd;
e_mr->ib.ib_mr.uobject = NULL;
atomic_inc(&(e_pd->ib_pd.usecnt));
- atomic_set(&(e_mr->ib.ib_mr.usecnt), 0);
*e_maxmr = e_mr;
return 0;
@@ -1762,61 +1431,6 @@ ehca_dereg_internal_maxmr_exit0:
/*----------------------------------------------------------------------*/
-/*
- * check physical buffer array of MR verbs for validness and
- * calculates MR size
- */
-int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array,
- int num_phys_buf,
- u64 *iova_start,
- u64 *size)
-{
- struct ib_phys_buf *pbuf = phys_buf_array;
- u64 size_count = 0;
- u32 i;
-
- if (num_phys_buf == 0) {
- ehca_gen_err("bad phys buf array len, num_phys_buf=0");
- return -EINVAL;
- }
- /* check first buffer */
- if (((u64)iova_start & ~PAGE_MASK) != (pbuf->addr & ~PAGE_MASK)) {
- ehca_gen_err("iova_start/addr mismatch, iova_start=%p "
- "pbuf->addr=%llx pbuf->size=%llx",
- iova_start, pbuf->addr, pbuf->size);
- return -EINVAL;
- }
- if (((pbuf->addr + pbuf->size) % PAGE_SIZE) &&
- (num_phys_buf > 1)) {
- ehca_gen_err("addr/size mismatch in 1st buf, pbuf->addr=%llx "
- "pbuf->size=%llx", pbuf->addr, pbuf->size);
- return -EINVAL;
- }
-
- for (i = 0; i < num_phys_buf; i++) {
- if ((i > 0) && (pbuf->addr % PAGE_SIZE)) {
- ehca_gen_err("bad address, i=%x pbuf->addr=%llx "
- "pbuf->size=%llx",
- i, pbuf->addr, pbuf->size);
- return -EINVAL;
- }
- if (((i > 0) && /* not 1st */
- (i < (num_phys_buf - 1)) && /* not last */
- (pbuf->size % PAGE_SIZE)) || (pbuf->size == 0)) {
- ehca_gen_err("bad size, i=%x pbuf->size=%llx",
- i, pbuf->size);
- return -EINVAL;
- }
- size_count += pbuf->size;
- pbuf++;
- }
-
- *size = size_count;
- return 0;
-} /* end ehca_mr_chk_buf_and_calc_size() */
-
-/*----------------------------------------------------------------------*/
-
/* check page list of map FMR verb for validness */
int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
u64 *page_list,
@@ -2002,57 +1616,54 @@ static int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo,
u32 number, u64 *kpage)
{
int ret = 0;
- struct ib_phys_buf *pbuf;
+ u64 addr = pginfo->u.phy.addr;
+ u64 size = pginfo->u.phy.size;
u64 num_hw, offs_hw;
u32 i = 0;
- /* loop over desired phys_buf_array entries */
- while (i < number) {
- pbuf = pginfo->u.phy.phys_buf_array + pginfo->u.phy.next_buf;
- num_hw = NUM_CHUNKS((pbuf->addr % pginfo->hwpage_size) +
- pbuf->size, pginfo->hwpage_size);
- offs_hw = (pbuf->addr & ~(pginfo->hwpage_size - 1)) /
- pginfo->hwpage_size;
- while (pginfo->next_hwpage < offs_hw + num_hw) {
- /* sanity check */
- if ((pginfo->kpage_cnt >= pginfo->num_kpages) ||
- (pginfo->hwpage_cnt >= pginfo->num_hwpages)) {
- ehca_gen_err("kpage_cnt >= num_kpages, "
- "kpage_cnt=%llx num_kpages=%llx "
- "hwpage_cnt=%llx "
- "num_hwpages=%llx i=%x",
- pginfo->kpage_cnt,
- pginfo->num_kpages,
- pginfo->hwpage_cnt,
- pginfo->num_hwpages, i);
- return -EFAULT;
- }
- *kpage = (pbuf->addr & ~(pginfo->hwpage_size - 1)) +
- (pginfo->next_hwpage * pginfo->hwpage_size);
- if ( !(*kpage) && pbuf->addr ) {
- ehca_gen_err("pbuf->addr=%llx pbuf->size=%llx "
- "next_hwpage=%llx", pbuf->addr,
- pbuf->size, pginfo->next_hwpage);
- return -EFAULT;
- }
- (pginfo->hwpage_cnt)++;
- (pginfo->next_hwpage)++;
- if (PAGE_SIZE >= pginfo->hwpage_size) {
- if (pginfo->next_hwpage %
- (PAGE_SIZE / pginfo->hwpage_size) == 0)
- (pginfo->kpage_cnt)++;
- } else
- pginfo->kpage_cnt += pginfo->hwpage_size /
- PAGE_SIZE;
- kpage++;
- i++;
- if (i >= number) break;
+ num_hw = NUM_CHUNKS((addr % pginfo->hwpage_size) + size,
+ pginfo->hwpage_size);
+ offs_hw = (addr & ~(pginfo->hwpage_size - 1)) / pginfo->hwpage_size;
+
+ while (pginfo->next_hwpage < offs_hw + num_hw) {
+ /* sanity check */
+ if ((pginfo->kpage_cnt >= pginfo->num_kpages) ||
+ (pginfo->hwpage_cnt >= pginfo->num_hwpages)) {
+ ehca_gen_err("kpage_cnt >= num_kpages, "
+ "kpage_cnt=%llx num_kpages=%llx "
+ "hwpage_cnt=%llx "
+ "num_hwpages=%llx i=%x",
+ pginfo->kpage_cnt,
+ pginfo->num_kpages,
+ pginfo->hwpage_cnt,
+ pginfo->num_hwpages, i);
+ return -EFAULT;
}
- if (pginfo->next_hwpage >= offs_hw + num_hw) {
- (pginfo->u.phy.next_buf)++;
- pginfo->next_hwpage = 0;
+ *kpage = (addr & ~(pginfo->hwpage_size - 1)) +
+ (pginfo->next_hwpage * pginfo->hwpage_size);
+ if ( !(*kpage) && addr ) {
+ ehca_gen_err("addr=%llx size=%llx "
+ "next_hwpage=%llx", addr,
+ size, pginfo->next_hwpage);
+ return -EFAULT;
}
+ (pginfo->hwpage_cnt)++;
+ (pginfo->next_hwpage)++;
+ if (PAGE_SIZE >= pginfo->hwpage_size) {
+ if (pginfo->next_hwpage %
+ (PAGE_SIZE / pginfo->hwpage_size) == 0)
+ (pginfo->kpage_cnt)++;
+ } else
+ pginfo->kpage_cnt += pginfo->hwpage_size /
+ PAGE_SIZE;
+ kpage++;
+ i++;
+ if (i >= number) break;
}
+ if (pginfo->next_hwpage >= offs_hw + num_hw) {
+ pginfo->next_hwpage = 0;
+ }
+
return ret;
}
diff --git a/drivers/staging/rdma/ehca/ehca_mrmw.h b/drivers/staging/rdma/ehca/ehca_mrmw.h
index 50d8b51306dd..52bfa95697f7 100644
--- a/drivers/staging/rdma/ehca/ehca_mrmw.h
+++ b/drivers/staging/rdma/ehca/ehca_mrmw.h
@@ -98,11 +98,6 @@ int ehca_reg_maxmr(struct ehca_shca *shca,
int ehca_dereg_internal_maxmr(struct ehca_shca *shca);
-int ehca_mr_chk_buf_and_calc_size(struct ib_phys_buf *phys_buf_array,
- int num_phys_buf,
- u64 *iova_start,
- u64 *size);
-
int ehca_fmr_check_page_list(struct ehca_mr *e_fmr,
u64 *page_list,
int list_len);
diff --git a/drivers/staging/rdma/ehca/ehca_reqs.c b/drivers/staging/rdma/ehca/ehca_reqs.c
index 10e2074384f5..11813b880e16 100644
--- a/drivers/staging/rdma/ehca/ehca_reqs.c
+++ b/drivers/staging/rdma/ehca/ehca_reqs.c
@@ -614,7 +614,6 @@ int ehca_post_srq_recv(struct ib_srq *srq,
static const u8 ib_wc_opcode[255] = {
[0x01] = IB_WC_RECV+1,
[0x02] = IB_WC_RECV_RDMA_WITH_IMM+1,
- [0x04] = IB_WC_BIND_MW+1,
[0x08] = IB_WC_FETCH_ADD+1,
[0x10] = IB_WC_COMP_SWAP+1,
[0x20] = IB_WC_RDMA_WRITE+1,
diff --git a/drivers/staging/rdma/hfi1/mr.c b/drivers/staging/rdma/hfi1/mr.c
index 568f185a022d..a3f8b884fdd6 100644
--- a/drivers/staging/rdma/hfi1/mr.c
+++ b/drivers/staging/rdma/hfi1/mr.c
@@ -167,10 +167,7 @@ static struct hfi1_mr *alloc_mr(int count, struct ib_pd *pd)
rval = init_mregion(&mr->mr, pd, count);
if (rval)
goto bail;
- /*
- * ib_reg_phys_mr() will initialize mr->ibmr except for
- * lkey and rkey.
- */
+
rval = hfi1_alloc_lkey(&mr->mr, 0);
if (rval)
goto bail_mregion;
@@ -188,52 +185,6 @@ bail:
}
/**
- * hfi1_reg_phys_mr - register a physical memory region
- * @pd: protection domain for this memory region
- * @buffer_list: pointer to the list of physical buffers to register
- * @num_phys_buf: the number of physical buffers to register
- * @iova_start: the starting address passed over IB which maps to this MR
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_mr *hfi1_reg_phys_mr(struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf, int acc, u64 *iova_start)
-{
- struct hfi1_mr *mr;
- int n, m, i;
- struct ib_mr *ret;
-
- mr = alloc_mr(num_phys_buf, pd);
- if (IS_ERR(mr)) {
- ret = (struct ib_mr *)mr;
- goto bail;
- }
-
- mr->mr.user_base = *iova_start;
- mr->mr.iova = *iova_start;
- mr->mr.access_flags = acc;
-
- m = 0;
- n = 0;
- for (i = 0; i < num_phys_buf; i++) {
- mr->mr.map[m]->segs[n].vaddr = (void *) buffer_list[i].addr;
- mr->mr.map[m]->segs[n].length = buffer_list[i].size;
- mr->mr.length += buffer_list[i].size;
- n++;
- if (n == HFI1_SEGSZ) {
- m++;
- n = 0;
- }
- }
-
- ret = &mr->ibmr;
-
-bail:
- return ret;
-}
-
-/**
* hfi1_reg_user_mr - register a userspace memory region
* @pd: protection domain for this memory region
* @start: starting userspace address
diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c
index ef0feaa684a4..09b8d412ee90 100644
--- a/drivers/staging/rdma/hfi1/verbs.c
+++ b/drivers/staging/rdma/hfi1/verbs.c
@@ -2052,7 +2052,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
ibdev->poll_cq = hfi1_poll_cq;
ibdev->req_notify_cq = hfi1_req_notify_cq;
ibdev->get_dma_mr = hfi1_get_dma_mr;
- ibdev->reg_phys_mr = hfi1_reg_phys_mr;
ibdev->reg_user_mr = hfi1_reg_user_mr;
ibdev->dereg_mr = hfi1_dereg_mr;
ibdev->alloc_mr = hfi1_alloc_mr;
diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h
index 72106e5362b9..286e468b0479 100644
--- a/drivers/staging/rdma/hfi1/verbs.h
+++ b/drivers/staging/rdma/hfi1/verbs.h
@@ -1024,10 +1024,6 @@ int hfi1_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
struct ib_mr *hfi1_get_dma_mr(struct ib_pd *pd, int acc);
-struct ib_mr *hfi1_reg_phys_mr(struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf, int acc, u64 *iova_start);
-
struct ib_mr *hfi1_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int mr_access_flags,
struct ib_udata *udata);
diff --git a/drivers/staging/rdma/ipath/ipath_fs.c b/drivers/staging/rdma/ipath/ipath_fs.c
index 796af6867007..476fcdf05acb 100644
--- a/drivers/staging/rdma/ipath/ipath_fs.c
+++ b/drivers/staging/rdma/ipath/ipath_fs.c
@@ -82,14 +82,14 @@ static int create_file(const char *name, umode_t mode,
{
int error;
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
*dentry = lookup_one_len(name, parent, strlen(name));
if (!IS_ERR(*dentry))
error = ipathfs_mknod(d_inode(parent), *dentry,
mode, fops, data);
else
error = PTR_ERR(*dentry);
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
return error;
}
@@ -295,7 +295,7 @@ static int remove_device_files(struct super_block *sb,
int ret;
root = dget(sb->s_root);
- mutex_lock(&d_inode(root)->i_mutex);
+ inode_lock(d_inode(root));
snprintf(unit, sizeof unit, "%02d", dd->ipath_unit);
dir = lookup_one_len(unit, root, strlen(unit));
@@ -311,7 +311,7 @@ static int remove_device_files(struct super_block *sb,
ret = simple_rmdir(d_inode(root), dir);
bail:
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
dput(root);
return ret;
}
diff --git a/drivers/staging/rdma/ipath/ipath_mr.c b/drivers/staging/rdma/ipath/ipath_mr.c
index c7278f6a8217..b76b0ce66709 100644
--- a/drivers/staging/rdma/ipath/ipath_mr.c
+++ b/drivers/staging/rdma/ipath/ipath_mr.c
@@ -98,10 +98,6 @@ static struct ipath_mr *alloc_mr(int count,
}
mr->mr.mapsz = m;
- /*
- * ib_reg_phys_mr() will initialize mr->ibmr except for
- * lkey and rkey.
- */
if (!ipath_alloc_lkey(lk_table, &mr->mr))
goto bail;
mr->ibmr.rkey = mr->ibmr.lkey = mr->mr.lkey;
@@ -121,57 +117,6 @@ done:
}
/**
- * ipath_reg_phys_mr - register a physical memory region
- * @pd: protection domain for this memory region
- * @buffer_list: pointer to the list of physical buffers to register
- * @num_phys_buf: the number of physical buffers to register
- * @iova_start: the starting address passed over IB which maps to this MR
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf, int acc, u64 *iova_start)
-{
- struct ipath_mr *mr;
- int n, m, i;
- struct ib_mr *ret;
-
- mr = alloc_mr(num_phys_buf, &to_idev(pd->device)->lk_table);
- if (mr == NULL) {
- ret = ERR_PTR(-ENOMEM);
- goto bail;
- }
-
- mr->mr.pd = pd;
- mr->mr.user_base = *iova_start;
- mr->mr.iova = *iova_start;
- mr->mr.length = 0;
- mr->mr.offset = 0;
- mr->mr.access_flags = acc;
- mr->mr.max_segs = num_phys_buf;
- mr->umem = NULL;
-
- m = 0;
- n = 0;
- for (i = 0; i < num_phys_buf; i++) {
- mr->mr.map[m]->segs[n].vaddr = (void *) buffer_list[i].addr;
- mr->mr.map[m]->segs[n].length = buffer_list[i].size;
- mr->mr.length += buffer_list[i].size;
- n++;
- if (n == IPATH_SEGSZ) {
- m++;
- n = 0;
- }
- }
-
- ret = &mr->ibmr;
-
-bail:
- return ret;
-}
-
-/**
* ipath_reg_user_mr - register a userspace memory region
* @pd: protection domain for this memory region
* @start: starting userspace address
diff --git a/drivers/staging/rdma/ipath/ipath_verbs.c b/drivers/staging/rdma/ipath/ipath_verbs.c
index 1778dee13f99..53f9dcab180d 100644
--- a/drivers/staging/rdma/ipath/ipath_verbs.c
+++ b/drivers/staging/rdma/ipath/ipath_verbs.c
@@ -2201,7 +2201,6 @@ int ipath_register_ib_device(struct ipath_devdata *dd)
dev->poll_cq = ipath_poll_cq;
dev->req_notify_cq = ipath_req_notify_cq;
dev->get_dma_mr = ipath_get_dma_mr;
- dev->reg_phys_mr = ipath_reg_phys_mr;
dev->reg_user_mr = ipath_reg_user_mr;
dev->dereg_mr = ipath_dereg_mr;
dev->alloc_fmr = ipath_alloc_fmr;
diff --git a/drivers/staging/rdma/ipath/ipath_verbs.h b/drivers/staging/rdma/ipath/ipath_verbs.h
index 0a90a56870ab..6c70a89667a9 100644
--- a/drivers/staging/rdma/ipath/ipath_verbs.h
+++ b/drivers/staging/rdma/ipath/ipath_verbs.h
@@ -828,10 +828,6 @@ int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
struct ib_mr *ipath_get_dma_mr(struct ib_pd *pd, int acc);
-struct ib_mr *ipath_reg_phys_mr(struct ib_pd *pd,
- struct ib_phys_buf *buffer_list,
- int num_phys_buf, int acc, u64 *iova_start);
-
struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int mr_access_flags,
struct ib_udata *udata);
diff --git a/drivers/thermal/int340x_thermal/processor_thermal_device.c b/drivers/thermal/int340x_thermal/processor_thermal_device.c
index ccc0ad02d066..36fa724a36c8 100644
--- a/drivers/thermal/int340x_thermal/processor_thermal_device.c
+++ b/drivers/thermal/int340x_thermal/processor_thermal_device.c
@@ -33,6 +33,12 @@
/* Braswell thermal reporting device */
#define PCI_DEVICE_ID_PROC_BSW_THERMAL 0x22DC
+/* Broxton thermal reporting device */
+#define PCI_DEVICE_ID_PROC_BXT0_THERMAL 0x0A8C
+#define PCI_DEVICE_ID_PROC_BXT1_THERMAL 0x1A8C
+#define PCI_DEVICE_ID_PROC_BXTX_THERMAL 0x4A8C
+#define PCI_DEVICE_ID_PROC_BXTP_THERMAL 0x5A8C
+
struct power_config {
u32 index;
u32 min_uw;
@@ -404,6 +410,10 @@ static const struct pci_device_id proc_thermal_pci_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_HSB_THERMAL)},
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_SKL_THERMAL)},
{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_BSW_THERMAL)},
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_BXT0_THERMAL)},
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_BXT1_THERMAL)},
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_BXTX_THERMAL)},
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_BXTP_THERMAL)},
{ 0, },
};
diff --git a/drivers/thermal/intel_pch_thermal.c b/drivers/thermal/intel_pch_thermal.c
index 50c7da79be83..00d81af648b8 100644
--- a/drivers/thermal/intel_pch_thermal.c
+++ b/drivers/thermal/intel_pch_thermal.c
@@ -136,7 +136,7 @@ struct pch_dev_ops {
/* dev ops for Wildcat Point */
-static struct pch_dev_ops pch_dev_ops_wpt = {
+static const struct pch_dev_ops pch_dev_ops_wpt = {
.hw_init = pch_wpt_init,
.get_temp = pch_wpt_get_temp,
};
diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c
index 13d01edc7a04..44b9c485157d 100644
--- a/drivers/thermal/rcar_thermal.c
+++ b/drivers/thermal/rcar_thermal.c
@@ -75,11 +75,11 @@ struct rcar_thermal_priv {
#define rcar_has_irq_support(priv) ((priv)->common->base)
#define rcar_id_to_shift(priv) ((priv)->id * 8)
-#ifdef DEBUG
-# define rcar_force_update_temp(priv) 1
-#else
-# define rcar_force_update_temp(priv) 0
-#endif
+static const struct of_device_id rcar_thermal_dt_ids[] = {
+ { .compatible = "renesas,rcar-thermal", },
+ {},
+};
+MODULE_DEVICE_TABLE(of, rcar_thermal_dt_ids);
/*
* basic functions
@@ -203,14 +203,26 @@ err_out_unlock:
static int rcar_thermal_get_temp(struct thermal_zone_device *zone, int *temp)
{
struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone);
+ int tmp;
+ int ret;
- if (!rcar_has_irq_support(priv) || rcar_force_update_temp(priv))
- rcar_thermal_update_temp(priv);
+ ret = rcar_thermal_update_temp(priv);
+ if (ret < 0)
+ return ret;
mutex_lock(&priv->lock);
- *temp = MCELSIUS((priv->ctemp * 5) - 65);
+ tmp = MCELSIUS((priv->ctemp * 5) - 65);
mutex_unlock(&priv->lock);
+ if ((tmp < MCELSIUS(-45)) || (tmp > MCELSIUS(125))) {
+ struct device *dev = rcar_priv_to_dev(priv);
+
+ dev_err(dev, "it couldn't measure temperature correctly\n");
+ return -EIO;
+ }
+
+ *temp = tmp;
+
return 0;
}
@@ -288,6 +300,9 @@ static void _rcar_thermal_irq_ctrl(struct rcar_thermal_priv *priv, int enable)
unsigned long flags;
u32 mask = 0x3 << rcar_id_to_shift(priv); /* enable Rising/Falling */
+ if (!rcar_has_irq_support(priv))
+ return;
+
spin_lock_irqsave(&common->lock, flags);
rcar_thermal_common_bset(common, INTMSK, mask, enable ? 0 : mask);
@@ -299,11 +314,15 @@ static void rcar_thermal_work(struct work_struct *work)
{
struct rcar_thermal_priv *priv;
int cctemp, nctemp;
+ int ret;
priv = container_of(work, struct rcar_thermal_priv, work.work);
rcar_thermal_get_temp(priv->zone, &cctemp);
- rcar_thermal_update_temp(priv);
+ ret = rcar_thermal_update_temp(priv);
+ if (ret < 0)
+ return;
+
rcar_thermal_irq_enable(priv);
rcar_thermal_get_temp(priv->zone, &nctemp);
@@ -368,8 +387,7 @@ static int rcar_thermal_remove(struct platform_device *pdev)
struct rcar_thermal_priv *priv;
rcar_thermal_for_each_priv(priv, common) {
- if (rcar_has_irq_support(priv))
- rcar_thermal_irq_disable(priv);
+ rcar_thermal_irq_disable(priv);
thermal_zone_device_unregister(priv->zone);
}
@@ -441,7 +459,9 @@ static int rcar_thermal_probe(struct platform_device *pdev)
mutex_init(&priv->lock);
INIT_LIST_HEAD(&priv->list);
INIT_DELAYED_WORK(&priv->work, rcar_thermal_work);
- rcar_thermal_update_temp(priv);
+ ret = rcar_thermal_update_temp(priv);
+ if (ret < 0)
+ goto error_unregister;
priv->zone = thermal_zone_device_register("rcar_thermal",
1, 0, priv,
@@ -453,8 +473,7 @@ static int rcar_thermal_probe(struct platform_device *pdev)
goto error_unregister;
}
- if (rcar_has_irq_support(priv))
- rcar_thermal_irq_enable(priv);
+ rcar_thermal_irq_enable(priv);
list_move_tail(&priv->list, &common->head);
@@ -484,12 +503,6 @@ error_unregister:
return ret;
}
-static const struct of_device_id rcar_thermal_dt_ids[] = {
- { .compatible = "renesas,rcar-thermal", },
- {},
-};
-MODULE_DEVICE_TABLE(of, rcar_thermal_dt_ids);
-
static struct platform_driver rcar_thermal_driver = {
.driver = {
.name = "rcar_thermal",
diff --git a/drivers/thermal/rockchip_thermal.c b/drivers/thermal/rockchip_thermal.c
index e845841ab036..b58e3fb9b311 100644
--- a/drivers/thermal/rockchip_thermal.c
+++ b/drivers/thermal/rockchip_thermal.c
@@ -38,7 +38,7 @@ enum tshut_mode {
};
/**
- * the system Temperature Sensors tshut(tshut) polarity
+ * The system Temperature Sensors tshut(tshut) polarity
* the bit 8 is tshut polarity.
* 0: low active, 1: high active
*/
@@ -57,10 +57,10 @@ enum sensor_id {
};
/**
-* The conversion table has the adc value and temperature.
-* ADC_DECREMENT is the adc value decremnet.(e.g. v2_code_table)
-* ADC_INCREMNET is the adc value incremnet.(e.g. v3_code_table)
-*/
+ * The conversion table has the adc value and temperature.
+ * ADC_DECREMENT: the adc value is of diminishing.(e.g. v2_code_table)
+ * ADC_INCREMENT: the adc value is incremental.(e.g. v3_code_table)
+ */
enum adc_sort_mode {
ADC_DECREMENT = 0,
ADC_INCREMENT,
@@ -72,16 +72,17 @@ enum adc_sort_mode {
*/
#define SOC_MAX_SENSORS 2
+/**
+ * struct chip_tsadc_table: hold information about chip-specific differences
+ * @id: conversion table
+ * @length: size of conversion table
+ * @data_mask: mask to apply on data inputs
+ * @mode: sort mode of this adc variant (incrementing or decrementing)
+ */
struct chip_tsadc_table {
const struct tsadc_table *id;
-
- /* the array table size*/
unsigned int length;
-
- /* that analogic mask data */
u32 data_mask;
-
- /* the sort mode is adc value that increment or decrement in table */
enum adc_sort_mode mode;
};
@@ -153,6 +154,7 @@ struct rockchip_thermal_data {
#define TSADCV2_SHUT_2GPIO_SRC_EN(chn) BIT(4 + (chn))
#define TSADCV2_SHUT_2CRU_SRC_EN(chn) BIT(8 + (chn))
+#define TSADCV1_INT_PD_CLEAR_MASK ~BIT(16)
#define TSADCV2_INT_PD_CLEAR_MASK ~BIT(8)
#define TSADCV2_DATA_MASK 0xfff
@@ -168,6 +170,51 @@ struct tsadc_table {
int temp;
};
+/**
+ * Note:
+ * Code to Temperature mapping of the Temperature sensor is a piece wise linear
+ * curve.Any temperature, code faling between to 2 give temperatures can be
+ * linearly interpolated.
+ * Code to Temperature mapping should be updated based on sillcon results.
+ */
+static const struct tsadc_table v1_code_table[] = {
+ {TSADCV3_DATA_MASK, -40000},
+ {436, -40000},
+ {431, -35000},
+ {426, -30000},
+ {421, -25000},
+ {416, -20000},
+ {411, -15000},
+ {406, -10000},
+ {401, -5000},
+ {395, 0},
+ {390, 5000},
+ {385, 10000},
+ {380, 15000},
+ {375, 20000},
+ {370, 25000},
+ {364, 30000},
+ {359, 35000},
+ {354, 40000},
+ {349, 45000},
+ {343, 50000},
+ {338, 55000},
+ {333, 60000},
+ {328, 65000},
+ {322, 70000},
+ {317, 75000},
+ {312, 80000},
+ {307, 85000},
+ {301, 90000},
+ {296, 95000},
+ {291, 100000},
+ {286, 105000},
+ {280, 110000},
+ {275, 115000},
+ {270, 120000},
+ {264, 125000},
+};
+
static const struct tsadc_table v2_code_table[] = {
{TSADCV2_DATA_MASK, -40000},
{3800, -40000},
@@ -245,6 +292,44 @@ static const struct tsadc_table v3_code_table[] = {
{TSADCV3_DATA_MASK, 125000},
};
+static const struct tsadc_table v4_code_table[] = {
+ {TSADCV3_DATA_MASK, -40000},
+ {431, -40000},
+ {426, -35000},
+ {421, -30000},
+ {415, -25000},
+ {410, -20000},
+ {405, -15000},
+ {399, -10000},
+ {394, -5000},
+ {389, 0},
+ {383, 5000},
+ {378, 10000},
+ {373, 15000},
+ {367, 20000},
+ {362, 25000},
+ {357, 30000},
+ {351, 35000},
+ {346, 40000},
+ {340, 45000},
+ {335, 50000},
+ {330, 55000},
+ {324, 60000},
+ {319, 65000},
+ {313, 70000},
+ {308, 75000},
+ {302, 80000},
+ {297, 85000},
+ {291, 90000},
+ {286, 95000},
+ {281, 100000},
+ {275, 105000},
+ {270, 110000},
+ {264, 115000},
+ {259, 120000},
+ {253, 125000},
+};
+
static u32 rk_tsadcv2_temp_to_code(struct chip_tsadc_table table,
int temp)
{
@@ -368,6 +453,14 @@ static void rk_tsadcv2_initialize(void __iomem *regs,
regs + TSADCV2_HIGHT_TSHUT_DEBOUNCE);
}
+static void rk_tsadcv1_irq_ack(void __iomem *regs)
+{
+ u32 val;
+
+ val = readl_relaxed(regs + TSADCV2_INT_PD);
+ writel_relaxed(val & TSADCV1_INT_PD_CLEAR_MASK, regs + TSADCV2_INT_PD);
+}
+
static void rk_tsadcv2_irq_ack(void __iomem *regs)
{
u32 val;
@@ -429,6 +522,29 @@ static void rk_tsadcv2_tshut_mode(int chn, void __iomem *regs,
writel_relaxed(val, regs + TSADCV2_INT_EN);
}
+static const struct rockchip_tsadc_chip rk3228_tsadc_data = {
+ .chn_id[SENSOR_CPU] = 0, /* cpu sensor is channel 0 */
+ .chn_num = 1, /* one channel for tsadc */
+
+ .tshut_mode = TSHUT_MODE_GPIO, /* default TSHUT via GPIO give PMIC */
+ .tshut_polarity = TSHUT_LOW_ACTIVE, /* default TSHUT LOW ACTIVE */
+ .tshut_temp = 95000,
+
+ .initialize = rk_tsadcv2_initialize,
+ .irq_ack = rk_tsadcv1_irq_ack,
+ .control = rk_tsadcv2_control,
+ .get_temp = rk_tsadcv2_get_temp,
+ .set_tshut_temp = rk_tsadcv2_tshut_temp,
+ .set_tshut_mode = rk_tsadcv2_tshut_mode,
+
+ .table = {
+ .id = v1_code_table,
+ .length = ARRAY_SIZE(v1_code_table),
+ .data_mask = TSADCV3_DATA_MASK,
+ .mode = ADC_DECREMENT,
+ },
+};
+
static const struct rockchip_tsadc_chip rk3288_tsadc_data = {
.chn_id[SENSOR_CPU] = 1, /* cpu sensor is channel 1 */
.chn_id[SENSOR_GPU] = 2, /* gpu sensor is channel 2 */
@@ -477,8 +593,36 @@ static const struct rockchip_tsadc_chip rk3368_tsadc_data = {
},
};
+static const struct rockchip_tsadc_chip rk3399_tsadc_data = {
+ .chn_id[SENSOR_CPU] = 0, /* cpu sensor is channel 0 */
+ .chn_id[SENSOR_GPU] = 1, /* gpu sensor is channel 1 */
+ .chn_num = 2, /* two channels for tsadc */
+
+ .tshut_mode = TSHUT_MODE_GPIO, /* default TSHUT via GPIO give PMIC */
+ .tshut_polarity = TSHUT_LOW_ACTIVE, /* default TSHUT LOW ACTIVE */
+ .tshut_temp = 95000,
+
+ .initialize = rk_tsadcv2_initialize,
+ .irq_ack = rk_tsadcv1_irq_ack,
+ .control = rk_tsadcv2_control,
+ .get_temp = rk_tsadcv2_get_temp,
+ .set_tshut_temp = rk_tsadcv2_tshut_temp,
+ .set_tshut_mode = rk_tsadcv2_tshut_mode,
+
+ .table = {
+ .id = v4_code_table,
+ .length = ARRAY_SIZE(v4_code_table),
+ .data_mask = TSADCV3_DATA_MASK,
+ .mode = ADC_DECREMENT,
+ },
+};
+
static const struct of_device_id of_rockchip_thermal_match[] = {
{
+ .compatible = "rockchip,rk3228-tsadc",
+ .data = (void *)&rk3228_tsadc_data,
+ },
+ {
.compatible = "rockchip,rk3288-tsadc",
.data = (void *)&rk3288_tsadc_data,
},
@@ -486,6 +630,10 @@ static const struct of_device_id of_rockchip_thermal_match[] = {
.compatible = "rockchip,rk3368-tsadc",
.data = (void *)&rk3368_tsadc_data,
},
+ {
+ .compatible = "rockchip,rk3399-tsadc",
+ .data = (void *)&rk3399_tsadc_data,
+ },
{ /* end */ },
};
MODULE_DEVICE_TABLE(of, of_rockchip_thermal_match);
@@ -617,7 +765,7 @@ rockchip_thermal_register_sensor(struct platform_device *pdev,
return 0;
}
-/*
+/**
* Reset TSADC Controller, reset all tsadc registers.
*/
static void rockchip_thermal_reset_controller(struct reset_control *reset)
diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c
index 2f9f7086ac3d..ea9366ad3e6b 100644
--- a/drivers/thermal/step_wise.c
+++ b/drivers/thermal/step_wise.c
@@ -63,6 +63,19 @@ static unsigned long get_target_state(struct thermal_instance *instance,
next_target = instance->target;
dev_dbg(&cdev->device, "cur_state=%ld\n", cur_state);
+ if (!instance->initialized) {
+ if (throttle) {
+ next_target = (cur_state + 1) >= instance->upper ?
+ instance->upper :
+ ((cur_state + 1) < instance->lower ?
+ instance->lower : (cur_state + 1));
+ } else {
+ next_target = THERMAL_NO_TARGET;
+ }
+
+ return next_target;
+ }
+
switch (trend) {
case THERMAL_TREND_RAISING:
if (throttle) {
@@ -149,7 +162,7 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
dev_dbg(&instance->cdev->device, "old_target=%d, target=%d\n",
old_target, (int)instance->target);
- if (old_target == instance->target)
+ if (instance->initialized && old_target == instance->target)
continue;
/* Activate a passive thermal instance */
@@ -161,7 +174,7 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
instance->target == THERMAL_NO_TARGET)
update_passive_instance(tz, trip_type, -1);
-
+ instance->initialized = true;
instance->cdev->updated = false; /* cdev needs update */
}
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index d9e525cc9c1c..a0a8fd1235e2 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -37,6 +37,7 @@
#include <linux/of.h>
#include <net/netlink.h>
#include <net/genetlink.h>
+#include <linux/suspend.h>
#define CREATE_TRACE_POINTS
#include <trace/events/thermal.h>
@@ -59,6 +60,8 @@ static LIST_HEAD(thermal_governor_list);
static DEFINE_MUTEX(thermal_list_lock);
static DEFINE_MUTEX(thermal_governor_lock);
+static atomic_t in_suspend;
+
static struct thermal_governor *def_governor;
static struct thermal_governor *__find_governor(const char *name)
@@ -532,14 +535,31 @@ static void update_temperature(struct thermal_zone_device *tz)
mutex_unlock(&tz->lock);
trace_thermal_temperature(tz);
- dev_dbg(&tz->device, "last_temperature=%d, current_temperature=%d\n",
- tz->last_temperature, tz->temperature);
+ if (tz->last_temperature == THERMAL_TEMP_INVALID)
+ dev_dbg(&tz->device, "last_temperature N/A, current_temperature=%d\n",
+ tz->temperature);
+ else
+ dev_dbg(&tz->device, "last_temperature=%d, current_temperature=%d\n",
+ tz->last_temperature, tz->temperature);
+}
+
+static void thermal_zone_device_reset(struct thermal_zone_device *tz)
+{
+ struct thermal_instance *pos;
+
+ tz->temperature = THERMAL_TEMP_INVALID;
+ tz->passive = 0;
+ list_for_each_entry(pos, &tz->thermal_instances, tz_node)
+ pos->initialized = false;
}
void thermal_zone_device_update(struct thermal_zone_device *tz)
{
int count;
+ if (atomic_read(&in_suspend))
+ return;
+
if (!tz->ops->get_temp)
return;
@@ -676,8 +696,12 @@ trip_point_temp_store(struct device *dev, struct device_attribute *attr,
return -EINVAL;
ret = tz->ops->set_trip_temp(tz, trip, temperature);
+ if (ret)
+ return ret;
- return ret ? ret : count;
+ thermal_zone_device_update(tz);
+
+ return count;
}
static ssize_t
@@ -1321,6 +1345,7 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz,
if (!result) {
list_add_tail(&dev->tz_node, &tz->thermal_instances);
list_add_tail(&dev->cdev_node, &cdev->thermal_instances);
+ atomic_set(&tz->need_update, 1);
}
mutex_unlock(&cdev->lock);
mutex_unlock(&tz->lock);
@@ -1430,6 +1455,7 @@ __thermal_cooling_device_register(struct device_node *np,
const struct thermal_cooling_device_ops *ops)
{
struct thermal_cooling_device *cdev;
+ struct thermal_zone_device *pos = NULL;
int result;
if (type && strlen(type) >= THERMAL_NAME_LENGTH)
@@ -1474,6 +1500,12 @@ __thermal_cooling_device_register(struct device_node *np,
/* Update binding information for 'this' new cdev */
bind_cdev(cdev);
+ mutex_lock(&thermal_list_lock);
+ list_for_each_entry(pos, &thermal_tz_list, node)
+ if (atomic_cmpxchg(&pos->need_update, 1, 0))
+ thermal_zone_device_update(pos);
+ mutex_unlock(&thermal_list_lock);
+
return cdev;
}
@@ -1806,6 +1838,8 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
tz->trips = trips;
tz->passive_delay = passive_delay;
tz->polling_delay = polling_delay;
+ /* A new thermal zone needs to be updated anyway. */
+ atomic_set(&tz->need_update, 1);
dev_set_name(&tz->device, "thermal_zone%d", tz->id);
result = device_register(&tz->device);
@@ -1900,7 +1934,10 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
INIT_DELAYED_WORK(&(tz->poll_queue), thermal_zone_device_check);
- thermal_zone_device_update(tz);
+ thermal_zone_device_reset(tz);
+ /* Update the new thermal zone and mark it as already updated. */
+ if (atomic_cmpxchg(&tz->need_update, 1, 0))
+ thermal_zone_device_update(tz);
return tz;
@@ -2140,6 +2177,36 @@ static void thermal_unregister_governors(void)
thermal_gov_power_allocator_unregister();
}
+static int thermal_pm_notify(struct notifier_block *nb,
+ unsigned long mode, void *_unused)
+{
+ struct thermal_zone_device *tz;
+
+ switch (mode) {
+ case PM_HIBERNATION_PREPARE:
+ case PM_RESTORE_PREPARE:
+ case PM_SUSPEND_PREPARE:
+ atomic_set(&in_suspend, 1);
+ break;
+ case PM_POST_HIBERNATION:
+ case PM_POST_RESTORE:
+ case PM_POST_SUSPEND:
+ atomic_set(&in_suspend, 0);
+ list_for_each_entry(tz, &thermal_tz_list, node) {
+ thermal_zone_device_reset(tz);
+ thermal_zone_device_update(tz);
+ }
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static struct notifier_block thermal_pm_nb = {
+ .notifier_call = thermal_pm_notify,
+};
+
static int __init thermal_init(void)
{
int result;
@@ -2160,6 +2227,11 @@ static int __init thermal_init(void)
if (result)
goto exit_netlink;
+ result = register_pm_notifier(&thermal_pm_nb);
+ if (result)
+ pr_warn("Thermal: Can not register suspend notifier, return %d\n",
+ result);
+
return 0;
exit_netlink:
@@ -2179,6 +2251,7 @@ error:
static void __exit thermal_exit(void)
{
+ unregister_pm_notifier(&thermal_pm_nb);
of_thermal_destroy_zones();
genetlink_exit();
class_unregister(&thermal_class);
diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h
index d7ac1fccd659..749d41abfbab 100644
--- a/drivers/thermal/thermal_core.h
+++ b/drivers/thermal/thermal_core.h
@@ -41,6 +41,7 @@ struct thermal_instance {
struct thermal_zone_device *tz;
struct thermal_cooling_device *cdev;
int trip;
+ bool initialized;
unsigned long upper; /* Highest cooling state for this trip point */
unsigned long lower; /* Lowest cooling state for this trip point */
unsigned long target; /* expected cooling state */
diff --git a/drivers/usb/gadget/function/f_printer.c b/drivers/usb/gadget/function/f_printer.c
index 0fbfb2b2aa08..26ccad5d8680 100644
--- a/drivers/usb/gadget/function/f_printer.c
+++ b/drivers/usb/gadget/function/f_printer.c
@@ -673,7 +673,7 @@ printer_fsync(struct file *fd, loff_t start, loff_t end, int datasync)
unsigned long flags;
int tx_list_empty;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
spin_lock_irqsave(&dev->lock, flags);
tx_list_empty = (likely(list_empty(&dev->tx_reqs)));
spin_unlock_irqrestore(&dev->lock, flags);
@@ -683,7 +683,7 @@ printer_fsync(struct file *fd, loff_t start, loff_t end, int datasync)
wait_event_interruptible(dev->tx_flush_wait,
(likely(list_empty(&dev->tx_reqs_active))));
}
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return 0;
}
diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index 365afd7e14f8..7e179f81d05c 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -1521,10 +1521,10 @@ static void destroy_ep_files (struct dev_data *dev)
spin_unlock_irq (&dev->lock);
/* break link to dcache */
- mutex_lock (&parent->i_mutex);
+ inode_lock(parent);
d_delete (dentry);
dput (dentry);
- mutex_unlock (&parent->i_mutex);
+ inode_unlock(parent);
spin_lock_irq (&dev->lock);
}
diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.c b/drivers/usb/gadget/udc/atmel_usba_udc.c
index f92f5aff0dd5..8755b2c2aada 100644
--- a/drivers/usb/gadget/udc/atmel_usba_udc.c
+++ b/drivers/usb/gadget/udc/atmel_usba_udc.c
@@ -91,7 +91,7 @@ static ssize_t queue_dbg_read(struct file *file, char __user *buf,
if (!access_ok(VERIFY_WRITE, buf, nbytes))
return -EFAULT;
- mutex_lock(&file_inode(file)->i_mutex);
+ inode_lock(file_inode(file));
list_for_each_entry_safe(req, tmp_req, queue, queue) {
len = snprintf(tmpbuf, sizeof(tmpbuf),
"%8p %08x %c%c%c %5d %c%c%c\n",
@@ -118,7 +118,7 @@ static ssize_t queue_dbg_read(struct file *file, char __user *buf,
nbytes -= len;
buf += len;
}
- mutex_unlock(&file_inode(file)->i_mutex);
+ inode_unlock(file_inode(file));
return actual;
}
@@ -143,7 +143,7 @@ static int regs_dbg_open(struct inode *inode, struct file *file)
u32 *data;
int ret = -ENOMEM;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
udc = inode->i_private;
data = kmalloc(inode->i_size, GFP_KERNEL);
if (!data)
@@ -158,7 +158,7 @@ static int regs_dbg_open(struct inode *inode, struct file *file)
ret = 0;
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
@@ -169,11 +169,11 @@ static ssize_t regs_dbg_read(struct file *file, char __user *buf,
struct inode *inode = file_inode(file);
int ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = simple_read_from_buffer(buf, nbytes, ppos,
file->private_data,
file_inode(file)->i_size);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
diff --git a/drivers/video/fbdev/core/fb_defio.c b/drivers/video/fbdev/core/fb_defio.c
index 3fc63c208d08..57721c73177f 100644
--- a/drivers/video/fbdev/core/fb_defio.c
+++ b/drivers/video/fbdev/core/fb_defio.c
@@ -78,13 +78,13 @@ int fb_deferred_io_fsync(struct file *file, loff_t start, loff_t end, int datasy
if (!info->fbdefio)
return 0;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* Kill off the delayed work */
cancel_delayed_work_sync(&info->deferred_work);
/* Run it immediately */
schedule_delayed_work(&info->deferred_work, 0);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return 0;
}
diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index 36205c27c4d0..f6bed86c17f9 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -545,6 +545,7 @@ err_enable_device:
static void virtio_pci_remove(struct pci_dev *pci_dev)
{
struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
+ struct device *dev = get_device(&vp_dev->vdev.dev);
unregister_virtio_device(&vp_dev->vdev);
@@ -554,6 +555,7 @@ static void virtio_pci_remove(struct pci_dev *pci_dev)
virtio_pci_modern_remove(vp_dev);
pci_disable_device(pci_dev);
+ put_device(dev);
}
static struct pci_driver virtio_pci_driver = {
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 7bf835f85bc8..eadc894faea2 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -449,14 +449,14 @@ static int v9fs_file_fsync(struct file *filp, loff_t start, loff_t end,
if (retval)
return retval;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
p9_debug(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync);
fid = filp->private_data;
v9fs_blank_wstat(&wstat);
retval = p9_client_wstat(fid, &wstat);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return retval;
}
@@ -472,13 +472,13 @@ int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end,
if (retval)
return retval;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
p9_debug(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync);
fid = filp->private_data;
retval = p9_client_fsync(fid, datasync);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return retval;
}
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 659c579c4588..0548c53f41d5 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -33,11 +33,11 @@ affs_file_release(struct inode *inode, struct file *filp)
inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt));
if (atomic_dec_and_test(&AFFS_I(inode)->i_opencnt)) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (inode->i_size != AFFS_I(inode)->mmu_private)
affs_truncate(inode);
affs_free_prealloc(inode);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
return 0;
@@ -958,12 +958,12 @@ int affs_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
if (err)
return err;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = write_inode_now(inode, 0);
err = sync_blockdev(inode->i_sb->s_bdev);
if (!ret)
ret = err;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
const struct file_operations affs_file_operations = {
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index 4baf1d2b39e4..d91a9c9cfbd0 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -483,7 +483,7 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl)
fl->fl_type = F_UNLCK;
- mutex_lock(&vnode->vfs_inode.i_mutex);
+ inode_lock(&vnode->vfs_inode);
/* check local lock records first */
ret = 0;
@@ -505,7 +505,7 @@ static int afs_do_getlk(struct file *file, struct file_lock *fl)
}
error:
- mutex_unlock(&vnode->vfs_inode.i_mutex);
+ inode_unlock(&vnode->vfs_inode);
_leave(" = %d [%hd]", ret, fl->fl_type);
return ret;
}
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 0714abcd7f32..dfef94f70667 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -693,7 +693,7 @@ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (ret)
return ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* use a writeback record as a marker in the queue - when this reaches
* the front of the queue, all the outstanding writes are either
@@ -735,7 +735,7 @@ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
afs_put_writeback(wb);
_leave(" = %d", ret);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
diff --git a/fs/attr.c b/fs/attr.c
index 6530ced19697..25b24d0f6c88 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -195,7 +195,7 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de
struct timespec now;
unsigned int ia_valid = attr->ia_valid;
- WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex));
+ WARN_ON_ONCE(!inode_is_locked(inode));
if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) {
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 3a93755e880f..051ea4809c14 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -491,6 +491,7 @@ static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
* arch_check_elf() - check an ELF executable
* @ehdr: The main ELF header
* @has_interp: True if the ELF has an interpreter, else false.
+ * @interp_ehdr: The interpreter's ELF header
* @state: Architecture-specific state preserved throughout the process
* of loading the ELF.
*
@@ -502,6 +503,7 @@ static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
* with that return code.
*/
static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
+ struct elfhdr *interp_ehdr,
struct arch_elf_state *state)
{
/* Dummy implementation, always proceed */
@@ -829,7 +831,9 @@ static int load_elf_binary(struct linux_binprm *bprm)
* still possible to return an error to the code that invoked
* the exec syscall.
*/
- retval = arch_check_elf(&loc->elf_ex, !!interpreter, &arch_state);
+ retval = arch_check_elf(&loc->elf_ex,
+ !!interpreter, &loc->interp_elf_ex,
+ &arch_state);
if (retval)
goto out_free_dentry;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 78f005f37847..3a3ced779fc7 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -638,11 +638,11 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
case 3:
/* Delete this handler. */
root = dget(file->f_path.dentry->d_sb->s_root);
- mutex_lock(&d_inode(root)->i_mutex);
+ inode_lock(d_inode(root));
kill_node(e);
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
dput(root);
break;
default:
@@ -675,7 +675,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
return PTR_ERR(e);
root = dget(sb->s_root);
- mutex_lock(&d_inode(root)->i_mutex);
+ inode_lock(d_inode(root));
dentry = lookup_one_len(e->name, root, strlen(e->name));
err = PTR_ERR(dentry);
if (IS_ERR(dentry))
@@ -711,7 +711,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
out2:
dput(dentry);
out:
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
dput(root);
if (err) {
@@ -754,12 +754,12 @@ static ssize_t bm_status_write(struct file *file, const char __user *buffer,
case 3:
/* Delete all handlers. */
root = dget(file->f_path.dentry->d_sb->s_root);
- mutex_lock(&d_inode(root)->i_mutex);
+ inode_lock(d_inode(root));
while (!list_empty(&entries))
kill_node(list_entry(entries.next, Node, list));
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
dput(root);
break;
default:
diff --git a/fs/block_dev.c b/fs/block_dev.c
index ba762ea07f67..7b9cd49622b1 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -75,7 +75,7 @@ void kill_bdev(struct block_device *bdev)
{
struct address_space *mapping = bdev->bd_inode->i_mapping;
- if (mapping->nrpages == 0 && mapping->nrshadows == 0)
+ if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
return;
invalidate_bh_lrus();
@@ -346,9 +346,9 @@ static loff_t block_llseek(struct file *file, loff_t offset, int whence)
struct inode *bd_inode = bdev_file_inode(file);
loff_t retval;
- mutex_lock(&bd_inode->i_mutex);
+ inode_lock(bd_inode);
retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode));
- mutex_unlock(&bd_inode->i_mutex);
+ inode_unlock(bd_inode);
return retval;
}
@@ -1142,9 +1142,9 @@ void bd_set_size(struct block_device *bdev, loff_t size)
{
unsigned bsize = bdev_logical_block_size(bdev);
- mutex_lock(&bdev->bd_inode->i_mutex);
+ inode_lock(bdev->bd_inode);
i_size_write(bdev->bd_inode, size);
- mutex_unlock(&bdev->bd_inode->i_mutex);
+ inode_unlock(bdev->bd_inode);
while (bsize < PAGE_CACHE_SIZE) {
if (size & bsize)
break;
@@ -1741,9 +1741,9 @@ static void blkdev_vm_open(struct vm_area_struct *vma)
struct inode *bd_inode = bdev_file_inode(vma->vm_file);
struct block_device *bdev = I_BDEV(bd_inode);
- mutex_lock(&bd_inode->i_mutex);
+ inode_lock(bd_inode);
bdev->bd_map_count++;
- mutex_unlock(&bd_inode->i_mutex);
+ inode_unlock(bd_inode);
}
static void blkdev_vm_close(struct vm_area_struct *vma)
@@ -1751,9 +1751,9 @@ static void blkdev_vm_close(struct vm_area_struct *vma)
struct inode *bd_inode = bdev_file_inode(vma->vm_file);
struct block_device *bdev = I_BDEV(bd_inode);
- mutex_lock(&bd_inode->i_mutex);
+ inode_lock(bd_inode);
bdev->bd_map_count--;
- mutex_unlock(&bd_inode->i_mutex);
+ inode_unlock(bd_inode);
}
static const struct vm_operations_struct blkdev_dax_vm_ops = {
@@ -1777,7 +1777,7 @@ static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
struct block_device *bdev = I_BDEV(bd_inode);
file_accessed(file);
- mutex_lock(&bd_inode->i_mutex);
+ inode_lock(bd_inode);
bdev->bd_map_count++;
if (IS_DAX(bd_inode)) {
vma->vm_ops = &blkdev_dax_vm_ops;
@@ -1785,7 +1785,7 @@ static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
} else {
vma->vm_ops = &blkdev_default_vm_ops;
}
- mutex_unlock(&bd_inode->i_mutex);
+ inode_unlock(bd_inode);
return 0;
}
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 9f5cc1e8e126..098bb8f690c9 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1762,17 +1762,17 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
loff_t pos;
size_t count;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
err = generic_write_checks(iocb, from);
if (err <= 0) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return err;
}
current->backing_dev_info = inode_to_bdi(inode);
err = file_remove_privs(file);
if (err) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
goto out;
}
@@ -1783,7 +1783,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
* to stop this write operation to ensure FS consistency.
*/
if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
err = -EROFS;
goto out;
}
@@ -1804,7 +1804,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
end_pos = round_up(pos + count, root->sectorsize);
err = btrfs_cont_expand(inode, i_size_read(inode), end_pos);
if (err) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
goto out;
}
}
@@ -1820,7 +1820,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
iocb->ki_pos = pos + num_written;
}
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
/*
* We also have to set last_sub_trans to the current log transid,
@@ -1909,7 +1909,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
if (ret)
return ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
atomic_inc(&root->log_batch);
full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
&BTRFS_I(inode)->runtime_flags);
@@ -1961,7 +1961,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
ret = start_ordered_ops(inode, start, end);
}
if (ret) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
goto out;
}
atomic_inc(&root->log_batch);
@@ -2007,7 +2007,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
*/
clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
&BTRFS_I(inode)->runtime_flags);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
goto out;
}
@@ -2031,7 +2031,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
goto out;
}
trans->sync = true;
@@ -2054,7 +2054,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
* file again, but that will end up using the synchronization
* inside btrfs_sync_log to keep things safe.
*/
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
/*
* If any of the ordered extents had an error, just return it to user
@@ -2303,7 +2303,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
if (ret)
return ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE);
ret = find_first_non_hole(inode, &offset, &len);
if (ret < 0)
@@ -2343,7 +2343,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
truncated_page = true;
ret = btrfs_truncate_page(inode, offset, 0, 0);
if (ret) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
}
@@ -2419,7 +2419,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
ret = btrfs_wait_ordered_range(inode, lockstart,
lockend - lockstart + 1);
if (ret) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
}
@@ -2574,7 +2574,7 @@ out_only_mutex:
ret = btrfs_end_transaction(trans, root);
}
}
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (ret && !err)
err = ret;
return err;
@@ -2658,7 +2658,7 @@ static long btrfs_fallocate(struct file *file, int mode,
if (ret < 0)
return ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = inode_newsize_ok(inode, alloc_end);
if (ret)
goto out;
@@ -2816,7 +2816,7 @@ out:
* So this is completely used as cleanup.
*/
btrfs_qgroup_free_data(inode, alloc_start, alloc_end - alloc_start);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
/* Let go of our reservation. */
btrfs_free_reserved_data_space(inode, alloc_start,
alloc_end - alloc_start);
@@ -2892,7 +2892,7 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
struct inode *inode = file->f_mapping->host;
int ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
switch (whence) {
case SEEK_END:
case SEEK_CUR:
@@ -2901,20 +2901,20 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence)
case SEEK_DATA:
case SEEK_HOLE:
if (offset >= i_size_read(inode)) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return -ENXIO;
}
ret = find_desired_extent(inode, &offset, whence);
if (ret) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
}
offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return offset;
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1b79dc9b12e4..e28f3d4691af 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -8447,7 +8447,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
* not unlock the i_mutex at this case.
*/
if (offset + count <= inode->i_size) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
relock = true;
}
ret = btrfs_delalloc_reserve_space(inode, offset, count);
@@ -8504,7 +8504,7 @@ out:
if (wakeup)
inode_dio_end(inode);
if (relock)
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
return ret;
}
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 9028737ee9b5..952172ca7e45 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -240,7 +240,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
if (ret)
return ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ip_oldflags = ip->flags;
i_oldflags = inode->i_flags;
@@ -358,7 +358,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
}
out_unlock:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
mnt_drop_write_file(file);
return ret;
}
@@ -881,7 +881,7 @@ out_up_read:
out_dput:
dput(dentry);
out_unlock:
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
return error;
}
@@ -1393,18 +1393,18 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
ra_index += cluster;
}
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
BTRFS_I(inode)->force_compress = compress_type;
ret = cluster_pages_for_defrag(inode, pages, i, cluster);
if (ret < 0) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
goto out_ra;
}
defrag_count += ret;
balance_dirty_pages_ratelimited(inode->i_mapping);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (newer_than) {
if (newer_off == (u64)-1)
@@ -1465,9 +1465,9 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
out_ra:
if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
if (!file)
kfree(ra);
@@ -2430,7 +2430,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
goto out_dput;
}
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/*
* Don't allow to delete a subvolume with send in progress. This is
@@ -2543,7 +2543,7 @@ out_up_write:
spin_unlock(&dest->root_item_lock);
}
out_unlock_inode:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (!err) {
d_invalidate(dentry);
btrfs_invalidate_inodes(dest);
@@ -2559,7 +2559,7 @@ out_unlock_inode:
out_dput:
dput(dentry);
out_unlock_dir:
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
out_drop_write:
mnt_drop_write_file(file);
out:
@@ -2857,8 +2857,8 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2)
{
- mutex_unlock(&inode1->i_mutex);
- mutex_unlock(&inode2->i_mutex);
+ inode_unlock(inode1);
+ inode_unlock(inode2);
}
static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2)
@@ -2866,8 +2866,8 @@ static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2)
if (inode1 < inode2)
swap(inode1, inode2);
- mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
- mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(inode1, I_MUTEX_PARENT);
+ inode_lock_nested(inode2, I_MUTEX_CHILD);
}
static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
@@ -3026,7 +3026,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
return 0;
if (same_inode) {
- mutex_lock(&src->i_mutex);
+ inode_lock(src);
ret = extent_same_check_offsets(src, loff, &len, olen);
if (ret)
@@ -3101,7 +3101,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
btrfs_cmp_data_free(&cmp);
out_unlock:
if (same_inode)
- mutex_unlock(&src->i_mutex);
+ inode_unlock(src);
else
btrfs_double_inode_unlock(src, dst);
@@ -3749,7 +3749,7 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
if (!same_inode) {
btrfs_double_inode_lock(src, inode);
} else {
- mutex_lock(&src->i_mutex);
+ inode_lock(src);
}
/* determine range to clone */
@@ -3820,7 +3820,7 @@ out_unlock:
if (!same_inode)
btrfs_double_inode_unlock(src, inode);
else
- mutex_unlock(&src->i_mutex);
+ inode_unlock(src);
return ret;
}
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index ef6d8fc85853..fd1c4d982463 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3030,7 +3030,7 @@ int prealloc_file_extent_cluster(struct inode *inode,
int ret = 0;
BUG_ON(cluster->start != cluster->boundary[0]);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = btrfs_check_data_free_space(inode, cluster->start,
cluster->end + 1 - cluster->start);
@@ -3057,7 +3057,7 @@ int prealloc_file_extent_cluster(struct inode *inode,
btrfs_free_reserved_data_space(inode, cluster->start,
cluster->end + 1 - cluster->start);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index b1a68530e911..92bf5ee732fb 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -4279,7 +4279,7 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
return PTR_ERR(inode);
/* Avoid truncate/dio/punch hole.. */
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
inode_dio_wait(inode);
physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
@@ -4358,7 +4358,7 @@ next_page:
}
ret = COPY_COMPLETE;
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
iput(inode);
return ret;
}
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index fd953c361a43..6c68d6356197 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -126,7 +126,7 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
* locks the inode's i_mutex before calling setxattr or removexattr.
*/
if (flags & XATTR_REPLACE) {
- ASSERT(mutex_is_locked(&inode->i_mutex));
+ ASSERT(inode_is_locked(inode));
di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode),
name, name_len, 0);
if (!di)
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index afa023dded5b..675a3332d72f 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -446,7 +446,7 @@ static int cachefiles_attr_changed(struct fscache_object *_object)
return 0;
cachefiles_begin_secure(cache, &saved_cred);
- mutex_lock(&d_inode(object->backer)->i_mutex);
+ inode_lock(d_inode(object->backer));
/* if there's an extension to a partial page at the end of the backing
* file, we need to discard the partial page so that we pick up new
@@ -465,7 +465,7 @@ static int cachefiles_attr_changed(struct fscache_object *_object)
ret = notify_change(object->backer, &newattrs, NULL);
truncate_failed:
- mutex_unlock(&d_inode(object->backer)->i_mutex);
+ inode_unlock(d_inode(object->backer));
cachefiles_end_secure(cache, saved_cred);
if (ret == -EIO) {
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index c4b893453e0e..1c2334c163dd 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -295,7 +295,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
cachefiles_mark_object_buried(cache, rep, why);
}
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
if (ret == -EIO)
cachefiles_io_error(cache, "Unlink failed");
@@ -306,7 +306,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
/* directories have to be moved to the graveyard */
_debug("move stale object to graveyard");
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
try_again:
/* first step is to make up a grave dentry in the graveyard */
@@ -423,13 +423,13 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
dir = dget_parent(object->dentry);
- mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
if (test_bit(FSCACHE_OBJECT_KILLED_BY_CACHE, &object->fscache.flags)) {
/* object allocation for the same key preemptively deleted this
* object's file so that it could create its own file */
_debug("object preemptively buried");
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
ret = 0;
} else {
/* we need to check that our parent is _still_ our parent - it
@@ -442,7 +442,7 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
/* it got moved, presumably by cachefilesd culling it,
* so it's no longer in the key path and we can ignore
* it */
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
ret = 0;
}
}
@@ -501,7 +501,7 @@ lookup_again:
/* search the current directory for the element name */
_debug("lookup '%s'", name);
- mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
start = jiffies;
next = lookup_one_len(name, dir, nlen);
@@ -585,7 +585,7 @@ lookup_again:
/* process the next component */
if (key) {
_debug("advance");
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
dput(dir);
dir = next;
next = NULL;
@@ -623,7 +623,7 @@ lookup_again:
/* note that we're now using this object */
ret = cachefiles_mark_object_active(cache, object);
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
dput(dir);
dir = NULL;
@@ -705,7 +705,7 @@ lookup_error:
cachefiles_io_error(cache, "Lookup failed");
next = NULL;
error:
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
dput(next);
error_out2:
dput(dir);
@@ -729,7 +729,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
_enter(",,%s", dirname);
/* search the current directory for the element name */
- mutex_lock(&d_inode(dir)->i_mutex);
+ inode_lock(d_inode(dir));
start = jiffies;
subdir = lookup_one_len(dirname, dir, strlen(dirname));
@@ -768,7 +768,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
d_backing_inode(subdir)->i_ino);
}
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
/* we need to make sure the subdir is a directory */
ASSERT(d_backing_inode(subdir));
@@ -800,19 +800,19 @@ check_error:
return ERR_PTR(ret);
mkdir_error:
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
dput(subdir);
pr_err("mkdir %s failed with error %d\n", dirname, ret);
return ERR_PTR(ret);
lookup_error:
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
ret = PTR_ERR(subdir);
pr_err("Lookup %s failed with error %d\n", dirname, ret);
return ERR_PTR(ret);
nomem_d_alloc:
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
_leave(" = -ENOMEM");
return ERR_PTR(-ENOMEM);
}
@@ -837,7 +837,7 @@ static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache,
// dir, filename);
/* look up the victim */
- mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
start = jiffies;
victim = lookup_one_len(filename, dir, strlen(filename));
@@ -852,7 +852,7 @@ static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache,
* at the netfs's request whilst the cull was in progress
*/
if (d_is_negative(victim)) {
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
dput(victim);
_leave(" = -ENOENT [absent]");
return ERR_PTR(-ENOENT);
@@ -881,13 +881,13 @@ static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache,
object_in_use:
read_unlock(&cache->active_lock);
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
dput(victim);
//_leave(" = -EBUSY [in use]");
return ERR_PTR(-EBUSY);
lookup_error:
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
ret = PTR_ERR(victim);
if (ret == -ENOENT) {
/* file or dir now absent - probably retired by netfs */
@@ -947,7 +947,7 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
return 0;
error_unlock:
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
error:
dput(victim);
if (ret == -ENOENT) {
@@ -982,7 +982,7 @@ int cachefiles_check_in_use(struct cachefiles_cache *cache, struct dentry *dir,
if (IS_ERR(victim))
return PTR_ERR(victim);
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
dput(victim);
//_leave(" = 0");
return 0;
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index b7d218a168fb..c22213789090 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1108,7 +1108,7 @@ retry_locked:
return 0;
/* past end of file? */
- i_size = inode->i_size; /* caller holds i_mutex */
+ i_size = i_size_read(inode);
if (page_off >= i_size ||
(pos_in_page == 0 && (pos+len) >= i_size &&
@@ -1149,7 +1149,6 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
page = grab_cache_page_write_begin(mapping, index, 0);
if (!page)
return -ENOMEM;
- *pagep = page;
dout("write_begin file %p inode %p page %p %d~%d\n", file,
inode, page, (int)pos, (int)len);
@@ -1184,8 +1183,7 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
zero_user_segment(page, from+copied, len);
/* did file size increase? */
- /* (no need for i_size_read(); we caller holds i_mutex */
- if (pos+copied > inode->i_size)
+ if (pos+copied > i_size_read(inode))
check_cap = ceph_inode_set_size(inode, pos+copied);
if (!PageUptodate(page))
@@ -1378,11 +1376,13 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
ret = VM_FAULT_NOPAGE;
if ((off > size) ||
- (page->mapping != inode->i_mapping))
+ (page->mapping != inode->i_mapping)) {
+ unlock_page(page);
goto out;
+ }
ret = ceph_update_writeable_page(vma->vm_file, off, len, page);
- if (ret == 0) {
+ if (ret >= 0) {
/* success. we'll keep the page locked. */
set_page_dirty(page);
ret = VM_FAULT_LOCKED;
@@ -1393,8 +1393,6 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
ret = VM_FAULT_SIGBUS;
}
out:
- if (ret != VM_FAULT_LOCKED)
- unlock_page(page);
if (ret == VM_FAULT_LOCKED ||
ci->i_inline_version != CEPH_INLINE_NONE) {
int dirty;
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
index a4766ded1ba7..a351480dbabc 100644
--- a/fs/ceph/cache.c
+++ b/fs/ceph/cache.c
@@ -106,7 +106,7 @@ static uint16_t ceph_fscache_inode_get_aux(const void *cookie_netfs_data,
memset(&aux, 0, sizeof(aux));
aux.mtime = inode->i_mtime;
- aux.size = inode->i_size;
+ aux.size = i_size_read(inode);
memcpy(buffer, &aux, sizeof(aux));
@@ -117,9 +117,7 @@ static void ceph_fscache_inode_get_attr(const void *cookie_netfs_data,
uint64_t *size)
{
const struct ceph_inode_info* ci = cookie_netfs_data;
- const struct inode* inode = &ci->vfs_inode;
-
- *size = inode->i_size;
+ *size = i_size_read(&ci->vfs_inode);
}
static enum fscache_checkaux ceph_fscache_inode_check_aux(
@@ -134,7 +132,7 @@ static enum fscache_checkaux ceph_fscache_inode_check_aux(
memset(&aux, 0, sizeof(aux));
aux.mtime = inode->i_mtime;
- aux.size = inode->i_size;
+ aux.size = i_size_read(inode);
if (memcmp(data, &aux, sizeof(aux)) != 0)
return FSCACHE_CHECKAUX_OBSOLETE;
@@ -197,7 +195,7 @@ void ceph_fscache_register_inode_cookie(struct ceph_fs_client* fsc,
return;
/* Avoid multiple racing open requests */
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (ci->fscache)
goto done;
@@ -207,7 +205,7 @@ void ceph_fscache_register_inode_cookie(struct ceph_fs_client* fsc,
ci, true);
fscache_check_consistency(ci->fscache);
done:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index c69e1253b47b..cdbf8cf3d52c 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2030,7 +2030,7 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
if (datasync)
goto out;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
dirty = try_flush_caps(inode, &flush_tid);
dout("fsync dirty caps are %s\n", ceph_cap_string(dirty));
@@ -2046,7 +2046,7 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
ret = wait_event_interruptible(ci->i_cap_wq,
caps_are_flushed(inode, flush_tid));
}
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
out:
dout("fsync %p%s result=%d\n", inode, datasync ? " datasync" : "", ret);
return ret;
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 9314b4ea2375..fd11fb231a2e 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -507,7 +507,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
loff_t old_offset = ceph_make_fpos(fi->frag, fi->next_offset);
loff_t retval;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
retval = -EINVAL;
switch (whence) {
case SEEK_CUR:
@@ -542,7 +542,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
}
}
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return retval;
}
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index fe02ae7f056a..3b3172357326 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -215,7 +215,7 @@ static int ceph_get_name(struct dentry *parent, char *name,
if (IS_ERR(req))
return PTR_ERR(req);
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
req->r_inode = d_inode(child);
ihold(d_inode(child));
@@ -224,7 +224,7 @@ static int ceph_get_name(struct dentry *parent, char *name,
req->r_num_caps = 2;
err = ceph_mdsc_do_request(mdsc, NULL, req);
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
if (!err) {
struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 3c68e6aee2f0..86a9c383955e 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -397,8 +397,9 @@ int ceph_release(struct inode *inode, struct file *file)
}
enum {
- CHECK_EOF = 1,
- READ_INLINE = 2,
+ HAVE_RETRIED = 1,
+ CHECK_EOF = 2,
+ READ_INLINE = 3,
};
/*
@@ -411,17 +412,15 @@ enum {
static int striped_read(struct inode *inode,
u64 off, u64 len,
struct page **pages, int num_pages,
- int *checkeof, bool o_direct,
- unsigned long buf_align)
+ int *checkeof)
{
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_inode_info *ci = ceph_inode(inode);
u64 pos, this_len, left;
- int io_align, page_align;
- int pages_left;
- int read;
+ loff_t i_size;
+ int page_align, pages_left;
+ int read, ret;
struct page **page_pos;
- int ret;
bool hit_stripe, was_short;
/*
@@ -432,13 +431,9 @@ static int striped_read(struct inode *inode,
page_pos = pages;
pages_left = num_pages;
read = 0;
- io_align = off & ~PAGE_MASK;
more:
- if (o_direct)
- page_align = (pos - io_align + buf_align) & ~PAGE_MASK;
- else
- page_align = pos & ~PAGE_MASK;
+ page_align = pos & ~PAGE_MASK;
this_len = left;
ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode),
&ci->i_layout, pos, &this_len,
@@ -452,13 +447,12 @@ more:
dout("striped_read %llu~%llu (read %u) got %d%s%s\n", pos, left, read,
ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : "");
+ i_size = i_size_read(inode);
if (ret >= 0) {
int didpages;
- if (was_short && (pos + ret < inode->i_size)) {
- int zlen = min(this_len - ret,
- inode->i_size - pos - ret);
- int zoff = (o_direct ? buf_align : io_align) +
- read + ret;
+ if (was_short && (pos + ret < i_size)) {
+ int zlen = min(this_len - ret, i_size - pos - ret);
+ int zoff = (off & ~PAGE_MASK) + read + ret;
dout(" zero gap %llu to %llu\n",
pos + ret, pos + ret + zlen);
ceph_zero_page_vector_range(zoff, zlen, pages);
@@ -473,14 +467,14 @@ more:
pages_left -= didpages;
/* hit stripe and need continue*/
- if (left && hit_stripe && pos < inode->i_size)
+ if (left && hit_stripe && pos < i_size)
goto more;
}
if (read > 0) {
ret = read;
/* did we bounce off eof? */
- if (pos + left > inode->i_size)
+ if (pos + left > i_size)
*checkeof = CHECK_EOF;
}
@@ -521,54 +515,28 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
if (ret < 0)
return ret;
- if (iocb->ki_flags & IOCB_DIRECT) {
- while (iov_iter_count(i)) {
- size_t start;
- ssize_t n;
-
- n = dio_get_pagev_size(i);
- pages = dio_get_pages_alloc(i, n, &start, &num_pages);
- if (IS_ERR(pages))
- return PTR_ERR(pages);
-
- ret = striped_read(inode, off, n,
- pages, num_pages, checkeof,
- 1, start);
-
- ceph_put_page_vector(pages, num_pages, true);
-
- if (ret <= 0)
- break;
- off += ret;
- iov_iter_advance(i, ret);
- if (ret < n)
+ num_pages = calc_pages_for(off, len);
+ pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
+ if (IS_ERR(pages))
+ return PTR_ERR(pages);
+ ret = striped_read(inode, off, len, pages,
+ num_pages, checkeof);
+ if (ret > 0) {
+ int l, k = 0;
+ size_t left = ret;
+
+ while (left) {
+ size_t page_off = off & ~PAGE_MASK;
+ size_t copy = min_t(size_t, left,
+ PAGE_SIZE - page_off);
+ l = copy_page_to_iter(pages[k++], page_off, copy, i);
+ off += l;
+ left -= l;
+ if (l < copy)
break;
}
- } else {
- num_pages = calc_pages_for(off, len);
- pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
- if (IS_ERR(pages))
- return PTR_ERR(pages);
- ret = striped_read(inode, off, len, pages,
- num_pages, checkeof, 0, 0);
- if (ret > 0) {
- int l, k = 0;
- size_t left = ret;
-
- while (left) {
- size_t page_off = off & ~PAGE_MASK;
- size_t copy = min_t(size_t,
- PAGE_SIZE - page_off, left);
- l = copy_page_to_iter(pages[k++], page_off,
- copy, i);
- off += l;
- left -= l;
- if (l < copy)
- break;
- }
- }
- ceph_release_page_vector(pages, num_pages);
}
+ ceph_release_page_vector(pages, num_pages);
if (off > iocb->ki_pos) {
ret = off - iocb->ki_pos;
@@ -579,6 +547,193 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
return ret;
}
+struct ceph_aio_request {
+ struct kiocb *iocb;
+ size_t total_len;
+ int write;
+ int error;
+ struct list_head osd_reqs;
+ unsigned num_reqs;
+ atomic_t pending_reqs;
+ struct timespec mtime;
+ struct ceph_cap_flush *prealloc_cf;
+};
+
+struct ceph_aio_work {
+ struct work_struct work;
+ struct ceph_osd_request *req;
+};
+
+static void ceph_aio_retry_work(struct work_struct *work);
+
+static void ceph_aio_complete(struct inode *inode,
+ struct ceph_aio_request *aio_req)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int ret;
+
+ if (!atomic_dec_and_test(&aio_req->pending_reqs))
+ return;
+
+ ret = aio_req->error;
+ if (!ret)
+ ret = aio_req->total_len;
+
+ dout("ceph_aio_complete %p rc %d\n", inode, ret);
+
+ if (ret >= 0 && aio_req->write) {
+ int dirty;
+
+ loff_t endoff = aio_req->iocb->ki_pos + aio_req->total_len;
+ if (endoff > i_size_read(inode)) {
+ if (ceph_inode_set_size(inode, endoff))
+ ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
+ }
+
+ spin_lock(&ci->i_ceph_lock);
+ ci->i_inline_version = CEPH_INLINE_NONE;
+ dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
+ &aio_req->prealloc_cf);
+ spin_unlock(&ci->i_ceph_lock);
+ if (dirty)
+ __mark_inode_dirty(inode, dirty);
+
+ }
+
+ ceph_put_cap_refs(ci, (aio_req->write ? CEPH_CAP_FILE_WR :
+ CEPH_CAP_FILE_RD));
+
+ aio_req->iocb->ki_complete(aio_req->iocb, ret, 0);
+
+ ceph_free_cap_flush(aio_req->prealloc_cf);
+ kfree(aio_req);
+}
+
+static void ceph_aio_complete_req(struct ceph_osd_request *req,
+ struct ceph_msg *msg)
+{
+ int rc = req->r_result;
+ struct inode *inode = req->r_inode;
+ struct ceph_aio_request *aio_req = req->r_priv;
+ struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0);
+ int num_pages = calc_pages_for((u64)osd_data->alignment,
+ osd_data->length);
+
+ dout("ceph_aio_complete_req %p rc %d bytes %llu\n",
+ inode, rc, osd_data->length);
+
+ if (rc == -EOLDSNAPC) {
+ struct ceph_aio_work *aio_work;
+ BUG_ON(!aio_req->write);
+
+ aio_work = kmalloc(sizeof(*aio_work), GFP_NOFS);
+ if (aio_work) {
+ INIT_WORK(&aio_work->work, ceph_aio_retry_work);
+ aio_work->req = req;
+ queue_work(ceph_inode_to_client(inode)->wb_wq,
+ &aio_work->work);
+ return;
+ }
+ rc = -ENOMEM;
+ } else if (!aio_req->write) {
+ if (rc == -ENOENT)
+ rc = 0;
+ if (rc >= 0 && osd_data->length > rc) {
+ int zoff = osd_data->alignment + rc;
+ int zlen = osd_data->length - rc;
+ /*
+ * If read is satisfied by single OSD request,
+ * it can pass EOF. Otherwise read is within
+ * i_size.
+ */
+ if (aio_req->num_reqs == 1) {
+ loff_t i_size = i_size_read(inode);
+ loff_t endoff = aio_req->iocb->ki_pos + rc;
+ if (endoff < i_size)
+ zlen = min_t(size_t, zlen,
+ i_size - endoff);
+ aio_req->total_len = rc + zlen;
+ }
+
+ if (zlen > 0)
+ ceph_zero_page_vector_range(zoff, zlen,
+ osd_data->pages);
+ }
+ }
+
+ ceph_put_page_vector(osd_data->pages, num_pages, false);
+ ceph_osdc_put_request(req);
+
+ if (rc < 0)
+ cmpxchg(&aio_req->error, 0, rc);
+
+ ceph_aio_complete(inode, aio_req);
+ return;
+}
+
+static void ceph_aio_retry_work(struct work_struct *work)
+{
+ struct ceph_aio_work *aio_work =
+ container_of(work, struct ceph_aio_work, work);
+ struct ceph_osd_request *orig_req = aio_work->req;
+ struct ceph_aio_request *aio_req = orig_req->r_priv;
+ struct inode *inode = orig_req->r_inode;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_snap_context *snapc;
+ struct ceph_osd_request *req;
+ int ret;
+
+ spin_lock(&ci->i_ceph_lock);
+ if (__ceph_have_pending_cap_snap(ci)) {
+ struct ceph_cap_snap *capsnap =
+ list_last_entry(&ci->i_cap_snaps,
+ struct ceph_cap_snap,
+ ci_item);
+ snapc = ceph_get_snap_context(capsnap->context);
+ } else {
+ BUG_ON(!ci->i_head_snapc);
+ snapc = ceph_get_snap_context(ci->i_head_snapc);
+ }
+ spin_unlock(&ci->i_ceph_lock);
+
+ req = ceph_osdc_alloc_request(orig_req->r_osdc, snapc, 2,
+ false, GFP_NOFS);
+ if (IS_ERR(req)) {
+ ret = PTR_ERR(req);
+ req = orig_req;
+ goto out;
+ }
+
+ req->r_flags = CEPH_OSD_FLAG_ORDERSNAP |
+ CEPH_OSD_FLAG_ONDISK |
+ CEPH_OSD_FLAG_WRITE;
+ req->r_base_oloc = orig_req->r_base_oloc;
+ req->r_base_oid = orig_req->r_base_oid;
+
+ req->r_ops[0] = orig_req->r_ops[0];
+ osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0);
+
+ ceph_osdc_build_request(req, req->r_ops[0].extent.offset,
+ snapc, CEPH_NOSNAP, &aio_req->mtime);
+
+ ceph_put_snap_context(snapc);
+ ceph_osdc_put_request(orig_req);
+
+ req->r_callback = ceph_aio_complete_req;
+ req->r_inode = inode;
+ req->r_priv = aio_req;
+
+ ret = ceph_osdc_start_request(req->r_osdc, req, false);
+out:
+ if (ret < 0) {
+ BUG_ON(ret == -EOLDSNAPC);
+ req->r_result = ret;
+ ceph_aio_complete_req(req, NULL);
+ }
+
+ kfree(aio_work);
+}
+
/*
* Write commit request unsafe callback, called to tell us when a
* request is unsafe (that is, in flight--has been handed to the
@@ -612,16 +767,10 @@ static void ceph_sync_write_unsafe(struct ceph_osd_request *req, bool unsafe)
}
-/*
- * Synchronous write, straight from __user pointer or user pages.
- *
- * If write spans object boundary, just do multiple writes. (For a
- * correct atomic write, we should e.g. take write locks on all
- * objects, rollback on failure, etc.)
- */
static ssize_t
-ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
- struct ceph_snap_context *snapc)
+ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
+ struct ceph_snap_context *snapc,
+ struct ceph_cap_flush **pcf)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
@@ -630,44 +779,52 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
struct ceph_vino vino;
struct ceph_osd_request *req;
struct page **pages;
- int num_pages;
- int written = 0;
+ struct ceph_aio_request *aio_req = NULL;
+ int num_pages = 0;
int flags;
- int check_caps = 0;
int ret;
struct timespec mtime = CURRENT_TIME;
- size_t count = iov_iter_count(from);
+ size_t count = iov_iter_count(iter);
+ loff_t pos = iocb->ki_pos;
+ bool write = iov_iter_rw(iter) == WRITE;
- if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
+ if (write && ceph_snap(file_inode(file)) != CEPH_NOSNAP)
return -EROFS;
- dout("sync_direct_write on file %p %lld~%u\n", file, pos,
- (unsigned)count);
+ dout("sync_direct_read_write (%s) on file %p %lld~%u\n",
+ (write ? "write" : "read"), file, pos, (unsigned)count);
ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + count);
if (ret < 0)
return ret;
- ret = invalidate_inode_pages2_range(inode->i_mapping,
- pos >> PAGE_CACHE_SHIFT,
- (pos + count) >> PAGE_CACHE_SHIFT);
- if (ret < 0)
- dout("invalidate_inode_pages2_range returned %d\n", ret);
+ if (write) {
+ ret = invalidate_inode_pages2_range(inode->i_mapping,
+ pos >> PAGE_CACHE_SHIFT,
+ (pos + count) >> PAGE_CACHE_SHIFT);
+ if (ret < 0)
+ dout("invalidate_inode_pages2_range returned %d\n", ret);
- flags = CEPH_OSD_FLAG_ORDERSNAP |
- CEPH_OSD_FLAG_ONDISK |
- CEPH_OSD_FLAG_WRITE;
+ flags = CEPH_OSD_FLAG_ORDERSNAP |
+ CEPH_OSD_FLAG_ONDISK |
+ CEPH_OSD_FLAG_WRITE;
+ } else {
+ flags = CEPH_OSD_FLAG_READ;
+ }
- while (iov_iter_count(from) > 0) {
- u64 len = dio_get_pagev_size(from);
- size_t start;
- ssize_t n;
+ while (iov_iter_count(iter) > 0) {
+ u64 size = dio_get_pagev_size(iter);
+ size_t start = 0;
+ ssize_t len;
vino = ceph_vino(inode);
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
- vino, pos, &len, 0,
- 2,/*include a 'startsync' command*/
- CEPH_OSD_OP_WRITE, flags, snapc,
+ vino, pos, &size, 0,
+ /*include a 'startsync' command*/
+ write ? 2 : 1,
+ write ? CEPH_OSD_OP_WRITE :
+ CEPH_OSD_OP_READ,
+ flags, snapc,
ci->i_truncate_seq,
ci->i_truncate_size,
false);
@@ -676,10 +833,8 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
break;
}
- osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0);
-
- n = len;
- pages = dio_get_pages_alloc(from, len, &start, &num_pages);
+ len = size;
+ pages = dio_get_pages_alloc(iter, len, &start, &num_pages);
if (IS_ERR(pages)) {
ceph_osdc_put_request(req);
ret = PTR_ERR(pages);
@@ -687,47 +842,128 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
}
/*
- * throw out any page cache pages in this range. this
- * may block.
+ * To simplify error handling, allow AIO when IO within i_size
+ * or IO can be satisfied by single OSD request.
*/
- truncate_inode_pages_range(inode->i_mapping, pos,
- (pos+n) | (PAGE_CACHE_SIZE-1));
- osd_req_op_extent_osd_data_pages(req, 0, pages, n, start,
- false, false);
+ if (pos == iocb->ki_pos && !is_sync_kiocb(iocb) &&
+ (len == count || pos + count <= i_size_read(inode))) {
+ aio_req = kzalloc(sizeof(*aio_req), GFP_KERNEL);
+ if (aio_req) {
+ aio_req->iocb = iocb;
+ aio_req->write = write;
+ INIT_LIST_HEAD(&aio_req->osd_reqs);
+ if (write) {
+ aio_req->mtime = mtime;
+ swap(aio_req->prealloc_cf, *pcf);
+ }
+ }
+ /* ignore error */
+ }
+
+ if (write) {
+ /*
+ * throw out any page cache pages in this range. this
+ * may block.
+ */
+ truncate_inode_pages_range(inode->i_mapping, pos,
+ (pos+len) | (PAGE_CACHE_SIZE - 1));
+
+ osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0);
+ }
+
+
+ osd_req_op_extent_osd_data_pages(req, 0, pages, len, start,
+ false, false);
- /* BUG_ON(vino.snap != CEPH_NOSNAP); */
ceph_osdc_build_request(req, pos, snapc, vino.snap, &mtime);
- ret = ceph_osdc_start_request(&fsc->client->osdc, req, false);
+ if (aio_req) {
+ aio_req->total_len += len;
+ aio_req->num_reqs++;
+ atomic_inc(&aio_req->pending_reqs);
+
+ req->r_callback = ceph_aio_complete_req;
+ req->r_inode = inode;
+ req->r_priv = aio_req;
+ list_add_tail(&req->r_unsafe_item, &aio_req->osd_reqs);
+
+ pos += len;
+ iov_iter_advance(iter, len);
+ continue;
+ }
+
+ ret = ceph_osdc_start_request(req->r_osdc, req, false);
if (!ret)
ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
+ size = i_size_read(inode);
+ if (!write) {
+ if (ret == -ENOENT)
+ ret = 0;
+ if (ret >= 0 && ret < len && pos + ret < size) {
+ int zlen = min_t(size_t, len - ret,
+ size - pos - ret);
+ ceph_zero_page_vector_range(start + ret, zlen,
+ pages);
+ ret += zlen;
+ }
+ if (ret >= 0)
+ len = ret;
+ }
+
ceph_put_page_vector(pages, num_pages, false);
ceph_osdc_put_request(req);
- if (ret)
+ if (ret < 0)
break;
- pos += n;
- written += n;
- iov_iter_advance(from, n);
- if (pos > i_size_read(inode)) {
- check_caps = ceph_inode_set_size(inode, pos);
- if (check_caps)
+ pos += len;
+ iov_iter_advance(iter, len);
+
+ if (!write && pos >= size)
+ break;
+
+ if (write && pos > size) {
+ if (ceph_inode_set_size(inode, pos))
ceph_check_caps(ceph_inode(inode),
CHECK_CAPS_AUTHONLY,
NULL);
}
}
- if (ret != -EOLDSNAPC && written > 0) {
+ if (aio_req) {
+ if (aio_req->num_reqs == 0) {
+ kfree(aio_req);
+ return ret;
+ }
+
+ ceph_get_cap_refs(ci, write ? CEPH_CAP_FILE_WR :
+ CEPH_CAP_FILE_RD);
+
+ while (!list_empty(&aio_req->osd_reqs)) {
+ req = list_first_entry(&aio_req->osd_reqs,
+ struct ceph_osd_request,
+ r_unsafe_item);
+ list_del_init(&req->r_unsafe_item);
+ if (ret >= 0)
+ ret = ceph_osdc_start_request(req->r_osdc,
+ req, false);
+ if (ret < 0) {
+ BUG_ON(ret == -EOLDSNAPC);
+ req->r_result = ret;
+ ceph_aio_complete_req(req, NULL);
+ }
+ }
+ return -EIOCBQUEUED;
+ }
+
+ if (ret != -EOLDSNAPC && pos > iocb->ki_pos) {
+ ret = pos - iocb->ki_pos;
iocb->ki_pos = pos;
- ret = written;
}
return ret;
}
-
/*
* Synchronous write, straight from __user pointer or user pages.
*
@@ -897,8 +1133,14 @@ again:
ceph_cap_string(got));
if (ci->i_inline_version == CEPH_INLINE_NONE) {
- /* hmm, this isn't really async... */
- ret = ceph_sync_read(iocb, to, &retry_op);
+ if (!retry_op && (iocb->ki_flags & IOCB_DIRECT)) {
+ ret = ceph_direct_read_write(iocb, to,
+ NULL, NULL);
+ if (ret >= 0 && ret < len)
+ retry_op = CHECK_EOF;
+ } else {
+ ret = ceph_sync_read(iocb, to, &retry_op);
+ }
} else {
retry_op = READ_INLINE;
}
@@ -916,7 +1158,7 @@ again:
pinned_page = NULL;
}
ceph_put_cap_refs(ci, got);
- if (retry_op && ret >= 0) {
+ if (retry_op > HAVE_RETRIED && ret >= 0) {
int statret;
struct page *page = NULL;
loff_t i_size;
@@ -968,12 +1210,11 @@ again:
if (retry_op == CHECK_EOF && iocb->ki_pos < i_size &&
ret < len) {
dout("sync_read hit hole, ppos %lld < size %lld"
- ", reading more\n", iocb->ki_pos,
- inode->i_size);
+ ", reading more\n", iocb->ki_pos, i_size);
read += ret;
len -= ret;
- retry_op = 0;
+ retry_op = HAVE_RETRIED;
goto again;
}
}
@@ -1014,7 +1255,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (!prealloc_cf)
return -ENOMEM;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* We can write back this queue in page reclaim */
current->backing_dev_info = inode_to_bdi(inode);
@@ -1052,7 +1293,7 @@ retry_snap:
}
dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n",
- inode, ceph_vinop(inode), pos, count, inode->i_size);
+ inode, ceph_vinop(inode), pos, count, i_size_read(inode));
if (fi->fmode & CEPH_FILE_MODE_LAZY)
want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
else
@@ -1070,7 +1311,7 @@ retry_snap:
(iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC)) {
struct ceph_snap_context *snapc;
struct iov_iter data;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
spin_lock(&ci->i_ceph_lock);
if (__ceph_have_pending_cap_snap(ci)) {
@@ -1088,8 +1329,8 @@ retry_snap:
/* we might need to revert back to that point */
data = *from;
if (iocb->ki_flags & IOCB_DIRECT)
- written = ceph_sync_direct_write(iocb, &data, pos,
- snapc);
+ written = ceph_direct_read_write(iocb, &data, snapc,
+ &prealloc_cf);
else
written = ceph_sync_write(iocb, &data, pos, snapc);
if (written == -EOLDSNAPC) {
@@ -1097,14 +1338,14 @@ retry_snap:
"got EOLDSNAPC, retrying\n",
inode, ceph_vinop(inode),
pos, (unsigned)count);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
goto retry_snap;
}
if (written > 0)
iov_iter_advance(from, written);
ceph_put_snap_context(snapc);
} else {
- loff_t old_size = inode->i_size;
+ loff_t old_size = i_size_read(inode);
/*
* No need to acquire the i_truncate_mutex. Because
* the MDS revokes Fwb caps before sending truncate
@@ -1115,9 +1356,9 @@ retry_snap:
written = generic_perform_write(file, from, pos);
if (likely(written >= 0))
iocb->ki_pos = pos + written;
- if (inode->i_size > old_size)
+ if (i_size_read(inode) > old_size)
ceph_fscache_update_objectsize(inode);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
if (written >= 0) {
@@ -1147,7 +1388,7 @@ retry_snap:
goto out_unlocked;
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
out_unlocked:
ceph_free_cap_flush(prealloc_cf);
current->backing_dev_info = NULL;
@@ -1160,9 +1401,10 @@ out_unlocked:
static loff_t ceph_llseek(struct file *file, loff_t offset, int whence)
{
struct inode *inode = file->f_mapping->host;
+ loff_t i_size;
int ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (whence == SEEK_END || whence == SEEK_DATA || whence == SEEK_HOLE) {
ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false);
@@ -1172,9 +1414,10 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int whence)
}
}
+ i_size = i_size_read(inode);
switch (whence) {
case SEEK_END:
- offset += inode->i_size;
+ offset += i_size;
break;
case SEEK_CUR:
/*
@@ -1190,24 +1433,24 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int whence)
offset += file->f_pos;
break;
case SEEK_DATA:
- if (offset >= inode->i_size) {
+ if (offset >= i_size) {
ret = -ENXIO;
goto out;
}
break;
case SEEK_HOLE:
- if (offset >= inode->i_size) {
+ if (offset >= i_size) {
ret = -ENXIO;
goto out;
}
- offset = inode->i_size;
+ offset = i_size;
break;
}
offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return offset;
}
@@ -1363,7 +1606,7 @@ static long ceph_fallocate(struct file *file, int mode,
if (!prealloc_cf)
return -ENOMEM;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (ceph_snap(inode) != CEPH_NOSNAP) {
ret = -EROFS;
@@ -1418,7 +1661,7 @@ static long ceph_fallocate(struct file *file, int mode,
ceph_put_cap_refs(ci, got);
unlock:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
ceph_free_cap_flush(prealloc_cf);
return ret;
}
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index da55eb8bcffa..fb4ba2e4e2a5 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -548,7 +548,7 @@ int ceph_fill_file_size(struct inode *inode, int issued,
if (ceph_seq_cmp(truncate_seq, ci->i_truncate_seq) > 0 ||
(truncate_seq == ci->i_truncate_seq && size > inode->i_size)) {
dout("size %lld -> %llu\n", inode->i_size, size);
- inode->i_size = size;
+ i_size_write(inode, size);
inode->i_blocks = (size + (1<<9) - 1) >> 9;
ci->i_reported_size = size;
if (truncate_seq != ci->i_truncate_seq) {
@@ -808,7 +808,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
spin_unlock(&ci->i_ceph_lock);
err = -EINVAL;
- if (WARN_ON(symlen != inode->i_size))
+ if (WARN_ON(symlen != i_size_read(inode)))
goto out;
err = -ENOMEM;
@@ -1549,7 +1549,7 @@ int ceph_inode_set_size(struct inode *inode, loff_t size)
spin_lock(&ci->i_ceph_lock);
dout("set_size %p %llu -> %llu\n", inode, inode->i_size, size);
- inode->i_size = size;
+ i_size_write(inode, size);
inode->i_blocks = (size + (1 << 9) - 1) >> 9;
/* tell the MDS if we are approaching max_size */
@@ -1911,7 +1911,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
inode->i_size, attr->ia_size);
if ((issued & CEPH_CAP_FILE_EXCL) &&
attr->ia_size > inode->i_size) {
- inode->i_size = attr->ia_size;
+ i_size_write(inode, attr->ia_size);
inode->i_blocks =
(attr->ia_size + (1 << 9) - 1) >> 9;
inode->i_ctime = attr->ia_ctime;
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 7febcf2475c5..50b268483302 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -50,7 +50,7 @@ void cifs_vfs_err(const char *fmt, ...)
vaf.fmt = fmt;
vaf.va = &args;
- pr_err("CIFS VFS: %pV", &vaf);
+ pr_err_ratelimited("CIFS VFS: %pV", &vaf);
va_end(args);
}
diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h
index f40fbaca1b2a..66cf0f9fff89 100644
--- a/fs/cifs/cifs_debug.h
+++ b/fs/cifs/cifs_debug.h
@@ -51,14 +51,13 @@ __printf(1, 2) void cifs_vfs_err(const char *fmt, ...);
/* information message: e.g., configuration, major event */
#define cifs_dbg(type, fmt, ...) \
do { \
- if (type == FYI) { \
- if (cifsFYI & CIFS_INFO) { \
- pr_debug("%s: " fmt, __FILE__, ##__VA_ARGS__); \
- } \
+ if (type == FYI && cifsFYI & CIFS_INFO) { \
+ pr_debug_ratelimited("%s: " \
+ fmt, __FILE__, ##__VA_ARGS__); \
} else if (type == VFS) { \
cifs_vfs_err(fmt, ##__VA_ARGS__); \
} else if (type == NOISY && type != 0) { \
- pr_debug(fmt, ##__VA_ARGS__); \
+ pr_debug_ratelimited(fmt, ##__VA_ARGS__); \
} \
} while (0)
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index c4c1169814b2..c48ca13673e3 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -507,6 +507,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
seq_printf(s, ",rsize=%u", cifs_sb->rsize);
seq_printf(s, ",wsize=%u", cifs_sb->wsize);
+ seq_printf(s, ",echo_interval=%lu",
+ tcon->ses->server->echo_interval / HZ);
/* convert actimeo and display it in seconds */
seq_printf(s, ",actimeo=%lu", cifs_sb->actimeo / HZ);
@@ -640,9 +642,9 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb)
while (*s && *s != sep)
s++;
- mutex_lock(&dir->i_mutex);
+ inode_lock(dir);
child = lookup_one_len(p, dentry, s - p);
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
dput(dentry);
dentry = child;
} while (!IS_ERR(dentry));
@@ -752,6 +754,9 @@ cifs_loose_read_iter(struct kiocb *iocb, struct iov_iter *iter)
ssize_t rc;
struct inode *inode = file_inode(iocb->ki_filp);
+ if (iocb->ki_filp->f_flags & O_DIRECT)
+ return cifs_user_readv(iocb, iter);
+
rc = cifs_revalidate_mapping(inode);
if (rc)
return rc;
@@ -766,6 +771,18 @@ static ssize_t cifs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
ssize_t written;
int rc;
+ if (iocb->ki_filp->f_flags & O_DIRECT) {
+ written = cifs_user_writev(iocb, from);
+ if (written > 0 && CIFS_CACHE_READ(cinode)) {
+ cifs_zap_mapping(inode);
+ cifs_dbg(FYI,
+ "Set no oplock for inode=%p after a write operation\n",
+ inode);
+ cinode->oplock = 0;
+ }
+ return written;
+ }
+
written = cifs_get_writer(cinode);
if (written)
return written;
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 2b510c537a0d..a25b2513f146 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -70,8 +70,10 @@
#define SERVER_NAME_LENGTH 40
#define SERVER_NAME_LEN_WITH_NULL (SERVER_NAME_LENGTH + 1)
-/* SMB echo "timeout" -- FIXME: tunable? */
-#define SMB_ECHO_INTERVAL (60 * HZ)
+/* echo interval in seconds */
+#define SMB_ECHO_INTERVAL_MIN 1
+#define SMB_ECHO_INTERVAL_MAX 600
+#define SMB_ECHO_INTERVAL_DEFAULT 60
#include "cifspdu.h"
@@ -225,7 +227,7 @@ struct smb_version_operations {
void (*print_stats)(struct seq_file *m, struct cifs_tcon *);
void (*dump_share_caps)(struct seq_file *, struct cifs_tcon *);
/* verify the message */
- int (*check_message)(char *, unsigned int);
+ int (*check_message)(char *, unsigned int, struct TCP_Server_Info *);
bool (*is_oplock_break)(char *, struct TCP_Server_Info *);
void (*downgrade_oplock)(struct TCP_Server_Info *,
struct cifsInodeInfo *, bool);
@@ -507,6 +509,7 @@ struct smb_vol {
struct sockaddr_storage dstaddr; /* destination address */
struct sockaddr_storage srcaddr; /* allow binding to a local IP */
struct nls_table *local_nls;
+ unsigned int echo_interval; /* echo interval in secs */
};
#define CIFS_MOUNT_MASK (CIFS_MOUNT_NO_PERM | CIFS_MOUNT_SET_UID | \
@@ -627,7 +630,9 @@ struct TCP_Server_Info {
#ifdef CONFIG_CIFS_SMB2
unsigned int max_read;
unsigned int max_write;
+ __u8 preauth_hash[512];
#endif /* CONFIG_CIFS_SMB2 */
+ unsigned long echo_interval;
};
static inline unsigned int
@@ -809,7 +814,10 @@ struct cifs_ses {
bool need_reconnect:1; /* connection reset, uid now invalid */
#ifdef CONFIG_CIFS_SMB2
__u16 session_flags;
- char smb3signingkey[SMB3_SIGN_KEY_SIZE]; /* for signing smb3 packets */
+ __u8 smb3signingkey[SMB3_SIGN_KEY_SIZE];
+ __u8 smb3encryptionkey[SMB3_SIGN_KEY_SIZE];
+ __u8 smb3decryptionkey[SMB3_SIGN_KEY_SIZE];
+ __u8 preauth_hash[512];
#endif /* CONFIG_CIFS_SMB2 */
};
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index c63fd1dde25b..eed7ff50faf0 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -102,7 +102,7 @@ extern int SendReceiveBlockingLock(const unsigned int xid,
struct smb_hdr *out_buf,
int *bytes_returned);
extern int cifs_reconnect(struct TCP_Server_Info *server);
-extern int checkSMB(char *buf, unsigned int length);
+extern int checkSMB(char *buf, unsigned int len, struct TCP_Server_Info *srvr);
extern bool is_valid_oplock_break(char *, struct TCP_Server_Info *);
extern bool backup_cred(struct cifs_sb_info *);
extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof);
@@ -439,7 +439,8 @@ extern int setup_ntlm_response(struct cifs_ses *, const struct nls_table *);
extern int setup_ntlmv2_rsp(struct cifs_ses *, const struct nls_table *);
extern void cifs_crypto_shash_release(struct TCP_Server_Info *);
extern int calc_seckey(struct cifs_ses *);
-extern int generate_smb3signingkey(struct cifs_ses *);
+extern int generate_smb30signingkey(struct cifs_ses *);
+extern int generate_smb311signingkey(struct cifs_ses *);
#ifdef CONFIG_CIFS_WEAK_PW_HASH
extern int calc_lanman_hash(const char *password, const char *cryptkey,
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index ecb0803bdb0e..4fbd92d2e113 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -95,6 +95,7 @@ enum {
Opt_cruid, Opt_gid, Opt_file_mode,
Opt_dirmode, Opt_port,
Opt_rsize, Opt_wsize, Opt_actimeo,
+ Opt_echo_interval,
/* Mount options which take string value */
Opt_user, Opt_pass, Opt_ip,
@@ -188,6 +189,7 @@ static const match_table_t cifs_mount_option_tokens = {
{ Opt_rsize, "rsize=%s" },
{ Opt_wsize, "wsize=%s" },
{ Opt_actimeo, "actimeo=%s" },
+ { Opt_echo_interval, "echo_interval=%s" },
{ Opt_blank_user, "user=" },
{ Opt_blank_user, "username=" },
@@ -368,7 +370,6 @@ cifs_reconnect(struct TCP_Server_Info *server)
server->session_key.response = NULL;
server->session_key.len = 0;
server->lstrp = jiffies;
- mutex_unlock(&server->srv_mutex);
/* mark submitted MIDs for retry and issue callback */
INIT_LIST_HEAD(&retry_list);
@@ -381,6 +382,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
list_move(&mid_entry->qhead, &retry_list);
}
spin_unlock(&GlobalMid_Lock);
+ mutex_unlock(&server->srv_mutex);
cifs_dbg(FYI, "%s: issuing mid callbacks\n", __func__);
list_for_each_safe(tmp, tmp2, &retry_list) {
@@ -418,6 +420,7 @@ cifs_echo_request(struct work_struct *work)
int rc;
struct TCP_Server_Info *server = container_of(work,
struct TCP_Server_Info, echo.work);
+ unsigned long echo_interval = server->echo_interval;
/*
* We cannot send an echo if it is disabled or until the
@@ -427,7 +430,7 @@ cifs_echo_request(struct work_struct *work)
*/
if (!server->ops->need_neg || server->ops->need_neg(server) ||
(server->ops->can_echo && !server->ops->can_echo(server)) ||
- time_before(jiffies, server->lstrp + SMB_ECHO_INTERVAL - HZ))
+ time_before(jiffies, server->lstrp + echo_interval - HZ))
goto requeue_echo;
rc = server->ops->echo ? server->ops->echo(server) : -ENOSYS;
@@ -436,7 +439,7 @@ cifs_echo_request(struct work_struct *work)
server->hostname);
requeue_echo:
- queue_delayed_work(cifsiod_wq, &server->echo, SMB_ECHO_INTERVAL);
+ queue_delayed_work(cifsiod_wq, &server->echo, echo_interval);
}
static bool
@@ -487,9 +490,9 @@ server_unresponsive(struct TCP_Server_Info *server)
* a response in >60s.
*/
if (server->tcpStatus == CifsGood &&
- time_after(jiffies, server->lstrp + 2 * SMB_ECHO_INTERVAL)) {
- cifs_dbg(VFS, "Server %s has not responded in %d seconds. Reconnecting...\n",
- server->hostname, (2 * SMB_ECHO_INTERVAL) / HZ);
+ time_after(jiffies, server->lstrp + 2 * server->echo_interval)) {
+ cifs_dbg(VFS, "Server %s has not responded in %lu seconds. Reconnecting...\n",
+ server->hostname, (2 * server->echo_interval) / HZ);
cifs_reconnect(server);
wake_up(&server->response_q);
return true;
@@ -828,7 +831,7 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid)
* 48 bytes is enough to display the header and a little bit
* into the payload for debugging purposes.
*/
- length = server->ops->check_message(buf, server->total_read);
+ length = server->ops->check_message(buf, server->total_read, server);
if (length != 0)
cifs_dump_mem("Bad SMB: ", buf,
min_t(unsigned int, server->total_read, 48));
@@ -1624,6 +1627,14 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
goto cifs_parse_mount_err;
}
break;
+ case Opt_echo_interval:
+ if (get_option_ul(args, &option)) {
+ cifs_dbg(VFS, "%s: Invalid echo interval value\n",
+ __func__);
+ goto cifs_parse_mount_err;
+ }
+ vol->echo_interval = option;
+ break;
/* String Arguments */
@@ -2089,6 +2100,9 @@ static int match_server(struct TCP_Server_Info *server, struct smb_vol *vol)
if (!match_security(server, vol))
return 0;
+ if (server->echo_interval != vol->echo_interval)
+ return 0;
+
return 1;
}
@@ -2208,6 +2222,12 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
tcp_ses->tcpStatus = CifsNew;
++tcp_ses->srv_count;
+ if (volume_info->echo_interval >= SMB_ECHO_INTERVAL_MIN &&
+ volume_info->echo_interval <= SMB_ECHO_INTERVAL_MAX)
+ tcp_ses->echo_interval = volume_info->echo_interval * HZ;
+ else
+ tcp_ses->echo_interval = SMB_ECHO_INTERVAL_DEFAULT * HZ;
+
rc = ip_connect(tcp_ses);
if (rc < 0) {
cifs_dbg(VFS, "Error connecting to socket. Aborting operation.\n");
@@ -2237,7 +2257,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
cifs_fscache_get_client_cookie(tcp_ses);
/* queue echo request delayed work */
- queue_delayed_work(cifsiod_wq, &tcp_ses->echo, SMB_ECHO_INTERVAL);
+ queue_delayed_work(cifsiod_wq, &tcp_ses->echo, tcp_ses->echo_interval);
return tcp_ses;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 0a2752b79e72..ff882aeaccc6 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2267,7 +2267,7 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (rc)
return rc;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
xid = get_xid();
@@ -2292,7 +2292,7 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
}
free_xid(xid);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return rc;
}
@@ -2309,7 +2309,7 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (rc)
return rc;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
xid = get_xid();
@@ -2326,7 +2326,7 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
}
free_xid(xid);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return rc;
}
@@ -2672,7 +2672,7 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from)
* with a brlock that prevents writing.
*/
down_read(&cinode->lock_sem);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
rc = generic_write_checks(iocb, from);
if (rc <= 0)
@@ -2685,7 +2685,7 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from)
else
rc = -EACCES;
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (rc > 0) {
ssize_t err = generic_write_sync(file, iocb->ki_pos - rc, rc);
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index a329f5ba35aa..aeb26dbfa1bf 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -814,8 +814,21 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
}
} else
fattr.cf_uniqueid = iunique(sb, ROOT_I);
- } else
- fattr.cf_uniqueid = CIFS_I(*inode)->uniqueid;
+ } else {
+ if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) &&
+ validinum == false && server->ops->get_srv_inum) {
+ /*
+ * Pass a NULL tcon to ensure we don't make a round
+ * trip to the server. This only works for SMB2+.
+ */
+ tmprc = server->ops->get_srv_inum(xid,
+ NULL, cifs_sb, full_path,
+ &fattr.cf_uniqueid, data);
+ if (tmprc)
+ fattr.cf_uniqueid = CIFS_I(*inode)->uniqueid;
+ } else
+ fattr.cf_uniqueid = CIFS_I(*inode)->uniqueid;
+ }
/* query for SFU type info if supported and needed */
if (fattr.cf_cifsattrs & ATTR_SYSTEM &&
@@ -856,6 +869,13 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
} else {
/* we already have inode, update it */
+ /* if uniqueid is different, return error */
+ if (unlikely(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM &&
+ CIFS_I(*inode)->uniqueid != fattr.cf_uniqueid)) {
+ rc = -ESTALE;
+ goto cgii_exit;
+ }
+
/* if filetype is different, return error */
if (unlikely(((*inode)->i_mode & S_IFMT) !=
(fattr.cf_mode & S_IFMT))) {
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 8442b8b8e0be..813fe13c2ae1 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -310,7 +310,7 @@ check_smb_hdr(struct smb_hdr *smb)
}
int
-checkSMB(char *buf, unsigned int total_read)
+checkSMB(char *buf, unsigned int total_read, struct TCP_Server_Info *server)
{
struct smb_hdr *smb = (struct smb_hdr *)buf;
__u32 rfclen = be32_to_cpu(smb->smb_buf_length);
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 0557c45e9c33..b30a4a6d98a0 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -847,6 +847,7 @@ int cifs_readdir(struct file *file, struct dir_context *ctx)
* if buggy server returns . and .. late do we want to
* check for that here?
*/
+ *tmp_buf = 0;
rc = cifs_filldir(current_entry, file, ctx,
tmp_buf, max_len);
if (rc) {
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 1c5907019045..389fb9f8c84e 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -38,7 +38,7 @@ check_smb2_hdr(struct smb2_hdr *hdr, __u64 mid)
* Make sure that this really is an SMB, that it is a response,
* and that the message ids match.
*/
- if ((*(__le32 *)hdr->ProtocolId == SMB2_PROTO_NUMBER) &&
+ if ((hdr->ProtocolId == SMB2_PROTO_NUMBER) &&
(mid == wire_mid)) {
if (hdr->Flags & SMB2_FLAGS_SERVER_TO_REDIR)
return 0;
@@ -50,9 +50,9 @@ check_smb2_hdr(struct smb2_hdr *hdr, __u64 mid)
cifs_dbg(VFS, "Received Request not response\n");
}
} else { /* bad signature or mid */
- if (*(__le32 *)hdr->ProtocolId != SMB2_PROTO_NUMBER)
+ if (hdr->ProtocolId != SMB2_PROTO_NUMBER)
cifs_dbg(VFS, "Bad protocol string signature header %x\n",
- *(unsigned int *) hdr->ProtocolId);
+ le32_to_cpu(hdr->ProtocolId));
if (mid != wire_mid)
cifs_dbg(VFS, "Mids do not match: %llu and %llu\n",
mid, wire_mid);
@@ -93,11 +93,11 @@ static const __le16 smb2_rsp_struct_sizes[NUMBER_OF_SMB2_COMMANDS] = {
};
int
-smb2_check_message(char *buf, unsigned int length)
+smb2_check_message(char *buf, unsigned int length, struct TCP_Server_Info *srvr)
{
struct smb2_hdr *hdr = (struct smb2_hdr *)buf;
struct smb2_pdu *pdu = (struct smb2_pdu *)hdr;
- __u64 mid = le64_to_cpu(hdr->MessageId);
+ __u64 mid;
__u32 len = get_rfc1002_length(buf);
__u32 clc_len; /* calculated length */
int command;
@@ -111,6 +111,30 @@ smb2_check_message(char *buf, unsigned int length)
* ie Validate the wct via smb2_struct_sizes table above
*/
+ if (hdr->ProtocolId == SMB2_TRANSFORM_PROTO_NUM) {
+ struct smb2_transform_hdr *thdr =
+ (struct smb2_transform_hdr *)buf;
+ struct cifs_ses *ses = NULL;
+ struct list_head *tmp;
+
+ /* decrypt frame now that it is completely read in */
+ spin_lock(&cifs_tcp_ses_lock);
+ list_for_each(tmp, &srvr->smb_ses_list) {
+ ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
+ if (ses->Suid == thdr->SessionId)
+ break;
+
+ ses = NULL;
+ }
+ spin_unlock(&cifs_tcp_ses_lock);
+ if (ses == NULL) {
+ cifs_dbg(VFS, "no decryption - session id not found\n");
+ return 1;
+ }
+ }
+
+
+ mid = le64_to_cpu(hdr->MessageId);
if (length < sizeof(struct smb2_pdu)) {
if ((length >= sizeof(struct smb2_hdr)) && (hdr->Status != 0)) {
pdu->StructureSize2 = 0;
@@ -322,7 +346,7 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr)
/* return pointer to beginning of data area, ie offset from SMB start */
if ((*off != 0) && (*len != 0))
- return (char *)(&hdr->ProtocolId[0]) + *off;
+ return (char *)(&hdr->ProtocolId) + *off;
else
return NULL;
}
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 53ccdde6ff18..3525ed756173 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -182,6 +182,11 @@ smb2_find_mid(struct TCP_Server_Info *server, char *buf)
struct smb2_hdr *hdr = (struct smb2_hdr *)buf;
__u64 wire_mid = le64_to_cpu(hdr->MessageId);
+ if (hdr->ProtocolId == SMB2_TRANSFORM_PROTO_NUM) {
+ cifs_dbg(VFS, "encrypted frame parsing not supported yet");
+ return NULL;
+ }
+
spin_lock(&GlobalMid_Lock);
list_for_each_entry(mid, &server->pending_mid_q, qhead) {
if ((mid->mid == wire_mid) &&
@@ -1692,7 +1697,7 @@ struct smb_version_operations smb30_operations = {
.get_lease_key = smb2_get_lease_key,
.set_lease_key = smb2_set_lease_key,
.new_lease_key = smb2_new_lease_key,
- .generate_signingkey = generate_smb3signingkey,
+ .generate_signingkey = generate_smb30signingkey,
.calc_signature = smb3_calc_signature,
.set_integrity = smb3_set_integrity,
.is_read_op = smb21_is_read_op,
@@ -1779,7 +1784,7 @@ struct smb_version_operations smb311_operations = {
.get_lease_key = smb2_get_lease_key,
.set_lease_key = smb2_set_lease_key,
.new_lease_key = smb2_new_lease_key,
- .generate_signingkey = generate_smb3signingkey,
+ .generate_signingkey = generate_smb311signingkey,
.calc_signature = smb3_calc_signature,
.set_integrity = smb3_set_integrity,
.is_read_op = smb21_is_read_op,
@@ -1838,7 +1843,7 @@ struct smb_version_values smb21_values = {
struct smb_version_values smb30_values = {
.version_string = SMB30_VERSION_STRING,
.protocol_id = SMB30_PROT_ID,
- .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES,
+ .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION,
.large_lock_type = 0,
.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
@@ -1858,7 +1863,7 @@ struct smb_version_values smb30_values = {
struct smb_version_values smb302_values = {
.version_string = SMB302_VERSION_STRING,
.protocol_id = SMB302_PROT_ID,
- .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES,
+ .req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION,
.large_lock_type = 0,
.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 767555518d40..10f8d5cf5681 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -97,10 +97,7 @@ smb2_hdr_assemble(struct smb2_hdr *hdr, __le16 smb2_cmd /* command */ ,
hdr->smb2_buf_length = cpu_to_be32(parmsize + sizeof(struct smb2_hdr)
- 4 /* RFC 1001 length field itself not counted */);
- hdr->ProtocolId[0] = 0xFE;
- hdr->ProtocolId[1] = 'S';
- hdr->ProtocolId[2] = 'M';
- hdr->ProtocolId[3] = 'B';
+ hdr->ProtocolId = SMB2_PROTO_NUMBER;
hdr->StructureSize = cpu_to_le16(64);
hdr->Command = smb2_cmd;
hdr->CreditRequest = cpu_to_le16(2); /* BB make this dynamic */
@@ -1573,7 +1570,8 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
goto ioctl_exit;
}
- memcpy(*out_data, rsp->hdr.ProtocolId + le32_to_cpu(rsp->OutputOffset),
+ memcpy(*out_data,
+ (char *)&rsp->hdr.ProtocolId + le32_to_cpu(rsp->OutputOffset),
*plen);
ioctl_exit:
free_rsp_buf(resp_buftype, rsp);
@@ -2093,7 +2091,7 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
}
if (*buf) {
- memcpy(*buf, (char *)rsp->hdr.ProtocolId + rsp->DataOffset,
+ memcpy(*buf, (char *)&rsp->hdr.ProtocolId + rsp->DataOffset,
*nbytes);
free_rsp_buf(resp_buftype, iov[0].iov_base);
} else if (resp_buftype != CIFS_NO_BUFFER) {
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 4af52780ec35..ff88d9feb01e 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -86,6 +86,7 @@
#define MAX_SMB2_HDR_SIZE 0x78 /* 4 len + 64 hdr + (2*24 wct) + 2 bct + 2 pad */
#define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe)
+#define SMB2_TRANSFORM_PROTO_NUM cpu_to_le32(0x424d53fd)
/*
* SMB2 Header Definition
@@ -102,7 +103,7 @@ struct smb2_hdr {
__be32 smb2_buf_length; /* big endian on wire */
/* length is only two or three bytes - with
one or two byte type preceding it that MBZ */
- __u8 ProtocolId[4]; /* 0xFE 'S' 'M' 'B' */
+ __le32 ProtocolId; /* 0xFE 'S' 'M' 'B' */
__le16 StructureSize; /* 64 */
__le16 CreditCharge; /* MBZ */
__le32 Status; /* Error from server */
@@ -128,11 +129,10 @@ struct smb2_transform_hdr {
one or two byte type preceding it that MBZ */
__u8 ProtocolId[4]; /* 0xFD 'S' 'M' 'B' */
__u8 Signature[16];
- __u8 Nonce[11];
- __u8 Reserved[5];
+ __u8 Nonce[16];
__le32 OriginalMessageSize;
__u16 Reserved1;
- __le16 EncryptionAlgorithm;
+ __le16 Flags; /* EncryptionAlgorithm */
__u64 SessionId;
} __packed;
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 79dc650c18b2..4f07dc93608d 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -34,7 +34,8 @@ struct smb_rqst;
*****************************************************************
*/
extern int map_smb2_to_linux_error(char *buf, bool log_err);
-extern int smb2_check_message(char *buf, unsigned int length);
+extern int smb2_check_message(char *buf, unsigned int length,
+ struct TCP_Server_Info *server);
extern unsigned int smb2_calc_size(void *buf);
extern char *smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr);
extern __le16 *cifs_convert_path_to_utf16(const char *from,
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index d4c5b6f109a7..8732a43b1008 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -222,8 +222,8 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
return rc;
}
-int
-generate_smb3signingkey(struct cifs_ses *ses)
+static int generate_key(struct cifs_ses *ses, struct kvec label,
+ struct kvec context, __u8 *key, unsigned int key_size)
{
unsigned char zero = 0x0;
__u8 i[4] = {0, 0, 0, 1};
@@ -233,7 +233,7 @@ generate_smb3signingkey(struct cifs_ses *ses)
unsigned char *hashptr = prfhash;
memset(prfhash, 0x0, SMB2_HMACSHA256_SIZE);
- memset(ses->smb3signingkey, 0x0, SMB3_SIGNKEY_SIZE);
+ memset(key, 0x0, key_size);
rc = smb3_crypto_shash_allocate(ses->server);
if (rc) {
@@ -262,7 +262,7 @@ generate_smb3signingkey(struct cifs_ses *ses)
}
rc = crypto_shash_update(&ses->server->secmech.sdeschmacsha256->shash,
- "SMB2AESCMAC", 12);
+ label.iov_base, label.iov_len);
if (rc) {
cifs_dbg(VFS, "%s: Could not update with label\n", __func__);
goto smb3signkey_ret;
@@ -276,7 +276,7 @@ generate_smb3signingkey(struct cifs_ses *ses)
}
rc = crypto_shash_update(&ses->server->secmech.sdeschmacsha256->shash,
- "SmbSign", 8);
+ context.iov_base, context.iov_len);
if (rc) {
cifs_dbg(VFS, "%s: Could not update with context\n", __func__);
goto smb3signkey_ret;
@@ -296,12 +296,102 @@ generate_smb3signingkey(struct cifs_ses *ses)
goto smb3signkey_ret;
}
- memcpy(ses->smb3signingkey, hashptr, SMB3_SIGNKEY_SIZE);
+ memcpy(key, hashptr, key_size);
smb3signkey_ret:
return rc;
}
+struct derivation {
+ struct kvec label;
+ struct kvec context;
+};
+
+struct derivation_triplet {
+ struct derivation signing;
+ struct derivation encryption;
+ struct derivation decryption;
+};
+
+static int
+generate_smb3signingkey(struct cifs_ses *ses,
+ const struct derivation_triplet *ptriplet)
+{
+ int rc;
+
+ rc = generate_key(ses, ptriplet->signing.label,
+ ptriplet->signing.context, ses->smb3signingkey,
+ SMB3_SIGN_KEY_SIZE);
+ if (rc)
+ return rc;
+
+ rc = generate_key(ses, ptriplet->encryption.label,
+ ptriplet->encryption.context, ses->smb3encryptionkey,
+ SMB3_SIGN_KEY_SIZE);
+ if (rc)
+ return rc;
+
+ return generate_key(ses, ptriplet->decryption.label,
+ ptriplet->decryption.context,
+ ses->smb3decryptionkey, SMB3_SIGN_KEY_SIZE);
+}
+
+int
+generate_smb30signingkey(struct cifs_ses *ses)
+
+{
+ struct derivation_triplet triplet;
+ struct derivation *d;
+
+ d = &triplet.signing;
+ d->label.iov_base = "SMB2AESCMAC";
+ d->label.iov_len = 12;
+ d->context.iov_base = "SmbSign";
+ d->context.iov_len = 8;
+
+ d = &triplet.encryption;
+ d->label.iov_base = "SMB2AESCCM";
+ d->label.iov_len = 11;
+ d->context.iov_base = "ServerIn ";
+ d->context.iov_len = 10;
+
+ d = &triplet.decryption;
+ d->label.iov_base = "SMB2AESCCM";
+ d->label.iov_len = 11;
+ d->context.iov_base = "ServerOut";
+ d->context.iov_len = 10;
+
+ return generate_smb3signingkey(ses, &triplet);
+}
+
+int
+generate_smb311signingkey(struct cifs_ses *ses)
+
+{
+ struct derivation_triplet triplet;
+ struct derivation *d;
+
+ d = &triplet.signing;
+ d->label.iov_base = "SMB2AESCMAC";
+ d->label.iov_len = 12;
+ d->context.iov_base = "SmbSign";
+ d->context.iov_len = 8;
+
+ d = &triplet.encryption;
+ d->label.iov_base = "SMB2AESCCM";
+ d->label.iov_len = 11;
+ d->context.iov_base = "ServerIn ";
+ d->context.iov_len = 10;
+
+ d = &triplet.decryption;
+ d->label.iov_base = "SMB2AESCCM";
+ d->label.iov_len = 11;
+ d->context.iov_base = "ServerOut";
+ d->context.iov_len = 10;
+
+ return generate_smb3signingkey(ses, &triplet);
+}
+
int
smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
{
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 2a24c524fb9a..87abe8ed074c 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -576,14 +576,16 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst,
cifs_in_send_dec(server);
cifs_save_when_sent(mid);
- if (rc < 0)
+ if (rc < 0) {
server->sequence_number -= 2;
+ cifs_delete_mid(mid);
+ }
+
mutex_unlock(&server->srv_mutex);
if (rc == 0)
return 0;
- cifs_delete_mid(mid);
add_credits_and_wake_if(server, credits, optype);
return rc;
}
diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h
index f829fe963f5b..5104d84c4f64 100644
--- a/fs/coda/coda_linux.h
+++ b/fs/coda/coda_linux.h
@@ -72,8 +72,7 @@ void coda_sysctl_clean(void);
} while (0)
-#define CODA_FREE(ptr,size) \
- do { if (size < PAGE_SIZE) kfree((ptr)); else vfree((ptr)); } while (0)
+#define CODA_FREE(ptr, size) kvfree((ptr))
/* inode to cnode access functions */
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index fda9f4311212..42e731b8c80a 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -427,13 +427,13 @@ static int coda_readdir(struct file *coda_file, struct dir_context *ctx)
if (host_file->f_op->iterate) {
struct inode *host_inode = file_inode(host_file);
- mutex_lock(&host_inode->i_mutex);
+ inode_lock(host_inode);
ret = -ENOENT;
if (!IS_DEADDIR(host_inode)) {
ret = host_file->f_op->iterate(host_file, ctx);
file_accessed(host_file);
}
- mutex_unlock(&host_inode->i_mutex);
+ inode_unlock(host_inode);
return ret;
}
/* Venus: we must read Venus dirents from a file */
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 1da3805f3ddc..f47c7483863b 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -71,12 +71,12 @@ coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to)
host_file = cfi->cfi_container;
file_start_write(host_file);
- mutex_lock(&coda_inode->i_mutex);
+ inode_lock(coda_inode);
ret = vfs_iter_write(cfi->cfi_container, to, &iocb->ki_pos);
coda_inode->i_size = file_inode(host_file)->i_size;
coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9;
coda_inode->i_mtime = coda_inode->i_ctime = CURRENT_TIME_SEC;
- mutex_unlock(&coda_inode->i_mutex);
+ inode_unlock(coda_inode);
file_end_write(host_file);
return ret;
}
@@ -203,7 +203,7 @@ int coda_fsync(struct file *coda_file, loff_t start, loff_t end, int datasync)
err = filemap_write_and_wait_range(coda_inode->i_mapping, start, end);
if (err)
return err;
- mutex_lock(&coda_inode->i_mutex);
+ inode_lock(coda_inode);
cfi = CODA_FTOC(coda_file);
BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
@@ -212,7 +212,7 @@ int coda_fsync(struct file *coda_file, loff_t start, loff_t end, int datasync)
err = vfs_fsync(host_file, datasync);
if (!err && !datasync)
err = venus_fsync(coda_inode->i_sb, coda_i2f(coda_inode));
- mutex_unlock(&coda_inode->i_mutex);
+ inode_unlock(coda_inode);
return err;
}
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index cab612b2ae76..f419519ec41f 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -640,13 +640,13 @@ static void detach_groups(struct config_group *group)
child = sd->s_dentry;
- mutex_lock(&d_inode(child)->i_mutex);
+ inode_lock(d_inode(child));
configfs_detach_group(sd->s_element);
d_inode(child)->i_flags |= S_DEAD;
dont_mount(child);
- mutex_unlock(&d_inode(child)->i_mutex);
+ inode_unlock(d_inode(child));
d_delete(child);
dput(child);
@@ -834,11 +834,11 @@ static int configfs_attach_item(struct config_item *parent_item,
* the VFS may already have hit and used them. Thus,
* we must lock them as rmdir() would.
*/
- mutex_lock(&d_inode(dentry)->i_mutex);
+ inode_lock(d_inode(dentry));
configfs_remove_dir(item);
d_inode(dentry)->i_flags |= S_DEAD;
dont_mount(dentry);
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
d_delete(dentry);
}
}
@@ -874,7 +874,7 @@ static int configfs_attach_group(struct config_item *parent_item,
* We must also lock the inode to remove it safely in case of
* error, as rmdir() would.
*/
- mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD);
configfs_adjust_dir_dirent_depth_before_populate(sd);
ret = populate_groups(to_config_group(item));
if (ret) {
@@ -883,7 +883,7 @@ static int configfs_attach_group(struct config_item *parent_item,
dont_mount(dentry);
}
configfs_adjust_dir_dirent_depth_after_populate(sd);
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
if (ret)
d_delete(dentry);
}
@@ -1135,7 +1135,7 @@ int configfs_depend_item(struct configfs_subsystem *subsys,
* subsystem is really registered, and so we need to lock out
* configfs_[un]register_subsystem().
*/
- mutex_lock(&d_inode(root)->i_mutex);
+ inode_lock(d_inode(root));
subsys_sd = configfs_find_subsys_dentry(root->d_fsdata, s_item);
if (!subsys_sd) {
@@ -1147,7 +1147,7 @@ int configfs_depend_item(struct configfs_subsystem *subsys,
ret = configfs_do_depend_item(subsys_sd->s_dentry, target);
out_unlock_fs:
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
/*
* If we succeeded, the fs is pinned via other methods. If not,
@@ -1230,7 +1230,7 @@ int configfs_depend_item_unlocked(struct configfs_subsystem *caller_subsys,
* additional locking to prevent other subsystem from being
* unregistered
*/
- mutex_lock(&d_inode(root->cg_item.ci_dentry)->i_mutex);
+ inode_lock(d_inode(root->cg_item.ci_dentry));
/*
* As we are trying to depend item from other subsystem
@@ -1254,7 +1254,7 @@ out_root_unlock:
* We were called from subsystem other than our target so we
* took some locks so now it's time to release them
*/
- mutex_unlock(&d_inode(root->cg_item.ci_dentry)->i_mutex);
+ inode_unlock(d_inode(root->cg_item.ci_dentry));
return ret;
}
@@ -1561,7 +1561,7 @@ int configfs_rename_dir(struct config_item * item, const char *new_name)
down_write(&configfs_rename_sem);
parent = item->parent->dentry;
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
new_dentry = lookup_one_len(new_name, parent, strlen(new_name));
if (!IS_ERR(new_dentry)) {
@@ -1577,7 +1577,7 @@ int configfs_rename_dir(struct config_item * item, const char *new_name)
error = -EEXIST;
dput(new_dentry);
}
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
up_write(&configfs_rename_sem);
return error;
@@ -1590,7 +1590,7 @@ static int configfs_dir_open(struct inode *inode, struct file *file)
struct configfs_dirent * parent_sd = dentry->d_fsdata;
int err;
- mutex_lock(&d_inode(dentry)->i_mutex);
+ inode_lock(d_inode(dentry));
/*
* Fake invisibility if dir belongs to a group/default groups hierarchy
* being attached
@@ -1603,7 +1603,7 @@ static int configfs_dir_open(struct inode *inode, struct file *file)
else
err = 0;
}
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
return err;
}
@@ -1613,11 +1613,11 @@ static int configfs_dir_close(struct inode *inode, struct file *file)
struct dentry * dentry = file->f_path.dentry;
struct configfs_dirent * cursor = file->private_data;
- mutex_lock(&d_inode(dentry)->i_mutex);
+ inode_lock(d_inode(dentry));
spin_lock(&configfs_dirent_lock);
list_del_init(&cursor->s_sibling);
spin_unlock(&configfs_dirent_lock);
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
release_configfs_dirent(cursor);
@@ -1698,7 +1698,7 @@ static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence)
{
struct dentry * dentry = file->f_path.dentry;
- mutex_lock(&d_inode(dentry)->i_mutex);
+ inode_lock(d_inode(dentry));
switch (whence) {
case 1:
offset += file->f_pos;
@@ -1706,7 +1706,7 @@ static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence)
if (offset >= 0)
break;
default:
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
return -EINVAL;
}
if (offset != file->f_pos) {
@@ -1732,7 +1732,7 @@ static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence)
spin_unlock(&configfs_dirent_lock);
}
}
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
return offset;
}
@@ -1767,14 +1767,14 @@ int configfs_register_group(struct config_group *parent_group,
parent = parent_group->cg_item.ci_dentry;
- mutex_lock_nested(&d_inode(parent)->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(d_inode(parent), I_MUTEX_PARENT);
ret = create_default_group(parent_group, group);
if (!ret) {
spin_lock(&configfs_dirent_lock);
configfs_dir_set_ready(group->cg_item.ci_dentry->d_fsdata);
spin_unlock(&configfs_dirent_lock);
}
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
return ret;
}
EXPORT_SYMBOL(configfs_register_group);
@@ -1791,7 +1791,7 @@ void configfs_unregister_group(struct config_group *group)
struct dentry *dentry = group->cg_item.ci_dentry;
struct dentry *parent = group->cg_item.ci_parent->ci_dentry;
- mutex_lock_nested(&d_inode(parent)->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(d_inode(parent), I_MUTEX_PARENT);
spin_lock(&configfs_dirent_lock);
configfs_detach_prep(dentry, NULL);
spin_unlock(&configfs_dirent_lock);
@@ -1800,7 +1800,7 @@ void configfs_unregister_group(struct config_group *group)
d_inode(dentry)->i_flags |= S_DEAD;
dont_mount(dentry);
d_delete(dentry);
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
dput(dentry);
@@ -1872,7 +1872,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
sd = root->d_fsdata;
link_group(to_config_group(sd->s_element), group);
- mutex_lock_nested(&d_inode(root)->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(d_inode(root), I_MUTEX_PARENT);
err = -ENOMEM;
dentry = d_alloc_name(root, group->cg_item.ci_name);
@@ -1892,7 +1892,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
}
}
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
if (err) {
unlink_group(group);
@@ -1913,9 +1913,9 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
return;
}
- mutex_lock_nested(&d_inode(root)->i_mutex,
+ inode_lock_nested(d_inode(root),
I_MUTEX_PARENT);
- mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD);
mutex_lock(&configfs_symlink_mutex);
spin_lock(&configfs_dirent_lock);
if (configfs_detach_prep(dentry, NULL)) {
@@ -1926,11 +1926,11 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
configfs_detach_group(&group->cg_item);
d_inode(dentry)->i_flags |= S_DEAD;
dont_mount(dentry);
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
d_delete(dentry);
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
dput(dentry);
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index 3687187c8ea5..33b7ee34eda5 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -540,10 +540,10 @@ int configfs_create_file(struct config_item * item, const struct configfs_attrib
umode_t mode = (attr->ca_mode & S_IALLUGO) | S_IFREG;
int error = 0;
- mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_NORMAL);
+ inode_lock_nested(d_inode(dir), I_MUTEX_NORMAL);
error = configfs_make_dirent(parent_sd, NULL, (void *) attr, mode,
CONFIGFS_ITEM_ATTR);
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
return error;
}
@@ -562,10 +562,10 @@ int configfs_create_bin_file(struct config_item *item,
umode_t mode = (bin_attr->cb_attr.ca_mode & S_IALLUGO) | S_IFREG;
int error = 0;
- mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_NORMAL);
+ inode_lock_nested(dir->d_inode, I_MUTEX_NORMAL);
error = configfs_make_dirent(parent_sd, NULL, (void *) bin_attr, mode,
CONFIGFS_ITEM_BIN_ATTR);
- mutex_unlock(&dir->d_inode->i_mutex);
+ inode_unlock(dir->d_inode);
return error;
}
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 0cc810e9dccc..cee087d8f7e0 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -255,7 +255,7 @@ void configfs_hash_and_remove(struct dentry * dir, const char * name)
/* no inode means this hasn't been made visible yet */
return;
- mutex_lock(&d_inode(dir)->i_mutex);
+ inode_lock(d_inode(dir));
list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
if (!sd->s_element)
continue;
@@ -268,5 +268,5 @@ void configfs_hash_and_remove(struct dentry * dir, const char * name)
break;
}
}
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
}
diff --git a/fs/dax.c b/fs/dax.c
index 7af879759064..4fd6b0c5c6b5 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -24,6 +24,7 @@
#include <linux/memcontrol.h>
#include <linux/mm.h>
#include <linux/mutex.h>
+#include <linux/pagevec.h>
#include <linux/pmem.h>
#include <linux/sched.h>
#include <linux/uio.h>
@@ -245,13 +246,14 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
loff_t end = pos + iov_iter_count(iter);
memset(&bh, 0, sizeof(bh));
+ bh.b_bdev = inode->i_sb->s_bdev;
if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ) {
struct address_space *mapping = inode->i_mapping;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
retval = filemap_write_and_wait_range(mapping, pos, end - 1);
if (retval) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
goto out;
}
}
@@ -263,7 +265,7 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
retval = dax_io(inode, iter, pos, end, get_block, &bh);
if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ)
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if ((retval > 0) && end_io)
end_io(iocb, pos, retval, bh.b_private);
@@ -324,6 +326,199 @@ static int copy_user_bh(struct page *to, struct inode *inode,
return 0;
}
+#define NO_SECTOR -1
+#define DAX_PMD_INDEX(page_index) (page_index & (PMD_MASK >> PAGE_CACHE_SHIFT))
+
+static int dax_radix_entry(struct address_space *mapping, pgoff_t index,
+ sector_t sector, bool pmd_entry, bool dirty)
+{
+ struct radix_tree_root *page_tree = &mapping->page_tree;
+ pgoff_t pmd_index = DAX_PMD_INDEX(index);
+ int type, error = 0;
+ void *entry;
+
+ WARN_ON_ONCE(pmd_entry && !dirty);
+ __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+
+ spin_lock_irq(&mapping->tree_lock);
+
+ entry = radix_tree_lookup(page_tree, pmd_index);
+ if (entry && RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD) {
+ index = pmd_index;
+ goto dirty;
+ }
+
+ entry = radix_tree_lookup(page_tree, index);
+ if (entry) {
+ type = RADIX_DAX_TYPE(entry);
+ if (WARN_ON_ONCE(type != RADIX_DAX_PTE &&
+ type != RADIX_DAX_PMD)) {
+ error = -EIO;
+ goto unlock;
+ }
+
+ if (!pmd_entry || type == RADIX_DAX_PMD)
+ goto dirty;
+
+ /*
+ * We only insert dirty PMD entries into the radix tree. This
+ * means we don't need to worry about removing a dirty PTE
+ * entry and inserting a clean PMD entry, thus reducing the
+ * range we would flush with a follow-up fsync/msync call.
+ */
+ radix_tree_delete(&mapping->page_tree, index);
+ mapping->nrexceptional--;
+ }
+
+ if (sector == NO_SECTOR) {
+ /*
+ * This can happen during correct operation if our pfn_mkwrite
+ * fault raced against a hole punch operation. If this
+ * happens the pte that was hole punched will have been
+ * unmapped and the radix tree entry will have been removed by
+ * the time we are called, but the call will still happen. We
+ * will return all the way up to wp_pfn_shared(), where the
+ * pte_same() check will fail, eventually causing page fault
+ * to be retried by the CPU.
+ */
+ goto unlock;
+ }
+
+ error = radix_tree_insert(page_tree, index,
+ RADIX_DAX_ENTRY(sector, pmd_entry));
+ if (error)
+ goto unlock;
+
+ mapping->nrexceptional++;
+ dirty:
+ if (dirty)
+ radix_tree_tag_set(page_tree, index, PAGECACHE_TAG_DIRTY);
+ unlock:
+ spin_unlock_irq(&mapping->tree_lock);
+ return error;
+}
+
+static int dax_writeback_one(struct block_device *bdev,
+ struct address_space *mapping, pgoff_t index, void *entry)
+{
+ struct radix_tree_root *page_tree = &mapping->page_tree;
+ int type = RADIX_DAX_TYPE(entry);
+ struct radix_tree_node *node;
+ struct blk_dax_ctl dax;
+ void **slot;
+ int ret = 0;
+
+ spin_lock_irq(&mapping->tree_lock);
+ /*
+ * Regular page slots are stabilized by the page lock even
+ * without the tree itself locked. These unlocked entries
+ * need verification under the tree lock.
+ */
+ if (!__radix_tree_lookup(page_tree, index, &node, &slot))
+ goto unlock;
+ if (*slot != entry)
+ goto unlock;
+
+ /* another fsync thread may have already written back this entry */
+ if (!radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
+ goto unlock;
+
+ if (WARN_ON_ONCE(type != RADIX_DAX_PTE && type != RADIX_DAX_PMD)) {
+ ret = -EIO;
+ goto unlock;
+ }
+
+ dax.sector = RADIX_DAX_SECTOR(entry);
+ dax.size = (type == RADIX_DAX_PMD ? PMD_SIZE : PAGE_SIZE);
+ spin_unlock_irq(&mapping->tree_lock);
+
+ /*
+ * We cannot hold tree_lock while calling dax_map_atomic() because it
+ * eventually calls cond_resched().
+ */
+ ret = dax_map_atomic(bdev, &dax);
+ if (ret < 0)
+ return ret;
+
+ if (WARN_ON_ONCE(ret < dax.size)) {
+ ret = -EIO;
+ goto unmap;
+ }
+
+ wb_cache_pmem(dax.addr, dax.size);
+
+ spin_lock_irq(&mapping->tree_lock);
+ radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_TOWRITE);
+ spin_unlock_irq(&mapping->tree_lock);
+ unmap:
+ dax_unmap_atomic(bdev, &dax);
+ return ret;
+
+ unlock:
+ spin_unlock_irq(&mapping->tree_lock);
+ return ret;
+}
+
+/*
+ * Flush the mapping to the persistent domain within the byte range of [start,
+ * end]. This is required by data integrity operations to ensure file data is
+ * on persistent storage prior to completion of the operation.
+ */
+int dax_writeback_mapping_range(struct address_space *mapping, loff_t start,
+ loff_t end)
+{
+ struct inode *inode = mapping->host;
+ struct block_device *bdev = inode->i_sb->s_bdev;
+ pgoff_t start_index, end_index, pmd_index;
+ pgoff_t indices[PAGEVEC_SIZE];
+ struct pagevec pvec;
+ bool done = false;
+ int i, ret = 0;
+ void *entry;
+
+ if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT))
+ return -EIO;
+
+ start_index = start >> PAGE_CACHE_SHIFT;
+ end_index = end >> PAGE_CACHE_SHIFT;
+ pmd_index = DAX_PMD_INDEX(start_index);
+
+ rcu_read_lock();
+ entry = radix_tree_lookup(&mapping->page_tree, pmd_index);
+ rcu_read_unlock();
+
+ /* see if the start of our range is covered by a PMD entry */
+ if (entry && RADIX_DAX_TYPE(entry) == RADIX_DAX_PMD)
+ start_index = pmd_index;
+
+ tag_pages_for_writeback(mapping, start_index, end_index);
+
+ pagevec_init(&pvec, 0);
+ while (!done) {
+ pvec.nr = find_get_entries_tag(mapping, start_index,
+ PAGECACHE_TAG_TOWRITE, PAGEVEC_SIZE,
+ pvec.pages, indices);
+
+ if (pvec.nr == 0)
+ break;
+
+ for (i = 0; i < pvec.nr; i++) {
+ if (indices[i] > end_index) {
+ done = true;
+ break;
+ }
+
+ ret = dax_writeback_one(bdev, mapping, indices[i],
+ pvec.pages[i]);
+ if (ret < 0)
+ return ret;
+ }
+ }
+ wmb_pmem();
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
+
static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
struct vm_area_struct *vma, struct vm_fault *vmf)
{
@@ -363,6 +558,11 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
}
dax_unmap_atomic(bdev, &dax);
+ error = dax_radix_entry(mapping, vmf->pgoff, dax.sector, false,
+ vmf->flags & FAULT_FLAG_WRITE);
+ if (error)
+ goto out;
+
error = vm_insert_mixed(vma, vaddr, dax.pfn);
out:
@@ -408,6 +608,7 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
memset(&bh, 0, sizeof(bh));
block = (sector_t)vmf->pgoff << (PAGE_SHIFT - blkbits);
+ bh.b_bdev = inode->i_sb->s_bdev;
bh.b_size = PAGE_SIZE;
repeat:
@@ -487,6 +688,7 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
delete_from_page_cache(page);
unlock_page(page);
page_cache_release(page);
+ page = NULL;
}
/*
@@ -590,7 +792,8 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
struct block_device *bdev;
pgoff_t size, pgoff;
sector_t block;
- int result = 0;
+ int error, result = 0;
+ bool alloc = false;
/* dax pmd mappings require pfn_t_devmap() */
if (!IS_ENABLED(CONFIG_FS_DAX_PMD))
@@ -624,13 +827,21 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
}
memset(&bh, 0, sizeof(bh));
+ bh.b_bdev = inode->i_sb->s_bdev;
block = (sector_t)pgoff << (PAGE_SHIFT - blkbits);
bh.b_size = PMD_SIZE;
- if (get_block(inode, block, &bh, write) != 0)
+
+ if (get_block(inode, block, &bh, 0) != 0)
return VM_FAULT_SIGBUS;
+
+ if (!buffer_mapped(&bh) && write) {
+ if (get_block(inode, block, &bh, 1) != 0)
+ return VM_FAULT_SIGBUS;
+ alloc = true;
+ }
+
bdev = bh.b_bdev;
- i_mmap_lock_read(mapping);
/*
* If the filesystem isn't willing to tell us the length of a hole,
@@ -639,19 +850,22 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
*/
if (!buffer_size_valid(&bh) || bh.b_size < PMD_SIZE) {
dax_pmd_dbg(&bh, address, "allocated block too small");
- goto fallback;
+ return VM_FAULT_FALLBACK;
}
/*
* If we allocated new storage, make sure no process has any
* zero pages covering this hole
*/
- if (buffer_new(&bh)) {
- i_mmap_unlock_read(mapping);
- unmap_mapping_range(mapping, pgoff << PAGE_SHIFT, PMD_SIZE, 0);
- i_mmap_lock_read(mapping);
+ if (alloc) {
+ loff_t lstart = pgoff << PAGE_SHIFT;
+ loff_t lend = lstart + PMD_SIZE - 1; /* inclusive */
+
+ truncate_pagecache_range(inode, lstart, lend);
}
+ i_mmap_lock_read(mapping);
+
/*
* If a truncate happened while we were allocating blocks, we may
* leave blocks allocated to the file that are beyond EOF. We can't
@@ -664,7 +878,8 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
goto out;
}
if ((pgoff | PG_PMD_COLOUR) >= size) {
- dax_pmd_dbg(&bh, address, "pgoff unaligned");
+ dax_pmd_dbg(&bh, address,
+ "offset + huge page size > file size");
goto fallback;
}
@@ -732,6 +947,31 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
}
dax_unmap_atomic(bdev, &dax);
+ /*
+ * For PTE faults we insert a radix tree entry for reads, and
+ * leave it clean. Then on the first write we dirty the radix
+ * tree entry via the dax_pfn_mkwrite() path. This sequence
+ * allows the dax_pfn_mkwrite() call to be simpler and avoid a
+ * call into get_block() to translate the pgoff to a sector in
+ * order to be able to create a new radix tree entry.
+ *
+ * The PMD path doesn't have an equivalent to
+ * dax_pfn_mkwrite(), though, so for a read followed by a
+ * write we traverse all the way through __dax_pmd_fault()
+ * twice. This means we can just skip inserting a radix tree
+ * entry completely on the initial read and just wait until
+ * the write to insert a dirty entry.
+ */
+ if (write) {
+ error = dax_radix_entry(mapping, pgoff, dax.sector,
+ true, true);
+ if (error) {
+ dax_pmd_dbg(&bh, address,
+ "PMD radix insertion failed");
+ goto fallback;
+ }
+ }
+
dev_dbg(part_to_dev(bdev->bd_part),
"%s: %s addr: %lx pfn: %lx sect: %llx\n",
__func__, current->comm, address,
@@ -790,15 +1030,20 @@ EXPORT_SYMBOL_GPL(dax_pmd_fault);
* dax_pfn_mkwrite - handle first write to DAX page
* @vma: The virtual memory area where the fault occurred
* @vmf: The description of the fault
- *
*/
int dax_pfn_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
- struct super_block *sb = file_inode(vma->vm_file)->i_sb;
+ struct file *file = vma->vm_file;
- sb_start_pagefault(sb);
- file_update_time(vma->vm_file);
- sb_end_pagefault(sb);
+ /*
+ * We pass NO_SECTOR to dax_radix_entry() because we expect that a
+ * RADIX_DAX_PTE entry already exists in the radix tree from a
+ * previous call to __dax_fault(). We just want to look up that PTE
+ * entry using vmf->pgoff and make sure the dirty tag is set. This
+ * saves us from having to make a call to get_block() here to look
+ * up the sector.
+ */
+ dax_radix_entry(file->f_mapping, vmf->pgoff, NO_SECTOR, false, true);
return VM_FAULT_NOPAGE;
}
EXPORT_SYMBOL_GPL(dax_pfn_mkwrite);
@@ -835,6 +1080,7 @@ int dax_zero_page_range(struct inode *inode, loff_t from, unsigned length,
BUG_ON((offset + length) > PAGE_CACHE_SIZE);
memset(&bh, 0, sizeof(bh));
+ bh.b_bdev = inode->i_sb->s_bdev;
bh.b_size = PAGE_CACHE_SIZE;
err = get_block(inode, index, &bh, 0);
if (err < 0)
diff --git a/fs/dcache.c b/fs/dcache.c
index b4539e84e577..92d5140de851 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2462,7 +2462,7 @@ EXPORT_SYMBOL(d_rehash);
*/
void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
{
- BUG_ON(!mutex_is_locked(&dentry->d_parent->d_inode->i_mutex));
+ BUG_ON(!inode_is_locked(dentry->d_parent->d_inode));
BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
spin_lock(&dentry->d_lock);
@@ -2738,7 +2738,7 @@ static int __d_unalias(struct inode *inode,
if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
goto out_err;
m1 = &dentry->d_sb->s_vfs_rename_mutex;
- if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
+ if (!inode_trylock(alias->d_parent->d_inode))
goto out_err;
m2 = &alias->d_parent->d_inode->i_mutex;
out_unalias:
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index b7fcc0de0b2f..bece948b363d 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -265,7 +265,7 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
if (!parent)
parent = debugfs_mount->mnt_root;
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
dentry = lookup_one_len(name, parent, strlen(name));
if (!IS_ERR(dentry) && d_really_is_positive(dentry)) {
dput(dentry);
@@ -273,7 +273,7 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
}
if (IS_ERR(dentry)) {
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
simple_release_fs(&debugfs_mount, &debugfs_mount_count);
}
@@ -282,7 +282,7 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
static struct dentry *failed_creating(struct dentry *dentry)
{
- mutex_unlock(&d_inode(dentry->d_parent)->i_mutex);
+ inode_unlock(d_inode(dentry->d_parent));
dput(dentry);
simple_release_fs(&debugfs_mount, &debugfs_mount_count);
return NULL;
@@ -290,7 +290,7 @@ static struct dentry *failed_creating(struct dentry *dentry)
static struct dentry *end_creating(struct dentry *dentry)
{
- mutex_unlock(&d_inode(dentry->d_parent)->i_mutex);
+ inode_unlock(d_inode(dentry->d_parent));
return dentry;
}
@@ -560,9 +560,9 @@ void debugfs_remove(struct dentry *dentry)
if (!parent || d_really_is_negative(parent))
return;
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
ret = __debugfs_remove(dentry, parent);
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
if (!ret)
simple_release_fs(&debugfs_mount, &debugfs_mount_count);
}
@@ -594,7 +594,7 @@ void debugfs_remove_recursive(struct dentry *dentry)
parent = dentry;
down:
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
loop:
/*
* The parent->d_subdirs is protected by the d_lock. Outside that
@@ -609,7 +609,7 @@ void debugfs_remove_recursive(struct dentry *dentry)
/* perhaps simple_empty(child) makes more sense */
if (!list_empty(&child->d_subdirs)) {
spin_unlock(&parent->d_lock);
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
parent = child;
goto down;
}
@@ -630,10 +630,10 @@ void debugfs_remove_recursive(struct dentry *dentry)
}
spin_unlock(&parent->d_lock);
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
child = parent;
parent = parent->d_parent;
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
if (child != dentry)
/* go up */
@@ -641,7 +641,7 @@ void debugfs_remove_recursive(struct dentry *dentry)
if (!__debugfs_remove(child, parent))
simple_release_fs(&debugfs_mount, &debugfs_mount_count);
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
}
EXPORT_SYMBOL_GPL(debugfs_remove_recursive);
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index c35ffdc12bba..1f107fd51328 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -255,7 +255,7 @@ static int mknod_ptmx(struct super_block *sb)
if (!uid_valid(root_uid) || !gid_valid(root_gid))
return -EINVAL;
- mutex_lock(&d_inode(root)->i_mutex);
+ inode_lock(d_inode(root));
/* If we have already created ptmx node, return */
if (fsi->ptmx_dentry) {
@@ -292,7 +292,7 @@ static int mknod_ptmx(struct super_block *sb)
fsi->ptmx_dentry = dentry;
rc = 0;
out:
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
return rc;
}
@@ -615,7 +615,7 @@ struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index,
sprintf(s, "%d", index);
- mutex_lock(&d_inode(root)->i_mutex);
+ inode_lock(d_inode(root));
dentry = d_alloc_name(root, s);
if (dentry) {
@@ -626,7 +626,7 @@ struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index,
inode = ERR_PTR(-ENOMEM);
}
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
return inode;
}
@@ -671,7 +671,7 @@ void devpts_pty_kill(struct inode *inode)
BUG_ON(inode->i_rdev == MKDEV(TTYAUX_MAJOR, PTMX_MINOR));
- mutex_lock(&d_inode(root)->i_mutex);
+ inode_lock(d_inode(root));
dentry = d_find_alias(inode);
@@ -680,7 +680,7 @@ void devpts_pty_kill(struct inode *inode)
dput(dentry); /* d_alloc_name() in devpts_pty_new() */
dput(dentry); /* d_find_alias above */
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
}
static int __init init_devpts_fs(void)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 602e8441bc0f..1b2f7ffc8b84 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1157,12 +1157,12 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
iocb->ki_filp->f_mapping;
/* will be released by direct_io_worker */
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
retval = filemap_write_and_wait_range(mapping, offset,
end - 1);
if (retval) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
kmem_cache_free(dio_cache, dio);
goto out;
}
@@ -1173,7 +1173,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
dio->i_size = i_size_read(inode);
if (iov_iter_rw(iter) == READ && offset >= dio->i_size) {
if (dio->flags & DIO_LOCKING)
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
kmem_cache_free(dio_cache, dio);
retval = 0;
goto out;
@@ -1295,7 +1295,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
* of protecting us from looking up uninitialized blocks.
*/
if (iov_iter_rw(iter) == READ && (dio->flags & DIO_LOCKING))
- mutex_unlock(&dio->inode->i_mutex);
+ inode_unlock(dio->inode);
/*
* The only time we want to leave bios in flight is when a successful
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 040aa879d634..4e685ac1024d 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -41,13 +41,13 @@ static struct dentry *lock_parent(struct dentry *dentry)
struct dentry *dir;
dir = dget_parent(dentry);
- mutex_lock_nested(&(d_inode(dir)->i_mutex), I_MUTEX_PARENT);
+ inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
return dir;
}
static void unlock_dir(struct dentry *dir)
{
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
dput(dir);
}
@@ -397,11 +397,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
int rc = 0;
lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent);
- mutex_lock(&d_inode(lower_dir_dentry)->i_mutex);
+ inode_lock(d_inode(lower_dir_dentry));
lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name,
lower_dir_dentry,
ecryptfs_dentry->d_name.len);
- mutex_unlock(&d_inode(lower_dir_dentry)->i_mutex);
+ inode_unlock(d_inode(lower_dir_dentry));
if (IS_ERR(lower_dentry)) {
rc = PTR_ERR(lower_dentry);
ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
@@ -426,11 +426,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
"filename; rc = [%d]\n", __func__, rc);
goto out;
}
- mutex_lock(&d_inode(lower_dir_dentry)->i_mutex);
+ inode_lock(d_inode(lower_dir_dentry));
lower_dentry = lookup_one_len(encrypted_and_encoded_name,
lower_dir_dentry,
encrypted_and_encoded_name_size);
- mutex_unlock(&d_inode(lower_dir_dentry)->i_mutex);
+ inode_unlock(d_inode(lower_dir_dentry));
if (IS_ERR(lower_dentry)) {
rc = PTR_ERR(lower_dentry);
ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
@@ -869,9 +869,9 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
if (!rc && lower_ia.ia_valid & ATTR_SIZE) {
struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
- mutex_lock(&d_inode(lower_dentry)->i_mutex);
+ inode_lock(d_inode(lower_dentry));
rc = notify_change(lower_dentry, &lower_ia, NULL);
- mutex_unlock(&d_inode(lower_dentry)->i_mutex);
+ inode_unlock(d_inode(lower_dentry));
}
return rc;
}
@@ -970,9 +970,9 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
if (lower_ia.ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
lower_ia.ia_valid &= ~ATTR_MODE;
- mutex_lock(&d_inode(lower_dentry)->i_mutex);
+ inode_lock(d_inode(lower_dentry));
rc = notify_change(lower_dentry, &lower_ia, NULL);
- mutex_unlock(&d_inode(lower_dentry)->i_mutex);
+ inode_unlock(d_inode(lower_dentry));
out:
fsstack_copy_attr_all(inode, lower_inode);
return rc;
@@ -1048,10 +1048,10 @@ ecryptfs_getxattr_lower(struct dentry *lower_dentry, const char *name,
rc = -EOPNOTSUPP;
goto out;
}
- mutex_lock(&d_inode(lower_dentry)->i_mutex);
+ inode_lock(d_inode(lower_dentry));
rc = d_inode(lower_dentry)->i_op->getxattr(lower_dentry, name, value,
size);
- mutex_unlock(&d_inode(lower_dentry)->i_mutex);
+ inode_unlock(d_inode(lower_dentry));
out:
return rc;
}
@@ -1075,9 +1075,9 @@ ecryptfs_listxattr(struct dentry *dentry, char *list, size_t size)
rc = -EOPNOTSUPP;
goto out;
}
- mutex_lock(&d_inode(lower_dentry)->i_mutex);
+ inode_lock(d_inode(lower_dentry));
rc = d_inode(lower_dentry)->i_op->listxattr(lower_dentry, list, size);
- mutex_unlock(&d_inode(lower_dentry)->i_mutex);
+ inode_unlock(d_inode(lower_dentry));
out:
return rc;
}
@@ -1092,9 +1092,9 @@ static int ecryptfs_removexattr(struct dentry *dentry, const char *name)
rc = -EOPNOTSUPP;
goto out;
}
- mutex_lock(&d_inode(lower_dentry)->i_mutex);
+ inode_lock(d_inode(lower_dentry));
rc = d_inode(lower_dentry)->i_op->removexattr(lower_dentry, name);
- mutex_unlock(&d_inode(lower_dentry)->i_mutex);
+ inode_unlock(d_inode(lower_dentry));
out:
return rc;
}
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index caba848ac763..c6ced4cbf0cf 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -436,7 +436,7 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode)
rc = -ENOMEM;
goto out;
}
- mutex_lock(&lower_inode->i_mutex);
+ inode_lock(lower_inode);
size = lower_inode->i_op->getxattr(lower_dentry, ECRYPTFS_XATTR_NAME,
xattr_virt, PAGE_CACHE_SIZE);
if (size < 0)
@@ -444,7 +444,7 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode)
put_unaligned_be64(i_size_read(ecryptfs_inode), xattr_virt);
rc = lower_inode->i_op->setxattr(lower_dentry, ECRYPTFS_XATTR_NAME,
xattr_virt, size, 0);
- mutex_unlock(&lower_inode->i_mutex);
+ inode_unlock(lower_inode);
if (rc)
printk(KERN_ERR "Error whilst attempting to write inode size "
"to lower file xattr; rc = [%d]\n", rc);
diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c
index 90001da9abfd..c424e4813ec8 100644
--- a/fs/efivarfs/file.c
+++ b/fs/efivarfs/file.c
@@ -50,9 +50,9 @@ static ssize_t efivarfs_file_write(struct file *file,
d_delete(file->f_path.dentry);
dput(file->f_path.dentry);
} else {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
i_size_write(inode, datasize + sizeof(attributes));
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
bytes = count;
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index 86a2121828c3..b8a564f29107 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -160,10 +160,10 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor,
efivar_entry_size(entry, &size);
efivar_entry_add(entry, &efivarfs_list);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
inode->i_private = entry;
i_size_write(inode, size + sizeof(entry->var.Attributes));
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
d_add(dentry, inode);
return 0;
diff --git a/fs/exec.c b/fs/exec.c
index 828ec5f07de0..dcd4ac7d3f1e 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1307,13 +1307,13 @@ static void bprm_fill_uid(struct linux_binprm *bprm)
return;
/* Be careful if suid/sgid is set */
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* reload atomically mode/uid/gid now that lock held */
mode = inode->i_mode;
uid = inode->i_uid;
gid = inode->i_gid;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
/* We ignore suid/sgid if there are no mappings for them in the ns */
if (!kuid_has_mapping(bprm->cred->user_ns, uid) ||
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index 906de66e8e7e..28645f0640f7 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -52,9 +52,9 @@ static int exofs_file_fsync(struct file *filp, loff_t start, loff_t end,
if (ret)
return ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = sync_inode_metadata(filp->f_mapping->host, 1);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 714cd37a6ba3..c46f1a190b8d 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -124,10 +124,10 @@ static struct dentry *reconnect_one(struct vfsmount *mnt,
int err;
parent = ERR_PTR(-EACCES);
- mutex_lock(&dentry->d_inode->i_mutex);
+ inode_lock(dentry->d_inode);
if (mnt->mnt_sb->s_export_op->get_parent)
parent = mnt->mnt_sb->s_export_op->get_parent(dentry);
- mutex_unlock(&dentry->d_inode->i_mutex);
+ inode_unlock(dentry->d_inode);
if (IS_ERR(parent)) {
dprintk("%s: get_parent of %ld failed, err %d\n",
@@ -143,9 +143,9 @@ static struct dentry *reconnect_one(struct vfsmount *mnt,
if (err)
goto out_err;
dprintk("%s: found name: %s\n", __func__, nbuf);
- mutex_lock(&parent->d_inode->i_mutex);
+ inode_lock(parent->d_inode);
tmp = lookup_one_len(nbuf, parent, strlen(nbuf));
- mutex_unlock(&parent->d_inode->i_mutex);
+ inode_unlock(parent->d_inode);
if (IS_ERR(tmp)) {
dprintk("%s: lookup failed: %d\n", __func__, PTR_ERR(tmp));
goto out_err;
@@ -503,10 +503,10 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
*/
err = exportfs_get_name(mnt, target_dir, nbuf, result);
if (!err) {
- mutex_lock(&target_dir->d_inode->i_mutex);
+ inode_lock(target_dir->d_inode);
nresult = lookup_one_len(nbuf, target_dir,
strlen(nbuf));
- mutex_unlock(&target_dir->d_inode->i_mutex);
+ inode_unlock(target_dir->d_inode);
if (!IS_ERR(nresult)) {
if (nresult->d_inode) {
dput(result);
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 11a42c5a09ae..2c88d683cd91 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -102,8 +102,8 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
{
struct inode *inode = file_inode(vma->vm_file);
struct ext2_inode_info *ei = EXT2_I(inode);
- int ret = VM_FAULT_NOPAGE;
loff_t size;
+ int ret;
sb_start_pagefault(inode->i_sb);
file_update_time(vma->vm_file);
@@ -113,6 +113,8 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
if (vmf->pgoff >= size)
ret = VM_FAULT_SIGBUS;
+ else
+ ret = dax_pfn_mkwrite(vma, vmf);
up_read(&ei->dax_sem);
sb_end_pagefault(inode->i_sb);
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
index 5d46c09863f0..b386af2e45f4 100644
--- a/fs/ext2/ioctl.c
+++ b/fs/ext2/ioctl.c
@@ -51,10 +51,10 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
flags = ext2_mask_flags(inode->i_mode, flags);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* Is it quota file? Do not allow user to mess with it */
if (IS_NOQUOTA(inode)) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
ret = -EPERM;
goto setflags_out;
}
@@ -68,7 +68,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
*/
if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) {
if (!capable(CAP_LINUX_IMMUTABLE)) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
ret = -EPERM;
goto setflags_out;
}
@@ -80,7 +80,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
ext2_set_inode_flags(inode);
inode->i_ctime = CURRENT_TIME_SEC;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
mark_inode_dirty(inode);
setflags_out:
@@ -102,10 +102,10 @@ setflags_out:
goto setversion_out;
}
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
inode->i_ctime = CURRENT_TIME_SEC;
inode->i_generation = generation;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
mark_inode_dirty(inode);
setversion_out:
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 1c127213363a..0662b285dc8a 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2896,7 +2896,7 @@ do { \
static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
{
WARN_ON_ONCE(S_ISREG(inode->i_mode) &&
- !mutex_is_locked(&inode->i_mutex));
+ !inode_is_locked(inode));
down_write(&EXT4_I(inode)->i_data_sem);
if (newsize > EXT4_I(inode)->i_disksize)
EXT4_I(inode)->i_disksize = newsize;
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index b52fea3b7219..0ffabaf90aa5 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4799,7 +4799,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
else
max_blocks -= lblk;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/*
* Indirect files do not support unwritten extnets
@@ -4902,7 +4902,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
out_dio:
ext4_inode_resume_unlocked_dio(inode);
out_mutex:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
@@ -4973,7 +4973,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (mode & FALLOC_FL_KEEP_SIZE)
flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/*
* We only support preallocation for extent-based files only
@@ -5006,7 +5006,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
EXT4_I(inode)->i_sync_tid);
}
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
return ret;
}
@@ -5492,7 +5492,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
return ret;
}
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/*
* There is no need to overlap collapse range with EOF, in which case
* it is effectively a truncate operation
@@ -5587,7 +5587,7 @@ out_mmap:
up_write(&EXT4_I(inode)->i_mmap_sem);
ext4_inode_resume_unlocked_dio(inode);
out_mutex:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
@@ -5638,7 +5638,7 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
return ret;
}
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* Currently just for extent based files */
if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
ret = -EOPNOTSUPP;
@@ -5757,7 +5757,7 @@ out_mmap:
up_write(&EXT4_I(inode)->i_mmap_sem);
ext4_inode_resume_unlocked_dio(inode);
out_mutex:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
@@ -5792,8 +5792,8 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1,
BUG_ON(!rwsem_is_locked(&EXT4_I(inode1)->i_data_sem));
BUG_ON(!rwsem_is_locked(&EXT4_I(inode2)->i_data_sem));
- BUG_ON(!mutex_is_locked(&inode1->i_mutex));
- BUG_ON(!mutex_is_locked(&inode2->i_mutex));
+ BUG_ON(!inode_is_locked(inode1));
+ BUG_ON(!inode_is_locked(inode2));
*erp = ext4_es_remove_extent(inode1, lblk1, count);
if (unlikely(*erp))
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 749b222e6498..1126436dada1 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -113,7 +113,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
ext4_unwritten_wait(inode);
}
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = generic_write_checks(iocb, from);
if (ret <= 0)
goto out;
@@ -169,7 +169,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
}
ret = __generic_file_write_iter(iocb, from);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (ret > 0) {
ssize_t err;
@@ -186,7 +186,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
return ret;
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (aio_mutex)
mutex_unlock(aio_mutex);
return ret;
@@ -291,8 +291,8 @@ static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
{
struct inode *inode = file_inode(vma->vm_file);
struct super_block *sb = inode->i_sb;
- int ret = VM_FAULT_NOPAGE;
loff_t size;
+ int ret;
sb_start_pagefault(sb);
file_update_time(vma->vm_file);
@@ -300,6 +300,8 @@ static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
if (vmf->pgoff >= size)
ret = VM_FAULT_SIGBUS;
+ else
+ ret = dax_pfn_mkwrite(vma, vmf);
up_read(&EXT4_I(inode)->i_mmap_sem);
sb_end_pagefault(sb);
@@ -561,11 +563,11 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
int blkbits;
int ret = 0;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
isize = i_size_read(inode);
if (offset >= isize) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return -ENXIO;
}
@@ -613,7 +615,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
dataoff = (loff_t)last << blkbits;
} while (last <= end);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (dataoff > isize)
return -ENXIO;
@@ -634,11 +636,11 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
int blkbits;
int ret = 0;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
isize = i_size_read(inode);
if (offset >= isize) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return -ENXIO;
}
@@ -689,7 +691,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
break;
} while (last <= end);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (holeoff > isize)
holeoff = isize;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index d964195ea0e2..83bc8bfb3bea 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3231,7 +3231,7 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
overwrite = *((int *)iocb->private);
if (overwrite)
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
/*
* We could direct write to holes and fallocate.
@@ -3331,7 +3331,7 @@ retake_lock:
inode_dio_end(inode);
/* take i_mutex locking again if we do a ovewrite dio */
if (overwrite)
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
return ret;
}
@@ -3653,7 +3653,7 @@ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
handle_t *handle;
loff_t size = i_size_read(inode);
- WARN_ON(!mutex_is_locked(&inode->i_mutex));
+ WARN_ON(!inode_is_locked(inode));
if (offset > size || offset + len < size)
return 0;
@@ -3707,7 +3707,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
return ret;
}
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* No need to punch hole beyond i_size */
if (offset >= inode->i_size)
@@ -3809,7 +3809,7 @@ out_dio:
up_write(&EXT4_I(inode)->i_mmap_sem);
ext4_inode_resume_unlocked_dio(inode);
out_mutex:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
@@ -3879,7 +3879,7 @@ void ext4_truncate(struct inode *inode)
* have i_mutex locked because it's not necessary.
*/
if (!(inode->i_state & (I_NEW|I_FREEING)))
- WARN_ON(!mutex_is_locked(&inode->i_mutex));
+ WARN_ON(!inode_is_locked(inode));
trace_ext4_truncate_enter(inode);
if (!ext4_can_truncate(inode))
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 2b0cb84255eb..0f6c36922c24 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -330,7 +330,7 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
return err;
err = -EPERM;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* Is it quota file? Do not allow user to mess with it */
if (IS_NOQUOTA(inode))
goto out_unlock;
@@ -381,7 +381,7 @@ out_dirty:
out_stop:
ext4_journal_stop(handle);
out_unlock:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
mnt_drop_write_file(filp);
return err;
}
@@ -464,9 +464,9 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
flags = ext4_mask_flags(inode->i_mode, flags);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
err = ext4_ioctl_setflags(inode, flags);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
mnt_drop_write_file(filp);
return err;
}
@@ -497,7 +497,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
goto setversion_out;
}
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
@@ -512,7 +512,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
ext4_journal_stop(handle);
unlock_out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
setversion_out:
mnt_drop_write_file(filp);
return err;
@@ -658,9 +658,9 @@ group_add_out:
* ext4_ext_swap_inode_data before we switch the
* inode format to prevent read.
*/
- mutex_lock(&(inode->i_mutex));
+ inode_lock((inode));
err = ext4_ext_migrate(inode);
- mutex_unlock(&(inode->i_mutex));
+ inode_unlock((inode));
mnt_drop_write_file(filp);
return err;
}
@@ -876,11 +876,11 @@ encryption_policy_out:
flags = ext4_xflags_to_iflags(fa.fsx_xflags);
flags = ext4_mask_flags(inode->i_mode, flags);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
flags = (ei->i_flags & ~EXT4_FL_XFLAG_VISIBLE) |
(flags & EXT4_FL_XFLAG_VISIBLE);
err = ext4_ioctl_setflags(inode, flags);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
mnt_drop_write_file(filp);
if (err)
return err;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 854f75de4599..06574dd77614 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2753,7 +2753,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
return 0;
WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
- !mutex_is_locked(&inode->i_mutex));
+ !inode_is_locked(inode));
/*
* Exit early if inode already is on orphan list. This is a big speedup
* since we don't have to contend on the global s_orphan_lock.
@@ -2835,7 +2835,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
return 0;
WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
- !mutex_is_locked(&inode->i_mutex));
+ !inode_is_locked(inode));
/* Do this quick check before taking global s_orphan_lock. */
if (list_empty(&ei->i_orphan))
return 0;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 00c98fab6333..3ed01ec011d7 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2286,10 +2286,10 @@ static void ext4_orphan_cleanup(struct super_block *sb,
__func__, inode->i_ino, inode->i_size);
jbd_debug(2, "truncating inode %lu to %lld bytes\n",
inode->i_ino, inode->i_size);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
truncate_inode_pages(inode->i_mapping, inode->i_size);
ext4_truncate(inode);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
nr_truncates++;
} else {
if (test_opt(sb, DEBUG))
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index ac9e7c6aac74..5c06db17e41f 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -794,7 +794,7 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
return ret;
}
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
isize = i_size_read(inode);
if (start >= isize)
@@ -860,7 +860,7 @@ out:
if (ret == 1)
ret = 0;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 18ddb1e5182a..ea272be62677 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -333,7 +333,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
loff_t isize;
int err = 0;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
isize = i_size_read(inode);
if (offset >= isize)
@@ -388,10 +388,10 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
found:
if (whence == SEEK_HOLE && data_ofs > isize)
data_ofs = isize;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return vfs_setpos(file, data_ofs, maxbytes);
fail:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return -ENXIO;
}
@@ -1219,7 +1219,7 @@ static long f2fs_fallocate(struct file *file, int mode,
FALLOC_FL_INSERT_RANGE))
return -EOPNOTSUPP;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (mode & FALLOC_FL_PUNCH_HOLE) {
if (offset >= inode->i_size)
@@ -1243,7 +1243,7 @@ static long f2fs_fallocate(struct file *file, int mode,
}
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
trace_f2fs_fallocate(inode, mode, offset, len, ret);
return ret;
@@ -1307,13 +1307,13 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
flags = f2fs_mask_flags(inode->i_mode, flags);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
oldflags = fi->i_flags;
if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
if (!capable(CAP_LINUX_IMMUTABLE)) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
ret = -EPERM;
goto out;
}
@@ -1322,7 +1322,7 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
flags = flags & FS_FL_USER_MODIFIABLE;
flags |= oldflags & ~FS_FL_USER_MODIFIABLE;
fi->i_flags = flags;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
f2fs_set_inode_flags(inode);
inode->i_ctime = CURRENT_TIME;
@@ -1667,7 +1667,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
f2fs_balance_fs(sbi, true);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* writeback all dirty pages in the range */
err = filemap_write_and_wait_range(inode->i_mapping, range->start,
@@ -1778,7 +1778,7 @@ do_map:
clear_out:
clear_inode_flag(F2FS_I(inode), FI_DO_DEFRAG);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (!err)
range->len = (u64)total << PAGE_CACHE_SHIFT;
return err;
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 7def96caec5f..d0b95c95079b 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -769,7 +769,7 @@ static int fat_ioctl_readdir(struct inode *inode, struct file *file,
buf.dirent = dirent;
buf.result = 0;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
buf.ctx.pos = file->f_pos;
ret = -ENOENT;
if (!IS_DEADDIR(inode)) {
@@ -777,7 +777,7 @@ static int fat_ioctl_readdir(struct inode *inode, struct file *file,
short_only, both ? &buf : NULL);
file->f_pos = buf.ctx.pos;
}
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (ret >= 0)
ret = buf.result;
return ret;
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 43d3475da83a..f70185668832 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -24,9 +24,9 @@ static int fat_ioctl_get_attributes(struct inode *inode, u32 __user *user_attr)
{
u32 attr;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
attr = fat_make_attrs(inode);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return put_user(attr, user_attr);
}
@@ -47,7 +47,7 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
err = mnt_want_write_file(file);
if (err)
goto out;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/*
* ATTR_VOLUME and ATTR_DIR cannot be changed; this also
@@ -109,7 +109,7 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
fat_save_attrs(inode, attr);
mark_inode_dirty(inode);
out_unlock_inode:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
mnt_drop_write_file(file);
out:
return err;
@@ -246,7 +246,7 @@ static long fat_fallocate(struct file *file, int mode,
if (!S_ISREG(inode->i_mode))
return -EOPNOTSUPP;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (mode & FALLOC_FL_KEEP_SIZE) {
ondisksize = inode->i_blocks << 9;
if ((offset + len) <= ondisksize)
@@ -272,7 +272,7 @@ static long fat_fallocate(struct file *file, int mode,
}
error:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return err;
}
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 712601f299b8..4b855b65d457 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -944,7 +944,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
if (!parent)
return -ENOENT;
- mutex_lock(&parent->i_mutex);
+ inode_lock(parent);
if (!S_ISDIR(parent->i_mode))
goto unlock;
@@ -962,7 +962,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
fuse_invalidate_entry(entry);
if (child_nodeid != 0 && d_really_is_positive(entry)) {
- mutex_lock(&d_inode(entry)->i_mutex);
+ inode_lock(d_inode(entry));
if (get_node_id(d_inode(entry)) != child_nodeid) {
err = -ENOENT;
goto badentry;
@@ -983,7 +983,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
clear_nlink(d_inode(entry));
err = 0;
badentry:
- mutex_unlock(&d_inode(entry)->i_mutex);
+ inode_unlock(d_inode(entry));
if (!err)
d_delete(entry);
} else {
@@ -992,7 +992,7 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
dput(entry);
unlock:
- mutex_unlock(&parent->i_mutex);
+ inode_unlock(parent);
iput(parent);
return err;
}
@@ -1504,7 +1504,7 @@ void fuse_set_nowrite(struct inode *inode)
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
- BUG_ON(!mutex_is_locked(&inode->i_mutex));
+ BUG_ON(!inode_is_locked(inode));
spin_lock(&fc->lock);
BUG_ON(fi->writectr < 0);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index aa03aab6a24f..b03d253ece15 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -207,7 +207,7 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
return err;
if (lock_inode)
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
err = fuse_do_open(fc, get_node_id(inode), file, isdir);
@@ -215,7 +215,7 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
fuse_finish_open(inode, file);
if (lock_inode)
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return err;
}
@@ -413,9 +413,9 @@ static int fuse_flush(struct file *file, fl_owner_t id)
if (err)
return err;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
fuse_sync_writes(inode);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
req = fuse_get_req_nofail_nopages(fc, file);
memset(&inarg, 0, sizeof(inarg));
@@ -450,7 +450,7 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
if (is_bad_inode(inode))
return -EIO;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/*
* Start writeback against all dirty pages of the inode, then
@@ -486,7 +486,7 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
err = 0;
}
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return err;
}
@@ -1160,7 +1160,7 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
return generic_file_write_iter(iocb, from);
}
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* We can write back this queue in page reclaim */
current->backing_dev_info = inode_to_bdi(inode);
@@ -1210,7 +1210,7 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
}
out:
current->backing_dev_info = NULL;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return written ? written : err;
}
@@ -1322,10 +1322,10 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
if (!cuse && fuse_range_is_writeback(inode, idx_from, idx_to)) {
if (!write)
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
fuse_sync_writes(inode);
if (!write)
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
while (count) {
@@ -1413,14 +1413,14 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from)
return -EIO;
/* Don't allow parallel writes to the same file */
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
res = generic_write_checks(iocb, from);
if (res > 0)
res = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE);
fuse_invalidate_attr(inode);
if (res > 0)
fuse_write_update_size(inode, iocb->ki_pos);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return res;
}
@@ -2287,17 +2287,17 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int whence)
retval = generic_file_llseek(file, offset, whence);
break;
case SEEK_END:
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
retval = fuse_update_attributes(inode, NULL, file, NULL);
if (!retval)
retval = generic_file_llseek(file, offset, whence);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
break;
case SEEK_HOLE:
case SEEK_DATA:
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
retval = fuse_lseek(file, offset, whence);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
break;
default:
retval = -EINVAL;
@@ -2944,7 +2944,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
return -EOPNOTSUPP;
if (lock_inode) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (mode & FALLOC_FL_PUNCH_HOLE) {
loff_t endbyte = offset + length - 1;
err = filemap_write_and_wait_range(inode->i_mapping,
@@ -2990,7 +2990,7 @@ out:
clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
if (lock_inode)
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return err;
}
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 7412863cda1e..c9384f932975 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -914,7 +914,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t le
if ((mode & ~FALLOC_FL_KEEP_SIZE) || gfs2_is_jdata(ip))
return -EOPNOTSUPP;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
ret = gfs2_glock_nq(&gh);
@@ -946,7 +946,7 @@ out_unlock:
gfs2_glock_dq(&gh);
out_uninit:
gfs2_holder_uninit(&gh);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 3e94400d587c..352f958769e1 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -2067,7 +2067,7 @@ static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
if (ret)
return ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
if (ret)
@@ -2094,7 +2094,7 @@ static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
gfs2_glock_dq_uninit(&gh);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index be6d9c450b22..a39891344259 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -888,7 +888,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
return -ENOMEM;
sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL);
- mutex_lock(&ip->i_inode.i_mutex);
+ inode_lock(&ip->i_inode);
for (qx = 0; qx < num_qd; qx++) {
error = gfs2_glock_nq_init(qda[qx]->qd_gl, LM_ST_EXCLUSIVE,
GL_NOCACHE, &ghs[qx]);
@@ -953,7 +953,7 @@ out_alloc:
out:
while (qx--)
gfs2_glock_dq_uninit(&ghs[qx]);
- mutex_unlock(&ip->i_inode.i_mutex);
+ inode_unlock(&ip->i_inode);
kfree(ghs);
gfs2_log_flush(ip->i_gl->gl_name.ln_sbd, ip->i_gl, NORMAL_FLUSH);
return error;
@@ -1674,7 +1674,7 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid,
if (error)
goto out_put;
- mutex_lock(&ip->i_inode.i_mutex);
+ inode_lock(&ip->i_inode);
error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_EXCLUSIVE, 0, &q_gh);
if (error)
goto out_unlockput;
@@ -1739,7 +1739,7 @@ out_i:
out_q:
gfs2_glock_dq_uninit(&q_gh);
out_unlockput:
- mutex_unlock(&ip->i_inode.i_mutex);
+ inode_unlock(&ip->i_inode);
out_put:
qd_put(qd);
return error;
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index 70788e03820a..e9f2b855f831 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -173,9 +173,9 @@ static int hfs_dir_release(struct inode *inode, struct file *file)
{
struct hfs_readdir_data *rd = file->private_data;
if (rd) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
list_del(&rd->list);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
kfree(rd);
}
return 0;
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index b99ebddb10cb..6686bf39a5b5 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -570,13 +570,13 @@ static int hfs_file_release(struct inode *inode, struct file *file)
if (HFS_IS_RSRC(inode))
inode = HFS_I(inode)->rsrc_inode;
if (atomic_dec_and_test(&HFS_I(inode)->opencnt)) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
hfs_file_truncate(inode);
//if (inode->i_flags & S_DEAD) {
// hfs_delete_cat(inode->i_ino, HFSPLUS_SB(sb).hidden_dir, NULL);
// hfs_delete_inode(inode);
//}
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
return 0;
}
@@ -656,7 +656,7 @@ static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end,
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (ret)
return ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* sync the inode to buffers */
ret = write_inode_now(inode, 0);
@@ -668,7 +668,7 @@ static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end,
err = sync_blockdev(sb->s_bdev);
if (!ret)
ret = err;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index d0f39dcbb58e..a4e867e08947 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -284,9 +284,9 @@ static int hfsplus_dir_release(struct inode *inode, struct file *file)
{
struct hfsplus_readdir_data *rd = file->private_data;
if (rd) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
list_del(&rd->list);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
kfree(rd);
}
return 0;
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 19b33f8151f1..1a6394cdb54e 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -229,14 +229,14 @@ static int hfsplus_file_release(struct inode *inode, struct file *file)
if (HFSPLUS_IS_RSRC(inode))
inode = HFSPLUS_I(inode)->rsrc_inode;
if (atomic_dec_and_test(&HFSPLUS_I(inode)->opencnt)) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
hfsplus_file_truncate(inode);
if (inode->i_flags & S_DEAD) {
hfsplus_delete_cat(inode->i_ino,
HFSPLUS_SB(sb)->hidden_dir, NULL);
hfsplus_delete_inode(inode);
}
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
return 0;
}
@@ -286,7 +286,7 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
error = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (error)
return error;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/*
* Sync inode metadata into the catalog and extent trees.
@@ -327,7 +327,7 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
if (!test_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags))
blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return error;
}
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index 0624ce4e0702..32a49e292b6a 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -93,7 +93,7 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
goto out_drop_write;
}
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if ((flags & (FS_IMMUTABLE_FL|FS_APPEND_FL)) ||
inode->i_flags & (S_IMMUTABLE|S_APPEND)) {
@@ -126,7 +126,7 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
mark_inode_dirty(inode);
out_unlock_inode:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
out_drop_write:
mnt_drop_write_file(file);
out:
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index cfaa18c7a337..d1abbee281d1 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -378,9 +378,9 @@ static int hostfs_fsync(struct file *file, loff_t start, loff_t end,
if (ret)
return ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = fsync_file(HOSTFS_I(inode)->fd, datasync);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index dc540bfcee1d..e57a53c13d86 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -33,7 +33,7 @@ static loff_t hpfs_dir_lseek(struct file *filp, loff_t off, int whence)
if (whence == SEEK_DATA || whence == SEEK_HOLE)
return -EINVAL;
- mutex_lock(&i->i_mutex);
+ inode_lock(i);
hpfs_lock(s);
/*pr_info("dir lseek\n");*/
@@ -48,12 +48,12 @@ static loff_t hpfs_dir_lseek(struct file *filp, loff_t off, int whence)
ok:
filp->f_pos = new_off;
hpfs_unlock(s);
- mutex_unlock(&i->i_mutex);
+ inode_unlock(i);
return new_off;
fail:
/*pr_warn("illegal lseek: %016llx\n", new_off);*/
hpfs_unlock(s);
- mutex_unlock(&i->i_mutex);
+ inode_unlock(i);
return -ESPIPE;
}
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 8bbf7f3e2a27..e1f465a389d5 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -141,7 +141,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
vma_len = (loff_t)(vma->vm_end - vma->vm_start);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
file_accessed(file);
ret = -ENOMEM;
@@ -157,7 +157,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
if (vma->vm_flags & VM_WRITE && inode->i_size < len)
inode->i_size = len;
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
@@ -530,7 +530,7 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
if (hole_end > hole_start) {
struct address_space *mapping = inode->i_mapping;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
i_mmap_lock_write(mapping);
if (!RB_EMPTY_ROOT(&mapping->i_mmap))
hugetlb_vmdelete_list(&mapping->i_mmap,
@@ -538,7 +538,7 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
hole_end >> PAGE_SHIFT);
i_mmap_unlock_write(mapping);
remove_inode_hugepages(inode, hole_start, hole_end);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
return 0;
@@ -572,7 +572,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
start = offset >> hpage_shift;
end = (offset + len + hpage_size - 1) >> hpage_shift;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */
error = inode_newsize_ok(inode, offset + len);
@@ -659,7 +659,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
i_size_write(inode, offset + len);
inode->i_ctime = CURRENT_TIME;
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return error;
}
diff --git a/fs/inode.c b/fs/inode.c
index e491e54d2430..9f62db3bcc3e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -495,7 +495,7 @@ void clear_inode(struct inode *inode)
*/
spin_lock_irq(&inode->i_data.tree_lock);
BUG_ON(inode->i_data.nrpages);
- BUG_ON(inode->i_data.nrshadows);
+ BUG_ON(inode->i_data.nrexceptional);
spin_unlock_irq(&inode->i_data.tree_lock);
BUG_ON(!list_empty(&inode->i_data.private_list));
BUG_ON(!(inode->i_state & I_FREEING));
@@ -966,9 +966,9 @@ void lock_two_nondirectories(struct inode *inode1, struct inode *inode2)
swap(inode1, inode2);
if (inode1 && !S_ISDIR(inode1->i_mode))
- mutex_lock(&inode1->i_mutex);
+ inode_lock(inode1);
if (inode2 && !S_ISDIR(inode2->i_mode) && inode2 != inode1)
- mutex_lock_nested(&inode2->i_mutex, I_MUTEX_NONDIR2);
+ inode_lock_nested(inode2, I_MUTEX_NONDIR2);
}
EXPORT_SYMBOL(lock_two_nondirectories);
@@ -980,9 +980,9 @@ EXPORT_SYMBOL(lock_two_nondirectories);
void unlock_two_nondirectories(struct inode *inode1, struct inode *inode2)
{
if (inode1 && !S_ISDIR(inode1->i_mode))
- mutex_unlock(&inode1->i_mutex);
+ inode_unlock(inode1);
if (inode2 && !S_ISDIR(inode2->i_mode) && inode2 != inode1)
- mutex_unlock(&inode2->i_mutex);
+ inode_unlock(inode2);
}
EXPORT_SYMBOL(unlock_two_nondirectories);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 29466c380958..116a333e9c77 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -434,9 +434,9 @@ int generic_block_fiemap(struct inode *inode,
u64 len, get_block_t *get_block)
{
int ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = __generic_block_fiemap(inode, fieinfo, start, len, get_block);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
EXPORT_SYMBOL(generic_block_fiemap);
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
index a3750f902adc..0ae91ad6df2d 100644
--- a/fs/jffs2/build.c
+++ b/fs/jffs2/build.c
@@ -17,6 +17,7 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mtd/mtd.h>
+#include <linux/mm.h> /* kvfree() */
#include "nodelist.h"
static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *,
@@ -383,12 +384,7 @@ int jffs2_do_mount_fs(struct jffs2_sb_info *c)
return 0;
out_free:
-#ifndef __ECOS
- if (jffs2_blocks_use_vmalloc(c))
- vfree(c->blocks);
- else
-#endif
- kfree(c->blocks);
+ kvfree(c->blocks);
return ret;
}
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index f509f62e12f6..c5ac5944bc1b 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -39,10 +39,10 @@ int jffs2_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
if (ret)
return ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* Trigger GC to flush any pending writes for this inode */
jffs2_flush_wbuf_gc(c, inode->i_ino);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return 0;
}
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 2caf1682036d..bead25ae8fe4 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -596,10 +596,7 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent)
out_root:
jffs2_free_ino_caches(c);
jffs2_free_raw_node_refs(c);
- if (jffs2_blocks_use_vmalloc(c))
- vfree(c->blocks);
- else
- kfree(c->blocks);
+ kvfree(c->blocks);
out_inohash:
jffs2_clear_xattr_subsystem(c);
kfree(c->inocache_list);
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index bb080c272149..0a9a114bb9d1 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -331,10 +331,7 @@ static void jffs2_put_super (struct super_block *sb)
jffs2_free_ino_caches(c);
jffs2_free_raw_node_refs(c);
- if (jffs2_blocks_use_vmalloc(c))
- vfree(c->blocks);
- else
- kfree(c->blocks);
+ kvfree(c->blocks);
jffs2_flash_cleanup(c);
kfree(c->inocache_list);
jffs2_clear_xattr_subsystem(c);
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 0e026a7bdcd4..4ce7735dd042 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -38,17 +38,17 @@ int jfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
if (rc)
return rc;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (!(inode->i_state & I_DIRTY_ALL) ||
(datasync && !(inode->i_state & I_DIRTY_DATASYNC))) {
/* Make sure committed changes hit the disk */
jfs_flush_journal(JFS_SBI(inode->i_sb)->log, 1);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return rc;
}
rc |= jfs_commit_inode(inode, 1);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return rc ? -EIO : 0;
}
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c
index 8db8b7d61e40..8653cac7e12e 100644
--- a/fs/jfs/ioctl.c
+++ b/fs/jfs/ioctl.c
@@ -96,7 +96,7 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
}
/* Lock against other parallel changes of flags */
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
jfs_get_inode_flags(jfs_inode);
oldflags = jfs_inode->mode2;
@@ -109,7 +109,7 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
((flags ^ oldflags) &
(JFS_APPEND_FL | JFS_IMMUTABLE_FL))) {
if (!capable(CAP_LINUX_IMMUTABLE)) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
err = -EPERM;
goto setflags_out;
}
@@ -120,7 +120,7 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
jfs_inode->mode2 = flags;
jfs_set_inode_flags(inode);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
inode->i_ctime = CURRENT_TIME_SEC;
mark_inode_dirty(inode);
setflags_out:
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 900925b5eb8c..4f5d85ba8e23 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -792,7 +792,7 @@ static ssize_t jfs_quota_write(struct super_block *sb, int type,
struct buffer_head tmp_bh;
struct buffer_head *bh;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
while (towrite > 0) {
tocopy = sb->s_blocksize - offset < towrite ?
sb->s_blocksize - offset : towrite;
@@ -824,7 +824,7 @@ static ssize_t jfs_quota_write(struct super_block *sb, int type,
}
out:
if (len == towrite) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return err;
}
if (inode->i_size < off+len-towrite)
@@ -832,7 +832,7 @@ out:
inode->i_version++;
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
mark_inode_dirty(inode);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return len - towrite;
}
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 821973853340..996b7742c90b 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -1511,9 +1511,9 @@ static loff_t kernfs_dir_fop_llseek(struct file *file, loff_t offset,
struct inode *inode = file_inode(file);
loff_t ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = generic_file_llseek(file, offset, whence);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
diff --git a/fs/libfs.c b/fs/libfs.c
index 01491299f348..0ca80b2af420 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -89,7 +89,7 @@ EXPORT_SYMBOL(dcache_dir_close);
loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
{
struct dentry *dentry = file->f_path.dentry;
- mutex_lock(&d_inode(dentry)->i_mutex);
+ inode_lock(d_inode(dentry));
switch (whence) {
case 1:
offset += file->f_pos;
@@ -97,7 +97,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
if (offset >= 0)
break;
default:
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
return -EINVAL;
}
if (offset != file->f_pos) {
@@ -124,7 +124,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
spin_unlock(&dentry->d_lock);
}
}
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
return offset;
}
EXPORT_SYMBOL(dcache_dir_lseek);
@@ -941,7 +941,7 @@ int __generic_file_fsync(struct file *file, loff_t start, loff_t end,
if (err)
return err;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = sync_mapping_buffers(inode->i_mapping);
if (!(inode->i_state & I_DIRTY_ALL))
goto out;
@@ -953,7 +953,7 @@ int __generic_file_fsync(struct file *file, loff_t start, loff_t end,
ret = err;
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
EXPORT_SYMBOL(__generic_file_fsync);
diff --git a/fs/locks.c b/fs/locks.c
index af1ed74a657f..7c5f91be9b65 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1650,12 +1650,12 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
* bother, maybe that's a sign this just isn't a good file to
* hand out a delegation on.
*/
- if (is_deleg && !mutex_trylock(&inode->i_mutex))
+ if (is_deleg && !inode_trylock(inode))
return -EAGAIN;
if (is_deleg && arg == F_WRLCK) {
/* Write delegations are not currently supported: */
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
WARN_ON_ONCE(1);
return -EINVAL;
}
@@ -1732,7 +1732,7 @@ out:
spin_unlock(&ctx->flc_lock);
locks_dispose_list(&dispose);
if (is_deleg)
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (!error && !my_fl)
*flp = NULL;
return error;
diff --git a/fs/logfs/file.c b/fs/logfs/file.c
index 1a6f0167b16a..61eaeb1b6cac 100644
--- a/fs/logfs/file.c
+++ b/fs/logfs/file.c
@@ -204,12 +204,12 @@ long logfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
if (err)
return err;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
oldflags = li->li_flags;
flags &= LOGFS_FL_USER_MODIFIABLE;
flags |= oldflags & ~LOGFS_FL_USER_MODIFIABLE;
li->li_flags = flags;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
inode->i_ctime = CURRENT_TIME;
mark_inode_dirty_sync(inode);
@@ -230,11 +230,11 @@ int logfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
if (ret)
return ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
logfs_get_wblocks(sb, NULL, WF_LOCK);
logfs_write_anchor(sb);
logfs_put_wblocks(sb, NULL, WF_LOCK);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return 0;
}
diff --git a/fs/namei.c b/fs/namei.c
index bceefd5588a2..f624d132e01e 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1629,9 +1629,9 @@ static int lookup_slow(struct nameidata *nd, struct path *path)
parent = nd->path.dentry;
BUG_ON(nd->inode != parent->d_inode);
- mutex_lock(&parent->d_inode->i_mutex);
+ inode_lock(parent->d_inode);
dentry = __lookup_hash(&nd->last, parent, nd->flags);
- mutex_unlock(&parent->d_inode->i_mutex);
+ inode_unlock(parent->d_inode);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
path->mnt = nd->path.mnt;
@@ -2229,10 +2229,10 @@ struct dentry *kern_path_locked(const char *name, struct path *path)
putname(filename);
return ERR_PTR(-EINVAL);
}
- mutex_lock_nested(&path->dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
d = __lookup_hash(&last, path->dentry, 0);
if (IS_ERR(d)) {
- mutex_unlock(&path->dentry->d_inode->i_mutex);
+ inode_unlock(path->dentry->d_inode);
path_put(path);
}
putname(filename);
@@ -2282,7 +2282,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
unsigned int c;
int err;
- WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
+ WARN_ON_ONCE(!inode_is_locked(base->d_inode));
this.name = name;
this.len = len;
@@ -2380,9 +2380,9 @@ struct dentry *lookup_one_len_unlocked(const char *name,
if (ret)
return ret;
- mutex_lock(&base->d_inode->i_mutex);
+ inode_lock(base->d_inode);
ret = __lookup_hash(&this, base, 0);
- mutex_unlock(&base->d_inode->i_mutex);
+ inode_unlock(base->d_inode);
return ret;
}
EXPORT_SYMBOL(lookup_one_len_unlocked);
@@ -2463,7 +2463,7 @@ mountpoint_last(struct nameidata *nd, struct path *path)
goto done;
}
- mutex_lock(&dir->d_inode->i_mutex);
+ inode_lock(dir->d_inode);
dentry = d_lookup(dir, &nd->last);
if (!dentry) {
/*
@@ -2473,16 +2473,16 @@ mountpoint_last(struct nameidata *nd, struct path *path)
*/
dentry = d_alloc(dir, &nd->last);
if (!dentry) {
- mutex_unlock(&dir->d_inode->i_mutex);
+ inode_unlock(dir->d_inode);
return -ENOMEM;
}
dentry = lookup_real(dir->d_inode, dentry, nd->flags);
if (IS_ERR(dentry)) {
- mutex_unlock(&dir->d_inode->i_mutex);
+ inode_unlock(dir->d_inode);
return PTR_ERR(dentry);
}
}
- mutex_unlock(&dir->d_inode->i_mutex);
+ inode_unlock(dir->d_inode);
done:
if (d_is_negative(dentry)) {
@@ -2672,7 +2672,7 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
struct dentry *p;
if (p1 == p2) {
- mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
return NULL;
}
@@ -2680,29 +2680,29 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
p = d_ancestor(p2, p1);
if (p) {
- mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT);
- mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(p2->d_inode, I_MUTEX_PARENT);
+ inode_lock_nested(p1->d_inode, I_MUTEX_CHILD);
return p;
}
p = d_ancestor(p1, p2);
if (p) {
- mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
- mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
+ inode_lock_nested(p2->d_inode, I_MUTEX_CHILD);
return p;
}
- mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
- mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT2);
+ inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
+ inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
return NULL;
}
EXPORT_SYMBOL(lock_rename);
void unlock_rename(struct dentry *p1, struct dentry *p2)
{
- mutex_unlock(&p1->d_inode->i_mutex);
+ inode_unlock(p1->d_inode);
if (p1 != p2) {
- mutex_unlock(&p2->d_inode->i_mutex);
+ inode_unlock(p2->d_inode);
mutex_unlock(&p1->d_inode->i_sb->s_vfs_rename_mutex);
}
}
@@ -3141,9 +3141,9 @@ retry_lookup:
* dropping this one anyway.
*/
}
- mutex_lock(&dir->d_inode->i_mutex);
+ inode_lock(dir->d_inode);
error = lookup_open(nd, &path, file, op, got_write, opened);
- mutex_unlock(&dir->d_inode->i_mutex);
+ inode_unlock(dir->d_inode);
if (error <= 0) {
if (error)
@@ -3489,7 +3489,7 @@ static struct dentry *filename_create(int dfd, struct filename *name,
* Do the final lookup.
*/
lookup_flags |= LOOKUP_CREATE | LOOKUP_EXCL;
- mutex_lock_nested(&path->dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
dentry = __lookup_hash(&last, path->dentry, lookup_flags);
if (IS_ERR(dentry))
goto unlock;
@@ -3518,7 +3518,7 @@ fail:
dput(dentry);
dentry = ERR_PTR(error);
unlock:
- mutex_unlock(&path->dentry->d_inode->i_mutex);
+ inode_unlock(path->dentry->d_inode);
if (!err2)
mnt_drop_write(path->mnt);
out:
@@ -3538,7 +3538,7 @@ EXPORT_SYMBOL(kern_path_create);
void done_path_create(struct path *path, struct dentry *dentry)
{
dput(dentry);
- mutex_unlock(&path->dentry->d_inode->i_mutex);
+ inode_unlock(path->dentry->d_inode);
mnt_drop_write(path->mnt);
path_put(path);
}
@@ -3735,7 +3735,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
return -EPERM;
dget(dentry);
- mutex_lock(&dentry->d_inode->i_mutex);
+ inode_lock(dentry->d_inode);
error = -EBUSY;
if (is_local_mountpoint(dentry))
@@ -3755,7 +3755,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
detach_mounts(dentry);
out:
- mutex_unlock(&dentry->d_inode->i_mutex);
+ inode_unlock(dentry->d_inode);
dput(dentry);
if (!error)
d_delete(dentry);
@@ -3794,7 +3794,7 @@ retry:
if (error)
goto exit1;
- mutex_lock_nested(&path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
dentry = __lookup_hash(&last, path.dentry, lookup_flags);
error = PTR_ERR(dentry);
if (IS_ERR(dentry))
@@ -3810,7 +3810,7 @@ retry:
exit3:
dput(dentry);
exit2:
- mutex_unlock(&path.dentry->d_inode->i_mutex);
+ inode_unlock(path.dentry->d_inode);
mnt_drop_write(path.mnt);
exit1:
path_put(&path);
@@ -3856,7 +3856,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegate
if (!dir->i_op->unlink)
return -EPERM;
- mutex_lock(&target->i_mutex);
+ inode_lock(target);
if (is_local_mountpoint(dentry))
error = -EBUSY;
else {
@@ -3873,7 +3873,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegate
}
}
out:
- mutex_unlock(&target->i_mutex);
+ inode_unlock(target);
/* We don't d_delete() NFS sillyrenamed files--they still exist. */
if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) {
@@ -3916,7 +3916,7 @@ retry:
if (error)
goto exit1;
retry_deleg:
- mutex_lock_nested(&path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
dentry = __lookup_hash(&last, path.dentry, lookup_flags);
error = PTR_ERR(dentry);
if (!IS_ERR(dentry)) {
@@ -3934,7 +3934,7 @@ retry_deleg:
exit2:
dput(dentry);
}
- mutex_unlock(&path.dentry->d_inode->i_mutex);
+ inode_unlock(path.dentry->d_inode);
if (inode)
iput(inode); /* truncate the inode here */
inode = NULL;
@@ -4086,7 +4086,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
if (error)
return error;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* Make sure we don't allow creating hardlink to an unlinked file */
if (inode->i_nlink == 0 && !(inode->i_state & I_LINKABLE))
error = -ENOENT;
@@ -4103,7 +4103,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
inode->i_state &= ~I_LINKABLE;
spin_unlock(&inode->i_lock);
}
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (!error)
fsnotify_link(dir, inode, new_dentry);
return error;
@@ -4303,7 +4303,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (!is_dir || (flags & RENAME_EXCHANGE))
lock_two_nondirectories(source, target);
else if (target)
- mutex_lock(&target->i_mutex);
+ inode_lock(target);
error = -EBUSY;
if (is_local_mountpoint(old_dentry) || is_local_mountpoint(new_dentry))
@@ -4356,7 +4356,7 @@ out:
if (!is_dir || (flags & RENAME_EXCHANGE))
unlock_two_nondirectories(source, target);
else if (target)
- mutex_unlock(&target->i_mutex);
+ inode_unlock(target);
dput(new_dentry);
if (!error) {
fsnotify_move(old_dir, new_dir, old_name, is_dir,
diff --git a/fs/namespace.c b/fs/namespace.c
index a830e1463704..4fb1691b4355 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1961,9 +1961,9 @@ static struct mountpoint *lock_mount(struct path *path)
struct vfsmount *mnt;
struct dentry *dentry = path->dentry;
retry:
- mutex_lock(&dentry->d_inode->i_mutex);
+ inode_lock(dentry->d_inode);
if (unlikely(cant_mount(dentry))) {
- mutex_unlock(&dentry->d_inode->i_mutex);
+ inode_unlock(dentry->d_inode);
return ERR_PTR(-ENOENT);
}
namespace_lock();
@@ -1974,13 +1974,13 @@ retry:
mp = new_mountpoint(dentry);
if (IS_ERR(mp)) {
namespace_unlock();
- mutex_unlock(&dentry->d_inode->i_mutex);
+ inode_unlock(dentry->d_inode);
return mp;
}
return mp;
}
namespace_unlock();
- mutex_unlock(&path->dentry->d_inode->i_mutex);
+ inode_unlock(path->dentry->d_inode);
path_put(path);
path->mnt = mnt;
dentry = path->dentry = dget(mnt->mnt_root);
@@ -1992,7 +1992,7 @@ static void unlock_mount(struct mountpoint *where)
struct dentry *dentry = where->m_dentry;
put_mountpoint(where);
namespace_unlock();
- mutex_unlock(&dentry->d_inode->i_mutex);
+ inode_unlock(dentry->d_inode);
}
static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index f0e3e9e747dd..26c2de2de13f 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -369,7 +369,7 @@ ncp_lookup_validate(struct dentry *dentry, unsigned int flags)
if (!res) {
struct inode *inode = d_inode(dentry);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (finfo.i.dirEntNum == NCP_FINFO(inode)->dirEntNum) {
ncp_new_dentry(dentry);
val=1;
@@ -377,7 +377,7 @@ ncp_lookup_validate(struct dentry *dentry, unsigned int flags)
ncp_dbg(2, "found, but dirEntNum changed\n");
ncp_update_inode2(inode, &finfo);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
finished:
@@ -639,9 +639,9 @@ ncp_fill_cache(struct file *file, struct dir_context *ctx,
} else {
struct inode *inode = d_inode(newdent);
- mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(inode, I_MUTEX_CHILD);
ncp_update_inode2(inode, entry);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
if (ctl.idx >= NCP_DIRCACHE_SIZE) {
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index 011324ce9df2..dd38ca1f2ecb 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -224,10 +224,10 @@ ncp_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
iocb->ki_pos = pos;
if (pos > i_size_read(inode)) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (pos > i_size_read(inode))
i_size_write(inode, pos);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
ncp_dbg(1, "exit %pD2\n", file);
outrel:
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index c82a21228a34..9cce67043f92 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -940,7 +940,7 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
dfprintk(FILE, "NFS: llseek dir(%pD2, %lld, %d)\n",
filp, offset, whence);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
switch (whence) {
case 1:
offset += filp->f_pos;
@@ -957,7 +957,7 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
dir_ctx->duped = 0;
}
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return offset;
}
@@ -972,9 +972,9 @@ static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end,
dfprintk(FILE, "NFS: fsync dir(%pD2) datasync %d\n", filp, datasync);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return 0;
}
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 7ab7ec9f4eed..7a0cfd3266e5 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -580,7 +580,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
if (!count)
goto out;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
result = nfs_sync_mapping(mapping);
if (result)
goto out_unlock;
@@ -608,7 +608,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
NFS_I(inode)->read_io += count;
result = nfs_direct_read_schedule_iovec(dreq, iter, pos);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (!result) {
result = nfs_direct_wait(dreq);
@@ -622,7 +622,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
out_release:
nfs_direct_req_release(dreq);
out_unlock:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
out:
return result;
}
@@ -1005,7 +1005,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
pos = iocb->ki_pos;
end = (pos + iov_iter_count(iter) - 1) >> PAGE_CACHE_SHIFT;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
result = nfs_sync_mapping(mapping);
if (result)
@@ -1045,7 +1045,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
pos >> PAGE_CACHE_SHIFT, end);
}
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (!result) {
result = nfs_direct_wait(dreq);
@@ -1066,7 +1066,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
out_release:
nfs_direct_req_release(dreq);
out_unlock:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return result;
}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 4ef8f5addcad..748bb813b8ec 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -278,9 +278,9 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (ret != 0)
break;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = nfs_file_fsync_commit(file, start, end, datasync);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
/*
* If nfs_file_fsync_commit detected a server reboot, then
* resend all dirty pages that might have been covered by
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index bb1f4e7a3270..3384dc8e6683 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -971,7 +971,7 @@ filelayout_mark_request_commit(struct nfs_page *req,
u32 i, j;
if (fl->commit_through_mds) {
- nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo);
+ nfs_request_add_commit_list(req, cinfo);
} else {
/* Note that we are calling nfs4_fl_calc_j_index on each page
* that ends up being committed to a data server. An attractive
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 6594e9f903a0..5bcd92d50e82 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1948,11 +1948,9 @@ ff_layout_encode_layoutreturn(struct pnfs_layout_hdr *lo,
start = xdr_reserve_space(xdr, 4);
BUG_ON(!start);
- if (ff_layout_encode_ioerr(flo, xdr, args))
- goto out;
-
+ ff_layout_encode_ioerr(flo, xdr, args);
ff_layout_encode_iostats(flo, xdr, args);
-out:
+
*start = cpu_to_be32((xdr->p - start - 1) * 4);
dprintk("%s: Return\n", __func__);
}
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index bd0327541366..29898a9550fa 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -218,63 +218,55 @@ static void extend_ds_error(struct nfs4_ff_layout_ds_err *err,
err->length = end - err->offset;
}
-static bool ds_error_can_merge(struct nfs4_ff_layout_ds_err *err, u64 offset,
- u64 length, int status, enum nfs_opnum4 opnum,
- nfs4_stateid *stateid,
- struct nfs4_deviceid *deviceid)
+static int
+ff_ds_error_match(const struct nfs4_ff_layout_ds_err *e1,
+ const struct nfs4_ff_layout_ds_err *e2)
{
- return err->status == status && err->opnum == opnum &&
- nfs4_stateid_match(&err->stateid, stateid) &&
- !memcmp(&err->deviceid, deviceid, sizeof(*deviceid)) &&
- end_offset(err->offset, err->length) >= offset &&
- err->offset <= end_offset(offset, length);
-}
-
-static bool merge_ds_error(struct nfs4_ff_layout_ds_err *old,
- struct nfs4_ff_layout_ds_err *new)
-{
- if (!ds_error_can_merge(old, new->offset, new->length, new->status,
- new->opnum, &new->stateid, &new->deviceid))
- return false;
-
- extend_ds_error(old, new->offset, new->length);
- return true;
+ int ret;
+
+ if (e1->opnum != e2->opnum)
+ return e1->opnum < e2->opnum ? -1 : 1;
+ if (e1->status != e2->status)
+ return e1->status < e2->status ? -1 : 1;
+ ret = memcmp(&e1->stateid, &e2->stateid, sizeof(e1->stateid));
+ if (ret != 0)
+ return ret;
+ ret = memcmp(&e1->deviceid, &e2->deviceid, sizeof(e1->deviceid));
+ if (ret != 0)
+ return ret;
+ if (end_offset(e1->offset, e1->length) < e2->offset)
+ return -1;
+ if (e1->offset > end_offset(e2->offset, e2->length))
+ return 1;
+ /* If ranges overlap or are contiguous, they are the same */
+ return 0;
}
-static bool
+static void
ff_layout_add_ds_error_locked(struct nfs4_flexfile_layout *flo,
struct nfs4_ff_layout_ds_err *dserr)
{
- struct nfs4_ff_layout_ds_err *err;
-
- list_for_each_entry(err, &flo->error_list, list) {
- if (merge_ds_error(err, dserr)) {
- return true;
- }
- }
-
- list_add(&dserr->list, &flo->error_list);
- return false;
-}
-
-static bool
-ff_layout_update_ds_error(struct nfs4_flexfile_layout *flo, u64 offset,
- u64 length, int status, enum nfs_opnum4 opnum,
- nfs4_stateid *stateid, struct nfs4_deviceid *deviceid)
-{
- bool found = false;
- struct nfs4_ff_layout_ds_err *err;
-
- list_for_each_entry(err, &flo->error_list, list) {
- if (ds_error_can_merge(err, offset, length, status, opnum,
- stateid, deviceid)) {
- found = true;
- extend_ds_error(err, offset, length);
+ struct nfs4_ff_layout_ds_err *err, *tmp;
+ struct list_head *head = &flo->error_list;
+ int match;
+
+ /* Do insertion sort w/ merges */
+ list_for_each_entry_safe(err, tmp, &flo->error_list, list) {
+ match = ff_ds_error_match(err, dserr);
+ if (match < 0)
+ continue;
+ if (match > 0) {
+ /* Add entry "dserr" _before_ entry "err" */
+ head = &err->list;
break;
}
+ /* Entries match, so merge "err" into "dserr" */
+ extend_ds_error(dserr, err->offset, err->length);
+ list_del(&err->list);
+ kfree(err);
}
- return found;
+ list_add_tail(&dserr->list, head);
}
int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
@@ -283,7 +275,6 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
gfp_t gfp_flags)
{
struct nfs4_ff_layout_ds_err *dserr;
- bool needfree;
if (status == 0)
return 0;
@@ -291,14 +282,6 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
if (mirror->mirror_ds == NULL)
return -EINVAL;
- spin_lock(&flo->generic_hdr.plh_inode->i_lock);
- if (ff_layout_update_ds_error(flo, offset, length, status, opnum,
- &mirror->stateid,
- &mirror->mirror_ds->id_node.deviceid)) {
- spin_unlock(&flo->generic_hdr.plh_inode->i_lock);
- return 0;
- }
- spin_unlock(&flo->generic_hdr.plh_inode->i_lock);
dserr = kmalloc(sizeof(*dserr), gfp_flags);
if (!dserr)
return -ENOMEM;
@@ -313,10 +296,8 @@ int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
NFS4_DEVICEID4_SIZE);
spin_lock(&flo->generic_hdr.plh_inode->i_lock);
- needfree = ff_layout_add_ds_error_locked(flo, dserr);
+ ff_layout_add_ds_error_locked(flo, dserr);
spin_unlock(&flo->generic_hdr.plh_inode->i_lock);
- if (needfree)
- kfree(dserr);
return 0;
}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 8e24d886d2c5..86faecf8f328 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -661,9 +661,9 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
trace_nfs_getattr_enter(inode);
/* Flush out writes to the server in order to update c/mtime. */
if (S_ISREG(inode->i_mode)) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
err = nfs_sync_inode(inode);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (err)
goto out;
}
@@ -1178,9 +1178,9 @@ static int __nfs_revalidate_mapping(struct inode *inode,
spin_unlock(&inode->i_lock);
trace_nfs_invalidate_mapping_enter(inode);
if (may_lock) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = nfs_invalidate_mapping(inode, mapping);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
} else
ret = nfs_invalidate_mapping(inode, mapping);
trace_nfs_invalidate_mapping_exit(inode, ret);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 4e8cc942336c..9a547aa3ec8e 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -484,7 +484,7 @@ void nfs_retry_commit(struct list_head *page_list,
struct nfs_commit_info *cinfo,
u32 ds_commit_idx);
void nfs_commitdata_release(struct nfs_commit_data *data);
-void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
+void nfs_request_add_commit_list(struct nfs_page *req,
struct nfs_commit_info *cinfo);
void nfs_request_add_commit_list_locked(struct nfs_page *req,
struct list_head *dst,
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 6e8174930a48..bd25dc7077f7 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -101,13 +101,13 @@ int nfs42_proc_allocate(struct file *filep, loff_t offset, loff_t len)
if (!nfs_server_capable(inode, NFS_CAP_ALLOCATE))
return -EOPNOTSUPP;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
err = nfs42_proc_fallocate(&msg, filep, offset, len);
if (err == -EOPNOTSUPP)
NFS_SERVER(inode)->caps &= ~NFS_CAP_ALLOCATE;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return err;
}
@@ -123,7 +123,7 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
return -EOPNOTSUPP;
nfs_wb_all(inode);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
err = nfs42_proc_fallocate(&msg, filep, offset, len);
if (err == 0)
@@ -131,7 +131,7 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
if (err == -EOPNOTSUPP)
NFS_SERVER(inode)->caps &= ~NFS_CAP_DEALLOCATE;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return err;
}
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 26f9a23e2b25..57ca1c8039c1 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -141,11 +141,11 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (ret != 0)
break;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = nfs_file_fsync_commit(file, start, end, datasync);
if (!ret)
ret = pnfs_sync_inode(inode, !!datasync);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
/*
* If nfs_file_fsync_commit detected a server reboot, then
* resend all dirty pages that might have been covered by
@@ -219,13 +219,13 @@ static int nfs42_clone_file_range(struct file *src_file, loff_t src_off,
/* XXX: do we lock at all? what if server needs CB_RECALL_LAYOUT? */
if (same_inode) {
- mutex_lock(&src_inode->i_mutex);
+ inode_lock(src_inode);
} else if (dst_inode < src_inode) {
- mutex_lock_nested(&dst_inode->i_mutex, I_MUTEX_PARENT);
- mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(dst_inode, I_MUTEX_PARENT);
+ inode_lock_nested(src_inode, I_MUTEX_CHILD);
} else {
- mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_PARENT);
- mutex_lock_nested(&dst_inode->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(src_inode, I_MUTEX_PARENT);
+ inode_lock_nested(dst_inode, I_MUTEX_CHILD);
}
/* flush all pending writes on both src and dst so that server
@@ -246,13 +246,13 @@ static int nfs42_clone_file_range(struct file *src_file, loff_t src_off,
out_unlock:
if (same_inode) {
- mutex_unlock(&src_inode->i_mutex);
+ inode_unlock(src_inode);
} else if (dst_inode < src_inode) {
- mutex_unlock(&src_inode->i_mutex);
- mutex_unlock(&dst_inode->i_mutex);
+ inode_unlock(src_inode);
+ inode_unlock(dst_inode);
} else {
- mutex_unlock(&dst_inode->i_mutex);
- mutex_unlock(&src_inode->i_mutex);
+ inode_unlock(dst_inode);
+ inode_unlock(src_inode);
}
out:
return ret;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index ce43cd6d88c6..5754835a2886 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -830,11 +830,10 @@ EXPORT_SYMBOL_GPL(nfs_request_add_commit_list_locked);
* holding the nfs_page lock.
*/
void
-nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
- struct nfs_commit_info *cinfo)
+nfs_request_add_commit_list(struct nfs_page *req, struct nfs_commit_info *cinfo)
{
spin_lock(cinfo->lock);
- nfs_request_add_commit_list_locked(req, dst, cinfo);
+ nfs_request_add_commit_list_locked(req, &cinfo->mds->list, cinfo);
spin_unlock(cinfo->lock);
nfs_mark_page_unstable(req->wb_page, cinfo);
}
@@ -892,7 +891,7 @@ nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
{
if (pnfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx))
return;
- nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo);
+ nfs_request_add_commit_list(req, cinfo);
}
static void
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 819ad812c71b..4cba7865f496 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -55,10 +55,10 @@ nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u
struct inode *inode = d_inode(resfh->fh_dentry);
int status;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
status = security_inode_setsecctx(resfh->fh_dentry,
label->data, label->len);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (status)
/*
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 79f0307a5ec8..dc8ebecf5618 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -192,7 +192,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
dir = nn->rec_file->f_path.dentry;
/* lock the parent */
- mutex_lock(&d_inode(dir)->i_mutex);
+ inode_lock(d_inode(dir));
dentry = lookup_one_len(dname, dir, HEXDIR_LEN-1);
if (IS_ERR(dentry)) {
@@ -213,7 +213,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
out_put:
dput(dentry);
out_unlock:
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
if (status == 0) {
if (nn->in_grace) {
crp = nfs4_client_to_reclaim(dname, nn);
@@ -286,7 +286,7 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
}
status = iterate_dir(nn->rec_file, &ctx.ctx);
- mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
list_for_each_entry_safe(entry, tmp, &ctx.names, list) {
if (!status) {
@@ -302,7 +302,7 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
list_del(&entry->list);
kfree(entry);
}
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
nfs4_reset_creds(original_cred);
list_for_each_entry_safe(entry, tmp, &ctx.names, list) {
@@ -322,7 +322,7 @@ nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn)
dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name);
dir = nn->rec_file->f_path.dentry;
- mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
dentry = lookup_one_len(name, dir, namlen);
if (IS_ERR(dentry)) {
status = PTR_ERR(dentry);
@@ -335,7 +335,7 @@ nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn)
out:
dput(dentry);
out_unlock:
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
return status;
}
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index 0770bcb543c8..f84fe6bf9aee 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -288,7 +288,7 @@ fh_lock_nested(struct svc_fh *fhp, unsigned int subclass)
}
inode = d_inode(dentry);
- mutex_lock_nested(&inode->i_mutex, subclass);
+ inode_lock_nested(inode, subclass);
fill_pre_wcc(fhp);
fhp->fh_locked = true;
}
@@ -307,7 +307,7 @@ fh_unlock(struct svc_fh *fhp)
{
if (fhp->fh_locked) {
fill_post_wcc(fhp);
- mutex_unlock(&d_inode(fhp->fh_dentry)->i_mutex);
+ inode_unlock(d_inode(fhp->fh_dentry));
fhp->fh_locked = false;
}
}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 6739077f17fe..5d2a57e4c03a 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -493,9 +493,9 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
dentry = fhp->fh_dentry;
- mutex_lock(&d_inode(dentry)->i_mutex);
+ inode_lock(d_inode(dentry));
host_error = security_inode_setsecctx(dentry, label->data, label->len);
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
return nfserrno(host_error);
}
#else
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 10b22527a617..21a1e2e0d92f 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -1003,7 +1003,7 @@ int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
if (ret)
return ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
isize = i_size_read(inode);
@@ -1113,6 +1113,6 @@ int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
if (ret == 1)
ret = 0;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index aba43811d6ef..e8fe24882b5b 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -158,7 +158,7 @@ static int nilfs_ioctl_setflags(struct inode *inode, struct file *filp,
flags = nilfs_mask_flags(inode->i_mode, flags);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
oldflags = NILFS_I(inode)->i_flags;
@@ -186,7 +186,7 @@ static int nilfs_ioctl_setflags(struct inode *inode, struct file *filp,
nilfs_mark_inode_dirty(inode);
ret = nilfs_transaction_commit(inode->i_sb);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
mnt_drop_write_file(filp);
return ret;
}
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index 9e38dafa3bc7..b2eff5816adc 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -1509,7 +1509,7 @@ static int ntfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
err = filemap_write_and_wait_range(vi->i_mapping, start, end);
if (err)
return err;
- mutex_lock(&vi->i_mutex);
+ inode_lock(vi);
BUG_ON(!S_ISDIR(vi->i_mode));
/* If the bitmap attribute inode is in memory sync it, too. */
@@ -1532,7 +1532,7 @@ static int ntfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
else
ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx. Error "
"%u.", datasync ? "data" : "", vi->i_ino, -ret);
- mutex_unlock(&vi->i_mutex);
+ inode_unlock(vi);
return ret;
}
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 9d383e5eff0e..bed4d427dfae 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -1944,14 +1944,14 @@ static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
ssize_t written = 0;
ssize_t err;
- mutex_lock(&vi->i_mutex);
+ inode_lock(vi);
/* We can write back this queue in page reclaim. */
current->backing_dev_info = inode_to_bdi(vi);
err = ntfs_prepare_file_for_write(iocb, from);
if (iov_iter_count(from) && !err)
written = ntfs_perform_write(file, from, iocb->ki_pos);
current->backing_dev_info = NULL;
- mutex_unlock(&vi->i_mutex);
+ inode_unlock(vi);
if (likely(written > 0)) {
err = generic_write_sync(file, iocb->ki_pos, written);
if (err < 0)
@@ -1996,7 +1996,7 @@ static int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end,
err = filemap_write_and_wait_range(vi->i_mapping, start, end);
if (err)
return err;
- mutex_lock(&vi->i_mutex);
+ inode_lock(vi);
BUG_ON(S_ISDIR(vi->i_mode));
if (!datasync || !NInoNonResident(NTFS_I(vi)))
@@ -2015,7 +2015,7 @@ static int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end,
else
ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx. Error "
"%u.", datasync ? "data" : "", vi->i_ino, -ret);
- mutex_unlock(&vi->i_mutex);
+ inode_unlock(vi);
return ret;
}
diff --git a/fs/ntfs/quota.c b/fs/ntfs/quota.c
index d80e3315cab0..9793e68ba1dd 100644
--- a/fs/ntfs/quota.c
+++ b/fs/ntfs/quota.c
@@ -48,7 +48,7 @@ bool ntfs_mark_quotas_out_of_date(ntfs_volume *vol)
ntfs_error(vol->sb, "Quota inodes are not open.");
return false;
}
- mutex_lock(&vol->quota_q_ino->i_mutex);
+ inode_lock(vol->quota_q_ino);
ictx = ntfs_index_ctx_get(NTFS_I(vol->quota_q_ino));
if (!ictx) {
ntfs_error(vol->sb, "Failed to get index context.");
@@ -98,7 +98,7 @@ bool ntfs_mark_quotas_out_of_date(ntfs_volume *vol)
ntfs_index_entry_mark_dirty(ictx);
set_done:
ntfs_index_ctx_put(ictx);
- mutex_unlock(&vol->quota_q_ino->i_mutex);
+ inode_unlock(vol->quota_q_ino);
/*
* We set the flag so we do not try to mark the quotas out of date
* again on remount.
@@ -110,7 +110,7 @@ done:
err_out:
if (ictx)
ntfs_index_ctx_put(ictx);
- mutex_unlock(&vol->quota_q_ino->i_mutex);
+ inode_unlock(vol->quota_q_ino);
return false;
}
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 2f77f8dfb861..1b38abdaa3ed 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -1284,10 +1284,10 @@ static int check_windows_hibernation_status(ntfs_volume *vol)
* Find the inode number for the hibernation file by looking up the
* filename hiberfil.sys in the root directory.
*/
- mutex_lock(&vol->root_ino->i_mutex);
+ inode_lock(vol->root_ino);
mref = ntfs_lookup_inode_by_name(NTFS_I(vol->root_ino), hiberfil, 12,
&name);
- mutex_unlock(&vol->root_ino->i_mutex);
+ inode_unlock(vol->root_ino);
if (IS_ERR_MREF(mref)) {
ret = MREF_ERR(mref);
/* If the file does not exist, Windows is not hibernated. */
@@ -1377,10 +1377,10 @@ static bool load_and_init_quota(ntfs_volume *vol)
* Find the inode number for the quota file by looking up the filename
* $Quota in the extended system files directory $Extend.
*/
- mutex_lock(&vol->extend_ino->i_mutex);
+ inode_lock(vol->extend_ino);
mref = ntfs_lookup_inode_by_name(NTFS_I(vol->extend_ino), Quota, 6,
&name);
- mutex_unlock(&vol->extend_ino->i_mutex);
+ inode_unlock(vol->extend_ino);
if (IS_ERR_MREF(mref)) {
/*
* If the file does not exist, quotas are disabled and have
@@ -1460,10 +1460,10 @@ static bool load_and_init_usnjrnl(ntfs_volume *vol)
* Find the inode number for the transaction log file by looking up the
* filename $UsnJrnl in the extended system files directory $Extend.
*/
- mutex_lock(&vol->extend_ino->i_mutex);
+ inode_lock(vol->extend_ino);
mref = ntfs_lookup_inode_by_name(NTFS_I(vol->extend_ino), UsnJrnl, 8,
&name);
- mutex_unlock(&vol->extend_ino->i_mutex);
+ inode_unlock(vol->extend_ino);
if (IS_ERR_MREF(mref)) {
/*
* If the file does not exist, transaction logging is disabled,
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index a3ded88718c9..d002579c6f2b 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5719,7 +5719,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
goto bail;
}
- mutex_lock(&tl_inode->i_mutex);
+ inode_lock(tl_inode);
if (ocfs2_truncate_log_needs_flush(osb)) {
ret = __ocfs2_flush_truncate_log(osb);
@@ -5776,7 +5776,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
out_commit:
ocfs2_commit_trans(osb, handle);
out:
- mutex_unlock(&tl_inode->i_mutex);
+ inode_unlock(tl_inode);
bail:
if (meta_ac)
ocfs2_free_alloc_context(meta_ac);
@@ -5832,7 +5832,7 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
struct ocfs2_dinode *di;
struct ocfs2_truncate_log *tl;
- BUG_ON(mutex_trylock(&tl_inode->i_mutex));
+ BUG_ON(inode_trylock(tl_inode));
start_cluster = ocfs2_blocks_to_clusters(osb->sb, start_blk);
@@ -5980,7 +5980,7 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
struct ocfs2_dinode *di;
struct ocfs2_truncate_log *tl;
- BUG_ON(mutex_trylock(&tl_inode->i_mutex));
+ BUG_ON(inode_trylock(tl_inode));
di = (struct ocfs2_dinode *) tl_bh->b_data;
@@ -6008,7 +6008,7 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
goto out;
}
- mutex_lock(&data_alloc_inode->i_mutex);
+ inode_lock(data_alloc_inode);
status = ocfs2_inode_lock(data_alloc_inode, &data_alloc_bh, 1);
if (status < 0) {
@@ -6035,7 +6035,7 @@ out_unlock:
ocfs2_inode_unlock(data_alloc_inode, 1);
out_mutex:
- mutex_unlock(&data_alloc_inode->i_mutex);
+ inode_unlock(data_alloc_inode);
iput(data_alloc_inode);
out:
@@ -6047,9 +6047,9 @@ int ocfs2_flush_truncate_log(struct ocfs2_super *osb)
int status;
struct inode *tl_inode = osb->osb_tl_inode;
- mutex_lock(&tl_inode->i_mutex);
+ inode_lock(tl_inode);
status = __ocfs2_flush_truncate_log(osb);
- mutex_unlock(&tl_inode->i_mutex);
+ inode_unlock(tl_inode);
return status;
}
@@ -6208,7 +6208,7 @@ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
(unsigned long long)le64_to_cpu(tl_copy->i_blkno),
num_recs);
- mutex_lock(&tl_inode->i_mutex);
+ inode_lock(tl_inode);
for(i = 0; i < num_recs; i++) {
if (ocfs2_truncate_log_needs_flush(osb)) {
status = __ocfs2_flush_truncate_log(osb);
@@ -6239,7 +6239,7 @@ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
}
bail_up:
- mutex_unlock(&tl_inode->i_mutex);
+ inode_unlock(tl_inode);
return status;
}
@@ -6346,7 +6346,7 @@ static int ocfs2_free_cached_blocks(struct ocfs2_super *osb,
goto out;
}
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = ocfs2_inode_lock(inode, &di_bh, 1);
if (ret) {
@@ -6395,7 +6395,7 @@ out_unlock:
ocfs2_inode_unlock(inode, 1);
brelse(di_bh);
out_mutex:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
iput(inode);
out:
while(head) {
@@ -6439,7 +6439,7 @@ static int ocfs2_free_cached_clusters(struct ocfs2_super *osb,
handle_t *handle;
int ret = 0;
- mutex_lock(&tl_inode->i_mutex);
+ inode_lock(tl_inode);
while (head) {
if (ocfs2_truncate_log_needs_flush(osb)) {
@@ -6471,7 +6471,7 @@ static int ocfs2_free_cached_clusters(struct ocfs2_super *osb,
}
}
- mutex_unlock(&tl_inode->i_mutex);
+ inode_unlock(tl_inode);
while (head) {
/* Premature exit may have left some dangling items. */
@@ -7355,7 +7355,7 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
goto out;
}
- mutex_lock(&main_bm_inode->i_mutex);
+ inode_lock(main_bm_inode);
ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 0);
if (ret < 0) {
@@ -7422,7 +7422,7 @@ out_unlock:
ocfs2_inode_unlock(main_bm_inode, 0);
brelse(main_bm_bh);
out_mutex:
- mutex_unlock(&main_bm_inode->i_mutex);
+ inode_unlock(main_bm_inode);
iput(main_bm_inode);
out:
return ret;
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 7f604727f487..794fd1587f34 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -2046,9 +2046,9 @@ static int ocfs2_try_to_free_truncate_log(struct ocfs2_super *osb,
int ret = 0;
unsigned int truncated_clusters;
- mutex_lock(&osb->osb_tl_inode->i_mutex);
+ inode_lock(osb->osb_tl_inode);
truncated_clusters = osb->truncated_clusters;
- mutex_unlock(&osb->osb_tl_inode->i_mutex);
+ inode_unlock(osb->osb_tl_inode);
/*
* Check whether we can succeed in allocating if we free
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index ffecf89c8c1c..e1adf285fc31 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -4361,7 +4361,7 @@ static int ocfs2_dx_dir_remove_index(struct inode *dir,
mlog_errno(ret);
goto out;
}
- mutex_lock(&dx_alloc_inode->i_mutex);
+ inode_lock(dx_alloc_inode);
ret = ocfs2_inode_lock(dx_alloc_inode, &dx_alloc_bh, 1);
if (ret) {
@@ -4410,7 +4410,7 @@ out_unlock:
ocfs2_inode_unlock(dx_alloc_inode, 1);
out_mutex:
- mutex_unlock(&dx_alloc_inode->i_mutex);
+ inode_unlock(dx_alloc_inode);
brelse(dx_alloc_bh);
out:
iput(dx_alloc_inode);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index d63127932509..7cb38fdca229 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1872,7 +1872,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
return -EROFS;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/*
* This prevents concurrent writes on other nodes
@@ -1991,7 +1991,7 @@ out_rw_unlock:
ocfs2_rw_unlock(inode, 1);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return ret;
}
@@ -2299,7 +2299,7 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
appending = iocb->ki_flags & IOCB_APPEND ? 1 : 0;
direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
relock:
/*
@@ -2435,7 +2435,7 @@ out:
ocfs2_rw_unlock(inode, rw_level);
out_mutex:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (written)
ret = written;
@@ -2547,7 +2547,7 @@ static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int whence)
struct inode *inode = file->f_mapping->host;
int ret = 0;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
switch (whence) {
case SEEK_SET:
@@ -2585,7 +2585,7 @@ static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int whence)
offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (ret)
return ret;
return offset;
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 97a563bab9a8..36294446d960 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -630,10 +630,10 @@ static int ocfs2_remove_inode(struct inode *inode,
goto bail;
}
- mutex_lock(&inode_alloc_inode->i_mutex);
+ inode_lock(inode_alloc_inode);
status = ocfs2_inode_lock(inode_alloc_inode, &inode_alloc_bh, 1);
if (status < 0) {
- mutex_unlock(&inode_alloc_inode->i_mutex);
+ inode_unlock(inode_alloc_inode);
mlog_errno(status);
goto bail;
@@ -680,7 +680,7 @@ bail_commit:
ocfs2_commit_trans(osb, handle);
bail_unlock:
ocfs2_inode_unlock(inode_alloc_inode, 1);
- mutex_unlock(&inode_alloc_inode->i_mutex);
+ inode_unlock(inode_alloc_inode);
brelse(inode_alloc_bh);
bail:
iput(inode_alloc_inode);
@@ -751,10 +751,10 @@ static int ocfs2_wipe_inode(struct inode *inode,
/* Lock the orphan dir. The lock will be held for the entire
* delete_inode operation. We do this now to avoid races with
* recovery completion on other nodes. */
- mutex_lock(&orphan_dir_inode->i_mutex);
+ inode_lock(orphan_dir_inode);
status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
if (status < 0) {
- mutex_unlock(&orphan_dir_inode->i_mutex);
+ inode_unlock(orphan_dir_inode);
mlog_errno(status);
goto bail;
@@ -803,7 +803,7 @@ bail_unlock_dir:
return status;
ocfs2_inode_unlock(orphan_dir_inode, 1);
- mutex_unlock(&orphan_dir_inode->i_mutex);
+ inode_unlock(orphan_dir_inode);
brelse(orphan_dir_bh);
bail:
iput(orphan_dir_inode);
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 16b0bb482ea7..4506ec5ec2ea 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -86,7 +86,7 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
unsigned oldflags;
int status;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
status = ocfs2_inode_lock(inode, &bh, 1);
if (status < 0) {
@@ -135,7 +135,7 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
bail_unlock:
ocfs2_inode_unlock(inode, 1);
bail:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
brelse(bh);
@@ -287,7 +287,7 @@ static int ocfs2_info_scan_inode_alloc(struct ocfs2_super *osb,
struct ocfs2_dinode *dinode_alloc = NULL;
if (inode_alloc)
- mutex_lock(&inode_alloc->i_mutex);
+ inode_lock(inode_alloc);
if (o2info_coherent(&fi->ifi_req)) {
status = ocfs2_inode_lock(inode_alloc, &bh, 0);
@@ -317,7 +317,7 @@ bail:
ocfs2_inode_unlock(inode_alloc, 0);
if (inode_alloc)
- mutex_unlock(&inode_alloc->i_mutex);
+ inode_unlock(inode_alloc);
brelse(bh);
@@ -547,7 +547,7 @@ static int ocfs2_info_freefrag_scan_bitmap(struct ocfs2_super *osb,
struct ocfs2_dinode *gb_dinode = NULL;
if (gb_inode)
- mutex_lock(&gb_inode->i_mutex);
+ inode_lock(gb_inode);
if (o2info_coherent(&ffg->iff_req)) {
status = ocfs2_inode_lock(gb_inode, &bh, 0);
@@ -604,7 +604,7 @@ bail:
ocfs2_inode_unlock(gb_inode, 0);
if (gb_inode)
- mutex_unlock(&gb_inode->i_mutex);
+ inode_unlock(gb_inode);
iput(gb_inode);
brelse(bh);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 3772a2dbb980..61b833b721d8 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -2088,7 +2088,7 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb,
return status;
}
- mutex_lock(&orphan_dir_inode->i_mutex);
+ inode_lock(orphan_dir_inode);
status = ocfs2_inode_lock(orphan_dir_inode, NULL, 0);
if (status < 0) {
mlog_errno(status);
@@ -2106,7 +2106,7 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb,
out_cluster:
ocfs2_inode_unlock(orphan_dir_inode, 0);
out:
- mutex_unlock(&orphan_dir_inode->i_mutex);
+ inode_unlock(orphan_dir_inode);
iput(orphan_dir_inode);
return status;
}
@@ -2196,7 +2196,7 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb,
oi->ip_next_orphan = NULL;
if (oi->ip_flags & OCFS2_INODE_DIO_ORPHAN_ENTRY) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = ocfs2_rw_lock(inode, 1);
if (ret < 0) {
mlog_errno(ret);
@@ -2235,7 +2235,7 @@ unlock_inode:
unlock_rw:
ocfs2_rw_unlock(inode, 1);
unlock_mutex:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
/* clear dio flag in ocfs2_inode_info */
oi->ip_flags &= ~OCFS2_INODE_DIO_ORPHAN_ENTRY;
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index e9c99e35f5ea..7d62c43a2c3e 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -414,7 +414,7 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
goto out;
}
- mutex_lock(&main_bm_inode->i_mutex);
+ inode_lock(main_bm_inode);
status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
if (status < 0) {
@@ -468,7 +468,7 @@ out_unlock:
ocfs2_inode_unlock(main_bm_inode, 1);
out_mutex:
- mutex_unlock(&main_bm_inode->i_mutex);
+ inode_unlock(main_bm_inode);
iput(main_bm_inode);
out:
@@ -506,7 +506,7 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
goto bail;
}
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
status = ocfs2_read_inode_block_full(inode, &alloc_bh,
OCFS2_BH_IGNORE_CACHE);
@@ -539,7 +539,7 @@ bail:
brelse(alloc_bh);
if (inode) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
iput(inode);
}
@@ -571,7 +571,7 @@ int ocfs2_complete_local_alloc_recovery(struct ocfs2_super *osb,
goto out;
}
- mutex_lock(&main_bm_inode->i_mutex);
+ inode_lock(main_bm_inode);
status = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
if (status < 0) {
@@ -601,7 +601,7 @@ out_unlock:
ocfs2_inode_unlock(main_bm_inode, 1);
out_mutex:
- mutex_unlock(&main_bm_inode->i_mutex);
+ inode_unlock(main_bm_inode);
brelse(main_bm_bh);
@@ -643,7 +643,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
goto bail;
}
- mutex_lock(&local_alloc_inode->i_mutex);
+ inode_lock(local_alloc_inode);
/*
* We must double check state and allocator bits because
@@ -709,7 +709,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
status = 0;
bail:
if (status < 0 && local_alloc_inode) {
- mutex_unlock(&local_alloc_inode->i_mutex);
+ inode_unlock(local_alloc_inode);
iput(local_alloc_inode);
}
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 124471d26a73..e3d05d9901a3 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -276,7 +276,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context,
* context->data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv;
*/
- mutex_lock(&tl_inode->i_mutex);
+ inode_lock(tl_inode);
if (ocfs2_truncate_log_needs_flush(osb)) {
ret = __ocfs2_flush_truncate_log(osb);
@@ -338,7 +338,7 @@ out_commit:
ocfs2_commit_trans(osb, handle);
out_unlock_mutex:
- mutex_unlock(&tl_inode->i_mutex);
+ inode_unlock(tl_inode);
if (context->data_ac) {
ocfs2_free_alloc_context(context->data_ac);
@@ -632,7 +632,7 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
goto out;
}
- mutex_lock(&gb_inode->i_mutex);
+ inode_lock(gb_inode);
ret = ocfs2_inode_lock(gb_inode, &gb_bh, 1);
if (ret) {
@@ -640,7 +640,7 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
goto out_unlock_gb_mutex;
}
- mutex_lock(&tl_inode->i_mutex);
+ inode_lock(tl_inode);
handle = ocfs2_start_trans(osb, credits);
if (IS_ERR(handle)) {
@@ -708,11 +708,11 @@ out_commit:
brelse(gd_bh);
out_unlock_tl_inode:
- mutex_unlock(&tl_inode->i_mutex);
+ inode_unlock(tl_inode);
ocfs2_inode_unlock(gb_inode, 1);
out_unlock_gb_mutex:
- mutex_unlock(&gb_inode->i_mutex);
+ inode_unlock(gb_inode);
brelse(gb_bh);
iput(gb_inode);
@@ -905,7 +905,7 @@ static int ocfs2_move_extents(struct ocfs2_move_extents_context *context)
if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
return -EROFS;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/*
* This prevents concurrent writes from other nodes
@@ -969,7 +969,7 @@ out_inode_unlock:
out_rw_unlock:
ocfs2_rw_unlock(inode, 1);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return status;
}
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index ab42c38031b1..6b3e87189a64 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -1045,7 +1045,7 @@ leave:
if (orphan_dir) {
/* This was locked for us in ocfs2_prepare_orphan_dir() */
ocfs2_inode_unlock(orphan_dir, 1);
- mutex_unlock(&orphan_dir->i_mutex);
+ inode_unlock(orphan_dir);
iput(orphan_dir);
}
@@ -1664,7 +1664,7 @@ bail:
if (orphan_dir) {
/* This was locked for us in ocfs2_prepare_orphan_dir() */
ocfs2_inode_unlock(orphan_dir, 1);
- mutex_unlock(&orphan_dir->i_mutex);
+ inode_unlock(orphan_dir);
iput(orphan_dir);
}
@@ -2121,11 +2121,11 @@ static int ocfs2_lookup_lock_orphan_dir(struct ocfs2_super *osb,
return ret;
}
- mutex_lock(&orphan_dir_inode->i_mutex);
+ inode_lock(orphan_dir_inode);
ret = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
if (ret < 0) {
- mutex_unlock(&orphan_dir_inode->i_mutex);
+ inode_unlock(orphan_dir_inode);
iput(orphan_dir_inode);
mlog_errno(ret);
@@ -2226,7 +2226,7 @@ out:
if (ret) {
ocfs2_inode_unlock(orphan_dir_inode, 1);
- mutex_unlock(&orphan_dir_inode->i_mutex);
+ inode_unlock(orphan_dir_inode);
iput(orphan_dir_inode);
}
@@ -2495,7 +2495,7 @@ out:
ocfs2_free_alloc_context(inode_ac);
/* Unroll orphan dir locking */
- mutex_unlock(&orphan_dir->i_mutex);
+ inode_unlock(orphan_dir);
ocfs2_inode_unlock(orphan_dir, 1);
iput(orphan_dir);
}
@@ -2602,7 +2602,7 @@ leave:
if (orphan_dir) {
/* This was locked for us in ocfs2_prepare_orphan_dir() */
ocfs2_inode_unlock(orphan_dir, 1);
- mutex_unlock(&orphan_dir->i_mutex);
+ inode_unlock(orphan_dir);
iput(orphan_dir);
}
@@ -2689,7 +2689,7 @@ int ocfs2_add_inode_to_orphan(struct ocfs2_super *osb,
bail_unlock_orphan:
ocfs2_inode_unlock(orphan_dir_inode, 1);
- mutex_unlock(&orphan_dir_inode->i_mutex);
+ inode_unlock(orphan_dir_inode);
iput(orphan_dir_inode);
ocfs2_free_dir_lookup_result(&orphan_insert);
@@ -2721,10 +2721,10 @@ int ocfs2_del_inode_from_orphan(struct ocfs2_super *osb,
goto bail;
}
- mutex_lock(&orphan_dir_inode->i_mutex);
+ inode_lock(orphan_dir_inode);
status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
if (status < 0) {
- mutex_unlock(&orphan_dir_inode->i_mutex);
+ inode_unlock(orphan_dir_inode);
iput(orphan_dir_inode);
mlog_errno(status);
goto bail;
@@ -2770,7 +2770,7 @@ bail_commit:
bail_unlock_orphan:
ocfs2_inode_unlock(orphan_dir_inode, 1);
- mutex_unlock(&orphan_dir_inode->i_mutex);
+ inode_unlock(orphan_dir_inode);
brelse(orphan_dir_bh);
iput(orphan_dir_inode);
@@ -2834,12 +2834,12 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
goto leave;
}
- mutex_lock(&orphan_dir_inode->i_mutex);
+ inode_lock(orphan_dir_inode);
status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
if (status < 0) {
mlog_errno(status);
- mutex_unlock(&orphan_dir_inode->i_mutex);
+ inode_unlock(orphan_dir_inode);
iput(orphan_dir_inode);
goto leave;
}
@@ -2901,7 +2901,7 @@ out_commit:
ocfs2_commit_trans(osb, handle);
orphan_unlock:
ocfs2_inode_unlock(orphan_dir_inode, 1);
- mutex_unlock(&orphan_dir_inode->i_mutex);
+ inode_unlock(orphan_dir_inode);
iput(orphan_dir_inode);
leave:
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index fde9ef18cff3..9c9dd30bc945 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -308,7 +308,7 @@ int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex)
WARN_ON(bh != oinfo->dqi_gqi_bh);
spin_unlock(&dq_data_lock);
if (ex) {
- mutex_lock(&oinfo->dqi_gqinode->i_mutex);
+ inode_lock(oinfo->dqi_gqinode);
down_write(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem);
} else {
down_read(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem);
@@ -320,7 +320,7 @@ void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex)
{
if (ex) {
up_write(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem);
- mutex_unlock(&oinfo->dqi_gqinode->i_mutex);
+ inode_unlock(oinfo->dqi_gqinode);
} else {
up_read(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem);
}
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 252119860e6c..3eff031aaf26 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -807,7 +807,7 @@ int ocfs2_remove_refcount_tree(struct inode *inode, struct buffer_head *di_bh)
mlog_errno(ret);
goto out;
}
- mutex_lock(&alloc_inode->i_mutex);
+ inode_lock(alloc_inode);
ret = ocfs2_inode_lock(alloc_inode, &alloc_bh, 1);
if (ret) {
@@ -867,7 +867,7 @@ out_unlock:
}
out_mutex:
if (alloc_inode) {
- mutex_unlock(&alloc_inode->i_mutex);
+ inode_unlock(alloc_inode);
iput(alloc_inode);
}
out:
@@ -4197,7 +4197,7 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
goto out;
}
- mutex_lock_nested(&new_inode->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(new_inode, I_MUTEX_CHILD);
ret = ocfs2_inode_lock_nested(new_inode, &new_bh, 1,
OI_LS_REFLINK_TARGET);
if (ret) {
@@ -4231,7 +4231,7 @@ inode_unlock:
ocfs2_inode_unlock(new_inode, 1);
brelse(new_bh);
out_unlock:
- mutex_unlock(&new_inode->i_mutex);
+ inode_unlock(new_inode);
out:
if (!ret) {
ret = filemap_fdatawait(inode->i_mapping);
@@ -4402,11 +4402,11 @@ static int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir,
return error;
}
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
error = dquot_initialize(dir);
if (!error)
error = ocfs2_reflink(old_dentry, dir, new_dentry, preserve);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (!error)
fsnotify_create(dir, new_dentry);
return error;
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c
index 79b8021302b3..576b9a04873f 100644
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
@@ -301,7 +301,7 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters)
goto out;
}
- mutex_lock(&main_bm_inode->i_mutex);
+ inode_lock(main_bm_inode);
ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
if (ret < 0) {
@@ -375,7 +375,7 @@ out_unlock:
ocfs2_inode_unlock(main_bm_inode, 1);
out_mutex:
- mutex_unlock(&main_bm_inode->i_mutex);
+ inode_unlock(main_bm_inode);
iput(main_bm_inode);
out:
@@ -486,7 +486,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
goto out;
}
- mutex_lock(&main_bm_inode->i_mutex);
+ inode_lock(main_bm_inode);
ret = ocfs2_inode_lock(main_bm_inode, &main_bm_bh, 1);
if (ret < 0) {
@@ -590,7 +590,7 @@ out_unlock:
ocfs2_inode_unlock(main_bm_inode, 1);
out_mutex:
- mutex_unlock(&main_bm_inode->i_mutex);
+ inode_unlock(main_bm_inode);
iput(main_bm_inode);
out:
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index fc6d25f6d444..2f19aeec5482 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -141,7 +141,7 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
if (ac->ac_which != OCFS2_AC_USE_LOCAL)
ocfs2_inode_unlock(inode, 1);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
iput(inode);
ac->ac_inode = NULL;
@@ -797,11 +797,11 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
return -EINVAL;
}
- mutex_lock(&alloc_inode->i_mutex);
+ inode_lock(alloc_inode);
status = ocfs2_inode_lock(alloc_inode, &bh, 1);
if (status < 0) {
- mutex_unlock(&alloc_inode->i_mutex);
+ inode_unlock(alloc_inode);
iput(alloc_inode);
mlog_errno(status);
@@ -2875,10 +2875,10 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
goto bail;
}
- mutex_lock(&inode_alloc_inode->i_mutex);
+ inode_lock(inode_alloc_inode);
status = ocfs2_inode_lock(inode_alloc_inode, &alloc_bh, 0);
if (status < 0) {
- mutex_unlock(&inode_alloc_inode->i_mutex);
+ inode_unlock(inode_alloc_inode);
iput(inode_alloc_inode);
mlog(ML_ERROR, "lock on alloc inode on slot %u failed %d\n",
(u32)suballoc_slot, status);
@@ -2891,7 +2891,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
mlog(ML_ERROR, "test suballoc bit failed %d\n", status);
ocfs2_inode_unlock(inode_alloc_inode, 0);
- mutex_unlock(&inode_alloc_inode->i_mutex);
+ inode_unlock(inode_alloc_inode);
iput(inode_alloc_inode);
brelse(alloc_bh);
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index f0e241ffd94f..7d3d979f57d9 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -2524,7 +2524,7 @@ static int ocfs2_xattr_free_block(struct inode *inode,
mlog_errno(ret);
goto out;
}
- mutex_lock(&xb_alloc_inode->i_mutex);
+ inode_lock(xb_alloc_inode);
ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
if (ret < 0) {
@@ -2549,7 +2549,7 @@ out_unlock:
ocfs2_inode_unlock(xb_alloc_inode, 1);
brelse(xb_alloc_bh);
out_mutex:
- mutex_unlock(&xb_alloc_inode->i_mutex);
+ inode_unlock(xb_alloc_inode);
iput(xb_alloc_inode);
out:
brelse(blk_bh);
@@ -3619,17 +3619,17 @@ int ocfs2_xattr_set(struct inode *inode,
}
}
- mutex_lock(&tl_inode->i_mutex);
+ inode_lock(tl_inode);
if (ocfs2_truncate_log_needs_flush(osb)) {
ret = __ocfs2_flush_truncate_log(osb);
if (ret < 0) {
- mutex_unlock(&tl_inode->i_mutex);
+ inode_unlock(tl_inode);
mlog_errno(ret);
goto cleanup;
}
}
- mutex_unlock(&tl_inode->i_mutex);
+ inode_unlock(tl_inode);
ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
&xbs, &ctxt, ref_meta, &credits);
@@ -5460,7 +5460,7 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
return ret;
}
- mutex_lock(&tl_inode->i_mutex);
+ inode_lock(tl_inode);
if (ocfs2_truncate_log_needs_flush(osb)) {
ret = __ocfs2_flush_truncate_log(osb);
@@ -5504,7 +5504,7 @@ out_commit:
out:
ocfs2_schedule_truncate_log_flush(osb, 1);
- mutex_unlock(&tl_inode->i_mutex);
+ inode_unlock(tl_inode);
if (meta_ac)
ocfs2_free_alloc_context(meta_ac);
diff --git a/fs/open.c b/fs/open.c
index b25b1542c530..55bdc75e2172 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -58,10 +58,10 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
if (ret)
newattrs.ia_valid |= ret | ATTR_FORCE;
- mutex_lock(&dentry->d_inode->i_mutex);
+ inode_lock(dentry->d_inode);
/* Note any delegations or leases have already been broken: */
ret = notify_change(dentry, &newattrs, NULL);
- mutex_unlock(&dentry->d_inode->i_mutex);
+ inode_unlock(dentry->d_inode);
return ret;
}
@@ -510,7 +510,7 @@ static int chmod_common(struct path *path, umode_t mode)
if (error)
return error;
retry_deleg:
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
error = security_path_chmod(path, mode);
if (error)
goto out_unlock;
@@ -518,7 +518,7 @@ retry_deleg:
newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
error = notify_change(path->dentry, &newattrs, &delegated_inode);
out_unlock:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (delegated_inode) {
error = break_deleg_wait(&delegated_inode);
if (!error)
@@ -593,11 +593,11 @@ retry_deleg:
if (!S_ISDIR(inode->i_mode))
newattrs.ia_valid |=
ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
error = security_path_chown(path, uid, gid);
if (!error)
error = notify_change(path->dentry, &newattrs, &delegated_inode);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (delegated_inode) {
error = break_deleg_wait(&delegated_inode);
if (!error)
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index eff6319d5037..d894e7cd9a86 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -248,9 +248,9 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
if (err)
goto out_cleanup;
- mutex_lock(&newdentry->d_inode->i_mutex);
+ inode_lock(newdentry->d_inode);
err = ovl_set_attr(newdentry, stat);
- mutex_unlock(&newdentry->d_inode->i_mutex);
+ inode_unlock(newdentry->d_inode);
if (err)
goto out_cleanup;
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 692ceda3bc21..ed95272d57a6 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -167,7 +167,7 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
struct dentry *newdentry;
int err;
- mutex_lock_nested(&udir->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(udir, I_MUTEX_PARENT);
newdentry = lookup_one_len(dentry->d_name.name, upperdir,
dentry->d_name.len);
err = PTR_ERR(newdentry);
@@ -185,7 +185,7 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
out_dput:
dput(newdentry);
out_unlock:
- mutex_unlock(&udir->i_mutex);
+ inode_unlock(udir);
return err;
}
@@ -258,9 +258,9 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry,
if (err)
goto out_cleanup;
- mutex_lock(&opaquedir->d_inode->i_mutex);
+ inode_lock(opaquedir->d_inode);
err = ovl_set_attr(opaquedir, &stat);
- mutex_unlock(&opaquedir->d_inode->i_mutex);
+ inode_unlock(opaquedir->d_inode);
if (err)
goto out_cleanup;
@@ -599,7 +599,7 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
struct dentry *upper = ovl_dentry_upper(dentry);
int err;
- mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(dir, I_MUTEX_PARENT);
err = -ESTALE;
if (upper->d_parent == upperdir) {
/* Don't let d_delete() think it can reset d_inode */
@@ -619,7 +619,7 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
* now.
*/
d_drop(dentry);
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
return err;
}
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index bf996e574f3d..49e204560655 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -63,9 +63,9 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr)
if (!err) {
upperdentry = ovl_dentry_upper(dentry);
- mutex_lock(&upperdentry->d_inode->i_mutex);
+ inode_lock(upperdentry->d_inode);
err = notify_change(upperdentry, attr, NULL);
- mutex_unlock(&upperdentry->d_inode->i_mutex);
+ inode_unlock(upperdentry->d_inode);
}
ovl_drop_write(dentry);
out:
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index adcb1398c481..fdaf28f75e12 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -228,7 +228,7 @@ static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd)
dput(dentry);
}
}
- mutex_unlock(&dir->d_inode->i_mutex);
+ inode_unlock(dir->d_inode);
}
revert_creds(old_cred);
put_cred(override_cred);
@@ -399,7 +399,7 @@ static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
loff_t res;
struct ovl_dir_file *od = file->private_data;
- mutex_lock(&file_inode(file)->i_mutex);
+ inode_lock(file_inode(file));
if (!file->f_pos)
ovl_dir_reset(file);
@@ -429,7 +429,7 @@ static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
res = offset;
}
out_unlock:
- mutex_unlock(&file_inode(file)->i_mutex);
+ inode_unlock(file_inode(file));
return res;
}
@@ -454,10 +454,10 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
ovl_path_upper(dentry, &upperpath);
realfile = ovl_path_open(&upperpath, O_RDONLY);
smp_mb__before_spinlock();
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (!od->upperfile) {
if (IS_ERR(realfile)) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return PTR_ERR(realfile);
}
od->upperfile = realfile;
@@ -467,7 +467,7 @@ static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
fput(realfile);
realfile = od->upperfile;
}
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
}
@@ -479,9 +479,9 @@ static int ovl_dir_release(struct inode *inode, struct file *file)
struct ovl_dir_file *od = file->private_data;
if (od->cache) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ovl_cache_put(od, file->f_path.dentry);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
fput(od->realfile);
if (od->upperfile)
@@ -557,7 +557,7 @@ void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
{
struct ovl_cache_entry *p;
- mutex_lock_nested(&upper->d_inode->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(upper->d_inode, I_MUTEX_CHILD);
list_for_each_entry(p, list, l_node) {
struct dentry *dentry;
@@ -575,5 +575,5 @@ void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
ovl_cleanup(upper->d_inode, dentry);
dput(dentry);
}
- mutex_unlock(&upper->d_inode->i_mutex);
+ inode_unlock(upper->d_inode);
}
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index d250604f985a..8d826bd56b26 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -229,7 +229,7 @@ void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry)
{
struct ovl_entry *oe = dentry->d_fsdata;
- WARN_ON(!mutex_is_locked(&upperdentry->d_parent->d_inode->i_mutex));
+ WARN_ON(!inode_is_locked(upperdentry->d_parent->d_inode));
WARN_ON(oe->__upperdentry);
BUG_ON(!upperdentry->d_inode);
/*
@@ -244,7 +244,7 @@ void ovl_dentry_version_inc(struct dentry *dentry)
{
struct ovl_entry *oe = dentry->d_fsdata;
- WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
+ WARN_ON(!inode_is_locked(dentry->d_inode));
oe->version++;
}
@@ -252,7 +252,7 @@ u64 ovl_dentry_version_get(struct dentry *dentry)
{
struct ovl_entry *oe = dentry->d_fsdata;
- WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
+ WARN_ON(!inode_is_locked(dentry->d_inode));
return oe->version;
}
@@ -375,9 +375,9 @@ static inline struct dentry *ovl_lookup_real(struct dentry *dir,
{
struct dentry *dentry;
- mutex_lock(&dir->d_inode->i_mutex);
+ inode_lock(dir->d_inode);
dentry = lookup_one_len(name->name, dir, name->len);
- mutex_unlock(&dir->d_inode->i_mutex);
+ inode_unlock(dir->d_inode);
if (IS_ERR(dentry)) {
if (PTR_ERR(dentry) == -ENOENT)
@@ -744,7 +744,7 @@ static struct dentry *ovl_workdir_create(struct vfsmount *mnt,
if (err)
return ERR_PTR(err);
- mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(dir, I_MUTEX_PARENT);
retry:
work = lookup_one_len(OVL_WORKDIR_NAME, dentry,
strlen(OVL_WORKDIR_NAME));
@@ -770,7 +770,7 @@ retry:
goto out_dput;
}
out_unlock:
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
mnt_drop_write(mnt);
return work;
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 92e6726f6e37..a939f5ed7f89 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -552,9 +552,9 @@ static int open_kcore(struct inode *inode, struct file *filp)
if (kcore_need_update)
kcore_update_ram();
if (i_size_read(inode) != proc_root_kcore->size) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
i_size_write(inode, proc_root_kcore->size);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
return 0;
}
diff --git a/fs/proc/self.c b/fs/proc/self.c
index 67e8db442cf0..b6a8d3529fea 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -50,7 +50,7 @@ int proc_setup_self(struct super_block *s)
struct pid_namespace *ns = s->s_fs_info;
struct dentry *self;
- mutex_lock(&root_inode->i_mutex);
+ inode_lock(root_inode);
self = d_alloc_name(s->s_root, "self");
if (self) {
struct inode *inode = new_inode_pseudo(s);
@@ -69,7 +69,7 @@ int proc_setup_self(struct super_block *s)
} else {
self = ERR_PTR(-ENOMEM);
}
- mutex_unlock(&root_inode->i_mutex);
+ inode_unlock(root_inode);
if (IS_ERR(self)) {
pr_err("proc_fill_super: can't allocate /proc/self\n");
return PTR_ERR(self);
diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c
index 9eacd59e0360..e58a31e8fb2a 100644
--- a/fs/proc/thread_self.c
+++ b/fs/proc/thread_self.c
@@ -52,7 +52,7 @@ int proc_setup_thread_self(struct super_block *s)
struct pid_namespace *ns = s->s_fs_info;
struct dentry *thread_self;
- mutex_lock(&root_inode->i_mutex);
+ inode_lock(root_inode);
thread_self = d_alloc_name(s->s_root, "thread-self");
if (thread_self) {
struct inode *inode = new_inode_pseudo(s);
@@ -71,7 +71,7 @@ int proc_setup_thread_self(struct super_block *s)
} else {
thread_self = ERR_PTR(-ENOMEM);
}
- mutex_unlock(&root_inode->i_mutex);
+ inode_unlock(root_inode);
if (IS_ERR(thread_self)) {
pr_err("proc_fill_super: can't allocate /proc/thread_self\n");
return PTR_ERR(thread_self);
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index d8c439d813ce..dc645b66cd79 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -377,7 +377,7 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count,
break;
}
- mutex_lock(&d_inode(root)->i_mutex);
+ inode_lock(d_inode(root));
dentry = d_alloc_name(root, name);
if (!dentry)
@@ -397,12 +397,12 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count,
list_add(&private->list, &allpstore);
spin_unlock_irqrestore(&allpstore_lock, flags);
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
return 0;
fail_lockedalloc:
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
kfree(private);
fail_alloc:
iput(inode);
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index fbd70af98820..3c3b81bb6dfe 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -682,9 +682,9 @@ int dquot_quota_sync(struct super_block *sb, int type)
continue;
if (!sb_has_quota_active(sb, cnt))
continue;
- mutex_lock(&dqopt->files[cnt]->i_mutex);
+ inode_lock(dqopt->files[cnt]);
truncate_inode_pages(&dqopt->files[cnt]->i_data, 0);
- mutex_unlock(&dqopt->files[cnt]->i_mutex);
+ inode_unlock(dqopt->files[cnt]);
}
mutex_unlock(&dqopt->dqonoff_mutex);
@@ -2162,12 +2162,12 @@ int dquot_disable(struct super_block *sb, int type, unsigned int flags)
/* If quota was reenabled in the meantime, we have
* nothing to do */
if (!sb_has_quota_loaded(sb, cnt)) {
- mutex_lock(&toputinode[cnt]->i_mutex);
+ inode_lock(toputinode[cnt]);
toputinode[cnt]->i_flags &= ~(S_IMMUTABLE |
S_NOATIME | S_NOQUOTA);
truncate_inode_pages(&toputinode[cnt]->i_data,
0);
- mutex_unlock(&toputinode[cnt]->i_mutex);
+ inode_unlock(toputinode[cnt]);
mark_inode_dirty_sync(toputinode[cnt]);
}
mutex_unlock(&dqopt->dqonoff_mutex);
@@ -2258,11 +2258,11 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
/* We don't want quota and atime on quota files (deadlocks
* possible) Also nobody should write to the file - we use
* special IO operations which ignore the immutable bit. */
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
oldflags = inode->i_flags & (S_NOATIME | S_IMMUTABLE |
S_NOQUOTA);
inode->i_flags |= S_NOQUOTA | S_NOATIME | S_IMMUTABLE;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
/*
* When S_NOQUOTA is set, remove dquot references as no more
* references can be added
@@ -2305,12 +2305,12 @@ out_file_init:
iput(inode);
out_lock:
if (oldflags != -1) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* Set the flags back (in the case of accidental quotaon()
* on a wrong file we don't want to mess up the flags) */
inode->i_flags &= ~(S_NOATIME | S_NOQUOTA | S_IMMUTABLE);
inode->i_flags |= oldflags;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
mutex_unlock(&dqopt->dqonoff_mutex);
out_fmt:
@@ -2430,9 +2430,9 @@ int dquot_quota_on_mount(struct super_block *sb, char *qf_name,
struct dentry *dentry;
int error;
- mutex_lock(&d_inode(sb->s_root)->i_mutex);
+ inode_lock(d_inode(sb->s_root));
dentry = lookup_one_len(qf_name, sb->s_root, strlen(qf_name));
- mutex_unlock(&d_inode(sb->s_root)->i_mutex);
+ inode_unlock(d_inode(sb->s_root));
if (IS_ERR(dentry))
return PTR_ERR(dentry);
diff --git a/fs/read_write.c b/fs/read_write.c
index 06b07d5a08fe..324ec271cc4e 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -238,7 +238,7 @@ loff_t default_llseek(struct file *file, loff_t offset, int whence)
struct inode *inode = file_inode(file);
loff_t retval;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
switch (whence) {
case SEEK_END:
offset += i_size_read(inode);
@@ -283,7 +283,7 @@ loff_t default_llseek(struct file *file, loff_t offset, int whence)
retval = offset;
}
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return retval;
}
EXPORT_SYMBOL(default_llseek);
@@ -1656,6 +1656,9 @@ next_file:
mnt_drop_write_file(dst_file);
next_loop:
fdput(dst_fd);
+
+ if (fatal_signal_pending(current))
+ goto out;
}
out:
diff --git a/fs/readdir.c b/fs/readdir.c
index ced679179cac..e69ef3b79787 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -44,7 +44,7 @@ int iterate_dir(struct file *file, struct dir_context *ctx)
fsnotify_access(file);
file_accessed(file);
}
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
out:
return res;
}
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 4a024e2ceb9f..3abd4004184b 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -38,11 +38,11 @@ static int reiserfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
if (err)
return err;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
reiserfs_write_lock(inode->i_sb);
err = reiserfs_commit_for_inode(inode);
reiserfs_write_unlock(inode->i_sb);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (err < 0)
return err;
return 0;
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 96a1bcf33db4..9424a4ba93a9 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -158,7 +158,7 @@ static int reiserfs_sync_file(struct file *filp, loff_t start, loff_t end,
if (err)
return err;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
BUG_ON(!S_ISREG(inode->i_mode));
err = sync_mapping_buffers(inode->i_mapping);
reiserfs_write_lock(inode->i_sb);
@@ -166,7 +166,7 @@ static int reiserfs_sync_file(struct file *filp, loff_t start, loff_t end,
reiserfs_write_unlock(inode->i_sb);
if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb))
blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (barrier_done < 0)
return barrier_done;
return (err < 0) ? -EIO : 0;
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 6ec8a30a0911..036a1fc0a8c3 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -224,7 +224,7 @@ out_unlock:
page_cache_release(page);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
reiserfs_write_unlock(inode->i_sb);
return retval;
}
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index e5ddb4e5ea94..57e0b2310532 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -64,14 +64,14 @@
#ifdef CONFIG_REISERFS_FS_XATTR
static int xattr_create(struct inode *dir, struct dentry *dentry, int mode)
{
- BUG_ON(!mutex_is_locked(&dir->i_mutex));
+ BUG_ON(!inode_is_locked(dir));
return dir->i_op->create(dir, dentry, mode, true);
}
#endif
static int xattr_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
- BUG_ON(!mutex_is_locked(&dir->i_mutex));
+ BUG_ON(!inode_is_locked(dir));
return dir->i_op->mkdir(dir, dentry, mode);
}
@@ -85,11 +85,11 @@ static int xattr_unlink(struct inode *dir, struct dentry *dentry)
{
int error;
- BUG_ON(!mutex_is_locked(&dir->i_mutex));
+ BUG_ON(!inode_is_locked(dir));
- mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD);
error = dir->i_op->unlink(dir, dentry);
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
if (!error)
d_delete(dentry);
@@ -100,13 +100,13 @@ static int xattr_rmdir(struct inode *dir, struct dentry *dentry)
{
int error;
- BUG_ON(!mutex_is_locked(&dir->i_mutex));
+ BUG_ON(!inode_is_locked(dir));
- mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD);
error = dir->i_op->rmdir(dir, dentry);
if (!error)
d_inode(dentry)->i_flags |= S_DEAD;
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
if (!error)
d_delete(dentry);
@@ -123,7 +123,7 @@ static struct dentry *open_xa_root(struct super_block *sb, int flags)
if (d_really_is_negative(privroot))
return ERR_PTR(-ENODATA);
- mutex_lock_nested(&d_inode(privroot)->i_mutex, I_MUTEX_XATTR);
+ inode_lock_nested(d_inode(privroot), I_MUTEX_XATTR);
xaroot = dget(REISERFS_SB(sb)->xattr_root);
if (!xaroot)
@@ -139,7 +139,7 @@ static struct dentry *open_xa_root(struct super_block *sb, int flags)
}
}
- mutex_unlock(&d_inode(privroot)->i_mutex);
+ inode_unlock(d_inode(privroot));
return xaroot;
}
@@ -156,7 +156,7 @@ static struct dentry *open_xa_dir(const struct inode *inode, int flags)
le32_to_cpu(INODE_PKEY(inode)->k_objectid),
inode->i_generation);
- mutex_lock_nested(&d_inode(xaroot)->i_mutex, I_MUTEX_XATTR);
+ inode_lock_nested(d_inode(xaroot), I_MUTEX_XATTR);
xadir = lookup_one_len(namebuf, xaroot, strlen(namebuf));
if (!IS_ERR(xadir) && d_really_is_negative(xadir)) {
@@ -170,7 +170,7 @@ static struct dentry *open_xa_dir(const struct inode *inode, int flags)
}
}
- mutex_unlock(&d_inode(xaroot)->i_mutex);
+ inode_unlock(d_inode(xaroot));
dput(xaroot);
return xadir;
}
@@ -195,7 +195,7 @@ fill_with_dentries(struct dir_context *ctx, const char *name, int namelen,
container_of(ctx, struct reiserfs_dentry_buf, ctx);
struct dentry *dentry;
- WARN_ON_ONCE(!mutex_is_locked(&d_inode(dbuf->xadir)->i_mutex));
+ WARN_ON_ONCE(!inode_is_locked(d_inode(dbuf->xadir)));
if (dbuf->count == ARRAY_SIZE(dbuf->dentries))
return -ENOSPC;
@@ -254,7 +254,7 @@ static int reiserfs_for_each_xattr(struct inode *inode,
goto out_dir;
}
- mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_XATTR);
+ inode_lock_nested(d_inode(dir), I_MUTEX_XATTR);
buf.xadir = dir;
while (1) {
@@ -276,7 +276,7 @@ static int reiserfs_for_each_xattr(struct inode *inode,
break;
buf.count = 0;
}
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
cleanup_dentry_buf(&buf);
@@ -298,13 +298,13 @@ static int reiserfs_for_each_xattr(struct inode *inode,
if (!err) {
int jerror;
- mutex_lock_nested(&d_inode(dir->d_parent)->i_mutex,
+ inode_lock_nested(d_inode(dir->d_parent),
I_MUTEX_XATTR);
err = action(dir, data);
reiserfs_write_lock(inode->i_sb);
jerror = journal_end(&th);
reiserfs_write_unlock(inode->i_sb);
- mutex_unlock(&d_inode(dir->d_parent)->i_mutex);
+ inode_unlock(d_inode(dir->d_parent));
err = jerror ?: err;
}
}
@@ -384,7 +384,7 @@ static struct dentry *xattr_lookup(struct inode *inode, const char *name,
if (IS_ERR(xadir))
return ERR_CAST(xadir);
- mutex_lock_nested(&d_inode(xadir)->i_mutex, I_MUTEX_XATTR);
+ inode_lock_nested(d_inode(xadir), I_MUTEX_XATTR);
xafile = lookup_one_len(name, xadir, strlen(name));
if (IS_ERR(xafile)) {
err = PTR_ERR(xafile);
@@ -404,7 +404,7 @@ static struct dentry *xattr_lookup(struct inode *inode, const char *name,
if (err)
dput(xafile);
out:
- mutex_unlock(&d_inode(xadir)->i_mutex);
+ inode_unlock(d_inode(xadir));
dput(xadir);
if (err)
return ERR_PTR(err);
@@ -469,7 +469,7 @@ static int lookup_and_delete_xattr(struct inode *inode, const char *name)
if (IS_ERR(xadir))
return PTR_ERR(xadir);
- mutex_lock_nested(&d_inode(xadir)->i_mutex, I_MUTEX_XATTR);
+ inode_lock_nested(d_inode(xadir), I_MUTEX_XATTR);
dentry = lookup_one_len(name, xadir, strlen(name));
if (IS_ERR(dentry)) {
err = PTR_ERR(dentry);
@@ -483,7 +483,7 @@ static int lookup_and_delete_xattr(struct inode *inode, const char *name)
dput(dentry);
out_dput:
- mutex_unlock(&d_inode(xadir)->i_mutex);
+ inode_unlock(d_inode(xadir));
dput(xadir);
return err;
}
@@ -580,11 +580,11 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
.ia_valid = ATTR_SIZE | ATTR_CTIME,
};
- mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_XATTR);
+ inode_lock_nested(d_inode(dentry), I_MUTEX_XATTR);
inode_dio_wait(d_inode(dentry));
err = reiserfs_setattr(dentry, &newattrs);
- mutex_unlock(&d_inode(dentry)->i_mutex);
+ inode_unlock(d_inode(dentry));
} else
update_ctime(inode);
out_unlock:
@@ -888,9 +888,9 @@ ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size)
goto out;
}
- mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_XATTR);
+ inode_lock_nested(d_inode(dir), I_MUTEX_XATTR);
err = reiserfs_readdir_inode(d_inode(dir), &buf.ctx);
- mutex_unlock(&d_inode(dir)->i_mutex);
+ inode_unlock(d_inode(dir));
if (!err)
err = buf.pos;
@@ -905,7 +905,7 @@ static int create_privroot(struct dentry *dentry)
int err;
struct inode *inode = d_inode(dentry->d_parent);
- WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex));
+ WARN_ON_ONCE(!inode_is_locked(inode));
err = xattr_mkdir(inode, dentry, 0700);
if (err || d_really_is_negative(dentry)) {
@@ -995,7 +995,7 @@ int reiserfs_lookup_privroot(struct super_block *s)
int err = 0;
/* If we don't have the privroot located yet - go find it */
- mutex_lock(&d_inode(s->s_root)->i_mutex);
+ inode_lock(d_inode(s->s_root));
dentry = lookup_one_len(PRIVROOT_NAME, s->s_root,
strlen(PRIVROOT_NAME));
if (!IS_ERR(dentry)) {
@@ -1005,7 +1005,7 @@ int reiserfs_lookup_privroot(struct super_block *s)
d_inode(dentry)->i_flags |= S_PRIVATE;
} else
err = PTR_ERR(dentry);
- mutex_unlock(&d_inode(s->s_root)->i_mutex);
+ inode_unlock(d_inode(s->s_root));
return err;
}
@@ -1025,14 +1025,14 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
goto error;
if (d_really_is_negative(privroot) && !(mount_flags & MS_RDONLY)) {
- mutex_lock(&d_inode(s->s_root)->i_mutex);
+ inode_lock(d_inode(s->s_root));
err = create_privroot(REISERFS_SB(s)->priv_root);
- mutex_unlock(&d_inode(s->s_root)->i_mutex);
+ inode_unlock(d_inode(s->s_root));
}
if (d_really_is_positive(privroot)) {
s->s_xattr = reiserfs_xattr_handlers;
- mutex_lock(&d_inode(privroot)->i_mutex);
+ inode_lock(d_inode(privroot));
if (!REISERFS_SB(s)->xattr_root) {
struct dentry *dentry;
@@ -1043,7 +1043,7 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
else
err = PTR_ERR(dentry);
}
- mutex_unlock(&d_inode(privroot)->i_mutex);
+ inode_unlock(d_inode(privroot));
}
error:
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index c66f2423e1f5..4a0e48f92104 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -84,9 +84,9 @@ static int tracefs_syscall_mkdir(struct inode *inode, struct dentry *dentry, umo
* the files within the tracefs system. It is up to the individual
* mkdir routine to handle races.
*/
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
ret = tracefs_ops.mkdir(name);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
kfree(name);
@@ -109,13 +109,13 @@ static int tracefs_syscall_rmdir(struct inode *inode, struct dentry *dentry)
* This time we need to unlock not only the parent (inode) but
* also the directory that is being deleted.
*/
- mutex_unlock(&inode->i_mutex);
- mutex_unlock(&dentry->d_inode->i_mutex);
+ inode_unlock(inode);
+ inode_unlock(dentry->d_inode);
ret = tracefs_ops.rmdir(name);
- mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
- mutex_lock(&dentry->d_inode->i_mutex);
+ inode_lock_nested(inode, I_MUTEX_PARENT);
+ inode_lock(dentry->d_inode);
kfree(name);
@@ -334,7 +334,7 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
if (!parent)
parent = tracefs_mount->mnt_root;
- mutex_lock(&parent->d_inode->i_mutex);
+ inode_lock(parent->d_inode);
dentry = lookup_one_len(name, parent, strlen(name));
if (!IS_ERR(dentry) && dentry->d_inode) {
dput(dentry);
@@ -342,7 +342,7 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
}
if (IS_ERR(dentry)) {
- mutex_unlock(&parent->d_inode->i_mutex);
+ inode_unlock(parent->d_inode);
simple_release_fs(&tracefs_mount, &tracefs_mount_count);
}
@@ -351,7 +351,7 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
static struct dentry *failed_creating(struct dentry *dentry)
{
- mutex_unlock(&dentry->d_parent->d_inode->i_mutex);
+ inode_unlock(dentry->d_parent->d_inode);
dput(dentry);
simple_release_fs(&tracefs_mount, &tracefs_mount_count);
return NULL;
@@ -359,7 +359,7 @@ static struct dentry *failed_creating(struct dentry *dentry)
static struct dentry *end_creating(struct dentry *dentry)
{
- mutex_unlock(&dentry->d_parent->d_inode->i_mutex);
+ inode_unlock(dentry->d_parent->d_inode);
return dentry;
}
@@ -544,9 +544,9 @@ void tracefs_remove(struct dentry *dentry)
if (!parent || !parent->d_inode)
return;
- mutex_lock(&parent->d_inode->i_mutex);
+ inode_lock(parent->d_inode);
ret = __tracefs_remove(dentry, parent);
- mutex_unlock(&parent->d_inode->i_mutex);
+ inode_unlock(parent->d_inode);
if (!ret)
simple_release_fs(&tracefs_mount, &tracefs_mount_count);
}
@@ -572,7 +572,7 @@ void tracefs_remove_recursive(struct dentry *dentry)
parent = dentry;
down:
- mutex_lock(&parent->d_inode->i_mutex);
+ inode_lock(parent->d_inode);
loop:
/*
* The parent->d_subdirs is protected by the d_lock. Outside that
@@ -587,7 +587,7 @@ void tracefs_remove_recursive(struct dentry *dentry)
/* perhaps simple_empty(child) makes more sense */
if (!list_empty(&child->d_subdirs)) {
spin_unlock(&parent->d_lock);
- mutex_unlock(&parent->d_inode->i_mutex);
+ inode_unlock(parent->d_inode);
parent = child;
goto down;
}
@@ -608,10 +608,10 @@ void tracefs_remove_recursive(struct dentry *dentry)
}
spin_unlock(&parent->d_lock);
- mutex_unlock(&parent->d_inode->i_mutex);
+ inode_unlock(parent->d_inode);
child = parent;
parent = parent->d_parent;
- mutex_lock(&parent->d_inode->i_mutex);
+ inode_lock(parent->d_inode);
if (child != dentry)
/* go up */
@@ -619,7 +619,7 @@ void tracefs_remove_recursive(struct dentry *dentry)
if (!__tracefs_remove(child, parent))
simple_release_fs(&tracefs_mount, &tracefs_mount_count);
- mutex_unlock(&parent->d_inode->i_mutex);
+ inode_unlock(parent->d_inode);
}
/**
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index e49bd2808bf3..795992a8321e 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -515,8 +515,8 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
dbg_gen("dent '%pd' to ino %lu (nlink %d) in dir ino %lu",
dentry, inode->i_ino,
inode->i_nlink, dir->i_ino);
- ubifs_assert(mutex_is_locked(&dir->i_mutex));
- ubifs_assert(mutex_is_locked(&inode->i_mutex));
+ ubifs_assert(inode_is_locked(dir));
+ ubifs_assert(inode_is_locked(inode));
err = dbg_check_synced_i_size(c, inode);
if (err)
@@ -572,8 +572,8 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
dbg_gen("dent '%pd' from ino %lu (nlink %d) in dir ino %lu",
dentry, inode->i_ino,
inode->i_nlink, dir->i_ino);
- ubifs_assert(mutex_is_locked(&dir->i_mutex));
- ubifs_assert(mutex_is_locked(&inode->i_mutex));
+ ubifs_assert(inode_is_locked(dir));
+ ubifs_assert(inode_is_locked(inode));
err = dbg_check_synced_i_size(c, inode);
if (err)
return err;
@@ -661,8 +661,8 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry)
dbg_gen("directory '%pd', ino %lu in dir ino %lu", dentry,
inode->i_ino, dir->i_ino);
- ubifs_assert(mutex_is_locked(&dir->i_mutex));
- ubifs_assert(mutex_is_locked(&inode->i_mutex));
+ ubifs_assert(inode_is_locked(dir));
+ ubifs_assert(inode_is_locked(inode));
err = check_dir_empty(c, d_inode(dentry));
if (err)
return err;
@@ -996,10 +996,10 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
dbg_gen("dent '%pd' ino %lu in dir ino %lu to dent '%pd' in dir ino %lu",
old_dentry, old_inode->i_ino, old_dir->i_ino,
new_dentry, new_dir->i_ino);
- ubifs_assert(mutex_is_locked(&old_dir->i_mutex));
- ubifs_assert(mutex_is_locked(&new_dir->i_mutex));
+ ubifs_assert(inode_is_locked(old_dir));
+ ubifs_assert(inode_is_locked(new_dir));
if (unlink)
- ubifs_assert(mutex_is_locked(&new_inode->i_mutex));
+ ubifs_assert(inode_is_locked(new_inode));
if (unlink && is_dir) {
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index eff62801acbf..065c88f8e4b8 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1317,7 +1317,7 @@ int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
err = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (err)
return err;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* Synchronize the inode unless this is a 'datasync()' call. */
if (!datasync || (inode->i_state & I_DIRTY_DATASYNC)) {
@@ -1332,7 +1332,7 @@ int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
*/
err = ubifs_sync_wbufs_by_inode(c, inode);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return err;
}
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index e53292d0c21b..c7f4d434d098 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -313,7 +313,7 @@ static int setxattr(struct inode *host, const char *name, const void *value,
union ubifs_key key;
int err, type;
- ubifs_assert(mutex_is_locked(&host->i_mutex));
+ ubifs_assert(inode_is_locked(host));
if (size > UBIFS_MAX_INO_DATA)
return -ERANGE;
@@ -550,7 +550,7 @@ int ubifs_removexattr(struct dentry *dentry, const char *name)
dbg_gen("xattr '%s', ino %lu ('%pd')", name,
host->i_ino, dentry);
- ubifs_assert(mutex_is_locked(&host->i_mutex));
+ ubifs_assert(inode_is_locked(host));
err = check_namespace(&nm);
if (err < 0)
diff --git a/fs/udf/file.c b/fs/udf/file.c
index bddf3d071dae..1af98963d860 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -122,7 +122,7 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct udf_inode_info *iinfo = UDF_I(inode);
int err;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
retval = generic_write_checks(iocb, from);
if (retval <= 0)
@@ -136,7 +136,7 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
(udf_file_entry_alloc_offset(inode) + end)) {
err = udf_expand_file_adinicb(inode);
if (err) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
udf_debug("udf_expand_adinicb: err=%d\n", err);
return err;
}
@@ -149,7 +149,7 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
retval = __generic_file_write_iter(iocb, from);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (retval > 0) {
mark_inode_dirty(inode);
@@ -223,12 +223,12 @@ static int udf_release_file(struct inode *inode, struct file *filp)
* Grab i_mutex to avoid races with writes changing i_size
* while we are running.
*/
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
down_write(&UDF_I(inode)->i_data_sem);
udf_discard_prealloc(inode);
udf_truncate_tail_extent(inode);
up_write(&UDF_I(inode)->i_data_sem);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
return 0;
}
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 87dc16d15572..166d3ed32c39 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -262,7 +262,7 @@ int udf_expand_file_adinicb(struct inode *inode)
.nr_to_write = 1,
};
- WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex));
+ WARN_ON_ONCE(!inode_is_locked(inode));
if (!iinfo->i_lenAlloc) {
if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD))
iinfo->i_alloc_type = ICBTAG_FLAG_AD_SHORT;
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 0fbb4c7c72e8..a522c15a0bfd 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -279,17 +279,12 @@ static void udf_sb_free_bitmap(struct udf_bitmap *bitmap)
{
int i;
int nr_groups = bitmap->s_nr_groups;
- int size = sizeof(struct udf_bitmap) + (sizeof(struct buffer_head *) *
- nr_groups);
for (i = 0; i < nr_groups; i++)
if (bitmap->s_block_bitmap[i])
brelse(bitmap->s_block_bitmap[i]);
- if (size <= PAGE_SIZE)
- kfree(bitmap);
- else
- vfree(bitmap);
+ kvfree(bitmap);
}
static void udf_free_partition(struct udf_part_map *map)
diff --git a/fs/utimes.c b/fs/utimes.c
index aa138d64560a..85c40f4f373d 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -103,9 +103,9 @@ static int utimes_common(struct path *path, struct timespec *times)
}
}
retry_deleg:
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
error = notify_change(path->dentry, &newattrs, &delegated_inode);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (delegated_inode) {
error = break_deleg_wait(&delegated_inode);
if (!error)
diff --git a/fs/xattr.c b/fs/xattr.c
index d5dd6c8b82a7..07d0e47f6a7f 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -129,7 +129,7 @@ vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
if (error)
return error;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
error = security_inode_setxattr(dentry, name, value, size, flags);
if (error)
goto out;
@@ -137,7 +137,7 @@ vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
error = __vfs_setxattr_noperm(dentry, name, value, size, flags);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return error;
}
EXPORT_SYMBOL_GPL(vfs_setxattr);
@@ -277,7 +277,7 @@ vfs_removexattr(struct dentry *dentry, const char *name)
if (error)
return error;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
error = security_inode_removexattr(dentry, name);
if (error)
goto out;
@@ -290,7 +290,7 @@ vfs_removexattr(struct dentry *dentry, const char *name)
}
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return error;
}
EXPORT_SYMBOL_GPL(vfs_removexattr);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index ebe9b8290a70..52883ac3cf84 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -55,7 +55,7 @@ xfs_rw_ilock(
int type)
{
if (type & XFS_IOLOCK_EXCL)
- mutex_lock(&VFS_I(ip)->i_mutex);
+ inode_lock(VFS_I(ip));
xfs_ilock(ip, type);
}
@@ -66,7 +66,7 @@ xfs_rw_iunlock(
{
xfs_iunlock(ip, type);
if (type & XFS_IOLOCK_EXCL)
- mutex_unlock(&VFS_I(ip)->i_mutex);
+ inode_unlock(VFS_I(ip));
}
static inline void
@@ -76,7 +76,7 @@ xfs_rw_ilock_demote(
{
xfs_ilock_demote(ip, type);
if (type & XFS_IOLOCK_EXCL)
- mutex_unlock(&VFS_I(ip)->i_mutex);
+ inode_unlock(VFS_I(ip));
}
/*
@@ -1610,9 +1610,8 @@ xfs_filemap_pmd_fault(
/*
* pfn_mkwrite was originally inteneded to ensure we capture time stamp
* updates on write faults. In reality, it's need to serialise against
- * truncate similar to page_mkwrite. Hence we open-code dax_pfn_mkwrite()
- * here and cycle the XFS_MMAPLOCK_SHARED to ensure we serialise the fault
- * barrier in place.
+ * truncate similar to page_mkwrite. Hence we cycle the XFS_MMAPLOCK_SHARED
+ * to ensure we serialise the fault barrier in place.
*/
static int
xfs_filemap_pfn_mkwrite(
@@ -1635,6 +1634,8 @@ xfs_filemap_pfn_mkwrite(
size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
if (vmf->pgoff >= size)
ret = VM_FAULT_SIGBUS;
+ else if (IS_DAX(inode))
+ ret = dax_pfn_mkwrite(vma, vmf);
xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
sb_end_pagefault(inode->i_sb);
return ret;
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index dc6221942b85..ade236e90bb3 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -42,11 +42,11 @@ xfs_break_layouts(
while ((error = break_layout(inode, false) == -EWOULDBLOCK)) {
xfs_iunlock(ip, *iolock);
if (with_imutex && (*iolock & XFS_IOLOCK_EXCL))
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
error = break_layout(inode, true);
*iolock = XFS_IOLOCK_EXCL;
if (with_imutex)
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
xfs_ilock(ip, *iolock);
}
diff --git a/include/drm/drm_atomic_helper.h b/include/drm/drm_atomic_helper.h
index 89d008dc08e2..fe5efada9d68 100644
--- a/include/drm/drm_atomic_helper.h
+++ b/include/drm/drm_atomic_helper.h
@@ -42,6 +42,10 @@ int drm_atomic_helper_commit(struct drm_device *dev,
struct drm_atomic_state *state,
bool async);
+bool drm_atomic_helper_framebuffer_changed(struct drm_device *dev,
+ struct drm_atomic_state *old_state,
+ struct drm_crtc *crtc);
+
void drm_atomic_helper_wait_for_vblanks(struct drm_device *dev,
struct drm_atomic_state *old_state);
diff --git a/include/linux/bcm963xx_nvram.h b/include/linux/bcm963xx_nvram.h
new file mode 100644
index 000000000000..290c231b8cf1
--- /dev/null
+++ b/include/linux/bcm963xx_nvram.h
@@ -0,0 +1,112 @@
+#ifndef __LINUX_BCM963XX_NVRAM_H__
+#define __LINUX_BCM963XX_NVRAM_H__
+
+#include <linux/crc32.h>
+#include <linux/if_ether.h>
+#include <linux/sizes.h>
+#include <linux/types.h>
+
+/*
+ * Broadcom BCM963xx SoC board nvram data structure.
+ *
+ * The nvram structure varies in size depending on the SoC board version. Use
+ * the appropriate minimum BCM963XX_NVRAM_*_SIZE define for the information
+ * you need instead of sizeof(struct bcm963xx_nvram) as this may change.
+ */
+
+#define BCM963XX_NVRAM_V4_SIZE 300
+#define BCM963XX_NVRAM_V5_SIZE (1 * SZ_1K)
+
+#define BCM963XX_DEFAULT_PSI_SIZE 64
+
+enum bcm963xx_nvram_nand_part {
+ BCM963XX_NVRAM_NAND_PART_BOOT = 0,
+ BCM963XX_NVRAM_NAND_PART_ROOTFS_1,
+ BCM963XX_NVRAM_NAND_PART_ROOTFS_2,
+ BCM963XX_NVRAM_NAND_PART_DATA,
+ BCM963XX_NVRAM_NAND_PART_BBT,
+
+ __BCM963XX_NVRAM_NAND_NR_PARTS
+};
+
+struct bcm963xx_nvram {
+ u32 version;
+ char bootline[256];
+ char name[16];
+ u32 main_tp_number;
+ u32 psi_size;
+ u32 mac_addr_count;
+ u8 mac_addr_base[ETH_ALEN];
+ u8 __reserved1[2];
+ u32 checksum_v4;
+
+ u8 __reserved2[292];
+ u32 nand_part_offset[__BCM963XX_NVRAM_NAND_NR_PARTS];
+ u32 nand_part_size[__BCM963XX_NVRAM_NAND_NR_PARTS];
+ u8 __reserved3[388];
+ u32 checksum_v5;
+};
+
+#define BCM963XX_NVRAM_NAND_PART_OFFSET(nvram, part) \
+ bcm963xx_nvram_nand_part_offset(nvram, BCM963XX_NVRAM_NAND_PART_ ##part)
+
+static inline u64 __pure bcm963xx_nvram_nand_part_offset(
+ const struct bcm963xx_nvram *nvram,
+ enum bcm963xx_nvram_nand_part part)
+{
+ return nvram->nand_part_offset[part] * SZ_1K;
+}
+
+#define BCM963XX_NVRAM_NAND_PART_SIZE(nvram, part) \
+ bcm963xx_nvram_nand_part_size(nvram, BCM963XX_NVRAM_NAND_PART_ ##part)
+
+static inline u64 __pure bcm963xx_nvram_nand_part_size(
+ const struct bcm963xx_nvram *nvram,
+ enum bcm963xx_nvram_nand_part part)
+{
+ return nvram->nand_part_size[part] * SZ_1K;
+}
+
+/*
+ * bcm963xx_nvram_checksum - Verify nvram checksum
+ *
+ * @nvram: pointer to full size nvram data structure
+ * @expected_out: optional pointer to store expected checksum value
+ * @actual_out: optional pointer to store actual checksum value
+ *
+ * Return: 0 if the checksum is valid, otherwise -EINVAL
+ */
+static int __maybe_unused bcm963xx_nvram_checksum(
+ const struct bcm963xx_nvram *nvram,
+ u32 *expected_out, u32 *actual_out)
+{
+ u32 expected, actual;
+ size_t len;
+
+ if (nvram->version <= 4) {
+ expected = nvram->checksum_v4;
+ len = BCM963XX_NVRAM_V4_SIZE - sizeof(u32);
+ } else {
+ expected = nvram->checksum_v5;
+ len = BCM963XX_NVRAM_V5_SIZE - sizeof(u32);
+ }
+
+ /*
+ * Calculate the CRC32 value for the nvram with a checksum value
+ * of 0 without modifying or copying the nvram by combining:
+ * - The CRC32 of the nvram without the checksum value
+ * - The CRC32 of a zero checksum value (which is also 0)
+ */
+ actual = crc32_le_combine(
+ crc32_le(~0, (u8 *)nvram, len), 0, sizeof(u32));
+
+ if (expected_out)
+ *expected_out = expected;
+
+ if (actual_out)
+ *actual_out = actual;
+
+ return expected == actual ? 0 : -EINVAL;
+};
+
+#endif /* __LINUX_BCM963XX_NVRAM_H__ */
diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h b/include/linux/bcm963xx_tag.h
index 1e6b587f62c9..161c7b37a77b 100644
--- a/arch/mips/include/asm/mach-bcm63xx/bcm963xx_tag.h
+++ b/include/linux/bcm963xx_tag.h
@@ -1,5 +1,7 @@
-#ifndef __BCM963XX_TAG_H
-#define __BCM963XX_TAG_H
+#ifndef __LINUX_BCM963XX_TAG_H__
+#define __LINUX_BCM963XX_TAG_H__
+
+#include <linux/types.h>
#define TAGVER_LEN 4 /* Length of Tag Version */
#define TAGLAYOUT_LEN 4 /* Length of FlashLayoutVer */
@@ -10,8 +12,7 @@
#define CHIPID_LEN 6 /* Chip Id Length */
#define IMAGE_LEN 10 /* Length of Length Field */
#define ADDRESS_LEN 12 /* Length of Address field */
-#define DUALFLAG_LEN 2 /* Dual Image flag Length */
-#define INACTIVEFLAG_LEN 2 /* Inactie Flag Length */
+#define IMAGE_SEQUENCE_LEN 4 /* Image sequence Length */
#define RSASIG_LEN 20 /* Length of RSA Signature in tag */
#define TAGINFO1_LEN 30 /* Length of vendor information field1 in tag */
#define FLASHLAYOUTVER_LEN 4 /* Length of Flash Layout Version String tag */
@@ -26,6 +27,11 @@
"DWV-S0", \
}
+/* Extended flash address, needs to be subtracted
+ * from bcm_tag flash image offsets.
+ */
+#define BCM963XX_EXTENDED_SIZE 0xBFC00000
+
/*
* The broadcom firmware assumes the rootfs starts the image,
* therefore uses the rootfs start (flash_image_address)
@@ -65,10 +71,10 @@ struct bcm_tag {
char kernel_address[ADDRESS_LEN];
/* 128-137: Size of kernel */
char kernel_length[IMAGE_LEN];
- /* 138-139: Unused at the moment */
- char dual_image[DUALFLAG_LEN];
- /* 140-141: Unused at the moment */
- char inactive_flag[INACTIVEFLAG_LEN];
+ /* 138-141: Image sequence number
+ * (to be incremented when flashed with a new image)
+ */
+ char image_sequence[IMAGE_SEQUENCE_LEN];
/* 142-161: RSA Signature (not used; some vendors may use this) */
char rsa_signature[RSASIG_LEN];
/* 162-191: Compilation and related information (not used in OpenWrt) */
@@ -93,4 +99,4 @@ struct bcm_tag {
char reserved2[16];
};
-#endif /* __BCM63XX_TAG_H */
+#endif /* __LINUX_BCM63XX_TAG_H__ */
diff --git a/include/linux/blk-iopoll.h b/include/linux/blk-iopoll.h
deleted file mode 100644
index 77ae77c0b704..000000000000
--- a/include/linux/blk-iopoll.h
+++ /dev/null
@@ -1,46 +0,0 @@
-#ifndef BLK_IOPOLL_H
-#define BLK_IOPOLL_H
-
-struct blk_iopoll;
-typedef int (blk_iopoll_fn)(struct blk_iopoll *, int);
-
-struct blk_iopoll {
- struct list_head list;
- unsigned long state;
- unsigned long data;
- int weight;
- int max;
- blk_iopoll_fn *poll;
-};
-
-enum {
- IOPOLL_F_SCHED = 0,
- IOPOLL_F_DISABLE = 1,
-};
-
-/*
- * Returns 0 if we successfully set the IOPOLL_F_SCHED bit, indicating
- * that we were the first to acquire this iop for scheduling. If this iop
- * is currently disabled, return "failure".
- */
-static inline int blk_iopoll_sched_prep(struct blk_iopoll *iop)
-{
- if (!test_bit(IOPOLL_F_DISABLE, &iop->state))
- return test_and_set_bit(IOPOLL_F_SCHED, &iop->state);
-
- return 1;
-}
-
-static inline int blk_iopoll_disable_pending(struct blk_iopoll *iop)
-{
- return test_bit(IOPOLL_F_DISABLE, &iop->state);
-}
-
-extern void blk_iopoll_sched(struct blk_iopoll *);
-extern void blk_iopoll_init(struct blk_iopoll *, int, blk_iopoll_fn *);
-extern void blk_iopoll_complete(struct blk_iopoll *);
-extern void __blk_iopoll_complete(struct blk_iopoll *);
-extern void blk_iopoll_enable(struct blk_iopoll *);
-extern void blk_iopoll_disable(struct blk_iopoll *);
-
-#endif
diff --git a/include/linux/ceph/ceph_frag.h b/include/linux/ceph/ceph_frag.h
index 5babb8e95352..b827e066e55a 100644
--- a/include/linux/ceph/ceph_frag.h
+++ b/include/linux/ceph/ceph_frag.h
@@ -40,46 +40,11 @@ static inline __u32 ceph_frag_mask_shift(__u32 f)
return 24 - ceph_frag_bits(f);
}
-static inline int ceph_frag_contains_value(__u32 f, __u32 v)
+static inline bool ceph_frag_contains_value(__u32 f, __u32 v)
{
return (v & ceph_frag_mask(f)) == ceph_frag_value(f);
}
-static inline int ceph_frag_contains_frag(__u32 f, __u32 sub)
-{
- /* is sub as specific as us, and contained by us? */
- return ceph_frag_bits(sub) >= ceph_frag_bits(f) &&
- (ceph_frag_value(sub) & ceph_frag_mask(f)) == ceph_frag_value(f);
-}
-static inline __u32 ceph_frag_parent(__u32 f)
-{
- return ceph_frag_make(ceph_frag_bits(f) - 1,
- ceph_frag_value(f) & (ceph_frag_mask(f) << 1));
-}
-static inline int ceph_frag_is_left_child(__u32 f)
-{
- return ceph_frag_bits(f) > 0 &&
- (ceph_frag_value(f) & (0x1000000 >> ceph_frag_bits(f))) == 0;
-}
-static inline int ceph_frag_is_right_child(__u32 f)
-{
- return ceph_frag_bits(f) > 0 &&
- (ceph_frag_value(f) & (0x1000000 >> ceph_frag_bits(f))) == 1;
-}
-static inline __u32 ceph_frag_sibling(__u32 f)
-{
- return ceph_frag_make(ceph_frag_bits(f),
- ceph_frag_value(f) ^ (0x1000000 >> ceph_frag_bits(f)));
-}
-static inline __u32 ceph_frag_left_child(__u32 f)
-{
- return ceph_frag_make(ceph_frag_bits(f)+1, ceph_frag_value(f));
-}
-static inline __u32 ceph_frag_right_child(__u32 f)
-{
- return ceph_frag_make(ceph_frag_bits(f)+1,
- ceph_frag_value(f) | (0x1000000 >> (1+ceph_frag_bits(f))));
-}
static inline __u32 ceph_frag_make_child(__u32 f, int by, int i)
{
int newbits = ceph_frag_bits(f) + by;
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 71b1d6cdcb5d..8dbd7879fdc6 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -220,6 +220,7 @@ struct ceph_connection {
struct ceph_entity_addr actual_peer_addr;
/* message out temps */
+ struct ceph_msg_header out_hdr;
struct ceph_msg *out_msg; /* sending message (== tail of
out_sent) */
bool out_msg_done;
@@ -229,7 +230,6 @@ struct ceph_connection {
int out_kvec_left; /* kvec's left in out_kvec */
int out_skip; /* skip this many bytes */
int out_kvec_bytes; /* total bytes left */
- bool out_kvec_is_msg; /* kvec refers to out_msg */
int out_more; /* there is more data after the kvecs */
__le64 out_temp_ack; /* for writing an ack */
struct ceph_timespec out_temp_keepalive2; /* for writing keepalive2
diff --git a/include/linux/dax.h b/include/linux/dax.h
index b415e521528d..8204c3dc3800 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -36,4 +36,11 @@ static inline bool vma_is_dax(struct vm_area_struct *vma)
{
return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
}
+
+static inline bool dax_mapping(struct address_space *mapping)
+{
+ return mapping->host && IS_DAX(mapping->host);
+}
+int dax_writeback_mapping_range(struct address_space *mapping, loff_t start,
+ loff_t end);
#endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index eb73d74ed992..1a2046275cdf 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -433,7 +433,8 @@ struct address_space {
struct rw_semaphore i_mmap_rwsem; /* protect tree, count, list */
/* Protected by tree_lock together with the radix tree */
unsigned long nrpages; /* number of total pages */
- unsigned long nrshadows; /* number of shadow entries */
+ /* number of shadow or DAX exceptional entries */
+ unsigned long nrexceptional;
pgoff_t writeback_index;/* writeback starts here */
const struct address_space_operations *a_ops; /* methods */
unsigned long flags; /* error bits/gfp mask */
@@ -714,6 +715,31 @@ enum inode_i_mutex_lock_class
I_MUTEX_PARENT2,
};
+static inline void inode_lock(struct inode *inode)
+{
+ mutex_lock(&inode->i_mutex);
+}
+
+static inline void inode_unlock(struct inode *inode)
+{
+ mutex_unlock(&inode->i_mutex);
+}
+
+static inline int inode_trylock(struct inode *inode)
+{
+ return mutex_trylock(&inode->i_mutex);
+}
+
+static inline int inode_is_locked(struct inode *inode)
+{
+ return mutex_is_locked(&inode->i_mutex);
+}
+
+static inline void inode_lock_nested(struct inode *inode, unsigned subclass)
+{
+ mutex_lock_nested(&inode->i_mutex, subclass);
+}
+
void lock_two_nondirectories(struct inode *, struct inode*);
void unlock_two_nondirectories(struct inode *, struct inode*);
@@ -3047,8 +3073,8 @@ static inline bool dir_emit_dots(struct file *file, struct dir_context *ctx)
}
static inline bool dir_relax(struct inode *inode)
{
- mutex_unlock(&inode->i_mutex);
- mutex_lock(&inode->i_mutex);
+ inode_unlock(inode);
+ inode_lock(inode);
return !IS_DEADDIR(inode);
}
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 0639dcc98195..81de7123959d 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -165,7 +165,6 @@ struct ftrace_ops {
ftrace_func_t saved_func;
int __percpu *disabled;
#ifdef CONFIG_DYNAMIC_FTRACE
- int nr_trampolines;
struct ftrace_ops_hash local_hash;
struct ftrace_ops_hash *func_hash;
struct ftrace_ops_hash old_hash;
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index cb30edbfe9fc..0e95fcc75b2a 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -413,7 +413,7 @@ enum
NET_TX_SOFTIRQ,
NET_RX_SOFTIRQ,
BLOCK_SOFTIRQ,
- BLOCK_IOPOLL_SOFTIRQ,
+ IRQ_POLL_SOFTIRQ,
TASKLET_SOFTIRQ,
SCHED_SOFTIRQ,
HRTIMER_SOFTIRQ, /* Unused, but kept as tools rely on the
diff --git a/include/linux/irq_poll.h b/include/linux/irq_poll.h
new file mode 100644
index 000000000000..3e8c1b8fb9be
--- /dev/null
+++ b/include/linux/irq_poll.h
@@ -0,0 +1,25 @@
+#ifndef IRQ_POLL_H
+#define IRQ_POLL_H
+
+struct irq_poll;
+typedef int (irq_poll_fn)(struct irq_poll *, int);
+
+struct irq_poll {
+ struct list_head list;
+ unsigned long state;
+ int weight;
+ irq_poll_fn *poll;
+};
+
+enum {
+ IRQ_POLL_F_SCHED = 0,
+ IRQ_POLL_F_DISABLE = 1,
+};
+
+extern void irq_poll_sched(struct irq_poll *);
+extern void irq_poll_init(struct irq_poll *, int, irq_poll_fn *);
+extern void irq_poll_complete(struct irq_poll *);
+extern void irq_poll_enable(struct irq_poll *);
+extern void irq_poll_disable(struct irq_poll *);
+
+#endif
diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h
index 58391f2e0414..116b284bc4ce 100644
--- a/include/linux/mlx4/cmd.h
+++ b/include/linux/mlx4/cmd.h
@@ -206,7 +206,8 @@ enum {
MLX4_SET_PORT_GID_TABLE = 0x5,
MLX4_SET_PORT_PRIO2TC = 0x8,
MLX4_SET_PORT_SCHEDULER = 0x9,
- MLX4_SET_PORT_VXLAN = 0xB
+ MLX4_SET_PORT_VXLAN = 0xB,
+ MLX4_SET_PORT_ROCE_ADDR = 0xD
};
enum {
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index d3133be12d92..430a929f048b 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -216,6 +216,7 @@ enum {
MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN = 1LL << 30,
MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB = 1ULL << 31,
MLX4_DEV_CAP_FLAG2_LB_SRC_CHK = 1ULL << 32,
+ MLX4_DEV_CAP_FLAG2_ROCE_V1_V2 = 1ULL << 33,
};
enum {
@@ -267,12 +268,14 @@ enum {
MLX4_BMME_FLAG_TYPE_2_WIN = 1 << 9,
MLX4_BMME_FLAG_RESERVED_LKEY = 1 << 10,
MLX4_BMME_FLAG_FAST_REG_WR = 1 << 11,
+ MLX4_BMME_FLAG_ROCE_V1_V2 = 1 << 19,
MLX4_BMME_FLAG_PORT_REMAP = 1 << 24,
MLX4_BMME_FLAG_VSD_INIT2RTR = 1 << 28,
};
enum {
- MLX4_FLAG_PORT_REMAP = MLX4_BMME_FLAG_PORT_REMAP
+ MLX4_FLAG_PORT_REMAP = MLX4_BMME_FLAG_PORT_REMAP,
+ MLX4_FLAG_ROCE_V1_V2 = MLX4_BMME_FLAG_ROCE_V1_V2
};
enum mlx4_event {
@@ -979,14 +982,11 @@ struct mlx4_mad_ifc {
for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \
if ((type) == (dev)->caps.port_mask[(port)])
-#define mlx4_foreach_non_ib_transport_port(port, dev) \
- for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \
- if (((dev)->caps.port_mask[port] != MLX4_PORT_TYPE_IB))
-
#define mlx4_foreach_ib_transport_port(port, dev) \
- for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \
+ for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \
if (((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_IB) || \
- ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE))
+ ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) || \
+ ((dev)->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2))
#define MLX4_INVALID_SLAVE_ID 0xFF
#define MLX4_SINK_COUNTER_INDEX(dev) (dev->caps.max_counters - 1)
@@ -1457,6 +1457,7 @@ int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port);
int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port);
int mlx4_disable_rx_port_check(struct mlx4_dev *dev, bool dis);
+int mlx4_config_roce_v2_port(struct mlx4_dev *dev, u16 udp_port);
int mlx4_virt2phy_port_map(struct mlx4_dev *dev, u32 port1, u32 port2);
int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port);
int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port);
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index fe052e234906..587cdf943b52 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -194,7 +194,7 @@ struct mlx4_qp_context {
u8 mtu_msgmax;
u8 rq_size_stride;
u8 sq_size_stride;
- u8 rlkey;
+ u8 rlkey_roce_mode;
__be32 usr_page;
__be32 local_qpn;
__be32 remote_qpn;
@@ -204,7 +204,8 @@ struct mlx4_qp_context {
u32 reserved1;
__be32 next_send_psn;
__be32 cqn_send;
- u32 reserved2[2];
+ __be16 roce_entropy;
+ __be16 reserved2[3];
__be32 last_acked_psn;
__be32 ssn;
__be32 params2;
@@ -487,4 +488,14 @@ static inline struct mlx4_qp *__mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn)
void mlx4_qp_remove(struct mlx4_dev *dev, struct mlx4_qp *qp);
+static inline u16 folded_qp(u32 q)
+{
+ u16 res;
+
+ res = ((q & 0xff) ^ ((q & 0xff0000) >> 16)) | (q & 0xff00);
+ return res;
+}
+
+u16 mlx4_qp_roce_entropy(struct mlx4_dev *dev, u32 qpn);
+
#endif /* MLX4_QP_H */
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 7be845e30689..987764afa65c 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -223,6 +223,14 @@ enum {
#define MLX5_UMR_MTT_MASK (MLX5_UMR_MTT_ALIGNMENT - 1)
#define MLX5_UMR_MTT_MIN_CHUNK_SIZE MLX5_UMR_MTT_ALIGNMENT
+#define MLX5_USER_INDEX_LEN (MLX5_FLD_SZ_BYTES(qpc, user_index) * 8)
+
+enum {
+ MLX5_EVENT_QUEUE_TYPE_QP = 0,
+ MLX5_EVENT_QUEUE_TYPE_RQ = 1,
+ MLX5_EVENT_QUEUE_TYPE_SQ = 2,
+};
+
enum mlx5_event {
MLX5_EVENT_TYPE_COMP = 0x0,
@@ -280,6 +288,26 @@ enum {
};
enum {
+ MLX5_ROCE_VERSION_1 = 0,
+ MLX5_ROCE_VERSION_2 = 2,
+};
+
+enum {
+ MLX5_ROCE_VERSION_1_CAP = 1 << MLX5_ROCE_VERSION_1,
+ MLX5_ROCE_VERSION_2_CAP = 1 << MLX5_ROCE_VERSION_2,
+};
+
+enum {
+ MLX5_ROCE_L3_TYPE_IPV4 = 0,
+ MLX5_ROCE_L3_TYPE_IPV6 = 1,
+};
+
+enum {
+ MLX5_ROCE_L3_TYPE_IPV4_CAP = 1 << 1,
+ MLX5_ROCE_L3_TYPE_IPV6_CAP = 1 << 2,
+};
+
+enum {
MLX5_OPCODE_NOP = 0x00,
MLX5_OPCODE_SEND_INVAL = 0x01,
MLX5_OPCODE_RDMA_WRITE = 0x08,
@@ -446,7 +474,7 @@ struct mlx5_init_seg {
__be32 rsvd2[880];
__be32 internal_timer_h;
__be32 internal_timer_l;
- __be32 rsrv3[2];
+ __be32 rsvd3[2];
__be32 health_counter;
__be32 rsvd4[1019];
__be64 ieee1588_clk;
@@ -460,7 +488,9 @@ struct mlx5_eqe_comp {
};
struct mlx5_eqe_qp_srq {
- __be32 reserved[6];
+ __be32 reserved1[5];
+ u8 type;
+ u8 reserved2[3];
__be32 qp_srq_n;
};
@@ -651,6 +681,12 @@ enum {
};
enum {
+ MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH = 0x0,
+ MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6 = 0x1,
+ MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV4 = 0x2,
+};
+
+enum {
CQE_L2_OK = 1 << 0,
CQE_L3_OK = 1 << 1,
CQE_L4_OK = 1 << 2,
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 5162f3533042..1e3006dcf35d 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -115,6 +115,11 @@ enum {
MLX5_REG_HOST_ENDIANNESS = 0x7004,
};
+enum {
+ MLX5_ATOMIC_OPS_CMP_SWAP = 1 << 0,
+ MLX5_ATOMIC_OPS_FETCH_ADD = 1 << 1,
+};
+
enum mlx5_page_fault_resume_flags {
MLX5_PAGE_FAULT_RESUME_REQUESTOR = 1 << 0,
MLX5_PAGE_FAULT_RESUME_WRITE = 1 << 1,
@@ -341,9 +346,11 @@ struct mlx5_core_mr {
};
enum mlx5_res_type {
- MLX5_RES_QP,
- MLX5_RES_SRQ,
- MLX5_RES_XSRQ,
+ MLX5_RES_QP = MLX5_EVENT_QUEUE_TYPE_QP,
+ MLX5_RES_RQ = MLX5_EVENT_QUEUE_TYPE_RQ,
+ MLX5_RES_SQ = MLX5_EVENT_QUEUE_TYPE_SQ,
+ MLX5_RES_SRQ = 3,
+ MLX5_RES_XSRQ = 4,
};
struct mlx5_core_rsc_common {
@@ -651,13 +658,6 @@ extern struct workqueue_struct *mlx5_core_wq;
.struct_offset_bytes = offsetof(struct ib_unpacked_ ## header, field), \
.struct_size_bytes = sizeof((struct ib_unpacked_ ## header *)0)->field
-struct ib_field {
- size_t struct_offset_bytes;
- size_t struct_size_bytes;
- int offset_bits;
- int size_bits;
-};
-
static inline struct mlx5_core_dev *pci2mlx5_core_dev(struct pci_dev *pdev)
{
return pci_get_drvdata(pdev);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 68d73f82e009..231ab6bcea76 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -67,6 +67,11 @@ enum {
};
enum {
+ MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE = 0x0,
+ MLX5_SET_HCA_CAP_OP_MOD_ATOMIC = 0x3,
+};
+
+enum {
MLX5_CMD_OP_QUERY_HCA_CAP = 0x100,
MLX5_CMD_OP_QUERY_ADAPTER = 0x101,
MLX5_CMD_OP_INIT_HCA = 0x102,
@@ -573,21 +578,24 @@ enum {
struct mlx5_ifc_atomic_caps_bits {
u8 reserved_0[0x40];
- u8 atomic_req_endianness[0x1];
- u8 reserved_1[0x1f];
+ u8 atomic_req_8B_endianess_mode[0x2];
+ u8 reserved_1[0x4];
+ u8 supported_atomic_req_8B_endianess_mode_1[0x1];
- u8 reserved_2[0x20];
+ u8 reserved_2[0x19];
- u8 reserved_3[0x10];
- u8 atomic_operations[0x10];
+ u8 reserved_3[0x20];
u8 reserved_4[0x10];
- u8 atomic_size_qp[0x10];
+ u8 atomic_operations[0x10];
u8 reserved_5[0x10];
+ u8 atomic_size_qp[0x10];
+
+ u8 reserved_6[0x10];
u8 atomic_size_dc[0x10];
- u8 reserved_6[0x720];
+ u8 reserved_7[0x720];
};
struct mlx5_ifc_odp_cap_bits {
@@ -850,7 +858,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 reserved_66[0x8];
u8 log_uar_page_sz[0x10];
- u8 reserved_67[0x40];
+ u8 reserved_67[0x20];
+ u8 device_frequency_mhz[0x20];
u8 device_frequency_khz[0x20];
u8 reserved_68[0x5f];
u8 cqe_zip[0x1];
@@ -2215,19 +2224,25 @@ struct mlx5_ifc_nic_vport_context_bits {
u8 mtu[0x10];
- u8 reserved_3[0x640];
+ u8 system_image_guid[0x40];
+ u8 port_guid[0x40];
+ u8 node_guid[0x40];
+
+ u8 reserved_3[0x140];
+ u8 qkey_violation_counter[0x10];
+ u8 reserved_4[0x430];
u8 promisc_uc[0x1];
u8 promisc_mc[0x1];
u8 promisc_all[0x1];
- u8 reserved_4[0x2];
+ u8 reserved_5[0x2];
u8 allowed_list_type[0x3];
- u8 reserved_5[0xc];
+ u8 reserved_6[0xc];
u8 allowed_list_size[0xc];
struct mlx5_ifc_mac_address_layout_bits permanent_address;
- u8 reserved_6[0x20];
+ u8 reserved_7[0x20];
u8 current_uc_mac_address[0][0x40];
};
@@ -4199,6 +4214,13 @@ struct mlx5_ifc_modify_tis_out_bits {
u8 reserved_1[0x40];
};
+struct mlx5_ifc_modify_tis_bitmask_bits {
+ u8 reserved_0[0x20];
+
+ u8 reserved_1[0x1f];
+ u8 prio[0x1];
+};
+
struct mlx5_ifc_modify_tis_in_bits {
u8 opcode[0x10];
u8 reserved_0[0x10];
@@ -4211,7 +4233,7 @@ struct mlx5_ifc_modify_tis_in_bits {
u8 reserved_3[0x20];
- u8 modify_bitmask[0x40];
+ struct mlx5_ifc_modify_tis_bitmask_bits bitmask;
u8 reserved_4[0x40];
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index f079fb1a31f7..5b8c89ffaa58 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -85,7 +85,16 @@ enum mlx5_qp_state {
MLX5_QP_STATE_ERR = 6,
MLX5_QP_STATE_SQ_DRAINING = 7,
MLX5_QP_STATE_SUSPENDED = 9,
- MLX5_QP_NUM_STATE
+ MLX5_QP_NUM_STATE,
+ MLX5_QP_STATE,
+ MLX5_QP_STATE_BAD,
+};
+
+enum {
+ MLX5_SQ_STATE_NA = MLX5_SQC_STATE_ERR + 1,
+ MLX5_SQ_NUM_STATE = MLX5_SQ_STATE_NA + 1,
+ MLX5_RQ_STATE_NA = MLX5_RQC_STATE_ERR + 1,
+ MLX5_RQ_NUM_STATE = MLX5_RQ_STATE_NA + 1,
};
enum {
@@ -130,6 +139,9 @@ enum {
MLX5_QP_BIT_RWE = 1 << 14,
MLX5_QP_BIT_RAE = 1 << 13,
MLX5_QP_BIT_RIC = 1 << 4,
+ MLX5_QP_BIT_CC_SLAVE_RECV = 1 << 2,
+ MLX5_QP_BIT_CC_SLAVE_SEND = 1 << 1,
+ MLX5_QP_BIT_CC_MASTER = 1 << 0
};
enum {
@@ -248,8 +260,12 @@ struct mlx5_av {
__be32 dqp_dct;
u8 stat_rate_sl;
u8 fl_mlid;
- __be16 rlid;
- u8 reserved0[10];
+ union {
+ __be16 rlid;
+ __be16 udp_sport;
+ };
+ u8 reserved0[4];
+ u8 rmac[6];
u8 tclass;
u8 hop_limit;
__be32 grh_gid_fl;
@@ -456,11 +472,16 @@ struct mlx5_qp_path {
u8 static_rate;
u8 hop_limit;
__be32 tclass_flowlabel;
- u8 rgid[16];
- u8 rsvd1[4];
- u8 sl;
+ union {
+ u8 rgid[16];
+ u8 rip[16];
+ };
+ u8 f_dscp_ecn_prio;
+ u8 ecn_dscp;
+ __be16 udp_sport;
+ u8 dci_cfi_prio_sl;
u8 port;
- u8 rsvd2[6];
+ u8 rmac[6];
};
struct mlx5_qp_context {
@@ -620,8 +641,7 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
struct mlx5_core_qp *qp,
struct mlx5_create_qp_mbox_in *in,
int inlen);
-int mlx5_core_qp_modify(struct mlx5_core_dev *dev, enum mlx5_qp_state cur_state,
- enum mlx5_qp_state new_state,
+int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 operation,
struct mlx5_modify_qp_mbox_in *in, int sqd_event,
struct mlx5_core_qp *qp);
int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
@@ -639,6 +659,14 @@ void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn,
u8 context, int error);
#endif
+int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
+ struct mlx5_core_qp *rq);
+void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev,
+ struct mlx5_core_qp *rq);
+int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
+ struct mlx5_core_qp *sq);
+void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev,
+ struct mlx5_core_qp *sq);
static inline const char *mlx5_qp_type_str(int type)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.h b/include/linux/mlx5/transobj.h
index 74cae51436e4..88441f5ece25 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.h
+++ b/include/linux/mlx5/transobj.h
@@ -33,16 +33,20 @@
#ifndef __TRANSOBJ_H__
#define __TRANSOBJ_H__
-int mlx5_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn);
-void mlx5_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn);
+#include <linux/mlx5/driver.h>
+
+int mlx5_core_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn);
+void mlx5_core_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn);
int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen,
u32 *rqn);
int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen);
void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn);
+int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out);
int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen,
u32 *sqn);
int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen);
void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn);
+int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out);
int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
u32 *tirn);
int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in,
@@ -50,6 +54,8 @@ int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in,
void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn);
int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen,
u32 *tisn);
+int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in,
+ int inlen);
void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn);
int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen,
u32 *rmpn);
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index 638f2ca7a527..123771003e68 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -45,6 +45,11 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
u16 vport, u8 *addr);
int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *dev,
u16 vport, u8 *addr);
+int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev,
+ u64 *system_image_guid);
+int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid);
+int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev,
+ u16 *qkey_viol_cntr);
int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport,
u8 port_num, u16 vf_num, u16 gid_index,
union ib_gid *gid);
@@ -85,4 +90,7 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev,
u16 vlans[],
int list_size);
+int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev);
+int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev);
+
#endif /* __MLX5_VPORT_H__ */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 4d08b6c33557..92395a0a7dc5 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -361,6 +361,9 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
unsigned int nr_pages, struct page **pages);
unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
int tag, unsigned int nr_pages, struct page **pages);
+unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
+ int tag, unsigned int nr_entries,
+ struct page **entries, pgoff_t *indices);
struct page *grab_cache_page_write_begin(struct address_space *mapping,
pgoff_t index, unsigned flags);
diff --git a/include/linux/platform_data/sdhci-pic32.h b/include/linux/platform_data/sdhci-pic32.h
new file mode 100644
index 000000000000..7e0efe64c8c5
--- /dev/null
+++ b/include/linux/platform_data/sdhci-pic32.h
@@ -0,0 +1,22 @@
+/*
+ * Purna Chandra Mandal, purna.mandal@microchip.com
+ * Copyright (C) 2015 Microchip Technology Inc. All rights reserved.
+ *
+ * This program is free software; you can distribute it and/or modify it
+ * under the terms of the GNU General Public License (Version 2) as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+#ifndef __PIC32_SDHCI_PDATA_H__
+#define __PIC32_SDHCI_PDATA_H__
+
+struct pic32_sdhci_platform_data {
+ /* read & write fifo threshold */
+ int (*setup_dma)(u32 rfifo, u32 wfifo);
+};
+
+#endif
diff --git a/include/linux/pmem.h b/include/linux/pmem.h
index acfea8ce4a07..7c3d11a6b4ad 100644
--- a/include/linux/pmem.h
+++ b/include/linux/pmem.h
@@ -53,12 +53,18 @@ static inline void arch_clear_pmem(void __pmem *addr, size_t size)
{
BUG();
}
+
+static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size)
+{
+ BUG();
+}
#endif
/*
* Architectures that define ARCH_HAS_PMEM_API must provide
* implementations for arch_memcpy_to_pmem(), arch_wmb_pmem(),
- * arch_copy_from_iter_pmem(), arch_clear_pmem() and arch_has_wmb_pmem().
+ * arch_copy_from_iter_pmem(), arch_clear_pmem(), arch_wb_cache_pmem()
+ * and arch_has_wmb_pmem().
*/
static inline void memcpy_from_pmem(void *dst, void __pmem const *src, size_t size)
{
@@ -178,4 +184,18 @@ static inline void clear_pmem(void __pmem *addr, size_t size)
else
default_clear_pmem(addr, size);
}
+
+/**
+ * wb_cache_pmem - write back processor cache for PMEM memory range
+ * @addr: virtual start address
+ * @size: number of bytes to write back
+ *
+ * Write back the processor cache range starting at 'addr' for 'size' bytes.
+ * This function requires explicit ordering with a wmb_pmem() call.
+ */
+static inline void wb_cache_pmem(void __pmem *addr, size_t size)
+{
+ if (arch_has_pmem_api())
+ arch_wb_cache_pmem(addr, size);
+}
#endif /* __PMEM_H__ */
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 57e7d87d2d4c..7c88ad156a29 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -51,6 +51,15 @@
#define RADIX_TREE_EXCEPTIONAL_ENTRY 2
#define RADIX_TREE_EXCEPTIONAL_SHIFT 2
+#define RADIX_DAX_MASK 0xf
+#define RADIX_DAX_SHIFT 4
+#define RADIX_DAX_PTE (0x4 | RADIX_TREE_EXCEPTIONAL_ENTRY)
+#define RADIX_DAX_PMD (0x8 | RADIX_TREE_EXCEPTIONAL_ENTRY)
+#define RADIX_DAX_TYPE(entry) ((unsigned long)entry & RADIX_DAX_MASK)
+#define RADIX_DAX_SECTOR(entry) (((unsigned long)entry >> RADIX_DAX_SHIFT))
+#define RADIX_DAX_ENTRY(sector, pmd) ((void *)((unsigned long)sector << \
+ RADIX_DAX_SHIFT | (pmd ? RADIX_DAX_PMD : RADIX_DAX_PTE)))
+
static inline int radix_tree_is_indirect_ptr(void *ptr)
{
return (int)((unsigned long)ptr & RADIX_TREE_INDIRECT_PTR);
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index a43f41cb3c43..4d4780c00d34 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -15,10 +15,7 @@ struct shmem_inode_info {
unsigned int seals; /* shmem seals */
unsigned long flags;
unsigned long alloced; /* data pages alloced to file */
- union {
- unsigned long swapped; /* subtotal assigned to swap */
- char *symlink; /* unswappable short symlink */
- };
+ unsigned long swapped; /* subtotal assigned to swap */
struct shared_policy policy; /* NUMA memory alloc policy */
struct list_head swaplist; /* chain of maybes on swap */
struct simple_xattrs xattrs; /* list of xattrs */
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index f869807a0d0e..5322fea6fe4c 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -51,6 +51,7 @@
/* RPC/RDMA parameters and stats */
extern unsigned int svcrdma_ord;
extern unsigned int svcrdma_max_requests;
+extern unsigned int svcrdma_max_bc_requests;
extern unsigned int svcrdma_max_req_size;
extern atomic_t rdma_stat_recv;
@@ -69,6 +70,7 @@ extern atomic_t rdma_stat_sq_prod;
* completes.
*/
struct svc_rdma_op_ctxt {
+ struct list_head free;
struct svc_rdma_op_ctxt *read_hdr;
struct svc_rdma_fastreg_mr *frmr;
int hdr_count;
@@ -112,6 +114,7 @@ struct svc_rdma_fastreg_mr {
struct list_head frmr_list;
};
struct svc_rdma_req_map {
+ struct list_head free;
unsigned long count;
union {
struct kvec sge[RPCSVC_MAXPAGES];
@@ -132,28 +135,32 @@ struct svcxprt_rdma {
int sc_max_sge;
int sc_max_sge_rd; /* max sge for read target */
- int sc_sq_depth; /* Depth of SQ */
atomic_t sc_sq_count; /* Number of SQ WR on queue */
-
- int sc_max_requests; /* Depth of RQ */
+ unsigned int sc_sq_depth; /* Depth of SQ */
+ unsigned int sc_rq_depth; /* Depth of RQ */
+ u32 sc_max_requests; /* Forward credits */
+ u32 sc_max_bc_requests;/* Backward credits */
int sc_max_req_size; /* Size of each RQ WR buf */
struct ib_pd *sc_pd;
atomic_t sc_dma_used;
- atomic_t sc_ctxt_used;
+ spinlock_t sc_ctxt_lock;
+ struct list_head sc_ctxts;
+ int sc_ctxt_used;
+ spinlock_t sc_map_lock;
+ struct list_head sc_maps;
+
struct list_head sc_rq_dto_q;
spinlock_t sc_rq_dto_lock;
struct ib_qp *sc_qp;
struct ib_cq *sc_rq_cq;
struct ib_cq *sc_sq_cq;
- struct ib_mr *sc_phys_mr; /* MR for server memory */
int (*sc_reader)(struct svcxprt_rdma *,
struct svc_rqst *,
struct svc_rdma_op_ctxt *,
int *, u32 *, u32, u32, u64, bool);
u32 sc_dev_caps; /* distilled device caps */
- u32 sc_dma_lkey; /* local dma key */
unsigned int sc_frmr_pg_list_len;
struct list_head sc_frmr_q;
spinlock_t sc_frmr_q_lock;
@@ -179,8 +186,18 @@ struct svcxprt_rdma {
#define RPCRDMA_MAX_REQUESTS 32
#define RPCRDMA_MAX_REQ_SIZE 4096
+/* Typical ULP usage of BC requests is NFSv4.1 backchannel. Our
+ * current NFSv4.1 implementation supports one backchannel slot.
+ */
+#define RPCRDMA_MAX_BC_REQUESTS 2
+
#define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD
+/* svc_rdma_backchannel.c */
+extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt,
+ struct rpcrdma_msg *rmsgp,
+ struct xdr_buf *rcvbuf);
+
/* svc_rdma_marshal.c */
extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *);
extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *,
@@ -206,6 +223,8 @@ extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *,
u32, u32, u64, bool);
/* svc_rdma_sendto.c */
+extern int svc_rdma_map_xdr(struct svcxprt_rdma *, struct xdr_buf *,
+ struct svc_rdma_req_map *);
extern int svc_rdma_sendto(struct svc_rqst *);
extern struct rpcrdma_read_chunk *
svc_rdma_get_read_chunk(struct rpcrdma_msg *);
@@ -214,13 +233,14 @@ extern struct rpcrdma_read_chunk *
extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *);
extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *,
enum rpcrdma_errcode);
-extern int svc_rdma_post_recv(struct svcxprt_rdma *);
+extern int svc_rdma_post_recv(struct svcxprt_rdma *, gfp_t);
extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int);
extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt);
-extern struct svc_rdma_req_map *svc_rdma_get_req_map(void);
-extern void svc_rdma_put_req_map(struct svc_rdma_req_map *);
+extern struct svc_rdma_req_map *svc_rdma_get_req_map(struct svcxprt_rdma *);
+extern void svc_rdma_put_req_map(struct svcxprt_rdma *,
+ struct svc_rdma_req_map *);
extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *);
extern void svc_rdma_put_frmr(struct svcxprt_rdma *,
struct svc_rdma_fastreg_mr *);
@@ -234,6 +254,7 @@ extern struct svc_xprt_class svc_rdma_bc_class;
#endif
/* svc_rdma.c */
+extern struct workqueue_struct *svc_rdma_wq;
extern int svc_rdma_init(void);
extern void svc_rdma_cleanup(void);
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 613c29bd6baf..e13a1ace50e9 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -43,6 +43,9 @@
/* Default weight of a bound cooling device */
#define THERMAL_WEIGHT_DEFAULT 0
+/* use value, which < 0K, to indicate an invalid/uninitialized temperature */
+#define THERMAL_TEMP_INVALID -274000
+
/* Unit conversion macros */
#define DECI_KELVIN_TO_CELSIUS(t) ({ \
long _t = (t); \
@@ -167,6 +170,7 @@ struct thermal_attr {
* @forced_passive: If > 0, temperature at which to switch on all ACPI
* processor cooling devices. Currently only used by the
* step-wise governor.
+ * @need_update: if equals 1, thermal_zone_device_update needs to be invoked.
* @ops: operations this &thermal_zone_device supports
* @tzp: thermal zone parameters
* @governor: pointer to the governor for this thermal zone
@@ -194,6 +198,7 @@ struct thermal_zone_device {
int emul_temperature;
int passive;
unsigned int forced_passive;
+ atomic_t need_update;
struct thermal_zone_device_ops *ops;
struct thermal_zone_params *tzp;
struct thermal_governor *governor;
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index 11528591d0d7..c34c9002460c 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -83,6 +83,8 @@ struct rdma_dev_addr {
int bound_dev_if;
enum rdma_transport_type transport;
struct net *net;
+ enum rdma_network_type network;
+ int hoplimit;
};
/**
@@ -91,8 +93,8 @@ struct rdma_dev_addr {
*
* The dev_addr->net field must be initialized.
*/
-int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
- u16 *vlan_id);
+int rdma_translate_ip(const struct sockaddr *addr,
+ struct rdma_dev_addr *dev_addr, u16 *vlan_id);
/**
* rdma_resolve_ip - Resolve source and destination IP addresses to
@@ -117,6 +119,10 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
struct rdma_dev_addr *addr, void *context),
void *context);
+int rdma_resolve_ip_route(struct sockaddr *src_addr,
+ const struct sockaddr *dst_addr,
+ struct rdma_dev_addr *addr);
+
void rdma_addr_cancel(struct rdma_dev_addr *addr);
int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
@@ -125,8 +131,10 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
int rdma_addr_size(struct sockaddr *addr);
int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id);
-int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgid,
- u8 *smac, u16 *vlan_id, int if_index);
+int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
+ const union ib_gid *dgid,
+ u8 *smac, u16 *vlan_id, int *if_index,
+ int *hoplimit);
static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr)
{
diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h
index 269a27cf0a46..e30f19bd4a41 100644
--- a/include/rdma/ib_cache.h
+++ b/include/rdma/ib_cache.h
@@ -60,6 +60,7 @@ int ib_get_cached_gid(struct ib_device *device,
* a specified GID value occurs.
* @device: The device to query.
* @gid: The GID value to search for.
+ * @gid_type: The GID type to search for.
* @ndev: In RoCE, the net device of the device. NULL means ignore.
* @port_num: The port number of the device where the GID value was found.
* @index: The index into the cached GID table where the GID was found. This
@@ -70,6 +71,7 @@ int ib_get_cached_gid(struct ib_device *device,
*/
int ib_find_cached_gid(struct ib_device *device,
const union ib_gid *gid,
+ enum ib_gid_type gid_type,
struct net_device *ndev,
u8 *port_num,
u16 *index);
@@ -79,6 +81,7 @@ int ib_find_cached_gid(struct ib_device *device,
* GID value occurs
* @device: The device to query.
* @gid: The GID value to search for.
+ * @gid_type: The GID type to search for.
* @port_num: The port number of the device where the GID value sould be
* searched.
* @ndev: In RoCE, the net device of the device. Null means ignore.
@@ -90,6 +93,7 @@ int ib_find_cached_gid(struct ib_device *device,
*/
int ib_find_cached_gid_by_port(struct ib_device *device,
const union ib_gid *gid,
+ enum ib_gid_type gid_type,
u8 port_num,
struct net_device *ndev,
u16 *index);
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index ec9b44dd3d80..0ff049bd9ad4 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -438,6 +438,7 @@ typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent,
/**
* ib_mad_recv_handler - callback handler for a received MAD.
* @mad_agent: MAD agent requesting the received MAD.
+ * @send_buf: Send buffer if found, else NULL
* @mad_recv_wc: Received work completion information on the received MAD.
*
* MADs received in response to a send request operation will be handed to
@@ -447,6 +448,7 @@ typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent,
* modify the data referenced by @mad_recv_wc.
*/
typedef void (*ib_mad_recv_handler)(struct ib_mad_agent *mad_agent,
+ struct ib_mad_send_buf *send_buf,
struct ib_mad_recv_wc *mad_recv_wc);
/**
diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h
index e99d8f9a4551..0f3daae44bf9 100644
--- a/include/rdma/ib_pack.h
+++ b/include/rdma/ib_pack.h
@@ -41,6 +41,8 @@ enum {
IB_ETH_BYTES = 14,
IB_VLAN_BYTES = 4,
IB_GRH_BYTES = 40,
+ IB_IP4_BYTES = 20,
+ IB_UDP_BYTES = 8,
IB_BTH_BYTES = 12,
IB_DETH_BYTES = 8
};
@@ -223,6 +225,27 @@ struct ib_unpacked_eth {
__be16 type;
};
+struct ib_unpacked_ip4 {
+ u8 ver;
+ u8 hdr_len;
+ u8 tos;
+ __be16 tot_len;
+ __be16 id;
+ __be16 frag_off;
+ u8 ttl;
+ u8 protocol;
+ __sum16 check;
+ __be32 saddr;
+ __be32 daddr;
+};
+
+struct ib_unpacked_udp {
+ __be16 sport;
+ __be16 dport;
+ __be16 length;
+ __be16 csum;
+};
+
struct ib_unpacked_vlan {
__be16 tag;
__be16 type;
@@ -237,6 +260,10 @@ struct ib_ud_header {
struct ib_unpacked_vlan vlan;
int grh_present;
struct ib_unpacked_grh grh;
+ int ipv4_present;
+ struct ib_unpacked_ip4 ip4;
+ int udp_present;
+ struct ib_unpacked_udp udp;
struct ib_unpacked_bth bth;
struct ib_unpacked_deth deth;
int immediate_present;
@@ -253,13 +280,17 @@ void ib_unpack(const struct ib_field *desc,
void *buf,
void *structure);
-void ib_ud_header_init(int payload_bytes,
- int lrh_present,
- int eth_present,
- int vlan_present,
- int grh_present,
- int immediate_present,
- struct ib_ud_header *header);
+__sum16 ib_ud_ip4_csum(struct ib_ud_header *header);
+
+int ib_ud_header_init(int payload_bytes,
+ int lrh_present,
+ int eth_present,
+ int vlan_present,
+ int grh_present,
+ int ip_version,
+ int udp_present,
+ int immediate_present,
+ struct ib_ud_header *header);
int ib_ud_header_pack(struct ib_ud_header *header,
void *buf);
diff --git a/include/rdma/ib_pma.h b/include/rdma/ib_pma.h
index a5889f18807b..2f8a65c1fca7 100644
--- a/include/rdma/ib_pma.h
+++ b/include/rdma/ib_pma.h
@@ -42,6 +42,7 @@
*/
#define IB_PMA_CLASS_CAP_ALLPORTSELECT cpu_to_be16(1 << 8)
#define IB_PMA_CLASS_CAP_EXT_WIDTH cpu_to_be16(1 << 9)
+#define IB_PMA_CLASS_CAP_EXT_WIDTH_NOIETF cpu_to_be16(1 << 10)
#define IB_PMA_CLASS_CAP_XMIT_WAIT cpu_to_be16(1 << 12)
#define IB_PMA_CLASS_PORT_INFO cpu_to_be16(0x0001)
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index 301969552d0a..cdc1c81aa275 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -160,6 +160,7 @@ struct ib_sa_path_rec {
int ifindex;
/* ignored in IB */
struct net *net;
+ enum ib_gid_type gid_type;
};
static inline struct net_device *ib_get_ndev_from_path(struct ib_sa_path_rec *rec)
@@ -402,6 +403,8 @@ int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num,
*/
int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
struct ib_sa_mcmember_rec *rec,
+ struct net_device *ndev,
+ enum ib_gid_type gid_type,
struct ib_ah_attr *ah_attr);
/**
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 120da1d7f57e..284b00c8fea4 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -49,13 +49,19 @@
#include <linux/scatterlist.h>
#include <linux/workqueue.h>
#include <linux/socket.h>
+#include <linux/irq_poll.h>
#include <uapi/linux/if_ether.h>
+#include <net/ipv6.h>
+#include <net/ip.h>
+#include <linux/string.h>
+#include <linux/slab.h>
#include <linux/atomic.h>
#include <linux/mmu_notifier.h>
#include <asm/uaccess.h>
extern struct workqueue_struct *ib_wq;
+extern struct workqueue_struct *ib_comp_wq;
union ib_gid {
u8 raw[16];
@@ -67,7 +73,17 @@ union ib_gid {
extern union ib_gid zgid;
+enum ib_gid_type {
+ /* If link layer is Ethernet, this is RoCE V1 */
+ IB_GID_TYPE_IB = 0,
+ IB_GID_TYPE_ROCE = 0,
+ IB_GID_TYPE_ROCE_UDP_ENCAP = 1,
+ IB_GID_TYPE_SIZE
+};
+
+#define ROCE_V2_UDP_DPORT 4791
struct ib_gid_attr {
+ enum ib_gid_type gid_type;
struct net_device *ndev;
};
@@ -98,6 +114,35 @@ enum rdma_protocol_type {
__attribute_const__ enum rdma_transport_type
rdma_node_get_transport(enum rdma_node_type node_type);
+enum rdma_network_type {
+ RDMA_NETWORK_IB,
+ RDMA_NETWORK_ROCE_V1 = RDMA_NETWORK_IB,
+ RDMA_NETWORK_IPV4,
+ RDMA_NETWORK_IPV6
+};
+
+static inline enum ib_gid_type ib_network_to_gid_type(enum rdma_network_type network_type)
+{
+ if (network_type == RDMA_NETWORK_IPV4 ||
+ network_type == RDMA_NETWORK_IPV6)
+ return IB_GID_TYPE_ROCE_UDP_ENCAP;
+
+ /* IB_GID_TYPE_IB same as RDMA_NETWORK_ROCE_V1 */
+ return IB_GID_TYPE_IB;
+}
+
+static inline enum rdma_network_type ib_gid_to_network_type(enum ib_gid_type gid_type,
+ union ib_gid *gid)
+{
+ if (gid_type == IB_GID_TYPE_IB)
+ return RDMA_NETWORK_IB;
+
+ if (ipv6_addr_v4mapped((struct in6_addr *)gid))
+ return RDMA_NETWORK_IPV4;
+ else
+ return RDMA_NETWORK_IPV6;
+}
+
enum rdma_link_layer {
IB_LINK_LAYER_UNSPECIFIED,
IB_LINK_LAYER_INFINIBAND,
@@ -105,24 +150,32 @@ enum rdma_link_layer {
};
enum ib_device_cap_flags {
- IB_DEVICE_RESIZE_MAX_WR = 1,
- IB_DEVICE_BAD_PKEY_CNTR = (1<<1),
- IB_DEVICE_BAD_QKEY_CNTR = (1<<2),
- IB_DEVICE_RAW_MULTI = (1<<3),
- IB_DEVICE_AUTO_PATH_MIG = (1<<4),
- IB_DEVICE_CHANGE_PHY_PORT = (1<<5),
- IB_DEVICE_UD_AV_PORT_ENFORCE = (1<<6),
- IB_DEVICE_CURR_QP_STATE_MOD = (1<<7),
- IB_DEVICE_SHUTDOWN_PORT = (1<<8),
- IB_DEVICE_INIT_TYPE = (1<<9),
- IB_DEVICE_PORT_ACTIVE_EVENT = (1<<10),
- IB_DEVICE_SYS_IMAGE_GUID = (1<<11),
- IB_DEVICE_RC_RNR_NAK_GEN = (1<<12),
- IB_DEVICE_SRQ_RESIZE = (1<<13),
- IB_DEVICE_N_NOTIFY_CQ = (1<<14),
- IB_DEVICE_LOCAL_DMA_LKEY = (1<<15),
- IB_DEVICE_RESERVED = (1<<16), /* old SEND_W_INV */
- IB_DEVICE_MEM_WINDOW = (1<<17),
+ IB_DEVICE_RESIZE_MAX_WR = (1 << 0),
+ IB_DEVICE_BAD_PKEY_CNTR = (1 << 1),
+ IB_DEVICE_BAD_QKEY_CNTR = (1 << 2),
+ IB_DEVICE_RAW_MULTI = (1 << 3),
+ IB_DEVICE_AUTO_PATH_MIG = (1 << 4),
+ IB_DEVICE_CHANGE_PHY_PORT = (1 << 5),
+ IB_DEVICE_UD_AV_PORT_ENFORCE = (1 << 6),
+ IB_DEVICE_CURR_QP_STATE_MOD = (1 << 7),
+ IB_DEVICE_SHUTDOWN_PORT = (1 << 8),
+ IB_DEVICE_INIT_TYPE = (1 << 9),
+ IB_DEVICE_PORT_ACTIVE_EVENT = (1 << 10),
+ IB_DEVICE_SYS_IMAGE_GUID = (1 << 11),
+ IB_DEVICE_RC_RNR_NAK_GEN = (1 << 12),
+ IB_DEVICE_SRQ_RESIZE = (1 << 13),
+ IB_DEVICE_N_NOTIFY_CQ = (1 << 14),
+
+ /*
+ * This device supports a per-device lkey or stag that can be
+ * used without performing a memory registration for the local
+ * memory. Note that ULPs should never check this flag, but
+ * instead of use the local_dma_lkey flag in the ib_pd structure,
+ * which will always contain a usable lkey.
+ */
+ IB_DEVICE_LOCAL_DMA_LKEY = (1 << 15),
+ IB_DEVICE_RESERVED /* old SEND_W_INV */ = (1 << 16),
+ IB_DEVICE_MEM_WINDOW = (1 << 17),
/*
* Devices should set IB_DEVICE_UD_IP_SUM if they support
* insertion of UDP and TCP checksum on outgoing UD IPoIB
@@ -130,18 +183,35 @@ enum ib_device_cap_flags {
* incoming messages. Setting this flag implies that the
* IPoIB driver may set NETIF_F_IP_CSUM for datagram mode.
*/
- IB_DEVICE_UD_IP_CSUM = (1<<18),
- IB_DEVICE_UD_TSO = (1<<19),
- IB_DEVICE_XRC = (1<<20),
- IB_DEVICE_MEM_MGT_EXTENSIONS = (1<<21),
- IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22),
- IB_DEVICE_MEM_WINDOW_TYPE_2A = (1<<23),
- IB_DEVICE_MEM_WINDOW_TYPE_2B = (1<<24),
- IB_DEVICE_RC_IP_CSUM = (1<<25),
- IB_DEVICE_RAW_IP_CSUM = (1<<26),
- IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29),
- IB_DEVICE_SIGNATURE_HANDOVER = (1<<30),
- IB_DEVICE_ON_DEMAND_PAGING = (1<<31),
+ IB_DEVICE_UD_IP_CSUM = (1 << 18),
+ IB_DEVICE_UD_TSO = (1 << 19),
+ IB_DEVICE_XRC = (1 << 20),
+
+ /*
+ * This device supports the IB "base memory management extension",
+ * which includes support for fast registrations (IB_WR_REG_MR,
+ * IB_WR_LOCAL_INV and IB_WR_SEND_WITH_INV verbs). This flag should
+ * also be set by any iWarp device which must support FRs to comply
+ * to the iWarp verbs spec. iWarp devices also support the
+ * IB_WR_RDMA_READ_WITH_INV verb for RDMA READs that invalidate the
+ * stag.
+ */
+ IB_DEVICE_MEM_MGT_EXTENSIONS = (1 << 21),
+ IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1 << 22),
+ IB_DEVICE_MEM_WINDOW_TYPE_2A = (1 << 23),
+ IB_DEVICE_MEM_WINDOW_TYPE_2B = (1 << 24),
+ IB_DEVICE_RC_IP_CSUM = (1 << 25),
+ IB_DEVICE_RAW_IP_CSUM = (1 << 26),
+ /*
+ * Devices should set IB_DEVICE_CROSS_CHANNEL if they
+ * support execution of WQEs that involve synchronization
+ * of I/O operations with single completion queue managed
+ * by hardware.
+ */
+ IB_DEVICE_CROSS_CHANNEL = (1 << 27),
+ IB_DEVICE_MANAGED_FLOW_STEERING = (1 << 29),
+ IB_DEVICE_SIGNATURE_HANDOVER = (1 << 30),
+ IB_DEVICE_ON_DEMAND_PAGING = (1 << 31),
};
enum ib_signature_prot_cap {
@@ -184,6 +254,7 @@ struct ib_odp_caps {
enum ib_cq_creation_flags {
IB_CQ_FLAGS_TIMESTAMP_COMPLETION = 1 << 0,
+ IB_CQ_FLAGS_IGNORE_OVERRUN = 1 << 1,
};
struct ib_cq_init_attr {
@@ -393,6 +464,7 @@ union rdma_protocol_stats {
#define RDMA_CORE_CAP_PROT_IB 0x00100000
#define RDMA_CORE_CAP_PROT_ROCE 0x00200000
#define RDMA_CORE_CAP_PROT_IWARP 0x00400000
+#define RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP 0x00800000
#define RDMA_CORE_PORT_IBA_IB (RDMA_CORE_CAP_PROT_IB \
| RDMA_CORE_CAP_IB_MAD \
@@ -405,6 +477,12 @@ union rdma_protocol_stats {
| RDMA_CORE_CAP_IB_CM \
| RDMA_CORE_CAP_AF_IB \
| RDMA_CORE_CAP_ETH_AH)
+#define RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP \
+ (RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP \
+ | RDMA_CORE_CAP_IB_MAD \
+ | RDMA_CORE_CAP_IB_CM \
+ | RDMA_CORE_CAP_AF_IB \
+ | RDMA_CORE_CAP_ETH_AH)
#define RDMA_CORE_PORT_IWARP (RDMA_CORE_CAP_PROT_IWARP \
| RDMA_CORE_CAP_IW_CM)
#define RDMA_CORE_PORT_INTEL_OPA (RDMA_CORE_PORT_IBA_IB \
@@ -519,6 +597,17 @@ struct ib_grh {
union ib_gid dgid;
};
+union rdma_network_hdr {
+ struct ib_grh ibgrh;
+ struct {
+ /* The IB spec states that if it's IPv4, the header
+ * is located in the last 20 bytes of the header.
+ */
+ u8 reserved[20];
+ struct iphdr roce4grh;
+ };
+};
+
enum {
IB_MULTICAST_QPN = 0xffffff
};
@@ -734,7 +823,6 @@ enum ib_wc_opcode {
IB_WC_RDMA_READ,
IB_WC_COMP_SWAP,
IB_WC_FETCH_ADD,
- IB_WC_BIND_MW,
IB_WC_LSO,
IB_WC_LOCAL_INV,
IB_WC_REG_MR,
@@ -755,10 +843,14 @@ enum ib_wc_flags {
IB_WC_IP_CSUM_OK = (1<<3),
IB_WC_WITH_SMAC = (1<<4),
IB_WC_WITH_VLAN = (1<<5),
+ IB_WC_WITH_NETWORK_HDR_TYPE = (1<<6),
};
struct ib_wc {
- u64 wr_id;
+ union {
+ u64 wr_id;
+ struct ib_cqe *wr_cqe;
+ };
enum ib_wc_status status;
enum ib_wc_opcode opcode;
u32 vendor_err;
@@ -777,6 +869,7 @@ struct ib_wc {
u8 port_num; /* valid only for DR SMPs on switches */
u8 smac[ETH_ALEN];
u16 vlan_id;
+ u8 network_hdr_type;
};
enum ib_cq_notify_flags {
@@ -866,6 +959,9 @@ enum ib_qp_type {
enum ib_qp_create_flags {
IB_QP_CREATE_IPOIB_UD_LSO = 1 << 0,
IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1,
+ IB_QP_CREATE_CROSS_CHANNEL = 1 << 2,
+ IB_QP_CREATE_MANAGED_SEND = 1 << 3,
+ IB_QP_CREATE_MANAGED_RECV = 1 << 4,
IB_QP_CREATE_NETIF_QP = 1 << 5,
IB_QP_CREATE_SIGNATURE_EN = 1 << 6,
IB_QP_CREATE_USE_GFP_NOIO = 1 << 7,
@@ -1027,7 +1123,6 @@ enum ib_wr_opcode {
IB_WR_REG_MR,
IB_WR_MASKED_ATOMIC_CMP_AND_SWP,
IB_WR_MASKED_ATOMIC_FETCH_AND_ADD,
- IB_WR_BIND_MW,
IB_WR_REG_SIG_MR,
/* reserve values for low level drivers' internal use.
* These values will not be used at all in the ib core layer.
@@ -1062,26 +1157,16 @@ struct ib_sge {
u32 lkey;
};
-/**
- * struct ib_mw_bind_info - Parameters for a memory window bind operation.
- * @mr: A memory region to bind the memory window to.
- * @addr: The address where the memory window should begin.
- * @length: The length of the memory window, in bytes.
- * @mw_access_flags: Access flags from enum ib_access_flags for the window.
- *
- * This struct contains the shared parameters for type 1 and type 2
- * memory window bind operations.
- */
-struct ib_mw_bind_info {
- struct ib_mr *mr;
- u64 addr;
- u64 length;
- int mw_access_flags;
+struct ib_cqe {
+ void (*done)(struct ib_cq *cq, struct ib_wc *wc);
};
struct ib_send_wr {
struct ib_send_wr *next;
- u64 wr_id;
+ union {
+ u64 wr_id;
+ struct ib_cqe *wr_cqe;
+ };
struct ib_sge *sg_list;
int num_sge;
enum ib_wr_opcode opcode;
@@ -1147,19 +1232,6 @@ static inline struct ib_reg_wr *reg_wr(struct ib_send_wr *wr)
return container_of(wr, struct ib_reg_wr, wr);
}
-struct ib_bind_mw_wr {
- struct ib_send_wr wr;
- struct ib_mw *mw;
- /* The new rkey for the memory window. */
- u32 rkey;
- struct ib_mw_bind_info bind_info;
-};
-
-static inline struct ib_bind_mw_wr *bind_mw_wr(struct ib_send_wr *wr)
-{
- return container_of(wr, struct ib_bind_mw_wr, wr);
-}
-
struct ib_sig_handover_wr {
struct ib_send_wr wr;
struct ib_sig_attrs *sig_attrs;
@@ -1175,7 +1247,10 @@ static inline struct ib_sig_handover_wr *sig_handover_wr(struct ib_send_wr *wr)
struct ib_recv_wr {
struct ib_recv_wr *next;
- u64 wr_id;
+ union {
+ u64 wr_id;
+ struct ib_cqe *wr_cqe;
+ };
struct ib_sge *sg_list;
int num_sge;
};
@@ -1190,20 +1265,10 @@ enum ib_access_flags {
IB_ACCESS_ON_DEMAND = (1<<6),
};
-struct ib_phys_buf {
- u64 addr;
- u64 size;
-};
-
-struct ib_mr_attr {
- struct ib_pd *pd;
- u64 device_virt_addr;
- u64 size;
- int mr_access_flags;
- u32 lkey;
- u32 rkey;
-};
-
+/*
+ * XXX: these are apparently used for ->rereg_user_mr, no idea why they
+ * are hidden here instead of a uapi header!
+ */
enum ib_mr_rereg_flags {
IB_MR_REREG_TRANS = 1,
IB_MR_REREG_PD = (1<<1),
@@ -1211,18 +1276,6 @@ enum ib_mr_rereg_flags {
IB_MR_REREG_SUPPORTED = ((IB_MR_REREG_ACCESS << 1) - 1)
};
-/**
- * struct ib_mw_bind - Parameters for a type 1 memory window bind operation.
- * @wr_id: Work request id.
- * @send_flags: Flags from ib_send_flags enum.
- * @bind_info: More parameters of the bind operation.
- */
-struct ib_mw_bind {
- u64 wr_id;
- int send_flags;
- struct ib_mw_bind_info bind_info;
-};
-
struct ib_fmr_attr {
int max_pages;
int max_maps;
@@ -1307,6 +1360,12 @@ struct ib_ah {
typedef void (*ib_comp_handler)(struct ib_cq *cq, void *cq_context);
+enum ib_poll_context {
+ IB_POLL_DIRECT, /* caller context, no hw completions */
+ IB_POLL_SOFTIRQ, /* poll from softirq context */
+ IB_POLL_WORKQUEUE, /* poll from workqueue */
+};
+
struct ib_cq {
struct ib_device *device;
struct ib_uobject *uobject;
@@ -1315,6 +1374,12 @@ struct ib_cq {
void *cq_context;
int cqe;
atomic_t usecnt; /* count number of work queues */
+ enum ib_poll_context poll_ctx;
+ struct ib_wc *wc;
+ union {
+ struct irq_poll iop;
+ struct work_struct work;
+ };
};
struct ib_srq {
@@ -1363,7 +1428,6 @@ struct ib_mr {
u64 iova;
u32 length;
unsigned int page_size;
- atomic_t usecnt; /* count number of MWs */
};
struct ib_mw {
@@ -1724,11 +1788,6 @@ struct ib_device {
int wc_cnt);
struct ib_mr * (*get_dma_mr)(struct ib_pd *pd,
int mr_access_flags);
- struct ib_mr * (*reg_phys_mr)(struct ib_pd *pd,
- struct ib_phys_buf *phys_buf_array,
- int num_phys_buf,
- int mr_access_flags,
- u64 *iova_start);
struct ib_mr * (*reg_user_mr)(struct ib_pd *pd,
u64 start, u64 length,
u64 virt_addr,
@@ -1741,8 +1800,6 @@ struct ib_device {
int mr_access_flags,
struct ib_pd *pd,
struct ib_udata *udata);
- int (*query_mr)(struct ib_mr *mr,
- struct ib_mr_attr *mr_attr);
int (*dereg_mr)(struct ib_mr *mr);
struct ib_mr * (*alloc_mr)(struct ib_pd *pd,
enum ib_mr_type mr_type,
@@ -1750,18 +1807,8 @@ struct ib_device {
int (*map_mr_sg)(struct ib_mr *mr,
struct scatterlist *sg,
int sg_nents);
- int (*rereg_phys_mr)(struct ib_mr *mr,
- int mr_rereg_mask,
- struct ib_pd *pd,
- struct ib_phys_buf *phys_buf_array,
- int num_phys_buf,
- int mr_access_flags,
- u64 *iova_start);
struct ib_mw * (*alloc_mw)(struct ib_pd *pd,
enum ib_mw_type type);
- int (*bind_mw)(struct ib_qp *qp,
- struct ib_mw *mw,
- struct ib_mw_bind *mw_bind);
int (*dealloc_mw)(struct ib_mw *mw);
struct ib_fmr * (*alloc_fmr)(struct ib_pd *pd,
int mr_access_flags,
@@ -1823,6 +1870,7 @@ struct ib_device {
u16 is_switch:1;
u8 node_type;
u8 phys_port_cnt;
+ struct ib_device_attr attrs;
/**
* The following mandatory functions are used only at device
@@ -1888,6 +1936,31 @@ static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len
return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0;
}
+static inline bool ib_is_udata_cleared(struct ib_udata *udata,
+ size_t offset,
+ size_t len)
+{
+ const void __user *p = udata->inbuf + offset;
+ bool ret = false;
+ u8 *buf;
+
+ if (len > USHRT_MAX)
+ return false;
+
+ buf = kmalloc(len, GFP_KERNEL);
+ if (!buf)
+ return false;
+
+ if (copy_from_user(buf, p, len))
+ goto free;
+
+ ret = !memchr_inv(buf, 0, len);
+
+free:
+ kfree(buf);
+ return ret;
+}
+
/**
* ib_modify_qp_is_ok - Check that the supplied attribute mask
* contains all required attributes and no attributes not allowed for
@@ -1912,9 +1985,6 @@ int ib_register_event_handler (struct ib_event_handler *event_handler);
int ib_unregister_event_handler(struct ib_event_handler *event_handler);
void ib_dispatch_event(struct ib_event *event);
-int ib_query_device(struct ib_device *device,
- struct ib_device_attr *device_attr);
-
int ib_query_port(struct ib_device *device,
u8 port_num, struct ib_port_attr *port_attr);
@@ -1968,6 +2038,17 @@ static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num)
static inline bool rdma_protocol_roce(const struct ib_device *device, u8 port_num)
{
+ return device->port_immutable[port_num].core_cap_flags &
+ (RDMA_CORE_CAP_PROT_ROCE | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP);
+}
+
+static inline bool rdma_protocol_roce_udp_encap(const struct ib_device *device, u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
+}
+
+static inline bool rdma_protocol_roce_eth_encap(const struct ib_device *device, u8 port_num)
+{
return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_ROCE;
}
@@ -1978,8 +2059,8 @@ static inline bool rdma_protocol_iwarp(const struct ib_device *device, u8 port_n
static inline bool rdma_ib_or_roce(const struct ib_device *device, u8 port_num)
{
- return device->port_immutable[port_num].core_cap_flags &
- (RDMA_CORE_CAP_PROT_IB | RDMA_CORE_CAP_PROT_ROCE);
+ return rdma_protocol_ib(device, port_num) ||
+ rdma_protocol_roce(device, port_num);
}
/**
@@ -2220,7 +2301,8 @@ int ib_modify_port(struct ib_device *device,
struct ib_port_modify *port_modify);
int ib_find_gid(struct ib_device *device, union ib_gid *gid,
- struct net_device *ndev, u8 *port_num, u16 *index);
+ enum ib_gid_type gid_type, struct net_device *ndev,
+ u8 *port_num, u16 *index);
int ib_find_pkey(struct ib_device *device,
u8 port_num, u16 pkey, u16 *index);
@@ -2454,6 +2536,11 @@ static inline int ib_post_recv(struct ib_qp *qp,
return qp->device->post_recv(qp, recv_wr, bad_recv_wr);
}
+struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
+ int nr_cqe, int comp_vector, enum ib_poll_context poll_ctx);
+void ib_free_cq(struct ib_cq *cq);
+int ib_process_cq_direct(struct ib_cq *cq, int budget);
+
/**
* ib_create_cq - Creates a CQ on the specified device.
* @device: The device on which to create the CQ.
@@ -2839,13 +2926,6 @@ static inline void ib_dma_free_coherent(struct ib_device *dev,
}
/**
- * ib_query_mr - Retrieves information about a specific memory region.
- * @mr: The memory region to retrieve information about.
- * @mr_attr: The attributes of the specified memory region.
- */
-int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr);
-
-/**
* ib_dereg_mr - Deregisters a memory region and removes it from the
* HCA translation table.
* @mr: The memory region to deregister.
@@ -2882,42 +2962,6 @@ static inline u32 ib_inc_rkey(u32 rkey)
}
/**
- * ib_alloc_mw - Allocates a memory window.
- * @pd: The protection domain associated with the memory window.
- * @type: The type of the memory window (1 or 2).
- */
-struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
-
-/**
- * ib_bind_mw - Posts a work request to the send queue of the specified
- * QP, which binds the memory window to the given address range and
- * remote access attributes.
- * @qp: QP to post the bind work request on.
- * @mw: The memory window to bind.
- * @mw_bind: Specifies information about the memory window, including
- * its address range, remote access rights, and associated memory region.
- *
- * If there is no immediate error, the function will update the rkey member
- * of the mw parameter to its new value. The bind operation can still fail
- * asynchronously.
- */
-static inline int ib_bind_mw(struct ib_qp *qp,
- struct ib_mw *mw,
- struct ib_mw_bind *mw_bind)
-{
- /* XXX reference counting in corresponding MR? */
- return mw->device->bind_mw ?
- mw->device->bind_mw(qp, mw, mw_bind) :
- -ENOSYS;
-}
-
-/**
- * ib_dealloc_mw - Deallocates a memory window.
- * @mw: The memory window to deallocate.
- */
-int ib_dealloc_mw(struct ib_mw *mw);
-
-/**
* ib_alloc_fmr - Allocates a unmapped fast memory region.
* @pd: The protection domain associated with the unmapped region.
* @mr_access_flags: Specifies the memory access rights.
diff --git a/include/scsi/iser.h b/include/scsi/iser.h
new file mode 100644
index 000000000000..2e678fa74eca
--- /dev/null
+++ b/include/scsi/iser.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ISCSI_ISER_H
+#define ISCSI_ISER_H
+
+#define ISER_ZBVA_NOT_SUP 0x80
+#define ISER_SEND_W_INV_NOT_SUP 0x40
+#define ISERT_ZBVA_NOT_USED 0x80
+#define ISERT_SEND_W_INV_NOT_USED 0x40
+
+#define ISCSI_CTRL 0x10
+#define ISER_HELLO 0x20
+#define ISER_HELLORPLY 0x30
+
+#define ISER_VER 0x10
+#define ISER_WSV 0x08
+#define ISER_RSV 0x04
+
+/**
+ * struct iser_cm_hdr - iSER CM header (from iSER Annex A12)
+ *
+ * @flags: flags support (zbva, send_w_inv)
+ * @rsvd: reserved
+ */
+struct iser_cm_hdr {
+ u8 flags;
+ u8 rsvd[3];
+} __packed;
+
+/**
+ * struct iser_ctrl - iSER header of iSCSI control PDU
+ *
+ * @flags: opcode and read/write valid bits
+ * @rsvd: reserved
+ * @write_stag: write rkey
+ * @write_va: write virtual address
+ * @reaf_stag: read rkey
+ * @read_va: read virtual address
+ */
+struct iser_ctrl {
+ u8 flags;
+ u8 rsvd[3];
+ __be32 write_stag;
+ __be64 write_va;
+ __be32 read_stag;
+ __be64 read_va;
+} __packed;
+
+#endif /* ISCSI_ISER_H */
diff --git a/include/trace/events/fence.h b/include/trace/events/fence.h
index 98feb1b82896..d6dfa05ba322 100644
--- a/include/trace/events/fence.h
+++ b/include/trace/events/fence.h
@@ -17,7 +17,7 @@ TRACE_EVENT(fence_annotate_wait_on,
TP_STRUCT__entry(
__string(driver, fence->ops->get_driver_name(fence))
- __string(timeline, fence->ops->get_driver_name(fence))
+ __string(timeline, fence->ops->get_timeline_name(fence))
__field(unsigned int, context)
__field(unsigned int, seqno)
diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h
index ff8f6c091a15..f95f25e786ef 100644
--- a/include/trace/events/irq.h
+++ b/include/trace/events/irq.h
@@ -15,7 +15,7 @@ struct softirq_action;
softirq_name(NET_TX) \
softirq_name(NET_RX) \
softirq_name(BLOCK) \
- softirq_name(BLOCK_IOPOLL) \
+ softirq_name(IRQ_POLL) \
softirq_name(TASKLET) \
softirq_name(SCHED) \
softirq_name(HRTIMER) \
diff --git a/include/uapi/drm/etnaviv_drm.h b/include/uapi/drm/etnaviv_drm.h
index 4cc989ad6851..f95e1c43c3fb 100644
--- a/include/uapi/drm/etnaviv_drm.h
+++ b/include/uapi/drm/etnaviv_drm.h
@@ -48,6 +48,8 @@ struct drm_etnaviv_timespec {
#define ETNAVIV_PARAM_GPU_FEATURES_2 0x05
#define ETNAVIV_PARAM_GPU_FEATURES_3 0x06
#define ETNAVIV_PARAM_GPU_FEATURES_4 0x07
+#define ETNAVIV_PARAM_GPU_FEATURES_5 0x08
+#define ETNAVIV_PARAM_GPU_FEATURES_6 0x09
#define ETNAVIV_PARAM_GPU_STREAM_COUNT 0x10
#define ETNAVIV_PARAM_GPU_REGISTER_MAX 0x11
@@ -59,6 +61,7 @@ struct drm_etnaviv_timespec {
#define ETNAVIV_PARAM_GPU_BUFFER_SIZE 0x17
#define ETNAVIV_PARAM_GPU_INSTRUCTION_COUNT 0x18
#define ETNAVIV_PARAM_GPU_NUM_CONSTANTS 0x19
+#define ETNAVIV_PARAM_GPU_NUM_VARYINGS 0x1a
#define ETNA_MAX_PIPES 4
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index f4617cf07069..781c1399c6a3 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -795,7 +795,7 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode,
ro = mnt_want_write(mnt); /* we'll drop it in any case */
error = 0;
- mutex_lock(&d_inode(root)->i_mutex);
+ inode_lock(d_inode(root));
path.dentry = lookup_one_len(name->name, root, strlen(name->name));
if (IS_ERR(path.dentry)) {
error = PTR_ERR(path.dentry);
@@ -841,7 +841,7 @@ out_putfd:
put_unused_fd(fd);
fd = error;
}
- mutex_unlock(&d_inode(root)->i_mutex);
+ inode_unlock(d_inode(root));
if (!ro)
mnt_drop_write(mnt);
out_putname:
@@ -866,7 +866,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
err = mnt_want_write(mnt);
if (err)
goto out_name;
- mutex_lock_nested(&d_inode(mnt->mnt_root)->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(d_inode(mnt->mnt_root), I_MUTEX_PARENT);
dentry = lookup_one_len(name->name, mnt->mnt_root,
strlen(name->name));
if (IS_ERR(dentry)) {
@@ -884,7 +884,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
dput(dentry);
out_unlock:
- mutex_unlock(&d_inode(mnt->mnt_root)->i_mutex);
+ inode_unlock(d_inode(mnt->mnt_root));
if (inode)
iput(inode);
mnt_drop_write(mnt);
diff --git a/ipc/sem.c b/ipc/sem.c
index b471e5a3863d..cddd5b5fde51 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1493,7 +1493,7 @@ out_rcu_wakeup:
wake_up_sem_queue_do(&tasks);
out_free:
if (sem_io != fast_sem_io)
- ipc_free(sem_io, sizeof(ushort)*nsems);
+ ipc_free(sem_io);
return err;
}
diff --git a/ipc/util.c b/ipc/util.c
index 0f401d94b7c6..798cad18dd87 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -414,17 +414,12 @@ void *ipc_alloc(int size)
/**
* ipc_free - free ipc space
* @ptr: pointer returned by ipc_alloc
- * @size: size of block
*
- * Free a block created with ipc_alloc(). The caller must know the size
- * used in the allocation call.
+ * Free a block created with ipc_alloc().
*/
-void ipc_free(void *ptr, int size)
+void ipc_free(void *ptr)
{
- if (size > PAGE_SIZE)
- vfree(ptr);
- else
- kfree(ptr);
+ kvfree(ptr);
}
/**
diff --git a/ipc/util.h b/ipc/util.h
index 3a8a5a0eca62..51f7ca58ac67 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -118,7 +118,7 @@ int ipcperms(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp, short flg);
* both function can sleep
*/
void *ipc_alloc(int size);
-void ipc_free(void *ptr, int size);
+void ipc_free(void *ptr);
/*
* For allocation that need to be freed by RCU.
diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c
index 27c6046c2c3d..f84f8d06e1f6 100644
--- a/kernel/audit_fsnotify.c
+++ b/kernel/audit_fsnotify.c
@@ -95,7 +95,7 @@ struct audit_fsnotify_mark *audit_alloc_mark(struct audit_krule *krule, char *pa
if (IS_ERR(dentry))
return (void *)dentry; /* returning an error */
inode = path.dentry->d_inode;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
audit_mark = kzalloc(sizeof(*audit_mark), GFP_KERNEL);
if (unlikely(!audit_mark)) {
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 656c7e93ac0d..9f194aad0adc 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -364,7 +364,7 @@ static int audit_get_nd(struct audit_watch *watch, struct path *parent)
struct dentry *d = kern_path_locked(watch->path, parent);
if (IS_ERR(d))
return PTR_ERR(d);
- mutex_unlock(&d_backing_inode(parent->dentry)->i_mutex);
+ inode_unlock(d_backing_inode(parent->dentry));
if (d_is_positive(d)) {
/* update watch filter fields */
watch->dev = d_backing_inode(d)->i_sb->s_dev;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index c0957416b32e..06ae52e99ac2 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4872,9 +4872,9 @@ static int perf_fasync(int fd, struct file *filp, int on)
struct perf_event *event = filp->private_data;
int retval;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
retval = fasync_helper(fd, filp, on, &event->fasync);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (retval < 0)
return retval;
diff --git a/kernel/relay.c b/kernel/relay.c
index 0b4570cfacae..074994bcfa9b 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -1133,7 +1133,7 @@ static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos,
if (!desc->count)
return 0;
- mutex_lock(&file_inode(filp)->i_mutex);
+ inode_lock(file_inode(filp));
do {
if (!relay_file_read_avail(buf, *ppos))
break;
@@ -1153,7 +1153,7 @@ static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos,
*ppos = relay_file_read_end_pos(buf, read_start, ret);
}
} while (desc->count && ret);
- mutex_unlock(&file_inode(filp)->i_mutex);
+ inode_unlock(file_inode(filp));
return desc->written;
}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 44253adb3c36..63d3a24e081a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -222,9 +222,9 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
/* Ensure the static_key remains in a consistent state */
inode = file_inode(filp);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
i = sched_feat_set(cmp);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (i == __SCHED_FEAT_NR)
return -EINVAL;
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 580ac2d4024f..15a1795bbba1 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -316,24 +316,24 @@ static inline void seccomp_sync_threads(void)
put_seccomp_filter(thread);
smp_store_release(&thread->seccomp.filter,
caller->seccomp.filter);
+
+ /*
+ * Don't let an unprivileged task work around
+ * the no_new_privs restriction by creating
+ * a thread that sets it up, enters seccomp,
+ * then dies.
+ */
+ if (task_no_new_privs(caller))
+ task_set_no_new_privs(thread);
+
/*
* Opt the other thread into seccomp if needed.
* As threads are considered to be trust-realm
* equivalent (see ptrace_may_access), it is safe to
* allow one thread to transition the other.
*/
- if (thread->seccomp.mode == SECCOMP_MODE_DISABLED) {
- /*
- * Don't let an unprivileged task work around
- * the no_new_privs restriction by creating
- * a thread that sets it up, enters seccomp,
- * then dies.
- */
- if (task_no_new_privs(caller))
- task_set_no_new_privs(thread);
-
+ if (thread->seccomp.mode == SECCOMP_MODE_DISABLED)
seccomp_assign_mode(thread, SECCOMP_MODE_FILTER);
- }
}
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 87fb9801bd9e..d9293402ee68 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1751,7 +1751,7 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr,
{
__buffer_unlock_commit(buffer, event);
- ftrace_trace_stack(tr, buffer, flags, 6, pc, regs);
+ ftrace_trace_stack(tr, buffer, flags, 0, pc, regs);
ftrace_trace_userstack(buffer, flags, pc);
}
EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
diff --git a/lib/Kconfig b/lib/Kconfig
index 435f7315bc89..133ebc0c1773 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -477,6 +477,11 @@ config DDR
information. This data is useful for drivers handling
DDR SDRAM controllers.
+config IRQ_POLL
+ bool "IRQ polling library"
+ help
+ Helper library to poll interrupt mitigation using polling.
+
config MPILIB
tristate
select CLZ_TAB
diff --git a/lib/Makefile b/lib/Makefile
index 2d4bc33d09b4..a7c26a41a738 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -165,6 +165,7 @@ obj-$(CONFIG_GENERIC_NET_UTILS) += net_utils.o
obj-$(CONFIG_SG_SPLIT) += sg_split.o
obj-$(CONFIG_STMP_DEVICE) += stmp_device.o
+obj-$(CONFIG_IRQ_POLL) += irq_poll.o
libfdt_files = fdt.o fdt_ro.o fdt_wip.o fdt_rw.o fdt_sw.o fdt_strerror.o \
fdt_empty_tree.o
diff --git a/block/blk-iopoll.c b/lib/irq_poll.c
index 0736729d6494..836f7db4e548 100644
--- a/block/blk-iopoll.c
+++ b/lib/irq_poll.c
@@ -6,84 +6,84 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/bio.h>
-#include <linux/blkdev.h>
#include <linux/interrupt.h>
#include <linux/cpu.h>
-#include <linux/blk-iopoll.h>
+#include <linux/irq_poll.h>
#include <linux/delay.h>
-#include "blk.h"
-
-static unsigned int blk_iopoll_budget __read_mostly = 256;
+static unsigned int irq_poll_budget __read_mostly = 256;
static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll);
/**
- * blk_iopoll_sched - Schedule a run of the iopoll handler
+ * irq_poll_sched - Schedule a run of the iopoll handler
* @iop: The parent iopoll structure
*
* Description:
- * Add this blk_iopoll structure to the pending poll list and trigger the
- * raise of the blk iopoll softirq. The driver must already have gotten a
- * successful return from blk_iopoll_sched_prep() before calling this.
+ * Add this irq_poll structure to the pending poll list and trigger the
+ * raise of the blk iopoll softirq.
**/
-void blk_iopoll_sched(struct blk_iopoll *iop)
+void irq_poll_sched(struct irq_poll *iop)
{
unsigned long flags;
+ if (test_bit(IRQ_POLL_F_DISABLE, &iop->state))
+ return;
+ if (test_and_set_bit(IRQ_POLL_F_SCHED, &iop->state))
+ return;
+
local_irq_save(flags);
list_add_tail(&iop->list, this_cpu_ptr(&blk_cpu_iopoll));
- __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
+ __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
local_irq_restore(flags);
}
-EXPORT_SYMBOL(blk_iopoll_sched);
+EXPORT_SYMBOL(irq_poll_sched);
/**
- * __blk_iopoll_complete - Mark this @iop as un-polled again
+ * __irq_poll_complete - Mark this @iop as un-polled again
* @iop: The parent iopoll structure
*
* Description:
- * See blk_iopoll_complete(). This function must be called with interrupts
+ * See irq_poll_complete(). This function must be called with interrupts
* disabled.
**/
-void __blk_iopoll_complete(struct blk_iopoll *iop)
+static void __irq_poll_complete(struct irq_poll *iop)
{
list_del(&iop->list);
smp_mb__before_atomic();
- clear_bit_unlock(IOPOLL_F_SCHED, &iop->state);
+ clear_bit_unlock(IRQ_POLL_F_SCHED, &iop->state);
}
-EXPORT_SYMBOL(__blk_iopoll_complete);
/**
- * blk_iopoll_complete - Mark this @iop as un-polled again
+ * irq_poll_complete - Mark this @iop as un-polled again
* @iop: The parent iopoll structure
*
* Description:
* If a driver consumes less than the assigned budget in its run of the
* iopoll handler, it'll end the polled mode by calling this function. The
- * iopoll handler will not be invoked again before blk_iopoll_sched_prep()
+ * iopoll handler will not be invoked again before irq_poll_sched()
* is called.
**/
-void blk_iopoll_complete(struct blk_iopoll *iop)
+void irq_poll_complete(struct irq_poll *iop)
{
unsigned long flags;
local_irq_save(flags);
- __blk_iopoll_complete(iop);
+ __irq_poll_complete(iop);
local_irq_restore(flags);
}
-EXPORT_SYMBOL(blk_iopoll_complete);
+EXPORT_SYMBOL(irq_poll_complete);
-static void blk_iopoll_softirq(struct softirq_action *h)
+static void irq_poll_softirq(struct softirq_action *h)
{
struct list_head *list = this_cpu_ptr(&blk_cpu_iopoll);
- int rearm = 0, budget = blk_iopoll_budget;
+ int rearm = 0, budget = irq_poll_budget;
unsigned long start_time = jiffies;
local_irq_disable();
while (!list_empty(list)) {
- struct blk_iopoll *iop;
+ struct irq_poll *iop;
int work, weight;
/*
@@ -101,11 +101,11 @@ static void blk_iopoll_softirq(struct softirq_action *h)
* entries to the tail of this list, and only ->poll()
* calls can remove this head entry from the list.
*/
- iop = list_entry(list->next, struct blk_iopoll, list);
+ iop = list_entry(list->next, struct irq_poll, list);
weight = iop->weight;
work = 0;
- if (test_bit(IOPOLL_F_SCHED, &iop->state))
+ if (test_bit(IRQ_POLL_F_SCHED, &iop->state))
work = iop->poll(iop, weight);
budget -= work;
@@ -121,72 +121,70 @@ static void blk_iopoll_softirq(struct softirq_action *h)
* move the instance around on the list at-will.
*/
if (work >= weight) {
- if (blk_iopoll_disable_pending(iop))
- __blk_iopoll_complete(iop);
+ if (test_bit(IRQ_POLL_F_DISABLE, &iop->state))
+ __irq_poll_complete(iop);
else
list_move_tail(&iop->list, list);
}
}
if (rearm)
- __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
+ __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
local_irq_enable();
}
/**
- * blk_iopoll_disable - Disable iopoll on this @iop
+ * irq_poll_disable - Disable iopoll on this @iop
* @iop: The parent iopoll structure
*
* Description:
* Disable io polling and wait for any pending callbacks to have completed.
**/
-void blk_iopoll_disable(struct blk_iopoll *iop)
+void irq_poll_disable(struct irq_poll *iop)
{
- set_bit(IOPOLL_F_DISABLE, &iop->state);
- while (test_and_set_bit(IOPOLL_F_SCHED, &iop->state))
+ set_bit(IRQ_POLL_F_DISABLE, &iop->state);
+ while (test_and_set_bit(IRQ_POLL_F_SCHED, &iop->state))
msleep(1);
- clear_bit(IOPOLL_F_DISABLE, &iop->state);
+ clear_bit(IRQ_POLL_F_DISABLE, &iop->state);
}
-EXPORT_SYMBOL(blk_iopoll_disable);
+EXPORT_SYMBOL(irq_poll_disable);
/**
- * blk_iopoll_enable - Enable iopoll on this @iop
+ * irq_poll_enable - Enable iopoll on this @iop
* @iop: The parent iopoll structure
*
* Description:
* Enable iopoll on this @iop. Note that the handler run will not be
* scheduled, it will only mark it as active.
**/
-void blk_iopoll_enable(struct blk_iopoll *iop)
+void irq_poll_enable(struct irq_poll *iop)
{
- BUG_ON(!test_bit(IOPOLL_F_SCHED, &iop->state));
+ BUG_ON(!test_bit(IRQ_POLL_F_SCHED, &iop->state));
smp_mb__before_atomic();
- clear_bit_unlock(IOPOLL_F_SCHED, &iop->state);
+ clear_bit_unlock(IRQ_POLL_F_SCHED, &iop->state);
}
-EXPORT_SYMBOL(blk_iopoll_enable);
+EXPORT_SYMBOL(irq_poll_enable);
/**
- * blk_iopoll_init - Initialize this @iop
+ * irq_poll_init - Initialize this @iop
* @iop: The parent iopoll structure
* @weight: The default weight (or command completion budget)
* @poll_fn: The handler to invoke
*
* Description:
- * Initialize this blk_iopoll structure. Before being actively used, the
- * driver must call blk_iopoll_enable().
+ * Initialize and enable this irq_poll structure.
**/
-void blk_iopoll_init(struct blk_iopoll *iop, int weight, blk_iopoll_fn *poll_fn)
+void irq_poll_init(struct irq_poll *iop, int weight, irq_poll_fn *poll_fn)
{
memset(iop, 0, sizeof(*iop));
INIT_LIST_HEAD(&iop->list);
iop->weight = weight;
iop->poll = poll_fn;
- set_bit(IOPOLL_F_SCHED, &iop->state);
}
-EXPORT_SYMBOL(blk_iopoll_init);
+EXPORT_SYMBOL(irq_poll_init);
-static int blk_iopoll_cpu_notify(struct notifier_block *self,
+static int irq_poll_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
/*
@@ -199,26 +197,26 @@ static int blk_iopoll_cpu_notify(struct notifier_block *self,
local_irq_disable();
list_splice_init(&per_cpu(blk_cpu_iopoll, cpu),
this_cpu_ptr(&blk_cpu_iopoll));
- __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
+ __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
local_irq_enable();
}
return NOTIFY_OK;
}
-static struct notifier_block blk_iopoll_cpu_notifier = {
- .notifier_call = blk_iopoll_cpu_notify,
+static struct notifier_block irq_poll_cpu_notifier = {
+ .notifier_call = irq_poll_cpu_notify,
};
-static __init int blk_iopoll_setup(void)
+static __init int irq_poll_setup(void)
{
int i;
for_each_possible_cpu(i)
INIT_LIST_HEAD(&per_cpu(blk_cpu_iopoll, i));
- open_softirq(BLOCK_IOPOLL_SOFTIRQ, blk_iopoll_softirq);
- register_hotcpu_notifier(&blk_iopoll_cpu_notifier);
+ open_softirq(IRQ_POLL_SOFTIRQ, irq_poll_softirq);
+ register_hotcpu_notifier(&irq_poll_cpu_notifier);
return 0;
}
-subsys_initcall(blk_iopoll_setup);
+subsys_initcall(irq_poll_setup);
diff --git a/mm/filemap.c b/mm/filemap.c
index 847ee43c2806..bc943867d68c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -11,6 +11,7 @@
*/
#include <linux/export.h>
#include <linux/compiler.h>
+#include <linux/dax.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/capability.h>
@@ -123,9 +124,9 @@ static void page_cache_tree_delete(struct address_space *mapping,
__radix_tree_lookup(&mapping->page_tree, page->index, &node, &slot);
if (shadow) {
- mapping->nrshadows++;
+ mapping->nrexceptional++;
/*
- * Make sure the nrshadows update is committed before
+ * Make sure the nrexceptional update is committed before
* the nrpages update so that final truncate racing
* with reclaim does not see both counters 0 at the
* same time and miss a shadow entry.
@@ -481,6 +482,12 @@ int filemap_write_and_wait_range(struct address_space *mapping,
{
int err = 0;
+ if (dax_mapping(mapping) && mapping->nrexceptional) {
+ err = dax_writeback_mapping_range(mapping, lstart, lend);
+ if (err)
+ return err;
+ }
+
if (mapping->nrpages) {
err = __filemap_fdatawrite_range(mapping, lstart, lend,
WB_SYNC_ALL);
@@ -579,9 +586,13 @@ static int page_cache_tree_insert(struct address_space *mapping,
p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
if (!radix_tree_exceptional_entry(p))
return -EEXIST;
+
+ if (WARN_ON(dax_mapping(mapping)))
+ return -EINVAL;
+
if (shadowp)
*shadowp = p;
- mapping->nrshadows--;
+ mapping->nrexceptional--;
if (node)
workingset_node_shadows_dec(node);
}
@@ -1245,9 +1256,9 @@ repeat:
if (radix_tree_deref_retry(page))
goto restart;
/*
- * A shadow entry of a recently evicted page,
- * or a swap entry from shmem/tmpfs. Return
- * it without attempting to raise page count.
+ * A shadow entry of a recently evicted page, a swap
+ * entry from shmem/tmpfs or a DAX entry. Return it
+ * without attempting to raise page count.
*/
goto export;
}
@@ -1494,6 +1505,74 @@ repeat:
}
EXPORT_SYMBOL(find_get_pages_tag);
+/**
+ * find_get_entries_tag - find and return entries that match @tag
+ * @mapping: the address_space to search
+ * @start: the starting page cache index
+ * @tag: the tag index
+ * @nr_entries: the maximum number of entries
+ * @entries: where the resulting entries are placed
+ * @indices: the cache indices corresponding to the entries in @entries
+ *
+ * Like find_get_entries, except we only return entries which are tagged with
+ * @tag.
+ */
+unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
+ int tag, unsigned int nr_entries,
+ struct page **entries, pgoff_t *indices)
+{
+ void **slot;
+ unsigned int ret = 0;
+ struct radix_tree_iter iter;
+
+ if (!nr_entries)
+ return 0;
+
+ rcu_read_lock();
+restart:
+ radix_tree_for_each_tagged(slot, &mapping->page_tree,
+ &iter, start, tag) {
+ struct page *page;
+repeat:
+ page = radix_tree_deref_slot(slot);
+ if (unlikely(!page))
+ continue;
+ if (radix_tree_exception(page)) {
+ if (radix_tree_deref_retry(page)) {
+ /*
+ * Transient condition which can only trigger
+ * when entry at index 0 moves out of or back
+ * to root: none yet gotten, safe to restart.
+ */
+ goto restart;
+ }
+
+ /*
+ * A shadow entry of a recently evicted page, a swap
+ * entry from shmem/tmpfs or a DAX entry. Return it
+ * without attempting to raise page count.
+ */
+ goto export;
+ }
+ if (!page_cache_get_speculative(page))
+ goto repeat;
+
+ /* Has the page moved? */
+ if (unlikely(page != *slot)) {
+ page_cache_release(page);
+ goto repeat;
+ }
+export:
+ indices[ret] = iter.index;
+ entries[ret] = page;
+ if (++ret == nr_entries)
+ break;
+ }
+ rcu_read_unlock();
+ return ret;
+}
+EXPORT_SYMBOL(find_get_entries_tag);
+
/*
* CD/DVDs are error prone. When a medium error occurs, the driver may fail
* a _large_ part of the i/o request. Imagine the worst scenario:
@@ -2684,11 +2763,11 @@ ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct inode *inode = file->f_mapping->host;
ssize_t ret;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = generic_write_checks(iocb, from);
if (ret > 0)
ret = __generic_file_write_iter(iocb, from);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if (ret > 0) {
ssize_t err;
diff --git a/mm/percpu.c b/mm/percpu.c
index 8a943b97a053..998607adf6eb 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -305,16 +305,12 @@ static void *pcpu_mem_zalloc(size_t size)
/**
* pcpu_mem_free - free memory
* @ptr: memory to free
- * @size: size of the area
*
* Free @ptr. @ptr should have been allocated using pcpu_mem_zalloc().
*/
-static void pcpu_mem_free(void *ptr, size_t size)
+static void pcpu_mem_free(void *ptr)
{
- if (size <= PAGE_SIZE)
- kfree(ptr);
- else
- vfree(ptr);
+ kvfree(ptr);
}
/**
@@ -463,8 +459,8 @@ out_unlock:
* pcpu_mem_free() might end up calling vfree() which uses
* IRQ-unsafe lock and thus can't be called under pcpu_lock.
*/
- pcpu_mem_free(old, old_size);
- pcpu_mem_free(new, new_size);
+ pcpu_mem_free(old);
+ pcpu_mem_free(new);
return 0;
}
@@ -732,7 +728,7 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
chunk->map = pcpu_mem_zalloc(PCPU_DFL_MAP_ALLOC *
sizeof(chunk->map[0]));
if (!chunk->map) {
- pcpu_mem_free(chunk, pcpu_chunk_struct_size);
+ pcpu_mem_free(chunk);
return NULL;
}
@@ -753,8 +749,8 @@ static void pcpu_free_chunk(struct pcpu_chunk *chunk)
{
if (!chunk)
return;
- pcpu_mem_free(chunk->map, chunk->map_alloc * sizeof(chunk->map[0]));
- pcpu_mem_free(chunk, pcpu_chunk_struct_size);
+ pcpu_mem_free(chunk->map);
+ pcpu_mem_free(chunk);
}
/**
diff --git a/mm/shmem.c b/mm/shmem.c
index fa2ceb2d2655..440e2a7e6c1c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -701,8 +701,7 @@ static void shmem_evict_inode(struct inode *inode)
list_del_init(&info->swaplist);
mutex_unlock(&shmem_swaplist_mutex);
}
- } else
- kfree(info->symlink);
+ }
simple_xattrs_free(&info->xattrs);
WARN_ON(inode->i_blocks);
@@ -1902,7 +1901,7 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
if (whence != SEEK_DATA && whence != SEEK_HOLE)
return generic_file_llseek_size(file, offset, whence,
MAX_LFS_FILESIZE, i_size_read(inode));
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
/* We're holding i_mutex so we can access i_size directly */
if (offset < 0)
@@ -1926,7 +1925,7 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
if (offset >= 0)
offset = vfs_setpos(file, offset, MAX_LFS_FILESIZE);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return offset;
}
@@ -2091,7 +2090,7 @@ int shmem_add_seals(struct file *file, unsigned int seals)
if (seals & ~(unsigned int)F_ALL_SEALS)
return -EINVAL;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (info->seals & F_SEAL_SEAL) {
error = -EPERM;
@@ -2114,7 +2113,7 @@ int shmem_add_seals(struct file *file, unsigned int seals)
error = 0;
unlock:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return error;
}
EXPORT_SYMBOL_GPL(shmem_add_seals);
@@ -2164,7 +2163,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
return -EOPNOTSUPP;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (mode & FALLOC_FL_PUNCH_HOLE) {
struct address_space *mapping = file->f_mapping;
@@ -2277,7 +2276,7 @@ undone:
inode->i_private = NULL;
spin_unlock(&inode->i_lock);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return error;
}
@@ -2549,13 +2548,12 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
info = SHMEM_I(inode);
inode->i_size = len-1;
if (len <= SHORT_SYMLINK_LEN) {
- info->symlink = kmemdup(symname, len, GFP_KERNEL);
- if (!info->symlink) {
+ inode->i_link = kmemdup(symname, len, GFP_KERNEL);
+ if (!inode->i_link) {
iput(inode);
return -ENOMEM;
}
inode->i_op = &shmem_short_symlink_operations;
- inode->i_link = info->symlink;
} else {
inode_nohighmem(inode);
error = shmem_getpage(inode, 0, &page, SGP_WRITE, NULL);
@@ -3132,6 +3130,7 @@ static struct inode *shmem_alloc_inode(struct super_block *sb)
static void shmem_destroy_callback(struct rcu_head *head)
{
struct inode *inode = container_of(head, struct inode, i_rcu);
+ kfree(inode->i_link);
kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
}
diff --git a/mm/swapfile.c b/mm/swapfile.c
index c43f654a7b64..d2c37365e2d6 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1956,9 +1956,9 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
set_blocksize(bdev, old_block_size);
blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
} else {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
inode->i_flags &= ~S_SWAPFILE;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
filp_close(swap_file, NULL);
@@ -2183,7 +2183,7 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
p->flags |= SWP_BLKDEV;
} else if (S_ISREG(inode->i_mode)) {
p->bdev = inode->i_sb->s_bdev;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (IS_SWAPFILE(inode))
return -EBUSY;
} else
@@ -2416,7 +2416,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
mapping = swap_file->f_mapping;
inode = mapping->host;
- /* If S_ISREG(inode->i_mode) will do mutex_lock(&inode->i_mutex); */
+ /* If S_ISREG(inode->i_mode) will do inode_lock(inode); */
error = claim_swapfile(p, inode);
if (unlikely(error))
goto bad_swap;
@@ -2561,7 +2561,7 @@ bad_swap:
vfree(cluster_info);
if (swap_file) {
if (inode && S_ISREG(inode->i_mode)) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
inode = NULL;
}
filp_close(swap_file, NULL);
@@ -2574,7 +2574,7 @@ out:
if (name)
putname(name);
if (inode && S_ISREG(inode->i_mode))
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return error;
}
diff --git a/mm/truncate.c b/mm/truncate.c
index 76e35ad97102..e3ee0e27cd17 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/backing-dev.h>
+#include <linux/dax.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
@@ -34,31 +35,39 @@ static void clear_exceptional_entry(struct address_space *mapping,
return;
spin_lock_irq(&mapping->tree_lock);
- /*
- * Regular page slots are stabilized by the page lock even
- * without the tree itself locked. These unlocked entries
- * need verification under the tree lock.
- */
- if (!__radix_tree_lookup(&mapping->page_tree, index, &node, &slot))
- goto unlock;
- if (*slot != entry)
- goto unlock;
- radix_tree_replace_slot(slot, NULL);
- mapping->nrshadows--;
- if (!node)
- goto unlock;
- workingset_node_shadows_dec(node);
- /*
- * Don't track node without shadow entries.
- *
- * Avoid acquiring the list_lru lock if already untracked.
- * The list_empty() test is safe as node->private_list is
- * protected by mapping->tree_lock.
- */
- if (!workingset_node_shadows(node) &&
- !list_empty(&node->private_list))
- list_lru_del(&workingset_shadow_nodes, &node->private_list);
- __radix_tree_delete_node(&mapping->page_tree, node);
+
+ if (dax_mapping(mapping)) {
+ if (radix_tree_delete_item(&mapping->page_tree, index, entry))
+ mapping->nrexceptional--;
+ } else {
+ /*
+ * Regular page slots are stabilized by the page lock even
+ * without the tree itself locked. These unlocked entries
+ * need verification under the tree lock.
+ */
+ if (!__radix_tree_lookup(&mapping->page_tree, index, &node,
+ &slot))
+ goto unlock;
+ if (*slot != entry)
+ goto unlock;
+ radix_tree_replace_slot(slot, NULL);
+ mapping->nrexceptional--;
+ if (!node)
+ goto unlock;
+ workingset_node_shadows_dec(node);
+ /*
+ * Don't track node without shadow entries.
+ *
+ * Avoid acquiring the list_lru lock if already untracked.
+ * The list_empty() test is safe as node->private_list is
+ * protected by mapping->tree_lock.
+ */
+ if (!workingset_node_shadows(node) &&
+ !list_empty(&node->private_list))
+ list_lru_del(&workingset_shadow_nodes,
+ &node->private_list);
+ __radix_tree_delete_node(&mapping->page_tree, node);
+ }
unlock:
spin_unlock_irq(&mapping->tree_lock);
}
@@ -228,7 +237,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
int i;
cleancache_invalidate_inode(mapping);
- if (mapping->nrpages == 0 && mapping->nrshadows == 0)
+ if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
return;
/* Offsets within partial pages */
@@ -402,7 +411,7 @@ EXPORT_SYMBOL(truncate_inode_pages);
*/
void truncate_inode_pages_final(struct address_space *mapping)
{
- unsigned long nrshadows;
+ unsigned long nrexceptional;
unsigned long nrpages;
/*
@@ -416,14 +425,14 @@ void truncate_inode_pages_final(struct address_space *mapping)
/*
* When reclaim installs eviction entries, it increases
- * nrshadows first, then decreases nrpages. Make sure we see
+ * nrexceptional first, then decreases nrpages. Make sure we see
* this in the right order or we might miss an entry.
*/
nrpages = mapping->nrpages;
smp_rmb();
- nrshadows = mapping->nrshadows;
+ nrexceptional = mapping->nrexceptional;
- if (nrpages || nrshadows) {
+ if (nrpages || nrexceptional) {
/*
* As truncation uses a lockless tree lookup, cycle
* the tree lock to make sure any ongoing tree
diff --git a/mm/vmscan.c b/mm/vmscan.c
index bd620b65db52..eb3dd37ccd7c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -46,6 +46,7 @@
#include <linux/oom.h>
#include <linux/prefetch.h>
#include <linux/printk.h>
+#include <linux/dax.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -671,9 +672,15 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
* inode reclaim needs to empty out the radix tree or
* the nodes are lost. Don't plant shadows behind its
* back.
+ *
+ * We also don't store shadows for DAX mappings because the
+ * only page cache pages found in these are zero pages
+ * covering holes, and because we don't want to mix DAX
+ * exceptional entries and shadow exceptional entries in the
+ * same page_tree.
*/
if (reclaimed && page_is_file_cache(page) &&
- !mapping_exiting(mapping))
+ !mapping_exiting(mapping) && !dax_mapping(mapping))
shadow = workingset_eviction(mapping, page);
__delete_from_page_cache(page, shadow, memcg);
spin_unlock_irqrestore(&mapping->tree_lock, flags);
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 64bd0aa13f75..40b2c74ddf16 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1408,17 +1408,7 @@ static void vmstat_update(struct work_struct *w)
* Defer the checking for differentials to the
* shepherd thread on a different processor.
*/
- int r;
- /*
- * Shepherd work thread does not race since it never
- * changes the bit if its zero but the cpu
- * online / off line code may race if
- * worker threads are still allowed during
- * shutdown / startup.
- */
- r = cpumask_test_and_set_cpu(smp_processor_id(),
- cpu_stat_off);
- VM_BUG_ON(r);
+ cpumask_set_cpu(smp_processor_id(), cpu_stat_off);
}
}
diff --git a/mm/workingset.c b/mm/workingset.c
index aa017133744b..61ead9e5549d 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -351,8 +351,8 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
node->slots[i] = NULL;
BUG_ON(node->count < (1U << RADIX_TREE_COUNT_SHIFT));
node->count -= 1U << RADIX_TREE_COUNT_SHIFT;
- BUG_ON(!mapping->nrshadows);
- mapping->nrshadows--;
+ BUG_ON(!mapping->nrexceptional);
+ mapping->nrexceptional--;
}
}
BUG_ON(node->count);
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index bced8c074c12..7bc2208b6cc4 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -108,9 +108,7 @@ struct p9_poll_wait {
* @unsent_req_list: accounting for requests that haven't been sent
* @req: current request being processed (if any)
* @tmp_buf: temporary buffer to read in header
- * @rsize: amount to read for current frame
- * @rpos: read position in current frame
- * @rbuf: current read buffer
+ * @rc: temporary fcall for reading current frame
* @wpos: write position for current frame
* @wsize: amount of data to write for current frame
* @wbuf: current write buffer
@@ -131,9 +129,7 @@ struct p9_conn {
struct list_head unsent_req_list;
struct p9_req_t *req;
char tmp_buf[7];
- int rsize;
- int rpos;
- char *rbuf;
+ struct p9_fcall rc;
int wpos;
int wsize;
char *wbuf;
@@ -305,69 +301,77 @@ static void p9_read_work(struct work_struct *work)
if (m->err < 0)
return;
- p9_debug(P9_DEBUG_TRANS, "start mux %p pos %d\n", m, m->rpos);
+ p9_debug(P9_DEBUG_TRANS, "start mux %p pos %zd\n", m, m->rc.offset);
- if (!m->rbuf) {
- m->rbuf = m->tmp_buf;
- m->rpos = 0;
- m->rsize = 7; /* start by reading header */
+ if (!m->rc.sdata) {
+ m->rc.sdata = m->tmp_buf;
+ m->rc.offset = 0;
+ m->rc.capacity = 7; /* start by reading header */
}
clear_bit(Rpending, &m->wsched);
- p9_debug(P9_DEBUG_TRANS, "read mux %p pos %d size: %d = %d\n",
- m, m->rpos, m->rsize, m->rsize-m->rpos);
- err = p9_fd_read(m->client, m->rbuf + m->rpos,
- m->rsize - m->rpos);
+ p9_debug(P9_DEBUG_TRANS, "read mux %p pos %zd size: %zd = %zd\n",
+ m, m->rc.offset, m->rc.capacity,
+ m->rc.capacity - m->rc.offset);
+ err = p9_fd_read(m->client, m->rc.sdata + m->rc.offset,
+ m->rc.capacity - m->rc.offset);
p9_debug(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err);
- if (err == -EAGAIN) {
+ if (err == -EAGAIN)
goto end_clear;
- }
if (err <= 0)
goto error;
- m->rpos += err;
+ m->rc.offset += err;
- if ((!m->req) && (m->rpos == m->rsize)) { /* header read in */
- u16 tag;
+ /* header read in */
+ if ((!m->req) && (m->rc.offset == m->rc.capacity)) {
p9_debug(P9_DEBUG_TRANS, "got new header\n");
- n = le32_to_cpu(*(__le32 *) m->rbuf); /* read packet size */
- if (n >= m->client->msize) {
+ err = p9_parse_header(&m->rc, NULL, NULL, NULL, 0);
+ if (err) {
+ p9_debug(P9_DEBUG_ERROR,
+ "error parsing header: %d\n", err);
+ goto error;
+ }
+
+ if (m->rc.size >= m->client->msize) {
p9_debug(P9_DEBUG_ERROR,
- "requested packet size too big: %d\n", n);
+ "requested packet size too big: %d\n",
+ m->rc.size);
err = -EIO;
goto error;
}
- tag = le16_to_cpu(*(__le16 *) (m->rbuf+5)); /* read tag */
p9_debug(P9_DEBUG_TRANS,
- "mux %p pkt: size: %d bytes tag: %d\n", m, n, tag);
+ "mux %p pkt: size: %d bytes tag: %d\n",
+ m, m->rc.size, m->rc.tag);
- m->req = p9_tag_lookup(m->client, tag);
+ m->req = p9_tag_lookup(m->client, m->rc.tag);
if (!m->req || (m->req->status != REQ_STATUS_SENT)) {
p9_debug(P9_DEBUG_ERROR, "Unexpected packet tag %d\n",
- tag);
+ m->rc.tag);
err = -EIO;
goto error;
}
if (m->req->rc == NULL) {
- m->req->rc = kmalloc(sizeof(struct p9_fcall) +
- m->client->msize, GFP_NOFS);
- if (!m->req->rc) {
- m->req = NULL;
- err = -ENOMEM;
- goto error;
- }
+ p9_debug(P9_DEBUG_ERROR,
+ "No recv fcall for tag %d (req %p), disconnecting!\n",
+ m->rc.tag, m->req);
+ m->req = NULL;
+ err = -EIO;
+ goto error;
}
- m->rbuf = (char *)m->req->rc + sizeof(struct p9_fcall);
- memcpy(m->rbuf, m->tmp_buf, m->rsize);
- m->rsize = n;
+ m->rc.sdata = (char *)m->req->rc + sizeof(struct p9_fcall);
+ memcpy(m->rc.sdata, m->tmp_buf, m->rc.capacity);
+ m->rc.capacity = m->rc.size;
}
- /* not an else because some packets (like clunk) have no payload */
- if ((m->req) && (m->rpos == m->rsize)) { /* packet is read in */
+ /* packet is read in
+ * not an else because some packets (like clunk) have no payload
+ */
+ if ((m->req) && (m->rc.offset == m->rc.capacity)) {
p9_debug(P9_DEBUG_TRANS, "got new packet\n");
spin_lock(&m->client->lock);
if (m->req->status != REQ_STATUS_ERROR)
@@ -375,9 +379,9 @@ static void p9_read_work(struct work_struct *work)
list_del(&m->req->req_list);
spin_unlock(&m->client->lock);
p9_client_cb(m->client, m->req, status);
- m->rbuf = NULL;
- m->rpos = 0;
- m->rsize = 0;
+ m->rc.sdata = NULL;
+ m->rc.offset = 0;
+ m->rc.capacity = 0;
m->req = NULL;
}
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 199bc76202d2..4acb1d5417aa 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -658,7 +658,7 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args)
mutex_unlock(&virtio_9p_lock);
if (!found) {
- pr_err("no channels available\n");
+ pr_err("no channels available for device %s\n", devname);
return ret;
}
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 10d87753ed87..9e43a315e662 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -152,7 +152,6 @@ static int process_one_ticket(struct ceph_auth_client *ac,
void *ticket_buf = NULL;
void *tp, *tpend;
void **ptp;
- struct ceph_timespec new_validity;
struct ceph_crypto_key new_session_key;
struct ceph_buffer *new_ticket_blob;
unsigned long new_expires, new_renew_after;
@@ -193,8 +192,8 @@ static int process_one_ticket(struct ceph_auth_client *ac,
if (ret)
goto out;
- ceph_decode_copy(&dp, &new_validity, sizeof(new_validity));
- ceph_decode_timespec(&validity, &new_validity);
+ ceph_decode_timespec(&validity, dp);
+ dp += sizeof(struct ceph_timespec);
new_expires = get_seconds() + validity.tv_sec;
new_renew_after = new_expires - (validity.tv_sec / 4);
dout(" expires=%lu renew_after=%lu\n", new_expires,
@@ -233,10 +232,10 @@ static int process_one_ticket(struct ceph_auth_client *ac,
ceph_buffer_put(th->ticket_blob);
th->session_key = new_session_key;
th->ticket_blob = new_ticket_blob;
- th->validity = new_validity;
th->secret_id = new_secret_id;
th->expires = new_expires;
th->renew_after = new_renew_after;
+ th->have_key = true;
dout(" got ticket service %d (%s) secret_id %lld len %d\n",
type, ceph_entity_type_name(type), th->secret_id,
(int)th->ticket_blob->vec.iov_len);
@@ -384,6 +383,24 @@ bad:
return -ERANGE;
}
+static bool need_key(struct ceph_x_ticket_handler *th)
+{
+ if (!th->have_key)
+ return true;
+
+ return get_seconds() >= th->renew_after;
+}
+
+static bool have_key(struct ceph_x_ticket_handler *th)
+{
+ if (th->have_key) {
+ if (get_seconds() >= th->expires)
+ th->have_key = false;
+ }
+
+ return th->have_key;
+}
+
static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed)
{
int want = ac->want_keys;
@@ -402,20 +419,18 @@ static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed)
continue;
th = get_ticket_handler(ac, service);
-
if (IS_ERR(th)) {
*pneed |= service;
continue;
}
- if (get_seconds() >= th->renew_after)
+ if (need_key(th))
*pneed |= service;
- if (get_seconds() >= th->expires)
+ if (!have_key(th))
xi->have_keys &= ~service;
}
}
-
static int ceph_x_build_request(struct ceph_auth_client *ac,
void *buf, void *end)
{
@@ -667,14 +682,26 @@ static void ceph_x_destroy(struct ceph_auth_client *ac)
ac->private = NULL;
}
-static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
- int peer_type)
+static void invalidate_ticket(struct ceph_auth_client *ac, int peer_type)
{
struct ceph_x_ticket_handler *th;
th = get_ticket_handler(ac, peer_type);
if (!IS_ERR(th))
- memset(&th->validity, 0, sizeof(th->validity));
+ th->have_key = false;
+}
+
+static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
+ int peer_type)
+{
+ /*
+ * We are to invalidate a service ticket in the hopes of
+ * getting a new, hopefully more valid, one. But, we won't get
+ * it unless our AUTH ticket is good, so invalidate AUTH ticket
+ * as well, just in case.
+ */
+ invalidate_ticket(ac, peer_type);
+ invalidate_ticket(ac, CEPH_ENTITY_TYPE_AUTH);
}
static int calcu_signature(struct ceph_x_authorizer *au,
diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h
index e8b7c6917d47..40b1a3cf7397 100644
--- a/net/ceph/auth_x.h
+++ b/net/ceph/auth_x.h
@@ -16,7 +16,7 @@ struct ceph_x_ticket_handler {
unsigned int service;
struct ceph_crypto_key session_key;
- struct ceph_timespec validity;
+ bool have_key;
u64 secret_id;
struct ceph_buffer *ticket_blob;
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 9981039ef4ff..9cfedf565f5b 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -23,9 +23,6 @@
#include <linux/ceph/pagelist.h>
#include <linux/export.h>
-#define list_entry_next(pos, member) \
- list_entry(pos->member.next, typeof(*pos), member)
-
/*
* Ceph uses the messenger to exchange ceph_msg messages with other
* hosts in the system. The messenger provides ordered and reliable
@@ -672,6 +669,8 @@ static void reset_connection(struct ceph_connection *con)
}
con->in_seq = 0;
con->in_seq_acked = 0;
+
+ con->out_skip = 0;
}
/*
@@ -771,6 +770,8 @@ static u32 get_global_seq(struct ceph_messenger *msgr, u32 gt)
static void con_out_kvec_reset(struct ceph_connection *con)
{
+ BUG_ON(con->out_skip);
+
con->out_kvec_left = 0;
con->out_kvec_bytes = 0;
con->out_kvec_cur = &con->out_kvec[0];
@@ -779,9 +780,9 @@ static void con_out_kvec_reset(struct ceph_connection *con)
static void con_out_kvec_add(struct ceph_connection *con,
size_t size, void *data)
{
- int index;
+ int index = con->out_kvec_left;
- index = con->out_kvec_left;
+ BUG_ON(con->out_skip);
BUG_ON(index >= ARRAY_SIZE(con->out_kvec));
con->out_kvec[index].iov_len = size;
@@ -790,6 +791,27 @@ static void con_out_kvec_add(struct ceph_connection *con,
con->out_kvec_bytes += size;
}
+/*
+ * Chop off a kvec from the end. Return residual number of bytes for
+ * that kvec, i.e. how many bytes would have been written if the kvec
+ * hadn't been nuked.
+ */
+static int con_out_kvec_skip(struct ceph_connection *con)
+{
+ int off = con->out_kvec_cur - con->out_kvec;
+ int skip = 0;
+
+ if (con->out_kvec_bytes > 0) {
+ skip = con->out_kvec[off + con->out_kvec_left - 1].iov_len;
+ BUG_ON(con->out_kvec_bytes < skip);
+ BUG_ON(!con->out_kvec_left);
+ con->out_kvec_bytes -= skip;
+ con->out_kvec_left--;
+ }
+
+ return skip;
+}
+
#ifdef CONFIG_BLOCK
/*
@@ -1042,7 +1064,7 @@ static bool ceph_msg_data_pagelist_advance(struct ceph_msg_data_cursor *cursor,
/* Move on to the next page */
BUG_ON(list_is_last(&cursor->page->lru, &pagelist->head));
- cursor->page = list_entry_next(cursor->page, lru);
+ cursor->page = list_next_entry(cursor->page, lru);
cursor->last_piece = cursor->resid <= PAGE_SIZE;
return true;
@@ -1166,7 +1188,7 @@ static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor,
if (!cursor->resid && cursor->total_resid) {
WARN_ON(!cursor->last_piece);
BUG_ON(list_is_last(&cursor->data->links, cursor->data_head));
- cursor->data = list_entry_next(cursor->data, links);
+ cursor->data = list_next_entry(cursor->data, links);
__ceph_msg_data_cursor_init(cursor);
new_piece = true;
}
@@ -1197,7 +1219,6 @@ static void prepare_write_message_footer(struct ceph_connection *con)
m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE;
dout("prepare_write_message_footer %p\n", con);
- con->out_kvec_is_msg = true;
con->out_kvec[v].iov_base = &m->footer;
if (con->peer_features & CEPH_FEATURE_MSG_AUTH) {
if (con->ops->sign_message)
@@ -1225,7 +1246,6 @@ static void prepare_write_message(struct ceph_connection *con)
u32 crc;
con_out_kvec_reset(con);
- con->out_kvec_is_msg = true;
con->out_msg_done = false;
/* Sneak an ack in there first? If we can get it into the same
@@ -1265,18 +1285,19 @@ static void prepare_write_message(struct ceph_connection *con)
/* tag + hdr + front + middle */
con_out_kvec_add(con, sizeof (tag_msg), &tag_msg);
- con_out_kvec_add(con, sizeof (m->hdr), &m->hdr);
+ con_out_kvec_add(con, sizeof(con->out_hdr), &con->out_hdr);
con_out_kvec_add(con, m->front.iov_len, m->front.iov_base);
if (m->middle)
con_out_kvec_add(con, m->middle->vec.iov_len,
m->middle->vec.iov_base);
- /* fill in crc (except data pages), footer */
+ /* fill in hdr crc and finalize hdr */
crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc));
con->out_msg->hdr.crc = cpu_to_le32(crc);
- con->out_msg->footer.flags = 0;
+ memcpy(&con->out_hdr, &con->out_msg->hdr, sizeof(con->out_hdr));
+ /* fill in front and middle crc, footer */
crc = crc32c(0, m->front.iov_base, m->front.iov_len);
con->out_msg->footer.front_crc = cpu_to_le32(crc);
if (m->middle) {
@@ -1288,6 +1309,7 @@ static void prepare_write_message(struct ceph_connection *con)
dout("%s front_crc %u middle_crc %u\n", __func__,
le32_to_cpu(con->out_msg->footer.front_crc),
le32_to_cpu(con->out_msg->footer.middle_crc));
+ con->out_msg->footer.flags = 0;
/* is there a data payload? */
con->out_msg->footer.data_crc = 0;
@@ -1492,7 +1514,6 @@ static int write_partial_kvec(struct ceph_connection *con)
}
}
con->out_kvec_left = 0;
- con->out_kvec_is_msg = false;
ret = 1;
out:
dout("write_partial_kvec %p %d left in %d kvecs ret = %d\n", con,
@@ -1584,6 +1605,7 @@ static int write_partial_skip(struct ceph_connection *con)
{
int ret;
+ dout("%s %p %d left\n", __func__, con, con->out_skip);
while (con->out_skip > 0) {
size_t size = min(con->out_skip, (int) PAGE_CACHE_SIZE);
@@ -2506,13 +2528,13 @@ more:
more_kvec:
/* kvec data queued? */
- if (con->out_skip) {
- ret = write_partial_skip(con);
+ if (con->out_kvec_left) {
+ ret = write_partial_kvec(con);
if (ret <= 0)
goto out;
}
- if (con->out_kvec_left) {
- ret = write_partial_kvec(con);
+ if (con->out_skip) {
+ ret = write_partial_skip(con);
if (ret <= 0)
goto out;
}
@@ -2805,13 +2827,17 @@ static bool con_backoff(struct ceph_connection *con)
static void con_fault_finish(struct ceph_connection *con)
{
+ dout("%s %p\n", __func__, con);
+
/*
* in case we faulted due to authentication, invalidate our
* current tickets so that we can get new ones.
*/
- if (con->auth_retry && con->ops->invalidate_authorizer) {
- dout("calling invalidate_authorizer()\n");
- con->ops->invalidate_authorizer(con);
+ if (con->auth_retry) {
+ dout("auth_retry %d, invalidating\n", con->auth_retry);
+ if (con->ops->invalidate_authorizer)
+ con->ops->invalidate_authorizer(con);
+ con->auth_retry = 0;
}
if (con->ops->fault)
@@ -3050,16 +3076,31 @@ void ceph_msg_revoke(struct ceph_msg *msg)
ceph_msg_put(msg);
}
if (con->out_msg == msg) {
- dout("%s %p msg %p - was sending\n", __func__, con, msg);
- con->out_msg = NULL;
- if (con->out_kvec_is_msg) {
- con->out_skip = con->out_kvec_bytes;
- con->out_kvec_is_msg = false;
+ BUG_ON(con->out_skip);
+ /* footer */
+ if (con->out_msg_done) {
+ con->out_skip += con_out_kvec_skip(con);
+ } else {
+ BUG_ON(!msg->data_length);
+ if (con->peer_features & CEPH_FEATURE_MSG_AUTH)
+ con->out_skip += sizeof(msg->footer);
+ else
+ con->out_skip += sizeof(msg->old_footer);
}
+ /* data, middle, front */
+ if (msg->data_length)
+ con->out_skip += msg->cursor.total_resid;
+ if (msg->middle)
+ con->out_skip += con_out_kvec_skip(con);
+ con->out_skip += con_out_kvec_skip(con);
+
+ dout("%s %p msg %p - was sending, will write %d skip %d\n",
+ __func__, con, msg, con->out_kvec_bytes, con->out_skip);
msg->hdr.seq = 0;
-
+ con->out_msg = NULL;
ceph_msg_put(msg);
}
+
mutex_unlock(&con->mutex);
}
@@ -3361,9 +3402,7 @@ static void ceph_msg_free(struct ceph_msg *m)
static void ceph_msg_release(struct kref *kref)
{
struct ceph_msg *m = container_of(kref, struct ceph_msg, kref);
- LIST_HEAD(data);
- struct list_head *links;
- struct list_head *next;
+ struct ceph_msg_data *data, *next;
dout("%s %p\n", __func__, m);
WARN_ON(!list_empty(&m->list_head));
@@ -3376,12 +3415,8 @@ static void ceph_msg_release(struct kref *kref)
m->middle = NULL;
}
- list_splice_init(&m->data, &data);
- list_for_each_safe(links, next, &data) {
- struct ceph_msg_data *data;
-
- data = list_entry(links, struct ceph_msg_data, links);
- list_del_init(links);
+ list_for_each_entry_safe(data, next, &m->data, links) {
+ list_del_init(&data->links);
ceph_msg_data_destroy(data);
}
m->data_length = 0;
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index edda01626a45..de85dddc3dc0 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -364,10 +364,6 @@ static bool have_debugfs_info(struct ceph_mon_client *monc)
return monc->client->have_fsid && monc->auth->global_id > 0;
}
-/*
- * The monitor responds with mount ack indicate mount success. The
- * included client ticket allows the client to talk to MDSs and OSDs.
- */
static void ceph_monc_handle_map(struct ceph_mon_client *monc,
struct ceph_msg *msg)
{
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 744e5936c10d..7aea0ccb6be6 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -289,10 +289,8 @@ static void __node_free_rcu(struct rcu_head *head)
if (!n->tn_bits)
kmem_cache_free(trie_leaf_kmem, n);
- else if (n->tn_bits <= TNODE_KMALLOC_MAX)
- kfree(n);
else
- vfree(n);
+ kvfree(n);
}
#define node_free(n) call_rcu(&tn_info(n)->rcu, __node_free_rcu)
diff --git a/net/rds/ib.c b/net/rds/ib.c
index f222885ac0c7..9481d55ff6cb 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -122,44 +122,34 @@ void rds_ib_dev_put(struct rds_ib_device *rds_ibdev)
static void rds_ib_add_one(struct ib_device *device)
{
struct rds_ib_device *rds_ibdev;
- struct ib_device_attr *dev_attr;
/* Only handle IB (no iWARP) devices */
if (device->node_type != RDMA_NODE_IB_CA)
return;
- dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
- if (!dev_attr)
- return;
-
- if (ib_query_device(device, dev_attr)) {
- rdsdebug("Query device failed for %s\n", device->name);
- goto free_attr;
- }
-
rds_ibdev = kzalloc_node(sizeof(struct rds_ib_device), GFP_KERNEL,
ibdev_to_node(device));
if (!rds_ibdev)
- goto free_attr;
+ return;
spin_lock_init(&rds_ibdev->spinlock);
atomic_set(&rds_ibdev->refcount, 1);
INIT_WORK(&rds_ibdev->free_work, rds_ib_dev_free);
- rds_ibdev->max_wrs = dev_attr->max_qp_wr;
- rds_ibdev->max_sge = min(dev_attr->max_sge, RDS_IB_MAX_SGE);
+ rds_ibdev->max_wrs = device->attrs.max_qp_wr;
+ rds_ibdev->max_sge = min(device->attrs.max_sge, RDS_IB_MAX_SGE);
- rds_ibdev->fmr_max_remaps = dev_attr->max_map_per_fmr?: 32;
- rds_ibdev->max_1m_fmrs = dev_attr->max_mr ?
- min_t(unsigned int, (dev_attr->max_mr / 2),
+ rds_ibdev->fmr_max_remaps = device->attrs.max_map_per_fmr?: 32;
+ rds_ibdev->max_1m_fmrs = device->attrs.max_mr ?
+ min_t(unsigned int, (device->attrs.max_mr / 2),
rds_ib_fmr_1m_pool_size) : rds_ib_fmr_1m_pool_size;
- rds_ibdev->max_8k_fmrs = dev_attr->max_mr ?
- min_t(unsigned int, ((dev_attr->max_mr / 2) * RDS_MR_8K_SCALE),
+ rds_ibdev->max_8k_fmrs = device->attrs.max_mr ?
+ min_t(unsigned int, ((device->attrs.max_mr / 2) * RDS_MR_8K_SCALE),
rds_ib_fmr_8k_pool_size) : rds_ib_fmr_8k_pool_size;
- rds_ibdev->max_initiator_depth = dev_attr->max_qp_init_rd_atom;
- rds_ibdev->max_responder_resources = dev_attr->max_qp_rd_atom;
+ rds_ibdev->max_initiator_depth = device->attrs.max_qp_init_rd_atom;
+ rds_ibdev->max_responder_resources = device->attrs.max_qp_rd_atom;
rds_ibdev->dev = device;
rds_ibdev->pd = ib_alloc_pd(device);
@@ -183,7 +173,7 @@ static void rds_ib_add_one(struct ib_device *device)
}
rdsdebug("RDS/IB: max_mr = %d, max_wrs = %d, max_sge = %d, fmr_max_remaps = %d, max_1m_fmrs = %d, max_8k_fmrs = %d\n",
- dev_attr->max_fmr, rds_ibdev->max_wrs, rds_ibdev->max_sge,
+ device->attrs.max_fmr, rds_ibdev->max_wrs, rds_ibdev->max_sge,
rds_ibdev->fmr_max_remaps, rds_ibdev->max_1m_fmrs,
rds_ibdev->max_8k_fmrs);
@@ -202,8 +192,6 @@ static void rds_ib_add_one(struct ib_device *device)
put_dev:
rds_ib_dev_put(rds_ibdev);
-free_attr:
- kfree(dev_attr);
}
/*
diff --git a/net/rds/iw.c b/net/rds/iw.c
index 576f1825fc55..f4a9fff829e0 100644
--- a/net/rds/iw.c
+++ b/net/rds/iw.c
@@ -60,30 +60,20 @@ LIST_HEAD(iw_nodev_conns);
static void rds_iw_add_one(struct ib_device *device)
{
struct rds_iw_device *rds_iwdev;
- struct ib_device_attr *dev_attr;
/* Only handle iwarp devices */
if (device->node_type != RDMA_NODE_RNIC)
return;
- dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
- if (!dev_attr)
- return;
-
- if (ib_query_device(device, dev_attr)) {
- rdsdebug("Query device failed for %s\n", device->name);
- goto free_attr;
- }
-
rds_iwdev = kmalloc(sizeof *rds_iwdev, GFP_KERNEL);
if (!rds_iwdev)
- goto free_attr;
+ return;
spin_lock_init(&rds_iwdev->spinlock);
- rds_iwdev->dma_local_lkey = !!(dev_attr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY);
- rds_iwdev->max_wrs = dev_attr->max_qp_wr;
- rds_iwdev->max_sge = min(dev_attr->max_sge, RDS_IW_MAX_SGE);
+ rds_iwdev->dma_local_lkey = !!(device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY);
+ rds_iwdev->max_wrs = device->attrs.max_qp_wr;
+ rds_iwdev->max_sge = min(device->attrs.max_sge, RDS_IW_MAX_SGE);
rds_iwdev->dev = device;
rds_iwdev->pd = ib_alloc_pd(device);
@@ -111,8 +101,7 @@ static void rds_iw_add_one(struct ib_device *device)
list_add_tail(&rds_iwdev->list, &rds_iw_devices);
ib_set_client_data(device, &rds_iw_client, rds_iwdev);
-
- goto free_attr;
+ return;
err_mr:
if (rds_iwdev->mr)
@@ -121,8 +110,6 @@ err_pd:
ib_dealloc_pd(rds_iwdev->pd);
free_dev:
kfree(rds_iwdev);
-free_attr:
- kfree(dev_attr);
}
static void rds_iw_remove_one(struct ib_device *device, void *client_data)
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 5e4f815c2b34..2b32fd602669 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -771,7 +771,7 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count,
if (count == 0)
return 0;
- mutex_lock(&inode->i_mutex); /* protect against multiple concurrent
+ inode_lock(inode); /* protect against multiple concurrent
* readers on this file */
again:
spin_lock(&queue_lock);
@@ -784,7 +784,7 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count,
}
if (rp->q.list.next == &cd->queue) {
spin_unlock(&queue_lock);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
WARN_ON_ONCE(rp->offset);
return 0;
}
@@ -838,7 +838,7 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count,
}
if (err == -EAGAIN)
goto again;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return err ? err : count;
}
@@ -909,9 +909,9 @@ static ssize_t cache_write(struct file *filp, const char __user *buf,
if (!cd->cache_parse)
goto out;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
ret = cache_downcall(mapping, buf, count, cd);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
out:
return ret;
}
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 14f45bf0410c..31789ef3e614 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -172,7 +172,7 @@ rpc_close_pipes(struct inode *inode)
int need_release;
LIST_HEAD(free_list);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
spin_lock(&pipe->lock);
need_release = pipe->nreaders != 0 || pipe->nwriters != 0;
pipe->nreaders = 0;
@@ -188,7 +188,7 @@ rpc_close_pipes(struct inode *inode)
cancel_delayed_work_sync(&pipe->queue_timeout);
rpc_inode_setowner(inode, NULL);
RPC_I(inode)->pipe = NULL;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
static struct inode *
@@ -221,7 +221,7 @@ rpc_pipe_open(struct inode *inode, struct file *filp)
int first_open;
int res = -ENXIO;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
pipe = RPC_I(inode)->pipe;
if (pipe == NULL)
goto out;
@@ -237,7 +237,7 @@ rpc_pipe_open(struct inode *inode, struct file *filp)
pipe->nwriters++;
res = 0;
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return res;
}
@@ -248,7 +248,7 @@ rpc_pipe_release(struct inode *inode, struct file *filp)
struct rpc_pipe_msg *msg;
int last_close;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
pipe = RPC_I(inode)->pipe;
if (pipe == NULL)
goto out;
@@ -278,7 +278,7 @@ rpc_pipe_release(struct inode *inode, struct file *filp)
if (last_close && pipe->ops->release_pipe)
pipe->ops->release_pipe(inode);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return 0;
}
@@ -290,7 +290,7 @@ rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset)
struct rpc_pipe_msg *msg;
int res = 0;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
pipe = RPC_I(inode)->pipe;
if (pipe == NULL) {
res = -EPIPE;
@@ -322,7 +322,7 @@ rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset)
pipe->ops->destroy_msg(msg);
}
out_unlock:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return res;
}
@@ -332,11 +332,11 @@ rpc_pipe_write(struct file *filp, const char __user *buf, size_t len, loff_t *of
struct inode *inode = file_inode(filp);
int res;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
res = -EPIPE;
if (RPC_I(inode)->pipe != NULL)
res = RPC_I(inode)->pipe->ops->downcall(filp, buf, len);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return res;
}
@@ -349,12 +349,12 @@ rpc_pipe_poll(struct file *filp, struct poll_table_struct *wait)
poll_wait(filp, &rpci->waitq, wait);
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (rpci->pipe == NULL)
mask |= POLLERR | POLLHUP;
else if (filp->private_data || !list_empty(&rpci->pipe->pipe))
mask |= POLLIN | POLLRDNORM;
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return mask;
}
@@ -367,10 +367,10 @@ rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
switch (cmd) {
case FIONREAD:
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
pipe = RPC_I(inode)->pipe;
if (pipe == NULL) {
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return -EPIPE;
}
spin_lock(&pipe->lock);
@@ -381,7 +381,7 @@ rpc_pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
len += msg->len - msg->copied;
}
spin_unlock(&pipe->lock);
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
return put_user(len, (int __user *)arg);
default:
return -EINVAL;
@@ -617,9 +617,9 @@ int rpc_rmdir(struct dentry *dentry)
parent = dget_parent(dentry);
dir = d_inode(parent);
- mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(dir, I_MUTEX_PARENT);
error = __rpc_rmdir(dir, dentry);
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
dput(parent);
return error;
}
@@ -701,9 +701,9 @@ static void rpc_depopulate(struct dentry *parent,
{
struct inode *dir = d_inode(parent);
- mutex_lock_nested(&dir->i_mutex, I_MUTEX_CHILD);
+ inode_lock_nested(dir, I_MUTEX_CHILD);
__rpc_depopulate(parent, files, start, eof);
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
}
static int rpc_populate(struct dentry *parent,
@@ -715,7 +715,7 @@ static int rpc_populate(struct dentry *parent,
struct dentry *dentry;
int i, err;
- mutex_lock(&dir->i_mutex);
+ inode_lock(dir);
for (i = start; i < eof; i++) {
dentry = __rpc_lookup_create_exclusive(parent, files[i].name);
err = PTR_ERR(dentry);
@@ -739,11 +739,11 @@ static int rpc_populate(struct dentry *parent,
if (err != 0)
goto out_bad;
}
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
return 0;
out_bad:
__rpc_depopulate(parent, files, start, eof);
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
printk(KERN_WARNING "%s: %s failed to populate directory %pd\n",
__FILE__, __func__, parent);
return err;
@@ -757,7 +757,7 @@ static struct dentry *rpc_mkdir_populate(struct dentry *parent,
struct inode *dir = d_inode(parent);
int error;
- mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(dir, I_MUTEX_PARENT);
dentry = __rpc_lookup_create_exclusive(parent, name);
if (IS_ERR(dentry))
goto out;
@@ -770,7 +770,7 @@ static struct dentry *rpc_mkdir_populate(struct dentry *parent,
goto err_rmdir;
}
out:
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
return dentry;
err_rmdir:
__rpc_rmdir(dir, dentry);
@@ -788,11 +788,11 @@ static int rpc_rmdir_depopulate(struct dentry *dentry,
parent = dget_parent(dentry);
dir = d_inode(parent);
- mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(dir, I_MUTEX_PARENT);
if (depopulate != NULL)
depopulate(dentry);
error = __rpc_rmdir(dir, dentry);
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
dput(parent);
return error;
}
@@ -828,7 +828,7 @@ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name,
if (pipe->ops->downcall == NULL)
umode &= ~S_IWUGO;
- mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(dir, I_MUTEX_PARENT);
dentry = __rpc_lookup_create_exclusive(parent, name);
if (IS_ERR(dentry))
goto out;
@@ -837,7 +837,7 @@ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name,
if (err)
goto out_err;
out:
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
return dentry;
out_err:
dentry = ERR_PTR(err);
@@ -865,9 +865,9 @@ rpc_unlink(struct dentry *dentry)
parent = dget_parent(dentry);
dir = d_inode(parent);
- mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+ inode_lock_nested(dir, I_MUTEX_PARENT);
error = __rpc_rmpipe(dir, dentry);
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
dput(parent);
return error;
}
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 2e98f4a243e5..37edea6fa92d 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1425,3 +1425,4 @@ void xprt_put(struct rpc_xprt *xprt)
if (atomic_dec_and_test(&xprt->count))
xprt_destroy(xprt);
}
+EXPORT_SYMBOL_GPL(xprt_put);
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
index 33f99d3004f2..dc9f3b513a05 100644
--- a/net/sunrpc/xprtrdma/Makefile
+++ b/net/sunrpc/xprtrdma/Makefile
@@ -2,7 +2,7 @@ obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o
rpcrdma-y := transport.o rpc_rdma.o verbs.o \
fmr_ops.o frwr_ops.o physical_ops.o \
- svc_rdma.o svc_rdma_transport.o \
+ svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \
svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o \
module.o
rpcrdma-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel.o
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index c6836844bd0e..e16567389e28 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -190,12 +190,11 @@ static int
frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
struct rpcrdma_create_data_internal *cdata)
{
- struct ib_device_attr *devattr = &ia->ri_devattr;
int depth, delta;
ia->ri_max_frmr_depth =
min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
- devattr->max_fast_reg_page_list_len);
+ ia->ri_device->attrs.max_fast_reg_page_list_len);
dprintk("RPC: %s: device's max FR page list len = %u\n",
__func__, ia->ri_max_frmr_depth);
@@ -222,8 +221,8 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
}
ep->rep_attr.cap.max_send_wr *= depth;
- if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) {
- cdata->max_requests = devattr->max_qp_wr / depth;
+ if (ep->rep_attr.cap.max_send_wr > ia->ri_device->attrs.max_qp_wr) {
+ cdata->max_requests = ia->ri_device->attrs.max_qp_wr / depth;
if (!cdata->max_requests)
return -EINVAL;
ep->rep_attr.cap.max_send_wr = cdata->max_requests *
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index 1b7051bdbdc8..c846ca9f1eba 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -55,6 +55,7 @@ unsigned int svcrdma_ord = RPCRDMA_ORD;
static unsigned int min_ord = 1;
static unsigned int max_ord = 4096;
unsigned int svcrdma_max_requests = RPCRDMA_MAX_REQUESTS;
+unsigned int svcrdma_max_bc_requests = RPCRDMA_MAX_BC_REQUESTS;
static unsigned int min_max_requests = 4;
static unsigned int max_max_requests = 16384;
unsigned int svcrdma_max_req_size = RPCRDMA_MAX_REQ_SIZE;
@@ -71,10 +72,6 @@ atomic_t rdma_stat_rq_prod;
atomic_t rdma_stat_sq_poll;
atomic_t rdma_stat_sq_prod;
-/* Temporary NFS request map and context caches */
-struct kmem_cache *svc_rdma_map_cachep;
-struct kmem_cache *svc_rdma_ctxt_cachep;
-
struct workqueue_struct *svc_rdma_wq;
/*
@@ -243,17 +240,16 @@ void svc_rdma_cleanup(void)
svc_unreg_xprt_class(&svc_rdma_bc_class);
#endif
svc_unreg_xprt_class(&svc_rdma_class);
- kmem_cache_destroy(svc_rdma_map_cachep);
- kmem_cache_destroy(svc_rdma_ctxt_cachep);
}
int svc_rdma_init(void)
{
dprintk("SVCRDMA Module Init, register RPC RDMA transport\n");
dprintk("\tsvcrdma_ord : %d\n", svcrdma_ord);
- dprintk("\tmax_requests : %d\n", svcrdma_max_requests);
- dprintk("\tsq_depth : %d\n",
+ dprintk("\tmax_requests : %u\n", svcrdma_max_requests);
+ dprintk("\tsq_depth : %u\n",
svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT);
+ dprintk("\tmax_bc_requests : %u\n", svcrdma_max_bc_requests);
dprintk("\tmax_inline : %d\n", svcrdma_max_req_size);
svc_rdma_wq = alloc_workqueue("svc_rdma", 0, 0);
@@ -264,39 +260,10 @@ int svc_rdma_init(void)
svcrdma_table_header =
register_sysctl_table(svcrdma_root_table);
- /* Create the temporary map cache */
- svc_rdma_map_cachep = kmem_cache_create("svc_rdma_map_cache",
- sizeof(struct svc_rdma_req_map),
- 0,
- SLAB_HWCACHE_ALIGN,
- NULL);
- if (!svc_rdma_map_cachep) {
- printk(KERN_INFO "Could not allocate map cache.\n");
- goto err0;
- }
-
- /* Create the temporary context cache */
- svc_rdma_ctxt_cachep =
- kmem_cache_create("svc_rdma_ctxt_cache",
- sizeof(struct svc_rdma_op_ctxt),
- 0,
- SLAB_HWCACHE_ALIGN,
- NULL);
- if (!svc_rdma_ctxt_cachep) {
- printk(KERN_INFO "Could not allocate WR ctxt cache.\n");
- goto err1;
- }
-
/* Register RDMA with the SVC transport switch */
svc_reg_xprt_class(&svc_rdma_class);
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
svc_reg_xprt_class(&svc_rdma_bc_class);
#endif
return 0;
- err1:
- kmem_cache_destroy(svc_rdma_map_cachep);
- err0:
- unregister_sysctl_table(svcrdma_table_header);
- destroy_workqueue(svc_rdma_wq);
- return -ENOMEM;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
new file mode 100644
index 000000000000..65a7c232a345
--- /dev/null
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -0,0 +1,371 @@
+/*
+ * Copyright (c) 2015 Oracle. All rights reserved.
+ *
+ * Support for backward direction RPCs on RPC/RDMA (server-side).
+ */
+
+#include <linux/sunrpc/svc_rdma.h>
+#include "xprt_rdma.h"
+
+#define RPCDBG_FACILITY RPCDBG_SVCXPRT
+
+#undef SVCRDMA_BACKCHANNEL_DEBUG
+
+int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, struct rpcrdma_msg *rmsgp,
+ struct xdr_buf *rcvbuf)
+{
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ struct kvec *dst, *src = &rcvbuf->head[0];
+ struct rpc_rqst *req;
+ unsigned long cwnd;
+ u32 credits;
+ size_t len;
+ __be32 xid;
+ __be32 *p;
+ int ret;
+
+ p = (__be32 *)src->iov_base;
+ len = src->iov_len;
+ xid = rmsgp->rm_xid;
+
+#ifdef SVCRDMA_BACKCHANNEL_DEBUG
+ pr_info("%s: xid=%08x, length=%zu\n",
+ __func__, be32_to_cpu(xid), len);
+ pr_info("%s: RPC/RDMA: %*ph\n",
+ __func__, (int)RPCRDMA_HDRLEN_MIN, rmsgp);
+ pr_info("%s: RPC: %*ph\n",
+ __func__, (int)len, p);
+#endif
+
+ ret = -EAGAIN;
+ if (src->iov_len < 24)
+ goto out_shortreply;
+
+ spin_lock_bh(&xprt->transport_lock);
+ req = xprt_lookup_rqst(xprt, xid);
+ if (!req)
+ goto out_notfound;
+
+ dst = &req->rq_private_buf.head[0];
+ memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf));
+ if (dst->iov_len < len)
+ goto out_unlock;
+ memcpy(dst->iov_base, p, len);
+
+ credits = be32_to_cpu(rmsgp->rm_credit);
+ if (credits == 0)
+ credits = 1; /* don't deadlock */
+ else if (credits > r_xprt->rx_buf.rb_bc_max_requests)
+ credits = r_xprt->rx_buf.rb_bc_max_requests;
+
+ cwnd = xprt->cwnd;
+ xprt->cwnd = credits << RPC_CWNDSHIFT;
+ if (xprt->cwnd > cwnd)
+ xprt_release_rqst_cong(req->rq_task);
+
+ ret = 0;
+ xprt_complete_rqst(req->rq_task, rcvbuf->len);
+ rcvbuf->len = 0;
+
+out_unlock:
+ spin_unlock_bh(&xprt->transport_lock);
+out:
+ return ret;
+
+out_shortreply:
+ dprintk("svcrdma: short bc reply: xprt=%p, len=%zu\n",
+ xprt, src->iov_len);
+ goto out;
+
+out_notfound:
+ dprintk("svcrdma: unrecognized bc reply: xprt=%p, xid=%08x\n",
+ xprt, be32_to_cpu(xid));
+
+ goto out_unlock;
+}
+
+/* Send a backwards direction RPC call.
+ *
+ * Caller holds the connection's mutex and has already marshaled
+ * the RPC/RDMA request.
+ *
+ * This is similar to svc_rdma_reply, but takes an rpc_rqst
+ * instead, does not support chunks, and avoids blocking memory
+ * allocation.
+ *
+ * XXX: There is still an opportunity to block in svc_rdma_send()
+ * if there are no SQ entries to post the Send. This may occur if
+ * the adapter has a small maximum SQ depth.
+ */
+static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
+ struct rpc_rqst *rqst)
+{
+ struct xdr_buf *sndbuf = &rqst->rq_snd_buf;
+ struct svc_rdma_op_ctxt *ctxt;
+ struct svc_rdma_req_map *vec;
+ struct ib_send_wr send_wr;
+ int ret;
+
+ vec = svc_rdma_get_req_map(rdma);
+ ret = svc_rdma_map_xdr(rdma, sndbuf, vec);
+ if (ret)
+ goto out_err;
+
+ /* Post a recv buffer to handle the reply for this request. */
+ ret = svc_rdma_post_recv(rdma, GFP_NOIO);
+ if (ret) {
+ pr_err("svcrdma: Failed to post bc receive buffer, err=%d.\n",
+ ret);
+ pr_err("svcrdma: closing transport %p.\n", rdma);
+ set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
+ ret = -ENOTCONN;
+ goto out_err;
+ }
+
+ ctxt = svc_rdma_get_context(rdma);
+ ctxt->pages[0] = virt_to_page(rqst->rq_buffer);
+ ctxt->count = 1;
+
+ ctxt->wr_op = IB_WR_SEND;
+ ctxt->direction = DMA_TO_DEVICE;
+ ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey;
+ ctxt->sge[0].length = sndbuf->len;
+ ctxt->sge[0].addr =
+ ib_dma_map_page(rdma->sc_cm_id->device, ctxt->pages[0], 0,
+ sndbuf->len, DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) {
+ ret = -EIO;
+ goto out_unmap;
+ }
+ atomic_inc(&rdma->sc_dma_used);
+
+ memset(&send_wr, 0, sizeof(send_wr));
+ send_wr.wr_id = (unsigned long)ctxt;
+ send_wr.sg_list = ctxt->sge;
+ send_wr.num_sge = 1;
+ send_wr.opcode = IB_WR_SEND;
+ send_wr.send_flags = IB_SEND_SIGNALED;
+
+ ret = svc_rdma_send(rdma, &send_wr);
+ if (ret) {
+ ret = -EIO;
+ goto out_unmap;
+ }
+
+out_err:
+ svc_rdma_put_req_map(rdma, vec);
+ dprintk("svcrdma: %s returns %d\n", __func__, ret);
+ return ret;
+
+out_unmap:
+ svc_rdma_unmap_dma(ctxt);
+ svc_rdma_put_context(ctxt, 1);
+ goto out_err;
+}
+
+/* Server-side transport endpoint wants a whole page for its send
+ * buffer. The client RPC code constructs the RPC header in this
+ * buffer before it invokes ->send_request.
+ *
+ * Returns NULL if there was a temporary allocation failure.
+ */
+static void *
+xprt_rdma_bc_allocate(struct rpc_task *task, size_t size)
+{
+ struct rpc_rqst *rqst = task->tk_rqstp;
+ struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt;
+ struct svcxprt_rdma *rdma;
+ struct page *page;
+
+ rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt);
+
+ /* Prevent an infinite loop: try to make this case work */
+ if (size > PAGE_SIZE)
+ WARN_ONCE(1, "svcrdma: large bc buffer request (size %zu)\n",
+ size);
+
+ page = alloc_page(RPCRDMA_DEF_GFP);
+ if (!page)
+ return NULL;
+
+ return page_address(page);
+}
+
+static void
+xprt_rdma_bc_free(void *buffer)
+{
+ /* No-op: ctxt and page have already been freed. */
+}
+
+static int
+rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst)
+{
+ struct rpc_xprt *xprt = rqst->rq_xprt;
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ struct rpcrdma_msg *headerp = (struct rpcrdma_msg *)rqst->rq_buffer;
+ int rc;
+
+ /* Space in the send buffer for an RPC/RDMA header is reserved
+ * via xprt->tsh_size.
+ */
+ headerp->rm_xid = rqst->rq_xid;
+ headerp->rm_vers = rpcrdma_version;
+ headerp->rm_credit = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests);
+ headerp->rm_type = rdma_msg;
+ headerp->rm_body.rm_chunks[0] = xdr_zero;
+ headerp->rm_body.rm_chunks[1] = xdr_zero;
+ headerp->rm_body.rm_chunks[2] = xdr_zero;
+
+#ifdef SVCRDMA_BACKCHANNEL_DEBUG
+ pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer);
+#endif
+
+ rc = svc_rdma_bc_sendto(rdma, rqst);
+ if (rc)
+ goto drop_connection;
+ return rc;
+
+drop_connection:
+ dprintk("svcrdma: failed to send bc call\n");
+ xprt_disconnect_done(xprt);
+ return -ENOTCONN;
+}
+
+/* Send an RPC call on the passive end of a transport
+ * connection.
+ */
+static int
+xprt_rdma_bc_send_request(struct rpc_task *task)
+{
+ struct rpc_rqst *rqst = task->tk_rqstp;
+ struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt;
+ struct svcxprt_rdma *rdma;
+ int ret;
+
+ dprintk("svcrdma: sending bc call with xid: %08x\n",
+ be32_to_cpu(rqst->rq_xid));
+
+ if (!mutex_trylock(&sxprt->xpt_mutex)) {
+ rpc_sleep_on(&sxprt->xpt_bc_pending, task, NULL);
+ if (!mutex_trylock(&sxprt->xpt_mutex))
+ return -EAGAIN;
+ rpc_wake_up_queued_task(&sxprt->xpt_bc_pending, task);
+ }
+
+ ret = -ENOTCONN;
+ rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt);
+ if (!test_bit(XPT_DEAD, &sxprt->xpt_flags))
+ ret = rpcrdma_bc_send_request(rdma, rqst);
+
+ mutex_unlock(&sxprt->xpt_mutex);
+
+ if (ret < 0)
+ return ret;
+ return 0;
+}
+
+static void
+xprt_rdma_bc_close(struct rpc_xprt *xprt)
+{
+ dprintk("svcrdma: %s: xprt %p\n", __func__, xprt);
+}
+
+static void
+xprt_rdma_bc_put(struct rpc_xprt *xprt)
+{
+ dprintk("svcrdma: %s: xprt %p\n", __func__, xprt);
+
+ xprt_free(xprt);
+ module_put(THIS_MODULE);
+}
+
+static struct rpc_xprt_ops xprt_rdma_bc_procs = {
+ .reserve_xprt = xprt_reserve_xprt_cong,
+ .release_xprt = xprt_release_xprt_cong,
+ .alloc_slot = xprt_alloc_slot,
+ .release_request = xprt_release_rqst_cong,
+ .buf_alloc = xprt_rdma_bc_allocate,
+ .buf_free = xprt_rdma_bc_free,
+ .send_request = xprt_rdma_bc_send_request,
+ .set_retrans_timeout = xprt_set_retrans_timeout_def,
+ .close = xprt_rdma_bc_close,
+ .destroy = xprt_rdma_bc_put,
+ .print_stats = xprt_rdma_print_stats
+};
+
+static const struct rpc_timeout xprt_rdma_bc_timeout = {
+ .to_initval = 60 * HZ,
+ .to_maxval = 60 * HZ,
+};
+
+/* It shouldn't matter if the number of backchannel session slots
+ * doesn't match the number of RPC/RDMA credits. That just means
+ * one or the other will have extra slots that aren't used.
+ */
+static struct rpc_xprt *
+xprt_setup_rdma_bc(struct xprt_create *args)
+{
+ struct rpc_xprt *xprt;
+ struct rpcrdma_xprt *new_xprt;
+
+ if (args->addrlen > sizeof(xprt->addr)) {
+ dprintk("RPC: %s: address too large\n", __func__);
+ return ERR_PTR(-EBADF);
+ }
+
+ xprt = xprt_alloc(args->net, sizeof(*new_xprt),
+ RPCRDMA_MAX_BC_REQUESTS,
+ RPCRDMA_MAX_BC_REQUESTS);
+ if (!xprt) {
+ dprintk("RPC: %s: couldn't allocate rpc_xprt\n",
+ __func__);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ xprt->timeout = &xprt_rdma_bc_timeout;
+ xprt_set_bound(xprt);
+ xprt_set_connected(xprt);
+ xprt->bind_timeout = RPCRDMA_BIND_TO;
+ xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
+ xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;
+
+ xprt->prot = XPRT_TRANSPORT_BC_RDMA;
+ xprt->tsh_size = RPCRDMA_HDRLEN_MIN / sizeof(__be32);
+ xprt->ops = &xprt_rdma_bc_procs;
+
+ memcpy(&xprt->addr, args->dstaddr, args->addrlen);
+ xprt->addrlen = args->addrlen;
+ xprt_rdma_format_addresses(xprt, (struct sockaddr *)&xprt->addr);
+ xprt->resvport = 0;
+
+ xprt->max_payload = xprt_rdma_max_inline_read;
+
+ new_xprt = rpcx_to_rdmax(xprt);
+ new_xprt->rx_buf.rb_bc_max_requests = xprt->max_reqs;
+
+ xprt_get(xprt);
+ args->bc_xprt->xpt_bc_xprt = xprt;
+ xprt->bc_xprt = args->bc_xprt;
+
+ if (!try_module_get(THIS_MODULE))
+ goto out_fail;
+
+ /* Final put for backchannel xprt is in __svc_rdma_free */
+ xprt_get(xprt);
+ return xprt;
+
+out_fail:
+ xprt_rdma_free_addresses(xprt);
+ args->bc_xprt->xpt_bc_xprt = NULL;
+ xprt_put(xprt);
+ xprt_free(xprt);
+ return ERR_PTR(-EINVAL);
+}
+
+struct xprt_class xprt_rdma_bc = {
+ .list = LIST_HEAD_INIT(xprt_rdma_bc.list),
+ .name = "rdma backchannel",
+ .owner = THIS_MODULE,
+ .ident = XPRT_TRANSPORT_BC_RDMA,
+ .setup = xprt_setup_rdma_bc,
+};
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index ff4f01e527ec..c8b8a8b4181e 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -144,6 +144,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
head->arg.page_len += len;
+
head->arg.len += len;
if (!pg_off)
head->count++;
@@ -160,8 +161,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
goto err;
atomic_inc(&xprt->sc_dma_used);
- /* The lkey here is either a local dma lkey or a dma_mr lkey */
- ctxt->sge[pno].lkey = xprt->sc_dma_lkey;
+ ctxt->sge[pno].lkey = xprt->sc_pd->local_dma_lkey;
ctxt->sge[pno].length = len;
ctxt->count++;
@@ -567,6 +567,38 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
return ret;
}
+/* By convention, backchannel calls arrive via rdma_msg type
+ * messages, and never populate the chunk lists. This makes
+ * the RPC/RDMA header small and fixed in size, so it is
+ * straightforward to check the RPC header's direction field.
+ */
+static bool
+svc_rdma_is_backchannel_reply(struct svc_xprt *xprt, struct rpcrdma_msg *rmsgp)
+{
+ __be32 *p = (__be32 *)rmsgp;
+
+ if (!xprt->xpt_bc_xprt)
+ return false;
+
+ if (rmsgp->rm_type != rdma_msg)
+ return false;
+ if (rmsgp->rm_body.rm_chunks[0] != xdr_zero)
+ return false;
+ if (rmsgp->rm_body.rm_chunks[1] != xdr_zero)
+ return false;
+ if (rmsgp->rm_body.rm_chunks[2] != xdr_zero)
+ return false;
+
+ /* sanity */
+ if (p[7] != rmsgp->rm_xid)
+ return false;
+ /* call direction */
+ if (p[8] == cpu_to_be32(RPC_CALL))
+ return false;
+
+ return true;
+}
+
/*
* Set up the rqstp thread context to point to the RQ buffer. If
* necessary, pull additional data from the client with an RDMA_READ
@@ -632,6 +664,15 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
goto close_out;
}
+ if (svc_rdma_is_backchannel_reply(xprt, rmsgp)) {
+ ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, rmsgp,
+ &rqstp->rq_arg);
+ svc_rdma_put_context(ctxt, 0);
+ if (ret)
+ goto repost;
+ return ret;
+ }
+
/* Read read-list data. */
ret = rdma_read_chunks(rdma_xprt, rmsgp, rqstp, ctxt);
if (ret > 0) {
@@ -668,4 +709,15 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
set_bit(XPT_CLOSE, &xprt->xpt_flags);
defer:
return 0;
+
+repost:
+ ret = svc_rdma_post_recv(rdma_xprt, GFP_KERNEL);
+ if (ret) {
+ pr_err("svcrdma: could not post a receive buffer, err=%d.\n",
+ ret);
+ pr_err("svcrdma: closing transport %p.\n", rdma_xprt);
+ set_bit(XPT_CLOSE, &rdma_xprt->sc_xprt.xpt_flags);
+ ret = -ENOTCONN;
+ }
+ return ret;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 969a1ab75fc3..df57f3ce6cd2 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -50,9 +50,9 @@
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
-static int map_xdr(struct svcxprt_rdma *xprt,
- struct xdr_buf *xdr,
- struct svc_rdma_req_map *vec)
+int svc_rdma_map_xdr(struct svcxprt_rdma *xprt,
+ struct xdr_buf *xdr,
+ struct svc_rdma_req_map *vec)
{
int sge_no;
u32 sge_bytes;
@@ -62,7 +62,7 @@ static int map_xdr(struct svcxprt_rdma *xprt,
if (xdr->len !=
(xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)) {
- pr_err("svcrdma: map_xdr: XDR buffer length error\n");
+ pr_err("svcrdma: %s: XDR buffer length error\n", __func__);
return -EIO;
}
@@ -97,9 +97,9 @@ static int map_xdr(struct svcxprt_rdma *xprt,
sge_no++;
}
- dprintk("svcrdma: map_xdr: sge_no %d page_no %d "
+ dprintk("svcrdma: %s: sge_no %d page_no %d "
"page_base %u page_len %u head_len %zu tail_len %zu\n",
- sge_no, page_no, xdr->page_base, xdr->page_len,
+ __func__, sge_no, page_no, xdr->page_base, xdr->page_len,
xdr->head[0].iov_len, xdr->tail[0].iov_len);
vec->count = sge_no;
@@ -265,7 +265,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
sge[sge_no].addr))
goto err;
atomic_inc(&xprt->sc_dma_used);
- sge[sge_no].lkey = xprt->sc_dma_lkey;
+ sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
ctxt->count++;
sge_off = 0;
sge_no++;
@@ -465,7 +465,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
int ret;
/* Post a recv buffer to handle another request. */
- ret = svc_rdma_post_recv(rdma);
+ ret = svc_rdma_post_recv(rdma, GFP_KERNEL);
if (ret) {
printk(KERN_INFO
"svcrdma: could not post a receive buffer, err=%d."
@@ -480,7 +480,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
ctxt->count = 1;
/* Prepare the SGE for the RPCRDMA Header */
- ctxt->sge[0].lkey = rdma->sc_dma_lkey;
+ ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey;
ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
ctxt->sge[0].addr =
ib_dma_map_page(rdma->sc_cm_id->device, page, 0,
@@ -504,7 +504,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
ctxt->sge[sge_no].addr))
goto err;
atomic_inc(&rdma->sc_dma_used);
- ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
+ ctxt->sge[sge_no].lkey = rdma->sc_pd->local_dma_lkey;
ctxt->sge[sge_no].length = sge_bytes;
}
if (byte_count != 0) {
@@ -591,14 +591,17 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
/* Build an req vec for the XDR */
ctxt = svc_rdma_get_context(rdma);
ctxt->direction = DMA_TO_DEVICE;
- vec = svc_rdma_get_req_map();
- ret = map_xdr(rdma, &rqstp->rq_res, vec);
+ vec = svc_rdma_get_req_map(rdma);
+ ret = svc_rdma_map_xdr(rdma, &rqstp->rq_res, vec);
if (ret)
goto err0;
inline_bytes = rqstp->rq_res.len;
/* Create the RDMA response header */
- res_page = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
+ ret = -ENOMEM;
+ res_page = alloc_page(GFP_KERNEL);
+ if (!res_page)
+ goto err0;
rdma_resp = page_address(res_page);
reply_ary = svc_rdma_get_reply_array(rdma_argp);
if (reply_ary)
@@ -630,14 +633,14 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
ret = send_reply(rdma, rqstp, res_page, rdma_resp, ctxt, vec,
inline_bytes);
- svc_rdma_put_req_map(vec);
+ svc_rdma_put_req_map(rdma, vec);
dprintk("svcrdma: send_reply returns %d\n", ret);
return ret;
err1:
put_page(res_page);
err0:
- svc_rdma_put_req_map(vec);
+ svc_rdma_put_req_map(rdma, vec);
svc_rdma_put_context(ctxt, 0);
return ret;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index b348b4adef29..5763825d09bf 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -153,18 +153,76 @@ static void svc_rdma_bc_free(struct svc_xprt *xprt)
}
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
-struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
+static struct svc_rdma_op_ctxt *alloc_ctxt(struct svcxprt_rdma *xprt,
+ gfp_t flags)
{
struct svc_rdma_op_ctxt *ctxt;
- ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep,
- GFP_KERNEL | __GFP_NOFAIL);
- ctxt->xprt = xprt;
- INIT_LIST_HEAD(&ctxt->dto_q);
+ ctxt = kmalloc(sizeof(*ctxt), flags);
+ if (ctxt) {
+ ctxt->xprt = xprt;
+ INIT_LIST_HEAD(&ctxt->free);
+ INIT_LIST_HEAD(&ctxt->dto_q);
+ }
+ return ctxt;
+}
+
+static bool svc_rdma_prealloc_ctxts(struct svcxprt_rdma *xprt)
+{
+ unsigned int i;
+
+ /* Each RPC/RDMA credit can consume a number of send
+ * and receive WQEs. One ctxt is allocated for each.
+ */
+ i = xprt->sc_sq_depth + xprt->sc_rq_depth;
+
+ while (i--) {
+ struct svc_rdma_op_ctxt *ctxt;
+
+ ctxt = alloc_ctxt(xprt, GFP_KERNEL);
+ if (!ctxt) {
+ dprintk("svcrdma: No memory for RDMA ctxt\n");
+ return false;
+ }
+ list_add(&ctxt->free, &xprt->sc_ctxts);
+ }
+ return true;
+}
+
+struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
+{
+ struct svc_rdma_op_ctxt *ctxt = NULL;
+
+ spin_lock_bh(&xprt->sc_ctxt_lock);
+ xprt->sc_ctxt_used++;
+ if (list_empty(&xprt->sc_ctxts))
+ goto out_empty;
+
+ ctxt = list_first_entry(&xprt->sc_ctxts,
+ struct svc_rdma_op_ctxt, free);
+ list_del_init(&ctxt->free);
+ spin_unlock_bh(&xprt->sc_ctxt_lock);
+
+out:
ctxt->count = 0;
ctxt->frmr = NULL;
- atomic_inc(&xprt->sc_ctxt_used);
return ctxt;
+
+out_empty:
+ /* Either pre-allocation missed the mark, or send
+ * queue accounting is broken.
+ */
+ spin_unlock_bh(&xprt->sc_ctxt_lock);
+
+ ctxt = alloc_ctxt(xprt, GFP_NOIO);
+ if (ctxt)
+ goto out;
+
+ spin_lock_bh(&xprt->sc_ctxt_lock);
+ xprt->sc_ctxt_used--;
+ spin_unlock_bh(&xprt->sc_ctxt_lock);
+ WARN_ONCE(1, "svcrdma: empty RDMA ctxt list?\n");
+ return NULL;
}
void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
@@ -174,11 +232,11 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) {
/*
* Unmap the DMA addr in the SGE if the lkey matches
- * the sc_dma_lkey, otherwise, ignore it since it is
+ * the local_dma_lkey, otherwise, ignore it since it is
* an FRMR lkey and will be unmapped later when the
* last WR that uses it completes.
*/
- if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) {
+ if (ctxt->sge[i].lkey == xprt->sc_pd->local_dma_lkey) {
atomic_dec(&xprt->sc_dma_used);
ib_dma_unmap_page(xprt->sc_cm_id->device,
ctxt->sge[i].addr,
@@ -190,35 +248,108 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
{
- struct svcxprt_rdma *xprt;
+ struct svcxprt_rdma *xprt = ctxt->xprt;
int i;
- xprt = ctxt->xprt;
if (free_pages)
for (i = 0; i < ctxt->count; i++)
put_page(ctxt->pages[i]);
- kmem_cache_free(svc_rdma_ctxt_cachep, ctxt);
- atomic_dec(&xprt->sc_ctxt_used);
+ spin_lock_bh(&xprt->sc_ctxt_lock);
+ xprt->sc_ctxt_used--;
+ list_add(&ctxt->free, &xprt->sc_ctxts);
+ spin_unlock_bh(&xprt->sc_ctxt_lock);
}
-/*
- * Temporary NFS req mappings are shared across all transport
- * instances. These are short lived and should be bounded by the number
- * of concurrent server threads * depth of the SQ.
- */
-struct svc_rdma_req_map *svc_rdma_get_req_map(void)
+static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt)
+{
+ while (!list_empty(&xprt->sc_ctxts)) {
+ struct svc_rdma_op_ctxt *ctxt;
+
+ ctxt = list_first_entry(&xprt->sc_ctxts,
+ struct svc_rdma_op_ctxt, free);
+ list_del(&ctxt->free);
+ kfree(ctxt);
+ }
+}
+
+static struct svc_rdma_req_map *alloc_req_map(gfp_t flags)
{
struct svc_rdma_req_map *map;
- map = kmem_cache_alloc(svc_rdma_map_cachep,
- GFP_KERNEL | __GFP_NOFAIL);
+
+ map = kmalloc(sizeof(*map), flags);
+ if (map)
+ INIT_LIST_HEAD(&map->free);
+ return map;
+}
+
+static bool svc_rdma_prealloc_maps(struct svcxprt_rdma *xprt)
+{
+ unsigned int i;
+
+ /* One for each receive buffer on this connection. */
+ i = xprt->sc_max_requests;
+
+ while (i--) {
+ struct svc_rdma_req_map *map;
+
+ map = alloc_req_map(GFP_KERNEL);
+ if (!map) {
+ dprintk("svcrdma: No memory for request map\n");
+ return false;
+ }
+ list_add(&map->free, &xprt->sc_maps);
+ }
+ return true;
+}
+
+struct svc_rdma_req_map *svc_rdma_get_req_map(struct svcxprt_rdma *xprt)
+{
+ struct svc_rdma_req_map *map = NULL;
+
+ spin_lock(&xprt->sc_map_lock);
+ if (list_empty(&xprt->sc_maps))
+ goto out_empty;
+
+ map = list_first_entry(&xprt->sc_maps,
+ struct svc_rdma_req_map, free);
+ list_del_init(&map->free);
+ spin_unlock(&xprt->sc_map_lock);
+
+out:
map->count = 0;
return map;
+
+out_empty:
+ spin_unlock(&xprt->sc_map_lock);
+
+ /* Pre-allocation amount was incorrect */
+ map = alloc_req_map(GFP_NOIO);
+ if (map)
+ goto out;
+
+ WARN_ONCE(1, "svcrdma: empty request map list?\n");
+ return NULL;
+}
+
+void svc_rdma_put_req_map(struct svcxprt_rdma *xprt,
+ struct svc_rdma_req_map *map)
+{
+ spin_lock(&xprt->sc_map_lock);
+ list_add(&map->free, &xprt->sc_maps);
+ spin_unlock(&xprt->sc_map_lock);
}
-void svc_rdma_put_req_map(struct svc_rdma_req_map *map)
+static void svc_rdma_destroy_maps(struct svcxprt_rdma *xprt)
{
- kmem_cache_free(svc_rdma_map_cachep, map);
+ while (!list_empty(&xprt->sc_maps)) {
+ struct svc_rdma_req_map *map;
+
+ map = list_first_entry(&xprt->sc_maps,
+ struct svc_rdma_req_map, free);
+ list_del(&map->free);
+ kfree(map);
+ }
}
/* ib_cq event handler */
@@ -386,46 +517,44 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt)
static void process_context(struct svcxprt_rdma *xprt,
struct svc_rdma_op_ctxt *ctxt)
{
+ struct svc_rdma_op_ctxt *read_hdr;
+ int free_pages = 0;
+
svc_rdma_unmap_dma(ctxt);
switch (ctxt->wr_op) {
case IB_WR_SEND:
- if (ctxt->frmr)
- pr_err("svcrdma: SEND: ctxt->frmr != NULL\n");
- svc_rdma_put_context(ctxt, 1);
+ free_pages = 1;
break;
case IB_WR_RDMA_WRITE:
- if (ctxt->frmr)
- pr_err("svcrdma: WRITE: ctxt->frmr != NULL\n");
- svc_rdma_put_context(ctxt, 0);
break;
case IB_WR_RDMA_READ:
case IB_WR_RDMA_READ_WITH_INV:
svc_rdma_put_frmr(xprt, ctxt->frmr);
- if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
- struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
- if (read_hdr) {
- spin_lock_bh(&xprt->sc_rq_dto_lock);
- set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
- list_add_tail(&read_hdr->dto_q,
- &xprt->sc_read_complete_q);
- spin_unlock_bh(&xprt->sc_rq_dto_lock);
- } else {
- pr_err("svcrdma: ctxt->read_hdr == NULL\n");
- }
- svc_xprt_enqueue(&xprt->sc_xprt);
- }
+
+ if (!test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags))
+ break;
+
+ read_hdr = ctxt->read_hdr;
svc_rdma_put_context(ctxt, 0);
- break;
+
+ spin_lock_bh(&xprt->sc_rq_dto_lock);
+ set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
+ list_add_tail(&read_hdr->dto_q,
+ &xprt->sc_read_complete_q);
+ spin_unlock_bh(&xprt->sc_rq_dto_lock);
+ svc_xprt_enqueue(&xprt->sc_xprt);
+ return;
default:
- printk(KERN_ERR "svcrdma: unexpected completion type, "
- "opcode=%d\n",
- ctxt->wr_op);
+ dprintk("svcrdma: unexpected completion opcode=%d\n",
+ ctxt->wr_op);
break;
}
+
+ svc_rdma_put_context(ctxt, free_pages);
}
/*
@@ -523,19 +652,15 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
INIT_LIST_HEAD(&cma_xprt->sc_frmr_q);
+ INIT_LIST_HEAD(&cma_xprt->sc_ctxts);
+ INIT_LIST_HEAD(&cma_xprt->sc_maps);
init_waitqueue_head(&cma_xprt->sc_send_wait);
spin_lock_init(&cma_xprt->sc_lock);
spin_lock_init(&cma_xprt->sc_rq_dto_lock);
spin_lock_init(&cma_xprt->sc_frmr_q_lock);
-
- cma_xprt->sc_ord = svcrdma_ord;
-
- cma_xprt->sc_max_req_size = svcrdma_max_req_size;
- cma_xprt->sc_max_requests = svcrdma_max_requests;
- cma_xprt->sc_sq_depth = svcrdma_max_requests * RPCRDMA_SQ_DEPTH_MULT;
- atomic_set(&cma_xprt->sc_sq_count, 0);
- atomic_set(&cma_xprt->sc_ctxt_used, 0);
+ spin_lock_init(&cma_xprt->sc_ctxt_lock);
+ spin_lock_init(&cma_xprt->sc_map_lock);
if (listener)
set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
@@ -543,7 +668,7 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
return cma_xprt;
}
-int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
+int svc_rdma_post_recv(struct svcxprt_rdma *xprt, gfp_t flags)
{
struct ib_recv_wr recv_wr, *bad_recv_wr;
struct svc_rdma_op_ctxt *ctxt;
@@ -561,7 +686,9 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
pr_err("svcrdma: Too many sges (%d)\n", sge_no);
goto err_put_ctxt;
}
- page = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
+ page = alloc_page(flags);
+ if (!page)
+ goto err_put_ctxt;
ctxt->pages[sge_no] = page;
pa = ib_dma_map_page(xprt->sc_cm_id->device,
page, 0, PAGE_SIZE,
@@ -571,7 +698,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
atomic_inc(&xprt->sc_dma_used);
ctxt->sge[sge_no].addr = pa;
ctxt->sge[sge_no].length = PAGE_SIZE;
- ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey;
+ ctxt->sge[sge_no].lkey = xprt->sc_pd->local_dma_lkey;
ctxt->count = sge_no + 1;
buflen += PAGE_SIZE;
}
@@ -886,11 +1013,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
struct rdma_conn_param conn_param;
struct ib_cq_init_attr cq_attr = {};
struct ib_qp_init_attr qp_attr;
- struct ib_device_attr devattr;
- int uninitialized_var(dma_mr_acc);
- int need_dma_mr = 0;
- int ret;
- int i;
+ struct ib_device *dev;
+ unsigned int i;
+ int ret = 0;
listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt);
clear_bit(XPT_CONN, &xprt->xpt_flags);
@@ -910,37 +1035,42 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
dprintk("svcrdma: newxprt from accept queue = %p, cm_id=%p\n",
newxprt, newxprt->sc_cm_id);
- ret = ib_query_device(newxprt->sc_cm_id->device, &devattr);
- if (ret) {
- dprintk("svcrdma: could not query device attributes on "
- "device %p, rc=%d\n", newxprt->sc_cm_id->device, ret);
- goto errout;
- }
+ dev = newxprt->sc_cm_id->device;
/* Qualify the transport resource defaults with the
* capabilities of this particular device */
- newxprt->sc_max_sge = min((size_t)devattr.max_sge,
+ newxprt->sc_max_sge = min((size_t)dev->attrs.max_sge,
(size_t)RPCSVC_MAXPAGES);
- newxprt->sc_max_sge_rd = min_t(size_t, devattr.max_sge_rd,
+ newxprt->sc_max_sge_rd = min_t(size_t, dev->attrs.max_sge_rd,
RPCSVC_MAXPAGES);
- newxprt->sc_max_requests = min((size_t)devattr.max_qp_wr,
- (size_t)svcrdma_max_requests);
- newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_max_requests;
+ newxprt->sc_max_req_size = svcrdma_max_req_size;
+ newxprt->sc_max_requests = min_t(u32, dev->attrs.max_qp_wr,
+ svcrdma_max_requests);
+ newxprt->sc_max_bc_requests = min_t(u32, dev->attrs.max_qp_wr,
+ svcrdma_max_bc_requests);
+ newxprt->sc_rq_depth = newxprt->sc_max_requests +
+ newxprt->sc_max_bc_requests;
+ newxprt->sc_sq_depth = RPCRDMA_SQ_DEPTH_MULT * newxprt->sc_rq_depth;
+
+ if (!svc_rdma_prealloc_ctxts(newxprt))
+ goto errout;
+ if (!svc_rdma_prealloc_maps(newxprt))
+ goto errout;
/*
* Limit ORD based on client limit, local device limit, and
* configured svcrdma limit.
*/
- newxprt->sc_ord = min_t(size_t, devattr.max_qp_rd_atom, newxprt->sc_ord);
+ newxprt->sc_ord = min_t(size_t, dev->attrs.max_qp_rd_atom, newxprt->sc_ord);
newxprt->sc_ord = min_t(size_t, svcrdma_ord, newxprt->sc_ord);
- newxprt->sc_pd = ib_alloc_pd(newxprt->sc_cm_id->device);
+ newxprt->sc_pd = ib_alloc_pd(dev);
if (IS_ERR(newxprt->sc_pd)) {
dprintk("svcrdma: error creating PD for connect request\n");
goto errout;
}
cq_attr.cqe = newxprt->sc_sq_depth;
- newxprt->sc_sq_cq = ib_create_cq(newxprt->sc_cm_id->device,
+ newxprt->sc_sq_cq = ib_create_cq(dev,
sq_comp_handler,
cq_event_handler,
newxprt,
@@ -949,8 +1079,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
dprintk("svcrdma: error creating SQ CQ for connect request\n");
goto errout;
}
- cq_attr.cqe = newxprt->sc_max_requests;
- newxprt->sc_rq_cq = ib_create_cq(newxprt->sc_cm_id->device,
+ cq_attr.cqe = newxprt->sc_rq_depth;
+ newxprt->sc_rq_cq = ib_create_cq(dev,
rq_comp_handler,
cq_event_handler,
newxprt,
@@ -964,7 +1094,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
qp_attr.event_handler = qp_event_handler;
qp_attr.qp_context = &newxprt->sc_xprt;
qp_attr.cap.max_send_wr = newxprt->sc_sq_depth;
- qp_attr.cap.max_recv_wr = newxprt->sc_max_requests;
+ qp_attr.cap.max_recv_wr = newxprt->sc_rq_depth;
qp_attr.cap.max_send_sge = newxprt->sc_max_sge;
qp_attr.cap.max_recv_sge = newxprt->sc_max_sge;
qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
@@ -978,7 +1108,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
" cap.max_send_sge = %d\n"
" cap.max_recv_sge = %d\n",
newxprt->sc_cm_id, newxprt->sc_pd,
- newxprt->sc_cm_id->device, newxprt->sc_pd->device,
+ dev, newxprt->sc_pd->device,
qp_attr.cap.max_send_wr,
qp_attr.cap.max_recv_wr,
qp_attr.cap.max_send_sge,
@@ -1014,9 +1144,9 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
* of an RDMA_READ. IB does not.
*/
newxprt->sc_reader = rdma_read_chunk_lcl;
- if (devattr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
+ if (dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
newxprt->sc_frmr_pg_list_len =
- devattr.max_fast_reg_page_list_len;
+ dev->attrs.max_fast_reg_page_list_len;
newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG;
newxprt->sc_reader = rdma_read_chunk_frmr;
}
@@ -1024,44 +1154,16 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
/*
* Determine if a DMA MR is required and if so, what privs are required
*/
- if (!rdma_protocol_iwarp(newxprt->sc_cm_id->device,
- newxprt->sc_cm_id->port_num) &&
- !rdma_ib_or_roce(newxprt->sc_cm_id->device,
- newxprt->sc_cm_id->port_num))
+ if (!rdma_protocol_iwarp(dev, newxprt->sc_cm_id->port_num) &&
+ !rdma_ib_or_roce(dev, newxprt->sc_cm_id->port_num))
goto errout;
- if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG) ||
- !(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
- need_dma_mr = 1;
- dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
- if (rdma_protocol_iwarp(newxprt->sc_cm_id->device,
- newxprt->sc_cm_id->port_num) &&
- !(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG))
- dma_mr_acc |= IB_ACCESS_REMOTE_WRITE;
- }
-
- if (rdma_protocol_iwarp(newxprt->sc_cm_id->device,
- newxprt->sc_cm_id->port_num))
+ if (rdma_protocol_iwarp(dev, newxprt->sc_cm_id->port_num))
newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV;
- /* Create the DMA MR if needed, otherwise, use the DMA LKEY */
- if (need_dma_mr) {
- /* Register all of physical memory */
- newxprt->sc_phys_mr =
- ib_get_dma_mr(newxprt->sc_pd, dma_mr_acc);
- if (IS_ERR(newxprt->sc_phys_mr)) {
- dprintk("svcrdma: Failed to create DMA MR ret=%d\n",
- ret);
- goto errout;
- }
- newxprt->sc_dma_lkey = newxprt->sc_phys_mr->lkey;
- } else
- newxprt->sc_dma_lkey =
- newxprt->sc_cm_id->device->local_dma_lkey;
-
/* Post receive buffers */
- for (i = 0; i < newxprt->sc_max_requests; i++) {
- ret = svc_rdma_post_recv(newxprt);
+ for (i = 0; i < newxprt->sc_rq_depth; i++) {
+ ret = svc_rdma_post_recv(newxprt, GFP_KERNEL);
if (ret) {
dprintk("svcrdma: failure posting receive buffers\n");
goto errout;
@@ -1160,12 +1262,14 @@ static void __svc_rdma_free(struct work_struct *work)
{
struct svcxprt_rdma *rdma =
container_of(work, struct svcxprt_rdma, sc_work);
- dprintk("svcrdma: svc_rdma_free(%p)\n", rdma);
+ struct svc_xprt *xprt = &rdma->sc_xprt;
+
+ dprintk("svcrdma: %s(%p)\n", __func__, rdma);
/* We should only be called from kref_put */
- if (atomic_read(&rdma->sc_xprt.xpt_ref.refcount) != 0)
+ if (atomic_read(&xprt->xpt_ref.refcount) != 0)
pr_err("svcrdma: sc_xprt still in use? (%d)\n",
- atomic_read(&rdma->sc_xprt.xpt_ref.refcount));
+ atomic_read(&xprt->xpt_ref.refcount));
/*
* Destroy queued, but not processed read completions. Note
@@ -1193,15 +1297,22 @@ static void __svc_rdma_free(struct work_struct *work)
}
/* Warn if we leaked a resource or under-referenced */
- if (atomic_read(&rdma->sc_ctxt_used) != 0)
+ if (rdma->sc_ctxt_used != 0)
pr_err("svcrdma: ctxt still in use? (%d)\n",
- atomic_read(&rdma->sc_ctxt_used));
+ rdma->sc_ctxt_used);
if (atomic_read(&rdma->sc_dma_used) != 0)
pr_err("svcrdma: dma still in use? (%d)\n",
atomic_read(&rdma->sc_dma_used));
- /* De-allocate fastreg mr */
+ /* Final put of backchannel client transport */
+ if (xprt->xpt_bc_xprt) {
+ xprt_put(xprt->xpt_bc_xprt);
+ xprt->xpt_bc_xprt = NULL;
+ }
+
rdma_dealloc_frmr_q(rdma);
+ svc_rdma_destroy_ctxts(rdma);
+ svc_rdma_destroy_maps(rdma);
/* Destroy the QP if present (not a listener) */
if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
@@ -1213,9 +1324,6 @@ static void __svc_rdma_free(struct work_struct *work)
if (rdma->sc_rq_cq && !IS_ERR(rdma->sc_rq_cq))
ib_destroy_cq(rdma->sc_rq_cq);
- if (rdma->sc_phys_mr && !IS_ERR(rdma->sc_phys_mr))
- ib_dereg_mr(rdma->sc_phys_mr);
-
if (rdma->sc_pd && !IS_ERR(rdma->sc_pd))
ib_dealloc_pd(rdma->sc_pd);
@@ -1321,7 +1429,9 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
int length;
int ret;
- p = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
+ p = alloc_page(GFP_KERNEL);
+ if (!p)
+ return;
va = page_address(p);
/* XDR encode error */
@@ -1341,7 +1451,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
return;
}
atomic_inc(&xprt->sc_dma_used);
- ctxt->sge[0].lkey = xprt->sc_dma_lkey;
+ ctxt->sge[0].lkey = xprt->sc_pd->local_dma_lkey;
ctxt->sge[0].length = length;
/* Prepare SEND WR */
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 740bddcf3488..b1b009f10ea3 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -63,7 +63,7 @@
*/
static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
-static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
+unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_inline_write_padding;
static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
@@ -143,12 +143,7 @@ static struct ctl_table sunrpc_table[] = {
#endif
-#define RPCRDMA_BIND_TO (60U * HZ)
-#define RPCRDMA_INIT_REEST_TO (5U * HZ)
-#define RPCRDMA_MAX_REEST_TO (30U * HZ)
-#define RPCRDMA_IDLE_DISC_TO (5U * 60 * HZ)
-
-static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */
+static struct rpc_xprt_ops xprt_rdma_procs; /*forward reference */
static void
xprt_rdma_format_addresses4(struct rpc_xprt *xprt, struct sockaddr *sap)
@@ -174,7 +169,7 @@ xprt_rdma_format_addresses6(struct rpc_xprt *xprt, struct sockaddr *sap)
xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA6;
}
-static void
+void
xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap)
{
char buf[128];
@@ -203,7 +198,7 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap)
xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";
}
-static void
+void
xprt_rdma_free_addresses(struct rpc_xprt *xprt)
{
unsigned int i;
@@ -499,7 +494,7 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size)
if (req == NULL)
return NULL;
- flags = GFP_NOIO | __GFP_NOWARN;
+ flags = RPCRDMA_DEF_GFP;
if (RPC_IS_SWAPPER(task))
flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
@@ -642,7 +637,7 @@ drop_connection:
return -ENOTCONN; /* implies disconnect */
}
-static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
+void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
long idle_time = 0;
@@ -743,6 +738,11 @@ void xprt_rdma_cleanup(void)
rpcrdma_destroy_wq();
frwr_destroy_recovery_wq();
+
+ rc = xprt_unregister_transport(&xprt_rdma_bc);
+ if (rc)
+ dprintk("RPC: %s: xprt_unregister(bc) returned %i\n",
+ __func__, rc);
}
int xprt_rdma_init(void)
@@ -766,6 +766,14 @@ int xprt_rdma_init(void)
return rc;
}
+ rc = xprt_register_transport(&xprt_rdma_bc);
+ if (rc) {
+ xprt_unregister_transport(&xprt_rdma);
+ rpcrdma_destroy_wq();
+ frwr_destroy_recovery_wq();
+ return rc;
+ }
+
dprintk("RPCRDMA Module Init, register RPC RDMA transport\n");
dprintk("Defaults:\n");
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 732c71ce5dca..878f1bfb1db9 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -462,7 +462,6 @@ int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
struct rpcrdma_ia *ia = &xprt->rx_ia;
- struct ib_device_attr *devattr = &ia->ri_devattr;
int rc;
ia->ri_dma_mr = NULL;
@@ -482,16 +481,10 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
goto out2;
}
- rc = ib_query_device(ia->ri_device, devattr);
- if (rc) {
- dprintk("RPC: %s: ib_query_device failed %d\n",
- __func__, rc);
- goto out3;
- }
-
if (memreg == RPCRDMA_FRMR) {
- if (!(devattr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) ||
- (devattr->max_fast_reg_page_list_len == 0)) {
+ if (!(ia->ri_device->attrs.device_cap_flags &
+ IB_DEVICE_MEM_MGT_EXTENSIONS) ||
+ (ia->ri_device->attrs.max_fast_reg_page_list_len == 0)) {
dprintk("RPC: %s: FRMR registration "
"not supported by HCA\n", __func__);
memreg = RPCRDMA_MTHCAFMR;
@@ -566,24 +559,23 @@ int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
struct rpcrdma_create_data_internal *cdata)
{
- struct ib_device_attr *devattr = &ia->ri_devattr;
struct ib_cq *sendcq, *recvcq;
struct ib_cq_init_attr cq_attr = {};
unsigned int max_qp_wr;
int rc, err;
- if (devattr->max_sge < RPCRDMA_MAX_IOVS) {
+ if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_IOVS) {
dprintk("RPC: %s: insufficient sge's available\n",
__func__);
return -ENOMEM;
}
- if (devattr->max_qp_wr <= RPCRDMA_BACKWARD_WRS) {
+ if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) {
dprintk("RPC: %s: insufficient wqe's available\n",
__func__);
return -ENOMEM;
}
- max_qp_wr = devattr->max_qp_wr - RPCRDMA_BACKWARD_WRS;
+ max_qp_wr = ia->ri_device->attrs.max_qp_wr - RPCRDMA_BACKWARD_WRS;
/* check provider's send/recv wr limits */
if (cdata->max_requests > max_qp_wr)
@@ -668,11 +660,11 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
/* Client offers RDMA Read but does not initiate */
ep->rep_remote_cma.initiator_depth = 0;
- if (devattr->max_qp_rd_atom > 32) /* arbitrary but <= 255 */
+ if (ia->ri_device->attrs.max_qp_rd_atom > 32) /* arbitrary but <= 255 */
ep->rep_remote_cma.responder_resources = 32;
else
ep->rep_remote_cma.responder_resources =
- devattr->max_qp_rd_atom;
+ ia->ri_device->attrs.max_qp_rd_atom;
ep->rep_remote_cma.retry_count = 7;
ep->rep_remote_cma.flow_control = 0;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 728101ddc44b..38fe11b09875 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -55,6 +55,11 @@
#define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */
#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */
+#define RPCRDMA_BIND_TO (60U * HZ)
+#define RPCRDMA_INIT_REEST_TO (5U * HZ)
+#define RPCRDMA_MAX_REEST_TO (30U * HZ)
+#define RPCRDMA_IDLE_DISC_TO (5U * 60 * HZ)
+
/*
* Interface Adapter -- one per transport instance
*/
@@ -68,7 +73,6 @@ struct rpcrdma_ia {
struct completion ri_done;
int ri_async_rc;
unsigned int ri_max_frmr_depth;
- struct ib_device_attr ri_devattr;
struct ib_qp_attr ri_qp_attr;
struct ib_qp_init_attr ri_qp_init_attr;
};
@@ -142,6 +146,8 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb)
return (struct rpcrdma_msg *)rb->rg_base;
}
+#define RPCRDMA_DEF_GFP (GFP_NOIO | __GFP_NOWARN)
+
/*
* struct rpcrdma_rep -- this structure encapsulates state required to recv
* and complete a reply, asychronously. It needs several pieces of
@@ -309,6 +315,8 @@ struct rpcrdma_buffer {
u32 rb_bc_srv_max_requests;
spinlock_t rb_reqslock; /* protect rb_allreqs */
struct list_head rb_allreqs;
+
+ u32 rb_bc_max_requests;
};
#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)
@@ -516,6 +524,10 @@ int rpcrdma_marshal_req(struct rpc_rqst *);
/* RPC/RDMA module init - xprtrdma/transport.c
*/
+extern unsigned int xprt_rdma_max_inline_read;
+void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap);
+void xprt_rdma_free_addresses(struct rpc_xprt *xprt);
+void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq);
int xprt_rdma_init(void);
void xprt_rdma_cleanup(void);
@@ -531,11 +543,6 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *);
void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int);
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
-/* Temporary NFS request map cache. Created in svc_rdma.c */
-extern struct kmem_cache *svc_rdma_map_cachep;
-/* WR context cache. Created in svc_rdma.c */
-extern struct kmem_cache *svc_rdma_ctxt_cachep;
-/* Workqueue created in svc_rdma.c */
-extern struct workqueue_struct *svc_rdma_wq;
+extern struct xprt_class xprt_rdma_bc;
#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */
diff --git a/security/inode.c b/security/inode.c
index 16622aef9bde..28414b0207ce 100644
--- a/security/inode.c
+++ b/security/inode.c
@@ -99,7 +99,7 @@ struct dentry *securityfs_create_file(const char *name, umode_t mode,
dir = d_inode(parent);
- mutex_lock(&dir->i_mutex);
+ inode_lock(dir);
dentry = lookup_one_len(name, parent, strlen(name));
if (IS_ERR(dentry))
goto out;
@@ -129,14 +129,14 @@ struct dentry *securityfs_create_file(const char *name, umode_t mode,
}
d_instantiate(dentry, inode);
dget(dentry);
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
return dentry;
out1:
dput(dentry);
dentry = ERR_PTR(error);
out:
- mutex_unlock(&dir->i_mutex);
+ inode_unlock(dir);
simple_release_fs(&mount, &mount_count);
return dentry;
}
@@ -195,7 +195,7 @@ void securityfs_remove(struct dentry *dentry)
if (!parent || d_really_is_negative(parent))
return;
- mutex_lock(&d_inode(parent)->i_mutex);
+ inode_lock(d_inode(parent));
if (simple_positive(dentry)) {
if (d_is_dir(dentry))
simple_rmdir(d_inode(parent), dentry);
@@ -203,7 +203,7 @@ void securityfs_remove(struct dentry *dentry)
simple_unlink(d_inode(parent), dentry);
dput(dentry);
}
- mutex_unlock(&d_inode(parent)->i_mutex);
+ inode_unlock(d_inode(parent));
simple_release_fs(&mount, &mount_count);
}
EXPORT_SYMBOL_GPL(securityfs_remove);
diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c
index c21f09bf8b99..9d96551d0196 100644
--- a/security/integrity/ima/ima_main.c
+++ b/security/integrity/ima/ima_main.c
@@ -121,7 +121,7 @@ static void ima_check_last_writer(struct integrity_iint_cache *iint,
if (!(mode & FMODE_WRITE))
return;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (atomic_read(&inode->i_writecount) == 1) {
if ((iint->version != inode->i_version) ||
(iint->flags & IMA_NEW_FILE)) {
@@ -130,7 +130,7 @@ static void ima_check_last_writer(struct integrity_iint_cache *iint,
ima_update_xattr(iint, file);
}
}
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
/**
@@ -186,7 +186,7 @@ static int process_measurement(struct file *file, int mask, int function,
if (action & IMA_FILE_APPRAISE)
function = FILE_CHECK;
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
if (action) {
iint = integrity_inode_get(inode);
@@ -250,7 +250,7 @@ out_free:
if (pathbuf)
__putname(pathbuf);
out:
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
if ((rc && must_appraise) && (ima_appraise & IMA_APPRAISE_ENFORCE))
return -EACCES;
return 0;
diff --git a/security/keys/key.c b/security/keys/key.c
index 07a87311055c..09ef276c4bdc 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -430,7 +430,8 @@ static int __key_instantiate_and_link(struct key *key,
/* and link it into the destination keyring */
if (keyring) {
- set_bit(KEY_FLAG_KEEP, &key->flags);
+ if (test_bit(KEY_FLAG_KEEP, &keyring->flags))
+ set_bit(KEY_FLAG_KEEP, &key->flags);
__key_link(key, _edit);
}
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index 732c1c77dccd..1b1fd27de632 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -380,9 +380,9 @@ static int sel_open_policy(struct inode *inode, struct file *filp)
goto err;
if (i_size_read(inode) != security_policydb_len()) {
- mutex_lock(&inode->i_mutex);
+ inode_lock(inode);
i_size_write(inode, security_policydb_len());
- mutex_unlock(&inode->i_mutex);
+ inode_unlock(inode);
}
rc = security_read_policy(&plm->data, &plm->len);
diff --git a/sound/core/Kconfig b/sound/core/Kconfig
index e3e949126a56..a2a1e24becc6 100644
--- a/sound/core/Kconfig
+++ b/sound/core/Kconfig
@@ -97,11 +97,11 @@ config SND_PCM_TIMER
bool "PCM timer interface" if EXPERT
default y
help
- If you disable this option, pcm timer will be inavailable, so
- those stubs used pcm timer (e.g. dmix, dsnoop & co) may work
+ If you disable this option, pcm timer will be unavailable, so
+ those stubs that use pcm timer (e.g. dmix, dsnoop & co) may work
incorrectlly.
- For some embedded device, we may disable it to reduce memory
+ For some embedded devices, we may disable it to reduce memory
footprint, about 20KB on x86_64 platform.
config SND_SEQUENCER_OSS
diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c
index 18b8dc45bb8f..7fac3cae8abd 100644
--- a/sound/core/compress_offload.c
+++ b/sound/core/compress_offload.c
@@ -46,6 +46,13 @@
#include <sound/compress_offload.h>
#include <sound/compress_driver.h>
+/* struct snd_compr_codec_caps overflows the ioctl bit size for some
+ * architectures, so we need to disable the relevant ioctls.
+ */
+#if _IOC_SIZEBITS < 14
+#define COMPR_CODEC_CAPS_OVERFLOW
+#endif
+
/* TODO:
* - add substream support for multiple devices in case of
* SND_DYNAMIC_MINORS is not used
@@ -440,6 +447,7 @@ out:
return retval;
}
+#ifndef COMPR_CODEC_CAPS_OVERFLOW
static int
snd_compr_get_codec_caps(struct snd_compr_stream *stream, unsigned long arg)
{
@@ -463,6 +471,7 @@ out:
kfree(caps);
return retval;
}
+#endif /* !COMPR_CODEC_CAPS_OVERFLOW */
/* revisit this with snd_pcm_preallocate_xxx */
static int snd_compr_allocate_buffer(struct snd_compr_stream *stream,
@@ -801,9 +810,11 @@ static long snd_compr_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
case _IOC_NR(SNDRV_COMPRESS_GET_CAPS):
retval = snd_compr_get_caps(stream, arg);
break;
+#ifndef COMPR_CODEC_CAPS_OVERFLOW
case _IOC_NR(SNDRV_COMPRESS_GET_CODEC_CAPS):
retval = snd_compr_get_codec_caps(stream, arg);
break;
+#endif
case _IOC_NR(SNDRV_COMPRESS_SET_PARAMS):
retval = snd_compr_set_params(stream, arg);
break;
diff --git a/sound/core/seq/oss/seq_oss_init.c b/sound/core/seq/oss/seq_oss_init.c
index b1221b29728e..6779e82b46dd 100644
--- a/sound/core/seq/oss/seq_oss_init.c
+++ b/sound/core/seq/oss/seq_oss_init.c
@@ -202,7 +202,7 @@ snd_seq_oss_open(struct file *file, int level)
dp->index = i;
if (i >= SNDRV_SEQ_OSS_MAX_CLIENTS) {
- pr_err("ALSA: seq_oss: too many applications\n");
+ pr_debug("ALSA: seq_oss: too many applications\n");
rc = -ENOMEM;
goto _error;
}
diff --git a/sound/core/seq/oss/seq_oss_synth.c b/sound/core/seq/oss/seq_oss_synth.c
index 0f3b38184fe5..b16dbef04174 100644
--- a/sound/core/seq/oss/seq_oss_synth.c
+++ b/sound/core/seq/oss/seq_oss_synth.c
@@ -308,7 +308,7 @@ snd_seq_oss_synth_cleanup(struct seq_oss_devinfo *dp)
struct seq_oss_synth *rec;
struct seq_oss_synthinfo *info;
- if (snd_BUG_ON(dp->max_synthdev >= SNDRV_SEQ_OSS_MAX_SYNTH_DEVS))
+ if (snd_BUG_ON(dp->max_synthdev > SNDRV_SEQ_OSS_MAX_SYNTH_DEVS))
return;
for (i = 0; i < dp->max_synthdev; i++) {
info = &dp->synths[i];
diff --git a/sound/drivers/dummy.c b/sound/drivers/dummy.c
index 75b74850c005..bde33308f0d6 100644
--- a/sound/drivers/dummy.c
+++ b/sound/drivers/dummy.c
@@ -87,7 +87,7 @@ MODULE_PARM_DESC(pcm_substreams, "PCM substreams # (1-128) for dummy driver.");
module_param(fake_buffer, bool, 0444);
MODULE_PARM_DESC(fake_buffer, "Fake buffer allocations.");
#ifdef CONFIG_HIGH_RES_TIMERS
-module_param(hrtimer, bool, 0644);
+module_param(hrtimer, bool, 0444);
MODULE_PARM_DESC(hrtimer, "Use hrtimer as the timer source.");
#endif
diff --git a/sound/firewire/bebob/bebob_stream.c b/sound/firewire/bebob/bebob_stream.c
index 926e5dcbb66a..5022c9b97ddf 100644
--- a/sound/firewire/bebob/bebob_stream.c
+++ b/sound/firewire/bebob/bebob_stream.c
@@ -47,14 +47,16 @@ static const unsigned int bridgeco_freq_table[] = {
[6] = 0x07,
};
-static unsigned int
-get_formation_index(unsigned int rate)
+static int
+get_formation_index(unsigned int rate, unsigned int *index)
{
unsigned int i;
for (i = 0; i < ARRAY_SIZE(snd_bebob_rate_table); i++) {
- if (snd_bebob_rate_table[i] == rate)
- return i;
+ if (snd_bebob_rate_table[i] == rate) {
+ *index = i;
+ return 0;
+ }
}
return -EINVAL;
}
@@ -425,7 +427,9 @@ make_both_connections(struct snd_bebob *bebob, unsigned int rate)
goto end;
/* confirm params for both streams */
- index = get_formation_index(rate);
+ err = get_formation_index(rate, &index);
+ if (err < 0)
+ goto end;
pcm_channels = bebob->tx_stream_formations[index].pcm;
midi_channels = bebob->tx_stream_formations[index].midi;
err = amdtp_am824_set_parameters(&bebob->tx_stream, rate,
diff --git a/sound/isa/Kconfig b/sound/isa/Kconfig
index 0216475fc759..37adcc6cbe6b 100644
--- a/sound/isa/Kconfig
+++ b/sound/isa/Kconfig
@@ -3,6 +3,7 @@
config SND_WSS_LIB
tristate
select SND_PCM
+ select SND_TIMER
config SND_SB_COMMON
tristate
@@ -42,6 +43,7 @@ config SND_AD1816A
select SND_OPL3_LIB
select SND_MPU401_UART
select SND_PCM
+ select SND_TIMER
help
Say Y here to include support for Analog Devices SoundPort
AD1816A or compatible sound chips.
@@ -209,6 +211,7 @@ config SND_GUSCLASSIC
tristate "Gravis UltraSound Classic"
select SND_RAWMIDI
select SND_PCM
+ select SND_TIMER
help
Say Y here to include support for Gravis UltraSound Classic
soundcards.
@@ -221,6 +224,7 @@ config SND_GUSEXTREME
select SND_OPL3_LIB
select SND_MPU401_UART
select SND_PCM
+ select SND_TIMER
help
Say Y here to include support for Gravis UltraSound Extreme
soundcards.
diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig
index 656ce39bddbc..8f6594a7d37f 100644
--- a/sound/pci/Kconfig
+++ b/sound/pci/Kconfig
@@ -155,6 +155,7 @@ config SND_AZT3328
select SND_PCM
select SND_RAWMIDI
select SND_AC97_CODEC
+ select SND_TIMER
depends on ZONE_DMA
help
Say Y here to include support for Aztech AZF3328 (PCI168)
@@ -463,6 +464,7 @@ config SND_EMU10K1
select SND_HWDEP
select SND_RAWMIDI
select SND_AC97_CODEC
+ select SND_TIMER
depends on ZONE_DMA
help
Say Y to include support for Sound Blaster PCI 512, Live!,
@@ -889,6 +891,7 @@ config SND_YMFPCI
select SND_OPL3_LIB
select SND_MPU401_UART
select SND_AC97_CODEC
+ select SND_TIMER
help
Say Y here to include support for Yamaha PCI audio chips -
YMF724, YMF724F, YMF740, YMF740C, YMF744, YMF754.
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 256e6cda218f..4045dca3d699 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -90,6 +90,8 @@ enum {
#define NVIDIA_HDA_ENABLE_COHBIT 0x01
/* Defines for Intel SCH HDA snoop control */
+#define INTEL_HDA_CGCTL 0x48
+#define INTEL_HDA_CGCTL_MISCBDCGE (0x1 << 6)
#define INTEL_SCH_HDA_DEVC 0x78
#define INTEL_SCH_HDA_DEVC_NOSNOOP (0x1<<11)
@@ -534,10 +536,21 @@ static void hda_intel_init_chip(struct azx *chip, bool full_reset)
{
struct hdac_bus *bus = azx_bus(chip);
struct pci_dev *pci = chip->pci;
+ u32 val;
if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL)
snd_hdac_set_codec_wakeup(bus, true);
+ if (IS_BROXTON(pci)) {
+ pci_read_config_dword(pci, INTEL_HDA_CGCTL, &val);
+ val = val & ~INTEL_HDA_CGCTL_MISCBDCGE;
+ pci_write_config_dword(pci, INTEL_HDA_CGCTL, val);
+ }
azx_init_chip(chip, full_reset);
+ if (IS_BROXTON(pci)) {
+ pci_read_config_dword(pci, INTEL_HDA_CGCTL, &val);
+ val = val | INTEL_HDA_CGCTL_MISCBDCGE;
+ pci_write_config_dword(pci, INTEL_HDA_CGCTL, val);
+ }
if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL)
snd_hdac_set_codec_wakeup(bus, false);
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index 426a29a1c19b..1f52b55d77c9 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -3653,6 +3653,7 @@ HDA_CODEC_ENTRY(0x10de0070, "GPU 70 HDMI/DP", patch_nvhdmi),
HDA_CODEC_ENTRY(0x10de0071, "GPU 71 HDMI/DP", patch_nvhdmi),
HDA_CODEC_ENTRY(0x10de0072, "GPU 72 HDMI/DP", patch_nvhdmi),
HDA_CODEC_ENTRY(0x10de007d, "GPU 7d HDMI/DP", patch_nvhdmi),
+HDA_CODEC_ENTRY(0x10de0083, "GPU 83 HDMI/DP", patch_nvhdmi),
HDA_CODEC_ENTRY(0x10de8001, "MCP73 HDMI", patch_nvhdmi_2ch),
HDA_CODEC_ENTRY(0x11069f80, "VX900 HDMI/DP", patch_via_hdmi),
HDA_CODEC_ENTRY(0x11069f81, "VX900 HDMI/DP", patch_via_hdmi),
diff --git a/sound/sparc/Kconfig b/sound/sparc/Kconfig
index d75deba5617d..dfcd38647606 100644
--- a/sound/sparc/Kconfig
+++ b/sound/sparc/Kconfig
@@ -22,6 +22,7 @@ config SND_SUN_AMD7930
config SND_SUN_CS4231
tristate "Sun CS4231"
select SND_PCM
+ select SND_TIMER
help
Say Y here to include support for CS4231 sound device on Sun.
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 23ea6d800c4c..a75d9ce7d77a 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1205,8 +1205,12 @@ void snd_usb_set_interface_quirk(struct usb_device *dev)
* "Playback Design" products need a 50ms delay after setting the
* USB interface.
*/
- if (le16_to_cpu(dev->descriptor.idVendor) == 0x23ba)
+ switch (le16_to_cpu(dev->descriptor.idVendor)) {
+ case 0x23ba: /* Playback Design */
+ case 0x0644: /* TEAC Corp. */
mdelay(50);
+ break;
+ }
}
void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe,
@@ -1221,6 +1225,14 @@ void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe,
(requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS)
mdelay(20);
+ /*
+ * "TEAC Corp." products need a 20ms delay after each
+ * class compliant request
+ */
+ if ((le16_to_cpu(dev->descriptor.idVendor) == 0x0644) &&
+ (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS)
+ mdelay(20);
+
/* Marantz/Denon devices with USB DAC functionality need a delay
* after each class compliant request
*/
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index ea69ce35e902..c3bd294a63d1 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -3746,7 +3746,7 @@ static const struct flag flags[] = {
{ "NET_TX_SOFTIRQ", 2 },
{ "NET_RX_SOFTIRQ", 3 },
{ "BLOCK_SOFTIRQ", 4 },
- { "BLOCK_IOPOLL_SOFTIRQ", 5 },
+ { "IRQ_POLL_SOFTIRQ", 5 },
{ "TASKLET_SOFTIRQ", 6 },
{ "SCHED_SOFTIRQ", 7 },
{ "HRTIMER_SOFTIRQ", 8 },
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 8ff7d620d942..33b52eaa39db 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -209,7 +209,7 @@ static const struct flag flags[] = {
{ "NET_TX_SOFTIRQ", 2 },
{ "NET_RX_SOFTIRQ", 3 },
{ "BLOCK_SOFTIRQ", 4 },
- { "BLOCK_IOPOLL_SOFTIRQ", 5 },
+ { "IRQ_POLL_SOFTIRQ", 5 },
{ "TASKLET_SOFTIRQ", 6 },
{ "SCHED_SOFTIRQ", 7 },
{ "HRTIMER_SOFTIRQ", 8 },
diff --git a/tools/virtio/asm/barrier.h b/tools/virtio/asm/barrier.h
index 26b7926bda88..ba34f9e96efd 100644
--- a/tools/virtio/asm/barrier.h
+++ b/tools/virtio/asm/barrier.h
@@ -1,15 +1,19 @@
#if defined(__i386__) || defined(__x86_64__)
#define barrier() asm volatile("" ::: "memory")
-#define mb() __sync_synchronize()
-
-#define smp_mb() mb()
-# define dma_rmb() barrier()
-# define dma_wmb() barrier()
-# define smp_rmb() barrier()
-# define smp_wmb() barrier()
+#define virt_mb() __sync_synchronize()
+#define virt_rmb() barrier()
+#define virt_wmb() barrier()
+/* Atomic store should be enough, but gcc generates worse code in that case. */
+#define virt_store_mb(var, value) do { \
+ typeof(var) virt_store_mb_value = (value); \
+ __atomic_exchange(&(var), &virt_store_mb_value, &virt_store_mb_value, \
+ __ATOMIC_SEQ_CST); \
+ barrier(); \
+} while (0);
/* Weak barriers should be used. If not - it's a bug */
-# define rmb() abort()
-# define wmb() abort()
+# define mb() abort()
+# define rmb() abort()
+# define wmb() abort()
#else
#error Please fill in barrier macros
#endif
diff --git a/tools/virtio/linux/compiler.h b/tools/virtio/linux/compiler.h
new file mode 100644
index 000000000000..845960e1cbf2
--- /dev/null
+++ b/tools/virtio/linux/compiler.h
@@ -0,0 +1,9 @@
+#ifndef LINUX_COMPILER_H
+#define LINUX_COMPILER_H
+
+#define WRITE_ONCE(var, val) \
+ (*((volatile typeof(val) *)(&(var))) = (val))
+
+#define READ_ONCE(var) (*((volatile typeof(val) *)(&(var))))
+
+#endif
diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h
index 4db7d5691ba7..033849948215 100644
--- a/tools/virtio/linux/kernel.h
+++ b/tools/virtio/linux/kernel.h
@@ -8,6 +8,7 @@
#include <assert.h>
#include <stdarg.h>
+#include <linux/compiler.h>
#include <linux/types.h>
#include <linux/printk.h>
#include <linux/bug.h>
diff --git a/tools/virtio/ringtest/Makefile b/tools/virtio/ringtest/Makefile
new file mode 100644
index 000000000000..feaa64ac4630
--- /dev/null
+++ b/tools/virtio/ringtest/Makefile
@@ -0,0 +1,22 @@
+all:
+
+all: ring virtio_ring_0_9 virtio_ring_poll
+
+CFLAGS += -Wall
+CFLAGS += -pthread -O2 -ggdb
+LDFLAGS += -pthread -O2 -ggdb
+
+main.o: main.c main.h
+ring.o: ring.c main.h
+virtio_ring_0_9.o: virtio_ring_0_9.c main.h
+virtio_ring_poll.o: virtio_ring_poll.c virtio_ring_0_9.c main.h
+ring: ring.o main.o
+virtio_ring_0_9: virtio_ring_0_9.o main.o
+virtio_ring_poll: virtio_ring_poll.o main.o
+clean:
+ -rm main.o
+ -rm ring.o ring
+ -rm virtio_ring_0_9.o virtio_ring_0_9
+ -rm virtio_ring_poll.o virtio_ring_poll
+
+.PHONY: all clean
diff --git a/tools/virtio/ringtest/README b/tools/virtio/ringtest/README
new file mode 100644
index 000000000000..34e94c46104f
--- /dev/null
+++ b/tools/virtio/ringtest/README
@@ -0,0 +1,2 @@
+Partial implementation of various ring layouts, useful to tune virtio design.
+Uses shared memory heavily.
diff --git a/tools/virtio/ringtest/main.c b/tools/virtio/ringtest/main.c
new file mode 100644
index 000000000000..3a5ff438bd62
--- /dev/null
+++ b/tools/virtio/ringtest/main.c
@@ -0,0 +1,366 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Command line processing and common functions for ring benchmarking.
+ */
+#define _GNU_SOURCE
+#include <getopt.h>
+#include <pthread.h>
+#include <assert.h>
+#include <sched.h>
+#include "main.h"
+#include <sys/eventfd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <limits.h>
+
+int runcycles = 10000000;
+int max_outstanding = INT_MAX;
+int batch = 1;
+
+bool do_sleep = false;
+bool do_relax = false;
+bool do_exit = true;
+
+unsigned ring_size = 256;
+
+static int kickfd = -1;
+static int callfd = -1;
+
+void notify(int fd)
+{
+ unsigned long long v = 1;
+ int r;
+
+ vmexit();
+ r = write(fd, &v, sizeof v);
+ assert(r == sizeof v);
+ vmentry();
+}
+
+void wait_for_notify(int fd)
+{
+ unsigned long long v = 1;
+ int r;
+
+ vmexit();
+ r = read(fd, &v, sizeof v);
+ assert(r == sizeof v);
+ vmentry();
+}
+
+void kick(void)
+{
+ notify(kickfd);
+}
+
+void wait_for_kick(void)
+{
+ wait_for_notify(kickfd);
+}
+
+void call(void)
+{
+ notify(callfd);
+}
+
+void wait_for_call(void)
+{
+ wait_for_notify(callfd);
+}
+
+void set_affinity(const char *arg)
+{
+ cpu_set_t cpuset;
+ int ret;
+ pthread_t self;
+ long int cpu;
+ char *endptr;
+
+ if (!arg)
+ return;
+
+ cpu = strtol(arg, &endptr, 0);
+ assert(!*endptr);
+
+ assert(cpu >= 0 || cpu < CPU_SETSIZE);
+
+ self = pthread_self();
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu, &cpuset);
+
+ ret = pthread_setaffinity_np(self, sizeof(cpu_set_t), &cpuset);
+ assert(!ret);
+}
+
+static void run_guest(void)
+{
+ int completed_before;
+ int completed = 0;
+ int started = 0;
+ int bufs = runcycles;
+ int spurious = 0;
+ int r;
+ unsigned len;
+ void *buf;
+ int tokick = batch;
+
+ for (;;) {
+ if (do_sleep)
+ disable_call();
+ completed_before = completed;
+ do {
+ if (started < bufs &&
+ started - completed < max_outstanding) {
+ r = add_inbuf(0, NULL, "Hello, world!");
+ if (__builtin_expect(r == 0, true)) {
+ ++started;
+ if (!--tokick) {
+ tokick = batch;
+ if (do_sleep)
+ kick_available();
+ }
+
+ }
+ } else
+ r = -1;
+
+ /* Flush out completed bufs if any */
+ if (get_buf(&len, &buf)) {
+ ++completed;
+ if (__builtin_expect(completed == bufs, false))
+ return;
+ r = 0;
+ }
+ } while (r == 0);
+ if (completed == completed_before)
+ ++spurious;
+ assert(completed <= bufs);
+ assert(started <= bufs);
+ if (do_sleep) {
+ if (enable_call())
+ wait_for_call();
+ } else {
+ poll_used();
+ }
+ }
+}
+
+static void run_host(void)
+{
+ int completed_before;
+ int completed = 0;
+ int spurious = 0;
+ int bufs = runcycles;
+ unsigned len;
+ void *buf;
+
+ for (;;) {
+ if (do_sleep) {
+ if (enable_kick())
+ wait_for_kick();
+ } else {
+ poll_avail();
+ }
+ if (do_sleep)
+ disable_kick();
+ completed_before = completed;
+ while (__builtin_expect(use_buf(&len, &buf), true)) {
+ if (do_sleep)
+ call_used();
+ ++completed;
+ if (__builtin_expect(completed == bufs, false))
+ return;
+ }
+ if (completed == completed_before)
+ ++spurious;
+ assert(completed <= bufs);
+ if (completed == bufs)
+ break;
+ }
+}
+
+void *start_guest(void *arg)
+{
+ set_affinity(arg);
+ run_guest();
+ pthread_exit(NULL);
+}
+
+void *start_host(void *arg)
+{
+ set_affinity(arg);
+ run_host();
+ pthread_exit(NULL);
+}
+
+static const char optstring[] = "";
+static const struct option longopts[] = {
+ {
+ .name = "help",
+ .has_arg = no_argument,
+ .val = 'h',
+ },
+ {
+ .name = "host-affinity",
+ .has_arg = required_argument,
+ .val = 'H',
+ },
+ {
+ .name = "guest-affinity",
+ .has_arg = required_argument,
+ .val = 'G',
+ },
+ {
+ .name = "ring-size",
+ .has_arg = required_argument,
+ .val = 'R',
+ },
+ {
+ .name = "run-cycles",
+ .has_arg = required_argument,
+ .val = 'C',
+ },
+ {
+ .name = "outstanding",
+ .has_arg = required_argument,
+ .val = 'o',
+ },
+ {
+ .name = "batch",
+ .has_arg = required_argument,
+ .val = 'b',
+ },
+ {
+ .name = "sleep",
+ .has_arg = no_argument,
+ .val = 's',
+ },
+ {
+ .name = "relax",
+ .has_arg = no_argument,
+ .val = 'x',
+ },
+ {
+ .name = "exit",
+ .has_arg = no_argument,
+ .val = 'e',
+ },
+ {
+ }
+};
+
+static void help(void)
+{
+ fprintf(stderr, "Usage: <test> [--help]"
+ " [--host-affinity H]"
+ " [--guest-affinity G]"
+ " [--ring-size R (default: %d)]"
+ " [--run-cycles C (default: %d)]"
+ " [--batch b]"
+ " [--outstanding o]"
+ " [--sleep]"
+ " [--relax]"
+ " [--exit]"
+ "\n",
+ ring_size,
+ runcycles);
+}
+
+int main(int argc, char **argv)
+{
+ int ret;
+ pthread_t host, guest;
+ void *tret;
+ char *host_arg = NULL;
+ char *guest_arg = NULL;
+ char *endptr;
+ long int c;
+
+ kickfd = eventfd(0, 0);
+ assert(kickfd >= 0);
+ callfd = eventfd(0, 0);
+ assert(callfd >= 0);
+
+ for (;;) {
+ int o = getopt_long(argc, argv, optstring, longopts, NULL);
+ switch (o) {
+ case -1:
+ goto done;
+ case '?':
+ help();
+ exit(2);
+ case 'H':
+ host_arg = optarg;
+ break;
+ case 'G':
+ guest_arg = optarg;
+ break;
+ case 'R':
+ ring_size = strtol(optarg, &endptr, 0);
+ assert(ring_size && !(ring_size & (ring_size - 1)));
+ assert(!*endptr);
+ break;
+ case 'C':
+ c = strtol(optarg, &endptr, 0);
+ assert(!*endptr);
+ assert(c > 0 && c < INT_MAX);
+ runcycles = c;
+ break;
+ case 'o':
+ c = strtol(optarg, &endptr, 0);
+ assert(!*endptr);
+ assert(c > 0 && c < INT_MAX);
+ max_outstanding = c;
+ break;
+ case 'b':
+ c = strtol(optarg, &endptr, 0);
+ assert(!*endptr);
+ assert(c > 0 && c < INT_MAX);
+ batch = c;
+ break;
+ case 's':
+ do_sleep = true;
+ break;
+ case 'x':
+ do_relax = true;
+ break;
+ case 'e':
+ do_exit = true;
+ break;
+ default:
+ help();
+ exit(4);
+ break;
+ }
+ }
+
+ /* does nothing here, used to make sure all smp APIs compile */
+ smp_acquire();
+ smp_release();
+ smp_mb();
+done:
+
+ if (batch > max_outstanding)
+ batch = max_outstanding;
+
+ if (optind < argc) {
+ help();
+ exit(4);
+ }
+ alloc_ring();
+
+ ret = pthread_create(&host, NULL, start_host, host_arg);
+ assert(!ret);
+ ret = pthread_create(&guest, NULL, start_guest, guest_arg);
+ assert(!ret);
+
+ ret = pthread_join(guest, &tret);
+ assert(!ret);
+ ret = pthread_join(host, &tret);
+ assert(!ret);
+ return 0;
+}
diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h
new file mode 100644
index 000000000000..16917acb0ade
--- /dev/null
+++ b/tools/virtio/ringtest/main.h
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Common macros and functions for ring benchmarking.
+ */
+#ifndef MAIN_H
+#define MAIN_H
+
+#include <stdbool.h>
+
+extern bool do_exit;
+
+#if defined(__x86_64__) || defined(__i386__)
+#include "x86intrin.h"
+
+static inline void wait_cycles(unsigned long long cycles)
+{
+ unsigned long long t;
+
+ t = __rdtsc();
+ while (__rdtsc() - t < cycles) {}
+}
+
+#define VMEXIT_CYCLES 500
+#define VMENTRY_CYCLES 500
+
+#else
+static inline void wait_cycles(unsigned long long cycles)
+{
+ _Exit(5);
+}
+#define VMEXIT_CYCLES 0
+#define VMENTRY_CYCLES 0
+#endif
+
+static inline void vmexit(void)
+{
+ if (!do_exit)
+ return;
+
+ wait_cycles(VMEXIT_CYCLES);
+}
+static inline void vmentry(void)
+{
+ if (!do_exit)
+ return;
+
+ wait_cycles(VMENTRY_CYCLES);
+}
+
+/* implemented by ring */
+void alloc_ring(void);
+/* guest side */
+int add_inbuf(unsigned, void *, void *);
+void *get_buf(unsigned *, void **);
+void disable_call();
+bool enable_call();
+void kick_available();
+void poll_used();
+/* host side */
+void disable_kick();
+bool enable_kick();
+bool use_buf(unsigned *, void **);
+void call_used();
+void poll_avail();
+
+/* implemented by main */
+extern bool do_sleep;
+void kick(void);
+void wait_for_kick(void);
+void call(void);
+void wait_for_call(void);
+
+extern unsigned ring_size;
+
+/* Compiler barrier - similar to what Linux uses */
+#define barrier() asm volatile("" ::: "memory")
+
+/* Is there a portable way to do this? */
+#if defined(__x86_64__) || defined(__i386__)
+#define cpu_relax() asm ("rep; nop" ::: "memory")
+#else
+#define cpu_relax() assert(0)
+#endif
+
+extern bool do_relax;
+
+static inline void busy_wait(void)
+{
+ if (do_relax)
+ cpu_relax();
+ else
+ /* prevent compiler from removing busy loops */
+ barrier();
+}
+
+/*
+ * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized
+ * with other __ATOMIC_SEQ_CST calls.
+ */
+#define smp_mb() __sync_synchronize()
+
+/*
+ * This abuses the atomic builtins for thread fences, and
+ * adds a compiler barrier.
+ */
+#define smp_release() do { \
+ barrier(); \
+ __atomic_thread_fence(__ATOMIC_RELEASE); \
+} while (0)
+
+#define smp_acquire() do { \
+ __atomic_thread_fence(__ATOMIC_ACQUIRE); \
+ barrier(); \
+} while (0)
+
+#endif
diff --git a/tools/virtio/ringtest/ring.c b/tools/virtio/ringtest/ring.c
new file mode 100644
index 000000000000..c25c8d248b6b
--- /dev/null
+++ b/tools/virtio/ringtest/ring.c
@@ -0,0 +1,272 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Simple descriptor-based ring. virtio 0.9 compatible event index is used for
+ * signalling, unconditionally.
+ */
+#define _GNU_SOURCE
+#include "main.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+/* Next - Where next entry will be written.
+ * Prev - "Next" value when event triggered previously.
+ * Event - Peer requested event after writing this entry.
+ */
+static inline bool need_event(unsigned short event,
+ unsigned short next,
+ unsigned short prev)
+{
+ return (unsigned short)(next - event - 1) < (unsigned short)(next - prev);
+}
+
+/* Design:
+ * Guest adds descriptors with unique index values and DESC_HW in flags.
+ * Host overwrites used descriptors with correct len, index, and DESC_HW clear.
+ * Flags are always set last.
+ */
+#define DESC_HW 0x1
+
+struct desc {
+ unsigned short flags;
+ unsigned short index;
+ unsigned len;
+ unsigned long long addr;
+};
+
+/* how much padding is needed to avoid false cache sharing */
+#define HOST_GUEST_PADDING 0x80
+
+/* Mostly read */
+struct event {
+ unsigned short kick_index;
+ unsigned char reserved0[HOST_GUEST_PADDING - 2];
+ unsigned short call_index;
+ unsigned char reserved1[HOST_GUEST_PADDING - 2];
+};
+
+struct data {
+ void *buf; /* descriptor is writeable, we can't get buf from there */
+ void *data;
+} *data;
+
+struct desc *ring;
+struct event *event;
+
+struct guest {
+ unsigned avail_idx;
+ unsigned last_used_idx;
+ unsigned num_free;
+ unsigned kicked_avail_idx;
+ unsigned char reserved[HOST_GUEST_PADDING - 12];
+} guest;
+
+struct host {
+ /* we do not need to track last avail index
+ * unless we have more than one in flight.
+ */
+ unsigned used_idx;
+ unsigned called_used_idx;
+ unsigned char reserved[HOST_GUEST_PADDING - 4];
+} host;
+
+/* implemented by ring */
+void alloc_ring(void)
+{
+ int ret;
+ int i;
+
+ ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring);
+ if (ret) {
+ perror("Unable to allocate ring buffer.\n");
+ exit(3);
+ }
+ event = malloc(sizeof *event);
+ if (!event) {
+ perror("Unable to allocate event buffer.\n");
+ exit(3);
+ }
+ memset(event, 0, sizeof *event);
+ guest.avail_idx = 0;
+ guest.kicked_avail_idx = -1;
+ guest.last_used_idx = 0;
+ host.used_idx = 0;
+ host.called_used_idx = -1;
+ for (i = 0; i < ring_size; ++i) {
+ struct desc desc = {
+ .index = i,
+ };
+ ring[i] = desc;
+ }
+ guest.num_free = ring_size;
+ data = malloc(ring_size * sizeof *data);
+ if (!data) {
+ perror("Unable to allocate data buffer.\n");
+ exit(3);
+ }
+ memset(data, 0, ring_size * sizeof *data);
+}
+
+/* guest side */
+int add_inbuf(unsigned len, void *buf, void *datap)
+{
+ unsigned head, index;
+
+ if (!guest.num_free)
+ return -1;
+
+ guest.num_free--;
+ head = (ring_size - 1) & (guest.avail_idx++);
+
+ /* Start with a write. On MESI architectures this helps
+ * avoid a shared state with consumer that is polling this descriptor.
+ */
+ ring[head].addr = (unsigned long)(void*)buf;
+ ring[head].len = len;
+ /* read below might bypass write above. That is OK because it's just an
+ * optimization. If this happens, we will get the cache line in a
+ * shared state which is unfortunate, but probably not worth it to
+ * add an explicit full barrier to avoid this.
+ */
+ barrier();
+ index = ring[head].index;
+ data[index].buf = buf;
+ data[index].data = datap;
+ /* Barrier A (for pairing) */
+ smp_release();
+ ring[head].flags = DESC_HW;
+
+ return 0;
+}
+
+void *get_buf(unsigned *lenp, void **bufp)
+{
+ unsigned head = (ring_size - 1) & guest.last_used_idx;
+ unsigned index;
+ void *datap;
+
+ if (ring[head].flags & DESC_HW)
+ return NULL;
+ /* Barrier B (for pairing) */
+ smp_acquire();
+ *lenp = ring[head].len;
+ index = ring[head].index & (ring_size - 1);
+ datap = data[index].data;
+ *bufp = data[index].buf;
+ data[index].buf = NULL;
+ data[index].data = NULL;
+ guest.num_free++;
+ guest.last_used_idx++;
+ return datap;
+}
+
+void poll_used(void)
+{
+ unsigned head = (ring_size - 1) & guest.last_used_idx;
+
+ while (ring[head].flags & DESC_HW)
+ busy_wait();
+}
+
+void disable_call()
+{
+ /* Doing nothing to disable calls might cause
+ * extra interrupts, but reduces the number of cache misses.
+ */
+}
+
+bool enable_call()
+{
+ unsigned head = (ring_size - 1) & guest.last_used_idx;
+
+ event->call_index = guest.last_used_idx;
+ /* Flush call index write */
+ /* Barrier D (for pairing) */
+ smp_mb();
+ return ring[head].flags & DESC_HW;
+}
+
+void kick_available(void)
+{
+ /* Flush in previous flags write */
+ /* Barrier C (for pairing) */
+ smp_mb();
+ if (!need_event(event->kick_index,
+ guest.avail_idx,
+ guest.kicked_avail_idx))
+ return;
+
+ guest.kicked_avail_idx = guest.avail_idx;
+ kick();
+}
+
+/* host side */
+void disable_kick()
+{
+ /* Doing nothing to disable kicks might cause
+ * extra interrupts, but reduces the number of cache misses.
+ */
+}
+
+bool enable_kick()
+{
+ unsigned head = (ring_size - 1) & host.used_idx;
+
+ event->kick_index = host.used_idx;
+ /* Barrier C (for pairing) */
+ smp_mb();
+ return !(ring[head].flags & DESC_HW);
+}
+
+void poll_avail(void)
+{
+ unsigned head = (ring_size - 1) & host.used_idx;
+
+ while (!(ring[head].flags & DESC_HW))
+ busy_wait();
+}
+
+bool use_buf(unsigned *lenp, void **bufp)
+{
+ unsigned head = (ring_size - 1) & host.used_idx;
+
+ if (!(ring[head].flags & DESC_HW))
+ return false;
+
+ /* make sure length read below is not speculated */
+ /* Barrier A (for pairing) */
+ smp_acquire();
+
+ /* simple in-order completion: we don't need
+ * to touch index at all. This also means we
+ * can just modify the descriptor in-place.
+ */
+ ring[head].len--;
+ /* Make sure len is valid before flags.
+ * Note: alternative is to write len and flags in one access -
+ * possible on 64 bit architectures but wmb is free on Intel anyway
+ * so I have no way to test whether it's a gain.
+ */
+ /* Barrier B (for pairing) */
+ smp_release();
+ ring[head].flags = 0;
+ host.used_idx++;
+ return true;
+}
+
+void call_used(void)
+{
+ /* Flush in previous flags write */
+ /* Barrier D (for pairing) */
+ smp_mb();
+ if (!need_event(event->call_index,
+ host.used_idx,
+ host.called_used_idx))
+ return;
+
+ host.called_used_idx = host.used_idx;
+ call();
+}
diff --git a/tools/virtio/ringtest/run-on-all.sh b/tools/virtio/ringtest/run-on-all.sh
new file mode 100755
index 000000000000..52b0f71ffa8d
--- /dev/null
+++ b/tools/virtio/ringtest/run-on-all.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+#use last CPU for host. Why not the first?
+#many devices tend to use cpu0 by default so
+#it tends to be busier
+HOST_AFFINITY=$(cd /dev/cpu; ls|grep -v '[a-z]'|sort -n|tail -1)
+
+#run command on all cpus
+for cpu in $(cd /dev/cpu; ls|grep -v '[a-z]'|sort -n);
+do
+ #Don't run guest and host on same CPU
+ #It actually works ok if using signalling
+ if
+ (echo "$@" | grep -e "--sleep" > /dev/null) || \
+ test $HOST_AFFINITY '!=' $cpu
+ then
+ echo "GUEST AFFINITY $cpu"
+ "$@" --host-affinity $HOST_AFFINITY --guest-affinity $cpu
+ fi
+done
+echo "NO GUEST AFFINITY"
+"$@" --host-affinity $HOST_AFFINITY
+echo "NO AFFINITY"
+"$@"
diff --git a/tools/virtio/ringtest/virtio_ring_0_9.c b/tools/virtio/ringtest/virtio_ring_0_9.c
new file mode 100644
index 000000000000..47c9a1a18d36
--- /dev/null
+++ b/tools/virtio/ringtest/virtio_ring_0_9.c
@@ -0,0 +1,316 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Partial implementation of virtio 0.9. event index is used for signalling,
+ * unconditionally. Design roughly follows linux kernel implementation in order
+ * to be able to judge its performance.
+ */
+#define _GNU_SOURCE
+#include "main.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+#include <linux/virtio_ring.h>
+
+struct data {
+ void *data;
+} *data;
+
+struct vring ring;
+
+/* enabling the below activates experimental ring polling code
+ * (which skips index reads on consumer in favor of looking at
+ * high bits of ring id ^ 0x8000).
+ */
+/* #ifdef RING_POLL */
+
+/* how much padding is needed to avoid false cache sharing */
+#define HOST_GUEST_PADDING 0x80
+
+struct guest {
+ unsigned short avail_idx;
+ unsigned short last_used_idx;
+ unsigned short num_free;
+ unsigned short kicked_avail_idx;
+ unsigned short free_head;
+ unsigned char reserved[HOST_GUEST_PADDING - 10];
+} guest;
+
+struct host {
+ /* we do not need to track last avail index
+ * unless we have more than one in flight.
+ */
+ unsigned short used_idx;
+ unsigned short called_used_idx;
+ unsigned char reserved[HOST_GUEST_PADDING - 4];
+} host;
+
+/* implemented by ring */
+void alloc_ring(void)
+{
+ int ret;
+ int i;
+ void *p;
+
+ ret = posix_memalign(&p, 0x1000, vring_size(ring_size, 0x1000));
+ if (ret) {
+ perror("Unable to allocate ring buffer.\n");
+ exit(3);
+ }
+ memset(p, 0, vring_size(ring_size, 0x1000));
+ vring_init(&ring, ring_size, p, 0x1000);
+
+ guest.avail_idx = 0;
+ guest.kicked_avail_idx = -1;
+ guest.last_used_idx = 0;
+ /* Put everything in free lists. */
+ guest.free_head = 0;
+ for (i = 0; i < ring_size - 1; i++)
+ ring.desc[i].next = i + 1;
+ host.used_idx = 0;
+ host.called_used_idx = -1;
+ guest.num_free = ring_size;
+ data = malloc(ring_size * sizeof *data);
+ if (!data) {
+ perror("Unable to allocate data buffer.\n");
+ exit(3);
+ }
+ memset(data, 0, ring_size * sizeof *data);
+}
+
+/* guest side */
+int add_inbuf(unsigned len, void *buf, void *datap)
+{
+ unsigned head, avail;
+ struct vring_desc *desc;
+
+ if (!guest.num_free)
+ return -1;
+
+ head = guest.free_head;
+ guest.num_free--;
+
+ desc = ring.desc;
+ desc[head].flags = VRING_DESC_F_NEXT;
+ desc[head].addr = (unsigned long)(void *)buf;
+ desc[head].len = len;
+ /* We do it like this to simulate the way
+ * we'd have to flip it if we had multiple
+ * descriptors.
+ */
+ desc[head].flags &= ~VRING_DESC_F_NEXT;
+ guest.free_head = desc[head].next;
+
+ data[head].data = datap;
+
+#ifdef RING_POLL
+ /* Barrier A (for pairing) */
+ smp_release();
+ avail = guest.avail_idx++;
+ ring.avail->ring[avail & (ring_size - 1)] =
+ (head | (avail & ~(ring_size - 1))) ^ 0x8000;
+#else
+ avail = (ring_size - 1) & (guest.avail_idx++);
+ ring.avail->ring[avail] = head;
+ /* Barrier A (for pairing) */
+ smp_release();
+#endif
+ ring.avail->idx = guest.avail_idx;
+ return 0;
+}
+
+void *get_buf(unsigned *lenp, void **bufp)
+{
+ unsigned head;
+ unsigned index;
+ void *datap;
+
+#ifdef RING_POLL
+ head = (ring_size - 1) & guest.last_used_idx;
+ index = ring.used->ring[head].id;
+ if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1))
+ return NULL;
+ /* Barrier B (for pairing) */
+ smp_acquire();
+ index &= ring_size - 1;
+#else
+ if (ring.used->idx == guest.last_used_idx)
+ return NULL;
+ /* Barrier B (for pairing) */
+ smp_acquire();
+ head = (ring_size - 1) & guest.last_used_idx;
+ index = ring.used->ring[head].id;
+#endif
+ *lenp = ring.used->ring[head].len;
+ datap = data[index].data;
+ *bufp = (void*)(unsigned long)ring.desc[index].addr;
+ data[index].data = NULL;
+ ring.desc[index].next = guest.free_head;
+ guest.free_head = index;
+ guest.num_free++;
+ guest.last_used_idx++;
+ return datap;
+}
+
+void poll_used(void)
+{
+#ifdef RING_POLL
+ unsigned head = (ring_size - 1) & guest.last_used_idx;
+
+ for (;;) {
+ unsigned index = ring.used->ring[head].id;
+
+ if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1))
+ busy_wait();
+ else
+ break;
+ }
+#else
+ unsigned head = guest.last_used_idx;
+
+ while (ring.used->idx == head)
+ busy_wait();
+#endif
+}
+
+void disable_call()
+{
+ /* Doing nothing to disable calls might cause
+ * extra interrupts, but reduces the number of cache misses.
+ */
+}
+
+bool enable_call()
+{
+ unsigned short last_used_idx;
+
+ vring_used_event(&ring) = (last_used_idx = guest.last_used_idx);
+ /* Flush call index write */
+ /* Barrier D (for pairing) */
+ smp_mb();
+#ifdef RING_POLL
+ {
+ unsigned short head = last_used_idx & (ring_size - 1);
+ unsigned index = ring.used->ring[head].id;
+
+ return (index ^ last_used_idx ^ 0x8000) & ~(ring_size - 1);
+ }
+#else
+ return ring.used->idx == last_used_idx;
+#endif
+}
+
+void kick_available(void)
+{
+ /* Flush in previous flags write */
+ /* Barrier C (for pairing) */
+ smp_mb();
+ if (!vring_need_event(vring_avail_event(&ring),
+ guest.avail_idx,
+ guest.kicked_avail_idx))
+ return;
+
+ guest.kicked_avail_idx = guest.avail_idx;
+ kick();
+}
+
+/* host side */
+void disable_kick()
+{
+ /* Doing nothing to disable kicks might cause
+ * extra interrupts, but reduces the number of cache misses.
+ */
+}
+
+bool enable_kick()
+{
+ unsigned head = host.used_idx;
+
+ vring_avail_event(&ring) = head;
+ /* Barrier C (for pairing) */
+ smp_mb();
+#ifdef RING_POLL
+ {
+ unsigned index = ring.avail->ring[head & (ring_size - 1)];
+
+ return (index ^ head ^ 0x8000) & ~(ring_size - 1);
+ }
+#else
+ return head == ring.avail->idx;
+#endif
+}
+
+void poll_avail(void)
+{
+ unsigned head = host.used_idx;
+#ifdef RING_POLL
+ for (;;) {
+ unsigned index = ring.avail->ring[head & (ring_size - 1)];
+ if ((index ^ head ^ 0x8000) & ~(ring_size - 1))
+ busy_wait();
+ else
+ break;
+ }
+#else
+ while (ring.avail->idx == head)
+ busy_wait();
+#endif
+}
+
+bool use_buf(unsigned *lenp, void **bufp)
+{
+ unsigned used_idx = host.used_idx;
+ struct vring_desc *desc;
+ unsigned head;
+
+#ifdef RING_POLL
+ head = ring.avail->ring[used_idx & (ring_size - 1)];
+ if ((used_idx ^ head ^ 0x8000) & ~(ring_size - 1))
+ return false;
+ /* Barrier A (for pairing) */
+ smp_acquire();
+
+ used_idx &= ring_size - 1;
+ desc = &ring.desc[head & (ring_size - 1)];
+#else
+ if (used_idx == ring.avail->idx)
+ return false;
+
+ /* Barrier A (for pairing) */
+ smp_acquire();
+
+ used_idx &= ring_size - 1;
+ head = ring.avail->ring[used_idx];
+ desc = &ring.desc[head];
+#endif
+
+ *lenp = desc->len;
+ *bufp = (void *)(unsigned long)desc->addr;
+
+ /* now update used ring */
+ ring.used->ring[used_idx].id = head;
+ ring.used->ring[used_idx].len = desc->len - 1;
+ /* Barrier B (for pairing) */
+ smp_release();
+ host.used_idx++;
+ ring.used->idx = host.used_idx;
+
+ return true;
+}
+
+void call_used(void)
+{
+ /* Flush in previous flags write */
+ /* Barrier D (for pairing) */
+ smp_mb();
+ if (!vring_need_event(vring_used_event(&ring),
+ host.used_idx,
+ host.called_used_idx))
+ return;
+
+ host.called_used_idx = host.used_idx;
+ call();
+}
diff --git a/tools/virtio/ringtest/virtio_ring_poll.c b/tools/virtio/ringtest/virtio_ring_poll.c
new file mode 100644
index 000000000000..84fc2c557aaa
--- /dev/null
+++ b/tools/virtio/ringtest/virtio_ring_poll.c
@@ -0,0 +1,2 @@
+#define RING_POLL 1
+#include "virtio_ring_0_9.c"