aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/DocBook/media/v4l/media-types.xml4
-rw-r--r--Documentation/devicetree/bindings/net/can/rcar_can.txt11
-rw-r--r--Documentation/devicetree/bindings/net/fsl-fec.txt2
-rw-r--r--Documentation/devicetree/bindings/net/mediatek-net.txt77
-rw-r--r--Documentation/devicetree/bindings/regulator/tps65217.txt10
-rw-r--r--Documentation/networking/kcm.txt285
-rw-r--r--Documentation/watchdog/watchdog-parameters.txt4
-rw-r--r--MAINTAINERS55
-rw-r--r--Makefile2
-rw-r--r--arch/arc/Kconfig18
-rw-r--r--arch/arc/Makefile4
-rw-r--r--arch/arc/configs/axs101_defconfig4
-rw-r--r--arch/arc/configs/axs103_defconfig10
-rw-r--r--arch/arc/configs/axs103_smp_defconfig10
-rw-r--r--arch/arc/configs/nsim_700_defconfig5
-rw-r--r--arch/arc/configs/nsim_hs_defconfig3
-rw-r--r--arch/arc/configs/nsim_hs_smp_defconfig6
-rw-r--r--arch/arc/configs/nsimosci_defconfig2
-rw-r--r--arch/arc/configs/nsimosci_hs_defconfig3
-rw-r--r--arch/arc/configs/nsimosci_hs_smp_defconfig12
-rw-r--r--arch/arc/configs/tb10x_defconfig18
-rw-r--r--arch/arc/include/asm/arcregs.h32
-rw-r--r--arch/arc/include/asm/irq.h2
-rw-r--r--arch/arc/include/asm/irqflags-arcv2.h11
-rw-r--r--arch/arc/kernel/entry-arcv2.S11
-rw-r--r--arch/arc/kernel/intc-compact.c3
-rw-r--r--arch/arc/kernel/mcip.c60
-rw-r--r--arch/arc/kernel/setup.c80
-rw-r--r--arch/arc/kernel/smp.c3
-rw-r--r--arch/arm/boot/compressed/Makefile2
-rw-r--r--arch/arm/boot/dts/am335x-bone-common.dtsi14
-rw-r--r--arch/arm/boot/dts/am335x-chilisom.dtsi14
-rw-r--r--arch/arm/boot/dts/am335x-nano.dts14
-rw-r--r--arch/arm/boot/dts/am335x-pepper.dts14
-rw-r--r--arch/arm/boot/dts/am335x-shc.dts4
-rw-r--r--arch/arm/boot/dts/am335x-sl50.dts13
-rw-r--r--arch/arm/boot/dts/am57xx-beagle-x15.dts4
-rw-r--r--arch/arm/boot/dts/am57xx-cl-som-am57x.dts2
-rw-r--r--arch/arm/boot/dts/imx6qdl.dtsi1
-rw-r--r--arch/arm/boot/dts/kirkwood-ds112.dts2
-rw-r--r--arch/arm/boot/dts/orion5x-linkstation-lswtgl.dts31
-rw-r--r--arch/arm/boot/dts/r8a7791-porter.dts1
-rw-r--r--arch/arm/boot/dts/sama5d2-pinfunc.h2
-rw-r--r--arch/arm/boot/dts/tps65217.dtsi56
-rw-r--r--arch/arm/include/asm/arch_gicv3.h1
-rw-r--r--arch/arm/include/asm/xen/page-coherent.h21
-rw-r--r--arch/arm/kernel/Makefile1
-rw-r--r--arch/arm/kvm/guest.c2
-rw-r--r--arch/arm/kvm/mmio.c3
-rw-r--r--arch/arm/mach-omap2/board-generic.c22
-rw-r--r--arch/arm/mach-omap2/gpmc-onenand.c6
-rw-r--r--arch/arm/mach-omap2/omap_device.c14
-rw-r--r--arch/arm/mach-shmobile/common.h1
-rw-r--r--arch/arm/mach-shmobile/headsmp-scu.S6
-rw-r--r--arch/arm/mach-shmobile/headsmp.S28
-rw-r--r--arch/arm/mach-shmobile/platsmp-apmu.c1
-rw-r--r--arch/arm/mach-shmobile/platsmp-scu.c4
-rw-r--r--arch/arm/mach-shmobile/smp-r8a7779.c2
-rw-r--r--arch/arm/mm/mmap.c2
-rw-r--r--arch/arm/mm/pageattr.c3
-rw-r--r--arch/arm64/include/asm/pgtable.h7
-rw-r--r--arch/arm64/kvm/guest.c2
-rw-r--r--arch/arm64/kvm/hyp/vgic-v3-sr.c20
-rw-r--r--arch/arm64/mm/init.c4
-rw-r--r--arch/arm64/mm/mmap.c4
-rw-r--r--arch/mips/jz4740/gpio.c2
-rw-r--r--arch/mips/kernel/r2300_fpu.S2
-rw-r--r--arch/mips/kernel/r4k_fpu.S2
-rw-r--r--arch/mips/kernel/traps.c13
-rw-r--r--arch/mips/kvm/mips.c4
-rw-r--r--arch/mips/mm/mmap.c4
-rw-r--r--arch/mips/mm/sc-mips.c13
-rw-r--r--arch/parisc/include/asm/floppy.h2
-rw-r--r--arch/parisc/include/uapi/asm/unistd.h3
-rw-r--r--arch/parisc/kernel/ptrace.c16
-rw-r--r--arch/parisc/kernel/syscall.S5
-rw-r--r--arch/parisc/kernel/syscall_table.S1
-rw-r--r--arch/powerpc/kernel/eeh_driver.c3
-rw-r--r--arch/powerpc/kernel/hw_breakpoint.c3
-rw-r--r--arch/powerpc/kernel/process.c4
-rw-r--r--arch/powerpc/mm/hash64_64k.c8
-rw-r--r--arch/powerpc/mm/hugepage-hash64.c12
-rw-r--r--arch/powerpc/mm/hugetlbpage-book3e.c13
-rw-r--r--arch/powerpc/mm/mmap.c4
-rw-r--r--arch/s390/include/asm/fpu/internal.h2
-rw-r--r--arch/s390/kernel/compat_signal.c2
-rw-r--r--arch/sparc/Makefile6
-rw-r--r--arch/sparc/include/uapi/asm/unistd.h3
-rw-r--r--arch/sparc/kernel/entry.S17
-rw-r--r--arch/sparc/kernel/hvcalls.S3
-rw-r--r--arch/sparc/kernel/signal_64.c2
-rw-r--r--arch/sparc/kernel/sparc_ksyms_64.c1
-rw-r--r--arch/sparc/kernel/sys_sparc_64.c2
-rw-r--r--arch/sparc/kernel/syscalls.S36
-rw-r--r--arch/sparc/kernel/systbls_32.S2
-rw-r--r--arch/sparc/kernel/systbls_64.S4
-rw-r--r--arch/um/kernel/reboot.c1
-rw-r--r--arch/um/kernel/signal.c2
-rw-r--r--arch/x86/entry/entry_32.S1
-rw-r--r--arch/x86/entry/entry_64_compat.S1
-rw-r--r--arch/x86/include/asm/pci_x86.h2
-rw-r--r--arch/x86/include/asm/uaccess_32.h26
-rw-r--r--arch/x86/include/asm/xen/pci.h4
-rw-r--r--arch/x86/kernel/acpi/sleep.c7
-rw-r--r--arch/x86/kvm/emulate.c4
-rw-r--r--arch/x86/kvm/paging_tmpl.h2
-rw-r--r--arch/x86/kvm/vmx.c14
-rw-r--r--arch/x86/kvm/x86.c8
-rw-r--r--arch/x86/mm/mmap.c6
-rw-r--r--arch/x86/mm/mpx.c2
-rw-r--r--arch/x86/mm/pageattr.c14
-rw-r--r--arch/x86/pci/common.c26
-rw-r--r--arch/x86/pci/intel_mid_pci.c9
-rw-r--r--arch/x86/pci/irq.c23
-rw-r--r--arch/x86/pci/xen.c5
-rw-r--r--arch/x86/platform/intel-quark/imr.c4
-rw-r--r--arch/x86/um/os-Linux/task_size.c4
-rw-r--r--block/Kconfig13
-rw-r--r--block/blk-map.c91
-rw-r--r--block/blk-merge.c8
-rw-r--r--drivers/acpi/nfit.c105
-rw-r--r--drivers/acpi/pci_irq.c17
-rw-r--r--drivers/acpi/pci_link.c128
-rw-r--r--drivers/android/binder.c2
-rw-r--r--drivers/ata/ahci.c49
-rw-r--r--drivers/ata/ahci.h5
-rw-r--r--drivers/ata/ahci_xgene.c85
-rw-r--r--drivers/ata/libahci.c63
-rw-r--r--drivers/ata/libata-scsi.c11
-rw-r--r--drivers/ata/pata_rb532_cf.c11
-rw-r--r--drivers/char/random.c22
-rw-r--r--drivers/clk/ti/dpll3xxx.c3
-rw-r--r--drivers/cpufreq/Kconfig1
-rw-r--r--drivers/cpufreq/Kconfig.arm4
-rw-r--r--drivers/cpufreq/mt8173-cpufreq.c1
-rw-r--r--drivers/devfreq/tegra-devfreq.c2
-rw-r--r--drivers/dma/pxa_dma.c8
-rw-r--r--drivers/gpio/gpio-rcar.c42
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.c18
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cz_dpm.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c5
-rw-r--r--drivers/gpu/drm/amd/powerplay/amd_powerplay.c5
-rw-r--r--drivers/gpu/drm/amd/powerplay/eventmgr/eventactionchains.c1
-rw-r--r--drivers/gpu/drm/amd/powerplay/eventmgr/eventmgr.c4
-rw-r--r--drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c2
-rw-r--r--drivers/gpu/drm/ast/ast_main.c2
-rw-r--r--drivers/gpu/drm/i915/i915_debugfs.c28
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h1
-rw-r--r--drivers/gpu/drm/i915/intel_crt.c13
-rw-r--r--drivers/gpu/drm/i915/intel_csr.c2
-rw-r--r--drivers/gpu/drm/i915/intel_ddi.c112
-rw-r--r--drivers/gpu/drm/i915/intel_display.c86
-rw-r--r--drivers/gpu/drm/i915/intel_dp.c18
-rw-r--r--drivers/gpu/drm/i915/intel_drv.h3
-rw-r--r--drivers/gpu/drm/i915/intel_dsi.c13
-rw-r--r--drivers/gpu/drm/i915/intel_hdmi.c14
-rw-r--r--drivers/gpu/drm/i915/intel_lvds.c14
-rw-r--r--drivers/gpu/drm/i915/intel_pm.c7
-rw-r--r--drivers/gpu/drm/i915/intel_runtime_pm.c153
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_platform.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c40
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.c10
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.h6
-rw-r--r--drivers/gpu/drm/radeon/radeon_device.c1
-rw-r--r--drivers/gpu/drm/radeon/radeon_display.c17
-rw-r--r--drivers/gpu/drm/radeon/radeon_pm.c13
-rw-r--r--drivers/gpu/host1x/bus.c2
-rw-r--r--drivers/gpu/host1x/dev.c7
-rw-r--r--drivers/gpu/host1x/dev.h1
-rw-r--r--drivers/i2c/busses/i2c-brcmstb.c3
-rw-r--r--drivers/infiniband/core/device.c1
-rw-r--r--drivers/infiniband/core/sa_query.c2
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c9
-rw-r--r--drivers/infiniband/hw/mlx5/srq.c41
-rw-r--r--drivers/iommu/amd_iommu.c4
-rw-r--r--drivers/iommu/amd_iommu_init.c63
-rw-r--r--drivers/iommu/dmar.c5
-rw-r--r--drivers/iommu/intel-iommu.c4
-rw-r--r--drivers/irqchip/irq-gic-v3-its.c18
-rw-r--r--drivers/md/dm.c2
-rw-r--r--drivers/media/i2c/adp1653.c2
-rw-r--r--drivers/media/i2c/adv7604.c3
-rw-r--r--drivers/media/usb/au0828/au0828-video.c3
-rw-r--r--drivers/misc/cxl/pci.c2
-rw-r--r--drivers/mmc/host/omap_hsmmc.c2
-rw-r--r--drivers/mtd/ubi/upd.c2
-rw-r--r--drivers/net/can/Kconfig2
-rw-r--r--drivers/net/can/ifi_canfd/ifi_canfd.c97
-rw-r--r--drivers/net/can/rcar_can.c1
-rw-r--r--drivers/net/can/spi/mcp251x.c2
-rw-r--r--drivers/net/can/usb/gs_usb.c24
-rw-r--r--drivers/net/dsa/mv88e6xxx.c105
-rw-r--r--drivers/net/dsa/mv88e6xxx.h2
-rw-r--r--drivers/net/ethernet/3com/3c59x.c2
-rw-r--r--drivers/net/ethernet/Kconfig1
-rw-r--r--drivers/net/ethernet/Makefile1
-rw-r--r--drivers/net/ethernet/altera/altera_tse_main.c1
-rw-r--r--drivers/net/ethernet/aurora/nb8800.c14
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h36
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c22
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c12
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c6
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c181
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.h17
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c201
-rw-r--r--drivers/net/ethernet/brocade/bna/bna_tx_rx.c2
-rw-r--r--drivers/net/ethernet/cavium/thunder/nic.h9
-rw-r--r--drivers/net/ethernet/cavium/thunder/nic_main.c6
-rw-r--r--drivers/net/ethernet/cavium/thunder/nic_reg.h2
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c221
-rw-r--r--drivers/net/ethernet/cisco/enic/enic.h22
-rw-r--r--drivers/net/ethernet/cisco/enic/vnic_cq.c2
-rw-r--r--drivers/net/ethernet/cisco/enic/vnic_dev.c45
-rw-r--r--drivers/net/ethernet/cisco/enic/vnic_intr.c3
-rw-r--r--drivers/net/ethernet/cisco/enic/vnic_rq.c4
-rw-r--r--drivers/net/ethernet/cisco/enic/vnic_wq.c4
-rw-r--r--drivers/net/ethernet/emulex/benet/be.h1
-rw-r--r--drivers/net/ethernet/emulex/benet/be_cmds.h9
-rw-r--r--drivers/net/ethernet/emulex/benet/be_main.c20
-rw-r--r--drivers/net/ethernet/ethoc.c1
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c8
-rw-r--r--drivers/net/ethernet/freescale/fman/fman.c4
-rw-r--r--drivers/net/ethernet/freescale/gianfar.c4
-rw-r--r--drivers/net/ethernet/hisilicon/Kconfig1
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c8
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c37
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h1
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h5
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hns_ethtool.c15
-rw-r--r--drivers/net/ethernet/ibm/ibmveth.c5
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.c62
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.h4
-rw-r--r--drivers/net/ethernet/jme.c26
-rw-r--r--drivers/net/ethernet/mediatek/Kconfig17
-rw-r--r--drivers/net/ethernet/mediatek/Makefile5
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.c1807
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.h421
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_netdev.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/main.c14
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/port.c20
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Makefile2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h27
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_clock.c25
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c36
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_fs.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c129
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c429
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.h51
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tx.c19
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c29
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c22
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/pci.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c9
-rw-r--r--drivers/net/ethernet/moxa/moxart_ether.c4
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed.h3
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_hsi.h25
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_l2.c81
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_mcp.c291
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_mcp.h14
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede.h22
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_main.c387
-rw-r--r--drivers/net/ethernet/qlogic/qlcnic/qlcnic.h3
-rw-r--r--drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c24
-rw-r--r--drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c3
-rw-r--r--drivers/net/ethernet/qualcomm/qca_spi.c2
-rw-r--r--drivers/net/ethernet/realtek/r8169.c18
-rw-r--r--drivers/net/ethernet/renesas/ravb_main.c4
-rw-r--r--drivers/net/ethernet/renesas/sh_eth.c20
-rw-r--r--drivers/net/ethernet/rocker/rocker_ofdpa.c8
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c11
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c11
-rw-r--r--drivers/net/ethernet/synopsys/dwc_eth_qos.c45
-rw-r--r--drivers/net/geneve.c52
-rw-r--r--drivers/net/hyperv/rndis_filter.c19
-rw-r--r--drivers/net/macvtap.c9
-rw-r--r--drivers/net/phy/micrel.c28
-rw-r--r--drivers/net/ppp/ppp_generic.c11
-rw-r--r--drivers/net/usb/ax88172a.c1
-rw-r--r--drivers/net/usb/cdc_ncm.c26
-rw-r--r--drivers/net/usb/qmi_wwan.c7
-rw-r--r--drivers/net/usb/usbnet.c7
-rw-r--r--drivers/net/vmxnet3/vmxnet3_drv.c73
-rw-r--r--drivers/net/vrf.c13
-rw-r--r--drivers/net/vxlan.c72
-rw-r--r--drivers/nvdimm/bus.c20
-rw-r--r--drivers/nvdimm/pmem.c2
-rw-r--r--drivers/nvme/host/core.c111
-rw-r--r--drivers/nvme/host/nvme.h8
-rw-r--r--drivers/nvme/host/pci.c149
-rw-r--r--drivers/of/of_mdio.c1
-rw-r--r--drivers/pci/host/Kconfig1
-rw-r--r--drivers/pci/host/pci-keystone-dw.c11
-rw-r--r--drivers/pci/host/pci-layerscape.c21
-rw-r--r--drivers/pci/xen-pcifront.c10
-rw-r--r--drivers/power/bq27xxx_battery_i2c.c37
-rw-r--r--drivers/scsi/ipr.c5
-rw-r--r--drivers/scsi/scsi_lib.c1
-rw-r--r--drivers/sh/pm_runtime.c2
-rw-r--r--drivers/ssb/Kconfig1
-rw-r--r--drivers/staging/media/davinci_vpfe/vpfe_video.c2
-rw-r--r--drivers/usb/chipidea/ci_hdrc_pci.c4
-rw-r--r--drivers/usb/chipidea/debug.c3
-rw-r--r--drivers/usb/chipidea/otg.c2
-rw-r--r--drivers/usb/core/hub.c8
-rw-r--r--drivers/usb/dwc2/Kconfig1
-rw-r--r--drivers/usb/dwc2/core.c6
-rw-r--r--drivers/usb/dwc2/hcd_ddma.c23
-rw-r--r--drivers/usb/dwc2/hcd_intr.c8
-rw-r--r--drivers/usb/dwc3/core.h1
-rw-r--r--drivers/usb/dwc3/ep0.c5
-rw-r--r--drivers/usb/dwc3/gadget.c70
-rw-r--r--drivers/usb/gadget/legacy/inode.c7
-rw-r--r--drivers/usb/gadget/udc/fsl_qe_udc.c2
-rw-r--r--drivers/usb/gadget/udc/net2280.h15
-rw-r--r--drivers/usb/gadget/udc/udc-core.c3
-rw-r--r--drivers/usb/musb/musb_host.c8
-rw-r--r--drivers/usb/phy/phy-msm-usb.c20
-rw-r--r--drivers/usb/serial/Kconfig16
-rw-r--r--drivers/usb/serial/Makefile1
-rw-r--r--drivers/usb/serial/cp210x.c3
-rw-r--r--drivers/usb/serial/mxu11x0.c1006
-rw-r--r--drivers/usb/serial/option.c14
-rw-r--r--drivers/usb/serial/qcserial.c7
-rw-r--r--drivers/vfio/pci/vfio_pci.c9
-rw-r--r--drivers/vfio/platform/vfio_platform_common.c9
-rw-r--r--drivers/vfio/vfio_iommu_type1.c6
-rw-r--r--drivers/vhost/vhost.c15
-rw-r--r--drivers/video/console/fbcon.c2
-rw-r--r--drivers/virtio/virtio_pci_modern.c2
-rw-r--r--drivers/watchdog/Kconfig11
-rw-r--r--drivers/watchdog/Makefile1
-rw-r--r--drivers/watchdog/sun4v_wdt.c191
-rw-r--r--drivers/xen/xen-pciback/pciback_ops.c9
-rw-r--r--drivers/xen/xen-scsiback.c80
-rw-r--r--drivers/xen/xenbus/xenbus_dev_frontend.c2
-rw-r--r--fs/affs/file.c5
-rw-r--r--fs/binfmt_elf.c2
-rw-r--r--fs/block_dev.c19
-rw-r--r--fs/btrfs/root-tree.c10
-rw-r--r--fs/ceph/addr.c4
-rw-r--r--fs/ceph/caps.c27
-rw-r--r--fs/ceph/inode.c2
-rw-r--r--fs/ceph/mds_client.c16
-rw-r--r--fs/ceph/mds_client.h1
-rw-r--r--fs/ceph/super.h1
-rw-r--r--fs/cifs/cifsfs.c1
-rw-r--r--fs/cifs/cifsfs.h12
-rw-r--r--fs/cifs/cifssmb.c21
-rw-r--r--fs/cifs/smb2pdu.c24
-rw-r--r--fs/dax.c21
-rw-r--r--fs/dcache.c20
-rw-r--r--fs/ext2/file.c19
-rw-r--r--fs/ext2/inode.c16
-rw-r--r--fs/ext4/file.c19
-rw-r--r--fs/ext4/inode.c6
-rw-r--r--fs/ext4/ioctl.c5
-rw-r--r--fs/fs-writeback.c54
-rw-r--r--fs/hpfs/namei.c31
-rw-r--r--fs/jffs2/README.Locking5
-rw-r--r--fs/jffs2/build.c75
-rw-r--r--fs/jffs2/file.c39
-rw-r--r--fs/jffs2/gc.c17
-rw-r--r--fs/jffs2/nodelist.h6
-rw-r--r--fs/namei.c22
-rw-r--r--fs/nfs/blocklayout/extent_tree.c10
-rw-r--r--fs/nfs/nfs42proc.c119
-rw-r--r--fs/nfs/nfs4proc.c4
-rw-r--r--fs/nfs/pnfs.c55
-rw-r--r--fs/ocfs2/aops.c1
-rw-r--r--fs/overlayfs/dir.c10
-rw-r--r--fs/overlayfs/inode.c2
-rw-r--r--fs/overlayfs/super.c13
-rw-r--r--fs/pnode.c9
-rw-r--r--fs/read_write.c9
-rw-r--r--fs/super.c1
-rw-r--r--fs/userfaultfd.c6
-rw-r--r--fs/xattr.c6
-rw-r--r--fs/xfs/xfs_aops.c6
-rw-r--r--fs/xfs/xfs_aops.h1
-rw-r--r--fs/xfs/xfs_bmap_util.c3
-rw-r--r--include/linux/ata.h4
-rw-r--r--include/linux/bio.h37
-rw-r--r--include/linux/blkdev.h25
-rw-r--r--include/linux/bpf.h6
-rw-r--r--include/linux/ceph/ceph_features.h1
-rw-r--r--include/linux/dax.h8
-rw-r--r--include/linux/dcache.h4
-rw-r--r--include/linux/if_bridge.h4
-rw-r--r--include/linux/libata.h2
-rw-r--r--include/linux/libnvdimm.h3
-rw-r--r--include/linux/mlx5/mlx5_ifc.h4
-rw-r--r--include/linux/net.h1
-rw-r--r--include/linux/netdevice.h24
-rw-r--r--include/linux/netfilter.h29
-rw-r--r--include/linux/netfilter/x_tables.h6
-rw-r--r--include/linux/netfilter_arp/arp_tables.h9
-rw-r--r--include/linux/netfilter_ipv4/ip_tables.h9
-rw-r--r--include/linux/netfilter_ipv6/ip6_tables.h9
-rw-r--r--include/linux/nfs_fs.h4
-rw-r--r--include/linux/nfs_xdr.h1
-rw-r--r--include/linux/pci.h17
-rw-r--r--include/linux/perf_event.h7
-rw-r--r--include/linux/power/bq27xxx_battery.h1
-rw-r--r--include/linux/qed/qed_eth_if.h12
-rw-r--r--include/linux/random.h1
-rw-r--r--include/linux/rculist.h21
-rw-r--r--include/linux/skbuff.h24
-rw-r--r--include/linux/socket.h7
-rw-r--r--include/linux/stmmac.h1
-rw-r--r--include/linux/trace_events.h2
-rw-r--r--include/linux/writeback.h5
-rw-r--r--include/net/act_api.h21
-rw-r--r--include/net/checksum.h17
-rw-r--r--include/net/dst_metadata.h5
-rw-r--r--include/net/flow_dissector.h13
-rw-r--r--include/net/ip_tunnels.h30
-rw-r--r--include/net/iw_handler.h6
-rw-r--r--include/net/kcm.h226
-rw-r--r--include/net/netfilter/nft_masq.h4
-rw-r--r--include/net/netns/ipv6.h3
-rw-r--r--include/net/pkt_cls.h14
-rw-r--r--include/net/tc_act/tc_gact.h4
-rw-r--r--include/net/tc_act/tc_skbedit.h16
-rw-r--r--include/net/tcp.h24
-rw-r--r--include/net/udp_tunnel.h4
-rw-r--r--include/net/vxlan.h1
-rw-r--r--include/sound/hdaudio.h2
-rw-r--r--include/uapi/linux/bpf.h20
-rw-r--r--include/uapi/linux/if_link.h2
-rw-r--r--include/uapi/linux/kcm.h40
-rw-r--r--include/uapi/linux/media.h54
-rw-r--r--include/uapi/linux/ndctl.h11
-rw-r--r--include/uapi/linux/netfilter/nf_tables.h6
-rw-r--r--include/uapi/linux/pkt_cls.h2
-rw-r--r--kernel/bpf/Makefile2
-rw-r--r--kernel/bpf/arraymap.c2
-rw-r--r--kernel/bpf/hashtab.c240
-rw-r--r--kernel/bpf/helpers.c2
-rw-r--r--kernel/bpf/percpu_freelist.c100
-rw-r--r--kernel/bpf/percpu_freelist.h31
-rw-r--r--kernel/bpf/stackmap.c89
-rw-r--r--kernel/bpf/syscall.c35
-rw-r--r--kernel/events/core.c368
-rw-r--r--kernel/memremap.c4
-rw-r--r--kernel/sched/deadline.c2
-rw-r--r--kernel/trace/bpf_trace.c2
-rw-r--r--kernel/trace/trace_events.c17
-rw-r--r--kernel/trace/trace_events_filter.c13
-rw-r--r--kernel/trace/trace_stack.c6
-rw-r--r--lib/random32.c1
-rw-r--r--mm/filemap.c12
-rw-r--r--mm/huge_memory.c9
-rw-r--r--mm/memory.c14
-rw-r--r--mm/migrate.c2
-rw-r--r--net/Kconfig1
-rw-r--r--net/Makefile1
-rw-r--r--net/ax25/ax25_ip.c15
-rw-r--r--net/bridge/br_fdb.c15
-rw-r--r--net/bridge/br_netfilter_hooks.c68
-rw-r--r--net/bridge/br_stp.c11
-rw-r--r--net/ceph/messenger.c15
-rw-r--r--net/ceph/osd_client.c4
-rw-r--r--net/core/filter.c135
-rw-r--r--net/core/flow_dissector.c13
-rw-r--r--net/core/rtnetlink.c6
-rw-r--r--net/core/skbuff.c61
-rw-r--r--net/dsa/dsa.c16
-rw-r--r--net/ipv4/arp.c35
-rw-r--r--net/ipv4/fou.c3
-rw-r--r--net/ipv4/gre_offload.c3
-rw-r--r--net/ipv4/igmp.c3
-rw-r--r--net/ipv4/ip_gre.c10
-rw-r--r--net/ipv4/ip_output.c5
-rw-r--r--net/ipv4/ip_tunnel.c3
-rw-r--r--net/ipv4/netfilter/arp_tables.c66
-rw-r--r--net/ipv4/netfilter/arptable_filter.c40
-rw-r--r--net/ipv4/netfilter/ip_tables.c63
-rw-r--r--net/ipv4/netfilter/iptable_filter.c44
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c41
-rw-r--r--net/ipv4/netfilter/iptable_nat.c41
-rw-r--r--net/ipv4/netfilter/iptable_raw.c38
-rw-r--r--net/ipv4/netfilter/iptable_security.c44
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c4
-rw-r--r--net/ipv4/netfilter/nft_masq_ipv4.c7
-rw-r--r--net/ipv4/tcp.c15
-rw-r--r--net/ipv4/tcp_metrics.c2
-rw-r--r--net/ipv4/tcp_minisocks.c3
-rw-r--r--net/ipv4/udp_tunnel.c2
-rw-r--r--net/ipv6/exthdrs_core.c6
-rw-r--r--net/ipv6/ip6_fib.c91
-rw-r--r--net/ipv6/ip6_gre.c2
-rw-r--r--net/ipv6/ip6_tunnel.c2
-rw-r--r--net/ipv6/ip6_udp_tunnel.c6
-rw-r--r--net/ipv6/mcast.c3
-rw-r--r--net/ipv6/netfilter/ip6_tables.c65
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c47
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c46
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c41
-rw-r--r--net/ipv6/netfilter/ip6table_raw.c46
-rw-r--r--net/ipv6/netfilter/ip6table_security.c44
-rw-r--r--net/ipv6/netfilter/nft_masq_ipv6.c7
-rw-r--r--net/ipv6/udp.c6
-rw-r--r--net/kcm/Kconfig10
-rw-r--r--net/kcm/Makefile3
-rw-r--r--net/kcm/kcmproc.c426
-rw-r--r--net/kcm/kcmsock.c2409
-rw-r--r--net/mac80211/agg-rx.c2
-rw-r--r--net/mac80211/ieee80211_i.h2
-rw-r--r--net/mac80211/rc80211_minstrel.c2
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c16
-rw-r--r--net/mac80211/rx.c37
-rw-r--r--net/netfilter/ipvs/ip_vs_app.c8
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c17
-rw-r--r--net/netfilter/nft_masq.c51
-rw-r--r--net/netfilter/nft_meta.c11
-rw-r--r--net/netfilter/x_tables.c65
-rw-r--r--net/netfilter/xt_osf.c2
-rw-r--r--net/netlabel/netlabel_domainhash.c4
-rw-r--r--net/netlabel/netlabel_unlabeled.c6
-rw-r--r--net/packet/af_packet.c43
-rw-r--r--net/sched/act_ife.c4
-rw-r--r--net/sched/act_ipt.c2
-rw-r--r--net/sched/cls_flower.c64
-rw-r--r--net/sched/sch_dsmark.c2
-rw-r--r--net/sctp/ipv6.c2
-rw-r--r--net/sctp/proc.c10
-rw-r--r--net/socket.c18
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c2
-rw-r--r--net/sunrpc/cache.c2
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c2
-rw-r--r--net/switchdev/switchdev.c5
-rw-r--r--net/tipc/bcast.c5
-rw-r--r--net/tipc/bcast.h1
-rw-r--r--net/tipc/bearer.c18
-rw-r--r--net/tipc/link.c122
-rw-r--r--net/tipc/link.h1
-rw-r--r--net/tipc/name_table.c6
-rw-r--r--net/tipc/net.c7
-rw-r--r--net/tipc/netlink.c69
-rw-r--r--net/tipc/netlink.h11
-rw-r--r--net/tipc/node.c25
-rw-r--r--net/tipc/socket.c42
-rw-r--r--net/tipc/subscr.c3
-rw-r--r--net/tipc/udp_media.c42
-rw-r--r--net/wireless/core.c2
-rw-r--r--net/wireless/nl80211.c2
-rw-r--r--net/wireless/sme.c6
-rw-r--r--net/wireless/wext-core.c52
-rw-r--r--samples/bpf/Makefile8
-rw-r--r--samples/bpf/bpf_helpers.h1
-rw-r--r--samples/bpf/bpf_load.c70
-rw-r--r--samples/bpf/bpf_load.h6
-rw-r--r--samples/bpf/fds_example.c2
-rw-r--r--samples/bpf/libbpf.c5
-rw-r--r--samples/bpf/libbpf.h2
-rw-r--r--samples/bpf/map_perf_test_kern.c100
-rw-r--r--samples/bpf/map_perf_test_user.c155
-rw-r--r--samples/bpf/offwaketime_user.c67
-rw-r--r--samples/bpf/sock_example.c2
-rw-r--r--samples/bpf/spintest_kern.c68
-rw-r--r--samples/bpf/spintest_user.c50
-rw-r--r--samples/bpf/test_maps.c29
-rw-r--r--samples/bpf/test_verifier.c4
-rw-r--r--security/selinux/hooks.c2
-rw-r--r--sound/core/control_compat.c90
-rw-r--r--sound/core/pcm_compat.c177
-rw-r--r--sound/core/rawmidi_compat.c56
-rw-r--r--sound/core/seq/oss/seq_oss.c2
-rw-r--r--sound/core/seq/oss/seq_oss_device.h1
-rw-r--r--sound/core/seq/oss/seq_oss_init.c16
-rw-r--r--sound/core/timer_compat.c18
-rw-r--r--sound/hda/hdac_controller.c7
-rw-r--r--sound/pci/hda/hda_controller.c47
-rw-r--r--sound/pci/hda/hda_intel.c16
-rw-r--r--sound/pci/hda/patch_hdmi.c19
-rw-r--r--sound/pci/hda/patch_realtek.c40
-rw-r--r--sound/pci/rme9652/hdsp.c4
-rw-r--r--sound/pci/rme9652/hdspm.c16
-rw-r--r--sound/usb/quirks.c1
-rw-r--r--tools/testing/nvdimm/test/nfit.c8
-rw-r--r--tools/testing/selftests/ftrace/test.d/instances/instance.tc15
-rw-r--r--virt/kvm/arm/vgic.c4
-rw-r--r--virt/kvm/async_pf.c2
592 files changed, 14507 insertions, 4535 deletions
diff --git a/Documentation/DocBook/media/v4l/media-types.xml b/Documentation/DocBook/media/v4l/media-types.xml
index 1af384250910..0ee0f3386cdf 100644
--- a/Documentation/DocBook/media/v4l/media-types.xml
+++ b/Documentation/DocBook/media/v4l/media-types.xml
@@ -57,10 +57,6 @@
<entry>Connector for a RGB composite signal.</entry>
</row>
<row>
- <entry><constant>MEDIA_ENT_F_CONN_TEST</constant></entry>
- <entry>Connector for a test generator.</entry>
- </row>
- <row>
<entry><constant>MEDIA_ENT_F_CAM_SENSOR</constant></entry>
<entry>Camera video sensor entity.</entry>
</row>
diff --git a/Documentation/devicetree/bindings/net/can/rcar_can.txt b/Documentation/devicetree/bindings/net/can/rcar_can.txt
index 65edc055722f..8d40ab27bc8c 100644
--- a/Documentation/devicetree/bindings/net/can/rcar_can.txt
+++ b/Documentation/devicetree/bindings/net/can/rcar_can.txt
@@ -9,8 +9,10 @@ Required properties:
"renesas,can-r8a7792" if CAN controller is a part of R8A7792 SoC.
"renesas,can-r8a7793" if CAN controller is a part of R8A7793 SoC.
"renesas,can-r8a7794" if CAN controller is a part of R8A7794 SoC.
+ "renesas,can-r8a7795" if CAN controller is a part of R8A7795 SoC.
"renesas,rcar-gen1-can" for a generic R-Car Gen1 compatible device.
"renesas,rcar-gen2-can" for a generic R-Car Gen2 compatible device.
+ "renesas,rcar-gen3-can" for a generic R-Car Gen3 compatible device.
When compatible with the generic version, nodes must list the
SoC-specific version corresponding to the platform first
followed by the generic version.
@@ -22,6 +24,15 @@ Required properties:
- pinctrl-0: pin control group to be used for this controller.
- pinctrl-names: must be "default".
+Required properties for "renesas,can-r8a7795" compatible:
+In R8A7795 SoC, "clkp2" can be CANFD clock. This is a div6 clock and can be
+used by both CAN and CAN FD controller at the same time. It needs to be scaled
+to maximum frequency if any of these controllers use it. This is done using
+the below properties.
+
+- assigned-clocks: phandle of clkp2(CANFD) clock.
+- assigned-clock-rates: maximum frequency of this clock.
+
Optional properties:
- renesas,can-clock-select: R-Car CAN Clock Source Select. Valid values are:
<0x0> (default) : Peripheral clock (clkp1)
diff --git a/Documentation/devicetree/bindings/net/fsl-fec.txt b/Documentation/devicetree/bindings/net/fsl-fec.txt
index a4799fff0d1f..b037a9d78d93 100644
--- a/Documentation/devicetree/bindings/net/fsl-fec.txt
+++ b/Documentation/devicetree/bindings/net/fsl-fec.txt
@@ -12,7 +12,7 @@ Optional properties:
only if property "phy-reset-gpios" is available. Missing the property
will have the duration be 1 millisecond. Numbers greater than 1000 are
invalid and 1 millisecond will be used instead.
-- phy-reset-active-low : If present then the reset sequence using the GPIO
+- phy-reset-active-high : If present then the reset sequence using the GPIO
specified in the "phy-reset-gpios" property is reversed (H=reset state,
L=operation state).
- phy-supply : regulator that powers the Ethernet PHY.
diff --git a/Documentation/devicetree/bindings/net/mediatek-net.txt b/Documentation/devicetree/bindings/net/mediatek-net.txt
new file mode 100644
index 000000000000..5ca79290eabf
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/mediatek-net.txt
@@ -0,0 +1,77 @@
+MediaTek Frame Engine Ethernet controller
+=========================================
+
+The frame engine ethernet controller can be found on MediaTek SoCs. These SoCs
+have dual GMAC each represented by a child node..
+
+* Ethernet controller node
+
+Required properties:
+- compatible: Should be "mediatek,mt7623-eth"
+- reg: Address and length of the register set for the device
+- interrupts: Should contain the frame engines interrupt
+- clocks: the clock used by the core
+- clock-names: the names of the clock listed in the clocks property. These are
+ "ethif", "esw", "gp2", "gp1"
+- power-domains: phandle to the power domain that the ethernet is part of
+- resets: Should contain a phandle to the ethsys reset signal
+- reset-names: Should contain the reset signal name "eth"
+- mediatek,ethsys: phandle to the syscon node that handles the port setup
+- mediatek,pctl: phandle to the syscon node that handles the ports slew rate
+ and driver current
+
+Optional properties:
+- interrupt-parent: Should be the phandle for the interrupt controller
+ that services interrupts for this device
+
+
+* Ethernet MAC node
+
+Required properties:
+- compatible: Should be "mediatek,eth-mac"
+- reg: The number of the MAC
+- phy-handle: see ethernet.txt file in the same directory.
+
+Example:
+
+eth: ethernet@1b100000 {
+ compatible = "mediatek,mt7623-eth";
+ reg = <0 0x1b100000 0 0x20000>;
+ clocks = <&topckgen CLK_TOP_ETHIF_SEL>,
+ <&ethsys CLK_ETHSYS_ESW>,
+ <&ethsys CLK_ETHSYS_GP2>,
+ <&ethsys CLK_ETHSYS_GP1>;
+ clock-names = "ethif", "esw", "gp2", "gp1";
+ interrupts = <GIC_SPI 200 IRQ_TYPE_LEVEL_LOW>;
+ power-domains = <&scpsys MT2701_POWER_DOMAIN_ETH>;
+ resets = <&ethsys MT2701_ETHSYS_ETH_RST>;
+ reset-names = "eth";
+ mediatek,ethsys = <&ethsys>;
+ mediatek,pctl = <&syscfg_pctl_a>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ gmac1: mac@0 {
+ compatible = "mediatek,eth-mac";
+ reg = <0>;
+ phy-handle = <&phy0>;
+ };
+
+ gmac2: mac@1 {
+ compatible = "mediatek,eth-mac";
+ reg = <1>;
+ phy-handle = <&phy1>;
+ };
+
+ mdio-bus {
+ phy0: ethernet-phy@0 {
+ reg = <0>;
+ phy-mode = "rgmii";
+ };
+
+ phy1: ethernet-phy@1 {
+ reg = <1>;
+ phy-mode = "rgmii";
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/regulator/tps65217.txt b/Documentation/devicetree/bindings/regulator/tps65217.txt
index d18109657da6..4f05d208c95c 100644
--- a/Documentation/devicetree/bindings/regulator/tps65217.txt
+++ b/Documentation/devicetree/bindings/regulator/tps65217.txt
@@ -26,11 +26,7 @@ Example:
ti,pmic-shutdown-controller;
regulators {
- #address-cells = <1>;
- #size-cells = <0>;
-
dcdc1_reg: dcdc1 {
- reg = <0>;
regulator-min-microvolt = <900000>;
regulator-max-microvolt = <1800000>;
regulator-boot-on;
@@ -38,7 +34,6 @@ Example:
};
dcdc2_reg: dcdc2 {
- reg = <1>;
regulator-min-microvolt = <900000>;
regulator-max-microvolt = <3300000>;
regulator-boot-on;
@@ -46,7 +41,6 @@ Example:
};
dcdc3_reg: dcc3 {
- reg = <2>;
regulator-min-microvolt = <900000>;
regulator-max-microvolt = <1500000>;
regulator-boot-on;
@@ -54,7 +48,6 @@ Example:
};
ldo1_reg: ldo1 {
- reg = <3>;
regulator-min-microvolt = <1000000>;
regulator-max-microvolt = <3300000>;
regulator-boot-on;
@@ -62,7 +55,6 @@ Example:
};
ldo2_reg: ldo2 {
- reg = <4>;
regulator-min-microvolt = <900000>;
regulator-max-microvolt = <3300000>;
regulator-boot-on;
@@ -70,7 +62,6 @@ Example:
};
ldo3_reg: ldo3 {
- reg = <5>;
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <3300000>;
regulator-boot-on;
@@ -78,7 +69,6 @@ Example:
};
ldo4_reg: ldo4 {
- reg = <6>;
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <3300000>;
regulator-boot-on;
diff --git a/Documentation/networking/kcm.txt b/Documentation/networking/kcm.txt
new file mode 100644
index 000000000000..3476ede5bc2c
--- /dev/null
+++ b/Documentation/networking/kcm.txt
@@ -0,0 +1,285 @@
+Kernel Connection Mulitplexor
+-----------------------------
+
+Kernel Connection Multiplexor (KCM) is a mechanism that provides a message based
+interface over TCP for generic application protocols. With KCM an application
+can efficiently send and receive application protocol messages over TCP using
+datagram sockets.
+
+KCM implements an NxM multiplexor in the kernel as diagrammed below:
+
++------------+ +------------+ +------------+ +------------+
+| KCM socket | | KCM socket | | KCM socket | | KCM socket |
++------------+ +------------+ +------------+ +------------+
+ | | | |
+ +-----------+ | | +----------+
+ | | | |
+ +----------------------------------+
+ | Multiplexor |
+ +----------------------------------+
+ | | | | |
+ +---------+ | | | ------------+
+ | | | | |
++----------+ +----------+ +----------+ +----------+ +----------+
+| Psock | | Psock | | Psock | | Psock | | Psock |
++----------+ +----------+ +----------+ +----------+ +----------+
+ | | | | |
++----------+ +----------+ +----------+ +----------+ +----------+
+| TCP sock | | TCP sock | | TCP sock | | TCP sock | | TCP sock |
++----------+ +----------+ +----------+ +----------+ +----------+
+
+KCM sockets
+-----------
+
+The KCM sockets provide the user interface to the muliplexor. All the KCM sockets
+bound to a multiplexor are considered to have equivalent function, and I/O
+operations in different sockets may be done in parallel without the need for
+synchronization between threads in userspace.
+
+Multiplexor
+-----------
+
+The multiplexor provides the message steering. In the transmit path, messages
+written on a KCM socket are sent atomically on an appropriate TCP socket.
+Similarly, in the receive path, messages are constructed on each TCP socket
+(Psock) and complete messages are steered to a KCM socket.
+
+TCP sockets & Psocks
+--------------------
+
+TCP sockets may be bound to a KCM multiplexor. A Psock structure is allocated
+for each bound TCP socket, this structure holds the state for constructing
+messages on receive as well as other connection specific information for KCM.
+
+Connected mode semantics
+------------------------
+
+Each multiplexor assumes that all attached TCP connections are to the same
+destination and can use the different connections for load balancing when
+transmitting. The normal send and recv calls (include sendmmsg and recvmmsg)
+can be used to send and receive messages from the KCM socket.
+
+Socket types
+------------
+
+KCM supports SOCK_DGRAM and SOCK_SEQPACKET socket types.
+
+Message delineation
+-------------------
+
+Messages are sent over a TCP stream with some application protocol message
+format that typically includes a header which frames the messages. The length
+of a received message can be deduced from the application protocol header
+(often just a simple length field).
+
+A TCP stream must be parsed to determine message boundaries. Berkeley Packet
+Filter (BPF) is used for this. When attaching a TCP socket to a multiplexor a
+BPF program must be specified. The program is called at the start of receiving
+a new message and is given an skbuff that contains the bytes received so far.
+It parses the message header and returns the length of the message. Given this
+information, KCM will construct the message of the stated length and deliver it
+to a KCM socket.
+
+TCP socket management
+---------------------
+
+When a TCP socket is attached to a KCM multiplexor data ready (POLLIN) and
+write space available (POLLOUT) events are handled by the multiplexor. If there
+is a state change (disconnection) or other error on a TCP socket, an error is
+posted on the TCP socket so that a POLLERR event happens and KCM discontinues
+using the socket. When the application gets the error notification for a
+TCP socket, it should unattach the socket from KCM and then handle the error
+condition (the typical response is to close the socket and create a new
+connection if necessary).
+
+KCM limits the maximum receive message size to be the size of the receive
+socket buffer on the attached TCP socket (the socket buffer size can be set by
+SO_RCVBUF). If the length of a new message reported by the BPF program is
+greater than this limit a corresponding error (EMSGSIZE) is posted on the TCP
+socket. The BPF program may also enforce a maximum messages size and report an
+error when it is exceeded.
+
+A timeout may be set for assembling messages on a receive socket. The timeout
+value is taken from the receive timeout of the attached TCP socket (this is set
+by SO_RCVTIMEO). If the timer expires before assembly is complete an error
+(ETIMEDOUT) is posted on the socket.
+
+User interface
+==============
+
+Creating a multiplexor
+----------------------
+
+A new multiplexor and initial KCM socket is created by a socket call:
+
+ socket(AF_KCM, type, protocol)
+
+ - type is either SOCK_DGRAM or SOCK_SEQPACKET
+ - protocol is KCMPROTO_CONNECTED
+
+Cloning KCM sockets
+-------------------
+
+After the first KCM socket is created using the socket call as described
+above, additional sockets for the multiplexor can be created by cloning
+a KCM socket. This is accomplished by an ioctl on a KCM socket:
+
+ /* From linux/kcm.h */
+ struct kcm_clone {
+ int fd;
+ };
+
+ struct kcm_clone info;
+
+ memset(&info, 0, sizeof(info));
+
+ err = ioctl(kcmfd, SIOCKCMCLONE, &info);
+
+ if (!err)
+ newkcmfd = info.fd;
+
+Attach transport sockets
+------------------------
+
+Attaching of transport sockets to a multiplexor is performed by calling an
+ioctl on a KCM socket for the multiplexor. e.g.:
+
+ /* From linux/kcm.h */
+ struct kcm_attach {
+ int fd;
+ int bpf_fd;
+ };
+
+ struct kcm_attach info;
+
+ memset(&info, 0, sizeof(info));
+
+ info.fd = tcpfd;
+ info.bpf_fd = bpf_prog_fd;
+
+ ioctl(kcmfd, SIOCKCMATTACH, &info);
+
+The kcm_attach structure contains:
+ fd: file descriptor for TCP socket being attached
+ bpf_prog_fd: file descriptor for compiled BPF program downloaded
+
+Unattach transport sockets
+--------------------------
+
+Unattaching a transport socket from a multiplexor is straightforward. An
+"unattach" ioctl is done with the kcm_unattach structure as the argument:
+
+ /* From linux/kcm.h */
+ struct kcm_unattach {
+ int fd;
+ };
+
+ struct kcm_unattach info;
+
+ memset(&info, 0, sizeof(info));
+
+ info.fd = cfd;
+
+ ioctl(fd, SIOCKCMUNATTACH, &info);
+
+Disabling receive on KCM socket
+-------------------------------
+
+A setsockopt is used to disable or enable receiving on a KCM socket.
+When receive is disabled, any pending messages in the socket's
+receive buffer are moved to other sockets. This feature is useful
+if an application thread knows that it will be doing a lot of
+work on a request and won't be able to service new messages for a
+while. Example use:
+
+ int val = 1;
+
+ setsockopt(kcmfd, SOL_KCM, KCM_RECV_DISABLE, &val, sizeof(val))
+
+BFP programs for message delineation
+------------------------------------
+
+BPF programs can be compiled using the BPF LLVM backend. For exmple,
+the BPF program for parsing Thrift is:
+
+ #include "bpf.h" /* for __sk_buff */
+ #include "bpf_helpers.h" /* for load_word intrinsic */
+
+ SEC("socket_kcm")
+ int bpf_prog1(struct __sk_buff *skb)
+ {
+ return load_word(skb, 0) + 4;
+ }
+
+ char _license[] SEC("license") = "GPL";
+
+Use in applications
+===================
+
+KCM accelerates application layer protocols. Specifically, it allows
+applications to use a message based interface for sending and receiving
+messages. The kernel provides necessary assurances that messages are sent
+and received atomically. This relieves much of the burden applications have
+in mapping a message based protocol onto the TCP stream. KCM also make
+application layer messages a unit of work in the kernel for the purposes of
+steerng and scheduling, which in turn allows a simpler networking model in
+multithreaded applications.
+
+Configurations
+--------------
+
+In an Nx1 configuration, KCM logically provides multiple socket handles
+to the same TCP connection. This allows parallelism between in I/O
+operations on the TCP socket (for instance copyin and copyout of data is
+parallelized). In an application, a KCM socket can be opened for each
+processing thread and inserted into the epoll (similar to how SO_REUSEPORT
+is used to allow multiple listener sockets on the same port).
+
+In a MxN configuration, multiple connections are established to the
+same destination. These are used for simple load balancing.
+
+Message batching
+----------------
+
+The primary purpose of KCM is load balancing between KCM sockets and hence
+threads in a nominal use case. Perfect load balancing, that is steering
+each received message to a different KCM socket or steering each sent
+message to a different TCP socket, can negatively impact performance
+since this doesn't allow for affinities to be established. Balancing
+based on groups, or batches of messages, can be beneficial for performance.
+
+On transmit, there are three ways an application can batch (pipeline)
+messages on a KCM socket.
+ 1) Send multiple messages in a single sendmmsg.
+ 2) Send a group of messages each with a sendmsg call, where all messages
+ except the last have MSG_BATCH in the flags of sendmsg call.
+ 3) Create "super message" composed of multiple messages and send this
+ with a single sendmsg.
+
+On receive, the KCM module attempts to queue messages received on the
+same KCM socket during each TCP ready callback. The targeted KCM socket
+changes at each receive ready callback on the KCM socket. The application
+does not need to configure this.
+
+Error handling
+--------------
+
+An application should include a thread to monitor errors raised on
+the TCP connection. Normally, this will be done by placing each
+TCP socket attached to a KCM multiplexor in epoll set for POLLERR
+event. If an error occurs on an attached TCP socket, KCM sets an EPIPE
+on the socket thus waking up the application thread. When the application
+sees the error (which may just be a disconnect) it should unattach the
+socket from KCM and then close it. It is assumed that once an error is
+posted on the TCP socket the data stream is unrecoverable (i.e. an error
+may have occurred in in the middle of receiving a messssge).
+
+TCP connection monitoring
+-------------------------
+
+In KCM there is no means to correlate a message to the TCP socket that
+was used to send or receive the message (except in the case there is
+only one attached TCP socket). However, the application does retain
+an open file descriptor to the socket so it will be able to get statistics
+from the socket which can be used in detecting issues (such as high
+retransmissions on the socket).
diff --git a/Documentation/watchdog/watchdog-parameters.txt b/Documentation/watchdog/watchdog-parameters.txt
index 9f9ec9f76039..4e4b6f10d841 100644
--- a/Documentation/watchdog/watchdog-parameters.txt
+++ b/Documentation/watchdog/watchdog-parameters.txt
@@ -400,3 +400,7 @@ wm8350_wdt:
nowayout: Watchdog cannot be stopped once started
(default=kernel config parameter)
-------------------------------------------------
+sun4v_wdt:
+timeout_ms: Watchdog timeout in milliseconds 1..180000, default=60000)
+nowayout: Watchdog cannot be stopped once started
+-------------------------------------------------
diff --git a/MAINTAINERS b/MAINTAINERS
index be0b56b38720..1663ad41803c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -920,17 +920,24 @@ M: Emilio López <emilio@elopez.com.ar>
S: Maintained
F: drivers/clk/sunxi/
-ARM/Amlogic MesonX SoC support
+ARM/Amlogic Meson SoC support
M: Carlo Caione <carlo@caione.org>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+L: linux-meson@googlegroups.com
+W: http://linux-meson.com/
S: Maintained
-F: drivers/media/rc/meson-ir.c
-N: meson[x68]
+F: arch/arm/mach-meson/
+F: arch/arm/boot/dts/meson*
+N: meson
ARM/Annapurna Labs ALPINE ARCHITECTURE
M: Tsahee Zidenberg <tsahee@annapurnalabs.com>
+M: Antoine Tenart <antoine.tenart@free-electrons.com>
S: Maintained
F: arch/arm/mach-alpine/
+F: arch/arm/boot/dts/alpine*
+F: arch/arm64/boot/dts/al/
+F: drivers/*/*alpine*
ARM/ATMEL AT91RM9200, AT91SAM9 AND SAMA5 SOC SUPPORT
M: Nicolas Ferre <nicolas.ferre@atmel.com>
@@ -3446,7 +3453,6 @@ F: drivers/usb/dwc2/
DESIGNWARE USB3 DRD IP DRIVER
M: Felipe Balbi <balbi@kernel.org>
L: linux-usb@vger.kernel.org
-L: linux-omap@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
S: Maintained
F: drivers/usb/dwc3/
@@ -4522,6 +4528,12 @@ L: linuxppc-dev@lists.ozlabs.org
S: Maintained
F: drivers/dma/fsldma.*
+FREESCALE GPMI NAND DRIVER
+M: Han Xu <han.xu@nxp.com>
+L: linux-mtd@lists.infradead.org
+S: Maintained
+F: drivers/mtd/nand/gpmi-nand/*
+
FREESCALE I2C CPM DRIVER
M: Jochen Friedrich <jochen@scram.de>
L: linuxppc-dev@lists.ozlabs.org
@@ -4538,7 +4550,7 @@ F: include/linux/platform_data/video-imxfb.h
F: drivers/video/fbdev/imxfb.c
FREESCALE QUAD SPI DRIVER
-M: Han Xu <han.xu@freescale.com>
+M: Han Xu <han.xu@nxp.com>
L: linux-mtd@lists.infradead.org
S: Maintained
F: drivers/mtd/spi-nor/fsl-quadspi.c
@@ -4552,6 +4564,15 @@ S: Maintained
F: drivers/net/ethernet/freescale/fs_enet/
F: include/linux/fs_enet_pd.h
+FREESCALE IMX / MXC FEC DRIVER
+M: Fugang Duan <fugang.duan@nxp.com>
+L: netdev@vger.kernel.org
+S: Maintained
+F: drivers/net/ethernet/freescale/fec_main.c
+F: drivers/net/ethernet/freescale/fec_ptp.c
+F: drivers/net/ethernet/freescale/fec.h
+F: Documentation/devicetree/bindings/net/fsl-fec.txt
+
FREESCALE QUICC ENGINE LIBRARY
L: linuxppc-dev@lists.ozlabs.org
S: Orphan
@@ -6769,6 +6790,7 @@ S: Maintained
F: Documentation/networking/mac80211-injection.txt
F: include/net/mac80211.h
F: net/mac80211/
+F: drivers/net/wireless/mac80211_hwsim.[ch]
MACVLAN DRIVER
M: Patrick McHardy <kaber@trash.net>
@@ -7013,6 +7035,13 @@ F: include/uapi/linux/meye.h
F: include/uapi/linux/ivtv*
F: include/uapi/linux/uvcvideo.h
+MEDIATEK ETHERNET DRIVER
+M: Felix Fietkau <nbd@openwrt.org>
+M: John Crispin <blogic@openwrt.org>
+L: netdev@vger.kernel.org
+S: Maintained
+F: drivers/net/ethernet/mediatek/
+
MEDIATEK MT7601U WIRELESS LAN DRIVER
M: Jakub Kicinski <kubakici@wp.pl>
L: linux-wireless@vger.kernel.org
@@ -7365,7 +7394,7 @@ F: drivers/tty/isicom.c
F: include/linux/isicom.h
MUSB MULTIPOINT HIGH SPEED DUAL-ROLE CONTROLLER
-M: Felipe Balbi <balbi@kernel.org>
+M: Bin Liu <b-liu@ti.com>
L: linux-usb@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
S: Maintained
@@ -7696,13 +7725,13 @@ S: Maintained
F: arch/nios2/
NOKIA N900 POWER SUPPLY DRIVERS
-M: Pali Rohár <pali.rohar@gmail.com>
-S: Maintained
+R: Pali Rohár <pali.rohar@gmail.com>
F: include/linux/power/bq2415x_charger.h
F: include/linux/power/bq27xxx_battery.h
F: include/linux/power/isp1704_charger.h
F: drivers/power/bq2415x_charger.c
F: drivers/power/bq27xxx_battery.c
+F: drivers/power/bq27xxx_battery_i2c.c
F: drivers/power/isp1704_charger.c
F: drivers/power/rx51_battery.c
@@ -7933,11 +7962,9 @@ F: drivers/media/platform/omap3isp/
F: drivers/staging/media/omap4iss/
OMAP USB SUPPORT
-M: Felipe Balbi <balbi@kernel.org>
L: linux-usb@vger.kernel.org
L: linux-omap@vger.kernel.org
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
-S: Maintained
+S: Orphan
F: drivers/usb/*/*omap*
F: arch/arm/*omap*/usb*
@@ -9572,6 +9599,12 @@ M: Andreas Noever <andreas.noever@gmail.com>
S: Maintained
F: drivers/thunderbolt/
+TI BQ27XXX POWER SUPPLY DRIVER
+R: Andrew F. Davis <afd@ti.com>
+F: include/linux/power/bq27xxx_battery.h
+F: drivers/power/bq27xxx_battery.c
+F: drivers/power/bq27xxx_battery_i2c.c
+
TIMEKEEPING, CLOCKSOURCE CORE, NTP, ALARMTIMER
M: John Stultz <john.stultz@linaro.org>
M: Thomas Gleixner <tglx@linutronix.de>
diff --git a/Makefile b/Makefile
index fbe1b921798f..2d519d2fb3a9 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
VERSION = 4
PATCHLEVEL = 5
SUBLEVEL = 0
-EXTRAVERSION = -rc5
+EXTRAVERSION = -rc7
NAME = Blurry Fish Butt
# *DOCUMENTATION*
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 0655495470ad..8a188bc1786a 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -12,8 +12,6 @@ config ARC
select BUILDTIME_EXTABLE_SORT
select COMMON_CLK
select CLONE_BACKWARDS
- # ARC Busybox based initramfs absolutely relies on DEVTMPFS for /dev
- select DEVTMPFS if !INITRAMFS_SOURCE=""
select GENERIC_ATOMIC64
select GENERIC_CLOCKEVENTS
select GENERIC_FIND_FIRST_BIT
@@ -275,14 +273,6 @@ config ARC_DCCM_BASE
default "0xA0000000"
depends on ARC_HAS_DCCM
-config ARC_HAS_HW_MPY
- bool "Use Hardware Multiplier (Normal or Faster XMAC)"
- default y
- help
- Influences how gcc generates code for MPY operations.
- If enabled, MPYxx insns are generated, provided by Standard/XMAC
- Multipler. Otherwise software multipy lib is used
-
choice
prompt "MMU Version"
default ARC_MMU_V3 if ARC_CPU_770
@@ -542,14 +532,6 @@ config ARC_DBG_TLB_MISS_COUNT
Counts number of I and D TLB Misses and exports them via Debugfs
The counters can be cleared via Debugfs as well
-if SMP
-
-config ARC_IPI_DBG
- bool "Debug Inter Core interrupts"
- default n
-
-endif
-
endif
config ARC_UBOOT_SUPPORT
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index aeb19021099e..c8230f3395f2 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -74,10 +74,6 @@ ldflags-$(CONFIG_CPU_BIG_ENDIAN) += -EB
# --build-id w/o "-marclinux". Default arc-elf32-ld is OK
ldflags-$(upto_gcc44) += -marclinux
-ifndef CONFIG_ARC_HAS_HW_MPY
- cflags-y += -mno-mpy
-endif
-
LIBGCC := $(shell $(CC) $(cflags-y) --print-libgcc-file-name)
# Modules with short calls might break for calls into builtin-kernel
diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig
index f1ac9818b751..5d4e2a07ad3e 100644
--- a/arch/arc/configs/axs101_defconfig
+++ b/arch/arc/configs/axs101_defconfig
@@ -39,6 +39,7 @@ CONFIG_IP_PNP_RARP=y
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
# CONFIG_STANDALONE is not set
# CONFIG_PREVENT_FIRMWARE_BUILD is not set
# CONFIG_FIRMWARE_IN_KERNEL is not set
@@ -73,7 +74,6 @@ CONFIG_I2C_CHARDEV=y
CONFIG_I2C_DESIGNWARE_PLATFORM=y
# CONFIG_HWMON is not set
CONFIG_FB=y
-# CONFIG_VGA_CONSOLE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
CONFIG_LOGO=y
@@ -91,12 +91,10 @@ CONFIG_MMC_SDHCI_PLTFM=y
CONFIG_MMC_DW=y
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_EXT3_FS=y
-CONFIG_EXT4_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_NTFS_FS=y
CONFIG_TMPFS=y
-CONFIG_JFFS2_FS=y
CONFIG_NFS_FS=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig
index 323486d6ee83..87ee46b237ef 100644
--- a/arch/arc/configs/axs103_defconfig
+++ b/arch/arc/configs/axs103_defconfig
@@ -39,14 +39,10 @@ CONFIG_IP_PNP_RARP=y
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
# CONFIG_STANDALONE is not set
# CONFIG_PREVENT_FIRMWARE_BUILD is not set
# CONFIG_FIRMWARE_IN_KERNEL is not set
-CONFIG_MTD=y
-CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_NAND=y
-CONFIG_MTD_NAND_AXS=y
CONFIG_SCSI=y
CONFIG_BLK_DEV_SD=y
CONFIG_NETDEVICES=y
@@ -78,14 +74,12 @@ CONFIG_I2C_CHARDEV=y
CONFIG_I2C_DESIGNWARE_PLATFORM=y
# CONFIG_HWMON is not set
CONFIG_FB=y
-# CONFIG_VGA_CONSOLE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_MONO is not set
# CONFIG_LOGO_LINUX_VGA16 is not set
# CONFIG_LOGO_LINUX_CLUT224 is not set
-CONFIG_USB=y
CONFIG_USB_EHCI_HCD=y
CONFIG_USB_EHCI_HCD_PLATFORM=y
CONFIG_USB_OHCI_HCD=y
@@ -97,12 +91,10 @@ CONFIG_MMC_SDHCI_PLTFM=y
CONFIG_MMC_DW=y
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_EXT3_FS=y
-CONFIG_EXT4_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_NTFS_FS=y
CONFIG_TMPFS=y
-CONFIG_JFFS2_FS=y
CONFIG_NFS_FS=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig
index 66191cd0447e..d80daf4f7e73 100644
--- a/arch/arc/configs/axs103_smp_defconfig
+++ b/arch/arc/configs/axs103_smp_defconfig
@@ -40,14 +40,10 @@ CONFIG_IP_PNP_RARP=y
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
# CONFIG_STANDALONE is not set
# CONFIG_PREVENT_FIRMWARE_BUILD is not set
# CONFIG_FIRMWARE_IN_KERNEL is not set
-CONFIG_MTD=y
-CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_NAND=y
-CONFIG_MTD_NAND_AXS=y
CONFIG_SCSI=y
CONFIG_BLK_DEV_SD=y
CONFIG_NETDEVICES=y
@@ -79,14 +75,12 @@ CONFIG_I2C_CHARDEV=y
CONFIG_I2C_DESIGNWARE_PLATFORM=y
# CONFIG_HWMON is not set
CONFIG_FB=y
-# CONFIG_VGA_CONSOLE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_MONO is not set
# CONFIG_LOGO_LINUX_VGA16 is not set
# CONFIG_LOGO_LINUX_CLUT224 is not set
-CONFIG_USB=y
CONFIG_USB_EHCI_HCD=y
CONFIG_USB_EHCI_HCD_PLATFORM=y
CONFIG_USB_OHCI_HCD=y
@@ -98,12 +92,10 @@ CONFIG_MMC_SDHCI_PLTFM=y
CONFIG_MMC_DW=y
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_EXT3_FS=y
-CONFIG_EXT4_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_NTFS_FS=y
CONFIG_TMPFS=y
-CONFIG_JFFS2_FS=y
CONFIG_NFS_FS=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
diff --git a/arch/arc/configs/nsim_700_defconfig b/arch/arc/configs/nsim_700_defconfig
index 138f9d887957..f41095340b6a 100644
--- a/arch/arc/configs/nsim_700_defconfig
+++ b/arch/arc/configs/nsim_700_defconfig
@@ -4,6 +4,7 @@ CONFIG_DEFAULT_HOSTNAME="ARCLinux"
# CONFIG_SWAP is not set
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
CONFIG_HIGH_RES_TIMERS=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
@@ -26,7 +27,6 @@ CONFIG_ARC_PLAT_SIM=y
CONFIG_ARC_BUILTIN_DTB_NAME="nsim_700"
CONFIG_PREEMPT=y
# CONFIG_COMPACTION is not set
-# CONFIG_CROSS_MEMORY_ATTACH is not set
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
@@ -34,6 +34,7 @@ CONFIG_UNIX_DIAG=y
CONFIG_NET_KEY=y
CONFIG_INET=y
# CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
# CONFIG_STANDALONE is not set
# CONFIG_PREVENT_FIRMWARE_BUILD is not set
# CONFIG_FIRMWARE_IN_KERNEL is not set
@@ -51,7 +52,6 @@ CONFIG_SERIAL_ARC=y
CONFIG_SERIAL_ARC_CONSOLE=y
# CONFIG_HW_RANDOM is not set
# CONFIG_HWMON is not set
-# CONFIG_VGA_CONSOLE is not set
# CONFIG_HID is not set
# CONFIG_USB_SUPPORT is not set
# CONFIG_IOMMU_SUPPORT is not set
@@ -63,4 +63,3 @@ CONFIG_NFS_FS=y
# CONFIG_ENABLE_WARN_DEPRECATED is not set
# CONFIG_ENABLE_MUST_CHECK is not set
# CONFIG_DEBUG_PREEMPT is not set
-CONFIG_XZ_DEC=y
diff --git a/arch/arc/configs/nsim_hs_defconfig b/arch/arc/configs/nsim_hs_defconfig
index f68838e8068a..cfaa33cb5921 100644
--- a/arch/arc/configs/nsim_hs_defconfig
+++ b/arch/arc/configs/nsim_hs_defconfig
@@ -35,6 +35,7 @@ CONFIG_UNIX_DIAG=y
CONFIG_NET_KEY=y
CONFIG_INET=y
# CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
# CONFIG_STANDALONE is not set
# CONFIG_PREVENT_FIRMWARE_BUILD is not set
# CONFIG_FIRMWARE_IN_KERNEL is not set
@@ -49,7 +50,6 @@ CONFIG_SERIAL_ARC=y
CONFIG_SERIAL_ARC_CONSOLE=y
# CONFIG_HW_RANDOM is not set
# CONFIG_HWMON is not set
-# CONFIG_VGA_CONSOLE is not set
# CONFIG_HID is not set
# CONFIG_USB_SUPPORT is not set
# CONFIG_IOMMU_SUPPORT is not set
@@ -61,4 +61,3 @@ CONFIG_NFS_FS=y
# CONFIG_ENABLE_WARN_DEPRECATED is not set
# CONFIG_ENABLE_MUST_CHECK is not set
# CONFIG_DEBUG_PREEMPT is not set
-CONFIG_XZ_DEC=y
diff --git a/arch/arc/configs/nsim_hs_smp_defconfig b/arch/arc/configs/nsim_hs_smp_defconfig
index 96bd1c20fb0b..bb2a8dc778b5 100644
--- a/arch/arc/configs/nsim_hs_smp_defconfig
+++ b/arch/arc/configs/nsim_hs_smp_defconfig
@@ -2,6 +2,7 @@ CONFIG_CROSS_COMPILE="arc-linux-"
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_DEFAULT_HOSTNAME="ARCLinux"
# CONFIG_SWAP is not set
+# CONFIG_CROSS_MEMORY_ATTACH is not set
CONFIG_HIGH_RES_TIMERS=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
@@ -21,13 +22,11 @@ CONFIG_MODULES=y
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARC_PLAT_SIM=y
-CONFIG_ARC_BOARD_ML509=y
CONFIG_ISA_ARCV2=y
CONFIG_SMP=y
CONFIG_ARC_BUILTIN_DTB_NAME="nsim_hs_idu"
CONFIG_PREEMPT=y
# CONFIG_COMPACTION is not set
-# CONFIG_CROSS_MEMORY_ATTACH is not set
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
@@ -35,6 +34,7 @@ CONFIG_UNIX_DIAG=y
CONFIG_NET_KEY=y
CONFIG_INET=y
# CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
# CONFIG_STANDALONE is not set
# CONFIG_PREVENT_FIRMWARE_BUILD is not set
# CONFIG_FIRMWARE_IN_KERNEL is not set
@@ -49,7 +49,6 @@ CONFIG_SERIAL_ARC=y
CONFIG_SERIAL_ARC_CONSOLE=y
# CONFIG_HW_RANDOM is not set
# CONFIG_HWMON is not set
-# CONFIG_VGA_CONSOLE is not set
# CONFIG_HID is not set
# CONFIG_USB_SUPPORT is not set
# CONFIG_IOMMU_SUPPORT is not set
@@ -60,4 +59,3 @@ CONFIG_TMPFS=y
CONFIG_NFS_FS=y
# CONFIG_ENABLE_WARN_DEPRECATED is not set
# CONFIG_ENABLE_MUST_CHECK is not set
-CONFIG_XZ_DEC=y
diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig
index 31e1d95764ff..646182e93753 100644
--- a/arch/arc/configs/nsimosci_defconfig
+++ b/arch/arc/configs/nsimosci_defconfig
@@ -33,6 +33,7 @@ CONFIG_UNIX_DIAG=y
CONFIG_NET_KEY=y
CONFIG_INET=y
# CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
# CONFIG_STANDALONE is not set
# CONFIG_PREVENT_FIRMWARE_BUILD is not set
# CONFIG_FIRMWARE_IN_KERNEL is not set
@@ -58,7 +59,6 @@ CONFIG_SERIAL_OF_PLATFORM=y
# CONFIG_HW_RANDOM is not set
# CONFIG_HWMON is not set
CONFIG_FB=y
-# CONFIG_VGA_CONSOLE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
# CONFIG_HID is not set
diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig
index fcae66683ca0..ceca2541950d 100644
--- a/arch/arc/configs/nsimosci_hs_defconfig
+++ b/arch/arc/configs/nsimosci_hs_defconfig
@@ -34,12 +34,12 @@ CONFIG_UNIX_DIAG=y
CONFIG_NET_KEY=y
CONFIG_INET=y
# CONFIG_IPV6 is not set
+CONFIG_DEVTMPFS=y
# CONFIG_STANDALONE is not set
# CONFIG_PREVENT_FIRMWARE_BUILD is not set
# CONFIG_FIRMWARE_IN_KERNEL is not set
# CONFIG_BLK_DEV is not set
CONFIG_NETDEVICES=y
-CONFIG_NET_OSCI_LAN=y
CONFIG_INPUT_EVDEV=y
# CONFIG_MOUSE_PS2_ALPS is not set
# CONFIG_MOUSE_PS2_LOGIPS2PP is not set
@@ -58,7 +58,6 @@ CONFIG_SERIAL_OF_PLATFORM=y
# CONFIG_HW_RANDOM is not set
# CONFIG_HWMON is not set
CONFIG_FB=y
-# CONFIG_VGA_CONSOLE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
# CONFIG_HID is not set
diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig
index b01b659168ea..4b6da90f6f26 100644
--- a/arch/arc/configs/nsimosci_hs_smp_defconfig
+++ b/arch/arc/configs/nsimosci_hs_smp_defconfig
@@ -2,6 +2,7 @@ CONFIG_CROSS_COMPILE="arc-linux-"
CONFIG_DEFAULT_HOSTNAME="ARCLinux"
# CONFIG_SWAP is not set
CONFIG_SYSVIPC=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_IKCONFIG=y
@@ -18,15 +19,11 @@ CONFIG_MODULES=y
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARC_PLAT_SIM=y
-CONFIG_ARC_BOARD_ML509=y
CONFIG_ISA_ARCV2=y
CONFIG_SMP=y
-CONFIG_ARC_HAS_LL64=y
-# CONFIG_ARC_HAS_RTSC is not set
CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci_hs_idu"
CONFIG_PREEMPT=y
# CONFIG_COMPACTION is not set
-# CONFIG_CROSS_MEMORY_ATTACH is not set
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_PACKET_DIAG=y
@@ -40,6 +37,7 @@ CONFIG_INET=y
# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
# CONFIG_WIRELESS is not set
+CONFIG_DEVTMPFS=y
# CONFIG_STANDALONE is not set
# CONFIG_PREVENT_FIRMWARE_BUILD is not set
# CONFIG_FIRMWARE_IN_KERNEL is not set
@@ -56,14 +54,11 @@ CONFIG_NETDEVICES=y
# CONFIG_NET_VENDOR_STMICRO is not set
# CONFIG_NET_VENDOR_VIA is not set
# CONFIG_NET_VENDOR_WIZNET is not set
-CONFIG_NET_OSCI_LAN=y
# CONFIG_WLAN is not set
CONFIG_INPUT_EVDEV=y
CONFIG_MOUSE_PS2_TOUCHKIT=y
# CONFIG_SERIO_SERPORT is not set
-CONFIG_SERIO_LIBPS2=y
CONFIG_SERIO_ARC_PS2=y
-CONFIG_VT_HW_CONSOLE_BINDING=y
# CONFIG_LEGACY_PTYS is not set
# CONFIG_DEVKMEM is not set
CONFIG_SERIAL_8250=y
@@ -75,9 +70,6 @@ CONFIG_SERIAL_OF_PLATFORM=y
# CONFIG_HW_RANDOM is not set
# CONFIG_HWMON is not set
CONFIG_FB=y
-CONFIG_ARCPGU_RGB888=y
-CONFIG_ARCPGU_DISPTYPE=0
-# CONFIG_VGA_CONSOLE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
# CONFIG_HID is not set
diff --git a/arch/arc/configs/tb10x_defconfig b/arch/arc/configs/tb10x_defconfig
index 3b4dc9cebcf1..9b342eaf95ae 100644
--- a/arch/arc/configs/tb10x_defconfig
+++ b/arch/arc/configs/tb10x_defconfig
@@ -3,6 +3,7 @@ CONFIG_CROSS_COMPILE="arc-linux-"
CONFIG_DEFAULT_HOSTNAME="tb10x"
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
CONFIG_HIGH_RES_TIMERS=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_BSD_PROCESS_ACCT_V3=y
@@ -26,12 +27,10 @@ CONFIG_MODULE_UNLOAD=y
# CONFIG_BLOCK is not set
CONFIG_ARC_PLAT_TB10X=y
CONFIG_ARC_CACHE_LINE_SHIFT=5
-CONFIG_ARC_STACK_NONEXEC=y
CONFIG_HZ=250
CONFIG_ARC_BUILTIN_DTB_NAME="abilis_tb100_dvk"
CONFIG_PREEMPT_VOLUNTARY=y
# CONFIG_COMPACTION is not set
-# CONFIG_CROSS_MEMORY_ATTACH is not set
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
@@ -44,8 +43,8 @@ CONFIG_IP_MULTICAST=y
# CONFIG_INET_DIAG is not set
# CONFIG_IPV6 is not set
# CONFIG_WIRELESS is not set
+CONFIG_DEVTMPFS=y
# CONFIG_FIRMWARE_IN_KERNEL is not set
-CONFIG_PROC_DEVICETREE=y
CONFIG_NETDEVICES=y
# CONFIG_NET_CADENCE is not set
# CONFIG_NET_VENDOR_BROADCOM is not set
@@ -55,9 +54,6 @@ CONFIG_NETDEVICES=y
# CONFIG_NET_VENDOR_NATSEMI is not set
# CONFIG_NET_VENDOR_SEEQ is not set
CONFIG_STMMAC_ETH=y
-CONFIG_STMMAC_DEBUG_FS=y
-CONFIG_STMMAC_DA=y
-CONFIG_STMMAC_CHAINED=y
# CONFIG_NET_VENDOR_WIZNET is not set
# CONFIG_WLAN is not set
# CONFIG_INPUT is not set
@@ -91,7 +87,6 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
CONFIG_LEDS_TRIGGER_TRANSIENT=y
CONFIG_DMADEVICES=y
CONFIG_DW_DMAC=y
-CONFIG_NET_DMA=y
CONFIG_ASYNC_TX_DMA=y
# CONFIG_IOMMU_SUPPORT is not set
# CONFIG_DNOTIFY is not set
@@ -100,17 +95,16 @@ CONFIG_TMPFS=y
CONFIG_CONFIGFS_FS=y
# CONFIG_MISC_FILESYSTEMS is not set
# CONFIG_NETWORK_FILESYSTEMS is not set
+CONFIG_DEBUG_INFO=y
# CONFIG_ENABLE_WARN_DEPRECATED is not set
-CONFIG_MAGIC_SYSRQ=y
CONFIG_STRIP_ASM_SYMS=y
CONFIG_DEBUG_FS=y
CONFIG_HEADERS_CHECK=y
CONFIG_DEBUG_SECTION_MISMATCH=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_SCHEDSTATS=y
CONFIG_TIMER_STATS=y
-CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_MEMORY_INIT=y
-CONFIG_DEBUG_STACKOVERFLOW=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h
index fdc5be5b1029..f9f4c6f59fdb 100644
--- a/arch/arc/include/asm/arcregs.h
+++ b/arch/arc/include/asm/arcregs.h
@@ -10,7 +10,8 @@
#define _ASM_ARC_ARCREGS_H
/* Build Configuration Registers */
-#define ARC_REG_DCCMBASE_BCR 0x61 /* DCCM Base Addr */
+#define ARC_REG_AUX_DCCM 0x18 /* DCCM Base Addr ARCv2 */
+#define ARC_REG_DCCM_BASE_BUILD 0x61 /* DCCM Base Addr ARCompact */
#define ARC_REG_CRC_BCR 0x62
#define ARC_REG_VECBASE_BCR 0x68
#define ARC_REG_PERIBASE_BCR 0x69
@@ -18,10 +19,10 @@
#define ARC_REG_DPFP_BCR 0x6C /* ARCompact: Dbl Precision FPU */
#define ARC_REG_FP_V2_BCR 0xc8 /* ARCv2 FPU */
#define ARC_REG_SLC_BCR 0xce
-#define ARC_REG_DCCM_BCR 0x74 /* DCCM Present + SZ */
+#define ARC_REG_DCCM_BUILD 0x74 /* DCCM size (common) */
#define ARC_REG_TIMERS_BCR 0x75
#define ARC_REG_AP_BCR 0x76
-#define ARC_REG_ICCM_BCR 0x78
+#define ARC_REG_ICCM_BUILD 0x78 /* ICCM size (common) */
#define ARC_REG_XY_MEM_BCR 0x79
#define ARC_REG_MAC_BCR 0x7a
#define ARC_REG_MUL_BCR 0x7b
@@ -36,6 +37,7 @@
#define ARC_REG_IRQ_BCR 0xF3
#define ARC_REG_SMART_BCR 0xFF
#define ARC_REG_CLUSTER_BCR 0xcf
+#define ARC_REG_AUX_ICCM 0x208 /* ICCM Base Addr (ARCv2) */
/* status32 Bits Positions */
#define STATUS_AE_BIT 5 /* Exception active */
@@ -246,7 +248,7 @@ struct bcr_perip {
#endif
};
-struct bcr_iccm {
+struct bcr_iccm_arcompact {
#ifdef CONFIG_CPU_BIG_ENDIAN
unsigned int base:16, pad:5, sz:3, ver:8;
#else
@@ -254,17 +256,15 @@ struct bcr_iccm {
#endif
};
-/* DCCM Base Address Register: ARC_REG_DCCMBASE_BCR */
-struct bcr_dccm_base {
+struct bcr_iccm_arcv2 {
#ifdef CONFIG_CPU_BIG_ENDIAN
- unsigned int addr:24, ver:8;
+ unsigned int pad:8, sz11:4, sz01:4, sz10:4, sz00:4, ver:8;
#else
- unsigned int ver:8, addr:24;
+ unsigned int ver:8, sz00:4, sz10:4, sz01:4, sz11:4, pad:8;
#endif
};
-/* DCCM RAM Configuration Register: ARC_REG_DCCM_BCR */
-struct bcr_dccm {
+struct bcr_dccm_arcompact {
#ifdef CONFIG_CPU_BIG_ENDIAN
unsigned int res:21, sz:3, ver:8;
#else
@@ -272,6 +272,14 @@ struct bcr_dccm {
#endif
};
+struct bcr_dccm_arcv2 {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int pad2:12, cyc:3, pad1:1, sz1:4, sz0:4, ver:8;
+#else
+ unsigned int ver:8, sz0:4, sz1:4, pad1:1, cyc:3, pad2:12;
+#endif
+};
+
/* ARCompact: Both SP and DP FPU BCRs have same format */
struct bcr_fp_arcompact {
#ifdef CONFIG_CPU_BIG_ENDIAN
@@ -315,9 +323,9 @@ struct bcr_bpu_arcv2 {
struct bcr_generic {
#ifdef CONFIG_CPU_BIG_ENDIAN
- unsigned int pad:24, ver:8;
+ unsigned int info:24, ver:8;
#else
- unsigned int ver:8, pad:24;
+ unsigned int ver:8, info:24;
#endif
};
diff --git a/arch/arc/include/asm/irq.h b/arch/arc/include/asm/irq.h
index 4fd7d62a6e30..49014f0ef36d 100644
--- a/arch/arc/include/asm/irq.h
+++ b/arch/arc/include/asm/irq.h
@@ -16,11 +16,9 @@
#ifdef CONFIG_ISA_ARCOMPACT
#define TIMER0_IRQ 3
#define TIMER1_IRQ 4
-#define IPI_IRQ (NR_CPU_IRQS-1) /* dummy to enable SMP build for up hardware */
#else
#define TIMER0_IRQ 16
#define TIMER1_IRQ 17
-#define IPI_IRQ 19
#endif
#include <linux/interrupt.h>
diff --git a/arch/arc/include/asm/irqflags-arcv2.h b/arch/arc/include/asm/irqflags-arcv2.h
index 1fc18ee06cf2..37c2f751eebf 100644
--- a/arch/arc/include/asm/irqflags-arcv2.h
+++ b/arch/arc/include/asm/irqflags-arcv2.h
@@ -22,6 +22,7 @@
#define AUX_IRQ_CTRL 0x00E
#define AUX_IRQ_ACT 0x043 /* Active Intr across all levels */
#define AUX_IRQ_LVL_PEND 0x200 /* Pending Intr across all levels */
+#define AUX_IRQ_HINT 0x201 /* For generating Soft Interrupts */
#define AUX_IRQ_PRIORITY 0x206
#define ICAUSE 0x40a
#define AUX_IRQ_SELECT 0x40b
@@ -115,6 +116,16 @@ static inline int arch_irqs_disabled(void)
return arch_irqs_disabled_flags(arch_local_save_flags());
}
+static inline void arc_softirq_trigger(int irq)
+{
+ write_aux_reg(AUX_IRQ_HINT, irq);
+}
+
+static inline void arc_softirq_clear(int irq)
+{
+ write_aux_reg(AUX_IRQ_HINT, 0);
+}
+
#else
.macro IRQ_DISABLE scratch
diff --git a/arch/arc/kernel/entry-arcv2.S b/arch/arc/kernel/entry-arcv2.S
index b17830294706..c1264607bbff 100644
--- a/arch/arc/kernel/entry-arcv2.S
+++ b/arch/arc/kernel/entry-arcv2.S
@@ -45,11 +45,12 @@ VECTOR reserved ; Reserved slots
VECTOR handle_interrupt ; (16) Timer0
VECTOR handle_interrupt ; unused (Timer1)
VECTOR handle_interrupt ; unused (WDT)
-VECTOR handle_interrupt ; (19) ICI (inter core interrupt)
-VECTOR handle_interrupt
-VECTOR handle_interrupt
-VECTOR handle_interrupt
-VECTOR handle_interrupt ; (23) End of fixed IRQs
+VECTOR handle_interrupt ; (19) Inter core Interrupt (IPI)
+VECTOR handle_interrupt ; (20) perf Interrupt
+VECTOR handle_interrupt ; (21) Software Triggered Intr (Self IPI)
+VECTOR handle_interrupt ; unused
+VECTOR handle_interrupt ; (23) unused
+# End of fixed IRQs
.rept CONFIG_ARC_NUMBER_OF_INTERRUPTS - 8
VECTOR handle_interrupt
diff --git a/arch/arc/kernel/intc-compact.c b/arch/arc/kernel/intc-compact.c
index 06bcedf19b62..224d1c3aa9c4 100644
--- a/arch/arc/kernel/intc-compact.c
+++ b/arch/arc/kernel/intc-compact.c
@@ -81,9 +81,6 @@ static int arc_intc_domain_map(struct irq_domain *d, unsigned int irq,
{
switch (irq) {
case TIMER0_IRQ:
-#ifdef CONFIG_SMP
- case IPI_IRQ:
-#endif
irq_set_chip_and_handler(irq, &onchip_intc, handle_percpu_irq);
break;
default:
diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c
index bc771f58fefb..c41c364b926c 100644
--- a/arch/arc/kernel/mcip.c
+++ b/arch/arc/kernel/mcip.c
@@ -11,9 +11,13 @@
#include <linux/smp.h>
#include <linux/irq.h>
#include <linux/spinlock.h>
+#include <asm/irqflags-arcv2.h>
#include <asm/mcip.h>
#include <asm/setup.h>
+#define IPI_IRQ 19
+#define SOFTIRQ_IRQ 21
+
static char smp_cpuinfo_buf[128];
static int idu_detected;
@@ -22,6 +26,7 @@ static DEFINE_RAW_SPINLOCK(mcip_lock);
static void mcip_setup_per_cpu(int cpu)
{
smp_ipi_irq_setup(cpu, IPI_IRQ);
+ smp_ipi_irq_setup(cpu, SOFTIRQ_IRQ);
}
static void mcip_ipi_send(int cpu)
@@ -29,46 +34,44 @@ static void mcip_ipi_send(int cpu)
unsigned long flags;
int ipi_was_pending;
+ /* ARConnect can only send IPI to others */
+ if (unlikely(cpu == raw_smp_processor_id())) {
+ arc_softirq_trigger(SOFTIRQ_IRQ);
+ return;
+ }
+
+ raw_spin_lock_irqsave(&mcip_lock, flags);
+
/*
- * NOTE: We must spin here if the other cpu hasn't yet
- * serviced a previous message. This can burn lots
- * of time, but we MUST follows this protocol or
- * ipi messages can be lost!!!
- * Also, we must release the lock in this loop because
- * the other side may get to this same loop and not
- * be able to ack -- thus causing deadlock.
+ * If receiver already has a pending interrupt, elide sending this one.
+ * Linux cross core calling works well with concurrent IPIs
+ * coalesced into one
+ * see arch/arc/kernel/smp.c: ipi_send_msg_one()
*/
+ __mcip_cmd(CMD_INTRPT_READ_STATUS, cpu);
+ ipi_was_pending = read_aux_reg(ARC_REG_MCIP_READBACK);
+ if (!ipi_was_pending)
+ __mcip_cmd(CMD_INTRPT_GENERATE_IRQ, cpu);
- do {
- raw_spin_lock_irqsave(&mcip_lock, flags);
- __mcip_cmd(CMD_INTRPT_READ_STATUS, cpu);
- ipi_was_pending = read_aux_reg(ARC_REG_MCIP_READBACK);
- if (ipi_was_pending == 0)
- break; /* break out but keep lock */
- raw_spin_unlock_irqrestore(&mcip_lock, flags);
- } while (1);
-
- __mcip_cmd(CMD_INTRPT_GENERATE_IRQ, cpu);
raw_spin_unlock_irqrestore(&mcip_lock, flags);
-
-#ifdef CONFIG_ARC_IPI_DBG
- if (ipi_was_pending)
- pr_info("IPI ACK delayed from cpu %d\n", cpu);
-#endif
}
static void mcip_ipi_clear(int irq)
{
unsigned int cpu, c;
unsigned long flags;
- unsigned int __maybe_unused copy;
+
+ if (unlikely(irq == SOFTIRQ_IRQ)) {
+ arc_softirq_clear(irq);
+ return;
+ }
raw_spin_lock_irqsave(&mcip_lock, flags);
/* Who sent the IPI */
__mcip_cmd(CMD_INTRPT_CHECK_SOURCE, 0);
- copy = cpu = read_aux_reg(ARC_REG_MCIP_READBACK); /* 1,2,4,8... */
+ cpu = read_aux_reg(ARC_REG_MCIP_READBACK); /* 1,2,4,8... */
/*
* In rare case, multiple concurrent IPIs sent to same target can
@@ -82,12 +85,6 @@ static void mcip_ipi_clear(int irq)
} while (cpu);
raw_spin_unlock_irqrestore(&mcip_lock, flags);
-
-#ifdef CONFIG_ARC_IPI_DBG
- if (c != __ffs(copy))
- pr_info("IPIs from %x coalesced to %x\n",
- copy, raw_smp_processor_id());
-#endif
}
static void mcip_probe_n_setup(void)
@@ -111,10 +108,11 @@ static void mcip_probe_n_setup(void)
READ_BCR(ARC_REG_MCIP_BCR, mp);
sprintf(smp_cpuinfo_buf,
- "Extn [SMP]\t: ARConnect (v%d): %d cores with %s%s%s%s\n",
+ "Extn [SMP]\t: ARConnect (v%d): %d cores with %s%s%s%s%s\n",
mp.ver, mp.num_cores,
IS_AVAIL1(mp.ipi, "IPI "),
IS_AVAIL1(mp.idu, "IDU "),
+ IS_AVAIL1(mp.llm, "LLM "),
IS_AVAIL1(mp.dbg, "DEBUG "),
IS_AVAIL1(mp.gfrc, "GFRC"));
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index a7edceba5f84..cdc821df1809 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -42,6 +42,53 @@ struct task_struct *_current_task[NR_CPUS]; /* For stack switching */
struct cpuinfo_arc cpuinfo_arc700[NR_CPUS];
+static void read_decode_ccm_bcr(struct cpuinfo_arc *cpu)
+{
+ if (is_isa_arcompact()) {
+ struct bcr_iccm_arcompact iccm;
+ struct bcr_dccm_arcompact dccm;
+
+ READ_BCR(ARC_REG_ICCM_BUILD, iccm);
+ if (iccm.ver) {
+ cpu->iccm.sz = 4096 << iccm.sz; /* 8K to 512K */
+ cpu->iccm.base_addr = iccm.base << 16;
+ }
+
+ READ_BCR(ARC_REG_DCCM_BUILD, dccm);
+ if (dccm.ver) {
+ unsigned long base;
+ cpu->dccm.sz = 2048 << dccm.sz; /* 2K to 256K */
+
+ base = read_aux_reg(ARC_REG_DCCM_BASE_BUILD);
+ cpu->dccm.base_addr = base & ~0xF;
+ }
+ } else {
+ struct bcr_iccm_arcv2 iccm;
+ struct bcr_dccm_arcv2 dccm;
+ unsigned long region;
+
+ READ_BCR(ARC_REG_ICCM_BUILD, iccm);
+ if (iccm.ver) {
+ cpu->iccm.sz = 256 << iccm.sz00; /* 512B to 16M */
+ if (iccm.sz00 == 0xF && iccm.sz01 > 0)
+ cpu->iccm.sz <<= iccm.sz01;
+
+ region = read_aux_reg(ARC_REG_AUX_ICCM);
+ cpu->iccm.base_addr = region & 0xF0000000;
+ }
+
+ READ_BCR(ARC_REG_DCCM_BUILD, dccm);
+ if (dccm.ver) {
+ cpu->dccm.sz = 256 << dccm.sz0;
+ if (dccm.sz0 == 0xF && dccm.sz1 > 0)
+ cpu->dccm.sz <<= dccm.sz1;
+
+ region = read_aux_reg(ARC_REG_AUX_DCCM);
+ cpu->dccm.base_addr = region & 0xF0000000;
+ }
+ }
+}
+
static void read_arc_build_cfg_regs(void)
{
struct bcr_perip uncached_space;
@@ -76,36 +123,11 @@ static void read_arc_build_cfg_regs(void)
cpu->extn.swap = read_aux_reg(ARC_REG_SWAP_BCR) ? 1 : 0; /* 1,3 */
cpu->extn.crc = read_aux_reg(ARC_REG_CRC_BCR) ? 1 : 0;
cpu->extn.minmax = read_aux_reg(ARC_REG_MIXMAX_BCR) > 1 ? 1 : 0; /* 2 */
-
- /* Note that we read the CCM BCRs independent of kernel config
- * This is to catch the cases where user doesn't know that
- * CCMs are present in hardware build
- */
- {
- struct bcr_iccm iccm;
- struct bcr_dccm dccm;
- struct bcr_dccm_base dccm_base;
- unsigned int bcr_32bit_val;
-
- bcr_32bit_val = read_aux_reg(ARC_REG_ICCM_BCR);
- if (bcr_32bit_val) {
- iccm = *((struct bcr_iccm *)&bcr_32bit_val);
- cpu->iccm.base_addr = iccm.base << 16;
- cpu->iccm.sz = 0x2000 << (iccm.sz - 1);
- }
-
- bcr_32bit_val = read_aux_reg(ARC_REG_DCCM_BCR);
- if (bcr_32bit_val) {
- dccm = *((struct bcr_dccm *)&bcr_32bit_val);
- cpu->dccm.sz = 0x800 << (dccm.sz);
-
- READ_BCR(ARC_REG_DCCMBASE_BCR, dccm_base);
- cpu->dccm.base_addr = dccm_base.addr << 8;
- }
- }
-
READ_BCR(ARC_REG_XY_MEM_BCR, cpu->extn_xymem);
+ /* Read CCM BCRs for boot reporting even if not enabled in Kconfig */
+ read_decode_ccm_bcr(cpu);
+
read_decode_mmu_bcr();
read_decode_cache_bcr();
@@ -237,8 +259,6 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
n += scnprintf(buf + n, len - n, "mpy[opt %d] ", opt);
}
- n += scnprintf(buf + n, len - n, "%s",
- IS_USED_CFG(CONFIG_ARC_HAS_HW_MPY));
}
n += scnprintf(buf + n, len - n, "%s%s%s%s%s%s%s%s\n",
diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index ef6e9e15b82a..424e937da5c8 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c
@@ -336,11 +336,8 @@ irqreturn_t do_IPI(int irq, void *dev_id)
int rc;
rc = __do_IPI(msg);
-#ifdef CONFIG_ARC_IPI_DBG
- /* IPI received but no valid @msg */
if (rc)
pr_info("IPI with bogus msg %ld in %ld\n", msg, copy);
-#endif
pending &= ~(1U << msg);
} while (pending);
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 7a6a58ef8aaf..43788b1a1ac5 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -195,5 +195,7 @@ CFLAGS_font.o := -Dstatic=
$(obj)/font.c: $(FONTC)
$(call cmd,shipped)
+AFLAGS_hyp-stub.o := -Wa,-march=armv7-a
+
$(obj)/hyp-stub.S: $(srctree)/arch/$(SRCARCH)/kernel/hyp-stub.S
$(call cmd,shipped)
diff --git a/arch/arm/boot/dts/am335x-bone-common.dtsi b/arch/arm/boot/dts/am335x-bone-common.dtsi
index f3db13d2d90e..0cc150b87b86 100644
--- a/arch/arm/boot/dts/am335x-bone-common.dtsi
+++ b/arch/arm/boot/dts/am335x-bone-common.dtsi
@@ -285,8 +285,10 @@
};
};
+
+/include/ "tps65217.dtsi"
+
&tps {
- compatible = "ti,tps65217";
/*
* Configure pmic to enter OFF-state instead of SLEEP-state ("RTC-only
* mode") at poweroff. Most BeagleBone versions do not support RTC-only
@@ -307,17 +309,12 @@
ti,pmic-shutdown-controller;
regulators {
- #address-cells = <1>;
- #size-cells = <0>;
-
dcdc1_reg: regulator@0 {
- reg = <0>;
regulator-name = "vdds_dpr";
regulator-always-on;
};
dcdc2_reg: regulator@1 {
- reg = <1>;
/* VDD_MPU voltage limits 0.95V - 1.26V with +/-4% tolerance */
regulator-name = "vdd_mpu";
regulator-min-microvolt = <925000>;
@@ -327,7 +324,6 @@
};
dcdc3_reg: regulator@2 {
- reg = <2>;
/* VDD_CORE voltage limits 0.95V - 1.1V with +/-4% tolerance */
regulator-name = "vdd_core";
regulator-min-microvolt = <925000>;
@@ -337,25 +333,21 @@
};
ldo1_reg: regulator@3 {
- reg = <3>;
regulator-name = "vio,vrtc,vdds";
regulator-always-on;
};
ldo2_reg: regulator@4 {
- reg = <4>;
regulator-name = "vdd_3v3aux";
regulator-always-on;
};
ldo3_reg: regulator@5 {
- reg = <5>;
regulator-name = "vdd_1v8";
regulator-always-on;
};
ldo4_reg: regulator@6 {
- reg = <6>;
regulator-name = "vdd_3v3a";
regulator-always-on;
};
diff --git a/arch/arm/boot/dts/am335x-chilisom.dtsi b/arch/arm/boot/dts/am335x-chilisom.dtsi
index fda457b07e15..857d9894103a 100644
--- a/arch/arm/boot/dts/am335x-chilisom.dtsi
+++ b/arch/arm/boot/dts/am335x-chilisom.dtsi
@@ -128,21 +128,16 @@
};
-&tps {
- compatible = "ti,tps65217";
+/include/ "tps65217.dtsi"
+&tps {
regulators {
- #address-cells = <1>;
- #size-cells = <0>;
-
dcdc1_reg: regulator@0 {
- reg = <0>;
regulator-name = "vdds_dpr";
regulator-always-on;
};
dcdc2_reg: regulator@1 {
- reg = <1>;
/* VDD_MPU voltage limits 0.95V - 1.26V with +/-4% tolerance */
regulator-name = "vdd_mpu";
regulator-min-microvolt = <925000>;
@@ -152,7 +147,6 @@
};
dcdc3_reg: regulator@2 {
- reg = <2>;
/* VDD_CORE voltage limits 0.95V - 1.1V with +/-4% tolerance */
regulator-name = "vdd_core";
regulator-min-microvolt = <925000>;
@@ -162,28 +156,24 @@
};
ldo1_reg: regulator@3 {
- reg = <3>;
regulator-name = "vio,vrtc,vdds";
regulator-boot-on;
regulator-always-on;
};
ldo2_reg: regulator@4 {
- reg = <4>;
regulator-name = "vdd_3v3aux";
regulator-boot-on;
regulator-always-on;
};
ldo3_reg: regulator@5 {
- reg = <5>;
regulator-name = "vdd_1v8";
regulator-boot-on;
regulator-always-on;
};
ldo4_reg: regulator@6 {
- reg = <6>;
regulator-name = "vdd_3v3d";
regulator-boot-on;
regulator-always-on;
diff --git a/arch/arm/boot/dts/am335x-nano.dts b/arch/arm/boot/dts/am335x-nano.dts
index 77559a1ded60..f313999c503e 100644
--- a/arch/arm/boot/dts/am335x-nano.dts
+++ b/arch/arm/boot/dts/am335x-nano.dts
@@ -375,15 +375,11 @@
wp-gpios = <&gpio3 18 0>;
};
-&tps {
- compatible = "ti,tps65217";
+#include "tps65217.dtsi"
+&tps {
regulators {
- #address-cells = <1>;
- #size-cells = <0>;
-
dcdc1_reg: regulator@0 {
- reg = <0>;
/* +1.5V voltage with ±4% tolerance */
regulator-min-microvolt = <1450000>;
regulator-max-microvolt = <1550000>;
@@ -392,7 +388,6 @@
};
dcdc2_reg: regulator@1 {
- reg = <1>;
/* VDD_MPU voltage limits 0.95V - 1.1V with ±4% tolerance */
regulator-name = "vdd_mpu";
regulator-min-microvolt = <915000>;
@@ -402,7 +397,6 @@
};
dcdc3_reg: regulator@2 {
- reg = <2>;
/* VDD_CORE voltage limits 0.95V - 1.1V with ±4% tolerance */
regulator-name = "vdd_core";
regulator-min-microvolt = <915000>;
@@ -412,7 +406,6 @@
};
ldo1_reg: regulator@3 {
- reg = <3>;
/* +1.8V voltage with ±4% tolerance */
regulator-min-microvolt = <1750000>;
regulator-max-microvolt = <1870000>;
@@ -421,7 +414,6 @@
};
ldo2_reg: regulator@4 {
- reg = <4>;
/* +3.3V voltage with ±4% tolerance */
regulator-min-microvolt = <3175000>;
regulator-max-microvolt = <3430000>;
@@ -430,7 +422,6 @@
};
ldo3_reg: regulator@5 {
- reg = <5>;
/* +1.8V voltage with ±4% tolerance */
regulator-min-microvolt = <1750000>;
regulator-max-microvolt = <1870000>;
@@ -439,7 +430,6 @@
};
ldo4_reg: regulator@6 {
- reg = <6>;
/* +3.3V voltage with ±4% tolerance */
regulator-min-microvolt = <3175000>;
regulator-max-microvolt = <3430000>;
diff --git a/arch/arm/boot/dts/am335x-pepper.dts b/arch/arm/boot/dts/am335x-pepper.dts
index 471a3a70ea1f..8867aaaec54d 100644
--- a/arch/arm/boot/dts/am335x-pepper.dts
+++ b/arch/arm/boot/dts/am335x-pepper.dts
@@ -420,9 +420,9 @@
vin-supply = <&vbat>;
};
-&tps {
- compatible = "ti,tps65217";
+/include/ "tps65217.dtsi"
+&tps {
backlight {
isel = <1>; /* ISET1 */
fdim = <200>; /* TPS65217_BL_FDIM_200HZ */
@@ -430,17 +430,12 @@
};
regulators {
- #address-cells = <1>;
- #size-cells = <0>;
-
dcdc1_reg: regulator@0 {
- reg = <0>;
/* VDD_1V8 system supply */
regulator-always-on;
};
dcdc2_reg: regulator@1 {
- reg = <1>;
/* VDD_CORE voltage limits 0.95V - 1.26V with +/-4% tolerance */
regulator-name = "vdd_core";
regulator-min-microvolt = <925000>;
@@ -450,7 +445,6 @@
};
dcdc3_reg: regulator@2 {
- reg = <2>;
/* VDD_MPU voltage limits 0.95V - 1.1V with +/-4% tolerance */
regulator-name = "vdd_mpu";
regulator-min-microvolt = <925000>;
@@ -460,21 +454,18 @@
};
ldo1_reg: regulator@3 {
- reg = <3>;
/* VRTC 1.8V always-on supply */
regulator-name = "vrtc,vdds";
regulator-always-on;
};
ldo2_reg: regulator@4 {
- reg = <4>;
/* 3.3V rail */
regulator-name = "vdd_3v3aux";
regulator-always-on;
};
ldo3_reg: regulator@5 {
- reg = <5>;
/* VDD_3V3A 3.3V rail */
regulator-name = "vdd_3v3a";
regulator-min-microvolt = <3300000>;
@@ -482,7 +473,6 @@
};
ldo4_reg: regulator@6 {
- reg = <6>;
/* VDD_3V3B 3.3V rail */
regulator-name = "vdd_3v3b";
regulator-always-on;
diff --git a/arch/arm/boot/dts/am335x-shc.dts b/arch/arm/boot/dts/am335x-shc.dts
index 1b5b044fcd91..865de8500f1c 100644
--- a/arch/arm/boot/dts/am335x-shc.dts
+++ b/arch/arm/boot/dts/am335x-shc.dts
@@ -46,7 +46,7 @@
gpios = <&gpio1 29 GPIO_ACTIVE_HIGH>;
linux,code = <KEY_BACK>;
debounce-interval = <1000>;
- gpio-key,wakeup;
+ wakeup-source;
};
front_button {
@@ -54,7 +54,7 @@
gpios = <&gpio1 25 GPIO_ACTIVE_HIGH>;
linux,code = <KEY_FRONT>;
debounce-interval = <1000>;
- gpio-key,wakeup;
+ wakeup-source;
};
};
diff --git a/arch/arm/boot/dts/am335x-sl50.dts b/arch/arm/boot/dts/am335x-sl50.dts
index d38edfa53bb9..3303c281697b 100644
--- a/arch/arm/boot/dts/am335x-sl50.dts
+++ b/arch/arm/boot/dts/am335x-sl50.dts
@@ -375,19 +375,16 @@
pinctrl-0 = <&uart4_pins>;
};
+#include "tps65217.dtsi"
+
&tps {
- compatible = "ti,tps65217";
ti,pmic-shutdown-controller;
interrupt-parent = <&intc>;
interrupts = <7>; /* NNMI */
regulators {
- #address-cells = <1>;
- #size-cells = <0>;
-
dcdc1_reg: regulator@0 {
- reg = <0>;
/* VDDS_DDR */
regulator-min-microvolt = <1500000>;
regulator-max-microvolt = <1500000>;
@@ -395,7 +392,6 @@
};
dcdc2_reg: regulator@1 {
- reg = <1>;
/* VDD_MPU voltage limits 0.95V - 1.26V with +/-4% tolerance */
regulator-name = "vdd_mpu";
regulator-min-microvolt = <925000>;
@@ -405,7 +401,6 @@
};
dcdc3_reg: regulator@2 {
- reg = <2>;
/* VDD_CORE voltage limits 0.95V - 1.1V with +/-4% tolerance */
regulator-name = "vdd_core";
regulator-min-microvolt = <925000>;
@@ -415,7 +410,6 @@
};
ldo1_reg: regulator@3 {
- reg = <3>;
/* VRTC / VIO / VDDS*/
regulator-always-on;
regulator-min-microvolt = <1800000>;
@@ -423,7 +417,6 @@
};
ldo2_reg: regulator@4 {
- reg = <4>;
/* VDD_3V3AUX */
regulator-always-on;
regulator-min-microvolt = <3300000>;
@@ -431,7 +424,6 @@
};
ldo3_reg: regulator@5 {
- reg = <5>;
/* VDD_1V8 */
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
@@ -439,7 +431,6 @@
};
ldo4_reg: regulator@6 {
- reg = <6>;
/* VDD_3V3A */
regulator-min-microvolt = <3300000>;
regulator-max-microvolt = <3300000>;
diff --git a/arch/arm/boot/dts/am57xx-beagle-x15.dts b/arch/arm/boot/dts/am57xx-beagle-x15.dts
index 36c0fa6c362a..a0986c65be0c 100644
--- a/arch/arm/boot/dts/am57xx-beagle-x15.dts
+++ b/arch/arm/boot/dts/am57xx-beagle-x15.dts
@@ -173,6 +173,8 @@
sound0_master: simple-audio-card,codec {
sound-dai = <&tlv320aic3104>;
+ assigned-clocks = <&clkoutmux2_clk_mux>;
+ assigned-clock-parents = <&sys_clk2_dclk_div>;
clocks = <&clkout2_clk>;
};
};
@@ -796,6 +798,8 @@
pinctrl-names = "default", "sleep";
pinctrl-0 = <&mcasp3_pins_default>;
pinctrl-1 = <&mcasp3_pins_sleep>;
+ assigned-clocks = <&mcasp3_ahclkx_mux>;
+ assigned-clock-parents = <&sys_clkin2>;
status = "okay";
op-mode = <0>; /* MCASP_IIS_MODE */
diff --git a/arch/arm/boot/dts/am57xx-cl-som-am57x.dts b/arch/arm/boot/dts/am57xx-cl-som-am57x.dts
index 8d93882dc8d5..1c06cb76da07 100644
--- a/arch/arm/boot/dts/am57xx-cl-som-am57x.dts
+++ b/arch/arm/boot/dts/am57xx-cl-som-am57x.dts
@@ -545,7 +545,7 @@
ti,debounce-tol = /bits/ 16 <10>;
ti,debounce-rep = /bits/ 16 <1>;
- linux,wakeup;
+ wakeup-source;
};
};
diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi
index 4f6ae921656f..f74d3db4846d 100644
--- a/arch/arm/boot/dts/imx6qdl.dtsi
+++ b/arch/arm/boot/dts/imx6qdl.dtsi
@@ -896,7 +896,6 @@
#size-cells = <1>;
reg = <0x2100000 0x10000>;
ranges = <0 0x2100000 0x10000>;
- interrupt-parent = <&intc>;
clocks = <&clks IMX6QDL_CLK_CAAM_MEM>,
<&clks IMX6QDL_CLK_CAAM_ACLK>,
<&clks IMX6QDL_CLK_CAAM_IPG>,
diff --git a/arch/arm/boot/dts/kirkwood-ds112.dts b/arch/arm/boot/dts/kirkwood-ds112.dts
index bf4143c6cb8f..b84af3da8c84 100644
--- a/arch/arm/boot/dts/kirkwood-ds112.dts
+++ b/arch/arm/boot/dts/kirkwood-ds112.dts
@@ -14,7 +14,7 @@
#include "kirkwood-synology.dtsi"
/ {
- model = "Synology DS111";
+ model = "Synology DS112";
compatible = "synology,ds111", "marvell,kirkwood";
memory {
diff --git a/arch/arm/boot/dts/orion5x-linkstation-lswtgl.dts b/arch/arm/boot/dts/orion5x-linkstation-lswtgl.dts
index 420788229e6f..aae8a7aceab7 100644
--- a/arch/arm/boot/dts/orion5x-linkstation-lswtgl.dts
+++ b/arch/arm/boot/dts/orion5x-linkstation-lswtgl.dts
@@ -228,6 +228,37 @@
};
};
+&devbus_bootcs {
+ status = "okay";
+ devbus,keep-config;
+
+ flash@0 {
+ compatible = "jedec-flash";
+ reg = <0 0x40000>;
+ bank-width = <1>;
+
+ partitions {
+ compatible = "fixed-partitions";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ header@0 {
+ reg = <0 0x30000>;
+ read-only;
+ };
+
+ uboot@30000 {
+ reg = <0x30000 0xF000>;
+ read-only;
+ };
+
+ uboot_env@3F000 {
+ reg = <0x3F000 0x1000>;
+ };
+ };
+ };
+};
+
&mdio {
status = "okay";
diff --git a/arch/arm/boot/dts/r8a7791-porter.dts b/arch/arm/boot/dts/r8a7791-porter.dts
index 6713b1ea732b..01d239c3eaaa 100644
--- a/arch/arm/boot/dts/r8a7791-porter.dts
+++ b/arch/arm/boot/dts/r8a7791-porter.dts
@@ -283,7 +283,6 @@
pinctrl-names = "default";
status = "okay";
- renesas,enable-gpio = <&gpio5 31 GPIO_ACTIVE_HIGH>;
};
&usbphy {
diff --git a/arch/arm/boot/dts/sama5d2-pinfunc.h b/arch/arm/boot/dts/sama5d2-pinfunc.h
index 1afe24629d1f..b0c912feaa2f 100644
--- a/arch/arm/boot/dts/sama5d2-pinfunc.h
+++ b/arch/arm/boot/dts/sama5d2-pinfunc.h
@@ -90,7 +90,7 @@
#define PIN_PA14__I2SC1_MCK PINMUX_PIN(PIN_PA14, 4, 2)
#define PIN_PA14__FLEXCOM3_IO2 PINMUX_PIN(PIN_PA14, 5, 1)
#define PIN_PA14__D9 PINMUX_PIN(PIN_PA14, 6, 2)
-#define PIN_PA15 14
+#define PIN_PA15 15
#define PIN_PA15__GPIO PINMUX_PIN(PIN_PA15, 0, 0)
#define PIN_PA15__SPI0_MOSI PINMUX_PIN(PIN_PA15, 1, 1)
#define PIN_PA15__TF1 PINMUX_PIN(PIN_PA15, 2, 1)
diff --git a/arch/arm/boot/dts/tps65217.dtsi b/arch/arm/boot/dts/tps65217.dtsi
new file mode 100644
index 000000000000..a63272422d76
--- /dev/null
+++ b/arch/arm/boot/dts/tps65217.dtsi
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2012 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Integrated Power Management Chip
+ * http://www.ti.com/lit/ds/symlink/tps65217.pdf
+ */
+
+&tps {
+ compatible = "ti,tps65217";
+
+ regulators {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ dcdc1_reg: regulator@0 {
+ reg = <0>;
+ regulator-compatible = "dcdc1";
+ };
+
+ dcdc2_reg: regulator@1 {
+ reg = <1>;
+ regulator-compatible = "dcdc2";
+ };
+
+ dcdc3_reg: regulator@2 {
+ reg = <2>;
+ regulator-compatible = "dcdc3";
+ };
+
+ ldo1_reg: regulator@3 {
+ reg = <3>;
+ regulator-compatible = "ldo1";
+ };
+
+ ldo2_reg: regulator@4 {
+ reg = <4>;
+ regulator-compatible = "ldo2";
+ };
+
+ ldo3_reg: regulator@5 {
+ reg = <5>;
+ regulator-compatible = "ldo3";
+ };
+
+ ldo4_reg: regulator@6 {
+ reg = <6>;
+ regulator-compatible = "ldo4";
+ };
+ };
+};
diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h
index 7da5503c0591..e08d15184056 100644
--- a/arch/arm/include/asm/arch_gicv3.h
+++ b/arch/arm/include/asm/arch_gicv3.h
@@ -117,6 +117,7 @@ static inline u32 gic_read_iar(void)
u32 irqstat;
asm volatile("mrc " __stringify(ICC_IAR1) : "=r" (irqstat));
+ dsb(sy);
return irqstat;
}
diff --git a/arch/arm/include/asm/xen/page-coherent.h b/arch/arm/include/asm/xen/page-coherent.h
index 0375c8caa061..9408a994cc91 100644
--- a/arch/arm/include/asm/xen/page-coherent.h
+++ b/arch/arm/include/asm/xen/page-coherent.h
@@ -35,14 +35,21 @@ static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
dma_addr_t dev_addr, unsigned long offset, size_t size,
enum dma_data_direction dir, struct dma_attrs *attrs)
{
- bool local = XEN_PFN_DOWN(dev_addr) == page_to_xen_pfn(page);
+ unsigned long page_pfn = page_to_xen_pfn(page);
+ unsigned long dev_pfn = XEN_PFN_DOWN(dev_addr);
+ unsigned long compound_pages =
+ (1<<compound_order(page)) * XEN_PFN_PER_PAGE;
+ bool local = (page_pfn <= dev_pfn) &&
+ (dev_pfn - page_pfn < compound_pages);
+
/*
- * Dom0 is mapped 1:1, while the Linux page can be spanned accross
- * multiple Xen page, it's not possible to have a mix of local and
- * foreign Xen page. So if the first xen_pfn == mfn the page is local
- * otherwise it's a foreign page grant-mapped in dom0. If the page is
- * local we can safely call the native dma_ops function, otherwise we
- * call the xen specific function.
+ * Dom0 is mapped 1:1, while the Linux page can span across
+ * multiple Xen pages, it's not possible for it to contain a
+ * mix of local and foreign Xen pages. So if the first xen_pfn
+ * == mfn the page is local otherwise it's a foreign page
+ * grant-mapped in dom0. If the page is local we can safely
+ * call the native dma_ops function, otherwise we call the xen
+ * specific function.
*/
if (local)
__generic_dma_ops(hwdev)->map_page(hwdev, page, offset, size, dir, attrs);
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 2c5f160be65e..ad325a8c7e1e 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -88,6 +88,7 @@ obj-$(CONFIG_DEBUG_LL) += debug.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_ARM_VIRT_EXT) += hyp-stub.o
+AFLAGS_hyp-stub.o :=-Wa,-march=armv7-a
ifeq ($(CONFIG_ARM_PSCI),y)
obj-$(CONFIG_SMP) += psci_smp.o
endif
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 5fa69d7bae58..99361f11354a 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -161,7 +161,7 @@ static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
u64 val;
val = kvm_arm_timer_get_reg(vcpu, reg->id);
- return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id));
+ return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0;
}
static unsigned long num_core_regs(void)
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index 7f33b2056ae6..0f6600f05137 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -206,7 +206,8 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
run->mmio.is_write = is_write;
run->mmio.phys_addr = fault_ipa;
run->mmio.len = len;
- memcpy(run->mmio.data, data_buf, len);
+ if (is_write)
+ memcpy(run->mmio.data, data_buf, len);
if (!ret) {
/* We handled the access successfully in the kernel. */
diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c
index 809827265fb3..bab814d2f37d 100644
--- a/arch/arm/mach-omap2/board-generic.c
+++ b/arch/arm/mach-omap2/board-generic.c
@@ -18,6 +18,7 @@
#include <asm/setup.h>
#include <asm/mach/arch.h>
+#include <asm/system_info.h>
#include "common.h"
@@ -77,12 +78,31 @@ static const char *const n900_boards_compat[] __initconst = {
NULL,
};
+/* Set system_rev from atags */
+static void __init rx51_set_system_rev(const struct tag *tags)
+{
+ const struct tag *tag;
+
+ if (tags->hdr.tag != ATAG_CORE)
+ return;
+
+ for_each_tag(tag, tags) {
+ if (tag->hdr.tag == ATAG_REVISION) {
+ system_rev = tag->u.revision.rev;
+ break;
+ }
+ }
+}
+
/* Legacy userspace on Nokia N900 needs ATAGS exported in /proc/atags,
* save them while the data is still not overwritten
*/
static void __init rx51_reserve(void)
{
- save_atags((const struct tag *)(PAGE_OFFSET + 0x100));
+ const struct tag *tags = (const struct tag *)(PAGE_OFFSET + 0x100);
+
+ save_atags(tags);
+ rx51_set_system_rev(tags);
omap_reserve();
}
diff --git a/arch/arm/mach-omap2/gpmc-onenand.c b/arch/arm/mach-omap2/gpmc-onenand.c
index 7b76ce01c21d..8633c703546a 100644
--- a/arch/arm/mach-omap2/gpmc-onenand.c
+++ b/arch/arm/mach-omap2/gpmc-onenand.c
@@ -101,10 +101,8 @@ static void omap2_onenand_set_async_mode(void __iomem *onenand_base)
static void set_onenand_cfg(void __iomem *onenand_base)
{
- u32 reg;
+ u32 reg = ONENAND_SYS_CFG1_RDY | ONENAND_SYS_CFG1_INT;
- reg = readw(onenand_base + ONENAND_REG_SYS_CFG1);
- reg &= ~((0x7 << ONENAND_SYS_CFG1_BRL_SHIFT) | (0x7 << 9));
reg |= (latency << ONENAND_SYS_CFG1_BRL_SHIFT) |
ONENAND_SYS_CFG1_BL_16;
if (onenand_flags & ONENAND_FLAG_SYNCREAD)
@@ -123,6 +121,7 @@ static void set_onenand_cfg(void __iomem *onenand_base)
reg |= ONENAND_SYS_CFG1_VHF;
else
reg &= ~ONENAND_SYS_CFG1_VHF;
+
writew(reg, onenand_base + ONENAND_REG_SYS_CFG1);
}
@@ -289,6 +288,7 @@ static int omap2_onenand_setup_async(void __iomem *onenand_base)
}
}
+ onenand_async.sync_write = true;
omap2_onenand_calc_async_timings(&t);
ret = gpmc_cs_program_settings(gpmc_onenand_data->cs, &onenand_async);
diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c
index 0437537751bc..f7ff3b9dad87 100644
--- a/arch/arm/mach-omap2/omap_device.c
+++ b/arch/arm/mach-omap2/omap_device.c
@@ -191,12 +191,22 @@ static int _omap_device_notifier_call(struct notifier_block *nb,
{
struct platform_device *pdev = to_platform_device(dev);
struct omap_device *od;
+ int err;
switch (event) {
case BUS_NOTIFY_DEL_DEVICE:
if (pdev->archdata.od)
omap_device_delete(pdev->archdata.od);
break;
+ case BUS_NOTIFY_UNBOUND_DRIVER:
+ od = to_omap_device(pdev);
+ if (od && (od->_state == OMAP_DEVICE_STATE_ENABLED)) {
+ dev_info(dev, "enabled after unload, idling\n");
+ err = omap_device_idle(pdev);
+ if (err)
+ dev_err(dev, "failed to idle\n");
+ }
+ break;
case BUS_NOTIFY_ADD_DEVICE:
if (pdev->dev.of_node)
omap_device_build_from_dt(pdev);
@@ -602,8 +612,10 @@ static int _od_runtime_resume(struct device *dev)
int ret;
ret = omap_device_enable(pdev);
- if (ret)
+ if (ret) {
+ dev_err(dev, "use pm_runtime_put_sync_suspend() in driver?\n");
return ret;
+ }
return pm_generic_runtime_resume(dev);
}
diff --git a/arch/arm/mach-shmobile/common.h b/arch/arm/mach-shmobile/common.h
index 9cb11215ceba..b3a4ed5289ec 100644
--- a/arch/arm/mach-shmobile/common.h
+++ b/arch/arm/mach-shmobile/common.h
@@ -4,7 +4,6 @@
extern void shmobile_init_delay(void);
extern void shmobile_boot_vector(void);
extern unsigned long shmobile_boot_fn;
-extern unsigned long shmobile_boot_arg;
extern unsigned long shmobile_boot_size;
extern void shmobile_smp_boot(void);
extern void shmobile_smp_sleep(void);
diff --git a/arch/arm/mach-shmobile/headsmp-scu.S b/arch/arm/mach-shmobile/headsmp-scu.S
index fa5248c52399..5e503d91ad70 100644
--- a/arch/arm/mach-shmobile/headsmp-scu.S
+++ b/arch/arm/mach-shmobile/headsmp-scu.S
@@ -38,9 +38,3 @@ ENTRY(shmobile_boot_scu)
b secondary_startup
ENDPROC(shmobile_boot_scu)
-
- .text
- .align 2
- .globl shmobile_scu_base
-shmobile_scu_base:
- .space 4
diff --git a/arch/arm/mach-shmobile/headsmp.S b/arch/arm/mach-shmobile/headsmp.S
index 330c1fc63197..32e0bf6e3ccb 100644
--- a/arch/arm/mach-shmobile/headsmp.S
+++ b/arch/arm/mach-shmobile/headsmp.S
@@ -24,7 +24,6 @@
.arm
.align 12
ENTRY(shmobile_boot_vector)
- ldr r0, 2f
ldr r1, 1f
bx r1
@@ -34,9 +33,6 @@ ENDPROC(shmobile_boot_vector)
.globl shmobile_boot_fn
shmobile_boot_fn:
1: .space 4
- .globl shmobile_boot_arg
-shmobile_boot_arg:
-2: .space 4
.globl shmobile_boot_size
shmobile_boot_size:
.long . - shmobile_boot_vector
@@ -46,13 +42,15 @@ shmobile_boot_size:
*/
ENTRY(shmobile_smp_boot)
- @ r0 = MPIDR_HWID_BITMASK
mrc p15, 0, r1, c0, c0, 5 @ r1 = MPIDR
- and r0, r1, r0 @ r0 = cpu_logical_map() value
+ and r0, r1, #0xffffff @ MPIDR_HWID_BITMASK
+ @ r0 = cpu_logical_map() value
mov r1, #0 @ r1 = CPU index
- adr r5, 1f @ array of per-cpu mpidr values
- adr r6, 2f @ array of per-cpu functions
- adr r7, 3f @ array of per-cpu arguments
+ adr r2, 1f
+ ldmia r2, {r5, r6, r7}
+ add r5, r5, r2 @ array of per-cpu mpidr values
+ add r6, r6, r2 @ array of per-cpu functions
+ add r7, r7, r2 @ array of per-cpu arguments
shmobile_smp_boot_find_mpidr:
ldr r8, [r5, r1, lsl #2]
@@ -80,12 +78,18 @@ ENTRY(shmobile_smp_sleep)
b shmobile_smp_boot
ENDPROC(shmobile_smp_sleep)
+ .align 2
+1: .long shmobile_smp_mpidr - .
+ .long shmobile_smp_fn - 1b
+ .long shmobile_smp_arg - 1b
+
+ .bss
.globl shmobile_smp_mpidr
shmobile_smp_mpidr:
-1: .space NR_CPUS * 4
+ .space NR_CPUS * 4
.globl shmobile_smp_fn
shmobile_smp_fn:
-2: .space NR_CPUS * 4
+ .space NR_CPUS * 4
.globl shmobile_smp_arg
shmobile_smp_arg:
-3: .space NR_CPUS * 4
+ .space NR_CPUS * 4
diff --git a/arch/arm/mach-shmobile/platsmp-apmu.c b/arch/arm/mach-shmobile/platsmp-apmu.c
index 911884f7e28b..aba75c89f9c1 100644
--- a/arch/arm/mach-shmobile/platsmp-apmu.c
+++ b/arch/arm/mach-shmobile/platsmp-apmu.c
@@ -123,7 +123,6 @@ void __init shmobile_smp_apmu_prepare_cpus(unsigned int max_cpus,
{
/* install boot code shared by all CPUs */
shmobile_boot_fn = virt_to_phys(shmobile_smp_boot);
- shmobile_boot_arg = MPIDR_HWID_BITMASK;
/* perform per-cpu setup */
apmu_parse_cfg(apmu_init_cpu, apmu_config, num);
diff --git a/arch/arm/mach-shmobile/platsmp-scu.c b/arch/arm/mach-shmobile/platsmp-scu.c
index 64663110ab6c..081a097c9219 100644
--- a/arch/arm/mach-shmobile/platsmp-scu.c
+++ b/arch/arm/mach-shmobile/platsmp-scu.c
@@ -17,6 +17,9 @@
#include <asm/smp_scu.h>
#include "common.h"
+
+void __iomem *shmobile_scu_base;
+
static int shmobile_smp_scu_notifier_call(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
@@ -41,7 +44,6 @@ void __init shmobile_smp_scu_prepare_cpus(unsigned int max_cpus)
{
/* install boot code shared by all CPUs */
shmobile_boot_fn = virt_to_phys(shmobile_smp_boot);
- shmobile_boot_arg = MPIDR_HWID_BITMASK;
/* enable SCU and cache coherency on booting CPU */
scu_enable(shmobile_scu_base);
diff --git a/arch/arm/mach-shmobile/smp-r8a7779.c b/arch/arm/mach-shmobile/smp-r8a7779.c
index b854fe2095ad..0b024a9dbd43 100644
--- a/arch/arm/mach-shmobile/smp-r8a7779.c
+++ b/arch/arm/mach-shmobile/smp-r8a7779.c
@@ -92,8 +92,6 @@ static void __init r8a7779_smp_prepare_cpus(unsigned int max_cpus)
{
/* Map the reset vector (in headsmp-scu.S, headsmp.S) */
__raw_writel(__pa(shmobile_boot_vector), AVECR);
- shmobile_boot_fn = virt_to_phys(shmobile_boot_scu);
- shmobile_boot_arg = (unsigned long)shmobile_scu_base;
/* setup r8a7779 specific SCU bits */
shmobile_scu_base = IOMEM(R8A7779_SCU_BASE);
diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
index 4b4058db0781..66353caa35b9 100644
--- a/arch/arm/mm/mmap.c
+++ b/arch/arm/mm/mmap.c
@@ -173,7 +173,7 @@ unsigned long arch_mmap_rnd(void)
{
unsigned long rnd;
- rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);
+ rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
return rnd << PAGE_SHIFT;
}
diff --git a/arch/arm/mm/pageattr.c b/arch/arm/mm/pageattr.c
index cf30daff8932..d19b1ad29b07 100644
--- a/arch/arm/mm/pageattr.c
+++ b/arch/arm/mm/pageattr.c
@@ -49,6 +49,9 @@ static int change_memory_common(unsigned long addr, int numpages,
WARN_ON_ONCE(1);
}
+ if (!numpages)
+ return 0;
+
if (start < MODULES_VADDR || start >= MODULES_END)
return -EINVAL;
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index bf464de33f52..f50608674580 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -34,13 +34,13 @@
/*
* VMALLOC and SPARSEMEM_VMEMMAP ranges.
*
- * VMEMAP_SIZE: allows the whole VA space to be covered by a struct page array
+ * VMEMAP_SIZE: allows the whole linear region to be covered by a struct page array
* (rounded up to PUD_SIZE).
* VMALLOC_START: beginning of the kernel VA space
* VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space,
* fixed mappings and modules
*/
-#define VMEMMAP_SIZE ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE)
+#define VMEMMAP_SIZE ALIGN((1UL << (VA_BITS - PAGE_SHIFT - 1)) * sizeof(struct page), PUD_SIZE)
#ifndef CONFIG_KASAN
#define VMALLOC_START (VA_START)
@@ -51,7 +51,8 @@
#define VMALLOC_END (PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K)
-#define vmemmap ((struct page *)(VMALLOC_END + SZ_64K))
+#define VMEMMAP_START (VMALLOC_END + SZ_64K)
+#define vmemmap ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT))
#define FIRST_USER_ADDRESS 0UL
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index fcb778899a38..9e54ad7c240a 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -194,7 +194,7 @@ static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
u64 val;
val = kvm_arm_timer_get_reg(vcpu, reg->id);
- return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id));
+ return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)) ? -EFAULT : 0;
}
/**
diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
index 9142e082f5f3..5dd2a26444ec 100644
--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
@@ -149,16 +149,6 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
switch (nr_pri_bits) {
case 7:
- write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2);
- write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2);
- case 6:
- write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2);
- default:
- write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2);
- }
-
- switch (nr_pri_bits) {
- case 7:
write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2);
write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2);
case 6:
@@ -167,6 +157,16 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2);
}
+ switch (nr_pri_bits) {
+ case 7:
+ write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2);
+ write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2);
+ case 6:
+ write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2);
+ default:
+ write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2);
+ }
+
switch (max_lr_idx) {
case 15:
write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)], ICH_LR15_EL2);
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index f3b061e67bfe..7802f216a67a 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -319,8 +319,8 @@ void __init mem_init(void)
#endif
MLG(VMALLOC_START, VMALLOC_END),
#ifdef CONFIG_SPARSEMEM_VMEMMAP
- MLG((unsigned long)vmemmap,
- (unsigned long)vmemmap + VMEMMAP_SIZE),
+ MLG(VMEMMAP_START,
+ VMEMMAP_START + VMEMMAP_SIZE),
MLM((unsigned long)virt_to_page(PAGE_OFFSET),
(unsigned long)virt_to_page(high_memory)),
#endif
diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index 4c893b5189dd..232f787a088a 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -53,10 +53,10 @@ unsigned long arch_mmap_rnd(void)
#ifdef CONFIG_COMPAT
if (test_thread_flag(TIF_32BIT))
- rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_compat_bits) - 1);
+ rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
else
#endif
- rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);
+ rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
return rnd << PAGE_SHIFT;
}
diff --git a/arch/mips/jz4740/gpio.c b/arch/mips/jz4740/gpio.c
index 8c6d76c9b2d6..d9907e57e9b9 100644
--- a/arch/mips/jz4740/gpio.c
+++ b/arch/mips/jz4740/gpio.c
@@ -270,7 +270,7 @@ uint32_t jz_gpio_port_get_value(int port, uint32_t mask)
}
EXPORT_SYMBOL(jz_gpio_port_get_value);
-#define IRQ_TO_BIT(irq) BIT(irq_to_gpio(irq) & 0x1f)
+#define IRQ_TO_BIT(irq) BIT((irq - JZ4740_IRQ_GPIO(0)) & 0x1f)
static void jz_gpio_check_trigger_both(struct jz_gpio_chip *chip, unsigned int irq)
{
diff --git a/arch/mips/kernel/r2300_fpu.S b/arch/mips/kernel/r2300_fpu.S
index 5ce3b746cedc..b4ac6374a38f 100644
--- a/arch/mips/kernel/r2300_fpu.S
+++ b/arch/mips/kernel/r2300_fpu.S
@@ -125,7 +125,7 @@ LEAF(_restore_fp_context)
END(_restore_fp_context)
.set reorder
- .type fault@function
+ .type fault, @function
.ent fault
fault: li v0, -EFAULT
jr ra
diff --git a/arch/mips/kernel/r4k_fpu.S b/arch/mips/kernel/r4k_fpu.S
index f09546ee2cdc..17732f876eff 100644
--- a/arch/mips/kernel/r4k_fpu.S
+++ b/arch/mips/kernel/r4k_fpu.S
@@ -358,7 +358,7 @@ LEAF(_restore_msa_all_upper)
.set reorder
- .type fault@function
+ .type fault, @function
.ent fault
fault: li v0, -EFAULT # failure
jr ra
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index ae790c575d4f..bf14da9f3e33 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -690,15 +690,15 @@ static int simulate_sync(struct pt_regs *regs, unsigned int opcode)
asmlinkage void do_ov(struct pt_regs *regs)
{
enum ctx_state prev_state;
- siginfo_t info;
+ siginfo_t info = {
+ .si_signo = SIGFPE,
+ .si_code = FPE_INTOVF,
+ .si_addr = (void __user *)regs->cp0_epc,
+ };
prev_state = exception_enter();
die_if_kernel("Integer overflow", regs);
- info.si_code = FPE_INTOVF;
- info.si_signo = SIGFPE;
- info.si_errno = 0;
- info.si_addr = (void __user *) regs->cp0_epc;
force_sig_info(SIGFPE, &info, current);
exception_exit(prev_state);
}
@@ -874,7 +874,7 @@ out:
void do_trap_or_bp(struct pt_regs *regs, unsigned int code,
const char *str)
{
- siginfo_t info;
+ siginfo_t info = { 0 };
char b[40];
#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
@@ -903,7 +903,6 @@ void do_trap_or_bp(struct pt_regs *regs, unsigned int code,
else
info.si_code = FPE_INTOVF;
info.si_signo = SIGFPE;
- info.si_errno = 0;
info.si_addr = (void __user *) regs->cp0_epc;
force_sig_info(SIGFPE, &info, current);
break;
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 8bc3977576e6..3110447ab1e9 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -702,7 +702,7 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
} else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U128) {
void __user *uaddr = (void __user *)(long)reg->addr;
- return copy_to_user(uaddr, vs, 16);
+ return copy_to_user(uaddr, vs, 16) ? -EFAULT : 0;
} else {
return -EINVAL;
}
@@ -732,7 +732,7 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
} else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U128) {
void __user *uaddr = (void __user *)(long)reg->addr;
- return copy_from_user(vs, uaddr, 16);
+ return copy_from_user(vs, uaddr, 16) ? -EFAULT : 0;
} else {
return -EINVAL;
}
diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c
index 5c81fdd032c3..353037699512 100644
--- a/arch/mips/mm/mmap.c
+++ b/arch/mips/mm/mmap.c
@@ -146,7 +146,7 @@ unsigned long arch_mmap_rnd(void)
{
unsigned long rnd;
- rnd = (unsigned long)get_random_int();
+ rnd = get_random_long();
rnd <<= PAGE_SHIFT;
if (TASK_IS_32BIT_ADDR)
rnd &= 0xfffffful;
@@ -174,7 +174,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
static inline unsigned long brk_rnd(void)
{
- unsigned long rnd = get_random_int();
+ unsigned long rnd = get_random_long();
rnd = rnd << PAGE_SHIFT;
/* 8MB for 32bit, 256MB for 64bit */
diff --git a/arch/mips/mm/sc-mips.c b/arch/mips/mm/sc-mips.c
index 249647578e58..91dec32c77b7 100644
--- a/arch/mips/mm/sc-mips.c
+++ b/arch/mips/mm/sc-mips.c
@@ -164,11 +164,13 @@ static int __init mips_sc_probe_cm3(void)
sets = cfg & CM_GCR_L2_CONFIG_SET_SIZE_MSK;
sets >>= CM_GCR_L2_CONFIG_SET_SIZE_SHF;
- c->scache.sets = 64 << sets;
+ if (sets)
+ c->scache.sets = 64 << sets;
line_sz = cfg & CM_GCR_L2_CONFIG_LINE_SIZE_MSK;
line_sz >>= CM_GCR_L2_CONFIG_LINE_SIZE_SHF;
- c->scache.linesz = 2 << line_sz;
+ if (line_sz)
+ c->scache.linesz = 2 << line_sz;
assoc = cfg & CM_GCR_L2_CONFIG_ASSOC_MSK;
assoc >>= CM_GCR_L2_CONFIG_ASSOC_SHF;
@@ -176,9 +178,12 @@ static int __init mips_sc_probe_cm3(void)
c->scache.waysize = c->scache.sets * c->scache.linesz;
c->scache.waybit = __ffs(c->scache.waysize);
- c->scache.flags &= ~MIPS_CACHE_NOT_PRESENT;
+ if (c->scache.linesz) {
+ c->scache.flags &= ~MIPS_CACHE_NOT_PRESENT;
+ return 1;
+ }
- return 1;
+ return 0;
}
static inline int __init mips_sc_probe(void)
diff --git a/arch/parisc/include/asm/floppy.h b/arch/parisc/include/asm/floppy.h
index f84ff12574b7..6d8276cd25ca 100644
--- a/arch/parisc/include/asm/floppy.h
+++ b/arch/parisc/include/asm/floppy.h
@@ -33,7 +33,7 @@
* floppy accesses go through the track buffer.
*/
#define _CROSS_64KB(a,s,vdma) \
-(!vdma && ((unsigned long)(a)/K_64 != ((unsigned long)(a) + (s) - 1) / K_64))
+(!(vdma) && ((unsigned long)(a)/K_64 != ((unsigned long)(a) + (s) - 1) / K_64))
#define CROSS_64KB(a,s) _CROSS_64KB(a,s,use_virtual_dma & 1)
diff --git a/arch/parisc/include/uapi/asm/unistd.h b/arch/parisc/include/uapi/asm/unistd.h
index 35bdccbb2036..b75039f92116 100644
--- a/arch/parisc/include/uapi/asm/unistd.h
+++ b/arch/parisc/include/uapi/asm/unistd.h
@@ -361,8 +361,9 @@
#define __NR_membarrier (__NR_Linux + 343)
#define __NR_userfaultfd (__NR_Linux + 344)
#define __NR_mlock2 (__NR_Linux + 345)
+#define __NR_copy_file_range (__NR_Linux + 346)
-#define __NR_Linux_syscalls (__NR_mlock2 + 1)
+#define __NR_Linux_syscalls (__NR_copy_file_range + 1)
#define __IGNORE_select /* newselect */
diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c
index 9585c81f755f..ce0b2b4075c7 100644
--- a/arch/parisc/kernel/ptrace.c
+++ b/arch/parisc/kernel/ptrace.c
@@ -269,14 +269,19 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
long do_syscall_trace_enter(struct pt_regs *regs)
{
- long ret = 0;
-
/* Do the secure computing check first. */
secure_computing_strict(regs->gr[20]);
if (test_thread_flag(TIF_SYSCALL_TRACE) &&
- tracehook_report_syscall_entry(regs))
- ret = -1L;
+ tracehook_report_syscall_entry(regs)) {
+ /*
+ * Tracing decided this syscall should not happen or the
+ * debugger stored an invalid system call number. Skip
+ * the system call and the system call restart handling.
+ */
+ regs->gr[20] = -1UL;
+ goto out;
+ }
#ifdef CONFIG_64BIT
if (!is_compat_task())
@@ -290,7 +295,8 @@ long do_syscall_trace_enter(struct pt_regs *regs)
regs->gr[24] & 0xffffffff,
regs->gr[23] & 0xffffffff);
- return ret ? : regs->gr[20];
+out:
+ return regs->gr[20];
}
void do_syscall_trace_exit(struct pt_regs *regs)
diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
index 3fbd7252a4b2..fbafa0d0e2bf 100644
--- a/arch/parisc/kernel/syscall.S
+++ b/arch/parisc/kernel/syscall.S
@@ -343,7 +343,7 @@ tracesys_next:
#endif
comiclr,>>= __NR_Linux_syscalls, %r20, %r0
- b,n .Lsyscall_nosys
+ b,n .Ltracesys_nosys
LDREGX %r20(%r19), %r19
@@ -359,6 +359,9 @@ tracesys_next:
be 0(%sr7,%r19)
ldo R%tracesys_exit(%r2),%r2
+.Ltracesys_nosys:
+ ldo -ENOSYS(%r0),%r28 /* set errno */
+
/* Do *not* call this function on the gateway page, because it
makes a direct call to syscall_trace. */
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index d4ffcfbc9885..585d50fc75c0 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -441,6 +441,7 @@
ENTRY_SAME(membarrier)
ENTRY_SAME(userfaultfd)
ENTRY_SAME(mlock2) /* 345 */
+ ENTRY_SAME(copy_file_range)
.ifne (. - 90b) - (__NR_Linux_syscalls * (91b - 90b))
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 301be3126ae3..650cfb31ea3d 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -418,8 +418,7 @@ static void *eeh_rmv_device(void *data, void *userdata)
eeh_pcid_put(dev);
if (driver->err_handler &&
driver->err_handler->error_detected &&
- driver->err_handler->slot_reset &&
- driver->err_handler->resume)
+ driver->err_handler->slot_reset)
return NULL;
}
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 05e804cdecaa..aec9a1b1d25b 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -109,8 +109,9 @@ void arch_unregister_hw_breakpoint(struct perf_event *bp)
* If the breakpoint is unregistered between a hw_breakpoint_handler()
* and the single_step_dabr_instruction(), then cleanup the breakpoint
* restoration variables to prevent dangling pointers.
+ * FIXME, this should not be using bp->ctx at all! Sayeth peterz.
*/
- if (bp->ctx && bp->ctx->task)
+ if (bp->ctx && bp->ctx->task && bp->ctx->task != ((void *)-1L))
bp->ctx->task->thread.last_hit_ubp = NULL;
}
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index dccc87e8fee5..3c5736e52a14 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1768,9 +1768,9 @@ static inline unsigned long brk_rnd(void)
/* 8MB for 32bit, 1GB for 64bit */
if (is_32bit_task())
- rnd = (long)(get_random_int() % (1<<(23-PAGE_SHIFT)));
+ rnd = (get_random_long() % (1UL<<(23-PAGE_SHIFT)));
else
- rnd = (long)(get_random_int() % (1<<(30-PAGE_SHIFT)));
+ rnd = (get_random_long() % (1UL<<(30-PAGE_SHIFT)));
return rnd << PAGE_SHIFT;
}
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
index 0762c1e08c88..edb09912f0c9 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/hash64_64k.c
@@ -111,7 +111,13 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
*/
if (!(old_pte & _PAGE_COMBO)) {
flush_hash_page(vpn, rpte, MMU_PAGE_64K, ssize, flags);
- old_pte &= ~_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND;
+ /*
+ * clear the old slot details from the old and new pte.
+ * On hash insert failure we use old pte value and we don't
+ * want slot information there if we have a insert failure.
+ */
+ old_pte &= ~(_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND);
+ new_pte &= ~(_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND);
goto htab_insert_hpte;
}
/*
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
index 49b152b0f926..eb2accdd76fd 100644
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ b/arch/powerpc/mm/hugepage-hash64.c
@@ -78,9 +78,19 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
* base page size. This is because demote_segment won't flush
* hash page table entries.
*/
- if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO))
+ if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO)) {
flush_hash_hugepage(vsid, ea, pmdp, MMU_PAGE_64K,
ssize, flags);
+ /*
+ * With THP, we also clear the slot information with
+ * respect to all the 64K hash pte mapping the 16MB
+ * page. They are all invalid now. This make sure we
+ * don't find the slot valid when we fault with 4k
+ * base page size.
+ *
+ */
+ memset(hpte_slot_array, 0, PTE_FRAG_SIZE);
+ }
}
valid = hpte_valid(hpte_slot_array, index);
diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/hugetlbpage-book3e.c
index 7e6d0880813f..83a8be791e06 100644
--- a/arch/powerpc/mm/hugetlbpage-book3e.c
+++ b/arch/powerpc/mm/hugetlbpage-book3e.c
@@ -8,6 +8,8 @@
#include <linux/mm.h>
#include <linux/hugetlb.h>
+#include <asm/mmu.h>
+
#ifdef CONFIG_PPC_FSL_BOOK3E
#ifdef CONFIG_PPC64
static inline int tlb1_next(void)
@@ -60,6 +62,14 @@ static inline void book3e_tlb_lock(void)
unsigned long tmp;
int token = smp_processor_id() + 1;
+ /*
+ * Besides being unnecessary in the absence of SMT, this
+ * check prevents trying to do lbarx/stbcx. on e5500 which
+ * doesn't implement either feature.
+ */
+ if (!cpu_has_feature(CPU_FTR_SMT))
+ return;
+
asm volatile("1: lbarx %0, 0, %1;"
"cmpwi %0, 0;"
"bne 2f;"
@@ -80,6 +90,9 @@ static inline void book3e_tlb_unlock(void)
{
struct paca_struct *paca = get_paca();
+ if (!cpu_has_feature(CPU_FTR_SMT))
+ return;
+
isync();
paca->tcd_ptr->lock = 0;
}
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
index 0f0502e12f6c..4087705ba90f 100644
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap.c
@@ -59,9 +59,9 @@ unsigned long arch_mmap_rnd(void)
/* 8MB for 32bit, 1GB for 64bit */
if (is_32bit_task())
- rnd = (unsigned long)get_random_int() % (1<<(23-PAGE_SHIFT));
+ rnd = get_random_long() % (1<<(23-PAGE_SHIFT));
else
- rnd = (unsigned long)get_random_int() % (1<<(30-PAGE_SHIFT));
+ rnd = get_random_long() % (1UL<<(30-PAGE_SHIFT));
return rnd << PAGE_SHIFT;
}
diff --git a/arch/s390/include/asm/fpu/internal.h b/arch/s390/include/asm/fpu/internal.h
index ea91ddfe54eb..629c90865a07 100644
--- a/arch/s390/include/asm/fpu/internal.h
+++ b/arch/s390/include/asm/fpu/internal.h
@@ -40,6 +40,7 @@ static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs)
static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
{
fpregs->pad = 0;
+ fpregs->fpc = fpu->fpc;
if (MACHINE_HAS_VX)
convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs);
else
@@ -49,6 +50,7 @@ static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu)
{
+ fpu->fpc = fpregs->fpc;
if (MACHINE_HAS_VX)
convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs);
else
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index 66c94417c0ba..4af60374eba0 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -271,7 +271,7 @@ static int restore_sigregs_ext32(struct pt_regs *regs,
/* Restore high gprs from signal stack */
if (__copy_from_user(&gprs_high, &sregs_ext->gprs_high,
- sizeof(&sregs_ext->gprs_high)))
+ sizeof(sregs_ext->gprs_high)))
return -EFAULT;
for (i = 0; i < NUM_GPRS; i++)
*(__u32 *)&regs->gprs[i] = gprs_high[i];
diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile
index eaee14637d93..8496a074bd0e 100644
--- a/arch/sparc/Makefile
+++ b/arch/sparc/Makefile
@@ -24,7 +24,13 @@ LDFLAGS := -m elf32_sparc
export BITS := 32
UTS_MACHINE := sparc
+# We are adding -Wa,-Av8 to KBUILD_CFLAGS to deal with a specs bug in some
+# versions of gcc. Some gcc versions won't pass -Av8 to binutils when you
+# give -mcpu=v8. This silently worked with older bintutils versions but
+# does not any more.
KBUILD_CFLAGS += -m32 -mcpu=v8 -pipe -mno-fpu -fcall-used-g5 -fcall-used-g7
+KBUILD_CFLAGS += -Wa,-Av8
+
KBUILD_AFLAGS += -m32 -Wa,-Av8
else
diff --git a/arch/sparc/include/uapi/asm/unistd.h b/arch/sparc/include/uapi/asm/unistd.h
index 1c26d440d288..b6de8b10a55b 100644
--- a/arch/sparc/include/uapi/asm/unistd.h
+++ b/arch/sparc/include/uapi/asm/unistd.h
@@ -422,8 +422,9 @@
#define __NR_listen 354
#define __NR_setsockopt 355
#define __NR_mlock2 356
+#define __NR_copy_file_range 357
-#define NR_syscalls 357
+#define NR_syscalls 358
/* Bitmask values returned from kern_features system call. */
#define KERN_FEATURE_MIXED_MODE_STACK 0x00000001
diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S
index 33c02b15f478..a83707c83be8 100644
--- a/arch/sparc/kernel/entry.S
+++ b/arch/sparc/kernel/entry.S
@@ -948,7 +948,24 @@ linux_syscall_trace:
cmp %o0, 0
bne 3f
mov -ENOSYS, %o0
+
+ /* Syscall tracing can modify the registers. */
+ ld [%sp + STACKFRAME_SZ + PT_G1], %g1
+ sethi %hi(sys_call_table), %l7
+ ld [%sp + STACKFRAME_SZ + PT_I0], %i0
+ or %l7, %lo(sys_call_table), %l7
+ ld [%sp + STACKFRAME_SZ + PT_I1], %i1
+ ld [%sp + STACKFRAME_SZ + PT_I2], %i2
+ ld [%sp + STACKFRAME_SZ + PT_I3], %i3
+ ld [%sp + STACKFRAME_SZ + PT_I4], %i4
+ ld [%sp + STACKFRAME_SZ + PT_I5], %i5
+ cmp %g1, NR_syscalls
+ bgeu 3f
+ mov -ENOSYS, %o0
+
+ sll %g1, 2, %l4
mov %i0, %o0
+ ld [%l7 + %l4], %l7
mov %i1, %o1
mov %i2, %o2
mov %i3, %o3
diff --git a/arch/sparc/kernel/hvcalls.S b/arch/sparc/kernel/hvcalls.S
index afbaba52d2f1..d127130bf424 100644
--- a/arch/sparc/kernel/hvcalls.S
+++ b/arch/sparc/kernel/hvcalls.S
@@ -338,8 +338,9 @@ ENTRY(sun4v_mach_set_watchdog)
mov %o1, %o4
mov HV_FAST_MACH_SET_WATCHDOG, %o5
ta HV_FAST_TRAP
+ brnz,a,pn %o4, 0f
stx %o1, [%o4]
- retl
+0: retl
nop
ENDPROC(sun4v_mach_set_watchdog)
diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c
index d88beff47bab..39aaec173f66 100644
--- a/arch/sparc/kernel/signal_64.c
+++ b/arch/sparc/kernel/signal_64.c
@@ -52,7 +52,7 @@ asmlinkage void sparc64_set_context(struct pt_regs *regs)
unsigned char fenab;
int err;
- flush_user_windows();
+ synchronize_user_stack();
if (get_thread_wsaved() ||
(((unsigned long)ucp) & (sizeof(unsigned long)-1)) ||
(!__access_ok(ucp, sizeof(*ucp))))
diff --git a/arch/sparc/kernel/sparc_ksyms_64.c b/arch/sparc/kernel/sparc_ksyms_64.c
index a92d5d2c46a3..9e034f29dcc5 100644
--- a/arch/sparc/kernel/sparc_ksyms_64.c
+++ b/arch/sparc/kernel/sparc_ksyms_64.c
@@ -37,6 +37,7 @@ EXPORT_SYMBOL(sun4v_niagara_getperf);
EXPORT_SYMBOL(sun4v_niagara_setperf);
EXPORT_SYMBOL(sun4v_niagara2_getperf);
EXPORT_SYMBOL(sun4v_niagara2_setperf);
+EXPORT_SYMBOL(sun4v_mach_set_watchdog);
/* from hweight.S */
EXPORT_SYMBOL(__arch_hweight8);
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index c690c8e16a96..b489e9759518 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -264,7 +264,7 @@ static unsigned long mmap_rnd(void)
unsigned long rnd = 0UL;
if (current->flags & PF_RANDOMIZE) {
- unsigned long val = get_random_int();
+ unsigned long val = get_random_long();
if (test_thread_flag(TIF_32BIT))
rnd = (val % (1UL << (23UL-PAGE_SHIFT)));
else
diff --git a/arch/sparc/kernel/syscalls.S b/arch/sparc/kernel/syscalls.S
index bb0008927598..c4a1b5c40e4e 100644
--- a/arch/sparc/kernel/syscalls.S
+++ b/arch/sparc/kernel/syscalls.S
@@ -158,7 +158,25 @@ linux_syscall_trace32:
add %sp, PTREGS_OFF, %o0
brnz,pn %o0, 3f
mov -ENOSYS, %o0
+
+ /* Syscall tracing can modify the registers. */
+ ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1
+ sethi %hi(sys_call_table32), %l7
+ ldx [%sp + PTREGS_OFF + PT_V9_I0], %i0
+ or %l7, %lo(sys_call_table32), %l7
+ ldx [%sp + PTREGS_OFF + PT_V9_I1], %i1
+ ldx [%sp + PTREGS_OFF + PT_V9_I2], %i2
+ ldx [%sp + PTREGS_OFF + PT_V9_I3], %i3
+ ldx [%sp + PTREGS_OFF + PT_V9_I4], %i4
+ ldx [%sp + PTREGS_OFF + PT_V9_I5], %i5
+
+ cmp %g1, NR_syscalls
+ bgeu,pn %xcc, 3f
+ mov -ENOSYS, %o0
+
+ sll %g1, 2, %l4
srl %i0, 0, %o0
+ lduw [%l7 + %l4], %l7
srl %i4, 0, %o4
srl %i1, 0, %o1
srl %i2, 0, %o2
@@ -170,7 +188,25 @@ linux_syscall_trace:
add %sp, PTREGS_OFF, %o0
brnz,pn %o0, 3f
mov -ENOSYS, %o0
+
+ /* Syscall tracing can modify the registers. */
+ ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1
+ sethi %hi(sys_call_table64), %l7
+ ldx [%sp + PTREGS_OFF + PT_V9_I0], %i0
+ or %l7, %lo(sys_call_table64), %l7
+ ldx [%sp + PTREGS_OFF + PT_V9_I1], %i1
+ ldx [%sp + PTREGS_OFF + PT_V9_I2], %i2
+ ldx [%sp + PTREGS_OFF + PT_V9_I3], %i3
+ ldx [%sp + PTREGS_OFF + PT_V9_I4], %i4
+ ldx [%sp + PTREGS_OFF + PT_V9_I5], %i5
+
+ cmp %g1, NR_syscalls
+ bgeu,pn %xcc, 3f
+ mov -ENOSYS, %o0
+
+ sll %g1, 2, %l4
mov %i0, %o0
+ lduw [%l7 + %l4], %l7
mov %i1, %o1
mov %i2, %o2
mov %i3, %o3
diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S
index e663b6c78de2..6c3dd6c52f8b 100644
--- a/arch/sparc/kernel/systbls_32.S
+++ b/arch/sparc/kernel/systbls_32.S
@@ -88,4 +88,4 @@ sys_call_table:
/*340*/ .long sys_ni_syscall, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr
/*345*/ .long sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
/*350*/ .long sys_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen
-/*355*/ .long sys_setsockopt, sys_mlock2
+/*355*/ .long sys_setsockopt, sys_mlock2, sys_copy_file_range
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index 1557121f4cdc..12b524cfcfa0 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -89,7 +89,7 @@ sys_call_table32:
/*340*/ .word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr
.word sys32_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
/*350*/ .word sys32_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen
- .word compat_sys_setsockopt, sys_mlock2
+ .word compat_sys_setsockopt, sys_mlock2, sys_copy_file_range
#endif /* CONFIG_COMPAT */
@@ -170,4 +170,4 @@ sys_call_table:
/*340*/ .word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr
.word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf
/*350*/ .word sys64_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen
- .word sys_setsockopt, sys_mlock2
+ .word sys_setsockopt, sys_mlock2, sys_copy_file_range
diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c
index 9bdf67a092a5..b60a9f8cda75 100644
--- a/arch/um/kernel/reboot.c
+++ b/arch/um/kernel/reboot.c
@@ -12,6 +12,7 @@
#include <skas.h>
void (*pm_power_off)(void);
+EXPORT_SYMBOL(pm_power_off);
static void kill_off_processes(void)
{
diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c
index fc8be0e3a4ff..57acbd67d85d 100644
--- a/arch/um/kernel/signal.c
+++ b/arch/um/kernel/signal.c
@@ -69,7 +69,7 @@ void do_signal(struct pt_regs *regs)
struct ksignal ksig;
int handled_sig = 0;
- if (get_signal(&ksig)) {
+ while (get_signal(&ksig)) {
handled_sig = 1;
/* Whee! Actually deliver the signal. */
handle_signal(&ksig, regs);
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 77d8c5112900..bb3e376d0f33 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -294,6 +294,7 @@ sysenter_past_esp:
pushl $__USER_DS /* pt_regs->ss */
pushl %ebp /* pt_regs->sp (stashed in bp) */
pushfl /* pt_regs->flags (except IF = 0) */
+ ASM_CLAC /* Clear AC after saving FLAGS */
orl $X86_EFLAGS_IF, (%esp) /* Fix IF */
pushl $__USER_CS /* pt_regs->cs */
pushl $0 /* pt_regs->ip = 0 (placeholder) */
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index ff1c6d61f332..3c990eeee40b 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -261,6 +261,7 @@ ENTRY(entry_INT80_compat)
* Interrupts are off on entry.
*/
PARAVIRT_ADJUST_EXCEPTION_FRAME
+ ASM_CLAC /* Do this early to minimize exposure */
SWAPGS
/*
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index 46873fbd44e1..d08eacd298c2 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -93,6 +93,8 @@ extern raw_spinlock_t pci_config_lock;
extern int (*pcibios_enable_irq)(struct pci_dev *dev);
extern void (*pcibios_disable_irq)(struct pci_dev *dev);
+extern bool mp_should_keep_irq(struct device *dev);
+
struct pci_raw_ops {
int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn,
int reg, int len, u32 *val);
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index f5dcb5204dcd..3fe0eac59462 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -48,20 +48,28 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
switch (n) {
case 1:
+ __uaccess_begin();
__put_user_size(*(u8 *)from, (u8 __user *)to,
1, ret, 1);
+ __uaccess_end();
return ret;
case 2:
+ __uaccess_begin();
__put_user_size(*(u16 *)from, (u16 __user *)to,
2, ret, 2);
+ __uaccess_end();
return ret;
case 4:
+ __uaccess_begin();
__put_user_size(*(u32 *)from, (u32 __user *)to,
4, ret, 4);
+ __uaccess_end();
return ret;
case 8:
+ __uaccess_begin();
__put_user_size(*(u64 *)from, (u64 __user *)to,
8, ret, 8);
+ __uaccess_end();
return ret;
}
}
@@ -103,13 +111,19 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
switch (n) {
case 1:
+ __uaccess_begin();
__get_user_size(*(u8 *)to, from, 1, ret, 1);
+ __uaccess_end();
return ret;
case 2:
+ __uaccess_begin();
__get_user_size(*(u16 *)to, from, 2, ret, 2);
+ __uaccess_end();
return ret;
case 4:
+ __uaccess_begin();
__get_user_size(*(u32 *)to, from, 4, ret, 4);
+ __uaccess_end();
return ret;
}
}
@@ -148,13 +162,19 @@ __copy_from_user(void *to, const void __user *from, unsigned long n)
switch (n) {
case 1:
+ __uaccess_begin();
__get_user_size(*(u8 *)to, from, 1, ret, 1);
+ __uaccess_end();
return ret;
case 2:
+ __uaccess_begin();
__get_user_size(*(u16 *)to, from, 2, ret, 2);
+ __uaccess_end();
return ret;
case 4:
+ __uaccess_begin();
__get_user_size(*(u32 *)to, from, 4, ret, 4);
+ __uaccess_end();
return ret;
}
}
@@ -170,13 +190,19 @@ static __always_inline unsigned long __copy_from_user_nocache(void *to,
switch (n) {
case 1:
+ __uaccess_begin();
__get_user_size(*(u8 *)to, from, 1, ret, 1);
+ __uaccess_end();
return ret;
case 2:
+ __uaccess_begin();
__get_user_size(*(u16 *)to, from, 2, ret, 2);
+ __uaccess_end();
return ret;
case 4:
+ __uaccess_begin();
__get_user_size(*(u32 *)to, from, 4, ret, 4);
+ __uaccess_end();
return ret;
}
}
diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h
index 968d57dd54c9..f320ee32d5a1 100644
--- a/arch/x86/include/asm/xen/pci.h
+++ b/arch/x86/include/asm/xen/pci.h
@@ -57,7 +57,7 @@ static inline int xen_pci_frontend_enable_msi(struct pci_dev *dev,
{
if (xen_pci_frontend && xen_pci_frontend->enable_msi)
return xen_pci_frontend->enable_msi(dev, vectors);
- return -ENODEV;
+ return -ENOSYS;
}
static inline void xen_pci_frontend_disable_msi(struct pci_dev *dev)
{
@@ -69,7 +69,7 @@ static inline int xen_pci_frontend_enable_msix(struct pci_dev *dev,
{
if (xen_pci_frontend && xen_pci_frontend->enable_msix)
return xen_pci_frontend->enable_msix(dev, vectors, nvec);
- return -ENODEV;
+ return -ENOSYS;
}
static inline void xen_pci_frontend_disable_msix(struct pci_dev *dev)
{
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index d1daead5fcdd..adb3eaf8fe2a 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -16,6 +16,7 @@
#include <asm/cacheflush.h>
#include <asm/realmode.h>
+#include <linux/ftrace.h>
#include "../../realmode/rm/wakeup.h"
#include "sleep.h"
@@ -107,7 +108,13 @@ int x86_acpi_suspend_lowlevel(void)
saved_magic = 0x123456789abcdef0L;
#endif /* CONFIG_64BIT */
+ /*
+ * Pause/unpause graph tracing around do_suspend_lowlevel as it has
+ * inconsistent call/return info after it jumps to the wakeup vector.
+ */
+ pause_graph_tracing();
do_suspend_lowlevel();
+ unpause_graph_tracing();
return 0;
}
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 1505587d06e9..b9b09fec173b 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -650,10 +650,10 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
u16 sel;
la = seg_base(ctxt, addr.seg) + addr.ea;
- *linear = la;
*max_size = 0;
switch (mode) {
case X86EMUL_MODE_PROT64:
+ *linear = la;
if (is_noncanonical_address(la))
goto bad;
@@ -662,6 +662,7 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
goto bad;
break;
default:
+ *linear = la = (u32)la;
usable = ctxt->ops->get_segment(ctxt, &sel, &desc, NULL,
addr.seg);
if (!usable)
@@ -689,7 +690,6 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
if (size > *max_size)
goto bad;
}
- la &= (u32)-1;
break;
}
if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0))
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 6c9fed957cce..2ce4f05e81d3 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -249,7 +249,7 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
return ret;
kvm_vcpu_mark_page_dirty(vcpu, table_gfn);
- walker->ptes[level] = pte;
+ walker->ptes[level - 1] = pte;
}
return 0;
}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e2951b6edbbc..0ff453749a90 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -596,6 +596,8 @@ struct vcpu_vmx {
/* Support for PML */
#define PML_ENTITY_NUM 512
struct page *pml_pg;
+
+ u64 current_tsc_ratio;
};
enum segment_cache_field {
@@ -2127,14 +2129,16 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
- /* Setup TSC multiplier */
- if (cpu_has_vmx_tsc_scaling())
- vmcs_write64(TSC_MULTIPLIER,
- vcpu->arch.tsc_scaling_ratio);
-
vmx->loaded_vmcs->cpu = cpu;
}
+ /* Setup TSC multiplier */
+ if (kvm_has_tsc_control &&
+ vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio) {
+ vmx->current_tsc_ratio = vcpu->arch.tsc_scaling_ratio;
+ vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
+ }
+
vmx_vcpu_pi_load(vcpu, cpu);
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4244c2baf57d..eaf6ee8c28b8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6618,12 +6618,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
* KVM_DEBUGREG_WONT_EXIT again.
*/
if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) {
- int i;
-
WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP);
kvm_x86_ops->sync_dirty_debug_regs(vcpu);
- for (i = 0; i < KVM_NR_DB_REGS; i++)
- vcpu->arch.eff_db[i] = vcpu->arch.db[i];
+ kvm_update_dr0123(vcpu);
+ kvm_update_dr6(vcpu);
+ kvm_update_dr7(vcpu);
+ vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
}
/*
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 96bd1e2bffaf..72bb52f93c3d 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -71,12 +71,12 @@ unsigned long arch_mmap_rnd(void)
if (mmap_is_ia32())
#ifdef CONFIG_COMPAT
- rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_compat_bits) - 1);
+ rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
#else
- rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);
+ rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
#endif
else
- rnd = (unsigned long)get_random_int() & ((1 << mmap_rnd_bits) - 1);
+ rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
return rnd << PAGE_SHIFT;
}
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index b2fd67da1701..ef05755a1900 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -123,7 +123,7 @@ static int get_reg_offset(struct insn *insn, struct pt_regs *regs,
break;
}
- if (regno > nr_registers) {
+ if (regno >= nr_registers) {
WARN_ONCE(1, "decoded an instruction with an invalid register");
return -EINVAL;
}
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 2440814b0069..9cf96d82147a 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -419,24 +419,30 @@ pmd_t *lookup_pmd_address(unsigned long address)
phys_addr_t slow_virt_to_phys(void *__virt_addr)
{
unsigned long virt_addr = (unsigned long)__virt_addr;
- unsigned long phys_addr, offset;
+ phys_addr_t phys_addr;
+ unsigned long offset;
enum pg_level level;
pte_t *pte;
pte = lookup_address(virt_addr, &level);
BUG_ON(!pte);
+ /*
+ * pXX_pfn() returns unsigned long, which must be cast to phys_addr_t
+ * before being left-shifted PAGE_SHIFT bits -- this trick is to
+ * make 32-PAE kernel work correctly.
+ */
switch (level) {
case PG_LEVEL_1G:
- phys_addr = pud_pfn(*(pud_t *)pte) << PAGE_SHIFT;
+ phys_addr = (phys_addr_t)pud_pfn(*(pud_t *)pte) << PAGE_SHIFT;
offset = virt_addr & ~PUD_PAGE_MASK;
break;
case PG_LEVEL_2M:
- phys_addr = pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT;
+ phys_addr = (phys_addr_t)pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT;
offset = virt_addr & ~PMD_PAGE_MASK;
break;
default:
- phys_addr = pte_pfn(*pte) << PAGE_SHIFT;
+ phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
offset = virt_addr & ~PAGE_MASK;
}
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 2879efc73a96..d34b5118b4e8 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -711,28 +711,22 @@ int pcibios_add_device(struct pci_dev *dev)
return 0;
}
-int pcibios_alloc_irq(struct pci_dev *dev)
+int pcibios_enable_device(struct pci_dev *dev, int mask)
{
- /*
- * If the PCI device was already claimed by core code and has
- * MSI enabled, probing of the pcibios IRQ will overwrite
- * dev->irq. So bail out if MSI is already enabled.
- */
- if (pci_dev_msi_enabled(dev))
- return -EBUSY;
+ int err;
- return pcibios_enable_irq(dev);
-}
+ if ((err = pci_enable_resources(dev, mask)) < 0)
+ return err;
-void pcibios_free_irq(struct pci_dev *dev)
-{
- if (pcibios_disable_irq)
- pcibios_disable_irq(dev);
+ if (!pci_dev_msi_enabled(dev))
+ return pcibios_enable_irq(dev);
+ return 0;
}
-int pcibios_enable_device(struct pci_dev *dev, int mask)
+void pcibios_disable_device (struct pci_dev *dev)
{
- return pci_enable_resources(dev, mask);
+ if (!pci_dev_msi_enabled(dev) && pcibios_disable_irq)
+ pcibios_disable_irq(dev);
}
int pci_ext_cfg_avail(void)
diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c
index 0d24e7c10145..8b93e634af84 100644
--- a/arch/x86/pci/intel_mid_pci.c
+++ b/arch/x86/pci/intel_mid_pci.c
@@ -215,7 +215,7 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev)
int polarity;
int ret;
- if (pci_has_managed_irq(dev))
+ if (dev->irq_managed && dev->irq > 0)
return 0;
switch (intel_mid_identify_cpu()) {
@@ -256,13 +256,10 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev)
static void intel_mid_pci_irq_disable(struct pci_dev *dev)
{
- if (pci_has_managed_irq(dev)) {
+ if (!mp_should_keep_irq(&dev->dev) && dev->irq_managed &&
+ dev->irq > 0) {
mp_unmap_irq(dev->irq);
dev->irq_managed = 0;
- /*
- * Don't reset dev->irq here, otherwise
- * intel_mid_pci_irq_enable() will fail on next call.
- */
}
}
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index 32e70343e6fd..9bd115484745 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -1202,7 +1202,7 @@ static int pirq_enable_irq(struct pci_dev *dev)
struct pci_dev *temp_dev;
int irq;
- if (pci_has_managed_irq(dev))
+ if (dev->irq_managed && dev->irq > 0)
return 0;
irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
@@ -1230,7 +1230,8 @@ static int pirq_enable_irq(struct pci_dev *dev)
}
dev = temp_dev;
if (irq >= 0) {
- pci_set_managed_irq(dev, irq);
+ dev->irq_managed = 1;
+ dev->irq = irq;
dev_info(&dev->dev, "PCI->APIC IRQ transform: "
"INT %c -> IRQ %d\n", 'A' + pin - 1, irq);
return 0;
@@ -1256,10 +1257,24 @@ static int pirq_enable_irq(struct pci_dev *dev)
return 0;
}
+bool mp_should_keep_irq(struct device *dev)
+{
+ if (dev->power.is_prepared)
+ return true;
+#ifdef CONFIG_PM
+ if (dev->power.runtime_status == RPM_SUSPENDING)
+ return true;
+#endif
+
+ return false;
+}
+
static void pirq_disable_irq(struct pci_dev *dev)
{
- if (io_apic_assign_pci_irqs && pci_has_managed_irq(dev)) {
+ if (io_apic_assign_pci_irqs && !mp_should_keep_irq(&dev->dev) &&
+ dev->irq_managed && dev->irq) {
mp_unmap_irq(dev->irq);
- pci_reset_managed_irq(dev);
+ dev->irq = 0;
+ dev->irq_managed = 0;
}
}
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index ff31ab464213..beac4dfdade6 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -196,7 +196,10 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
return 0;
error:
- dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n");
+ if (ret == -ENOSYS)
+ dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n");
+ else if (ret)
+ dev_err(&dev->dev, "Xen PCI frontend error: %d!\n", ret);
free:
kfree(v);
return ret;
diff --git a/arch/x86/platform/intel-quark/imr.c b/arch/x86/platform/intel-quark/imr.c
index c61b6c332e97..bfadcd0f4944 100644
--- a/arch/x86/platform/intel-quark/imr.c
+++ b/arch/x86/platform/intel-quark/imr.c
@@ -592,14 +592,14 @@ static void __init imr_fixup_memmap(struct imr_device *idev)
end = (unsigned long)__end_rodata - 1;
/*
- * Setup a locked IMR around the physical extent of the kernel
+ * Setup an unlocked IMR around the physical extent of the kernel
* from the beginning of the .text secton to the end of the
* .rodata section as one physically contiguous block.
*
* We don't round up @size since it is already PAGE_SIZE aligned.
* See vmlinux.lds.S for details.
*/
- ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, true);
+ ret = imr_add_range(base, size, IMR_CPU, IMR_CPU, false);
if (ret < 0) {
pr_err("unable to setup IMR for kernel: %zu KiB (%lx - %lx)\n",
size / 1024, start, end);
diff --git a/arch/x86/um/os-Linux/task_size.c b/arch/x86/um/os-Linux/task_size.c
index 8502ad30e61b..5adb6a2fd117 100644
--- a/arch/x86/um/os-Linux/task_size.c
+++ b/arch/x86/um/os-Linux/task_size.c
@@ -109,7 +109,7 @@ unsigned long os_get_top_address(void)
exit(1);
}
- printf("0x%x\n", bottom << UM_KERN_PAGE_SHIFT);
+ printf("0x%lx\n", bottom << UM_KERN_PAGE_SHIFT);
printf("Locating the top of the address space ... ");
fflush(stdout);
@@ -134,7 +134,7 @@ out:
exit(1);
}
top <<= UM_KERN_PAGE_SHIFT;
- printf("0x%x\n", top);
+ printf("0x%lx\n", top);
return top;
}
diff --git a/block/Kconfig b/block/Kconfig
index 161491d0a879..0363cd731320 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -88,6 +88,19 @@ config BLK_DEV_INTEGRITY
T10/SCSI Data Integrity Field or the T13/ATA External Path
Protection. If in doubt, say N.
+config BLK_DEV_DAX
+ bool "Block device DAX support"
+ depends on FS_DAX
+ depends on BROKEN
+ help
+ When DAX support is available (CONFIG_FS_DAX) raw block
+ devices can also support direct userspace access to the
+ storage capacity via MMAP(2) similar to a file on a
+ DAX-enabled filesystem. However, the DAX I/O-path disables
+ some standard I/O-statistics, and the MMAP(2) path has some
+ operational differences due to bypassing the page
+ cache. If in doubt, say N.
+
config BLK_DEV_THROTTLING
bool "Block layer bio throttling support"
depends on BLK_CGROUP=y
diff --git a/block/blk-map.c b/block/blk-map.c
index f565e11f465a..a54f0543b956 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -57,6 +57,49 @@ static int __blk_rq_unmap_user(struct bio *bio)
return ret;
}
+static int __blk_rq_map_user_iov(struct request *rq,
+ struct rq_map_data *map_data, struct iov_iter *iter,
+ gfp_t gfp_mask, bool copy)
+{
+ struct request_queue *q = rq->q;
+ struct bio *bio, *orig_bio;
+ int ret;
+
+ if (copy)
+ bio = bio_copy_user_iov(q, map_data, iter, gfp_mask);
+ else
+ bio = bio_map_user_iov(q, iter, gfp_mask);
+
+ if (IS_ERR(bio))
+ return PTR_ERR(bio);
+
+ if (map_data && map_data->null_mapped)
+ bio_set_flag(bio, BIO_NULL_MAPPED);
+
+ iov_iter_advance(iter, bio->bi_iter.bi_size);
+ if (map_data)
+ map_data->offset += bio->bi_iter.bi_size;
+
+ orig_bio = bio;
+ blk_queue_bounce(q, &bio);
+
+ /*
+ * We link the bounce buffer in and could have to traverse it
+ * later so we have to get a ref to prevent it from being freed
+ */
+ bio_get(bio);
+
+ ret = blk_rq_append_bio(q, rq, bio);
+ if (ret) {
+ bio_endio(bio);
+ __blk_rq_unmap_user(orig_bio);
+ bio_put(bio);
+ return ret;
+ }
+
+ return 0;
+}
+
/**
* blk_rq_map_user_iov - map user data to a request, for REQ_TYPE_BLOCK_PC usage
* @q: request queue where request should be inserted
@@ -82,10 +125,11 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
struct rq_map_data *map_data,
const struct iov_iter *iter, gfp_t gfp_mask)
{
- struct bio *bio;
- int unaligned = 0;
- struct iov_iter i;
struct iovec iov, prv = {.iov_base = NULL, .iov_len = 0};
+ bool copy = (q->dma_pad_mask & iter->count) || map_data;
+ struct bio *bio = NULL;
+ struct iov_iter i;
+ int ret;
if (!iter || !iter->count)
return -EINVAL;
@@ -101,42 +145,29 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
*/
if ((uaddr & queue_dma_alignment(q)) ||
iovec_gap_to_prv(q, &prv, &iov))
- unaligned = 1;
+ copy = true;
prv.iov_base = iov.iov_base;
prv.iov_len = iov.iov_len;
}
- if (unaligned || (q->dma_pad_mask & iter->count) || map_data)
- bio = bio_copy_user_iov(q, map_data, iter, gfp_mask);
- else
- bio = bio_map_user_iov(q, iter, gfp_mask);
-
- if (IS_ERR(bio))
- return PTR_ERR(bio);
-
- if (map_data && map_data->null_mapped)
- bio_set_flag(bio, BIO_NULL_MAPPED);
-
- if (bio->bi_iter.bi_size != iter->count) {
- /*
- * Grab an extra reference to this bio, as bio_unmap_user()
- * expects to be able to drop it twice as it happens on the
- * normal IO completion path
- */
- bio_get(bio);
- bio_endio(bio);
- __blk_rq_unmap_user(bio);
- return -EINVAL;
- }
+ i = *iter;
+ do {
+ ret =__blk_rq_map_user_iov(rq, map_data, &i, gfp_mask, copy);
+ if (ret)
+ goto unmap_rq;
+ if (!bio)
+ bio = rq->bio;
+ } while (iov_iter_count(&i));
if (!bio_flagged(bio, BIO_USER_MAPPED))
rq->cmd_flags |= REQ_COPY_USER;
-
- blk_queue_bounce(q, &bio);
- bio_get(bio);
- blk_rq_bio_prep(q, rq, bio);
return 0;
+
+unmap_rq:
+ __blk_rq_unmap_user(bio);
+ rq->bio = NULL;
+ return -EINVAL;
}
EXPORT_SYMBOL(blk_rq_map_user_iov);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 888a7fec81f7..261353166dcf 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -304,7 +304,6 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
struct bio *nxt)
{
struct bio_vec end_bv = { NULL }, nxt_bv;
- struct bvec_iter iter;
if (!blk_queue_cluster(q))
return 0;
@@ -316,11 +315,8 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
if (!bio_has_data(bio))
return 1;
- bio_for_each_segment(end_bv, bio, iter)
- if (end_bv.bv_len == iter.bi_size)
- break;
-
- nxt_bv = bio_iovec(nxt);
+ bio_get_last_bvec(bio, &end_bv);
+ bio_get_first_bvec(nxt, &nxt_bv);
if (!BIOVEC_PHYS_MERGEABLE(&end_bv, &nxt_bv))
return 0;
diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c
index ad6d8c6b777e..35947ac87644 100644
--- a/drivers/acpi/nfit.c
+++ b/drivers/acpi/nfit.c
@@ -469,37 +469,16 @@ static void nfit_mem_find_spa_bdw(struct acpi_nfit_desc *acpi_desc,
nfit_mem->bdw = NULL;
}
-static int nfit_mem_add(struct acpi_nfit_desc *acpi_desc,
+static void nfit_mem_init_bdw(struct acpi_nfit_desc *acpi_desc,
struct nfit_mem *nfit_mem, struct acpi_nfit_system_address *spa)
{
u16 dcr = __to_nfit_memdev(nfit_mem)->region_index;
struct nfit_memdev *nfit_memdev;
struct nfit_flush *nfit_flush;
- struct nfit_dcr *nfit_dcr;
struct nfit_bdw *nfit_bdw;
struct nfit_idt *nfit_idt;
u16 idt_idx, range_index;
- list_for_each_entry(nfit_dcr, &acpi_desc->dcrs, list) {
- if (nfit_dcr->dcr->region_index != dcr)
- continue;
- nfit_mem->dcr = nfit_dcr->dcr;
- break;
- }
-
- if (!nfit_mem->dcr) {
- dev_dbg(acpi_desc->dev, "SPA %d missing:%s%s\n",
- spa->range_index, __to_nfit_memdev(nfit_mem)
- ? "" : " MEMDEV", nfit_mem->dcr ? "" : " DCR");
- return -ENODEV;
- }
-
- /*
- * We've found enough to create an nvdimm, optionally
- * find an associated BDW
- */
- list_add(&nfit_mem->list, &acpi_desc->dimms);
-
list_for_each_entry(nfit_bdw, &acpi_desc->bdws, list) {
if (nfit_bdw->bdw->region_index != dcr)
continue;
@@ -508,12 +487,12 @@ static int nfit_mem_add(struct acpi_nfit_desc *acpi_desc,
}
if (!nfit_mem->bdw)
- return 0;
+ return;
nfit_mem_find_spa_bdw(acpi_desc, nfit_mem);
if (!nfit_mem->spa_bdw)
- return 0;
+ return;
range_index = nfit_mem->spa_bdw->range_index;
list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
@@ -538,8 +517,6 @@ static int nfit_mem_add(struct acpi_nfit_desc *acpi_desc,
}
break;
}
-
- return 0;
}
static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc,
@@ -548,7 +525,6 @@ static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc,
struct nfit_mem *nfit_mem, *found;
struct nfit_memdev *nfit_memdev;
int type = nfit_spa_type(spa);
- u16 dcr;
switch (type) {
case NFIT_SPA_DCR:
@@ -559,14 +535,18 @@ static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc,
}
list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
- int rc;
+ struct nfit_dcr *nfit_dcr;
+ u32 device_handle;
+ u16 dcr;
if (nfit_memdev->memdev->range_index != spa->range_index)
continue;
found = NULL;
dcr = nfit_memdev->memdev->region_index;
+ device_handle = nfit_memdev->memdev->device_handle;
list_for_each_entry(nfit_mem, &acpi_desc->dimms, list)
- if (__to_nfit_memdev(nfit_mem)->region_index == dcr) {
+ if (__to_nfit_memdev(nfit_mem)->device_handle
+ == device_handle) {
found = nfit_mem;
break;
}
@@ -579,6 +559,31 @@ static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc,
if (!nfit_mem)
return -ENOMEM;
INIT_LIST_HEAD(&nfit_mem->list);
+ list_add(&nfit_mem->list, &acpi_desc->dimms);
+ }
+
+ list_for_each_entry(nfit_dcr, &acpi_desc->dcrs, list) {
+ if (nfit_dcr->dcr->region_index != dcr)
+ continue;
+ /*
+ * Record the control region for the dimm. For
+ * the ACPI 6.1 case, where there are separate
+ * control regions for the pmem vs blk
+ * interfaces, be sure to record the extended
+ * blk details.
+ */
+ if (!nfit_mem->dcr)
+ nfit_mem->dcr = nfit_dcr->dcr;
+ else if (nfit_mem->dcr->windows == 0
+ && nfit_dcr->dcr->windows)
+ nfit_mem->dcr = nfit_dcr->dcr;
+ break;
+ }
+
+ if (dcr && !nfit_mem->dcr) {
+ dev_err(acpi_desc->dev, "SPA %d missing DCR %d\n",
+ spa->range_index, dcr);
+ return -ENODEV;
}
if (type == NFIT_SPA_DCR) {
@@ -595,6 +600,7 @@ static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc,
nfit_mem->idt_dcr = nfit_idt->idt;
break;
}
+ nfit_mem_init_bdw(acpi_desc, nfit_mem, spa);
} else {
/*
* A single dimm may belong to multiple SPA-PM
@@ -603,13 +609,6 @@ static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc,
*/
nfit_mem->memdev_pmem = nfit_memdev->memdev;
}
-
- if (found)
- continue;
-
- rc = nfit_mem_add(acpi_desc, nfit_mem, spa);
- if (rc)
- return rc;
}
return 0;
@@ -1504,9 +1503,7 @@ static int ars_do_start(struct nvdimm_bus_descriptor *nd_desc,
case 1:
/* ARS unsupported, but we should never get here */
return 0;
- case 2:
- return -EINVAL;
- case 3:
+ case 6:
/* ARS is in progress */
msleep(1000);
break;
@@ -1517,13 +1514,13 @@ static int ars_do_start(struct nvdimm_bus_descriptor *nd_desc,
}
static int ars_get_status(struct nvdimm_bus_descriptor *nd_desc,
- struct nd_cmd_ars_status *cmd)
+ struct nd_cmd_ars_status *cmd, u32 size)
{
int rc;
while (1) {
rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_STATUS, cmd,
- sizeof(*cmd));
+ size);
if (rc || cmd->status & 0xffff)
return -ENXIO;
@@ -1538,6 +1535,8 @@ static int ars_get_status(struct nvdimm_bus_descriptor *nd_desc,
case 2:
/* No ARS performed for the current boot */
return 0;
+ case 3:
+ /* TODO: error list overflow support */
default:
return -ENXIO;
}
@@ -1581,6 +1580,7 @@ static int acpi_nfit_find_poison(struct acpi_nfit_desc *acpi_desc,
struct nd_cmd_ars_start *ars_start = NULL;
struct nd_cmd_ars_cap *ars_cap = NULL;
u64 start, len, cur, remaining;
+ u32 ars_status_size;
int rc;
ars_cap = kzalloc(sizeof(*ars_cap), GFP_KERNEL);
@@ -1590,14 +1590,21 @@ static int acpi_nfit_find_poison(struct acpi_nfit_desc *acpi_desc,
start = ndr_desc->res->start;
len = ndr_desc->res->end - ndr_desc->res->start + 1;
+ /*
+ * If ARS is unimplemented, unsupported, or if the 'Persistent Memory
+ * Scrub' flag in extended status is not set, skip this but continue
+ * initialization
+ */
rc = ars_get_cap(nd_desc, ars_cap, start, len);
+ if (rc == -ENOTTY) {
+ dev_dbg(acpi_desc->dev,
+ "Address Range Scrub is not implemented, won't create an error list\n");
+ rc = 0;
+ goto out;
+ }
if (rc)
goto out;
- /*
- * If ARS is unsupported, or if the 'Persistent Memory Scrub' flag in
- * extended status is not set, skip this but continue initialization
- */
if ((ars_cap->status & 0xffff) ||
!(ars_cap->status >> 16 & ND_ARS_PERSISTENT)) {
dev_warn(acpi_desc->dev,
@@ -1610,14 +1617,14 @@ static int acpi_nfit_find_poison(struct acpi_nfit_desc *acpi_desc,
* Check if a full-range ARS has been run. If so, use those results
* without having to start a new ARS.
*/
- ars_status = kzalloc(ars_cap->max_ars_out + sizeof(*ars_status),
- GFP_KERNEL);
+ ars_status_size = ars_cap->max_ars_out;
+ ars_status = kzalloc(ars_status_size, GFP_KERNEL);
if (!ars_status) {
rc = -ENOMEM;
goto out;
}
- rc = ars_get_status(nd_desc, ars_status);
+ rc = ars_get_status(nd_desc, ars_status, ars_status_size);
if (rc)
goto out;
@@ -1647,7 +1654,7 @@ static int acpi_nfit_find_poison(struct acpi_nfit_desc *acpi_desc,
if (rc)
goto out;
- rc = ars_get_status(nd_desc, ars_status);
+ rc = ars_get_status(nd_desc, ars_status, ars_status_size);
if (rc)
goto out;
diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index d30184c7f3bc..c8e169e46673 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -406,7 +406,7 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
return 0;
}
- if (pci_has_managed_irq(dev))
+ if (dev->irq_managed && dev->irq > 0)
return 0;
entry = acpi_pci_irq_lookup(dev, pin);
@@ -451,7 +451,8 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
kfree(entry);
return rc;
}
- pci_set_managed_irq(dev, rc);
+ dev->irq = rc;
+ dev->irq_managed = 1;
if (link)
snprintf(link_desc, sizeof(link_desc), " -> Link[%s]", link);
@@ -474,9 +475,17 @@ void acpi_pci_irq_disable(struct pci_dev *dev)
u8 pin;
pin = dev->pin;
- if (!pin || !pci_has_managed_irq(dev))
+ if (!pin || !dev->irq_managed || dev->irq <= 0)
return;
+ /* Keep IOAPIC pin configuration when suspending */
+ if (dev->dev.power.is_prepared)
+ return;
+#ifdef CONFIG_PM
+ if (dev->dev.power.runtime_status == RPM_SUSPENDING)
+ return;
+#endif
+
entry = acpi_pci_irq_lookup(dev, pin);
if (!entry)
return;
@@ -496,6 +505,6 @@ void acpi_pci_irq_disable(struct pci_dev *dev)
dev_dbg(&dev->dev, "PCI INT %c disabled\n", pin_name(pin));
if (gsi >= 0) {
acpi_unregister_gsi(gsi);
- pci_reset_managed_irq(dev);
+ dev->irq_managed = 0;
}
}
diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c
index fa2863567eed..ededa909df2f 100644
--- a/drivers/acpi/pci_link.c
+++ b/drivers/acpi/pci_link.c
@@ -4,7 +4,6 @@
* Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
* Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
* Copyright (C) 2002 Dominik Brodowski <devel@brodo.de>
- * Copyright (c) 2015, The Linux Foundation. All rights reserved.
*
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
@@ -438,6 +437,7 @@ static int acpi_pci_link_set(struct acpi_pci_link *link, int irq)
* enabled system.
*/
+#define ACPI_MAX_IRQS 256
#define ACPI_MAX_ISA_IRQ 16
#define PIRQ_PENALTY_PCI_AVAILABLE (0)
@@ -447,7 +447,7 @@ static int acpi_pci_link_set(struct acpi_pci_link *link, int irq)
#define PIRQ_PENALTY_ISA_USED (16*16*16*16*16)
#define PIRQ_PENALTY_ISA_ALWAYS (16*16*16*16*16*16)
-static int acpi_irq_isa_penalty[ACPI_MAX_ISA_IRQ] = {
+static int acpi_irq_penalty[ACPI_MAX_IRQS] = {
PIRQ_PENALTY_ISA_ALWAYS, /* IRQ0 timer */
PIRQ_PENALTY_ISA_ALWAYS, /* IRQ1 keyboard */
PIRQ_PENALTY_ISA_ALWAYS, /* IRQ2 cascade */
@@ -464,68 +464,9 @@ static int acpi_irq_isa_penalty[ACPI_MAX_ISA_IRQ] = {
PIRQ_PENALTY_ISA_USED, /* IRQ13 fpe, sometimes */
PIRQ_PENALTY_ISA_USED, /* IRQ14 ide0 */
PIRQ_PENALTY_ISA_USED, /* IRQ15 ide1 */
+ /* >IRQ15 */
};
-struct irq_penalty_info {
- int irq;
- int penalty;
- struct list_head node;
-};
-
-static LIST_HEAD(acpi_irq_penalty_list);
-
-static int acpi_irq_get_penalty(int irq)
-{
- struct irq_penalty_info *irq_info;
-
- if (irq < ACPI_MAX_ISA_IRQ)
- return acpi_irq_isa_penalty[irq];
-
- list_for_each_entry(irq_info, &acpi_irq_penalty_list, node) {
- if (irq_info->irq == irq)
- return irq_info->penalty;
- }
-
- return 0;
-}
-
-static int acpi_irq_set_penalty(int irq, int new_penalty)
-{
- struct irq_penalty_info *irq_info;
-
- /* see if this is a ISA IRQ */
- if (irq < ACPI_MAX_ISA_IRQ) {
- acpi_irq_isa_penalty[irq] = new_penalty;
- return 0;
- }
-
- /* next, try to locate from the dynamic list */
- list_for_each_entry(irq_info, &acpi_irq_penalty_list, node) {
- if (irq_info->irq == irq) {
- irq_info->penalty = new_penalty;
- return 0;
- }
- }
-
- /* nope, let's allocate a slot for this IRQ */
- irq_info = kzalloc(sizeof(*irq_info), GFP_KERNEL);
- if (!irq_info)
- return -ENOMEM;
-
- irq_info->irq = irq;
- irq_info->penalty = new_penalty;
- list_add_tail(&irq_info->node, &acpi_irq_penalty_list);
-
- return 0;
-}
-
-static void acpi_irq_add_penalty(int irq, int penalty)
-{
- int curpen = acpi_irq_get_penalty(irq);
-
- acpi_irq_set_penalty(irq, curpen + penalty);
-}
-
int __init acpi_irq_penalty_init(void)
{
struct acpi_pci_link *link;
@@ -546,16 +487,15 @@ int __init acpi_irq_penalty_init(void)
link->irq.possible_count;
for (i = 0; i < link->irq.possible_count; i++) {
- if (link->irq.possible[i] < ACPI_MAX_ISA_IRQ) {
- int irqpos = link->irq.possible[i];
-
- acpi_irq_add_penalty(irqpos, penalty);
- }
+ if (link->irq.possible[i] < ACPI_MAX_ISA_IRQ)
+ acpi_irq_penalty[link->irq.
+ possible[i]] +=
+ penalty;
}
} else if (link->irq.active) {
- acpi_irq_add_penalty(link->irq.active,
- PIRQ_PENALTY_PCI_POSSIBLE);
+ acpi_irq_penalty[link->irq.active] +=
+ PIRQ_PENALTY_PCI_POSSIBLE;
}
}
@@ -607,12 +547,12 @@ static int acpi_pci_link_allocate(struct acpi_pci_link *link)
* the use of IRQs 9, 10, 11, and >15.
*/
for (i = (link->irq.possible_count - 1); i >= 0; i--) {
- if (acpi_irq_get_penalty(irq) >
- acpi_irq_get_penalty(link->irq.possible[i]))
+ if (acpi_irq_penalty[irq] >
+ acpi_irq_penalty[link->irq.possible[i]])
irq = link->irq.possible[i];
}
}
- if (acpi_irq_get_penalty(irq) >= PIRQ_PENALTY_ISA_ALWAYS) {
+ if (acpi_irq_penalty[irq] >= PIRQ_PENALTY_ISA_ALWAYS) {
printk(KERN_ERR PREFIX "No IRQ available for %s [%s]. "
"Try pci=noacpi or acpi=off\n",
acpi_device_name(link->device),
@@ -628,8 +568,7 @@ static int acpi_pci_link_allocate(struct acpi_pci_link *link)
acpi_device_bid(link->device));
return -ENODEV;
} else {
- acpi_irq_add_penalty(link->irq.active, PIRQ_PENALTY_PCI_USING);
-
+ acpi_irq_penalty[link->irq.active] += PIRQ_PENALTY_PCI_USING;
printk(KERN_WARNING PREFIX "%s [%s] enabled at IRQ %d\n",
acpi_device_name(link->device),
acpi_device_bid(link->device), link->irq.active);
@@ -839,7 +778,7 @@ static void acpi_pci_link_remove(struct acpi_device *device)
}
/*
- * modify penalty from cmdline
+ * modify acpi_irq_penalty[] from cmdline
*/
static int __init acpi_irq_penalty_update(char *str, int used)
{
@@ -857,10 +796,13 @@ static int __init acpi_irq_penalty_update(char *str, int used)
if (irq < 0)
continue;
+ if (irq >= ARRAY_SIZE(acpi_irq_penalty))
+ continue;
+
if (used)
- acpi_irq_add_penalty(irq, PIRQ_PENALTY_ISA_USED);
+ acpi_irq_penalty[irq] += PIRQ_PENALTY_ISA_USED;
else
- acpi_irq_set_penalty(irq, PIRQ_PENALTY_PCI_AVAILABLE);
+ acpi_irq_penalty[irq] = PIRQ_PENALTY_PCI_AVAILABLE;
if (retval != 2) /* no next number */
break;
@@ -877,15 +819,18 @@ static int __init acpi_irq_penalty_update(char *str, int used)
*/
void acpi_penalize_isa_irq(int irq, int active)
{
- if (irq >= 0)
- acpi_irq_add_penalty(irq, active ?
- PIRQ_PENALTY_ISA_USED : PIRQ_PENALTY_PCI_USING);
+ if (irq >= 0 && irq < ARRAY_SIZE(acpi_irq_penalty)) {
+ if (active)
+ acpi_irq_penalty[irq] += PIRQ_PENALTY_ISA_USED;
+ else
+ acpi_irq_penalty[irq] += PIRQ_PENALTY_PCI_USING;
+ }
}
bool acpi_isa_irq_available(int irq)
{
- return irq >= 0 &&
- (acpi_irq_get_penalty(irq) < PIRQ_PENALTY_ISA_ALWAYS);
+ return irq >= 0 && (irq >= ARRAY_SIZE(acpi_irq_penalty) ||
+ acpi_irq_penalty[irq] < PIRQ_PENALTY_ISA_ALWAYS);
}
/*
@@ -895,18 +840,13 @@ bool acpi_isa_irq_available(int irq)
*/
void acpi_penalize_sci_irq(int irq, int trigger, int polarity)
{
- int penalty;
-
- if (irq < 0)
- return;
-
- if (trigger != ACPI_MADT_TRIGGER_LEVEL ||
- polarity != ACPI_MADT_POLARITY_ACTIVE_LOW)
- penalty = PIRQ_PENALTY_ISA_ALWAYS;
- else
- penalty = PIRQ_PENALTY_PCI_USING;
-
- acpi_irq_add_penalty(irq, penalty);
+ if (irq >= 0 && irq < ARRAY_SIZE(acpi_irq_penalty)) {
+ if (trigger != ACPI_MADT_TRIGGER_LEVEL ||
+ polarity != ACPI_MADT_POLARITY_ACTIVE_LOW)
+ acpi_irq_penalty[irq] += PIRQ_PENALTY_ISA_ALWAYS;
+ else
+ acpi_irq_penalty[irq] += PIRQ_PENALTY_PCI_USING;
+ }
}
/*
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index a39e85f9efa9..7d00b7a015ea 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2074,7 +2074,7 @@ static int binder_thread_write(struct binder_proc *proc,
if (get_user(cookie, (binder_uintptr_t __user *)ptr))
return -EFAULT;
- ptr += sizeof(void *);
+ ptr += sizeof(cookie);
list_for_each_entry(w, &proc->delivered_death, entry) {
struct binder_ref_death *tmp_death = container_of(w, struct binder_ref_death, work);
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 546a3692774f..146dc0b8ec61 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -367,15 +367,21 @@ static const struct pci_device_id ahci_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, 0xa107), board_ahci }, /* Sunrise Point-H RAID */
{ PCI_VDEVICE(INTEL, 0xa10f), board_ahci }, /* Sunrise Point-H RAID */
{ PCI_VDEVICE(INTEL, 0x2822), board_ahci }, /* Lewisburg RAID*/
+ { PCI_VDEVICE(INTEL, 0x2823), board_ahci }, /* Lewisburg AHCI*/
{ PCI_VDEVICE(INTEL, 0x2826), board_ahci }, /* Lewisburg RAID*/
+ { PCI_VDEVICE(INTEL, 0x2827), board_ahci }, /* Lewisburg RAID*/
{ PCI_VDEVICE(INTEL, 0xa182), board_ahci }, /* Lewisburg AHCI*/
{ PCI_VDEVICE(INTEL, 0xa184), board_ahci }, /* Lewisburg RAID*/
{ PCI_VDEVICE(INTEL, 0xa186), board_ahci }, /* Lewisburg RAID*/
{ PCI_VDEVICE(INTEL, 0xa18e), board_ahci }, /* Lewisburg RAID*/
+ { PCI_VDEVICE(INTEL, 0xa1d2), board_ahci }, /* Lewisburg RAID*/
+ { PCI_VDEVICE(INTEL, 0xa1d6), board_ahci }, /* Lewisburg RAID*/
{ PCI_VDEVICE(INTEL, 0xa202), board_ahci }, /* Lewisburg AHCI*/
{ PCI_VDEVICE(INTEL, 0xa204), board_ahci }, /* Lewisburg RAID*/
{ PCI_VDEVICE(INTEL, 0xa206), board_ahci }, /* Lewisburg RAID*/
{ PCI_VDEVICE(INTEL, 0xa20e), board_ahci }, /* Lewisburg RAID*/
+ { PCI_VDEVICE(INTEL, 0xa252), board_ahci }, /* Lewisburg RAID*/
+ { PCI_VDEVICE(INTEL, 0xa256), board_ahci }, /* Lewisburg RAID*/
/* JMicron 360/1/3/5/6, match class to avoid IDE function */
{ PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
@@ -1325,6 +1331,44 @@ static inline void ahci_gtf_filter_workaround(struct ata_host *host)
{}
#endif
+#ifdef CONFIG_ARM64
+/*
+ * Due to ERRATA#22536, ThunderX needs to handle HOST_IRQ_STAT differently.
+ * Workaround is to make sure all pending IRQs are served before leaving
+ * handler.
+ */
+static irqreturn_t ahci_thunderx_irq_handler(int irq, void *dev_instance)
+{
+ struct ata_host *host = dev_instance;
+ struct ahci_host_priv *hpriv;
+ unsigned int rc = 0;
+ void __iomem *mmio;
+ u32 irq_stat, irq_masked;
+ unsigned int handled = 1;
+
+ VPRINTK("ENTER\n");
+ hpriv = host->private_data;
+ mmio = hpriv->mmio;
+ irq_stat = readl(mmio + HOST_IRQ_STAT);
+ if (!irq_stat)
+ return IRQ_NONE;
+
+ do {
+ irq_masked = irq_stat & hpriv->port_map;
+ spin_lock(&host->lock);
+ rc = ahci_handle_port_intr(host, irq_masked);
+ if (!rc)
+ handled = 0;
+ writel(irq_stat, mmio + HOST_IRQ_STAT);
+ irq_stat = readl(mmio + HOST_IRQ_STAT);
+ spin_unlock(&host->lock);
+ } while (irq_stat);
+ VPRINTK("EXIT\n");
+
+ return IRQ_RETVAL(handled);
+}
+#endif
+
/*
* ahci_init_msix() - optionally enable per-port MSI-X otherwise defer
* to single msi.
@@ -1560,6 +1604,11 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (ahci_broken_devslp(pdev))
hpriv->flags |= AHCI_HFLAG_NO_DEVSLP;
+#ifdef CONFIG_ARM64
+ if (pdev->vendor == 0x177d && pdev->device == 0xa01c)
+ hpriv->irq_handler = ahci_thunderx_irq_handler;
+#endif
+
/* save initial config */
ahci_pci_save_initial_config(pdev, hpriv);
diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h
index a44c75d4c284..167ba7e3b92e 100644
--- a/drivers/ata/ahci.h
+++ b/drivers/ata/ahci.h
@@ -240,8 +240,7 @@ enum {
error-handling stage) */
AHCI_HFLAG_NO_DEVSLP = (1 << 17), /* no device sleep */
AHCI_HFLAG_NO_FBS = (1 << 18), /* no FBS */
- AHCI_HFLAG_EDGE_IRQ = (1 << 19), /* HOST_IRQ_STAT behaves as
- Edge Triggered */
+
#ifdef CONFIG_PCI_MSI
AHCI_HFLAG_MULTI_MSI = (1 << 20), /* multiple PCI MSIs */
AHCI_HFLAG_MULTI_MSIX = (1 << 21), /* per-port MSI-X */
@@ -361,6 +360,7 @@ struct ahci_host_priv {
* be overridden anytime before the host is activated.
*/
void (*start_engine)(struct ata_port *ap);
+ irqreturn_t (*irq_handler)(int irq, void *dev_instance);
};
#ifdef CONFIG_PCI_MSI
@@ -424,6 +424,7 @@ int ahci_reset_em(struct ata_host *host);
void ahci_print_info(struct ata_host *host, const char *scc_s);
int ahci_host_activate(struct ata_host *host, struct scsi_host_template *sht);
void ahci_error_handler(struct ata_port *ap);
+u32 ahci_handle_port_intr(struct ata_host *host, u32 irq_masked);
static inline void __iomem *__ahci_port_base(struct ata_host *host,
unsigned int port_no)
diff --git a/drivers/ata/ahci_xgene.c b/drivers/ata/ahci_xgene.c
index e2c6d9e0c5ac..8e3f7faf00d3 100644
--- a/drivers/ata/ahci_xgene.c
+++ b/drivers/ata/ahci_xgene.c
@@ -548,6 +548,88 @@ softreset_retry:
return rc;
}
+/**
+ * xgene_ahci_handle_broken_edge_irq - Handle the broken irq.
+ * @ata_host: Host that recieved the irq
+ * @irq_masked: HOST_IRQ_STAT value
+ *
+ * For hardware with broken edge trigger latch
+ * the HOST_IRQ_STAT register misses the edge interrupt
+ * when clearing of HOST_IRQ_STAT register and hardware
+ * reporting the PORT_IRQ_STAT register at the
+ * same clock cycle.
+ * As such, the algorithm below outlines the workaround.
+ *
+ * 1. Read HOST_IRQ_STAT register and save the state.
+ * 2. Clear the HOST_IRQ_STAT register.
+ * 3. Read back the HOST_IRQ_STAT register.
+ * 4. If HOST_IRQ_STAT register equals to zero, then
+ * traverse the rest of port's PORT_IRQ_STAT register
+ * to check if an interrupt is triggered at that point else
+ * go to step 6.
+ * 5. If PORT_IRQ_STAT register of rest ports is not equal to zero
+ * then update the state of HOST_IRQ_STAT saved in step 1.
+ * 6. Handle port interrupts.
+ * 7. Exit
+ */
+static int xgene_ahci_handle_broken_edge_irq(struct ata_host *host,
+ u32 irq_masked)
+{
+ struct ahci_host_priv *hpriv = host->private_data;
+ void __iomem *port_mmio;
+ int i;
+
+ if (!readl(hpriv->mmio + HOST_IRQ_STAT)) {
+ for (i = 0; i < host->n_ports; i++) {
+ if (irq_masked & (1 << i))
+ continue;
+
+ port_mmio = ahci_port_base(host->ports[i]);
+ if (readl(port_mmio + PORT_IRQ_STAT))
+ irq_masked |= (1 << i);
+ }
+ }
+
+ return ahci_handle_port_intr(host, irq_masked);
+}
+
+static irqreturn_t xgene_ahci_irq_intr(int irq, void *dev_instance)
+{
+ struct ata_host *host = dev_instance;
+ struct ahci_host_priv *hpriv;
+ unsigned int rc = 0;
+ void __iomem *mmio;
+ u32 irq_stat, irq_masked;
+
+ VPRINTK("ENTER\n");
+
+ hpriv = host->private_data;
+ mmio = hpriv->mmio;
+
+ /* sigh. 0xffffffff is a valid return from h/w */
+ irq_stat = readl(mmio + HOST_IRQ_STAT);
+ if (!irq_stat)
+ return IRQ_NONE;
+
+ irq_masked = irq_stat & hpriv->port_map;
+
+ spin_lock(&host->lock);
+
+ /*
+ * HOST_IRQ_STAT behaves as edge triggered latch meaning that
+ * it should be cleared before all the port events are cleared.
+ */
+ writel(irq_stat, mmio + HOST_IRQ_STAT);
+
+ rc = xgene_ahci_handle_broken_edge_irq(host, irq_masked);
+
+ spin_unlock(&host->lock);
+
+ VPRINTK("EXIT\n");
+
+ return IRQ_RETVAL(rc);
+}
+
static struct ata_port_operations xgene_ahci_v1_ops = {
.inherits = &ahci_ops,
.host_stop = xgene_ahci_host_stop,
@@ -779,7 +861,8 @@ skip_clk_phy:
hpriv->flags = AHCI_HFLAG_NO_NCQ;
break;
case XGENE_AHCI_V2:
- hpriv->flags |= AHCI_HFLAG_YES_FBS | AHCI_HFLAG_EDGE_IRQ;
+ hpriv->flags |= AHCI_HFLAG_YES_FBS;
+ hpriv->irq_handler = xgene_ahci_irq_intr;
break;
default:
break;
diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
index 402967902cbe..85ea5142a095 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@@ -113,6 +113,7 @@ static ssize_t ahci_store_em_buffer(struct device *dev,
const char *buf, size_t size);
static ssize_t ahci_show_em_supported(struct device *dev,
struct device_attribute *attr, char *buf);
+static irqreturn_t ahci_single_level_irq_intr(int irq, void *dev_instance);
static DEVICE_ATTR(ahci_host_caps, S_IRUGO, ahci_show_host_caps, NULL);
static DEVICE_ATTR(ahci_host_cap2, S_IRUGO, ahci_show_host_cap2, NULL);
@@ -512,6 +513,9 @@ void ahci_save_initial_config(struct device *dev, struct ahci_host_priv *hpriv)
if (!hpriv->start_engine)
hpriv->start_engine = ahci_start_engine;
+
+ if (!hpriv->irq_handler)
+ hpriv->irq_handler = ahci_single_level_irq_intr;
}
EXPORT_SYMBOL_GPL(ahci_save_initial_config);
@@ -1164,8 +1168,7 @@ static void ahci_port_init(struct device *dev, struct ata_port *ap,
/* mark esata ports */
tmp = readl(port_mmio + PORT_CMD);
- if ((tmp & PORT_CMD_HPCP) ||
- ((tmp & PORT_CMD_ESP) && (hpriv->cap & HOST_CAP_SXS)))
+ if ((tmp & PORT_CMD_ESP) && (hpriv->cap & HOST_CAP_SXS))
ap->pflags |= ATA_PFLAG_EXTERNAL;
}
@@ -1846,7 +1849,7 @@ static irqreturn_t ahci_multi_irqs_intr_hard(int irq, void *dev_instance)
return IRQ_HANDLED;
}
-static u32 ahci_handle_port_intr(struct ata_host *host, u32 irq_masked)
+u32 ahci_handle_port_intr(struct ata_host *host, u32 irq_masked)
{
unsigned int i, handled = 0;
@@ -1872,43 +1875,7 @@ static u32 ahci_handle_port_intr(struct ata_host *host, u32 irq_masked)
return handled;
}
-
-static irqreturn_t ahci_single_edge_irq_intr(int irq, void *dev_instance)
-{
- struct ata_host *host = dev_instance;
- struct ahci_host_priv *hpriv;
- unsigned int rc = 0;
- void __iomem *mmio;
- u32 irq_stat, irq_masked;
-
- VPRINTK("ENTER\n");
-
- hpriv = host->private_data;
- mmio = hpriv->mmio;
-
- /* sigh. 0xffffffff is a valid return from h/w */
- irq_stat = readl(mmio + HOST_IRQ_STAT);
- if (!irq_stat)
- return IRQ_NONE;
-
- irq_masked = irq_stat & hpriv->port_map;
-
- spin_lock(&host->lock);
-
- /*
- * HOST_IRQ_STAT behaves as edge triggered latch meaning that
- * it should be cleared before all the port events are cleared.
- */
- writel(irq_stat, mmio + HOST_IRQ_STAT);
-
- rc = ahci_handle_port_intr(host, irq_masked);
-
- spin_unlock(&host->lock);
-
- VPRINTK("EXIT\n");
-
- return IRQ_RETVAL(rc);
-}
+EXPORT_SYMBOL_GPL(ahci_handle_port_intr);
static irqreturn_t ahci_single_level_irq_intr(int irq, void *dev_instance)
{
@@ -2535,14 +2502,18 @@ int ahci_host_activate(struct ata_host *host, struct scsi_host_template *sht)
int irq = hpriv->irq;
int rc;
- if (hpriv->flags & (AHCI_HFLAG_MULTI_MSI | AHCI_HFLAG_MULTI_MSIX))
+ if (hpriv->flags & (AHCI_HFLAG_MULTI_MSI | AHCI_HFLAG_MULTI_MSIX)) {
+ if (hpriv->irq_handler)
+ dev_warn(host->dev, "both AHCI_HFLAG_MULTI_MSI flag set \
+ and custom irq handler implemented\n");
+
rc = ahci_host_activate_multi_irqs(host, sht);
- else if (hpriv->flags & AHCI_HFLAG_EDGE_IRQ)
- rc = ata_host_activate(host, irq, ahci_single_edge_irq_intr,
- IRQF_SHARED, sht);
- else
- rc = ata_host_activate(host, irq, ahci_single_level_irq_intr,
+ } else {
+ rc = ata_host_activate(host, irq, hpriv->irq_handler,
IRQF_SHARED, sht);
+ }
+
+
return rc;
}
EXPORT_SYMBOL_GPL(ahci_host_activate);
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 7e959f90c020..e417e1a1d02c 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -675,19 +675,18 @@ static int ata_ioc32(struct ata_port *ap)
int ata_sas_scsi_ioctl(struct ata_port *ap, struct scsi_device *scsidev,
int cmd, void __user *arg)
{
- int val = -EINVAL, rc = -EINVAL;
+ unsigned long val;
+ int rc = -EINVAL;
unsigned long flags;
switch (cmd) {
- case ATA_IOC_GET_IO32:
+ case HDIO_GET_32BIT:
spin_lock_irqsave(ap->lock, flags);
val = ata_ioc32(ap);
spin_unlock_irqrestore(ap->lock, flags);
- if (copy_to_user(arg, &val, 1))
- return -EFAULT;
- return 0;
+ return put_user(val, (unsigned long __user *)arg);
- case ATA_IOC_SET_IO32:
+ case HDIO_SET_32BIT:
val = (unsigned long) arg;
rc = 0;
spin_lock_irqsave(ap->lock, flags);
diff --git a/drivers/ata/pata_rb532_cf.c b/drivers/ata/pata_rb532_cf.c
index 12fe0f3bb7e9..c8b6a780a290 100644
--- a/drivers/ata/pata_rb532_cf.c
+++ b/drivers/ata/pata_rb532_cf.c
@@ -32,6 +32,8 @@
#include <linux/libata.h>
#include <scsi/scsi_host.h>
+#include <asm/mach-rc32434/rb.h>
+
#define DRV_NAME "pata-rb532-cf"
#define DRV_VERSION "0.1.0"
#define DRV_DESC "PATA driver for RouterBOARD 532 Compact Flash"
@@ -107,6 +109,7 @@ static int rb532_pata_driver_probe(struct platform_device *pdev)
int gpio;
struct resource *res;
struct ata_host *ah;
+ struct cf_device *pdata;
struct rb532_cf_info *info;
int ret;
@@ -122,7 +125,13 @@ static int rb532_pata_driver_probe(struct platform_device *pdev)
return -ENOENT;
}
- gpio = irq_to_gpio(irq);
+ pdata = dev_get_platdata(&pdev->dev);
+ if (!pdata) {
+ dev_err(&pdev->dev, "no platform data specified\n");
+ return -EINVAL;
+ }
+
+ gpio = pdata->gpio_pin;
if (gpio < 0) {
dev_err(&pdev->dev, "no GPIO found for irq%d\n", irq);
return -ENOENT;
diff --git a/drivers/char/random.c b/drivers/char/random.c
index d0da5d852d41..b583e5336630 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1819,6 +1819,28 @@ unsigned int get_random_int(void)
EXPORT_SYMBOL(get_random_int);
/*
+ * Same as get_random_int(), but returns unsigned long.
+ */
+unsigned long get_random_long(void)
+{
+ __u32 *hash;
+ unsigned long ret;
+
+ if (arch_get_random_long(&ret))
+ return ret;
+
+ hash = get_cpu_var(get_random_int_hash);
+
+ hash[0] += current->pid + jiffies + random_get_entropy();
+ md5_transform(hash, random_int_secret);
+ ret = *(unsigned long *)hash;
+ put_cpu_var(get_random_int_hash);
+
+ return ret;
+}
+EXPORT_SYMBOL(get_random_long);
+
+/*
* randomize_range() returns a start address such that
*
* [...... <range> .....]
diff --git a/drivers/clk/ti/dpll3xxx.c b/drivers/clk/ti/dpll3xxx.c
index 1c300388782b..cc739291a3ce 100644
--- a/drivers/clk/ti/dpll3xxx.c
+++ b/drivers/clk/ti/dpll3xxx.c
@@ -460,7 +460,8 @@ int omap3_noncore_dpll_enable(struct clk_hw *hw)
parent = clk_hw_get_parent(hw);
- if (clk_hw_get_rate(hw) == clk_get_rate(dd->clk_bypass)) {
+ if (clk_hw_get_rate(hw) ==
+ clk_hw_get_rate(__clk_get_hw(dd->clk_bypass))) {
WARN_ON(parent != __clk_get_hw(dd->clk_bypass));
r = _omap3_noncore_dpll_bypass(clk);
} else {
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 659879a56dba..f93511031177 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -296,6 +296,7 @@ endif
config QORIQ_CPUFREQ
tristate "CPU frequency scaling driver for Freescale QorIQ SoCs"
depends on OF && COMMON_CLK && (PPC_E500MC || ARM)
+ depends on !CPU_THERMAL || THERMAL
select CLK_QORIQ
help
This adds the CPUFreq driver support for Freescale QorIQ SoCs
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index 0031069b64c9..14b1f9393b05 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -84,10 +84,10 @@ config ARM_KIRKWOOD_CPUFREQ
SoCs.
config ARM_MT8173_CPUFREQ
- bool "Mediatek MT8173 CPUFreq support"
+ tristate "Mediatek MT8173 CPUFreq support"
depends on ARCH_MEDIATEK && REGULATOR
depends on ARM64 || (ARM_CPU_TOPOLOGY && COMPILE_TEST)
- depends on !CPU_THERMAL || THERMAL=y
+ depends on !CPU_THERMAL || THERMAL
select PM_OPP
help
This adds the CPUFreq driver support for Mediatek MT8173 SoC.
diff --git a/drivers/cpufreq/mt8173-cpufreq.c b/drivers/cpufreq/mt8173-cpufreq.c
index 1efba340456d..2058e6d292ce 100644
--- a/drivers/cpufreq/mt8173-cpufreq.c
+++ b/drivers/cpufreq/mt8173-cpufreq.c
@@ -17,6 +17,7 @@
#include <linux/cpu_cooling.h>
#include <linux/cpufreq.h>
#include <linux/cpumask.h>
+#include <linux/module.h>
#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/pm_opp.h>
diff --git a/drivers/devfreq/tegra-devfreq.c b/drivers/devfreq/tegra-devfreq.c
index 848b93ee930f..fe9dce0245bf 100644
--- a/drivers/devfreq/tegra-devfreq.c
+++ b/drivers/devfreq/tegra-devfreq.c
@@ -500,6 +500,8 @@ static int tegra_devfreq_target(struct device *dev, unsigned long *freq,
clk_set_min_rate(tegra->emc_clock, rate);
clk_set_rate(tegra->emc_clock, 0);
+ *freq = rate;
+
return 0;
}
diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c
index f2a0310ae771..debca824bed6 100644
--- a/drivers/dma/pxa_dma.c
+++ b/drivers/dma/pxa_dma.c
@@ -583,6 +583,8 @@ static void set_updater_desc(struct pxad_desc_sw *sw_desc,
(PXA_DCMD_LENGTH & sizeof(u32));
if (flags & DMA_PREP_INTERRUPT)
updater->dcmd |= PXA_DCMD_ENDIRQEN;
+ if (sw_desc->cyclic)
+ sw_desc->hw_desc[sw_desc->nb_desc - 2]->ddadr = sw_desc->first;
}
static bool is_desc_completed(struct virt_dma_desc *vd)
@@ -673,6 +675,10 @@ static irqreturn_t pxad_chan_handler(int irq, void *dev_id)
dev_dbg(&chan->vc.chan.dev->device,
"%s(): checking txd %p[%x]: completed=%d\n",
__func__, vd, vd->tx.cookie, is_desc_completed(vd));
+ if (to_pxad_sw_desc(vd)->cyclic) {
+ vchan_cyclic_callback(vd);
+ break;
+ }
if (is_desc_completed(vd)) {
list_del(&vd->node);
vchan_cookie_complete(vd);
@@ -1080,7 +1086,7 @@ pxad_prep_dma_cyclic(struct dma_chan *dchan,
return NULL;
pxad_get_config(chan, dir, &dcmd, &dsadr, &dtadr);
- dcmd |= PXA_DCMD_ENDIRQEN | (PXA_DCMD_LENGTH | period_len);
+ dcmd |= PXA_DCMD_ENDIRQEN | (PXA_DCMD_LENGTH & period_len);
dev_dbg(&chan->vc.chan.dev->device,
"%s(): buf_addr=0x%lx len=%zu period=%zu dir=%d flags=%lx\n",
__func__, (unsigned long)buf_addr, len, period_len, dir, flags);
diff --git a/drivers/gpio/gpio-rcar.c b/drivers/gpio/gpio-rcar.c
index cf41440aff91..d9ab0cd1d205 100644
--- a/drivers/gpio/gpio-rcar.c
+++ b/drivers/gpio/gpio-rcar.c
@@ -196,6 +196,44 @@ static int gpio_rcar_irq_set_wake(struct irq_data *d, unsigned int on)
return 0;
}
+static void gpio_rcar_irq_bus_lock(struct irq_data *d)
+{
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+ struct gpio_rcar_priv *p = gpiochip_get_data(gc);
+
+ pm_runtime_get_sync(&p->pdev->dev);
+}
+
+static void gpio_rcar_irq_bus_sync_unlock(struct irq_data *d)
+{
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+ struct gpio_rcar_priv *p = gpiochip_get_data(gc);
+
+ pm_runtime_put(&p->pdev->dev);
+}
+
+
+static int gpio_rcar_irq_request_resources(struct irq_data *d)
+{
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+ struct gpio_rcar_priv *p = gpiochip_get_data(gc);
+ int error;
+
+ error = pm_runtime_get_sync(&p->pdev->dev);
+ if (error < 0)
+ return error;
+
+ return 0;
+}
+
+static void gpio_rcar_irq_release_resources(struct irq_data *d)
+{
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+ struct gpio_rcar_priv *p = gpiochip_get_data(gc);
+
+ pm_runtime_put(&p->pdev->dev);
+}
+
static irqreturn_t gpio_rcar_irq_handler(int irq, void *dev_id)
{
struct gpio_rcar_priv *p = dev_id;
@@ -450,6 +488,10 @@ static int gpio_rcar_probe(struct platform_device *pdev)
irq_chip->irq_unmask = gpio_rcar_irq_enable;
irq_chip->irq_set_type = gpio_rcar_irq_set_type;
irq_chip->irq_set_wake = gpio_rcar_irq_set_wake;
+ irq_chip->irq_bus_lock = gpio_rcar_irq_bus_lock;
+ irq_chip->irq_bus_sync_unlock = gpio_rcar_irq_bus_sync_unlock;
+ irq_chip->irq_request_resources = gpio_rcar_irq_request_resources;
+ irq_chip->irq_release_resources = gpio_rcar_irq_release_resources;
irq_chip->flags = IRQCHIP_SET_TYPE_MASKED | IRQCHIP_MASK_ON_SUSPEND;
ret = gpiochip_add_data(gpio_chip, p);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index 89c3dd62ba21..119cdc2c43e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -77,7 +77,7 @@ void amdgpu_connector_hotplug(struct drm_connector *connector)
} else if (amdgpu_atombios_dp_needs_link_train(amdgpu_connector)) {
/* Don't try to start link training before we
* have the dpcd */
- if (!amdgpu_atombios_dp_get_dpcd(amdgpu_connector))
+ if (amdgpu_atombios_dp_get_dpcd(amdgpu_connector))
return;
/* set it to OFF so that drm_helper_connector_dpms()
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index acd066d0a805..8297bc319369 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -72,8 +72,8 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
struct drm_crtc *crtc = &amdgpuCrtc->base;
unsigned long flags;
- unsigned i;
- int vpos, hpos, stat, min_udelay;
+ unsigned i, repcnt = 4;
+ int vpos, hpos, stat, min_udelay = 0;
struct drm_vblank_crtc *vblank = &crtc->dev->vblank[work->crtc_id];
amdgpu_flip_wait_fence(adev, &work->excl);
@@ -96,7 +96,7 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
* In practice this won't execute very often unless on very fast
* machines because the time window for this to happen is very small.
*/
- for (;;) {
+ while (amdgpuCrtc->enabled && repcnt--) {
/* GET_DISTANCE_TO_VBLANKSTART returns distance to real vblank
* start in hpos, and to the "fudged earlier" vblank start in
* vpos.
@@ -114,10 +114,22 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
/* Sleep at least until estimated real start of hw vblank */
spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
min_udelay = (-hpos + 1) * max(vblank->linedur_ns / 1000, 5);
+ if (min_udelay > vblank->framedur_ns / 2000) {
+ /* Don't wait ridiculously long - something is wrong */
+ repcnt = 0;
+ break;
+ }
usleep_range(min_udelay, 2 * min_udelay);
spin_lock_irqsave(&crtc->dev->event_lock, flags);
};
+ if (!repcnt)
+ DRM_DEBUG_DRIVER("Delay problem on crtc %d: min_udelay %d, "
+ "framedur %d, linedur %d, stat %d, vpos %d, "
+ "hpos %d\n", work->crtc_id, min_udelay,
+ vblank->framedur_ns / 1000,
+ vblank->linedur_ns / 1000, stat, vpos, hpos);
+
/* do the flip (mmio) */
adev->mode_info.funcs->page_flip(adev, work->crtc_id, work->base);
/* set the flip status */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 7380f782cd14..d20c2a8929cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -596,7 +596,8 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
break;
}
ttm_eu_backoff_reservation(&ticket, &list);
- if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE))
+ if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) &&
+ !amdgpu_vm_debug)
amdgpu_gem_va_update_vm(adev, bo_va, args->operation);
drm_gem_object_unreference_unlocked(gobj);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 7d8d84eaea4a..95a4a25d8df9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -113,6 +113,10 @@ static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
+ if ((adev->flags & AMD_IS_PX) &&
+ (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+ return snprintf(buf, PAGE_SIZE, "off\n");
+
if (adev->pp_enabled) {
enum amd_dpm_forced_level level;
@@ -140,6 +144,11 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
enum amdgpu_dpm_forced_level level;
int ret = 0;
+ /* Can't force performance level when the card is off */
+ if ((adev->flags & AMD_IS_PX) &&
+ (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+ return -EINVAL;
+
if (strncmp("low", buf, strlen("low")) == 0) {
level = AMDGPU_DPM_FORCED_LEVEL_LOW;
} else if (strncmp("high", buf, strlen("high")) == 0) {
@@ -157,6 +166,7 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
mutex_lock(&adev->pm.mutex);
if (adev->pm.dpm.thermal_active) {
count = -EINVAL;
+ mutex_unlock(&adev->pm.mutex);
goto fail;
}
ret = amdgpu_dpm_force_performance_level(adev, level);
@@ -167,8 +177,6 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
mutex_unlock(&adev->pm.mutex);
}
fail:
- mutex_unlock(&adev->pm.mutex);
-
return count;
}
@@ -182,8 +190,14 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
char *buf)
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
+ struct drm_device *ddev = adev->ddev;
int temp;
+ /* Can't get temperature when the card is off */
+ if ((adev->flags & AMD_IS_PX) &&
+ (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+ return -EINVAL;
+
if (!adev->pp_enabled && !adev->pm.funcs->get_temperature)
temp = 0;
else
@@ -634,11 +648,6 @@ force:
/* update display watermarks based on new power state */
amdgpu_display_bandwidth_update(adev);
- /* update displays */
- amdgpu_dpm_display_configuration_changed(adev);
-
- adev->pm.dpm.current_active_crtcs = adev->pm.dpm.new_active_crtcs;
- adev->pm.dpm.current_active_crtc_count = adev->pm.dpm.new_active_crtc_count;
/* wait for the rings to drain */
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
@@ -655,6 +664,12 @@ force:
amdgpu_dpm_post_set_power_state(adev);
+ /* update displays */
+ amdgpu_dpm_display_configuration_changed(adev);
+
+ adev->pm.dpm.current_active_crtcs = adev->pm.dpm.new_active_crtcs;
+ adev->pm.dpm.current_active_crtc_count = adev->pm.dpm.new_active_crtc_count;
+
if (adev->pm.funcs->force_performance_level) {
if (adev->pm.dpm.thermal_active) {
enum amdgpu_dpm_forced_level level = adev->pm.dpm.forced_level;
@@ -847,12 +862,16 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data)
struct drm_info_node *node = (struct drm_info_node *) m->private;
struct drm_device *dev = node->minor->dev;
struct amdgpu_device *adev = dev->dev_private;
+ struct drm_device *ddev = adev->ddev;
if (!adev->pm.dpm_enabled) {
seq_printf(m, "dpm not enabled\n");
return 0;
}
- if (adev->pp_enabled) {
+ if ((adev->flags & AMD_IS_PX) &&
+ (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) {
+ seq_printf(m, "PX asic powered off\n");
+ } else if (adev->pp_enabled) {
amdgpu_dpm_debugfs_print_current_performance_level(adev, m);
} else {
mutex_lock(&adev->pm.mutex);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
index b9d0d55f6b47..3cb6d6c413c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
@@ -143,8 +143,10 @@ static int amdgpu_pp_late_init(void *handle)
adev->powerplay.pp_handle);
#ifdef CONFIG_DRM_AMD_POWERPLAY
- if (adev->pp_enabled)
+ if (adev->pp_enabled) {
amdgpu_pm_sysfs_init(adev);
+ amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_COMPLETE_INIT, NULL, NULL);
+ }
#endif
return ret;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
index 9056355309d1..e7ef2261ff4a 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
@@ -2202,8 +2202,7 @@ static void cz_dpm_powergate_vce(struct amdgpu_device *adev, bool gate)
AMD_PG_STATE_GATE);
cz_enable_vce_dpm(adev, false);
- /* TODO: to figure out why vce can't be poweroff. */
- /* cz_send_msg_to_smc(adev, PPSMC_MSG_VCEPowerOFF); */
+ cz_send_msg_to_smc(adev, PPSMC_MSG_VCEPowerOFF);
pi->vce_power_gated = true;
} else {
cz_send_msg_to_smc(adev, PPSMC_MSG_VCEPowerON);
@@ -2226,10 +2225,8 @@ static void cz_dpm_powergate_vce(struct amdgpu_device *adev, bool gate)
}
} else { /*pi->caps_vce_pg*/
cz_update_vce_dpm(adev);
- cz_enable_vce_dpm(adev, true);
+ cz_enable_vce_dpm(adev, !gate);
}
-
- return;
}
const struct amd_ip_funcs cz_dpm_ip_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 7732059ae30f..06602df707f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -3628,6 +3628,19 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vm_id, uint64_t pd_addr)
{
int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+ amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
+ WAIT_REG_MEM_FUNCTION(3) | /* equal */
+ WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+ amdgpu_ring_write(ring, seq);
+ amdgpu_ring_write(ring, 0xffffffff);
+ amdgpu_ring_write(ring, 4); /* poll interval */
+
if (usepfp) {
/* synce CE with ME to prevent CE fetch CEIB before context switch done */
amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 8f8ec37ecd88..7086ac17abee 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4809,7 +4809,8 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
- WAIT_REG_MEM_FUNCTION(3))); /* equal */
+ WAIT_REG_MEM_FUNCTION(3) | /* equal */
+ WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
amdgpu_ring_write(ring, addr & 0xfffffffc);
amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
amdgpu_ring_write(ring, seq);
@@ -4995,7 +4996,7 @@ static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
case AMDGPU_IRQ_STATE_ENABLE:
cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
- PRIV_REG_INT_ENABLE, 0);
+ PRIV_REG_INT_ENABLE, 1);
WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
break;
default:
diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
index aa67244a77ae..589599f66fcc 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
@@ -402,8 +402,11 @@ int pp_dpm_dispatch_tasks(void *handle, enum amd_pp_event event_id, void *input,
data.requested_ui_label = power_state_convert(ps);
ret = pem_handle_event(pp_handle->eventmgr, event_id, &data);
+ break;
}
- break;
+ case AMD_PP_EVENT_COMPLETE_INIT:
+ ret = pem_handle_event(pp_handle->eventmgr, event_id, &data);
+ break;
default:
break;
}
diff --git a/drivers/gpu/drm/amd/powerplay/eventmgr/eventactionchains.c b/drivers/gpu/drm/amd/powerplay/eventmgr/eventactionchains.c
index 83be3cf210e0..6b52c78cb404 100644
--- a/drivers/gpu/drm/amd/powerplay/eventmgr/eventactionchains.c
+++ b/drivers/gpu/drm/amd/powerplay/eventmgr/eventactionchains.c
@@ -165,6 +165,7 @@ const struct action_chain resume_action_chain = {
};
static const pem_event_action *complete_init_event[] = {
+ unblock_adjust_power_state_tasks,
adjust_power_state_tasks,
enable_gfx_clock_gating_tasks,
enable_gfx_voltage_island_power_gating_tasks,
diff --git a/drivers/gpu/drm/amd/powerplay/eventmgr/eventmgr.c b/drivers/gpu/drm/amd/powerplay/eventmgr/eventmgr.c
index 52a3efc97f05..46410e3c7349 100644
--- a/drivers/gpu/drm/amd/powerplay/eventmgr/eventmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/eventmgr/eventmgr.c
@@ -31,7 +31,7 @@
static int pem_init(struct pp_eventmgr *eventmgr)
{
int result = 0;
- struct pem_event_data event_data;
+ struct pem_event_data event_data = { {0} };
/* Initialize PowerPlay feature info */
pem_init_feature_info(eventmgr);
@@ -52,7 +52,7 @@ static int pem_init(struct pp_eventmgr *eventmgr)
static void pem_fini(struct pp_eventmgr *eventmgr)
{
- struct pem_event_data event_data;
+ struct pem_event_data event_data = { {0} };
pem_uninit_featureInfo(eventmgr);
pem_unregister_interrupts(eventmgr);
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c
index ad7700822a1c..ff08ce41bde9 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c
@@ -226,7 +226,7 @@ int cz_dpm_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate)
}
} else {
cz_dpm_update_vce_dpm(hwmgr);
- cz_enable_disable_vce_dpm(hwmgr, true);
+ cz_enable_disable_vce_dpm(hwmgr, !bgate);
return 0;
}
diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c
index 9759009d1da3..b1480acbb3c3 100644
--- a/drivers/gpu/drm/ast/ast_main.c
+++ b/drivers/gpu/drm/ast/ast_main.c
@@ -227,7 +227,7 @@ static int ast_get_dram_info(struct drm_device *dev)
} while (ast_read32(ast, 0x10000) != 0x01);
data = ast_read32(ast, 0x10004);
- if (data & 0x400)
+ if (data & 0x40)
ast->dram_bus_width = 16;
else
ast->dram_bus_width = 32;
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 0fc38bb7276c..cf39ed3133d6 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -825,8 +825,11 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
}
for_each_pipe(dev_priv, pipe) {
- if (!intel_display_power_is_enabled(dev_priv,
- POWER_DOMAIN_PIPE(pipe))) {
+ enum intel_display_power_domain power_domain;
+
+ power_domain = POWER_DOMAIN_PIPE(pipe);
+ if (!intel_display_power_get_if_enabled(dev_priv,
+ power_domain)) {
seq_printf(m, "Pipe %c power disabled\n",
pipe_name(pipe));
continue;
@@ -840,6 +843,8 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
seq_printf(m, "Pipe %c IER:\t%08x\n",
pipe_name(pipe),
I915_READ(GEN8_DE_PIPE_IER(pipe)));
+
+ intel_display_power_put(dev_priv, power_domain);
}
seq_printf(m, "Display Engine port interrupt mask:\t%08x\n",
@@ -3985,6 +3990,7 @@ static int pipe_crc_set_source(struct drm_device *dev, enum pipe pipe,
struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[pipe];
struct intel_crtc *crtc = to_intel_crtc(intel_get_crtc_for_pipe(dev,
pipe));
+ enum intel_display_power_domain power_domain;
u32 val = 0; /* shut up gcc */
int ret;
@@ -3995,7 +4001,8 @@ static int pipe_crc_set_source(struct drm_device *dev, enum pipe pipe,
if (pipe_crc->source && source)
return -EINVAL;
- if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PIPE(pipe))) {
+ power_domain = POWER_DOMAIN_PIPE(pipe);
+ if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) {
DRM_DEBUG_KMS("Trying to capture CRC while pipe is off\n");
return -EIO;
}
@@ -4012,7 +4019,7 @@ static int pipe_crc_set_source(struct drm_device *dev, enum pipe pipe,
ret = ivb_pipe_crc_ctl_reg(dev, pipe, &source, &val);
if (ret != 0)
- return ret;
+ goto out;
/* none -> real source transition */
if (source) {
@@ -4024,8 +4031,10 @@ static int pipe_crc_set_source(struct drm_device *dev, enum pipe pipe,
entries = kcalloc(INTEL_PIPE_CRC_ENTRIES_NR,
sizeof(pipe_crc->entries[0]),
GFP_KERNEL);
- if (!entries)
- return -ENOMEM;
+ if (!entries) {
+ ret = -ENOMEM;
+ goto out;
+ }
/*
* When IPS gets enabled, the pipe CRC changes. Since IPS gets
@@ -4081,7 +4090,12 @@ static int pipe_crc_set_source(struct drm_device *dev, enum pipe pipe,
hsw_enable_ips(crtc);
}
- return 0;
+ ret = 0;
+
+out:
+ intel_display_power_put(dev_priv, power_domain);
+
+ return ret;
}
/*
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e7cd311e9fbb..b0847b915545 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -751,6 +751,7 @@ struct intel_csr {
uint32_t mmio_count;
i915_reg_t mmioaddr[8];
uint32_t mmiodata[8];
+ uint32_t dc_state;
};
#define DEV_INFO_FOR_EACH_FLAG(func, sep) \
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index 9c89df1af036..a7b4a524fadd 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -71,22 +71,29 @@ static bool intel_crt_get_hw_state(struct intel_encoder *encoder,
struct intel_crt *crt = intel_encoder_to_crt(encoder);
enum intel_display_power_domain power_domain;
u32 tmp;
+ bool ret;
power_domain = intel_display_port_power_domain(encoder);
- if (!intel_display_power_is_enabled(dev_priv, power_domain))
+ if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
return false;
+ ret = false;
+
tmp = I915_READ(crt->adpa_reg);
if (!(tmp & ADPA_DAC_ENABLE))
- return false;
+ goto out;
if (HAS_PCH_CPT(dev))
*pipe = PORT_TO_PIPE_CPT(tmp);
else
*pipe = PORT_TO_PIPE(tmp);
- return true;
+ ret = true;
+out:
+ intel_display_power_put(dev_priv, power_domain);
+
+ return ret;
}
static unsigned int intel_crt_get_flags(struct intel_encoder *encoder)
diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
index 9bb63a85997a..647d85e77c2f 100644
--- a/drivers/gpu/drm/i915/intel_csr.c
+++ b/drivers/gpu/drm/i915/intel_csr.c
@@ -240,6 +240,8 @@ void intel_csr_load_program(struct drm_i915_private *dev_priv)
I915_WRITE(dev_priv->csr.mmioaddr[i],
dev_priv->csr.mmiodata[i]);
}
+
+ dev_priv->csr.dc_state = 0;
}
static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index 54a165b9c92d..0f3df2c39f7c 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -1969,13 +1969,16 @@ bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector)
enum transcoder cpu_transcoder;
enum intel_display_power_domain power_domain;
uint32_t tmp;
+ bool ret;
power_domain = intel_display_port_power_domain(intel_encoder);
- if (!intel_display_power_is_enabled(dev_priv, power_domain))
+ if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
return false;
- if (!intel_encoder->get_hw_state(intel_encoder, &pipe))
- return false;
+ if (!intel_encoder->get_hw_state(intel_encoder, &pipe)) {
+ ret = false;
+ goto out;
+ }
if (port == PORT_A)
cpu_transcoder = TRANSCODER_EDP;
@@ -1987,23 +1990,33 @@ bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector)
switch (tmp & TRANS_DDI_MODE_SELECT_MASK) {
case TRANS_DDI_MODE_SELECT_HDMI:
case TRANS_DDI_MODE_SELECT_DVI:
- return (type == DRM_MODE_CONNECTOR_HDMIA);
+ ret = type == DRM_MODE_CONNECTOR_HDMIA;
+ break;
case TRANS_DDI_MODE_SELECT_DP_SST:
- if (type == DRM_MODE_CONNECTOR_eDP)
- return true;
- return (type == DRM_MODE_CONNECTOR_DisplayPort);
+ ret = type == DRM_MODE_CONNECTOR_eDP ||
+ type == DRM_MODE_CONNECTOR_DisplayPort;
+ break;
+
case TRANS_DDI_MODE_SELECT_DP_MST:
/* if the transcoder is in MST state then
* connector isn't connected */
- return false;
+ ret = false;
+ break;
case TRANS_DDI_MODE_SELECT_FDI:
- return (type == DRM_MODE_CONNECTOR_VGA);
+ ret = type == DRM_MODE_CONNECTOR_VGA;
+ break;
default:
- return false;
+ ret = false;
+ break;
}
+
+out:
+ intel_display_power_put(dev_priv, power_domain);
+
+ return ret;
}
bool intel_ddi_get_hw_state(struct intel_encoder *encoder,
@@ -2015,15 +2028,18 @@ bool intel_ddi_get_hw_state(struct intel_encoder *encoder,
enum intel_display_power_domain power_domain;
u32 tmp;
int i;
+ bool ret;
power_domain = intel_display_port_power_domain(encoder);
- if (!intel_display_power_is_enabled(dev_priv, power_domain))
+ if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
return false;
+ ret = false;
+
tmp = I915_READ(DDI_BUF_CTL(port));
if (!(tmp & DDI_BUF_CTL_ENABLE))
- return false;
+ goto out;
if (port == PORT_A) {
tmp = I915_READ(TRANS_DDI_FUNC_CTL(TRANSCODER_EDP));
@@ -2041,25 +2057,32 @@ bool intel_ddi_get_hw_state(struct intel_encoder *encoder,
break;
}
- return true;
- } else {
- for (i = TRANSCODER_A; i <= TRANSCODER_C; i++) {
- tmp = I915_READ(TRANS_DDI_FUNC_CTL(i));
+ ret = true;
- if ((tmp & TRANS_DDI_PORT_MASK)
- == TRANS_DDI_SELECT_PORT(port)) {
- if ((tmp & TRANS_DDI_MODE_SELECT_MASK) == TRANS_DDI_MODE_SELECT_DP_MST)
- return false;
+ goto out;
+ }
- *pipe = i;
- return true;
- }
+ for (i = TRANSCODER_A; i <= TRANSCODER_C; i++) {
+ tmp = I915_READ(TRANS_DDI_FUNC_CTL(i));
+
+ if ((tmp & TRANS_DDI_PORT_MASK) == TRANS_DDI_SELECT_PORT(port)) {
+ if ((tmp & TRANS_DDI_MODE_SELECT_MASK) ==
+ TRANS_DDI_MODE_SELECT_DP_MST)
+ goto out;
+
+ *pipe = i;
+ ret = true;
+
+ goto out;
}
}
DRM_DEBUG_KMS("No pipe for ddi port %c found\n", port_name(port));
- return false;
+out:
+ intel_display_power_put(dev_priv, power_domain);
+
+ return ret;
}
void intel_ddi_enable_pipe_clock(struct intel_crtc *intel_crtc)
@@ -2508,12 +2531,14 @@ static bool hsw_ddi_wrpll_get_hw_state(struct drm_i915_private *dev_priv,
{
uint32_t val;
- if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PLLS))
+ if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
return false;
val = I915_READ(WRPLL_CTL(pll->id));
hw_state->wrpll = val;
+ intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+
return val & WRPLL_PLL_ENABLE;
}
@@ -2523,12 +2548,14 @@ static bool hsw_ddi_spll_get_hw_state(struct drm_i915_private *dev_priv,
{
uint32_t val;
- if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PLLS))
+ if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
return false;
val = I915_READ(SPLL_CTL);
hw_state->spll = val;
+ intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+
return val & SPLL_PLL_ENABLE;
}
@@ -2645,16 +2672,19 @@ static bool skl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv,
uint32_t val;
unsigned int dpll;
const struct skl_dpll_regs *regs = skl_dpll_regs;
+ bool ret;
- if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PLLS))
+ if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
return false;
+ ret = false;
+
/* DPLL0 is not part of the shared DPLLs, so pll->id is 0 for DPLL1 */
dpll = pll->id + 1;
val = I915_READ(regs[pll->id].ctl);
if (!(val & LCPLL_PLL_ENABLE))
- return false;
+ goto out;
val = I915_READ(DPLL_CTRL1);
hw_state->ctrl1 = (val >> (dpll * 6)) & 0x3f;
@@ -2664,8 +2694,12 @@ static bool skl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv,
hw_state->cfgcr1 = I915_READ(regs[pll->id].cfgcr1);
hw_state->cfgcr2 = I915_READ(regs[pll->id].cfgcr2);
}
+ ret = true;
- return true;
+out:
+ intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+
+ return ret;
}
static void skl_shared_dplls_init(struct drm_i915_private *dev_priv)
@@ -2932,13 +2966,16 @@ static bool bxt_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv,
{
enum port port = (enum port)pll->id; /* 1:1 port->PLL mapping */
uint32_t val;
+ bool ret;
- if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PLLS))
+ if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
return false;
+ ret = false;
+
val = I915_READ(BXT_PORT_PLL_ENABLE(port));
if (!(val & PORT_PLL_ENABLE))
- return false;
+ goto out;
hw_state->ebb0 = I915_READ(BXT_PORT_PLL_EBB_0(port));
hw_state->ebb0 &= PORT_PLL_P1_MASK | PORT_PLL_P2_MASK;
@@ -2985,7 +3022,12 @@ static bool bxt_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv,
I915_READ(BXT_PORT_PCS_DW12_LN23(port)));
hw_state->pcsdw12 &= LANE_STAGGER_MASK | LANESTAGGER_STRAP_OVRD;
- return true;
+ ret = true;
+
+out:
+ intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+
+ return ret;
}
static void bxt_shared_dplls_init(struct drm_i915_private *dev_priv)
@@ -3120,11 +3162,15 @@ bool intel_ddi_is_audio_enabled(struct drm_i915_private *dev_priv,
{
u32 temp;
- if (intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_AUDIO)) {
+ if (intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_AUDIO)) {
temp = I915_READ(HSW_AUD_PIN_ELD_CP_VLD);
+
+ intel_display_power_put(dev_priv, POWER_DOMAIN_AUDIO);
+
if (temp & AUDIO_OUTPUT_ENABLE(intel_crtc->pipe))
return true;
}
+
return false;
}
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 5feb65725c04..46947fffd599 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1351,18 +1351,21 @@ void assert_pipe(struct drm_i915_private *dev_priv,
bool cur_state;
enum transcoder cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv,
pipe);
+ enum intel_display_power_domain power_domain;
/* if we need the pipe quirk it must be always on */
if ((pipe == PIPE_A && dev_priv->quirks & QUIRK_PIPEA_FORCE) ||
(pipe == PIPE_B && dev_priv->quirks & QUIRK_PIPEB_FORCE))
state = true;
- if (!intel_display_power_is_enabled(dev_priv,
- POWER_DOMAIN_TRANSCODER(cpu_transcoder))) {
- cur_state = false;
- } else {
+ power_domain = POWER_DOMAIN_TRANSCODER(cpu_transcoder);
+ if (intel_display_power_get_if_enabled(dev_priv, power_domain)) {
u32 val = I915_READ(PIPECONF(cpu_transcoder));
cur_state = !!(val & PIPECONF_ENABLE);
+
+ intel_display_power_put(dev_priv, power_domain);
+ } else {
+ cur_state = false;
}
I915_STATE_WARN(cur_state != state,
@@ -8171,18 +8174,22 @@ static bool i9xx_get_pipe_config(struct intel_crtc *crtc,
{
struct drm_device *dev = crtc->base.dev;
struct drm_i915_private *dev_priv = dev->dev_private;
+ enum intel_display_power_domain power_domain;
uint32_t tmp;
+ bool ret;
- if (!intel_display_power_is_enabled(dev_priv,
- POWER_DOMAIN_PIPE(crtc->pipe)))
+ power_domain = POWER_DOMAIN_PIPE(crtc->pipe);
+ if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
return false;
pipe_config->cpu_transcoder = (enum transcoder) crtc->pipe;
pipe_config->shared_dpll = DPLL_ID_PRIVATE;
+ ret = false;
+
tmp = I915_READ(PIPECONF(crtc->pipe));
if (!(tmp & PIPECONF_ENABLE))
- return false;
+ goto out;
if (IS_G4X(dev) || IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) {
switch (tmp & PIPECONF_BPC_MASK) {
@@ -8262,7 +8269,12 @@ static bool i9xx_get_pipe_config(struct intel_crtc *crtc,
pipe_config->base.adjusted_mode.crtc_clock =
pipe_config->port_clock / pipe_config->pixel_multiplier;
- return true;
+ ret = true;
+
+out:
+ intel_display_power_put(dev_priv, power_domain);
+
+ return ret;
}
static void ironlake_init_pch_refclk(struct drm_device *dev)
@@ -9366,18 +9378,21 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc,
{
struct drm_device *dev = crtc->base.dev;
struct drm_i915_private *dev_priv = dev->dev_private;
+ enum intel_display_power_domain power_domain;
uint32_t tmp;
+ bool ret;
- if (!intel_display_power_is_enabled(dev_priv,
- POWER_DOMAIN_PIPE(crtc->pipe)))
+ power_domain = POWER_DOMAIN_PIPE(crtc->pipe);
+ if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
return false;
pipe_config->cpu_transcoder = (enum transcoder) crtc->pipe;
pipe_config->shared_dpll = DPLL_ID_PRIVATE;
+ ret = false;
tmp = I915_READ(PIPECONF(crtc->pipe));
if (!(tmp & PIPECONF_ENABLE))
- return false;
+ goto out;
switch (tmp & PIPECONF_BPC_MASK) {
case PIPECONF_6BPC:
@@ -9440,7 +9455,12 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc,
ironlake_get_pfit_config(crtc, pipe_config);
- return true;
+ ret = true;
+
+out:
+ intel_display_power_put(dev_priv, power_domain);
+
+ return ret;
}
static void assert_can_disable_lcpll(struct drm_i915_private *dev_priv)
@@ -9950,12 +9970,17 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc,
{
struct drm_device *dev = crtc->base.dev;
struct drm_i915_private *dev_priv = dev->dev_private;
- enum intel_display_power_domain pfit_domain;
+ enum intel_display_power_domain power_domain;
+ unsigned long power_domain_mask;
uint32_t tmp;
+ bool ret;
- if (!intel_display_power_is_enabled(dev_priv,
- POWER_DOMAIN_PIPE(crtc->pipe)))
+ power_domain = POWER_DOMAIN_PIPE(crtc->pipe);
+ if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
return false;
+ power_domain_mask = BIT(power_domain);
+
+ ret = false;
pipe_config->cpu_transcoder = (enum transcoder) crtc->pipe;
pipe_config->shared_dpll = DPLL_ID_PRIVATE;
@@ -9982,13 +10007,14 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc,
pipe_config->cpu_transcoder = TRANSCODER_EDP;
}
- if (!intel_display_power_is_enabled(dev_priv,
- POWER_DOMAIN_TRANSCODER(pipe_config->cpu_transcoder)))
- return false;
+ power_domain = POWER_DOMAIN_TRANSCODER(pipe_config->cpu_transcoder);
+ if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
+ goto out;
+ power_domain_mask |= BIT(power_domain);
tmp = I915_READ(PIPECONF(pipe_config->cpu_transcoder));
if (!(tmp & PIPECONF_ENABLE))
- return false;
+ goto out;
haswell_get_ddi_port_state(crtc, pipe_config);
@@ -9998,14 +10024,14 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc,
skl_init_scalers(dev, crtc, pipe_config);
}
- pfit_domain = POWER_DOMAIN_PIPE_PANEL_FITTER(crtc->pipe);
-
if (INTEL_INFO(dev)->gen >= 9) {
pipe_config->scaler_state.scaler_id = -1;
pipe_config->scaler_state.scaler_users &= ~(1 << SKL_CRTC_INDEX);
}
- if (intel_display_power_is_enabled(dev_priv, pfit_domain)) {
+ power_domain = POWER_DOMAIN_PIPE_PANEL_FITTER(crtc->pipe);
+ if (intel_display_power_get_if_enabled(dev_priv, power_domain)) {
+ power_domain_mask |= BIT(power_domain);
if (INTEL_INFO(dev)->gen >= 9)
skylake_get_pfit_config(crtc, pipe_config);
else
@@ -10023,7 +10049,13 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc,
pipe_config->pixel_multiplier = 1;
}
- return true;
+ ret = true;
+
+out:
+ for_each_power_domain(power_domain, power_domain_mask)
+ intel_display_power_put(dev_priv, power_domain);
+
+ return ret;
}
static void i845_update_cursor(struct drm_crtc *crtc, u32 base, bool on)
@@ -13630,7 +13662,7 @@ static bool ibx_pch_dpll_get_hw_state(struct drm_i915_private *dev_priv,
{
uint32_t val;
- if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PLLS))
+ if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS))
return false;
val = I915_READ(PCH_DPLL(pll->id));
@@ -13638,6 +13670,8 @@ static bool ibx_pch_dpll_get_hw_state(struct drm_i915_private *dev_priv,
hw_state->fp0 = I915_READ(PCH_FP0(pll->id));
hw_state->fp1 = I915_READ(PCH_FP1(pll->id));
+ intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS);
+
return val & DPLL_VCO_ENABLE;
}
@@ -15568,10 +15602,12 @@ void i915_redisable_vga(struct drm_device *dev)
* level, just check if the power well is enabled instead of trying to
* follow the "don't touch the power well if we don't need it" policy
* the rest of the driver uses. */
- if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_VGA))
+ if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_VGA))
return;
i915_redisable_vga_power_on(dev);
+
+ intel_display_power_put(dev_priv, POWER_DOMAIN_VGA);
}
static bool primary_get_hw_state(struct intel_plane *plane)
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 1bbd67b046da..1d8de43bed56 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -2362,15 +2362,18 @@ static bool intel_dp_get_hw_state(struct intel_encoder *encoder,
struct drm_i915_private *dev_priv = dev->dev_private;
enum intel_display_power_domain power_domain;
u32 tmp;
+ bool ret;
power_domain = intel_display_port_power_domain(encoder);
- if (!intel_display_power_is_enabled(dev_priv, power_domain))
+ if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
return false;
+ ret = false;
+
tmp = I915_READ(intel_dp->output_reg);
if (!(tmp & DP_PORT_EN))
- return false;
+ goto out;
if (IS_GEN7(dev) && port == PORT_A) {
*pipe = PORT_TO_PIPE_CPT(tmp);
@@ -2381,7 +2384,9 @@ static bool intel_dp_get_hw_state(struct intel_encoder *encoder,
u32 trans_dp = I915_READ(TRANS_DP_CTL(p));
if (TRANS_DP_PIPE_TO_PORT(trans_dp) == port) {
*pipe = p;
- return true;
+ ret = true;
+
+ goto out;
}
}
@@ -2393,7 +2398,12 @@ static bool intel_dp_get_hw_state(struct intel_encoder *encoder,
*pipe = PORT_TO_PIPE(tmp);
}
- return true;
+ ret = true;
+
+out:
+ intel_display_power_put(dev_priv, power_domain);
+
+ return ret;
}
static void intel_dp_get_config(struct intel_encoder *encoder,
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index ea5415851c6e..df7f3cb66056 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1428,6 +1428,8 @@ bool __intel_display_power_is_enabled(struct drm_i915_private *dev_priv,
enum intel_display_power_domain domain);
void intel_display_power_get(struct drm_i915_private *dev_priv,
enum intel_display_power_domain domain);
+bool intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv,
+ enum intel_display_power_domain domain);
void intel_display_power_put(struct drm_i915_private *dev_priv,
enum intel_display_power_domain domain);
@@ -1514,6 +1516,7 @@ enable_rpm_wakeref_asserts(struct drm_i915_private *dev_priv)
enable_rpm_wakeref_asserts(dev_priv)
void intel_runtime_pm_get(struct drm_i915_private *dev_priv);
+bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv);
void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv);
void intel_runtime_pm_put(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c
index 44742fa2f616..0193c62a53ef 100644
--- a/drivers/gpu/drm/i915/intel_dsi.c
+++ b/drivers/gpu/drm/i915/intel_dsi.c
@@ -664,13 +664,16 @@ static bool intel_dsi_get_hw_state(struct intel_encoder *encoder,
struct drm_device *dev = encoder->base.dev;
enum intel_display_power_domain power_domain;
enum port port;
+ bool ret;
DRM_DEBUG_KMS("\n");
power_domain = intel_display_port_power_domain(encoder);
- if (!intel_display_power_is_enabled(dev_priv, power_domain))
+ if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
return false;
+ ret = false;
+
/* XXX: this only works for one DSI output */
for_each_dsi_port(port, intel_dsi->ports) {
i915_reg_t ctrl_reg = IS_BROXTON(dev) ?
@@ -691,12 +694,16 @@ static bool intel_dsi_get_hw_state(struct intel_encoder *encoder,
if (dpi_enabled || (func & CMD_MODE_DATA_WIDTH_MASK)) {
if (I915_READ(MIPI_DEVICE_READY(port)) & DEVICE_READY) {
*pipe = port == PORT_A ? PIPE_A : PIPE_B;
- return true;
+ ret = true;
+
+ goto out;
}
}
}
+out:
+ intel_display_power_put(dev_priv, power_domain);
- return false;
+ return ret;
}
static void intel_dsi_get_config(struct intel_encoder *encoder,
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index 4a77639a489d..cb5d1b15755c 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -880,15 +880,18 @@ static bool intel_hdmi_get_hw_state(struct intel_encoder *encoder,
struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
enum intel_display_power_domain power_domain;
u32 tmp;
+ bool ret;
power_domain = intel_display_port_power_domain(encoder);
- if (!intel_display_power_is_enabled(dev_priv, power_domain))
+ if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
return false;
+ ret = false;
+
tmp = I915_READ(intel_hdmi->hdmi_reg);
if (!(tmp & SDVO_ENABLE))
- return false;
+ goto out;
if (HAS_PCH_CPT(dev))
*pipe = PORT_TO_PIPE_CPT(tmp);
@@ -897,7 +900,12 @@ static bool intel_hdmi_get_hw_state(struct intel_encoder *encoder,
else
*pipe = PORT_TO_PIPE(tmp);
- return true;
+ ret = true;
+
+out:
+ intel_display_power_put(dev_priv, power_domain);
+
+ return ret;
}
static void intel_hdmi_get_config(struct intel_encoder *encoder,
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index 0da0240caf81..bc04d8d29acb 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -75,22 +75,30 @@ static bool intel_lvds_get_hw_state(struct intel_encoder *encoder,
struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base);
enum intel_display_power_domain power_domain;
u32 tmp;
+ bool ret;
power_domain = intel_display_port_power_domain(encoder);
- if (!intel_display_power_is_enabled(dev_priv, power_domain))
+ if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
return false;
+ ret = false;
+
tmp = I915_READ(lvds_encoder->reg);
if (!(tmp & LVDS_PORT_EN))
- return false;
+ goto out;
if (HAS_PCH_CPT(dev))
*pipe = PORT_TO_PIPE_CPT(tmp);
else
*pipe = PORT_TO_PIPE(tmp);
- return true;
+ ret = true;
+
+out:
+ intel_display_power_put(dev_priv, power_domain);
+
+ return ret;
}
static void intel_lvds_get_config(struct intel_encoder *encoder,
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index a234687792f0..b28c29f20e75 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -2829,7 +2829,10 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
memset(ddb, 0, sizeof(*ddb));
for_each_pipe(dev_priv, pipe) {
- if (!intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PIPE(pipe)))
+ enum intel_display_power_domain power_domain;
+
+ power_domain = POWER_DOMAIN_PIPE(pipe);
+ if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
continue;
for_each_plane(dev_priv, pipe, plane) {
@@ -2841,6 +2844,8 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
val = I915_READ(CUR_BUF_CFG(pipe));
skl_ddb_entry_init_from_hw(&ddb->plane[pipe][PLANE_CURSOR],
val);
+
+ intel_display_power_put(dev_priv, power_domain);
}
}
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index ddbdbffe829a..4f43d9b32e66 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -470,6 +470,43 @@ static void gen9_set_dc_state_debugmask_memory_up(
}
}
+static void gen9_write_dc_state(struct drm_i915_private *dev_priv,
+ u32 state)
+{
+ int rewrites = 0;
+ int rereads = 0;
+ u32 v;
+
+ I915_WRITE(DC_STATE_EN, state);
+
+ /* It has been observed that disabling the dc6 state sometimes
+ * doesn't stick and dmc keeps returning old value. Make sure
+ * the write really sticks enough times and also force rewrite until
+ * we are confident that state is exactly what we want.
+ */
+ do {
+ v = I915_READ(DC_STATE_EN);
+
+ if (v != state) {
+ I915_WRITE(DC_STATE_EN, state);
+ rewrites++;
+ rereads = 0;
+ } else if (rereads++ > 5) {
+ break;
+ }
+
+ } while (rewrites < 100);
+
+ if (v != state)
+ DRM_ERROR("Writing dc state to 0x%x failed, now 0x%x\n",
+ state, v);
+
+ /* Most of the times we need one retry, avoid spam */
+ if (rewrites > 1)
+ DRM_DEBUG_KMS("Rewrote dc state to 0x%x %d times\n",
+ state, rewrites);
+}
+
static void gen9_set_dc_state(struct drm_i915_private *dev_priv, uint32_t state)
{
uint32_t val;
@@ -494,10 +531,18 @@ static void gen9_set_dc_state(struct drm_i915_private *dev_priv, uint32_t state)
val = I915_READ(DC_STATE_EN);
DRM_DEBUG_KMS("Setting DC state from %02x to %02x\n",
val & mask, state);
+
+ /* Check if DMC is ignoring our DC state requests */
+ if ((val & mask) != dev_priv->csr.dc_state)
+ DRM_ERROR("DC state mismatch (0x%x -> 0x%x)\n",
+ dev_priv->csr.dc_state, val & mask);
+
val &= ~mask;
val |= state;
- I915_WRITE(DC_STATE_EN, val);
- POSTING_READ(DC_STATE_EN);
+
+ gen9_write_dc_state(dev_priv, val);
+
+ dev_priv->csr.dc_state = val & mask;
}
void bxt_enable_dc9(struct drm_i915_private *dev_priv)
@@ -1442,6 +1487,22 @@ static void chv_pipe_power_well_disable(struct drm_i915_private *dev_priv,
chv_set_pipe_power_well(dev_priv, power_well, false);
}
+static void
+__intel_display_power_get_domain(struct drm_i915_private *dev_priv,
+ enum intel_display_power_domain domain)
+{
+ struct i915_power_domains *power_domains = &dev_priv->power_domains;
+ struct i915_power_well *power_well;
+ int i;
+
+ for_each_power_well(i, power_well, BIT(domain), power_domains) {
+ if (!power_well->count++)
+ intel_power_well_enable(dev_priv, power_well);
+ }
+
+ power_domains->domain_use_count[domain]++;
+}
+
/**
* intel_display_power_get - grab a power domain reference
* @dev_priv: i915 device instance
@@ -1457,24 +1518,53 @@ static void chv_pipe_power_well_disable(struct drm_i915_private *dev_priv,
void intel_display_power_get(struct drm_i915_private *dev_priv,
enum intel_display_power_domain domain)
{
- struct i915_power_domains *power_domains;
- struct i915_power_well *power_well;
- int i;
+ struct i915_power_domains *power_domains = &dev_priv->power_domains;
intel_runtime_pm_get(dev_priv);
- power_domains = &dev_priv->power_domains;
+ mutex_lock(&power_domains->lock);
+
+ __intel_display_power_get_domain(dev_priv, domain);
+
+ mutex_unlock(&power_domains->lock);
+}
+
+/**
+ * intel_display_power_get_if_enabled - grab a reference for an enabled display power domain
+ * @dev_priv: i915 device instance
+ * @domain: power domain to reference
+ *
+ * This function grabs a power domain reference for @domain and ensures that the
+ * power domain and all its parents are powered up. Therefore users should only
+ * grab a reference to the innermost power domain they need.
+ *
+ * Any power domain reference obtained by this function must have a symmetric
+ * call to intel_display_power_put() to release the reference again.
+ */
+bool intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv,
+ enum intel_display_power_domain domain)
+{
+ struct i915_power_domains *power_domains = &dev_priv->power_domains;
+ bool is_enabled;
+
+ if (!intel_runtime_pm_get_if_in_use(dev_priv))
+ return false;
mutex_lock(&power_domains->lock);
- for_each_power_well(i, power_well, BIT(domain), power_domains) {
- if (!power_well->count++)
- intel_power_well_enable(dev_priv, power_well);
+ if (__intel_display_power_is_enabled(dev_priv, domain)) {
+ __intel_display_power_get_domain(dev_priv, domain);
+ is_enabled = true;
+ } else {
+ is_enabled = false;
}
- power_domains->domain_use_count[domain]++;
-
mutex_unlock(&power_domains->lock);
+
+ if (!is_enabled)
+ intel_runtime_pm_put(dev_priv);
+
+ return is_enabled;
}
/**
@@ -2213,15 +2303,15 @@ void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume)
*/
void intel_power_domains_suspend(struct drm_i915_private *dev_priv)
{
- if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
- skl_display_core_uninit(dev_priv);
-
/*
* Even if power well support was disabled we still want to disable
* power wells while we are system suspended.
*/
if (!i915.disable_power_well)
intel_display_power_put(dev_priv, POWER_DOMAIN_INIT);
+
+ if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
+ skl_display_core_uninit(dev_priv);
}
/**
@@ -2246,6 +2336,41 @@ void intel_runtime_pm_get(struct drm_i915_private *dev_priv)
}
/**
+ * intel_runtime_pm_get_if_in_use - grab a runtime pm reference if device in use
+ * @dev_priv: i915 device instance
+ *
+ * This function grabs a device-level runtime pm reference if the device is
+ * already in use and ensures that it is powered up.
+ *
+ * Any runtime pm reference obtained by this function must have a symmetric
+ * call to intel_runtime_pm_put() to release the reference again.
+ */
+bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv)
+{
+ struct drm_device *dev = dev_priv->dev;
+ struct device *device = &dev->pdev->dev;
+
+ if (IS_ENABLED(CONFIG_PM)) {
+ int ret = pm_runtime_get_if_in_use(device);
+
+ /*
+ * In cases runtime PM is disabled by the RPM core and we get
+ * an -EINVAL return value we are not supposed to call this
+ * function, since the power state is undefined. This applies
+ * atm to the late/early system suspend/resume handlers.
+ */
+ WARN_ON_ONCE(ret < 0);
+ if (ret <= 0)
+ return false;
+ }
+
+ atomic_inc(&dev_priv->pm.wakeref_count);
+ assert_rpm_wakelock_held(dev_priv);
+
+ return true;
+}
+
+/**
* intel_runtime_pm_get_noresume - grab a runtime pm reference
* @dev_priv: i915 device instance
*
diff --git a/drivers/gpu/drm/nouveau/nouveau_platform.c b/drivers/gpu/drm/nouveau/nouveau_platform.c
index 8a70cec59bcd..2dfe58af12e4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_platform.c
+++ b/drivers/gpu/drm/nouveau/nouveau_platform.c
@@ -24,7 +24,7 @@
static int nouveau_platform_probe(struct platform_device *pdev)
{
const struct nvkm_device_tegra_func *func;
- struct nvkm_device *device;
+ struct nvkm_device *device = NULL;
struct drm_device *drm;
int ret;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
index 7f8a42721eb2..e7e581d6a8ff 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
@@ -252,32 +252,40 @@ nvkm_device_tegra_new(const struct nvkm_device_tegra_func *func,
if (!(tdev = kzalloc(sizeof(*tdev), GFP_KERNEL)))
return -ENOMEM;
- *pdevice = &tdev->device;
+
tdev->func = func;
tdev->pdev = pdev;
tdev->irq = -1;
tdev->vdd = devm_regulator_get(&pdev->dev, "vdd");
- if (IS_ERR(tdev->vdd))
- return PTR_ERR(tdev->vdd);
+ if (IS_ERR(tdev->vdd)) {
+ ret = PTR_ERR(tdev->vdd);
+ goto free;
+ }
tdev->rst = devm_reset_control_get(&pdev->dev, "gpu");
- if (IS_ERR(tdev->rst))
- return PTR_ERR(tdev->rst);
+ if (IS_ERR(tdev->rst)) {
+ ret = PTR_ERR(tdev->rst);
+ goto free;
+ }
tdev->clk = devm_clk_get(&pdev->dev, "gpu");
- if (IS_ERR(tdev->clk))
- return PTR_ERR(tdev->clk);
+ if (IS_ERR(tdev->clk)) {
+ ret = PTR_ERR(tdev->clk);
+ goto free;
+ }
tdev->clk_pwr = devm_clk_get(&pdev->dev, "pwr");
- if (IS_ERR(tdev->clk_pwr))
- return PTR_ERR(tdev->clk_pwr);
+ if (IS_ERR(tdev->clk_pwr)) {
+ ret = PTR_ERR(tdev->clk_pwr);
+ goto free;
+ }
nvkm_device_tegra_probe_iommu(tdev);
ret = nvkm_device_tegra_power_up(tdev);
if (ret)
- return ret;
+ goto remove;
tdev->gpu_speedo = tegra_sku_info.gpu_speedo_value;
ret = nvkm_device_ctor(&nvkm_device_tegra_func, NULL, &pdev->dev,
@@ -285,9 +293,19 @@ nvkm_device_tegra_new(const struct nvkm_device_tegra_func *func,
cfg, dbg, detect, mmio, subdev_mask,
&tdev->device);
if (ret)
- return ret;
+ goto powerdown;
+
+ *pdevice = &tdev->device;
return 0;
+
+powerdown:
+ nvkm_device_tegra_power_down(tdev);
+remove:
+ nvkm_device_tegra_remove_iommu(tdev);
+free:
+ kfree(tdev);
+ return ret;
}
#else
int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.c
index 74e2f7c6c07e..9688970eca47 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.c
@@ -328,6 +328,7 @@ nvkm_dp_train(struct work_struct *w)
.outp = outp,
}, *dp = &_dp;
u32 datarate = 0;
+ u8 pwr;
int ret;
if (!outp->base.info.location && disp->func->sor.magic)
@@ -355,6 +356,15 @@ nvkm_dp_train(struct work_struct *w)
/* disable link interrupt handling during link training */
nvkm_notify_put(&outp->irq);
+ /* ensure sink is not in a low-power state */
+ if (!nvkm_rdaux(outp->aux, DPCD_SC00, &pwr, 1)) {
+ if ((pwr & DPCD_SC00_SET_POWER) != DPCD_SC00_SET_POWER_D0) {
+ pwr &= ~DPCD_SC00_SET_POWER;
+ pwr |= DPCD_SC00_SET_POWER_D0;
+ nvkm_wraux(outp->aux, DPCD_SC00, &pwr, 1);
+ }
+ }
+
/* enable down-spreading and execute pre-train script from vbios */
dp_link_train_init(dp, outp->dpcd[3] & 0x01);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.h
index 9596290329c7..6e10c5e0ef11 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.h
@@ -71,5 +71,11 @@
#define DPCD_LS0C_LANE1_POST_CURSOR2 0x0c
#define DPCD_LS0C_LANE0_POST_CURSOR2 0x03
+/* DPCD Sink Control */
+#define DPCD_SC00 0x00600
+#define DPCD_SC00_SET_POWER 0x03
+#define DPCD_SC00_SET_POWER_D0 0x01
+#define DPCD_SC00_SET_POWER_D3 0x03
+
void nvkm_dp_train(struct work_struct *);
#endif
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 902b59cebac5..4197ca1bb1e4 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1744,7 +1744,6 @@ int radeon_resume_kms(struct drm_device *dev, bool resume, bool fbcon)
}
drm_kms_helper_poll_enable(dev);
- drm_helper_hpd_irq_event(dev);
/* set the power state here in case we are a PX system or headless */
if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled)
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index 298ea1c453c3..2b9ba03a7c1a 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -403,7 +403,8 @@ static void radeon_flip_work_func(struct work_struct *__work)
struct drm_crtc *crtc = &radeon_crtc->base;
unsigned long flags;
int r;
- int vpos, hpos, stat, min_udelay;
+ int vpos, hpos, stat, min_udelay = 0;
+ unsigned repcnt = 4;
struct drm_vblank_crtc *vblank = &crtc->dev->vblank[work->crtc_id];
down_read(&rdev->exclusive_lock);
@@ -454,7 +455,7 @@ static void radeon_flip_work_func(struct work_struct *__work)
* In practice this won't execute very often unless on very fast
* machines because the time window for this to happen is very small.
*/
- for (;;) {
+ while (radeon_crtc->enabled && repcnt--) {
/* GET_DISTANCE_TO_VBLANKSTART returns distance to real vblank
* start in hpos, and to the "fudged earlier" vblank start in
* vpos.
@@ -472,10 +473,22 @@ static void radeon_flip_work_func(struct work_struct *__work)
/* Sleep at least until estimated real start of hw vblank */
spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
min_udelay = (-hpos + 1) * max(vblank->linedur_ns / 1000, 5);
+ if (min_udelay > vblank->framedur_ns / 2000) {
+ /* Don't wait ridiculously long - something is wrong */
+ repcnt = 0;
+ break;
+ }
usleep_range(min_udelay, 2 * min_udelay);
spin_lock_irqsave(&crtc->dev->event_lock, flags);
};
+ if (!repcnt)
+ DRM_DEBUG_DRIVER("Delay problem on crtc %d: min_udelay %d, "
+ "framedur %d, linedur %d, stat %d, vpos %d, "
+ "hpos %d\n", work->crtc_id, min_udelay,
+ vblank->framedur_ns / 1000,
+ vblank->linedur_ns / 1000, stat, vpos, hpos);
+
/* do the flip (mmio) */
radeon_page_flip(rdev, radeon_crtc->crtc_id, work->base);
diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c
index 248c5a9fb0b6..0f14d897baf9 100644
--- a/drivers/gpu/drm/radeon/radeon_pm.c
+++ b/drivers/gpu/drm/radeon/radeon_pm.c
@@ -1079,12 +1079,6 @@ force:
/* update display watermarks based on new power state */
radeon_bandwidth_update(rdev);
- /* update displays */
- radeon_dpm_display_configuration_changed(rdev);
-
- rdev->pm.dpm.current_active_crtcs = rdev->pm.dpm.new_active_crtcs;
- rdev->pm.dpm.current_active_crtc_count = rdev->pm.dpm.new_active_crtc_count;
- rdev->pm.dpm.single_display = single_display;
/* wait for the rings to drain */
for (i = 0; i < RADEON_NUM_RINGS; i++) {
@@ -1101,6 +1095,13 @@ force:
radeon_dpm_post_set_power_state(rdev);
+ /* update displays */
+ radeon_dpm_display_configuration_changed(rdev);
+
+ rdev->pm.dpm.current_active_crtcs = rdev->pm.dpm.new_active_crtcs;
+ rdev->pm.dpm.current_active_crtc_count = rdev->pm.dpm.new_active_crtc_count;
+ rdev->pm.dpm.single_display = single_display;
+
if (rdev->asic->dpm.force_performance_level) {
if (rdev->pm.dpm.thermal_active) {
enum radeon_dpm_forced_level level = rdev->pm.dpm.forced_level;
diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
index da462afcb225..dd2dbb9746ce 100644
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c
@@ -18,6 +18,7 @@
#include <linux/host1x.h>
#include <linux/of.h>
#include <linux/slab.h>
+#include <linux/of_device.h>
#include "bus.h"
#include "dev.h"
@@ -394,6 +395,7 @@ static int host1x_device_add(struct host1x *host1x,
device->dev.coherent_dma_mask = host1x->dev->coherent_dma_mask;
device->dev.dma_mask = &device->dev.coherent_dma_mask;
dev_set_name(&device->dev, "%s", driver->driver.name);
+ of_dma_configure(&device->dev, host1x->dev->of_node);
device->dev.release = host1x_device_release;
device->dev.bus = &host1x_bus_type;
device->dev.parent = host1x->dev;
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 314bf3718cc7..ff348690df94 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -23,6 +23,7 @@
#include <linux/of_device.h>
#include <linux/clk.h>
#include <linux/io.h>
+#include <linux/dma-mapping.h>
#define CREATE_TRACE_POINTS
#include <trace/events/host1x.h>
@@ -68,6 +69,7 @@ static const struct host1x_info host1x01_info = {
.nb_bases = 8,
.init = host1x01_init,
.sync_offset = 0x3000,
+ .dma_mask = DMA_BIT_MASK(32),
};
static const struct host1x_info host1x02_info = {
@@ -77,6 +79,7 @@ static const struct host1x_info host1x02_info = {
.nb_bases = 12,
.init = host1x02_init,
.sync_offset = 0x3000,
+ .dma_mask = DMA_BIT_MASK(32),
};
static const struct host1x_info host1x04_info = {
@@ -86,6 +89,7 @@ static const struct host1x_info host1x04_info = {
.nb_bases = 64,
.init = host1x04_init,
.sync_offset = 0x2100,
+ .dma_mask = DMA_BIT_MASK(34),
};
static const struct host1x_info host1x05_info = {
@@ -95,6 +99,7 @@ static const struct host1x_info host1x05_info = {
.nb_bases = 64,
.init = host1x05_init,
.sync_offset = 0x2100,
+ .dma_mask = DMA_BIT_MASK(34),
};
static struct of_device_id host1x_of_match[] = {
@@ -148,6 +153,8 @@ static int host1x_probe(struct platform_device *pdev)
if (IS_ERR(host->regs))
return PTR_ERR(host->regs);
+ dma_set_mask_and_coherent(host->dev, host->info->dma_mask);
+
if (host->info->init) {
err = host->info->init(host);
if (err)
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index 0b6e8e9629c5..dace124994bb 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -96,6 +96,7 @@ struct host1x_info {
int nb_mlocks; /* host1x: number of mlocks */
int (*init)(struct host1x *); /* initialize per SoC ops */
int sync_offset;
+ u64 dma_mask; /* mask of addressable memory */
};
struct host1x {
diff --git a/drivers/i2c/busses/i2c-brcmstb.c b/drivers/i2c/busses/i2c-brcmstb.c
index 3711df1d4526..4a45408dd820 100644
--- a/drivers/i2c/busses/i2c-brcmstb.c
+++ b/drivers/i2c/busses/i2c-brcmstb.c
@@ -586,8 +586,7 @@ static int brcmstb_i2c_probe(struct platform_device *pdev)
if (!dev)
return -ENOMEM;
- dev->bsc_regmap = devm_kzalloc(&pdev->dev, sizeof(struct bsc_regs *),
- GFP_KERNEL);
+ dev->bsc_regmap = devm_kzalloc(&pdev->dev, sizeof(*dev->bsc_regmap), GFP_KERNEL);
if (!dev->bsc_regmap)
return -ENOMEM;
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 00da80e02154..94b80a51ab68 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -358,6 +358,7 @@ int ib_register_device(struct ib_device *device,
ret = device->query_device(device, &device->attrs, &uhw);
if (ret) {
printk(KERN_WARNING "Couldn't query the device attributes\n");
+ ib_cache_cleanup_one(device);
goto out;
}
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index f334090bb612..1e37f3515d98 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1071,7 +1071,7 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
}
}
- if (rec->hop_limit > 1 || use_roce) {
+ if (rec->hop_limit > 0 || use_roce) {
ah_attr->ah_flags = IB_AH_GRH;
ah_attr->grh.dgid = rec->dgid;
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 6ffc9c4e93af..6c6fbff19752 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1970,7 +1970,8 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
resp_size);
INIT_UDATA(&uhw, buf + sizeof(cmd),
(unsigned long)cmd.response + resp_size,
- in_len - sizeof(cmd), out_len - resp_size);
+ in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
+ out_len - resp_size);
memset(&cmd_ex, 0, sizeof(cmd_ex));
cmd_ex.user_handle = cmd.user_handle;
@@ -3413,7 +3414,8 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
INIT_UDATA(&udata, buf + sizeof cmd,
(unsigned long) cmd.response + sizeof resp,
- in_len - sizeof cmd, out_len - sizeof resp);
+ in_len - sizeof cmd - sizeof(struct ib_uverbs_cmd_hdr),
+ out_len - sizeof resp);
ret = __uverbs_create_xsrq(file, ib_dev, &xcmd, &udata);
if (ret)
@@ -3439,7 +3441,8 @@ ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file,
INIT_UDATA(&udata, buf + sizeof cmd,
(unsigned long) cmd.response + sizeof resp,
- in_len - sizeof cmd, out_len - sizeof resp);
+ in_len - sizeof cmd - sizeof(struct ib_uverbs_cmd_hdr),
+ out_len - sizeof resp);
ret = __uverbs_create_xsrq(file, ib_dev, &cmd, &udata);
if (ret)
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 4659256cd95e..3b2ddd64a371 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -75,7 +75,8 @@ static void mlx5_ib_srq_event(struct mlx5_core_srq *srq, enum mlx5_event type)
static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
struct mlx5_create_srq_mbox_in **in,
- struct ib_udata *udata, int buf_size, int *inlen)
+ struct ib_udata *udata, int buf_size, int *inlen,
+ int is_xrc)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_create_srq ucmd = {};
@@ -87,13 +88,8 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
int ncont;
u32 offset;
u32 uidx = MLX5_IB_DEFAULT_UIDX;
- int drv_data = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
- if (drv_data < 0)
- return -EINVAL;
-
- ucmdlen = (drv_data < sizeof(ucmd)) ?
- drv_data : sizeof(ucmd);
+ ucmdlen = min(udata->inlen, sizeof(ucmd));
if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) {
mlx5_ib_dbg(dev, "failed copy udata\n");
@@ -103,15 +99,17 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
if (ucmd.reserved0 || ucmd.reserved1)
return -EINVAL;
- if (drv_data > sizeof(ucmd) &&
+ if (udata->inlen > sizeof(ucmd) &&
!ib_is_udata_cleared(udata, sizeof(ucmd),
- drv_data - sizeof(ucmd)))
+ udata->inlen - sizeof(ucmd)))
return -EINVAL;
- err = get_srq_user_index(to_mucontext(pd->uobject->context),
- &ucmd, udata->inlen, &uidx);
- if (err)
- return err;
+ if (is_xrc) {
+ err = get_srq_user_index(to_mucontext(pd->uobject->context),
+ &ucmd, udata->inlen, &uidx);
+ if (err)
+ return err;
+ }
srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE);
@@ -151,7 +149,8 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
(*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
(*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);
- if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) {
+ if ((MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) &&
+ is_xrc){
xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in,
xrc_srq_context_entry);
MLX5_SET(xrc_srqc, xsrqc, user_index, uidx);
@@ -170,7 +169,7 @@ err_umem:
static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
struct mlx5_create_srq_mbox_in **in, int buf_size,
- int *inlen)
+ int *inlen, int is_xrc)
{
int err;
int i;
@@ -224,7 +223,8 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
(*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
- if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) {
+ if ((MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) &&
+ is_xrc){
xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in,
xrc_srq_context_entry);
/* 0xffffff means we ask to work with cqe version 0 */
@@ -302,10 +302,14 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs,
srq->msrq.max_avail_gather);
+ is_xrc = (init_attr->srq_type == IB_SRQT_XRC);
+
if (pd->uobject)
- err = create_srq_user(pd, srq, &in, udata, buf_size, &inlen);
+ err = create_srq_user(pd, srq, &in, udata, buf_size, &inlen,
+ is_xrc);
else
- err = create_srq_kernel(dev, srq, &in, buf_size, &inlen);
+ err = create_srq_kernel(dev, srq, &in, buf_size, &inlen,
+ is_xrc);
if (err) {
mlx5_ib_warn(dev, "create srq %s failed, err %d\n",
@@ -313,7 +317,6 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
goto err_srq;
}
- is_xrc = (init_attr->srq_type == IB_SRQT_XRC);
in->ctx.state_log_sz = ilog2(srq->msrq.max);
flgs = ((srq->msrq.wqe_shift - 4) | (is_xrc << 5) | (srq->wq_sig << 7)) << 24;
xrcdn = 0;
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index e5e223938eec..374c129219ef 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -114,6 +114,7 @@ struct kmem_cache *amd_iommu_irq_cache;
static void update_domain(struct protection_domain *domain);
static int protection_domain_init(struct protection_domain *domain);
+static void detach_device(struct device *dev);
/*
* For dynamic growth the aperture size is split into ranges of 128MB of
@@ -384,6 +385,9 @@ static void iommu_uninit_device(struct device *dev)
if (!dev_data)
return;
+ if (dev_data->domain)
+ detach_device(dev);
+
iommu_device_unlink(amd_iommu_rlookup_table[dev_data->devid]->iommu_dev,
dev);
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 013bdfff2d4d..bf4959f4225b 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -228,6 +228,10 @@ static int amd_iommu_enable_interrupts(void);
static int __init iommu_go_to_state(enum iommu_init_state state);
static void init_device_table_dma(void);
+static int iommu_pc_get_set_reg_val(struct amd_iommu *iommu,
+ u8 bank, u8 cntr, u8 fxn,
+ u64 *value, bool is_write);
+
static inline void update_last_devid(u16 devid)
{
if (devid > amd_iommu_last_bdf)
@@ -1016,6 +1020,34 @@ static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu)
}
/*
+ * Family15h Model 30h-3fh (IOMMU Mishandles ATS Write Permission)
+ * Workaround:
+ * BIOS should enable ATS write permission check by setting
+ * L2_DEBUG_3[AtsIgnoreIWDis](D0F2xF4_x47[0]) = 1b
+ */
+static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu)
+{
+ u32 value;
+
+ if ((boot_cpu_data.x86 != 0x15) ||
+ (boot_cpu_data.x86_model < 0x30) ||
+ (boot_cpu_data.x86_model > 0x3f))
+ return;
+
+ /* Test L2_DEBUG_3[AtsIgnoreIWDis] == 1 */
+ value = iommu_read_l2(iommu, 0x47);
+
+ if (value & BIT(0))
+ return;
+
+ /* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */
+ iommu_write_l2(iommu, 0x47, value | BIT(0));
+
+ pr_info("AMD-Vi: Applying ATS write check workaround for IOMMU at %s\n",
+ dev_name(&iommu->dev->dev));
+}
+
+/*
* This function clues the initialization function for one IOMMU
* together and also allocates the command buffer and programs the
* hardware. It does NOT enable the IOMMU. This is done afterwards.
@@ -1142,8 +1174,8 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu)
amd_iommu_pc_present = true;
/* Check if the performance counters can be written to */
- if ((0 != amd_iommu_pc_get_set_reg_val(0, 0, 0, 0, &val, true)) ||
- (0 != amd_iommu_pc_get_set_reg_val(0, 0, 0, 0, &val2, false)) ||
+ if ((0 != iommu_pc_get_set_reg_val(iommu, 0, 0, 0, &val, true)) ||
+ (0 != iommu_pc_get_set_reg_val(iommu, 0, 0, 0, &val2, false)) ||
(val != val2)) {
pr_err("AMD-Vi: Unable to write to IOMMU perf counter.\n");
amd_iommu_pc_present = false;
@@ -1284,6 +1316,7 @@ static int iommu_init_pci(struct amd_iommu *iommu)
}
amd_iommu_erratum_746_workaround(iommu);
+ amd_iommu_ats_write_check_workaround(iommu);
iommu->iommu_dev = iommu_device_create(&iommu->dev->dev, iommu,
amd_iommu_groups, "ivhd%d",
@@ -2283,22 +2316,15 @@ u8 amd_iommu_pc_get_max_counters(u16 devid)
}
EXPORT_SYMBOL(amd_iommu_pc_get_max_counters);
-int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn,
+static int iommu_pc_get_set_reg_val(struct amd_iommu *iommu,
+ u8 bank, u8 cntr, u8 fxn,
u64 *value, bool is_write)
{
- struct amd_iommu *iommu;
u32 offset;
u32 max_offset_lim;
- /* Make sure the IOMMU PC resource is available */
- if (!amd_iommu_pc_present)
- return -ENODEV;
-
- /* Locate the iommu associated with the device ID */
- iommu = amd_iommu_rlookup_table[devid];
-
/* Check for valid iommu and pc register indexing */
- if (WARN_ON((iommu == NULL) || (fxn > 0x28) || (fxn & 7)))
+ if (WARN_ON((fxn > 0x28) || (fxn & 7)))
return -ENODEV;
offset = (u32)(((0x40|bank) << 12) | (cntr << 8) | fxn);
@@ -2322,3 +2348,16 @@ int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn,
return 0;
}
EXPORT_SYMBOL(amd_iommu_pc_get_set_reg_val);
+
+int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn,
+ u64 *value, bool is_write)
+{
+ struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
+
+ /* Make sure the IOMMU PC resource is available */
+ if (!amd_iommu_pc_present || iommu == NULL)
+ return -ENODEV;
+
+ return iommu_pc_get_set_reg_val(iommu, bank, cntr, fxn,
+ value, is_write);
+}
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index fb092f3f11cb..8ffd7568fc91 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -329,7 +329,8 @@ static int dmar_pci_bus_notifier(struct notifier_block *nb,
/* Only care about add/remove events for physical functions */
if (pdev->is_virtfn)
return NOTIFY_DONE;
- if (action != BUS_NOTIFY_ADD_DEVICE && action != BUS_NOTIFY_DEL_DEVICE)
+ if (action != BUS_NOTIFY_ADD_DEVICE &&
+ action != BUS_NOTIFY_REMOVED_DEVICE)
return NOTIFY_DONE;
info = dmar_alloc_pci_notify_info(pdev, action);
@@ -339,7 +340,7 @@ static int dmar_pci_bus_notifier(struct notifier_block *nb,
down_write(&dmar_global_lock);
if (action == BUS_NOTIFY_ADD_DEVICE)
dmar_pci_bus_add_dev(info);
- else if (action == BUS_NOTIFY_DEL_DEVICE)
+ else if (action == BUS_NOTIFY_REMOVED_DEVICE)
dmar_pci_bus_del_dev(info);
up_write(&dmar_global_lock);
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 986a53e3eb96..a2e1b7f14df2 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -4367,7 +4367,7 @@ int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
rmrru->devices_cnt);
if(ret < 0)
return ret;
- } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
+ } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
dmar_remove_dev_scope(info, rmrr->segment,
rmrru->devices, rmrru->devices_cnt);
}
@@ -4387,7 +4387,7 @@ int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
break;
else if(ret < 0)
return ret;
- } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
+ } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
if (dmar_remove_dev_scope(info, atsr->segment,
atsru->devices, atsru->devices_cnt))
break;
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 0a73632b28d5..43dfd15c1dd2 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -78,6 +78,9 @@ struct its_node {
#define ITS_ITT_ALIGN SZ_256
+/* Convert page order to size in bytes */
+#define PAGE_ORDER_TO_SIZE(o) (PAGE_SIZE << (o))
+
struct event_lpi_map {
unsigned long *lpi_map;
u16 *col_map;
@@ -600,11 +603,6 @@ static void its_unmask_irq(struct irq_data *d)
lpi_set_config(d, true);
}
-static void its_eoi_irq(struct irq_data *d)
-{
- gic_write_eoir(d->hwirq);
-}
-
static int its_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
bool force)
{
@@ -641,7 +639,7 @@ static struct irq_chip its_irq_chip = {
.name = "ITS",
.irq_mask = its_mask_irq,
.irq_unmask = its_unmask_irq,
- .irq_eoi = its_eoi_irq,
+ .irq_eoi = irq_chip_eoi_parent,
.irq_set_affinity = its_set_affinity,
.irq_compose_msi_msg = its_irq_compose_msi_msg,
};
@@ -846,7 +844,6 @@ static int its_alloc_tables(const char *node_name, struct its_node *its)
u64 type = GITS_BASER_TYPE(val);
u64 entry_size = GITS_BASER_ENTRY_SIZE(val);
int order = get_order(psz);
- int alloc_size;
int alloc_pages;
u64 tmp;
void *base;
@@ -878,9 +875,8 @@ static int its_alloc_tables(const char *node_name, struct its_node *its)
}
}
- alloc_size = (1 << order) * PAGE_SIZE;
retry_alloc_baser:
- alloc_pages = (alloc_size / psz);
+ alloc_pages = (PAGE_ORDER_TO_SIZE(order) / psz);
if (alloc_pages > GITS_BASER_PAGES_MAX) {
alloc_pages = GITS_BASER_PAGES_MAX;
order = get_order(GITS_BASER_PAGES_MAX * psz);
@@ -933,7 +929,7 @@ retry_baser:
shr = tmp & GITS_BASER_SHAREABILITY_MASK;
if (!shr) {
cache = GITS_BASER_nC;
- __flush_dcache_area(base, alloc_size);
+ __flush_dcache_area(base, PAGE_ORDER_TO_SIZE(order));
}
goto retry_baser;
}
@@ -966,7 +962,7 @@ retry_baser:
}
pr_info("ITS: allocated %d %s @%lx (psz %dK, shr %d)\n",
- (int)(alloc_size / entry_size),
+ (int)(PAGE_ORDER_TO_SIZE(order) / entry_size),
its_base_type_string[type],
(unsigned long)virt_to_phys(base),
psz / SZ_1K, (int)shr >> GITS_BASER_SHAREABILITY_SHIFT);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 5df40480228b..dd834927bc66 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1191,6 +1191,8 @@ static void dm_unprep_request(struct request *rq)
if (clone)
free_rq_clone(clone);
+ else if (!tio->md->queue->mq_ops)
+ free_rq_tio(tio);
}
/*
diff --git a/drivers/media/i2c/adp1653.c b/drivers/media/i2c/adp1653.c
index 7e9cbf757e95..fb7ed730d932 100644
--- a/drivers/media/i2c/adp1653.c
+++ b/drivers/media/i2c/adp1653.c
@@ -497,7 +497,7 @@ static int adp1653_probe(struct i2c_client *client,
if (!client->dev.platform_data) {
dev_err(&client->dev,
"Neither DT not platform data provided\n");
- return EINVAL;
+ return -EINVAL;
}
flash->platform_data = client->dev.platform_data;
}
diff --git a/drivers/media/i2c/adv7604.c b/drivers/media/i2c/adv7604.c
index f8dd7505b529..e1719ffdfb3d 100644
--- a/drivers/media/i2c/adv7604.c
+++ b/drivers/media/i2c/adv7604.c
@@ -1960,10 +1960,9 @@ static int adv76xx_isr(struct v4l2_subdev *sd, u32 status, bool *handled)
}
/* tx 5v detect */
- tx_5v = io_read(sd, 0x70) & info->cable_det_mask;
+ tx_5v = irq_reg_0x70 & info->cable_det_mask;
if (tx_5v) {
v4l2_dbg(1, debug, sd, "%s: tx_5v: 0x%x\n", __func__, tx_5v);
- io_write(sd, 0x71, tx_5v);
adv76xx_s_detect_tx_5v_ctrl(sd);
if (handled)
*handled = true;
diff --git a/drivers/media/usb/au0828/au0828-video.c b/drivers/media/usb/au0828/au0828-video.c
index 8c54fd21022e..a13625722848 100644
--- a/drivers/media/usb/au0828/au0828-video.c
+++ b/drivers/media/usb/au0828/au0828-video.c
@@ -1843,8 +1843,7 @@ static void au0828_analog_create_entities(struct au0828_dev *dev)
ent->function = MEDIA_ENT_F_CONN_RF;
break;
default: /* AU0828_VMUX_DEBUG */
- ent->function = MEDIA_ENT_F_CONN_TEST;
- break;
+ continue;
}
ret = media_entity_pads_init(ent, 1, &dev->input_pad[i]);
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 4c1903f781fc..0c6c17a1c59e 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -415,7 +415,7 @@ static int cxl_setup_psl_timebase(struct cxl *adapter, struct pci_dev *dev)
delta = mftb() - psl_tb;
if (delta < 0)
delta = -delta;
- } while (cputime_to_usecs(delta) > 16);
+ } while (tb_to_ns(delta) > 16000);
return 0;
}
diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index b6639ea0bf18..f6e4d9718035 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -2232,6 +2232,7 @@ err_irq:
dma_release_channel(host->tx_chan);
if (host->rx_chan)
dma_release_channel(host->rx_chan);
+ pm_runtime_dont_use_autosuspend(host->dev);
pm_runtime_put_sync(host->dev);
pm_runtime_disable(host->dev);
if (host->dbclk)
@@ -2253,6 +2254,7 @@ static int omap_hsmmc_remove(struct platform_device *pdev)
dma_release_channel(host->tx_chan);
dma_release_channel(host->rx_chan);
+ pm_runtime_dont_use_autosuspend(host->dev);
pm_runtime_put_sync(host->dev);
pm_runtime_disable(host->dev);
device_init_wakeup(&pdev->dev, false);
diff --git a/drivers/mtd/ubi/upd.c b/drivers/mtd/ubi/upd.c
index 2a1b6e037e1a..0134ba32a057 100644
--- a/drivers/mtd/ubi/upd.c
+++ b/drivers/mtd/ubi/upd.c
@@ -193,7 +193,7 @@ int ubi_start_leb_change(struct ubi_device *ubi, struct ubi_volume *vol,
vol->changing_leb = 1;
vol->ch_lnum = req->lnum;
- vol->upd_buf = vmalloc(req->bytes);
+ vol->upd_buf = vmalloc(ALIGN((int)req->bytes, ubi->min_io_size));
if (!vol->upd_buf)
return -ENOMEM;
diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig
index 164ccdeca663..0d40aef928e2 100644
--- a/drivers/net/can/Kconfig
+++ b/drivers/net/can/Kconfig
@@ -106,7 +106,7 @@ config CAN_JANZ_ICAN3
config CAN_RCAR
tristate "Renesas R-Car CAN controller"
- depends on ARM
+ depends on ARCH_RENESAS || ARM
---help---
Say Y here if you want to use CAN controller found on Renesas R-Car
SoCs.
diff --git a/drivers/net/can/ifi_canfd/ifi_canfd.c b/drivers/net/can/ifi_canfd/ifi_canfd.c
index 0d1c164374b7..a1bd54ffd31e 100644
--- a/drivers/net/can/ifi_canfd/ifi_canfd.c
+++ b/drivers/net/can/ifi_canfd/ifi_canfd.c
@@ -135,8 +135,12 @@
#define IFI_CANFD_RXFIFO_ID 0x6c
#define IFI_CANFD_RXFIFO_ID_ID_OFFSET 0
-#define IFI_CANFD_RXFIFO_ID_ID_STD_MASK 0x3ff
-#define IFI_CANFD_RXFIFO_ID_ID_XTD_MASK 0x1fffffff
+#define IFI_CANFD_RXFIFO_ID_ID_STD_MASK CAN_SFF_MASK
+#define IFI_CANFD_RXFIFO_ID_ID_STD_OFFSET 0
+#define IFI_CANFD_RXFIFO_ID_ID_STD_WIDTH 10
+#define IFI_CANFD_RXFIFO_ID_ID_XTD_MASK CAN_EFF_MASK
+#define IFI_CANFD_RXFIFO_ID_ID_XTD_OFFSET 11
+#define IFI_CANFD_RXFIFO_ID_ID_XTD_WIDTH 18
#define IFI_CANFD_RXFIFO_ID_IDE BIT(29)
#define IFI_CANFD_RXFIFO_DATA 0x70 /* 0x70..0xac */
@@ -156,8 +160,12 @@
#define IFI_CANFD_TXFIFO_ID 0xbc
#define IFI_CANFD_TXFIFO_ID_ID_OFFSET 0
-#define IFI_CANFD_TXFIFO_ID_ID_STD_MASK 0x3ff
-#define IFI_CANFD_TXFIFO_ID_ID_XTD_MASK 0x1fffffff
+#define IFI_CANFD_TXFIFO_ID_ID_STD_MASK CAN_SFF_MASK
+#define IFI_CANFD_TXFIFO_ID_ID_STD_OFFSET 0
+#define IFI_CANFD_TXFIFO_ID_ID_STD_WIDTH 10
+#define IFI_CANFD_TXFIFO_ID_ID_XTD_MASK CAN_EFF_MASK
+#define IFI_CANFD_TXFIFO_ID_ID_XTD_OFFSET 11
+#define IFI_CANFD_TXFIFO_ID_ID_XTD_WIDTH 18
#define IFI_CANFD_TXFIFO_ID_IDE BIT(29)
#define IFI_CANFD_TXFIFO_DATA 0xc0 /* 0xb0..0xfc */
@@ -229,10 +237,20 @@ static void ifi_canfd_read_fifo(struct net_device *ndev)
rxid = readl(priv->base + IFI_CANFD_RXFIFO_ID);
id = (rxid >> IFI_CANFD_RXFIFO_ID_ID_OFFSET);
- if (id & IFI_CANFD_RXFIFO_ID_IDE)
+ if (id & IFI_CANFD_RXFIFO_ID_IDE) {
id &= IFI_CANFD_RXFIFO_ID_ID_XTD_MASK;
- else
+ /*
+ * In case the Extended ID frame is received, the standard
+ * and extended part of the ID are swapped in the register,
+ * so swap them back to obtain the correct ID.
+ */
+ id = (id >> IFI_CANFD_RXFIFO_ID_ID_XTD_OFFSET) |
+ ((id & IFI_CANFD_RXFIFO_ID_ID_STD_MASK) <<
+ IFI_CANFD_RXFIFO_ID_ID_XTD_WIDTH);
+ id |= CAN_EFF_FLAG;
+ } else {
id &= IFI_CANFD_RXFIFO_ID_ID_STD_MASK;
+ }
cf->can_id = id;
if (rxdlc & IFI_CANFD_RXFIFO_DLC_ESI) {
@@ -514,25 +532,25 @@ static irqreturn_t ifi_canfd_isr(int irq, void *dev_id)
static const struct can_bittiming_const ifi_canfd_bittiming_const = {
.name = KBUILD_MODNAME,
- .tseg1_min = 2, /* Time segment 1 = prop_seg + phase_seg1 */
+ .tseg1_min = 1, /* Time segment 1 = prop_seg + phase_seg1 */
.tseg1_max = 64,
- .tseg2_min = 1, /* Time segment 2 = phase_seg2 */
- .tseg2_max = 16,
+ .tseg2_min = 2, /* Time segment 2 = phase_seg2 */
+ .tseg2_max = 64,
.sjw_max = 16,
- .brp_min = 1,
- .brp_max = 1024,
+ .brp_min = 2,
+ .brp_max = 256,
.brp_inc = 1,
};
static const struct can_bittiming_const ifi_canfd_data_bittiming_const = {
.name = KBUILD_MODNAME,
- .tseg1_min = 2, /* Time segment 1 = prop_seg + phase_seg1 */
- .tseg1_max = 16,
- .tseg2_min = 1, /* Time segment 2 = phase_seg2 */
- .tseg2_max = 8,
- .sjw_max = 4,
- .brp_min = 1,
- .brp_max = 32,
+ .tseg1_min = 1, /* Time segment 1 = prop_seg + phase_seg1 */
+ .tseg1_max = 64,
+ .tseg2_min = 2, /* Time segment 2 = phase_seg2 */
+ .tseg2_max = 64,
+ .sjw_max = 16,
+ .brp_min = 2,
+ .brp_max = 256,
.brp_inc = 1,
};
@@ -545,32 +563,34 @@ static void ifi_canfd_set_bittiming(struct net_device *ndev)
u32 noniso_arg = 0;
u32 time_off;
- if (priv->can.ctrlmode & CAN_CTRLMODE_FD_NON_ISO) {
+ if ((priv->can.ctrlmode & CAN_CTRLMODE_FD) &&
+ !(priv->can.ctrlmode & CAN_CTRLMODE_FD_NON_ISO)) {
+ time_off = IFI_CANFD_TIME_SJW_OFF_ISO;
+ } else {
noniso_arg = IFI_CANFD_TIME_SET_TIMEB_BOSCH |
IFI_CANFD_TIME_SET_TIMEA_BOSCH |
IFI_CANFD_TIME_SET_PRESC_BOSCH |
IFI_CANFD_TIME_SET_SJW_BOSCH;
time_off = IFI_CANFD_TIME_SJW_OFF_BOSCH;
- } else {
- time_off = IFI_CANFD_TIME_SJW_OFF_ISO;
}
/* Configure bit timing */
- brp = bt->brp - 1;
+ brp = bt->brp - 2;
sjw = bt->sjw - 1;
tseg1 = bt->prop_seg + bt->phase_seg1 - 1;
- tseg2 = bt->phase_seg2 - 1;
+ tseg2 = bt->phase_seg2 - 2;
writel((tseg2 << IFI_CANFD_TIME_TIMEB_OFF) |
(tseg1 << IFI_CANFD_TIME_TIMEA_OFF) |
(brp << IFI_CANFD_TIME_PRESCALE_OFF) |
- (sjw << time_off),
+ (sjw << time_off) |
+ noniso_arg,
priv->base + IFI_CANFD_TIME);
/* Configure data bit timing */
- brp = dbt->brp - 1;
+ brp = dbt->brp - 2;
sjw = dbt->sjw - 1;
tseg1 = dbt->prop_seg + dbt->phase_seg1 - 1;
- tseg2 = dbt->phase_seg2 - 1;
+ tseg2 = dbt->phase_seg2 - 2;
writel((tseg2 << IFI_CANFD_TIME_TIMEB_OFF) |
(tseg1 << IFI_CANFD_TIME_TIMEA_OFF) |
(brp << IFI_CANFD_TIME_PRESCALE_OFF) |
@@ -747,8 +767,7 @@ static netdev_tx_t ifi_canfd_start_xmit(struct sk_buff *skb,
{
struct ifi_canfd_priv *priv = netdev_priv(ndev);
struct canfd_frame *cf = (struct canfd_frame *)skb->data;
- u32 txst, txid;
- u32 txdlc = 0;
+ u32 txst, txid, txdlc;
int i;
if (can_dropped_invalid_skb(ndev, skb))
@@ -766,17 +785,25 @@ static netdev_tx_t ifi_canfd_start_xmit(struct sk_buff *skb,
if (cf->can_id & CAN_EFF_FLAG) {
txid = cf->can_id & CAN_EFF_MASK;
+ /*
+ * In case the Extended ID frame is transmitted, the
+ * standard and extended part of the ID are swapped
+ * in the register, so swap them back to send the
+ * correct ID.
+ */
+ txid = (txid >> IFI_CANFD_TXFIFO_ID_ID_XTD_WIDTH) |
+ ((txid & IFI_CANFD_TXFIFO_ID_ID_XTD_MASK) <<
+ IFI_CANFD_TXFIFO_ID_ID_XTD_OFFSET);
txid |= IFI_CANFD_TXFIFO_ID_IDE;
} else {
txid = cf->can_id & CAN_SFF_MASK;
}
- if (priv->can.ctrlmode & (CAN_CTRLMODE_FD | CAN_CTRLMODE_FD_NON_ISO)) {
- if (can_is_canfd_skb(skb)) {
- txdlc |= IFI_CANFD_TXFIFO_DLC_EDL;
- if (cf->flags & CANFD_BRS)
- txdlc |= IFI_CANFD_TXFIFO_DLC_BRS;
- }
+ txdlc = can_len2dlc(cf->len);
+ if ((priv->can.ctrlmode & CAN_CTRLMODE_FD) && can_is_canfd_skb(skb)) {
+ txdlc |= IFI_CANFD_TXFIFO_DLC_EDL;
+ if (cf->flags & CANFD_BRS)
+ txdlc |= IFI_CANFD_TXFIFO_DLC_BRS;
}
if (cf->can_id & CAN_RTR_FLAG)
@@ -847,7 +874,7 @@ static int ifi_canfd_plat_probe(struct platform_device *pdev)
priv->can.state = CAN_STATE_STOPPED;
- priv->can.clock.freq = readl(addr + IFI_CANFD_SYSCLOCK);
+ priv->can.clock.freq = readl(addr + IFI_CANFD_CANCLOCK);
priv->can.bittiming_const = &ifi_canfd_bittiming_const;
priv->can.data_bittiming_const = &ifi_canfd_data_bittiming_const;
diff --git a/drivers/net/can/rcar_can.c b/drivers/net/can/rcar_can.c
index ad3d2e0cb191..788459f6bf5c 100644
--- a/drivers/net/can/rcar_can.c
+++ b/drivers/net/can/rcar_can.c
@@ -906,6 +906,7 @@ static const struct of_device_id rcar_can_of_table[] __maybe_unused = {
{ .compatible = "renesas,can-r8a7791" },
{ .compatible = "renesas,rcar-gen1-can" },
{ .compatible = "renesas,rcar-gen2-can" },
+ { .compatible = "renesas,rcar-gen3-can" },
{ }
};
MODULE_DEVICE_TABLE(of, rcar_can_of_table);
diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c
index 575790e8a75a..74a7dfecee27 100644
--- a/drivers/net/can/spi/mcp251x.c
+++ b/drivers/net/can/spi/mcp251x.c
@@ -843,7 +843,7 @@ static irqreturn_t mcp251x_can_ist(int irq, void *dev_id)
if (clear_intf)
mcp251x_write_bits(spi, CANINTF, clear_intf, 0x00);
- if (eflag)
+ if (eflag & (EFLG_RX0OVR | EFLG_RX1OVR))
mcp251x_write_bits(spi, EFLG, eflag, 0x00);
/* Update can state */
diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
index 5eee62badf45..cbc99d5649af 100644
--- a/drivers/net/can/usb/gs_usb.c
+++ b/drivers/net/can/usb/gs_usb.c
@@ -826,9 +826,8 @@ static struct gs_can *gs_make_candev(unsigned int channel, struct usb_interface
static void gs_destroy_candev(struct gs_can *dev)
{
unregister_candev(dev->netdev);
- free_candev(dev->netdev);
usb_kill_anchored_urbs(&dev->tx_submitted);
- kfree(dev);
+ free_candev(dev->netdev);
}
static int gs_usb_probe(struct usb_interface *intf, const struct usb_device_id *id)
@@ -913,12 +912,15 @@ static int gs_usb_probe(struct usb_interface *intf, const struct usb_device_id *
for (i = 0; i < icount; i++) {
dev->canch[i] = gs_make_candev(i, intf);
if (IS_ERR_OR_NULL(dev->canch[i])) {
+ /* save error code to return later */
+ rc = PTR_ERR(dev->canch[i]);
+
/* on failure destroy previously created candevs */
icount = i;
- for (i = 0; i < icount; i++) {
+ for (i = 0; i < icount; i++)
gs_destroy_candev(dev->canch[i]);
- dev->canch[i] = NULL;
- }
+
+ usb_kill_anchored_urbs(&dev->rx_submitted);
kfree(dev);
return rc;
}
@@ -939,16 +941,12 @@ static void gs_usb_disconnect(struct usb_interface *intf)
return;
}
- for (i = 0; i < GS_MAX_INTF; i++) {
- struct gs_can *can = dev->canch[i];
-
- if (!can)
- continue;
-
- gs_destroy_candev(can);
- }
+ for (i = 0; i < GS_MAX_INTF; i++)
+ if (dev->canch[i])
+ gs_destroy_candev(dev->canch[i]);
usb_kill_anchored_urbs(&dev->rx_submitted);
+ kfree(dev);
}
static const struct usb_device_id gs_usb_table[] = {
diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c
index d11c9d58cf10..5f07524083c3 100644
--- a/drivers/net/dsa/mv88e6xxx.c
+++ b/drivers/net/dsa/mv88e6xxx.c
@@ -1051,39 +1051,49 @@ static int _mv88e6xxx_atu_remove(struct dsa_switch *ds, u16 fid, int port,
return _mv88e6xxx_atu_move(ds, fid, port, 0x0f, static_too);
}
-static int mv88e6xxx_set_port_state(struct dsa_switch *ds, int port, u8 state)
+static const char * const mv88e6xxx_port_state_names[] = {
+ [PORT_CONTROL_STATE_DISABLED] = "Disabled",
+ [PORT_CONTROL_STATE_BLOCKING] = "Blocking/Listening",
+ [PORT_CONTROL_STATE_LEARNING] = "Learning",
+ [PORT_CONTROL_STATE_FORWARDING] = "Forwarding",
+};
+
+static int _mv88e6xxx_port_state(struct dsa_switch *ds, int port, u8 state)
{
- struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
int reg, ret = 0;
u8 oldstate;
- mutex_lock(&ps->smi_mutex);
-
reg = _mv88e6xxx_reg_read(ds, REG_PORT(port), PORT_CONTROL);
- if (reg < 0) {
- ret = reg;
- goto abort;
- }
+ if (reg < 0)
+ return reg;
oldstate = reg & PORT_CONTROL_STATE_MASK;
+
if (oldstate != state) {
/* Flush forwarding database if we're moving a port
* from Learning or Forwarding state to Disabled or
* Blocking or Listening state.
*/
- if (oldstate >= PORT_CONTROL_STATE_LEARNING &&
- state <= PORT_CONTROL_STATE_BLOCKING) {
+ if ((oldstate == PORT_CONTROL_STATE_LEARNING ||
+ oldstate == PORT_CONTROL_STATE_FORWARDING)
+ && (state == PORT_CONTROL_STATE_DISABLED ||
+ state == PORT_CONTROL_STATE_BLOCKING)) {
ret = _mv88e6xxx_atu_remove(ds, 0, port, false);
if (ret)
- goto abort;
+ return ret;
}
+
reg = (reg & ~PORT_CONTROL_STATE_MASK) | state;
ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), PORT_CONTROL,
reg);
+ if (ret)
+ return ret;
+
+ netdev_dbg(ds->ports[port], "PortState %s (was %s)\n",
+ mv88e6xxx_port_state_names[state],
+ mv88e6xxx_port_state_names[oldstate]);
}
-abort:
- mutex_unlock(&ps->smi_mutex);
return ret;
}
@@ -1146,35 +1156,55 @@ int mv88e6xxx_port_stp_update(struct dsa_switch *ds, int port, u8 state)
break;
}
- netdev_dbg(ds->ports[port], "port state %d [%d]\n", state, stp_state);
-
/* mv88e6xxx_port_stp_update may be called with softirqs disabled,
* so we can not update the port state directly but need to schedule it.
*/
ps->ports[port].state = stp_state;
- set_bit(port, &ps->port_state_update_mask);
+ set_bit(port, ps->port_state_update_mask);
schedule_work(&ps->bridge_work);
return 0;
}
-static int _mv88e6xxx_port_pvid_get(struct dsa_switch *ds, int port, u16 *pvid)
+static int _mv88e6xxx_port_pvid(struct dsa_switch *ds, int port, u16 *new,
+ u16 *old)
{
+ u16 pvid;
int ret;
ret = _mv88e6xxx_reg_read(ds, REG_PORT(port), PORT_DEFAULT_VLAN);
if (ret < 0)
return ret;
- *pvid = ret & PORT_DEFAULT_VLAN_MASK;
+ pvid = ret & PORT_DEFAULT_VLAN_MASK;
+
+ if (new) {
+ ret &= ~PORT_DEFAULT_VLAN_MASK;
+ ret |= *new & PORT_DEFAULT_VLAN_MASK;
+
+ ret = _mv88e6xxx_reg_write(ds, REG_PORT(port),
+ PORT_DEFAULT_VLAN, ret);
+ if (ret < 0)
+ return ret;
+
+ netdev_dbg(ds->ports[port], "DefaultVID %d (was %d)\n", *new,
+ pvid);
+ }
+
+ if (old)
+ *old = pvid;
return 0;
}
+static int _mv88e6xxx_port_pvid_get(struct dsa_switch *ds, int port, u16 *pvid)
+{
+ return _mv88e6xxx_port_pvid(ds, port, NULL, pvid);
+}
+
static int _mv88e6xxx_port_pvid_set(struct dsa_switch *ds, int port, u16 pvid)
{
- return _mv88e6xxx_reg_write(ds, REG_PORT(port), PORT_DEFAULT_VLAN,
- pvid & PORT_DEFAULT_VLAN_MASK);
+ return _mv88e6xxx_port_pvid(ds, port, &pvid, NULL);
}
static int _mv88e6xxx_vtu_wait(struct dsa_switch *ds)
@@ -1735,16 +1765,21 @@ int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, int port,
old = ret & PORT_CONTROL_2_8021Q_MASK;
- ret &= ~PORT_CONTROL_2_8021Q_MASK;
- ret |= new & PORT_CONTROL_2_8021Q_MASK;
+ if (new != old) {
+ ret &= ~PORT_CONTROL_2_8021Q_MASK;
+ ret |= new & PORT_CONTROL_2_8021Q_MASK;
- ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), PORT_CONTROL_2, ret);
- if (ret < 0)
- goto unlock;
+ ret = _mv88e6xxx_reg_write(ds, REG_PORT(port), PORT_CONTROL_2,
+ ret);
+ if (ret < 0)
+ goto unlock;
+
+ netdev_dbg(ds->ports[port], "802.1Q Mode %s (was %s)\n",
+ mv88e6xxx_port_8021q_mode_names[new],
+ mv88e6xxx_port_8021q_mode_names[old]);
+ }
- netdev_dbg(ds->ports[port], "802.1Q Mode: %s (was %s)\n",
- mv88e6xxx_port_8021q_mode_names[new],
- mv88e6xxx_port_8021q_mode_names[old]);
+ ret = 0;
unlock:
mutex_unlock(&ps->smi_mutex);
@@ -2228,11 +2263,15 @@ static void mv88e6xxx_bridge_work(struct work_struct *work)
ps = container_of(work, struct mv88e6xxx_priv_state, bridge_work);
ds = ((struct dsa_switch *)ps) - 1;
- while (ps->port_state_update_mask) {
- port = __ffs(ps->port_state_update_mask);
- clear_bit(port, &ps->port_state_update_mask);
- mv88e6xxx_set_port_state(ds, port, ps->ports[port].state);
- }
+ mutex_lock(&ps->smi_mutex);
+
+ for (port = 0; port < ps->num_ports; ++port)
+ if (test_and_clear_bit(port, ps->port_state_update_mask) &&
+ _mv88e6xxx_port_state(ds, port, ps->ports[port].state))
+ netdev_warn(ds->ports[port], "failed to update state to %s\n",
+ mv88e6xxx_port_state_names[ps->ports[port].state]);
+
+ mutex_unlock(&ps->smi_mutex);
}
static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port)
diff --git a/drivers/net/dsa/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx.h
index d7b088dd8e16..3425616987ed 100644
--- a/drivers/net/dsa/mv88e6xxx.h
+++ b/drivers/net/dsa/mv88e6xxx.h
@@ -426,7 +426,7 @@ struct mv88e6xxx_priv_state {
struct mv88e6xxx_priv_port ports[DSA_MAX_PORTS];
- unsigned long port_state_update_mask;
+ DECLARE_BITMAP(port_state_update_mask, DSA_MAX_PORTS);
struct work_struct bridge_work;
};
diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c
index 7b881edc0b0a..d81fceddbe0e 100644
--- a/drivers/net/ethernet/3com/3c59x.c
+++ b/drivers/net/ethernet/3com/3c59x.c
@@ -2455,7 +2455,7 @@ boomerang_interrupt(int irq, void *dev_id)
int i;
pci_unmap_single(VORTEX_PCI(vp),
le32_to_cpu(vp->tx_ring[entry].frag[0].addr),
- le32_to_cpu(vp->tx_ring[entry].frag[0].length),
+ le32_to_cpu(vp->tx_ring[entry].frag[0].length)&0xFFF,
PCI_DMA_TODEVICE);
for (i=1; i<=skb_shinfo(skb)->nr_frags; i++)
diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
index 0b13af8e4070..be67a19e01b9 100644
--- a/drivers/net/ethernet/Kconfig
+++ b/drivers/net/ethernet/Kconfig
@@ -106,6 +106,7 @@ config LANTIQ_ETOP
Support for the MII0 inside the Lantiq SoC
source "drivers/net/ethernet/marvell/Kconfig"
+source "drivers/net/ethernet/mediatek/Kconfig"
source "drivers/net/ethernet/mellanox/Kconfig"
source "drivers/net/ethernet/micrel/Kconfig"
source "drivers/net/ethernet/microchip/Kconfig"
diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile
index 38dc1a776a2b..6ffcc801d37e 100644
--- a/drivers/net/ethernet/Makefile
+++ b/drivers/net/ethernet/Makefile
@@ -46,6 +46,7 @@ obj-$(CONFIG_JME) += jme.o
obj-$(CONFIG_KORINA) += korina.o
obj-$(CONFIG_LANTIQ_ETOP) += lantiq_etop.o
obj-$(CONFIG_NET_VENDOR_MARVELL) += marvell/
+obj-$(CONFIG_NET_VENDOR_MEDIATEK) += mediatek/
obj-$(CONFIG_NET_VENDOR_MELLANOX) += mellanox/
obj-$(CONFIG_NET_VENDOR_MICREL) += micrel/
obj-$(CONFIG_NET_VENDOR_MICROCHIP) += microchip/
diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c
index 17472851674f..f749e4d389eb 100644
--- a/drivers/net/ethernet/altera/altera_tse_main.c
+++ b/drivers/net/ethernet/altera/altera_tse_main.c
@@ -193,7 +193,6 @@ static void altera_tse_mdio_destroy(struct net_device *dev)
priv->mdio->id);
mdiobus_unregister(priv->mdio);
- kfree(priv->mdio->irq);
mdiobus_free(priv->mdio);
priv->mdio = NULL;
}
diff --git a/drivers/net/ethernet/aurora/nb8800.c b/drivers/net/ethernet/aurora/nb8800.c
index f71ab2647a3b..08a23e6b60e9 100644
--- a/drivers/net/ethernet/aurora/nb8800.c
+++ b/drivers/net/ethernet/aurora/nb8800.c
@@ -1460,7 +1460,19 @@ static int nb8800_probe(struct platform_device *pdev)
goto err_disable_clk;
}
- priv->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0);
+ if (of_phy_is_fixed_link(pdev->dev.of_node)) {
+ ret = of_phy_register_fixed_link(pdev->dev.of_node);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "bad fixed-link spec\n");
+ goto err_free_bus;
+ }
+ priv->phy_node = of_node_get(pdev->dev.of_node);
+ }
+
+ if (!priv->phy_node)
+ priv->phy_node = of_parse_phandle(pdev->dev.of_node,
+ "phy-handle", 0);
+
if (!priv->phy_node) {
dev_err(&pdev->dev, "no PHY specified\n");
ret = -ENODEV;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
index 04523d41b873..f8b810313094 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
@@ -4901,9 +4901,9 @@ struct c2s_pri_trans_table_entry {
* cfc delete event data
*/
struct cfc_del_event_data {
- u32 cid;
- u32 reserved0;
- u32 reserved1;
+ __le32 cid;
+ __le32 reserved0;
+ __le32 reserved1;
};
@@ -5119,15 +5119,9 @@ struct vf_pf_channel_zone_trigger {
* zone that triggers the in-bound interrupt
*/
struct trigger_vf_zone {
-#if defined(__BIG_ENDIAN)
- u16 reserved1;
- u8 reserved0;
- struct vf_pf_channel_zone_trigger vf_pf_channel;
-#elif defined(__LITTLE_ENDIAN)
struct vf_pf_channel_zone_trigger vf_pf_channel;
u8 reserved0;
u16 reserved1;
-#endif
u32 reserved2;
};
@@ -5212,9 +5206,9 @@ struct e2_integ_data {
* set mac event data
*/
struct eth_event_data {
- u32 echo;
- u32 reserved0;
- u32 reserved1;
+ __le32 echo;
+ __le32 reserved0;
+ __le32 reserved1;
};
@@ -5224,9 +5218,9 @@ struct eth_event_data {
struct vf_pf_event_data {
u8 vf_id;
u8 reserved0;
- u16 reserved1;
- u32 msg_addr_lo;
- u32 msg_addr_hi;
+ __le16 reserved1;
+ __le32 msg_addr_lo;
+ __le32 msg_addr_hi;
};
/*
@@ -5235,9 +5229,9 @@ struct vf_pf_event_data {
struct vf_flr_event_data {
u8 vf_id;
u8 reserved0;
- u16 reserved1;
- u32 reserved2;
- u32 reserved3;
+ __le16 reserved1;
+ __le32 reserved2;
+ __le32 reserved3;
};
/*
@@ -5246,9 +5240,9 @@ struct vf_flr_event_data {
struct malicious_vf_event_data {
u8 vf_id;
u8 err_id;
- u16 reserved1;
- u32 reserved2;
- u32 reserved3;
+ __le16 reserved1;
+ __le32 reserved2;
+ __le32 reserved3;
};
/*
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 5c95d0c3b076..b597c32275aa 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -5282,14 +5282,14 @@ static void bnx2x_handle_classification_eqe(struct bnx2x *bp,
{
unsigned long ramrod_flags = 0;
int rc = 0;
- u32 cid = elem->message.data.eth_event.echo & BNX2X_SWCID_MASK;
+ u32 echo = le32_to_cpu(elem->message.data.eth_event.echo);
+ u32 cid = echo & BNX2X_SWCID_MASK;
struct bnx2x_vlan_mac_obj *vlan_mac_obj;
/* Always push next commands out, don't wait here */
__set_bit(RAMROD_CONT, &ramrod_flags);
- switch (le32_to_cpu((__force __le32)elem->message.data.eth_event.echo)
- >> BNX2X_SWCID_SHIFT) {
+ switch (echo >> BNX2X_SWCID_SHIFT) {
case BNX2X_FILTER_MAC_PENDING:
DP(BNX2X_MSG_SP, "Got SETUP_MAC completions\n");
if (CNIC_LOADED(bp) && (cid == BNX2X_ISCSI_ETH_CID(bp)))
@@ -5310,8 +5310,7 @@ static void bnx2x_handle_classification_eqe(struct bnx2x *bp,
bnx2x_handle_mcast_eqe(bp);
return;
default:
- BNX2X_ERR("Unsupported classification command: %d\n",
- elem->message.data.eth_event.echo);
+ BNX2X_ERR("Unsupported classification command: 0x%x\n", echo);
return;
}
@@ -5480,9 +5479,6 @@ static void bnx2x_eq_int(struct bnx2x *bp)
goto next_spqe;
}
- /* elem CID originates from FW; actually LE */
- cid = SW_CID((__force __le32)
- elem->message.data.cfc_del_event.cid);
opcode = elem->message.opcode;
/* handle eq element */
@@ -5505,6 +5501,10 @@ static void bnx2x_eq_int(struct bnx2x *bp)
* we may want to verify here that the bp state is
* HALTING
*/
+
+ /* elem CID originates from FW; actually LE */
+ cid = SW_CID(elem->message.data.cfc_del_event.cid);
+
DP(BNX2X_MSG_SP,
"got delete ramrod for MULTI[%d]\n", cid);
@@ -5598,10 +5598,8 @@ static void bnx2x_eq_int(struct bnx2x *bp)
BNX2X_STATE_OPENING_WAIT4_PORT):
case (EVENT_RING_OPCODE_RSS_UPDATE_RULES |
BNX2X_STATE_CLOSING_WAIT4_HALT):
- cid = elem->message.data.eth_event.echo &
- BNX2X_SWCID_MASK;
DP(BNX2X_MSG_SP, "got RSS_UPDATE ramrod. CID %d\n",
- cid);
+ SW_CID(elem->message.data.eth_event.echo));
rss_raw->clear_pending(rss_raw);
break;
@@ -5686,7 +5684,7 @@ static void bnx2x_sp_task(struct work_struct *work)
if (status & BNX2X_DEF_SB_IDX) {
struct bnx2x_fastpath *fp = bnx2x_fcoe_fp(bp);
- if (FCOE_INIT(bp) &&
+ if (FCOE_INIT(bp) &&
(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) {
/* Prevent local bottom-halves from running as
* we are going to change the local NAPI list.
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index 9d027348cd09..632daff117d3 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -1672,11 +1672,12 @@ void bnx2x_vf_handle_classification_eqe(struct bnx2x *bp,
{
unsigned long ramrod_flags = 0;
int rc = 0;
+ u32 echo = le32_to_cpu(elem->message.data.eth_event.echo);
/* Always push next commands out, don't wait here */
set_bit(RAMROD_CONT, &ramrod_flags);
- switch (elem->message.data.eth_event.echo >> BNX2X_SWCID_SHIFT) {
+ switch (echo >> BNX2X_SWCID_SHIFT) {
case BNX2X_FILTER_MAC_PENDING:
rc = vfq->mac_obj.complete(bp, &vfq->mac_obj, elem,
&ramrod_flags);
@@ -1686,8 +1687,7 @@ void bnx2x_vf_handle_classification_eqe(struct bnx2x *bp,
&ramrod_flags);
break;
default:
- BNX2X_ERR("Unsupported classification command: %d\n",
- elem->message.data.eth_event.echo);
+ BNX2X_ERR("Unsupported classification command: 0x%x\n", echo);
return;
}
if (rc < 0)
@@ -1747,16 +1747,14 @@ int bnx2x_iov_eq_sp_event(struct bnx2x *bp, union event_ring_elem *elem)
switch (opcode) {
case EVENT_RING_OPCODE_CFC_DEL:
- cid = SW_CID((__force __le32)
- elem->message.data.cfc_del_event.cid);
+ cid = SW_CID(elem->message.data.cfc_del_event.cid);
DP(BNX2X_MSG_IOV, "checking cfc-del comp cid=%d\n", cid);
break;
case EVENT_RING_OPCODE_CLASSIFICATION_RULES:
case EVENT_RING_OPCODE_MULTICAST_RULES:
case EVENT_RING_OPCODE_FILTERS_RULES:
case EVENT_RING_OPCODE_RSS_UPDATE_RULES:
- cid = (elem->message.data.eth_event.echo &
- BNX2X_SWCID_MASK);
+ cid = SW_CID(elem->message.data.eth_event.echo);
DP(BNX2X_MSG_IOV, "checking filtering comp cid=%d\n", cid);
break;
case EVENT_RING_OPCODE_VF_FLR:
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
index 1374e5394a79..bfae300cf25f 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
@@ -2187,8 +2187,10 @@ void bnx2x_vf_mbx_schedule(struct bnx2x *bp,
/* Update VFDB with current message and schedule its handling */
mutex_lock(&BP_VFDB(bp)->event_mutex);
- BP_VF_MBX(bp, vf_idx)->vf_addr_hi = vfpf_event->msg_addr_hi;
- BP_VF_MBX(bp, vf_idx)->vf_addr_lo = vfpf_event->msg_addr_lo;
+ BP_VF_MBX(bp, vf_idx)->vf_addr_hi =
+ le32_to_cpu(vfpf_event->msg_addr_hi);
+ BP_VF_MBX(bp, vf_idx)->vf_addr_lo =
+ le32_to_cpu(vfpf_event->msg_addr_lo);
BP_VFDB(bp)->event_occur |= (1ULL << vf_idx);
mutex_unlock(&BP_VFDB(bp)->event_mutex);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index ce6b075842ee..aabbd51db981 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -248,7 +248,8 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
tx_push1->tx_bd_cfa_meta = cpu_to_le32(vlan_tag_flags);
tx_push1->tx_bd_cfa_action = cpu_to_le32(cfa_action);
- end = PTR_ALIGN(pdata + length + 1, 8) - 1;
+ end = pdata + length;
+ end = PTR_ALIGN(end, 8) - 1;
*end = 0;
skb_copy_from_linear_data(skb, pdata, len);
@@ -2361,6 +2362,14 @@ static void bnxt_free_stats(struct bnxt *bp)
u32 size, i;
struct pci_dev *pdev = bp->pdev;
+ if (bp->hw_rx_port_stats) {
+ dma_free_coherent(&pdev->dev, bp->hw_port_stats_size,
+ bp->hw_rx_port_stats,
+ bp->hw_rx_port_stats_map);
+ bp->hw_rx_port_stats = NULL;
+ bp->flags &= ~BNXT_FLAG_PORT_STATS;
+ }
+
if (!bp->bnapi)
return;
@@ -2397,6 +2406,24 @@ static int bnxt_alloc_stats(struct bnxt *bp)
cpr->hw_stats_ctx_id = INVALID_STATS_CTX_ID;
}
+
+ if (BNXT_PF(bp)) {
+ bp->hw_port_stats_size = sizeof(struct rx_port_stats) +
+ sizeof(struct tx_port_stats) + 1024;
+
+ bp->hw_rx_port_stats =
+ dma_alloc_coherent(&pdev->dev, bp->hw_port_stats_size,
+ &bp->hw_rx_port_stats_map,
+ GFP_KERNEL);
+ if (!bp->hw_rx_port_stats)
+ return -ENOMEM;
+
+ bp->hw_tx_port_stats = (void *)(bp->hw_rx_port_stats + 1) +
+ 512;
+ bp->hw_tx_port_stats_map = bp->hw_rx_port_stats_map +
+ sizeof(struct rx_port_stats) + 512;
+ bp->flags |= BNXT_FLAG_PORT_STATS;
+ }
return 0;
}
@@ -3833,6 +3860,23 @@ hwrm_ver_get_exit:
return rc;
}
+static int bnxt_hwrm_port_qstats(struct bnxt *bp)
+{
+ int rc;
+ struct bnxt_pf_info *pf = &bp->pf;
+ struct hwrm_port_qstats_input req = {0};
+
+ if (!(bp->flags & BNXT_FLAG_PORT_STATS))
+ return 0;
+
+ bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_QSTATS, -1, -1);
+ req.port_id = cpu_to_le16(pf->port_id);
+ req.tx_stat_host_addr = cpu_to_le64(bp->hw_tx_port_stats_map);
+ req.rx_stat_host_addr = cpu_to_le64(bp->hw_rx_port_stats_map);
+ rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+ return rc;
+}
+
static void bnxt_hwrm_free_tunnel_ports(struct bnxt *bp)
{
if (bp->vxlan_port_cnt) {
@@ -4467,6 +4511,7 @@ static int bnxt_update_link(struct bnxt *bp, bool chng_link_state)
link_info->pause = resp->pause;
link_info->auto_mode = resp->auto_mode;
link_info->auto_pause_setting = resp->auto_pause;
+ link_info->lp_pause = resp->link_partner_adv_pause;
link_info->force_pause_setting = resp->force_pause;
link_info->duplex_setting = resp->duplex;
if (link_info->phy_link_status == BNXT_LINK_LINK)
@@ -4477,6 +4522,8 @@ static int bnxt_update_link(struct bnxt *bp, bool chng_link_state)
link_info->auto_link_speed = le16_to_cpu(resp->auto_link_speed);
link_info->support_speeds = le16_to_cpu(resp->support_speeds);
link_info->auto_link_speeds = le16_to_cpu(resp->auto_link_speed_mask);
+ link_info->lp_auto_link_speeds =
+ le16_to_cpu(resp->link_partner_adv_speeds);
link_info->preemphasis = le32_to_cpu(resp->preemphasis);
link_info->phy_ver[0] = resp->phy_maj;
link_info->phy_ver[1] = resp->phy_min;
@@ -4888,6 +4935,22 @@ bnxt_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
stats->tx_dropped += le64_to_cpu(hw_stats->tx_drop_pkts);
}
+ if (bp->flags & BNXT_FLAG_PORT_STATS) {
+ struct rx_port_stats *rx = bp->hw_rx_port_stats;
+ struct tx_port_stats *tx = bp->hw_tx_port_stats;
+
+ stats->rx_crc_errors = le64_to_cpu(rx->rx_fcs_err_frames);
+ stats->rx_frame_errors = le64_to_cpu(rx->rx_align_err_frames);
+ stats->rx_length_errors = le64_to_cpu(rx->rx_undrsz_frames) +
+ le64_to_cpu(rx->rx_ovrsz_frames) +
+ le64_to_cpu(rx->rx_runt_frames);
+ stats->rx_errors = le64_to_cpu(rx->rx_false_carrier_frames) +
+ le64_to_cpu(rx->rx_jbr_frames);
+ stats->collisions = le64_to_cpu(tx->tx_total_collisions);
+ stats->tx_fifo_errors = le64_to_cpu(tx->tx_fifo_underruns);
+ stats->tx_errors = le64_to_cpu(tx->tx_err);
+ }
+
return stats;
}
@@ -5228,6 +5291,10 @@ static void bnxt_timer(unsigned long data)
if (atomic_read(&bp->intr_sem) != 0)
goto bnxt_restart_timer;
+ if (bp->link_info.link_up && (bp->flags & BNXT_FLAG_PORT_STATS)) {
+ set_bit(BNXT_PERIODIC_STATS_SP_EVENT, &bp->sp_event);
+ schedule_work(&bp->sp_task);
+ }
bnxt_restart_timer:
mod_timer(&bp->timer, jiffies + bp->current_interval);
}
@@ -5279,6 +5346,9 @@ static void bnxt_sp_task(struct work_struct *work)
rtnl_unlock();
}
+ if (test_and_clear_bit(BNXT_PERIODIC_STATS_SP_EVENT, &bp->sp_event))
+ bnxt_hwrm_port_qstats(bp);
+
smp_mb__before_atomic();
clear_bit(BNXT_STATE_IN_SP_TASK, &bp->state);
}
@@ -5342,6 +5412,8 @@ static int bnxt_init_board(struct pci_dev *pdev, struct net_device *dev)
goto init_err_release;
}
+ pci_enable_pcie_error_reporting(pdev);
+
INIT_WORK(&bp->sp_task, bnxt_sp_task);
spin_lock_init(&bp->ntp_fltr_lock);
@@ -5721,6 +5793,7 @@ static void bnxt_remove_one(struct pci_dev *pdev)
if (BNXT_PF(bp))
bnxt_sriov_disable(bp);
+ pci_disable_pcie_error_reporting(pdev);
unregister_netdev(dev);
cancel_work_sync(&bp->sp_task);
bp->sp_event = 0;
@@ -5960,11 +6033,117 @@ init_err_free:
return rc;
}
+/**
+ * bnxt_io_error_detected - called when PCI error is detected
+ * @pdev: Pointer to PCI device
+ * @state: The current pci connection state
+ *
+ * This function is called after a PCI bus error affecting
+ * this device has been detected.
+ */
+static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev,
+ pci_channel_state_t state)
+{
+ struct net_device *netdev = pci_get_drvdata(pdev);
+
+ netdev_info(netdev, "PCI I/O error detected\n");
+
+ rtnl_lock();
+ netif_device_detach(netdev);
+
+ if (state == pci_channel_io_perm_failure) {
+ rtnl_unlock();
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+
+ if (netif_running(netdev))
+ bnxt_close(netdev);
+
+ pci_disable_device(pdev);
+ rtnl_unlock();
+
+ /* Request a slot slot reset. */
+ return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * bnxt_io_slot_reset - called after the pci bus has been reset.
+ * @pdev: Pointer to PCI device
+ *
+ * Restart the card from scratch, as if from a cold-boot.
+ * At this point, the card has exprienced a hard reset,
+ * followed by fixups by BIOS, and has its config space
+ * set up identically to what it was at cold boot.
+ */
+static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev)
+{
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct bnxt *bp = netdev_priv(netdev);
+ int err = 0;
+ pci_ers_result_t result = PCI_ERS_RESULT_DISCONNECT;
+
+ netdev_info(bp->dev, "PCI Slot Reset\n");
+
+ rtnl_lock();
+
+ if (pci_enable_device(pdev)) {
+ dev_err(&pdev->dev,
+ "Cannot re-enable PCI device after reset.\n");
+ } else {
+ pci_set_master(pdev);
+
+ if (netif_running(netdev))
+ err = bnxt_open(netdev);
+
+ if (!err)
+ result = PCI_ERS_RESULT_RECOVERED;
+ }
+
+ if (result != PCI_ERS_RESULT_RECOVERED && netif_running(netdev))
+ dev_close(netdev);
+
+ rtnl_unlock();
+
+ err = pci_cleanup_aer_uncorrect_error_status(pdev);
+ if (err) {
+ dev_err(&pdev->dev,
+ "pci_cleanup_aer_uncorrect_error_status failed 0x%0x\n",
+ err); /* non-fatal, continue */
+ }
+
+ return PCI_ERS_RESULT_RECOVERED;
+}
+
+/**
+ * bnxt_io_resume - called when traffic can start flowing again.
+ * @pdev: Pointer to PCI device
+ *
+ * This callback is called when the error recovery driver tells
+ * us that its OK to resume normal operation.
+ */
+static void bnxt_io_resume(struct pci_dev *pdev)
+{
+ struct net_device *netdev = pci_get_drvdata(pdev);
+
+ rtnl_lock();
+
+ netif_device_attach(netdev);
+
+ rtnl_unlock();
+}
+
+static const struct pci_error_handlers bnxt_err_handler = {
+ .error_detected = bnxt_io_error_detected,
+ .slot_reset = bnxt_io_slot_reset,
+ .resume = bnxt_io_resume
+};
+
static struct pci_driver bnxt_pci_driver = {
.name = DRV_MODULE_NAME,
.id_table = bnxt_pci_tbl,
.probe = bnxt_init_one,
.remove = bnxt_remove_one,
+ .err_handler = &bnxt_err_handler,
#if defined(CONFIG_BNXT_SRIOV)
.sriov_configure = bnxt_sriov_configure,
#endif
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 9aa38f57601b..ec04c47172b7 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -757,10 +757,6 @@ struct bnxt_ntuple_filter {
#define BNXT_FLTR_UPDATE 1
};
-#define BNXT_ALL_COPPER_ETHTOOL_SPEED \
- (ADVERTISED_100baseT_Full | ADVERTISED_1000baseT_Full | \
- ADVERTISED_10000baseT_Full)
-
struct bnxt_link_info {
u8 media_type;
u8 transceiver;
@@ -780,6 +776,7 @@ struct bnxt_link_info {
#define BNXT_LINK_PAUSE_RX PORT_PHY_QCFG_RESP_PAUSE_RX
#define BNXT_LINK_PAUSE_BOTH (PORT_PHY_QCFG_RESP_PAUSE_RX | \
PORT_PHY_QCFG_RESP_PAUSE_TX)
+ u8 lp_pause;
u8 auto_pause_setting;
u8 force_pause_setting;
u8 duplex_setting;
@@ -814,6 +811,7 @@ struct bnxt_link_info {
#define BNXT_LINK_SPEED_MSK_25GB PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_25GB
#define BNXT_LINK_SPEED_MSK_40GB PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_40GB
#define BNXT_LINK_SPEED_MSK_50GB PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_50GB
+ u16 lp_auto_link_speeds;
u16 auto_link_speed;
u16 force_link_speed;
u32 preemphasis;
@@ -875,6 +873,7 @@ struct bnxt {
#define BNXT_FLAG_MSIX_CAP 0x80
#define BNXT_FLAG_RFS 0x100
#define BNXT_FLAG_SHARED_RINGS 0x200
+ #define BNXT_FLAG_PORT_STATS 0x400
#define BNXT_FLAG_ALL_CONFIG_FEATS (BNXT_FLAG_TPA | \
BNXT_FLAG_RFS | \
@@ -927,7 +926,7 @@ struct bnxt {
struct bnxt_queue_info q_info[BNXT_MAX_QUEUE];
unsigned int current_interval;
-#define BNXT_TIMER_INTERVAL (HZ / 2)
+#define BNXT_TIMER_INTERVAL HZ
struct timer_list timer;
@@ -947,6 +946,13 @@ struct bnxt {
void *hwrm_dbg_resp_addr;
dma_addr_t hwrm_dbg_resp_dma_addr;
#define HWRM_DBG_REG_BUF_SIZE 128
+
+ struct rx_port_stats *hw_rx_port_stats;
+ struct tx_port_stats *hw_tx_port_stats;
+ dma_addr_t hw_rx_port_stats_map;
+ dma_addr_t hw_tx_port_stats_map;
+ int hw_port_stats_size;
+
int hwrm_cmd_timeout;
struct mutex hwrm_cmd_lock; /* serialize hwrm messages */
struct hwrm_ver_get_output ver_resp;
@@ -982,6 +988,7 @@ struct bnxt {
#define BNXT_RESET_TASK_SP_EVENT 6
#define BNXT_RST_RING_SP_EVENT 7
#define BNXT_HWRM_PF_UNLOAD_SP_EVENT 8
+#define BNXT_PERIODIC_STATS_SP_EVENT 9
struct bnxt_pf_info pf;
#ifdef CONFIG_BNXT_SRIOV
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 84ea26d6f3ff..9ada1662b651 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -8,6 +8,7 @@
*/
#include <linux/ctype.h>
+#include <linux/stringify.h>
#include <linux/ethtool.h>
#include <linux/interrupt.h>
#include <linux/pci.h>
@@ -83,13 +84,99 @@ static int bnxt_set_coalesce(struct net_device *dev,
#define BNXT_NUM_STATS 21
+#define BNXT_RX_STATS_OFFSET(counter) \
+ (offsetof(struct rx_port_stats, counter) / 8)
+
+#define BNXT_RX_STATS_ENTRY(counter) \
+ { BNXT_RX_STATS_OFFSET(counter), __stringify(counter) }
+
+#define BNXT_TX_STATS_OFFSET(counter) \
+ ((offsetof(struct tx_port_stats, counter) + \
+ sizeof(struct rx_port_stats) + 512) / 8)
+
+#define BNXT_TX_STATS_ENTRY(counter) \
+ { BNXT_TX_STATS_OFFSET(counter), __stringify(counter) }
+
+static const struct {
+ long offset;
+ char string[ETH_GSTRING_LEN];
+} bnxt_port_stats_arr[] = {
+ BNXT_RX_STATS_ENTRY(rx_64b_frames),
+ BNXT_RX_STATS_ENTRY(rx_65b_127b_frames),
+ BNXT_RX_STATS_ENTRY(rx_128b_255b_frames),
+ BNXT_RX_STATS_ENTRY(rx_256b_511b_frames),
+ BNXT_RX_STATS_ENTRY(rx_512b_1023b_frames),
+ BNXT_RX_STATS_ENTRY(rx_1024b_1518_frames),
+ BNXT_RX_STATS_ENTRY(rx_good_vlan_frames),
+ BNXT_RX_STATS_ENTRY(rx_1519b_2047b_frames),
+ BNXT_RX_STATS_ENTRY(rx_2048b_4095b_frames),
+ BNXT_RX_STATS_ENTRY(rx_4096b_9216b_frames),
+ BNXT_RX_STATS_ENTRY(rx_9217b_16383b_frames),
+ BNXT_RX_STATS_ENTRY(rx_total_frames),
+ BNXT_RX_STATS_ENTRY(rx_ucast_frames),
+ BNXT_RX_STATS_ENTRY(rx_mcast_frames),
+ BNXT_RX_STATS_ENTRY(rx_bcast_frames),
+ BNXT_RX_STATS_ENTRY(rx_fcs_err_frames),
+ BNXT_RX_STATS_ENTRY(rx_ctrl_frames),
+ BNXT_RX_STATS_ENTRY(rx_pause_frames),
+ BNXT_RX_STATS_ENTRY(rx_pfc_frames),
+ BNXT_RX_STATS_ENTRY(rx_align_err_frames),
+ BNXT_RX_STATS_ENTRY(rx_ovrsz_frames),
+ BNXT_RX_STATS_ENTRY(rx_jbr_frames),
+ BNXT_RX_STATS_ENTRY(rx_mtu_err_frames),
+ BNXT_RX_STATS_ENTRY(rx_tagged_frames),
+ BNXT_RX_STATS_ENTRY(rx_double_tagged_frames),
+ BNXT_RX_STATS_ENTRY(rx_good_frames),
+ BNXT_RX_STATS_ENTRY(rx_undrsz_frames),
+ BNXT_RX_STATS_ENTRY(rx_eee_lpi_events),
+ BNXT_RX_STATS_ENTRY(rx_eee_lpi_duration),
+ BNXT_RX_STATS_ENTRY(rx_bytes),
+ BNXT_RX_STATS_ENTRY(rx_runt_bytes),
+ BNXT_RX_STATS_ENTRY(rx_runt_frames),
+
+ BNXT_TX_STATS_ENTRY(tx_64b_frames),
+ BNXT_TX_STATS_ENTRY(tx_65b_127b_frames),
+ BNXT_TX_STATS_ENTRY(tx_128b_255b_frames),
+ BNXT_TX_STATS_ENTRY(tx_256b_511b_frames),
+ BNXT_TX_STATS_ENTRY(tx_512b_1023b_frames),
+ BNXT_TX_STATS_ENTRY(tx_1024b_1518_frames),
+ BNXT_TX_STATS_ENTRY(tx_good_vlan_frames),
+ BNXT_TX_STATS_ENTRY(tx_1519b_2047_frames),
+ BNXT_TX_STATS_ENTRY(tx_2048b_4095b_frames),
+ BNXT_TX_STATS_ENTRY(tx_4096b_9216b_frames),
+ BNXT_TX_STATS_ENTRY(tx_9217b_16383b_frames),
+ BNXT_TX_STATS_ENTRY(tx_good_frames),
+ BNXT_TX_STATS_ENTRY(tx_total_frames),
+ BNXT_TX_STATS_ENTRY(tx_ucast_frames),
+ BNXT_TX_STATS_ENTRY(tx_mcast_frames),
+ BNXT_TX_STATS_ENTRY(tx_bcast_frames),
+ BNXT_TX_STATS_ENTRY(tx_pause_frames),
+ BNXT_TX_STATS_ENTRY(tx_pfc_frames),
+ BNXT_TX_STATS_ENTRY(tx_jabber_frames),
+ BNXT_TX_STATS_ENTRY(tx_fcs_err_frames),
+ BNXT_TX_STATS_ENTRY(tx_err),
+ BNXT_TX_STATS_ENTRY(tx_fifo_underruns),
+ BNXT_TX_STATS_ENTRY(tx_eee_lpi_events),
+ BNXT_TX_STATS_ENTRY(tx_eee_lpi_duration),
+ BNXT_TX_STATS_ENTRY(tx_total_collisions),
+ BNXT_TX_STATS_ENTRY(tx_bytes),
+};
+
+#define BNXT_NUM_PORT_STATS ARRAY_SIZE(bnxt_port_stats_arr)
+
static int bnxt_get_sset_count(struct net_device *dev, int sset)
{
struct bnxt *bp = netdev_priv(dev);
switch (sset) {
- case ETH_SS_STATS:
- return BNXT_NUM_STATS * bp->cp_nr_rings;
+ case ETH_SS_STATS: {
+ int num_stats = BNXT_NUM_STATS * bp->cp_nr_rings;
+
+ if (bp->flags & BNXT_FLAG_PORT_STATS)
+ num_stats += BNXT_NUM_PORT_STATS;
+
+ return num_stats;
+ }
default:
return -EOPNOTSUPP;
}
@@ -118,6 +205,14 @@ static void bnxt_get_ethtool_stats(struct net_device *dev,
buf[j] = le64_to_cpu(hw_stats[k]);
buf[j++] = cpr->rx_l4_csum_errors;
}
+ if (bp->flags & BNXT_FLAG_PORT_STATS) {
+ __le64 *port_stats = (__le64 *)bp->hw_rx_port_stats;
+
+ for (i = 0; i < BNXT_NUM_PORT_STATS; i++, j++) {
+ buf[j] = le64_to_cpu(*(port_stats +
+ bnxt_port_stats_arr[i].offset));
+ }
+ }
}
static void bnxt_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
@@ -172,6 +267,12 @@ static void bnxt_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
sprintf(buf, "[%d]: rx_l4_csum_errors", i);
buf += ETH_GSTRING_LEN;
}
+ if (bp->flags & BNXT_FLAG_PORT_STATS) {
+ for (i = 0; i < BNXT_NUM_PORT_STATS; i++) {
+ strcpy(buf, bnxt_port_stats_arr[i].string);
+ buf += ETH_GSTRING_LEN;
+ }
+ }
break;
default:
netdev_err(bp->dev, "bnxt_get_strings invalid request %x\n",
@@ -496,28 +597,8 @@ static void bnxt_get_drvinfo(struct net_device *dev,
kfree(pkglog);
}
-static u32 bnxt_fw_to_ethtool_support_spds(struct bnxt_link_info *link_info)
+static u32 _bnxt_fw_to_ethtool_adv_spds(u16 fw_speeds, u8 fw_pause)
{
- u16 fw_speeds = link_info->support_speeds;
- u32 speed_mask = 0;
-
- if (fw_speeds & BNXT_LINK_SPEED_MSK_100MB)
- speed_mask |= SUPPORTED_100baseT_Full;
- if (fw_speeds & BNXT_LINK_SPEED_MSK_1GB)
- speed_mask |= SUPPORTED_1000baseT_Full;
- if (fw_speeds & BNXT_LINK_SPEED_MSK_2_5GB)
- speed_mask |= SUPPORTED_2500baseX_Full;
- if (fw_speeds & BNXT_LINK_SPEED_MSK_10GB)
- speed_mask |= SUPPORTED_10000baseT_Full;
- if (fw_speeds & BNXT_LINK_SPEED_MSK_40GB)
- speed_mask |= SUPPORTED_40000baseCR4_Full;
-
- return speed_mask;
-}
-
-static u32 bnxt_fw_to_ethtool_advertised_spds(struct bnxt_link_info *link_info)
-{
- u16 fw_speeds = link_info->auto_link_speeds;
u32 speed_mask = 0;
/* TODO: support 25GB, 40GB, 50GB with different cable type */
@@ -532,9 +613,48 @@ static u32 bnxt_fw_to_ethtool_advertised_spds(struct bnxt_link_info *link_info)
speed_mask |= ADVERTISED_10000baseT_Full;
if (fw_speeds & BNXT_LINK_SPEED_MSK_40GB)
speed_mask |= ADVERTISED_40000baseCR4_Full;
+
+ if ((fw_pause & BNXT_LINK_PAUSE_BOTH) == BNXT_LINK_PAUSE_BOTH)
+ speed_mask |= ADVERTISED_Pause;
+ else if (fw_pause & BNXT_LINK_PAUSE_TX)
+ speed_mask |= ADVERTISED_Asym_Pause;
+ else if (fw_pause & BNXT_LINK_PAUSE_RX)
+ speed_mask |= ADVERTISED_Pause | ADVERTISED_Asym_Pause;
+
return speed_mask;
}
+static u32 bnxt_fw_to_ethtool_advertised_spds(struct bnxt_link_info *link_info)
+{
+ u16 fw_speeds = link_info->auto_link_speeds;
+ u8 fw_pause = 0;
+
+ if (link_info->autoneg & BNXT_AUTONEG_FLOW_CTRL)
+ fw_pause = link_info->auto_pause_setting;
+
+ return _bnxt_fw_to_ethtool_adv_spds(fw_speeds, fw_pause);
+}
+
+static u32 bnxt_fw_to_ethtool_lp_adv(struct bnxt_link_info *link_info)
+{
+ u16 fw_speeds = link_info->lp_auto_link_speeds;
+ u8 fw_pause = 0;
+
+ if (link_info->autoneg & BNXT_AUTONEG_FLOW_CTRL)
+ fw_pause = link_info->lp_pause;
+
+ return _bnxt_fw_to_ethtool_adv_spds(fw_speeds, fw_pause);
+}
+
+static u32 bnxt_fw_to_ethtool_support_spds(struct bnxt_link_info *link_info)
+{
+ u16 fw_speeds = link_info->support_speeds;
+ u32 supported;
+
+ supported = _bnxt_fw_to_ethtool_adv_spds(fw_speeds, 0);
+ return supported | SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+}
+
u32 bnxt_fw_to_ethtool_speed(u16 fw_link_speed)
{
switch (fw_link_speed) {
@@ -566,7 +686,6 @@ static int bnxt_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
u16 ethtool_speed;
cmd->supported = bnxt_fw_to_ethtool_support_spds(link_info);
- cmd->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
if (link_info->auto_link_speeds)
cmd->supported |= SUPPORTED_Autoneg;
@@ -576,21 +695,13 @@ static int bnxt_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
bnxt_fw_to_ethtool_advertised_spds(link_info);
cmd->advertising |= ADVERTISED_Autoneg;
cmd->autoneg = AUTONEG_ENABLE;
+ if (link_info->phy_link_status == BNXT_LINK_LINK)
+ cmd->lp_advertising =
+ bnxt_fw_to_ethtool_lp_adv(link_info);
} else {
cmd->autoneg = AUTONEG_DISABLE;
cmd->advertising = 0;
}
- if (link_info->autoneg & BNXT_AUTONEG_FLOW_CTRL) {
- if ((link_info->auto_pause_setting & BNXT_LINK_PAUSE_BOTH) ==
- BNXT_LINK_PAUSE_BOTH) {
- cmd->advertising |= ADVERTISED_Pause;
- } else {
- cmd->advertising |= ADVERTISED_Asym_Pause;
- if (link_info->auto_pause_setting &
- BNXT_LINK_PAUSE_RX)
- cmd->advertising |= ADVERTISED_Pause;
- }
- }
cmd->port = PORT_NONE;
if (link_info->media_type == PORT_PHY_QCFG_RESP_MEDIA_TYPE_TP) {
@@ -686,16 +797,10 @@ static int bnxt_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
return rc;
if (cmd->autoneg == AUTONEG_ENABLE) {
- if (link_info->media_type != PORT_PHY_QCFG_RESP_MEDIA_TYPE_TP) {
- netdev_err(dev, "Media type doesn't support autoneg\n");
- rc = -EINVAL;
- goto set_setting_exit;
- }
- if (cmd->advertising & ~(BNXT_ALL_COPPER_ETHTOOL_SPEED |
- ADVERTISED_Autoneg |
- ADVERTISED_TP |
- ADVERTISED_Pause |
- ADVERTISED_Asym_Pause)) {
+ u32 supported_spds = bnxt_fw_to_ethtool_support_spds(link_info);
+
+ if (cmd->advertising & ~(supported_spds | ADVERTISED_Autoneg |
+ ADVERTISED_TP | ADVERTISED_FIBRE)) {
netdev_err(dev, "Unsupported advertising mask (adv: 0x%x)\n",
cmd->advertising);
rc = -EINVAL;
@@ -750,8 +855,10 @@ static void bnxt_get_pauseparam(struct net_device *dev,
if (BNXT_VF(bp))
return;
epause->autoneg = !!(link_info->autoneg & BNXT_AUTONEG_FLOW_CTRL);
- epause->rx_pause = ((link_info->pause & BNXT_LINK_PAUSE_RX) != 0);
- epause->tx_pause = ((link_info->pause & BNXT_LINK_PAUSE_TX) != 0);
+ epause->rx_pause =
+ ((link_info->auto_pause_setting & BNXT_LINK_PAUSE_RX) != 0);
+ epause->tx_pause =
+ ((link_info->auto_pause_setting & BNXT_LINK_PAUSE_TX) != 0);
}
static int bnxt_set_pauseparam(struct net_device *dev,
diff --git a/drivers/net/ethernet/brocade/bna/bna_tx_rx.c b/drivers/net/ethernet/brocade/bna/bna_tx_rx.c
index 04b0d16b210e..95bc470ae441 100644
--- a/drivers/net/ethernet/brocade/bna/bna_tx_rx.c
+++ b/drivers/net/ethernet/brocade/bna/bna_tx_rx.c
@@ -987,7 +987,7 @@ bna_rxf_ucast_cfg_apply(struct bna_rxf *rxf)
if (!list_empty(&rxf->ucast_pending_add_q)) {
mac = list_first_entry(&rxf->ucast_pending_add_q,
struct bna_mac, qe);
- list_add_tail(&mac->qe, &rxf->ucast_active_q);
+ list_move_tail(&mac->qe, &rxf->ucast_active_q);
bna_bfi_ucast_req(rxf, mac, BFI_ENET_H2I_MAC_UCAST_ADD_REQ);
return 1;
}
diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
index 00cc9156abbb..092f097a5943 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -116,6 +116,15 @@
#define NIC_PF_INTR_ID_MBOX0 8
#define NIC_PF_INTR_ID_MBOX1 9
+/* Minimum FIFO level before all packets for the CQ are dropped
+ *
+ * This value ensures that once a packet has been "accepted"
+ * for reception it will not get dropped due to non-availability
+ * of CQ descriptor. An errata in HW mandates this value to be
+ * atleast 0x100.
+ */
+#define NICPF_CQM_MIN_DROP_LEVEL 0x100
+
/* Global timer for CQ timer thresh interrupts
* Calculated for SCLK of 700Mhz
* value written should be a 1/16th of what is expected
diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c
index 4dded90076c8..95f17f8cadac 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nic_main.c
@@ -304,6 +304,7 @@ static void nic_set_lmac_vf_mapping(struct nicpf *nic)
static void nic_init_hw(struct nicpf *nic)
{
int i;
+ u64 cqm_cfg;
/* Enable NIC HW block */
nic_reg_write(nic, NIC_PF_CFG, 0x3);
@@ -340,6 +341,11 @@ static void nic_init_hw(struct nicpf *nic)
/* Enable VLAN ethertype matching and stripping */
nic_reg_write(nic, NIC_PF_RX_ETYPE_0_7,
(2 << 19) | (ETYPE_ALG_VLAN_STRIP << 16) | ETH_P_8021Q);
+
+ /* Check if HW expected value is higher (could be in future chips) */
+ cqm_cfg = nic_reg_read(nic, NIC_PF_CQM_CFG);
+ if (cqm_cfg < NICPF_CQM_MIN_DROP_LEVEL)
+ nic_reg_write(nic, NIC_PF_CQM_CFG, NICPF_CQM_MIN_DROP_LEVEL);
}
/* Channel parse index configuration */
diff --git a/drivers/net/ethernet/cavium/thunder/nic_reg.h b/drivers/net/ethernet/cavium/thunder/nic_reg.h
index dd536be20193..afb10e326b4f 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_reg.h
+++ b/drivers/net/ethernet/cavium/thunder/nic_reg.h
@@ -21,7 +21,7 @@
#define NIC_PF_TCP_TIMER (0x0060)
#define NIC_PF_BP_CFG (0x0080)
#define NIC_PF_RRM_CFG (0x0088)
-#define NIC_PF_CQM_CF (0x00A0)
+#define NIC_PF_CQM_CFG (0x00A0)
#define NIC_PF_CNM_CF (0x00A8)
#define NIC_PF_CNM_STATUS (0x00B0)
#define NIC_PF_CQ_AVG_CFG (0x00C0)
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index 91857b81009e..1cc8a7a69457 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -790,10 +790,6 @@ static int cxgb4vf_open(struct net_device *dev)
/*
* Note that this interface is up and start everything up ...
*/
- netif_set_real_num_tx_queues(dev, pi->nqsets);
- err = netif_set_real_num_rx_queues(dev, pi->nqsets);
- if (err)
- goto err_unwind;
err = link_start(dev);
if (err)
goto err_unwind;
@@ -2176,6 +2172,73 @@ static void cleanup_debugfs(struct adapter *adapter)
/* nothing to do */
}
+/* Figure out how many Ports and Queue Sets we can support. This depends on
+ * knowing our Virtual Function Resources and may be called a second time if
+ * we fall back from MSI-X to MSI Interrupt Mode.
+ */
+static void size_nports_qsets(struct adapter *adapter)
+{
+ struct vf_resources *vfres = &adapter->params.vfres;
+ unsigned int ethqsets, pmask_nports;
+
+ /* The number of "ports" which we support is equal to the number of
+ * Virtual Interfaces with which we've been provisioned.
+ */
+ adapter->params.nports = vfres->nvi;
+ if (adapter->params.nports > MAX_NPORTS) {
+ dev_warn(adapter->pdev_dev, "only using %d of %d maximum"
+ " allowed virtual interfaces\n", MAX_NPORTS,
+ adapter->params.nports);
+ adapter->params.nports = MAX_NPORTS;
+ }
+
+ /* We may have been provisioned with more VIs than the number of
+ * ports we're allowed to access (our Port Access Rights Mask).
+ * This is obviously a configuration conflict but we don't want to
+ * crash the kernel or anything silly just because of that.
+ */
+ pmask_nports = hweight32(adapter->params.vfres.pmask);
+ if (pmask_nports < adapter->params.nports) {
+ dev_warn(adapter->pdev_dev, "only using %d of %d provissioned"
+ " virtual interfaces; limited by Port Access Rights"
+ " mask %#x\n", pmask_nports, adapter->params.nports,
+ adapter->params.vfres.pmask);
+ adapter->params.nports = pmask_nports;
+ }
+
+ /* We need to reserve an Ingress Queue for the Asynchronous Firmware
+ * Event Queue. And if we're using MSI Interrupts, we'll also need to
+ * reserve an Ingress Queue for a Forwarded Interrupts.
+ *
+ * The rest of the FL/Intr-capable ingress queues will be matched up
+ * one-for-one with Ethernet/Control egress queues in order to form
+ * "Queue Sets" which will be aportioned between the "ports". For
+ * each Queue Set, we'll need the ability to allocate two Egress
+ * Contexts -- one for the Ingress Queue Free List and one for the TX
+ * Ethernet Queue.
+ *
+ * Note that even if we're currently configured to use MSI-X
+ * Interrupts (module variable msi == MSI_MSIX) we may get downgraded
+ * to MSI Interrupts if we can't get enough MSI-X Interrupts. If that
+ * happens we'll need to adjust things later.
+ */
+ ethqsets = vfres->niqflint - 1 - (msi == MSI_MSI);
+ if (vfres->nethctrl != ethqsets)
+ ethqsets = min(vfres->nethctrl, ethqsets);
+ if (vfres->neq < ethqsets*2)
+ ethqsets = vfres->neq/2;
+ if (ethqsets > MAX_ETH_QSETS)
+ ethqsets = MAX_ETH_QSETS;
+ adapter->sge.max_ethqsets = ethqsets;
+
+ if (adapter->sge.max_ethqsets < adapter->params.nports) {
+ dev_warn(adapter->pdev_dev, "only using %d of %d available"
+ " virtual interfaces (too few Queue Sets)\n",
+ adapter->sge.max_ethqsets, adapter->params.nports);
+ adapter->params.nports = adapter->sge.max_ethqsets;
+ }
+}
+
/*
* Perform early "adapter" initialization. This is where we discover what
* adapter parameters we're going to be using and initialize basic adapter
@@ -2183,10 +2246,8 @@ static void cleanup_debugfs(struct adapter *adapter)
*/
static int adap_init0(struct adapter *adapter)
{
- struct vf_resources *vfres = &adapter->params.vfres;
struct sge_params *sge_params = &adapter->params.sge;
struct sge *s = &adapter->sge;
- unsigned int ethqsets;
int err;
u32 param, val = 0;
@@ -2295,69 +2356,23 @@ static int adap_init0(struct adapter *adapter)
return err;
}
- /*
- * The number of "ports" which we support is equal to the number of
- * Virtual Interfaces with which we've been provisioned.
- */
- adapter->params.nports = vfres->nvi;
- if (adapter->params.nports > MAX_NPORTS) {
- dev_warn(adapter->pdev_dev, "only using %d of %d allowed"
- " virtual interfaces\n", MAX_NPORTS,
- adapter->params.nports);
- adapter->params.nports = MAX_NPORTS;
- }
-
- /*
- * We need to reserve a number of the ingress queues with Free List
- * and Interrupt capabilities for special interrupt purposes (like
- * asynchronous firmware messages, or forwarded interrupts if we're
- * using MSI). The rest of the FL/Intr-capable ingress queues will be
- * matched up one-for-one with Ethernet/Control egress queues in order
- * to form "Queue Sets" which will be aportioned between the "ports".
- * For each Queue Set, we'll need the ability to allocate two Egress
- * Contexts -- one for the Ingress Queue Free List and one for the TX
- * Ethernet Queue.
- */
- ethqsets = vfres->niqflint - INGQ_EXTRAS;
- if (vfres->nethctrl != ethqsets) {
- dev_warn(adapter->pdev_dev, "unequal number of [available]"
- " ingress/egress queues (%d/%d); using minimum for"
- " number of Queue Sets\n", ethqsets, vfres->nethctrl);
- ethqsets = min(vfres->nethctrl, ethqsets);
- }
- if (vfres->neq < ethqsets*2) {
- dev_warn(adapter->pdev_dev, "Not enough Egress Contexts (%d)"
- " to support Queue Sets (%d); reducing allowed Queue"
- " Sets\n", vfres->neq, ethqsets);
- ethqsets = vfres->neq/2;
- }
- if (ethqsets > MAX_ETH_QSETS) {
- dev_warn(adapter->pdev_dev, "only using %d of %d allowed Queue"
- " Sets\n", MAX_ETH_QSETS, adapter->sge.max_ethqsets);
- ethqsets = MAX_ETH_QSETS;
- }
- if (vfres->niq != 0 || vfres->neq > ethqsets*2) {
- dev_warn(adapter->pdev_dev, "unused resources niq/neq (%d/%d)"
- " ignored\n", vfres->niq, vfres->neq - ethqsets*2);
- }
- adapter->sge.max_ethqsets = ethqsets;
-
- /*
- * Check for various parameter sanity issues. Most checks simply
- * result in us using fewer resources than our provissioning but we
- * do need at least one "port" with which to work ...
- */
- if (adapter->sge.max_ethqsets < adapter->params.nports) {
- dev_warn(adapter->pdev_dev, "only using %d of %d available"
- " virtual interfaces (too few Queue Sets)\n",
- adapter->sge.max_ethqsets, adapter->params.nports);
- adapter->params.nports = adapter->sge.max_ethqsets;
+ /* Check for various parameter sanity issues */
+ if (adapter->params.vfres.pmask == 0) {
+ dev_err(adapter->pdev_dev, "no port access configured\n"
+ "usable!\n");
+ return -EINVAL;
}
- if (adapter->params.nports == 0) {
+ if (adapter->params.vfres.nvi == 0) {
dev_err(adapter->pdev_dev, "no virtual interfaces configured/"
"usable!\n");
return -EINVAL;
}
+
+ /* Initialize nports and max_ethqsets now that we have our Virtual
+ * Function Resources.
+ */
+ size_nports_qsets(adapter);
+
return 0;
}
@@ -2771,6 +2786,40 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev,
}
}
+ /* See what interrupts we'll be using. If we've been configured to
+ * use MSI-X interrupts, try to enable them but fall back to using
+ * MSI interrupts if we can't enable MSI-X interrupts. If we can't
+ * get MSI interrupts we bail with the error.
+ */
+ if (msi == MSI_MSIX && enable_msix(adapter) == 0)
+ adapter->flags |= USING_MSIX;
+ else {
+ if (msi == MSI_MSIX) {
+ dev_info(adapter->pdev_dev,
+ "Unable to use MSI-X Interrupts; falling "
+ "back to MSI Interrupts\n");
+
+ /* We're going to need a Forwarded Interrupt Queue so
+ * that may cut into how many Queue Sets we can
+ * support.
+ */
+ msi = MSI_MSI;
+ size_nports_qsets(adapter);
+ }
+ err = pci_enable_msi(pdev);
+ if (err) {
+ dev_err(&pdev->dev, "Unable to allocate MSI Interrupts;"
+ " err=%d\n", err);
+ goto err_free_dev;
+ }
+ adapter->flags |= USING_MSI;
+ }
+
+ /* Now that we know how many "ports" we have and what interrupt
+ * mechanism we're going to use, we can configure our queue resources.
+ */
+ cfg_queues(adapter);
+
/*
* The "card" is now ready to go. If any errors occur during device
* registration we do not fail the whole "card" but rather proceed
@@ -2778,10 +2827,14 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev,
* must register at least one net device.
*/
for_each_port(adapter, pidx) {
+ struct port_info *pi = netdev_priv(adapter->port[pidx]);
netdev = adapter->port[pidx];
if (netdev == NULL)
continue;
+ netif_set_real_num_tx_queues(netdev, pi->nqsets);
+ netif_set_real_num_rx_queues(netdev, pi->nqsets);
+
err = register_netdev(netdev);
if (err) {
dev_warn(&pdev->dev, "cannot register net device %s,"
@@ -2793,7 +2846,7 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev,
}
if (adapter->registered_device_map == 0) {
dev_err(&pdev->dev, "could not register any net devices\n");
- goto err_free_dev;
+ goto err_disable_interrupts;
}
/*
@@ -2811,32 +2864,6 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev,
}
/*
- * See what interrupts we'll be using. If we've been configured to
- * use MSI-X interrupts, try to enable them but fall back to using
- * MSI interrupts if we can't enable MSI-X interrupts. If we can't
- * get MSI interrupts we bail with the error.
- */
- if (msi == MSI_MSIX && enable_msix(adapter) == 0)
- adapter->flags |= USING_MSIX;
- else {
- err = pci_enable_msi(pdev);
- if (err) {
- dev_err(&pdev->dev, "Unable to allocate %s interrupts;"
- " err=%d\n",
- msi == MSI_MSIX ? "MSI-X or MSI" : "MSI", err);
- goto err_free_debugfs;
- }
- adapter->flags |= USING_MSI;
- }
-
- /*
- * Now that we know how many "ports" we have and what their types are,
- * and how many Queue Sets we can support, we can configure our queue
- * resources.
- */
- cfg_queues(adapter);
-
- /*
* Print a short notice on the existence and configuration of the new
* VF network device ...
*/
@@ -2856,11 +2883,13 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev,
* Error recovery and exit code. Unwind state that's been created
* so far and return the error.
*/
-
-err_free_debugfs:
- if (!IS_ERR_OR_NULL(adapter->debugfs_root)) {
- cleanup_debugfs(adapter);
- debugfs_remove_recursive(adapter->debugfs_root);
+err_disable_interrupts:
+ if (adapter->flags & USING_MSIX) {
+ pci_disable_msix(adapter->pdev);
+ adapter->flags &= ~USING_MSIX;
+ } else if (adapter->flags & USING_MSI) {
+ pci_disable_msi(adapter->pdev);
+ adapter->flags &= ~USING_MSI;
}
err_free_dev:
diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h
index 7ba6d530b0c0..130f910e4785 100644
--- a/drivers/net/ethernet/cisco/enic/enic.h
+++ b/drivers/net/ethernet/cisco/enic/enic.h
@@ -201,16 +201,20 @@ static inline struct net_device *vnic_get_netdev(struct vnic_dev *vdev)
}
/* wrappers function for kernel log
- * Make sure variable vdev of struct vnic_dev is available in the block where
- * these macros are used
*/
-#define vdev_info(args...) dev_info(&vdev->pdev->dev, args)
-#define vdev_warn(args...) dev_warn(&vdev->pdev->dev, args)
-#define vdev_err(args...) dev_err(&vdev->pdev->dev, args)
-
-#define vdev_netinfo(args...) netdev_info(vnic_get_netdev(vdev), args)
-#define vdev_netwarn(args...) netdev_warn(vnic_get_netdev(vdev), args)
-#define vdev_neterr(args...) netdev_err(vnic_get_netdev(vdev), args)
+#define vdev_err(vdev, fmt, ...) \
+ dev_err(&(vdev)->pdev->dev, fmt, ##__VA_ARGS__)
+#define vdev_warn(vdev, fmt, ...) \
+ dev_warn(&(vdev)->pdev->dev, fmt, ##__VA_ARGS__)
+#define vdev_info(vdev, fmt, ...) \
+ dev_info(&(vdev)->pdev->dev, fmt, ##__VA_ARGS__)
+
+#define vdev_neterr(vdev, fmt, ...) \
+ netdev_err(vnic_get_netdev(vdev), fmt, ##__VA_ARGS__)
+#define vdev_netwarn(vdev, fmt, ...) \
+ netdev_warn(vnic_get_netdev(vdev), fmt, ##__VA_ARGS__)
+#define vdev_netinfo(vdev, fmt, ...) \
+ netdev_info(vnic_get_netdev(vdev), fmt, ##__VA_ARGS__)
static inline struct device *enic_get_dev(struct enic *enic)
{
diff --git a/drivers/net/ethernet/cisco/enic/vnic_cq.c b/drivers/net/ethernet/cisco/enic/vnic_cq.c
index abeda2a9ea27..9c682aff3834 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_cq.c
+++ b/drivers/net/ethernet/cisco/enic/vnic_cq.c
@@ -43,7 +43,7 @@ int vnic_cq_alloc(struct vnic_dev *vdev, struct vnic_cq *cq, unsigned int index,
cq->ctrl = vnic_dev_get_res(vdev, RES_TYPE_CQ, index);
if (!cq->ctrl) {
- vdev_err("Failed to hook CQ[%d] resource\n", index);
+ vdev_err(vdev, "Failed to hook CQ[%d] resource\n", index);
return -EINVAL;
}
diff --git a/drivers/net/ethernet/cisco/enic/vnic_dev.c b/drivers/net/ethernet/cisco/enic/vnic_dev.c
index 1fdf5fe12a95..8f27df3207bc 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_dev.c
+++ b/drivers/net/ethernet/cisco/enic/vnic_dev.c
@@ -53,14 +53,14 @@ static int vnic_dev_discover_res(struct vnic_dev *vdev,
return -EINVAL;
if (bar->len < VNIC_MAX_RES_HDR_SIZE) {
- vdev_err("vNIC BAR0 res hdr length error\n");
+ vdev_err(vdev, "vNIC BAR0 res hdr length error\n");
return -EINVAL;
}
rh = bar->vaddr;
mrh = bar->vaddr;
if (!rh) {
- vdev_err("vNIC BAR0 res hdr not mem-mapped\n");
+ vdev_err(vdev, "vNIC BAR0 res hdr not mem-mapped\n");
return -EINVAL;
}
@@ -69,7 +69,7 @@ static int vnic_dev_discover_res(struct vnic_dev *vdev,
(ioread32(&rh->version) != VNIC_RES_VERSION)) {
if ((ioread32(&mrh->magic) != MGMTVNIC_MAGIC) ||
(ioread32(&mrh->version) != MGMTVNIC_VERSION)) {
- vdev_err("vNIC BAR0 res magic/version error exp (%lx/%lx) or (%lx/%lx), curr (%x/%x)\n",
+ vdev_err(vdev, "vNIC BAR0 res magic/version error exp (%lx/%lx) or (%lx/%lx), curr (%x/%x)\n",
VNIC_RES_MAGIC, VNIC_RES_VERSION,
MGMTVNIC_MAGIC, MGMTVNIC_VERSION,
ioread32(&rh->magic), ioread32(&rh->version));
@@ -106,7 +106,7 @@ static int vnic_dev_discover_res(struct vnic_dev *vdev,
/* each count is stride bytes long */
len = count * VNIC_RES_STRIDE;
if (len + bar_offset > bar[bar_num].len) {
- vdev_err("vNIC BAR0 resource %d out-of-bounds, offset 0x%x + size 0x%x > bar len 0x%lx\n",
+ vdev_err(vdev, "vNIC BAR0 resource %d out-of-bounds, offset 0x%x + size 0x%x > bar len 0x%lx\n",
type, bar_offset, len,
bar[bar_num].len);
return -EINVAL;
@@ -198,7 +198,7 @@ int vnic_dev_alloc_desc_ring(struct vnic_dev *vdev, struct vnic_dev_ring *ring,
&ring->base_addr_unaligned);
if (!ring->descs_unaligned) {
- vdev_err("Failed to allocate ring (size=%d), aborting\n",
+ vdev_err(vdev, "Failed to allocate ring (size=%d), aborting\n",
(int)ring->size);
return -ENOMEM;
}
@@ -241,7 +241,7 @@ static int _vnic_dev_cmd(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd,
return -ENODEV;
}
if (status & STAT_BUSY) {
- vdev_neterr("Busy devcmd %d\n", _CMD_N(cmd));
+ vdev_neterr(vdev, "Busy devcmd %d\n", _CMD_N(cmd));
return -EBUSY;
}
@@ -275,7 +275,7 @@ static int _vnic_dev_cmd(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd,
return -err;
if (err != ERR_ECMDUNKNOWN ||
cmd != CMD_CAPABILITY)
- vdev_neterr("Error %d devcmd %d\n",
+ vdev_neterr(vdev, "Error %d devcmd %d\n",
err, _CMD_N(cmd));
return -err;
}
@@ -290,7 +290,7 @@ static int _vnic_dev_cmd(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd,
}
}
- vdev_neterr("Timedout devcmd %d\n", _CMD_N(cmd));
+ vdev_neterr(vdev, "Timedout devcmd %d\n", _CMD_N(cmd));
return -ETIMEDOUT;
}
@@ -313,7 +313,7 @@ static int _vnic_dev_cmd2(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd,
new_posted = (posted + 1) % DEVCMD2_RING_SIZE;
if (new_posted == fetch_index) {
- vdev_neterr("devcmd2 %d: wq is full. fetch index: %u, posted index: %u\n",
+ vdev_neterr(vdev, "devcmd2 %d: wq is full. fetch index: %u, posted index: %u\n",
_CMD_N(cmd), fetch_index, posted);
return -EBUSY;
}
@@ -352,7 +352,7 @@ static int _vnic_dev_cmd2(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd,
err = result->error;
if (err != ERR_ECMDUNKNOWN ||
cmd != CMD_CAPABILITY)
- vdev_neterr("Error %d devcmd %d\n",
+ vdev_neterr(vdev, "Error %d devcmd %d\n",
err, _CMD_N(cmd));
return -err;
}
@@ -365,7 +365,7 @@ static int _vnic_dev_cmd2(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd,
udelay(100);
}
- vdev_neterr("devcmd %d timed out\n", _CMD_N(cmd));
+ vdev_neterr(vdev, "devcmd %d timed out\n", _CMD_N(cmd));
return -ETIMEDOUT;
}
@@ -401,7 +401,7 @@ static int vnic_dev_init_devcmd2(struct vnic_dev *vdev)
fetch_index = ioread32(&vdev->devcmd2->wq.ctrl->fetch_index);
if (fetch_index == 0xFFFFFFFF) { /* check for hardware gone */
- vdev_err("Fatal error in devcmd2 init - hardware surprise removal");
+ vdev_err(vdev, "Fatal error in devcmd2 init - hardware surprise removal\n");
return -ENODEV;
}
@@ -474,8 +474,8 @@ static int vnic_dev_cmd_proxy(struct vnic_dev *vdev,
err = (int)vdev->args[1];
if (err != ERR_ECMDUNKNOWN ||
cmd != CMD_CAPABILITY)
- vdev_neterr("Error %d proxy devcmd %d\n", err,
- _CMD_N(cmd));
+ vdev_neterr(vdev, "Error %d proxy devcmd %d\n",
+ err, _CMD_N(cmd));
return err;
}
@@ -768,7 +768,7 @@ int vnic_dev_packet_filter(struct vnic_dev *vdev, int directed, int multicast,
err = vnic_dev_cmd(vdev, CMD_PACKET_FILTER, &a0, &a1, wait);
if (err)
- vdev_neterr("Can't set packet filter\n");
+ vdev_neterr(vdev, "Can't set packet filter\n");
return err;
}
@@ -785,7 +785,7 @@ int vnic_dev_add_addr(struct vnic_dev *vdev, const u8 *addr)
err = vnic_dev_cmd(vdev, CMD_ADDR_ADD, &a0, &a1, wait);
if (err)
- vdev_neterr("Can't add addr [%pM], %d\n", addr, err);
+ vdev_neterr(vdev, "Can't add addr [%pM], %d\n", addr, err);
return err;
}
@@ -802,7 +802,7 @@ int vnic_dev_del_addr(struct vnic_dev *vdev, const u8 *addr)
err = vnic_dev_cmd(vdev, CMD_ADDR_DEL, &a0, &a1, wait);
if (err)
- vdev_neterr("Can't del addr [%pM], %d\n", addr, err);
+ vdev_neterr(vdev, "Can't del addr [%pM], %d\n", addr, err);
return err;
}
@@ -846,7 +846,8 @@ int vnic_dev_notify_set(struct vnic_dev *vdev, u16 intr)
dma_addr_t notify_pa;
if (vdev->notify || vdev->notify_pa) {
- vdev_neterr("notify block %p still allocated", vdev->notify);
+ vdev_neterr(vdev, "notify block %p still allocated\n",
+ vdev->notify);
return -EINVAL;
}
@@ -965,7 +966,7 @@ int vnic_dev_intr_coal_timer_info(struct vnic_dev *vdev)
*/
if ((err == ERR_ECMDUNKNOWN) ||
(!err && !(vdev->args[0] && vdev->args[1] && vdev->args[2]))) {
- vdev_netwarn("Using default conversion factor for interrupt coalesce timer\n");
+ vdev_netwarn(vdev, "Using default conversion factor for interrupt coalesce timer\n");
vnic_dev_intr_coal_timer_info_default(vdev);
return 0;
}
@@ -1103,16 +1104,16 @@ int vnic_devcmd_init(struct vnic_dev *vdev)
if (res) {
err = vnic_dev_init_devcmd2(vdev);
if (err)
- vdev_warn("DEVCMD2 init failed: %d, Using DEVCMD1",
+ vdev_warn(vdev, "DEVCMD2 init failed: %d, Using DEVCMD1\n",
err);
else
return 0;
} else {
- vdev_warn("DEVCMD2 resource not found (old firmware?) Using DEVCMD1\n");
+ vdev_warn(vdev, "DEVCMD2 resource not found (old firmware?) Using DEVCMD1\n");
}
err = vnic_dev_init_devcmd1(vdev);
if (err)
- vdev_err("DEVCMD1 initialization failed: %d", err);
+ vdev_err(vdev, "DEVCMD1 initialization failed: %d\n", err);
return err;
}
diff --git a/drivers/net/ethernet/cisco/enic/vnic_intr.c b/drivers/net/ethernet/cisco/enic/vnic_intr.c
index 942759d9cb3c..23604e3d4455 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_intr.c
+++ b/drivers/net/ethernet/cisco/enic/vnic_intr.c
@@ -40,7 +40,8 @@ int vnic_intr_alloc(struct vnic_dev *vdev, struct vnic_intr *intr,
intr->ctrl = vnic_dev_get_res(vdev, RES_TYPE_INTR_CTRL, index);
if (!intr->ctrl) {
- vdev_err("Failed to hook INTR[%d].ctrl resource\n", index);
+ vdev_err(vdev, "Failed to hook INTR[%d].ctrl resource\n",
+ index);
return -EINVAL;
}
diff --git a/drivers/net/ethernet/cisco/enic/vnic_rq.c b/drivers/net/ethernet/cisco/enic/vnic_rq.c
index cce2777dfc41..e572a527b18d 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_rq.c
+++ b/drivers/net/ethernet/cisco/enic/vnic_rq.c
@@ -92,7 +92,7 @@ int vnic_rq_alloc(struct vnic_dev *vdev, struct vnic_rq *rq, unsigned int index,
rq->ctrl = vnic_dev_get_res(vdev, RES_TYPE_RQ, index);
if (!rq->ctrl) {
- vdev_err("Failed to hook RQ[%d] resource\n", index);
+ vdev_err(vdev, "Failed to hook RQ[%d] resource\n", index);
return -EINVAL;
}
@@ -179,7 +179,7 @@ int vnic_rq_disable(struct vnic_rq *rq)
udelay(10);
}
- vdev_neterr("Failed to disable RQ[%d]\n", rq->index);
+ vdev_neterr(vdev, "Failed to disable RQ[%d]\n", rq->index);
return -ETIMEDOUT;
}
diff --git a/drivers/net/ethernet/cisco/enic/vnic_wq.c b/drivers/net/ethernet/cisco/enic/vnic_wq.c
index 05ad16a7e872..090cc65658a3 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_wq.c
+++ b/drivers/net/ethernet/cisco/enic/vnic_wq.c
@@ -95,7 +95,7 @@ int vnic_wq_alloc(struct vnic_dev *vdev, struct vnic_wq *wq, unsigned int index,
wq->ctrl = vnic_dev_get_res(vdev, RES_TYPE_WQ, index);
if (!wq->ctrl) {
- vdev_err("Failed to hook WQ[%d] resource\n", index);
+ vdev_err(vdev, "Failed to hook WQ[%d] resource\n", index);
return -EINVAL;
}
@@ -187,7 +187,7 @@ int vnic_wq_disable(struct vnic_wq *wq)
udelay(10);
}
- vdev_neterr("Failed to disable WQ[%d]\n", wq->index);
+ vdev_neterr(vdev, "Failed to disable WQ[%d]\n", wq->index);
return -ETIMEDOUT;
}
diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h
index ee584c59ff62..fe3763df3f13 100644
--- a/drivers/net/ethernet/emulex/benet/be.h
+++ b/drivers/net/ethernet/emulex/benet/be.h
@@ -545,6 +545,7 @@ struct be_adapter {
struct delayed_work be_err_detection_work;
u8 recovery_retries;
u8 err_flags;
+ bool pcicfg_mapped; /* pcicfg obtained via pci_iomap() */
u32 flags;
u32 cmd_privileges;
/* Ethtool knobs and info */
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h
index 0b9f741f31af..d8540ae95e5a 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.h
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.h
@@ -666,10 +666,13 @@ enum be_if_flags {
BE_IF_FLAGS_VLAN_PROMISCUOUS |\
BE_IF_FLAGS_MCAST_PROMISCUOUS)
-#define BE_IF_EN_FLAGS (BE_IF_FLAGS_BROADCAST | BE_IF_FLAGS_PASS_L3L4_ERRORS |\
- BE_IF_FLAGS_MULTICAST | BE_IF_FLAGS_UNTAGGED)
+#define BE_IF_FILT_FLAGS_BASIC (BE_IF_FLAGS_BROADCAST | \
+ BE_IF_FLAGS_PASS_L3L4_ERRORS | \
+ BE_IF_FLAGS_UNTAGGED)
-#define BE_IF_ALL_FILT_FLAGS (BE_IF_EN_FLAGS | BE_IF_FLAGS_ALL_PROMISCUOUS)
+#define BE_IF_ALL_FILT_FLAGS (BE_IF_FILT_FLAGS_BASIC | \
+ BE_IF_FLAGS_MULTICAST | \
+ BE_IF_FLAGS_ALL_PROMISCUOUS)
/* An RX interface is an object with one or more MAC addresses and
* filtering capabilities. */
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 17422b20a8ec..536686476369 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -125,6 +125,11 @@ static const char * const ue_status_hi_desc[] = {
"Unknown"
};
+#define BE_VF_IF_EN_FLAGS (BE_IF_FLAGS_UNTAGGED | \
+ BE_IF_FLAGS_BROADCAST | \
+ BE_IF_FLAGS_MULTICAST | \
+ BE_IF_FLAGS_PASS_L3L4_ERRORS)
+
static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
{
struct be_dma_mem *mem = &q->dma_mem;
@@ -3558,7 +3563,7 @@ static int be_enable_if_filters(struct be_adapter *adapter)
{
int status;
- status = be_cmd_rx_filter(adapter, BE_IF_EN_FLAGS, ON);
+ status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
if (status)
return status;
@@ -3875,8 +3880,7 @@ static int be_vfs_if_create(struct be_adapter *adapter)
int status;
/* If a FW profile exists, then cap_flags are updated */
- cap_flags = BE_IF_FLAGS_UNTAGGED | BE_IF_FLAGS_BROADCAST |
- BE_IF_FLAGS_MULTICAST | BE_IF_FLAGS_PASS_L3L4_ERRORS;
+ cap_flags = BE_VF_IF_EN_FLAGS;
for_all_vfs(adapter, vf_cfg, vf) {
if (!BE3_chip(adapter)) {
@@ -3892,10 +3896,8 @@ static int be_vfs_if_create(struct be_adapter *adapter)
}
}
- en_flags = cap_flags & (BE_IF_FLAGS_UNTAGGED |
- BE_IF_FLAGS_BROADCAST |
- BE_IF_FLAGS_MULTICAST |
- BE_IF_FLAGS_PASS_L3L4_ERRORS);
+ /* PF should enable IF flags during proxy if_create call */
+ en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
status = be_cmd_if_create(adapter, cap_flags, en_flags,
&vf_cfg->if_handle, vf + 1);
if (status)
@@ -5040,6 +5042,8 @@ static void be_unmap_pci_bars(struct be_adapter *adapter)
pci_iounmap(adapter->pdev, adapter->csr);
if (adapter->db)
pci_iounmap(adapter->pdev, adapter->db);
+ if (adapter->pcicfg && adapter->pcicfg_mapped)
+ pci_iounmap(adapter->pdev, adapter->pcicfg);
}
static int db_bar(struct be_adapter *adapter)
@@ -5091,8 +5095,10 @@ static int be_map_pci_bars(struct be_adapter *adapter)
if (!addr)
goto pci_map_err;
adapter->pcicfg = addr;
+ adapter->pcicfg_mapped = true;
} else {
adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
+ adapter->pcicfg_mapped = false;
}
}
diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c
index 62fa136554ac..41b010645100 100644
--- a/drivers/net/ethernet/ethoc.c
+++ b/drivers/net/ethernet/ethoc.c
@@ -1265,7 +1265,6 @@ static int ethoc_remove(struct platform_device *pdev)
if (priv->mdio) {
mdiobus_unregister(priv->mdio);
- kfree(priv->mdio->irq);
mdiobus_free(priv->mdio);
}
if (priv->clk)
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index bad0ba29a94a..37c081583084 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -3191,7 +3191,7 @@ static int fec_enet_init(struct net_device *ndev)
static void fec_reset_phy(struct platform_device *pdev)
{
int err, phy_reset;
- bool active_low = false;
+ bool active_high = false;
int msec = 1;
struct device_node *np = pdev->dev.of_node;
@@ -3207,17 +3207,17 @@ static void fec_reset_phy(struct platform_device *pdev)
if (!gpio_is_valid(phy_reset))
return;
- active_low = of_property_read_bool(np, "phy-reset-active-low");
+ active_high = of_property_read_bool(np, "phy-reset-active-high");
err = devm_gpio_request_one(&pdev->dev, phy_reset,
- active_low ? GPIOF_OUT_INIT_HIGH : GPIOF_OUT_INIT_LOW,
+ active_high ? GPIOF_OUT_INIT_HIGH : GPIOF_OUT_INIT_LOW,
"phy-reset");
if (err) {
dev_err(&pdev->dev, "failed to get phy-reset-gpios: %d\n", err);
return;
}
msleep(msec);
- gpio_set_value_cansleep(phy_reset, !active_low);
+ gpio_set_value_cansleep(phy_reset, !active_high);
}
#else /* CONFIG_OF */
static void fec_reset_phy(struct platform_device *pdev)
diff --git a/drivers/net/ethernet/freescale/fman/fman.c b/drivers/net/ethernet/freescale/fman/fman.c
index 623aa1c8ebc6..79a210aaf0bb 100644
--- a/drivers/net/ethernet/freescale/fman/fman.c
+++ b/drivers/net/ethernet/freescale/fman/fman.c
@@ -2791,6 +2791,8 @@ static struct fman *read_dts_node(struct platform_device *of_dev)
goto fman_free;
}
+ fman->dev = &of_dev->dev;
+
return fman;
fman_node_put:
@@ -2845,8 +2847,6 @@ static int fman_probe(struct platform_device *of_dev)
dev_set_drvdata(dev, fman);
- fman->dev = dev;
-
dev_dbg(dev, "FMan%d probed\n", fman->dts_params.id);
return 0;
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index a01e5a32b631..08c041548300 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -1111,8 +1111,10 @@ static void __gfar_detect_errata_85xx(struct gfar_private *priv)
if ((SVR_SOC_VER(svr) == SVR_8548) && (SVR_REV(svr) == 0x20))
priv->errata |= GFAR_ERRATA_12;
+ /* P2020/P1010 Rev 1; MPC8548 Rev 2 */
if (((SVR_SOC_VER(svr) == SVR_P2020) && (SVR_REV(svr) < 0x20)) ||
- ((SVR_SOC_VER(svr) == SVR_P2010) && (SVR_REV(svr) < 0x20)))
+ ((SVR_SOC_VER(svr) == SVR_P2010) && (SVR_REV(svr) < 0x20)) ||
+ ((SVR_SOC_VER(svr) == SVR_8548) && (SVR_REV(svr) < 0x31)))
priv->errata |= GFAR_ERRATA_76; /* aka eTSEC 20 */
}
#endif
diff --git a/drivers/net/ethernet/hisilicon/Kconfig b/drivers/net/ethernet/hisilicon/Kconfig
index 74beb1867230..4ccc032633c4 100644
--- a/drivers/net/ethernet/hisilicon/Kconfig
+++ b/drivers/net/ethernet/hisilicon/Kconfig
@@ -25,6 +25,7 @@ config HIX5HD2_GMAC
config HIP04_ETH
tristate "HISILICON P04 Ethernet support"
+ depends on HAS_IOMEM # For MFD_SYSCON
select MARVELL_PHY
select MFD_SYSCON
select HNS_MDIO
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
index a0070d0e740d..d4f92ed322d6 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
@@ -675,8 +675,12 @@ static int hns_ae_config_loopback(struct hnae_handle *handle,
{
int ret;
struct hnae_vf_cb *vf_cb = hns_ae_get_vf_cb(handle);
+ struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle);
switch (loop) {
+ case MAC_INTERNALLOOP_PHY:
+ ret = 0;
+ break;
case MAC_INTERNALLOOP_SERDES:
ret = hns_mac_config_sds_loopback(vf_cb->mac_cb, en);
break;
@@ -686,6 +690,10 @@ static int hns_ae_config_loopback(struct hnae_handle *handle,
default:
ret = -EINVAL;
}
+
+ if (!ret)
+ hns_dsaf_set_inner_lb(mac_cb->dsaf_dev, mac_cb->mac_id, en);
+
return ret;
}
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
index 9439f04962e1..38fc5be3870c 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
@@ -230,6 +230,30 @@ static void hns_dsaf_mix_def_qid_cfg(struct dsaf_device *dsaf_dev)
}
}
+static void hns_dsaf_inner_qid_cfg(struct dsaf_device *dsaf_dev)
+{
+ u16 max_q_per_vf, max_vfn;
+ u32 q_id, q_num_per_port;
+ u32 mac_id;
+
+ if (AE_IS_VER1(dsaf_dev->dsaf_ver))
+ return;
+
+ hns_rcb_get_queue_mode(dsaf_dev->dsaf_mode,
+ HNS_DSAF_COMM_SERVICE_NW_IDX,
+ &max_vfn, &max_q_per_vf);
+ q_num_per_port = max_vfn * max_q_per_vf;
+
+ for (mac_id = 0, q_id = 0; mac_id < DSAF_SERVICE_NW_NUM; mac_id++) {
+ dsaf_set_dev_field(dsaf_dev,
+ DSAFV2_SERDES_LBK_0_REG + 4 * mac_id,
+ DSAFV2_SERDES_LBK_QID_M,
+ DSAFV2_SERDES_LBK_QID_S,
+ q_id);
+ q_id += q_num_per_port;
+ }
+}
+
/**
* hns_dsaf_sw_port_type_cfg - cfg sw type
* @dsaf_id: dsa fabric id
@@ -691,6 +715,16 @@ void hns_dsaf_set_promisc_mode(struct dsaf_device *dsaf_dev, u32 en)
dsaf_set_dev_bit(dsaf_dev, DSAF_CFG_0_REG, DSAF_CFG_MIX_MODE_S, !!en);
}
+void hns_dsaf_set_inner_lb(struct dsaf_device *dsaf_dev, u32 mac_id, u32 en)
+{
+ if (AE_IS_VER1(dsaf_dev->dsaf_ver) ||
+ dsaf_dev->mac_cb[mac_id].mac_type == HNAE_PORT_DEBUG)
+ return;
+
+ dsaf_set_dev_bit(dsaf_dev, DSAFV2_SERDES_LBK_0_REG + 4 * mac_id,
+ DSAFV2_SERDES_LBK_EN_B, !!en);
+}
+
/**
* hns_dsaf_tbl_stat_en - tbl
* @dsaf_id: dsa fabric id
@@ -1022,6 +1056,9 @@ static void hns_dsaf_comm_init(struct dsaf_device *dsaf_dev)
/* set promisc def queue id */
hns_dsaf_mix_def_qid_cfg(dsaf_dev);
+ /* set inner loopback queue id */
+ hns_dsaf_inner_qid_cfg(dsaf_dev);
+
/* in non switch mode, set all port to access mode */
hns_dsaf_sw_port_type_cfg(dsaf_dev, DSAF_SW_PORT_TYPE_NON_VLAN);
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h
index 40205b910f80..5fea226efaf3 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h
@@ -417,5 +417,6 @@ void hns_dsaf_get_strings(int stringset, u8 *data, int port);
void hns_dsaf_get_regs(struct dsaf_device *ddev, u32 port, void *data);
int hns_dsaf_get_regs_count(void);
void hns_dsaf_set_promisc_mode(struct dsaf_device *dsaf_dev, u32 en);
+void hns_dsaf_set_inner_lb(struct dsaf_device *dsaf_dev, u32 mac_id, u32 en);
#endif /* __HNS_DSAF_MAIN_H__ */
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
index f0c4f9b09d5b..60d695daa471 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
@@ -134,6 +134,7 @@
#define DSAF_XGE_INT_STS_0_REG 0x1C0
#define DSAF_PPE_INT_STS_0_REG 0x1E0
#define DSAF_ROCEE_INT_STS_0_REG 0x200
+#define DSAFV2_SERDES_LBK_0_REG 0x220
#define DSAF_PPE_QID_CFG_0_REG 0x300
#define DSAF_SW_PORT_TYPE_0_REG 0x320
#define DSAF_STP_PORT_TYPE_0_REG 0x340
@@ -857,6 +858,10 @@
#define PPEV2_CFG_RSS_TBL_4N3_S 24
#define PPEV2_CFG_RSS_TBL_4N3_M (((1UL << 5) - 1) << PPEV2_CFG_RSS_TBL_4N3_S)
+#define DSAFV2_SERDES_LBK_EN_B 8
+#define DSAFV2_SERDES_LBK_QID_S 0
+#define DSAFV2_SERDES_LBK_QID_M (((1UL << 8) - 1) << DSAFV2_SERDES_LBK_QID_S)
+
#define PPE_CNT_CLR_CE_B 0
#define PPE_CNT_CLR_SNAP_EN_B 1
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
index 3df22840fcd1..3c4a3bc31a89 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
@@ -295,8 +295,10 @@ static int __lb_setup(struct net_device *ndev,
switch (loop) {
case MAC_INTERNALLOOP_PHY:
- if ((phy_dev) && (!phy_dev->is_c45))
+ if ((phy_dev) && (!phy_dev->is_c45)) {
ret = hns_nic_config_phy_loopback(phy_dev, 0x1);
+ ret |= h->dev->ops->set_loopback(h, loop, 0x1);
+ }
break;
case MAC_INTERNALLOOP_MAC:
if ((h->dev->ops->set_loopback) &&
@@ -376,6 +378,7 @@ static void __lb_other_process(struct hns_nic_ring_data *ring_data,
struct sk_buff *skb)
{
struct net_device *ndev;
+ struct hns_nic_priv *priv;
struct hnae_ring *ring;
struct netdev_queue *dev_queue;
struct sk_buff *new_skb;
@@ -385,8 +388,17 @@ static void __lb_other_process(struct hns_nic_ring_data *ring_data,
char buff[33]; /* 32B data and the last character '\0' */
if (!ring_data) { /* Just for doing create frame*/
+ ndev = skb->dev;
+ priv = netdev_priv(ndev);
+
frame_size = skb->len;
memset(skb->data, 0xFF, frame_size);
+ if ((!AE_IS_VER1(priv->enet_ver)) &&
+ (priv->ae_handle->port_type == HNAE_PORT_SERVICE)) {
+ memcpy(skb->data, ndev->dev_addr, 6);
+ skb->data[5] += 0x1f;
+ }
+
frame_size &= ~1ul;
memset(&skb->data[frame_size / 2], 0xAA, frame_size / 2 - 1);
memset(&skb->data[frame_size / 2 + 10], 0xBE,
@@ -486,6 +498,7 @@ static int __lb_run_test(struct net_device *ndev,
/* place data into test skb */
(void)skb_put(skb, size);
+ skb->dev = ndev;
__lb_other_process(NULL, skb);
skb->queue_mapping = NIC_LB_TEST_RING_ID;
diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index 335417b4756b..ebe60719e489 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -1166,7 +1166,10 @@ map_failed:
if (!firmware_has_feature(FW_FEATURE_CMO))
netdev_err(netdev, "tx: unable to map xmit buffer\n");
adapter->tx_map_failed++;
- skb_linearize(skb);
+ if (skb_linearize(skb)) {
+ netdev->stats.tx_dropped++;
+ goto out;
+ }
force_bounce = 1;
goto retry_bounce;
}
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 7d6570843723..6e9e16eee5d0 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1348,44 +1348,44 @@ static void init_sub_crqs(struct ibmvnic_adapter *adapter, int retry)
crq.request_capability.cmd = REQUEST_CAPABILITY;
crq.request_capability.capability = cpu_to_be16(REQ_TX_QUEUES);
- crq.request_capability.number = cpu_to_be32(adapter->req_tx_queues);
+ crq.request_capability.number = cpu_to_be64(adapter->req_tx_queues);
ibmvnic_send_crq(adapter, &crq);
crq.request_capability.capability = cpu_to_be16(REQ_RX_QUEUES);
- crq.request_capability.number = cpu_to_be32(adapter->req_rx_queues);
+ crq.request_capability.number = cpu_to_be64(adapter->req_rx_queues);
ibmvnic_send_crq(adapter, &crq);
crq.request_capability.capability = cpu_to_be16(REQ_RX_ADD_QUEUES);
- crq.request_capability.number = cpu_to_be32(adapter->req_rx_add_queues);
+ crq.request_capability.number = cpu_to_be64(adapter->req_rx_add_queues);
ibmvnic_send_crq(adapter, &crq);
crq.request_capability.capability =
cpu_to_be16(REQ_TX_ENTRIES_PER_SUBCRQ);
crq.request_capability.number =
- cpu_to_be32(adapter->req_tx_entries_per_subcrq);
+ cpu_to_be64(adapter->req_tx_entries_per_subcrq);
ibmvnic_send_crq(adapter, &crq);
crq.request_capability.capability =
cpu_to_be16(REQ_RX_ADD_ENTRIES_PER_SUBCRQ);
crq.request_capability.number =
- cpu_to_be32(adapter->req_rx_add_entries_per_subcrq);
+ cpu_to_be64(adapter->req_rx_add_entries_per_subcrq);
ibmvnic_send_crq(adapter, &crq);
crq.request_capability.capability = cpu_to_be16(REQ_MTU);
- crq.request_capability.number = cpu_to_be32(adapter->req_mtu);
+ crq.request_capability.number = cpu_to_be64(adapter->req_mtu);
ibmvnic_send_crq(adapter, &crq);
if (adapter->netdev->flags & IFF_PROMISC) {
if (adapter->promisc_supported) {
crq.request_capability.capability =
cpu_to_be16(PROMISC_REQUESTED);
- crq.request_capability.number = cpu_to_be32(1);
+ crq.request_capability.number = cpu_to_be64(1);
ibmvnic_send_crq(adapter, &crq);
}
} else {
crq.request_capability.capability =
cpu_to_be16(PROMISC_REQUESTED);
- crq.request_capability.number = cpu_to_be32(0);
+ crq.request_capability.number = cpu_to_be64(0);
ibmvnic_send_crq(adapter, &crq);
}
@@ -2312,93 +2312,93 @@ static void handle_query_cap_rsp(union ibmvnic_crq *crq,
switch (be16_to_cpu(crq->query_capability.capability)) {
case MIN_TX_QUEUES:
adapter->min_tx_queues =
- be32_to_cpu(crq->query_capability.number);
+ be64_to_cpu(crq->query_capability.number);
netdev_dbg(netdev, "min_tx_queues = %lld\n",
adapter->min_tx_queues);
break;
case MIN_RX_QUEUES:
adapter->min_rx_queues =
- be32_to_cpu(crq->query_capability.number);
+ be64_to_cpu(crq->query_capability.number);
netdev_dbg(netdev, "min_rx_queues = %lld\n",
adapter->min_rx_queues);
break;
case MIN_RX_ADD_QUEUES:
adapter->min_rx_add_queues =
- be32_to_cpu(crq->query_capability.number);
+ be64_to_cpu(crq->query_capability.number);
netdev_dbg(netdev, "min_rx_add_queues = %lld\n",
adapter->min_rx_add_queues);
break;
case MAX_TX_QUEUES:
adapter->max_tx_queues =
- be32_to_cpu(crq->query_capability.number);
+ be64_to_cpu(crq->query_capability.number);
netdev_dbg(netdev, "max_tx_queues = %lld\n",
adapter->max_tx_queues);
break;
case MAX_RX_QUEUES:
adapter->max_rx_queues =
- be32_to_cpu(crq->query_capability.number);
+ be64_to_cpu(crq->query_capability.number);
netdev_dbg(netdev, "max_rx_queues = %lld\n",
adapter->max_rx_queues);
break;
case MAX_RX_ADD_QUEUES:
adapter->max_rx_add_queues =
- be32_to_cpu(crq->query_capability.number);
+ be64_to_cpu(crq->query_capability.number);
netdev_dbg(netdev, "max_rx_add_queues = %lld\n",
adapter->max_rx_add_queues);
break;
case MIN_TX_ENTRIES_PER_SUBCRQ:
adapter->min_tx_entries_per_subcrq =
- be32_to_cpu(crq->query_capability.number);
+ be64_to_cpu(crq->query_capability.number);
netdev_dbg(netdev, "min_tx_entries_per_subcrq = %lld\n",
adapter->min_tx_entries_per_subcrq);
break;
case MIN_RX_ADD_ENTRIES_PER_SUBCRQ:
adapter->min_rx_add_entries_per_subcrq =
- be32_to_cpu(crq->query_capability.number);
+ be64_to_cpu(crq->query_capability.number);
netdev_dbg(netdev, "min_rx_add_entrs_per_subcrq = %lld\n",
adapter->min_rx_add_entries_per_subcrq);
break;
case MAX_TX_ENTRIES_PER_SUBCRQ:
adapter->max_tx_entries_per_subcrq =
- be32_to_cpu(crq->query_capability.number);
+ be64_to_cpu(crq->query_capability.number);
netdev_dbg(netdev, "max_tx_entries_per_subcrq = %lld\n",
adapter->max_tx_entries_per_subcrq);
break;
case MAX_RX_ADD_ENTRIES_PER_SUBCRQ:
adapter->max_rx_add_entries_per_subcrq =
- be32_to_cpu(crq->query_capability.number);
+ be64_to_cpu(crq->query_capability.number);
netdev_dbg(netdev, "max_rx_add_entrs_per_subcrq = %lld\n",
adapter->max_rx_add_entries_per_subcrq);
break;
case TCP_IP_OFFLOAD:
adapter->tcp_ip_offload =
- be32_to_cpu(crq->query_capability.number);
+ be64_to_cpu(crq->query_capability.number);
netdev_dbg(netdev, "tcp_ip_offload = %lld\n",
adapter->tcp_ip_offload);
break;
case PROMISC_SUPPORTED:
adapter->promisc_supported =
- be32_to_cpu(crq->query_capability.number);
+ be64_to_cpu(crq->query_capability.number);
netdev_dbg(netdev, "promisc_supported = %lld\n",
adapter->promisc_supported);
break;
case MIN_MTU:
- adapter->min_mtu = be32_to_cpu(crq->query_capability.number);
+ adapter->min_mtu = be64_to_cpu(crq->query_capability.number);
netdev_dbg(netdev, "min_mtu = %lld\n", adapter->min_mtu);
break;
case MAX_MTU:
- adapter->max_mtu = be32_to_cpu(crq->query_capability.number);
+ adapter->max_mtu = be64_to_cpu(crq->query_capability.number);
netdev_dbg(netdev, "max_mtu = %lld\n", adapter->max_mtu);
break;
case MAX_MULTICAST_FILTERS:
adapter->max_multicast_filters =
- be32_to_cpu(crq->query_capability.number);
+ be64_to_cpu(crq->query_capability.number);
netdev_dbg(netdev, "max_multicast_filters = %lld\n",
adapter->max_multicast_filters);
break;
case VLAN_HEADER_INSERTION:
adapter->vlan_header_insertion =
- be32_to_cpu(crq->query_capability.number);
+ be64_to_cpu(crq->query_capability.number);
if (adapter->vlan_header_insertion)
netdev->features |= NETIF_F_HW_VLAN_STAG_TX;
netdev_dbg(netdev, "vlan_header_insertion = %lld\n",
@@ -2406,43 +2406,43 @@ static void handle_query_cap_rsp(union ibmvnic_crq *crq,
break;
case MAX_TX_SG_ENTRIES:
adapter->max_tx_sg_entries =
- be32_to_cpu(crq->query_capability.number);
+ be64_to_cpu(crq->query_capability.number);
netdev_dbg(netdev, "max_tx_sg_entries = %lld\n",
adapter->max_tx_sg_entries);
break;
case RX_SG_SUPPORTED:
adapter->rx_sg_supported =
- be32_to_cpu(crq->query_capability.number);
+ be64_to_cpu(crq->query_capability.number);
netdev_dbg(netdev, "rx_sg_supported = %lld\n",
adapter->rx_sg_supported);
break;
case OPT_TX_COMP_SUB_QUEUES:
adapter->opt_tx_comp_sub_queues =
- be32_to_cpu(crq->query_capability.number);
+ be64_to_cpu(crq->query_capability.number);
netdev_dbg(netdev, "opt_tx_comp_sub_queues = %lld\n",
adapter->opt_tx_comp_sub_queues);
break;
case OPT_RX_COMP_QUEUES:
adapter->opt_rx_comp_queues =
- be32_to_cpu(crq->query_capability.number);
+ be64_to_cpu(crq->query_capability.number);
netdev_dbg(netdev, "opt_rx_comp_queues = %lld\n",
adapter->opt_rx_comp_queues);
break;
case OPT_RX_BUFADD_Q_PER_RX_COMP_Q:
adapter->opt_rx_bufadd_q_per_rx_comp_q =
- be32_to_cpu(crq->query_capability.number);
+ be64_to_cpu(crq->query_capability.number);
netdev_dbg(netdev, "opt_rx_bufadd_q_per_rx_comp_q = %lld\n",
adapter->opt_rx_bufadd_q_per_rx_comp_q);
break;
case OPT_TX_ENTRIES_PER_SUBCRQ:
adapter->opt_tx_entries_per_subcrq =
- be32_to_cpu(crq->query_capability.number);
+ be64_to_cpu(crq->query_capability.number);
netdev_dbg(netdev, "opt_tx_entries_per_subcrq = %lld\n",
adapter->opt_tx_entries_per_subcrq);
break;
case OPT_RXBA_ENTRIES_PER_SUBCRQ:
adapter->opt_rxba_entries_per_subcrq =
- be32_to_cpu(crq->query_capability.number);
+ be64_to_cpu(crq->query_capability.number);
netdev_dbg(netdev, "opt_rxba_entries_per_subcrq = %lld\n",
adapter->opt_rxba_entries_per_subcrq);
break;
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 1242925ad34c..1a9993cc79b5 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -319,10 +319,8 @@ struct ibmvnic_capability {
u8 first;
u8 cmd;
__be16 capability; /* one of ibmvnic_capabilities */
+ __be64 number;
struct ibmvnic_rc rc;
- __be32 number; /*FIX: should be __be64, but I'm getting the least
- * significant word first
- */
} __packed __aligned(8);
struct ibmvnic_login {
diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c
index b1de7afd4116..3ddf657bc10b 100644
--- a/drivers/net/ethernet/jme.c
+++ b/drivers/net/ethernet/jme.c
@@ -270,11 +270,17 @@ jme_reset_mac_processor(struct jme_adapter *jme)
}
static inline void
-jme_clear_pm(struct jme_adapter *jme)
+jme_clear_pm_enable_wol(struct jme_adapter *jme)
{
jwrite32(jme, JME_PMCS, PMCS_STMASK | jme->reg_pmcs);
}
+static inline void
+jme_clear_pm_disable_wol(struct jme_adapter *jme)
+{
+ jwrite32(jme, JME_PMCS, PMCS_STMASK);
+}
+
static int
jme_reload_eeprom(struct jme_adapter *jme)
{
@@ -1853,7 +1859,7 @@ jme_open(struct net_device *netdev)
struct jme_adapter *jme = netdev_priv(netdev);
int rc;
- jme_clear_pm(jme);
+ jme_clear_pm_disable_wol(jme);
JME_NAPI_ENABLE(jme);
tasklet_init(&jme->linkch_task, jme_link_change_tasklet,
@@ -1925,11 +1931,11 @@ jme_wait_link(struct jme_adapter *jme)
static void
jme_powersave_phy(struct jme_adapter *jme)
{
- if (jme->reg_pmcs) {
+ if (jme->reg_pmcs && device_may_wakeup(&jme->pdev->dev)) {
jme_set_100m_half(jme);
if (jme->reg_pmcs & (PMCS_LFEN | PMCS_LREN))
jme_wait_link(jme);
- jme_clear_pm(jme);
+ jme_clear_pm_enable_wol(jme);
} else {
jme_phy_off(jme);
}
@@ -2646,9 +2652,6 @@ jme_set_wol(struct net_device *netdev,
if (wol->wolopts & WAKE_MAGIC)
jme->reg_pmcs |= PMCS_MFEN;
- jwrite32(jme, JME_PMCS, jme->reg_pmcs);
- device_set_wakeup_enable(&jme->pdev->dev, !!(jme->reg_pmcs));
-
return 0;
}
@@ -3172,8 +3175,8 @@ jme_init_one(struct pci_dev *pdev,
jme->mii_if.mdio_read = jme_mdio_read;
jme->mii_if.mdio_write = jme_mdio_write;
- jme_clear_pm(jme);
- device_set_wakeup_enable(&pdev->dev, true);
+ jme_clear_pm_disable_wol(jme);
+ device_init_wakeup(&pdev->dev, true);
jme_set_phyfifo_5level(jme);
jme->pcirev = pdev->revision;
@@ -3304,7 +3307,7 @@ jme_resume(struct device *dev)
if (!netif_running(netdev))
return 0;
- jme_clear_pm(jme);
+ jme_clear_pm_disable_wol(jme);
jme_phy_on(jme);
if (test_bit(JME_FLAG_SSET, &jme->flags))
jme_set_settings(netdev, &jme->old_ecmd);
@@ -3312,13 +3315,14 @@ jme_resume(struct device *dev)
jme_reset_phy_processor(jme);
jme_phy_calibration(jme);
jme_phy_setEA(jme);
- jme_start_irq(jme);
netif_device_attach(netdev);
atomic_inc(&jme->link_changing);
jme_reset_link(jme);
+ jme_start_irq(jme);
+
return 0;
}
diff --git a/drivers/net/ethernet/mediatek/Kconfig b/drivers/net/ethernet/mediatek/Kconfig
new file mode 100644
index 000000000000..698bb89aa901
--- /dev/null
+++ b/drivers/net/ethernet/mediatek/Kconfig
@@ -0,0 +1,17 @@
+config NET_VENDOR_MEDIATEK
+ bool "MediaTek ethernet driver"
+ depends on ARCH_MEDIATEK
+ ---help---
+ If you have a Mediatek SoC with ethernet, say Y.
+
+if NET_VENDOR_MEDIATEK
+
+config NET_MEDIATEK_SOC
+ tristate "MediaTek MT7623 Gigabit ethernet support"
+ depends on NET_VENDOR_MEDIATEK && (MACH_MT7623 || MACH_MT2701)
+ select PHYLIB
+ ---help---
+ This driver supports the gigabit ethernet MACs in the
+ MediaTek MT2701/MT7623 chipset family.
+
+endif #NET_VENDOR_MEDIATEK
diff --git a/drivers/net/ethernet/mediatek/Makefile b/drivers/net/ethernet/mediatek/Makefile
new file mode 100644
index 000000000000..aa3f1c8ccd4a
--- /dev/null
+++ b/drivers/net/ethernet/mediatek/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the Mediatek SoCs built-in ethernet macs
+#
+
+obj-$(CONFIG_NET_MEDIATEK_SOC) += mtk_eth_soc.o
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
new file mode 100644
index 000000000000..ba3afa5d4640
--- /dev/null
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -0,0 +1,1807 @@
+/* This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Copyright (C) 2009-2016 John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2009-2016 Felix Fietkau <nbd@openwrt.org>
+ * Copyright (C) 2013-2016 Michael Lee <igvtee@gmail.com>
+ */
+
+#include <linux/of_device.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
+#include <linux/clk.h>
+#include <linux/if_vlan.h>
+#include <linux/reset.h>
+#include <linux/tcp.h>
+
+#include "mtk_eth_soc.h"
+
+static int mtk_msg_level = -1;
+module_param_named(msg_level, mtk_msg_level, int, 0);
+MODULE_PARM_DESC(msg_level, "Message level (-1=defaults,0=none,...,16=all)");
+
+#define MTK_ETHTOOL_STAT(x) { #x, \
+ offsetof(struct mtk_hw_stats, x) / sizeof(u64) }
+
+/* strings used by ethtool */
+static const struct mtk_ethtool_stats {
+ char str[ETH_GSTRING_LEN];
+ u32 offset;
+} mtk_ethtool_stats[] = {
+ MTK_ETHTOOL_STAT(tx_bytes),
+ MTK_ETHTOOL_STAT(tx_packets),
+ MTK_ETHTOOL_STAT(tx_skip),
+ MTK_ETHTOOL_STAT(tx_collisions),
+ MTK_ETHTOOL_STAT(rx_bytes),
+ MTK_ETHTOOL_STAT(rx_packets),
+ MTK_ETHTOOL_STAT(rx_overflow),
+ MTK_ETHTOOL_STAT(rx_fcs_errors),
+ MTK_ETHTOOL_STAT(rx_short_errors),
+ MTK_ETHTOOL_STAT(rx_long_errors),
+ MTK_ETHTOOL_STAT(rx_checksum_errors),
+ MTK_ETHTOOL_STAT(rx_flow_control_packets),
+};
+
+void mtk_w32(struct mtk_eth *eth, u32 val, unsigned reg)
+{
+ __raw_writel(val, eth->base + reg);
+}
+
+u32 mtk_r32(struct mtk_eth *eth, unsigned reg)
+{
+ return __raw_readl(eth->base + reg);
+}
+
+static int mtk_mdio_busy_wait(struct mtk_eth *eth)
+{
+ unsigned long t_start = jiffies;
+
+ while (1) {
+ if (!(mtk_r32(eth, MTK_PHY_IAC) & PHY_IAC_ACCESS))
+ return 0;
+ if (time_after(jiffies, t_start + PHY_IAC_TIMEOUT))
+ break;
+ usleep_range(10, 20);
+ }
+
+ dev_err(eth->dev, "mdio: MDIO timeout\n");
+ return -1;
+}
+
+u32 _mtk_mdio_write(struct mtk_eth *eth, u32 phy_addr,
+ u32 phy_register, u32 write_data)
+{
+ if (mtk_mdio_busy_wait(eth))
+ return -1;
+
+ write_data &= 0xffff;
+
+ mtk_w32(eth, PHY_IAC_ACCESS | PHY_IAC_START | PHY_IAC_WRITE |
+ (phy_register << PHY_IAC_REG_SHIFT) |
+ (phy_addr << PHY_IAC_ADDR_SHIFT) | write_data,
+ MTK_PHY_IAC);
+
+ if (mtk_mdio_busy_wait(eth))
+ return -1;
+
+ return 0;
+}
+
+u32 _mtk_mdio_read(struct mtk_eth *eth, int phy_addr, int phy_reg)
+{
+ u32 d;
+
+ if (mtk_mdio_busy_wait(eth))
+ return 0xffff;
+
+ mtk_w32(eth, PHY_IAC_ACCESS | PHY_IAC_START | PHY_IAC_READ |
+ (phy_reg << PHY_IAC_REG_SHIFT) |
+ (phy_addr << PHY_IAC_ADDR_SHIFT),
+ MTK_PHY_IAC);
+
+ if (mtk_mdio_busy_wait(eth))
+ return 0xffff;
+
+ d = mtk_r32(eth, MTK_PHY_IAC) & 0xffff;
+
+ return d;
+}
+
+static int mtk_mdio_write(struct mii_bus *bus, int phy_addr,
+ int phy_reg, u16 val)
+{
+ struct mtk_eth *eth = bus->priv;
+
+ return _mtk_mdio_write(eth, phy_addr, phy_reg, val);
+}
+
+static int mtk_mdio_read(struct mii_bus *bus, int phy_addr, int phy_reg)
+{
+ struct mtk_eth *eth = bus->priv;
+
+ return _mtk_mdio_read(eth, phy_addr, phy_reg);
+}
+
+static void mtk_phy_link_adjust(struct net_device *dev)
+{
+ struct mtk_mac *mac = netdev_priv(dev);
+ u32 mcr = MAC_MCR_MAX_RX_1536 | MAC_MCR_IPG_CFG |
+ MAC_MCR_FORCE_MODE | MAC_MCR_TX_EN |
+ MAC_MCR_RX_EN | MAC_MCR_BACKOFF_EN |
+ MAC_MCR_BACKPR_EN;
+
+ switch (mac->phy_dev->speed) {
+ case SPEED_1000:
+ mcr |= MAC_MCR_SPEED_1000;
+ break;
+ case SPEED_100:
+ mcr |= MAC_MCR_SPEED_100;
+ break;
+ };
+
+ if (mac->phy_dev->link)
+ mcr |= MAC_MCR_FORCE_LINK;
+
+ if (mac->phy_dev->duplex)
+ mcr |= MAC_MCR_FORCE_DPX;
+
+ if (mac->phy_dev->pause)
+ mcr |= MAC_MCR_FORCE_RX_FC | MAC_MCR_FORCE_TX_FC;
+
+ mtk_w32(mac->hw, mcr, MTK_MAC_MCR(mac->id));
+
+ if (mac->phy_dev->link)
+ netif_carrier_on(dev);
+ else
+ netif_carrier_off(dev);
+}
+
+static int mtk_phy_connect_node(struct mtk_eth *eth, struct mtk_mac *mac,
+ struct device_node *phy_node)
+{
+ const __be32 *_addr = NULL;
+ struct phy_device *phydev;
+ int phy_mode, addr;
+
+ _addr = of_get_property(phy_node, "reg", NULL);
+
+ if (!_addr || (be32_to_cpu(*_addr) >= 0x20)) {
+ pr_err("%s: invalid phy address\n", phy_node->name);
+ return -EINVAL;
+ }
+ addr = be32_to_cpu(*_addr);
+ phy_mode = of_get_phy_mode(phy_node);
+ if (phy_mode < 0) {
+ dev_err(eth->dev, "incorrect phy-mode %d\n", phy_mode);
+ return -EINVAL;
+ }
+
+ phydev = of_phy_connect(eth->netdev[mac->id], phy_node,
+ mtk_phy_link_adjust, 0, phy_mode);
+ if (IS_ERR(phydev)) {
+ dev_err(eth->dev, "could not connect to PHY\n");
+ return PTR_ERR(phydev);
+ }
+
+ dev_info(eth->dev,
+ "connected mac %d to PHY at %s [uid=%08x, driver=%s]\n",
+ mac->id, phydev_name(phydev), phydev->phy_id,
+ phydev->drv->name);
+
+ mac->phy_dev = phydev;
+
+ return 0;
+}
+
+static int mtk_phy_connect(struct mtk_mac *mac)
+{
+ struct mtk_eth *eth = mac->hw;
+ struct device_node *np;
+ u32 val, ge_mode;
+
+ np = of_parse_phandle(mac->of_node, "phy-handle", 0);
+ if (!np)
+ return -ENODEV;
+
+ switch (of_get_phy_mode(np)) {
+ case PHY_INTERFACE_MODE_RGMII:
+ ge_mode = 0;
+ break;
+ case PHY_INTERFACE_MODE_MII:
+ ge_mode = 1;
+ break;
+ case PHY_INTERFACE_MODE_RMII:
+ ge_mode = 2;
+ break;
+ default:
+ dev_err(eth->dev, "invalid phy_mode\n");
+ return -1;
+ }
+
+ /* put the gmac into the right mode */
+ regmap_read(eth->ethsys, ETHSYS_SYSCFG0, &val);
+ val &= ~SYSCFG0_GE_MODE(SYSCFG0_GE_MASK, mac->id);
+ val |= SYSCFG0_GE_MODE(ge_mode, mac->id);
+ regmap_write(eth->ethsys, ETHSYS_SYSCFG0, val);
+
+ mtk_phy_connect_node(eth, mac, np);
+ mac->phy_dev->autoneg = AUTONEG_ENABLE;
+ mac->phy_dev->speed = 0;
+ mac->phy_dev->duplex = 0;
+ mac->phy_dev->supported &= PHY_BASIC_FEATURES;
+ mac->phy_dev->advertising = mac->phy_dev->supported |
+ ADVERTISED_Autoneg;
+ phy_start_aneg(mac->phy_dev);
+
+ return 0;
+}
+
+static int mtk_mdio_init(struct mtk_eth *eth)
+{
+ struct device_node *mii_np;
+ int err;
+
+ mii_np = of_get_child_by_name(eth->dev->of_node, "mdio-bus");
+ if (!mii_np) {
+ dev_err(eth->dev, "no %s child node found", "mdio-bus");
+ return -ENODEV;
+ }
+
+ if (!of_device_is_available(mii_np)) {
+ err = 0;
+ goto err_put_node;
+ }
+
+ eth->mii_bus = mdiobus_alloc();
+ if (!eth->mii_bus) {
+ err = -ENOMEM;
+ goto err_put_node;
+ }
+
+ eth->mii_bus->name = "mdio";
+ eth->mii_bus->read = mtk_mdio_read;
+ eth->mii_bus->write = mtk_mdio_write;
+ eth->mii_bus->priv = eth;
+ eth->mii_bus->parent = eth->dev;
+
+ snprintf(eth->mii_bus->id, MII_BUS_ID_SIZE, "%s", mii_np->name);
+ err = of_mdiobus_register(eth->mii_bus, mii_np);
+ if (err)
+ goto err_free_bus;
+
+ return 0;
+
+err_free_bus:
+ kfree(eth->mii_bus);
+
+err_put_node:
+ of_node_put(mii_np);
+ eth->mii_bus = NULL;
+ return err;
+}
+
+static void mtk_mdio_cleanup(struct mtk_eth *eth)
+{
+ if (!eth->mii_bus)
+ return;
+
+ mdiobus_unregister(eth->mii_bus);
+ of_node_put(eth->mii_bus->dev.of_node);
+ kfree(eth->mii_bus);
+}
+
+static inline void mtk_irq_disable(struct mtk_eth *eth, u32 mask)
+{
+ u32 val;
+
+ val = mtk_r32(eth, MTK_QDMA_INT_MASK);
+ mtk_w32(eth, val & ~mask, MTK_QDMA_INT_MASK);
+ /* flush write */
+ mtk_r32(eth, MTK_QDMA_INT_MASK);
+}
+
+static inline void mtk_irq_enable(struct mtk_eth *eth, u32 mask)
+{
+ u32 val;
+
+ val = mtk_r32(eth, MTK_QDMA_INT_MASK);
+ mtk_w32(eth, val | mask, MTK_QDMA_INT_MASK);
+ /* flush write */
+ mtk_r32(eth, MTK_QDMA_INT_MASK);
+}
+
+static int mtk_set_mac_address(struct net_device *dev, void *p)
+{
+ int ret = eth_mac_addr(dev, p);
+ struct mtk_mac *mac = netdev_priv(dev);
+ const char *macaddr = dev->dev_addr;
+ unsigned long flags;
+
+ if (ret)
+ return ret;
+
+ spin_lock_irqsave(&mac->hw->page_lock, flags);
+ mtk_w32(mac->hw, (macaddr[0] << 8) | macaddr[1],
+ MTK_GDMA_MAC_ADRH(mac->id));
+ mtk_w32(mac->hw, (macaddr[2] << 24) | (macaddr[3] << 16) |
+ (macaddr[4] << 8) | macaddr[5],
+ MTK_GDMA_MAC_ADRL(mac->id));
+ spin_unlock_irqrestore(&mac->hw->page_lock, flags);
+
+ return 0;
+}
+
+void mtk_stats_update_mac(struct mtk_mac *mac)
+{
+ struct mtk_hw_stats *hw_stats = mac->hw_stats;
+ unsigned int base = MTK_GDM1_TX_GBCNT;
+ u64 stats;
+
+ base += hw_stats->reg_offset;
+
+ u64_stats_update_begin(&hw_stats->syncp);
+
+ hw_stats->rx_bytes += mtk_r32(mac->hw, base);
+ stats = mtk_r32(mac->hw, base + 0x04);
+ if (stats)
+ hw_stats->rx_bytes += (stats << 32);
+ hw_stats->rx_packets += mtk_r32(mac->hw, base + 0x08);
+ hw_stats->rx_overflow += mtk_r32(mac->hw, base + 0x10);
+ hw_stats->rx_fcs_errors += mtk_r32(mac->hw, base + 0x14);
+ hw_stats->rx_short_errors += mtk_r32(mac->hw, base + 0x18);
+ hw_stats->rx_long_errors += mtk_r32(mac->hw, base + 0x1c);
+ hw_stats->rx_checksum_errors += mtk_r32(mac->hw, base + 0x20);
+ hw_stats->rx_flow_control_packets +=
+ mtk_r32(mac->hw, base + 0x24);
+ hw_stats->tx_skip += mtk_r32(mac->hw, base + 0x28);
+ hw_stats->tx_collisions += mtk_r32(mac->hw, base + 0x2c);
+ hw_stats->tx_bytes += mtk_r32(mac->hw, base + 0x30);
+ stats = mtk_r32(mac->hw, base + 0x34);
+ if (stats)
+ hw_stats->tx_bytes += (stats << 32);
+ hw_stats->tx_packets += mtk_r32(mac->hw, base + 0x38);
+ u64_stats_update_end(&hw_stats->syncp);
+}
+
+static void mtk_stats_update(struct mtk_eth *eth)
+{
+ int i;
+
+ for (i = 0; i < MTK_MAC_COUNT; i++) {
+ if (!eth->mac[i] || !eth->mac[i]->hw_stats)
+ continue;
+ if (spin_trylock(&eth->mac[i]->hw_stats->stats_lock)) {
+ mtk_stats_update_mac(eth->mac[i]);
+ spin_unlock(&eth->mac[i]->hw_stats->stats_lock);
+ }
+ }
+}
+
+static struct rtnl_link_stats64 *mtk_get_stats64(struct net_device *dev,
+ struct rtnl_link_stats64 *storage)
+{
+ struct mtk_mac *mac = netdev_priv(dev);
+ struct mtk_hw_stats *hw_stats = mac->hw_stats;
+ unsigned int start;
+
+ if (netif_running(dev) && netif_device_present(dev)) {
+ if (spin_trylock(&hw_stats->stats_lock)) {
+ mtk_stats_update_mac(mac);
+ spin_unlock(&hw_stats->stats_lock);
+ }
+ }
+
+ do {
+ start = u64_stats_fetch_begin_irq(&hw_stats->syncp);
+ storage->rx_packets = hw_stats->rx_packets;
+ storage->tx_packets = hw_stats->tx_packets;
+ storage->rx_bytes = hw_stats->rx_bytes;
+ storage->tx_bytes = hw_stats->tx_bytes;
+ storage->collisions = hw_stats->tx_collisions;
+ storage->rx_length_errors = hw_stats->rx_short_errors +
+ hw_stats->rx_long_errors;
+ storage->rx_over_errors = hw_stats->rx_overflow;
+ storage->rx_crc_errors = hw_stats->rx_fcs_errors;
+ storage->rx_errors = hw_stats->rx_checksum_errors;
+ storage->tx_aborted_errors = hw_stats->tx_skip;
+ } while (u64_stats_fetch_retry_irq(&hw_stats->syncp, start));
+
+ storage->tx_errors = dev->stats.tx_errors;
+ storage->rx_dropped = dev->stats.rx_dropped;
+ storage->tx_dropped = dev->stats.tx_dropped;
+
+ return storage;
+}
+
+static inline int mtk_max_frag_size(int mtu)
+{
+ /* make sure buf_size will be at least MTK_MAX_RX_LENGTH */
+ if (mtu + MTK_RX_ETH_HLEN < MTK_MAX_RX_LENGTH)
+ mtu = MTK_MAX_RX_LENGTH - MTK_RX_ETH_HLEN;
+
+ return SKB_DATA_ALIGN(MTK_RX_HLEN + mtu) +
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+}
+
+static inline int mtk_max_buf_size(int frag_size)
+{
+ int buf_size = frag_size - NET_SKB_PAD - NET_IP_ALIGN -
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
+ WARN_ON(buf_size < MTK_MAX_RX_LENGTH);
+
+ return buf_size;
+}
+
+static inline void mtk_rx_get_desc(struct mtk_rx_dma *rxd,
+ struct mtk_rx_dma *dma_rxd)
+{
+ rxd->rxd1 = READ_ONCE(dma_rxd->rxd1);
+ rxd->rxd2 = READ_ONCE(dma_rxd->rxd2);
+ rxd->rxd3 = READ_ONCE(dma_rxd->rxd3);
+ rxd->rxd4 = READ_ONCE(dma_rxd->rxd4);
+}
+
+/* the qdma core needs scratch memory to be setup */
+static int mtk_init_fq_dma(struct mtk_eth *eth)
+{
+ unsigned int phy_ring_head, phy_ring_tail;
+ int cnt = MTK_DMA_SIZE;
+ dma_addr_t dma_addr;
+ int i;
+
+ eth->scratch_ring = dma_alloc_coherent(eth->dev,
+ cnt * sizeof(struct mtk_tx_dma),
+ &phy_ring_head,
+ GFP_ATOMIC | __GFP_ZERO);
+ if (unlikely(!eth->scratch_ring))
+ return -ENOMEM;
+
+ eth->scratch_head = kcalloc(cnt, MTK_QDMA_PAGE_SIZE,
+ GFP_KERNEL);
+ dma_addr = dma_map_single(eth->dev,
+ eth->scratch_head, cnt * MTK_QDMA_PAGE_SIZE,
+ DMA_FROM_DEVICE);
+ if (unlikely(dma_mapping_error(eth->dev, dma_addr)))
+ return -ENOMEM;
+
+ memset(eth->scratch_ring, 0x0, sizeof(struct mtk_tx_dma) * cnt);
+ phy_ring_tail = phy_ring_head +
+ (sizeof(struct mtk_tx_dma) * (cnt - 1));
+
+ for (i = 0; i < cnt; i++) {
+ eth->scratch_ring[i].txd1 =
+ (dma_addr + (i * MTK_QDMA_PAGE_SIZE));
+ if (i < cnt - 1)
+ eth->scratch_ring[i].txd2 = (phy_ring_head +
+ ((i + 1) * sizeof(struct mtk_tx_dma)));
+ eth->scratch_ring[i].txd3 = TX_DMA_SDL(MTK_QDMA_PAGE_SIZE);
+ }
+
+ mtk_w32(eth, phy_ring_head, MTK_QDMA_FQ_HEAD);
+ mtk_w32(eth, phy_ring_tail, MTK_QDMA_FQ_TAIL);
+ mtk_w32(eth, (cnt << 16) | cnt, MTK_QDMA_FQ_CNT);
+ mtk_w32(eth, MTK_QDMA_PAGE_SIZE << 16, MTK_QDMA_FQ_BLEN);
+
+ return 0;
+}
+
+static inline void *mtk_qdma_phys_to_virt(struct mtk_tx_ring *ring, u32 desc)
+{
+ void *ret = ring->dma;
+
+ return ret + (desc - ring->phys);
+}
+
+static inline struct mtk_tx_buf *mtk_desc_to_tx_buf(struct mtk_tx_ring *ring,
+ struct mtk_tx_dma *txd)
+{
+ int idx = txd - ring->dma;
+
+ return &ring->buf[idx];
+}
+
+static void mtk_tx_unmap(struct device *dev, struct mtk_tx_buf *tx_buf)
+{
+ if (tx_buf->flags & MTK_TX_FLAGS_SINGLE0) {
+ dma_unmap_single(dev,
+ dma_unmap_addr(tx_buf, dma_addr0),
+ dma_unmap_len(tx_buf, dma_len0),
+ DMA_TO_DEVICE);
+ } else if (tx_buf->flags & MTK_TX_FLAGS_PAGE0) {
+ dma_unmap_page(dev,
+ dma_unmap_addr(tx_buf, dma_addr0),
+ dma_unmap_len(tx_buf, dma_len0),
+ DMA_TO_DEVICE);
+ }
+ tx_buf->flags = 0;
+ if (tx_buf->skb &&
+ (tx_buf->skb != (struct sk_buff *)MTK_DMA_DUMMY_DESC))
+ dev_kfree_skb_any(tx_buf->skb);
+ tx_buf->skb = NULL;
+}
+
+static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
+ int tx_num, struct mtk_tx_ring *ring, bool gso)
+{
+ struct mtk_mac *mac = netdev_priv(dev);
+ struct mtk_eth *eth = mac->hw;
+ struct mtk_tx_dma *itxd, *txd;
+ struct mtk_tx_buf *tx_buf;
+ unsigned long flags;
+ dma_addr_t mapped_addr;
+ unsigned int nr_frags;
+ int i, n_desc = 1;
+ u32 txd4 = 0;
+
+ itxd = ring->next_free;
+ if (itxd == ring->last_free)
+ return -ENOMEM;
+
+ /* set the forward port */
+ txd4 |= (mac->id + 1) << TX_DMA_FPORT_SHIFT;
+
+ tx_buf = mtk_desc_to_tx_buf(ring, itxd);
+ memset(tx_buf, 0, sizeof(*tx_buf));
+
+ if (gso)
+ txd4 |= TX_DMA_TSO;
+
+ /* TX Checksum offload */
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
+ txd4 |= TX_DMA_CHKSUM;
+
+ /* VLAN header offload */
+ if (skb_vlan_tag_present(skb))
+ txd4 |= TX_DMA_INS_VLAN | skb_vlan_tag_get(skb);
+
+ mapped_addr = dma_map_single(&dev->dev, skb->data,
+ skb_headlen(skb), DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(&dev->dev, mapped_addr)))
+ return -ENOMEM;
+
+ /* normally we can rely on the stack not calling this more than once,
+ * however we have 2 queues running ont he same ring so we need to lock
+ * the ring access
+ */
+ spin_lock_irqsave(&eth->page_lock, flags);
+ WRITE_ONCE(itxd->txd1, mapped_addr);
+ tx_buf->flags |= MTK_TX_FLAGS_SINGLE0;
+ dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr);
+ dma_unmap_len_set(tx_buf, dma_len0, skb_headlen(skb));
+
+ /* TX SG offload */
+ txd = itxd;
+ nr_frags = skb_shinfo(skb)->nr_frags;
+ for (i = 0; i < nr_frags; i++) {
+ struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
+ unsigned int offset = 0;
+ int frag_size = skb_frag_size(frag);
+
+ while (frag_size) {
+ bool last_frag = false;
+ unsigned int frag_map_size;
+
+ txd = mtk_qdma_phys_to_virt(ring, txd->txd2);
+ if (txd == ring->last_free)
+ goto err_dma;
+
+ n_desc++;
+ frag_map_size = min(frag_size, MTK_TX_DMA_BUF_LEN);
+ mapped_addr = skb_frag_dma_map(&dev->dev, frag, offset,
+ frag_map_size,
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(&dev->dev, mapped_addr)))
+ goto err_dma;
+
+ if (i == nr_frags - 1 &&
+ (frag_size - frag_map_size) == 0)
+ last_frag = true;
+
+ WRITE_ONCE(txd->txd1, mapped_addr);
+ WRITE_ONCE(txd->txd3, (TX_DMA_SWC |
+ TX_DMA_PLEN0(frag_map_size) |
+ last_frag * TX_DMA_LS0) |
+ mac->id);
+ WRITE_ONCE(txd->txd4, 0);
+
+ tx_buf->skb = (struct sk_buff *)MTK_DMA_DUMMY_DESC;
+ tx_buf = mtk_desc_to_tx_buf(ring, txd);
+ memset(tx_buf, 0, sizeof(*tx_buf));
+
+ tx_buf->flags |= MTK_TX_FLAGS_PAGE0;
+ dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr);
+ dma_unmap_len_set(tx_buf, dma_len0, frag_map_size);
+ frag_size -= frag_map_size;
+ offset += frag_map_size;
+ }
+ }
+
+ /* store skb to cleanup */
+ tx_buf->skb = skb;
+
+ WRITE_ONCE(itxd->txd4, txd4);
+ WRITE_ONCE(itxd->txd3, (TX_DMA_SWC | TX_DMA_PLEN0(skb_headlen(skb)) |
+ (!nr_frags * TX_DMA_LS0)));
+
+ spin_unlock_irqrestore(&eth->page_lock, flags);
+
+ netdev_sent_queue(dev, skb->len);
+ skb_tx_timestamp(skb);
+
+ ring->next_free = mtk_qdma_phys_to_virt(ring, txd->txd2);
+ atomic_sub(n_desc, &ring->free_count);
+
+ /* make sure that all changes to the dma ring are flushed before we
+ * continue
+ */
+ wmb();
+
+ if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)) || !skb->xmit_more)
+ mtk_w32(eth, txd->txd2, MTK_QTX_CTX_PTR);
+
+ return 0;
+
+err_dma:
+ do {
+ tx_buf = mtk_desc_to_tx_buf(ring, txd);
+
+ /* unmap dma */
+ mtk_tx_unmap(&dev->dev, tx_buf);
+
+ itxd->txd3 = TX_DMA_LS0 | TX_DMA_OWNER_CPU;
+ itxd = mtk_qdma_phys_to_virt(ring, itxd->txd2);
+ } while (itxd != txd);
+
+ return -ENOMEM;
+}
+
+static inline int mtk_cal_txd_req(struct sk_buff *skb)
+{
+ int i, nfrags;
+ struct skb_frag_struct *frag;
+
+ nfrags = 1;
+ if (skb_is_gso(skb)) {
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ frag = &skb_shinfo(skb)->frags[i];
+ nfrags += DIV_ROUND_UP(frag->size, MTK_TX_DMA_BUF_LEN);
+ }
+ } else {
+ nfrags += skb_shinfo(skb)->nr_frags;
+ }
+
+ return DIV_ROUND_UP(nfrags, 2);
+}
+
+static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct mtk_mac *mac = netdev_priv(dev);
+ struct mtk_eth *eth = mac->hw;
+ struct mtk_tx_ring *ring = &eth->tx_ring;
+ struct net_device_stats *stats = &dev->stats;
+ bool gso = false;
+ int tx_num;
+
+ tx_num = mtk_cal_txd_req(skb);
+ if (unlikely(atomic_read(&ring->free_count) <= tx_num)) {
+ netif_stop_queue(dev);
+ netif_err(eth, tx_queued, dev,
+ "Tx Ring full when queue awake!\n");
+ return NETDEV_TX_BUSY;
+ }
+
+ /* TSO: fill MSS info in tcp checksum field */
+ if (skb_is_gso(skb)) {
+ if (skb_cow_head(skb, 0)) {
+ netif_warn(eth, tx_err, dev,
+ "GSO expand head fail.\n");
+ goto drop;
+ }
+
+ if (skb_shinfo(skb)->gso_type &
+ (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) {
+ gso = true;
+ tcp_hdr(skb)->check = htons(skb_shinfo(skb)->gso_size);
+ }
+ }
+
+ if (mtk_tx_map(skb, dev, tx_num, ring, gso) < 0)
+ goto drop;
+
+ if (unlikely(atomic_read(&ring->free_count) <= ring->thresh)) {
+ netif_stop_queue(dev);
+ if (unlikely(atomic_read(&ring->free_count) >
+ ring->thresh))
+ netif_wake_queue(dev);
+ }
+
+ return NETDEV_TX_OK;
+
+drop:
+ stats->tx_dropped++;
+ dev_kfree_skb(skb);
+ return NETDEV_TX_OK;
+}
+
+static int mtk_poll_rx(struct napi_struct *napi, int budget,
+ struct mtk_eth *eth, u32 rx_intr)
+{
+ struct mtk_rx_ring *ring = &eth->rx_ring;
+ int idx = ring->calc_idx;
+ struct sk_buff *skb;
+ u8 *data, *new_data;
+ struct mtk_rx_dma *rxd, trxd;
+ int done = 0;
+
+ while (done < budget) {
+ struct net_device *netdev;
+ unsigned int pktlen;
+ dma_addr_t dma_addr;
+ int mac = 0;
+
+ idx = NEXT_RX_DESP_IDX(idx);
+ rxd = &ring->dma[idx];
+ data = ring->data[idx];
+
+ mtk_rx_get_desc(&trxd, rxd);
+ if (!(trxd.rxd2 & RX_DMA_DONE))
+ break;
+
+ /* find out which mac the packet come from. values start at 1 */
+ mac = (trxd.rxd4 >> RX_DMA_FPORT_SHIFT) &
+ RX_DMA_FPORT_MASK;
+ mac--;
+
+ netdev = eth->netdev[mac];
+
+ /* alloc new buffer */
+ new_data = napi_alloc_frag(ring->frag_size);
+ if (unlikely(!new_data)) {
+ netdev->stats.rx_dropped++;
+ goto release_desc;
+ }
+ dma_addr = dma_map_single(&eth->netdev[mac]->dev,
+ new_data + NET_SKB_PAD,
+ ring->buf_size,
+ DMA_FROM_DEVICE);
+ if (unlikely(dma_mapping_error(&netdev->dev, dma_addr))) {
+ skb_free_frag(new_data);
+ goto release_desc;
+ }
+
+ /* receive data */
+ skb = build_skb(data, ring->frag_size);
+ if (unlikely(!skb)) {
+ put_page(virt_to_head_page(new_data));
+ goto release_desc;
+ }
+ skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
+
+ dma_unmap_single(&netdev->dev, trxd.rxd1,
+ ring->buf_size, DMA_FROM_DEVICE);
+ pktlen = RX_DMA_GET_PLEN0(trxd.rxd2);
+ skb->dev = netdev;
+ skb_put(skb, pktlen);
+ if (trxd.rxd4 & RX_DMA_L4_VALID)
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ else
+ skb_checksum_none_assert(skb);
+ skb->protocol = eth_type_trans(skb, netdev);
+
+ if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX &&
+ RX_DMA_VID(trxd.rxd3))
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+ RX_DMA_VID(trxd.rxd3));
+ napi_gro_receive(napi, skb);
+
+ ring->data[idx] = new_data;
+ rxd->rxd1 = (unsigned int)dma_addr;
+
+release_desc:
+ rxd->rxd2 = RX_DMA_PLEN0(ring->buf_size);
+
+ ring->calc_idx = idx;
+ /* make sure that all changes to the dma ring are flushed before
+ * we continue
+ */
+ wmb();
+ mtk_w32(eth, ring->calc_idx, MTK_QRX_CRX_IDX0);
+ done++;
+ }
+
+ if (done < budget)
+ mtk_w32(eth, rx_intr, MTK_QMTK_INT_STATUS);
+
+ return done;
+}
+
+static int mtk_poll_tx(struct mtk_eth *eth, int budget, bool *tx_again)
+{
+ struct mtk_tx_ring *ring = &eth->tx_ring;
+ struct mtk_tx_dma *desc;
+ struct sk_buff *skb;
+ struct mtk_tx_buf *tx_buf;
+ int total = 0, done[MTK_MAX_DEVS];
+ unsigned int bytes[MTK_MAX_DEVS];
+ u32 cpu, dma;
+ static int condition;
+ int i;
+
+ memset(done, 0, sizeof(done));
+ memset(bytes, 0, sizeof(bytes));
+
+ cpu = mtk_r32(eth, MTK_QTX_CRX_PTR);
+ dma = mtk_r32(eth, MTK_QTX_DRX_PTR);
+
+ desc = mtk_qdma_phys_to_virt(ring, cpu);
+
+ while ((cpu != dma) && budget) {
+ u32 next_cpu = desc->txd2;
+ int mac;
+
+ desc = mtk_qdma_phys_to_virt(ring, desc->txd2);
+ if ((desc->txd3 & TX_DMA_OWNER_CPU) == 0)
+ break;
+
+ mac = (desc->txd4 >> TX_DMA_FPORT_SHIFT) &
+ TX_DMA_FPORT_MASK;
+ mac--;
+
+ tx_buf = mtk_desc_to_tx_buf(ring, desc);
+ skb = tx_buf->skb;
+ if (!skb) {
+ condition = 1;
+ break;
+ }
+
+ if (skb != (struct sk_buff *)MTK_DMA_DUMMY_DESC) {
+ bytes[mac] += skb->len;
+ done[mac]++;
+ budget--;
+ }
+ mtk_tx_unmap(eth->dev, tx_buf);
+
+ ring->last_free->txd2 = next_cpu;
+ ring->last_free = desc;
+ atomic_inc(&ring->free_count);
+
+ cpu = next_cpu;
+ }
+
+ mtk_w32(eth, cpu, MTK_QTX_CRX_PTR);
+
+ for (i = 0; i < MTK_MAC_COUNT; i++) {
+ if (!eth->netdev[i] || !done[i])
+ continue;
+ netdev_completed_queue(eth->netdev[i], done[i], bytes[i]);
+ total += done[i];
+ }
+
+ /* read hw index again make sure no new tx packet */
+ if (cpu != dma || cpu != mtk_r32(eth, MTK_QTX_DRX_PTR))
+ *tx_again = true;
+ else
+ mtk_w32(eth, MTK_TX_DONE_INT, MTK_QMTK_INT_STATUS);
+
+ if (!total)
+ return 0;
+
+ for (i = 0; i < MTK_MAC_COUNT; i++) {
+ if (!eth->netdev[i] ||
+ unlikely(!netif_queue_stopped(eth->netdev[i])))
+ continue;
+ if (atomic_read(&ring->free_count) > ring->thresh)
+ netif_wake_queue(eth->netdev[i]);
+ }
+
+ return total;
+}
+
+static int mtk_poll(struct napi_struct *napi, int budget)
+{
+ struct mtk_eth *eth = container_of(napi, struct mtk_eth, rx_napi);
+ u32 status, status2, mask, tx_intr, rx_intr, status_intr;
+ int tx_done, rx_done;
+ bool tx_again = false;
+
+ status = mtk_r32(eth, MTK_QMTK_INT_STATUS);
+ status2 = mtk_r32(eth, MTK_INT_STATUS2);
+ tx_intr = MTK_TX_DONE_INT;
+ rx_intr = MTK_RX_DONE_INT;
+ status_intr = (MTK_GDM1_AF | MTK_GDM2_AF);
+ tx_done = 0;
+ rx_done = 0;
+ tx_again = 0;
+
+ if (status & tx_intr)
+ tx_done = mtk_poll_tx(eth, budget, &tx_again);
+
+ if (status & rx_intr)
+ rx_done = mtk_poll_rx(napi, budget, eth, rx_intr);
+
+ if (unlikely(status2 & status_intr)) {
+ mtk_stats_update(eth);
+ mtk_w32(eth, status_intr, MTK_INT_STATUS2);
+ }
+
+ if (unlikely(netif_msg_intr(eth))) {
+ mask = mtk_r32(eth, MTK_QDMA_INT_MASK);
+ netdev_info(eth->netdev[0],
+ "done tx %d, rx %d, intr 0x%08x/0x%x\n",
+ tx_done, rx_done, status, mask);
+ }
+
+ if (tx_again || rx_done == budget)
+ return budget;
+
+ status = mtk_r32(eth, MTK_QMTK_INT_STATUS);
+ if (status & (tx_intr | rx_intr))
+ return budget;
+
+ napi_complete(napi);
+ mtk_irq_enable(eth, tx_intr | rx_intr);
+
+ return rx_done;
+}
+
+static int mtk_tx_alloc(struct mtk_eth *eth)
+{
+ struct mtk_tx_ring *ring = &eth->tx_ring;
+ int i, sz = sizeof(*ring->dma);
+
+ ring->buf = kcalloc(MTK_DMA_SIZE, sizeof(*ring->buf),
+ GFP_KERNEL);
+ if (!ring->buf)
+ goto no_tx_mem;
+
+ ring->dma = dma_alloc_coherent(eth->dev,
+ MTK_DMA_SIZE * sz,
+ &ring->phys,
+ GFP_ATOMIC | __GFP_ZERO);
+ if (!ring->dma)
+ goto no_tx_mem;
+
+ memset(ring->dma, 0, MTK_DMA_SIZE * sz);
+ for (i = 0; i < MTK_DMA_SIZE; i++) {
+ int next = (i + 1) % MTK_DMA_SIZE;
+ u32 next_ptr = ring->phys + next * sz;
+
+ ring->dma[i].txd2 = next_ptr;
+ ring->dma[i].txd3 = TX_DMA_LS0 | TX_DMA_OWNER_CPU;
+ }
+
+ atomic_set(&ring->free_count, MTK_DMA_SIZE - 2);
+ ring->next_free = &ring->dma[0];
+ ring->last_free = &ring->dma[MTK_DMA_SIZE - 2];
+ ring->thresh = max((unsigned long)MTK_DMA_SIZE >> 2,
+ MAX_SKB_FRAGS);
+
+ /* make sure that all changes to the dma ring are flushed before we
+ * continue
+ */
+ wmb();
+
+ mtk_w32(eth, ring->phys, MTK_QTX_CTX_PTR);
+ mtk_w32(eth, ring->phys, MTK_QTX_DTX_PTR);
+ mtk_w32(eth,
+ ring->phys + ((MTK_DMA_SIZE - 1) * sz),
+ MTK_QTX_CRX_PTR);
+ mtk_w32(eth,
+ ring->phys + ((MTK_DMA_SIZE - 1) * sz),
+ MTK_QTX_DRX_PTR);
+
+ return 0;
+
+no_tx_mem:
+ return -ENOMEM;
+}
+
+static void mtk_tx_clean(struct mtk_eth *eth)
+{
+ struct mtk_tx_ring *ring = &eth->tx_ring;
+ int i;
+
+ if (ring->buf) {
+ for (i = 0; i < MTK_DMA_SIZE; i++)
+ mtk_tx_unmap(eth->dev, &ring->buf[i]);
+ kfree(ring->buf);
+ ring->buf = NULL;
+ }
+
+ if (ring->dma) {
+ dma_free_coherent(eth->dev,
+ MTK_DMA_SIZE * sizeof(*ring->dma),
+ ring->dma,
+ ring->phys);
+ ring->dma = NULL;
+ }
+}
+
+static int mtk_rx_alloc(struct mtk_eth *eth)
+{
+ struct mtk_rx_ring *ring = &eth->rx_ring;
+ int i;
+
+ ring->frag_size = mtk_max_frag_size(ETH_DATA_LEN);
+ ring->buf_size = mtk_max_buf_size(ring->frag_size);
+ ring->data = kcalloc(MTK_DMA_SIZE, sizeof(*ring->data),
+ GFP_KERNEL);
+ if (!ring->data)
+ return -ENOMEM;
+
+ for (i = 0; i < MTK_DMA_SIZE; i++) {
+ ring->data[i] = netdev_alloc_frag(ring->frag_size);
+ if (!ring->data[i])
+ return -ENOMEM;
+ }
+
+ ring->dma = dma_alloc_coherent(eth->dev,
+ MTK_DMA_SIZE * sizeof(*ring->dma),
+ &ring->phys,
+ GFP_ATOMIC | __GFP_ZERO);
+ if (!ring->dma)
+ return -ENOMEM;
+
+ for (i = 0; i < MTK_DMA_SIZE; i++) {
+ dma_addr_t dma_addr = dma_map_single(eth->dev,
+ ring->data[i] + NET_SKB_PAD,
+ ring->buf_size,
+ DMA_FROM_DEVICE);
+ if (unlikely(dma_mapping_error(eth->dev, dma_addr)))
+ return -ENOMEM;
+ ring->dma[i].rxd1 = (unsigned int)dma_addr;
+
+ ring->dma[i].rxd2 = RX_DMA_PLEN0(ring->buf_size);
+ }
+ ring->calc_idx = MTK_DMA_SIZE - 1;
+ /* make sure that all changes to the dma ring are flushed before we
+ * continue
+ */
+ wmb();
+
+ mtk_w32(eth, eth->rx_ring.phys, MTK_QRX_BASE_PTR0);
+ mtk_w32(eth, MTK_DMA_SIZE, MTK_QRX_MAX_CNT0);
+ mtk_w32(eth, eth->rx_ring.calc_idx, MTK_QRX_CRX_IDX0);
+ mtk_w32(eth, MTK_PST_DRX_IDX0, MTK_QDMA_RST_IDX);
+ mtk_w32(eth, (QDMA_RES_THRES << 8) | QDMA_RES_THRES, MTK_QTX_CFG(0));
+
+ return 0;
+}
+
+static void mtk_rx_clean(struct mtk_eth *eth)
+{
+ struct mtk_rx_ring *ring = &eth->rx_ring;
+ int i;
+
+ if (ring->data && ring->dma) {
+ for (i = 0; i < MTK_DMA_SIZE; i++) {
+ if (!ring->data[i])
+ continue;
+ if (!ring->dma[i].rxd1)
+ continue;
+ dma_unmap_single(eth->dev,
+ ring->dma[i].rxd1,
+ ring->buf_size,
+ DMA_FROM_DEVICE);
+ skb_free_frag(ring->data[i]);
+ }
+ kfree(ring->data);
+ ring->data = NULL;
+ }
+
+ if (ring->dma) {
+ dma_free_coherent(eth->dev,
+ MTK_DMA_SIZE * sizeof(*ring->dma),
+ ring->dma,
+ ring->phys);
+ ring->dma = NULL;
+ }
+}
+
+/* wait for DMA to finish whatever it is doing before we start using it again */
+static int mtk_dma_busy_wait(struct mtk_eth *eth)
+{
+ unsigned long t_start = jiffies;
+
+ while (1) {
+ if (!(mtk_r32(eth, MTK_QDMA_GLO_CFG) &
+ (MTK_RX_DMA_BUSY | MTK_TX_DMA_BUSY)))
+ return 0;
+ if (time_after(jiffies, t_start + MTK_DMA_BUSY_TIMEOUT))
+ break;
+ }
+
+ dev_err(eth->dev, "DMA init timeout\n");
+ return -1;
+}
+
+static int mtk_dma_init(struct mtk_eth *eth)
+{
+ int err;
+
+ if (mtk_dma_busy_wait(eth))
+ return -EBUSY;
+
+ /* QDMA needs scratch memory for internal reordering of the
+ * descriptors
+ */
+ err = mtk_init_fq_dma(eth);
+ if (err)
+ return err;
+
+ err = mtk_tx_alloc(eth);
+ if (err)
+ return err;
+
+ err = mtk_rx_alloc(eth);
+ if (err)
+ return err;
+
+ /* Enable random early drop and set drop threshold automatically */
+ mtk_w32(eth, FC_THRES_DROP_MODE | FC_THRES_DROP_EN | FC_THRES_MIN,
+ MTK_QDMA_FC_THRES);
+ mtk_w32(eth, 0x0, MTK_QDMA_HRED2);
+
+ return 0;
+}
+
+static void mtk_dma_free(struct mtk_eth *eth)
+{
+ int i;
+
+ for (i = 0; i < MTK_MAC_COUNT; i++)
+ if (eth->netdev[i])
+ netdev_reset_queue(eth->netdev[i]);
+ mtk_tx_clean(eth);
+ mtk_rx_clean(eth);
+ kfree(eth->scratch_head);
+}
+
+static void mtk_tx_timeout(struct net_device *dev)
+{
+ struct mtk_mac *mac = netdev_priv(dev);
+ struct mtk_eth *eth = mac->hw;
+
+ eth->netdev[mac->id]->stats.tx_errors++;
+ netif_err(eth, tx_err, dev,
+ "transmit timed out\n");
+ schedule_work(&mac->pending_work);
+}
+
+static irqreturn_t mtk_handle_irq(int irq, void *_eth)
+{
+ struct mtk_eth *eth = _eth;
+ u32 status;
+
+ status = mtk_r32(eth, MTK_QMTK_INT_STATUS);
+ if (unlikely(!status))
+ return IRQ_NONE;
+
+ if (likely(status & (MTK_RX_DONE_INT | MTK_TX_DONE_INT))) {
+ if (likely(napi_schedule_prep(&eth->rx_napi)))
+ __napi_schedule(&eth->rx_napi);
+ } else {
+ mtk_w32(eth, status, MTK_QMTK_INT_STATUS);
+ }
+ mtk_irq_disable(eth, (MTK_RX_DONE_INT | MTK_TX_DONE_INT));
+
+ return IRQ_HANDLED;
+}
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+static void mtk_poll_controller(struct net_device *dev)
+{
+ struct mtk_mac *mac = netdev_priv(dev);
+ struct mtk_eth *eth = mac->hw;
+ u32 int_mask = MTK_TX_DONE_INT | MTK_RX_DONE_INT;
+
+ mtk_irq_disable(eth, int_mask);
+ mtk_handle_irq(dev->irq, dev);
+ mtk_irq_enable(eth, int_mask);
+}
+#endif
+
+static int mtk_start_dma(struct mtk_eth *eth)
+{
+ int err;
+
+ err = mtk_dma_init(eth);
+ if (err) {
+ mtk_dma_free(eth);
+ return err;
+ }
+
+ mtk_w32(eth,
+ MTK_TX_WB_DDONE | MTK_RX_DMA_EN | MTK_TX_DMA_EN |
+ MTK_RX_2B_OFFSET | MTK_DMA_SIZE_16DWORDS |
+ MTK_RX_BT_32DWORDS,
+ MTK_QDMA_GLO_CFG);
+
+ return 0;
+}
+
+static int mtk_open(struct net_device *dev)
+{
+ struct mtk_mac *mac = netdev_priv(dev);
+ struct mtk_eth *eth = mac->hw;
+
+ /* we run 2 netdevs on the same dma ring so we only bring it up once */
+ if (!atomic_read(&eth->dma_refcnt)) {
+ int err = mtk_start_dma(eth);
+
+ if (err)
+ return err;
+
+ napi_enable(&eth->rx_napi);
+ mtk_irq_enable(eth, MTK_TX_DONE_INT | MTK_RX_DONE_INT);
+ }
+ atomic_inc(&eth->dma_refcnt);
+
+ phy_start(mac->phy_dev);
+ netif_start_queue(dev);
+
+ return 0;
+}
+
+static void mtk_stop_dma(struct mtk_eth *eth, u32 glo_cfg)
+{
+ unsigned long flags;
+ u32 val;
+ int i;
+
+ /* stop the dma engine */
+ spin_lock_irqsave(&eth->page_lock, flags);
+ val = mtk_r32(eth, glo_cfg);
+ mtk_w32(eth, val & ~(MTK_TX_WB_DDONE | MTK_RX_DMA_EN | MTK_TX_DMA_EN),
+ glo_cfg);
+ spin_unlock_irqrestore(&eth->page_lock, flags);
+
+ /* wait for dma stop */
+ for (i = 0; i < 10; i++) {
+ val = mtk_r32(eth, glo_cfg);
+ if (val & (MTK_TX_DMA_BUSY | MTK_RX_DMA_BUSY)) {
+ msleep(20);
+ continue;
+ }
+ break;
+ }
+}
+
+static int mtk_stop(struct net_device *dev)
+{
+ struct mtk_mac *mac = netdev_priv(dev);
+ struct mtk_eth *eth = mac->hw;
+
+ netif_tx_disable(dev);
+ phy_stop(mac->phy_dev);
+
+ /* only shutdown DMA if this is the last user */
+ if (!atomic_dec_and_test(&eth->dma_refcnt))
+ return 0;
+
+ mtk_irq_disable(eth, MTK_TX_DONE_INT | MTK_RX_DONE_INT);
+ napi_disable(&eth->rx_napi);
+
+ mtk_stop_dma(eth, MTK_QDMA_GLO_CFG);
+
+ mtk_dma_free(eth);
+
+ return 0;
+}
+
+static int __init mtk_hw_init(struct mtk_eth *eth)
+{
+ int err, i;
+
+ /* reset the frame engine */
+ reset_control_assert(eth->rstc);
+ usleep_range(10, 20);
+ reset_control_deassert(eth->rstc);
+ usleep_range(10, 20);
+
+ /* Set GE2 driving and slew rate */
+ regmap_write(eth->pctl, GPIO_DRV_SEL10, 0xa00);
+
+ /* set GE2 TDSEL */
+ regmap_write(eth->pctl, GPIO_OD33_CTRL8, 0x5);
+
+ /* set GE2 TUNE */
+ regmap_write(eth->pctl, GPIO_BIAS_CTRL, 0x0);
+
+ /* GE1, Force 1000M/FD, FC ON */
+ mtk_w32(eth, MAC_MCR_FIXED_LINK, MTK_MAC_MCR(0));
+
+ /* GE2, Force 1000M/FD, FC ON */
+ mtk_w32(eth, MAC_MCR_FIXED_LINK, MTK_MAC_MCR(1));
+
+ /* Enable RX VLan Offloading */
+ mtk_w32(eth, 1, MTK_CDMP_EG_CTRL);
+
+ err = devm_request_irq(eth->dev, eth->irq, mtk_handle_irq, 0,
+ dev_name(eth->dev), eth);
+ if (err)
+ return err;
+
+ err = mtk_mdio_init(eth);
+ if (err)
+ return err;
+
+ /* disable delay and normal interrupt */
+ mtk_w32(eth, 0, MTK_QDMA_DELAY_INT);
+ mtk_irq_disable(eth, MTK_TX_DONE_INT | MTK_RX_DONE_INT);
+ mtk_w32(eth, RST_GL_PSE, MTK_RST_GL);
+ mtk_w32(eth, 0, MTK_RST_GL);
+
+ /* FE int grouping */
+ mtk_w32(eth, 0, MTK_FE_INT_GRP);
+
+ for (i = 0; i < 2; i++) {
+ u32 val = mtk_r32(eth, MTK_GDMA_FWD_CFG(i));
+
+ /* setup the forward port to send frame to QDMA */
+ val &= ~0xffff;
+ val |= 0x5555;
+
+ /* Enable RX checksum */
+ val |= MTK_GDMA_ICS_EN | MTK_GDMA_TCS_EN | MTK_GDMA_UCS_EN;
+
+ /* setup the mac dma */
+ mtk_w32(eth, val, MTK_GDMA_FWD_CFG(i));
+ }
+
+ return 0;
+}
+
+static int __init mtk_init(struct net_device *dev)
+{
+ struct mtk_mac *mac = netdev_priv(dev);
+ struct mtk_eth *eth = mac->hw;
+ const char *mac_addr;
+
+ mac_addr = of_get_mac_address(mac->of_node);
+ if (mac_addr)
+ ether_addr_copy(dev->dev_addr, mac_addr);
+
+ /* If the mac address is invalid, use random mac address */
+ if (!is_valid_ether_addr(dev->dev_addr)) {
+ random_ether_addr(dev->dev_addr);
+ dev_err(eth->dev, "generated random MAC address %pM\n",
+ dev->dev_addr);
+ dev->addr_assign_type = NET_ADDR_RANDOM;
+ }
+
+ return mtk_phy_connect(mac);
+}
+
+static void mtk_uninit(struct net_device *dev)
+{
+ struct mtk_mac *mac = netdev_priv(dev);
+ struct mtk_eth *eth = mac->hw;
+
+ phy_disconnect(mac->phy_dev);
+ mtk_mdio_cleanup(eth);
+ mtk_irq_disable(eth, ~0);
+ free_irq(dev->irq, dev);
+}
+
+static int mtk_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+ struct mtk_mac *mac = netdev_priv(dev);
+
+ switch (cmd) {
+ case SIOCGMIIPHY:
+ case SIOCGMIIREG:
+ case SIOCSMIIREG:
+ return phy_mii_ioctl(mac->phy_dev, ifr, cmd);
+ default:
+ break;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static void mtk_pending_work(struct work_struct *work)
+{
+ struct mtk_mac *mac = container_of(work, struct mtk_mac, pending_work);
+ struct mtk_eth *eth = mac->hw;
+ struct net_device *dev = eth->netdev[mac->id];
+ int err;
+
+ rtnl_lock();
+ mtk_stop(dev);
+
+ err = mtk_open(dev);
+ if (err) {
+ netif_alert(eth, ifup, dev,
+ "Driver up/down cycle failed, closing device.\n");
+ dev_close(dev);
+ }
+ rtnl_unlock();
+}
+
+static int mtk_cleanup(struct mtk_eth *eth)
+{
+ int i;
+
+ for (i = 0; i < MTK_MAC_COUNT; i++) {
+ struct mtk_mac *mac = netdev_priv(eth->netdev[i]);
+
+ if (!eth->netdev[i])
+ continue;
+
+ unregister_netdev(eth->netdev[i]);
+ free_netdev(eth->netdev[i]);
+ cancel_work_sync(&mac->pending_work);
+ }
+
+ return 0;
+}
+
+static int mtk_get_settings(struct net_device *dev,
+ struct ethtool_cmd *cmd)
+{
+ struct mtk_mac *mac = netdev_priv(dev);
+ int err;
+
+ err = phy_read_status(mac->phy_dev);
+ if (err)
+ return -ENODEV;
+
+ return phy_ethtool_gset(mac->phy_dev, cmd);
+}
+
+static int mtk_set_settings(struct net_device *dev,
+ struct ethtool_cmd *cmd)
+{
+ struct mtk_mac *mac = netdev_priv(dev);
+
+ if (cmd->phy_address != mac->phy_dev->mdio.addr) {
+ mac->phy_dev = mdiobus_get_phy(mac->hw->mii_bus,
+ cmd->phy_address);
+ if (!mac->phy_dev)
+ return -ENODEV;
+ }
+
+ return phy_ethtool_sset(mac->phy_dev, cmd);
+}
+
+static void mtk_get_drvinfo(struct net_device *dev,
+ struct ethtool_drvinfo *info)
+{
+ struct mtk_mac *mac = netdev_priv(dev);
+
+ strlcpy(info->driver, mac->hw->dev->driver->name, sizeof(info->driver));
+ strlcpy(info->bus_info, dev_name(mac->hw->dev), sizeof(info->bus_info));
+ info->n_stats = ARRAY_SIZE(mtk_ethtool_stats);
+}
+
+static u32 mtk_get_msglevel(struct net_device *dev)
+{
+ struct mtk_mac *mac = netdev_priv(dev);
+
+ return mac->hw->msg_enable;
+}
+
+static void mtk_set_msglevel(struct net_device *dev, u32 value)
+{
+ struct mtk_mac *mac = netdev_priv(dev);
+
+ mac->hw->msg_enable = value;
+}
+
+static int mtk_nway_reset(struct net_device *dev)
+{
+ struct mtk_mac *mac = netdev_priv(dev);
+
+ return genphy_restart_aneg(mac->phy_dev);
+}
+
+static u32 mtk_get_link(struct net_device *dev)
+{
+ struct mtk_mac *mac = netdev_priv(dev);
+ int err;
+
+ err = genphy_update_link(mac->phy_dev);
+ if (err)
+ return ethtool_op_get_link(dev);
+
+ return mac->phy_dev->link;
+}
+
+static void mtk_get_strings(struct net_device *dev, u32 stringset, u8 *data)
+{
+ int i;
+
+ switch (stringset) {
+ case ETH_SS_STATS:
+ for (i = 0; i < ARRAY_SIZE(mtk_ethtool_stats); i++) {
+ memcpy(data, mtk_ethtool_stats[i].str, ETH_GSTRING_LEN);
+ data += ETH_GSTRING_LEN;
+ }
+ break;
+ }
+}
+
+static int mtk_get_sset_count(struct net_device *dev, int sset)
+{
+ switch (sset) {
+ case ETH_SS_STATS:
+ return ARRAY_SIZE(mtk_ethtool_stats);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static void mtk_get_ethtool_stats(struct net_device *dev,
+ struct ethtool_stats *stats, u64 *data)
+{
+ struct mtk_mac *mac = netdev_priv(dev);
+ struct mtk_hw_stats *hwstats = mac->hw_stats;
+ u64 *data_src, *data_dst;
+ unsigned int start;
+ int i;
+
+ if (netif_running(dev) && netif_device_present(dev)) {
+ if (spin_trylock(&hwstats->stats_lock)) {
+ mtk_stats_update_mac(mac);
+ spin_unlock(&hwstats->stats_lock);
+ }
+ }
+
+ do {
+ data_src = (u64*)hwstats;
+ data_dst = data;
+ start = u64_stats_fetch_begin_irq(&hwstats->syncp);
+
+ for (i = 0; i < ARRAY_SIZE(mtk_ethtool_stats); i++)
+ *data_dst++ = *(data_src + mtk_ethtool_stats[i].offset);
+ } while (u64_stats_fetch_retry_irq(&hwstats->syncp, start));
+}
+
+static struct ethtool_ops mtk_ethtool_ops = {
+ .get_settings = mtk_get_settings,
+ .set_settings = mtk_set_settings,
+ .get_drvinfo = mtk_get_drvinfo,
+ .get_msglevel = mtk_get_msglevel,
+ .set_msglevel = mtk_set_msglevel,
+ .nway_reset = mtk_nway_reset,
+ .get_link = mtk_get_link,
+ .get_strings = mtk_get_strings,
+ .get_sset_count = mtk_get_sset_count,
+ .get_ethtool_stats = mtk_get_ethtool_stats,
+};
+
+static const struct net_device_ops mtk_netdev_ops = {
+ .ndo_init = mtk_init,
+ .ndo_uninit = mtk_uninit,
+ .ndo_open = mtk_open,
+ .ndo_stop = mtk_stop,
+ .ndo_start_xmit = mtk_start_xmit,
+ .ndo_set_mac_address = mtk_set_mac_address,
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_do_ioctl = mtk_do_ioctl,
+ .ndo_change_mtu = eth_change_mtu,
+ .ndo_tx_timeout = mtk_tx_timeout,
+ .ndo_get_stats64 = mtk_get_stats64,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ .ndo_poll_controller = mtk_poll_controller,
+#endif
+};
+
+static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
+{
+ struct mtk_mac *mac;
+ const __be32 *_id = of_get_property(np, "reg", NULL);
+ int id, err;
+
+ if (!_id) {
+ dev_err(eth->dev, "missing mac id\n");
+ return -EINVAL;
+ }
+
+ id = be32_to_cpup(_id);
+ if (id >= MTK_MAC_COUNT) {
+ dev_err(eth->dev, "%d is not a valid mac id\n", id);
+ return -EINVAL;
+ }
+
+ if (eth->netdev[id]) {
+ dev_err(eth->dev, "duplicate mac id found: %d\n", id);
+ return -EINVAL;
+ }
+
+ eth->netdev[id] = alloc_etherdev(sizeof(*mac));
+ if (!eth->netdev[id]) {
+ dev_err(eth->dev, "alloc_etherdev failed\n");
+ return -ENOMEM;
+ }
+ mac = netdev_priv(eth->netdev[id]);
+ eth->mac[id] = mac;
+ mac->id = id;
+ mac->hw = eth;
+ mac->of_node = np;
+ INIT_WORK(&mac->pending_work, mtk_pending_work);
+
+ mac->hw_stats = devm_kzalloc(eth->dev,
+ sizeof(*mac->hw_stats),
+ GFP_KERNEL);
+ if (!mac->hw_stats) {
+ dev_err(eth->dev, "failed to allocate counter memory\n");
+ err = -ENOMEM;
+ goto free_netdev;
+ }
+ spin_lock_init(&mac->hw_stats->stats_lock);
+ mac->hw_stats->reg_offset = id * MTK_STAT_OFFSET;
+
+ SET_NETDEV_DEV(eth->netdev[id], eth->dev);
+ eth->netdev[id]->netdev_ops = &mtk_netdev_ops;
+ eth->netdev[id]->base_addr = (unsigned long)eth->base;
+ eth->netdev[id]->vlan_features = MTK_HW_FEATURES &
+ ~(NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX);
+ eth->netdev[id]->features |= MTK_HW_FEATURES;
+ eth->netdev[id]->ethtool_ops = &mtk_ethtool_ops;
+
+ err = register_netdev(eth->netdev[id]);
+ if (err) {
+ dev_err(eth->dev, "error bringing up device\n");
+ goto free_netdev;
+ }
+ eth->netdev[id]->irq = eth->irq;
+ netif_info(eth, probe, eth->netdev[id],
+ "mediatek frame engine at 0x%08lx, irq %d\n",
+ eth->netdev[id]->base_addr, eth->netdev[id]->irq);
+
+ return 0;
+
+free_netdev:
+ free_netdev(eth->netdev[id]);
+ return err;
+}
+
+static int mtk_probe(struct platform_device *pdev)
+{
+ struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ struct device_node *mac_np;
+ const struct of_device_id *match;
+ struct mtk_soc_data *soc;
+ struct mtk_eth *eth;
+ int err;
+
+ pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
+ pdev->dev.dma_mask = &pdev->dev.coherent_dma_mask;
+
+ device_reset(&pdev->dev);
+
+ match = of_match_device(of_mtk_match, &pdev->dev);
+ soc = (struct mtk_soc_data *)match->data;
+
+ eth = devm_kzalloc(&pdev->dev, sizeof(*eth), GFP_KERNEL);
+ if (!eth)
+ return -ENOMEM;
+
+ eth->base = devm_ioremap_resource(&pdev->dev, res);
+ if (!eth->base)
+ return -EADDRNOTAVAIL;
+
+ spin_lock_init(&eth->page_lock);
+
+ eth->ethsys = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
+ "mediatek,ethsys");
+ if (IS_ERR(eth->ethsys)) {
+ dev_err(&pdev->dev, "no ethsys regmap found\n");
+ return PTR_ERR(eth->ethsys);
+ }
+
+ eth->pctl = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
+ "mediatek,pctl");
+ if (IS_ERR(eth->pctl)) {
+ dev_err(&pdev->dev, "no pctl regmap found\n");
+ return PTR_ERR(eth->pctl);
+ }
+
+ eth->rstc = devm_reset_control_get(&pdev->dev, "eth");
+ if (IS_ERR(eth->rstc)) {
+ dev_err(&pdev->dev, "no eth reset found\n");
+ return PTR_ERR(eth->rstc);
+ }
+
+ eth->irq = platform_get_irq(pdev, 0);
+ if (eth->irq < 0) {
+ dev_err(&pdev->dev, "no IRQ resource found\n");
+ return -ENXIO;
+ }
+
+ eth->clk_ethif = devm_clk_get(&pdev->dev, "ethif");
+ eth->clk_esw = devm_clk_get(&pdev->dev, "esw");
+ eth->clk_gp1 = devm_clk_get(&pdev->dev, "gp1");
+ eth->clk_gp2 = devm_clk_get(&pdev->dev, "gp2");
+ if (IS_ERR(eth->clk_esw) || IS_ERR(eth->clk_gp1) ||
+ IS_ERR(eth->clk_gp2) || IS_ERR(eth->clk_ethif))
+ return -ENODEV;
+
+ clk_prepare_enable(eth->clk_ethif);
+ clk_prepare_enable(eth->clk_esw);
+ clk_prepare_enable(eth->clk_gp1);
+ clk_prepare_enable(eth->clk_gp2);
+
+ eth->dev = &pdev->dev;
+ eth->msg_enable = netif_msg_init(mtk_msg_level, MTK_DEFAULT_MSG_ENABLE);
+
+ err = mtk_hw_init(eth);
+ if (err)
+ return err;
+
+ for_each_child_of_node(pdev->dev.of_node, mac_np) {
+ if (!of_device_is_compatible(mac_np,
+ "mediatek,eth-mac"))
+ continue;
+
+ if (!of_device_is_available(mac_np))
+ continue;
+
+ err = mtk_add_mac(eth, mac_np);
+ if (err)
+ goto err_free_dev;
+ }
+
+ /* we run 2 devices on the same DMA ring so we need a dummy device
+ * for NAPI to work
+ */
+ init_dummy_netdev(&eth->dummy_dev);
+ netif_napi_add(&eth->dummy_dev, &eth->rx_napi, mtk_poll,
+ MTK_NAPI_WEIGHT);
+
+ platform_set_drvdata(pdev, eth);
+
+ return 0;
+
+err_free_dev:
+ mtk_cleanup(eth);
+ return err;
+}
+
+static int mtk_remove(struct platform_device *pdev)
+{
+ struct mtk_eth *eth = platform_get_drvdata(pdev);
+
+ clk_disable_unprepare(eth->clk_ethif);
+ clk_disable_unprepare(eth->clk_esw);
+ clk_disable_unprepare(eth->clk_gp1);
+ clk_disable_unprepare(eth->clk_gp2);
+
+ netif_napi_del(&eth->rx_napi);
+ mtk_cleanup(eth);
+ platform_set_drvdata(pdev, NULL);
+
+ return 0;
+}
+
+const struct of_device_id of_mtk_match[] = {
+ { .compatible = "mediatek,mt7623-eth" },
+ {},
+};
+
+static struct platform_driver mtk_driver = {
+ .probe = mtk_probe,
+ .remove = mtk_remove,
+ .driver = {
+ .name = "mtk_soc_eth",
+ .owner = THIS_MODULE,
+ .of_match_table = of_mtk_match,
+ },
+};
+
+module_platform_driver(mtk_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("John Crispin <blogic@openwrt.org>");
+MODULE_DESCRIPTION("Ethernet driver for MediaTek SoC");
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
new file mode 100644
index 000000000000..48a5292c8ed8
--- /dev/null
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -0,0 +1,421 @@
+/* This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Copyright (C) 2009-2016 John Crispin <blogic@openwrt.org>
+ * Copyright (C) 2009-2016 Felix Fietkau <nbd@openwrt.org>
+ * Copyright (C) 2013-2016 Michael Lee <igvtee@gmail.com>
+ */
+
+#ifndef MTK_ETH_H
+#define MTK_ETH_H
+
+#define MTK_QDMA_PAGE_SIZE 2048
+#define MTK_MAX_RX_LENGTH 1536
+#define MTK_TX_DMA_BUF_LEN 0x3fff
+#define MTK_DMA_SIZE 256
+#define MTK_NAPI_WEIGHT 64
+#define MTK_MAC_COUNT 2
+#define MTK_RX_ETH_HLEN (VLAN_ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)
+#define MTK_RX_HLEN (NET_SKB_PAD + MTK_RX_ETH_HLEN + NET_IP_ALIGN)
+#define MTK_DMA_DUMMY_DESC 0xffffffff
+#define MTK_DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | \
+ NETIF_MSG_PROBE | \
+ NETIF_MSG_LINK | \
+ NETIF_MSG_TIMER | \
+ NETIF_MSG_IFDOWN | \
+ NETIF_MSG_IFUP | \
+ NETIF_MSG_RX_ERR | \
+ NETIF_MSG_TX_ERR)
+#define MTK_HW_FEATURES (NETIF_F_IP_CSUM | \
+ NETIF_F_RXCSUM | \
+ NETIF_F_HW_VLAN_CTAG_TX | \
+ NETIF_F_HW_VLAN_CTAG_RX | \
+ NETIF_F_SG | NETIF_F_TSO | \
+ NETIF_F_TSO6 | \
+ NETIF_F_IPV6_CSUM)
+#define NEXT_RX_DESP_IDX(X) (((X) + 1) & (MTK_DMA_SIZE - 1))
+
+/* Frame Engine Global Reset Register */
+#define MTK_RST_GL 0x04
+#define RST_GL_PSE BIT(0)
+
+/* Frame Engine Interrupt Status Register */
+#define MTK_INT_STATUS2 0x08
+#define MTK_GDM1_AF BIT(28)
+#define MTK_GDM2_AF BIT(29)
+
+/* Frame Engine Interrupt Grouping Register */
+#define MTK_FE_INT_GRP 0x20
+
+/* CDMP Exgress Control Register */
+#define MTK_CDMP_EG_CTRL 0x404
+
+/* GDM Exgress Control Register */
+#define MTK_GDMA_FWD_CFG(x) (0x500 + (x * 0x1000))
+#define MTK_GDMA_ICS_EN BIT(22)
+#define MTK_GDMA_TCS_EN BIT(21)
+#define MTK_GDMA_UCS_EN BIT(20)
+
+/* Unicast Filter MAC Address Register - Low */
+#define MTK_GDMA_MAC_ADRL(x) (0x508 + (x * 0x1000))
+
+/* Unicast Filter MAC Address Register - High */
+#define MTK_GDMA_MAC_ADRH(x) (0x50C + (x * 0x1000))
+
+/* QDMA TX Queue Configuration Registers */
+#define MTK_QTX_CFG(x) (0x1800 + (x * 0x10))
+#define QDMA_RES_THRES 4
+
+/* QDMA TX Queue Scheduler Registers */
+#define MTK_QTX_SCH(x) (0x1804 + (x * 0x10))
+
+/* QDMA RX Base Pointer Register */
+#define MTK_QRX_BASE_PTR0 0x1900
+
+/* QDMA RX Maximum Count Register */
+#define MTK_QRX_MAX_CNT0 0x1904
+
+/* QDMA RX CPU Pointer Register */
+#define MTK_QRX_CRX_IDX0 0x1908
+
+/* QDMA RX DMA Pointer Register */
+#define MTK_QRX_DRX_IDX0 0x190C
+
+/* QDMA Global Configuration Register */
+#define MTK_QDMA_GLO_CFG 0x1A04
+#define MTK_RX_2B_OFFSET BIT(31)
+#define MTK_RX_BT_32DWORDS (3 << 11)
+#define MTK_TX_WB_DDONE BIT(6)
+#define MTK_DMA_SIZE_16DWORDS (2 << 4)
+#define MTK_RX_DMA_BUSY BIT(3)
+#define MTK_TX_DMA_BUSY BIT(1)
+#define MTK_RX_DMA_EN BIT(2)
+#define MTK_TX_DMA_EN BIT(0)
+#define MTK_DMA_BUSY_TIMEOUT HZ
+
+/* QDMA Reset Index Register */
+#define MTK_QDMA_RST_IDX 0x1A08
+#define MTK_PST_DRX_IDX0 BIT(16)
+
+/* QDMA Delay Interrupt Register */
+#define MTK_QDMA_DELAY_INT 0x1A0C
+
+/* QDMA Flow Control Register */
+#define MTK_QDMA_FC_THRES 0x1A10
+#define FC_THRES_DROP_MODE BIT(20)
+#define FC_THRES_DROP_EN (7 << 16)
+#define FC_THRES_MIN 0x4444
+
+/* QDMA Interrupt Status Register */
+#define MTK_QMTK_INT_STATUS 0x1A18
+#define MTK_RX_DONE_INT1 BIT(17)
+#define MTK_RX_DONE_INT0 BIT(16)
+#define MTK_TX_DONE_INT3 BIT(3)
+#define MTK_TX_DONE_INT2 BIT(2)
+#define MTK_TX_DONE_INT1 BIT(1)
+#define MTK_TX_DONE_INT0 BIT(0)
+#define MTK_RX_DONE_INT (MTK_RX_DONE_INT0 | MTK_RX_DONE_INT1)
+#define MTK_TX_DONE_INT (MTK_TX_DONE_INT0 | MTK_TX_DONE_INT1 | \
+ MTK_TX_DONE_INT2 | MTK_TX_DONE_INT3)
+
+/* QDMA Interrupt Status Register */
+#define MTK_QDMA_INT_MASK 0x1A1C
+
+/* QDMA Interrupt Mask Register */
+#define MTK_QDMA_HRED2 0x1A44
+
+/* QDMA TX Forward CPU Pointer Register */
+#define MTK_QTX_CTX_PTR 0x1B00
+
+/* QDMA TX Forward DMA Pointer Register */
+#define MTK_QTX_DTX_PTR 0x1B04
+
+/* QDMA TX Release CPU Pointer Register */
+#define MTK_QTX_CRX_PTR 0x1B10
+
+/* QDMA TX Release DMA Pointer Register */
+#define MTK_QTX_DRX_PTR 0x1B14
+
+/* QDMA FQ Head Pointer Register */
+#define MTK_QDMA_FQ_HEAD 0x1B20
+
+/* QDMA FQ Head Pointer Register */
+#define MTK_QDMA_FQ_TAIL 0x1B24
+
+/* QDMA FQ Free Page Counter Register */
+#define MTK_QDMA_FQ_CNT 0x1B28
+
+/* QDMA FQ Free Page Buffer Length Register */
+#define MTK_QDMA_FQ_BLEN 0x1B2C
+
+/* GMA1 Received Good Byte Count Register */
+#define MTK_GDM1_TX_GBCNT 0x2400
+#define MTK_STAT_OFFSET 0x40
+
+/* QDMA descriptor txd4 */
+#define TX_DMA_CHKSUM (0x7 << 29)
+#define TX_DMA_TSO BIT(28)
+#define TX_DMA_FPORT_SHIFT 25
+#define TX_DMA_FPORT_MASK 0x7
+#define TX_DMA_INS_VLAN BIT(16)
+
+/* QDMA descriptor txd3 */
+#define TX_DMA_OWNER_CPU BIT(31)
+#define TX_DMA_LS0 BIT(30)
+#define TX_DMA_PLEN0(_x) (((_x) & MTK_TX_DMA_BUF_LEN) << 16)
+#define TX_DMA_SWC BIT(14)
+#define TX_DMA_SDL(_x) (((_x) & 0x3fff) << 16)
+
+/* QDMA descriptor rxd2 */
+#define RX_DMA_DONE BIT(31)
+#define RX_DMA_PLEN0(_x) (((_x) & 0x3fff) << 16)
+#define RX_DMA_GET_PLEN0(_x) (((_x) >> 16) & 0x3fff)
+
+/* QDMA descriptor rxd3 */
+#define RX_DMA_VID(_x) ((_x) & 0xfff)
+
+/* QDMA descriptor rxd4 */
+#define RX_DMA_L4_VALID BIT(24)
+#define RX_DMA_FPORT_SHIFT 19
+#define RX_DMA_FPORT_MASK 0x7
+
+/* PHY Indirect Access Control registers */
+#define MTK_PHY_IAC 0x10004
+#define PHY_IAC_ACCESS BIT(31)
+#define PHY_IAC_READ BIT(19)
+#define PHY_IAC_WRITE BIT(18)
+#define PHY_IAC_START BIT(16)
+#define PHY_IAC_ADDR_SHIFT 20
+#define PHY_IAC_REG_SHIFT 25
+#define PHY_IAC_TIMEOUT HZ
+
+/* Mac control registers */
+#define MTK_MAC_MCR(x) (0x10100 + (x * 0x100))
+#define MAC_MCR_MAX_RX_1536 BIT(24)
+#define MAC_MCR_IPG_CFG (BIT(18) | BIT(16))
+#define MAC_MCR_FORCE_MODE BIT(15)
+#define MAC_MCR_TX_EN BIT(14)
+#define MAC_MCR_RX_EN BIT(13)
+#define MAC_MCR_BACKOFF_EN BIT(9)
+#define MAC_MCR_BACKPR_EN BIT(8)
+#define MAC_MCR_FORCE_RX_FC BIT(5)
+#define MAC_MCR_FORCE_TX_FC BIT(4)
+#define MAC_MCR_SPEED_1000 BIT(3)
+#define MAC_MCR_SPEED_100 BIT(2)
+#define MAC_MCR_FORCE_DPX BIT(1)
+#define MAC_MCR_FORCE_LINK BIT(0)
+#define MAC_MCR_FIXED_LINK (MAC_MCR_MAX_RX_1536 | MAC_MCR_IPG_CFG | \
+ MAC_MCR_FORCE_MODE | MAC_MCR_TX_EN | \
+ MAC_MCR_RX_EN | MAC_MCR_BACKOFF_EN | \
+ MAC_MCR_BACKPR_EN | MAC_MCR_FORCE_RX_FC | \
+ MAC_MCR_FORCE_TX_FC | MAC_MCR_SPEED_1000 | \
+ MAC_MCR_FORCE_DPX | MAC_MCR_FORCE_LINK)
+
+/* GPIO port control registers for GMAC 2*/
+#define GPIO_OD33_CTRL8 0x4c0
+#define GPIO_BIAS_CTRL 0xed0
+#define GPIO_DRV_SEL10 0xf00
+
+/* ethernet subsystem config register */
+#define ETHSYS_SYSCFG0 0x14
+#define SYSCFG0_GE_MASK 0x3
+#define SYSCFG0_GE_MODE(x, y) (x << (12 + (y * 2)))
+
+struct mtk_rx_dma {
+ unsigned int rxd1;
+ unsigned int rxd2;
+ unsigned int rxd3;
+ unsigned int rxd4;
+} __packed __aligned(4);
+
+struct mtk_tx_dma {
+ unsigned int txd1;
+ unsigned int txd2;
+ unsigned int txd3;
+ unsigned int txd4;
+} __packed __aligned(4);
+
+struct mtk_eth;
+struct mtk_mac;
+
+/* struct mtk_hw_stats - the structure that holds the traffic statistics.
+ * @stats_lock: make sure that stats operations are atomic
+ * @reg_offset: the status register offset of the SoC
+ * @syncp: the refcount
+ *
+ * All of the supported SoCs have hardware counters for traffic statistics.
+ * Whenever the status IRQ triggers we can read the latest stats from these
+ * counters and store them in this struct.
+ */
+struct mtk_hw_stats {
+ u64 tx_bytes;
+ u64 tx_packets;
+ u64 tx_skip;
+ u64 tx_collisions;
+ u64 rx_bytes;
+ u64 rx_packets;
+ u64 rx_overflow;
+ u64 rx_fcs_errors;
+ u64 rx_short_errors;
+ u64 rx_long_errors;
+ u64 rx_checksum_errors;
+ u64 rx_flow_control_packets;
+
+ spinlock_t stats_lock;
+ u32 reg_offset;
+ struct u64_stats_sync syncp;
+};
+
+/* PDMA descriptor can point at 1-2 segments. This enum allows us to track how
+ * memory was allocated so that it can be freed properly
+ */
+enum mtk_tx_flags {
+ MTK_TX_FLAGS_SINGLE0 = 0x01,
+ MTK_TX_FLAGS_PAGE0 = 0x02,
+};
+
+/* struct mtk_tx_buf - This struct holds the pointers to the memory pointed at
+ * by the TX descriptor s
+ * @skb: The SKB pointer of the packet being sent
+ * @dma_addr0: The base addr of the first segment
+ * @dma_len0: The length of the first segment
+ * @dma_addr1: The base addr of the second segment
+ * @dma_len1: The length of the second segment
+ */
+struct mtk_tx_buf {
+ struct sk_buff *skb;
+ u32 flags;
+ DEFINE_DMA_UNMAP_ADDR(dma_addr0);
+ DEFINE_DMA_UNMAP_LEN(dma_len0);
+ DEFINE_DMA_UNMAP_ADDR(dma_addr1);
+ DEFINE_DMA_UNMAP_LEN(dma_len1);
+};
+
+/* struct mtk_tx_ring - This struct holds info describing a TX ring
+ * @dma: The descriptor ring
+ * @buf: The memory pointed at by the ring
+ * @phys: The physical addr of tx_buf
+ * @next_free: Pointer to the next free descriptor
+ * @last_free: Pointer to the last free descriptor
+ * @thresh: The threshold of minimum amount of free descriptors
+ * @free_count: QDMA uses a linked list. Track how many free descriptors
+ * are present
+ */
+struct mtk_tx_ring {
+ struct mtk_tx_dma *dma;
+ struct mtk_tx_buf *buf;
+ dma_addr_t phys;
+ struct mtk_tx_dma *next_free;
+ struct mtk_tx_dma *last_free;
+ u16 thresh;
+ atomic_t free_count;
+};
+
+/* struct mtk_rx_ring - This struct holds info describing a RX ring
+ * @dma: The descriptor ring
+ * @data: The memory pointed at by the ring
+ * @phys: The physical addr of rx_buf
+ * @frag_size: How big can each fragment be
+ * @buf_size: The size of each packet buffer
+ * @calc_idx: The current head of ring
+ */
+struct mtk_rx_ring {
+ struct mtk_rx_dma *dma;
+ u8 **data;
+ dma_addr_t phys;
+ u16 frag_size;
+ u16 buf_size;
+ u16 calc_idx;
+};
+
+/* currently no SoC has more than 2 macs */
+#define MTK_MAX_DEVS 2
+
+/* struct mtk_eth - This is the main datasructure for holding the state
+ * of the driver
+ * @dev: The device pointer
+ * @base: The mapped register i/o base
+ * @page_lock: Make sure that register operations are atomic
+ * @dummy_dev: we run 2 netdevs on 1 physical DMA ring and need a
+ * dummy for NAPI to work
+ * @netdev: The netdev instances
+ * @mac: Each netdev is linked to a physical MAC
+ * @irq: The IRQ that we are using
+ * @msg_enable: Ethtool msg level
+ * @ethsys: The register map pointing at the range used to setup
+ * MII modes
+ * @pctl: The register map pointing at the range used to setup
+ * GMAC port drive/slew values
+ * @dma_refcnt: track how many netdevs are using the DMA engine
+ * @tx_ring: Pointer to the memore holding info about the TX ring
+ * @rx_ring: Pointer to the memore holding info about the RX ring
+ * @rx_napi: The NAPI struct
+ * @scratch_ring: Newer SoCs need memory for a second HW managed TX ring
+ * @scratch_head: The scratch memory that scratch_ring points to.
+ * @clk_ethif: The ethif clock
+ * @clk_esw: The switch clock
+ * @clk_gp1: The gmac1 clock
+ * @clk_gp2: The gmac2 clock
+ * @mii_bus: If there is a bus we need to create an instance for it
+ */
+
+struct mtk_eth {
+ struct device *dev;
+ void __iomem *base;
+ struct reset_control *rstc;
+ spinlock_t page_lock;
+ struct net_device dummy_dev;
+ struct net_device *netdev[MTK_MAX_DEVS];
+ struct mtk_mac *mac[MTK_MAX_DEVS];
+ int irq;
+ u32 msg_enable;
+ unsigned long sysclk;
+ struct regmap *ethsys;
+ struct regmap *pctl;
+ atomic_t dma_refcnt;
+ struct mtk_tx_ring tx_ring;
+ struct mtk_rx_ring rx_ring;
+ struct napi_struct rx_napi;
+ struct mtk_tx_dma *scratch_ring;
+ void *scratch_head;
+ struct clk *clk_ethif;
+ struct clk *clk_esw;
+ struct clk *clk_gp1;
+ struct clk *clk_gp2;
+ struct mii_bus *mii_bus;
+};
+
+/* struct mtk_mac - the structure that holds the info about the MACs of the
+ * SoC
+ * @id: The number of the MAC
+ * @of_node: Our devicetree node
+ * @hw: Backpointer to our main datastruture
+ * @hw_stats: Packet statistics counter
+ * @phy_dev: The attached PHY if available
+ * @pending_work: The workqueue used to reset the dma ring
+ */
+struct mtk_mac {
+ int id;
+ struct device_node *of_node;
+ struct mtk_eth *hw;
+ struct mtk_hw_stats *hw_stats;
+ struct phy_device *phy_dev;
+ struct work_struct pending_work;
+};
+
+/* the struct describing the SoC. these are declared in the soc_xyz.c files */
+extern const struct of_device_id of_mtk_match[];
+
+/* read the hardware status register */
+void mtk_stats_update_mac(struct mtk_mac *mac);
+
+void mtk_w32(struct mtk_eth *eth, u32 val, unsigned reg);
+u32 mtk_r32(struct mtk_eth *eth, unsigned reg);
+
+#endif /* MTK_ETH_H */
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 16b26d17c54c..b4b258c8ca47 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2258,7 +2258,7 @@ static int mlx4_en_set_vf_mac(struct net_device *dev, int queue, u8 *mac)
struct mlx4_en_dev *mdev = en_priv->mdev;
u64 mac_u64 = mlx4_mac_to_u64(mac);
- if (!is_valid_ether_addr(mac))
+ if (is_multicast_ether_addr(mac))
return -EINVAL;
return mlx4_set_vf_mac(mdev->dev, en_priv->port, queue, mac_u64);
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index b8a51515e73c..503ec23e84cc 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -1272,6 +1272,7 @@ err_set_port:
static int mlx4_mf_bond(struct mlx4_dev *dev)
{
int err = 0;
+ int nvfs;
struct mlx4_slaves_pport slaves_port1;
struct mlx4_slaves_pport slaves_port2;
DECLARE_BITMAP(slaves_port_1_2, MLX4_MFUNC_MAX);
@@ -1288,11 +1289,18 @@ static int mlx4_mf_bond(struct mlx4_dev *dev)
return -EINVAL;
}
+ /* number of virtual functions is number of total functions minus one
+ * physical function for each port.
+ */
+ nvfs = bitmap_weight(slaves_port1.slaves, dev->persist->num_vfs + 1) +
+ bitmap_weight(slaves_port2.slaves, dev->persist->num_vfs + 1) - 2;
+
/* limit on maximum allowed VFs */
- if ((bitmap_weight(slaves_port1.slaves, dev->persist->num_vfs + 1) +
- bitmap_weight(slaves_port2.slaves, dev->persist->num_vfs + 1)) >
- MAX_MF_BOND_ALLOWED_SLAVES)
+ if (nvfs > MAX_MF_BOND_ALLOWED_SLAVES) {
+ mlx4_warn(dev, "HA mode is not supported for %d VFs (max %d are allowed)\n",
+ nvfs, MAX_MF_BOND_ALLOWED_SLAVES);
return -EINVAL;
+ }
if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) {
mlx4_warn(dev, "HA mode unsupported for NON DMFS steering\n");
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index 787b7bb54d52..211c65087997 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -193,10 +193,10 @@ int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac)
if (need_mf_bond) {
if (port == 1) {
mutex_lock(&table->mutex);
- mutex_lock(&dup_table->mutex);
+ mutex_lock_nested(&dup_table->mutex, SINGLE_DEPTH_NESTING);
} else {
mutex_lock(&dup_table->mutex);
- mutex_lock(&table->mutex);
+ mutex_lock_nested(&table->mutex, SINGLE_DEPTH_NESTING);
}
} else {
mutex_lock(&table->mutex);
@@ -389,10 +389,10 @@ void __mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac)
if (dup) {
if (port == 1) {
mutex_lock(&table->mutex);
- mutex_lock(&dup_table->mutex);
+ mutex_lock_nested(&dup_table->mutex, SINGLE_DEPTH_NESTING);
} else {
mutex_lock(&dup_table->mutex);
- mutex_lock(&table->mutex);
+ mutex_lock_nested(&table->mutex, SINGLE_DEPTH_NESTING);
}
} else {
mutex_lock(&table->mutex);
@@ -479,10 +479,10 @@ int __mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac)
if (dup) {
if (port == 1) {
mutex_lock(&table->mutex);
- mutex_lock(&dup_table->mutex);
+ mutex_lock_nested(&dup_table->mutex, SINGLE_DEPTH_NESTING);
} else {
mutex_lock(&dup_table->mutex);
- mutex_lock(&table->mutex);
+ mutex_lock_nested(&table->mutex, SINGLE_DEPTH_NESTING);
}
} else {
mutex_lock(&table->mutex);
@@ -588,10 +588,10 @@ int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan,
if (need_mf_bond) {
if (port == 1) {
mutex_lock(&table->mutex);
- mutex_lock(&dup_table->mutex);
+ mutex_lock_nested(&dup_table->mutex, SINGLE_DEPTH_NESTING);
} else {
mutex_lock(&dup_table->mutex);
- mutex_lock(&table->mutex);
+ mutex_lock_nested(&table->mutex, SINGLE_DEPTH_NESTING);
}
} else {
mutex_lock(&table->mutex);
@@ -764,10 +764,10 @@ void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan)
if (dup) {
if (port == 1) {
mutex_lock(&table->mutex);
- mutex_lock(&dup_table->mutex);
+ mutex_lock_nested(&dup_table->mutex, SINGLE_DEPTH_NESTING);
} else {
mutex_lock(&dup_table->mutex);
- mutex_lock(&table->mutex);
+ mutex_lock_nested(&table->mutex, SINGLE_DEPTH_NESTING);
}
} else {
mutex_lock(&table->mutex);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 11b592dbf16a..4fc45ee0c5d1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -6,6 +6,6 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o \
en_main.o en_fs.o en_ethtool.o en_tx.o en_rx.o \
- en_txrx.o en_clock.o vxlan.o
+ en_txrx.o en_clock.o vxlan.o en_tc.o
mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 9c0e80e64b43..0f76d321030f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -43,6 +43,7 @@
#include <linux/mlx5/port.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/transobj.h>
+#include <linux/rhashtable.h>
#include "wq.h"
#include "mlx5_core.h"
@@ -237,6 +238,7 @@ struct mlx5e_pport_stats {
static const char rq_stats_strings[][ETH_GSTRING_LEN] = {
"packets",
+ "bytes",
"csum_none",
"csum_sw",
"lro_packets",
@@ -246,16 +248,18 @@ static const char rq_stats_strings[][ETH_GSTRING_LEN] = {
struct mlx5e_rq_stats {
u64 packets;
+ u64 bytes;
u64 csum_none;
u64 csum_sw;
u64 lro_packets;
u64 lro_bytes;
u64 wqe_err;
-#define NUM_RQ_STATS 6
+#define NUM_RQ_STATS 7
};
static const char sq_stats_strings[][ETH_GSTRING_LEN] = {
"packets",
+ "bytes",
"tso_packets",
"tso_bytes",
"tso_inner_packets",
@@ -271,6 +275,7 @@ static const char sq_stats_strings[][ETH_GSTRING_LEN] = {
struct mlx5e_sq_stats {
/* commonly accessed in data path */
u64 packets;
+ u64 bytes;
u64 tso_packets;
u64 tso_bytes;
u64 tso_inner_packets;
@@ -282,7 +287,7 @@ struct mlx5e_sq_stats {
u64 stopped;
u64 wake;
u64 dropped;
-#define NUM_SQ_STATS 11
+#define NUM_SQ_STATS 12
};
struct mlx5e_stats {
@@ -328,14 +333,9 @@ enum {
MLX5E_RQ_STATE_POST_WQES_ENABLE,
};
-enum cq_flags {
- MLX5E_CQ_HAS_CQES = 1,
-};
-
struct mlx5e_cq {
/* data path - accessed per cqe */
struct mlx5_cqwq wq;
- unsigned long flags;
/* data path - accessed per napi poll */
struct napi_struct *napi;
@@ -476,6 +476,8 @@ enum mlx5e_traffic_types {
MLX5E_NUM_TT,
};
+#define IS_HASHING_TT(tt) (tt != MLX5E_TT_ANY)
+
enum mlx5e_rqt_ix {
MLX5E_INDIRECTION_RQT,
MLX5E_SINGLE_RQ_RQT,
@@ -526,8 +528,16 @@ struct mlx5e_flow_table {
struct mlx5_flow_group **g;
};
+struct mlx5e_tc_flow_table {
+ struct mlx5_flow_table *t;
+
+ struct rhashtable_params ht_params;
+ struct rhashtable ht;
+};
+
struct mlx5e_flow_tables {
struct mlx5_flow_namespace *ns;
+ struct mlx5e_tc_flow_table tc;
struct mlx5e_flow_table vlan;
struct mlx5e_flow_table main;
};
@@ -646,9 +656,12 @@ void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv);
void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv);
int mlx5e_redirect_rqt(struct mlx5e_priv *priv, enum mlx5e_rqt_ix rqt_ix);
+void mlx5e_build_tir_ctx_hash(void *tirc, struct mlx5e_priv *priv);
int mlx5e_open_locked(struct net_device *netdev);
int mlx5e_close_locked(struct net_device *netdev);
+void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len,
+ int num_channels);
static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq,
struct mlx5e_tx_wqe *wqe, int bf_sz)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
index be6543570b2b..2018eebe1531 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
@@ -62,10 +62,11 @@ static void mlx5e_timestamp_overflow(struct work_struct *work)
struct delayed_work *dwork = to_delayed_work(work);
struct mlx5e_tstamp *tstamp = container_of(dwork, struct mlx5e_tstamp,
overflow_work);
+ unsigned long flags;
- write_lock(&tstamp->lock);
+ write_lock_irqsave(&tstamp->lock, flags);
timecounter_read(&tstamp->clock);
- write_unlock(&tstamp->lock);
+ write_unlock_irqrestore(&tstamp->lock, flags);
schedule_delayed_work(&tstamp->overflow_work, tstamp->overflow_period);
}
@@ -136,10 +137,11 @@ static int mlx5e_ptp_settime(struct ptp_clock_info *ptp,
struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp,
ptp_info);
u64 ns = timespec64_to_ns(ts);
+ unsigned long flags;
- write_lock(&tstamp->lock);
+ write_lock_irqsave(&tstamp->lock, flags);
timecounter_init(&tstamp->clock, &tstamp->cycles, ns);
- write_unlock(&tstamp->lock);
+ write_unlock_irqrestore(&tstamp->lock, flags);
return 0;
}
@@ -150,10 +152,11 @@ static int mlx5e_ptp_gettime(struct ptp_clock_info *ptp,
struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp,
ptp_info);
u64 ns;
+ unsigned long flags;
- write_lock(&tstamp->lock);
+ write_lock_irqsave(&tstamp->lock, flags);
ns = timecounter_read(&tstamp->clock);
- write_unlock(&tstamp->lock);
+ write_unlock_irqrestore(&tstamp->lock, flags);
*ts = ns_to_timespec64(ns);
@@ -164,10 +167,11 @@ static int mlx5e_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
{
struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp,
ptp_info);
+ unsigned long flags;
- write_lock(&tstamp->lock);
+ write_lock_irqsave(&tstamp->lock, flags);
timecounter_adjtime(&tstamp->clock, delta);
- write_unlock(&tstamp->lock);
+ write_unlock_irqrestore(&tstamp->lock, flags);
return 0;
}
@@ -176,6 +180,7 @@ static int mlx5e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta)
{
u64 adj;
u32 diff;
+ unsigned long flags;
int neg_adj = 0;
struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp,
ptp_info);
@@ -189,11 +194,11 @@ static int mlx5e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta)
adj *= delta;
diff = div_u64(adj, 1000000000ULL);
- write_lock(&tstamp->lock);
+ write_lock_irqsave(&tstamp->lock, flags);
timecounter_read(&tstamp->clock);
tstamp->cycles.mult = neg_adj ? tstamp->nominal_c_mult - diff :
tstamp->nominal_c_mult + diff;
- write_unlock(&tstamp->lock);
+ write_unlock_irqrestore(&tstamp->lock, flags);
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 0959656404b3..68834b715f6c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -386,6 +386,8 @@ static int mlx5e_set_channels(struct net_device *dev,
mlx5e_close_locked(dev);
priv->params.num_channels = count;
+ mlx5e_build_default_indir_rqt(priv->params.indirection_rqt,
+ MLX5E_INDIR_RQT_SIZE, count);
if (was_opened)
err = mlx5e_open_locked(dev);
@@ -716,18 +718,36 @@ static int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
return 0;
}
+static void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ void *tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx);
+ int i;
+
+ MLX5_SET(modify_tir_in, in, bitmask.hash, 1);
+ mlx5e_build_tir_ctx_hash(tirc, priv);
+
+ for (i = 0; i < MLX5E_NUM_TT; i++)
+ if (IS_HASHING_TT(i))
+ mlx5_core_modify_tir(mdev, priv->tirn[i], in, inlen);
+}
+
static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
const u8 *key, const u8 hfunc)
{
struct mlx5e_priv *priv = netdev_priv(dev);
- bool close_open;
- int err = 0;
+ int inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
+ void *in;
if ((hfunc != ETH_RSS_HASH_NO_CHANGE) &&
(hfunc != ETH_RSS_HASH_XOR) &&
(hfunc != ETH_RSS_HASH_TOP))
return -EINVAL;
+ in = mlx5_vzalloc(inlen);
+ if (!in)
+ return -ENOMEM;
+
mutex_lock(&priv->state_lock);
if (indir) {
@@ -736,11 +756,6 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
mlx5e_redirect_rqt(priv, MLX5E_INDIRECTION_RQT);
}
- close_open = (key || (hfunc != ETH_RSS_HASH_NO_CHANGE)) &&
- test_bit(MLX5E_STATE_OPENED, &priv->state);
- if (close_open)
- mlx5e_close_locked(dev);
-
if (key)
memcpy(priv->params.toeplitz_hash_key, key,
sizeof(priv->params.toeplitz_hash_key));
@@ -748,12 +763,13 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
if (hfunc != ETH_RSS_HASH_NO_CHANGE)
priv->params.rss_hfunc = hfunc;
- if (close_open)
- err = mlx5e_open_locked(priv->netdev);
+ mlx5e_modify_tirs_hash(priv, in, inlen);
mutex_unlock(&priv->state_lock);
- return err;
+ kvfree(in);
+
+ return 0;
}
static int mlx5e_get_rxnfc(struct net_device *netdev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index 80d81abc4820..d00a24203410 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -1041,7 +1041,7 @@ static int mlx5e_create_main_flow_table(struct mlx5e_priv *priv)
int err;
ft->num_groups = 0;
- ft->t = mlx5_create_flow_table(priv->fts.ns, 0, MLX5E_MAIN_TABLE_SIZE);
+ ft->t = mlx5_create_flow_table(priv->fts.ns, 1, MLX5E_MAIN_TABLE_SIZE);
if (IS_ERR(ft->t)) {
err = PTR_ERR(ft->t);
@@ -1150,7 +1150,7 @@ static int mlx5e_create_vlan_flow_table(struct mlx5e_priv *priv)
int err;
ft->num_groups = 0;
- ft->t = mlx5_create_flow_table(priv->fts.ns, 0, MLX5E_VLAN_TABLE_SIZE);
+ ft->t = mlx5_create_flow_table(priv->fts.ns, 1, MLX5E_VLAN_TABLE_SIZE);
if (IS_ERR(ft->t)) {
err = PTR_ERR(ft->t);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 5063c0e0f8ac..ac5807803c84 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -30,9 +30,12 @@
* SOFTWARE.
*/
+#include <net/tc_act/tc_gact.h>
+#include <net/pkt_cls.h>
#include <linux/mlx5/fs.h>
#include <net/vxlan.h>
#include "en.h"
+#include "en_tc.h"
#include "eswitch.h"
#include "vxlan.h"
@@ -143,6 +146,10 @@ void mlx5e_update_stats(struct mlx5e_priv *priv)
return;
/* Collect firts the SW counters and then HW for consistency */
+ s->rx_packets = 0;
+ s->rx_bytes = 0;
+ s->tx_packets = 0;
+ s->tx_bytes = 0;
s->tso_packets = 0;
s->tso_bytes = 0;
s->tso_inner_packets = 0;
@@ -160,6 +167,8 @@ void mlx5e_update_stats(struct mlx5e_priv *priv)
for (i = 0; i < priv->params.num_channels; i++) {
rq_stats = &priv->channel[i]->rq.stats;
+ s->rx_packets += rq_stats->packets;
+ s->rx_bytes += rq_stats->bytes;
s->lro_packets += rq_stats->lro_packets;
s->lro_bytes += rq_stats->lro_bytes;
s->rx_csum_none += rq_stats->csum_none;
@@ -169,6 +178,8 @@ void mlx5e_update_stats(struct mlx5e_priv *priv)
for (j = 0; j < priv->params.num_tc; j++) {
sq_stats = &priv->channel[i]->sq[j].stats;
+ s->tx_packets += sq_stats->packets;
+ s->tx_bytes += sq_stats->bytes;
s->tso_packets += sq_stats->tso_packets;
s->tso_bytes += sq_stats->tso_bytes;
s->tso_inner_packets += sq_stats->tso_inner_packets;
@@ -233,23 +244,6 @@ void mlx5e_update_stats(struct mlx5e_priv *priv)
s->tx_broadcast_bytes =
MLX5_GET_CTR(out, transmitted_eth_broadcast.octets);
- s->rx_packets =
- s->rx_unicast_packets +
- s->rx_multicast_packets +
- s->rx_broadcast_packets;
- s->rx_bytes =
- s->rx_unicast_bytes +
- s->rx_multicast_bytes +
- s->rx_broadcast_bytes;
- s->tx_packets =
- s->tx_unicast_packets +
- s->tx_multicast_packets +
- s->tx_broadcast_packets;
- s->tx_bytes =
- s->tx_unicast_bytes +
- s->tx_multicast_bytes +
- s->tx_broadcast_bytes;
-
/* Update calculated offload counters */
s->tx_csum_offload = s->tx_packets - tx_offload_none - s->tx_csum_inner;
s->rx_csum_good = s->rx_packets - s->rx_csum_none -
@@ -1211,7 +1205,6 @@ static void mlx5e_fill_indir_rqt_rqns(struct mlx5e_priv *priv, void *rqtc)
ix = mlx5e_bits_invert(i, MLX5E_LOG_INDIR_RQT_SIZE);
ix = priv->params.indirection_rqt[ix];
- ix = ix % priv->params.num_channels;
MLX5_SET(rqtc, rqtc, rq_num[i],
test_bit(MLX5E_STATE_OPENED, &priv->state) ?
priv->channel[ix]->rq.rqn :
@@ -1329,7 +1322,22 @@ static void mlx5e_build_tir_ctx_lro(void *tirc, struct mlx5e_priv *priv)
lro_timer_supported_periods[2]));
}
-static int mlx5e_modify_tir_lro(struct mlx5e_priv *priv, int tt)
+void mlx5e_build_tir_ctx_hash(void *tirc, struct mlx5e_priv *priv)
+{
+ MLX5_SET(tirc, tirc, rx_hash_fn,
+ mlx5e_rx_hash_fn(priv->params.rss_hfunc));
+ if (priv->params.rss_hfunc == ETH_RSS_HASH_TOP) {
+ void *rss_key = MLX5_ADDR_OF(tirc, tirc,
+ rx_hash_toeplitz_key);
+ size_t len = MLX5_FLD_SZ_BYTES(tirc,
+ rx_hash_toeplitz_key);
+
+ MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
+ memcpy(rss_key, priv->params.toeplitz_hash_key, len);
+ }
+}
+
+static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
@@ -1337,6 +1345,7 @@ static int mlx5e_modify_tir_lro(struct mlx5e_priv *priv, int tt)
void *tirc;
int inlen;
int err;
+ int tt;
inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
in = mlx5_vzalloc(inlen);
@@ -1348,7 +1357,11 @@ static int mlx5e_modify_tir_lro(struct mlx5e_priv *priv, int tt)
mlx5e_build_tir_ctx_lro(tirc, priv);
- err = mlx5_core_modify_tir(mdev, priv->tirn[tt], in, inlen);
+ for (tt = 0; tt < MLX5E_NUM_TT; tt++) {
+ err = mlx5_core_modify_tir(mdev, priv->tirn[tt], in, inlen);
+ if (err)
+ break;
+ }
kvfree(in);
@@ -1703,17 +1716,7 @@ static void mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, int tt)
default:
MLX5_SET(tirc, tirc, indirect_table,
priv->rqtn[MLX5E_INDIRECTION_RQT]);
- MLX5_SET(tirc, tirc, rx_hash_fn,
- mlx5e_rx_hash_fn(priv->params.rss_hfunc));
- if (priv->params.rss_hfunc == ETH_RSS_HASH_TOP) {
- void *rss_key = MLX5_ADDR_OF(tirc, tirc,
- rx_hash_toeplitz_key);
- size_t len = MLX5_FLD_SZ_BYTES(tirc,
- rx_hash_toeplitz_key);
-
- MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
- memcpy(rss_key, priv->params.toeplitz_hash_key, len);
- }
+ mlx5e_build_tir_ctx_hash(tirc, priv);
break;
}
@@ -1883,7 +1886,25 @@ static int mlx5e_setup_tc(struct net_device *netdev, u8 tc)
static int mlx5e_ndo_setup_tc(struct net_device *dev, u32 handle,
__be16 proto, struct tc_to_netdev *tc)
{
- if (handle != TC_H_ROOT || tc->type != TC_SETUP_MQPRIO)
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
+ goto mqprio;
+
+ switch (tc->type) {
+ case TC_SETUP_CLSFLOWER:
+ switch (tc->cls_flower->command) {
+ case TC_CLSFLOWER_REPLACE:
+ return mlx5e_configure_flower(priv, proto, tc->cls_flower);
+ case TC_CLSFLOWER_DESTROY:
+ return mlx5e_delete_flower(priv, tc->cls_flower);
+ }
+ default:
+ return -EOPNOTSUPP;
+ }
+
+mqprio:
+ if (tc->type != TC_SETUP_MQPRIO)
return -EINVAL;
return mlx5e_setup_tc(dev, tc->tc);
@@ -1950,8 +1971,10 @@ static int mlx5e_set_features(struct net_device *netdev,
mlx5e_close_locked(priv->netdev);
priv->params.lro_en = !!(features & NETIF_F_LRO);
- mlx5e_modify_tir_lro(priv, MLX5E_TT_IPV4_TCP);
- mlx5e_modify_tir_lro(priv, MLX5E_TT_IPV6_TCP);
+ err = mlx5e_modify_tirs_lro(priv);
+ if (err)
+ mlx5_core_warn(priv->mdev, "lro modify failed, %d\n",
+ err);
if (was_opened)
err = mlx5e_open_locked(priv->netdev);
@@ -1966,6 +1989,13 @@ static int mlx5e_set_features(struct net_device *netdev,
mlx5e_disable_vlan_filter(priv);
}
+ if ((changes & NETIF_F_HW_TC) && !(features & NETIF_F_HW_TC) &&
+ mlx5e_tc_num_filters(priv)) {
+ netdev_err(netdev,
+ "Active offloaded tc filters, can't turn hw_tc_offload off\n");
+ return -EINVAL;
+ }
+
return err;
}
@@ -2253,12 +2283,20 @@ static void mlx5e_ets_init(struct mlx5e_priv *priv)
}
#endif
+void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len,
+ int num_channels)
+{
+ int i;
+
+ for (i = 0; i < len; i++)
+ indirection_rqt[i] = i % num_channels;
+}
+
static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev,
struct net_device *netdev,
int num_channels)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
- int i;
priv->params.log_sq_size =
MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
@@ -2281,8 +2319,8 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev,
netdev_rss_key_fill(priv->params.toeplitz_hash_key,
sizeof(priv->params.toeplitz_hash_key));
- for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++)
- priv->params.indirection_rqt[i] = i % num_channels;
+ mlx5e_build_default_indir_rqt(priv->params.indirection_rqt,
+ MLX5E_INDIR_RQT_SIZE, num_channels);
priv->params.lro_wqe_sz =
MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
@@ -2365,6 +2403,13 @@ static void mlx5e_build_netdev(struct net_device *netdev)
if (!priv->params.lro_en)
netdev->features &= ~NETIF_F_LRO;
+#define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.f)
+ if (FT_CAP(flow_modify_en) &&
+ FT_CAP(modify_root) &&
+ FT_CAP(identified_miss_table_mode) &&
+ FT_CAP(flow_table_modify))
+ priv->netdev->hw_features |= NETIF_F_HW_TC;
+
netdev->features |= NETIF_F_HIGHDMA;
netdev->priv_flags |= IFF_UNICAST_FLT;
@@ -2486,6 +2531,10 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev)
mlx5e_vxlan_init(priv);
+ err = mlx5e_tc_init(priv);
+ if (err)
+ goto err_destroy_flow_tables;
+
#ifdef CONFIG_MLX5_CORE_EN_DCB
mlx5e_dcbnl_ieee_setets_core(priv, &priv->params.ets);
#endif
@@ -2493,7 +2542,7 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev)
err = register_netdev(netdev);
if (err) {
mlx5_core_err(mdev, "register_netdev failed, %d\n", err);
- goto err_destroy_flow_tables;
+ goto err_tc_cleanup;
}
if (mlx5e_vxlan_allowed(mdev))
@@ -2504,6 +2553,9 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev)
return priv;
+err_tc_cleanup:
+ mlx5e_tc_cleanup(priv);
+
err_destroy_flow_tables:
mlx5e_destroy_flow_tables(priv);
@@ -2551,6 +2603,7 @@ static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv)
mlx5e_disable_async_events(priv);
flush_scheduled_work();
unregister_netdev(netdev);
+ mlx5e_tc_cleanup(priv);
mlx5e_vxlan_cleanup(priv);
mlx5e_destroy_flow_tables(priv);
mlx5e_destroy_tirs(priv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 519a07f253f9..58d4e2f962c3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -35,6 +35,7 @@
#include <linux/tcp.h>
#include <net/busy_poll.h>
#include "en.h"
+#include "en_tc.h"
static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp)
{
@@ -224,6 +225,8 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
if (cqe_has_vlan(cqe))
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
be16_to_cpu(cqe->vlan_info));
+
+ skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK;
}
int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
@@ -231,10 +234,6 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
int work_done;
- /* avoid accessing cq (dma coherent memory) if not needed */
- if (!test_and_clear_bit(MLX5E_CQ_HAS_CQES, &cq->flags))
- return 0;
-
for (work_done = 0; work_done < budget; work_done++) {
struct mlx5e_rx_wqe *wqe;
struct mlx5_cqe64 *cqe;
@@ -268,6 +267,7 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
mlx5e_build_rx_skb(cqe, rq, skb);
rq->stats.packets++;
+ rq->stats.bytes += be32_to_cpu(cqe->byte_cnt);
napi_gro_receive(cq->napi, skb);
wq_ll_pop:
@@ -280,8 +280,5 @@ wq_ll_pop:
/* ensure cq space is freed before enabling more cqes */
wmb();
- if (work_done == budget)
- set_bit(MLX5E_CQ_HAS_CQES, &cq->flags);
-
return work_done;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
new file mode 100644
index 000000000000..b3de09f13425
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -0,0 +1,429 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <net/flow_dissector.h>
+#include <net/pkt_cls.h>
+#include <net/tc_act/tc_gact.h>
+#include <net/tc_act/tc_skbedit.h>
+#include <linux/mlx5/fs.h>
+#include <linux/mlx5/device.h>
+#include <linux/rhashtable.h>
+#include "en.h"
+#include "en_tc.h"
+
+struct mlx5e_tc_flow {
+ struct rhash_head node;
+ u64 cookie;
+ struct mlx5_flow_rule *rule;
+};
+
+#define MLX5E_TC_FLOW_TABLE_NUM_ENTRIES 1024
+#define MLX5E_TC_FLOW_TABLE_NUM_GROUPS 4
+
+static struct mlx5_flow_rule *mlx5e_tc_add_flow(struct mlx5e_priv *priv,
+ u32 *match_c, u32 *match_v,
+ u32 action, u32 flow_tag)
+{
+ struct mlx5_flow_destination dest = {
+ .type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE,
+ {.ft = priv->fts.vlan.t},
+ };
+ struct mlx5_flow_rule *rule;
+ bool table_created = false;
+
+ if (IS_ERR_OR_NULL(priv->fts.tc.t)) {
+ priv->fts.tc.t =
+ mlx5_create_auto_grouped_flow_table(priv->fts.ns, 0,
+ MLX5E_TC_FLOW_TABLE_NUM_ENTRIES,
+ MLX5E_TC_FLOW_TABLE_NUM_GROUPS);
+ if (IS_ERR(priv->fts.tc.t)) {
+ netdev_err(priv->netdev,
+ "Failed to create tc offload table\n");
+ return ERR_CAST(priv->fts.tc.t);
+ }
+
+ table_created = true;
+ }
+
+ rule = mlx5_add_flow_rule(priv->fts.tc.t, MLX5_MATCH_OUTER_HEADERS,
+ match_c, match_v,
+ action, flow_tag,
+ action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST ? &dest : NULL);
+
+ if (IS_ERR(rule) && table_created) {
+ mlx5_destroy_flow_table(priv->fts.tc.t);
+ priv->fts.tc.t = NULL;
+ }
+
+ return rule;
+}
+
+static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
+ struct mlx5_flow_rule *rule)
+{
+ mlx5_del_flow_rule(rule);
+
+ if (!mlx5e_tc_num_filters(priv)) {
+ mlx5_destroy_flow_table(priv->fts.tc.t);
+ priv->fts.tc.t = NULL;
+ }
+}
+
+static int parse_cls_flower(struct mlx5e_priv *priv,
+ u32 *match_c, u32 *match_v,
+ struct tc_cls_flower_offload *f)
+{
+ void *headers_c = MLX5_ADDR_OF(fte_match_param, match_c, outer_headers);
+ void *headers_v = MLX5_ADDR_OF(fte_match_param, match_v, outer_headers);
+ u16 addr_type = 0;
+ u8 ip_proto = 0;
+
+ if (f->dissector->used_keys &
+ ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+ BIT(FLOW_DISSECTOR_KEY_BASIC) |
+ BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_PORTS))) {
+ netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n",
+ f->dissector->used_keys);
+ return -EOPNOTSUPP;
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
+ struct flow_dissector_key_control *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ f->key);
+ addr_type = key->addr_type;
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+ struct flow_dissector_key_basic *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ f->key);
+ struct flow_dissector_key_basic *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ f->mask);
+ ip_proto = key->ip_proto;
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
+ ntohs(mask->n_proto));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
+ ntohs(key->n_proto));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
+ mask->ip_proto);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
+ key->ip_proto);
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+ struct flow_dissector_key_eth_addrs *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ETH_ADDRS,
+ f->key);
+ struct flow_dissector_key_eth_addrs *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_ETH_ADDRS,
+ f->mask);
+
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dmac_47_16),
+ mask->dst);
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dmac_47_16),
+ key->dst);
+
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ smac_47_16),
+ mask->src);
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ smac_47_16),
+ key->src);
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ struct flow_dissector_key_ipv4_addrs *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ f->key);
+ struct flow_dissector_key_ipv4_addrs *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ f->mask);
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &mask->src, sizeof(mask->src));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &key->src, sizeof(key->src));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &mask->dst, sizeof(mask->dst));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &key->dst, sizeof(key->dst));
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ struct flow_dissector_key_ipv6_addrs *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+ f->key);
+ struct flow_dissector_key_ipv6_addrs *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+ f->mask);
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &mask->src, sizeof(mask->src));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6),
+ &key->src, sizeof(key->src));
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &mask->dst, sizeof(mask->dst));
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+ &key->dst, sizeof(key->dst));
+ }
+
+ if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
+ struct flow_dissector_key_ports *key =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_PORTS,
+ f->key);
+ struct flow_dissector_key_ports *mask =
+ skb_flow_dissector_target(f->dissector,
+ FLOW_DISSECTOR_KEY_PORTS,
+ f->mask);
+ switch (ip_proto) {
+ case IPPROTO_TCP:
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ tcp_sport, ntohs(mask->src));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ tcp_sport, ntohs(key->src));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ tcp_dport, ntohs(mask->dst));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ tcp_dport, ntohs(key->dst));
+ break;
+
+ case IPPROTO_UDP:
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ udp_sport, ntohs(mask->src));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ udp_sport, ntohs(key->src));
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+ udp_dport, ntohs(mask->dst));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+ udp_dport, ntohs(key->dst));
+ break;
+ default:
+ netdev_err(priv->netdev,
+ "Only UDP and TCP transport are supported\n");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int parse_tc_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
+ u32 *action, u32 *flow_tag)
+{
+ const struct tc_action *a;
+
+ if (tc_no_actions(exts))
+ return -EINVAL;
+
+ *flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
+ *action = 0;
+
+ tc_for_each_action(a, exts) {
+ /* Only support a single action per rule */
+ if (*action)
+ return -EINVAL;
+
+ if (is_tcf_gact_shot(a)) {
+ *action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
+ continue;
+ }
+
+ if (is_tcf_skbedit_mark(a)) {
+ u32 mark = tcf_skbedit_mark(a);
+
+ if (mark & ~MLX5E_TC_FLOW_ID_MASK) {
+ netdev_warn(priv->netdev, "Bad flow mark - only 16 bit is supported: 0x%x\n",
+ mark);
+ return -EINVAL;
+ }
+
+ *flow_tag = mark;
+ *action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ continue;
+ }
+
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
+ struct tc_cls_flower_offload *f)
+{
+ struct mlx5e_tc_flow_table *tc = &priv->fts.tc;
+ u32 *match_c;
+ u32 *match_v;
+ int err = 0;
+ u32 flow_tag;
+ u32 action;
+ struct mlx5e_tc_flow *flow;
+ struct mlx5_flow_rule *old = NULL;
+
+ flow = rhashtable_lookup_fast(&tc->ht, &f->cookie,
+ tc->ht_params);
+ if (flow)
+ old = flow->rule;
+ else
+ flow = kzalloc(sizeof(*flow), GFP_KERNEL);
+
+ match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
+ match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
+ if (!match_c || !match_v || !flow) {
+ err = -ENOMEM;
+ goto err_free;
+ }
+
+ flow->cookie = f->cookie;
+
+ err = parse_cls_flower(priv, match_c, match_v, f);
+ if (err < 0)
+ goto err_free;
+
+ err = parse_tc_actions(priv, f->exts, &action, &flow_tag);
+ if (err < 0)
+ goto err_free;
+
+ err = rhashtable_insert_fast(&tc->ht, &flow->node,
+ tc->ht_params);
+ if (err)
+ goto err_free;
+
+ flow->rule = mlx5e_tc_add_flow(priv, match_c, match_v, action,
+ flow_tag);
+ if (IS_ERR(flow->rule)) {
+ err = PTR_ERR(flow->rule);
+ goto err_hash_del;
+ }
+
+ if (old)
+ mlx5e_tc_del_flow(priv, old);
+
+ goto out;
+
+err_hash_del:
+ rhashtable_remove_fast(&tc->ht, &flow->node, tc->ht_params);
+
+err_free:
+ if (!old)
+ kfree(flow);
+out:
+ kfree(match_c);
+ kfree(match_v);
+ return err;
+}
+
+int mlx5e_delete_flower(struct mlx5e_priv *priv,
+ struct tc_cls_flower_offload *f)
+{
+ struct mlx5e_tc_flow *flow;
+ struct mlx5e_tc_flow_table *tc = &priv->fts.tc;
+
+ flow = rhashtable_lookup_fast(&tc->ht, &f->cookie,
+ tc->ht_params);
+ if (!flow)
+ return -EINVAL;
+
+ rhashtable_remove_fast(&tc->ht, &flow->node, tc->ht_params);
+
+ mlx5e_tc_del_flow(priv, flow->rule);
+
+ kfree(flow);
+
+ return 0;
+}
+
+static const struct rhashtable_params mlx5e_tc_flow_ht_params = {
+ .head_offset = offsetof(struct mlx5e_tc_flow, node),
+ .key_offset = offsetof(struct mlx5e_tc_flow, cookie),
+ .key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie),
+ .automatic_shrinking = true,
+};
+
+int mlx5e_tc_init(struct mlx5e_priv *priv)
+{
+ struct mlx5e_tc_flow_table *tc = &priv->fts.tc;
+
+ tc->ht_params = mlx5e_tc_flow_ht_params;
+ return rhashtable_init(&tc->ht, &tc->ht_params);
+}
+
+static void _mlx5e_tc_del_flow(void *ptr, void *arg)
+{
+ struct mlx5e_tc_flow *flow = ptr;
+ struct mlx5e_priv *priv = arg;
+
+ mlx5e_tc_del_flow(priv, flow->rule);
+ kfree(flow);
+}
+
+void mlx5e_tc_cleanup(struct mlx5e_priv *priv)
+{
+ struct mlx5e_tc_flow_table *tc = &priv->fts.tc;
+
+ rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, priv);
+
+ if (!IS_ERR_OR_NULL(priv->fts.tc.t)) {
+ mlx5_destroy_flow_table(priv->fts.tc.t);
+ priv->fts.tc.t = NULL;
+ }
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
new file mode 100644
index 000000000000..d677428dc10f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MLX5_EN_TC_H__
+#define __MLX5_EN_TC_H__
+
+#define MLX5E_TC_FLOW_ID_MASK 0x0000ffff
+
+int mlx5e_tc_init(struct mlx5e_priv *priv);
+void mlx5e_tc_cleanup(struct mlx5e_priv *priv);
+
+int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
+ struct tc_cls_flower_offload *f);
+int mlx5e_delete_flower(struct mlx5e_priv *priv,
+ struct tc_cls_flower_offload *f);
+
+static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv)
+{
+ return atomic_read(&priv->fts.tc.ht.nelems);
+}
+
+#endif /* __MLX5_EN_TC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index c34f4f3e9537..94a14f85f70d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -177,6 +177,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
unsigned int skb_len = skb->len;
u8 opcode = MLX5_OPCODE_SEND;
dma_addr_t dma_addr = 0;
+ unsigned int num_bytes;
bool bf = false;
u16 headlen;
u16 ds_cnt;
@@ -216,16 +217,17 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
sq->stats.tso_bytes += skb->len - ihs;
}
- wi->num_bytes = skb->len +
- (skb_shinfo(skb)->gso_segs - 1) * ihs;
+ num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs;
} else {
bf = sq->bf_budget &&
!skb->xmit_more &&
!skb_shinfo(skb)->nr_frags;
ihs = mlx5e_get_inline_hdr_size(sq, skb, bf);
- wi->num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
+ num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
}
+ wi->num_bytes = num_bytes;
+
if (skb_vlan_tag_present(skb)) {
mlx5e_insert_vlan(eseg->inline_hdr_start, skb, ihs, &skb_data,
&skb_len);
@@ -317,6 +319,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
sq->bf_budget = bf ? sq->bf_budget - 1 : 0;
sq->stats.packets++;
+ sq->stats.bytes += num_bytes;
return NETDEV_TX_OK;
dma_unmap_wqe_err:
@@ -345,10 +348,6 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq)
u16 sqcc;
int i;
- /* avoid accessing cq (dma coherent memory) if not needed */
- if (!test_and_clear_bit(MLX5E_CQ_HAS_CQES, &cq->flags))
- return false;
-
sq = container_of(cq, struct mlx5e_sq, cq);
npkts = 0;
@@ -432,10 +431,6 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq)
netif_tx_wake_queue(sq->txq);
sq->stats.wake++;
}
- if (i == MLX5E_TX_CQ_POLL_BUDGET) {
- set_bit(MLX5E_CQ_HAS_CQES, &cq->flags);
- return true;
- }
- return false;
+ return (i == MLX5E_TX_CQ_POLL_BUDGET);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index 4ac8d716dbdd..66d51a77609e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -88,7 +88,6 @@ void mlx5e_completion_event(struct mlx5_core_cq *mcq)
{
struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq);
- set_bit(MLX5E_CQ_HAS_CQES, &cq->flags);
set_bit(MLX5E_CHANNEL_NAPI_SCHED, &cq->channel->flags);
barrier();
napi_schedule(cq->napi);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index a9894d2e8e26..f46f1db0fc00 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -218,19 +218,22 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
match_value);
memcpy(in_match_value, &fte->val, MLX5_ST_SZ_BYTES(fte_match_param));
- in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination);
- list_for_each_entry(dst, &fte->node.children, node.list) {
- unsigned int id;
-
- MLX5_SET(dest_format_struct, in_dests, destination_type,
- dst->dest_attr.type);
- if (dst->dest_attr.type ==
- MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
- id = dst->dest_attr.ft->id;
- else
- id = dst->dest_attr.tir_num;
- MLX5_SET(dest_format_struct, in_dests, destination_id, id);
- in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
+ if (fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+ in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination);
+ list_for_each_entry(dst, &fte->node.children, node.list) {
+ unsigned int id;
+
+ MLX5_SET(dest_format_struct, in_dests, destination_type,
+ dst->dest_attr.type);
+ if (dst->dest_attr.type ==
+ MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) {
+ id = dst->dest_attr.ft->id;
+ } else {
+ id = dst->dest_attr.tir_num;
+ }
+ MLX5_SET(dest_format_struct, in_dests, destination_id, id);
+ in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
+ }
}
memset(out, 0, sizeof(out));
err = mlx5_cmd_exec_check_status(dev, in, inlen, out,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 6f68dba8d7ed..e848d708d2b7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -73,8 +73,8 @@
#define BY_PASS_MIN_LEVEL (KENREL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\
LEFTOVERS_MAX_FT)
-#define KERNEL_MAX_FT 2
-#define KERNEL_NUM_PRIOS 1
+#define KERNEL_MAX_FT 3
+#define KERNEL_NUM_PRIOS 2
#define KENREL_MIN_LEVEL 2
struct node_caps {
@@ -360,8 +360,8 @@ static void del_rule(struct fs_node *node)
memcpy(match_value, fte->val, sizeof(fte->val));
fs_get_obj(ft, fg->node.parent);
list_del(&rule->node.list);
- fte->dests_size--;
- if (fte->dests_size) {
+ if ((fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
+ --fte->dests_size) {
err = mlx5_cmd_update_fte(dev, ft,
fg->id, fte);
if (err)
@@ -763,7 +763,8 @@ static struct mlx5_flow_rule *alloc_rule(struct mlx5_flow_destination *dest)
return NULL;
rule->node.type = FS_TYPE_FLOW_DEST;
- memcpy(&rule->dest_attr, dest, sizeof(*dest));
+ if (dest)
+ memcpy(&rule->dest_attr, dest, sizeof(*dest));
return rule;
}
@@ -785,8 +786,9 @@ static struct mlx5_flow_rule *add_rule_fte(struct fs_fte *fte,
/* Add dest to dests list- added as first element after the head */
tree_init_node(&rule->node, 1, del_rule);
list_add_tail(&rule->node.list, &fte->node.children);
- fte->dests_size++;
- if (fte->dests_size == 1)
+ if (dest)
+ fte->dests_size++;
+ if (fte->dests_size == 1 || !dest)
err = mlx5_cmd_create_fte(get_dev(&ft->node),
ft, fg->id, fte);
else
@@ -802,7 +804,8 @@ static struct mlx5_flow_rule *add_rule_fte(struct fs_fte *fte,
free_rule:
list_del(&rule->node.list);
kfree(rule);
- fte->dests_size--;
+ if (dest)
+ fte->dests_size--;
return ERR_PTR(err);
}
@@ -996,6 +999,9 @@ mlx5_add_flow_rule(struct mlx5_flow_table *ft,
struct mlx5_flow_group *g;
struct mlx5_flow_rule *rule;
+ if ((action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) && !dest)
+ return ERR_PTR(-EINVAL);
+
nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT);
fs_for_each_fg(g, ft)
if (compare_match_criteria(g->mask.match_criteria_enable,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index c071077aafbd..7992c553c1f5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -215,7 +215,7 @@ mlxsw_pci_queue_elem_info_producer_get(struct mlxsw_pci_queue *q)
{
int index = q->producer_counter & (q->count - 1);
- if ((q->producer_counter - q->consumer_counter) == q->count)
+ if ((u16) (q->producer_counter - q->consumer_counter) == q->count)
return NULL;
return mlxsw_pci_queue_elem_info_get(q, index);
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 53487d3eb9f6..4afbc3e9e381 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2592,9 +2592,7 @@ static int mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port,
if (mlxsw_sp_port->bridged) {
mlxsw_sp_port_active_vlans_del(mlxsw_sp_port);
mlxsw_sp_port_bridge_leave(mlxsw_sp_port, false);
-
- if (lag->ref_count == 1)
- mlxsw_sp_master_bridge_dec(mlxsw_sp, NULL);
+ mlxsw_sp_master_bridge_dec(mlxsw_sp, NULL);
}
if (lag->ref_count == 1) {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 1b691d7e4a2a..4b8abaf06321 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -123,6 +123,8 @@ struct mlxsw_sp {
#define MLXSW_SP_DEFAULT_LEARNING_INTERVAL 100
unsigned int interval; /* ms */
} fdb_notify;
+#define MLXSW_SP_MIN_AGEING_TIME 10
+#define MLXSW_SP_MAX_AGEING_TIME 1000000
#define MLXSW_SP_DEFAULT_AGEING_TIME 300
u32 ageing_time;
struct mlxsw_sp_upper master_bridge;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 7b56098acc58..e1c74efff51a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -311,8 +311,13 @@ static int mlxsw_sp_port_attr_br_ageing_set(struct mlxsw_sp_port *mlxsw_sp_port,
unsigned long ageing_jiffies = clock_t_to_jiffies(ageing_clock_t);
u32 ageing_time = jiffies_to_msecs(ageing_jiffies) / 1000;
- if (switchdev_trans_ph_prepare(trans))
- return 0;
+ if (switchdev_trans_ph_prepare(trans)) {
+ if (ageing_time < MLXSW_SP_MIN_AGEING_TIME ||
+ ageing_time > MLXSW_SP_MAX_AGEING_TIME)
+ return -ERANGE;
+ else
+ return 0;
+ }
return mlxsw_sp_ageing_set(mlxsw_sp, ageing_time);
}
diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c
index 00cfd95ca59d..3e67f451f2ab 100644
--- a/drivers/net/ethernet/moxa/moxart_ether.c
+++ b/drivers/net/ethernet/moxa/moxart_ether.c
@@ -474,9 +474,9 @@ static int moxart_mac_probe(struct platform_device *pdev)
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
ndev->base_addr = res->start;
priv->base = devm_ioremap_resource(p_dev, res);
- ret = IS_ERR(priv->base);
- if (ret) {
+ if (IS_ERR(priv->base)) {
dev_err(p_dev, "devm_ioremap_resource failed\n");
+ ret = PTR_ERR(priv->base);
goto init_fail;
}
diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index e5604eec81bf..fcb8e9ba51d9 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -303,6 +303,9 @@ struct qed_hwfn {
bool b_int_enabled;
bool b_int_requested;
+ /* True if the driver requests for the link */
+ bool b_drv_link_init;
+
struct qed_mcp_info *mcp_info;
struct qed_hw_cid_data *p_tx_cids;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index 592e0e6d9b42..a368f5e71d95 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -2919,7 +2919,19 @@ struct eth_vport_rx_mode {
};
struct eth_vport_tpa_param {
- u64 reserved[2];
+ u8 tpa_ipv4_en_flg;
+ u8 tpa_ipv6_en_flg;
+ u8 tpa_ipv4_tunn_en_flg;
+ u8 tpa_ipv6_tunn_en_flg;
+ u8 tpa_pkt_split_flg;
+ u8 tpa_hdr_data_split_flg;
+ u8 tpa_gro_consistent_flg;
+ u8 tpa_max_aggs_num;
+ u16 tpa_max_size;
+ u16 tpa_min_size_to_start;
+ u16 tpa_min_size_to_cont;
+ u8 max_buff_num;
+ u8 reserved;
};
struct eth_vport_tx_mode {
@@ -3609,6 +3621,9 @@ struct public_port {
u32 fc_npiv_nvram_tbl_addr;
u32 fc_npiv_nvram_tbl_size;
u32 transceiver_data;
+#define PMM_TRANSCEIVER_STATE_MASK 0x000000FF
+#define PMM_TRANSCEIVER_STATE_SHIFT 0x00000000
+#define PMM_TRANSCEIVER_STATE_PRESENT 0x00000001
};
/**************************************/
@@ -3943,6 +3958,14 @@ enum MFW_DRV_MSG_TYPE {
MFW_DRV_MSG_DCBX_REMOTE_MIB_UPDATED,
MFW_DRV_MSG_DCBX_OPERATIONAL_MIB_UPDATED,
MFW_DRV_MSG_ERROR_RECOVERY,
+ MFW_DRV_MSG_BW_UPDATE,
+ MFW_DRV_MSG_S_TAG_UPDATE,
+ MFW_DRV_MSG_GET_LAN_STATS,
+ MFW_DRV_MSG_GET_FCOE_STATS,
+ MFW_DRV_MSG_GET_ISCSI_STATS,
+ MFW_DRV_MSG_GET_RDMA_STATS,
+ MFW_DRV_MSG_FAILURE_DETECTED,
+ MFW_DRV_MSG_TRANSCEIVER_STATE_CHANGE,
MFW_DRV_MSG_MAX
};
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index 102ddc73b841..3f35c6ca9252 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -132,16 +132,29 @@ struct qed_sp_vport_update_params {
struct qed_filter_accept_flags accept_flags;
};
+enum qed_tpa_mode {
+ QED_TPA_MODE_NONE,
+ QED_TPA_MODE_UNUSED,
+ QED_TPA_MODE_GRO,
+ QED_TPA_MODE_MAX
+};
+
+struct qed_sp_vport_start_params {
+ enum qed_tpa_mode tpa_mode;
+ bool remove_inner_vlan;
+ bool drop_ttl0;
+ u8 max_buffers_per_cqe;
+ u32 concrete_fid;
+ u16 opaque_fid;
+ u8 vport_id;
+ u16 mtu;
+};
+
#define QED_MAX_SGES_NUM 16
#define CRC32_POLY 0x1edc6f41
static int qed_sp_vport_start(struct qed_hwfn *p_hwfn,
- u32 concrete_fid,
- u16 opaque_fid,
- u8 vport_id,
- u16 mtu,
- u8 drop_ttl0_flg,
- u8 inner_vlan_removal_en_flg)
+ struct qed_sp_vport_start_params *p_params)
{
struct vport_start_ramrod_data *p_ramrod = NULL;
struct qed_spq_entry *p_ent = NULL;
@@ -150,13 +163,13 @@ static int qed_sp_vport_start(struct qed_hwfn *p_hwfn,
u16 rx_mode = 0;
u8 abs_vport_id = 0;
- rc = qed_fw_vport(p_hwfn, vport_id, &abs_vport_id);
+ rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_vport_id);
if (rc != 0)
return rc;
memset(&init_data, 0, sizeof(init_data));
init_data.cid = qed_spq_get_cid(p_hwfn);
- init_data.opaque_fid = opaque_fid;
+ init_data.opaque_fid = p_params->opaque_fid;
init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
rc = qed_sp_init_request(p_hwfn, &p_ent,
@@ -168,9 +181,9 @@ static int qed_sp_vport_start(struct qed_hwfn *p_hwfn,
p_ramrod = &p_ent->ramrod.vport_start;
p_ramrod->vport_id = abs_vport_id;
- p_ramrod->mtu = cpu_to_le16(mtu);
- p_ramrod->inner_vlan_removal_en = inner_vlan_removal_en_flg;
- p_ramrod->drop_ttl0_en = drop_ttl0_flg;
+ p_ramrod->mtu = cpu_to_le16(p_params->mtu);
+ p_ramrod->inner_vlan_removal_en = p_params->remove_inner_vlan;
+ p_ramrod->drop_ttl0_en = p_params->drop_ttl0;
SET_FIELD(rx_mode, ETH_VPORT_RX_MODE_UCAST_DROP_ALL, 1);
SET_FIELD(rx_mode, ETH_VPORT_RX_MODE_MCAST_DROP_ALL, 1);
@@ -181,9 +194,26 @@ static int qed_sp_vport_start(struct qed_hwfn *p_hwfn,
memset(&p_ramrod->tpa_param, 0,
sizeof(struct eth_vport_tpa_param));
+ p_ramrod->tpa_param.max_buff_num = p_params->max_buffers_per_cqe;
+
+ switch (p_params->tpa_mode) {
+ case QED_TPA_MODE_GRO:
+ p_ramrod->tpa_param.tpa_max_aggs_num = ETH_TPA_MAX_AGGS_NUM;
+ p_ramrod->tpa_param.tpa_max_size = (u16)-1;
+ p_ramrod->tpa_param.tpa_min_size_to_cont = p_params->mtu / 2;
+ p_ramrod->tpa_param.tpa_min_size_to_start = p_params->mtu / 2;
+ p_ramrod->tpa_param.tpa_ipv4_en_flg = 1;
+ p_ramrod->tpa_param.tpa_ipv6_en_flg = 1;
+ p_ramrod->tpa_param.tpa_pkt_split_flg = 1;
+ p_ramrod->tpa_param.tpa_gro_consistent_flg = 1;
+ break;
+ default:
+ break;
+ }
+
/* Software Function ID in hwfn (PFs are 0 - 15, VFs are 16 - 135) */
p_ramrod->sw_fid = qed_concrete_to_sw_fid(p_hwfn->cdev,
- concrete_fid);
+ p_params->concrete_fid);
return qed_spq_post(p_hwfn, p_ent, NULL);
}
@@ -1592,24 +1622,25 @@ static void qed_register_eth_ops(struct qed_dev *cdev,
}
static int qed_start_vport(struct qed_dev *cdev,
- u8 vport_id,
- u16 mtu,
- u8 drop_ttl0_flg,
- u8 inner_vlan_removal_en_flg)
+ struct qed_start_vport_params *params)
{
int rc, i;
for_each_hwfn(cdev, i) {
+ struct qed_sp_vport_start_params start = { 0 };
struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
- rc = qed_sp_vport_start(p_hwfn,
- p_hwfn->hw_info.concrete_fid,
- p_hwfn->hw_info.opaque_fid,
- vport_id,
- mtu,
- drop_ttl0_flg,
- inner_vlan_removal_en_flg);
-
+ start.tpa_mode = params->gro_enable ? QED_TPA_MODE_GRO :
+ QED_TPA_MODE_NONE;
+ start.remove_inner_vlan = params->remove_inner_vlan;
+ start.drop_ttl0 = params->drop_ttl0;
+ start.opaque_fid = p_hwfn->hw_info.opaque_fid;
+ start.concrete_fid = p_hwfn->hw_info.concrete_fid;
+ start.vport_id = params->vport_id;
+ start.max_buffers_per_cqe = 16;
+ start.mtu = params->mtu;
+
+ rc = qed_sp_vport_start(p_hwfn, &start);
if (rc) {
DP_ERR(cdev, "Failed to start VPORT\n");
return rc;
@@ -1619,7 +1650,7 @@ static int qed_start_vport(struct qed_dev *cdev,
DP_VERBOSE(cdev, (QED_MSG_SPQ | NETIF_MSG_IFUP),
"Started V-PORT %d with MTU %d\n",
- vport_id, mtu);
+ start.vport_id, start.mtu);
}
qed_reset_vport_stats(cdev);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index f23ce734ab63..b89c9a8e1655 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -11,8 +11,8 @@
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/kernel.h>
-#include <linux/mutex.h>
#include <linux/slab.h>
+#include <linux/spinlock.h>
#include <linux/string.h>
#include "qed.h"
#include "qed_hsi.h"
@@ -168,8 +168,8 @@ int qed_mcp_cmd_init(struct qed_hwfn *p_hwfn,
if (!p_info->mfw_mb_shadow || !p_info->mfw_mb_addr)
goto err;
- /* Initialize the MFW mutex */
- mutex_init(&p_info->mutex);
+ /* Initialize the MFW spinlock */
+ spin_lock_init(&p_info->lock);
return 0;
@@ -179,6 +179,52 @@ err:
return -ENOMEM;
}
+/* Locks the MFW mailbox of a PF to ensure a single access.
+ * The lock is achieved in most cases by holding a spinlock, causing other
+ * threads to wait till a previous access is done.
+ * In some cases (currently when a [UN]LOAD_REQ commands are sent), the single
+ * access is achieved by setting a blocking flag, which will fail other
+ * competing contexts to send their mailboxes.
+ */
+static int qed_mcp_mb_lock(struct qed_hwfn *p_hwfn,
+ u32 cmd)
+{
+ spin_lock_bh(&p_hwfn->mcp_info->lock);
+
+ /* The spinlock shouldn't be acquired when the mailbox command is
+ * [UN]LOAD_REQ, since the engine is locked by the MFW, and a parallel
+ * pending [UN]LOAD_REQ command of another PF together with a spinlock
+ * (i.e. interrupts are disabled) - can lead to a deadlock.
+ * It is assumed that for a single PF, no other mailbox commands can be
+ * sent from another context while sending LOAD_REQ, and that any
+ * parallel commands to UNLOAD_REQ can be cancelled.
+ */
+ if (cmd == DRV_MSG_CODE_LOAD_DONE || cmd == DRV_MSG_CODE_UNLOAD_DONE)
+ p_hwfn->mcp_info->block_mb_sending = false;
+
+ if (p_hwfn->mcp_info->block_mb_sending) {
+ DP_NOTICE(p_hwfn,
+ "Trying to send a MFW mailbox command [0x%x] in parallel to [UN]LOAD_REQ. Aborting.\n",
+ cmd);
+ spin_unlock_bh(&p_hwfn->mcp_info->lock);
+ return -EBUSY;
+ }
+
+ if (cmd == DRV_MSG_CODE_LOAD_REQ || cmd == DRV_MSG_CODE_UNLOAD_REQ) {
+ p_hwfn->mcp_info->block_mb_sending = true;
+ spin_unlock_bh(&p_hwfn->mcp_info->lock);
+ }
+
+ return 0;
+}
+
+static void qed_mcp_mb_unlock(struct qed_hwfn *p_hwfn,
+ u32 cmd)
+{
+ if (cmd != DRV_MSG_CODE_LOAD_REQ && cmd != DRV_MSG_CODE_UNLOAD_REQ)
+ spin_unlock_bh(&p_hwfn->mcp_info->lock);
+}
+
int qed_mcp_reset(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt)
{
@@ -187,6 +233,13 @@ int qed_mcp_reset(struct qed_hwfn *p_hwfn,
u32 org_mcp_reset_seq, cnt = 0;
int rc = 0;
+ /* Ensure that only a single thread is accessing the mailbox at a
+ * certain time.
+ */
+ rc = qed_mcp_mb_lock(p_hwfn, DRV_MSG_CODE_MCP_RESET);
+ if (rc != 0)
+ return rc;
+
/* Set drv command along with the updated sequence */
org_mcp_reset_seq = qed_rd(p_hwfn, p_ptt, MISCS_REG_GENERIC_POR_0);
DRV_MB_WR(p_hwfn, p_ptt, drv_mb_header,
@@ -209,6 +262,8 @@ int qed_mcp_reset(struct qed_hwfn *p_hwfn,
rc = -EAGAIN;
}
+ qed_mcp_mb_unlock(p_hwfn, DRV_MSG_CODE_MCP_RESET);
+
return rc;
}
@@ -275,14 +330,12 @@ static int qed_do_mcp_cmd(struct qed_hwfn *p_hwfn,
return rc;
}
-int qed_mcp_cmd(struct qed_hwfn *p_hwfn,
- struct qed_ptt *p_ptt,
- u32 cmd,
- u32 param,
- u32 *o_mcp_resp,
- u32 *o_mcp_param)
+static int qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ struct qed_mcp_mb_params *p_mb_params)
{
- int rc = 0;
+ u32 union_data_addr;
+ int rc;
/* MCP not initialized */
if (!qed_mcp_is_init(p_hwfn)) {
@@ -290,28 +343,56 @@ int qed_mcp_cmd(struct qed_hwfn *p_hwfn,
return -EBUSY;
}
- /* Lock Mutex to ensure only single thread is
- * accessing the MCP at one time
+ union_data_addr = p_hwfn->mcp_info->drv_mb_addr +
+ offsetof(struct public_drv_mb, union_data);
+
+ /* Ensure that only a single thread is accessing the mailbox at a
+ * certain time.
*/
- mutex_lock(&p_hwfn->mcp_info->mutex);
- rc = qed_do_mcp_cmd(p_hwfn, p_ptt, cmd, param,
- o_mcp_resp, o_mcp_param);
- /* Release Mutex */
- mutex_unlock(&p_hwfn->mcp_info->mutex);
+ rc = qed_mcp_mb_lock(p_hwfn, p_mb_params->cmd);
+ if (rc)
+ return rc;
+
+ if (p_mb_params->p_data_src != NULL)
+ qed_memcpy_to(p_hwfn, p_ptt, union_data_addr,
+ p_mb_params->p_data_src,
+ sizeof(*p_mb_params->p_data_src));
+
+ rc = qed_do_mcp_cmd(p_hwfn, p_ptt, p_mb_params->cmd,
+ p_mb_params->param, &p_mb_params->mcp_resp,
+ &p_mb_params->mcp_param);
+
+ if (p_mb_params->p_data_dst != NULL)
+ qed_memcpy_from(p_hwfn, p_ptt, p_mb_params->p_data_dst,
+ union_data_addr,
+ sizeof(*p_mb_params->p_data_dst));
+
+ qed_mcp_mb_unlock(p_hwfn, p_mb_params->cmd);
return rc;
}
-static void qed_mcp_set_drv_ver(struct qed_dev *cdev,
- struct qed_hwfn *p_hwfn,
- struct qed_ptt *p_ptt)
+int qed_mcp_cmd(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ u32 cmd,
+ u32 param,
+ u32 *o_mcp_resp,
+ u32 *o_mcp_param)
{
- u32 i;
+ struct qed_mcp_mb_params mb_params;
+ int rc;
+
+ memset(&mb_params, 0, sizeof(mb_params));
+ mb_params.cmd = cmd;
+ mb_params.param = param;
+ rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
+ if (rc)
+ return rc;
+
+ *o_mcp_resp = mb_params.mcp_resp;
+ *o_mcp_param = mb_params.mcp_param;
- /* Copy version string to MCP */
- for (i = 0; i < MCP_DRV_VER_STR_SIZE_DWORD; i++)
- DRV_MB_WR(p_hwfn, p_ptt, union_data.ver_str[i],
- *(u32 *)&cdev->ver_str[i * sizeof(u32)]);
+ return 0;
}
int qed_mcp_load_req(struct qed_hwfn *p_hwfn,
@@ -319,26 +400,18 @@ int qed_mcp_load_req(struct qed_hwfn *p_hwfn,
u32 *p_load_code)
{
struct qed_dev *cdev = p_hwfn->cdev;
- u32 param;
+ struct qed_mcp_mb_params mb_params;
+ union drv_union_data union_data;
int rc;
- if (!qed_mcp_is_init(p_hwfn)) {
- DP_NOTICE(p_hwfn, "MFW is not initialized !\n");
- return -EBUSY;
- }
-
- /* Save driver's version to shmem */
- qed_mcp_set_drv_ver(cdev, p_hwfn, p_ptt);
-
- DP_VERBOSE(p_hwfn, QED_MSG_SP, "fw_seq 0x%08x, drv_pulse 0x%x\n",
- p_hwfn->mcp_info->drv_mb_seq,
- p_hwfn->mcp_info->drv_pulse_seq);
-
+ memset(&mb_params, 0, sizeof(mb_params));
/* Load Request */
- rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_LOAD_REQ,
- (PDA_COMP | DRV_ID_MCP_HSI_VER_CURRENT |
- cdev->drv_type),
- p_load_code, &param);
+ mb_params.cmd = DRV_MSG_CODE_LOAD_REQ;
+ mb_params.param = PDA_COMP | DRV_ID_MCP_HSI_VER_CURRENT |
+ cdev->drv_type;
+ memcpy(&union_data.ver_str, cdev->ver_str, MCP_DRV_VER_STR_SIZE);
+ mb_params.p_data_src = &union_data;
+ rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
/* if mcp fails to respond we must abort */
if (rc) {
@@ -346,6 +419,8 @@ int qed_mcp_load_req(struct qed_hwfn *p_hwfn,
return rc;
}
+ *p_load_code = mb_params.mcp_resp;
+
/* If MFW refused (e.g. other port is in diagnostic mode) we
* must abort. This can happen in the following cases:
* - Other port is in diagnostic mode
@@ -365,6 +440,33 @@ int qed_mcp_load_req(struct qed_hwfn *p_hwfn,
return 0;
}
+static void qed_mcp_handle_transceiver_change(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt)
+{
+ u32 transceiver_state;
+
+ transceiver_state = qed_rd(p_hwfn, p_ptt,
+ p_hwfn->mcp_info->port_addr +
+ offsetof(struct public_port,
+ transceiver_data));
+
+ DP_VERBOSE(p_hwfn,
+ (NETIF_MSG_HW | QED_MSG_SP),
+ "Received transceiver state update [0x%08x] from mfw [Addr 0x%x]\n",
+ transceiver_state,
+ (u32)(p_hwfn->mcp_info->port_addr +
+ offsetof(struct public_port,
+ transceiver_data)));
+
+ transceiver_state = GET_FIELD(transceiver_state,
+ PMM_TRANSCEIVER_STATE);
+
+ if (transceiver_state == PMM_TRANSCEIVER_STATE_PRESENT)
+ DP_NOTICE(p_hwfn, "Transceiver is present.\n");
+ else
+ DP_NOTICE(p_hwfn, "Transceiver is unplugged.\n");
+}
+
static void qed_mcp_handle_link_change(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt,
bool b_reset)
@@ -390,7 +492,10 @@ static void qed_mcp_handle_link_change(struct qed_hwfn *p_hwfn,
return;
}
- p_link->link_up = !!(status & LINK_STATUS_LINK_UP);
+ if (p_hwfn->b_drv_link_init)
+ p_link->link_up = !!(status & LINK_STATUS_LINK_UP);
+ else
+ p_link->link_up = false;
p_link->full_duplex = true;
switch ((status & LINK_STATUS_SPEED_AND_DUPLEX_MASK)) {
@@ -492,53 +597,43 @@ int qed_mcp_set_link(struct qed_hwfn *p_hwfn,
bool b_up)
{
struct qed_mcp_link_params *params = &p_hwfn->mcp_info->link_input;
- u32 param = 0, reply = 0, cmd;
- struct pmm_phy_cfg phy_cfg;
+ struct qed_mcp_mb_params mb_params;
+ union drv_union_data union_data;
+ struct pmm_phy_cfg *phy_cfg;
int rc = 0;
- u32 i;
-
- if (!qed_mcp_is_init(p_hwfn)) {
- DP_NOTICE(p_hwfn, "MFW is not initialized !\n");
- return -EBUSY;
- }
+ u32 cmd;
/* Set the shmem configuration according to params */
- memset(&phy_cfg, 0, sizeof(phy_cfg));
+ phy_cfg = &union_data.drv_phy_cfg;
+ memset(phy_cfg, 0, sizeof(*phy_cfg));
cmd = b_up ? DRV_MSG_CODE_INIT_PHY : DRV_MSG_CODE_LINK_RESET;
if (!params->speed.autoneg)
- phy_cfg.speed = params->speed.forced_speed;
- phy_cfg.pause |= (params->pause.autoneg) ? PMM_PAUSE_AUTONEG : 0;
- phy_cfg.pause |= (params->pause.forced_rx) ? PMM_PAUSE_RX : 0;
- phy_cfg.pause |= (params->pause.forced_tx) ? PMM_PAUSE_TX : 0;
- phy_cfg.adv_speed = params->speed.advertised_speeds;
- phy_cfg.loopback_mode = params->loopback_mode;
-
- /* Write the requested configuration to shmem */
- for (i = 0; i < sizeof(phy_cfg); i += 4)
- qed_wr(p_hwfn, p_ptt,
- p_hwfn->mcp_info->drv_mb_addr +
- offsetof(struct public_drv_mb, union_data) + i,
- ((u32 *)&phy_cfg)[i >> 2]);
+ phy_cfg->speed = params->speed.forced_speed;
+ phy_cfg->pause |= (params->pause.autoneg) ? PMM_PAUSE_AUTONEG : 0;
+ phy_cfg->pause |= (params->pause.forced_rx) ? PMM_PAUSE_RX : 0;
+ phy_cfg->pause |= (params->pause.forced_tx) ? PMM_PAUSE_TX : 0;
+ phy_cfg->adv_speed = params->speed.advertised_speeds;
+ phy_cfg->loopback_mode = params->loopback_mode;
+
+ p_hwfn->b_drv_link_init = b_up;
if (b_up) {
DP_VERBOSE(p_hwfn, NETIF_MSG_LINK,
"Configuring Link: Speed 0x%08x, Pause 0x%08x, adv_speed 0x%08x, loopback 0x%08x, features 0x%08x\n",
- phy_cfg.speed,
- phy_cfg.pause,
- phy_cfg.adv_speed,
- phy_cfg.loopback_mode,
- phy_cfg.feature_config_flags);
+ phy_cfg->speed,
+ phy_cfg->pause,
+ phy_cfg->adv_speed,
+ phy_cfg->loopback_mode,
+ phy_cfg->feature_config_flags);
} else {
DP_VERBOSE(p_hwfn, NETIF_MSG_LINK,
"Resetting link\n");
}
- DP_VERBOSE(p_hwfn, QED_MSG_SP, "fw_seq 0x%08x, drv_pulse 0x%x\n",
- p_hwfn->mcp_info->drv_mb_seq,
- p_hwfn->mcp_info->drv_pulse_seq);
-
- /* Load Request */
- rc = qed_mcp_cmd(p_hwfn, p_ptt, cmd, 0, &reply, &param);
+ memset(&mb_params, 0, sizeof(mb_params));
+ mb_params.cmd = cmd;
+ mb_params.p_data_src = &union_data;
+ rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
/* if mcp fails to respond we must abort */
if (rc) {
@@ -581,6 +676,9 @@ int qed_mcp_handle_events(struct qed_hwfn *p_hwfn,
case MFW_DRV_MSG_LINK_CHANGE:
qed_mcp_handle_link_change(p_hwfn, p_ptt, false);
break;
+ case MFW_DRV_MSG_TRANSCEIVER_STATE_CHANGE:
+ qed_mcp_handle_transceiver_change(p_hwfn, p_ptt);
+ break;
default:
DP_NOTICE(p_hwfn, "Unimplemented MFW message %d\n", i);
rc = -EINVAL;
@@ -801,11 +899,11 @@ int qed_mcp_drain(struct qed_hwfn *p_hwfn,
int rc;
rc = qed_mcp_cmd(p_hwfn, p_ptt,
- DRV_MSG_CODE_NIG_DRAIN, 100,
+ DRV_MSG_CODE_NIG_DRAIN, 1000,
&resp, &param);
/* Wait for the drain to complete before returning */
- msleep(120);
+ msleep(1020);
return rc;
}
@@ -831,31 +929,28 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt,
struct qed_mcp_drv_version *p_ver)
{
- int rc = 0;
- u32 param = 0, reply = 0, i;
-
- if (!qed_mcp_is_init(p_hwfn)) {
- DP_NOTICE(p_hwfn, "MFW is not initialized !\n");
- return -EBUSY;
- }
+ struct drv_version_stc *p_drv_version;
+ struct qed_mcp_mb_params mb_params;
+ union drv_union_data union_data;
+ __be32 val;
+ u32 i;
+ int rc;
- DRV_MB_WR(p_hwfn, p_ptt, union_data.drv_version.version,
- p_ver->version);
- /* Copy version string to shmem */
- for (i = 0; i < (MCP_DRV_VER_STR_SIZE - 4) / 4; i++) {
- DRV_MB_WR(p_hwfn, p_ptt,
- union_data.drv_version.name[i * sizeof(u32)],
- *(u32 *)&p_ver->name[i * sizeof(u32)]);
+ p_drv_version = &union_data.drv_version;
+ p_drv_version->version = p_ver->version;
+ for (i = 0; i < MCP_DRV_VER_STR_SIZE - 1; i += 4) {
+ val = cpu_to_be32(p_ver->name[i]);
+ *(u32 *)&p_drv_version->name[i * sizeof(u32)] = val;
}
- rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_SET_VERSION, 0, &reply,
- &param);
- if (rc) {
+ memset(&mb_params, 0, sizeof(mb_params));
+ mb_params.cmd = DRV_MSG_CODE_SET_VERSION;
+ mb_params.p_data_src = &union_data;
+ rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
+ if (rc)
DP_ERR(p_hwfn, "MCP response failure, aborting\n");
- return rc;
- }
- return 0;
+ return rc;
}
int qed_mcp_set_led(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
index 506197d5c3dd..50917a2131a5 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -11,8 +11,8 @@
#include <linux/types.h>
#include <linux/delay.h>
-#include <linux/mutex.h>
#include <linux/slab.h>
+#include <linux/spinlock.h>
#include "qed_hsi.h"
struct qed_mcp_link_speed_params {
@@ -255,7 +255,8 @@ int qed_mcp_set_led(struct qed_hwfn *p_hwfn,
#define MFW_PORT(_p_hwfn) ((_p_hwfn)->abs_pf_id % \
((_p_hwfn)->cdev->num_ports_in_engines * 2))
struct qed_mcp_info {
- struct mutex mutex; /* MCP access lock */
+ spinlock_t lock;
+ bool block_mb_sending;
u32 public_base;
u32 drv_mb_addr;
u32 mfw_mb_addr;
@@ -272,6 +273,15 @@ struct qed_mcp_info {
u16 mcp_hist;
};
+struct qed_mcp_mb_params {
+ u32 cmd;
+ u32 param;
+ union drv_union_data *p_data_src;
+ union drv_union_data *p_data_dst;
+ u32 mcp_resp;
+ u32 mcp_param;
+};
+
/**
* @brief Initialize the interface with the MCP
*
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index 02e17d331f22..d023251544d9 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -160,6 +160,7 @@ struct qede_dev {
u16 q_num_rx_buffers; /* Must be a power of two */
u16 q_num_tx_buffers; /* Must be a power of two */
+ bool gro_disable;
struct list_head vlan_list;
u16 configured_vlans;
u16 non_configured_vlans;
@@ -188,6 +189,24 @@ struct sw_rx_data {
unsigned int page_offset;
};
+enum qede_agg_state {
+ QEDE_AGG_STATE_NONE = 0,
+ QEDE_AGG_STATE_START = 1,
+ QEDE_AGG_STATE_ERROR = 2
+};
+
+struct qede_agg_info {
+ struct sw_rx_data replace_buf;
+ dma_addr_t replace_buf_mapping;
+ struct sw_rx_data start_buf;
+ dma_addr_t start_buf_mapping;
+ struct eth_fast_path_rx_tpa_start_cqe start_cqe;
+ enum qede_agg_state agg_state;
+ struct sk_buff *skb;
+ int frag_id;
+ u16 vlan_tag;
+};
+
struct qede_rx_queue {
__le16 *hw_cons_ptr;
struct sw_rx_data *sw_rx_ring;
@@ -197,6 +216,9 @@ struct qede_rx_queue {
struct qed_chain rx_comp_ring;
void __iomem *hw_rxq_prod_addr;
+ /* GRO */
+ struct qede_agg_info tpa_info[ETH_TPA_MAX_AGGS_NUM];
+
int rx_buf_size;
unsigned int rx_buf_seg_size;
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index ddd9e4aaa500..518af329502d 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -866,6 +866,281 @@ static inline void qede_skb_receive(struct qede_dev *edev,
napi_gro_receive(&fp->napi, skb);
}
+static void qede_set_gro_params(struct qede_dev *edev,
+ struct sk_buff *skb,
+ struct eth_fast_path_rx_tpa_start_cqe *cqe)
+{
+ u16 parsing_flags = le16_to_cpu(cqe->pars_flags.flags);
+
+ if (((parsing_flags >> PARSING_AND_ERR_FLAGS_L3TYPE_SHIFT) &
+ PARSING_AND_ERR_FLAGS_L3TYPE_MASK) == 2)
+ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
+ else
+ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+
+ skb_shinfo(skb)->gso_size = __le16_to_cpu(cqe->len_on_first_bd) -
+ cqe->header_len;
+}
+
+static int qede_fill_frag_skb(struct qede_dev *edev,
+ struct qede_rx_queue *rxq,
+ u8 tpa_agg_index,
+ u16 len_on_bd)
+{
+ struct sw_rx_data *current_bd = &rxq->sw_rx_ring[rxq->sw_rx_cons &
+ NUM_RX_BDS_MAX];
+ struct qede_agg_info *tpa_info = &rxq->tpa_info[tpa_agg_index];
+ struct sk_buff *skb = tpa_info->skb;
+
+ if (unlikely(tpa_info->agg_state != QEDE_AGG_STATE_START))
+ goto out;
+
+ /* Add one frag and update the appropriate fields in the skb */
+ skb_fill_page_desc(skb, tpa_info->frag_id++,
+ current_bd->data, current_bd->page_offset,
+ len_on_bd);
+
+ if (unlikely(qede_realloc_rx_buffer(edev, rxq, current_bd))) {
+ tpa_info->agg_state = QEDE_AGG_STATE_ERROR;
+ goto out;
+ }
+
+ qed_chain_consume(&rxq->rx_bd_ring);
+ rxq->sw_rx_cons++;
+
+ skb->data_len += len_on_bd;
+ skb->truesize += rxq->rx_buf_seg_size;
+ skb->len += len_on_bd;
+
+ return 0;
+
+out:
+ return -ENOMEM;
+}
+
+static void qede_tpa_start(struct qede_dev *edev,
+ struct qede_rx_queue *rxq,
+ struct eth_fast_path_rx_tpa_start_cqe *cqe)
+{
+ struct qede_agg_info *tpa_info = &rxq->tpa_info[cqe->tpa_agg_index];
+ struct eth_rx_bd *rx_bd_cons = qed_chain_consume(&rxq->rx_bd_ring);
+ struct eth_rx_bd *rx_bd_prod = qed_chain_produce(&rxq->rx_bd_ring);
+ struct sw_rx_data *replace_buf = &tpa_info->replace_buf;
+ dma_addr_t mapping = tpa_info->replace_buf_mapping;
+ struct sw_rx_data *sw_rx_data_cons;
+ struct sw_rx_data *sw_rx_data_prod;
+ enum pkt_hash_types rxhash_type;
+ u32 rxhash;
+
+ sw_rx_data_cons = &rxq->sw_rx_ring[rxq->sw_rx_cons & NUM_RX_BDS_MAX];
+ sw_rx_data_prod = &rxq->sw_rx_ring[rxq->sw_rx_prod & NUM_RX_BDS_MAX];
+
+ /* Use pre-allocated replacement buffer - we can't release the agg.
+ * start until its over and we don't want to risk allocation failing
+ * here, so re-allocate when aggregation will be over.
+ */
+ dma_unmap_addr_set(sw_rx_data_prod, mapping,
+ dma_unmap_addr(replace_buf, mapping));
+
+ sw_rx_data_prod->data = replace_buf->data;
+ rx_bd_prod->addr.hi = cpu_to_le32(upper_32_bits(mapping));
+ rx_bd_prod->addr.lo = cpu_to_le32(lower_32_bits(mapping));
+ sw_rx_data_prod->page_offset = replace_buf->page_offset;
+
+ rxq->sw_rx_prod++;
+
+ /* move partial skb from cons to pool (don't unmap yet)
+ * save mapping, incase we drop the packet later on.
+ */
+ tpa_info->start_buf = *sw_rx_data_cons;
+ mapping = HILO_U64(le32_to_cpu(rx_bd_cons->addr.hi),
+ le32_to_cpu(rx_bd_cons->addr.lo));
+
+ tpa_info->start_buf_mapping = mapping;
+ rxq->sw_rx_cons++;
+
+ /* set tpa state to start only if we are able to allocate skb
+ * for this aggregation, otherwise mark as error and aggregation will
+ * be dropped
+ */
+ tpa_info->skb = netdev_alloc_skb(edev->ndev,
+ le16_to_cpu(cqe->len_on_first_bd));
+ if (unlikely(!tpa_info->skb)) {
+ tpa_info->agg_state = QEDE_AGG_STATE_ERROR;
+ return;
+ }
+
+ skb_put(tpa_info->skb, le16_to_cpu(cqe->len_on_first_bd));
+ memcpy(&tpa_info->start_cqe, cqe, sizeof(tpa_info->start_cqe));
+
+ /* Start filling in the aggregation info */
+ tpa_info->frag_id = 0;
+ tpa_info->agg_state = QEDE_AGG_STATE_START;
+
+ rxhash = qede_get_rxhash(edev, cqe->bitfields,
+ cqe->rss_hash, &rxhash_type);
+ skb_set_hash(tpa_info->skb, rxhash, rxhash_type);
+ if ((le16_to_cpu(cqe->pars_flags.flags) >>
+ PARSING_AND_ERR_FLAGS_TAG8021QEXIST_SHIFT) &
+ PARSING_AND_ERR_FLAGS_TAG8021QEXIST_MASK)
+ tpa_info->vlan_tag = le16_to_cpu(cqe->vlan_tag);
+ else
+ tpa_info->vlan_tag = 0;
+
+ /* This is needed in order to enable forwarding support */
+ qede_set_gro_params(edev, tpa_info->skb, cqe);
+
+ if (likely(cqe->ext_bd_len_list[0]))
+ qede_fill_frag_skb(edev, rxq, cqe->tpa_agg_index,
+ le16_to_cpu(cqe->ext_bd_len_list[0]));
+
+ if (unlikely(cqe->ext_bd_len_list[1])) {
+ DP_ERR(edev,
+ "Unlikely - got a TPA aggregation with more than one ext_bd_len_list entry in the TPA start\n");
+ tpa_info->agg_state = QEDE_AGG_STATE_ERROR;
+ }
+}
+
+#ifdef CONFIG_INET
+static void qede_gro_ip_csum(struct sk_buff *skb)
+{
+ const struct iphdr *iph = ip_hdr(skb);
+ struct tcphdr *th;
+
+ skb_set_network_header(skb, 0);
+ skb_set_transport_header(skb, sizeof(struct iphdr));
+ th = tcp_hdr(skb);
+
+ th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
+ iph->saddr, iph->daddr, 0);
+
+ tcp_gro_complete(skb);
+}
+
+static void qede_gro_ipv6_csum(struct sk_buff *skb)
+{
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+ struct tcphdr *th;
+
+ skb_set_network_header(skb, 0);
+ skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+ th = tcp_hdr(skb);
+
+ th->check = ~tcp_v6_check(skb->len - skb_transport_offset(skb),
+ &iph->saddr, &iph->daddr, 0);
+ tcp_gro_complete(skb);
+}
+#endif
+
+static void qede_gro_receive(struct qede_dev *edev,
+ struct qede_fastpath *fp,
+ struct sk_buff *skb,
+ u16 vlan_tag)
+{
+#ifdef CONFIG_INET
+ if (skb_shinfo(skb)->gso_size) {
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ qede_gro_ip_csum(skb);
+ break;
+ case htons(ETH_P_IPV6):
+ qede_gro_ipv6_csum(skb);
+ break;
+ default:
+ DP_ERR(edev,
+ "Error: FW GRO supports only IPv4/IPv6, not 0x%04x\n",
+ ntohs(skb->protocol));
+ }
+ }
+#endif
+ skb_record_rx_queue(skb, fp->rss_id);
+ qede_skb_receive(edev, fp, skb, vlan_tag);
+}
+
+static inline void qede_tpa_cont(struct qede_dev *edev,
+ struct qede_rx_queue *rxq,
+ struct eth_fast_path_rx_tpa_cont_cqe *cqe)
+{
+ int i;
+
+ for (i = 0; cqe->len_list[i]; i++)
+ qede_fill_frag_skb(edev, rxq, cqe->tpa_agg_index,
+ le16_to_cpu(cqe->len_list[i]));
+
+ if (unlikely(i > 1))
+ DP_ERR(edev,
+ "Strange - TPA cont with more than a single len_list entry\n");
+}
+
+static void qede_tpa_end(struct qede_dev *edev,
+ struct qede_fastpath *fp,
+ struct eth_fast_path_rx_tpa_end_cqe *cqe)
+{
+ struct qede_rx_queue *rxq = fp->rxq;
+ struct qede_agg_info *tpa_info;
+ struct sk_buff *skb;
+ int i;
+
+ tpa_info = &rxq->tpa_info[cqe->tpa_agg_index];
+ skb = tpa_info->skb;
+
+ for (i = 0; cqe->len_list[i]; i++)
+ qede_fill_frag_skb(edev, rxq, cqe->tpa_agg_index,
+ le16_to_cpu(cqe->len_list[i]));
+ if (unlikely(i > 1))
+ DP_ERR(edev,
+ "Strange - TPA emd with more than a single len_list entry\n");
+
+ if (unlikely(tpa_info->agg_state != QEDE_AGG_STATE_START))
+ goto err;
+
+ /* Sanity */
+ if (unlikely(cqe->num_of_bds != tpa_info->frag_id + 1))
+ DP_ERR(edev,
+ "Strange - TPA had %02x BDs, but SKB has only %d frags\n",
+ cqe->num_of_bds, tpa_info->frag_id);
+ if (unlikely(skb->len != le16_to_cpu(cqe->total_packet_len)))
+ DP_ERR(edev,
+ "Strange - total packet len [cqe] is %4x but SKB has len %04x\n",
+ le16_to_cpu(cqe->total_packet_len), skb->len);
+
+ memcpy(skb->data,
+ page_address(tpa_info->start_buf.data) +
+ tpa_info->start_cqe.placement_offset +
+ tpa_info->start_buf.page_offset,
+ le16_to_cpu(tpa_info->start_cqe.len_on_first_bd));
+
+ /* Recycle [mapped] start buffer for the next replacement */
+ tpa_info->replace_buf = tpa_info->start_buf;
+ tpa_info->replace_buf_mapping = tpa_info->start_buf_mapping;
+
+ /* Finalize the SKB */
+ skb->protocol = eth_type_trans(skb, edev->ndev);
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+ /* tcp_gro_complete() will copy NAPI_GRO_CB(skb)->count
+ * to skb_shinfo(skb)->gso_segs
+ */
+ NAPI_GRO_CB(skb)->count = le16_to_cpu(cqe->num_of_coalesced_segs);
+
+ qede_gro_receive(edev, fp, skb, tpa_info->vlan_tag);
+
+ tpa_info->agg_state = QEDE_AGG_STATE_NONE;
+
+ return;
+err:
+ /* The BD starting the aggregation is still mapped; Re-use it for
+ * future aggregations [as replacement buffer]
+ */
+ memcpy(&tpa_info->replace_buf, &tpa_info->start_buf,
+ sizeof(struct sw_rx_data));
+ tpa_info->replace_buf_mapping = tpa_info->start_buf_mapping;
+ tpa_info->start_buf.data = NULL;
+ tpa_info->agg_state = QEDE_AGG_STATE_NONE;
+ dev_kfree_skb_any(tpa_info->skb);
+ tpa_info->skb = NULL;
+}
+
static u8 qede_check_csum(u16 flag)
{
u16 csum_flag = 0;
@@ -931,6 +1206,25 @@ static int qede_rx_int(struct qede_fastpath *fp, int budget)
goto next_cqe;
}
+ if (cqe_type != ETH_RX_CQE_TYPE_REGULAR) {
+ switch (cqe_type) {
+ case ETH_RX_CQE_TYPE_TPA_START:
+ qede_tpa_start(edev, rxq,
+ &cqe->fast_path_tpa_start);
+ goto next_cqe;
+ case ETH_RX_CQE_TYPE_TPA_CONT:
+ qede_tpa_cont(edev, rxq,
+ &cqe->fast_path_tpa_cont);
+ goto next_cqe;
+ case ETH_RX_CQE_TYPE_TPA_END:
+ qede_tpa_end(edev, fp,
+ &cqe->fast_path_tpa_end);
+ goto next_rx_only;
+ default:
+ break;
+ }
+ }
+
/* Get the data from the SW ring */
sw_rx_index = rxq->sw_rx_cons & NUM_RX_BDS_MAX;
sw_rx_data = &rxq->sw_rx_ring[sw_rx_index];
@@ -1057,9 +1351,9 @@ static int qede_rx_int(struct qede_fastpath *fp, int budget)
qede_skb_receive(edev, fp, skb, le16_to_cpu(fp_cqe->vlan_tag));
qed_chain_consume(&rxq->rx_bd_ring);
-
next_rx:
rxq->sw_rx_cons++;
+next_rx_only:
rx_pkt++;
next_cqe: /* don't consume bd rx buffer */
@@ -1952,9 +2246,31 @@ static void qede_free_rx_buffers(struct qede_dev *edev,
}
}
+static void qede_free_sge_mem(struct qede_dev *edev,
+ struct qede_rx_queue *rxq) {
+ int i;
+
+ if (edev->gro_disable)
+ return;
+
+ for (i = 0; i < ETH_TPA_MAX_AGGS_NUM; i++) {
+ struct qede_agg_info *tpa_info = &rxq->tpa_info[i];
+ struct sw_rx_data *replace_buf = &tpa_info->replace_buf;
+
+ if (replace_buf) {
+ dma_unmap_page(&edev->pdev->dev,
+ dma_unmap_addr(replace_buf, mapping),
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ __free_page(replace_buf->data);
+ }
+ }
+}
+
static void qede_free_mem_rxq(struct qede_dev *edev,
struct qede_rx_queue *rxq)
{
+ qede_free_sge_mem(edev, rxq);
+
/* Free rx buffers */
qede_free_rx_buffers(edev, rxq);
@@ -2010,6 +2326,53 @@ static int qede_alloc_rx_buffer(struct qede_dev *edev,
return 0;
}
+static int qede_alloc_sge_mem(struct qede_dev *edev,
+ struct qede_rx_queue *rxq)
+{
+ dma_addr_t mapping;
+ int i;
+
+ if (edev->gro_disable)
+ return 0;
+
+ if (edev->ndev->mtu > PAGE_SIZE) {
+ edev->gro_disable = 1;
+ return 0;
+ }
+
+ for (i = 0; i < ETH_TPA_MAX_AGGS_NUM; i++) {
+ struct qede_agg_info *tpa_info = &rxq->tpa_info[i];
+ struct sw_rx_data *replace_buf = &tpa_info->replace_buf;
+
+ replace_buf->data = alloc_pages(GFP_ATOMIC, 0);
+ if (unlikely(!replace_buf->data)) {
+ DP_NOTICE(edev,
+ "Failed to allocate TPA skb pool [replacement buffer]\n");
+ goto err;
+ }
+
+ mapping = dma_map_page(&edev->pdev->dev, replace_buf->data, 0,
+ rxq->rx_buf_size, DMA_FROM_DEVICE);
+ if (unlikely(dma_mapping_error(&edev->pdev->dev, mapping))) {
+ DP_NOTICE(edev,
+ "Failed to map TPA replacement buffer\n");
+ goto err;
+ }
+
+ dma_unmap_addr_set(replace_buf, mapping, mapping);
+ tpa_info->replace_buf.page_offset = 0;
+
+ tpa_info->replace_buf_mapping = mapping;
+ tpa_info->agg_state = QEDE_AGG_STATE_NONE;
+ }
+
+ return 0;
+err:
+ qede_free_sge_mem(edev, rxq);
+ edev->gro_disable = 1;
+ return -ENOMEM;
+}
+
/* This function allocates all memory needed per Rx queue */
static int qede_alloc_mem_rxq(struct qede_dev *edev,
struct qede_rx_queue *rxq)
@@ -2071,6 +2434,8 @@ static int qede_alloc_mem_rxq(struct qede_dev *edev,
num_allocated);
}
+ qede_alloc_sge_mem(edev, rxq);
+
return 0;
err:
@@ -2233,6 +2598,8 @@ static void qede_init_fp(struct qede_dev *edev)
snprintf(fp->name, sizeof(fp->name), "%s-fp-%d",
edev->ndev->name, rss_id);
}
+
+ edev->gro_disable = !(edev->ndev->features & NETIF_F_GRO);
}
static int qede_set_real_num_queues(struct qede_dev *edev)
@@ -2466,11 +2833,12 @@ static int qede_stop_queues(struct qede_dev *edev)
static int qede_start_queues(struct qede_dev *edev)
{
int rc, tc, i;
- int vport_id = 0, drop_ttl0_flg = 1, vlan_removal_en = 1;
+ int vlan_removal_en = 1;
struct qed_dev *cdev = edev->cdev;
struct qed_update_vport_rss_params *rss_params = &edev->rss_params;
struct qed_update_vport_params vport_update_params;
struct qed_queue_start_common_params q_params;
+ struct qed_start_vport_params start = {0};
if (!edev->num_rss) {
DP_ERR(edev,
@@ -2478,10 +2846,13 @@ static int qede_start_queues(struct qede_dev *edev)
return -EINVAL;
}
- rc = edev->ops->vport_start(cdev, vport_id,
- edev->ndev->mtu,
- drop_ttl0_flg,
- vlan_removal_en);
+ start.gro_enable = !edev->gro_disable;
+ start.mtu = edev->ndev->mtu;
+ start.vport_id = 0;
+ start.drop_ttl0 = true;
+ start.remove_inner_vlan = vlan_removal_en;
+
+ rc = edev->ops->vport_start(cdev, &start);
if (rc) {
DP_ERR(edev, "Start V-PORT failed %d\n", rc);
@@ -2490,7 +2861,7 @@ static int qede_start_queues(struct qede_dev *edev)
DP_VERBOSE(edev, NETIF_MSG_IFUP,
"Start vport ramrod passed, vport_id = %d, MTU = %d, vlan_removal_en = %d\n",
- vport_id, edev->ndev->mtu + 0xe, vlan_removal_en);
+ start.vport_id, edev->ndev->mtu + 0xe, vlan_removal_en);
for_each_rss(i) {
struct qede_fastpath *fp = &edev->fp_array[i];
@@ -2555,7 +2926,7 @@ static int qede_start_queues(struct qede_dev *edev)
/* Prepare and send the vport enable */
memset(&vport_update_params, 0, sizeof(vport_update_params));
- vport_update_params.vport_id = vport_id;
+ vport_update_params.vport_id = start.vport_id;
vport_update_params.update_vport_active_flg = 1;
vport_update_params.vport_active_flg = 1;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
index 46bbea8e023c..55007f1e6bbc 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
@@ -566,6 +566,7 @@ struct qlcnic_adapter_stats {
u64 tx_dma_map_error;
u64 spurious_intr;
u64 mac_filter_limit_overrun;
+ u64 mbx_spurious_intr;
};
/*
@@ -1099,7 +1100,7 @@ struct qlcnic_mailbox {
unsigned long status;
spinlock_t queue_lock; /* Mailbox queue lock */
spinlock_t aen_lock; /* Mailbox response/AEN lock */
- atomic_t rsp_status;
+ u32 rsp_status;
u32 num_cmds;
};
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
index 37a731be7d39..f9640d5ce6ba 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
@@ -491,7 +491,7 @@ irqreturn_t qlcnic_83xx_clear_legacy_intr(struct qlcnic_adapter *adapter)
static inline void qlcnic_83xx_notify_mbx_response(struct qlcnic_mailbox *mbx)
{
- atomic_set(&mbx->rsp_status, QLC_83XX_MBX_RESPONSE_ARRIVED);
+ mbx->rsp_status = QLC_83XX_MBX_RESPONSE_ARRIVED;
complete(&mbx->completion);
}
@@ -510,7 +510,7 @@ static void qlcnic_83xx_poll_process_aen(struct qlcnic_adapter *adapter)
if (event & QLCNIC_MBX_ASYNC_EVENT) {
__qlcnic_83xx_process_aen(adapter);
} else {
- if (atomic_read(&mbx->rsp_status) != rsp_status)
+ if (mbx->rsp_status != rsp_status)
qlcnic_83xx_notify_mbx_response(mbx);
}
out:
@@ -1023,7 +1023,7 @@ static void qlcnic_83xx_process_aen(struct qlcnic_adapter *adapter)
if (event & QLCNIC_MBX_ASYNC_EVENT) {
__qlcnic_83xx_process_aen(adapter);
} else {
- if (atomic_read(&mbx->rsp_status) != rsp_status)
+ if (mbx->rsp_status != rsp_status)
qlcnic_83xx_notify_mbx_response(mbx);
}
}
@@ -2338,9 +2338,9 @@ static void qlcnic_83xx_handle_link_aen(struct qlcnic_adapter *adapter,
static irqreturn_t qlcnic_83xx_handle_aen(int irq, void *data)
{
+ u32 mask, resp, event, rsp_status = QLC_83XX_MBX_RESPONSE_ARRIVED;
struct qlcnic_adapter *adapter = data;
struct qlcnic_mailbox *mbx;
- u32 mask, resp, event;
unsigned long flags;
mbx = adapter->ahw->mailbox;
@@ -2350,10 +2350,14 @@ static irqreturn_t qlcnic_83xx_handle_aen(int irq, void *data)
goto out;
event = readl(QLCNIC_MBX_FW(adapter->ahw, 0));
- if (event & QLCNIC_MBX_ASYNC_EVENT)
+ if (event & QLCNIC_MBX_ASYNC_EVENT) {
__qlcnic_83xx_process_aen(adapter);
- else
- qlcnic_83xx_notify_mbx_response(mbx);
+ } else {
+ if (mbx->rsp_status != rsp_status)
+ qlcnic_83xx_notify_mbx_response(mbx);
+ else
+ adapter->stats.mbx_spurious_intr++;
+ }
out:
mask = QLCRDX(adapter->ahw, QLCNIC_DEF_INT_MASK);
@@ -4050,10 +4054,10 @@ static void qlcnic_83xx_mailbox_worker(struct work_struct *work)
struct qlcnic_adapter *adapter = mbx->adapter;
const struct qlcnic_mbx_ops *mbx_ops = mbx->ops;
struct device *dev = &adapter->pdev->dev;
- atomic_t *rsp_status = &mbx->rsp_status;
struct list_head *head = &mbx->cmd_q;
struct qlcnic_hardware_context *ahw;
struct qlcnic_cmd_args *cmd = NULL;
+ unsigned long flags;
ahw = adapter->ahw;
@@ -4063,7 +4067,9 @@ static void qlcnic_83xx_mailbox_worker(struct work_struct *work)
return;
}
- atomic_set(rsp_status, QLC_83XX_MBX_RESPONSE_WAIT);
+ spin_lock_irqsave(&mbx->aen_lock, flags);
+ mbx->rsp_status = QLC_83XX_MBX_RESPONSE_WAIT;
+ spin_unlock_irqrestore(&mbx->aen_lock, flags);
spin_lock(&mbx->queue_lock);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
index 494e8105adee..0a2318cad34d 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
@@ -59,7 +59,8 @@ static const struct qlcnic_stats qlcnic_gstrings_stats[] = {
QLC_OFF(stats.mac_filter_limit_overrun)},
{"spurious intr", QLC_SIZEOF(stats.spurious_intr),
QLC_OFF(stats.spurious_intr)},
-
+ {"mbx spurious intr", QLC_SIZEOF(stats.mbx_spurious_intr),
+ QLC_OFF(stats.mbx_spurious_intr)},
};
static const char qlcnic_device_gstrings_stats[][ETH_GSTRING_LEN] = {
diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c
index 689a4a5c8dcf..1ef03939d25f 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.c
+++ b/drivers/net/ethernet/qualcomm/qca_spi.c
@@ -811,7 +811,7 @@ qcaspi_netdev_setup(struct net_device *dev)
dev->netdev_ops = &qcaspi_netdev_ops;
qcaspi_set_ethtool_ops(dev);
dev->watchdog_timeo = QCASPI_TX_TIMEOUT;
- dev->flags = IFF_MULTICAST;
+ dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->tx_queue_len = 100;
qca = netdev_priv(dev);
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 537974cfd427..dd2cf3738b73 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -4933,8 +4933,6 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp)
RTL_W32(RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST);
break;
case RTL_GIGA_MAC_VER_40:
- RTL_W32(RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST | RX_EARLY_OFF);
- break;
case RTL_GIGA_MAC_VER_41:
case RTL_GIGA_MAC_VER_42:
case RTL_GIGA_MAC_VER_43:
@@ -4943,8 +4941,6 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp)
case RTL_GIGA_MAC_VER_46:
case RTL_GIGA_MAC_VER_47:
case RTL_GIGA_MAC_VER_48:
- RTL_W32(RxConfig, RX128_INT_EN | RX_DMA_BURST | RX_EARLY_OFF);
- break;
case RTL_GIGA_MAC_VER_49:
case RTL_GIGA_MAC_VER_50:
case RTL_GIGA_MAC_VER_51:
@@ -7730,10 +7726,13 @@ rtl8169_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
{
struct rtl8169_private *tp = netdev_priv(dev);
void __iomem *ioaddr = tp->mmio_addr;
+ struct pci_dev *pdev = tp->pci_dev;
struct rtl8169_counters *counters = tp->counters;
unsigned int start;
- if (netif_running(dev))
+ pm_runtime_get_noresume(&pdev->dev);
+
+ if (netif_running(dev) && pm_runtime_active(&pdev->dev))
rtl8169_rx_missed(dev, ioaddr);
do {
@@ -7761,7 +7760,8 @@ rtl8169_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
* Fetch additonal counter values missing in stats collected by driver
* from tally counters.
*/
- rtl8169_update_counters(dev);
+ if (pm_runtime_active(&pdev->dev))
+ rtl8169_update_counters(dev);
/*
* Subtract values fetched during initalization.
@@ -7774,6 +7774,8 @@ rtl8169_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
stats->tx_aborted_errors = le16_to_cpu(counters->tx_aborted) -
le16_to_cpu(tp->tc_offset.tx_aborted);
+ pm_runtime_put_noidle(&pdev->dev);
+
return stats;
}
@@ -7853,6 +7855,10 @@ static int rtl8169_runtime_suspend(struct device *device)
rtl8169_net_suspend(dev);
+ /* Update counters before going runtime suspend */
+ rtl8169_rx_missed(dev, tp->mmio_addr);
+ rtl8169_update_counters(dev);
+
return 0;
}
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 88656ceb6e29..8f2c4fb4c724 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -1708,7 +1708,6 @@ static int ravb_set_gti(struct net_device *ndev)
static int ravb_probe(struct platform_device *pdev)
{
struct device_node *np = pdev->dev.of_node;
- const struct of_device_id *match;
struct ravb_private *priv;
enum ravb_chip_id chip_id;
struct net_device *ndev;
@@ -1740,8 +1739,7 @@ static int ravb_probe(struct platform_device *pdev)
ndev->base_addr = res->start;
ndev->dma = -1;
- match = of_match_device(of_match_ptr(ravb_match_table), &pdev->dev);
- chip_id = (enum ravb_chip_id)match->data;
+ chip_id = (enum ravb_chip_id)of_device_get_match_data(&pdev->dev);
if (chip_id == RCAR_GEN3)
irq = platform_get_irq_byname(pdev, "ch22");
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index a2767336b7c5..2e9a78164054 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -1127,11 +1127,8 @@ static void sh_eth_ring_format(struct net_device *ndev)
break;
sh_eth_set_receive_align(skb);
- /* RX descriptor */
- rxdesc = &mdp->rx_ring[i];
/* The size of the buffer is a multiple of 32 bytes. */
buf_len = ALIGN(mdp->rx_buf_sz, 32);
- rxdesc->len = cpu_to_le32(buf_len << 16);
dma_addr = dma_map_single(&ndev->dev, skb->data, buf_len,
DMA_FROM_DEVICE);
if (dma_mapping_error(&ndev->dev, dma_addr)) {
@@ -1139,6 +1136,10 @@ static void sh_eth_ring_format(struct net_device *ndev)
break;
}
mdp->rx_skbuff[i] = skb;
+
+ /* RX descriptor */
+ rxdesc = &mdp->rx_ring[i];
+ rxdesc->len = cpu_to_le32(buf_len << 16);
rxdesc->addr = cpu_to_le32(dma_addr);
rxdesc->status = cpu_to_le32(RD_RACT | RD_RFP);
@@ -1154,7 +1155,8 @@ static void sh_eth_ring_format(struct net_device *ndev)
mdp->dirty_rx = (u32) (i - mdp->num_rx_ring);
/* Mark the last entry as wrapping the ring. */
- rxdesc->status |= cpu_to_le32(RD_RDLE);
+ if (rxdesc)
+ rxdesc->status |= cpu_to_le32(RD_RDLE);
memset(mdp->tx_ring, 0, tx_ringsize);
@@ -3044,15 +3046,11 @@ static int sh_eth_drv_probe(struct platform_device *pdev)
mdp->ether_link_active_low = pd->ether_link_active_low;
/* set cpu data */
- if (id) {
+ if (id)
mdp->cd = (struct sh_eth_cpu_data *)id->driver_data;
- } else {
- const struct of_device_id *match;
+ else
+ mdp->cd = (struct sh_eth_cpu_data *)of_device_get_match_data(&pdev->dev);
- match = of_match_device(of_match_ptr(sh_eth_match_table),
- &pdev->dev);
- mdp->cd = (struct sh_eth_cpu_data *)match->data;
- }
mdp->reg_offset = sh_eth_get_register_offset(mdp->cd->register_type);
if (!mdp->reg_offset) {
dev_err(&pdev->dev, "Unknown register type (%d)\n",
diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c
index 07218c360d86..0e758bcb26b0 100644
--- a/drivers/net/ethernet/rocker/rocker_ofdpa.c
+++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c
@@ -188,6 +188,7 @@ struct ofdpa {
DECLARE_HASHTABLE(neigh_tbl, 16);
spinlock_t neigh_tbl_lock; /* for neigh tbl accesses */
u32 neigh_tbl_next_index;
+ unsigned long ageing_time;
};
struct ofdpa_port {
@@ -2105,7 +2106,7 @@ static void ofdpa_fdb_cleanup(unsigned long data)
struct ofdpa_port *ofdpa_port;
struct ofdpa_fdb_tbl_entry *entry;
struct hlist_node *tmp;
- unsigned long next_timer = jiffies + BR_MIN_AGEING_TIME;
+ unsigned long next_timer = jiffies + ofdpa->ageing_time;
unsigned long expires;
unsigned long lock_flags;
int flags = OFDPA_OP_FLAG_NOWAIT | OFDPA_OP_FLAG_REMOVE |
@@ -2492,6 +2493,8 @@ static int ofdpa_init(struct rocker *rocker)
(unsigned long) ofdpa);
mod_timer(&ofdpa->fdb_cleanup_timer, jiffies);
+ ofdpa->ageing_time = BR_DEFAULT_AGEING_TIME;
+
return 0;
}
@@ -2648,9 +2651,12 @@ ofdpa_port_attr_bridge_ageing_time_set(struct rocker_port *rocker_port,
struct switchdev_trans *trans)
{
struct ofdpa_port *ofdpa_port = rocker_port->wpriv;
+ struct ofdpa *ofdpa = ofdpa_port->ofdpa;
if (!switchdev_trans_ph_prepare(trans)) {
ofdpa_port->ageing_time = clock_t_to_jiffies(ageing_time);
+ if (ofdpa_port->ageing_time < ofdpa->ageing_time)
+ ofdpa->ageing_time = ofdpa_port->ageing_time;
mod_timer(&ofdpa_port->ofdpa->fdb_cleanup_timer, jiffies);
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
index 0faf16336035..efb54f356a67 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
@@ -199,21 +199,12 @@ int stmmac_mdio_register(struct net_device *ndev)
struct stmmac_priv *priv = netdev_priv(ndev);
struct stmmac_mdio_bus_data *mdio_bus_data = priv->plat->mdio_bus_data;
int addr, found;
- struct device_node *mdio_node = NULL;
- struct device_node *child_node = NULL;
+ struct device_node *mdio_node = priv->plat->mdio_node;
if (!mdio_bus_data)
return 0;
if (IS_ENABLED(CONFIG_OF)) {
- for_each_child_of_node(priv->device->of_node, child_node) {
- if (of_device_is_compatible(child_node,
- "snps,dwmac-mdio")) {
- mdio_node = child_node;
- break;
- }
- }
-
if (mdio_node) {
netdev_dbg(ndev, "FOUND MDIO subnode\n");
} else {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 69ccf486d4fa..dcbd2a1601e8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -112,7 +112,7 @@ static struct stmmac_axi *stmmac_axi_setup(struct platform_device *pdev)
if (!np)
return NULL;
- axi = kzalloc(sizeof(axi), GFP_KERNEL);
+ axi = kzalloc(sizeof(*axi), GFP_KERNEL);
if (!axi)
return ERR_PTR(-ENOMEM);
@@ -146,6 +146,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
struct device_node *np = pdev->dev.of_node;
struct plat_stmmacenet_data *plat;
struct stmmac_dma_cfg *dma_cfg;
+ struct device_node *child_node = NULL;
plat = devm_kzalloc(&pdev->dev, sizeof(*plat), GFP_KERNEL);
if (!plat)
@@ -176,13 +177,19 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
plat->phy_node = of_node_get(np);
}
+ for_each_child_of_node(np, child_node)
+ if (of_device_is_compatible(child_node, "snps,dwmac-mdio")) {
+ plat->mdio_node = child_node;
+ break;
+ }
+
/* "snps,phy-addr" is not a standard property. Mark it as deprecated
* and warn of its use. Remove this when phy node support is added.
*/
if (of_property_read_u32(np, "snps,phy-addr", &plat->phy_addr) == 0)
dev_warn(&pdev->dev, "snps,phy-addr property is deprecated\n");
- if ((plat->phy_node && !of_phy_is_fixed_link(np)) || plat->phy_bus_name)
+ if ((plat->phy_node && !of_phy_is_fixed_link(np)) || !plat->mdio_node)
plat->mdio_bus_data = NULL;
else
plat->mdio_bus_data =
diff --git a/drivers/net/ethernet/synopsys/dwc_eth_qos.c b/drivers/net/ethernet/synopsys/dwc_eth_qos.c
index fc8bbff2d7e3..af11ed1e0bcc 100644
--- a/drivers/net/ethernet/synopsys/dwc_eth_qos.c
+++ b/drivers/net/ethernet/synopsys/dwc_eth_qos.c
@@ -426,7 +426,7 @@
#define DWC_MMC_RXOCTETCOUNT_GB 0x0784
#define DWC_MMC_RXPACKETCOUNT_GB 0x0780
-static int debug = 3;
+static int debug = -1;
module_param(debug, int, 0);
MODULE_PARM_DESC(debug, "DWC_eth_qos debug level (0=none,...,16=all)");
@@ -650,6 +650,11 @@ struct net_local {
u32 mmc_tx_counters_mask;
struct dwceqos_flowcontrol flowcontrol;
+
+ /* Tracks the intermediate state of phy started but hardware
+ * init not finished yet.
+ */
+ bool phy_defer;
};
static void dwceqos_read_mmc_counters(struct net_local *lp, u32 rx_mask,
@@ -901,6 +906,9 @@ static void dwceqos_adjust_link(struct net_device *ndev)
struct phy_device *phydev = lp->phy_dev;
int status_change = 0;
+ if (lp->phy_defer)
+ return;
+
if (phydev->link) {
if ((lp->speed != phydev->speed) ||
(lp->duplex != phydev->duplex)) {
@@ -1113,7 +1121,7 @@ static int dwceqos_descriptor_init(struct net_local *lp)
/* Allocate DMA descriptors */
size = DWCEQOS_RX_DCNT * sizeof(struct dwceqos_dma_desc);
lp->rx_descs = dma_alloc_coherent(lp->ndev->dev.parent, size,
- &lp->rx_descs_addr, 0);
+ &lp->rx_descs_addr, GFP_KERNEL);
if (!lp->rx_descs)
goto err_out;
lp->rx_descs_tail_addr = lp->rx_descs_addr +
@@ -1121,7 +1129,7 @@ static int dwceqos_descriptor_init(struct net_local *lp)
size = DWCEQOS_TX_DCNT * sizeof(struct dwceqos_dma_desc);
lp->tx_descs = dma_alloc_coherent(lp->ndev->dev.parent, size,
- &lp->tx_descs_addr, 0);
+ &lp->tx_descs_addr, GFP_KERNEL);
if (!lp->tx_descs)
goto err_out;
lp->tx_descs_tail_addr = lp->tx_descs_addr +
@@ -1635,6 +1643,12 @@ static void dwceqos_init_hw(struct net_local *lp)
regval = dwceqos_read(lp, REG_DWCEQOS_MAC_CFG);
dwceqos_write(lp, REG_DWCEQOS_MAC_CFG,
regval | DWCEQOS_MAC_CFG_TE | DWCEQOS_MAC_CFG_RE);
+
+ lp->phy_defer = false;
+ mutex_lock(&lp->phy_dev->lock);
+ phy_read_status(lp->phy_dev);
+ dwceqos_adjust_link(lp->ndev);
+ mutex_unlock(&lp->phy_dev->lock);
}
static void dwceqos_tx_reclaim(unsigned long data)
@@ -1880,9 +1894,13 @@ static int dwceqos_open(struct net_device *ndev)
}
netdev_reset_queue(ndev);
+ /* The dwceqos reset state machine requires all phy clocks to complete,
+ * hence the unusual init order with phy_start first.
+ */
+ lp->phy_defer = true;
+ phy_start(lp->phy_dev);
dwceqos_init_hw(lp);
napi_enable(&lp->napi);
- phy_start(lp->phy_dev);
netif_start_queue(ndev);
tasklet_enable(&lp->tx_bdreclaim_tasklet);
@@ -1915,18 +1933,19 @@ static int dwceqos_stop(struct net_device *ndev)
{
struct net_local *lp = netdev_priv(ndev);
- phy_stop(lp->phy_dev);
-
tasklet_disable(&lp->tx_bdreclaim_tasklet);
- netif_stop_queue(ndev);
napi_disable(&lp->napi);
- dwceqos_drain_dma(lp);
+ /* Stop all tx before we drain the tx dma. */
+ netif_tx_lock_bh(lp->ndev);
+ netif_stop_queue(ndev);
+ netif_tx_unlock_bh(lp->ndev);
- netif_tx_lock(lp->ndev);
+ dwceqos_drain_dma(lp);
dwceqos_reset_hw(lp);
+ phy_stop(lp->phy_dev);
+
dwceqos_descriptor_free(lp);
- netif_tx_unlock(lp->ndev);
return 0;
}
@@ -2178,12 +2197,10 @@ static int dwceqos_start_xmit(struct sk_buff *skb, struct net_device *ndev)
((trans.initial_descriptor + trans.nr_descriptors) %
DWCEQOS_TX_DCNT));
- dwceqos_tx_finalize(skb, lp, &trans);
-
- netdev_sent_queue(ndev, skb->len);
-
spin_lock_bh(&lp->tx_lock);
lp->tx_free -= trans.nr_descriptors;
+ dwceqos_tx_finalize(skb, lp, &trans);
+ netdev_sent_queue(ndev, skb->len);
spin_unlock_bh(&lp->tx_lock);
ndev->trans_start = jiffies;
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index bc5da357e16d..192631a345df 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -68,6 +68,7 @@ struct geneve_dev {
u8 tos; /* TOS override */
union geneve_addr remote; /* IP address for link partner */
struct list_head next; /* geneve's per namespace list */
+ __be32 label; /* IPv6 flowlabel override */
__be16 dst_port;
bool collect_md;
struct gro_cells gro_cells;
@@ -462,8 +463,6 @@ static struct sk_buff **geneve_gro_receive(struct sk_buff **head,
goto out;
}
- flush = 0;
-
for (p = *head; p; p = p->next) {
if (!NAPI_GRO_CB(p)->same_flow)
continue;
@@ -480,14 +479,13 @@ static struct sk_buff **geneve_gro_receive(struct sk_buff **head,
rcu_read_lock();
ptype = gro_find_receive_by_type(type);
- if (!ptype) {
- flush = 1;
+ if (!ptype)
goto out_unlock;
- }
skb_gro_pull(skb, gh_len);
skb_gro_postpull_rcsum(skb, gh, gh_len);
pp = ptype->callbacks.gro_receive(head, skb);
+ flush = 0;
out_unlock:
rcu_read_unlock();
@@ -775,10 +773,10 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
struct flowi4 *fl4,
struct ip_tunnel_info *info)
{
+ bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
struct geneve_dev *geneve = netdev_priv(dev);
struct dst_cache *dst_cache;
struct rtable *rt = NULL;
- bool use_cache = true;
__u8 tos;
memset(fl4, 0, sizeof(*fl4));
@@ -804,7 +802,6 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
dst_cache = &geneve->dst_cache;
}
- use_cache = use_cache && !skb->mark;
if (use_cache) {
rt = dst_cache_get_ip4(dst_cache, &fl4->saddr);
if (rt)
@@ -832,11 +829,11 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
struct flowi6 *fl6,
struct ip_tunnel_info *info)
{
+ bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
struct geneve_dev *geneve = netdev_priv(dev);
struct geneve_sock *gs6 = geneve->sock6;
struct dst_entry *dst = NULL;
struct dst_cache *dst_cache;
- bool use_cache = true;
__u8 prio;
memset(fl6, 0, sizeof(*fl6));
@@ -847,6 +844,7 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
fl6->daddr = info->key.u.ipv6.dst;
fl6->saddr = info->key.u.ipv6.src;
fl6->flowi6_tos = RT_TOS(info->key.tos);
+ fl6->flowlabel = info->key.label;
dst_cache = &info->dst_cache;
} else {
prio = geneve->tos;
@@ -858,11 +856,11 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
}
fl6->flowi6_tos = RT_TOS(prio);
+ fl6->flowlabel = geneve->label;
fl6->daddr = geneve->remote.sin6.sin6_addr;
dst_cache = &geneve->dst_cache;
}
- use_cache = use_cache && !skb->mark;
if (use_cache) {
dst = dst_cache_get_ip6(dst_cache, &fl6->saddr);
if (dst)
@@ -940,7 +938,7 @@ static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
u8 vni[3];
tunnel_id_to_vni(key->tun_id, vni);
- if (key->tun_flags & TUNNEL_GENEVE_OPT)
+ if (info->options_len)
opts = ip_tunnel_info_opts(info);
if (key->tun_flags & TUNNEL_CSUM)
@@ -1000,6 +998,7 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
struct flowi6 fl6;
__u8 prio, ttl;
__be16 sport;
+ __be32 label;
bool xnet = !net_eq(geneve->net, dev_net(geneve->dev));
u32 flags = geneve->flags;
@@ -1027,7 +1026,7 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
u8 vni[3];
tunnel_id_to_vni(key->tun_id, vni);
- if (key->tun_flags & TUNNEL_GENEVE_OPT)
+ if (info->options_len)
opts = ip_tunnel_info_opts(info);
if (key->tun_flags & TUNNEL_CSUM)
@@ -1043,6 +1042,7 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
prio = ip_tunnel_ecn_encap(key->tos, iip, skb);
ttl = key->ttl;
+ label = info->key.label;
} else {
err = geneve6_build_skb(dst, skb, 0, geneve->vni,
0, NULL, flags, xnet);
@@ -1054,9 +1054,11 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
if (!ttl && ipv6_addr_is_multicast(&fl6.daddr))
ttl = 1;
ttl = ttl ? : ip6_dst_hoplimit(dst);
+ label = geneve->label;
}
+
udp_tunnel6_xmit_skb(dst, gs6->sock->sk, skb, dev,
- &fl6.saddr, &fl6.daddr, prio, ttl,
+ &fl6.saddr, &fl6.daddr, prio, ttl, label,
sport, geneve->dst_port,
!!(flags & GENEVE_F_UDP_ZERO_CSUM6_TX));
return NETDEV_TX_OK;
@@ -1240,6 +1242,7 @@ static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = {
[IFLA_GENEVE_REMOTE6] = { .len = sizeof(struct in6_addr) },
[IFLA_GENEVE_TTL] = { .type = NLA_U8 },
[IFLA_GENEVE_TOS] = { .type = NLA_U8 },
+ [IFLA_GENEVE_LABEL] = { .type = NLA_U32 },
[IFLA_GENEVE_PORT] = { .type = NLA_U16 },
[IFLA_GENEVE_COLLECT_METADATA] = { .type = NLA_FLAG },
[IFLA_GENEVE_UDP_CSUM] = { .type = NLA_U8 },
@@ -1297,8 +1300,8 @@ static struct geneve_dev *geneve_find_dev(struct geneve_net *gn,
static int geneve_configure(struct net *net, struct net_device *dev,
union geneve_addr *remote,
- __u32 vni, __u8 ttl, __u8 tos, __be16 dst_port,
- bool metadata, u32 flags)
+ __u32 vni, __u8 ttl, __u8 tos, __be32 label,
+ __be16 dst_port, bool metadata, u32 flags)
{
struct geneve_net *gn = net_generic(net, geneve_net_id);
struct geneve_dev *t, *geneve = netdev_priv(dev);
@@ -1308,7 +1311,7 @@ static int geneve_configure(struct net *net, struct net_device *dev,
if (!remote)
return -EINVAL;
if (metadata &&
- (remote->sa.sa_family != AF_UNSPEC || vni || tos || ttl))
+ (remote->sa.sa_family != AF_UNSPEC || vni || tos || ttl || label))
return -EINVAL;
geneve->net = net;
@@ -1323,10 +1326,14 @@ static int geneve_configure(struct net *net, struct net_device *dev,
(remote->sa.sa_family == AF_INET6 &&
ipv6_addr_is_multicast(&remote->sin6.sin6_addr)))
return -EINVAL;
+ if (label && remote->sa.sa_family != AF_INET6)
+ return -EINVAL;
+
geneve->remote = *remote;
geneve->ttl = ttl;
geneve->tos = tos;
+ geneve->label = label;
geneve->dst_port = dst_port;
geneve->collect_md = metadata;
geneve->flags = flags;
@@ -1369,6 +1376,7 @@ static int geneve_newlink(struct net *net, struct net_device *dev,
__u8 ttl = 0, tos = 0;
bool metadata = false;
union geneve_addr remote = geneve_remote_unspec;
+ __be32 label = 0;
__u32 vni = 0;
u32 flags = 0;
@@ -1405,6 +1413,10 @@ static int geneve_newlink(struct net *net, struct net_device *dev,
if (data[IFLA_GENEVE_TOS])
tos = nla_get_u8(data[IFLA_GENEVE_TOS]);
+ if (data[IFLA_GENEVE_LABEL])
+ label = nla_get_be32(data[IFLA_GENEVE_LABEL]) &
+ IPV6_FLOWLABEL_MASK;
+
if (data[IFLA_GENEVE_PORT])
dst_port = nla_get_be16(data[IFLA_GENEVE_PORT]);
@@ -1423,8 +1435,8 @@ static int geneve_newlink(struct net *net, struct net_device *dev,
nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]))
flags |= GENEVE_F_UDP_ZERO_CSUM6_RX;
- return geneve_configure(net, dev, &remote, vni, ttl, tos, dst_port,
- metadata, flags);
+ return geneve_configure(net, dev, &remote, vni, ttl, tos, label,
+ dst_port, metadata, flags);
}
static void geneve_dellink(struct net_device *dev, struct list_head *head)
@@ -1441,6 +1453,7 @@ static size_t geneve_get_size(const struct net_device *dev)
nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GENEVE_REMOTE{6} */
nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL */
nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TOS */
+ nla_total_size(sizeof(__be32)) + /* IFLA_GENEVE_LABEL */
nla_total_size(sizeof(__be16)) + /* IFLA_GENEVE_PORT */
nla_total_size(0) + /* IFLA_GENEVE_COLLECT_METADATA */
nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_CSUM */
@@ -1471,7 +1484,8 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
}
if (nla_put_u8(skb, IFLA_GENEVE_TTL, geneve->ttl) ||
- nla_put_u8(skb, IFLA_GENEVE_TOS, geneve->tos))
+ nla_put_u8(skb, IFLA_GENEVE_TOS, geneve->tos) ||
+ nla_put_be32(skb, IFLA_GENEVE_LABEL, geneve->label))
goto nla_put_failure;
if (nla_put_be16(skb, IFLA_GENEVE_PORT, geneve->dst_port))
@@ -1523,7 +1537,7 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
return dev;
err = geneve_configure(net, dev, &geneve_remote_unspec,
- 0, 0, 0, htons(dst_port), true,
+ 0, 0, 0, 0, htons(dst_port), true,
GENEVE_F_UDP_ZERO_CSUM6_RX);
if (err)
goto err;
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index a37bbda37ffa..47d07c576a34 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -1175,22 +1175,18 @@ int rndis_filter_device_add(struct hv_device *dev,
ret = rndis_filter_set_rss_param(rndis_device, net_device->num_chn);
/*
- * Wait for the host to send us the sub-channel offers.
+ * Set the number of sub-channels to be received.
*/
spin_lock_irqsave(&net_device->sc_lock, flags);
sc_delta = num_rss_qs - (net_device->num_chn - 1);
net_device->num_sc_offered -= sc_delta;
spin_unlock_irqrestore(&net_device->sc_lock, flags);
- while (net_device->num_sc_offered != 0) {
- t = wait_for_completion_timeout(&net_device->channel_init_wait, 10*HZ);
- if (t == 0)
- WARN(1, "Netvsc: Waiting for sub-channel processing");
- }
out:
if (ret) {
net_device->max_chn = 1;
net_device->num_chn = 1;
+ net_device->num_sc_offered = 0;
}
return 0; /* return 0 because primary channel can be used alone */
@@ -1204,6 +1200,17 @@ void rndis_filter_device_remove(struct hv_device *dev)
{
struct netvsc_device *net_dev = hv_get_drvdata(dev);
struct rndis_device *rndis_dev = net_dev->extension;
+ unsigned long t;
+
+ /* If not all subchannel offers are complete, wait for them until
+ * completion to avoid race.
+ */
+ while (net_dev->num_sc_offered > 0) {
+ t = wait_for_completion_timeout(&net_dev->channel_init_wait,
+ 10 * HZ);
+ if (t == 0)
+ WARN(1, "Netvsc: Waiting for sub-channel processing");
+ }
/* Halt and release the rndis device */
rndis_filter_halt_device(rndis_dev);
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index d636d051fac8..95394edd1ed5 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -760,6 +760,8 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
macvtap16_to_cpu(q, vnet_hdr.hdr_len) : GOODCOPY_LEN;
if (copylen > good_linear)
copylen = good_linear;
+ else if (copylen < ETH_HLEN)
+ copylen = ETH_HLEN;
linear = copylen;
i = *from;
iov_iter_advance(&i, copylen);
@@ -769,10 +771,11 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
if (!zerocopy) {
copylen = len;
- if (macvtap16_to_cpu(q, vnet_hdr.hdr_len) > good_linear)
+ linear = macvtap16_to_cpu(q, vnet_hdr.hdr_len);
+ if (linear > good_linear)
linear = good_linear;
- else
- linear = macvtap16_to_cpu(q, vnet_hdr.hdr_len);
+ else if (linear < ETH_HLEN)
+ linear = ETH_HLEN;
}
skb = macvtap_alloc_skb(&q->sk, MACVTAP_RESERVE, copylen,
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 48219c83fb00..4516c8a4fd82 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -297,6 +297,17 @@ static int kszphy_config_init(struct phy_device *phydev)
if (priv->led_mode >= 0)
kszphy_setup_led(phydev, type->led_mode_reg, priv->led_mode);
+ if (phy_interrupt_is_valid(phydev)) {
+ int ctl = phy_read(phydev, MII_BMCR);
+
+ if (ctl < 0)
+ return ctl;
+
+ ret = phy_write(phydev, MII_BMCR, ctl & ~BMCR_ANENABLE);
+ if (ret < 0)
+ return ret;
+ }
+
return 0;
}
@@ -636,6 +647,21 @@ static void kszphy_get_stats(struct phy_device *phydev,
data[i] = kszphy_get_stat(phydev, i);
}
+static int kszphy_resume(struct phy_device *phydev)
+{
+ int value;
+
+ mutex_lock(&phydev->lock);
+
+ value = phy_read(phydev, MII_BMCR);
+ phy_write(phydev, MII_BMCR, value & ~BMCR_PDOWN);
+
+ kszphy_config_intr(phydev);
+ mutex_unlock(&phydev->lock);
+
+ return 0;
+}
+
static int kszphy_probe(struct phy_device *phydev)
{
const struct kszphy_type *type = phydev->drv->driver_data;
@@ -845,7 +871,7 @@ static struct phy_driver ksphy_driver[] = {
.get_strings = kszphy_get_strings,
.get_stats = kszphy_get_stats,
.suspend = genphy_suspend,
- .resume = genphy_resume,
+ .resume = kszphy_resume,
}, {
.phy_id = PHY_ID_KSZ8061,
.name = "Micrel KSZ8061",
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 04f4eb34fa80..931836e09a6b 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -443,9 +443,14 @@ static ssize_t ppp_read(struct file *file, char __user *buf,
* network traffic (demand mode).
*/
struct ppp *ppp = PF_TO_PPP(pf);
+
+ ppp_recv_lock(ppp);
if (ppp->n_channels == 0 &&
- (ppp->flags & SC_LOOP_TRAFFIC) == 0)
+ (ppp->flags & SC_LOOP_TRAFFIC) == 0) {
+ ppp_recv_unlock(ppp);
break;
+ }
+ ppp_recv_unlock(ppp);
}
ret = -EAGAIN;
if (file->f_flags & O_NONBLOCK)
@@ -532,9 +537,12 @@ static unsigned int ppp_poll(struct file *file, poll_table *wait)
else if (pf->kind == INTERFACE) {
/* see comment in ppp_read */
struct ppp *ppp = PF_TO_PPP(pf);
+
+ ppp_recv_lock(ppp);
if (ppp->n_channels == 0 &&
(ppp->flags & SC_LOOP_TRAFFIC) == 0)
mask |= POLLIN | POLLRDNORM;
+ ppp_recv_unlock(ppp);
}
return mask;
@@ -2810,6 +2818,7 @@ static struct ppp *ppp_create_interface(struct net *net, int unit,
out2:
mutex_unlock(&pn->all_ppp_mutex);
+ rtnl_unlock();
free_netdev(dev);
out1:
*retp = ret;
diff --git a/drivers/net/usb/ax88172a.c b/drivers/net/usb/ax88172a.c
index 224e7d82de6d..cf77f2dffa69 100644
--- a/drivers/net/usb/ax88172a.c
+++ b/drivers/net/usb/ax88172a.c
@@ -134,7 +134,6 @@ static void ax88172a_remove_mdio(struct usbnet *dev)
netdev_info(dev->net, "deregistering mdio bus %s\n", priv->mdio->id);
mdiobus_unregister(priv->mdio);
- kfree(priv->mdio->irq);
mdiobus_free(priv->mdio);
}
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index dc0212c3cc28..86ba30ba35e8 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -837,7 +837,11 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_
iface_no = ctx->data->cur_altsetting->desc.bInterfaceNumber;
- /* reset data interface */
+ /* Reset data interface. Some devices will not reset properly
+ * unless they are configured first. Toggle the altsetting to
+ * force a reset
+ */
+ usb_set_interface(dev->udev, iface_no, data_altsetting);
temp = usb_set_interface(dev->udev, iface_no, 0);
if (temp) {
dev_dbg(&intf->dev, "set interface failed\n");
@@ -984,8 +988,6 @@ EXPORT_SYMBOL_GPL(cdc_ncm_select_altsetting);
static int cdc_ncm_bind(struct usbnet *dev, struct usb_interface *intf)
{
- int ret;
-
/* MBIM backwards compatible function? */
if (cdc_ncm_select_altsetting(intf) != CDC_NCM_COMM_ALTSETTING_NCM)
return -ENODEV;
@@ -994,16 +996,7 @@ static int cdc_ncm_bind(struct usbnet *dev, struct usb_interface *intf)
* Additionally, generic NCM devices are assumed to accept arbitrarily
* placed NDP.
*/
- ret = cdc_ncm_bind_common(dev, intf, CDC_NCM_DATA_ALTSETTING_NCM, 0);
-
- /*
- * We should get an event when network connection is "connected" or
- * "disconnected". Set network connection in "disconnected" state
- * (carrier is OFF) during attach, so the IP network stack does not
- * start IPv6 negotiation and more.
- */
- usbnet_link_change(dev, 0, 0);
- return ret;
+ return cdc_ncm_bind_common(dev, intf, CDC_NCM_DATA_ALTSETTING_NCM, 0);
}
static void cdc_ncm_align_tail(struct sk_buff *skb, size_t modulus, size_t remainder, size_t max)
@@ -1586,7 +1579,8 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb)
static const struct driver_info cdc_ncm_info = {
.description = "CDC NCM",
- .flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET,
+ .flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET
+ | FLAG_LINK_INTR,
.bind = cdc_ncm_bind,
.unbind = cdc_ncm_unbind,
.manage_power = usbnet_manage_power,
@@ -1599,7 +1593,7 @@ static const struct driver_info cdc_ncm_info = {
static const struct driver_info wwan_info = {
.description = "Mobile Broadband Network Device",
.flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET
- | FLAG_WWAN,
+ | FLAG_LINK_INTR | FLAG_WWAN,
.bind = cdc_ncm_bind,
.unbind = cdc_ncm_unbind,
.manage_power = usbnet_manage_power,
@@ -1612,7 +1606,7 @@ static const struct driver_info wwan_info = {
static const struct driver_info wwan_noarp_info = {
.description = "Mobile Broadband Network Device (NO ARP)",
.flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET
- | FLAG_WWAN | FLAG_NOARP,
+ | FLAG_LINK_INTR | FLAG_WWAN | FLAG_NOARP,
.bind = cdc_ncm_bind,
.unbind = cdc_ncm_unbind,
.manage_power = usbnet_manage_power,
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 570deef53f74..a3a4ccf7cf52 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -861,8 +861,10 @@ static const struct usb_device_id products[] = {
{QMI_FIXED_INTF(0x1199, 0x9056, 8)}, /* Sierra Wireless Modem */
{QMI_FIXED_INTF(0x1199, 0x9057, 8)},
{QMI_FIXED_INTF(0x1199, 0x9061, 8)}, /* Sierra Wireless Modem */
- {QMI_FIXED_INTF(0x1199, 0x9071, 8)}, /* Sierra Wireless MC74xx/EM74xx */
- {QMI_FIXED_INTF(0x1199, 0x9071, 10)}, /* Sierra Wireless MC74xx/EM74xx */
+ {QMI_FIXED_INTF(0x1199, 0x9071, 8)}, /* Sierra Wireless MC74xx */
+ {QMI_FIXED_INTF(0x1199, 0x9071, 10)}, /* Sierra Wireless MC74xx */
+ {QMI_FIXED_INTF(0x1199, 0x9079, 8)}, /* Sierra Wireless EM74xx */
+ {QMI_FIXED_INTF(0x1199, 0x9079, 10)}, /* Sierra Wireless EM74xx */
{QMI_FIXED_INTF(0x1bbb, 0x011e, 4)}, /* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */
{QMI_FIXED_INTF(0x1bbb, 0x0203, 2)}, /* Alcatel L800MA */
{QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */
@@ -885,6 +887,7 @@ static const struct usb_device_id products[] = {
{QMI_FIXED_INTF(0x413c, 0x81a8, 8)}, /* Dell Wireless 5808 Gobi(TM) 4G LTE Mobile Broadband Card */
{QMI_FIXED_INTF(0x413c, 0x81a9, 8)}, /* Dell Wireless 5808e Gobi(TM) 4G LTE Mobile Broadband Card */
{QMI_FIXED_INTF(0x413c, 0x81b1, 8)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card */
+ {QMI_FIXED_INTF(0x413c, 0x81b3, 8)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */
{QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)}, /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */
{QMI_FIXED_INTF(0x22de, 0x9061, 3)}, /* WeTelecom WPD-600N */
{QMI_FIXED_INTF(0x1e0e, 0x9001, 5)}, /* SIMCom 7230E */
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 0b0ba7ef14e4..10798128c03f 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -1769,6 +1769,13 @@ out3:
if (info->unbind)
info->unbind (dev, udev);
out1:
+ /* subdrivers must undo all they did in bind() if they
+ * fail it, but we may fail later and a deferred kevent
+ * may trigger an error resubmitting itself and, worse,
+ * schedule a timer. So we kill it all just in case.
+ */
+ cancel_work_sync(&dev->kevent);
+ del_timer_sync(&dev->delay);
free_netdev(net);
out:
return status;
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 0cbf520cea77..fc895d0e85d9 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -814,7 +814,7 @@ vmxnet3_tq_init_all(struct vmxnet3_adapter *adapter)
/*
- * parse and copy relevant protocol headers:
+ * parse relevant protocol headers:
* For a tso pkt, relevant headers are L2/3/4 including options
* For a pkt requesting csum offloading, they are L2/3 and may include L4
* if it's a TCP/UDP pkt
@@ -827,15 +827,14 @@ vmxnet3_tq_init_all(struct vmxnet3_adapter *adapter)
* Other effects:
* 1. related *ctx fields are updated.
* 2. ctx->copy_size is # of bytes copied
- * 3. the portion copied is guaranteed to be in the linear part
+ * 3. the portion to be copied is guaranteed to be in the linear part
*
*/
static int
-vmxnet3_parse_and_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
- struct vmxnet3_tx_ctx *ctx,
- struct vmxnet3_adapter *adapter)
+vmxnet3_parse_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
+ struct vmxnet3_tx_ctx *ctx,
+ struct vmxnet3_adapter *adapter)
{
- struct Vmxnet3_TxDataDesc *tdd;
u8 protocol = 0;
if (ctx->mss) { /* TSO */
@@ -892,16 +891,34 @@ vmxnet3_parse_and_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
return 0;
}
+ return 1;
+err:
+ return -1;
+}
+
+/*
+ * copy relevant protocol headers to the transmit ring:
+ * For a tso pkt, relevant headers are L2/3/4 including options
+ * For a pkt requesting csum offloading, they are L2/3 and may include L4
+ * if it's a TCP/UDP pkt
+ *
+ *
+ * Note that this requires that vmxnet3_parse_hdr be called first to set the
+ * appropriate bits in ctx first
+ */
+static void
+vmxnet3_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
+ struct vmxnet3_tx_ctx *ctx,
+ struct vmxnet3_adapter *adapter)
+{
+ struct Vmxnet3_TxDataDesc *tdd;
+
tdd = tq->data_ring.base + tq->tx_ring.next2fill;
memcpy(tdd->data, skb->data, ctx->copy_size);
netdev_dbg(adapter->netdev,
"copy %u bytes to dataRing[%u]\n",
ctx->copy_size, tq->tx_ring.next2fill);
- return 1;
-
-err:
- return -1;
}
@@ -998,22 +1015,7 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
}
}
- spin_lock_irqsave(&tq->tx_lock, flags);
-
- if (count > vmxnet3_cmd_ring_desc_avail(&tq->tx_ring)) {
- tq->stats.tx_ring_full++;
- netdev_dbg(adapter->netdev,
- "tx queue stopped on %s, next2comp %u"
- " next2fill %u\n", adapter->netdev->name,
- tq->tx_ring.next2comp, tq->tx_ring.next2fill);
-
- vmxnet3_tq_stop(tq, adapter);
- spin_unlock_irqrestore(&tq->tx_lock, flags);
- return NETDEV_TX_BUSY;
- }
-
-
- ret = vmxnet3_parse_and_copy_hdr(skb, tq, &ctx, adapter);
+ ret = vmxnet3_parse_hdr(skb, tq, &ctx, adapter);
if (ret >= 0) {
BUG_ON(ret <= 0 && ctx.copy_size != 0);
/* hdrs parsed, check against other limits */
@@ -1033,9 +1035,26 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
}
} else {
tq->stats.drop_hdr_inspect_err++;
- goto unlock_drop_pkt;
+ goto drop_pkt;
}
+ spin_lock_irqsave(&tq->tx_lock, flags);
+
+ if (count > vmxnet3_cmd_ring_desc_avail(&tq->tx_ring)) {
+ tq->stats.tx_ring_full++;
+ netdev_dbg(adapter->netdev,
+ "tx queue stopped on %s, next2comp %u"
+ " next2fill %u\n", adapter->netdev->name,
+ tq->tx_ring.next2comp, tq->tx_ring.next2fill);
+
+ vmxnet3_tq_stop(tq, adapter);
+ spin_unlock_irqrestore(&tq->tx_lock, flags);
+ return NETDEV_TX_BUSY;
+ }
+
+
+ vmxnet3_copy_hdr(skb, tq, &ctx, adapter);
+
/* fill tx descs related to addr & len */
if (vmxnet3_map_pkt(skb, &ctx, tq, adapter->pdev, adapter))
goto unlock_drop_pkt;
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 9ce088bb28ab..9a9fabb900c1 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -103,20 +103,23 @@ static struct dst_ops vrf_dst_ops = {
#if IS_ENABLED(CONFIG_IPV6)
static bool check_ipv6_frame(const struct sk_buff *skb)
{
- const struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb->data;
- size_t hlen = sizeof(*ipv6h);
+ const struct ipv6hdr *ipv6h;
+ struct ipv6hdr _ipv6h;
bool rc = true;
- if (skb->len < hlen)
+ ipv6h = skb_header_pointer(skb, 0, sizeof(_ipv6h), &_ipv6h);
+ if (!ipv6h)
goto out;
if (ipv6h->nexthdr == NEXTHDR_ICMP) {
const struct icmp6hdr *icmph;
+ struct icmp6hdr _icmph;
- if (skb->len < hlen + sizeof(*icmph))
+ icmph = skb_header_pointer(skb, sizeof(_ipv6h),
+ sizeof(_icmph), &_icmph);
+ if (!icmph)
goto out;
- icmph = (struct icmp6hdr *)(skb->data + sizeof(*ipv6h));
switch (icmph->icmp6_type) {
case NDISC_ROUTER_SOLICITATION:
case NDISC_ROUTER_ADVERTISEMENT:
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 8ca243d93b78..800106a7246c 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -591,8 +591,6 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head,
skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */
- flush = 0;
-
for (p = *head; p; p = p->next) {
if (!NAPI_GRO_CB(p)->same_flow)
continue;
@@ -606,6 +604,7 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head,
}
pp = eth_gro_receive(head, skb);
+ flush = 0;
out:
skb_gro_remcsum_cleanup(skb, &grc);
@@ -948,8 +947,10 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
cb->nlh->nlmsg_seq,
RTM_NEWNEIGH,
NLM_F_MULTI, rd);
- if (err < 0)
+ if (err < 0) {
+ cb->args[1] = err;
goto out;
+ }
skip:
++idx;
}
@@ -1176,9 +1177,10 @@ static void vxlan_parse_gbp_hdr(struct vxlanhdr *unparsed,
md->gbp = ntohs(gbp->policy_id);
tun_dst = (struct metadata_dst *)skb_dst(skb);
- if (tun_dst)
+ if (tun_dst) {
tun_dst->u.tun_info.key.tun_flags |= TUNNEL_VXLAN_OPT;
-
+ tun_dst->u.tun_info.options_len = sizeof(*md);
+ }
if (gbp->dont_learn)
md->gbp |= VXLAN_GBP_DONT_LEARN;
@@ -1753,17 +1755,15 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
struct sk_buff *skb, int oif, u8 tos,
__be32 daddr, __be32 *saddr,
struct dst_cache *dst_cache,
- struct ip_tunnel_info *info)
+ const struct ip_tunnel_info *info)
{
+ bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
struct rtable *rt = NULL;
- bool use_cache = false;
struct flowi4 fl4;
- /* when the ip_tunnel_info is availble, the tos used for lookup is
- * packet independent, so we can use the cache
- */
- if (!skb->mark && (!tos || info)) {
- use_cache = true;
+ if (tos && !info)
+ use_cache = false;
+ if (use_cache) {
rt = dst_cache_get_ip4(dst_cache, saddr);
if (rt)
return rt;
@@ -1788,16 +1788,21 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
#if IS_ENABLED(CONFIG_IPV6)
static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
- struct sk_buff *skb, int oif,
+ struct sk_buff *skb, int oif, u8 tos,
+ __be32 label,
const struct in6_addr *daddr,
struct in6_addr *saddr,
- struct dst_cache *dst_cache)
+ struct dst_cache *dst_cache,
+ const struct ip_tunnel_info *info)
{
+ bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
struct dst_entry *ndst;
struct flowi6 fl6;
int err;
- if (!skb->mark) {
+ if (tos && !info)
+ use_cache = false;
+ if (use_cache) {
ndst = dst_cache_get_ip6(dst_cache, saddr);
if (ndst)
return ndst;
@@ -1805,8 +1810,10 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_oif = oif;
+ fl6.flowi6_tos = RT_TOS(tos);
fl6.daddr = *daddr;
fl6.saddr = vxlan->cfg.saddr.sin6.sin6_addr;
+ fl6.flowlabel = label;
fl6.flowi6_mark = skb->mark;
fl6.flowi6_proto = IPPROTO_UDP;
@@ -1817,7 +1824,7 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
return ERR_PTR(err);
*saddr = fl6.saddr;
- if (!skb->mark)
+ if (use_cache)
dst_cache_set_ip6(dst_cache, ndst, saddr);
return ndst;
}
@@ -1882,7 +1889,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
struct vxlan_metadata _md;
struct vxlan_metadata *md = &_md;
__be16 src_port = 0, dst_port;
- __be32 vni;
+ __be32 vni, label;
__be16 df = 0;
__u8 tos, ttl;
int err;
@@ -1933,12 +1940,14 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
if (tos == 1)
tos = ip_tunnel_get_dsfield(old_iph, skb);
+ label = vxlan->cfg.label;
src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
vxlan->cfg.port_max, true);
if (info) {
ttl = info->key.ttl;
tos = info->key.tos;
+ label = info->key.label;
udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM);
if (info->options_len)
@@ -2013,9 +2022,9 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
sk = vxlan->vn6_sock->sock->sk;
ndst = vxlan6_get_route(vxlan, skb,
- rdst ? rdst->remote_ifindex : 0,
- &dst->sin6.sin6_addr, &saddr,
- dst_cache);
+ rdst ? rdst->remote_ifindex : 0, tos,
+ label, &dst->sin6.sin6_addr, &saddr,
+ dst_cache, info);
if (IS_ERR(ndst)) {
netdev_dbg(dev, "no route to %pI6\n",
&dst->sin6.sin6_addr);
@@ -2050,6 +2059,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
if (!info)
udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX);
+ tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
ttl = ttl ? : ip6_dst_hoplimit(ndst);
skb_scrub_packet(skb, xnet);
err = vxlan_build_skb(skb, ndst, sizeof(struct ipv6hdr),
@@ -2059,8 +2069,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
return;
}
udp_tunnel6_xmit_skb(ndst, sk, skb, dev,
- &saddr, &dst->sin6.sin6_addr,
- 0, ttl, src_port, dst_port, !udp_sum);
+ &saddr, &dst->sin6.sin6_addr, tos, ttl,
+ label, src_port, dst_port, !udp_sum);
#endif
}
@@ -2382,9 +2392,9 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
if (!vxlan->vn6_sock)
return -EINVAL;
- ndst = vxlan6_get_route(vxlan, skb, 0,
- &info->key.u.ipv6.dst,
- &info->key.u.ipv6.src, NULL);
+ ndst = vxlan6_get_route(vxlan, skb, 0, info->key.tos,
+ info->key.label, &info->key.u.ipv6.dst,
+ &info->key.u.ipv6.src, NULL, info);
if (IS_ERR(ndst))
return PTR_ERR(ndst);
dst_release(ndst);
@@ -2498,6 +2508,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
[IFLA_VXLAN_LOCAL6] = { .len = sizeof(struct in6_addr) },
[IFLA_VXLAN_TOS] = { .type = NLA_U8 },
[IFLA_VXLAN_TTL] = { .type = NLA_U8 },
+ [IFLA_VXLAN_LABEL] = { .type = NLA_U32 },
[IFLA_VXLAN_LEARNING] = { .type = NLA_U8 },
[IFLA_VXLAN_AGEING] = { .type = NLA_U32 },
[IFLA_VXLAN_LIMIT] = { .type = NLA_U32 },
@@ -2732,6 +2743,11 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
vxlan->flags |= VXLAN_F_IPV6;
}
+ if (conf->label && !use_ipv6) {
+ pr_info("label only supported in use with IPv6\n");
+ return -EINVAL;
+ }
+
if (conf->remote_ifindex) {
lowerdev = __dev_get_by_index(src_net, conf->remote_ifindex);
dst->remote_ifindex = conf->remote_ifindex;
@@ -2880,6 +2896,10 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
if (data[IFLA_VXLAN_TTL])
conf.ttl = nla_get_u8(data[IFLA_VXLAN_TTL]);
+ if (data[IFLA_VXLAN_LABEL])
+ conf.label = nla_get_be32(data[IFLA_VXLAN_LABEL]) &
+ IPV6_FLOWLABEL_MASK;
+
if (!data[IFLA_VXLAN_LEARNING] || nla_get_u8(data[IFLA_VXLAN_LEARNING]))
conf.flags |= VXLAN_F_LEARN;
@@ -2983,6 +3003,7 @@ static size_t vxlan_get_size(const struct net_device *dev)
nla_total_size(sizeof(struct in6_addr)) + /* IFLA_VXLAN_LOCAL{6} */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TTL */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TOS */
+ nla_total_size(sizeof(__be32)) + /* IFLA_VXLAN_LABEL */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LEARNING */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_PROXY */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_RSC */
@@ -3046,6 +3067,7 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
if (nla_put_u8(skb, IFLA_VXLAN_TTL, vxlan->cfg.ttl) ||
nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->cfg.tos) ||
+ nla_put_be32(skb, IFLA_VXLAN_LABEL, vxlan->cfg.label) ||
nla_put_u8(skb, IFLA_VXLAN_LEARNING,
!!(vxlan->flags & VXLAN_F_LEARN)) ||
nla_put_u8(skb, IFLA_VXLAN_PROXY,
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 7e2c43f701bc..5d28e9405f32 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -382,18 +382,18 @@ static const struct nd_cmd_desc __nd_cmd_bus_descs[] = {
[ND_CMD_ARS_CAP] = {
.in_num = 2,
.in_sizes = { 8, 8, },
- .out_num = 2,
- .out_sizes = { 4, 4, },
+ .out_num = 4,
+ .out_sizes = { 4, 4, 4, 4, },
},
[ND_CMD_ARS_START] = {
- .in_num = 4,
- .in_sizes = { 8, 8, 2, 6, },
- .out_num = 1,
- .out_sizes = { 4, },
+ .in_num = 5,
+ .in_sizes = { 8, 8, 2, 1, 5, },
+ .out_num = 2,
+ .out_sizes = { 4, 4, },
},
[ND_CMD_ARS_STATUS] = {
- .out_num = 2,
- .out_sizes = { 4, UINT_MAX, },
+ .out_num = 3,
+ .out_sizes = { 4, 4, UINT_MAX, },
},
};
@@ -442,8 +442,8 @@ u32 nd_cmd_out_size(struct nvdimm *nvdimm, int cmd,
return in_field[1];
else if (nvdimm && cmd == ND_CMD_VENDOR && idx == 2)
return out_field[1];
- else if (!nvdimm && cmd == ND_CMD_ARS_STATUS && idx == 1)
- return ND_CMD_ARS_STATUS_MAX;
+ else if (!nvdimm && cmd == ND_CMD_ARS_STATUS && idx == 2)
+ return out_field[1] - 8;
return UINT_MAX;
}
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 7edf31671dab..8d0b54670184 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -41,7 +41,7 @@ struct pmem_device {
phys_addr_t phys_addr;
/* when non-zero this device is hosting a 'pfn' instance */
phys_addr_t data_offset;
- unsigned long pfn_flags;
+ u64 pfn_flags;
void __pmem *virt_addr;
size_t size;
struct badblocks bb;
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 3cd921e6121e..03c46412fff4 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -55,8 +55,9 @@ static void nvme_free_ns(struct kref *kref)
ns->disk->private_data = NULL;
spin_unlock(&dev_list_lock);
- nvme_put_ctrl(ns->ctrl);
put_disk(ns->disk);
+ ida_simple_remove(&ns->ctrl->ns_ida, ns->instance);
+ nvme_put_ctrl(ns->ctrl);
kfree(ns);
}
@@ -183,7 +184,7 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd,
goto out_unmap;
}
- if (meta_buffer) {
+ if (meta_buffer && meta_len) {
struct bio_integrity_payload *bip;
meta = kmalloc(meta_len, GFP_KERNEL);
@@ -373,6 +374,8 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
if (copy_from_user(&io, uio, sizeof(io)))
return -EFAULT;
+ if (io.flags)
+ return -EINVAL;
switch (io.opcode) {
case nvme_cmd_write:
@@ -424,6 +427,8 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
return -EACCES;
if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
return -EFAULT;
+ if (cmd.flags)
+ return -EINVAL;
memset(&c, 0, sizeof(c));
c.common.opcode = cmd.opcode;
@@ -556,6 +561,10 @@ static int nvme_revalidate_disk(struct gendisk *disk)
u16 old_ms;
unsigned short bs;
+ if (test_bit(NVME_NS_DEAD, &ns->flags)) {
+ set_capacity(disk, 0);
+ return -ENODEV;
+ }
if (nvme_identify_ns(ns->ctrl, ns->ns_id, &id)) {
dev_warn(ns->ctrl->dev, "%s: Identify failure nvme%dn%d\n",
__func__, ns->ctrl->instance, ns->ns_id);
@@ -831,6 +840,23 @@ int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl)
return ret;
}
+static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
+ struct request_queue *q)
+{
+ if (ctrl->max_hw_sectors) {
+ u32 max_segments =
+ (ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1;
+
+ blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors);
+ blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
+ }
+ if (ctrl->stripe_size)
+ blk_queue_chunk_sectors(q, ctrl->stripe_size >> 9);
+ if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
+ blk_queue_flush(q, REQ_FLUSH | REQ_FUA);
+ blk_queue_virt_boundary(q, ctrl->page_size - 1);
+}
+
/*
* Initialize the cached copies of the Identify data and various controller
* register in our nvme_ctrl structure. This should be called as soon as
@@ -888,6 +914,8 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
}
}
+ nvme_set_queue_limits(ctrl, ctrl->admin_q);
+
kfree(id);
return 0;
}
@@ -1118,9 +1146,13 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
if (!ns)
return;
+ ns->instance = ida_simple_get(&ctrl->ns_ida, 1, 0, GFP_KERNEL);
+ if (ns->instance < 0)
+ goto out_free_ns;
+
ns->queue = blk_mq_init_queue(ctrl->tagset);
if (IS_ERR(ns->queue))
- goto out_free_ns;
+ goto out_release_instance;
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue);
ns->queue->queuedata = ns;
ns->ctrl = ctrl;
@@ -1134,17 +1166,9 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
ns->disk = disk;
ns->lba_shift = 9; /* set to a default value for 512 until disk is validated */
+
blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
- if (ctrl->max_hw_sectors) {
- blk_queue_max_hw_sectors(ns->queue, ctrl->max_hw_sectors);
- blk_queue_max_segments(ns->queue,
- (ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1);
- }
- if (ctrl->stripe_size)
- blk_queue_chunk_sectors(ns->queue, ctrl->stripe_size >> 9);
- if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
- blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA);
- blk_queue_virt_boundary(ns->queue, ctrl->page_size - 1);
+ nvme_set_queue_limits(ctrl, ns->queue);
disk->major = nvme_major;
disk->first_minor = 0;
@@ -1153,7 +1177,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
disk->queue = ns->queue;
disk->driverfs_dev = ctrl->device;
disk->flags = GENHD_FL_EXT_DEVT;
- sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance, nsid);
+ sprintf(disk->disk_name, "nvme%dn%d", ctrl->instance, ns->instance);
if (nvme_revalidate_disk(ns->disk))
goto out_free_disk;
@@ -1173,40 +1197,29 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
kfree(disk);
out_free_queue:
blk_cleanup_queue(ns->queue);
+ out_release_instance:
+ ida_simple_remove(&ctrl->ns_ida, ns->instance);
out_free_ns:
kfree(ns);
}
static void nvme_ns_remove(struct nvme_ns *ns)
{
- bool kill = nvme_io_incapable(ns->ctrl) &&
- !blk_queue_dying(ns->queue);
-
- lockdep_assert_held(&ns->ctrl->namespaces_mutex);
-
- if (kill) {
- blk_set_queue_dying(ns->queue);
+ if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags))
+ return;
- /*
- * The controller was shutdown first if we got here through
- * device removal. The shutdown may requeue outstanding
- * requests. These need to be aborted immediately so
- * del_gendisk doesn't block indefinitely for their completion.
- */
- blk_mq_abort_requeue_list(ns->queue);
- }
if (ns->disk->flags & GENHD_FL_UP) {
if (blk_get_integrity(ns->disk))
blk_integrity_unregister(ns->disk);
sysfs_remove_group(&disk_to_dev(ns->disk)->kobj,
&nvme_ns_attr_group);
del_gendisk(ns->disk);
- }
- if (kill || !blk_queue_dying(ns->queue)) {
blk_mq_abort_requeue_list(ns->queue);
blk_cleanup_queue(ns->queue);
}
+ mutex_lock(&ns->ctrl->namespaces_mutex);
list_del_init(&ns->list);
+ mutex_unlock(&ns->ctrl->namespaces_mutex);
nvme_put_ns(ns);
}
@@ -1300,10 +1313,8 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
{
struct nvme_ns *ns, *next;
- mutex_lock(&ctrl->namespaces_mutex);
list_for_each_entry_safe(ns, next, &ctrl->namespaces, list)
nvme_ns_remove(ns);
- mutex_unlock(&ctrl->namespaces_mutex);
}
static DEFINE_IDA(nvme_instance_ida);
@@ -1350,6 +1361,7 @@ static void nvme_free_ctrl(struct kref *kref)
put_device(ctrl->device);
nvme_release_instance(ctrl);
+ ida_destroy(&ctrl->ns_ida);
ctrl->ops->free_ctrl(ctrl);
}
@@ -1390,6 +1402,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
}
get_device(ctrl->device);
dev_set_drvdata(ctrl->device, ctrl);
+ ida_init(&ctrl->ns_ida);
spin_lock(&dev_list_lock);
list_add_tail(&ctrl->node, &nvme_ctrl_list);
@@ -1402,6 +1415,38 @@ out:
return ret;
}
+/**
+ * nvme_kill_queues(): Ends all namespace queues
+ * @ctrl: the dead controller that needs to end
+ *
+ * Call this function when the driver determines it is unable to get the
+ * controller in a state capable of servicing IO.
+ */
+void nvme_kill_queues(struct nvme_ctrl *ctrl)
+{
+ struct nvme_ns *ns;
+
+ mutex_lock(&ctrl->namespaces_mutex);
+ list_for_each_entry(ns, &ctrl->namespaces, list) {
+ if (!kref_get_unless_zero(&ns->kref))
+ continue;
+
+ /*
+ * Revalidating a dead namespace sets capacity to 0. This will
+ * end buffered writers dirtying pages that can't be synced.
+ */
+ if (!test_and_set_bit(NVME_NS_DEAD, &ns->flags))
+ revalidate_disk(ns->disk);
+
+ blk_set_queue_dying(ns->queue);
+ blk_mq_abort_requeue_list(ns->queue);
+ blk_mq_start_stopped_hw_queues(ns->queue, true);
+
+ nvme_put_ns(ns);
+ }
+ mutex_unlock(&ctrl->namespaces_mutex);
+}
+
void nvme_stop_queues(struct nvme_ctrl *ctrl)
{
struct nvme_ns *ns;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 9664d07d807d..fb15ba5f5d19 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -72,6 +72,7 @@ struct nvme_ctrl {
struct mutex namespaces_mutex;
struct device *device; /* char device */
struct list_head node;
+ struct ida ns_ida;
char name[12];
char serial[20];
@@ -102,6 +103,7 @@ struct nvme_ns {
struct request_queue *queue;
struct gendisk *disk;
struct kref kref;
+ int instance;
u8 eui[8];
u8 uuid[16];
@@ -112,6 +114,11 @@ struct nvme_ns {
bool ext;
u8 pi_type;
int type;
+ unsigned long flags;
+
+#define NVME_NS_REMOVING 0
+#define NVME_NS_DEAD 1
+
u64 mode_select_num_blocks;
u32 mode_select_block_len;
};
@@ -240,6 +247,7 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl);
void nvme_stop_queues(struct nvme_ctrl *ctrl);
void nvme_start_queues(struct nvme_ctrl *ctrl);
+void nvme_kill_queues(struct nvme_ctrl *ctrl);
struct request *nvme_alloc_request(struct request_queue *q,
struct nvme_command *cmd, unsigned int flags);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index a128672472ec..680f5780750c 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -86,7 +86,6 @@ struct nvme_queue;
static int nvme_reset(struct nvme_dev *dev);
static void nvme_process_cq(struct nvme_queue *nvmeq);
-static void nvme_remove_dead_ctrl(struct nvme_dev *dev);
static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
/*
@@ -120,6 +119,7 @@ struct nvme_dev {
unsigned long flags;
#define NVME_CTRL_RESETTING 0
+#define NVME_CTRL_REMOVING 1
struct nvme_ctrl ctrl;
struct completion ioq_wait;
@@ -286,6 +286,17 @@ static int nvme_init_request(void *data, struct request *req,
return 0;
}
+static void nvme_queue_scan(struct nvme_dev *dev)
+{
+ /*
+ * Do not queue new scan work when a controller is reset during
+ * removal.
+ */
+ if (test_bit(NVME_CTRL_REMOVING, &dev->flags))
+ return;
+ queue_work(nvme_workq, &dev->scan_work);
+}
+
static void nvme_complete_async_event(struct nvme_dev *dev,
struct nvme_completion *cqe)
{
@@ -300,7 +311,7 @@ static void nvme_complete_async_event(struct nvme_dev *dev,
switch (result & 0xff07) {
case NVME_AER_NOTICE_NS_CHANGED:
dev_info(dev->dev, "rescanning\n");
- queue_work(nvme_workq, &dev->scan_work);
+ nvme_queue_scan(dev);
default:
dev_warn(dev->dev, "async event result %08x\n", result);
}
@@ -679,7 +690,10 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
spin_lock_irq(&nvmeq->q_lock);
if (unlikely(nvmeq->cq_vector < 0)) {
- ret = BLK_MQ_RQ_QUEUE_BUSY;
+ if (ns && !test_bit(NVME_NS_DEAD, &ns->flags))
+ ret = BLK_MQ_RQ_QUEUE_BUSY;
+ else
+ ret = BLK_MQ_RQ_QUEUE_ERROR;
spin_unlock_irq(&nvmeq->q_lock);
goto out;
}
@@ -1250,6 +1264,12 @@ static struct blk_mq_ops nvme_mq_ops = {
static void nvme_dev_remove_admin(struct nvme_dev *dev)
{
if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q)) {
+ /*
+ * If the controller was reset during removal, it's possible
+ * user requests may be waiting on a stopped queue. Start the
+ * queue to flush these to completion.
+ */
+ blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true);
blk_cleanup_queue(dev->ctrl.admin_q);
blk_mq_free_tag_set(&dev->admin_tagset);
}
@@ -1690,14 +1710,14 @@ static int nvme_dev_add(struct nvme_dev *dev)
return 0;
dev->ctrl.tagset = &dev->tagset;
}
- queue_work(nvme_workq, &dev->scan_work);
+ nvme_queue_scan(dev);
return 0;
}
-static int nvme_dev_map(struct nvme_dev *dev)
+static int nvme_pci_enable(struct nvme_dev *dev)
{
u64 cap;
- int bars, result = -ENOMEM;
+ int result = -ENOMEM;
struct pci_dev *pdev = to_pci_dev(dev->dev);
if (pci_enable_device_mem(pdev))
@@ -1705,24 +1725,14 @@ static int nvme_dev_map(struct nvme_dev *dev)
dev->entry[0].vector = pdev->irq;
pci_set_master(pdev);
- bars = pci_select_bars(pdev, IORESOURCE_MEM);
- if (!bars)
- goto disable_pci;
-
- if (pci_request_selected_regions(pdev, bars, "nvme"))
- goto disable_pci;
if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)) &&
dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(32)))
goto disable;
- dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
- if (!dev->bar)
- goto disable;
-
if (readl(dev->bar + NVME_REG_CSTS) == -1) {
result = -ENODEV;
- goto unmap;
+ goto disable;
}
/*
@@ -1732,7 +1742,7 @@ static int nvme_dev_map(struct nvme_dev *dev)
if (!pdev->irq) {
result = pci_enable_msix(pdev, dev->entry, 1);
if (result < 0)
- goto unmap;
+ goto disable;
}
cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
@@ -1759,18 +1769,20 @@ static int nvme_dev_map(struct nvme_dev *dev)
pci_save_state(pdev);
return 0;
- unmap:
- iounmap(dev->bar);
- dev->bar = NULL;
disable:
- pci_release_regions(pdev);
- disable_pci:
pci_disable_device(pdev);
return result;
}
static void nvme_dev_unmap(struct nvme_dev *dev)
{
+ if (dev->bar)
+ iounmap(dev->bar);
+ pci_release_regions(to_pci_dev(dev->dev));
+}
+
+static void nvme_pci_disable(struct nvme_dev *dev)
+{
struct pci_dev *pdev = to_pci_dev(dev->dev);
if (pdev->msi_enabled)
@@ -1778,12 +1790,6 @@ static void nvme_dev_unmap(struct nvme_dev *dev)
else if (pdev->msix_enabled)
pci_disable_msix(pdev);
- if (dev->bar) {
- iounmap(dev->bar);
- dev->bar = NULL;
- pci_release_regions(pdev);
- }
-
if (pci_is_enabled(pdev)) {
pci_disable_pcie_error_reporting(pdev);
pci_disable_device(pdev);
@@ -1842,7 +1848,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
nvme_dev_list_remove(dev);
mutex_lock(&dev->shutdown_lock);
- if (dev->bar) {
+ if (pci_is_enabled(to_pci_dev(dev->dev))) {
nvme_stop_queues(&dev->ctrl);
csts = readl(dev->bar + NVME_REG_CSTS);
}
@@ -1855,7 +1861,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
nvme_disable_io_queues(dev);
nvme_disable_admin_queue(dev, shutdown);
}
- nvme_dev_unmap(dev);
+ nvme_pci_disable(dev);
for (i = dev->queue_count - 1; i >= 0; i--)
nvme_clear_queue(dev->queues[i]);
@@ -1899,10 +1905,20 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
kfree(dev);
}
+static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status)
+{
+ dev_warn(dev->dev, "Removing after probe failure status: %d\n", status);
+
+ kref_get(&dev->ctrl.kref);
+ nvme_dev_disable(dev, false);
+ if (!schedule_work(&dev->remove_work))
+ nvme_put_ctrl(&dev->ctrl);
+}
+
static void nvme_reset_work(struct work_struct *work)
{
struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work);
- int result;
+ int result = -ENODEV;
if (WARN_ON(test_bit(NVME_CTRL_RESETTING, &dev->flags)))
goto out;
@@ -1911,37 +1927,37 @@ static void nvme_reset_work(struct work_struct *work)
* If we're called to reset a live controller first shut it down before
* moving on.
*/
- if (dev->bar)
+ if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
nvme_dev_disable(dev, false);
set_bit(NVME_CTRL_RESETTING, &dev->flags);
- result = nvme_dev_map(dev);
+ result = nvme_pci_enable(dev);
if (result)
goto out;
result = nvme_configure_admin_queue(dev);
if (result)
- goto unmap;
+ goto out;
nvme_init_queue(dev->queues[0], 0);
result = nvme_alloc_admin_tags(dev);
if (result)
- goto disable;
+ goto out;
result = nvme_init_identify(&dev->ctrl);
if (result)
- goto free_tags;
+ goto out;
result = nvme_setup_io_queues(dev);
if (result)
- goto free_tags;
+ goto out;
dev->ctrl.event_limit = NVME_NR_AEN_COMMANDS;
result = nvme_dev_list_add(dev);
if (result)
- goto remove;
+ goto out;
/*
* Keep the controller around but remove all namespaces if we don't have
@@ -1958,19 +1974,8 @@ static void nvme_reset_work(struct work_struct *work)
clear_bit(NVME_CTRL_RESETTING, &dev->flags);
return;
- remove:
- nvme_dev_list_remove(dev);
- free_tags:
- nvme_dev_remove_admin(dev);
- blk_put_queue(dev->ctrl.admin_q);
- dev->ctrl.admin_q = NULL;
- dev->queues[0]->tags = NULL;
- disable:
- nvme_disable_admin_queue(dev, false);
- unmap:
- nvme_dev_unmap(dev);
out:
- nvme_remove_dead_ctrl(dev);
+ nvme_remove_dead_ctrl(dev, result);
}
static void nvme_remove_dead_ctrl_work(struct work_struct *work)
@@ -1978,19 +1983,12 @@ static void nvme_remove_dead_ctrl_work(struct work_struct *work)
struct nvme_dev *dev = container_of(work, struct nvme_dev, remove_work);
struct pci_dev *pdev = to_pci_dev(dev->dev);
+ nvme_kill_queues(&dev->ctrl);
if (pci_get_drvdata(pdev))
pci_stop_and_remove_bus_device_locked(pdev);
nvme_put_ctrl(&dev->ctrl);
}
-static void nvme_remove_dead_ctrl(struct nvme_dev *dev)
-{
- dev_warn(dev->dev, "Removing after probe failure\n");
- kref_get(&dev->ctrl.kref);
- if (!schedule_work(&dev->remove_work))
- nvme_put_ctrl(&dev->ctrl);
-}
-
static int nvme_reset(struct nvme_dev *dev)
{
if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q))
@@ -2042,6 +2040,27 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
.free_ctrl = nvme_pci_free_ctrl,
};
+static int nvme_dev_map(struct nvme_dev *dev)
+{
+ int bars;
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
+
+ bars = pci_select_bars(pdev, IORESOURCE_MEM);
+ if (!bars)
+ return -ENODEV;
+ if (pci_request_selected_regions(pdev, bars, "nvme"))
+ return -ENODEV;
+
+ dev->bar = ioremap(pci_resource_start(pdev, 0), 8192);
+ if (!dev->bar)
+ goto release;
+
+ return 0;
+ release:
+ pci_release_regions(pdev);
+ return -ENODEV;
+}
+
static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
int node, result = -ENOMEM;
@@ -2066,6 +2085,10 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
dev->dev = get_device(&pdev->dev);
pci_set_drvdata(pdev, dev);
+ result = nvme_dev_map(dev);
+ if (result)
+ goto free;
+
INIT_LIST_HEAD(&dev->node);
INIT_WORK(&dev->scan_work, nvme_dev_scan);
INIT_WORK(&dev->reset_work, nvme_reset_work);
@@ -2089,6 +2112,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
nvme_release_prp_pools(dev);
put_pci:
put_device(dev->dev);
+ nvme_dev_unmap(dev);
free:
kfree(dev->queues);
kfree(dev->entry);
@@ -2112,10 +2136,16 @@ static void nvme_shutdown(struct pci_dev *pdev)
nvme_dev_disable(dev, true);
}
+/*
+ * The driver's remove may be called on a device in a partially initialized
+ * state. This function must not have any dependencies on the device state in
+ * order to proceed.
+ */
static void nvme_remove(struct pci_dev *pdev)
{
struct nvme_dev *dev = pci_get_drvdata(pdev);
+ set_bit(NVME_CTRL_REMOVING, &dev->flags);
pci_set_drvdata(pdev, NULL);
flush_work(&dev->scan_work);
nvme_remove_namespaces(&dev->ctrl);
@@ -2126,6 +2156,7 @@ static void nvme_remove(struct pci_dev *pdev)
nvme_free_queues(dev, 0);
nvme_release_cmb(dev);
nvme_release_prp_pools(dev);
+ nvme_dev_unmap(dev);
nvme_put_ctrl(&dev->ctrl);
}
diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
index 669739b302b2..5e7838290998 100644
--- a/drivers/of/of_mdio.c
+++ b/drivers/of/of_mdio.c
@@ -303,6 +303,7 @@ EXPORT_SYMBOL(of_phy_find_device);
* @dev: pointer to net_device claiming the phy
* @phy_np: Pointer to device tree node for the PHY
* @hndlr: Link state callback for the network device
+ * @flags: flags to pass to the PHY
* @iface: PHY data interface type
*
* If successful, returns a pointer to the phy_device with the embedded
diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
index 75a605426538..d1cdd9c992ac 100644
--- a/drivers/pci/host/Kconfig
+++ b/drivers/pci/host/Kconfig
@@ -14,6 +14,7 @@ config PCI_DRA7XX
config PCI_MVEBU
bool "Marvell EBU PCIe controller"
depends on ARCH_MVEBU || ARCH_DOVE
+ depends on ARM
depends on OF
config PCIE_DW
diff --git a/drivers/pci/host/pci-keystone-dw.c b/drivers/pci/host/pci-keystone-dw.c
index ed34c9520a02..6153853ca9c3 100644
--- a/drivers/pci/host/pci-keystone-dw.c
+++ b/drivers/pci/host/pci-keystone-dw.c
@@ -58,11 +58,6 @@
#define to_keystone_pcie(x) container_of(x, struct keystone_pcie, pp)
-static inline struct pcie_port *sys_to_pcie(struct pci_sys_data *sys)
-{
- return sys->private_data;
-}
-
static inline void update_reg_offset_bit_pos(u32 offset, u32 *reg_offset,
u32 *bit_pos)
{
@@ -108,7 +103,7 @@ static void ks_dw_pcie_msi_irq_ack(struct irq_data *d)
struct pcie_port *pp;
msi = irq_data_get_msi_desc(d);
- pp = sys_to_pcie(msi_desc_to_pci_sysdata(msi));
+ pp = (struct pcie_port *) msi_desc_to_pci_sysdata(msi);
ks_pcie = to_keystone_pcie(pp);
offset = d->irq - irq_linear_revmap(pp->irq_domain, 0);
update_reg_offset_bit_pos(offset, &reg_offset, &bit_pos);
@@ -146,7 +141,7 @@ static void ks_dw_pcie_msi_irq_mask(struct irq_data *d)
u32 offset;
msi = irq_data_get_msi_desc(d);
- pp = sys_to_pcie(msi_desc_to_pci_sysdata(msi));
+ pp = (struct pcie_port *) msi_desc_to_pci_sysdata(msi);
ks_pcie = to_keystone_pcie(pp);
offset = d->irq - irq_linear_revmap(pp->irq_domain, 0);
@@ -167,7 +162,7 @@ static void ks_dw_pcie_msi_irq_unmask(struct irq_data *d)
u32 offset;
msi = irq_data_get_msi_desc(d);
- pp = sys_to_pcie(msi_desc_to_pci_sysdata(msi));
+ pp = (struct pcie_port *) msi_desc_to_pci_sysdata(msi);
ks_pcie = to_keystone_pcie(pp);
offset = d->irq - irq_linear_revmap(pp->irq_domain, 0);
diff --git a/drivers/pci/host/pci-layerscape.c b/drivers/pci/host/pci-layerscape.c
index 3923bed93c7e..f39961bcf7aa 100644
--- a/drivers/pci/host/pci-layerscape.c
+++ b/drivers/pci/host/pci-layerscape.c
@@ -77,6 +77,16 @@ static void ls_pcie_fix_class(struct ls_pcie *pcie)
iowrite16(PCI_CLASS_BRIDGE_PCI, pcie->dbi + PCI_CLASS_DEVICE);
}
+/* Drop MSG TLP except for Vendor MSG */
+static void ls_pcie_drop_msg_tlp(struct ls_pcie *pcie)
+{
+ u32 val;
+
+ val = ioread32(pcie->dbi + PCIE_STRFMR1);
+ val &= 0xDFFFFFFF;
+ iowrite32(val, pcie->dbi + PCIE_STRFMR1);
+}
+
static int ls1021_pcie_link_up(struct pcie_port *pp)
{
u32 state;
@@ -97,7 +107,7 @@ static int ls1021_pcie_link_up(struct pcie_port *pp)
static void ls1021_pcie_host_init(struct pcie_port *pp)
{
struct ls_pcie *pcie = to_ls_pcie(pp);
- u32 val, index[2];
+ u32 index[2];
pcie->scfg = syscon_regmap_lookup_by_phandle(pp->dev->of_node,
"fsl,pcie-scfg");
@@ -116,13 +126,7 @@ static void ls1021_pcie_host_init(struct pcie_port *pp)
dw_pcie_setup_rc(pp);
- /*
- * LS1021A Workaround for internal TKT228622
- * to fix the INTx hang issue
- */
- val = ioread32(pcie->dbi + PCIE_STRFMR1);
- val &= 0xffff;
- iowrite32(val, pcie->dbi + PCIE_STRFMR1);
+ ls_pcie_drop_msg_tlp(pcie);
}
static int ls_pcie_link_up(struct pcie_port *pp)
@@ -147,6 +151,7 @@ static void ls_pcie_host_init(struct pcie_port *pp)
iowrite32(1, pcie->dbi + PCIE_DBI_RO_WR_EN);
ls_pcie_fix_class(pcie);
ls_pcie_clear_multifunction(pcie);
+ ls_pcie_drop_msg_tlp(pcie);
iowrite32(0, pcie->dbi + PCIE_DBI_RO_WR_EN);
}
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index c777b97207d5..5f70fee59a94 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -53,7 +53,7 @@ struct pcifront_device {
};
struct pcifront_sd {
- int domain;
+ struct pci_sysdata sd;
struct pcifront_device *pdev;
};
@@ -67,7 +67,9 @@ static inline void pcifront_init_sd(struct pcifront_sd *sd,
unsigned int domain, unsigned int bus,
struct pcifront_device *pdev)
{
- sd->domain = domain;
+ /* Because we do not expose that information via XenBus. */
+ sd->sd.node = first_online_node;
+ sd->sd.domain = domain;
sd->pdev = pdev;
}
@@ -468,8 +470,8 @@ static int pcifront_scan_root(struct pcifront_device *pdev,
dev_info(&pdev->xdev->dev, "Creating PCI Frontend Bus %04x:%02x\n",
domain, bus);
- bus_entry = kmalloc(sizeof(*bus_entry), GFP_KERNEL);
- sd = kmalloc(sizeof(*sd), GFP_KERNEL);
+ bus_entry = kzalloc(sizeof(*bus_entry), GFP_KERNEL);
+ sd = kzalloc(sizeof(*sd), GFP_KERNEL);
if (!bus_entry || !sd) {
err = -ENOMEM;
goto err_out;
diff --git a/drivers/power/bq27xxx_battery_i2c.c b/drivers/power/bq27xxx_battery_i2c.c
index 9429e66be096..8eafc6f0df88 100644
--- a/drivers/power/bq27xxx_battery_i2c.c
+++ b/drivers/power/bq27xxx_battery_i2c.c
@@ -21,6 +21,9 @@
#include <linux/power/bq27xxx_battery.h>
+static DEFINE_IDR(battery_id);
+static DEFINE_MUTEX(battery_mutex);
+
static irqreturn_t bq27xxx_battery_irq_handler_thread(int irq, void *data)
{
struct bq27xxx_device_info *di = data;
@@ -70,19 +73,33 @@ static int bq27xxx_battery_i2c_probe(struct i2c_client *client,
{
struct bq27xxx_device_info *di;
int ret;
+ char *name;
+ int num;
+
+ /* Get new ID for the new battery device */
+ mutex_lock(&battery_mutex);
+ num = idr_alloc(&battery_id, client, 0, 0, GFP_KERNEL);
+ mutex_unlock(&battery_mutex);
+ if (num < 0)
+ return num;
+
+ name = devm_kasprintf(&client->dev, GFP_KERNEL, "%s-%d", id->name, num);
+ if (!name)
+ goto err_mem;
di = devm_kzalloc(&client->dev, sizeof(*di), GFP_KERNEL);
if (!di)
- return -ENOMEM;
+ goto err_mem;
+ di->id = num;
di->dev = &client->dev;
di->chip = id->driver_data;
- di->name = id->name;
+ di->name = name;
di->bus.read = bq27xxx_battery_i2c_read;
ret = bq27xxx_battery_setup(di);
if (ret)
- return ret;
+ goto err_failed;
/* Schedule a polling after about 1 min */
schedule_delayed_work(&di->work, 60 * HZ);
@@ -103,6 +120,16 @@ static int bq27xxx_battery_i2c_probe(struct i2c_client *client,
}
return 0;
+
+err_mem:
+ ret = -ENOMEM;
+
+err_failed:
+ mutex_lock(&battery_mutex);
+ idr_remove(&battery_id, num);
+ mutex_unlock(&battery_mutex);
+
+ return ret;
}
static int bq27xxx_battery_i2c_remove(struct i2c_client *client)
@@ -111,6 +138,10 @@ static int bq27xxx_battery_i2c_remove(struct i2c_client *client)
bq27xxx_battery_teardown(di);
+ mutex_lock(&battery_mutex);
+ idr_remove(&battery_id, di->id);
+ mutex_unlock(&battery_mutex);
+
return 0;
}
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 3b3e0998fa6e..d6a691e27d33 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -4002,6 +4002,7 @@ static ssize_t ipr_store_update_fw(struct device *dev,
struct ipr_sglist *sglist;
char fname[100];
char *src;
+ char *endline;
int result, dnld_size;
if (!capable(CAP_SYS_ADMIN))
@@ -4009,6 +4010,10 @@ static ssize_t ipr_store_update_fw(struct device *dev,
snprintf(fname, sizeof(fname), "%s", buf);
+ endline = strchr(fname, '\n');
+ if (endline)
+ *endline = '\0';
+
if (request_firmware(&fw_entry, fname, &ioa_cfg->pdev->dev)) {
dev_err(&ioa_cfg->pdev->dev, "Firmware file %s not found\n", fname);
return -EIO;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index fa6b2c4eb7a2..8c6e31874171 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1344,6 +1344,7 @@ scsi_prep_return(struct request_queue *q, struct request *req, int ret)
switch (ret) {
case BLKPREP_KILL:
+ case BLKPREP_INVALID:
req->errors = DID_NO_CONNECT << 16;
/* release the command and kill it */
if (req->special) {
diff --git a/drivers/sh/pm_runtime.c b/drivers/sh/pm_runtime.c
index 91a003011acf..a9bac3bf20de 100644
--- a/drivers/sh/pm_runtime.c
+++ b/drivers/sh/pm_runtime.c
@@ -34,7 +34,7 @@ static struct pm_clk_notifier_block platform_bus_notifier = {
static int __init sh_pm_runtime_init(void)
{
- if (IS_ENABLED(CONFIG_ARCH_SHMOBILE)) {
+ if (IS_ENABLED(CONFIG_OF) && IS_ENABLED(CONFIG_ARCH_SHMOBILE)) {
if (!of_find_compatible_node(NULL, NULL,
"renesas,cpg-mstp-clocks"))
return 0;
diff --git a/drivers/ssb/Kconfig b/drivers/ssb/Kconfig
index 0c675861623f..d8e4219c2324 100644
--- a/drivers/ssb/Kconfig
+++ b/drivers/ssb/Kconfig
@@ -83,6 +83,7 @@ config SSB_SDIOHOST
config SSB_HOST_SOC
bool "Support for SSB bus on SoC"
depends on SSB && BCM47XX_NVRAM
+ select SSB_SPROM
help
Host interface for a SSB directly mapped into memory. This is
for some Broadcom SoCs from the BCM47xx and BCM53xx lines.
diff --git a/drivers/staging/media/davinci_vpfe/vpfe_video.c b/drivers/staging/media/davinci_vpfe/vpfe_video.c
index 3ec7e65a3ffa..db49af90217e 100644
--- a/drivers/staging/media/davinci_vpfe/vpfe_video.c
+++ b/drivers/staging/media/davinci_vpfe/vpfe_video.c
@@ -147,7 +147,7 @@ static int vpfe_prepare_pipeline(struct vpfe_video_device *video)
mutex_lock(&mdev->graph_mutex);
ret = media_entity_graph_walk_init(&graph, entity->graph_obj.mdev);
if (ret) {
- mutex_unlock(&video->lock);
+ mutex_unlock(&mdev->graph_mutex);
return -ENOMEM;
}
media_entity_graph_walk_start(&graph, entity);
diff --git a/drivers/usb/chipidea/ci_hdrc_pci.c b/drivers/usb/chipidea/ci_hdrc_pci.c
index b59195edf636..b635ab67490d 100644
--- a/drivers/usb/chipidea/ci_hdrc_pci.c
+++ b/drivers/usb/chipidea/ci_hdrc_pci.c
@@ -85,8 +85,8 @@ static int ci_hdrc_pci_probe(struct pci_dev *pdev,
/* register a nop PHY */
ci->phy = usb_phy_generic_register();
- if (!ci->phy)
- return -ENOMEM;
+ if (IS_ERR(ci->phy))
+ return PTR_ERR(ci->phy);
memset(res, 0, sizeof(res));
res[0].start = pci_resource_start(pdev, 0);
diff --git a/drivers/usb/chipidea/debug.c b/drivers/usb/chipidea/debug.c
index a4f7db2e18dd..df47110bad2d 100644
--- a/drivers/usb/chipidea/debug.c
+++ b/drivers/usb/chipidea/debug.c
@@ -100,6 +100,9 @@ static ssize_t ci_port_test_write(struct file *file, const char __user *ubuf,
if (sscanf(buf, "%u", &mode) != 1)
return -EINVAL;
+ if (mode > 255)
+ return -EBADRQC;
+
pm_runtime_get_sync(ci->dev);
spin_lock_irqsave(&ci->lock, flags);
ret = hw_port_test_set(ci, mode);
diff --git a/drivers/usb/chipidea/otg.c b/drivers/usb/chipidea/otg.c
index 45f86da1d6d3..03b6743461d1 100644
--- a/drivers/usb/chipidea/otg.c
+++ b/drivers/usb/chipidea/otg.c
@@ -158,7 +158,7 @@ static void ci_otg_work(struct work_struct *work)
int ci_hdrc_otg_init(struct ci_hdrc *ci)
{
INIT_WORK(&ci->work, ci_otg_work);
- ci->wq = create_singlethread_workqueue("ci_otg");
+ ci->wq = create_freezable_workqueue("ci_otg");
if (!ci->wq) {
dev_err(ci->dev, "can't create workqueue\n");
return -ENODEV;
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 350dcd9af5d8..51b436918f78 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -5401,6 +5401,7 @@ static int usb_reset_and_verify_device(struct usb_device *udev)
}
bos = udev->bos;
+ udev->bos = NULL;
for (i = 0; i < SET_CONFIG_TRIES; ++i) {
@@ -5493,11 +5494,8 @@ done:
usb_set_usb2_hardware_lpm(udev, 1);
usb_unlocked_enable_lpm(udev);
usb_enable_ltm(udev);
- /* release the new BOS descriptor allocated by hub_port_init() */
- if (udev->bos != bos) {
- usb_release_bos_descriptor(udev);
- udev->bos = bos;
- }
+ usb_release_bos_descriptor(udev);
+ udev->bos = bos;
return 0;
re_enumerate:
diff --git a/drivers/usb/dwc2/Kconfig b/drivers/usb/dwc2/Kconfig
index fd95ba6ec317..f0decc0d69b5 100644
--- a/drivers/usb/dwc2/Kconfig
+++ b/drivers/usb/dwc2/Kconfig
@@ -1,5 +1,6 @@
config USB_DWC2
tristate "DesignWare USB2 DRD Core Support"
+ depends on HAS_DMA
depends on USB || USB_GADGET
help
Say Y here if your system has a Dual Role Hi-Speed USB
diff --git a/drivers/usb/dwc2/core.c b/drivers/usb/dwc2/core.c
index e991d55914db..46c4ba75dc2a 100644
--- a/drivers/usb/dwc2/core.c
+++ b/drivers/usb/dwc2/core.c
@@ -619,6 +619,12 @@ void dwc2_force_dr_mode(struct dwc2_hsotg *hsotg)
__func__, hsotg->dr_mode);
break;
}
+
+ /*
+ * NOTE: This is required for some rockchip soc based
+ * platforms.
+ */
+ msleep(50);
}
/*
diff --git a/drivers/usb/dwc2/hcd_ddma.c b/drivers/usb/dwc2/hcd_ddma.c
index 36606fc33c0d..a41274aa52ad 100644
--- a/drivers/usb/dwc2/hcd_ddma.c
+++ b/drivers/usb/dwc2/hcd_ddma.c
@@ -1174,14 +1174,11 @@ static int dwc2_process_non_isoc_desc(struct dwc2_hsotg *hsotg,
failed = dwc2_update_non_isoc_urb_state_ddma(hsotg, chan, qtd, dma_desc,
halt_status, n_bytes,
xfer_done);
- if (*xfer_done && urb->status != -EINPROGRESS)
- failed = 1;
-
- if (failed) {
+ if (failed || (*xfer_done && urb->status != -EINPROGRESS)) {
dwc2_host_complete(hsotg, qtd, urb->status);
dwc2_hcd_qtd_unlink_and_free(hsotg, qtd, qh);
- dev_vdbg(hsotg->dev, "failed=%1x xfer_done=%1x status=%08x\n",
- failed, *xfer_done, urb->status);
+ dev_vdbg(hsotg->dev, "failed=%1x xfer_done=%1x\n",
+ failed, *xfer_done);
return failed;
}
@@ -1236,21 +1233,23 @@ static void dwc2_complete_non_isoc_xfer_ddma(struct dwc2_hsotg *hsotg,
list_for_each_safe(qtd_item, qtd_tmp, &qh->qtd_list) {
int i;
+ int qtd_desc_count;
qtd = list_entry(qtd_item, struct dwc2_qtd, qtd_list_entry);
xfer_done = 0;
+ qtd_desc_count = qtd->n_desc;
- for (i = 0; i < qtd->n_desc; i++) {
+ for (i = 0; i < qtd_desc_count; i++) {
if (dwc2_process_non_isoc_desc(hsotg, chan, chnum, qtd,
desc_num, halt_status,
- &xfer_done)) {
- qtd = NULL;
- break;
- }
+ &xfer_done))
+ goto stop_scan;
+
desc_num++;
}
}
+stop_scan:
if (qh->ep_type != USB_ENDPOINT_XFER_CONTROL) {
/*
* Resetting the data toggle for bulk and interrupt endpoints
@@ -1258,7 +1257,7 @@ static void dwc2_complete_non_isoc_xfer_ddma(struct dwc2_hsotg *hsotg,
*/
if (halt_status == DWC2_HC_XFER_STALL)
qh->data_toggle = DWC2_HC_PID_DATA0;
- else if (qtd)
+ else
dwc2_hcd_save_data_toggle(hsotg, chan, chnum, qtd);
}
diff --git a/drivers/usb/dwc2/hcd_intr.c b/drivers/usb/dwc2/hcd_intr.c
index f8253803a050..cadba8b13c48 100644
--- a/drivers/usb/dwc2/hcd_intr.c
+++ b/drivers/usb/dwc2/hcd_intr.c
@@ -525,11 +525,19 @@ void dwc2_hcd_save_data_toggle(struct dwc2_hsotg *hsotg,
u32 pid = (hctsiz & TSIZ_SC_MC_PID_MASK) >> TSIZ_SC_MC_PID_SHIFT;
if (chan->ep_type != USB_ENDPOINT_XFER_CONTROL) {
+ if (WARN(!chan || !chan->qh,
+ "chan->qh must be specified for non-control eps\n"))
+ return;
+
if (pid == TSIZ_SC_MC_PID_DATA0)
chan->qh->data_toggle = DWC2_HC_PID_DATA0;
else
chan->qh->data_toggle = DWC2_HC_PID_DATA1;
} else {
+ if (WARN(!qtd,
+ "qtd must be specified for control eps\n"))
+ return;
+
if (pid == TSIZ_SC_MC_PID_DATA0)
qtd->data_toggle = DWC2_HC_PID_DATA0;
else
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index 29130682e547..e4f8b90d9627 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -856,7 +856,6 @@ struct dwc3 {
unsigned pullups_connected:1;
unsigned resize_fifos:1;
unsigned setup_packet_pending:1;
- unsigned start_config_issued:1;
unsigned three_stage_setup:1;
unsigned usb3_lpm_capable:1;
diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c
index 3a9354abcb68..8d6b75c2f53b 100644
--- a/drivers/usb/dwc3/ep0.c
+++ b/drivers/usb/dwc3/ep0.c
@@ -555,7 +555,6 @@ static int dwc3_ep0_set_config(struct dwc3 *dwc, struct usb_ctrlrequest *ctrl)
int ret;
u32 reg;
- dwc->start_config_issued = false;
cfg = le16_to_cpu(ctrl->wValue);
switch (state) {
@@ -737,10 +736,6 @@ static int dwc3_ep0_std_request(struct dwc3 *dwc, struct usb_ctrlrequest *ctrl)
dwc3_trace(trace_dwc3_ep0, "USB_REQ_SET_ISOCH_DELAY");
ret = dwc3_ep0_set_isoch_delay(dwc, ctrl);
break;
- case USB_REQ_SET_INTERFACE:
- dwc3_trace(trace_dwc3_ep0, "USB_REQ_SET_INTERFACE");
- dwc->start_config_issued = false;
- /* Fall through */
default:
dwc3_trace(trace_dwc3_ep0, "Forwarding to gadget driver");
ret = dwc3_ep0_delegate_req(dwc, ctrl);
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 7d1dd82a95ac..2363bad45af8 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -385,24 +385,66 @@ static void dwc3_free_trb_pool(struct dwc3_ep *dep)
dep->trb_pool_dma = 0;
}
+static int dwc3_gadget_set_xfer_resource(struct dwc3 *dwc, struct dwc3_ep *dep);
+
+/**
+ * dwc3_gadget_start_config - Configure EP resources
+ * @dwc: pointer to our controller context structure
+ * @dep: endpoint that is being enabled
+ *
+ * The assignment of transfer resources cannot perfectly follow the
+ * data book due to the fact that the controller driver does not have
+ * all knowledge of the configuration in advance. It is given this
+ * information piecemeal by the composite gadget framework after every
+ * SET_CONFIGURATION and SET_INTERFACE. Trying to follow the databook
+ * programming model in this scenario can cause errors. For two
+ * reasons:
+ *
+ * 1) The databook says to do DEPSTARTCFG for every SET_CONFIGURATION
+ * and SET_INTERFACE (8.1.5). This is incorrect in the scenario of
+ * multiple interfaces.
+ *
+ * 2) The databook does not mention doing more DEPXFERCFG for new
+ * endpoint on alt setting (8.1.6).
+ *
+ * The following simplified method is used instead:
+ *
+ * All hardware endpoints can be assigned a transfer resource and this
+ * setting will stay persistent until either a core reset or
+ * hibernation. So whenever we do a DEPSTARTCFG(0) we can go ahead and
+ * do DEPXFERCFG for every hardware endpoint as well. We are
+ * guaranteed that there are as many transfer resources as endpoints.
+ *
+ * This function is called for each endpoint when it is being enabled
+ * but is triggered only when called for EP0-out, which always happens
+ * first, and which should only happen in one of the above conditions.
+ */
static int dwc3_gadget_start_config(struct dwc3 *dwc, struct dwc3_ep *dep)
{
struct dwc3_gadget_ep_cmd_params params;
u32 cmd;
+ int i;
+ int ret;
+
+ if (dep->number)
+ return 0;
memset(&params, 0x00, sizeof(params));
+ cmd = DWC3_DEPCMD_DEPSTARTCFG;
- if (dep->number != 1) {
- cmd = DWC3_DEPCMD_DEPSTARTCFG;
- /* XferRscIdx == 0 for ep0 and 2 for the remaining */
- if (dep->number > 1) {
- if (dwc->start_config_issued)
- return 0;
- dwc->start_config_issued = true;
- cmd |= DWC3_DEPCMD_PARAM(2);
- }
+ ret = dwc3_send_gadget_ep_cmd(dwc, 0, cmd, &params);
+ if (ret)
+ return ret;
- return dwc3_send_gadget_ep_cmd(dwc, 0, cmd, &params);
+ for (i = 0; i < DWC3_ENDPOINTS_NUM; i++) {
+ struct dwc3_ep *dep = dwc->eps[i];
+
+ if (!dep)
+ continue;
+
+ ret = dwc3_gadget_set_xfer_resource(dwc, dep);
+ if (ret)
+ return ret;
}
return 0;
@@ -516,10 +558,6 @@ static int __dwc3_gadget_ep_enable(struct dwc3_ep *dep,
struct dwc3_trb *trb_st_hw;
struct dwc3_trb *trb_link;
- ret = dwc3_gadget_set_xfer_resource(dwc, dep);
- if (ret)
- return ret;
-
dep->endpoint.desc = desc;
dep->comp_desc = comp_desc;
dep->type = usb_endpoint_type(desc);
@@ -1636,8 +1674,6 @@ static int dwc3_gadget_start(struct usb_gadget *g,
}
dwc3_writel(dwc->regs, DWC3_DCFG, reg);
- dwc->start_config_issued = false;
-
/* Start with SuperSpeed Default */
dwc3_gadget_ep0_desc.wMaxPacketSize = cpu_to_le16(512);
@@ -2237,7 +2273,6 @@ static void dwc3_gadget_disconnect_interrupt(struct dwc3 *dwc)
dwc3_writel(dwc->regs, DWC3_DCTL, reg);
dwc3_disconnect_gadget(dwc);
- dwc->start_config_issued = false;
dwc->gadget.speed = USB_SPEED_UNKNOWN;
dwc->setup_packet_pending = false;
@@ -2288,7 +2323,6 @@ static void dwc3_gadget_reset_interrupt(struct dwc3 *dwc)
dwc3_stop_active_transfers(dwc);
dwc3_clear_stall_all_ep(dwc);
- dwc->start_config_issued = false;
/* Reset device address to zero */
reg = dwc3_readl(dwc->regs, DWC3_DCFG);
diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index 7e179f81d05c..87fb0fd6aaab 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -130,7 +130,8 @@ struct dev_data {
setup_can_stall : 1,
setup_out_ready : 1,
setup_out_error : 1,
- setup_abort : 1;
+ setup_abort : 1,
+ gadget_registered : 1;
unsigned setup_wLength;
/* the rest is basically write-once */
@@ -1179,7 +1180,8 @@ dev_release (struct inode *inode, struct file *fd)
/* closing ep0 === shutdown all */
- usb_gadget_unregister_driver (&gadgetfs_driver);
+ if (dev->gadget_registered)
+ usb_gadget_unregister_driver (&gadgetfs_driver);
/* at this point "good" hardware has disconnected the
* device from USB; the host won't see it any more.
@@ -1847,6 +1849,7 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
* kick in after the ep0 descriptor is closed.
*/
value = len;
+ dev->gadget_registered = true;
}
return value;
diff --git a/drivers/usb/gadget/udc/fsl_qe_udc.c b/drivers/usb/gadget/udc/fsl_qe_udc.c
index 53c0692f1b09..93d28cb00b76 100644
--- a/drivers/usb/gadget/udc/fsl_qe_udc.c
+++ b/drivers/usb/gadget/udc/fsl_qe_udc.c
@@ -2340,7 +2340,7 @@ static struct qe_udc *qe_udc_config(struct platform_device *ofdev)
{
struct qe_udc *udc;
struct device_node *np = ofdev->dev.of_node;
- unsigned int tmp_addr = 0;
+ unsigned long tmp_addr = 0;
struct usb_device_para __iomem *usbpram;
unsigned int i;
u64 size;
diff --git a/drivers/usb/gadget/udc/net2280.h b/drivers/usb/gadget/udc/net2280.h
index 4dff60d34f73..0d32052bf16f 100644
--- a/drivers/usb/gadget/udc/net2280.h
+++ b/drivers/usb/gadget/udc/net2280.h
@@ -369,9 +369,20 @@ static inline void set_max_speed(struct net2280_ep *ep, u32 max)
static const u32 ep_enhanced[9] = { 0x10, 0x60, 0x30, 0x80,
0x50, 0x20, 0x70, 0x40, 0x90 };
- if (ep->dev->enhanced_mode)
+ if (ep->dev->enhanced_mode) {
reg = ep_enhanced[ep->num];
- else{
+ switch (ep->dev->gadget.speed) {
+ case USB_SPEED_SUPER:
+ reg += 2;
+ break;
+ case USB_SPEED_FULL:
+ reg += 1;
+ break;
+ case USB_SPEED_HIGH:
+ default:
+ break;
+ }
+ } else {
reg = (ep->num + 1) * 0x10;
if (ep->dev->gadget.speed != USB_SPEED_HIGH)
reg += 1;
diff --git a/drivers/usb/gadget/udc/udc-core.c b/drivers/usb/gadget/udc/udc-core.c
index fd73a3ea07c2..b86a6f03592e 100644
--- a/drivers/usb/gadget/udc/udc-core.c
+++ b/drivers/usb/gadget/udc/udc-core.c
@@ -413,9 +413,10 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget,
if (!driver->udc_name || strcmp(driver->udc_name,
dev_name(&udc->dev)) == 0) {
ret = udc_bind_to_driver(udc, driver);
+ if (ret != -EPROBE_DEFER)
+ list_del(&driver->pending);
if (ret)
goto err4;
- list_del(&driver->pending);
break;
}
}
diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c
index 795a45b1b25b..58487a473521 100644
--- a/drivers/usb/musb/musb_host.c
+++ b/drivers/usb/musb/musb_host.c
@@ -662,7 +662,7 @@ static int musb_tx_dma_set_mode_mentor(struct dma_controller *dma,
csr &= ~(MUSB_TXCSR_AUTOSET | MUSB_TXCSR_DMAMODE);
csr |= MUSB_TXCSR_DMAENAB; /* against programmer's guide */
}
- channel->desired_mode = mode;
+ channel->desired_mode = *mode;
musb_writew(epio, MUSB_TXCSR, csr);
return 0;
@@ -2003,10 +2003,8 @@ void musb_host_rx(struct musb *musb, u8 epnum)
qh->offset,
urb->transfer_buffer_length);
- done = musb_rx_dma_in_inventra_cppi41(c, hw_ep, qh,
- urb, xfer_len,
- iso_err);
- if (done)
+ if (musb_rx_dma_in_inventra_cppi41(c, hw_ep, qh, urb,
+ xfer_len, iso_err))
goto finish;
else
dev_err(musb->controller, "error: rx_dma failed\n");
diff --git a/drivers/usb/phy/phy-msm-usb.c b/drivers/usb/phy/phy-msm-usb.c
index 970a30e155cb..72b387d592c2 100644
--- a/drivers/usb/phy/phy-msm-usb.c
+++ b/drivers/usb/phy/phy-msm-usb.c
@@ -757,14 +757,8 @@ static int msm_otg_set_host(struct usb_otg *otg, struct usb_bus *host)
otg->host = host;
dev_dbg(otg->usb_phy->dev, "host driver registered w/ tranceiver\n");
- /*
- * Kick the state machine work, if peripheral is not supported
- * or peripheral is already registered with us.
- */
- if (motg->pdata->mode == USB_DR_MODE_HOST || otg->gadget) {
- pm_runtime_get_sync(otg->usb_phy->dev);
- schedule_work(&motg->sm_work);
- }
+ pm_runtime_get_sync(otg->usb_phy->dev);
+ schedule_work(&motg->sm_work);
return 0;
}
@@ -827,14 +821,8 @@ static int msm_otg_set_peripheral(struct usb_otg *otg,
dev_dbg(otg->usb_phy->dev,
"peripheral driver registered w/ tranceiver\n");
- /*
- * Kick the state machine work, if host is not supported
- * or host is already registered with us.
- */
- if (motg->pdata->mode == USB_DR_MODE_PERIPHERAL || otg->host) {
- pm_runtime_get_sync(otg->usb_phy->dev);
- schedule_work(&motg->sm_work);
- }
+ pm_runtime_get_sync(otg->usb_phy->dev);
+ schedule_work(&motg->sm_work);
return 0;
}
diff --git a/drivers/usb/serial/Kconfig b/drivers/usb/serial/Kconfig
index f612dda9c977..56ecb8b5115d 100644
--- a/drivers/usb/serial/Kconfig
+++ b/drivers/usb/serial/Kconfig
@@ -475,22 +475,6 @@ config USB_SERIAL_MOS7840
To compile this driver as a module, choose M here: the
module will be called mos7840. If unsure, choose N.
-config USB_SERIAL_MXUPORT11
- tristate "USB Moxa UPORT 11x0 Serial Driver"
- ---help---
- Say Y here if you want to use a MOXA UPort 11x0 Serial hub.
-
- This driver supports:
-
- - UPort 1110 : 1 port RS-232 USB to Serial Hub.
- - UPort 1130 : 1 port RS-422/485 USB to Serial Hub.
- - UPort 1130I : 1 port RS-422/485 USB to Serial Hub with Isolation.
- - UPort 1150 : 1 port RS-232/422/485 USB to Serial Hub.
- - UPort 1150I : 1 port RS-232/422/485 USB to Serial Hub with Isolation.
-
- To compile this driver as a module, choose M here: the
- module will be called mxu11x0.
-
config USB_SERIAL_MXUPORT
tristate "USB Moxa UPORT Serial Driver"
---help---
diff --git a/drivers/usb/serial/Makefile b/drivers/usb/serial/Makefile
index f3fa5e53702d..349d9df0895f 100644
--- a/drivers/usb/serial/Makefile
+++ b/drivers/usb/serial/Makefile
@@ -38,7 +38,6 @@ obj-$(CONFIG_USB_SERIAL_METRO) += metro-usb.o
obj-$(CONFIG_USB_SERIAL_MOS7720) += mos7720.o
obj-$(CONFIG_USB_SERIAL_MOS7840) += mos7840.o
obj-$(CONFIG_USB_SERIAL_MXUPORT) += mxuport.o
-obj-$(CONFIG_USB_SERIAL_MXUPORT11) += mxu11x0.o
obj-$(CONFIG_USB_SERIAL_NAVMAN) += navman.o
obj-$(CONFIG_USB_SERIAL_OMNINET) += omninet.o
obj-$(CONFIG_USB_SERIAL_OPTICON) += opticon.o
diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index 987813b8a7f9..73a366de5102 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -163,6 +163,9 @@ static const struct usb_device_id id_table[] = {
{ USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */
{ USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */
{ USB_DEVICE(0x18EF, 0xE025) }, /* ELV Marble Sound Board 1 */
+ { USB_DEVICE(0x1901, 0x0190) }, /* GE B850 CP2105 Recorder interface */
+ { USB_DEVICE(0x1901, 0x0193) }, /* GE B650 CP2104 PMC interface */
+ { USB_DEVICE(0x19CF, 0x3000) }, /* Parrot NMEA GPS Flight Recorder */
{ USB_DEVICE(0x1ADB, 0x0001) }, /* Schweitzer Engineering C662 Cable */
{ USB_DEVICE(0x1B1C, 0x1C00) }, /* Corsair USB Dongle */
{ USB_DEVICE(0x1BA4, 0x0002) }, /* Silicon Labs 358x factory default */
diff --git a/drivers/usb/serial/mxu11x0.c b/drivers/usb/serial/mxu11x0.c
deleted file mode 100644
index 619607323bfd..000000000000
--- a/drivers/usb/serial/mxu11x0.c
+++ /dev/null
@@ -1,1006 +0,0 @@
-/*
- * USB Moxa UPORT 11x0 Serial Driver
- *
- * Copyright (C) 2007 MOXA Technologies Co., Ltd.
- * Copyright (C) 2015 Mathieu Othacehe <m.othacehe@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- *
- * Supports the following Moxa USB to serial converters:
- * UPort 1110, 1 port RS-232 USB to Serial Hub.
- * UPort 1130, 1 port RS-422/485 USB to Serial Hub.
- * UPort 1130I, 1 port RS-422/485 USB to Serial Hub with isolation
- * protection.
- * UPort 1150, 1 port RS-232/422/485 USB to Serial Hub.
- * UPort 1150I, 1 port RS-232/422/485 USB to Serial Hub with isolation
- * protection.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/firmware.h>
-#include <linux/jiffies.h>
-#include <linux/serial.h>
-#include <linux/serial_reg.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/mutex.h>
-#include <linux/tty.h>
-#include <linux/tty_driver.h>
-#include <linux/tty_flip.h>
-#include <linux/uaccess.h>
-#include <linux/usb.h>
-#include <linux/usb/serial.h>
-
-/* Vendor and product ids */
-#define MXU1_VENDOR_ID 0x110a
-#define MXU1_1110_PRODUCT_ID 0x1110
-#define MXU1_1130_PRODUCT_ID 0x1130
-#define MXU1_1150_PRODUCT_ID 0x1150
-#define MXU1_1151_PRODUCT_ID 0x1151
-#define MXU1_1131_PRODUCT_ID 0x1131
-
-/* Commands */
-#define MXU1_GET_VERSION 0x01
-#define MXU1_GET_PORT_STATUS 0x02
-#define MXU1_GET_PORT_DEV_INFO 0x03
-#define MXU1_GET_CONFIG 0x04
-#define MXU1_SET_CONFIG 0x05
-#define MXU1_OPEN_PORT 0x06
-#define MXU1_CLOSE_PORT 0x07
-#define MXU1_START_PORT 0x08
-#define MXU1_STOP_PORT 0x09
-#define MXU1_TEST_PORT 0x0A
-#define MXU1_PURGE_PORT 0x0B
-#define MXU1_RESET_EXT_DEVICE 0x0C
-#define MXU1_GET_OUTQUEUE 0x0D
-#define MXU1_WRITE_DATA 0x80
-#define MXU1_READ_DATA 0x81
-#define MXU1_REQ_TYPE_CLASS 0x82
-
-/* Module identifiers */
-#define MXU1_I2C_PORT 0x01
-#define MXU1_IEEE1284_PORT 0x02
-#define MXU1_UART1_PORT 0x03
-#define MXU1_UART2_PORT 0x04
-#define MXU1_RAM_PORT 0x05
-
-/* Modem status */
-#define MXU1_MSR_DELTA_CTS 0x01
-#define MXU1_MSR_DELTA_DSR 0x02
-#define MXU1_MSR_DELTA_RI 0x04
-#define MXU1_MSR_DELTA_CD 0x08
-#define MXU1_MSR_CTS 0x10
-#define MXU1_MSR_DSR 0x20
-#define MXU1_MSR_RI 0x40
-#define MXU1_MSR_CD 0x80
-#define MXU1_MSR_DELTA_MASK 0x0F
-#define MXU1_MSR_MASK 0xF0
-
-/* Line status */
-#define MXU1_LSR_OVERRUN_ERROR 0x01
-#define MXU1_LSR_PARITY_ERROR 0x02
-#define MXU1_LSR_FRAMING_ERROR 0x04
-#define MXU1_LSR_BREAK 0x08
-#define MXU1_LSR_ERROR 0x0F
-#define MXU1_LSR_RX_FULL 0x10
-#define MXU1_LSR_TX_EMPTY 0x20
-
-/* Modem control */
-#define MXU1_MCR_LOOP 0x04
-#define MXU1_MCR_DTR 0x10
-#define MXU1_MCR_RTS 0x20
-
-/* Mask settings */
-#define MXU1_UART_ENABLE_RTS_IN 0x0001
-#define MXU1_UART_DISABLE_RTS 0x0002
-#define MXU1_UART_ENABLE_PARITY_CHECKING 0x0008
-#define MXU1_UART_ENABLE_DSR_OUT 0x0010
-#define MXU1_UART_ENABLE_CTS_OUT 0x0020
-#define MXU1_UART_ENABLE_X_OUT 0x0040
-#define MXU1_UART_ENABLE_XA_OUT 0x0080
-#define MXU1_UART_ENABLE_X_IN 0x0100
-#define MXU1_UART_ENABLE_DTR_IN 0x0800
-#define MXU1_UART_DISABLE_DTR 0x1000
-#define MXU1_UART_ENABLE_MS_INTS 0x2000
-#define MXU1_UART_ENABLE_AUTO_START_DMA 0x4000
-#define MXU1_UART_SEND_BREAK_SIGNAL 0x8000
-
-/* Parity */
-#define MXU1_UART_NO_PARITY 0x00
-#define MXU1_UART_ODD_PARITY 0x01
-#define MXU1_UART_EVEN_PARITY 0x02
-#define MXU1_UART_MARK_PARITY 0x03
-#define MXU1_UART_SPACE_PARITY 0x04
-
-/* Stop bits */
-#define MXU1_UART_1_STOP_BITS 0x00
-#define MXU1_UART_1_5_STOP_BITS 0x01
-#define MXU1_UART_2_STOP_BITS 0x02
-
-/* Bits per character */
-#define MXU1_UART_5_DATA_BITS 0x00
-#define MXU1_UART_6_DATA_BITS 0x01
-#define MXU1_UART_7_DATA_BITS 0x02
-#define MXU1_UART_8_DATA_BITS 0x03
-
-/* Operation modes */
-#define MXU1_UART_232 0x00
-#define MXU1_UART_485_RECEIVER_DISABLED 0x01
-#define MXU1_UART_485_RECEIVER_ENABLED 0x02
-
-/* Pipe transfer mode and timeout */
-#define MXU1_PIPE_MODE_CONTINUOUS 0x01
-#define MXU1_PIPE_MODE_MASK 0x03
-#define MXU1_PIPE_TIMEOUT_MASK 0x7C
-#define MXU1_PIPE_TIMEOUT_ENABLE 0x80
-
-/* Config struct */
-struct mxu1_uart_config {
- __be16 wBaudRate;
- __be16 wFlags;
- u8 bDataBits;
- u8 bParity;
- u8 bStopBits;
- char cXon;
- char cXoff;
- u8 bUartMode;
-} __packed;
-
-/* Purge modes */
-#define MXU1_PURGE_OUTPUT 0x00
-#define MXU1_PURGE_INPUT 0x80
-
-/* Read/Write data */
-#define MXU1_RW_DATA_ADDR_SFR 0x10
-#define MXU1_RW_DATA_ADDR_IDATA 0x20
-#define MXU1_RW_DATA_ADDR_XDATA 0x30
-#define MXU1_RW_DATA_ADDR_CODE 0x40
-#define MXU1_RW_DATA_ADDR_GPIO 0x50
-#define MXU1_RW_DATA_ADDR_I2C 0x60
-#define MXU1_RW_DATA_ADDR_FLASH 0x70
-#define MXU1_RW_DATA_ADDR_DSP 0x80
-
-#define MXU1_RW_DATA_UNSPECIFIED 0x00
-#define MXU1_RW_DATA_BYTE 0x01
-#define MXU1_RW_DATA_WORD 0x02
-#define MXU1_RW_DATA_DOUBLE_WORD 0x04
-
-struct mxu1_write_data_bytes {
- u8 bAddrType;
- u8 bDataType;
- u8 bDataCounter;
- __be16 wBaseAddrHi;
- __be16 wBaseAddrLo;
- u8 bData[0];
-} __packed;
-
-/* Interrupt codes */
-#define MXU1_CODE_HARDWARE_ERROR 0xFF
-#define MXU1_CODE_DATA_ERROR 0x03
-#define MXU1_CODE_MODEM_STATUS 0x04
-
-static inline int mxu1_get_func_from_code(unsigned char code)
-{
- return code & 0x0f;
-}
-
-/* Download firmware max packet size */
-#define MXU1_DOWNLOAD_MAX_PACKET_SIZE 64
-
-/* Firmware image header */
-struct mxu1_firmware_header {
- __le16 wLength;
- u8 bCheckSum;
-} __packed;
-
-#define MXU1_UART_BASE_ADDR 0xFFA0
-#define MXU1_UART_OFFSET_MCR 0x0004
-
-#define MXU1_BAUD_BASE 923077
-
-#define MXU1_TRANSFER_TIMEOUT 2
-#define MXU1_DOWNLOAD_TIMEOUT 1000
-#define MXU1_DEFAULT_CLOSING_WAIT 4000 /* in .01 secs */
-
-struct mxu1_port {
- u8 msr;
- u8 mcr;
- u8 uart_mode;
- spinlock_t spinlock; /* Protects msr */
- struct mutex mutex; /* Protects mcr */
- bool send_break;
-};
-
-struct mxu1_device {
- u16 mxd_model;
-};
-
-static const struct usb_device_id mxu1_idtable[] = {
- { USB_DEVICE(MXU1_VENDOR_ID, MXU1_1110_PRODUCT_ID) },
- { USB_DEVICE(MXU1_VENDOR_ID, MXU1_1130_PRODUCT_ID) },
- { USB_DEVICE(MXU1_VENDOR_ID, MXU1_1150_PRODUCT_ID) },
- { USB_DEVICE(MXU1_VENDOR_ID, MXU1_1151_PRODUCT_ID) },
- { USB_DEVICE(MXU1_VENDOR_ID, MXU1_1131_PRODUCT_ID) },
- { }
-};
-
-MODULE_DEVICE_TABLE(usb, mxu1_idtable);
-
-/* Write the given buffer out to the control pipe. */
-static int mxu1_send_ctrl_data_urb(struct usb_serial *serial,
- u8 request,
- u16 value, u16 index,
- void *data, size_t size)
-{
- int status;
-
- status = usb_control_msg(serial->dev,
- usb_sndctrlpipe(serial->dev, 0),
- request,
- (USB_DIR_OUT | USB_TYPE_VENDOR |
- USB_RECIP_DEVICE), value, index,
- data, size,
- USB_CTRL_SET_TIMEOUT);
- if (status < 0) {
- dev_err(&serial->interface->dev,
- "%s - usb_control_msg failed: %d\n",
- __func__, status);
- return status;
- }
-
- if (status != size) {
- dev_err(&serial->interface->dev,
- "%s - short write (%d / %zd)\n",
- __func__, status, size);
- return -EIO;
- }
-
- return 0;
-}
-
-/* Send a vendor request without any data */
-static int mxu1_send_ctrl_urb(struct usb_serial *serial,
- u8 request, u16 value, u16 index)
-{
- return mxu1_send_ctrl_data_urb(serial, request, value, index,
- NULL, 0);
-}
-
-static int mxu1_download_firmware(struct usb_serial *serial,
- const struct firmware *fw_p)
-{
- int status = 0;
- int buffer_size;
- int pos;
- int len;
- int done;
- u8 cs = 0;
- u8 *buffer;
- struct usb_device *dev = serial->dev;
- struct mxu1_firmware_header *header;
- unsigned int pipe;
-
- pipe = usb_sndbulkpipe(dev, serial->port[0]->bulk_out_endpointAddress);
-
- buffer_size = fw_p->size + sizeof(*header);
- buffer = kmalloc(buffer_size, GFP_KERNEL);
- if (!buffer)
- return -ENOMEM;
-
- memcpy(buffer, fw_p->data, fw_p->size);
- memset(buffer + fw_p->size, 0xff, buffer_size - fw_p->size);
-
- for (pos = sizeof(*header); pos < buffer_size; pos++)
- cs = (u8)(cs + buffer[pos]);
-
- header = (struct mxu1_firmware_header *)buffer;
- header->wLength = cpu_to_le16(buffer_size - sizeof(*header));
- header->bCheckSum = cs;
-
- dev_dbg(&dev->dev, "%s - downloading firmware\n", __func__);
-
- for (pos = 0; pos < buffer_size; pos += done) {
- len = min(buffer_size - pos, MXU1_DOWNLOAD_MAX_PACKET_SIZE);
-
- status = usb_bulk_msg(dev, pipe, buffer + pos, len, &done,
- MXU1_DOWNLOAD_TIMEOUT);
- if (status)
- break;
- }
-
- kfree(buffer);
-
- if (status) {
- dev_err(&dev->dev, "failed to download firmware: %d\n", status);
- return status;
- }
-
- msleep_interruptible(100);
- usb_reset_device(dev);
-
- dev_dbg(&dev->dev, "%s - download successful\n", __func__);
-
- return 0;
-}
-
-static int mxu1_port_probe(struct usb_serial_port *port)
-{
- struct mxu1_port *mxport;
- struct mxu1_device *mxdev;
-
- if (!port->interrupt_in_urb) {
- dev_err(&port->dev, "no interrupt urb\n");
- return -ENODEV;
- }
-
- mxport = kzalloc(sizeof(struct mxu1_port), GFP_KERNEL);
- if (!mxport)
- return -ENOMEM;
-
- spin_lock_init(&mxport->spinlock);
- mutex_init(&mxport->mutex);
-
- mxdev = usb_get_serial_data(port->serial);
-
- switch (mxdev->mxd_model) {
- case MXU1_1110_PRODUCT_ID:
- case MXU1_1150_PRODUCT_ID:
- case MXU1_1151_PRODUCT_ID:
- mxport->uart_mode = MXU1_UART_232;
- break;
- case MXU1_1130_PRODUCT_ID:
- case MXU1_1131_PRODUCT_ID:
- mxport->uart_mode = MXU1_UART_485_RECEIVER_DISABLED;
- break;
- }
-
- usb_set_serial_port_data(port, mxport);
-
- port->port.closing_wait =
- msecs_to_jiffies(MXU1_DEFAULT_CLOSING_WAIT * 10);
- port->port.drain_delay = 1;
-
- return 0;
-}
-
-static int mxu1_port_remove(struct usb_serial_port *port)
-{
- struct mxu1_port *mxport;
-
- mxport = usb_get_serial_port_data(port);
- kfree(mxport);
-
- return 0;
-}
-
-static int mxu1_startup(struct usb_serial *serial)
-{
- struct mxu1_device *mxdev;
- struct usb_device *dev = serial->dev;
- struct usb_host_interface *cur_altsetting;
- char fw_name[32];
- const struct firmware *fw_p = NULL;
- int err;
-
- dev_dbg(&serial->interface->dev, "%s - product 0x%04X, num configurations %d, configuration value %d\n",
- __func__, le16_to_cpu(dev->descriptor.idProduct),
- dev->descriptor.bNumConfigurations,
- dev->actconfig->desc.bConfigurationValue);
-
- /* create device structure */
- mxdev = kzalloc(sizeof(struct mxu1_device), GFP_KERNEL);
- if (!mxdev)
- return -ENOMEM;
-
- usb_set_serial_data(serial, mxdev);
-
- mxdev->mxd_model = le16_to_cpu(dev->descriptor.idProduct);
-
- cur_altsetting = serial->interface->cur_altsetting;
-
- /* if we have only 1 configuration, download firmware */
- if (cur_altsetting->desc.bNumEndpoints == 1) {
-
- snprintf(fw_name,
- sizeof(fw_name),
- "moxa/moxa-%04x.fw",
- mxdev->mxd_model);
-
- err = request_firmware(&fw_p, fw_name, &serial->interface->dev);
- if (err) {
- dev_err(&serial->interface->dev, "failed to request firmware: %d\n",
- err);
- goto err_free_mxdev;
- }
-
- err = mxu1_download_firmware(serial, fw_p);
- if (err)
- goto err_release_firmware;
-
- /* device is being reset */
- err = -ENODEV;
- goto err_release_firmware;
- }
-
- return 0;
-
-err_release_firmware:
- release_firmware(fw_p);
-err_free_mxdev:
- kfree(mxdev);
-
- return err;
-}
-
-static void mxu1_release(struct usb_serial *serial)
-{
- struct mxu1_device *mxdev;
-
- mxdev = usb_get_serial_data(serial);
- kfree(mxdev);
-}
-
-static int mxu1_write_byte(struct usb_serial_port *port, u32 addr,
- u8 mask, u8 byte)
-{
- int status;
- size_t size;
- struct mxu1_write_data_bytes *data;
-
- dev_dbg(&port->dev, "%s - addr 0x%08X, mask 0x%02X, byte 0x%02X\n",
- __func__, addr, mask, byte);
-
- size = sizeof(struct mxu1_write_data_bytes) + 2;
- data = kzalloc(size, GFP_KERNEL);
- if (!data)
- return -ENOMEM;
-
- data->bAddrType = MXU1_RW_DATA_ADDR_XDATA;
- data->bDataType = MXU1_RW_DATA_BYTE;
- data->bDataCounter = 1;
- data->wBaseAddrHi = cpu_to_be16(addr >> 16);
- data->wBaseAddrLo = cpu_to_be16(addr);
- data->bData[0] = mask;
- data->bData[1] = byte;
-
- status = mxu1_send_ctrl_data_urb(port->serial, MXU1_WRITE_DATA, 0,
- MXU1_RAM_PORT, data, size);
- if (status < 0)
- dev_err(&port->dev, "%s - failed: %d\n", __func__, status);
-
- kfree(data);
-
- return status;
-}
-
-static int mxu1_set_mcr(struct usb_serial_port *port, unsigned int mcr)
-{
- int status;
-
- status = mxu1_write_byte(port,
- MXU1_UART_BASE_ADDR + MXU1_UART_OFFSET_MCR,
- MXU1_MCR_RTS | MXU1_MCR_DTR | MXU1_MCR_LOOP,
- mcr);
- return status;
-}
-
-static void mxu1_set_termios(struct tty_struct *tty,
- struct usb_serial_port *port,
- struct ktermios *old_termios)
-{
- struct mxu1_port *mxport = usb_get_serial_port_data(port);
- struct mxu1_uart_config *config;
- tcflag_t cflag, iflag;
- speed_t baud;
- int status;
- unsigned int mcr;
-
- cflag = tty->termios.c_cflag;
- iflag = tty->termios.c_iflag;
-
- if (old_termios &&
- !tty_termios_hw_change(&tty->termios, old_termios) &&
- tty->termios.c_iflag == old_termios->c_iflag) {
- dev_dbg(&port->dev, "%s - nothing to change\n", __func__);
- return;
- }
-
- dev_dbg(&port->dev,
- "%s - cflag 0x%08x, iflag 0x%08x\n", __func__, cflag, iflag);
-
- if (old_termios) {
- dev_dbg(&port->dev, "%s - old cflag 0x%08x, old iflag 0x%08x\n",
- __func__,
- old_termios->c_cflag,
- old_termios->c_iflag);
- }
-
- config = kzalloc(sizeof(*config), GFP_KERNEL);
- if (!config)
- return;
-
- /* these flags must be set */
- config->wFlags |= MXU1_UART_ENABLE_MS_INTS;
- config->wFlags |= MXU1_UART_ENABLE_AUTO_START_DMA;
- if (mxport->send_break)
- config->wFlags |= MXU1_UART_SEND_BREAK_SIGNAL;
- config->bUartMode = mxport->uart_mode;
-
- switch (C_CSIZE(tty)) {
- case CS5:
- config->bDataBits = MXU1_UART_5_DATA_BITS;
- break;
- case CS6:
- config->bDataBits = MXU1_UART_6_DATA_BITS;
- break;
- case CS7:
- config->bDataBits = MXU1_UART_7_DATA_BITS;
- break;
- default:
- case CS8:
- config->bDataBits = MXU1_UART_8_DATA_BITS;
- break;
- }
-
- if (C_PARENB(tty)) {
- config->wFlags |= MXU1_UART_ENABLE_PARITY_CHECKING;
- if (C_CMSPAR(tty)) {
- if (C_PARODD(tty))
- config->bParity = MXU1_UART_MARK_PARITY;
- else
- config->bParity = MXU1_UART_SPACE_PARITY;
- } else {
- if (C_PARODD(tty))
- config->bParity = MXU1_UART_ODD_PARITY;
- else
- config->bParity = MXU1_UART_EVEN_PARITY;
- }
- } else {
- config->bParity = MXU1_UART_NO_PARITY;
- }
-
- if (C_CSTOPB(tty))
- config->bStopBits = MXU1_UART_2_STOP_BITS;
- else
- config->bStopBits = MXU1_UART_1_STOP_BITS;
-
- if (C_CRTSCTS(tty)) {
- /* RTS flow control must be off to drop RTS for baud rate B0 */
- if (C_BAUD(tty) != B0)
- config->wFlags |= MXU1_UART_ENABLE_RTS_IN;
- config->wFlags |= MXU1_UART_ENABLE_CTS_OUT;
- }
-
- if (I_IXOFF(tty) || I_IXON(tty)) {
- config->cXon = START_CHAR(tty);
- config->cXoff = STOP_CHAR(tty);
-
- if (I_IXOFF(tty))
- config->wFlags |= MXU1_UART_ENABLE_X_IN;
-
- if (I_IXON(tty))
- config->wFlags |= MXU1_UART_ENABLE_X_OUT;
- }
-
- baud = tty_get_baud_rate(tty);
- if (!baud)
- baud = 9600;
- config->wBaudRate = MXU1_BAUD_BASE / baud;
-
- dev_dbg(&port->dev, "%s - BaudRate=%d, wBaudRate=%d, wFlags=0x%04X, bDataBits=%d, bParity=%d, bStopBits=%d, cXon=%d, cXoff=%d, bUartMode=%d\n",
- __func__, baud, config->wBaudRate, config->wFlags,
- config->bDataBits, config->bParity, config->bStopBits,
- config->cXon, config->cXoff, config->bUartMode);
-
- cpu_to_be16s(&config->wBaudRate);
- cpu_to_be16s(&config->wFlags);
-
- status = mxu1_send_ctrl_data_urb(port->serial, MXU1_SET_CONFIG, 0,
- MXU1_UART1_PORT, config,
- sizeof(*config));
- if (status)
- dev_err(&port->dev, "cannot set config: %d\n", status);
-
- mutex_lock(&mxport->mutex);
- mcr = mxport->mcr;
-
- if (C_BAUD(tty) == B0)
- mcr &= ~(MXU1_MCR_DTR | MXU1_MCR_RTS);
- else if (old_termios && (old_termios->c_cflag & CBAUD) == B0)
- mcr |= MXU1_MCR_DTR | MXU1_MCR_RTS;
-
- status = mxu1_set_mcr(port, mcr);
- if (status)
- dev_err(&port->dev, "cannot set modem control: %d\n", status);
- else
- mxport->mcr = mcr;
-
- mutex_unlock(&mxport->mutex);
-
- kfree(config);
-}
-
-static int mxu1_get_serial_info(struct usb_serial_port *port,
- struct serial_struct __user *ret_arg)
-{
- struct serial_struct ret_serial;
- unsigned cwait;
-
- if (!ret_arg)
- return -EFAULT;
-
- cwait = port->port.closing_wait;
- if (cwait != ASYNC_CLOSING_WAIT_NONE)
- cwait = jiffies_to_msecs(cwait) / 10;
-
- memset(&ret_serial, 0, sizeof(ret_serial));
-
- ret_serial.type = PORT_16550A;
- ret_serial.line = port->minor;
- ret_serial.port = 0;
- ret_serial.xmit_fifo_size = port->bulk_out_size;
- ret_serial.baud_base = MXU1_BAUD_BASE;
- ret_serial.close_delay = 5*HZ;
- ret_serial.closing_wait = cwait;
-
- if (copy_to_user(ret_arg, &ret_serial, sizeof(*ret_arg)))
- return -EFAULT;
-
- return 0;
-}
-
-
-static int mxu1_set_serial_info(struct usb_serial_port *port,
- struct serial_struct __user *new_arg)
-{
- struct serial_struct new_serial;
- unsigned cwait;
-
- if (copy_from_user(&new_serial, new_arg, sizeof(new_serial)))
- return -EFAULT;
-
- cwait = new_serial.closing_wait;
- if (cwait != ASYNC_CLOSING_WAIT_NONE)
- cwait = msecs_to_jiffies(10 * new_serial.closing_wait);
-
- port->port.closing_wait = cwait;
-
- return 0;
-}
-
-static int mxu1_ioctl(struct tty_struct *tty,
- unsigned int cmd, unsigned long arg)
-{
- struct usb_serial_port *port = tty->driver_data;
-
- switch (cmd) {
- case TIOCGSERIAL:
- return mxu1_get_serial_info(port,
- (struct serial_struct __user *)arg);
- case TIOCSSERIAL:
- return mxu1_set_serial_info(port,
- (struct serial_struct __user *)arg);
- }
-
- return -ENOIOCTLCMD;
-}
-
-static int mxu1_tiocmget(struct tty_struct *tty)
-{
- struct usb_serial_port *port = tty->driver_data;
- struct mxu1_port *mxport = usb_get_serial_port_data(port);
- unsigned int result;
- unsigned int msr;
- unsigned int mcr;
- unsigned long flags;
-
- mutex_lock(&mxport->mutex);
- spin_lock_irqsave(&mxport->spinlock, flags);
-
- msr = mxport->msr;
- mcr = mxport->mcr;
-
- spin_unlock_irqrestore(&mxport->spinlock, flags);
- mutex_unlock(&mxport->mutex);
-
- result = ((mcr & MXU1_MCR_DTR) ? TIOCM_DTR : 0) |
- ((mcr & MXU1_MCR_RTS) ? TIOCM_RTS : 0) |
- ((mcr & MXU1_MCR_LOOP) ? TIOCM_LOOP : 0) |
- ((msr & MXU1_MSR_CTS) ? TIOCM_CTS : 0) |
- ((msr & MXU1_MSR_CD) ? TIOCM_CAR : 0) |
- ((msr & MXU1_MSR_RI) ? TIOCM_RI : 0) |
- ((msr & MXU1_MSR_DSR) ? TIOCM_DSR : 0);
-
- dev_dbg(&port->dev, "%s - 0x%04X\n", __func__, result);
-
- return result;
-}
-
-static int mxu1_tiocmset(struct tty_struct *tty,
- unsigned int set, unsigned int clear)
-{
- struct usb_serial_port *port = tty->driver_data;
- struct mxu1_port *mxport = usb_get_serial_port_data(port);
- int err;
- unsigned int mcr;
-
- mutex_lock(&mxport->mutex);
- mcr = mxport->mcr;
-
- if (set & TIOCM_RTS)
- mcr |= MXU1_MCR_RTS;
- if (set & TIOCM_DTR)
- mcr |= MXU1_MCR_DTR;
- if (set & TIOCM_LOOP)
- mcr |= MXU1_MCR_LOOP;
-
- if (clear & TIOCM_RTS)
- mcr &= ~MXU1_MCR_RTS;
- if (clear & TIOCM_DTR)
- mcr &= ~MXU1_MCR_DTR;
- if (clear & TIOCM_LOOP)
- mcr &= ~MXU1_MCR_LOOP;
-
- err = mxu1_set_mcr(port, mcr);
- if (!err)
- mxport->mcr = mcr;
-
- mutex_unlock(&mxport->mutex);
-
- return err;
-}
-
-static void mxu1_break(struct tty_struct *tty, int break_state)
-{
- struct usb_serial_port *port = tty->driver_data;
- struct mxu1_port *mxport = usb_get_serial_port_data(port);
-
- if (break_state == -1)
- mxport->send_break = true;
- else
- mxport->send_break = false;
-
- mxu1_set_termios(tty, port, NULL);
-}
-
-static int mxu1_open(struct tty_struct *tty, struct usb_serial_port *port)
-{
- struct mxu1_port *mxport = usb_get_serial_port_data(port);
- struct usb_serial *serial = port->serial;
- int status;
- u16 open_settings;
-
- open_settings = (MXU1_PIPE_MODE_CONTINUOUS |
- MXU1_PIPE_TIMEOUT_ENABLE |
- (MXU1_TRANSFER_TIMEOUT << 2));
-
- mxport->msr = 0;
-
- status = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL);
- if (status) {
- dev_err(&port->dev, "failed to submit interrupt urb: %d\n",
- status);
- return status;
- }
-
- if (tty)
- mxu1_set_termios(tty, port, NULL);
-
- status = mxu1_send_ctrl_urb(serial, MXU1_OPEN_PORT,
- open_settings, MXU1_UART1_PORT);
- if (status) {
- dev_err(&port->dev, "cannot send open command: %d\n", status);
- goto unlink_int_urb;
- }
-
- status = mxu1_send_ctrl_urb(serial, MXU1_START_PORT,
- 0, MXU1_UART1_PORT);
- if (status) {
- dev_err(&port->dev, "cannot send start command: %d\n", status);
- goto unlink_int_urb;
- }
-
- status = mxu1_send_ctrl_urb(serial, MXU1_PURGE_PORT,
- MXU1_PURGE_INPUT, MXU1_UART1_PORT);
- if (status) {
- dev_err(&port->dev, "cannot clear input buffers: %d\n",
- status);
-
- goto unlink_int_urb;
- }
-
- status = mxu1_send_ctrl_urb(serial, MXU1_PURGE_PORT,
- MXU1_PURGE_OUTPUT, MXU1_UART1_PORT);
- if (status) {
- dev_err(&port->dev, "cannot clear output buffers: %d\n",
- status);
-
- goto unlink_int_urb;
- }
-
- /*
- * reset the data toggle on the bulk endpoints to work around bug in
- * host controllers where things get out of sync some times
- */
- usb_clear_halt(serial->dev, port->write_urb->pipe);
- usb_clear_halt(serial->dev, port->read_urb->pipe);
-
- if (tty)
- mxu1_set_termios(tty, port, NULL);
-
- status = mxu1_send_ctrl_urb(serial, MXU1_OPEN_PORT,
- open_settings, MXU1_UART1_PORT);
- if (status) {
- dev_err(&port->dev, "cannot send open command: %d\n", status);
- goto unlink_int_urb;
- }
-
- status = mxu1_send_ctrl_urb(serial, MXU1_START_PORT,
- 0, MXU1_UART1_PORT);
- if (status) {
- dev_err(&port->dev, "cannot send start command: %d\n", status);
- goto unlink_int_urb;
- }
-
- status = usb_serial_generic_open(tty, port);
- if (status)
- goto unlink_int_urb;
-
- return 0;
-
-unlink_int_urb:
- usb_kill_urb(port->interrupt_in_urb);
-
- return status;
-}
-
-static void mxu1_close(struct usb_serial_port *port)
-{
- int status;
-
- usb_serial_generic_close(port);
- usb_kill_urb(port->interrupt_in_urb);
-
- status = mxu1_send_ctrl_urb(port->serial, MXU1_CLOSE_PORT,
- 0, MXU1_UART1_PORT);
- if (status) {
- dev_err(&port->dev, "failed to send close port command: %d\n",
- status);
- }
-}
-
-static void mxu1_handle_new_msr(struct usb_serial_port *port, u8 msr)
-{
- struct mxu1_port *mxport = usb_get_serial_port_data(port);
- struct async_icount *icount;
- unsigned long flags;
-
- dev_dbg(&port->dev, "%s - msr 0x%02X\n", __func__, msr);
-
- spin_lock_irqsave(&mxport->spinlock, flags);
- mxport->msr = msr & MXU1_MSR_MASK;
- spin_unlock_irqrestore(&mxport->spinlock, flags);
-
- if (msr & MXU1_MSR_DELTA_MASK) {
- icount = &port->icount;
- if (msr & MXU1_MSR_DELTA_CTS)
- icount->cts++;
- if (msr & MXU1_MSR_DELTA_DSR)
- icount->dsr++;
- if (msr & MXU1_MSR_DELTA_CD)
- icount->dcd++;
- if (msr & MXU1_MSR_DELTA_RI)
- icount->rng++;
-
- wake_up_interruptible(&port->port.delta_msr_wait);
- }
-}
-
-static void mxu1_interrupt_callback(struct urb *urb)
-{
- struct usb_serial_port *port = urb->context;
- unsigned char *data = urb->transfer_buffer;
- int length = urb->actual_length;
- int function;
- int status;
- u8 msr;
-
- switch (urb->status) {
- case 0:
- break;
- case -ECONNRESET:
- case -ENOENT:
- case -ESHUTDOWN:
- dev_dbg(&port->dev, "%s - urb shutting down: %d\n",
- __func__, urb->status);
- return;
- default:
- dev_dbg(&port->dev, "%s - nonzero urb status: %d\n",
- __func__, urb->status);
- goto exit;
- }
-
- if (length != 2) {
- dev_dbg(&port->dev, "%s - bad packet size: %d\n",
- __func__, length);
- goto exit;
- }
-
- if (data[0] == MXU1_CODE_HARDWARE_ERROR) {
- dev_err(&port->dev, "hardware error: %d\n", data[1]);
- goto exit;
- }
-
- function = mxu1_get_func_from_code(data[0]);
-
- dev_dbg(&port->dev, "%s - function %d, data 0x%02X\n",
- __func__, function, data[1]);
-
- switch (function) {
- case MXU1_CODE_DATA_ERROR:
- dev_dbg(&port->dev, "%s - DATA ERROR, data 0x%02X\n",
- __func__, data[1]);
- break;
-
- case MXU1_CODE_MODEM_STATUS:
- msr = data[1];
- mxu1_handle_new_msr(port, msr);
- break;
-
- default:
- dev_err(&port->dev, "unknown interrupt code: 0x%02X\n",
- data[1]);
- break;
- }
-
-exit:
- status = usb_submit_urb(urb, GFP_ATOMIC);
- if (status) {
- dev_err(&port->dev, "resubmit interrupt urb failed: %d\n",
- status);
- }
-}
-
-static struct usb_serial_driver mxu11x0_device = {
- .driver = {
- .owner = THIS_MODULE,
- .name = "mxu11x0",
- },
- .description = "MOXA UPort 11x0",
- .id_table = mxu1_idtable,
- .num_ports = 1,
- .port_probe = mxu1_port_probe,
- .port_remove = mxu1_port_remove,
- .attach = mxu1_startup,
- .release = mxu1_release,
- .open = mxu1_open,
- .close = mxu1_close,
- .ioctl = mxu1_ioctl,
- .set_termios = mxu1_set_termios,
- .tiocmget = mxu1_tiocmget,
- .tiocmset = mxu1_tiocmset,
- .tiocmiwait = usb_serial_generic_tiocmiwait,
- .get_icount = usb_serial_generic_get_icount,
- .break_ctl = mxu1_break,
- .read_int_callback = mxu1_interrupt_callback,
-};
-
-static struct usb_serial_driver *const serial_drivers[] = {
- &mxu11x0_device, NULL
-};
-
-module_usb_serial_driver(serial_drivers, mxu1_idtable);
-
-MODULE_AUTHOR("Mathieu Othacehe <m.othacehe@gmail.com>");
-MODULE_DESCRIPTION("MOXA UPort 11x0 USB to Serial Hub Driver");
-MODULE_LICENSE("GPL");
-MODULE_FIRMWARE("moxa/moxa-1110.fw");
-MODULE_FIRMWARE("moxa/moxa-1130.fw");
-MODULE_FIRMWARE("moxa/moxa-1131.fw");
-MODULE_FIRMWARE("moxa/moxa-1150.fw");
-MODULE_FIRMWARE("moxa/moxa-1151.fw");
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index db86e512e0fc..348e19834b83 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -270,6 +270,7 @@ static void option_instat_callback(struct urb *urb);
#define TELIT_PRODUCT_UE910_V2 0x1012
#define TELIT_PRODUCT_LE922_USBCFG0 0x1042
#define TELIT_PRODUCT_LE922_USBCFG3 0x1043
+#define TELIT_PRODUCT_LE922_USBCFG5 0x1045
#define TELIT_PRODUCT_LE920 0x1200
#define TELIT_PRODUCT_LE910 0x1201
@@ -315,6 +316,7 @@ static void option_instat_callback(struct urb *urb);
#define TOSHIBA_PRODUCT_G450 0x0d45
#define ALINK_VENDOR_ID 0x1e0e
+#define SIMCOM_PRODUCT_SIM7100E 0x9001 /* Yes, ALINK_VENDOR_ID */
#define ALINK_PRODUCT_PH300 0x9100
#define ALINK_PRODUCT_3GU 0x9200
@@ -607,6 +609,10 @@ static const struct option_blacklist_info zte_1255_blacklist = {
.reserved = BIT(3) | BIT(4),
};
+static const struct option_blacklist_info simcom_sim7100e_blacklist = {
+ .reserved = BIT(5) | BIT(6),
+};
+
static const struct option_blacklist_info telit_le910_blacklist = {
.sendsetup = BIT(0),
.reserved = BIT(1) | BIT(2),
@@ -1122,9 +1128,13 @@ static const struct usb_device_id option_ids[] = {
{ USB_DEVICE(KYOCERA_VENDOR_ID, KYOCERA_PRODUCT_KPC650) },
{ USB_DEVICE(KYOCERA_VENDOR_ID, KYOCERA_PRODUCT_KPC680) },
{ USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6000)}, /* ZTE AC8700 */
+ { USB_DEVICE_AND_INTERFACE_INFO(QUALCOMM_VENDOR_ID, 0x6001, 0xff, 0xff, 0xff), /* 4G LTE usb-modem U901 */
+ .driver_info = (kernel_ulong_t)&net_intf3_blacklist },
{ USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6613)}, /* Onda H600/ZTE MF330 */
{ USB_DEVICE(QUALCOMM_VENDOR_ID, 0x0023)}, /* ONYX 3G device */
{ USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9000)}, /* SIMCom SIM5218 */
+ { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9003), /* Quectel UC20 */
+ .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
{ USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) },
{ USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) },
{ USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6003),
@@ -1176,6 +1186,8 @@ static const struct usb_device_id option_ids[] = {
.driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 },
{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG3),
.driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 },
+ { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, TELIT_PRODUCT_LE922_USBCFG5, 0xff),
+ .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg0 },
{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910),
.driver_info = (kernel_ulong_t)&telit_le910_blacklist },
{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920),
@@ -1645,6 +1657,8 @@ static const struct usb_device_id option_ids[] = {
{ USB_DEVICE(ALINK_VENDOR_ID, 0x9000) },
{ USB_DEVICE(ALINK_VENDOR_ID, ALINK_PRODUCT_PH300) },
{ USB_DEVICE_AND_INTERFACE_INFO(ALINK_VENDOR_ID, ALINK_PRODUCT_3GU, 0xff, 0xff, 0xff) },
+ { USB_DEVICE(ALINK_VENDOR_ID, SIMCOM_PRODUCT_SIM7100E),
+ .driver_info = (kernel_ulong_t)&simcom_sim7100e_blacklist },
{ USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_X060S_X200),
.driver_info = (kernel_ulong_t)&alcatel_x200_blacklist
},
diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c
index 9919d2a9faf2..1bc6089b9008 100644
--- a/drivers/usb/serial/qcserial.c
+++ b/drivers/usb/serial/qcserial.c
@@ -157,14 +157,17 @@ static const struct usb_device_id id_table[] = {
{DEVICE_SWI(0x1199, 0x9056)}, /* Sierra Wireless Modem */
{DEVICE_SWI(0x1199, 0x9060)}, /* Sierra Wireless Modem */
{DEVICE_SWI(0x1199, 0x9061)}, /* Sierra Wireless Modem */
- {DEVICE_SWI(0x1199, 0x9070)}, /* Sierra Wireless MC74xx/EM74xx */
- {DEVICE_SWI(0x1199, 0x9071)}, /* Sierra Wireless MC74xx/EM74xx */
+ {DEVICE_SWI(0x1199, 0x9070)}, /* Sierra Wireless MC74xx */
+ {DEVICE_SWI(0x1199, 0x9071)}, /* Sierra Wireless MC74xx */
+ {DEVICE_SWI(0x1199, 0x9078)}, /* Sierra Wireless EM74xx */
+ {DEVICE_SWI(0x1199, 0x9079)}, /* Sierra Wireless EM74xx */
{DEVICE_SWI(0x413c, 0x81a2)}, /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */
{DEVICE_SWI(0x413c, 0x81a3)}, /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */
{DEVICE_SWI(0x413c, 0x81a4)}, /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */
{DEVICE_SWI(0x413c, 0x81a8)}, /* Dell Wireless 5808 Gobi(TM) 4G LTE Mobile Broadband Card */
{DEVICE_SWI(0x413c, 0x81a9)}, /* Dell Wireless 5808e Gobi(TM) 4G LTE Mobile Broadband Card */
{DEVICE_SWI(0x413c, 0x81b1)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card */
+ {DEVICE_SWI(0x413c, 0x81b3)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */
/* Huawei devices */
{DEVICE_HWI(0x03f0, 0x581d)}, /* HP lt4112 LTE/HSPA+ Gobi 4G Modem (Huawei me906e) */
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 2760a7ba3f30..8c80a48e3233 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -446,7 +446,8 @@ static long vfio_pci_ioctl(void *device_data,
info.num_regions = VFIO_PCI_NUM_REGIONS;
info.num_irqs = VFIO_PCI_NUM_IRQS;
- return copy_to_user((void __user *)arg, &info, minsz);
+ return copy_to_user((void __user *)arg, &info, minsz) ?
+ -EFAULT : 0;
} else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
struct pci_dev *pdev = vdev->pdev;
@@ -520,7 +521,8 @@ static long vfio_pci_ioctl(void *device_data,
return -EINVAL;
}
- return copy_to_user((void __user *)arg, &info, minsz);
+ return copy_to_user((void __user *)arg, &info, minsz) ?
+ -EFAULT : 0;
} else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
struct vfio_irq_info info;
@@ -555,7 +557,8 @@ static long vfio_pci_ioctl(void *device_data,
else
info.flags |= VFIO_IRQ_INFO_NORESIZE;
- return copy_to_user((void __user *)arg, &info, minsz);
+ return copy_to_user((void __user *)arg, &info, minsz) ?
+ -EFAULT : 0;
} else if (cmd == VFIO_DEVICE_SET_IRQS) {
struct vfio_irq_set hdr;
diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c
index 418cdd9ba3f4..e65b142d3422 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -219,7 +219,8 @@ static long vfio_platform_ioctl(void *device_data,
info.num_regions = vdev->num_regions;
info.num_irqs = vdev->num_irqs;
- return copy_to_user((void __user *)arg, &info, minsz);
+ return copy_to_user((void __user *)arg, &info, minsz) ?
+ -EFAULT : 0;
} else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
struct vfio_region_info info;
@@ -240,7 +241,8 @@ static long vfio_platform_ioctl(void *device_data,
info.size = vdev->regions[info.index].size;
info.flags = vdev->regions[info.index].flags;
- return copy_to_user((void __user *)arg, &info, minsz);
+ return copy_to_user((void __user *)arg, &info, minsz) ?
+ -EFAULT : 0;
} else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
struct vfio_irq_info info;
@@ -259,7 +261,8 @@ static long vfio_platform_ioctl(void *device_data,
info.flags = vdev->irqs[info.index].flags;
info.count = vdev->irqs[info.index].count;
- return copy_to_user((void __user *)arg, &info, minsz);
+ return copy_to_user((void __user *)arg, &info, minsz) ?
+ -EFAULT : 0;
} else if (cmd == VFIO_DEVICE_SET_IRQS) {
struct vfio_irq_set hdr;
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 6f1ea3dddbad..75b24e93cedb 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -999,7 +999,8 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
info.iova_pgsizes = vfio_pgsize_bitmap(iommu);
- return copy_to_user((void __user *)arg, &info, minsz);
+ return copy_to_user((void __user *)arg, &info, minsz) ?
+ -EFAULT : 0;
} else if (cmd == VFIO_IOMMU_MAP_DMA) {
struct vfio_iommu_type1_dma_map map;
@@ -1032,7 +1033,8 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
if (ret)
return ret;
- return copy_to_user((void __user *)arg, &unmap, minsz);
+ return copy_to_user((void __user *)arg, &unmap, minsz) ?
+ -EFAULT : 0;
}
return -ENOTTY;
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index ad2146a9ab2d..236553e81027 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1156,6 +1156,8 @@ int vhost_init_used(struct vhost_virtqueue *vq)
{
__virtio16 last_used_idx;
int r;
+ bool is_le = vq->is_le;
+
if (!vq->private_data) {
vq->is_le = virtio_legacy_is_little_endian();
return 0;
@@ -1165,15 +1167,20 @@ int vhost_init_used(struct vhost_virtqueue *vq)
r = vhost_update_used_flags(vq);
if (r)
- return r;
+ goto err;
vq->signalled_used_valid = false;
- if (!access_ok(VERIFY_READ, &vq->used->idx, sizeof vq->used->idx))
- return -EFAULT;
+ if (!access_ok(VERIFY_READ, &vq->used->idx, sizeof vq->used->idx)) {
+ r = -EFAULT;
+ goto err;
+ }
r = __get_user(last_used_idx, &vq->used->idx);
if (r)
- return r;
+ goto err;
vq->last_used_idx = vhost16_to_cpu(vq, last_used_idx);
return 0;
+err:
+ vq->is_le = is_le;
+ return r;
}
EXPORT_SYMBOL_GPL(vhost_init_used);
diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c
index 92f394927f24..6e92917ba77a 100644
--- a/drivers/video/console/fbcon.c
+++ b/drivers/video/console/fbcon.c
@@ -709,6 +709,7 @@ static int con2fb_acquire_newinfo(struct vc_data *vc, struct fb_info *info,
}
if (!err) {
+ ops->cur_blink_jiffies = HZ / 5;
info->fbcon_par = ops;
if (vc)
@@ -956,6 +957,7 @@ static const char *fbcon_startup(void)
ops->currcon = -1;
ops->graphics = 1;
ops->cur_rotate = -1;
+ ops->cur_blink_jiffies = HZ / 5;
info->fbcon_par = ops;
p->con_rotate = initial_rotation;
set_blitting_type(vc, info);
diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c
index c0c11fad4611..7760fc1a2218 100644
--- a/drivers/virtio/virtio_pci_modern.c
+++ b/drivers/virtio/virtio_pci_modern.c
@@ -679,7 +679,7 @@ int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev)
pci_read_config_dword(pci_dev,
notify + offsetof(struct virtio_pci_notify_cap,
- cap.length),
+ cap.offset),
&notify_offset);
/* We don't know how many VQs we'll map, ahead of the time.
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 0f6d8515ba4f..80825a7e8e48 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -1569,6 +1569,17 @@ config WATCHDOG_RIO
machines. The watchdog timeout period is normally one minute but
can be changed with a boot-time parameter.
+config WATCHDOG_SUN4V
+ tristate "Sun4v Watchdog support"
+ select WATCHDOG_CORE
+ depends on SPARC64
+ help
+ Say Y here to support the hypervisor watchdog capability embedded
+ in the SPARC sun4v architecture.
+
+ To compile this driver as a module, choose M here. The module will
+ be called sun4v_wdt.
+
# XTENSA Architecture
# Xen Architecture
diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index f566753256ab..f6a6a387c6c7 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile
@@ -179,6 +179,7 @@ obj-$(CONFIG_SH_WDT) += shwdt.o
obj-$(CONFIG_WATCHDOG_RIO) += riowd.o
obj-$(CONFIG_WATCHDOG_CP1XXX) += cpwd.o
+obj-$(CONFIG_WATCHDOG_SUN4V) += sun4v_wdt.o
# XTENSA Architecture
diff --git a/drivers/watchdog/sun4v_wdt.c b/drivers/watchdog/sun4v_wdt.c
new file mode 100644
index 000000000000..1467fe50a76f
--- /dev/null
+++ b/drivers/watchdog/sun4v_wdt.c
@@ -0,0 +1,191 @@
+/*
+ * sun4v watchdog timer
+ * (c) Copyright 2016 Oracle Corporation
+ *
+ * Implement a simple watchdog driver using the built-in sun4v hypervisor
+ * watchdog support. If time expires, the hypervisor stops or bounces
+ * the guest domain.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/watchdog.h>
+#include <asm/hypervisor.h>
+#include <asm/mdesc.h>
+
+#define WDT_TIMEOUT 60
+#define WDT_MAX_TIMEOUT 31536000
+#define WDT_MIN_TIMEOUT 1
+#define WDT_DEFAULT_RESOLUTION_MS 1000 /* 1 second */
+
+static unsigned int timeout;
+module_param(timeout, uint, 0);
+MODULE_PARM_DESC(timeout, "Watchdog timeout in seconds (default="
+ __MODULE_STRING(WDT_TIMEOUT) ")");
+
+static bool nowayout = WATCHDOG_NOWAYOUT;
+module_param(nowayout, bool, S_IRUGO);
+MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
+ __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+
+static int sun4v_wdt_stop(struct watchdog_device *wdd)
+{
+ sun4v_mach_set_watchdog(0, NULL);
+
+ return 0;
+}
+
+static int sun4v_wdt_ping(struct watchdog_device *wdd)
+{
+ int hverr;
+
+ /*
+ * HV watchdog timer will round up the timeout
+ * passed in to the nearest multiple of the
+ * watchdog resolution in milliseconds.
+ */
+ hverr = sun4v_mach_set_watchdog(wdd->timeout * 1000, NULL);
+ if (hverr == HV_EINVAL)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int sun4v_wdt_set_timeout(struct watchdog_device *wdd,
+ unsigned int timeout)
+{
+ wdd->timeout = timeout;
+
+ return 0;
+}
+
+static const struct watchdog_info sun4v_wdt_ident = {
+ .options = WDIOF_SETTIMEOUT |
+ WDIOF_MAGICCLOSE |
+ WDIOF_KEEPALIVEPING,
+ .identity = "sun4v hypervisor watchdog",
+ .firmware_version = 0,
+};
+
+static struct watchdog_ops sun4v_wdt_ops = {
+ .owner = THIS_MODULE,
+ .start = sun4v_wdt_ping,
+ .stop = sun4v_wdt_stop,
+ .ping = sun4v_wdt_ping,
+ .set_timeout = sun4v_wdt_set_timeout,
+};
+
+static struct watchdog_device wdd = {
+ .info = &sun4v_wdt_ident,
+ .ops = &sun4v_wdt_ops,
+ .min_timeout = WDT_MIN_TIMEOUT,
+ .max_timeout = WDT_MAX_TIMEOUT,
+ .timeout = WDT_TIMEOUT,
+};
+
+static int __init sun4v_wdt_init(void)
+{
+ struct mdesc_handle *handle;
+ u64 node;
+ const u64 *value;
+ int err = 0;
+ unsigned long major = 1, minor = 1;
+
+ /*
+ * There are 2 properties that can be set from the control
+ * domain for the watchdog.
+ * watchdog-resolution
+ * watchdog-max-timeout
+ *
+ * We can expect a handle to be returned otherwise something
+ * serious is wrong. Correct to return -ENODEV here.
+ */
+
+ handle = mdesc_grab();
+ if (!handle)
+ return -ENODEV;
+
+ node = mdesc_node_by_name(handle, MDESC_NODE_NULL, "platform");
+ err = -ENODEV;
+ if (node == MDESC_NODE_NULL)
+ goto out_release;
+
+ /*
+ * This is a safe way to validate if we are on the right
+ * platform.
+ */
+ if (sun4v_hvapi_register(HV_GRP_CORE, major, &minor))
+ goto out_hv_unreg;
+
+ /* Allow value of watchdog-resolution up to 1s (default) */
+ value = mdesc_get_property(handle, node, "watchdog-resolution", NULL);
+ err = -EINVAL;
+ if (value) {
+ if (*value == 0 ||
+ *value > WDT_DEFAULT_RESOLUTION_MS)
+ goto out_hv_unreg;
+ }
+
+ value = mdesc_get_property(handle, node, "watchdog-max-timeout", NULL);
+ if (value) {
+ /*
+ * If the property value (in ms) is smaller than
+ * min_timeout, return -EINVAL.
+ */
+ if (*value < wdd.min_timeout * 1000)
+ goto out_hv_unreg;
+
+ /*
+ * If the property value is smaller than
+ * default max_timeout then set watchdog max_timeout to
+ * the value of the property in seconds.
+ */
+ if (*value < wdd.max_timeout * 1000)
+ wdd.max_timeout = *value / 1000;
+ }
+
+ watchdog_init_timeout(&wdd, timeout, NULL);
+
+ watchdog_set_nowayout(&wdd, nowayout);
+
+ err = watchdog_register_device(&wdd);
+ if (err)
+ goto out_hv_unreg;
+
+ pr_info("initialized (timeout=%ds, nowayout=%d)\n",
+ wdd.timeout, nowayout);
+
+ mdesc_release(handle);
+
+ return 0;
+
+out_hv_unreg:
+ sun4v_hvapi_unregister(HV_GRP_CORE);
+
+out_release:
+ mdesc_release(handle);
+ return err;
+}
+
+static void __exit sun4v_wdt_exit(void)
+{
+ sun4v_hvapi_unregister(HV_GRP_CORE);
+ watchdog_unregister_device(&wdd);
+}
+
+module_init(sun4v_wdt_init);
+module_exit(sun4v_wdt_exit);
+
+MODULE_AUTHOR("Wim Coekaerts <wim.coekaerts@oracle.com>");
+MODULE_DESCRIPTION("sun4v watchdog driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c
index 73dafdc494aa..fb0221434f81 100644
--- a/drivers/xen/xen-pciback/pciback_ops.c
+++ b/drivers/xen/xen-pciback/pciback_ops.c
@@ -227,8 +227,9 @@ int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev,
/*
* PCI_COMMAND_MEMORY must be enabled, otherwise we may not be able
* to access the BARs where the MSI-X entries reside.
+ * But VF devices are unique in which the PF needs to be checked.
*/
- pci_read_config_word(dev, PCI_COMMAND, &cmd);
+ pci_read_config_word(pci_physfn(dev), PCI_COMMAND, &cmd);
if (dev->msi_enabled || !(cmd & PCI_COMMAND_MEMORY))
return -ENXIO;
@@ -332,6 +333,9 @@ void xen_pcibk_do_op(struct work_struct *data)
struct xen_pcibk_dev_data *dev_data = NULL;
struct xen_pci_op *op = &pdev->op;
int test_intx = 0;
+#ifdef CONFIG_PCI_MSI
+ unsigned int nr = 0;
+#endif
*op = pdev->sh_info->op;
barrier();
@@ -360,6 +364,7 @@ void xen_pcibk_do_op(struct work_struct *data)
op->err = xen_pcibk_disable_msi(pdev, dev, op);
break;
case XEN_PCI_OP_enable_msix:
+ nr = op->value;
op->err = xen_pcibk_enable_msix(pdev, dev, op);
break;
case XEN_PCI_OP_disable_msix:
@@ -382,7 +387,7 @@ void xen_pcibk_do_op(struct work_struct *data)
if (op->cmd == XEN_PCI_OP_enable_msix && op->err == 0) {
unsigned int i;
- for (i = 0; i < op->value; i++)
+ for (i = 0; i < nr; i++)
pdev->sh_info->op.msix_entries[i].vector =
op->msix_entries[i].vector;
}
diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c
index ad4eb1024d1f..c46ee189466f 100644
--- a/drivers/xen/xen-scsiback.c
+++ b/drivers/xen/xen-scsiback.c
@@ -849,15 +849,31 @@ static int scsiback_map(struct vscsibk_info *info)
}
/*
+ Check for a translation entry being present
+*/
+static struct v2p_entry *scsiback_chk_translation_entry(
+ struct vscsibk_info *info, struct ids_tuple *v)
+{
+ struct list_head *head = &(info->v2p_entry_lists);
+ struct v2p_entry *entry;
+
+ list_for_each_entry(entry, head, l)
+ if ((entry->v.chn == v->chn) &&
+ (entry->v.tgt == v->tgt) &&
+ (entry->v.lun == v->lun))
+ return entry;
+
+ return NULL;
+}
+
+/*
Add a new translation entry
*/
static int scsiback_add_translation_entry(struct vscsibk_info *info,
char *phy, struct ids_tuple *v)
{
int err = 0;
- struct v2p_entry *entry;
struct v2p_entry *new;
- struct list_head *head = &(info->v2p_entry_lists);
unsigned long flags;
char *lunp;
unsigned long long unpacked_lun;
@@ -917,15 +933,10 @@ static int scsiback_add_translation_entry(struct vscsibk_info *info,
spin_lock_irqsave(&info->v2p_lock, flags);
/* Check double assignment to identical virtual ID */
- list_for_each_entry(entry, head, l) {
- if ((entry->v.chn == v->chn) &&
- (entry->v.tgt == v->tgt) &&
- (entry->v.lun == v->lun)) {
- pr_warn("Virtual ID is already used. Assignment was not performed.\n");
- err = -EEXIST;
- goto out;
- }
-
+ if (scsiback_chk_translation_entry(info, v)) {
+ pr_warn("Virtual ID is already used. Assignment was not performed.\n");
+ err = -EEXIST;
+ goto out;
}
/* Create a new translation entry and add to the list */
@@ -933,18 +944,18 @@ static int scsiback_add_translation_entry(struct vscsibk_info *info,
new->v = *v;
new->tpg = tpg;
new->lun = unpacked_lun;
- list_add_tail(&new->l, head);
+ list_add_tail(&new->l, &info->v2p_entry_lists);
out:
spin_unlock_irqrestore(&info->v2p_lock, flags);
out_free:
- mutex_lock(&tpg->tv_tpg_mutex);
- tpg->tv_tpg_fe_count--;
- mutex_unlock(&tpg->tv_tpg_mutex);
-
- if (err)
+ if (err) {
+ mutex_lock(&tpg->tv_tpg_mutex);
+ tpg->tv_tpg_fe_count--;
+ mutex_unlock(&tpg->tv_tpg_mutex);
kfree(new);
+ }
return err;
}
@@ -956,39 +967,40 @@ static void __scsiback_del_translation_entry(struct v2p_entry *entry)
}
/*
- Delete the translation entry specfied
+ Delete the translation entry specified
*/
static int scsiback_del_translation_entry(struct vscsibk_info *info,
struct ids_tuple *v)
{
struct v2p_entry *entry;
- struct list_head *head = &(info->v2p_entry_lists);
unsigned long flags;
+ int ret = 0;
spin_lock_irqsave(&info->v2p_lock, flags);
/* Find out the translation entry specified */
- list_for_each_entry(entry, head, l) {
- if ((entry->v.chn == v->chn) &&
- (entry->v.tgt == v->tgt) &&
- (entry->v.lun == v->lun)) {
- goto found;
- }
- }
-
- spin_unlock_irqrestore(&info->v2p_lock, flags);
- return 1;
-
-found:
- /* Delete the translation entry specfied */
- __scsiback_del_translation_entry(entry);
+ entry = scsiback_chk_translation_entry(info, v);
+ if (entry)
+ __scsiback_del_translation_entry(entry);
+ else
+ ret = -ENOENT;
spin_unlock_irqrestore(&info->v2p_lock, flags);
- return 0;
+ return ret;
}
static void scsiback_do_add_lun(struct vscsibk_info *info, const char *state,
char *phy, struct ids_tuple *vir, int try)
{
+ struct v2p_entry *entry;
+ unsigned long flags;
+
+ if (try) {
+ spin_lock_irqsave(&info->v2p_lock, flags);
+ entry = scsiback_chk_translation_entry(info, vir);
+ spin_unlock_irqrestore(&info->v2p_lock, flags);
+ if (entry)
+ return;
+ }
if (!scsiback_add_translation_entry(info, phy, vir)) {
if (xenbus_printf(XBT_NIL, info->dev->nodename, state,
"%d", XenbusStateInitialised)) {
diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c
index 9433e46518c8..912b64edb42b 100644
--- a/drivers/xen/xenbus/xenbus_dev_frontend.c
+++ b/drivers/xen/xenbus/xenbus_dev_frontend.c
@@ -188,6 +188,8 @@ static int queue_reply(struct list_head *queue, const void *data, size_t len)
if (len == 0)
return 0;
+ if (len > XENSTORE_PAYLOAD_MAX)
+ return -EINVAL;
rb = kmalloc(sizeof(*rb) + len, GFP_KERNEL);
if (rb == NULL)
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 0548c53f41d5..22fc7c802d69 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -511,8 +511,6 @@ affs_do_readpage_ofs(struct page *page, unsigned to)
pr_debug("%s(%lu, %ld, 0, %d)\n", __func__, inode->i_ino,
page->index, to);
BUG_ON(to > PAGE_CACHE_SIZE);
- kmap(page);
- data = page_address(page);
bsize = AFFS_SB(sb)->s_data_blksize;
tmp = page->index << PAGE_CACHE_SHIFT;
bidx = tmp / bsize;
@@ -524,14 +522,15 @@ affs_do_readpage_ofs(struct page *page, unsigned to)
return PTR_ERR(bh);
tmp = min(bsize - boff, to - pos);
BUG_ON(pos + tmp > to || tmp > bsize);
+ data = kmap_atomic(page);
memcpy(data + pos, AFFS_DATA(bh) + boff, tmp);
+ kunmap_atomic(data);
affs_brelse(bh);
bidx++;
pos += tmp;
boff = 0;
}
flush_dcache_page(page);
- kunmap(page);
return 0;
}
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 051ea4809c14..7d914c67a9d0 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -653,7 +653,7 @@ static unsigned long randomize_stack_top(unsigned long stack_top)
if ((current->flags & PF_RANDOMIZE) &&
!(current->personality & ADDR_NO_RANDOMIZE)) {
- random_variable = (unsigned long) get_random_int();
+ random_variable = get_random_long();
random_variable &= STACK_RND_MASK;
random_variable <<= PAGE_SHIFT;
}
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 39b3a174a425..826b164a4b5b 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1201,7 +1201,11 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
bdev->bd_disk = disk;
bdev->bd_queue = disk->queue;
bdev->bd_contains = bdev;
- bdev->bd_inode->i_flags = disk->fops->direct_access ? S_DAX : 0;
+ if (IS_ENABLED(CONFIG_BLK_DEV_DAX) && disk->fops->direct_access)
+ bdev->bd_inode->i_flags = S_DAX;
+ else
+ bdev->bd_inode->i_flags = 0;
+
if (!partno) {
ret = -ENXIO;
bdev->bd_part = disk_get_part(disk, partno);
@@ -1693,13 +1697,24 @@ static int blkdev_releasepage(struct page *page, gfp_t wait)
return try_to_free_buffers(page);
}
+static int blkdev_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ if (dax_mapping(mapping)) {
+ struct block_device *bdev = I_BDEV(mapping->host);
+
+ return dax_writeback_mapping_range(mapping, bdev, wbc);
+ }
+ return generic_writepages(mapping, wbc);
+}
+
static const struct address_space_operations def_blk_aops = {
.readpage = blkdev_readpage,
.readpages = blkdev_readpages,
.writepage = blkdev_writepage,
.write_begin = blkdev_write_begin,
.write_end = blkdev_write_end,
- .writepages = generic_writepages,
+ .writepages = blkdev_writepages,
.releasepage = blkdev_releasepage,
.direct_IO = blkdev_direct_IO,
.is_dirty_writeback = buffer_check_dirty_writeback,
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 7cf8509deda7..2c849b08a91b 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -310,8 +310,16 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state);
err = btrfs_insert_fs_root(root->fs_info, root);
+ /*
+ * The root might have been inserted already, as before we look
+ * for orphan roots, log replay might have happened, which
+ * triggers a transaction commit and qgroup accounting, which
+ * in turn reads and inserts fs roots while doing backref
+ * walking.
+ */
+ if (err == -EEXIST)
+ err = 0;
if (err) {
- BUG_ON(err == -EEXIST);
btrfs_free_fs_root(root);
break;
}
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index c22213789090..19adeb0ef82a 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1756,6 +1756,10 @@ int ceph_pool_perm_check(struct ceph_inode_info *ci, int need)
u32 pool;
int ret, flags;
+ /* does not support pool namespace yet */
+ if (ci->i_pool_ns_len)
+ return -EIO;
+
if (ceph_test_mount_opt(ceph_inode_to_client(&ci->vfs_inode),
NOPOOLPERM))
return 0;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index cdbf8cf3d52c..6fe0ad26a7df 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2753,7 +2753,8 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
void *inline_data, int inline_len,
struct ceph_buffer *xattr_buf,
struct ceph_mds_session *session,
- struct ceph_cap *cap, int issued)
+ struct ceph_cap *cap, int issued,
+ u32 pool_ns_len)
__releases(ci->i_ceph_lock)
__releases(mdsc->snap_rwsem)
{
@@ -2873,6 +2874,8 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
if (newcaps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)) {
/* file layout may have changed */
ci->i_layout = grant->layout;
+ ci->i_pool_ns_len = pool_ns_len;
+
/* size/truncate_seq? */
queue_trunc = ceph_fill_file_size(inode, issued,
le32_to_cpu(grant->truncate_seq),
@@ -3411,6 +3414,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
u32 inline_len = 0;
void *snaptrace;
size_t snaptrace_len;
+ u32 pool_ns_len = 0;
void *p, *end;
dout("handle_caps from mds%d\n", mds);
@@ -3463,6 +3467,21 @@ void ceph_handle_caps(struct ceph_mds_session *session,
p += inline_len;
}
+ if (le16_to_cpu(msg->hdr.version) >= 8) {
+ u64 flush_tid;
+ u32 caller_uid, caller_gid;
+ u32 osd_epoch_barrier;
+ /* version >= 5 */
+ ceph_decode_32_safe(&p, end, osd_epoch_barrier, bad);
+ /* version >= 6 */
+ ceph_decode_64_safe(&p, end, flush_tid, bad);
+ /* version >= 7 */
+ ceph_decode_32_safe(&p, end, caller_uid, bad);
+ ceph_decode_32_safe(&p, end, caller_gid, bad);
+ /* version >= 8 */
+ ceph_decode_32_safe(&p, end, pool_ns_len, bad);
+ }
+
/* lookup ino */
inode = ceph_find_inode(sb, vino);
ci = ceph_inode(inode);
@@ -3518,7 +3537,8 @@ void ceph_handle_caps(struct ceph_mds_session *session,
&cap, &issued);
handle_cap_grant(mdsc, inode, h,
inline_version, inline_data, inline_len,
- msg->middle, session, cap, issued);
+ msg->middle, session, cap, issued,
+ pool_ns_len);
if (realm)
ceph_put_snap_realm(mdsc, realm);
goto done_unlocked;
@@ -3542,7 +3562,8 @@ void ceph_handle_caps(struct ceph_mds_session *session,
issued |= __ceph_caps_dirty(ci);
handle_cap_grant(mdsc, inode, h,
inline_version, inline_data, inline_len,
- msg->middle, session, cap, issued);
+ msg->middle, session, cap, issued,
+ pool_ns_len);
goto done_unlocked;
case CEPH_CAP_OP_FLUSH_ACK:
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index fb4ba2e4e2a5..5849b88bbed3 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -396,6 +396,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
ci->i_symlink = NULL;
memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout));
+ ci->i_pool_ns_len = 0;
ci->i_fragtree = RB_ROOT;
mutex_init(&ci->i_fragtree_mutex);
@@ -756,6 +757,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
if (ci->i_layout.fl_pg_pool != info->layout.fl_pg_pool)
ci->i_ceph_flags &= ~CEPH_I_POOL_PERM;
ci->i_layout = info->layout;
+ ci->i_pool_ns_len = iinfo->pool_ns_len;
queue_trunc = ceph_fill_file_size(inode, issued,
le32_to_cpu(info->truncate_seq),
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index e7b130a637f9..911d64d865f1 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -100,6 +100,14 @@ static int parse_reply_info_in(void **p, void *end,
} else
info->inline_version = CEPH_INLINE_NONE;
+ if (features & CEPH_FEATURE_FS_FILE_LAYOUT_V2) {
+ ceph_decode_32_safe(p, end, info->pool_ns_len, bad);
+ ceph_decode_need(p, end, info->pool_ns_len, bad);
+ *p += info->pool_ns_len;
+ } else {
+ info->pool_ns_len = 0;
+ }
+
return 0;
bad:
return err;
@@ -2298,6 +2306,14 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir),
CEPH_CAP_PIN);
+ /* deny access to directories with pool_ns layouts */
+ if (req->r_inode && S_ISDIR(req->r_inode->i_mode) &&
+ ceph_inode(req->r_inode)->i_pool_ns_len)
+ return -EIO;
+ if (req->r_locked_dir &&
+ ceph_inode(req->r_locked_dir)->i_pool_ns_len)
+ return -EIO;
+
/* issue */
mutex_lock(&mdsc->mutex);
__register_request(mdsc, req, dir);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index ccf11ef0ca87..37712ccffcc6 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -44,6 +44,7 @@ struct ceph_mds_reply_info_in {
u64 inline_version;
u32 inline_len;
char *inline_data;
+ u32 pool_ns_len;
};
/*
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 75b7d125ce66..9c458eb52245 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -287,6 +287,7 @@ struct ceph_inode_info {
struct ceph_dir_layout i_dir_layout;
struct ceph_file_layout i_layout;
+ size_t i_pool_ns_len;
char *i_symlink;
/* for dirs */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index c48ca13673e3..2eea40353e60 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -1013,7 +1013,6 @@ const struct file_operations cifs_file_strict_ops = {
.llseek = cifs_llseek,
.unlocked_ioctl = cifs_ioctl,
.clone_file_range = cifs_clone_file_range,
- .clone_file_range = cifs_clone_file_range,
.setlease = cifs_setlease,
.fallocate = cifs_fallocate,
};
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 68c4547528c4..83aac8ba50b0 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -31,19 +31,15 @@
* so that it will fit. We use hash_64 to convert the value to 31 bits, and
* then add 1, to ensure that we don't end up with a 0 as the value.
*/
-#if BITS_PER_LONG == 64
static inline ino_t
cifs_uniqueid_to_ino_t(u64 fileid)
{
+ if ((sizeof(ino_t)) < (sizeof(u64)))
+ return (ino_t)hash_64(fileid, (sizeof(ino_t) * 8) - 1) + 1;
+
return (ino_t)fileid;
+
}
-#else
-static inline ino_t
-cifs_uniqueid_to_ino_t(u64 fileid)
-{
- return (ino_t)hash_64(fileid, (sizeof(ino_t) * 8) - 1) + 1;
-}
-#endif
extern struct file_system_type cifs_fs_type;
extern const struct address_space_operations cifs_addr_ops;
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 90b4f9f7de66..76fcb50295a3 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1396,11 +1396,10 @@ openRetry:
* current bigbuf.
*/
static int
-cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
+discard_remaining_data(struct TCP_Server_Info *server)
{
unsigned int rfclen = get_rfc1002_length(server->smallbuf);
int remaining = rfclen + 4 - server->total_read;
- struct cifs_readdata *rdata = mid->callback_data;
while (remaining > 0) {
int length;
@@ -1414,10 +1413,20 @@ cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
remaining -= length;
}
- dequeue_mid(mid, rdata->result);
return 0;
}
+static int
+cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
+{
+ int length;
+ struct cifs_readdata *rdata = mid->callback_data;
+
+ length = discard_remaining_data(server);
+ dequeue_mid(mid, rdata->result);
+ return length;
+}
+
int
cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
{
@@ -1446,6 +1455,12 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
return length;
server->total_read += length;
+ if (server->ops->is_status_pending &&
+ server->ops->is_status_pending(buf, server, 0)) {
+ discard_remaining_data(server);
+ return -1;
+ }
+
/* Was the SMB read successful? */
rdata->result = server->ops->map_error(buf, false);
if (rdata->result != 0) {
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 10f8d5cf5681..42e1f440eb1e 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1106,21 +1106,25 @@ parse_lease_state(struct TCP_Server_Info *server, struct smb2_create_rsp *rsp,
{
char *data_offset;
struct create_context *cc;
- unsigned int next = 0;
+ unsigned int next;
+ unsigned int remaining;
char *name;
data_offset = (char *)rsp + 4 + le32_to_cpu(rsp->CreateContextsOffset);
+ remaining = le32_to_cpu(rsp->CreateContextsLength);
cc = (struct create_context *)data_offset;
- do {
- cc = (struct create_context *)((char *)cc + next);
+ while (remaining >= sizeof(struct create_context)) {
name = le16_to_cpu(cc->NameOffset) + (char *)cc;
- if (le16_to_cpu(cc->NameLength) != 4 ||
- strncmp(name, "RqLs", 4)) {
- next = le32_to_cpu(cc->Next);
- continue;
- }
- return server->ops->parse_lease_buf(cc, epoch);
- } while (next != 0);
+ if (le16_to_cpu(cc->NameLength) == 4 &&
+ strncmp(name, "RqLs", 4) == 0)
+ return server->ops->parse_lease_buf(cc, epoch);
+
+ next = le32_to_cpu(cc->Next);
+ if (!next)
+ break;
+ remaining -= next;
+ cc = (struct create_context *)((char *)cc + next);
+ }
return 0;
}
diff --git a/fs/dax.c b/fs/dax.c
index fc2e3141138b..711172450da6 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -79,15 +79,14 @@ struct page *read_dax_sector(struct block_device *bdev, sector_t n)
}
/*
- * dax_clear_blocks() is called from within transaction context from XFS,
+ * dax_clear_sectors() is called from within transaction context from XFS,
* and hence this means the stack from this point must follow GFP_NOFS
* semantics for all operations.
*/
-int dax_clear_blocks(struct inode *inode, sector_t block, long _size)
+int dax_clear_sectors(struct block_device *bdev, sector_t _sector, long _size)
{
- struct block_device *bdev = inode->i_sb->s_bdev;
struct blk_dax_ctl dax = {
- .sector = block << (inode->i_blkbits - 9),
+ .sector = _sector,
.size = _size,
};
@@ -109,7 +108,7 @@ int dax_clear_blocks(struct inode *inode, sector_t block, long _size)
wmb_pmem();
return 0;
}
-EXPORT_SYMBOL_GPL(dax_clear_blocks);
+EXPORT_SYMBOL_GPL(dax_clear_sectors);
/* the clear_pmem() calls are ordered by a wmb_pmem() in the caller */
static void dax_new_buf(void __pmem *addr, unsigned size, unsigned first,
@@ -485,11 +484,10 @@ static int dax_writeback_one(struct block_device *bdev,
* end]. This is required by data integrity operations to ensure file data is
* on persistent storage prior to completion of the operation.
*/
-int dax_writeback_mapping_range(struct address_space *mapping, loff_t start,
- loff_t end)
+int dax_writeback_mapping_range(struct address_space *mapping,
+ struct block_device *bdev, struct writeback_control *wbc)
{
struct inode *inode = mapping->host;
- struct block_device *bdev = inode->i_sb->s_bdev;
pgoff_t start_index, end_index, pmd_index;
pgoff_t indices[PAGEVEC_SIZE];
struct pagevec pvec;
@@ -500,8 +498,11 @@ int dax_writeback_mapping_range(struct address_space *mapping, loff_t start,
if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT))
return -EIO;
- start_index = start >> PAGE_CACHE_SHIFT;
- end_index = end >> PAGE_CACHE_SHIFT;
+ if (!mapping->nrexceptional || wbc->sync_mode != WB_SYNC_ALL)
+ return 0;
+
+ start_index = wbc->range_start >> PAGE_CACHE_SHIFT;
+ end_index = wbc->range_end >> PAGE_CACHE_SHIFT;
pmd_index = DAX_PMD_INDEX(start_index);
rcu_read_lock();
diff --git a/fs/dcache.c b/fs/dcache.c
index 92d5140de851..2398f9f94337 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -269,9 +269,6 @@ static inline int dname_external(const struct dentry *dentry)
return dentry->d_name.name != dentry->d_iname;
}
-/*
- * Make sure other CPUs see the inode attached before the type is set.
- */
static inline void __d_set_inode_and_type(struct dentry *dentry,
struct inode *inode,
unsigned type_flags)
@@ -279,28 +276,18 @@ static inline void __d_set_inode_and_type(struct dentry *dentry,
unsigned flags;
dentry->d_inode = inode;
- smp_wmb();
flags = READ_ONCE(dentry->d_flags);
flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
flags |= type_flags;
WRITE_ONCE(dentry->d_flags, flags);
}
-/*
- * Ideally, we want to make sure that other CPUs see the flags cleared before
- * the inode is detached, but this is really a violation of RCU principles
- * since the ordering suggests we should always set inode before flags.
- *
- * We should instead replace or discard the entire dentry - but that sucks
- * performancewise on mass deletion/rename.
- */
static inline void __d_clear_type_and_inode(struct dentry *dentry)
{
unsigned flags = READ_ONCE(dentry->d_flags);
flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
WRITE_ONCE(dentry->d_flags, flags);
- smp_wmb();
dentry->d_inode = NULL;
}
@@ -370,9 +357,11 @@ static void dentry_unlink_inode(struct dentry * dentry)
__releases(dentry->d_inode->i_lock)
{
struct inode *inode = dentry->d_inode;
+
+ raw_write_seqcount_begin(&dentry->d_seq);
__d_clear_type_and_inode(dentry);
hlist_del_init(&dentry->d_u.d_alias);
- dentry_rcuwalk_invalidate(dentry);
+ raw_write_seqcount_end(&dentry->d_seq);
spin_unlock(&dentry->d_lock);
spin_unlock(&inode->i_lock);
if (!inode->i_nlink)
@@ -1758,8 +1747,9 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
spin_lock(&dentry->d_lock);
if (inode)
hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
+ raw_write_seqcount_begin(&dentry->d_seq);
__d_set_inode_and_type(dentry, inode, add_flags);
- dentry_rcuwalk_invalidate(dentry);
+ raw_write_seqcount_end(&dentry->d_seq);
spin_unlock(&dentry->d_lock);
fsnotify_d_instantiate(dentry, inode);
}
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 2c88d683cd91..c1400b109805 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -80,23 +80,6 @@ static int ext2_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
return ret;
}
-static int ext2_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
- struct inode *inode = file_inode(vma->vm_file);
- struct ext2_inode_info *ei = EXT2_I(inode);
- int ret;
-
- sb_start_pagefault(inode->i_sb);
- file_update_time(vma->vm_file);
- down_read(&ei->dax_sem);
-
- ret = __dax_mkwrite(vma, vmf, ext2_get_block, NULL);
-
- up_read(&ei->dax_sem);
- sb_end_pagefault(inode->i_sb);
- return ret;
-}
-
static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
struct vm_fault *vmf)
{
@@ -124,7 +107,7 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
static const struct vm_operations_struct ext2_dax_vm_ops = {
.fault = ext2_dax_fault,
.pmd_fault = ext2_dax_pmd_fault,
- .page_mkwrite = ext2_dax_mkwrite,
+ .page_mkwrite = ext2_dax_fault,
.pfn_mkwrite = ext2_dax_pfn_mkwrite,
};
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 338eefda70c6..6bd58e6ff038 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -737,8 +737,10 @@ static int ext2_get_blocks(struct inode *inode,
* so that it's not found by another thread before it's
* initialised
*/
- err = dax_clear_blocks(inode, le32_to_cpu(chain[depth-1].key),
- 1 << inode->i_blkbits);
+ err = dax_clear_sectors(inode->i_sb->s_bdev,
+ le32_to_cpu(chain[depth-1].key) <<
+ (inode->i_blkbits - 9),
+ 1 << inode->i_blkbits);
if (err) {
mutex_unlock(&ei->truncate_mutex);
goto cleanup;
@@ -874,6 +876,14 @@ ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
static int
ext2_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
+#ifdef CONFIG_FS_DAX
+ if (dax_mapping(mapping)) {
+ return dax_writeback_mapping_range(mapping,
+ mapping->host->i_sb->s_bdev,
+ wbc);
+ }
+#endif
+
return mpage_writepages(mapping, wbc, ext2_get_block);
}
@@ -1296,7 +1306,7 @@ void ext2_set_inode_flags(struct inode *inode)
inode->i_flags |= S_NOATIME;
if (flags & EXT2_DIRSYNC_FL)
inode->i_flags |= S_DIRSYNC;
- if (test_opt(inode->i_sb, DAX))
+ if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode))
inode->i_flags |= S_DAX;
}
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 474f1a4d2ca8..4cd318f31cbe 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -262,23 +262,8 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
return result;
}
-static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
- int err;
- struct inode *inode = file_inode(vma->vm_file);
-
- sb_start_pagefault(inode->i_sb);
- file_update_time(vma->vm_file);
- down_read(&EXT4_I(inode)->i_mmap_sem);
- err = __dax_mkwrite(vma, vmf, ext4_dax_mmap_get_block, NULL);
- up_read(&EXT4_I(inode)->i_mmap_sem);
- sb_end_pagefault(inode->i_sb);
-
- return err;
-}
-
/*
- * Handle write fault for VM_MIXEDMAP mappings. Similarly to ext4_dax_mkwrite()
+ * Handle write fault for VM_MIXEDMAP mappings. Similarly to ext4_dax_fault()
* handler we check for races agaist truncate. Note that since we cycle through
* i_mmap_sem, we are sure that also any hole punching that began before we
* were called is finished by now and so if it included part of the file we
@@ -311,7 +296,7 @@ static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
static const struct vm_operations_struct ext4_dax_vm_ops = {
.fault = ext4_dax_fault,
.pmd_fault = ext4_dax_pmd_fault,
- .page_mkwrite = ext4_dax_mkwrite,
+ .page_mkwrite = ext4_dax_fault,
.pfn_mkwrite = ext4_dax_pfn_mkwrite,
};
#else
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9cc57c3b4661..aee960b1af34 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2478,6 +2478,10 @@ static int ext4_writepages(struct address_space *mapping,
trace_ext4_writepages(inode, wbc);
+ if (dax_mapping(mapping))
+ return dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev,
+ wbc);
+
/*
* No pages to write? This is mainly a kludge to avoid starting
* a transaction for special inodes like journal inode on last iput()
@@ -4155,7 +4159,7 @@ void ext4_set_inode_flags(struct inode *inode)
new_fl |= S_NOATIME;
if (flags & EXT4_DIRSYNC_FL)
new_fl |= S_DIRSYNC;
- if (test_opt(inode->i_sb, DAX))
+ if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode))
new_fl |= S_DAX;
inode_set_flags(inode, new_fl,
S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index a99b010e2194..eae5917c534e 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -583,6 +583,11 @@ group_extend_out:
"Online defrag not supported with bigalloc");
err = -EOPNOTSUPP;
goto mext_out;
+ } else if (IS_DAX(inode)) {
+ ext4_msg(sb, KERN_ERR,
+ "Online defrag not supported with DAX");
+ err = -EOPNOTSUPP;
+ goto mext_out;
}
err = mnt_want_write_file(filp);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 1f76d8950a57..5c46ed9f3e14 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -223,6 +223,9 @@ static void wb_wait_for_completion(struct backing_dev_info *bdi,
#define WB_FRN_HIST_MAX_SLOTS (WB_FRN_HIST_THR_SLOTS / 2 + 1)
/* one round can affect upto 5 slots */
+static atomic_t isw_nr_in_flight = ATOMIC_INIT(0);
+static struct workqueue_struct *isw_wq;
+
void __inode_attach_wb(struct inode *inode, struct page *page)
{
struct backing_dev_info *bdi = inode_to_bdi(inode);
@@ -317,7 +320,6 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
struct inode_switch_wbs_context *isw =
container_of(work, struct inode_switch_wbs_context, work);
struct inode *inode = isw->inode;
- struct super_block *sb = inode->i_sb;
struct address_space *mapping = inode->i_mapping;
struct bdi_writeback *old_wb = inode->i_wb;
struct bdi_writeback *new_wb = isw->new_wb;
@@ -424,8 +426,9 @@ skip_switch:
wb_put(new_wb);
iput(inode);
- deactivate_super(sb);
kfree(isw);
+
+ atomic_dec(&isw_nr_in_flight);
}
static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head)
@@ -435,7 +438,7 @@ static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head)
/* needs to grab bh-unsafe locks, bounce to work item */
INIT_WORK(&isw->work, inode_switch_wbs_work_fn);
- schedule_work(&isw->work);
+ queue_work(isw_wq, &isw->work);
}
/**
@@ -471,20 +474,20 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
/* while holding I_WB_SWITCH, no one else can update the association */
spin_lock(&inode->i_lock);
-
- if (inode->i_state & (I_WB_SWITCH | I_FREEING) ||
- inode_to_wb(inode) == isw->new_wb)
- goto out_unlock;
-
- if (!atomic_inc_not_zero(&inode->i_sb->s_active))
- goto out_unlock;
-
+ if (!(inode->i_sb->s_flags & MS_ACTIVE) ||
+ inode->i_state & (I_WB_SWITCH | I_FREEING) ||
+ inode_to_wb(inode) == isw->new_wb) {
+ spin_unlock(&inode->i_lock);
+ goto out_free;
+ }
inode->i_state |= I_WB_SWITCH;
spin_unlock(&inode->i_lock);
ihold(inode);
isw->inode = inode;
+ atomic_inc(&isw_nr_in_flight);
+
/*
* In addition to synchronizing among switchers, I_WB_SWITCH tells
* the RCU protected stat update paths to grab the mapping's
@@ -494,8 +497,6 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
return;
-out_unlock:
- spin_unlock(&inode->i_lock);
out_free:
if (isw->new_wb)
wb_put(isw->new_wb);
@@ -847,6 +848,33 @@ restart:
wb_put(last_wb);
}
+/**
+ * cgroup_writeback_umount - flush inode wb switches for umount
+ *
+ * This function is called when a super_block is about to be destroyed and
+ * flushes in-flight inode wb switches. An inode wb switch goes through
+ * RCU and then workqueue, so the two need to be flushed in order to ensure
+ * that all previously scheduled switches are finished. As wb switches are
+ * rare occurrences and synchronize_rcu() can take a while, perform
+ * flushing iff wb switches are in flight.
+ */
+void cgroup_writeback_umount(void)
+{
+ if (atomic_read(&isw_nr_in_flight)) {
+ synchronize_rcu();
+ flush_workqueue(isw_wq);
+ }
+}
+
+static int __init cgroup_writeback_init(void)
+{
+ isw_wq = alloc_workqueue("inode_switch_wbs", 0, 0);
+ if (!isw_wq)
+ return -ENOMEM;
+ return 0;
+}
+fs_initcall(cgroup_writeback_init);
+
#else /* CONFIG_CGROUP_WRITEBACK */
static struct bdi_writeback *
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 506765afa1a3..bb8d67e2740a 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -376,12 +376,11 @@ static int hpfs_unlink(struct inode *dir, struct dentry *dentry)
struct inode *inode = d_inode(dentry);
dnode_secno dno;
int r;
- int rep = 0;
int err;
hpfs_lock(dir->i_sb);
hpfs_adjust_length(name, &len);
-again:
+
err = -ENOENT;
de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, &dno, &qbh);
if (!de)
@@ -401,33 +400,9 @@ again:
hpfs_error(dir->i_sb, "there was error when removing dirent");
err = -EFSERROR;
break;
- case 2: /* no space for deleting, try to truncate file */
-
+ case 2: /* no space for deleting */
err = -ENOSPC;
- if (rep++)
- break;
-
- dentry_unhash(dentry);
- if (!d_unhashed(dentry)) {
- hpfs_unlock(dir->i_sb);
- return -ENOSPC;
- }
- if (generic_permission(inode, MAY_WRITE) ||
- !S_ISREG(inode->i_mode) ||
- get_write_access(inode)) {
- d_rehash(dentry);
- } else {
- struct iattr newattrs;
- /*pr_info("truncating file before delete.\n");*/
- newattrs.ia_size = 0;
- newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
- err = notify_change(dentry, &newattrs, NULL);
- put_write_access(inode);
- if (!err)
- goto again;
- }
- hpfs_unlock(dir->i_sb);
- return -ENOSPC;
+ break;
default:
drop_nlink(inode);
err = 0;
diff --git a/fs/jffs2/README.Locking b/fs/jffs2/README.Locking
index 3ea36554107f..8918ac905a3b 100644
--- a/fs/jffs2/README.Locking
+++ b/fs/jffs2/README.Locking
@@ -2,10 +2,6 @@
JFFS2 LOCKING DOCUMENTATION
---------------------------
-At least theoretically, JFFS2 does not require the Big Kernel Lock
-(BKL), which was always helpfully obtained for it by Linux 2.4 VFS
-code. It has its own locking, as described below.
-
This document attempts to describe the existing locking rules for
JFFS2. It is not expected to remain perfectly up to date, but ought to
be fairly close.
@@ -69,6 +65,7 @@ Ordering constraints:
any f->sem held.
2. Never attempt to lock two file mutexes in one thread.
No ordering rules have been made for doing so.
+ 3. Never lock a page cache page with f->sem held.
erase_completion_lock spinlock
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
index 0ae91ad6df2d..b288c8ae1236 100644
--- a/fs/jffs2/build.c
+++ b/fs/jffs2/build.c
@@ -50,7 +50,8 @@ next_inode(int *i, struct jffs2_inode_cache *ic, struct jffs2_sb_info *c)
static void jffs2_build_inode_pass1(struct jffs2_sb_info *c,
- struct jffs2_inode_cache *ic)
+ struct jffs2_inode_cache *ic,
+ int *dir_hardlinks)
{
struct jffs2_full_dirent *fd;
@@ -69,19 +70,21 @@ static void jffs2_build_inode_pass1(struct jffs2_sb_info *c,
dbg_fsbuild("child \"%s\" (ino #%u) of dir ino #%u doesn't exist!\n",
fd->name, fd->ino, ic->ino);
jffs2_mark_node_obsolete(c, fd->raw);
+ /* Clear the ic/raw union so it doesn't cause problems later. */
+ fd->ic = NULL;
continue;
}
+ /* From this point, fd->raw is no longer used so we can set fd->ic */
+ fd->ic = child_ic;
+ child_ic->pino_nlink++;
+ /* If we appear (at this stage) to have hard-linked directories,
+ * set a flag to trigger a scan later */
if (fd->type == DT_DIR) {
- if (child_ic->pino_nlink) {
- JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u appears to be a hard link\n",
- fd->name, fd->ino, ic->ino);
- /* TODO: What do we do about it? */
- } else {
- child_ic->pino_nlink = ic->ino;
- }
- } else
- child_ic->pino_nlink++;
+ child_ic->flags |= INO_FLAGS_IS_DIR;
+ if (child_ic->pino_nlink > 1)
+ *dir_hardlinks = 1;
+ }
dbg_fsbuild("increased nlink for child \"%s\" (ino #%u)\n", fd->name, fd->ino);
/* Can't free scan_dents so far. We might need them in pass 2 */
@@ -95,8 +98,7 @@ static void jffs2_build_inode_pass1(struct jffs2_sb_info *c,
*/
static int jffs2_build_filesystem(struct jffs2_sb_info *c)
{
- int ret;
- int i;
+ int ret, i, dir_hardlinks = 0;
struct jffs2_inode_cache *ic;
struct jffs2_full_dirent *fd;
struct jffs2_full_dirent *dead_fds = NULL;
@@ -120,7 +122,7 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c)
/* Now scan the directory tree, increasing nlink according to every dirent found. */
for_each_inode(i, c, ic) {
if (ic->scan_dents) {
- jffs2_build_inode_pass1(c, ic);
+ jffs2_build_inode_pass1(c, ic, &dir_hardlinks);
cond_resched();
}
}
@@ -156,6 +158,20 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c)
}
dbg_fsbuild("pass 2a complete\n");
+
+ if (dir_hardlinks) {
+ /* If we detected directory hardlinks earlier, *hopefully*
+ * they are gone now because some of the links were from
+ * dead directories which still had some old dirents lying
+ * around and not yet garbage-collected, but which have
+ * been discarded above. So clear the pino_nlink field
+ * in each directory, so that the final scan below can
+ * print appropriate warnings. */
+ for_each_inode(i, c, ic) {
+ if (ic->flags & INO_FLAGS_IS_DIR)
+ ic->pino_nlink = 0;
+ }
+ }
dbg_fsbuild("freeing temporary data structures\n");
/* Finally, we can scan again and free the dirent structs */
@@ -163,6 +179,33 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c)
while(ic->scan_dents) {
fd = ic->scan_dents;
ic->scan_dents = fd->next;
+ /* We do use the pino_nlink field to count nlink of
+ * directories during fs build, so set it to the
+ * parent ino# now. Now that there's hopefully only
+ * one. */
+ if (fd->type == DT_DIR) {
+ if (!fd->ic) {
+ /* We'll have complained about it and marked the coresponding
+ raw node obsolete already. Just skip it. */
+ continue;
+ }
+
+ /* We *have* to have set this in jffs2_build_inode_pass1() */
+ BUG_ON(!(fd->ic->flags & INO_FLAGS_IS_DIR));
+
+ /* We clear ic->pino_nlink ∀ directories' ic *only* if dir_hardlinks
+ * is set. Otherwise, we know this should never trigger anyway, so
+ * we don't do the check. And ic->pino_nlink still contains the nlink
+ * value (which is 1). */
+ if (dir_hardlinks && fd->ic->pino_nlink) {
+ JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u is also hard linked from dir ino #%u\n",
+ fd->name, fd->ino, ic->ino, fd->ic->pino_nlink);
+ /* Should we unlink it from its previous parent? */
+ }
+
+ /* For directories, ic->pino_nlink holds that parent inode # */
+ fd->ic->pino_nlink = ic->ino;
+ }
jffs2_free_full_dirent(fd);
}
ic->scan_dents = NULL;
@@ -241,11 +284,7 @@ static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *c,
/* Reduce nlink of the child. If it's now zero, stick it on the
dead_fds list to be cleaned up later. Else just free the fd */
-
- if (fd->type == DT_DIR)
- child_ic->pino_nlink = 0;
- else
- child_ic->pino_nlink--;
+ child_ic->pino_nlink--;
if (!child_ic->pino_nlink) {
dbg_fsbuild("inode #%u (\"%s\") now has no links; adding to dead_fds list.\n",
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index c5ac5944bc1b..cad86bac3453 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -137,39 +137,33 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
struct page *pg;
struct inode *inode = mapping->host;
struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
- struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
- struct jffs2_raw_inode ri;
- uint32_t alloc_len = 0;
pgoff_t index = pos >> PAGE_CACHE_SHIFT;
uint32_t pageofs = index << PAGE_CACHE_SHIFT;
int ret = 0;
- jffs2_dbg(1, "%s()\n", __func__);
-
- if (pageofs > inode->i_size) {
- ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
- ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
- if (ret)
- return ret;
- }
-
- mutex_lock(&f->sem);
pg = grab_cache_page_write_begin(mapping, index, flags);
- if (!pg) {
- if (alloc_len)
- jffs2_complete_reservation(c);
- mutex_unlock(&f->sem);
+ if (!pg)
return -ENOMEM;
- }
*pagep = pg;
- if (alloc_len) {
+ jffs2_dbg(1, "%s()\n", __func__);
+
+ if (pageofs > inode->i_size) {
/* Make new hole frag from old EOF to new page */
+ struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
+ struct jffs2_raw_inode ri;
struct jffs2_full_dnode *fn;
+ uint32_t alloc_len;
jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new page\n",
(unsigned int)inode->i_size, pageofs);
+ ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
+ ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
+ if (ret)
+ goto out_page;
+
+ mutex_lock(&f->sem);
memset(&ri, 0, sizeof(ri));
ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
@@ -196,6 +190,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
if (IS_ERR(fn)) {
ret = PTR_ERR(fn);
jffs2_complete_reservation(c);
+ mutex_unlock(&f->sem);
goto out_page;
}
ret = jffs2_add_full_dnode_to_inode(c, f, fn);
@@ -210,10 +205,12 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
jffs2_mark_node_obsolete(c, fn->raw);
jffs2_free_full_dnode(fn);
jffs2_complete_reservation(c);
+ mutex_unlock(&f->sem);
goto out_page;
}
jffs2_complete_reservation(c);
inode->i_size = pageofs;
+ mutex_unlock(&f->sem);
}
/*
@@ -222,18 +219,18 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
* case of a short-copy.
*/
if (!PageUptodate(pg)) {
+ mutex_lock(&f->sem);
ret = jffs2_do_readpage_nolock(inode, pg);
+ mutex_unlock(&f->sem);
if (ret)
goto out_page;
}
- mutex_unlock(&f->sem);
jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags);
return ret;
out_page:
unlock_page(pg);
page_cache_release(pg);
- mutex_unlock(&f->sem);
return ret;
}
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c
index 5a2dec2b064c..95d5880a63ee 100644
--- a/fs/jffs2/gc.c
+++ b/fs/jffs2/gc.c
@@ -1296,14 +1296,17 @@ static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_era
BUG_ON(start > orig_start);
}
- /* First, use readpage() to read the appropriate page into the page cache */
- /* Q: What happens if we actually try to GC the _same_ page for which commit_write()
- * triggered garbage collection in the first place?
- * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the
- * page OK. We'll actually write it out again in commit_write, which is a little
- * suboptimal, but at least we're correct.
- */
+ /* The rules state that we must obtain the page lock *before* f->sem, so
+ * drop f->sem temporarily. Since we also hold c->alloc_sem, nothing's
+ * actually going to *change* so we're safe; we only allow reading.
+ *
+ * It is important to note that jffs2_write_begin() will ensure that its
+ * page is marked Uptodate before allocating space. That means that if we
+ * end up here trying to GC the *same* page that jffs2_write_begin() is
+ * trying to write out, read_cache_page() will not deadlock. */
+ mutex_unlock(&f->sem);
pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
+ mutex_lock(&f->sem);
if (IS_ERR(pg_ptr)) {
pr_warn("read_cache_page() returned error: %ld\n",
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index fa35ff79ab35..0637271f3770 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -194,6 +194,7 @@ struct jffs2_inode_cache {
#define INO_STATE_CLEARING 6 /* In clear_inode() */
#define INO_FLAGS_XATTR_CHECKED 0x01 /* has no duplicate xattr_ref */
+#define INO_FLAGS_IS_DIR 0x02 /* is a directory */
#define RAWNODE_CLASS_INODE_CACHE 0
#define RAWNODE_CLASS_XATTR_DATUM 1
@@ -249,7 +250,10 @@ struct jffs2_readinode_info
struct jffs2_full_dirent
{
- struct jffs2_raw_node_ref *raw;
+ union {
+ struct jffs2_raw_node_ref *raw;
+ struct jffs2_inode_cache *ic; /* Just during part of build */
+ };
struct jffs2_full_dirent *next;
uint32_t version;
uint32_t ino; /* == zero for unlink */
diff --git a/fs/namei.c b/fs/namei.c
index f624d132e01e..9c590e0f66e9 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1712,6 +1712,11 @@ static inline int should_follow_link(struct nameidata *nd, struct path *link,
return 0;
if (!follow)
return 0;
+ /* make sure that d_is_symlink above matches inode */
+ if (nd->flags & LOOKUP_RCU) {
+ if (read_seqcount_retry(&link->dentry->d_seq, seq))
+ return -ECHILD;
+ }
return pick_link(nd, link, inode, seq);
}
@@ -1743,11 +1748,11 @@ static int walk_component(struct nameidata *nd, int flags)
if (err < 0)
return err;
- inode = d_backing_inode(path.dentry);
seq = 0; /* we are already out of RCU mode */
err = -ENOENT;
if (d_is_negative(path.dentry))
goto out_path_put;
+ inode = d_backing_inode(path.dentry);
}
if (flags & WALK_PUT)
@@ -3192,12 +3197,12 @@ retry_lookup:
return error;
BUG_ON(nd->flags & LOOKUP_RCU);
- inode = d_backing_inode(path.dentry);
seq = 0; /* out of RCU mode, so the value doesn't matter */
if (unlikely(d_is_negative(path.dentry))) {
path_to_nameidata(&path, nd);
return -ENOENT;
}
+ inode = d_backing_inode(path.dentry);
finish_lookup:
if (nd->depth)
put_link(nd);
@@ -3206,11 +3211,6 @@ finish_lookup:
if (unlikely(error))
return error;
- if (unlikely(d_is_symlink(path.dentry)) && !(open_flag & O_PATH)) {
- path_to_nameidata(&path, nd);
- return -ELOOP;
- }
-
if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path.mnt) {
path_to_nameidata(&path, nd);
} else {
@@ -3229,6 +3229,10 @@ finish_open:
return error;
}
audit_inode(nd->name, nd->path.dentry, 0);
+ if (unlikely(d_is_symlink(nd->path.dentry)) && !(open_flag & O_PATH)) {
+ error = -ELOOP;
+ goto out;
+ }
error = -EISDIR;
if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry))
goto out;
@@ -3273,6 +3277,10 @@ opened:
goto exit_fput;
}
out:
+ if (unlikely(error > 0)) {
+ WARN_ON(1);
+ error = -EINVAL;
+ }
if (got_write)
mnt_drop_write(nd->path.mnt);
path_put(&save_parent);
diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c
index c59a59c37f3d..35ab51c04814 100644
--- a/fs/nfs/blocklayout/extent_tree.c
+++ b/fs/nfs/blocklayout/extent_tree.c
@@ -476,6 +476,7 @@ static void ext_tree_free_commitdata(struct nfs4_layoutcommit_args *arg,
for (i = 0; i < nr_pages; i++)
put_page(arg->layoutupdate_pages[i]);
+ vfree(arg->start_p);
kfree(arg->layoutupdate_pages);
} else {
put_page(arg->layoutupdate_page);
@@ -559,10 +560,15 @@ retry:
if (unlikely(arg->layoutupdate_pages != &arg->layoutupdate_page)) {
void *p = start_p, *end = p + arg->layoutupdate_len;
+ struct page *page = NULL;
int i = 0;
- for ( ; p < end; p += PAGE_SIZE)
- arg->layoutupdate_pages[i++] = vmalloc_to_page(p);
+ arg->start_p = start_p;
+ for ( ; p < end; p += PAGE_SIZE) {
+ page = vmalloc_to_page(p);
+ arg->layoutupdate_pages[i++] = page;
+ get_page(page);
+ }
}
dprintk("%s found %zu ranges\n", __func__, count);
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index bd25dc7077f7..dff83460e5a6 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -16,29 +16,8 @@
#define NFSDBG_FACILITY NFSDBG_PROC
-static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file,
- fmode_t fmode)
-{
- struct nfs_open_context *open;
- struct nfs_lock_context *lock;
- int ret;
-
- open = get_nfs_open_context(nfs_file_open_context(file));
- lock = nfs_get_lock_context(open);
- if (IS_ERR(lock)) {
- put_nfs_open_context(open);
- return PTR_ERR(lock);
- }
-
- ret = nfs4_set_rw_stateid(dst, open, lock, fmode);
-
- nfs_put_lock_context(lock);
- put_nfs_open_context(open);
- return ret;
-}
-
static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
- loff_t offset, loff_t len)
+ struct nfs_lock_context *lock, loff_t offset, loff_t len)
{
struct inode *inode = file_inode(filep);
struct nfs_server *server = NFS_SERVER(inode);
@@ -56,7 +35,8 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
msg->rpc_argp = &args;
msg->rpc_resp = &res;
- status = nfs42_set_rw_stateid(&args.falloc_stateid, filep, FMODE_WRITE);
+ status = nfs4_set_rw_stateid(&args.falloc_stateid, lock->open_context,
+ lock, FMODE_WRITE);
if (status)
return status;
@@ -78,15 +58,26 @@ static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
{
struct nfs_server *server = NFS_SERVER(file_inode(filep));
struct nfs4_exception exception = { };
+ struct nfs_lock_context *lock;
int err;
+ lock = nfs_get_lock_context(nfs_file_open_context(filep));
+ if (IS_ERR(lock))
+ return PTR_ERR(lock);
+
+ exception.inode = file_inode(filep);
+ exception.state = lock->open_context->state;
+
do {
- err = _nfs42_proc_fallocate(msg, filep, offset, len);
- if (err == -ENOTSUPP)
- return -EOPNOTSUPP;
+ err = _nfs42_proc_fallocate(msg, filep, lock, offset, len);
+ if (err == -ENOTSUPP) {
+ err = -EOPNOTSUPP;
+ break;
+ }
err = nfs4_handle_exception(server, err, &exception);
} while (exception.retry);
+ nfs_put_lock_context(lock);
return err;
}
@@ -135,7 +126,8 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
return err;
}
-static loff_t _nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
+static loff_t _nfs42_proc_llseek(struct file *filep,
+ struct nfs_lock_context *lock, loff_t offset, int whence)
{
struct inode *inode = file_inode(filep);
struct nfs42_seek_args args = {
@@ -156,7 +148,8 @@ static loff_t _nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
if (!nfs_server_capable(inode, NFS_CAP_SEEK))
return -ENOTSUPP;
- status = nfs42_set_rw_stateid(&args.sa_stateid, filep, FMODE_READ);
+ status = nfs4_set_rw_stateid(&args.sa_stateid, lock->open_context,
+ lock, FMODE_READ);
if (status)
return status;
@@ -175,17 +168,28 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
{
struct nfs_server *server = NFS_SERVER(file_inode(filep));
struct nfs4_exception exception = { };
+ struct nfs_lock_context *lock;
loff_t err;
+ lock = nfs_get_lock_context(nfs_file_open_context(filep));
+ if (IS_ERR(lock))
+ return PTR_ERR(lock);
+
+ exception.inode = file_inode(filep);
+ exception.state = lock->open_context->state;
+
do {
- err = _nfs42_proc_llseek(filep, offset, whence);
+ err = _nfs42_proc_llseek(filep, lock, offset, whence);
if (err >= 0)
break;
- if (err == -ENOTSUPP)
- return -EOPNOTSUPP;
+ if (err == -ENOTSUPP) {
+ err = -EOPNOTSUPP;
+ break;
+ }
err = nfs4_handle_exception(server, err, &exception);
} while (exception.retry);
+ nfs_put_lock_context(lock);
return err;
}
@@ -298,8 +302,9 @@ int nfs42_proc_layoutstats_generic(struct nfs_server *server,
}
static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
- struct file *dst_f, loff_t src_offset,
- loff_t dst_offset, loff_t count)
+ struct file *dst_f, struct nfs_lock_context *src_lock,
+ struct nfs_lock_context *dst_lock, loff_t src_offset,
+ loff_t dst_offset, loff_t count)
{
struct inode *src_inode = file_inode(src_f);
struct inode *dst_inode = file_inode(dst_f);
@@ -320,11 +325,13 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
msg->rpc_argp = &args;
msg->rpc_resp = &res;
- status = nfs42_set_rw_stateid(&args.src_stateid, src_f, FMODE_READ);
+ status = nfs4_set_rw_stateid(&args.src_stateid, src_lock->open_context,
+ src_lock, FMODE_READ);
if (status)
return status;
- status = nfs42_set_rw_stateid(&args.dst_stateid, dst_f, FMODE_WRITE);
+ status = nfs4_set_rw_stateid(&args.dst_stateid, dst_lock->open_context,
+ dst_lock, FMODE_WRITE);
if (status)
return status;
@@ -349,22 +356,48 @@ int nfs42_proc_clone(struct file *src_f, struct file *dst_f,
};
struct inode *inode = file_inode(src_f);
struct nfs_server *server = NFS_SERVER(file_inode(src_f));
- struct nfs4_exception exception = { };
- int err;
+ struct nfs_lock_context *src_lock;
+ struct nfs_lock_context *dst_lock;
+ struct nfs4_exception src_exception = { };
+ struct nfs4_exception dst_exception = { };
+ int err, err2;
if (!nfs_server_capable(inode, NFS_CAP_CLONE))
return -EOPNOTSUPP;
+ src_lock = nfs_get_lock_context(nfs_file_open_context(src_f));
+ if (IS_ERR(src_lock))
+ return PTR_ERR(src_lock);
+
+ src_exception.inode = file_inode(src_f);
+ src_exception.state = src_lock->open_context->state;
+
+ dst_lock = nfs_get_lock_context(nfs_file_open_context(dst_f));
+ if (IS_ERR(dst_lock)) {
+ err = PTR_ERR(dst_lock);
+ goto out_put_src_lock;
+ }
+
+ dst_exception.inode = file_inode(dst_f);
+ dst_exception.state = dst_lock->open_context->state;
+
do {
- err = _nfs42_proc_clone(&msg, src_f, dst_f, src_offset,
- dst_offset, count);
+ err = _nfs42_proc_clone(&msg, src_f, dst_f, src_lock, dst_lock,
+ src_offset, dst_offset, count);
if (err == -ENOTSUPP || err == -EOPNOTSUPP) {
NFS_SERVER(inode)->caps &= ~NFS_CAP_CLONE;
- return -EOPNOTSUPP;
+ err = -EOPNOTSUPP;
+ break;
}
- err = nfs4_handle_exception(server, err, &exception);
- } while (exception.retry);
- return err;
+ err2 = nfs4_handle_exception(server, err, &src_exception);
+ err = nfs4_handle_exception(server, err, &dst_exception);
+ if (!err)
+ err = err2;
+ } while (src_exception.retry || dst_exception.retry);
+ nfs_put_lock_context(dst_lock);
+out_put_src_lock:
+ nfs_put_lock_context(src_lock);
+ return err;
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4bfc33ad0563..14881594dd07 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2466,9 +2466,9 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
dentry = d_add_unique(dentry, igrab(state->inode));
if (dentry == NULL) {
dentry = opendata->dentry;
- } else if (dentry != ctx->dentry) {
+ } else {
dput(ctx->dentry);
- ctx->dentry = dget(dentry);
+ ctx->dentry = dentry;
}
nfs_set_verifier(dentry,
nfs_save_change_attribute(d_inode(opendata->dir)));
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 482b6e94bb37..2fa483e6dbe2 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -252,6 +252,27 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
}
}
+/*
+ * Mark a pnfs_layout_hdr and all associated layout segments as invalid
+ *
+ * In order to continue using the pnfs_layout_hdr, a full recovery
+ * is required.
+ * Note that caller must hold inode->i_lock.
+ */
+static int
+pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
+ struct list_head *lseg_list)
+{
+ struct pnfs_layout_range range = {
+ .iomode = IOMODE_ANY,
+ .offset = 0,
+ .length = NFS4_MAX_UINT64,
+ };
+
+ set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
+ return pnfs_mark_matching_lsegs_invalid(lo, lseg_list, &range);
+}
+
static int
pnfs_iomode_to_fail_bit(u32 iomode)
{
@@ -554,9 +575,8 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
spin_lock(&nfsi->vfs_inode.i_lock);
lo = nfsi->layout;
if (lo) {
- lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
- pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
pnfs_get_layout_hdr(lo);
+ pnfs_mark_layout_stateid_invalid(lo, &tmp_list);
pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED);
pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED);
spin_unlock(&nfsi->vfs_inode.i_lock);
@@ -617,11 +637,6 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
{
struct pnfs_layout_hdr *lo;
struct inode *inode;
- struct pnfs_layout_range range = {
- .iomode = IOMODE_ANY,
- .offset = 0,
- .length = NFS4_MAX_UINT64,
- };
LIST_HEAD(lseg_list);
int ret = 0;
@@ -636,11 +651,11 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
spin_lock(&inode->i_lock);
list_del_init(&lo->plh_bulk_destroy);
- lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
- if (is_bulk_recall)
- set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
- if (pnfs_mark_matching_lsegs_invalid(lo, &lseg_list, &range))
+ if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) {
+ if (is_bulk_recall)
+ set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
ret = -EAGAIN;
+ }
spin_unlock(&inode->i_lock);
pnfs_free_lseg_list(&lseg_list);
/* Free all lsegs that are attached to commit buckets */
@@ -1738,8 +1753,19 @@ pnfs_set_plh_return_iomode(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode)
if (lo->plh_return_iomode != 0)
iomode = IOMODE_ANY;
lo->plh_return_iomode = iomode;
+ set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
}
+/**
+ * pnfs_mark_matching_lsegs_return - Free or return matching layout segments
+ * @lo: pointer to layout header
+ * @tmp_list: list header to be used with pnfs_free_lseg_list()
+ * @return_range: describe layout segment ranges to be returned
+ *
+ * This function is mainly intended for use by layoutrecall. It attempts
+ * to free the layout segment immediately, or else to mark it for return
+ * as soon as its reference count drops to zero.
+ */
int
pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
struct list_head *tmp_list,
@@ -1762,12 +1788,11 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
lseg, lseg->pls_range.iomode,
lseg->pls_range.offset,
lseg->pls_range.length);
+ if (mark_lseg_invalid(lseg, tmp_list))
+ continue;
+ remaining++;
set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
pnfs_set_plh_return_iomode(lo, return_range->iomode);
- if (!mark_lseg_invalid(lseg, tmp_list))
- remaining++;
- set_bit(NFS_LAYOUT_RETURN_REQUESTED,
- &lo->plh_flags);
}
return remaining;
}
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 794fd1587f34..cda0361e95a4 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -956,6 +956,7 @@ clean_orphan:
tmp_ret = ocfs2_del_inode_from_orphan(osb, inode, di_bh,
update_isize, end);
if (tmp_ret < 0) {
+ ocfs2_inode_unlock(inode, 1);
ret = tmp_ret;
mlog_errno(ret);
brelse(di_bh);
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index ed95272d57a6..52f6de5d40a9 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -618,7 +618,8 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
* sole user of this dentry. Too tricky... Just unhash for
* now.
*/
- d_drop(dentry);
+ if (!err)
+ d_drop(dentry);
inode_unlock(dir);
return err;
@@ -903,6 +904,13 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old,
if (!overwrite && new_is_dir && !old_opaque && new_opaque)
ovl_remove_opaque(newdentry);
+ /*
+ * Old dentry now lives in different location. Dentries in
+ * lowerstack are stale. We cannot drop them here because
+ * access to them is lockless. This could be only pure upper
+ * or opaque directory - numlower is zero. Or upper non-dir
+ * entry - its pureness is tracked by flag opaque.
+ */
if (old_opaque != new_opaque) {
ovl_dentry_set_opaque(old, new_opaque);
if (!overwrite)
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 49e204560655..a4ff5d0d7db9 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -65,6 +65,8 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr)
inode_lock(upperdentry->d_inode);
err = notify_change(upperdentry, attr, NULL);
+ if (!err)
+ ovl_copyattr(upperdentry->d_inode, dentry->d_inode);
inode_unlock(upperdentry->d_inode);
}
ovl_drop_write(dentry);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 8d826bd56b26..619ad4b016d2 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -76,12 +76,14 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry)
if (oe->__upperdentry) {
type = __OVL_PATH_UPPER;
- if (oe->numlower) {
- if (S_ISDIR(dentry->d_inode->i_mode))
- type |= __OVL_PATH_MERGE;
- } else if (!oe->opaque) {
+ /*
+ * Non-dir dentry can hold lower dentry from previous
+ * location. Its purity depends only on opaque flag.
+ */
+ if (oe->numlower && S_ISDIR(dentry->d_inode->i_mode))
+ type |= __OVL_PATH_MERGE;
+ else if (!oe->opaque)
type |= __OVL_PATH_PURE;
- }
} else {
if (oe->numlower > 1)
type |= __OVL_PATH_MERGE;
@@ -341,6 +343,7 @@ static const struct dentry_operations ovl_dentry_operations = {
static const struct dentry_operations ovl_reval_dentry_operations = {
.d_release = ovl_dentry_release,
+ .d_select_inode = ovl_d_select_inode,
.d_revalidate = ovl_dentry_revalidate,
.d_weak_revalidate = ovl_dentry_weak_revalidate,
};
diff --git a/fs/pnode.c b/fs/pnode.c
index 6367e1e435c6..c524fdddc7fb 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -202,6 +202,11 @@ static struct mount *last_dest, *last_source, *dest_master;
static struct mountpoint *mp;
static struct hlist_head *list;
+static inline bool peers(struct mount *m1, struct mount *m2)
+{
+ return m1->mnt_group_id == m2->mnt_group_id && m1->mnt_group_id;
+}
+
static int propagate_one(struct mount *m)
{
struct mount *child;
@@ -212,7 +217,7 @@ static int propagate_one(struct mount *m)
/* skip if mountpoint isn't covered by it */
if (!is_subdir(mp->m_dentry, m->mnt.mnt_root))
return 0;
- if (m->mnt_group_id == last_dest->mnt_group_id) {
+ if (peers(m, last_dest)) {
type = CL_MAKE_SHARED;
} else {
struct mount *n, *p;
@@ -223,7 +228,7 @@ static int propagate_one(struct mount *m)
last_source = last_source->mnt_master;
last_dest = last_source->mnt_parent;
}
- if (n->mnt_group_id != last_dest->mnt_group_id) {
+ if (!peers(n, last_dest)) {
last_source = last_source->mnt_master;
last_dest = last_source->mnt_parent;
}
diff --git a/fs/read_write.c b/fs/read_write.c
index 324ec271cc4e..dadf24e5c95b 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -17,6 +17,7 @@
#include <linux/splice.h>
#include <linux/compat.h>
#include <linux/mount.h>
+#include <linux/fs.h>
#include "internal.h"
#include <asm/uaccess.h>
@@ -183,7 +184,7 @@ loff_t no_seek_end_llseek(struct file *file, loff_t offset, int whence)
switch (whence) {
case SEEK_SET: case SEEK_CUR:
return generic_file_llseek_size(file, offset, whence,
- ~0ULL, 0);
+ OFFSET_MAX, 0);
default:
return -EINVAL;
}
@@ -1532,10 +1533,12 @@ int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
if (!(file_in->f_mode & FMODE_READ) ||
!(file_out->f_mode & FMODE_WRITE) ||
- (file_out->f_flags & O_APPEND) ||
- !file_in->f_op->clone_file_range)
+ (file_out->f_flags & O_APPEND))
return -EBADF;
+ if (!file_in->f_op->clone_file_range)
+ return -EOPNOTSUPP;
+
ret = clone_verify_area(file_in, pos_in, len, false);
if (ret)
return ret;
diff --git a/fs/super.c b/fs/super.c
index 1182af8fd5ff..74914b1bae70 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -415,6 +415,7 @@ void generic_shutdown_super(struct super_block *sb)
sb->s_flags &= ~MS_ACTIVE;
fsnotify_unmount_inodes(sb);
+ cgroup_writeback_umount();
evict_inodes(sb);
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 50311703135b..66cdb44616d5 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -287,6 +287,12 @@ int handle_userfault(struct vm_area_struct *vma, unsigned long address,
goto out;
/*
+ * We don't do userfault handling for the final child pid update.
+ */
+ if (current->flags & PF_EXITING)
+ goto out;
+
+ /*
* Check that we can return VM_FAULT_RETRY.
*
* NOTE: it should become possible to return VM_FAULT_RETRY
diff --git a/fs/xattr.c b/fs/xattr.c
index 07d0e47f6a7f..4861322e28e8 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -940,7 +940,7 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
bool trusted = capable(CAP_SYS_ADMIN);
struct simple_xattr *xattr;
ssize_t remaining_size = size;
- int err;
+ int err = 0;
#ifdef CONFIG_FS_POSIX_ACL
if (inode->i_acl) {
@@ -965,11 +965,11 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
err = xattr_list_one(&buffer, &remaining_size, xattr->name);
if (err)
- return err;
+ break;
}
spin_unlock(&xattrs->lock);
- return size - remaining_size;
+ return err ? err : size - remaining_size;
}
/*
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 379c089fb051..a9ebabfe7587 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -55,7 +55,7 @@ xfs_count_page_state(
} while ((bh = bh->b_this_page) != head);
}
-STATIC struct block_device *
+struct block_device *
xfs_find_bdev_for_inode(
struct inode *inode)
{
@@ -1208,6 +1208,10 @@ xfs_vm_writepages(
struct writeback_control *wbc)
{
xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
+ if (dax_mapping(mapping))
+ return dax_writeback_mapping_range(mapping,
+ xfs_find_bdev_for_inode(mapping->host), wbc);
+
return generic_writepages(mapping, wbc);
}
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index f6ffc9ae5ceb..a4343c63fb38 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -62,5 +62,6 @@ int xfs_get_blocks_dax_fault(struct inode *inode, sector_t offset,
struct buffer_head *map_bh, int create);
extern void xfs_count_page_state(struct page *, int *, int *);
+extern struct block_device *xfs_find_bdev_for_inode(struct inode *);
#endif /* __XFS_AOPS_H__ */
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 45ec9e40150c..6c876012b2e5 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -75,7 +75,8 @@ xfs_zero_extent(
ssize_t size = XFS_FSB_TO_B(mp, count_fsb);
if (IS_DAX(VFS_I(ip)))
- return dax_clear_blocks(VFS_I(ip), block, size);
+ return dax_clear_sectors(xfs_find_bdev_for_inode(VFS_I(ip)),
+ sector, size);
/*
* let the block layer decide on the fastest method of
diff --git a/include/linux/ata.h b/include/linux/ata.h
index d2992bfa1706..c1a2f345cbe6 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -487,8 +487,8 @@ enum ata_tf_protocols {
};
enum ata_ioctls {
- ATA_IOC_GET_IO32 = 0x309,
- ATA_IOC_SET_IO32 = 0x324,
+ ATA_IOC_GET_IO32 = 0x309, /* HDIO_GET_32BIT */
+ ATA_IOC_SET_IO32 = 0x324, /* HDIO_SET_32BIT */
};
/* core structures */
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 5349e6816cbb..cb6888824108 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -310,6 +310,43 @@ static inline void bio_clear_flag(struct bio *bio, unsigned int bit)
bio->bi_flags &= ~(1U << bit);
}
+static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv)
+{
+ *bv = bio_iovec(bio);
+}
+
+static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv)
+{
+ struct bvec_iter iter = bio->bi_iter;
+ int idx;
+
+ if (!bio_flagged(bio, BIO_CLONED)) {
+ *bv = bio->bi_io_vec[bio->bi_vcnt - 1];
+ return;
+ }
+
+ if (unlikely(!bio_multiple_segments(bio))) {
+ *bv = bio_iovec(bio);
+ return;
+ }
+
+ bio_advance_iter(bio, &iter, iter.bi_size);
+
+ if (!iter.bi_bvec_done)
+ idx = iter.bi_idx - 1;
+ else /* in the middle of bvec */
+ idx = iter.bi_idx;
+
+ *bv = bio->bi_io_vec[idx];
+
+ /*
+ * iter.bi_bvec_done records actual length of the last bvec
+ * if this bio ends in the middle of one io vector
+ */
+ if (iter.bi_bvec_done)
+ bv->bv_len = iter.bi_bvec_done;
+}
+
enum bip_flags {
BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */
BIP_MAPPED_INTEGRITY = 1 << 1, /* ref tag has been remapped */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 4571ef1a12a9..413c84fbc4ed 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -895,7 +895,7 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq)
{
struct request_queue *q = rq->q;
- if (unlikely(rq->cmd_type == REQ_TYPE_BLOCK_PC))
+ if (unlikely(rq->cmd_type != REQ_TYPE_FS))
return q->limits.max_hw_sectors;
if (!q->limits.chunk_sectors || (rq->cmd_flags & REQ_DISCARD))
@@ -1372,6 +1372,13 @@ static inline void put_dev_sector(Sector p)
page_cache_release(p.v);
}
+static inline bool __bvec_gap_to_prev(struct request_queue *q,
+ struct bio_vec *bprv, unsigned int offset)
+{
+ return offset ||
+ ((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q));
+}
+
/*
* Check if adding a bio_vec after bprv with offset would create a gap in
* the SG list. Most drivers don't care about this, but some do.
@@ -1381,18 +1388,22 @@ static inline bool bvec_gap_to_prev(struct request_queue *q,
{
if (!queue_virt_boundary(q))
return false;
- return offset ||
- ((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q));
+ return __bvec_gap_to_prev(q, bprv, offset);
}
static inline bool bio_will_gap(struct request_queue *q, struct bio *prev,
struct bio *next)
{
- if (!bio_has_data(prev))
- return false;
+ if (bio_has_data(prev) && queue_virt_boundary(q)) {
+ struct bio_vec pb, nb;
+
+ bio_get_last_bvec(prev, &pb);
+ bio_get_first_bvec(next, &nb);
- return bvec_gap_to_prev(q, &prev->bi_io_vec[prev->bi_vcnt - 1],
- next->bi_io_vec[0].bv_offset);
+ return __bvec_gap_to_prev(q, &pb, nb.bv_offset);
+ }
+
+ return false;
}
static inline bool req_gap_back_merge(struct request *req, struct bio *bio)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 51e498e5470e..21ee41b92e8a 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -10,6 +10,7 @@
#include <uapi/linux/bpf.h>
#include <linux/workqueue.h>
#include <linux/file.h>
+#include <linux/percpu.h>
struct bpf_map;
@@ -36,6 +37,7 @@ struct bpf_map {
u32 key_size;
u32 value_size;
u32 max_entries;
+ u32 map_flags;
u32 pages;
struct user_struct *user;
const struct bpf_map_ops *ops;
@@ -163,6 +165,8 @@ bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *f
const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
#ifdef CONFIG_BPF_SYSCALL
+DECLARE_PER_CPU(int, bpf_prog_active);
+
void bpf_register_prog_type(struct bpf_prog_type_list *tl);
void bpf_register_map_type(struct bpf_map_type_list *tl);
@@ -175,6 +179,7 @@ struct bpf_map *__bpf_map_get(struct fd f);
void bpf_map_inc(struct bpf_map *map, bool uref);
void bpf_map_put_with_uref(struct bpf_map *map);
void bpf_map_put(struct bpf_map *map);
+int bpf_map_precharge_memlock(u32 pages);
extern int sysctl_unprivileged_bpf_disabled;
@@ -190,6 +195,7 @@ int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
u64 flags);
int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
u64 flags);
+int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value);
/* memcpy that is used with 8-byte aligned pointers, power-of-8 size and
* forced to use 'long' read/writes to try to atomically copy long counters.
diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index c1ef6f14e7be..15151f3c4120 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -75,6 +75,7 @@
#define CEPH_FEATURE_CRUSH_TUNABLES5 (1ULL<<58) /* chooseleaf stable mode */
// duplicated since it was introduced at the same time as CEPH_FEATURE_CRUSH_TUNABLES5
#define CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING (1ULL<<58) /* New, v7 encoding */
+#define CEPH_FEATURE_FS_FILE_LAYOUT_V2 (1ULL<<58) /* file_layout_t */
/*
* The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 818e45078929..636dd59ab505 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -7,7 +7,7 @@
ssize_t dax_do_io(struct kiocb *, struct inode *, struct iov_iter *, loff_t,
get_block_t, dio_iodone_t, int flags);
-int dax_clear_blocks(struct inode *, sector_t block, long size);
+int dax_clear_sectors(struct block_device *bdev, sector_t _sector, long _size);
int dax_zero_page_range(struct inode *, loff_t from, unsigned len, get_block_t);
int dax_truncate_page(struct inode *, loff_t from, get_block_t);
int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t,
@@ -52,6 +52,8 @@ static inline bool dax_mapping(struct address_space *mapping)
{
return mapping->host && IS_DAX(mapping->host);
}
-int dax_writeback_mapping_range(struct address_space *mapping, loff_t start,
- loff_t end);
+
+struct writeback_control;
+int dax_writeback_mapping_range(struct address_space *mapping,
+ struct block_device *bdev, struct writeback_control *wbc);
#endif
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 7781ce110503..c4b5f4b3f8f8 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -409,9 +409,7 @@ static inline bool d_mountpoint(const struct dentry *dentry)
*/
static inline unsigned __d_entry_type(const struct dentry *dentry)
{
- unsigned type = READ_ONCE(dentry->d_flags);
- smp_rmb();
- return type & DCACHE_ENTRY_TYPE;
+ return dentry->d_flags & DCACHE_ENTRY_TYPE;
}
static inline bool d_is_miss(const struct dentry *dentry)
diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index a338a688ee4a..dcb89e3515db 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -46,10 +46,6 @@ struct br_ip_list {
#define BR_LEARNING_SYNC BIT(9)
#define BR_PROXYARP_WIFI BIT(10)
-/* values as per ieee8021QBridgeFdbAgingTime */
-#define BR_MIN_AGEING_TIME (10 * HZ)
-#define BR_MAX_AGEING_TIME (1000000 * HZ)
-
#define BR_DEFAULT_AGEING_TIME (300 * HZ)
extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *));
diff --git a/include/linux/libata.h b/include/linux/libata.h
index bec2abbd7ab2..2c4ebef79d0c 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -720,7 +720,7 @@ struct ata_device {
union {
u16 id[ATA_ID_WORDS]; /* IDENTIFY xxx DEVICE data */
u32 gscr[SATA_PMP_GSCR_DWORDS]; /* PMP GSCR block */
- };
+ } ____cacheline_aligned;
/* DEVSLP Timing Variables from Identify Device Data Log */
u8 devslp_timing[ATA_LOG_DEVSLP_SIZE];
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index bed40dff0e86..141ffdd59960 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -26,9 +26,8 @@ enum {
/* need to set a limit somewhere, but yes, this is likely overkill */
ND_IOCTL_MAX_BUFLEN = SZ_4M,
- ND_CMD_MAX_ELEM = 4,
+ ND_CMD_MAX_ELEM = 5,
ND_CMD_MAX_ENVELOPE = 16,
- ND_CMD_ARS_STATUS_MAX = SZ_4K,
ND_MAX_MAPPINGS = 32,
/* region flag indicating to direct-map persistent memory by default */
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 03ffe9530365..9d91ce39eb0f 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -4259,7 +4259,9 @@ struct mlx5_ifc_modify_tir_bitmask_bits {
u8 reserved_at_20[0x1b];
u8 self_lb_en[0x1];
- u8 reserved_at_3c[0x3];
+ u8 reserved_at_3c[0x1];
+ u8 hash[0x1];
+ u8 reserved_at_3e[0x1];
u8 lro[0x1];
};
diff --git a/include/linux/net.h b/include/linux/net.h
index 0b4ac7da583a..49175e4ced11 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -215,6 +215,7 @@ int __sock_create(struct net *net, int family, int type, int proto,
int sock_create(int family, int type, int proto, struct socket **res);
int sock_create_kern(struct net *net, int family, int type, int proto, struct socket **res);
int sock_create_lite(int family, int type, int proto, struct socket **res);
+struct socket *sock_alloc(void);
void sock_release(struct socket *sock);
int sock_sendmsg(struct socket *sock, struct msghdr *msg);
int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index efe7cec111fa..41df0b450757 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -268,6 +268,7 @@ struct header_ops {
void (*cache_update)(struct hh_cache *hh,
const struct net_device *dev,
const unsigned char *haddr);
+ bool (*validate)(const char *ll_header, unsigned int len);
};
/* These flag bits are private to the generic network queueing
@@ -785,6 +786,7 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
enum {
TC_SETUP_MQPRIO,
TC_SETUP_CLSU32,
+ TC_SETUP_CLSFLOWER,
};
struct tc_cls_u32_offload;
@@ -794,6 +796,7 @@ struct tc_to_netdev {
union {
u8 tc;
struct tc_cls_u32_offload *cls_u32;
+ struct tc_cls_flower_offload *cls_flower;
};
};
@@ -1459,8 +1462,7 @@ enum netdev_priv_flags {
* @dma: DMA channel
* @mtu: Interface MTU value
* @type: Interface hardware type
- * @hard_header_len: Hardware header length, which means that this is the
- * minimum size of a packet.
+ * @hard_header_len: Maximum hardware header length.
*
* @needed_headroom: Extra headroom the hardware may need, but not in all
* cases can this be guaranteed
@@ -2687,6 +2689,24 @@ static inline int dev_parse_header(const struct sk_buff *skb,
return dev->header_ops->parse(skb, haddr);
}
+/* ll_header must have at least hard_header_len allocated */
+static inline bool dev_validate_header(const struct net_device *dev,
+ char *ll_header, int len)
+{
+ if (likely(len >= dev->hard_header_len))
+ return true;
+
+ if (capable(CAP_SYS_RAWIO)) {
+ memset(ll_header + len, 0, dev->hard_header_len - len);
+ return true;
+ }
+
+ if (dev->header_ops && dev->header_ops->validate)
+ return dev->header_ops->validate(ll_header, len);
+
+ return false;
+}
+
typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr, int len);
int register_gifconf(unsigned int family, gifconf_func_t *gifconf);
static inline int unregister_gifconf(unsigned int family)
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 0ad556726181..9230f9aee896 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -141,22 +141,6 @@ void nf_unregister_sockopt(struct nf_sockopt_ops *reg);
#ifdef HAVE_JUMP_LABEL
extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
-
-static inline bool nf_hook_list_active(struct list_head *hook_list,
- u_int8_t pf, unsigned int hook)
-{
- if (__builtin_constant_p(pf) &&
- __builtin_constant_p(hook))
- return static_key_false(&nf_hooks_needed[pf][hook]);
-
- return !list_empty(hook_list);
-}
-#else
-static inline bool nf_hook_list_active(struct list_head *hook_list,
- u_int8_t pf, unsigned int hook)
-{
- return !list_empty(hook_list);
-}
#endif
int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state);
@@ -177,9 +161,18 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
int (*okfn)(struct net *, struct sock *, struct sk_buff *),
int thresh)
{
- struct list_head *hook_list = &net->nf.hooks[pf][hook];
+ struct list_head *hook_list;
+
+#ifdef HAVE_JUMP_LABEL
+ if (__builtin_constant_p(pf) &&
+ __builtin_constant_p(hook) &&
+ !static_key_false(&nf_hooks_needed[pf][hook]))
+ return 1;
+#endif
+
+ hook_list = &net->nf.hooks[pf][hook];
- if (nf_hook_list_active(hook_list, pf, hook)) {
+ if (!list_empty(hook_list)) {
struct nf_hook_state state;
nf_hook_state_init(&state, hook_list, hook, thresh,
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index c5577410c25d..80a305b85323 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -200,6 +200,9 @@ struct xt_table {
u_int8_t af; /* address/protocol family */
int priority; /* hook order */
+ /* called when table is needed in the given netns */
+ int (*table_init)(struct net *net);
+
/* A unique name... */
const char name[XT_TABLE_MAXNAMELEN];
};
@@ -408,8 +411,7 @@ xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu)
return cnt;
}
-struct nf_hook_ops *xt_hook_link(const struct xt_table *, nf_hookfn *);
-void xt_hook_unlink(const struct xt_table *, struct nf_hook_ops *);
+struct nf_hook_ops *xt_hook_ops_alloc(const struct xt_table *, nf_hookfn *);
#ifdef CONFIG_COMPAT
#include <net/compat.h>
diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h
index 6f074db2f23d..029b95e8924e 100644
--- a/include/linux/netfilter_arp/arp_tables.h
+++ b/include/linux/netfilter_arp/arp_tables.h
@@ -48,10 +48,11 @@ struct arpt_error {
}
extern void *arpt_alloc_initial_table(const struct xt_table *);
-extern struct xt_table *arpt_register_table(struct net *net,
- const struct xt_table *table,
- const struct arpt_replace *repl);
-extern void arpt_unregister_table(struct xt_table *table);
+int arpt_register_table(struct net *net, const struct xt_table *table,
+ const struct arpt_replace *repl,
+ const struct nf_hook_ops *ops, struct xt_table **res);
+void arpt_unregister_table(struct net *net, struct xt_table *table,
+ const struct nf_hook_ops *ops);
extern unsigned int arpt_do_table(struct sk_buff *skb,
const struct nf_hook_state *state,
struct xt_table *table);
diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h
index aa598f942c01..7bfc5893ec31 100644
--- a/include/linux/netfilter_ipv4/ip_tables.h
+++ b/include/linux/netfilter_ipv4/ip_tables.h
@@ -24,10 +24,11 @@
extern void ipt_init(void) __init;
-extern struct xt_table *ipt_register_table(struct net *net,
- const struct xt_table *table,
- const struct ipt_replace *repl);
-extern void ipt_unregister_table(struct net *net, struct xt_table *table);
+int ipt_register_table(struct net *net, const struct xt_table *table,
+ const struct ipt_replace *repl,
+ const struct nf_hook_ops *ops, struct xt_table **res);
+void ipt_unregister_table(struct net *net, struct xt_table *table,
+ const struct nf_hook_ops *ops);
/* Standard entry. */
struct ipt_standard {
diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h
index 0f76e5c674f9..b21c392d6012 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -25,10 +25,11 @@
extern void ip6t_init(void) __init;
extern void *ip6t_alloc_initial_table(const struct xt_table *);
-extern struct xt_table *ip6t_register_table(struct net *net,
- const struct xt_table *table,
- const struct ip6t_replace *repl);
-extern void ip6t_unregister_table(struct net *net, struct xt_table *table);
+int ip6t_register_table(struct net *net, const struct xt_table *table,
+ const struct ip6t_replace *repl,
+ const struct nf_hook_ops *ops, struct xt_table **res);
+void ip6t_unregister_table(struct net *net, struct xt_table *table,
+ const struct nf_hook_ops *ops);
extern unsigned int ip6t_do_table(struct sk_buff *skb,
const struct nf_hook_state *state,
struct xt_table *table);
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 48e0320cd643..67300f8e5f2f 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -550,9 +550,7 @@ extern int nfs_readpage_async(struct nfs_open_context *, struct inode *,
static inline loff_t nfs_size_to_loff_t(__u64 size)
{
- if (size > (__u64) OFFSET_MAX - 1)
- return OFFSET_MAX - 1;
- return (loff_t) size;
+ return min_t(u64, size, OFFSET_MAX);
}
static inline ino_t
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 791098a08a87..d320906cf13e 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -275,6 +275,7 @@ struct nfs4_layoutcommit_args {
size_t layoutupdate_len;
struct page *layoutupdate_page;
struct page **layoutupdate_pages;
+ __be32 *start_p;
};
struct nfs4_layoutcommit_res {
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 27df4a6585da..27716254dcc5 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -988,23 +988,6 @@ static inline int pci_is_managed(struct pci_dev *pdev)
return pdev->is_managed;
}
-static inline void pci_set_managed_irq(struct pci_dev *pdev, unsigned int irq)
-{
- pdev->irq = irq;
- pdev->irq_managed = 1;
-}
-
-static inline void pci_reset_managed_irq(struct pci_dev *pdev)
-{
- pdev->irq = 0;
- pdev->irq_managed = 0;
-}
-
-static inline bool pci_has_managed_irq(struct pci_dev *pdev)
-{
- return pdev->irq_managed && pdev->irq > 0;
-}
-
void pci_disable_device(struct pci_dev *dev);
extern unsigned int pcibios_max_latency;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 7da3c25999df..0967a2467457 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -397,6 +397,7 @@ struct pmu {
* enum perf_event_active_state - the states of a event
*/
enum perf_event_active_state {
+ PERF_EVENT_STATE_DEAD = -4,
PERF_EVENT_STATE_EXIT = -3,
PERF_EVENT_STATE_ERROR = -2,
PERF_EVENT_STATE_OFF = -1,
@@ -905,7 +906,7 @@ perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
}
}
-extern struct static_key_deferred perf_sched_events;
+extern struct static_key_false perf_sched_events;
static __always_inline bool
perf_sw_migrate_enabled(void)
@@ -924,7 +925,7 @@ static inline void perf_event_task_migrate(struct task_struct *task)
static inline void perf_event_task_sched_in(struct task_struct *prev,
struct task_struct *task)
{
- if (static_key_false(&perf_sched_events.key))
+ if (static_branch_unlikely(&perf_sched_events))
__perf_event_task_sched_in(prev, task);
if (perf_sw_migrate_enabled() && task->sched_migrated) {
@@ -941,7 +942,7 @@ static inline void perf_event_task_sched_out(struct task_struct *prev,
{
perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
- if (static_key_false(&perf_sched_events.key))
+ if (static_branch_unlikely(&perf_sched_events))
__perf_event_task_sched_out(prev, next);
}
diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h
index 998d8f1c3c91..b50c0492629d 100644
--- a/include/linux/power/bq27xxx_battery.h
+++ b/include/linux/power/bq27xxx_battery.h
@@ -49,6 +49,7 @@ struct bq27xxx_reg_cache {
struct bq27xxx_device_info {
struct device *dev;
+ int id;
enum bq27xxx_chip chip;
const char *name;
struct bq27xxx_access_methods bus;
diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h
index e53b0ca49e41..e1d69834a11f 100644
--- a/include/linux/qed/qed_eth_if.h
+++ b/include/linux/qed/qed_eth_if.h
@@ -39,6 +39,14 @@ struct qed_update_vport_params {
struct qed_update_vport_rss_params rss_params;
};
+struct qed_start_vport_params {
+ bool remove_inner_vlan;
+ bool gro_enable;
+ bool drop_ttl0;
+ u8 vport_id;
+ u16 mtu;
+};
+
struct qed_stop_rxq_params {
u8 rss_id;
u8 rx_queue_id;
@@ -118,9 +126,7 @@ struct qed_eth_ops {
void *cookie);
int (*vport_start)(struct qed_dev *cdev,
- u8 vport_id, u16 mtu,
- u8 drop_ttl0_flg,
- u8 inner_vlan_removal_en_flg);
+ struct qed_start_vport_params *params);
int (*vport_stop)(struct qed_dev *cdev,
u8 vport_id);
diff --git a/include/linux/random.h b/include/linux/random.h
index a75840c1aa71..9c29122037f9 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -34,6 +34,7 @@ extern const struct file_operations random_fops, urandom_fops;
#endif
unsigned int get_random_int(void);
+unsigned long get_random_long(void);
unsigned long randomize_range(unsigned long start, unsigned long end, unsigned long len);
u32 prandom_u32(void);
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 14ec1652daf4..17d4f849c65e 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -319,6 +319,27 @@ static inline void list_splice_tail_init_rcu(struct list_head *list,
})
/**
+ * list_next_or_null_rcu - get the first element from a list
+ * @head: the head for the list.
+ * @ptr: the list head to take the next element from.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list_head within the struct.
+ *
+ * Note that if the ptr is at the end of the list, NULL is returned.
+ *
+ * This primitive may safely run concurrently with the _rcu list-mutation
+ * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock().
+ */
+#define list_next_or_null_rcu(head, ptr, type, member) \
+({ \
+ struct list_head *__head = (head); \
+ struct list_head *__ptr = (ptr); \
+ struct list_head *__next = READ_ONCE(__ptr->next); \
+ likely(__next != __head) ? list_entry_rcu(__next, type, \
+ member) : NULL; \
+})
+
+/**
* list_for_each_entry_rcu - iterate over rcu list of given type
* @pos: the type * to use as a loop cursor.
* @head: the head for your list.
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 797cefb888fb..15d0df943466 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1981,6 +1981,30 @@ static inline void skb_reserve(struct sk_buff *skb, int len)
skb->tail += len;
}
+/**
+ * skb_tailroom_reserve - adjust reserved_tailroom
+ * @skb: buffer to alter
+ * @mtu: maximum amount of headlen permitted
+ * @needed_tailroom: minimum amount of reserved_tailroom
+ *
+ * Set reserved_tailroom so that headlen can be as large as possible but
+ * not larger than mtu and tailroom cannot be smaller than
+ * needed_tailroom.
+ * The required headroom should already have been reserved before using
+ * this function.
+ */
+static inline void skb_tailroom_reserve(struct sk_buff *skb, unsigned int mtu,
+ unsigned int needed_tailroom)
+{
+ SKB_LINEAR_ASSERT(skb);
+ if (mtu < skb_tailroom(skb) - needed_tailroom)
+ /* use at most mtu */
+ skb->reserved_tailroom = skb_tailroom(skb) - mtu;
+ else
+ /* use up to all available space */
+ skb->reserved_tailroom = needed_tailroom;
+}
+
#define ENCAP_TYPE_ETHER 0
#define ENCAP_TYPE_IPPROTO 1
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 5bf59c8493b7..73bf6c6a833b 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -200,7 +200,9 @@ struct ucred {
#define AF_ALG 38 /* Algorithm sockets */
#define AF_NFC 39 /* NFC sockets */
#define AF_VSOCK 40 /* vSockets */
-#define AF_MAX 41 /* For now.. */
+#define AF_KCM 41 /* Kernel Connection Multiplexor*/
+
+#define AF_MAX 42 /* For now.. */
/* Protocol families, same as address families. */
#define PF_UNSPEC AF_UNSPEC
@@ -246,6 +248,7 @@ struct ucred {
#define PF_ALG AF_ALG
#define PF_NFC AF_NFC
#define PF_VSOCK AF_VSOCK
+#define PF_KCM AF_KCM
#define PF_MAX AF_MAX
/* Maximum queue length specifiable by listen. */
@@ -274,6 +277,7 @@ struct ucred {
#define MSG_MORE 0x8000 /* Sender will send more */
#define MSG_WAITFORONE 0x10000 /* recvmmsg(): block until 1+ packets avail */
#define MSG_SENDPAGE_NOTLAST 0x20000 /* sendpage() internal : not the last page */
+#define MSG_BATCH 0x40000 /* sendmmsg(): more messages coming */
#define MSG_EOF MSG_FIN
#define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */
@@ -322,6 +326,7 @@ struct ucred {
#define SOL_CAIF 278
#define SOL_ALG 279
#define SOL_NFC 280
+#define SOL_KCM 281
/* IPX options */
#define IPX_TYPE 1
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 6e53fa8942a4..4bcf5a61aada 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -114,6 +114,7 @@ struct plat_stmmacenet_data {
int interface;
struct stmmac_mdio_bus_data *mdio_bus_data;
struct device_node *phy_node;
+ struct device_node *mdio_node;
struct stmmac_dma_cfg *dma_cfg;
int clk_csr;
int has_gmac;
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 429fdfc3baf5..925730bc9fc1 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -568,6 +568,8 @@ enum {
FILTER_DYN_STRING,
FILTER_PTR_STRING,
FILTER_TRACE_FN,
+ FILTER_COMM,
+ FILTER_CPU,
};
extern int trace_event_raw_init(struct trace_event_call *call);
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index b333c945e571..d0b5ca5d4e08 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -198,6 +198,7 @@ void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
void wbc_detach_inode(struct writeback_control *wbc);
void wbc_account_io(struct writeback_control *wbc, struct page *page,
size_t bytes);
+void cgroup_writeback_umount(void);
/**
* inode_attach_wb - associate an inode with its wb
@@ -301,6 +302,10 @@ static inline void wbc_account_io(struct writeback_control *wbc,
{
}
+static inline void cgroup_writeback_umount(void)
+{
+}
+
#endif /* CONFIG_CGROUP_WRITEBACK */
/*
diff --git a/include/net/act_api.h b/include/net/act_api.h
index 342be6c5ab5c..2a19fe111c78 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -78,11 +78,6 @@ static inline void tcf_lastuse_update(struct tcf_t *tm)
tm->lastuse = now;
}
-#ifdef CONFIG_NET_CLS_ACT
-
-#define ACT_P_CREATED 1
-#define ACT_P_DELETED 1
-
struct tc_action {
void *priv;
const struct tc_action_ops *ops;
@@ -92,6 +87,11 @@ struct tc_action {
struct tcf_hashinfo *hinfo;
};
+#ifdef CONFIG_NET_CLS_ACT
+
+#define ACT_P_CREATED 1
+#define ACT_P_DELETED 1
+
struct tc_action_ops {
struct list_head head;
char kind[IFNAMSIZ];
@@ -171,5 +171,16 @@ int tcf_action_dump(struct sk_buff *skb, struct list_head *, int, int);
int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int);
int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int);
int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int);
+
+#define tc_no_actions(_exts) \
+ (list_empty(&(_exts)->actions))
+
+#define tc_for_each_action(_a, _exts) \
+ list_for_each_entry(a, &(_exts)->actions, list)
+#else /* CONFIG_NET_CLS_ACT */
+
+#define tc_no_actions(_exts) true
+#define tc_for_each_action(_a, _exts) while (0)
+
#endif /* CONFIG_NET_CLS_ACT */
#endif
diff --git a/include/net/checksum.h b/include/net/checksum.h
index 10a16b5bd1c7..5c30891e84e5 100644
--- a/include/net/checksum.h
+++ b/include/net/checksum.h
@@ -88,8 +88,11 @@ static inline __wsum
csum_block_add(__wsum csum, __wsum csum2, int offset)
{
u32 sum = (__force u32)csum2;
- if (offset&1)
- sum = ((sum&0xFF00FF)<<8)+((sum>>8)&0xFF00FF);
+
+ /* rotate sum to align it with a 16b boundary */
+ if (offset & 1)
+ sum = ror32(sum, 8);
+
return csum_add(csum, (__force __wsum)sum);
}
@@ -102,10 +105,7 @@ csum_block_add_ext(__wsum csum, __wsum csum2, int offset, int len)
static inline __wsum
csum_block_sub(__wsum csum, __wsum csum2, int offset)
{
- u32 sum = (__force u32)csum2;
- if (offset&1)
- sum = ((sum&0xFF00FF)<<8)+((sum>>8)&0xFF00FF);
- return csum_sub(csum, (__force __wsum)sum);
+ return csum_block_add(csum, ~csum2, offset);
}
static inline __wsum csum_unfold(__sum16 n)
@@ -120,6 +120,11 @@ static inline __wsum csum_partial_ext(const void *buff, int len, __wsum sum)
#define CSUM_MANGLED_0 ((__force __sum16)0xffff)
+static inline void csum_replace_by_diff(__sum16 *sum, __wsum diff)
+{
+ *sum = csum_fold(csum_add(diff, ~csum_unfold(*sum)));
+}
+
static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to)
{
__wsum tmp = csum_sub(~csum_unfold(*sum), (__force __wsum)from);
diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h
index 84b833af6882..5db9f5910428 100644
--- a/include/net/dst_metadata.h
+++ b/include/net/dst_metadata.h
@@ -126,7 +126,7 @@ static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb,
ip_tunnel_key_init(&tun_dst->u.tun_info.key,
iph->saddr, iph->daddr, iph->tos, iph->ttl,
- 0, 0, tunnel_id, flags);
+ 0, 0, 0, tunnel_id, flags);
return tun_dst;
}
@@ -152,8 +152,11 @@ static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb,
info->key.u.ipv6.src = ip6h->saddr;
info->key.u.ipv6.dst = ip6h->daddr;
+
info->key.tos = ipv6_get_dsfield(ip6h);
info->key.ttl = ip6h->hop_limit;
+ info->key.label = ip6_flowlabel(ip6h);
+
return tun_dst;
}
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 8c8548cf5888..d3d60dccd19f 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -184,4 +184,17 @@ static inline bool flow_keys_have_l4(struct flow_keys *keys)
u32 flow_hash_from_keys(struct flow_keys *keys);
+static inline bool dissector_uses_key(const struct flow_dissector *flow_dissector,
+ enum flow_dissector_key_id key_id)
+{
+ return flow_dissector->used_keys & (1 << key_id);
+}
+
+static inline void *skb_flow_dissector_target(struct flow_dissector *flow_dissector,
+ enum flow_dissector_key_id key_id,
+ void *target_container)
+{
+ return ((char *)target_container) + flow_dissector->offset[key_id];
+}
+
#endif
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 5f28b606633e..5dc2e454f866 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -48,6 +48,7 @@ struct ip_tunnel_key {
__be16 tun_flags;
u8 tos; /* TOS for IPv4, TC for IPv6 */
u8 ttl; /* TTL for IPv4, HL for IPv6 */
+ __be32 label; /* Flow Label for IPv6 */
__be16 tp_src;
__be16 tp_dst;
};
@@ -140,6 +141,7 @@ struct ip_tunnel {
#define TUNNEL_CRIT_OPT __cpu_to_be16(0x0400)
#define TUNNEL_GENEVE_OPT __cpu_to_be16(0x0800)
#define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000)
+#define TUNNEL_NOCACHE __cpu_to_be16(0x2000)
#define TUNNEL_OPTIONS_PRESENT (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT)
@@ -180,7 +182,7 @@ int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *op,
static inline void ip_tunnel_key_init(struct ip_tunnel_key *key,
__be32 saddr, __be32 daddr,
- u8 tos, u8 ttl,
+ u8 tos, u8 ttl, __be32 label,
__be16 tp_src, __be16 tp_dst,
__be64 tun_id, __be16 tun_flags)
{
@@ -191,6 +193,7 @@ static inline void ip_tunnel_key_init(struct ip_tunnel_key *key,
0, IP_TUNNEL_KEY_IPV4_PAD_LEN);
key->tos = tos;
key->ttl = ttl;
+ key->label = label;
key->tun_flags = tun_flags;
/* For the tunnel types on the top of IPsec, the tp_src and tp_dst of
@@ -206,6 +209,20 @@ static inline void ip_tunnel_key_init(struct ip_tunnel_key *key,
0, sizeof(*key) - IP_TUNNEL_KEY_SIZE);
}
+static inline bool
+ip_tunnel_dst_cache_usable(const struct sk_buff *skb,
+ const struct ip_tunnel_info *info)
+{
+ if (skb->mark)
+ return false;
+ if (!info)
+ return true;
+ if (info->key.tun_flags & TUNNEL_NOCACHE)
+ return false;
+
+ return true;
+}
+
static inline unsigned short ip_tunnel_info_af(const struct ip_tunnel_info
*tun_info)
{
@@ -354,6 +371,17 @@ static inline void ip_tunnel_unneed_metadata(void)
{
}
+static inline void ip_tunnel_info_opts_get(void *to,
+ const struct ip_tunnel_info *info)
+{
+}
+
+static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info,
+ const void *from, int len)
+{
+ info->options_len = 0;
+}
+
#endif /* CONFIG_INET */
#endif /* __NET_IP_TUNNELS_H */
diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h
index 8f81bbbc38fc..e0f4109e64c6 100644
--- a/include/net/iw_handler.h
+++ b/include/net/iw_handler.h
@@ -439,6 +439,12 @@ int dev_get_wireless_info(char *buffer, char **start, off_t offset, int length);
/* Send a single event to user space */
void wireless_send_event(struct net_device *dev, unsigned int cmd,
union iwreq_data *wrqu, const char *extra);
+#ifdef CONFIG_WEXT_CORE
+/* flush all previous wext events - if work is done from netdev notifiers */
+void wireless_nlevent_flush(void);
+#else
+static inline void wireless_nlevent_flush(void) {}
+#endif
/* We may need a function to send a stream of events to user space.
* More on that later... */
diff --git a/include/net/kcm.h b/include/net/kcm.h
new file mode 100644
index 000000000000..2840b5825dcc
--- /dev/null
+++ b/include/net/kcm.h
@@ -0,0 +1,226 @@
+/*
+ * Kernel Connection Multiplexor
+ *
+ * Copyright (c) 2016 Tom Herbert <tom@herbertland.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ */
+
+#ifndef __NET_KCM_H_
+#define __NET_KCM_H_
+
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <uapi/linux/kcm.h>
+
+extern unsigned int kcm_net_id;
+
+#define KCM_STATS_ADD(stat, count) ((stat) += (count))
+#define KCM_STATS_INCR(stat) ((stat)++)
+
+struct kcm_psock_stats {
+ unsigned long long rx_msgs;
+ unsigned long long rx_bytes;
+ unsigned long long tx_msgs;
+ unsigned long long tx_bytes;
+ unsigned int rx_aborts;
+ unsigned int rx_mem_fail;
+ unsigned int rx_need_more_hdr;
+ unsigned int rx_msg_too_big;
+ unsigned int rx_msg_timeouts;
+ unsigned int rx_bad_hdr_len;
+ unsigned long long reserved;
+ unsigned long long unreserved;
+ unsigned int tx_aborts;
+};
+
+struct kcm_mux_stats {
+ unsigned long long rx_msgs;
+ unsigned long long rx_bytes;
+ unsigned long long tx_msgs;
+ unsigned long long tx_bytes;
+ unsigned int rx_ready_drops;
+ unsigned int tx_retries;
+ unsigned int psock_attach;
+ unsigned int psock_unattach_rsvd;
+ unsigned int psock_unattach;
+};
+
+struct kcm_stats {
+ unsigned long long rx_msgs;
+ unsigned long long rx_bytes;
+ unsigned long long tx_msgs;
+ unsigned long long tx_bytes;
+};
+
+struct kcm_tx_msg {
+ unsigned int sent;
+ unsigned int fragidx;
+ unsigned int frag_offset;
+ unsigned int msg_flags;
+ struct sk_buff *frag_skb;
+ struct sk_buff *last_skb;
+};
+
+struct kcm_rx_msg {
+ int full_len;
+ int accum_len;
+ int offset;
+ int early_eaten;
+};
+
+/* Socket structure for KCM client sockets */
+struct kcm_sock {
+ struct sock sk;
+ struct kcm_mux *mux;
+ struct list_head kcm_sock_list;
+ int index;
+ u32 done : 1;
+ struct work_struct done_work;
+
+ struct kcm_stats stats;
+
+ /* Transmit */
+ struct kcm_psock *tx_psock;
+ struct work_struct tx_work;
+ struct list_head wait_psock_list;
+ struct sk_buff *seq_skb;
+
+ /* Don't use bit fields here, these are set under different locks */
+ bool tx_wait;
+ bool tx_wait_more;
+
+ /* Receive */
+ struct kcm_psock *rx_psock;
+ struct list_head wait_rx_list; /* KCMs waiting for receiving */
+ bool rx_wait;
+ u32 rx_disabled : 1;
+};
+
+struct bpf_prog;
+
+/* Structure for an attached lower socket */
+struct kcm_psock {
+ struct sock *sk;
+ struct kcm_mux *mux;
+ int index;
+
+ u32 tx_stopped : 1;
+ u32 rx_stopped : 1;
+ u32 done : 1;
+ u32 unattaching : 1;
+
+ void (*save_state_change)(struct sock *sk);
+ void (*save_data_ready)(struct sock *sk);
+ void (*save_write_space)(struct sock *sk);
+
+ struct list_head psock_list;
+
+ struct kcm_psock_stats stats;
+
+ /* Receive */
+ struct sk_buff *rx_skb_head;
+ struct sk_buff **rx_skb_nextp;
+ struct sk_buff *ready_rx_msg;
+ struct list_head psock_ready_list;
+ struct work_struct rx_work;
+ struct delayed_work rx_delayed_work;
+ struct bpf_prog *bpf_prog;
+ struct kcm_sock *rx_kcm;
+ unsigned long long saved_rx_bytes;
+ unsigned long long saved_rx_msgs;
+ struct timer_list rx_msg_timer;
+ unsigned int rx_need_bytes;
+
+ /* Transmit */
+ struct kcm_sock *tx_kcm;
+ struct list_head psock_avail_list;
+ unsigned long long saved_tx_bytes;
+ unsigned long long saved_tx_msgs;
+};
+
+/* Per net MUX list */
+struct kcm_net {
+ struct mutex mutex;
+ struct kcm_psock_stats aggregate_psock_stats;
+ struct kcm_mux_stats aggregate_mux_stats;
+ struct list_head mux_list;
+ int count;
+};
+
+/* Structure for a MUX */
+struct kcm_mux {
+ struct list_head kcm_mux_list;
+ struct rcu_head rcu;
+ struct kcm_net *knet;
+
+ struct list_head kcm_socks; /* All KCM sockets on MUX */
+ int kcm_socks_cnt; /* Total KCM socket count for MUX */
+ struct list_head psocks; /* List of all psocks on MUX */
+ int psocks_cnt; /* Total attached sockets */
+
+ struct kcm_mux_stats stats;
+ struct kcm_psock_stats aggregate_psock_stats;
+
+ /* Receive */
+ spinlock_t rx_lock ____cacheline_aligned_in_smp;
+ struct list_head kcm_rx_waiters; /* KCMs waiting for receiving */
+ struct list_head psocks_ready; /* List of psocks with a msg ready */
+ struct sk_buff_head rx_hold_queue;
+
+ /* Transmit */
+ spinlock_t lock ____cacheline_aligned_in_smp; /* TX and mux locking */
+ struct list_head psocks_avail; /* List of available psocks */
+ struct list_head kcm_tx_waiters; /* KCMs waiting for a TX psock */
+};
+
+#ifdef CONFIG_PROC_FS
+int kcm_proc_init(void);
+void kcm_proc_exit(void);
+#else
+static inline int kcm_proc_init(void) { return 0; }
+static inline void kcm_proc_exit(void) { }
+#endif
+
+static inline void aggregate_psock_stats(struct kcm_psock_stats *stats,
+ struct kcm_psock_stats *agg_stats)
+{
+ /* Save psock statistics in the mux when psock is being unattached. */
+
+#define SAVE_PSOCK_STATS(_stat) (agg_stats->_stat += stats->_stat)
+ SAVE_PSOCK_STATS(rx_msgs);
+ SAVE_PSOCK_STATS(rx_bytes);
+ SAVE_PSOCK_STATS(rx_aborts);
+ SAVE_PSOCK_STATS(rx_mem_fail);
+ SAVE_PSOCK_STATS(rx_need_more_hdr);
+ SAVE_PSOCK_STATS(rx_msg_too_big);
+ SAVE_PSOCK_STATS(rx_msg_timeouts);
+ SAVE_PSOCK_STATS(rx_bad_hdr_len);
+ SAVE_PSOCK_STATS(tx_msgs);
+ SAVE_PSOCK_STATS(tx_bytes);
+ SAVE_PSOCK_STATS(reserved);
+ SAVE_PSOCK_STATS(unreserved);
+ SAVE_PSOCK_STATS(tx_aborts);
+#undef SAVE_PSOCK_STATS
+}
+
+static inline void aggregate_mux_stats(struct kcm_mux_stats *stats,
+ struct kcm_mux_stats *agg_stats)
+{
+ /* Save psock statistics in the mux when psock is being unattached. */
+
+#define SAVE_MUX_STATS(_stat) (agg_stats->_stat += stats->_stat)
+ SAVE_MUX_STATS(rx_msgs);
+ SAVE_MUX_STATS(rx_bytes);
+ SAVE_MUX_STATS(tx_msgs);
+ SAVE_MUX_STATS(tx_bytes);
+ SAVE_MUX_STATS(rx_ready_drops);
+ SAVE_MUX_STATS(psock_attach);
+ SAVE_MUX_STATS(psock_unattach_rsvd);
+ SAVE_MUX_STATS(psock_unattach);
+#undef SAVE_MUX_STATS
+}
+
+#endif /* __NET_KCM_H_ */
diff --git a/include/net/netfilter/nft_masq.h b/include/net/netfilter/nft_masq.h
index e2a518b60e19..a3f3c11b2526 100644
--- a/include/net/netfilter/nft_masq.h
+++ b/include/net/netfilter/nft_masq.h
@@ -2,7 +2,9 @@
#define _NFT_MASQ_H_
struct nft_masq {
- u32 flags;
+ u32 flags;
+ enum nft_registers sreg_proto_min:8;
+ enum nft_registers sreg_proto_max:8;
};
extern const struct nla_policy nft_masq_policy[];
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index c0368db6df54..10d0848f5b8a 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -58,7 +58,10 @@ struct netns_ipv6 {
struct timer_list ip6_fib_timer;
struct hlist_head *fib_table_hash;
struct fib6_table *fib6_main_tbl;
+ struct list_head fib6_walkers;
struct dst_ops ip6_dst_ops;
+ rwlock_t fib6_walker_lock;
+ spinlock_t fib6_gc_lock;
unsigned int ip6_rt_gc_expire;
unsigned long ip6_rt_last_gc;
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index bea14eee373e..caa5e18636df 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -409,4 +409,18 @@ static inline bool tc_should_offload(struct net_device *dev, u32 flags)
return true;
}
+enum tc_fl_command {
+ TC_CLSFLOWER_REPLACE,
+ TC_CLSFLOWER_DESTROY,
+};
+
+struct tc_cls_flower_offload {
+ enum tc_fl_command command;
+ unsigned long cookie;
+ struct flow_dissector *dissector;
+ struct fl_flow_key *mask;
+ struct fl_flow_key *key;
+ struct tcf_exts *exts;
+};
+
#endif
diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h
index 04a31830711b..93c520b83d10 100644
--- a/include/net/tc_act/tc_gact.h
+++ b/include/net/tc_act/tc_gact.h
@@ -16,9 +16,9 @@ struct tcf_gact {
#define to_gact(a) \
container_of(a->priv, struct tcf_gact, common)
-#ifdef CONFIG_NET_CLS_ACT
static inline bool is_tcf_gact_shot(const struct tc_action *a)
{
+#ifdef CONFIG_NET_CLS_ACT
struct tcf_gact *gact;
if (a->ops && a->ops->type != TCA_ACT_GACT)
@@ -28,7 +28,7 @@ static inline bool is_tcf_gact_shot(const struct tc_action *a)
if (gact->tcf_action == TC_ACT_SHOT)
return true;
+#endif
return false;
}
-#endif
#endif /* __NET_TC_GACT_H */
diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h
index 0df9a0db4a8e..b496d5ad7d42 100644
--- a/include/net/tc_act/tc_skbedit.h
+++ b/include/net/tc_act/tc_skbedit.h
@@ -20,6 +20,7 @@
#define __NET_TC_SKBEDIT_H
#include <net/act_api.h>
+#include <linux/tc_act/tc_skbedit.h>
struct tcf_skbedit {
struct tcf_common common;
@@ -32,4 +33,19 @@ struct tcf_skbedit {
#define to_skbedit(a) \
container_of(a->priv, struct tcf_skbedit, common)
+/* Return true iff action is mark */
+static inline bool is_tcf_skbedit_mark(const struct tc_action *a)
+{
+#ifdef CONFIG_NET_CLS_ACT
+ if (a->ops && a->ops->type == TCA_ACT_SKBEDIT)
+ return to_skbedit(a)->flags == SKBEDIT_F_MARK;
+#endif
+ return false;
+}
+
+static inline u32 tcf_skbedit_mark(const struct tc_action *a)
+{
+ return to_skbedit(a)->mark;
+}
+
#endif /* __NET_TC_SKBEDIT_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index e90db8546806..0302636af98c 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1816,4 +1816,28 @@ static inline void skb_set_tcp_pure_ack(struct sk_buff *skb)
skb->truesize = 2;
}
+static inline int tcp_inq(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ int answ;
+
+ if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
+ answ = 0;
+ } else if (sock_flag(sk, SOCK_URGINLINE) ||
+ !tp->urg_data ||
+ before(tp->urg_seq, tp->copied_seq) ||
+ !before(tp->urg_seq, tp->rcv_nxt)) {
+
+ answ = tp->rcv_nxt - tp->copied_seq;
+
+ /* Subtract 1, if FIN was received */
+ if (answ && sock_flag(sk, SOCK_DONE))
+ answ--;
+ } else {
+ answ = tp->urg_seq - tp->copied_seq;
+ }
+
+ return answ;
+}
+
#endif /* _TCP_H */
diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index 97f5adb121a6..b83114077cee 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -88,8 +88,8 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb,
struct net_device *dev, struct in6_addr *saddr,
struct in6_addr *daddr,
- __u8 prio, __u8 ttl, __be16 src_port,
- __be16 dst_port, bool nocheck);
+ __u8 prio, __u8 ttl, __be32 label,
+ __be16 src_port, __be16 dst_port, bool nocheck);
#endif
void udp_tunnel_sock_release(struct socket *sock);
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 6eda4ed4d78b..a763c96ecde4 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -162,6 +162,7 @@ struct vxlan_config {
u16 port_max;
u8 tos;
u8 ttl;
+ __be32 label;
u32 flags;
unsigned long age_interval;
unsigned int addrmax;
diff --git a/include/sound/hdaudio.h b/include/sound/hdaudio.h
index e2b712c90d3f..c21c38ce7450 100644
--- a/include/sound/hdaudio.h
+++ b/include/sound/hdaudio.h
@@ -343,7 +343,7 @@ void snd_hdac_bus_enter_link_reset(struct hdac_bus *bus);
void snd_hdac_bus_exit_link_reset(struct hdac_bus *bus);
void snd_hdac_bus_update_rirb(struct hdac_bus *bus);
-void snd_hdac_bus_handle_stream_irq(struct hdac_bus *bus, unsigned int status,
+int snd_hdac_bus_handle_stream_irq(struct hdac_bus *bus, unsigned int status,
void (*ack)(struct hdac_bus *,
struct hdac_stream *));
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6496f98d3d68..924f537183fd 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -101,12 +101,15 @@ enum bpf_prog_type {
#define BPF_NOEXIST 1 /* create new element if it didn't exist */
#define BPF_EXIST 2 /* update existing element */
+#define BPF_F_NO_PREALLOC (1U << 0)
+
union bpf_attr {
struct { /* anonymous struct used by BPF_MAP_CREATE command */
__u32 map_type; /* one of enum bpf_map_type */
__u32 key_size; /* size of key in bytes */
__u32 value_size; /* size of value in bytes */
__u32 max_entries; /* max number of entries in a map */
+ __u32 map_flags; /* prealloc or not */
};
struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -298,6 +301,17 @@ enum bpf_func_id {
* Return: csum result
*/
BPF_FUNC_csum_diff,
+
+ /**
+ * bpf_skb_[gs]et_tunnel_opt(skb, opt, size)
+ * retrieve or populate tunnel options metadata
+ * @skb: pointer to skb
+ * @opt: pointer to raw tunnel option data
+ * @size: size of @opt
+ * Return: 0 on success for set, option size for get
+ */
+ BPF_FUNC_skb_get_tunnel_opt,
+ BPF_FUNC_skb_set_tunnel_opt,
__BPF_FUNC_MAX_ID,
};
@@ -305,6 +319,7 @@ enum bpf_func_id {
/* BPF_FUNC_skb_store_bytes flags. */
#define BPF_F_RECOMPUTE_CSUM (1ULL << 0)
+#define BPF_F_INVALIDATE_HASH (1ULL << 1)
/* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags.
* First 4 bits are for passing the header field size.
@@ -327,6 +342,10 @@ enum bpf_func_id {
#define BPF_F_FAST_STACK_CMP (1ULL << 9)
#define BPF_F_REUSE_STACKID (1ULL << 10)
+/* BPF_FUNC_skb_set_tunnel_key flags. */
+#define BPF_F_ZERO_CSUM_TX (1ULL << 1)
+#define BPF_F_DONT_FRAGMENT (1ULL << 2)
+
/* user accessible mirror of in-kernel sk_buff.
* new fields can only be added to the end of this structure
*/
@@ -356,6 +375,7 @@ struct bpf_tunnel_key {
};
__u8 tunnel_tos;
__u8 tunnel_ttl;
+ __u32 tunnel_label;
};
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index d452cea59020..249eef9a21bd 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -456,6 +456,7 @@ enum {
IFLA_VXLAN_GBP,
IFLA_VXLAN_REMCSUM_NOPARTIAL,
IFLA_VXLAN_COLLECT_METADATA,
+ IFLA_VXLAN_LABEL,
__IFLA_VXLAN_MAX
};
#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
@@ -478,6 +479,7 @@ enum {
IFLA_GENEVE_UDP_CSUM,
IFLA_GENEVE_UDP_ZERO_CSUM6_TX,
IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
+ IFLA_GENEVE_LABEL,
__IFLA_GENEVE_MAX
};
#define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1)
diff --git a/include/uapi/linux/kcm.h b/include/uapi/linux/kcm.h
new file mode 100644
index 000000000000..a5a530940b99
--- /dev/null
+++ b/include/uapi/linux/kcm.h
@@ -0,0 +1,40 @@
+/*
+ * Kernel Connection Multiplexor
+ *
+ * Copyright (c) 2016 Tom Herbert <tom@herbertland.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * User API to clone KCM sockets and attach transport socket to a KCM
+ * multiplexor.
+ */
+
+#ifndef KCM_KERNEL_H
+#define KCM_KERNEL_H
+
+struct kcm_attach {
+ int fd;
+ int bpf_fd;
+};
+
+struct kcm_unattach {
+ int fd;
+};
+
+struct kcm_clone {
+ int fd;
+};
+
+#define SIOCKCMATTACH (SIOCPROTOPRIVATE + 0)
+#define SIOCKCMUNATTACH (SIOCPROTOPRIVATE + 1)
+#define SIOCKCMCLONE (SIOCPROTOPRIVATE + 2)
+
+#define KCMPROTO_CONNECTED 0
+
+/* Socket options */
+#define KCM_RECV_DISABLE 1
+
+#endif
+
diff --git a/include/uapi/linux/media.h b/include/uapi/linux/media.h
index 1e3c8cb43bd7..625b38f65764 100644
--- a/include/uapi/linux/media.h
+++ b/include/uapi/linux/media.h
@@ -66,27 +66,33 @@ struct media_device_info {
/*
* DVB entities
*/
-#define MEDIA_ENT_F_DTV_DEMOD (MEDIA_ENT_F_BASE + 1)
-#define MEDIA_ENT_F_TS_DEMUX (MEDIA_ENT_F_BASE + 2)
-#define MEDIA_ENT_F_DTV_CA (MEDIA_ENT_F_BASE + 3)
-#define MEDIA_ENT_F_DTV_NET_DECAP (MEDIA_ENT_F_BASE + 4)
+#define MEDIA_ENT_F_DTV_DEMOD (MEDIA_ENT_F_BASE + 0x00001)
+#define MEDIA_ENT_F_TS_DEMUX (MEDIA_ENT_F_BASE + 0x00002)
+#define MEDIA_ENT_F_DTV_CA (MEDIA_ENT_F_BASE + 0x00003)
+#define MEDIA_ENT_F_DTV_NET_DECAP (MEDIA_ENT_F_BASE + 0x00004)
/*
- * Connectors
+ * I/O entities
*/
-/* It is a responsibility of the entity drivers to add connectors and links */
-#define MEDIA_ENT_F_CONN_RF (MEDIA_ENT_F_BASE + 21)
-#define MEDIA_ENT_F_CONN_SVIDEO (MEDIA_ENT_F_BASE + 22)
-#define MEDIA_ENT_F_CONN_COMPOSITE (MEDIA_ENT_F_BASE + 23)
-/* For internal test signal generators and other debug connectors */
-#define MEDIA_ENT_F_CONN_TEST (MEDIA_ENT_F_BASE + 24)
+#define MEDIA_ENT_F_IO_DTV (MEDIA_ENT_F_BASE + 0x01001)
+#define MEDIA_ENT_F_IO_VBI (MEDIA_ENT_F_BASE + 0x01002)
+#define MEDIA_ENT_F_IO_SWRADIO (MEDIA_ENT_F_BASE + 0x01003)
/*
- * I/O entities
+ * Connectors
*/
-#define MEDIA_ENT_F_IO_DTV (MEDIA_ENT_F_BASE + 31)
-#define MEDIA_ENT_F_IO_VBI (MEDIA_ENT_F_BASE + 32)
-#define MEDIA_ENT_F_IO_SWRADIO (MEDIA_ENT_F_BASE + 33)
+/* It is a responsibility of the entity drivers to add connectors and links */
+#ifdef __KERNEL__
+ /*
+ * For now, it should not be used in userspace, as some
+ * definitions may change
+ */
+
+#define MEDIA_ENT_F_CONN_RF (MEDIA_ENT_F_BASE + 0x30001)
+#define MEDIA_ENT_F_CONN_SVIDEO (MEDIA_ENT_F_BASE + 0x30002)
+#define MEDIA_ENT_F_CONN_COMPOSITE (MEDIA_ENT_F_BASE + 0x30003)
+
+#endif
/*
* Don't touch on those. The ranges MEDIA_ENT_F_OLD_BASE and
@@ -291,14 +297,14 @@ struct media_v2_entity {
__u32 id;
char name[64]; /* FIXME: move to a property? (RFC says so) */
__u32 function; /* Main function of the entity */
- __u16 reserved[12];
-};
+ __u32 reserved[6];
+} __attribute__ ((packed));
/* Should match the specific fields at media_intf_devnode */
struct media_v2_intf_devnode {
__u32 major;
__u32 minor;
-};
+} __attribute__ ((packed));
struct media_v2_interface {
__u32 id;
@@ -310,22 +316,22 @@ struct media_v2_interface {
struct media_v2_intf_devnode devnode;
__u32 raw[16];
};
-};
+} __attribute__ ((packed));
struct media_v2_pad {
__u32 id;
__u32 entity_id;
__u32 flags;
- __u16 reserved[9];
-};
+ __u32 reserved[5];
+} __attribute__ ((packed));
struct media_v2_link {
__u32 id;
__u32 source_id;
__u32 sink_id;
__u32 flags;
- __u32 reserved[5];
-};
+ __u32 reserved[6];
+} __attribute__ ((packed));
struct media_v2_topology {
__u64 topology_version;
@@ -345,7 +351,7 @@ struct media_v2_topology {
__u32 num_links;
__u32 reserved4;
__u64 ptr_links;
-};
+} __attribute__ ((packed));
static inline void __user *media_get_uptr(__u64 arg)
{
diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h
index 5b4a4be06e2b..cc68b92124d4 100644
--- a/include/uapi/linux/ndctl.h
+++ b/include/uapi/linux/ndctl.h
@@ -66,14 +66,18 @@ struct nd_cmd_ars_cap {
__u64 length;
__u32 status;
__u32 max_ars_out;
+ __u32 clear_err_unit;
+ __u32 reserved;
} __packed;
struct nd_cmd_ars_start {
__u64 address;
__u64 length;
__u16 type;
- __u8 reserved[6];
+ __u8 flags;
+ __u8 reserved[5];
__u32 status;
+ __u32 scrub_time;
} __packed;
struct nd_cmd_ars_status {
@@ -81,11 +85,14 @@ struct nd_cmd_ars_status {
__u32 out_length;
__u64 address;
__u64 length;
+ __u64 restart_address;
+ __u64 restart_length;
__u16 type;
+ __u16 flags;
__u32 num_records;
struct nd_ars_record {
__u32 handle;
- __u32 flags;
+ __u32 reserved;
__u64 err_address;
__u64 length;
} __packed records[0];
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index be41ffc128b8..eeffde196f80 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -681,6 +681,7 @@ enum nft_exthdr_attributes {
* @NFT_META_IIFGROUP: packet input interface group
* @NFT_META_OIFGROUP: packet output interface group
* @NFT_META_CGROUP: socket control group (skb->sk->sk_classid)
+ * @NFT_META_PRANDOM: a 32bit pseudo-random number
*/
enum nft_meta_keys {
NFT_META_LEN,
@@ -707,6 +708,7 @@ enum nft_meta_keys {
NFT_META_IIFGROUP,
NFT_META_OIFGROUP,
NFT_META_CGROUP,
+ NFT_META_PRANDOM,
};
/**
@@ -949,10 +951,14 @@ enum nft_nat_attributes {
* enum nft_masq_attributes - nf_tables masquerade expression attributes
*
* @NFTA_MASQ_FLAGS: NAT flags (see NF_NAT_RANGE_* in linux/netfilter/nf_nat.h) (NLA_U32)
+ * @NFTA_MASQ_REG_PROTO_MIN: source register of proto range start (NLA_U32: nft_registers)
+ * @NFTA_MASQ_REG_PROTO_MAX: source register of proto range end (NLA_U32: nft_registers)
*/
enum nft_masq_attributes {
NFTA_MASQ_UNSPEC,
NFTA_MASQ_FLAGS,
+ NFTA_MASQ_REG_PROTO_MIN,
+ NFTA_MASQ_REG_PROTO_MAX,
__NFTA_MASQ_MAX
};
#define NFTA_MASQ_MAX (__NFTA_MASQ_MAX - 1)
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 9874f5680926..c43c5f78b9c4 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -417,6 +417,8 @@ enum {
TCA_FLOWER_KEY_TCP_DST, /* be16 */
TCA_FLOWER_KEY_UDP_SRC, /* be16 */
TCA_FLOWER_KEY_UDP_DST, /* be16 */
+
+ TCA_FLOWER_FLAGS,
__TCA_FLOWER_MAX,
};
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 8a932d079c24..eed911d091da 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -1,7 +1,7 @@
obj-y := core.o
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o
-obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o
+obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o
ifeq ($(CONFIG_PERF_EVENTS),y)
obj-$(CONFIG_BPF_SYSCALL) += stackmap.o
endif
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index bd3bdf2486a7..76d5a794e426 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -53,7 +53,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
- attr->value_size == 0)
+ attr->value_size == 0 || attr->map_flags)
return ERR_PTR(-EINVAL);
if (attr->value_size >= 1 << (KMALLOC_SHIFT_MAX - 1))
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index a68e95133fcd..fff3650d52fc 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -1,4 +1,5 @@
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ * Copyright (c) 2016 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -13,6 +14,7 @@
#include <linux/jhash.h>
#include <linux/filter.h>
#include <linux/vmalloc.h>
+#include "percpu_freelist.h"
struct bucket {
struct hlist_head head;
@@ -22,6 +24,8 @@ struct bucket {
struct bpf_htab {
struct bpf_map map;
struct bucket *buckets;
+ void *elems;
+ struct pcpu_freelist freelist;
atomic_t count; /* number of elements in this hashtable */
u32 n_buckets; /* number of hash buckets */
u32 elem_size; /* size of each element in bytes */
@@ -29,15 +33,86 @@ struct bpf_htab {
/* each htab element is struct htab_elem + key + value */
struct htab_elem {
- struct hlist_node hash_node;
- struct rcu_head rcu;
union {
- u32 hash;
- u32 key_size;
+ struct hlist_node hash_node;
+ struct bpf_htab *htab;
+ struct pcpu_freelist_node fnode;
};
+ struct rcu_head rcu;
+ u32 hash;
char key[0] __aligned(8);
};
+static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
+ void __percpu *pptr)
+{
+ *(void __percpu **)(l->key + key_size) = pptr;
+}
+
+static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size)
+{
+ return *(void __percpu **)(l->key + key_size);
+}
+
+static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i)
+{
+ return (struct htab_elem *) (htab->elems + i * htab->elem_size);
+}
+
+static void htab_free_elems(struct bpf_htab *htab)
+{
+ int i;
+
+ if (htab->map.map_type != BPF_MAP_TYPE_PERCPU_HASH)
+ goto free_elems;
+
+ for (i = 0; i < htab->map.max_entries; i++) {
+ void __percpu *pptr;
+
+ pptr = htab_elem_get_ptr(get_htab_elem(htab, i),
+ htab->map.key_size);
+ free_percpu(pptr);
+ }
+free_elems:
+ vfree(htab->elems);
+}
+
+static int prealloc_elems_and_freelist(struct bpf_htab *htab)
+{
+ int err = -ENOMEM, i;
+
+ htab->elems = vzalloc(htab->elem_size * htab->map.max_entries);
+ if (!htab->elems)
+ return -ENOMEM;
+
+ if (htab->map.map_type != BPF_MAP_TYPE_PERCPU_HASH)
+ goto skip_percpu_elems;
+
+ for (i = 0; i < htab->map.max_entries; i++) {
+ u32 size = round_up(htab->map.value_size, 8);
+ void __percpu *pptr;
+
+ pptr = __alloc_percpu_gfp(size, 8, GFP_USER | __GFP_NOWARN);
+ if (!pptr)
+ goto free_elems;
+ htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size,
+ pptr);
+ }
+
+skip_percpu_elems:
+ err = pcpu_freelist_init(&htab->freelist);
+ if (err)
+ goto free_elems;
+
+ pcpu_freelist_populate(&htab->freelist, htab->elems, htab->elem_size,
+ htab->map.max_entries);
+ return 0;
+
+free_elems:
+ htab_free_elems(htab);
+ return err;
+}
+
/* Called from syscall */
static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
{
@@ -46,6 +121,10 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
int err, i;
u64 cost;
+ if (attr->map_flags & ~BPF_F_NO_PREALLOC)
+ /* reserved bits should not be used */
+ return ERR_PTR(-EINVAL);
+
htab = kzalloc(sizeof(*htab), GFP_USER);
if (!htab)
return ERR_PTR(-ENOMEM);
@@ -55,6 +134,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
htab->map.key_size = attr->key_size;
htab->map.value_size = attr->value_size;
htab->map.max_entries = attr->max_entries;
+ htab->map.map_flags = attr->map_flags;
/* check sanity of attributes.
* value_size == 0 may be allowed in the future to use map as a set
@@ -92,7 +172,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
if (percpu)
htab->elem_size += sizeof(void *);
else
- htab->elem_size += htab->map.value_size;
+ htab->elem_size += round_up(htab->map.value_size, 8);
/* prevent zero size kmalloc and check for u32 overflow */
if (htab->n_buckets == 0 ||
@@ -112,6 +192,11 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
htab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
+ /* if map size is larger than memlock limit, reject it early */
+ err = bpf_map_precharge_memlock(htab->map.pages);
+ if (err)
+ goto free_htab;
+
err = -ENOMEM;
htab->buckets = kmalloc_array(htab->n_buckets, sizeof(struct bucket),
GFP_USER | __GFP_NOWARN);
@@ -127,10 +212,16 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
raw_spin_lock_init(&htab->buckets[i].lock);
}
- atomic_set(&htab->count, 0);
+ if (!(attr->map_flags & BPF_F_NO_PREALLOC)) {
+ err = prealloc_elems_and_freelist(htab);
+ if (err)
+ goto free_buckets;
+ }
return &htab->map;
+free_buckets:
+ kvfree(htab->buckets);
free_htab:
kfree(htab);
return ERR_PTR(err);
@@ -249,42 +340,42 @@ find_first_elem:
}
}
- /* itereated over all buckets and all elements */
+ /* iterated over all buckets and all elements */
return -ENOENT;
}
-
-static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
- void __percpu *pptr)
-{
- *(void __percpu **)(l->key + key_size) = pptr;
-}
-
-static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size)
-{
- return *(void __percpu **)(l->key + key_size);
-}
-
-static void htab_percpu_elem_free(struct htab_elem *l)
+static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
{
- free_percpu(htab_elem_get_ptr(l, l->key_size));
+ if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
+ free_percpu(htab_elem_get_ptr(l, htab->map.key_size));
kfree(l);
+
}
-static void htab_percpu_elem_free_rcu(struct rcu_head *head)
+static void htab_elem_free_rcu(struct rcu_head *head)
{
struct htab_elem *l = container_of(head, struct htab_elem, rcu);
+ struct bpf_htab *htab = l->htab;
- htab_percpu_elem_free(l);
+ /* must increment bpf_prog_active to avoid kprobe+bpf triggering while
+ * we're calling kfree, otherwise deadlock is possible if kprobes
+ * are placed somewhere inside of slub
+ */
+ preempt_disable();
+ __this_cpu_inc(bpf_prog_active);
+ htab_elem_free(htab, l);
+ __this_cpu_dec(bpf_prog_active);
+ preempt_enable();
}
-static void free_htab_elem(struct htab_elem *l, bool percpu, u32 key_size)
+static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
{
- if (percpu) {
- l->key_size = key_size;
- call_rcu(&l->rcu, htab_percpu_elem_free_rcu);
+ if (!(htab->map.map_flags & BPF_F_NO_PREALLOC)) {
+ pcpu_freelist_push(&htab->freelist, &l->fnode);
} else {
- kfree_rcu(l, rcu);
+ atomic_dec(&htab->count);
+ l->htab = htab;
+ call_rcu(&l->rcu, htab_elem_free_rcu);
}
}
@@ -293,23 +384,39 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
bool percpu, bool onallcpus)
{
u32 size = htab->map.value_size;
+ bool prealloc = !(htab->map.map_flags & BPF_F_NO_PREALLOC);
struct htab_elem *l_new;
void __percpu *pptr;
- l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN);
- if (!l_new)
- return NULL;
+ if (prealloc) {
+ l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist);
+ if (!l_new)
+ return ERR_PTR(-E2BIG);
+ } else {
+ if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
+ atomic_dec(&htab->count);
+ return ERR_PTR(-E2BIG);
+ }
+ l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN);
+ if (!l_new)
+ return ERR_PTR(-ENOMEM);
+ }
memcpy(l_new->key, key, key_size);
if (percpu) {
/* round up value_size to 8 bytes */
size = round_up(size, 8);
- /* alloc_percpu zero-fills */
- pptr = __alloc_percpu_gfp(size, 8, GFP_ATOMIC | __GFP_NOWARN);
- if (!pptr) {
- kfree(l_new);
- return NULL;
+ if (prealloc) {
+ pptr = htab_elem_get_ptr(l_new, key_size);
+ } else {
+ /* alloc_percpu zero-fills */
+ pptr = __alloc_percpu_gfp(size, 8,
+ GFP_ATOMIC | __GFP_NOWARN);
+ if (!pptr) {
+ kfree(l_new);
+ return ERR_PTR(-ENOMEM);
+ }
}
if (!onallcpus) {
@@ -324,7 +431,8 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
off += size;
}
}
- htab_elem_set_ptr(l_new, key_size, pptr);
+ if (!prealloc)
+ htab_elem_set_ptr(l_new, key_size, pptr);
} else {
memcpy(l_new->key + round_up(key_size, 8), value, size);
}
@@ -336,12 +444,6 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old,
u64 map_flags)
{
- if (!l_old && unlikely(atomic_read(&htab->count) >= htab->map.max_entries))
- /* if elem with this 'key' doesn't exist and we've reached
- * max_entries limit, fail insertion of new elem
- */
- return -E2BIG;
-
if (l_old && map_flags == BPF_NOEXIST)
/* elem already exists */
return -EEXIST;
@@ -375,13 +477,6 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
hash = htab_map_hash(key, key_size);
- /* allocate new element outside of the lock, since
- * we're most likley going to insert it
- */
- l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false);
- if (!l_new)
- return -ENOMEM;
-
b = __select_bucket(htab, hash);
head = &b->head;
@@ -394,21 +489,24 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
if (ret)
goto err;
+ l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false);
+ if (IS_ERR(l_new)) {
+ /* all pre-allocated elements are in use or memory exhausted */
+ ret = PTR_ERR(l_new);
+ goto err;
+ }
+
/* add new element to the head of the list, so that
* concurrent search will find it before old elem
*/
hlist_add_head_rcu(&l_new->hash_node, head);
if (l_old) {
hlist_del_rcu(&l_old->hash_node);
- kfree_rcu(l_old, rcu);
- } else {
- atomic_inc(&htab->count);
+ free_htab_elem(htab, l_old);
}
- raw_spin_unlock_irqrestore(&b->lock, flags);
- return 0;
+ ret = 0;
err:
raw_spin_unlock_irqrestore(&b->lock, flags);
- kfree(l_new);
return ret;
}
@@ -466,12 +564,11 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
} else {
l_new = alloc_htab_elem(htab, key, value, key_size,
hash, true, onallcpus);
- if (!l_new) {
- ret = -ENOMEM;
+ if (IS_ERR(l_new)) {
+ ret = PTR_ERR(l_new);
goto err;
}
hlist_add_head_rcu(&l_new->hash_node, head);
- atomic_inc(&htab->count);
}
ret = 0;
err:
@@ -489,7 +586,6 @@ static int htab_percpu_map_update_elem(struct bpf_map *map, void *key,
static int htab_map_delete_elem(struct bpf_map *map, void *key)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
- bool percpu = map->map_type == BPF_MAP_TYPE_PERCPU_HASH;
struct hlist_head *head;
struct bucket *b;
struct htab_elem *l;
@@ -511,8 +607,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
if (l) {
hlist_del_rcu(&l->hash_node);
- atomic_dec(&htab->count);
- free_htab_elem(l, percpu, key_size);
+ free_htab_elem(htab, l);
ret = 0;
}
@@ -531,17 +626,10 @@ static void delete_all_elements(struct bpf_htab *htab)
hlist_for_each_entry_safe(l, n, head, hash_node) {
hlist_del_rcu(&l->hash_node);
- atomic_dec(&htab->count);
- if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH) {
- l->key_size = htab->map.key_size;
- htab_percpu_elem_free(l);
- } else {
- kfree(l);
- }
+ htab_elem_free(htab, l);
}
}
}
-
/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void htab_map_free(struct bpf_map *map)
{
@@ -554,10 +642,16 @@ static void htab_map_free(struct bpf_map *map)
*/
synchronize_rcu();
- /* some of kfree_rcu() callbacks for elements of this map may not have
- * executed. It's ok. Proceed to free residual elements and map itself
+ /* some of free_htab_elem() callbacks for elements of this map may
+ * not have executed. Wait for them.
*/
- delete_all_elements(htab);
+ rcu_barrier();
+ if (htab->map.map_flags & BPF_F_NO_PREALLOC) {
+ delete_all_elements(htab);
+ } else {
+ htab_free_elems(htab);
+ pcpu_freelist_destroy(&htab->freelist);
+ }
kvfree(htab->buckets);
kfree(htab);
}
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 4504ca66118d..50da680c479f 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -166,7 +166,7 @@ static u64 bpf_get_current_comm(u64 r1, u64 size, u64 r3, u64 r4, u64 r5)
if (!task)
return -EINVAL;
- memcpy(buf, task->comm, min_t(size_t, size, sizeof(task->comm)));
+ strlcpy(buf, task->comm, min_t(size_t, size, sizeof(task->comm)));
return 0;
}
diff --git a/kernel/bpf/percpu_freelist.c b/kernel/bpf/percpu_freelist.c
new file mode 100644
index 000000000000..5c51d1985b51
--- /dev/null
+++ b/kernel/bpf/percpu_freelist.c
@@ -0,0 +1,100 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include "percpu_freelist.h"
+
+int pcpu_freelist_init(struct pcpu_freelist *s)
+{
+ int cpu;
+
+ s->freelist = alloc_percpu(struct pcpu_freelist_head);
+ if (!s->freelist)
+ return -ENOMEM;
+
+ for_each_possible_cpu(cpu) {
+ struct pcpu_freelist_head *head = per_cpu_ptr(s->freelist, cpu);
+
+ raw_spin_lock_init(&head->lock);
+ head->first = NULL;
+ }
+ return 0;
+}
+
+void pcpu_freelist_destroy(struct pcpu_freelist *s)
+{
+ free_percpu(s->freelist);
+}
+
+static inline void __pcpu_freelist_push(struct pcpu_freelist_head *head,
+ struct pcpu_freelist_node *node)
+{
+ raw_spin_lock(&head->lock);
+ node->next = head->first;
+ head->first = node;
+ raw_spin_unlock(&head->lock);
+}
+
+void pcpu_freelist_push(struct pcpu_freelist *s,
+ struct pcpu_freelist_node *node)
+{
+ struct pcpu_freelist_head *head = this_cpu_ptr(s->freelist);
+
+ __pcpu_freelist_push(head, node);
+}
+
+void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size,
+ u32 nr_elems)
+{
+ struct pcpu_freelist_head *head;
+ unsigned long flags;
+ int i, cpu, pcpu_entries;
+
+ pcpu_entries = nr_elems / num_possible_cpus() + 1;
+ i = 0;
+
+ /* disable irq to workaround lockdep false positive
+ * in bpf usage pcpu_freelist_populate() will never race
+ * with pcpu_freelist_push()
+ */
+ local_irq_save(flags);
+ for_each_possible_cpu(cpu) {
+again:
+ head = per_cpu_ptr(s->freelist, cpu);
+ __pcpu_freelist_push(head, buf);
+ i++;
+ buf += elem_size;
+ if (i == nr_elems)
+ break;
+ if (i % pcpu_entries)
+ goto again;
+ }
+ local_irq_restore(flags);
+}
+
+struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s)
+{
+ struct pcpu_freelist_head *head;
+ struct pcpu_freelist_node *node;
+ int orig_cpu, cpu;
+
+ orig_cpu = cpu = raw_smp_processor_id();
+ while (1) {
+ head = per_cpu_ptr(s->freelist, cpu);
+ raw_spin_lock(&head->lock);
+ node = head->first;
+ if (node) {
+ head->first = node->next;
+ raw_spin_unlock(&head->lock);
+ return node;
+ }
+ raw_spin_unlock(&head->lock);
+ cpu = cpumask_next(cpu, cpu_possible_mask);
+ if (cpu >= nr_cpu_ids)
+ cpu = 0;
+ if (cpu == orig_cpu)
+ return NULL;
+ }
+}
diff --git a/kernel/bpf/percpu_freelist.h b/kernel/bpf/percpu_freelist.h
new file mode 100644
index 000000000000..3049aae8ea1e
--- /dev/null
+++ b/kernel/bpf/percpu_freelist.h
@@ -0,0 +1,31 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef __PERCPU_FREELIST_H__
+#define __PERCPU_FREELIST_H__
+#include <linux/spinlock.h>
+#include <linux/percpu.h>
+
+struct pcpu_freelist_head {
+ struct pcpu_freelist_node *first;
+ raw_spinlock_t lock;
+};
+
+struct pcpu_freelist {
+ struct pcpu_freelist_head __percpu *freelist;
+};
+
+struct pcpu_freelist_node {
+ struct pcpu_freelist_node *next;
+};
+
+void pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *);
+struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *);
+void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size,
+ u32 nr_elems);
+int pcpu_freelist_init(struct pcpu_freelist *);
+void pcpu_freelist_destroy(struct pcpu_freelist *s);
+#endif
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 8a60ee14a977..499d9e933f8e 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -10,9 +10,10 @@
#include <linux/vmalloc.h>
#include <linux/stacktrace.h>
#include <linux/perf_event.h>
+#include "percpu_freelist.h"
struct stack_map_bucket {
- struct rcu_head rcu;
+ struct pcpu_freelist_node fnode;
u32 hash;
u32 nr;
u64 ip[];
@@ -20,10 +21,34 @@ struct stack_map_bucket {
struct bpf_stack_map {
struct bpf_map map;
+ void *elems;
+ struct pcpu_freelist freelist;
u32 n_buckets;
- struct stack_map_bucket __rcu *buckets[];
+ struct stack_map_bucket *buckets[];
};
+static int prealloc_elems_and_freelist(struct bpf_stack_map *smap)
+{
+ u32 elem_size = sizeof(struct stack_map_bucket) + smap->map.value_size;
+ int err;
+
+ smap->elems = vzalloc(elem_size * smap->map.max_entries);
+ if (!smap->elems)
+ return -ENOMEM;
+
+ err = pcpu_freelist_init(&smap->freelist);
+ if (err)
+ goto free_elems;
+
+ pcpu_freelist_populate(&smap->freelist, smap->elems, elem_size,
+ smap->map.max_entries);
+ return 0;
+
+free_elems:
+ vfree(smap->elems);
+ return err;
+}
+
/* Called from syscall */
static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
{
@@ -35,6 +60,9 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
if (!capable(CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
+ if (attr->map_flags)
+ return ERR_PTR(-EINVAL);
+
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
value_size < 8 || value_size % 8 ||
@@ -67,12 +95,22 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
smap->n_buckets = n_buckets;
smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
+ err = bpf_map_precharge_memlock(smap->map.pages);
+ if (err)
+ goto free_smap;
+
err = get_callchain_buffers();
if (err)
goto free_smap;
+ err = prealloc_elems_and_freelist(smap);
+ if (err)
+ goto put_buffers;
+
return &smap->map;
+put_buffers:
+ put_callchain_buffers();
free_smap:
kvfree(smap);
return ERR_PTR(err);
@@ -118,7 +156,7 @@ static u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5)
ips = trace->ip + skip + init_nr;
hash = jhash2((u32 *)ips, trace_len / sizeof(u32), 0);
id = hash & (smap->n_buckets - 1);
- bucket = rcu_dereference(smap->buckets[id]);
+ bucket = READ_ONCE(smap->buckets[id]);
if (bucket && bucket->hash == hash) {
if (flags & BPF_F_FAST_STACK_CMP)
@@ -132,19 +170,18 @@ static u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5)
if (bucket && !(flags & BPF_F_REUSE_STACKID))
return -EEXIST;
- new_bucket = kmalloc(sizeof(struct stack_map_bucket) + map->value_size,
- GFP_ATOMIC | __GFP_NOWARN);
+ new_bucket = (struct stack_map_bucket *)
+ pcpu_freelist_pop(&smap->freelist);
if (unlikely(!new_bucket))
return -ENOMEM;
memcpy(new_bucket->ip, ips, trace_len);
- memset(new_bucket->ip + trace_len / 8, 0, map->value_size - trace_len);
new_bucket->hash = hash;
new_bucket->nr = trace_nr;
old_bucket = xchg(&smap->buckets[id], new_bucket);
if (old_bucket)
- kfree_rcu(old_bucket, rcu);
+ pcpu_freelist_push(&smap->freelist, &old_bucket->fnode);
return id;
}
@@ -157,17 +194,34 @@ const struct bpf_func_proto bpf_get_stackid_proto = {
.arg3_type = ARG_ANYTHING,
};
-/* Called from syscall or from eBPF program */
+/* Called from eBPF program */
static void *stack_map_lookup_elem(struct bpf_map *map, void *key)
{
+ return NULL;
+}
+
+/* Called from syscall */
+int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
+{
struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
- struct stack_map_bucket *bucket;
- u32 id = *(u32 *)key;
+ struct stack_map_bucket *bucket, *old_bucket;
+ u32 id = *(u32 *)key, trace_len;
if (unlikely(id >= smap->n_buckets))
- return NULL;
- bucket = rcu_dereference(smap->buckets[id]);
- return bucket ? bucket->ip : NULL;
+ return -ENOENT;
+
+ bucket = xchg(&smap->buckets[id], NULL);
+ if (!bucket)
+ return -ENOENT;
+
+ trace_len = bucket->nr * sizeof(u64);
+ memcpy(value, bucket->ip, trace_len);
+ memset(value + trace_len, 0, map->value_size - trace_len);
+
+ old_bucket = xchg(&smap->buckets[id], bucket);
+ if (old_bucket)
+ pcpu_freelist_push(&smap->freelist, &old_bucket->fnode);
+ return 0;
}
static int stack_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
@@ -193,7 +247,7 @@ static int stack_map_delete_elem(struct bpf_map *map, void *key)
old_bucket = xchg(&smap->buckets[id], NULL);
if (old_bucket) {
- kfree_rcu(old_bucket, rcu);
+ pcpu_freelist_push(&smap->freelist, &old_bucket->fnode);
return 0;
} else {
return -ENOENT;
@@ -204,13 +258,12 @@ static int stack_map_delete_elem(struct bpf_map *map, void *key)
static void stack_map_free(struct bpf_map *map)
{
struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
- int i;
+ /* wait for bpf programs to complete before freeing stack map */
synchronize_rcu();
- for (i = 0; i < smap->n_buckets; i++)
- if (smap->buckets[i])
- kfree_rcu(smap->buckets[i], rcu);
+ vfree(smap->elems);
+ pcpu_freelist_destroy(&smap->freelist);
kvfree(smap);
put_callchain_buffers();
}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index c95a753c2007..2a2efe1bc76c 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -18,6 +18,8 @@
#include <linux/filter.h>
#include <linux/version.h>
+DEFINE_PER_CPU(int, bpf_prog_active);
+
int sysctl_unprivileged_bpf_disabled __read_mostly;
static LIST_HEAD(bpf_map_types);
@@ -46,6 +48,19 @@ void bpf_register_map_type(struct bpf_map_type_list *tl)
list_add(&tl->list_node, &bpf_map_types);
}
+int bpf_map_precharge_memlock(u32 pages)
+{
+ struct user_struct *user = get_current_user();
+ unsigned long memlock_limit, cur;
+
+ memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+ cur = atomic_long_read(&user->locked_vm);
+ free_uid(user);
+ if (cur + pages > memlock_limit)
+ return -EPERM;
+ return 0;
+}
+
static int bpf_map_charge_memlock(struct bpf_map *map)
{
struct user_struct *user = get_current_user();
@@ -151,7 +166,7 @@ int bpf_map_new_fd(struct bpf_map *map)
offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
sizeof(attr->CMD##_LAST_FIELD)) != NULL
-#define BPF_MAP_CREATE_LAST_FIELD max_entries
+#define BPF_MAP_CREATE_LAST_FIELD map_flags
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
@@ -229,6 +244,11 @@ static void __user *u64_to_ptr(__u64 val)
return (void __user *) (unsigned long) val;
}
+int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
+{
+ return -ENOTSUPP;
+}
+
/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
@@ -275,6 +295,8 @@ static int map_lookup_elem(union bpf_attr *attr)
err = bpf_percpu_hash_copy(map, key, value);
} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
err = bpf_percpu_array_copy(map, key, value);
+ } else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
+ err = bpf_stackmap_copy(map, key, value);
} else {
rcu_read_lock();
ptr = map->ops->map_lookup_elem(map, key);
@@ -347,6 +369,11 @@ static int map_update_elem(union bpf_attr *attr)
if (copy_from_user(value, uvalue, value_size) != 0)
goto free_value;
+ /* must increment bpf_prog_active to avoid kprobe+bpf triggering from
+ * inside bpf map update or delete otherwise deadlocks are possible
+ */
+ preempt_disable();
+ __this_cpu_inc(bpf_prog_active);
if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) {
err = bpf_percpu_hash_update(map, key, value, attr->flags);
} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
@@ -356,6 +383,8 @@ static int map_update_elem(union bpf_attr *attr)
err = map->ops->map_update_elem(map, key, value, attr->flags);
rcu_read_unlock();
}
+ __this_cpu_dec(bpf_prog_active);
+ preempt_enable();
free_value:
kfree(value);
@@ -394,9 +423,13 @@ static int map_delete_elem(union bpf_attr *attr)
if (copy_from_user(key, ukey, map->key_size) != 0)
goto free_key;
+ preempt_disable();
+ __this_cpu_inc(bpf_prog_active);
rcu_read_lock();
err = map->ops->map_delete_elem(map, key);
rcu_read_unlock();
+ __this_cpu_dec(bpf_prog_active);
+ preempt_enable();
free_key:
kfree(key);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 0d58522103cd..614614821f00 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -64,8 +64,17 @@ static void remote_function(void *data)
struct task_struct *p = tfc->p;
if (p) {
- tfc->ret = -EAGAIN;
- if (task_cpu(p) != smp_processor_id() || !task_curr(p))
+ /* -EAGAIN */
+ if (task_cpu(p) != smp_processor_id())
+ return;
+
+ /*
+ * Now that we're on right CPU with IRQs disabled, we can test
+ * if we hit the right task without races.
+ */
+
+ tfc->ret = -ESRCH; /* No such (running) process */
+ if (p != current)
return;
}
@@ -92,13 +101,17 @@ task_function_call(struct task_struct *p, remote_function_f func, void *info)
.p = p,
.func = func,
.info = info,
- .ret = -ESRCH, /* No such (running) process */
+ .ret = -EAGAIN,
};
+ int ret;
- if (task_curr(p))
- smp_call_function_single(task_cpu(p), remote_function, &data, 1);
+ do {
+ ret = smp_call_function_single(task_cpu(p), remote_function, &data, 1);
+ if (!ret)
+ ret = data.ret;
+ } while (ret == -EAGAIN);
- return data.ret;
+ return ret;
}
/**
@@ -169,19 +182,6 @@ static bool is_kernel_event(struct perf_event *event)
* rely on ctx->is_active and therefore cannot use event_function_call().
* See perf_install_in_context().
*
- * This is because we need a ctx->lock serialized variable (ctx->is_active)
- * to reliably determine if a particular task/context is scheduled in. The
- * task_curr() use in task_function_call() is racy in that a remote context
- * switch is not a single atomic operation.
- *
- * As is, the situation is 'safe' because we set rq->curr before we do the
- * actual context switch. This means that task_curr() will fail early, but
- * we'll continue spinning on ctx->is_active until we've passed
- * perf_event_task_sched_out().
- *
- * Without this ctx->lock serialized variable we could have race where we find
- * the task (and hence the context) would not be active while in fact they are.
- *
* If ctx->nr_events, then ctx->is_active and cpuctx->task_ctx are set.
*/
@@ -212,7 +212,7 @@ static int event_function(void *info)
*/
if (ctx->task) {
if (ctx->task != current) {
- ret = -EAGAIN;
+ ret = -ESRCH;
goto unlock;
}
@@ -276,10 +276,10 @@ static void event_function_call(struct perf_event *event, event_f func, void *da
return;
}
-again:
if (task == TASK_TOMBSTONE)
return;
+again:
if (!task_function_call(task, event_function, &efs))
return;
@@ -289,13 +289,15 @@ again:
* a concurrent perf_event_context_sched_out().
*/
task = ctx->task;
- if (task != TASK_TOMBSTONE) {
- if (ctx->is_active) {
- raw_spin_unlock_irq(&ctx->lock);
- goto again;
- }
- func(event, NULL, ctx, data);
+ if (task == TASK_TOMBSTONE) {
+ raw_spin_unlock_irq(&ctx->lock);
+ return;
}
+ if (ctx->is_active) {
+ raw_spin_unlock_irq(&ctx->lock);
+ goto again;
+ }
+ func(event, NULL, ctx, data);
raw_spin_unlock_irq(&ctx->lock);
}
@@ -314,6 +316,7 @@ again:
enum event_type_t {
EVENT_FLEXIBLE = 0x1,
EVENT_PINNED = 0x2,
+ EVENT_TIME = 0x4,
EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
};
@@ -321,7 +324,13 @@ enum event_type_t {
* perf_sched_events : >0 events exist
* perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu
*/
-struct static_key_deferred perf_sched_events __read_mostly;
+
+static void perf_sched_delayed(struct work_struct *work);
+DEFINE_STATIC_KEY_FALSE(perf_sched_events);
+static DECLARE_DELAYED_WORK(perf_sched_work, perf_sched_delayed);
+static DEFINE_MUTEX(perf_sched_mutex);
+static atomic_t perf_sched_count;
+
static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
static DEFINE_PER_CPU(int, perf_sched_cb_usages);
@@ -1288,16 +1297,18 @@ static u64 perf_event_time(struct perf_event *event)
/*
* Update the total_time_enabled and total_time_running fields for a event.
- * The caller of this function needs to hold the ctx->lock.
*/
static void update_event_times(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
u64 run_end;
+ lockdep_assert_held(&ctx->lock);
+
if (event->state < PERF_EVENT_STATE_INACTIVE ||
event->group_leader->state < PERF_EVENT_STATE_INACTIVE)
return;
+
/*
* in cgroup mode, time_enabled represents
* the time the event was enabled AND active
@@ -1645,7 +1656,7 @@ out:
static bool is_orphaned_event(struct perf_event *event)
{
- return event->state == PERF_EVENT_STATE_EXIT;
+ return event->state == PERF_EVENT_STATE_DEAD;
}
static inline int pmu_filter_match(struct perf_event *event)
@@ -1690,14 +1701,14 @@ event_sched_out(struct perf_event *event,
perf_pmu_disable(event->pmu);
+ event->tstamp_stopped = tstamp;
+ event->pmu->del(event, 0);
+ event->oncpu = -1;
event->state = PERF_EVENT_STATE_INACTIVE;
if (event->pending_disable) {
event->pending_disable = 0;
event->state = PERF_EVENT_STATE_OFF;
}
- event->tstamp_stopped = tstamp;
- event->pmu->del(event, 0);
- event->oncpu = -1;
if (!is_software_event(event))
cpuctx->active_oncpu--;
@@ -1732,7 +1743,6 @@ group_sched_out(struct perf_event *group_event,
}
#define DETACH_GROUP 0x01UL
-#define DETACH_STATE 0x02UL
/*
* Cross CPU call to remove a performance event
@@ -1752,8 +1762,6 @@ __perf_remove_from_context(struct perf_event *event,
if (flags & DETACH_GROUP)
perf_group_detach(event);
list_del_event(event, ctx);
- if (flags & DETACH_STATE)
- event->state = PERF_EVENT_STATE_EXIT;
if (!ctx->nr_events && ctx->is_active) {
ctx->is_active = 0;
@@ -2063,14 +2071,27 @@ static void add_event_to_ctx(struct perf_event *event,
event->tstamp_stopped = tstamp;
}
-static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
- struct perf_event_context *ctx);
+static void ctx_sched_out(struct perf_event_context *ctx,
+ struct perf_cpu_context *cpuctx,
+ enum event_type_t event_type);
static void
ctx_sched_in(struct perf_event_context *ctx,
struct perf_cpu_context *cpuctx,
enum event_type_t event_type,
struct task_struct *task);
+static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
+ struct perf_event_context *ctx)
+{
+ if (!cpuctx->task_ctx)
+ return;
+
+ if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
+ return;
+
+ ctx_sched_out(ctx, cpuctx, EVENT_ALL);
+}
+
static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
struct perf_event_context *ctx,
struct task_struct *task)
@@ -2097,49 +2118,68 @@ static void ctx_resched(struct perf_cpu_context *cpuctx,
/*
* Cross CPU call to install and enable a performance event
*
- * Must be called with ctx->mutex held
+ * Very similar to remote_function() + event_function() but cannot assume that
+ * things like ctx->is_active and cpuctx->task_ctx are set.
*/
static int __perf_install_in_context(void *info)
{
- struct perf_event_context *ctx = info;
+ struct perf_event *event = info;
+ struct perf_event_context *ctx = event->ctx;
struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
struct perf_event_context *task_ctx = cpuctx->task_ctx;
+ bool activate = true;
+ int ret = 0;
raw_spin_lock(&cpuctx->ctx.lock);
if (ctx->task) {
raw_spin_lock(&ctx->lock);
- /*
- * If we hit the 'wrong' task, we've since scheduled and
- * everything should be sorted, nothing to do!
- */
task_ctx = ctx;
- if (ctx->task != current)
+
+ /* If we're on the wrong CPU, try again */
+ if (task_cpu(ctx->task) != smp_processor_id()) {
+ ret = -ESRCH;
goto unlock;
+ }
/*
- * If task_ctx is set, it had better be to us.
+ * If we're on the right CPU, see if the task we target is
+ * current, if not we don't have to activate the ctx, a future
+ * context switch will do that for us.
*/
- WARN_ON_ONCE(cpuctx->task_ctx != ctx && cpuctx->task_ctx);
+ if (ctx->task != current)
+ activate = false;
+ else
+ WARN_ON_ONCE(cpuctx->task_ctx && cpuctx->task_ctx != ctx);
+
} else if (task_ctx) {
raw_spin_lock(&task_ctx->lock);
}
- ctx_resched(cpuctx, task_ctx);
+ if (activate) {
+ ctx_sched_out(ctx, cpuctx, EVENT_TIME);
+ add_event_to_ctx(event, ctx);
+ ctx_resched(cpuctx, task_ctx);
+ } else {
+ add_event_to_ctx(event, ctx);
+ }
+
unlock:
perf_ctx_unlock(cpuctx, task_ctx);
- return 0;
+ return ret;
}
/*
- * Attach a performance event to a context
+ * Attach a performance event to a context.
+ *
+ * Very similar to event_function_call, see comment there.
*/
static void
perf_install_in_context(struct perf_event_context *ctx,
struct perf_event *event,
int cpu)
{
- struct task_struct *task = NULL;
+ struct task_struct *task = READ_ONCE(ctx->task);
lockdep_assert_held(&ctx->mutex);
@@ -2147,40 +2187,46 @@ perf_install_in_context(struct perf_event_context *ctx,
if (event->cpu != -1)
event->cpu = cpu;
+ if (!task) {
+ cpu_function_call(cpu, __perf_install_in_context, event);
+ return;
+ }
+
+ /*
+ * Should not happen, we validate the ctx is still alive before calling.
+ */
+ if (WARN_ON_ONCE(task == TASK_TOMBSTONE))
+ return;
+
/*
* Installing events is tricky because we cannot rely on ctx->is_active
* to be set in case this is the nr_events 0 -> 1 transition.
- *
- * So what we do is we add the event to the list here, which will allow
- * a future context switch to DTRT and then send a racy IPI. If the IPI
- * fails to hit the right task, this means a context switch must have
- * happened and that will have taken care of business.
*/
- raw_spin_lock_irq(&ctx->lock);
- task = ctx->task;
+again:
/*
- * Worse, we cannot even rely on the ctx actually existing anymore. If
- * between find_get_context() and perf_install_in_context() the task
- * went through perf_event_exit_task() its dead and we should not be
- * adding new events.
+ * Cannot use task_function_call() because we need to run on the task's
+ * CPU regardless of whether its current or not.
*/
- if (task == TASK_TOMBSTONE) {
+ if (!cpu_function_call(task_cpu(task), __perf_install_in_context, event))
+ return;
+
+ raw_spin_lock_irq(&ctx->lock);
+ task = ctx->task;
+ if (WARN_ON_ONCE(task == TASK_TOMBSTONE)) {
+ /*
+ * Cannot happen because we already checked above (which also
+ * cannot happen), and we hold ctx->mutex, which serializes us
+ * against perf_event_exit_task_context().
+ */
raw_spin_unlock_irq(&ctx->lock);
return;
}
- update_context_time(ctx);
+ raw_spin_unlock_irq(&ctx->lock);
/*
- * Update cgrp time only if current cgrp matches event->cgrp.
- * Must be done before calling add_event_to_ctx().
+ * Since !ctx->is_active doesn't mean anything, we must IPI
+ * unconditionally.
*/
- update_cgrp_time_from_event(event);
- add_event_to_ctx(event, ctx);
- raw_spin_unlock_irq(&ctx->lock);
-
- if (task)
- task_function_call(task, __perf_install_in_context, ctx);
- else
- cpu_function_call(cpu, __perf_install_in_context, ctx);
+ goto again;
}
/*
@@ -2219,17 +2265,18 @@ static void __perf_event_enable(struct perf_event *event,
event->state <= PERF_EVENT_STATE_ERROR)
return;
- update_context_time(ctx);
+ if (ctx->is_active)
+ ctx_sched_out(ctx, cpuctx, EVENT_TIME);
+
__perf_event_mark_enabled(event);
if (!ctx->is_active)
return;
if (!event_filter_match(event)) {
- if (is_cgroup_event(event)) {
- perf_cgroup_set_timestamp(current, ctx); // XXX ?
+ if (is_cgroup_event(event))
perf_cgroup_defer_enabled(event);
- }
+ ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
return;
}
@@ -2237,8 +2284,10 @@ static void __perf_event_enable(struct perf_event *event,
* If the event is in a group and isn't the group leader,
* then don't put it on unless the group is on.
*/
- if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE)
+ if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE) {
+ ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
return;
+ }
task_ctx = cpuctx->task_ctx;
if (ctx->task)
@@ -2344,24 +2393,33 @@ static void ctx_sched_out(struct perf_event_context *ctx,
}
ctx->is_active &= ~event_type;
+ if (!(ctx->is_active & EVENT_ALL))
+ ctx->is_active = 0;
+
if (ctx->task) {
WARN_ON_ONCE(cpuctx->task_ctx != ctx);
if (!ctx->is_active)
cpuctx->task_ctx = NULL;
}
- update_context_time(ctx);
- update_cgrp_time_from_cpuctx(cpuctx);
- if (!ctx->nr_active)
+ is_active ^= ctx->is_active; /* changed bits */
+
+ if (is_active & EVENT_TIME) {
+ /* update (and stop) ctx time */
+ update_context_time(ctx);
+ update_cgrp_time_from_cpuctx(cpuctx);
+ }
+
+ if (!ctx->nr_active || !(is_active & EVENT_ALL))
return;
perf_pmu_disable(ctx->pmu);
- if ((is_active & EVENT_PINNED) && (event_type & EVENT_PINNED)) {
+ if (is_active & EVENT_PINNED) {
list_for_each_entry(event, &ctx->pinned_groups, group_entry)
group_sched_out(event, cpuctx, ctx);
}
- if ((is_active & EVENT_FLEXIBLE) && (event_type & EVENT_FLEXIBLE)) {
+ if (is_active & EVENT_FLEXIBLE) {
list_for_each_entry(event, &ctx->flexible_groups, group_entry)
group_sched_out(event, cpuctx, ctx);
}
@@ -2641,18 +2699,6 @@ void __perf_event_task_sched_out(struct task_struct *task,
perf_cgroup_sched_out(task, next);
}
-static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
- struct perf_event_context *ctx)
-{
- if (!cpuctx->task_ctx)
- return;
-
- if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
- return;
-
- ctx_sched_out(ctx, cpuctx, EVENT_ALL);
-}
-
/*
* Called with IRQs disabled
*/
@@ -2735,7 +2781,7 @@ ctx_sched_in(struct perf_event_context *ctx,
if (likely(!ctx->nr_events))
return;
- ctx->is_active |= event_type;
+ ctx->is_active |= (event_type | EVENT_TIME);
if (ctx->task) {
if (!is_active)
cpuctx->task_ctx = ctx;
@@ -2743,18 +2789,24 @@ ctx_sched_in(struct perf_event_context *ctx,
WARN_ON_ONCE(cpuctx->task_ctx != ctx);
}
- now = perf_clock();
- ctx->timestamp = now;
- perf_cgroup_set_timestamp(task, ctx);
+ is_active ^= ctx->is_active; /* changed bits */
+
+ if (is_active & EVENT_TIME) {
+ /* start ctx time */
+ now = perf_clock();
+ ctx->timestamp = now;
+ perf_cgroup_set_timestamp(task, ctx);
+ }
+
/*
* First go through the list and put on any pinned groups
* in order to give them the best chance of going on.
*/
- if (!(is_active & EVENT_PINNED) && (event_type & EVENT_PINNED))
+ if (is_active & EVENT_PINNED)
ctx_pinned_sched_in(ctx, cpuctx);
/* Then walk through the lower prio flexible groups */
- if (!(is_active & EVENT_FLEXIBLE) && (event_type & EVENT_FLEXIBLE))
+ if (is_active & EVENT_FLEXIBLE)
ctx_flexible_sched_in(ctx, cpuctx);
}
@@ -3120,6 +3172,7 @@ static void perf_event_enable_on_exec(int ctxn)
cpuctx = __get_cpu_context(ctx);
perf_ctx_lock(cpuctx, ctx);
+ ctx_sched_out(ctx, cpuctx, EVENT_TIME);
list_for_each_entry(event, &ctx->event_list, event_entry)
enabled |= event_enable_on_exec(event, ctx);
@@ -3537,12 +3590,22 @@ static void unaccount_event(struct perf_event *event)
if (has_branch_stack(event))
dec = true;
- if (dec)
- static_key_slow_dec_deferred(&perf_sched_events);
+ if (dec) {
+ if (!atomic_add_unless(&perf_sched_count, -1, 1))
+ schedule_delayed_work(&perf_sched_work, HZ);
+ }
unaccount_event_cpu(event, event->cpu);
}
+static void perf_sched_delayed(struct work_struct *work)
+{
+ mutex_lock(&perf_sched_mutex);
+ if (atomic_dec_and_test(&perf_sched_count))
+ static_branch_disable(&perf_sched_events);
+ mutex_unlock(&perf_sched_mutex);
+}
+
/*
* The following implement mutual exclusion of events on "exclusive" pmus
* (PERF_PMU_CAP_EXCLUSIVE). Such pmus can only have one event scheduled
@@ -3752,30 +3815,42 @@ static void put_event(struct perf_event *event)
*/
int perf_event_release_kernel(struct perf_event *event)
{
- struct perf_event_context *ctx;
+ struct perf_event_context *ctx = event->ctx;
struct perf_event *child, *tmp;
+ /*
+ * If we got here through err_file: fput(event_file); we will not have
+ * attached to a context yet.
+ */
+ if (!ctx) {
+ WARN_ON_ONCE(event->attach_state &
+ (PERF_ATTACH_CONTEXT|PERF_ATTACH_GROUP));
+ goto no_ctx;
+ }
+
if (!is_kernel_event(event))
perf_remove_from_owner(event);
ctx = perf_event_ctx_lock(event);
WARN_ON_ONCE(ctx->parent_ctx);
- perf_remove_from_context(event, DETACH_GROUP | DETACH_STATE);
- perf_event_ctx_unlock(event, ctx);
+ perf_remove_from_context(event, DETACH_GROUP);
+ raw_spin_lock_irq(&ctx->lock);
/*
- * At this point we must have event->state == PERF_EVENT_STATE_EXIT,
- * either from the above perf_remove_from_context() or through
- * perf_event_exit_event().
+ * Mark this even as STATE_DEAD, there is no external reference to it
+ * anymore.
*
- * Therefore, anybody acquiring event->child_mutex after the below
- * loop _must_ also see this, most importantly inherit_event() which
- * will avoid placing more children on the list.
+ * Anybody acquiring event->child_mutex after the below loop _must_
+ * also see this, most importantly inherit_event() which will avoid
+ * placing more children on the list.
*
* Thus this guarantees that we will in fact observe and kill _ALL_
* child events.
*/
- WARN_ON_ONCE(event->state != PERF_EVENT_STATE_EXIT);
+ event->state = PERF_EVENT_STATE_DEAD;
+ raw_spin_unlock_irq(&ctx->lock);
+
+ perf_event_ctx_unlock(event, ctx);
again:
mutex_lock(&event->child_mutex);
@@ -3830,8 +3905,8 @@ again:
}
mutex_unlock(&event->child_mutex);
- /* Must be the last reference */
- put_event(event);
+no_ctx:
+ put_event(event); /* Must be the 'last' reference */
return 0;
}
EXPORT_SYMBOL_GPL(perf_event_release_kernel);
@@ -3988,7 +4063,7 @@ static bool is_event_hup(struct perf_event *event)
{
bool no_children;
- if (event->state != PERF_EVENT_STATE_EXIT)
+ if (event->state > PERF_EVENT_STATE_EXIT)
return false;
mutex_lock(&event->child_mutex);
@@ -7769,8 +7844,28 @@ static void account_event(struct perf_event *event)
if (is_cgroup_event(event))
inc = true;
- if (inc)
- static_key_slow_inc(&perf_sched_events.key);
+ if (inc) {
+ if (atomic_inc_not_zero(&perf_sched_count))
+ goto enabled;
+
+ mutex_lock(&perf_sched_mutex);
+ if (!atomic_read(&perf_sched_count)) {
+ static_branch_enable(&perf_sched_events);
+ /*
+ * Guarantee that all CPUs observe they key change and
+ * call the perf scheduling hooks before proceeding to
+ * install events that need them.
+ */
+ synchronize_sched();
+ }
+ /*
+ * Now that we have waited for the sync_sched(), allow further
+ * increments to by-pass the mutex.
+ */
+ atomic_inc(&perf_sched_count);
+ mutex_unlock(&perf_sched_mutex);
+ }
+enabled:
account_event_cpu(event, event->cpu);
}
@@ -8389,10 +8484,19 @@ SYSCALL_DEFINE5(perf_event_open,
if (move_group) {
gctx = group_leader->ctx;
mutex_lock_double(&gctx->mutex, &ctx->mutex);
+ if (gctx->task == TASK_TOMBSTONE) {
+ err = -ESRCH;
+ goto err_locked;
+ }
} else {
mutex_lock(&ctx->mutex);
}
+ if (ctx->task == TASK_TOMBSTONE) {
+ err = -ESRCH;
+ goto err_locked;
+ }
+
if (!perf_event_validate_size(event)) {
err = -E2BIG;
goto err_locked;
@@ -8509,7 +8613,12 @@ err_context:
perf_unpin_context(ctx);
put_ctx(ctx);
err_alloc:
- free_event(event);
+ /*
+ * If event_file is set, the fput() above will have called ->release()
+ * and that will take care of freeing the event.
+ */
+ if (!event_file)
+ free_event(event);
err_cpus:
put_online_cpus();
err_task:
@@ -8563,12 +8672,14 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
WARN_ON_ONCE(ctx->parent_ctx);
mutex_lock(&ctx->mutex);
+ if (ctx->task == TASK_TOMBSTONE) {
+ err = -ESRCH;
+ goto err_unlock;
+ }
+
if (!exclusive_event_installable(event, ctx)) {
- mutex_unlock(&ctx->mutex);
- perf_unpin_context(ctx);
- put_ctx(ctx);
err = -EBUSY;
- goto err_free;
+ goto err_unlock;
}
perf_install_in_context(ctx, event, cpu);
@@ -8577,6 +8688,10 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
return event;
+err_unlock:
+ mutex_unlock(&ctx->mutex);
+ perf_unpin_context(ctx);
+ put_ctx(ctx);
err_free:
free_event(event);
err:
@@ -8695,7 +8810,7 @@ perf_event_exit_event(struct perf_event *child_event,
if (parent_event)
perf_group_detach(child_event);
list_del_event(child_event, child_ctx);
- child_event->state = PERF_EVENT_STATE_EXIT; /* see perf_event_release_kernel() */
+ child_event->state = PERF_EVENT_STATE_EXIT; /* is_event_hup() */
raw_spin_unlock_irq(&child_ctx->lock);
/*
@@ -9313,9 +9428,6 @@ void __init perf_event_init(void)
ret = init_hw_breakpoint();
WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
- /* do not patch jump label more than once per second */
- jump_label_rate_limit(&perf_sched_events, HZ);
-
/*
* Build time assertion that we keep the data_head at the intended
* location. IOW, validation we got the __reserved[] size right.
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 7a1b5c3ef14e..b981a7b023f0 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -136,8 +136,10 @@ void *devm_memremap(struct device *dev, resource_size_t offset,
if (addr) {
*ptr = addr;
devres_add(dev, ptr);
- } else
+ } else {
devres_free(ptr);
+ return ERR_PTR(-ENXIO);
+ }
return addr;
}
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index cd64c979d0e1..57b939c81bce 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -420,7 +420,7 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se,
* entity.
*/
if (dl_time_before(dl_se->deadline, rq_clock(rq))) {
- printk_deferred_once("sched: DL replenish lagged to much\n");
+ printk_deferred_once("sched: DL replenish lagged too much\n");
dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
dl_se->runtime = pi_se->dl_runtime;
}
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 4b8caa392b86..3e4ffb3ace5f 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -13,8 +13,6 @@
#include <linux/ctype.h>
#include "trace.h"
-static DEFINE_PER_CPU(int, bpf_prog_active);
-
/**
* trace_call_bpf - invoke BPF program
* @prog: BPF program
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index f333e57c4614..05ddc0820771 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -97,16 +97,16 @@ trace_find_event_field(struct trace_event_call *call, char *name)
struct ftrace_event_field *field;
struct list_head *head;
- field = __find_event_field(&ftrace_generic_fields, name);
+ head = trace_get_fields(call);
+ field = __find_event_field(head, name);
if (field)
return field;
- field = __find_event_field(&ftrace_common_fields, name);
+ field = __find_event_field(&ftrace_generic_fields, name);
if (field)
return field;
- head = trace_get_fields(call);
- return __find_event_field(head, name);
+ return __find_event_field(&ftrace_common_fields, name);
}
static int __trace_define_field(struct list_head *head, const char *type,
@@ -171,8 +171,10 @@ static int trace_define_generic_fields(void)
{
int ret;
- __generic_field(int, cpu, FILTER_OTHER);
- __generic_field(char *, comm, FILTER_PTR_STRING);
+ __generic_field(int, CPU, FILTER_CPU);
+ __generic_field(int, cpu, FILTER_CPU);
+ __generic_field(char *, COMM, FILTER_COMM);
+ __generic_field(char *, comm, FILTER_COMM);
return ret;
}
@@ -869,7 +871,8 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
* The ftrace subsystem is for showing formats only.
* They can not be enabled or disabled via the event files.
*/
- if (call->class && call->class->reg)
+ if (call->class && call->class->reg &&
+ !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
return file;
}
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index f93a219b18da..6816302542b2 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -1043,13 +1043,14 @@ static int init_pred(struct filter_parse_state *ps,
return -EINVAL;
}
- if (is_string_field(field)) {
+ if (field->filter_type == FILTER_COMM) {
+ filter_build_regex(pred);
+ fn = filter_pred_comm;
+ pred->regex.field_len = TASK_COMM_LEN;
+ } else if (is_string_field(field)) {
filter_build_regex(pred);
- if (!strcmp(field->name, "comm")) {
- fn = filter_pred_comm;
- pred->regex.field_len = TASK_COMM_LEN;
- } else if (field->filter_type == FILTER_STATIC_STRING) {
+ if (field->filter_type == FILTER_STATIC_STRING) {
fn = filter_pred_string;
pred->regex.field_len = field->size;
} else if (field->filter_type == FILTER_DYN_STRING)
@@ -1072,7 +1073,7 @@ static int init_pred(struct filter_parse_state *ps,
}
pred->val = val;
- if (!strcmp(field->name, "cpu"))
+ if (field->filter_type == FILTER_CPU)
fn = filter_pred_cpu;
else
fn = select_comparison_fn(pred->op, field->size,
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 202df6cffcca..2a1abbaca10e 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -156,7 +156,11 @@ check_stack(unsigned long ip, unsigned long *stack)
for (; p < top && i < stack_trace_max.nr_entries; p++) {
if (stack_dump_trace[i] == ULONG_MAX)
break;
- if (*p == stack_dump_trace[i]) {
+ /*
+ * The READ_ONCE_NOCHECK is used to let KASAN know that
+ * this is not a stack-out-of-bounds error.
+ */
+ if ((READ_ONCE_NOCHECK(*p)) == stack_dump_trace[i]) {
stack_dump_trace[x] = stack_dump_trace[i++];
this_size = stack_trace_index[x++] =
(top - p) * sizeof(unsigned long);
diff --git a/lib/random32.c b/lib/random32.c
index 12111910ccd0..510d1ce7d4d2 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -255,6 +255,7 @@ void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state)
prandom_warmup(state);
}
}
+EXPORT_SYMBOL(prandom_seed_full_state);
/*
* Generate better values after random number generator
diff --git a/mm/filemap.c b/mm/filemap.c
index 23edccecadb0..3461d97ecb30 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -446,7 +446,8 @@ int filemap_write_and_wait(struct address_space *mapping)
{
int err = 0;
- if (mapping->nrpages) {
+ if ((!dax_mapping(mapping) && mapping->nrpages) ||
+ (dax_mapping(mapping) && mapping->nrexceptional)) {
err = filemap_fdatawrite(mapping);
/*
* Even if the above returned error, the pages may be
@@ -482,13 +483,8 @@ int filemap_write_and_wait_range(struct address_space *mapping,
{
int err = 0;
- if (dax_mapping(mapping) && mapping->nrexceptional) {
- err = dax_writeback_mapping_range(mapping, lstart, lend);
- if (err)
- return err;
- }
-
- if (mapping->nrpages) {
+ if ((!dax_mapping(mapping) && mapping->nrpages) ||
+ (dax_mapping(mapping) && mapping->nrexceptional)) {
err = __filemap_fdatawrite_range(mapping, lstart, lend,
WB_SYNC_ALL);
/* See comment of filemap_write_and_wait() */
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 1c317b85ea7d..e10a4fee88d2 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2836,6 +2836,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
pgtable_t pgtable;
pmd_t _pmd;
bool young, write, dirty;
+ unsigned long addr;
int i;
VM_BUG_ON(haddr & ~HPAGE_PMD_MASK);
@@ -2865,7 +2866,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
pgtable = pgtable_trans_huge_withdraw(mm, pmd);
pmd_populate(mm, &_pmd, pgtable);
- for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
+ for (i = 0, addr = haddr; i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE) {
pte_t entry, *pte;
/*
* Note that NUMA hinting access restrictions are not
@@ -2886,9 +2887,9 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
}
if (dirty)
SetPageDirty(page + i);
- pte = pte_offset_map(&_pmd, haddr);
+ pte = pte_offset_map(&_pmd, addr);
BUG_ON(!pte_none(*pte));
- set_pte_at(mm, haddr, pte, entry);
+ set_pte_at(mm, addr, pte, entry);
atomic_inc(&page[i]._mapcount);
pte_unmap(pte);
}
@@ -2938,7 +2939,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
pmd_populate(mm, pmd, pgtable);
if (freeze) {
- for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
+ for (i = 0; i < HPAGE_PMD_NR; i++) {
page_remove_rmap(page + i, false);
put_page(page + i);
}
diff --git a/mm/memory.c b/mm/memory.c
index 635451abc8f7..8132787ae4d5 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3404,8 +3404,18 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
if (unlikely(pmd_none(*pmd)) &&
unlikely(__pte_alloc(mm, vma, pmd, address)))
return VM_FAULT_OOM;
- /* if an huge pmd materialized from under us just retry later */
- if (unlikely(pmd_trans_huge(*pmd) || pmd_devmap(*pmd)))
+ /*
+ * If a huge pmd materialized under us just retry later. Use
+ * pmd_trans_unstable() instead of pmd_trans_huge() to ensure the pmd
+ * didn't become pmd_trans_huge under us and then back to pmd_none, as
+ * a result of MADV_DONTNEED running immediately after a huge pmd fault
+ * in a different thread of this mm, in turn leading to a misleading
+ * pmd_trans_huge() retval. All we have to ensure is that it is a
+ * regular pmd that we can walk with pte_offset_map() and we can do that
+ * through an atomic read in C, which is what pmd_trans_unstable()
+ * provides.
+ */
+ if (unlikely(pmd_trans_unstable(pmd) || pmd_devmap(*pmd)))
return 0;
/*
* A regular pmd is established and it can't morph into a huge pmd
diff --git a/mm/migrate.c b/mm/migrate.c
index b1034f9c77e7..3ad0fea5c438 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1582,7 +1582,7 @@ static struct page *alloc_misplaced_dst_page(struct page *page,
(GFP_HIGHUSER_MOVABLE |
__GFP_THISNODE | __GFP_NOMEMALLOC |
__GFP_NORETRY | __GFP_NOWARN) &
- ~(__GFP_IO | __GFP_FS), 0);
+ ~__GFP_RECLAIM, 0);
return newpage;
}
diff --git a/net/Kconfig b/net/Kconfig
index 2760825e53fa..10640d5f8bee 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -360,6 +360,7 @@ source "net/can/Kconfig"
source "net/irda/Kconfig"
source "net/bluetooth/Kconfig"
source "net/rxrpc/Kconfig"
+source "net/kcm/Kconfig"
config FIB_RULES
bool
diff --git a/net/Makefile b/net/Makefile
index a5d04098dfce..81d14119eab5 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -34,6 +34,7 @@ obj-$(CONFIG_IRDA) += irda/
obj-$(CONFIG_BT) += bluetooth/
obj-$(CONFIG_SUNRPC) += sunrpc/
obj-$(CONFIG_AF_RXRPC) += rxrpc/
+obj-$(CONFIG_AF_KCM) += kcm/
obj-$(CONFIG_ATM) += atm/
obj-$(CONFIG_L2TP) += l2tp/
obj-$(CONFIG_DECNET) += decnet/
diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c
index b563a3f5f2a8..2fa3be965101 100644
--- a/net/ax25/ax25_ip.c
+++ b/net/ax25/ax25_ip.c
@@ -228,8 +228,23 @@ netdev_tx_t ax25_ip_xmit(struct sk_buff *skb)
}
#endif
+static bool ax25_validate_header(const char *header, unsigned int len)
+{
+ ax25_digi digi;
+
+ if (!len)
+ return false;
+
+ if (header[0])
+ return true;
+
+ return ax25_addr_parse(header + 1, len - 1, NULL, NULL, &digi, NULL,
+ NULL);
+}
+
const struct header_ops ax25_header_ops = {
.create = ax25_hard_header,
+ .validate = ax25_validate_header,
};
EXPORT_SYMBOL(ax25_header_ops);
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 82e3e9705017..dcea4f4c62b3 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -723,6 +723,8 @@ int br_fdb_dump(struct sk_buff *skb,
struct net_bridge_fdb_entry *f;
hlist_for_each_entry_rcu(f, &br->hash[i], hlist) {
+ int err;
+
if (idx < cb->args[0])
goto skip;
@@ -741,12 +743,15 @@ int br_fdb_dump(struct sk_buff *skb,
if (!filter_dev && f->dst)
goto skip;
- if (fdb_fill_info(skb, br, f,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- RTM_NEWNEIGH,
- NLM_F_MULTI) < 0)
+ err = fdb_fill_info(skb, br, f,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWNEIGH,
+ NLM_F_MULTI);
+ if (err < 0) {
+ cb->args[1] = err;
break;
+ }
skip:
++idx;
}
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 7ddbe7ec81d6..44114a94c576 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -37,6 +37,7 @@
#include <net/addrconf.h>
#include <net/route.h>
#include <net/netfilter/br_netfilter.h>
+#include <net/netns/generic.h>
#include <asm/uaccess.h>
#include "br_private.h"
@@ -44,6 +45,12 @@
#include <linux/sysctl.h>
#endif
+static int brnf_net_id __read_mostly;
+
+struct brnf_net {
+ bool enabled;
+};
+
#ifdef CONFIG_SYSCTL
static struct ctl_table_header *brnf_sysctl_header;
static int brnf_call_iptables __read_mostly = 1;
@@ -938,6 +945,53 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = {
},
};
+static int brnf_device_event(struct notifier_block *unused, unsigned long event,
+ void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct brnf_net *brnet;
+ struct net *net;
+ int ret;
+
+ if (event != NETDEV_REGISTER || !(dev->priv_flags & IFF_EBRIDGE))
+ return NOTIFY_DONE;
+
+ ASSERT_RTNL();
+
+ net = dev_net(dev);
+ brnet = net_generic(net, brnf_net_id);
+ if (brnet->enabled)
+ return NOTIFY_OK;
+
+ ret = nf_register_net_hooks(net, br_nf_ops, ARRAY_SIZE(br_nf_ops));
+ if (ret)
+ return NOTIFY_BAD;
+
+ brnet->enabled = true;
+ return NOTIFY_OK;
+}
+
+static void __net_exit brnf_exit_net(struct net *net)
+{
+ struct brnf_net *brnet = net_generic(net, brnf_net_id);
+
+ if (!brnet->enabled)
+ return;
+
+ nf_unregister_net_hooks(net, br_nf_ops, ARRAY_SIZE(br_nf_ops));
+ brnet->enabled = false;
+}
+
+static struct pernet_operations brnf_net_ops __read_mostly = {
+ .exit = brnf_exit_net,
+ .id = &brnf_net_id,
+ .size = sizeof(struct brnf_net),
+};
+
+static struct notifier_block brnf_notifier __read_mostly = {
+ .notifier_call = brnf_device_event,
+};
+
#ifdef CONFIG_SYSCTL
static
int brnf_sysctl_call_tables(struct ctl_table *ctl, int write,
@@ -1003,16 +1057,23 @@ static int __init br_netfilter_init(void)
{
int ret;
- ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
+ ret = register_pernet_subsys(&brnf_net_ops);
if (ret < 0)
return ret;
+ ret = register_netdevice_notifier(&brnf_notifier);
+ if (ret < 0) {
+ unregister_pernet_subsys(&brnf_net_ops);
+ return ret;
+ }
+
#ifdef CONFIG_SYSCTL
brnf_sysctl_header = register_net_sysctl(&init_net, "net/bridge", brnf_table);
if (brnf_sysctl_header == NULL) {
printk(KERN_WARNING
"br_netfilter: can't register to sysctl.\n");
- nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
+ unregister_netdevice_notifier(&brnf_notifier);
+ unregister_pernet_subsys(&brnf_net_ops);
return -ENOMEM;
}
#endif
@@ -1024,7 +1085,8 @@ static int __init br_netfilter_init(void)
static void __exit br_netfilter_fini(void)
{
RCU_INIT_POINTER(nf_br_ops, NULL);
- nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
+ unregister_netdevice_notifier(&brnf_notifier);
+ unregister_pernet_subsys(&brnf_net_ops);
#ifdef CONFIG_SYSCTL
unregister_net_sysctl_table(brnf_sysctl_header);
#endif
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index c22816a0b1b1..e23449094188 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -562,6 +562,14 @@ int br_set_max_age(struct net_bridge *br, unsigned long val)
}
+/* Set time interval that dynamic forwarding entries live
+ * For pure software bridge, allow values outside the 802.1
+ * standard specification for special cases:
+ * 0 - entry never ages (all permanant)
+ * 1 - entry disappears (no persistance)
+ *
+ * Offloaded switch entries maybe more restrictive
+ */
int br_set_ageing_time(struct net_bridge *br, u32 ageing_time)
{
struct switchdev_attr attr = {
@@ -573,9 +581,6 @@ int br_set_ageing_time(struct net_bridge *br, u32 ageing_time)
unsigned long t = clock_t_to_jiffies(ageing_time);
int err;
- if (t < BR_MIN_AGEING_TIME || t > BR_MAX_AGEING_TIME)
- return -ERANGE;
-
err = switchdev_port_attr_set(br->dev, &attr);
if (err)
return err;
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 9cfedf565f5b..9382619a405b 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1197,6 +1197,13 @@ static bool ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor,
return new_piece;
}
+static size_t sizeof_footer(struct ceph_connection *con)
+{
+ return (con->peer_features & CEPH_FEATURE_MSG_AUTH) ?
+ sizeof(struct ceph_msg_footer) :
+ sizeof(struct ceph_msg_footer_old);
+}
+
static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
{
BUG_ON(!msg);
@@ -2335,9 +2342,9 @@ static int read_partial_message(struct ceph_connection *con)
ceph_pr_addr(&con->peer_addr.in_addr),
seq, con->in_seq + 1);
con->in_base_pos = -front_len - middle_len - data_len -
- sizeof(m->footer);
+ sizeof_footer(con);
con->in_tag = CEPH_MSGR_TAG_READY;
- return 0;
+ return 1;
} else if ((s64)seq - (s64)con->in_seq > 1) {
pr_err("read_partial_message bad seq %lld expected %lld\n",
seq, con->in_seq + 1);
@@ -2360,10 +2367,10 @@ static int read_partial_message(struct ceph_connection *con)
/* skip this message */
dout("alloc_msg said skip message\n");
con->in_base_pos = -front_len - middle_len - data_len -
- sizeof(m->footer);
+ sizeof_footer(con);
con->in_tag = CEPH_MSGR_TAG_READY;
con->in_seq++;
- return 0;
+ return 1;
}
BUG_ON(!con->in_msg);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 3534e12683d3..5bc053778fed 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -2853,8 +2853,8 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
mutex_lock(&osdc->request_mutex);
req = __lookup_request(osdc, tid);
if (!req) {
- pr_warn("%s osd%d tid %llu unknown, skipping\n",
- __func__, osd->o_osd, tid);
+ dout("%s osd%d tid %llu unknown, skipping\n", __func__,
+ osd->o_osd, tid);
m = NULL;
*skip = 1;
goto out;
diff --git a/net/core/filter.c b/net/core/filter.c
index 5e2a3b5e5196..6fc3893a6170 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1353,7 +1353,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
unsigned int len = (unsigned int) r4;
void *ptr;
- if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM)))
+ if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
return -EINVAL;
/* bpf verifier guarantees that:
@@ -1384,11 +1384,13 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
if (flags & BPF_F_RECOMPUTE_CSUM)
skb_postpush_rcsum(skb, ptr, len);
+ if (flags & BPF_F_INVALIDATE_HASH)
+ skb_clear_hash(skb);
return 0;
}
-const struct bpf_func_proto bpf_skb_store_bytes_proto = {
+static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
.func = bpf_skb_store_bytes,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -1419,7 +1421,7 @@ static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
return 0;
}
-const struct bpf_func_proto bpf_skb_load_bytes_proto = {
+static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
.func = bpf_skb_load_bytes,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -1447,6 +1449,12 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
return -EFAULT;
switch (flags & BPF_F_HDR_FIELD_MASK) {
+ case 0:
+ if (unlikely(from != 0))
+ return -EINVAL;
+
+ csum_replace_by_diff(ptr, to);
+ break;
case 2:
csum_replace2(ptr, from, to);
break;
@@ -1464,7 +1472,7 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
return 0;
}
-const struct bpf_func_proto bpf_l3_csum_replace_proto = {
+static const struct bpf_func_proto bpf_l3_csum_replace_proto = {
.func = bpf_l3_csum_replace,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -1523,7 +1531,7 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
return 0;
}
-const struct bpf_func_proto bpf_l4_csum_replace_proto = {
+static const struct bpf_func_proto bpf_l4_csum_replace_proto = {
.func = bpf_l4_csum_replace,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -1562,7 +1570,7 @@ static u64 bpf_csum_diff(u64 r1, u64 from_size, u64 r3, u64 to_size, u64 seed)
return csum_partial(sp->diff, diff_size, seed);
}
-const struct bpf_func_proto bpf_csum_diff_proto = {
+static const struct bpf_func_proto bpf_csum_diff_proto = {
.func = bpf_csum_diff,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -1600,7 +1608,7 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
return dev_queue_xmit(skb2);
}
-const struct bpf_func_proto bpf_clone_redirect_proto = {
+static const struct bpf_func_proto bpf_clone_redirect_proto = {
.func = bpf_clone_redirect,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -1652,7 +1660,7 @@ int skb_do_redirect(struct sk_buff *skb)
return dev_queue_xmit(skb);
}
-const struct bpf_func_proto bpf_redirect_proto = {
+static const struct bpf_func_proto bpf_redirect_proto = {
.func = bpf_redirect,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -1762,12 +1770,15 @@ static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
return -EPROTO;
if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
switch (size) {
+ case offsetof(struct bpf_tunnel_key, tunnel_label):
+ goto set_compat;
case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
/* Fixup deprecated structure layouts here, so we have
* a common path later on.
*/
if (ip_tunnel_info_af(info) != AF_INET)
return -EINVAL;
+set_compat:
to = (struct bpf_tunnel_key *)compat;
break;
default:
@@ -1779,11 +1790,13 @@ static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
to->tunnel_tos = info->key.tos;
to->tunnel_ttl = info->key.ttl;
- if (flags & BPF_F_TUNINFO_IPV6)
+ if (flags & BPF_F_TUNINFO_IPV6) {
memcpy(to->remote_ipv6, &info->key.u.ipv6.src,
sizeof(to->remote_ipv6));
- else
+ to->tunnel_label = be32_to_cpu(info->key.label);
+ } else {
to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src);
+ }
if (unlikely(size != sizeof(struct bpf_tunnel_key)))
memcpy((void *)(long) r2, to, size);
@@ -1791,7 +1804,7 @@ static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
return 0;
}
-const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
+static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
.func = bpf_skb_get_tunnel_key,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -1801,6 +1814,32 @@ const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
.arg4_type = ARG_ANYTHING,
};
+static u64 bpf_skb_get_tunnel_opt(u64 r1, u64 r2, u64 size, u64 r4, u64 r5)
+{
+ struct sk_buff *skb = (struct sk_buff *) (long) r1;
+ u8 *to = (u8 *) (long) r2;
+ const struct ip_tunnel_info *info = skb_tunnel_info(skb);
+
+ if (unlikely(!info ||
+ !(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT)))
+ return -ENOENT;
+ if (unlikely(size < info->options_len))
+ return -ENOMEM;
+
+ ip_tunnel_info_opts_get(to, info);
+
+ return info->options_len;
+}
+
+static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = {
+ .func = bpf_skb_get_tunnel_opt,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_STACK,
+ .arg3_type = ARG_CONST_STACK_SIZE,
+};
+
static struct metadata_dst __percpu *md_dst;
static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
@@ -1811,10 +1850,12 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
u8 compat[sizeof(struct bpf_tunnel_key)];
struct ip_tunnel_info *info;
- if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6)))
+ if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
+ BPF_F_DONT_FRAGMENT)))
return -EINVAL;
if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
switch (size) {
+ case offsetof(struct bpf_tunnel_key, tunnel_label):
case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
/* Fixup deprecated structure layouts here, so we have
* a common path later on.
@@ -1827,6 +1868,8 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
return -EINVAL;
}
}
+ if (unlikely(!(flags & BPF_F_TUNINFO_IPV6) && from->tunnel_label))
+ return -EINVAL;
skb_dst_drop(skb);
dst_hold((struct dst_entry *) md);
@@ -1835,7 +1878,10 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
info = &md->u.tun_info;
info->mode = IP_TUNNEL_INFO_TX;
- info->key.tun_flags = TUNNEL_KEY;
+ info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE;
+ if (flags & BPF_F_DONT_FRAGMENT)
+ info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;
+
info->key.tun_id = cpu_to_be64(from->tunnel_id);
info->key.tos = from->tunnel_tos;
info->key.ttl = from->tunnel_ttl;
@@ -1844,14 +1890,18 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
info->mode |= IP_TUNNEL_INFO_IPV6;
memcpy(&info->key.u.ipv6.dst, from->remote_ipv6,
sizeof(from->remote_ipv6));
+ info->key.label = cpu_to_be32(from->tunnel_label) &
+ IPV6_FLOWLABEL_MASK;
} else {
info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
+ if (flags & BPF_F_ZERO_CSUM_TX)
+ info->key.tun_flags &= ~TUNNEL_CSUM;
}
return 0;
}
-const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
+static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
.func = bpf_skb_set_tunnel_key,
.gpl_only = false,
.ret_type = RET_INTEGER,
@@ -1861,17 +1911,58 @@ const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
.arg4_type = ARG_ANYTHING,
};
-static const struct bpf_func_proto *bpf_get_skb_set_tunnel_key_proto(void)
+#define BPF_TUNLEN_MAX 255
+
+static u64 bpf_skb_set_tunnel_opt(u64 r1, u64 r2, u64 size, u64 r4, u64 r5)
+{
+ struct sk_buff *skb = (struct sk_buff *) (long) r1;
+ u8 *from = (u8 *) (long) r2;
+ struct ip_tunnel_info *info = skb_tunnel_info(skb);
+ const struct metadata_dst *md = this_cpu_ptr(md_dst);
+
+ if (unlikely(info != &md->u.tun_info || (size & (sizeof(u32) - 1))))
+ return -EINVAL;
+ if (unlikely(size > BPF_TUNLEN_MAX))
+ return -ENOMEM;
+
+ ip_tunnel_info_opts_set(info, from, size);
+
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = {
+ .func = bpf_skb_set_tunnel_opt,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_STACK,
+ .arg3_type = ARG_CONST_STACK_SIZE,
+};
+
+static const struct bpf_func_proto *
+bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
{
if (!md_dst) {
- /* race is not possible, since it's called from
- * verifier that is holding verifier mutex
+ BUILD_BUG_ON(FIELD_SIZEOF(struct ip_tunnel_info,
+ options_len) != 1);
+
+ /* Race is not possible, since it's called from verifier
+ * that is holding verifier mutex.
*/
- md_dst = metadata_dst_alloc_percpu(0, GFP_KERNEL);
+ md_dst = metadata_dst_alloc_percpu(BPF_TUNLEN_MAX,
+ GFP_KERNEL);
if (!md_dst)
return NULL;
}
- return &bpf_skb_set_tunnel_key_proto;
+
+ switch (which) {
+ case BPF_FUNC_skb_set_tunnel_key:
+ return &bpf_skb_set_tunnel_key_proto;
+ case BPF_FUNC_skb_set_tunnel_opt:
+ return &bpf_skb_set_tunnel_opt_proto;
+ default:
+ return NULL;
+ }
}
static const struct bpf_func_proto *
@@ -1925,7 +2016,11 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_skb_get_tunnel_key:
return &bpf_skb_get_tunnel_key_proto;
case BPF_FUNC_skb_set_tunnel_key:
- return bpf_get_skb_set_tunnel_key_proto();
+ return bpf_get_skb_set_tunnel_proto(func_id);
+ case BPF_FUNC_skb_get_tunnel_opt:
+ return &bpf_skb_get_tunnel_opt_proto;
+ case BPF_FUNC_skb_set_tunnel_opt:
+ return bpf_get_skb_set_tunnel_proto(func_id);
case BPF_FUNC_redirect:
return &bpf_redirect_proto;
case BPF_FUNC_get_route_realm:
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 7c7b8739b8b8..a669dea146c6 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -19,25 +19,12 @@
#include <net/flow_dissector.h>
#include <scsi/fc/fc_fcoe.h>
-static bool dissector_uses_key(const struct flow_dissector *flow_dissector,
- enum flow_dissector_key_id key_id)
-{
- return flow_dissector->used_keys & (1 << key_id);
-}
-
static void dissector_set_key(struct flow_dissector *flow_dissector,
enum flow_dissector_key_id key_id)
{
flow_dissector->used_keys |= (1 << key_id);
}
-static void *skb_flow_dissector_target(struct flow_dissector *flow_dissector,
- enum flow_dissector_key_id key_id,
- void *target_container)
-{
- return ((char *) target_container) + flow_dissector->offset[key_id];
-}
-
void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
const struct flow_dissector_key *key,
unsigned int key_count)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 6128aac01b11..d2d9e5ebf58e 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2970,6 +2970,7 @@ int ndo_dflt_fdb_dump(struct sk_buff *skb,
nlmsg_populate_fdb(skb, cb, dev, &idx, &dev->mc);
out:
netif_addr_unlock_bh(dev);
+ cb->args[1] = err;
return idx;
}
EXPORT_SYMBOL(ndo_dflt_fdb_dump);
@@ -3003,6 +3004,7 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
ops = br_dev->netdev_ops;
}
+ cb->args[1] = 0;
for_each_netdev(net, dev) {
if (brport_idx && (dev->ifindex != brport_idx))
continue;
@@ -3030,12 +3032,16 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
idx = cops->ndo_fdb_dump(skb, cb, br_dev, dev,
idx);
}
+ if (cb->args[1] == -EMSGSIZE)
+ break;
if (dev->netdev_ops->ndo_fdb_dump)
idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, NULL,
idx);
else
idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx);
+ if (cb->args[1] == -EMSGSIZE)
+ break;
cops = NULL;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7af7ec635d90..51d768e7bc90 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1918,6 +1918,7 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
struct splice_pipe_desc *spd, struct sock *sk)
{
int seg;
+ struct sk_buff *iter;
/* map the linear part :
* If skb->head_frag is set, this 'linear' part is backed by a
@@ -1944,6 +1945,19 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe,
return true;
}
+ skb_walk_frags(skb, iter) {
+ if (*offset >= iter->len) {
+ *offset -= iter->len;
+ continue;
+ }
+ /* __skb_splice_bits() only fails if the output has no room
+ * left, so no point in going over the frag_list for the error
+ * case.
+ */
+ if (__skb_splice_bits(iter, pipe, offset, len, spd, sk))
+ return true;
+ }
+
return false;
}
@@ -1970,9 +1984,7 @@ ssize_t skb_socket_splice(struct sock *sk,
/*
* Map data from the skb to a pipe. Should handle both the linear part,
- * the fragments, and the frag list. It does NOT handle frag lists within
- * the frag list, if such a thing exists. We'd probably need to recurse to
- * handle that cleanly.
+ * the fragments, and the frag list.
*/
int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
struct pipe_inode_info *pipe, unsigned int tlen,
@@ -1991,29 +2003,10 @@ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
.ops = &nosteal_pipe_buf_ops,
.spd_release = sock_spd_release,
};
- struct sk_buff *frag_iter;
int ret = 0;
- /*
- * __skb_splice_bits() only fails if the output has no room left,
- * so no point in going over the frag_list for the error case.
- */
- if (__skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk))
- goto done;
- else if (!tlen)
- goto done;
+ __skb_splice_bits(skb, pipe, &offset, &tlen, &spd, sk);
- /*
- * now see if we have a frag_list to map
- */
- skb_walk_frags(skb, frag_iter) {
- if (!tlen)
- break;
- if (__skb_splice_bits(frag_iter, pipe, &offset, &tlen, &spd, sk))
- break;
- }
-
-done:
if (spd.nr_pages)
ret = splice_cb(sk, pipe, &spd);
@@ -3023,6 +3016,24 @@ int skb_append_pagefrags(struct sk_buff *skb, struct page *page,
EXPORT_SYMBOL_GPL(skb_append_pagefrags);
/**
+ * skb_push_rcsum - push skb and update receive checksum
+ * @skb: buffer to update
+ * @len: length of data pulled
+ *
+ * This function performs an skb_push on the packet and updates
+ * the CHECKSUM_COMPLETE checksum. It should be used on
+ * receive path processing instead of skb_push unless you know
+ * that the checksum difference is zero (e.g., a valid IP header)
+ * or you are setting ip_summed to CHECKSUM_NONE.
+ */
+static unsigned char *skb_push_rcsum(struct sk_buff *skb, unsigned len)
+{
+ skb_push(skb, len);
+ skb_postpush_rcsum(skb, skb->data, len);
+ return skb->data;
+}
+
+/**
* skb_pull_rcsum - pull skb and update receive checksum
* @skb: buffer to update
* @len: length of data pulled
@@ -4167,9 +4178,9 @@ struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb,
if (!pskb_may_pull(skb_chk, offset))
goto err;
- __skb_pull(skb_chk, offset);
+ skb_pull_rcsum(skb_chk, offset);
ret = skb_chkf(skb_chk);
- __skb_push(skb_chk, offset);
+ skb_push_rcsum(skb_chk, offset);
if (ret)
goto err;
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index fa4daba8db55..d8fb47fcad05 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -935,6 +935,14 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst)
{
int i;
+ dst->master_netdev->dsa_ptr = NULL;
+
+ /* If we used a tagging format that doesn't have an ethertype
+ * field, make sure that all packets from this point get sent
+ * without the tag and go through the regular receive path.
+ */
+ wmb();
+
for (i = 0; i < dst->pd->nr_chips; i++) {
struct dsa_switch *ds = dst->ds[i];
@@ -988,14 +996,6 @@ static int dsa_suspend(struct device *d)
struct dsa_switch_tree *dst = platform_get_drvdata(pdev);
int i, ret = 0;
- dst->master_netdev->dsa_ptr = NULL;
-
- /* If we used a tagging format that doesn't have an ethertype
- * field, make sure that all packets from this point get sent
- * without the tag and go through the regular receive path.
- */
- wmb();
-
for (i = 0; i < dst->pd->nr_chips; i++) {
struct dsa_switch *ds = dst->ds[i];
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index c102eb5ac55c..c34c7544d1db 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -665,7 +665,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
*/
if (!in_dev)
- goto out;
+ goto out_free_skb;
arp = arp_hdr(skb);
@@ -673,7 +673,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
default:
if (arp->ar_pro != htons(ETH_P_IP) ||
htons(dev_type) != arp->ar_hrd)
- goto out;
+ goto out_free_skb;
break;
case ARPHRD_ETHER:
case ARPHRD_FDDI:
@@ -690,17 +690,17 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
arp->ar_pro != htons(ETH_P_IP))
- goto out;
+ goto out_free_skb;
break;
case ARPHRD_AX25:
if (arp->ar_pro != htons(AX25_P_IP) ||
arp->ar_hrd != htons(ARPHRD_AX25))
- goto out;
+ goto out_free_skb;
break;
case ARPHRD_NETROM:
if (arp->ar_pro != htons(AX25_P_IP) ||
arp->ar_hrd != htons(ARPHRD_NETROM))
- goto out;
+ goto out_free_skb;
break;
}
@@ -708,7 +708,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
if (arp->ar_op != htons(ARPOP_REPLY) &&
arp->ar_op != htons(ARPOP_REQUEST))
- goto out;
+ goto out_free_skb;
/*
* Extract fields
@@ -733,7 +733,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
*/
if (ipv4_is_multicast(tip) ||
(!IN_DEV_ROUTE_LOCALNET(in_dev) && ipv4_is_loopback(tip)))
- goto out;
+ goto out_free_skb;
/*
* For some 802.11 wireless deployments (and possibly other networks),
@@ -741,7 +741,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
* and thus should not be accepted.
*/
if (sip == tip && IN_DEV_ORCONF(in_dev, DROP_GRATUITOUS_ARP))
- goto out;
+ goto out_free_skb;
/*
* Special case: We must set Frame Relay source Q.922 address
@@ -778,7 +778,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
!arp_ignore(in_dev, sip, tip))
arp_send_dst(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip,
sha, dev->dev_addr, sha, reply_dst);
- goto out;
+ goto out_consume_skb;
}
if (arp->ar_op == htons(ARPOP_REQUEST) &&
@@ -803,7 +803,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
neigh_release(n);
}
}
- goto out;
+ goto out_consume_skb;
} else if (IN_DEV_FORWARD(in_dev)) {
if (addr_type == RTN_UNICAST &&
(arp_fwd_proxy(in_dev, dev, rt) ||
@@ -826,7 +826,7 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
in_dev->arp_parms, skb);
goto out_free_dst;
}
- goto out;
+ goto out_consume_skb;
}
}
}
@@ -876,11 +876,16 @@ static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
neigh_release(n);
}
-out:
+out_consume_skb:
consume_skb(skb);
+
out_free_dst:
dst_release(reply_dst);
- return 0;
+ return NET_RX_SUCCESS;
+
+out_free_skb:
+ kfree_skb(skb);
+ return NET_RX_DROP;
}
static void parp_redo(struct sk_buff *skb)
@@ -924,11 +929,11 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev,
consumeskb:
consume_skb(skb);
- return 0;
+ return NET_RX_SUCCESS;
freeskb:
kfree_skb(skb);
out_of_mem:
- return 0;
+ return NET_RX_DROP;
}
/*
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 88dab0c1670c..780484243e14 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -319,8 +319,6 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head,
skb_gro_pull(skb, hdrlen);
- flush = 0;
-
for (p = *head; p; p = p->next) {
const struct guehdr *guehdr2;
@@ -352,6 +350,7 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head,
goto out_unlock;
pp = ops->callbacks.gro_receive(head, skb);
+ flush = 0;
out_unlock:
rcu_read_unlock();
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 47f4c544c916..540866dbd27d 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -175,8 +175,6 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head,
null_compute_pseudo);
}
- flush = 0;
-
for (p = *head; p; p = p->next) {
const struct gre_base_hdr *greh2;
@@ -213,6 +211,7 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head,
skb_gro_postpull_rcsum(skb, greh, grehlen);
pp = ptype->callbacks.gro_receive(head, skb);
+ flush = 0;
out_unlock:
rcu_read_unlock();
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 2aea9f1a2a31..9b4ca87f70ba 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -350,9 +350,8 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
skb_dst_set(skb, &rt->dst);
skb->dev = dev;
- skb->reserved_tailroom = skb_end_offset(skb) -
- min(mtu, skb_end_offset(skb));
skb_reserve(skb, hlen);
+ skb_tailroom_reserve(skb, mtu, tlen);
skb_reset_network_header(skb);
pip = ip_hdr(skb);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 202437d6087b..31936d387cfd 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -527,11 +527,12 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
+ struct rtable *rt = NULL;
struct flowi4 fl;
- struct rtable *rt;
int min_headroom;
int tunnel_hlen;
__be16 df, flags;
+ bool use_cache;
int err;
tun_info = skb_tunnel_info(skb);
@@ -540,13 +541,14 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
goto err_free_skb;
key = &tun_info->key;
- rt = !skb->mark ? dst_cache_get_ip4(&tun_info->dst_cache, &fl.saddr) :
- NULL;
+ use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
+ if (use_cache)
+ rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl.saddr);
if (!rt) {
rt = gre_get_rt(skb, dev, &fl, key);
if (IS_ERR(rt))
goto err_free_skb;
- if (!skb->mark)
+ if (use_cache)
dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
fl.saddr);
}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index f734c42acdaf..124bf0a66328 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1233,13 +1233,16 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
if (!skb)
return -EINVAL;
- cork->length += size;
if ((size + skb->len > mtu) &&
(sk->sk_protocol == IPPROTO_UDP) &&
(rt->dst.dev->features & NETIF_F_UFO)) {
+ if (skb->ip_summed != CHECKSUM_PARTIAL)
+ return -EOPNOTSUPP;
+
skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
}
+ cork->length += size;
while (size > 0) {
if (skb_is_gso(skb)) {
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index dff8a05739a2..6aad0192443d 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -607,6 +607,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
connected = (tunnel->parms.iph.daddr != 0);
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
dst = tnl_params->daddr;
if (dst == 0) {
/* NBMA tunnel */
@@ -706,7 +708,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
tunnel->err_count--;
- memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
dst_link_failure(skb);
} else
tunnel->err_count = 0;
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index b488cac9c5ca..bf081927e06b 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1780,9 +1780,29 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
return ret;
}
-struct xt_table *arpt_register_table(struct net *net,
- const struct xt_table *table,
- const struct arpt_replace *repl)
+static void __arpt_unregister_table(struct xt_table *table)
+{
+ struct xt_table_info *private;
+ void *loc_cpu_entry;
+ struct module *table_owner = table->me;
+ struct arpt_entry *iter;
+
+ private = xt_unregister_table(table);
+
+ /* Decrease module usage counts and free resources */
+ loc_cpu_entry = private->entries;
+ xt_entry_foreach(iter, loc_cpu_entry, private->size)
+ cleanup_entry(iter);
+ if (private->number > private->initial_entries)
+ module_put(table_owner);
+ xt_free_table_info(private);
+}
+
+int arpt_register_table(struct net *net,
+ const struct xt_table *table,
+ const struct arpt_replace *repl,
+ const struct nf_hook_ops *ops,
+ struct xt_table **res)
{
int ret;
struct xt_table_info *newinfo;
@@ -1791,10 +1811,8 @@ struct xt_table *arpt_register_table(struct net *net,
struct xt_table *new_table;
newinfo = xt_alloc_table_info(repl->size);
- if (!newinfo) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!newinfo)
+ return -ENOMEM;
loc_cpu_entry = newinfo->entries;
memcpy(loc_cpu_entry, repl->entries, repl->size);
@@ -1809,30 +1827,28 @@ struct xt_table *arpt_register_table(struct net *net,
ret = PTR_ERR(new_table);
goto out_free;
}
- return new_table;
+
+ /* set res now, will see skbs right after nf_register_net_hooks */
+ WRITE_ONCE(*res, new_table);
+
+ ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks));
+ if (ret != 0) {
+ __arpt_unregister_table(new_table);
+ *res = NULL;
+ }
+
+ return ret;
out_free:
xt_free_table_info(newinfo);
-out:
- return ERR_PTR(ret);
+ return ret;
}
-void arpt_unregister_table(struct xt_table *table)
+void arpt_unregister_table(struct net *net, struct xt_table *table,
+ const struct nf_hook_ops *ops)
{
- struct xt_table_info *private;
- void *loc_cpu_entry;
- struct module *table_owner = table->me;
- struct arpt_entry *iter;
-
- private = xt_unregister_table(table);
-
- /* Decrease module usage counts and free resources */
- loc_cpu_entry = private->entries;
- xt_entry_foreach(iter, loc_cpu_entry, private->size)
- cleanup_entry(iter);
- if (private->number > private->initial_entries)
- module_put(table_owner);
- xt_free_table_info(private);
+ nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
+ __arpt_unregister_table(table);
}
/* The built-in targets: standard (NULL) and error. */
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 1897ee160920..dd8c80dc32a2 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -17,12 +17,15 @@ MODULE_DESCRIPTION("arptables filter table");
#define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \
(1 << NF_ARP_FORWARD))
+static int __net_init arptable_filter_table_init(struct net *net);
+
static const struct xt_table packet_filter = {
.name = "filter",
.valid_hooks = FILTER_VALID_HOOKS,
.me = THIS_MODULE,
.af = NFPROTO_ARP,
.priority = NF_IP_PRI_FILTER,
+ .table_init = arptable_filter_table_init,
};
/* The work comes in here from netfilter.c */
@@ -35,26 +38,32 @@ arptable_filter_hook(void *priv, struct sk_buff *skb,
static struct nf_hook_ops *arpfilter_ops __read_mostly;
-static int __net_init arptable_filter_net_init(struct net *net)
+static int __net_init arptable_filter_table_init(struct net *net)
{
struct arpt_replace *repl;
-
+ int err;
+
+ if (net->ipv4.arptable_filter)
+ return 0;
+
repl = arpt_alloc_initial_table(&packet_filter);
if (repl == NULL)
return -ENOMEM;
- net->ipv4.arptable_filter =
- arpt_register_table(net, &packet_filter, repl);
+ err = arpt_register_table(net, &packet_filter, repl, arpfilter_ops,
+ &net->ipv4.arptable_filter);
kfree(repl);
- return PTR_ERR_OR_ZERO(net->ipv4.arptable_filter);
+ return err;
}
static void __net_exit arptable_filter_net_exit(struct net *net)
{
- arpt_unregister_table(net->ipv4.arptable_filter);
+ if (!net->ipv4.arptable_filter)
+ return;
+ arpt_unregister_table(net, net->ipv4.arptable_filter, arpfilter_ops);
+ net->ipv4.arptable_filter = NULL;
}
static struct pernet_operations arptable_filter_net_ops = {
- .init = arptable_filter_net_init,
.exit = arptable_filter_net_exit,
};
@@ -62,26 +71,23 @@ static int __init arptable_filter_init(void)
{
int ret;
+ arpfilter_ops = xt_hook_ops_alloc(&packet_filter, arptable_filter_hook);
+ if (IS_ERR(arpfilter_ops))
+ return PTR_ERR(arpfilter_ops);
+
ret = register_pernet_subsys(&arptable_filter_net_ops);
- if (ret < 0)
+ if (ret < 0) {
+ kfree(arpfilter_ops);
return ret;
-
- arpfilter_ops = xt_hook_link(&packet_filter, arptable_filter_hook);
- if (IS_ERR(arpfilter_ops)) {
- ret = PTR_ERR(arpfilter_ops);
- goto cleanup_table;
}
- return ret;
-cleanup_table:
- unregister_pernet_subsys(&arptable_filter_net_ops);
return ret;
}
static void __exit arptable_filter_fini(void)
{
- xt_hook_unlink(&packet_filter, arpfilter_ops);
unregister_pernet_subsys(&arptable_filter_net_ops);
+ kfree(arpfilter_ops);
}
module_init(arptable_filter_init);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index b99affad6ba1..e53f8d6f326d 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -2062,9 +2062,27 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
return ret;
}
-struct xt_table *ipt_register_table(struct net *net,
- const struct xt_table *table,
- const struct ipt_replace *repl)
+static void __ipt_unregister_table(struct net *net, struct xt_table *table)
+{
+ struct xt_table_info *private;
+ void *loc_cpu_entry;
+ struct module *table_owner = table->me;
+ struct ipt_entry *iter;
+
+ private = xt_unregister_table(table);
+
+ /* Decrease module usage counts and free resources */
+ loc_cpu_entry = private->entries;
+ xt_entry_foreach(iter, loc_cpu_entry, private->size)
+ cleanup_entry(iter, net);
+ if (private->number > private->initial_entries)
+ module_put(table_owner);
+ xt_free_table_info(private);
+}
+
+int ipt_register_table(struct net *net, const struct xt_table *table,
+ const struct ipt_replace *repl,
+ const struct nf_hook_ops *ops, struct xt_table **res)
{
int ret;
struct xt_table_info *newinfo;
@@ -2073,10 +2091,8 @@ struct xt_table *ipt_register_table(struct net *net,
struct xt_table *new_table;
newinfo = xt_alloc_table_info(repl->size);
- if (!newinfo) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!newinfo)
+ return -ENOMEM;
loc_cpu_entry = newinfo->entries;
memcpy(loc_cpu_entry, repl->entries, repl->size);
@@ -2091,30 +2107,27 @@ struct xt_table *ipt_register_table(struct net *net,
goto out_free;
}
- return new_table;
+ /* set res now, will see skbs right after nf_register_net_hooks */
+ WRITE_ONCE(*res, new_table);
+
+ ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks));
+ if (ret != 0) {
+ __ipt_unregister_table(net, new_table);
+ *res = NULL;
+ }
+
+ return ret;
out_free:
xt_free_table_info(newinfo);
-out:
- return ERR_PTR(ret);
+ return ret;
}
-void ipt_unregister_table(struct net *net, struct xt_table *table)
+void ipt_unregister_table(struct net *net, struct xt_table *table,
+ const struct nf_hook_ops *ops)
{
- struct xt_table_info *private;
- void *loc_cpu_entry;
- struct module *table_owner = table->me;
- struct ipt_entry *iter;
-
- private = xt_unregister_table(table);
-
- /* Decrease module usage counts and free resources */
- loc_cpu_entry = private->entries;
- xt_entry_foreach(iter, loc_cpu_entry, private->size)
- cleanup_entry(iter, net);
- if (private->number > private->initial_entries)
- module_put(table_owner);
- xt_free_table_info(private);
+ nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
+ __ipt_unregister_table(net, table);
}
/* Returns 1 if the type and code is matched by the range, 0 otherwise */
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 397ef2dd133e..7667f223d7f8 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -23,6 +23,7 @@ MODULE_DESCRIPTION("iptables filter table");
#define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \
(1 << NF_INET_FORWARD) | \
(1 << NF_INET_LOCAL_OUT))
+static int __net_init iptable_filter_table_init(struct net *net);
static const struct xt_table packet_filter = {
.name = "filter",
@@ -30,6 +31,7 @@ static const struct xt_table packet_filter = {
.me = THIS_MODULE,
.af = NFPROTO_IPV4,
.priority = NF_IP_PRI_FILTER,
+ .table_init = iptable_filter_table_init,
};
static unsigned int
@@ -48,12 +50,16 @@ iptable_filter_hook(void *priv, struct sk_buff *skb,
static struct nf_hook_ops *filter_ops __read_mostly;
/* Default to forward because I got too much mail already. */
-static bool forward = true;
+static bool forward __read_mostly = true;
module_param(forward, bool, 0000);
-static int __net_init iptable_filter_net_init(struct net *net)
+static int __net_init iptable_filter_table_init(struct net *net)
{
struct ipt_replace *repl;
+ int err;
+
+ if (net->ipv4.iptable_filter)
+ return 0;
repl = ipt_alloc_initial_table(&packet_filter);
if (repl == NULL)
@@ -62,15 +68,26 @@ static int __net_init iptable_filter_net_init(struct net *net)
((struct ipt_standard *)repl->entries)[1].target.verdict =
forward ? -NF_ACCEPT - 1 : -NF_DROP - 1;
- net->ipv4.iptable_filter =
- ipt_register_table(net, &packet_filter, repl);
+ err = ipt_register_table(net, &packet_filter, repl, filter_ops,
+ &net->ipv4.iptable_filter);
kfree(repl);
- return PTR_ERR_OR_ZERO(net->ipv4.iptable_filter);
+ return err;
+}
+
+static int __net_init iptable_filter_net_init(struct net *net)
+{
+ if (net == &init_net || !forward)
+ return iptable_filter_table_init(net);
+
+ return 0;
}
static void __net_exit iptable_filter_net_exit(struct net *net)
{
- ipt_unregister_table(net, net->ipv4.iptable_filter);
+ if (!net->ipv4.iptable_filter)
+ return;
+ ipt_unregister_table(net, net->ipv4.iptable_filter, filter_ops);
+ net->ipv4.iptable_filter = NULL;
}
static struct pernet_operations iptable_filter_net_ops = {
@@ -82,24 +99,21 @@ static int __init iptable_filter_init(void)
{
int ret;
+ filter_ops = xt_hook_ops_alloc(&packet_filter, iptable_filter_hook);
+ if (IS_ERR(filter_ops))
+ return PTR_ERR(filter_ops);
+
ret = register_pernet_subsys(&iptable_filter_net_ops);
if (ret < 0)
- return ret;
-
- /* Register hooks */
- filter_ops = xt_hook_link(&packet_filter, iptable_filter_hook);
- if (IS_ERR(filter_ops)) {
- ret = PTR_ERR(filter_ops);
- unregister_pernet_subsys(&iptable_filter_net_ops);
- }
+ kfree(filter_ops);
return ret;
}
static void __exit iptable_filter_fini(void)
{
- xt_hook_unlink(&packet_filter, filter_ops);
unregister_pernet_subsys(&iptable_filter_net_ops);
+ kfree(filter_ops);
}
module_init(iptable_filter_init);
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index ba5d392a13c4..57fc97cdac70 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -28,12 +28,15 @@ MODULE_DESCRIPTION("iptables mangle table");
(1 << NF_INET_LOCAL_OUT) | \
(1 << NF_INET_POST_ROUTING))
+static int __net_init iptable_mangle_table_init(struct net *net);
+
static const struct xt_table packet_mangler = {
.name = "mangle",
.valid_hooks = MANGLE_VALID_HOOKS,
.me = THIS_MODULE,
.af = NFPROTO_IPV4,
.priority = NF_IP_PRI_MANGLE,
+ .table_init = iptable_mangle_table_init,
};
static unsigned int
@@ -92,27 +95,32 @@ iptable_mangle_hook(void *priv,
}
static struct nf_hook_ops *mangle_ops __read_mostly;
-
-static int __net_init iptable_mangle_net_init(struct net *net)
+static int __net_init iptable_mangle_table_init(struct net *net)
{
struct ipt_replace *repl;
+ int ret;
+
+ if (net->ipv4.iptable_mangle)
+ return 0;
repl = ipt_alloc_initial_table(&packet_mangler);
if (repl == NULL)
return -ENOMEM;
- net->ipv4.iptable_mangle =
- ipt_register_table(net, &packet_mangler, repl);
+ ret = ipt_register_table(net, &packet_mangler, repl, mangle_ops,
+ &net->ipv4.iptable_mangle);
kfree(repl);
- return PTR_ERR_OR_ZERO(net->ipv4.iptable_mangle);
+ return ret;
}
static void __net_exit iptable_mangle_net_exit(struct net *net)
{
- ipt_unregister_table(net, net->ipv4.iptable_mangle);
+ if (!net->ipv4.iptable_mangle)
+ return;
+ ipt_unregister_table(net, net->ipv4.iptable_mangle, mangle_ops);
+ net->ipv4.iptable_mangle = NULL;
}
static struct pernet_operations iptable_mangle_net_ops = {
- .init = iptable_mangle_net_init,
.exit = iptable_mangle_net_exit,
};
@@ -120,15 +128,22 @@ static int __init iptable_mangle_init(void)
{
int ret;
+ mangle_ops = xt_hook_ops_alloc(&packet_mangler, iptable_mangle_hook);
+ if (IS_ERR(mangle_ops)) {
+ ret = PTR_ERR(mangle_ops);
+ return ret;
+ }
+
ret = register_pernet_subsys(&iptable_mangle_net_ops);
- if (ret < 0)
+ if (ret < 0) {
+ kfree(mangle_ops);
return ret;
+ }
- /* Register hooks */
- mangle_ops = xt_hook_link(&packet_mangler, iptable_mangle_hook);
- if (IS_ERR(mangle_ops)) {
- ret = PTR_ERR(mangle_ops);
+ ret = iptable_mangle_table_init(&init_net);
+ if (ret) {
unregister_pernet_subsys(&iptable_mangle_net_ops);
+ kfree(mangle_ops);
}
return ret;
@@ -136,8 +151,8 @@ static int __init iptable_mangle_init(void)
static void __exit iptable_mangle_fini(void)
{
- xt_hook_unlink(&packet_mangler, mangle_ops);
unregister_pernet_subsys(&iptable_mangle_net_ops);
+ kfree(mangle_ops);
}
module_init(iptable_mangle_init);
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index ae2cd2752046..138a24bc76ad 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -18,6 +18,8 @@
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_l3proto.h>
+static int __net_init iptable_nat_table_init(struct net *net);
+
static const struct xt_table nf_nat_ipv4_table = {
.name = "nat",
.valid_hooks = (1 << NF_INET_PRE_ROUTING) |
@@ -26,6 +28,7 @@ static const struct xt_table nf_nat_ipv4_table = {
(1 << NF_INET_LOCAL_IN),
.me = THIS_MODULE,
.af = NFPROTO_IPV4,
+ .table_init = iptable_nat_table_init,
};
static unsigned int iptable_nat_do_chain(void *priv,
@@ -95,50 +98,50 @@ static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = {
},
};
-static int __net_init iptable_nat_net_init(struct net *net)
+static int __net_init iptable_nat_table_init(struct net *net)
{
struct ipt_replace *repl;
+ int ret;
+
+ if (net->ipv4.nat_table)
+ return 0;
repl = ipt_alloc_initial_table(&nf_nat_ipv4_table);
if (repl == NULL)
return -ENOMEM;
- net->ipv4.nat_table = ipt_register_table(net, &nf_nat_ipv4_table, repl);
+ ret = ipt_register_table(net, &nf_nat_ipv4_table, repl,
+ nf_nat_ipv4_ops, &net->ipv4.nat_table);
kfree(repl);
- return PTR_ERR_OR_ZERO(net->ipv4.nat_table);
+ return ret;
}
static void __net_exit iptable_nat_net_exit(struct net *net)
{
- ipt_unregister_table(net, net->ipv4.nat_table);
+ if (!net->ipv4.nat_table)
+ return;
+ ipt_unregister_table(net, net->ipv4.nat_table, nf_nat_ipv4_ops);
+ net->ipv4.nat_table = NULL;
}
static struct pernet_operations iptable_nat_net_ops = {
- .init = iptable_nat_net_init,
.exit = iptable_nat_net_exit,
};
static int __init iptable_nat_init(void)
{
- int err;
+ int ret = register_pernet_subsys(&iptable_nat_net_ops);
- err = register_pernet_subsys(&iptable_nat_net_ops);
- if (err < 0)
- goto err1;
+ if (ret)
+ return ret;
- err = nf_register_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops));
- if (err < 0)
- goto err2;
- return 0;
-
-err2:
- unregister_pernet_subsys(&iptable_nat_net_ops);
-err1:
- return err;
+ ret = iptable_nat_table_init(&init_net);
+ if (ret)
+ unregister_pernet_subsys(&iptable_nat_net_ops);
+ return ret;
}
static void __exit iptable_nat_exit(void)
{
- nf_unregister_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops));
unregister_pernet_subsys(&iptable_nat_net_ops);
}
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 1ba02811acb0..2642ecd2645c 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -10,12 +10,15 @@
#define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT))
+static int __net_init iptable_raw_table_init(struct net *net);
+
static const struct xt_table packet_raw = {
.name = "raw",
.valid_hooks = RAW_VALID_HOOKS,
.me = THIS_MODULE,
.af = NFPROTO_IPV4,
.priority = NF_IP_PRI_RAW,
+ .table_init = iptable_raw_table_init,
};
/* The work comes in here from netfilter.c. */
@@ -34,26 +37,32 @@ iptable_raw_hook(void *priv, struct sk_buff *skb,
static struct nf_hook_ops *rawtable_ops __read_mostly;
-static int __net_init iptable_raw_net_init(struct net *net)
+static int __net_init iptable_raw_table_init(struct net *net)
{
struct ipt_replace *repl;
+ int ret;
+
+ if (net->ipv4.iptable_raw)
+ return 0;
repl = ipt_alloc_initial_table(&packet_raw);
if (repl == NULL)
return -ENOMEM;
- net->ipv4.iptable_raw =
- ipt_register_table(net, &packet_raw, repl);
+ ret = ipt_register_table(net, &packet_raw, repl, rawtable_ops,
+ &net->ipv4.iptable_raw);
kfree(repl);
- return PTR_ERR_OR_ZERO(net->ipv4.iptable_raw);
+ return ret;
}
static void __net_exit iptable_raw_net_exit(struct net *net)
{
- ipt_unregister_table(net, net->ipv4.iptable_raw);
+ if (!net->ipv4.iptable_raw)
+ return;
+ ipt_unregister_table(net, net->ipv4.iptable_raw, rawtable_ops);
+ net->ipv4.iptable_raw = NULL;
}
static struct pernet_operations iptable_raw_net_ops = {
- .init = iptable_raw_net_init,
.exit = iptable_raw_net_exit,
};
@@ -61,15 +70,20 @@ static int __init iptable_raw_init(void)
{
int ret;
+ rawtable_ops = xt_hook_ops_alloc(&packet_raw, iptable_raw_hook);
+ if (IS_ERR(rawtable_ops))
+ return PTR_ERR(rawtable_ops);
+
ret = register_pernet_subsys(&iptable_raw_net_ops);
- if (ret < 0)
+ if (ret < 0) {
+ kfree(rawtable_ops);
return ret;
+ }
- /* Register hooks */
- rawtable_ops = xt_hook_link(&packet_raw, iptable_raw_hook);
- if (IS_ERR(rawtable_ops)) {
- ret = PTR_ERR(rawtable_ops);
+ ret = iptable_raw_table_init(&init_net);
+ if (ret) {
unregister_pernet_subsys(&iptable_raw_net_ops);
+ kfree(rawtable_ops);
}
return ret;
@@ -77,8 +91,8 @@ static int __init iptable_raw_init(void)
static void __exit iptable_raw_fini(void)
{
- xt_hook_unlink(&packet_raw, rawtable_ops);
unregister_pernet_subsys(&iptable_raw_net_ops);
+ kfree(rawtable_ops);
}
module_init(iptable_raw_init);
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index c2e23d5e9cd4..ff226596e4b5 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -28,12 +28,15 @@ MODULE_DESCRIPTION("iptables security table, for MAC rules");
(1 << NF_INET_FORWARD) | \
(1 << NF_INET_LOCAL_OUT)
+static int __net_init iptable_security_table_init(struct net *net);
+
static const struct xt_table security_table = {
.name = "security",
.valid_hooks = SECURITY_VALID_HOOKS,
.me = THIS_MODULE,
.af = NFPROTO_IPV4,
.priority = NF_IP_PRI_SECURITY,
+ .table_init = iptable_security_table_init,
};
static unsigned int
@@ -51,26 +54,33 @@ iptable_security_hook(void *priv, struct sk_buff *skb,
static struct nf_hook_ops *sectbl_ops __read_mostly;
-static int __net_init iptable_security_net_init(struct net *net)
+static int __net_init iptable_security_table_init(struct net *net)
{
struct ipt_replace *repl;
+ int ret;
+
+ if (net->ipv4.iptable_security)
+ return 0;
repl = ipt_alloc_initial_table(&security_table);
if (repl == NULL)
return -ENOMEM;
- net->ipv4.iptable_security =
- ipt_register_table(net, &security_table, repl);
+ ret = ipt_register_table(net, &security_table, repl, sectbl_ops,
+ &net->ipv4.iptable_security);
kfree(repl);
- return PTR_ERR_OR_ZERO(net->ipv4.iptable_security);
+ return ret;
}
static void __net_exit iptable_security_net_exit(struct net *net)
{
- ipt_unregister_table(net, net->ipv4.iptable_security);
+ if (!net->ipv4.iptable_security)
+ return;
+
+ ipt_unregister_table(net, net->ipv4.iptable_security, sectbl_ops);
+ net->ipv4.iptable_security = NULL;
}
static struct pernet_operations iptable_security_net_ops = {
- .init = iptable_security_net_init,
.exit = iptable_security_net_exit,
};
@@ -78,27 +88,29 @@ static int __init iptable_security_init(void)
{
int ret;
+ sectbl_ops = xt_hook_ops_alloc(&security_table, iptable_security_hook);
+ if (IS_ERR(sectbl_ops))
+ return PTR_ERR(sectbl_ops);
+
ret = register_pernet_subsys(&iptable_security_net_ops);
- if (ret < 0)
+ if (ret < 0) {
+ kfree(sectbl_ops);
return ret;
-
- sectbl_ops = xt_hook_link(&security_table, iptable_security_hook);
- if (IS_ERR(sectbl_ops)) {
- ret = PTR_ERR(sectbl_ops);
- goto cleanup_table;
}
- return ret;
+ ret = iptable_security_table_init(&init_net);
+ if (ret) {
+ unregister_pernet_subsys(&iptable_security_net_ops);
+ kfree(sectbl_ops);
+ }
-cleanup_table:
- unregister_pernet_subsys(&iptable_security_net_ops);
return ret;
}
static void __exit iptable_security_fini(void)
{
- xt_hook_unlink(&security_table, sectbl_ops);
unregister_pernet_subsys(&iptable_security_net_ops);
+ kfree(sectbl_ops);
}
module_init(iptable_security_init);
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index a04dee536b8e..d88da36b383c 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -31,10 +31,8 @@ static int nf_ct_ipv4_gather_frags(struct net *net, struct sk_buff *skb,
err = ip_defrag(net, skb, user);
local_bh_enable();
- if (!err) {
- ip_send_check(ip_hdr(skb));
+ if (!err)
skb->ignore_df = 1;
- }
return err;
}
diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c
index b72ffc58e255..51ced81b616c 100644
--- a/net/ipv4/netfilter/nft_masq_ipv4.c
+++ b/net/ipv4/netfilter/nft_masq_ipv4.c
@@ -25,7 +25,12 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr,
memset(&range, 0, sizeof(range));
range.flags = priv->flags;
-
+ if (priv->sreg_proto_min) {
+ range.min_proto.all =
+ *(__be16 *)&regs->data[priv->sreg_proto_min];
+ range.max_proto.all =
+ *(__be16 *)&regs->data[priv->sreg_proto_max];
+ }
regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, pkt->hook,
&range, pkt->out);
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f9faadb42485..a265f00b9df9 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -556,20 +556,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
return -EINVAL;
slow = lock_sock_fast(sk);
- if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
- answ = 0;
- else if (sock_flag(sk, SOCK_URGINLINE) ||
- !tp->urg_data ||
- before(tp->urg_seq, tp->copied_seq) ||
- !before(tp->urg_seq, tp->rcv_nxt)) {
-
- answ = tp->rcv_nxt - tp->copied_seq;
-
- /* Subtract 1, if FIN was received */
- if (answ && sock_flag(sk, SOCK_DONE))
- answ--;
- } else
- answ = tp->urg_seq - tp->copied_seq;
+ answ = tcp_inq(sk);
unlock_sock_fast(sk, slow);
break;
case SIOCATMARK:
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index c26241f3057b..7b7eec439906 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -551,7 +551,7 @@ reset:
*/
if (crtt > tp->srtt_us) {
/* Set RTO like tcp_rtt_estimator(), but from cached RTT. */
- crtt /= 8 * USEC_PER_MSEC;
+ crtt /= 8 * USEC_PER_SEC / HZ;
inet_csk(sk)->icsk_rto = crtt + max(2 * crtt, tcp_rto_min(sk));
} else if (tp->srtt_us == 0) {
/* RFC6298: 5.7 We've failed to get a valid RTT sample from
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index fadd8b978951..ae90e4b34bd3 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -452,7 +452,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->rcv_wup = newtp->copied_seq =
newtp->rcv_nxt = treq->rcv_isn + 1;
- newtp->segs_in = 0;
+ newtp->segs_in = 1;
newtp->snd_sml = newtp->snd_una =
newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
@@ -812,6 +812,7 @@ int tcp_child_process(struct sock *parent, struct sock *child,
int ret = 0;
int state = child->sk_state;
+ tcp_sk(child)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
if (!sock_owned_by_user(child)) {
ret = tcp_rcv_state_process(child, skb);
/* Wakeup parent, send SIGIO */
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index 0ec08814f37d..96599d1a1318 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -89,6 +89,8 @@ void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb
uh->source = src_port;
uh->len = htons(skb->len);
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
udp_set_csum(nocheck, skb, src, dst, skb->len);
iptunnel_xmit(sk, rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df, xnet);
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 5c5d23e59da5..9508a20fbf61 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -257,7 +257,11 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
*fragoff = _frag_off;
return hp->nexthdr;
}
- return -ENOENT;
+ if (!found)
+ return -ENOENT;
+ if (fragoff)
+ *fragoff = _frag_off;
+ break;
}
hdrlen = 8;
} else if (nexthdr == NEXTHDR_AUTH) {
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 0c7e276c230e..ea071fad67a0 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -55,8 +55,6 @@ struct fib6_cleaner {
void *arg;
};
-static DEFINE_RWLOCK(fib6_walker_lock);
-
#ifdef CONFIG_IPV6_SUBTREES
#define FWS_INIT FWS_S
#else
@@ -66,7 +64,7 @@ static DEFINE_RWLOCK(fib6_walker_lock);
static void fib6_prune_clones(struct net *net, struct fib6_node *fn);
static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn);
static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn);
-static int fib6_walk(struct fib6_walker *w);
+static int fib6_walk(struct net *net, struct fib6_walker *w);
static int fib6_walk_continue(struct fib6_walker *w);
/*
@@ -78,21 +76,21 @@ static int fib6_walk_continue(struct fib6_walker *w);
static void fib6_gc_timer_cb(unsigned long arg);
-static LIST_HEAD(fib6_walkers);
-#define FOR_WALKERS(w) list_for_each_entry(w, &fib6_walkers, lh)
+#define FOR_WALKERS(net, w) \
+ list_for_each_entry(w, &(net)->ipv6.fib6_walkers, lh)
-static void fib6_walker_link(struct fib6_walker *w)
+static void fib6_walker_link(struct net *net, struct fib6_walker *w)
{
- write_lock_bh(&fib6_walker_lock);
- list_add(&w->lh, &fib6_walkers);
- write_unlock_bh(&fib6_walker_lock);
+ write_lock_bh(&net->ipv6.fib6_walker_lock);
+ list_add(&w->lh, &net->ipv6.fib6_walkers);
+ write_unlock_bh(&net->ipv6.fib6_walker_lock);
}
-static void fib6_walker_unlink(struct fib6_walker *w)
+static void fib6_walker_unlink(struct net *net, struct fib6_walker *w)
{
- write_lock_bh(&fib6_walker_lock);
+ write_lock_bh(&net->ipv6.fib6_walker_lock);
list_del(&w->lh);
- write_unlock_bh(&fib6_walker_lock);
+ write_unlock_bh(&net->ipv6.fib6_walker_lock);
}
static int fib6_new_sernum(struct net *net)
@@ -325,12 +323,13 @@ static int fib6_dump_node(struct fib6_walker *w)
static void fib6_dump_end(struct netlink_callback *cb)
{
+ struct net *net = sock_net(cb->skb->sk);
struct fib6_walker *w = (void *)cb->args[2];
if (w) {
if (cb->args[4]) {
cb->args[4] = 0;
- fib6_walker_unlink(w);
+ fib6_walker_unlink(net, w);
}
cb->args[2] = 0;
kfree(w);
@@ -348,6 +347,7 @@ static int fib6_dump_done(struct netlink_callback *cb)
static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
struct netlink_callback *cb)
{
+ struct net *net = sock_net(skb->sk);
struct fib6_walker *w;
int res;
@@ -359,7 +359,7 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
w->skip = 0;
read_lock_bh(&table->tb6_lock);
- res = fib6_walk(w);
+ res = fib6_walk(net, w);
read_unlock_bh(&table->tb6_lock);
if (res > 0) {
cb->args[4] = 1;
@@ -379,7 +379,7 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
res = fib6_walk_continue(w);
read_unlock_bh(&table->tb6_lock);
if (res <= 0) {
- fib6_walker_unlink(w);
+ fib6_walker_unlink(net, w);
cb->args[4] = 0;
}
}
@@ -1340,8 +1340,8 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
}
#endif
- read_lock(&fib6_walker_lock);
- FOR_WALKERS(w) {
+ read_lock(&net->ipv6.fib6_walker_lock);
+ FOR_WALKERS(net, w) {
if (!child) {
if (w->root == fn) {
w->root = w->node = NULL;
@@ -1368,7 +1368,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
}
}
}
- read_unlock(&fib6_walker_lock);
+ read_unlock(&net->ipv6.fib6_walker_lock);
node_free(fn);
if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn))
@@ -1411,8 +1411,8 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
}
/* Adjust walkers */
- read_lock(&fib6_walker_lock);
- FOR_WALKERS(w) {
+ read_lock(&net->ipv6.fib6_walker_lock);
+ FOR_WALKERS(net, w) {
if (w->state == FWS_C && w->leaf == rt) {
RT6_TRACE("walker %p adjusted by delroute\n", w);
w->leaf = rt->dst.rt6_next;
@@ -1420,7 +1420,7 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
w->state = FWS_U;
}
}
- read_unlock(&fib6_walker_lock);
+ read_unlock(&net->ipv6.fib6_walker_lock);
rt->dst.rt6_next = NULL;
@@ -1588,17 +1588,17 @@ skip:
}
}
-static int fib6_walk(struct fib6_walker *w)
+static int fib6_walk(struct net *net, struct fib6_walker *w)
{
int res;
w->state = FWS_INIT;
w->node = w->root;
- fib6_walker_link(w);
+ fib6_walker_link(net, w);
res = fib6_walk_continue(w);
if (res <= 0)
- fib6_walker_unlink(w);
+ fib6_walker_unlink(net, w);
return res;
}
@@ -1668,7 +1668,7 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root,
c.arg = arg;
c.net = net;
- fib6_walk(&c.w);
+ fib6_walk(net, &c.w);
}
static void __fib6_clean_all(struct net *net,
@@ -1725,14 +1725,15 @@ static void fib6_flush_trees(struct net *net)
* Garbage collection
*/
-static struct fib6_gc_args
+struct fib6_gc_args
{
int timeout;
int more;
-} gc_args;
+};
static int fib6_age(struct rt6_info *rt, void *arg)
{
+ struct fib6_gc_args *gc_args = arg;
unsigned long now = jiffies;
/*
@@ -1748,10 +1749,10 @@ static int fib6_age(struct rt6_info *rt, void *arg)
RT6_TRACE("expiring %p\n", rt);
return -1;
}
- gc_args.more++;
+ gc_args->more++;
} else if (rt->rt6i_flags & RTF_CACHE) {
if (atomic_read(&rt->dst.__refcnt) == 0 &&
- time_after_eq(now, rt->dst.lastuse + gc_args.timeout)) {
+ time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
RT6_TRACE("aging clone %p\n", rt);
return -1;
} else if (rt->rt6i_flags & RTF_GATEWAY) {
@@ -1769,21 +1770,20 @@ static int fib6_age(struct rt6_info *rt, void *arg)
return -1;
}
}
- gc_args.more++;
+ gc_args->more++;
}
return 0;
}
-static DEFINE_SPINLOCK(fib6_gc_lock);
-
void fib6_run_gc(unsigned long expires, struct net *net, bool force)
{
+ struct fib6_gc_args gc_args;
unsigned long now;
if (force) {
- spin_lock_bh(&fib6_gc_lock);
- } else if (!spin_trylock_bh(&fib6_gc_lock)) {
+ spin_lock_bh(&net->ipv6.fib6_gc_lock);
+ } else if (!spin_trylock_bh(&net->ipv6.fib6_gc_lock)) {
mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ);
return;
}
@@ -1792,7 +1792,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force)
gc_args.more = icmp6_dst_gc();
- fib6_clean_all(net, fib6_age, NULL);
+ fib6_clean_all(net, fib6_age, &gc_args);
now = jiffies;
net->ipv6.ip6_rt_last_gc = now;
@@ -1802,7 +1802,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force)
+ net->ipv6.sysctl.ip6_rt_gc_interval));
else
del_timer(&net->ipv6.ip6_fib_timer);
- spin_unlock_bh(&fib6_gc_lock);
+ spin_unlock_bh(&net->ipv6.fib6_gc_lock);
}
static void fib6_gc_timer_cb(unsigned long arg)
@@ -1814,6 +1814,9 @@ static int __net_init fib6_net_init(struct net *net)
{
size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ;
+ spin_lock_init(&net->ipv6.fib6_gc_lock);
+ rwlock_init(&net->ipv6.fib6_walker_lock);
+ INIT_LIST_HEAD(&net->ipv6.fib6_walkers);
setup_timer(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, (unsigned long)net);
net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL);
@@ -1974,7 +1977,8 @@ static int ipv6_route_yield(struct fib6_walker *w)
return 0;
}
-static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter)
+static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter,
+ struct net *net)
{
memset(&iter->w, 0, sizeof(iter->w));
iter->w.func = ipv6_route_yield;
@@ -1984,7 +1988,7 @@ static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter)
iter->w.args = iter;
iter->sernum = iter->w.root->fn_sernum;
INIT_LIST_HEAD(&iter->w.lh);
- fib6_walker_link(&iter->w);
+ fib6_walker_link(net, &iter->w);
}
static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl,
@@ -2045,16 +2049,16 @@ iter_table:
++*pos;
return iter->w.leaf;
} else if (r < 0) {
- fib6_walker_unlink(&iter->w);
+ fib6_walker_unlink(net, &iter->w);
return NULL;
}
- fib6_walker_unlink(&iter->w);
+ fib6_walker_unlink(net, &iter->w);
iter->tbl = ipv6_route_seq_next_table(iter->tbl, net);
if (!iter->tbl)
return NULL;
- ipv6_route_seq_setup_walk(iter);
+ ipv6_route_seq_setup_walk(iter, net);
goto iter_table;
}
@@ -2069,7 +2073,7 @@ static void *ipv6_route_seq_start(struct seq_file *seq, loff_t *pos)
iter->skip = *pos;
if (iter->tbl) {
- ipv6_route_seq_setup_walk(iter);
+ ipv6_route_seq_setup_walk(iter, net);
return ipv6_route_seq_next(seq, NULL, pos);
} else {
return NULL;
@@ -2085,10 +2089,11 @@ static bool ipv6_route_iter_active(struct ipv6_route_iter *iter)
static void ipv6_route_seq_stop(struct seq_file *seq, void *v)
__releases(RCU_BH)
{
+ struct net *net = seq_file_net(seq);
struct ipv6_route_iter *iter = seq->private;
if (ipv6_route_iter_active(iter))
- fib6_walker_unlink(&iter->w);
+ fib6_walker_unlink(net, &iter->w);
rcu_read_unlock_bh();
}
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index f7c9560b75fa..4e636e60a360 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -777,6 +777,8 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
__u32 mtu;
int err;
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
encap_limit = t->parms.encap_limit;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 3f3aabd2f07b..eb2ac4bb09ce 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1089,6 +1089,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
u8 tproto;
int err;
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+
tproto = ACCESS_ONCE(t->parms.proto);
if (tproto != IPPROTO_IPIP && tproto != 0)
return -1;
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
index 14dacf1df529..a7520528ecd2 100644
--- a/net/ipv6/ip6_udp_tunnel.c
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -73,8 +73,8 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb,
struct net_device *dev, struct in6_addr *saddr,
struct in6_addr *daddr,
- __u8 prio, __u8 ttl, __be16 src_port,
- __be16 dst_port, bool nocheck)
+ __u8 prio, __u8 ttl, __be32 label,
+ __be16 src_port, __be16 dst_port, bool nocheck)
{
struct udphdr *uh;
struct ipv6hdr *ip6h;
@@ -98,7 +98,7 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
__skb_push(skb, sizeof(*ip6h));
skb_reset_network_header(skb);
ip6h = ipv6_hdr(skb);
- ip6_flow_hdr(ip6h, prio, htonl(0));
+ ip6_flow_hdr(ip6h, prio, label);
ip6h->payload_len = htons(skb->len);
ip6h->nexthdr = IPPROTO_UDP;
ip6h->hop_limit = ttl;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 5ee56d0a8699..d64ee7e83664 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1574,9 +1574,8 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
return NULL;
skb->priority = TC_PRIO_CONTROL;
- skb->reserved_tailroom = skb_end_offset(skb) -
- min(mtu, skb_end_offset(skb));
skb_reserve(skb, hlen);
+ skb_tailroom_reserve(skb, mtu, tlen);
if (__ipv6_get_lladdr(idev, &addr_buf, IFA_F_TENTATIVE)) {
/* <draft-ietf-magma-mld-source-05.txt>:
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 99425cf2819b..84f9baf7aee8 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -2071,9 +2071,28 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
return ret;
}
-struct xt_table *ip6t_register_table(struct net *net,
- const struct xt_table *table,
- const struct ip6t_replace *repl)
+static void __ip6t_unregister_table(struct net *net, struct xt_table *table)
+{
+ struct xt_table_info *private;
+ void *loc_cpu_entry;
+ struct module *table_owner = table->me;
+ struct ip6t_entry *iter;
+
+ private = xt_unregister_table(table);
+
+ /* Decrease module usage counts and free resources */
+ loc_cpu_entry = private->entries;
+ xt_entry_foreach(iter, loc_cpu_entry, private->size)
+ cleanup_entry(iter, net);
+ if (private->number > private->initial_entries)
+ module_put(table_owner);
+ xt_free_table_info(private);
+}
+
+int ip6t_register_table(struct net *net, const struct xt_table *table,
+ const struct ip6t_replace *repl,
+ const struct nf_hook_ops *ops,
+ struct xt_table **res)
{
int ret;
struct xt_table_info *newinfo;
@@ -2082,10 +2101,8 @@ struct xt_table *ip6t_register_table(struct net *net,
struct xt_table *new_table;
newinfo = xt_alloc_table_info(repl->size);
- if (!newinfo) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!newinfo)
+ return -ENOMEM;
loc_cpu_entry = newinfo->entries;
memcpy(loc_cpu_entry, repl->entries, repl->size);
@@ -2099,30 +2116,28 @@ struct xt_table *ip6t_register_table(struct net *net,
ret = PTR_ERR(new_table);
goto out_free;
}
- return new_table;
+
+ /* set res now, will see skbs right after nf_register_net_hooks */
+ WRITE_ONCE(*res, new_table);
+
+ ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks));
+ if (ret != 0) {
+ __ip6t_unregister_table(net, new_table);
+ *res = NULL;
+ }
+
+ return ret;
out_free:
xt_free_table_info(newinfo);
-out:
- return ERR_PTR(ret);
+ return ret;
}
-void ip6t_unregister_table(struct net *net, struct xt_table *table)
+void ip6t_unregister_table(struct net *net, struct xt_table *table,
+ const struct nf_hook_ops *ops)
{
- struct xt_table_info *private;
- void *loc_cpu_entry;
- struct module *table_owner = table->me;
- struct ip6t_entry *iter;
-
- private = xt_unregister_table(table);
-
- /* Decrease module usage counts and free resources */
- loc_cpu_entry = private->entries;
- xt_entry_foreach(iter, loc_cpu_entry, private->size)
- cleanup_entry(iter, net);
- if (private->number > private->initial_entries)
- module_put(table_owner);
- xt_free_table_info(private);
+ nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
+ __ip6t_unregister_table(net, table);
}
/* Returns 1 if the type and code is matched by the range, 0 otherwise */
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 8b277b983ca5..1343077dde93 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -22,12 +22,15 @@ MODULE_DESCRIPTION("ip6tables filter table");
(1 << NF_INET_FORWARD) | \
(1 << NF_INET_LOCAL_OUT))
+static int __net_init ip6table_filter_table_init(struct net *net);
+
static const struct xt_table packet_filter = {
.name = "filter",
.valid_hooks = FILTER_VALID_HOOKS,
.me = THIS_MODULE,
.af = NFPROTO_IPV6,
.priority = NF_IP6_PRI_FILTER,
+ .table_init = ip6table_filter_table_init,
};
/* The work comes in here from netfilter.c. */
@@ -44,9 +47,13 @@ static struct nf_hook_ops *filter_ops __read_mostly;
static bool forward = true;
module_param(forward, bool, 0000);
-static int __net_init ip6table_filter_net_init(struct net *net)
+static int __net_init ip6table_filter_table_init(struct net *net)
{
struct ip6t_replace *repl;
+ int err;
+
+ if (net->ipv6.ip6table_filter)
+ return 0;
repl = ip6t_alloc_initial_table(&packet_filter);
if (repl == NULL)
@@ -55,15 +62,26 @@ static int __net_init ip6table_filter_net_init(struct net *net)
((struct ip6t_standard *)repl->entries)[1].target.verdict =
forward ? -NF_ACCEPT - 1 : -NF_DROP - 1;
- net->ipv6.ip6table_filter =
- ip6t_register_table(net, &packet_filter, repl);
+ err = ip6t_register_table(net, &packet_filter, repl, filter_ops,
+ &net->ipv6.ip6table_filter);
kfree(repl);
- return PTR_ERR_OR_ZERO(net->ipv6.ip6table_filter);
+ return err;
+}
+
+static int __net_init ip6table_filter_net_init(struct net *net)
+{
+ if (net == &init_net || !forward)
+ return ip6table_filter_table_init(net);
+
+ return 0;
}
static void __net_exit ip6table_filter_net_exit(struct net *net)
{
- ip6t_unregister_table(net, net->ipv6.ip6table_filter);
+ if (!net->ipv6.ip6table_filter)
+ return;
+ ip6t_unregister_table(net, net->ipv6.ip6table_filter, filter_ops);
+ net->ipv6.ip6table_filter = NULL;
}
static struct pernet_operations ip6table_filter_net_ops = {
@@ -75,28 +93,21 @@ static int __init ip6table_filter_init(void)
{
int ret;
+ filter_ops = xt_hook_ops_alloc(&packet_filter, ip6table_filter_hook);
+ if (IS_ERR(filter_ops))
+ return PTR_ERR(filter_ops);
+
ret = register_pernet_subsys(&ip6table_filter_net_ops);
if (ret < 0)
- return ret;
-
- /* Register hooks */
- filter_ops = xt_hook_link(&packet_filter, ip6table_filter_hook);
- if (IS_ERR(filter_ops)) {
- ret = PTR_ERR(filter_ops);
- goto cleanup_table;
- }
+ kfree(filter_ops);
return ret;
-
- cleanup_table:
- unregister_pernet_subsys(&ip6table_filter_net_ops);
- return ret;
}
static void __exit ip6table_filter_fini(void)
{
- xt_hook_unlink(&packet_filter, filter_ops);
unregister_pernet_subsys(&ip6table_filter_net_ops);
+ kfree(filter_ops);
}
module_init(ip6table_filter_init);
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index abe278b07932..cb2b28883252 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -23,12 +23,15 @@ MODULE_DESCRIPTION("ip6tables mangle table");
(1 << NF_INET_LOCAL_OUT) | \
(1 << NF_INET_POST_ROUTING))
+static int __net_init ip6table_mangle_table_init(struct net *net);
+
static const struct xt_table packet_mangler = {
.name = "mangle",
.valid_hooks = MANGLE_VALID_HOOKS,
.me = THIS_MODULE,
.af = NFPROTO_IPV6,
.priority = NF_IP6_PRI_MANGLE,
+ .table_init = ip6table_mangle_table_init,
};
static unsigned int
@@ -88,26 +91,33 @@ ip6table_mangle_hook(void *priv, struct sk_buff *skb,
}
static struct nf_hook_ops *mangle_ops __read_mostly;
-static int __net_init ip6table_mangle_net_init(struct net *net)
+static int __net_init ip6table_mangle_table_init(struct net *net)
{
struct ip6t_replace *repl;
+ int ret;
+
+ if (net->ipv6.ip6table_mangle)
+ return 0;
repl = ip6t_alloc_initial_table(&packet_mangler);
if (repl == NULL)
return -ENOMEM;
- net->ipv6.ip6table_mangle =
- ip6t_register_table(net, &packet_mangler, repl);
+ ret = ip6t_register_table(net, &packet_mangler, repl, mangle_ops,
+ &net->ipv6.ip6table_mangle);
kfree(repl);
- return PTR_ERR_OR_ZERO(net->ipv6.ip6table_mangle);
+ return ret;
}
static void __net_exit ip6table_mangle_net_exit(struct net *net)
{
- ip6t_unregister_table(net, net->ipv6.ip6table_mangle);
+ if (!net->ipv6.ip6table_mangle)
+ return;
+
+ ip6t_unregister_table(net, net->ipv6.ip6table_mangle, mangle_ops);
+ net->ipv6.ip6table_mangle = NULL;
}
static struct pernet_operations ip6table_mangle_net_ops = {
- .init = ip6table_mangle_net_init,
.exit = ip6table_mangle_net_exit,
};
@@ -115,28 +125,28 @@ static int __init ip6table_mangle_init(void)
{
int ret;
+ mangle_ops = xt_hook_ops_alloc(&packet_mangler, ip6table_mangle_hook);
+ if (IS_ERR(mangle_ops))
+ return PTR_ERR(mangle_ops);
+
ret = register_pernet_subsys(&ip6table_mangle_net_ops);
- if (ret < 0)
+ if (ret < 0) {
+ kfree(mangle_ops);
return ret;
-
- /* Register hooks */
- mangle_ops = xt_hook_link(&packet_mangler, ip6table_mangle_hook);
- if (IS_ERR(mangle_ops)) {
- ret = PTR_ERR(mangle_ops);
- goto cleanup_table;
}
- return ret;
-
- cleanup_table:
- unregister_pernet_subsys(&ip6table_mangle_net_ops);
+ ret = ip6table_mangle_table_init(&init_net);
+ if (ret) {
+ unregister_pernet_subsys(&ip6table_mangle_net_ops);
+ kfree(mangle_ops);
+ }
return ret;
}
static void __exit ip6table_mangle_fini(void)
{
- xt_hook_unlink(&packet_mangler, mangle_ops);
unregister_pernet_subsys(&ip6table_mangle_net_ops);
+ kfree(mangle_ops);
}
module_init(ip6table_mangle_init);
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index de2a10a565f5..7d2bd940291f 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -20,6 +20,8 @@
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_l3proto.h>
+static int __net_init ip6table_nat_table_init(struct net *net);
+
static const struct xt_table nf_nat_ipv6_table = {
.name = "nat",
.valid_hooks = (1 << NF_INET_PRE_ROUTING) |
@@ -28,6 +30,7 @@ static const struct xt_table nf_nat_ipv6_table = {
(1 << NF_INET_LOCAL_IN),
.me = THIS_MODULE,
.af = NFPROTO_IPV6,
+ .table_init = ip6table_nat_table_init,
};
static unsigned int ip6table_nat_do_chain(void *priv,
@@ -97,50 +100,50 @@ static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = {
},
};
-static int __net_init ip6table_nat_net_init(struct net *net)
+static int __net_init ip6table_nat_table_init(struct net *net)
{
struct ip6t_replace *repl;
+ int ret;
+
+ if (net->ipv6.ip6table_nat)
+ return 0;
repl = ip6t_alloc_initial_table(&nf_nat_ipv6_table);
if (repl == NULL)
return -ENOMEM;
- net->ipv6.ip6table_nat = ip6t_register_table(net, &nf_nat_ipv6_table, repl);
+ ret = ip6t_register_table(net, &nf_nat_ipv6_table, repl,
+ nf_nat_ipv6_ops, &net->ipv6.ip6table_nat);
kfree(repl);
- return PTR_ERR_OR_ZERO(net->ipv6.ip6table_nat);
+ return ret;
}
static void __net_exit ip6table_nat_net_exit(struct net *net)
{
- ip6t_unregister_table(net, net->ipv6.ip6table_nat);
+ if (!net->ipv6.ip6table_nat)
+ return;
+ ip6t_unregister_table(net, net->ipv6.ip6table_nat, nf_nat_ipv6_ops);
+ net->ipv6.ip6table_nat = NULL;
}
static struct pernet_operations ip6table_nat_net_ops = {
- .init = ip6table_nat_net_init,
.exit = ip6table_nat_net_exit,
};
static int __init ip6table_nat_init(void)
{
- int err;
+ int ret = register_pernet_subsys(&ip6table_nat_net_ops);
- err = register_pernet_subsys(&ip6table_nat_net_ops);
- if (err < 0)
- goto err1;
+ if (ret)
+ return ret;
- err = nf_register_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops));
- if (err < 0)
- goto err2;
- return 0;
-
-err2:
- unregister_pernet_subsys(&ip6table_nat_net_ops);
-err1:
- return err;
+ ret = ip6table_nat_table_init(&init_net);
+ if (ret)
+ unregister_pernet_subsys(&ip6table_nat_net_ops);
+ return ret;
}
static void __exit ip6table_nat_exit(void)
{
- nf_unregister_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops));
unregister_pernet_subsys(&ip6table_nat_net_ops);
}
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 9021963565c3..d4bc56443dc1 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -9,12 +9,15 @@
#define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT))
+static int __net_init ip6table_raw_table_init(struct net *net);
+
static const struct xt_table packet_raw = {
.name = "raw",
.valid_hooks = RAW_VALID_HOOKS,
.me = THIS_MODULE,
.af = NFPROTO_IPV6,
.priority = NF_IP6_PRI_RAW,
+ .table_init = ip6table_raw_table_init,
};
/* The work comes in here from netfilter.c. */
@@ -27,26 +30,32 @@ ip6table_raw_hook(void *priv, struct sk_buff *skb,
static struct nf_hook_ops *rawtable_ops __read_mostly;
-static int __net_init ip6table_raw_net_init(struct net *net)
+static int __net_init ip6table_raw_table_init(struct net *net)
{
struct ip6t_replace *repl;
+ int ret;
+
+ if (net->ipv6.ip6table_raw)
+ return 0;
repl = ip6t_alloc_initial_table(&packet_raw);
if (repl == NULL)
return -ENOMEM;
- net->ipv6.ip6table_raw =
- ip6t_register_table(net, &packet_raw, repl);
+ ret = ip6t_register_table(net, &packet_raw, repl, rawtable_ops,
+ &net->ipv6.ip6table_raw);
kfree(repl);
- return PTR_ERR_OR_ZERO(net->ipv6.ip6table_raw);
+ return ret;
}
static void __net_exit ip6table_raw_net_exit(struct net *net)
{
- ip6t_unregister_table(net, net->ipv6.ip6table_raw);
+ if (!net->ipv6.ip6table_raw)
+ return;
+ ip6t_unregister_table(net, net->ipv6.ip6table_raw, rawtable_ops);
+ net->ipv6.ip6table_raw = NULL;
}
static struct pernet_operations ip6table_raw_net_ops = {
- .init = ip6table_raw_net_init,
.exit = ip6table_raw_net_exit,
};
@@ -54,28 +63,29 @@ static int __init ip6table_raw_init(void)
{
int ret;
+ /* Register hooks */
+ rawtable_ops = xt_hook_ops_alloc(&packet_raw, ip6table_raw_hook);
+ if (IS_ERR(rawtable_ops))
+ return PTR_ERR(rawtable_ops);
+
ret = register_pernet_subsys(&ip6table_raw_net_ops);
- if (ret < 0)
+ if (ret < 0) {
+ kfree(rawtable_ops);
return ret;
-
- /* Register hooks */
- rawtable_ops = xt_hook_link(&packet_raw, ip6table_raw_hook);
- if (IS_ERR(rawtable_ops)) {
- ret = PTR_ERR(rawtable_ops);
- goto cleanup_table;
}
- return ret;
-
- cleanup_table:
- unregister_pernet_subsys(&ip6table_raw_net_ops);
+ ret = ip6table_raw_table_init(&init_net);
+ if (ret) {
+ unregister_pernet_subsys(&ip6table_raw_net_ops);
+ kfree(rawtable_ops);
+ }
return ret;
}
static void __exit ip6table_raw_fini(void)
{
- xt_hook_unlink(&packet_raw, rawtable_ops);
unregister_pernet_subsys(&ip6table_raw_net_ops);
+ kfree(rawtable_ops);
}
module_init(ip6table_raw_init);
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index 0d856fedfeb0..cf26ccb04056 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -27,12 +27,15 @@ MODULE_DESCRIPTION("ip6tables security table, for MAC rules");
(1 << NF_INET_FORWARD) | \
(1 << NF_INET_LOCAL_OUT)
+static int __net_init ip6table_security_table_init(struct net *net);
+
static const struct xt_table security_table = {
.name = "security",
.valid_hooks = SECURITY_VALID_HOOKS,
.me = THIS_MODULE,
.af = NFPROTO_IPV6,
.priority = NF_IP6_PRI_SECURITY,
+ .table_init = ip6table_security_table_init,
};
static unsigned int
@@ -44,26 +47,32 @@ ip6table_security_hook(void *priv, struct sk_buff *skb,
static struct nf_hook_ops *sectbl_ops __read_mostly;
-static int __net_init ip6table_security_net_init(struct net *net)
+static int __net_init ip6table_security_table_init(struct net *net)
{
struct ip6t_replace *repl;
+ int ret;
+
+ if (net->ipv6.ip6table_security)
+ return 0;
repl = ip6t_alloc_initial_table(&security_table);
if (repl == NULL)
return -ENOMEM;
- net->ipv6.ip6table_security =
- ip6t_register_table(net, &security_table, repl);
+ ret = ip6t_register_table(net, &security_table, repl, sectbl_ops,
+ &net->ipv6.ip6table_security);
kfree(repl);
- return PTR_ERR_OR_ZERO(net->ipv6.ip6table_security);
+ return ret;
}
static void __net_exit ip6table_security_net_exit(struct net *net)
{
- ip6t_unregister_table(net, net->ipv6.ip6table_security);
+ if (!net->ipv6.ip6table_security)
+ return;
+ ip6t_unregister_table(net, net->ipv6.ip6table_security, sectbl_ops);
+ net->ipv6.ip6table_security = NULL;
}
static struct pernet_operations ip6table_security_net_ops = {
- .init = ip6table_security_net_init,
.exit = ip6table_security_net_exit,
};
@@ -71,27 +80,28 @@ static int __init ip6table_security_init(void)
{
int ret;
+ sectbl_ops = xt_hook_ops_alloc(&security_table, ip6table_security_hook);
+ if (IS_ERR(sectbl_ops))
+ return PTR_ERR(sectbl_ops);
+
ret = register_pernet_subsys(&ip6table_security_net_ops);
- if (ret < 0)
+ if (ret < 0) {
+ kfree(sectbl_ops);
return ret;
-
- sectbl_ops = xt_hook_link(&security_table, ip6table_security_hook);
- if (IS_ERR(sectbl_ops)) {
- ret = PTR_ERR(sectbl_ops);
- goto cleanup_table;
}
- return ret;
-
-cleanup_table:
- unregister_pernet_subsys(&ip6table_security_net_ops);
+ ret = ip6table_security_table_init(&init_net);
+ if (ret) {
+ unregister_pernet_subsys(&ip6table_security_net_ops);
+ kfree(sectbl_ops);
+ }
return ret;
}
static void __exit ip6table_security_fini(void)
{
- xt_hook_unlink(&security_table, sectbl_ops);
unregister_pernet_subsys(&ip6table_security_net_ops);
+ kfree(sectbl_ops);
}
module_init(ip6table_security_init);
diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c
index cd1ac1637a05..9597ffb74077 100644
--- a/net/ipv6/netfilter/nft_masq_ipv6.c
+++ b/net/ipv6/netfilter/nft_masq_ipv6.c
@@ -26,7 +26,12 @@ static void nft_masq_ipv6_eval(const struct nft_expr *expr,
memset(&range, 0, sizeof(range));
range.flags = priv->flags;
-
+ if (priv->sreg_proto_min) {
+ range.min_proto.all =
+ *(__be16 *)&regs->data[priv->sreg_proto_min];
+ range.max_proto.all =
+ *(__be16 *)&regs->data[priv->sreg_proto_max];
+ }
regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range, pkt->out);
}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 0711f8fe4d44..fd25e447a5fa 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -922,11 +922,9 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
ret = udpv6_queue_rcv_skb(sk, skb);
sock_put(sk);
- /* a return value > 0 means to resubmit the input, but
- * it wants the return to be -protocol, or 0
- */
+ /* a return value > 0 means to resubmit the input */
if (ret > 0)
- return -ret;
+ return ret;
return 0;
}
diff --git a/net/kcm/Kconfig b/net/kcm/Kconfig
new file mode 100644
index 000000000000..5db94d940ecc
--- /dev/null
+++ b/net/kcm/Kconfig
@@ -0,0 +1,10 @@
+
+config AF_KCM
+ tristate "KCM sockets"
+ depends on INET
+ select BPF_SYSCALL
+ ---help---
+ KCM (Kernel Connection Multiplexor) sockets provide a method
+ for multiplexing messages of a message based application
+ protocol over kernel connectons (e.g. TCP connections).
+
diff --git a/net/kcm/Makefile b/net/kcm/Makefile
new file mode 100644
index 000000000000..71256133e677
--- /dev/null
+++ b/net/kcm/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_AF_KCM) += kcm.o
+
+kcm-y := kcmsock.o kcmproc.o
diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c
new file mode 100644
index 000000000000..738008726cc6
--- /dev/null
+++ b/net/kcm/kcmproc.c
@@ -0,0 +1,426 @@
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/proc_fs.h>
+#include <linux/rculist.h>
+#include <linux/seq_file.h>
+#include <linux/socket.h>
+#include <net/inet_sock.h>
+#include <net/kcm.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <net/tcp.h>
+
+#ifdef CONFIG_PROC_FS
+struct kcm_seq_muxinfo {
+ char *name;
+ const struct file_operations *seq_fops;
+ const struct seq_operations seq_ops;
+};
+
+static struct kcm_mux *kcm_get_first(struct seq_file *seq)
+{
+ struct net *net = seq_file_net(seq);
+ struct kcm_net *knet = net_generic(net, kcm_net_id);
+
+ return list_first_or_null_rcu(&knet->mux_list,
+ struct kcm_mux, kcm_mux_list);
+}
+
+static struct kcm_mux *kcm_get_next(struct kcm_mux *mux)
+{
+ struct kcm_net *knet = mux->knet;
+
+ return list_next_or_null_rcu(&knet->mux_list, &mux->kcm_mux_list,
+ struct kcm_mux, kcm_mux_list);
+}
+
+static struct kcm_mux *kcm_get_idx(struct seq_file *seq, loff_t pos)
+{
+ struct net *net = seq_file_net(seq);
+ struct kcm_net *knet = net_generic(net, kcm_net_id);
+ struct kcm_mux *m;
+
+ list_for_each_entry_rcu(m, &knet->mux_list, kcm_mux_list) {
+ if (!pos)
+ return m;
+ --pos;
+ }
+ return NULL;
+}
+
+static void *kcm_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ void *p;
+
+ if (v == SEQ_START_TOKEN)
+ p = kcm_get_first(seq);
+ else
+ p = kcm_get_next(v);
+ ++*pos;
+ return p;
+}
+
+static void *kcm_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(rcu)
+{
+ rcu_read_lock();
+
+ if (!*pos)
+ return SEQ_START_TOKEN;
+ else
+ return kcm_get_idx(seq, *pos - 1);
+}
+
+static void kcm_seq_stop(struct seq_file *seq, void *v)
+ __releases(rcu)
+{
+ rcu_read_unlock();
+}
+
+struct kcm_proc_mux_state {
+ struct seq_net_private p;
+ int idx;
+};
+
+static int kcm_seq_open(struct inode *inode, struct file *file)
+{
+ struct kcm_seq_muxinfo *muxinfo = PDE_DATA(inode);
+ int err;
+
+ err = seq_open_net(inode, file, &muxinfo->seq_ops,
+ sizeof(struct kcm_proc_mux_state));
+ if (err < 0)
+ return err;
+ return err;
+}
+
+static void kcm_format_mux_header(struct seq_file *seq)
+{
+ struct net *net = seq_file_net(seq);
+ struct kcm_net *knet = net_generic(net, kcm_net_id);
+
+ seq_printf(seq,
+ "*** KCM statistics (%d MUX) ****\n",
+ knet->count);
+
+ seq_printf(seq,
+ "%-14s %-10s %-16s %-10s %-16s %-8s %-8s %-8s %-8s %s",
+ "Object",
+ "RX-Msgs",
+ "RX-Bytes",
+ "TX-Msgs",
+ "TX-Bytes",
+ "Recv-Q",
+ "Rmem",
+ "Send-Q",
+ "Smem",
+ "Status");
+
+ /* XXX: pdsts header stuff here */
+ seq_puts(seq, "\n");
+}
+
+static void kcm_format_sock(struct kcm_sock *kcm, struct seq_file *seq,
+ int i, int *len)
+{
+ seq_printf(seq,
+ " kcm-%-7u %-10llu %-16llu %-10llu %-16llu %-8d %-8d %-8d %-8s ",
+ kcm->index,
+ kcm->stats.rx_msgs,
+ kcm->stats.rx_bytes,
+ kcm->stats.tx_msgs,
+ kcm->stats.tx_bytes,
+ kcm->sk.sk_receive_queue.qlen,
+ sk_rmem_alloc_get(&kcm->sk),
+ kcm->sk.sk_write_queue.qlen,
+ "-");
+
+ if (kcm->tx_psock)
+ seq_printf(seq, "Psck-%u ", kcm->tx_psock->index);
+
+ if (kcm->tx_wait)
+ seq_puts(seq, "TxWait ");
+
+ if (kcm->tx_wait_more)
+ seq_puts(seq, "WMore ");
+
+ if (kcm->rx_wait)
+ seq_puts(seq, "RxWait ");
+
+ seq_puts(seq, "\n");
+}
+
+static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq,
+ int i, int *len)
+{
+ seq_printf(seq,
+ " psock-%-5u %-10llu %-16llu %-10llu %-16llu %-8d %-8d %-8d %-8d ",
+ psock->index,
+ psock->stats.rx_msgs,
+ psock->stats.rx_bytes,
+ psock->stats.tx_msgs,
+ psock->stats.tx_bytes,
+ psock->sk->sk_receive_queue.qlen,
+ atomic_read(&psock->sk->sk_rmem_alloc),
+ psock->sk->sk_write_queue.qlen,
+ atomic_read(&psock->sk->sk_wmem_alloc));
+
+ if (psock->done)
+ seq_puts(seq, "Done ");
+
+ if (psock->tx_stopped)
+ seq_puts(seq, "TxStop ");
+
+ if (psock->rx_stopped)
+ seq_puts(seq, "RxStop ");
+
+ if (psock->tx_kcm)
+ seq_printf(seq, "Rsvd-%d ", psock->tx_kcm->index);
+
+ if (psock->ready_rx_msg)
+ seq_puts(seq, "RdyRx ");
+
+ seq_puts(seq, "\n");
+}
+
+static void
+kcm_format_mux(struct kcm_mux *mux, loff_t idx, struct seq_file *seq)
+{
+ int i, len;
+ struct kcm_sock *kcm;
+ struct kcm_psock *psock;
+
+ /* mux information */
+ seq_printf(seq,
+ "%-6s%-8s %-10llu %-16llu %-10llu %-16llu %-8s %-8s %-8s %-8s ",
+ "mux", "",
+ mux->stats.rx_msgs,
+ mux->stats.rx_bytes,
+ mux->stats.tx_msgs,
+ mux->stats.tx_bytes,
+ "-", "-", "-", "-");
+
+ seq_printf(seq, "KCMs: %d, Psocks %d\n",
+ mux->kcm_socks_cnt, mux->psocks_cnt);
+
+ /* kcm sock information */
+ i = 0;
+ spin_lock_bh(&mux->lock);
+ list_for_each_entry(kcm, &mux->kcm_socks, kcm_sock_list) {
+ kcm_format_sock(kcm, seq, i, &len);
+ i++;
+ }
+ i = 0;
+ list_for_each_entry(psock, &mux->psocks, psock_list) {
+ kcm_format_psock(psock, seq, i, &len);
+ i++;
+ }
+ spin_unlock_bh(&mux->lock);
+}
+
+static int kcm_seq_show(struct seq_file *seq, void *v)
+{
+ struct kcm_proc_mux_state *mux_state;
+
+ mux_state = seq->private;
+ if (v == SEQ_START_TOKEN) {
+ mux_state->idx = 0;
+ kcm_format_mux_header(seq);
+ } else {
+ kcm_format_mux(v, mux_state->idx, seq);
+ mux_state->idx++;
+ }
+ return 0;
+}
+
+static const struct file_operations kcm_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = kcm_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+};
+
+static struct kcm_seq_muxinfo kcm_seq_muxinfo = {
+ .name = "kcm",
+ .seq_fops = &kcm_seq_fops,
+ .seq_ops = {
+ .show = kcm_seq_show,
+ .start = kcm_seq_start,
+ .next = kcm_seq_next,
+ .stop = kcm_seq_stop,
+ }
+};
+
+static int kcm_proc_register(struct net *net, struct kcm_seq_muxinfo *muxinfo)
+{
+ struct proc_dir_entry *p;
+ int rc = 0;
+
+ p = proc_create_data(muxinfo->name, S_IRUGO, net->proc_net,
+ muxinfo->seq_fops, muxinfo);
+ if (!p)
+ rc = -ENOMEM;
+ return rc;
+}
+EXPORT_SYMBOL(kcm_proc_register);
+
+static void kcm_proc_unregister(struct net *net,
+ struct kcm_seq_muxinfo *muxinfo)
+{
+ remove_proc_entry(muxinfo->name, net->proc_net);
+}
+EXPORT_SYMBOL(kcm_proc_unregister);
+
+static int kcm_stats_seq_show(struct seq_file *seq, void *v)
+{
+ struct kcm_psock_stats psock_stats;
+ struct kcm_mux_stats mux_stats;
+ struct kcm_mux *mux;
+ struct kcm_psock *psock;
+ struct net *net = seq->private;
+ struct kcm_net *knet = net_generic(net, kcm_net_id);
+
+ memset(&mux_stats, 0, sizeof(mux_stats));
+ memset(&psock_stats, 0, sizeof(psock_stats));
+
+ mutex_lock(&knet->mutex);
+
+ aggregate_mux_stats(&knet->aggregate_mux_stats, &mux_stats);
+ aggregate_psock_stats(&knet->aggregate_psock_stats,
+ &psock_stats);
+
+ list_for_each_entry_rcu(mux, &knet->mux_list, kcm_mux_list) {
+ spin_lock_bh(&mux->lock);
+ aggregate_mux_stats(&mux->stats, &mux_stats);
+ aggregate_psock_stats(&mux->aggregate_psock_stats,
+ &psock_stats);
+ list_for_each_entry(psock, &mux->psocks, psock_list)
+ aggregate_psock_stats(&psock->stats, &psock_stats);
+ spin_unlock_bh(&mux->lock);
+ }
+
+ mutex_unlock(&knet->mutex);
+
+ seq_printf(seq,
+ "%-8s %-10s %-16s %-10s %-16s %-10s %-10s %-10s %-10s %-10s\n",
+ "MUX",
+ "RX-Msgs",
+ "RX-Bytes",
+ "TX-Msgs",
+ "TX-Bytes",
+ "TX-Retries",
+ "Attach",
+ "Unattach",
+ "UnattchRsvd",
+ "RX-RdyDrops");
+
+ seq_printf(seq,
+ "%-8s %-10llu %-16llu %-10llu %-16llu %-10u %-10u %-10u %-10u %-10u\n",
+ "",
+ mux_stats.rx_msgs,
+ mux_stats.rx_bytes,
+ mux_stats.tx_msgs,
+ mux_stats.tx_bytes,
+ mux_stats.tx_retries,
+ mux_stats.psock_attach,
+ mux_stats.psock_unattach_rsvd,
+ mux_stats.psock_unattach,
+ mux_stats.rx_ready_drops);
+
+ seq_printf(seq,
+ "%-8s %-10s %-16s %-10s %-16s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s\n",
+ "Psock",
+ "RX-Msgs",
+ "RX-Bytes",
+ "TX-Msgs",
+ "TX-Bytes",
+ "Reserved",
+ "Unreserved",
+ "RX-Aborts",
+ "RX-MemFail",
+ "RX-NeedMor",
+ "RX-BadLen",
+ "RX-TooBig",
+ "RX-Timeout",
+ "TX-Aborts");
+
+ seq_printf(seq,
+ "%-8s %-10llu %-16llu %-10llu %-16llu %-10llu %-10llu %-10u %-10u %-10u %-10u %-10u %-10u %-10u\n",
+ "",
+ psock_stats.rx_msgs,
+ psock_stats.rx_bytes,
+ psock_stats.tx_msgs,
+ psock_stats.tx_bytes,
+ psock_stats.reserved,
+ psock_stats.unreserved,
+ psock_stats.rx_aborts,
+ psock_stats.rx_mem_fail,
+ psock_stats.rx_need_more_hdr,
+ psock_stats.rx_bad_hdr_len,
+ psock_stats.rx_msg_too_big,
+ psock_stats.rx_msg_timeouts,
+ psock_stats.tx_aborts);
+
+ return 0;
+}
+
+static int kcm_stats_seq_open(struct inode *inode, struct file *file)
+{
+ return single_open_net(inode, file, kcm_stats_seq_show);
+}
+
+static const struct file_operations kcm_stats_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = kcm_stats_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release_net,
+};
+
+static int kcm_proc_init_net(struct net *net)
+{
+ int err;
+
+ if (!proc_create("kcm_stats", S_IRUGO, net->proc_net,
+ &kcm_stats_seq_fops)) {
+ err = -ENOMEM;
+ goto out_kcm_stats;
+ }
+
+ err = kcm_proc_register(net, &kcm_seq_muxinfo);
+ if (err)
+ goto out_kcm;
+
+ return 0;
+
+out_kcm:
+ remove_proc_entry("kcm_stats", net->proc_net);
+out_kcm_stats:
+ return err;
+}
+
+static void kcm_proc_exit_net(struct net *net)
+{
+ kcm_proc_unregister(net, &kcm_seq_muxinfo);
+ remove_proc_entry("kcm_stats", net->proc_net);
+}
+
+static struct pernet_operations kcm_net_ops = {
+ .init = kcm_proc_init_net,
+ .exit = kcm_proc_exit_net,
+};
+
+int __init kcm_proc_init(void)
+{
+ return register_pernet_subsys(&kcm_net_ops);
+}
+
+void __exit kcm_proc_exit(void)
+{
+ unregister_pernet_subsys(&kcm_net_ops);
+}
+
+#endif /* CONFIG_PROC_FS */
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
new file mode 100644
index 000000000000..40662d73204f
--- /dev/null
+++ b/net/kcm/kcmsock.c
@@ -0,0 +1,2409 @@
+#include <linux/bpf.h>
+#include <linux/errno.h>
+#include <linux/errqueue.h>
+#include <linux/file.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/poll.h>
+#include <linux/rculist.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/uaccess.h>
+#include <linux/workqueue.h>
+#include <net/kcm.h>
+#include <net/netns/generic.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <uapi/linux/kcm.h>
+
+unsigned int kcm_net_id;
+
+static struct kmem_cache *kcm_psockp __read_mostly;
+static struct kmem_cache *kcm_muxp __read_mostly;
+static struct workqueue_struct *kcm_wq;
+
+static inline struct kcm_sock *kcm_sk(const struct sock *sk)
+{
+ return (struct kcm_sock *)sk;
+}
+
+static inline struct kcm_tx_msg *kcm_tx_msg(struct sk_buff *skb)
+{
+ return (struct kcm_tx_msg *)skb->cb;
+}
+
+static inline struct kcm_rx_msg *kcm_rx_msg(struct sk_buff *skb)
+{
+ return (struct kcm_rx_msg *)((void *)skb->cb +
+ offsetof(struct qdisc_skb_cb, data));
+}
+
+static void report_csk_error(struct sock *csk, int err)
+{
+ csk->sk_err = EPIPE;
+ csk->sk_error_report(csk);
+}
+
+/* Callback lock held */
+static void kcm_abort_rx_psock(struct kcm_psock *psock, int err,
+ struct sk_buff *skb)
+{
+ struct sock *csk = psock->sk;
+
+ /* Unrecoverable error in receive */
+
+ del_timer(&psock->rx_msg_timer);
+
+ if (psock->rx_stopped)
+ return;
+
+ psock->rx_stopped = 1;
+ KCM_STATS_INCR(psock->stats.rx_aborts);
+
+ /* Report an error on the lower socket */
+ report_csk_error(csk, err);
+}
+
+static void kcm_abort_tx_psock(struct kcm_psock *psock, int err,
+ bool wakeup_kcm)
+{
+ struct sock *csk = psock->sk;
+ struct kcm_mux *mux = psock->mux;
+
+ /* Unrecoverable error in transmit */
+
+ spin_lock_bh(&mux->lock);
+
+ if (psock->tx_stopped) {
+ spin_unlock_bh(&mux->lock);
+ return;
+ }
+
+ psock->tx_stopped = 1;
+ KCM_STATS_INCR(psock->stats.tx_aborts);
+
+ if (!psock->tx_kcm) {
+ /* Take off psocks_avail list */
+ list_del(&psock->psock_avail_list);
+ } else if (wakeup_kcm) {
+ /* In this case psock is being aborted while outside of
+ * write_msgs and psock is reserved. Schedule tx_work
+ * to handle the failure there. Need to commit tx_stopped
+ * before queuing work.
+ */
+ smp_mb();
+
+ queue_work(kcm_wq, &psock->tx_kcm->tx_work);
+ }
+
+ spin_unlock_bh(&mux->lock);
+
+ /* Report error on lower socket */
+ report_csk_error(csk, err);
+}
+
+/* RX mux lock held. */
+static void kcm_update_rx_mux_stats(struct kcm_mux *mux,
+ struct kcm_psock *psock)
+{
+ KCM_STATS_ADD(mux->stats.rx_bytes,
+ psock->stats.rx_bytes - psock->saved_rx_bytes);
+ mux->stats.rx_msgs +=
+ psock->stats.rx_msgs - psock->saved_rx_msgs;
+ psock->saved_rx_msgs = psock->stats.rx_msgs;
+ psock->saved_rx_bytes = psock->stats.rx_bytes;
+}
+
+static void kcm_update_tx_mux_stats(struct kcm_mux *mux,
+ struct kcm_psock *psock)
+{
+ KCM_STATS_ADD(mux->stats.tx_bytes,
+ psock->stats.tx_bytes - psock->saved_tx_bytes);
+ mux->stats.tx_msgs +=
+ psock->stats.tx_msgs - psock->saved_tx_msgs;
+ psock->saved_tx_msgs = psock->stats.tx_msgs;
+ psock->saved_tx_bytes = psock->stats.tx_bytes;
+}
+
+static int kcm_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
+
+/* KCM is ready to receive messages on its queue-- either the KCM is new or
+ * has become unblocked after being blocked on full socket buffer. Queue any
+ * pending ready messages on a psock. RX mux lock held.
+ */
+static void kcm_rcv_ready(struct kcm_sock *kcm)
+{
+ struct kcm_mux *mux = kcm->mux;
+ struct kcm_psock *psock;
+ struct sk_buff *skb;
+
+ if (unlikely(kcm->rx_wait || kcm->rx_psock || kcm->rx_disabled))
+ return;
+
+ while (unlikely((skb = __skb_dequeue(&mux->rx_hold_queue)))) {
+ if (kcm_queue_rcv_skb(&kcm->sk, skb)) {
+ /* Assuming buffer limit has been reached */
+ skb_queue_head(&mux->rx_hold_queue, skb);
+ WARN_ON(!sk_rmem_alloc_get(&kcm->sk));
+ return;
+ }
+ }
+
+ while (!list_empty(&mux->psocks_ready)) {
+ psock = list_first_entry(&mux->psocks_ready, struct kcm_psock,
+ psock_ready_list);
+
+ if (kcm_queue_rcv_skb(&kcm->sk, psock->ready_rx_msg)) {
+ /* Assuming buffer limit has been reached */
+ WARN_ON(!sk_rmem_alloc_get(&kcm->sk));
+ return;
+ }
+
+ /* Consumed the ready message on the psock. Schedule rx_work to
+ * get more messages.
+ */
+ list_del(&psock->psock_ready_list);
+ psock->ready_rx_msg = NULL;
+
+ /* Commit clearing of ready_rx_msg for queuing work */
+ smp_mb();
+
+ queue_work(kcm_wq, &psock->rx_work);
+ }
+
+ /* Buffer limit is okay now, add to ready list */
+ list_add_tail(&kcm->wait_rx_list,
+ &kcm->mux->kcm_rx_waiters);
+ kcm->rx_wait = true;
+}
+
+static void kcm_rfree(struct sk_buff *skb)
+{
+ struct sock *sk = skb->sk;
+ struct kcm_sock *kcm = kcm_sk(sk);
+ struct kcm_mux *mux = kcm->mux;
+ unsigned int len = skb->truesize;
+
+ sk_mem_uncharge(sk, len);
+ atomic_sub(len, &sk->sk_rmem_alloc);
+
+ /* For reading rx_wait and rx_psock without holding lock */
+ smp_mb__after_atomic();
+
+ if (!kcm->rx_wait && !kcm->rx_psock &&
+ sk_rmem_alloc_get(sk) < sk->sk_rcvlowat) {
+ spin_lock_bh(&mux->rx_lock);
+ kcm_rcv_ready(kcm);
+ spin_unlock_bh(&mux->rx_lock);
+ }
+}
+
+static int kcm_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+ struct sk_buff_head *list = &sk->sk_receive_queue;
+
+ if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
+ return -ENOMEM;
+
+ if (!sk_rmem_schedule(sk, skb, skb->truesize))
+ return -ENOBUFS;
+
+ skb->dev = NULL;
+
+ skb_orphan(skb);
+ skb->sk = sk;
+ skb->destructor = kcm_rfree;
+ atomic_add(skb->truesize, &sk->sk_rmem_alloc);
+ sk_mem_charge(sk, skb->truesize);
+
+ skb_queue_tail(list, skb);
+
+ if (!sock_flag(sk, SOCK_DEAD))
+ sk->sk_data_ready(sk);
+
+ return 0;
+}
+
+/* Requeue received messages for a kcm socket to other kcm sockets. This is
+ * called with a kcm socket is receive disabled.
+ * RX mux lock held.
+ */
+static void requeue_rx_msgs(struct kcm_mux *mux, struct sk_buff_head *head)
+{
+ struct sk_buff *skb;
+ struct kcm_sock *kcm;
+
+ while ((skb = __skb_dequeue(head))) {
+ /* Reset destructor to avoid calling kcm_rcv_ready */
+ skb->destructor = sock_rfree;
+ skb_orphan(skb);
+try_again:
+ if (list_empty(&mux->kcm_rx_waiters)) {
+ skb_queue_tail(&mux->rx_hold_queue, skb);
+ continue;
+ }
+
+ kcm = list_first_entry(&mux->kcm_rx_waiters,
+ struct kcm_sock, wait_rx_list);
+
+ if (kcm_queue_rcv_skb(&kcm->sk, skb)) {
+ /* Should mean socket buffer full */
+ list_del(&kcm->wait_rx_list);
+ kcm->rx_wait = false;
+
+ /* Commit rx_wait to read in kcm_free */
+ smp_wmb();
+
+ goto try_again;
+ }
+ }
+}
+
+/* Lower sock lock held */
+static struct kcm_sock *reserve_rx_kcm(struct kcm_psock *psock,
+ struct sk_buff *head)
+{
+ struct kcm_mux *mux = psock->mux;
+ struct kcm_sock *kcm;
+
+ WARN_ON(psock->ready_rx_msg);
+
+ if (psock->rx_kcm)
+ return psock->rx_kcm;
+
+ spin_lock_bh(&mux->rx_lock);
+
+ if (psock->rx_kcm) {
+ spin_unlock_bh(&mux->rx_lock);
+ return psock->rx_kcm;
+ }
+
+ kcm_update_rx_mux_stats(mux, psock);
+
+ if (list_empty(&mux->kcm_rx_waiters)) {
+ psock->ready_rx_msg = head;
+ list_add_tail(&psock->psock_ready_list,
+ &mux->psocks_ready);
+ spin_unlock_bh(&mux->rx_lock);
+ return NULL;
+ }
+
+ kcm = list_first_entry(&mux->kcm_rx_waiters,
+ struct kcm_sock, wait_rx_list);
+ list_del(&kcm->wait_rx_list);
+ kcm->rx_wait = false;
+
+ psock->rx_kcm = kcm;
+ kcm->rx_psock = psock;
+
+ spin_unlock_bh(&mux->rx_lock);
+
+ return kcm;
+}
+
+static void kcm_done(struct kcm_sock *kcm);
+
+static void kcm_done_work(struct work_struct *w)
+{
+ kcm_done(container_of(w, struct kcm_sock, done_work));
+}
+
+/* Lower sock held */
+static void unreserve_rx_kcm(struct kcm_psock *psock,
+ bool rcv_ready)
+{
+ struct kcm_sock *kcm = psock->rx_kcm;
+ struct kcm_mux *mux = psock->mux;
+
+ if (!kcm)
+ return;
+
+ spin_lock_bh(&mux->rx_lock);
+
+ psock->rx_kcm = NULL;
+ kcm->rx_psock = NULL;
+
+ /* Commit kcm->rx_psock before sk_rmem_alloc_get to sync with
+ * kcm_rfree
+ */
+ smp_mb();
+
+ if (unlikely(kcm->done)) {
+ spin_unlock_bh(&mux->rx_lock);
+
+ /* Need to run kcm_done in a task since we need to qcquire
+ * callback locks which may already be held here.
+ */
+ INIT_WORK(&kcm->done_work, kcm_done_work);
+ schedule_work(&kcm->done_work);
+ return;
+ }
+
+ if (unlikely(kcm->rx_disabled)) {
+ requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue);
+ } else if (rcv_ready || unlikely(!sk_rmem_alloc_get(&kcm->sk))) {
+ /* Check for degenerative race with rx_wait that all
+ * data was dequeued (accounted for in kcm_rfree).
+ */
+ kcm_rcv_ready(kcm);
+ }
+ spin_unlock_bh(&mux->rx_lock);
+}
+
+static void kcm_start_rx_timer(struct kcm_psock *psock)
+{
+ if (psock->sk->sk_rcvtimeo)
+ mod_timer(&psock->rx_msg_timer, psock->sk->sk_rcvtimeo);
+}
+
+/* Macro to invoke filter function. */
+#define KCM_RUN_FILTER(prog, ctx) \
+ (*prog->bpf_func)(ctx, prog->insnsi)
+
+/* Lower socket lock held */
+static int kcm_tcp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
+ unsigned int orig_offset, size_t orig_len)
+{
+ struct kcm_psock *psock = (struct kcm_psock *)desc->arg.data;
+ struct kcm_rx_msg *rxm;
+ struct kcm_sock *kcm;
+ struct sk_buff *head, *skb;
+ size_t eaten = 0, cand_len;
+ ssize_t extra;
+ int err;
+ bool cloned_orig = false;
+
+ if (psock->ready_rx_msg)
+ return 0;
+
+ head = psock->rx_skb_head;
+ if (head) {
+ /* Message already in progress */
+
+ rxm = kcm_rx_msg(head);
+ if (unlikely(rxm->early_eaten)) {
+ /* Already some number of bytes on the receive sock
+ * data saved in rx_skb_head, just indicate they
+ * are consumed.
+ */
+ eaten = orig_len <= rxm->early_eaten ?
+ orig_len : rxm->early_eaten;
+ rxm->early_eaten -= eaten;
+
+ return eaten;
+ }
+
+ if (unlikely(orig_offset)) {
+ /* Getting data with a non-zero offset when a message is
+ * in progress is not expected. If it does happen, we
+ * need to clone and pull since we can't deal with
+ * offsets in the skbs for a message expect in the head.
+ */
+ orig_skb = skb_clone(orig_skb, GFP_ATOMIC);
+ if (!orig_skb) {
+ KCM_STATS_INCR(psock->stats.rx_mem_fail);
+ desc->error = -ENOMEM;
+ return 0;
+ }
+ if (!pskb_pull(orig_skb, orig_offset)) {
+ KCM_STATS_INCR(psock->stats.rx_mem_fail);
+ kfree_skb(orig_skb);
+ desc->error = -ENOMEM;
+ return 0;
+ }
+ cloned_orig = true;
+ orig_offset = 0;
+ }
+
+ if (!psock->rx_skb_nextp) {
+ /* We are going to append to the frags_list of head.
+ * Need to unshare the frag_list.
+ */
+ err = skb_unclone(head, GFP_ATOMIC);
+ if (err) {
+ KCM_STATS_INCR(psock->stats.rx_mem_fail);
+ desc->error = err;
+ return 0;
+ }
+
+ if (unlikely(skb_shinfo(head)->frag_list)) {
+ /* We can't append to an sk_buff that already
+ * has a frag_list. We create a new head, point
+ * the frag_list of that to the old head, and
+ * then are able to use the old head->next for
+ * appending to the message.
+ */
+ if (WARN_ON(head->next)) {
+ desc->error = -EINVAL;
+ return 0;
+ }
+
+ skb = alloc_skb(0, GFP_ATOMIC);
+ if (!skb) {
+ KCM_STATS_INCR(psock->stats.rx_mem_fail);
+ desc->error = -ENOMEM;
+ return 0;
+ }
+ skb->len = head->len;
+ skb->data_len = head->len;
+ skb->truesize = head->truesize;
+ *kcm_rx_msg(skb) = *kcm_rx_msg(head);
+ psock->rx_skb_nextp = &head->next;
+ skb_shinfo(skb)->frag_list = head;
+ psock->rx_skb_head = skb;
+ head = skb;
+ } else {
+ psock->rx_skb_nextp =
+ &skb_shinfo(head)->frag_list;
+ }
+ }
+ }
+
+ while (eaten < orig_len) {
+ /* Always clone since we will consume something */
+ skb = skb_clone(orig_skb, GFP_ATOMIC);
+ if (!skb) {
+ KCM_STATS_INCR(psock->stats.rx_mem_fail);
+ desc->error = -ENOMEM;
+ break;
+ }
+
+ cand_len = orig_len - eaten;
+
+ head = psock->rx_skb_head;
+ if (!head) {
+ head = skb;
+ psock->rx_skb_head = head;
+ /* Will set rx_skb_nextp on next packet if needed */
+ psock->rx_skb_nextp = NULL;
+ rxm = kcm_rx_msg(head);
+ memset(rxm, 0, sizeof(*rxm));
+ rxm->offset = orig_offset + eaten;
+ } else {
+ /* Unclone since we may be appending to an skb that we
+ * already share a frag_list with.
+ */
+ err = skb_unclone(skb, GFP_ATOMIC);
+ if (err) {
+ KCM_STATS_INCR(psock->stats.rx_mem_fail);
+ desc->error = err;
+ break;
+ }
+
+ rxm = kcm_rx_msg(head);
+ *psock->rx_skb_nextp = skb;
+ psock->rx_skb_nextp = &skb->next;
+ head->data_len += skb->len;
+ head->len += skb->len;
+ head->truesize += skb->truesize;
+ }
+
+ if (!rxm->full_len) {
+ ssize_t len;
+
+ len = KCM_RUN_FILTER(psock->bpf_prog, head);
+
+ if (!len) {
+ /* Need more header to determine length */
+ if (!rxm->accum_len) {
+ /* Start RX timer for new message */
+ kcm_start_rx_timer(psock);
+ }
+ rxm->accum_len += cand_len;
+ eaten += cand_len;
+ KCM_STATS_INCR(psock->stats.rx_need_more_hdr);
+ WARN_ON(eaten != orig_len);
+ break;
+ } else if (len > psock->sk->sk_rcvbuf) {
+ /* Message length exceeds maximum allowed */
+ KCM_STATS_INCR(psock->stats.rx_msg_too_big);
+ desc->error = -EMSGSIZE;
+ psock->rx_skb_head = NULL;
+ kcm_abort_rx_psock(psock, EMSGSIZE, head);
+ break;
+ } else if (len <= (ssize_t)head->len -
+ skb->len - rxm->offset) {
+ /* Length must be into new skb (and also
+ * greater than zero)
+ */
+ KCM_STATS_INCR(psock->stats.rx_bad_hdr_len);
+ desc->error = -EPROTO;
+ psock->rx_skb_head = NULL;
+ kcm_abort_rx_psock(psock, EPROTO, head);
+ break;
+ }
+
+ rxm->full_len = len;
+ }
+
+ extra = (ssize_t)(rxm->accum_len + cand_len) - rxm->full_len;
+
+ if (extra < 0) {
+ /* Message not complete yet. */
+ if (rxm->full_len - rxm->accum_len >
+ tcp_inq(psock->sk)) {
+ /* Don't have the whole messages in the socket
+ * buffer. Set psock->rx_need_bytes to wait for
+ * the rest of the message. Also, set "early
+ * eaten" since we've already buffered the skb
+ * but don't consume yet per tcp_read_sock.
+ */
+
+ if (!rxm->accum_len) {
+ /* Start RX timer for new message */
+ kcm_start_rx_timer(psock);
+ }
+
+ psock->rx_need_bytes = rxm->full_len -
+ rxm->accum_len;
+ rxm->accum_len += cand_len;
+ rxm->early_eaten = cand_len;
+ KCM_STATS_ADD(psock->stats.rx_bytes, cand_len);
+ desc->count = 0; /* Stop reading socket */
+ break;
+ }
+ rxm->accum_len += cand_len;
+ eaten += cand_len;
+ WARN_ON(eaten != orig_len);
+ break;
+ }
+
+ /* Positive extra indicates ore bytes than needed for the
+ * message
+ */
+
+ WARN_ON(extra > cand_len);
+
+ eaten += (cand_len - extra);
+
+ /* Hurray, we have a new message! */
+ del_timer(&psock->rx_msg_timer);
+ psock->rx_skb_head = NULL;
+ KCM_STATS_INCR(psock->stats.rx_msgs);
+
+try_queue:
+ kcm = reserve_rx_kcm(psock, head);
+ if (!kcm) {
+ /* Unable to reserve a KCM, message is held in psock. */
+ break;
+ }
+
+ if (kcm_queue_rcv_skb(&kcm->sk, head)) {
+ /* Should mean socket buffer full */
+ unreserve_rx_kcm(psock, false);
+ goto try_queue;
+ }
+ }
+
+ if (cloned_orig)
+ kfree_skb(orig_skb);
+
+ KCM_STATS_ADD(psock->stats.rx_bytes, eaten);
+
+ return eaten;
+}
+
+/* Called with lock held on lower socket */
+static int psock_tcp_read_sock(struct kcm_psock *psock)
+{
+ read_descriptor_t desc;
+
+ desc.arg.data = psock;
+ desc.error = 0;
+ desc.count = 1; /* give more than one skb per call */
+
+ /* sk should be locked here, so okay to do tcp_read_sock */
+ tcp_read_sock(psock->sk, &desc, kcm_tcp_recv);
+
+ unreserve_rx_kcm(psock, true);
+
+ return desc.error;
+}
+
+/* Lower sock lock held */
+static void psock_tcp_data_ready(struct sock *sk)
+{
+ struct kcm_psock *psock;
+
+ read_lock_bh(&sk->sk_callback_lock);
+
+ psock = (struct kcm_psock *)sk->sk_user_data;
+ if (unlikely(!psock || psock->rx_stopped))
+ goto out;
+
+ if (psock->ready_rx_msg)
+ goto out;
+
+ if (psock->rx_need_bytes) {
+ if (tcp_inq(sk) >= psock->rx_need_bytes)
+ psock->rx_need_bytes = 0;
+ else
+ goto out;
+ }
+
+ if (psock_tcp_read_sock(psock) == -ENOMEM)
+ queue_delayed_work(kcm_wq, &psock->rx_delayed_work, 0);
+
+out:
+ read_unlock_bh(&sk->sk_callback_lock);
+}
+
+static void do_psock_rx_work(struct kcm_psock *psock)
+{
+ read_descriptor_t rd_desc;
+ struct sock *csk = psock->sk;
+
+ /* We need the read lock to synchronize with psock_tcp_data_ready. We
+ * need the socket lock for calling tcp_read_sock.
+ */
+ lock_sock(csk);
+ read_lock_bh(&csk->sk_callback_lock);
+
+ if (unlikely(csk->sk_user_data != psock))
+ goto out;
+
+ if (unlikely(psock->rx_stopped))
+ goto out;
+
+ if (psock->ready_rx_msg)
+ goto out;
+
+ rd_desc.arg.data = psock;
+
+ if (psock_tcp_read_sock(psock) == -ENOMEM)
+ queue_delayed_work(kcm_wq, &psock->rx_delayed_work, 0);
+
+out:
+ read_unlock_bh(&csk->sk_callback_lock);
+ release_sock(csk);
+}
+
+static void psock_rx_work(struct work_struct *w)
+{
+ do_psock_rx_work(container_of(w, struct kcm_psock, rx_work));
+}
+
+static void psock_rx_delayed_work(struct work_struct *w)
+{
+ do_psock_rx_work(container_of(w, struct kcm_psock,
+ rx_delayed_work.work));
+}
+
+static void psock_tcp_state_change(struct sock *sk)
+{
+ /* TCP only does a POLLIN for a half close. Do a POLLHUP here
+ * since application will normally not poll with POLLIN
+ * on the TCP sockets.
+ */
+
+ report_csk_error(sk, EPIPE);
+}
+
+static void psock_tcp_write_space(struct sock *sk)
+{
+ struct kcm_psock *psock;
+ struct kcm_mux *mux;
+ struct kcm_sock *kcm;
+
+ read_lock_bh(&sk->sk_callback_lock);
+
+ psock = (struct kcm_psock *)sk->sk_user_data;
+ if (unlikely(!psock))
+ goto out;
+
+ mux = psock->mux;
+
+ spin_lock_bh(&mux->lock);
+
+ /* Check if the socket is reserved so someone is waiting for sending. */
+ kcm = psock->tx_kcm;
+ if (kcm)
+ queue_work(kcm_wq, &kcm->tx_work);
+
+ spin_unlock_bh(&mux->lock);
+out:
+ read_unlock_bh(&sk->sk_callback_lock);
+}
+
+static void unreserve_psock(struct kcm_sock *kcm);
+
+/* kcm sock is locked. */
+static struct kcm_psock *reserve_psock(struct kcm_sock *kcm)
+{
+ struct kcm_mux *mux = kcm->mux;
+ struct kcm_psock *psock;
+
+ psock = kcm->tx_psock;
+
+ smp_rmb(); /* Must read tx_psock before tx_wait */
+
+ if (psock) {
+ WARN_ON(kcm->tx_wait);
+ if (unlikely(psock->tx_stopped))
+ unreserve_psock(kcm);
+ else
+ return kcm->tx_psock;
+ }
+
+ spin_lock_bh(&mux->lock);
+
+ /* Check again under lock to see if psock was reserved for this
+ * psock via psock_unreserve.
+ */
+ psock = kcm->tx_psock;
+ if (unlikely(psock)) {
+ WARN_ON(kcm->tx_wait);
+ spin_unlock_bh(&mux->lock);
+ return kcm->tx_psock;
+ }
+
+ if (!list_empty(&mux->psocks_avail)) {
+ psock = list_first_entry(&mux->psocks_avail,
+ struct kcm_psock,
+ psock_avail_list);
+ list_del(&psock->psock_avail_list);
+ if (kcm->tx_wait) {
+ list_del(&kcm->wait_psock_list);
+ kcm->tx_wait = false;
+ }
+ kcm->tx_psock = psock;
+ psock->tx_kcm = kcm;
+ KCM_STATS_INCR(psock->stats.reserved);
+ } else if (!kcm->tx_wait) {
+ list_add_tail(&kcm->wait_psock_list,
+ &mux->kcm_tx_waiters);
+ kcm->tx_wait = true;
+ }
+
+ spin_unlock_bh(&mux->lock);
+
+ return psock;
+}
+
+/* mux lock held */
+static void psock_now_avail(struct kcm_psock *psock)
+{
+ struct kcm_mux *mux = psock->mux;
+ struct kcm_sock *kcm;
+
+ if (list_empty(&mux->kcm_tx_waiters)) {
+ list_add_tail(&psock->psock_avail_list,
+ &mux->psocks_avail);
+ } else {
+ kcm = list_first_entry(&mux->kcm_tx_waiters,
+ struct kcm_sock,
+ wait_psock_list);
+ list_del(&kcm->wait_psock_list);
+ kcm->tx_wait = false;
+ psock->tx_kcm = kcm;
+
+ /* Commit before changing tx_psock since that is read in
+ * reserve_psock before queuing work.
+ */
+ smp_mb();
+
+ kcm->tx_psock = psock;
+ KCM_STATS_INCR(psock->stats.reserved);
+ queue_work(kcm_wq, &kcm->tx_work);
+ }
+}
+
+/* kcm sock is locked. */
+static void unreserve_psock(struct kcm_sock *kcm)
+{
+ struct kcm_psock *psock;
+ struct kcm_mux *mux = kcm->mux;
+
+ spin_lock_bh(&mux->lock);
+
+ psock = kcm->tx_psock;
+
+ if (WARN_ON(!psock)) {
+ spin_unlock_bh(&mux->lock);
+ return;
+ }
+
+ smp_rmb(); /* Read tx_psock before tx_wait */
+
+ kcm_update_tx_mux_stats(mux, psock);
+
+ WARN_ON(kcm->tx_wait);
+
+ kcm->tx_psock = NULL;
+ psock->tx_kcm = NULL;
+ KCM_STATS_INCR(psock->stats.unreserved);
+
+ if (unlikely(psock->tx_stopped)) {
+ if (psock->done) {
+ /* Deferred free */
+ list_del(&psock->psock_list);
+ mux->psocks_cnt--;
+ sock_put(psock->sk);
+ fput(psock->sk->sk_socket->file);
+ kmem_cache_free(kcm_psockp, psock);
+ }
+
+ /* Don't put back on available list */
+
+ spin_unlock_bh(&mux->lock);
+
+ return;
+ }
+
+ psock_now_avail(psock);
+
+ spin_unlock_bh(&mux->lock);
+}
+
+static void kcm_report_tx_retry(struct kcm_sock *kcm)
+{
+ struct kcm_mux *mux = kcm->mux;
+
+ spin_lock_bh(&mux->lock);
+ KCM_STATS_INCR(mux->stats.tx_retries);
+ spin_unlock_bh(&mux->lock);
+}
+
+/* Write any messages ready on the kcm socket. Called with kcm sock lock
+ * held. Return bytes actually sent or error.
+ */
+static int kcm_write_msgs(struct kcm_sock *kcm)
+{
+ struct sock *sk = &kcm->sk;
+ struct kcm_psock *psock;
+ struct sk_buff *skb, *head;
+ struct kcm_tx_msg *txm;
+ unsigned short fragidx, frag_offset;
+ unsigned int sent, total_sent = 0;
+ int ret = 0;
+
+ kcm->tx_wait_more = false;
+ psock = kcm->tx_psock;
+ if (unlikely(psock && psock->tx_stopped)) {
+ /* A reserved psock was aborted asynchronously. Unreserve
+ * it and we'll retry the message.
+ */
+ unreserve_psock(kcm);
+ kcm_report_tx_retry(kcm);
+ if (skb_queue_empty(&sk->sk_write_queue))
+ return 0;
+
+ kcm_tx_msg(skb_peek(&sk->sk_write_queue))->sent = 0;
+
+ } else if (skb_queue_empty(&sk->sk_write_queue)) {
+ return 0;
+ }
+
+ head = skb_peek(&sk->sk_write_queue);
+ txm = kcm_tx_msg(head);
+
+ if (txm->sent) {
+ /* Send of first skbuff in queue already in progress */
+ if (WARN_ON(!psock)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ sent = txm->sent;
+ frag_offset = txm->frag_offset;
+ fragidx = txm->fragidx;
+ skb = txm->frag_skb;
+
+ goto do_frag;
+ }
+
+try_again:
+ psock = reserve_psock(kcm);
+ if (!psock)
+ goto out;
+
+ do {
+ skb = head;
+ txm = kcm_tx_msg(head);
+ sent = 0;
+
+do_frag_list:
+ if (WARN_ON(!skb_shinfo(skb)->nr_frags)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags;
+ fragidx++) {
+ skb_frag_t *frag;
+
+ frag_offset = 0;
+do_frag:
+ frag = &skb_shinfo(skb)->frags[fragidx];
+ if (WARN_ON(!frag->size)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = kernel_sendpage(psock->sk->sk_socket,
+ frag->page.p,
+ frag->page_offset + frag_offset,
+ frag->size - frag_offset,
+ MSG_DONTWAIT);
+ if (ret <= 0) {
+ if (ret == -EAGAIN) {
+ /* Save state to try again when there's
+ * write space on the socket
+ */
+ txm->sent = sent;
+ txm->frag_offset = frag_offset;
+ txm->fragidx = fragidx;
+ txm->frag_skb = skb;
+
+ ret = 0;
+ goto out;
+ }
+
+ /* Hard failure in sending message, abort this
+ * psock since it has lost framing
+ * synchonization and retry sending the
+ * message from the beginning.
+ */
+ kcm_abort_tx_psock(psock, ret ? -ret : EPIPE,
+ true);
+ unreserve_psock(kcm);
+
+ txm->sent = 0;
+ kcm_report_tx_retry(kcm);
+ ret = 0;
+
+ goto try_again;
+ }
+
+ sent += ret;
+ frag_offset += ret;
+ KCM_STATS_ADD(psock->stats.tx_bytes, ret);
+ if (frag_offset < frag->size) {
+ /* Not finished with this frag */
+ goto do_frag;
+ }
+ }
+
+ if (skb == head) {
+ if (skb_has_frag_list(skb)) {
+ skb = skb_shinfo(skb)->frag_list;
+ goto do_frag_list;
+ }
+ } else if (skb->next) {
+ skb = skb->next;
+ goto do_frag_list;
+ }
+
+ /* Successfully sent the whole packet, account for it. */
+ skb_dequeue(&sk->sk_write_queue);
+ kfree_skb(head);
+ sk->sk_wmem_queued -= sent;
+ total_sent += sent;
+ KCM_STATS_INCR(psock->stats.tx_msgs);
+ } while ((head = skb_peek(&sk->sk_write_queue)));
+out:
+ if (!head) {
+ /* Done with all queued messages. */
+ WARN_ON(!skb_queue_empty(&sk->sk_write_queue));
+ unreserve_psock(kcm);
+ }
+
+ /* Check if write space is available */
+ sk->sk_write_space(sk);
+
+ return total_sent ? : ret;
+}
+
+static void kcm_tx_work(struct work_struct *w)
+{
+ struct kcm_sock *kcm = container_of(w, struct kcm_sock, tx_work);
+ struct sock *sk = &kcm->sk;
+ int err;
+
+ lock_sock(sk);
+
+ /* Primarily for SOCK_DGRAM sockets, also handle asynchronous tx
+ * aborts
+ */
+ err = kcm_write_msgs(kcm);
+ if (err < 0) {
+ /* Hard failure in write, report error on KCM socket */
+ pr_warn("KCM: Hard failure on kcm_write_msgs %d\n", err);
+ report_csk_error(&kcm->sk, -err);
+ goto out;
+ }
+
+ /* Primarily for SOCK_SEQPACKET sockets */
+ if (likely(sk->sk_socket) &&
+ test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
+ clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ sk->sk_write_space(sk);
+ }
+
+out:
+ release_sock(sk);
+}
+
+static void kcm_push(struct kcm_sock *kcm)
+{
+ if (kcm->tx_wait_more)
+ kcm_write_msgs(kcm);
+}
+
+static ssize_t kcm_sendpage(struct socket *sock, struct page *page,
+ int offset, size_t size, int flags)
+
+{
+ struct sock *sk = sock->sk;
+ struct kcm_sock *kcm = kcm_sk(sk);
+ struct sk_buff *skb = NULL, *head = NULL;
+ long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
+ bool eor;
+ int err = 0;
+ int i;
+
+ if (flags & MSG_SENDPAGE_NOTLAST)
+ flags |= MSG_MORE;
+
+ /* No MSG_EOR from splice, only look at MSG_MORE */
+ eor = !(flags & MSG_MORE);
+
+ lock_sock(sk);
+
+ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
+
+ err = -EPIPE;
+ if (sk->sk_err)
+ goto out_error;
+
+ if (kcm->seq_skb) {
+ /* Previously opened message */
+ head = kcm->seq_skb;
+ skb = kcm_tx_msg(head)->last_skb;
+ i = skb_shinfo(skb)->nr_frags;
+
+ if (skb_can_coalesce(skb, i, page, offset)) {
+ skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], size);
+ skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
+ goto coalesced;
+ }
+
+ if (i >= MAX_SKB_FRAGS) {
+ struct sk_buff *tskb;
+
+ tskb = alloc_skb(0, sk->sk_allocation);
+ while (!tskb) {
+ kcm_push(kcm);
+ err = sk_stream_wait_memory(sk, &timeo);
+ if (err)
+ goto out_error;
+ }
+
+ if (head == skb)
+ skb_shinfo(head)->frag_list = tskb;
+ else
+ skb->next = tskb;
+
+ skb = tskb;
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ i = 0;
+ }
+ } else {
+ /* Call the sk_stream functions to manage the sndbuf mem. */
+ if (!sk_stream_memory_free(sk)) {
+ kcm_push(kcm);
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ err = sk_stream_wait_memory(sk, &timeo);
+ if (err)
+ goto out_error;
+ }
+
+ head = alloc_skb(0, sk->sk_allocation);
+ while (!head) {
+ kcm_push(kcm);
+ err = sk_stream_wait_memory(sk, &timeo);
+ if (err)
+ goto out_error;
+ }
+
+ skb = head;
+ i = 0;
+ }
+
+ get_page(page);
+ skb_fill_page_desc(skb, i, page, offset, size);
+ skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
+
+coalesced:
+ skb->len += size;
+ skb->data_len += size;
+ skb->truesize += size;
+ sk->sk_wmem_queued += size;
+ sk_mem_charge(sk, size);
+
+ if (head != skb) {
+ head->len += size;
+ head->data_len += size;
+ head->truesize += size;
+ }
+
+ if (eor) {
+ bool not_busy = skb_queue_empty(&sk->sk_write_queue);
+
+ /* Message complete, queue it on send buffer */
+ __skb_queue_tail(&sk->sk_write_queue, head);
+ kcm->seq_skb = NULL;
+ KCM_STATS_INCR(kcm->stats.tx_msgs);
+
+ if (flags & MSG_BATCH) {
+ kcm->tx_wait_more = true;
+ } else if (kcm->tx_wait_more || not_busy) {
+ err = kcm_write_msgs(kcm);
+ if (err < 0) {
+ /* We got a hard error in write_msgs but have
+ * already queued this message. Report an error
+ * in the socket, but don't affect return value
+ * from sendmsg
+ */
+ pr_warn("KCM: Hard failure on kcm_write_msgs\n");
+ report_csk_error(&kcm->sk, -err);
+ }
+ }
+ } else {
+ /* Message not complete, save state */
+ kcm->seq_skb = head;
+ kcm_tx_msg(head)->last_skb = skb;
+ }
+
+ KCM_STATS_ADD(kcm->stats.tx_bytes, size);
+
+ release_sock(sk);
+ return size;
+
+out_error:
+ kcm_push(kcm);
+
+ err = sk_stream_error(sk, flags, err);
+
+ /* make sure we wake any epoll edge trigger waiter */
+ if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
+ sk->sk_write_space(sk);
+
+ release_sock(sk);
+ return err;
+}
+
+static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
+{
+ struct sock *sk = sock->sk;
+ struct kcm_sock *kcm = kcm_sk(sk);
+ struct sk_buff *skb = NULL, *head = NULL;
+ size_t copy, copied = 0;
+ long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+ int eor = (sock->type == SOCK_DGRAM) ?
+ !(msg->msg_flags & MSG_MORE) : !!(msg->msg_flags & MSG_EOR);
+ int err = -EPIPE;
+
+ lock_sock(sk);
+
+ /* Per tcp_sendmsg this should be in poll */
+ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
+
+ if (sk->sk_err)
+ goto out_error;
+
+ if (kcm->seq_skb) {
+ /* Previously opened message */
+ head = kcm->seq_skb;
+ skb = kcm_tx_msg(head)->last_skb;
+ goto start;
+ }
+
+ /* Call the sk_stream functions to manage the sndbuf mem. */
+ if (!sk_stream_memory_free(sk)) {
+ kcm_push(kcm);
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ err = sk_stream_wait_memory(sk, &timeo);
+ if (err)
+ goto out_error;
+ }
+
+ /* New message, alloc head skb */
+ head = alloc_skb(0, sk->sk_allocation);
+ while (!head) {
+ kcm_push(kcm);
+ err = sk_stream_wait_memory(sk, &timeo);
+ if (err)
+ goto out_error;
+
+ head = alloc_skb(0, sk->sk_allocation);
+ }
+
+ skb = head;
+
+ /* Set ip_summed to CHECKSUM_UNNECESSARY to avoid calling
+ * csum_and_copy_from_iter from skb_do_copy_data_nocache.
+ */
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+start:
+ while (msg_data_left(msg)) {
+ bool merge = true;
+ int i = skb_shinfo(skb)->nr_frags;
+ struct page_frag *pfrag = sk_page_frag(sk);
+
+ if (!sk_page_frag_refill(sk, pfrag))
+ goto wait_for_memory;
+
+ if (!skb_can_coalesce(skb, i, pfrag->page,
+ pfrag->offset)) {
+ if (i == MAX_SKB_FRAGS) {
+ struct sk_buff *tskb;
+
+ tskb = alloc_skb(0, sk->sk_allocation);
+ if (!tskb)
+ goto wait_for_memory;
+
+ if (head == skb)
+ skb_shinfo(head)->frag_list = tskb;
+ else
+ skb->next = tskb;
+
+ skb = tskb;
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ continue;
+ }
+ merge = false;
+ }
+
+ copy = min_t(int, msg_data_left(msg),
+ pfrag->size - pfrag->offset);
+
+ if (!sk_wmem_schedule(sk, copy))
+ goto wait_for_memory;
+
+ err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb,
+ pfrag->page,
+ pfrag->offset,
+ copy);
+ if (err)
+ goto out_error;
+
+ /* Update the skb. */
+ if (merge) {
+ skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
+ } else {
+ skb_fill_page_desc(skb, i, pfrag->page,
+ pfrag->offset, copy);
+ get_page(pfrag->page);
+ }
+
+ pfrag->offset += copy;
+ copied += copy;
+ if (head != skb) {
+ head->len += copy;
+ head->data_len += copy;
+ }
+
+ continue;
+
+wait_for_memory:
+ kcm_push(kcm);
+ err = sk_stream_wait_memory(sk, &timeo);
+ if (err)
+ goto out_error;
+ }
+
+ if (eor) {
+ bool not_busy = skb_queue_empty(&sk->sk_write_queue);
+
+ /* Message complete, queue it on send buffer */
+ __skb_queue_tail(&sk->sk_write_queue, head);
+ kcm->seq_skb = NULL;
+ KCM_STATS_INCR(kcm->stats.tx_msgs);
+
+ if (msg->msg_flags & MSG_BATCH) {
+ kcm->tx_wait_more = true;
+ } else if (kcm->tx_wait_more || not_busy) {
+ err = kcm_write_msgs(kcm);
+ if (err < 0) {
+ /* We got a hard error in write_msgs but have
+ * already queued this message. Report an error
+ * in the socket, but don't affect return value
+ * from sendmsg
+ */
+ pr_warn("KCM: Hard failure on kcm_write_msgs\n");
+ report_csk_error(&kcm->sk, -err);
+ }
+ }
+ } else {
+ /* Message not complete, save state */
+partial_message:
+ kcm->seq_skb = head;
+ kcm_tx_msg(head)->last_skb = skb;
+ }
+
+ KCM_STATS_ADD(kcm->stats.tx_bytes, copied);
+
+ release_sock(sk);
+ return copied;
+
+out_error:
+ kcm_push(kcm);
+
+ if (copied && sock->type == SOCK_SEQPACKET) {
+ /* Wrote some bytes before encountering an
+ * error, return partial success.
+ */
+ goto partial_message;
+ }
+
+ if (head != kcm->seq_skb)
+ kfree_skb(head);
+
+ err = sk_stream_error(sk, msg->msg_flags, err);
+
+ /* make sure we wake any epoll edge trigger waiter */
+ if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
+ sk->sk_write_space(sk);
+
+ release_sock(sk);
+ return err;
+}
+
+static struct sk_buff *kcm_wait_data(struct sock *sk, int flags,
+ long timeo, int *err)
+{
+ struct sk_buff *skb;
+
+ while (!(skb = skb_peek(&sk->sk_receive_queue))) {
+ if (sk->sk_err) {
+ *err = sock_error(sk);
+ return NULL;
+ }
+
+ if (sock_flag(sk, SOCK_DONE))
+ return NULL;
+
+ if ((flags & MSG_DONTWAIT) || !timeo) {
+ *err = -EAGAIN;
+ return NULL;
+ }
+
+ sk_wait_data(sk, &timeo, NULL);
+
+ /* Handle signals */
+ if (signal_pending(current)) {
+ *err = sock_intr_errno(timeo);
+ return NULL;
+ }
+ }
+
+ return skb;
+}
+
+static int kcm_recvmsg(struct socket *sock, struct msghdr *msg,
+ size_t len, int flags)
+{
+ struct sock *sk = sock->sk;
+ struct kcm_sock *kcm = kcm_sk(sk);
+ int err = 0;
+ long timeo;
+ struct kcm_rx_msg *rxm;
+ int copied = 0;
+ struct sk_buff *skb;
+
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+
+ lock_sock(sk);
+
+ skb = kcm_wait_data(sk, flags, timeo, &err);
+ if (!skb)
+ goto out;
+
+ /* Okay, have a message on the receive queue */
+
+ rxm = kcm_rx_msg(skb);
+
+ if (len > rxm->full_len)
+ len = rxm->full_len;
+
+ err = skb_copy_datagram_msg(skb, rxm->offset, msg, len);
+ if (err < 0)
+ goto out;
+
+ copied = len;
+ if (likely(!(flags & MSG_PEEK))) {
+ KCM_STATS_ADD(kcm->stats.rx_bytes, copied);
+ if (copied < rxm->full_len) {
+ if (sock->type == SOCK_DGRAM) {
+ /* Truncated message */
+ msg->msg_flags |= MSG_TRUNC;
+ goto msg_finished;
+ }
+ rxm->offset += copied;
+ rxm->full_len -= copied;
+ } else {
+msg_finished:
+ /* Finished with message */
+ msg->msg_flags |= MSG_EOR;
+ KCM_STATS_INCR(kcm->stats.rx_msgs);
+ skb_unlink(skb, &sk->sk_receive_queue);
+ kfree_skb(skb);
+ }
+ }
+
+out:
+ release_sock(sk);
+
+ return copied ? : err;
+}
+
+static ssize_t kcm_sock_splice(struct sock *sk,
+ struct pipe_inode_info *pipe,
+ struct splice_pipe_desc *spd)
+{
+ int ret;
+
+ release_sock(sk);
+ ret = splice_to_pipe(pipe, spd);
+ lock_sock(sk);
+
+ return ret;
+}
+
+static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len,
+ unsigned int flags)
+{
+ struct sock *sk = sock->sk;
+ struct kcm_sock *kcm = kcm_sk(sk);
+ long timeo;
+ struct kcm_rx_msg *rxm;
+ int err = 0;
+ size_t copied;
+ struct sk_buff *skb;
+
+ /* Only support splice for SOCKSEQPACKET */
+
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+
+ lock_sock(sk);
+
+ skb = kcm_wait_data(sk, flags, timeo, &err);
+ if (!skb)
+ goto err_out;
+
+ /* Okay, have a message on the receive queue */
+
+ rxm = kcm_rx_msg(skb);
+
+ if (len > rxm->full_len)
+ len = rxm->full_len;
+
+ copied = skb_splice_bits(skb, sk, rxm->offset, pipe, len, flags,
+ kcm_sock_splice);
+ if (copied < 0) {
+ err = copied;
+ goto err_out;
+ }
+
+ KCM_STATS_ADD(kcm->stats.rx_bytes, copied);
+
+ rxm->offset += copied;
+ rxm->full_len -= copied;
+
+ /* We have no way to return MSG_EOR. If all the bytes have been
+ * read we still leave the message in the receive socket buffer.
+ * A subsequent recvmsg needs to be done to return MSG_EOR and
+ * finish reading the message.
+ */
+
+ release_sock(sk);
+
+ return copied;
+
+err_out:
+ release_sock(sk);
+
+ return err;
+}
+
+/* kcm sock lock held */
+static void kcm_recv_disable(struct kcm_sock *kcm)
+{
+ struct kcm_mux *mux = kcm->mux;
+
+ if (kcm->rx_disabled)
+ return;
+
+ spin_lock_bh(&mux->rx_lock);
+
+ kcm->rx_disabled = 1;
+
+ /* If a psock is reserved we'll do cleanup in unreserve */
+ if (!kcm->rx_psock) {
+ if (kcm->rx_wait) {
+ list_del(&kcm->wait_rx_list);
+ kcm->rx_wait = false;
+ }
+
+ requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue);
+ }
+
+ spin_unlock_bh(&mux->rx_lock);
+}
+
+/* kcm sock lock held */
+static void kcm_recv_enable(struct kcm_sock *kcm)
+{
+ struct kcm_mux *mux = kcm->mux;
+
+ if (!kcm->rx_disabled)
+ return;
+
+ spin_lock_bh(&mux->rx_lock);
+
+ kcm->rx_disabled = 0;
+ kcm_rcv_ready(kcm);
+
+ spin_unlock_bh(&mux->rx_lock);
+}
+
+static int kcm_setsockopt(struct socket *sock, int level, int optname,
+ char __user *optval, unsigned int optlen)
+{
+ struct kcm_sock *kcm = kcm_sk(sock->sk);
+ int val, valbool;
+ int err = 0;
+
+ if (level != SOL_KCM)
+ return -ENOPROTOOPT;
+
+ if (optlen < sizeof(int))
+ return -EINVAL;
+
+ if (get_user(val, (int __user *)optval))
+ return -EINVAL;
+
+ valbool = val ? 1 : 0;
+
+ switch (optname) {
+ case KCM_RECV_DISABLE:
+ lock_sock(&kcm->sk);
+ if (valbool)
+ kcm_recv_disable(kcm);
+ else
+ kcm_recv_enable(kcm);
+ release_sock(&kcm->sk);
+ break;
+ default:
+ err = -ENOPROTOOPT;
+ }
+
+ return err;
+}
+
+static int kcm_getsockopt(struct socket *sock, int level, int optname,
+ char __user *optval, int __user *optlen)
+{
+ struct kcm_sock *kcm = kcm_sk(sock->sk);
+ int val, len;
+
+ if (level != SOL_KCM)
+ return -ENOPROTOOPT;
+
+ if (get_user(len, optlen))
+ return -EFAULT;
+
+ len = min_t(unsigned int, len, sizeof(int));
+ if (len < 0)
+ return -EINVAL;
+
+ switch (optname) {
+ case KCM_RECV_DISABLE:
+ val = kcm->rx_disabled;
+ break;
+ default:
+ return -ENOPROTOOPT;
+ }
+
+ if (put_user(len, optlen))
+ return -EFAULT;
+ if (copy_to_user(optval, &val, len))
+ return -EFAULT;
+ return 0;
+}
+
+static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux)
+{
+ struct kcm_sock *tkcm;
+ struct list_head *head;
+ int index = 0;
+
+ /* For SOCK_SEQPACKET sock type, datagram_poll checks the sk_state, so
+ * we set sk_state, otherwise epoll_wait always returns right away with
+ * POLLHUP
+ */
+ kcm->sk.sk_state = TCP_ESTABLISHED;
+
+ /* Add to mux's kcm sockets list */
+ kcm->mux = mux;
+ spin_lock_bh(&mux->lock);
+
+ head = &mux->kcm_socks;
+ list_for_each_entry(tkcm, &mux->kcm_socks, kcm_sock_list) {
+ if (tkcm->index != index)
+ break;
+ head = &tkcm->kcm_sock_list;
+ index++;
+ }
+
+ list_add(&kcm->kcm_sock_list, head);
+ kcm->index = index;
+
+ mux->kcm_socks_cnt++;
+ spin_unlock_bh(&mux->lock);
+
+ INIT_WORK(&kcm->tx_work, kcm_tx_work);
+
+ spin_lock_bh(&mux->rx_lock);
+ kcm_rcv_ready(kcm);
+ spin_unlock_bh(&mux->rx_lock);
+}
+
+static void kcm_rx_msg_timeout(unsigned long arg)
+{
+ struct kcm_psock *psock = (struct kcm_psock *)arg;
+
+ /* Message assembly timed out */
+ KCM_STATS_INCR(psock->stats.rx_msg_timeouts);
+ kcm_abort_rx_psock(psock, ETIMEDOUT, NULL);
+}
+
+static int kcm_attach(struct socket *sock, struct socket *csock,
+ struct bpf_prog *prog)
+{
+ struct kcm_sock *kcm = kcm_sk(sock->sk);
+ struct kcm_mux *mux = kcm->mux;
+ struct sock *csk;
+ struct kcm_psock *psock = NULL, *tpsock;
+ struct list_head *head;
+ int index = 0;
+
+ if (csock->ops->family != PF_INET &&
+ csock->ops->family != PF_INET6)
+ return -EINVAL;
+
+ csk = csock->sk;
+ if (!csk)
+ return -EINVAL;
+
+ /* Only support TCP for now */
+ if (csk->sk_protocol != IPPROTO_TCP)
+ return -EINVAL;
+
+ psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL);
+ if (!psock)
+ return -ENOMEM;
+
+ psock->mux = mux;
+ psock->sk = csk;
+ psock->bpf_prog = prog;
+
+ setup_timer(&psock->rx_msg_timer, kcm_rx_msg_timeout,
+ (unsigned long)psock);
+
+ INIT_WORK(&psock->rx_work, psock_rx_work);
+ INIT_DELAYED_WORK(&psock->rx_delayed_work, psock_rx_delayed_work);
+
+ sock_hold(csk);
+
+ write_lock_bh(&csk->sk_callback_lock);
+ psock->save_data_ready = csk->sk_data_ready;
+ psock->save_write_space = csk->sk_write_space;
+ psock->save_state_change = csk->sk_state_change;
+ csk->sk_user_data = psock;
+ csk->sk_data_ready = psock_tcp_data_ready;
+ csk->sk_write_space = psock_tcp_write_space;
+ csk->sk_state_change = psock_tcp_state_change;
+ write_unlock_bh(&csk->sk_callback_lock);
+
+ /* Finished initialization, now add the psock to the MUX. */
+ spin_lock_bh(&mux->lock);
+ head = &mux->psocks;
+ list_for_each_entry(tpsock, &mux->psocks, psock_list) {
+ if (tpsock->index != index)
+ break;
+ head = &tpsock->psock_list;
+ index++;
+ }
+
+ list_add(&psock->psock_list, head);
+ psock->index = index;
+
+ KCM_STATS_INCR(mux->stats.psock_attach);
+ mux->psocks_cnt++;
+ psock_now_avail(psock);
+ spin_unlock_bh(&mux->lock);
+
+ /* Schedule RX work in case there are already bytes queued */
+ queue_work(kcm_wq, &psock->rx_work);
+
+ return 0;
+}
+
+static int kcm_attach_ioctl(struct socket *sock, struct kcm_attach *info)
+{
+ struct socket *csock;
+ struct bpf_prog *prog;
+ int err;
+
+ csock = sockfd_lookup(info->fd, &err);
+ if (!csock)
+ return -ENOENT;
+
+ prog = bpf_prog_get(info->bpf_fd);
+ if (IS_ERR(prog)) {
+ err = PTR_ERR(prog);
+ goto out;
+ }
+
+ if (prog->type != BPF_PROG_TYPE_SOCKET_FILTER) {
+ bpf_prog_put(prog);
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = kcm_attach(sock, csock, prog);
+ if (err) {
+ bpf_prog_put(prog);
+ goto out;
+ }
+
+ /* Keep reference on file also */
+
+ return 0;
+out:
+ fput(csock->file);
+ return err;
+}
+
+static void kcm_unattach(struct kcm_psock *psock)
+{
+ struct sock *csk = psock->sk;
+ struct kcm_mux *mux = psock->mux;
+
+ /* Stop getting callbacks from TCP socket. After this there should
+ * be no way to reserve a kcm for this psock.
+ */
+ write_lock_bh(&csk->sk_callback_lock);
+ csk->sk_user_data = NULL;
+ csk->sk_data_ready = psock->save_data_ready;
+ csk->sk_write_space = psock->save_write_space;
+ csk->sk_state_change = psock->save_state_change;
+ psock->rx_stopped = 1;
+
+ if (WARN_ON(psock->rx_kcm)) {
+ write_unlock_bh(&csk->sk_callback_lock);
+ return;
+ }
+
+ spin_lock_bh(&mux->rx_lock);
+
+ /* Stop receiver activities. After this point psock should not be
+ * able to get onto ready list either through callbacks or work.
+ */
+ if (psock->ready_rx_msg) {
+ list_del(&psock->psock_ready_list);
+ kfree_skb(psock->ready_rx_msg);
+ psock->ready_rx_msg = NULL;
+ KCM_STATS_INCR(mux->stats.rx_ready_drops);
+ }
+
+ spin_unlock_bh(&mux->rx_lock);
+
+ write_unlock_bh(&csk->sk_callback_lock);
+
+ del_timer_sync(&psock->rx_msg_timer);
+ cancel_work_sync(&psock->rx_work);
+ cancel_delayed_work_sync(&psock->rx_delayed_work);
+
+ bpf_prog_put(psock->bpf_prog);
+
+ kfree_skb(psock->rx_skb_head);
+ psock->rx_skb_head = NULL;
+
+ spin_lock_bh(&mux->lock);
+
+ aggregate_psock_stats(&psock->stats, &mux->aggregate_psock_stats);
+
+ KCM_STATS_INCR(mux->stats.psock_unattach);
+
+ if (psock->tx_kcm) {
+ /* psock was reserved. Just mark it finished and we will clean
+ * up in the kcm paths, we need kcm lock which can not be
+ * acquired here.
+ */
+ KCM_STATS_INCR(mux->stats.psock_unattach_rsvd);
+ spin_unlock_bh(&mux->lock);
+
+ /* We are unattaching a socket that is reserved. Abort the
+ * socket since we may be out of sync in sending on it. We need
+ * to do this without the mux lock.
+ */
+ kcm_abort_tx_psock(psock, EPIPE, false);
+
+ spin_lock_bh(&mux->lock);
+ if (!psock->tx_kcm) {
+ /* psock now unreserved in window mux was unlocked */
+ goto no_reserved;
+ }
+ psock->done = 1;
+
+ /* Commit done before queuing work to process it */
+ smp_mb();
+
+ /* Queue tx work to make sure psock->done is handled */
+ queue_work(kcm_wq, &psock->tx_kcm->tx_work);
+ spin_unlock_bh(&mux->lock);
+ } else {
+no_reserved:
+ if (!psock->tx_stopped)
+ list_del(&psock->psock_avail_list);
+ list_del(&psock->psock_list);
+ mux->psocks_cnt--;
+ spin_unlock_bh(&mux->lock);
+
+ sock_put(csk);
+ fput(csk->sk_socket->file);
+ kmem_cache_free(kcm_psockp, psock);
+ }
+}
+
+static int kcm_unattach_ioctl(struct socket *sock, struct kcm_unattach *info)
+{
+ struct kcm_sock *kcm = kcm_sk(sock->sk);
+ struct kcm_mux *mux = kcm->mux;
+ struct kcm_psock *psock;
+ struct socket *csock;
+ struct sock *csk;
+ int err;
+
+ csock = sockfd_lookup(info->fd, &err);
+ if (!csock)
+ return -ENOENT;
+
+ csk = csock->sk;
+ if (!csk) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = -ENOENT;
+
+ spin_lock_bh(&mux->lock);
+
+ list_for_each_entry(psock, &mux->psocks, psock_list) {
+ if (psock->sk != csk)
+ continue;
+
+ /* Found the matching psock */
+
+ if (psock->unattaching || WARN_ON(psock->done)) {
+ err = -EALREADY;
+ break;
+ }
+
+ psock->unattaching = 1;
+
+ spin_unlock_bh(&mux->lock);
+
+ kcm_unattach(psock);
+
+ err = 0;
+ goto out;
+ }
+
+ spin_unlock_bh(&mux->lock);
+
+out:
+ fput(csock->file);
+ return err;
+}
+
+static struct proto kcm_proto = {
+ .name = "KCM",
+ .owner = THIS_MODULE,
+ .obj_size = sizeof(struct kcm_sock),
+};
+
+/* Clone a kcm socket. */
+static int kcm_clone(struct socket *osock, struct kcm_clone *info,
+ struct socket **newsockp)
+{
+ struct socket *newsock;
+ struct sock *newsk;
+ struct file *newfile;
+ int err, newfd;
+
+ err = -ENFILE;
+ newsock = sock_alloc();
+ if (!newsock)
+ goto out;
+
+ newsock->type = osock->type;
+ newsock->ops = osock->ops;
+
+ __module_get(newsock->ops->owner);
+
+ newfd = get_unused_fd_flags(0);
+ if (unlikely(newfd < 0)) {
+ err = newfd;
+ goto out_fd_fail;
+ }
+
+ newfile = sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name);
+ if (unlikely(IS_ERR(newfile))) {
+ err = PTR_ERR(newfile);
+ goto out_sock_alloc_fail;
+ }
+
+ newsk = sk_alloc(sock_net(osock->sk), PF_KCM, GFP_KERNEL,
+ &kcm_proto, true);
+ if (!newsk) {
+ err = -ENOMEM;
+ goto out_sk_alloc_fail;
+ }
+
+ sock_init_data(newsock, newsk);
+ init_kcm_sock(kcm_sk(newsk), kcm_sk(osock->sk)->mux);
+
+ fd_install(newfd, newfile);
+ *newsockp = newsock;
+ info->fd = newfd;
+
+ return 0;
+
+out_sk_alloc_fail:
+ fput(newfile);
+out_sock_alloc_fail:
+ put_unused_fd(newfd);
+out_fd_fail:
+ sock_release(newsock);
+out:
+ return err;
+}
+
+static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+ int err;
+
+ switch (cmd) {
+ case SIOCKCMATTACH: {
+ struct kcm_attach info;
+
+ if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
+ err = -EFAULT;
+
+ err = kcm_attach_ioctl(sock, &info);
+
+ break;
+ }
+ case SIOCKCMUNATTACH: {
+ struct kcm_unattach info;
+
+ if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
+ err = -EFAULT;
+
+ err = kcm_unattach_ioctl(sock, &info);
+
+ break;
+ }
+ case SIOCKCMCLONE: {
+ struct kcm_clone info;
+ struct socket *newsock = NULL;
+
+ if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
+ err = -EFAULT;
+
+ err = kcm_clone(sock, &info, &newsock);
+
+ if (!err) {
+ if (copy_to_user((void __user *)arg, &info,
+ sizeof(info))) {
+ err = -EFAULT;
+ sock_release(newsock);
+ }
+ }
+
+ break;
+ }
+ default:
+ err = -ENOIOCTLCMD;
+ break;
+ }
+
+ return err;
+}
+
+static void free_mux(struct rcu_head *rcu)
+{
+ struct kcm_mux *mux = container_of(rcu,
+ struct kcm_mux, rcu);
+
+ kmem_cache_free(kcm_muxp, mux);
+}
+
+static void release_mux(struct kcm_mux *mux)
+{
+ struct kcm_net *knet = mux->knet;
+ struct kcm_psock *psock, *tmp_psock;
+
+ /* Release psocks */
+ list_for_each_entry_safe(psock, tmp_psock,
+ &mux->psocks, psock_list) {
+ if (!WARN_ON(psock->unattaching))
+ kcm_unattach(psock);
+ }
+
+ if (WARN_ON(mux->psocks_cnt))
+ return;
+
+ __skb_queue_purge(&mux->rx_hold_queue);
+
+ mutex_lock(&knet->mutex);
+ aggregate_mux_stats(&mux->stats, &knet->aggregate_mux_stats);
+ aggregate_psock_stats(&mux->aggregate_psock_stats,
+ &knet->aggregate_psock_stats);
+ list_del_rcu(&mux->kcm_mux_list);
+ knet->count--;
+ mutex_unlock(&knet->mutex);
+
+ call_rcu(&mux->rcu, free_mux);
+}
+
+static void kcm_done(struct kcm_sock *kcm)
+{
+ struct kcm_mux *mux = kcm->mux;
+ struct sock *sk = &kcm->sk;
+ int socks_cnt;
+
+ spin_lock_bh(&mux->rx_lock);
+ if (kcm->rx_psock) {
+ /* Cleanup in unreserve_rx_kcm */
+ WARN_ON(kcm->done);
+ kcm->rx_disabled = 1;
+ kcm->done = 1;
+ spin_unlock_bh(&mux->rx_lock);
+ return;
+ }
+
+ if (kcm->rx_wait) {
+ list_del(&kcm->wait_rx_list);
+ kcm->rx_wait = false;
+ }
+ /* Move any pending receive messages to other kcm sockets */
+ requeue_rx_msgs(mux, &sk->sk_receive_queue);
+
+ spin_unlock_bh(&mux->rx_lock);
+
+ if (WARN_ON(sk_rmem_alloc_get(sk)))
+ return;
+
+ /* Detach from MUX */
+ spin_lock_bh(&mux->lock);
+
+ list_del(&kcm->kcm_sock_list);
+ mux->kcm_socks_cnt--;
+ socks_cnt = mux->kcm_socks_cnt;
+
+ spin_unlock_bh(&mux->lock);
+
+ if (!socks_cnt) {
+ /* We are done with the mux now. */
+ release_mux(mux);
+ }
+
+ WARN_ON(kcm->rx_wait);
+
+ sock_put(&kcm->sk);
+}
+
+/* Called by kcm_release to close a KCM socket.
+ * If this is the last KCM socket on the MUX, destroy the MUX.
+ */
+static int kcm_release(struct socket *sock)
+{
+ struct sock *sk = sock->sk;
+ struct kcm_sock *kcm;
+ struct kcm_mux *mux;
+ struct kcm_psock *psock;
+
+ if (!sk)
+ return 0;
+
+ kcm = kcm_sk(sk);
+ mux = kcm->mux;
+
+ sock_orphan(sk);
+ kfree_skb(kcm->seq_skb);
+
+ lock_sock(sk);
+ /* Purge queue under lock to avoid race condition with tx_work trying
+ * to act when queue is nonempty. If tx_work runs after this point
+ * it will just return.
+ */
+ __skb_queue_purge(&sk->sk_write_queue);
+ release_sock(sk);
+
+ spin_lock_bh(&mux->lock);
+ if (kcm->tx_wait) {
+ /* Take of tx_wait list, after this point there should be no way
+ * that a psock will be assigned to this kcm.
+ */
+ list_del(&kcm->wait_psock_list);
+ kcm->tx_wait = false;
+ }
+ spin_unlock_bh(&mux->lock);
+
+ /* Cancel work. After this point there should be no outside references
+ * to the kcm socket.
+ */
+ cancel_work_sync(&kcm->tx_work);
+
+ lock_sock(sk);
+ psock = kcm->tx_psock;
+ if (psock) {
+ /* A psock was reserved, so we need to kill it since it
+ * may already have some bytes queued from a message. We
+ * need to do this after removing kcm from tx_wait list.
+ */
+ kcm_abort_tx_psock(psock, EPIPE, false);
+ unreserve_psock(kcm);
+ }
+ release_sock(sk);
+
+ WARN_ON(kcm->tx_wait);
+ WARN_ON(kcm->tx_psock);
+
+ sock->sk = NULL;
+
+ kcm_done(kcm);
+
+ return 0;
+}
+
+static const struct proto_ops kcm_dgram_ops = {
+ .family = PF_KCM,
+ .owner = THIS_MODULE,
+ .release = kcm_release,
+ .bind = sock_no_bind,
+ .connect = sock_no_connect,
+ .socketpair = sock_no_socketpair,
+ .accept = sock_no_accept,
+ .getname = sock_no_getname,
+ .poll = datagram_poll,
+ .ioctl = kcm_ioctl,
+ .listen = sock_no_listen,
+ .shutdown = sock_no_shutdown,
+ .setsockopt = kcm_setsockopt,
+ .getsockopt = kcm_getsockopt,
+ .sendmsg = kcm_sendmsg,
+ .recvmsg = kcm_recvmsg,
+ .mmap = sock_no_mmap,
+ .sendpage = kcm_sendpage,
+};
+
+static const struct proto_ops kcm_seqpacket_ops = {
+ .family = PF_KCM,
+ .owner = THIS_MODULE,
+ .release = kcm_release,
+ .bind = sock_no_bind,
+ .connect = sock_no_connect,
+ .socketpair = sock_no_socketpair,
+ .accept = sock_no_accept,
+ .getname = sock_no_getname,
+ .poll = datagram_poll,
+ .ioctl = kcm_ioctl,
+ .listen = sock_no_listen,
+ .shutdown = sock_no_shutdown,
+ .setsockopt = kcm_setsockopt,
+ .getsockopt = kcm_getsockopt,
+ .sendmsg = kcm_sendmsg,
+ .recvmsg = kcm_recvmsg,
+ .mmap = sock_no_mmap,
+ .sendpage = kcm_sendpage,
+ .splice_read = kcm_splice_read,
+};
+
+/* Create proto operation for kcm sockets */
+static int kcm_create(struct net *net, struct socket *sock,
+ int protocol, int kern)
+{
+ struct kcm_net *knet = net_generic(net, kcm_net_id);
+ struct sock *sk;
+ struct kcm_mux *mux;
+
+ switch (sock->type) {
+ case SOCK_DGRAM:
+ sock->ops = &kcm_dgram_ops;
+ break;
+ case SOCK_SEQPACKET:
+ sock->ops = &kcm_seqpacket_ops;
+ break;
+ default:
+ return -ESOCKTNOSUPPORT;
+ }
+
+ if (protocol != KCMPROTO_CONNECTED)
+ return -EPROTONOSUPPORT;
+
+ sk = sk_alloc(net, PF_KCM, GFP_KERNEL, &kcm_proto, kern);
+ if (!sk)
+ return -ENOMEM;
+
+ /* Allocate a kcm mux, shared between KCM sockets */
+ mux = kmem_cache_zalloc(kcm_muxp, GFP_KERNEL);
+ if (!mux) {
+ sk_free(sk);
+ return -ENOMEM;
+ }
+
+ spin_lock_init(&mux->lock);
+ spin_lock_init(&mux->rx_lock);
+ INIT_LIST_HEAD(&mux->kcm_socks);
+ INIT_LIST_HEAD(&mux->kcm_rx_waiters);
+ INIT_LIST_HEAD(&mux->kcm_tx_waiters);
+
+ INIT_LIST_HEAD(&mux->psocks);
+ INIT_LIST_HEAD(&mux->psocks_ready);
+ INIT_LIST_HEAD(&mux->psocks_avail);
+
+ mux->knet = knet;
+
+ /* Add new MUX to list */
+ mutex_lock(&knet->mutex);
+ list_add_rcu(&mux->kcm_mux_list, &knet->mux_list);
+ knet->count++;
+ mutex_unlock(&knet->mutex);
+
+ skb_queue_head_init(&mux->rx_hold_queue);
+
+ /* Init KCM socket */
+ sock_init_data(sock, sk);
+ init_kcm_sock(kcm_sk(sk), mux);
+
+ return 0;
+}
+
+static struct net_proto_family kcm_family_ops = {
+ .family = PF_KCM,
+ .create = kcm_create,
+ .owner = THIS_MODULE,
+};
+
+static __net_init int kcm_init_net(struct net *net)
+{
+ struct kcm_net *knet = net_generic(net, kcm_net_id);
+
+ INIT_LIST_HEAD_RCU(&knet->mux_list);
+ mutex_init(&knet->mutex);
+
+ return 0;
+}
+
+static __net_exit void kcm_exit_net(struct net *net)
+{
+ struct kcm_net *knet = net_generic(net, kcm_net_id);
+
+ /* All KCM sockets should be closed at this point, which should mean
+ * that all multiplexors and psocks have been destroyed.
+ */
+ WARN_ON(!list_empty(&knet->mux_list));
+}
+
+static struct pernet_operations kcm_net_ops = {
+ .init = kcm_init_net,
+ .exit = kcm_exit_net,
+ .id = &kcm_net_id,
+ .size = sizeof(struct kcm_net),
+};
+
+static int __init kcm_init(void)
+{
+ int err = -ENOMEM;
+
+ kcm_muxp = kmem_cache_create("kcm_mux_cache",
+ sizeof(struct kcm_mux), 0,
+ SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
+ if (!kcm_muxp)
+ goto fail;
+
+ kcm_psockp = kmem_cache_create("kcm_psock_cache",
+ sizeof(struct kcm_psock), 0,
+ SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
+ if (!kcm_psockp)
+ goto fail;
+
+ kcm_wq = create_singlethread_workqueue("kkcmd");
+ if (!kcm_wq)
+ goto fail;
+
+ err = proto_register(&kcm_proto, 1);
+ if (err)
+ goto fail;
+
+ err = sock_register(&kcm_family_ops);
+ if (err)
+ goto sock_register_fail;
+
+ err = register_pernet_device(&kcm_net_ops);
+ if (err)
+ goto net_ops_fail;
+
+ err = kcm_proc_init();
+ if (err)
+ goto proc_init_fail;
+
+ return 0;
+
+proc_init_fail:
+ unregister_pernet_device(&kcm_net_ops);
+
+net_ops_fail:
+ sock_unregister(PF_KCM);
+
+sock_register_fail:
+ proto_unregister(&kcm_proto);
+
+fail:
+ kmem_cache_destroy(kcm_muxp);
+ kmem_cache_destroy(kcm_psockp);
+
+ if (kcm_wq)
+ destroy_workqueue(kcm_wq);
+
+ return err;
+}
+
+static void __exit kcm_exit(void)
+{
+ kcm_proc_exit();
+ unregister_pernet_device(&kcm_net_ops);
+ sock_unregister(PF_KCM);
+ proto_unregister(&kcm_proto);
+ destroy_workqueue(kcm_wq);
+
+ kmem_cache_destroy(kcm_muxp);
+ kmem_cache_destroy(kcm_psockp);
+}
+
+module_init(kcm_init);
+module_exit(kcm_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NETPROTO(PF_KCM);
+
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 1b8a5caa221e..3a8f881b22f1 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -327,7 +327,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
}
/* prepare A-MPDU MLME for Rx aggregation */
- tid_agg_rx = kmalloc(sizeof(struct tid_ampdu_rx), GFP_KERNEL);
+ tid_agg_rx = kzalloc(sizeof(*tid_agg_rx), GFP_KERNEL);
if (!tid_agg_rx)
goto end;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 1630975c89f1..804575ff7af5 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -92,7 +92,7 @@ struct ieee80211_fragment_entry {
u16 extra_len;
u16 last_frag;
u8 rx_queue;
- bool ccmp; /* Whether fragments were encrypted with CCMP */
+ bool check_sequential_pn; /* needed for CCMP/GCMP */
u8 last_pn[6]; /* PN of the last fragment if CCMP was used */
};
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index 3ece7d1034c8..b54f398cda5d 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -711,7 +711,7 @@ static u32 minstrel_get_expected_throughput(void *priv_sta)
* computing cur_tp
*/
tmp_mrs = &mi->r[idx].stats;
- tmp_cur_tp = minstrel_get_tp_avg(&mi->r[idx], tmp_mrs->prob_ewma);
+ tmp_cur_tp = minstrel_get_tp_avg(&mi->r[idx], tmp_mrs->prob_ewma) * 10;
tmp_cur_tp = tmp_cur_tp * 1200 * 8 / 1024;
return tmp_cur_tp;
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 3928dbd24e25..370d677b547b 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -414,15 +414,16 @@ minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 index)
(max_tp_group != MINSTREL_CCK_GROUP))
return;
+ max_gpr_group = mg->max_group_prob_rate / MCS_GROUP_RATES;
+ max_gpr_idx = mg->max_group_prob_rate % MCS_GROUP_RATES;
+ max_gpr_prob = mi->groups[max_gpr_group].rates[max_gpr_idx].prob_ewma;
+
if (mrs->prob_ewma > MINSTREL_FRAC(75, 100)) {
cur_tp_avg = minstrel_ht_get_tp_avg(mi, cur_group, cur_idx,
mrs->prob_ewma);
if (cur_tp_avg > tmp_tp_avg)
mi->max_prob_rate = index;
- max_gpr_group = mg->max_group_prob_rate / MCS_GROUP_RATES;
- max_gpr_idx = mg->max_group_prob_rate % MCS_GROUP_RATES;
- max_gpr_prob = mi->groups[max_gpr_group].rates[max_gpr_idx].prob_ewma;
max_gpr_tp_avg = minstrel_ht_get_tp_avg(mi, max_gpr_group,
max_gpr_idx,
max_gpr_prob);
@@ -431,7 +432,7 @@ minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 index)
} else {
if (mrs->prob_ewma > tmp_prob)
mi->max_prob_rate = index;
- if (mrs->prob_ewma > mg->rates[mg->max_group_prob_rate].prob_ewma)
+ if (mrs->prob_ewma > max_gpr_prob)
mg->max_group_prob_rate = index;
}
}
@@ -691,7 +692,7 @@ minstrel_aggr_check(struct ieee80211_sta *pubsta, struct sk_buff *skb)
if (likely(sta->ampdu_mlme.tid_tx[tid]))
return;
- ieee80211_start_tx_ba_session(pubsta, tid, 5000);
+ ieee80211_start_tx_ba_session(pubsta, tid, 0);
}
static void
@@ -871,7 +872,7 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
* - if station is in dynamic SMPS (and streams > 1)
* - for fallback rates, to increase chances of getting through
*/
- if (offset > 0 &&
+ if (offset > 0 ||
(mi->sta->smps_mode == IEEE80211_SMPS_DYNAMIC &&
group->streams > 1)) {
ratetbl->rate[offset].count = ratetbl->rate[offset].count_rts;
@@ -1334,7 +1335,8 @@ static u32 minstrel_ht_get_expected_throughput(void *priv_sta)
prob = mi->groups[i].rates[j].prob_ewma;
/* convert tp_avg from pkt per second in kbps */
- tp_avg = minstrel_ht_get_tp_avg(mi, i, j, prob) * AVG_PKT_SIZE * 8 / 1024;
+ tp_avg = minstrel_ht_get_tp_avg(mi, i, j, prob) * 10;
+ tp_avg = tp_avg * AVG_PKT_SIZE * 8 / 1024;
return tp_avg;
}
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 5690e4c67486..dc27becb9b71 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1777,7 +1777,7 @@ ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata,
entry->seq = seq;
entry->rx_queue = rx_queue;
entry->last_frag = frag;
- entry->ccmp = 0;
+ entry->check_sequential_pn = false;
entry->extra_len = 0;
return entry;
@@ -1873,15 +1873,27 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
rx->seqno_idx, &(rx->skb));
if (rx->key &&
(rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP ||
- rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP_256) &&
+ rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP_256 ||
+ rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP ||
+ rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP_256) &&
ieee80211_has_protected(fc)) {
int queue = rx->security_idx;
- /* Store CCMP PN so that we can verify that the next
- * fragment has a sequential PN value. */
- entry->ccmp = 1;
+
+ /* Store CCMP/GCMP PN so that we can verify that the
+ * next fragment has a sequential PN value.
+ */
+ entry->check_sequential_pn = true;
memcpy(entry->last_pn,
rx->key->u.ccmp.rx_pn[queue],
IEEE80211_CCMP_PN_LEN);
+ BUILD_BUG_ON(offsetof(struct ieee80211_key,
+ u.ccmp.rx_pn) !=
+ offsetof(struct ieee80211_key,
+ u.gcmp.rx_pn));
+ BUILD_BUG_ON(sizeof(rx->key->u.ccmp.rx_pn[queue]) !=
+ sizeof(rx->key->u.gcmp.rx_pn[queue]));
+ BUILD_BUG_ON(IEEE80211_CCMP_PN_LEN !=
+ IEEE80211_GCMP_PN_LEN);
}
return RX_QUEUED;
}
@@ -1896,15 +1908,21 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
return RX_DROP_MONITOR;
}
- /* Verify that MPDUs within one MSDU have sequential PN values.
- * (IEEE 802.11i, 8.3.3.4.5) */
- if (entry->ccmp) {
+ /* "The receiver shall discard MSDUs and MMPDUs whose constituent
+ * MPDU PN values are not incrementing in steps of 1."
+ * see IEEE P802.11-REVmc/D5.0, 12.5.3.4.4, item d (for CCMP)
+ * and IEEE P802.11-REVmc/D5.0, 12.5.5.4.4, item d (for GCMP)
+ */
+ if (entry->check_sequential_pn) {
int i;
u8 pn[IEEE80211_CCMP_PN_LEN], *rpn;
int queue;
+
if (!rx->key ||
(rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP &&
- rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP_256))
+ rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP_256 &&
+ rx->key->conf.cipher != WLAN_CIPHER_SUITE_GCMP &&
+ rx->key->conf.cipher != WLAN_CIPHER_SUITE_GCMP_256))
return RX_DROP_UNUSABLE;
memcpy(pn, entry->last_pn, IEEE80211_CCMP_PN_LEN);
for (i = IEEE80211_CCMP_PN_LEN - 1; i >= 0; i--) {
@@ -3473,6 +3491,7 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
return false;
/* ignore action frames to TDLS-peers */
if (ieee80211_is_action(hdr->frame_control) &&
+ !is_broadcast_ether_addr(bssid) &&
!ether_addr_equal(bssid, hdr->addr1))
return false;
}
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 0328f7250693..299edc6add5a 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -605,17 +605,13 @@ static const struct file_operations ip_vs_app_fops = {
int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs)
{
- struct net *net = ipvs->net;
-
INIT_LIST_HEAD(&ipvs->app_list);
- proc_create("ip_vs_app", 0, net->proc_net, &ip_vs_app_fops);
+ proc_create("ip_vs_app", 0, ipvs->net->proc_net, &ip_vs_app_fops);
return 0;
}
void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs)
{
- struct net *net = ipvs->net;
-
unregister_ip_vs_app(ipvs, NULL /* all */);
- remove_proc_entry("ip_vs_app", net->proc_net);
+ remove_proc_entry("ip_vs_app", ipvs->net->proc_net);
}
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index e7c1b052c2a3..404b2a4f4b5b 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1376,8 +1376,6 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
struct ip_vs_pe *old_pe;
struct netns_ipvs *ipvs = svc->ipvs;
- pr_info("%s: enter\n", __func__);
-
/* Count only IPv4 services for old get/setsockopt interface */
if (svc->af == AF_INET)
ipvs->num_services--;
@@ -3947,7 +3945,6 @@ static struct notifier_block ip_vs_dst_notifier = {
int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
{
- struct net *net = ipvs->net;
int i, idx;
/* Initialize rs_table */
@@ -3974,9 +3971,9 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
spin_lock_init(&ipvs->tot_stats.lock);
- proc_create("ip_vs", 0, net->proc_net, &ip_vs_info_fops);
- proc_create("ip_vs_stats", 0, net->proc_net, &ip_vs_stats_fops);
- proc_create("ip_vs_stats_percpu", 0, net->proc_net,
+ proc_create("ip_vs", 0, ipvs->net->proc_net, &ip_vs_info_fops);
+ proc_create("ip_vs_stats", 0, ipvs->net->proc_net, &ip_vs_stats_fops);
+ proc_create("ip_vs_stats_percpu", 0, ipvs->net->proc_net,
&ip_vs_stats_percpu_fops);
if (ip_vs_control_net_init_sysctl(ipvs))
@@ -3991,13 +3988,11 @@ err:
void __net_exit ip_vs_control_net_cleanup(struct netns_ipvs *ipvs)
{
- struct net *net = ipvs->net;
-
ip_vs_trash_cleanup(ipvs);
ip_vs_control_net_cleanup_sysctl(ipvs);
- remove_proc_entry("ip_vs_stats_percpu", net->proc_net);
- remove_proc_entry("ip_vs_stats", net->proc_net);
- remove_proc_entry("ip_vs", net->proc_net);
+ remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net);
+ remove_proc_entry("ip_vs_stats", ipvs->net->proc_net);
+ remove_proc_entry("ip_vs", ipvs->net->proc_net);
free_percpu(ipvs->tot_stats.cpustats);
}
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index 9aea747b43ea..81b5ad6165ac 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -17,7 +17,9 @@
#include <net/netfilter/nft_masq.h>
const struct nla_policy nft_masq_policy[NFTA_MASQ_MAX + 1] = {
- [NFTA_MASQ_FLAGS] = { .type = NLA_U32 },
+ [NFTA_MASQ_FLAGS] = { .type = NLA_U32 },
+ [NFTA_MASQ_REG_PROTO_MIN] = { .type = NLA_U32 },
+ [NFTA_MASQ_REG_PROTO_MAX] = { .type = NLA_U32 },
};
EXPORT_SYMBOL_GPL(nft_masq_policy);
@@ -40,6 +42,7 @@ int nft_masq_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
{
+ u32 plen = FIELD_SIZEOF(struct nf_nat_range, min_addr.all);
struct nft_masq *priv = nft_expr_priv(expr);
int err;
@@ -47,12 +50,32 @@ int nft_masq_init(const struct nft_ctx *ctx,
if (err)
return err;
- if (tb[NFTA_MASQ_FLAGS] == NULL)
- return 0;
-
- priv->flags = ntohl(nla_get_be32(tb[NFTA_MASQ_FLAGS]));
- if (priv->flags & ~NF_NAT_RANGE_MASK)
- return -EINVAL;
+ if (tb[NFTA_MASQ_FLAGS]) {
+ priv->flags = ntohl(nla_get_be32(tb[NFTA_MASQ_FLAGS]));
+ if (priv->flags & ~NF_NAT_RANGE_MASK)
+ return -EINVAL;
+ }
+
+ if (tb[NFTA_MASQ_REG_PROTO_MIN]) {
+ priv->sreg_proto_min =
+ nft_parse_register(tb[NFTA_MASQ_REG_PROTO_MIN]);
+
+ err = nft_validate_register_load(priv->sreg_proto_min, plen);
+ if (err < 0)
+ return err;
+
+ if (tb[NFTA_MASQ_REG_PROTO_MAX]) {
+ priv->sreg_proto_max =
+ nft_parse_register(tb[NFTA_MASQ_REG_PROTO_MAX]);
+
+ err = nft_validate_register_load(priv->sreg_proto_max,
+ plen);
+ if (err < 0)
+ return err;
+ } else {
+ priv->sreg_proto_max = priv->sreg_proto_min;
+ }
+ }
return 0;
}
@@ -62,12 +85,18 @@ int nft_masq_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_masq *priv = nft_expr_priv(expr);
- if (priv->flags == 0)
- return 0;
-
- if (nla_put_be32(skb, NFTA_MASQ_FLAGS, htonl(priv->flags)))
+ if (priv->flags != 0 &&
+ nla_put_be32(skb, NFTA_MASQ_FLAGS, htonl(priv->flags)))
goto nla_put_failure;
+ if (priv->sreg_proto_min) {
+ if (nft_dump_register(skb, NFTA_MASQ_REG_PROTO_MIN,
+ priv->sreg_proto_min) ||
+ nft_dump_register(skb, NFTA_MASQ_REG_PROTO_MAX,
+ priv->sreg_proto_max))
+ goto nla_put_failure;
+ }
+
return 0;
nla_put_failure:
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index fe885bf271c5..16c50b0dd426 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -28,6 +28,8 @@
#include <uapi/linux/netfilter_bridge.h> /* NF_BR_PRE_ROUTING */
+static DEFINE_PER_CPU(struct rnd_state, nft_prandom_state);
+
void nft_meta_get_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -181,6 +183,11 @@ void nft_meta_get_eval(const struct nft_expr *expr,
*dest = sock_cgroup_classid(&sk->sk_cgrp_data);
break;
#endif
+ case NFT_META_PRANDOM: {
+ struct rnd_state *state = this_cpu_ptr(&nft_prandom_state);
+ *dest = prandom_u32_state(state);
+ break;
+ }
default:
WARN_ON(1);
goto err;
@@ -277,6 +284,10 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
case NFT_META_OIFNAME:
len = IFNAMSIZ;
break;
+ case NFT_META_PRANDOM:
+ prandom_init_once(&nft_prandom_state);
+ len = sizeof(u32);
+ break;
default:
return -EOPNOTSUPP;
}
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index c8a0b7da5ff4..d0cd2b9bf844 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -694,12 +694,45 @@ EXPORT_SYMBOL(xt_free_table_info);
struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
const char *name)
{
- struct xt_table *t;
+ struct xt_table *t, *found = NULL;
mutex_lock(&xt[af].mutex);
list_for_each_entry(t, &net->xt.tables[af], list)
if (strcmp(t->name, name) == 0 && try_module_get(t->me))
return t;
+
+ if (net == &init_net)
+ goto out;
+
+ /* Table doesn't exist in this netns, re-try init */
+ list_for_each_entry(t, &init_net.xt.tables[af], list) {
+ if (strcmp(t->name, name))
+ continue;
+ if (!try_module_get(t->me))
+ return NULL;
+
+ mutex_unlock(&xt[af].mutex);
+ if (t->table_init(net) != 0) {
+ module_put(t->me);
+ return NULL;
+ }
+
+ found = t;
+
+ mutex_lock(&xt[af].mutex);
+ break;
+ }
+
+ if (!found)
+ goto out;
+
+ /* and once again: */
+ list_for_each_entry(t, &net->xt.tables[af], list)
+ if (strcmp(t->name, name) == 0)
+ return t;
+
+ module_put(found->me);
+ out:
mutex_unlock(&xt[af].mutex);
return NULL;
}
@@ -1170,20 +1203,20 @@ static const struct file_operations xt_target_ops = {
#endif /* CONFIG_PROC_FS */
/**
- * xt_hook_link - set up hooks for a new table
+ * xt_hook_ops_alloc - set up hooks for a new table
* @table: table with metadata needed to set up hooks
* @fn: Hook function
*
- * This function will take care of creating and registering the necessary
- * Netfilter hooks for XT tables.
+ * This function will create the nf_hook_ops that the x_table needs
+ * to hand to xt_hook_link_net().
*/
-struct nf_hook_ops *xt_hook_link(const struct xt_table *table, nf_hookfn *fn)
+struct nf_hook_ops *
+xt_hook_ops_alloc(const struct xt_table *table, nf_hookfn *fn)
{
unsigned int hook_mask = table->valid_hooks;
uint8_t i, num_hooks = hweight32(hook_mask);
uint8_t hooknum;
struct nf_hook_ops *ops;
- int ret;
ops = kmalloc(sizeof(*ops) * num_hooks, GFP_KERNEL);
if (ops == NULL)
@@ -1200,27 +1233,9 @@ struct nf_hook_ops *xt_hook_link(const struct xt_table *table, nf_hookfn *fn)
++i;
}
- ret = nf_register_hooks(ops, num_hooks);
- if (ret < 0) {
- kfree(ops);
- return ERR_PTR(ret);
- }
-
return ops;
}
-EXPORT_SYMBOL_GPL(xt_hook_link);
-
-/**
- * xt_hook_unlink - remove hooks for a table
- * @ops: nf_hook_ops array as returned by nf_hook_link
- * @hook_mask: the very same mask that was passed to nf_hook_link
- */
-void xt_hook_unlink(const struct xt_table *table, struct nf_hook_ops *ops)
-{
- nf_unregister_hooks(ops, hweight32(table->valid_hooks));
- kfree(ops);
-}
-EXPORT_SYMBOL_GPL(xt_hook_unlink);
+EXPORT_SYMBOL_GPL(xt_hook_ops_alloc);
int xt_proto_init(struct net *net, u_int8_t af)
{
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 4e3c3affd285..2455b69b5810 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -262,7 +262,6 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
if (f->opt[optnum].kind == (*optp)) {
__u32 len = f->opt[optnum].length;
const __u8 *optend = optp + len;
- int loop_cont = 0;
fmatch = FMATCH_OK;
@@ -275,7 +274,6 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
mss = ntohs((__force __be16)mss);
break;
case OSFOPT_TS:
- loop_cont = 1;
break;
}
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index f0cb92f3ddaf..ada67422234b 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -55,8 +55,8 @@ struct netlbl_domhsh_tbl {
static DEFINE_SPINLOCK(netlbl_domhsh_lock);
#define netlbl_domhsh_rcu_deref(p) \
rcu_dereference_check(p, lockdep_is_held(&netlbl_domhsh_lock))
-static struct netlbl_domhsh_tbl *netlbl_domhsh = NULL;
-static struct netlbl_dom_map *netlbl_domhsh_def = NULL;
+static struct netlbl_domhsh_tbl *netlbl_domhsh;
+static struct netlbl_dom_map *netlbl_domhsh_def;
/*
* Domain Hash Table Helper Functions
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index b0380927f05f..9eaa9a1e8629 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -116,11 +116,11 @@ struct netlbl_unlhsh_walk_arg {
static DEFINE_SPINLOCK(netlbl_unlhsh_lock);
#define netlbl_unlhsh_rcu_deref(p) \
rcu_dereference_check(p, lockdep_is_held(&netlbl_unlhsh_lock))
-static struct netlbl_unlhsh_tbl *netlbl_unlhsh = NULL;
-static struct netlbl_unlhsh_iface *netlbl_unlhsh_def = NULL;
+static struct netlbl_unlhsh_tbl *netlbl_unlhsh;
+static struct netlbl_unlhsh_iface *netlbl_unlhsh_def;
/* Accept unlabeled packets flag */
-static u8 netlabel_unlabel_acceptflg = 0;
+static u8 netlabel_unlabel_acceptflg;
/* NetLabel Generic NETLINK unlabeled family */
static struct genl_family netlbl_unlabel_gnl_family = {
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index d41b1074cb2d..1ecfa710ca98 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1915,6 +1915,10 @@ retry:
goto retry;
}
+ if (!dev_validate_header(dev, skb->data, len)) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
if (len > (dev->mtu + dev->hard_header_len + extra_len) &&
!packet_extra_vlan_len_allowed(dev, skb)) {
err = -EMSGSIZE;
@@ -2393,18 +2397,6 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
sock_wfree(skb);
}
-static bool ll_header_truncated(const struct net_device *dev, int len)
-{
- /* net device doesn't like empty head */
- if (unlikely(len < dev->hard_header_len)) {
- net_warn_ratelimited("%s: packet size is too short (%d < %d)\n",
- current->comm, len, dev->hard_header_len);
- return true;
- }
-
- return false;
-}
-
static void tpacket_set_protocol(const struct net_device *dev,
struct sk_buff *skb)
{
@@ -2522,16 +2514,20 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
if (unlikely(err < 0))
return -EINVAL;
} else if (copylen) {
+ int hdrlen = min_t(int, copylen, tp_len);
+
skb_push(skb, dev->hard_header_len);
skb_put(skb, copylen - dev->hard_header_len);
- err = skb_store_bits(skb, 0, data, copylen);
+ err = skb_store_bits(skb, 0, data, hdrlen);
if (unlikely(err))
return err;
+ if (!dev_validate_header(dev, skb->data, hdrlen))
+ return -EINVAL;
if (!skb->protocol)
tpacket_set_protocol(dev, skb);
- data += copylen;
- to_write -= copylen;
+ data += hdrlen;
+ to_write -= hdrlen;
}
offset = offset_in_page(data);
@@ -2703,13 +2699,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
copylen = __virtio16_to_cpu(vio_le(),
vnet_hdr->hdr_len);
}
- if (dev->hard_header_len) {
- if (ll_header_truncated(dev, tp_len)) {
- tp_len = -EINVAL;
- goto tpacket_error;
- }
- copylen = max_t(int, copylen, dev->hard_header_len);
- }
+ copylen = max_t(int, copylen, dev->hard_header_len);
skb = sock_alloc_send_skb(&po->sk,
hlen + tlen + sizeof(struct sockaddr_ll) +
(copylen - dev->hard_header_len),
@@ -2905,9 +2895,6 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len);
if (unlikely(offset < 0))
goto out_free;
- } else {
- if (ll_header_truncated(dev, len))
- goto out_free;
}
/* Returns -EFAULT on error */
@@ -2915,6 +2902,12 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
if (err)
goto out_free;
+ if (sock->type == SOCK_RAW &&
+ !dev_validate_header(dev, skb->data, len)) {
+ err = -EINVAL;
+ goto out_free;
+ }
+
sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) &&
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 6e7ec257790d..c589a9ba506a 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -108,7 +108,7 @@ EXPORT_SYMBOL_GPL(ife_get_meta_u16);
int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval)
{
- mi->metaval = kmemdup(&metaval, sizeof(u32), GFP_KERNEL);
+ mi->metaval = kmemdup(metaval, sizeof(u32), GFP_KERNEL);
if (!mi->metaval)
return -ENOMEM;
@@ -118,7 +118,7 @@ EXPORT_SYMBOL_GPL(ife_alloc_meta_u32);
int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval)
{
- mi->metaval = kmemdup(&metaval, sizeof(u16), GFP_KERNEL);
+ mi->metaval = kmemdup(metaval, sizeof(u16), GFP_KERNEL);
if (!mi->metaval)
return -ENOMEM;
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 89c41a1f3589..350e134cffb3 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -66,6 +66,7 @@ static void ipt_destroy_target(struct xt_entry_target *t)
struct xt_tgdtor_param par = {
.target = t->u.kernel.target,
.targinfo = t->data,
+ .family = NFPROTO_IPV4,
};
if (par.target->destroy != NULL)
par.target->destroy(&par);
@@ -219,6 +220,7 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a,
par.hooknum = ipt->tcfi_hook;
par.target = ipt->tcfi_t->u.kernel.target;
par.targinfo = ipt->tcfi_t->data;
+ par.family = NFPROTO_IPV4;
ret = par.target->target(skb, &par);
switch (ret) {
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 95b021243233..2181ffc76638 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -165,6 +165,51 @@ static void fl_destroy_filter(struct rcu_head *head)
kfree(f);
}
+static void fl_hw_destroy_filter(struct tcf_proto *tp, unsigned long cookie)
+{
+ struct net_device *dev = tp->q->dev_queue->dev;
+ struct tc_cls_flower_offload offload = {0};
+ struct tc_to_netdev tc;
+
+ if (!tc_should_offload(dev, 0))
+ return;
+
+ offload.command = TC_CLSFLOWER_DESTROY;
+ offload.cookie = cookie;
+
+ tc.type = TC_SETUP_CLSFLOWER;
+ tc.cls_flower = &offload;
+
+ dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);
+}
+
+static void fl_hw_replace_filter(struct tcf_proto *tp,
+ struct flow_dissector *dissector,
+ struct fl_flow_key *mask,
+ struct fl_flow_key *key,
+ struct tcf_exts *actions,
+ unsigned long cookie, u32 flags)
+{
+ struct net_device *dev = tp->q->dev_queue->dev;
+ struct tc_cls_flower_offload offload = {0};
+ struct tc_to_netdev tc;
+
+ if (!tc_should_offload(dev, flags))
+ return;
+
+ offload.command = TC_CLSFLOWER_REPLACE;
+ offload.cookie = cookie;
+ offload.dissector = dissector;
+ offload.mask = mask;
+ offload.key = key;
+ offload.exts = actions;
+
+ tc.type = TC_SETUP_CLSFLOWER;
+ tc.cls_flower = &offload;
+
+ dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);
+}
+
static bool fl_destroy(struct tcf_proto *tp, bool force)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
@@ -174,6 +219,7 @@ static bool fl_destroy(struct tcf_proto *tp, bool force)
return false;
list_for_each_entry_safe(f, next, &head->filters, list) {
+ fl_hw_destroy_filter(tp, (unsigned long)f);
list_del_rcu(&f->list);
call_rcu(&f->rcu, fl_destroy_filter);
}
@@ -459,6 +505,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
struct cls_fl_filter *fnew;
struct nlattr *tb[TCA_FLOWER_MAX + 1];
struct fl_flow_mask mask = {};
+ u32 flags = 0;
int err;
if (!tca[TCA_OPTIONS])
@@ -486,6 +533,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
}
fnew->handle = handle;
+ if (tb[TCA_FLOWER_FLAGS])
+ flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]);
+
err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr);
if (err)
goto errout;
@@ -498,9 +548,20 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
head->ht_params);
if (err)
goto errout;
- if (fold)
+
+ fl_hw_replace_filter(tp,
+ &head->dissector,
+ &mask.key,
+ &fnew->key,
+ &fnew->exts,
+ (unsigned long)fnew,
+ flags);
+
+ if (fold) {
rhashtable_remove_fast(&head->ht, &fold->ht_node,
head->ht_params);
+ fl_hw_destroy_filter(tp, (unsigned long)fold);
+ }
*arg = (unsigned long) fnew;
@@ -527,6 +588,7 @@ static int fl_delete(struct tcf_proto *tp, unsigned long arg)
rhashtable_remove_fast(&head->ht, &f->ht_node,
head->ht_params);
list_del_rcu(&f->list);
+ fl_hw_destroy_filter(tp, (unsigned long)f);
tcf_unbind_filter(tp, &f->res);
call_rcu(&f->rcu, fl_destroy_filter);
return 0;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index d0dff0cd8186..34b4ddaca27c 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -276,7 +276,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
- skb = p->q->ops->dequeue(p->q);
+ skb = qdisc_dequeue_peeked(p->q);
if (skb == NULL)
return NULL;
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index ec529121f38a..ce46f1c7f133 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -526,6 +526,8 @@ static int sctp_v6_cmp_addr(const union sctp_addr *addr1,
}
return 0;
}
+ if (addr1->v6.sin6_port != addr2->v6.sin6_port)
+ return 0;
if (!ipv6_addr_equal(&addr1->v6.sin6_addr, &addr2->v6.sin6_addr))
return 0;
/* If this is a linklocal address, compare the scope_id. */
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index cfc3c7101a38..5cfac8d5d3b3 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -480,7 +480,7 @@ static void sctp_remaddr_seq_stop(struct seq_file *seq, void *v)
static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
{
struct sctp_association *assoc;
- struct sctp_transport *tsp;
+ struct sctp_transport *transport, *tsp;
if (v == SEQ_START_TOKEN) {
seq_printf(seq, "ADDR ASSOC_ID HB_ACT RTO MAX_PATH_RTX "
@@ -488,10 +488,10 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
return 0;
}
- tsp = (struct sctp_transport *)v;
- if (!sctp_transport_hold(tsp))
+ transport = (struct sctp_transport *)v;
+ if (!sctp_transport_hold(transport))
return 0;
- assoc = tsp->asoc;
+ assoc = transport->asoc;
list_for_each_entry_rcu(tsp, &assoc->peer.transport_addr_list,
transports) {
@@ -544,7 +544,7 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "\n");
}
- sctp_transport_put(tsp);
+ sctp_transport_put(transport);
return 0;
}
diff --git a/net/socket.c b/net/socket.c
index c044d1e8508c..886649c88d8f 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -533,7 +533,7 @@ static const struct inode_operations sockfs_inode_ops = {
* NULL is returned.
*/
-static struct socket *sock_alloc(void)
+struct socket *sock_alloc(void)
{
struct inode *inode;
struct socket *sock;
@@ -554,6 +554,7 @@ static struct socket *sock_alloc(void)
this_cpu_add(sockets_in_use, 1);
return sock;
}
+EXPORT_SYMBOL(sock_alloc);
/**
* sock_release - close a socket
@@ -1874,7 +1875,8 @@ static int copy_msghdr_from_user(struct msghdr *kmsg,
static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
struct msghdr *msg_sys, unsigned int flags,
- struct used_address *used_address)
+ struct used_address *used_address,
+ unsigned int allowed_msghdr_flags)
{
struct compat_msghdr __user *msg_compat =
(struct compat_msghdr __user *)msg;
@@ -1900,6 +1902,7 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
if (msg_sys->msg_controllen > INT_MAX)
goto out_freeiov;
+ flags |= (msg_sys->msg_flags & allowed_msghdr_flags);
ctl_len = msg_sys->msg_controllen;
if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
err =
@@ -1978,7 +1981,7 @@ long __sys_sendmsg(int fd, struct user_msghdr __user *msg, unsigned flags)
if (!sock)
goto out;
- err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL);
+ err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL, 0);
fput_light(sock->file, fput_needed);
out:
@@ -2005,6 +2008,7 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
struct compat_mmsghdr __user *compat_entry;
struct msghdr msg_sys;
struct used_address used_address;
+ unsigned int oflags = flags;
if (vlen > UIO_MAXIOV)
vlen = UIO_MAXIOV;
@@ -2019,11 +2023,15 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
entry = mmsg;
compat_entry = (struct compat_mmsghdr __user *)mmsg;
err = 0;
+ flags |= MSG_BATCH;
while (datagrams < vlen) {
+ if (datagrams == vlen - 1)
+ flags = oflags;
+
if (MSG_CMSG_COMPAT & flags) {
err = ___sys_sendmsg(sock, (struct user_msghdr __user *)compat_entry,
- &msg_sys, flags, &used_address);
+ &msg_sys, flags, &used_address, MSG_EOR);
if (err < 0)
break;
err = __put_user(err, &compat_entry->msg_len);
@@ -2031,7 +2039,7 @@ int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
} else {
err = ___sys_sendmsg(sock,
(struct user_msghdr __user *)entry,
- &msg_sys, flags, &used_address);
+ &msg_sys, flags, &used_address, MSG_EOR);
if (err < 0)
break;
err = put_user(err, &entry->msg_len);
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 799e65b944b9..cabf586f47d7 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -740,7 +740,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
default:
printk(KERN_CRIT "%s: bad return from "
"gss_fill_context: %zd\n", __func__, err);
- BUG();
+ gss_msg->msg.errno = -EIO;
}
goto err_release_msg;
}
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 2b32fd602669..273bc3a35425 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1225,7 +1225,7 @@ int qword_get(char **bpp, char *dest, int bufsize)
if (bp[0] == '\\' && bp[1] == 'x') {
/* HEX STRING */
bp += 2;
- while (len < bufsize) {
+ while (len < bufsize - 1) {
int h, l;
h = hex_to_bin(bp[0]);
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index cc1251d07297..2dcd7640eeb5 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -341,6 +341,8 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
rqst->rq_reply_bytes_recvd = 0;
rqst->rq_bytes_sent = 0;
rqst->rq_xid = headerp->rm_xid;
+
+ rqst->rq_private_buf.len = size;
set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
buf = &rqst->rq_rcv_buf;
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 47f7da58a7f0..8b5833c1ff2e 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -1093,8 +1093,11 @@ int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
.cb = cb,
.idx = idx,
};
+ int err;
- switchdev_port_obj_dump(dev, &dump.fdb.obj, switchdev_port_fdb_dump_cb);
+ err = switchdev_port_obj_dump(dev, &dump.fdb.obj,
+ switchdev_port_fdb_dump_cb);
+ cb->args[1] = err;
return dump.idx;
}
EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump);
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index e401108360a2..ae469b37d852 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -412,11 +412,6 @@ enomem:
return -ENOMEM;
}
-void tipc_bcast_reinit(struct net *net)
-{
- tipc_link_reinit(tipc_bc_sndlink(net), tipc_own_addr(net));
-}
-
void tipc_bcast_stop(struct net *net)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index 1944c6c00bb9..d5e79b3767fd 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -46,7 +46,6 @@ struct tipc_node_map;
extern const char tipc_bclink_name[];
int tipc_bcast_init(struct net *net);
-void tipc_bcast_reinit(struct net *net);
void tipc_bcast_stop(struct net *net);
void tipc_bcast_add_peer(struct net *net, struct tipc_link *l,
struct sk_buff_head *xmitq);
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 802ffad3200d..27a5406213c6 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -40,6 +40,7 @@
#include "link.h"
#include "discover.h"
#include "bcast.h"
+#include "netlink.h"
#define MAX_ADDR_STR 60
@@ -54,23 +55,6 @@ static struct tipc_media * const media_info_array[] = {
NULL
};
-static const struct nla_policy
-tipc_nl_bearer_policy[TIPC_NLA_BEARER_MAX + 1] = {
- [TIPC_NLA_BEARER_UNSPEC] = { .type = NLA_UNSPEC },
- [TIPC_NLA_BEARER_NAME] = {
- .type = NLA_STRING,
- .len = TIPC_MAX_BEARER_NAME
- },
- [TIPC_NLA_BEARER_PROP] = { .type = NLA_NESTED },
- [TIPC_NLA_BEARER_DOMAIN] = { .type = NLA_U32 }
-};
-
-static const struct nla_policy tipc_nl_media_policy[TIPC_NLA_MEDIA_MAX + 1] = {
- [TIPC_NLA_MEDIA_UNSPEC] = { .type = NLA_UNSPEC },
- [TIPC_NLA_MEDIA_NAME] = { .type = NLA_STRING },
- [TIPC_NLA_MEDIA_PROP] = { .type = NLA_NESTED }
-};
-
static void bearer_disable(struct net *net, struct tipc_bearer *b);
/**
diff --git a/net/tipc/link.c b/net/tipc/link.c
index e31d92f80572..7d2bb3e70baa 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1,7 +1,7 @@
/*
* net/tipc/link.c: TIPC link code
*
- * Copyright (c) 1996-2007, 2012-2015, Ericsson AB
+ * Copyright (c) 1996-2007, 2012-2016, Ericsson AB
* Copyright (c) 2004-2007, 2010-2013, Wind River Systems
* All rights reserved.
*
@@ -127,6 +127,7 @@ struct tipc_link {
/* Management and link supervision data */
u32 peer_session;
+ u32 session;
u32 peer_bearer_id;
u32 bearer_id;
u32 tolerance;
@@ -136,11 +137,7 @@ struct tipc_link {
u16 peer_caps;
bool active;
u32 silent_intv_cnt;
- struct {
- unchar hdr[INT_H_SIZE];
- unchar body[TIPC_MAX_IF_NAME];
- } proto_msg;
- struct tipc_msg *pmsg;
+ char if_name[TIPC_MAX_IF_NAME];
u32 priority;
char net_plane;
@@ -195,14 +192,6 @@ struct tipc_link {
static const char *link_co_err = "Link tunneling error, ";
static const char *link_rst_msg = "Resetting link ";
-/* Properties valid for media, bearar and link */
-static const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = {
- [TIPC_NLA_PROP_UNSPEC] = { .type = NLA_UNSPEC },
- [TIPC_NLA_PROP_PRIO] = { .type = NLA_U32 },
- [TIPC_NLA_PROP_TOL] = { .type = NLA_U32 },
- [TIPC_NLA_PROP_WIN] = { .type = NLA_U32 }
-};
-
/* Send states for broadcast NACKs
*/
enum {
@@ -215,10 +204,11 @@ enum {
* Interval between NACKs when packets arrive out of order
*/
#define TIPC_NACK_INTV (TIPC_MIN_LINK_WIN * 2)
-/*
- * Out-of-range value for link session numbers
+
+/* Wildcard value for link session numbers. When it is known that
+ * peer endpoint is down, any session number must be accepted.
*/
-#define WILDCARD_SESSION 0x10000
+#define ANY_SESSION 0x10000
/* Link FSM states:
*/
@@ -398,16 +388,6 @@ char *tipc_link_name(struct tipc_link *l)
return l->name;
}
-static u32 link_own_addr(struct tipc_link *l)
-{
- return msg_prevnode(l->pmsg);
-}
-
-void tipc_link_reinit(struct tipc_link *l, u32 addr)
-{
- msg_set_prevnode(l->pmsg, addr);
-}
-
/**
* tipc_link_create - create a new link
* @n: pointer to associated node
@@ -441,29 +421,22 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
struct tipc_link **link)
{
struct tipc_link *l;
- struct tipc_msg *hdr;
l = kzalloc(sizeof(*l), GFP_ATOMIC);
if (!l)
return false;
*link = l;
- l->pmsg = (struct tipc_msg *)&l->proto_msg;
- hdr = l->pmsg;
- tipc_msg_init(ownnode, hdr, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, peer);
- msg_set_size(hdr, sizeof(l->proto_msg));
- msg_set_session(hdr, session);
- msg_set_bearer_id(hdr, l->bearer_id);
+ l->session = session;
/* Note: peer i/f name is completed by reset/activate message */
sprintf(l->name, "%u.%u.%u:%s-%u.%u.%u:unknown",
tipc_zone(ownnode), tipc_cluster(ownnode), tipc_node(ownnode),
if_name, tipc_zone(peer), tipc_cluster(peer), tipc_node(peer));
- strcpy((char *)msg_data(hdr), if_name);
-
+ strcpy(l->if_name, if_name);
l->addr = peer;
l->peer_caps = peer_caps;
l->net = net;
- l->peer_session = WILDCARD_SESSION;
+ l->peer_session = ANY_SESSION;
l->bearer_id = bearer_id;
l->tolerance = tolerance;
l->net_plane = net_plane;
@@ -790,7 +763,7 @@ static int link_schedule_user(struct tipc_link *link, struct sk_buff_head *list)
struct tipc_msg *msg = buf_msg(skb_peek(list));
int imp = msg_importance(msg);
u32 oport = msg_origport(msg);
- u32 addr = link_own_addr(link);
+ u32 addr = tipc_own_addr(link->net);
struct sk_buff *skb;
/* This really cannot happen... */
@@ -839,16 +812,9 @@ void link_prepare_wakeup(struct tipc_link *l)
void tipc_link_reset(struct tipc_link *l)
{
- /* Link is down, accept any session */
- l->peer_session = WILDCARD_SESSION;
-
- /* If peer is up, it only accepts an incremented session number */
- msg_set_session(l->pmsg, msg_session(l->pmsg) + 1);
-
- /* Prepare for renewed mtu size negotiation */
+ l->peer_session = ANY_SESSION;
+ l->session++;
l->mtu = l->advertised_mtu;
-
- /* Clean up all queues and counters: */
__skb_queue_purge(&l->transmq);
__skb_queue_purge(&l->deferdq);
skb_queue_splice_init(&l->wakeupq, l->inputq);
@@ -1156,7 +1122,7 @@ int tipc_link_build_ack_msg(struct tipc_link *l, struct sk_buff_head *xmitq)
/* Broadcast ACK must be sent via a unicast link => defer to caller */
if (link_is_bc_rcvlink(l)) {
- if (((l->rcv_nxt ^ link_own_addr(l)) & 0xf) != 0xf)
+ if (((l->rcv_nxt ^ tipc_own_addr(l->net)) & 0xf) != 0xf)
return 0;
l->rcv_unacked = 0;
return TIPC_LINK_SND_BC_ACK;
@@ -1268,15 +1234,30 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
u16 rcvgap, int tolerance, int priority,
struct sk_buff_head *xmitq)
{
- struct sk_buff *skb = NULL;
- struct tipc_msg *hdr = l->pmsg;
+ struct sk_buff *skb;
+ struct tipc_msg *hdr;
+ struct sk_buff_head *dfq = &l->deferdq;
bool node_up = link_is_up(l->bc_rcvlink);
/* Don't send protocol message during reset or link failover */
if (tipc_link_is_blocked(l))
return;
- msg_set_type(hdr, mtyp);
+ if (!tipc_link_is_up(l) && (mtyp == STATE_MSG))
+ return;
+
+ if (!skb_queue_empty(dfq))
+ rcvgap = buf_seqno(skb_peek(dfq)) - l->rcv_nxt;
+
+ skb = tipc_msg_create(LINK_PROTOCOL, mtyp, INT_H_SIZE,
+ TIPC_MAX_IF_NAME, l->addr,
+ tipc_own_addr(l->net), 0, 0, 0);
+ if (!skb)
+ return;
+
+ hdr = buf_msg(skb);
+ msg_set_session(hdr, l->session);
+ msg_set_bearer_id(hdr, l->bearer_id);
msg_set_net_plane(hdr, l->net_plane);
msg_set_next_sent(hdr, l->snd_nxt);
msg_set_ack(hdr, l->rcv_nxt - 1);
@@ -1286,36 +1267,23 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
msg_set_linkprio(hdr, priority);
msg_set_redundant_link(hdr, node_up);
msg_set_seq_gap(hdr, 0);
-
- /* Compatibility: created msg must not be in sequence with pkt flow */
msg_set_seqno(hdr, l->snd_nxt + U16_MAX / 2);
if (mtyp == STATE_MSG) {
- if (!tipc_link_is_up(l))
- return;
-
- /* Override rcvgap if there are packets in deferred queue */
- if (!skb_queue_empty(&l->deferdq))
- rcvgap = buf_seqno(skb_peek(&l->deferdq)) - l->rcv_nxt;
- if (rcvgap) {
- msg_set_seq_gap(hdr, rcvgap);
- l->stats.sent_nacks++;
- }
+ msg_set_seq_gap(hdr, rcvgap);
+ msg_set_size(hdr, INT_H_SIZE);
msg_set_probe(hdr, probe);
- if (probe)
- l->stats.sent_probes++;
l->stats.sent_states++;
l->rcv_unacked = 0;
} else {
/* RESET_MSG or ACTIVATE_MSG */
msg_set_max_pkt(hdr, l->advertised_mtu);
- msg_set_ack(hdr, l->rcv_nxt - 1);
- msg_set_next_sent(hdr, 1);
+ strcpy(msg_data(hdr), l->if_name);
}
- skb = tipc_buf_acquire(msg_size(hdr));
- if (!skb)
- return;
- skb_copy_to_linear_data(skb, hdr, msg_size(hdr));
+ if (probe)
+ l->stats.sent_probes++;
+ if (rcvgap)
+ l->stats.sent_nacks++;
skb->priority = TC_PRIO_CONTROL;
__skb_queue_tail(xmitq, skb);
}
@@ -1340,7 +1308,7 @@ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl,
/* At least one packet required for safe algorithm => add dummy */
skb = tipc_msg_create(TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG,
- BASIC_H_SIZE, 0, l->addr, link_own_addr(l),
+ BASIC_H_SIZE, 0, l->addr, tipc_own_addr(l->net),
0, 0, TIPC_ERR_NO_PORT);
if (!skb) {
pr_warn("%sunable to create tunnel packet\n", link_co_err);
@@ -1351,7 +1319,7 @@ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl,
__skb_queue_purge(&tmpxq);
/* Initialize reusable tunnel packet header */
- tipc_msg_init(link_own_addr(l), &tnlhdr, TUNNEL_PROTOCOL,
+ tipc_msg_init(tipc_own_addr(l->net), &tnlhdr, TUNNEL_PROTOCOL,
mtyp, INT_H_SIZE, l->addr);
pktcnt = skb_queue_len(&l->transmq) + skb_queue_len(&l->backlogq);
msg_set_msgcnt(&tnlhdr, pktcnt);
@@ -1410,7 +1378,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
if (tipc_link_is_blocked(l) || !xmitq)
goto exit;
- if (link_own_addr(l) > msg_prevnode(hdr))
+ if (tipc_own_addr(l->net) > msg_prevnode(hdr))
l->net_plane = msg_net_plane(hdr);
switch (mtyp) {
@@ -1418,7 +1386,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
/* Ignore duplicate RESET with old session number */
if ((less_eq(msg_session(hdr), l->peer_session)) &&
- (l->peer_session != WILDCARD_SESSION))
+ (l->peer_session != ANY_SESSION))
break;
/* fall thru' */
@@ -1515,7 +1483,7 @@ static bool tipc_link_build_bc_proto_msg(struct tipc_link *l, bool bcast,
u16 gap_to = peers_snd_nxt - 1;
skb = tipc_msg_create(BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE,
- 0, l->addr, link_own_addr(l), 0, 0, 0);
+ 0, l->addr, tipc_own_addr(l->net), 0, 0, 0);
if (!skb)
return false;
hdr = buf_msg(skb);
@@ -1670,7 +1638,7 @@ int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
if (mtyp != STATE_MSG)
return 0;
- if (dnode == link_own_addr(l)) {
+ if (dnode == tipc_own_addr(l->net)) {
tipc_link_bc_ack_rcv(l, acked, xmitq);
rc = tipc_link_retrans(l->bc_sndlink, from, to, xmitq);
l->stats.recv_nacks++;
diff --git a/net/tipc/link.h b/net/tipc/link.h
index b4ee9d6e181d..6a94175ee20a 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -86,7 +86,6 @@ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer,
struct sk_buff_head *namedq,
struct tipc_link *bc_sndlink,
struct tipc_link **link);
-void tipc_link_reinit(struct tipc_link *l, u32 addr);
void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl,
int mtyp, struct sk_buff_head *xmitq);
void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq);
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 777b979b8463..e190460fe0d3 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -47,12 +47,6 @@
#define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */
-static const struct nla_policy
-tipc_nl_name_table_policy[TIPC_NLA_NAME_TABLE_MAX + 1] = {
- [TIPC_NLA_NAME_TABLE_UNSPEC] = { .type = NLA_UNSPEC },
- [TIPC_NLA_NAME_TABLE_PUBL] = { .type = NLA_NESTED }
-};
-
/**
* struct name_info - name sequence publication info
* @node_list: circular list of publications made by own node
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 77bf9113c7a7..28bf4feeb81c 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -41,11 +41,7 @@
#include "socket.h"
#include "node.h"
#include "bcast.h"
-
-static const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = {
- [TIPC_NLA_NET_UNSPEC] = { .type = NLA_UNSPEC },
- [TIPC_NLA_NET_ID] = { .type = NLA_U32 }
-};
+#include "netlink.h"
/*
* The TIPC locking policy is designed to ensure a very fine locking
@@ -116,7 +112,6 @@ int tipc_net_start(struct net *net, u32 addr)
tn->own_addr = addr;
tipc_named_reinit(net);
tipc_sk_reinit(net);
- tipc_bcast_reinit(net);
tipc_nametbl_publish(net, TIPC_CFG_SRV, tn->own_addr, tn->own_addr,
TIPC_ZONE_SCOPE, 0, tn->own_addr);
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index 8975b0135b76..56935df2167a 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -55,6 +55,75 @@ static const struct nla_policy tipc_nl_policy[TIPC_NLA_MAX + 1] = {
[TIPC_NLA_NAME_TABLE] = { .type = NLA_NESTED, }
};
+const struct nla_policy
+tipc_nl_name_table_policy[TIPC_NLA_NAME_TABLE_MAX + 1] = {
+ [TIPC_NLA_NAME_TABLE_UNSPEC] = { .type = NLA_UNSPEC },
+ [TIPC_NLA_NAME_TABLE_PUBL] = { .type = NLA_NESTED }
+};
+
+const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = {
+ [TIPC_NLA_SOCK_UNSPEC] = { .type = NLA_UNSPEC },
+ [TIPC_NLA_SOCK_ADDR] = { .type = NLA_U32 },
+ [TIPC_NLA_SOCK_REF] = { .type = NLA_U32 },
+ [TIPC_NLA_SOCK_CON] = { .type = NLA_NESTED },
+ [TIPC_NLA_SOCK_HAS_PUBL] = { .type = NLA_FLAG }
+};
+
+const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = {
+ [TIPC_NLA_NET_UNSPEC] = { .type = NLA_UNSPEC },
+ [TIPC_NLA_NET_ID] = { .type = NLA_U32 }
+};
+
+const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = {
+ [TIPC_NLA_LINK_UNSPEC] = { .type = NLA_UNSPEC },
+ [TIPC_NLA_LINK_NAME] = { .type = NLA_STRING,
+ .len = TIPC_MAX_LINK_NAME },
+ [TIPC_NLA_LINK_MTU] = { .type = NLA_U32 },
+ [TIPC_NLA_LINK_BROADCAST] = { .type = NLA_FLAG },
+ [TIPC_NLA_LINK_UP] = { .type = NLA_FLAG },
+ [TIPC_NLA_LINK_ACTIVE] = { .type = NLA_FLAG },
+ [TIPC_NLA_LINK_PROP] = { .type = NLA_NESTED },
+ [TIPC_NLA_LINK_STATS] = { .type = NLA_NESTED },
+ [TIPC_NLA_LINK_RX] = { .type = NLA_U32 },
+ [TIPC_NLA_LINK_TX] = { .type = NLA_U32 }
+};
+
+const struct nla_policy tipc_nl_node_policy[TIPC_NLA_NODE_MAX + 1] = {
+ [TIPC_NLA_NODE_UNSPEC] = { .type = NLA_UNSPEC },
+ [TIPC_NLA_NODE_ADDR] = { .type = NLA_U32 },
+ [TIPC_NLA_NODE_UP] = { .type = NLA_FLAG }
+};
+
+/* Properties valid for media, bearer and link */
+const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = {
+ [TIPC_NLA_PROP_UNSPEC] = { .type = NLA_UNSPEC },
+ [TIPC_NLA_PROP_PRIO] = { .type = NLA_U32 },
+ [TIPC_NLA_PROP_TOL] = { .type = NLA_U32 },
+ [TIPC_NLA_PROP_WIN] = { .type = NLA_U32 }
+};
+
+const struct nla_policy tipc_nl_bearer_policy[TIPC_NLA_BEARER_MAX + 1] = {
+ [TIPC_NLA_BEARER_UNSPEC] = { .type = NLA_UNSPEC },
+ [TIPC_NLA_BEARER_NAME] = { .type = NLA_STRING,
+ .len = TIPC_MAX_BEARER_NAME },
+ [TIPC_NLA_BEARER_PROP] = { .type = NLA_NESTED },
+ [TIPC_NLA_BEARER_DOMAIN] = { .type = NLA_U32 }
+};
+
+const struct nla_policy tipc_nl_media_policy[TIPC_NLA_MEDIA_MAX + 1] = {
+ [TIPC_NLA_MEDIA_UNSPEC] = { .type = NLA_UNSPEC },
+ [TIPC_NLA_MEDIA_NAME] = { .type = NLA_STRING },
+ [TIPC_NLA_MEDIA_PROP] = { .type = NLA_NESTED }
+};
+
+const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = {
+ [TIPC_NLA_UDP_UNSPEC] = {.type = NLA_UNSPEC},
+ [TIPC_NLA_UDP_LOCAL] = {.type = NLA_BINARY,
+ .len = sizeof(struct sockaddr_storage)},
+ [TIPC_NLA_UDP_REMOTE] = {.type = NLA_BINARY,
+ .len = sizeof(struct sockaddr_storage)},
+};
+
/* Users of the legacy API (tipc-config) can't handle that we add operations,
* so we have a separate genl handling for the new API.
*/
diff --git a/net/tipc/netlink.h b/net/tipc/netlink.h
index 08a1db67b927..ed1dbcb4afbd 100644
--- a/net/tipc/netlink.h
+++ b/net/tipc/netlink.h
@@ -35,6 +35,7 @@
#ifndef _TIPC_NETLINK_H
#define _TIPC_NETLINK_H
+#include <net/netlink.h>
extern struct genl_family tipc_genl_family;
int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***buf);
@@ -45,6 +46,16 @@ struct tipc_nl_msg {
u32 seq;
};
+extern const struct nla_policy tipc_nl_name_table_policy[];
+extern const struct nla_policy tipc_nl_sock_policy[];
+extern const struct nla_policy tipc_nl_net_policy[];
+extern const struct nla_policy tipc_nl_link_policy[];
+extern const struct nla_policy tipc_nl_node_policy[];
+extern const struct nla_policy tipc_nl_prop_policy[];
+extern const struct nla_policy tipc_nl_bearer_policy[];
+extern const struct nla_policy tipc_nl_media_policy[];
+extern const struct nla_policy tipc_nl_udp_policy[];
+
int tipc_netlink_start(void);
int tipc_netlink_compat_start(void);
void tipc_netlink_stop(void);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index cdb79503d890..ace178fd3850 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -41,6 +41,7 @@
#include "socket.h"
#include "bcast.h"
#include "discover.h"
+#include "netlink.h"
#define INVALID_NODE_SIG 0x10000
@@ -164,28 +165,6 @@ struct tipc_sock_conn {
struct list_head list;
};
-static const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = {
- [TIPC_NLA_LINK_UNSPEC] = { .type = NLA_UNSPEC },
- [TIPC_NLA_LINK_NAME] = {
- .type = NLA_STRING,
- .len = TIPC_MAX_LINK_NAME
- },
- [TIPC_NLA_LINK_MTU] = { .type = NLA_U32 },
- [TIPC_NLA_LINK_BROADCAST] = { .type = NLA_FLAG },
- [TIPC_NLA_LINK_UP] = { .type = NLA_FLAG },
- [TIPC_NLA_LINK_ACTIVE] = { .type = NLA_FLAG },
- [TIPC_NLA_LINK_PROP] = { .type = NLA_NESTED },
- [TIPC_NLA_LINK_STATS] = { .type = NLA_NESTED },
- [TIPC_NLA_LINK_RX] = { .type = NLA_U32 },
- [TIPC_NLA_LINK_TX] = { .type = NLA_U32 }
-};
-
-static const struct nla_policy tipc_nl_node_policy[TIPC_NLA_NODE_MAX + 1] = {
- [TIPC_NLA_NODE_UNSPEC] = { .type = NLA_UNSPEC },
- [TIPC_NLA_NODE_ADDR] = { .type = NLA_U32 },
- [TIPC_NLA_NODE_UP] = { .type = NLA_FLAG }
-};
-
static struct tipc_link *node_active_link(struct tipc_node *n, int sel)
{
int bearer_id = n->active_links[sel & 1];
@@ -843,7 +822,7 @@ void tipc_node_check_dest(struct net *net, u32 onode,
memcpy(&le->maddr, maddr, sizeof(*maddr));
exit:
tipc_node_write_unlock(n);
- if (reset && !tipc_link_is_reset(l))
+ if (reset && l && !tipc_link_is_reset(l))
tipc_node_link_down(n, b->identity, false);
tipc_node_put(n);
}
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 69c29050f14a..3eeb50a27b89 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -42,6 +42,7 @@
#include "name_distr.h"
#include "socket.h"
#include "bcast.h"
+#include "netlink.h"
#define SS_LISTENING -1 /* socket is listening */
#define SS_READY -2 /* socket is connectionless */
@@ -126,14 +127,6 @@ static const struct proto_ops stream_ops;
static const struct proto_ops msg_ops;
static struct proto tipc_proto;
-static const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = {
- [TIPC_NLA_SOCK_UNSPEC] = { .type = NLA_UNSPEC },
- [TIPC_NLA_SOCK_ADDR] = { .type = NLA_U32 },
- [TIPC_NLA_SOCK_REF] = { .type = NLA_U32 },
- [TIPC_NLA_SOCK_CON] = { .type = NLA_NESTED },
- [TIPC_NLA_SOCK_HAS_PUBL] = { .type = NLA_FLAG }
-};
-
static const struct rhashtable_params tsk_rht_params;
/*
@@ -673,7 +666,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
struct tipc_sock *tsk = tipc_sk(sk);
struct net *net = sock_net(sk);
struct tipc_msg *mhdr = &tsk->phdr;
- struct sk_buff_head *pktchain = &sk->sk_write_queue;
+ struct sk_buff_head pktchain;
struct iov_iter save = msg->msg_iter;
uint mtu;
int rc;
@@ -687,14 +680,16 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
msg_set_nameupper(mhdr, seq->upper);
msg_set_hdr_sz(mhdr, MCAST_H_SIZE);
+ skb_queue_head_init(&pktchain);
+
new_mtu:
mtu = tipc_bcast_get_mtu(net);
- rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, pktchain);
+ rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, &pktchain);
if (unlikely(rc < 0))
return rc;
do {
- rc = tipc_bcast_xmit(net, pktchain);
+ rc = tipc_bcast_xmit(net, &pktchain);
if (likely(!rc))
return dsz;
@@ -704,7 +699,7 @@ new_mtu:
if (!rc)
continue;
}
- __skb_queue_purge(pktchain);
+ __skb_queue_purge(&pktchain);
if (rc == -EMSGSIZE) {
msg->msg_iter = save;
goto new_mtu;
@@ -863,7 +858,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz)
struct net *net = sock_net(sk);
struct tipc_msg *mhdr = &tsk->phdr;
u32 dnode, dport;
- struct sk_buff_head *pktchain = &sk->sk_write_queue;
+ struct sk_buff_head pktchain;
struct sk_buff *skb;
struct tipc_name_seq *seq;
struct iov_iter save;
@@ -924,17 +919,18 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz)
msg_set_hdr_sz(mhdr, BASIC_H_SIZE);
}
+ skb_queue_head_init(&pktchain);
save = m->msg_iter;
new_mtu:
mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
- rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, pktchain);
+ rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, &pktchain);
if (rc < 0)
return rc;
do {
- skb = skb_peek(pktchain);
+ skb = skb_peek(&pktchain);
TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong;
- rc = tipc_node_xmit(net, pktchain, dnode, tsk->portid);
+ rc = tipc_node_xmit(net, &pktchain, dnode, tsk->portid);
if (likely(!rc)) {
if (sock->state != SS_READY)
sock->state = SS_CONNECTING;
@@ -946,7 +942,7 @@ new_mtu:
if (!rc)
continue;
}
- __skb_queue_purge(pktchain);
+ __skb_queue_purge(&pktchain);
if (rc == -EMSGSIZE) {
m->msg_iter = save;
goto new_mtu;
@@ -1016,7 +1012,7 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)
struct net *net = sock_net(sk);
struct tipc_sock *tsk = tipc_sk(sk);
struct tipc_msg *mhdr = &tsk->phdr;
- struct sk_buff_head *pktchain = &sk->sk_write_queue;
+ struct sk_buff_head pktchain;
DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
u32 portid = tsk->portid;
int rc = -EINVAL;
@@ -1044,17 +1040,19 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)
timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
dnode = tsk_peer_node(tsk);
+ skb_queue_head_init(&pktchain);
next:
save = m->msg_iter;
mtu = tsk->max_pkt;
send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE);
- rc = tipc_msg_build(mhdr, m, sent, send, mtu, pktchain);
+ rc = tipc_msg_build(mhdr, m, sent, send, mtu, &pktchain);
if (unlikely(rc < 0))
return rc;
+
do {
if (likely(!tsk_conn_cong(tsk))) {
- rc = tipc_node_xmit(net, pktchain, dnode, portid);
+ rc = tipc_node_xmit(net, &pktchain, dnode, portid);
if (likely(!rc)) {
tsk->sent_unacked++;
sent += send;
@@ -1063,7 +1061,7 @@ next:
goto next;
}
if (rc == -EMSGSIZE) {
- __skb_queue_purge(pktchain);
+ __skb_queue_purge(&pktchain);
tsk->max_pkt = tipc_node_get_mtu(net, dnode,
portid);
m->msg_iter = save;
@@ -1077,7 +1075,7 @@ next:
rc = tipc_wait_for_sndpkt(sock, &timeo);
} while (!rc);
- __skb_queue_purge(pktchain);
+ __skb_queue_purge(&pktchain);
return sent ? sent : rc;
}
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 22963cafd5ed..e6cb386fbf34 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -326,7 +326,8 @@ static void tipc_subscrb_rcv_cb(struct net *net, int conid,
return tipc_subscrp_cancel(s, subscriber);
}
- tipc_subscrp_subscribe(net, s, subscriber, swap);
+ if (s)
+ tipc_subscrp_subscribe(net, s, subscriber, swap);
}
/* Handle one request to establish a new subscriber */
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index d63a911e7fe2..c94f9a15e2cd 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -48,20 +48,13 @@
#include <linux/tipc_netlink.h>
#include "core.h"
#include "bearer.h"
+#include "netlink.h"
/* IANA assigned UDP port */
#define UDP_PORT_DEFAULT 6118
#define UDP_MIN_HEADROOM 28
-static const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = {
- [TIPC_NLA_UDP_UNSPEC] = {.type = NLA_UNSPEC},
- [TIPC_NLA_UDP_LOCAL] = {.type = NLA_BINARY,
- .len = sizeof(struct sockaddr_storage)},
- [TIPC_NLA_UDP_REMOTE] = {.type = NLA_BINARY,
- .len = sizeof(struct sockaddr_storage)},
-};
-
/**
* struct udp_media_addr - IP/UDP addressing information
*
@@ -181,6 +174,8 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
err = PTR_ERR(rt);
goto tx_error;
}
+
+ skb->dev = rt->dst.dev;
ttl = ip4_dst_hoplimit(&rt->dst);
udp_tunnel_xmit_skb(rt, ub->ubsock->sk, skb, src->ipv4.s_addr,
dst->ipv4.s_addr, 0, ttl, 0, src->udp_port,
@@ -201,7 +196,7 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
ttl = ip6_dst_hoplimit(ndst);
err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb,
ndst->dev, &src->ipv6,
- &dst->ipv6, 0, ttl, src->udp_port,
+ &dst->ipv6, 0, ttl, 0, src->udp_port,
dst->udp_port, false);
#endif
}
@@ -274,7 +269,7 @@ static int parse_options(struct nlattr *attrs[], struct udp_bearer *ub,
struct udp_media_addr *remote)
{
struct nlattr *opts[TIPC_NLA_UDP_MAX + 1];
- struct sockaddr_storage *sa_local, *sa_remote;
+ struct sockaddr_storage sa_local, sa_remote;
if (!attrs[TIPC_NLA_BEARER_UDP_OPTS])
goto err;
@@ -283,41 +278,48 @@ static int parse_options(struct nlattr *attrs[], struct udp_bearer *ub,
tipc_nl_udp_policy))
goto err;
if (opts[TIPC_NLA_UDP_LOCAL] && opts[TIPC_NLA_UDP_REMOTE]) {
- sa_local = nla_data(opts[TIPC_NLA_UDP_LOCAL]);
- sa_remote = nla_data(opts[TIPC_NLA_UDP_REMOTE]);
+ nla_memcpy(&sa_local, opts[TIPC_NLA_UDP_LOCAL],
+ sizeof(sa_local));
+ nla_memcpy(&sa_remote, opts[TIPC_NLA_UDP_REMOTE],
+ sizeof(sa_remote));
} else {
err:
pr_err("Invalid UDP bearer configuration");
return -EINVAL;
}
- if ((sa_local->ss_family & sa_remote->ss_family) == AF_INET) {
+ if ((sa_local.ss_family & sa_remote.ss_family) == AF_INET) {
struct sockaddr_in *ip4;
- ip4 = (struct sockaddr_in *)sa_local;
+ ip4 = (struct sockaddr_in *)&sa_local;
local->proto = htons(ETH_P_IP);
local->udp_port = ip4->sin_port;
local->ipv4.s_addr = ip4->sin_addr.s_addr;
- ip4 = (struct sockaddr_in *)sa_remote;
+ ip4 = (struct sockaddr_in *)&sa_remote;
remote->proto = htons(ETH_P_IP);
remote->udp_port = ip4->sin_port;
remote->ipv4.s_addr = ip4->sin_addr.s_addr;
return 0;
#if IS_ENABLED(CONFIG_IPV6)
- } else if ((sa_local->ss_family & sa_remote->ss_family) == AF_INET6) {
+ } else if ((sa_local.ss_family & sa_remote.ss_family) == AF_INET6) {
+ int atype;
struct sockaddr_in6 *ip6;
- ip6 = (struct sockaddr_in6 *)sa_local;
+ ip6 = (struct sockaddr_in6 *)&sa_local;
+ atype = ipv6_addr_type(&ip6->sin6_addr);
+ if (__ipv6_addr_needs_scope_id(atype) && !ip6->sin6_scope_id)
+ return -EINVAL;
+
local->proto = htons(ETH_P_IPV6);
local->udp_port = ip6->sin6_port;
- local->ipv6 = ip6->sin6_addr;
+ memcpy(&local->ipv6, &ip6->sin6_addr, sizeof(struct in6_addr));
ub->ifindex = ip6->sin6_scope_id;
- ip6 = (struct sockaddr_in6 *)sa_remote;
+ ip6 = (struct sockaddr_in6 *)&sa_remote;
remote->proto = htons(ETH_P_IPV6);
remote->udp_port = ip6->sin6_port;
- remote->ipv6 = ip6->sin6_addr;
+ memcpy(&remote->ipv6, &ip6->sin6_addr, sizeof(struct in6_addr));
return 0;
#endif
}
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 3a9c41bc849a..9f1c4aa851ef 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1157,6 +1157,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
return NOTIFY_DONE;
}
+ wireless_nlevent_flush();
+
return NOTIFY_OK;
}
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 90890f183c0e..98c924260b3d 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -7554,7 +7554,7 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
if ((ibss.chandef.width != NL80211_CHAN_WIDTH_20_NOHT) &&
no_ht) {
- kfree(connkeys);
+ kzfree(connkeys);
return -EINVAL;
}
}
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 79bd3a171caa..544558171787 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -917,6 +917,12 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
nl80211_send_disconnected(rdev, dev, reason, ie, ie_len, from_ap);
+ /* stop critical protocol if supported */
+ if (rdev->ops->crit_proto_stop && rdev->crit_proto_nlportid) {
+ rdev->crit_proto_nlportid = 0;
+ rdev_crit_proto_stop(rdev, wdev);
+ }
+
/*
* Delete all the keys ... pairwise keys can't really
* exist any more anyway, but default keys might.
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index c8717c1d082e..b50ee5d622e1 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -342,6 +342,40 @@ static const int compat_event_type_size[] = {
/* IW event code */
+void wireless_nlevent_flush(void)
+{
+ struct sk_buff *skb;
+ struct net *net;
+
+ ASSERT_RTNL();
+
+ for_each_net(net) {
+ while ((skb = skb_dequeue(&net->wext_nlevents)))
+ rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL,
+ GFP_KERNEL);
+ }
+}
+EXPORT_SYMBOL_GPL(wireless_nlevent_flush);
+
+static int wext_netdev_notifier_call(struct notifier_block *nb,
+ unsigned long state, void *ptr)
+{
+ /*
+ * When a netdev changes state in any way, flush all pending messages
+ * to avoid them going out in a strange order, e.g. RTM_NEWLINK after
+ * RTM_DELLINK, or with IFF_UP after without IFF_UP during dev_close()
+ * or similar - all of which could otherwise happen due to delays from
+ * schedule_work().
+ */
+ wireless_nlevent_flush();
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block wext_netdev_notifier = {
+ .notifier_call = wext_netdev_notifier_call,
+};
+
static int __net_init wext_pernet_init(struct net *net)
{
skb_queue_head_init(&net->wext_nlevents);
@@ -360,7 +394,12 @@ static struct pernet_operations wext_pernet_ops = {
static int __init wireless_nlevent_init(void)
{
- return register_pernet_subsys(&wext_pernet_ops);
+ int err = register_pernet_subsys(&wext_pernet_ops);
+
+ if (err)
+ return err;
+
+ return register_netdevice_notifier(&wext_netdev_notifier);
}
subsys_initcall(wireless_nlevent_init);
@@ -368,17 +407,8 @@ subsys_initcall(wireless_nlevent_init);
/* Process events generated by the wireless layer or the driver. */
static void wireless_nlevent_process(struct work_struct *work)
{
- struct sk_buff *skb;
- struct net *net;
-
rtnl_lock();
-
- for_each_net(net) {
- while ((skb = skb_dequeue(&net->wext_nlevents)))
- rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL,
- GFP_KERNEL);
- }
-
+ wireless_nlevent_flush();
rtnl_unlock();
}
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index c4f8ae0c8afe..502c9fc8db85 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -17,6 +17,8 @@ hostprogs-y += tracex6
hostprogs-y += trace_output
hostprogs-y += lathist
hostprogs-y += offwaketime
+hostprogs-y += spintest
+hostprogs-y += map_perf_test
test_verifier-objs := test_verifier.o libbpf.o
test_maps-objs := test_maps.o libbpf.o
@@ -34,6 +36,8 @@ tracex6-objs := bpf_load.o libbpf.o tracex6_user.o
trace_output-objs := bpf_load.o libbpf.o trace_output_user.o
lathist-objs := bpf_load.o libbpf.o lathist_user.o
offwaketime-objs := bpf_load.o libbpf.o offwaketime_user.o
+spintest-objs := bpf_load.o libbpf.o spintest_user.o
+map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o
# Tell kbuild to always build the programs
always := $(hostprogs-y)
@@ -50,6 +54,8 @@ always += trace_output_kern.o
always += tcbpf1_kern.o
always += lathist_kern.o
always += offwaketime_kern.o
+always += spintest_kern.o
+always += map_perf_test_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include
@@ -67,6 +73,8 @@ HOSTLOADLIBES_tracex6 += -lelf
HOSTLOADLIBES_trace_output += -lelf -lrt
HOSTLOADLIBES_lathist += -lelf
HOSTLOADLIBES_offwaketime += -lelf
+HOSTLOADLIBES_spintest += -lelf
+HOSTLOADLIBES_map_perf_test += -lelf -lrt
# point this to your LLVM backend with bpf support
LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index 811bcca0f29d..9363500131a7 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -61,6 +61,7 @@ struct bpf_map_def {
unsigned int key_size;
unsigned int value_size;
unsigned int max_entries;
+ unsigned int map_flags;
};
static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len, int flags) =
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index da86a8e0a95a..58f86bd11b3d 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -157,9 +157,13 @@ static int load_maps(struct bpf_map_def *maps, int len)
map_fd[i] = bpf_create_map(maps[i].type,
maps[i].key_size,
maps[i].value_size,
- maps[i].max_entries);
- if (map_fd[i] < 0)
+ maps[i].max_entries,
+ maps[i].map_flags);
+ if (map_fd[i] < 0) {
+ printf("failed to create a map: %d %s\n",
+ errno, strerror(errno));
return 1;
+ }
if (maps[i].type == BPF_MAP_TYPE_PROG_ARRAY)
prog_array_fd = map_fd[i];
@@ -343,3 +347,65 @@ void read_trace_pipe(void)
}
}
}
+
+#define MAX_SYMS 300000
+static struct ksym syms[MAX_SYMS];
+static int sym_cnt;
+
+static int ksym_cmp(const void *p1, const void *p2)
+{
+ return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr;
+}
+
+int load_kallsyms(void)
+{
+ FILE *f = fopen("/proc/kallsyms", "r");
+ char func[256], buf[256];
+ char symbol;
+ void *addr;
+ int i = 0;
+
+ if (!f)
+ return -ENOENT;
+
+ while (!feof(f)) {
+ if (!fgets(buf, sizeof(buf), f))
+ break;
+ if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
+ break;
+ if (!addr)
+ continue;
+ syms[i].addr = (long) addr;
+ syms[i].name = strdup(func);
+ i++;
+ }
+ sym_cnt = i;
+ qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp);
+ return 0;
+}
+
+struct ksym *ksym_search(long key)
+{
+ int start = 0, end = sym_cnt;
+ int result;
+
+ while (start < end) {
+ size_t mid = start + (end - start) / 2;
+
+ result = key - syms[mid].addr;
+ if (result < 0)
+ end = mid;
+ else if (result > 0)
+ start = mid + 1;
+ else
+ return &syms[mid];
+ }
+
+ if (start >= 1 && syms[start - 1].addr < key &&
+ key < syms[start].addr)
+ /* valid ksym */
+ return &syms[start - 1];
+
+ /* out of range. return _stext */
+ return &syms[0];
+}
diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h
index cbd7c2b532b9..dfa57fe65c8e 100644
--- a/samples/bpf/bpf_load.h
+++ b/samples/bpf/bpf_load.h
@@ -23,5 +23,11 @@ extern int event_fd[MAX_PROGS];
int load_bpf_file(char *path);
void read_trace_pipe(void);
+struct ksym {
+ long addr;
+ char *name;
+};
+int load_kallsyms(void);
+struct ksym *ksym_search(long key);
#endif
diff --git a/samples/bpf/fds_example.c b/samples/bpf/fds_example.c
index e2fd16c3d0f0..625e797be6ef 100644
--- a/samples/bpf/fds_example.c
+++ b/samples/bpf/fds_example.c
@@ -44,7 +44,7 @@ static void usage(void)
static int bpf_map_create(void)
{
return bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(uint32_t),
- sizeof(uint32_t), 1024);
+ sizeof(uint32_t), 1024, 0);
}
static int bpf_prog_create(const char *object)
diff --git a/samples/bpf/libbpf.c b/samples/bpf/libbpf.c
index 65a8d48d2799..9969e35550c3 100644
--- a/samples/bpf/libbpf.c
+++ b/samples/bpf/libbpf.c
@@ -19,13 +19,14 @@ static __u64 ptr_to_u64(void *ptr)
}
int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size,
- int max_entries)
+ int max_entries, int map_flags)
{
union bpf_attr attr = {
.map_type = map_type,
.key_size = key_size,
.value_size = value_size,
- .max_entries = max_entries
+ .max_entries = max_entries,
+ .map_flags = map_flags,
};
return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h
index 014aacf916e4..364582b77888 100644
--- a/samples/bpf/libbpf.h
+++ b/samples/bpf/libbpf.h
@@ -5,7 +5,7 @@
struct bpf_insn;
int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size,
- int max_entries);
+ int max_entries, int map_flags);
int bpf_update_elem(int fd, void *key, void *value, unsigned long long flags);
int bpf_lookup_elem(int fd, void *key, void *value);
int bpf_delete_elem(int fd, void *key);
diff --git a/samples/bpf/map_perf_test_kern.c b/samples/bpf/map_perf_test_kern.c
new file mode 100644
index 000000000000..311538e5a701
--- /dev/null
+++ b/samples/bpf/map_perf_test_kern.c
@@ -0,0 +1,100 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+#define MAX_ENTRIES 1000
+
+struct bpf_map_def SEC("maps") hash_map = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(long),
+ .max_entries = MAX_ENTRIES,
+};
+
+struct bpf_map_def SEC("maps") percpu_hash_map = {
+ .type = BPF_MAP_TYPE_PERCPU_HASH,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(long),
+ .max_entries = MAX_ENTRIES,
+};
+
+struct bpf_map_def SEC("maps") hash_map_alloc = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(long),
+ .max_entries = MAX_ENTRIES,
+ .map_flags = BPF_F_NO_PREALLOC,
+};
+
+struct bpf_map_def SEC("maps") percpu_hash_map_alloc = {
+ .type = BPF_MAP_TYPE_PERCPU_HASH,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(long),
+ .max_entries = MAX_ENTRIES,
+ .map_flags = BPF_F_NO_PREALLOC,
+};
+
+SEC("kprobe/sys_getuid")
+int stress_hmap(struct pt_regs *ctx)
+{
+ u32 key = bpf_get_current_pid_tgid();
+ long init_val = 1;
+ long *value;
+
+ bpf_map_update_elem(&hash_map, &key, &init_val, BPF_ANY);
+ value = bpf_map_lookup_elem(&hash_map, &key);
+ if (value)
+ bpf_map_delete_elem(&hash_map, &key);
+ return 0;
+}
+
+SEC("kprobe/sys_geteuid")
+int stress_percpu_hmap(struct pt_regs *ctx)
+{
+ u32 key = bpf_get_current_pid_tgid();
+ long init_val = 1;
+ long *value;
+
+ bpf_map_update_elem(&percpu_hash_map, &key, &init_val, BPF_ANY);
+ value = bpf_map_lookup_elem(&percpu_hash_map, &key);
+ if (value)
+ bpf_map_delete_elem(&percpu_hash_map, &key);
+ return 0;
+}
+SEC("kprobe/sys_getgid")
+int stress_hmap_alloc(struct pt_regs *ctx)
+{
+ u32 key = bpf_get_current_pid_tgid();
+ long init_val = 1;
+ long *value;
+
+ bpf_map_update_elem(&hash_map_alloc, &key, &init_val, BPF_ANY);
+ value = bpf_map_lookup_elem(&hash_map_alloc, &key);
+ if (value)
+ bpf_map_delete_elem(&hash_map_alloc, &key);
+ return 0;
+}
+
+SEC("kprobe/sys_getegid")
+int stress_percpu_hmap_alloc(struct pt_regs *ctx)
+{
+ u32 key = bpf_get_current_pid_tgid();
+ long init_val = 1;
+ long *value;
+
+ bpf_map_update_elem(&percpu_hash_map_alloc, &key, &init_val, BPF_ANY);
+ value = bpf_map_lookup_elem(&percpu_hash_map_alloc, &key);
+ if (value)
+ bpf_map_delete_elem(&percpu_hash_map_alloc, &key);
+ return 0;
+}
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c
new file mode 100644
index 000000000000..95af56ec5739
--- /dev/null
+++ b/samples/bpf/map_perf_test_user.c
@@ -0,0 +1,155 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <asm/unistd.h>
+#include <unistd.h>
+#include <assert.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <linux/bpf.h>
+#include <string.h>
+#include <time.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+#define MAX_CNT 1000000
+
+static __u64 time_get_ns(void)
+{
+ struct timespec ts;
+
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ return ts.tv_sec * 1000000000ull + ts.tv_nsec;
+}
+
+#define HASH_PREALLOC (1 << 0)
+#define PERCPU_HASH_PREALLOC (1 << 1)
+#define HASH_KMALLOC (1 << 2)
+#define PERCPU_HASH_KMALLOC (1 << 3)
+
+static int test_flags = ~0;
+
+static void test_hash_prealloc(int cpu)
+{
+ __u64 start_time;
+ int i;
+
+ start_time = time_get_ns();
+ for (i = 0; i < MAX_CNT; i++)
+ syscall(__NR_getuid);
+ printf("%d:hash_map_perf pre-alloc %lld events per sec\n",
+ cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+}
+
+static void test_percpu_hash_prealloc(int cpu)
+{
+ __u64 start_time;
+ int i;
+
+ start_time = time_get_ns();
+ for (i = 0; i < MAX_CNT; i++)
+ syscall(__NR_geteuid);
+ printf("%d:percpu_hash_map_perf pre-alloc %lld events per sec\n",
+ cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+}
+
+static void test_hash_kmalloc(int cpu)
+{
+ __u64 start_time;
+ int i;
+
+ start_time = time_get_ns();
+ for (i = 0; i < MAX_CNT; i++)
+ syscall(__NR_getgid);
+ printf("%d:hash_map_perf kmalloc %lld events per sec\n",
+ cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+}
+
+static void test_percpu_hash_kmalloc(int cpu)
+{
+ __u64 start_time;
+ int i;
+
+ start_time = time_get_ns();
+ for (i = 0; i < MAX_CNT; i++)
+ syscall(__NR_getegid);
+ printf("%d:percpu_hash_map_perf kmalloc %lld events per sec\n",
+ cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+}
+
+static void loop(int cpu)
+{
+ cpu_set_t cpuset;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu, &cpuset);
+ sched_setaffinity(0, sizeof(cpuset), &cpuset);
+
+ if (test_flags & HASH_PREALLOC)
+ test_hash_prealloc(cpu);
+
+ if (test_flags & PERCPU_HASH_PREALLOC)
+ test_percpu_hash_prealloc(cpu);
+
+ if (test_flags & HASH_KMALLOC)
+ test_hash_kmalloc(cpu);
+
+ if (test_flags & PERCPU_HASH_KMALLOC)
+ test_percpu_hash_kmalloc(cpu);
+}
+
+static void run_perf_test(int tasks)
+{
+ pid_t pid[tasks];
+ int i;
+
+ for (i = 0; i < tasks; i++) {
+ pid[i] = fork();
+ if (pid[i] == 0) {
+ loop(i);
+ exit(0);
+ } else if (pid[i] == -1) {
+ printf("couldn't spawn #%d process\n", i);
+ exit(1);
+ }
+ }
+ for (i = 0; i < tasks; i++) {
+ int status;
+
+ assert(waitpid(pid[i], &status, 0) == pid[i]);
+ assert(status == 0);
+ }
+}
+
+int main(int argc, char **argv)
+{
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ char filename[256];
+ int num_cpu = 8;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ setrlimit(RLIMIT_MEMLOCK, &r);
+
+ if (argc > 1)
+ test_flags = atoi(argv[1]) ? : test_flags;
+
+ if (argc > 2)
+ num_cpu = atoi(argv[2]) ? : num_cpu;
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ run_perf_test(num_cpu);
+
+ return 0;
+}
diff --git a/samples/bpf/offwaketime_user.c b/samples/bpf/offwaketime_user.c
index 17cf3024e22c..6f002a9c24fa 100644
--- a/samples/bpf/offwaketime_user.c
+++ b/samples/bpf/offwaketime_user.c
@@ -18,80 +18,15 @@
#include "libbpf.h"
#include "bpf_load.h"
-#define MAX_SYMS 300000
#define PRINT_RAW_ADDR 0
-static struct ksym {
- long addr;
- char *name;
-} syms[MAX_SYMS];
-static int sym_cnt;
-
-static int ksym_cmp(const void *p1, const void *p2)
-{
- return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr;
-}
-
-static int load_kallsyms(void)
-{
- FILE *f = fopen("/proc/kallsyms", "r");
- char func[256], buf[256];
- char symbol;
- void *addr;
- int i = 0;
-
- if (!f)
- return -ENOENT;
-
- while (!feof(f)) {
- if (!fgets(buf, sizeof(buf), f))
- break;
- if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
- break;
- if (!addr)
- continue;
- syms[i].addr = (long) addr;
- syms[i].name = strdup(func);
- i++;
- }
- sym_cnt = i;
- qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp);
- return 0;
-}
-
-static void *search(long key)
-{
- int start = 0, end = sym_cnt;
- int result;
-
- while (start < end) {
- size_t mid = start + (end - start) / 2;
-
- result = key - syms[mid].addr;
- if (result < 0)
- end = mid;
- else if (result > 0)
- start = mid + 1;
- else
- return &syms[mid];
- }
-
- if (start >= 1 && syms[start - 1].addr < key &&
- key < syms[start].addr)
- /* valid ksym */
- return &syms[start - 1];
-
- /* out of range. return _stext */
- return &syms[0];
-}
-
static void print_ksym(__u64 addr)
{
struct ksym *sym;
if (!addr)
return;
- sym = search(addr);
+ sym = ksym_search(addr);
if (PRINT_RAW_ADDR)
printf("%s/%llx;", sym->name, addr);
else
diff --git a/samples/bpf/sock_example.c b/samples/bpf/sock_example.c
index a0ce251c5390..28b60baa9fa8 100644
--- a/samples/bpf/sock_example.c
+++ b/samples/bpf/sock_example.c
@@ -34,7 +34,7 @@ static int test_sock(void)
long long value = 0, tcp_cnt, udp_cnt, icmp_cnt;
map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value),
- 256);
+ 256, 0);
if (map_fd < 0) {
printf("failed to create map '%s'\n", strerror(errno));
goto cleanup;
diff --git a/samples/bpf/spintest_kern.c b/samples/bpf/spintest_kern.c
new file mode 100644
index 000000000000..4b27619d91a4
--- /dev/null
+++ b/samples/bpf/spintest_kern.c
@@ -0,0 +1,68 @@
+/* Copyright (c) 2016, Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/perf_event.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") my_map = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(long),
+ .value_size = sizeof(long),
+ .max_entries = 1024,
+};
+struct bpf_map_def SEC("maps") my_map2 = {
+ .type = BPF_MAP_TYPE_PERCPU_HASH,
+ .key_size = sizeof(long),
+ .value_size = sizeof(long),
+ .max_entries = 1024,
+};
+
+struct bpf_map_def SEC("maps") stackmap = {
+ .type = BPF_MAP_TYPE_STACK_TRACE,
+ .key_size = sizeof(u32),
+ .value_size = PERF_MAX_STACK_DEPTH * sizeof(u64),
+ .max_entries = 10000,
+};
+
+#define PROG(foo) \
+int foo(struct pt_regs *ctx) \
+{ \
+ long v = ctx->ip, *val; \
+\
+ val = bpf_map_lookup_elem(&my_map, &v); \
+ bpf_map_update_elem(&my_map, &v, &v, BPF_ANY); \
+ bpf_map_update_elem(&my_map2, &v, &v, BPF_ANY); \
+ bpf_map_delete_elem(&my_map2, &v); \
+ bpf_get_stackid(ctx, &stackmap, BPF_F_REUSE_STACKID); \
+ return 0; \
+}
+
+/* add kprobes to all possible *spin* functions */
+SEC("kprobe/spin_unlock")PROG(p1)
+SEC("kprobe/spin_lock")PROG(p2)
+SEC("kprobe/mutex_spin_on_owner")PROG(p3)
+SEC("kprobe/rwsem_spin_on_owner")PROG(p4)
+SEC("kprobe/spin_unlock_irqrestore")PROG(p5)
+SEC("kprobe/_raw_spin_unlock_irqrestore")PROG(p6)
+SEC("kprobe/_raw_spin_unlock_bh")PROG(p7)
+SEC("kprobe/_raw_spin_unlock")PROG(p8)
+SEC("kprobe/_raw_spin_lock_irqsave")PROG(p9)
+SEC("kprobe/_raw_spin_trylock_bh")PROG(p10)
+SEC("kprobe/_raw_spin_lock_irq")PROG(p11)
+SEC("kprobe/_raw_spin_trylock")PROG(p12)
+SEC("kprobe/_raw_spin_lock")PROG(p13)
+SEC("kprobe/_raw_spin_lock_bh")PROG(p14)
+/* and to inner bpf helpers */
+SEC("kprobe/htab_map_update_elem")PROG(p15)
+SEC("kprobe/__htab_percpu_map_update_elem")PROG(p16)
+SEC("kprobe/htab_map_alloc")PROG(p17)
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/spintest_user.c b/samples/bpf/spintest_user.c
new file mode 100644
index 000000000000..311ede532230
--- /dev/null
+++ b/samples/bpf/spintest_user.c
@@ -0,0 +1,50 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <linux/bpf.h>
+#include <string.h>
+#include <assert.h>
+#include <sys/resource.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+int main(int ac, char **argv)
+{
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ long key, next_key, value;
+ char filename[256];
+ struct ksym *sym;
+ int i;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ setrlimit(RLIMIT_MEMLOCK, &r);
+
+ if (load_kallsyms()) {
+ printf("failed to process /proc/kallsyms\n");
+ return 2;
+ }
+
+ if (load_bpf_file(filename)) {
+ printf("%s", bpf_log_buf);
+ return 1;
+ }
+
+ for (i = 0; i < 5; i++) {
+ key = 0;
+ printf("kprobing funcs:");
+ while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) {
+ bpf_lookup_elem(map_fd[0], &next_key, &value);
+ assert(next_key == value);
+ sym = ksym_search(value);
+ printf(" %s", sym->name);
+ key = next_key;
+ }
+ if (key)
+ printf("\n");
+ key = 0;
+ while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0)
+ bpf_delete_elem(map_fd[0], &next_key);
+ sleep(1);
+ }
+
+ return 0;
+}
diff --git a/samples/bpf/test_maps.c b/samples/bpf/test_maps.c
index ad466ed33093..47bf0858f9e4 100644
--- a/samples/bpf/test_maps.c
+++ b/samples/bpf/test_maps.c
@@ -2,6 +2,7 @@
* Testsuite for eBPF maps
*
* Copyright (c) 2014 PLUMgrid, http://plumgrid.com
+ * Copyright (c) 2016 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -17,13 +18,16 @@
#include <stdlib.h>
#include "libbpf.h"
+static int map_flags;
+
/* sanity tests for map API */
static void test_hashmap_sanity(int i, void *data)
{
long long key, next_key, value;
int map_fd;
- map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), 2);
+ map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
+ 2, map_flags);
if (map_fd < 0) {
printf("failed to create hashmap '%s'\n", strerror(errno));
exit(1);
@@ -99,7 +103,7 @@ static void test_percpu_hashmap_sanity(int task, void *data)
int map_fd, i;
map_fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_HASH, sizeof(key),
- sizeof(value[0]), 2);
+ sizeof(value[0]), 2, map_flags);
if (map_fd < 0) {
printf("failed to create hashmap '%s'\n", strerror(errno));
exit(1);
@@ -188,7 +192,8 @@ static void test_arraymap_sanity(int i, void *data)
int key, next_key, map_fd;
long long value;
- map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value), 2);
+ map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value),
+ 2, 0);
if (map_fd < 0) {
printf("failed to create arraymap '%s'\n", strerror(errno));
exit(1);
@@ -244,7 +249,7 @@ static void test_percpu_arraymap_many_keys(void)
int key, map_fd, i;
map_fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
- sizeof(values[0]), nr_keys);
+ sizeof(values[0]), nr_keys, 0);
if (map_fd < 0) {
printf("failed to create per-cpu arraymap '%s'\n",
strerror(errno));
@@ -275,7 +280,7 @@ static void test_percpu_arraymap_sanity(int i, void *data)
int key, next_key, map_fd;
map_fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
- sizeof(values[0]), 2);
+ sizeof(values[0]), 2, 0);
if (map_fd < 0) {
printf("failed to create arraymap '%s'\n", strerror(errno));
exit(1);
@@ -336,7 +341,7 @@ static void test_map_large(void)
/* allocate 4Mbyte of memory */
map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
- MAP_SIZE);
+ MAP_SIZE, map_flags);
if (map_fd < 0) {
printf("failed to create large map '%s'\n", strerror(errno));
exit(1);
@@ -421,7 +426,7 @@ static void test_map_parallel(void)
int data[2];
map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
- MAP_SIZE);
+ MAP_SIZE, map_flags);
if (map_fd < 0) {
printf("failed to create map for parallel test '%s'\n",
strerror(errno));
@@ -463,7 +468,7 @@ static void test_map_parallel(void)
assert(bpf_get_next_key(map_fd, &key, &key) == -1 && errno == ENOENT);
}
-int main(void)
+static void run_all_tests(void)
{
test_hashmap_sanity(0, NULL);
test_percpu_hashmap_sanity(0, NULL);
@@ -474,6 +479,14 @@ int main(void)
test_map_large();
test_map_parallel();
test_map_stress();
+}
+
+int main(void)
+{
+ map_flags = 0;
+ run_all_tests();
+ map_flags = BPF_F_NO_PREALLOC;
+ run_all_tests();
printf("test_maps: OK\n");
return 0;
}
diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c
index 563c507c0a09..4b51a9039c0d 100644
--- a/samples/bpf/test_verifier.c
+++ b/samples/bpf/test_verifier.c
@@ -1198,7 +1198,7 @@ static int create_map(void)
int map_fd;
map_fd = bpf_create_map(BPF_MAP_TYPE_HASH,
- sizeof(long long), sizeof(long long), 1024);
+ sizeof(long long), sizeof(long long), 1024, 0);
if (map_fd < 0)
printf("failed to create map '%s'\n", strerror(errno));
@@ -1210,7 +1210,7 @@ static int create_prog_array(void)
int map_fd;
map_fd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY,
- sizeof(int), sizeof(int), 4);
+ sizeof(int), sizeof(int), 4, 0);
if (map_fd < 0)
printf("failed to create prog_array '%s'\n", strerror(errno));
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index f8110cfd80ff..f1ab71504e1d 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3249,7 +3249,7 @@ static int selinux_inode_listsecurity(struct inode *inode, char *buffer, size_t
static void selinux_inode_getsecid(struct inode *inode, u32 *secid)
{
- struct inode_security_struct *isec = inode_security(inode);
+ struct inode_security_struct *isec = inode_security_novalidate(inode);
*secid = isec->sid;
}
diff --git a/sound/core/control_compat.c b/sound/core/control_compat.c
index b9c0910fb8c4..0608f216f359 100644
--- a/sound/core/control_compat.c
+++ b/sound/core/control_compat.c
@@ -170,6 +170,19 @@ struct snd_ctl_elem_value32 {
unsigned char reserved[128];
};
+#ifdef CONFIG_X86_X32
+/* x32 has a different alignment for 64bit values from ia32 */
+struct snd_ctl_elem_value_x32 {
+ struct snd_ctl_elem_id id;
+ unsigned int indirect; /* bit-field causes misalignment */
+ union {
+ s32 integer[128];
+ unsigned char data[512];
+ s64 integer64[64];
+ } value;
+ unsigned char reserved[128];
+};
+#endif /* CONFIG_X86_X32 */
/* get the value type and count of the control */
static int get_ctl_type(struct snd_card *card, struct snd_ctl_elem_id *id,
@@ -219,9 +232,11 @@ static int get_elem_size(int type, int count)
static int copy_ctl_value_from_user(struct snd_card *card,
struct snd_ctl_elem_value *data,
- struct snd_ctl_elem_value32 __user *data32,
+ void __user *userdata,
+ void __user *valuep,
int *typep, int *countp)
{
+ struct snd_ctl_elem_value32 __user *data32 = userdata;
int i, type, size;
int uninitialized_var(count);
unsigned int indirect;
@@ -239,8 +254,9 @@ static int copy_ctl_value_from_user(struct snd_card *card,
if (type == SNDRV_CTL_ELEM_TYPE_BOOLEAN ||
type == SNDRV_CTL_ELEM_TYPE_INTEGER) {
for (i = 0; i < count; i++) {
+ s32 __user *intp = valuep;
int val;
- if (get_user(val, &data32->value.integer[i]))
+ if (get_user(val, &intp[i]))
return -EFAULT;
data->value.integer.value[i] = val;
}
@@ -250,8 +266,7 @@ static int copy_ctl_value_from_user(struct snd_card *card,
dev_err(card->dev, "snd_ioctl32_ctl_elem_value: unknown type %d\n", type);
return -EINVAL;
}
- if (copy_from_user(data->value.bytes.data,
- data32->value.data, size))
+ if (copy_from_user(data->value.bytes.data, valuep, size))
return -EFAULT;
}
@@ -261,7 +276,8 @@ static int copy_ctl_value_from_user(struct snd_card *card,
}
/* restore the value to 32bit */
-static int copy_ctl_value_to_user(struct snd_ctl_elem_value32 __user *data32,
+static int copy_ctl_value_to_user(void __user *userdata,
+ void __user *valuep,
struct snd_ctl_elem_value *data,
int type, int count)
{
@@ -270,22 +286,22 @@ static int copy_ctl_value_to_user(struct snd_ctl_elem_value32 __user *data32,
if (type == SNDRV_CTL_ELEM_TYPE_BOOLEAN ||
type == SNDRV_CTL_ELEM_TYPE_INTEGER) {
for (i = 0; i < count; i++) {
+ s32 __user *intp = valuep;
int val;
val = data->value.integer.value[i];
- if (put_user(val, &data32->value.integer[i]))
+ if (put_user(val, &intp[i]))
return -EFAULT;
}
} else {
size = get_elem_size(type, count);
- if (copy_to_user(data32->value.data,
- data->value.bytes.data, size))
+ if (copy_to_user(valuep, data->value.bytes.data, size))
return -EFAULT;
}
return 0;
}
-static int snd_ctl_elem_read_user_compat(struct snd_card *card,
- struct snd_ctl_elem_value32 __user *data32)
+static int ctl_elem_read_user(struct snd_card *card,
+ void __user *userdata, void __user *valuep)
{
struct snd_ctl_elem_value *data;
int err, type, count;
@@ -294,7 +310,9 @@ static int snd_ctl_elem_read_user_compat(struct snd_card *card,
if (data == NULL)
return -ENOMEM;
- if ((err = copy_ctl_value_from_user(card, data, data32, &type, &count)) < 0)
+ err = copy_ctl_value_from_user(card, data, userdata, valuep,
+ &type, &count);
+ if (err < 0)
goto error;
snd_power_lock(card);
@@ -303,14 +321,15 @@ static int snd_ctl_elem_read_user_compat(struct snd_card *card,
err = snd_ctl_elem_read(card, data);
snd_power_unlock(card);
if (err >= 0)
- err = copy_ctl_value_to_user(data32, data, type, count);
+ err = copy_ctl_value_to_user(userdata, valuep, data,
+ type, count);
error:
kfree(data);
return err;
}
-static int snd_ctl_elem_write_user_compat(struct snd_ctl_file *file,
- struct snd_ctl_elem_value32 __user *data32)
+static int ctl_elem_write_user(struct snd_ctl_file *file,
+ void __user *userdata, void __user *valuep)
{
struct snd_ctl_elem_value *data;
struct snd_card *card = file->card;
@@ -320,7 +339,9 @@ static int snd_ctl_elem_write_user_compat(struct snd_ctl_file *file,
if (data == NULL)
return -ENOMEM;
- if ((err = copy_ctl_value_from_user(card, data, data32, &type, &count)) < 0)
+ err = copy_ctl_value_from_user(card, data, userdata, valuep,
+ &type, &count);
+ if (err < 0)
goto error;
snd_power_lock(card);
@@ -329,12 +350,39 @@ static int snd_ctl_elem_write_user_compat(struct snd_ctl_file *file,
err = snd_ctl_elem_write(card, file, data);
snd_power_unlock(card);
if (err >= 0)
- err = copy_ctl_value_to_user(data32, data, type, count);
+ err = copy_ctl_value_to_user(userdata, valuep, data,
+ type, count);
error:
kfree(data);
return err;
}
+static int snd_ctl_elem_read_user_compat(struct snd_card *card,
+ struct snd_ctl_elem_value32 __user *data32)
+{
+ return ctl_elem_read_user(card, data32, &data32->value);
+}
+
+static int snd_ctl_elem_write_user_compat(struct snd_ctl_file *file,
+ struct snd_ctl_elem_value32 __user *data32)
+{
+ return ctl_elem_write_user(file, data32, &data32->value);
+}
+
+#ifdef CONFIG_X86_X32
+static int snd_ctl_elem_read_user_x32(struct snd_card *card,
+ struct snd_ctl_elem_value_x32 __user *data32)
+{
+ return ctl_elem_read_user(card, data32, &data32->value);
+}
+
+static int snd_ctl_elem_write_user_x32(struct snd_ctl_file *file,
+ struct snd_ctl_elem_value_x32 __user *data32)
+{
+ return ctl_elem_write_user(file, data32, &data32->value);
+}
+#endif /* CONFIG_X86_X32 */
+
/* add or replace a user control */
static int snd_ctl_elem_add_compat(struct snd_ctl_file *file,
struct snd_ctl_elem_info32 __user *data32,
@@ -393,6 +441,10 @@ enum {
SNDRV_CTL_IOCTL_ELEM_WRITE32 = _IOWR('U', 0x13, struct snd_ctl_elem_value32),
SNDRV_CTL_IOCTL_ELEM_ADD32 = _IOWR('U', 0x17, struct snd_ctl_elem_info32),
SNDRV_CTL_IOCTL_ELEM_REPLACE32 = _IOWR('U', 0x18, struct snd_ctl_elem_info32),
+#ifdef CONFIG_X86_X32
+ SNDRV_CTL_IOCTL_ELEM_READ_X32 = _IOWR('U', 0x12, struct snd_ctl_elem_value_x32),
+ SNDRV_CTL_IOCTL_ELEM_WRITE_X32 = _IOWR('U', 0x13, struct snd_ctl_elem_value_x32),
+#endif /* CONFIG_X86_X32 */
};
static inline long snd_ctl_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg)
@@ -431,6 +483,12 @@ static inline long snd_ctl_ioctl_compat(struct file *file, unsigned int cmd, uns
return snd_ctl_elem_add_compat(ctl, argp, 0);
case SNDRV_CTL_IOCTL_ELEM_REPLACE32:
return snd_ctl_elem_add_compat(ctl, argp, 1);
+#ifdef CONFIG_X86_X32
+ case SNDRV_CTL_IOCTL_ELEM_READ_X32:
+ return snd_ctl_elem_read_user_x32(ctl->card, argp);
+ case SNDRV_CTL_IOCTL_ELEM_WRITE_X32:
+ return snd_ctl_elem_write_user_x32(ctl, argp);
+#endif /* CONFIG_X86_X32 */
}
down_read(&snd_ioctl_rwsem);
diff --git a/sound/core/pcm_compat.c b/sound/core/pcm_compat.c
index 9630e9f72b7b..1f64ab0c2a95 100644
--- a/sound/core/pcm_compat.c
+++ b/sound/core/pcm_compat.c
@@ -183,6 +183,14 @@ static int snd_pcm_ioctl_channel_info_compat(struct snd_pcm_substream *substream
return err;
}
+#ifdef CONFIG_X86_X32
+/* X32 ABI has the same struct as x86-64 for snd_pcm_channel_info */
+static int snd_pcm_channel_info_user(struct snd_pcm_substream *substream,
+ struct snd_pcm_channel_info __user *src);
+#define snd_pcm_ioctl_channel_info_x32(s, p) \
+ snd_pcm_channel_info_user(s, p)
+#endif /* CONFIG_X86_X32 */
+
struct snd_pcm_status32 {
s32 state;
struct compat_timespec trigger_tstamp;
@@ -243,6 +251,71 @@ static int snd_pcm_status_user_compat(struct snd_pcm_substream *substream,
return err;
}
+#ifdef CONFIG_X86_X32
+/* X32 ABI has 64bit timespec and 64bit alignment */
+struct snd_pcm_status_x32 {
+ s32 state;
+ u32 rsvd; /* alignment */
+ struct timespec trigger_tstamp;
+ struct timespec tstamp;
+ u32 appl_ptr;
+ u32 hw_ptr;
+ s32 delay;
+ u32 avail;
+ u32 avail_max;
+ u32 overrange;
+ s32 suspended_state;
+ u32 audio_tstamp_data;
+ struct timespec audio_tstamp;
+ struct timespec driver_tstamp;
+ u32 audio_tstamp_accuracy;
+ unsigned char reserved[52-2*sizeof(struct timespec)];
+} __packed;
+
+#define put_timespec(src, dst) copy_to_user(dst, src, sizeof(*dst))
+
+static int snd_pcm_status_user_x32(struct snd_pcm_substream *substream,
+ struct snd_pcm_status_x32 __user *src,
+ bool ext)
+{
+ struct snd_pcm_status status;
+ int err;
+
+ memset(&status, 0, sizeof(status));
+ /*
+ * with extension, parameters are read/write,
+ * get audio_tstamp_data from user,
+ * ignore rest of status structure
+ */
+ if (ext && get_user(status.audio_tstamp_data,
+ (u32 __user *)(&src->audio_tstamp_data)))
+ return -EFAULT;
+ err = snd_pcm_status(substream, &status);
+ if (err < 0)
+ return err;
+
+ if (clear_user(src, sizeof(*src)))
+ return -EFAULT;
+ if (put_user(status.state, &src->state) ||
+ put_timespec(&status.trigger_tstamp, &src->trigger_tstamp) ||
+ put_timespec(&status.tstamp, &src->tstamp) ||
+ put_user(status.appl_ptr, &src->appl_ptr) ||
+ put_user(status.hw_ptr, &src->hw_ptr) ||
+ put_user(status.delay, &src->delay) ||
+ put_user(status.avail, &src->avail) ||
+ put_user(status.avail_max, &src->avail_max) ||
+ put_user(status.overrange, &src->overrange) ||
+ put_user(status.suspended_state, &src->suspended_state) ||
+ put_user(status.audio_tstamp_data, &src->audio_tstamp_data) ||
+ put_timespec(&status.audio_tstamp, &src->audio_tstamp) ||
+ put_timespec(&status.driver_tstamp, &src->driver_tstamp) ||
+ put_user(status.audio_tstamp_accuracy, &src->audio_tstamp_accuracy))
+ return -EFAULT;
+
+ return err;
+}
+#endif /* CONFIG_X86_X32 */
+
/* both for HW_PARAMS and HW_REFINE */
static int snd_pcm_ioctl_hw_params_compat(struct snd_pcm_substream *substream,
int refine,
@@ -469,6 +542,93 @@ static int snd_pcm_ioctl_sync_ptr_compat(struct snd_pcm_substream *substream,
return 0;
}
+#ifdef CONFIG_X86_X32
+/* X32 ABI has 64bit timespec and 64bit alignment */
+struct snd_pcm_mmap_status_x32 {
+ s32 state;
+ s32 pad1;
+ u32 hw_ptr;
+ u32 pad2; /* alignment */
+ struct timespec tstamp;
+ s32 suspended_state;
+ struct timespec audio_tstamp;
+} __packed;
+
+struct snd_pcm_mmap_control_x32 {
+ u32 appl_ptr;
+ u32 avail_min;
+};
+
+struct snd_pcm_sync_ptr_x32 {
+ u32 flags;
+ u32 rsvd; /* alignment */
+ union {
+ struct snd_pcm_mmap_status_x32 status;
+ unsigned char reserved[64];
+ } s;
+ union {
+ struct snd_pcm_mmap_control_x32 control;
+ unsigned char reserved[64];
+ } c;
+} __packed;
+
+static int snd_pcm_ioctl_sync_ptr_x32(struct snd_pcm_substream *substream,
+ struct snd_pcm_sync_ptr_x32 __user *src)
+{
+ struct snd_pcm_runtime *runtime = substream->runtime;
+ volatile struct snd_pcm_mmap_status *status;
+ volatile struct snd_pcm_mmap_control *control;
+ u32 sflags;
+ struct snd_pcm_mmap_control scontrol;
+ struct snd_pcm_mmap_status sstatus;
+ snd_pcm_uframes_t boundary;
+ int err;
+
+ if (snd_BUG_ON(!runtime))
+ return -EINVAL;
+
+ if (get_user(sflags, &src->flags) ||
+ get_user(scontrol.appl_ptr, &src->c.control.appl_ptr) ||
+ get_user(scontrol.avail_min, &src->c.control.avail_min))
+ return -EFAULT;
+ if (sflags & SNDRV_PCM_SYNC_PTR_HWSYNC) {
+ err = snd_pcm_hwsync(substream);
+ if (err < 0)
+ return err;
+ }
+ status = runtime->status;
+ control = runtime->control;
+ boundary = recalculate_boundary(runtime);
+ if (!boundary)
+ boundary = 0x7fffffff;
+ snd_pcm_stream_lock_irq(substream);
+ /* FIXME: we should consider the boundary for the sync from app */
+ if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL))
+ control->appl_ptr = scontrol.appl_ptr;
+ else
+ scontrol.appl_ptr = control->appl_ptr % boundary;
+ if (!(sflags & SNDRV_PCM_SYNC_PTR_AVAIL_MIN))
+ control->avail_min = scontrol.avail_min;
+ else
+ scontrol.avail_min = control->avail_min;
+ sstatus.state = status->state;
+ sstatus.hw_ptr = status->hw_ptr % boundary;
+ sstatus.tstamp = status->tstamp;
+ sstatus.suspended_state = status->suspended_state;
+ sstatus.audio_tstamp = status->audio_tstamp;
+ snd_pcm_stream_unlock_irq(substream);
+ if (put_user(sstatus.state, &src->s.status.state) ||
+ put_user(sstatus.hw_ptr, &src->s.status.hw_ptr) ||
+ put_timespec(&sstatus.tstamp, &src->s.status.tstamp) ||
+ put_user(sstatus.suspended_state, &src->s.status.suspended_state) ||
+ put_timespec(&sstatus.audio_tstamp, &src->s.status.audio_tstamp) ||
+ put_user(scontrol.appl_ptr, &src->c.control.appl_ptr) ||
+ put_user(scontrol.avail_min, &src->c.control.avail_min))
+ return -EFAULT;
+
+ return 0;
+}
+#endif /* CONFIG_X86_X32 */
/*
*/
@@ -487,7 +647,12 @@ enum {
SNDRV_PCM_IOCTL_WRITEN_FRAMES32 = _IOW('A', 0x52, struct snd_xfern32),
SNDRV_PCM_IOCTL_READN_FRAMES32 = _IOR('A', 0x53, struct snd_xfern32),
SNDRV_PCM_IOCTL_SYNC_PTR32 = _IOWR('A', 0x23, struct snd_pcm_sync_ptr32),
-
+#ifdef CONFIG_X86_X32
+ SNDRV_PCM_IOCTL_CHANNEL_INFO_X32 = _IOR('A', 0x32, struct snd_pcm_channel_info),
+ SNDRV_PCM_IOCTL_STATUS_X32 = _IOR('A', 0x20, struct snd_pcm_status_x32),
+ SNDRV_PCM_IOCTL_STATUS_EXT_X32 = _IOWR('A', 0x24, struct snd_pcm_status_x32),
+ SNDRV_PCM_IOCTL_SYNC_PTR_X32 = _IOWR('A', 0x23, struct snd_pcm_sync_ptr_x32),
+#endif /* CONFIG_X86_X32 */
};
static long snd_pcm_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg)
@@ -559,6 +724,16 @@ static long snd_pcm_ioctl_compat(struct file *file, unsigned int cmd, unsigned l
return snd_pcm_ioctl_rewind_compat(substream, argp);
case SNDRV_PCM_IOCTL_FORWARD32:
return snd_pcm_ioctl_forward_compat(substream, argp);
+#ifdef CONFIG_X86_X32
+ case SNDRV_PCM_IOCTL_STATUS_X32:
+ return snd_pcm_status_user_x32(substream, argp, false);
+ case SNDRV_PCM_IOCTL_STATUS_EXT_X32:
+ return snd_pcm_status_user_x32(substream, argp, true);
+ case SNDRV_PCM_IOCTL_SYNC_PTR_X32:
+ return snd_pcm_ioctl_sync_ptr_x32(substream, argp);
+ case SNDRV_PCM_IOCTL_CHANNEL_INFO_X32:
+ return snd_pcm_ioctl_channel_info_x32(substream, argp);
+#endif /* CONFIG_X86_X32 */
}
return -ENOIOCTLCMD;
diff --git a/sound/core/rawmidi_compat.c b/sound/core/rawmidi_compat.c
index 5268c1f58c25..f69764d7cdd7 100644
--- a/sound/core/rawmidi_compat.c
+++ b/sound/core/rawmidi_compat.c
@@ -85,8 +85,7 @@ static int snd_rawmidi_ioctl_status_compat(struct snd_rawmidi_file *rfile,
if (err < 0)
return err;
- if (put_user(status.tstamp.tv_sec, &src->tstamp.tv_sec) ||
- put_user(status.tstamp.tv_nsec, &src->tstamp.tv_nsec) ||
+ if (compat_put_timespec(&status.tstamp, &src->tstamp) ||
put_user(status.avail, &src->avail) ||
put_user(status.xruns, &src->xruns))
return -EFAULT;
@@ -94,9 +93,58 @@ static int snd_rawmidi_ioctl_status_compat(struct snd_rawmidi_file *rfile,
return 0;
}
+#ifdef CONFIG_X86_X32
+/* X32 ABI has 64bit timespec and 64bit alignment */
+struct snd_rawmidi_status_x32 {
+ s32 stream;
+ u32 rsvd; /* alignment */
+ struct timespec tstamp;
+ u32 avail;
+ u32 xruns;
+ unsigned char reserved[16];
+} __attribute__((packed));
+
+#define put_timespec(src, dst) copy_to_user(dst, src, sizeof(*dst))
+
+static int snd_rawmidi_ioctl_status_x32(struct snd_rawmidi_file *rfile,
+ struct snd_rawmidi_status_x32 __user *src)
+{
+ int err;
+ struct snd_rawmidi_status status;
+
+ if (rfile->output == NULL)
+ return -EINVAL;
+ if (get_user(status.stream, &src->stream))
+ return -EFAULT;
+
+ switch (status.stream) {
+ case SNDRV_RAWMIDI_STREAM_OUTPUT:
+ err = snd_rawmidi_output_status(rfile->output, &status);
+ break;
+ case SNDRV_RAWMIDI_STREAM_INPUT:
+ err = snd_rawmidi_input_status(rfile->input, &status);
+ break;
+ default:
+ return -EINVAL;
+ }
+ if (err < 0)
+ return err;
+
+ if (put_timespec(&status.tstamp, &src->tstamp) ||
+ put_user(status.avail, &src->avail) ||
+ put_user(status.xruns, &src->xruns))
+ return -EFAULT;
+
+ return 0;
+}
+#endif /* CONFIG_X86_X32 */
+
enum {
SNDRV_RAWMIDI_IOCTL_PARAMS32 = _IOWR('W', 0x10, struct snd_rawmidi_params32),
SNDRV_RAWMIDI_IOCTL_STATUS32 = _IOWR('W', 0x20, struct snd_rawmidi_status32),
+#ifdef CONFIG_X86_X32
+ SNDRV_RAWMIDI_IOCTL_STATUS_X32 = _IOWR('W', 0x20, struct snd_rawmidi_status_x32),
+#endif /* CONFIG_X86_X32 */
};
static long snd_rawmidi_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg)
@@ -115,6 +163,10 @@ static long snd_rawmidi_ioctl_compat(struct file *file, unsigned int cmd, unsign
return snd_rawmidi_ioctl_params_compat(rfile, argp);
case SNDRV_RAWMIDI_IOCTL_STATUS32:
return snd_rawmidi_ioctl_status_compat(rfile, argp);
+#ifdef CONFIG_X86_X32
+ case SNDRV_RAWMIDI_IOCTL_STATUS_X32:
+ return snd_rawmidi_ioctl_status_x32(rfile, argp);
+#endif /* CONFIG_X86_X32 */
}
return -ENOIOCTLCMD;
}
diff --git a/sound/core/seq/oss/seq_oss.c b/sound/core/seq/oss/seq_oss.c
index 8db156b207f1..8cdf489df80e 100644
--- a/sound/core/seq/oss/seq_oss.c
+++ b/sound/core/seq/oss/seq_oss.c
@@ -149,8 +149,6 @@ odev_release(struct inode *inode, struct file *file)
if ((dp = file->private_data) == NULL)
return 0;
- snd_seq_oss_drain_write(dp);
-
mutex_lock(&register_mutex);
snd_seq_oss_release(dp);
mutex_unlock(&register_mutex);
diff --git a/sound/core/seq/oss/seq_oss_device.h b/sound/core/seq/oss/seq_oss_device.h
index b43924325249..d7b4d016b547 100644
--- a/sound/core/seq/oss/seq_oss_device.h
+++ b/sound/core/seq/oss/seq_oss_device.h
@@ -127,7 +127,6 @@ int snd_seq_oss_write(struct seq_oss_devinfo *dp, const char __user *buf, int co
unsigned int snd_seq_oss_poll(struct seq_oss_devinfo *dp, struct file *file, poll_table * wait);
void snd_seq_oss_reset(struct seq_oss_devinfo *dp);
-void snd_seq_oss_drain_write(struct seq_oss_devinfo *dp);
/* */
void snd_seq_oss_process_queue(struct seq_oss_devinfo *dp, abstime_t time);
diff --git a/sound/core/seq/oss/seq_oss_init.c b/sound/core/seq/oss/seq_oss_init.c
index 6779e82b46dd..92c96a95a903 100644
--- a/sound/core/seq/oss/seq_oss_init.c
+++ b/sound/core/seq/oss/seq_oss_init.c
@@ -436,22 +436,6 @@ snd_seq_oss_release(struct seq_oss_devinfo *dp)
/*
- * Wait until the queue is empty (if we don't have nonblock)
- */
-void
-snd_seq_oss_drain_write(struct seq_oss_devinfo *dp)
-{
- if (! dp->timer->running)
- return;
- if (is_write_mode(dp->file_mode) && !is_nonblock_mode(dp->file_mode) &&
- dp->writeq) {
- while (snd_seq_oss_writeq_sync(dp->writeq))
- ;
- }
-}
-
-
-/*
* reset sequencer devices
*/
void
diff --git a/sound/core/timer_compat.c b/sound/core/timer_compat.c
index e05802ae6e1b..2e908225d754 100644
--- a/sound/core/timer_compat.c
+++ b/sound/core/timer_compat.c
@@ -70,13 +70,14 @@ static int snd_timer_user_status_compat(struct file *file,
struct snd_timer_status32 __user *_status)
{
struct snd_timer_user *tu;
- struct snd_timer_status status;
+ struct snd_timer_status32 status;
tu = file->private_data;
if (snd_BUG_ON(!tu->timeri))
return -ENXIO;
memset(&status, 0, sizeof(status));
- status.tstamp = tu->tstamp;
+ status.tstamp.tv_sec = tu->tstamp.tv_sec;
+ status.tstamp.tv_nsec = tu->tstamp.tv_nsec;
status.resolution = snd_timer_resolution(tu->timeri);
status.lost = tu->timeri->lost;
status.overrun = tu->overrun;
@@ -88,12 +89,21 @@ static int snd_timer_user_status_compat(struct file *file,
return 0;
}
+#ifdef CONFIG_X86_X32
+/* X32 ABI has the same struct as x86-64 */
+#define snd_timer_user_status_x32(file, s) \
+ snd_timer_user_status(file, s)
+#endif /* CONFIG_X86_X32 */
+
/*
*/
enum {
SNDRV_TIMER_IOCTL_INFO32 = _IOR('T', 0x11, struct snd_timer_info32),
SNDRV_TIMER_IOCTL_STATUS32 = _IOW('T', 0x14, struct snd_timer_status32),
+#ifdef CONFIG_X86_X32
+ SNDRV_TIMER_IOCTL_STATUS_X32 = _IOW('T', 0x14, struct snd_timer_status),
+#endif /* CONFIG_X86_X32 */
};
static long snd_timer_user_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg)
@@ -122,6 +132,10 @@ static long snd_timer_user_ioctl_compat(struct file *file, unsigned int cmd, uns
return snd_timer_user_info_compat(file, argp);
case SNDRV_TIMER_IOCTL_STATUS32:
return snd_timer_user_status_compat(file, argp);
+#ifdef CONFIG_X86_X32
+ case SNDRV_TIMER_IOCTL_STATUS_X32:
+ return snd_timer_user_status_x32(file, argp);
+#endif /* CONFIG_X86_X32 */
}
return -ENOIOCTLCMD;
}
diff --git a/sound/hda/hdac_controller.c b/sound/hda/hdac_controller.c
index b5a17cb510a0..8c486235c905 100644
--- a/sound/hda/hdac_controller.c
+++ b/sound/hda/hdac_controller.c
@@ -426,18 +426,22 @@ EXPORT_SYMBOL_GPL(snd_hdac_bus_stop_chip);
* @bus: HD-audio core bus
* @status: INTSTS register value
* @ask: callback to be called for woken streams
+ *
+ * Returns the bits of handled streams, or zero if no stream is handled.
*/
-void snd_hdac_bus_handle_stream_irq(struct hdac_bus *bus, unsigned int status,
+int snd_hdac_bus_handle_stream_irq(struct hdac_bus *bus, unsigned int status,
void (*ack)(struct hdac_bus *,
struct hdac_stream *))
{
struct hdac_stream *azx_dev;
u8 sd_status;
+ int handled = 0;
list_for_each_entry(azx_dev, &bus->stream_list, list) {
if (status & azx_dev->sd_int_sta_mask) {
sd_status = snd_hdac_stream_readb(azx_dev, SD_STS);
snd_hdac_stream_writeb(azx_dev, SD_STS, SD_INT_MASK);
+ handled |= 1 << azx_dev->index;
if (!azx_dev->substream || !azx_dev->running ||
!(sd_status & SD_INT_COMPLETE))
continue;
@@ -445,6 +449,7 @@ void snd_hdac_bus_handle_stream_irq(struct hdac_bus *bus, unsigned int status,
ack(bus, azx_dev);
}
}
+ return handled;
}
EXPORT_SYMBOL_GPL(snd_hdac_bus_handle_stream_irq);
diff --git a/sound/pci/hda/hda_controller.c b/sound/pci/hda/hda_controller.c
index 37cf9cee9835..27de8015717d 100644
--- a/sound/pci/hda/hda_controller.c
+++ b/sound/pci/hda/hda_controller.c
@@ -930,6 +930,8 @@ irqreturn_t azx_interrupt(int irq, void *dev_id)
struct azx *chip = dev_id;
struct hdac_bus *bus = azx_bus(chip);
u32 status;
+ bool active, handled = false;
+ int repeat = 0; /* count for avoiding endless loop */
#ifdef CONFIG_PM
if (azx_has_pm_runtime(chip))
@@ -939,33 +941,36 @@ irqreturn_t azx_interrupt(int irq, void *dev_id)
spin_lock(&bus->reg_lock);
- if (chip->disabled) {
- spin_unlock(&bus->reg_lock);
- return IRQ_NONE;
- }
-
- status = azx_readl(chip, INTSTS);
- if (status == 0 || status == 0xffffffff) {
- spin_unlock(&bus->reg_lock);
- return IRQ_NONE;
- }
+ if (chip->disabled)
+ goto unlock;
- snd_hdac_bus_handle_stream_irq(bus, status, stream_update);
+ do {
+ status = azx_readl(chip, INTSTS);
+ if (status == 0 || status == 0xffffffff)
+ break;
- /* clear rirb int */
- status = azx_readb(chip, RIRBSTS);
- if (status & RIRB_INT_MASK) {
- if (status & RIRB_INT_RESPONSE) {
- if (chip->driver_caps & AZX_DCAPS_CTX_WORKAROUND)
- udelay(80);
- snd_hdac_bus_update_rirb(bus);
+ handled = true;
+ active = false;
+ if (snd_hdac_bus_handle_stream_irq(bus, status, stream_update))
+ active = true;
+
+ /* clear rirb int */
+ status = azx_readb(chip, RIRBSTS);
+ if (status & RIRB_INT_MASK) {
+ active = true;
+ if (status & RIRB_INT_RESPONSE) {
+ if (chip->driver_caps & AZX_DCAPS_CTX_WORKAROUND)
+ udelay(80);
+ snd_hdac_bus_update_rirb(bus);
+ }
+ azx_writeb(chip, RIRBSTS, RIRB_INT_MASK);
}
- azx_writeb(chip, RIRBSTS, RIRB_INT_MASK);
- }
+ } while (active && ++repeat < 10);
+ unlock:
spin_unlock(&bus->reg_lock);
- return IRQ_HANDLED;
+ return IRQ_RETVAL(handled);
}
EXPORT_SYMBOL_GPL(azx_interrupt);
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index ce6b97f31390..e5240cb3749f 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -363,7 +363,10 @@ enum {
((pci)->device == 0x0d0c) || \
((pci)->device == 0x160c))
-#define IS_BROXTON(pci) ((pci)->device == 0x5a98)
+#define IS_SKL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa170)
+#define IS_SKL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d70)
+#define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98)
+#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci))
static char *driver_short_names[] = {
[AZX_DRIVER_ICH] = "HDA Intel",
@@ -540,13 +543,13 @@ static void hda_intel_init_chip(struct azx *chip, bool full_reset)
if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL)
snd_hdac_set_codec_wakeup(bus, true);
- if (IS_BROXTON(pci)) {
+ if (IS_SKL_PLUS(pci)) {
pci_read_config_dword(pci, INTEL_HDA_CGCTL, &val);
val = val & ~INTEL_HDA_CGCTL_MISCBDCGE;
pci_write_config_dword(pci, INTEL_HDA_CGCTL, val);
}
azx_init_chip(chip, full_reset);
- if (IS_BROXTON(pci)) {
+ if (IS_SKL_PLUS(pci)) {
pci_read_config_dword(pci, INTEL_HDA_CGCTL, &val);
val = val | INTEL_HDA_CGCTL_MISCBDCGE;
pci_write_config_dword(pci, INTEL_HDA_CGCTL, val);
@@ -555,7 +558,7 @@ static void hda_intel_init_chip(struct azx *chip, bool full_reset)
snd_hdac_set_codec_wakeup(bus, false);
/* reduce dma latency to avoid noise */
- if (IS_BROXTON(pci))
+ if (IS_BXT(pci))
bxt_reduce_dma_latency(chip);
}
@@ -977,11 +980,6 @@ static int azx_resume(struct device *dev)
/* put codec down to D3 at hibernation for Intel SKL+;
* otherwise BIOS may still access the codec and screw up the driver
*/
-#define IS_SKL(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0xa170)
-#define IS_SKL_LP(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x9d70)
-#define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98)
-#define IS_SKL_PLUS(pci) (IS_SKL(pci) || IS_SKL_LP(pci) || IS_BXT(pci))
-
static int azx_freeze_noirq(struct device *dev)
{
struct pci_dev *pci = to_pci_dev(dev);
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index 8ee78dbd4c60..bcbc4ee10130 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -2477,13 +2477,6 @@ static int patch_generic_hdmi(struct hda_codec *codec)
is_broxton(codec))
codec->core.link_power_control = 1;
- if (codec_has_acomp(codec)) {
- codec->depop_delay = 0;
- spec->i915_audio_ops.audio_ptr = codec;
- spec->i915_audio_ops.pin_eld_notify = intel_pin_eld_notify;
- snd_hdac_i915_register_notifier(&spec->i915_audio_ops);
- }
-
if (hdmi_parse_codec(codec) < 0) {
if (spec->i915_bound)
snd_hdac_i915_exit(&codec->bus->core);
@@ -2505,6 +2498,18 @@ static int patch_generic_hdmi(struct hda_codec *codec)
init_channel_allocations();
+ if (codec_has_acomp(codec)) {
+ codec->depop_delay = 0;
+ spec->i915_audio_ops.audio_ptr = codec;
+ /* intel_audio_codec_enable() or intel_audio_codec_disable()
+ * will call pin_eld_notify with using audio_ptr pointer
+ * We need make sure audio_ptr is really setup
+ */
+ wmb();
+ spec->i915_audio_ops.pin_eld_notify = intel_pin_eld_notify;
+ snd_hdac_i915_register_notifier(&spec->i915_audio_ops);
+ }
+
return 0;
}
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index efd4980cffb8..93d2156b6241 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -3801,6 +3801,10 @@ static void alc_headset_mode_mic_in(struct hda_codec *codec, hda_nid_t hp_pin,
static void alc_headset_mode_default(struct hda_codec *codec)
{
+ static struct coef_fw coef0225[] = {
+ UPDATE_COEF(0x45, 0x3f<<10, 0x34<<10),
+ {}
+ };
static struct coef_fw coef0255[] = {
WRITE_COEF(0x45, 0xc089),
WRITE_COEF(0x45, 0xc489),
@@ -3842,6 +3846,9 @@ static void alc_headset_mode_default(struct hda_codec *codec)
};
switch (codec->core.vendor_id) {
+ case 0x10ec0225:
+ alc_process_coef_fw(codec, coef0225);
+ break;
case 0x10ec0255:
case 0x10ec0256:
alc_process_coef_fw(codec, coef0255);
@@ -4749,6 +4756,9 @@ enum {
ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE,
ALC293_FIXUP_LENOVO_SPK_NOISE,
ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY,
+ ALC255_FIXUP_DELL_SPK_NOISE,
+ ALC225_FIXUP_DELL1_MIC_NO_PRESENCE,
+ ALC280_FIXUP_HP_HEADSET_MIC,
};
static const struct hda_fixup alc269_fixups[] = {
@@ -5368,6 +5378,29 @@ static const struct hda_fixup alc269_fixups[] = {
.type = HDA_FIXUP_FUNC,
.v.func = alc233_fixup_lenovo_line2_mic_hotkey,
},
+ [ALC255_FIXUP_DELL_SPK_NOISE] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc_fixup_disable_aamix,
+ .chained = true,
+ .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE
+ },
+ [ALC225_FIXUP_DELL1_MIC_NO_PRESENCE] = {
+ .type = HDA_FIXUP_VERBS,
+ .v.verbs = (const struct hda_verb[]) {
+ /* Disable pass-through path for FRONT 14h */
+ { 0x20, AC_VERB_SET_COEF_INDEX, 0x36 },
+ { 0x20, AC_VERB_SET_PROC_COEF, 0x57d7 },
+ {}
+ },
+ .chained = true,
+ .chain_id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE
+ },
+ [ALC280_FIXUP_HP_HEADSET_MIC] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc_fixup_disable_aamix,
+ .chained = true,
+ .chain_id = ALC269_FIXUP_HEADSET_MIC,
+ },
};
static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -5379,6 +5412,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1025, 0x080d, "Acer Aspire V5-122P", ALC269_FIXUP_ASPIRE_HEADSET_MIC),
SND_PCI_QUIRK(0x1025, 0x0740, "Acer AO725", ALC271_FIXUP_HP_GATE_MIC_JACK),
SND_PCI_QUIRK(0x1025, 0x0742, "Acer AO756", ALC271_FIXUP_HP_GATE_MIC_JACK),
+ SND_PCI_QUIRK(0x1025, 0x0762, "Acer Aspire E1-472", ALC271_FIXUP_HP_GATE_MIC_JACK_E1_572),
SND_PCI_QUIRK(0x1025, 0x0775, "Acer Aspire E1-572", ALC271_FIXUP_HP_GATE_MIC_JACK_E1_572),
SND_PCI_QUIRK(0x1025, 0x079b, "Acer Aspire V5-573G", ALC282_FIXUP_ASPIRE_V5_PINS),
SND_PCI_QUIRK(0x1025, 0x106d, "Acer Cloudbook 14", ALC283_FIXUP_CHROME_BOOK),
@@ -5410,6 +5444,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1028, 0x06df, "Dell", ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK),
SND_PCI_QUIRK(0x1028, 0x06e0, "Dell", ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK),
SND_PCI_QUIRK(0x1028, 0x0704, "Dell XPS 13", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
+ SND_PCI_QUIRK(0x1028, 0x0725, "Dell Inspiron 3162", ALC255_FIXUP_DELL_SPK_NOISE),
SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2),
@@ -5470,6 +5505,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x2335, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
SND_PCI_QUIRK(0x103c, 0x2336, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
SND_PCI_QUIRK(0x103c, 0x2337, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+ SND_PCI_QUIRK(0x103c, 0x221c, "HP EliteBook 755 G2", ALC280_FIXUP_HP_HEADSET_MIC),
SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
SND_PCI_QUIRK(0x1043, 0x115d, "Asus 1015E", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
@@ -5638,10 +5674,10 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
{0x21, 0x03211020}
static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
- SND_HDA_PIN_QUIRK(0x10ec0225, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
+ SND_HDA_PIN_QUIRK(0x10ec0225, 0x1028, "Dell", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE,
ALC225_STANDARD_PINS,
{0x14, 0x901701a0}),
- SND_HDA_PIN_QUIRK(0x10ec0225, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
+ SND_HDA_PIN_QUIRK(0x10ec0225, 0x1028, "Dell", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE,
ALC225_STANDARD_PINS,
{0x14, 0x901701b0}),
SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL2_MIC_NO_PRESENCE,
diff --git a/sound/pci/rme9652/hdsp.c b/sound/pci/rme9652/hdsp.c
index 2875b4f6d8c9..7c8941b8b2de 100644
--- a/sound/pci/rme9652/hdsp.c
+++ b/sound/pci/rme9652/hdsp.c
@@ -2879,7 +2879,7 @@ static int snd_hdsp_get_dds_offset(struct snd_kcontrol *kcontrol, struct snd_ctl
{
struct hdsp *hdsp = snd_kcontrol_chip(kcontrol);
- ucontrol->value.enumerated.item[0] = hdsp_dds_offset(hdsp);
+ ucontrol->value.integer.value[0] = hdsp_dds_offset(hdsp);
return 0;
}
@@ -2891,7 +2891,7 @@ static int snd_hdsp_put_dds_offset(struct snd_kcontrol *kcontrol, struct snd_ctl
if (!snd_hdsp_use_is_exclusive(hdsp))
return -EBUSY;
- val = ucontrol->value.enumerated.item[0];
+ val = ucontrol->value.integer.value[0];
spin_lock_irq(&hdsp->lock);
if (val != hdsp_dds_offset(hdsp))
change = (hdsp_set_dds_offset(hdsp, val) == 0) ? 1 : 0;
diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c
index 8bc8016c173d..a4a999a0317e 100644
--- a/sound/pci/rme9652/hdspm.c
+++ b/sound/pci/rme9652/hdspm.c
@@ -1601,6 +1601,9 @@ static void hdspm_set_dds_value(struct hdspm *hdspm, int rate)
{
u64 n;
+ if (snd_BUG_ON(rate <= 0))
+ return;
+
if (rate >= 112000)
rate /= 4;
else if (rate >= 56000)
@@ -2215,6 +2218,8 @@ static int hdspm_get_system_sample_rate(struct hdspm *hdspm)
} else {
/* slave mode, return external sample rate */
rate = hdspm_external_sample_rate(hdspm);
+ if (!rate)
+ rate = hdspm->system_sample_rate;
}
}
@@ -2260,8 +2265,11 @@ static int snd_hdspm_put_system_sample_rate(struct snd_kcontrol *kcontrol,
ucontrol)
{
struct hdspm *hdspm = snd_kcontrol_chip(kcontrol);
+ int rate = ucontrol->value.integer.value[0];
- hdspm_set_dds_value(hdspm, ucontrol->value.enumerated.item[0]);
+ if (rate < 27000 || rate > 207000)
+ return -EINVAL;
+ hdspm_set_dds_value(hdspm, ucontrol->value.integer.value[0]);
return 0;
}
@@ -4449,7 +4457,7 @@ static int snd_hdspm_get_tco_word_term(struct snd_kcontrol *kcontrol,
{
struct hdspm *hdspm = snd_kcontrol_chip(kcontrol);
- ucontrol->value.enumerated.item[0] = hdspm->tco->term;
+ ucontrol->value.integer.value[0] = hdspm->tco->term;
return 0;
}
@@ -4460,8 +4468,8 @@ static int snd_hdspm_put_tco_word_term(struct snd_kcontrol *kcontrol,
{
struct hdspm *hdspm = snd_kcontrol_chip(kcontrol);
- if (hdspm->tco->term != ucontrol->value.enumerated.item[0]) {
- hdspm->tco->term = ucontrol->value.enumerated.item[0];
+ if (hdspm->tco->term != ucontrol->value.integer.value[0]) {
+ hdspm->tco->term = ucontrol->value.integer.value[0];
hdspm_tco_write(hdspm);
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 4f6ce1cac8e2..c458d60d5030 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1124,6 +1124,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip)
case USB_ID(0x045E, 0x076F): /* MS Lifecam HD-6000 */
case USB_ID(0x045E, 0x0772): /* MS Lifecam Studio */
case USB_ID(0x045E, 0x0779): /* MS Lifecam HD-3000 */
+ case USB_ID(0x047F, 0xAA05): /* Plantronics DA45 */
case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */
case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */
case USB_ID(0x21B4, 0x0081): /* AudioQuest DragonFly */
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 90bd2ea41032..b3281dcd4a5d 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -217,13 +217,16 @@ static int nfit_test_cmd_set_config_data(struct nd_cmd_set_config_hdr *nd_cmd,
return rc;
}
+#define NFIT_TEST_ARS_RECORDS 4
+
static int nfit_test_cmd_ars_cap(struct nd_cmd_ars_cap *nd_cmd,
unsigned int buf_len)
{
if (buf_len < sizeof(*nd_cmd))
return -EINVAL;
- nd_cmd->max_ars_out = 256;
+ nd_cmd->max_ars_out = sizeof(struct nd_cmd_ars_status)
+ + NFIT_TEST_ARS_RECORDS * sizeof(struct nd_ars_record);
nd_cmd->status = (ND_ARS_PERSISTENT | ND_ARS_VOLATILE) << 16;
return 0;
@@ -246,7 +249,8 @@ static int nfit_test_cmd_ars_status(struct nd_cmd_ars_status *nd_cmd,
if (buf_len < sizeof(*nd_cmd))
return -EINVAL;
- nd_cmd->out_length = 256;
+ nd_cmd->out_length = sizeof(struct nd_cmd_ars_status);
+ /* TODO: emit error records */
nd_cmd->num_records = 0;
nd_cmd->address = 0;
nd_cmd->length = -1ULL;
diff --git a/tools/testing/selftests/ftrace/test.d/instances/instance.tc b/tools/testing/selftests/ftrace/test.d/instances/instance.tc
index 773e276ff90b..1e1abe0ad354 100644
--- a/tools/testing/selftests/ftrace/test.d/instances/instance.tc
+++ b/tools/testing/selftests/ftrace/test.d/instances/instance.tc
@@ -39,28 +39,23 @@ instance_slam() {
}
instance_slam &
-x=`jobs -l`
-p1=`echo $x | cut -d' ' -f2`
+p1=$!
echo $p1
instance_slam &
-x=`jobs -l | tail -1`
-p2=`echo $x | cut -d' ' -f2`
+p2=$!
echo $p2
instance_slam &
-x=`jobs -l | tail -1`
-p3=`echo $x | cut -d' ' -f2`
+p3=$!
echo $p3
instance_slam &
-x=`jobs -l | tail -1`
-p4=`echo $x | cut -d' ' -f2`
+p4=$!
echo $p4
instance_slam &
-x=`jobs -l | tail -1`
-p5=`echo $x | cut -d' ' -f2`
+p5=$!
echo $p5
ls -lR >/dev/null
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 043032c6a5a4..00429b392c61 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1875,8 +1875,8 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-
- int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8;
+ int nr_longs = BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS);
+ int sz = nr_longs * sizeof(unsigned long);
vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL);
vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL);
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 353159922456..db2dd3335c6a 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -172,7 +172,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
* do alloc nowait since if we are going to sleep anyway we
* may as well sleep faulting in page
*/
- work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT);
+ work = kmem_cache_zalloc(async_pf_cache, GFP_NOWAIT | __GFP_NOWARN);
if (!work)
return 0;