aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/testing/sysfs-class-net24
-rw-r--r--Documentation/devicetree/bindings/net/can/rcar_can.txt7
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/mpc5200.txt2
-rw-r--r--Documentation/networking/00-INDEX2
-rw-r--r--Documentation/networking/can.rst1437
-rw-r--r--Documentation/networking/can.txt1308
-rw-r--r--Documentation/networking/filter.txt2
-rw-r--r--Documentation/networking/index.rst1
-rw-r--r--Documentation/networking/pktgen.txt19
-rw-r--r--Documentation/networking/xfrm_device.txt3
-rw-r--r--Documentation/virtual/kvm/api.txt46
-rw-r--r--Documentation/x86/pti.txt2
-rw-r--r--MAINTAINERS15
-rw-r--r--Makefile2
-rw-r--r--arch/alpha/kernel/sys_sio.c35
-rw-r--r--arch/alpha/lib/ev6-memset.S12
-rw-r--r--arch/arm/boot/dts/imx6q-b450v3.dts52
-rw-r--r--arch/arm/boot/dts/imx6q-b650v3.dts52
-rw-r--r--arch/arm/boot/dts/imx6q-b850v3.dts75
-rw-r--r--arch/arm/boot/dts/imx6q-bx50v3.dtsi62
-rw-r--r--arch/arm/net/bpf_jit_32.c8
-rw-r--r--arch/arm64/kvm/handle_exit.c4
-rw-r--r--arch/arm64/net/bpf_jit_comp.c13
-rw-r--r--arch/mips/Kconfig12
-rw-r--r--arch/mips/Kconfig.debug14
-rw-r--r--arch/mips/ar7/platform.c2
-rw-r--r--arch/mips/ath25/devices.c2
-rw-r--r--arch/mips/kernel/mips-cm.c1
-rw-r--r--arch/mips/lib/Makefile3
-rw-r--r--arch/mips/lib/libgcc.h17
-rw-r--r--arch/mips/lib/multi3.c54
-rw-r--r--arch/mips/mm/uasm-micromips.c2
-rw-r--r--arch/mips/net/ebpf_jit.c29
-rw-r--r--arch/mips/ralink/timer.c4
-rw-r--r--arch/mips/rb532/Makefile4
-rw-r--r--arch/mips/rb532/devices.c4
-rw-r--r--arch/powerpc/include/uapi/asm/kvm.h25
-rw-r--r--arch/powerpc/kvm/powerpc.c131
-rw-r--r--arch/powerpc/net/bpf_jit_comp64.c8
-rw-r--r--arch/s390/include/asm/kvm_host.h3
-rw-r--r--arch/s390/include/uapi/asm/kvm.h5
-rw-r--r--arch/s390/kvm/kvm-s390.c12
-rw-r--r--arch/s390/kvm/vsie.c10
-rw-r--r--arch/s390/net/bpf_jit_comp.c10
-rw-r--r--arch/sparc/crypto/Makefile2
-rw-r--r--arch/sparc/net/bpf_jit_comp_64.c18
-rw-r--r--arch/x86/entry/entry_64.S2
-rw-r--r--arch/x86/include/asm/nospec-branch.h10
-rw-r--r--arch/x86/include/asm/traps.h1
-rw-r--r--arch/x86/kernel/Makefile5
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c5
-rw-r--r--arch/x86/kernel/ftrace_64.S24
-rw-r--r--arch/x86/kernel/kprobes/opt.c23
-rw-r--r--arch/x86/kernel/process.c25
-rw-r--r--arch/x86/kernel/unwind_orc.c48
-rw-r--r--arch/x86/kernel/vmlinux.lds.S6
-rw-r--r--arch/x86/kvm/x86.c4
-rw-r--r--arch/x86/lib/retpoline.S5
-rw-r--r--arch/x86/mm/mem_encrypt.c2
-rw-r--r--arch/x86/net/bpf_jit_comp.c20
-rw-r--r--arch/x86/pci/fixup.c32
-rw-r--r--drivers/base/property.c104
-rw-r--r--drivers/bluetooth/btbcm.c1
-rw-r--r--drivers/bluetooth/btintel.c155
-rw-r--r--drivers/bluetooth/btintel.h33
-rw-r--r--drivers/bluetooth/btusb.c133
-rw-r--r--drivers/bluetooth/hci_bcm.c5
-rw-r--r--drivers/bluetooth/hci_intel.c186
-rw-r--r--drivers/md/dm-crypt.c20
-rw-r--r--drivers/md/dm-integrity.c49
-rw-r--r--drivers/md/dm-thin-metadata.c6
-rw-r--r--drivers/md/persistent-data/dm-btree.c19
-rw-r--r--drivers/net/caif/caif_hsi.c1
-rw-r--r--drivers/net/can/dev.c2
-rw-r--r--drivers/net/can/vcan.c2
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c6
-rw-r--r--drivers/net/ethernet/broadcom/bnx2.c4
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c3
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c3
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c3
-rw-r--r--drivers/net/ethernet/chelsio/Kconfig1
-rw-r--r--drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c1
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/Makefile3
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c51
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cudbg_zlib.c3
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cudbg_zlib.h13
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4.h5
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c3
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c105
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c9
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/sge.c3
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4_hw.c58
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4_regs.h11
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h16
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4vf/adapter.h1
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c2
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4vf/sge.c15
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h1
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c28
-rw-r--r--drivers/net/ethernet/emulex/benet/be_main.c8
-rw-r--r--drivers/net/ethernet/freescale/fec.h5
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c8
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c52
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h7
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hnae3.h5
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.c4
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c37
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h47
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c456
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h3
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c8
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c5
-rw-r--r--drivers/net/ethernet/ibm/emac/core.c6
-rw-r--r--drivers/net/ethernet/ibm/emac/emac.h4
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.c73
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.h2
-rw-r--r--drivers/net/ethernet/intel/e1000e/netdev.c4
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_iov.c4
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_netdev.c54
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_pf.c2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e.h67
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_adminq.c17
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h20
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_client.c36
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_client.h2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_common.c47
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ethtool.c186
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c214
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_nvm.c141
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_prototype.h10
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_status.h1
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.c66
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.h2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_type.h26
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_adminq.c15
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h20
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_common.c2
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_status.h1
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_txrx.c66
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_txrx.h2
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_type.h26
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf.h5
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c28
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf_main.c32
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c35
-rw-r--r--drivers/net/ethernet/intel/igb/igb.h1
-rw-r--r--drivers/net/ethernet/intel/igb/igb_ethtool.c32
-rw-r--r--drivers/net/ethernet/intel/igb/igb_main.c72
-rw-r--r--drivers/net/ethernet/intel/igb/igb_ptp.c9
-rw-r--r--drivers/net/ethernet/intel/ixgbe/Makefile1
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe.h33
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_common.c2
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c39
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c941
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h93
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c4
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c59
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_type.h22
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c2
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ethtool.c3
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ixgbevf.h16
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c362
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/vf.c17
-rw-r--r--drivers/net/ethernet/marvell/mvpp2.c206
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c253
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/main.c33
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/main.h2
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/offload.c24
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/offload.c7
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_app.h9
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_main.c1
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_common.c2
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c16
-rw-r--r--drivers/net/ethernet/nvidia/forcedeth.c13
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_rdma.c31
-rw-r--r--drivers/net/ethernet/qualcomm/emac/emac-mac.h3
-rw-r--r--drivers/net/ethernet/qualcomm/emac/emac.c7
-rw-r--r--drivers/net/ethernet/rocker/rocker_ofdpa.c1
-rw-r--r--drivers/net/ethernet/sfc/ef10.c158
-rw-r--r--drivers/net/ethernet/sfc/efx.c11
-rw-r--r--drivers/net/ethernet/sfc/farch.c26
-rw-r--r--drivers/net/ethernet/sfc/net_driver.h21
-rw-r--r--drivers/net/ethernet/sfc/nic.h4
-rw-r--r--drivers/net/ethernet/sfc/ptp.c366
-rw-r--r--drivers/net/ethernet/sfc/tx.c21
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/common.h2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c3
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c12
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c15
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c12
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/enh_desc.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c2
-rw-r--r--drivers/net/hyperv/rndis_filter.c11
-rw-r--r--drivers/net/macsec.c12
-rw-r--r--drivers/net/netdevsim/Makefile6
-rw-r--r--drivers/net/netdevsim/bpf.c38
-rw-r--r--drivers/net/netdevsim/netdevsim.h28
-rw-r--r--drivers/net/phy/sfp-bus.c2
-rw-r--r--drivers/net/ppp/pppoe.c11
-rw-r--r--drivers/net/tun.c5
-rw-r--r--drivers/net/usb/usbnet.c8
-rw-r--r--drivers/net/virtio_net.c191
-rw-r--r--drivers/net/vmxnet3/vmxnet3_drv.c2
-rw-r--r--drivers/net/wireless/mac80211_hwsim.c94
-rw-r--r--drivers/net/wireless/mac80211_hwsim.h68
-rw-r--r--drivers/scsi/libsas/sas_scsi_host.c17
-rw-r--r--drivers/tty/serdev/core.c12
-rw-r--r--drivers/tty/serdev/serdev-ttyport.c24
-rw-r--r--drivers/vhost/vhost.c6
-rw-r--r--fs/nfsd/auth.c6
-rw-r--r--fs/orangefs/devorangefs-req.c3
-rw-r--r--fs/orangefs/waitqueue.c4
-rw-r--r--include/linux/acpi.h3
-rw-r--r--include/linux/filter.h12
-rw-r--r--include/linux/ftrace.h2
-rw-r--r--include/linux/inetdevice.h2
-rw-r--r--include/linux/net.h1
-rw-r--r--include/linux/netdevice.h18
-rw-r--r--include/linux/property.h11
-rw-r--r--include/linux/serdev.h10
-rw-r--r--include/linux/swapops.h21
-rw-r--r--include/linux/tcp.h11
-rw-r--r--include/linux/vermagic.h8
-rw-r--r--include/net/erspan.h127
-rw-r--r--include/net/ipv6.h1
-rw-r--r--include/net/pkt_cls.h50
-rw-r--r--include/net/route.h2
-rw-r--r--include/net/sch_generic.h3
-rw-r--r--include/net/tc_act/tc_csum.h16
-rw-r--r--include/net/tcp.h42
-rw-r--r--include/net/wext.h4
-rw-r--r--include/net/xfrm.h12
-rw-r--r--include/uapi/linux/bpf.h84
-rw-r--r--include/uapi/linux/erspan.h52
-rw-r--r--include/uapi/linux/if_link.h2
-rw-r--r--include/uapi/linux/if_macsec.h6
-rw-r--r--include/uapi/linux/kvm.h4
-rw-r--r--include/uapi/linux/openvswitch.h1
-rw-r--r--kernel/bpf/core.c258
-rw-r--r--kernel/bpf/lpm_trie.c28
-rw-r--r--kernel/bpf/syscall.c5
-rw-r--r--kernel/bpf/verifier.c62
-rw-r--r--kernel/irq/matrix.c20
-rw-r--r--kernel/trace/ftrace.c29
-rw-r--r--kernel/trace/trace.c34
-rw-r--r--kernel/trace/trace_events_trigger.c13
-rw-r--r--kernel/trace/trace_functions.c49
-rw-r--r--lib/test_bpf.c8
-rw-r--r--mm/page_vma_mapped.c66
-rw-r--r--net/bridge/br_private.h2
-rw-r--r--net/can/Kconfig2
-rw-r--r--net/core/dev.c22
-rw-r--r--net/core/dev_ioctl.c132
-rw-r--r--net/core/devlink.c1
-rw-r--r--net/core/filter.c303
-rw-r--r--net/core/link_watch.c2
-rw-r--r--net/core/net-sysfs.c25
-rw-r--r--net/core/net_namespace.c62
-rw-r--r--net/core/pktgen.c266
-rw-r--r--net/core/rtnetlink.c13
-rw-r--r--net/dsa/dsa2.c7
-rw-r--r--net/dsa/dsa_priv.h4
-rw-r--r--net/dsa/legacy.c4
-rw-r--r--net/dsa/port.c103
-rw-r--r--net/ipv4/af_inet.c28
-rw-r--r--net/ipv4/devinet.c57
-rw-r--r--net/ipv4/esp4_offload.c3
-rw-r--r--net/ipv4/fib_frontend.c8
-rw-r--r--net/ipv4/igmp.c2
-rw-r--r--net/ipv4/ip_gre.c38
-rw-r--r--net/ipv4/ip_sockglue.c6
-rw-r--r--net/ipv4/ip_tunnel.c13
-rw-r--r--net/ipv4/ipconfig.c47
-rw-r--r--net/ipv4/raw.c15
-rw-r--r--net/ipv4/tcp.c26
-rw-r--r--net/ipv4/tcp_nv.c2
-rw-r--r--net/ipv4/tcp_offload.c3
-rw-r--r--net/ipv4/tcp_output.c6
-rw-r--r--net/ipv4/tcp_timer.c7
-rw-r--r--net/ipv4/udp.c15
-rw-r--r--net/ipv4/udp_offload.c3
-rw-r--r--net/ipv4/xfrm4_mode_tunnel.c1
-rw-r--r--net/ipv6/esp6_offload.c3
-rw-r--r--net/ipv6/ip6_gre.c36
-rw-r--r--net/ipv6/ip6_output.c2
-rw-r--r--net/ipv6/ipv6_sockglue.c2
-rw-r--r--net/ipv6/route.c146
-rw-r--r--net/ipv6/tcpv6_offload.c3
-rw-r--r--net/ipv6/udp_offload.c3
-rw-r--r--net/ipv6/xfrm6_mode_tunnel.c1
-rw-r--r--net/kcm/kcmsock.c25
-rw-r--r--net/mac80211/debugfs_sta.c4
-rw-r--r--net/openvswitch/flow_netlink.c52
-rw-r--r--net/rds/tcp.c5
-rw-r--r--net/rds/tcp.h2
-rw-r--r--net/rds/tcp_send.c4
-rw-r--r--net/sched/act_csum.c66
-rw-r--r--net/sched/cls_api.c15
-rw-r--r--net/sched/cls_basic.c2
-rw-r--r--net/sched/cls_bpf.c36
-rw-r--r--net/sched/cls_cgroup.c3
-rw-r--r--net/sched/cls_flow.c2
-rw-r--r--net/sched/cls_flower.c30
-rw-r--r--net/sched/cls_fw.c2
-rw-r--r--net/sched/cls_matchall.c19
-rw-r--r--net/sched/cls_route.c2
-rw-r--r--net/sched/cls_rsvp.h2
-rw-r--r--net/sched/cls_tcindex.c3
-rw-r--r--net/sched/cls_u32.c52
-rw-r--r--net/sched/em_nbyte.c2
-rw-r--r--net/sched/sch_generic.c4
-rw-r--r--net/sctp/offload.c3
-rw-r--r--net/sctp/socket.c3
-rw-r--r--net/smc/af_smc.c207
-rw-r--r--net/smc/smc.h5
-rw-r--r--net/smc/smc_cdc.c40
-rw-r--r--net/smc/smc_cdc.h1
-rw-r--r--net/smc/smc_close.c206
-rw-r--r--net/smc/smc_close.h1
-rw-r--r--net/smc/smc_core.c17
-rw-r--r--net/smc/smc_diag.c6
-rw-r--r--net/smc/smc_ib.c38
-rw-r--r--net/smc/smc_tx.c23
-rw-r--r--net/smc/smc_wr.c50
-rw-r--r--net/smc/smc_wr.h2
-rw-r--r--net/socket.c271
-rw-r--r--net/tls/tls_sw.c2
-rw-r--r--net/wireless/wext-core.c13
-rw-r--r--net/xfrm/xfrm_device.c12
-rw-r--r--net/xfrm/xfrm_replay.c2
-rw-r--r--net/xfrm/xfrm_state.c12
-rw-r--r--net/xfrm/xfrm_user.c18
-rw-r--r--samples/bpf/Makefile5
-rw-r--r--samples/sockmap/sockmap_user.c392
-rw-r--r--tools/include/uapi/linux/bpf.h86
-rw-r--r--tools/testing/selftests/bpf/Makefile6
-rw-r--r--tools/testing/selftests/bpf/bpf_helpers.h2
-rwxr-xr-xtools/testing/selftests/bpf/tcp_client.py51
-rwxr-xr-xtools/testing/selftests/bpf/tcp_server.py83
-rw-r--r--tools/testing/selftests/bpf/test_align.c30
-rw-r--r--tools/testing/selftests/bpf/test_dev_cgroup.c2
-rw-r--r--tools/testing/selftests/bpf/test_lpm_map.c95
-rw-r--r--tools/testing/selftests/bpf/test_maps.c32
-rwxr-xr-xtools/testing/selftests/bpf/test_offload.py216
-rw-r--r--tools/testing/selftests/bpf/test_tcpbpf.h16
-rw-r--r--tools/testing/selftests/bpf/test_tcpbpf_kern.c115
-rw-r--r--tools/testing/selftests/bpf/test_tcpbpf_user.c126
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c344
-rw-r--r--virt/kvm/arm/mmu.c2
-rw-r--r--virt/kvm/arm/vgic/vgic-init.c8
-rw-r--r--virt/kvm/arm/vgic/vgic-v4.c2
355 files changed, 11013 insertions, 4596 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-net b/Documentation/ABI/testing/sysfs-class-net
index 6856da99b6f7..2f1788111cd9 100644
--- a/Documentation/ABI/testing/sysfs-class-net
+++ b/Documentation/ABI/testing/sysfs-class-net
@@ -259,3 +259,27 @@ Contact: netdev@vger.kernel.org
Description:
Symbolic link to the PHY device this network device is attached
to.
+
+What: /sys/class/net/<iface>/carrier_changes
+Date: Mar 2014
+KernelVersion: 3.15
+Contact: netdev@vger.kernel.org
+Description:
+ 32-bit unsigned integer counting the number of times the link has
+ seen a change from UP to DOWN and vice versa
+
+What: /sys/class/net/<iface>/carrier_up_count
+Date: Jan 2018
+KernelVersion: 4.16
+Contact: netdev@vger.kernel.org
+Description:
+ 32-bit unsigned integer counting the number of times the link has
+ been up
+
+What: /sys/class/net/<iface>/carrier_down_count
+Date: Jan 2018
+KernelVersion: 4.16
+Contact: netdev@vger.kernel.org
+Description:
+ 32-bit unsigned integer counting the number of times the link has
+ been down
diff --git a/Documentation/devicetree/bindings/net/can/rcar_can.txt b/Documentation/devicetree/bindings/net/can/rcar_can.txt
index 06bb7cc334c8..94a7f33ac5e9 100644
--- a/Documentation/devicetree/bindings/net/can/rcar_can.txt
+++ b/Documentation/devicetree/bindings/net/can/rcar_can.txt
@@ -2,7 +2,9 @@ Renesas R-Car CAN controller Device Tree Bindings
-------------------------------------------------
Required properties:
-- compatible: "renesas,can-r8a7778" if CAN controller is a part of R8A7778 SoC.
+- compatible: "renesas,can-r8a7743" if CAN controller is a part of R8A7743 SoC.
+ "renesas,can-r8a7745" if CAN controller is a part of R8A7745 SoC.
+ "renesas,can-r8a7778" if CAN controller is a part of R8A7778 SoC.
"renesas,can-r8a7779" if CAN controller is a part of R8A7779 SoC.
"renesas,can-r8a7790" if CAN controller is a part of R8A7790 SoC.
"renesas,can-r8a7791" if CAN controller is a part of R8A7791 SoC.
@@ -12,7 +14,8 @@ Required properties:
"renesas,can-r8a7795" if CAN controller is a part of R8A7795 SoC.
"renesas,can-r8a7796" if CAN controller is a part of R8A7796 SoC.
"renesas,rcar-gen1-can" for a generic R-Car Gen1 compatible device.
- "renesas,rcar-gen2-can" for a generic R-Car Gen2 compatible device.
+ "renesas,rcar-gen2-can" for a generic R-Car Gen2 or RZ/G1
+ compatible device.
"renesas,rcar-gen3-can" for a generic R-Car Gen3 compatible device.
When compatible with the generic version, nodes must list the
SoC-specific version corresponding to the platform first
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/mpc5200.txt b/Documentation/devicetree/bindings/powerpc/fsl/mpc5200.txt
index 4ccb2cd5df94..d096cf461d81 100644
--- a/Documentation/devicetree/bindings/powerpc/fsl/mpc5200.txt
+++ b/Documentation/devicetree/bindings/powerpc/fsl/mpc5200.txt
@@ -195,4 +195,4 @@ External interrupts:
fsl,mpc5200-mscan nodes
-----------------------
-See file can.txt in this directory.
+See file Documentation/devicetree/bindings/powerpc/fsl/mpc5200.txt
diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX
index f5d642c01dd3..2b89d91b376f 100644
--- a/Documentation/networking/00-INDEX
+++ b/Documentation/networking/00-INDEX
@@ -36,8 +36,6 @@ bonding.txt
- Linux Ethernet Bonding Driver HOWTO: link aggregation in Linux.
bridge.txt
- where to get user space programs for ethernet bridging with Linux.
-can.txt
- - documentation on CAN protocol family.
cdc_mbim.txt
- 3G/LTE USB modem (Mobile Broadband Interface Model)
checksum-offloads.txt
diff --git a/Documentation/networking/can.rst b/Documentation/networking/can.rst
new file mode 100644
index 000000000000..d23c51abf8c6
--- /dev/null
+++ b/Documentation/networking/can.rst
@@ -0,0 +1,1437 @@
+===================================
+SocketCAN - Controller Area Network
+===================================
+
+Overview / What is SocketCAN
+============================
+
+The socketcan package is an implementation of CAN protocols
+(Controller Area Network) for Linux. CAN is a networking technology
+which has widespread use in automation, embedded devices, and
+automotive fields. While there have been other CAN implementations
+for Linux based on character devices, SocketCAN uses the Berkeley
+socket API, the Linux network stack and implements the CAN device
+drivers as network interfaces. The CAN socket API has been designed
+as similar as possible to the TCP/IP protocols to allow programmers,
+familiar with network programming, to easily learn how to use CAN
+sockets.
+
+
+.. _socketcan-motivation:
+
+Motivation / Why Using the Socket API
+=====================================
+
+There have been CAN implementations for Linux before SocketCAN so the
+question arises, why we have started another project. Most existing
+implementations come as a device driver for some CAN hardware, they
+are based on character devices and provide comparatively little
+functionality. Usually, there is only a hardware-specific device
+driver which provides a character device interface to send and
+receive raw CAN frames, directly to/from the controller hardware.
+Queueing of frames and higher-level transport protocols like ISO-TP
+have to be implemented in user space applications. Also, most
+character-device implementations support only one single process to
+open the device at a time, similar to a serial interface. Exchanging
+the CAN controller requires employment of another device driver and
+often the need for adaption of large parts of the application to the
+new driver's API.
+
+SocketCAN was designed to overcome all of these limitations. A new
+protocol family has been implemented which provides a socket interface
+to user space applications and which builds upon the Linux network
+layer, enabling use all of the provided queueing functionality. A device
+driver for CAN controller hardware registers itself with the Linux
+network layer as a network device, so that CAN frames from the
+controller can be passed up to the network layer and on to the CAN
+protocol family module and also vice-versa. Also, the protocol family
+module provides an API for transport protocol modules to register, so
+that any number of transport protocols can be loaded or unloaded
+dynamically. In fact, the can core module alone does not provide any
+protocol and cannot be used without loading at least one additional
+protocol module. Multiple sockets can be opened at the same time,
+on different or the same protocol module and they can listen/send
+frames on different or the same CAN IDs. Several sockets listening on
+the same interface for frames with the same CAN ID are all passed the
+same received matching CAN frames. An application wishing to
+communicate using a specific transport protocol, e.g. ISO-TP, just
+selects that protocol when opening the socket, and then can read and
+write application data byte streams, without having to deal with
+CAN-IDs, frames, etc.
+
+Similar functionality visible from user-space could be provided by a
+character device, too, but this would lead to a technically inelegant
+solution for a couple of reasons:
+
+* **Intricate usage:** Instead of passing a protocol argument to
+ socket(2) and using bind(2) to select a CAN interface and CAN ID, an
+ application would have to do all these operations using ioctl(2)s.
+
+* **Code duplication:** A character device cannot make use of the Linux
+ network queueing code, so all that code would have to be duplicated
+ for CAN networking.
+
+* **Abstraction:** In most existing character-device implementations, the
+ hardware-specific device driver for a CAN controller directly
+ provides the character device for the application to work with.
+ This is at least very unusual in Unix systems for both, char and
+ block devices. For example you don't have a character device for a
+ certain UART of a serial interface, a certain sound chip in your
+ computer, a SCSI or IDE controller providing access to your hard
+ disk or tape streamer device. Instead, you have abstraction layers
+ which provide a unified character or block device interface to the
+ application on the one hand, and a interface for hardware-specific
+ device drivers on the other hand. These abstractions are provided
+ by subsystems like the tty layer, the audio subsystem or the SCSI
+ and IDE subsystems for the devices mentioned above.
+
+ The easiest way to implement a CAN device driver is as a character
+ device without such a (complete) abstraction layer, as is done by most
+ existing drivers. The right way, however, would be to add such a
+ layer with all the functionality like registering for certain CAN
+ IDs, supporting several open file descriptors and (de)multiplexing
+ CAN frames between them, (sophisticated) queueing of CAN frames, and
+ providing an API for device drivers to register with. However, then
+ it would be no more difficult, or may be even easier, to use the
+ networking framework provided by the Linux kernel, and this is what
+ SocketCAN does.
+
+The use of the networking framework of the Linux kernel is just the
+natural and most appropriate way to implement CAN for Linux.
+
+
+.. _socketcan-concept:
+
+SocketCAN Concept
+=================
+
+As described in :ref:`socketcan-motivation` the main goal of SocketCAN is to
+provide a socket interface to user space applications which builds
+upon the Linux network layer. In contrast to the commonly known
+TCP/IP and ethernet networking, the CAN bus is a broadcast-only(!)
+medium that has no MAC-layer addressing like ethernet. The CAN-identifier
+(can_id) is used for arbitration on the CAN-bus. Therefore the CAN-IDs
+have to be chosen uniquely on the bus. When designing a CAN-ECU
+network the CAN-IDs are mapped to be sent by a specific ECU.
+For this reason a CAN-ID can be treated best as a kind of source address.
+
+
+.. _socketcan-receive-lists:
+
+Receive Lists
+-------------
+
+The network transparent access of multiple applications leads to the
+problem that different applications may be interested in the same
+CAN-IDs from the same CAN network interface. The SocketCAN core
+module - which implements the protocol family CAN - provides several
+high efficient receive lists for this reason. If e.g. a user space
+application opens a CAN RAW socket, the raw protocol module itself
+requests the (range of) CAN-IDs from the SocketCAN core that are
+requested by the user. The subscription and unsubscription of
+CAN-IDs can be done for specific CAN interfaces or for all(!) known
+CAN interfaces with the can_rx_(un)register() functions provided to
+CAN protocol modules by the SocketCAN core (see :ref:`socketcan-core-module`).
+To optimize the CPU usage at runtime the receive lists are split up
+into several specific lists per device that match the requested
+filter complexity for a given use-case.
+
+
+.. _socketcan-local-loopback1:
+
+Local Loopback of Sent Frames
+-----------------------------
+
+As known from other networking concepts the data exchanging
+applications may run on the same or different nodes without any
+change (except for the according addressing information):
+
+.. code::
+
+ ___ ___ ___ _______ ___
+ | _ | | _ | | _ | | _ _ | | _ |
+ ||A|| ||B|| ||C|| ||A| |B|| ||C||
+ |___| |___| |___| |_______| |___|
+ | | | | |
+ -----------------(1)- CAN bus -(2)---------------
+
+To ensure that application A receives the same information in the
+example (2) as it would receive in example (1) there is need for
+some kind of local loopback of the sent CAN frames on the appropriate
+node.
+
+The Linux network devices (by default) just can handle the
+transmission and reception of media dependent frames. Due to the
+arbitration on the CAN bus the transmission of a low prio CAN-ID
+may be delayed by the reception of a high prio CAN frame. To
+reflect the correct [*]_ traffic on the node the loopback of the sent
+data has to be performed right after a successful transmission. If
+the CAN network interface is not capable of performing the loopback for
+some reason the SocketCAN core can do this task as a fallback solution.
+See :ref:`socketcan-local-loopback1` for details (recommended).
+
+The loopback functionality is enabled by default to reflect standard
+networking behaviour for CAN applications. Due to some requests from
+the RT-SocketCAN group the loopback optionally may be disabled for each
+separate socket. See sockopts from the CAN RAW sockets in :ref:`socketcan-raw-sockets`.
+
+.. [*] you really like to have this when you're running analyser
+ tools like 'candump' or 'cansniffer' on the (same) node.
+
+
+.. _socketcan-network-problem-notifications:
+
+Network Problem Notifications
+-----------------------------
+
+The use of the CAN bus may lead to several problems on the physical
+and media access control layer. Detecting and logging of these lower
+layer problems is a vital requirement for CAN users to identify
+hardware issues on the physical transceiver layer as well as
+arbitration problems and error frames caused by the different
+ECUs. The occurrence of detected errors are important for diagnosis
+and have to be logged together with the exact timestamp. For this
+reason the CAN interface driver can generate so called Error Message
+Frames that can optionally be passed to the user application in the
+same way as other CAN frames. Whenever an error on the physical layer
+or the MAC layer is detected (e.g. by the CAN controller) the driver
+creates an appropriate error message frame. Error messages frames can
+be requested by the user application using the common CAN filter
+mechanisms. Inside this filter definition the (interested) type of
+errors may be selected. The reception of error messages is disabled
+by default. The format of the CAN error message frame is briefly
+described in the Linux header file "include/uapi/linux/can/error.h".
+
+
+How to use SocketCAN
+====================
+
+Like TCP/IP, you first need to open a socket for communicating over a
+CAN network. Since SocketCAN implements a new protocol family, you
+need to pass PF_CAN as the first argument to the socket(2) system
+call. Currently, there are two CAN protocols to choose from, the raw
+socket protocol and the broadcast manager (BCM). So to open a socket,
+you would write::
+
+ s = socket(PF_CAN, SOCK_RAW, CAN_RAW);
+
+and::
+
+ s = socket(PF_CAN, SOCK_DGRAM, CAN_BCM);
+
+respectively. After the successful creation of the socket, you would
+normally use the bind(2) system call to bind the socket to a CAN
+interface (which is different from TCP/IP due to different addressing
+- see :ref:`socketcan-concept`). After binding (CAN_RAW) or connecting (CAN_BCM)
+the socket, you can read(2) and write(2) from/to the socket or use
+send(2), sendto(2), sendmsg(2) and the recv* counterpart operations
+on the socket as usual. There are also CAN specific socket options
+described below.
+
+The basic CAN frame structure and the sockaddr structure are defined
+in include/linux/can.h:
+
+.. code-block:: C
+
+ struct can_frame {
+ canid_t can_id; /* 32 bit CAN_ID + EFF/RTR/ERR flags */
+ __u8 can_dlc; /* frame payload length in byte (0 .. 8) */
+ __u8 __pad; /* padding */
+ __u8 __res0; /* reserved / padding */
+ __u8 __res1; /* reserved / padding */
+ __u8 data[8] __attribute__((aligned(8)));
+ };
+
+The alignment of the (linear) payload data[] to a 64bit boundary
+allows the user to define their own structs and unions to easily access
+the CAN payload. There is no given byteorder on the CAN bus by
+default. A read(2) system call on a CAN_RAW socket transfers a
+struct can_frame to the user space.
+
+The sockaddr_can structure has an interface index like the
+PF_PACKET socket, that also binds to a specific interface:
+
+.. code-block:: C
+
+ struct sockaddr_can {
+ sa_family_t can_family;
+ int can_ifindex;
+ union {
+ /* transport protocol class address info (e.g. ISOTP) */
+ struct { canid_t rx_id, tx_id; } tp;
+
+ /* reserved for future CAN protocols address information */
+ } can_addr;
+ };
+
+To determine the interface index an appropriate ioctl() has to
+be used (example for CAN_RAW sockets without error checking):
+
+.. code-block:: C
+
+ int s;
+ struct sockaddr_can addr;
+ struct ifreq ifr;
+
+ s = socket(PF_CAN, SOCK_RAW, CAN_RAW);
+
+ strcpy(ifr.ifr_name, "can0" );
+ ioctl(s, SIOCGIFINDEX, &ifr);
+
+ addr.can_family = AF_CAN;
+ addr.can_ifindex = ifr.ifr_ifindex;
+
+ bind(s, (struct sockaddr *)&addr, sizeof(addr));
+
+ (..)
+
+To bind a socket to all(!) CAN interfaces the interface index must
+be 0 (zero). In this case the socket receives CAN frames from every
+enabled CAN interface. To determine the originating CAN interface
+the system call recvfrom(2) may be used instead of read(2). To send
+on a socket that is bound to 'any' interface sendto(2) is needed to
+specify the outgoing interface.
+
+Reading CAN frames from a bound CAN_RAW socket (see above) consists
+of reading a struct can_frame:
+
+.. code-block:: C
+
+ struct can_frame frame;
+
+ nbytes = read(s, &frame, sizeof(struct can_frame));
+
+ if (nbytes < 0) {
+ perror("can raw socket read");
+ return 1;
+ }
+
+ /* paranoid check ... */
+ if (nbytes < sizeof(struct can_frame)) {
+ fprintf(stderr, "read: incomplete CAN frame\n");
+ return 1;
+ }
+
+ /* do something with the received CAN frame */
+
+Writing CAN frames can be done similarly, with the write(2) system call::
+
+ nbytes = write(s, &frame, sizeof(struct can_frame));
+
+When the CAN interface is bound to 'any' existing CAN interface
+(addr.can_ifindex = 0) it is recommended to use recvfrom(2) if the
+information about the originating CAN interface is needed:
+
+.. code-block:: C
+
+ struct sockaddr_can addr;
+ struct ifreq ifr;
+ socklen_t len = sizeof(addr);
+ struct can_frame frame;
+
+ nbytes = recvfrom(s, &frame, sizeof(struct can_frame),
+ 0, (struct sockaddr*)&addr, &len);
+
+ /* get interface name of the received CAN frame */
+ ifr.ifr_ifindex = addr.can_ifindex;
+ ioctl(s, SIOCGIFNAME, &ifr);
+ printf("Received a CAN frame from interface %s", ifr.ifr_name);
+
+To write CAN frames on sockets bound to 'any' CAN interface the
+outgoing interface has to be defined certainly:
+
+.. code-block:: C
+
+ strcpy(ifr.ifr_name, "can0");
+ ioctl(s, SIOCGIFINDEX, &ifr);
+ addr.can_ifindex = ifr.ifr_ifindex;
+ addr.can_family = AF_CAN;
+
+ nbytes = sendto(s, &frame, sizeof(struct can_frame),
+ 0, (struct sockaddr*)&addr, sizeof(addr));
+
+An accurate timestamp can be obtained with an ioctl(2) call after reading
+a message from the socket:
+
+.. code-block:: C
+
+ struct timeval tv;
+ ioctl(s, SIOCGSTAMP, &tv);
+
+The timestamp has a resolution of one microsecond and is set automatically
+at the reception of a CAN frame.
+
+Remark about CAN FD (flexible data rate) support:
+
+Generally the handling of CAN FD is very similar to the formerly described
+examples. The new CAN FD capable CAN controllers support two different
+bitrates for the arbitration phase and the payload phase of the CAN FD frame
+and up to 64 bytes of payload. This extended payload length breaks all the
+kernel interfaces (ABI) which heavily rely on the CAN frame with fixed eight
+bytes of payload (struct can_frame) like the CAN_RAW socket. Therefore e.g.
+the CAN_RAW socket supports a new socket option CAN_RAW_FD_FRAMES that
+switches the socket into a mode that allows the handling of CAN FD frames
+and (legacy) CAN frames simultaneously (see :ref:`socketcan-rawfd`).
+
+The struct canfd_frame is defined in include/linux/can.h:
+
+.. code-block:: C
+
+ struct canfd_frame {
+ canid_t can_id; /* 32 bit CAN_ID + EFF/RTR/ERR flags */
+ __u8 len; /* frame payload length in byte (0 .. 64) */
+ __u8 flags; /* additional flags for CAN FD */
+ __u8 __res0; /* reserved / padding */
+ __u8 __res1; /* reserved / padding */
+ __u8 data[64] __attribute__((aligned(8)));
+ };
+
+The struct canfd_frame and the existing struct can_frame have the can_id,
+the payload length and the payload data at the same offset inside their
+structures. This allows to handle the different structures very similar.
+When the content of a struct can_frame is copied into a struct canfd_frame
+all structure elements can be used as-is - only the data[] becomes extended.
+
+When introducing the struct canfd_frame it turned out that the data length
+code (DLC) of the struct can_frame was used as a length information as the
+length and the DLC has a 1:1 mapping in the range of 0 .. 8. To preserve
+the easy handling of the length information the canfd_frame.len element
+contains a plain length value from 0 .. 64. So both canfd_frame.len and
+can_frame.can_dlc are equal and contain a length information and no DLC.
+For details about the distinction of CAN and CAN FD capable devices and
+the mapping to the bus-relevant data length code (DLC), see :ref:`socketcan-can-fd-driver`.
+
+The length of the two CAN(FD) frame structures define the maximum transfer
+unit (MTU) of the CAN(FD) network interface and skbuff data length. Two
+definitions are specified for CAN specific MTUs in include/linux/can.h:
+
+.. code-block:: C
+
+ #define CAN_MTU (sizeof(struct can_frame)) == 16 => 'legacy' CAN frame
+ #define CANFD_MTU (sizeof(struct canfd_frame)) == 72 => CAN FD frame
+
+
+.. _socketcan-raw-sockets:
+
+RAW Protocol Sockets with can_filters (SOCK_RAW)
+------------------------------------------------
+
+Using CAN_RAW sockets is extensively comparable to the commonly
+known access to CAN character devices. To meet the new possibilities
+provided by the multi user SocketCAN approach, some reasonable
+defaults are set at RAW socket binding time:
+
+- The filters are set to exactly one filter receiving everything
+- The socket only receives valid data frames (=> no error message frames)
+- The loopback of sent CAN frames is enabled (see :ref:`socketcan-local-loopback2`)
+- The socket does not receive its own sent frames (in loopback mode)
+
+These default settings may be changed before or after binding the socket.
+To use the referenced definitions of the socket options for CAN_RAW
+sockets, include <linux/can/raw.h>.
+
+
+.. _socketcan-rawfilter:
+
+RAW socket option CAN_RAW_FILTER
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The reception of CAN frames using CAN_RAW sockets can be controlled
+by defining 0 .. n filters with the CAN_RAW_FILTER socket option.
+
+The CAN filter structure is defined in include/linux/can.h:
+
+.. code-block:: C
+
+ struct can_filter {
+ canid_t can_id;
+ canid_t can_mask;
+ };
+
+A filter matches, when:
+
+.. code-block:: C
+
+ <received_can_id> & mask == can_id & mask
+
+which is analogous to known CAN controllers hardware filter semantics.
+The filter can be inverted in this semantic, when the CAN_INV_FILTER
+bit is set in can_id element of the can_filter structure. In
+contrast to CAN controller hardware filters the user may set 0 .. n
+receive filters for each open socket separately:
+
+.. code-block:: C
+
+ struct can_filter rfilter[2];
+
+ rfilter[0].can_id = 0x123;
+ rfilter[0].can_mask = CAN_SFF_MASK;
+ rfilter[1].can_id = 0x200;
+ rfilter[1].can_mask = 0x700;
+
+ setsockopt(s, SOL_CAN_RAW, CAN_RAW_FILTER, &rfilter, sizeof(rfilter));
+
+To disable the reception of CAN frames on the selected CAN_RAW socket:
+
+.. code-block:: C
+
+ setsockopt(s, SOL_CAN_RAW, CAN_RAW_FILTER, NULL, 0);
+
+To set the filters to zero filters is quite obsolete as to not read
+data causes the raw socket to discard the received CAN frames. But
+having this 'send only' use-case we may remove the receive list in the
+Kernel to save a little (really a very little!) CPU usage.
+
+CAN Filter Usage Optimisation
+.............................
+
+The CAN filters are processed in per-device filter lists at CAN frame
+reception time. To reduce the number of checks that need to be performed
+while walking through the filter lists the CAN core provides an optimized
+filter handling when the filter subscription focusses on a single CAN ID.
+
+For the possible 2048 SFF CAN identifiers the identifier is used as an index
+to access the corresponding subscription list without any further checks.
+For the 2^29 possible EFF CAN identifiers a 10 bit XOR folding is used as
+hash function to retrieve the EFF table index.
+
+To benefit from the optimized filters for single CAN identifiers the
+CAN_SFF_MASK or CAN_EFF_MASK have to be set into can_filter.mask together
+with set CAN_EFF_FLAG and CAN_RTR_FLAG bits. A set CAN_EFF_FLAG bit in the
+can_filter.mask makes clear that it matters whether a SFF or EFF CAN ID is
+subscribed. E.g. in the example from above:
+
+.. code-block:: C
+
+ rfilter[0].can_id = 0x123;
+ rfilter[0].can_mask = CAN_SFF_MASK;
+
+both SFF frames with CAN ID 0x123 and EFF frames with 0xXXXXX123 can pass.
+
+To filter for only 0x123 (SFF) and 0x12345678 (EFF) CAN identifiers the
+filter has to be defined in this way to benefit from the optimized filters:
+
+.. code-block:: C
+
+ struct can_filter rfilter[2];
+
+ rfilter[0].can_id = 0x123;
+ rfilter[0].can_mask = (CAN_EFF_FLAG | CAN_RTR_FLAG | CAN_SFF_MASK);
+ rfilter[1].can_id = 0x12345678 | CAN_EFF_FLAG;
+ rfilter[1].can_mask = (CAN_EFF_FLAG | CAN_RTR_FLAG | CAN_EFF_MASK);
+
+ setsockopt(s, SOL_CAN_RAW, CAN_RAW_FILTER, &rfilter, sizeof(rfilter));
+
+
+RAW Socket Option CAN_RAW_ERR_FILTER
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+As described in :ref:`socketcan-network-problem-notifications` the CAN interface driver can generate so
+called Error Message Frames that can optionally be passed to the user
+application in the same way as other CAN frames. The possible
+errors are divided into different error classes that may be filtered
+using the appropriate error mask. To register for every possible
+error condition CAN_ERR_MASK can be used as value for the error mask.
+The values for the error mask are defined in linux/can/error.h:
+
+.. code-block:: C
+
+ can_err_mask_t err_mask = ( CAN_ERR_TX_TIMEOUT | CAN_ERR_BUSOFF );
+
+ setsockopt(s, SOL_CAN_RAW, CAN_RAW_ERR_FILTER,
+ &err_mask, sizeof(err_mask));
+
+
+RAW Socket Option CAN_RAW_LOOPBACK
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To meet multi user needs the local loopback is enabled by default
+(see :ref:`socketcan-local-loopback1` for details). But in some embedded use-cases
+(e.g. when only one application uses the CAN bus) this loopback
+functionality can be disabled (separately for each socket):
+
+.. code-block:: C
+
+ int loopback = 0; /* 0 = disabled, 1 = enabled (default) */
+
+ setsockopt(s, SOL_CAN_RAW, CAN_RAW_LOOPBACK, &loopback, sizeof(loopback));
+
+
+RAW socket option CAN_RAW_RECV_OWN_MSGS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When the local loopback is enabled, all the sent CAN frames are
+looped back to the open CAN sockets that registered for the CAN
+frames' CAN-ID on this given interface to meet the multi user
+needs. The reception of the CAN frames on the same socket that was
+sending the CAN frame is assumed to be unwanted and therefore
+disabled by default. This default behaviour may be changed on
+demand:
+
+.. code-block:: C
+
+ int recv_own_msgs = 1; /* 0 = disabled (default), 1 = enabled */
+
+ setsockopt(s, SOL_CAN_RAW, CAN_RAW_RECV_OWN_MSGS,
+ &recv_own_msgs, sizeof(recv_own_msgs));
+
+
+.. _socketcan-rawfd:
+
+RAW Socket Option CAN_RAW_FD_FRAMES
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+CAN FD support in CAN_RAW sockets can be enabled with a new socket option
+CAN_RAW_FD_FRAMES which is off by default. When the new socket option is
+not supported by the CAN_RAW socket (e.g. on older kernels), switching the
+CAN_RAW_FD_FRAMES option returns the error -ENOPROTOOPT.
+
+Once CAN_RAW_FD_FRAMES is enabled the application can send both CAN frames
+and CAN FD frames. OTOH the application has to handle CAN and CAN FD frames
+when reading from the socket:
+
+.. code-block:: C
+
+ CAN_RAW_FD_FRAMES enabled: CAN_MTU and CANFD_MTU are allowed
+ CAN_RAW_FD_FRAMES disabled: only CAN_MTU is allowed (default)
+
+Example:
+
+.. code-block:: C
+
+ [ remember: CANFD_MTU == sizeof(struct canfd_frame) ]
+
+ struct canfd_frame cfd;
+
+ nbytes = read(s, &cfd, CANFD_MTU);
+
+ if (nbytes == CANFD_MTU) {
+ printf("got CAN FD frame with length %d\n", cfd.len);
+ /* cfd.flags contains valid data */
+ } else if (nbytes == CAN_MTU) {
+ printf("got legacy CAN frame with length %d\n", cfd.len);
+ /* cfd.flags is undefined */
+ } else {
+ fprintf(stderr, "read: invalid CAN(FD) frame\n");
+ return 1;
+ }
+
+ /* the content can be handled independently from the received MTU size */
+
+ printf("can_id: %X data length: %d data: ", cfd.can_id, cfd.len);
+ for (i = 0; i < cfd.len; i++)
+ printf("%02X ", cfd.data[i]);
+
+When reading with size CANFD_MTU only returns CAN_MTU bytes that have
+been received from the socket a legacy CAN frame has been read into the
+provided CAN FD structure. Note that the canfd_frame.flags data field is
+not specified in the struct can_frame and therefore it is only valid in
+CANFD_MTU sized CAN FD frames.
+
+Implementation hint for new CAN applications:
+
+To build a CAN FD aware application use struct canfd_frame as basic CAN
+data structure for CAN_RAW based applications. When the application is
+executed on an older Linux kernel and switching the CAN_RAW_FD_FRAMES
+socket option returns an error: No problem. You'll get legacy CAN frames
+or CAN FD frames and can process them the same way.
+
+When sending to CAN devices make sure that the device is capable to handle
+CAN FD frames by checking if the device maximum transfer unit is CANFD_MTU.
+The CAN device MTU can be retrieved e.g. with a SIOCGIFMTU ioctl() syscall.
+
+
+RAW socket option CAN_RAW_JOIN_FILTERS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The CAN_RAW socket can set multiple CAN identifier specific filters that
+lead to multiple filters in the af_can.c filter processing. These filters
+are indenpendent from each other which leads to logical OR'ed filters when
+applied (see :ref:`socketcan-rawfilter`).
+
+This socket option joines the given CAN filters in the way that only CAN
+frames are passed to user space that matched *all* given CAN filters. The
+semantic for the applied filters is therefore changed to a logical AND.
+
+This is useful especially when the filterset is a combination of filters
+where the CAN_INV_FILTER flag is set in order to notch single CAN IDs or
+CAN ID ranges from the incoming traffic.
+
+
+RAW Socket Returned Message Flags
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When using recvmsg() call, the msg->msg_flags may contain following flags:
+
+MSG_DONTROUTE:
+ set when the received frame was created on the local host.
+
+MSG_CONFIRM:
+ set when the frame was sent via the socket it is received on.
+ This flag can be interpreted as a 'transmission confirmation' when the
+ CAN driver supports the echo of frames on driver level, see
+ :ref:`socketcan-local-loopback1` and :ref:`socketcan-local-loopback2`.
+ In order to receive such messages, CAN_RAW_RECV_OWN_MSGS must be set.
+
+
+Broadcast Manager Protocol Sockets (SOCK_DGRAM)
+-----------------------------------------------
+
+The Broadcast Manager protocol provides a command based configuration
+interface to filter and send (e.g. cyclic) CAN messages in kernel space.
+
+Receive filters can be used to down sample frequent messages; detect events
+such as message contents changes, packet length changes, and do time-out
+monitoring of received messages.
+
+Periodic transmission tasks of CAN frames or a sequence of CAN frames can be
+created and modified at runtime; both the message content and the two
+possible transmit intervals can be altered.
+
+A BCM socket is not intended for sending individual CAN frames using the
+struct can_frame as known from the CAN_RAW socket. Instead a special BCM
+configuration message is defined. The basic BCM configuration message used
+to communicate with the broadcast manager and the available operations are
+defined in the linux/can/bcm.h include. The BCM message consists of a
+message header with a command ('opcode') followed by zero or more CAN frames.
+The broadcast manager sends responses to user space in the same form:
+
+.. code-block:: C
+
+ struct bcm_msg_head {
+ __u32 opcode; /* command */
+ __u32 flags; /* special flags */
+ __u32 count; /* run 'count' times with ival1 */
+ struct timeval ival1, ival2; /* count and subsequent interval */
+ canid_t can_id; /* unique can_id for task */
+ __u32 nframes; /* number of can_frames following */
+ struct can_frame frames[0];
+ };
+
+The aligned payload 'frames' uses the same basic CAN frame structure defined
+at the beginning of :ref:`socketcan-rawfd` and in the include/linux/can.h include. All
+messages to the broadcast manager from user space have this structure.
+
+Note a CAN_BCM socket must be connected instead of bound after socket
+creation (example without error checking):
+
+.. code-block:: C
+
+ int s;
+ struct sockaddr_can addr;
+ struct ifreq ifr;
+
+ s = socket(PF_CAN, SOCK_DGRAM, CAN_BCM);
+
+ strcpy(ifr.ifr_name, "can0");
+ ioctl(s, SIOCGIFINDEX, &ifr);
+
+ addr.can_family = AF_CAN;
+ addr.can_ifindex = ifr.ifr_ifindex;
+
+ connect(s, (struct sockaddr *)&addr, sizeof(addr));
+
+ (..)
+
+The broadcast manager socket is able to handle any number of in flight
+transmissions or receive filters concurrently. The different RX/TX jobs are
+distinguished by the unique can_id in each BCM message. However additional
+CAN_BCM sockets are recommended to communicate on multiple CAN interfaces.
+When the broadcast manager socket is bound to 'any' CAN interface (=> the
+interface index is set to zero) the configured receive filters apply to any
+CAN interface unless the sendto() syscall is used to overrule the 'any' CAN
+interface index. When using recvfrom() instead of read() to retrieve BCM
+socket messages the originating CAN interface is provided in can_ifindex.
+
+
+Broadcast Manager Operations
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The opcode defines the operation for the broadcast manager to carry out,
+or details the broadcast managers response to several events, including
+user requests.
+
+Transmit Operations (user space to broadcast manager):
+
+TX_SETUP:
+ Create (cyclic) transmission task.
+
+TX_DELETE:
+ Remove (cyclic) transmission task, requires only can_id.
+
+TX_READ:
+ Read properties of (cyclic) transmission task for can_id.
+
+TX_SEND:
+ Send one CAN frame.
+
+Transmit Responses (broadcast manager to user space):
+
+TX_STATUS:
+ Reply to TX_READ request (transmission task configuration).
+
+TX_EXPIRED:
+ Notification when counter finishes sending at initial interval
+ 'ival1'. Requires the TX_COUNTEVT flag to be set at TX_SETUP.
+
+Receive Operations (user space to broadcast manager):
+
+RX_SETUP:
+ Create RX content filter subscription.
+
+RX_DELETE:
+ Remove RX content filter subscription, requires only can_id.
+
+RX_READ:
+ Read properties of RX content filter subscription for can_id.
+
+Receive Responses (broadcast manager to user space):
+
+RX_STATUS:
+ Reply to RX_READ request (filter task configuration).
+
+RX_TIMEOUT:
+ Cyclic message is detected to be absent (timer ival1 expired).
+
+RX_CHANGED:
+ BCM message with updated CAN frame (detected content change).
+ Sent on first message received or on receipt of revised CAN messages.
+
+
+Broadcast Manager Message Flags
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When sending a message to the broadcast manager the 'flags' element may
+contain the following flag definitions which influence the behaviour:
+
+SETTIMER:
+ Set the values of ival1, ival2 and count
+
+STARTTIMER:
+ Start the timer with the actual values of ival1, ival2
+ and count. Starting the timer leads simultaneously to emit a CAN frame.
+
+TX_COUNTEVT:
+ Create the message TX_EXPIRED when count expires
+
+TX_ANNOUNCE:
+ A change of data by the process is emitted immediately.
+
+TX_CP_CAN_ID:
+ Copies the can_id from the message header to each
+ subsequent frame in frames. This is intended as usage simplification. For
+ TX tasks the unique can_id from the message header may differ from the
+ can_id(s) stored for transmission in the subsequent struct can_frame(s).
+
+RX_FILTER_ID:
+ Filter by can_id alone, no frames required (nframes=0).
+
+RX_CHECK_DLC:
+ A change of the DLC leads to an RX_CHANGED.
+
+RX_NO_AUTOTIMER:
+ Prevent automatically starting the timeout monitor.
+
+RX_ANNOUNCE_RESUME:
+ If passed at RX_SETUP and a receive timeout occurred, a
+ RX_CHANGED message will be generated when the (cyclic) receive restarts.
+
+TX_RESET_MULTI_IDX:
+ Reset the index for the multiple frame transmission.
+
+RX_RTR_FRAME:
+ Send reply for RTR-request (placed in op->frames[0]).
+
+
+Broadcast Manager Transmission Timers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Periodic transmission configurations may use up to two interval timers.
+In this case the BCM sends a number of messages ('count') at an interval
+'ival1', then continuing to send at another given interval 'ival2'. When
+only one timer is needed 'count' is set to zero and only 'ival2' is used.
+When SET_TIMER and START_TIMER flag were set the timers are activated.
+The timer values can be altered at runtime when only SET_TIMER is set.
+
+
+Broadcast Manager message sequence transmission
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Up to 256 CAN frames can be transmitted in a sequence in the case of a cyclic
+TX task configuration. The number of CAN frames is provided in the 'nframes'
+element of the BCM message head. The defined number of CAN frames are added
+as array to the TX_SETUP BCM configuration message:
+
+.. code-block:: C
+
+ /* create a struct to set up a sequence of four CAN frames */
+ struct {
+ struct bcm_msg_head msg_head;
+ struct can_frame frame[4];
+ } mytxmsg;
+
+ (..)
+ mytxmsg.msg_head.nframes = 4;
+ (..)
+
+ write(s, &mytxmsg, sizeof(mytxmsg));
+
+With every transmission the index in the array of CAN frames is increased
+and set to zero at index overflow.
+
+
+Broadcast Manager Receive Filter Timers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The timer values ival1 or ival2 may be set to non-zero values at RX_SETUP.
+When the SET_TIMER flag is set the timers are enabled:
+
+ival1:
+ Send RX_TIMEOUT when a received message is not received again within
+ the given time. When START_TIMER is set at RX_SETUP the timeout detection
+ is activated directly - even without a former CAN frame reception.
+
+ival2:
+ Throttle the received message rate down to the value of ival2. This
+ is useful to reduce messages for the application when the signal inside the
+ CAN frame is stateless as state changes within the ival2 periode may get
+ lost.
+
+Broadcast Manager Multiplex Message Receive Filter
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To filter for content changes in multiplex message sequences an array of more
+than one CAN frames can be passed in a RX_SETUP configuration message. The
+data bytes of the first CAN frame contain the mask of relevant bits that
+have to match in the subsequent CAN frames with the received CAN frame.
+If one of the subsequent CAN frames is matching the bits in that frame data
+mark the relevant content to be compared with the previous received content.
+Up to 257 CAN frames (multiplex filter bit mask CAN frame plus 256 CAN
+filters) can be added as array to the TX_SETUP BCM configuration message:
+
+.. code-block:: C
+
+ /* usually used to clear CAN frame data[] - beware of endian problems! */
+ #define U64_DATA(p) (*(unsigned long long*)(p)->data)
+
+ struct {
+ struct bcm_msg_head msg_head;
+ struct can_frame frame[5];
+ } msg;
+
+ msg.msg_head.opcode = RX_SETUP;
+ msg.msg_head.can_id = 0x42;
+ msg.msg_head.flags = 0;
+ msg.msg_head.nframes = 5;
+ U64_DATA(&msg.frame[0]) = 0xFF00000000000000ULL; /* MUX mask */
+ U64_DATA(&msg.frame[1]) = 0x01000000000000FFULL; /* data mask (MUX 0x01) */
+ U64_DATA(&msg.frame[2]) = 0x0200FFFF000000FFULL; /* data mask (MUX 0x02) */
+ U64_DATA(&msg.frame[3]) = 0x330000FFFFFF0003ULL; /* data mask (MUX 0x33) */
+ U64_DATA(&msg.frame[4]) = 0x4F07FC0FF0000000ULL; /* data mask (MUX 0x4F) */
+
+ write(s, &msg, sizeof(msg));
+
+
+Broadcast Manager CAN FD Support
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The programming API of the CAN_BCM depends on struct can_frame which is
+given as array directly behind the bcm_msg_head structure. To follow this
+schema for the CAN FD frames a new flag 'CAN_FD_FRAME' in the bcm_msg_head
+flags indicates that the concatenated CAN frame structures behind the
+bcm_msg_head are defined as struct canfd_frame:
+
+.. code-block:: C
+
+ struct {
+ struct bcm_msg_head msg_head;
+ struct canfd_frame frame[5];
+ } msg;
+
+ msg.msg_head.opcode = RX_SETUP;
+ msg.msg_head.can_id = 0x42;
+ msg.msg_head.flags = CAN_FD_FRAME;
+ msg.msg_head.nframes = 5;
+ (..)
+
+When using CAN FD frames for multiplex filtering the MUX mask is still
+expected in the first 64 bit of the struct canfd_frame data section.
+
+
+Connected Transport Protocols (SOCK_SEQPACKET)
+----------------------------------------------
+
+(to be written)
+
+
+Unconnected Transport Protocols (SOCK_DGRAM)
+--------------------------------------------
+
+(to be written)
+
+
+.. _socketcan-core-module:
+
+SocketCAN Core Module
+=====================
+
+The SocketCAN core module implements the protocol family
+PF_CAN. CAN protocol modules are loaded by the core module at
+runtime. The core module provides an interface for CAN protocol
+modules to subscribe needed CAN IDs (see :ref:`socketcan-receive-lists`).
+
+
+can.ko Module Params
+--------------------
+
+- **stats_timer**:
+ To calculate the SocketCAN core statistics
+ (e.g. current/maximum frames per second) this 1 second timer is
+ invoked at can.ko module start time by default. This timer can be
+ disabled by using stattimer=0 on the module commandline.
+
+- **debug**:
+ (removed since SocketCAN SVN r546)
+
+
+procfs content
+--------------
+
+As described in :ref:`socketcan-receive-lists` the SocketCAN core uses several filter
+lists to deliver received CAN frames to CAN protocol modules. These
+receive lists, their filters and the count of filter matches can be
+checked in the appropriate receive list. All entries contain the
+device and a protocol module identifier::
+
+ foo@bar:~$ cat /proc/net/can/rcvlist_all
+
+ receive list 'rx_all':
+ (vcan3: no entry)
+ (vcan2: no entry)
+ (vcan1: no entry)
+ device can_id can_mask function userdata matches ident
+ vcan0 000 00000000 f88e6370 f6c6f400 0 raw
+ (any: no entry)
+
+In this example an application requests any CAN traffic from vcan0::
+
+ rcvlist_all - list for unfiltered entries (no filter operations)
+ rcvlist_eff - list for single extended frame (EFF) entries
+ rcvlist_err - list for error message frames masks
+ rcvlist_fil - list for mask/value filters
+ rcvlist_inv - list for mask/value filters (inverse semantic)
+ rcvlist_sff - list for single standard frame (SFF) entries
+
+Additional procfs files in /proc/net/can::
+
+ stats - SocketCAN core statistics (rx/tx frames, match ratios, ...)
+ reset_stats - manual statistic reset
+ version - prints the SocketCAN core version and the ABI version
+
+
+Writing Own CAN Protocol Modules
+--------------------------------
+
+To implement a new protocol in the protocol family PF_CAN a new
+protocol has to be defined in include/linux/can.h .
+The prototypes and definitions to use the SocketCAN core can be
+accessed by including include/linux/can/core.h .
+In addition to functions that register the CAN protocol and the
+CAN device notifier chain there are functions to subscribe CAN
+frames received by CAN interfaces and to send CAN frames::
+
+ can_rx_register - subscribe CAN frames from a specific interface
+ can_rx_unregister - unsubscribe CAN frames from a specific interface
+ can_send - transmit a CAN frame (optional with local loopback)
+
+For details see the kerneldoc documentation in net/can/af_can.c or
+the source code of net/can/raw.c or net/can/bcm.c .
+
+
+CAN Network Drivers
+===================
+
+Writing a CAN network device driver is much easier than writing a
+CAN character device driver. Similar to other known network device
+drivers you mainly have to deal with:
+
+- TX: Put the CAN frame from the socket buffer to the CAN controller.
+- RX: Put the CAN frame from the CAN controller to the socket buffer.
+
+See e.g. at Documentation/networking/netdevices.txt . The differences
+for writing CAN network device driver are described below:
+
+
+General Settings
+----------------
+
+.. code-block:: C
+
+ dev->type = ARPHRD_CAN; /* the netdevice hardware type */
+ dev->flags = IFF_NOARP; /* CAN has no arp */
+
+ dev->mtu = CAN_MTU; /* sizeof(struct can_frame) -> legacy CAN interface */
+
+ or alternative, when the controller supports CAN with flexible data rate:
+ dev->mtu = CANFD_MTU; /* sizeof(struct canfd_frame) -> CAN FD interface */
+
+The struct can_frame or struct canfd_frame is the payload of each socket
+buffer (skbuff) in the protocol family PF_CAN.
+
+
+.. _socketcan-local-loopback2:
+
+Local Loopback of Sent Frames
+-----------------------------
+
+As described in :ref:`socketcan-local-loopback1` the CAN network device driver should
+support a local loopback functionality similar to the local echo
+e.g. of tty devices. In this case the driver flag IFF_ECHO has to be
+set to prevent the PF_CAN core from locally echoing sent frames
+(aka loopback) as fallback solution::
+
+ dev->flags = (IFF_NOARP | IFF_ECHO);
+
+
+CAN Controller Hardware Filters
+-------------------------------
+
+To reduce the interrupt load on deep embedded systems some CAN
+controllers support the filtering of CAN IDs or ranges of CAN IDs.
+These hardware filter capabilities vary from controller to
+controller and have to be identified as not feasible in a multi-user
+networking approach. The use of the very controller specific
+hardware filters could make sense in a very dedicated use-case, as a
+filter on driver level would affect all users in the multi-user
+system. The high efficient filter sets inside the PF_CAN core allow
+to set different multiple filters for each socket separately.
+Therefore the use of hardware filters goes to the category 'handmade
+tuning on deep embedded systems'. The author is running a MPC603e
+@133MHz with four SJA1000 CAN controllers from 2002 under heavy bus
+load without any problems ...
+
+
+The Virtual CAN Driver (vcan)
+-----------------------------
+
+Similar to the network loopback devices, vcan offers a virtual local
+CAN interface. A full qualified address on CAN consists of
+
+- a unique CAN Identifier (CAN ID)
+- the CAN bus this CAN ID is transmitted on (e.g. can0)
+
+so in common use cases more than one virtual CAN interface is needed.
+
+The virtual CAN interfaces allow the transmission and reception of CAN
+frames without real CAN controller hardware. Virtual CAN network
+devices are usually named 'vcanX', like vcan0 vcan1 vcan2 ...
+When compiled as a module the virtual CAN driver module is called vcan.ko
+
+Since Linux Kernel version 2.6.24 the vcan driver supports the Kernel
+netlink interface to create vcan network devices. The creation and
+removal of vcan network devices can be managed with the ip(8) tool::
+
+ - Create a virtual CAN network interface:
+ $ ip link add type vcan
+
+ - Create a virtual CAN network interface with a specific name 'vcan42':
+ $ ip link add dev vcan42 type vcan
+
+ - Remove a (virtual CAN) network interface 'vcan42':
+ $ ip link del vcan42
+
+
+The CAN Network Device Driver Interface
+---------------------------------------
+
+The CAN network device driver interface provides a generic interface
+to setup, configure and monitor CAN network devices. The user can then
+configure the CAN device, like setting the bit-timing parameters, via
+the netlink interface using the program "ip" from the "IPROUTE2"
+utility suite. The following chapter describes briefly how to use it.
+Furthermore, the interface uses a common data structure and exports a
+set of common functions, which all real CAN network device drivers
+should use. Please have a look to the SJA1000 or MSCAN driver to
+understand how to use them. The name of the module is can-dev.ko.
+
+
+Netlink interface to set/get devices properties
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The CAN device must be configured via netlink interface. The supported
+netlink message types are defined and briefly described in
+"include/linux/can/netlink.h". CAN link support for the program "ip"
+of the IPROUTE2 utility suite is available and it can be used as shown
+below:
+
+Setting CAN device properties::
+
+ $ ip link set can0 type can help
+ Usage: ip link set DEVICE type can
+ [ bitrate BITRATE [ sample-point SAMPLE-POINT] ] |
+ [ tq TQ prop-seg PROP_SEG phase-seg1 PHASE-SEG1
+ phase-seg2 PHASE-SEG2 [ sjw SJW ] ]
+
+ [ dbitrate BITRATE [ dsample-point SAMPLE-POINT] ] |
+ [ dtq TQ dprop-seg PROP_SEG dphase-seg1 PHASE-SEG1
+ dphase-seg2 PHASE-SEG2 [ dsjw SJW ] ]
+
+ [ loopback { on | off } ]
+ [ listen-only { on | off } ]
+ [ triple-sampling { on | off } ]
+ [ one-shot { on | off } ]
+ [ berr-reporting { on | off } ]
+ [ fd { on | off } ]
+ [ fd-non-iso { on | off } ]
+ [ presume-ack { on | off } ]
+
+ [ restart-ms TIME-MS ]
+ [ restart ]
+
+ Where: BITRATE := { 1..1000000 }
+ SAMPLE-POINT := { 0.000..0.999 }
+ TQ := { NUMBER }
+ PROP-SEG := { 1..8 }
+ PHASE-SEG1 := { 1..8 }
+ PHASE-SEG2 := { 1..8 }
+ SJW := { 1..4 }
+ RESTART-MS := { 0 | NUMBER }
+
+Display CAN device details and statistics::
+
+ $ ip -details -statistics link show can0
+ 2: can0: <NOARP,UP,LOWER_UP,ECHO> mtu 16 qdisc pfifo_fast state UP qlen 10
+ link/can
+ can <TRIPLE-SAMPLING> state ERROR-ACTIVE restart-ms 100
+ bitrate 125000 sample_point 0.875
+ tq 125 prop-seg 6 phase-seg1 7 phase-seg2 2 sjw 1
+ sja1000: tseg1 1..16 tseg2 1..8 sjw 1..4 brp 1..64 brp-inc 1
+ clock 8000000
+ re-started bus-errors arbit-lost error-warn error-pass bus-off
+ 41 17457 0 41 42 41
+ RX: bytes packets errors dropped overrun mcast
+ 140859 17608 17457 0 0 0
+ TX: bytes packets errors dropped carrier collsns
+ 861 112 0 41 0 0
+
+More info to the above output:
+
+"<TRIPLE-SAMPLING>"
+ Shows the list of selected CAN controller modes: LOOPBACK,
+ LISTEN-ONLY, or TRIPLE-SAMPLING.
+
+"state ERROR-ACTIVE"
+ The current state of the CAN controller: "ERROR-ACTIVE",
+ "ERROR-WARNING", "ERROR-PASSIVE", "BUS-OFF" or "STOPPED"
+
+"restart-ms 100"
+ Automatic restart delay time. If set to a non-zero value, a
+ restart of the CAN controller will be triggered automatically
+ in case of a bus-off condition after the specified delay time
+ in milliseconds. By default it's off.
+
+"bitrate 125000 sample-point 0.875"
+ Shows the real bit-rate in bits/sec and the sample-point in the
+ range 0.000..0.999. If the calculation of bit-timing parameters
+ is enabled in the kernel (CONFIG_CAN_CALC_BITTIMING=y), the
+ bit-timing can be defined by setting the "bitrate" argument.
+ Optionally the "sample-point" can be specified. By default it's
+ 0.000 assuming CIA-recommended sample-points.
+
+"tq 125 prop-seg 6 phase-seg1 7 phase-seg2 2 sjw 1"
+ Shows the time quanta in ns, propagation segment, phase buffer
+ segment 1 and 2 and the synchronisation jump width in units of
+ tq. They allow to define the CAN bit-timing in a hardware
+ independent format as proposed by the Bosch CAN 2.0 spec (see
+ chapter 8 of http://www.semiconductors.bosch.de/pdf/can2spec.pdf).
+
+"sja1000: tseg1 1..16 tseg2 1..8 sjw 1..4 brp 1..64 brp-inc 1 clock 8000000"
+ Shows the bit-timing constants of the CAN controller, here the
+ "sja1000". The minimum and maximum values of the time segment 1
+ and 2, the synchronisation jump width in units of tq, the
+ bitrate pre-scaler and the CAN system clock frequency in Hz.
+ These constants could be used for user-defined (non-standard)
+ bit-timing calculation algorithms in user-space.
+
+"re-started bus-errors arbit-lost error-warn error-pass bus-off"
+ Shows the number of restarts, bus and arbitration lost errors,
+ and the state changes to the error-warning, error-passive and
+ bus-off state. RX overrun errors are listed in the "overrun"
+ field of the standard network statistics.
+
+Setting the CAN Bit-Timing
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The CAN bit-timing parameters can always be defined in a hardware
+independent format as proposed in the Bosch CAN 2.0 specification
+specifying the arguments "tq", "prop_seg", "phase_seg1", "phase_seg2"
+and "sjw"::
+
+ $ ip link set canX type can tq 125 prop-seg 6 \
+ phase-seg1 7 phase-seg2 2 sjw 1
+
+If the kernel option CONFIG_CAN_CALC_BITTIMING is enabled, CIA
+recommended CAN bit-timing parameters will be calculated if the bit-
+rate is specified with the argument "bitrate"::
+
+ $ ip link set canX type can bitrate 125000
+
+Note that this works fine for the most common CAN controllers with
+standard bit-rates but may *fail* for exotic bit-rates or CAN system
+clock frequencies. Disabling CONFIG_CAN_CALC_BITTIMING saves some
+space and allows user-space tools to solely determine and set the
+bit-timing parameters. The CAN controller specific bit-timing
+constants can be used for that purpose. They are listed by the
+following command::
+
+ $ ip -details link show can0
+ ...
+ sja1000: clock 8000000 tseg1 1..16 tseg2 1..8 sjw 1..4 brp 1..64 brp-inc 1
+
+
+Starting and Stopping the CAN Network Device
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+A CAN network device is started or stopped as usual with the command
+"ifconfig canX up/down" or "ip link set canX up/down". Be aware that
+you *must* define proper bit-timing parameters for real CAN devices
+before you can start it to avoid error-prone default settings::
+
+ $ ip link set canX up type can bitrate 125000
+
+A device may enter the "bus-off" state if too many errors occurred on
+the CAN bus. Then no more messages are received or sent. An automatic
+bus-off recovery can be enabled by setting the "restart-ms" to a
+non-zero value, e.g.::
+
+ $ ip link set canX type can restart-ms 100
+
+Alternatively, the application may realize the "bus-off" condition
+by monitoring CAN error message frames and do a restart when
+appropriate with the command::
+
+ $ ip link set canX type can restart
+
+Note that a restart will also create a CAN error message frame (see
+also :ref:`socketcan-network-problem-notifications`).
+
+
+.. _socketcan-can-fd-driver:
+
+CAN FD (Flexible Data Rate) Driver Support
+------------------------------------------
+
+CAN FD capable CAN controllers support two different bitrates for the
+arbitration phase and the payload phase of the CAN FD frame. Therefore a
+second bit timing has to be specified in order to enable the CAN FD bitrate.
+
+Additionally CAN FD capable CAN controllers support up to 64 bytes of
+payload. The representation of this length in can_frame.can_dlc and
+canfd_frame.len for userspace applications and inside the Linux network
+layer is a plain value from 0 .. 64 instead of the CAN 'data length code'.
+The data length code was a 1:1 mapping to the payload length in the legacy
+CAN frames anyway. The payload length to the bus-relevant DLC mapping is
+only performed inside the CAN drivers, preferably with the helper
+functions can_dlc2len() and can_len2dlc().
+
+The CAN netdevice driver capabilities can be distinguished by the network
+devices maximum transfer unit (MTU)::
+
+ MTU = 16 (CAN_MTU) => sizeof(struct can_frame) => 'legacy' CAN device
+ MTU = 72 (CANFD_MTU) => sizeof(struct canfd_frame) => CAN FD capable device
+
+The CAN device MTU can be retrieved e.g. with a SIOCGIFMTU ioctl() syscall.
+N.B. CAN FD capable devices can also handle and send legacy CAN frames.
+
+When configuring CAN FD capable CAN controllers an additional 'data' bitrate
+has to be set. This bitrate for the data phase of the CAN FD frame has to be
+at least the bitrate which was configured for the arbitration phase. This
+second bitrate is specified analogue to the first bitrate but the bitrate
+setting keywords for the 'data' bitrate start with 'd' e.g. dbitrate,
+dsample-point, dsjw or dtq and similar settings. When a data bitrate is set
+within the configuration process the controller option "fd on" can be
+specified to enable the CAN FD mode in the CAN controller. This controller
+option also switches the device MTU to 72 (CANFD_MTU).
+
+The first CAN FD specification presented as whitepaper at the International
+CAN Conference 2012 needed to be improved for data integrity reasons.
+Therefore two CAN FD implementations have to be distinguished today:
+
+- ISO compliant: The ISO 11898-1:2015 CAN FD implementation (default)
+- non-ISO compliant: The CAN FD implementation following the 2012 whitepaper
+
+Finally there are three types of CAN FD controllers:
+
+1. ISO compliant (fixed)
+2. non-ISO compliant (fixed, like the M_CAN IP core v3.0.1 in m_can.c)
+3. ISO/non-ISO CAN FD controllers (switchable, like the PEAK PCAN-USB FD)
+
+The current ISO/non-ISO mode is announced by the CAN controller driver via
+netlink and displayed by the 'ip' tool (controller option FD-NON-ISO).
+The ISO/non-ISO-mode can be altered by setting 'fd-non-iso {on|off}' for
+switchable CAN FD controllers only.
+
+Example configuring 500 kbit/s arbitration bitrate and 4 Mbit/s data bitrate::
+
+ $ ip link set can0 up type can bitrate 500000 sample-point 0.75 \
+ dbitrate 4000000 dsample-point 0.8 fd on
+ $ ip -details link show can0
+ 5: can0: <NOARP,UP,LOWER_UP,ECHO> mtu 72 qdisc pfifo_fast state UNKNOWN \
+ mode DEFAULT group default qlen 10
+ link/can promiscuity 0
+ can <FD> state ERROR-ACTIVE (berr-counter tx 0 rx 0) restart-ms 0
+ bitrate 500000 sample-point 0.750
+ tq 50 prop-seg 14 phase-seg1 15 phase-seg2 10 sjw 1
+ pcan_usb_pro_fd: tseg1 1..64 tseg2 1..16 sjw 1..16 brp 1..1024 \
+ brp-inc 1
+ dbitrate 4000000 dsample-point 0.800
+ dtq 12 dprop-seg 7 dphase-seg1 8 dphase-seg2 4 dsjw 1
+ pcan_usb_pro_fd: dtseg1 1..16 dtseg2 1..8 dsjw 1..4 dbrp 1..1024 \
+ dbrp-inc 1
+ clock 80000000
+
+Example when 'fd-non-iso on' is added on this switchable CAN FD adapter::
+
+ can <FD,FD-NON-ISO> state ERROR-ACTIVE (berr-counter tx 0 rx 0) restart-ms 0
+
+
+Supported CAN Hardware
+----------------------
+
+Please check the "Kconfig" file in "drivers/net/can" to get an actual
+list of the support CAN hardware. On the SocketCAN project website
+(see :ref:`socketcan-resources`) there might be further drivers available, also for
+older kernel versions.
+
+
+.. _socketcan-resources:
+
+SocketCAN Resources
+===================
+
+The Linux CAN / SocketCAN project resources (project site / mailing list)
+are referenced in the MAINTAINERS file in the Linux source tree.
+Search for CAN NETWORK [LAYERS|DRIVERS].
+
+Credits
+=======
+
+- Oliver Hartkopp (PF_CAN core, filters, drivers, bcm, SJA1000 driver)
+- Urs Thuermann (PF_CAN core, kernel integration, socket interfaces, raw, vcan)
+- Jan Kizka (RT-SocketCAN core, Socket-API reconciliation)
+- Wolfgang Grandegger (RT-SocketCAN core & drivers, Raw Socket-API reviews, CAN device driver interface, MSCAN driver)
+- Robert Schwebel (design reviews, PTXdist integration)
+- Marc Kleine-Budde (design reviews, Kernel 2.6 cleanups, drivers)
+- Benedikt Spranger (reviews)
+- Thomas Gleixner (LKML reviews, coding style, posting hints)
+- Andrey Volkov (kernel subtree structure, ioctls, MSCAN driver)
+- Matthias Brukner (first SJA1000 CAN netdevice implementation Q2/2003)
+- Klaus Hitschler (PEAK driver integration)
+- Uwe Koppe (CAN netdevices with PF_PACKET approach)
+- Michael Schulze (driver layer loopback requirement, RT CAN drivers review)
+- Pavel Pisa (Bit-timing calculation)
+- Sascha Hauer (SJA1000 platform driver)
+- Sebastian Haas (SJA1000 EMS PCI driver)
+- Markus Plessing (SJA1000 EMS PCI driver)
+- Per Dalen (SJA1000 Kvaser PCI driver)
+- Sam Ravnborg (reviews, coding style, kbuild help)
diff --git a/Documentation/networking/can.txt b/Documentation/networking/can.txt
deleted file mode 100644
index aa15b9ee2e70..000000000000
--- a/Documentation/networking/can.txt
+++ /dev/null
@@ -1,1308 +0,0 @@
-============================================================================
-
-can.txt
-
-Readme file for the Controller Area Network Protocol Family (aka SocketCAN)
-
-This file contains
-
- 1 Overview / What is SocketCAN
-
- 2 Motivation / Why using the socket API
-
- 3 SocketCAN concept
- 3.1 receive lists
- 3.2 local loopback of sent frames
- 3.3 network problem notifications
-
- 4 How to use SocketCAN
- 4.1 RAW protocol sockets with can_filters (SOCK_RAW)
- 4.1.1 RAW socket option CAN_RAW_FILTER
- 4.1.2 RAW socket option CAN_RAW_ERR_FILTER
- 4.1.3 RAW socket option CAN_RAW_LOOPBACK
- 4.1.4 RAW socket option CAN_RAW_RECV_OWN_MSGS
- 4.1.5 RAW socket option CAN_RAW_FD_FRAMES
- 4.1.6 RAW socket option CAN_RAW_JOIN_FILTERS
- 4.1.7 RAW socket returned message flags
- 4.2 Broadcast Manager protocol sockets (SOCK_DGRAM)
- 4.2.1 Broadcast Manager operations
- 4.2.2 Broadcast Manager message flags
- 4.2.3 Broadcast Manager transmission timers
- 4.2.4 Broadcast Manager message sequence transmission
- 4.2.5 Broadcast Manager receive filter timers
- 4.2.6 Broadcast Manager multiplex message receive filter
- 4.2.7 Broadcast Manager CAN FD support
- 4.3 connected transport protocols (SOCK_SEQPACKET)
- 4.4 unconnected transport protocols (SOCK_DGRAM)
-
- 5 SocketCAN core module
- 5.1 can.ko module params
- 5.2 procfs content
- 5.3 writing own CAN protocol modules
-
- 6 CAN network drivers
- 6.1 general settings
- 6.2 local loopback of sent frames
- 6.3 CAN controller hardware filters
- 6.4 The virtual CAN driver (vcan)
- 6.5 The CAN network device driver interface
- 6.5.1 Netlink interface to set/get devices properties
- 6.5.2 Setting the CAN bit-timing
- 6.5.3 Starting and stopping the CAN network device
- 6.6 CAN FD (flexible data rate) driver support
- 6.7 supported CAN hardware
-
- 7 SocketCAN resources
-
- 8 Credits
-
-============================================================================
-
-1. Overview / What is SocketCAN
---------------------------------
-
-The socketcan package is an implementation of CAN protocols
-(Controller Area Network) for Linux. CAN is a networking technology
-which has widespread use in automation, embedded devices, and
-automotive fields. While there have been other CAN implementations
-for Linux based on character devices, SocketCAN uses the Berkeley
-socket API, the Linux network stack and implements the CAN device
-drivers as network interfaces. The CAN socket API has been designed
-as similar as possible to the TCP/IP protocols to allow programmers,
-familiar with network programming, to easily learn how to use CAN
-sockets.
-
-2. Motivation / Why using the socket API
-----------------------------------------
-
-There have been CAN implementations for Linux before SocketCAN so the
-question arises, why we have started another project. Most existing
-implementations come as a device driver for some CAN hardware, they
-are based on character devices and provide comparatively little
-functionality. Usually, there is only a hardware-specific device
-driver which provides a character device interface to send and
-receive raw CAN frames, directly to/from the controller hardware.
-Queueing of frames and higher-level transport protocols like ISO-TP
-have to be implemented in user space applications. Also, most
-character-device implementations support only one single process to
-open the device at a time, similar to a serial interface. Exchanging
-the CAN controller requires employment of another device driver and
-often the need for adaption of large parts of the application to the
-new driver's API.
-
-SocketCAN was designed to overcome all of these limitations. A new
-protocol family has been implemented which provides a socket interface
-to user space applications and which builds upon the Linux network
-layer, enabling use all of the provided queueing functionality. A device
-driver for CAN controller hardware registers itself with the Linux
-network layer as a network device, so that CAN frames from the
-controller can be passed up to the network layer and on to the CAN
-protocol family module and also vice-versa. Also, the protocol family
-module provides an API for transport protocol modules to register, so
-that any number of transport protocols can be loaded or unloaded
-dynamically. In fact, the can core module alone does not provide any
-protocol and cannot be used without loading at least one additional
-protocol module. Multiple sockets can be opened at the same time,
-on different or the same protocol module and they can listen/send
-frames on different or the same CAN IDs. Several sockets listening on
-the same interface for frames with the same CAN ID are all passed the
-same received matching CAN frames. An application wishing to
-communicate using a specific transport protocol, e.g. ISO-TP, just
-selects that protocol when opening the socket, and then can read and
-write application data byte streams, without having to deal with
-CAN-IDs, frames, etc.
-
-Similar functionality visible from user-space could be provided by a
-character device, too, but this would lead to a technically inelegant
-solution for a couple of reasons:
-
-* Intricate usage. Instead of passing a protocol argument to
- socket(2) and using bind(2) to select a CAN interface and CAN ID, an
- application would have to do all these operations using ioctl(2)s.
-
-* Code duplication. A character device cannot make use of the Linux
- network queueing code, so all that code would have to be duplicated
- for CAN networking.
-
-* Abstraction. In most existing character-device implementations, the
- hardware-specific device driver for a CAN controller directly
- provides the character device for the application to work with.
- This is at least very unusual in Unix systems for both, char and
- block devices. For example you don't have a character device for a
- certain UART of a serial interface, a certain sound chip in your
- computer, a SCSI or IDE controller providing access to your hard
- disk or tape streamer device. Instead, you have abstraction layers
- which provide a unified character or block device interface to the
- application on the one hand, and a interface for hardware-specific
- device drivers on the other hand. These abstractions are provided
- by subsystems like the tty layer, the audio subsystem or the SCSI
- and IDE subsystems for the devices mentioned above.
-
- The easiest way to implement a CAN device driver is as a character
- device without such a (complete) abstraction layer, as is done by most
- existing drivers. The right way, however, would be to add such a
- layer with all the functionality like registering for certain CAN
- IDs, supporting several open file descriptors and (de)multiplexing
- CAN frames between them, (sophisticated) queueing of CAN frames, and
- providing an API for device drivers to register with. However, then
- it would be no more difficult, or may be even easier, to use the
- networking framework provided by the Linux kernel, and this is what
- SocketCAN does.
-
- The use of the networking framework of the Linux kernel is just the
- natural and most appropriate way to implement CAN for Linux.
-
-3. SocketCAN concept
----------------------
-
- As described in chapter 2 it is the main goal of SocketCAN to
- provide a socket interface to user space applications which builds
- upon the Linux network layer. In contrast to the commonly known
- TCP/IP and ethernet networking, the CAN bus is a broadcast-only(!)
- medium that has no MAC-layer addressing like ethernet. The CAN-identifier
- (can_id) is used for arbitration on the CAN-bus. Therefore the CAN-IDs
- have to be chosen uniquely on the bus. When designing a CAN-ECU
- network the CAN-IDs are mapped to be sent by a specific ECU.
- For this reason a CAN-ID can be treated best as a kind of source address.
-
- 3.1 receive lists
-
- The network transparent access of multiple applications leads to the
- problem that different applications may be interested in the same
- CAN-IDs from the same CAN network interface. The SocketCAN core
- module - which implements the protocol family CAN - provides several
- high efficient receive lists for this reason. If e.g. a user space
- application opens a CAN RAW socket, the raw protocol module itself
- requests the (range of) CAN-IDs from the SocketCAN core that are
- requested by the user. The subscription and unsubscription of
- CAN-IDs can be done for specific CAN interfaces or for all(!) known
- CAN interfaces with the can_rx_(un)register() functions provided to
- CAN protocol modules by the SocketCAN core (see chapter 5).
- To optimize the CPU usage at runtime the receive lists are split up
- into several specific lists per device that match the requested
- filter complexity for a given use-case.
-
- 3.2 local loopback of sent frames
-
- As known from other networking concepts the data exchanging
- applications may run on the same or different nodes without any
- change (except for the according addressing information):
-
- ___ ___ ___ _______ ___
- | _ | | _ | | _ | | _ _ | | _ |
- ||A|| ||B|| ||C|| ||A| |B|| ||C||
- |___| |___| |___| |_______| |___|
- | | | | |
- -----------------(1)- CAN bus -(2)---------------
-
- To ensure that application A receives the same information in the
- example (2) as it would receive in example (1) there is need for
- some kind of local loopback of the sent CAN frames on the appropriate
- node.
-
- The Linux network devices (by default) just can handle the
- transmission and reception of media dependent frames. Due to the
- arbitration on the CAN bus the transmission of a low prio CAN-ID
- may be delayed by the reception of a high prio CAN frame. To
- reflect the correct* traffic on the node the loopback of the sent
- data has to be performed right after a successful transmission. If
- the CAN network interface is not capable of performing the loopback for
- some reason the SocketCAN core can do this task as a fallback solution.
- See chapter 6.2 for details (recommended).
-
- The loopback functionality is enabled by default to reflect standard
- networking behaviour for CAN applications. Due to some requests from
- the RT-SocketCAN group the loopback optionally may be disabled for each
- separate socket. See sockopts from the CAN RAW sockets in chapter 4.1.
-
- * = you really like to have this when you're running analyser tools
- like 'candump' or 'cansniffer' on the (same) node.
-
- 3.3 network problem notifications
-
- The use of the CAN bus may lead to several problems on the physical
- and media access control layer. Detecting and logging of these lower
- layer problems is a vital requirement for CAN users to identify
- hardware issues on the physical transceiver layer as well as
- arbitration problems and error frames caused by the different
- ECUs. The occurrence of detected errors are important for diagnosis
- and have to be logged together with the exact timestamp. For this
- reason the CAN interface driver can generate so called Error Message
- Frames that can optionally be passed to the user application in the
- same way as other CAN frames. Whenever an error on the physical layer
- or the MAC layer is detected (e.g. by the CAN controller) the driver
- creates an appropriate error message frame. Error messages frames can
- be requested by the user application using the common CAN filter
- mechanisms. Inside this filter definition the (interested) type of
- errors may be selected. The reception of error messages is disabled
- by default. The format of the CAN error message frame is briefly
- described in the Linux header file "include/uapi/linux/can/error.h".
-
-4. How to use SocketCAN
-------------------------
-
- Like TCP/IP, you first need to open a socket for communicating over a
- CAN network. Since SocketCAN implements a new protocol family, you
- need to pass PF_CAN as the first argument to the socket(2) system
- call. Currently, there are two CAN protocols to choose from, the raw
- socket protocol and the broadcast manager (BCM). So to open a socket,
- you would write
-
- s = socket(PF_CAN, SOCK_RAW, CAN_RAW);
-
- and
-
- s = socket(PF_CAN, SOCK_DGRAM, CAN_BCM);
-
- respectively. After the successful creation of the socket, you would
- normally use the bind(2) system call to bind the socket to a CAN
- interface (which is different from TCP/IP due to different addressing
- - see chapter 3). After binding (CAN_RAW) or connecting (CAN_BCM)
- the socket, you can read(2) and write(2) from/to the socket or use
- send(2), sendto(2), sendmsg(2) and the recv* counterpart operations
- on the socket as usual. There are also CAN specific socket options
- described below.
-
- The basic CAN frame structure and the sockaddr structure are defined
- in include/linux/can.h:
-
- struct can_frame {
- canid_t can_id; /* 32 bit CAN_ID + EFF/RTR/ERR flags */
- __u8 can_dlc; /* frame payload length in byte (0 .. 8) */
- __u8 __pad; /* padding */
- __u8 __res0; /* reserved / padding */
- __u8 __res1; /* reserved / padding */
- __u8 data[8] __attribute__((aligned(8)));
- };
-
- The alignment of the (linear) payload data[] to a 64bit boundary
- allows the user to define their own structs and unions to easily access
- the CAN payload. There is no given byteorder on the CAN bus by
- default. A read(2) system call on a CAN_RAW socket transfers a
- struct can_frame to the user space.
-
- The sockaddr_can structure has an interface index like the
- PF_PACKET socket, that also binds to a specific interface:
-
- struct sockaddr_can {
- sa_family_t can_family;
- int can_ifindex;
- union {
- /* transport protocol class address info (e.g. ISOTP) */
- struct { canid_t rx_id, tx_id; } tp;
-
- /* reserved for future CAN protocols address information */
- } can_addr;
- };
-
- To determine the interface index an appropriate ioctl() has to
- be used (example for CAN_RAW sockets without error checking):
-
- int s;
- struct sockaddr_can addr;
- struct ifreq ifr;
-
- s = socket(PF_CAN, SOCK_RAW, CAN_RAW);
-
- strcpy(ifr.ifr_name, "can0" );
- ioctl(s, SIOCGIFINDEX, &ifr);
-
- addr.can_family = AF_CAN;
- addr.can_ifindex = ifr.ifr_ifindex;
-
- bind(s, (struct sockaddr *)&addr, sizeof(addr));
-
- (..)
-
- To bind a socket to all(!) CAN interfaces the interface index must
- be 0 (zero). In this case the socket receives CAN frames from every
- enabled CAN interface. To determine the originating CAN interface
- the system call recvfrom(2) may be used instead of read(2). To send
- on a socket that is bound to 'any' interface sendto(2) is needed to
- specify the outgoing interface.
-
- Reading CAN frames from a bound CAN_RAW socket (see above) consists
- of reading a struct can_frame:
-
- struct can_frame frame;
-
- nbytes = read(s, &frame, sizeof(struct can_frame));
-
- if (nbytes < 0) {
- perror("can raw socket read");
- return 1;
- }
-
- /* paranoid check ... */
- if (nbytes < sizeof(struct can_frame)) {
- fprintf(stderr, "read: incomplete CAN frame\n");
- return 1;
- }
-
- /* do something with the received CAN frame */
-
- Writing CAN frames can be done similarly, with the write(2) system call:
-
- nbytes = write(s, &frame, sizeof(struct can_frame));
-
- When the CAN interface is bound to 'any' existing CAN interface
- (addr.can_ifindex = 0) it is recommended to use recvfrom(2) if the
- information about the originating CAN interface is needed:
-
- struct sockaddr_can addr;
- struct ifreq ifr;
- socklen_t len = sizeof(addr);
- struct can_frame frame;
-
- nbytes = recvfrom(s, &frame, sizeof(struct can_frame),
- 0, (struct sockaddr*)&addr, &len);
-
- /* get interface name of the received CAN frame */
- ifr.ifr_ifindex = addr.can_ifindex;
- ioctl(s, SIOCGIFNAME, &ifr);
- printf("Received a CAN frame from interface %s", ifr.ifr_name);
-
- To write CAN frames on sockets bound to 'any' CAN interface the
- outgoing interface has to be defined certainly.
-
- strcpy(ifr.ifr_name, "can0");
- ioctl(s, SIOCGIFINDEX, &ifr);
- addr.can_ifindex = ifr.ifr_ifindex;
- addr.can_family = AF_CAN;
-
- nbytes = sendto(s, &frame, sizeof(struct can_frame),
- 0, (struct sockaddr*)&addr, sizeof(addr));
-
- An accurate timestamp can be obtained with an ioctl(2) call after reading
- a message from the socket:
-
- struct timeval tv;
- ioctl(s, SIOCGSTAMP, &tv);
-
- The timestamp has a resolution of one microsecond and is set automatically
- at the reception of a CAN frame.
-
- Remark about CAN FD (flexible data rate) support:
-
- Generally the handling of CAN FD is very similar to the formerly described
- examples. The new CAN FD capable CAN controllers support two different
- bitrates for the arbitration phase and the payload phase of the CAN FD frame
- and up to 64 bytes of payload. This extended payload length breaks all the
- kernel interfaces (ABI) which heavily rely on the CAN frame with fixed eight
- bytes of payload (struct can_frame) like the CAN_RAW socket. Therefore e.g.
- the CAN_RAW socket supports a new socket option CAN_RAW_FD_FRAMES that
- switches the socket into a mode that allows the handling of CAN FD frames
- and (legacy) CAN frames simultaneously (see section 4.1.5).
-
- The struct canfd_frame is defined in include/linux/can.h:
-
- struct canfd_frame {
- canid_t can_id; /* 32 bit CAN_ID + EFF/RTR/ERR flags */
- __u8 len; /* frame payload length in byte (0 .. 64) */
- __u8 flags; /* additional flags for CAN FD */
- __u8 __res0; /* reserved / padding */
- __u8 __res1; /* reserved / padding */
- __u8 data[64] __attribute__((aligned(8)));
- };
-
- The struct canfd_frame and the existing struct can_frame have the can_id,
- the payload length and the payload data at the same offset inside their
- structures. This allows to handle the different structures very similar.
- When the content of a struct can_frame is copied into a struct canfd_frame
- all structure elements can be used as-is - only the data[] becomes extended.
-
- When introducing the struct canfd_frame it turned out that the data length
- code (DLC) of the struct can_frame was used as a length information as the
- length and the DLC has a 1:1 mapping in the range of 0 .. 8. To preserve
- the easy handling of the length information the canfd_frame.len element
- contains a plain length value from 0 .. 64. So both canfd_frame.len and
- can_frame.can_dlc are equal and contain a length information and no DLC.
- For details about the distinction of CAN and CAN FD capable devices and
- the mapping to the bus-relevant data length code (DLC), see chapter 6.6.
-
- The length of the two CAN(FD) frame structures define the maximum transfer
- unit (MTU) of the CAN(FD) network interface and skbuff data length. Two
- definitions are specified for CAN specific MTUs in include/linux/can.h :
-
- #define CAN_MTU (sizeof(struct can_frame)) == 16 => 'legacy' CAN frame
- #define CANFD_MTU (sizeof(struct canfd_frame)) == 72 => CAN FD frame
-
- 4.1 RAW protocol sockets with can_filters (SOCK_RAW)
-
- Using CAN_RAW sockets is extensively comparable to the commonly
- known access to CAN character devices. To meet the new possibilities
- provided by the multi user SocketCAN approach, some reasonable
- defaults are set at RAW socket binding time:
-
- - The filters are set to exactly one filter receiving everything
- - The socket only receives valid data frames (=> no error message frames)
- - The loopback of sent CAN frames is enabled (see chapter 3.2)
- - The socket does not receive its own sent frames (in loopback mode)
-
- These default settings may be changed before or after binding the socket.
- To use the referenced definitions of the socket options for CAN_RAW
- sockets, include <linux/can/raw.h>.
-
- 4.1.1 RAW socket option CAN_RAW_FILTER
-
- The reception of CAN frames using CAN_RAW sockets can be controlled
- by defining 0 .. n filters with the CAN_RAW_FILTER socket option.
-
- The CAN filter structure is defined in include/linux/can.h:
-
- struct can_filter {
- canid_t can_id;
- canid_t can_mask;
- };
-
- A filter matches, when
-
- <received_can_id> & mask == can_id & mask
-
- which is analogous to known CAN controllers hardware filter semantics.
- The filter can be inverted in this semantic, when the CAN_INV_FILTER
- bit is set in can_id element of the can_filter structure. In
- contrast to CAN controller hardware filters the user may set 0 .. n
- receive filters for each open socket separately:
-
- struct can_filter rfilter[2];
-
- rfilter[0].can_id = 0x123;
- rfilter[0].can_mask = CAN_SFF_MASK;
- rfilter[1].can_id = 0x200;
- rfilter[1].can_mask = 0x700;
-
- setsockopt(s, SOL_CAN_RAW, CAN_RAW_FILTER, &rfilter, sizeof(rfilter));
-
- To disable the reception of CAN frames on the selected CAN_RAW socket:
-
- setsockopt(s, SOL_CAN_RAW, CAN_RAW_FILTER, NULL, 0);
-
- To set the filters to zero filters is quite obsolete as to not read
- data causes the raw socket to discard the received CAN frames. But
- having this 'send only' use-case we may remove the receive list in the
- Kernel to save a little (really a very little!) CPU usage.
-
- 4.1.1.1 CAN filter usage optimisation
-
- The CAN filters are processed in per-device filter lists at CAN frame
- reception time. To reduce the number of checks that need to be performed
- while walking through the filter lists the CAN core provides an optimized
- filter handling when the filter subscription focusses on a single CAN ID.
-
- For the possible 2048 SFF CAN identifiers the identifier is used as an index
- to access the corresponding subscription list without any further checks.
- For the 2^29 possible EFF CAN identifiers a 10 bit XOR folding is used as
- hash function to retrieve the EFF table index.
-
- To benefit from the optimized filters for single CAN identifiers the
- CAN_SFF_MASK or CAN_EFF_MASK have to be set into can_filter.mask together
- with set CAN_EFF_FLAG and CAN_RTR_FLAG bits. A set CAN_EFF_FLAG bit in the
- can_filter.mask makes clear that it matters whether a SFF or EFF CAN ID is
- subscribed. E.g. in the example from above
-
- rfilter[0].can_id = 0x123;
- rfilter[0].can_mask = CAN_SFF_MASK;
-
- both SFF frames with CAN ID 0x123 and EFF frames with 0xXXXXX123 can pass.
-
- To filter for only 0x123 (SFF) and 0x12345678 (EFF) CAN identifiers the
- filter has to be defined in this way to benefit from the optimized filters:
-
- struct can_filter rfilter[2];
-
- rfilter[0].can_id = 0x123;
- rfilter[0].can_mask = (CAN_EFF_FLAG | CAN_RTR_FLAG | CAN_SFF_MASK);
- rfilter[1].can_id = 0x12345678 | CAN_EFF_FLAG;
- rfilter[1].can_mask = (CAN_EFF_FLAG | CAN_RTR_FLAG | CAN_EFF_MASK);
-
- setsockopt(s, SOL_CAN_RAW, CAN_RAW_FILTER, &rfilter, sizeof(rfilter));
-
- 4.1.2 RAW socket option CAN_RAW_ERR_FILTER
-
- As described in chapter 3.3 the CAN interface driver can generate so
- called Error Message Frames that can optionally be passed to the user
- application in the same way as other CAN frames. The possible
- errors are divided into different error classes that may be filtered
- using the appropriate error mask. To register for every possible
- error condition CAN_ERR_MASK can be used as value for the error mask.
- The values for the error mask are defined in linux/can/error.h .
-
- can_err_mask_t err_mask = ( CAN_ERR_TX_TIMEOUT | CAN_ERR_BUSOFF );
-
- setsockopt(s, SOL_CAN_RAW, CAN_RAW_ERR_FILTER,
- &err_mask, sizeof(err_mask));
-
- 4.1.3 RAW socket option CAN_RAW_LOOPBACK
-
- To meet multi user needs the local loopback is enabled by default
- (see chapter 3.2 for details). But in some embedded use-cases
- (e.g. when only one application uses the CAN bus) this loopback
- functionality can be disabled (separately for each socket):
-
- int loopback = 0; /* 0 = disabled, 1 = enabled (default) */
-
- setsockopt(s, SOL_CAN_RAW, CAN_RAW_LOOPBACK, &loopback, sizeof(loopback));
-
- 4.1.4 RAW socket option CAN_RAW_RECV_OWN_MSGS
-
- When the local loopback is enabled, all the sent CAN frames are
- looped back to the open CAN sockets that registered for the CAN
- frames' CAN-ID on this given interface to meet the multi user
- needs. The reception of the CAN frames on the same socket that was
- sending the CAN frame is assumed to be unwanted and therefore
- disabled by default. This default behaviour may be changed on
- demand:
-
- int recv_own_msgs = 1; /* 0 = disabled (default), 1 = enabled */
-
- setsockopt(s, SOL_CAN_RAW, CAN_RAW_RECV_OWN_MSGS,
- &recv_own_msgs, sizeof(recv_own_msgs));
-
- 4.1.5 RAW socket option CAN_RAW_FD_FRAMES
-
- CAN FD support in CAN_RAW sockets can be enabled with a new socket option
- CAN_RAW_FD_FRAMES which is off by default. When the new socket option is
- not supported by the CAN_RAW socket (e.g. on older kernels), switching the
- CAN_RAW_FD_FRAMES option returns the error -ENOPROTOOPT.
-
- Once CAN_RAW_FD_FRAMES is enabled the application can send both CAN frames
- and CAN FD frames. OTOH the application has to handle CAN and CAN FD frames
- when reading from the socket.
-
- CAN_RAW_FD_FRAMES enabled: CAN_MTU and CANFD_MTU are allowed
- CAN_RAW_FD_FRAMES disabled: only CAN_MTU is allowed (default)
-
- Example:
- [ remember: CANFD_MTU == sizeof(struct canfd_frame) ]
-
- struct canfd_frame cfd;
-
- nbytes = read(s, &cfd, CANFD_MTU);
-
- if (nbytes == CANFD_MTU) {
- printf("got CAN FD frame with length %d\n", cfd.len);
- /* cfd.flags contains valid data */
- } else if (nbytes == CAN_MTU) {
- printf("got legacy CAN frame with length %d\n", cfd.len);
- /* cfd.flags is undefined */
- } else {
- fprintf(stderr, "read: invalid CAN(FD) frame\n");
- return 1;
- }
-
- /* the content can be handled independently from the received MTU size */
-
- printf("can_id: %X data length: %d data: ", cfd.can_id, cfd.len);
- for (i = 0; i < cfd.len; i++)
- printf("%02X ", cfd.data[i]);
-
- When reading with size CANFD_MTU only returns CAN_MTU bytes that have
- been received from the socket a legacy CAN frame has been read into the
- provided CAN FD structure. Note that the canfd_frame.flags data field is
- not specified in the struct can_frame and therefore it is only valid in
- CANFD_MTU sized CAN FD frames.
-
- Implementation hint for new CAN applications:
-
- To build a CAN FD aware application use struct canfd_frame as basic CAN
- data structure for CAN_RAW based applications. When the application is
- executed on an older Linux kernel and switching the CAN_RAW_FD_FRAMES
- socket option returns an error: No problem. You'll get legacy CAN frames
- or CAN FD frames and can process them the same way.
-
- When sending to CAN devices make sure that the device is capable to handle
- CAN FD frames by checking if the device maximum transfer unit is CANFD_MTU.
- The CAN device MTU can be retrieved e.g. with a SIOCGIFMTU ioctl() syscall.
-
- 4.1.6 RAW socket option CAN_RAW_JOIN_FILTERS
-
- The CAN_RAW socket can set multiple CAN identifier specific filters that
- lead to multiple filters in the af_can.c filter processing. These filters
- are indenpendent from each other which leads to logical OR'ed filters when
- applied (see 4.1.1).
-
- This socket option joines the given CAN filters in the way that only CAN
- frames are passed to user space that matched *all* given CAN filters. The
- semantic for the applied filters is therefore changed to a logical AND.
-
- This is useful especially when the filterset is a combination of filters
- where the CAN_INV_FILTER flag is set in order to notch single CAN IDs or
- CAN ID ranges from the incoming traffic.
-
- 4.1.7 RAW socket returned message flags
-
- When using recvmsg() call, the msg->msg_flags may contain following flags:
-
- MSG_DONTROUTE: set when the received frame was created on the local host.
-
- MSG_CONFIRM: set when the frame was sent via the socket it is received on.
- This flag can be interpreted as a 'transmission confirmation' when the
- CAN driver supports the echo of frames on driver level, see 3.2 and 6.2.
- In order to receive such messages, CAN_RAW_RECV_OWN_MSGS must be set.
-
- 4.2 Broadcast Manager protocol sockets (SOCK_DGRAM)
-
- The Broadcast Manager protocol provides a command based configuration
- interface to filter and send (e.g. cyclic) CAN messages in kernel space.
-
- Receive filters can be used to down sample frequent messages; detect events
- such as message contents changes, packet length changes, and do time-out
- monitoring of received messages.
-
- Periodic transmission tasks of CAN frames or a sequence of CAN frames can be
- created and modified at runtime; both the message content and the two
- possible transmit intervals can be altered.
-
- A BCM socket is not intended for sending individual CAN frames using the
- struct can_frame as known from the CAN_RAW socket. Instead a special BCM
- configuration message is defined. The basic BCM configuration message used
- to communicate with the broadcast manager and the available operations are
- defined in the linux/can/bcm.h include. The BCM message consists of a
- message header with a command ('opcode') followed by zero or more CAN frames.
- The broadcast manager sends responses to user space in the same form:
-
- struct bcm_msg_head {
- __u32 opcode; /* command */
- __u32 flags; /* special flags */
- __u32 count; /* run 'count' times with ival1 */
- struct timeval ival1, ival2; /* count and subsequent interval */
- canid_t can_id; /* unique can_id for task */
- __u32 nframes; /* number of can_frames following */
- struct can_frame frames[0];
- };
-
- The aligned payload 'frames' uses the same basic CAN frame structure defined
- at the beginning of section 4 and in the include/linux/can.h include. All
- messages to the broadcast manager from user space have this structure.
-
- Note a CAN_BCM socket must be connected instead of bound after socket
- creation (example without error checking):
-
- int s;
- struct sockaddr_can addr;
- struct ifreq ifr;
-
- s = socket(PF_CAN, SOCK_DGRAM, CAN_BCM);
-
- strcpy(ifr.ifr_name, "can0");
- ioctl(s, SIOCGIFINDEX, &ifr);
-
- addr.can_family = AF_CAN;
- addr.can_ifindex = ifr.ifr_ifindex;
-
- connect(s, (struct sockaddr *)&addr, sizeof(addr));
-
- (..)
-
- The broadcast manager socket is able to handle any number of in flight
- transmissions or receive filters concurrently. The different RX/TX jobs are
- distinguished by the unique can_id in each BCM message. However additional
- CAN_BCM sockets are recommended to communicate on multiple CAN interfaces.
- When the broadcast manager socket is bound to 'any' CAN interface (=> the
- interface index is set to zero) the configured receive filters apply to any
- CAN interface unless the sendto() syscall is used to overrule the 'any' CAN
- interface index. When using recvfrom() instead of read() to retrieve BCM
- socket messages the originating CAN interface is provided in can_ifindex.
-
- 4.2.1 Broadcast Manager operations
-
- The opcode defines the operation for the broadcast manager to carry out,
- or details the broadcast managers response to several events, including
- user requests.
-
- Transmit Operations (user space to broadcast manager):
-
- TX_SETUP: Create (cyclic) transmission task.
-
- TX_DELETE: Remove (cyclic) transmission task, requires only can_id.
-
- TX_READ: Read properties of (cyclic) transmission task for can_id.
-
- TX_SEND: Send one CAN frame.
-
- Transmit Responses (broadcast manager to user space):
-
- TX_STATUS: Reply to TX_READ request (transmission task configuration).
-
- TX_EXPIRED: Notification when counter finishes sending at initial interval
- 'ival1'. Requires the TX_COUNTEVT flag to be set at TX_SETUP.
-
- Receive Operations (user space to broadcast manager):
-
- RX_SETUP: Create RX content filter subscription.
-
- RX_DELETE: Remove RX content filter subscription, requires only can_id.
-
- RX_READ: Read properties of RX content filter subscription for can_id.
-
- Receive Responses (broadcast manager to user space):
-
- RX_STATUS: Reply to RX_READ request (filter task configuration).
-
- RX_TIMEOUT: Cyclic message is detected to be absent (timer ival1 expired).
-
- RX_CHANGED: BCM message with updated CAN frame (detected content change).
- Sent on first message received or on receipt of revised CAN messages.
-
- 4.2.2 Broadcast Manager message flags
-
- When sending a message to the broadcast manager the 'flags' element may
- contain the following flag definitions which influence the behaviour:
-
- SETTIMER: Set the values of ival1, ival2 and count
-
- STARTTIMER: Start the timer with the actual values of ival1, ival2
- and count. Starting the timer leads simultaneously to emit a CAN frame.
-
- TX_COUNTEVT: Create the message TX_EXPIRED when count expires
-
- TX_ANNOUNCE: A change of data by the process is emitted immediately.
-
- TX_CP_CAN_ID: Copies the can_id from the message header to each
- subsequent frame in frames. This is intended as usage simplification. For
- TX tasks the unique can_id from the message header may differ from the
- can_id(s) stored for transmission in the subsequent struct can_frame(s).
-
- RX_FILTER_ID: Filter by can_id alone, no frames required (nframes=0).
-
- RX_CHECK_DLC: A change of the DLC leads to an RX_CHANGED.
-
- RX_NO_AUTOTIMER: Prevent automatically starting the timeout monitor.
-
- RX_ANNOUNCE_RESUME: If passed at RX_SETUP and a receive timeout occurred, a
- RX_CHANGED message will be generated when the (cyclic) receive restarts.
-
- TX_RESET_MULTI_IDX: Reset the index for the multiple frame transmission.
-
- RX_RTR_FRAME: Send reply for RTR-request (placed in op->frames[0]).
-
- 4.2.3 Broadcast Manager transmission timers
-
- Periodic transmission configurations may use up to two interval timers.
- In this case the BCM sends a number of messages ('count') at an interval
- 'ival1', then continuing to send at another given interval 'ival2'. When
- only one timer is needed 'count' is set to zero and only 'ival2' is used.
- When SET_TIMER and START_TIMER flag were set the timers are activated.
- The timer values can be altered at runtime when only SET_TIMER is set.
-
- 4.2.4 Broadcast Manager message sequence transmission
-
- Up to 256 CAN frames can be transmitted in a sequence in the case of a cyclic
- TX task configuration. The number of CAN frames is provided in the 'nframes'
- element of the BCM message head. The defined number of CAN frames are added
- as array to the TX_SETUP BCM configuration message.
-
- /* create a struct to set up a sequence of four CAN frames */
- struct {
- struct bcm_msg_head msg_head;
- struct can_frame frame[4];
- } mytxmsg;
-
- (..)
- mytxmsg.msg_head.nframes = 4;
- (..)
-
- write(s, &mytxmsg, sizeof(mytxmsg));
-
- With every transmission the index in the array of CAN frames is increased
- and set to zero at index overflow.
-
- 4.2.5 Broadcast Manager receive filter timers
-
- The timer values ival1 or ival2 may be set to non-zero values at RX_SETUP.
- When the SET_TIMER flag is set the timers are enabled:
-
- ival1: Send RX_TIMEOUT when a received message is not received again within
- the given time. When START_TIMER is set at RX_SETUP the timeout detection
- is activated directly - even without a former CAN frame reception.
-
- ival2: Throttle the received message rate down to the value of ival2. This
- is useful to reduce messages for the application when the signal inside the
- CAN frame is stateless as state changes within the ival2 periode may get
- lost.
-
- 4.2.6 Broadcast Manager multiplex message receive filter
-
- To filter for content changes in multiplex message sequences an array of more
- than one CAN frames can be passed in a RX_SETUP configuration message. The
- data bytes of the first CAN frame contain the mask of relevant bits that
- have to match in the subsequent CAN frames with the received CAN frame.
- If one of the subsequent CAN frames is matching the bits in that frame data
- mark the relevant content to be compared with the previous received content.
- Up to 257 CAN frames (multiplex filter bit mask CAN frame plus 256 CAN
- filters) can be added as array to the TX_SETUP BCM configuration message.
-
- /* usually used to clear CAN frame data[] - beware of endian problems! */
- #define U64_DATA(p) (*(unsigned long long*)(p)->data)
-
- struct {
- struct bcm_msg_head msg_head;
- struct can_frame frame[5];
- } msg;
-
- msg.msg_head.opcode = RX_SETUP;
- msg.msg_head.can_id = 0x42;
- msg.msg_head.flags = 0;
- msg.msg_head.nframes = 5;
- U64_DATA(&msg.frame[0]) = 0xFF00000000000000ULL; /* MUX mask */
- U64_DATA(&msg.frame[1]) = 0x01000000000000FFULL; /* data mask (MUX 0x01) */
- U64_DATA(&msg.frame[2]) = 0x0200FFFF000000FFULL; /* data mask (MUX 0x02) */
- U64_DATA(&msg.frame[3]) = 0x330000FFFFFF0003ULL; /* data mask (MUX 0x33) */
- U64_DATA(&msg.frame[4]) = 0x4F07FC0FF0000000ULL; /* data mask (MUX 0x4F) */
-
- write(s, &msg, sizeof(msg));
-
- 4.2.7 Broadcast Manager CAN FD support
-
- The programming API of the CAN_BCM depends on struct can_frame which is
- given as array directly behind the bcm_msg_head structure. To follow this
- schema for the CAN FD frames a new flag 'CAN_FD_FRAME' in the bcm_msg_head
- flags indicates that the concatenated CAN frame structures behind the
- bcm_msg_head are defined as struct canfd_frame.
-
- struct {
- struct bcm_msg_head msg_head;
- struct canfd_frame frame[5];
- } msg;
-
- msg.msg_head.opcode = RX_SETUP;
- msg.msg_head.can_id = 0x42;
- msg.msg_head.flags = CAN_FD_FRAME;
- msg.msg_head.nframes = 5;
- (..)
-
- When using CAN FD frames for multiplex filtering the MUX mask is still
- expected in the first 64 bit of the struct canfd_frame data section.
-
- 4.3 connected transport protocols (SOCK_SEQPACKET)
- 4.4 unconnected transport protocols (SOCK_DGRAM)
-
-
-5. SocketCAN core module
--------------------------
-
- The SocketCAN core module implements the protocol family
- PF_CAN. CAN protocol modules are loaded by the core module at
- runtime. The core module provides an interface for CAN protocol
- modules to subscribe needed CAN IDs (see chapter 3.1).
-
- 5.1 can.ko module params
-
- - stats_timer: To calculate the SocketCAN core statistics
- (e.g. current/maximum frames per second) this 1 second timer is
- invoked at can.ko module start time by default. This timer can be
- disabled by using stattimer=0 on the module commandline.
-
- - debug: (removed since SocketCAN SVN r546)
-
- 5.2 procfs content
-
- As described in chapter 3.1 the SocketCAN core uses several filter
- lists to deliver received CAN frames to CAN protocol modules. These
- receive lists, their filters and the count of filter matches can be
- checked in the appropriate receive list. All entries contain the
- device and a protocol module identifier:
-
- foo@bar:~$ cat /proc/net/can/rcvlist_all
-
- receive list 'rx_all':
- (vcan3: no entry)
- (vcan2: no entry)
- (vcan1: no entry)
- device can_id can_mask function userdata matches ident
- vcan0 000 00000000 f88e6370 f6c6f400 0 raw
- (any: no entry)
-
- In this example an application requests any CAN traffic from vcan0.
-
- rcvlist_all - list for unfiltered entries (no filter operations)
- rcvlist_eff - list for single extended frame (EFF) entries
- rcvlist_err - list for error message frames masks
- rcvlist_fil - list for mask/value filters
- rcvlist_inv - list for mask/value filters (inverse semantic)
- rcvlist_sff - list for single standard frame (SFF) entries
-
- Additional procfs files in /proc/net/can
-
- stats - SocketCAN core statistics (rx/tx frames, match ratios, ...)
- reset_stats - manual statistic reset
- version - prints the SocketCAN core version and the ABI version
-
- 5.3 writing own CAN protocol modules
-
- To implement a new protocol in the protocol family PF_CAN a new
- protocol has to be defined in include/linux/can.h .
- The prototypes and definitions to use the SocketCAN core can be
- accessed by including include/linux/can/core.h .
- In addition to functions that register the CAN protocol and the
- CAN device notifier chain there are functions to subscribe CAN
- frames received by CAN interfaces and to send CAN frames:
-
- can_rx_register - subscribe CAN frames from a specific interface
- can_rx_unregister - unsubscribe CAN frames from a specific interface
- can_send - transmit a CAN frame (optional with local loopback)
-
- For details see the kerneldoc documentation in net/can/af_can.c or
- the source code of net/can/raw.c or net/can/bcm.c .
-
-6. CAN network drivers
-----------------------
-
- Writing a CAN network device driver is much easier than writing a
- CAN character device driver. Similar to other known network device
- drivers you mainly have to deal with:
-
- - TX: Put the CAN frame from the socket buffer to the CAN controller.
- - RX: Put the CAN frame from the CAN controller to the socket buffer.
-
- See e.g. at Documentation/networking/netdevices.txt . The differences
- for writing CAN network device driver are described below:
-
- 6.1 general settings
-
- dev->type = ARPHRD_CAN; /* the netdevice hardware type */
- dev->flags = IFF_NOARP; /* CAN has no arp */
-
- dev->mtu = CAN_MTU; /* sizeof(struct can_frame) -> legacy CAN interface */
-
- or alternative, when the controller supports CAN with flexible data rate:
- dev->mtu = CANFD_MTU; /* sizeof(struct canfd_frame) -> CAN FD interface */
-
- The struct can_frame or struct canfd_frame is the payload of each socket
- buffer (skbuff) in the protocol family PF_CAN.
-
- 6.2 local loopback of sent frames
-
- As described in chapter 3.2 the CAN network device driver should
- support a local loopback functionality similar to the local echo
- e.g. of tty devices. In this case the driver flag IFF_ECHO has to be
- set to prevent the PF_CAN core from locally echoing sent frames
- (aka loopback) as fallback solution:
-
- dev->flags = (IFF_NOARP | IFF_ECHO);
-
- 6.3 CAN controller hardware filters
-
- To reduce the interrupt load on deep embedded systems some CAN
- controllers support the filtering of CAN IDs or ranges of CAN IDs.
- These hardware filter capabilities vary from controller to
- controller and have to be identified as not feasible in a multi-user
- networking approach. The use of the very controller specific
- hardware filters could make sense in a very dedicated use-case, as a
- filter on driver level would affect all users in the multi-user
- system. The high efficient filter sets inside the PF_CAN core allow
- to set different multiple filters for each socket separately.
- Therefore the use of hardware filters goes to the category 'handmade
- tuning on deep embedded systems'. The author is running a MPC603e
- @133MHz with four SJA1000 CAN controllers from 2002 under heavy bus
- load without any problems ...
-
- 6.4 The virtual CAN driver (vcan)
-
- Similar to the network loopback devices, vcan offers a virtual local
- CAN interface. A full qualified address on CAN consists of
-
- - a unique CAN Identifier (CAN ID)
- - the CAN bus this CAN ID is transmitted on (e.g. can0)
-
- so in common use cases more than one virtual CAN interface is needed.
-
- The virtual CAN interfaces allow the transmission and reception of CAN
- frames without real CAN controller hardware. Virtual CAN network
- devices are usually named 'vcanX', like vcan0 vcan1 vcan2 ...
- When compiled as a module the virtual CAN driver module is called vcan.ko
-
- Since Linux Kernel version 2.6.24 the vcan driver supports the Kernel
- netlink interface to create vcan network devices. The creation and
- removal of vcan network devices can be managed with the ip(8) tool:
-
- - Create a virtual CAN network interface:
- $ ip link add type vcan
-
- - Create a virtual CAN network interface with a specific name 'vcan42':
- $ ip link add dev vcan42 type vcan
-
- - Remove a (virtual CAN) network interface 'vcan42':
- $ ip link del vcan42
-
- 6.5 The CAN network device driver interface
-
- The CAN network device driver interface provides a generic interface
- to setup, configure and monitor CAN network devices. The user can then
- configure the CAN device, like setting the bit-timing parameters, via
- the netlink interface using the program "ip" from the "IPROUTE2"
- utility suite. The following chapter describes briefly how to use it.
- Furthermore, the interface uses a common data structure and exports a
- set of common functions, which all real CAN network device drivers
- should use. Please have a look to the SJA1000 or MSCAN driver to
- understand how to use them. The name of the module is can-dev.ko.
-
- 6.5.1 Netlink interface to set/get devices properties
-
- The CAN device must be configured via netlink interface. The supported
- netlink message types are defined and briefly described in
- "include/linux/can/netlink.h". CAN link support for the program "ip"
- of the IPROUTE2 utility suite is available and it can be used as shown
- below:
-
- - Setting CAN device properties:
-
- $ ip link set can0 type can help
- Usage: ip link set DEVICE type can
- [ bitrate BITRATE [ sample-point SAMPLE-POINT] ] |
- [ tq TQ prop-seg PROP_SEG phase-seg1 PHASE-SEG1
- phase-seg2 PHASE-SEG2 [ sjw SJW ] ]
-
- [ dbitrate BITRATE [ dsample-point SAMPLE-POINT] ] |
- [ dtq TQ dprop-seg PROP_SEG dphase-seg1 PHASE-SEG1
- dphase-seg2 PHASE-SEG2 [ dsjw SJW ] ]
-
- [ loopback { on | off } ]
- [ listen-only { on | off } ]
- [ triple-sampling { on | off } ]
- [ one-shot { on | off } ]
- [ berr-reporting { on | off } ]
- [ fd { on | off } ]
- [ fd-non-iso { on | off } ]
- [ presume-ack { on | off } ]
-
- [ restart-ms TIME-MS ]
- [ restart ]
-
- Where: BITRATE := { 1..1000000 }
- SAMPLE-POINT := { 0.000..0.999 }
- TQ := { NUMBER }
- PROP-SEG := { 1..8 }
- PHASE-SEG1 := { 1..8 }
- PHASE-SEG2 := { 1..8 }
- SJW := { 1..4 }
- RESTART-MS := { 0 | NUMBER }
-
- - Display CAN device details and statistics:
-
- $ ip -details -statistics link show can0
- 2: can0: <NOARP,UP,LOWER_UP,ECHO> mtu 16 qdisc pfifo_fast state UP qlen 10
- link/can
- can <TRIPLE-SAMPLING> state ERROR-ACTIVE restart-ms 100
- bitrate 125000 sample_point 0.875
- tq 125 prop-seg 6 phase-seg1 7 phase-seg2 2 sjw 1
- sja1000: tseg1 1..16 tseg2 1..8 sjw 1..4 brp 1..64 brp-inc 1
- clock 8000000
- re-started bus-errors arbit-lost error-warn error-pass bus-off
- 41 17457 0 41 42 41
- RX: bytes packets errors dropped overrun mcast
- 140859 17608 17457 0 0 0
- TX: bytes packets errors dropped carrier collsns
- 861 112 0 41 0 0
-
- More info to the above output:
-
- "<TRIPLE-SAMPLING>"
- Shows the list of selected CAN controller modes: LOOPBACK,
- LISTEN-ONLY, or TRIPLE-SAMPLING.
-
- "state ERROR-ACTIVE"
- The current state of the CAN controller: "ERROR-ACTIVE",
- "ERROR-WARNING", "ERROR-PASSIVE", "BUS-OFF" or "STOPPED"
-
- "restart-ms 100"
- Automatic restart delay time. If set to a non-zero value, a
- restart of the CAN controller will be triggered automatically
- in case of a bus-off condition after the specified delay time
- in milliseconds. By default it's off.
-
- "bitrate 125000 sample-point 0.875"
- Shows the real bit-rate in bits/sec and the sample-point in the
- range 0.000..0.999. If the calculation of bit-timing parameters
- is enabled in the kernel (CONFIG_CAN_CALC_BITTIMING=y), the
- bit-timing can be defined by setting the "bitrate" argument.
- Optionally the "sample-point" can be specified. By default it's
- 0.000 assuming CIA-recommended sample-points.
-
- "tq 125 prop-seg 6 phase-seg1 7 phase-seg2 2 sjw 1"
- Shows the time quanta in ns, propagation segment, phase buffer
- segment 1 and 2 and the synchronisation jump width in units of
- tq. They allow to define the CAN bit-timing in a hardware
- independent format as proposed by the Bosch CAN 2.0 spec (see
- chapter 8 of http://www.semiconductors.bosch.de/pdf/can2spec.pdf).
-
- "sja1000: tseg1 1..16 tseg2 1..8 sjw 1..4 brp 1..64 brp-inc 1
- clock 8000000"
- Shows the bit-timing constants of the CAN controller, here the
- "sja1000". The minimum and maximum values of the time segment 1
- and 2, the synchronisation jump width in units of tq, the
- bitrate pre-scaler and the CAN system clock frequency in Hz.
- These constants could be used for user-defined (non-standard)
- bit-timing calculation algorithms in user-space.
-
- "re-started bus-errors arbit-lost error-warn error-pass bus-off"
- Shows the number of restarts, bus and arbitration lost errors,
- and the state changes to the error-warning, error-passive and
- bus-off state. RX overrun errors are listed in the "overrun"
- field of the standard network statistics.
-
- 6.5.2 Setting the CAN bit-timing
-
- The CAN bit-timing parameters can always be defined in a hardware
- independent format as proposed in the Bosch CAN 2.0 specification
- specifying the arguments "tq", "prop_seg", "phase_seg1", "phase_seg2"
- and "sjw":
-
- $ ip link set canX type can tq 125 prop-seg 6 \
- phase-seg1 7 phase-seg2 2 sjw 1
-
- If the kernel option CONFIG_CAN_CALC_BITTIMING is enabled, CIA
- recommended CAN bit-timing parameters will be calculated if the bit-
- rate is specified with the argument "bitrate":
-
- $ ip link set canX type can bitrate 125000
-
- Note that this works fine for the most common CAN controllers with
- standard bit-rates but may *fail* for exotic bit-rates or CAN system
- clock frequencies. Disabling CONFIG_CAN_CALC_BITTIMING saves some
- space and allows user-space tools to solely determine and set the
- bit-timing parameters. The CAN controller specific bit-timing
- constants can be used for that purpose. They are listed by the
- following command:
-
- $ ip -details link show can0
- ...
- sja1000: clock 8000000 tseg1 1..16 tseg2 1..8 sjw 1..4 brp 1..64 brp-inc 1
-
- 6.5.3 Starting and stopping the CAN network device
-
- A CAN network device is started or stopped as usual with the command
- "ifconfig canX up/down" or "ip link set canX up/down". Be aware that
- you *must* define proper bit-timing parameters for real CAN devices
- before you can start it to avoid error-prone default settings:
-
- $ ip link set canX up type can bitrate 125000
-
- A device may enter the "bus-off" state if too many errors occurred on
- the CAN bus. Then no more messages are received or sent. An automatic
- bus-off recovery can be enabled by setting the "restart-ms" to a
- non-zero value, e.g.:
-
- $ ip link set canX type can restart-ms 100
-
- Alternatively, the application may realize the "bus-off" condition
- by monitoring CAN error message frames and do a restart when
- appropriate with the command:
-
- $ ip link set canX type can restart
-
- Note that a restart will also create a CAN error message frame (see
- also chapter 3.3).
-
- 6.6 CAN FD (flexible data rate) driver support
-
- CAN FD capable CAN controllers support two different bitrates for the
- arbitration phase and the payload phase of the CAN FD frame. Therefore a
- second bit timing has to be specified in order to enable the CAN FD bitrate.
-
- Additionally CAN FD capable CAN controllers support up to 64 bytes of
- payload. The representation of this length in can_frame.can_dlc and
- canfd_frame.len for userspace applications and inside the Linux network
- layer is a plain value from 0 .. 64 instead of the CAN 'data length code'.
- The data length code was a 1:1 mapping to the payload length in the legacy
- CAN frames anyway. The payload length to the bus-relevant DLC mapping is
- only performed inside the CAN drivers, preferably with the helper
- functions can_dlc2len() and can_len2dlc().
-
- The CAN netdevice driver capabilities can be distinguished by the network
- devices maximum transfer unit (MTU):
-
- MTU = 16 (CAN_MTU) => sizeof(struct can_frame) => 'legacy' CAN device
- MTU = 72 (CANFD_MTU) => sizeof(struct canfd_frame) => CAN FD capable device
-
- The CAN device MTU can be retrieved e.g. with a SIOCGIFMTU ioctl() syscall.
- N.B. CAN FD capable devices can also handle and send legacy CAN frames.
-
- When configuring CAN FD capable CAN controllers an additional 'data' bitrate
- has to be set. This bitrate for the data phase of the CAN FD frame has to be
- at least the bitrate which was configured for the arbitration phase. This
- second bitrate is specified analogue to the first bitrate but the bitrate
- setting keywords for the 'data' bitrate start with 'd' e.g. dbitrate,
- dsample-point, dsjw or dtq and similar settings. When a data bitrate is set
- within the configuration process the controller option "fd on" can be
- specified to enable the CAN FD mode in the CAN controller. This controller
- option also switches the device MTU to 72 (CANFD_MTU).
-
- The first CAN FD specification presented as whitepaper at the International
- CAN Conference 2012 needed to be improved for data integrity reasons.
- Therefore two CAN FD implementations have to be distinguished today:
-
- - ISO compliant: The ISO 11898-1:2015 CAN FD implementation (default)
- - non-ISO compliant: The CAN FD implementation following the 2012 whitepaper
-
- Finally there are three types of CAN FD controllers:
-
- 1. ISO compliant (fixed)
- 2. non-ISO compliant (fixed, like the M_CAN IP core v3.0.1 in m_can.c)
- 3. ISO/non-ISO CAN FD controllers (switchable, like the PEAK PCAN-USB FD)
-
- The current ISO/non-ISO mode is announced by the CAN controller driver via
- netlink and displayed by the 'ip' tool (controller option FD-NON-ISO).
- The ISO/non-ISO-mode can be altered by setting 'fd-non-iso {on|off}' for
- switchable CAN FD controllers only.
-
- Example configuring 500 kbit/s arbitration bitrate and 4 Mbit/s data bitrate:
-
- $ ip link set can0 up type can bitrate 500000 sample-point 0.75 \
- dbitrate 4000000 dsample-point 0.8 fd on
- $ ip -details link show can0
- 5: can0: <NOARP,UP,LOWER_UP,ECHO> mtu 72 qdisc pfifo_fast state UNKNOWN \
- mode DEFAULT group default qlen 10
- link/can promiscuity 0
- can <FD> state ERROR-ACTIVE (berr-counter tx 0 rx 0) restart-ms 0
- bitrate 500000 sample-point 0.750
- tq 50 prop-seg 14 phase-seg1 15 phase-seg2 10 sjw 1
- pcan_usb_pro_fd: tseg1 1..64 tseg2 1..16 sjw 1..16 brp 1..1024 \
- brp-inc 1
- dbitrate 4000000 dsample-point 0.800
- dtq 12 dprop-seg 7 dphase-seg1 8 dphase-seg2 4 dsjw 1
- pcan_usb_pro_fd: dtseg1 1..16 dtseg2 1..8 dsjw 1..4 dbrp 1..1024 \
- dbrp-inc 1
- clock 80000000
-
- Example when 'fd-non-iso on' is added on this switchable CAN FD adapter:
- can <FD,FD-NON-ISO> state ERROR-ACTIVE (berr-counter tx 0 rx 0) restart-ms 0
-
- 6.7 Supported CAN hardware
-
- Please check the "Kconfig" file in "drivers/net/can" to get an actual
- list of the support CAN hardware. On the SocketCAN project website
- (see chapter 7) there might be further drivers available, also for
- older kernel versions.
-
-7. SocketCAN resources
------------------------
-
- The Linux CAN / SocketCAN project resources (project site / mailing list)
- are referenced in the MAINTAINERS file in the Linux source tree.
- Search for CAN NETWORK [LAYERS|DRIVERS].
-
-8. Credits
-----------
-
- Oliver Hartkopp (PF_CAN core, filters, drivers, bcm, SJA1000 driver)
- Urs Thuermann (PF_CAN core, kernel integration, socket interfaces, raw, vcan)
- Jan Kizka (RT-SocketCAN core, Socket-API reconciliation)
- Wolfgang Grandegger (RT-SocketCAN core & drivers, Raw Socket-API reviews,
- CAN device driver interface, MSCAN driver)
- Robert Schwebel (design reviews, PTXdist integration)
- Marc Kleine-Budde (design reviews, Kernel 2.6 cleanups, drivers)
- Benedikt Spranger (reviews)
- Thomas Gleixner (LKML reviews, coding style, posting hints)
- Andrey Volkov (kernel subtree structure, ioctls, MSCAN driver)
- Matthias Brukner (first SJA1000 CAN netdevice implementation Q2/2003)
- Klaus Hitschler (PEAK driver integration)
- Uwe Koppe (CAN netdevices with PF_PACKET approach)
- Michael Schulze (driver layer loopback requirement, RT CAN drivers review)
- Pavel Pisa (Bit-timing calculation)
- Sascha Hauer (SJA1000 platform driver)
- Sebastian Haas (SJA1000 EMS PCI driver)
- Markus Plessing (SJA1000 EMS PCI driver)
- Per Dalen (SJA1000 Kvaser PCI driver)
- Sam Ravnborg (reviews, coding style, kbuild help)
diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt
index 87814859cfc2..a4508ec1816b 100644
--- a/Documentation/networking/filter.txt
+++ b/Documentation/networking/filter.txt
@@ -1134,7 +1134,7 @@ The verifier's knowledge about the variable offset consists of:
mask and value; no bit should ever be 1 in both. For example, if a byte is read
into a register from memory, the register's top 56 bits are known zero, while
the low 8 are unknown - which is represented as the tnum (0x0; 0xff). If we
-then OR this with 0x40, we get (0x40; 0xcf), then if we add 1 we get (0x0;
+then OR this with 0x40, we get (0x40; 0xbf), then if we add 1 we get (0x0;
0x1ff), because of potential carries.
Besides arithmetic, the register state can also be updated by conditional
branches. For instance, if a SCALAR_VALUE is compared > 8, in the 'true' branch
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 7d4b15977d61..90966c2692d8 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -7,6 +7,7 @@ Contents:
:maxdepth: 2
batman-adv
+ can
kapi
z8530book
msg_zerocopy
diff --git a/Documentation/networking/pktgen.txt b/Documentation/networking/pktgen.txt
index 2c4e3354e128..d2fd78f85aa4 100644
--- a/Documentation/networking/pktgen.txt
+++ b/Documentation/networking/pktgen.txt
@@ -12,8 +12,8 @@ suitable sample script and configure that.
On a dual CPU:
ps aux | grep pkt
-root 129 0.3 0.0 0 0 ? SW 2003 523:20 [pktgen/0]
-root 130 0.3 0.0 0 0 ? SW 2003 509:50 [pktgen/1]
+root 129 0.3 0.0 0 0 ? SW 2003 523:20 [kpktgend_0]
+root 130 0.3 0.0 0 0 ? SW 2003 509:50 [kpktgend_1]
For monitoring and control pktgen creates:
@@ -113,9 +113,16 @@ Configuring devices
===================
This is done via the /proc interface, and most easily done via pgset
as defined in the sample scripts.
+You need to specify PGDEV environment variable to use functions from sample
+scripts, i.e.:
+export PGDEV=/proc/net/pktgen/eth4@0
+source samples/pktgen/functions.sh
Examples:
+ pg_ctrl start starts injection.
+ pg_ctrl stop aborts injection. Also, ^C aborts generator.
+
pgset "clone_skb 1" sets the number of copies of the same packet
pgset "clone_skb 0" use single SKB for all transmits
pgset "burst 8" uses xmit_more API to queue 8 copies of the same
@@ -165,8 +172,12 @@ Examples:
IPSEC # IPsec encapsulation (needs CONFIG_XFRM)
NODE_ALLOC # node specific memory allocation
NO_TIMESTAMP # disable timestamping
+ pgset 'flag ![name]' Clear a flag to determine behaviour.
+ Note that you might need to use single quote in
+ interactive mode, so that your shell wouldn't expand
+ the specified flag as a history command.
- pgset spi SPI_VALUE Set specific SA used to transform packet.
+ pgset "spi [SPI_VALUE]" Set specific SA used to transform packet.
pgset "udp_src_min 9" set UDP source port min, If < udp_src_max, then
cycle through the port range.
@@ -207,8 +218,6 @@ Examples:
pgset "tos XX" set former IPv4 TOS field (e.g. "tos 28" for AF11 no ECN, default 00)
pgset "traffic_class XX" set former IPv6 TRAFFIC CLASS (e.g. "traffic_class B8" for EF no ECN, default 00)
- pgset stop aborts injection. Also, ^C aborts generator.
-
pgset "rate 300M" set rate to 300 Mb/s
pgset "ratep 1000000" set rate to 1Mpps
diff --git a/Documentation/networking/xfrm_device.txt b/Documentation/networking/xfrm_device.txt
index 2d9d588cd34b..50c34ca65efe 100644
--- a/Documentation/networking/xfrm_device.txt
+++ b/Documentation/networking/xfrm_device.txt
@@ -41,6 +41,7 @@ struct xfrmdev_ops {
void (*xdo_dev_state_free) (struct xfrm_state *x);
bool (*xdo_dev_offload_ok) (struct sk_buff *skb,
struct xfrm_state *x);
+ void (*xdo_dev_state_advance_esn) (struct xfrm_state *x);
};
The NIC driver offering ipsec offload will need to implement these
@@ -117,6 +118,8 @@ the stack in xfrm_input().
hand the packet to napi_gro_receive() as usual
+In ESN mode, xdo_dev_state_advance_esn() is called from xfrm_replay_advance_esn().
+Driver will check packet seq number and update HW ESN state machine if needed.
When the SA is removed by the user, the driver's xdo_dev_state_delete()
is asked to disable the offload. Later, xdo_dev_state_free() is called
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 57d3ee9e4bde..fc3ae951bc07 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3403,6 +3403,52 @@ invalid, if invalid pages are written to (e.g. after the end of memory)
or if no page table is present for the addresses (e.g. when using
hugepages).
+4.108 KVM_PPC_GET_CPU_CHAR
+
+Capability: KVM_CAP_PPC_GET_CPU_CHAR
+Architectures: powerpc
+Type: vm ioctl
+Parameters: struct kvm_ppc_cpu_char (out)
+Returns: 0 on successful completion
+ -EFAULT if struct kvm_ppc_cpu_char cannot be written
+
+This ioctl gives userspace information about certain characteristics
+of the CPU relating to speculative execution of instructions and
+possible information leakage resulting from speculative execution (see
+CVE-2017-5715, CVE-2017-5753 and CVE-2017-5754). The information is
+returned in struct kvm_ppc_cpu_char, which looks like this:
+
+struct kvm_ppc_cpu_char {
+ __u64 character; /* characteristics of the CPU */
+ __u64 behaviour; /* recommended software behaviour */
+ __u64 character_mask; /* valid bits in character */
+ __u64 behaviour_mask; /* valid bits in behaviour */
+};
+
+For extensibility, the character_mask and behaviour_mask fields
+indicate which bits of character and behaviour have been filled in by
+the kernel. If the set of defined bits is extended in future then
+userspace will be able to tell whether it is running on a kernel that
+knows about the new bits.
+
+The character field describes attributes of the CPU which can help
+with preventing inadvertent information disclosure - specifically,
+whether there is an instruction to flash-invalidate the L1 data cache
+(ori 30,30,0 or mtspr SPRN_TRIG2,rN), whether the L1 data cache is set
+to a mode where entries can only be used by the thread that created
+them, whether the bcctr[l] instruction prevents speculation, and
+whether a speculation barrier instruction (ori 31,31,0) is provided.
+
+The behaviour field describes actions that software should take to
+prevent inadvertent information disclosure, and thus describes which
+vulnerabilities the hardware is subject to; specifically whether the
+L1 data cache should be flushed when returning to user mode from the
+kernel, and whether a speculation barrier should be placed between an
+array bounds check and the array access.
+
+These fields use the same bit definitions as the new
+H_GET_CPU_CHARACTERISTICS hypercall.
+
5. The kvm_run structure
------------------------
diff --git a/Documentation/x86/pti.txt b/Documentation/x86/pti.txt
index d11eff61fc9a..5cd58439ad2d 100644
--- a/Documentation/x86/pti.txt
+++ b/Documentation/x86/pti.txt
@@ -78,7 +78,7 @@ this protection comes at a cost:
non-PTI SYSCALL entry code, so requires mapping fewer
things into the userspace page tables. The downside is
that stacks must be switched at entry time.
- d. Global pages are disabled for all kernel structures not
+ c. Global pages are disabled for all kernel structures not
mapped into both kernel and userspace page tables. This
feature of the MMU allows different processes to share TLB
entries mapping the kernel. Losing the feature means more
diff --git a/MAINTAINERS b/MAINTAINERS
index 079af8b7ae8e..884ee9601707 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -62,7 +62,15 @@ trivial patch so apply some common sense.
7. When sending security related changes or reports to a maintainer
please Cc: security@kernel.org, especially if the maintainer
- does not respond.
+ does not respond. Please keep in mind that the security team is
+ a small set of people who can be efficient only when working on
+ verified bugs. Please only Cc: this list when you have identified
+ that the bug would present a short-term risk to other users if it
+ were publicly disclosed. For example, reports of address leaks do
+ not represent an immediate threat and are better handled publicly,
+ and ideally, should come with a patch proposal. Please do not send
+ automated reports to this list either. Such bugs will be handled
+ better and faster in the usual public places.
8. Happy hacking.
@@ -3198,7 +3206,7 @@ W: https://github.com/linux-can
T: git git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can.git
T: git git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can-next.git
S: Maintained
-F: Documentation/networking/can.txt
+F: Documentation/networking/can.rst
F: net/can/
F: include/linux/can/core.h
F: include/uapi/linux/can.h
@@ -9102,6 +9110,7 @@ F: drivers/usb/image/microtek.*
MIPS
M: Ralf Baechle <ralf@linux-mips.org>
+M: James Hogan <jhogan@kernel.org>
L: linux-mips@linux-mips.org
W: http://www.linux-mips.org/
T: git git://git.linux-mips.org/pub/scm/ralf/linux.git
@@ -12252,7 +12261,7 @@ M: Security Officers <security@kernel.org>
S: Supported
SECURITY SUBSYSTEM
-M: James Morris <james.l.morris@oracle.com>
+M: James Morris <jmorris@namei.org>
M: "Serge E. Hallyn" <serge@hallyn.com>
L: linux-security-module@vger.kernel.org (suggested Cc:)
T: git git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/linux-security.git
diff --git a/Makefile b/Makefile
index bf5b8cbb9469..339397b838d3 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 4
PATCHLEVEL = 15
SUBLEVEL = 0
-EXTRAVERSION = -rc8
+EXTRAVERSION = -rc9
NAME = Fearless Coyote
# *DOCUMENTATION*
diff --git a/arch/alpha/kernel/sys_sio.c b/arch/alpha/kernel/sys_sio.c
index 37bd6d9b8eb9..a6bdc1da47ad 100644
--- a/arch/alpha/kernel/sys_sio.c
+++ b/arch/alpha/kernel/sys_sio.c
@@ -102,6 +102,15 @@ sio_pci_route(void)
alpha_mv.sys.sio.route_tab);
}
+static bool sio_pci_dev_irq_needs_level(const struct pci_dev *dev)
+{
+ if ((dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) &&
+ (dev->class >> 8 != PCI_CLASS_BRIDGE_PCMCIA))
+ return false;
+
+ return true;
+}
+
static unsigned int __init
sio_collect_irq_levels(void)
{
@@ -110,8 +119,7 @@ sio_collect_irq_levels(void)
/* Iterate through the devices, collecting IRQ levels. */
for_each_pci_dev(dev) {
- if ((dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) &&
- (dev->class >> 8 != PCI_CLASS_BRIDGE_PCMCIA))
+ if (!sio_pci_dev_irq_needs_level(dev))
continue;
if (dev->irq)
@@ -120,8 +128,7 @@ sio_collect_irq_levels(void)
return level_bits;
}
-static void __init
-sio_fixup_irq_levels(unsigned int level_bits)
+static void __sio_fixup_irq_levels(unsigned int level_bits, bool reset)
{
unsigned int old_level_bits;
@@ -139,12 +146,21 @@ sio_fixup_irq_levels(unsigned int level_bits)
*/
old_level_bits = inb(0x4d0) | (inb(0x4d1) << 8);
- level_bits |= (old_level_bits & 0x71ff);
+ if (reset)
+ old_level_bits &= 0x71ff;
+
+ level_bits |= old_level_bits;
outb((level_bits >> 0) & 0xff, 0x4d0);
outb((level_bits >> 8) & 0xff, 0x4d1);
}
+static inline void
+sio_fixup_irq_levels(unsigned int level_bits)
+{
+ __sio_fixup_irq_levels(level_bits, true);
+}
+
static inline int
noname_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
{
@@ -181,7 +197,14 @@ noname_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
const long min_idsel = 6, max_idsel = 14, irqs_per_slot = 5;
int irq = COMMON_TABLE_LOOKUP, tmp;
tmp = __kernel_extbl(alpha_mv.sys.sio.route_tab, irq);
- return irq >= 0 ? tmp : -1;
+
+ irq = irq >= 0 ? tmp : -1;
+
+ /* Fixup IRQ level if an actual IRQ mapping is detected */
+ if (sio_pci_dev_irq_needs_level(dev) && irq >= 0)
+ __sio_fixup_irq_levels(1 << irq, false);
+
+ return irq;
}
static inline int
diff --git a/arch/alpha/lib/ev6-memset.S b/arch/alpha/lib/ev6-memset.S
index 316a99aa9efe..1cfcfbbea6f0 100644
--- a/arch/alpha/lib/ev6-memset.S
+++ b/arch/alpha/lib/ev6-memset.S
@@ -18,7 +18,7 @@
* The algorithm for the leading and trailing quadwords remains the same,
* however the loop has been unrolled to enable better memory throughput,
* and the code has been replicated for each of the entry points: __memset
- * and __memsetw to permit better scheduling to eliminate the stalling
+ * and __memset16 to permit better scheduling to eliminate the stalling
* encountered during the mask replication.
* A future enhancement might be to put in a byte store loop for really
* small (say < 32 bytes) memset()s. Whether or not that change would be
@@ -34,7 +34,7 @@
.globl memset
.globl __memset
.globl ___memset
- .globl __memsetw
+ .globl __memset16
.globl __constant_c_memset
.ent ___memset
@@ -415,9 +415,9 @@ end:
* to mask stalls. Note that entry point names also had to change
*/
.align 5
- .ent __memsetw
+ .ent __memset16
-__memsetw:
+__memset16:
.frame $30,0,$26,0
.prologue 0
@@ -596,8 +596,8 @@ end_w:
nop
ret $31,($26),1 # L0 :
- .end __memsetw
- EXPORT_SYMBOL(__memsetw)
+ .end __memset16
+ EXPORT_SYMBOL(__memset16)
memset = ___memset
__memset = ___memset
diff --git a/arch/arm/boot/dts/imx6q-b450v3.dts b/arch/arm/boot/dts/imx6q-b450v3.dts
index 404a93d9596b..3ec58500e9c2 100644
--- a/arch/arm/boot/dts/imx6q-b450v3.dts
+++ b/arch/arm/boot/dts/imx6q-b450v3.dts
@@ -112,3 +112,55 @@
line-name = "PCA9539-P07";
};
};
+
+&pci_root {
+ /* Intel Corporation I210 Gigabit Network Connection */
+ switch_nic: ethernet@3,0 {
+ compatible = "pci8086,1533";
+ reg = <0x00010000 0 0 0 0>;
+ };
+};
+
+&switch_ports {
+ port@0 {
+ reg = <0>;
+ label = "enacq";
+ phy-handle = <&switchphy0>;
+ };
+
+ port@1 {
+ reg = <1>;
+ label = "eneport1";
+ phy-handle = <&switchphy1>;
+ };
+
+ port@2 {
+ reg = <2>;
+ label = "enix";
+ phy-handle = <&switchphy2>;
+ };
+
+ port@3 {
+ reg = <3>;
+ label = "enid";
+ phy-handle = <&switchphy3>;
+ };
+
+ port@4 {
+ reg = <4>;
+ label = "cpu";
+ ethernet = <&switch_nic>;
+ phy-handle = <&switchphy4>;
+ };
+
+ port@5 {
+ reg = <5>;
+ label = "enembc";
+
+ /* connected to Ethernet MAC of AT91RM9200 in MII mode */
+ fixed-link {
+ speed = <100>;
+ full-duplex;
+ };
+ };
+};
diff --git a/arch/arm/boot/dts/imx6q-b650v3.dts b/arch/arm/boot/dts/imx6q-b650v3.dts
index 7f9f176901d4..5650a9b11091 100644
--- a/arch/arm/boot/dts/imx6q-b650v3.dts
+++ b/arch/arm/boot/dts/imx6q-b650v3.dts
@@ -111,3 +111,55 @@
fsl,tx-cal-45-dp-ohms = <55>;
fsl,tx-d-cal = <100>;
};
+
+&pci_root {
+ /* Intel Corporation I210 Gigabit Network Connection */
+ switch_nic: ethernet@3,0 {
+ compatible = "pci8086,1533";
+ reg = <0x00010000 0 0 0 0>;
+ };
+};
+
+&switch_ports {
+ port@0 {
+ reg = <0>;
+ label = "enacq";
+ phy-handle = <&switchphy0>;
+ };
+
+ port@1 {
+ reg = <1>;
+ label = "eneport1";
+ phy-handle = <&switchphy1>;
+ };
+
+ port@2 {
+ reg = <2>;
+ label = "enix";
+ phy-handle = <&switchphy2>;
+ };
+
+ port@3 {
+ reg = <3>;
+ label = "enid";
+ phy-handle = <&switchphy3>;
+ };
+
+ port@4 {
+ reg = <4>;
+ label = "cpu";
+ ethernet = <&switch_nic>;
+ phy-handle = <&switchphy4>;
+ };
+
+ port@5 {
+ reg = <5>;
+ label = "enembc";
+
+ /* connected to Ethernet MAC of AT91RM9200 in MII mode */
+ fixed-link {
+ speed = <100>;
+ full-duplex;
+ };
+ };
+};
diff --git a/arch/arm/boot/dts/imx6q-b850v3.dts b/arch/arm/boot/dts/imx6q-b850v3.dts
index 46bdc6722715..35edbdc7bcd1 100644
--- a/arch/arm/boot/dts/imx6q-b850v3.dts
+++ b/arch/arm/boot/dts/imx6q-b850v3.dts
@@ -212,3 +212,78 @@
};
};
};
+
+&pci_root {
+ /* PLX Technology, Inc. PEX 8605 PCI Express 4-port Gen2 Switch */
+ bridge@1,0 {
+ compatible = "pci10b5,8605";
+ reg = <0x00010000 0 0 0 0>;
+
+ #address-cells = <3>;
+ #size-cells = <2>;
+ #interrupt-cells = <1>;
+
+ bridge@2,1 {
+ compatible = "pci10b5,8605";
+ reg = <0x00020800 0 0 0 0>;
+
+ #address-cells = <3>;
+ #size-cells = <2>;
+ #interrupt-cells = <1>;
+
+ /* Intel Corporation I210 Gigabit Network Connection */
+ ethernet@3,0 {
+ compatible = "pci8086,1533";
+ reg = <0x00030000 0 0 0 0>;
+ };
+ };
+
+ bridge@2,2 {
+ compatible = "pci10b5,8605";
+ reg = <0x00021000 0 0 0 0>;
+
+ #address-cells = <3>;
+ #size-cells = <2>;
+ #interrupt-cells = <1>;
+
+ /* Intel Corporation I210 Gigabit Network Connection */
+ switch_nic: ethernet@4,0 {
+ compatible = "pci8086,1533";
+ reg = <0x00040000 0 0 0 0>;
+ };
+ };
+ };
+};
+
+&switch_ports {
+ port@0 {
+ reg = <0>;
+ label = "eneport1";
+ phy-handle = <&switchphy0>;
+ };
+
+ port@1 {
+ reg = <1>;
+ label = "eneport2";
+ phy-handle = <&switchphy1>;
+ };
+
+ port@2 {
+ reg = <2>;
+ label = "enix";
+ phy-handle = <&switchphy2>;
+ };
+
+ port@3 {
+ reg = <3>;
+ label = "enid";
+ phy-handle = <&switchphy3>;
+ };
+
+ port@4 {
+ reg = <4>;
+ label = "cpu";
+ ethernet = <&switch_nic>;
+ phy-handle = <&switchphy4>;
+ };
+};
diff --git a/arch/arm/boot/dts/imx6q-bx50v3.dtsi b/arch/arm/boot/dts/imx6q-bx50v3.dtsi
index b915837bbb5f..916ea94d75ca 100644
--- a/arch/arm/boot/dts/imx6q-bx50v3.dtsi
+++ b/arch/arm/boot/dts/imx6q-bx50v3.dtsi
@@ -92,6 +92,56 @@
mux-int-port = <1>;
mux-ext-port = <4>;
};
+
+ aliases {
+ mdio-gpio0 = &mdio0;
+ };
+
+ mdio0: mdio-gpio {
+ compatible = "virtual,mdio-gpio";
+ gpios = <&gpio2 5 GPIO_ACTIVE_HIGH>, /* mdc */
+ <&gpio2 7 GPIO_ACTIVE_HIGH>; /* mdio */
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ switch@0 {
+ compatible = "marvell,mv88e6085"; /* 88e6240*/
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+
+ switch_ports: ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ switchphy0: switchphy@0 {
+ reg = <0>;
+ };
+
+ switchphy1: switchphy@1 {
+ reg = <1>;
+ };
+
+ switchphy2: switchphy@2 {
+ reg = <2>;
+ };
+
+ switchphy3: switchphy@3 {
+ reg = <3>;
+ };
+
+ switchphy4: switchphy@4 {
+ reg = <4>;
+ };
+ };
+ };
+ };
};
&ecspi5 {
@@ -326,3 +376,15 @@
tcxo-clock-frequency = <26000000>;
};
};
+
+&pcie {
+ /* Synopsys, Inc. Device */
+ pci_root: root@0,0 {
+ compatible = "pci16c3,abcd";
+ reg = <0x00000000 0 0 0 0>;
+
+ #address-cells = <3>;
+ #size-cells = <2>;
+ #interrupt-cells = <1>;
+ };
+};
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 41e2feb0cf4f..b5030e1a41d8 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -363,15 +363,7 @@ static inline int epilogue_offset(const struct jit_ctx *ctx)
static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
{
const u8 *tmp = bpf2a32[TMP_REG_1];
- s32 jmp_offset;
- /* checks if divisor is zero or not. If it is, then
- * exit directly.
- */
- emit(ARM_CMP_I(rn, 0), ctx);
- _emit(ARM_COND_EQ, ARM_MOV_I(ARM_R0, 0), ctx);
- jmp_offset = epilogue_offset(ctx);
- _emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx);
#if __LINUX_ARM_ARCH__ == 7
if (elf_hwcap & HWCAP_IDIVA) {
if (op == BPF_DIV)
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 304203fa9e33..e60494f1eef9 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -45,7 +45,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
ret = kvm_psci_call(vcpu);
if (ret < 0) {
- kvm_inject_undefined(vcpu);
+ vcpu_set_reg(vcpu, 0, ~0UL);
return 1;
}
@@ -54,7 +54,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
- kvm_inject_undefined(vcpu);
+ vcpu_set_reg(vcpu, 0, ~0UL);
return 1;
}
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 0775d5ab8ee9..1d4f1da7c58f 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -390,18 +390,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
case BPF_ALU64 | BPF_DIV | BPF_X:
case BPF_ALU | BPF_MOD | BPF_X:
case BPF_ALU64 | BPF_MOD | BPF_X:
- {
- const u8 r0 = bpf2a64[BPF_REG_0];
-
- /* if (src == 0) return 0 */
- jmp_offset = 3; /* skip ahead to else path */
- check_imm19(jmp_offset);
- emit(A64_CBNZ(is64, src, jmp_offset), ctx);
- emit(A64_MOVZ(1, r0, 0, 0), ctx);
- jmp_offset = epilogue_offset(ctx);
- check_imm26(jmp_offset);
- emit(A64_B(jmp_offset), ctx);
- /* else */
switch (BPF_OP(code)) {
case BPF_DIV:
emit(A64_UDIV(is64, dst, dst, src), ctx);
@@ -413,7 +401,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
break;
}
break;
- }
case BPF_ALU | BPF_LSH | BPF_X:
case BPF_ALU64 | BPF_LSH | BPF_X:
emit(A64_LSLV(is64, dst, dst, src), ctx);
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 350a990fc719..8e0b3702f1c0 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -259,6 +259,7 @@ config BCM47XX
select LEDS_GPIO_REGISTER
select BCM47XX_NVRAM
select BCM47XX_SPROM
+ select BCM47XX_SSB if !BCM47XX_BCMA
help
Support for BCM47XX based boards
@@ -389,6 +390,7 @@ config LANTIQ
select SYS_SUPPORTS_32BIT_KERNEL
select SYS_SUPPORTS_MIPS16
select SYS_SUPPORTS_MULTITHREADING
+ select SYS_SUPPORTS_VPE_LOADER
select SYS_HAS_EARLY_PRINTK
select GPIOLIB
select SWAP_IO_SPACE
@@ -516,6 +518,7 @@ config MIPS_MALTA
select SYS_SUPPORTS_MIPS16
select SYS_SUPPORTS_MULTITHREADING
select SYS_SUPPORTS_SMARTMIPS
+ select SYS_SUPPORTS_VPE_LOADER
select SYS_SUPPORTS_ZBOOT
select SYS_SUPPORTS_RELOCATABLE
select USE_OF
@@ -2281,9 +2284,16 @@ config MIPSR2_TO_R6_EMULATOR
The only reason this is a build-time option is to save ~14K from the
final kernel image.
+config SYS_SUPPORTS_VPE_LOADER
+ bool
+ depends on SYS_SUPPORTS_MULTITHREADING
+ help
+ Indicates that the platform supports the VPE loader, and provides
+ physical_memsize.
+
config MIPS_VPE_LOADER
bool "VPE loader support."
- depends on SYS_SUPPORTS_MULTITHREADING && MODULES
+ depends on SYS_SUPPORTS_VPE_LOADER && MODULES
select CPU_MIPSR2_IRQ_VI
select CPU_MIPSR2_IRQ_EI
select MIPS_MT
diff --git a/arch/mips/Kconfig.debug b/arch/mips/Kconfig.debug
index 464af5e025d6..0749c3724543 100644
--- a/arch/mips/Kconfig.debug
+++ b/arch/mips/Kconfig.debug
@@ -124,30 +124,36 @@ config SCACHE_DEBUGFS
If unsure, say N.
-menuconfig MIPS_CPS_NS16550
+menuconfig MIPS_CPS_NS16550_BOOL
bool "CPS SMP NS16550 UART output"
depends on MIPS_CPS
help
Output debug information via an ns16550 compatible UART if exceptions
occur early in the boot process of a secondary core.
-if MIPS_CPS_NS16550
+if MIPS_CPS_NS16550_BOOL
+
+config MIPS_CPS_NS16550
+ def_bool MIPS_CPS_NS16550_BASE != 0
config MIPS_CPS_NS16550_BASE
hex "UART Base Address"
default 0x1b0003f8 if MIPS_MALTA
+ default 0
help
The base address of the ns16550 compatible UART on which to output
debug information from the early stages of core startup.
+ This is only used if non-zero.
+
config MIPS_CPS_NS16550_SHIFT
int "UART Register Shift"
- default 0 if MIPS_MALTA
+ default 0
help
The number of bits to shift ns16550 register indices by in order to
form their addresses. That is, log base 2 of the span between
adjacent ns16550 registers in the system.
-endif # MIPS_CPS_NS16550
+endif # MIPS_CPS_NS16550_BOOL
endmenu
diff --git a/arch/mips/ar7/platform.c b/arch/mips/ar7/platform.c
index 4674f1efbe7a..e1675c25d5d4 100644
--- a/arch/mips/ar7/platform.c
+++ b/arch/mips/ar7/platform.c
@@ -575,7 +575,7 @@ static int __init ar7_register_uarts(void)
uart_port.type = PORT_AR7;
uart_port.uartclk = clk_get_rate(bus_clk) / 2;
uart_port.iotype = UPIO_MEM32;
- uart_port.flags = UPF_FIXED_TYPE;
+ uart_port.flags = UPF_FIXED_TYPE | UPF_BOOT_AUTOCONF;
uart_port.regshift = 2;
uart_port.line = 0;
diff --git a/arch/mips/ath25/devices.c b/arch/mips/ath25/devices.c
index e1156347da53..301a9028273c 100644
--- a/arch/mips/ath25/devices.c
+++ b/arch/mips/ath25/devices.c
@@ -73,6 +73,7 @@ const char *get_system_type(void)
void __init ath25_serial_setup(u32 mapbase, int irq, unsigned int uartclk)
{
+#ifdef CONFIG_SERIAL_8250_CONSOLE
struct uart_port s;
memset(&s, 0, sizeof(s));
@@ -85,6 +86,7 @@ void __init ath25_serial_setup(u32 mapbase, int irq, unsigned int uartclk)
s.uartclk = uartclk;
early_serial_setup(&s);
+#endif /* CONFIG_SERIAL_8250_CONSOLE */
}
int __init ath25_add_wmac(int nr, u32 base, int irq)
diff --git a/arch/mips/kernel/mips-cm.c b/arch/mips/kernel/mips-cm.c
index dd5567b1e305..8f5bd04f320a 100644
--- a/arch/mips/kernel/mips-cm.c
+++ b/arch/mips/kernel/mips-cm.c
@@ -292,7 +292,6 @@ void mips_cm_lock_other(unsigned int cluster, unsigned int core,
*this_cpu_ptr(&cm_core_lock_flags));
} else {
WARN_ON(cluster != 0);
- WARN_ON(vp != 0);
WARN_ON(block != CM_GCR_Cx_OTHER_BLOCK_LOCAL);
/*
diff --git a/arch/mips/lib/Makefile b/arch/mips/lib/Makefile
index 78c2affeabf8..e84e12655fa8 100644
--- a/arch/mips/lib/Makefile
+++ b/arch/mips/lib/Makefile
@@ -16,4 +16,5 @@ obj-$(CONFIG_CPU_R3000) += r3k_dump_tlb.o
obj-$(CONFIG_CPU_TX39XX) += r3k_dump_tlb.o
# libgcc-style stuff needed in the kernel
-obj-y += ashldi3.o ashrdi3.o bswapsi.o bswapdi.o cmpdi2.o lshrdi3.o ucmpdi2.o
+obj-y += ashldi3.o ashrdi3.o bswapsi.o bswapdi.o cmpdi2.o lshrdi3.o multi3.o \
+ ucmpdi2.o
diff --git a/arch/mips/lib/libgcc.h b/arch/mips/lib/libgcc.h
index 28002ed90c2c..199a7f96282f 100644
--- a/arch/mips/lib/libgcc.h
+++ b/arch/mips/lib/libgcc.h
@@ -10,10 +10,18 @@ typedef int word_type __attribute__ ((mode (__word__)));
struct DWstruct {
int high, low;
};
+
+struct TWstruct {
+ long long high, low;
+};
#elif defined(__LITTLE_ENDIAN)
struct DWstruct {
int low, high;
};
+
+struct TWstruct {
+ long long low, high;
+};
#else
#error I feel sick.
#endif
@@ -23,4 +31,13 @@ typedef union {
long long ll;
} DWunion;
+#if defined(CONFIG_64BIT) && defined(CONFIG_CPU_MIPSR6)
+typedef int ti_type __attribute__((mode(TI)));
+
+typedef union {
+ struct TWstruct s;
+ ti_type ti;
+} TWunion;
+#endif
+
#endif /* __ASM_LIBGCC_H */
diff --git a/arch/mips/lib/multi3.c b/arch/mips/lib/multi3.c
new file mode 100644
index 000000000000..111ad475aa0c
--- /dev/null
+++ b/arch/mips/lib/multi3.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/export.h>
+
+#include "libgcc.h"
+
+/*
+ * GCC 7 suboptimally generates __multi3 calls for mips64r6, so for that
+ * specific case only we'll implement it here.
+ *
+ * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82981
+ */
+#if defined(CONFIG_64BIT) && defined(CONFIG_CPU_MIPSR6) && (__GNUC__ == 7)
+
+/* multiply 64-bit values, low 64-bits returned */
+static inline long long notrace dmulu(long long a, long long b)
+{
+ long long res;
+
+ asm ("dmulu %0,%1,%2" : "=r" (res) : "r" (a), "r" (b));
+ return res;
+}
+
+/* multiply 64-bit unsigned values, high 64-bits of 128-bit result returned */
+static inline long long notrace dmuhu(long long a, long long b)
+{
+ long long res;
+
+ asm ("dmuhu %0,%1,%2" : "=r" (res) : "r" (a), "r" (b));
+ return res;
+}
+
+/* multiply 128-bit values, low 128-bits returned */
+ti_type notrace __multi3(ti_type a, ti_type b)
+{
+ TWunion res, aa, bb;
+
+ aa.ti = a;
+ bb.ti = b;
+
+ /*
+ * a * b = (a.lo * b.lo)
+ * + 2^64 * (a.hi * b.lo + a.lo * b.hi)
+ * [+ 2^128 * (a.hi * b.hi)]
+ */
+ res.s.low = dmulu(aa.s.low, bb.s.low);
+ res.s.high = dmuhu(aa.s.low, bb.s.low);
+ res.s.high += dmulu(aa.s.high, bb.s.low);
+ res.s.high += dmulu(aa.s.low, bb.s.high);
+
+ return res.ti;
+}
+EXPORT_SYMBOL(__multi3);
+
+#endif /* 64BIT && CPU_MIPSR6 && GCC7 */
diff --git a/arch/mips/mm/uasm-micromips.c b/arch/mips/mm/uasm-micromips.c
index cdb5a191b9d5..9bb6baa45da3 100644
--- a/arch/mips/mm/uasm-micromips.c
+++ b/arch/mips/mm/uasm-micromips.c
@@ -40,7 +40,7 @@
#include "uasm.c"
-static const struct insn const insn_table_MM[insn_invalid] = {
+static const struct insn insn_table_MM[insn_invalid] = {
[insn_addu] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_addu32_op), RT | RS | RD},
[insn_addiu] = {M(mm_addiu32_op, 0, 0, 0, 0, 0), RT | RS | SIMM},
[insn_and] = {M(mm_pool32a_op, 0, 0, 0, 0, mm_and_op), RT | RS | RD},
diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c
index 4e347030ed2c..3e2798bfea4f 100644
--- a/arch/mips/net/ebpf_jit.c
+++ b/arch/mips/net/ebpf_jit.c
@@ -741,16 +741,11 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
break;
case BPF_ALU | BPF_DIV | BPF_K: /* ALU_IMM */
case BPF_ALU | BPF_MOD | BPF_K: /* ALU_IMM */
+ if (insn->imm == 0)
+ return -EINVAL;
dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
if (dst < 0)
return dst;
- if (insn->imm == 0) { /* Div by zero */
- b_off = b_imm(exit_idx, ctx);
- if (is_bad_offset(b_off))
- return -E2BIG;
- emit_instr(ctx, beq, MIPS_R_ZERO, MIPS_R_ZERO, b_off);
- emit_instr(ctx, addu, MIPS_R_V0, MIPS_R_ZERO, MIPS_R_ZERO);
- }
td = get_reg_val_type(ctx, this_idx, insn->dst_reg);
if (td == REG_64BIT || td == REG_32BIT_ZERO_EX)
/* sign extend */
@@ -770,19 +765,13 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
break;
case BPF_ALU64 | BPF_DIV | BPF_K: /* ALU_IMM */
case BPF_ALU64 | BPF_MOD | BPF_K: /* ALU_IMM */
+ if (insn->imm == 0)
+ return -EINVAL;
dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
if (dst < 0)
return dst;
- if (insn->imm == 0) { /* Div by zero */
- b_off = b_imm(exit_idx, ctx);
- if (is_bad_offset(b_off))
- return -E2BIG;
- emit_instr(ctx, beq, MIPS_R_ZERO, MIPS_R_ZERO, b_off);
- emit_instr(ctx, addu, MIPS_R_V0, MIPS_R_ZERO, MIPS_R_ZERO);
- }
if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT)
emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32);
-
if (insn->imm == 1) {
/* div by 1 is a nop, mod by 1 is zero */
if (bpf_op == BPF_MOD)
@@ -860,11 +849,6 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
break;
case BPF_DIV:
case BPF_MOD:
- b_off = b_imm(exit_idx, ctx);
- if (is_bad_offset(b_off))
- return -E2BIG;
- emit_instr(ctx, beq, src, MIPS_R_ZERO, b_off);
- emit_instr(ctx, movz, MIPS_R_V0, MIPS_R_ZERO, src);
emit_instr(ctx, ddivu, dst, src);
if (bpf_op == BPF_DIV)
emit_instr(ctx, mflo, dst);
@@ -943,11 +927,6 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
break;
case BPF_DIV:
case BPF_MOD:
- b_off = b_imm(exit_idx, ctx);
- if (is_bad_offset(b_off))
- return -E2BIG;
- emit_instr(ctx, beq, src, MIPS_R_ZERO, b_off);
- emit_instr(ctx, movz, MIPS_R_V0, MIPS_R_ZERO, src);
emit_instr(ctx, divu, dst, src);
if (bpf_op == BPF_DIV)
emit_instr(ctx, mflo, dst);
diff --git a/arch/mips/ralink/timer.c b/arch/mips/ralink/timer.c
index d4469b20d176..4f46a4509f79 100644
--- a/arch/mips/ralink/timer.c
+++ b/arch/mips/ralink/timer.c
@@ -109,9 +109,9 @@ static int rt_timer_probe(struct platform_device *pdev)
}
rt->irq = platform_get_irq(pdev, 0);
- if (!rt->irq) {
+ if (rt->irq < 0) {
dev_err(&pdev->dev, "failed to load irq\n");
- return -ENOENT;
+ return rt->irq;
}
rt->membase = devm_ioremap_resource(&pdev->dev, res);
diff --git a/arch/mips/rb532/Makefile b/arch/mips/rb532/Makefile
index efdecdb6e3ea..8186afca2234 100644
--- a/arch/mips/rb532/Makefile
+++ b/arch/mips/rb532/Makefile
@@ -2,4 +2,6 @@
# Makefile for the RB532 board specific parts of the kernel
#
-obj-y += irq.o time.o setup.o serial.o prom.o gpio.o devices.o
+obj-$(CONFIG_SERIAL_8250_CONSOLE) += serial.o
+
+obj-y += irq.o time.o setup.o prom.o gpio.o devices.o
diff --git a/arch/mips/rb532/devices.c b/arch/mips/rb532/devices.c
index 32ea3e6731d6..354d258396ff 100644
--- a/arch/mips/rb532/devices.c
+++ b/arch/mips/rb532/devices.c
@@ -310,6 +310,8 @@ static int __init plat_setup_devices(void)
return platform_add_devices(rb532_devs, ARRAY_SIZE(rb532_devs));
}
+#ifdef CONFIG_NET
+
static int __init setup_kmac(char *s)
{
printk(KERN_INFO "korina mac = %s\n", s);
@@ -322,4 +324,6 @@ static int __init setup_kmac(char *s)
__setup("kmac=", setup_kmac);
+#endif /* CONFIG_NET */
+
arch_initcall(plat_setup_devices);
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 61d6049f4c1e..637b7263cb86 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -443,6 +443,31 @@ struct kvm_ppc_rmmu_info {
__u32 ap_encodings[8];
};
+/* For KVM_PPC_GET_CPU_CHAR */
+struct kvm_ppc_cpu_char {
+ __u64 character; /* characteristics of the CPU */
+ __u64 behaviour; /* recommended software behaviour */
+ __u64 character_mask; /* valid bits in character */
+ __u64 behaviour_mask; /* valid bits in behaviour */
+};
+
+/*
+ * Values for character and character_mask.
+ * These are identical to the values used by H_GET_CPU_CHARACTERISTICS.
+ */
+#define KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31 (1ULL << 63)
+#define KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED (1ULL << 62)
+#define KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30 (1ULL << 61)
+#define KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2 (1ULL << 60)
+#define KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV (1ULL << 59)
+#define KVM_PPC_CPU_CHAR_BR_HINT_HONOURED (1ULL << 58)
+#define KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF (1ULL << 57)
+#define KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS (1ULL << 56)
+
+#define KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY (1ULL << 63)
+#define KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR (1ULL << 62)
+#define KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ULL << 61)
+
/* Per-vcpu XICS interrupt controller state */
#define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c)
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 1915e86cef6f..0a7c88786ec0 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -39,6 +39,10 @@
#include <asm/iommu.h>
#include <asm/switch_to.h>
#include <asm/xive.h>
+#ifdef CONFIG_PPC_PSERIES
+#include <asm/hvcall.h>
+#include <asm/plpar_wrappers.h>
+#endif
#include "timing.h"
#include "irq.h"
@@ -548,6 +552,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
#ifdef CONFIG_KVM_XICS
case KVM_CAP_IRQ_XICS:
#endif
+ case KVM_CAP_PPC_GET_CPU_CHAR:
r = 1;
break;
@@ -1759,6 +1764,124 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
return r;
}
+#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * These functions check whether the underlying hardware is safe
+ * against attacks based on observing the effects of speculatively
+ * executed instructions, and whether it supplies instructions for
+ * use in workarounds. The information comes from firmware, either
+ * via the device tree on powernv platforms or from an hcall on
+ * pseries platforms.
+ */
+#ifdef CONFIG_PPC_PSERIES
+static int pseries_get_cpu_char(struct kvm_ppc_cpu_char *cp)
+{
+ struct h_cpu_char_result c;
+ unsigned long rc;
+
+ if (!machine_is(pseries))
+ return -ENOTTY;
+
+ rc = plpar_get_cpu_characteristics(&c);
+ if (rc == H_SUCCESS) {
+ cp->character = c.character;
+ cp->behaviour = c.behaviour;
+ cp->character_mask = KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31 |
+ KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED |
+ KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30 |
+ KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2 |
+ KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV |
+ KVM_PPC_CPU_CHAR_BR_HINT_HONOURED |
+ KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF |
+ KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS;
+ cp->behaviour_mask = KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY |
+ KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR |
+ KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR;
+ }
+ return 0;
+}
+#else
+static int pseries_get_cpu_char(struct kvm_ppc_cpu_char *cp)
+{
+ return -ENOTTY;
+}
+#endif
+
+static inline bool have_fw_feat(struct device_node *fw_features,
+ const char *state, const char *name)
+{
+ struct device_node *np;
+ bool r = false;
+
+ np = of_get_child_by_name(fw_features, name);
+ if (np) {
+ r = of_property_read_bool(np, state);
+ of_node_put(np);
+ }
+ return r;
+}
+
+static int kvmppc_get_cpu_char(struct kvm_ppc_cpu_char *cp)
+{
+ struct device_node *np, *fw_features;
+ int r;
+
+ memset(cp, 0, sizeof(*cp));
+ r = pseries_get_cpu_char(cp);
+ if (r != -ENOTTY)
+ return r;
+
+ np = of_find_node_by_name(NULL, "ibm,opal");
+ if (np) {
+ fw_features = of_get_child_by_name(np, "fw-features");
+ of_node_put(np);
+ if (!fw_features)
+ return 0;
+ if (have_fw_feat(fw_features, "enabled",
+ "inst-spec-barrier-ori31,31,0"))
+ cp->character |= KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31;
+ if (have_fw_feat(fw_features, "enabled",
+ "fw-bcctrl-serialized"))
+ cp->character |= KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED;
+ if (have_fw_feat(fw_features, "enabled",
+ "inst-l1d-flush-ori30,30,0"))
+ cp->character |= KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30;
+ if (have_fw_feat(fw_features, "enabled",
+ "inst-l1d-flush-trig2"))
+ cp->character |= KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2;
+ if (have_fw_feat(fw_features, "enabled",
+ "fw-l1d-thread-split"))
+ cp->character |= KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV;
+ if (have_fw_feat(fw_features, "enabled",
+ "fw-count-cache-disabled"))
+ cp->character |= KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS;
+ cp->character_mask = KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31 |
+ KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED |
+ KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30 |
+ KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2 |
+ KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV |
+ KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS;
+
+ if (have_fw_feat(fw_features, "enabled",
+ "speculation-policy-favor-security"))
+ cp->behaviour |= KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY;
+ if (!have_fw_feat(fw_features, "disabled",
+ "needs-l1d-flush-msr-pr-0-to-1"))
+ cp->behaviour |= KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR;
+ if (!have_fw_feat(fw_features, "disabled",
+ "needs-spec-barrier-for-bound-checks"))
+ cp->behaviour |= KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR;
+ cp->behaviour_mask = KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY |
+ KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR |
+ KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR;
+
+ of_node_put(fw_features);
+ }
+
+ return 0;
+}
+#endif
+
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -1861,6 +1984,14 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = -EFAULT;
break;
}
+ case KVM_PPC_GET_CPU_CHAR: {
+ struct kvm_ppc_cpu_char cpuchar;
+
+ r = kvmppc_get_cpu_char(&cpuchar);
+ if (r >= 0 && copy_to_user(argp, &cpuchar, sizeof(cpuchar)))
+ r = -EFAULT;
+ break;
+ }
default: {
struct kvm *kvm = filp->private_data;
r = kvm->arch.kvm_ops->arch_vm_ioctl(filp, ioctl, arg);
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 217a78e84865..0a34b0cec7b7 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -381,10 +381,6 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
goto bpf_alu32_trunc;
case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */
case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */
- PPC_CMPWI(src_reg, 0);
- PPC_BCC_SHORT(COND_NE, (ctx->idx * 4) + 12);
- PPC_LI(b2p[BPF_REG_0], 0);
- PPC_JMP(exit_addr);
if (BPF_OP(code) == BPF_MOD) {
PPC_DIVWU(b2p[TMP_REG_1], dst_reg, src_reg);
PPC_MULW(b2p[TMP_REG_1], src_reg,
@@ -395,10 +391,6 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
goto bpf_alu32_trunc;
case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */
case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */
- PPC_CMPDI(src_reg, 0);
- PPC_BCC_SHORT(COND_NE, (ctx->idx * 4) + 12);
- PPC_LI(b2p[BPF_REG_0], 0);
- PPC_JMP(exit_addr);
if (BPF_OP(code) == BPF_MOD) {
PPC_DIVD(b2p[TMP_REG_1], dst_reg, src_reg);
PPC_MULD(b2p[TMP_REG_1], src_reg,
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index e14f381757f6..c1b0a9ac1dc8 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -207,7 +207,8 @@ struct kvm_s390_sie_block {
__u16 ipa; /* 0x0056 */
__u32 ipb; /* 0x0058 */
__u32 scaoh; /* 0x005c */
- __u8 reserved60; /* 0x0060 */
+#define FPF_BPBC 0x20
+ __u8 fpf; /* 0x0060 */
#define ECB_GS 0x40
#define ECB_TE 0x10
#define ECB_SRSI 0x04
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 38535a57fef8..4cdaa55fabfe 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -224,6 +224,7 @@ struct kvm_guest_debug_arch {
#define KVM_SYNC_RICCB (1UL << 7)
#define KVM_SYNC_FPRS (1UL << 8)
#define KVM_SYNC_GSCB (1UL << 9)
+#define KVM_SYNC_BPBC (1UL << 10)
/* length and alignment of the sdnx as a power of two */
#define SDNXC 8
#define SDNXL (1UL << SDNXC)
@@ -247,7 +248,9 @@ struct kvm_sync_regs {
};
__u8 reserved[512]; /* for future vector expansion */
__u32 fpc; /* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */
- __u8 padding1[52]; /* riccb needs to be 64byte aligned */
+ __u8 bpbc : 1; /* bp mode */
+ __u8 reserved2 : 7;
+ __u8 padding1[51]; /* riccb needs to be 64byte aligned */
__u8 riccb[64]; /* runtime instrumentation controls block */
__u8 padding2[192]; /* sdnx needs to be 256byte aligned */
union {
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 2c93cbbcd15e..2598cf243b86 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -421,6 +421,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_GS:
r = test_facility(133);
break;
+ case KVM_CAP_S390_BPB:
+ r = test_facility(82);
+ break;
default:
r = 0;
}
@@ -2198,6 +2201,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
kvm_s390_set_prefix(vcpu, 0);
if (test_kvm_facility(vcpu->kvm, 64))
vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
+ if (test_kvm_facility(vcpu->kvm, 82))
+ vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC;
if (test_kvm_facility(vcpu->kvm, 133))
vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
/* fprs can be synchronized via vrs, even if the guest has no vx. With
@@ -2339,6 +2344,7 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
current->thread.fpu.fpc = 0;
vcpu->arch.sie_block->gbea = 1;
vcpu->arch.sie_block->pp = 0;
+ vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
kvm_clear_async_pf_completion_queue(vcpu);
if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
@@ -3298,6 +3304,11 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
vcpu->arch.gs_enabled = 1;
}
+ if ((kvm_run->kvm_dirty_regs & KVM_SYNC_BPBC) &&
+ test_kvm_facility(vcpu->kvm, 82)) {
+ vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
+ vcpu->arch.sie_block->fpf |= kvm_run->s.regs.bpbc ? FPF_BPBC : 0;
+ }
save_access_regs(vcpu->arch.host_acrs);
restore_access_regs(vcpu->run->s.regs.acrs);
/* save host (userspace) fprs/vrs */
@@ -3344,6 +3355,7 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_run->s.regs.pft = vcpu->arch.pfault_token;
kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
+ kvm_run->s.regs.bpbc = (vcpu->arch.sie_block->fpf & FPF_BPBC) == FPF_BPBC;
save_access_regs(vcpu->run->s.regs.acrs);
restore_access_regs(vcpu->arch.host_acrs);
/* Save guest register state */
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 5d6ae0326d9e..751348348477 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -223,6 +223,12 @@ static void unshadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
memcpy(scb_o->gcr, scb_s->gcr, 128);
scb_o->pp = scb_s->pp;
+ /* branch prediction */
+ if (test_kvm_facility(vcpu->kvm, 82)) {
+ scb_o->fpf &= ~FPF_BPBC;
+ scb_o->fpf |= scb_s->fpf & FPF_BPBC;
+ }
+
/* interrupt intercept */
switch (scb_s->icptcode) {
case ICPT_PROGI:
@@ -265,6 +271,7 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
scb_s->ecb3 = 0;
scb_s->ecd = 0;
scb_s->fac = 0;
+ scb_s->fpf = 0;
rc = prepare_cpuflags(vcpu, vsie_page);
if (rc)
@@ -324,6 +331,9 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
prefix_unmapped(vsie_page);
scb_s->ecb |= scb_o->ecb & ECB_TE;
}
+ /* branch prediction */
+ if (test_kvm_facility(vcpu->kvm, 82))
+ scb_s->fpf |= scb_o->fpf & FPF_BPBC;
/* SIMD */
if (test_kvm_facility(vcpu->kvm, 129)) {
scb_s->eca |= scb_o->eca & ECA_VX;
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index e50188773ff3..78a19c93b380 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -610,11 +610,6 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
{
int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
- jit->seen |= SEEN_RET0;
- /* ltr %src,%src (if src == 0 goto fail) */
- EMIT2(0x1200, src_reg, src_reg);
- /* jz <ret0> */
- EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg);
/* lhi %w0,0 */
EMIT4_IMM(0xa7080000, REG_W0, 0);
/* lr %w1,%dst */
@@ -630,11 +625,6 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
{
int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;
- jit->seen |= SEEN_RET0;
- /* ltgr %src,%src (if src == 0 goto fail) */
- EMIT4(0xb9020000, src_reg, src_reg);
- /* jz <ret0> */
- EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg);
/* lghi %w0,0 */
EMIT4_IMM(0xa7090000, REG_W0, 0);
/* lgr %w1,%dst */
diff --git a/arch/sparc/crypto/Makefile b/arch/sparc/crypto/Makefile
index 818d3aa5172e..d257186c27d1 100644
--- a/arch/sparc/crypto/Makefile
+++ b/arch/sparc/crypto/Makefile
@@ -10,7 +10,7 @@ obj-$(CONFIG_CRYPTO_MD5_SPARC64) += md5-sparc64.o
obj-$(CONFIG_CRYPTO_AES_SPARC64) += aes-sparc64.o
obj-$(CONFIG_CRYPTO_DES_SPARC64) += des-sparc64.o
-obj-$(CONFIG_CRYPTO_DES_SPARC64) += camellia-sparc64.o
+obj-$(CONFIG_CRYPTO_CAMELLIA_SPARC64) += camellia-sparc64.o
obj-$(CONFIG_CRYPTO_CRC32C_SPARC64) += crc32c-sparc64.o
diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c
index 50a24d7bd4c5..48a25869349b 100644
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@ -967,31 +967,17 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
emit_alu(MULX, src, dst, ctx);
break;
case BPF_ALU | BPF_DIV | BPF_X:
- emit_cmp(src, G0, ctx);
- emit_branch(BE|ANNUL, ctx->idx, ctx->epilogue_offset, ctx);
- emit_loadimm(0, bpf2sparc[BPF_REG_0], ctx);
-
emit_write_y(G0, ctx);
emit_alu(DIV, src, dst, ctx);
break;
-
case BPF_ALU64 | BPF_DIV | BPF_X:
- emit_cmp(src, G0, ctx);
- emit_branch(BE|ANNUL, ctx->idx, ctx->epilogue_offset, ctx);
- emit_loadimm(0, bpf2sparc[BPF_REG_0], ctx);
-
emit_alu(UDIVX, src, dst, ctx);
break;
-
case BPF_ALU | BPF_MOD | BPF_X: {
const u8 tmp = bpf2sparc[TMP_REG_1];
ctx->tmp_1_used = true;
- emit_cmp(src, G0, ctx);
- emit_branch(BE|ANNUL, ctx->idx, ctx->epilogue_offset, ctx);
- emit_loadimm(0, bpf2sparc[BPF_REG_0], ctx);
-
emit_write_y(G0, ctx);
emit_alu3(DIV, dst, src, tmp, ctx);
emit_alu3(MULX, tmp, src, tmp, ctx);
@@ -1003,10 +989,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
ctx->tmp_1_used = true;
- emit_cmp(src, G0, ctx);
- emit_branch(BE|ANNUL, ctx->idx, ctx->epilogue_offset, ctx);
- emit_loadimm(0, bpf2sparc[BPF_REG_0], ctx);
-
emit_alu3(UDIVX, dst, src, tmp, ctx);
emit_alu3(MULX, tmp, src, tmp, ctx);
emit_alu3(SUB, dst, tmp, dst, ctx);
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index aa15b4c0e3d1..ff6f8022612c 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1264,7 +1264,7 @@ idtentry async_page_fault do_async_page_fault has_error_code=1
#endif
#ifdef CONFIG_X86_MCE
-idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip)
+idtentry machine_check do_mce has_error_code=0 paranoid=1
#endif
/*
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 7b45d8424150..4ad41087ce0e 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -194,6 +194,9 @@ enum spectre_v2_mitigation {
SPECTRE_V2_IBRS,
};
+extern char __indirect_thunk_start[];
+extern char __indirect_thunk_end[];
+
/*
* On VMEXIT we must ensure that no RSB predictions learned in the guest
* can be followed in the host, by overwriting the RSB completely. Both
@@ -203,16 +206,17 @@ enum spectre_v2_mitigation {
static inline void vmexit_fill_RSB(void)
{
#ifdef CONFIG_RETPOLINE
- unsigned long loops = RSB_CLEAR_LOOPS / 2;
+ unsigned long loops;
asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
ALTERNATIVE("jmp 910f",
__stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
X86_FEATURE_RETPOLINE)
"910:"
- : "=&r" (loops), ASM_CALL_CONSTRAINT
- : "r" (loops) : "memory" );
+ : "=r" (loops), ASM_CALL_CONSTRAINT
+ : : "memory" );
#endif
}
+
#endif /* __ASSEMBLY__ */
#endif /* __NOSPEC_BRANCH_H__ */
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 31051f35cbb7..3de69330e6c5 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -88,6 +88,7 @@ dotraplinkage void do_simd_coprocessor_error(struct pt_regs *, long);
#ifdef CONFIG_X86_32
dotraplinkage void do_iret_error(struct pt_regs *, long);
#endif
+dotraplinkage void do_mce(struct pt_regs *, long);
static inline int get_si_code(unsigned long condition)
{
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 81bb565f4497..7e2baf7304ae 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -29,10 +29,13 @@ KASAN_SANITIZE_stacktrace.o := n
KASAN_SANITIZE_paravirt.o := n
OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y
-OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y
OBJECT_FILES_NON_STANDARD_test_nx.o := y
OBJECT_FILES_NON_STANDARD_paravirt_patch_$(BITS).o := y
+ifdef CONFIG_FRAME_POINTER
+OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y
+endif
+
# If instrumentation of this dir is enabled, boot hangs during first second.
# Probably could be more selective here, but note that files related to irqs,
# boot, dumpstack/stacktrace, etc are either non-interesting or can lead to
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index b1d616d08eee..868e412b4f0c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1785,6 +1785,11 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code)
void (*machine_check_vector)(struct pt_regs *, long error_code) =
unexpected_machine_check;
+dotraplinkage void do_mce(struct pt_regs *regs, long error_code)
+{
+ machine_check_vector(regs, error_code);
+}
+
/*
* Called for each booted CPU to set up machine checks.
* Must be called with preempt off:
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index 7cb8ba08beb9..ef61f540cf0a 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -8,6 +8,7 @@
#include <asm/ftrace.h>
#include <asm/export.h>
#include <asm/nospec-branch.h>
+#include <asm/unwind_hints.h>
.code64
.section .entry.text, "ax"
@@ -20,7 +21,6 @@ EXPORT_SYMBOL(__fentry__)
EXPORT_SYMBOL(mcount)
#endif
-/* All cases save the original rbp (8 bytes) */
#ifdef CONFIG_FRAME_POINTER
# ifdef CC_USING_FENTRY
/* Save parent and function stack frames (rip and rbp) */
@@ -31,7 +31,7 @@ EXPORT_SYMBOL(mcount)
# endif
#else
/* No need to save a stack frame */
-# define MCOUNT_FRAME_SIZE 8
+# define MCOUNT_FRAME_SIZE 0
#endif /* CONFIG_FRAME_POINTER */
/* Size of stack used to save mcount regs in save_mcount_regs */
@@ -64,10 +64,10 @@ EXPORT_SYMBOL(mcount)
*/
.macro save_mcount_regs added=0
- /* Always save the original rbp */
+#ifdef CONFIG_FRAME_POINTER
+ /* Save the original rbp */
pushq %rbp
-#ifdef CONFIG_FRAME_POINTER
/*
* Stack traces will stop at the ftrace trampoline if the frame pointer
* is not set up properly. If fentry is used, we need to save a frame
@@ -105,7 +105,11 @@ EXPORT_SYMBOL(mcount)
* Save the original RBP. Even though the mcount ABI does not
* require this, it helps out callers.
*/
+#ifdef CONFIG_FRAME_POINTER
movq MCOUNT_REG_SIZE-8(%rsp), %rdx
+#else
+ movq %rbp, %rdx
+#endif
movq %rdx, RBP(%rsp)
/* Copy the parent address into %rsi (second parameter) */
@@ -148,7 +152,7 @@ EXPORT_SYMBOL(mcount)
ENTRY(function_hook)
retq
-END(function_hook)
+ENDPROC(function_hook)
ENTRY(ftrace_caller)
/* save_mcount_regs fills in first two parameters */
@@ -184,7 +188,7 @@ GLOBAL(ftrace_graph_call)
/* This is weak to keep gas from relaxing the jumps */
WEAK(ftrace_stub)
retq
-END(ftrace_caller)
+ENDPROC(ftrace_caller)
ENTRY(ftrace_regs_caller)
/* Save the current flags before any operations that can change them */
@@ -255,7 +259,7 @@ GLOBAL(ftrace_regs_caller_end)
jmp ftrace_epilogue
-END(ftrace_regs_caller)
+ENDPROC(ftrace_regs_caller)
#else /* ! CONFIG_DYNAMIC_FTRACE */
@@ -313,9 +317,10 @@ ENTRY(ftrace_graph_caller)
restore_mcount_regs
retq
-END(ftrace_graph_caller)
+ENDPROC(ftrace_graph_caller)
-GLOBAL(return_to_handler)
+ENTRY(return_to_handler)
+ UNWIND_HINT_EMPTY
subq $24, %rsp
/* Save the return values */
@@ -330,4 +335,5 @@ GLOBAL(return_to_handler)
movq (%rsp), %rax
addq $24, %rsp
JMP_NOSPEC %rdi
+END(return_to_handler)
#endif
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index e941136e24d8..203d398802a3 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -40,6 +40,7 @@
#include <asm/debugreg.h>
#include <asm/set_memory.h>
#include <asm/sections.h>
+#include <asm/nospec-branch.h>
#include "common.h"
@@ -203,7 +204,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real)
}
/* Check whether insn is indirect jump */
-static int insn_is_indirect_jump(struct insn *insn)
+static int __insn_is_indirect_jump(struct insn *insn)
{
return ((insn->opcode.bytes[0] == 0xff &&
(X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
@@ -237,6 +238,26 @@ static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
return (start <= target && target <= start + len);
}
+static int insn_is_indirect_jump(struct insn *insn)
+{
+ int ret = __insn_is_indirect_jump(insn);
+
+#ifdef CONFIG_RETPOLINE
+ /*
+ * Jump to x86_indirect_thunk_* is treated as an indirect jump.
+ * Note that even with CONFIG_RETPOLINE=y, the kernel compiled with
+ * older gcc may use indirect jump. So we add this check instead of
+ * replace indirect-jump check.
+ */
+ if (!ret)
+ ret = insn_jump_into_range(insn,
+ (unsigned long)__indirect_thunk_start,
+ (unsigned long)__indirect_thunk_end -
+ (unsigned long)__indirect_thunk_start);
+#endif
+ return ret;
+}
+
/* Decode whole function to ensure any instructions don't jump into target */
static int can_optimize(unsigned long paddr)
{
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 832a6acd730f..cb368c2a22ab 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -380,19 +380,24 @@ void stop_this_cpu(void *dummy)
disable_local_APIC();
mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
+ /*
+ * Use wbinvd on processors that support SME. This provides support
+ * for performing a successful kexec when going from SME inactive
+ * to SME active (or vice-versa). The cache must be cleared so that
+ * if there are entries with the same physical address, both with and
+ * without the encryption bit, they don't race each other when flushed
+ * and potentially end up with the wrong entry being committed to
+ * memory.
+ */
+ if (boot_cpu_has(X86_FEATURE_SME))
+ native_wbinvd();
for (;;) {
/*
- * Use wbinvd followed by hlt to stop the processor. This
- * provides support for kexec on a processor that supports
- * SME. With kexec, going from SME inactive to SME active
- * requires clearing cache entries so that addresses without
- * the encryption bit set don't corrupt the same physical
- * address that has the encryption bit set when caches are
- * flushed. To achieve this a wbinvd is performed followed by
- * a hlt. Even if the processor is not in the kexec/SME
- * scenario this only adds a wbinvd to a halting processor.
+ * Use native_halt() so that memory contents don't change
+ * (stack usage and variables) after possibly issuing the
+ * native_wbinvd() above.
*/
- asm volatile("wbinvd; hlt" : : : "memory");
+ native_halt();
}
}
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index be86a865087a..1f9188f5357c 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -74,8 +74,50 @@ static struct orc_entry *orc_module_find(unsigned long ip)
}
#endif
+#ifdef CONFIG_DYNAMIC_FTRACE
+static struct orc_entry *orc_find(unsigned long ip);
+
+/*
+ * Ftrace dynamic trampolines do not have orc entries of their own.
+ * But they are copies of the ftrace entries that are static and
+ * defined in ftrace_*.S, which do have orc entries.
+ *
+ * If the undwinder comes across a ftrace trampoline, then find the
+ * ftrace function that was used to create it, and use that ftrace
+ * function's orc entrie, as the placement of the return code in
+ * the stack will be identical.
+ */
+static struct orc_entry *orc_ftrace_find(unsigned long ip)
+{
+ struct ftrace_ops *ops;
+ unsigned long caller;
+
+ ops = ftrace_ops_trampoline(ip);
+ if (!ops)
+ return NULL;
+
+ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS)
+ caller = (unsigned long)ftrace_regs_call;
+ else
+ caller = (unsigned long)ftrace_call;
+
+ /* Prevent unlikely recursion */
+ if (ip == caller)
+ return NULL;
+
+ return orc_find(caller);
+}
+#else
+static struct orc_entry *orc_ftrace_find(unsigned long ip)
+{
+ return NULL;
+}
+#endif
+
static struct orc_entry *orc_find(unsigned long ip)
{
+ static struct orc_entry *orc;
+
if (!orc_init)
return NULL;
@@ -111,7 +153,11 @@ static struct orc_entry *orc_find(unsigned long ip)
__stop_orc_unwind_ip - __start_orc_unwind_ip, ip);
/* Module lookup: */
- return orc_module_find(ip);
+ orc = orc_module_find(ip);
+ if (orc)
+ return orc;
+
+ return orc_ftrace_find(ip);
}
static void orc_sort_swap(void *_a, void *_b, int size)
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 1e413a9326aa..9b138a06c1a4 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -124,6 +124,12 @@ SECTIONS
ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");
#endif
+#ifdef CONFIG_RETPOLINE
+ __indirect_thunk_start = .;
+ *(.text.__x86.indirect_thunk)
+ __indirect_thunk_end = .;
+#endif
+
/* End of text section */
_etext = .;
} :text = 0x9090
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1cec2c62a0b0..c53298dfbf50 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7496,13 +7496,13 @@ EXPORT_SYMBOL_GPL(kvm_task_switch);
int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
{
- if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG_BIT)) {
+ if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG)) {
/*
* When EFER.LME and CR0.PG are set, the processor is in
* 64-bit mode (though maybe in a 32-bit code segment).
* CR4.PAE and EFER.LMA must be set.
*/
- if (!(sregs->cr4 & X86_CR4_PAE_BIT)
+ if (!(sregs->cr4 & X86_CR4_PAE)
|| !(sregs->efer & EFER_LMA))
return -EINVAL;
} else {
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index cb45c6cb465f..dfb2ba91b670 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -9,7 +9,7 @@
#include <asm/nospec-branch.h>
.macro THUNK reg
- .section .text.__x86.indirect_thunk.\reg
+ .section .text.__x86.indirect_thunk
ENTRY(__x86_indirect_thunk_\reg)
CFI_STARTPROC
@@ -25,7 +25,8 @@ ENDPROC(__x86_indirect_thunk_\reg)
* than one per register with the correct names. So we do it
* the simple and nasty way...
*/
-#define EXPORT_THUNK(reg) EXPORT_SYMBOL(__x86_indirect_thunk_ ## reg)
+#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
+#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg)
GENERATE_THUNK(_ASM_AX)
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 3ef362f598e3..e1d61e8500f9 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -738,7 +738,7 @@ static unsigned long __init sme_pgtable_calc(unsigned long len)
return total;
}
-void __init sme_encrypt_kernel(struct boot_params *bp)
+void __init __nostackprotector sme_encrypt_kernel(struct boot_params *bp)
{
unsigned long workarea_start, workarea_end, workarea_len;
unsigned long execute_start, execute_end, execute_len;
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 5acee5139e28..4923d92f918d 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -568,26 +568,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
*/
EMIT2(0x31, 0xd2);
- if (BPF_SRC(insn->code) == BPF_X) {
- /* if (src_reg == 0) return 0 */
-
- /* cmp r11, 0 */
- EMIT4(0x49, 0x83, 0xFB, 0x00);
-
- /* jne .+9 (skip over pop, pop, xor and jmp) */
- EMIT2(X86_JNE, 1 + 1 + 2 + 5);
- EMIT1(0x5A); /* pop rdx */
- EMIT1(0x58); /* pop rax */
- EMIT2(0x31, 0xc0); /* xor eax, eax */
-
- /* jmp cleanup_addr
- * addrs[i] - 11, because there are 11 bytes
- * after this insn: div, mov, pop, pop, mov
- */
- jmp_offset = ctx->cleanup_addr - (addrs[i] - 11);
- EMIT1_off32(0xE9, jmp_offset);
- }
-
if (BPF_CLASS(insn->code) == BPF_ALU64)
/* div r11 */
EMIT3(0x49, 0xF7, 0xF3);
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index f6a26e3cb476..54ef19e90705 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -662,11 +662,11 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2033, quirk_no_aersid);
*/
static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
{
+ static const char *name = "PCI Bus 0000:00";
+ struct resource *res, *conflict;
u32 base, limit, high;
struct pci_dev *other;
- struct resource *res;
unsigned i;
- int r;
if (!(pci_probe & PCI_BIG_ROOT_WINDOW))
return;
@@ -707,21 +707,26 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
* Allocate a 256GB window directly below the 0xfd00000000 hardware
* limit (see AMD Family 15h Models 30h-3Fh BKDG, sec 2.4.6).
*/
- res->name = "PCI Bus 0000:00";
+ res->name = name;
res->flags = IORESOURCE_PREFETCH | IORESOURCE_MEM |
IORESOURCE_MEM_64 | IORESOURCE_WINDOW;
res->start = 0xbd00000000ull;
res->end = 0xfd00000000ull - 1;
- r = request_resource(&iomem_resource, res);
- if (r) {
+ conflict = request_resource_conflict(&iomem_resource, res);
+ if (conflict) {
kfree(res);
- return;
- }
+ if (conflict->name != name)
+ return;
- dev_info(&dev->dev, "adding root bus resource %pR (tainting kernel)\n",
- res);
- add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
+ /* We are resuming from suspend; just reenable the window */
+ res = conflict;
+ } else {
+ dev_info(&dev->dev, "adding root bus resource %pR (tainting kernel)\n",
+ res);
+ add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
+ pci_bus_add_resource(dev->bus, res, 0);
+ }
base = ((res->start >> 8) & AMD_141b_MMIO_BASE_MMIOBASE_MASK) |
AMD_141b_MMIO_BASE_RE_MASK | AMD_141b_MMIO_BASE_WE_MASK;
@@ -733,13 +738,16 @@ static void pci_amd_enable_64bit_bar(struct pci_dev *dev)
pci_write_config_dword(dev, AMD_141b_MMIO_HIGH(i), high);
pci_write_config_dword(dev, AMD_141b_MMIO_LIMIT(i), limit);
pci_write_config_dword(dev, AMD_141b_MMIO_BASE(i), base);
-
- pci_bus_add_resource(dev->bus, res, 0);
}
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1401, pci_amd_enable_64bit_bar);
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x141b, pci_amd_enable_64bit_bar);
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1571, pci_amd_enable_64bit_bar);
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar);
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1401, pci_amd_enable_64bit_bar);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x141b, pci_amd_enable_64bit_bar);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1571, pci_amd_enable_64bit_bar);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar);
+DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar);
#endif
diff --git a/drivers/base/property.c b/drivers/base/property.c
index 851b1b6596a4..613ba820f545 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -16,6 +16,7 @@
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_graph.h>
+#include <linux/of_irq.h>
#include <linux/property.h>
#include <linux/etherdevice.h>
#include <linux/phy.h>
@@ -997,6 +998,32 @@ fwnode_get_next_child_node(const struct fwnode_handle *fwnode,
EXPORT_SYMBOL_GPL(fwnode_get_next_child_node);
/**
+ * fwnode_get_next_available_child_node - Return the next
+ * available child node handle for a node
+ * @fwnode: Firmware node to find the next child node for.
+ * @child: Handle to one of the node's child nodes or a %NULL handle.
+ */
+struct fwnode_handle *
+fwnode_get_next_available_child_node(const struct fwnode_handle *fwnode,
+ struct fwnode_handle *child)
+{
+ struct fwnode_handle *next_child = child;
+
+ if (!fwnode)
+ return NULL;
+
+ do {
+ next_child = fwnode_get_next_child_node(fwnode, next_child);
+
+ if (!next_child || fwnode_device_is_available(next_child))
+ break;
+ } while (next_child);
+
+ return next_child;
+}
+EXPORT_SYMBOL_GPL(fwnode_get_next_available_child_node);
+
+/**
* device_get_next_child_node - Return the next child node handle for a device
* @dev: Device to find the next child node for.
* @child: Handle to one of the device's child nodes or a null handle.
@@ -1126,21 +1153,21 @@ enum dev_dma_attr device_get_dma_attr(struct device *dev)
EXPORT_SYMBOL_GPL(device_get_dma_attr);
/**
- * device_get_phy_mode - Get phy mode for given device
- * @dev: Pointer to the given device
+ * fwnode_get_phy_mode - Get phy mode for given firmware node
+ * @fwnode: Pointer to the given node
*
* The function gets phy interface string from property 'phy-mode' or
* 'phy-connection-type', and return its index in phy_modes table, or errno in
* error case.
*/
-int device_get_phy_mode(struct device *dev)
+int fwnode_get_phy_mode(struct fwnode_handle *fwnode)
{
const char *pm;
int err, i;
- err = device_property_read_string(dev, "phy-mode", &pm);
+ err = fwnode_property_read_string(fwnode, "phy-mode", &pm);
if (err < 0)
- err = device_property_read_string(dev,
+ err = fwnode_property_read_string(fwnode,
"phy-connection-type", &pm);
if (err < 0)
return err;
@@ -1151,13 +1178,27 @@ int device_get_phy_mode(struct device *dev)
return -ENODEV;
}
+EXPORT_SYMBOL_GPL(fwnode_get_phy_mode);
+
+/**
+ * device_get_phy_mode - Get phy mode for given device
+ * @dev: Pointer to the given device
+ *
+ * The function gets phy interface string from property 'phy-mode' or
+ * 'phy-connection-type', and return its index in phy_modes table, or errno in
+ * error case.
+ */
+int device_get_phy_mode(struct device *dev)
+{
+ return fwnode_get_phy_mode(dev_fwnode(dev));
+}
EXPORT_SYMBOL_GPL(device_get_phy_mode);
-static void *device_get_mac_addr(struct device *dev,
+static void *fwnode_get_mac_addr(struct fwnode_handle *fwnode,
const char *name, char *addr,
int alen)
{
- int ret = device_property_read_u8_array(dev, name, addr, alen);
+ int ret = fwnode_property_read_u8_array(fwnode, name, addr, alen);
if (ret == 0 && alen == ETH_ALEN && is_valid_ether_addr(addr))
return addr;
@@ -1165,8 +1206,8 @@ static void *device_get_mac_addr(struct device *dev,
}
/**
- * device_get_mac_address - Get the MAC for a given device
- * @dev: Pointer to the device
+ * fwnode_get_mac_address - Get the MAC from the firmware node
+ * @fwnode: Pointer to the firmware node
* @addr: Address of buffer to store the MAC in
* @alen: Length of the buffer pointed to by addr, should be ETH_ALEN
*
@@ -1187,23 +1228,60 @@ static void *device_get_mac_addr(struct device *dev,
* In this case, the real MAC is in 'local-mac-address', and 'mac-address'
* exists but is all zeros.
*/
-void *device_get_mac_address(struct device *dev, char *addr, int alen)
+void *fwnode_get_mac_address(struct fwnode_handle *fwnode, char *addr, int alen)
{
char *res;
- res = device_get_mac_addr(dev, "mac-address", addr, alen);
+ res = fwnode_get_mac_addr(fwnode, "mac-address", addr, alen);
if (res)
return res;
- res = device_get_mac_addr(dev, "local-mac-address", addr, alen);
+ res = fwnode_get_mac_addr(fwnode, "local-mac-address", addr, alen);
if (res)
return res;
- return device_get_mac_addr(dev, "address", addr, alen);
+ return fwnode_get_mac_addr(fwnode, "address", addr, alen);
+}
+EXPORT_SYMBOL(fwnode_get_mac_address);
+
+/**
+ * device_get_mac_address - Get the MAC for a given device
+ * @dev: Pointer to the device
+ * @addr: Address of buffer to store the MAC in
+ * @alen: Length of the buffer pointed to by addr, should be ETH_ALEN
+ */
+void *device_get_mac_address(struct device *dev, char *addr, int alen)
+{
+ return fwnode_get_mac_address(dev_fwnode(dev), addr, alen);
}
EXPORT_SYMBOL(device_get_mac_address);
/**
+ * fwnode_irq_get - Get IRQ directly from a fwnode
+ * @fwnode: Pointer to the firmware node
+ * @index: Zero-based index of the IRQ
+ *
+ * Returns Linux IRQ number on success. Other values are determined
+ * accordingly to acpi_/of_ irq_get() operation.
+ */
+int fwnode_irq_get(struct fwnode_handle *fwnode, unsigned int index)
+{
+ struct device_node *of_node = to_of_node(fwnode);
+ struct resource res;
+ int ret;
+
+ if (IS_ENABLED(CONFIG_OF) && of_node)
+ return of_irq_get(of_node, index);
+
+ ret = acpi_irq_get(ACPI_HANDLE_FWNODE(fwnode), index, &res);
+ if (ret)
+ return ret;
+
+ return res.start;
+}
+EXPORT_SYMBOL(fwnode_irq_get);
+
+/**
* device_graph_get_next_endpoint - Get next endpoint firmware node
* @fwnode: Pointer to the parent firmware node
* @prev: Previous endpoint node or %NULL to get the first
diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c
index afa4cb3b16e3..6659f113042c 100644
--- a/drivers/bluetooth/btbcm.c
+++ b/drivers/bluetooth/btbcm.c
@@ -323,6 +323,7 @@ static const struct {
{ 0x410e, "BCM43341B0" }, /* 002.001.014 */
{ 0x4406, "BCM4324B3" }, /* 002.004.006 */
{ 0x610c, "BCM4354" }, /* 003.001.012 */
+ { 0x2122, "BCM4343A0" }, /* 001.001.034 */
{ 0x2209, "BCM43430A1" }, /* 001.002.009 */
{ 0x6119, "BCM4345C0" }, /* 003.001.025 */
{ 0x230f, "BCM4356A2" }, /* 001.003.015 */
diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c
index 07f00e422e85..5270d5513201 100644
--- a/drivers/bluetooth/btintel.c
+++ b/drivers/bluetooth/btintel.c
@@ -24,6 +24,7 @@
#include <linux/module.h>
#include <linux/firmware.h>
#include <linux/regmap.h>
+#include <asm/unaligned.h>
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
@@ -569,6 +570,160 @@ struct regmap *btintel_regmap_init(struct hci_dev *hdev, u16 opcode_read,
}
EXPORT_SYMBOL_GPL(btintel_regmap_init);
+int btintel_send_intel_reset(struct hci_dev *hdev, u32 boot_param)
+{
+ struct intel_reset params = { 0x00, 0x01, 0x00, 0x01, 0x00000000 };
+ struct sk_buff *skb;
+
+ params.boot_param = cpu_to_le32(boot_param);
+
+ skb = __hci_cmd_sync(hdev, 0xfc01, sizeof(params), &params,
+ HCI_INIT_TIMEOUT);
+ if (IS_ERR(skb)) {
+ bt_dev_err(hdev, "Failed to send Intel Reset command");
+ return PTR_ERR(skb);
+ }
+
+ kfree_skb(skb);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(btintel_send_intel_reset);
+
+int btintel_read_boot_params(struct hci_dev *hdev,
+ struct intel_boot_params *params)
+{
+ struct sk_buff *skb;
+
+ skb = __hci_cmd_sync(hdev, 0xfc0d, 0, NULL, HCI_INIT_TIMEOUT);
+ if (IS_ERR(skb)) {
+ bt_dev_err(hdev, "Reading Intel boot parameters failed (%ld)",
+ PTR_ERR(skb));
+ return PTR_ERR(skb);
+ }
+
+ if (skb->len != sizeof(*params)) {
+ bt_dev_err(hdev, "Intel boot parameters size mismatch");
+ kfree_skb(skb);
+ return -EILSEQ;
+ }
+
+ memcpy(params, skb->data, sizeof(*params));
+
+ kfree_skb(skb);
+
+ if (params->status) {
+ bt_dev_err(hdev, "Intel boot parameters command failed (%02x)",
+ params->status);
+ return -bt_to_errno(params->status);
+ }
+
+ bt_dev_info(hdev, "Device revision is %u",
+ le16_to_cpu(params->dev_revid));
+
+ bt_dev_info(hdev, "Secure boot is %s",
+ params->secure_boot ? "enabled" : "disabled");
+
+ bt_dev_info(hdev, "OTP lock is %s",
+ params->otp_lock ? "enabled" : "disabled");
+
+ bt_dev_info(hdev, "API lock is %s",
+ params->api_lock ? "enabled" : "disabled");
+
+ bt_dev_info(hdev, "Debug lock is %s",
+ params->debug_lock ? "enabled" : "disabled");
+
+ bt_dev_info(hdev, "Minimum firmware build %u week %u %u",
+ params->min_fw_build_nn, params->min_fw_build_cw,
+ 2000 + params->min_fw_build_yy);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(btintel_read_boot_params);
+
+int btintel_download_firmware(struct hci_dev *hdev, const struct firmware *fw,
+ u32 *boot_param)
+{
+ int err;
+ const u8 *fw_ptr;
+ u32 frag_len;
+
+ /* Start the firmware download transaction with the Init fragment
+ * represented by the 128 bytes of CSS header.
+ */
+ err = btintel_secure_send(hdev, 0x00, 128, fw->data);
+ if (err < 0) {
+ bt_dev_err(hdev, "Failed to send firmware header (%d)", err);
+ goto done;
+ }
+
+ /* Send the 256 bytes of public key information from the firmware
+ * as the PKey fragment.
+ */
+ err = btintel_secure_send(hdev, 0x03, 256, fw->data + 128);
+ if (err < 0) {
+ bt_dev_err(hdev, "Failed to send firmware pkey (%d)", err);
+ goto done;
+ }
+
+ /* Send the 256 bytes of signature information from the firmware
+ * as the Sign fragment.
+ */
+ err = btintel_secure_send(hdev, 0x02, 256, fw->data + 388);
+ if (err < 0) {
+ bt_dev_err(hdev, "Failed to send firmware signature (%d)", err);
+ goto done;
+ }
+
+ fw_ptr = fw->data + 644;
+ frag_len = 0;
+
+ while (fw_ptr - fw->data < fw->size) {
+ struct hci_command_hdr *cmd = (void *)(fw_ptr + frag_len);
+
+ /* Each SKU has a different reset parameter to use in the
+ * HCI_Intel_Reset command and it is embedded in the firmware
+ * data. So, instead of using static value per SKU, check
+ * the firmware data and save it for later use.
+ */
+ if (le16_to_cpu(cmd->opcode) == 0xfc0e) {
+ /* The boot parameter is the first 32-bit value
+ * and rest of 3 octets are reserved.
+ */
+ *boot_param = get_unaligned_le32(fw_ptr + sizeof(*cmd));
+
+ bt_dev_dbg(hdev, "boot_param=0x%x", *boot_param);
+ }
+
+ frag_len += sizeof(*cmd) + cmd->plen;
+
+ /* The parameter length of the secure send command requires
+ * a 4 byte alignment. It happens so that the firmware file
+ * contains proper Intel_NOP commands to align the fragments
+ * as needed.
+ *
+ * Send set of commands with 4 byte alignment from the
+ * firmware data buffer as a single Data fragement.
+ */
+ if (!(frag_len % 4)) {
+ err = btintel_secure_send(hdev, 0x01, frag_len, fw_ptr);
+ if (err < 0) {
+ bt_dev_err(hdev,
+ "Failed to send firmware data (%d)",
+ err);
+ goto done;
+ }
+
+ fw_ptr += frag_len;
+ frag_len = 0;
+ }
+ }
+
+done:
+ return err;
+}
+EXPORT_SYMBOL_GPL(btintel_download_firmware);
+
MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
MODULE_DESCRIPTION("Bluetooth support for Intel devices ver " VERSION);
MODULE_VERSION(VERSION);
diff --git a/drivers/bluetooth/btintel.h b/drivers/bluetooth/btintel.h
index 1e8955aaafed..41c642cc523f 100644
--- a/drivers/bluetooth/btintel.h
+++ b/drivers/bluetooth/btintel.h
@@ -69,6 +69,14 @@ struct intel_secure_send_result {
__u8 status;
} __packed;
+struct intel_reset {
+ __u8 reset_type;
+ __u8 patch_enable;
+ __u8 ddc_reload;
+ __u8 boot_option;
+ __le32 boot_param;
+} __packed;
+
#if IS_ENABLED(CONFIG_BT_INTEL)
int btintel_check_bdaddr(struct hci_dev *hdev);
@@ -89,7 +97,11 @@ int btintel_read_version(struct hci_dev *hdev, struct intel_version *ver);
struct regmap *btintel_regmap_init(struct hci_dev *hdev, u16 opcode_read,
u16 opcode_write);
-
+int btintel_send_intel_reset(struct hci_dev *hdev, u32 boot_param);
+int btintel_read_boot_params(struct hci_dev *hdev,
+ struct intel_boot_params *params);
+int btintel_download_firmware(struct hci_dev *dev, const struct firmware *fw,
+ u32 *boot_param);
#else
static inline int btintel_check_bdaddr(struct hci_dev *hdev)
@@ -165,4 +177,23 @@ static inline struct regmap *btintel_regmap_init(struct hci_dev *hdev,
{
return ERR_PTR(-EINVAL);
}
+
+static inline int btintel_send_intel_reset(struct hci_dev *hdev,
+ u32 reset_param)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int btintel_read_boot_params(struct hci_dev *hdev,
+ struct intel_boot_params *params)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int btintel_download_firmware(struct hci_dev *dev,
+ const struct firmware *fw,
+ u32 *boot_param)
+{
+ return -EOPNOTSUPP;
+}
#endif
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 29977ebfd031..2a55380ad730 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -2009,15 +2009,11 @@ static int btusb_send_frame_intel(struct hci_dev *hdev, struct sk_buff *skb)
static int btusb_setup_intel_new(struct hci_dev *hdev)
{
- static const u8 reset_param[] = { 0x00, 0x01, 0x00, 0x01,
- 0x00, 0x08, 0x04, 0x00 };
struct btusb_data *data = hci_get_drvdata(hdev);
- struct sk_buff *skb;
struct intel_version ver;
- struct intel_boot_params *params;
+ struct intel_boot_params params;
const struct firmware *fw;
- const u8 *fw_ptr;
- u32 frag_len;
+ u32 boot_param;
char fwname[64];
ktime_t calltime, delta, rettime;
unsigned long long duration;
@@ -2025,6 +2021,12 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
BT_DBG("%s", hdev->name);
+ /* Set the default boot parameter to 0x0 and it is updated to
+ * SKU specific boot parameter after reading Intel_Write_Boot_Params
+ * command while downloading the firmware.
+ */
+ boot_param = 0x00000000;
+
calltime = ktime_get();
/* Read the Intel version information to determine if the device
@@ -2095,55 +2097,24 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
/* Read the secure boot parameters to identify the operating
* details of the bootloader.
*/
- skb = __hci_cmd_sync(hdev, 0xfc0d, 0, NULL, HCI_INIT_TIMEOUT);
- if (IS_ERR(skb)) {
- BT_ERR("%s: Reading Intel boot parameters failed (%ld)",
- hdev->name, PTR_ERR(skb));
- return PTR_ERR(skb);
- }
-
- if (skb->len != sizeof(*params)) {
- BT_ERR("%s: Intel boot parameters size mismatch", hdev->name);
- kfree_skb(skb);
- return -EILSEQ;
- }
-
- params = (struct intel_boot_params *)skb->data;
-
- bt_dev_info(hdev, "Device revision is %u",
- le16_to_cpu(params->dev_revid));
-
- bt_dev_info(hdev, "Secure boot is %s",
- params->secure_boot ? "enabled" : "disabled");
-
- bt_dev_info(hdev, "OTP lock is %s",
- params->otp_lock ? "enabled" : "disabled");
-
- bt_dev_info(hdev, "API lock is %s",
- params->api_lock ? "enabled" : "disabled");
-
- bt_dev_info(hdev, "Debug lock is %s",
- params->debug_lock ? "enabled" : "disabled");
-
- bt_dev_info(hdev, "Minimum firmware build %u week %u %u",
- params->min_fw_build_nn, params->min_fw_build_cw,
- 2000 + params->min_fw_build_yy);
+ err = btintel_read_boot_params(hdev, &params);
+ if (err)
+ return err;
/* It is required that every single firmware fragment is acknowledged
* with a command complete event. If the boot parameters indicate
* that this bootloader does not send them, then abort the setup.
*/
- if (params->limited_cce != 0x00) {
+ if (params.limited_cce != 0x00) {
BT_ERR("%s: Unsupported Intel firmware loading method (%u)",
- hdev->name, params->limited_cce);
- kfree_skb(skb);
+ hdev->name, params.limited_cce);
return -EINVAL;
}
/* If the OTP has no valid Bluetooth device address, then there will
* also be no valid address for the operational firmware.
*/
- if (!bacmp(&params->otp_bdaddr, BDADDR_ANY)) {
+ if (!bacmp(&params.otp_bdaddr, BDADDR_ANY)) {
bt_dev_info(hdev, "No device address configured");
set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks);
}
@@ -2174,7 +2145,7 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
case 0x0c: /* WsP */
snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u.sfi",
le16_to_cpu(ver.hw_variant),
- le16_to_cpu(params->dev_revid));
+ le16_to_cpu(params.dev_revid));
break;
case 0x11: /* JfP */
case 0x12: /* ThP */
@@ -2192,7 +2163,6 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
if (err < 0) {
BT_ERR("%s: Failed to load Intel firmware file (%d)",
hdev->name, err);
- kfree_skb(skb);
return err;
}
@@ -2206,7 +2176,7 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
case 0x0c: /* WsP */
snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u.ddc",
le16_to_cpu(ver.hw_variant),
- le16_to_cpu(params->dev_revid));
+ le16_to_cpu(params.dev_revid));
break;
case 0x11: /* JfP */
case 0x12: /* ThP */
@@ -2220,8 +2190,6 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
return -EINVAL;
}
- kfree_skb(skb);
-
if (fw->size < 644) {
BT_ERR("%s: Invalid size of firmware file (%zu)",
hdev->name, fw->size);
@@ -2231,64 +2199,10 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
set_bit(BTUSB_DOWNLOADING, &data->flags);
- /* Start the firmware download transaction with the Init fragment
- * represented by the 128 bytes of CSS header.
- */
- err = btintel_secure_send(hdev, 0x00, 128, fw->data);
- if (err < 0) {
- BT_ERR("%s: Failed to send firmware header (%d)",
- hdev->name, err);
- goto done;
- }
-
- /* Send the 256 bytes of public key information from the firmware
- * as the PKey fragment.
- */
- err = btintel_secure_send(hdev, 0x03, 256, fw->data + 128);
- if (err < 0) {
- BT_ERR("%s: Failed to send firmware public key (%d)",
- hdev->name, err);
- goto done;
- }
-
- /* Send the 256 bytes of signature information from the firmware
- * as the Sign fragment.
- */
- err = btintel_secure_send(hdev, 0x02, 256, fw->data + 388);
- if (err < 0) {
- BT_ERR("%s: Failed to send firmware signature (%d)",
- hdev->name, err);
+ /* Start firmware downloading and get boot parameter */
+ err = btintel_download_firmware(hdev, fw, &boot_param);
+ if (err < 0)
goto done;
- }
-
- fw_ptr = fw->data + 644;
- frag_len = 0;
-
- while (fw_ptr - fw->data < fw->size) {
- struct hci_command_hdr *cmd = (void *)(fw_ptr + frag_len);
-
- frag_len += sizeof(*cmd) + cmd->plen;
-
- /* The parameter length of the secure send command requires
- * a 4 byte alignment. It happens so that the firmware file
- * contains proper Intel_NOP commands to align the fragments
- * as needed.
- *
- * Send set of commands with 4 byte alignment from the
- * firmware data buffer as a single Data fragement.
- */
- if (!(frag_len % 4)) {
- err = btintel_secure_send(hdev, 0x01, frag_len, fw_ptr);
- if (err < 0) {
- BT_ERR("%s: Failed to send firmware data (%d)",
- hdev->name, err);
- goto done;
- }
-
- fw_ptr += frag_len;
- frag_len = 0;
- }
- }
set_bit(BTUSB_FIRMWARE_LOADED, &data->flags);
@@ -2341,12 +2255,9 @@ done:
set_bit(BTUSB_BOOTING, &data->flags);
- skb = __hci_cmd_sync(hdev, 0xfc01, sizeof(reset_param), reset_param,
- HCI_INIT_TIMEOUT);
- if (IS_ERR(skb))
- return PTR_ERR(skb);
-
- kfree_skb(skb);
+ err = btintel_send_intel_reset(hdev, boot_param);
+ if (err)
+ return err;
/* The bootloader will not indicate when the device is ready. This
* is done by the operational firmware sending bootup notification.
diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
index 64800cd2796c..0438a64b8185 100644
--- a/drivers/bluetooth/hci_bcm.c
+++ b/drivers/bluetooth/hci_bcm.c
@@ -412,8 +412,11 @@ out:
return 0;
err_unset_hu:
+ if (hu->serdev)
+ serdev_device_close(hu->serdev);
#ifdef CONFIG_PM
- bcm->dev->hu = NULL;
+ else
+ bcm->dev->hu = NULL;
#endif
err_free:
mutex_unlock(&bcm_device_lock);
diff --git a/drivers/bluetooth/hci_intel.c b/drivers/bluetooth/hci_intel.c
index aad07e40ea4f..7c166e3b308b 100644
--- a/drivers/bluetooth/hci_intel.c
+++ b/drivers/bluetooth/hci_intel.c
@@ -540,18 +540,15 @@ static int intel_set_baudrate(struct hci_uart *hu, unsigned int speed)
static int intel_setup(struct hci_uart *hu)
{
- static const u8 reset_param[] = { 0x00, 0x01, 0x00, 0x01,
- 0x00, 0x08, 0x04, 0x00 };
struct intel_data *intel = hu->priv;
struct hci_dev *hdev = hu->hdev;
struct sk_buff *skb;
struct intel_version ver;
- struct intel_boot_params *params;
+ struct intel_boot_params params;
struct list_head *p;
const struct firmware *fw;
- const u8 *fw_ptr;
char fwname[64];
- u32 frag_len;
+ u32 boot_param;
ktime_t calltime, delta, rettime;
unsigned long long duration;
unsigned int init_speed, oper_speed;
@@ -563,6 +560,12 @@ static int intel_setup(struct hci_uart *hu)
hu->hdev->set_diag = btintel_set_diag;
hu->hdev->set_bdaddr = btintel_set_bdaddr;
+ /* Set the default boot parameter to 0x0 and it is updated to
+ * SKU specific boot parameter after reading Intel_Write_Boot_Params
+ * command while downloading the firmware.
+ */
+ boot_param = 0x00000000;
+
calltime = ktime_get();
if (hu->init_speed)
@@ -656,85 +659,95 @@ static int intel_setup(struct hci_uart *hu)
/* Read the secure boot parameters to identify the operating
* details of the bootloader.
*/
- skb = __hci_cmd_sync(hdev, 0xfc0d, 0, NULL, HCI_CMD_TIMEOUT);
- if (IS_ERR(skb)) {
- bt_dev_err(hdev, "Reading Intel boot parameters failed (%ld)",
- PTR_ERR(skb));
- return PTR_ERR(skb);
- }
-
- if (skb->len != sizeof(*params)) {
- bt_dev_err(hdev, "Intel boot parameters size mismatch");
- kfree_skb(skb);
- return -EILSEQ;
- }
-
- params = (struct intel_boot_params *)skb->data;
- if (params->status) {
- bt_dev_err(hdev, "Intel boot parameters command failure (%02x)",
- params->status);
- err = -bt_to_errno(params->status);
- kfree_skb(skb);
+ err = btintel_read_boot_params(hdev, &params);
+ if (err)
return err;
- }
-
- bt_dev_info(hdev, "Device revision is %u",
- le16_to_cpu(params->dev_revid));
-
- bt_dev_info(hdev, "Secure boot is %s",
- params->secure_boot ? "enabled" : "disabled");
-
- bt_dev_info(hdev, "Minimum firmware build %u week %u %u",
- params->min_fw_build_nn, params->min_fw_build_cw,
- 2000 + params->min_fw_build_yy);
/* It is required that every single firmware fragment is acknowledged
* with a command complete event. If the boot parameters indicate
* that this bootloader does not send them, then abort the setup.
*/
- if (params->limited_cce != 0x00) {
+ if (params.limited_cce != 0x00) {
bt_dev_err(hdev, "Unsupported Intel firmware loading method (%u)",
- params->limited_cce);
- kfree_skb(skb);
+ params.limited_cce);
return -EINVAL;
}
/* If the OTP has no valid Bluetooth device address, then there will
* also be no valid address for the operational firmware.
*/
- if (!bacmp(&params->otp_bdaddr, BDADDR_ANY)) {
+ if (!bacmp(&params.otp_bdaddr, BDADDR_ANY)) {
bt_dev_info(hdev, "No device address configured");
set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks);
}
/* With this Intel bootloader only the hardware variant and device
- * revision information are used to select the right firmware.
+ * revision information are used to select the right firmware for SfP
+ * and WsP.
*
* The firmware filename is ibt-<hw_variant>-<dev_revid>.sfi.
*
* Currently the supported hardware variants are:
* 11 (0x0b) for iBT 3.0 (LnP/SfP)
+ * 12 (0x0c) for iBT 3.5 (WsP)
+ *
+ * For ThP/JfP and for future SKU's, the FW name varies based on HW
+ * variant, HW revision and FW revision, as these are dependent on CNVi
+ * and RF Combination.
+ *
+ * 18 (0x12) for iBT3.5 (ThP/JfP)
+ *
+ * The firmware file name for these will be
+ * ibt-<hw_variant>-<hw_revision>-<fw_revision>.sfi.
+ *
*/
- snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u.sfi",
- le16_to_cpu(ver.hw_variant),
- le16_to_cpu(params->dev_revid));
+ switch (ver.hw_variant) {
+ case 0x0b: /* SfP */
+ case 0x0c: /* WsP */
+ snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u.sfi",
+ le16_to_cpu(ver.hw_variant),
+ le16_to_cpu(params.dev_revid));
+ break;
+ case 0x12: /* ThP */
+ snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u-%u.sfi",
+ le16_to_cpu(ver.hw_variant),
+ le16_to_cpu(ver.hw_revision),
+ le16_to_cpu(ver.fw_revision));
+ break;
+ default:
+ bt_dev_err(hdev, "Unsupported Intel hardware variant (%u)",
+ ver.hw_variant);
+ return -EINVAL;
+ }
err = request_firmware(&fw, fwname, &hdev->dev);
if (err < 0) {
bt_dev_err(hdev, "Failed to load Intel firmware file (%d)",
err);
- kfree_skb(skb);
return err;
}
bt_dev_info(hdev, "Found device firmware: %s", fwname);
/* Save the DDC file name for later */
- snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u.ddc",
- le16_to_cpu(ver.hw_variant),
- le16_to_cpu(params->dev_revid));
-
- kfree_skb(skb);
+ switch (ver.hw_variant) {
+ case 0x0b: /* SfP */
+ case 0x0c: /* WsP */
+ snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u.ddc",
+ le16_to_cpu(ver.hw_variant),
+ le16_to_cpu(params.dev_revid));
+ break;
+ case 0x12: /* ThP */
+ snprintf(fwname, sizeof(fwname), "intel/ibt-%u-%u-%u.ddc",
+ le16_to_cpu(ver.hw_variant),
+ le16_to_cpu(ver.hw_revision),
+ le16_to_cpu(ver.fw_revision));
+ break;
+ default:
+ bt_dev_err(hdev, "Unsupported Intel hardware variant (%u)",
+ ver.hw_variant);
+ return -EINVAL;
+ }
if (fw->size < 644) {
bt_dev_err(hdev, "Invalid size of firmware file (%zu)",
@@ -745,70 +758,10 @@ static int intel_setup(struct hci_uart *hu)
set_bit(STATE_DOWNLOADING, &intel->flags);
- /* Start the firmware download transaction with the Init fragment
- * represented by the 128 bytes of CSS header.
- */
- err = btintel_secure_send(hdev, 0x00, 128, fw->data);
- if (err < 0) {
- bt_dev_err(hdev, "Failed to send firmware header (%d)", err);
- goto done;
- }
-
- /* Send the 256 bytes of public key information from the firmware
- * as the PKey fragment.
- */
- err = btintel_secure_send(hdev, 0x03, 256, fw->data + 128);
- if (err < 0) {
- bt_dev_err(hdev, "Failed to send firmware public key (%d)",
- err);
- goto done;
- }
-
- /* Send the 256 bytes of signature information from the firmware
- * as the Sign fragment.
- */
- err = btintel_secure_send(hdev, 0x02, 256, fw->data + 388);
- if (err < 0) {
- bt_dev_err(hdev, "Failed to send firmware signature (%d)",
- err);
+ /* Start firmware downloading and get boot parameter */
+ err = btintel_download_firmware(hdev, fw, &boot_param);
+ if (err < 0)
goto done;
- }
-
- fw_ptr = fw->data + 644;
- frag_len = 0;
-
- while (fw_ptr - fw->data < fw->size) {
- struct hci_command_hdr *cmd = (void *)(fw_ptr + frag_len);
-
- frag_len += sizeof(*cmd) + cmd->plen;
-
- bt_dev_dbg(hdev, "Patching %td/%zu", (fw_ptr - fw->data),
- fw->size);
-
- /* The parameter length of the secure send command requires
- * a 4 byte alignment. It happens so that the firmware file
- * contains proper Intel_NOP commands to align the fragments
- * as needed.
- *
- * Send set of commands with 4 byte alignment from the
- * firmware data buffer as a single Data fragement.
- */
- if (frag_len % 4)
- continue;
-
- /* Send each command from the firmware data buffer as
- * a single Data fragment.
- */
- err = btintel_secure_send(hdev, 0x01, frag_len, fw_ptr);
- if (err < 0) {
- bt_dev_err(hdev, "Failed to send firmware data (%d)",
- err);
- goto done;
- }
-
- fw_ptr += frag_len;
- frag_len = 0;
- }
set_bit(STATE_FIRMWARE_LOADED, &intel->flags);
@@ -869,12 +822,9 @@ done:
set_bit(STATE_BOOTING, &intel->flags);
- skb = __hci_cmd_sync(hdev, 0xfc01, sizeof(reset_param), reset_param,
- HCI_CMD_TIMEOUT);
- if (IS_ERR(skb))
- return PTR_ERR(skb);
-
- kfree_skb(skb);
+ err = btintel_send_intel_reset(hdev, boot_param);
+ if (err)
+ return err;
/* The bootloader will not indicate when the device is ready. This
* is done by the operational firmware sending bootup notification.
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 9fc12f556534..554d60394c06 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1954,10 +1954,15 @@ static int crypt_setkey(struct crypt_config *cc)
/* Ignore extra keys (which are used for IV etc) */
subkey_size = crypt_subkey_size(cc);
- if (crypt_integrity_hmac(cc))
+ if (crypt_integrity_hmac(cc)) {
+ if (subkey_size < cc->key_mac_size)
+ return -EINVAL;
+
crypt_copy_authenckey(cc->authenc_key, cc->key,
subkey_size - cc->key_mac_size,
cc->key_mac_size);
+ }
+
for (i = 0; i < cc->tfms_count; i++) {
if (crypt_integrity_hmac(cc))
r = crypto_aead_setkey(cc->cipher_tfm.tfms_aead[i],
@@ -2053,9 +2058,6 @@ static int crypt_set_keyring_key(struct crypt_config *cc, const char *key_string
ret = crypt_setkey(cc);
- /* wipe the kernel key payload copy in each case */
- memset(cc->key, 0, cc->key_size * sizeof(u8));
-
if (!ret) {
set_bit(DM_CRYPT_KEY_VALID, &cc->flags);
kzfree(cc->key_string);
@@ -2523,6 +2525,10 @@ static int crypt_ctr_cipher(struct dm_target *ti, char *cipher_in, char *key)
}
}
+ /* wipe the kernel key payload copy */
+ if (cc->key_string)
+ memset(cc->key, 0, cc->key_size * sizeof(u8));
+
return ret;
}
@@ -2740,6 +2746,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
cc->tag_pool_max_sectors * cc->on_disk_tag_size);
if (!cc->tag_pool) {
ti->error = "Cannot allocate integrity tags mempool";
+ ret = -ENOMEM;
goto bad;
}
@@ -2961,6 +2968,9 @@ static int crypt_message(struct dm_target *ti, unsigned argc, char **argv)
return ret;
if (cc->iv_gen_ops && cc->iv_gen_ops->init)
ret = cc->iv_gen_ops->init(cc);
+ /* wipe the kernel key payload copy */
+ if (cc->key_string)
+ memset(cc->key, 0, cc->key_size * sizeof(u8));
return ret;
}
if (argc == 2 && !strcasecmp(argv[1], "wipe")) {
@@ -3007,7 +3017,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)
static struct target_type crypt_target = {
.name = "crypt",
- .version = {1, 18, 0},
+ .version = {1, 18, 1},
.module = THIS_MODULE,
.ctr = crypt_ctr,
.dtr = crypt_dtr,
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 05c7bfd0c9d9..46d7c8749222 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -2559,7 +2559,8 @@ static int create_journal(struct dm_integrity_c *ic, char **error)
int r = 0;
unsigned i;
__u64 journal_pages, journal_desc_size, journal_tree_size;
- unsigned char *crypt_data = NULL;
+ unsigned char *crypt_data = NULL, *crypt_iv = NULL;
+ struct skcipher_request *req = NULL;
ic->commit_ids[0] = cpu_to_le64(0x1111111111111111ULL);
ic->commit_ids[1] = cpu_to_le64(0x2222222222222222ULL);
@@ -2617,9 +2618,20 @@ static int create_journal(struct dm_integrity_c *ic, char **error)
if (blocksize == 1) {
struct scatterlist *sg;
- SKCIPHER_REQUEST_ON_STACK(req, ic->journal_crypt);
- unsigned char iv[ivsize];
- skcipher_request_set_tfm(req, ic->journal_crypt);
+
+ req = skcipher_request_alloc(ic->journal_crypt, GFP_KERNEL);
+ if (!req) {
+ *error = "Could not allocate crypt request";
+ r = -ENOMEM;
+ goto bad;
+ }
+
+ crypt_iv = kmalloc(ivsize, GFP_KERNEL);
+ if (!crypt_iv) {
+ *error = "Could not allocate iv";
+ r = -ENOMEM;
+ goto bad;
+ }
ic->journal_xor = dm_integrity_alloc_page_list(ic);
if (!ic->journal_xor) {
@@ -2641,9 +2653,9 @@ static int create_journal(struct dm_integrity_c *ic, char **error)
sg_set_buf(&sg[i], va, PAGE_SIZE);
}
sg_set_buf(&sg[i], &ic->commit_ids, sizeof ic->commit_ids);
- memset(iv, 0x00, ivsize);
+ memset(crypt_iv, 0x00, ivsize);
- skcipher_request_set_crypt(req, sg, sg, PAGE_SIZE * ic->journal_pages + sizeof ic->commit_ids, iv);
+ skcipher_request_set_crypt(req, sg, sg, PAGE_SIZE * ic->journal_pages + sizeof ic->commit_ids, crypt_iv);
init_completion(&comp.comp);
comp.in_flight = (atomic_t)ATOMIC_INIT(1);
if (do_crypt(true, req, &comp))
@@ -2659,10 +2671,22 @@ static int create_journal(struct dm_integrity_c *ic, char **error)
crypto_free_skcipher(ic->journal_crypt);
ic->journal_crypt = NULL;
} else {
- SKCIPHER_REQUEST_ON_STACK(req, ic->journal_crypt);
- unsigned char iv[ivsize];
unsigned crypt_len = roundup(ivsize, blocksize);
+ req = skcipher_request_alloc(ic->journal_crypt, GFP_KERNEL);
+ if (!req) {
+ *error = "Could not allocate crypt request";
+ r = -ENOMEM;
+ goto bad;
+ }
+
+ crypt_iv = kmalloc(ivsize, GFP_KERNEL);
+ if (!crypt_iv) {
+ *error = "Could not allocate iv";
+ r = -ENOMEM;
+ goto bad;
+ }
+
crypt_data = kmalloc(crypt_len, GFP_KERNEL);
if (!crypt_data) {
*error = "Unable to allocate crypt data";
@@ -2670,8 +2694,6 @@ static int create_journal(struct dm_integrity_c *ic, char **error)
goto bad;
}
- skcipher_request_set_tfm(req, ic->journal_crypt);
-
ic->journal_scatterlist = dm_integrity_alloc_journal_scatterlist(ic, ic->journal);
if (!ic->journal_scatterlist) {
*error = "Unable to allocate sg list";
@@ -2695,12 +2717,12 @@ static int create_journal(struct dm_integrity_c *ic, char **error)
struct skcipher_request *section_req;
__u32 section_le = cpu_to_le32(i);
- memset(iv, 0x00, ivsize);
+ memset(crypt_iv, 0x00, ivsize);
memset(crypt_data, 0x00, crypt_len);
memcpy(crypt_data, &section_le, min((size_t)crypt_len, sizeof(section_le)));
sg_init_one(&sg, crypt_data, crypt_len);
- skcipher_request_set_crypt(req, &sg, &sg, crypt_len, iv);
+ skcipher_request_set_crypt(req, &sg, &sg, crypt_len, crypt_iv);
init_completion(&comp.comp);
comp.in_flight = (atomic_t)ATOMIC_INIT(1);
if (do_crypt(true, req, &comp))
@@ -2758,6 +2780,9 @@ retest_commit_id:
}
bad:
kfree(crypt_data);
+ kfree(crypt_iv);
+ skcipher_request_free(req);
+
return r;
}
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index d31d18d9727c..36ef284ad086 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -80,10 +80,14 @@
#define SECTOR_TO_BLOCK_SHIFT 3
/*
+ * For btree insert:
* 3 for btree insert +
* 2 for btree lookup used within space map
+ * For btree remove:
+ * 2 for shadow spine +
+ * 4 for rebalance 3 child node
*/
-#define THIN_MAX_CONCURRENT_LOCKS 5
+#define THIN_MAX_CONCURRENT_LOCKS 6
/* This should be plenty */
#define SPACE_MAP_ROOT_SIZE 128
diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c
index f21ce6a3d4cf..58b319757b1e 100644
--- a/drivers/md/persistent-data/dm-btree.c
+++ b/drivers/md/persistent-data/dm-btree.c
@@ -683,23 +683,8 @@ static int btree_split_beneath(struct shadow_spine *s, uint64_t key)
pn->keys[1] = rn->keys[0];
memcpy_disk(value_ptr(pn, 1), &val, sizeof(__le64));
- /*
- * rejig the spine. This is ugly, since it knows too
- * much about the spine
- */
- if (s->nodes[0] != new_parent) {
- unlock_block(s->info, s->nodes[0]);
- s->nodes[0] = new_parent;
- }
- if (key < le64_to_cpu(rn->keys[0])) {
- unlock_block(s->info, right);
- s->nodes[1] = left;
- } else {
- unlock_block(s->info, left);
- s->nodes[1] = right;
- }
- s->count = 2;
-
+ unlock_block(s->info, left);
+ unlock_block(s->info, right);
return 0;
}
diff --git a/drivers/net/caif/caif_hsi.c b/drivers/net/caif/caif_hsi.c
index b8029ea03307..433a14b9f731 100644
--- a/drivers/net/caif/caif_hsi.c
+++ b/drivers/net/caif/caif_hsi.c
@@ -264,7 +264,6 @@ static int cfhsi_tx_frm(struct cfhsi_desc *desc, struct cfhsi *cfhsi)
}
/* Create payload CAIF frames. */
- pfrm = desc->emb_frm + CFHSI_MAX_EMB_FRM_SZ;
while (nfrms < CFHSI_MAX_PKTS) {
struct caif_payload_info *info;
int hpad;
diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index cc94604b23e0..b1779566c5bb 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -412,7 +412,7 @@ EXPORT_SYMBOL_GPL(can_change_state);
* Local echo of CAN messages
*
* CAN network devices *should* support a local echo functionality
- * (see Documentation/networking/can.txt). To test the handling of CAN
+ * (see Documentation/networking/can.rst). To test the handling of CAN
* interfaces that do not support the local echo both driver types are
* implemented. In the case that the driver does not support the echo
* the IFF_ECHO remains clear in dev->flags. This causes the PF_CAN core
diff --git a/drivers/net/can/vcan.c b/drivers/net/can/vcan.c
index a8cb33264ff1..c2b04f505e16 100644
--- a/drivers/net/can/vcan.c
+++ b/drivers/net/can/vcan.c
@@ -61,7 +61,7 @@ MODULE_ALIAS_RTNL_LINK(DRV_NAME);
/*
* CAN test feature:
* Enable the echo on driver level for testing the CAN core echo modes.
- * See Documentation/networking/can.txt for details.
+ * See Documentation/networking/can.rst for details.
*/
static bool echo; /* echo testing. Default: 0 (Off) */
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
index f5dd5f75a40f..22889fc158f2 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
@@ -47,7 +47,7 @@ static const struct pci_device_id aq_pci_tbl[] = {
{}
};
-const struct aq_board_revision_s hw_atl_boards[] = {
+static const struct aq_board_revision_s hw_atl_boards[] = {
{ AQ_DEVICE_ID_0001, AQ_HWREV_1, &hw_atl_ops_a0, &hw_atl_a0_caps_aqc107, },
{ AQ_DEVICE_ID_D100, AQ_HWREV_1, &hw_atl_ops_a0, &hw_atl_a0_caps_aqc100, },
{ AQ_DEVICE_ID_D107, AQ_HWREV_1, &hw_atl_ops_a0, &hw_atl_a0_caps_aqc107, },
@@ -210,8 +210,10 @@ static int aq_pci_probe(struct pci_dev *pdev,
goto err_pci_func;
ndev = aq_ndev_alloc();
- if (!ndev)
+ if (!ndev) {
+ err = -ENOMEM;
goto err_ndev;
+ }
self = netdev_priv(ndev);
self->pdev = pdev;
diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index 7919f6112ecf..5e34b34f7740 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -5818,8 +5818,8 @@ bnx2_run_loopback(struct bnx2 *bp, int loopback_mode)
struct l2_fhdr *rx_hdr;
int ret = -ENODEV;
struct bnx2_napi *bnapi = &bp->bnx2_napi[0], *tx_napi;
- struct bnx2_tx_ring_info *txr = &bnapi->tx_ring;
- struct bnx2_rx_ring_info *rxr = &bnapi->rx_ring;
+ struct bnx2_tx_ring_info *txr;
+ struct bnx2_rx_ring_info *rxr;
tx_napi = bnapi;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 6b7e99675571..4b001d2050c2 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -7778,7 +7778,8 @@ static int bnxt_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
{
struct bnxt *bp = cb_priv;
- if (!bnxt_tc_flower_enabled(bp) || !tc_can_offload(bp->dev))
+ if (!bnxt_tc_flower_enabled(bp) ||
+ !tc_cls_can_offload_and_chain0(bp->dev, type_data))
return -EOPNOTSUPP;
switch (type) {
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
index 2ece1645f55d..fbe6e208e17b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
@@ -1474,9 +1474,6 @@ int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid,
{
int rc = 0;
- if (cls_flower->common.chain_index)
- return -EOPNOTSUPP;
-
switch (cls_flower->command) {
case TC_CLSFLOWER_REPLACE:
rc = bnxt_tc_add_flow(bp, src_fid, cls_flower);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
index 2ca11be64182..26290403f38f 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
@@ -124,7 +124,8 @@ static int bnxt_vf_rep_setup_tc_block_cb(enum tc_setup_type type,
struct bnxt *bp = vf_rep->bp;
int vf_fid = bp->pf.vf[vf_rep->vf_idx].fw_fid;
- if (!bnxt_tc_flower_enabled(vf_rep->bp) || !tc_can_offload(bp->dev))
+ if (!bnxt_tc_flower_enabled(vf_rep->bp) ||
+ !tc_cls_can_offload_and_chain0(bp->dev, type_data))
return -EOPNOTSUPP;
switch (type) {
diff --git a/drivers/net/ethernet/chelsio/Kconfig b/drivers/net/ethernet/chelsio/Kconfig
index 5713e83be08c..e2cdfa75673f 100644
--- a/drivers/net/ethernet/chelsio/Kconfig
+++ b/drivers/net/ethernet/chelsio/Kconfig
@@ -69,6 +69,7 @@ config CHELSIO_T4
depends on PCI && (IPV6 || IPV6=n)
select FW_LOADER
select MDIO
+ select ZLIB_DEFLATE
---help---
This driver supports Chelsio T4, T5 & T6 based gigabit, 10Gb Ethernet
adapter and T5/T6 based 40Gb and T6 based 25Gb, 50Gb and 100Gb
diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
index 6a015362c340..185fe8df7628 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
@@ -3304,6 +3304,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
netdev->ethtool_ops = &cxgb_ethtool_ops;
netdev->min_mtu = 81;
netdev->max_mtu = ETH_MAX_MTU;
+ netdev->dev_port = pi->port_id;
}
pci_set_drvdata(pdev, adapter);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile
index 5df923798669..53b6a02c778e 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/Makefile
+++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile
@@ -8,8 +8,7 @@ obj-$(CONFIG_CHELSIO_T4) += cxgb4.o
cxgb4-objs := cxgb4_main.o l2t.o smt.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o \
cxgb4_uld.o sched.o cxgb4_filter.o cxgb4_tc_u32.o \
cxgb4_ptp.o cxgb4_tc_flower.o cxgb4_cudbg.o \
- cudbg_common.o cudbg_lib.o
+ cudbg_common.o cudbg_lib.o cudbg_zlib.o
cxgb4-$(CONFIG_CHELSIO_T4_DCB) += cxgb4_dcb.o
cxgb4-$(CONFIG_CHELSIO_T4_FCOE) += cxgb4_fcoe.o
cxgb4-$(CONFIG_DEBUG_FS) += cxgb4_debugfs.o
-cxgb4-$(CONFIG_ZLIB_DEFLATE) += cudbg_zlib.o
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
index 8b95117c2923..557fd8bfd54e 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
@@ -1567,6 +1567,12 @@ int cudbg_collect_tid(struct cudbg_init *pdbg_init,
tid1->ver_hdr.size = sizeof(struct cudbg_tid_info_region_rev1) -
sizeof(struct cudbg_ver_hdr);
+ /* If firmware is not attached/alive, use backdoor register
+ * access to collect dump.
+ */
+ if (!is_fw_attached(pdbg_init))
+ goto fill_tid;
+
#define FW_PARAM_PFVF_A(param) \
(FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) | \
FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_##param) | \
@@ -1604,6 +1610,9 @@ int cudbg_collect_tid(struct cudbg_init *pdbg_init,
tid->nhpftids = val[1] - val[0] + 1;
}
+#undef FW_PARAM_PFVF_A
+
+fill_tid:
tid->ntids = padap->tids.ntids;
tid->nstids = padap->tids.nstids;
tid->stid_base = padap->tids.stid_base;
@@ -1623,8 +1632,6 @@ int cudbg_collect_tid(struct cudbg_init *pdbg_init,
tid->ip_users = t4_read_reg(padap, LE_DB_ACT_CNT_IPV4_A);
tid->ipv6_users = t4_read_reg(padap, LE_DB_ACT_CNT_IPV6_A);
-#undef FW_PARAM_PFVF_A
-
return cudbg_write_and_release_buff(pdbg_init, &temp_buff, dbg_buff);
}
@@ -1866,11 +1873,18 @@ int cudbg_collect_dump_context(struct cudbg_init *pdbg_init,
max_ctx_size = region_info[i].end - region_info[i].start + 1;
max_ctx_qid = max_ctx_size / SGE_CTXT_SIZE;
- t4_sge_ctxt_flush(padap, padap->mbox, i);
- rc = t4_memory_rw(padap, MEMWIN_NIC, mem_type[i],
- region_info[i].start, max_ctx_size,
- (__be32 *)ctx_buf, 1);
- if (rc) {
+ /* If firmware is not attached/alive, use backdoor register
+ * access to collect dump.
+ */
+ if (is_fw_attached(pdbg_init)) {
+ t4_sge_ctxt_flush(padap, padap->mbox, i);
+
+ rc = t4_memory_rw(padap, MEMWIN_NIC, mem_type[i],
+ region_info[i].start, max_ctx_size,
+ (__be32 *)ctx_buf, 1);
+ }
+
+ if (rc || !is_fw_attached(pdbg_init)) {
max_ctx_qid = CUDBG_LOWMEM_MAX_CTXT_QIDS;
cudbg_get_sge_ctxt_fw(pdbg_init, max_ctx_qid, i,
&buff);
@@ -1946,9 +1960,10 @@ static void cudbg_mps_rpl_backdoor(struct adapter *padap,
mps_rplc->rplc31_0 = htonl(t4_read_reg(padap, MPS_VF_RPLCT_MAP0_A));
}
-static int cudbg_collect_tcam_index(struct adapter *padap,
+static int cudbg_collect_tcam_index(struct cudbg_init *pdbg_init,
struct cudbg_mps_tcam *tcam, u32 idx)
{
+ struct adapter *padap = pdbg_init->adap;
u64 tcamy, tcamx, val;
u32 ctl, data2;
int rc = 0;
@@ -2033,12 +2048,22 @@ static int cudbg_collect_tcam_index(struct adapter *padap,
htons(FW_LDST_CMD_FID_V(FW_LDST_MPS_RPLC) |
FW_LDST_CMD_IDX_V(idx));
- rc = t4_wr_mbox(padap, padap->mbox, &ldst_cmd, sizeof(ldst_cmd),
- &ldst_cmd);
- if (rc)
+ /* If firmware is not attached/alive, use backdoor register
+ * access to collect dump.
+ */
+ if (is_fw_attached(pdbg_init))
+ rc = t4_wr_mbox(padap, padap->mbox, &ldst_cmd,
+ sizeof(ldst_cmd), &ldst_cmd);
+
+ if (rc || !is_fw_attached(pdbg_init)) {
cudbg_mps_rpl_backdoor(padap, &mps_rplc);
- else
+ /* Ignore error since we collected directly from
+ * reading registers.
+ */
+ rc = 0;
+ } else {
mps_rplc = ldst_cmd.u.mps.rplc;
+ }
tcam->rplc[0] = ntohl(mps_rplc.rplc31_0);
tcam->rplc[1] = ntohl(mps_rplc.rplc63_32);
@@ -2075,7 +2100,7 @@ int cudbg_collect_mps_tcam(struct cudbg_init *pdbg_init,
tcam = (struct cudbg_mps_tcam *)temp_buff.data;
for (i = 0; i < n; i++) {
- rc = cudbg_collect_tcam_index(padap, tcam, i);
+ rc = cudbg_collect_tcam_index(pdbg_init, tcam, i);
if (rc) {
cudbg_err->sys_err = rc;
cudbg_put_buff(pdbg_init, &temp_buff);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_zlib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_zlib.c
index 4c3854cbeb6c..25cc06d75cff 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_zlib.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_zlib.c
@@ -40,8 +40,8 @@ int cudbg_compress_buff(struct cudbg_init *pdbg_init,
struct cudbg_buffer *pin_buff,
struct cudbg_buffer *pout_buff)
{
- struct z_stream_s compress_stream = { 0 };
struct cudbg_buffer temp_buff = { 0 };
+ struct z_stream_s compress_stream;
struct cudbg_compress_hdr *c_hdr;
int rc;
@@ -53,6 +53,7 @@ int cudbg_compress_buff(struct cudbg_init *pdbg_init,
c_hdr = (struct cudbg_compress_hdr *)temp_buff.data;
c_hdr->compress_id = CUDBG_ZLIB_COMPRESS_ID;
+ memset(&compress_stream, 0, sizeof(struct z_stream_s));
compress_stream.workspace = pdbg_init->workspace;
rc = zlib_deflateInit2(&compress_stream, Z_DEFAULT_COMPRESSION,
Z_DEFLATED, CUDBG_ZLIB_WIN_BITS,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_zlib.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_zlib.h
index 9d55c4c38c84..60d23805dfc3 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_zlib.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_zlib.h
@@ -33,24 +33,11 @@ struct cudbg_compress_hdr {
static inline int cudbg_get_workspace_size(void)
{
-#ifdef CONFIG_ZLIB_DEFLATE
return zlib_deflate_workspacesize(CUDBG_ZLIB_WIN_BITS,
CUDBG_ZLIB_MEM_LVL);
-#else
- return 0;
-#endif /* CONFIG_ZLIB_DEFLATE */
}
-#ifndef CONFIG_ZLIB_DEFLATE
-static inline int cudbg_compress_buff(struct cudbg_init *pdbg_init,
- struct cudbg_buffer *pin_buff,
- struct cudbg_buffer *pout_buff)
-{
- return 0;
-}
-#else
int cudbg_compress_buff(struct cudbg_init *pdbg_init,
struct cudbg_buffer *pin_buff,
struct cudbg_buffer *pout_buff);
-#endif /* CONFIG_ZLIB_DEFLATE */
#endif /* __CUDBG_ZLIB_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index f05b58f74c7a..429467364219 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -820,6 +820,7 @@ struct vf_info {
unsigned char vf_mac_addr[ETH_ALEN];
unsigned int tx_rate;
bool pf_set_mac;
+ u16 vlan;
};
struct mbox_list {
@@ -846,6 +847,8 @@ struct adapter {
int msg_enable;
__be16 vxlan_port;
u8 vxlan_port_cnt;
+ __be16 geneve_port;
+ u8 geneve_port_cnt;
struct adapter_params params;
struct cxgb4_virt_res vres;
@@ -1736,4 +1739,6 @@ void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq, struct sge_fl *fl);
void free_tx_desc(struct adapter *adap, struct sge_txq *q,
unsigned int n, bool unmap);
void free_txq(struct adapter *adap, struct sge_txq *q);
+int t4_set_vlan_acl(struct adapter *adap, unsigned int mbox, unsigned int vf,
+ u16 vlan);
#endif /* __CXGB4_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
index 9e0a8a8186c4..30485f9a598f 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
@@ -406,14 +406,15 @@ static void cudbg_free_compress_buff(struct cudbg_init *pdbg_init)
int cxgb4_cudbg_collect(struct adapter *adap, void *buf, u32 *buf_size,
u32 flag)
{
- struct cudbg_init cudbg_init = { 0 };
struct cudbg_buffer dbg_buff = { 0 };
u32 size, min_size, total_size = 0;
+ struct cudbg_init cudbg_init;
struct cudbg_hdr *cudbg_hdr;
int rc;
size = *buf_size;
+ memset(&cudbg_init, 0, sizeof(struct cudbg_init));
cudbg_init.adap = adap;
cudbg_init.outbuf = buf;
cudbg_init.outbuf_size = size;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 11fe5961040a..1e3cd8abc56d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -2783,7 +2783,30 @@ static int cxgb4_mgmt_set_vf_rate(struct net_device *dev, int vf,
return 0;
}
-#endif
+static int cxgb4_mgmt_set_vf_vlan(struct net_device *dev, int vf,
+ u16 vlan, u8 qos, __be16 vlan_proto)
+{
+ struct port_info *pi = netdev_priv(dev);
+ struct adapter *adap = pi->adapter;
+ int ret;
+
+ if (vf >= adap->num_vfs || vlan > 4095 || qos > 7)
+ return -EINVAL;
+
+ if (vlan_proto != htons(ETH_P_8021Q) || qos != 0)
+ return -EPROTONOSUPPORT;
+
+ ret = t4_set_vlan_acl(adap, adap->mbox, vf + 1, vlan);
+ if (!ret) {
+ adap->vfinfo[vf].vlan = vlan;
+ return 0;
+ }
+
+ dev_err(adap->pdev_dev, "Err %d %s VLAN ACL for PF/VF %d/%d\n",
+ ret, (vlan ? "setting" : "clearing"), adap->pf, vf);
+ return ret;
+}
+#endif /* CONFIG_PCI_IOV */
static int cxgb_set_mac_addr(struct net_device *dev, void *p)
{
@@ -2905,9 +2928,6 @@ static int cxgb_set_tx_maxrate(struct net_device *dev, int index, u32 rate)
static int cxgb_setup_tc_flower(struct net_device *dev,
struct tc_cls_flower_offload *cls_flower)
{
- if (cls_flower->common.chain_index)
- return -EOPNOTSUPP;
-
switch (cls_flower->command) {
case TC_CLSFLOWER_REPLACE:
return cxgb4_tc_flower_replace(dev, cls_flower);
@@ -2923,9 +2943,6 @@ static int cxgb_setup_tc_flower(struct net_device *dev,
static int cxgb_setup_tc_cls_u32(struct net_device *dev,
struct tc_cls_u32_offload *cls_u32)
{
- if (cls_u32->common.chain_index)
- return -EOPNOTSUPP;
-
switch (cls_u32->command) {
case TC_CLSU32_NEW_KNODE:
case TC_CLSU32_REPLACE_KNODE:
@@ -2951,7 +2968,7 @@ static int cxgb_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
return -EINVAL;
}
- if (!tc_can_offload(dev))
+ if (!tc_cls_can_offload_and_chain0(dev, type_data))
return -EOPNOTSUPP;
switch (type) {
@@ -3020,6 +3037,17 @@ static void cxgb_del_udp_tunnel(struct net_device *netdev,
adapter->vxlan_port = 0;
t4_write_reg(adapter, MPS_RX_VXLAN_TYPE_A, 0);
break;
+ case UDP_TUNNEL_TYPE_GENEVE:
+ if (!adapter->geneve_port_cnt ||
+ adapter->geneve_port != ti->port)
+ return; /* Invalid GENEVE destination port */
+
+ adapter->geneve_port_cnt--;
+ if (adapter->geneve_port_cnt)
+ return;
+
+ adapter->geneve_port = 0;
+ t4_write_reg(adapter, MPS_RX_GENEVE_TYPE_A, 0);
default:
return;
}
@@ -3055,17 +3083,11 @@ static void cxgb_add_udp_tunnel(struct net_device *netdev,
u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 };
int i, ret;
- if (chip_ver < CHELSIO_T6)
+ if (chip_ver < CHELSIO_T6 || !adapter->rawf_cnt)
return;
switch (ti->type) {
case UDP_TUNNEL_TYPE_VXLAN:
- /* For T6 fw reserves last 2 entries for
- * storing match all mac filter (config file entry).
- */
- if (!adapter->rawf_cnt)
- return;
-
/* Callback for adding vxlan port can be called with the same
* port for both IPv4 and IPv6. We should not disable the
* offloading when the same port for both protocols is added
@@ -3091,6 +3113,26 @@ static void cxgb_add_udp_tunnel(struct net_device *netdev,
t4_write_reg(adapter, MPS_RX_VXLAN_TYPE_A,
VXLAN_V(be16_to_cpu(ti->port)) | VXLAN_EN_F);
break;
+ case UDP_TUNNEL_TYPE_GENEVE:
+ if (adapter->geneve_port_cnt &&
+ adapter->geneve_port == ti->port) {
+ adapter->geneve_port_cnt++;
+ return;
+ }
+
+ /* We will support only one GENEVE port */
+ if (adapter->geneve_port_cnt) {
+ netdev_info(netdev, "UDP port %d already offloaded, not adding port %d\n",
+ be16_to_cpu(adapter->geneve_port),
+ be16_to_cpu(ti->port));
+ return;
+ }
+
+ adapter->geneve_port = ti->port;
+ adapter->geneve_port_cnt = 1;
+
+ t4_write_reg(adapter, MPS_RX_GENEVE_TYPE_A,
+ GENEVE_V(be16_to_cpu(ti->port)) | GENEVE_EN_F);
default:
return;
}
@@ -3101,24 +3143,22 @@ static void cxgb_add_udp_tunnel(struct net_device *netdev,
* we will remove this 'match all' entry and fallback to adding
* exact match filters.
*/
- if (adapter->rawf_cnt) {
- for_each_port(adapter, i) {
- pi = adap2pinfo(adapter, i);
-
- ret = t4_alloc_raw_mac_filt(adapter, pi->viid,
- match_all_mac,
- match_all_mac,
- adapter->rawf_start +
- pi->port_id,
- 1, pi->port_id, true);
- if (ret < 0) {
- netdev_info(netdev, "Failed to allocate a mac filter entry, not adding port %d\n",
- be16_to_cpu(ti->port));
- cxgb_del_udp_tunnel(netdev, ti);
- return;
- }
- atomic_inc(&adapter->mps_encap[ret].refcnt);
+ for_each_port(adapter, i) {
+ pi = adap2pinfo(adapter, i);
+
+ ret = t4_alloc_raw_mac_filt(adapter, pi->viid,
+ match_all_mac,
+ match_all_mac,
+ adapter->rawf_start +
+ pi->port_id,
+ 1, pi->port_id, true);
+ if (ret < 0) {
+ netdev_info(netdev, "Failed to allocate a mac filter entry, not adding port %d\n",
+ be16_to_cpu(ti->port));
+ cxgb_del_udp_tunnel(netdev, ti);
+ return;
}
+ atomic_inc(&adapter->mps_encap[ret].refcnt);
}
}
@@ -3184,6 +3224,7 @@ static const struct net_device_ops cxgb4_mgmt_netdev_ops = {
.ndo_get_vf_config = cxgb4_mgmt_get_vf_config,
.ndo_set_vf_rate = cxgb4_mgmt_set_vf_rate,
.ndo_get_phys_port_id = cxgb4_mgmt_get_phys_port_id,
+ .ndo_set_vf_vlan = cxgb4_mgmt_set_vf_vlan,
};
#endif
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
index 9b9f3f99b39d..36563364bae7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
@@ -43,7 +43,7 @@
#define STATS_CHECK_PERIOD (HZ / 2)
-struct ch_tc_pedit_fields pedits[] = {
+static struct ch_tc_pedit_fields pedits[] = {
PEDIT_FIELDS(ETH_, DMAC_31_0, 4, dmac, 0),
PEDIT_FIELDS(ETH_, DMAC_47_32, 2, dmac, 4),
PEDIT_FIELDS(ETH_, SMAC_15_0, 2, smac, 0),
@@ -111,6 +111,9 @@ static void cxgb4_process_flow_match(struct net_device *dev,
ethtype_mask = 0;
}
+ if (ethtype_key == ETH_P_IPV6)
+ fs->type = 1;
+
fs->val.ethtype = ethtype_key;
fs->mask.ethtype = ethtype_mask;
fs->val.proto = key->ip_proto;
@@ -205,8 +208,8 @@ static void cxgb4_process_flow_match(struct net_device *dev,
VLAN_PRIO_SHIFT);
vlan_tci_mask = mask->vlan_id | (mask->vlan_priority <<
VLAN_PRIO_SHIFT);
- fs->val.ivlan = cpu_to_be16(vlan_tci);
- fs->mask.ivlan = cpu_to_be16(vlan_tci_mask);
+ fs->val.ivlan = vlan_tci;
+ fs->mask.ivlan = vlan_tci_mask;
/* Chelsio adapters use ivlan_vld bit to match vlan packets
* as 802.1Q. Also, when vlan tag is present in packets,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index eab781fab2a8..a7af71bf14fb 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -1199,6 +1199,8 @@ enum cpl_tx_tnl_lso_type cxgb_encap_offload_supported(struct sk_buff *skb)
case IPPROTO_UDP:
if (adapter->vxlan_port == udp_hdr(skb)->dest)
tnl_type = TX_TNL_TYPE_VXLAN;
+ else if (adapter->geneve_port == udp_hdr(skb)->dest)
+ tnl_type = TX_TNL_TYPE_GENEVE;
break;
default:
return tnl_type;
@@ -1238,6 +1240,7 @@ static inline void t6_fill_tnl_lso(struct sk_buff *skb,
switch (tnl_type) {
case TX_TNL_TYPE_VXLAN:
+ case TX_TNL_TYPE_GENEVE:
tnl_lso->UdpLenSetOut_to_TnlHdrLen =
htons(CPL_TX_TNL_LSO_UDPCHKCLROUT_F |
CPL_TX_TNL_LSO_UDPLENSETOUT_F);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 6d76851a4da9..047609ef0515 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -195,9 +195,11 @@ static void t4_report_fw_error(struct adapter *adap)
u32 pcie_fw;
pcie_fw = t4_read_reg(adap, PCIE_FW_A);
- if (pcie_fw & PCIE_FW_ERR_F)
+ if (pcie_fw & PCIE_FW_ERR_F) {
dev_err(adap->pdev_dev, "Firmware reports adapter error: %s\n",
reason[PCIE_FW_EVAL_G(pcie_fw)]);
+ adap->flags &= ~FW_OK;
+ }
}
/*
@@ -317,9 +319,9 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd,
* wait [for a while] till we're at the front [or bail out with an
* EBUSY] ...
*/
- spin_lock(&adap->mbox_lock);
+ spin_lock_bh(&adap->mbox_lock);
list_add_tail(&entry.list, &adap->mlist.list);
- spin_unlock(&adap->mbox_lock);
+ spin_unlock_bh(&adap->mbox_lock);
delay_idx = 0;
ms = delay[0];
@@ -332,9 +334,9 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd,
*/
pcie_fw = t4_read_reg(adap, PCIE_FW_A);
if (i > FW_CMD_MAX_TIMEOUT || (pcie_fw & PCIE_FW_ERR_F)) {
- spin_lock(&adap->mbox_lock);
+ spin_lock_bh(&adap->mbox_lock);
list_del(&entry.list);
- spin_unlock(&adap->mbox_lock);
+ spin_unlock_bh(&adap->mbox_lock);
ret = (pcie_fw & PCIE_FW_ERR_F) ? -ENXIO : -EBUSY;
t4_record_mbox(adap, cmd, size, access, ret);
return ret;
@@ -365,9 +367,9 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd,
for (i = 0; v == MBOX_OWNER_NONE && i < 3; i++)
v = MBOWNER_G(t4_read_reg(adap, ctl_reg));
if (v != MBOX_OWNER_DRV) {
- spin_lock(&adap->mbox_lock);
+ spin_lock_bh(&adap->mbox_lock);
list_del(&entry.list);
- spin_unlock(&adap->mbox_lock);
+ spin_unlock_bh(&adap->mbox_lock);
ret = (v == MBOX_OWNER_FW) ? -EBUSY : -ETIMEDOUT;
t4_record_mbox(adap, cmd, size, access, ret);
return ret;
@@ -418,9 +420,9 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd,
execute = i + ms;
t4_record_mbox(adap, cmd_rpl,
MBOX_LEN, access, execute);
- spin_lock(&adap->mbox_lock);
+ spin_lock_bh(&adap->mbox_lock);
list_del(&entry.list);
- spin_unlock(&adap->mbox_lock);
+ spin_unlock_bh(&adap->mbox_lock);
return -FW_CMD_RETVAL_G((int)res);
}
}
@@ -430,9 +432,9 @@ int t4_wr_mbox_meat_timeout(struct adapter *adap, int mbox, const void *cmd,
dev_err(adap->pdev_dev, "command %#x in mailbox %d timed out\n",
*(const u8 *)cmd, mbox);
t4_report_fw_error(adap);
- spin_lock(&adap->mbox_lock);
+ spin_lock_bh(&adap->mbox_lock);
list_del(&entry.list);
- spin_unlock(&adap->mbox_lock);
+ spin_unlock_bh(&adap->mbox_lock);
t4_fatal_err(adap);
return ret;
}
@@ -5088,7 +5090,7 @@ int t4_read_rss(struct adapter *adapter, u16 *map)
static unsigned int t4_use_ldst(struct adapter *adap)
{
- return (adap->flags & FW_OK) || !adap->use_bd;
+ return (adap->flags & FW_OK) && !adap->use_bd;
}
/**
@@ -9899,3 +9901,35 @@ int t4_i2c_rd(struct adapter *adap, unsigned int mbox, int port,
return ret;
}
+
+/**
+ * t4_set_vlan_acl - Set a VLAN id for the specified VF
+ * @adapter: the adapter
+ * @mbox: mailbox to use for the FW command
+ * @vf: one of the VFs instantiated by the specified PF
+ * @vlan: The vlanid to be set
+ */
+int t4_set_vlan_acl(struct adapter *adap, unsigned int mbox, unsigned int vf,
+ u16 vlan)
+{
+ struct fw_acl_vlan_cmd vlan_cmd;
+ unsigned int enable;
+
+ enable = (vlan ? FW_ACL_VLAN_CMD_EN_F : 0);
+ memset(&vlan_cmd, 0, sizeof(vlan_cmd));
+ vlan_cmd.op_to_vfn = cpu_to_be32(FW_CMD_OP_V(FW_ACL_VLAN_CMD) |
+ FW_CMD_REQUEST_F |
+ FW_CMD_WRITE_F |
+ FW_CMD_EXEC_F |
+ FW_ACL_VLAN_CMD_PFN_V(adap->pf) |
+ FW_ACL_VLAN_CMD_VFN_V(vf));
+ vlan_cmd.en_to_len16 = cpu_to_be32(enable | FW_LEN16(vlan_cmd));
+ /* Drop all packets that donot match vlan id */
+ vlan_cmd.dropnovlan_fm = FW_ACL_VLAN_CMD_FM_F;
+ if (enable != 0) {
+ vlan_cmd.nvlan = 1;
+ vlan_cmd.vlanid[0] = cpu_to_be16(vlan);
+ }
+
+ return t4_wr_mbox(adap, adap->mbox, &vlan_cmd, sizeof(vlan_cmd), NULL);
+}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
index d9c06d6dc7b2..a6df73398d17 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
@@ -2522,6 +2522,17 @@
#define VXLAN_V(x) ((x) << VXLAN_S)
#define VXLAN_G(x) (((x) >> VXLAN_S) & VXLAN_M)
+#define MPS_RX_GENEVE_TYPE_A 0x11238
+
+#define GENEVE_EN_S 16
+#define GENEVE_EN_V(x) ((x) << GENEVE_EN_S)
+#define GENEVE_EN_F GENEVE_EN_V(1U)
+
+#define GENEVE_S 0
+#define GENEVE_M 0xffffU
+#define GENEVE_V(x) ((x) << GENEVE_S)
+#define GENEVE_G(x) (((x) >> GENEVE_S) & GENEVE_M)
+
#define MPS_CLS_TCAM_Y_L_A 0xf000
#define MPS_CLS_TCAM_DATA0_A 0xf000
#define MPS_CLS_TCAM_DATA1_A 0xf004
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index f3310d5b3c4c..f88766d2401d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -2353,14 +2353,22 @@ struct fw_acl_vlan_cmd {
#define FW_ACL_VLAN_CMD_VFN_S 0
#define FW_ACL_VLAN_CMD_VFN_V(x) ((x) << FW_ACL_VLAN_CMD_VFN_S)
-#define FW_ACL_VLAN_CMD_EN_S 31
-#define FW_ACL_VLAN_CMD_EN_V(x) ((x) << FW_ACL_VLAN_CMD_EN_S)
+#define FW_ACL_VLAN_CMD_EN_S 31
+#define FW_ACL_VLAN_CMD_EN_M 0x1
+#define FW_ACL_VLAN_CMD_EN_V(x) ((x) << FW_ACL_VLAN_CMD_EN_S)
+#define FW_ACL_VLAN_CMD_EN_G(x) \
+ (((x) >> S_FW_ACL_VLAN_CMD_EN_S) & FW_ACL_VLAN_CMD_EN_M)
+#define FW_ACL_VLAN_CMD_EN_F FW_ACL_VLAN_CMD_EN_V(1U)
#define FW_ACL_VLAN_CMD_DROPNOVLAN_S 7
#define FW_ACL_VLAN_CMD_DROPNOVLAN_V(x) ((x) << FW_ACL_VLAN_CMD_DROPNOVLAN_S)
-#define FW_ACL_VLAN_CMD_FM_S 6
-#define FW_ACL_VLAN_CMD_FM_V(x) ((x) << FW_ACL_VLAN_CMD_FM_S)
+#define FW_ACL_VLAN_CMD_FM_S 6
+#define FW_ACL_VLAN_CMD_FM_M 0x1
+#define FW_ACL_VLAN_CMD_FM_V(x) ((x) << FW_ACL_VLAN_CMD_FM_S)
+#define FW_ACL_VLAN_CMD_FM_G(x) \
+ (((x) >> FW_ACL_VLAN_CMD_FM_S) & FW_ACL_VLAN_CMD_FM_M)
+#define FW_ACL_VLAN_CMD_FM_F FW_ACL_VLAN_CMD_FM_V(1U)
/* old 16-bit port capabilities bitmap (fw_port_cap16_t) */
enum fw_port_cap {
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h
index 08c6ddb84a04..5883f09e3804 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h
@@ -92,6 +92,7 @@ struct sge_rspq;
*/
struct port_info {
struct adapter *adapter; /* our adapter */
+ u32 vlan_id; /* vlan id for VST */
u16 viid; /* virtual interface ID */
s16 xact_addr_filt; /* index of our MAC address filter */
u16 rss_size; /* size of VI's RSS table slice */
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index 96f69f80dc99..b7e79e64d2ed 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -791,6 +791,8 @@ static int cxgb4vf_open(struct net_device *dev)
if (err)
goto err_unwind;
+ pi->vlan_id = t4vf_get_vf_vlan_acl(adapter);
+
netif_tx_start_all_queues(dev);
set_bit(pi->port_id, &adapter->open_device_map);
return 0;
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
index 129b914a434c..dfce5df7538e 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
@@ -1202,6 +1202,10 @@ int t4vf_eth_xmit(struct sk_buff *skb, struct net_device *dev)
BUG_ON(qidx >= pi->nqsets);
txq = &adapter->sge.ethtxq[pi->first_qset + qidx];
+ if (pi->vlan_id && !skb_vlan_tag_present(skb))
+ __vlan_hwaccel_put_tag(skb, cpu_to_be16(ETH_P_8021Q),
+ pi->vlan_id);
+
/*
* Take this opportunity to reclaim any TX Descriptors whose DMA
* transfers have completed.
@@ -1570,6 +1574,7 @@ static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl,
{
struct adapter *adapter = rxq->rspq.adapter;
struct sge *s = &adapter->sge;
+ struct port_info *pi;
int ret;
struct sk_buff *skb;
@@ -1586,8 +1591,9 @@ static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl,
skb->truesize += skb->data_len;
skb->ip_summed = CHECKSUM_UNNECESSARY;
skb_record_rx_queue(skb, rxq->rspq.idx);
+ pi = netdev_priv(skb->dev);
- if (pkt->vlan_ex) {
+ if (pkt->vlan_ex && !pi->vlan_id) {
__vlan_hwaccel_put_tag(skb, cpu_to_be16(ETH_P_8021Q),
be16_to_cpu(pkt->vlan));
rxq->stats.vlan_ex++;
@@ -1620,6 +1626,7 @@ int t4vf_ethrx_handler(struct sge_rspq *rspq, const __be64 *rsp,
struct sge_eth_rxq *rxq = container_of(rspq, struct sge_eth_rxq, rspq);
struct adapter *adapter = rspq->adapter;
struct sge *s = &adapter->sge;
+ struct port_info *pi;
/*
* If this is a good TCP packet and we have Generic Receive Offload
@@ -1644,6 +1651,7 @@ int t4vf_ethrx_handler(struct sge_rspq *rspq, const __be64 *rsp,
__skb_pull(skb, s->pktshift);
skb->protocol = eth_type_trans(skb, rspq->netdev);
skb_record_rx_queue(skb, rspq->idx);
+ pi = netdev_priv(skb->dev);
rxq->stats.pkts++;
if (csum_ok && !pkt->err_vec &&
@@ -1660,9 +1668,10 @@ int t4vf_ethrx_handler(struct sge_rspq *rspq, const __be64 *rsp,
} else
skb_checksum_none_assert(skb);
- if (pkt->vlan_ex) {
+ if (pkt->vlan_ex && !pi->vlan_id) {
rxq->stats.vlan_ex++;
- __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), be16_to_cpu(pkt->vlan));
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+ be16_to_cpu(pkt->vlan));
}
netif_receive_skb(skb);
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
index 9cf9c56b0f73..712e8f0c71b4 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
@@ -413,5 +413,6 @@ int t4vf_handle_fw_rpl(struct adapter *, const __be64 *);
int t4vf_prep_adapter(struct adapter *);
int t4vf_get_vf_mac_acl(struct adapter *adapter, unsigned int pf,
unsigned int *naddr, u8 *addr);
+int t4vf_get_vf_vlan_acl(struct adapter *adapter);
#endif /* __T4VF_COMMON_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
index 67aec59a14e6..798695bf8678 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
@@ -2147,3 +2147,31 @@ int t4vf_get_vf_mac_acl(struct adapter *adapter, unsigned int pf,
return ret;
}
+
+/**
+ * t4vf_get_vf_vlan_acl - Get the VLAN ID to be set to
+ * the VI of this VF.
+ * @adapter: The adapter
+ *
+ * Find the VLAN ID to be set to the VF's VI. The requested VLAN ID
+ * is from the host OS via callback in the PF driver.
+ */
+int t4vf_get_vf_vlan_acl(struct adapter *adapter)
+{
+ struct fw_acl_vlan_cmd cmd;
+ int vlan = 0;
+ int ret = 0;
+
+ cmd.op_to_vfn = htonl(FW_CMD_OP_V(FW_ACL_VLAN_CMD) |
+ FW_CMD_REQUEST_F | FW_CMD_READ_F);
+
+ /* Note: Do not enable the ACL */
+ cmd.en_to_len16 = cpu_to_be32((unsigned int)FW_LEN16(cmd));
+
+ ret = t4vf_wr_mbox(adapter, &cmd, sizeof(cmd), &cmd);
+
+ if (!ret)
+ vlan = be16_to_cpu(cmd.vlanid[0]);
+
+ return vlan;
+}
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index c6e859a27ee6..c36c81959198 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -4634,6 +4634,14 @@ int be_update_queues(struct be_adapter *adapter)
be_schedule_worker(adapter);
+ /* The IF was destroyed and re-created. We need to clear
+ * all promiscuous flags valid for the destroyed IF.
+ * Without this promisc mode is not restored during
+ * be_open() because the driver thinks that it is
+ * already enabled in HW.
+ */
+ adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
+
if (netif_running(netdev))
status = be_open(netdev);
diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index 5385074b3b7d..e7381f8ef89d 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -20,7 +20,8 @@
#include <linux/timecounter.h>
#if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \
- defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARM)
+ defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARM) || \
+ defined(CONFIG_ARM64)
/*
* Just figures, Motorola would have to change the offsets for
* registers in the same peripheral device on different models
@@ -195,7 +196,7 @@
* Evidently, ARM SoCs have the FEC block generated in a
* little endian mode so adjust endianness accordingly.
*/
-#if defined(CONFIG_ARM)
+#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
#define fec32_to_cpu le32_to_cpu
#define fec16_to_cpu le16_to_cpu
#define cpu_to_fec32 cpu_to_le32
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 90aa69a08922..7a7f3a42b2aa 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -195,7 +195,8 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address");
* account when setting it.
*/
#if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \
- defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARM)
+ defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARM) || \
+ defined(CONFIG_ARM64)
#define OPT_FRAME_SIZE (PKT_MAXBUF_SIZE << 16)
#else
#define OPT_FRAME_SIZE 0
@@ -2109,7 +2110,8 @@ static int fec_enet_get_regs_len(struct net_device *ndev)
/* List of registers that can be safety be read to dump them with ethtool */
#if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \
- defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARM)
+ defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARM) || \
+ defined(CONFIG_ARM64)
static u32 fec_enet_register_offset[] = {
FEC_IEVENT, FEC_IMASK, FEC_R_DES_ACTIVE_0, FEC_X_DES_ACTIVE_0,
FEC_ECNTRL, FEC_MII_DATA, FEC_MII_SPEED, FEC_MIB_CTRLSTAT, FEC_R_CNTRL,
@@ -3139,7 +3141,7 @@ static int fec_enet_init(struct net_device *ndev)
unsigned dsize_log2 = __fls(dsize);
WARN_ON(dsize != (1 << dsize_log2));
-#if defined(CONFIG_ARM)
+#if defined(CONFIG_ARM) || defined(CONFIG_ARM64)
fep->rx_align = 0xf;
fep->tx_align = 0xf;
#else
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
index ca247c2cc238..acf29633ec79 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
@@ -44,12 +44,17 @@ static void dsaf_write_sub(struct dsaf_device *dsaf_dev, u32 reg, u32 val)
static u32 dsaf_read_sub(struct dsaf_device *dsaf_dev, u32 reg)
{
- u32 ret;
-
- if (dsaf_dev->sub_ctrl)
- ret = dsaf_read_syscon(dsaf_dev->sub_ctrl, reg);
- else
+ u32 ret = 0;
+ int err;
+
+ if (dsaf_dev->sub_ctrl) {
+ err = dsaf_read_syscon(dsaf_dev->sub_ctrl, reg, &ret);
+ if (err)
+ dev_err(dsaf_dev->dev, "dsaf_read_syscon error %d!\n",
+ err);
+ } else {
ret = dsaf_read_reg(dsaf_dev->sc_base, reg);
+ }
return ret;
}
@@ -188,18 +193,23 @@ static void cpld_led_reset_acpi(struct hns_mac_cb *mac_cb)
static int cpld_set_led_id(struct hns_mac_cb *mac_cb,
enum hnae_led_state status)
{
+ u32 val = 0;
+ int ret;
+
if (!mac_cb->cpld_ctrl)
return 0;
switch (status) {
case HNAE_LED_ACTIVE:
- mac_cb->cpld_led_value =
- dsaf_read_syscon(mac_cb->cpld_ctrl,
- mac_cb->cpld_ctrl_reg);
- dsaf_set_bit(mac_cb->cpld_led_value, DSAF_LED_ANCHOR_B,
- CPLD_LED_ON_VALUE);
+ ret = dsaf_read_syscon(mac_cb->cpld_ctrl, mac_cb->cpld_ctrl_reg,
+ &val);
+ if (ret)
+ return ret;
+
+ dsaf_set_bit(val, DSAF_LED_ANCHOR_B, CPLD_LED_ON_VALUE);
dsaf_write_syscon(mac_cb->cpld_ctrl, mac_cb->cpld_ctrl_reg,
- mac_cb->cpld_led_value);
+ val);
+ mac_cb->cpld_led_value = val;
break;
case HNAE_LED_INACTIVE:
dsaf_set_bit(mac_cb->cpld_led_value, DSAF_LED_ANCHOR_B,
@@ -560,12 +570,19 @@ static phy_interface_t hns_mac_get_phy_if_acpi(struct hns_mac_cb *mac_cb)
int hns_mac_get_sfp_prsnt(struct hns_mac_cb *mac_cb, int *sfp_prsnt)
{
+ u32 val = 0;
+ int ret;
+
if (!mac_cb->cpld_ctrl)
return -ENODEV;
- *sfp_prsnt = !dsaf_read_syscon(mac_cb->cpld_ctrl, mac_cb->cpld_ctrl_reg
- + MAC_SFP_PORT_OFFSET);
+ ret = dsaf_read_syscon(mac_cb->cpld_ctrl,
+ mac_cb->cpld_ctrl_reg + MAC_SFP_PORT_OFFSET,
+ &val);
+ if (ret)
+ return ret;
+ *sfp_prsnt = !val;
return 0;
}
@@ -615,7 +632,7 @@ static int hns_mac_config_sds_loopback(struct hns_mac_cb *mac_cb, bool en)
#define RX_CSR(lane, reg) ((0x4080 + (reg) * 0x0002 + (lane) * 0x0200) * 2)
u64 reg_offset = RX_CSR(lane_id[mac_cb->mac_id], 0);
- int sfp_prsnt;
+ int sfp_prsnt = 0;
int ret = hns_mac_get_sfp_prsnt(mac_cb, &sfp_prsnt);
if (!mac_cb->phy_dev) {
@@ -627,7 +644,7 @@ static int hns_mac_config_sds_loopback(struct hns_mac_cb *mac_cb, bool en)
}
if (mac_cb->serdes_ctrl) {
- u32 origin;
+ u32 origin = 0;
if (!AE_IS_VER1(mac_cb->dsaf_dev->dsaf_ver)) {
#define HILINK_ACCESS_SEL_CFG 0x40008
@@ -644,7 +661,10 @@ static int hns_mac_config_sds_loopback(struct hns_mac_cb *mac_cb, bool en)
HILINK_ACCESS_SEL_CFG, 3);
}
- origin = dsaf_read_syscon(mac_cb->serdes_ctrl, reg_offset);
+ ret = dsaf_read_syscon(mac_cb->serdes_ctrl, reg_offset,
+ &origin);
+ if (ret)
+ return ret;
dsaf_set_field(origin, 1ull << 10, 10, en);
dsaf_write_syscon(mac_cb->serdes_ctrl, reg_offset, origin);
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
index 46a52d9bb196..886cbbf25761 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
@@ -1034,12 +1034,9 @@ static inline void dsaf_write_syscon(struct regmap *base, u32 reg, u32 value)
regmap_write(base, reg, value);
}
-static inline u32 dsaf_read_syscon(struct regmap *base, u32 reg)
+static inline int dsaf_read_syscon(struct regmap *base, u32 reg, u32 *val)
{
- unsigned int val;
-
- regmap_read(base, reg, &val);
- return val;
+ return regmap_read(base, reg, val);
}
#define dsaf_read_dev(a, reg) \
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 634e9327968b..fd06bc78c58e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -356,7 +356,8 @@ struct hnae3_ae_ops {
u32 stringset, u8 *data);
int (*get_sset_count)(struct hnae3_handle *handle, int stringset);
- void (*get_regs)(struct hnae3_handle *handle, void *data);
+ void (*get_regs)(struct hnae3_handle *handle, u32 *version,
+ void *data);
int (*get_regs_len)(struct hnae3_handle *handle);
u32 (*get_rss_key_size)(struct hnae3_handle *handle);
@@ -404,6 +405,8 @@ struct hnae3_ae_ops {
int (*set_channels)(struct hnae3_handle *handle, u32 new_tqps_num);
void (*get_flowctrl_adv)(struct hnae3_handle *handle,
u32 *flowctrl_adv);
+ int (*set_led_id)(struct hnae3_handle *handle,
+ enum ethtool_phys_id_state status);
};
struct hnae3_dcb_ops {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index ac848163ccae..601b6295d3f8 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -3318,8 +3318,8 @@ static int hns3_reset_notify(struct hnae3_handle *handle,
switch (type) {
case HNAE3_UP_CLIENT:
- ret = hns3_reset_notify_up_enet(handle);
- break;
+ ret = hns3_reset_notify_up_enet(handle);
+ break;
case HNAE3_DOWN_CLIENT:
ret = hns3_reset_notify_down_enet(handle);
break;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index 358f78036941..b034c7f24eda 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -1063,6 +1063,38 @@ static int hns3_set_coalesce(struct net_device *netdev,
return 0;
}
+static int hns3_get_regs_len(struct net_device *netdev)
+{
+ struct hnae3_handle *h = hns3_get_handle(netdev);
+
+ if (!h->ae_algo->ops->get_regs_len)
+ return -EOPNOTSUPP;
+
+ return h->ae_algo->ops->get_regs_len(h);
+}
+
+static void hns3_get_regs(struct net_device *netdev,
+ struct ethtool_regs *cmd, void *data)
+{
+ struct hnae3_handle *h = hns3_get_handle(netdev);
+
+ if (!h->ae_algo->ops->get_regs)
+ return;
+
+ h->ae_algo->ops->get_regs(h, &cmd->version, data);
+}
+
+static int hns3_set_phys_id(struct net_device *netdev,
+ enum ethtool_phys_id_state state)
+{
+ struct hnae3_handle *h = hns3_get_handle(netdev);
+
+ if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->set_led_id)
+ return -EOPNOTSUPP;
+
+ return h->ae_algo->ops->set_led_id(h, state);
+}
+
static const struct ethtool_ops hns3vf_ethtool_ops = {
.get_drvinfo = hns3_get_drvinfo,
.get_ringparam = hns3_get_ringparam,
@@ -1077,6 +1109,8 @@ static const struct ethtool_ops hns3vf_ethtool_ops = {
.set_rxfh = hns3_set_rss,
.get_link_ksettings = hns3_get_link_ksettings,
.get_channels = hns3_get_channels,
+ .get_coalesce = hns3_get_coalesce,
+ .set_coalesce = hns3_set_coalesce,
};
static const struct ethtool_ops hns3_ethtool_ops = {
@@ -1103,6 +1137,9 @@ static const struct ethtool_ops hns3_ethtool_ops = {
.set_channels = hns3_set_channels,
.get_coalesce = hns3_get_coalesce,
.set_coalesce = hns3_set_coalesce,
+ .get_regs_len = hns3_get_regs_len,
+ .get_regs = hns3_get_regs,
+ .set_phys_id = hns3_set_phys_id,
};
void hns3_ethtool_set_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index 3c3159b2d3bf..3fd10a6bec53 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -102,6 +102,10 @@ enum hclge_opcode_type {
HCLGE_OPC_STATS_64_BIT = 0x0030,
HCLGE_OPC_STATS_32_BIT = 0x0031,
HCLGE_OPC_STATS_MAC = 0x0032,
+
+ HCLGE_OPC_QUERY_REG_NUM = 0x0040,
+ HCLGE_OPC_QUERY_32_BIT_REG = 0x0041,
+ HCLGE_OPC_QUERY_64_BIT_REG = 0x0042,
/* Device management command */
/* MAC commond */
@@ -111,6 +115,7 @@ enum hclge_opcode_type {
HCLGE_OPC_QUERY_LINK_STATUS = 0x0307,
HCLGE_OPC_CONFIG_MAX_FRM_SIZE = 0x0308,
HCLGE_OPC_CONFIG_SPEED_DUP = 0x0309,
+ HCLGE_OPC_STATS_MAC_TRAFFIC = 0x0314,
/* MACSEC command */
/* PFC/Pause CMD*/
@@ -223,6 +228,9 @@ enum hclge_opcode_type {
/* Mailbox cmd */
HCLGEVF_OPC_MBX_PF_TO_VF = 0x2000,
+
+ /* Led command */
+ HCLGE_OPC_LED_STATUS_CFG = 0xB000,
};
#define HCLGE_TQP_REG_OFFSET 0x80000
@@ -601,6 +609,28 @@ struct hclge_mac_vlan_mask_entry_cmd {
u8 rsv2[14];
};
+#define HCLGE_MAC_MGR_MASK_VLAN_B BIT(0)
+#define HCLGE_MAC_MGR_MASK_MAC_B BIT(1)
+#define HCLGE_MAC_MGR_MASK_ETHERTYPE_B BIT(2)
+#define HCLGE_MAC_ETHERTYPE_LLDP 0x88cc
+
+struct hclge_mac_mgr_tbl_entry_cmd {
+ u8 flags;
+ u8 resp_code;
+ __le16 vlan_tag;
+ __le32 mac_addr_hi32;
+ __le16 mac_addr_lo16;
+ __le16 rsv1;
+ __le16 ethter_type;
+ __le16 egress_port;
+ __le16 egress_queue;
+ u8 sw_port_id_aware;
+ u8 rsv2;
+ u8 i_port_bitmap;
+ u8 i_port_direction;
+ u8 rsv3[2];
+};
+
#define HCLGE_CFG_MTA_MAC_SEL_S 0x0
#define HCLGE_CFG_MTA_MAC_SEL_M GENMASK(1, 0)
#define HCLGE_CFG_MTA_MAC_EN_B 0x7
@@ -781,6 +811,23 @@ struct hclge_reset_cmd {
#define HCLGE_NIC_CMQ_DESC_NUM 1024
#define HCLGE_NIC_CMQ_DESC_NUM_S 3
+#define HCLGE_LED_PORT_SPEED_STATE_S 0
+#define HCLGE_LED_PORT_SPEED_STATE_M GENMASK(5, 0)
+#define HCLGE_LED_ACTIVITY_STATE_S 0
+#define HCLGE_LED_ACTIVITY_STATE_M GENMASK(1, 0)
+#define HCLGE_LED_LINK_STATE_S 0
+#define HCLGE_LED_LINK_STATE_M GENMASK(1, 0)
+#define HCLGE_LED_LOCATE_STATE_S 0
+#define HCLGE_LED_LOCATE_STATE_M GENMASK(1, 0)
+
+struct hclge_set_led_state_cmd {
+ u8 port_speed_led_config;
+ u8 link_led_config;
+ u8 activity_led_config;
+ u8 locate_led_config;
+ u8 rsv[20];
+};
+
int hclge_cmd_init(struct hclge_dev *hdev);
static inline void hclge_write_reg(void __iomem *base, u32 reg, u32 value)
{
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 27f0ab695f5a..32bc6f68e297 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -39,6 +39,7 @@ static int hclge_set_mta_filter_mode(struct hclge_dev *hdev,
static int hclge_set_mtu(struct hnae3_handle *handle, int new_mtu);
static int hclge_init_vlan_config(struct hclge_dev *hdev);
static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev);
+static int hclge_update_led_status(struct hclge_dev *hdev);
static struct hnae3_ae_algo ae_algo;
@@ -392,6 +393,16 @@ static const struct hclge_comm_stats_str g_mac_stats_string[] = {
HCLGE_MAC_STATS_FIELD_OFF(mac_rx_send_app_bad_pkt_num)}
};
+static const struct hclge_mac_mgr_tbl_entry_cmd hclge_mgr_table[] = {
+ {
+ .flags = HCLGE_MAC_MGR_MASK_VLAN_B,
+ .ethter_type = cpu_to_le16(HCLGE_MAC_ETHERTYPE_LLDP),
+ .mac_addr_hi32 = cpu_to_le32(htonl(0x0180C200)),
+ .mac_addr_lo16 = cpu_to_le16(htons(0x000E)),
+ .i_port_bitmap = 0x1,
+ },
+};
+
static int hclge_64_bit_update_stats(struct hclge_dev *hdev)
{
#define HCLGE_64_BIT_CMD_NUM 5
@@ -495,6 +506,38 @@ static int hclge_32_bit_update_stats(struct hclge_dev *hdev)
return 0;
}
+static int hclge_mac_get_traffic_stats(struct hclge_dev *hdev)
+{
+ struct hclge_mac_stats *mac_stats = &hdev->hw_stats.mac_stats;
+ struct hclge_desc desc;
+ __le64 *desc_data;
+ int ret;
+
+ /* for fiber port, need to query the total rx/tx packets statstics,
+ * used for data transferring checking.
+ */
+ if (hdev->hw.mac.media_type != HNAE3_MEDIA_TYPE_FIBER)
+ return 0;
+
+ if (test_bit(HCLGE_STATE_STATISTICS_UPDATING, &hdev->state))
+ return 0;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_STATS_MAC_TRAFFIC, true);
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "Get MAC total pkt stats fail, ret = %d\n", ret);
+
+ return ret;
+ }
+
+ desc_data = (__le64 *)(&desc.data[0]);
+ mac_stats->mac_tx_total_pkt_num += le64_to_cpu(*desc_data++);
+ mac_stats->mac_rx_total_pkt_num += le64_to_cpu(*desc_data);
+
+ return 0;
+}
+
static int hclge_mac_update_stats(struct hclge_dev *hdev)
{
#define HCLGE_MAC_CMD_NUM 21
@@ -2836,13 +2879,20 @@ static void hclge_service_task(struct work_struct *work)
struct hclge_dev *hdev =
container_of(work, struct hclge_dev, service_task);
+ /* The total rx/tx packets statstics are wanted to be updated
+ * per second. Both hclge_update_stats_for_all() and
+ * hclge_mac_get_traffic_stats() can do it.
+ */
if (hdev->hw_stats.stats_timer >= HCLGE_STATS_TIMER_INTERVAL) {
hclge_update_stats_for_all(hdev);
hdev->hw_stats.stats_timer = 0;
+ } else {
+ hclge_mac_get_traffic_stats(hdev);
}
hclge_update_speed_duplex(hdev);
hclge_update_link_status(hdev);
+ hclge_update_led_status(hdev);
hclge_service_complete(hdev);
}
@@ -4249,6 +4299,91 @@ int hclge_rm_mc_addr_common(struct hclge_vport *vport,
return status;
}
+static int hclge_get_mac_ethertype_cmd_status(struct hclge_dev *hdev,
+ u16 cmdq_resp, u8 resp_code)
+{
+#define HCLGE_ETHERTYPE_SUCCESS_ADD 0
+#define HCLGE_ETHERTYPE_ALREADY_ADD 1
+#define HCLGE_ETHERTYPE_MGR_TBL_OVERFLOW 2
+#define HCLGE_ETHERTYPE_KEY_CONFLICT 3
+
+ int return_status;
+
+ if (cmdq_resp) {
+ dev_err(&hdev->pdev->dev,
+ "cmdq execute failed for get_mac_ethertype_cmd_status, status=%d.\n",
+ cmdq_resp);
+ return -EIO;
+ }
+
+ switch (resp_code) {
+ case HCLGE_ETHERTYPE_SUCCESS_ADD:
+ case HCLGE_ETHERTYPE_ALREADY_ADD:
+ return_status = 0;
+ break;
+ case HCLGE_ETHERTYPE_MGR_TBL_OVERFLOW:
+ dev_err(&hdev->pdev->dev,
+ "add mac ethertype failed for manager table overflow.\n");
+ return_status = -EIO;
+ break;
+ case HCLGE_ETHERTYPE_KEY_CONFLICT:
+ dev_err(&hdev->pdev->dev,
+ "add mac ethertype failed for key conflict.\n");
+ return_status = -EIO;
+ break;
+ default:
+ dev_err(&hdev->pdev->dev,
+ "add mac ethertype failed for undefined, code=%d.\n",
+ resp_code);
+ return_status = -EIO;
+ }
+
+ return return_status;
+}
+
+static int hclge_add_mgr_tbl(struct hclge_dev *hdev,
+ const struct hclge_mac_mgr_tbl_entry_cmd *req)
+{
+ struct hclge_desc desc;
+ u8 resp_code;
+ u16 retval;
+ int ret;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MAC_ETHTYPE_ADD, false);
+ memcpy(desc.data, req, sizeof(struct hclge_mac_mgr_tbl_entry_cmd));
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "add mac ethertype failed for cmd_send, ret =%d.\n",
+ ret);
+ return ret;
+ }
+
+ resp_code = (le32_to_cpu(desc.data[0]) >> 8) & 0xff;
+ retval = le16_to_cpu(desc.retval);
+
+ return hclge_get_mac_ethertype_cmd_status(hdev, retval, resp_code);
+}
+
+static int init_mgr_tbl(struct hclge_dev *hdev)
+{
+ int ret;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(hclge_mgr_table); i++) {
+ ret = hclge_add_mgr_tbl(hdev, &hclge_mgr_table[i]);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "add mac ethertype failed, ret =%d.\n",
+ ret);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
static void hclge_get_mac_addr(struct hnae3_handle *handle, u8 *p)
{
struct hclge_vport *vport = hclge_get_vport(handle);
@@ -5271,6 +5406,12 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
return ret;
}
+ ret = init_mgr_tbl(hdev);
+ if (ret) {
+ dev_err(&pdev->dev, "manager table init fail, ret =%d\n", ret);
+ return ret;
+ }
+
hclge_dcb_ops_set(hdev);
timer_setup(&hdev->service_timer, hclge_service_timer, 0);
@@ -5544,6 +5685,318 @@ static int hclge_set_channels(struct hnae3_handle *handle, u32 new_tqps_num)
return ret;
}
+static int hclge_get_regs_num(struct hclge_dev *hdev, u32 *regs_num_32_bit,
+ u32 *regs_num_64_bit)
+{
+ struct hclge_desc desc;
+ u32 total_num;
+ int ret;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_REG_NUM, true);
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "Query register number cmd failed, ret = %d.\n", ret);
+ return ret;
+ }
+
+ *regs_num_32_bit = le32_to_cpu(desc.data[0]);
+ *regs_num_64_bit = le32_to_cpu(desc.data[1]);
+
+ total_num = *regs_num_32_bit + *regs_num_64_bit;
+ if (!total_num)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int hclge_get_32_bit_regs(struct hclge_dev *hdev, u32 regs_num,
+ void *data)
+{
+#define HCLGE_32_BIT_REG_RTN_DATANUM 8
+
+ struct hclge_desc *desc;
+ u32 *reg_val = data;
+ __le32 *desc_data;
+ int cmd_num;
+ int i, k, n;
+ int ret;
+
+ if (regs_num == 0)
+ return 0;
+
+ cmd_num = DIV_ROUND_UP(regs_num + 2, HCLGE_32_BIT_REG_RTN_DATANUM);
+ desc = kcalloc(cmd_num, sizeof(struct hclge_desc), GFP_KERNEL);
+ if (!desc)
+ return -ENOMEM;
+
+ hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_QUERY_32_BIT_REG, true);
+ ret = hclge_cmd_send(&hdev->hw, desc, cmd_num);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "Query 32 bit register cmd failed, ret = %d.\n", ret);
+ kfree(desc);
+ return ret;
+ }
+
+ for (i = 0; i < cmd_num; i++) {
+ if (i == 0) {
+ desc_data = (__le32 *)(&desc[i].data[0]);
+ n = HCLGE_32_BIT_REG_RTN_DATANUM - 2;
+ } else {
+ desc_data = (__le32 *)(&desc[i]);
+ n = HCLGE_32_BIT_REG_RTN_DATANUM;
+ }
+ for (k = 0; k < n; k++) {
+ *reg_val++ = le32_to_cpu(*desc_data++);
+
+ regs_num--;
+ if (!regs_num)
+ break;
+ }
+ }
+
+ kfree(desc);
+ return 0;
+}
+
+static int hclge_get_64_bit_regs(struct hclge_dev *hdev, u32 regs_num,
+ void *data)
+{
+#define HCLGE_64_BIT_REG_RTN_DATANUM 4
+
+ struct hclge_desc *desc;
+ u64 *reg_val = data;
+ __le64 *desc_data;
+ int cmd_num;
+ int i, k, n;
+ int ret;
+
+ if (regs_num == 0)
+ return 0;
+
+ cmd_num = DIV_ROUND_UP(regs_num + 1, HCLGE_64_BIT_REG_RTN_DATANUM);
+ desc = kcalloc(cmd_num, sizeof(struct hclge_desc), GFP_KERNEL);
+ if (!desc)
+ return -ENOMEM;
+
+ hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_QUERY_64_BIT_REG, true);
+ ret = hclge_cmd_send(&hdev->hw, desc, cmd_num);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "Query 64 bit register cmd failed, ret = %d.\n", ret);
+ kfree(desc);
+ return ret;
+ }
+
+ for (i = 0; i < cmd_num; i++) {
+ if (i == 0) {
+ desc_data = (__le64 *)(&desc[i].data[0]);
+ n = HCLGE_64_BIT_REG_RTN_DATANUM - 1;
+ } else {
+ desc_data = (__le64 *)(&desc[i]);
+ n = HCLGE_64_BIT_REG_RTN_DATANUM;
+ }
+ for (k = 0; k < n; k++) {
+ *reg_val++ = le64_to_cpu(*desc_data++);
+
+ regs_num--;
+ if (!regs_num)
+ break;
+ }
+ }
+
+ kfree(desc);
+ return 0;
+}
+
+static int hclge_get_regs_len(struct hnae3_handle *handle)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+ u32 regs_num_32_bit, regs_num_64_bit;
+ int ret;
+
+ ret = hclge_get_regs_num(hdev, &regs_num_32_bit, &regs_num_64_bit);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "Get register number failed, ret = %d.\n", ret);
+ return -EOPNOTSUPP;
+ }
+
+ return regs_num_32_bit * sizeof(u32) + regs_num_64_bit * sizeof(u64);
+}
+
+static void hclge_get_regs(struct hnae3_handle *handle, u32 *version,
+ void *data)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+ u32 regs_num_32_bit, regs_num_64_bit;
+ int ret;
+
+ *version = hdev->fw_version;
+
+ ret = hclge_get_regs_num(hdev, &regs_num_32_bit, &regs_num_64_bit);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "Get register number failed, ret = %d.\n", ret);
+ return;
+ }
+
+ ret = hclge_get_32_bit_regs(hdev, regs_num_32_bit, data);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "Get 32 bit register failed, ret = %d.\n", ret);
+ return;
+ }
+
+ data = (u32 *)data + regs_num_32_bit;
+ ret = hclge_get_64_bit_regs(hdev, regs_num_64_bit,
+ data);
+ if (ret)
+ dev_err(&hdev->pdev->dev,
+ "Get 64 bit register failed, ret = %d.\n", ret);
+}
+
+static int hclge_set_led_status_sfp(struct hclge_dev *hdev, u8 speed_led_status,
+ u8 act_led_status, u8 link_led_status,
+ u8 locate_led_status)
+{
+ struct hclge_set_led_state_cmd *req;
+ struct hclge_desc desc;
+ int ret;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_LED_STATUS_CFG, false);
+
+ req = (struct hclge_set_led_state_cmd *)desc.data;
+ hnae_set_field(req->port_speed_led_config, HCLGE_LED_PORT_SPEED_STATE_M,
+ HCLGE_LED_PORT_SPEED_STATE_S, speed_led_status);
+ hnae_set_field(req->link_led_config, HCLGE_LED_ACTIVITY_STATE_M,
+ HCLGE_LED_ACTIVITY_STATE_S, act_led_status);
+ hnae_set_field(req->activity_led_config, HCLGE_LED_LINK_STATE_M,
+ HCLGE_LED_LINK_STATE_S, link_led_status);
+ hnae_set_field(req->locate_led_config, HCLGE_LED_LOCATE_STATE_M,
+ HCLGE_LED_LOCATE_STATE_S, locate_led_status);
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret)
+ dev_err(&hdev->pdev->dev,
+ "Send set led state cmd error, ret =%d\n", ret);
+
+ return ret;
+}
+
+enum hclge_led_status {
+ HCLGE_LED_OFF,
+ HCLGE_LED_ON,
+ HCLGE_LED_NO_CHANGE = 0xFF,
+};
+
+static int hclge_set_led_id(struct hnae3_handle *handle,
+ enum ethtool_phys_id_state status)
+{
+#define BLINK_FREQUENCY 2
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+ struct phy_device *phydev = hdev->hw.mac.phydev;
+ int ret = 0;
+
+ if (phydev || hdev->hw.mac.media_type != HNAE3_MEDIA_TYPE_FIBER)
+ return -EOPNOTSUPP;
+
+ switch (status) {
+ case ETHTOOL_ID_ACTIVE:
+ ret = hclge_set_led_status_sfp(hdev,
+ HCLGE_LED_NO_CHANGE,
+ HCLGE_LED_NO_CHANGE,
+ HCLGE_LED_NO_CHANGE,
+ HCLGE_LED_ON);
+ break;
+ case ETHTOOL_ID_INACTIVE:
+ ret = hclge_set_led_status_sfp(hdev,
+ HCLGE_LED_NO_CHANGE,
+ HCLGE_LED_NO_CHANGE,
+ HCLGE_LED_NO_CHANGE,
+ HCLGE_LED_OFF);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+enum hclge_led_port_speed {
+ HCLGE_SPEED_LED_FOR_1G,
+ HCLGE_SPEED_LED_FOR_10G,
+ HCLGE_SPEED_LED_FOR_25G,
+ HCLGE_SPEED_LED_FOR_40G,
+ HCLGE_SPEED_LED_FOR_50G,
+ HCLGE_SPEED_LED_FOR_100G,
+};
+
+static u8 hclge_led_get_speed_status(u32 speed)
+{
+ u8 speed_led;
+
+ switch (speed) {
+ case HCLGE_MAC_SPEED_1G:
+ speed_led = HCLGE_SPEED_LED_FOR_1G;
+ break;
+ case HCLGE_MAC_SPEED_10G:
+ speed_led = HCLGE_SPEED_LED_FOR_10G;
+ break;
+ case HCLGE_MAC_SPEED_25G:
+ speed_led = HCLGE_SPEED_LED_FOR_25G;
+ break;
+ case HCLGE_MAC_SPEED_40G:
+ speed_led = HCLGE_SPEED_LED_FOR_40G;
+ break;
+ case HCLGE_MAC_SPEED_50G:
+ speed_led = HCLGE_SPEED_LED_FOR_50G;
+ break;
+ case HCLGE_MAC_SPEED_100G:
+ speed_led = HCLGE_SPEED_LED_FOR_100G;
+ break;
+ default:
+ speed_led = HCLGE_LED_NO_CHANGE;
+ }
+
+ return speed_led;
+}
+
+static int hclge_update_led_status(struct hclge_dev *hdev)
+{
+ u8 port_speed_status, link_status, activity_status;
+ u64 rx_pkts, tx_pkts;
+
+ if (hdev->hw.mac.media_type != HNAE3_MEDIA_TYPE_FIBER)
+ return 0;
+
+ port_speed_status = hclge_led_get_speed_status(hdev->hw.mac.speed);
+
+ rx_pkts = hdev->hw_stats.mac_stats.mac_rx_total_pkt_num;
+ tx_pkts = hdev->hw_stats.mac_stats.mac_tx_total_pkt_num;
+ if (rx_pkts != hdev->rx_pkts_for_led ||
+ tx_pkts != hdev->tx_pkts_for_led)
+ activity_status = HCLGE_LED_ON;
+ else
+ activity_status = HCLGE_LED_OFF;
+ hdev->rx_pkts_for_led = rx_pkts;
+ hdev->tx_pkts_for_led = tx_pkts;
+
+ if (hdev->hw.mac.link)
+ link_status = HCLGE_LED_ON;
+ else
+ link_status = HCLGE_LED_OFF;
+
+ return hclge_set_led_status_sfp(hdev, port_speed_status,
+ activity_status, link_status,
+ HCLGE_LED_NO_CHANGE);
+}
+
static const struct hnae3_ae_ops hclge_ops = {
.init_ae_dev = hclge_init_ae_dev,
.uninit_ae_dev = hclge_uninit_ae_dev,
@@ -5595,6 +6048,9 @@ static const struct hnae3_ae_ops hclge_ops = {
.set_channels = hclge_set_channels,
.get_channels = hclge_get_channels,
.get_flowctrl_adv = hclge_get_flowctrl_adv,
+ .get_regs_len = hclge_get_regs_len,
+ .get_regs = hclge_get_regs,
+ .set_led_id = hclge_set_led_id,
};
static struct hnae3_ae_algo ae_algo = {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index eeb6c8d66e4e..d99a76a9557c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -550,6 +550,9 @@ struct hclge_dev {
bool accept_mta_mc; /* Whether accept mta filter multicast */
struct hclge_vlan_type_cfg vlan_type_cfg;
+
+ u64 rx_pkts_for_led;
+ u64 tx_pkts_for_led;
};
/* VPort level vlan tag configuration for TX direction */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index 96f453ff84b5..f38fc5ce9f51 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -116,6 +116,9 @@ static int hclge_get_ring_chain_from_mbx(
hnae_set_bit(ring_chain->flag, HNAE3_RING_TYPE_B, req->msg[3]);
ring_chain->tqp_index =
hclge_get_queue_id(vport->nic.kinfo.tqp[req->msg[4]]);
+ hnae_set_field(ring_chain->int_gl_idx, HCLGE_INT_GL_IDX_M,
+ HCLGE_INT_GL_IDX_S,
+ req->msg[5]);
cur_chain = ring_chain;
@@ -133,6 +136,11 @@ static int hclge_get_ring_chain_from_mbx(
[req->msg[HCLGE_RING_NODE_VARIABLE_NUM * i +
HCLGE_RING_MAP_MBX_BASIC_MSG_NUM + 1]]);
+ hnae_set_field(new_chain->int_gl_idx, HCLGE_INT_GL_IDX_M,
+ HCLGE_INT_GL_IDX_S,
+ req->msg[HCLGE_RING_NODE_VARIABLE_NUM * i +
+ HCLGE_RING_MAP_MBX_BASIC_MSG_NUM + 2]);
+
cur_chain->next = new_chain;
cur_chain = new_chain;
}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 3d2bc9a971fa..0d89965f7928 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -565,6 +565,11 @@ static int hclgevf_bind_ring_to_vector(struct hnae3_handle *handle, bool en,
hnae_get_bit(node->flag, HNAE3_RING_TYPE_B);
req->msg[HCLGEVF_RING_NODE_VARIABLE_NUM * i + 1] =
node->tqp_index;
+ req->msg[HCLGEVF_RING_NODE_VARIABLE_NUM * i + 2] =
+ hnae_get_field(node->int_gl_idx,
+ HNAE3_RING_GL_IDX_M,
+ HNAE3_RING_GL_IDX_S);
+
if (i == (HCLGE_MBX_VF_MSG_DATA_NUM -
HCLGEVF_RING_MAP_MBX_BASIC_MSG_NUM) /
HCLGEVF_RING_NODE_VARIABLE_NUM) {
diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c
index 71ddad13baf4..354c0982847b 100644
--- a/drivers/net/ethernet/ibm/emac/core.c
+++ b/drivers/net/ethernet/ibm/emac/core.c
@@ -494,6 +494,9 @@ static u32 __emac_calc_base_mr1(struct emac_instance *dev, int tx_size, int rx_s
case 16384:
ret |= EMAC_MR1_RFS_16K;
break;
+ case 8192:
+ ret |= EMAC4_MR1_RFS_8K;
+ break;
case 4096:
ret |= EMAC_MR1_RFS_4K;
break;
@@ -516,6 +519,9 @@ static u32 __emac4_calc_base_mr1(struct emac_instance *dev, int tx_size, int rx_
case 16384:
ret |= EMAC4_MR1_TFS_16K;
break;
+ case 8192:
+ ret |= EMAC4_MR1_TFS_8K;
+ break;
case 4096:
ret |= EMAC4_MR1_TFS_4K;
break;
diff --git a/drivers/net/ethernet/ibm/emac/emac.h b/drivers/net/ethernet/ibm/emac/emac.h
index bc14dcf27b6b..e2f80cca9bed 100644
--- a/drivers/net/ethernet/ibm/emac/emac.h
+++ b/drivers/net/ethernet/ibm/emac/emac.h
@@ -138,9 +138,11 @@ struct emac_regs {
#define EMAC4_MR1_RFS_2K 0x00100000
#define EMAC4_MR1_RFS_4K 0x00180000
+#define EMAC4_MR1_RFS_8K 0x00200000
#define EMAC4_MR1_RFS_16K 0x00280000
#define EMAC4_MR1_TFS_2K 0x00020000
#define EMAC4_MR1_TFS_4K 0x00030000
+#define EMAC4_MR1_TFS_8K 0x00040000
#define EMAC4_MR1_TFS_16K 0x00050000
#define EMAC4_MR1_TR 0x00008000
#define EMAC4_MR1_MWSW_001 0x00001000
@@ -229,7 +231,7 @@ struct emac_regs {
#define EMAC_STACR_PHYE 0x00004000
#define EMAC_STACR_STAC_MASK 0x00003000
#define EMAC_STACR_STAC_READ 0x00001000
-#define EMAC_STACR_STAC_WRITE 0x00002000
+#define EMAC_STACR_STAC_WRITE 0x00000800
#define EMAC_STACR_OPBC_MASK 0x00000C00
#define EMAC_STACR_OPBC_50 0x00000000
#define EMAC_STACR_OPBC_66 0x00000400
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index be2ce8dece4a..8f2a77ecf4fb 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -411,6 +411,10 @@ static int reset_rx_pools(struct ibmvnic_adapter *adapter)
struct ibmvnic_rx_pool *rx_pool;
int rx_scrqs;
int i, j, rc;
+ u64 *size_array;
+
+ size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
+ be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size));
rx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs);
for (i = 0; i < rx_scrqs; i++) {
@@ -418,7 +422,17 @@ static int reset_rx_pools(struct ibmvnic_adapter *adapter)
netdev_dbg(adapter->netdev, "Re-setting rx_pool[%d]\n", i);
- rc = reset_long_term_buff(adapter, &rx_pool->long_term_buff);
+ if (rx_pool->buff_size != be64_to_cpu(size_array[i])) {
+ free_long_term_buff(adapter, &rx_pool->long_term_buff);
+ rx_pool->buff_size = be64_to_cpu(size_array[i]);
+ alloc_long_term_buff(adapter, &rx_pool->long_term_buff,
+ rx_pool->size *
+ rx_pool->buff_size);
+ } else {
+ rc = reset_long_term_buff(adapter,
+ &rx_pool->long_term_buff);
+ }
+
if (rc)
return rc;
@@ -440,14 +454,12 @@ static int reset_rx_pools(struct ibmvnic_adapter *adapter)
static void release_rx_pools(struct ibmvnic_adapter *adapter)
{
struct ibmvnic_rx_pool *rx_pool;
- int rx_scrqs;
int i, j;
if (!adapter->rx_pool)
return;
- rx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_rxadd_subcrqs);
- for (i = 0; i < rx_scrqs; i++) {
+ for (i = 0; i < adapter->num_active_rx_pools; i++) {
rx_pool = &adapter->rx_pool[i];
netdev_dbg(adapter->netdev, "Releasing rx_pool[%d]\n", i);
@@ -470,6 +482,7 @@ static void release_rx_pools(struct ibmvnic_adapter *adapter)
kfree(adapter->rx_pool);
adapter->rx_pool = NULL;
+ adapter->num_active_rx_pools = 0;
}
static int init_rx_pools(struct net_device *netdev)
@@ -494,6 +507,8 @@ static int init_rx_pools(struct net_device *netdev)
return -1;
}
+ adapter->num_active_rx_pools = 0;
+
for (i = 0; i < rxadd_subcrqs; i++) {
rx_pool = &adapter->rx_pool[i];
@@ -537,6 +552,8 @@ static int init_rx_pools(struct net_device *netdev)
rx_pool->next_free = 0;
}
+ adapter->num_active_rx_pools = rxadd_subcrqs;
+
return 0;
}
@@ -587,13 +604,12 @@ static void release_vpd_data(struct ibmvnic_adapter *adapter)
static void release_tx_pools(struct ibmvnic_adapter *adapter)
{
struct ibmvnic_tx_pool *tx_pool;
- int i, tx_scrqs;
+ int i;
if (!adapter->tx_pool)
return;
- tx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs);
- for (i = 0; i < tx_scrqs; i++) {
+ for (i = 0; i < adapter->num_active_tx_pools; i++) {
netdev_dbg(adapter->netdev, "Releasing tx_pool[%d]\n", i);
tx_pool = &adapter->tx_pool[i];
kfree(tx_pool->tx_buff);
@@ -604,6 +620,7 @@ static void release_tx_pools(struct ibmvnic_adapter *adapter)
kfree(adapter->tx_pool);
adapter->tx_pool = NULL;
+ adapter->num_active_tx_pools = 0;
}
static int init_tx_pools(struct net_device *netdev)
@@ -620,6 +637,8 @@ static int init_tx_pools(struct net_device *netdev)
if (!adapter->tx_pool)
return -1;
+ adapter->num_active_tx_pools = 0;
+
for (i = 0; i < tx_subcrqs; i++) {
tx_pool = &adapter->tx_pool[i];
@@ -667,6 +686,8 @@ static int init_tx_pools(struct net_device *netdev)
tx_pool->producer_index = 0;
}
+ adapter->num_active_tx_pools = tx_subcrqs;
+
return 0;
}
@@ -861,7 +882,7 @@ static int ibmvnic_get_vpd(struct ibmvnic_adapter *adapter)
if (adapter->vpd->buff)
len = adapter->vpd->len;
- reinit_completion(&adapter->fw_done);
+ init_completion(&adapter->fw_done);
crq.get_vpd_size.first = IBMVNIC_CRQ_CMD;
crq.get_vpd_size.cmd = GET_VPD_SIZE;
ibmvnic_send_crq(adapter, &crq);
@@ -923,6 +944,13 @@ static int init_resources(struct ibmvnic_adapter *adapter)
if (!adapter->vpd)
return -ENOMEM;
+ /* Vital Product Data (VPD) */
+ rc = ibmvnic_get_vpd(adapter);
+ if (rc) {
+ netdev_err(netdev, "failed to initialize Vital Product Data (VPD)\n");
+ return rc;
+ }
+
adapter->map_id = 1;
adapter->napi = kcalloc(adapter->req_rx_queues,
sizeof(struct napi_struct), GFP_KERNEL);
@@ -996,7 +1024,7 @@ static int __ibmvnic_open(struct net_device *netdev)
static int ibmvnic_open(struct net_device *netdev)
{
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
- int rc, vpd;
+ int rc;
mutex_lock(&adapter->reset_lock);
@@ -1019,11 +1047,6 @@ static int ibmvnic_open(struct net_device *netdev)
rc = __ibmvnic_open(netdev);
netif_carrier_on(netdev);
- /* Vital Product Data (VPD) */
- vpd = ibmvnic_get_vpd(adapter);
- if (vpd)
- netdev_err(netdev, "failed to initialize Vital Product Data (VPD)\n");
-
mutex_unlock(&adapter->reset_lock);
return rc;
@@ -1553,6 +1576,7 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p)
static int do_reset(struct ibmvnic_adapter *adapter,
struct ibmvnic_rwi *rwi, u32 reset_state)
{
+ u64 old_num_rx_queues, old_num_tx_queues;
struct net_device *netdev = adapter->netdev;
int i, rc;
@@ -1562,6 +1586,9 @@ static int do_reset(struct ibmvnic_adapter *adapter,
netif_carrier_off(netdev);
adapter->reset_reason = rwi->reset_reason;
+ old_num_rx_queues = adapter->req_rx_queues;
+ old_num_tx_queues = adapter->req_tx_queues;
+
if (rwi->reset_reason == VNIC_RESET_MOBILITY) {
rc = ibmvnic_reenable_crq_queue(adapter);
if (rc)
@@ -1606,6 +1633,12 @@ static int do_reset(struct ibmvnic_adapter *adapter,
rc = init_resources(adapter);
if (rc)
return rc;
+ } else if (adapter->req_rx_queues != old_num_rx_queues ||
+ adapter->req_tx_queues != old_num_tx_queues) {
+ release_rx_pools(adapter);
+ release_tx_pools(adapter);
+ init_rx_pools(netdev);
+ init_tx_pools(netdev);
} else {
rc = reset_tx_pools(adapter);
if (rc)
@@ -3603,7 +3636,17 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq,
*req_value,
(long int)be64_to_cpu(crq->request_capability_rsp.
number), name);
- *req_value = be64_to_cpu(crq->request_capability_rsp.number);
+
+ if (be16_to_cpu(crq->request_capability_rsp.capability) ==
+ REQ_MTU) {
+ pr_err("mtu of %llu is not supported. Reverting.\n",
+ *req_value);
+ *req_value = adapter->fallback.mtu;
+ } else {
+ *req_value =
+ be64_to_cpu(crq->request_capability_rsp.number);
+ }
+
ibmvnic_send_req_caps(adapter, 1);
return;
default:
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 2df79fdd800b..fe21a6e2ddae 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -1091,6 +1091,8 @@ struct ibmvnic_adapter {
u64 opt_rxba_entries_per_subcrq;
__be64 tx_rx_desc_req;
u8 map_id;
+ u64 num_active_rx_pools;
+ u64 num_active_tx_pools;
struct tasklet_struct tasklet;
enum vnic_state state;
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 9f18d39bdc8f..1298b69f990b 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -3303,9 +3303,11 @@ static void e1000_configure_rx(struct e1000_adapter *adapter)
if (adapter->flags & FLAG_IS_ICH) {
u32 rxdctl = er32(RXDCTL(0));
- ew32(RXDCTL(0), rxdctl | 0x3);
+ ew32(RXDCTL(0), rxdctl | 0x3 | BIT(8));
}
+ dev_info(&adapter->pdev->dev,
+ "Some CPU C-states have been disabled in order to enable jumbo frames\n");
pm_qos_update_request(&adapter->pm_qos_req, lat);
} else {
pm_qos_update_request(&adapter->pm_qos_req,
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
index ea3ab24265ee..760cfa52d02c 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
@@ -353,7 +353,7 @@ int fm10k_iov_resume(struct pci_dev *pdev)
struct fm10k_vf_info *vf_info = &iov_data->vf_info[i];
/* allocate all but the last GLORT to the VFs */
- if (i == ((~hw->mac.dglort_map) >> FM10K_DGLORTMAP_MASK_SHIFT))
+ if (i == (~hw->mac.dglort_map >> FM10K_DGLORTMAP_MASK_SHIFT))
break;
/* assign GLORT to VF, and restrict it to multicast */
@@ -511,7 +511,7 @@ int fm10k_iov_configure(struct pci_dev *pdev, int num_vfs)
return err;
/* allocate VFs if not already allocated */
- if (num_vfs && (num_vfs != current_vfs)) {
+ if (num_vfs && num_vfs != current_vfs) {
/* Disable completer abort error reporting as
* the VFs can trigger this any time they read a queue
* that they don't own.
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
index adc62fb38c49..a38ae5c54da3 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
@@ -934,8 +934,12 @@ static int fm10k_update_vid(struct net_device *netdev, u16 vid, bool set)
if (vid >= VLAN_N_VID)
return -EINVAL;
- /* Verify we have permission to add VLANs */
- if (hw->mac.vlan_override)
+ /* Verify that we have permission to add VLANs. If this is a request
+ * to remove a VLAN, we still want to allow the user to remove the
+ * VLAN device. In that case, we need to clear the bit in the
+ * active_vlans bitmask.
+ */
+ if (set && hw->mac.vlan_override)
return -EACCES;
/* update active_vlans bitmask */
@@ -954,6 +958,12 @@ static int fm10k_update_vid(struct net_device *netdev, u16 vid, bool set)
rx_ring->vid &= ~FM10K_VLAN_CLEAR;
}
+ /* If our VLAN has been overridden, there is no reason to send VLAN
+ * removal requests as they will be silently ignored.
+ */
+ if (hw->mac.vlan_override)
+ return 0;
+
/* Do not remove default VLAN ID related entries from VLAN and MAC
* tables
*/
@@ -1040,14 +1050,13 @@ static int __fm10k_uc_sync(struct net_device *dev,
const unsigned char *addr, bool sync)
{
struct fm10k_intfc *interface = netdev_priv(dev);
- struct fm10k_hw *hw = &interface->hw;
u16 vid, glort = interface->glort;
s32 err;
if (!is_valid_ether_addr(addr))
return -EADDRNOTAVAIL;
- for (vid = hw->mac.default_vid ? fm10k_find_next_vlan(interface, 0) : 1;
+ for (vid = fm10k_find_next_vlan(interface, 0);
vid < VLAN_N_VID;
vid = fm10k_find_next_vlan(interface, vid)) {
err = fm10k_queue_mac_request(interface, glort,
@@ -1106,14 +1115,13 @@ static int __fm10k_mc_sync(struct net_device *dev,
const unsigned char *addr, bool sync)
{
struct fm10k_intfc *interface = netdev_priv(dev);
- struct fm10k_hw *hw = &interface->hw;
u16 vid, glort = interface->glort;
s32 err;
if (!is_multicast_ether_addr(addr))
return -EADDRNOTAVAIL;
- for (vid = hw->mac.default_vid ? fm10k_find_next_vlan(interface, 0) : 1;
+ for (vid = fm10k_find_next_vlan(interface, 0);
vid < VLAN_N_VID;
vid = fm10k_find_next_vlan(interface, vid)) {
err = fm10k_queue_mac_request(interface, glort,
@@ -1157,10 +1165,12 @@ static void fm10k_set_rx_mode(struct net_device *dev)
/* update xcast mode first, but only if it changed */
if (interface->xcast_mode != xcast_mode) {
- /* update VLAN table */
+ /* update VLAN table when entering promiscuous mode */
if (xcast_mode == FM10K_XCAST_MODE_PROMISC)
fm10k_queue_vlan_request(interface, FM10K_VLAN_ALL,
0, true);
+
+ /* clear VLAN table when exiting promiscuous mode */
if (interface->xcast_mode == FM10K_XCAST_MODE_PROMISC)
fm10k_clear_unused_vlans(interface);
@@ -1182,9 +1192,10 @@ static void fm10k_set_rx_mode(struct net_device *dev)
void fm10k_restore_rx_state(struct fm10k_intfc *interface)
{
+ struct fm10k_l2_accel *l2_accel = interface->l2_accel;
struct net_device *netdev = interface->netdev;
struct fm10k_hw *hw = &interface->hw;
- int xcast_mode;
+ int xcast_mode, i;
u16 vid, glort;
/* record glort for this interface */
@@ -1211,11 +1222,8 @@ void fm10k_restore_rx_state(struct fm10k_intfc *interface)
fm10k_queue_vlan_request(interface, FM10K_VLAN_ALL, 0,
xcast_mode == FM10K_XCAST_MODE_PROMISC);
- /* Add filter for VLAN 0 */
- fm10k_queue_vlan_request(interface, 0, 0, true);
-
/* update table with current entries */
- for (vid = hw->mac.default_vid ? fm10k_find_next_vlan(interface, 0) : 1;
+ for (vid = fm10k_find_next_vlan(interface, 0);
vid < VLAN_N_VID;
vid = fm10k_find_next_vlan(interface, vid)) {
fm10k_queue_vlan_request(interface, vid, 0, true);
@@ -1234,6 +1242,24 @@ void fm10k_restore_rx_state(struct fm10k_intfc *interface)
__dev_uc_sync(netdev, fm10k_uc_sync, fm10k_uc_unsync);
__dev_mc_sync(netdev, fm10k_mc_sync, fm10k_mc_unsync);
+ /* synchronize macvlan addresses */
+ if (l2_accel) {
+ for (i = 0; i < l2_accel->size; i++) {
+ struct net_device *sdev = l2_accel->macvlan[i];
+
+ if (!sdev)
+ continue;
+
+ glort = l2_accel->dglort + 1 + i;
+
+ hw->mac.ops.update_xcast_mode(hw, glort,
+ FM10K_XCAST_MODE_MULTI);
+ fm10k_queue_mac_request(interface, glort,
+ sdev->dev_addr,
+ hw->mac.default_vid, true);
+ }
+ }
+
fm10k_mbx_unlock(interface);
/* record updated xcast mode state */
@@ -1490,7 +1516,7 @@ static void *fm10k_dfwd_add_station(struct net_device *dev,
hw->mac.ops.update_xcast_mode(hw, glort,
FM10K_XCAST_MODE_MULTI);
fm10k_queue_mac_request(interface, glort, sdev->dev_addr,
- 0, true);
+ hw->mac.default_vid, true);
}
fm10k_mbx_unlock(interface);
@@ -1530,7 +1556,7 @@ static void fm10k_dfwd_del_station(struct net_device *dev, void *priv)
hw->mac.ops.update_xcast_mode(hw, glort,
FM10K_XCAST_MODE_NONE);
fm10k_queue_mac_request(interface, glort, sdev->dev_addr,
- 0, false);
+ hw->mac.default_vid, false);
}
fm10k_mbx_unlock(interface);
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
index 425d814aed4d..d6406fc31ffb 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
@@ -866,7 +866,7 @@ static s32 fm10k_iov_assign_default_mac_vlan_pf(struct fm10k_hw *hw,
/* Determine correct default VLAN ID. The FM10K_VLAN_OVERRIDE bit is
* used here to indicate to the VF that it will not have privilege to
* write VLAN_TABLE. All policy is enforced on the PF but this allows
- * the VF to correctly report errors to userspace rqeuests.
+ * the VF to correctly report errors to userspace requests.
*/
if (vf_info->pf_vid)
vf_vid = vf_info->pf_vid | FM10K_VLAN_OVERRIDE;
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index e019baa905c5..46e9f4e0a02c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -508,39 +508,40 @@ struct i40e_pf {
#define I40E_HW_PORT_ID_VALID BIT(17)
#define I40E_HW_RESTART_AUTONEG BIT(18)
- u32 flags;
-#define I40E_FLAG_RX_CSUM_ENABLED BIT(0)
-#define I40E_FLAG_MSI_ENABLED BIT(1)
-#define I40E_FLAG_MSIX_ENABLED BIT(2)
-#define I40E_FLAG_RSS_ENABLED BIT(3)
-#define I40E_FLAG_VMDQ_ENABLED BIT(4)
-#define I40E_FLAG_FILTER_SYNC BIT(5)
-#define I40E_FLAG_SRIOV_ENABLED BIT(6)
-#define I40E_FLAG_DCB_CAPABLE BIT(7)
-#define I40E_FLAG_DCB_ENABLED BIT(8)
-#define I40E_FLAG_FD_SB_ENABLED BIT(9)
-#define I40E_FLAG_FD_ATR_ENABLED BIT(10)
-#define I40E_FLAG_FD_SB_AUTO_DISABLED BIT(11)
-#define I40E_FLAG_FD_ATR_AUTO_DISABLED BIT(12)
-#define I40E_FLAG_MFP_ENABLED BIT(13)
-#define I40E_FLAG_UDP_FILTER_SYNC BIT(14)
-#define I40E_FLAG_HW_ATR_EVICT_ENABLED BIT(15)
-#define I40E_FLAG_VEB_MODE_ENABLED BIT(16)
-#define I40E_FLAG_VEB_STATS_ENABLED BIT(17)
-#define I40E_FLAG_LINK_POLLING_ENABLED BIT(18)
-#define I40E_FLAG_TRUE_PROMISC_SUPPORT BIT(19)
-#define I40E_FLAG_TEMP_LINK_POLLING BIT(20)
-#define I40E_FLAG_LEGACY_RX BIT(21)
-#define I40E_FLAG_PTP BIT(22)
-#define I40E_FLAG_IWARP_ENABLED BIT(23)
-#define I40E_FLAG_SERVICE_CLIENT_REQUESTED BIT(24)
-#define I40E_FLAG_CLIENT_L2_CHANGE BIT(25)
-#define I40E_FLAG_CLIENT_RESET BIT(26)
-#define I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED BIT(27)
-#define I40E_FLAG_SOURCE_PRUNING_DISABLED BIT(28)
-#define I40E_FLAG_TC_MQPRIO BIT(29)
-#define I40E_FLAG_FD_SB_INACTIVE BIT(30)
-#define I40E_FLAG_FD_SB_TO_CLOUD_FILTER BIT(31)
+ u64 flags;
+#define I40E_FLAG_RX_CSUM_ENABLED BIT_ULL(0)
+#define I40E_FLAG_MSI_ENABLED BIT_ULL(1)
+#define I40E_FLAG_MSIX_ENABLED BIT_ULL(2)
+#define I40E_FLAG_RSS_ENABLED BIT_ULL(3)
+#define I40E_FLAG_VMDQ_ENABLED BIT_ULL(4)
+#define I40E_FLAG_FILTER_SYNC BIT_ULL(5)
+#define I40E_FLAG_SRIOV_ENABLED BIT_ULL(6)
+#define I40E_FLAG_DCB_CAPABLE BIT_ULL(7)
+#define I40E_FLAG_DCB_ENABLED BIT_ULL(8)
+#define I40E_FLAG_FD_SB_ENABLED BIT_ULL(9)
+#define I40E_FLAG_FD_ATR_ENABLED BIT_ULL(10)
+#define I40E_FLAG_FD_SB_AUTO_DISABLED BIT_ULL(11)
+#define I40E_FLAG_FD_ATR_AUTO_DISABLED BIT_ULL(12)
+#define I40E_FLAG_MFP_ENABLED BIT_ULL(13)
+#define I40E_FLAG_UDP_FILTER_SYNC BIT_ULL(14)
+#define I40E_FLAG_HW_ATR_EVICT_ENABLED BIT_ULL(15)
+#define I40E_FLAG_VEB_MODE_ENABLED BIT_ULL(16)
+#define I40E_FLAG_VEB_STATS_ENABLED BIT_ULL(17)
+#define I40E_FLAG_LINK_POLLING_ENABLED BIT_ULL(18)
+#define I40E_FLAG_TRUE_PROMISC_SUPPORT BIT_ULL(19)
+#define I40E_FLAG_TEMP_LINK_POLLING BIT_ULL(20)
+#define I40E_FLAG_LEGACY_RX BIT_ULL(21)
+#define I40E_FLAG_PTP BIT_ULL(22)
+#define I40E_FLAG_IWARP_ENABLED BIT_ULL(23)
+#define I40E_FLAG_SERVICE_CLIENT_REQUESTED BIT_ULL(24)
+#define I40E_FLAG_CLIENT_L2_CHANGE BIT_ULL(25)
+#define I40E_FLAG_CLIENT_RESET BIT_ULL(26)
+#define I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED BIT_ULL(27)
+#define I40E_FLAG_SOURCE_PRUNING_DISABLED BIT_ULL(28)
+#define I40E_FLAG_TC_MQPRIO BIT_ULL(29)
+#define I40E_FLAG_FD_SB_INACTIVE BIT_ULL(30)
+#define I40E_FLAG_FD_SB_TO_CLOUD_FILTER BIT_ULL(31)
+#define I40E_FLAG_DISABLE_FW_LLDP BIT_ULL(32)
struct i40e_client_instance *cinst;
bool stat_offsets_loaded;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
index 9af74253c3f7..e78971605e0b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
@@ -907,10 +907,15 @@ i40e_status i40e_asq_send_command(struct i40e_hw *hw,
/* update the error if time out occurred */
if ((!cmd_completed) &&
(!details->async && !details->postpone)) {
- i40e_debug(hw,
- I40E_DEBUG_AQ_MESSAGE,
- "AQTX: Writeback timeout.\n");
- status = I40E_ERR_ADMIN_QUEUE_TIMEOUT;
+ if (rd32(hw, hw->aq.asq.len) & I40E_GL_ATQLEN_ATQCRIT_MASK) {
+ i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE,
+ "AQTX: AQ Critical error.\n");
+ status = I40E_ERR_ADMIN_QUEUE_CRITICAL_ERROR;
+ } else {
+ i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE,
+ "AQTX: Writeback timeout.\n");
+ status = I40E_ERR_ADMIN_QUEUE_TIMEOUT;
+ }
}
asq_send_command_error:
@@ -971,7 +976,7 @@ i40e_status i40e_clean_arq_element(struct i40e_hw *hw,
}
/* set next_to_use to head */
- ntu = (rd32(hw, hw->aq.arq.head) & I40E_PF_ARQH_ARQH_MASK);
+ ntu = rd32(hw, hw->aq.arq.head) & I40E_PF_ARQH_ARQH_MASK;
if (ntu == ntc) {
/* nothing to do - shouldn't need to update ring's values */
ret_code = I40E_ERR_ADMIN_QUEUE_NO_WORK;
@@ -1027,7 +1032,7 @@ i40e_status i40e_clean_arq_element(struct i40e_hw *hw,
hw->aq.arq.next_to_clean = ntc;
hw->aq.arq.next_to_use = ntu;
- i40e_nvmupd_check_wait_event(hw, le16_to_cpu(e->desc.opcode));
+ i40e_nvmupd_check_wait_event(hw, le16_to_cpu(e->desc.opcode), &e->desc);
clean_arq_element_out:
/* Set pending if needed, unlock and return */
if (pending)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
index c5776340517c..a852775d3059 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
@@ -205,6 +205,7 @@ enum i40e_admin_queue_opc {
/* DCB commands */
i40e_aqc_opc_dcb_ignore_pfc = 0x0301,
i40e_aqc_opc_dcb_updated = 0x0302,
+ i40e_aqc_opc_set_dcb_parameters = 0x0303,
/* TX scheduler */
i40e_aqc_opc_configure_vsi_bw_limit = 0x0400,
@@ -2231,8 +2232,12 @@ I40E_CHECK_CMD_LENGTH(i40e_aqc_phy_register_access);
*/
struct i40e_aqc_nvm_update {
u8 command_flags;
-#define I40E_AQ_NVM_LAST_CMD 0x01
-#define I40E_AQ_NVM_FLASH_ONLY 0x80
+#define I40E_AQ_NVM_LAST_CMD 0x01
+#define I40E_AQ_NVM_FLASH_ONLY 0x80
+#define I40E_AQ_NVM_PRESERVATION_FLAGS_SHIFT 1
+#define I40E_AQ_NVM_PRESERVATION_FLAGS_MASK 0x03
+#define I40E_AQ_NVM_PRESERVATION_FLAGS_SELECTED 0x03
+#define I40E_AQ_NVM_PRESERVATION_FLAGS_ALL 0x01
u8 module_pointer;
__le16 length;
__le32 offset;
@@ -2492,6 +2497,17 @@ struct i40e_aqc_lldp_start {
I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_start);
+/* Set DCB (direct 0x0303) */
+struct i40e_aqc_set_dcb_parameters {
+ u8 command;
+#define I40E_AQ_DCB_SET_AGENT 0x1
+#define I40E_DCB_VALID 0x1
+ u8 valid_flags;
+ u8 reserved[14];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_set_dcb_parameters);
+
/* Get CEE DCBX Oper Config (0x0A07)
* uses the generic descriptor struct
* returns below as indirect response
diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c
index 1b1e2acbd07f..0de9610c1d8d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_client.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_client.c
@@ -378,11 +378,11 @@ void i40e_client_subtask(struct i40e_pf *pf)
if (!client || !cdev)
return;
- /* Here we handle client opens. If the client is down, but
- * the netdev is up, then open the client.
+ /* Here we handle client opens. If the client is down, and
+ * the netdev is registered, then open the client.
*/
if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state)) {
- if (!test_bit(__I40E_VSI_DOWN, vsi->state) &&
+ if (vsi->netdev_registered &&
client->ops && client->ops->open) {
set_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state);
ret = client->ops->open(&cdev->lan_info, client);
@@ -393,17 +393,19 @@ void i40e_client_subtask(struct i40e_pf *pf)
i40e_client_del_instance(pf);
}
}
- } else {
- /* Likewise for client close. If the client is up, but the netdev
- * is down, then close the client.
- */
- if (test_bit(__I40E_VSI_DOWN, vsi->state) &&
- client->ops && client->ops->close) {
- clear_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state);
- client->ops->close(&cdev->lan_info, client, false);
- i40e_client_release_qvlist(&cdev->lan_info);
- }
}
+
+ /* enable/disable PE TCP_ENA flag based on netdev down/up
+ */
+ if (test_bit(__I40E_VSI_DOWN, vsi->state))
+ i40e_client_update_vsi_ctxt(&cdev->lan_info, client,
+ 0, 0, 0,
+ I40E_CLIENT_VSI_FLAG_TCP_ENABLE);
+ else
+ i40e_client_update_vsi_ctxt(&cdev->lan_info, client,
+ 0, 0,
+ I40E_CLIENT_VSI_FLAG_TCP_ENABLE,
+ I40E_CLIENT_VSI_FLAG_TCP_ENABLE);
}
/**
@@ -717,13 +719,13 @@ static int i40e_client_update_vsi_ctxt(struct i40e_info *ldev,
return -ENOENT;
}
- if ((valid_flag & I40E_CLIENT_VSI_FLAG_TCP_PACKET_ENABLE) &&
- (flag & I40E_CLIENT_VSI_FLAG_TCP_PACKET_ENABLE)) {
+ if ((valid_flag & I40E_CLIENT_VSI_FLAG_TCP_ENABLE) &&
+ (flag & I40E_CLIENT_VSI_FLAG_TCP_ENABLE)) {
ctxt.info.valid_sections =
cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
ctxt.info.queueing_opt_flags |= I40E_AQ_VSI_QUE_OPT_TCP_ENA;
- } else if ((valid_flag & I40E_CLIENT_VSI_FLAG_TCP_PACKET_ENABLE) &&
- !(flag & I40E_CLIENT_VSI_FLAG_TCP_PACKET_ENABLE)) {
+ } else if ((valid_flag & I40E_CLIENT_VSI_FLAG_TCP_ENABLE) &&
+ !(flag & I40E_CLIENT_VSI_FLAG_TCP_ENABLE)) {
ctxt.info.valid_sections =
cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
ctxt.info.queueing_opt_flags &= ~I40E_AQ_VSI_QUE_OPT_TCP_ENA;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.h b/drivers/net/ethernet/intel/i40e/i40e_client.h
index 15b21a5315b5..ba55c889e4c5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_client.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_client.h
@@ -132,7 +132,7 @@ struct i40e_info {
#define I40E_CLIENT_RESET_LEVEL_PF 1
#define I40E_CLIENT_RESET_LEVEL_CORE 2
-#define I40E_CLIENT_VSI_FLAG_TCP_PACKET_ENABLE BIT(1)
+#define I40E_CLIENT_VSI_FLAG_TCP_ENABLE BIT(1)
struct i40e_ops {
/* setup_q_vector_list enables queues with a particular vector */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index 40c5f7628aa1..ef5a868aae46 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -278,6 +278,8 @@ const char *i40e_stat_str(struct i40e_hw *hw, i40e_status stat_err)
return "I40E_NOT_SUPPORTED";
case I40E_ERR_FIRMWARE_API_VERSION:
return "I40E_ERR_FIRMWARE_API_VERSION";
+ case I40E_ERR_ADMIN_QUEUE_CRITICAL_ERROR:
+ return "I40E_ERR_ADMIN_QUEUE_CRITICAL_ERROR";
}
snprintf(hw->err_str, sizeof(hw->err_str), "%d", stat_err);
@@ -1486,6 +1488,7 @@ u32 i40e_led_get(struct i40e_hw *hw)
case I40E_COMBINED_ACTIVITY:
case I40E_FILTER_ACTIVITY:
case I40E_MAC_ACTIVITY:
+ case I40E_LINK_ACTIVITY:
continue;
default:
break;
@@ -1534,6 +1537,7 @@ void i40e_led_set(struct i40e_hw *hw, u32 mode, bool blink)
case I40E_COMBINED_ACTIVITY:
case I40E_FILTER_ACTIVITY:
case I40E_MAC_ACTIVITY:
+ case I40E_LINK_ACTIVITY:
continue;
default:
break;
@@ -1544,9 +1548,6 @@ void i40e_led_set(struct i40e_hw *hw, u32 mode, bool blink)
gpio_val |= ((mode << I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT) &
I40E_GLGEN_GPIO_CTL_LED_MODE_MASK);
- if (mode == I40E_LINK_ACTIVITY)
- blink = false;
-
if (blink)
gpio_val |= BIT(I40E_GLGEN_GPIO_CTL_LED_BLINK_SHIFT);
else
@@ -3465,13 +3466,14 @@ exit:
* @length: length of the section to be written (in bytes from the offset)
* @data: command buffer (size [bytes] = length)
* @last_command: tells if this is the last command in a series
+ * @preservation_flags: Preservation mode flags
* @cmd_details: pointer to command details structure or NULL
*
* Update the NVM using the admin queue commands
**/
i40e_status i40e_aq_update_nvm(struct i40e_hw *hw, u8 module_pointer,
u32 offset, u16 length, void *data,
- bool last_command,
+ bool last_command, u8 preservation_flags,
struct i40e_asq_cmd_details *cmd_details)
{
struct i40e_aq_desc desc;
@@ -3490,6 +3492,16 @@ i40e_status i40e_aq_update_nvm(struct i40e_hw *hw, u8 module_pointer,
/* If this is the last command in a series, set the proper flag. */
if (last_command)
cmd->command_flags |= I40E_AQ_NVM_LAST_CMD;
+ if (hw->mac.type == I40E_MAC_X722) {
+ if (preservation_flags == I40E_NVM_PRESERVATION_FLAGS_SELECTED)
+ cmd->command_flags |=
+ (I40E_AQ_NVM_PRESERVATION_FLAGS_SELECTED <<
+ I40E_AQ_NVM_PRESERVATION_FLAGS_SHIFT);
+ else if (preservation_flags == I40E_NVM_PRESERVATION_FLAGS_ALL)
+ cmd->command_flags |=
+ (I40E_AQ_NVM_PRESERVATION_FLAGS_ALL <<
+ I40E_AQ_NVM_PRESERVATION_FLAGS_SHIFT);
+ }
cmd->module_pointer = module_pointer;
cmd->offset = cpu_to_le32(offset);
cmd->length = cpu_to_le16(length);
@@ -3629,7 +3641,34 @@ i40e_status i40e_aq_start_lldp(struct i40e_hw *hw,
i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_lldp_start);
cmd->command = I40E_AQ_LLDP_AGENT_START;
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ return status;
+}
+/**
+ * i40e_aq_set_dcb_parameters
+ * @hw: pointer to the hw struct
+ * @cmd_details: pointer to command details structure or NULL
+ * @dcb_enable: True if DCB configuration needs to be applied
+ *
+ **/
+enum i40e_status_code
+i40e_aq_set_dcb_parameters(struct i40e_hw *hw, bool dcb_enable,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_set_dcb_parameters *cmd =
+ (struct i40e_aqc_set_dcb_parameters *)&desc.params.raw;
+ i40e_status status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_set_dcb_parameters);
+
+ if (dcb_enable) {
+ cmd->valid_flags = I40E_DCB_VALID;
+ cmd->command = I40E_AQ_DCB_SET_AGENT;
+ }
status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
return status;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 34173f821fd9..2f5bee713fef 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -233,6 +233,7 @@ static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = {
I40E_PRIV_FLAG("legacy-rx", I40E_FLAG_LEGACY_RX, 0),
I40E_PRIV_FLAG("disable-source-pruning",
I40E_FLAG_SOURCE_PRUNING_DISABLED, 0),
+ I40E_PRIV_FLAG("disable-fw-lldp", I40E_FLAG_DISABLE_FW_LLDP, 0),
};
#define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_gstrings_priv_flags)
@@ -2305,6 +2306,8 @@ static void i40e_set_itr_per_queue(struct i40e_vsi *vsi,
struct ethtool_coalesce *ec,
int queue)
{
+ struct i40e_ring *rx_ring = vsi->rx_rings[queue];
+ struct i40e_ring *tx_ring = vsi->tx_rings[queue];
struct i40e_pf *pf = vsi->back;
struct i40e_hw *hw = &pf->hw;
struct i40e_q_vector *q_vector;
@@ -2312,26 +2315,26 @@ static void i40e_set_itr_per_queue(struct i40e_vsi *vsi,
intrl = i40e_intrl_usec_to_reg(vsi->int_rate_limit);
- vsi->rx_rings[queue]->rx_itr_setting = ec->rx_coalesce_usecs;
- vsi->tx_rings[queue]->tx_itr_setting = ec->tx_coalesce_usecs;
+ rx_ring->rx_itr_setting = ec->rx_coalesce_usecs;
+ tx_ring->tx_itr_setting = ec->tx_coalesce_usecs;
if (ec->use_adaptive_rx_coalesce)
- vsi->rx_rings[queue]->rx_itr_setting |= I40E_ITR_DYNAMIC;
+ rx_ring->rx_itr_setting |= I40E_ITR_DYNAMIC;
else
- vsi->rx_rings[queue]->rx_itr_setting &= ~I40E_ITR_DYNAMIC;
+ rx_ring->rx_itr_setting &= ~I40E_ITR_DYNAMIC;
if (ec->use_adaptive_tx_coalesce)
- vsi->tx_rings[queue]->tx_itr_setting |= I40E_ITR_DYNAMIC;
+ tx_ring->tx_itr_setting |= I40E_ITR_DYNAMIC;
else
- vsi->tx_rings[queue]->tx_itr_setting &= ~I40E_ITR_DYNAMIC;
+ tx_ring->tx_itr_setting &= ~I40E_ITR_DYNAMIC;
- q_vector = vsi->rx_rings[queue]->q_vector;
- q_vector->rx.itr = ITR_TO_REG(vsi->rx_rings[queue]->rx_itr_setting);
+ q_vector = rx_ring->q_vector;
+ q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting);
vector = vsi->base_vector + q_vector->v_idx;
wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1), q_vector->rx.itr);
- q_vector = vsi->tx_rings[queue]->q_vector;
- q_vector->tx.itr = ITR_TO_REG(vsi->tx_rings[queue]->tx_itr_setting);
+ q_vector = tx_ring->q_vector;
+ q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting);
vector = vsi->base_vector + q_vector->v_idx;
wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1), q_vector->tx.itr);
@@ -2746,16 +2749,16 @@ static int i40e_get_ethtool_fdir_entry(struct i40e_pf *pf,
no_input_set:
if (input_set & I40E_L3_SRC_MASK)
- fsp->m_u.tcp_ip4_spec.ip4src = htonl(0xFFFF);
+ fsp->m_u.tcp_ip4_spec.ip4src = htonl(0xFFFFFFFF);
if (input_set & I40E_L3_DST_MASK)
- fsp->m_u.tcp_ip4_spec.ip4dst = htonl(0xFFFF);
+ fsp->m_u.tcp_ip4_spec.ip4dst = htonl(0xFFFFFFFF);
if (input_set & I40E_L4_SRC_MASK)
- fsp->m_u.tcp_ip4_spec.psrc = htons(0xFFFFFFFF);
+ fsp->m_u.tcp_ip4_spec.psrc = htons(0xFFFF);
if (input_set & I40E_L4_DST_MASK)
- fsp->m_u.tcp_ip4_spec.pdst = htons(0xFFFFFFFF);
+ fsp->m_u.tcp_ip4_spec.pdst = htons(0xFFFF);
if (rule->dest_ctl == I40E_FILTER_PROGRAM_DESC_DEST_DROP_PACKET)
fsp->ring_cookie = RX_CLS_FLOW_DISC;
@@ -3806,6 +3809,16 @@ static int i40e_check_fdir_input_set(struct i40e_vsi *vsi,
i40e_write_fd_input_set(pf, index, new_mask);
+ /* IP_USER_FLOW filters match both IPv4/Other and IPv4/Fragmented
+ * frames. If we're programming the input set for IPv4/Other, we also
+ * need to program the IPv4/Fragmented input set. Since we don't have
+ * separate support, we'll always assume and enforce that the two flow
+ * types must have matching input sets.
+ */
+ if (index == I40E_FILTER_PCTYPE_NONF_IPV4_OTHER)
+ i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_FRAG_IPV4,
+ new_mask);
+
/* Add the new offset and update table, if necessary */
if (new_flex_offset) {
err = i40e_add_flex_offset(&pf->l4_flex_pit_list, src_offset,
@@ -3828,6 +3841,87 @@ static int i40e_check_fdir_input_set(struct i40e_vsi *vsi,
}
/**
+ * i40e_match_fdir_filter - Return true of two filters match
+ * @a: pointer to filter struct
+ * @b: pointer to filter struct
+ *
+ * Returns true if the two filters match exactly the same criteria. I.e. they
+ * match the same flow type and have the same parameters. We don't need to
+ * check any input-set since all filters of the same flow type must use the
+ * same input set.
+ **/
+static bool i40e_match_fdir_filter(struct i40e_fdir_filter *a,
+ struct i40e_fdir_filter *b)
+{
+ /* The filters do not much if any of these criteria differ. */
+ if (a->dst_ip != b->dst_ip ||
+ a->src_ip != b->src_ip ||
+ a->dst_port != b->dst_port ||
+ a->src_port != b->src_port ||
+ a->flow_type != b->flow_type ||
+ a->ip4_proto != b->ip4_proto)
+ return false;
+
+ return true;
+}
+
+/**
+ * i40e_disallow_matching_filters - Check that new filters differ
+ * @vsi: pointer to the targeted VSI
+ * @input: new filter to check
+ *
+ * Due to hardware limitations, it is not possible for two filters that match
+ * similar criteria to be programmed at the same time. This is true for a few
+ * reasons:
+ *
+ * (a) all filters matching a particular flow type must use the same input
+ * set, that is they must match the same criteria.
+ * (b) different flow types will never match the same packet, as the flow type
+ * is decided by hardware before checking which rules apply.
+ * (c) hardware has no way to distinguish which order filters apply in.
+ *
+ * Due to this, we can't really support using the location data to order
+ * filters in the hardware parsing. It is technically possible for the user to
+ * request two filters matching the same criteria but which select different
+ * queues. In this case, rather than keep both filters in the list, we reject
+ * the 2nd filter when the user requests adding it.
+ *
+ * This avoids needing to track location for programming the filter to
+ * hardware, and ensures that we avoid some strange scenarios involving
+ * deleting filters which match the same criteria.
+ **/
+static int i40e_disallow_matching_filters(struct i40e_vsi *vsi,
+ struct i40e_fdir_filter *input)
+{
+ struct i40e_pf *pf = vsi->back;
+ struct i40e_fdir_filter *rule;
+ struct hlist_node *node2;
+
+ /* Loop through every filter, and check that it doesn't match */
+ hlist_for_each_entry_safe(rule, node2,
+ &pf->fdir_filter_list, fdir_node) {
+ /* Don't check the filters match if they share the same fd_id,
+ * since the new filter is actually just updating the target
+ * of the old filter.
+ */
+ if (rule->fd_id == input->fd_id)
+ continue;
+
+ /* If any filters match, then print a warning message to the
+ * kernel message buffer and bail out.
+ */
+ if (i40e_match_fdir_filter(rule, input)) {
+ dev_warn(&pf->pdev->dev,
+ "Existing user defined filter %d already matches this flow.\n",
+ rule->fd_id);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+/**
* i40e_add_fdir_ethtool - Add/Remove Flow Director filters
* @vsi: pointer to the targeted VSI
* @cmd: command to get or set RX flow classification rules
@@ -3939,19 +4033,25 @@ static int i40e_add_fdir_ethtool(struct i40e_vsi *vsi,
input->flex_offset = userdef.flex_offset;
}
- ret = i40e_add_del_fdir(vsi, input, true);
+ /* Avoid programming two filters with identical match criteria. */
+ ret = i40e_disallow_matching_filters(vsi, input);
if (ret)
- goto free_input;
+ goto free_filter_memory;
/* Add the input filter to the fdir_input_list, possibly replacing
* a previous filter. Do not free the input structure after adding it
* to the list as this would cause a use-after-free bug.
*/
i40e_update_ethtool_fdir_entry(vsi, input, fsp->location, NULL);
-
+ ret = i40e_add_del_fdir(vsi, input, true);
+ if (ret)
+ goto remove_sw_rule;
return 0;
-free_input:
+remove_sw_rule:
+ hlist_del(&input->fdir_node);
+ pf->fdir_pf_active_filters--;
+free_filter_memory:
kfree(input);
return ret;
}
@@ -4264,7 +4364,7 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
struct i40e_netdev_priv *np = netdev_priv(dev);
struct i40e_vsi *vsi = np->vsi;
struct i40e_pf *pf = vsi->back;
- u32 orig_flags, new_flags, changed_flags;
+ u64 orig_flags, new_flags, changed_flags;
u32 i, j;
orig_flags = READ_ONCE(pf->flags);
@@ -4315,13 +4415,32 @@ flags_complete:
!(pf->hw_features & I40E_HW_ATR_EVICT_CAPABLE))
return -EOPNOTSUPP;
+ /* Disable FW LLDP not supported if NPAR active or if FW
+ * API version < 1.7
+ */
+ if (new_flags & I40E_FLAG_DISABLE_FW_LLDP) {
+ if (pf->hw.func_caps.npar_enable) {
+ dev_warn(&pf->pdev->dev,
+ "Unable to stop FW LLDP if NPAR active\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (pf->hw.aq.api_maj_ver < 1 ||
+ (pf->hw.aq.api_maj_ver == 1 &&
+ pf->hw.aq.api_min_ver < 7)) {
+ dev_warn(&pf->pdev->dev,
+ "FW ver does not support stopping FW LLDP\n");
+ return -EOPNOTSUPP;
+ }
+ }
+
/* Compare and exchange the new flags into place. If we failed, that
* is if cmpxchg returns anything but the old value, this means that
* something else has modified the flags variable since we copied it
* originally. We'll just punt with an error and log something in the
* message buffer.
*/
- if (cmpxchg(&pf->flags, orig_flags, new_flags) != orig_flags) {
+ if (cmpxchg64(&pf->flags, orig_flags, new_flags) != orig_flags) {
dev_warn(&pf->pdev->dev,
"Unable to update pf->flags as it was modified by another thread...\n");
return -EAGAIN;
@@ -4360,12 +4479,37 @@ flags_complete:
}
}
+ if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) {
+ if (pf->flags & I40E_FLAG_DISABLE_FW_LLDP) {
+ struct i40e_dcbx_config *dcbcfg;
+ int i;
+
+ i40e_aq_stop_lldp(&pf->hw, true, NULL);
+ i40e_aq_set_dcb_parameters(&pf->hw, true, NULL);
+ /* reset local_dcbx_config to default */
+ dcbcfg = &pf->hw.local_dcbx_config;
+ dcbcfg->etscfg.willing = 1;
+ dcbcfg->etscfg.maxtcs = 0;
+ dcbcfg->etscfg.tcbwtable[0] = 100;
+ for (i = 1; i < I40E_MAX_TRAFFIC_CLASS; i++)
+ dcbcfg->etscfg.tcbwtable[i] = 0;
+ for (i = 0; i < I40E_MAX_USER_PRIORITY; i++)
+ dcbcfg->etscfg.prioritytable[i] = 0;
+ dcbcfg->etscfg.tsatable[0] = I40E_IEEE_TSA_ETS;
+ dcbcfg->pfc.willing = 1;
+ dcbcfg->pfc.pfccap = I40E_MAX_TRAFFIC_CLASS;
+ } else {
+ i40e_aq_start_lldp(&pf->hw, NULL);
+ }
+ }
+
/* Issue reset to cause things to take effect, as additional bits
* are added we will need to create a mask of bits requiring reset
*/
if (changed_flags & (I40E_FLAG_VEB_STATS_ENABLED |
I40E_FLAG_LEGACY_RX |
- I40E_FLAG_SOURCE_PRUNING_DISABLED))
+ I40E_FLAG_SOURCE_PRUNING_DISABLED |
+ I40E_FLAG_DISABLE_FW_LLDP))
i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED), true);
return 0;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 2ab22eba0c7c..f95ce9b5e4fb 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -1818,6 +1818,10 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
num_tc_qps = qcount / numtc;
num_tc_qps = min_t(int, num_tc_qps, i40e_pf_get_max_q_per_tc(pf));
+ /* Do not allow use more TC queue pairs than MSI-X vectors exist */
+ if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+ num_tc_qps = min_t(int, num_tc_qps, pf->num_lan_msix);
+
/* Setup queue offset/count for all TCs for given VSI */
for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
/* See if the given TC is enabled for the given VSI */
@@ -4122,6 +4126,7 @@ static void i40e_vsi_map_rings_to_vectors(struct i40e_vsi *vsi)
num_ringpairs = DIV_ROUND_UP(qp_remaining, q_vectors - v_start);
q_vector->num_ringpairs = num_ringpairs;
+ q_vector->reg_idx = q_vector->v_idx + vsi->base_vector - 1;
q_vector->rx.count = 0;
q_vector->tx.count = 0;
@@ -4877,104 +4882,6 @@ static int i40e_pf_wait_queues_disabled(struct i40e_pf *pf)
#endif
/**
- * i40e_detect_recover_hung_queue - Function to detect and recover hung_queue
- * @q_idx: TX queue number
- * @vsi: Pointer to VSI struct
- *
- * This function checks specified queue for given VSI. Detects hung condition.
- * We proactively detect hung TX queues by checking if interrupts are disabled
- * but there are pending descriptors. If it appears hung, attempt to recover
- * by triggering a SW interrupt.
- **/
-static void i40e_detect_recover_hung_queue(int q_idx, struct i40e_vsi *vsi)
-{
- struct i40e_ring *tx_ring = NULL;
- struct i40e_pf *pf;
- u32 val, tx_pending;
- int i;
-
- pf = vsi->back;
-
- /* now that we have an index, find the tx_ring struct */
- for (i = 0; i < vsi->num_queue_pairs; i++) {
- if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) {
- if (q_idx == vsi->tx_rings[i]->queue_index) {
- tx_ring = vsi->tx_rings[i];
- break;
- }
- }
- }
-
- if (!tx_ring)
- return;
-
- /* Read interrupt register */
- if (pf->flags & I40E_FLAG_MSIX_ENABLED)
- val = rd32(&pf->hw,
- I40E_PFINT_DYN_CTLN(tx_ring->q_vector->v_idx +
- tx_ring->vsi->base_vector - 1));
- else
- val = rd32(&pf->hw, I40E_PFINT_DYN_CTL0);
-
- tx_pending = i40e_get_tx_pending(tx_ring);
-
- /* Interrupts are disabled and TX pending is non-zero,
- * trigger the SW interrupt (don't wait). Worst case
- * there will be one extra interrupt which may result
- * into not cleaning any queues because queues are cleaned.
- */
- if (tx_pending && (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK)))
- i40e_force_wb(vsi, tx_ring->q_vector);
-}
-
-/**
- * i40e_detect_recover_hung - Function to detect and recover hung_queues
- * @pf: pointer to PF struct
- *
- * LAN VSI has netdev and netdev has TX queues. This function is to check
- * each of those TX queues if they are hung, trigger recovery by issuing
- * SW interrupt.
- **/
-static void i40e_detect_recover_hung(struct i40e_pf *pf)
-{
- struct net_device *netdev;
- struct i40e_vsi *vsi;
- unsigned int i;
-
- /* Only for LAN VSI */
- vsi = pf->vsi[pf->lan_vsi];
-
- if (!vsi)
- return;
-
- /* Make sure, VSI state is not DOWN/RECOVERY_PENDING */
- if (test_bit(__I40E_VSI_DOWN, vsi->back->state) ||
- test_bit(__I40E_RESET_RECOVERY_PENDING, vsi->back->state))
- return;
-
- /* Make sure type is MAIN VSI */
- if (vsi->type != I40E_VSI_MAIN)
- return;
-
- netdev = vsi->netdev;
- if (!netdev)
- return;
-
- /* Bail out if netif_carrier is not OK */
- if (!netif_carrier_ok(netdev))
- return;
-
- /* Go thru' TX queues for netdev */
- for (i = 0; i < netdev->num_tx_queues; i++) {
- struct netdev_queue *q;
-
- q = netdev_get_tx_queue(netdev, i);
- if (q)
- i40e_detect_recover_hung_queue(i, vsi);
- }
-}
-
-/**
* i40e_get_iscsi_tc_map - Return TC map for iSCSI APP
* @pf: pointer to PF
*
@@ -5342,6 +5249,8 @@ static void i40e_vsi_update_queue_map(struct i40e_vsi *vsi,
static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc)
{
u8 bw_share[I40E_MAX_TRAFFIC_CLASS] = {0};
+ struct i40e_pf *pf = vsi->back;
+ struct i40e_hw *hw = &pf->hw;
struct i40e_vsi_context ctxt;
int ret = 0;
int i;
@@ -5359,10 +5268,40 @@ static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc)
ret = i40e_vsi_configure_bw_alloc(vsi, enabled_tc, bw_share);
if (ret) {
- dev_info(&vsi->back->pdev->dev,
+ struct i40e_aqc_query_vsi_bw_config_resp bw_config = {0};
+
+ dev_info(&pf->pdev->dev,
"Failed configuring TC map %d for VSI %d\n",
enabled_tc, vsi->seid);
- goto out;
+ ret = i40e_aq_query_vsi_bw_config(hw, vsi->seid,
+ &bw_config, NULL);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+ "Failed querying vsi bw info, err %s aq_err %s\n",
+ i40e_stat_str(hw, ret),
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ goto out;
+ }
+ if ((bw_config.tc_valid_bits & enabled_tc) != enabled_tc) {
+ u8 valid_tc = bw_config.tc_valid_bits & enabled_tc;
+
+ if (!valid_tc)
+ valid_tc = bw_config.tc_valid_bits;
+ /* Always enable TC0, no matter what */
+ valid_tc |= 1;
+ dev_info(&pf->pdev->dev,
+ "Requested tc 0x%x, but FW reports 0x%x as valid. Attempting to use 0x%x.\n",
+ enabled_tc, bw_config.tc_valid_bits, valid_tc);
+ enabled_tc = valid_tc;
+ }
+
+ ret = i40e_vsi_configure_bw_alloc(vsi, enabled_tc, bw_share);
+ if (ret) {
+ dev_err(&pf->pdev->dev,
+ "Unable to configure TC map %d for VSI %d\n",
+ enabled_tc, vsi->seid);
+ goto out;
+ }
}
/* Update Queue Pairs Mapping for currently enabled UPs */
@@ -5402,13 +5341,12 @@ static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc)
/* Update the VSI after updating the VSI queue-mapping
* information
*/
- ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
+ ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
if (ret) {
- dev_info(&vsi->back->pdev->dev,
+ dev_info(&pf->pdev->dev,
"Update vsi tc config failed, err %s aq_err %s\n",
- i40e_stat_str(&vsi->back->hw, ret),
- i40e_aq_str(&vsi->back->hw,
- vsi->back->hw.aq.asq_last_status));
+ i40e_stat_str(hw, ret),
+ i40e_aq_str(hw, hw->aq.asq_last_status));
goto out;
}
/* update the local VSI info with updated queue map */
@@ -5418,11 +5356,10 @@ static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc)
/* Update current VSI BW information */
ret = i40e_vsi_get_bw_info(vsi);
if (ret) {
- dev_info(&vsi->back->pdev->dev,
+ dev_info(&pf->pdev->dev,
"Failed updating vsi bw info, err %s aq_err %s\n",
- i40e_stat_str(&vsi->back->hw, ret),
- i40e_aq_str(&vsi->back->hw,
- vsi->back->hw.aq.asq_last_status));
+ i40e_stat_str(hw, ret),
+ i40e_aq_str(hw, hw->aq.asq_last_status));
goto out;
}
@@ -6388,8 +6325,11 @@ static int i40e_init_pf_dcb(struct i40e_pf *pf)
struct i40e_hw *hw = &pf->hw;
int err = 0;
- /* Do not enable DCB for SW1 and SW2 images even if the FW is capable */
- if (pf->hw_features & I40E_HW_NO_DCB_SUPPORT)
+ /* Do not enable DCB for SW1 and SW2 images even if the FW is capable
+ * Also do not enable DCBx if FW LLDP agent is disabled
+ */
+ if ((pf->hw_features & I40E_HW_NO_DCB_SUPPORT) ||
+ (pf->flags & I40E_FLAG_DISABLE_FW_LLDP))
goto out;
/* Get the initial DCB configuration */
@@ -6416,6 +6356,9 @@ static int i40e_init_pf_dcb(struct i40e_pf *pf)
dev_dbg(&pf->pdev->dev,
"DCBX offload is supported for this PF.\n");
}
+ } else if (pf->hw.aq.asq_last_status == I40E_AQ_RC_EPERM) {
+ dev_info(&pf->pdev->dev, "FW LLDP disabled for this PF.\n");
+ pf->flags |= I40E_FLAG_DISABLE_FW_LLDP;
} else {
dev_info(&pf->pdev->dev,
"Query for DCB configuration failed, err %s aq_err %s\n",
@@ -7505,9 +7448,6 @@ static int i40e_setup_tc_cls_flower(struct i40e_netdev_priv *np,
{
struct i40e_vsi *vsi = np->vsi;
- if (cls_flower->common.chain_index)
- return -EOPNOTSUPP;
-
switch (cls_flower->command) {
case TC_CLSFLOWER_REPLACE:
return i40e_configure_clsflower(vsi, cls_flower);
@@ -7525,6 +7465,9 @@ static int i40e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
{
struct i40e_netdev_priv *np = cb_priv;
+ if (!tc_cls_can_offload_and_chain0(np->vsi->netdev, type_data))
+ return -EOPNOTSUPP;
+
switch (type) {
case TC_SETUP_CLSFLOWER:
return i40e_setup_tc_cls_flower(np, type_data);
@@ -7742,6 +7685,9 @@ static void i40e_fdir_filter_exit(struct i40e_pf *pf)
/* Reprogram the default input set for Other/IPv4 */
i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_NONF_IPV4_OTHER,
I40E_L3_SRC_MASK | I40E_L3_DST_MASK);
+
+ i40e_write_fd_input_set(pf, I40E_FILTER_PCTYPE_FRAG_IPV4,
+ I40E_L3_SRC_MASK | I40E_L3_DST_MASK);
}
/**
@@ -9075,6 +9021,17 @@ static int i40e_rebuild_channels(struct i40e_vsi *vsi)
vsi->uplink_seid);
return ret;
}
+ /* Reconfigure TX queues using QTX_CTL register */
+ ret = i40e_channel_config_tx_ring(vsi->back, vsi, ch);
+ if (ret) {
+ dev_info(&vsi->back->pdev->dev,
+ "failed to configure TX rings for channel %u\n",
+ ch->seid);
+ return ret;
+ }
+ /* update 'next_base_queue' */
+ vsi->next_base_queue = vsi->next_base_queue +
+ ch->num_queue_pairs;
if (ch->max_tx_rate) {
u64 credits = ch->max_tx_rate;
@@ -9278,6 +9235,9 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
goto end_core_reset;
}
+ /* Enable FW to write a default DCB config on link-up */
+ i40e_aq_set_dcb_parameters(hw, true, NULL);
+
#ifdef CONFIG_I40E_DCB
ret = i40e_init_pf_dcb(pf);
if (ret) {
@@ -9695,7 +9655,7 @@ static void i40e_service_task(struct work_struct *work)
if (test_and_set_bit(__I40E_SERVICE_SCHED, pf->state))
return;
- i40e_detect_recover_hung(pf);
+ i40e_detect_recover_hung(pf->vsi[pf->lan_vsi]);
i40e_sync_filters_subtask(pf);
i40e_reset_subtask(pf);
i40e_handle_mdd_event(pf);
@@ -10462,10 +10422,9 @@ static int i40e_init_interrupt_scheme(struct i40e_pf *pf)
/* set up vector assignment tracking */
size = sizeof(struct i40e_lump_tracking) + (sizeof(u16) * vectors);
pf->irq_pile = kzalloc(size, GFP_KERNEL);
- if (!pf->irq_pile) {
- dev_err(&pf->pdev->dev, "error allocating irq_pile memory\n");
+ if (!pf->irq_pile)
return -ENOMEM;
- }
+
pf->irq_pile->num_entries = vectors;
pf->irq_pile->search_hint = 0;
@@ -10783,8 +10742,13 @@ static int i40e_pf_config_rss(struct i40e_pf *pf)
/* Determine the RSS size of the VSI */
if (!vsi->rss_size) {
u16 qcount;
-
- qcount = vsi->num_queue_pairs / vsi->tc_config.numtc;
+ /* If the firmware does something weird during VSI init, we
+ * could end up with zero TCs. Check for that to avoid
+ * divide-by-zero. It probably won't pass traffic, but it also
+ * won't panic.
+ */
+ qcount = vsi->num_queue_pairs /
+ (vsi->tc_config.numtc ? vsi->tc_config.numtc : 1);
vsi->rss_size = min_t(int, pf->alloc_rss_size, qcount);
}
if (!vsi->rss_size)
@@ -10972,7 +10936,7 @@ i40e_status i40e_commit_partition_bw_setting(struct i40e_pf *pf)
ret = i40e_aq_update_nvm(&pf->hw,
I40E_SR_NVM_CONTROL_WORD,
0x10, sizeof(nvm_word),
- &nvm_word, true, NULL);
+ &nvm_word, true, 0, NULL);
/* Save off last admin queue command status before releasing
* the NVM
*/
@@ -11113,13 +11077,13 @@ static int i40e_sw_init(struct i40e_pf *pf)
pf->hw.aq.fw_maj_ver >= 6)
pf->hw_features |= I40E_HW_PTP_L4_CAPABLE;
- if (pf->hw.func_caps.vmdq) {
+ if (pf->hw.func_caps.vmdq && num_online_cpus() != 1) {
pf->num_vmdq_vsis = I40E_DEFAULT_NUM_VMDQ_VSI;
pf->flags |= I40E_FLAG_VMDQ_ENABLED;
pf->num_vmdq_qps = i40e_default_queues_per_vmdq(pf);
}
- if (pf->hw.func_caps.iwarp) {
+ if (pf->hw.func_caps.iwarp && num_online_cpus() != 1) {
pf->flags |= I40E_FLAG_IWARP_ENABLED;
/* IWARP needs one extra vector for CQP just like MISC.*/
pf->num_iwarp_msix = (int)num_online_cpus() + 1;
@@ -13596,6 +13560,10 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
pci_set_drvdata(pdev, pf);
pci_save_state(pdev);
+
+ /* Enable FW to write default DCB config on link-up */
+ i40e_aq_set_dcb_parameters(hw, true, NULL);
+
#ifdef CONFIG_I40E_DCB
err = i40e_init_pf_dcb(pf);
if (err) {
diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
index 425713fb72e5..76a5cb04e4fe 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
@@ -239,8 +239,9 @@ read_nvm_exit:
*
* Writes a 16 bit words buffer to the Shadow RAM using the admin command.
**/
-static i40e_status i40e_read_nvm_aq(struct i40e_hw *hw, u8 module_pointer,
- u32 offset, u16 words, void *data,
+static i40e_status i40e_read_nvm_aq(struct i40e_hw *hw,
+ u8 module_pointer, u32 offset,
+ u16 words, void *data,
bool last_command)
{
i40e_status ret_code = I40E_ERR_NVM;
@@ -496,7 +497,8 @@ static i40e_status i40e_write_nvm_aq(struct i40e_hw *hw, u8 module_pointer,
ret_code = i40e_aq_update_nvm(hw, module_pointer,
2 * offset, /*bytes*/
2 * words, /*bytes*/
- data, last_command, &cmd_details);
+ data, last_command, 0,
+ &cmd_details);
return ret_code;
}
@@ -677,6 +679,9 @@ static i40e_status i40e_nvmupd_exec_aq(struct i40e_hw *hw,
static i40e_status i40e_nvmupd_get_aq_result(struct i40e_hw *hw,
struct i40e_nvm_access *cmd,
u8 *bytes, int *perrno);
+static i40e_status i40e_nvmupd_get_aq_event(struct i40e_hw *hw,
+ struct i40e_nvm_access *cmd,
+ u8 *bytes, int *perrno);
static inline u8 i40e_nvmupd_get_module(u32 val)
{
return (u8)(val & I40E_NVM_MOD_PNT_MASK);
@@ -686,6 +691,12 @@ static inline u8 i40e_nvmupd_get_transaction(u32 val)
return (u8)((val & I40E_NVM_TRANS_MASK) >> I40E_NVM_TRANS_SHIFT);
}
+static inline u8 i40e_nvmupd_get_preservation_flags(u32 val)
+{
+ return (u8)((val & I40E_NVM_PRESERVATION_FLAGS_MASK) >>
+ I40E_NVM_PRESERVATION_FLAGS_SHIFT);
+}
+
static const char * const i40e_nvm_update_state_str[] = {
"I40E_NVMUPD_INVALID",
"I40E_NVMUPD_READ_CON",
@@ -703,6 +714,7 @@ static const char * const i40e_nvm_update_state_str[] = {
"I40E_NVMUPD_STATUS",
"I40E_NVMUPD_EXEC_AQ",
"I40E_NVMUPD_GET_AQ_RESULT",
+ "I40E_NVMUPD_GET_AQ_EVENT",
};
/**
@@ -798,9 +810,9 @@ i40e_status i40e_nvmupd_command(struct i40e_hw *hw,
* the wait info and return before doing anything else
*/
if (cmd->offset == 0xffff) {
- i40e_nvmupd_check_wait_event(hw, hw->nvm_wait_opcode);
+ i40e_nvmupd_clear_wait_state(hw);
status = 0;
- goto exit;
+ break;
}
status = I40E_ERR_NOT_READY;
@@ -815,7 +827,7 @@ i40e_status i40e_nvmupd_command(struct i40e_hw *hw,
*perrno = -ESRCH;
break;
}
-exit:
+
mutex_unlock(&hw->aq.arq_mutex);
return status;
}
@@ -944,6 +956,10 @@ static i40e_status i40e_nvmupd_state_init(struct i40e_hw *hw,
status = i40e_nvmupd_get_aq_result(hw, cmd, bytes, perrno);
break;
+ case I40E_NVMUPD_GET_AQ_EVENT:
+ status = i40e_nvmupd_get_aq_event(hw, cmd, bytes, perrno);
+ break;
+
default:
i40e_debug(hw, I40E_DEBUG_NVM,
"NVMUPD: bad cmd %s in init state\n",
@@ -1118,38 +1134,53 @@ retry:
}
/**
- * i40e_nvmupd_check_wait_event - handle NVM update operation events
+ * i40e_nvmupd_clear_wait_state - clear wait state on hw
* @hw: pointer to the hardware structure
- * @opcode: the event that just happened
**/
-void i40e_nvmupd_check_wait_event(struct i40e_hw *hw, u16 opcode)
+void i40e_nvmupd_clear_wait_state(struct i40e_hw *hw)
{
- if (opcode == hw->nvm_wait_opcode) {
- i40e_debug(hw, I40E_DEBUG_NVM,
- "NVMUPD: clearing wait on opcode 0x%04x\n", opcode);
- if (hw->nvm_release_on_done) {
- i40e_release_nvm(hw);
- hw->nvm_release_on_done = false;
- }
- hw->nvm_wait_opcode = 0;
+ i40e_debug(hw, I40E_DEBUG_NVM,
+ "NVMUPD: clearing wait on opcode 0x%04x\n",
+ hw->nvm_wait_opcode);
- if (hw->aq.arq_last_status) {
- hw->nvmupd_state = I40E_NVMUPD_STATE_ERROR;
- return;
- }
+ if (hw->nvm_release_on_done) {
+ i40e_release_nvm(hw);
+ hw->nvm_release_on_done = false;
+ }
+ hw->nvm_wait_opcode = 0;
- switch (hw->nvmupd_state) {
- case I40E_NVMUPD_STATE_INIT_WAIT:
- hw->nvmupd_state = I40E_NVMUPD_STATE_INIT;
- break;
+ if (hw->aq.arq_last_status) {
+ hw->nvmupd_state = I40E_NVMUPD_STATE_ERROR;
+ return;
+ }
- case I40E_NVMUPD_STATE_WRITE_WAIT:
- hw->nvmupd_state = I40E_NVMUPD_STATE_WRITING;
- break;
+ switch (hw->nvmupd_state) {
+ case I40E_NVMUPD_STATE_INIT_WAIT:
+ hw->nvmupd_state = I40E_NVMUPD_STATE_INIT;
+ break;
- default:
- break;
- }
+ case I40E_NVMUPD_STATE_WRITE_WAIT:
+ hw->nvmupd_state = I40E_NVMUPD_STATE_WRITING;
+ break;
+
+ default:
+ break;
+ }
+}
+
+/**
+ * i40e_nvmupd_check_wait_event - handle NVM update operation events
+ * @hw: pointer to the hardware structure
+ * @opcode: the event that just happened
+ **/
+void i40e_nvmupd_check_wait_event(struct i40e_hw *hw, u16 opcode,
+ struct i40e_aq_desc *desc)
+{
+ u32 aq_desc_len = sizeof(struct i40e_aq_desc);
+
+ if (opcode == hw->nvm_wait_opcode) {
+ memcpy(&hw->nvm_aq_event_desc, desc, aq_desc_len);
+ i40e_nvmupd_clear_wait_state(hw);
}
}
@@ -1205,6 +1236,9 @@ static enum i40e_nvmupd_cmd i40e_nvmupd_validate_command(struct i40e_hw *hw,
else if (module == 0)
upd_cmd = I40E_NVMUPD_GET_AQ_RESULT;
break;
+ case I40E_NVM_AQE:
+ upd_cmd = I40E_NVMUPD_GET_AQ_EVENT;
+ break;
}
break;
@@ -1267,6 +1301,9 @@ static i40e_status i40e_nvmupd_exec_aq(struct i40e_hw *hw,
u32 aq_data_len;
i40e_debug(hw, I40E_DEBUG_NVM, "NVMUPD: %s\n", __func__);
+ if (cmd->offset == 0xffff)
+ return 0;
+
memset(&cmd_details, 0, sizeof(cmd_details));
cmd_details.wb_desc = &hw->nvm_wb_desc;
@@ -1302,6 +1339,9 @@ static i40e_status i40e_nvmupd_exec_aq(struct i40e_hw *hw,
}
}
+ if (cmd->offset)
+ memset(&hw->nvm_aq_event_desc, 0, aq_desc_len);
+
/* and away we go! */
status = i40e_asq_send_command(hw, aq_desc, buff,
buff_size, &cmd_details);
@@ -1311,6 +1351,7 @@ static i40e_status i40e_nvmupd_exec_aq(struct i40e_hw *hw,
i40e_stat_str(hw, status),
i40e_aq_str(hw, hw->aq.asq_last_status));
*perrno = i40e_aq_rc_to_posix(status, hw->aq.asq_last_status);
+ return status;
}
/* should we wait for a followup event? */
@@ -1392,6 +1433,40 @@ static i40e_status i40e_nvmupd_get_aq_result(struct i40e_hw *hw,
}
/**
+ * i40e_nvmupd_get_aq_event - Get the Admin Queue event from previous exec_aq
+ * @hw: pointer to hardware structure
+ * @cmd: pointer to nvm update command buffer
+ * @bytes: pointer to the data buffer
+ * @perrno: pointer to return error code
+ *
+ * cmd structure contains identifiers and data buffer
+ **/
+static i40e_status i40e_nvmupd_get_aq_event(struct i40e_hw *hw,
+ struct i40e_nvm_access *cmd,
+ u8 *bytes, int *perrno)
+{
+ u32 aq_total_len;
+ u32 aq_desc_len;
+
+ i40e_debug(hw, I40E_DEBUG_NVM, "NVMUPD: %s\n", __func__);
+
+ aq_desc_len = sizeof(struct i40e_aq_desc);
+ aq_total_len = aq_desc_len + le16_to_cpu(hw->nvm_aq_event_desc.datalen);
+
+ /* check copylength range */
+ if (cmd->data_size > aq_total_len) {
+ i40e_debug(hw, I40E_DEBUG_NVM,
+ "%s: copy length %d too big, trimming to %d\n",
+ __func__, cmd->data_size, aq_total_len);
+ cmd->data_size = aq_total_len;
+ }
+
+ memcpy(bytes, &hw->nvm_aq_event_desc, cmd->data_size);
+
+ return 0;
+}
+
+/**
* i40e_nvmupd_nvm_read - Read NVM
* @hw: pointer to hardware structure
* @cmd: pointer to nvm update command buffer
@@ -1486,18 +1561,20 @@ static i40e_status i40e_nvmupd_nvm_write(struct i40e_hw *hw,
i40e_status status = 0;
struct i40e_asq_cmd_details cmd_details;
u8 module, transaction;
+ u8 preservation_flags;
bool last;
transaction = i40e_nvmupd_get_transaction(cmd->config);
module = i40e_nvmupd_get_module(cmd->config);
last = (transaction & I40E_NVM_LCB);
+ preservation_flags = i40e_nvmupd_get_preservation_flags(cmd->config);
memset(&cmd_details, 0, sizeof(cmd_details));
cmd_details.wb_desc = &hw->nvm_wb_desc;
status = i40e_aq_update_nvm(hw, module, cmd->offset,
(u16)cmd->data_size, bytes, last,
- &cmd_details);
+ preservation_flags, &cmd_details);
if (status) {
i40e_debug(hw, I40E_DEBUG_NVM,
"i40e_nvmupd_nvm_write mod 0x%x off 0x%x len 0x%x\n",
diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
index b3cc89cc3a86..83798b7841b9 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
@@ -214,7 +214,7 @@ i40e_status i40e_aq_discover_capabilities(struct i40e_hw *hw,
struct i40e_asq_cmd_details *cmd_details);
i40e_status i40e_aq_update_nvm(struct i40e_hw *hw, u8 module_pointer,
u32 offset, u16 length, void *data,
- bool last_command,
+ bool last_command, u8 preservation_flags,
struct i40e_asq_cmd_details *cmd_details);
i40e_status i40e_aq_get_lldp_mib(struct i40e_hw *hw, u8 bridge_type,
u8 mib_type, void *buff, u16 buff_size,
@@ -225,6 +225,10 @@ i40e_status i40e_aq_cfg_lldp_mib_change_event(struct i40e_hw *hw,
struct i40e_asq_cmd_details *cmd_details);
i40e_status i40e_aq_stop_lldp(struct i40e_hw *hw, bool shutdown_agent,
struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_set_dcb_parameters(struct i40e_hw *hw,
+ bool dcb_enable,
+ struct i40e_asq_cmd_details
+ *cmd_details);
i40e_status i40e_aq_start_lldp(struct i40e_hw *hw,
struct i40e_asq_cmd_details *cmd_details);
i40e_status i40e_aq_get_cee_dcb_config(struct i40e_hw *hw,
@@ -333,7 +337,9 @@ i40e_status i40e_validate_nvm_checksum(struct i40e_hw *hw,
i40e_status i40e_nvmupd_command(struct i40e_hw *hw,
struct i40e_nvm_access *cmd,
u8 *bytes, int *);
-void i40e_nvmupd_check_wait_event(struct i40e_hw *hw, u16 opcode);
+void i40e_nvmupd_check_wait_event(struct i40e_hw *hw, u16 opcode,
+ struct i40e_aq_desc *desc);
+void i40e_nvmupd_clear_wait_state(struct i40e_hw *hw);
void i40e_set_pci_config_data(struct i40e_hw *hw, u16 link_status);
extern struct i40e_rx_ptype_decoded i40e_ptype_lookup[];
diff --git a/drivers/net/ethernet/intel/i40e/i40e_status.h b/drivers/net/ethernet/intel/i40e/i40e_status.h
index 5f9cac55aa55..afb72e711d43 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_status.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_status.h
@@ -95,6 +95,7 @@ enum i40e_status_code {
I40E_ERR_NOT_READY = -63,
I40E_NOT_SUPPORTED = -64,
I40E_ERR_FIRMWARE_API_VERSION = -65,
+ I40E_ERR_ADMIN_QUEUE_CRITICAL_ERROR = -66,
};
#endif /* _I40E_STATUS_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 40edb6e5e6f6..e554aa6cf070 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -726,6 +726,59 @@ u32 i40e_get_tx_pending(struct i40e_ring *ring)
return 0;
}
+/**
+ * i40e_detect_recover_hung - Function to detect and recover hung_queues
+ * @vsi: pointer to vsi struct with tx queues
+ *
+ * VSI has netdev and netdev has TX queues. This function is to check each of
+ * those TX queues if they are hung, trigger recovery by issuing SW interrupt.
+ **/
+void i40e_detect_recover_hung(struct i40e_vsi *vsi)
+{
+ struct i40e_ring *tx_ring = NULL;
+ struct net_device *netdev;
+ unsigned int i;
+ int packets;
+
+ if (!vsi)
+ return;
+
+ if (test_bit(__I40E_VSI_DOWN, vsi->state))
+ return;
+
+ netdev = vsi->netdev;
+ if (!netdev)
+ return;
+
+ if (!netif_carrier_ok(netdev))
+ return;
+
+ for (i = 0; i < vsi->num_queue_pairs; i++) {
+ tx_ring = vsi->tx_rings[i];
+ if (tx_ring && tx_ring->desc) {
+ /* If packet counter has not changed the queue is
+ * likely stalled, so force an interrupt for this
+ * queue.
+ *
+ * prev_pkt_ctr would be negative if there was no
+ * pending work.
+ */
+ packets = tx_ring->stats.packets & INT_MAX;
+ if (tx_ring->tx_stats.prev_pkt_ctr == packets) {
+ i40e_force_wb(vsi, tx_ring->q_vector);
+ continue;
+ }
+
+ /* Memory barrier between read of packet count and call
+ * to i40e_get_tx_pending()
+ */
+ smp_rmb();
+ tx_ring->tx_stats.prev_pkt_ctr =
+ i40e_get_tx_pending(tx_ring) ? packets : -1;
+ }
+ }
+}
+
#define WB_STRIDE 4
/**
@@ -903,7 +956,7 @@ static void i40e_enable_wb_on_itr(struct i40e_vsi *vsi,
I40E_PFINT_DYN_CTLN_ITR_INDX_MASK; /* set noitr */
wr32(&vsi->back->hw,
- I40E_PFINT_DYN_CTLN(q_vector->v_idx + vsi->base_vector - 1),
+ I40E_PFINT_DYN_CTLN(q_vector->reg_idx),
val);
} else {
val = I40E_PFINT_DYN_CTL0_WB_ON_ITR_MASK |
@@ -930,8 +983,7 @@ void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
/* allow 00 to be written to the index */
wr32(&vsi->back->hw,
- I40E_PFINT_DYN_CTLN(q_vector->v_idx +
- vsi->base_vector - 1), val);
+ I40E_PFINT_DYN_CTLN(q_vector->reg_idx), val);
} else {
u32 val = I40E_PFINT_DYN_CTL0_INTENA_MASK |
I40E_PFINT_DYN_CTL0_ITR_INDX_MASK | /* set noitr */
@@ -1163,6 +1215,7 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
tx_ring->next_to_use = 0;
tx_ring->next_to_clean = 0;
+ tx_ring->tx_stats.prev_pkt_ctr = -1;
return 0;
err:
@@ -2257,7 +2310,6 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
struct i40e_hw *hw = &vsi->back->hw;
bool rx = false, tx = false;
u32 rxval, txval;
- int vector;
int idx = q_vector->v_idx;
int rx_itr_setting, tx_itr_setting;
@@ -2267,8 +2319,6 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
return;
}
- vector = (q_vector->v_idx + vsi->base_vector);
-
/* avoid dynamic calculation if in countdown mode OR if
* all dynamic is disabled
*/
@@ -2317,12 +2367,12 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
*/
rxval |= BIT(31);
/* don't check _DOWN because interrupt isn't being enabled */
- wr32(hw, INTREG(vector - 1), rxval);
+ wr32(hw, INTREG(q_vector->reg_idx), rxval);
}
enable_int:
if (!test_bit(__I40E_VSI_DOWN, vsi->state))
- wr32(hw, INTREG(vector - 1), txval);
+ wr32(hw, INTREG(q_vector->reg_idx), txval);
if (q_vector->itr_countdown)
q_vector->itr_countdown--;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index 2d08760fc4ce..d4799b41e98a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -333,6 +333,7 @@ struct i40e_tx_queue_stats {
u64 tx_done_old;
u64 tx_linearize;
u64 tx_force_wb;
+ int prev_pkt_ctr;
};
struct i40e_rx_queue_stats {
@@ -501,6 +502,7 @@ void i40e_free_rx_resources(struct i40e_ring *rx_ring);
int i40e_napi_poll(struct napi_struct *napi, int budget);
void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector);
u32 i40e_get_tx_pending(struct i40e_ring *ring);
+void i40e_detect_recover_hung(struct i40e_vsi *vsi);
int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
bool __i40e_chk_linearize(struct sk_buff *skb);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h
index 5a708c363d99..cd294e6a8587 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_type.h
@@ -402,6 +402,7 @@ enum i40e_nvmupd_cmd {
I40E_NVMUPD_STATUS,
I40E_NVMUPD_EXEC_AQ,
I40E_NVMUPD_GET_AQ_RESULT,
+ I40E_NVMUPD_GET_AQ_EVENT,
};
enum i40e_nvmupd_state {
@@ -421,15 +422,21 @@ enum i40e_nvmupd_state {
#define I40E_NVM_MOD_PNT_MASK 0xFF
-#define I40E_NVM_TRANS_SHIFT 8
-#define I40E_NVM_TRANS_MASK (0xf << I40E_NVM_TRANS_SHIFT)
-#define I40E_NVM_CON 0x0
-#define I40E_NVM_SNT 0x1
-#define I40E_NVM_LCB 0x2
-#define I40E_NVM_SA (I40E_NVM_SNT | I40E_NVM_LCB)
-#define I40E_NVM_ERA 0x4
-#define I40E_NVM_CSUM 0x8
-#define I40E_NVM_EXEC 0xf
+#define I40E_NVM_TRANS_SHIFT 8
+#define I40E_NVM_TRANS_MASK (0xf << I40E_NVM_TRANS_SHIFT)
+#define I40E_NVM_PRESERVATION_FLAGS_SHIFT 12
+#define I40E_NVM_PRESERVATION_FLAGS_MASK \
+ (0x3 << I40E_NVM_PRESERVATION_FLAGS_SHIFT)
+#define I40E_NVM_PRESERVATION_FLAGS_SELECTED 0x01
+#define I40E_NVM_PRESERVATION_FLAGS_ALL 0x02
+#define I40E_NVM_CON 0x0
+#define I40E_NVM_SNT 0x1
+#define I40E_NVM_LCB 0x2
+#define I40E_NVM_SA (I40E_NVM_SNT | I40E_NVM_LCB)
+#define I40E_NVM_ERA 0x4
+#define I40E_NVM_CSUM 0x8
+#define I40E_NVM_AQE 0xe
+#define I40E_NVM_EXEC 0xf
#define I40E_NVM_ADAPT_SHIFT 16
#define I40E_NVM_ADAPT_MASK (0xffff << I40E_NVM_ADAPT_SHIFT)
@@ -611,6 +618,7 @@ struct i40e_hw {
/* state of nvm update process */
enum i40e_nvmupd_state nvmupd_state;
struct i40e_aq_desc nvm_wb_desc;
+ struct i40e_aq_desc nvm_aq_event_desc;
struct i40e_virt_mem nvm_buff;
bool nvm_release_on_done;
u16 nvm_wait_opcode;
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c
index 8b0d4b255dea..d1aab6b8bfb1 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c
@@ -837,10 +837,15 @@ i40e_status i40evf_asq_send_command(struct i40e_hw *hw,
/* update the error if time out occurred */
if ((!cmd_completed) &&
(!details->async && !details->postpone)) {
- i40e_debug(hw,
- I40E_DEBUG_AQ_MESSAGE,
- "AQTX: Writeback timeout.\n");
- status = I40E_ERR_ADMIN_QUEUE_TIMEOUT;
+ if (rd32(hw, hw->aq.asq.len) & I40E_VF_ATQLEN1_ATQCRIT_MASK) {
+ i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE,
+ "AQTX: AQ Critical error.\n");
+ status = I40E_ERR_ADMIN_QUEUE_CRITICAL_ERROR;
+ } else {
+ i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE,
+ "AQTX: Writeback timeout.\n");
+ status = I40E_ERR_ADMIN_QUEUE_TIMEOUT;
+ }
}
asq_send_command_error:
@@ -901,7 +906,7 @@ i40e_status i40evf_clean_arq_element(struct i40e_hw *hw,
}
/* set next_to_use to head */
- ntu = (rd32(hw, hw->aq.arq.head) & I40E_VF_ARQH1_ARQH_MASK);
+ ntu = rd32(hw, hw->aq.arq.head) & I40E_VF_ARQH1_ARQH_MASK;
if (ntu == ntc) {
/* nothing to do - shouldn't need to update ring's values */
ret_code = I40E_ERR_ADMIN_QUEUE_NO_WORK;
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
index 435a112d09f5..815de8d9c3fb 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
@@ -205,6 +205,7 @@ enum i40e_admin_queue_opc {
/* DCB commands */
i40e_aqc_opc_dcb_ignore_pfc = 0x0301,
i40e_aqc_opc_dcb_updated = 0x0302,
+ i40e_aqc_opc_set_dcb_parameters = 0x0303,
/* TX scheduler */
i40e_aqc_opc_configure_vsi_bw_limit = 0x0400,
@@ -2196,8 +2197,12 @@ I40E_CHECK_CMD_LENGTH(i40e_aqc_phy_register_access);
*/
struct i40e_aqc_nvm_update {
u8 command_flags;
-#define I40E_AQ_NVM_LAST_CMD 0x01
-#define I40E_AQ_NVM_FLASH_ONLY 0x80
+#define I40E_AQ_NVM_LAST_CMD 0x01
+#define I40E_AQ_NVM_FLASH_ONLY 0x80
+#define I40E_AQ_NVM_PRESERVATION_FLAGS_SHIFT 1
+#define I40E_AQ_NVM_PRESERVATION_FLAGS_MASK 0x03
+#define I40E_AQ_NVM_PRESERVATION_FLAGS_SELECTED 0x03
+#define I40E_AQ_NVM_PRESERVATION_FLAGS_ALL 0x01
u8 module_pointer;
__le16 length;
__le32 offset;
@@ -2457,6 +2462,17 @@ struct i40e_aqc_lldp_start {
I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_start);
+/* Set DCB (direct 0x0303) */
+struct i40e_aqc_set_dcb_parameters {
+ u8 command;
+#define I40E_AQ_DCB_SET_AGENT 0x1
+#define I40E_DCB_VALID 0x1
+ u8 valid_flags;
+ u8 reserved[14];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_set_dcb_parameters);
+
/* Apply MIB changes (0x0A07)
* uses the generic struc as it contains no data
*/
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_common.c b/drivers/net/ethernet/intel/i40evf/i40e_common.c
index a94648429a5b..67bf5cebb76f 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_common.c
@@ -284,6 +284,8 @@ const char *i40evf_stat_str(struct i40e_hw *hw, i40e_status stat_err)
return "I40E_NOT_SUPPORTED";
case I40E_ERR_FIRMWARE_API_VERSION:
return "I40E_ERR_FIRMWARE_API_VERSION";
+ case I40E_ERR_ADMIN_QUEUE_CRITICAL_ERROR:
+ return "I40E_ERR_ADMIN_QUEUE_CRITICAL_ERROR";
}
snprintf(hw->err_str, sizeof(hw->err_str), "%d", stat_err);
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_status.h b/drivers/net/ethernet/intel/i40evf/i40e_status.h
index 7fa7a41915c1..5b222246e08b 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_status.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_status.h
@@ -95,6 +95,7 @@ enum i40e_status_code {
I40E_ERR_NOT_READY = -63,
I40E_NOT_SUPPORTED = -64,
I40E_ERR_FIRMWARE_API_VERSION = -65,
+ I40E_ERR_ADMIN_QUEUE_CRITICAL_ERROR = -66,
};
#endif /* _I40E_STATUS_H_ */
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index 1ba29bb85b67..357d6051281f 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -148,6 +148,59 @@ u32 i40evf_get_tx_pending(struct i40e_ring *ring, bool in_sw)
return 0;
}
+/**
+ * i40evf_detect_recover_hung - Function to detect and recover hung_queues
+ * @vsi: pointer to vsi struct with tx queues
+ *
+ * VSI has netdev and netdev has TX queues. This function is to check each of
+ * those TX queues if they are hung, trigger recovery by issuing SW interrupt.
+ **/
+void i40evf_detect_recover_hung(struct i40e_vsi *vsi)
+{
+ struct i40e_ring *tx_ring = NULL;
+ struct net_device *netdev;
+ unsigned int i;
+ int packets;
+
+ if (!vsi)
+ return;
+
+ if (test_bit(__I40E_VSI_DOWN, vsi->state))
+ return;
+
+ netdev = vsi->netdev;
+ if (!netdev)
+ return;
+
+ if (!netif_carrier_ok(netdev))
+ return;
+
+ for (i = 0; i < vsi->back->num_active_queues; i++) {
+ tx_ring = &vsi->back->tx_rings[i];
+ if (tx_ring && tx_ring->desc) {
+ /* If packet counter has not changed the queue is
+ * likely stalled, so force an interrupt for this
+ * queue.
+ *
+ * prev_pkt_ctr would be negative if there was no
+ * pending work.
+ */
+ packets = tx_ring->stats.packets & INT_MAX;
+ if (tx_ring->tx_stats.prev_pkt_ctr == packets) {
+ i40evf_force_wb(vsi, tx_ring->q_vector);
+ continue;
+ }
+
+ /* Memory barrier between read of packet count and call
+ * to i40evf_get_tx_pending()
+ */
+ smp_rmb();
+ tx_ring->tx_stats.prev_pkt_ctr =
+ i40evf_get_tx_pending(tx_ring, false) ? packets : -1;
+ }
+ }
+}
+
#define WB_STRIDE 4
/**
@@ -316,8 +369,7 @@ static void i40e_enable_wb_on_itr(struct i40e_vsi *vsi,
I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK; /* set noitr */
wr32(&vsi->back->hw,
- I40E_VFINT_DYN_CTLN1(q_vector->v_idx +
- vsi->base_vector - 1), val);
+ I40E_VFINT_DYN_CTLN1(q_vector->reg_idx), val);
q_vector->arm_wb_state = true;
}
@@ -336,7 +388,7 @@ void i40evf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
/* allow 00 to be written to the index */;
wr32(&vsi->back->hw,
- I40E_VFINT_DYN_CTLN1(q_vector->v_idx + vsi->base_vector - 1),
+ I40E_VFINT_DYN_CTLN1(q_vector->reg_idx),
val);
}
@@ -469,6 +521,7 @@ int i40evf_setup_tx_descriptors(struct i40e_ring *tx_ring)
tx_ring->next_to_use = 0;
tx_ring->next_to_clean = 0;
+ tx_ring->tx_stats.prev_pkt_ctr = -1;
return 0;
err:
@@ -1444,12 +1497,9 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
struct i40e_hw *hw = &vsi->back->hw;
bool rx = false, tx = false;
u32 rxval, txval;
- int vector;
int idx = q_vector->v_idx;
int rx_itr_setting, tx_itr_setting;
- vector = (q_vector->v_idx + vsi->base_vector);
-
/* avoid dynamic calculation if in countdown mode OR if
* all dynamic is disabled
*/
@@ -1498,12 +1548,12 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
*/
rxval |= BIT(31);
/* don't check _DOWN because interrupt isn't being enabled */
- wr32(hw, INTREG(vector - 1), rxval);
+ wr32(hw, INTREG(q_vector->reg_idx), rxval);
}
enable_int:
if (!test_bit(__I40E_VSI_DOWN, vsi->state))
- wr32(hw, INTREG(vector - 1), txval);
+ wr32(hw, INTREG(q_vector->reg_idx), txval);
if (q_vector->itr_countdown)
q_vector->itr_countdown--;
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
index 8d26c85d12e1..e72f16b4555b 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
@@ -313,6 +313,7 @@ struct i40e_tx_queue_stats {
u64 tx_done_old;
u64 tx_linearize;
u64 tx_force_wb;
+ int prev_pkt_ctr;
u64 tx_lost_interrupt;
};
@@ -467,6 +468,7 @@ void i40evf_free_rx_resources(struct i40e_ring *rx_ring);
int i40evf_napi_poll(struct napi_struct *napi, int budget);
void i40evf_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector);
u32 i40evf_get_tx_pending(struct i40e_ring *ring, bool in_sw);
+void i40evf_detect_recover_hung(struct i40e_vsi *vsi);
int __i40evf_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
bool __i40evf_chk_linearize(struct sk_buff *skb);
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_type.h b/drivers/net/ethernet/intel/i40evf/i40e_type.h
index 6afc31616e04..54951c84a481 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_type.h
@@ -361,6 +361,7 @@ enum i40e_nvmupd_cmd {
I40E_NVMUPD_STATUS,
I40E_NVMUPD_EXEC_AQ,
I40E_NVMUPD_GET_AQ_RESULT,
+ I40E_NVMUPD_GET_AQ_EVENT,
};
enum i40e_nvmupd_state {
@@ -380,15 +381,21 @@ enum i40e_nvmupd_state {
#define I40E_NVM_MOD_PNT_MASK 0xFF
-#define I40E_NVM_TRANS_SHIFT 8
-#define I40E_NVM_TRANS_MASK (0xf << I40E_NVM_TRANS_SHIFT)
-#define I40E_NVM_CON 0x0
-#define I40E_NVM_SNT 0x1
-#define I40E_NVM_LCB 0x2
-#define I40E_NVM_SA (I40E_NVM_SNT | I40E_NVM_LCB)
-#define I40E_NVM_ERA 0x4
-#define I40E_NVM_CSUM 0x8
-#define I40E_NVM_EXEC 0xf
+#define I40E_NVM_TRANS_SHIFT 8
+#define I40E_NVM_TRANS_MASK (0xf << I40E_NVM_TRANS_SHIFT)
+#define I40E_NVM_PRESERVATION_FLAGS_SHIFT 12
+#define I40E_NVM_PRESERVATION_FLAGS_MASK \
+ (0x3 << I40E_NVM_PRESERVATION_FLAGS_SHIFT)
+#define I40E_NVM_PRESERVATION_FLAGS_SELECTED 0x01
+#define I40E_NVM_PRESERVATION_FLAGS_ALL 0x02
+#define I40E_NVM_CON 0x0
+#define I40E_NVM_SNT 0x1
+#define I40E_NVM_LCB 0x2
+#define I40E_NVM_SA (I40E_NVM_SNT | I40E_NVM_LCB)
+#define I40E_NVM_ERA 0x4
+#define I40E_NVM_CSUM 0x8
+#define I40E_NVM_AQE 0xe
+#define I40E_NVM_EXEC 0xf
#define I40E_NVM_ADAPT_SHIFT 16
#define I40E_NVM_ADAPT_MASK (0xffff << I40E_NVM_ADAPT_SHIFT)
@@ -561,6 +568,7 @@ struct i40e_hw {
/* state of nvm update process */
enum i40e_nvmupd_state nvmupd_state;
struct i40e_aq_desc nvm_wb_desc;
+ struct i40e_aq_desc nvm_aq_event_desc;
struct i40e_virt_mem nvm_buff;
bool nvm_release_on_done;
u16 nvm_wait_opcode;
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h
index 47040ab2e298..9690c1ea019e 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf.h
+++ b/drivers/net/ethernet/intel/i40evf/i40evf.h
@@ -114,14 +114,14 @@ struct i40e_q_vector {
struct i40evf_adapter *adapter;
struct i40e_vsi *vsi;
struct napi_struct napi;
- unsigned long reg_idx;
struct i40e_ring_container rx;
struct i40e_ring_container tx;
u32 ring_mask;
u8 num_ringpairs; /* total number of ring pairs in vector */
#define ITR_COUNTDOWN_START 100
u8 itr_countdown; /* when 0 or 1 update ITR */
- int v_idx; /* vector index in list */
+ u16 v_idx; /* index in the vsi->q_vector array. */
+ u16 reg_idx; /* register index of the interrupt */
char name[IFNAMSIZ + 15];
bool arm_wb_state;
cpumask_t affinity_mask;
@@ -187,6 +187,7 @@ enum i40evf_state_t {
enum i40evf_critical_section_t {
__I40EVF_IN_CRITICAL_TASK, /* cannot be interrupted */
__I40EVF_IN_CLIENT_TASK,
+ __I40EVF_IN_REMOVE_TASK, /* device being removed */
};
/* board specific private data structure */
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
index da006fa3fec1..e2d8aa19d205 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
@@ -512,31 +512,31 @@ static void i40evf_set_itr_per_queue(struct i40evf_adapter *adapter,
struct ethtool_coalesce *ec,
int queue)
{
+ struct i40e_ring *rx_ring = &adapter->rx_rings[queue];
+ struct i40e_ring *tx_ring = &adapter->tx_rings[queue];
struct i40e_vsi *vsi = &adapter->vsi;
struct i40e_hw *hw = &adapter->hw;
struct i40e_q_vector *q_vector;
u16 vector;
- adapter->rx_rings[queue].rx_itr_setting = ec->rx_coalesce_usecs;
- adapter->tx_rings[queue].tx_itr_setting = ec->tx_coalesce_usecs;
+ rx_ring->rx_itr_setting = ec->rx_coalesce_usecs;
+ tx_ring->tx_itr_setting = ec->tx_coalesce_usecs;
- if (ec->use_adaptive_rx_coalesce)
- adapter->rx_rings[queue].rx_itr_setting |= I40E_ITR_DYNAMIC;
- else
- adapter->rx_rings[queue].rx_itr_setting &= ~I40E_ITR_DYNAMIC;
+ rx_ring->rx_itr_setting |= I40E_ITR_DYNAMIC;
+ if (!ec->use_adaptive_rx_coalesce)
+ rx_ring->rx_itr_setting ^= I40E_ITR_DYNAMIC;
- if (ec->use_adaptive_tx_coalesce)
- adapter->tx_rings[queue].tx_itr_setting |= I40E_ITR_DYNAMIC;
- else
- adapter->tx_rings[queue].tx_itr_setting &= ~I40E_ITR_DYNAMIC;
+ tx_ring->tx_itr_setting |= I40E_ITR_DYNAMIC;
+ if (!ec->use_adaptive_tx_coalesce)
+ tx_ring->tx_itr_setting ^= I40E_ITR_DYNAMIC;
- q_vector = adapter->rx_rings[queue].q_vector;
- q_vector->rx.itr = ITR_TO_REG(adapter->rx_rings[queue].rx_itr_setting);
+ q_vector = rx_ring->q_vector;
+ q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting);
vector = vsi->base_vector + q_vector->v_idx;
wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, vector - 1), q_vector->rx.itr);
- q_vector = adapter->tx_rings[queue].q_vector;
- q_vector->tx.itr = ITR_TO_REG(adapter->tx_rings[queue].tx_itr_setting);
+ q_vector = tx_ring->q_vector;
+ q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting);
vector = vsi->base_vector + q_vector->v_idx;
wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, vector - 1), q_vector->tx.itr);
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index f92587aba3c7..16989ad2ca90 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -1387,6 +1387,7 @@ static int i40evf_alloc_q_vectors(struct i40evf_adapter *adapter)
q_vector->adapter = adapter;
q_vector->vsi = &adapter->vsi;
q_vector->v_idx = q_idx;
+ q_vector->reg_idx = q_idx;
cpumask_copy(&q_vector->affinity_mask, cpu_possible_mask);
netif_napi_add(adapter->netdev, &q_vector->napi,
i40evf_napi_poll, NAPI_POLL_WEIGHT);
@@ -1716,6 +1717,8 @@ static void i40evf_watchdog_task(struct work_struct *work)
if (adapter->state == __I40EVF_RUNNING)
i40evf_request_stats(adapter);
watchdog_done:
+ if (adapter->state == __I40EVF_RUNNING)
+ i40evf_detect_recover_hung(&adapter->vsi);
clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
restart_watchdog:
if (adapter->state == __I40EVF_REMOVE)
@@ -1803,6 +1806,12 @@ static void i40evf_reset_task(struct work_struct *work)
int i = 0, err;
bool running;
+ /* When device is being removed it doesn't make sense to run the reset
+ * task, just return in such a case.
+ */
+ if (test_bit(__I40EVF_IN_REMOVE_TASK, &adapter->crit_section))
+ return;
+
while (test_and_set_bit(__I40EVF_IN_CLIENT_TASK,
&adapter->crit_section))
usleep_range(500, 1000);
@@ -2336,13 +2345,19 @@ static int i40evf_set_features(struct net_device *netdev,
{
struct i40evf_adapter *adapter = netdev_priv(netdev);
- if (!VLAN_ALLOWED(adapter))
+ /* Don't allow changing VLAN_RX flag when VLAN is set for VF
+ * and return an error in this case
+ */
+ if (VLAN_ALLOWED(adapter)) {
+ if (features & NETIF_F_HW_VLAN_CTAG_RX)
+ adapter->aq_required |=
+ I40EVF_FLAG_AQ_ENABLE_VLAN_STRIPPING;
+ else
+ adapter->aq_required |=
+ I40EVF_FLAG_AQ_DISABLE_VLAN_STRIPPING;
+ } else if ((netdev->features ^ features) & NETIF_F_HW_VLAN_CTAG_RX) {
return -EINVAL;
-
- if (features & NETIF_F_HW_VLAN_CTAG_RX)
- adapter->aq_required |= I40EVF_FLAG_AQ_ENABLE_VLAN_STRIPPING;
- else
- adapter->aq_required |= I40EVF_FLAG_AQ_DISABLE_VLAN_STRIPPING;
+ }
return 0;
}
@@ -3053,7 +3068,8 @@ static void i40evf_remove(struct pci_dev *pdev)
struct i40evf_mac_filter *f, *ftmp;
struct i40e_hw *hw = &adapter->hw;
int err;
-
+ /* Indicate we are in remove and not to run reset_task */
+ set_bit(__I40EVF_IN_REMOVE_TASK, &adapter->crit_section);
cancel_delayed_work_sync(&adapter->init_task);
cancel_work_sync(&adapter->reset_task);
cancel_delayed_work_sync(&adapter->client_task);
@@ -3088,8 +3104,6 @@ static void i40evf_remove(struct pci_dev *pdev)
if (adapter->watchdog_timer.function)
del_timer_sync(&adapter->watchdog_timer);
- flush_scheduled_work();
-
i40evf_free_rss(adapter);
if (hw->aq.asq.count)
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
index feb95b62a077..50ce0d6c09ef 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
@@ -1001,23 +1001,34 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
if (v_opcode == VIRTCHNL_OP_EVENT) {
struct virtchnl_pf_event *vpe =
(struct virtchnl_pf_event *)msg;
+ bool link_up = vpe->event_data.link_event.link_status;
switch (vpe->event) {
case VIRTCHNL_EVENT_LINK_CHANGE:
adapter->link_speed =
vpe->event_data.link_event.link_speed;
- if (adapter->link_up !=
- vpe->event_data.link_event.link_status) {
- adapter->link_up =
- vpe->event_data.link_event.link_status;
- if (adapter->link_up) {
- netif_tx_start_all_queues(netdev);
- netif_carrier_on(netdev);
- } else {
- netif_tx_stop_all_queues(netdev);
- netif_carrier_off(netdev);
- }
- i40evf_print_link_message(adapter);
+
+ /* we've already got the right link status, bail */
+ if (adapter->link_up == link_up)
+ break;
+
+ /* If we get link up message and start queues before
+ * our queues are configured it will trigger a TX hang.
+ * In that case, just ignore the link status message,
+ * we'll get another one after we enable queues and
+ * actually prepared to send traffic.
+ */
+ if (link_up && adapter->state != __I40EVF_RUNNING)
+ break;
+
+ adapter->link_up = link_up;
+ if (link_up) {
+ netif_tx_start_all_queues(netdev);
+ netif_carrier_on(netdev);
+ } else {
+ netif_tx_stop_all_queues(netdev);
+ netif_carrier_off(netdev);
}
+ i40evf_print_link_message(adapter);
break;
case VIRTCHNL_EVENT_RESET_IMPENDING:
dev_info(&adapter->pdev->dev, "PF reset warning received\n");
diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index 92845692087a..1c6b8d9176a8 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -690,6 +690,7 @@ void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va,
int igb_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr);
int igb_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr);
void igb_set_flag_queue_pairs(struct igb_adapter *, const u32);
+unsigned int igb_get_max_rss_queues(struct igb_adapter *);
#ifdef CONFIG_IGB_HWMON
void igb_sysfs_exit(struct igb_adapter *adapter);
int igb_sysfs_init(struct igb_adapter *adapter);
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index d06a8db514d4..606e6761758f 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -3338,37 +3338,7 @@ static int igb_set_rxfh(struct net_device *netdev, const u32 *indir,
static unsigned int igb_max_channels(struct igb_adapter *adapter)
{
- struct e1000_hw *hw = &adapter->hw;
- unsigned int max_combined = 0;
-
- switch (hw->mac.type) {
- case e1000_i211:
- max_combined = IGB_MAX_RX_QUEUES_I211;
- break;
- case e1000_82575:
- case e1000_i210:
- max_combined = IGB_MAX_RX_QUEUES_82575;
- break;
- case e1000_i350:
- if (!!adapter->vfs_allocated_count) {
- max_combined = 1;
- break;
- }
- /* fall through */
- case e1000_82576:
- if (!!adapter->vfs_allocated_count) {
- max_combined = 2;
- break;
- }
- /* fall through */
- case e1000_82580:
- case e1000_i354:
- default:
- max_combined = IGB_MAX_RX_QUEUES;
- break;
- }
-
- return max_combined;
+ return igb_get_max_rss_queues(adapter);
}
static void igb_get_channels(struct net_device *netdev,
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index c208753ff5b7..b88fae785369 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -1744,6 +1744,20 @@ static void igb_configure_cbs(struct igb_adapter *adapter, int queue,
* value = idleSlope * 61034
* ----------------- (E6)
* 1000000
+ *
+ * NOTE: For i210, given the above, we can see that idleslope
+ * is represented in 16.38431 kbps units by the value at
+ * the TQAVCC register (1Gbps / 61034), which reduces
+ * the granularity for idleslope increments.
+ * For instance, if you want to configure a 2576kbps
+ * idleslope, the value to be written on the register
+ * would have to be 157.23. If rounded down, you end
+ * up with less bandwidth available than originally
+ * required (~2572 kbps). If rounded up, you end up
+ * with a higher bandwidth (~2589 kbps). Below the
+ * approach we take is to always round up the
+ * calculated value, so the resulting bandwidth might
+ * be slightly higher for some configurations.
*/
value = DIV_ROUND_UP_ULL(idleslope * 61034ULL, 1000000);
@@ -3200,8 +3214,6 @@ static int igb_enable_sriov(struct pci_dev *pdev, int num_vfs)
/* if allocation failed then we do not support SR-IOV */
if (!adapter->vf_data) {
adapter->vfs_allocated_count = 0;
- dev_err(&pdev->dev,
- "Unable to allocate memory for VF Data Storage\n");
err = -ENOMEM;
goto out;
}
@@ -3373,10 +3385,10 @@ static void igb_probe_vfs(struct igb_adapter *adapter)
#endif /* CONFIG_PCI_IOV */
}
-static void igb_init_queue_configuration(struct igb_adapter *adapter)
+unsigned int igb_get_max_rss_queues(struct igb_adapter *adapter)
{
struct e1000_hw *hw = &adapter->hw;
- u32 max_rss_queues;
+ unsigned int max_rss_queues;
/* Determine the maximum number of RSS queues supported. */
switch (hw->mac.type) {
@@ -3407,6 +3419,14 @@ static void igb_init_queue_configuration(struct igb_adapter *adapter)
break;
}
+ return max_rss_queues;
+}
+
+static void igb_init_queue_configuration(struct igb_adapter *adapter)
+{
+ u32 max_rss_queues;
+
+ max_rss_queues = igb_get_max_rss_queues(adapter);
adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus());
igb_set_flag_queue_pairs(adapter, max_rss_queues);
@@ -3676,7 +3696,7 @@ static int __igb_close(struct net_device *netdev, bool suspending)
int igb_close(struct net_device *netdev)
{
- if (netif_device_present(netdev))
+ if (netif_device_present(netdev) || netdev->dismantle)
return __igb_close(netdev, false);
return 0;
}
@@ -8718,7 +8738,8 @@ static void igb_rar_set_index(struct igb_adapter *adapter, u32 index)
/* Indicate to hardware the Address is Valid. */
if (adapter->mac_table[index].state & IGB_MAC_STATE_IN_USE) {
- rar_high |= E1000_RAH_AV;
+ if (is_valid_ether_addr(addr))
+ rar_high |= E1000_RAH_AV;
if (hw->mac.type == e1000_82575)
rar_high |= E1000_RAH_POOL_1 *
@@ -8756,17 +8777,36 @@ static int igb_set_vf_mac(struct igb_adapter *adapter,
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
{
struct igb_adapter *adapter = netdev_priv(netdev);
- if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
+
+ if (vf >= adapter->vfs_allocated_count)
+ return -EINVAL;
+
+ /* Setting the VF MAC to 0 reverts the IGB_VF_FLAG_PF_SET_MAC
+ * flag and allows to overwrite the MAC via VF netdev. This
+ * is necessary to allow libvirt a way to restore the original
+ * MAC after unbinding vfio-pci and reloading igbvf after shutting
+ * down a VM.
+ */
+ if (is_zero_ether_addr(mac)) {
+ adapter->vf_data[vf].flags &= ~IGB_VF_FLAG_PF_SET_MAC;
+ dev_info(&adapter->pdev->dev,
+ "remove administratively set MAC on VF %d\n",
+ vf);
+ } else if (is_valid_ether_addr(mac)) {
+ adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
+ dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n",
+ mac, vf);
+ dev_info(&adapter->pdev->dev,
+ "Reload the VF driver to make this change effective.");
+ /* Generate additional warning if PF is down */
+ if (test_bit(__IGB_DOWN, &adapter->state)) {
+ dev_warn(&adapter->pdev->dev,
+ "The VF MAC address has been set, but the PF device is not up.\n");
+ dev_warn(&adapter->pdev->dev,
+ "Bring the PF device up before attempting to use the VF device.\n");
+ }
+ } else {
return -EINVAL;
- adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
- dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
- dev_info(&adapter->pdev->dev,
- "Reload the VF driver to make this change effective.");
- if (test_bit(__IGB_DOWN, &adapter->state)) {
- dev_warn(&adapter->pdev->dev,
- "The VF MAC address has been set, but the PF device is not up.\n");
- dev_warn(&adapter->pdev->dev,
- "Bring the PF device up before attempting to use the VF device.\n");
}
return igb_set_vf_mac(adapter, vf, mac);
}
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index 841c2a083349..0746b19ec6d3 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -643,6 +643,10 @@ static void igb_ptp_tx_work(struct work_struct *work)
adapter->ptp_tx_skb = NULL;
clear_bit_unlock(__IGB_PTP_TX_IN_PROGRESS, &adapter->state);
adapter->tx_hwtstamp_timeouts++;
+ /* Clear the tx valid bit in TSYNCTXCTL register to enable
+ * interrupt
+ */
+ rd32(E1000_TXSTMPH);
dev_warn(&adapter->pdev->dev, "clearing Tx timestamp hang\n");
return;
}
@@ -717,6 +721,7 @@ void igb_ptp_rx_hang(struct igb_adapter *adapter)
*/
void igb_ptp_tx_hang(struct igb_adapter *adapter)
{
+ struct e1000_hw *hw = &adapter->hw;
bool timeout = time_is_before_jiffies(adapter->ptp_tx_start +
IGB_PTP_TX_TIMEOUT);
@@ -736,6 +741,10 @@ void igb_ptp_tx_hang(struct igb_adapter *adapter)
adapter->ptp_tx_skb = NULL;
clear_bit_unlock(__IGB_PTP_TX_IN_PROGRESS, &adapter->state);
adapter->tx_hwtstamp_timeouts++;
+ /* Clear the tx valid bit in TSYNCTXCTL register to enable
+ * interrupt
+ */
+ rd32(E1000_TXSTMPH);
dev_warn(&adapter->pdev->dev, "clearing Tx timestamp hang\n");
}
}
diff --git a/drivers/net/ethernet/intel/ixgbe/Makefile b/drivers/net/ethernet/intel/ixgbe/Makefile
index 35e6fa643c7e..8319465eb38d 100644
--- a/drivers/net/ethernet/intel/ixgbe/Makefile
+++ b/drivers/net/ethernet/intel/ixgbe/Makefile
@@ -42,3 +42,4 @@ ixgbe-$(CONFIG_IXGBE_DCB) += ixgbe_dcb.o ixgbe_dcb_82598.o \
ixgbe-$(CONFIG_IXGBE_HWMON) += ixgbe_sysfs.o
ixgbe-$(CONFIG_DEBUG_FS) += ixgbe_debugfs.o
ixgbe-$(CONFIG_FCOE:m=y) += ixgbe_fcoe.o
+ixgbe-$(CONFIG_XFRM_OFFLOAD) += ixgbe_ipsec.o
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 49ab0c7a9cd5..c1e3a0039ea5 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -52,6 +52,7 @@
#ifdef CONFIG_IXGBE_DCA
#include <linux/dca.h>
#endif
+#include "ixgbe_ipsec.h"
#include <net/xdp.h>
#include <net/busy_poll.h>
@@ -171,10 +172,11 @@ enum ixgbe_tx_flags {
IXGBE_TX_FLAGS_CC = 0x08,
IXGBE_TX_FLAGS_IPV4 = 0x10,
IXGBE_TX_FLAGS_CSUM = 0x20,
+ IXGBE_TX_FLAGS_IPSEC = 0x40,
/* software defined flags */
- IXGBE_TX_FLAGS_SW_VLAN = 0x40,
- IXGBE_TX_FLAGS_FCOE = 0x80,
+ IXGBE_TX_FLAGS_SW_VLAN = 0x80,
+ IXGBE_TX_FLAGS_FCOE = 0x100,
};
/* VLAN info */
@@ -629,15 +631,18 @@ struct ixgbe_adapter {
#define IXGBE_FLAG2_EEE_CAPABLE BIT(14)
#define IXGBE_FLAG2_EEE_ENABLED BIT(15)
#define IXGBE_FLAG2_RX_LEGACY BIT(16)
+#define IXGBE_FLAG2_IPSEC_ENABLED BIT(17)
/* Tx fast path data */
int num_tx_queues;
u16 tx_itr_setting;
u16 tx_work_limit;
+ u64 tx_ipsec;
/* Rx fast path data */
int num_rx_queues;
u16 rx_itr_setting;
+ u64 rx_ipsec;
/* Port number used to identify VXLAN traffic */
__be16 vxlan_port;
@@ -781,6 +786,10 @@ struct ixgbe_adapter {
#define IXGBE_RSS_KEY_SIZE 40 /* size of RSS Hash Key in bytes */
u32 *rss_key;
+
+#ifdef CONFIG_XFRM
+ struct ixgbe_ipsec *ipsec;
+#endif /* CONFIG_XFRM */
};
static inline u8 ixgbe_max_rss_indices(struct ixgbe_adapter *adapter)
@@ -1011,4 +1020,24 @@ void ixgbe_store_key(struct ixgbe_adapter *adapter);
void ixgbe_store_reta(struct ixgbe_adapter *adapter);
s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg,
u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm);
+#ifdef CONFIG_XFRM_OFFLOAD
+void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter);
+void ixgbe_stop_ipsec_offload(struct ixgbe_adapter *adapter);
+void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter);
+void ixgbe_ipsec_rx(struct ixgbe_ring *rx_ring,
+ union ixgbe_adv_rx_desc *rx_desc,
+ struct sk_buff *skb);
+int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring, struct ixgbe_tx_buffer *first,
+ struct ixgbe_ipsec_tx_data *itd);
+#else
+static inline void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter) { };
+static inline void ixgbe_stop_ipsec_offload(struct ixgbe_adapter *adapter) { };
+static inline void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter) { };
+static inline void ixgbe_ipsec_rx(struct ixgbe_ring *rx_ring,
+ union ixgbe_adv_rx_desc *rx_desc,
+ struct sk_buff *skb) { };
+static inline int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring,
+ struct ixgbe_tx_buffer *first,
+ struct ixgbe_ipsec_tx_data *itd) { return 0; };
+#endif /* CONFIG_XFRM_OFFLOAD */
#endif /* _IXGBE_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index 7ac7ef9b37ff..61188f343955 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -4087,7 +4087,7 @@ void ixgbe_get_oem_prod_version(struct ixgbe_hw *hw,
hw->eeprom.ops.read(hw, NVM_OEM_PROD_VER_PTR, &offset);
/* Return is offset to OEM Product Version block is invalid */
- if (offset == 0x0 && offset == NVM_INVALID_PTR)
+ if (offset == 0x0 || offset == NVM_INVALID_PTR)
return;
/* Read product version block */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index f064099733b6..221f15803480 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -115,6 +115,8 @@ static const struct ixgbe_stats ixgbe_gstrings_stats[] = {
{"tx_hwtstamp_timeouts", IXGBE_STAT(tx_hwtstamp_timeouts)},
{"tx_hwtstamp_skipped", IXGBE_STAT(tx_hwtstamp_skipped)},
{"rx_hwtstamp_cleared", IXGBE_STAT(rx_hwtstamp_cleared)},
+ {"tx_ipsec", IXGBE_STAT(tx_ipsec)},
+ {"rx_ipsec", IXGBE_STAT(rx_ipsec)},
#ifdef IXGBE_FCOE
{"fcoe_bad_fccrc", IXGBE_STAT(stats.fccrc)},
{"rx_fcoe_dropped", IXGBE_STAT(stats.fcoerpdc)},
@@ -3083,26 +3085,9 @@ static int ixgbe_get_ts_info(struct net_device *dev,
case ixgbe_mac_X550EM_x:
case ixgbe_mac_x550em_a:
info->rx_filters |= BIT(HWTSTAMP_FILTER_ALL);
- /* fallthrough */
+ break;
case ixgbe_mac_X540:
case ixgbe_mac_82599EB:
- info->so_timestamping =
- SOF_TIMESTAMPING_TX_SOFTWARE |
- SOF_TIMESTAMPING_RX_SOFTWARE |
- SOF_TIMESTAMPING_SOFTWARE |
- SOF_TIMESTAMPING_TX_HARDWARE |
- SOF_TIMESTAMPING_RX_HARDWARE |
- SOF_TIMESTAMPING_RAW_HARDWARE;
-
- if (adapter->ptp_clock)
- info->phc_index = ptp_clock_index(adapter->ptp_clock);
- else
- info->phc_index = -1;
-
- info->tx_types =
- BIT(HWTSTAMP_TX_OFF) |
- BIT(HWTSTAMP_TX_ON);
-
info->rx_filters |=
BIT(HWTSTAMP_FILTER_PTP_V1_L4_SYNC) |
BIT(HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ) |
@@ -3111,6 +3096,24 @@ static int ixgbe_get_ts_info(struct net_device *dev,
default:
return ethtool_op_get_ts_info(dev, info);
}
+
+ info->so_timestamping =
+ SOF_TIMESTAMPING_TX_SOFTWARE |
+ SOF_TIMESTAMPING_RX_SOFTWARE |
+ SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_TX_HARDWARE |
+ SOF_TIMESTAMPING_RX_HARDWARE |
+ SOF_TIMESTAMPING_RAW_HARDWARE;
+
+ if (adapter->ptp_clock)
+ info->phc_index = ptp_clock_index(adapter->ptp_clock);
+ else
+ info->phc_index = -1;
+
+ info->tx_types =
+ BIT(HWTSTAMP_TX_OFF) |
+ BIT(HWTSTAMP_TX_ON);
+
return 0;
}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
new file mode 100644
index 000000000000..93eacddb6704
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -0,0 +1,941 @@
+/*******************************************************************************
+ *
+ * Intel 10 Gigabit PCI Express Linux driver
+ * Copyright(c) 2017 Oracle and/or its affiliates. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * Linux NICS <linux.nics@intel.com>
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ ******************************************************************************/
+
+#include "ixgbe.h"
+#include <net/xfrm.h>
+#include <crypto/aead.h>
+
+/**
+ * ixgbe_ipsec_set_tx_sa - set the Tx SA registers
+ * @hw: hw specific details
+ * @idx: register index to write
+ * @key: key byte array
+ * @salt: salt bytes
+ **/
+static void ixgbe_ipsec_set_tx_sa(struct ixgbe_hw *hw, u16 idx,
+ u32 key[], u32 salt)
+{
+ u32 reg;
+ int i;
+
+ for (i = 0; i < 4; i++)
+ IXGBE_WRITE_REG(hw, IXGBE_IPSTXKEY(i), cpu_to_be32(key[3 - i]));
+ IXGBE_WRITE_REG(hw, IXGBE_IPSTXSALT, cpu_to_be32(salt));
+ IXGBE_WRITE_FLUSH(hw);
+
+ reg = IXGBE_READ_REG(hw, IXGBE_IPSTXIDX);
+ reg &= IXGBE_RXTXIDX_IPS_EN;
+ reg |= idx << IXGBE_RXTXIDX_IDX_SHIFT | IXGBE_RXTXIDX_WRITE;
+ IXGBE_WRITE_REG(hw, IXGBE_IPSTXIDX, reg);
+ IXGBE_WRITE_FLUSH(hw);
+}
+
+/**
+ * ixgbe_ipsec_set_rx_item - set an Rx table item
+ * @hw: hw specific details
+ * @idx: register index to write
+ * @tbl: table selector
+ *
+ * Trigger the device to store into a particular Rx table the
+ * data that has already been loaded into the input register
+ **/
+static void ixgbe_ipsec_set_rx_item(struct ixgbe_hw *hw, u16 idx,
+ enum ixgbe_ipsec_tbl_sel tbl)
+{
+ u32 reg;
+
+ reg = IXGBE_READ_REG(hw, IXGBE_IPSRXIDX);
+ reg &= IXGBE_RXTXIDX_IPS_EN;
+ reg |= tbl << IXGBE_RXIDX_TBL_SHIFT |
+ idx << IXGBE_RXTXIDX_IDX_SHIFT |
+ IXGBE_RXTXIDX_WRITE;
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXIDX, reg);
+ IXGBE_WRITE_FLUSH(hw);
+}
+
+/**
+ * ixgbe_ipsec_set_rx_sa - set up the register bits to save SA info
+ * @hw: hw specific details
+ * @idx: register index to write
+ * @spi: security parameter index
+ * @key: key byte array
+ * @salt: salt bytes
+ * @mode: rx decrypt control bits
+ * @ip_idx: index into IP table for related IP address
+ **/
+static void ixgbe_ipsec_set_rx_sa(struct ixgbe_hw *hw, u16 idx, __be32 spi,
+ u32 key[], u32 salt, u32 mode, u32 ip_idx)
+{
+ int i;
+
+ /* store the SPI (in bigendian) and IPidx */
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXSPI, cpu_to_le32(spi));
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXIPIDX, ip_idx);
+ IXGBE_WRITE_FLUSH(hw);
+
+ ixgbe_ipsec_set_rx_item(hw, idx, ips_rx_spi_tbl);
+
+ /* store the key, salt, and mode */
+ for (i = 0; i < 4; i++)
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXKEY(i), cpu_to_be32(key[3 - i]));
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXSALT, cpu_to_be32(salt));
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXMOD, mode);
+ IXGBE_WRITE_FLUSH(hw);
+
+ ixgbe_ipsec_set_rx_item(hw, idx, ips_rx_key_tbl);
+}
+
+/**
+ * ixgbe_ipsec_set_rx_ip - set up the register bits to save SA IP addr info
+ * @hw: hw specific details
+ * @idx: register index to write
+ * @addr: IP address byte array
+ **/
+static void ixgbe_ipsec_set_rx_ip(struct ixgbe_hw *hw, u16 idx, __be32 addr[])
+{
+ int i;
+
+ /* store the ip address */
+ for (i = 0; i < 4; i++)
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXIPADDR(i), cpu_to_le32(addr[i]));
+ IXGBE_WRITE_FLUSH(hw);
+
+ ixgbe_ipsec_set_rx_item(hw, idx, ips_rx_ip_tbl);
+}
+
+/**
+ * ixgbe_ipsec_clear_hw_tables - because some tables don't get cleared on reset
+ * @adapter: board private structure
+ **/
+static void ixgbe_ipsec_clear_hw_tables(struct ixgbe_adapter *adapter)
+{
+ struct ixgbe_ipsec *ipsec = adapter->ipsec;
+ struct ixgbe_hw *hw = &adapter->hw;
+ u32 buf[4] = {0, 0, 0, 0};
+ u16 idx;
+
+ /* disable Rx and Tx SA lookup */
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXIDX, 0);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSTXIDX, 0);
+
+ /* scrub the tables - split the loops for the max of the IP table */
+ for (idx = 0; idx < IXGBE_IPSEC_MAX_RX_IP_COUNT; idx++) {
+ ixgbe_ipsec_set_tx_sa(hw, idx, buf, 0);
+ ixgbe_ipsec_set_rx_sa(hw, idx, 0, buf, 0, 0, 0);
+ ixgbe_ipsec_set_rx_ip(hw, idx, (__be32 *)buf);
+ }
+ for (; idx < IXGBE_IPSEC_MAX_SA_COUNT; idx++) {
+ ixgbe_ipsec_set_tx_sa(hw, idx, buf, 0);
+ ixgbe_ipsec_set_rx_sa(hw, idx, 0, buf, 0, 0, 0);
+ }
+
+ ipsec->num_rx_sa = 0;
+ ipsec->num_tx_sa = 0;
+}
+
+/**
+ * ixgbe_ipsec_stop_data
+ * @adapter: board private structure
+ **/
+static void ixgbe_ipsec_stop_data(struct ixgbe_adapter *adapter)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ bool link = adapter->link_up;
+ u32 t_rdy, r_rdy;
+ u32 limit;
+ u32 reg;
+
+ /* halt data paths */
+ reg = IXGBE_READ_REG(hw, IXGBE_SECTXCTRL);
+ reg |= IXGBE_SECTXCTRL_TX_DIS;
+ IXGBE_WRITE_REG(hw, IXGBE_SECTXCTRL, reg);
+
+ reg = IXGBE_READ_REG(hw, IXGBE_SECRXCTRL);
+ reg |= IXGBE_SECRXCTRL_RX_DIS;
+ IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, reg);
+
+ IXGBE_WRITE_FLUSH(hw);
+
+ /* If the tx fifo doesn't have link, but still has data,
+ * we can't clear the tx sec block. Set the MAC loopback
+ * before block clear
+ */
+ if (!link) {
+ reg = IXGBE_READ_REG(hw, IXGBE_MACC);
+ reg |= IXGBE_MACC_FLU;
+ IXGBE_WRITE_REG(hw, IXGBE_MACC, reg);
+
+ reg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
+ reg |= IXGBE_HLREG0_LPBK;
+ IXGBE_WRITE_REG(hw, IXGBE_HLREG0, reg);
+
+ IXGBE_WRITE_FLUSH(hw);
+ mdelay(3);
+ }
+
+ /* wait for the paths to empty */
+ limit = 20;
+ do {
+ mdelay(10);
+ t_rdy = IXGBE_READ_REG(hw, IXGBE_SECTXSTAT) &
+ IXGBE_SECTXSTAT_SECTX_RDY;
+ r_rdy = IXGBE_READ_REG(hw, IXGBE_SECRXSTAT) &
+ IXGBE_SECRXSTAT_SECRX_RDY;
+ } while (!t_rdy && !r_rdy && limit--);
+
+ /* undo loopback if we played with it earlier */
+ if (!link) {
+ reg = IXGBE_READ_REG(hw, IXGBE_MACC);
+ reg &= ~IXGBE_MACC_FLU;
+ IXGBE_WRITE_REG(hw, IXGBE_MACC, reg);
+
+ reg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
+ reg &= ~IXGBE_HLREG0_LPBK;
+ IXGBE_WRITE_REG(hw, IXGBE_HLREG0, reg);
+
+ IXGBE_WRITE_FLUSH(hw);
+ }
+}
+
+/**
+ * ixgbe_ipsec_stop_engine
+ * @adapter: board private structure
+ **/
+static void ixgbe_ipsec_stop_engine(struct ixgbe_adapter *adapter)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ u32 reg;
+
+ ixgbe_ipsec_stop_data(adapter);
+
+ /* disable Rx and Tx SA lookup */
+ IXGBE_WRITE_REG(hw, IXGBE_IPSTXIDX, 0);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXIDX, 0);
+
+ /* disable the Rx and Tx engines and full packet store-n-forward */
+ reg = IXGBE_READ_REG(hw, IXGBE_SECTXCTRL);
+ reg |= IXGBE_SECTXCTRL_SECTX_DIS;
+ reg &= ~IXGBE_SECTXCTRL_STORE_FORWARD;
+ IXGBE_WRITE_REG(hw, IXGBE_SECTXCTRL, reg);
+
+ reg = IXGBE_READ_REG(hw, IXGBE_SECRXCTRL);
+ reg |= IXGBE_SECRXCTRL_SECRX_DIS;
+ IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, reg);
+
+ /* restore the "tx security buffer almost full threshold" to 0x250 */
+ IXGBE_WRITE_REG(hw, IXGBE_SECTXBUFFAF, 0x250);
+
+ /* Set minimum IFG between packets back to the default 0x1 */
+ reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
+ reg = (reg & 0xfffffff0) | 0x1;
+ IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
+
+ /* final set for normal (no ipsec offload) processing */
+ IXGBE_WRITE_REG(hw, IXGBE_SECTXCTRL, IXGBE_SECTXCTRL_SECTX_DIS);
+ IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, IXGBE_SECRXCTRL_SECRX_DIS);
+
+ IXGBE_WRITE_FLUSH(hw);
+}
+
+/**
+ * ixgbe_ipsec_start_engine
+ * @adapter: board private structure
+ *
+ * NOTE: this increases power consumption whether being used or not
+ **/
+static void ixgbe_ipsec_start_engine(struct ixgbe_adapter *adapter)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ u32 reg;
+
+ ixgbe_ipsec_stop_data(adapter);
+
+ /* Set minimum IFG between packets to 3 */
+ reg = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG);
+ reg = (reg & 0xfffffff0) | 0x3;
+ IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, reg);
+
+ /* Set "tx security buffer almost full threshold" to 0x15 so that the
+ * almost full indication is generated only after buffer contains at
+ * least an entire jumbo packet.
+ */
+ reg = IXGBE_READ_REG(hw, IXGBE_SECTXBUFFAF);
+ reg = (reg & 0xfffffc00) | 0x15;
+ IXGBE_WRITE_REG(hw, IXGBE_SECTXBUFFAF, reg);
+
+ /* restart the data paths by clearing the DISABLE bits */
+ IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, 0);
+ IXGBE_WRITE_REG(hw, IXGBE_SECTXCTRL, IXGBE_SECTXCTRL_STORE_FORWARD);
+
+ /* enable Rx and Tx SA lookup */
+ IXGBE_WRITE_REG(hw, IXGBE_IPSTXIDX, IXGBE_RXTXIDX_IPS_EN);
+ IXGBE_WRITE_REG(hw, IXGBE_IPSRXIDX, IXGBE_RXTXIDX_IPS_EN);
+
+ IXGBE_WRITE_FLUSH(hw);
+}
+
+/**
+ * ixgbe_ipsec_restore - restore the ipsec HW settings after a reset
+ * @adapter: board private structure
+ **/
+void ixgbe_ipsec_restore(struct ixgbe_adapter *adapter)
+{
+ struct ixgbe_ipsec *ipsec = adapter->ipsec;
+ struct ixgbe_hw *hw = &adapter->hw;
+ int i;
+
+ if (!(adapter->flags2 & IXGBE_FLAG2_IPSEC_ENABLED))
+ return;
+
+ /* clean up and restart the engine */
+ ixgbe_ipsec_stop_engine(adapter);
+ ixgbe_ipsec_clear_hw_tables(adapter);
+ ixgbe_ipsec_start_engine(adapter);
+
+ /* reload the IP addrs */
+ for (i = 0; i < IXGBE_IPSEC_MAX_RX_IP_COUNT; i++) {
+ struct rx_ip_sa *ipsa = &ipsec->ip_tbl[i];
+
+ if (ipsa->used)
+ ixgbe_ipsec_set_rx_ip(hw, i, ipsa->ipaddr);
+ }
+
+ /* reload the Rx and Tx keys */
+ for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT; i++) {
+ struct rx_sa *rsa = &ipsec->rx_tbl[i];
+ struct tx_sa *tsa = &ipsec->tx_tbl[i];
+
+ if (rsa->used)
+ ixgbe_ipsec_set_rx_sa(hw, i, rsa->xs->id.spi,
+ rsa->key, rsa->salt,
+ rsa->mode, rsa->iptbl_ind);
+
+ if (tsa->used)
+ ixgbe_ipsec_set_tx_sa(hw, i, tsa->key, tsa->salt);
+ }
+}
+
+/**
+ * ixgbe_ipsec_find_empty_idx - find the first unused security parameter index
+ * @ipsec: pointer to ipsec struct
+ * @rxtable: true if we need to look in the Rx table
+ *
+ * Returns the first unused index in either the Rx or Tx SA table
+ **/
+static int ixgbe_ipsec_find_empty_idx(struct ixgbe_ipsec *ipsec, bool rxtable)
+{
+ u32 i;
+
+ if (rxtable) {
+ if (ipsec->num_rx_sa == IXGBE_IPSEC_MAX_SA_COUNT)
+ return -ENOSPC;
+
+ /* search rx sa table */
+ for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT; i++) {
+ if (!ipsec->rx_tbl[i].used)
+ return i;
+ }
+ } else {
+ if (ipsec->num_tx_sa == IXGBE_IPSEC_MAX_SA_COUNT)
+ return -ENOSPC;
+
+ /* search tx sa table */
+ for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT; i++) {
+ if (!ipsec->tx_tbl[i].used)
+ return i;
+ }
+ }
+
+ return -ENOSPC;
+}
+
+/**
+ * ixgbe_ipsec_find_rx_state - find the state that matches
+ * @ipsec: pointer to ipsec struct
+ * @daddr: inbound address to match
+ * @proto: protocol to match
+ * @spi: SPI to match
+ * @ip4: true if using an ipv4 address
+ *
+ * Returns a pointer to the matching SA state information
+ **/
+static struct xfrm_state *ixgbe_ipsec_find_rx_state(struct ixgbe_ipsec *ipsec,
+ __be32 *daddr, u8 proto,
+ __be32 spi, bool ip4)
+{
+ struct rx_sa *rsa;
+ struct xfrm_state *ret = NULL;
+
+ rcu_read_lock();
+ hash_for_each_possible_rcu(ipsec->rx_sa_list, rsa, hlist, spi)
+ if (spi == rsa->xs->id.spi &&
+ ((ip4 && *daddr == rsa->xs->id.daddr.a4) ||
+ (!ip4 && !memcmp(daddr, &rsa->xs->id.daddr.a6,
+ sizeof(rsa->xs->id.daddr.a6)))) &&
+ proto == rsa->xs->id.proto) {
+ ret = rsa->xs;
+ xfrm_state_hold(ret);
+ break;
+ }
+ rcu_read_unlock();
+ return ret;
+}
+
+/**
+ * ixgbe_ipsec_parse_proto_keys - find the key and salt based on the protocol
+ * @xs: pointer to xfrm_state struct
+ * @mykey: pointer to key array to populate
+ * @mysalt: pointer to salt value to populate
+ *
+ * This copies the protocol keys and salt to our own data tables. The
+ * 82599 family only supports the one algorithm.
+ **/
+static int ixgbe_ipsec_parse_proto_keys(struct xfrm_state *xs,
+ u32 *mykey, u32 *mysalt)
+{
+ struct net_device *dev = xs->xso.dev;
+ unsigned char *key_data;
+ char *alg_name = NULL;
+ const char aes_gcm_name[] = "rfc4106(gcm(aes))";
+ int key_len;
+
+ if (xs->aead) {
+ key_data = &xs->aead->alg_key[0];
+ key_len = xs->aead->alg_key_len;
+ alg_name = xs->aead->alg_name;
+ } else {
+ netdev_err(dev, "Unsupported IPsec algorithm\n");
+ return -EINVAL;
+ }
+
+ if (strcmp(alg_name, aes_gcm_name)) {
+ netdev_err(dev, "Unsupported IPsec algorithm - please use %s\n",
+ aes_gcm_name);
+ return -EINVAL;
+ }
+
+ /* The key bytes come down in a bigendian array of bytes, so
+ * we don't need to do any byteswapping.
+ * 160 accounts for 16 byte key and 4 byte salt
+ */
+ if (key_len == 160) {
+ *mysalt = ((u32 *)key_data)[4];
+ } else if (key_len != 128) {
+ netdev_err(dev, "IPsec hw offload only supports keys up to 128 bits with a 32 bit salt\n");
+ return -EINVAL;
+ } else {
+ netdev_info(dev, "IPsec hw offload parameters missing 32 bit salt value\n");
+ *mysalt = 0;
+ }
+ memcpy(mykey, key_data, 16);
+
+ return 0;
+}
+
+/**
+ * ixgbe_ipsec_add_sa - program device with a security association
+ * @xs: pointer to transformer state struct
+ **/
+static int ixgbe_ipsec_add_sa(struct xfrm_state *xs)
+{
+ struct net_device *dev = xs->xso.dev;
+ struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_ipsec *ipsec = adapter->ipsec;
+ struct ixgbe_hw *hw = &adapter->hw;
+ int checked, match, first;
+ u16 sa_idx;
+ int ret;
+ int i;
+
+ if (xs->id.proto != IPPROTO_ESP && xs->id.proto != IPPROTO_AH) {
+ netdev_err(dev, "Unsupported protocol 0x%04x for ipsec offload\n",
+ xs->id.proto);
+ return -EINVAL;
+ }
+
+ if (xs->xso.flags & XFRM_OFFLOAD_INBOUND) {
+ struct rx_sa rsa;
+
+ if (xs->calg) {
+ netdev_err(dev, "Compression offload not supported\n");
+ return -EINVAL;
+ }
+
+ /* find the first unused index */
+ ret = ixgbe_ipsec_find_empty_idx(ipsec, true);
+ if (ret < 0) {
+ netdev_err(dev, "No space for SA in Rx table!\n");
+ return ret;
+ }
+ sa_idx = (u16)ret;
+
+ memset(&rsa, 0, sizeof(rsa));
+ rsa.used = true;
+ rsa.xs = xs;
+
+ if (rsa.xs->id.proto & IPPROTO_ESP)
+ rsa.decrypt = xs->ealg || xs->aead;
+
+ /* get the key and salt */
+ ret = ixgbe_ipsec_parse_proto_keys(xs, rsa.key, &rsa.salt);
+ if (ret) {
+ netdev_err(dev, "Failed to get key data for Rx SA table\n");
+ return ret;
+ }
+
+ /* get ip for rx sa table */
+ if (xs->props.family == AF_INET6)
+ memcpy(rsa.ipaddr, &xs->id.daddr.a6, 16);
+ else
+ memcpy(&rsa.ipaddr[3], &xs->id.daddr.a4, 4);
+
+ /* The HW does not have a 1:1 mapping from keys to IP addrs, so
+ * check for a matching IP addr entry in the table. If the addr
+ * already exists, use it; else find an unused slot and add the
+ * addr. If one does not exist and there are no unused table
+ * entries, fail the request.
+ */
+
+ /* Find an existing match or first not used, and stop looking
+ * after we've checked all we know we have.
+ */
+ checked = 0;
+ match = -1;
+ first = -1;
+ for (i = 0;
+ i < IXGBE_IPSEC_MAX_RX_IP_COUNT &&
+ (checked < ipsec->num_rx_sa || first < 0);
+ i++) {
+ if (ipsec->ip_tbl[i].used) {
+ if (!memcmp(ipsec->ip_tbl[i].ipaddr,
+ rsa.ipaddr, sizeof(rsa.ipaddr))) {
+ match = i;
+ break;
+ }
+ checked++;
+ } else if (first < 0) {
+ first = i; /* track the first empty seen */
+ }
+ }
+
+ if (ipsec->num_rx_sa == 0)
+ first = 0;
+
+ if (match >= 0) {
+ /* addrs are the same, we should use this one */
+ rsa.iptbl_ind = match;
+ ipsec->ip_tbl[match].ref_cnt++;
+
+ } else if (first >= 0) {
+ /* no matches, but here's an empty slot */
+ rsa.iptbl_ind = first;
+
+ memcpy(ipsec->ip_tbl[first].ipaddr,
+ rsa.ipaddr, sizeof(rsa.ipaddr));
+ ipsec->ip_tbl[first].ref_cnt = 1;
+ ipsec->ip_tbl[first].used = true;
+
+ ixgbe_ipsec_set_rx_ip(hw, rsa.iptbl_ind, rsa.ipaddr);
+
+ } else {
+ /* no match and no empty slot */
+ netdev_err(dev, "No space for SA in Rx IP SA table\n");
+ memset(&rsa, 0, sizeof(rsa));
+ return -ENOSPC;
+ }
+
+ rsa.mode = IXGBE_RXMOD_VALID;
+ if (rsa.xs->id.proto & IPPROTO_ESP)
+ rsa.mode |= IXGBE_RXMOD_PROTO_ESP;
+ if (rsa.decrypt)
+ rsa.mode |= IXGBE_RXMOD_DECRYPT;
+ if (rsa.xs->props.family == AF_INET6)
+ rsa.mode |= IXGBE_RXMOD_IPV6;
+
+ /* the preparations worked, so save the info */
+ memcpy(&ipsec->rx_tbl[sa_idx], &rsa, sizeof(rsa));
+
+ ixgbe_ipsec_set_rx_sa(hw, sa_idx, rsa.xs->id.spi, rsa.key,
+ rsa.salt, rsa.mode, rsa.iptbl_ind);
+ xs->xso.offload_handle = sa_idx + IXGBE_IPSEC_BASE_RX_INDEX;
+
+ ipsec->num_rx_sa++;
+
+ /* hash the new entry for faster search in Rx path */
+ hash_add_rcu(ipsec->rx_sa_list, &ipsec->rx_tbl[sa_idx].hlist,
+ rsa.xs->id.spi);
+ } else {
+ struct tx_sa tsa;
+
+ /* find the first unused index */
+ ret = ixgbe_ipsec_find_empty_idx(ipsec, false);
+ if (ret < 0) {
+ netdev_err(dev, "No space for SA in Tx table\n");
+ return ret;
+ }
+ sa_idx = (u16)ret;
+
+ memset(&tsa, 0, sizeof(tsa));
+ tsa.used = true;
+ tsa.xs = xs;
+
+ if (xs->id.proto & IPPROTO_ESP)
+ tsa.encrypt = xs->ealg || xs->aead;
+
+ ret = ixgbe_ipsec_parse_proto_keys(xs, tsa.key, &tsa.salt);
+ if (ret) {
+ netdev_err(dev, "Failed to get key data for Tx SA table\n");
+ memset(&tsa, 0, sizeof(tsa));
+ return ret;
+ }
+
+ /* the preparations worked, so save the info */
+ memcpy(&ipsec->tx_tbl[sa_idx], &tsa, sizeof(tsa));
+
+ ixgbe_ipsec_set_tx_sa(hw, sa_idx, tsa.key, tsa.salt);
+
+ xs->xso.offload_handle = sa_idx + IXGBE_IPSEC_BASE_TX_INDEX;
+
+ ipsec->num_tx_sa++;
+ }
+
+ /* enable the engine if not already warmed up */
+ if (!(adapter->flags2 & IXGBE_FLAG2_IPSEC_ENABLED)) {
+ ixgbe_ipsec_start_engine(adapter);
+ adapter->flags2 |= IXGBE_FLAG2_IPSEC_ENABLED;
+ }
+
+ return 0;
+}
+
+/**
+ * ixgbe_ipsec_del_sa - clear out this specific SA
+ * @xs: pointer to transformer state struct
+ **/
+static void ixgbe_ipsec_del_sa(struct xfrm_state *xs)
+{
+ struct net_device *dev = xs->xso.dev;
+ struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_ipsec *ipsec = adapter->ipsec;
+ struct ixgbe_hw *hw = &adapter->hw;
+ u32 zerobuf[4] = {0, 0, 0, 0};
+ u16 sa_idx;
+
+ if (xs->xso.flags & XFRM_OFFLOAD_INBOUND) {
+ struct rx_sa *rsa;
+ u8 ipi;
+
+ sa_idx = xs->xso.offload_handle - IXGBE_IPSEC_BASE_RX_INDEX;
+ rsa = &ipsec->rx_tbl[sa_idx];
+
+ if (!rsa->used) {
+ netdev_err(dev, "Invalid Rx SA selected sa_idx=%d offload_handle=%lu\n",
+ sa_idx, xs->xso.offload_handle);
+ return;
+ }
+
+ ixgbe_ipsec_set_rx_sa(hw, sa_idx, 0, zerobuf, 0, 0, 0);
+ hash_del_rcu(&rsa->hlist);
+
+ /* if the IP table entry is referenced by only this SA,
+ * i.e. ref_cnt is only 1, clear the IP table entry as well
+ */
+ ipi = rsa->iptbl_ind;
+ if (ipsec->ip_tbl[ipi].ref_cnt > 0) {
+ ipsec->ip_tbl[ipi].ref_cnt--;
+
+ if (!ipsec->ip_tbl[ipi].ref_cnt) {
+ memset(&ipsec->ip_tbl[ipi], 0,
+ sizeof(struct rx_ip_sa));
+ ixgbe_ipsec_set_rx_ip(hw, ipi, zerobuf);
+ }
+ }
+
+ memset(rsa, 0, sizeof(struct rx_sa));
+ ipsec->num_rx_sa--;
+ } else {
+ sa_idx = xs->xso.offload_handle - IXGBE_IPSEC_BASE_TX_INDEX;
+
+ if (!ipsec->tx_tbl[sa_idx].used) {
+ netdev_err(dev, "Invalid Tx SA selected sa_idx=%d offload_handle=%lu\n",
+ sa_idx, xs->xso.offload_handle);
+ return;
+ }
+
+ ixgbe_ipsec_set_tx_sa(hw, sa_idx, zerobuf, 0);
+ memset(&ipsec->tx_tbl[sa_idx], 0, sizeof(struct tx_sa));
+ ipsec->num_tx_sa--;
+ }
+
+ /* if there are no SAs left, stop the engine to save energy */
+ if (ipsec->num_rx_sa == 0 && ipsec->num_tx_sa == 0) {
+ adapter->flags2 &= ~IXGBE_FLAG2_IPSEC_ENABLED;
+ ixgbe_ipsec_stop_engine(adapter);
+ }
+}
+
+/**
+ * ixgbe_ipsec_offload_ok - can this packet use the xfrm hw offload
+ * @skb: current data packet
+ * @xs: pointer to transformer state struct
+ **/
+static bool ixgbe_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs)
+{
+ if (xs->props.family == AF_INET) {
+ /* Offload with IPv4 options is not supported yet */
+ if (ip_hdr(skb)->ihl != 5)
+ return false;
+ } else {
+ /* Offload with IPv6 extension headers is not support yet */
+ if (ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr))
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * ixgbe_ipsec_free - called by xfrm garbage collections
+ * @xs: pointer to transformer state struct
+ *
+ * We don't have any garbage to collect, so we shouldn't bother
+ * implementing this function, but the XFRM code doesn't check for
+ * existence before calling the API callback.
+ **/
+static void ixgbe_ipsec_free(struct xfrm_state *xs)
+{
+}
+
+static const struct xfrmdev_ops ixgbe_xfrmdev_ops = {
+ .xdo_dev_state_add = ixgbe_ipsec_add_sa,
+ .xdo_dev_state_delete = ixgbe_ipsec_del_sa,
+ .xdo_dev_offload_ok = ixgbe_ipsec_offload_ok,
+ .xdo_dev_state_free = ixgbe_ipsec_free,
+};
+
+/**
+ * ixgbe_ipsec_tx - setup Tx flags for ipsec offload
+ * @tx_ring: outgoing context
+ * @first: current data packet
+ * @itd: ipsec Tx data for later use in building context descriptor
+ **/
+int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring,
+ struct ixgbe_tx_buffer *first,
+ struct ixgbe_ipsec_tx_data *itd)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(tx_ring->netdev);
+ struct ixgbe_ipsec *ipsec = adapter->ipsec;
+ struct xfrm_state *xs;
+ struct tx_sa *tsa;
+
+ if (unlikely(!first->skb->sp->len)) {
+ netdev_err(tx_ring->netdev, "%s: no xfrm state len = %d\n",
+ __func__, first->skb->sp->len);
+ return 0;
+ }
+
+ xs = xfrm_input_state(first->skb);
+ if (unlikely(!xs)) {
+ netdev_err(tx_ring->netdev, "%s: no xfrm_input_state() xs = %p\n",
+ __func__, xs);
+ return 0;
+ }
+
+ itd->sa_idx = xs->xso.offload_handle - IXGBE_IPSEC_BASE_TX_INDEX;
+ if (unlikely(itd->sa_idx > IXGBE_IPSEC_MAX_SA_COUNT)) {
+ netdev_err(tx_ring->netdev, "%s: bad sa_idx=%d handle=%lu\n",
+ __func__, itd->sa_idx, xs->xso.offload_handle);
+ return 0;
+ }
+
+ tsa = &ipsec->tx_tbl[itd->sa_idx];
+ if (unlikely(!tsa->used)) {
+ netdev_err(tx_ring->netdev, "%s: unused sa_idx=%d\n",
+ __func__, itd->sa_idx);
+ return 0;
+ }
+
+ first->tx_flags |= IXGBE_TX_FLAGS_IPSEC | IXGBE_TX_FLAGS_CC;
+
+ itd->flags = 0;
+ if (xs->id.proto == IPPROTO_ESP) {
+ itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP |
+ IXGBE_ADVTXD_TUCMD_L4T_TCP;
+ if (first->protocol == htons(ETH_P_IP))
+ itd->flags |= IXGBE_ADVTXD_TUCMD_IPV4;
+ itd->trailer_len = xs->props.trailer_len;
+ }
+ if (tsa->encrypt)
+ itd->flags |= IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN;
+
+ return 1;
+}
+
+/**
+ * ixgbe_ipsec_rx - decode ipsec bits from Rx descriptor
+ * @rx_ring: receiving ring
+ * @rx_desc: receive data descriptor
+ * @skb: current data packet
+ *
+ * Determine if there was an ipsec encapsulation noticed, and if so set up
+ * the resulting status for later in the receive stack.
+ **/
+void ixgbe_ipsec_rx(struct ixgbe_ring *rx_ring,
+ union ixgbe_adv_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(rx_ring->netdev);
+ __le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info;
+ __le16 ipsec_pkt_types = cpu_to_le16(IXGBE_RXDADV_PKTTYPE_IPSEC_AH |
+ IXGBE_RXDADV_PKTTYPE_IPSEC_ESP);
+ struct ixgbe_ipsec *ipsec = adapter->ipsec;
+ struct xfrm_offload *xo = NULL;
+ struct xfrm_state *xs = NULL;
+ struct ipv6hdr *ip6 = NULL;
+ struct iphdr *ip4 = NULL;
+ void *daddr;
+ __be32 spi;
+ u8 *c_hdr;
+ u8 proto;
+
+ /* Find the ip and crypto headers in the data.
+ * We can assume no vlan header in the way, b/c the
+ * hw won't recognize the IPsec packet and anyway the
+ * currently vlan device doesn't support xfrm offload.
+ */
+ if (pkt_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_IPV4)) {
+ ip4 = (struct iphdr *)(skb->data + ETH_HLEN);
+ daddr = &ip4->daddr;
+ c_hdr = (u8 *)ip4 + ip4->ihl * 4;
+ } else if (pkt_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_IPV6)) {
+ ip6 = (struct ipv6hdr *)(skb->data + ETH_HLEN);
+ daddr = &ip6->daddr;
+ c_hdr = (u8 *)ip6 + sizeof(struct ipv6hdr);
+ } else {
+ return;
+ }
+
+ switch (pkt_info & ipsec_pkt_types) {
+ case cpu_to_le16(IXGBE_RXDADV_PKTTYPE_IPSEC_AH):
+ spi = ((struct ip_auth_hdr *)c_hdr)->spi;
+ proto = IPPROTO_AH;
+ break;
+ case cpu_to_le16(IXGBE_RXDADV_PKTTYPE_IPSEC_ESP):
+ spi = ((struct ip_esp_hdr *)c_hdr)->spi;
+ proto = IPPROTO_ESP;
+ break;
+ default:
+ return;
+ }
+
+ xs = ixgbe_ipsec_find_rx_state(ipsec, daddr, proto, spi, !!ip4);
+ if (unlikely(!xs))
+ return;
+
+ skb->sp = secpath_dup(skb->sp);
+ if (unlikely(!skb->sp))
+ return;
+
+ skb->sp->xvec[skb->sp->len++] = xs;
+ skb->sp->olen++;
+ xo = xfrm_offload(skb);
+ xo->flags = CRYPTO_DONE;
+ xo->status = CRYPTO_SUCCESS;
+
+ adapter->rx_ipsec++;
+}
+
+/**
+ * ixgbe_init_ipsec_offload - initialize security registers for IPSec operation
+ * @adapter: board private structure
+ **/
+void ixgbe_init_ipsec_offload(struct ixgbe_adapter *adapter)
+{
+ struct ixgbe_ipsec *ipsec;
+ size_t size;
+
+ if (adapter->hw.mac.type == ixgbe_mac_82598EB)
+ return;
+
+ ipsec = kzalloc(sizeof(*ipsec), GFP_KERNEL);
+ if (!ipsec)
+ goto err1;
+ hash_init(ipsec->rx_sa_list);
+
+ size = sizeof(struct rx_sa) * IXGBE_IPSEC_MAX_SA_COUNT;
+ ipsec->rx_tbl = kzalloc(size, GFP_KERNEL);
+ if (!ipsec->rx_tbl)
+ goto err2;
+
+ size = sizeof(struct tx_sa) * IXGBE_IPSEC_MAX_SA_COUNT;
+ ipsec->tx_tbl = kzalloc(size, GFP_KERNEL);
+ if (!ipsec->tx_tbl)
+ goto err2;
+
+ size = sizeof(struct rx_ip_sa) * IXGBE_IPSEC_MAX_RX_IP_COUNT;
+ ipsec->ip_tbl = kzalloc(size, GFP_KERNEL);
+ if (!ipsec->ip_tbl)
+ goto err2;
+
+ ipsec->num_rx_sa = 0;
+ ipsec->num_tx_sa = 0;
+
+ adapter->ipsec = ipsec;
+ ixgbe_ipsec_stop_engine(adapter);
+ ixgbe_ipsec_clear_hw_tables(adapter);
+
+ adapter->netdev->xfrmdev_ops = &ixgbe_xfrmdev_ops;
+ adapter->netdev->features |= NETIF_F_HW_ESP;
+ adapter->netdev->hw_enc_features |= NETIF_F_HW_ESP;
+
+ return;
+
+err2:
+ kfree(ipsec->ip_tbl);
+ kfree(ipsec->rx_tbl);
+ kfree(ipsec->tx_tbl);
+err1:
+ kfree(adapter->ipsec);
+ netdev_err(adapter->netdev, "Unable to allocate memory for SA tables");
+}
+
+/**
+ * ixgbe_stop_ipsec_offload - tear down the ipsec offload
+ * @adapter: board private structure
+ **/
+void ixgbe_stop_ipsec_offload(struct ixgbe_adapter *adapter)
+{
+ struct ixgbe_ipsec *ipsec = adapter->ipsec;
+
+ adapter->ipsec = NULL;
+ if (ipsec) {
+ kfree(ipsec->ip_tbl);
+ kfree(ipsec->rx_tbl);
+ kfree(ipsec->tx_tbl);
+ kfree(ipsec);
+ }
+}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h
new file mode 100644
index 000000000000..da3ce7849e85
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.h
@@ -0,0 +1,93 @@
+/*******************************************************************************
+
+ Intel 10 Gigabit PCI Express Linux driver
+ Copyright(c) 2017 Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms and conditions of the GNU General Public License,
+ version 2, as published by the Free Software Foundation.
+
+ This program is distributed in the hope it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program. If not, see <http://www.gnu.org/licenses/>.
+
+ The full GNU General Public License is included in this distribution in
+ the file called "COPYING".
+
+ Contact Information:
+ Linux NICS <linux.nics@intel.com>
+ e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
+*******************************************************************************/
+
+#ifndef _IXGBE_IPSEC_H_
+#define _IXGBE_IPSEC_H_
+
+#define IXGBE_IPSEC_MAX_SA_COUNT 1024
+#define IXGBE_IPSEC_MAX_RX_IP_COUNT 128
+#define IXGBE_IPSEC_BASE_RX_INDEX 0
+#define IXGBE_IPSEC_BASE_TX_INDEX IXGBE_IPSEC_MAX_SA_COUNT
+
+#define IXGBE_RXTXIDX_IPS_EN 0x00000001
+#define IXGBE_RXIDX_TBL_SHIFT 1
+enum ixgbe_ipsec_tbl_sel {
+ ips_rx_ip_tbl = 0x01,
+ ips_rx_spi_tbl = 0x02,
+ ips_rx_key_tbl = 0x03,
+};
+
+#define IXGBE_RXTXIDX_IDX_SHIFT 3
+#define IXGBE_RXTXIDX_READ 0x40000000
+#define IXGBE_RXTXIDX_WRITE 0x80000000
+
+#define IXGBE_RXMOD_VALID 0x00000001
+#define IXGBE_RXMOD_PROTO_ESP 0x00000004
+#define IXGBE_RXMOD_DECRYPT 0x00000008
+#define IXGBE_RXMOD_IPV6 0x00000010
+
+struct rx_sa {
+ struct hlist_node hlist;
+ struct xfrm_state *xs;
+ __be32 ipaddr[4];
+ u32 key[4];
+ u32 salt;
+ u32 mode;
+ u8 iptbl_ind;
+ bool used;
+ bool decrypt;
+};
+
+struct rx_ip_sa {
+ __be32 ipaddr[4];
+ u32 ref_cnt;
+ bool used;
+};
+
+struct tx_sa {
+ struct xfrm_state *xs;
+ u32 key[4];
+ u32 salt;
+ bool encrypt;
+ bool used;
+};
+
+struct ixgbe_ipsec_tx_data {
+ u32 flags;
+ u16 trailer_len;
+ u16 sa_idx;
+};
+
+struct ixgbe_ipsec {
+ u16 num_rx_sa;
+ u16 num_tx_sa;
+ struct rx_ip_sa *ip_tbl;
+ struct rx_sa *rx_tbl;
+ struct tx_sa *tx_tbl;
+ DECLARE_HASHTABLE(rx_sa_list, 10);
+};
+#endif /* _IXGBE_IPSEC_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
index b3c282d09b18..4242f0213e46 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
@@ -1282,7 +1282,7 @@ void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter *adapter)
}
void ixgbe_tx_ctxtdesc(struct ixgbe_ring *tx_ring, u32 vlan_macip_lens,
- u32 fcoe_sof_eof, u32 type_tucmd, u32 mss_l4len_idx)
+ u32 fceof_saidx, u32 type_tucmd, u32 mss_l4len_idx)
{
struct ixgbe_adv_tx_context_desc *context_desc;
u16 i = tx_ring->next_to_use;
@@ -1296,7 +1296,7 @@ void ixgbe_tx_ctxtdesc(struct ixgbe_ring *tx_ring, u32 vlan_macip_lens,
type_tucmd |= IXGBE_TXD_CMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens);
- context_desc->seqnum_seed = cpu_to_le32(fcoe_sof_eof);
+ context_desc->fceof_saidx = cpu_to_le32(fceof_saidx);
context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd);
context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 4f28621b76e1..0da5aa2c8aba 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -1171,7 +1171,7 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
struct ixgbe_adapter *adapter = q_vector->adapter;
struct ixgbe_tx_buffer *tx_buffer;
union ixgbe_adv_tx_desc *tx_desc;
- unsigned int total_bytes = 0, total_packets = 0;
+ unsigned int total_bytes = 0, total_packets = 0, total_ipsec = 0;
unsigned int budget = q_vector->tx.work_limit;
unsigned int i = tx_ring->next_to_clean;
@@ -1202,6 +1202,8 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
/* update the statistics for this packet */
total_bytes += tx_buffer->bytecount;
total_packets += tx_buffer->gso_segs;
+ if (tx_buffer->tx_flags & IXGBE_TX_FLAGS_IPSEC)
+ total_ipsec++;
/* free the skb */
if (ring_is_xdp(tx_ring))
@@ -1264,6 +1266,7 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
u64_stats_update_end(&tx_ring->syncp);
q_vector->tx.total_bytes += total_bytes;
q_vector->tx.total_packets += total_packets;
+ adapter->tx_ipsec += total_ipsec;
if (check_for_tx_hang(tx_ring) && ixgbe_check_tx_hang(tx_ring)) {
/* schedule immediate reset if we believe we hung */
@@ -1752,6 +1755,9 @@ static void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring,
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
}
+ if (ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_STAT_SECP))
+ ixgbe_ipsec_rx(rx_ring, rx_desc, skb);
+
skb->protocol = eth_type_trans(skb, dev);
/* record Rx queue, or update MACVLAN statistics */
@@ -4127,11 +4133,15 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter,
rxdctl &= ~0x3FFFFF;
rxdctl |= 0x080420;
#if (PAGE_SIZE < 8192)
- } else {
+ /* RXDCTL.RLPML does not work on 82599 */
+ } else if (hw->mac.type != ixgbe_mac_82599EB) {
rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK |
IXGBE_RXDCTL_RLPML_EN);
- /* Limit the maximum frame size so we don't overrun the skb */
+ /* Limit the maximum frame size so we don't overrun the skb.
+ * This can happen in SRIOV mode when the MTU of the VF is
+ * higher than the MTU of the PF.
+ */
if (ring_uses_build_skb(ring) &&
!test_bit(__IXGBE_RX_3K_BUFFER, &ring->state))
rxdctl |= IXGBE_MAX_2K_FRAME_BUILD_SKB |
@@ -5425,6 +5435,7 @@ static void ixgbe_configure(struct ixgbe_adapter *adapter)
ixgbe_set_rx_mode(adapter->netdev);
ixgbe_restore_vlan(adapter);
+ ixgbe_ipsec_restore(adapter);
switch (hw->mac.type) {
case ixgbe_mac_82599EB:
@@ -7252,6 +7263,9 @@ static void ixgbe_watchdog_link_is_up(struct ixgbe_adapter *adapter)
case IXGBE_LINK_SPEED_10GB_FULL:
speed_str = "10 Gbps";
break;
+ case IXGBE_LINK_SPEED_5GB_FULL:
+ speed_str = "5 Gbps";
+ break;
case IXGBE_LINK_SPEED_2_5GB_FULL:
speed_str = "2.5 Gbps";
break;
@@ -7795,10 +7809,12 @@ static inline bool ixgbe_ipv6_csum_is_sctp(struct sk_buff *skb)
}
static void ixgbe_tx_csum(struct ixgbe_ring *tx_ring,
- struct ixgbe_tx_buffer *first)
+ struct ixgbe_tx_buffer *first,
+ struct ixgbe_ipsec_tx_data *itd)
{
struct sk_buff *skb = first->skb;
u32 vlan_macip_lens = 0;
+ u32 fceof_saidx = 0;
u32 type_tucmd = 0;
if (skb->ip_summed != CHECKSUM_PARTIAL) {
@@ -7839,7 +7855,12 @@ no_csum:
vlan_macip_lens |= skb_network_offset(skb) << IXGBE_ADVTXD_MACLEN_SHIFT;
vlan_macip_lens |= first->tx_flags & IXGBE_TX_FLAGS_VLAN_MASK;
- ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, 0, type_tucmd, 0);
+ if (first->tx_flags & IXGBE_TX_FLAGS_IPSEC) {
+ fceof_saidx |= itd->sa_idx;
+ type_tucmd |= itd->flags | itd->trailer_len;
+ }
+
+ ixgbe_tx_ctxtdesc(tx_ring, vlan_macip_lens, fceof_saidx, type_tucmd, 0);
}
#define IXGBE_SET_FLAG(_input, _flag, _result) \
@@ -7882,11 +7903,16 @@ static void ixgbe_tx_olinfo_status(union ixgbe_adv_tx_desc *tx_desc,
IXGBE_TX_FLAGS_CSUM,
IXGBE_ADVTXD_POPTS_TXSM);
- /* enble IPv4 checksum for TSO */
+ /* enable IPv4 checksum for TSO */
olinfo_status |= IXGBE_SET_FLAG(tx_flags,
IXGBE_TX_FLAGS_IPV4,
IXGBE_ADVTXD_POPTS_IXSM);
+ /* enable IPsec */
+ olinfo_status |= IXGBE_SET_FLAG(tx_flags,
+ IXGBE_TX_FLAGS_IPSEC,
+ IXGBE_ADVTXD_POPTS_IPSEC);
+
/*
* Check Context must be set if Tx switch is enabled, which it
* always is for case where virtual functions are running
@@ -8350,6 +8376,7 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
u32 tx_flags = 0;
unsigned short f;
u16 count = TXD_USE_COUNT(skb_headlen(skb));
+ struct ixgbe_ipsec_tx_data ipsec_tx = { 0 };
__be16 protocol = skb->protocol;
u8 hdr_len = 0;
@@ -8454,11 +8481,16 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
}
#endif /* IXGBE_FCOE */
+
+#ifdef CONFIG_XFRM_OFFLOAD
+ if (skb->sp && !ixgbe_ipsec_tx(tx_ring, first, &ipsec_tx))
+ goto out_drop;
+#endif
tso = ixgbe_tso(tx_ring, first, &hdr_len);
if (tso < 0)
goto out_drop;
else if (!tso)
- ixgbe_tx_csum(tx_ring, first);
+ ixgbe_tx_csum(tx_ring, first, &ipsec_tx);
/* add the ATR filter if ATR is on */
if (test_bit(__IXGBE_TX_FDIR_INIT_DONE, &tx_ring->state))
@@ -9278,9 +9310,6 @@ free_jump:
static int ixgbe_setup_tc_cls_u32(struct ixgbe_adapter *adapter,
struct tc_cls_u32_offload *cls_u32)
{
- if (cls_u32->common.chain_index)
- return -EOPNOTSUPP;
-
switch (cls_u32->command) {
case TC_CLSU32_NEW_KNODE:
case TC_CLSU32_REPLACE_KNODE:
@@ -9302,7 +9331,7 @@ static int ixgbe_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
{
struct ixgbe_adapter *adapter = cb_priv;
- if (!tc_can_offload(adapter->netdev))
+ if (!tc_cls_can_offload_and_chain0(adapter->netdev, type_data))
return -EOPNOTSUPP;
switch (type) {
@@ -9870,6 +9899,12 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev,
if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID))
features &= ~NETIF_F_TSO;
+#ifdef CONFIG_XFRM_OFFLOAD
+ /* IPsec offload doesn't get along well with others *yet* */
+ if (skb->sp)
+ features &= ~(NETIF_F_TSO | NETIF_F_HW_CSUM);
+#endif
+
return features;
}
@@ -10459,6 +10494,7 @@ skip_sriov:
NETIF_F_FCOE_MTU;
}
#endif /* IXGBE_FCOE */
+ ixgbe_init_ipsec_offload(adapter);
if (adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE)
netdev->hw_features |= NETIF_F_LRO;
@@ -10694,6 +10730,7 @@ static void ixgbe_remove(struct pci_dev *pdev)
if (netdev->reg_state == NETREG_REGISTERED)
unregister_netdev(netdev);
+ ixgbe_stop_ipsec_offload(adapter);
ixgbe_clear_interrupt_scheme(adapter);
ixgbe_release_hw_control(adapter);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index 21eb79ae3c30..ca45359686d3 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -2360,11 +2360,6 @@ enum {
#define IXGBE_TXD_CMD_VLE 0x40000000 /* Add VLAN tag */
#define IXGBE_TXD_STAT_DD 0x00000001 /* Descriptor Done */
-#define IXGBE_RXDADV_IPSEC_STATUS_SECP 0x00020000
-#define IXGBE_RXDADV_IPSEC_ERROR_INVALID_PROTOCOL 0x08000000
-#define IXGBE_RXDADV_IPSEC_ERROR_INVALID_LENGTH 0x10000000
-#define IXGBE_RXDADV_IPSEC_ERROR_AUTH_FAILED 0x18000000
-#define IXGBE_RXDADV_IPSEC_ERROR_BIT_MASK 0x18000000
/* Multiple Transmit Queue Command Register */
#define IXGBE_MTQC_RT_ENA 0x1 /* DCB Enable */
#define IXGBE_MTQC_VT_ENA 0x2 /* VMDQ2 Enable */
@@ -2416,6 +2411,9 @@ enum {
#define IXGBE_RXDADV_ERR_LE 0x02000000 /* Length Error */
#define IXGBE_RXDADV_ERR_PE 0x08000000 /* Packet Error */
#define IXGBE_RXDADV_ERR_OSE 0x10000000 /* Oversize Error */
+#define IXGBE_RXDADV_ERR_IPSEC_INV_PROTOCOL 0x08000000 /* overlap ERR_PE */
+#define IXGBE_RXDADV_ERR_IPSEC_INV_LENGTH 0x10000000 /* overlap ERR_OSE */
+#define IXGBE_RXDADV_ERR_IPSEC_AUTH_FAILED 0x18000000
#define IXGBE_RXDADV_ERR_USE 0x20000000 /* Undersize Error */
#define IXGBE_RXDADV_ERR_TCPE 0x40000000 /* TCP/UDP Checksum Error */
#define IXGBE_RXDADV_ERR_IPE 0x80000000 /* IP Checksum Error */
@@ -2437,6 +2435,7 @@ enum {
#define IXGBE_RXDADV_STAT_FCSTAT_FCPRSP 0x00000020 /* 10: Recv. FCP_RSP */
#define IXGBE_RXDADV_STAT_FCSTAT_DDP 0x00000030 /* 11: Ctxt w/ DDP */
#define IXGBE_RXDADV_STAT_TS 0x00010000 /* IEEE 1588 Time Stamp */
+#define IXGBE_RXDADV_STAT_SECP 0x00020000 /* IPsec/MACsec pkt found */
/* PSRTYPE bit definitions */
#define IXGBE_PSRTYPE_TCPHDR 0x00000010
@@ -2503,13 +2502,6 @@ enum {
#define IXGBE_RXDADV_PKTTYPE_ETQF_MASK 0x00000070 /* ETQF has 8 indices */
#define IXGBE_RXDADV_PKTTYPE_ETQF_SHIFT 4 /* Right-shift 4 bits */
-/* Security Processing bit Indication */
-#define IXGBE_RXDADV_LNKSEC_STATUS_SECP 0x00020000
-#define IXGBE_RXDADV_LNKSEC_ERROR_NO_SA_MATCH 0x08000000
-#define IXGBE_RXDADV_LNKSEC_ERROR_REPLAY_ERROR 0x10000000
-#define IXGBE_RXDADV_LNKSEC_ERROR_BIT_MASK 0x18000000
-#define IXGBE_RXDADV_LNKSEC_ERROR_BAD_SIG 0x18000000
-
/* Masks to determine if packets should be dropped due to frame errors */
#define IXGBE_RXD_ERR_FRAME_ERR_MASK ( \
IXGBE_RXD_ERR_CE | \
@@ -2523,6 +2515,8 @@ enum {
IXGBE_RXDADV_ERR_LE | \
IXGBE_RXDADV_ERR_PE | \
IXGBE_RXDADV_ERR_OSE | \
+ IXGBE_RXDADV_ERR_IPSEC_INV_PROTOCOL | \
+ IXGBE_RXDADV_ERR_IPSEC_INV_LENGTH | \
IXGBE_RXDADV_ERR_USE)
/* Multicast bit mask */
@@ -2901,7 +2895,7 @@ union ixgbe_adv_rx_desc {
/* Context descriptors */
struct ixgbe_adv_tx_context_desc {
__le32 vlan_macip_lens;
- __le32 seqnum_seed;
+ __le32 fceof_saidx;
__le32 type_tucmd_mlhl;
__le32 mss_l4len_idx;
};
@@ -2932,6 +2926,7 @@ struct ixgbe_adv_tx_context_desc {
IXGBE_ADVTXD_POPTS_SHIFT)
#define IXGBE_ADVTXD_POPTS_TXSM (IXGBE_TXD_POPTS_TXSM << \
IXGBE_ADVTXD_POPTS_SHIFT)
+#define IXGBE_ADVTXD_POPTS_IPSEC 0x00000400 /* IPSec offload request */
#define IXGBE_ADVTXD_POPTS_ISCO_1ST 0x00000000 /* 1st TSO of iSCSI PDU */
#define IXGBE_ADVTXD_POPTS_ISCO_MDL 0x00000800 /* Middle TSO of iSCSI PDU */
#define IXGBE_ADVTXD_POPTS_ISCO_LAST 0x00001000 /* Last TSO of iSCSI PDU */
@@ -2947,7 +2942,6 @@ struct ixgbe_adv_tx_context_desc {
#define IXGBE_ADVTXD_TUCMD_L4T_SCTP 0x00001000 /* L4 Packet TYPE of SCTP */
#define IXGBE_ADVTXD_TUCMD_L4T_RSV 0x00001800 /* RSV L4 Packet TYPE */
#define IXGBE_ADVTXD_TUCMD_MKRREQ 0x00002000 /*Req requires Markers and CRC*/
-#define IXGBE_ADVTXD_POPTS_IPSEC 0x00000400 /* IPSec offload request */
#define IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP 0x00002000 /* IPSec Type ESP */
#define IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN 0x00004000/* ESP Encrypt Enable */
#define IXGBE_ADVTXT_TUCMD_FCOE 0x00008000 /* FCoE Frame Type */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
index 3bce26e77090..f470d0204771 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
@@ -949,7 +949,7 @@ static s32 ixgbe_checksum_ptr_x550(struct ixgbe_hw *hw, u16 ptr,
u16 length, bufsz, i, start;
u16 *local_buffer;
- bufsz = sizeof(buf) / sizeof(buf[0]);
+ bufsz = ARRAY_SIZE(buf);
/* Read a chunk at the pointer location */
if (!buffer) {
diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
index ff9d05f308ee..4400e49090b4 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
@@ -75,6 +75,9 @@ static struct ixgbe_stats ixgbevf_gstrings_stats[] = {
IXGBEVF_STAT("tx_timeout_count", tx_timeout_count),
IXGBEVF_NETDEV_STAT(multicast),
IXGBEVF_STAT("rx_csum_offload_errors", hw_csum_rx_error),
+ IXGBEVF_STAT("alloc_rx_page", alloc_rx_page),
+ IXGBEVF_STAT("alloc_rx_page_failed", alloc_rx_page_failed),
+ IXGBEVF_STAT("alloc_rx_buff_failed", alloc_rx_buff_failed),
};
#define IXGBEVF_QUEUE_STATS_LEN ( \
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
index 581f44bbd7b3..f6952425c87d 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
@@ -62,7 +62,12 @@ struct ixgbevf_tx_buffer {
struct ixgbevf_rx_buffer {
dma_addr_t dma;
struct page *page;
- unsigned int page_offset;
+#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
+ __u32 page_offset;
+#else
+ __u16 page_offset;
+#endif
+ __u16 pagecnt_bias;
};
struct ixgbevf_stats {
@@ -79,6 +84,7 @@ struct ixgbevf_tx_queue_stats {
struct ixgbevf_rx_queue_stats {
u64 alloc_rx_page_failed;
u64 alloc_rx_buff_failed;
+ u64 alloc_rx_page;
u64 csum_err;
};
@@ -260,6 +266,9 @@ static inline void ixgbevf_write_tail(struct ixgbevf_ring *ring, u32 value)
#define MIN_MSIX_Q_VECTORS 1
#define MIN_MSIX_COUNT (MIN_MSIX_Q_VECTORS + NON_Q_VECTORS)
+#define IXGBEVF_RX_DMA_ATTR \
+ (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
+
/* board specific private data structure */
struct ixgbevf_adapter {
/* this field must be first, see ixgbevf_process_skb_fields */
@@ -287,8 +296,9 @@ struct ixgbevf_adapter {
u64 hw_csum_rx_error;
u64 hw_rx_no_dma_resources;
int num_msix_vectors;
- u32 alloc_rx_page_failed;
- u32 alloc_rx_buff_failed;
+ u64 alloc_rx_page_failed;
+ u64 alloc_rx_buff_failed;
+ u64 alloc_rx_page;
struct msix_entry *msix_entries;
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index ed5c3aea7939..9b3d43d28106 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -206,28 +206,6 @@ static void ixgbevf_set_ivar(struct ixgbevf_adapter *adapter, s8 direction,
}
}
-static void ixgbevf_unmap_and_free_tx_resource(struct ixgbevf_ring *tx_ring,
- struct ixgbevf_tx_buffer *tx_buffer)
-{
- if (tx_buffer->skb) {
- dev_kfree_skb_any(tx_buffer->skb);
- if (dma_unmap_len(tx_buffer, len))
- dma_unmap_single(tx_ring->dev,
- dma_unmap_addr(tx_buffer, dma),
- dma_unmap_len(tx_buffer, len),
- DMA_TO_DEVICE);
- } else if (dma_unmap_len(tx_buffer, len)) {
- dma_unmap_page(tx_ring->dev,
- dma_unmap_addr(tx_buffer, dma),
- dma_unmap_len(tx_buffer, len),
- DMA_TO_DEVICE);
- }
- tx_buffer->next_to_watch = NULL;
- tx_buffer->skb = NULL;
- dma_unmap_len_set(tx_buffer, len, 0);
- /* tx_buffer must be completely set up in the transmit path */
-}
-
static u64 ixgbevf_get_tx_completed(struct ixgbevf_ring *ring)
{
return ring->stats.packets;
@@ -349,7 +327,6 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector,
DMA_TO_DEVICE);
/* clear tx_buffer data */
- tx_buffer->skb = NULL;
dma_unmap_len_set(tx_buffer, len, 0);
/* unmap remaining buffers */
@@ -595,8 +572,8 @@ static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring,
}
/* map page for use */
- dma = dma_map_page(rx_ring->dev, page, 0,
- PAGE_SIZE, DMA_FROM_DEVICE);
+ dma = dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE,
+ DMA_FROM_DEVICE, IXGBEVF_RX_DMA_ATTR);
/* if mapping failed free memory back to system since
* there isn't much point in holding memory we can't use
@@ -604,13 +581,15 @@ static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring,
if (dma_mapping_error(rx_ring->dev, dma)) {
__free_page(page);
- rx_ring->rx_stats.alloc_rx_buff_failed++;
+ rx_ring->rx_stats.alloc_rx_page_failed++;
return false;
}
bi->dma = dma;
bi->page = page;
bi->page_offset = 0;
+ bi->pagecnt_bias = 1;
+ rx_ring->rx_stats.alloc_rx_page++;
return true;
}
@@ -639,6 +618,12 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring,
if (!ixgbevf_alloc_mapped_page(rx_ring, bi))
break;
+ /* sync the buffer for use by the device */
+ dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
+ bi->page_offset,
+ IXGBEVF_RX_BUFSZ,
+ DMA_FROM_DEVICE);
+
/* Refresh the desc even if pkt_addr didn't change
* because each write-back erases this info.
*/
@@ -653,8 +638,8 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring,
i -= rx_ring->count;
}
- /* clear the hdr_addr for the next_to_use descriptor */
- rx_desc->read.hdr_addr = 0;
+ /* clear the length for the next_to_use descriptor */
+ rx_desc->wb.upper.length = 0;
cleaned_count--;
} while (cleaned_count);
@@ -741,12 +726,7 @@ static void ixgbevf_reuse_rx_page(struct ixgbevf_ring *rx_ring,
new_buff->page = old_buff->page;
new_buff->dma = old_buff->dma;
new_buff->page_offset = old_buff->page_offset;
-
- /* sync the buffer for use by the device */
- dma_sync_single_range_for_device(rx_ring->dev, new_buff->dma,
- new_buff->page_offset,
- IXGBEVF_RX_BUFSZ,
- DMA_FROM_DEVICE);
+ new_buff->pagecnt_bias = old_buff->pagecnt_bias;
}
static inline bool ixgbevf_page_is_reserved(struct page *page)
@@ -754,6 +734,45 @@ static inline bool ixgbevf_page_is_reserved(struct page *page)
return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
}
+static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer,
+ struct page *page,
+ const unsigned int truesize)
+{
+ unsigned int pagecnt_bias = rx_buffer->pagecnt_bias--;
+
+ /* avoid re-using remote pages */
+ if (unlikely(ixgbevf_page_is_reserved(page)))
+ return false;
+
+#if (PAGE_SIZE < 8192)
+ /* if we are only owner of page we can reuse it */
+ if (unlikely(page_ref_count(page) != pagecnt_bias))
+ return false;
+
+ /* flip page offset to other buffer */
+ rx_buffer->page_offset ^= IXGBEVF_RX_BUFSZ;
+
+#else
+ /* move offset up to the next cache line */
+ rx_buffer->page_offset += truesize;
+
+ if (rx_buffer->page_offset > (PAGE_SIZE - IXGBEVF_RX_BUFSZ))
+ return false;
+
+#endif
+
+ /* If we have drained the page fragment pool we need to update
+ * the pagecnt_bias and page count so that we fully restock the
+ * number of references the driver holds.
+ */
+ if (unlikely(pagecnt_bias == 1)) {
+ page_ref_add(page, USHRT_MAX);
+ rx_buffer->pagecnt_bias = USHRT_MAX;
+ }
+
+ return true;
+}
+
/**
* ixgbevf_add_rx_frag - Add contents of Rx buffer to sk_buff
* @rx_ring: rx descriptor ring to transact packets on
@@ -771,12 +790,12 @@ static inline bool ixgbevf_page_is_reserved(struct page *page)
**/
static bool ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring,
struct ixgbevf_rx_buffer *rx_buffer,
+ u16 size,
union ixgbe_adv_rx_desc *rx_desc,
struct sk_buff *skb)
{
struct page *page = rx_buffer->page;
unsigned char *va = page_address(page) + rx_buffer->page_offset;
- unsigned int size = le16_to_cpu(rx_desc->wb.upper.length);
#if (PAGE_SIZE < 8192)
unsigned int truesize = IXGBEVF_RX_BUFSZ;
#else
@@ -795,7 +814,6 @@ static bool ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring,
return true;
/* this page cannot be reused so discard it */
- put_page(page);
return false;
}
@@ -815,32 +833,7 @@ add_tail_frag:
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
(unsigned long)va & ~PAGE_MASK, size, truesize);
- /* avoid re-using remote pages */
- if (unlikely(ixgbevf_page_is_reserved(page)))
- return false;
-
-#if (PAGE_SIZE < 8192)
- /* if we are only owner of page we can reuse it */
- if (unlikely(page_count(page) != 1))
- return false;
-
- /* flip page offset to other buffer */
- rx_buffer->page_offset ^= IXGBEVF_RX_BUFSZ;
-
-#else
- /* move offset up to the next cache line */
- rx_buffer->page_offset += truesize;
-
- if (rx_buffer->page_offset > (PAGE_SIZE - IXGBEVF_RX_BUFSZ))
- return false;
-
-#endif
- /* Even if we own the page, we are not allowed to use atomic_set()
- * This would break get_page_unless_zero() users.
- */
- page_ref_inc(page);
-
- return true;
+ return ixgbevf_can_reuse_rx_page(rx_buffer, page, truesize);
}
static struct sk_buff *ixgbevf_fetch_rx_buffer(struct ixgbevf_ring *rx_ring,
@@ -849,11 +842,19 @@ static struct sk_buff *ixgbevf_fetch_rx_buffer(struct ixgbevf_ring *rx_ring,
{
struct ixgbevf_rx_buffer *rx_buffer;
struct page *page;
+ u16 size = le16_to_cpu(rx_desc->wb.upper.length);
rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
page = rx_buffer->page;
prefetchw(page);
+ /* we are reusing so sync this buffer for CPU use */
+ dma_sync_single_range_for_cpu(rx_ring->dev,
+ rx_buffer->dma,
+ rx_buffer->page_offset,
+ size,
+ DMA_FROM_DEVICE);
+
if (likely(!skb)) {
void *page_addr = page_address(page) +
rx_buffer->page_offset;
@@ -879,21 +880,18 @@ static struct sk_buff *ixgbevf_fetch_rx_buffer(struct ixgbevf_ring *rx_ring,
prefetchw(skb->data);
}
- /* we are reusing so sync this buffer for CPU use */
- dma_sync_single_range_for_cpu(rx_ring->dev,
- rx_buffer->dma,
- rx_buffer->page_offset,
- IXGBEVF_RX_BUFSZ,
- DMA_FROM_DEVICE);
-
/* pull page into skb */
- if (ixgbevf_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) {
+ if (ixgbevf_add_rx_frag(rx_ring, rx_buffer, size, rx_desc, skb)) {
/* hand second half of page back to the ring */
ixgbevf_reuse_rx_page(rx_ring, rx_buffer);
} else {
- /* we are not reusing the buffer so unmap it */
- dma_unmap_page(rx_ring->dev, rx_buffer->dma,
- PAGE_SIZE, DMA_FROM_DEVICE);
+ /* We are not reusing the buffer so unmap it and free
+ * any references we are holding to it
+ */
+ dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
+ PAGE_SIZE, DMA_FROM_DEVICE,
+ IXGBEVF_RX_DMA_ATTR);
+ __page_frag_cache_drain(page, rx_buffer->pagecnt_bias);
}
/* clear contents of buffer_info */
@@ -930,7 +928,7 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
rx_desc = IXGBEVF_RX_DESC(rx_ring, rx_ring->next_to_clean);
- if (!ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_DD))
+ if (!rx_desc->wb.upper.length)
break;
/* This memory barrier is needed to keep us from reading
@@ -943,8 +941,10 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
skb = ixgbevf_fetch_rx_buffer(rx_ring, rx_desc, skb);
/* exit if we failed to retrieve a buffer */
- if (!skb)
+ if (!skb) {
+ rx_ring->rx_stats.alloc_rx_buff_failed++;
break;
+ }
cleaned_count++;
@@ -1553,6 +1553,10 @@ static void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter,
txdctl |= (1u << 8) | /* HTHRESH = 1 */
32; /* PTHRESH = 32 */
+ /* reinitialize tx_buffer_info */
+ memset(ring->tx_buffer_info, 0,
+ sizeof(struct ixgbevf_tx_buffer) * ring->count);
+
clear_bit(__IXGBEVF_HANG_CHECK_ARMED, &ring->state);
IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(reg_idx), txdctl);
@@ -1721,6 +1725,7 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter,
struct ixgbevf_ring *ring)
{
struct ixgbe_hw *hw = &adapter->hw;
+ union ixgbe_adv_rx_desc *rx_desc;
u64 rdba = ring->dma;
u32 rxdctl;
u8 reg_idx = ring->reg_idx;
@@ -1749,6 +1754,14 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter,
IXGBE_WRITE_REG(hw, IXGBE_VFRDT(reg_idx), 0);
ring->tail = adapter->io_addr + IXGBE_VFRDT(reg_idx);
+ /* initialize rx_buffer_info */
+ memset(ring->rx_buffer_info, 0,
+ sizeof(struct ixgbevf_rx_buffer) * ring->count);
+
+ /* initialize Rx descriptor 0 */
+ rx_desc = IXGBEVF_RX_DESC(ring, 0);
+ rx_desc->wb.upper.length = 0;
+
/* reset ntu and ntc to place SW in sync with hardwdare */
ring->next_to_clean = 0;
ring->next_to_use = 0;
@@ -2103,9 +2116,7 @@ void ixgbevf_up(struct ixgbevf_adapter *adapter)
**/
static void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring)
{
- struct device *dev = rx_ring->dev;
- unsigned long size;
- unsigned int i;
+ u16 i = rx_ring->next_to_clean;
/* Free Rx ring sk_buff */
if (rx_ring->skb) {
@@ -2113,29 +2124,39 @@ static void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring)
rx_ring->skb = NULL;
}
- /* ring already cleared, nothing to do */
- if (!rx_ring->rx_buffer_info)
- return;
-
/* Free all the Rx ring pages */
- for (i = 0; i < rx_ring->count; i++) {
+ while (i != rx_ring->next_to_alloc) {
struct ixgbevf_rx_buffer *rx_buffer;
rx_buffer = &rx_ring->rx_buffer_info[i];
- if (rx_buffer->dma)
- dma_unmap_page(dev, rx_buffer->dma,
- PAGE_SIZE, DMA_FROM_DEVICE);
- rx_buffer->dma = 0;
- if (rx_buffer->page)
- __free_page(rx_buffer->page);
- rx_buffer->page = NULL;
- }
- size = sizeof(struct ixgbevf_rx_buffer) * rx_ring->count;
- memset(rx_ring->rx_buffer_info, 0, size);
+ /* Invalidate cache lines that may have been written to by
+ * device so that we avoid corrupting memory.
+ */
+ dma_sync_single_range_for_cpu(rx_ring->dev,
+ rx_buffer->dma,
+ rx_buffer->page_offset,
+ IXGBEVF_RX_BUFSZ,
+ DMA_FROM_DEVICE);
+
+ /* free resources associated with mapping */
+ dma_unmap_page_attrs(rx_ring->dev,
+ rx_buffer->dma,
+ PAGE_SIZE,
+ DMA_FROM_DEVICE,
+ IXGBEVF_RX_DMA_ATTR);
+
+ __page_frag_cache_drain(rx_buffer->page,
+ rx_buffer->pagecnt_bias);
- /* Zero out the descriptor ring */
- memset(rx_ring->desc, 0, rx_ring->size);
+ i++;
+ if (i == rx_ring->count)
+ i = 0;
+ }
+
+ rx_ring->next_to_alloc = 0;
+ rx_ring->next_to_clean = 0;
+ rx_ring->next_to_use = 0;
}
/**
@@ -2144,23 +2165,57 @@ static void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring)
**/
static void ixgbevf_clean_tx_ring(struct ixgbevf_ring *tx_ring)
{
- struct ixgbevf_tx_buffer *tx_buffer_info;
- unsigned long size;
- unsigned int i;
+ u16 i = tx_ring->next_to_clean;
+ struct ixgbevf_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i];
- if (!tx_ring->tx_buffer_info)
- return;
+ while (i != tx_ring->next_to_use) {
+ union ixgbe_adv_tx_desc *eop_desc, *tx_desc;
- /* Free all the Tx ring sk_buffs */
- for (i = 0; i < tx_ring->count; i++) {
- tx_buffer_info = &tx_ring->tx_buffer_info[i];
- ixgbevf_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
+ /* Free all the Tx ring sk_buffs */
+ dev_kfree_skb_any(tx_buffer->skb);
+
+ /* unmap skb header data */
+ dma_unmap_single(tx_ring->dev,
+ dma_unmap_addr(tx_buffer, dma),
+ dma_unmap_len(tx_buffer, len),
+ DMA_TO_DEVICE);
+
+ /* check for eop_desc to determine the end of the packet */
+ eop_desc = tx_buffer->next_to_watch;
+ tx_desc = IXGBEVF_TX_DESC(tx_ring, i);
+
+ /* unmap remaining buffers */
+ while (tx_desc != eop_desc) {
+ tx_buffer++;
+ tx_desc++;
+ i++;
+ if (unlikely(i == tx_ring->count)) {
+ i = 0;
+ tx_buffer = tx_ring->tx_buffer_info;
+ tx_desc = IXGBEVF_TX_DESC(tx_ring, 0);
+ }
+
+ /* unmap any remaining paged data */
+ if (dma_unmap_len(tx_buffer, len))
+ dma_unmap_page(tx_ring->dev,
+ dma_unmap_addr(tx_buffer, dma),
+ dma_unmap_len(tx_buffer, len),
+ DMA_TO_DEVICE);
+ }
+
+ /* move us one more past the eop_desc for start of next pkt */
+ tx_buffer++;
+ i++;
+ if (unlikely(i == tx_ring->count)) {
+ i = 0;
+ tx_buffer = tx_ring->tx_buffer_info;
+ }
}
- size = sizeof(struct ixgbevf_tx_buffer) * tx_ring->count;
- memset(tx_ring->tx_buffer_info, 0, size);
+ /* reset next_to_use and next_to_clean */
+ tx_ring->next_to_use = 0;
+ tx_ring->next_to_clean = 0;
- memset(tx_ring->desc, 0, tx_ring->size);
}
/**
@@ -2712,6 +2767,8 @@ out:
void ixgbevf_update_stats(struct ixgbevf_adapter *adapter)
{
struct ixgbe_hw *hw = &adapter->hw;
+ u64 alloc_rx_page_failed = 0, alloc_rx_buff_failed = 0;
+ u64 alloc_rx_page = 0, hw_csum_rx_error = 0;
int i;
if (test_bit(__IXGBEVF_DOWN, &adapter->state) ||
@@ -2732,10 +2789,18 @@ void ixgbevf_update_stats(struct ixgbevf_adapter *adapter)
adapter->stats.vfmprc);
for (i = 0; i < adapter->num_rx_queues; i++) {
- adapter->hw_csum_rx_error +=
- adapter->rx_ring[i]->hw_csum_rx_error;
- adapter->rx_ring[i]->hw_csum_rx_error = 0;
+ struct ixgbevf_ring *rx_ring = adapter->rx_ring[i];
+
+ hw_csum_rx_error += rx_ring->rx_stats.csum_err;
+ alloc_rx_page_failed += rx_ring->rx_stats.alloc_rx_page_failed;
+ alloc_rx_buff_failed += rx_ring->rx_stats.alloc_rx_buff_failed;
+ alloc_rx_page += rx_ring->rx_stats.alloc_rx_page;
}
+
+ adapter->hw_csum_rx_error = hw_csum_rx_error;
+ adapter->alloc_rx_page_failed = alloc_rx_page_failed;
+ adapter->alloc_rx_buff_failed = alloc_rx_buff_failed;
+ adapter->alloc_rx_page = alloc_rx_page;
}
/**
@@ -2980,7 +3045,7 @@ int ixgbevf_setup_tx_resources(struct ixgbevf_ring *tx_ring)
int size;
size = sizeof(struct ixgbevf_tx_buffer) * tx_ring->count;
- tx_ring->tx_buffer_info = vzalloc(size);
+ tx_ring->tx_buffer_info = vmalloc(size);
if (!tx_ring->tx_buffer_info)
goto err;
@@ -3040,7 +3105,7 @@ int ixgbevf_setup_rx_resources(struct ixgbevf_ring *rx_ring)
int size;
size = sizeof(struct ixgbevf_rx_buffer) * rx_ring->count;
- rx_ring->rx_buffer_info = vzalloc(size);
+ rx_ring->rx_buffer_info = vmalloc(size);
if (!rx_ring->rx_buffer_info)
goto err;
@@ -3482,34 +3547,37 @@ static void ixgbevf_tx_map(struct ixgbevf_ring *tx_ring,
struct ixgbevf_tx_buffer *first,
const u8 hdr_len)
{
- dma_addr_t dma;
struct sk_buff *skb = first->skb;
struct ixgbevf_tx_buffer *tx_buffer;
union ixgbe_adv_tx_desc *tx_desc;
- struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
- unsigned int data_len = skb->data_len;
- unsigned int size = skb_headlen(skb);
- unsigned int paylen = skb->len - hdr_len;
+ struct skb_frag_struct *frag;
+ dma_addr_t dma;
+ unsigned int data_len, size;
u32 tx_flags = first->tx_flags;
- __le32 cmd_type;
+ __le32 cmd_type = ixgbevf_tx_cmd_type(tx_flags);
u16 i = tx_ring->next_to_use;
tx_desc = IXGBEVF_TX_DESC(tx_ring, i);
- ixgbevf_tx_olinfo_status(tx_desc, tx_flags, paylen);
- cmd_type = ixgbevf_tx_cmd_type(tx_flags);
+ ixgbevf_tx_olinfo_status(tx_desc, tx_flags, skb->len - hdr_len);
+
+ size = skb_headlen(skb);
+ data_len = skb->data_len;
dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
- if (dma_mapping_error(tx_ring->dev, dma))
- goto dma_error;
- /* record length, and DMA address */
- dma_unmap_len_set(first, len, size);
- dma_unmap_addr_set(first, dma, dma);
+ tx_buffer = first;
+
+ for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
+ if (dma_mapping_error(tx_ring->dev, dma))
+ goto dma_error;
+
+ /* record length, and DMA address */
+ dma_unmap_len_set(tx_buffer, len, size);
+ dma_unmap_addr_set(tx_buffer, dma, dma);
- tx_desc->read.buffer_addr = cpu_to_le64(dma);
+ tx_desc->read.buffer_addr = cpu_to_le64(dma);
- for (;;) {
while (unlikely(size > IXGBE_MAX_DATA_PER_TXD)) {
tx_desc->read.cmd_type_len =
cmd_type | cpu_to_le32(IXGBE_MAX_DATA_PER_TXD);
@@ -3520,12 +3588,12 @@ static void ixgbevf_tx_map(struct ixgbevf_ring *tx_ring,
tx_desc = IXGBEVF_TX_DESC(tx_ring, 0);
i = 0;
}
+ tx_desc->read.olinfo_status = 0;
dma += IXGBE_MAX_DATA_PER_TXD;
size -= IXGBE_MAX_DATA_PER_TXD;
tx_desc->read.buffer_addr = cpu_to_le64(dma);
- tx_desc->read.olinfo_status = 0;
}
if (likely(!data_len))
@@ -3539,23 +3607,15 @@ static void ixgbevf_tx_map(struct ixgbevf_ring *tx_ring,
tx_desc = IXGBEVF_TX_DESC(tx_ring, 0);
i = 0;
}
+ tx_desc->read.olinfo_status = 0;
size = skb_frag_size(frag);
data_len -= size;
dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
DMA_TO_DEVICE);
- if (dma_mapping_error(tx_ring->dev, dma))
- goto dma_error;
tx_buffer = &tx_ring->tx_buffer_info[i];
- dma_unmap_len_set(tx_buffer, len, size);
- dma_unmap_addr_set(tx_buffer, dma, dma);
-
- tx_desc->read.buffer_addr = cpu_to_le64(dma);
- tx_desc->read.olinfo_status = 0;
-
- frag++;
}
/* write last descriptor with RS and EOP bits */
@@ -3589,18 +3649,32 @@ static void ixgbevf_tx_map(struct ixgbevf_ring *tx_ring,
return;
dma_error:
dev_err(tx_ring->dev, "TX DMA map failed\n");
+ tx_buffer = &tx_ring->tx_buffer_info[i];
/* clear dma mappings for failed tx_buffer_info map */
- for (;;) {
+ while (tx_buffer != first) {
+ if (dma_unmap_len(tx_buffer, len))
+ dma_unmap_page(tx_ring->dev,
+ dma_unmap_addr(tx_buffer, dma),
+ dma_unmap_len(tx_buffer, len),
+ DMA_TO_DEVICE);
+ dma_unmap_len_set(tx_buffer, len, 0);
+
+ if (i-- == 0)
+ i += tx_ring->count;
tx_buffer = &tx_ring->tx_buffer_info[i];
- ixgbevf_unmap_and_free_tx_resource(tx_ring, tx_buffer);
- if (tx_buffer == first)
- break;
- if (i == 0)
- i = tx_ring->count;
- i--;
}
+ if (dma_unmap_len(tx_buffer, len))
+ dma_unmap_single(tx_ring->dev,
+ dma_unmap_addr(tx_buffer, dma),
+ dma_unmap_len(tx_buffer, len),
+ DMA_TO_DEVICE);
+ dma_unmap_len_set(tx_buffer, len, 0);
+
+ dev_kfree_skb_any(tx_buffer->skb);
+ tx_buffer->skb = NULL;
+
tx_ring->next_to_use = i;
}
diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c
index 64c93e8becc6..38d3a327c1bc 100644
--- a/drivers/net/ethernet/intel/ixgbevf/vf.c
+++ b/drivers/net/ethernet/intel/ixgbevf/vf.c
@@ -286,7 +286,7 @@ static s32 ixgbevf_set_uc_addr_vf(struct ixgbe_hw *hw, u32 index, u8 *addr)
ether_addr_copy(msg_addr, addr);
ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf,
- sizeof(msgbuf) / sizeof(u32));
+ ARRAY_SIZE(msgbuf));
if (!ret_val) {
msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS;
@@ -456,8 +456,7 @@ static s32 ixgbevf_set_rar_vf(struct ixgbe_hw *hw, u32 index, u8 *addr,
ether_addr_copy(msg_addr, addr);
ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf,
- sizeof(msgbuf) / sizeof(u32));
-
+ ARRAY_SIZE(msgbuf));
msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS;
/* if nacked the address was rejected, use "perm_addr" */
@@ -574,7 +573,7 @@ static s32 ixgbevf_update_xcast_mode(struct ixgbe_hw *hw, int xcast_mode)
msgbuf[1] = xcast_mode;
err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf,
- sizeof(msgbuf) / sizeof(u32));
+ ARRAY_SIZE(msgbuf));
if (err)
return err;
@@ -614,7 +613,7 @@ static s32 ixgbevf_set_vfta_vf(struct ixgbe_hw *hw, u32 vlan, u32 vind,
msgbuf[0] |= vlan_on << IXGBE_VT_MSGINFO_SHIFT;
err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf,
- sizeof(msgbuf) / sizeof(u32));
+ ARRAY_SIZE(msgbuf));
if (err)
goto mbx_err;
@@ -826,7 +825,7 @@ static s32 ixgbevf_set_rlpml_vf(struct ixgbe_hw *hw, u16 max_size)
msgbuf[1] = max_size;
ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf,
- sizeof(msgbuf) / sizeof(u32));
+ ARRAY_SIZE(msgbuf));
if (ret_val)
return ret_val;
if ((msgbuf[0] & IXGBE_VF_SET_LPE) &&
@@ -872,8 +871,7 @@ static int ixgbevf_negotiate_api_version_vf(struct ixgbe_hw *hw, int api)
msg[1] = api;
msg[2] = 0;
- err = ixgbevf_write_msg_read_ack(hw, msg, msg,
- sizeof(msg) / sizeof(u32));
+ err = ixgbevf_write_msg_read_ack(hw, msg, msg, ARRAY_SIZE(msg));
if (!err) {
msg[0] &= ~IXGBE_VT_MSGTYPE_CTS;
@@ -924,8 +922,7 @@ int ixgbevf_get_queues(struct ixgbe_hw *hw, unsigned int *num_tcs,
msg[0] = IXGBE_VF_GET_QUEUE;
msg[1] = msg[2] = msg[3] = msg[4] = 0;
- err = ixgbevf_write_msg_read_ack(hw, msg, msg,
- sizeof(msg) / sizeof(u32));
+ err = ixgbevf_write_msg_read_ack(hw, msg, msg, ARRAY_SIZE(msg));
if (!err) {
msg[0] &= ~IXGBE_VT_MSGTYPE_CTS;
diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index a19760736b71..a1d7b88cf083 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -10,6 +10,7 @@
* warranty of any kind, whether express or implied.
*/
+#include <linux/acpi.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
@@ -865,7 +866,7 @@ struct mvpp2 {
/* List of pointers to port structures */
int port_count;
- struct mvpp2_port **port_list;
+ struct mvpp2_port *port_list[MVPP2_MAX_PORTS];
/* Aggregated TXQs */
struct mvpp2_tx_queue *aggr_txqs;
@@ -932,6 +933,9 @@ struct mvpp2_port {
struct mvpp2 *priv;
+ /* Firmware node associated to the port */
+ struct fwnode_handle *fwnode;
+
/* Per-port registers' base address */
void __iomem *base;
void __iomem *stats_base;
@@ -7499,7 +7503,10 @@ static int mvpp2_multi_queue_vectors_init(struct mvpp2_port *port,
strncpy(irqname, "rx-shared", sizeof(irqname));
}
- v->irq = of_irq_get_byname(port_node, irqname);
+ if (port_node)
+ v->irq = of_irq_get_byname(port_node, irqname);
+ else
+ v->irq = fwnode_irq_get(port->fwnode, i);
if (v->irq <= 0) {
ret = -EINVAL;
goto err;
@@ -7711,17 +7718,16 @@ static bool mvpp2_port_has_tx_irqs(struct mvpp2 *priv,
}
static void mvpp2_port_copy_mac_addr(struct net_device *dev, struct mvpp2 *priv,
- struct device_node *port_node,
+ struct fwnode_handle *fwnode,
char **mac_from)
{
struct mvpp2_port *port = netdev_priv(dev);
char hw_mac_addr[ETH_ALEN] = {0};
- const char *dt_mac_addr;
+ char fw_mac_addr[ETH_ALEN];
- dt_mac_addr = of_get_mac_address(port_node);
- if (dt_mac_addr && is_valid_ether_addr(dt_mac_addr)) {
- *mac_from = "device tree";
- ether_addr_copy(dev->dev_addr, dt_mac_addr);
+ if (fwnode_get_mac_address(fwnode, fw_mac_addr, ETH_ALEN)) {
+ *mac_from = "firmware node";
+ ether_addr_copy(dev->dev_addr, fw_mac_addr);
return;
}
@@ -7740,13 +7746,14 @@ static void mvpp2_port_copy_mac_addr(struct net_device *dev, struct mvpp2 *priv,
/* Ports initialization */
static int mvpp2_port_probe(struct platform_device *pdev,
- struct device_node *port_node,
- struct mvpp2 *priv, int index)
+ struct fwnode_handle *port_fwnode,
+ struct mvpp2 *priv)
{
struct device_node *phy_node;
- struct phy *comphy;
+ struct phy *comphy = NULL;
struct mvpp2_port *port;
struct mvpp2_port_pcpu *port_pcpu;
+ struct device_node *port_node = to_of_node(port_fwnode);
struct net_device *dev;
struct resource *res;
char *mac_from = "";
@@ -7757,7 +7764,12 @@ static int mvpp2_port_probe(struct platform_device *pdev,
int phy_mode;
int err, i, cpu;
- has_tx_irqs = mvpp2_port_has_tx_irqs(priv, port_node);
+ if (port_node) {
+ has_tx_irqs = mvpp2_port_has_tx_irqs(priv, port_node);
+ } else {
+ has_tx_irqs = true;
+ queue_mode = MVPP2_QDIST_MULTI_MODE;
+ }
if (!has_tx_irqs)
queue_mode = MVPP2_QDIST_SINGLE_MODE;
@@ -7772,24 +7784,30 @@ static int mvpp2_port_probe(struct platform_device *pdev,
if (!dev)
return -ENOMEM;
- phy_node = of_parse_phandle(port_node, "phy", 0);
- phy_mode = of_get_phy_mode(port_node);
+ if (port_node)
+ phy_node = of_parse_phandle(port_node, "phy", 0);
+ else
+ phy_node = NULL;
+
+ phy_mode = fwnode_get_phy_mode(port_fwnode);
if (phy_mode < 0) {
dev_err(&pdev->dev, "incorrect phy mode\n");
err = phy_mode;
goto err_free_netdev;
}
- comphy = devm_of_phy_get(&pdev->dev, port_node, NULL);
- if (IS_ERR(comphy)) {
- if (PTR_ERR(comphy) == -EPROBE_DEFER) {
- err = -EPROBE_DEFER;
- goto err_free_netdev;
+ if (port_node) {
+ comphy = devm_of_phy_get(&pdev->dev, port_node, NULL);
+ if (IS_ERR(comphy)) {
+ if (PTR_ERR(comphy) == -EPROBE_DEFER) {
+ err = -EPROBE_DEFER;
+ goto err_free_netdev;
+ }
+ comphy = NULL;
}
- comphy = NULL;
}
- if (of_property_read_u32(port_node, "port-id", &id)) {
+ if (fwnode_property_read_u32(port_fwnode, "port-id", &id)) {
err = -EINVAL;
dev_err(&pdev->dev, "missing port-id value\n");
goto err_free_netdev;
@@ -7802,6 +7820,7 @@ static int mvpp2_port_probe(struct platform_device *pdev,
port = netdev_priv(dev);
port->dev = dev;
+ port->fwnode = port_fwnode;
port->ntxqs = ntxqs;
port->nrxqs = nrxqs;
port->priv = priv;
@@ -7811,7 +7830,10 @@ static int mvpp2_port_probe(struct platform_device *pdev,
if (err)
goto err_free_netdev;
- port->link_irq = of_irq_get_byname(port_node, "link");
+ if (port_node)
+ port->link_irq = of_irq_get_byname(port_node, "link");
+ else
+ port->link_irq = fwnode_irq_get(port_fwnode, port->nqvecs + 1);
if (port->link_irq == -EPROBE_DEFER) {
err = -EPROBE_DEFER;
goto err_deinit_qvecs;
@@ -7820,7 +7842,7 @@ static int mvpp2_port_probe(struct platform_device *pdev,
/* the link irq is optional */
port->link_irq = 0;
- if (of_property_read_bool(port_node, "marvell,loopback"))
+ if (fwnode_property_read_bool(port_fwnode, "marvell,loopback"))
port->flags |= MVPP2_F_LOOPBACK;
port->id = id;
@@ -7845,8 +7867,8 @@ static int mvpp2_port_probe(struct platform_device *pdev,
MVPP21_MIB_COUNTERS_OFFSET +
port->gop_id * MVPP21_MIB_COUNTERS_PORT_SZ;
} else {
- if (of_property_read_u32(port_node, "gop-port-id",
- &port->gop_id)) {
+ if (fwnode_property_read_u32(port_fwnode, "gop-port-id",
+ &port->gop_id)) {
err = -EINVAL;
dev_err(&pdev->dev, "missing gop-port-id value\n");
goto err_deinit_qvecs;
@@ -7876,7 +7898,7 @@ static int mvpp2_port_probe(struct platform_device *pdev,
mutex_init(&port->gather_stats_lock);
INIT_DELAYED_WORK(&port->stats_work, mvpp2_gather_hw_statistics);
- mvpp2_port_copy_mac_addr(dev, priv, port_node, &mac_from);
+ mvpp2_port_copy_mac_addr(dev, priv, port_fwnode, &mac_from);
port->tx_ring_size = MVPP2_MAX_TXD_DFLT;
port->rx_ring_size = MVPP2_MAX_RXD_DFLT;
@@ -7934,7 +7956,8 @@ static int mvpp2_port_probe(struct platform_device *pdev,
}
netdev_info(dev, "Using %s mac address %pM\n", mac_from, dev->dev_addr);
- priv->port_list[index] = port;
+ priv->port_list[priv->port_count++] = port;
+
return 0;
err_free_port_pcpu:
@@ -8193,8 +8216,9 @@ static int mvpp2_init(struct platform_device *pdev, struct mvpp2 *priv)
static int mvpp2_probe(struct platform_device *pdev)
{
- struct device_node *dn = pdev->dev.of_node;
- struct device_node *port_node;
+ const struct acpi_device_id *acpi_id;
+ struct fwnode_handle *fwnode = pdev->dev.fwnode;
+ struct fwnode_handle *port_fwnode;
struct mvpp2 *priv;
struct resource *res;
void __iomem *base;
@@ -8205,8 +8229,14 @@ static int mvpp2_probe(struct platform_device *pdev)
if (!priv)
return -ENOMEM;
- priv->hw_version =
- (unsigned long)of_device_get_match_data(&pdev->dev);
+ if (has_acpi_companion(&pdev->dev)) {
+ acpi_id = acpi_match_device(pdev->dev.driver->acpi_match_table,
+ &pdev->dev);
+ priv->hw_version = (unsigned long)acpi_id->driver_data;
+ } else {
+ priv->hw_version =
+ (unsigned long)of_device_get_match_data(&pdev->dev);
+ }
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
base = devm_ioremap_resource(&pdev->dev, res);
@@ -8220,10 +8250,23 @@ static int mvpp2_probe(struct platform_device *pdev)
return PTR_ERR(priv->lms_base);
} else {
res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+ if (has_acpi_companion(&pdev->dev)) {
+ /* In case the MDIO memory region is declared in
+ * the ACPI, it can already appear as 'in-use'
+ * in the OS. Because it is overlapped by second
+ * region of the network controller, make
+ * sure it is released, before requesting it again.
+ * The care is taken by mvpp2 driver to avoid
+ * concurrent access to this memory region.
+ */
+ release_resource(res);
+ }
priv->iface_base = devm_ioremap_resource(&pdev->dev, res);
if (IS_ERR(priv->iface_base))
return PTR_ERR(priv->iface_base);
+ }
+ if (priv->hw_version == MVPP22 && dev_of_node(&pdev->dev)) {
priv->sysctrl_base =
syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
"marvell,system-controller");
@@ -8249,32 +8292,34 @@ static int mvpp2_probe(struct platform_device *pdev)
else
priv->max_port_rxqs = 32;
- priv->pp_clk = devm_clk_get(&pdev->dev, "pp_clk");
- if (IS_ERR(priv->pp_clk))
- return PTR_ERR(priv->pp_clk);
- err = clk_prepare_enable(priv->pp_clk);
- if (err < 0)
- return err;
-
- priv->gop_clk = devm_clk_get(&pdev->dev, "gop_clk");
- if (IS_ERR(priv->gop_clk)) {
- err = PTR_ERR(priv->gop_clk);
- goto err_pp_clk;
- }
- err = clk_prepare_enable(priv->gop_clk);
- if (err < 0)
- goto err_pp_clk;
+ if (dev_of_node(&pdev->dev)) {
+ priv->pp_clk = devm_clk_get(&pdev->dev, "pp_clk");
+ if (IS_ERR(priv->pp_clk))
+ return PTR_ERR(priv->pp_clk);
+ err = clk_prepare_enable(priv->pp_clk);
+ if (err < 0)
+ return err;
- if (priv->hw_version == MVPP22) {
- priv->mg_clk = devm_clk_get(&pdev->dev, "mg_clk");
- if (IS_ERR(priv->mg_clk)) {
- err = PTR_ERR(priv->mg_clk);
- goto err_gop_clk;
+ priv->gop_clk = devm_clk_get(&pdev->dev, "gop_clk");
+ if (IS_ERR(priv->gop_clk)) {
+ err = PTR_ERR(priv->gop_clk);
+ goto err_pp_clk;
}
-
- err = clk_prepare_enable(priv->mg_clk);
+ err = clk_prepare_enable(priv->gop_clk);
if (err < 0)
- goto err_gop_clk;
+ goto err_pp_clk;
+
+ if (priv->hw_version == MVPP22) {
+ priv->mg_clk = devm_clk_get(&pdev->dev, "mg_clk");
+ if (IS_ERR(priv->mg_clk)) {
+ err = PTR_ERR(priv->mg_clk);
+ goto err_gop_clk;
+ }
+
+ err = clk_prepare_enable(priv->mg_clk);
+ if (err < 0)
+ goto err_gop_clk;
+ }
priv->axi_clk = devm_clk_get(&pdev->dev, "axi_clk");
if (IS_ERR(priv->axi_clk)) {
@@ -8287,10 +8332,14 @@ static int mvpp2_probe(struct platform_device *pdev)
if (err < 0)
goto err_gop_clk;
}
- }
- /* Get system's tclk rate */
- priv->tclk = clk_get_rate(priv->pp_clk);
+ /* Get system's tclk rate */
+ priv->tclk = clk_get_rate(priv->pp_clk);
+ } else if (device_property_read_u32(&pdev->dev, "clock-frequency",
+ &priv->tclk)) {
+ dev_err(&pdev->dev, "missing clock-frequency value\n");
+ return -EINVAL;
+ }
if (priv->hw_version == MVPP22) {
err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(40));
@@ -8313,30 +8362,19 @@ static int mvpp2_probe(struct platform_device *pdev)
goto err_mg_clk;
}
- priv->port_count = of_get_available_child_count(dn);
+ /* Initialize ports */
+ fwnode_for_each_available_child_node(fwnode, port_fwnode) {
+ err = mvpp2_port_probe(pdev, port_fwnode, priv);
+ if (err < 0)
+ goto err_port_probe;
+ }
+
if (priv->port_count == 0) {
dev_err(&pdev->dev, "no ports enabled\n");
err = -ENODEV;
goto err_mg_clk;
}
- priv->port_list = devm_kcalloc(&pdev->dev, priv->port_count,
- sizeof(*priv->port_list),
- GFP_KERNEL);
- if (!priv->port_list) {
- err = -ENOMEM;
- goto err_mg_clk;
- }
-
- /* Initialize ports */
- i = 0;
- for_each_available_child_of_node(dn, port_node) {
- err = mvpp2_port_probe(pdev, port_node, priv, i);
- if (err < 0)
- goto err_port_probe;
- i++;
- }
-
/* Statistics must be gathered regularly because some of them (like
* packets counters) are 32-bit registers and could overflow quite
* quickly. For instance, a 10Gb link used at full bandwidth with the
@@ -8357,7 +8395,7 @@ static int mvpp2_probe(struct platform_device *pdev)
err_port_probe:
i = 0;
- for_each_available_child_of_node(dn, port_node) {
+ fwnode_for_each_available_child_node(fwnode, port_fwnode) {
if (priv->port_list[i])
mvpp2_port_remove(priv->port_list[i]);
i++;
@@ -8376,14 +8414,14 @@ err_pp_clk:
static int mvpp2_remove(struct platform_device *pdev)
{
struct mvpp2 *priv = platform_get_drvdata(pdev);
- struct device_node *dn = pdev->dev.of_node;
- struct device_node *port_node;
+ struct fwnode_handle *fwnode = pdev->dev.fwnode;
+ struct fwnode_handle *port_fwnode;
int i = 0;
flush_workqueue(priv->stats_queue);
destroy_workqueue(priv->stats_queue);
- for_each_available_child_of_node(dn, port_node) {
+ fwnode_for_each_available_child_node(fwnode, port_fwnode) {
if (priv->port_list[i]) {
mutex_destroy(&priv->port_list[i]->gather_stats_lock);
mvpp2_port_remove(priv->port_list[i]);
@@ -8406,6 +8444,9 @@ static int mvpp2_remove(struct platform_device *pdev)
aggr_txq->descs_dma);
}
+ if (is_acpi_node(port_fwnode))
+ return 0;
+
clk_disable_unprepare(priv->axi_clk);
clk_disable_unprepare(priv->mg_clk);
clk_disable_unprepare(priv->pp_clk);
@@ -8427,12 +8468,19 @@ static const struct of_device_id mvpp2_match[] = {
};
MODULE_DEVICE_TABLE(of, mvpp2_match);
+static const struct acpi_device_id mvpp2_acpi_match[] = {
+ { "MRVL0110", MVPP22 },
+ { },
+};
+MODULE_DEVICE_TABLE(acpi, mvpp2_acpi_match);
+
static struct platform_driver mvpp2_driver = {
.probe = mvpp2_probe,
.remove = mvpp2_remove,
.driver = {
.name = MVPP2_DRIVER_NAME,
.of_match_table = mvpp2_match,
+ .acpi_match_table = ACPI_PTR(mvpp2_acpi_match),
},
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 8530c770c873..47bab842c5ee 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2944,9 +2944,6 @@ out:
static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
struct tc_cls_flower_offload *cls_flower)
{
- if (cls_flower->common.chain_index)
- return -EOPNOTSUPP;
-
switch (cls_flower->command) {
case TC_CLSFLOWER_REPLACE:
return mlx5e_configure_flower(priv, cls_flower);
@@ -2964,7 +2961,7 @@ int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
{
struct mlx5e_priv *priv = cb_priv;
- if (!tc_can_offload(priv->netdev))
+ if (!tc_cls_can_offload_and_chain0(priv->netdev, type_data))
return -EOPNOTSUPP;
switch (type) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 10fa6a18fcf9..363d8dcb7f17 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -719,9 +719,6 @@ static int
mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv,
struct tc_cls_flower_offload *cls_flower)
{
- if (cls_flower->common.chain_index)
- return -EOPNOTSUPP;
-
switch (cls_flower->command) {
case TC_CLSFLOWER_REPLACE:
return mlx5e_configure_flower(priv, cls_flower);
@@ -739,7 +736,7 @@ static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data,
{
struct mlx5e_priv *priv = cb_priv;
- if (!tc_can_offload(priv->netdev))
+ if (!tc_cls_can_offload_and_chain0(priv->netdev, type_data))
return -EOPNOTSUPP;
switch (type) {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 833cd0a96fd9..3dcc58d61506 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -1738,9 +1738,6 @@ static int mlxsw_sp_setup_tc_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
struct tc_cls_matchall_offload *f,
bool ingress)
{
- if (f->common.chain_index)
- return -EOPNOTSUPP;
-
switch (f->command) {
case TC_CLSMATCHALL_REPLACE:
return mlxsw_sp_port_add_cls_matchall(mlxsw_sp_port, f,
@@ -1780,7 +1777,8 @@ static int mlxsw_sp_setup_tc_block_cb_matchall(enum tc_setup_type type,
switch (type) {
case TC_SETUP_CLSMATCHALL:
- if (!tc_can_offload(mlxsw_sp_port->dev))
+ if (!tc_cls_can_offload_and_chain0(mlxsw_sp_port->dev,
+ type_data))
return -EOPNOTSUPP;
return mlxsw_sp_setup_tc_cls_matchall(mlxsw_sp_port, type_data,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 31891ae11c9b..f0b25baba09a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -71,6 +71,7 @@
#include "spectrum_mr_tcam.h"
#include "spectrum_router.h"
+struct mlxsw_sp_fib;
struct mlxsw_sp_vr;
struct mlxsw_sp_lpm_tree;
struct mlxsw_sp_rif_ops;
@@ -84,6 +85,8 @@ struct mlxsw_sp_router {
struct rhashtable nexthop_ht;
struct list_head nexthop_list;
struct {
+ /* One tree for each protocol: IPv4 and IPv6 */
+ struct mlxsw_sp_lpm_tree *proto_trees[2];
struct mlxsw_sp_lpm_tree *trees;
unsigned int tree_count;
} lpm;
@@ -162,6 +165,15 @@ struct mlxsw_sp_rif_ops {
struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif);
};
+static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
+static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_lpm_tree *lpm_tree);
+static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
+ const struct mlxsw_sp_fib *fib,
+ u8 tree_id);
+static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
+ const struct mlxsw_sp_fib *fib);
+
static unsigned int *
mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
enum mlxsw_sp_rif_counter_dir dir)
@@ -349,14 +361,6 @@ mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
}
-static bool
-mlxsw_sp_prefix_usage_none(struct mlxsw_sp_prefix_usage *prefix_usage)
-{
- struct mlxsw_sp_prefix_usage prefix_usage_none = {{ 0 } };
-
- return mlxsw_sp_prefix_usage_eq(prefix_usage, &prefix_usage_none);
-}
-
static void
mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
struct mlxsw_sp_prefix_usage *prefix_usage2)
@@ -398,7 +402,6 @@ enum mlxsw_sp_fib_entry_type {
};
struct mlxsw_sp_nexthop_group;
-struct mlxsw_sp_fib;
struct mlxsw_sp_fib_node {
struct list_head entry_list;
@@ -445,6 +448,7 @@ struct mlxsw_sp_lpm_tree {
u8 id; /* tree ID */
unsigned int ref_count;
enum mlxsw_sp_l3proto proto;
+ unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
struct mlxsw_sp_prefix_usage prefix_usage;
};
@@ -453,8 +457,6 @@ struct mlxsw_sp_fib {
struct list_head node_list;
struct mlxsw_sp_vr *vr;
struct mlxsw_sp_lpm_tree *lpm_tree;
- unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
- struct mlxsw_sp_prefix_usage prefix_usage;
enum mlxsw_sp_l3proto proto;
};
@@ -469,12 +471,15 @@ struct mlxsw_sp_vr {
static const struct rhashtable_params mlxsw_sp_fib_ht_params;
-static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp_vr *vr,
+static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_vr *vr,
enum mlxsw_sp_l3proto proto)
{
+ struct mlxsw_sp_lpm_tree *lpm_tree;
struct mlxsw_sp_fib *fib;
int err;
+ lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
fib = kzalloc(sizeof(*fib), GFP_KERNEL);
if (!fib)
return ERR_PTR(-ENOMEM);
@@ -484,17 +489,26 @@ static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp_vr *vr,
INIT_LIST_HEAD(&fib->node_list);
fib->proto = proto;
fib->vr = vr;
+ fib->lpm_tree = lpm_tree;
+ mlxsw_sp_lpm_tree_hold(lpm_tree);
+ err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
+ if (err)
+ goto err_lpm_tree_bind;
return fib;
+err_lpm_tree_bind:
+ mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
err_rhashtable_init:
kfree(fib);
return ERR_PTR(err);
}
-static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib)
+static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib *fib)
{
+ mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
+ mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
WARN_ON(!list_empty(&fib->node_list));
- WARN_ON(fib->lpm_tree);
rhashtable_destroy(&fib->ht);
kfree(fib);
}
@@ -581,6 +595,9 @@ mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
goto err_left_struct_set;
memcpy(&lpm_tree->prefix_usage, prefix_usage,
sizeof(lpm_tree->prefix_usage));
+ memset(&lpm_tree->prefix_ref_count, 0,
+ sizeof(lpm_tree->prefix_ref_count));
+ lpm_tree->ref_count = 1;
return lpm_tree;
err_left_struct_set:
@@ -607,8 +624,10 @@ mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
if (lpm_tree->ref_count != 0 &&
lpm_tree->proto == proto &&
mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
- prefix_usage))
+ prefix_usage)) {
+ mlxsw_sp_lpm_tree_hold(lpm_tree);
return lpm_tree;
+ }
}
return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
}
@@ -629,9 +648,10 @@ static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
{
+ struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
struct mlxsw_sp_lpm_tree *lpm_tree;
u64 max_trees;
- int i;
+ int err, i;
if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
return -EIO;
@@ -649,11 +669,42 @@ static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
}
+ lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
+ MLXSW_SP_L3_PROTO_IPV4);
+ if (IS_ERR(lpm_tree)) {
+ err = PTR_ERR(lpm_tree);
+ goto err_ipv4_tree_get;
+ }
+ mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree;
+
+ lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
+ MLXSW_SP_L3_PROTO_IPV6);
+ if (IS_ERR(lpm_tree)) {
+ err = PTR_ERR(lpm_tree);
+ goto err_ipv6_tree_get;
+ }
+ mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree;
+
return 0;
+
+err_ipv6_tree_get:
+ lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
+ mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
+err_ipv4_tree_get:
+ kfree(mlxsw_sp->router->lpm.trees);
+ return err;
}
static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
{
+ struct mlxsw_sp_lpm_tree *lpm_tree;
+
+ lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6];
+ mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
+
+ lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
+ mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
+
kfree(mlxsw_sp->router->lpm.trees);
}
@@ -745,10 +796,10 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported virtual routers");
return ERR_PTR(-EBUSY);
}
- vr->fib4 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV4);
+ vr->fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
if (IS_ERR(vr->fib4))
return ERR_CAST(vr->fib4);
- vr->fib6 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV6);
+ vr->fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
if (IS_ERR(vr->fib6)) {
err = PTR_ERR(vr->fib6);
goto err_fib6_create;
@@ -763,21 +814,22 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
return vr;
err_mr_table_create:
- mlxsw_sp_fib_destroy(vr->fib6);
+ mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
vr->fib6 = NULL;
err_fib6_create:
- mlxsw_sp_fib_destroy(vr->fib4);
+ mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
vr->fib4 = NULL;
return ERR_PTR(err);
}
-static void mlxsw_sp_vr_destroy(struct mlxsw_sp_vr *vr)
+static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_vr *vr)
{
mlxsw_sp_mr_table_destroy(vr->mr4_table);
vr->mr4_table = NULL;
- mlxsw_sp_fib_destroy(vr->fib6);
+ mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
vr->fib6 = NULL;
- mlxsw_sp_fib_destroy(vr->fib4);
+ mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
vr->fib4 = NULL;
}
@@ -793,12 +845,12 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
return vr;
}
-static void mlxsw_sp_vr_put(struct mlxsw_sp_vr *vr)
+static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
{
if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
list_empty(&vr->fib6->node_list) &&
mlxsw_sp_mr_table_empty(vr->mr4_table))
- mlxsw_sp_vr_destroy(vr);
+ mlxsw_sp_vr_destroy(mlxsw_sp, vr);
}
static bool
@@ -809,7 +861,7 @@ mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
if (!mlxsw_sp_vr_is_used(vr))
return false;
- if (fib->lpm_tree && fib->lpm_tree->id == tree_id)
+ if (fib->lpm_tree->id == tree_id)
return true;
return false;
}
@@ -839,14 +891,13 @@ static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib *fib,
struct mlxsw_sp_lpm_tree *new_tree)
{
- struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
enum mlxsw_sp_l3proto proto = fib->proto;
+ struct mlxsw_sp_lpm_tree *old_tree;
u8 old_id, new_id = new_tree->id;
struct mlxsw_sp_vr *vr;
int i, err;
- if (!old_tree)
- goto no_replace;
+ old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
old_id = old_tree->id;
for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
@@ -860,6 +911,11 @@ static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
goto err_tree_replace;
}
+ memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count,
+ sizeof(new_tree->prefix_ref_count));
+ mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
+ mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
+
return 0;
err_tree_replace:
@@ -871,36 +927,6 @@ err_tree_replace:
old_tree);
}
return err;
-
-no_replace:
- fib->lpm_tree = new_tree;
- mlxsw_sp_lpm_tree_hold(new_tree);
- err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
- if (err) {
- mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
- fib->lpm_tree = NULL;
- return err;
- }
- return 0;
-}
-
-static void
-mlxsw_sp_vrs_prefixes(struct mlxsw_sp *mlxsw_sp,
- enum mlxsw_sp_l3proto proto,
- struct mlxsw_sp_prefix_usage *req_prefix_usage)
-{
- int i;
-
- for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
- struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
- struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
- unsigned char prefix;
-
- if (!mlxsw_sp_vr_is_used(vr))
- continue;
- mlxsw_sp_prefix_usage_for_each(prefix, &fib->prefix_usage)
- mlxsw_sp_prefix_usage_set(req_prefix_usage, prefix);
- }
}
static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
@@ -1942,11 +1968,8 @@ static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
dipn = htonl(dip);
dev = mlxsw_sp->router->rifs[rif]->dev;
n = neigh_lookup(&arp_tbl, &dipn, dev);
- if (!n) {
- netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n",
- &dip);
+ if (!n)
return;
- }
netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
neigh_event_send(n, NULL);
@@ -1973,11 +1996,8 @@ static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
dev = mlxsw_sp->router->rifs[rif]->dev;
n = neigh_lookup(&nd_tbl, &dip, dev);
- if (!n) {
- netdev_err(dev, "Failed to find matching neighbour for IP=%pI6c\n",
- &dip);
+ if (!n)
return;
- }
netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
neigh_event_send(n, NULL);
@@ -4201,68 +4221,66 @@ mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
}
static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib *fib,
struct mlxsw_sp_fib_node *fib_node)
{
- struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
+ struct mlxsw_sp_prefix_usage req_prefix_usage;
+ struct mlxsw_sp_fib *fib = fib_node->fib;
struct mlxsw_sp_lpm_tree *lpm_tree;
int err;
- /* Since the tree is shared between all virtual routers we must
- * make sure it contains all the required prefix lengths. This
- * can be computed by either adding the new prefix length to the
- * existing prefix usage of a bound tree, or by aggregating the
- * prefix lengths across all virtual routers and adding the new
- * one as well.
- */
- if (fib->lpm_tree)
- mlxsw_sp_prefix_usage_cpy(&req_prefix_usage,
- &fib->lpm_tree->prefix_usage);
- else
- mlxsw_sp_vrs_prefixes(mlxsw_sp, fib->proto, &req_prefix_usage);
- mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
+ lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto];
+ if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
+ goto out;
+ mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
+ mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
fib->proto);
if (IS_ERR(lpm_tree))
return PTR_ERR(lpm_tree);
- if (fib->lpm_tree && fib->lpm_tree->id == lpm_tree->id)
- return 0;
-
err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
if (err)
- return err;
+ goto err_lpm_tree_replace;
+out:
+ lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++;
return 0;
-}
-static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib *fib)
-{
- if (!mlxsw_sp_prefix_usage_none(&fib->prefix_usage))
- return;
- mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
- mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
- fib->lpm_tree = NULL;
+err_lpm_tree_replace:
+ mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
+ return err;
}
-static void mlxsw_sp_fib_node_prefix_inc(struct mlxsw_sp_fib_node *fib_node)
+static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_node *fib_node)
{
- unsigned char prefix_len = fib_node->key.prefix_len;
+ struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
+ struct mlxsw_sp_prefix_usage req_prefix_usage;
struct mlxsw_sp_fib *fib = fib_node->fib;
+ int err;
- if (fib->prefix_ref_count[prefix_len]++ == 0)
- mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len);
-}
+ if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
+ return;
+ /* Try to construct a new LPM tree from the current prefix usage
+ * minus the unused one. If we fail, continue using the old one.
+ */
+ mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
+ mlxsw_sp_prefix_usage_clear(&req_prefix_usage,
+ fib_node->key.prefix_len);
+ lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
+ fib->proto);
+ if (IS_ERR(lpm_tree))
+ return;
-static void mlxsw_sp_fib_node_prefix_dec(struct mlxsw_sp_fib_node *fib_node)
-{
- unsigned char prefix_len = fib_node->key.prefix_len;
- struct mlxsw_sp_fib *fib = fib_node->fib;
+ err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
+ if (err)
+ goto err_lpm_tree_replace;
- if (--fib->prefix_ref_count[prefix_len] == 0)
- mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len);
+ return;
+
+err_lpm_tree_replace:
+ mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
}
static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
@@ -4276,12 +4294,10 @@ static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
return err;
fib_node->fib = fib;
- err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib, fib_node);
+ err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node);
if (err)
goto err_fib_lpm_tree_link;
- mlxsw_sp_fib_node_prefix_inc(fib_node);
-
return 0;
err_fib_lpm_tree_link:
@@ -4295,8 +4311,7 @@ static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
{
struct mlxsw_sp_fib *fib = fib_node->fib;
- mlxsw_sp_fib_node_prefix_dec(fib_node);
- mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib);
+ mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node);
fib_node->fib = NULL;
mlxsw_sp_fib_node_remove(fib, fib_node);
}
@@ -4335,7 +4350,7 @@ mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
err_fib_node_init:
mlxsw_sp_fib_node_destroy(fib_node);
err_fib_node_create:
- mlxsw_sp_vr_put(vr);
+ mlxsw_sp_vr_put(mlxsw_sp, vr);
return ERR_PTR(err);
}
@@ -4348,7 +4363,7 @@ static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
return;
mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
mlxsw_sp_fib_node_destroy(fib_node);
- mlxsw_sp_vr_put(vr);
+ mlxsw_sp_vr_put(mlxsw_sp, vr);
}
static struct mlxsw_sp_fib4_entry *
@@ -5371,7 +5386,7 @@ static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
return;
mlxsw_sp_mr_route4_del(vr->mr4_table, men_info->mfc);
- mlxsw_sp_vr_put(vr);
+ mlxsw_sp_vr_put(mlxsw_sp, vr);
}
static int
@@ -5408,7 +5423,7 @@ mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
return;
mlxsw_sp_mr_vif_del(vr->mr4_table, ven_info->vif_index);
- mlxsw_sp_vr_put(vr);
+ mlxsw_sp_vr_put(mlxsw_sp, vr);
}
static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
@@ -6057,7 +6072,7 @@ err_fid_get:
err_rif_alloc:
err_rif_index_alloc:
vr->rif_count--;
- mlxsw_sp_vr_put(vr);
+ mlxsw_sp_vr_put(mlxsw_sp, vr);
return ERR_PTR(err);
}
@@ -6080,7 +6095,7 @@ void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
mlxsw_sp_fid_put(fid);
kfree(rif);
vr->rif_count--;
- mlxsw_sp_vr_put(vr);
+ mlxsw_sp_vr_put(mlxsw_sp, vr);
}
static void
@@ -6870,7 +6885,7 @@ mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
return 0;
err_loopback_op:
- mlxsw_sp_vr_put(ul_vr);
+ mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
return err;
}
@@ -6884,7 +6899,7 @@ static void mlxsw_sp_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, false);
--ul_vr->rif_count;
- mlxsw_sp_vr_put(ul_vr);
+ mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
}
static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_ipip_lb_ops = {
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index 4ee11bf2aed7..322027792fe8 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -54,7 +54,7 @@ static bool nfp_net_ebpf_capable(struct nfp_net *nn)
static int
nfp_bpf_xdp_offload(struct nfp_app *app, struct nfp_net *nn,
- struct bpf_prog *prog)
+ struct bpf_prog *prog, struct netlink_ext_ack *extack)
{
bool running, xdp_running;
int ret;
@@ -70,10 +70,10 @@ nfp_bpf_xdp_offload(struct nfp_app *app, struct nfp_net *nn,
if (prog && running && !xdp_running)
return -EBUSY;
- ret = nfp_net_bpf_offload(nn, prog, running);
+ ret = nfp_net_bpf_offload(nn, prog, running, extack);
/* Stop offload if replace not possible */
if (ret && prog)
- nfp_bpf_xdp_offload(app, nn, NULL);
+ nfp_bpf_xdp_offload(app, nn, NULL, extack);
nn->dp.bpf_offload_xdp = prog && !ret;
return ret;
@@ -125,17 +125,29 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
struct nfp_bpf_vnic *bv;
int err;
- if (type != TC_SETUP_CLSBPF ||
- !tc_can_offload(nn->dp.netdev) ||
- !nfp_net_ebpf_capable(nn) ||
- cls_bpf->common.protocol != htons(ETH_P_ALL) ||
- cls_bpf->common.chain_index)
+ if (type != TC_SETUP_CLSBPF) {
+ NL_SET_ERR_MSG_MOD(cls_bpf->common.extack,
+ "only offload of BPF classifiers supported");
return -EOPNOTSUPP;
+ }
+ if (!tc_cls_can_offload_and_chain0(nn->dp.netdev, &cls_bpf->common))
+ return -EOPNOTSUPP;
+ if (!nfp_net_ebpf_capable(nn)) {
+ NL_SET_ERR_MSG_MOD(cls_bpf->common.extack,
+ "NFP firmware does not support eBPF offload");
+ return -EOPNOTSUPP;
+ }
+ if (cls_bpf->common.protocol != htons(ETH_P_ALL)) {
+ NL_SET_ERR_MSG_MOD(cls_bpf->common.extack,
+ "only ETH_P_ALL supported as filter protocol");
+ return -EOPNOTSUPP;
+ }
/* Only support TC direct action */
if (!cls_bpf->exts_integrated ||
tcf_exts_has_actions(cls_bpf->exts)) {
- nn_err(nn, "only direct action with no legacy actions supported\n");
+ NL_SET_ERR_MSG_MOD(cls_bpf->common.extack,
+ "only direct action with no legacy actions supported");
return -EOPNOTSUPP;
}
@@ -152,7 +164,8 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
return 0;
}
- err = nfp_net_bpf_offload(nn, cls_bpf->prog, oldprog);
+ err = nfp_net_bpf_offload(nn, cls_bpf->prog, oldprog,
+ cls_bpf->common.extack);
if (err)
return err;
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index c476bca15ba4..424fe8338105 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -335,7 +335,7 @@ struct nfp_net;
int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn,
struct netdev_bpf *bpf);
int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog,
- bool old_prog);
+ bool old_prog, struct netlink_ext_ack *extack);
struct nfp_insn_meta *
nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
index 1a357aacc444..0a7732385469 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
@@ -281,7 +281,9 @@ int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn, struct netdev_bpf *bpf)
}
}
-static int nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog)
+static int
+nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog,
+ struct netlink_ext_ack *extack)
{
struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv;
unsigned int max_mtu;
@@ -291,7 +293,7 @@ static int nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog)
max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32;
if (max_mtu < nn->dp.netdev->mtu) {
- nn_info(nn, "BPF offload not supported with MTU larger than HW packet split boundary\n");
+ NL_SET_ERR_MSG_MOD(extack, "BPF offload not supported with MTU larger than HW packet split boundary");
return -EOPNOTSUPP;
}
@@ -313,7 +315,8 @@ static int nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog)
/* Load up the JITed code */
err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_BPF);
if (err)
- nn_err(nn, "FW command error while loading BPF: %d\n", err);
+ NL_SET_ERR_MSG_MOD(extack,
+ "FW command error while loading BPF");
dma_unmap_single(nn->dp.dev, dma_addr, nfp_prog->prog_len * sizeof(u64),
DMA_TO_DEVICE);
@@ -322,7 +325,8 @@ static int nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog)
return err;
}
-static void nfp_net_bpf_start(struct nfp_net *nn)
+static void
+nfp_net_bpf_start(struct nfp_net *nn, struct netlink_ext_ack *extack)
{
int err;
@@ -331,7 +335,8 @@ static void nfp_net_bpf_start(struct nfp_net *nn)
nn_writel(nn, NFP_NET_CFG_CTRL, nn->dp.ctrl);
err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN);
if (err)
- nn_err(nn, "FW command error while enabling BPF: %d\n", err);
+ NL_SET_ERR_MSG_MOD(extack,
+ "FW command error while enabling BPF");
}
static int nfp_net_bpf_stop(struct nfp_net *nn)
@@ -346,7 +351,7 @@ static int nfp_net_bpf_stop(struct nfp_net *nn)
}
int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog,
- bool old_prog)
+ bool old_prog, struct netlink_ext_ack *extack)
{
int err;
@@ -364,7 +369,8 @@ int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog,
cap = nn_readb(nn, NFP_NET_CFG_BPF_CAP);
if (!(cap & NFP_NET_BPF_CAP_RELO)) {
- nn_err(nn, "FW does not support live reload\n");
+ NL_SET_ERR_MSG_MOD(extack,
+ "FW does not support live reload");
return -EBUSY;
}
}
@@ -376,12 +382,12 @@ int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog,
if (old_prog && !prog)
return nfp_net_bpf_stop(nn);
- err = nfp_net_bpf_load(nn, prog);
+ err = nfp_net_bpf_load(nn, prog, extack);
if (err)
return err;
if (!old_prog)
- nfp_net_bpf_start(nn);
+ nfp_net_bpf_start(nn, extack);
return 0;
}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index 837134a9137c..08c4c6dc5f7f 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -483,8 +483,7 @@ static int
nfp_flower_repr_offload(struct nfp_app *app, struct net_device *netdev,
struct tc_cls_flower_offload *flower, bool egress)
{
- if (!eth_proto_is_802_3(flower->common.protocol) ||
- flower->common.chain_index)
+ if (!eth_proto_is_802_3(flower->common.protocol))
return -EOPNOTSUPP;
switch (flower->command) {
@@ -504,7 +503,7 @@ int nfp_flower_setup_tc_egress_cb(enum tc_setup_type type, void *type_data,
{
struct nfp_repr *repr = cb_priv;
- if (!tc_can_offload(repr->netdev))
+ if (!tc_cls_can_offload_and_chain0(repr->netdev, type_data))
return -EOPNOTSUPP;
switch (type) {
@@ -521,7 +520,7 @@ static int nfp_flower_setup_tc_block_cb(enum tc_setup_type type,
{
struct nfp_repr *repr = cb_priv;
- if (!tc_can_offload(repr->netdev))
+ if (!tc_cls_can_offload_and_chain0(repr->netdev, type_data))
return -EOPNOTSUPP;
switch (type) {
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h
index 7e474df90598..437964afa8ee 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_app.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h
@@ -43,6 +43,7 @@
struct bpf_prog;
struct net_device;
struct netdev_bpf;
+struct netlink_ext_ack;
struct pci_dev;
struct sk_buff;
struct sk_buff;
@@ -138,7 +139,8 @@ struct nfp_app_type {
int (*bpf)(struct nfp_app *app, struct nfp_net *nn,
struct netdev_bpf *xdp);
int (*xdp_offload)(struct nfp_app *app, struct nfp_net *nn,
- struct bpf_prog *prog);
+ struct bpf_prog *prog,
+ struct netlink_ext_ack *extack);
int (*sriov_enable)(struct nfp_app *app, int num_vfs);
void (*sriov_disable)(struct nfp_app *app);
@@ -324,11 +326,12 @@ static inline int nfp_app_bpf(struct nfp_app *app, struct nfp_net *nn,
}
static inline int nfp_app_xdp_offload(struct nfp_app *app, struct nfp_net *nn,
- struct bpf_prog *prog)
+ struct bpf_prog *prog,
+ struct netlink_ext_ack *extack)
{
if (!app || !app->type->xdp_offload)
return -EOPNOTSUPP;
- return app->type->xdp_offload(app, nn, prog);
+ return app->type->xdp_offload(app, nn, prog, extack);
}
static inline bool __nfp_app_ctrl_tx(struct nfp_app *app, struct sk_buff *skb)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c
index c5b91040b12e..cc570bb6563c 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c
@@ -518,6 +518,7 @@ static int nfp_pci_probe(struct pci_dev *pdev,
dev_err(&pdev->dev,
"Error: %d VFs already enabled, but loaded FW can only support %d\n",
pf->num_vfs, pf->limit_vfs);
+ err = -EINVAL;
goto err_fw_unload;
}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index cdf52421eaca..c0fd351c86b1 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -3403,7 +3403,7 @@ nfp_net_xdp_setup(struct nfp_net *nn, struct bpf_prog *prog, u32 flags,
if (err)
return err;
- err = nfp_app_xdp_offload(nn->app, nn, offload_prog);
+ err = nfp_app_xdp_offload(nn->app, nn, offload_prog, extack);
if (err && flags & XDP_FLAGS_HW_MODE)
return err;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c b/drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c
index e6f19f44b461..bb8ed460086e 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_debugdump.c
@@ -640,8 +640,8 @@ nfp_dump_single_rtsym(struct nfp_pf *pf, struct nfp_dumpspec_rtsym *spec,
struct nfp_dump_rtsym *dump_header = dump->p;
struct nfp_dumpspec_cpp_isl_id cpp_params;
struct nfp_rtsym_table *rtbl = pf->rtbl;
+ u32 header_size, total_size, sym_size;
const struct nfp_rtsym *sym;
- u32 header_size, total_size;
u32 tl_len, key_len;
int bytes_read;
u32 cpp_id;
@@ -657,9 +657,14 @@ nfp_dump_single_rtsym(struct nfp_pf *pf, struct nfp_dumpspec_rtsym *spec,
if (!sym)
return nfp_dump_error_tlv(&spec->tl, -ENOENT, dump);
+ if (sym->type == NFP_RTSYM_TYPE_ABS)
+ sym_size = sizeof(sym->addr);
+ else
+ sym_size = sym->size;
+
header_size =
ALIGN8(offsetof(struct nfp_dump_rtsym, rtsym) + key_len + 1);
- total_size = header_size + ALIGN8(sym->size);
+ total_size = header_size + ALIGN8(sym_size);
dest = dump->p + header_size;
err = nfp_add_tlv(be32_to_cpu(spec->tl.type), total_size, dump);
@@ -669,9 +674,9 @@ nfp_dump_single_rtsym(struct nfp_pf *pf, struct nfp_dumpspec_rtsym *spec,
dump_header->padded_name_length =
header_size - offsetof(struct nfp_dump_rtsym, rtsym);
memcpy(dump_header->rtsym, spec->rtsym, key_len + 1);
+ dump_header->cpp.dump_length = cpu_to_be32(sym_size);
if (sym->type == NFP_RTSYM_TYPE_ABS) {
- dump_header->cpp.dump_length = cpu_to_be32(sizeof(sym->addr));
*(u64 *)dest = sym->addr;
} else {
cpp_params.target = sym->target;
@@ -681,10 +686,9 @@ nfp_dump_single_rtsym(struct nfp_pf *pf, struct nfp_dumpspec_rtsym *spec,
cpp_id = nfp_get_numeric_cpp_id(&cpp_params);
dump_header->cpp.cpp_id = cpp_params;
dump_header->cpp.offset = cpu_to_be32(sym->addr);
- dump_header->cpp.dump_length = cpu_to_be32(sym->size);
bytes_read = nfp_cpp_read(pf->cpp, cpp_id, sym->addr, dest,
- sym->size);
- if (bytes_read != sym->size) {
+ sym_size);
+ if (bytes_read != sym_size) {
if (bytes_read >= 0)
bytes_read = -EIO;
dump_header->error = cpu_to_be32(bytes_read);
diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index a3f6d514c0f2..66c665d0b926 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -795,7 +795,7 @@ struct fe_priv {
*/
union ring_type get_rx, put_rx, last_rx;
struct nv_skb_map *get_rx_ctx, *put_rx_ctx;
- struct nv_skb_map *first_rx_ctx, *last_rx_ctx;
+ struct nv_skb_map *last_rx_ctx;
struct nv_skb_map *rx_skb;
union ring_type rx_ring;
@@ -1835,7 +1835,7 @@ static int nv_alloc_rx(struct net_device *dev)
if (unlikely(np->put_rx.orig++ == np->last_rx.orig))
np->put_rx.orig = np->rx_ring.orig;
if (unlikely(np->put_rx_ctx++ == np->last_rx_ctx))
- np->put_rx_ctx = np->first_rx_ctx;
+ np->put_rx_ctx = np->rx_skb;
} else {
packet_dropped:
u64_stats_update_begin(&np->swstats_rx_syncp);
@@ -1877,7 +1877,7 @@ static int nv_alloc_rx_optimized(struct net_device *dev)
if (unlikely(np->put_rx.ex++ == np->last_rx.ex))
np->put_rx.ex = np->rx_ring.ex;
if (unlikely(np->put_rx_ctx++ == np->last_rx_ctx))
- np->put_rx_ctx = np->first_rx_ctx;
+ np->put_rx_ctx = np->rx_skb;
} else {
packet_dropped:
u64_stats_update_begin(&np->swstats_rx_syncp);
@@ -1910,7 +1910,8 @@ static void nv_init_rx(struct net_device *dev)
np->last_rx.orig = &np->rx_ring.orig[np->rx_ring_size-1];
else
np->last_rx.ex = &np->rx_ring.ex[np->rx_ring_size-1];
- np->get_rx_ctx = np->put_rx_ctx = np->first_rx_ctx = np->rx_skb;
+ np->get_rx_ctx = np->rx_skb;
+ np->put_rx_ctx = np->rx_skb;
np->last_rx_ctx = &np->rx_skb[np->rx_ring_size-1];
for (i = 0; i < np->rx_ring_size; i++) {
@@ -2914,7 +2915,7 @@ next_pkt:
if (unlikely(np->get_rx.orig++ == np->last_rx.orig))
np->get_rx.orig = np->rx_ring.orig;
if (unlikely(np->get_rx_ctx++ == np->last_rx_ctx))
- np->get_rx_ctx = np->first_rx_ctx;
+ np->get_rx_ctx = np->rx_skb;
rx_work++;
}
@@ -3003,7 +3004,7 @@ next_pkt:
if (unlikely(np->get_rx.ex++ == np->last_rx.ex))
np->get_rx.ex = np->rx_ring.ex;
if (unlikely(np->get_rx_ctx++ == np->last_rx_ctx))
- np->get_rx_ctx = np->first_rx_ctx;
+ np->get_rx_ctx = np->rx_skb;
rx_work++;
}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
index bdc46f11ce45..5d040b873137 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
@@ -358,10 +358,27 @@ static void qed_rdma_resc_free(struct qed_hwfn *p_hwfn)
kfree(p_rdma_info);
}
+static void qed_rdma_free_tid(void *rdma_cxt, u32 itid)
+{
+ struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
+
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "itid = %08x\n", itid);
+
+ spin_lock_bh(&p_hwfn->p_rdma_info->lock);
+ qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->tid_map, itid);
+ spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
+}
+
+static void qed_rdma_free_reserved_lkey(struct qed_hwfn *p_hwfn)
+{
+ qed_rdma_free_tid(p_hwfn, p_hwfn->p_rdma_info->dev->reserved_lkey);
+}
+
static void qed_rdma_free(struct qed_hwfn *p_hwfn)
{
DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Freeing RDMA\n");
+ qed_rdma_free_reserved_lkey(p_hwfn);
qed_rdma_resc_free(p_hwfn);
}
@@ -615,9 +632,6 @@ static int qed_rdma_reserve_lkey(struct qed_hwfn *p_hwfn)
{
struct qed_rdma_device *dev = p_hwfn->p_rdma_info->dev;
- /* The first DPI is reserved for the Kernel */
- __set_bit(0, p_hwfn->p_rdma_info->dpi_map.bitmap);
-
/* Tid 0 will be used as the key for "reserved MR".
* The driver should allocate memory for it so it can be loaded but no
* ramrod should be passed on it.
@@ -797,17 +811,6 @@ static struct qed_rdma_device *qed_rdma_query_device(void *rdma_cxt)
return p_hwfn->p_rdma_info->dev;
}
-static void qed_rdma_free_tid(void *rdma_cxt, u32 itid)
-{
- struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
-
- DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "itid = %08x\n", itid);
-
- spin_lock_bh(&p_hwfn->p_rdma_info->lock);
- qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->tid_map, itid);
- spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
-}
-
static void qed_rdma_cnq_prod_update(void *rdma_cxt, u8 qz_offset, u16 prod)
{
struct qed_hwfn *p_hwfn;
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.h b/drivers/net/ethernet/qualcomm/emac/emac-mac.h
index 5028fb4bec2b..4beedb8faa1e 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-mac.h
+++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.h
@@ -114,8 +114,9 @@ struct emac_tpd {
#define TPD_INSTC_SET(tpd, val) BITS_SET((tpd)->word[3], 17, 17, val)
/* High-14bit Buffer Address, So, the 64b-bit address is
* {DESC_CTRL_11_TX_DATA_HIADDR[17:0],(register) BUFFER_ADDR_H, BUFFER_ADDR_L}
+ * Extend TPD_BUFFER_ADDR_H to [31, 18], because we never enable timestamping.
*/
-#define TPD_BUFFER_ADDR_H_SET(tpd, val) BITS_SET((tpd)->word[3], 18, 30, val)
+#define TPD_BUFFER_ADDR_H_SET(tpd, val) BITS_SET((tpd)->word[3], 18, 31, val)
/* Format D. Word offset from the 1st byte of this packet to start to calculate
* the custom checksum.
*/
diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c
index 38c924bdd32e..13235baf4766 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac.c
@@ -615,8 +615,11 @@ static int emac_probe(struct platform_device *pdev)
u32 reg;
int ret;
- /* The TPD buffer address is limited to 45 bits. */
- ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(45));
+ /* The TPD buffer address is limited to:
+ * 1. PTP: 45bits. (Driver doesn't support yet.)
+ * 2. NON-PTP: 46bits.
+ */
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(46));
if (ret) {
dev_err(&pdev->dev, "could not set DMA mask\n");
return ret;
diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c
index 6d6fb8cf3e7c..6473cc68c2d5 100644
--- a/drivers/net/ethernet/rocker/rocker_ofdpa.c
+++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c
@@ -789,7 +789,6 @@ static int ofdpa_flow_tbl_add(struct ofdpa_port *ofdpa_port,
ofdpa_flags_nowait(flags),
ofdpa_cmd_flow_tbl_add,
found, NULL, NULL);
- return 0;
}
static int ofdpa_flow_tbl_del(struct ofdpa_port *ofdpa_port,
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index 8ae467db9162..75fbf58e421c 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -322,6 +322,25 @@ static int efx_ef10_init_datapath_caps(struct efx_nic *efx)
return 0;
}
+static void efx_ef10_read_licensed_features(struct efx_nic *efx)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_LICENSING_V3_IN_LEN);
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_LICENSING_V3_OUT_LEN);
+ struct efx_ef10_nic_data *nic_data = efx->nic_data;
+ size_t outlen;
+ int rc;
+
+ MCDI_SET_DWORD(inbuf, LICENSING_V3_IN_OP,
+ MC_CMD_LICENSING_V3_IN_OP_REPORT_LICENSE);
+ rc = efx_mcdi_rpc_quiet(efx, MC_CMD_LICENSING_V3, inbuf, sizeof(inbuf),
+ outbuf, sizeof(outbuf), &outlen);
+ if (rc || (outlen < MC_CMD_LICENSING_V3_OUT_LEN))
+ return;
+
+ nic_data->licensed_features = MCDI_QWORD(outbuf,
+ LICENSING_V3_OUT_LICENSED_FEATURES);
+}
+
static int efx_ef10_get_sysclk_freq(struct efx_nic *efx)
{
MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_CLOCK_OUT_LEN);
@@ -722,6 +741,8 @@ static int efx_ef10_probe(struct efx_nic *efx)
if (rc < 0)
goto fail5;
+ efx_ef10_read_licensed_features(efx);
+
/* We can have one VI for each vi_stride-byte region.
* However, until we use TX option descriptors we need two TX queues
* per channel.
@@ -760,14 +781,7 @@ static int efx_ef10_probe(struct efx_nic *efx)
if (rc && rc != -EPERM)
goto fail5;
- rc = efx_ptp_probe(efx, NULL);
- /* Failure to probe PTP is not fatal.
- * In the case of EPERM, efx_ptp_probe will print its own message (in
- * efx_ptp_get_attributes()), so we don't need to.
- */
- if (rc && rc != -EPERM)
- netif_warn(efx, drv, efx->net_dev,
- "Failed to probe PTP, rc=%d\n", rc);
+ efx_ptp_defer_probe_with_channel(efx);
#ifdef CONFIG_SFC_SRIOV
if ((efx->pci_dev->physfn) && (!efx->pci_dev->is_physfn)) {
@@ -937,6 +951,11 @@ static int efx_ef10_link_piobufs(struct efx_nic *efx)
/* Link a buffer to each TX queue */
efx_for_each_channel(channel, efx) {
+ /* Extra channels, even those with TXQs (PTP), do not require
+ * PIO resources.
+ */
+ if (!channel->type->want_pio)
+ continue;
efx_for_each_channel_tx_queue(tx_queue, channel) {
/* We assign the PIO buffers to queues in
* reverse order to allow for the following
@@ -1284,7 +1303,9 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx)
void __iomem *membase;
int rc;
- channel_vis = max(efx->n_channels, efx->n_tx_channels * EFX_TXQ_TYPES);
+ channel_vis = max(efx->n_channels,
+ (efx->n_tx_channels + efx->n_extra_tx_channels) *
+ EFX_TXQ_TYPES);
#ifdef EFX_USE_PIO
/* Try to allocate PIO buffers if wanted and if the full
@@ -2408,12 +2429,25 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
int i;
BUILD_BUG_ON(MC_CMD_INIT_TXQ_OUT_LEN != 0);
+ /* Only attempt to enable TX timestamping if we have the license for it,
+ * otherwise TXQ init will fail
+ */
+ if (!(nic_data->licensed_features &
+ (1 << LICENSED_V3_FEATURES_TX_TIMESTAMPS_LBN))) {
+ tx_queue->timestamping = false;
+ /* Disable sync events on this channel. */
+ if (efx->type->ptp_set_ts_sync_events)
+ efx->type->ptp_set_ts_sync_events(efx, false, false);
+ }
+
/* TSOv2 is a limited resource that can only be configured on a limited
* number of queues. TSO without checksum offload is not really a thing,
* so we only enable it for those queues.
+ * TSOv2 cannot be used with Hardware timestamping.
*/
if (csum_offload && (nic_data->datapath_caps2 &
- (1 << MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TSO_V2_LBN))) {
+ (1 << MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TSO_V2_LBN)) &&
+ !tx_queue->timestamping) {
tso_v2 = true;
netif_dbg(efx, hw, efx->net_dev, "Using TSOv2 for channel %u\n",
channel->channel);
@@ -2439,14 +2473,16 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
inlen = MC_CMD_INIT_TXQ_IN_LEN(entries);
do {
- MCDI_POPULATE_DWORD_3(inbuf, INIT_TXQ_IN_FLAGS,
+ MCDI_POPULATE_DWORD_4(inbuf, INIT_TXQ_IN_FLAGS,
/* This flag was removed from mcdi_pcol.h for
* the non-_EXT version of INIT_TXQ. However,
* firmware still honours it.
*/
INIT_TXQ_EXT_IN_FLAG_TSOV2_EN, tso_v2,
INIT_TXQ_IN_FLAG_IP_CSUM_DIS, !csum_offload,
- INIT_TXQ_IN_FLAG_TCP_CSUM_DIS, !csum_offload);
+ INIT_TXQ_IN_FLAG_TCP_CSUM_DIS, !csum_offload,
+ INIT_TXQ_EXT_IN_FLAG_TIMESTAMP,
+ tx_queue->timestamping);
rc = efx_mcdi_rpc_quiet(efx, MC_CMD_INIT_TXQ, inbuf, inlen,
NULL, 0, NULL);
@@ -2472,12 +2508,13 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
tx_queue->buffer[0].flags = EFX_TX_BUF_OPTION;
tx_queue->insert_count = 1;
txd = efx_tx_desc(tx_queue, 0);
- EFX_POPULATE_QWORD_4(*txd,
+ EFX_POPULATE_QWORD_5(*txd,
ESF_DZ_TX_DESC_IS_OPT, true,
ESF_DZ_TX_OPTION_TYPE,
ESE_DZ_TX_OPTION_DESC_CRC_CSUM,
ESF_DZ_TX_OPTION_UDP_TCP_CSUM, csum_offload,
- ESF_DZ_TX_OPTION_IP_CSUM, csum_offload);
+ ESF_DZ_TX_OPTION_IP_CSUM, csum_offload,
+ ESF_DZ_TX_TIMESTAMP, tx_queue->timestamping);
tx_queue->write_count = 1;
if (tso_v2) {
@@ -3572,31 +3609,92 @@ static int efx_ef10_handle_rx_event(struct efx_channel *channel,
return n_packets;
}
-static int
+static u32 efx_ef10_extract_event_ts(efx_qword_t *event)
+{
+ u32 tstamp;
+
+ tstamp = EFX_QWORD_FIELD(*event, TX_TIMESTAMP_EVENT_TSTAMP_DATA_HI);
+ tstamp <<= 16;
+ tstamp |= EFX_QWORD_FIELD(*event, TX_TIMESTAMP_EVENT_TSTAMP_DATA_LO);
+
+ return tstamp;
+}
+
+static void
efx_ef10_handle_tx_event(struct efx_channel *channel, efx_qword_t *event)
{
struct efx_nic *efx = channel->efx;
struct efx_tx_queue *tx_queue;
unsigned int tx_ev_desc_ptr;
unsigned int tx_ev_q_label;
- int tx_descs = 0;
+ unsigned int tx_ev_type;
+ u64 ts_part;
if (unlikely(READ_ONCE(efx->reset_pending)))
- return 0;
+ return;
if (unlikely(EFX_QWORD_FIELD(*event, ESF_DZ_TX_DROP_EVENT)))
- return 0;
+ return;
- /* Transmit completion */
- tx_ev_desc_ptr = EFX_QWORD_FIELD(*event, ESF_DZ_TX_DESCR_INDX);
+ /* Get the transmit queue */
tx_ev_q_label = EFX_QWORD_FIELD(*event, ESF_DZ_TX_QLABEL);
tx_queue = efx_channel_get_tx_queue(channel,
tx_ev_q_label % EFX_TXQ_TYPES);
- tx_descs = ((tx_ev_desc_ptr + 1 - tx_queue->read_count) &
- tx_queue->ptr_mask);
- efx_xmit_done(tx_queue, tx_ev_desc_ptr & tx_queue->ptr_mask);
- return tx_descs;
+ if (!tx_queue->timestamping) {
+ /* Transmit completion */
+ tx_ev_desc_ptr = EFX_QWORD_FIELD(*event, ESF_DZ_TX_DESCR_INDX);
+ efx_xmit_done(tx_queue, tx_ev_desc_ptr & tx_queue->ptr_mask);
+ return;
+ }
+
+ /* Transmit timestamps are only available for 8XXX series. They result
+ * in three events per packet. These occur in order, and are:
+ * - the normal completion event
+ * - the low part of the timestamp
+ * - the high part of the timestamp
+ *
+ * Each part of the timestamp is itself split across two 16 bit
+ * fields in the event.
+ */
+ tx_ev_type = EFX_QWORD_FIELD(*event, ESF_EZ_TX_SOFT1);
+
+ switch (tx_ev_type) {
+ case TX_TIMESTAMP_EVENT_TX_EV_COMPLETION:
+ /* In case of Queue flush or FLR, we might have received
+ * the previous TX completion event but not the Timestamp
+ * events.
+ */
+ if (tx_queue->completed_desc_ptr != tx_queue->ptr_mask)
+ efx_xmit_done(tx_queue, tx_queue->completed_desc_ptr);
+
+ tx_ev_desc_ptr = EFX_QWORD_FIELD(*event,
+ ESF_DZ_TX_DESCR_INDX);
+ tx_queue->completed_desc_ptr =
+ tx_ev_desc_ptr & tx_queue->ptr_mask;
+ break;
+
+ case TX_TIMESTAMP_EVENT_TX_EV_TSTAMP_LO:
+ ts_part = efx_ef10_extract_event_ts(event);
+ tx_queue->completed_timestamp_minor = ts_part;
+ break;
+
+ case TX_TIMESTAMP_EVENT_TX_EV_TSTAMP_HI:
+ ts_part = efx_ef10_extract_event_ts(event);
+ tx_queue->completed_timestamp_major = ts_part;
+
+ efx_xmit_done(tx_queue, tx_queue->completed_desc_ptr);
+ tx_queue->completed_desc_ptr = tx_queue->ptr_mask;
+ break;
+
+ default:
+ netif_err(efx, hw, efx->net_dev,
+ "channel %d unknown tx event type %d (data "
+ EFX_QWORD_FMT ")\n",
+ channel->channel, tx_ev_type,
+ EFX_QWORD_VAL(*event));
+ break;
+ }
}
static void
@@ -3658,7 +3756,6 @@ static int efx_ef10_ev_process(struct efx_channel *channel, int quota)
efx_qword_t event, *p_event;
unsigned int read_ptr;
int ev_code;
- int tx_descs = 0;
int spent = 0;
if (quota <= 0)
@@ -3698,13 +3795,7 @@ static int efx_ef10_ev_process(struct efx_channel *channel, int quota)
}
break;
case ESE_DZ_EV_CODE_TX_EV:
- tx_descs += efx_ef10_handle_tx_event(channel, &event);
- if (tx_descs > efx->txq_entries) {
- spent = quota;
- goto out;
- } else if (++spent == quota) {
- goto out;
- }
+ efx_ef10_handle_tx_event(channel, &event);
break;
case ESE_DZ_EV_CODE_DRIVER_EV:
efx_ef10_handle_driver_event(channel, &event);
@@ -6179,7 +6270,8 @@ static int efx_ef10_ptp_set_ts_sync_events(struct efx_nic *efx, bool en,
efx_ef10_rx_enable_timestamping :
efx_ef10_rx_disable_timestamping;
- efx_for_each_channel(channel, efx) {
+ channel = efx_ptp_channel(efx);
+ if (channel) {
int rc = set(channel, temp);
if (en && rc != 0) {
efx_ef10_ptp_set_ts_sync_events(efx, false, temp);
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 12f0abc30cb1..16757cfc5b29 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -896,12 +896,20 @@ void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue)
mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(100));
}
+static bool efx_default_channel_want_txqs(struct efx_channel *channel)
+{
+ return channel->channel - channel->efx->tx_channel_offset <
+ channel->efx->n_tx_channels;
+}
+
static const struct efx_channel_type efx_default_channel_type = {
.pre_probe = efx_channel_dummy_op_int,
.post_remove = efx_channel_dummy_op_void,
.get_name = efx_get_channel_name,
.copy = efx_copy_channel,
+ .want_txqs = efx_default_channel_want_txqs,
.keep_eventq = false,
+ .want_pio = true,
};
int efx_channel_dummy_op_int(struct efx_channel *channel)
@@ -1501,6 +1509,7 @@ static int efx_probe_interrupts(struct efx_nic *efx)
}
/* Assign extra channels if possible */
+ efx->n_extra_tx_channels = 0;
j = efx->n_channels;
for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) {
if (!efx->extra_channel_type[i])
@@ -1512,6 +1521,8 @@ static int efx_probe_interrupts(struct efx_nic *efx)
--j;
efx_get_channel(efx, j)->type =
efx->extra_channel_type[i];
+ if (efx_channel_has_tx_queues(efx_get_channel(efx, j)))
+ efx->n_extra_tx_channels++;
}
}
diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c
index 5334dc83d926..266b9bee1f3a 100644
--- a/drivers/net/ethernet/sfc/farch.c
+++ b/drivers/net/ethernet/sfc/farch.c
@@ -818,17 +818,16 @@ static void efx_farch_magic_event(struct efx_channel *channel, u32 magic)
* The NIC batches TX completion events; the message we receive is of
* the form "complete all TX events up to this index".
*/
-static int
+static void
efx_farch_handle_tx_event(struct efx_channel *channel, efx_qword_t *event)
{
unsigned int tx_ev_desc_ptr;
unsigned int tx_ev_q_label;
struct efx_tx_queue *tx_queue;
struct efx_nic *efx = channel->efx;
- int tx_packets = 0;
if (unlikely(READ_ONCE(efx->reset_pending)))
- return 0;
+ return;
if (likely(EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_COMP))) {
/* Transmit completion */
@@ -836,8 +835,6 @@ efx_farch_handle_tx_event(struct efx_channel *channel, efx_qword_t *event)
tx_ev_q_label = EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_Q_LABEL);
tx_queue = efx_channel_get_tx_queue(
channel, tx_ev_q_label % EFX_TXQ_TYPES);
- tx_packets = ((tx_ev_desc_ptr - tx_queue->read_count) &
- tx_queue->ptr_mask);
efx_xmit_done(tx_queue, tx_ev_desc_ptr);
} else if (EFX_QWORD_FIELD(*event, FSF_AZ_TX_EV_WQ_FF_FULL)) {
/* Rewrite the FIFO write pointer */
@@ -856,8 +853,6 @@ efx_farch_handle_tx_event(struct efx_channel *channel, efx_qword_t *event)
EFX_QWORD_FMT"\n", channel->channel,
EFX_QWORD_VAL(*event));
}
-
- return tx_packets;
}
/* Detect errors included in the rx_evt_pkt_ok bit. */
@@ -1090,7 +1085,7 @@ efx_farch_handle_tx_flush_done(struct efx_nic *efx, efx_qword_t *event)
int qid;
qid = EFX_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_SUBDATA);
- if (qid < EFX_TXQ_TYPES * efx->n_tx_channels) {
+ if (qid < EFX_TXQ_TYPES * (efx->n_tx_channels + efx->n_extra_tx_channels)) {
tx_queue = efx_get_tx_queue(efx, qid / EFX_TXQ_TYPES,
qid % EFX_TXQ_TYPES);
if (atomic_cmpxchg(&tx_queue->flush_outstanding, 1, 0)) {
@@ -1270,7 +1265,6 @@ int efx_farch_ev_process(struct efx_channel *channel, int budget)
unsigned int read_ptr;
efx_qword_t event, *p_event;
int ev_code;
- int tx_packets = 0;
int spent = 0;
if (budget <= 0)
@@ -1304,12 +1298,7 @@ int efx_farch_ev_process(struct efx_channel *channel, int budget)
goto out;
break;
case FSE_AZ_EV_CODE_TX_EV:
- tx_packets += efx_farch_handle_tx_event(channel,
- &event);
- if (tx_packets > efx->txq_entries) {
- spent = budget;
- goto out;
- }
+ efx_farch_handle_tx_event(channel, &event);
break;
case FSE_AZ_EV_CODE_DRV_GEN_EV:
efx_farch_handle_generated_event(channel, &event);
@@ -1680,20 +1669,21 @@ void efx_farch_rx_pull_indir_table(struct efx_nic *efx)
*/
void efx_farch_dimension_resources(struct efx_nic *efx, unsigned sram_lim_qw)
{
- unsigned vi_count, buftbl_min;
+ unsigned vi_count, buftbl_min, total_tx_channels;
#ifdef CONFIG_SFC_SRIOV
struct siena_nic_data *nic_data = efx->nic_data;
#endif
+ total_tx_channels = efx->n_tx_channels + efx->n_extra_tx_channels;
/* Account for the buffer table entries backing the datapath channels
* and the descriptor caches for those channels.
*/
buftbl_min = ((efx->n_rx_channels * EFX_MAX_DMAQ_SIZE +
- efx->n_tx_channels * EFX_TXQ_TYPES * EFX_MAX_DMAQ_SIZE +
+ total_tx_channels * EFX_TXQ_TYPES * EFX_MAX_DMAQ_SIZE +
efx->n_channels * EFX_MAX_EVQ_SIZE)
* sizeof(efx_qword_t) / EFX_BUF_SIZE);
- vi_count = max(efx->n_channels, efx->n_tx_channels * EFX_TXQ_TYPES);
+ vi_count = max(efx->n_channels, total_tx_channels * EFX_TXQ_TYPES);
#ifdef CONFIG_SFC_SRIOV
if (efx->type->sriov_wanted) {
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index 3dd42f3136fe..d20a8660ee48 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -191,6 +191,7 @@ struct efx_tx_buffer {
* Size of the region is efx_piobuf_size.
* @piobuf_offset: Buffer offset to be specified in PIO descriptors
* @initialised: Has hardware queue been initialised?
+ * @timestamping: Is timestamping enabled for this channel?
* @handle_tso: TSO xmit preparation handler. Sets up the TSO metadata and
* may also map tx data, depending on the nature of the TSO implementation.
* @read_count: Current read pointer.
@@ -202,6 +203,10 @@ struct efx_tx_buffer {
* avoid cache-line ping-pong between the xmit path and the
* completion path.
* @merge_events: Number of TX merged completion events
+ * @completed_desc_ptr: Most recent completed pointer - only used with
+ * timestamping.
+ * @completed_timestamp_major: Top part of the most recent tx timestamp.
+ * @completed_timestamp_minor: Low part of the most recent tx timestamp.
* @insert_count: Current insert pointer
* This is the number of buffers that have been added to the
* software ring.
@@ -247,6 +252,7 @@ struct efx_tx_queue {
void __iomem *piobuf;
unsigned int piobuf_offset;
bool initialised;
+ bool timestamping;
/* Function pointers used in the fast path. */
int (*handle_tso)(struct efx_tx_queue*, struct sk_buff*, bool *);
@@ -257,6 +263,9 @@ struct efx_tx_queue {
unsigned int merge_events;
unsigned int bytes_compl;
unsigned int pkts_compl;
+ unsigned int completed_desc_ptr;
+ u32 completed_timestamp_major;
+ u32 completed_timestamp_minor;
/* Members used only on the xmit path */
unsigned int insert_count ____cacheline_aligned_in_smp;
@@ -522,8 +531,12 @@ struct efx_msi_context {
* @copy: Copy the channel state prior to reallocation. May be %NULL if
* reallocation is not supported.
* @receive_skb: Handle an skb ready to be passed to netif_receive_skb()
+ * @want_txqs: Determine whether this channel should have TX queues
+ * created. If %NULL, TX queues are not created.
* @keep_eventq: Flag for whether event queue should be kept initialised
* while the device is stopped
+ * @want_pio: Flag for whether PIO buffers should be linked to this
+ * channel's TX queues.
*/
struct efx_channel_type {
void (*handle_no_channel)(struct efx_nic *);
@@ -532,7 +545,9 @@ struct efx_channel_type {
void (*get_name)(struct efx_channel *, char *buf, size_t len);
struct efx_channel *(*copy)(const struct efx_channel *);
bool (*receive_skb)(struct efx_channel *, struct sk_buff *);
+ bool (*want_txqs)(struct efx_channel *);
bool keep_eventq;
+ bool want_pio;
};
enum efx_led_mode {
@@ -735,6 +750,7 @@ struct vfdi_status;
* @n_channels: Number of channels in use
* @n_rx_channels: Number of channels used for RX (= number of RX queues)
* @n_tx_channels: Number of channels used for TX
+ * @n_extra_tx_channels: Number of extra channels with TX queues
* @rx_ip_align: RX DMA address offset to have IP header aligned in
* in accordance with NET_IP_ALIGN
* @rx_dma_len: Current maximum RX DMA length
@@ -881,6 +897,7 @@ struct efx_nic {
unsigned rss_spread;
unsigned tx_channel_offset;
unsigned n_tx_channels;
+ unsigned n_extra_tx_channels;
unsigned int rx_ip_align;
unsigned int rx_dma_len;
unsigned int rx_buffer_order;
@@ -1363,8 +1380,8 @@ efx_get_tx_queue(struct efx_nic *efx, unsigned index, unsigned type)
static inline bool efx_channel_has_tx_queues(struct efx_channel *channel)
{
- return channel->channel - channel->efx->tx_channel_offset <
- channel->efx->n_tx_channels;
+ return channel->type && channel->type->want_txqs &&
+ channel->type->want_txqs(channel);
}
static inline struct efx_tx_queue *
diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h
index 763052214525..6549fc685a48 100644
--- a/drivers/net/ethernet/sfc/nic.h
+++ b/drivers/net/ethernet/sfc/nic.h
@@ -440,6 +440,7 @@ struct efx_ef10_nic_data {
struct efx_udp_tunnel udp_tunnels[16];
bool udp_tunnels_dirty;
struct mutex udp_tunnels_lock;
+ u64 licensed_features;
};
int efx_init_sriov(void);
@@ -448,6 +449,7 @@ void efx_fini_sriov(void);
struct ethtool_ts_info;
int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel);
void efx_ptp_defer_probe_with_channel(struct efx_nic *efx);
+struct efx_channel *efx_ptp_channel(struct efx_nic *efx);
void efx_ptp_remove(struct efx_nic *efx);
int efx_ptp_set_ts_config(struct efx_nic *efx, struct ifreq *ifr);
int efx_ptp_get_ts_config(struct efx_nic *efx, struct ifreq *ifr);
@@ -471,6 +473,8 @@ static inline void efx_rx_skb_attach_timestamp(struct efx_channel *channel,
}
void efx_ptp_start_datapath(struct efx_nic *efx);
void efx_ptp_stop_datapath(struct efx_nic *efx);
+bool efx_ptp_use_mac_tx_timestamps(struct efx_nic *efx);
+ktime_t efx_ptp_nic_to_kernel_time(struct efx_tx_queue *tx_queue);
extern const struct efx_nic_type falcon_a1_nic_type;
extern const struct efx_nic_type falcon_b0_nic_type;
diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c
index 3b37d7ded3c4..f21661532ed3 100644
--- a/drivers/net/ethernet/sfc/ptp.c
+++ b/drivers/net/ethernet/sfc/ptp.c
@@ -149,18 +149,14 @@ enum ptp_packet_state {
/* Maximum parts-per-billion adjustment that is acceptable */
#define MAX_PPB 1000000
-/* Number of bits required to hold the above */
-#define MAX_PPB_BITS 20
-
-/* Number of extra bits allowed when calculating fractional ns.
- * EXTRA_BITS + MC_CMD_PTP_IN_ADJUST_BITS + MAX_PPB_BITS should
- * be less than 63.
- */
-#define PPB_EXTRA_BITS 2
-
/* Precalculate scale word to avoid long long division at runtime */
-#define PPB_SCALE_WORD ((1LL << (PPB_EXTRA_BITS + MC_CMD_PTP_IN_ADJUST_BITS +\
- MAX_PPB_BITS)) / 1000000000LL)
+/* This is equivalent to 2^66 / 10^9. */
+#define PPB_SCALE_WORD ((1LL << (57)) / 1953125LL)
+
+/* How much to shift down after scaling to convert to FP40 */
+#define PPB_SHIFT_FP40 26
+/* ... and FP44. */
+#define PPB_SHIFT_FP44 22
#define PTP_SYNC_ATTEMPTS 4
@@ -218,8 +214,8 @@ struct efx_ptp_timeset {
* @channel: The PTP channel (Siena only)
* @rx_ts_inline: Flag for whether RX timestamps are inline (else they are
* separate events)
- * @rxq: Receive queue (awaiting timestamps)
- * @txq: Transmit queue
+ * @rxq: Receive SKB queue (awaiting timestamps)
+ * @txq: Transmit SKB queue
* @evt_list: List of MC receive events awaiting packets
* @evt_free_list: List of free events
* @evt_lock: Lock for manipulating evt_list and evt_free_list
@@ -233,19 +229,36 @@ struct efx_ptp_timeset {
* @config: Current timestamp configuration
* @enabled: PTP operation enabled
* @mode: Mode in which PTP operating (PTP version)
- * @time_format: Time format supported by this NIC
* @ns_to_nic_time: Function to convert from scalar nanoseconds to NIC time
* @nic_to_kernel_time: Function to convert from NIC to kernel time
+ * @nic_time.minor_max: Wrap point for NIC minor times
+ * @nic_time.sync_event_diff_min: Minimum acceptable difference between time
+ * in packet prefix and last MCDI time sync event i.e. how much earlier than
+ * the last sync event time a packet timestamp can be.
+ * @nic_time.sync_event_diff_max: Maximum acceptable difference between time
+ * in packet prefix and last MCDI time sync event i.e. how much later than
+ * the last sync event time a packet timestamp can be.
+ * @nic_time.sync_event_minor_shift: Shift required to make minor time from
+ * field in MCDI time sync event.
* @min_synchronisation_ns: Minimum acceptable corrected sync window
- * @ts_corrections.tx: Required driver correction of transmit timestamps
- * @ts_corrections.rx: Required driver correction of receive timestamps
+ * @capabilities: Capabilities flags from the NIC
+ * @ts_corrections.ptp_tx: Required driver correction of PTP packet transmit
+ * timestamps
+ * @ts_corrections.ptp_rx: Required driver correction of PTP packet receive
+ * timestamps
* @ts_corrections.pps_out: PPS output error (information only)
* @ts_corrections.pps_in: Required driver correction of PPS input timestamps
+ * @ts_corrections.general_tx: Required driver correction of general packet
+ * transmit timestamps
+ * @ts_corrections.general_rx: Required driver correction of general packet
+ * receive timestamps
* @evt_frags: Partly assembled PTP events
* @evt_frag_idx: Current fragment number
* @evt_code: Last event code
* @start: Address at which MC indicates ready for synchronisation
* @host_time_pps: Host time at last PPS
+ * @adjfreq_ppb_shift: Shift required to convert scaled parts-per-billion
+ * frequency adjustment into a fixed point fractional nanosecond format.
* @current_adjfreq: Current ppb adjustment.
* @phc_clock: Pointer to registered phc device (if primary function)
* @phc_clock_info: Registration structure for phc device
@@ -264,6 +277,7 @@ struct efx_ptp_timeset {
* @oversize_sync_windows: Number of corrected sync windows that are too large
* @rx_no_timestamp: Number of packets received without a timestamp.
* @timeset: Last set of synchronisation statistics.
+ * @xmit_skb: Transmit SKB function.
*/
struct efx_ptp_data {
struct efx_nic *efx;
@@ -284,22 +298,31 @@ struct efx_ptp_data {
struct hwtstamp_config config;
bool enabled;
unsigned int mode;
- unsigned int time_format;
void (*ns_to_nic_time)(s64 ns, u32 *nic_major, u32 *nic_minor);
ktime_t (*nic_to_kernel_time)(u32 nic_major, u32 nic_minor,
s32 correction);
+ struct {
+ u32 minor_max;
+ u32 sync_event_diff_min;
+ u32 sync_event_diff_max;
+ unsigned int sync_event_minor_shift;
+ } nic_time;
unsigned int min_synchronisation_ns;
+ unsigned int capabilities;
struct {
- s32 tx;
- s32 rx;
+ s32 ptp_tx;
+ s32 ptp_rx;
s32 pps_out;
s32 pps_in;
+ s32 general_tx;
+ s32 general_rx;
} ts_corrections;
efx_qword_t evt_frags[MAX_EVENT_FRAGS];
int evt_frag_idx;
int evt_code;
struct efx_buffer start;
struct pps_event_time host_time_pps;
+ unsigned int adjfreq_ppb_shift;
s64 current_adjfreq;
struct ptp_clock *phc_clock;
struct ptp_clock_info phc_clock_info;
@@ -319,6 +342,7 @@ struct efx_ptp_data {
unsigned int rx_no_timestamp;
struct efx_ptp_timeset
timeset[MC_CMD_PTP_OUT_SYNCHRONIZE_TIMESET_MAXNUM];
+ void (*xmit_skb)(struct efx_nic *efx, struct sk_buff *skb);
};
static int efx_phc_adjfreq(struct ptp_clock_info *ptp, s32 delta);
@@ -329,6 +353,24 @@ static int efx_phc_settime(struct ptp_clock_info *ptp,
static int efx_phc_enable(struct ptp_clock_info *ptp,
struct ptp_clock_request *request, int on);
+bool efx_ptp_use_mac_tx_timestamps(struct efx_nic *efx)
+{
+ struct efx_ef10_nic_data *nic_data = efx->nic_data;
+
+ return ((efx_nic_rev(efx) >= EFX_REV_HUNT_A0) &&
+ (nic_data->datapath_caps2 &
+ (1 << MC_CMD_GET_CAPABILITIES_V2_OUT_TX_MAC_TIMESTAMPING_LBN)
+ ));
+}
+
+/* PTP 'extra' channel is still a traffic channel, but we only create TX queues
+ * if PTP uses MAC TX timestamps, not if PTP uses the MC directly to transmit.
+ */
+static bool efx_ptp_want_txqs(struct efx_channel *channel)
+{
+ return efx_ptp_use_mac_tx_timestamps(channel->efx);
+}
+
#define PTP_SW_STAT(ext_name, field_name) \
{ #ext_name, 0, offsetof(struct efx_ptp_data, field_name) }
#define PTP_MC_STAT(ext_name, mcdi_name) \
@@ -471,6 +513,89 @@ static ktime_t efx_ptp_s27_to_ktime_correction(u32 nic_major, u32 nic_minor,
return efx_ptp_s27_to_ktime(nic_major, nic_minor);
}
+/* For Medford2 platforms the time is in seconds and quarter nanoseconds. */
+static void efx_ptp_ns_to_s_qns(s64 ns, u32 *nic_major, u32 *nic_minor)
+{
+ struct timespec64 ts = ns_to_timespec64(ns);
+
+ *nic_major = (u32)ts.tv_sec;
+ *nic_minor = ts.tv_nsec * 4;
+}
+
+static ktime_t efx_ptp_s_qns_to_ktime_correction(u32 nic_major, u32 nic_minor,
+ s32 correction)
+{
+ ktime_t kt;
+
+ nic_minor = DIV_ROUND_CLOSEST(nic_minor, 4);
+ correction = DIV_ROUND_CLOSEST(correction, 4);
+
+ kt = ktime_set(nic_major, nic_minor);
+
+ if (correction >= 0)
+ kt = ktime_add_ns(kt, (u64)correction);
+ else
+ kt = ktime_sub_ns(kt, (u64)-correction);
+ return kt;
+}
+
+struct efx_channel *efx_ptp_channel(struct efx_nic *efx)
+{
+ return efx->ptp_data ? efx->ptp_data->channel : NULL;
+}
+
+static u32 last_sync_timestamp_major(struct efx_nic *efx)
+{
+ struct efx_channel *channel = efx_ptp_channel(efx);
+ u32 major = 0;
+
+ if (channel)
+ major = channel->sync_timestamp_major;
+ return major;
+}
+
+/* The 8000 series and later can provide the time from the MAC, which is only
+ * 48 bits long and provides meta-information in the top 2 bits.
+ */
+static ktime_t
+efx_ptp_mac_nic_to_ktime_correction(struct efx_nic *efx,
+ struct efx_ptp_data *ptp,
+ u32 nic_major, u32 nic_minor,
+ s32 correction)
+{
+ ktime_t kt = { 0 };
+
+ if (!(nic_major & 0x80000000)) {
+ WARN_ON_ONCE(nic_major >> 16);
+ /* Use the top bits from the latest sync event. */
+ nic_major &= 0xffff;
+ nic_major |= (last_sync_timestamp_major(efx) & 0xffff0000);
+
+ kt = ptp->nic_to_kernel_time(nic_major, nic_minor,
+ correction);
+ }
+ return kt;
+}
+
+ktime_t efx_ptp_nic_to_kernel_time(struct efx_tx_queue *tx_queue)
+{
+ struct efx_nic *efx = tx_queue->efx;
+ struct efx_ptp_data *ptp = efx->ptp_data;
+ ktime_t kt;
+
+ if (efx_ptp_use_mac_tx_timestamps(efx))
+ kt = efx_ptp_mac_nic_to_ktime_correction(efx, ptp,
+ tx_queue->completed_timestamp_major,
+ tx_queue->completed_timestamp_minor,
+ ptp->ts_corrections.general_tx);
+ else
+ kt = ptp->nic_to_kernel_time(
+ tx_queue->completed_timestamp_major,
+ tx_queue->completed_timestamp_minor,
+ ptp->ts_corrections.general_tx);
+ return kt;
+}
+
/* Get PTP attributes and set up time conversions */
static int efx_ptp_get_attributes(struct efx_nic *efx)
{
@@ -502,31 +627,71 @@ static int efx_ptp_get_attributes(struct efx_nic *efx)
return rc;
}
- if (fmt == MC_CMD_PTP_OUT_GET_ATTRIBUTES_SECONDS_27FRACTION) {
+ switch (fmt) {
+ case MC_CMD_PTP_OUT_GET_ATTRIBUTES_SECONDS_27FRACTION:
ptp->ns_to_nic_time = efx_ptp_ns_to_s27;
ptp->nic_to_kernel_time = efx_ptp_s27_to_ktime_correction;
- } else if (fmt == MC_CMD_PTP_OUT_GET_ATTRIBUTES_SECONDS_NANOSECONDS) {
+ ptp->nic_time.minor_max = 1 << 27;
+ ptp->nic_time.sync_event_minor_shift = 19;
+ break;
+ case MC_CMD_PTP_OUT_GET_ATTRIBUTES_SECONDS_NANOSECONDS:
ptp->ns_to_nic_time = efx_ptp_ns_to_s_ns;
ptp->nic_to_kernel_time = efx_ptp_s_ns_to_ktime_correction;
- } else {
+ ptp->nic_time.minor_max = 1000000000;
+ ptp->nic_time.sync_event_minor_shift = 22;
+ break;
+ case MC_CMD_PTP_OUT_GET_ATTRIBUTES_SECONDS_QTR_NANOSECONDS:
+ ptp->ns_to_nic_time = efx_ptp_ns_to_s_qns;
+ ptp->nic_to_kernel_time = efx_ptp_s_qns_to_ktime_correction;
+ ptp->nic_time.minor_max = 4000000000UL;
+ ptp->nic_time.sync_event_minor_shift = 24;
+ break;
+ default:
return -ERANGE;
}
- ptp->time_format = fmt;
-
- /* MC_CMD_PTP_OP_GET_ATTRIBUTES is an extended version of an older
- * operation MC_CMD_PTP_OP_GET_TIME_FORMAT that also returns a value
- * to use for the minimum acceptable corrected synchronization window.
+ /* Precalculate acceptable difference between the minor time in the
+ * packet prefix and the last MCDI time sync event. We expect the
+ * packet prefix timestamp to be after of sync event by up to one
+ * sync event interval (0.25s) but we allow it to exceed this by a
+ * fuzz factor of (0.1s)
+ */
+ ptp->nic_time.sync_event_diff_min = ptp->nic_time.minor_max
+ - (ptp->nic_time.minor_max / 10);
+ ptp->nic_time.sync_event_diff_max = (ptp->nic_time.minor_max / 4)
+ + (ptp->nic_time.minor_max / 10);
+
+ /* MC_CMD_PTP_OP_GET_ATTRIBUTES has been extended twice from an older
+ * operation MC_CMD_PTP_OP_GET_TIME_FORMAT. The function now may return
+ * a value to use for the minimum acceptable corrected synchronization
+ * window and may return further capabilities.
* If we have the extra information store it. For older firmware that
* does not implement the extended command use the default value.
*/
- if (rc == 0 && out_len >= MC_CMD_PTP_OUT_GET_ATTRIBUTES_LEN)
+ if (rc == 0 &&
+ out_len >= MC_CMD_PTP_OUT_GET_ATTRIBUTES_CAPABILITIES_OFST)
ptp->min_synchronisation_ns =
MCDI_DWORD(outbuf,
PTP_OUT_GET_ATTRIBUTES_SYNC_WINDOW_MIN);
else
ptp->min_synchronisation_ns = DEFAULT_MIN_SYNCHRONISATION_NS;
+ if (rc == 0 &&
+ out_len >= MC_CMD_PTP_OUT_GET_ATTRIBUTES_LEN)
+ ptp->capabilities = MCDI_DWORD(outbuf,
+ PTP_OUT_GET_ATTRIBUTES_CAPABILITIES);
+ else
+ ptp->capabilities = 0;
+
+ /* Set up the shift for conversion between frequency
+ * adjustments in parts-per-billion and the fixed-point
+ * fractional ns format that the adapter uses.
+ */
+ if (ptp->capabilities & (1 << MC_CMD_PTP_OUT_GET_ATTRIBUTES_FP44_FREQ_ADJ_LBN))
+ ptp->adjfreq_ppb_shift = PPB_SHIFT_FP44;
+ else
+ ptp->adjfreq_ppb_shift = PPB_SHIFT_FP40;
+
return 0;
}
@@ -534,8 +699,9 @@ static int efx_ptp_get_attributes(struct efx_nic *efx)
static int efx_ptp_get_timestamp_corrections(struct efx_nic *efx)
{
MCDI_DECLARE_BUF(inbuf, MC_CMD_PTP_IN_GET_TIMESTAMP_CORRECTIONS_LEN);
- MCDI_DECLARE_BUF(outbuf, MC_CMD_PTP_OUT_GET_TIMESTAMP_CORRECTIONS_LEN);
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_PTP_OUT_GET_TIMESTAMP_CORRECTIONS_V2_LEN);
int rc;
+ size_t out_len;
/* Get the timestamp corrections from the NIC. If this operation is
* not supported (older NICs) then no correction is required.
@@ -545,21 +711,37 @@ static int efx_ptp_get_timestamp_corrections(struct efx_nic *efx)
MCDI_SET_DWORD(inbuf, PTP_IN_PERIPH_ID, 0);
rc = efx_mcdi_rpc_quiet(efx, MC_CMD_PTP, inbuf, sizeof(inbuf),
- outbuf, sizeof(outbuf), NULL);
+ outbuf, sizeof(outbuf), &out_len);
if (rc == 0) {
- efx->ptp_data->ts_corrections.tx = MCDI_DWORD(outbuf,
+ efx->ptp_data->ts_corrections.ptp_tx = MCDI_DWORD(outbuf,
PTP_OUT_GET_TIMESTAMP_CORRECTIONS_TRANSMIT);
- efx->ptp_data->ts_corrections.rx = MCDI_DWORD(outbuf,
+ efx->ptp_data->ts_corrections.ptp_rx = MCDI_DWORD(outbuf,
PTP_OUT_GET_TIMESTAMP_CORRECTIONS_RECEIVE);
efx->ptp_data->ts_corrections.pps_out = MCDI_DWORD(outbuf,
PTP_OUT_GET_TIMESTAMP_CORRECTIONS_PPS_OUT);
efx->ptp_data->ts_corrections.pps_in = MCDI_DWORD(outbuf,
PTP_OUT_GET_TIMESTAMP_CORRECTIONS_PPS_IN);
+
+ if (out_len >= MC_CMD_PTP_OUT_GET_TIMESTAMP_CORRECTIONS_V2_LEN) {
+ efx->ptp_data->ts_corrections.general_tx = MCDI_DWORD(
+ outbuf,
+ PTP_OUT_GET_TIMESTAMP_CORRECTIONS_V2_GENERAL_TX);
+ efx->ptp_data->ts_corrections.general_rx = MCDI_DWORD(
+ outbuf,
+ PTP_OUT_GET_TIMESTAMP_CORRECTIONS_V2_GENERAL_RX);
+ } else {
+ efx->ptp_data->ts_corrections.general_tx =
+ efx->ptp_data->ts_corrections.ptp_tx;
+ efx->ptp_data->ts_corrections.general_rx =
+ efx->ptp_data->ts_corrections.ptp_rx;
+ }
} else if (rc == -EINVAL) {
- efx->ptp_data->ts_corrections.tx = 0;
- efx->ptp_data->ts_corrections.rx = 0;
+ efx->ptp_data->ts_corrections.ptp_tx = 0;
+ efx->ptp_data->ts_corrections.ptp_rx = 0;
efx->ptp_data->ts_corrections.pps_out = 0;
efx->ptp_data->ts_corrections.pps_in = 0;
+ efx->ptp_data->ts_corrections.general_tx = 0;
+ efx->ptp_data->ts_corrections.general_rx = 0;
} else {
efx_mcdi_display_error(efx, MC_CMD_PTP, sizeof(inbuf), outbuf,
sizeof(outbuf), rc);
@@ -873,8 +1055,24 @@ static int efx_ptp_synchronize(struct efx_nic *efx, unsigned int num_readings)
return rc;
}
+/* Transmit a PTP packet via the dedicated hardware timestamped queue. */
+static void efx_ptp_xmit_skb_queue(struct efx_nic *efx, struct sk_buff *skb)
+{
+ struct efx_ptp_data *ptp_data = efx->ptp_data;
+ struct efx_tx_queue *tx_queue;
+ u8 type = skb->ip_summed == CHECKSUM_PARTIAL ? EFX_TXQ_TYPE_OFFLOAD : 0;
+
+ tx_queue = &ptp_data->channel->tx_queue[type];
+ if (tx_queue && tx_queue->timestamping) {
+ efx_enqueue_skb(tx_queue, skb);
+ } else {
+ WARN_ONCE(1, "PTP channel has no timestamped tx queue\n");
+ dev_kfree_skb_any(skb);
+ }
+}
+
/* Transmit a PTP packet, via the MCDI interface, to the wire. */
-static int efx_ptp_xmit_skb(struct efx_nic *efx, struct sk_buff *skb)
+static void efx_ptp_xmit_skb_mc(struct efx_nic *efx, struct sk_buff *skb)
{
struct efx_ptp_data *ptp_data = efx->ptp_data;
struct skb_shared_hwtstamps timestamps;
@@ -910,16 +1108,16 @@ static int efx_ptp_xmit_skb(struct efx_nic *efx, struct sk_buff *skb)
timestamps.hwtstamp = ptp_data->nic_to_kernel_time(
MCDI_DWORD(txtime, PTP_OUT_TRANSMIT_MAJOR),
MCDI_DWORD(txtime, PTP_OUT_TRANSMIT_MINOR),
- ptp_data->ts_corrections.tx);
+ ptp_data->ts_corrections.ptp_tx);
skb_tstamp_tx(skb, &timestamps);
rc = 0;
fail:
- dev_kfree_skb(skb);
+ dev_kfree_skb_any(skb);
- return rc;
+ return;
}
static void efx_ptp_drop_time_expired_events(struct efx_nic *efx)
@@ -1189,7 +1387,7 @@ static void efx_ptp_worker(struct work_struct *work)
efx_ptp_process_events(efx, &tempq);
while ((skb = skb_dequeue(&ptp_data->txq)))
- efx_ptp_xmit_skb(efx, skb);
+ ptp_data->xmit_skb(efx, skb);
while ((skb = __skb_dequeue(&tempq)))
efx_ptp_process_rx(efx, skb);
@@ -1239,6 +1437,14 @@ int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel)
goto fail2;
}
+ if (efx_ptp_use_mac_tx_timestamps(efx)) {
+ ptp->xmit_skb = efx_ptp_xmit_skb_queue;
+ /* Request sync events on this channel. */
+ channel->sync_events_state = SYNC_EVENTS_QUIESCENT;
+ } else {
+ ptp->xmit_skb = efx_ptp_xmit_skb_mc;
+ }
+
INIT_WORK(&ptp->work, efx_ptp_worker);
ptp->config.flags = 0;
ptp->config.tx_type = HWTSTAMP_TX_OFF;
@@ -1303,11 +1509,21 @@ fail1:
static int efx_ptp_probe_channel(struct efx_channel *channel)
{
struct efx_nic *efx = channel->efx;
+ int rc;
channel->irq_moderation_us = 0;
channel->rx_queue.core_index = 0;
- return efx_ptp_probe(efx, channel);
+ rc = efx_ptp_probe(efx, channel);
+ /* Failure to probe PTP is not fatal; this channel will just not be
+ * used for anything.
+ * In the case of EPERM, efx_ptp_probe will print its own message (in
+ * efx_ptp_get_attributes()), so we don't need to.
+ */
+ if (rc && rc != -EPERM)
+ netif_warn(efx, drv, efx->net_dev,
+ "Failed to probe PTP, rc=%d\n", rc);
+ return 0;
}
void efx_ptp_remove(struct efx_nic *efx)
@@ -1332,6 +1548,7 @@ void efx_ptp_remove(struct efx_nic *efx)
efx_nic_free_buffer(efx, &efx->ptp_data->start);
kfree(efx->ptp_data);
+ efx->ptp_data = NULL;
}
static void efx_ptp_remove_channel(struct efx_channel *channel)
@@ -1548,6 +1765,17 @@ void efx_ptp_get_ts_info(struct efx_nic *efx, struct ethtool_ts_info *ts_info)
ts_info->so_timestamping |= (SOF_TIMESTAMPING_TX_HARDWARE |
SOF_TIMESTAMPING_RX_HARDWARE |
SOF_TIMESTAMPING_RAW_HARDWARE);
+ /* Check licensed features. If we don't have the license for TX
+ * timestamps, the NIC will not support them.
+ */
+ if (efx_ptp_use_mac_tx_timestamps(efx)) {
+ struct efx_ef10_nic_data *nic_data = efx->nic_data;
+
+ if (!(nic_data->licensed_features &
+ (1 << LICENSED_V3_FEATURES_TX_TIMESTAMPS_LBN)))
+ ts_info->so_timestamping &=
+ ~SOF_TIMESTAMPING_TX_HARDWARE;
+ }
if (primary && primary->ptp_data && primary->ptp_data->phc_clock)
ts_info->phc_index =
ptp_clock_index(primary->ptp_data->phc_clock);
@@ -1627,7 +1855,7 @@ static void ptp_event_rx(struct efx_nic *efx, struct efx_ptp_data *ptp)
evt->hwtimestamp = efx->ptp_data->nic_to_kernel_time(
EFX_QWORD_FIELD(ptp->evt_frags[0], MCDI_EVENT_DATA),
EFX_QWORD_FIELD(ptp->evt_frags[1], MCDI_EVENT_DATA),
- ptp->ts_corrections.rx);
+ ptp->ts_corrections.ptp_rx);
evt->expiry = jiffies + msecs_to_jiffies(PKT_EVENT_LIFETIME_MS);
list_add_tail(&evt->link, &ptp->evt_list);
@@ -1709,9 +1937,20 @@ void efx_ptp_event(struct efx_nic *efx, efx_qword_t *ev)
void efx_time_sync_event(struct efx_channel *channel, efx_qword_t *ev)
{
+ struct efx_nic *efx = channel->efx;
+ struct efx_ptp_data *ptp = efx->ptp_data;
+
+ /* When extracting the sync timestamp minor value, we should discard
+ * the least significant two bits. These are not required in order
+ * to reconstruct full-range timestamps and they are optionally used
+ * to report status depending on the options supplied when subscribing
+ * for sync events.
+ */
channel->sync_timestamp_major = MCDI_EVENT_FIELD(*ev, PTP_TIME_MAJOR);
channel->sync_timestamp_minor =
- MCDI_EVENT_FIELD(*ev, PTP_TIME_MINOR_26_19) << 19;
+ (MCDI_EVENT_FIELD(*ev, PTP_TIME_MINOR_MS_8BITS) & 0xFC)
+ << ptp->nic_time.sync_event_minor_shift;
+
/* if sync events have been disabled then we want to silently ignore
* this event, so throw away result.
*/
@@ -1719,15 +1958,6 @@ void efx_time_sync_event(struct efx_channel *channel, efx_qword_t *ev)
SYNC_EVENTS_VALID);
}
-/* make some assumptions about the time representation rather than abstract it,
- * since we currently only support one type of inline timestamping and only on
- * EF10.
- */
-#define MINOR_TICKS_PER_SECOND 0x8000000
-/* Fuzz factor for sync events to be out of order with RX events */
-#define FUZZ (MINOR_TICKS_PER_SECOND / 10)
-#define EXPECTED_SYNC_EVENTS_PER_SECOND 4
-
static inline u32 efx_rx_buf_timestamp_minor(struct efx_nic *efx, const u8 *eh)
{
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
@@ -1745,31 +1975,33 @@ void __efx_rx_skb_attach_timestamp(struct efx_channel *channel,
struct sk_buff *skb)
{
struct efx_nic *efx = channel->efx;
+ struct efx_ptp_data *ptp = efx->ptp_data;
u32 pkt_timestamp_major, pkt_timestamp_minor;
u32 diff, carry;
struct skb_shared_hwtstamps *timestamps;
- pkt_timestamp_minor = (efx_rx_buf_timestamp_minor(efx,
- skb_mac_header(skb)) +
- (u32) efx->ptp_data->ts_corrections.rx) &
- (MINOR_TICKS_PER_SECOND - 1);
+ if (channel->sync_events_state != SYNC_EVENTS_VALID)
+ return;
+
+ pkt_timestamp_minor = efx_rx_buf_timestamp_minor(efx, skb_mac_header(skb));
/* get the difference between the packet and sync timestamps,
* modulo one second
*/
- diff = (pkt_timestamp_minor - channel->sync_timestamp_minor) &
- (MINOR_TICKS_PER_SECOND - 1);
+ diff = pkt_timestamp_minor - channel->sync_timestamp_minor;
+ if (pkt_timestamp_minor < channel->sync_timestamp_minor)
+ diff += ptp->nic_time.minor_max;
+
/* do we roll over a second boundary and need to carry the one? */
- carry = channel->sync_timestamp_minor + diff > MINOR_TICKS_PER_SECOND ?
+ carry = (channel->sync_timestamp_minor >= ptp->nic_time.minor_max - diff) ?
1 : 0;
- if (diff <= MINOR_TICKS_PER_SECOND / EXPECTED_SYNC_EVENTS_PER_SECOND +
- FUZZ) {
+ if (diff <= ptp->nic_time.sync_event_diff_max) {
/* packet is ahead of the sync event by a quarter of a second or
* less (allowing for fuzz)
*/
pkt_timestamp_major = channel->sync_timestamp_major + carry;
- } else if (diff >= MINOR_TICKS_PER_SECOND - FUZZ) {
+ } else if (diff >= ptp->nic_time.sync_event_diff_min) {
/* packet is behind the sync event but within the fuzz factor.
* This means the RX packet and sync event crossed as they were
* placed on the event queue, which can sometimes happen.
@@ -1791,7 +2023,9 @@ void __efx_rx_skb_attach_timestamp(struct efx_channel *channel,
/* attach the timestamps to the skb */
timestamps = skb_hwtstamps(skb);
timestamps->hwtstamp =
- efx_ptp_s27_to_ktime(pkt_timestamp_major, pkt_timestamp_minor);
+ ptp->nic_to_kernel_time(pkt_timestamp_major,
+ pkt_timestamp_minor,
+ ptp->ts_corrections.general_rx);
}
static int efx_phc_adjfreq(struct ptp_clock_info *ptp, s32 delta)
@@ -1809,9 +2043,10 @@ static int efx_phc_adjfreq(struct ptp_clock_info *ptp, s32 delta)
else if (delta < -MAX_PPB)
delta = -MAX_PPB;
- /* Convert ppb to fixed point ns. */
- adjustment_ns = (((s64)delta * PPB_SCALE_WORD) >>
- (PPB_EXTRA_BITS + MAX_PPB_BITS));
+ /* Convert ppb to fixed point ns taking care to round correctly. */
+ adjustment_ns = ((s64)delta * PPB_SCALE_WORD +
+ (1 << (ptp_data->adjfreq_ppb_shift - 1))) >>
+ ptp_data->adjfreq_ppb_shift;
MCDI_SET_DWORD(inadj, PTP_IN_OP, MC_CMD_PTP_OP_ADJUST);
MCDI_SET_DWORD(inadj, PTP_IN_PERIPH_ID, 0);
@@ -1918,6 +2153,7 @@ static const struct efx_channel_type efx_ptp_channel_type = {
.get_name = efx_ptp_get_channel_name,
/* no copy operation; there is no need to reallocate this channel */
.receive_skb = efx_ptp_rx,
+ .want_txqs = efx_ptp_want_txqs,
.keep_eventq = false,
};
diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c
index 9937a2450e57..cece961f2e82 100644
--- a/drivers/net/ethernet/sfc/tx.c
+++ b/drivers/net/ethernet/sfc/tx.c
@@ -77,9 +77,23 @@ static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
}
if (buffer->flags & EFX_TX_BUF_SKB) {
+ struct sk_buff *skb = (struct sk_buff *)buffer->skb;
+
EFX_WARN_ON_PARANOID(!pkts_compl || !bytes_compl);
(*pkts_compl)++;
- (*bytes_compl) += buffer->skb->len;
+ (*bytes_compl) += skb->len;
+ if (tx_queue->timestamping &&
+ (tx_queue->completed_timestamp_major ||
+ tx_queue->completed_timestamp_minor)) {
+ struct skb_shared_hwtstamps hwtstamp;
+
+ hwtstamp.hwtstamp =
+ efx_ptp_nic_to_kernel_time(tx_queue);
+ skb_tstamp_tx(skb, &hwtstamp);
+
+ tx_queue->completed_timestamp_major = 0;
+ tx_queue->completed_timestamp_minor = 0;
+ }
dev_consume_skb_any((struct sk_buff *)buffer->skb);
netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev,
"TX queue %d transmission id %x complete\n",
@@ -828,6 +842,11 @@ void efx_init_tx_queue(struct efx_tx_queue *tx_queue)
tx_queue->old_read_count = 0;
tx_queue->empty_read_count = 0 | EFX_EMPTY_COUNT_VALID;
tx_queue->xmit_more_available = false;
+ tx_queue->timestamping = (efx_ptp_use_mac_tx_timestamps(efx) &&
+ tx_queue->channel == efx_ptp_channel(efx));
+ tx_queue->completed_desc_ptr = tx_queue->ptr_mask;
+ tx_queue->completed_timestamp_major = 0;
+ tx_queue->completed_timestamp_minor = 0;
/* Set up default function pointers. These may get replaced by
* efx_nic_init_tx() based off NIC/queue capabilities.
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index ce2ea2d491ac..2ffe76c0ff74 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -474,7 +474,7 @@ struct mac_device_info;
/* Helpers to program the MAC core */
struct stmmac_ops {
/* MAC core initialization */
- void (*core_init)(struct mac_device_info *hw, int mtu);
+ void (*core_init)(struct mac_device_info *hw, struct net_device *dev);
/* Enable the MAC RX/TX */
void (*set_mac)(void __iomem *ioaddr, bool enable);
/* Enable and verify that the IPC module is supported */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index 9eb7f65d8000..a3fa65b1ca8e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -483,7 +483,8 @@ static int sun8i_dwmac_init(struct platform_device *pdev, void *priv)
return 0;
}
-static void sun8i_dwmac_core_init(struct mac_device_info *hw, int mtu)
+static void sun8i_dwmac_core_init(struct mac_device_info *hw,
+ struct net_device *dev)
{
void __iomem *ioaddr = hw->pcsr;
u32 v;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
index 8a86340ff2d3..540d21786a43 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
@@ -25,18 +25,28 @@
#include <linux/crc32.h>
#include <linux/slab.h>
#include <linux/ethtool.h>
+#include <net/dsa.h>
#include <asm/io.h>
#include "stmmac_pcs.h"
#include "dwmac1000.h"
-static void dwmac1000_core_init(struct mac_device_info *hw, int mtu)
+static void dwmac1000_core_init(struct mac_device_info *hw,
+ struct net_device *dev)
{
void __iomem *ioaddr = hw->pcsr;
u32 value = readl(ioaddr + GMAC_CONTROL);
+ int mtu = dev->mtu;
/* Configure GMAC core */
value |= GMAC_CORE_INIT;
+ /* Clear ACS bit because Ethernet switch tagging formats such as
+ * Broadcom tags can look like invalid LLC/SNAP packets and cause the
+ * hardware to truncate packets on reception.
+ */
+ if (netdev_uses_dsa(dev))
+ value &= ~GMAC_CONTROL_ACS;
+
if (mtu > 1500)
value |= GMAC_CONTROL_2K;
if (mtu > 2000)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
index 8ef517356313..91b23f9db31a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
@@ -25,15 +25,26 @@
*******************************************************************************/
#include <linux/crc32.h>
+#include <net/dsa.h>
#include <asm/io.h>
#include "dwmac100.h"
-static void dwmac100_core_init(struct mac_device_info *hw, int mtu)
+static void dwmac100_core_init(struct mac_device_info *hw,
+ struct net_device *dev)
{
void __iomem *ioaddr = hw->pcsr;
u32 value = readl(ioaddr + MAC_CONTROL);
- writel((value | MAC_CORE_INIT), ioaddr + MAC_CONTROL);
+ value |= MAC_CORE_INIT;
+
+ /* Clear ASTP bit because Ethernet switch tagging formats such as
+ * Broadcom tags can look like invalid LLC/SNAP packets and cause the
+ * hardware to truncate packets on reception.
+ */
+ if (netdev_uses_dsa(dev))
+ value &= ~MAC_CONTROL_ASTP;
+
+ writel(value, ioaddr + MAC_CONTROL);
#ifdef STMMAC_VLAN_TAG_USED
writel(ETH_P_8021Q, ioaddr + MAC_VLAN1);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index f3ed8f7853eb..ed222b20fcf1 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -17,16 +17,26 @@
#include <linux/slab.h>
#include <linux/ethtool.h>
#include <linux/io.h>
+#include <net/dsa.h>
#include "stmmac_pcs.h"
#include "dwmac4.h"
-static void dwmac4_core_init(struct mac_device_info *hw, int mtu)
+static void dwmac4_core_init(struct mac_device_info *hw,
+ struct net_device *dev)
{
void __iomem *ioaddr = hw->pcsr;
u32 value = readl(ioaddr + GMAC_CONFIG);
+ int mtu = dev->mtu;
value |= GMAC_CORE_INIT;
+ /* Clear ACS bit because Ethernet switch tagging formats such as
+ * Broadcom tags can look like invalid LLC/SNAP packets and cause the
+ * hardware to truncate packets on reception.
+ */
+ if (netdev_uses_dsa(dev))
+ value &= ~GMAC_CONFIG_ACS;
+
if (mtu > 1500)
value |= GMAC_CONFIG_2K;
if (mtu > 2000)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
index 2fd8456999f6..c728ffa095de 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
@@ -334,7 +334,7 @@ static void dwmac4_rd_prepare_tx_desc(struct dma_desc *p, int is_fs, int len,
if (tx_own)
tdes3 |= TDES3_OWN;
- if (is_fs & tx_own)
+ if (is_fs && tx_own)
/* When the own bit, for the first frame, has to be set, all
* descriptors for the same frame has to be set before, to
* avoid race condition.
@@ -377,7 +377,7 @@ static void dwmac4_rd_prepare_tso_tx_desc(struct dma_desc *p, int is_fs,
if (tx_own)
tdes3 |= TDES3_OWN;
- if (is_fs & tx_own)
+ if (is_fs && tx_own)
/* When the own bit, for the first frame, has to be set, all
* descriptors for the same frame has to be set before, to
* avoid race condition.
diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
index b47cb5c4da51..6768a25b6aa0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
@@ -341,7 +341,7 @@ static void enh_desc_prepare_tx_desc(struct dma_desc *p, int is_fs, int len,
if (tx_own)
tdes0 |= ETDES0_OWN;
- if (is_fs & tx_own)
+ if (is_fs && tx_own)
/* When the own bit, for the first frame, has to be set, all
* descriptors for the same frame has to be set before, to
* avoid race condition.
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index f99f14c35063..7ad841434ec8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2527,7 +2527,7 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
}
/* Initialize the MAC Core */
- priv->hw->mac->core_init(priv->hw, dev->mtu);
+ priv->hw->mac->core_init(priv->hw, dev);
/* Initialize MTL*/
if (priv->synopsys_id >= DWMAC_CORE_4_00)
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 91a67c5297f7..c3ca191fea7f 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -1221,7 +1221,6 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
struct ndis_recv_scale_cap rsscap;
u32 rsscap_size = sizeof(struct ndis_recv_scale_cap);
u32 mtu, size;
- const struct cpumask *node_cpu_mask;
u32 num_possible_rss_qs;
int i, ret;
@@ -1290,14 +1289,8 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
if (ret || rsscap.num_recv_que < 2)
goto out;
- /*
- * We will limit the VRSS channels to the number CPUs in the NUMA node
- * the primary channel is currently bound to.
- *
- * This also guarantees that num_possible_rss_qs <= num_online_cpus
- */
- node_cpu_mask = cpumask_of_node(cpu_to_node(dev->channel->target_cpu));
- num_possible_rss_qs = min_t(u32, cpumask_weight(node_cpu_mask),
+ /* This guarantees that num_possible_rss_qs <= num_online_cpus */
+ num_possible_rss_qs = min_t(u32, num_online_cpus(),
rsscap.num_recv_que);
net_device->max_chn = min_t(u32, VRSS_CHANNEL_MAX, num_possible_rss_qs);
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index f522715c6595..7de88b33d5b9 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -396,8 +396,6 @@ static struct macsec_cb *macsec_skb_cb(struct sk_buff *skb)
#define MACSEC_GCM_AES_128_SAK_LEN 16
#define MACSEC_GCM_AES_256_SAK_LEN 32
-#define MAX_SAK_LEN MACSEC_GCM_AES_256_SAK_LEN
-
#define DEFAULT_SAK_LEN MACSEC_GCM_AES_128_SAK_LEN
#define DEFAULT_SEND_SCI true
#define DEFAULT_ENCRYPT false
@@ -1605,7 +1603,7 @@ static const struct nla_policy macsec_genl_sa_policy[NUM_MACSEC_SA_ATTR] = {
[MACSEC_SA_ATTR_KEYID] = { .type = NLA_BINARY,
.len = MACSEC_KEYID_LEN, },
[MACSEC_SA_ATTR_KEY] = { .type = NLA_BINARY,
- .len = MAX_SAK_LEN, },
+ .len = MACSEC_MAX_KEY_LEN, },
};
static int parse_sa_config(struct nlattr **attrs, struct nlattr **tb_sa)
@@ -2374,7 +2372,7 @@ static int nla_put_secy(struct macsec_secy *secy, struct sk_buff *skb)
switch (secy->key_len) {
case MACSEC_GCM_AES_128_SAK_LEN:
- csid = MACSEC_CIPHER_ID_GCM_AES_128;
+ csid = MACSEC_DEFAULT_CIPHER_ID;
break;
case MACSEC_GCM_AES_256_SAK_LEN:
csid = MACSEC_CIPHER_ID_GCM_AES_256;
@@ -3076,7 +3074,7 @@ static int macsec_changelink_common(struct net_device *dev,
if (data[IFLA_MACSEC_CIPHER_SUITE]) {
switch (nla_get_u64(data[IFLA_MACSEC_CIPHER_SUITE])) {
case MACSEC_CIPHER_ID_GCM_AES_128:
- case MACSEC_DEFAULT_CIPHER_ALT:
+ case MACSEC_DEFAULT_CIPHER_ID:
secy->key_len = MACSEC_GCM_AES_128_SAK_LEN;
break;
case MACSEC_CIPHER_ID_GCM_AES_256:
@@ -3355,7 +3353,7 @@ static int macsec_validate_attr(struct nlattr *tb[], struct nlattr *data[],
switch (csid) {
case MACSEC_CIPHER_ID_GCM_AES_128:
case MACSEC_CIPHER_ID_GCM_AES_256:
- case MACSEC_DEFAULT_CIPHER_ALT:
+ case MACSEC_DEFAULT_CIPHER_ID:
if (icv_len < MACSEC_MIN_ICV_LEN ||
icv_len > MACSEC_STD_ICV_LEN)
return -EINVAL;
@@ -3428,7 +3426,7 @@ static int macsec_fill_info(struct sk_buff *skb,
switch (secy->key_len) {
case MACSEC_GCM_AES_128_SAK_LEN:
- csid = MACSEC_CIPHER_ID_GCM_AES_128;
+ csid = MACSEC_DEFAULT_CIPHER_ID;
break;
case MACSEC_GCM_AES_256_SAK_LEN:
csid = MACSEC_CIPHER_ID_GCM_AES_256;
diff --git a/drivers/net/netdevsim/Makefile b/drivers/net/netdevsim/Makefile
index 074ddebbc41d..09388c06171d 100644
--- a/drivers/net/netdevsim/Makefile
+++ b/drivers/net/netdevsim/Makefile
@@ -4,4 +4,8 @@ obj-$(CONFIG_NETDEVSIM) += netdevsim.o
netdevsim-objs := \
netdev.o \
- bpf.o \
+
+ifeq ($(CONFIG_BPF_SYSCALL),y)
+netdevsim-objs += \
+ bpf.o
+endif
diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
index b3851bbefad3..de73c1ff0939 100644
--- a/drivers/net/netdevsim/bpf.c
+++ b/drivers/net/netdevsim/bpf.c
@@ -23,6 +23,9 @@
#include "netdevsim.h"
+#define pr_vlog(env, fmt, ...) \
+ bpf_verifier_log_write(env, "[netdevsim] " fmt, ##__VA_ARGS__)
+
struct nsim_bpf_bound_prog {
struct netdevsim *ns;
struct bpf_prog *prog;
@@ -77,6 +80,9 @@ nsim_bpf_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn)
if (state->ns->bpf_bind_verifier_delay && !insn_idx)
msleep(state->ns->bpf_bind_verifier_delay);
+ if (insn_idx == env->prog->len - 1)
+ pr_vlog(env, "Hello from netdevsim!\n");
+
return 0;
}
@@ -123,17 +129,32 @@ int nsim_bpf_setup_tc_block_cb(enum tc_setup_type type,
struct netdevsim *ns = cb_priv;
struct bpf_prog *oldprog;
- if (type != TC_SETUP_CLSBPF ||
- !tc_can_offload(ns->netdev) ||
- cls_bpf->common.protocol != htons(ETH_P_ALL) ||
- cls_bpf->common.chain_index)
+ if (type != TC_SETUP_CLSBPF) {
+ NSIM_EA(cls_bpf->common.extack,
+ "only offload of BPF classifiers supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (!tc_cls_can_offload_and_chain0(ns->netdev, &cls_bpf->common))
return -EOPNOTSUPP;
- if (!ns->bpf_tc_accept)
+ if (cls_bpf->common.protocol != htons(ETH_P_ALL)) {
+ NSIM_EA(cls_bpf->common.extack,
+ "only ETH_P_ALL supported as filter protocol");
return -EOPNOTSUPP;
+ }
+
+ if (!ns->bpf_tc_accept) {
+ NSIM_EA(cls_bpf->common.extack,
+ "netdevsim configured to reject BPF TC offload");
+ return -EOPNOTSUPP;
+ }
/* Note: progs without skip_sw will probably not be dev bound */
- if (prog && !prog->aux->offload && !ns->bpf_tc_non_bound_accept)
+ if (prog && !prog->aux->offload && !ns->bpf_tc_non_bound_accept) {
+ NSIM_EA(cls_bpf->common.extack,
+ "netdevsim configured to reject unbound programs");
return -EOPNOTSUPP;
+ }
if (cls_bpf->command != TC_CLSBPF_OFFLOAD)
return -EOPNOTSUPP;
@@ -145,8 +166,11 @@ int nsim_bpf_setup_tc_block_cb(enum tc_setup_type type,
oldprog = NULL;
if (!cls_bpf->prog)
return 0;
- if (ns->bpf_offloaded)
+ if (ns->bpf_offloaded) {
+ NSIM_EA(cls_bpf->common.extack,
+ "driver and netdev offload states mismatch");
return -EBUSY;
+ }
}
return nsim_bpf_offload(ns, cls_bpf->prog, oldprog);
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index b80361200302..ea081c10efb8 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -68,12 +68,40 @@ struct netdevsim {
extern struct dentry *nsim_ddir;
+#ifdef CONFIG_BPF_SYSCALL
int nsim_bpf_init(struct netdevsim *ns);
void nsim_bpf_uninit(struct netdevsim *ns);
int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf);
int nsim_bpf_disable_tc(struct netdevsim *ns);
int nsim_bpf_setup_tc_block_cb(enum tc_setup_type type,
void *type_data, void *cb_priv);
+#else
+static inline int nsim_bpf_init(struct netdevsim *ns)
+{
+ return 0;
+}
+
+static inline void nsim_bpf_uninit(struct netdevsim *ns)
+{
+}
+
+static inline int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf)
+{
+ return bpf->command == XDP_QUERY_PROG ? 0 : -EOPNOTSUPP;
+}
+
+static inline int nsim_bpf_disable_tc(struct netdevsim *ns)
+{
+ return 0;
+}
+
+static inline int
+nsim_bpf_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
+{
+ return -EOPNOTSUPP;
+}
+#endif
static inline struct netdevsim *to_nsim(struct device *ptr)
{
diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c
index bdc4bb3c8288..8961209ee949 100644
--- a/drivers/net/phy/sfp-bus.c
+++ b/drivers/net/phy/sfp-bus.c
@@ -441,7 +441,7 @@ EXPORT_SYMBOL_GPL(sfp_upstream_stop);
/**
* sfp_register_upstream() - Register the neighbouring device
- * @np: device node for the SFP bus
+ * @fwnode: firmware node for the SFP bus
* @ndev: network device associated with the interface
* @upstream: the upstream private data
* @ops: the upstream's &struct sfp_upstream_ops
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index 4e1da1645b15..5aa59f41bf8c 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -842,6 +842,7 @@ static int pppoe_sendmsg(struct socket *sock, struct msghdr *m,
struct pppoe_hdr *ph;
struct net_device *dev;
char *start;
+ int hlen;
lock_sock(sk);
if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) {
@@ -860,16 +861,16 @@ static int pppoe_sendmsg(struct socket *sock, struct msghdr *m,
if (total_len > (dev->mtu + dev->hard_header_len))
goto end;
-
- skb = sock_wmalloc(sk, total_len + dev->hard_header_len + 32,
- 0, GFP_KERNEL);
+ hlen = LL_RESERVED_SPACE(dev);
+ skb = sock_wmalloc(sk, hlen + sizeof(*ph) + total_len +
+ dev->needed_tailroom, 0, GFP_KERNEL);
if (!skb) {
error = -ENOMEM;
goto end;
}
/* Reserve space for headers. */
- skb_reserve(skb, dev->hard_header_len);
+ skb_reserve(skb, hlen);
skb_reset_network_header(skb);
skb->dev = dev;
@@ -930,7 +931,7 @@ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb)
/* Copy the data if there is no space for the header or if it's
* read-only.
*/
- if (skb_cow_head(skb, sizeof(*ph) + dev->hard_header_len))
+ if (skb_cow_head(skb, LL_RESERVED_SPACE(dev) + sizeof(*ph)))
goto abort;
__skb_push(skb, sizeof(*ph));
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 698874684b4e..a0c5cb1a1617 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -774,14 +774,12 @@ static void tun_detach_all(struct net_device *dev)
tun_napi_del(tun, tfile);
/* Drop read queue */
tun_queue_purge(tfile);
- xdp_rxq_info_unreg(&tfile->xdp_rxq);
sock_put(&tfile->sk);
tun_cleanup_tx_ring(tfile);
}
list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) {
tun_enable_queue(tfile);
tun_queue_purge(tfile);
- xdp_rxq_info_unreg(&tfile->xdp_rxq);
sock_put(&tfile->sk);
tun_cleanup_tx_ring(tfile);
}
@@ -2225,7 +2223,8 @@ static void tun_prog_free(struct rcu_head *rcu)
kfree(prog);
}
-static int __tun_set_ebpf(struct tun_struct *tun, struct tun_prog **prog_p,
+static int __tun_set_ebpf(struct tun_struct *tun,
+ struct tun_prog __rcu **prog_p,
struct bpf_prog *prog)
{
struct tun_prog *old, *new = NULL;
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index d56fe32bf48d..8a22ff67b026 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -457,12 +457,10 @@ static enum skb_state defer_bh(struct usbnet *dev, struct sk_buff *skb,
void usbnet_defer_kevent (struct usbnet *dev, int work)
{
set_bit (work, &dev->flags);
- if (!schedule_work (&dev->kevent)) {
- if (net_ratelimit())
- netdev_err(dev->net, "kevent %d may have been dropped\n", work);
- } else {
+ if (!schedule_work (&dev->kevent))
+ netdev_dbg(dev->net, "kevent %d may have been dropped\n", work);
+ else
netdev_dbg(dev->net, "kevent %d scheduled\n", work);
- }
}
EXPORT_SYMBOL_GPL(usbnet_defer_kevent);
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 12dfc5fee58e..626c27352ae2 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -66,16 +66,39 @@ static const unsigned long guest_offloads[] = {
VIRTIO_NET_F_GUEST_UFO
};
-struct virtnet_stats {
- struct u64_stats_sync tx_syncp;
- struct u64_stats_sync rx_syncp;
- u64 tx_bytes;
- u64 tx_packets;
-
- u64 rx_bytes;
- u64 rx_packets;
+struct virtnet_stat_desc {
+ char desc[ETH_GSTRING_LEN];
+ size_t offset;
};
+struct virtnet_sq_stats {
+ struct u64_stats_sync syncp;
+ u64 packets;
+ u64 bytes;
+};
+
+struct virtnet_rq_stats {
+ struct u64_stats_sync syncp;
+ u64 packets;
+ u64 bytes;
+};
+
+#define VIRTNET_SQ_STAT(m) offsetof(struct virtnet_sq_stats, m)
+#define VIRTNET_RQ_STAT(m) offsetof(struct virtnet_rq_stats, m)
+
+static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = {
+ { "packets", VIRTNET_SQ_STAT(packets) },
+ { "bytes", VIRTNET_SQ_STAT(bytes) },
+};
+
+static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = {
+ { "packets", VIRTNET_RQ_STAT(packets) },
+ { "bytes", VIRTNET_RQ_STAT(bytes) },
+};
+
+#define VIRTNET_SQ_STATS_LEN ARRAY_SIZE(virtnet_sq_stats_desc)
+#define VIRTNET_RQ_STATS_LEN ARRAY_SIZE(virtnet_rq_stats_desc)
+
/* Internal representation of a send virtqueue */
struct send_queue {
/* Virtqueue associated with this send _queue */
@@ -87,6 +110,8 @@ struct send_queue {
/* Name of the send queue: output.$index */
char name[40];
+ struct virtnet_sq_stats stats;
+
struct napi_struct napi;
};
@@ -99,6 +124,8 @@ struct receive_queue {
struct bpf_prog __rcu *xdp_prog;
+ struct virtnet_rq_stats stats;
+
/* Chain pages by the private ptr. */
struct page *pages;
@@ -152,9 +179,6 @@ struct virtnet_info {
/* Packet virtio header size */
u8 hdr_len;
- /* Active statistics */
- struct virtnet_stats __percpu *stats;
-
/* Work struct for refilling if we run low on memory. */
struct delayed_work refill;
@@ -1127,7 +1151,6 @@ static int virtnet_receive(struct receive_queue *rq, int budget, bool *xdp_xmit)
struct virtnet_info *vi = rq->vq->vdev->priv;
unsigned int len, received = 0, bytes = 0;
void *buf;
- struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
if (!vi->big_packets || vi->mergeable_rx_bufs) {
void *ctx;
@@ -1150,10 +1173,10 @@ static int virtnet_receive(struct receive_queue *rq, int budget, bool *xdp_xmit)
schedule_delayed_work(&vi->refill, 0);
}
- u64_stats_update_begin(&stats->rx_syncp);
- stats->rx_bytes += bytes;
- stats->rx_packets += received;
- u64_stats_update_end(&stats->rx_syncp);
+ u64_stats_update_begin(&rq->stats.syncp);
+ rq->stats.bytes += bytes;
+ rq->stats.packets += received;
+ u64_stats_update_end(&rq->stats.syncp);
return received;
}
@@ -1162,8 +1185,6 @@ static void free_old_xmit_skbs(struct send_queue *sq)
{
struct sk_buff *skb;
unsigned int len;
- struct virtnet_info *vi = sq->vq->vdev->priv;
- struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
unsigned int packets = 0;
unsigned int bytes = 0;
@@ -1182,10 +1203,10 @@ static void free_old_xmit_skbs(struct send_queue *sq)
if (!packets)
return;
- u64_stats_update_begin(&stats->tx_syncp);
- stats->tx_bytes += bytes;
- stats->tx_packets += packets;
- u64_stats_update_end(&stats->tx_syncp);
+ u64_stats_update_begin(&sq->stats.syncp);
+ sq->stats.bytes += bytes;
+ sq->stats.packets += packets;
+ u64_stats_update_end(&sq->stats.syncp);
}
static void virtnet_poll_cleantx(struct receive_queue *rq)
@@ -1474,24 +1495,25 @@ static void virtnet_stats(struct net_device *dev,
struct rtnl_link_stats64 *tot)
{
struct virtnet_info *vi = netdev_priv(dev);
- int cpu;
unsigned int start;
+ int i;
- for_each_possible_cpu(cpu) {
- struct virtnet_stats *stats = per_cpu_ptr(vi->stats, cpu);
+ for (i = 0; i < vi->max_queue_pairs; i++) {
u64 tpackets, tbytes, rpackets, rbytes;
+ struct receive_queue *rq = &vi->rq[i];
+ struct send_queue *sq = &vi->sq[i];
do {
- start = u64_stats_fetch_begin_irq(&stats->tx_syncp);
- tpackets = stats->tx_packets;
- tbytes = stats->tx_bytes;
- } while (u64_stats_fetch_retry_irq(&stats->tx_syncp, start));
+ start = u64_stats_fetch_begin_irq(&sq->stats.syncp);
+ tpackets = sq->stats.packets;
+ tbytes = sq->stats.bytes;
+ } while (u64_stats_fetch_retry_irq(&sq->stats.syncp, start));
do {
- start = u64_stats_fetch_begin_irq(&stats->rx_syncp);
- rpackets = stats->rx_packets;
- rbytes = stats->rx_bytes;
- } while (u64_stats_fetch_retry_irq(&stats->rx_syncp, start));
+ start = u64_stats_fetch_begin_irq(&rq->stats.syncp);
+ rpackets = rq->stats.packets;
+ rbytes = rq->stats.bytes;
+ } while (u64_stats_fetch_retry_irq(&rq->stats.syncp, start));
tot->rx_packets += rpackets;
tot->tx_packets += tpackets;
@@ -1829,6 +1851,83 @@ static int virtnet_set_channels(struct net_device *dev,
return err;
}
+static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data)
+{
+ struct virtnet_info *vi = netdev_priv(dev);
+ char *p = (char *)data;
+ unsigned int i, j;
+
+ switch (stringset) {
+ case ETH_SS_STATS:
+ for (i = 0; i < vi->curr_queue_pairs; i++) {
+ for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) {
+ snprintf(p, ETH_GSTRING_LEN, "rx_queue_%u_%s",
+ i, virtnet_rq_stats_desc[j].desc);
+ p += ETH_GSTRING_LEN;
+ }
+ }
+
+ for (i = 0; i < vi->curr_queue_pairs; i++) {
+ for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) {
+ snprintf(p, ETH_GSTRING_LEN, "tx_queue_%u_%s",
+ i, virtnet_sq_stats_desc[j].desc);
+ p += ETH_GSTRING_LEN;
+ }
+ }
+ break;
+ }
+}
+
+static int virtnet_get_sset_count(struct net_device *dev, int sset)
+{
+ struct virtnet_info *vi = netdev_priv(dev);
+
+ switch (sset) {
+ case ETH_SS_STATS:
+ return vi->curr_queue_pairs * (VIRTNET_RQ_STATS_LEN +
+ VIRTNET_SQ_STATS_LEN);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static void virtnet_get_ethtool_stats(struct net_device *dev,
+ struct ethtool_stats *stats, u64 *data)
+{
+ struct virtnet_info *vi = netdev_priv(dev);
+ unsigned int idx = 0, start, i, j;
+ const u8 *stats_base;
+ size_t offset;
+
+ for (i = 0; i < vi->curr_queue_pairs; i++) {
+ struct receive_queue *rq = &vi->rq[i];
+
+ stats_base = (u8 *)&rq->stats;
+ do {
+ start = u64_stats_fetch_begin_irq(&rq->stats.syncp);
+ for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) {
+ offset = virtnet_rq_stats_desc[j].offset;
+ data[idx + j] = *(u64 *)(stats_base + offset);
+ }
+ } while (u64_stats_fetch_retry_irq(&rq->stats.syncp, start));
+ idx += VIRTNET_RQ_STATS_LEN;
+ }
+
+ for (i = 0; i < vi->curr_queue_pairs; i++) {
+ struct send_queue *sq = &vi->sq[i];
+
+ stats_base = (u8 *)&sq->stats;
+ do {
+ start = u64_stats_fetch_begin_irq(&sq->stats.syncp);
+ for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) {
+ offset = virtnet_sq_stats_desc[j].offset;
+ data[idx + j] = *(u64 *)(stats_base + offset);
+ }
+ } while (u64_stats_fetch_retry_irq(&sq->stats.syncp, start));
+ idx += VIRTNET_SQ_STATS_LEN;
+ }
+}
+
static void virtnet_get_channels(struct net_device *dev,
struct ethtool_channels *channels)
{
@@ -1928,6 +2027,9 @@ static const struct ethtool_ops virtnet_ethtool_ops = {
.get_drvinfo = virtnet_get_drvinfo,
.get_link = ethtool_op_get_link,
.get_ringparam = virtnet_get_ringparam,
+ .get_strings = virtnet_get_strings,
+ .get_sset_count = virtnet_get_sset_count,
+ .get_ethtool_stats = virtnet_get_ethtool_stats,
.set_channels = virtnet_set_channels,
.get_channels = virtnet_get_channels,
.get_ts_info = ethtool_op_get_ts_info,
@@ -2420,6 +2522,9 @@ static int virtnet_alloc_queues(struct virtnet_info *vi)
sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len);
sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));
+
+ u64_stats_init(&vi->rq[i].stats.syncp);
+ u64_stats_init(&vi->sq[i].stats.syncp);
}
return 0;
@@ -2544,7 +2649,7 @@ static int virtnet_validate(struct virtio_device *vdev)
static int virtnet_probe(struct virtio_device *vdev)
{
- int i, err;
+ int i, err = -ENOMEM;
struct net_device *dev;
struct virtnet_info *vi;
u16 max_queue_pairs;
@@ -2621,17 +2726,6 @@ static int virtnet_probe(struct virtio_device *vdev)
vi->dev = dev;
vi->vdev = vdev;
vdev->priv = vi;
- vi->stats = alloc_percpu(struct virtnet_stats);
- err = -ENOMEM;
- if (vi->stats == NULL)
- goto free;
-
- for_each_possible_cpu(i) {
- struct virtnet_stats *virtnet_stats;
- virtnet_stats = per_cpu_ptr(vi->stats, i);
- u64_stats_init(&virtnet_stats->tx_syncp);
- u64_stats_init(&virtnet_stats->rx_syncp);
- }
INIT_WORK(&vi->config_work, virtnet_config_changed_work);
@@ -2668,7 +2762,7 @@ static int virtnet_probe(struct virtio_device *vdev)
*/
dev_err(&vdev->dev, "device MTU appears to have changed "
"it is now %d < %d", mtu, dev->min_mtu);
- goto free_stats;
+ goto free;
}
dev->mtu = mtu;
@@ -2692,7 +2786,7 @@ static int virtnet_probe(struct virtio_device *vdev)
/* Allocate/initialize the rx/tx queues, and invoke find_vqs */
err = init_vqs(vi);
if (err)
- goto free_stats;
+ goto free;
#ifdef CONFIG_SYSFS
if (vi->mergeable_rx_bufs)
@@ -2747,8 +2841,6 @@ free_vqs:
cancel_delayed_work_sync(&vi->refill);
free_receive_page_frags(vi);
virtnet_del_vqs(vi);
-free_stats:
- free_percpu(vi->stats);
free:
free_netdev(dev);
return err;
@@ -2781,7 +2873,6 @@ static void virtnet_remove(struct virtio_device *vdev)
remove_vq_common(vi);
- free_percpu(vi->stats);
free_netdev(vi->dev);
}
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index d1c7029ded7c..cf95290b160c 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -1616,7 +1616,6 @@ static void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq,
rq->rx_ring[i].basePA);
rq->rx_ring[i].base = NULL;
}
- rq->buf_info[i] = NULL;
}
if (rq->data_ring.base) {
@@ -1638,6 +1637,7 @@ static void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq,
(rq->rx_ring[0].size + rq->rx_ring[1].size);
dma_free_coherent(&adapter->pdev->dev, sz, rq->buf_info[0],
rq->buf_info_pa);
+ rq->buf_info[0] = rq->buf_info[1] = NULL;
}
}
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index e54255597fac..1cf22e62e3dd 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -32,6 +32,7 @@
#include <net/genetlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <linux/rhashtable.h>
#include "mac80211_hwsim.h"
#define WARN_QUEUE 100
@@ -490,6 +491,7 @@ static const struct ieee80211_iface_combination hwsim_if_comb_p2p_dev[] = {
static spinlock_t hwsim_radio_lock;
static LIST_HEAD(hwsim_radios);
static struct workqueue_struct *hwsim_wq;
+static struct rhashtable hwsim_radios_rht;
static int hwsim_radio_idx;
static struct platform_driver mac80211_hwsim_driver = {
@@ -500,6 +502,7 @@ static struct platform_driver mac80211_hwsim_driver = {
struct mac80211_hwsim_data {
struct list_head list;
+ struct rhash_head rht;
struct ieee80211_hw *hw;
struct device *dev;
struct ieee80211_supported_band bands[NUM_NL80211_BANDS];
@@ -574,6 +577,13 @@ struct mac80211_hwsim_data {
u64 tx_failed;
};
+static const struct rhashtable_params hwsim_rht_params = {
+ .nelem_hint = 2,
+ .automatic_shrinking = true,
+ .key_len = ETH_ALEN,
+ .key_offset = offsetof(struct mac80211_hwsim_data, addresses[1]),
+ .head_offset = offsetof(struct mac80211_hwsim_data, rht),
+};
struct hwsim_radiotap_hdr {
struct ieee80211_radiotap_header hdr;
@@ -1009,6 +1019,36 @@ static int hwsim_unicast_netgroup(struct mac80211_hwsim_data *data,
return res;
}
+static inline u16 trans_tx_rate_flags_ieee2hwsim(struct ieee80211_tx_rate *rate)
+{
+ u16 result = 0;
+
+ if (rate->flags & IEEE80211_TX_RC_USE_RTS_CTS)
+ result |= MAC80211_HWSIM_TX_RC_USE_RTS_CTS;
+ if (rate->flags & IEEE80211_TX_RC_USE_CTS_PROTECT)
+ result |= MAC80211_HWSIM_TX_RC_USE_CTS_PROTECT;
+ if (rate->flags & IEEE80211_TX_RC_USE_SHORT_PREAMBLE)
+ result |= MAC80211_HWSIM_TX_RC_USE_SHORT_PREAMBLE;
+ if (rate->flags & IEEE80211_TX_RC_MCS)
+ result |= MAC80211_HWSIM_TX_RC_MCS;
+ if (rate->flags & IEEE80211_TX_RC_GREEN_FIELD)
+ result |= MAC80211_HWSIM_TX_RC_GREEN_FIELD;
+ if (rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH)
+ result |= MAC80211_HWSIM_TX_RC_40_MHZ_WIDTH;
+ if (rate->flags & IEEE80211_TX_RC_DUP_DATA)
+ result |= MAC80211_HWSIM_TX_RC_DUP_DATA;
+ if (rate->flags & IEEE80211_TX_RC_SHORT_GI)
+ result |= MAC80211_HWSIM_TX_RC_SHORT_GI;
+ if (rate->flags & IEEE80211_TX_RC_VHT_MCS)
+ result |= MAC80211_HWSIM_TX_RC_VHT_MCS;
+ if (rate->flags & IEEE80211_TX_RC_80_MHZ_WIDTH)
+ result |= MAC80211_HWSIM_TX_RC_80_MHZ_WIDTH;
+ if (rate->flags & IEEE80211_TX_RC_160_MHZ_WIDTH)
+ result |= MAC80211_HWSIM_TX_RC_160_MHZ_WIDTH;
+
+ return result;
+}
+
static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw,
struct sk_buff *my_skb,
int dst_portid)
@@ -1021,6 +1061,7 @@ static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw,
unsigned int hwsim_flags = 0;
int i;
struct hwsim_tx_rate tx_attempts[IEEE80211_TX_MAX_RATES];
+ struct hwsim_tx_rate_flag tx_attempts_flags[IEEE80211_TX_MAX_RATES];
uintptr_t cookie;
if (data->ps != PS_DISABLED)
@@ -1072,7 +1113,11 @@ static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw,
for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
tx_attempts[i].idx = info->status.rates[i].idx;
+ tx_attempts_flags[i].idx = info->status.rates[i].idx;
tx_attempts[i].count = info->status.rates[i].count;
+ tx_attempts_flags[i].flags =
+ trans_tx_rate_flags_ieee2hwsim(
+ &info->status.rates[i]);
}
if (nla_put(skb, HWSIM_ATTR_TX_INFO,
@@ -1080,6 +1125,11 @@ static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw,
tx_attempts))
goto nla_put_failure;
+ if (nla_put(skb, HWSIM_ATTR_TX_INFO_FLAGS,
+ sizeof(struct hwsim_tx_rate_flag) * IEEE80211_TX_MAX_RATES,
+ tx_attempts_flags))
+ goto nla_put_failure;
+
/* We create a cookie to identify this skb */
data->pending_cookie++;
cookie = data->pending_cookie;
@@ -2732,6 +2782,15 @@ static int mac80211_hwsim_new_radio(struct genl_info *info,
CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
spin_lock_bh(&hwsim_radio_lock);
+ err = rhashtable_insert_fast(&hwsim_radios_rht, &data->rht,
+ hwsim_rht_params);
+ if (err < 0) {
+ pr_debug("mac80211_hwsim: radio index %d already present\n",
+ idx);
+ spin_unlock_bh(&hwsim_radio_lock);
+ goto failed_final_insert;
+ }
+
list_add_tail(&data->list, &hwsim_radios);
spin_unlock_bh(&hwsim_radio_lock);
@@ -2740,6 +2799,9 @@ static int mac80211_hwsim_new_radio(struct genl_info *info,
return idx;
+failed_final_insert:
+ debugfs_remove_recursive(data->debugfs);
+ ieee80211_unregister_hw(data->hw);
failed_hw:
device_release_driver(data->dev);
failed_bind:
@@ -2875,22 +2937,9 @@ static void hwsim_mon_setup(struct net_device *dev)
static struct mac80211_hwsim_data *get_hwsim_data_ref_from_addr(const u8 *addr)
{
- struct mac80211_hwsim_data *data;
- bool _found = false;
-
- spin_lock_bh(&hwsim_radio_lock);
- list_for_each_entry(data, &hwsim_radios, list) {
- if (memcmp(data->addresses[1].addr, addr, ETH_ALEN) == 0) {
- _found = true;
- break;
- }
- }
- spin_unlock_bh(&hwsim_radio_lock);
-
- if (!_found)
- return NULL;
-
- return data;
+ return rhashtable_lookup_fast(&hwsim_radios_rht,
+ addr,
+ hwsim_rht_params);
}
static void hwsim_register_wmediumd(struct net *net, u32 portid)
@@ -2975,7 +3024,6 @@ static int hwsim_tx_info_frame_received_nl(struct sk_buff *skb_2,
for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
txi->status.rates[i].idx = tx_attempts[i].idx;
txi->status.rates[i].count = tx_attempts[i].count;
- /*txi->status.rates[i].flags = 0;*/
}
txi->status.ack_signal = nla_get_u32(info->attrs[HWSIM_ATTR_SIGNAL]);
@@ -3155,8 +3203,10 @@ static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info)
if (info->attrs[HWSIM_ATTR_REG_CUSTOM_REG]) {
u32 idx = nla_get_u32(info->attrs[HWSIM_ATTR_REG_CUSTOM_REG]);
- if (idx >= ARRAY_SIZE(hwsim_world_regdom_custom))
+ if (idx >= ARRAY_SIZE(hwsim_world_regdom_custom)) {
+ kfree(hwname);
return -EINVAL;
+ }
param.regd = hwsim_world_regdom_custom[idx];
}
@@ -3197,6 +3247,8 @@ static int hwsim_del_radio_nl(struct sk_buff *msg, struct genl_info *info)
continue;
list_del(&data->list);
+ rhashtable_remove_fast(&hwsim_radios_rht, &data->rht,
+ hwsim_rht_params);
spin_unlock_bh(&hwsim_radio_lock);
mac80211_hwsim_del_radio(data, wiphy_name(data->hw->wiphy),
info);
@@ -3352,6 +3404,8 @@ static void remove_user_radios(u32 portid)
list_for_each_entry_safe(entry, tmp, &hwsim_radios, list) {
if (entry->destroy_on_close && entry->portid == portid) {
list_del(&entry->list);
+ rhashtable_remove_fast(&hwsim_radios_rht, &entry->rht,
+ hwsim_rht_params);
INIT_WORK(&entry->destroy_work, destroy_radio);
queue_work(hwsim_wq, &entry->destroy_work);
}
@@ -3427,6 +3481,8 @@ static void __net_exit hwsim_exit_net(struct net *net)
continue;
list_del(&data->list);
+ rhashtable_remove_fast(&hwsim_radios_rht, &data->rht,
+ hwsim_rht_params);
INIT_WORK(&data->destroy_work, destroy_radio);
queue_work(hwsim_wq, &data->destroy_work);
}
@@ -3463,6 +3519,7 @@ static int __init init_mac80211_hwsim(void)
hwsim_wq = alloc_workqueue("hwsim_wq",WQ_MEM_RECLAIM,0);
if (!hwsim_wq)
return -ENOMEM;
+ rhashtable_init(&hwsim_radios_rht, &hwsim_rht_params);
err = register_pernet_device(&hwsim_net_ops);
if (err)
@@ -3604,6 +3661,7 @@ static void __exit exit_mac80211_hwsim(void)
mac80211_hwsim_free();
flush_workqueue(hwsim_wq);
+ rhashtable_destroy(&hwsim_radios_rht);
unregister_netdev(hwsim_mon);
platform_driver_unregister(&mac80211_hwsim_driver);
unregister_pernet_device(&hwsim_net_ops);
diff --git a/drivers/net/wireless/mac80211_hwsim.h b/drivers/net/wireless/mac80211_hwsim.h
index 3f5eda591dba..a96a79c1eff5 100644
--- a/drivers/net/wireless/mac80211_hwsim.h
+++ b/drivers/net/wireless/mac80211_hwsim.h
@@ -64,7 +64,8 @@ enum hwsim_tx_control_flags {
* @HWSIM_CMD_TX_INFO_FRAME: Transmission info report from user space to
* kernel, uses:
* %HWSIM_ATTR_ADDR_TRANSMITTER, %HWSIM_ATTR_FLAGS,
- * %HWSIM_ATTR_TX_INFO, %HWSIM_ATTR_SIGNAL, %HWSIM_ATTR_COOKIE
+ * %HWSIM_ATTR_TX_INFO, %WSIM_ATTR_TX_INFO_FLAGS,
+ * %HWSIM_ATTR_SIGNAL, %HWSIM_ATTR_COOKIE
* @HWSIM_CMD_NEW_RADIO: create a new radio with the given parameters,
* returns the radio ID (>= 0) or negative on errors, if successful
* then multicast the result
@@ -123,6 +124,8 @@ enum {
* @HWSIM_ATTR_RADIO_NAME: Name of radio, e.g. phy666
* @HWSIM_ATTR_NO_VIF: Do not create vif (wlanX) when creating radio.
* @HWSIM_ATTR_FREQ: Frequency at which packet is transmitted or received.
+ * @HWSIM_ATTR_TX_INFO_FLAGS: additional flags for corresponding
+ * rates of %HWSIM_ATTR_TX_INFO
* @__HWSIM_ATTR_MAX: enum limit
*/
@@ -149,6 +152,7 @@ enum {
HWSIM_ATTR_NO_VIF,
HWSIM_ATTR_FREQ,
HWSIM_ATTR_PAD,
+ HWSIM_ATTR_TX_INFO_FLAGS,
__HWSIM_ATTR_MAX,
};
#define HWSIM_ATTR_MAX (__HWSIM_ATTR_MAX - 1)
@@ -171,4 +175,66 @@ struct hwsim_tx_rate {
u8 count;
} __packed;
+/**
+ * enum hwsim_tx_rate_flags - per-rate flags set by the rate control algorithm.
+ * Inspired by structure mac80211_rate_control_flags. New flags may be
+ * appended, but old flags not deleted, to keep compatibility for
+ * userspace.
+ *
+ * These flags are set by the Rate control algorithm for each rate during tx,
+ * in the @flags member of struct ieee80211_tx_rate.
+ *
+ * @MAC80211_HWSIM_TX_RC_USE_RTS_CTS: Use RTS/CTS exchange for this rate.
+ * @MAC80211_HWSIM_TX_RC_USE_CTS_PROTECT: CTS-to-self protection is required.
+ * This is set if the current BSS requires ERP protection.
+ * @MAC80211_HWSIM_TX_RC_USE_SHORT_PREAMBLE: Use short preamble.
+ * @MAC80211_HWSIM_TX_RC_MCS: HT rate.
+ * @MAC80211_HWSIM_TX_RC_VHT_MCS: VHT MCS rate, in this case the idx field is
+ * split into a higher 4 bits (Nss) and lower 4 bits (MCS number)
+ * @MAC80211_HWSIM_TX_RC_GREEN_FIELD: Indicates whether this rate should be used
+ * in Greenfield mode.
+ * @MAC80211_HWSIM_TX_RC_40_MHZ_WIDTH: Indicates if the Channel Width should be
+ * 40 MHz.
+ * @MAC80211_HWSIM_TX_RC_80_MHZ_WIDTH: Indicates 80 MHz transmission
+ * @MAC80211_HWSIM_TX_RC_160_MHZ_WIDTH: Indicates 160 MHz transmission
+ * (80+80 isn't supported yet)
+ * @MAC80211_HWSIM_TX_RC_DUP_DATA: The frame should be transmitted on both of
+ * the adjacent 20 MHz channels, if the current channel type is
+ * NL80211_CHAN_HT40MINUS or NL80211_CHAN_HT40PLUS.
+ * @MAC80211_HWSIM_TX_RC_SHORT_GI: Short Guard interval should be used for this
+ * rate.
+ */
+enum hwsim_tx_rate_flags {
+ MAC80211_HWSIM_TX_RC_USE_RTS_CTS = BIT(0),
+ MAC80211_HWSIM_TX_RC_USE_CTS_PROTECT = BIT(1),
+ MAC80211_HWSIM_TX_RC_USE_SHORT_PREAMBLE = BIT(2),
+
+ /* rate index is an HT/VHT MCS instead of an index */
+ MAC80211_HWSIM_TX_RC_MCS = BIT(3),
+ MAC80211_HWSIM_TX_RC_GREEN_FIELD = BIT(4),
+ MAC80211_HWSIM_TX_RC_40_MHZ_WIDTH = BIT(5),
+ MAC80211_HWSIM_TX_RC_DUP_DATA = BIT(6),
+ MAC80211_HWSIM_TX_RC_SHORT_GI = BIT(7),
+ MAC80211_HWSIM_TX_RC_VHT_MCS = BIT(8),
+ MAC80211_HWSIM_TX_RC_80_MHZ_WIDTH = BIT(9),
+ MAC80211_HWSIM_TX_RC_160_MHZ_WIDTH = BIT(10),
+};
+
+/**
+ * struct hwsim_tx_rate - rate selection/status
+ *
+ * @idx: rate index to attempt to send with
+ * @count: number of tries in this rate before going to the next rate
+ *
+ * A value of -1 for @idx indicates an invalid rate and, if used
+ * in an array of retry rates, that no more rates should be tried.
+ *
+ * When used for transmit status reporting, the driver should
+ * always report the rate and number of retries used.
+ *
+ */
+struct hwsim_tx_rate_flag {
+ s8 idx;
+ u16 flags;
+} __packed;
#endif /* __MAC80211_HWSIM_H */
diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
index 58476b728c57..c9406852c3e9 100644
--- a/drivers/scsi/libsas/sas_scsi_host.c
+++ b/drivers/scsi/libsas/sas_scsi_host.c
@@ -486,15 +486,28 @@ static int sas_queue_reset(struct domain_device *dev, int reset_type,
int sas_eh_abort_handler(struct scsi_cmnd *cmd)
{
- int res;
+ int res = TMF_RESP_FUNC_FAILED;
struct sas_task *task = TO_SAS_TASK(cmd);
struct Scsi_Host *host = cmd->device->host;
+ struct domain_device *dev = cmd_to_domain_dev(cmd);
struct sas_internal *i = to_sas_internal(host->transportt);
+ unsigned long flags;
if (!i->dft->lldd_abort_task)
return FAILED;
- res = i->dft->lldd_abort_task(task);
+ spin_lock_irqsave(host->host_lock, flags);
+ /* We cannot do async aborts for SATA devices */
+ if (dev_is_sata(dev) && !host->host_eh_scheduled) {
+ spin_unlock_irqrestore(host->host_lock, flags);
+ return FAILED;
+ }
+ spin_unlock_irqrestore(host->host_lock, flags);
+
+ if (task)
+ res = i->dft->lldd_abort_task(task);
+ else
+ SAS_DPRINTK("no task to abort\n");
if (res == TMF_RESP_FUNC_SUCC || res == TMF_RESP_FUNC_COMPLETE)
return SUCCESS;
diff --git a/drivers/tty/serdev/core.c b/drivers/tty/serdev/core.c
index 1bef39828ca7..3c573a8caa80 100644
--- a/drivers/tty/serdev/core.c
+++ b/drivers/tty/serdev/core.c
@@ -225,6 +225,18 @@ void serdev_device_set_flow_control(struct serdev_device *serdev, bool enable)
}
EXPORT_SYMBOL_GPL(serdev_device_set_flow_control);
+int serdev_device_set_parity(struct serdev_device *serdev,
+ enum serdev_parity parity)
+{
+ struct serdev_controller *ctrl = serdev->ctrl;
+
+ if (!ctrl || !ctrl->ops->set_parity)
+ return -ENOTSUPP;
+
+ return ctrl->ops->set_parity(ctrl, parity);
+}
+EXPORT_SYMBOL_GPL(serdev_device_set_parity);
+
void serdev_device_wait_until_sent(struct serdev_device *serdev, long timeout)
{
struct serdev_controller *ctrl = serdev->ctrl;
diff --git a/drivers/tty/serdev/serdev-ttyport.c b/drivers/tty/serdev/serdev-ttyport.c
index 247788a16f0b..1bdf2959ac5c 100644
--- a/drivers/tty/serdev/serdev-ttyport.c
+++ b/drivers/tty/serdev/serdev-ttyport.c
@@ -190,6 +190,29 @@ static void ttyport_set_flow_control(struct serdev_controller *ctrl, bool enable
tty_set_termios(tty, &ktermios);
}
+static int ttyport_set_parity(struct serdev_controller *ctrl,
+ enum serdev_parity parity)
+{
+ struct serport *serport = serdev_controller_get_drvdata(ctrl);
+ struct tty_struct *tty = serport->tty;
+ struct ktermios ktermios = tty->termios;
+
+ ktermios.c_cflag &= ~(PARENB | PARODD | CMSPAR);
+ if (parity != SERDEV_PARITY_NONE) {
+ ktermios.c_cflag |= PARENB;
+ if (parity == SERDEV_PARITY_ODD)
+ ktermios.c_cflag |= PARODD;
+ }
+
+ tty_set_termios(tty, &ktermios);
+
+ if ((tty->termios.c_cflag & (PARENB | PARODD | CMSPAR)) !=
+ (ktermios.c_cflag & (PARENB | PARODD | CMSPAR)))
+ return -EINVAL;
+
+ return 0;
+}
+
static void ttyport_wait_until_sent(struct serdev_controller *ctrl, long timeout)
{
struct serport *serport = serdev_controller_get_drvdata(ctrl);
@@ -227,6 +250,7 @@ static const struct serdev_controller_ops ctrl_ops = {
.open = ttyport_open,
.close = ttyport_close,
.set_flow_control = ttyport_set_flow_control,
+ .set_parity = ttyport_set_parity,
.set_baudrate = ttyport_set_baudrate,
.wait_until_sent = ttyport_wait_until_sent,
.get_tiocm = ttyport_get_tiocm,
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 33ac2b186b85..5727b186b3ca 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -904,7 +904,7 @@ static void vhost_dev_lock_vqs(struct vhost_dev *d)
{
int i = 0;
for (i = 0; i < d->nvqs; ++i)
- mutex_lock(&d->vqs[i]->mutex);
+ mutex_lock_nested(&d->vqs[i]->mutex, i);
}
static void vhost_dev_unlock_vqs(struct vhost_dev *d)
@@ -1015,6 +1015,10 @@ static int vhost_process_iotlb_msg(struct vhost_dev *dev,
vhost_iotlb_notify_vq(dev, msg);
break;
case VHOST_IOTLB_INVALIDATE:
+ if (!dev->iotlb) {
+ ret = -EFAULT;
+ break;
+ }
vhost_vq_meta_reset(dev);
vhost_del_umem_range(dev->iotlb, msg->iova,
msg->iova + msg->size - 1);
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index f650e475d8f0..fdf2aad73470 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -60,10 +60,10 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
gi->gid[i] = exp->ex_anon_gid;
else
gi->gid[i] = rqgi->gid[i];
-
- /* Each thread allocates its own gi, no race */
- groups_sort(gi);
}
+
+ /* Each thread allocates its own gi, no race */
+ groups_sort(gi);
} else {
gi = get_group_info(rqgi);
}
diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c
index ded456f17de6..c584ad8d023c 100644
--- a/fs/orangefs/devorangefs-req.c
+++ b/fs/orangefs/devorangefs-req.c
@@ -162,7 +162,7 @@ static ssize_t orangefs_devreq_read(struct file *file,
struct orangefs_kernel_op_s *op, *temp;
__s32 proto_ver = ORANGEFS_KERNEL_PROTO_VERSION;
static __s32 magic = ORANGEFS_DEVREQ_MAGIC;
- struct orangefs_kernel_op_s *cur_op = NULL;
+ struct orangefs_kernel_op_s *cur_op;
unsigned long ret;
/* We do not support blocking IO. */
@@ -186,6 +186,7 @@ static ssize_t orangefs_devreq_read(struct file *file,
return -EAGAIN;
restart:
+ cur_op = NULL;
/* Get next op (if any) from top of list. */
spin_lock(&orangefs_request_list_lock);
list_for_each_entry_safe(op, temp, &orangefs_request_list, list) {
diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c
index 835c6e148afc..0577d6dba8c8 100644
--- a/fs/orangefs/waitqueue.c
+++ b/fs/orangefs/waitqueue.c
@@ -29,10 +29,10 @@ static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s
*/
void purge_waiting_ops(void)
{
- struct orangefs_kernel_op_s *op;
+ struct orangefs_kernel_op_s *op, *tmp;
spin_lock(&orangefs_request_list_lock);
- list_for_each_entry(op, &orangefs_request_list, list) {
+ list_for_each_entry_safe(op, tmp, &orangefs_request_list, list) {
gossip_debug(GOSSIP_WAIT_DEBUG,
"pvfs2-client-core: purging op tag %llu %s\n",
llu(op->tag),
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index dc1ebfeeb5ec..f05b9b6cd43f 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -56,6 +56,8 @@ static inline acpi_handle acpi_device_handle(struct acpi_device *adev)
#define ACPI_COMPANION_SET(dev, adev) set_primary_fwnode(dev, (adev) ? \
acpi_fwnode_handle(adev) : NULL)
#define ACPI_HANDLE(dev) acpi_device_handle(ACPI_COMPANION(dev))
+#define ACPI_HANDLE_FWNODE(fwnode) \
+ acpi_device_handle(to_acpi_device_node(fwnode))
static inline struct fwnode_handle *acpi_alloc_fwnode_static(void)
{
@@ -626,6 +628,7 @@ int acpi_arch_timer_mem_init(struct arch_timer_mem *timer_mem, int *timer_count)
#define ACPI_COMPANION(dev) (NULL)
#define ACPI_COMPANION_SET(dev, adev) do { } while (0)
#define ACPI_HANDLE(dev) (NULL)
+#define ACPI_HANDLE_FWNODE(fwnode) (NULL)
#define ACPI_DEVICE_CLASS(_cls, _msk) .cls = (0), .cls_msk = (0),
struct fwnode_handle;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 425056c7f96c..276932d75975 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -688,6 +688,8 @@ static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err);
void bpf_prog_free(struct bpf_prog *fp);
+bool bpf_opcode_in_insntable(u8 code);
+
struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags);
struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
gfp_t gfp_extra_flags);
@@ -1003,10 +1005,20 @@ struct bpf_sock_ops_kern {
struct sock *sk;
u32 op;
union {
+ u32 args[4];
u32 reply;
u32 replylong[4];
};
u32 is_fullsock;
+ u64 temp; /* temp and everything after is not
+ * initialized to 0 before calling
+ * the BPF program. New fields that
+ * should be initialized to 0 should
+ * be inserted before temp.
+ * temp is scratch storage used by
+ * sock_ops_convert_ctx_access
+ * as temporary storage of a register.
+ */
};
#endif /* __LINUX_FILTER_H__ */
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 2bab81951ced..3319df9727aa 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -332,6 +332,8 @@ extern int ftrace_text_reserved(const void *start, const void *end);
extern int ftrace_nr_registered_ops(void);
+struct ftrace_ops *ftrace_ops_trampoline(unsigned long addr);
+
bool is_ftrace_trampoline(unsigned long addr);
/*
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index 1ac5bf95bfdd..e16fe7d44a71 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -173,7 +173,7 @@ static inline struct net_device *ip_dev_find(struct net *net, __be32 addr)
}
int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b);
-int devinet_ioctl(struct net *net, unsigned int cmd, void __user *);
+int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *);
void devinet_init(void);
struct in_device *inetdev_by_index(struct net *, int);
__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope);
diff --git a/include/linux/net.h b/include/linux/net.h
index caeb159abda5..68acc54976bf 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -306,7 +306,6 @@ int kernel_sendpage(struct socket *sock, struct page *page, int offset,
size_t size, int flags);
int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
size_t size, int flags);
-int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg);
int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how);
/* Routine returns the IP overhead imposed by a (caller-protected) socket. */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ed0799a12bf2..cd46d3d63aa0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -851,6 +851,7 @@ struct xfrmdev_ops {
void (*xdo_dev_state_free) (struct xfrm_state *x);
bool (*xdo_dev_offload_ok) (struct sk_buff *skb,
struct xfrm_state *x);
+ void (*xdo_dev_state_advance_esn) (struct xfrm_state *x);
};
#endif
@@ -1469,8 +1470,6 @@ enum netdev_priv_flags {
* @base_addr: Device I/O address
* @irq: Device IRQ number
*
- * @carrier_changes: Stats to monitor carrier on<->off transitions
- *
* @state: Generic network queuing layer state, see netdev_state_t
* @dev_list: The global list of network devices
* @napi_list: List entry used for polling NAPI devices
@@ -1506,6 +1505,8 @@ enum netdev_priv_flags {
* do not use this in drivers
* @rx_nohandler: nohandler dropped packets by core network on
* inactive devices, do not use this in drivers
+ * @carrier_up_count: Number of times the carrier has been up
+ * @carrier_down_count: Number of times the carrier has been down
*
* @wireless_handlers: List of functions to handle Wireless Extensions,
* instead of ioctl,
@@ -1680,8 +1681,6 @@ struct net_device {
unsigned long base_addr;
int irq;
- atomic_t carrier_changes;
-
/*
* Some hardware also needs these fields (state,dev_list,
* napi_list,unreg_list,close_list) but they are not
@@ -1719,6 +1718,10 @@ struct net_device {
atomic_long_t tx_dropped;
atomic_long_t rx_nohandler;
+ /* Stats to monitor link on/off, flapping */
+ atomic_t carrier_up_count;
+ atomic_t carrier_down_count;
+
#ifdef CONFIG_WIRELESS_EXT
const struct iw_handler_def *wireless_handlers;
struct iw_public_data *wireless_data;
@@ -2759,7 +2762,8 @@ static inline bool dev_validate_header(const struct net_device *dev,
return false;
}
-typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr, int len);
+typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr,
+ int len, int size);
int register_gifconf(unsigned int family, gifconf_func_t *gifconf);
static inline int unregister_gifconf(unsigned int family)
{
@@ -3312,7 +3316,9 @@ int netdev_rx_handler_register(struct net_device *dev,
void netdev_rx_handler_unregister(struct net_device *dev);
bool dev_valid_name(const char *name);
-int dev_ioctl(struct net *net, unsigned int cmd, void __user *);
+int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr,
+ bool *need_copyout);
+int dev_ifconf(struct net *net, struct ifconf *, int);
int dev_ethtool(struct net *net, struct ifreq *);
unsigned int dev_get_flags(const struct net_device *);
int __dev_change_flags(struct net_device *, unsigned int flags);
diff --git a/include/linux/property.h b/include/linux/property.h
index f6189a3ac63c..5b0563ad79a5 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -83,11 +83,17 @@ struct fwnode_handle *fwnode_get_next_parent(
struct fwnode_handle *fwnode);
struct fwnode_handle *fwnode_get_next_child_node(
const struct fwnode_handle *fwnode, struct fwnode_handle *child);
+struct fwnode_handle *fwnode_get_next_available_child_node(
+ const struct fwnode_handle *fwnode, struct fwnode_handle *child);
#define fwnode_for_each_child_node(fwnode, child) \
for (child = fwnode_get_next_child_node(fwnode, NULL); child; \
child = fwnode_get_next_child_node(fwnode, child))
+#define fwnode_for_each_available_child_node(fwnode, child) \
+ for (child = fwnode_get_next_available_child_node(fwnode, NULL); child;\
+ child = fwnode_get_next_available_child_node(fwnode, child))
+
struct fwnode_handle *device_get_next_child_node(
struct device *dev, struct fwnode_handle *child);
@@ -103,6 +109,8 @@ struct fwnode_handle *device_get_named_child_node(struct device *dev,
struct fwnode_handle *fwnode_handle_get(struct fwnode_handle *fwnode);
void fwnode_handle_put(struct fwnode_handle *fwnode);
+int fwnode_irq_get(struct fwnode_handle *fwnode, unsigned int index);
+
unsigned int device_get_child_node_count(struct device *dev);
static inline bool device_property_read_bool(struct device *dev,
@@ -279,6 +287,9 @@ int device_get_phy_mode(struct device *dev);
void *device_get_mac_address(struct device *dev, char *addr, int alen);
+int fwnode_get_phy_mode(struct fwnode_handle *fwnode);
+void *fwnode_get_mac_address(struct fwnode_handle *fwnode,
+ char *addr, int alen);
struct fwnode_handle *fwnode_graph_get_next_endpoint(
const struct fwnode_handle *fwnode, struct fwnode_handle *prev);
struct fwnode_handle *
diff --git a/include/linux/serdev.h b/include/linux/serdev.h
index d609e6dc5bad..a73d87b483b4 100644
--- a/include/linux/serdev.h
+++ b/include/linux/serdev.h
@@ -76,6 +76,12 @@ static inline struct serdev_device_driver *to_serdev_device_driver(struct device
return container_of(d, struct serdev_device_driver, driver);
}
+enum serdev_parity {
+ SERDEV_PARITY_NONE,
+ SERDEV_PARITY_EVEN,
+ SERDEV_PARITY_ODD,
+};
+
/*
* serdev controller structures
*/
@@ -86,6 +92,7 @@ struct serdev_controller_ops {
int (*open)(struct serdev_controller *);
void (*close)(struct serdev_controller *);
void (*set_flow_control)(struct serdev_controller *, bool);
+ int (*set_parity)(struct serdev_controller *, enum serdev_parity);
unsigned int (*set_baudrate)(struct serdev_controller *, unsigned int);
void (*wait_until_sent)(struct serdev_controller *, long);
int (*get_tiocm)(struct serdev_controller *);
@@ -298,6 +305,9 @@ static inline int serdev_device_set_rts(struct serdev_device *serdev, bool enabl
return serdev_device_set_tiocm(serdev, 0, TIOCM_RTS);
}
+int serdev_device_set_parity(struct serdev_device *serdev,
+ enum serdev_parity parity);
+
/*
* serdev hooks into TTY core
*/
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 9c5a2628d6ce..1d3877c39a00 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -124,6 +124,11 @@ static inline bool is_write_device_private_entry(swp_entry_t entry)
return unlikely(swp_type(entry) == SWP_DEVICE_WRITE);
}
+static inline unsigned long device_private_entry_to_pfn(swp_entry_t entry)
+{
+ return swp_offset(entry);
+}
+
static inline struct page *device_private_entry_to_page(swp_entry_t entry)
{
return pfn_to_page(swp_offset(entry));
@@ -154,6 +159,11 @@ static inline bool is_write_device_private_entry(swp_entry_t entry)
return false;
}
+static inline unsigned long device_private_entry_to_pfn(swp_entry_t entry)
+{
+ return 0;
+}
+
static inline struct page *device_private_entry_to_page(swp_entry_t entry)
{
return NULL;
@@ -189,6 +199,11 @@ static inline int is_write_migration_entry(swp_entry_t entry)
return unlikely(swp_type(entry) == SWP_MIGRATION_WRITE);
}
+static inline unsigned long migration_entry_to_pfn(swp_entry_t entry)
+{
+ return swp_offset(entry);
+}
+
static inline struct page *migration_entry_to_page(swp_entry_t entry)
{
struct page *p = pfn_to_page(swp_offset(entry));
@@ -218,6 +233,12 @@ static inline int is_migration_entry(swp_entry_t swp)
{
return 0;
}
+
+static inline unsigned long migration_entry_to_pfn(swp_entry_t entry)
+{
+ return 0;
+}
+
static inline struct page *migration_entry_to_page(swp_entry_t entry)
{
return NULL;
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 4f93f0953c41..8f4c54986f97 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -335,6 +335,17 @@ struct tcp_sock {
int linger2;
+
+/* Sock_ops bpf program related variables */
+#ifdef CONFIG_BPF
+ u8 bpf_sock_ops_cb_flags; /* Control calling BPF programs
+ * values defined in uapi/linux/tcp.h
+ */
+#define BPF_SOCK_OPS_TEST_FLAG(TP, ARG) (TP->bpf_sock_ops_cb_flags & ARG)
+#else
+#define BPF_SOCK_OPS_TEST_FLAG(TP, ARG) 0
+#endif
+
/* Receiver side RTT estimation */
struct {
u32 rtt_us;
diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h
index 853291714ae0..bae807eb2933 100644
--- a/include/linux/vermagic.h
+++ b/include/linux/vermagic.h
@@ -31,17 +31,11 @@
#else
#define MODULE_RANDSTRUCT_PLUGIN
#endif
-#ifdef RETPOLINE
-#define MODULE_VERMAGIC_RETPOLINE "retpoline "
-#else
-#define MODULE_VERMAGIC_RETPOLINE ""
-#endif
#define VERMAGIC_STRING \
UTS_RELEASE " " \
MODULE_VERMAGIC_SMP MODULE_VERMAGIC_PREEMPT \
MODULE_VERMAGIC_MODULE_UNLOAD MODULE_VERMAGIC_MODVERSIONS \
MODULE_ARCH_VERMAGIC \
- MODULE_RANDSTRUCT_PLUGIN \
- MODULE_VERMAGIC_RETPOLINE
+ MODULE_RANDSTRUCT_PLUGIN
diff --git a/include/net/erspan.h b/include/net/erspan.h
index acdf6843095d..5daa4866412b 100644
--- a/include/net/erspan.h
+++ b/include/net/erspan.h
@@ -46,6 +46,8 @@
* GRE proto ERSPAN type II = 0x88BE, type III = 0x22EB
*/
+#include <uapi/linux/erspan.h>
+
#define ERSPAN_VERSION 0x1 /* ERSPAN type II */
#define VER_MASK 0xf000
#define VLAN_MASK 0x0fff
@@ -65,17 +67,8 @@
#define GRA_MASK 0x0006
#define O_MASK 0x0001
-/* ERSPAN version 2 metadata header */
-struct erspan_md2 {
- __be32 timestamp;
- __be16 sgt; /* security group tag */
- __be16 flags;
-#define P_OFFSET 15
-#define FT_OFFSET 10
-#define HWID_OFFSET 4
-#define DIR_OFFSET 3
-#define GRA_OFFSET 1
-};
+#define HWID_OFFSET 4
+#define DIR_OFFSET 3
enum erspan_encap_type {
ERSPAN_ENCAP_NOVLAN = 0x0, /* originally without VLAN tag */
@@ -86,24 +79,64 @@ enum erspan_encap_type {
#define ERSPAN_V1_MDSIZE 4
#define ERSPAN_V2_MDSIZE 8
-struct erspan_metadata {
- union {
- __be32 index; /* Version 1 (type II)*/
- struct erspan_md2 md2; /* Version 2 (type III) */
- } u;
- int version;
-};
struct erspan_base_hdr {
- __be16 ver_vlan;
-#define VER_OFFSET 12
- __be16 session_id;
-#define COS_OFFSET 13
-#define EN_OFFSET 11
-#define BSO_OFFSET EN_OFFSET
-#define T_OFFSET 10
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u8 vlan_upper:4,
+ ver:4;
+ __u8 vlan:8;
+ __u8 session_id_upper:2,
+ t:1,
+ en:2,
+ cos:3;
+ __u8 session_id:8;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ __u8 ver: 4,
+ vlan_upper:4;
+ __u8 vlan:8;
+ __u8 cos:3,
+ en:2,
+ t:1,
+ session_id_upper:2;
+ __u8 session_id:8;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
};
+static inline void set_session_id(struct erspan_base_hdr *ershdr, u16 id)
+{
+ ershdr->session_id = id & 0xff;
+ ershdr->session_id_upper = (id >> 8) & 0x3;
+}
+
+static inline u16 get_session_id(const struct erspan_base_hdr *ershdr)
+{
+ return (ershdr->session_id_upper << 8) + ershdr->session_id;
+}
+
+static inline void set_vlan(struct erspan_base_hdr *ershdr, u16 vlan)
+{
+ ershdr->vlan = vlan & 0xff;
+ ershdr->vlan_upper = (vlan >> 8) & 0xf;
+}
+
+static inline u16 get_vlan(const struct erspan_base_hdr *ershdr)
+{
+ return (ershdr->vlan_upper << 8) + ershdr->vlan;
+}
+
+static inline void set_hwid(struct erspan_md2 *md2, u8 hwid)
+{
+ md2->hwid = hwid & 0xf;
+ md2->hwid_upper = (hwid >> 4) & 0x3;
+}
+
+static inline u8 get_hwid(const struct erspan_md2 *md2)
+{
+ return (md2->hwid_upper << 4) + md2->hwid;
+}
+
static inline int erspan_hdr_len(int version)
{
return sizeof(struct erspan_base_hdr) +
@@ -120,10 +153,10 @@ static inline u8 tos_to_cos(u8 tos)
}
static inline void erspan_build_header(struct sk_buff *skb,
- __be32 id, u32 index,
+ u32 id, u32 index,
bool truncate, bool is_ipv4)
{
- struct ethhdr *eth = eth_hdr(skb);
+ struct ethhdr *eth = (struct ethhdr *)skb->data;
enum erspan_encap_type enc_type;
struct erspan_base_hdr *ershdr;
struct erspan_metadata *ersmd;
@@ -154,12 +187,12 @@ static inline void erspan_build_header(struct sk_buff *skb,
memset(ershdr, 0, sizeof(*ershdr) + ERSPAN_V1_MDSIZE);
/* Build base header */
- ershdr->ver_vlan = htons((vlan_tci & VLAN_MASK) |
- (ERSPAN_VERSION << VER_OFFSET));
- ershdr->session_id = htons((u16)(ntohl(id) & ID_MASK) |
- ((tos_to_cos(tos) << COS_OFFSET) & COS_MASK) |
- (enc_type << EN_OFFSET & EN_MASK) |
- ((truncate << T_OFFSET) & T_MASK));
+ ershdr->ver = ERSPAN_VERSION;
+ ershdr->cos = tos_to_cos(tos);
+ ershdr->en = enc_type;
+ ershdr->t = truncate;
+ set_vlan(ershdr, vlan_tci);
+ set_session_id(ershdr, id);
/* Build metadata */
ersmd = (struct erspan_metadata *)(ershdr + 1);
@@ -187,10 +220,10 @@ static inline __be32 erspan_get_timestamp(void)
}
static inline void erspan_build_header_v2(struct sk_buff *skb,
- __be32 id, u8 direction, u16 hwid,
+ u32 id, u8 direction, u16 hwid,
bool truncate, bool is_ipv4)
{
- struct ethhdr *eth = eth_hdr(skb);
+ struct ethhdr *eth = (struct ethhdr *)skb->data;
struct erspan_base_hdr *ershdr;
struct erspan_metadata *md;
struct qtag_prefix {
@@ -198,7 +231,6 @@ static inline void erspan_build_header_v2(struct sk_buff *skb,
__be16 tci;
} *qp;
u16 vlan_tci = 0;
- u16 session_id;
u8 gra = 0; /* 100 usec */
u8 bso = 0; /* Bad/Short/Oversized */
u8 sgt = 0;
@@ -221,22 +253,23 @@ static inline void erspan_build_header_v2(struct sk_buff *skb,
memset(ershdr, 0, sizeof(*ershdr) + ERSPAN_V2_MDSIZE);
/* Build base header */
- ershdr->ver_vlan = htons((vlan_tci & VLAN_MASK) |
- (ERSPAN_VERSION2 << VER_OFFSET));
- session_id = (u16)(ntohl(id) & ID_MASK) |
- ((tos_to_cos(tos) << COS_OFFSET) & COS_MASK) |
- (bso << BSO_OFFSET & BSO_MASK) |
- ((truncate << T_OFFSET) & T_MASK);
- ershdr->session_id = htons(session_id);
+ ershdr->ver = ERSPAN_VERSION2;
+ ershdr->cos = tos_to_cos(tos);
+ ershdr->en = bso;
+ ershdr->t = truncate;
+ set_vlan(ershdr, vlan_tci);
+ set_session_id(ershdr, id);
/* Build metadata */
md = (struct erspan_metadata *)(ershdr + 1);
md->u.md2.timestamp = erspan_get_timestamp();
md->u.md2.sgt = htons(sgt);
- md->u.md2.flags = htons(((1 << P_OFFSET) & P_MASK) |
- ((hwid << HWID_OFFSET) & HWID_MASK) |
- ((direction << DIR_OFFSET) & DIR_MASK) |
- ((gra << GRA_OFFSET) & GRA_MASK));
+ md->u.md2.p = 1;
+ md->u.md2.ft = 0;
+ md->u.md2.dir = direction;
+ md->u.md2.gra = gra;
+ md->u.md2.o = 0;
+ set_hwid(&md->u.md2, hwid);
}
#endif
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 9dc1230d789c..8606c9113d3f 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -332,6 +332,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq,
int flags);
int ip6_flowlabel_init(void);
void ip6_flowlabel_cleanup(void);
+bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np);
static inline void fl6_sock_release(struct ip6_flowlabel *fl)
{
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 2e4b8e436d25..87406252f0a3 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -535,7 +535,7 @@ static inline unsigned char * tcf_get_base_ptr(struct sk_buff *skb, int layer)
{
switch (layer) {
case TCF_LAYER_LINK:
- return skb->data;
+ return skb_mac_header(skb);
case TCF_LAYER_NETWORK:
return skb_network_header(skb);
case TCF_LAYER_TRANSPORT:
@@ -602,17 +602,9 @@ struct tc_cls_common_offload {
u32 chain_index;
__be16 protocol;
u32 prio;
+ struct netlink_ext_ack *extack;
};
-static inline void
-tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common,
- const struct tcf_proto *tp)
-{
- cls_common->chain_index = tp->chain->index;
- cls_common->protocol = tp->protocol;
- cls_common->prio = tp->prio;
-}
-
struct tc_cls_u32_knode {
struct tcf_exts *exts;
struct tc_u32_sel *sel;
@@ -653,6 +645,31 @@ static inline bool tc_can_offload(const struct net_device *dev)
return dev->features & NETIF_F_HW_TC;
}
+static inline bool tc_can_offload_extack(const struct net_device *dev,
+ struct netlink_ext_ack *extack)
+{
+ bool can = tc_can_offload(dev);
+
+ if (!can)
+ NL_SET_ERR_MSG(extack, "TC offload is disabled on net device");
+
+ return can;
+}
+
+static inline bool
+tc_cls_can_offload_and_chain0(const struct net_device *dev,
+ struct tc_cls_common_offload *common)
+{
+ if (!tc_can_offload_extack(dev, common->extack))
+ return false;
+ if (common->chain_index) {
+ NL_SET_ERR_MSG(common->extack,
+ "Driver supports only offload of chain 0");
+ return false;
+ }
+ return true;
+}
+
static inline bool tc_skip_hw(u32 flags)
{
return (flags & TCA_CLS_FLAGS_SKIP_HW) ? true : false;
@@ -680,6 +697,18 @@ static inline bool tc_in_hw(u32 flags)
return (flags & TCA_CLS_FLAGS_IN_HW) ? true : false;
}
+static inline void
+tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common,
+ const struct tcf_proto *tp, u32 flags,
+ struct netlink_ext_ack *extack)
+{
+ cls_common->chain_index = tp->chain->index;
+ cls_common->protocol = tp->protocol;
+ cls_common->prio = tp->prio;
+ if (tc_skip_sw(flags))
+ cls_common->extack = extack;
+}
+
enum tc_fl_command {
TC_CLSFLOWER_REPLACE,
TC_CLSFLOWER_DESTROY,
@@ -722,7 +751,6 @@ struct tc_cls_bpf_offload {
struct bpf_prog *oldprog;
const char *name;
bool exts_integrated;
- u32 gen_flags;
};
struct tc_mqprio_qopt_offload {
diff --git a/include/net/route.h b/include/net/route.h
index d538e6db1afe..1eb9ce470e25 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -217,7 +217,7 @@ unsigned int inet_addr_type_dev_table(struct net *net,
const struct net_device *dev,
__be32 addr);
void ip_rt_multicast_event(struct in_device *);
-int ip_rt_ioctl(struct net *, unsigned int cmd, void __user *arg);
+int ip_rt_ioctl(struct net *, unsigned int cmd, struct rtentry *rt);
void ip_rt_get_source(u8 *src, struct sk_buff *skb, struct rtable *rt);
struct rtable *rt_dst_alloc(struct net_device *dev,
unsigned int flags, u16 type,
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index cd1be1f25c36..eac43e8ca96d 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -233,7 +233,8 @@ struct tcf_proto_ops {
const struct tcf_proto *,
struct tcf_result *);
int (*init)(struct tcf_proto*);
- void (*destroy)(struct tcf_proto*);
+ void (*destroy)(struct tcf_proto *tp,
+ struct netlink_ext_ack *extack);
void* (*get)(struct tcf_proto*, u32 handle);
int (*change)(struct net *net, struct sk_buff *,
diff --git a/include/net/tc_act/tc_csum.h b/include/net/tc_act/tc_csum.h
index 781f3433a0be..9470fd7e4350 100644
--- a/include/net/tc_act/tc_csum.h
+++ b/include/net/tc_act/tc_csum.h
@@ -6,10 +6,16 @@
#include <net/act_api.h>
#include <linux/tc_act/tc_csum.h>
+struct tcf_csum_params {
+ int action;
+ u32 update_flags;
+ struct rcu_head rcu;
+};
+
struct tcf_csum {
struct tc_action common;
- u32 update_flags;
+ struct tcf_csum_params __rcu *params;
};
#define to_tcf_csum(a) ((struct tcf_csum *)a)
@@ -24,7 +30,13 @@ static inline bool is_tcf_csum(const struct tc_action *a)
static inline u32 tcf_csum_update_flags(const struct tc_action *a)
{
- return to_tcf_csum(a)->update_flags;
+ u32 update_flags;
+
+ rcu_read_lock();
+ update_flags = rcu_dereference(to_tcf_csum(a)->params)->update_flags;
+ rcu_read_unlock();
+
+ return update_flags;
}
#endif /* __NET_TC_CSUM_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 5a1d26a18599..093e967a2960 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -2006,12 +2006,12 @@ void tcp_cleanup_ulp(struct sock *sk);
* program loaded).
*/
#ifdef CONFIG_BPF
-static inline int tcp_call_bpf(struct sock *sk, int op)
+static inline int tcp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args)
{
struct bpf_sock_ops_kern sock_ops;
int ret;
- memset(&sock_ops, 0, sizeof(sock_ops));
+ memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
if (sk_fullsock(sk)) {
sock_ops.is_fullsock = 1;
sock_owned_by_me(sk);
@@ -2019,6 +2019,8 @@ static inline int tcp_call_bpf(struct sock *sk, int op)
sock_ops.sk = sk;
sock_ops.op = op;
+ if (nargs > 0)
+ memcpy(sock_ops.args, args, nargs * sizeof(*args));
ret = BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
if (ret == 0)
@@ -2027,18 +2029,46 @@ static inline int tcp_call_bpf(struct sock *sk, int op)
ret = -1;
return ret;
}
+
+static inline int tcp_call_bpf_2arg(struct sock *sk, int op, u32 arg1, u32 arg2)
+{
+ u32 args[2] = {arg1, arg2};
+
+ return tcp_call_bpf(sk, op, 2, args);
+}
+
+static inline int tcp_call_bpf_3arg(struct sock *sk, int op, u32 arg1, u32 arg2,
+ u32 arg3)
+{
+ u32 args[3] = {arg1, arg2, arg3};
+
+ return tcp_call_bpf(sk, op, 3, args);
+}
+
#else
-static inline int tcp_call_bpf(struct sock *sk, int op)
+static inline int tcp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args)
{
return -EPERM;
}
+
+static inline int tcp_call_bpf_2arg(struct sock *sk, int op, u32 arg1, u32 arg2)
+{
+ return -EPERM;
+}
+
+static inline int tcp_call_bpf_3arg(struct sock *sk, int op, u32 arg1, u32 arg2,
+ u32 arg3)
+{
+ return -EPERM;
+}
+
#endif
static inline u32 tcp_timeout_init(struct sock *sk)
{
int timeout;
- timeout = tcp_call_bpf(sk, BPF_SOCK_OPS_TIMEOUT_INIT);
+ timeout = tcp_call_bpf(sk, BPF_SOCK_OPS_TIMEOUT_INIT, 0, NULL);
if (timeout <= 0)
timeout = TCP_TIMEOUT_INIT;
@@ -2049,7 +2079,7 @@ static inline u32 tcp_rwnd_init_bpf(struct sock *sk)
{
int rwnd;
- rwnd = tcp_call_bpf(sk, BPF_SOCK_OPS_RWND_INIT);
+ rwnd = tcp_call_bpf(sk, BPF_SOCK_OPS_RWND_INIT, 0, NULL);
if (rwnd < 0)
rwnd = 0;
@@ -2058,7 +2088,7 @@ static inline u32 tcp_rwnd_init_bpf(struct sock *sk)
static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
{
- return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN) == 1);
+ return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN, 0, NULL) == 1);
}
#if IS_ENABLED(CONFIG_SMC)
diff --git a/include/net/wext.h b/include/net/wext.h
index e51f067fdb3a..aa192a670304 100644
--- a/include/net/wext.h
+++ b/include/net/wext.h
@@ -7,7 +7,7 @@
struct net;
#ifdef CONFIG_WEXT_CORE
-int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd,
+int wext_handle_ioctl(struct net *net, unsigned int cmd,
void __user *arg);
int compat_wext_handle_ioctl(struct net *net, unsigned int cmd,
unsigned long arg);
@@ -15,7 +15,7 @@ int compat_wext_handle_ioctl(struct net *net, unsigned int cmd,
struct iw_statistics *get_wireless_stats(struct net_device *dev);
int call_commit_handler(struct net_device *dev);
#else
-static inline int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd,
+static inline int wext_handle_ioctl(struct net *net, unsigned int cmd,
void __user *arg)
{
return -EINVAL;
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 2e6d4fe6b0ba..7d2077665c0b 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1904,6 +1904,14 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
struct xfrm_user_offload *xuo);
bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x);
+static inline void xfrm_dev_state_advance_esn(struct xfrm_state *x)
+{
+ struct xfrm_state_offload *xso = &x->xso;
+
+ if (xso->dev && xso->dev->xfrmdev_ops->xdo_dev_state_advance_esn)
+ xso->dev->xfrmdev_ops->xdo_dev_state_advance_esn(x);
+}
+
static inline bool xfrm_dst_offload_ok(struct dst_entry *dst)
{
struct xfrm_state *x = dst->xfrm;
@@ -1974,6 +1982,10 @@ static inline bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x
return false;
}
+static inline void xfrm_dev_state_advance_esn(struct xfrm_state *x)
+{
+}
+
static inline bool xfrm_dst_offload_ok(struct dst_entry *dst)
{
return false;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 406c19d6016b..db6bdc375126 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -642,6 +642,14 @@ union bpf_attr {
* @optlen: length of optval in bytes
* Return: 0 or negative error
*
+ * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags)
+ * Set callback flags for sock_ops
+ * @bpf_sock_ops: pointer to bpf_sock_ops_kern struct
+ * @flags: flags value
+ * Return: 0 for no error
+ * -EINVAL if there is no full tcp socket
+ * bits in flags that are not supported by current kernel
+ *
* int bpf_skb_adjust_room(skb, len_diff, mode, flags)
* Grow or shrink room in sk_buff.
* @skb: pointer to skb
@@ -748,7 +756,8 @@ union bpf_attr {
FN(perf_event_read_value), \
FN(perf_prog_read_value), \
FN(getsockopt), \
- FN(override_return),
+ FN(override_return), \
+ FN(sock_ops_cb_flags_set),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -952,8 +961,9 @@ struct bpf_map_info {
struct bpf_sock_ops {
__u32 op;
union {
- __u32 reply;
- __u32 replylong[4];
+ __u32 args[4]; /* Optionally passed to bpf program */
+ __u32 reply; /* Returned by bpf program */
+ __u32 replylong[4]; /* Optionally returned by bpf prog */
};
__u32 family;
__u32 remote_ip4; /* Stored in network byte order */
@@ -968,8 +978,39 @@ struct bpf_sock_ops {
*/
__u32 snd_cwnd;
__u32 srtt_us; /* Averaged RTT << 3 in usecs */
+ __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */
+ __u32 state;
+ __u32 rtt_min;
+ __u32 snd_ssthresh;
+ __u32 rcv_nxt;
+ __u32 snd_nxt;
+ __u32 snd_una;
+ __u32 mss_cache;
+ __u32 ecn_flags;
+ __u32 rate_delivered;
+ __u32 rate_interval_us;
+ __u32 packets_out;
+ __u32 retrans_out;
+ __u32 total_retrans;
+ __u32 segs_in;
+ __u32 data_segs_in;
+ __u32 segs_out;
+ __u32 data_segs_out;
+ __u32 lost_out;
+ __u32 sacked_out;
+ __u32 sk_txhash;
+ __u64 bytes_received;
+ __u64 bytes_acked;
};
+/* Definitions for bpf_sock_ops_cb_flags */
+#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0)
+#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1)
+#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2)
+#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x7 /* Mask of all currently
+ * supported cb flags
+ */
+
/* List of known BPF sock_ops operators.
* New entries can only be added at the end
*/
@@ -1003,6 +1044,43 @@ enum {
* a congestion threshold. RTTs above
* this indicate congestion
*/
+ BPF_SOCK_OPS_RTO_CB, /* Called when an RTO has triggered.
+ * Arg1: value of icsk_retransmits
+ * Arg2: value of icsk_rto
+ * Arg3: whether RTO has expired
+ */
+ BPF_SOCK_OPS_RETRANS_CB, /* Called when skb is retransmitted.
+ * Arg1: sequence number of 1st byte
+ * Arg2: # segments
+ * Arg3: return value of
+ * tcp_transmit_skb (0 => success)
+ */
+ BPF_SOCK_OPS_STATE_CB, /* Called when TCP changes state.
+ * Arg1: old_state
+ * Arg2: new_state
+ */
+};
+
+/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
+ * changes between the TCP and BPF versions. Ideally this should never happen.
+ * If it does, we need to add code to convert them before calling
+ * the BPF sock_ops function.
+ */
+enum {
+ BPF_TCP_ESTABLISHED = 1,
+ BPF_TCP_SYN_SENT,
+ BPF_TCP_SYN_RECV,
+ BPF_TCP_FIN_WAIT1,
+ BPF_TCP_FIN_WAIT2,
+ BPF_TCP_TIME_WAIT,
+ BPF_TCP_CLOSE,
+ BPF_TCP_CLOSE_WAIT,
+ BPF_TCP_LAST_ACK,
+ BPF_TCP_LISTEN,
+ BPF_TCP_CLOSING, /* Now a valid state */
+ BPF_TCP_NEW_SYN_RECV,
+
+ BPF_TCP_MAX_STATES /* Leave at the end! */
};
#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */
diff --git a/include/uapi/linux/erspan.h b/include/uapi/linux/erspan.h
new file mode 100644
index 000000000000..841573019ae1
--- /dev/null
+++ b/include/uapi/linux/erspan.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * ERSPAN Tunnel Metadata
+ *
+ * Copyright (c) 2018 VMware
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * Userspace API for metadata mode ERSPAN tunnel
+ */
+#ifndef _UAPI_ERSPAN_H
+#define _UAPI_ERSPAN_H
+
+#include <linux/types.h> /* For __beXX in userspace */
+#include <asm/byteorder.h>
+
+/* ERSPAN version 2 metadata header */
+struct erspan_md2 {
+ __be32 timestamp;
+ __be16 sgt; /* security group tag */
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u8 hwid_upper:2,
+ ft:5,
+ p:1;
+ __u8 o:1,
+ gra:2,
+ dir:1,
+ hwid:4;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ __u8 p:1,
+ ft:5,
+ hwid_upper:2;
+ __u8 hwid:4,
+ dir:1,
+ gra:2,
+ o:1;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+};
+
+struct erspan_metadata {
+ int version;
+ union {
+ __be32 index; /* Version 1 (type II)*/
+ struct erspan_md2 md2; /* Version 2 (type III) */
+ } u;
+};
+
+#endif /* _UAPI_ERSPAN_H */
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index f8f04fed6186..8616131e2c61 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -161,6 +161,8 @@ enum {
IFLA_EVENT,
IFLA_NEW_NETNSID,
IFLA_IF_NETNSID,
+ IFLA_CARRIER_UP_COUNT,
+ IFLA_CARRIER_DOWN_COUNT,
__IFLA_MAX
};
diff --git a/include/uapi/linux/if_macsec.h b/include/uapi/linux/if_macsec.h
index 2e522835a4af..98e4d5d7c45c 100644
--- a/include/uapi/linux/if_macsec.h
+++ b/include/uapi/linux/if_macsec.h
@@ -18,7 +18,7 @@
#define MACSEC_GENL_NAME "macsec"
#define MACSEC_GENL_VERSION 1
-#define MACSEC_MAX_KEY_LEN 256
+#define MACSEC_MAX_KEY_LEN 128
#define MACSEC_KEYID_LEN 16
@@ -26,9 +26,9 @@
#define MACSEC_CIPHER_ID_GCM_AES_128 0x0080C20001000001ULL
#define MACSEC_CIPHER_ID_GCM_AES_256 0x0080C20001000002ULL
-#define MACSEC_DEFAULT_CIPHER_ID MACSEC_CIPHER_ID_GCM_AES_128
/* deprecated cipher ID for GCM-AES-128 */
-#define MACSEC_DEFAULT_CIPHER_ALT 0x0080020001000001ULL
+#define MACSEC_DEFAULT_CIPHER_ID 0x0080020001000001ULL
+#define MACSEC_DEFAULT_CIPHER_ALT MACSEC_CIPHER_ID_GCM_AES_128
#define MACSEC_MIN_ICV_LEN 8
#define MACSEC_MAX_ICV_LEN 32
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 496e59a2738b..8fb90a0819c3 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -932,6 +932,8 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_HYPERV_SYNIC2 148
#define KVM_CAP_HYPERV_VP_INDEX 149
#define KVM_CAP_S390_AIS_MIGRATION 150
+#define KVM_CAP_PPC_GET_CPU_CHAR 151
+#define KVM_CAP_S390_BPB 152
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1261,6 +1263,8 @@ struct kvm_s390_ucas_mapping {
#define KVM_PPC_CONFIGURE_V3_MMU _IOW(KVMIO, 0xaf, struct kvm_ppc_mmuv3_cfg)
/* Available with KVM_CAP_PPC_RADIX_MMU */
#define KVM_PPC_GET_RMMU_INFO _IOW(KVMIO, 0xb0, struct kvm_ppc_rmmu_info)
+/* Available with KVM_CAP_PPC_GET_CPU_CHAR */
+#define KVM_PPC_GET_CPU_CHAR _IOR(KVMIO, 0xb1, struct kvm_ppc_cpu_char)
/* ioctl for vm fd */
#define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device)
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index dcfab5e3b55c..713e56ce681f 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -363,6 +363,7 @@ enum ovs_tunnel_key_attr {
OVS_TUNNEL_KEY_ATTR_IPV6_SRC, /* struct in6_addr src IPv6 address. */
OVS_TUNNEL_KEY_ATTR_IPV6_DST, /* struct in6_addr dst IPv6 address. */
OVS_TUNNEL_KEY_ATTR_PAD,
+ OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS, /* struct erspan_metadata */
__OVS_TUNNEL_KEY_ATTR_MAX
};
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 3aa0658add76..5f35f93dcab2 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -782,6 +782,137 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
}
EXPORT_SYMBOL_GPL(__bpf_call_base);
+/* All UAPI available opcodes. */
+#define BPF_INSN_MAP(INSN_2, INSN_3) \
+ /* 32 bit ALU operations. */ \
+ /* Register based. */ \
+ INSN_3(ALU, ADD, X), \
+ INSN_3(ALU, SUB, X), \
+ INSN_3(ALU, AND, X), \
+ INSN_3(ALU, OR, X), \
+ INSN_3(ALU, LSH, X), \
+ INSN_3(ALU, RSH, X), \
+ INSN_3(ALU, XOR, X), \
+ INSN_3(ALU, MUL, X), \
+ INSN_3(ALU, MOV, X), \
+ INSN_3(ALU, DIV, X), \
+ INSN_3(ALU, MOD, X), \
+ INSN_2(ALU, NEG), \
+ INSN_3(ALU, END, TO_BE), \
+ INSN_3(ALU, END, TO_LE), \
+ /* Immediate based. */ \
+ INSN_3(ALU, ADD, K), \
+ INSN_3(ALU, SUB, K), \
+ INSN_3(ALU, AND, K), \
+ INSN_3(ALU, OR, K), \
+ INSN_3(ALU, LSH, K), \
+ INSN_3(ALU, RSH, K), \
+ INSN_3(ALU, XOR, K), \
+ INSN_3(ALU, MUL, K), \
+ INSN_3(ALU, MOV, K), \
+ INSN_3(ALU, DIV, K), \
+ INSN_3(ALU, MOD, K), \
+ /* 64 bit ALU operations. */ \
+ /* Register based. */ \
+ INSN_3(ALU64, ADD, X), \
+ INSN_3(ALU64, SUB, X), \
+ INSN_3(ALU64, AND, X), \
+ INSN_3(ALU64, OR, X), \
+ INSN_3(ALU64, LSH, X), \
+ INSN_3(ALU64, RSH, X), \
+ INSN_3(ALU64, XOR, X), \
+ INSN_3(ALU64, MUL, X), \
+ INSN_3(ALU64, MOV, X), \
+ INSN_3(ALU64, ARSH, X), \
+ INSN_3(ALU64, DIV, X), \
+ INSN_3(ALU64, MOD, X), \
+ INSN_2(ALU64, NEG), \
+ /* Immediate based. */ \
+ INSN_3(ALU64, ADD, K), \
+ INSN_3(ALU64, SUB, K), \
+ INSN_3(ALU64, AND, K), \
+ INSN_3(ALU64, OR, K), \
+ INSN_3(ALU64, LSH, K), \
+ INSN_3(ALU64, RSH, K), \
+ INSN_3(ALU64, XOR, K), \
+ INSN_3(ALU64, MUL, K), \
+ INSN_3(ALU64, MOV, K), \
+ INSN_3(ALU64, ARSH, K), \
+ INSN_3(ALU64, DIV, K), \
+ INSN_3(ALU64, MOD, K), \
+ /* Call instruction. */ \
+ INSN_2(JMP, CALL), \
+ /* Exit instruction. */ \
+ INSN_2(JMP, EXIT), \
+ /* Jump instructions. */ \
+ /* Register based. */ \
+ INSN_3(JMP, JEQ, X), \
+ INSN_3(JMP, JNE, X), \
+ INSN_3(JMP, JGT, X), \
+ INSN_3(JMP, JLT, X), \
+ INSN_3(JMP, JGE, X), \
+ INSN_3(JMP, JLE, X), \
+ INSN_3(JMP, JSGT, X), \
+ INSN_3(JMP, JSLT, X), \
+ INSN_3(JMP, JSGE, X), \
+ INSN_3(JMP, JSLE, X), \
+ INSN_3(JMP, JSET, X), \
+ /* Immediate based. */ \
+ INSN_3(JMP, JEQ, K), \
+ INSN_3(JMP, JNE, K), \
+ INSN_3(JMP, JGT, K), \
+ INSN_3(JMP, JLT, K), \
+ INSN_3(JMP, JGE, K), \
+ INSN_3(JMP, JLE, K), \
+ INSN_3(JMP, JSGT, K), \
+ INSN_3(JMP, JSLT, K), \
+ INSN_3(JMP, JSGE, K), \
+ INSN_3(JMP, JSLE, K), \
+ INSN_3(JMP, JSET, K), \
+ INSN_2(JMP, JA), \
+ /* Store instructions. */ \
+ /* Register based. */ \
+ INSN_3(STX, MEM, B), \
+ INSN_3(STX, MEM, H), \
+ INSN_3(STX, MEM, W), \
+ INSN_3(STX, MEM, DW), \
+ INSN_3(STX, XADD, W), \
+ INSN_3(STX, XADD, DW), \
+ /* Immediate based. */ \
+ INSN_3(ST, MEM, B), \
+ INSN_3(ST, MEM, H), \
+ INSN_3(ST, MEM, W), \
+ INSN_3(ST, MEM, DW), \
+ /* Load instructions. */ \
+ /* Register based. */ \
+ INSN_3(LDX, MEM, B), \
+ INSN_3(LDX, MEM, H), \
+ INSN_3(LDX, MEM, W), \
+ INSN_3(LDX, MEM, DW), \
+ /* Immediate based. */ \
+ INSN_3(LD, IMM, DW), \
+ /* Misc (old cBPF carry-over). */ \
+ INSN_3(LD, ABS, B), \
+ INSN_3(LD, ABS, H), \
+ INSN_3(LD, ABS, W), \
+ INSN_3(LD, IND, B), \
+ INSN_3(LD, IND, H), \
+ INSN_3(LD, IND, W)
+
+bool bpf_opcode_in_insntable(u8 code)
+{
+#define BPF_INSN_2_TBL(x, y) [BPF_##x | BPF_##y] = true
+#define BPF_INSN_3_TBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = true
+ static const bool public_insntable[256] = {
+ [0 ... 255] = false,
+ /* Now overwrite non-defaults ... */
+ BPF_INSN_MAP(BPF_INSN_2_TBL, BPF_INSN_3_TBL),
+ };
+#undef BPF_INSN_3_TBL
+#undef BPF_INSN_2_TBL
+ return public_insntable[code];
+}
+
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
/**
* __bpf_prog_run - run eBPF program on a given context
@@ -793,115 +924,18 @@ EXPORT_SYMBOL_GPL(__bpf_call_base);
static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u64 *stack)
{
u64 tmp;
+#define BPF_INSN_2_LBL(x, y) [BPF_##x | BPF_##y] = &&x##_##y
+#define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z
static const void *jumptable[256] = {
[0 ... 255] = &&default_label,
/* Now overwrite non-defaults ... */
- /* 32 bit ALU operations */
- [BPF_ALU | BPF_ADD | BPF_X] = &&ALU_ADD_X,
- [BPF_ALU | BPF_ADD | BPF_K] = &&ALU_ADD_K,
- [BPF_ALU | BPF_SUB | BPF_X] = &&ALU_SUB_X,
- [BPF_ALU | BPF_SUB | BPF_K] = &&ALU_SUB_K,
- [BPF_ALU | BPF_AND | BPF_X] = &&ALU_AND_X,
- [BPF_ALU | BPF_AND | BPF_K] = &&ALU_AND_K,
- [BPF_ALU | BPF_OR | BPF_X] = &&ALU_OR_X,
- [BPF_ALU | BPF_OR | BPF_K] = &&ALU_OR_K,
- [BPF_ALU | BPF_LSH | BPF_X] = &&ALU_LSH_X,
- [BPF_ALU | BPF_LSH | BPF_K] = &&ALU_LSH_K,
- [BPF_ALU | BPF_RSH | BPF_X] = &&ALU_RSH_X,
- [BPF_ALU | BPF_RSH | BPF_K] = &&ALU_RSH_K,
- [BPF_ALU | BPF_XOR | BPF_X] = &&ALU_XOR_X,
- [BPF_ALU | BPF_XOR | BPF_K] = &&ALU_XOR_K,
- [BPF_ALU | BPF_MUL | BPF_X] = &&ALU_MUL_X,
- [BPF_ALU | BPF_MUL | BPF_K] = &&ALU_MUL_K,
- [BPF_ALU | BPF_MOV | BPF_X] = &&ALU_MOV_X,
- [BPF_ALU | BPF_MOV | BPF_K] = &&ALU_MOV_K,
- [BPF_ALU | BPF_DIV | BPF_X] = &&ALU_DIV_X,
- [BPF_ALU | BPF_DIV | BPF_K] = &&ALU_DIV_K,
- [BPF_ALU | BPF_MOD | BPF_X] = &&ALU_MOD_X,
- [BPF_ALU | BPF_MOD | BPF_K] = &&ALU_MOD_K,
- [BPF_ALU | BPF_NEG] = &&ALU_NEG,
- [BPF_ALU | BPF_END | BPF_TO_BE] = &&ALU_END_TO_BE,
- [BPF_ALU | BPF_END | BPF_TO_LE] = &&ALU_END_TO_LE,
- /* 64 bit ALU operations */
- [BPF_ALU64 | BPF_ADD | BPF_X] = &&ALU64_ADD_X,
- [BPF_ALU64 | BPF_ADD | BPF_K] = &&ALU64_ADD_K,
- [BPF_ALU64 | BPF_SUB | BPF_X] = &&ALU64_SUB_X,
- [BPF_ALU64 | BPF_SUB | BPF_K] = &&ALU64_SUB_K,
- [BPF_ALU64 | BPF_AND | BPF_X] = &&ALU64_AND_X,
- [BPF_ALU64 | BPF_AND | BPF_K] = &&ALU64_AND_K,
- [BPF_ALU64 | BPF_OR | BPF_X] = &&ALU64_OR_X,
- [BPF_ALU64 | BPF_OR | BPF_K] = &&ALU64_OR_K,
- [BPF_ALU64 | BPF_LSH | BPF_X] = &&ALU64_LSH_X,
- [BPF_ALU64 | BPF_LSH | BPF_K] = &&ALU64_LSH_K,
- [BPF_ALU64 | BPF_RSH | BPF_X] = &&ALU64_RSH_X,
- [BPF_ALU64 | BPF_RSH | BPF_K] = &&ALU64_RSH_K,
- [BPF_ALU64 | BPF_XOR | BPF_X] = &&ALU64_XOR_X,
- [BPF_ALU64 | BPF_XOR | BPF_K] = &&ALU64_XOR_K,
- [BPF_ALU64 | BPF_MUL | BPF_X] = &&ALU64_MUL_X,
- [BPF_ALU64 | BPF_MUL | BPF_K] = &&ALU64_MUL_K,
- [BPF_ALU64 | BPF_MOV | BPF_X] = &&ALU64_MOV_X,
- [BPF_ALU64 | BPF_MOV | BPF_K] = &&ALU64_MOV_K,
- [BPF_ALU64 | BPF_ARSH | BPF_X] = &&ALU64_ARSH_X,
- [BPF_ALU64 | BPF_ARSH | BPF_K] = &&ALU64_ARSH_K,
- [BPF_ALU64 | BPF_DIV | BPF_X] = &&ALU64_DIV_X,
- [BPF_ALU64 | BPF_DIV | BPF_K] = &&ALU64_DIV_K,
- [BPF_ALU64 | BPF_MOD | BPF_X] = &&ALU64_MOD_X,
- [BPF_ALU64 | BPF_MOD | BPF_K] = &&ALU64_MOD_K,
- [BPF_ALU64 | BPF_NEG] = &&ALU64_NEG,
- /* Call instruction */
- [BPF_JMP | BPF_CALL] = &&JMP_CALL,
+ BPF_INSN_MAP(BPF_INSN_2_LBL, BPF_INSN_3_LBL),
+ /* Non-UAPI available opcodes. */
[BPF_JMP | BPF_CALL_ARGS] = &&JMP_CALL_ARGS,
[BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL,
- /* Jumps */
- [BPF_JMP | BPF_JA] = &&JMP_JA,
- [BPF_JMP | BPF_JEQ | BPF_X] = &&JMP_JEQ_X,
- [BPF_JMP | BPF_JEQ | BPF_K] = &&JMP_JEQ_K,
- [BPF_JMP | BPF_JNE | BPF_X] = &&JMP_JNE_X,
- [BPF_JMP | BPF_JNE | BPF_K] = &&JMP_JNE_K,
- [BPF_JMP | BPF_JGT | BPF_X] = &&JMP_JGT_X,
- [BPF_JMP | BPF_JGT | BPF_K] = &&JMP_JGT_K,
- [BPF_JMP | BPF_JLT | BPF_X] = &&JMP_JLT_X,
- [BPF_JMP | BPF_JLT | BPF_K] = &&JMP_JLT_K,
- [BPF_JMP | BPF_JGE | BPF_X] = &&JMP_JGE_X,
- [BPF_JMP | BPF_JGE | BPF_K] = &&JMP_JGE_K,
- [BPF_JMP | BPF_JLE | BPF_X] = &&JMP_JLE_X,
- [BPF_JMP | BPF_JLE | BPF_K] = &&JMP_JLE_K,
- [BPF_JMP | BPF_JSGT | BPF_X] = &&JMP_JSGT_X,
- [BPF_JMP | BPF_JSGT | BPF_K] = &&JMP_JSGT_K,
- [BPF_JMP | BPF_JSLT | BPF_X] = &&JMP_JSLT_X,
- [BPF_JMP | BPF_JSLT | BPF_K] = &&JMP_JSLT_K,
- [BPF_JMP | BPF_JSGE | BPF_X] = &&JMP_JSGE_X,
- [BPF_JMP | BPF_JSGE | BPF_K] = &&JMP_JSGE_K,
- [BPF_JMP | BPF_JSLE | BPF_X] = &&JMP_JSLE_X,
- [BPF_JMP | BPF_JSLE | BPF_K] = &&JMP_JSLE_K,
- [BPF_JMP | BPF_JSET | BPF_X] = &&JMP_JSET_X,
- [BPF_JMP | BPF_JSET | BPF_K] = &&JMP_JSET_K,
- /* Program return */
- [BPF_JMP | BPF_EXIT] = &&JMP_EXIT,
- /* Store instructions */
- [BPF_STX | BPF_MEM | BPF_B] = &&STX_MEM_B,
- [BPF_STX | BPF_MEM | BPF_H] = &&STX_MEM_H,
- [BPF_STX | BPF_MEM | BPF_W] = &&STX_MEM_W,
- [BPF_STX | BPF_MEM | BPF_DW] = &&STX_MEM_DW,
- [BPF_STX | BPF_XADD | BPF_W] = &&STX_XADD_W,
- [BPF_STX | BPF_XADD | BPF_DW] = &&STX_XADD_DW,
- [BPF_ST | BPF_MEM | BPF_B] = &&ST_MEM_B,
- [BPF_ST | BPF_MEM | BPF_H] = &&ST_MEM_H,
- [BPF_ST | BPF_MEM | BPF_W] = &&ST_MEM_W,
- [BPF_ST | BPF_MEM | BPF_DW] = &&ST_MEM_DW,
- /* Load instructions */
- [BPF_LDX | BPF_MEM | BPF_B] = &&LDX_MEM_B,
- [BPF_LDX | BPF_MEM | BPF_H] = &&LDX_MEM_H,
- [BPF_LDX | BPF_MEM | BPF_W] = &&LDX_MEM_W,
- [BPF_LDX | BPF_MEM | BPF_DW] = &&LDX_MEM_DW,
- [BPF_LD | BPF_ABS | BPF_W] = &&LD_ABS_W,
- [BPF_LD | BPF_ABS | BPF_H] = &&LD_ABS_H,
- [BPF_LD | BPF_ABS | BPF_B] = &&LD_ABS_B,
- [BPF_LD | BPF_IND | BPF_W] = &&LD_IND_W,
- [BPF_LD | BPF_IND | BPF_H] = &&LD_IND_H,
- [BPF_LD | BPF_IND | BPF_B] = &&LD_IND_B,
- [BPF_LD | BPF_IMM | BPF_DW] = &&LD_IMM_DW,
};
+#undef BPF_INSN_3_LBL
+#undef BPF_INSN_2_LBL
u32 tail_call_cnt = 0;
void *ptr;
int off;
@@ -965,14 +999,10 @@ select_insn:
(*(s64 *) &DST) >>= IMM;
CONT;
ALU64_MOD_X:
- if (unlikely(SRC == 0))
- return 0;
div64_u64_rem(DST, SRC, &tmp);
DST = tmp;
CONT;
ALU_MOD_X:
- if (unlikely((u32)SRC == 0))
- return 0;
tmp = (u32) DST;
DST = do_div(tmp, (u32) SRC);
CONT;
@@ -985,13 +1015,9 @@ select_insn:
DST = do_div(tmp, (u32) IMM);
CONT;
ALU64_DIV_X:
- if (unlikely(SRC == 0))
- return 0;
DST = div64_u64(DST, SRC);
CONT;
ALU_DIV_X:
- if (unlikely((u32)SRC == 0))
- return 0;
tmp = (u32) DST;
do_div(tmp, (u32) SRC);
DST = (u32) tmp;
@@ -1302,8 +1328,14 @@ load_byte:
goto load_byte;
default_label:
- /* If we ever reach this, we have a bug somewhere. */
- WARN_RATELIMIT(1, "unknown opcode %02x\n", insn->code);
+ /* If we ever reach this, we have a bug somewhere. Die hard here
+ * instead of just returning 0; we could be somewhere in a subprog,
+ * so execution could continue otherwise which we do /not/ want.
+ *
+ * Note, verifier whitelists all opcodes in bpf_opcode_in_insntable().
+ */
+ pr_warn("BPF interpreter: unknown opcode %02x\n", insn->code);
+ BUG_ON(1);
return 0;
}
STACK_FRAME_NON_STANDARD(___bpf_prog_run); /* jump table */
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index d7ea96218516..7b469d10d0e9 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -593,11 +593,10 @@ unlock:
static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
{
+ struct lpm_trie_node *node, *next_node = NULL, *parent, *search_root;
struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
struct bpf_lpm_trie_key *key = _key, *next_key = _next_key;
- struct lpm_trie_node *node, *next_node = NULL, *parent;
struct lpm_trie_node **node_stack = NULL;
- struct lpm_trie_node __rcu **root;
int err = 0, stack_ptr = -1;
unsigned int next_bit;
size_t matchlen;
@@ -614,22 +613,21 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
*/
/* Empty trie */
- if (!rcu_dereference(trie->root))
+ search_root = rcu_dereference(trie->root);
+ if (!search_root)
return -ENOENT;
/* For invalid key, find the leftmost node in the trie */
- if (!key || key->prefixlen > trie->max_prefixlen) {
- root = &trie->root;
+ if (!key || key->prefixlen > trie->max_prefixlen)
goto find_leftmost;
- }
node_stack = kmalloc(trie->max_prefixlen * sizeof(struct lpm_trie_node *),
- GFP_USER | __GFP_NOWARN);
+ GFP_ATOMIC | __GFP_NOWARN);
if (!node_stack)
return -ENOMEM;
/* Try to find the exact node for the given key */
- for (node = rcu_dereference(trie->root); node;) {
+ for (node = search_root; node;) {
node_stack[++stack_ptr] = node;
matchlen = longest_prefix_match(trie, node, key);
if (node->prefixlen != matchlen ||
@@ -640,10 +638,8 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
node = rcu_dereference(node->child[next_bit]);
}
if (!node || node->prefixlen != key->prefixlen ||
- (node->flags & LPM_TREE_NODE_FLAG_IM)) {
- root = &trie->root;
+ (node->flags & LPM_TREE_NODE_FLAG_IM))
goto find_leftmost;
- }
/* The node with the exactly-matching key has been found,
* find the first node in postorder after the matched node.
@@ -651,10 +647,10 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
node = node_stack[stack_ptr];
while (stack_ptr > 0) {
parent = node_stack[stack_ptr - 1];
- if (rcu_dereference(parent->child[0]) == node &&
- rcu_dereference(parent->child[1])) {
- root = &parent->child[1];
- goto find_leftmost;
+ if (rcu_dereference(parent->child[0]) == node) {
+ search_root = rcu_dereference(parent->child[1]);
+ if (search_root)
+ goto find_leftmost;
}
if (!(parent->flags & LPM_TREE_NODE_FLAG_IM)) {
next_node = parent;
@@ -673,7 +669,7 @@ find_leftmost:
/* Find the leftmost non-intermediate node, all intermediate nodes
* have exact two children, so this function will never return NULL.
*/
- for (node = rcu_dereference(*root); node;) {
+ for (node = search_root; node;) {
if (!(node->flags & LPM_TREE_NODE_FLAG_IM))
next_node = node;
node = rcu_dereference(node->child[0]);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 5bdb0cc84ad2..e24aa3241387 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -709,10 +709,7 @@ static int map_update_elem(union bpf_attr *attr)
err = bpf_percpu_hash_update(map, key, value, attr->flags);
} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
err = bpf_percpu_array_update(map, key, value, attr->flags);
- } else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
- map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
- map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY ||
- map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
+ } else if (IS_FD_ARRAY(map)) {
rcu_read_lock();
err = bpf_fd_array_map_update_elem(map, f.file, key, value,
attr->flags);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index dfb138b46488..5fb69a85d967 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4981,6 +4981,13 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
next_insn:
insn++;
i++;
+ continue;
+ }
+
+ /* Basic sanity check before we invest more work here. */
+ if (!bpf_opcode_in_insntable(insn->code)) {
+ verbose(env, "unknown opcode %02x\n", insn->code);
+ return -EINVAL;
}
}
@@ -5064,14 +5071,21 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
return new_prog;
}
-/* The verifier does more data flow analysis than llvm and will not explore
- * branches that are dead at run time. Malicious programs can have dead code
- * too. Therefore replace all dead at-run-time code with nops.
+/* The verifier does more data flow analysis than llvm and will not
+ * explore branches that are dead at run time. Malicious programs can
+ * have dead code too. Therefore replace all dead at-run-time code
+ * with 'ja -1'.
+ *
+ * Just nops are not optimal, e.g. if they would sit at the end of the
+ * program and through another bug we would manage to jump there, then
+ * we'd execute beyond program memory otherwise. Returning exception
+ * code also wouldn't work since we can have subprogs where the dead
+ * code could be located.
*/
static void sanitize_dead_code(struct bpf_verifier_env *env)
{
struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
- struct bpf_insn nop = BPF_MOV64_REG(BPF_REG_0, BPF_REG_0);
+ struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
struct bpf_insn *insn = env->prog->insnsi;
const int insn_cnt = env->prog->len;
int i;
@@ -5079,7 +5093,7 @@ static void sanitize_dead_code(struct bpf_verifier_env *env)
for (i = 0; i < insn_cnt; i++) {
if (aux_data[i].seen)
continue;
- memcpy(insn + i, &nop, sizeof(nop));
+ memcpy(insn + i, &trap, sizeof(trap));
}
}
@@ -5386,15 +5400,37 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
int i, cnt, delta = 0;
for (i = 0; i < insn_cnt; i++, insn++) {
- if (insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
+ if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
+ insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
+ insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
- /* due to JIT bugs clear upper 32-bits of src register
- * before div/mod operation
- */
- insn_buf[0] = BPF_MOV32_REG(insn->src_reg, insn->src_reg);
- insn_buf[1] = *insn;
- cnt = 2;
- new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+ bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
+ struct bpf_insn mask_and_div[] = {
+ BPF_MOV32_REG(insn->src_reg, insn->src_reg),
+ /* Rx div 0 -> 0 */
+ BPF_JMP_IMM(BPF_JNE, insn->src_reg, 0, 2),
+ BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ *insn,
+ };
+ struct bpf_insn mask_and_mod[] = {
+ BPF_MOV32_REG(insn->src_reg, insn->src_reg),
+ /* Rx mod 0 -> Rx */
+ BPF_JMP_IMM(BPF_JEQ, insn->src_reg, 0, 1),
+ *insn,
+ };
+ struct bpf_insn *patchlet;
+
+ if (insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
+ insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
+ patchlet = mask_and_div + (is64 ? 1 : 0);
+ cnt = ARRAY_SIZE(mask_and_div) - (is64 ? 1 : 0);
+ } else {
+ patchlet = mask_and_mod + (is64 ? 1 : 0);
+ cnt = ARRAY_SIZE(mask_and_mod) - (is64 ? 1 : 0);
+ }
+
+ new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
if (!new_prog)
return -ENOMEM;
diff --git a/kernel/irq/matrix.c b/kernel/irq/matrix.c
index 0ba0dd8863a7..5187dfe809ac 100644
--- a/kernel/irq/matrix.c
+++ b/kernel/irq/matrix.c
@@ -321,15 +321,23 @@ void irq_matrix_remove_reserved(struct irq_matrix *m)
int irq_matrix_alloc(struct irq_matrix *m, const struct cpumask *msk,
bool reserved, unsigned int *mapped_cpu)
{
- unsigned int cpu;
+ unsigned int cpu, best_cpu, maxavl = 0;
+ struct cpumap *cm;
+ unsigned int bit;
+ best_cpu = UINT_MAX;
for_each_cpu(cpu, msk) {
- struct cpumap *cm = per_cpu_ptr(m->maps, cpu);
- unsigned int bit;
+ cm = per_cpu_ptr(m->maps, cpu);
- if (!cm->online)
+ if (!cm->online || cm->available <= maxavl)
continue;
+ best_cpu = cpu;
+ maxavl = cm->available;
+ }
+
+ if (maxavl) {
+ cm = per_cpu_ptr(m->maps, best_cpu);
bit = matrix_alloc_area(m, cm, 1, false);
if (bit < m->alloc_end) {
cm->allocated++;
@@ -338,8 +346,8 @@ int irq_matrix_alloc(struct irq_matrix *m, const struct cpumask *msk,
m->global_available--;
if (reserved)
m->global_reserved--;
- *mapped_cpu = cpu;
- trace_irq_matrix_alloc(bit, cpu, m, cm);
+ *mapped_cpu = best_cpu;
+ trace_irq_matrix_alloc(bit, best_cpu, m, cm);
return bit;
}
}
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index ccdf3664e4a9..554b517c61a0 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1119,15 +1119,11 @@ static struct ftrace_ops global_ops = {
};
/*
- * This is used by __kernel_text_address() to return true if the
- * address is on a dynamically allocated trampoline that would
- * not return true for either core_kernel_text() or
- * is_module_text_address().
+ * Used by the stack undwinder to know about dynamic ftrace trampolines.
*/
-bool is_ftrace_trampoline(unsigned long addr)
+struct ftrace_ops *ftrace_ops_trampoline(unsigned long addr)
{
- struct ftrace_ops *op;
- bool ret = false;
+ struct ftrace_ops *op = NULL;
/*
* Some of the ops may be dynamically allocated,
@@ -1144,15 +1140,24 @@ bool is_ftrace_trampoline(unsigned long addr)
if (op->trampoline && op->trampoline_size)
if (addr >= op->trampoline &&
addr < op->trampoline + op->trampoline_size) {
- ret = true;
- goto out;
+ preempt_enable_notrace();
+ return op;
}
} while_for_each_ftrace_op(op);
-
- out:
preempt_enable_notrace();
- return ret;
+ return NULL;
+}
+
+/*
+ * This is used by __kernel_text_address() to return true if the
+ * address is on a dynamically allocated trampoline that would
+ * not return true for either core_kernel_text() or
+ * is_module_text_address().
+ */
+bool is_ftrace_trampoline(unsigned long addr)
+{
+ return ftrace_ops_trampoline(addr) != NULL;
}
struct ftrace_page {
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 2a8d8a294345..8e3f20a18a06 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2374,6 +2374,15 @@ void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
}
EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
+/*
+ * Skip 3:
+ *
+ * trace_buffer_unlock_commit_regs()
+ * trace_event_buffer_commit()
+ * trace_event_raw_event_xxx()
+*/
+# define STACK_SKIP 3
+
void trace_buffer_unlock_commit_regs(struct trace_array *tr,
struct ring_buffer *buffer,
struct ring_buffer_event *event,
@@ -2383,16 +2392,12 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr,
__buffer_unlock_commit(buffer, event);
/*
- * If regs is not set, then skip the following callers:
- * trace_buffer_unlock_commit_regs
- * event_trigger_unlock_commit
- * trace_event_buffer_commit
- * trace_event_raw_event_sched_switch
+ * If regs is not set, then skip the necessary functions.
* Note, we can still get here via blktrace, wakeup tracer
* and mmiotrace, but that's ok if they lose a function or
- * two. They are that meaningful.
+ * two. They are not that meaningful.
*/
- ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
+ ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
ftrace_trace_userstack(buffer, flags, pc);
}
@@ -2579,11 +2584,13 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
trace.skip = skip;
/*
- * Add two, for this function and the call to save_stack_trace()
+ * Add one, for this function and the call to save_stack_trace()
* If regs is set, then these functions will not be in the way.
*/
+#ifndef CONFIG_UNWINDER_ORC
if (!regs)
- trace.skip += 2;
+ trace.skip++;
+#endif
/*
* Since events can happen in NMIs there's no safe way to
@@ -2711,11 +2718,10 @@ void trace_dump_stack(int skip)
local_save_flags(flags);
- /*
- * Skip 3 more, seems to get us at the caller of
- * this function.
- */
- skip += 3;
+#ifndef CONFIG_UNWINDER_ORC
+ /* Skip 1 to skip this function. */
+ skip++;
+#endif
__ftrace_trace_stack(global_trace.trace_buffer.buffer,
flags, skip, preempt_count(), NULL);
}
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index f2ac9d44f6c4..87411482a46f 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -1123,13 +1123,22 @@ static __init int register_trigger_snapshot_cmd(void) { return 0; }
#endif /* CONFIG_TRACER_SNAPSHOT */
#ifdef CONFIG_STACKTRACE
+#ifdef CONFIG_UNWINDER_ORC
+/* Skip 2:
+ * event_triggers_post_call()
+ * trace_event_raw_event_xxx()
+ */
+# define STACK_SKIP 2
+#else
/*
- * Skip 3:
+ * Skip 4:
* stacktrace_trigger()
* event_triggers_post_call()
+ * trace_event_buffer_commit()
* trace_event_raw_event_xxx()
*/
-#define STACK_SKIP 3
+#define STACK_SKIP 4
+#endif
static void
stacktrace_trigger(struct event_trigger_data *data, void *rec)
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 27f7ad12c4b1..b611cd36e22d 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -154,6 +154,24 @@ function_trace_call(unsigned long ip, unsigned long parent_ip,
preempt_enable_notrace();
}
+#ifdef CONFIG_UNWINDER_ORC
+/*
+ * Skip 2:
+ *
+ * function_stack_trace_call()
+ * ftrace_call()
+ */
+#define STACK_SKIP 2
+#else
+/*
+ * Skip 3:
+ * __trace_stack()
+ * function_stack_trace_call()
+ * ftrace_call()
+ */
+#define STACK_SKIP 3
+#endif
+
static void
function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct pt_regs *pt_regs)
@@ -180,15 +198,7 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
if (likely(disabled == 1)) {
pc = preempt_count();
trace_function(tr, ip, parent_ip, flags, pc);
- /*
- * skip over 5 funcs:
- * __ftrace_trace_stack,
- * __trace_stack,
- * function_stack_trace_call
- * ftrace_list_func
- * ftrace_call
- */
- __trace_stack(tr, flags, 5, pc);
+ __trace_stack(tr, flags, STACK_SKIP, pc);
}
atomic_dec(&data->disabled);
@@ -367,14 +377,27 @@ ftrace_traceoff(unsigned long ip, unsigned long parent_ip,
tracer_tracing_off(tr);
}
+#ifdef CONFIG_UNWINDER_ORC
/*
- * Skip 4:
+ * Skip 3:
+ *
+ * function_trace_probe_call()
+ * ftrace_ops_assist_func()
+ * ftrace_call()
+ */
+#define FTRACE_STACK_SKIP 3
+#else
+/*
+ * Skip 5:
+ *
+ * __trace_stack()
* ftrace_stacktrace()
* function_trace_probe_call()
- * ftrace_ops_list_func()
+ * ftrace_ops_assist_func()
* ftrace_call()
*/
-#define STACK_SKIP 4
+#define FTRACE_STACK_SKIP 5
+#endif
static __always_inline void trace_stack(struct trace_array *tr)
{
@@ -384,7 +407,7 @@ static __always_inline void trace_stack(struct trace_array *tr)
local_save_flags(flags);
pc = preempt_count();
- __trace_stack(tr, flags, STACK_SKIP, pc);
+ __trace_stack(tr, flags, FTRACE_STACK_SKIP, pc);
}
static void
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index e3938e395cba..4cd9ea9b3449 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -2003,10 +2003,14 @@ static struct bpf_test tests[] = {
{ { 4, 0 }, { 5, 10 } }
},
{
- "INT: DIV by zero",
+ /* This one doesn't go through verifier, but is just raw insn
+ * as opposed to cBPF tests from here. Thus div by 0 tests are
+ * done in test_verifier in BPF kselftests.
+ */
+ "INT: DIV by -1",
.u.insns_int = {
BPF_ALU64_REG(BPF_MOV, R6, R1),
- BPF_ALU64_IMM(BPF_MOV, R7, 0),
+ BPF_ALU64_IMM(BPF_MOV, R7, -1),
BPF_LD_ABS(BPF_B, 3),
BPF_ALU32_REG(BPF_DIV, R0, R7),
BPF_EXIT_INSN(),
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index d22b84310f6d..ae3c2a35d61b 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -30,10 +30,37 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw)
return true;
}
+static inline bool pfn_in_hpage(struct page *hpage, unsigned long pfn)
+{
+ unsigned long hpage_pfn = page_to_pfn(hpage);
+
+ /* THP can be referenced by any subpage */
+ return pfn >= hpage_pfn && pfn - hpage_pfn < hpage_nr_pages(hpage);
+}
+
+/**
+ * check_pte - check if @pvmw->page is mapped at the @pvmw->pte
+ *
+ * page_vma_mapped_walk() found a place where @pvmw->page is *potentially*
+ * mapped. check_pte() has to validate this.
+ *
+ * @pvmw->pte may point to empty PTE, swap PTE or PTE pointing to arbitrary
+ * page.
+ *
+ * If PVMW_MIGRATION flag is set, returns true if @pvmw->pte contains migration
+ * entry that points to @pvmw->page or any subpage in case of THP.
+ *
+ * If PVMW_MIGRATION flag is not set, returns true if @pvmw->pte points to
+ * @pvmw->page or any subpage in case of THP.
+ *
+ * Otherwise, return false.
+ *
+ */
static bool check_pte(struct page_vma_mapped_walk *pvmw)
{
+ unsigned long pfn;
+
if (pvmw->flags & PVMW_MIGRATION) {
-#ifdef CONFIG_MIGRATION
swp_entry_t entry;
if (!is_swap_pte(*pvmw->pte))
return false;
@@ -41,38 +68,25 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw)
if (!is_migration_entry(entry))
return false;
- if (migration_entry_to_page(entry) - pvmw->page >=
- hpage_nr_pages(pvmw->page)) {
- return false;
- }
- if (migration_entry_to_page(entry) < pvmw->page)
- return false;
-#else
- WARN_ON_ONCE(1);
-#endif
- } else {
- if (is_swap_pte(*pvmw->pte)) {
- swp_entry_t entry;
- entry = pte_to_swp_entry(*pvmw->pte);
- if (is_device_private_entry(entry) &&
- device_private_entry_to_page(entry) == pvmw->page)
- return true;
- }
+ pfn = migration_entry_to_pfn(entry);
+ } else if (is_swap_pte(*pvmw->pte)) {
+ swp_entry_t entry;
- if (!pte_present(*pvmw->pte))
+ /* Handle un-addressable ZONE_DEVICE memory */
+ entry = pte_to_swp_entry(*pvmw->pte);
+ if (!is_device_private_entry(entry))
return false;
- /* THP can be referenced by any subpage */
- if (pte_page(*pvmw->pte) - pvmw->page >=
- hpage_nr_pages(pvmw->page)) {
- return false;
- }
- if (pte_page(*pvmw->pte) < pvmw->page)
+ pfn = device_private_entry_to_pfn(entry);
+ } else {
+ if (!pte_present(*pvmw->pte))
return false;
+
+ pfn = pte_pfn(*pvmw->pte);
}
- return true;
+ return pfn_in_hpage(pvmw->page, pfn);
}
/**
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 80559fd11b7e..8e13a64d8c99 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -760,7 +760,7 @@ static inline void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
static inline bool br_multicast_is_router(struct net_bridge *br)
{
- return 0;
+ return false;
}
static inline bool br_multicast_querier_exists(struct net_bridge *br,
diff --git a/net/can/Kconfig b/net/can/Kconfig
index a15c0e0d1fc7..a4399be54ff4 100644
--- a/net/can/Kconfig
+++ b/net/can/Kconfig
@@ -11,7 +11,7 @@ menuconfig CAN
1991, mainly for automotive, but now widely used in marine
(NMEA2000), industrial, and medical applications.
More information on the CAN network protocol family PF_CAN
- is contained in <Documentation/networking/can.txt>.
+ is contained in <Documentation/networking/can.rst>.
If you want CAN support you should say Y here and also to the
specific driver for your controller(s) below.
diff --git a/net/core/dev.c b/net/core/dev.c
index 94435cd09072..4670ccabe23a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1694,7 +1694,6 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
/**
* call_netdevice_notifiers_info - call all network notifier blocks
* @val: value passed unmodified to notifier function
- * @dev: net_device pointer passed unmodified to notifier function
* @info: notifier information data
*
* Call all network notifier blocks. Parameters and return value
@@ -3167,10 +3166,21 @@ static void qdisc_pkt_len_init(struct sk_buff *skb)
hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
/* + transport layer */
- if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
- hdr_len += tcp_hdrlen(skb);
- else
- hdr_len += sizeof(struct udphdr);
+ if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
+ const struct tcphdr *th;
+ struct tcphdr _tcphdr;
+
+ th = skb_header_pointer(skb, skb_transport_offset(skb),
+ sizeof(_tcphdr), &_tcphdr);
+ if (likely(th))
+ hdr_len += __tcp_hdrlen(th);
+ } else {
+ struct udphdr _udphdr;
+
+ if (skb_header_pointer(skb, skb_transport_offset(skb),
+ sizeof(_udphdr), &_udphdr))
+ hdr_len += sizeof(struct udphdr);
+ }
if (shinfo->gso_type & SKB_GSO_DODGY)
gso_segs = DIV_ROUND_UP(skb->len - hdr_len,
@@ -6425,6 +6435,7 @@ rollback:
* netdev_upper_dev_link - Add a link to the upper device
* @dev: device
* @upper_dev: new upper device
+ * @extack: netlink extended ack
*
* Adds a link to device which is upper to this one. The caller must hold
* the RTNL lock. On a failure a negative errno code is returned.
@@ -6446,6 +6457,7 @@ EXPORT_SYMBOL(netdev_upper_dev_link);
* @upper_dev: new upper device
* @upper_priv: upper device private
* @upper_info: upper info to be passed down via notifier
+ * @extack: netlink extended ack
*
* Adds a link to device which is upper to this one. In this case, only
* one master upper device can be linked, although other non-master devices
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 7e690d0ccd05..0ab1af04296c 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -18,26 +18,10 @@
* match. --pb
*/
-static int dev_ifname(struct net *net, struct ifreq __user *arg)
+static int dev_ifname(struct net *net, struct ifreq *ifr)
{
- struct ifreq ifr;
- int error;
-
- /*
- * Fetch the caller's info block.
- */
-
- if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
- return -EFAULT;
- ifr.ifr_name[IFNAMSIZ-1] = 0;
-
- error = netdev_get_name(net, ifr.ifr_name, ifr.ifr_ifindex);
- if (error)
- return error;
-
- if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
- return -EFAULT;
- return 0;
+ ifr->ifr_name[IFNAMSIZ-1] = 0;
+ return netdev_get_name(net, ifr->ifr_name, ifr->ifr_ifindex);
}
static gifconf_func_t *gifconf_list[NPROTO];
@@ -66,9 +50,8 @@ EXPORT_SYMBOL(register_gifconf);
* Thus we will need a 'compatibility mode'.
*/
-static int dev_ifconf(struct net *net, char __user *arg)
+int dev_ifconf(struct net *net, struct ifconf *ifc, int size)
{
- struct ifconf ifc;
struct net_device *dev;
char __user *pos;
int len;
@@ -79,11 +62,8 @@ static int dev_ifconf(struct net *net, char __user *arg)
* Fetch the caller's info block.
*/
- if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
- return -EFAULT;
-
- pos = ifc.ifc_buf;
- len = ifc.ifc_len;
+ pos = ifc->ifc_buf;
+ len = ifc->ifc_len;
/*
* Loop over the interfaces, and write an info block for each.
@@ -95,10 +75,10 @@ static int dev_ifconf(struct net *net, char __user *arg)
if (gifconf_list[i]) {
int done;
if (!pos)
- done = gifconf_list[i](dev, NULL, 0);
+ done = gifconf_list[i](dev, NULL, 0, size);
else
done = gifconf_list[i](dev, pos + total,
- len - total);
+ len - total, size);
if (done < 0)
return -EFAULT;
total += done;
@@ -109,12 +89,12 @@ static int dev_ifconf(struct net *net, char __user *arg)
/*
* All done. Write the updated control block back to the caller.
*/
- ifc.ifc_len = total;
+ ifc->ifc_len = total;
/*
* Both BSD and Solaris return 0 here, so we do too.
*/
- return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
+ return 0;
}
/*
@@ -406,53 +386,24 @@ EXPORT_SYMBOL(dev_load);
* positive or a negative errno code on error.
*/
-int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
+int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_copyout)
{
- struct ifreq ifr;
int ret;
char *colon;
- /* One special case: SIOCGIFCONF takes ifconf argument
- and requires shared lock, because it sleeps writing
- to user space.
- */
-
- if (cmd == SIOCGIFCONF) {
- rtnl_lock();
- ret = dev_ifconf(net, (char __user *) arg);
- rtnl_unlock();
- return ret;
- }
+ if (need_copyout)
+ *need_copyout = true;
if (cmd == SIOCGIFNAME)
- return dev_ifname(net, (struct ifreq __user *)arg);
-
- /*
- * Take care of Wireless Extensions. Unfortunately struct iwreq
- * isn't a proper subset of struct ifreq (it's 8 byte shorter)
- * so we need to treat it specially, otherwise applications may
- * fault if the struct they're passing happens to land at the
- * end of a mapped page.
- */
- if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
- struct iwreq iwr;
-
- if (copy_from_user(&iwr, arg, sizeof(iwr)))
- return -EFAULT;
-
- iwr.ifr_name[sizeof(iwr.ifr_name) - 1] = 0;
+ return dev_ifname(net, ifr);
- return wext_handle_ioctl(net, &iwr, cmd, arg);
- }
-
- if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
- return -EFAULT;
-
- ifr.ifr_name[IFNAMSIZ-1] = 0;
+ ifr->ifr_name[IFNAMSIZ-1] = 0;
- colon = strchr(ifr.ifr_name, ':');
+ colon = strchr(ifr->ifr_name, ':');
if (colon)
*colon = 0;
+ dev_load(net, ifr->ifr_name);
+
/*
* See which interface the caller is talking about.
*/
@@ -472,31 +423,19 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
case SIOCGIFMAP:
case SIOCGIFINDEX:
case SIOCGIFTXQLEN:
- dev_load(net, ifr.ifr_name);
rcu_read_lock();
- ret = dev_ifsioc_locked(net, &ifr, cmd);
+ ret = dev_ifsioc_locked(net, ifr, cmd);
rcu_read_unlock();
- if (!ret) {
- if (colon)
- *colon = ':';
- if (copy_to_user(arg, &ifr,
- sizeof(struct ifreq)))
- ret = -EFAULT;
- }
+ if (colon)
+ *colon = ':';
return ret;
case SIOCETHTOOL:
- dev_load(net, ifr.ifr_name);
rtnl_lock();
- ret = dev_ethtool(net, &ifr);
+ ret = dev_ethtool(net, ifr);
rtnl_unlock();
- if (!ret) {
- if (colon)
- *colon = ':';
- if (copy_to_user(arg, &ifr,
- sizeof(struct ifreq)))
- ret = -EFAULT;
- }
+ if (colon)
+ *colon = ':';
return ret;
/*
@@ -510,17 +449,11 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
case SIOCSIFNAME:
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
- dev_load(net, ifr.ifr_name);
rtnl_lock();
- ret = dev_ifsioc(net, &ifr, cmd);
+ ret = dev_ifsioc(net, ifr, cmd);
rtnl_unlock();
- if (!ret) {
- if (colon)
- *colon = ':';
- if (copy_to_user(arg, &ifr,
- sizeof(struct ifreq)))
- ret = -EFAULT;
- }
+ if (colon)
+ *colon = ':';
return ret;
/*
@@ -561,10 +494,11 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
/* fall through */
case SIOCBONDSLAVEINFOQUERY:
case SIOCBONDINFOQUERY:
- dev_load(net, ifr.ifr_name);
rtnl_lock();
- ret = dev_ifsioc(net, &ifr, cmd);
+ ret = dev_ifsioc(net, ifr, cmd);
rtnl_unlock();
+ if (need_copyout)
+ *need_copyout = false;
return ret;
case SIOCGIFMEM:
@@ -584,13 +518,9 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
cmd == SIOCGHWTSTAMP ||
(cmd >= SIOCDEVPRIVATE &&
cmd <= SIOCDEVPRIVATE + 15)) {
- dev_load(net, ifr.ifr_name);
rtnl_lock();
- ret = dev_ifsioc(net, &ifr, cmd);
+ ret = dev_ifsioc(net, ifr, cmd);
rtnl_unlock();
- if (!ret && copy_to_user(arg, &ifr,
- sizeof(struct ifreq)))
- ret = -EFAULT;
return ret;
}
return -ENOTTY;
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 66d36705fb9d..18d385ed8237 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -3182,6 +3182,7 @@ int devlink_resource_register(struct devlink *devlink,
resource_list = &parent_resource->resource_list;
resource->parent = parent_resource;
} else {
+ kfree(resource);
err = -EINVAL;
goto out;
}
diff --git a/net/core/filter.c b/net/core/filter.c
index 18da42a81d0c..08ab4c65a998 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -401,8 +401,8 @@ do_pass:
/* Classic BPF expects A and X to be reset first. These need
* to be guaranteed to be the first two instructions.
*/
- *new_insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
- *new_insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_X, BPF_REG_X);
+ *new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
+ *new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_X, BPF_REG_X);
/* All programs must keep CTX in callee saved BPF_REG_CTX.
* In eBPF case it's done by the compiler, here we need to
@@ -459,8 +459,15 @@ do_pass:
break;
if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) ||
- fp->code == (BPF_ALU | BPF_MOD | BPF_X))
+ fp->code == (BPF_ALU | BPF_MOD | BPF_X)) {
*insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X);
+ /* Error with exception code on div/mod by 0.
+ * For cBPF programs, this was always return 0.
+ */
+ *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_X, 0, 2);
+ *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
+ *insn++ = BPF_EXIT_INSN();
+ }
*insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
break;
@@ -3232,6 +3239,29 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
ret = -EINVAL;
}
#ifdef CONFIG_INET
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (level == SOL_IPV6) {
+ if (optlen != sizeof(int) || sk->sk_family != AF_INET6)
+ return -EINVAL;
+
+ val = *((int *)optval);
+ /* Only some options are supported */
+ switch (optname) {
+ case IPV6_TCLASS:
+ if (val < -1 || val > 0xff) {
+ ret = -EINVAL;
+ } else {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ if (val == -1)
+ val = 0;
+ np->tclass = val;
+ }
+ break;
+ default:
+ ret = -EINVAL;
+ }
+#endif
} else if (level == SOL_TCP &&
sk->sk_prot->setsockopt == tcp_setsockopt) {
if (optname == TCP_CONGESTION) {
@@ -3241,7 +3271,8 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
strncpy(name, optval, min_t(long, optlen,
TCP_CA_NAME_MAX-1));
name[TCP_CA_NAME_MAX-1] = 0;
- ret = tcp_set_congestion_control(sk, name, false, reinit);
+ ret = tcp_set_congestion_control(sk, name, false,
+ reinit);
} else {
struct tcp_sock *tp = tcp_sk(sk);
@@ -3307,6 +3338,22 @@ BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
} else {
goto err_clear;
}
+#if IS_ENABLED(CONFIG_IPV6)
+ } else if (level == SOL_IPV6) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ if (optlen != sizeof(int) || sk->sk_family != AF_INET6)
+ goto err_clear;
+
+ /* Only some options are supported */
+ switch (optname) {
+ case IPV6_TCLASS:
+ *((int *)optval) = (int)np->tclass;
+ break;
+ default:
+ goto err_clear;
+ }
+#endif
} else {
goto err_clear;
}
@@ -3328,6 +3375,33 @@ static const struct bpf_func_proto bpf_getsockopt_proto = {
.arg5_type = ARG_CONST_SIZE,
};
+BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock,
+ int, argval)
+{
+ struct sock *sk = bpf_sock->sk;
+ int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS;
+
+ if (!sk_fullsock(sk))
+ return -EINVAL;
+
+#ifdef CONFIG_INET
+ if (val)
+ tcp_sk(sk)->bpf_sock_ops_cb_flags = val;
+
+ return argval & (~BPF_SOCK_OPS_ALL_CB_FLAGS);
+#else
+ return -EINVAL;
+#endif
+}
+
+static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = {
+ .func = bpf_sock_ops_cb_flags_set,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+
static const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id)
{
@@ -3510,6 +3584,8 @@ static const struct bpf_func_proto *
return &bpf_setsockopt_proto;
case BPF_FUNC_getsockopt:
return &bpf_getsockopt_proto;
+ case BPF_FUNC_sock_ops_cb_flags_set:
+ return &bpf_sock_ops_cb_flags_set_proto;
case BPF_FUNC_sock_map_update:
return &bpf_sock_map_update_proto;
default:
@@ -3826,34 +3902,44 @@ void bpf_warn_invalid_xdp_action(u32 act)
}
EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
-static bool __is_valid_sock_ops_access(int off, int size)
+static bool sock_ops_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ struct bpf_insn_access_aux *info)
{
+ const int size_default = sizeof(__u32);
+
if (off < 0 || off >= sizeof(struct bpf_sock_ops))
return false;
+
/* The verifier guarantees that size > 0. */
if (off % size != 0)
return false;
- if (size != sizeof(__u32))
- return false;
-
- return true;
-}
-static bool sock_ops_is_valid_access(int off, int size,
- enum bpf_access_type type,
- struct bpf_insn_access_aux *info)
-{
if (type == BPF_WRITE) {
switch (off) {
- case offsetof(struct bpf_sock_ops, op) ...
- offsetof(struct bpf_sock_ops, replylong[3]):
+ case offsetof(struct bpf_sock_ops, reply):
+ case offsetof(struct bpf_sock_ops, sk_txhash):
+ if (size != size_default)
+ return false;
break;
default:
return false;
}
+ } else {
+ switch (off) {
+ case bpf_ctx_range_till(struct bpf_sock_ops, bytes_received,
+ bytes_acked):
+ if (size != sizeof(__u64))
+ return false;
+ break;
+ default:
+ if (size != size_default)
+ return false;
+ break;
+ }
}
- return __is_valid_sock_ops_access(off, size);
+ return true;
}
static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write,
@@ -4470,10 +4556,37 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
is_fullsock));
break;
-/* Helper macro for adding read access to tcp_sock fields. */
-#define SOCK_OPS_GET_TCP32(FIELD_NAME) \
+ case offsetof(struct bpf_sock_ops, state):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_state) != 1);
+
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+ struct bpf_sock_ops_kern, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sock_ops_kern, sk));
+ *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->dst_reg,
+ offsetof(struct sock_common, skc_state));
+ break;
+
+ case offsetof(struct bpf_sock_ops, rtt_min):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) !=
+ sizeof(struct minmax));
+ BUILD_BUG_ON(sizeof(struct minmax) <
+ sizeof(struct minmax_sample));
+
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+ struct bpf_sock_ops_kern, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct bpf_sock_ops_kern, sk));
+ *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+ offsetof(struct tcp_sock, rtt_min) +
+ FIELD_SIZEOF(struct minmax_sample, t));
+ break;
+
+/* Helper macro for adding read access to tcp_sock or sock fields. */
+#define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
do { \
- BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, FIELD_NAME) != 4); \
+ BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \
+ FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
struct bpf_sock_ops_kern, \
is_fullsock), \
@@ -4485,17 +4598,159 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
struct bpf_sock_ops_kern, sk),\
si->dst_reg, si->src_reg, \
offsetof(struct bpf_sock_ops_kern, sk));\
- *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, \
- offsetof(struct tcp_sock, FIELD_NAME)); \
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(OBJ, \
+ OBJ_FIELD), \
+ si->dst_reg, si->dst_reg, \
+ offsetof(OBJ, OBJ_FIELD)); \
+ } while (0)
+
+/* Helper macro for adding write access to tcp_sock or sock fields.
+ * The macro is called with two registers, dst_reg which contains a pointer
+ * to ctx (context) and src_reg which contains the value that should be
+ * stored. However, we need an additional register since we cannot overwrite
+ * dst_reg because it may be used later in the program.
+ * Instead we "borrow" one of the other register. We first save its value
+ * into a new (temp) field in bpf_sock_ops_kern, use it, and then restore
+ * it at the end of the macro.
+ */
+#define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
+ do { \
+ int reg = BPF_REG_9; \
+ BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \
+ FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \
+ if (si->dst_reg == reg || si->src_reg == reg) \
+ reg--; \
+ if (si->dst_reg == reg || si->src_reg == reg) \
+ reg--; \
+ *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg, \
+ offsetof(struct bpf_sock_ops_kern, \
+ temp)); \
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
+ struct bpf_sock_ops_kern, \
+ is_fullsock), \
+ reg, si->dst_reg, \
+ offsetof(struct bpf_sock_ops_kern, \
+ is_fullsock)); \
+ *insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2); \
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
+ struct bpf_sock_ops_kern, sk),\
+ reg, si->dst_reg, \
+ offsetof(struct bpf_sock_ops_kern, sk));\
+ *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD), \
+ reg, si->src_reg, \
+ offsetof(OBJ, OBJ_FIELD)); \
+ *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg, \
+ offsetof(struct bpf_sock_ops_kern, \
+ temp)); \
+ } while (0)
+
+#define SOCK_OPS_GET_OR_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ, TYPE) \
+ do { \
+ if (TYPE == BPF_WRITE) \
+ SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \
+ else \
+ SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \
} while (0)
case offsetof(struct bpf_sock_ops, snd_cwnd):
- SOCK_OPS_GET_TCP32(snd_cwnd);
+ SOCK_OPS_GET_FIELD(snd_cwnd, snd_cwnd, struct tcp_sock);
break;
case offsetof(struct bpf_sock_ops, srtt_us):
- SOCK_OPS_GET_TCP32(srtt_us);
+ SOCK_OPS_GET_FIELD(srtt_us, srtt_us, struct tcp_sock);
break;
+
+ case offsetof(struct bpf_sock_ops, bpf_sock_ops_cb_flags):
+ SOCK_OPS_GET_FIELD(bpf_sock_ops_cb_flags, bpf_sock_ops_cb_flags,
+ struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, snd_ssthresh):
+ SOCK_OPS_GET_FIELD(snd_ssthresh, snd_ssthresh, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, rcv_nxt):
+ SOCK_OPS_GET_FIELD(rcv_nxt, rcv_nxt, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, snd_nxt):
+ SOCK_OPS_GET_FIELD(snd_nxt, snd_nxt, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, snd_una):
+ SOCK_OPS_GET_FIELD(snd_una, snd_una, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, mss_cache):
+ SOCK_OPS_GET_FIELD(mss_cache, mss_cache, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, ecn_flags):
+ SOCK_OPS_GET_FIELD(ecn_flags, ecn_flags, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, rate_delivered):
+ SOCK_OPS_GET_FIELD(rate_delivered, rate_delivered,
+ struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, rate_interval_us):
+ SOCK_OPS_GET_FIELD(rate_interval_us, rate_interval_us,
+ struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, packets_out):
+ SOCK_OPS_GET_FIELD(packets_out, packets_out, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, retrans_out):
+ SOCK_OPS_GET_FIELD(retrans_out, retrans_out, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, total_retrans):
+ SOCK_OPS_GET_FIELD(total_retrans, total_retrans,
+ struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, segs_in):
+ SOCK_OPS_GET_FIELD(segs_in, segs_in, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, data_segs_in):
+ SOCK_OPS_GET_FIELD(data_segs_in, data_segs_in, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, segs_out):
+ SOCK_OPS_GET_FIELD(segs_out, segs_out, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, data_segs_out):
+ SOCK_OPS_GET_FIELD(data_segs_out, data_segs_out,
+ struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, lost_out):
+ SOCK_OPS_GET_FIELD(lost_out, lost_out, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, sacked_out):
+ SOCK_OPS_GET_FIELD(sacked_out, sacked_out, struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, sk_txhash):
+ SOCK_OPS_GET_OR_SET_FIELD(sk_txhash, sk_txhash,
+ struct sock, type);
+ break;
+
+ case offsetof(struct bpf_sock_ops, bytes_received):
+ SOCK_OPS_GET_FIELD(bytes_received, bytes_received,
+ struct tcp_sock);
+ break;
+
+ case offsetof(struct bpf_sock_ops, bytes_acked):
+ SOCK_OPS_GET_FIELD(bytes_acked, bytes_acked, struct tcp_sock);
+ break;
+
}
return insn - insn_buf;
}
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 982861607f88..e38e641e98d5 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -92,7 +92,7 @@ static bool linkwatch_urgent_event(struct net_device *dev)
if (dev->ifindex != dev_get_iflink(dev))
return true;
- if (dev->priv_flags & IFF_TEAM_PORT)
+ if (netif_is_lag_port(dev) || netif_is_lag_master(dev))
return true;
return netif_carrier_ok(dev) && qdisc_tx_changing(dev);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 7bf8b85ade16..c4a28f4667b6 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -295,10 +295,31 @@ static ssize_t carrier_changes_show(struct device *dev,
struct net_device *netdev = to_net_dev(dev);
return sprintf(buf, fmt_dec,
- atomic_read(&netdev->carrier_changes));
+ atomic_read(&netdev->carrier_up_count) +
+ atomic_read(&netdev->carrier_down_count));
}
static DEVICE_ATTR_RO(carrier_changes);
+static ssize_t carrier_up_count_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct net_device *netdev = to_net_dev(dev);
+
+ return sprintf(buf, fmt_dec, atomic_read(&netdev->carrier_up_count));
+}
+static DEVICE_ATTR_RO(carrier_up_count);
+
+static ssize_t carrier_down_count_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct net_device *netdev = to_net_dev(dev);
+
+ return sprintf(buf, fmt_dec, atomic_read(&netdev->carrier_down_count));
+}
+static DEVICE_ATTR_RO(carrier_down_count);
+
/* read-write attributes */
static int change_mtu(struct net_device *dev, unsigned long new_mtu)
@@ -547,6 +568,8 @@ static struct attribute *net_class_attrs[] __ro_after_init = {
&dev_attr_phys_port_name.attr,
&dev_attr_phys_switch_id.attr,
&dev_attr_proto_down.attr,
+ &dev_attr_carrier_up_count.attr,
+ &dev_attr_carrier_down_count.attr,
NULL,
};
ATTRIBUTE_GROUPS(net_class);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 1ccb953b3b09..3cad5f51afd3 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -439,13 +439,40 @@ struct net *copy_net_ns(unsigned long flags,
return net;
}
+static void unhash_nsid(struct net *net, struct net *last)
+{
+ struct net *tmp;
+ /* This function is only called from cleanup_net() work,
+ * and this work is the only process, that may delete
+ * a net from net_namespace_list. So, when the below
+ * is executing, the list may only grow. Thus, we do not
+ * use for_each_net_rcu() or rtnl_lock().
+ */
+ for_each_net(tmp) {
+ int id;
+
+ spin_lock_bh(&tmp->nsid_lock);
+ id = __peernet2id(tmp, net);
+ if (id >= 0)
+ idr_remove(&tmp->netns_ids, id);
+ spin_unlock_bh(&tmp->nsid_lock);
+ if (id >= 0)
+ rtnl_net_notifyid(tmp, RTM_DELNSID, id);
+ if (tmp == last)
+ break;
+ }
+ spin_lock_bh(&net->nsid_lock);
+ idr_destroy(&net->netns_ids);
+ spin_unlock_bh(&net->nsid_lock);
+}
+
static DEFINE_SPINLOCK(cleanup_list_lock);
static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */
static void cleanup_net(struct work_struct *work)
{
const struct pernet_operations *ops;
- struct net *net, *tmp;
+ struct net *net, *tmp, *last;
struct list_head net_kill_list;
LIST_HEAD(net_exit_list);
@@ -458,26 +485,25 @@ static void cleanup_net(struct work_struct *work)
/* Don't let anyone else find us. */
rtnl_lock();
- list_for_each_entry(net, &net_kill_list, cleanup_list) {
+ list_for_each_entry(net, &net_kill_list, cleanup_list)
list_del_rcu(&net->list);
- list_add_tail(&net->exit_list, &net_exit_list);
- for_each_net(tmp) {
- int id;
-
- spin_lock_bh(&tmp->nsid_lock);
- id = __peernet2id(tmp, net);
- if (id >= 0)
- idr_remove(&tmp->netns_ids, id);
- spin_unlock_bh(&tmp->nsid_lock);
- if (id >= 0)
- rtnl_net_notifyid(tmp, RTM_DELNSID, id);
- }
- spin_lock_bh(&net->nsid_lock);
- idr_destroy(&net->netns_ids);
- spin_unlock_bh(&net->nsid_lock);
+ /* Cache last net. After we unlock rtnl, no one new net
+ * added to net_namespace_list can assign nsid pointer
+ * to a net from net_kill_list (see peernet2id_alloc()).
+ * So, we skip them in unhash_nsid().
+ *
+ * Note, that unhash_nsid() does not delete nsid links
+ * between net_kill_list's nets, as they've already
+ * deleted from net_namespace_list. But, this would be
+ * useless anyway, as netns_ids are destroyed there.
+ */
+ last = list_last_entry(&net_namespace_list, struct net, list);
+ rtnl_unlock();
+ list_for_each_entry(net, &net_kill_list, cleanup_list) {
+ unhash_nsid(net, last);
+ list_add_tail(&net->exit_list, &net_exit_list);
}
- rtnl_unlock();
/*
* Another CPU might be rcu-iterating the list, wait for it.
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 4fcfcb14e7c6..b8ab5c829511 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -184,25 +184,44 @@
#define func_enter() pr_debug("entering %s\n", __func__);
+#define PKT_FLAGS \
+ pf(IPV6) /* Interface in IPV6 Mode */ \
+ pf(IPSRC_RND) /* IP-Src Random */ \
+ pf(IPDST_RND) /* IP-Dst Random */ \
+ pf(TXSIZE_RND) /* Transmit size is random */ \
+ pf(UDPSRC_RND) /* UDP-Src Random */ \
+ pf(UDPDST_RND) /* UDP-Dst Random */ \
+ pf(UDPCSUM) /* Include UDP checksum */ \
+ pf(NO_TIMESTAMP) /* Don't timestamp packets (default TS) */ \
+ pf(MPLS_RND) /* Random MPLS labels */ \
+ pf(QUEUE_MAP_RND) /* queue map Random */ \
+ pf(QUEUE_MAP_CPU) /* queue map mirrors smp_processor_id() */ \
+ pf(FLOW_SEQ) /* Sequential flows */ \
+ pf(IPSEC) /* ipsec on for flows */ \
+ pf(MACSRC_RND) /* MAC-Src Random */ \
+ pf(MACDST_RND) /* MAC-Dst Random */ \
+ pf(VID_RND) /* Random VLAN ID */ \
+ pf(SVID_RND) /* Random SVLAN ID */ \
+ pf(NODE) /* Node memory alloc*/ \
+
+#define pf(flag) flag##_SHIFT,
+enum pkt_flags {
+ PKT_FLAGS
+};
+#undef pf
+
/* Device flag bits */
-#define F_IPSRC_RND (1<<0) /* IP-Src Random */
-#define F_IPDST_RND (1<<1) /* IP-Dst Random */
-#define F_UDPSRC_RND (1<<2) /* UDP-Src Random */
-#define F_UDPDST_RND (1<<3) /* UDP-Dst Random */
-#define F_MACSRC_RND (1<<4) /* MAC-Src Random */
-#define F_MACDST_RND (1<<5) /* MAC-Dst Random */
-#define F_TXSIZE_RND (1<<6) /* Transmit size is random */
-#define F_IPV6 (1<<7) /* Interface in IPV6 Mode */
-#define F_MPLS_RND (1<<8) /* Random MPLS labels */
-#define F_VID_RND (1<<9) /* Random VLAN ID */
-#define F_SVID_RND (1<<10) /* Random SVLAN ID */
-#define F_FLOW_SEQ (1<<11) /* Sequential flows */
-#define F_IPSEC_ON (1<<12) /* ipsec on for flows */
-#define F_QUEUE_MAP_RND (1<<13) /* queue map Random */
-#define F_QUEUE_MAP_CPU (1<<14) /* queue map mirrors smp_processor_id() */
-#define F_NODE (1<<15) /* Node memory alloc*/
-#define F_UDPCSUM (1<<16) /* Include UDP checksum */
-#define F_NO_TIMESTAMP (1<<17) /* Don't timestamp packets (default TS) */
+#define pf(flag) static const __u32 F_##flag = (1<<flag##_SHIFT);
+PKT_FLAGS
+#undef pf
+
+#define pf(flag) __stringify(flag),
+static char *pkt_flag_names[] = {
+ PKT_FLAGS
+};
+#undef pf
+
+#define NR_PKT_FLAGS ARRAY_SIZE(pkt_flag_names)
/* Thread control flag bits */
#define T_STOP (1<<0) /* Stop run */
@@ -534,6 +553,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
{
const struct pktgen_dev *pkt_dev = seq->private;
ktime_t stopped;
+ unsigned int i;
u64 idle;
seq_printf(seq,
@@ -595,7 +615,6 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
pkt_dev->src_mac_count, pkt_dev->dst_mac_count);
if (pkt_dev->nr_labels) {
- unsigned int i;
seq_puts(seq, " mpls: ");
for (i = 0; i < pkt_dev->nr_labels; i++)
seq_printf(seq, "%08x%s", ntohl(pkt_dev->labels[i]),
@@ -631,68 +650,21 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
seq_puts(seq, " Flags: ");
- if (pkt_dev->flags & F_IPV6)
- seq_puts(seq, "IPV6 ");
-
- if (pkt_dev->flags & F_IPSRC_RND)
- seq_puts(seq, "IPSRC_RND ");
-
- if (pkt_dev->flags & F_IPDST_RND)
- seq_puts(seq, "IPDST_RND ");
-
- if (pkt_dev->flags & F_TXSIZE_RND)
- seq_puts(seq, "TXSIZE_RND ");
-
- if (pkt_dev->flags & F_UDPSRC_RND)
- seq_puts(seq, "UDPSRC_RND ");
-
- if (pkt_dev->flags & F_UDPDST_RND)
- seq_puts(seq, "UDPDST_RND ");
-
- if (pkt_dev->flags & F_UDPCSUM)
- seq_puts(seq, "UDPCSUM ");
-
- if (pkt_dev->flags & F_NO_TIMESTAMP)
- seq_puts(seq, "NO_TIMESTAMP ");
-
- if (pkt_dev->flags & F_MPLS_RND)
- seq_puts(seq, "MPLS_RND ");
-
- if (pkt_dev->flags & F_QUEUE_MAP_RND)
- seq_puts(seq, "QUEUE_MAP_RND ");
+ for (i = 0; i < NR_PKT_FLAGS; i++) {
+ if (i == F_FLOW_SEQ)
+ if (!pkt_dev->cflows)
+ continue;
- if (pkt_dev->flags & F_QUEUE_MAP_CPU)
- seq_puts(seq, "QUEUE_MAP_CPU ");
-
- if (pkt_dev->cflows) {
- if (pkt_dev->flags & F_FLOW_SEQ)
- seq_puts(seq, "FLOW_SEQ "); /*in sequence flows*/
- else
- seq_puts(seq, "FLOW_RND ");
- }
+ if (pkt_dev->flags & (1 << i))
+ seq_printf(seq, "%s ", pkt_flag_names[i]);
+ else if (i == F_FLOW_SEQ)
+ seq_puts(seq, "FLOW_RND ");
#ifdef CONFIG_XFRM
- if (pkt_dev->flags & F_IPSEC_ON) {
- seq_puts(seq, "IPSEC ");
- if (pkt_dev->spi)
+ if (i == F_IPSEC && pkt_dev->spi)
seq_printf(seq, "spi:%u", pkt_dev->spi);
- }
#endif
-
- if (pkt_dev->flags & F_MACSRC_RND)
- seq_puts(seq, "MACSRC_RND ");
-
- if (pkt_dev->flags & F_MACDST_RND)
- seq_puts(seq, "MACDST_RND ");
-
- if (pkt_dev->flags & F_VID_RND)
- seq_puts(seq, "VID_RND ");
-
- if (pkt_dev->flags & F_SVID_RND)
- seq_puts(seq, "SVID_RND ");
-
- if (pkt_dev->flags & F_NODE)
- seq_puts(seq, "NODE_ALLOC ");
+ }
seq_puts(seq, "\n");
@@ -858,6 +830,35 @@ static ssize_t get_labels(const char __user *buffer, struct pktgen_dev *pkt_dev)
return i;
}
+static __u32 pktgen_read_flag(const char *f, bool *disable)
+{
+ __u32 i;
+
+ if (f[0] == '!') {
+ *disable = true;
+ f++;
+ }
+
+ for (i = 0; i < NR_PKT_FLAGS; i++) {
+ if (!IS_ENABLED(CONFIG_XFRM) && i == IPSEC_SHIFT)
+ continue;
+
+ /* allow only disabling ipv6 flag */
+ if (!*disable && i == IPV6_SHIFT)
+ continue;
+
+ if (strcmp(f, pkt_flag_names[i]) == 0)
+ return 1 << i;
+ }
+
+ if (strcmp(f, "FLOW_RND") == 0) {
+ *disable = !*disable;
+ return F_FLOW_SEQ;
+ }
+
+ return 0;
+}
+
static ssize_t pktgen_if_write(struct file *file,
const char __user * user_buffer, size_t count,
loff_t * offset)
@@ -1215,7 +1216,10 @@ static ssize_t pktgen_if_write(struct file *file,
return count;
}
if (!strcmp(name, "flag")) {
+ __u32 flag;
char f[32];
+ bool disable = false;
+
memset(f, 0, 32);
len = strn_len(&user_buffer[i], sizeof(f) - 1);
if (len < 0)
@@ -1224,107 +1228,15 @@ static ssize_t pktgen_if_write(struct file *file,
if (copy_from_user(f, &user_buffer[i], len))
return -EFAULT;
i += len;
- if (strcmp(f, "IPSRC_RND") == 0)
- pkt_dev->flags |= F_IPSRC_RND;
-
- else if (strcmp(f, "!IPSRC_RND") == 0)
- pkt_dev->flags &= ~F_IPSRC_RND;
-
- else if (strcmp(f, "TXSIZE_RND") == 0)
- pkt_dev->flags |= F_TXSIZE_RND;
-
- else if (strcmp(f, "!TXSIZE_RND") == 0)
- pkt_dev->flags &= ~F_TXSIZE_RND;
-
- else if (strcmp(f, "IPDST_RND") == 0)
- pkt_dev->flags |= F_IPDST_RND;
-
- else if (strcmp(f, "!IPDST_RND") == 0)
- pkt_dev->flags &= ~F_IPDST_RND;
-
- else if (strcmp(f, "UDPSRC_RND") == 0)
- pkt_dev->flags |= F_UDPSRC_RND;
-
- else if (strcmp(f, "!UDPSRC_RND") == 0)
- pkt_dev->flags &= ~F_UDPSRC_RND;
-
- else if (strcmp(f, "UDPDST_RND") == 0)
- pkt_dev->flags |= F_UDPDST_RND;
-
- else if (strcmp(f, "!UDPDST_RND") == 0)
- pkt_dev->flags &= ~F_UDPDST_RND;
-
- else if (strcmp(f, "MACSRC_RND") == 0)
- pkt_dev->flags |= F_MACSRC_RND;
-
- else if (strcmp(f, "!MACSRC_RND") == 0)
- pkt_dev->flags &= ~F_MACSRC_RND;
-
- else if (strcmp(f, "MACDST_RND") == 0)
- pkt_dev->flags |= F_MACDST_RND;
-
- else if (strcmp(f, "!MACDST_RND") == 0)
- pkt_dev->flags &= ~F_MACDST_RND;
-
- else if (strcmp(f, "MPLS_RND") == 0)
- pkt_dev->flags |= F_MPLS_RND;
-
- else if (strcmp(f, "!MPLS_RND") == 0)
- pkt_dev->flags &= ~F_MPLS_RND;
- else if (strcmp(f, "VID_RND") == 0)
- pkt_dev->flags |= F_VID_RND;
+ flag = pktgen_read_flag(f, &disable);
- else if (strcmp(f, "!VID_RND") == 0)
- pkt_dev->flags &= ~F_VID_RND;
-
- else if (strcmp(f, "SVID_RND") == 0)
- pkt_dev->flags |= F_SVID_RND;
-
- else if (strcmp(f, "!SVID_RND") == 0)
- pkt_dev->flags &= ~F_SVID_RND;
-
- else if (strcmp(f, "FLOW_SEQ") == 0)
- pkt_dev->flags |= F_FLOW_SEQ;
-
- else if (strcmp(f, "QUEUE_MAP_RND") == 0)
- pkt_dev->flags |= F_QUEUE_MAP_RND;
-
- else if (strcmp(f, "!QUEUE_MAP_RND") == 0)
- pkt_dev->flags &= ~F_QUEUE_MAP_RND;
-
- else if (strcmp(f, "QUEUE_MAP_CPU") == 0)
- pkt_dev->flags |= F_QUEUE_MAP_CPU;
-
- else if (strcmp(f, "!QUEUE_MAP_CPU") == 0)
- pkt_dev->flags &= ~F_QUEUE_MAP_CPU;
-#ifdef CONFIG_XFRM
- else if (strcmp(f, "IPSEC") == 0)
- pkt_dev->flags |= F_IPSEC_ON;
-#endif
-
- else if (strcmp(f, "!IPV6") == 0)
- pkt_dev->flags &= ~F_IPV6;
-
- else if (strcmp(f, "NODE_ALLOC") == 0)
- pkt_dev->flags |= F_NODE;
-
- else if (strcmp(f, "!NODE_ALLOC") == 0)
- pkt_dev->flags &= ~F_NODE;
-
- else if (strcmp(f, "UDPCSUM") == 0)
- pkt_dev->flags |= F_UDPCSUM;
-
- else if (strcmp(f, "!UDPCSUM") == 0)
- pkt_dev->flags &= ~F_UDPCSUM;
-
- else if (strcmp(f, "NO_TIMESTAMP") == 0)
- pkt_dev->flags |= F_NO_TIMESTAMP;
-
- else if (strcmp(f, "!NO_TIMESTAMP") == 0)
- pkt_dev->flags &= ~F_NO_TIMESTAMP;
-
- else {
+ if (flag) {
+ if (disable)
+ pkt_dev->flags &= ~flag;
+ else
+ pkt_dev->flags |= flag;
+ } else {
sprintf(pg_result,
"Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s",
f,
@@ -2541,7 +2453,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
pkt_dev->flows[flow].cur_daddr =
pkt_dev->cur_daddr;
#ifdef CONFIG_XFRM
- if (pkt_dev->flags & F_IPSEC_ON)
+ if (pkt_dev->flags & F_IPSEC)
get_ipsec_sa(pkt_dev, flow);
#endif
pkt_dev->nflows++;
@@ -2646,7 +2558,7 @@ static void free_SAs(struct pktgen_dev *pkt_dev)
static int process_ipsec(struct pktgen_dev *pkt_dev,
struct sk_buff *skb, __be16 protocol)
{
- if (pkt_dev->flags & F_IPSEC_ON) {
+ if (pkt_dev->flags & F_IPSEC) {
struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x;
int nhead = 0;
if (x) {
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 16d644a4f974..97874daa1336 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -990,6 +990,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(4) /* IFLA_NEW_NETNSID */
+ nla_total_size(1) /* IFLA_PROTO_DOWN */
+ nla_total_size(4) /* IFLA_IF_NETNSID */
+ + nla_total_size(4) /* IFLA_CARRIER_UP_COUNT */
+ + nla_total_size(4) /* IFLA_CARRIER_DOWN_COUNT */
+ 0;
}
@@ -1551,8 +1553,13 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) ||
nla_put_ifalias(skb, dev) ||
nla_put_u32(skb, IFLA_CARRIER_CHANGES,
- atomic_read(&dev->carrier_changes)) ||
- nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down))
+ atomic_read(&dev->carrier_up_count) +
+ atomic_read(&dev->carrier_down_count)) ||
+ nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down) ||
+ nla_put_u32(skb, IFLA_CARRIER_UP_COUNT,
+ atomic_read(&dev->carrier_up_count)) ||
+ nla_put_u32(skb, IFLA_CARRIER_DOWN_COUNT,
+ atomic_read(&dev->carrier_down_count)))
goto nla_put_failure;
if (event != IFLA_EVENT_NONE) {
@@ -1656,6 +1663,8 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_EVENT] = { .type = NLA_U32 },
[IFLA_GROUP] = { .type = NLA_U32 },
[IFLA_IF_NETNSID] = { .type = NLA_S32 },
+ [IFLA_CARRIER_UP_COUNT] = { .type = NLA_U32 },
+ [IFLA_CARRIER_DOWN_COUNT] = { .type = NLA_U32 },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 21f9bed11988..adf50fbc4c13 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -271,13 +271,12 @@ static int dsa_port_setup(struct dsa_port *dp)
break;
case DSA_PORT_TYPE_CPU:
case DSA_PORT_TYPE_DSA:
- err = dsa_port_fixed_link_register_of(dp);
+ err = dsa_port_link_register_of(dp);
if (err) {
- dev_err(ds->dev, "failed to register fixed link for port %d.%d\n",
+ dev_err(ds->dev, "failed to setup link for port %d.%d\n",
ds->index, dp->index);
return err;
}
-
break;
case DSA_PORT_TYPE_USER:
err = dsa_slave_create(dp);
@@ -301,7 +300,7 @@ static void dsa_port_teardown(struct dsa_port *dp)
break;
case DSA_PORT_TYPE_CPU:
case DSA_PORT_TYPE_DSA:
- dsa_port_fixed_link_unregister_of(dp);
+ dsa_port_link_unregister_of(dp);
break;
case DSA_PORT_TYPE_USER:
if (dp->slave) {
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index cefb0c3c6d51..70de7895e5b8 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -166,8 +166,8 @@ int dsa_port_vlan_add(struct dsa_port *dp,
struct switchdev_trans *trans);
int dsa_port_vlan_del(struct dsa_port *dp,
const struct switchdev_obj_port_vlan *vlan);
-int dsa_port_fixed_link_register_of(struct dsa_port *dp);
-void dsa_port_fixed_link_unregister_of(struct dsa_port *dp);
+int dsa_port_link_register_of(struct dsa_port *dp);
+void dsa_port_link_unregister_of(struct dsa_port *dp);
/* slave.c */
extern const struct dsa_device_ops notag_netdev_ops;
diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c
index aa56d3fb5da4..cb54b81d0bd9 100644
--- a/net/dsa/legacy.c
+++ b/net/dsa/legacy.c
@@ -86,7 +86,7 @@ static int dsa_cpu_dsa_setups(struct dsa_switch *ds)
if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)))
continue;
- ret = dsa_port_fixed_link_register_of(&ds->ports[port]);
+ ret = dsa_port_link_register_of(&ds->ports[port]);
if (ret)
return ret;
}
@@ -275,7 +275,7 @@ static void dsa_switch_destroy(struct dsa_switch *ds)
for (port = 0; port < ds->num_ports; port++) {
if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)))
continue;
- dsa_port_fixed_link_unregister_of(&ds->ports[port]);
+ dsa_port_link_unregister_of(&ds->ports[port]);
}
if (ds->slave_mii_bus && ds->ops->phy_read)
diff --git a/net/dsa/port.c b/net/dsa/port.c
index bb4be2679904..7acc1169d75e 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -273,7 +273,56 @@ int dsa_port_vlan_del(struct dsa_port *dp,
return 0;
}
-int dsa_port_fixed_link_register_of(struct dsa_port *dp)
+static int dsa_port_setup_phy_of(struct dsa_port *dp, bool enable)
+{
+ struct device_node *port_dn = dp->dn;
+ struct device_node *phy_dn;
+ struct dsa_switch *ds = dp->ds;
+ struct phy_device *phydev;
+ int port = dp->index;
+ int err = 0;
+
+ phy_dn = of_parse_phandle(port_dn, "phy-handle", 0);
+ if (!phy_dn)
+ return 0;
+
+ phydev = of_phy_find_device(phy_dn);
+ if (!phydev) {
+ err = -EPROBE_DEFER;
+ goto err_put_of;
+ }
+
+ if (enable) {
+ err = genphy_config_init(phydev);
+ if (err < 0)
+ goto err_put_dev;
+
+ err = genphy_resume(phydev);
+ if (err < 0)
+ goto err_put_dev;
+
+ err = genphy_read_status(phydev);
+ if (err < 0)
+ goto err_put_dev;
+ } else {
+ err = genphy_suspend(phydev);
+ if (err < 0)
+ goto err_put_dev;
+ }
+
+ if (ds->ops->adjust_link)
+ ds->ops->adjust_link(ds, port, phydev);
+
+ dev_dbg(ds->dev, "enabled port's phy: %s", phydev_name(phydev));
+
+err_put_dev:
+ put_device(&phydev->mdio.dev);
+err_put_of:
+ of_node_put(phy_dn);
+ return err;
+}
+
+static int dsa_port_fixed_link_register_of(struct dsa_port *dp)
{
struct device_node *dn = dp->dn;
struct dsa_switch *ds = dp->ds;
@@ -282,38 +331,44 @@ int dsa_port_fixed_link_register_of(struct dsa_port *dp)
int mode;
int err;
- if (of_phy_is_fixed_link(dn)) {
- err = of_phy_register_fixed_link(dn);
- if (err) {
- dev_err(ds->dev,
- "failed to register the fixed PHY of port %d\n",
- port);
- return err;
- }
+ err = of_phy_register_fixed_link(dn);
+ if (err) {
+ dev_err(ds->dev,
+ "failed to register the fixed PHY of port %d\n",
+ port);
+ return err;
+ }
- phydev = of_phy_find_device(dn);
+ phydev = of_phy_find_device(dn);
- mode = of_get_phy_mode(dn);
- if (mode < 0)
- mode = PHY_INTERFACE_MODE_NA;
- phydev->interface = mode;
+ mode = of_get_phy_mode(dn);
+ if (mode < 0)
+ mode = PHY_INTERFACE_MODE_NA;
+ phydev->interface = mode;
- genphy_config_init(phydev);
- genphy_read_status(phydev);
+ genphy_config_init(phydev);
+ genphy_read_status(phydev);
- if (ds->ops->adjust_link)
- ds->ops->adjust_link(ds, port, phydev);
+ if (ds->ops->adjust_link)
+ ds->ops->adjust_link(ds, port, phydev);
- put_device(&phydev->mdio.dev);
- }
+ put_device(&phydev->mdio.dev);
return 0;
}
-void dsa_port_fixed_link_unregister_of(struct dsa_port *dp)
+int dsa_port_link_register_of(struct dsa_port *dp)
{
- struct device_node *dn = dp->dn;
+ if (of_phy_is_fixed_link(dp->dn))
+ return dsa_port_fixed_link_register_of(dp);
+ else
+ return dsa_port_setup_phy_of(dp, true);
+}
- if (of_phy_is_fixed_link(dn))
- of_phy_deregister_fixed_link(dn);
+void dsa_port_link_unregister_of(struct dsa_port *dp)
+{
+ if (of_phy_is_fixed_link(dp->dn))
+ of_phy_deregister_fixed_link(dp->dn);
+ else
+ dsa_port_setup_phy_of(dp, false);
}
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 54cccdd8b1e3..c24008daa3d8 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -872,6 +872,9 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
struct sock *sk = sock->sk;
int err = 0;
struct net *net = sock_net(sk);
+ void __user *p = (void __user *)arg;
+ struct ifreq ifr;
+ struct rtentry rt;
switch (cmd) {
case SIOCGSTAMP:
@@ -882,8 +885,12 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
break;
case SIOCADDRT:
case SIOCDELRT:
+ if (copy_from_user(&rt, p, sizeof(struct rtentry)))
+ return -EFAULT;
+ err = ip_rt_ioctl(net, cmd, &rt);
+ break;
case SIOCRTMSG:
- err = ip_rt_ioctl(net, cmd, (void __user *)arg);
+ err = -EINVAL;
break;
case SIOCDARP:
case SIOCGARP:
@@ -891,17 +898,26 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
err = arp_ioctl(net, cmd, (void __user *)arg);
break;
case SIOCGIFADDR:
- case SIOCSIFADDR:
case SIOCGIFBRDADDR:
- case SIOCSIFBRDADDR:
case SIOCGIFNETMASK:
- case SIOCSIFNETMASK:
case SIOCGIFDSTADDR:
+ case SIOCGIFPFLAGS:
+ if (copy_from_user(&ifr, p, sizeof(struct ifreq)))
+ return -EFAULT;
+ err = devinet_ioctl(net, cmd, &ifr);
+ if (!err && copy_to_user(p, &ifr, sizeof(struct ifreq)))
+ err = -EFAULT;
+ break;
+
+ case SIOCSIFADDR:
+ case SIOCSIFBRDADDR:
+ case SIOCSIFNETMASK:
case SIOCSIFDSTADDR:
case SIOCSIFPFLAGS:
- case SIOCGIFPFLAGS:
case SIOCSIFFLAGS:
- err = devinet_ioctl(net, cmd, (void __user *)arg);
+ if (copy_from_user(&ifr, p, sizeof(struct ifreq)))
+ return -EFAULT;
+ err = devinet_ioctl(net, cmd, &ifr);
break;
default:
if (sk->sk_prot->ioctl)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 7a93359fbc72..e056c0067f2c 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -946,11 +946,10 @@ static int inet_abc_len(__be32 addr)
}
-int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
+int devinet_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr)
{
- struct ifreq ifr;
struct sockaddr_in sin_orig;
- struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
+ struct sockaddr_in *sin = (struct sockaddr_in *)&ifr->ifr_addr;
struct in_device *in_dev;
struct in_ifaddr **ifap = NULL;
struct in_ifaddr *ifa = NULL;
@@ -959,22 +958,16 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
int ret = -EFAULT;
int tryaddrmatch = 0;
- /*
- * Fetch the caller's info block into kernel space
- */
-
- if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
- goto out;
- ifr.ifr_name[IFNAMSIZ - 1] = 0;
+ ifr->ifr_name[IFNAMSIZ - 1] = 0;
/* save original address for comparison */
memcpy(&sin_orig, sin, sizeof(*sin));
- colon = strchr(ifr.ifr_name, ':');
+ colon = strchr(ifr->ifr_name, ':');
if (colon)
*colon = 0;
- dev_load(net, ifr.ifr_name);
+ dev_load(net, ifr->ifr_name);
switch (cmd) {
case SIOCGIFADDR: /* Get interface address */
@@ -1014,7 +1007,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
rtnl_lock();
ret = -ENODEV;
- dev = __dev_get_by_name(net, ifr.ifr_name);
+ dev = __dev_get_by_name(net, ifr->ifr_name);
if (!dev)
goto done;
@@ -1031,7 +1024,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
This is checked above. */
for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
ifap = &ifa->ifa_next) {
- if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
+ if (!strcmp(ifr->ifr_name, ifa->ifa_label) &&
sin_orig.sin_addr.s_addr ==
ifa->ifa_local) {
break; /* found */
@@ -1044,7 +1037,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
if (!ifa) {
for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
ifap = &ifa->ifa_next)
- if (!strcmp(ifr.ifr_name, ifa->ifa_label))
+ if (!strcmp(ifr->ifr_name, ifa->ifa_label))
break;
}
}
@@ -1056,19 +1049,19 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
switch (cmd) {
case SIOCGIFADDR: /* Get interface address */
sin->sin_addr.s_addr = ifa->ifa_local;
- goto rarok;
+ break;
case SIOCGIFBRDADDR: /* Get the broadcast address */
sin->sin_addr.s_addr = ifa->ifa_broadcast;
- goto rarok;
+ break;
case SIOCGIFDSTADDR: /* Get the destination address */
sin->sin_addr.s_addr = ifa->ifa_address;
- goto rarok;
+ break;
case SIOCGIFNETMASK: /* Get the netmask for the interface */
sin->sin_addr.s_addr = ifa->ifa_mask;
- goto rarok;
+ break;
case SIOCSIFFLAGS:
if (colon) {
@@ -1076,11 +1069,11 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
if (!ifa)
break;
ret = 0;
- if (!(ifr.ifr_flags & IFF_UP))
+ if (!(ifr->ifr_flags & IFF_UP))
inet_del_ifa(in_dev, ifap, 1);
break;
}
- ret = dev_change_flags(dev, ifr.ifr_flags);
+ ret = dev_change_flags(dev, ifr->ifr_flags);
break;
case SIOCSIFADDR: /* Set interface address (and family) */
@@ -1095,7 +1088,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
break;
INIT_HLIST_NODE(&ifa->hash);
if (colon)
- memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
+ memcpy(ifa->ifa_label, ifr->ifr_name, IFNAMSIZ);
else
memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
} else {
@@ -1182,28 +1175,27 @@ done:
rtnl_unlock();
out:
return ret;
-rarok:
- rtnl_unlock();
- ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
- goto out;
}
-static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
+static int inet_gifconf(struct net_device *dev, char __user *buf, int len, int size)
{
struct in_device *in_dev = __in_dev_get_rtnl(dev);
struct in_ifaddr *ifa;
struct ifreq ifr;
int done = 0;
+ if (WARN_ON(size > sizeof(struct ifreq)))
+ goto out;
+
if (!in_dev)
goto out;
for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
if (!buf) {
- done += sizeof(ifr);
+ done += size;
continue;
}
- if (len < (int) sizeof(ifr))
+ if (len < size)
break;
memset(&ifr, 0, sizeof(struct ifreq));
strcpy(ifr.ifr_name, ifa->ifa_label);
@@ -1212,13 +1204,12 @@ static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
(*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
ifa->ifa_local;
- if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
+ if (copy_to_user(buf + done, &ifr, size)) {
done = -EFAULT;
break;
}
- buf += sizeof(struct ifreq);
- len -= sizeof(struct ifreq);
- done += sizeof(struct ifreq);
+ len -= size;
+ done += size;
}
out:
return done;
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 32fbd9ba3609..da5635fc52c2 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -118,6 +118,9 @@ static struct sk_buff *esp4_gso_segment(struct sk_buff *skb,
if (!xo)
return ERR_PTR(-EINVAL);
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP))
+ return ERR_PTR(-EINVAL);
+
x = skb->sp->xvec[skb->sp->len - 1];
aead = x->data;
esph = ip_esp_hdr(skb);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 08259d078b1c..f05afaf3235c 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -587,10 +587,9 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
* Handle IP routing ioctl calls.
* These are used to manipulate the routing tables
*/
-int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
+int ip_rt_ioctl(struct net *net, unsigned int cmd, struct rtentry *rt)
{
struct fib_config cfg;
- struct rtentry rt;
int err;
switch (cmd) {
@@ -599,11 +598,8 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
- if (copy_from_user(&rt, arg, sizeof(rt)))
- return -EFAULT;
-
rtnl_lock();
- err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
+ err = rtentry_to_fib_config(net, cmd, rt, &cfg);
if (err == 0) {
struct fib_table *tb;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 02f00be12bb0..10f7f74a0831 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -332,7 +332,7 @@ static __be32 igmpv3_get_srcaddr(struct net_device *dev,
return htonl(INADDR_ANY);
for_ifa(in_dev) {
- if (inet_ifa_match(fl4->saddr, ifa))
+ if (fl4->saddr == ifa->ifa_local)
return fl4->saddr;
} endfor_ifa(in_dev);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index b61f2285816d..6ec670fbbbdd 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -114,7 +114,7 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
static void erspan_build_header(struct sk_buff *skb,
- __be32 id, u32 index,
+ u32 id, u32 index,
bool truncate, bool is_ipv4);
static unsigned int ipgre_net_id __read_mostly;
@@ -273,12 +273,12 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
iph = ip_hdr(skb);
ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
- ver = (ntohs(ershdr->ver_vlan) & VER_MASK) >> VER_OFFSET;
+ ver = ershdr->ver;
/* The original GRE header does not have key field,
* Use ERSPAN 10-bit session ID as key.
*/
- tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK);
+ tpi->key = cpu_to_be32(get_session_id(ershdr));
tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
tpi->flags | TUNNEL_KEY,
iph->saddr, iph->daddr, tpi->key);
@@ -324,14 +324,8 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
if (ver == 1) {
tunnel->index = ntohl(pkt_md->u.index);
} else {
- u16 md2_flags;
- u16 dir, hwid;
-
- md2_flags = ntohs(pkt_md->u.md2.flags);
- dir = (md2_flags & DIR_MASK) >> DIR_OFFSET;
- hwid = (md2_flags & HWID_MASK) >> HWID_OFFSET;
- tunnel->dir = dir;
- tunnel->hwid = hwid;
+ tunnel->dir = pkt_md->u.md2.dir;
+ tunnel->hwid = get_hwid(&pkt_md->u.md2);
}
}
@@ -615,19 +609,14 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
}
if (version == 1) {
- erspan_build_header(skb, tunnel_id_to_key32(key->tun_id),
+ erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
ntohl(md->u.index), truncate, true);
} else if (version == 2) {
- u16 md2_flags;
- u8 direction;
- u16 hwid;
-
- md2_flags = ntohs(md->u.md2.flags);
- direction = (md2_flags & DIR_MASK) >> DIR_OFFSET;
- hwid = (md2_flags & HWID_MASK) >> HWID_OFFSET;
-
- erspan_build_header_v2(skb, tunnel_id_to_key32(key->tun_id),
- direction, hwid, truncate, true);
+ erspan_build_header_v2(skb,
+ ntohl(tunnel_id_to_key32(key->tun_id)),
+ md->u.md2.dir,
+ get_hwid(&md->u.md2),
+ truncate, true);
} else {
goto err_free_rt;
}
@@ -733,10 +722,11 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,
/* Push ERSPAN header */
if (tunnel->erspan_ver == 1)
- erspan_build_header(skb, tunnel->parms.o_key, tunnel->index,
+ erspan_build_header(skb, ntohl(tunnel->parms.o_key),
+ tunnel->index,
truncate, true);
else
- erspan_build_header_v2(skb, tunnel->parms.o_key,
+ erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
tunnel->dir, tunnel->hwid,
truncate, true);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 60fb1eb7d7d8..6cc70fa488cb 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -808,6 +808,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
{
struct net_device *dev = NULL;
int ifindex;
+ int midx;
if (optlen != sizeof(int))
goto e_inval;
@@ -823,10 +824,13 @@ static int do_ip_setsockopt(struct sock *sk, int level,
err = -EADDRNOTAVAIL;
if (!dev)
break;
+
+ midx = l3mdev_master_ifindex(dev);
dev_put(dev);
err = -EINVAL;
- if (sk->sk_bound_dev_if)
+ if (sk->sk_bound_dev_if &&
+ (!midx || midx != sk->sk_bound_dev_if))
break;
inet->uc_index = ifindex;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 5ddb1cb52bd4..141f5e865731 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -711,9 +711,16 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
}
}
- init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
- tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
- tunnel->fwmark);
+ if (tunnel->fwmark) {
+ init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
+ tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
+ tunnel->fwmark);
+ }
+ else {
+ init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
+ tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
+ skb->mark);
+ }
if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
goto tx_error;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index e9e488e72900..f75802ad960f 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -329,39 +329,6 @@ set_sockaddr(struct sockaddr_in *sin, __be32 addr, __be16 port)
sin->sin_port = port;
}
-static int __init ic_devinet_ioctl(unsigned int cmd, struct ifreq *arg)
-{
- int res;
-
- mm_segment_t oldfs = get_fs();
- set_fs(get_ds());
- res = devinet_ioctl(&init_net, cmd, (struct ifreq __user *) arg);
- set_fs(oldfs);
- return res;
-}
-
-static int __init ic_dev_ioctl(unsigned int cmd, struct ifreq *arg)
-{
- int res;
-
- mm_segment_t oldfs = get_fs();
- set_fs(get_ds());
- res = dev_ioctl(&init_net, cmd, (struct ifreq __user *) arg);
- set_fs(oldfs);
- return res;
-}
-
-static int __init ic_route_ioctl(unsigned int cmd, struct rtentry *arg)
-{
- int res;
-
- mm_segment_t oldfs = get_fs();
- set_fs(get_ds());
- res = ip_rt_ioctl(&init_net, cmd, (void __user *) arg);
- set_fs(oldfs);
- return res;
-}
-
/*
* Set up interface addresses and routes.
*/
@@ -375,19 +342,19 @@ static int __init ic_setup_if(void)
memset(&ir, 0, sizeof(ir));
strcpy(ir.ifr_ifrn.ifrn_name, ic_dev->dev->name);
set_sockaddr(sin, ic_myaddr, 0);
- if ((err = ic_devinet_ioctl(SIOCSIFADDR, &ir)) < 0) {
+ if ((err = devinet_ioctl(&init_net, SIOCSIFADDR, &ir)) < 0) {
pr_err("IP-Config: Unable to set interface address (%d)\n",
err);
return -1;
}
set_sockaddr(sin, ic_netmask, 0);
- if ((err = ic_devinet_ioctl(SIOCSIFNETMASK, &ir)) < 0) {
+ if ((err = devinet_ioctl(&init_net, SIOCSIFNETMASK, &ir)) < 0) {
pr_err("IP-Config: Unable to set interface netmask (%d)\n",
err);
return -1;
}
set_sockaddr(sin, ic_myaddr | ~ic_netmask, 0);
- if ((err = ic_devinet_ioctl(SIOCSIFBRDADDR, &ir)) < 0) {
+ if ((err = devinet_ioctl(&init_net, SIOCSIFBRDADDR, &ir)) < 0) {
pr_err("IP-Config: Unable to set interface broadcast address (%d)\n",
err);
return -1;
@@ -397,11 +364,11 @@ static int __init ic_setup_if(void)
* out, we'll try to muddle along.
*/
if (ic_dev_mtu != 0) {
- strcpy(ir.ifr_name, ic_dev->dev->name);
- ir.ifr_mtu = ic_dev_mtu;
- if ((err = ic_dev_ioctl(SIOCSIFMTU, &ir)) < 0)
+ rtnl_lock();
+ if ((err = dev_set_mtu(ic_dev->dev, ic_dev_mtu)) < 0)
pr_err("IP-Config: Unable to set interface mtu to %d (%d)\n",
ic_dev_mtu, err);
+ rtnl_unlock();
}
return 0;
}
@@ -423,7 +390,7 @@ static int __init ic_setup_routes(void)
set_sockaddr((struct sockaddr_in *) &rm.rt_genmask, 0, 0);
set_sockaddr((struct sockaddr_in *) &rm.rt_gateway, ic_gateway, 0);
rm.rt_flags = RTF_UP | RTF_GATEWAY;
- if ((err = ic_route_ioctl(SIOCADDRT, &rm)) < 0) {
+ if ((err = ip_rt_ioctl(&init_net, SIOCADDRT, &rm)) < 0) {
pr_err("IP-Config: Cannot add default route (%d)\n",
err);
return -1;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 136544b36a46..7c509697ebc7 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -617,8 +617,21 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipc.oif = inet->mc_index;
if (!saddr)
saddr = inet->mc_addr;
- } else if (!ipc.oif)
+ } else if (!ipc.oif) {
ipc.oif = inet->uc_index;
+ } else if (ipv4_is_lbcast(daddr) && inet->uc_index) {
+ /* oif is set, packet is to local broadcast and
+ * and uc_index is set. oif is most likely set
+ * by sk_bound_dev_if. If uc_index != oif check if the
+ * oif is an L3 master and uc_index is an L3 slave.
+ * If so, we want to allow the send using the uc_index.
+ */
+ if (ipc.oif != inet->uc_index &&
+ ipc.oif == l3mdev_master_ifindex_by_index(sock_net(sk),
+ inet->uc_index)) {
+ ipc.oif = inet->uc_index;
+ }
+ }
flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
RT_SCOPE_UNIVERSE,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index d7cf861bf699..f013ddc191e0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -463,7 +463,7 @@ void tcp_init_transfer(struct sock *sk, int bpf_op)
tcp_mtup_init(sk);
icsk->icsk_af_ops->rebuild_header(sk);
tcp_init_metrics(sk);
- tcp_call_bpf(sk, bpf_op);
+ tcp_call_bpf(sk, bpf_op, 0, NULL);
tcp_init_congestion_control(sk);
tcp_init_buffer_space(sk);
}
@@ -2042,6 +2042,30 @@ void tcp_set_state(struct sock *sk, int state)
{
int oldstate = sk->sk_state;
+ /* We defined a new enum for TCP states that are exported in BPF
+ * so as not force the internal TCP states to be frozen. The
+ * following checks will detect if an internal state value ever
+ * differs from the BPF value. If this ever happens, then we will
+ * need to remap the internal value to the BPF value before calling
+ * tcp_call_bpf_2arg.
+ */
+ BUILD_BUG_ON((int)BPF_TCP_ESTABLISHED != (int)TCP_ESTABLISHED);
+ BUILD_BUG_ON((int)BPF_TCP_SYN_SENT != (int)TCP_SYN_SENT);
+ BUILD_BUG_ON((int)BPF_TCP_SYN_RECV != (int)TCP_SYN_RECV);
+ BUILD_BUG_ON((int)BPF_TCP_FIN_WAIT1 != (int)TCP_FIN_WAIT1);
+ BUILD_BUG_ON((int)BPF_TCP_FIN_WAIT2 != (int)TCP_FIN_WAIT2);
+ BUILD_BUG_ON((int)BPF_TCP_TIME_WAIT != (int)TCP_TIME_WAIT);
+ BUILD_BUG_ON((int)BPF_TCP_CLOSE != (int)TCP_CLOSE);
+ BUILD_BUG_ON((int)BPF_TCP_CLOSE_WAIT != (int)TCP_CLOSE_WAIT);
+ BUILD_BUG_ON((int)BPF_TCP_LAST_ACK != (int)TCP_LAST_ACK);
+ BUILD_BUG_ON((int)BPF_TCP_LISTEN != (int)TCP_LISTEN);
+ BUILD_BUG_ON((int)BPF_TCP_CLOSING != (int)TCP_CLOSING);
+ BUILD_BUG_ON((int)BPF_TCP_NEW_SYN_RECV != (int)TCP_NEW_SYN_RECV);
+ BUILD_BUG_ON((int)BPF_TCP_MAX_STATES != (int)TCP_MAX_STATES);
+
+ if (BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), BPF_SOCK_OPS_STATE_CB_FLAG))
+ tcp_call_bpf_2arg(sk, BPF_SOCK_OPS_STATE_CB, oldstate, state);
+
switch (state) {
case TCP_ESTABLISHED:
if (oldstate != TCP_ESTABLISHED)
diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c
index 0b5a05bd82e3..ddbce73edae8 100644
--- a/net/ipv4/tcp_nv.c
+++ b/net/ipv4/tcp_nv.c
@@ -146,7 +146,7 @@ static void tcpnv_init(struct sock *sk)
* within a datacenter, where we have reasonable estimates of
* RTTs
*/
- base_rtt = tcp_call_bpf(sk, BPF_SOCK_OPS_BASE_RTT);
+ base_rtt = tcp_call_bpf(sk, BPF_SOCK_OPS_BASE_RTT, 0, NULL);
if (base_rtt > 0) {
ca->nv_base_rtt = base_rtt;
ca->nv_lower_bound_rtt = (base_rtt * 205) >> 8; /* 80% */
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index b6a2aa1dcf56..4d58e2ce0b5b 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -32,6 +32,9 @@ static void tcp_gso_tstamp(struct sk_buff *skb, unsigned int ts_seq,
static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4))
+ return ERR_PTR(-EINVAL);
+
if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
return ERR_PTR(-EINVAL);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 95461f02ac9a..e9f985e42405 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2905,6 +2905,10 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
}
+ if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RETRANS_CB_FLAG))
+ tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RETRANS_CB,
+ TCP_SKB_CB(skb)->seq, segs, err);
+
if (likely(!err)) {
TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
trace_tcp_retransmit_skb(sk, skb);
@@ -3469,7 +3473,7 @@ int tcp_connect(struct sock *sk)
struct sk_buff *buff;
int err;
- tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB);
+ tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB, 0, NULL);
if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
return -EHOSTUNREACH; /* Routing failure or similar. */
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 6db3124cdbda..257abdde23b0 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -213,11 +213,18 @@ static int tcp_write_timeout(struct sock *sk)
icsk->icsk_user_timeout);
}
tcp_fastopen_active_detect_blackhole(sk, expired);
+
+ if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTO_CB_FLAG))
+ tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RTO_CB,
+ icsk->icsk_retransmits,
+ icsk->icsk_rto, (int)expired);
+
if (expired) {
/* Has it gone just too far? */
tcp_write_err(sk);
return 1;
}
+
return 0;
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 853321555a4e..3f018f34cf56 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -977,8 +977,21 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (!saddr)
saddr = inet->mc_addr;
connected = 0;
- } else if (!ipc.oif)
+ } else if (!ipc.oif) {
ipc.oif = inet->uc_index;
+ } else if (ipv4_is_lbcast(daddr) && inet->uc_index) {
+ /* oif is set, packet is to local broadcast and
+ * and uc_index is set. oif is most likely set
+ * by sk_bound_dev_if. If uc_index != oif check if the
+ * oif is an L3 master and uc_index is an L3 slave.
+ * If so, we want to allow the send using the uc_index.
+ */
+ if (ipc.oif != inet->uc_index &&
+ ipc.oif == l3mdev_master_ifindex_by_index(sock_net(sk),
+ inet->uc_index)) {
+ ipc.oif = inet->uc_index;
+ }
+ }
if (connected)
rt = (struct rtable *)sk_dst_check(sk, 0);
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 01801b77bd0d..ea6e6e7df0ee 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -203,6 +203,9 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
goto out;
}
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP))
+ goto out;
+
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
goto out;
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 8affc6d83d58..63faeee989a9 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -92,6 +92,7 @@ static int xfrm4_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
skb_reset_network_header(skb);
skb_mac_header_rebuild(skb);
+ eth_hdr(skb)->h_proto = skb->protocol;
err = 0;
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 44d109c435bc..3fd1ec775dc2 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -145,6 +145,9 @@ static struct sk_buff *esp6_gso_segment(struct sk_buff *skb,
if (!xo)
return ERR_PTR(-EINVAL);
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP))
+ return ERR_PTR(-EINVAL);
+
x = skb->sp->xvec[skb->sp->len - 1];
aead = x->data;
esph = ip_esp_hdr(skb);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index a88480193d77..05f070e123e4 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -513,8 +513,8 @@ static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len,
ipv6h = ipv6_hdr(skb);
ershdr = (struct erspan_base_hdr *)skb->data;
- ver = (ntohs(ershdr->ver_vlan) & VER_MASK) >> VER_OFFSET;
- tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK);
+ ver = ershdr->ver;
+ tpi->key = cpu_to_be32(get_session_id(ershdr));
tunnel = ip6gre_tunnel_lookup(skb->dev,
&ipv6h->saddr, &ipv6h->daddr, tpi->key,
@@ -565,14 +565,8 @@ static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len,
if (ver == 1) {
tunnel->parms.index = ntohl(pkt_md->u.index);
} else {
- u16 md2_flags;
- u16 dir, hwid;
-
- md2_flags = ntohs(pkt_md->u.md2.flags);
- dir = (md2_flags & DIR_MASK) >> DIR_OFFSET;
- hwid = (md2_flags & HWID_MASK) >> HWID_OFFSET;
- tunnel->parms.dir = dir;
- tunnel->parms.hwid = hwid;
+ tunnel->parms.dir = pkt_md->u.md2.dir;
+ tunnel->parms.hwid = get_hwid(&pkt_md->u.md2);
}
ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
@@ -925,6 +919,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
struct erspan_metadata *md;
+ __be32 tun_id;
tun_info = skb_tunnel_info(skb);
if (unlikely(!tun_info ||
@@ -944,23 +939,18 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
if (!md)
goto tx_err;
+ tun_id = tunnel_id_to_key32(key->tun_id);
if (md->version == 1) {
erspan_build_header(skb,
- tunnel_id_to_key32(key->tun_id),
+ ntohl(tun_id),
ntohl(md->u.index), truncate,
false);
} else if (md->version == 2) {
- u16 md2_flags;
- u16 dir, hwid;
-
- md2_flags = ntohs(md->u.md2.flags);
- dir = (md2_flags & DIR_MASK) >> DIR_OFFSET;
- hwid = (md2_flags & HWID_MASK) >> HWID_OFFSET;
-
erspan_build_header_v2(skb,
- tunnel_id_to_key32(key->tun_id),
- dir, hwid, truncate,
- false);
+ ntohl(tun_id),
+ md->u.md2.dir,
+ get_hwid(&md->u.md2),
+ truncate, false);
}
} else {
switch (skb->protocol) {
@@ -982,11 +972,11 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
}
if (t->parms.erspan_ver == 1)
- erspan_build_header(skb, t->parms.o_key,
+ erspan_build_header(skb, ntohl(t->parms.o_key),
t->parms.index,
truncate, false);
else
- erspan_build_header_v2(skb, t->parms.o_key,
+ erspan_build_header_v2(skb, ntohl(t->parms.o_key),
t->parms.dir,
t->parms.hwid,
truncate, false);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index a4a94452132b..997c7f19ad62 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -174,7 +174,7 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
!(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
-static bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
+bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
{
if (!np->autoflowlabel_set)
return ip6_default_np_autolabel(net);
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 2d4680e0376f..e8ffb5b5d84e 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -1336,7 +1336,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
break;
case IPV6_AUTOFLOWLABEL:
- val = np->autoflowlabel;
+ val = ip6_autoflowlabel(sock_net(sk), np);
break;
case IPV6_RECVFRAGSIZE:
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index f85da2f1e729..fe3966a9c999 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2440,7 +2440,8 @@ static int ip6_convert_metrics(struct mx6_config *mxc,
static struct rt6_info *ip6_nh_lookup_table(struct net *net,
struct fib6_config *cfg,
- const struct in6_addr *gw_addr)
+ const struct in6_addr *gw_addr,
+ u32 tbid, int flags)
{
struct flowi6 fl6 = {
.flowi6_oif = cfg->fc_ifindex,
@@ -2449,15 +2450,15 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
};
struct fib6_table *table;
struct rt6_info *rt;
- int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;
- table = fib6_get_table(net, cfg->fc_table);
+ table = fib6_get_table(net, tbid);
if (!table)
return NULL;
if (!ipv6_addr_any(&cfg->fc_prefsrc))
flags |= RT6_LOOKUP_F_HAS_SADDR;
+ flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
/* if table lookup failed, fall back to full lookup */
@@ -2469,6 +2470,82 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
return rt;
}
+static int ip6_route_check_nh_onlink(struct net *net,
+ struct fib6_config *cfg,
+ struct net_device *dev,
+ struct netlink_ext_ack *extack)
+{
+ u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL;
+ const struct in6_addr *gw_addr = &cfg->fc_gateway;
+ u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
+ struct rt6_info *grt;
+ int err;
+
+ err = 0;
+ grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
+ if (grt) {
+ if (grt->rt6i_flags & flags || dev != grt->dst.dev) {
+ NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
+ err = -EINVAL;
+ }
+
+ ip6_rt_put(grt);
+ }
+
+ return err;
+}
+
+static int ip6_route_check_nh(struct net *net,
+ struct fib6_config *cfg,
+ struct net_device **_dev,
+ struct inet6_dev **idev)
+{
+ const struct in6_addr *gw_addr = &cfg->fc_gateway;
+ struct net_device *dev = _dev ? *_dev : NULL;
+ struct rt6_info *grt = NULL;
+ int err = -EHOSTUNREACH;
+
+ if (cfg->fc_table) {
+ int flags = RT6_LOOKUP_F_IFACE;
+
+ grt = ip6_nh_lookup_table(net, cfg, gw_addr,
+ cfg->fc_table, flags);
+ if (grt) {
+ if (grt->rt6i_flags & RTF_GATEWAY ||
+ (dev && dev != grt->dst.dev)) {
+ ip6_rt_put(grt);
+ grt = NULL;
+ }
+ }
+ }
+
+ if (!grt)
+ grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
+
+ if (!grt)
+ goto out;
+
+ if (dev) {
+ if (dev != grt->dst.dev) {
+ ip6_rt_put(grt);
+ goto out;
+ }
+ } else {
+ *_dev = dev = grt->dst.dev;
+ *idev = grt->rt6i_idev;
+ dev_hold(dev);
+ in6_dev_hold(grt->rt6i_idev);
+ }
+
+ if (!(grt->rt6i_flags & RTF_GATEWAY))
+ err = 0;
+
+ ip6_rt_put(grt);
+
+out:
+ return err;
+}
+
static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
@@ -2520,6 +2597,21 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
if (cfg->fc_metric == 0)
cfg->fc_metric = IP6_RT_PRIO_USER;
+ if (cfg->fc_flags & RTNH_F_ONLINK) {
+ if (!dev) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop device required for onlink");
+ err = -ENODEV;
+ goto out;
+ }
+
+ if (!(dev->flags & IFF_UP)) {
+ NL_SET_ERR_MSG(extack, "Nexthop device is not up");
+ err = -ENETDOWN;
+ goto out;
+ }
+ }
+
err = -ENOBUFS;
if (cfg->fc_nlinfo.nlh &&
!(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
@@ -2664,8 +2756,6 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
rt->rt6i_gateway = *gw_addr;
if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
- struct rt6_info *grt = NULL;
-
/* IPv6 strictly inhibits using not link-local
addresses as nexthop address.
Otherwise, router will not able to send redirects.
@@ -2682,40 +2772,12 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
goto out;
}
- if (cfg->fc_table) {
- grt = ip6_nh_lookup_table(net, cfg, gw_addr);
-
- if (grt) {
- if (grt->rt6i_flags & RTF_GATEWAY ||
- (dev && dev != grt->dst.dev)) {
- ip6_rt_put(grt);
- grt = NULL;
- }
- }
- }
-
- if (!grt)
- grt = rt6_lookup(net, gw_addr, NULL,
- cfg->fc_ifindex, 1);
-
- err = -EHOSTUNREACH;
- if (!grt)
- goto out;
- if (dev) {
- if (dev != grt->dst.dev) {
- ip6_rt_put(grt);
- goto out;
- }
+ if (cfg->fc_flags & RTNH_F_ONLINK) {
+ err = ip6_route_check_nh_onlink(net, cfg, dev,
+ extack);
} else {
- dev = grt->dst.dev;
- idev = grt->rt6i_idev;
- dev_hold(dev);
- in6_dev_hold(grt->rt6i_idev);
+ err = ip6_route_check_nh(net, cfg, &dev, &idev);
}
- if (!(grt->rt6i_flags & RTF_GATEWAY))
- err = 0;
- ip6_rt_put(grt);
-
if (err)
goto out;
}
@@ -2734,6 +2796,12 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
if (!dev)
goto out;
+ if (!(dev->flags & IFF_UP)) {
+ NL_SET_ERR_MSG(extack, "Nexthop device is not up");
+ err = -ENETDOWN;
+ goto out;
+ }
+
if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
NL_SET_ERR_MSG(extack, "Invalid source address");
@@ -2751,6 +2819,7 @@ install_route:
if (!(rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
!netif_carrier_ok(dev))
rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
+ rt->rt6i_nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
rt->dst.dev = dev;
rt->rt6i_idev = idev;
rt->rt6i_table = table;
@@ -3820,6 +3889,8 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
if (rtm->rtm_flags & RTM_F_CLONED)
cfg->fc_flags |= RTF_CACHE;
+ cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
+
cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
cfg->fc_nlinfo.nlh = nlh;
cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
@@ -4225,6 +4296,7 @@ static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
goto nla_put_failure;
}
+ *flags |= (rt->rt6i_nh_flags & RTNH_F_ONLINK);
if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
*flags |= RTNH_F_OFFLOAD;
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index d883c9204c01..278e49cd67d4 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -46,6 +46,9 @@ static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb,
{
struct tcphdr *th;
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6))
+ return ERR_PTR(-EINVAL);
+
if (!pskb_may_pull(skb, sizeof(*th)))
return ERR_PTR(-EINVAL);
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index a0f89ad76f9d..2a04dc9c781b 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -42,6 +42,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
const struct ipv6hdr *ipv6h;
struct udphdr *uh;
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP))
+ goto out;
+
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
goto out;
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 4e12859bc2ee..bb935a3b7fea 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -92,6 +92,7 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
skb_reset_network_header(skb);
skb_mac_header_rebuild(skb);
+ eth_hdr(skb)->h_proto = skb->protocol;
err = 0;
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index d4e98f20fc2a..4a8d407f8902 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1387,8 +1387,13 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
if (!csk)
return -EINVAL;
- /* We must prevent loops or risk deadlock ! */
- if (csk->sk_family == PF_KCM)
+ /* Only allow TCP sockets to be attached for now */
+ if ((csk->sk_family != AF_INET && csk->sk_family != AF_INET6) ||
+ csk->sk_protocol != IPPROTO_TCP)
+ return -EOPNOTSUPP;
+
+ /* Don't allow listeners or closed sockets */
+ if (csk->sk_state == TCP_LISTEN || csk->sk_state == TCP_CLOSE)
return -EOPNOTSUPP;
psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL);
@@ -1405,9 +1410,18 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
return err;
}
- sock_hold(csk);
-
write_lock_bh(&csk->sk_callback_lock);
+
+ /* Check if sk_user_data is aready by KCM or someone else.
+ * Must be done under lock to prevent race conditions.
+ */
+ if (csk->sk_user_data) {
+ write_unlock_bh(&csk->sk_callback_lock);
+ strp_done(&psock->strp);
+ kmem_cache_free(kcm_psockp, psock);
+ return -EALREADY;
+ }
+
psock->save_data_ready = csk->sk_data_ready;
psock->save_write_space = csk->sk_write_space;
psock->save_state_change = csk->sk_state_change;
@@ -1415,8 +1429,11 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
csk->sk_data_ready = psock_data_ready;
csk->sk_write_space = psock_write_space;
csk->sk_state_change = psock_state_change;
+
write_unlock_bh(&csk->sk_callback_lock);
+ sock_hold(csk);
+
/* Finished initialization, now add the psock to the MUX. */
spin_lock_bh(&mux->lock);
head = &mux->psocks;
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index b15412c21ac9..444ea8d127fe 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -420,7 +420,7 @@ static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf,
default:
p += scnprintf(p, sizeof(buf) + buf - p,
"\t\tMAX-MPDU-UNKNOWN\n");
- };
+ }
switch (vhtc->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) {
case 0:
p += scnprintf(p, sizeof(buf) + buf - p,
@@ -438,7 +438,7 @@ static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf,
p += scnprintf(p, sizeof(buf) + buf - p,
"\t\tUNKNOWN-MHZ: 0x%x\n",
(vhtc->cap >> 2) & 0x3);
- };
+ }
PFLAG(RXLDPC, "RXLDPC");
PFLAG(SHORT_GI_80, "SHORT-GI-80");
PFLAG(SHORT_GI_160, "SHORT-GI-160");
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index eb55f1b3d047..7322aa1e382e 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -49,6 +49,7 @@
#include <net/mpls.h>
#include <net/vxlan.h>
#include <net/tun_proto.h>
+#include <net/erspan.h>
#include "flow_netlink.h"
@@ -329,7 +330,8 @@ size_t ovs_tun_key_attr_size(void)
+ nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */
+ nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */
+ nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
- /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS is mutually exclusive with
+ /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS and
+ * OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS is mutually exclusive with
* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it.
*/
+ nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
@@ -400,6 +402,7 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1]
.next = ovs_vxlan_ext_key_lens },
[OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = sizeof(struct in6_addr) },
[OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) },
+ [OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS] = { .len = OVS_ATTR_VARIABLE },
};
static const struct ovs_len_tbl
@@ -631,6 +634,33 @@ static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr,
return 0;
}
+static int erspan_tun_opt_from_nlattr(const struct nlattr *a,
+ struct sw_flow_match *match, bool is_mask,
+ bool log)
+{
+ unsigned long opt_key_offset;
+
+ BUILD_BUG_ON(sizeof(struct erspan_metadata) >
+ sizeof(match->key->tun_opts));
+
+ if (nla_len(a) > sizeof(match->key->tun_opts)) {
+ OVS_NLERR(log, "ERSPAN option length err (len %d, max %zu).",
+ nla_len(a), sizeof(match->key->tun_opts));
+ return -EINVAL;
+ }
+
+ if (!is_mask)
+ SW_FLOW_KEY_PUT(match, tun_opts_len,
+ sizeof(struct erspan_metadata), false);
+ else
+ SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
+
+ opt_key_offset = TUN_METADATA_OFFSET(nla_len(a));
+ SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a),
+ nla_len(a), is_mask);
+ return 0;
+}
+
static int ip_tun_from_nlattr(const struct nlattr *attr,
struct sw_flow_match *match, bool is_mask,
bool log)
@@ -738,6 +768,20 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
break;
case OVS_TUNNEL_KEY_ATTR_PAD:
break;
+ case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
+ if (opts_type) {
+ OVS_NLERR(log, "Multiple metadata blocks provided");
+ return -EINVAL;
+ }
+
+ err = erspan_tun_opt_from_nlattr(a, match, is_mask,
+ log);
+ if (err)
+ return err;
+
+ tun_flags |= TUNNEL_ERSPAN_OPT;
+ opts_type = type;
+ break;
default:
OVS_NLERR(log, "Unknown IP tunnel attribute %d",
type);
@@ -862,6 +906,10 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb,
else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
return -EMSGSIZE;
+ else if (output->tun_flags & TUNNEL_ERSPAN_OPT &&
+ nla_put(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
+ swkey_tun_opts_len, tun_opts))
+ return -EMSGSIZE;
}
return 0;
@@ -2486,6 +2534,8 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
break;
case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
break;
+ case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
+ break;
}
}
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 2e554ef6d75f..9920d2f84eff 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -90,9 +90,10 @@ void rds_tcp_nonagle(struct socket *sock)
sizeof(val));
}
-u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc)
+u32 rds_tcp_write_seq(struct rds_tcp_connection *tc)
{
- return tcp_sk(tc->t_sock->sk)->snd_nxt;
+ /* seq# of the last byte of data in tcp send buffer */
+ return tcp_sk(tc->t_sock->sk)->write_seq;
}
u32 rds_tcp_snd_una(struct rds_tcp_connection *tc)
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index e7858ee8ed8b..c6fa080e9b6d 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -55,7 +55,7 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp);
void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp);
void rds_tcp_restore_callbacks(struct socket *sock,
struct rds_tcp_connection *tc);
-u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc);
+u32 rds_tcp_write_seq(struct rds_tcp_connection *tc);
u32 rds_tcp_snd_una(struct rds_tcp_connection *tc);
u64 rds_tcp_map_seq(struct rds_tcp_connection *tc, u32 seq);
extern struct rds_transport rds_tcp_transport;
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 73c74763ca72..16f65744d984 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -86,7 +86,7 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
* m_ack_seq is set to the sequence number of the last byte of
* header and data. see rds_tcp_is_acked().
*/
- tc->t_last_sent_nxt = rds_tcp_snd_nxt(tc);
+ tc->t_last_sent_nxt = rds_tcp_write_seq(tc);
rm->m_ack_seq = tc->t_last_sent_nxt +
sizeof(struct rds_header) +
be32_to_cpu(rm->m_inc.i_hdr.h_len) - 1;
@@ -98,7 +98,7 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
rm->m_inc.i_hdr.h_flags |= RDS_FLAG_RETRANSMITTED;
rdsdebug("rm %p tcp nxt %u ack_seq %llu\n",
- rm, rds_tcp_snd_nxt(tc),
+ rm, rds_tcp_write_seq(tc),
(unsigned long long)rm->m_ack_seq);
}
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index af4b8ec60d9a..b7ba9b06b147 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -49,6 +49,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
int bind)
{
struct tc_action_net *tn = net_generic(net, csum_net_id);
+ struct tcf_csum_params *params_old, *params_new;
struct nlattr *tb[TCA_CSUM_MAX + 1];
struct tc_csum *parm;
struct tcf_csum *p;
@@ -67,7 +68,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
if (!tcf_idr_check(tn, parm->index, a, bind)) {
ret = tcf_idr_create(tn, parm->index, est, a,
- &act_csum_ops, bind, false);
+ &act_csum_ops, bind, true);
if (ret)
return ret;
ret = ACT_P_CREATED;
@@ -80,10 +81,21 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
}
p = to_tcf_csum(*a);
- spin_lock_bh(&p->tcf_lock);
- p->tcf_action = parm->action;
- p->update_flags = parm->update_flags;
- spin_unlock_bh(&p->tcf_lock);
+ ASSERT_RTNL();
+
+ params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
+ if (unlikely(!params_new)) {
+ if (ret == ACT_P_CREATED)
+ tcf_idr_release(*a, bind);
+ return -ENOMEM;
+ }
+ params_old = rtnl_dereference(p->params);
+
+ params_new->action = parm->action;
+ params_new->update_flags = parm->update_flags;
+ rcu_assign_pointer(p->params, params_new);
+ if (params_old)
+ kfree_rcu(params_old, rcu);
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
@@ -539,19 +551,21 @@ static int tcf_csum(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_csum *p = to_tcf_csum(a);
- int action;
+ struct tcf_csum_params *params;
u32 update_flags;
+ int action;
+
+ rcu_read_lock();
+ params = rcu_dereference(p->params);
- spin_lock(&p->tcf_lock);
tcf_lastuse_update(&p->tcf_tm);
- bstats_update(&p->tcf_bstats, skb);
- action = p->tcf_action;
- update_flags = p->update_flags;
- spin_unlock(&p->tcf_lock);
+ bstats_cpu_update(this_cpu_ptr(p->common.cpu_bstats), skb);
+ action = params->action;
if (unlikely(action == TC_ACT_SHOT))
- goto drop;
+ goto drop_stats;
+ update_flags = params->update_flags;
switch (tc_skb_protocol(skb)) {
case cpu_to_be16(ETH_P_IP):
if (!tcf_csum_ipv4(skb, update_flags))
@@ -563,13 +577,16 @@ static int tcf_csum(struct sk_buff *skb, const struct tc_action *a,
break;
}
+unlock:
+ rcu_read_unlock();
return action;
drop:
- spin_lock(&p->tcf_lock);
- p->tcf_qstats.drops++;
- spin_unlock(&p->tcf_lock);
- return TC_ACT_SHOT;
+ action = TC_ACT_SHOT;
+
+drop_stats:
+ qstats_drop_inc(this_cpu_ptr(p->common.cpu_qstats));
+ goto unlock;
}
static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
@@ -577,15 +594,18 @@ static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_csum *p = to_tcf_csum(a);
+ struct tcf_csum_params *params;
struct tc_csum opt = {
- .update_flags = p->update_flags,
.index = p->tcf_index,
- .action = p->tcf_action,
.refcnt = p->tcf_refcnt - ref,
.bindcnt = p->tcf_bindcnt - bind,
};
struct tcf_t t;
+ params = rtnl_dereference(p->params);
+ opt.action = params->action;
+ opt.update_flags = params->update_flags;
+
if (nla_put(skb, TCA_CSUM_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
@@ -600,6 +620,15 @@ nla_put_failure:
return -1;
}
+static void tcf_csum_cleanup(struct tc_action *a)
+{
+ struct tcf_csum *p = to_tcf_csum(a);
+ struct tcf_csum_params *params;
+
+ params = rcu_dereference_protected(p->params, 1);
+ kfree_rcu(params, rcu);
+}
+
static int tcf_csum_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
const struct tc_action_ops *ops)
@@ -623,6 +652,7 @@ static struct tc_action_ops act_csum_ops = {
.act = tcf_csum,
.dump = tcf_csum_dump,
.init = tcf_csum_init,
+ .cleanup = tcf_csum_cleanup,
.walk = tcf_csum_walker,
.lookup = tcf_csum_search,
.size = sizeof(struct tcf_csum),
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index f5d293416f46..bcb4ccb5f894 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -172,9 +172,10 @@ errout:
return ERR_PTR(err);
}
-static void tcf_proto_destroy(struct tcf_proto *tp)
+static void tcf_proto_destroy(struct tcf_proto *tp,
+ struct netlink_ext_ack *extack)
{
- tp->ops->destroy(tp);
+ tp->ops->destroy(tp, extack);
module_put(tp->ops->owner);
kfree_rcu(tp, rcu);
}
@@ -223,7 +224,7 @@ static void tcf_chain_flush(struct tcf_chain *chain)
tcf_chain_head_change(chain, NULL);
while (tp) {
RCU_INIT_POINTER(chain->filter_chain, tp->next);
- tcf_proto_destroy(tp);
+ tcf_proto_destroy(tp, NULL);
tp = rtnl_dereference(chain->filter_chain);
tcf_chain_put(chain);
}
@@ -1182,7 +1183,7 @@ replay:
tcf_chain_tp_remove(chain, &chain_info, tp);
tfilter_notify(net, skb, n, tp, block, q, parent, fh,
RTM_DELTFILTER, false);
- tcf_proto_destroy(tp);
+ tcf_proto_destroy(tp, extack);
err = 0;
goto errout;
}
@@ -1200,7 +1201,7 @@ replay:
case RTM_NEWTFILTER:
if (n->nlmsg_flags & NLM_F_EXCL) {
if (tp_created)
- tcf_proto_destroy(tp);
+ tcf_proto_destroy(tp, NULL);
NL_SET_ERR_MSG(extack, "Filter already exists");
err = -EEXIST;
goto errout;
@@ -1214,7 +1215,7 @@ replay:
goto errout;
if (last) {
tcf_chain_tp_remove(chain, &chain_info, tp);
- tcf_proto_destroy(tp);
+ tcf_proto_destroy(tp, extack);
}
goto errout;
case RTM_GETTFILTER:
@@ -1240,7 +1241,7 @@ replay:
RTM_NEWTFILTER, false);
} else {
if (tp_created)
- tcf_proto_destroy(tp);
+ tcf_proto_destroy(tp, NULL);
}
errout:
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 6088be65d167..d333f5c5101d 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -112,7 +112,7 @@ static void basic_delete_filter(struct rcu_head *head)
tcf_queue_work(&f->work);
}
-static void basic_destroy(struct tcf_proto *tp)
+static void basic_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct basic_head *head = rtnl_dereference(tp->root);
struct basic_filter *f, *n;
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 988ad45d78b8..8e5326bc6440 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -147,7 +147,8 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog)
}
static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
- struct cls_bpf_prog *oldprog)
+ struct cls_bpf_prog *oldprog,
+ struct netlink_ext_ack *extack)
{
struct tcf_block *block = tp->chain->block;
struct tc_cls_bpf_offload cls_bpf = {};
@@ -158,14 +159,14 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
skip_sw = prog && tc_skip_sw(prog->gen_flags);
obj = prog ?: oldprog;
- tc_cls_common_offload_init(&cls_bpf.common, tp);
+ tc_cls_common_offload_init(&cls_bpf.common, tp, obj->gen_flags,
+ extack);
cls_bpf.command = TC_CLSBPF_OFFLOAD;
cls_bpf.exts = &obj->exts;
cls_bpf.prog = prog ? prog->filter : NULL;
cls_bpf.oldprog = oldprog ? oldprog->filter : NULL;
cls_bpf.name = obj->bpf_name;
cls_bpf.exts_integrated = obj->exts_integrated;
- cls_bpf.gen_flags = obj->gen_flags;
if (oldprog)
tcf_block_offload_dec(block, &oldprog->gen_flags);
@@ -173,7 +174,7 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw);
if (prog) {
if (err < 0) {
- cls_bpf_offload_cmd(tp, oldprog, prog);
+ cls_bpf_offload_cmd(tp, oldprog, prog, extack);
return err;
} else if (err > 0) {
tcf_block_offload_inc(block, &prog->gen_flags);
@@ -192,7 +193,8 @@ static u32 cls_bpf_flags(u32 flags)
}
static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
- struct cls_bpf_prog *oldprog)
+ struct cls_bpf_prog *oldprog,
+ struct netlink_ext_ack *extack)
{
if (prog && oldprog &&
cls_bpf_flags(prog->gen_flags) !=
@@ -206,15 +208,16 @@ static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
if (!prog && !oldprog)
return 0;
- return cls_bpf_offload_cmd(tp, prog, oldprog);
+ return cls_bpf_offload_cmd(tp, prog, oldprog, extack);
}
static void cls_bpf_stop_offload(struct tcf_proto *tp,
- struct cls_bpf_prog *prog)
+ struct cls_bpf_prog *prog,
+ struct netlink_ext_ack *extack)
{
int err;
- err = cls_bpf_offload_cmd(tp, NULL, prog);
+ err = cls_bpf_offload_cmd(tp, NULL, prog, extack);
if (err)
pr_err("Stopping hardware offload failed: %d\n", err);
}
@@ -225,13 +228,12 @@ static void cls_bpf_offload_update_stats(struct tcf_proto *tp,
struct tcf_block *block = tp->chain->block;
struct tc_cls_bpf_offload cls_bpf = {};
- tc_cls_common_offload_init(&cls_bpf.common, tp);
+ tc_cls_common_offload_init(&cls_bpf.common, tp, prog->gen_flags, NULL);
cls_bpf.command = TC_CLSBPF_STATS;
cls_bpf.exts = &prog->exts;
cls_bpf.prog = prog->filter;
cls_bpf.name = prog->bpf_name;
cls_bpf.exts_integrated = prog->exts_integrated;
- cls_bpf.gen_flags = prog->gen_flags;
tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, false);
}
@@ -288,12 +290,13 @@ static void cls_bpf_delete_prog_rcu(struct rcu_head *rcu)
tcf_queue_work(&prog->work);
}
-static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog)
+static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog,
+ struct netlink_ext_ack *extack)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
idr_remove_ext(&head->handle_idr, prog->handle);
- cls_bpf_stop_offload(tp, prog);
+ cls_bpf_stop_offload(tp, prog, extack);
list_del_rcu(&prog->link);
tcf_unbind_filter(tp, &prog->res);
if (tcf_exts_get_net(&prog->exts))
@@ -307,18 +310,19 @@ static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last,
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
- __cls_bpf_delete(tp, arg);
+ __cls_bpf_delete(tp, arg, extack);
*last = list_empty(&head->plist);
return 0;
}
-static void cls_bpf_destroy(struct tcf_proto *tp)
+static void cls_bpf_destroy(struct tcf_proto *tp,
+ struct netlink_ext_ack *extack)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
struct cls_bpf_prog *prog, *tmp;
list_for_each_entry_safe(prog, tmp, &head->plist, link)
- __cls_bpf_delete(tp, prog);
+ __cls_bpf_delete(tp, prog, extack);
idr_destroy(&head->handle_idr);
kfree_rcu(head, rcu);
@@ -514,7 +518,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
if (ret < 0)
goto errout_idr;
- ret = cls_bpf_offload(tp, prog, oldprog);
+ ret = cls_bpf_offload(tp, prog, oldprog, extack);
if (ret)
goto errout_parms;
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 1b54fbfca414..762da5c0cf5e 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -143,7 +143,8 @@ errout:
return err;
}
-static void cls_cgroup_destroy(struct tcf_proto *tp)
+static void cls_cgroup_destroy(struct tcf_proto *tp,
+ struct netlink_ext_ack *extack)
{
struct cls_cgroup_head *head = rtnl_dereference(tp->root);
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 64c24b488058..cd5fe383afdd 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -600,7 +600,7 @@ static int flow_init(struct tcf_proto *tp)
return 0;
}
-static void flow_destroy(struct tcf_proto *tp)
+static void flow_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *f, *next;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index c6ac4a612c4a..dc9acaafc0a8 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -218,12 +218,13 @@ static void fl_destroy_filter(struct rcu_head *head)
tcf_queue_work(&f->work);
}
-static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f)
+static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f,
+ struct netlink_ext_ack *extack)
{
struct tc_cls_flower_offload cls_flower = {};
struct tcf_block *block = tp->chain->block;
- tc_cls_common_offload_init(&cls_flower.common, tp);
+ tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack);
cls_flower.command = TC_CLSFLOWER_DESTROY;
cls_flower.cookie = (unsigned long) f;
@@ -235,14 +236,15 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f)
static int fl_hw_replace_filter(struct tcf_proto *tp,
struct flow_dissector *dissector,
struct fl_flow_key *mask,
- struct cls_fl_filter *f)
+ struct cls_fl_filter *f,
+ struct netlink_ext_ack *extack)
{
struct tc_cls_flower_offload cls_flower = {};
struct tcf_block *block = tp->chain->block;
bool skip_sw = tc_skip_sw(f->flags);
int err;
- tc_cls_common_offload_init(&cls_flower.common, tp);
+ tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack);
cls_flower.command = TC_CLSFLOWER_REPLACE;
cls_flower.cookie = (unsigned long) f;
cls_flower.dissector = dissector;
@@ -254,7 +256,7 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
err = tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
&cls_flower, skip_sw);
if (err < 0) {
- fl_hw_destroy_filter(tp, f);
+ fl_hw_destroy_filter(tp, f, NULL);
return err;
} else if (err > 0) {
tcf_block_offload_inc(block, &f->flags);
@@ -271,7 +273,7 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
struct tc_cls_flower_offload cls_flower = {};
struct tcf_block *block = tp->chain->block;
- tc_cls_common_offload_init(&cls_flower.common, tp);
+ tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, NULL);
cls_flower.command = TC_CLSFLOWER_STATS;
cls_flower.cookie = (unsigned long) f;
cls_flower.exts = &f->exts;
@@ -281,14 +283,15 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
&cls_flower, false);
}
-static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f)
+static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
+ struct netlink_ext_ack *extack)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
idr_remove_ext(&head->handle_idr, f->handle);
list_del_rcu(&f->list);
if (!tc_skip_hw(f->flags))
- fl_hw_destroy_filter(tp, f);
+ fl_hw_destroy_filter(tp, f, extack);
tcf_unbind_filter(tp, &f->res);
if (tcf_exts_get_net(&f->exts))
call_rcu(&f->rcu, fl_destroy_filter);
@@ -314,13 +317,13 @@ static void fl_destroy_rcu(struct rcu_head *rcu)
schedule_work(&head->work);
}
-static void fl_destroy(struct tcf_proto *tp)
+static void fl_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
struct cls_fl_filter *f, *next;
list_for_each_entry_safe(f, next, &head->filters, list)
- __fl_delete(tp, f);
+ __fl_delete(tp, f, extack);
idr_destroy(&head->handle_idr);
__module_get(THIS_MODULE);
@@ -943,7 +946,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
err = fl_hw_replace_filter(tp,
&head->dissector,
&mask.key,
- fnew);
+ fnew,
+ extack);
if (err)
goto errout_idr;
}
@@ -956,7 +960,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
rhashtable_remove_fast(&head->ht, &fold->ht_node,
head->ht_params);
if (!tc_skip_hw(fold->flags))
- fl_hw_destroy_filter(tp, fold);
+ fl_hw_destroy_filter(tp, fold, NULL);
}
*arg = fnew;
@@ -995,7 +999,7 @@ static int fl_delete(struct tcf_proto *tp, void *arg, bool *last,
if (!tc_skip_sw(f->flags))
rhashtable_remove_fast(&head->ht, &f->ht_node,
head->ht_params);
- __fl_delete(tp, f);
+ __fl_delete(tp, f, extack);
*last = list_empty(&head->filters);
return 0;
}
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 94d159a8869a..8b207723fbc2 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -149,7 +149,7 @@ static void fw_delete_filter(struct rcu_head *head)
tcf_queue_work(&f->work);
}
-static void fw_destroy(struct tcf_proto *tp)
+static void fw_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f;
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index f67d3d7fcf40..2ba721a590a7 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -71,12 +71,13 @@ static void mall_destroy_rcu(struct rcu_head *rcu)
static void mall_destroy_hw_filter(struct tcf_proto *tp,
struct cls_mall_head *head,
- unsigned long cookie)
+ unsigned long cookie,
+ struct netlink_ext_ack *extack)
{
struct tc_cls_matchall_offload cls_mall = {};
struct tcf_block *block = tp->chain->block;
- tc_cls_common_offload_init(&cls_mall.common, tp);
+ tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, extack);
cls_mall.command = TC_CLSMATCHALL_DESTROY;
cls_mall.cookie = cookie;
@@ -86,14 +87,15 @@ static void mall_destroy_hw_filter(struct tcf_proto *tp,
static int mall_replace_hw_filter(struct tcf_proto *tp,
struct cls_mall_head *head,
- unsigned long cookie)
+ unsigned long cookie,
+ struct netlink_ext_ack *extack)
{
struct tc_cls_matchall_offload cls_mall = {};
struct tcf_block *block = tp->chain->block;
bool skip_sw = tc_skip_sw(head->flags);
int err;
- tc_cls_common_offload_init(&cls_mall.common, tp);
+ tc_cls_common_offload_init(&cls_mall.common, tp, head->flags, extack);
cls_mall.command = TC_CLSMATCHALL_REPLACE;
cls_mall.exts = &head->exts;
cls_mall.cookie = cookie;
@@ -101,7 +103,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL,
&cls_mall, skip_sw);
if (err < 0) {
- mall_destroy_hw_filter(tp, head, cookie);
+ mall_destroy_hw_filter(tp, head, cookie, NULL);
return err;
} else if (err > 0) {
tcf_block_offload_inc(block, &head->flags);
@@ -113,7 +115,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
return 0;
}
-static void mall_destroy(struct tcf_proto *tp)
+static void mall_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
@@ -121,7 +123,7 @@ static void mall_destroy(struct tcf_proto *tp)
return;
if (!tc_skip_hw(head->flags))
- mall_destroy_hw_filter(tp, head, (unsigned long) head);
+ mall_destroy_hw_filter(tp, head, (unsigned long) head, extack);
if (tcf_exts_get_net(&head->exts))
call_rcu(&head->rcu, mall_destroy_rcu);
@@ -205,7 +207,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
goto err_set_parms;
if (!tc_skip_hw(new->flags)) {
- err = mall_replace_hw_filter(tp, new, (unsigned long)new);
+ err = mall_replace_hw_filter(tp, new, (unsigned long)new,
+ extack);
if (err)
goto err_replace_hw_filter;
}
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 55467c30d524..21a03a8ee029 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -281,7 +281,7 @@ static void route4_delete_filter(struct rcu_head *head)
tcf_queue_work(&f->work);
}
-static void route4_destroy(struct tcf_proto *tp)
+static void route4_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct route4_head *head = rtnl_dereference(tp->root);
int h1, h2;
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 5cc0df690cff..4f1297657c27 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -322,7 +322,7 @@ static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
__rsvp_delete_filter(f);
}
-static void rsvp_destroy(struct tcf_proto *tp)
+static void rsvp_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct rsvp_head *data = rtnl_dereference(tp->root);
int h1, h2;
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 01a163e0b6aa..b49cc990a000 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -581,7 +581,8 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
}
}
-static void tcindex_destroy(struct tcf_proto *tp)
+static void tcindex_destroy(struct tcf_proto *tp,
+ struct netlink_ext_ack *extack)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcf_walker walker;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 57113e936155..60c892c36a60 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -87,6 +87,7 @@ struct tc_u_hnode {
unsigned int divisor;
struct idr handle_idr;
struct rcu_head rcu;
+ u32 flags;
/* The 'ht' field MUST be the last field in structure to allow for
* more entries allocated at end of structure.
*/
@@ -486,12 +487,13 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
return 0;
}
-static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
+static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
+ struct netlink_ext_ack *extack)
{
struct tcf_block *block = tp->chain->block;
struct tc_cls_u32_offload cls_u32 = {};
- tc_cls_common_offload_init(&cls_u32.common, tp);
+ tc_cls_common_offload_init(&cls_u32.common, tp, h->flags, extack);
cls_u32.command = TC_CLSU32_DELETE_HNODE;
cls_u32.hnode.divisor = h->divisor;
cls_u32.hnode.handle = h->handle;
@@ -501,7 +503,7 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
}
static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
- u32 flags)
+ u32 flags, struct netlink_ext_ack *extack)
{
struct tcf_block *block = tp->chain->block;
struct tc_cls_u32_offload cls_u32 = {};
@@ -509,7 +511,7 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
bool offloaded = false;
int err;
- tc_cls_common_offload_init(&cls_u32.common, tp);
+ tc_cls_common_offload_init(&cls_u32.common, tp, flags, extack);
cls_u32.command = TC_CLSU32_NEW_HNODE;
cls_u32.hnode.divisor = h->divisor;
cls_u32.hnode.handle = h->handle;
@@ -517,7 +519,7 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw);
if (err < 0) {
- u32_clear_hw_hnode(tp, h);
+ u32_clear_hw_hnode(tp, h, NULL);
return err;
} else if (err > 0) {
offloaded = true;
@@ -529,12 +531,13 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
return 0;
}
-static void u32_remove_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n)
+static void u32_remove_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
+ struct netlink_ext_ack *extack)
{
struct tcf_block *block = tp->chain->block;
struct tc_cls_u32_offload cls_u32 = {};
- tc_cls_common_offload_init(&cls_u32.common, tp);
+ tc_cls_common_offload_init(&cls_u32.common, tp, n->flags, extack);
cls_u32.command = TC_CLSU32_DELETE_KNODE;
cls_u32.knode.handle = n->handle;
@@ -543,14 +546,14 @@ static void u32_remove_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n)
}
static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
- u32 flags)
+ u32 flags, struct netlink_ext_ack *extack)
{
struct tcf_block *block = tp->chain->block;
struct tc_cls_u32_offload cls_u32 = {};
bool skip_sw = tc_skip_sw(flags);
int err;
- tc_cls_common_offload_init(&cls_u32.common, tp);
+ tc_cls_common_offload_init(&cls_u32.common, tp, flags, extack);
cls_u32.command = TC_CLSU32_REPLACE_KNODE;
cls_u32.knode.handle = n->handle;
cls_u32.knode.fshift = n->fshift;
@@ -568,7 +571,7 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw);
if (err < 0) {
- u32_remove_hw_knode(tp, n);
+ u32_remove_hw_knode(tp, n, NULL);
return err;
} else if (err > 0) {
tcf_block_offload_inc(block, &n->flags);
@@ -580,7 +583,8 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
return 0;
}
-static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
+static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
+ struct netlink_ext_ack *extack)
{
struct tc_u_knode *n;
unsigned int h;
@@ -590,7 +594,7 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
RCU_INIT_POINTER(ht->ht[h],
rtnl_dereference(n->next));
tcf_unbind_filter(tp, &n->res);
- u32_remove_hw_knode(tp, n);
+ u32_remove_hw_knode(tp, n, extack);
idr_remove_ext(&ht->handle_idr, n->handle);
if (tcf_exts_get_net(&n->exts))
call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
@@ -600,7 +604,8 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
}
}
-static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
+static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
+ struct netlink_ext_ack *extack)
{
struct tc_u_common *tp_c = tp->data;
struct tc_u_hnode __rcu **hn;
@@ -608,14 +613,14 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
WARN_ON(ht->refcnt);
- u32_clear_hnode(tp, ht);
+ u32_clear_hnode(tp, ht, extack);
hn = &tp_c->hlist;
for (phn = rtnl_dereference(*hn);
phn;
hn = &phn->next, phn = rtnl_dereference(*hn)) {
if (phn == ht) {
- u32_clear_hw_hnode(tp, ht);
+ u32_clear_hw_hnode(tp, ht, extack);
idr_destroy(&ht->handle_idr);
idr_remove_ext(&tp_c->handle_idr, ht->handle);
RCU_INIT_POINTER(*hn, ht->next);
@@ -638,7 +643,7 @@ static bool ht_empty(struct tc_u_hnode *ht)
return true;
}
-static void u32_destroy(struct tcf_proto *tp)
+static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct tc_u_common *tp_c = tp->data;
struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
@@ -646,7 +651,7 @@ static void u32_destroy(struct tcf_proto *tp)
WARN_ON(root_ht == NULL);
if (root_ht && --root_ht->refcnt == 0)
- u32_destroy_hnode(tp, root_ht);
+ u32_destroy_hnode(tp, root_ht, extack);
if (--tp_c->refcnt == 0) {
struct tc_u_hnode *ht;
@@ -657,7 +662,7 @@ static void u32_destroy(struct tcf_proto *tp)
ht;
ht = rtnl_dereference(ht->next)) {
ht->refcnt--;
- u32_clear_hnode(tp, ht);
+ u32_clear_hnode(tp, ht, extack);
}
while ((ht = rtnl_dereference(tp_c->hlist)) != NULL) {
@@ -684,7 +689,7 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last,
goto out;
if (TC_U32_KEY(ht->handle)) {
- u32_remove_hw_knode(tp, (struct tc_u_knode *)ht);
+ u32_remove_hw_knode(tp, (struct tc_u_knode *)ht, extack);
ret = u32_delete_key(tp, (struct tc_u_knode *)ht);
goto out;
}
@@ -696,7 +701,7 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last,
if (ht->refcnt == 1) {
ht->refcnt--;
- u32_destroy_hnode(tp, ht);
+ u32_destroy_hnode(tp, ht, extack);
} else {
NL_SET_ERR_MSG_MOD(extack, "Can not delete in-use filter");
return -EBUSY;
@@ -965,7 +970,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return err;
}
- err = u32_replace_hw_knode(tp, new, flags);
+ err = u32_replace_hw_knode(tp, new, flags, extack);
if (err) {
u32_destroy_key(tp, new, false);
return err;
@@ -1015,8 +1020,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
ht->handle = handle;
ht->prio = tp->prio;
idr_init(&ht->handle_idr);
+ ht->flags = flags;
- err = u32_replace_hw_hnode(tp, ht, flags);
+ err = u32_replace_hw_hnode(tp, ht, flags, extack);
if (err) {
idr_remove_ext(&tp_c->handle_idr, handle);
kfree(ht);
@@ -1122,7 +1128,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
struct tc_u_knode __rcu **ins;
struct tc_u_knode *pins;
- err = u32_replace_hw_knode(tp, n, flags);
+ err = u32_replace_hw_knode(tp, n, flags, extack);
if (err)
goto errhw;
diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c
index df3110d69585..07c10bac06a0 100644
--- a/net/sched/em_nbyte.c
+++ b/net/sched/em_nbyte.c
@@ -51,7 +51,7 @@ static int em_nbyte_match(struct sk_buff *skb, struct tcf_ematch *em,
if (!tcf_valid_offset(skb, ptr, nbyte->hdr.len))
return 0;
- return !memcmp(ptr + nbyte->hdr.off, nbyte->pattern, nbyte->hdr.len);
+ return !memcmp(ptr, nbyte->pattern, nbyte->hdr.len);
}
static struct tcf_ematch_ops em_nbyte_ops = {
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index ef8b4ecde2ac..1816bde47256 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -510,7 +510,7 @@ void netif_carrier_on(struct net_device *dev)
if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
if (dev->reg_state == NETREG_UNINITIALIZED)
return;
- atomic_inc(&dev->carrier_changes);
+ atomic_inc(&dev->carrier_up_count);
linkwatch_fire_event(dev);
if (netif_running(dev))
__netdev_watchdog_up(dev);
@@ -529,7 +529,7 @@ void netif_carrier_off(struct net_device *dev)
if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
if (dev->reg_state == NETREG_UNINITIALIZED)
return;
- atomic_inc(&dev->carrier_changes);
+ atomic_inc(&dev->carrier_down_count);
linkwatch_fire_event(dev);
}
}
diff --git a/net/sctp/offload.c b/net/sctp/offload.c
index 275925b93b29..35bc7106d182 100644
--- a/net/sctp/offload.c
+++ b/net/sctp/offload.c
@@ -45,6 +45,9 @@ static struct sk_buff *sctp_gso_segment(struct sk_buff *skb,
struct sk_buff *segs = ERR_PTR(-EINVAL);
struct sctphdr *sh;
+ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP))
+ goto out;
+
sh = sctp_hdr(skb);
if (!pskb_may_pull(skb, sizeof(*sh)))
goto out;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 7ff444ecee75..a40fa53c93ef 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4860,9 +4860,10 @@ int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *),
struct net *net, int *pos, void *p) {
struct rhashtable_iter hti;
struct sctp_transport *tsp;
- int ret = 0;
+ int ret;
again:
+ ret = 0;
sctp_transport_walk_start(&hti);
tsp = sctp_transport_get_idx(net, &hti, *pos + 1);
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index daf8075f5a4c..267e68379110 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -115,7 +115,6 @@ static int smc_release(struct socket *sock)
goto out;
smc = smc_sk(sk);
- sock_hold(sk);
if (sk->sk_state == SMC_LISTEN)
/* smc_close_non_accepted() is called and acquires
* sock lock for child sockets again
@@ -124,10 +123,7 @@ static int smc_release(struct socket *sock)
else
lock_sock(sk);
- if (smc->use_fallback) {
- sk->sk_state = SMC_CLOSED;
- sk->sk_state_change(sk);
- } else {
+ if (!smc->use_fallback) {
rc = smc_close_active(smc);
sock_set_flag(sk, SOCK_DEAD);
sk->sk_shutdown |= SHUTDOWN_MASK;
@@ -136,20 +132,21 @@ static int smc_release(struct socket *sock)
sock_release(smc->clcsock);
smc->clcsock = NULL;
}
+ if (smc->use_fallback) {
+ sock_put(sk); /* passive closing */
+ sk->sk_state = SMC_CLOSED;
+ sk->sk_state_change(sk);
+ }
/* detach socket */
sock_orphan(sk);
sock->sk = NULL;
- if (smc->use_fallback) {
- schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN);
- } else if (sk->sk_state == SMC_CLOSED) {
+ if (!smc->use_fallback && sk->sk_state == SMC_CLOSED)
smc_conn_free(&smc->conn);
- schedule_delayed_work(&smc->sock_put_work,
- SMC_CLOSE_SOCK_PUT_DELAY);
- }
release_sock(sk);
- sock_put(sk);
+ sk->sk_prot->unhash(sk);
+ sock_put(sk); /* final sock_put */
out:
return rc;
}
@@ -181,7 +178,6 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock)
INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
INIT_LIST_HEAD(&smc->accept_q);
spin_lock_init(&smc->accept_q_lock);
- INIT_DELAYED_WORK(&smc->sock_put_work, smc_close_sock_put_work);
sk->sk_prot->hash(sk);
sk_refcnt_debug_inc(sk);
@@ -377,6 +373,15 @@ static void smc_link_save_peer_info(struct smc_link *link,
link->peer_mtu = clc->qp_mtu;
}
+static void smc_lgr_forget(struct smc_link_group *lgr)
+{
+ spin_lock_bh(&smc_lgr_list.lock);
+ /* do not use this link group for new connections */
+ if (!list_empty(&lgr->list))
+ list_del_init(&lgr->list);
+ spin_unlock_bh(&smc_lgr_list.lock);
+}
+
/* setup for RDMA connection of client */
static int smc_connect_rdma(struct smc_sock *smc)
{
@@ -390,6 +395,8 @@ static int smc_connect_rdma(struct smc_sock *smc)
int rc = 0;
u8 ibport;
+ sock_hold(&smc->sk); /* sock put in passive closing */
+
if (!tcp_sk(smc->clcsock->sk)->syn_smc) {
/* peer has not signalled SMC-capability */
smc->use_fallback = true;
@@ -513,6 +520,8 @@ out_connected:
return rc ? rc : local_contact;
decline_rdma_unlock:
+ if (local_contact == SMC_FIRST_CONTACT)
+ smc_lgr_forget(smc->conn.lgr);
mutex_unlock(&smc_create_lgr_pending);
smc_conn_free(&smc->conn);
decline_rdma:
@@ -526,9 +535,13 @@ decline_rdma:
goto out_connected;
out_err_unlock:
+ if (local_contact == SMC_FIRST_CONTACT)
+ smc_lgr_forget(smc->conn.lgr);
mutex_unlock(&smc_create_lgr_pending);
smc_conn_free(&smc->conn);
out_err:
+ if (smc->sk.sk_state == SMC_INIT)
+ sock_put(&smc->sk); /* passive closing */
return rc;
}
@@ -581,40 +594,33 @@ out_err:
static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
{
- struct sock *sk = &lsmc->sk;
- struct socket *new_clcsock;
+ struct socket *new_clcsock = NULL;
+ struct sock *lsk = &lsmc->sk;
struct sock *new_sk;
int rc;
- release_sock(&lsmc->sk);
- new_sk = smc_sock_alloc(sock_net(sk), NULL);
+ release_sock(lsk);
+ new_sk = smc_sock_alloc(sock_net(lsk), NULL);
if (!new_sk) {
rc = -ENOMEM;
- lsmc->sk.sk_err = ENOMEM;
+ lsk->sk_err = ENOMEM;
*new_smc = NULL;
- lock_sock(&lsmc->sk);
+ lock_sock(lsk);
goto out;
}
*new_smc = smc_sk(new_sk);
rc = kernel_accept(lsmc->clcsock, &new_clcsock, 0);
- lock_sock(&lsmc->sk);
- if (rc < 0) {
- lsmc->sk.sk_err = -rc;
- new_sk->sk_state = SMC_CLOSED;
- sock_set_flag(new_sk, SOCK_DEAD);
- sk->sk_prot->unhash(new_sk);
- sock_put(new_sk);
- *new_smc = NULL;
- goto out;
- }
- if (lsmc->sk.sk_state == SMC_CLOSED) {
+ lock_sock(lsk);
+ if (rc < 0)
+ lsk->sk_err = -rc;
+ if (rc < 0 || lsk->sk_state == SMC_CLOSED) {
if (new_clcsock)
sock_release(new_clcsock);
new_sk->sk_state = SMC_CLOSED;
sock_set_flag(new_sk, SOCK_DEAD);
- sk->sk_prot->unhash(new_sk);
- sock_put(new_sk);
+ new_sk->sk_prot->unhash(new_sk);
+ sock_put(new_sk); /* final */
*new_smc = NULL;
goto out;
}
@@ -631,7 +637,7 @@ static void smc_accept_enqueue(struct sock *parent, struct sock *sk)
{
struct smc_sock *par = smc_sk(parent);
- sock_hold(sk);
+ sock_hold(sk); /* sock_put in smc_accept_unlink () */
spin_lock(&par->accept_q_lock);
list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q);
spin_unlock(&par->accept_q_lock);
@@ -647,7 +653,7 @@ static void smc_accept_unlink(struct sock *sk)
list_del_init(&smc_sk(sk)->accept_q);
spin_unlock(&par->accept_q_lock);
sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk);
- sock_put(sk);
+ sock_put(sk); /* sock_hold in smc_accept_enqueue */
}
/* remove a sock from the accept queue to bind it to a new socket created
@@ -664,8 +670,12 @@ struct sock *smc_accept_dequeue(struct sock *parent,
smc_accept_unlink(new_sk);
if (new_sk->sk_state == SMC_CLOSED) {
+ if (isk->clcsock) {
+ sock_release(isk->clcsock);
+ isk->clcsock = NULL;
+ }
new_sk->sk_prot->unhash(new_sk);
- sock_put(new_sk);
+ sock_put(new_sk); /* final */
continue;
}
if (new_sock)
@@ -680,14 +690,11 @@ void smc_close_non_accepted(struct sock *sk)
{
struct smc_sock *smc = smc_sk(sk);
- sock_hold(sk);
lock_sock(sk);
if (!sk->sk_lingertime)
/* wait for peer closing */
sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT;
- if (smc->use_fallback) {
- sk->sk_state = SMC_CLOSED;
- } else {
+ if (!smc->use_fallback) {
smc_close_active(smc);
sock_set_flag(sk, SOCK_DEAD);
sk->sk_shutdown |= SHUTDOWN_MASK;
@@ -700,14 +707,15 @@ void smc_close_non_accepted(struct sock *sk)
sock_release(tcp);
}
if (smc->use_fallback) {
- schedule_delayed_work(&smc->sock_put_work, TCP_TIMEWAIT_LEN);
- } else if (sk->sk_state == SMC_CLOSED) {
- smc_conn_free(&smc->conn);
- schedule_delayed_work(&smc->sock_put_work,
- SMC_CLOSE_SOCK_PUT_DELAY);
+ sock_put(sk); /* passive closing */
+ sk->sk_state = SMC_CLOSED;
+ } else {
+ if (sk->sk_state == SMC_CLOSED)
+ smc_conn_free(&smc->conn);
}
release_sock(sk);
- sock_put(sk);
+ sk->sk_prot->unhash(sk);
+ sock_put(sk); /* final sock_put */
}
static int smc_serv_conf_first_link(struct smc_sock *smc)
@@ -913,6 +921,8 @@ enqueue:
return;
decline_rdma_unlock:
+ if (local_contact == SMC_FIRST_CONTACT)
+ smc_lgr_forget(new_smc->conn.lgr);
mutex_unlock(&smc_create_lgr_pending);
decline_rdma:
/* RDMA setup failed, switch back to TCP */
@@ -925,8 +935,12 @@ decline_rdma:
goto out_connected;
out_err_unlock:
+ if (local_contact == SMC_FIRST_CONTACT)
+ smc_lgr_forget(new_smc->conn.lgr);
mutex_unlock(&smc_create_lgr_pending);
out_err:
+ if (newsmcsk->sk_state == SMC_INIT)
+ sock_put(&new_smc->sk); /* passive closing */
newsmcsk->sk_state = SMC_CLOSED;
smc_conn_free(&new_smc->conn);
goto enqueue; /* queue new sock with sk_err set */
@@ -936,11 +950,12 @@ static void smc_tcp_listen_work(struct work_struct *work)
{
struct smc_sock *lsmc = container_of(work, struct smc_sock,
tcp_listen_work);
+ struct sock *lsk = &lsmc->sk;
struct smc_sock *new_smc;
int rc = 0;
- lock_sock(&lsmc->sk);
- while (lsmc->sk.sk_state == SMC_LISTEN) {
+ lock_sock(lsk);
+ while (lsk->sk_state == SMC_LISTEN) {
rc = smc_clcsock_accept(lsmc, &new_smc);
if (rc)
goto out;
@@ -949,15 +964,25 @@ static void smc_tcp_listen_work(struct work_struct *work)
new_smc->listen_smc = lsmc;
new_smc->use_fallback = false; /* assume rdma capability first*/
- sock_hold(&lsmc->sk); /* sock_put in smc_listen_work */
+ sock_hold(lsk); /* sock_put in smc_listen_work */
INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
smc_copy_sock_settings_to_smc(new_smc);
- schedule_work(&new_smc->smc_listen_work);
+ sock_hold(&new_smc->sk); /* sock_put in passive closing */
+ if (!schedule_work(&new_smc->smc_listen_work))
+ sock_put(&new_smc->sk);
}
out:
- release_sock(&lsmc->sk);
- lsmc->sk.sk_data_ready(&lsmc->sk); /* no more listening, wake accept */
+ if (lsmc->clcsock) {
+ sock_release(lsmc->clcsock);
+ lsmc->clcsock = NULL;
+ }
+ release_sock(lsk);
+ /* no more listening, wake up smc_close_wait_listen_clcsock and
+ * accept
+ */
+ lsk->sk_state_change(lsk);
+ sock_put(&lsmc->sk); /* sock_hold in smc_listen */
}
static int smc_listen(struct socket *sock, int backlog)
@@ -991,7 +1016,9 @@ static int smc_listen(struct socket *sock, int backlog)
sk->sk_ack_backlog = 0;
sk->sk_state = SMC_LISTEN;
INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
- schedule_work(&smc->tcp_listen_work);
+ sock_hold(sk); /* sock_hold in tcp_listen_worker */
+ if (!schedule_work(&smc->tcp_listen_work))
+ sock_put(sk);
out:
release_sock(sk);
@@ -1008,6 +1035,7 @@ static int smc_accept(struct socket *sock, struct socket *new_sock,
int rc = 0;
lsmc = smc_sk(sk);
+ sock_hold(sk); /* sock_put below */
lock_sock(sk);
if (lsmc->sk.sk_state != SMC_LISTEN) {
@@ -1042,6 +1070,7 @@ static int smc_accept(struct socket *sock, struct socket *new_sock,
out:
release_sock(sk);
+ sock_put(sk); /* sock_hold above */
return rc;
}
@@ -1111,21 +1140,15 @@ out:
static unsigned int smc_accept_poll(struct sock *parent)
{
- struct smc_sock *isk;
- struct sock *sk;
+ struct smc_sock *isk = smc_sk(parent);
+ int mask = 0;
- lock_sock(parent);
- list_for_each_entry(isk, &smc_sk(parent)->accept_q, accept_q) {
- sk = (struct sock *)isk;
-
- if (sk->sk_state == SMC_ACTIVE) {
- release_sock(parent);
- return POLLIN | POLLRDNORM;
- }
- }
- release_sock(parent);
+ spin_lock(&isk->accept_q_lock);
+ if (!list_empty(&isk->accept_q))
+ mask = POLLIN | POLLRDNORM;
+ spin_unlock(&isk->accept_q_lock);
- return 0;
+ return mask;
}
static unsigned int smc_poll(struct file *file, struct socket *sock,
@@ -1136,9 +1159,15 @@ static unsigned int smc_poll(struct file *file, struct socket *sock,
struct smc_sock *smc;
int rc;
+ if (!sk)
+ return POLLNVAL;
+
smc = smc_sk(sock->sk);
+ sock_hold(sk);
+ lock_sock(sk);
if ((sk->sk_state == SMC_INIT) || smc->use_fallback) {
/* delegate to CLC child sock */
+ release_sock(sk);
mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
/* if non-blocking connect finished ... */
lock_sock(sk);
@@ -1150,37 +1179,43 @@ static unsigned int smc_poll(struct file *file, struct socket *sock,
rc = smc_connect_rdma(smc);
if (rc < 0)
mask |= POLLERR;
- else
- /* success cases including fallback */
- mask |= POLLOUT | POLLWRNORM;
+ /* success cases including fallback */
+ mask |= POLLOUT | POLLWRNORM;
}
}
- release_sock(sk);
} else {
- sock_poll_wait(file, sk_sleep(sk), wait);
- if (sk->sk_state == SMC_LISTEN)
- /* woken up by sk_data_ready in smc_listen_work() */
- mask |= smc_accept_poll(sk);
+ if (sk->sk_state != SMC_CLOSED) {
+ release_sock(sk);
+ sock_poll_wait(file, sk_sleep(sk), wait);
+ lock_sock(sk);
+ }
if (sk->sk_err)
mask |= POLLERR;
- if (atomic_read(&smc->conn.sndbuf_space) ||
- (sk->sk_shutdown & SEND_SHUTDOWN)) {
- mask |= POLLOUT | POLLWRNORM;
- } else {
- sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
- set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
- }
- if (atomic_read(&smc->conn.bytes_to_rcv))
- mask |= POLLIN | POLLRDNORM;
if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
(sk->sk_state == SMC_CLOSED))
mask |= POLLHUP;
- if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= POLLIN | POLLRDNORM | POLLRDHUP;
- if (sk->sk_state == SMC_APPCLOSEWAIT1)
- mask |= POLLIN;
+ if (sk->sk_state == SMC_LISTEN) {
+ /* woken up by sk_data_ready in smc_listen_work() */
+ mask = smc_accept_poll(sk);
+ } else {
+ if (atomic_read(&smc->conn.sndbuf_space) ||
+ sk->sk_shutdown & SEND_SHUTDOWN) {
+ mask |= POLLOUT | POLLWRNORM;
+ } else {
+ sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
+ set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+ }
+ if (atomic_read(&smc->conn.bytes_to_rcv))
+ mask |= POLLIN | POLLRDNORM;
+ if (sk->sk_shutdown & RCV_SHUTDOWN)
+ mask |= POLLIN | POLLRDNORM | POLLRDHUP;
+ if (sk->sk_state == SMC_APPCLOSEWAIT1)
+ mask |= POLLIN;
+ }
}
+ release_sock(sk);
+ sock_put(sk);
return mask;
}
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 0bee9d16cf29..9518986c97b1 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -178,7 +178,6 @@ struct smc_sock { /* smc sock container */
struct work_struct smc_listen_work;/* prepare new accept socket */
struct list_head accept_q; /* sockets to be accepted */
spinlock_t accept_q_lock; /* protects accept_q */
- struct delayed_work sock_put_work; /* final socket freeing */
bool use_fallback; /* fallback to tcp */
u8 wait_close_tx_prepared : 1;
/* shutdown wr or close
@@ -253,12 +252,12 @@ static inline int smc_uncompress_bufsize(u8 compressed)
static inline bool using_ipsec(struct smc_sock *smc)
{
return (smc->clcsock->sk->sk_policy[0] ||
- smc->clcsock->sk->sk_policy[1]) ? 1 : 0;
+ smc->clcsock->sk->sk_policy[1]) ? true : false;
}
#else
static inline bool using_ipsec(struct smc_sock *smc)
{
- return 0;
+ return false;
}
#endif
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index d4155ff6acde..3cd086e5bd28 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -57,9 +57,6 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
cdcpend->conn);
}
smc_tx_sndbuf_nonfull(smc);
- if (smc->sk.sk_state != SMC_ACTIVE)
- /* wake up smc_close_wait_tx_pends() */
- smc->sk.sk_state_change(&smc->sk);
bh_unlock_sock(&smc->sk);
}
@@ -68,9 +65,14 @@ int smc_cdc_get_free_slot(struct smc_connection *conn,
struct smc_cdc_tx_pend **pend)
{
struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
+ int rc;
- return smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
- (struct smc_wr_tx_pend_priv **)pend);
+ rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
+ (struct smc_wr_tx_pend_priv **)pend);
+ if (!conn->alert_token_local)
+ /* abnormal termination */
+ rc = -EPIPE;
+ return rc;
}
static inline void smc_cdc_add_pending_send(struct smc_connection *conn,
@@ -155,14 +157,6 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn)
(unsigned long)conn);
}
-bool smc_cdc_tx_has_pending(struct smc_connection *conn)
-{
- struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
-
- return smc_wr_tx_has_pending(link, SMC_CDC_MSG_TYPE,
- smc_cdc_tx_filter, (unsigned long)conn);
-}
-
/********************************* receive ***********************************/
static inline bool smc_cdc_before(u16 seq1, u16 seq2)
@@ -218,6 +212,14 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
smc->sk.sk_data_ready(&smc->sk);
}
+ /* piggy backed tx info */
+ /* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */
+ if (diff_cons && smc_tx_prepared_sends(conn)) {
+ smc_tx_sndbuf_nonempty(conn);
+ /* trigger socket release if connection closed */
+ smc_close_wake_tx_prepared(smc);
+ }
+
if (conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) {
smc->sk.sk_err = ECONNRESET;
conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
@@ -227,15 +229,9 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
if (smc->clcsock && smc->clcsock->sk)
smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN;
sock_set_flag(&smc->sk, SOCK_DONE);
- schedule_work(&conn->close_work);
- }
-
- /* piggy backed tx info */
- /* trigger sndbuf consumer: RDMA write into peer RMBE and CDC */
- if (diff_cons && smc_tx_prepared_sends(conn)) {
- smc_tx_sndbuf_nonempty(conn);
- /* trigger socket release if connection closed */
- smc_close_wake_tx_prepared(smc);
+ sock_hold(&smc->sk); /* sock_put in close_work */
+ if (!schedule_work(&conn->close_work))
+ sock_put(&smc->sk);
}
}
diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
index 149ceda1b088..ab240b37ad11 100644
--- a/net/smc/smc_cdc.h
+++ b/net/smc/smc_cdc.h
@@ -214,7 +214,6 @@ void smc_cdc_tx_dismiss_slots(struct smc_connection *conn);
int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf,
struct smc_cdc_tx_pend *pend);
int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn);
-bool smc_cdc_tx_has_pending(struct smc_connection *conn);
int smc_cdc_init(void) __init;
#endif /* SMC_CDC_H */
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index e194c6cc308a..e339c0186dcf 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -19,7 +19,7 @@
#include "smc_cdc.h"
#include "smc_close.h"
-#define SMC_CLOSE_WAIT_TX_PENDS_TIME (5 * HZ)
+#define SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME (5 * HZ)
static void smc_close_cleanup_listen(struct sock *parent)
{
@@ -30,23 +30,24 @@ static void smc_close_cleanup_listen(struct sock *parent)
smc_close_non_accepted(sk);
}
-static void smc_close_wait_tx_pends(struct smc_sock *smc)
+static void smc_close_wait_listen_clcsock(struct smc_sock *smc)
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct sock *sk = &smc->sk;
signed long timeout;
- timeout = SMC_CLOSE_WAIT_TX_PENDS_TIME;
+ timeout = SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME;
add_wait_queue(sk_sleep(sk), &wait);
- while (!signal_pending(current) && timeout) {
- int rc;
-
- rc = sk_wait_event(sk, &timeout,
- !smc_cdc_tx_has_pending(&smc->conn),
- &wait);
- if (rc)
+ do {
+ release_sock(sk);
+ if (smc->clcsock)
+ timeout = wait_woken(&wait, TASK_UNINTERRUPTIBLE,
+ timeout);
+ sched_annotate_sleep();
+ lock_sock(sk);
+ if (!smc->clcsock)
break;
- }
+ } while (timeout);
remove_wait_queue(sk_sleep(sk), &wait);
}
@@ -111,58 +112,63 @@ static int smc_close_abort(struct smc_connection *conn)
}
/* terminate smc socket abnormally - active abort
- * RDMA communication no longer possible
+ * link group is terminated, i.e. RDMA communication no longer possible
*/
static void smc_close_active_abort(struct smc_sock *smc)
{
+ struct sock *sk = &smc->sk;
+
struct smc_cdc_conn_state_flags *txflags =
&smc->conn.local_tx_ctrl.conn_state_flags;
- smc->sk.sk_err = ECONNABORTED;
+ sk->sk_err = ECONNABORTED;
if (smc->clcsock && smc->clcsock->sk) {
smc->clcsock->sk->sk_err = ECONNABORTED;
smc->clcsock->sk->sk_state_change(smc->clcsock->sk);
}
- switch (smc->sk.sk_state) {
+ switch (sk->sk_state) {
case SMC_INIT:
case SMC_ACTIVE:
- smc->sk.sk_state = SMC_PEERABORTWAIT;
+ sk->sk_state = SMC_PEERABORTWAIT;
+ release_sock(sk);
+ cancel_delayed_work_sync(&smc->conn.tx_work);
+ lock_sock(sk);
+ sock_put(sk); /* passive closing */
break;
case SMC_APPCLOSEWAIT1:
case SMC_APPCLOSEWAIT2:
- txflags->peer_conn_abort = 1;
- sock_release(smc->clcsock);
if (!smc_cdc_rxed_any_close(&smc->conn))
- smc->sk.sk_state = SMC_PEERABORTWAIT;
+ sk->sk_state = SMC_PEERABORTWAIT;
else
- smc->sk.sk_state = SMC_CLOSED;
+ sk->sk_state = SMC_CLOSED;
+ release_sock(sk);
+ cancel_delayed_work_sync(&smc->conn.tx_work);
+ lock_sock(sk);
break;
case SMC_PEERCLOSEWAIT1:
case SMC_PEERCLOSEWAIT2:
if (!txflags->peer_conn_closed) {
- smc->sk.sk_state = SMC_PEERABORTWAIT;
- txflags->peer_conn_abort = 1;
- sock_release(smc->clcsock);
+ /* just SHUTDOWN_SEND done */
+ sk->sk_state = SMC_PEERABORTWAIT;
} else {
- smc->sk.sk_state = SMC_CLOSED;
+ sk->sk_state = SMC_CLOSED;
}
+ sock_put(sk); /* passive closing */
break;
case SMC_PROCESSABORT:
case SMC_APPFINCLOSEWAIT:
- if (!txflags->peer_conn_closed) {
- txflags->peer_conn_abort = 1;
- sock_release(smc->clcsock);
- }
- smc->sk.sk_state = SMC_CLOSED;
+ sk->sk_state = SMC_CLOSED;
break;
case SMC_PEERFINCLOSEWAIT:
+ sock_put(sk); /* passive closing */
+ break;
case SMC_PEERABORTWAIT:
case SMC_CLOSED:
break;
}
- sock_set_flag(&smc->sk, SOCK_DEAD);
- smc->sk.sk_state_change(&smc->sk);
+ sock_set_flag(sk, SOCK_DEAD);
+ sk->sk_state_change(sk);
}
static inline bool smc_close_sent_any_close(struct smc_connection *conn)
@@ -185,13 +191,11 @@ int smc_close_active(struct smc_sock *smc)
0 : sock_flag(sk, SOCK_LINGER) ?
sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;
-again:
old_state = sk->sk_state;
- switch (old_state) {
+again:
+ switch (sk->sk_state) {
case SMC_INIT:
sk->sk_state = SMC_CLOSED;
- if (smc->smc_listen_work.func)
- cancel_work_sync(&smc->smc_listen_work);
break;
case SMC_LISTEN:
sk->sk_state = SMC_CLOSED;
@@ -200,11 +204,9 @@ again:
rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
/* wake up kernel_accept of smc_tcp_listen_worker */
smc->clcsock->sk->sk_data_ready(smc->clcsock->sk);
+ smc_close_wait_listen_clcsock(smc);
}
- release_sock(sk);
smc_close_cleanup_listen(sk);
- cancel_work_sync(&smc->smc_listen_work);
- lock_sock(sk);
break;
case SMC_ACTIVE:
smc_close_stream_wait(smc, timeout);
@@ -214,6 +216,8 @@ again:
if (sk->sk_state == SMC_ACTIVE) {
/* send close request */
rc = smc_close_final(conn);
+ if (rc)
+ break;
sk->sk_state = SMC_PEERCLOSEWAIT1;
} else {
/* peer event has changed the state */
@@ -226,9 +230,10 @@ again:
!smc_close_sent_any_close(conn)) {
/* just shutdown wr done, send close request */
rc = smc_close_final(conn);
+ if (rc)
+ break;
}
sk->sk_state = SMC_CLOSED;
- smc_close_wait_tx_pends(smc);
break;
case SMC_APPCLOSEWAIT1:
case SMC_APPCLOSEWAIT2:
@@ -237,19 +242,21 @@ again:
release_sock(sk);
cancel_delayed_work_sync(&conn->tx_work);
lock_sock(sk);
- if (sk->sk_err != ECONNABORTED) {
- /* confirm close from peer */
- rc = smc_close_final(conn);
- if (rc)
- break;
- }
- if (smc_cdc_rxed_any_close(conn))
+ if (sk->sk_state != SMC_APPCLOSEWAIT1 &&
+ sk->sk_state != SMC_APPCLOSEWAIT2)
+ goto again;
+ /* confirm close from peer */
+ rc = smc_close_final(conn);
+ if (rc)
+ break;
+ if (smc_cdc_rxed_any_close(conn)) {
/* peer has closed the socket already */
sk->sk_state = SMC_CLOSED;
- else
+ sock_put(sk); /* postponed passive closing */
+ } else {
/* peer has just issued a shutdown write */
sk->sk_state = SMC_PEERFINCLOSEWAIT;
- smc_close_wait_tx_pends(smc);
+ }
break;
case SMC_PEERCLOSEWAIT1:
case SMC_PEERCLOSEWAIT2:
@@ -257,6 +264,8 @@ again:
!smc_close_sent_any_close(conn)) {
/* just shutdown wr done, send close request */
rc = smc_close_final(conn);
+ if (rc)
+ break;
}
/* peer sending PeerConnectionClosed will cause transition */
break;
@@ -264,12 +273,8 @@ again:
/* peer sending PeerConnectionClosed will cause transition */
break;
case SMC_PROCESSABORT:
- release_sock(sk);
- cancel_delayed_work_sync(&conn->tx_work);
- lock_sock(sk);
smc_close_abort(conn);
sk->sk_state = SMC_CLOSED;
- smc_close_wait_tx_pends(smc);
break;
case SMC_PEERABORTWAIT:
case SMC_CLOSED:
@@ -278,7 +283,7 @@ again:
}
if (old_state != sk->sk_state)
- sk->sk_state_change(&smc->sk);
+ sk->sk_state_change(sk);
return rc;
}
@@ -289,37 +294,42 @@ static void smc_close_passive_abort_received(struct smc_sock *smc)
struct sock *sk = &smc->sk;
switch (sk->sk_state) {
+ case SMC_INIT:
case SMC_ACTIVE:
- case SMC_APPFINCLOSEWAIT:
case SMC_APPCLOSEWAIT1:
- case SMC_APPCLOSEWAIT2:
- smc_close_abort(&smc->conn);
+ sk->sk_state = SMC_PROCESSABORT;
+ sock_put(sk); /* passive closing */
+ break;
+ case SMC_APPFINCLOSEWAIT:
sk->sk_state = SMC_PROCESSABORT;
break;
case SMC_PEERCLOSEWAIT1:
case SMC_PEERCLOSEWAIT2:
if (txflags->peer_done_writing &&
- !smc_close_sent_any_close(&smc->conn)) {
+ !smc_close_sent_any_close(&smc->conn))
/* just shutdown, but not yet closed locally */
- smc_close_abort(&smc->conn);
sk->sk_state = SMC_PROCESSABORT;
- } else {
+ else
sk->sk_state = SMC_CLOSED;
- }
+ sock_put(sk); /* passive closing */
break;
+ case SMC_APPCLOSEWAIT2:
case SMC_PEERFINCLOSEWAIT:
+ sk->sk_state = SMC_CLOSED;
+ sock_put(sk); /* passive closing */
+ break;
case SMC_PEERABORTWAIT:
sk->sk_state = SMC_CLOSED;
break;
- case SMC_INIT:
case SMC_PROCESSABORT:
/* nothing to do, add tracing in future patch */
break;
}
}
-/* Some kind of closing has been received: peer_conn_closed, peer_conn_abort,
- * or peer_done_writing.
+/* Either some kind of closing has been received: peer_conn_closed,
+ * peer_conn_abort, or peer_done_writing
+ * or the link group of the connection terminates abnormally.
*/
static void smc_close_passive_work(struct work_struct *work)
{
@@ -331,7 +341,7 @@ static void smc_close_passive_work(struct work_struct *work)
struct sock *sk = &smc->sk;
int old_state;
- lock_sock(&smc->sk);
+ lock_sock(sk);
old_state = sk->sk_state;
if (!conn->alert_token_local) {
@@ -340,23 +350,32 @@ static void smc_close_passive_work(struct work_struct *work)
goto wakeup;
}
- rxflags = &smc->conn.local_rx_ctrl.conn_state_flags;
+ rxflags = &conn->local_rx_ctrl.conn_state_flags;
if (rxflags->peer_conn_abort) {
+ /* peer has not received all data */
smc_close_passive_abort_received(smc);
+ release_sock(&smc->sk);
+ cancel_delayed_work_sync(&conn->tx_work);
+ lock_sock(&smc->sk);
goto wakeup;
}
switch (sk->sk_state) {
case SMC_INIT:
- if (atomic_read(&smc->conn.bytes_to_rcv) ||
+ if (atomic_read(&conn->bytes_to_rcv) ||
(rxflags->peer_done_writing &&
- !smc_cdc_rxed_any_close(conn)))
+ !smc_cdc_rxed_any_close(conn))) {
sk->sk_state = SMC_APPCLOSEWAIT1;
- else
+ } else {
sk->sk_state = SMC_CLOSED;
+ sock_put(sk); /* passive closing */
+ }
break;
case SMC_ACTIVE:
sk->sk_state = SMC_APPCLOSEWAIT1;
+ /* postpone sock_put() for passive closing to cover
+ * received SEND_SHUTDOWN as well
+ */
break;
case SMC_PEERCLOSEWAIT1:
if (rxflags->peer_done_writing)
@@ -364,8 +383,7 @@ static void smc_close_passive_work(struct work_struct *work)
/* fall through */
/* to check for closing */
case SMC_PEERCLOSEWAIT2:
- case SMC_PEERFINCLOSEWAIT:
- if (!smc_cdc_rxed_any_close(&smc->conn))
+ if (!smc_cdc_rxed_any_close(conn))
break;
if (sock_flag(sk, SOCK_DEAD) &&
smc_close_sent_any_close(conn)) {
@@ -375,9 +393,20 @@ static void smc_close_passive_work(struct work_struct *work)
/* just shutdown, but not yet closed locally */
sk->sk_state = SMC_APPFINCLOSEWAIT;
}
+ sock_put(sk); /* passive closing */
+ break;
+ case SMC_PEERFINCLOSEWAIT:
+ if (smc_cdc_rxed_any_close(conn)) {
+ sk->sk_state = SMC_CLOSED;
+ sock_put(sk); /* passive closing */
+ }
break;
case SMC_APPCLOSEWAIT1:
case SMC_APPCLOSEWAIT2:
+ /* postpone sock_put() for passive closing to cover
+ * received SEND_SHUTDOWN as well
+ */
+ break;
case SMC_APPFINCLOSEWAIT:
case SMC_PEERABORTWAIT:
case SMC_PROCESSABORT:
@@ -393,23 +422,11 @@ wakeup:
if (old_state != sk->sk_state) {
sk->sk_state_change(sk);
if ((sk->sk_state == SMC_CLOSED) &&
- (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) {
- smc_conn_free(&smc->conn);
- schedule_delayed_work(&smc->sock_put_work,
- SMC_CLOSE_SOCK_PUT_DELAY);
- }
+ (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket))
+ smc_conn_free(conn);
}
- release_sock(&smc->sk);
-}
-
-void smc_close_sock_put_work(struct work_struct *work)
-{
- struct smc_sock *smc = container_of(to_delayed_work(work),
- struct smc_sock,
- sock_put_work);
-
- smc->sk.sk_prot->unhash(&smc->sk);
- sock_put(&smc->sk);
+ release_sock(sk);
+ sock_put(sk); /* sock_hold done by schedulers of close_work */
}
int smc_close_shutdown_write(struct smc_sock *smc)
@@ -424,20 +441,21 @@ int smc_close_shutdown_write(struct smc_sock *smc)
0 : sock_flag(sk, SOCK_LINGER) ?
sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;
-again:
old_state = sk->sk_state;
- switch (old_state) {
+again:
+ switch (sk->sk_state) {
case SMC_ACTIVE:
smc_close_stream_wait(smc, timeout);
release_sock(sk);
cancel_delayed_work_sync(&conn->tx_work);
lock_sock(sk);
+ if (sk->sk_state != SMC_ACTIVE)
+ goto again;
/* send close wr request */
rc = smc_close_wr(conn);
- if (sk->sk_state == SMC_ACTIVE)
- sk->sk_state = SMC_PEERCLOSEWAIT1;
- else
- goto again;
+ if (rc)
+ break;
+ sk->sk_state = SMC_PEERCLOSEWAIT1;
break;
case SMC_APPCLOSEWAIT1:
/* passive close */
@@ -446,8 +464,12 @@ again:
release_sock(sk);
cancel_delayed_work_sync(&conn->tx_work);
lock_sock(sk);
+ if (sk->sk_state != SMC_APPCLOSEWAIT1)
+ goto again;
/* confirm close from peer */
rc = smc_close_wr(conn);
+ if (rc)
+ break;
sk->sk_state = SMC_APPCLOSEWAIT2;
break;
case SMC_APPCLOSEWAIT2:
@@ -462,7 +484,7 @@ again:
}
if (old_state != sk->sk_state)
- sk->sk_state_change(&smc->sk);
+ sk->sk_state_change(sk);
return rc;
}
diff --git a/net/smc/smc_close.h b/net/smc/smc_close.h
index 8c498885d758..19eb6a211c23 100644
--- a/net/smc/smc_close.h
+++ b/net/smc/smc_close.h
@@ -21,7 +21,6 @@
void smc_close_wake_tx_prepared(struct smc_sock *smc);
int smc_close_active(struct smc_sock *smc);
-void smc_close_sock_put_work(struct work_struct *work);
int smc_close_shutdown_write(struct smc_sock *smc);
void smc_close_init(struct smc_sock *smc);
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 94f21116dac5..2424c7100aaf 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -128,6 +128,8 @@ static void smc_lgr_free_work(struct work_struct *work)
bool conns;
spin_lock_bh(&smc_lgr_list.lock);
+ if (list_empty(&lgr->list))
+ goto free;
read_lock_bh(&lgr->conns_lock);
conns = RB_EMPTY_ROOT(&lgr->conns_all);
read_unlock_bh(&lgr->conns_lock);
@@ -136,6 +138,7 @@ static void smc_lgr_free_work(struct work_struct *work)
return;
}
list_del_init(&lgr->list); /* remove from smc_lgr_list */
+free:
spin_unlock_bh(&smc_lgr_list.lock);
smc_lgr_free(lgr);
}
@@ -231,9 +234,7 @@ static void smc_buf_unuse(struct smc_connection *conn)
/* remove a finished connection from its link group */
void smc_conn_free(struct smc_connection *conn)
{
- struct smc_link_group *lgr = conn->lgr;
-
- if (!lgr)
+ if (!conn->lgr)
return;
smc_cdc_tx_dismiss_slots(conn);
smc_lgr_unregister_conn(conn);
@@ -327,13 +328,17 @@ void smc_lgr_terminate(struct smc_link_group *lgr)
while (node) {
conn = rb_entry(node, struct smc_connection, alert_node);
smc = container_of(conn, struct smc_sock, conn);
- sock_hold(&smc->sk);
+ sock_hold(&smc->sk); /* sock_put in close work */
+ conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
__smc_lgr_unregister_conn(conn);
- schedule_work(&conn->close_work);
- sock_put(&smc->sk);
+ write_unlock_bh(&lgr->conns_lock);
+ if (!schedule_work(&conn->close_work))
+ sock_put(&smc->sk);
+ write_lock_bh(&lgr->conns_lock);
node = rb_first(&lgr->conns_all);
}
write_unlock_bh(&lgr->conns_lock);
+ wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
}
/* Determine vlan of internal TCP socket.
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index d2d01cf70224..427b91c1c964 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -86,7 +86,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
if (smc_diag_msg_attrs_fill(sk, skb, r, user_ns))
goto errout;
- if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) && smc->conn.lgr) {
+ if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) &&
+ smc->conn.alert_token_local) {
struct smc_connection *conn = &smc->conn;
struct smc_diag_conninfo cinfo = {
.token = conn->alert_token_local,
@@ -124,7 +125,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
goto errout;
}
- if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr) {
+ if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr &&
+ !list_empty(&smc->conn.lgr->list)) {
struct smc_diag_lgrinfo linfo = {
.role = smc->conn.lgr->role,
.lnk[0].ibport = smc->conn.lgr->lnk[0].ibport,
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 90f1a7f9085c..2a8957bd6d38 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -141,6 +141,17 @@ out:
return rc;
}
+static void smc_ib_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
+{
+ struct smc_link_group *lgr, *l;
+
+ list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
+ if (lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&
+ lgr->lnk[SMC_SINGLE_LINK].ibport == ibport)
+ smc_lgr_terminate(lgr);
+ }
+}
+
/* process context wrapper for might_sleep smc_ib_remember_port_attr */
static void smc_ib_port_event_work(struct work_struct *work)
{
@@ -151,6 +162,8 @@ static void smc_ib_port_event_work(struct work_struct *work)
for_each_set_bit(port_idx, &smcibdev->port_event_mask, SMC_MAX_PORTS) {
smc_ib_remember_port_attr(smcibdev, port_idx + 1);
clear_bit(port_idx, &smcibdev->port_event_mask);
+ if (!smc_ib_port_active(smcibdev, port_idx + 1))
+ smc_ib_port_terminate(smcibdev, port_idx + 1);
}
}
@@ -165,15 +178,7 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler,
switch (ibevent->event) {
case IB_EVENT_PORT_ERR:
- port_idx = ibevent->element.port_num - 1;
- set_bit(port_idx, &smcibdev->port_event_mask);
- schedule_work(&smcibdev->port_event_work);
- /* fall through */
case IB_EVENT_DEVICE_FATAL:
- /* tbd in follow-on patch:
- * abnormal close of corresponding connections
- */
- break;
case IB_EVENT_PORT_ACTIVE:
port_idx = ibevent->element.port_num - 1;
set_bit(port_idx, &smcibdev->port_event_mask);
@@ -186,7 +191,8 @@ static void smc_ib_global_event_handler(struct ib_event_handler *handler,
void smc_ib_dealloc_protection_domain(struct smc_link *lnk)
{
- ib_dealloc_pd(lnk->roce_pd);
+ if (lnk->roce_pd)
+ ib_dealloc_pd(lnk->roce_pd);
lnk->roce_pd = NULL;
}
@@ -203,14 +209,18 @@ int smc_ib_create_protection_domain(struct smc_link *lnk)
static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv)
{
+ struct smc_ib_device *smcibdev =
+ (struct smc_ib_device *)ibevent->device;
+ u8 port_idx;
+
switch (ibevent->event) {
case IB_EVENT_DEVICE_FATAL:
case IB_EVENT_GID_CHANGE:
case IB_EVENT_PORT_ERR:
case IB_EVENT_QP_ACCESS_ERR:
- /* tbd in follow-on patch:
- * abnormal close of corresponding connections
- */
+ port_idx = ibevent->element.port_num - 1;
+ set_bit(port_idx, &smcibdev->port_event_mask);
+ schedule_work(&smcibdev->port_event_work);
break;
default:
break;
@@ -219,7 +229,8 @@ static void smc_ib_qp_event_handler(struct ib_event *ibevent, void *priv)
void smc_ib_destroy_queue_pair(struct smc_link *lnk)
{
- ib_destroy_qp(lnk->roce_qp);
+ if (lnk->roce_qp)
+ ib_destroy_qp(lnk->roce_qp);
lnk->roce_qp = NULL;
}
@@ -462,6 +473,7 @@ static void smc_ib_cleanup_per_ibdev(struct smc_ib_device *smcibdev)
{
if (!smcibdev->initialized)
return;
+ smcibdev->initialized = 0;
smc_wr_remove_dev(smcibdev);
ib_unregister_event_handler(&smcibdev->event_handler);
ib_destroy_cq(smcibdev->roce_cq_recv);
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 2e50fddf8ce9..838bce20c361 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -86,7 +86,7 @@ static int smc_tx_wait_memory(struct smc_sock *smc, int flags)
rc = -EPIPE;
break;
}
- if (conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) {
+ if (smc_cdc_rxed_any_close(conn)) {
rc = -ECONNRESET;
break;
}
@@ -107,7 +107,7 @@ static int smc_tx_wait_memory(struct smc_sock *smc, int flags)
sk_wait_event(sk, &timeo,
sk->sk_err ||
(sk->sk_shutdown & SEND_SHUTDOWN) ||
- smc_cdc_rxed_any_close_or_senddone(conn) ||
+ smc_cdc_rxed_any_close(conn) ||
atomic_read(&conn->sndbuf_space),
&wait);
}
@@ -248,8 +248,10 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,
peer_rmbe_offset;
rdma_wr.rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey;
rc = ib_post_send(link->roce_qp, &rdma_wr.wr, &failed_wr);
- if (rc)
+ if (rc) {
conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
+ smc_lgr_terminate(lgr);
+ }
return rc;
}
@@ -406,8 +408,9 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
goto out_unlock;
}
rc = 0;
- schedule_delayed_work(&conn->tx_work,
- SMC_TX_WORK_DELAY);
+ if (conn->alert_token_local) /* connection healthy */
+ schedule_delayed_work(&conn->tx_work,
+ SMC_TX_WORK_DELAY);
}
goto out_unlock;
}
@@ -438,10 +441,17 @@ static void smc_tx_work(struct work_struct *work)
int rc;
lock_sock(&smc->sk);
+ if (smc->sk.sk_err ||
+ !conn->alert_token_local ||
+ conn->local_rx_ctrl.conn_state_flags.peer_conn_abort)
+ goto out;
+
rc = smc_tx_sndbuf_nonempty(conn);
if (!rc && conn->local_rx_ctrl.prod_flags.write_blocked &&
!atomic_read(&conn->bytes_to_rcv))
conn->local_rx_ctrl.prod_flags.write_blocked = 0;
+
+out:
release_sock(&smc->sk);
}
@@ -462,7 +472,8 @@ void smc_tx_consumer_update(struct smc_connection *conn)
((to_confirm > conn->rmbe_update_limit) &&
((to_confirm > (conn->rmbe_size / 2)) ||
conn->local_rx_ctrl.prod_flags.write_blocked))) {
- if (smc_cdc_get_slot_and_msg_send(conn) < 0) {
+ if ((smc_cdc_get_slot_and_msg_send(conn) < 0) &&
+ conn->alert_token_local) { /* connection healthy */
schedule_delayed_work(&conn->tx_work,
SMC_TX_WORK_DELAY);
return;
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index de4537f66832..1b8af23e6e2b 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -122,6 +122,7 @@ static void smc_wr_tx_tasklet_fn(unsigned long data)
again:
polled++;
do {
+ memset(&wc, 0, sizeof(wc));
rc = ib_poll_cq(dev->roce_cq_send, SMC_WR_MAX_POLL_CQE, wc);
if (polled == 1) {
ib_req_notify_cq(dev->roce_cq_send,
@@ -173,9 +174,9 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
struct smc_wr_tx_pend_priv **wr_pend_priv)
{
struct smc_wr_tx_pend *wr_pend;
+ u32 idx = link->wr_tx_cnt;
struct ib_send_wr *wr_ib;
u64 wr_id;
- u32 idx;
int rc;
*wr_buf = NULL;
@@ -185,21 +186,20 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
if (rc)
return rc;
} else {
- rc = wait_event_interruptible_timeout(
+ struct smc_link_group *lgr;
+
+ lgr = container_of(link, struct smc_link_group,
+ lnk[SMC_SINGLE_LINK]);
+ rc = wait_event_timeout(
link->wr_tx_wait,
+ list_empty(&lgr->list) || /* lgr terminated */
(smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
SMC_WR_TX_WAIT_FREE_SLOT_TIME);
if (!rc) {
/* timeout - terminate connections */
- struct smc_link_group *lgr;
-
- lgr = container_of(link, struct smc_link_group,
- lnk[SMC_SINGLE_LINK]);
smc_lgr_terminate(lgr);
return -EPIPE;
}
- if (rc == -ERESTARTSYS)
- return -EINTR;
if (idx == link->wr_tx_cnt)
return -EPIPE;
}
@@ -249,8 +249,14 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
pend = container_of(priv, struct smc_wr_tx_pend, priv);
rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx],
&failed_wr);
- if (rc)
+ if (rc) {
+ struct smc_link_group *lgr =
+ container_of(link, struct smc_link_group,
+ lnk[SMC_SINGLE_LINK]);
+
smc_wr_tx_put_slot(link, priv);
+ smc_lgr_terminate(lgr);
+ }
return rc;
}
@@ -300,18 +306,18 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
return rc;
}
-void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_rx_hdr_type,
+void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_tx_hdr_type,
smc_wr_tx_filter filter,
smc_wr_tx_dismisser dismisser,
unsigned long data)
{
struct smc_wr_tx_pend_priv *tx_pend;
- struct smc_wr_rx_hdr *wr_rx;
+ struct smc_wr_rx_hdr *wr_tx;
int i;
for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
- wr_rx = (struct smc_wr_rx_hdr *)&link->wr_rx_bufs[i];
- if (wr_rx->type != wr_rx_hdr_type)
+ wr_tx = (struct smc_wr_rx_hdr *)&link->wr_tx_bufs[i];
+ if (wr_tx->type != wr_tx_hdr_type)
continue;
tx_pend = &link->wr_tx_pends[i].priv;
if (filter(tx_pend, data))
@@ -319,24 +325,6 @@ void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_rx_hdr_type,
}
}
-bool smc_wr_tx_has_pending(struct smc_link *link, u8 wr_rx_hdr_type,
- smc_wr_tx_filter filter, unsigned long data)
-{
- struct smc_wr_tx_pend_priv *tx_pend;
- struct smc_wr_rx_hdr *wr_rx;
- int i;
-
- for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) {
- wr_rx = (struct smc_wr_rx_hdr *)&link->wr_rx_bufs[i];
- if (wr_rx->type != wr_rx_hdr_type)
- continue;
- tx_pend = &link->wr_tx_pends[i].priv;
- if (filter(tx_pend, data))
- return true;
- }
- return false;
-}
-
/****************************** receive queue ********************************/
int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler)
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index 2acf12b06063..ef0c3494c9cb 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -93,8 +93,6 @@ int smc_wr_tx_put_slot(struct smc_link *link,
int smc_wr_tx_send(struct smc_link *link,
struct smc_wr_tx_pend_priv *wr_pend_priv);
void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context);
-bool smc_wr_tx_has_pending(struct smc_link *link, u8 wr_rx_hdr_type,
- smc_wr_tx_filter filter, unsigned long data);
void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type,
smc_wr_tx_filter filter,
smc_wr_tx_dismisser dismisser,
diff --git a/net/socket.c b/net/socket.c
index 1536515b6437..11cc2cd0f37b 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -961,9 +961,28 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
* If this ioctl is unknown try to hand it down
* to the NIC driver.
*/
- if (err == -ENOIOCTLCMD)
- err = dev_ioctl(net, cmd, argp);
+ if (err != -ENOIOCTLCMD)
+ return err;
+ if (cmd == SIOCGIFCONF) {
+ struct ifconf ifc;
+ if (copy_from_user(&ifc, argp, sizeof(struct ifconf)))
+ return -EFAULT;
+ rtnl_lock();
+ err = dev_ifconf(net, &ifc, sizeof(struct ifreq));
+ rtnl_unlock();
+ if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf)))
+ err = -EFAULT;
+ } else {
+ struct ifreq ifr;
+ bool need_copyout;
+ if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
+ return -EFAULT;
+ err = dev_ioctl(net, cmd, &ifr, &need_copyout);
+ if (!err && need_copyout)
+ if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
+ return -EFAULT;
+ }
return err;
}
@@ -988,12 +1007,19 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
sock = file->private_data;
sk = sock->sk;
net = sock_net(sk);
- if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
- err = dev_ioctl(net, cmd, argp);
+ if (unlikely(cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))) {
+ struct ifreq ifr;
+ bool need_copyout;
+ if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
+ return -EFAULT;
+ err = dev_ioctl(net, cmd, &ifr, &need_copyout);
+ if (!err && need_copyout)
+ if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
+ return -EFAULT;
} else
#ifdef CONFIG_WEXT_CORE
if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
- err = dev_ioctl(net, cmd, argp);
+ err = wext_handle_ioctl(net, cmd, argp);
} else
#endif
switch (cmd) {
@@ -2654,89 +2680,25 @@ static int do_siocgstampns(struct net *net, struct socket *sock,
return err;
}
-static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
-{
- struct ifreq __user *uifr;
- int err;
-
- uifr = compat_alloc_user_space(sizeof(struct ifreq));
- if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
- return -EFAULT;
-
- err = dev_ioctl(net, SIOCGIFNAME, uifr);
- if (err)
- return err;
-
- if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
- return -EFAULT;
-
- return 0;
-}
-
-static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
+static int compat_dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
{
struct compat_ifconf ifc32;
struct ifconf ifc;
- struct ifconf __user *uifc;
- struct compat_ifreq __user *ifr32;
- struct ifreq __user *ifr;
- unsigned int i, j;
int err;
if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
return -EFAULT;
- memset(&ifc, 0, sizeof(ifc));
- if (ifc32.ifcbuf == 0) {
- ifc32.ifc_len = 0;
- ifc.ifc_len = 0;
- ifc.ifc_req = NULL;
- uifc = compat_alloc_user_space(sizeof(struct ifconf));
- } else {
- size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
- sizeof(struct ifreq);
- uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
- ifc.ifc_len = len;
- ifr = ifc.ifc_req = (void __user *)(uifc + 1);
- ifr32 = compat_ptr(ifc32.ifcbuf);
- for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
- if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
- return -EFAULT;
- ifr++;
- ifr32++;
- }
- }
- if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
- return -EFAULT;
+ ifc.ifc_len = ifc32.ifc_len;
+ ifc.ifc_req = compat_ptr(ifc32.ifcbuf);
- err = dev_ioctl(net, SIOCGIFCONF, uifc);
+ rtnl_lock();
+ err = dev_ifconf(net, &ifc, sizeof(struct compat_ifreq));
+ rtnl_unlock();
if (err)
return err;
- if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
- return -EFAULT;
-
- ifr = ifc.ifc_req;
- ifr32 = compat_ptr(ifc32.ifcbuf);
- for (i = 0, j = 0;
- i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
- i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
- if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
- return -EFAULT;
- ifr32++;
- ifr++;
- }
-
- if (ifc32.ifcbuf == 0) {
- /* Translate from 64-bit structure multiple to
- * a 32-bit one.
- */
- i = ifc.ifc_len;
- i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
- ifc32.ifc_len = i;
- } else {
- ifc32.ifc_len = i;
- }
+ ifc32.ifc_len = ifc.ifc_len;
if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
return -EFAULT;
@@ -2747,9 +2709,9 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
{
struct compat_ethtool_rxnfc __user *compat_rxnfc;
bool convert_in = false, convert_out = false;
- size_t buf_size = ALIGN(sizeof(struct ifreq), 8);
- struct ethtool_rxnfc __user *rxnfc;
- struct ifreq __user *ifr;
+ size_t buf_size = 0;
+ struct ethtool_rxnfc __user *rxnfc = NULL;
+ struct ifreq ifr;
u32 rule_cnt = 0, actual_rule_cnt;
u32 ethcmd;
u32 data;
@@ -2786,18 +2748,14 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
case ETHTOOL_SRXCLSRLDEL:
buf_size += sizeof(struct ethtool_rxnfc);
convert_in = true;
+ rxnfc = compat_alloc_user_space(buf_size);
break;
}
- ifr = compat_alloc_user_space(buf_size);
- rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8);
-
- if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
+ if (copy_from_user(&ifr.ifr_name, &ifr32->ifr_name, IFNAMSIZ))
return -EFAULT;
- if (put_user(convert_in ? rxnfc : compat_ptr(data),
- &ifr->ifr_ifru.ifru_data))
- return -EFAULT;
+ ifr.ifr_data = convert_in ? rxnfc : (void __user *)compat_rxnfc;
if (convert_in) {
/* We expect there to be holes between fs.m_ext and
@@ -2825,7 +2783,7 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
return -EFAULT;
}
- ret = dev_ioctl(net, SIOCETHTOOL, ifr);
+ ret = dev_ioctl(net, SIOCETHTOOL, &ifr, NULL);
if (ret)
return ret;
@@ -2866,113 +2824,43 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
{
- void __user *uptr;
compat_uptr_t uptr32;
- struct ifreq __user *uifr;
+ struct ifreq ifr;
+ void __user *saved;
+ int err;
- uifr = compat_alloc_user_space(sizeof(*uifr));
- if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
+ if (copy_from_user(&ifr, uifr32, sizeof(struct compat_ifreq)))
return -EFAULT;
if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
return -EFAULT;
- uptr = compat_ptr(uptr32);
-
- if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
- return -EFAULT;
-
- return dev_ioctl(net, SIOCWANDEV, uifr);
-}
-
-static int bond_ioctl(struct net *net, unsigned int cmd,
- struct compat_ifreq __user *ifr32)
-{
- struct ifreq kifr;
- mm_segment_t old_fs;
- int err;
-
- switch (cmd) {
- case SIOCBONDENSLAVE:
- case SIOCBONDRELEASE:
- case SIOCBONDSETHWADDR:
- case SIOCBONDCHANGEACTIVE:
- if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
- return -EFAULT;
-
- old_fs = get_fs();
- set_fs(KERNEL_DS);
- err = dev_ioctl(net, cmd,
- (struct ifreq __user __force *) &kifr);
- set_fs(old_fs);
+ saved = ifr.ifr_settings.ifs_ifsu.raw_hdlc;
+ ifr.ifr_settings.ifs_ifsu.raw_hdlc = compat_ptr(uptr32);
- return err;
- default:
- return -ENOIOCTLCMD;
+ err = dev_ioctl(net, SIOCWANDEV, &ifr, NULL);
+ if (!err) {
+ ifr.ifr_settings.ifs_ifsu.raw_hdlc = saved;
+ if (copy_to_user(uifr32, &ifr, sizeof(struct compat_ifreq)))
+ err = -EFAULT;
}
+ return err;
}
/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */
static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
struct compat_ifreq __user *u_ifreq32)
{
- struct ifreq __user *u_ifreq64;
- char tmp_buf[IFNAMSIZ];
- void __user *data64;
+ struct ifreq ifreq;
u32 data32;
- if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
- IFNAMSIZ))
- return -EFAULT;
- if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
- return -EFAULT;
- data64 = compat_ptr(data32);
-
- u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
-
- if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
- IFNAMSIZ))
+ if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ))
return -EFAULT;
- if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
+ if (get_user(data32, &u_ifreq32->ifr_data))
return -EFAULT;
+ ifreq.ifr_data = compat_ptr(data32);
- return dev_ioctl(net, cmd, u_ifreq64);
-}
-
-static int dev_ifsioc(struct net *net, struct socket *sock,
- unsigned int cmd, struct compat_ifreq __user *uifr32)
-{
- struct ifreq __user *uifr;
- int err;
-
- uifr = compat_alloc_user_space(sizeof(*uifr));
- if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
- return -EFAULT;
-
- err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
-
- if (!err) {
- switch (cmd) {
- case SIOCGIFFLAGS:
- case SIOCGIFMETRIC:
- case SIOCGIFMTU:
- case SIOCGIFMEM:
- case SIOCGIFHWADDR:
- case SIOCGIFINDEX:
- case SIOCGIFADDR:
- case SIOCGIFBRDADDR:
- case SIOCGIFDSTADDR:
- case SIOCGIFNETMASK:
- case SIOCGIFPFLAGS:
- case SIOCGIFTXQLEN:
- case SIOCGMIIPHY:
- case SIOCGMIIREG:
- if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
- err = -EFAULT;
- break;
- }
- }
- return err;
+ return dev_ioctl(net, cmd, &ifreq, NULL);
}
static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
@@ -2980,7 +2868,6 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
{
struct ifreq ifr;
struct compat_ifmap __user *uifmap32;
- mm_segment_t old_fs;
int err;
uifmap32 = &uifr32->ifr_ifru.ifru_map;
@@ -2994,10 +2881,7 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
if (err)
return -EFAULT;
- old_fs = get_fs();
- set_fs(KERNEL_DS);
- err = dev_ioctl(net, cmd, (void __user __force *)&ifr);
- set_fs(old_fs);
+ err = dev_ioctl(net, cmd, &ifr, NULL);
if (cmd == SIOCGIFMAP && !err) {
err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
@@ -3130,10 +3014,8 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
case SIOCSIFBR:
case SIOCGIFBR:
return old_bridge_ioctl(argp);
- case SIOCGIFNAME:
- return dev_ifname32(net, argp);
case SIOCGIFCONF:
- return dev_ifconf(net, argp);
+ return compat_dev_ifconf(net, argp);
case SIOCETHTOOL:
return ethtool_ioctl(net, argp);
case SIOCWANDEV:
@@ -3141,11 +3023,6 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
case SIOCGIFMAP:
case SIOCSIFMAP:
return compat_sioc_ifmap(net, cmd, argp);
- case SIOCBONDENSLAVE:
- case SIOCBONDRELEASE:
- case SIOCBONDSETHWADDR:
- case SIOCBONDCHANGEACTIVE:
- return bond_ioctl(net, cmd, argp);
case SIOCADDRT:
case SIOCDELRT:
return routing_ioctl(net, sock, cmd, argp);
@@ -3205,12 +3082,15 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
case SIOCGMIIPHY:
case SIOCGMIIREG:
case SIOCSMIIREG:
- return dev_ifsioc(net, sock, cmd, argp);
-
case SIOCSARP:
case SIOCGARP:
case SIOCDARP:
case SIOCATMARK:
+ case SIOCBONDENSLAVE:
+ case SIOCBONDRELEASE:
+ case SIOCBONDSETHWADDR:
+ case SIOCBONDCHANGEACTIVE:
+ case SIOCGIFNAME:
return sock_do_ioctl(net, sock, cmd, arg);
}
@@ -3365,19 +3245,6 @@ int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
}
EXPORT_SYMBOL(kernel_sendpage_locked);
-int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
-{
- mm_segment_t oldfs = get_fs();
- int err;
-
- set_fs(KERNEL_DS);
- err = sock->ops->ioctl(sock, cmd, arg);
- set_fs(oldfs);
-
- return err;
-}
-EXPORT_SYMBOL(kernel_sock_ioctl);
-
int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
{
return sock->ops->shutdown(sock, how);
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 61f394d369bf..0a9b72fbd761 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -577,6 +577,8 @@ alloc_payload:
get_page(page);
sg = ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem;
sg_set_page(sg, page, copy, offset);
+ sg_unmark_end(sg);
+
ctx->sg_plaintext_num_elem++;
sk_mem_charge(sk, copy);
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index 6cdb054484d6..9efbfc753347 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -1035,18 +1035,23 @@ static int ioctl_standard_call(struct net_device * dev,
}
-int wext_handle_ioctl(struct net *net, struct iwreq *iwr, unsigned int cmd,
- void __user *arg)
+int wext_handle_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
struct iw_request_info info = { .cmd = cmd, .flags = 0 };
+ struct iwreq iwr;
int ret;
- ret = wext_ioctl_dispatch(net, iwr, cmd, &info,
+ if (copy_from_user(&iwr, arg, sizeof(iwr)))
+ return -EFAULT;
+
+ iwr.ifr_name[sizeof(iwr.ifr_name) - 1] = 0;
+
+ ret = wext_ioctl_dispatch(net, &iwr, cmd, &info,
ioctl_standard_call,
ioctl_private_call);
if (ret >= 0 &&
IW_IS_GET(cmd) &&
- copy_to_user(arg, iwr, sizeof(struct iwreq)))
+ copy_to_user(arg, &iwr, sizeof(struct iwreq)))
return -EFAULT;
return ret;
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 75982506617b..8e70291e586a 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -147,8 +147,8 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
if (!x->type_offload)
return -EINVAL;
- /* We don't yet support UDP encapsulation, TFC padding and ESN. */
- if (x->encap || x->tfcpad || (x->props.flags & XFRM_STATE_ESN))
+ /* We don't yet support UDP encapsulation and TFC padding. */
+ if (x->encap || x->tfcpad)
return -EINVAL;
dev = dev_get_by_index(net, xuo->ifindex);
@@ -178,12 +178,20 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
return 0;
}
+ if (x->props.flags & XFRM_STATE_ESN &&
+ !dev->xfrmdev_ops->xdo_dev_state_advance_esn) {
+ xso->dev = NULL;
+ dev_put(dev);
+ return -EINVAL;
+ }
+
xso->dev = dev;
xso->num_exthdrs = 1;
xso->flags = xuo->flags;
err = dev->xfrmdev_ops->xdo_dev_state_add(x);
if (err) {
+ xso->dev = NULL;
dev_put(dev);
return err;
}
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index 02501817227b..1d38c6acf8af 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -551,6 +551,8 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq)
bitnr = replay_esn->replay_window - (diff - pos);
}
+ xfrm_dev_state_advance_esn(x);
+
nr = bitnr >> 5;
bitnr = bitnr & 0x1F;
replay_esn->bmp[nr] |= (1U << bitnr);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 20b1e414dbee..54e21f19d722 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -317,7 +317,7 @@ retry:
if (!type && try_load) {
request_module("xfrm-offload-%d-%d", family, proto);
- try_load = 0;
+ try_load = false;
goto retry;
}
@@ -2279,8 +2279,6 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
goto error;
}
- x->km.state = XFRM_STATE_VALID;
-
error:
return err;
}
@@ -2289,7 +2287,13 @@ EXPORT_SYMBOL(__xfrm_init_state);
int xfrm_init_state(struct xfrm_state *x)
{
- return __xfrm_init_state(x, true, false);
+ int err;
+
+ err = __xfrm_init_state(x, true, false);
+ if (!err)
+ x->km.state = XFRM_STATE_VALID;
+
+ return err;
}
EXPORT_SYMBOL(xfrm_init_state);
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index bdb48e5dba04..7f52b8eb177d 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -598,13 +598,6 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
goto error;
}
- if (attrs[XFRMA_OFFLOAD_DEV]) {
- err = xfrm_dev_state_add(net, x,
- nla_data(attrs[XFRMA_OFFLOAD_DEV]));
- if (err)
- goto error;
- }
-
if ((err = xfrm_alloc_replay_state_esn(&x->replay_esn, &x->preplay_esn,
attrs[XFRMA_REPLAY_ESN_VAL])))
goto error;
@@ -620,6 +613,14 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
/* override default values from above */
xfrm_update_ae_params(x, attrs, 0);
+ /* configure the hardware if offload is requested */
+ if (attrs[XFRMA_OFFLOAD_DEV]) {
+ err = xfrm_dev_state_add(net, x,
+ nla_data(attrs[XFRMA_OFFLOAD_DEV]));
+ if (err)
+ goto error;
+ }
+
return x;
error:
@@ -662,6 +663,9 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
goto out;
}
+ if (x->km.state == XFRM_STATE_VOID)
+ x->km.state = XFRM_STATE_VALID;
+
c.seq = nlh->nlmsg_seq;
c.portid = nlh->nlmsg_pid;
c.event = nlh->nlmsg_type;
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 7f61a3d57fa7..64335bb94f9f 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -201,13 +201,16 @@ CLANG_ARCH_ARGS = -target $(ARCH)
endif
# Trick to allow make to be run from this directory
-all:
+all: $(LIBBPF)
$(MAKE) -C ../../ $(CURDIR)/
clean:
$(MAKE) -C ../../ M=$(CURDIR) clean
@rm -f *~
+$(LIBBPF): FORCE
+ $(MAKE) -C $(dir $@) $(notdir $@)
+
$(obj)/syscall_nrs.s: $(src)/syscall_nrs.c
$(call if_changed_dep,cc_s_c)
diff --git a/samples/sockmap/sockmap_user.c b/samples/sockmap/sockmap_user.c
index 7cc9d228216f..7c25c0c112bc 100644
--- a/samples/sockmap/sockmap_user.c
+++ b/samples/sockmap/sockmap_user.c
@@ -23,8 +23,11 @@
#include <stdbool.h>
#include <signal.h>
#include <fcntl.h>
+#include <sys/wait.h>
+#include <time.h>
#include <sys/time.h>
+#include <sys/resource.h>
#include <sys/types.h>
#include <linux/netlink.h>
@@ -35,6 +38,8 @@
#include <assert.h>
#include <libgen.h>
+#include <getopt.h>
+
#include "../bpf/bpf_load.h"
#include "../bpf/bpf_util.h"
#include "../bpf/libbpf.h"
@@ -46,15 +51,42 @@ void running_handler(int a);
#define S1_PORT 10000
#define S2_PORT 10001
-static int sockmap_test_sockets(int rate, int dot)
+/* global sockets */
+int s1, s2, c1, c2, p1, p2;
+
+static const struct option long_options[] = {
+ {"help", no_argument, NULL, 'h' },
+ {"cgroup", required_argument, NULL, 'c' },
+ {"rate", required_argument, NULL, 'r' },
+ {"verbose", no_argument, NULL, 'v' },
+ {"iov_count", required_argument, NULL, 'i' },
+ {"length", required_argument, NULL, 'l' },
+ {"test", required_argument, NULL, 't' },
+ {0, 0, NULL, 0 }
+};
+
+static void usage(char *argv[])
+{
+ int i;
+
+ printf(" Usage: %s --cgroup <cgroup_path>\n", argv[0]);
+ printf(" options:\n");
+ for (i = 0; long_options[i].name != 0; i++) {
+ printf(" --%-12s", long_options[i].name);
+ if (long_options[i].flag != NULL)
+ printf(" flag (internal value:%d)\n",
+ *long_options[i].flag);
+ else
+ printf(" -%c\n", long_options[i].val);
+ }
+ printf("\n");
+}
+
+static int sockmap_init_sockets(void)
{
- int i, sc, err, max_fd, one = 1;
- int s1, s2, c1, c2, p1, p2;
+ int i, err, one = 1;
struct sockaddr_in addr;
- struct timeval timeout;
- char buf[1024] = {0};
int *fds[4] = {&s1, &s2, &c1, &c2};
- fd_set w;
s1 = s2 = p1 = p2 = c1 = c2 = 0;
@@ -63,8 +95,7 @@ static int sockmap_test_sockets(int rate, int dot)
*fds[i] = socket(AF_INET, SOCK_STREAM, 0);
if (*fds[i] < 0) {
perror("socket s1 failed()");
- err = *fds[i];
- goto out;
+ return errno;
}
}
@@ -74,16 +105,16 @@ static int sockmap_test_sockets(int rate, int dot)
(char *)&one, sizeof(one));
if (err) {
perror("setsockopt failed()");
- goto out;
+ return errno;
}
}
/* Non-blocking sockets */
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < 2; i++) {
err = ioctl(*fds[i], FIONBIO, (char *)&one);
if (err < 0) {
perror("ioctl s1 failed()");
- goto out;
+ return errno;
}
}
@@ -96,14 +127,14 @@ static int sockmap_test_sockets(int rate, int dot)
err = bind(s1, (struct sockaddr *)&addr, sizeof(addr));
if (err < 0) {
perror("bind s1 failed()\n");
- goto out;
+ return errno;
}
addr.sin_port = htons(S2_PORT);
err = bind(s2, (struct sockaddr *)&addr, sizeof(addr));
if (err < 0) {
perror("bind s2 failed()\n");
- goto out;
+ return errno;
}
/* Listen server sockets */
@@ -111,14 +142,14 @@ static int sockmap_test_sockets(int rate, int dot)
err = listen(s1, 32);
if (err < 0) {
perror("listen s1 failed()\n");
- goto out;
+ return errno;
}
addr.sin_port = htons(S2_PORT);
err = listen(s2, 32);
if (err < 0) {
perror("listen s1 failed()\n");
- goto out;
+ return errno;
}
/* Initiate Connect */
@@ -126,46 +157,232 @@ static int sockmap_test_sockets(int rate, int dot)
err = connect(c1, (struct sockaddr *)&addr, sizeof(addr));
if (err < 0 && errno != EINPROGRESS) {
perror("connect c1 failed()\n");
- goto out;
+ return errno;
}
addr.sin_port = htons(S2_PORT);
err = connect(c2, (struct sockaddr *)&addr, sizeof(addr));
if (err < 0 && errno != EINPROGRESS) {
perror("connect c2 failed()\n");
- goto out;
+ return errno;
+ } else if (err < 0) {
+ err = 0;
}
/* Accept Connecrtions */
p1 = accept(s1, NULL, NULL);
if (p1 < 0) {
perror("accept s1 failed()\n");
- goto out;
+ return errno;
}
p2 = accept(s2, NULL, NULL);
if (p2 < 0) {
perror("accept s1 failed()\n");
- goto out;
+ return errno;
}
- max_fd = p2;
- timeout.tv_sec = 10;
- timeout.tv_usec = 0;
-
printf("connected sockets: c1 <-> p1, c2 <-> p2\n");
printf("cgroups binding: c1(%i) <-> s1(%i) - - - c2(%i) <-> s2(%i)\n",
c1, s1, c2, s2);
+ return 0;
+}
+
+struct msg_stats {
+ size_t bytes_sent;
+ size_t bytes_recvd;
+ struct timespec start;
+ struct timespec end;
+};
+
+static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
+ struct msg_stats *s, bool tx)
+{
+ struct msghdr msg = {0};
+ int err, i, flags = MSG_NOSIGNAL;
+ struct iovec *iov;
+
+ iov = calloc(iov_count, sizeof(struct iovec));
+ if (!iov)
+ return errno;
+
+ for (i = 0; i < iov_count; i++) {
+ char *d = calloc(iov_length, sizeof(char));
+
+ if (!d) {
+ fprintf(stderr, "iov_count %i/%i OOM\n", i, iov_count);
+ goto out_errno;
+ }
+ iov[i].iov_base = d;
+ iov[i].iov_len = iov_length;
+ }
+
+ msg.msg_iov = iov;
+ msg.msg_iovlen = iov_count;
+
+ if (tx) {
+ clock_gettime(CLOCK_MONOTONIC, &s->start);
+ for (i = 0; i < cnt; i++) {
+ int sent = sendmsg(fd, &msg, flags);
+
+ if (sent < 0) {
+ perror("send loop error:");
+ goto out_errno;
+ }
+ s->bytes_sent += sent;
+ }
+ clock_gettime(CLOCK_MONOTONIC, &s->end);
+ } else {
+ int slct, recv, max_fd = fd;
+ struct timeval timeout;
+ float total_bytes;
+ fd_set w;
+
+ total_bytes = (float)iov_count * (float)iov_length * (float)cnt;
+ err = clock_gettime(CLOCK_MONOTONIC, &s->start);
+ if (err < 0)
+ perror("recv start time: ");
+ while (s->bytes_recvd < total_bytes) {
+ timeout.tv_sec = 1;
+ timeout.tv_usec = 0;
+
+ /* FD sets */
+ FD_ZERO(&w);
+ FD_SET(fd, &w);
+
+ slct = select(max_fd + 1, &w, NULL, NULL, &timeout);
+ if (slct == -1) {
+ perror("select()");
+ clock_gettime(CLOCK_MONOTONIC, &s->end);
+ goto out_errno;
+ } else if (!slct) {
+ fprintf(stderr, "unexpected timeout\n");
+ errno = -EIO;
+ clock_gettime(CLOCK_MONOTONIC, &s->end);
+ goto out_errno;
+ }
+
+ recv = recvmsg(fd, &msg, flags);
+ if (recv < 0) {
+ if (errno != EWOULDBLOCK) {
+ clock_gettime(CLOCK_MONOTONIC, &s->end);
+ perror("recv failed()\n");
+ goto out_errno;
+ }
+ }
+
+ s->bytes_recvd += recv;
+ }
+ clock_gettime(CLOCK_MONOTONIC, &s->end);
+ }
+
+ for (i = 0; i < iov_count; i++)
+ free(iov[i].iov_base);
+ free(iov);
+ return 0;
+out_errno:
+ for (i = 0; i < iov_count; i++)
+ free(iov[i].iov_base);
+ free(iov);
+ return errno;
+}
+
+static float giga = 1000000000;
+
+static inline float sentBps(struct msg_stats s)
+{
+ return s.bytes_sent / (s.end.tv_sec - s.start.tv_sec);
+}
+
+static inline float recvdBps(struct msg_stats s)
+{
+ return s.bytes_recvd / (s.end.tv_sec - s.start.tv_sec);
+}
+
+static int sendmsg_test(int iov_count, int iov_buf, int cnt,
+ int verbose, bool base)
+{
+ float sent_Bps = 0, recvd_Bps = 0;
+ int rx_fd, txpid, rxpid, err = 0;
+ struct msg_stats s = {0};
+ int status;
+
+ errno = 0;
+
+ if (base)
+ rx_fd = p1;
+ else
+ rx_fd = p2;
+
+ rxpid = fork();
+ if (rxpid == 0) {
+ err = msg_loop(rx_fd, iov_count, iov_buf, cnt, &s, false);
+ if (err)
+ fprintf(stderr,
+ "msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
+ iov_count, iov_buf, cnt, err);
+ shutdown(p2, SHUT_RDWR);
+ shutdown(p1, SHUT_RDWR);
+ if (s.end.tv_sec - s.start.tv_sec) {
+ sent_Bps = sentBps(s);
+ recvd_Bps = recvdBps(s);
+ }
+ fprintf(stdout,
+ "rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s\n",
+ s.bytes_sent, sent_Bps, sent_Bps/giga,
+ s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
+ exit(1);
+ } else if (rxpid == -1) {
+ perror("msg_loop_rx: ");
+ return errno;
+ }
+
+ txpid = fork();
+ if (txpid == 0) {
+ err = msg_loop(c1, iov_count, iov_buf, cnt, &s, true);
+ if (err)
+ fprintf(stderr,
+ "msg_loop_tx: iov_count %i iov_buf %i cnt %i err %i\n",
+ iov_count, iov_buf, cnt, err);
+ shutdown(c1, SHUT_RDWR);
+ if (s.end.tv_sec - s.start.tv_sec) {
+ sent_Bps = sentBps(s);
+ recvd_Bps = recvdBps(s);
+ }
+ fprintf(stdout,
+ "tx_sendmsg: TX: %zuB %fB/s %f GB/s RX: %zuB %fB/s %fGB/s\n",
+ s.bytes_sent, sent_Bps, sent_Bps/giga,
+ s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
+ exit(1);
+ } else if (txpid == -1) {
+ perror("msg_loop_tx: ");
+ return errno;
+ }
+
+ assert(waitpid(rxpid, &status, 0) == rxpid);
+ assert(waitpid(txpid, &status, 0) == txpid);
+ return err;
+}
+
+static int forever_ping_pong(int rate, int verbose)
+{
+ struct timeval timeout;
+ char buf[1024] = {0};
+ int sc;
+
+ timeout.tv_sec = 10;
+ timeout.tv_usec = 0;
/* Ping/Pong data from client to server */
sc = send(c1, buf, sizeof(buf), 0);
if (sc < 0) {
perror("send failed()\n");
- goto out;
+ return sc;
}
do {
- int s, rc, i;
+ int s, rc, i, max_fd = p2;
+ fd_set w;
/* FD sets */
FD_ZERO(&w);
@@ -193,7 +410,7 @@ static int sockmap_test_sockets(int rate, int dot)
if (rc < 0) {
if (errno != EWOULDBLOCK) {
perror("recv failed()\n");
- break;
+ return rc;
}
}
@@ -205,35 +422,92 @@ static int sockmap_test_sockets(int rate, int dot)
sc = send(i, buf, rc, 0);
if (sc < 0) {
perror("send failed()\n");
- break;
+ return sc;
}
}
- sleep(rate);
- if (dot) {
+
+ if (rate)
+ sleep(rate);
+
+ if (verbose) {
printf(".");
fflush(stdout);
}
} while (running);
-out:
- close(s1);
- close(s2);
- close(p1);
- close(p2);
- close(c1);
- close(c2);
- return err;
+ return 0;
}
+enum {
+ PING_PONG,
+ SENDMSG,
+ BASE,
+};
+
int main(int argc, char **argv)
{
- int rate = 1, dot = 1;
+ int iov_count = 1, length = 1024, rate = 1, verbose = 0;
+ struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
+ int opt, longindex, err, cg_fd = 0;
+ int test = PING_PONG;
char filename[256];
- int err, cg_fd;
- char *cg_path;
- cg_path = argv[argc - 1];
+ while ((opt = getopt_long(argc, argv, "hvc:r:i:l:t:",
+ long_options, &longindex)) != -1) {
+ switch (opt) {
+ /* Cgroup configuration */
+ case 'c':
+ cg_fd = open(optarg, O_DIRECTORY, O_RDONLY);
+ if (cg_fd < 0) {
+ fprintf(stderr,
+ "ERROR: (%i) open cg path failed: %s\n",
+ cg_fd, optarg);
+ return cg_fd;
+ }
+ break;
+ case 'r':
+ rate = atoi(optarg);
+ break;
+ case 'v':
+ verbose = 1;
+ break;
+ case 'i':
+ iov_count = atoi(optarg);
+ break;
+ case 'l':
+ length = atoi(optarg);
+ break;
+ case 't':
+ if (strcmp(optarg, "ping") == 0) {
+ test = PING_PONG;
+ } else if (strcmp(optarg, "sendmsg") == 0) {
+ test = SENDMSG;
+ } else if (strcmp(optarg, "base") == 0) {
+ test = BASE;
+ } else {
+ usage(argv);
+ return -1;
+ }
+ break;
+ case 'h':
+ default:
+ usage(argv);
+ return -1;
+ }
+ }
+
+ if (!cg_fd) {
+ fprintf(stderr, "%s requires cgroup option: --cgroup <path>\n",
+ argv[0]);
+ return -1;
+ }
+
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK)");
+ return 1;
+ }
+
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
running = 1;
@@ -241,20 +515,16 @@ int main(int argc, char **argv)
/* catch SIGINT */
signal(SIGINT, running_handler);
+ /* If base test skip BPF setup */
+ if (test == BASE)
+ goto run;
+
if (load_bpf_file(filename)) {
fprintf(stderr, "load_bpf_file: (%s) %s\n",
filename, strerror(errno));
return 1;
}
- /* Cgroup configuration */
- cg_fd = open(cg_path, O_DIRECTORY, O_RDONLY);
- if (cg_fd < 0) {
- fprintf(stderr, "ERROR: (%i) open cg path failed: %s\n",
- cg_fd, cg_path);
- return cg_fd;
- }
-
/* Attach programs to sockmap */
err = bpf_prog_attach(prog_fd[0], map_fd[0],
BPF_SK_SKB_STREAM_PARSER, 0);
@@ -280,12 +550,30 @@ int main(int argc, char **argv)
return err;
}
- err = sockmap_test_sockets(rate, dot);
+run:
+ err = sockmap_init_sockets();
if (err) {
fprintf(stderr, "ERROR: test socket failed: %d\n", err);
- return err;
+ goto out;
}
- return 0;
+
+ if (test == PING_PONG)
+ err = forever_ping_pong(rate, verbose);
+ else if (test == SENDMSG)
+ err = sendmsg_test(iov_count, length, rate, verbose, false);
+ else if (test == BASE)
+ err = sendmsg_test(iov_count, length, rate, verbose, true);
+ else
+ fprintf(stderr, "unknown test\n");
+out:
+ close(s1);
+ close(s2);
+ close(p1);
+ close(p2);
+ close(c1);
+ close(c2);
+ close(cg_fd);
+ return err;
}
void running_handler(int a)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index af1f49ad8b88..db6bdc375126 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -17,7 +17,7 @@
#define BPF_ALU64 0x07 /* alu mode in double word width */
/* ld/ldx fields */
-#define BPF_DW 0x18 /* double word */
+#define BPF_DW 0x18 /* double word (64-bit) */
#define BPF_XADD 0xc0 /* exclusive add */
/* alu/jmp fields */
@@ -642,6 +642,14 @@ union bpf_attr {
* @optlen: length of optval in bytes
* Return: 0 or negative error
*
+ * int bpf_sock_ops_cb_flags_set(bpf_sock_ops, flags)
+ * Set callback flags for sock_ops
+ * @bpf_sock_ops: pointer to bpf_sock_ops_kern struct
+ * @flags: flags value
+ * Return: 0 for no error
+ * -EINVAL if there is no full tcp socket
+ * bits in flags that are not supported by current kernel
+ *
* int bpf_skb_adjust_room(skb, len_diff, mode, flags)
* Grow or shrink room in sk_buff.
* @skb: pointer to skb
@@ -748,7 +756,8 @@ union bpf_attr {
FN(perf_event_read_value), \
FN(perf_prog_read_value), \
FN(getsockopt), \
- FN(override_return),
+ FN(override_return), \
+ FN(sock_ops_cb_flags_set),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -952,8 +961,9 @@ struct bpf_map_info {
struct bpf_sock_ops {
__u32 op;
union {
- __u32 reply;
- __u32 replylong[4];
+ __u32 args[4]; /* Optionally passed to bpf program */
+ __u32 reply; /* Returned by bpf program */
+ __u32 replylong[4]; /* Optionally returned by bpf prog */
};
__u32 family;
__u32 remote_ip4; /* Stored in network byte order */
@@ -968,8 +978,39 @@ struct bpf_sock_ops {
*/
__u32 snd_cwnd;
__u32 srtt_us; /* Averaged RTT << 3 in usecs */
+ __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */
+ __u32 state;
+ __u32 rtt_min;
+ __u32 snd_ssthresh;
+ __u32 rcv_nxt;
+ __u32 snd_nxt;
+ __u32 snd_una;
+ __u32 mss_cache;
+ __u32 ecn_flags;
+ __u32 rate_delivered;
+ __u32 rate_interval_us;
+ __u32 packets_out;
+ __u32 retrans_out;
+ __u32 total_retrans;
+ __u32 segs_in;
+ __u32 data_segs_in;
+ __u32 segs_out;
+ __u32 data_segs_out;
+ __u32 lost_out;
+ __u32 sacked_out;
+ __u32 sk_txhash;
+ __u64 bytes_received;
+ __u64 bytes_acked;
};
+/* Definitions for bpf_sock_ops_cb_flags */
+#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0)
+#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1)
+#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2)
+#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x7 /* Mask of all currently
+ * supported cb flags
+ */
+
/* List of known BPF sock_ops operators.
* New entries can only be added at the end
*/
@@ -1003,6 +1044,43 @@ enum {
* a congestion threshold. RTTs above
* this indicate congestion
*/
+ BPF_SOCK_OPS_RTO_CB, /* Called when an RTO has triggered.
+ * Arg1: value of icsk_retransmits
+ * Arg2: value of icsk_rto
+ * Arg3: whether RTO has expired
+ */
+ BPF_SOCK_OPS_RETRANS_CB, /* Called when skb is retransmitted.
+ * Arg1: sequence number of 1st byte
+ * Arg2: # segments
+ * Arg3: return value of
+ * tcp_transmit_skb (0 => success)
+ */
+ BPF_SOCK_OPS_STATE_CB, /* Called when TCP changes state.
+ * Arg1: old_state
+ * Arg2: new_state
+ */
+};
+
+/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
+ * changes between the TCP and BPF versions. Ideally this should never happen.
+ * If it does, we need to add code to convert them before calling
+ * the BPF sock_ops function.
+ */
+enum {
+ BPF_TCP_ESTABLISHED = 1,
+ BPF_TCP_SYN_SENT,
+ BPF_TCP_SYN_RECV,
+ BPF_TCP_FIN_WAIT1,
+ BPF_TCP_FIN_WAIT2,
+ BPF_TCP_TIME_WAIT,
+ BPF_TCP_CLOSE,
+ BPF_TCP_CLOSE_WAIT,
+ BPF_TCP_LAST_ACK,
+ BPF_TCP_LISTEN,
+ BPF_TCP_CLOSING, /* Now a valid state */
+ BPF_TCP_NEW_SYN_RECV,
+
+ BPF_TCP_MAX_STATES /* Leave at the end! */
};
#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 3a44b655d852..bf05bc5e36e5 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -11,16 +11,16 @@ ifneq ($(wildcard $(GENHDR)),)
endif
CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
-LDLIBS += -lcap -lelf -lrt
+LDLIBS += -lcap -lelf -lrt -lpthread
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
- test_align test_verifier_log test_dev_cgroup
+ test_align test_verifier_log test_dev_cgroup test_tcpbpf_user
TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
- sample_map_ret0.o
+ sample_map_ret0.o test_tcpbpf_kern.o
TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \
test_offload.py
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index 33cb00e46c49..dde2c11d7771 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -71,6 +71,8 @@ static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval,
static int (*bpf_getsockopt)(void *ctx, int level, int optname, void *optval,
int optlen) =
(void *) BPF_FUNC_getsockopt;
+static int (*bpf_sock_ops_cb_flags_set)(void *ctx, int flags) =
+ (void *) BPF_FUNC_sock_ops_cb_flags_set;
static int (*bpf_sk_redirect_map)(void *ctx, void *map, int key, int flags) =
(void *) BPF_FUNC_sk_redirect_map;
static int (*bpf_sock_map_update)(void *map, void *key, void *value,
diff --git a/tools/testing/selftests/bpf/tcp_client.py b/tools/testing/selftests/bpf/tcp_client.py
new file mode 100755
index 000000000000..481dccdf140c
--- /dev/null
+++ b/tools/testing/selftests/bpf/tcp_client.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python2
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+
+import sys, os, os.path, getopt
+import socket, time
+import subprocess
+import select
+
+def read(sock, n):
+ buf = ''
+ while len(buf) < n:
+ rem = n - len(buf)
+ try: s = sock.recv(rem)
+ except (socket.error), e: return ''
+ buf += s
+ return buf
+
+def send(sock, s):
+ total = len(s)
+ count = 0
+ while count < total:
+ try: n = sock.send(s)
+ except (socket.error), e: n = 0
+ if n == 0:
+ return count;
+ count += n
+ return count
+
+
+serverPort = int(sys.argv[1])
+HostName = socket.gethostname()
+
+# create active socket
+sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
+try:
+ sock.connect((HostName, serverPort))
+except socket.error as e:
+ sys.exit(1)
+
+buf = ''
+n = 0
+while n < 1000:
+ buf += '+'
+ n += 1
+
+sock.settimeout(1);
+n = send(sock, buf)
+n = read(sock, 500)
+sys.exit(0)
diff --git a/tools/testing/selftests/bpf/tcp_server.py b/tools/testing/selftests/bpf/tcp_server.py
new file mode 100755
index 000000000000..bc454d7d0be2
--- /dev/null
+++ b/tools/testing/selftests/bpf/tcp_server.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python2
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+
+import sys, os, os.path, getopt
+import socket, time
+import subprocess
+import select
+
+def read(sock, n):
+ buf = ''
+ while len(buf) < n:
+ rem = n - len(buf)
+ try: s = sock.recv(rem)
+ except (socket.error), e: return ''
+ buf += s
+ return buf
+
+def send(sock, s):
+ total = len(s)
+ count = 0
+ while count < total:
+ try: n = sock.send(s)
+ except (socket.error), e: n = 0
+ if n == 0:
+ return count;
+ count += n
+ return count
+
+
+SERVER_PORT = 12877
+MAX_PORTS = 2
+
+serverPort = SERVER_PORT
+serverSocket = None
+
+HostName = socket.gethostname()
+
+# create passive socket
+serverSocket = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
+host = socket.gethostname()
+
+try: serverSocket.bind((host, 0))
+except socket.error as msg:
+ print 'bind fails: ', msg
+
+sn = serverSocket.getsockname()
+serverPort = sn[1]
+
+cmdStr = ("./tcp_client.py %d &") % (serverPort)
+os.system(cmdStr)
+
+buf = ''
+n = 0
+while n < 500:
+ buf += '.'
+ n += 1
+
+serverSocket.listen(MAX_PORTS)
+readList = [serverSocket]
+
+while True:
+ readyRead, readyWrite, inError = \
+ select.select(readList, [], [], 2)
+
+ if len(readyRead) > 0:
+ waitCount = 0
+ for sock in readyRead:
+ if sock == serverSocket:
+ (clientSocket, address) = serverSocket.accept()
+ address = str(address[0])
+ readList.append(clientSocket)
+ else:
+ sock.settimeout(1);
+ s = read(sock, 1000)
+ n = send(sock, buf)
+ sock.close()
+ serverSocket.close()
+ sys.exit(0)
+ else:
+ print 'Select timeout!'
+ sys.exit(1)
diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c
index e19b410125eb..ff8bd7e3e50c 100644
--- a/tools/testing/selftests/bpf/test_align.c
+++ b/tools/testing/selftests/bpf/test_align.c
@@ -446,11 +446,9 @@ static struct bpf_align_test tests[] = {
.insns = {
PREP_PKT_POINTERS,
BPF_MOV64_IMM(BPF_REG_0, 0),
- /* ptr & const => unknown & const */
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_5, 0x40),
- /* ptr << const => unknown << const */
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+ /* (ptr - ptr) << 2 */
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_5, BPF_REG_2),
BPF_ALU64_IMM(BPF_LSH, BPF_REG_5, 2),
/* We have a (4n) value. Let's make a packet offset
* out of it. First add 14, to make it a (4n+2)
@@ -473,8 +471,26 @@ static struct bpf_align_test tests[] = {
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
.matches = {
- {4, "R5_w=pkt(id=0,off=0,r=0,imm=0)"},
- /* R5 bitwise operator &= on pointer prohibited */
+ {4, "R5_w=pkt_end(id=0,off=0,imm=0)"},
+ /* (ptr - ptr) << 2 == unknown, (4n) */
+ {6, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc))"},
+ /* (4n) + 14 == (4n+2). We blow our bounds, because
+ * the add could overflow.
+ */
+ {7, "R5=inv(id=0,var_off=(0x2; 0xfffffffffffffffc))"},
+ /* Checked s>=0 */
+ {9, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+ /* packet pointer + nonnegative (4n+2) */
+ {11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+ {13, "R4=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+ /* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
+ * We checked the bounds, but it might have been able
+ * to overflow if the packet pointer started in the
+ * upper half of the address space.
+ * So we did not get a 'range' on R6, and the access
+ * attempt will fail.
+ */
+ {15, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
}
},
{
diff --git a/tools/testing/selftests/bpf/test_dev_cgroup.c b/tools/testing/selftests/bpf/test_dev_cgroup.c
index c1535b34f14f..3489cc283433 100644
--- a/tools/testing/selftests/bpf/test_dev_cgroup.c
+++ b/tools/testing/selftests/bpf/test_dev_cgroup.c
@@ -21,7 +21,7 @@
#define DEV_CGROUP_PROG "./dev_cgroup.o"
-#define TEST_CGROUP "test-bpf-based-device-cgroup/"
+#define TEST_CGROUP "/test-bpf-based-device-cgroup/"
int main(int argc, char **argv)
{
diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c
index 081510853c6d..2be87e9ee28d 100644
--- a/tools/testing/selftests/bpf/test_lpm_map.c
+++ b/tools/testing/selftests/bpf/test_lpm_map.c
@@ -14,6 +14,7 @@
#include <errno.h>
#include <inttypes.h>
#include <linux/bpf.h>
+#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -641,6 +642,98 @@ static void test_lpm_get_next_key(void)
close(map_fd);
}
+#define MAX_TEST_KEYS 4
+struct lpm_mt_test_info {
+ int cmd; /* 0: update, 1: delete, 2: lookup, 3: get_next_key */
+ int iter;
+ int map_fd;
+ struct {
+ __u32 prefixlen;
+ __u32 data;
+ } key[MAX_TEST_KEYS];
+};
+
+static void *lpm_test_command(void *arg)
+{
+ int i, j, ret, iter, key_size;
+ struct lpm_mt_test_info *info = arg;
+ struct bpf_lpm_trie_key *key_p;
+
+ key_size = sizeof(struct bpf_lpm_trie_key) + sizeof(__u32);
+ key_p = alloca(key_size);
+ for (iter = 0; iter < info->iter; iter++)
+ for (i = 0; i < MAX_TEST_KEYS; i++) {
+ /* first half of iterations in forward order,
+ * and second half in backward order.
+ */
+ j = (iter < (info->iter / 2)) ? i : MAX_TEST_KEYS - i - 1;
+ key_p->prefixlen = info->key[j].prefixlen;
+ memcpy(key_p->data, &info->key[j].data, sizeof(__u32));
+ if (info->cmd == 0) {
+ __u32 value = j;
+ /* update must succeed */
+ assert(bpf_map_update_elem(info->map_fd, key_p, &value, 0) == 0);
+ } else if (info->cmd == 1) {
+ ret = bpf_map_delete_elem(info->map_fd, key_p);
+ assert(ret == 0 || errno == ENOENT);
+ } else if (info->cmd == 2) {
+ __u32 value;
+ ret = bpf_map_lookup_elem(info->map_fd, key_p, &value);
+ assert(ret == 0 || errno == ENOENT);
+ } else {
+ struct bpf_lpm_trie_key *next_key_p = alloca(key_size);
+ ret = bpf_map_get_next_key(info->map_fd, key_p, next_key_p);
+ assert(ret == 0 || errno == ENOENT || errno == ENOMEM);
+ }
+ }
+
+ // Pass successful exit info back to the main thread
+ pthread_exit((void *)info);
+}
+
+static void setup_lpm_mt_test_info(struct lpm_mt_test_info *info, int map_fd)
+{
+ info->iter = 2000;
+ info->map_fd = map_fd;
+ info->key[0].prefixlen = 16;
+ inet_pton(AF_INET, "192.168.0.0", &info->key[0].data);
+ info->key[1].prefixlen = 24;
+ inet_pton(AF_INET, "192.168.0.0", &info->key[1].data);
+ info->key[2].prefixlen = 24;
+ inet_pton(AF_INET, "192.168.128.0", &info->key[2].data);
+ info->key[3].prefixlen = 24;
+ inet_pton(AF_INET, "192.168.1.0", &info->key[3].data);
+}
+
+static void test_lpm_multi_thread(void)
+{
+ struct lpm_mt_test_info info[4];
+ size_t key_size, value_size;
+ pthread_t thread_id[4];
+ int i, map_fd;
+ void *ret;
+
+ /* create a trie */
+ value_size = sizeof(__u32);
+ key_size = sizeof(struct bpf_lpm_trie_key) + value_size;
+ map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, value_size,
+ 100, BPF_F_NO_PREALLOC);
+
+ /* create 4 threads to test update, delete, lookup and get_next_key */
+ setup_lpm_mt_test_info(&info[0], map_fd);
+ for (i = 0; i < 4; i++) {
+ if (i != 0)
+ memcpy(&info[i], &info[0], sizeof(info[i]));
+ info[i].cmd = i;
+ assert(pthread_create(&thread_id[i], NULL, &lpm_test_command, &info[i]) == 0);
+ }
+
+ for (i = 0; i < 4; i++)
+ assert(pthread_join(thread_id[i], &ret) == 0 && ret == (void *)&info[i]);
+
+ close(map_fd);
+}
+
int main(void)
{
struct rlimit limit = { RLIM_INFINITY, RLIM_INFINITY };
@@ -667,6 +760,8 @@ int main(void)
test_lpm_get_next_key();
+ test_lpm_multi_thread();
+
printf("test_lpm: OK\n");
return 0;
}
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 040356ecc862..436c4c72414f 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -242,7 +242,7 @@ static void test_hashmap_percpu(int task, void *data)
static void test_hashmap_walk(int task, void *data)
{
- int fd, i, max_entries = 100000;
+ int fd, i, max_entries = 1000;
long long key, value, next_key;
bool next_key_valid = true;
@@ -463,7 +463,7 @@ static void test_devmap(int task, void *data)
#define SOCKMAP_VERDICT_PROG "./sockmap_verdict_prog.o"
static void test_sockmap(int tasks, void *data)
{
- int one = 1, map_fd_rx, map_fd_tx, map_fd_break, s, sc, rc;
+ int one = 1, map_fd_rx = 0, map_fd_tx = 0, map_fd_break, s, sc, rc;
struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_break;
int ports[] = {50200, 50201, 50202, 50204};
int err, i, fd, udp, sfd[6] = {0xdeadbeef};
@@ -868,9 +868,12 @@ static void test_sockmap(int tasks, void *data)
goto out_sockmap;
}
- /* Test map close sockets */
- for (i = 0; i < 6; i++)
+ /* Test map close sockets and empty maps */
+ for (i = 0; i < 6; i++) {
+ bpf_map_delete_elem(map_fd_tx, &i);
+ bpf_map_delete_elem(map_fd_rx, &i);
close(sfd[i]);
+ }
close(fd);
close(map_fd_rx);
bpf_object__close(obj);
@@ -881,8 +884,13 @@ out:
printf("Failed to create sockmap '%i:%s'!\n", i, strerror(errno));
exit(1);
out_sockmap:
- for (i = 0; i < 6; i++)
+ for (i = 0; i < 6; i++) {
+ if (map_fd_tx)
+ bpf_map_delete_elem(map_fd_tx, &i);
+ if (map_fd_rx)
+ bpf_map_delete_elem(map_fd_rx, &i);
close(sfd[i]);
+ }
close(fd);
exit(1);
}
@@ -931,8 +939,12 @@ static void test_map_large(void)
close(fd);
}
-static void run_parallel(int tasks, void (*fn)(int task, void *data),
- void *data)
+#define run_parallel(N, FN, DATA) \
+ printf("Fork %d tasks to '" #FN "'\n", N); \
+ __run_parallel(N, FN, DATA)
+
+static void __run_parallel(int tasks, void (*fn)(int task, void *data),
+ void *data)
{
pid_t pid[tasks];
int i;
@@ -972,7 +984,7 @@ static void test_map_stress(void)
#define DO_UPDATE 1
#define DO_DELETE 0
-static void do_work(int fn, void *data)
+static void test_update_delete(int fn, void *data)
{
int do_update = ((int *)data)[1];
int fd = ((int *)data)[0];
@@ -1012,7 +1024,7 @@ static void test_map_parallel(void)
*/
data[0] = fd;
data[1] = DO_UPDATE;
- run_parallel(TASKS, do_work, data);
+ run_parallel(TASKS, test_update_delete, data);
/* Check that key=0 is already there. */
assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
@@ -1035,7 +1047,7 @@ static void test_map_parallel(void)
/* Now let's delete all elemenets in parallel. */
data[1] = DO_DELETE;
- run_parallel(TASKS, do_work, data);
+ run_parallel(TASKS, test_update_delete, data);
/* Nothing should be left. */
key = -1;
diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index 833b9c1ec450..e78aad0a68bb 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -26,6 +26,7 @@ import time
logfile = None
log_level = 1
+skip_extack = False
bpf_test_dir = os.path.dirname(os.path.realpath(__file__))
pp = pprint.PrettyPrinter()
devs = [] # devices we created for clean up
@@ -132,7 +133,7 @@ def rm(f):
if f in files:
files.remove(f)
-def tool(name, args, flags, JSON=True, ns="", fail=True):
+def tool(name, args, flags, JSON=True, ns="", fail=True, include_stderr=False):
params = ""
if JSON:
params += "%s " % (flags["json"])
@@ -140,9 +141,20 @@ def tool(name, args, flags, JSON=True, ns="", fail=True):
if ns != "":
ns = "ip netns exec %s " % (ns)
- ret, out = cmd(ns + name + " " + params + args, fail=fail)
- if JSON and len(out.strip()) != 0:
- return ret, json.loads(out)
+ if include_stderr:
+ ret, stdout, stderr = cmd(ns + name + " " + params + args,
+ fail=fail, include_stderr=True)
+ else:
+ ret, stdout = cmd(ns + name + " " + params + args,
+ fail=fail, include_stderr=False)
+
+ if JSON and len(stdout.strip()) != 0:
+ out = json.loads(stdout)
+ else:
+ out = stdout
+
+ if include_stderr:
+ return ret, out, stderr
else:
return ret, out
@@ -181,13 +193,15 @@ def bpftool_map_list_wait(expected=0, n_retry=20):
time.sleep(0.05)
raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps))
-def ip(args, force=False, JSON=True, ns="", fail=True):
+def ip(args, force=False, JSON=True, ns="", fail=True, include_stderr=False):
if force:
args = "-force " + args
- return tool("ip", args, {"json":"-j"}, JSON=JSON, ns=ns, fail=fail)
+ return tool("ip", args, {"json":"-j"}, JSON=JSON, ns=ns,
+ fail=fail, include_stderr=include_stderr)
-def tc(args, JSON=True, ns="", fail=True):
- return tool("tc", args, {"json":"-p"}, JSON=JSON, ns=ns, fail=fail)
+def tc(args, JSON=True, ns="", fail=True, include_stderr=False):
+ return tool("tc", args, {"json":"-p"}, JSON=JSON, ns=ns,
+ fail=fail, include_stderr=include_stderr)
def ethtool(dev, opt, args, fail=True):
return cmd("ethtool %s %s %s" % (opt, dev["ifname"], args), fail=fail)
@@ -348,13 +362,19 @@ class NetdevSim:
return ip("link set dev %s mtu %d" % (self.dev["ifname"], mtu),
fail=fail)
- def set_xdp(self, bpf, mode, force=False, JSON=True, fail=True):
+ def set_xdp(self, bpf, mode, force=False, JSON=True, verbose=False,
+ fail=True, include_stderr=False):
+ if verbose:
+ bpf += " verbose"
return ip("link set dev %s xdp%s %s" % (self.dev["ifname"], mode, bpf),
- force=force, JSON=JSON, fail=fail)
+ force=force, JSON=JSON,
+ fail=fail, include_stderr=include_stderr)
- def unset_xdp(self, mode, force=False, JSON=True, fail=True):
+ def unset_xdp(self, mode, force=False, JSON=True,
+ fail=True, include_stderr=False):
return ip("link set dev %s xdp%s off" % (self.dev["ifname"], mode),
- force=force, JSON=JSON, fail=fail)
+ force=force, JSON=JSON,
+ fail=fail, include_stderr=include_stderr)
def ip_link_show(self, xdp):
_, link = ip("link show dev %s" % (self['ifname']))
@@ -409,17 +429,41 @@ class NetdevSim:
(len(filters), expected))
return filters
- def cls_bpf_add_filter(self, bpf, da=False, skip_sw=False, skip_hw=False,
- fail=True):
+ def cls_filter_op(self, op, qdisc="ingress", prio=None, handle=None,
+ chain=None, cls="", params="",
+ fail=True, include_stderr=False):
+ spec = ""
+ if prio is not None:
+ spec += " prio %d" % (prio)
+ if handle:
+ spec += " handle %s" % (handle)
+ if chain is not None:
+ spec += " chain %d" % (chain)
+
+ return tc("filter {op} dev {dev} {qdisc} {spec} {cls} {params}"\
+ .format(op=op, dev=self['ifname'], qdisc=qdisc, spec=spec,
+ cls=cls, params=params),
+ fail=fail, include_stderr=include_stderr)
+
+ def cls_bpf_add_filter(self, bpf, op="add", prio=None, handle=None,
+ chain=None, da=False, verbose=False,
+ skip_sw=False, skip_hw=False,
+ fail=True, include_stderr=False):
+ cls = "bpf " + bpf
+
params = ""
if da:
params += " da"
+ if verbose:
+ params += " verbose"
if skip_sw:
params += " skip_sw"
if skip_hw:
params += " skip_hw"
- return tc("filter add dev %s ingress bpf %s %s" %
- (self['ifname'], bpf, params), fail=fail)
+
+ return self.cls_filter_op(op=op, prio=prio, handle=handle, cls=cls,
+ chain=chain, params=params,
+ fail=fail, include_stderr=include_stderr)
def set_ethtool_tc_offloads(self, enable, fail=True):
args = "hw-tc-offload %s" % ("on" if enable else "off")
@@ -491,6 +535,39 @@ def check_dev_info(other_ns, ns, prog_file=None, map_file=None, removed=False):
fail("dev" not in m.keys(), "Device parameters not reported")
fail(dev != m["dev"], "Map's device different than program's")
+def check_extack(output, reference, args):
+ if skip_extack:
+ return
+ lines = output.split("\n")
+ comp = len(lines) >= 2 and lines[1] == reference
+ fail(not comp, "Missing or incorrect netlink extack message")
+
+def check_extack_nsim(output, reference, args):
+ check_extack(output, "Error: netdevsim: " + reference, args)
+
+def check_no_extack(res, needle):
+ fail((res[1] + res[2]).count(needle) or (res[1] + res[2]).count("Warning:"),
+ "Found '%s' in command output, leaky extack?" % (needle))
+
+def check_verifier_log(output, reference):
+ lines = output.split("\n")
+ for l in reversed(lines):
+ if l == reference:
+ return
+ fail(True, "Missing or incorrect message from netdevsim in verifier log")
+
+def test_spurios_extack(sim, obj, skip_hw, needle):
+ res = sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_hw=skip_hw,
+ include_stderr=True)
+ check_no_extack(res, needle)
+ res = sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1,
+ skip_hw=skip_hw, include_stderr=True)
+ check_no_extack(res, needle)
+ res = sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf",
+ include_stderr=True)
+ check_no_extack(res, needle)
+
+
# Parse command line
parser = argparse.ArgumentParser()
parser.add_argument("--log", help="output verbose log to given file")
@@ -527,6 +604,14 @@ for s in samples:
skip(ret != 0, "sample %s/%s not found, please compile it" %
(bpf_test_dir, s))
+# Check if iproute2 is built with libmnl (needed by extack support)
+_, _, err = cmd("tc qdisc delete dev lo handle 0",
+ fail=False, include_stderr=True)
+if err.find("Error: Failed to find qdisc with specified handle.") == -1:
+ print("Warning: no extack message in iproute2 output, libmnl missing?")
+ log("Warning: no extack message in iproute2 output, libmnl missing?", "")
+ skip_extack = True
+
# Check if net namespaces seem to work
ns = mknetns()
skip(ns is None, "Could not create a net namespace")
@@ -558,8 +643,10 @@ try:
sim.tc_flush_filters()
start_test("Test TC offloads are off by default...")
- ret, _ = sim.cls_bpf_add_filter(obj, skip_sw=True, fail=False)
+ ret, _, err = sim.cls_bpf_add_filter(obj, skip_sw=True,
+ fail=False, include_stderr=True)
fail(ret == 0, "TC filter loaded without enabling TC offloads")
+ check_extack(err, "Error: TC offload is disabled on net device.", args)
sim.wait_for_flush()
sim.set_ethtool_tc_offloads(True)
@@ -587,13 +674,63 @@ try:
sim.dfs["bpf_tc_non_bound_accept"] = "N"
start_test("Test TC cBPF unbound bytecode doesn't offload...")
- ret, _ = sim.cls_bpf_add_filter(bytecode, skip_sw=True, fail=False)
+ ret, _, err = sim.cls_bpf_add_filter(bytecode, skip_sw=True,
+ fail=False, include_stderr=True)
fail(ret == 0, "TC bytecode loaded for offload")
+ check_extack_nsim(err, "netdevsim configured to reject unbound programs.",
+ args)
sim.wait_for_flush()
+ start_test("Test non-0 chain offload...")
+ ret, _, err = sim.cls_bpf_add_filter(obj, chain=1, prio=1, handle=1,
+ skip_sw=True,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "Offloaded a filter to chain other than 0")
+ check_extack(err, "Error: Driver supports only offload of chain 0.", args)
+ sim.tc_flush_filters()
+
+ start_test("Test TC replace...")
+ sim.cls_bpf_add_filter(obj, prio=1, handle=1)
+ sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1)
+ sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf")
+
+ sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_sw=True)
+ sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, skip_sw=True)
+ sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf")
+
+ sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_hw=True)
+ sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, skip_hw=True)
+ sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf")
+
+ start_test("Test TC replace bad flags...")
+ for i in range(3):
+ for j in range(3):
+ ret, _ = sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1,
+ skip_sw=(j == 1), skip_hw=(j == 2),
+ fail=False)
+ fail(bool(ret) != bool(j),
+ "Software TC incorrect load in replace test, iteration %d" %
+ (j))
+ sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf")
+
+ start_test("Test spurious extack from the driver...")
+ test_spurios_extack(sim, obj, False, "netdevsim")
+ test_spurios_extack(sim, obj, True, "netdevsim")
+
+ sim.set_ethtool_tc_offloads(False)
+
+ test_spurios_extack(sim, obj, False, "TC offload is disabled")
+ test_spurios_extack(sim, obj, True, "TC offload is disabled")
+
+ sim.set_ethtool_tc_offloads(True)
+
+ sim.tc_flush_filters()
+
start_test("Test TC offloads work...")
- ret, _ = sim.cls_bpf_add_filter(obj, skip_sw=True, fail=False)
+ ret, _, err = sim.cls_bpf_add_filter(obj, verbose=True, skip_sw=True,
+ fail=False, include_stderr=True)
fail(ret != 0, "TC filter did not load with TC offloads enabled")
+ check_verifier_log(err, "[netdevsim] Hello from netdevsim!")
start_test("Test TC offload basics...")
dfs = sim.dfs_get_bound_progs(expected=1)
@@ -669,16 +806,24 @@ try:
"Device parameters reported for non-offloaded program")
start_test("Test XDP prog replace with bad flags...")
- ret, _ = sim.set_xdp(obj, "offload", force=True, fail=False)
+ ret, _, err = sim.set_xdp(obj, "offload", force=True,
+ fail=False, include_stderr=True)
fail(ret == 0, "Replaced XDP program with a program in different mode")
- ret, _ = sim.set_xdp(obj, "", force=True, fail=False)
+ check_extack_nsim(err, "program loaded with different flags.", args)
+ ret, _, err = sim.set_xdp(obj, "", force=True,
+ fail=False, include_stderr=True)
fail(ret == 0, "Replaced XDP program with a program in different mode")
+ check_extack_nsim(err, "program loaded with different flags.", args)
start_test("Test XDP prog remove with bad flags...")
- ret, _ = sim.unset_xdp("offload", force=True, fail=False)
+ ret, _, err = sim.unset_xdp("offload", force=True,
+ fail=False, include_stderr=True)
fail(ret == 0, "Removed program with a bad mode mode")
- ret, _ = sim.unset_xdp("", force=True, fail=False)
+ check_extack_nsim(err, "program loaded with different flags.", args)
+ ret, _, err = sim.unset_xdp("", force=True,
+ fail=False, include_stderr=True)
fail(ret == 0, "Removed program with a bad mode mode")
+ check_extack_nsim(err, "program loaded with different flags.", args)
start_test("Test MTU restrictions...")
ret, _ = sim.set_mtu(9000, fail=False)
@@ -687,18 +832,20 @@ try:
sim.unset_xdp("drv")
bpftool_prog_list_wait(expected=0)
sim.set_mtu(9000)
- ret, _ = sim.set_xdp(obj, "drv", fail=False)
+ ret, _, err = sim.set_xdp(obj, "drv", fail=False, include_stderr=True)
fail(ret == 0, "Driver should refuse to load program with MTU of 9000...")
+ check_extack_nsim(err, "MTU too large w/ XDP enabled.", args)
sim.set_mtu(1500)
sim.wait_for_flush()
start_test("Test XDP offload...")
- sim.set_xdp(obj, "offload")
+ _, _, err = sim.set_xdp(obj, "offload", verbose=True, include_stderr=True)
ipl = sim.ip_link_show(xdp=True)
link_xdp = ipl["xdp"]["prog"]
progs = bpftool_prog_list(expected=1)
prog = progs[0]
fail(link_xdp["id"] != prog["id"], "Loaded program has wrong ID")
+ check_verifier_log(err, "[netdevsim] Hello from netdevsim!")
start_test("Test XDP offload is device bound...")
dfs = sim.dfs_get_bound_progs(expected=1)
@@ -724,25 +871,32 @@ try:
sim2.set_xdp(obj, "offload")
pin_file, pinned = pin_prog("/sys/fs/bpf/tmp")
- ret, _ = sim.set_xdp(pinned, "offload", fail=False)
+ ret, _, err = sim.set_xdp(pinned, "offload",
+ fail=False, include_stderr=True)
fail(ret == 0, "Pinned program loaded for a different device accepted")
+ check_extack_nsim(err, "program bound to different dev.", args)
sim2.remove()
- ret, _ = sim.set_xdp(pinned, "offload", fail=False)
+ ret, _, err = sim.set_xdp(pinned, "offload",
+ fail=False, include_stderr=True)
fail(ret == 0, "Pinned program loaded for a removed device accepted")
+ check_extack_nsim(err, "xdpoffload of non-bound program.", args)
rm(pin_file)
bpftool_prog_list_wait(expected=0)
start_test("Test mixing of TC and XDP...")
sim.tc_add_ingress()
sim.set_xdp(obj, "offload")
- ret, _ = sim.cls_bpf_add_filter(obj, skip_sw=True, fail=False)
+ ret, _, err = sim.cls_bpf_add_filter(obj, skip_sw=True,
+ fail=False, include_stderr=True)
fail(ret == 0, "Loading TC when XDP active should fail")
+ check_extack_nsim(err, "driver and netdev offload states mismatch.", args)
sim.unset_xdp("offload")
sim.wait_for_flush()
sim.cls_bpf_add_filter(obj, skip_sw=True)
- ret, _ = sim.set_xdp(obj, "offload", fail=False)
+ ret, _, err = sim.set_xdp(obj, "offload", fail=False, include_stderr=True)
fail(ret == 0, "Loading XDP when TC active should fail")
+ check_extack_nsim(err, "TC program is already loaded.", args)
start_test("Test binding TC from pinned...")
pin_file, pinned = pin_prog("/sys/fs/bpf/tmp")
@@ -765,8 +919,10 @@ try:
start_test("Test asking for TC offload of two filters...")
sim.cls_bpf_add_filter(obj, da=True, skip_sw=True)
- ret, _ = sim.cls_bpf_add_filter(obj, da=True, skip_sw=True, fail=False)
+ ret, _, err = sim.cls_bpf_add_filter(obj, da=True, skip_sw=True,
+ fail=False, include_stderr=True)
fail(ret == 0, "Managed to offload two TC filters at the same time")
+ check_extack_nsim(err, "driver and netdev offload states mismatch.", args)
sim.tc_flush_filters(bound=2, total=2)
diff --git a/tools/testing/selftests/bpf/test_tcpbpf.h b/tools/testing/selftests/bpf/test_tcpbpf.h
new file mode 100644
index 000000000000..2fe43289943c
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcpbpf.h
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef _TEST_TCPBPF_H
+#define _TEST_TCPBPF_H
+
+struct tcpbpf_globals {
+ __u32 event_map;
+ __u32 total_retrans;
+ __u32 data_segs_in;
+ __u32 data_segs_out;
+ __u32 bad_cb_test_rv;
+ __u32 good_cb_test_rv;
+ __u64 bytes_received;
+ __u64 bytes_acked;
+};
+#endif
diff --git a/tools/testing/selftests/bpf/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/test_tcpbpf_kern.c
new file mode 100644
index 000000000000..57119ad57a3f
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcpbpf_kern.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/in6.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/tcp.h>
+#include <netinet/in.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+#include "test_tcpbpf.h"
+
+struct bpf_map_def SEC("maps") global_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct tcpbpf_globals),
+ .max_entries = 2,
+};
+
+static inline void update_event_map(int event)
+{
+ __u32 key = 0;
+ struct tcpbpf_globals g, *gp;
+
+ gp = bpf_map_lookup_elem(&global_map, &key);
+ if (gp == NULL) {
+ struct tcpbpf_globals g = {0};
+
+ g.event_map |= (1 << event);
+ bpf_map_update_elem(&global_map, &key, &g,
+ BPF_ANY);
+ } else {
+ g = *gp;
+ g.event_map |= (1 << event);
+ bpf_map_update_elem(&global_map, &key, &g,
+ BPF_ANY);
+ }
+}
+
+int _version SEC("version") = 1;
+
+SEC("sockops")
+int bpf_testcb(struct bpf_sock_ops *skops)
+{
+ int rv = -1;
+ int bad_call_rv = 0;
+ int good_call_rv = 0;
+ int op;
+ int v = 0;
+
+ op = (int) skops->op;
+
+ update_event_map(op);
+
+ switch (op) {
+ case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+ /* Test failure to set largest cb flag (assumes not defined) */
+ bad_call_rv = bpf_sock_ops_cb_flags_set(skops, 0x80);
+ /* Set callback */
+ good_call_rv = bpf_sock_ops_cb_flags_set(skops,
+ BPF_SOCK_OPS_STATE_CB_FLAG);
+ /* Update results */
+ {
+ __u32 key = 0;
+ struct tcpbpf_globals g, *gp;
+
+ gp = bpf_map_lookup_elem(&global_map, &key);
+ if (!gp)
+ break;
+ g = *gp;
+ g.bad_cb_test_rv = bad_call_rv;
+ g.good_cb_test_rv = good_call_rv;
+ bpf_map_update_elem(&global_map, &key, &g,
+ BPF_ANY);
+ }
+ break;
+ case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+ skops->sk_txhash = 0x12345f;
+ v = 0xff;
+ rv = bpf_setsockopt(skops, SOL_IPV6, IPV6_TCLASS, &v,
+ sizeof(v));
+ break;
+ case BPF_SOCK_OPS_RTO_CB:
+ break;
+ case BPF_SOCK_OPS_RETRANS_CB:
+ break;
+ case BPF_SOCK_OPS_STATE_CB:
+ if (skops->args[1] == BPF_TCP_CLOSE) {
+ __u32 key = 0;
+ struct tcpbpf_globals g, *gp;
+
+ gp = bpf_map_lookup_elem(&global_map, &key);
+ if (!gp)
+ break;
+ g = *gp;
+ g.total_retrans = skops->total_retrans;
+ g.data_segs_in = skops->data_segs_in;
+ g.data_segs_out = skops->data_segs_out;
+ g.bytes_received = skops->bytes_received;
+ g.bytes_acked = skops->bytes_acked;
+ bpf_map_update_elem(&global_map, &key, &g,
+ BPF_ANY);
+ }
+ break;
+ default:
+ rv = -1;
+ }
+ skops->reply = rv;
+ return 1;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c
new file mode 100644
index 000000000000..95a370f3d378
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcpbpf_user.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <signal.h>
+#include <string.h>
+#include <assert.h>
+#include <linux/perf_event.h>
+#include <linux/ptrace.h>
+#include <linux/bpf.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include "bpf_util.h"
+#include <linux/perf_event.h>
+#include "test_tcpbpf.h"
+
+static int bpf_find_map(const char *test, struct bpf_object *obj,
+ const char *name)
+{
+ struct bpf_map *map;
+
+ map = bpf_object__find_map_by_name(obj, name);
+ if (!map) {
+ printf("%s:FAIL:map '%s' not found\n", test, name);
+ return -1;
+ }
+ return bpf_map__fd(map);
+}
+
+#define SYSTEM(CMD) \
+ do { \
+ if (system(CMD)) { \
+ printf("system(%s) FAILS!\n", CMD); \
+ } \
+ } while (0)
+
+int main(int argc, char **argv)
+{
+ const char *file = "test_tcpbpf_kern.o";
+ struct tcpbpf_globals g = {0};
+ int cg_fd, prog_fd, map_fd;
+ bool debug_flag = false;
+ int error = EXIT_FAILURE;
+ struct bpf_object *obj;
+ char cmd[100], *dir;
+ struct stat buffer;
+ __u32 key = 0;
+ int pid;
+ int rv;
+
+ if (argc > 1 && strcmp(argv[1], "-d") == 0)
+ debug_flag = true;
+
+ dir = "/tmp/cgroupv2/foo";
+
+ if (stat(dir, &buffer) != 0) {
+ SYSTEM("mkdir -p /tmp/cgroupv2");
+ SYSTEM("mount -t cgroup2 none /tmp/cgroupv2");
+ SYSTEM("mkdir -p /tmp/cgroupv2/foo");
+ }
+ pid = (int) getpid();
+ sprintf(cmd, "echo %d >> /tmp/cgroupv2/foo/cgroup.procs", pid);
+ SYSTEM(cmd);
+
+ cg_fd = open(dir, O_DIRECTORY, O_RDONLY);
+ if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) {
+ printf("FAILED: load_bpf_file failed for: %s\n", file);
+ goto err;
+ }
+
+ rv = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_SOCK_OPS, 0);
+ if (rv) {
+ printf("FAILED: bpf_prog_attach: %d (%s)\n",
+ error, strerror(errno));
+ goto err;
+ }
+
+ SYSTEM("./tcp_server.py");
+
+ map_fd = bpf_find_map(__func__, obj, "global_map");
+ if (map_fd < 0)
+ goto err;
+
+ rv = bpf_map_lookup_elem(map_fd, &key, &g);
+ if (rv != 0) {
+ printf("FAILED: bpf_map_lookup_elem returns %d\n", rv);
+ goto err;
+ }
+
+ if (g.bytes_received != 501 || g.bytes_acked != 1002 ||
+ g.data_segs_in != 1 || g.data_segs_out != 1 ||
+ (g.event_map ^ 0x47e) != 0 || g.bad_cb_test_rv != 0x80 ||
+ g.good_cb_test_rv != 0) {
+ printf("FAILED: Wrong stats\n");
+ if (debug_flag) {
+ printf("\n");
+ printf("bytes_received: %d (expecting 501)\n",
+ (int)g.bytes_received);
+ printf("bytes_acked: %d (expecting 1002)\n",
+ (int)g.bytes_acked);
+ printf("data_segs_in: %d (expecting 1)\n",
+ g.data_segs_in);
+ printf("data_segs_out: %d (expecting 1)\n",
+ g.data_segs_out);
+ printf("event_map: 0x%x (at least 0x47e)\n",
+ g.event_map);
+ printf("bad_cb_test_rv: 0x%x (expecting 0x80)\n",
+ g.bad_cb_test_rv);
+ printf("good_cb_test_rv:0x%x (expecting 0)\n",
+ g.good_cb_test_rv);
+ }
+ goto err;
+ }
+ printf("PASSED!\n");
+ error = 0;
+err:
+ bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS);
+ return error;
+
+}
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index fb82d29ee863..697bd83de295 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -21,6 +21,7 @@
#include <stddef.h>
#include <stdbool.h>
#include <sched.h>
+#include <limits.h>
#include <sys/capability.h>
#include <sys/resource.h>
@@ -111,7 +112,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.result = ACCEPT,
- .retval = 0,
+ .retval = 42,
},
{
"DIV32 by 0, zero check 2",
@@ -123,7 +124,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.result = ACCEPT,
- .retval = 0,
+ .retval = 42,
},
{
"DIV64 by 0, zero check",
@@ -135,7 +136,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.result = ACCEPT,
- .retval = 0,
+ .retval = 42,
},
{
"MOD32 by 0, zero check 1",
@@ -147,7 +148,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.result = ACCEPT,
- .retval = 0,
+ .retval = 42,
},
{
"MOD32 by 0, zero check 2",
@@ -159,7 +160,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.result = ACCEPT,
- .retval = 0,
+ .retval = 42,
},
{
"MOD64 by 0, zero check",
@@ -171,13 +172,245 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.result = ACCEPT,
+ .retval = 42,
+ },
+ {
+ "DIV32 by 0, zero check ok, cls",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_0, 42),
+ BPF_MOV32_IMM(BPF_REG_1, 2),
+ BPF_MOV32_IMM(BPF_REG_2, 16),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 8,
+ },
+ {
+ "DIV32 by 0, zero check 1, cls",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 1),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "DIV32 by 0, zero check 2, cls",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL),
+ BPF_MOV32_IMM(BPF_REG_0, 1),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "DIV64 by 0, zero check, cls",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 1),
+ BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
.retval = 0,
},
{
+ "MOD32 by 0, zero check ok, cls",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_0, 42),
+ BPF_MOV32_IMM(BPF_REG_1, 3),
+ BPF_MOV32_IMM(BPF_REG_2, 5),
+ BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
+ "MOD32 by 0, zero check 1, cls",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 1),
+ BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "MOD32 by 0, zero check 2, cls",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL),
+ BPF_MOV32_IMM(BPF_REG_0, 1),
+ BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "MOD64 by 0, zero check 1, cls",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 2),
+ BPF_ALU64_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
+ "MOD64 by 0, zero check 2, cls",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_MOV32_IMM(BPF_REG_0, -1),
+ BPF_ALU64_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = -1,
+ },
+ /* Just make sure that JITs used udiv/umod as otherwise we get
+ * an exception from INT_MIN/-1 overflow similarly as with div
+ * by zero.
+ */
+ {
+ "DIV32 overflow, check 1",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_1, -1),
+ BPF_MOV32_IMM(BPF_REG_0, INT_MIN),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "DIV32 overflow, check 2",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_0, INT_MIN),
+ BPF_ALU32_IMM(BPF_DIV, BPF_REG_0, -1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "DIV64 overflow, check 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, -1),
+ BPF_LD_IMM64(BPF_REG_0, LLONG_MIN),
+ BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "DIV64 overflow, check 2",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_0, LLONG_MIN),
+ BPF_ALU64_IMM(BPF_DIV, BPF_REG_0, -1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "MOD32 overflow, check 1",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_1, -1),
+ BPF_MOV32_IMM(BPF_REG_0, INT_MIN),
+ BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = INT_MIN,
+ },
+ {
+ "MOD32 overflow, check 2",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_0, INT_MIN),
+ BPF_ALU32_IMM(BPF_MOD, BPF_REG_0, -1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = INT_MIN,
+ },
+ {
+ "MOD64 overflow, check 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, -1),
+ BPF_LD_IMM64(BPF_REG_2, LLONG_MIN),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+ BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_3, BPF_REG_2, 1),
+ BPF_MOV32_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "MOD64 overflow, check 2",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_2, LLONG_MIN),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_MOD, BPF_REG_2, -1),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_3, BPF_REG_2, 1),
+ BPF_MOV32_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "xor32 zero extend check",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_2, -1),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 32),
+ BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 0xffff),
+ BPF_ALU32_REG(BPF_XOR, BPF_REG_2, BPF_REG_2),
+ BPF_MOV32_IMM(BPF_REG_0, 2),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 1),
+ BPF_MOV32_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
"empty prog",
.insns = {
},
- .errstr = "last insn is not an exit or jmp",
+ .errstr = "unknown opcode 00",
.result = REJECT,
},
{
@@ -374,7 +607,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.result = REJECT,
- .errstr = "BPF_ARSH not supported for 32 bit ALU",
+ .errstr = "unknown opcode c4",
},
{
"arsh32 on reg",
@@ -385,7 +618,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.result = REJECT,
- .errstr = "BPF_ARSH not supported for 32 bit ALU",
+ .errstr = "unknown opcode cc",
},
{
"arsh64 on imm",
@@ -501,7 +734,7 @@ static struct bpf_test tests[] = {
BPF_RAW_INSN(BPF_JMP | BPF_CALL | BPF_X, 0, 0, 0, 0),
BPF_EXIT_INSN(),
},
- .errstr = "BPF_CALL uses reserved",
+ .errstr = "unknown opcode 8d",
.result = REJECT,
},
{
@@ -691,7 +924,7 @@ static struct bpf_test tests[] = {
BPF_RAW_INSN(0, 0, 0, 0, 0),
BPF_EXIT_INSN(),
},
- .errstr = "invalid BPF_LD_IMM",
+ .errstr = "unknown opcode 00",
.result = REJECT,
},
{
@@ -709,7 +942,7 @@ static struct bpf_test tests[] = {
BPF_RAW_INSN(-1, 0, 0, 0, 0),
BPF_EXIT_INSN(),
},
- .errstr = "invalid BPF_ALU opcode f0",
+ .errstr = "unknown opcode ff",
.result = REJECT,
},
{
@@ -718,7 +951,7 @@ static struct bpf_test tests[] = {
BPF_RAW_INSN(-1, -1, -1, -1, -1),
BPF_EXIT_INSN(),
},
- .errstr = "invalid BPF_ALU opcode f0",
+ .errstr = "unknown opcode ff",
.result = REJECT,
},
{
@@ -7543,7 +7776,7 @@ static struct bpf_test tests[] = {
},
BPF_EXIT_INSN(),
},
- .errstr = "BPF_END uses reserved fields",
+ .errstr = "unknown opcode d7",
.result = REJECT,
},
{
@@ -8766,6 +8999,7 @@ static struct bpf_test tests[] = {
BPF_EXIT_INSN(),
},
.result = ACCEPT,
+ .retval = 1,
},
{
"check deducing bounds from const, 3",
@@ -8963,6 +9197,90 @@ static struct bpf_test tests[] = {
.retval = 1,
},
{
+ "calls: div by 0 in subprog",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_IMM(BPF_REG_2, 0),
+ BPF_MOV32_IMM(BPF_REG_3, 1),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_3, BPF_REG_2),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "calls: multiple ret types in subprog 1",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_MOV32_IMM(BPF_REG_0, 42),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "R0 invalid mem access 'inv'",
+ },
+ {
+ "calls: multiple ret types in subprog 2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 9),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6,
+ offsetof(struct __sk_buff, data)),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 64),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map1 = { 16 },
+ .result = REJECT,
+ .errstr = "R0 min value is outside of the array range",
+ },
+ {
"calls: overlapping caller/callee",
.insns = {
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 0),
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index b4b69c2d1012..9dea96380339 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -1310,7 +1310,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
return -EFAULT;
}
- if (is_vm_hugetlb_page(vma) && !logging_active) {
+ if (vma_kernel_pagesize(vma) == PMD_SIZE && !logging_active) {
hugetlb = true;
gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
} else {
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index 62310122ee78..743ca5cb05ef 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -285,9 +285,11 @@ int vgic_init(struct kvm *kvm)
if (ret)
goto out;
- ret = vgic_v4_init(kvm);
- if (ret)
- goto out;
+ if (vgic_has_its(kvm)) {
+ ret = vgic_v4_init(kvm);
+ if (ret)
+ goto out;
+ }
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_vgic_vcpu_enable(vcpu);
diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c
index 4a37292855bc..bc4265154bac 100644
--- a/virt/kvm/arm/vgic/vgic-v4.c
+++ b/virt/kvm/arm/vgic/vgic-v4.c
@@ -118,7 +118,7 @@ int vgic_v4_init(struct kvm *kvm)
struct kvm_vcpu *vcpu;
int i, nr_vcpus, ret;
- if (!vgic_supports_direct_msis(kvm))
+ if (!kvm_vgic_global_state.has_gicv4)
return 0; /* Nothing to see here... move along. */
if (dist->its_vm.vpes)