diff options
39 files changed, 2036 insertions, 221 deletions
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index d1e3f0997d6b..a1e329ec24cd 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -168,7 +168,8 @@ MODULE_PARM_DESC(force_init, "Forcibly become Master PF and initialize adapter," static int dflt_msg_enable = DFLT_MSG_ENABLE; module_param(dflt_msg_enable, int, 0644); -MODULE_PARM_DESC(dflt_msg_enable, "Chelsio T4 default message enable bitmap"); +MODULE_PARM_DESC(dflt_msg_enable, "Chelsio T4 default message enable bitmap, " + "deprecated parameter"); /* * The driver uses the best interrupt scheme available on a platform in the diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index 1cc8a7a69457..730fec73d5a6 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -74,7 +74,8 @@ static int dflt_msg_enable = DFLT_MSG_ENABLE; module_param(dflt_msg_enable, int, 0644); MODULE_PARM_DESC(dflt_msg_enable, - "default adapter ethtool message level bitmap"); + "default adapter ethtool message level bitmap, " + "deprecated parameter"); /* * The driver uses the best interrupt scheme available on a platform in the diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c index df8e2fd6a649..43bb4139d896 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c @@ -33,16 +33,6 @@ static void i40e_resume_aq(struct i40e_hw *hw); /** - * i40e_is_nvm_update_op - return true if this is an NVM update operation - * @desc: API request descriptor - **/ -static inline bool i40e_is_nvm_update_op(struct i40e_aq_desc *desc) -{ - return (desc->opcode == cpu_to_le16(i40e_aqc_opc_nvm_erase)) || - (desc->opcode == cpu_to_le16(i40e_aqc_opc_nvm_update)); -} - -/** * i40e_adminq_init_regs - Initialize AdminQ registers * @hw: pointer to the hardware structure * @@ -624,7 +614,7 @@ i40e_status i40e_init_adminq(struct i40e_hw *hw) /* pre-emptive resource lock release */ i40e_aq_release_resource(hw, I40E_NVM_RESOURCE_ID, 0, NULL); - hw->aq.nvm_release_on_done = false; + hw->nvm_release_on_done = false; hw->nvmupd_state = I40E_NVMUPD_STATE_INIT; ret_code = i40e_aq_set_hmc_resource_profile(hw, @@ -1023,26 +1013,7 @@ i40e_status i40e_clean_arq_element(struct i40e_hw *hw, hw->aq.arq.next_to_clean = ntc; hw->aq.arq.next_to_use = ntu; - if (i40e_is_nvm_update_op(&e->desc)) { - if (hw->aq.nvm_release_on_done) { - i40e_release_nvm(hw); - hw->aq.nvm_release_on_done = false; - } - - switch (hw->nvmupd_state) { - case I40E_NVMUPD_STATE_INIT_WAIT: - hw->nvmupd_state = I40E_NVMUPD_STATE_INIT; - break; - - case I40E_NVMUPD_STATE_WRITE_WAIT: - hw->nvmupd_state = I40E_NVMUPD_STATE_WRITING; - break; - - default: - break; - } - } - + i40e_nvmupd_check_wait_event(hw, le16_to_cpu(e->desc.opcode)); clean_arq_element_out: /* Set pending if needed, unlock and return */ if (pending) diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.h b/drivers/net/ethernet/intel/i40e/i40e_adminq.h index 12fbbddea299..d92aad38afdc 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.h @@ -97,7 +97,6 @@ struct i40e_adminq_info { u32 fw_build; /* firmware build number */ u16 api_maj_ver; /* api major version */ u16 api_min_ver; /* api minor version */ - bool nvm_release_on_done; struct mutex asq_mutex; /* Send queue lock */ struct mutex arq_mutex; /* Receive queue lock */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 8276a1393e6d..f3c1d8890cbb 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -60,6 +60,7 @@ static i40e_status i40e_set_mac_type(struct i40e_hw *hw) case I40E_DEV_ID_SFP_X722: case I40E_DEV_ID_1G_BASE_T_X722: case I40E_DEV_ID_10G_BASE_T_X722: + case I40E_DEV_ID_SFP_I_X722: hw->mac.type = I40E_MAC_X722; break; default: @@ -3079,6 +3080,9 @@ static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff, break; case I40E_AQ_CAP_ID_MSIX: p->num_msix_vectors = number; + i40e_debug(hw, I40E_DEBUG_INIT, + "HW Capability: MSIX vector count = %d\n", + p->num_msix_vectors); break; case I40E_AQ_CAP_ID_VF_MSIX: p->num_msix_vectors_vf = number; diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 0c97733d253c..83dccf1792e7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -147,9 +147,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) dev_info(&pf->pdev->dev, " vlan_features = 0x%08lx\n", (unsigned long int)nd->vlan_features); } - if (vsi->active_vlans) - dev_info(&pf->pdev->dev, - " vlgrp: & = %p\n", vsi->active_vlans); + dev_info(&pf->pdev->dev, + " vlgrp: & = %p\n", vsi->active_vlans); dev_info(&pf->pdev->dev, " state = %li flags = 0x%08lx, netdev_registered = %i, current_netdev_flags = 0x%04x\n", vsi->state, vsi->flags, diff --git a/drivers/net/ethernet/intel/i40e/i40e_devids.h b/drivers/net/ethernet/intel/i40e/i40e_devids.h index 99257fcd1ef4..dd4457d29e98 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_devids.h +++ b/drivers/net/ethernet/intel/i40e/i40e_devids.h @@ -44,6 +44,7 @@ #define I40E_DEV_ID_SFP_X722 0x37D0 #define I40E_DEV_ID_1G_BASE_T_X722 0x37D1 #define I40E_DEV_ID_10G_BASE_T_X722 0x37D2 +#define I40E_DEV_ID_SFP_I_X722 0x37D3 #define i40e_is_40G_device(d) ((d) == I40E_DEV_ID_QSFP_A || \ (d) == I40E_DEV_ID_QSFP_B || \ diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 410d237f9137..8a83d4514812 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -313,6 +313,13 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw, ecmd->advertising |= ADVERTISED_10000baseT_Full; if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB) ecmd->advertising |= ADVERTISED_1000baseT_Full; + /* adding 100baseT support for 10GBASET_PHY */ + if (pf->flags & I40E_FLAG_HAVE_10GBASET_PHY) { + ecmd->supported |= SUPPORTED_100baseT_Full; + ecmd->advertising |= ADVERTISED_100baseT_Full | + ADVERTISED_1000baseT_Full | + ADVERTISED_10000baseT_Full; + } break; case I40E_PHY_TYPE_1000BASE_T_OPTICAL: ecmd->supported = SUPPORTED_Autoneg | @@ -325,6 +332,15 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw, SUPPORTED_100baseT_Full; if (hw_link_info->requested_speeds & I40E_LINK_SPEED_100MB) ecmd->advertising |= ADVERTISED_100baseT_Full; + /* firmware detects 10G phy as 100M phy at 100M speed */ + if (pf->flags & I40E_FLAG_HAVE_10GBASET_PHY) { + ecmd->supported |= SUPPORTED_10000baseT_Full | + SUPPORTED_1000baseT_Full; + ecmd->advertising |= ADVERTISED_Autoneg | + ADVERTISED_100baseT_Full | + ADVERTISED_1000baseT_Full | + ADVERTISED_10000baseT_Full; + } break; case I40E_PHY_TYPE_10GBASE_CR1_CU: case I40E_PHY_TYPE_10GBASE_CR1: diff --git a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c index 92d2208d13c7..58e6c1570335 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c +++ b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Intel Ethernet Controller XL710 Family Linux Driver - * Copyright(c) 2013 - 2015 Intel Corporation. + * Copyright(c) 2013 - 2016 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -38,16 +38,6 @@ #include "i40e_fcoe.h" /** - * i40e_rx_is_fcoe - returns true if the rx packet type is FCoE - * @ptype: the packet type field from rx descriptor write-back - **/ -static inline bool i40e_rx_is_fcoe(u16 ptype) -{ - return (ptype >= I40E_RX_PTYPE_L2_FCOE_PAY3) && - (ptype <= I40E_RX_PTYPE_L2_FCOE_VFT_FCOTHER); -} - -/** * i40e_fcoe_sof_is_class2 - returns true if this is a FC Class 2 SOF * @sof: the FCoE start of frame delimiter **/ diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 297fd39ba255..dc3b3939dd0a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -46,7 +46,7 @@ static const char i40e_driver_string[] = #define DRV_VERSION_MAJOR 1 #define DRV_VERSION_MINOR 5 -#define DRV_VERSION_BUILD 1 +#define DRV_VERSION_BUILD 5 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) DRV_KERN @@ -90,6 +90,7 @@ static const struct pci_device_id i40e_pci_tbl[] = { {PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_X722), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_1G_BASE_T_X722), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T_X722), 0}, + {PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_I_X722), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_20G_KR2), 0}, {PCI_VDEVICE(INTEL, I40E_DEV_ID_20G_KR2_A), 0}, /* required last entry */ @@ -6858,6 +6859,7 @@ static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit) */ ret = i40e_aq_set_phy_int_mask(&pf->hw, ~(I40E_AQ_EVENT_LINK_UPDOWN | + I40E_AQ_EVENT_MEDIA_NA | I40E_AQ_EVENT_MODULE_QUAL_FAIL), NULL); if (ret) dev_info(&pf->pdev->dev, "set phy mask fail, err %s aq_err %s\n", @@ -8436,7 +8438,6 @@ static int i40e_sw_init(struct i40e_pf *pf) pf->msg_enable = netif_msg_init(I40E_DEFAULT_MSG_ENABLE, (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)); - pf->hw.debug_mask = pf->msg_enable | I40E_DEBUG_DIAG; if (debug != -1 && debug != I40E_DEFAULT_MSG_ENABLE) { if (I40E_DEBUG_USER & debug) pf->hw.debug_mask = debug; @@ -8447,7 +8448,6 @@ static int i40e_sw_init(struct i40e_pf *pf) /* Set default capability flags */ pf->flags = I40E_FLAG_RX_CSUM_ENABLED | I40E_FLAG_MSI_ENABLED | - I40E_FLAG_LINK_POLLING_ENABLED | I40E_FLAG_MSIX_ENABLED; if (iommu_present(&pci_bus_type)) @@ -9069,6 +9069,7 @@ static const struct net_device_ops i40e_netdev_ops = { .ndo_get_vf_config = i40e_ndo_get_vf_config, .ndo_set_vf_link_state = i40e_ndo_set_vf_link_state, .ndo_set_vf_spoofchk = i40e_ndo_set_vf_spoofchk, + .ndo_set_vf_trust = i40e_ndo_set_vf_trust, #if IS_ENABLED(CONFIG_VXLAN) .ndo_add_vxlan_port = i40e_add_vxlan_port, .ndo_del_vxlan_port = i40e_del_vxlan_port, @@ -9158,6 +9159,12 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) I40E_VLAN_ANY, false, true); spin_unlock_bh(&vsi->mac_filter_list_lock); } + } else if ((pf->hw.aq.api_maj_ver > 1) || + ((pf->hw.aq.api_maj_ver == 1) && + (pf->hw.aq.api_min_ver > 4))) { + /* Supported in FW API version higher than 1.4 */ + pf->flags |= I40E_FLAG_GENEVE_OFFLOAD_CAPABLE; + pf->auto_disable_flags = I40E_FLAG_HW_ATR_EVICT_CAPABLE; } else { /* relate the VSI_VMDQ name to the VSI_MAIN name */ snprintf(netdev->name, IFNAMSIZ, "%sv%%d", @@ -11064,6 +11071,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) */ err = i40e_aq_set_phy_int_mask(&pf->hw, ~(I40E_AQ_EVENT_LINK_UPDOWN | + I40E_AQ_EVENT_MEDIA_NA | I40E_AQ_EVENT_MODULE_QUAL_FAIL), NULL); if (err) dev_info(&pf->pdev->dev, "set phy mask fail, err %s aq_err %s\n", diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c index 5730f8091e1b..f2cea3d25de3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c +++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c @@ -696,7 +696,7 @@ i40e_status i40e_nvmupd_command(struct i40e_hw *hw, i40e_debug(hw, I40E_DEBUG_NVM, "%s state %d nvm_release_on_hold %d cmd 0x%08x config 0x%08x offset 0x%08x data_size 0x%08x\n", i40e_nvm_update_state_str[upd_cmd], hw->nvmupd_state, - hw->aq.nvm_release_on_done, + hw->nvm_release_on_done, cmd->command, cmd->config, cmd->offset, cmd->data_size); if (upd_cmd == I40E_NVMUPD_INVALID) { @@ -799,7 +799,7 @@ static i40e_status i40e_nvmupd_state_init(struct i40e_hw *hw, if (status) { i40e_release_nvm(hw); } else { - hw->aq.nvm_release_on_done = true; + hw->nvm_release_on_done = true; hw->nvmupd_state = I40E_NVMUPD_STATE_INIT_WAIT; } } @@ -815,7 +815,7 @@ static i40e_status i40e_nvmupd_state_init(struct i40e_hw *hw, if (status) { i40e_release_nvm(hw); } else { - hw->aq.nvm_release_on_done = true; + hw->nvm_release_on_done = true; hw->nvmupd_state = I40E_NVMUPD_STATE_INIT_WAIT; } } @@ -849,7 +849,7 @@ static i40e_status i40e_nvmupd_state_init(struct i40e_hw *hw, -EIO; i40e_release_nvm(hw); } else { - hw->aq.nvm_release_on_done = true; + hw->nvm_release_on_done = true; hw->nvmupd_state = I40E_NVMUPD_STATE_INIT_WAIT; } } @@ -953,7 +953,7 @@ retry: -EIO; hw->nvmupd_state = I40E_NVMUPD_STATE_INIT; } else { - hw->aq.nvm_release_on_done = true; + hw->nvm_release_on_done = true; hw->nvmupd_state = I40E_NVMUPD_STATE_INIT_WAIT; } break; @@ -980,7 +980,7 @@ retry: -EIO; hw->nvmupd_state = I40E_NVMUPD_STATE_INIT; } else { - hw->aq.nvm_release_on_done = true; + hw->nvm_release_on_done = true; hw->nvmupd_state = I40E_NVMUPD_STATE_INIT_WAIT; } break; @@ -1030,6 +1030,37 @@ retry: } /** + * i40e_nvmupd_check_wait_event - handle NVM update operation events + * @hw: pointer to the hardware structure + * @opcode: the event that just happened + **/ +void i40e_nvmupd_check_wait_event(struct i40e_hw *hw, u16 opcode) +{ + if (opcode == i40e_aqc_opc_nvm_erase || + opcode == i40e_aqc_opc_nvm_update) { + i40e_debug(hw, I40E_DEBUG_NVM, + "NVMUPD: clearing wait on opcode 0x%04x\n", opcode); + if (hw->nvm_release_on_done) { + i40e_release_nvm(hw); + hw->nvm_release_on_done = false; + } + + switch (hw->nvmupd_state) { + case I40E_NVMUPD_STATE_INIT_WAIT: + hw->nvmupd_state = I40E_NVMUPD_STATE_INIT; + break; + + case I40E_NVMUPD_STATE_WRITE_WAIT: + hw->nvmupd_state = I40E_NVMUPD_STATE_WRITING; + break; + + default: + break; + } + } +} + +/** * i40e_nvmupd_validate_command - Validate given command * @hw: pointer to hardware structure * @cmd: pointer to nvm update command buffer diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index d51eee5bf79a..134035f53f2c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -308,6 +308,7 @@ i40e_status i40e_validate_nvm_checksum(struct i40e_hw *hw, i40e_status i40e_nvmupd_command(struct i40e_hw *hw, struct i40e_nvm_access *cmd, u8 *bytes, int *); +void i40e_nvmupd_check_wait_event(struct i40e_hw *hw, u16 opcode); void i40e_set_pci_config_data(struct i40e_hw *hw, u16 link_status); extern struct i40e_rx_ptype_decoded i40e_ptype_lookup[]; diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 5bef5b0f00d9..29ffed27e5a9 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1703,7 +1703,9 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, const int budget) ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) : 0; #ifdef I40E_FCOE - if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) { + if (unlikely( + i40e_rx_is_fcoe(rx_ptype) && + !i40e_fcoe_handle_offload(rx_ring, rx_desc, skb))) { dev_kfree_skb_any(skb); continue; } @@ -1834,7 +1836,9 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget) ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) : 0; #ifdef I40E_FCOE - if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) { + if (unlikely( + i40e_rx_is_fcoe(rx_ptype) && + !i40e_fcoe_handle_offload(rx_ring, rx_desc, skb))) { dev_kfree_skb_any(skb); continue; } @@ -2252,15 +2256,13 @@ out: /** * i40e_tso - set up the tso context descriptor - * @tx_ring: ptr to the ring to send * @skb: ptr to the skb we're sending * @hdr_len: ptr to the size of the packet header * @cd_type_cmd_tso_mss: Quad Word 1 * * Returns 0 if no TSO can happen, 1 if tso is going, or error **/ -static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, - u8 *hdr_len, u64 *cd_type_cmd_tso_mss) +static int i40e_tso(struct sk_buff *skb, u8 *hdr_len, u64 *cd_type_cmd_tso_mss) { u64 cd_cmd, cd_tso_len, cd_mss; union { @@ -2304,10 +2306,8 @@ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, l4_offset = l4.hdr - skb->data; /* remove payload length from outer checksum */ - paylen = (__force u16)l4.udp->check; - paylen += ntohs((__force __be16)1) * - (u16)~(skb->len - l4_offset); - l4.udp->check = ~csum_fold((__force __wsum)paylen); + paylen = skb->len - l4_offset; + csum_replace_by_diff(&l4.udp->check, htonl(paylen)); } /* reset pointers to inner headers */ @@ -2327,9 +2327,8 @@ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, l4_offset = l4.hdr - skb->data; /* remove payload length from inner checksum */ - paylen = (__force u16)l4.tcp->check; - paylen += ntohs((__force __be16)1) * (u16)~(skb->len - l4_offset); - l4.tcp->check = ~csum_fold((__force __wsum)paylen); + paylen = skb->len - l4_offset; + csum_replace_by_diff(&l4.tcp->check, htonl(paylen)); /* compute length of segmentation header */ *hdr_len = (l4.tcp->doff * 4) + l4_offset; @@ -2411,7 +2410,7 @@ static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, unsigned char *hdr; } l4; unsigned char *exthdr; - u32 offset, cmd = 0, tunnel = 0; + u32 offset, cmd = 0; __be16 frag_off; u8 l4_proto = 0; @@ -2425,6 +2424,7 @@ static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, offset = ((ip.hdr - skb->data) / 2) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT; if (skb->encapsulation) { + u32 tunnel = 0; /* define outer network header type */ if (*tx_flags & I40E_TX_FLAGS_IPV4) { tunnel |= (*tx_flags & I40E_TX_FLAGS_TSO) ? @@ -2934,7 +2934,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, else if (protocol == htons(ETH_P_IPV6)) tx_flags |= I40E_TX_FLAGS_IPV6; - tso = i40e_tso(tx_ring, skb, &hdr_len, &cd_type_cmd_tso_mss); + tso = i40e_tso(skb, &hdr_len, &cd_type_cmd_tso_mss); if (tso < 0) goto out_drop; diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 9e654e611642..77ccdde56c0c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -448,4 +448,14 @@ static inline bool i40e_chk_linearize(struct sk_buff *skb, int count) return __i40e_chk_linearize(skb); } + +/** + * i40e_rx_is_fcoe - returns true if the Rx packet type is FCoE + * @ptype: the packet type field from Rx descriptor write-back + **/ +static inline bool i40e_rx_is_fcoe(u16 ptype) +{ + return (ptype >= I40E_RX_PTYPE_L2_FCOE_PAY3) && + (ptype <= I40E_RX_PTYPE_L2_FCOE_VFT_FCOTHER); +} #endif /* _I40E_TXRX_H_ */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h index 3335f9d13374..793036b259e5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_type.h +++ b/drivers/net/ethernet/intel/i40e/i40e_type.h @@ -549,6 +549,7 @@ struct i40e_hw { enum i40e_nvmupd_state nvmupd_state; struct i40e_aq_desc nvm_wb_desc; struct i40e_virt_mem nvm_buff; + bool nvm_release_on_done; /* HMC info */ struct i40e_hmc_info hmc; /* HMC info struct */ @@ -1533,4 +1534,37 @@ struct i40e_lldp_variables { /* RSS Hash Table Size */ #define I40E_PFQF_CTL_0_HASHLUTSIZE_512 0x00010000 + +/* INPUT SET MASK for RSS, flow director, and flexible payload */ +#define I40E_L3_SRC_SHIFT 47 +#define I40E_L3_SRC_MASK (0x3ULL << I40E_L3_SRC_SHIFT) +#define I40E_L3_V6_SRC_SHIFT 43 +#define I40E_L3_V6_SRC_MASK (0xFFULL << I40E_L3_V6_SRC_SHIFT) +#define I40E_L3_DST_SHIFT 35 +#define I40E_L3_DST_MASK (0x3ULL << I40E_L3_DST_SHIFT) +#define I40E_L3_V6_DST_SHIFT 35 +#define I40E_L3_V6_DST_MASK (0xFFULL << I40E_L3_V6_DST_SHIFT) +#define I40E_L4_SRC_SHIFT 34 +#define I40E_L4_SRC_MASK (0x1ULL << I40E_L4_SRC_SHIFT) +#define I40E_L4_DST_SHIFT 33 +#define I40E_L4_DST_MASK (0x1ULL << I40E_L4_DST_SHIFT) +#define I40E_VERIFY_TAG_SHIFT 31 +#define I40E_VERIFY_TAG_MASK (0x3ULL << I40E_VERIFY_TAG_SHIFT) + +#define I40E_FLEX_50_SHIFT 13 +#define I40E_FLEX_50_MASK (0x1ULL << I40E_FLEX_50_SHIFT) +#define I40E_FLEX_51_SHIFT 12 +#define I40E_FLEX_51_MASK (0x1ULL << I40E_FLEX_51_SHIFT) +#define I40E_FLEX_52_SHIFT 11 +#define I40E_FLEX_52_MASK (0x1ULL << I40E_FLEX_52_SHIFT) +#define I40E_FLEX_53_SHIFT 10 +#define I40E_FLEX_53_MASK (0x1ULL << I40E_FLEX_53_SHIFT) +#define I40E_FLEX_54_SHIFT 9 +#define I40E_FLEX_54_MASK (0x1ULL << I40E_FLEX_54_SHIFT) +#define I40E_FLEX_55_SHIFT 8 +#define I40E_FLEX_55_MASK (0x1ULL << I40E_FLEX_55_SHIFT) +#define I40E_FLEX_56_SHIFT 7 +#define I40E_FLEX_56_MASK (0x1ULL << I40E_FLEX_56_SHIFT) +#define I40E_FLEX_57_SHIFT 6 +#define I40E_FLEX_57_MASK (0x1ULL << I40E_FLEX_57_SHIFT) #endif /* _I40E_TYPE_H_ */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h index ab866cf3dc18..c92a3bdee229 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h @@ -80,10 +80,15 @@ enum i40e_virtchnl_ops { I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE = 14, I40E_VIRTCHNL_OP_GET_STATS = 15, I40E_VIRTCHNL_OP_FCOE = 16, - I40E_VIRTCHNL_OP_EVENT = 17, + I40E_VIRTCHNL_OP_EVENT = 17, /* must ALWAYS be 17 */ I40E_VIRTCHNL_OP_IWARP = 20, I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP = 21, I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP = 22, + I40E_VIRTCHNL_OP_CONFIG_RSS_KEY = 23, + I40E_VIRTCHNL_OP_CONFIG_RSS_LUT = 24, + I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS = 25, + I40E_VIRTCHNL_OP_SET_RSS_HENA = 26, + }; /* Virtual channel message descriptor. This overlays the admin queue @@ -157,6 +162,7 @@ struct i40e_virtchnl_vsi_resource { #define I40E_VIRTCHNL_VF_OFFLOAD_VLAN 0x00010000 #define I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING 0x00020000 #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 0x00040000 +#define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF 0X00080000 struct i40e_virtchnl_vf_resource { u16 num_vsis; @@ -165,8 +171,8 @@ struct i40e_virtchnl_vf_resource { u16 max_mtu; u32 vf_offload_flags; - u32 max_fcoe_contexts; - u32 max_fcoe_filters; + u32 rss_key_size; + u32 rss_lut_size; struct i40e_virtchnl_vsi_resource vsi_res[1]; }; @@ -325,6 +331,39 @@ struct i40e_virtchnl_promisc_info { * PF replies with struct i40e_eth_stats in an external buffer. */ +/* I40E_VIRTCHNL_OP_CONFIG_RSS_KEY + * I40E_VIRTCHNL_OP_CONFIG_RSS_LUT + * VF sends these messages to configure RSS. Only supported if both PF + * and VF drivers set the I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF bit during + * configuration negotiation. If this is the case, then the RSS fields in + * the VF resource struct are valid. + * Both the key and LUT are initialized to 0 by the PF, meaning that + * RSS is effectively disabled until set up by the VF. + */ +struct i40e_virtchnl_rss_key { + u16 vsi_id; + u16 key_len; + u8 key[1]; /* RSS hash key, packed bytes */ +}; + +struct i40e_virtchnl_rss_lut { + u16 vsi_id; + u16 lut_entries; + u8 lut[1]; /* RSS lookup table*/ +}; + +/* I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS + * I40E_VIRTCHNL_OP_SET_RSS_HENA + * VF sends these messages to get and set the hash filter enable bits for RSS. + * By default, the PF sets these to all possible traffic types that the + * hardware supports. The VF can query this value if it wants to change the + * traffic types that are hashed by the hardware. + * Traffic types are defined in the i40e_filter_pctype enum in i40e_type.h + */ +struct i40e_virtchnl_rss_hena { + u64 hena; +}; + /* I40E_VIRTCHNL_OP_EVENT * PF sends this message to inform the VF driver of events that may affect it. * No direct response is expected from the VF, though it may generate other diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 47b9e62473c4..30f8cbe6b54b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -937,6 +937,11 @@ void i40e_reset_vf(struct i40e_vf *vf, bool flr) wr32(hw, I40E_VPGEN_VFRTRIG(vf->vf_id), reg); i40e_flush(hw); } + /* clear the VFLR bit in GLGEN_VFLRSTAT */ + reg_idx = (hw->func_caps.vf_base_id + vf->vf_id) / 32; + bit_idx = (hw->func_caps.vf_base_id + vf->vf_id) % 32; + wr32(hw, I40E_GLGEN_VFLRSTAT(reg_idx), BIT(bit_idx)); + i40e_flush(hw); if (i40e_quiesce_vf_pci(vf)) dev_err(&pf->pdev->dev, "VF %d PCI transactions stuck\n", @@ -989,10 +994,6 @@ complete_reset: /* tell the VF the reset is done */ wr32(hw, I40E_VFGEN_RSTAT1(vf->vf_id), I40E_VFR_VFACTIVE); - /* clear the VFLR bit in GLGEN_VFLRSTAT */ - reg_idx = (hw->func_caps.vf_base_id + vf->vf_id) / 32; - bit_idx = (hw->func_caps.vf_base_id + vf->vf_id) % 32; - wr32(hw, I40E_GLGEN_VFLRSTAT(reg_idx), BIT(bit_idx)); i40e_flush(hw); clear_bit(__I40E_VF_DISABLE, &pf->state); } @@ -1232,8 +1233,8 @@ static int i40e_vc_send_msg_to_vf(struct i40e_vf *vf, u32 v_opcode, /* single place to detect unsuccessful return values */ if (v_retval) { vf->num_invalid_msgs++; - dev_err(&pf->pdev->dev, "VF %d failed opcode %d, error: %d\n", - vf->vf_id, v_opcode, v_retval); + dev_info(&pf->pdev->dev, "VF %d failed opcode %d, retval: %d\n", + vf->vf_id, v_opcode, v_retval); if (vf->num_invalid_msgs > I40E_DEFAULT_NUM_INVALID_MSGS_ALLOWED) { dev_err(&pf->pdev->dev, @@ -1251,9 +1252,9 @@ static int i40e_vc_send_msg_to_vf(struct i40e_vf *vf, u32 v_opcode, aq_ret = i40e_aq_send_msg_to_vf(hw, abs_vf_id, v_opcode, v_retval, msg, msglen, NULL); if (aq_ret) { - dev_err(&pf->pdev->dev, - "Unable to send the message to VF %d aq_err %d\n", - vf->vf_id, pf->hw.aq.asq_last_status); + dev_info(&pf->pdev->dev, + "Unable to send the message to VF %d aq_err %d\n", + vf->vf_id, pf->hw.aq.asq_last_status); return -EIO; } @@ -1311,8 +1312,8 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) struct i40e_pf *pf = vf->pf; i40e_status aq_ret = 0; struct i40e_vsi *vsi; - int i = 0, len = 0; int num_vsis = 1; + int len = 0; int ret; if (!test_bit(I40E_VF_STAT_INIT, &vf->vf_states)) { @@ -1361,8 +1362,16 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2; } - if (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING) + if (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING) { + if (pf->flags & I40E_FLAG_MFP_ENABLED) { + dev_err(&pf->pdev->dev, + "VF %d requested polling mode: this feature is supported only when the device is running in single function per port (SFP) mode\n", + vf->vf_id); + ret = I40E_ERR_PARAM; + goto err; + } vfres->vf_offload_flags |= I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING; + } if (pf->flags & I40E_FLAG_WB_ON_ITR_CAPABLE) { if (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_WB_ON_ITR) @@ -1374,15 +1383,14 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) vfres->num_queue_pairs = vf->num_queue_pairs; vfres->max_vectors = pf->hw.func_caps.num_msix_vectors_vf; if (vf->lan_vsi_idx) { - vfres->vsi_res[i].vsi_id = vf->lan_vsi_id; - vfres->vsi_res[i].vsi_type = I40E_VSI_SRIOV; - vfres->vsi_res[i].num_queue_pairs = vsi->alloc_queue_pairs; + vfres->vsi_res[0].vsi_id = vf->lan_vsi_id; + vfres->vsi_res[0].vsi_type = I40E_VSI_SRIOV; + vfres->vsi_res[0].num_queue_pairs = vsi->alloc_queue_pairs; /* VFs only use TC 0 */ - vfres->vsi_res[i].qset_handle + vfres->vsi_res[0].qset_handle = le16_to_cpu(vsi->info.qs_handle[0]); - ether_addr_copy(vfres->vsi_res[i].default_mac_addr, + ether_addr_copy(vfres->vsi_res[0].default_mac_addr, vf->default_lan_addr.addr); - i++; } set_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states); @@ -2297,11 +2305,9 @@ int i40e_vc_process_vflr_event(struct i40e_pf *pf) /* read GLGEN_VFLRSTAT register to find out the flr VFs */ vf = &pf->vf[vf_id]; reg = rd32(hw, I40E_GLGEN_VFLRSTAT(reg_idx)); - if (reg & BIT(bit_idx)) { + if (reg & BIT(bit_idx)) /* i40e_reset_vf will clear the bit in GLGEN_VFLRSTAT */ - if (!test_bit(__I40E_DOWN, &pf->state)) - i40e_reset_vf(vf, true); - } + i40e_reset_vf(vf, true); } return 0; @@ -2765,3 +2771,45 @@ int i40e_ndo_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool enable) out: return ret; } + +/** + * i40e_ndo_set_vf_trust + * @netdev: network interface device structure of the pf + * @vf_id: VF identifier + * @setting: trust setting + * + * Enable or disable VF trust setting + **/ +int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting) +{ + struct i40e_netdev_priv *np = netdev_priv(netdev); + struct i40e_pf *pf = np->vsi->back; + struct i40e_vf *vf; + int ret = 0; + + /* validate the request */ + if (vf_id >= pf->num_alloc_vfs) { + dev_err(&pf->pdev->dev, "Invalid VF Identifier %d\n", vf_id); + return -EINVAL; + } + + if (pf->flags & I40E_FLAG_MFP_ENABLED) { + dev_err(&pf->pdev->dev, "Trusted VF not supported in MFP mode.\n"); + return -EINVAL; + } + + vf = &pf->vf[vf_id]; + + if (!vf) + return -EINVAL; + if (setting == vf->trusted) + goto out; + + vf->trusted = setting; + i40e_vc_notify_vf_reset(vf); + i40e_reset_vf(vf, false); + dev_info(&pf->pdev->dev, "VF %u is now %strusted\n", + vf_id, setting ? "" : "un"); +out: + return ret; +} diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index e7b2fba0309e..838cbd2299a4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -88,6 +88,7 @@ struct i40e_vf { struct i40e_virtchnl_ether_addr default_fcoe_addr; u16 port_vlan_id; bool pf_set_mac; /* The VMM admin set the VF MAC address */ + bool trusted; /* VSI indices - actual VSI pointers are maintained in the PF structure * When assigned, these will be non-zero, because VSI 0 is always @@ -127,6 +128,7 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos); int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, int max_tx_rate); +int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting); int i40e_ndo_get_vf_config(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi); int i40e_ndo_set_vf_link_state(struct net_device *netdev, int vf_id, int link); diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq.h index a3eae5d9a2bd..1f9b3b5d946d 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq.h @@ -97,7 +97,6 @@ struct i40e_adminq_info { u32 fw_build; /* firmware build number */ u16 api_maj_ver; /* api major version */ u16 api_min_ver; /* api minor version */ - bool nvm_release_on_done; struct mutex asq_mutex; /* Send queue lock */ struct mutex arq_mutex; /* Receive queue lock */ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_common.c b/drivers/net/ethernet/intel/i40evf/i40e_common.c index 771ac6ad8cda..4db0c0326185 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_common.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_common.c @@ -58,6 +58,7 @@ i40e_status i40e_set_mac_type(struct i40e_hw *hw) case I40E_DEV_ID_SFP_X722: case I40E_DEV_ID_1G_BASE_T_X722: case I40E_DEV_ID_10G_BASE_T_X722: + case I40E_DEV_ID_SFP_I_X722: hw->mac.type = I40E_MAC_X722; break; case I40E_DEV_ID_X722_VF: diff --git a/drivers/net/ethernet/intel/i40evf/i40e_devids.h b/drivers/net/ethernet/intel/i40evf/i40e_devids.h index ca8b58c3d1f5..70235706915e 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_devids.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_devids.h @@ -44,6 +44,7 @@ #define I40E_DEV_ID_SFP_X722 0x37D0 #define I40E_DEV_ID_1G_BASE_T_X722 0x37D1 #define I40E_DEV_ID_10G_BASE_T_X722 0x37D2 +#define I40E_DEV_ID_SFP_I_X722 0x37D3 #define I40E_DEV_ID_X722_VF 0x37CD #define I40E_DEV_ID_X722_VF_HV 0x37D9 diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 570348d93e5d..0c912a4999db 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -1160,7 +1160,9 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, const int budget) ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) : 0; #ifdef I40E_FCOE - if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) { + if (unlikely( + i40e_rx_is_fcoe(rx_ptype) && + !i40e_fcoe_handle_offload(rx_ring, rx_desc, skb))) { dev_kfree_skb_any(skb); continue; } @@ -1519,15 +1521,13 @@ out: /** * i40e_tso - set up the tso context descriptor - * @tx_ring: ptr to the ring to send * @skb: ptr to the skb we're sending * @hdr_len: ptr to the size of the packet header * @cd_type_cmd_tso_mss: Quad Word 1 * * Returns 0 if no TSO can happen, 1 if tso is going, or error **/ -static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, - u8 *hdr_len, u64 *cd_type_cmd_tso_mss) +static int i40e_tso(struct sk_buff *skb, u8 *hdr_len, u64 *cd_type_cmd_tso_mss) { u64 cd_cmd, cd_tso_len, cd_mss; union { @@ -1571,10 +1571,8 @@ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, l4_offset = l4.hdr - skb->data; /* remove payload length from outer checksum */ - paylen = (__force u16)l4.udp->check; - paylen += ntohs((__force __be16)1) * - (u16)~(skb->len - l4_offset); - l4.udp->check = ~csum_fold((__force __wsum)paylen); + paylen = skb->len - l4_offset; + csum_replace_by_diff(&l4.udp->check, htonl(paylen)); } /* reset pointers to inner headers */ @@ -1594,9 +1592,8 @@ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb, l4_offset = l4.hdr - skb->data; /* remove payload length from inner checksum */ - paylen = (__force u16)l4.tcp->check; - paylen += ntohs((__force __be16)1) * (u16)~(skb->len - l4_offset); - l4.tcp->check = ~csum_fold((__force __wsum)paylen); + paylen = skb->len - l4_offset; + csum_replace_by_diff(&l4.tcp->check, htonl(paylen)); /* compute length of segmentation header */ *hdr_len = (l4.tcp->doff * 4) + l4_offset; @@ -1636,7 +1633,7 @@ static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, unsigned char *hdr; } l4; unsigned char *exthdr; - u32 offset, cmd = 0, tunnel = 0; + u32 offset, cmd = 0; __be16 frag_off; u8 l4_proto = 0; @@ -1650,6 +1647,7 @@ static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, offset = ((ip.hdr - skb->data) / 2) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT; if (skb->encapsulation) { + u32 tunnel = 0; /* define outer network header type */ if (*tx_flags & I40E_TX_FLAGS_IPV4) { tunnel |= (*tx_flags & I40E_TX_FLAGS_TSO) ? @@ -2152,7 +2150,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, else if (protocol == htons(ETH_P_IPV6)) tx_flags |= I40E_TX_FLAGS_IPV6; - tso = i40e_tso(tx_ring, skb, &hdr_len, &cd_type_cmd_tso_mss); + tso = i40e_tso(skb, &hdr_len, &cd_type_cmd_tso_mss); if (tso < 0) goto out_drop; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h index 3ec0ea5ea3db..84c28aa64fdf 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h @@ -430,4 +430,14 @@ static inline bool i40e_chk_linearize(struct sk_buff *skb, int count) return __i40evf_chk_linearize(skb); } + +/** + * i40e_rx_is_fcoe - returns true if the Rx packet type is FCoE + * @ptype: the packet type field from Rx descriptor write-back + **/ +static inline bool i40e_rx_is_fcoe(u16 ptype) +{ + return (ptype >= I40E_RX_PTYPE_L2_FCOE_PAY3) && + (ptype <= I40E_RX_PTYPE_L2_FCOE_VFT_FCOTHER); +} #endif /* _I40E_TXRX_H_ */ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_type.h b/drivers/net/ethernet/intel/i40evf/i40e_type.h index 301fe2b6dd03..4a78c18e0b7b 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_type.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_type.h @@ -522,6 +522,7 @@ struct i40e_hw { enum i40e_nvmupd_state nvmupd_state; struct i40e_aq_desc nvm_wb_desc; struct i40e_virt_mem nvm_buff; + bool nvm_release_on_done; /* HMC info */ struct i40e_hmc_info hmc; /* HMC info struct */ @@ -1329,4 +1330,46 @@ enum i40e_reset_type { /* RSS Hash Table Size */ #define I40E_PFQF_CTL_0_HASHLUTSIZE_512 0x00010000 + +/* INPUT SET MASK for RSS, flow director and flexible payload */ +#define I40E_FD_INSET_L3_SRC_SHIFT 47 +#define I40E_FD_INSET_L3_SRC_WORD_MASK (0x3ULL << \ + I40E_FD_INSET_L3_SRC_SHIFT) +#define I40E_FD_INSET_L3_DST_SHIFT 35 +#define I40E_FD_INSET_L3_DST_WORD_MASK (0x3ULL << \ + I40E_FD_INSET_L3_DST_SHIFT) +#define I40E_FD_INSET_L4_SRC_SHIFT 34 +#define I40E_FD_INSET_L4_SRC_WORD_MASK (0x1ULL << \ + I40E_FD_INSET_L4_SRC_SHIFT) +#define I40E_FD_INSET_L4_DST_SHIFT 33 +#define I40E_FD_INSET_L4_DST_WORD_MASK (0x1ULL << \ + I40E_FD_INSET_L4_DST_SHIFT) +#define I40E_FD_INSET_VERIFY_TAG_SHIFT 31 +#define I40E_FD_INSET_VERIFY_TAG_WORD_MASK (0x3ULL << \ + I40E_FD_INSET_VERIFY_TAG_SHIFT) + +#define I40E_FD_INSET_FLEX_WORD50_SHIFT 17 +#define I40E_FD_INSET_FLEX_WORD50_MASK (0x1ULL << \ + I40E_FD_INSET_FLEX_WORD50_SHIFT) +#define I40E_FD_INSET_FLEX_WORD51_SHIFT 16 +#define I40E_FD_INSET_FLEX_WORD51_MASK (0x1ULL << \ + I40E_FD_INSET_FLEX_WORD51_SHIFT) +#define I40E_FD_INSET_FLEX_WORD52_SHIFT 15 +#define I40E_FD_INSET_FLEX_WORD52_MASK (0x1ULL << \ + I40E_FD_INSET_FLEX_WORD52_SHIFT) +#define I40E_FD_INSET_FLEX_WORD53_SHIFT 14 +#define I40E_FD_INSET_FLEX_WORD53_MASK (0x1ULL << \ + I40E_FD_INSET_FLEX_WORD53_SHIFT) +#define I40E_FD_INSET_FLEX_WORD54_SHIFT 13 +#define I40E_FD_INSET_FLEX_WORD54_MASK (0x1ULL << \ + I40E_FD_INSET_FLEX_WORD54_SHIFT) +#define I40E_FD_INSET_FLEX_WORD55_SHIFT 12 +#define I40E_FD_INSET_FLEX_WORD55_MASK (0x1ULL << \ + I40E_FD_INSET_FLEX_WORD55_SHIFT) +#define I40E_FD_INSET_FLEX_WORD56_SHIFT 11 +#define I40E_FD_INSET_FLEX_WORD56_MASK (0x1ULL << \ + I40E_FD_INSET_FLEX_WORD56_SHIFT) +#define I40E_FD_INSET_FLEX_WORD57_SHIFT 10 +#define I40E_FD_INSET_FLEX_WORD57_MASK (0x1ULL << \ + I40E_FD_INSET_FLEX_WORD57_SHIFT) #endif /* _I40E_TYPE_H_ */ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h b/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h index 3b9d2037456c..f04ce6cb70dc 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h @@ -80,7 +80,12 @@ enum i40e_virtchnl_ops { I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE = 14, I40E_VIRTCHNL_OP_GET_STATS = 15, I40E_VIRTCHNL_OP_FCOE = 16, - I40E_VIRTCHNL_OP_EVENT = 17, + I40E_VIRTCHNL_OP_EVENT = 17, /* must ALWAYS be 17 */ + I40E_VIRTCHNL_OP_CONFIG_RSS_KEY = 23, + I40E_VIRTCHNL_OP_CONFIG_RSS_LUT = 24, + I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS = 25, + I40E_VIRTCHNL_OP_SET_RSS_HENA = 26, + }; /* Virtual channel message descriptor. This overlays the admin queue @@ -154,6 +159,7 @@ struct i40e_virtchnl_vsi_resource { #define I40E_VIRTCHNL_VF_OFFLOAD_VLAN 0x00010000 #define I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING 0x00020000 #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 0x00040000 +#define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF 0X00080000 struct i40e_virtchnl_vf_resource { u16 num_vsis; @@ -162,8 +168,8 @@ struct i40e_virtchnl_vf_resource { u16 max_mtu; u32 vf_offload_flags; - u32 max_fcoe_contexts; - u32 max_fcoe_filters; + u32 rss_key_size; + u32 rss_lut_size; struct i40e_virtchnl_vsi_resource vsi_res[1]; }; @@ -322,6 +328,39 @@ struct i40e_virtchnl_promisc_info { * PF replies with struct i40e_eth_stats in an external buffer. */ +/* I40E_VIRTCHNL_OP_CONFIG_RSS_KEY + * I40E_VIRTCHNL_OP_CONFIG_RSS_LUT + * VF sends these messages to configure RSS. Only supported if both PF + * and VF drivers set the I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF bit during + * configuration negotiation. If this is the case, then the RSS fields in + * the VF resource struct are valid. + * Both the key and LUT are initialized to 0 by the PF, meaning that + * RSS is effectively disabled until set up by the VF. + */ +struct i40e_virtchnl_rss_key { + u16 vsi_id; + u16 key_len; + u8 key[1]; /* RSS hash key, packed bytes */ +}; + +struct i40e_virtchnl_rss_lut { + u16 vsi_id; + u16 lut_entries; + u8 lut[1]; /* RSS lookup table*/ +}; + +/* I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS + * I40E_VIRTCHNL_OP_SET_RSS_HENA + * VF sends these messages to get and set the hash filter enable bits for RSS. + * By default, the PF sets these to all possible traffic types that the + * hardware supports. The VF can query this value if it wants to change the + * traffic types that are hashed by the hardware. + * Traffic types are defined in the i40e_filter_pctype enum in i40e_type.h + */ +struct i40e_virtchnl_rss_hena { + u64 hena; +}; + /* I40E_VIRTCHNL_OP_EVENT * PF sends this message to inform the VF driver of events that may affect it. * No direct response is expected from the VF, though it may generate other diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index e3973684746b..9110319a8f00 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -38,7 +38,7 @@ static const char i40evf_driver_string[] = #define DRV_VERSION_MAJOR 1 #define DRV_VERSION_MINOR 5 -#define DRV_VERSION_BUILD 1 +#define DRV_VERSION_BUILD 5 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) \ @@ -1341,7 +1341,7 @@ static int i40evf_get_rss_aq(struct i40e_vsi *vsi, const u8 *seed, } if (lut) { - ret = i40evf_aq_get_rss_lut(hw, vsi->id, seed, lut, lut_size); + ret = i40evf_aq_get_rss_lut(hw, vsi->id, false, lut, lut_size); if (ret) { dev_err(&adapter->pdev->dev, "Cannot get RSS lut, err %s aq_err %s\n", @@ -2252,6 +2252,28 @@ static int i40evf_change_mtu(struct net_device *netdev, int new_mtu) return 0; } +#define I40EVF_VLAN_FEATURES (NETIF_F_HW_VLAN_CTAG_TX |\ + NETIF_F_HW_VLAN_CTAG_RX |\ + NETIF_F_HW_VLAN_CTAG_FILTER) + +/** + * i40evf_fix_features - fix up the netdev feature bits + * @netdev: our net device + * @features: desired feature bits + * + * Returns fixed-up features bits + **/ +static netdev_features_t i40evf_fix_features(struct net_device *netdev, + netdev_features_t features) +{ + struct i40evf_adapter *adapter = netdev_priv(netdev); + + features &= ~I40EVF_VLAN_FEATURES; + if (adapter->vf_res->vf_offload_flags & I40E_VIRTCHNL_VF_OFFLOAD_VLAN) + features |= I40EVF_VLAN_FEATURES; + return features; +} + static const struct net_device_ops i40evf_netdev_ops = { .ndo_open = i40evf_open, .ndo_stop = i40evf_close, @@ -2264,6 +2286,7 @@ static const struct net_device_ops i40evf_netdev_ops = { .ndo_tx_timeout = i40evf_tx_timeout, .ndo_vlan_rx_add_vid = i40evf_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = i40evf_vlan_rx_kill_vid, + .ndo_fix_features = i40evf_fix_features, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = i40evf_netpoll, #endif @@ -2300,29 +2323,20 @@ static int i40evf_check_reset_complete(struct i40e_hw *hw) **/ int i40evf_process_config(struct i40evf_adapter *adapter) { + struct i40e_virtchnl_vf_resource *vfres = adapter->vf_res; struct net_device *netdev = adapter->netdev; int i; /* got VF config message back from PF, now we can parse it */ - for (i = 0; i < adapter->vf_res->num_vsis; i++) { - if (adapter->vf_res->vsi_res[i].vsi_type == I40E_VSI_SRIOV) - adapter->vsi_res = &adapter->vf_res->vsi_res[i]; + for (i = 0; i < vfres->num_vsis; i++) { + if (vfres->vsi_res[i].vsi_type == I40E_VSI_SRIOV) + adapter->vsi_res = &vfres->vsi_res[i]; } if (!adapter->vsi_res) { dev_err(&adapter->pdev->dev, "No LAN VSI found\n"); return -ENODEV; } - if (adapter->vf_res->vf_offload_flags - & I40E_VIRTCHNL_VF_OFFLOAD_VLAN) { - netdev->vlan_features = netdev->features & - ~(NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_CTAG_RX | - NETIF_F_HW_VLAN_CTAG_FILTER); - netdev->features |= NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_CTAG_RX | - NETIF_F_HW_VLAN_CTAG_FILTER; - } netdev->features |= NETIF_F_HIGHDMA | NETIF_F_SG | NETIF_F_IP_CSUM | @@ -2331,7 +2345,7 @@ int i40evf_process_config(struct i40evf_adapter *adapter) NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN | - NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE | NETIF_F_GSO_UDP_TUNNEL | NETIF_F_RXCSUM | NETIF_F_GRO; @@ -2348,9 +2362,15 @@ int i40evf_process_config(struct i40evf_adapter *adapter) if (adapter->flags & I40EVF_FLAG_OUTER_UDP_CSUM_CAPABLE) netdev->features |= NETIF_F_GSO_UDP_TUNNEL_CSUM; + /* always clear VLAN features because they can change at every reset */ + netdev->features &= ~(I40EVF_VLAN_FEATURES); /* copy netdev features into list of user selectable features */ netdev->hw_features |= netdev->features; - netdev->hw_features &= ~NETIF_F_RXCSUM; + + if (vfres->vf_offload_flags & I40E_VIRTCHNL_VF_OFFLOAD_VLAN) { + netdev->vlan_features = netdev->features; + netdev->features |= I40EVF_VLAN_FEATURES; + } adapter->vsi.id = adapter->vsi_res->vsi_id; @@ -2831,11 +2851,11 @@ static void i40evf_remove(struct pci_dev *pdev) adapter->state = __I40EVF_REMOVE; adapter->aq_required = 0; i40evf_request_reset(adapter); - msleep(20); + msleep(50); /* If the FW isn't responding, kick it once, but only once. */ if (!i40evf_asq_done(hw)) { i40evf_request_reset(adapter); - msleep(20); + msleep(50); } if (adapter->msix_entries) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig index 2ad7f67854d5..5989f7cb5462 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig +++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig @@ -50,3 +50,11 @@ config MLXSW_SPECTRUM To compile this driver as a module, choose M here: the module will be called mlxsw_spectrum. + +config MLXSW_SPECTRUM_DCB + bool "Data Center Bridging (DCB) support" + depends on MLXSW_SPECTRUM && DCB + default y + ---help--- + Say Y here if you want to use Data Center Bridging (DCB) in the + driver. diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index 584cac444852..9b5ebf84c051 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -8,3 +8,4 @@ mlxsw_switchx2-objs := switchx2.o obj-$(CONFIG_MLXSW_SPECTRUM) += mlxsw_spectrum.o mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ spectrum_switchdev.o +mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index ffe4c0305733..28f5b99e585a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -1805,6 +1805,184 @@ static inline void mlxsw_reg_spvmlr_pack(char *payload, u8 local_port, } } +/* QTCT - QoS Switch Traffic Class Table + * ------------------------------------- + * Configures the mapping between the packet switch priority and the + * traffic class on the transmit port. + */ +#define MLXSW_REG_QTCT_ID 0x400A +#define MLXSW_REG_QTCT_LEN 0x08 + +static const struct mlxsw_reg_info mlxsw_reg_qtct = { + .id = MLXSW_REG_QTCT_ID, + .len = MLXSW_REG_QTCT_LEN, +}; + +/* reg_qtct_local_port + * Local port number. + * Access: Index + * + * Note: CPU port is not supported. + */ +MLXSW_ITEM32(reg, qtct, local_port, 0x00, 16, 8); + +/* reg_qtct_sub_port + * Virtual port within the physical port. + * Should be set to 0 when virtual ports are not enabled on the port. + * Access: Index + */ +MLXSW_ITEM32(reg, qtct, sub_port, 0x00, 8, 8); + +/* reg_qtct_switch_prio + * Switch priority. + * Access: Index + */ +MLXSW_ITEM32(reg, qtct, switch_prio, 0x00, 0, 4); + +/* reg_qtct_tclass + * Traffic class. + * Default values: + * switch_prio 0 : tclass 1 + * switch_prio 1 : tclass 0 + * switch_prio i : tclass i, for i > 1 + * Access: RW + */ +MLXSW_ITEM32(reg, qtct, tclass, 0x04, 0, 4); + +static inline void mlxsw_reg_qtct_pack(char *payload, u8 local_port, + u8 switch_prio, u8 tclass) +{ + MLXSW_REG_ZERO(qtct, payload); + mlxsw_reg_qtct_local_port_set(payload, local_port); + mlxsw_reg_qtct_switch_prio_set(payload, switch_prio); + mlxsw_reg_qtct_tclass_set(payload, tclass); +} + +/* QEEC - QoS ETS Element Configuration Register + * --------------------------------------------- + * Configures the ETS elements. + */ +#define MLXSW_REG_QEEC_ID 0x400D +#define MLXSW_REG_QEEC_LEN 0x1C + +static const struct mlxsw_reg_info mlxsw_reg_qeec = { + .id = MLXSW_REG_QEEC_ID, + .len = MLXSW_REG_QEEC_LEN, +}; + +/* reg_qeec_local_port + * Local port number. + * Access: Index + * + * Note: CPU port is supported. + */ +MLXSW_ITEM32(reg, qeec, local_port, 0x00, 16, 8); + +enum mlxsw_reg_qeec_hr { + MLXSW_REG_QEEC_HIERARCY_PORT, + MLXSW_REG_QEEC_HIERARCY_GROUP, + MLXSW_REG_QEEC_HIERARCY_SUBGROUP, + MLXSW_REG_QEEC_HIERARCY_TC, +}; + +/* reg_qeec_element_hierarchy + * 0 - Port + * 1 - Group + * 2 - Subgroup + * 3 - Traffic Class + * Access: Index + */ +MLXSW_ITEM32(reg, qeec, element_hierarchy, 0x04, 16, 4); + +/* reg_qeec_element_index + * The index of the element in the hierarchy. + * Access: Index + */ +MLXSW_ITEM32(reg, qeec, element_index, 0x04, 0, 8); + +/* reg_qeec_next_element_index + * The index of the next (lower) element in the hierarchy. + * Access: RW + * + * Note: Reserved for element_hierarchy 0. + */ +MLXSW_ITEM32(reg, qeec, next_element_index, 0x08, 0, 8); + +enum { + MLXSW_REG_QEEC_BYTES_MODE, + MLXSW_REG_QEEC_PACKETS_MODE, +}; + +/* reg_qeec_pb + * Packets or bytes mode. + * 0 - Bytes mode + * 1 - Packets mode + * Access: RW + * + * Note: Used for max shaper configuration. For Spectrum, packets mode + * is supported only for traffic classes of CPU port. + */ +MLXSW_ITEM32(reg, qeec, pb, 0x0C, 28, 1); + +/* reg_qeec_mase + * Max shaper configuration enable. Enables configuration of the max + * shaper on this ETS element. + * 0 - Disable + * 1 - Enable + * Access: RW + */ +MLXSW_ITEM32(reg, qeec, mase, 0x10, 31, 1); + +/* A large max rate will disable the max shaper. */ +#define MLXSW_REG_QEEC_MAS_DIS 200000000 /* Kbps */ + +/* reg_qeec_max_shaper_rate + * Max shaper information rate. + * For CPU port, can only be configured for port hierarchy. + * When in bytes mode, value is specified in units of 1000bps. + * Access: RW + */ +MLXSW_ITEM32(reg, qeec, max_shaper_rate, 0x10, 0, 28); + +/* reg_qeec_de + * DWRR configuration enable. Enables configuration of the dwrr and + * dwrr_weight. + * 0 - Disable + * 1 - Enable + * Access: RW + */ +MLXSW_ITEM32(reg, qeec, de, 0x18, 31, 1); + +/* reg_qeec_dwrr + * Transmission selection algorithm to use on the link going down from + * the ETS element. + * 0 - Strict priority + * 1 - DWRR + * Access: RW + */ +MLXSW_ITEM32(reg, qeec, dwrr, 0x18, 15, 1); + +/* reg_qeec_dwrr_weight + * DWRR weight on the link going down from the ETS element. The + * percentage of bandwidth guaranteed to an ETS element within + * its hierarchy. The sum of all weights across all ETS elements + * within one hierarchy should be equal to 100. Reserved when + * transmission selection algorithm is strict priority. + * Access: RW + */ +MLXSW_ITEM32(reg, qeec, dwrr_weight, 0x18, 0, 8); + +static inline void mlxsw_reg_qeec_pack(char *payload, u8 local_port, + enum mlxsw_reg_qeec_hr hr, u8 index, + u8 next_index) +{ + MLXSW_REG_ZERO(qeec, payload); + mlxsw_reg_qeec_local_port_set(payload, local_port); + mlxsw_reg_qeec_element_hierarchy_set(payload, hr); + mlxsw_reg_qeec_element_index_set(payload, index); + mlxsw_reg_qeec_next_element_index_set(payload, next_index); +} + /* PMLP - Ports Module to Local Port Register * ------------------------------------------ * Configures the assignment of modules to local ports. @@ -2141,6 +2319,145 @@ static inline void mlxsw_reg_paos_pack(char *payload, u8 local_port, mlxsw_reg_paos_e_set(payload, 1); } +/* PFCC - Ports Flow Control Configuration Register + * ------------------------------------------------ + * Configures and retrieves the per port flow control configuration. + */ +#define MLXSW_REG_PFCC_ID 0x5007 +#define MLXSW_REG_PFCC_LEN 0x20 + +static const struct mlxsw_reg_info mlxsw_reg_pfcc = { + .id = MLXSW_REG_PFCC_ID, + .len = MLXSW_REG_PFCC_LEN, +}; + +/* reg_pfcc_local_port + * Local port number. + * Access: Index + */ +MLXSW_ITEM32(reg, pfcc, local_port, 0x00, 16, 8); + +/* reg_pfcc_pnat + * Port number access type. Determines the way local_port is interpreted: + * 0 - Local port number. + * 1 - IB / label port number. + * Access: Index + */ +MLXSW_ITEM32(reg, pfcc, pnat, 0x00, 14, 2); + +/* reg_pfcc_shl_cap + * Send to higher layers capabilities: + * 0 - No capability of sending Pause and PFC frames to higher layers. + * 1 - Device has capability of sending Pause and PFC frames to higher + * layers. + * Access: RO + */ +MLXSW_ITEM32(reg, pfcc, shl_cap, 0x00, 1, 1); + +/* reg_pfcc_shl_opr + * Send to higher layers operation: + * 0 - Pause and PFC frames are handled by the port (default). + * 1 - Pause and PFC frames are handled by the port and also sent to + * higher layers. Only valid if shl_cap = 1. + * Access: RW + */ +MLXSW_ITEM32(reg, pfcc, shl_opr, 0x00, 0, 1); + +/* reg_pfcc_ppan + * Pause policy auto negotiation. + * 0 - Disabled. Generate / ignore Pause frames based on pptx / pprtx. + * 1 - Enabled. When auto-negotiation is performed, set the Pause policy + * based on the auto-negotiation resolution. + * Access: RW + * + * Note: The auto-negotiation advertisement is set according to pptx and + * pprtx. When PFC is set on Tx / Rx, ppan must be set to 0. + */ +MLXSW_ITEM32(reg, pfcc, ppan, 0x04, 28, 4); + +/* reg_pfcc_prio_mask_tx + * Bit per priority indicating if Tx flow control policy should be + * updated based on bit pfctx. + * Access: WO + */ +MLXSW_ITEM32(reg, pfcc, prio_mask_tx, 0x04, 16, 8); + +/* reg_pfcc_prio_mask_rx + * Bit per priority indicating if Rx flow control policy should be + * updated based on bit pfcrx. + * Access: WO + */ +MLXSW_ITEM32(reg, pfcc, prio_mask_rx, 0x04, 0, 8); + +/* reg_pfcc_pptx + * Admin Pause policy on Tx. + * 0 - Never generate Pause frames (default). + * 1 - Generate Pause frames according to Rx buffer threshold. + * Access: RW + */ +MLXSW_ITEM32(reg, pfcc, pptx, 0x08, 31, 1); + +/* reg_pfcc_aptx + * Active (operational) Pause policy on Tx. + * 0 - Never generate Pause frames. + * 1 - Generate Pause frames according to Rx buffer threshold. + * Access: RO + */ +MLXSW_ITEM32(reg, pfcc, aptx, 0x08, 30, 1); + +/* reg_pfcc_pfctx + * Priority based flow control policy on Tx[7:0]. Per-priority bit mask: + * 0 - Never generate priority Pause frames on the specified priority + * (default). + * 1 - Generate priority Pause frames according to Rx buffer threshold on + * the specified priority. + * Access: RW + * + * Note: pfctx and pptx must be mutually exclusive. + */ +MLXSW_ITEM32(reg, pfcc, pfctx, 0x08, 16, 8); + +/* reg_pfcc_pprx + * Admin Pause policy on Rx. + * 0 - Ignore received Pause frames (default). + * 1 - Respect received Pause frames. + * Access: RW + */ +MLXSW_ITEM32(reg, pfcc, pprx, 0x0C, 31, 1); + +/* reg_pfcc_aprx + * Active (operational) Pause policy on Rx. + * 0 - Ignore received Pause frames. + * 1 - Respect received Pause frames. + * Access: RO + */ +MLXSW_ITEM32(reg, pfcc, aprx, 0x0C, 30, 1); + +/* reg_pfcc_pfcrx + * Priority based flow control policy on Rx[7:0]. Per-priority bit mask: + * 0 - Ignore incoming priority Pause frames on the specified priority + * (default). + * 1 - Respect incoming priority Pause frames on the specified priority. + * Access: RW + */ +MLXSW_ITEM32(reg, pfcc, pfcrx, 0x0C, 16, 8); + +#define MLXSW_REG_PFCC_ALL_PRIO 0xFF + +static inline void mlxsw_reg_pfcc_prio_pack(char *payload, u8 pfc_en) +{ + mlxsw_reg_pfcc_prio_mask_tx_set(payload, MLXSW_REG_PFCC_ALL_PRIO); + mlxsw_reg_pfcc_prio_mask_rx_set(payload, MLXSW_REG_PFCC_ALL_PRIO); + mlxsw_reg_pfcc_pfctx_set(payload, pfc_en); + mlxsw_reg_pfcc_pfcrx_set(payload, pfc_en); +} + +static inline void mlxsw_reg_pfcc_pack(char *payload, u8 local_port) +{ + MLXSW_REG_ZERO(pfcc, payload); + mlxsw_reg_pfcc_local_port_set(payload, local_port); +} + /* PPCNT - Ports Performance Counters Register * ------------------------------------------- * The PPCNT register retrieves per port performance counters. @@ -2180,6 +2497,11 @@ MLXSW_ITEM32(reg, ppcnt, local_port, 0x00, 16, 8); */ MLXSW_ITEM32(reg, ppcnt, pnat, 0x00, 14, 2); +enum mlxsw_reg_ppcnt_grp { + MLXSW_REG_PPCNT_IEEE_8023_CNT = 0x0, + MLXSW_REG_PPCNT_PRIO_CNT = 0x10, +}; + /* reg_ppcnt_grp * Performance counter group. * Group 63 indicates all groups. Only valid on Set() operation with @@ -2215,6 +2537,8 @@ MLXSW_ITEM32(reg, ppcnt, clr, 0x04, 31, 1); */ MLXSW_ITEM32(reg, ppcnt, prio_tc, 0x04, 0, 5); +/* Ethernet IEEE 802.3 Counter Group */ + /* reg_ppcnt_a_frames_transmitted_ok * Access: RO */ @@ -2329,15 +2653,145 @@ MLXSW_ITEM64(reg, ppcnt, a_pause_mac_ctrl_frames_received, MLXSW_ITEM64(reg, ppcnt, a_pause_mac_ctrl_frames_transmitted, 0x08 + 0x90, 0, 64); -static inline void mlxsw_reg_ppcnt_pack(char *payload, u8 local_port) +/* Ethernet Per Priority Group Counters */ + +/* reg_ppcnt_rx_octets + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, rx_octets, 0x08 + 0x00, 0, 64); + +/* reg_ppcnt_rx_frames + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, rx_frames, 0x08 + 0x20, 0, 64); + +/* reg_ppcnt_tx_octets + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, tx_octets, 0x08 + 0x28, 0, 64); + +/* reg_ppcnt_tx_frames + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, tx_frames, 0x08 + 0x48, 0, 64); + +/* reg_ppcnt_rx_pause + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, rx_pause, 0x08 + 0x50, 0, 64); + +/* reg_ppcnt_rx_pause_duration + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, rx_pause_duration, 0x08 + 0x58, 0, 64); + +/* reg_ppcnt_tx_pause + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, tx_pause, 0x08 + 0x60, 0, 64); + +/* reg_ppcnt_tx_pause_duration + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, tx_pause_duration, 0x08 + 0x68, 0, 64); + +/* reg_ppcnt_rx_pause_transition + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, tx_pause_transition, 0x08 + 0x70, 0, 64); + +static inline void mlxsw_reg_ppcnt_pack(char *payload, u8 local_port, + enum mlxsw_reg_ppcnt_grp grp, + u8 prio_tc) { MLXSW_REG_ZERO(ppcnt, payload); mlxsw_reg_ppcnt_swid_set(payload, 0); mlxsw_reg_ppcnt_local_port_set(payload, local_port); mlxsw_reg_ppcnt_pnat_set(payload, 0); - mlxsw_reg_ppcnt_grp_set(payload, 0); + mlxsw_reg_ppcnt_grp_set(payload, grp); mlxsw_reg_ppcnt_clr_set(payload, 0); - mlxsw_reg_ppcnt_prio_tc_set(payload, 0); + mlxsw_reg_ppcnt_prio_tc_set(payload, prio_tc); +} + +/* PPTB - Port Prio To Buffer Register + * ----------------------------------- + * Configures the switch priority to buffer table. + */ +#define MLXSW_REG_PPTB_ID 0x500B +#define MLXSW_REG_PPTB_LEN 0x0C + +static const struct mlxsw_reg_info mlxsw_reg_pptb = { + .id = MLXSW_REG_PPTB_ID, + .len = MLXSW_REG_PPTB_LEN, +}; + +enum { + MLXSW_REG_PPTB_MM_UM, + MLXSW_REG_PPTB_MM_UNICAST, + MLXSW_REG_PPTB_MM_MULTICAST, +}; + +/* reg_pptb_mm + * Mapping mode. + * 0 - Map both unicast and multicast packets to the same buffer. + * 1 - Map only unicast packets. + * 2 - Map only multicast packets. + * Access: Index + * + * Note: SwitchX-2 only supports the first option. + */ +MLXSW_ITEM32(reg, pptb, mm, 0x00, 28, 2); + +/* reg_pptb_local_port + * Local port number. + * Access: Index + */ +MLXSW_ITEM32(reg, pptb, local_port, 0x00, 16, 8); + +/* reg_pptb_um + * Enables the update of the untagged_buf field. + * Access: RW + */ +MLXSW_ITEM32(reg, pptb, um, 0x00, 8, 1); + +/* reg_pptb_pm + * Enables the update of the prio_to_buff field. + * Bit <i> is a flag for updating the mapping for switch priority <i>. + * Access: RW + */ +MLXSW_ITEM32(reg, pptb, pm, 0x00, 0, 8); + +/* reg_pptb_prio_to_buff + * Mapping of switch priority <i> to one of the allocated receive port + * buffers. + * Access: RW + */ +MLXSW_ITEM_BIT_ARRAY(reg, pptb, prio_to_buff, 0x04, 0x04, 4); + +/* reg_pptb_pm_msb + * Enables the update of the prio_to_buff field. + * Bit <i> is a flag for updating the mapping for switch priority <i+8>. + * Access: RW + */ +MLXSW_ITEM32(reg, pptb, pm_msb, 0x08, 24, 8); + +/* reg_pptb_untagged_buff + * Mapping of untagged frames to one of the allocated receive port buffers. + * Access: RW + * + * Note: In SwitchX-2 this field must be mapped to buffer 8. Reserved for + * Spectrum, as it maps untagged packets based on the default switch priority. + */ +MLXSW_ITEM32(reg, pptb, untagged_buff, 0x08, 0, 4); + +#define MLXSW_REG_PPTB_ALL_PRIO 0xFF + +static inline void mlxsw_reg_pptb_pack(char *payload, u8 local_port) +{ + MLXSW_REG_ZERO(pptb, payload); + mlxsw_reg_pptb_mm_set(payload, MLXSW_REG_PPTB_MM_UM); + mlxsw_reg_pptb_local_port_set(payload, local_port); + mlxsw_reg_pptb_pm_set(payload, MLXSW_REG_PPTB_ALL_PRIO); } /* PBMC - Port Buffer Management Control Register @@ -2346,7 +2800,7 @@ static inline void mlxsw_reg_ppcnt_pack(char *payload, u8 local_port) * allocation for different Prios, and the Pause threshold management. */ #define MLXSW_REG_PBMC_ID 0x500C -#define MLXSW_REG_PBMC_LEN 0x68 +#define MLXSW_REG_PBMC_LEN 0x6C static const struct mlxsw_reg_info mlxsw_reg_pbmc = { .id = MLXSW_REG_PBMC_ID, @@ -2374,6 +2828,8 @@ MLXSW_ITEM32(reg, pbmc, xoff_timer_value, 0x04, 16, 16); */ MLXSW_ITEM32(reg, pbmc, xoff_refresh, 0x04, 0, 16); +#define MLXSW_REG_PBMC_PORT_SHARED_BUF_IDX 11 + /* reg_pbmc_buf_lossy * The field indicates if the buffer is lossy. * 0 - Lossless @@ -2398,6 +2854,30 @@ MLXSW_ITEM32_INDEXED(reg, pbmc, buf_epsb, 0x0C, 24, 1, 0x08, 0x00, false); */ MLXSW_ITEM32_INDEXED(reg, pbmc, buf_size, 0x0C, 0, 16, 0x08, 0x00, false); +/* reg_pbmc_buf_xoff_threshold + * Once the amount of data in the buffer goes above this value, device + * starts sending PFC frames for all priorities associated with the + * buffer. Units are represented in cells. Reserved in case of lossy + * buffer. + * Access: RW + * + * Note: In Spectrum, reserved for buffer[9]. + */ +MLXSW_ITEM32_INDEXED(reg, pbmc, buf_xoff_threshold, 0x0C, 16, 16, + 0x08, 0x04, false); + +/* reg_pbmc_buf_xon_threshold + * When the amount of data in the buffer goes below this value, device + * stops sending PFC frames for the priorities associated with the + * buffer. Units are represented in cells. Reserved in case of lossy + * buffer. + * Access: RW + * + * Note: In Spectrum, reserved for buffer[9]. + */ +MLXSW_ITEM32_INDEXED(reg, pbmc, buf_xon_threshold, 0x0C, 0, 16, + 0x08, 0x04, false); + static inline void mlxsw_reg_pbmc_pack(char *payload, u8 local_port, u16 xoff_timer_value, u16 xoff_refresh) { @@ -2416,6 +2896,17 @@ static inline void mlxsw_reg_pbmc_lossy_buffer_pack(char *payload, mlxsw_reg_pbmc_buf_size_set(payload, buf_index, size); } +static inline void mlxsw_reg_pbmc_lossless_buffer_pack(char *payload, + int buf_index, u16 size, + u16 threshold) +{ + mlxsw_reg_pbmc_buf_lossy_set(payload, buf_index, 0); + mlxsw_reg_pbmc_buf_epsb_set(payload, buf_index, 0); + mlxsw_reg_pbmc_buf_size_set(payload, buf_index, size); + mlxsw_reg_pbmc_buf_xoff_threshold_set(payload, buf_index, threshold); + mlxsw_reg_pbmc_buf_xon_threshold_set(payload, buf_index, threshold); +} + /* PSPA - Port Switch Partition Allocation * --------------------------------------- * Controls the association of a port with a switch partition and enables @@ -3283,6 +3774,10 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id) return "SFMR"; case MLXSW_REG_SPVMLR_ID: return "SPVMLR"; + case MLXSW_REG_QTCT_ID: + return "QTCT"; + case MLXSW_REG_QEEC_ID: + return "QEEC"; case MLXSW_REG_PMLP_ID: return "PMLP"; case MLXSW_REG_PMTU_ID: @@ -3293,8 +3788,12 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id) return "PPAD"; case MLXSW_REG_PAOS_ID: return "PAOS"; + case MLXSW_REG_PFCC_ID: + return "PFCC"; case MLXSW_REG_PPCNT_ID: return "PPCNT"; + case MLXSW_REG_PPTB_ID: + return "PPTB"; case MLXSW_REG_PBMC_ID: return "PBMC"; case MLXSW_REG_PSPA_ID: diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index cb5f36e497e9..507263a2d226 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -49,6 +49,7 @@ #include <linux/jiffies.h> #include <linux/bitops.h> #include <linux/list.h> +#include <linux/dcbnl.h> #include <net/devlink.h> #include <net/switchdev.h> #include <generated/utsrelease.h> @@ -449,16 +450,89 @@ static int mlxsw_sp_port_set_mac_address(struct net_device *dev, void *p) return 0; } +static void mlxsw_sp_pg_buf_pack(char *pbmc_pl, int pg_index, int mtu, + bool pause_en, bool pfc_en, u16 delay) +{ + u16 pg_size = 2 * MLXSW_SP_BYTES_TO_CELLS(mtu); + + delay = pfc_en ? mlxsw_sp_pfc_delay_get(mtu, delay) : + MLXSW_SP_PAUSE_DELAY; + + if (pause_en || pfc_en) + mlxsw_reg_pbmc_lossless_buffer_pack(pbmc_pl, pg_index, + pg_size + delay, pg_size); + else + mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, pg_index, pg_size); +} + +int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu, + u8 *prio_tc, bool pause_en, + struct ieee_pfc *my_pfc) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u8 pfc_en = !!my_pfc ? my_pfc->pfc_en : 0; + u16 delay = !!my_pfc ? my_pfc->delay : 0; + char pbmc_pl[MLXSW_REG_PBMC_LEN]; + int i, j, err; + + mlxsw_reg_pbmc_pack(pbmc_pl, mlxsw_sp_port->local_port, 0, 0); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl); + if (err) + return err; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + bool configure = false; + bool pfc = false; + + for (j = 0; j < IEEE_8021QAZ_MAX_TCS; j++) { + if (prio_tc[j] == i) { + pfc = pfc_en & BIT(j); + configure = true; + break; + } + } + + if (!configure) + continue; + mlxsw_sp_pg_buf_pack(pbmc_pl, i, mtu, pause_en, pfc, delay); + } + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl); +} + +static int mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, + int mtu, bool pause_en) +{ + u8 def_prio_tc[IEEE_8021QAZ_MAX_TCS] = {0}; + bool dcb_en = !!mlxsw_sp_port->dcb.ets; + struct ieee_pfc *my_pfc; + u8 *prio_tc; + + prio_tc = dcb_en ? mlxsw_sp_port->dcb.ets->prio_tc : def_prio_tc; + my_pfc = dcb_en ? mlxsw_sp_port->dcb.pfc : NULL; + + return __mlxsw_sp_port_headroom_set(mlxsw_sp_port, mtu, prio_tc, + pause_en, my_pfc); +} + static int mlxsw_sp_port_change_mtu(struct net_device *dev, int mtu) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + bool pause_en = mlxsw_sp_port_is_pause_en(mlxsw_sp_port); int err; - err = mlxsw_sp_port_mtu_set(mlxsw_sp_port, mtu); + err = mlxsw_sp_port_headroom_set(mlxsw_sp_port, mtu, pause_en); if (err) return err; + err = mlxsw_sp_port_mtu_set(mlxsw_sp_port, mtu); + if (err) + goto err_port_mtu_set; dev->mtu = mtu; return 0; + +err_port_mtu_set: + mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, pause_en); + return err; } static struct rtnl_link_stats64 * @@ -936,6 +1010,68 @@ static void mlxsw_sp_port_get_drvinfo(struct net_device *dev, sizeof(drvinfo->bus_info)); } +static void mlxsw_sp_port_get_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *pause) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + + pause->rx_pause = mlxsw_sp_port->link.rx_pause; + pause->tx_pause = mlxsw_sp_port->link.tx_pause; +} + +static int mlxsw_sp_port_pause_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct ethtool_pauseparam *pause) +{ + char pfcc_pl[MLXSW_REG_PFCC_LEN]; + + mlxsw_reg_pfcc_pack(pfcc_pl, mlxsw_sp_port->local_port); + mlxsw_reg_pfcc_pprx_set(pfcc_pl, pause->rx_pause); + mlxsw_reg_pfcc_pptx_set(pfcc_pl, pause->tx_pause); + + return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pfcc), + pfcc_pl); +} + +static int mlxsw_sp_port_set_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *pause) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + bool pause_en = pause->tx_pause || pause->rx_pause; + int err; + + if (mlxsw_sp_port->dcb.pfc && mlxsw_sp_port->dcb.pfc->pfc_en) { + netdev_err(dev, "PFC already enabled on port\n"); + return -EINVAL; + } + + if (pause->autoneg) { + netdev_err(dev, "PAUSE frames autonegotiation isn't supported\n"); + return -EINVAL; + } + + err = mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, pause_en); + if (err) { + netdev_err(dev, "Failed to configure port's headroom\n"); + return err; + } + + err = mlxsw_sp_port_pause_set(mlxsw_sp_port, pause); + if (err) { + netdev_err(dev, "Failed to set PAUSE parameters\n"); + goto err_port_pause_configure; + } + + mlxsw_sp_port->link.rx_pause = pause->rx_pause; + mlxsw_sp_port->link.tx_pause = pause->tx_pause; + + return 0; + +err_port_pause_configure: + pause_en = mlxsw_sp_port_is_pause_en(mlxsw_sp_port); + mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, pause_en); + return err; +} + struct mlxsw_sp_port_hw_stats { char str[ETH_GSTRING_LEN]; u64 (*getter)(char *payload); @@ -1071,7 +1207,8 @@ static void mlxsw_sp_port_get_stats(struct net_device *dev, int i; int err; - mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sp_port->local_port); + mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sp_port->local_port, + MLXSW_REG_PPCNT_IEEE_8023_CNT, 0); err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ppcnt), ppcnt_pl); for (i = 0; i < MLXSW_SP_PORT_HW_STATS_LEN; i++) data[i] = !err ? mlxsw_sp_port_hw_stats[i].getter(ppcnt_pl) : 0; @@ -1419,6 +1556,8 @@ static int mlxsw_sp_port_set_settings(struct net_device *dev, static const struct ethtool_ops mlxsw_sp_port_ethtool_ops = { .get_drvinfo = mlxsw_sp_port_get_drvinfo, .get_link = ethtool_op_get_link, + .get_pauseparam = mlxsw_sp_port_get_pauseparam, + .set_pauseparam = mlxsw_sp_port_set_pauseparam, .get_strings = mlxsw_sp_port_get_strings, .set_phys_id = mlxsw_sp_port_set_phys_id, .get_ethtool_stats = mlxsw_sp_port_get_stats, @@ -1441,6 +1580,108 @@ mlxsw_sp_port_speed_by_width_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 width) return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); } +int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, + enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index, + bool dwrr, u8 dwrr_weight) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char qeec_pl[MLXSW_REG_QEEC_LEN]; + + mlxsw_reg_qeec_pack(qeec_pl, mlxsw_sp_port->local_port, hr, index, + next_index); + mlxsw_reg_qeec_de_set(qeec_pl, true); + mlxsw_reg_qeec_dwrr_set(qeec_pl, dwrr); + mlxsw_reg_qeec_dwrr_weight_set(qeec_pl, dwrr_weight); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl); +} + +int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port, + enum mlxsw_reg_qeec_hr hr, u8 index, + u8 next_index, u32 maxrate) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char qeec_pl[MLXSW_REG_QEEC_LEN]; + + mlxsw_reg_qeec_pack(qeec_pl, mlxsw_sp_port->local_port, hr, index, + next_index); + mlxsw_reg_qeec_mase_set(qeec_pl, true); + mlxsw_reg_qeec_max_shaper_rate_set(qeec_pl, maxrate); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl); +} + +int mlxsw_sp_port_prio_tc_set(struct mlxsw_sp_port *mlxsw_sp_port, + u8 switch_prio, u8 tclass) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char qtct_pl[MLXSW_REG_QTCT_LEN]; + + mlxsw_reg_qtct_pack(qtct_pl, mlxsw_sp_port->local_port, switch_prio, + tclass); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qtct), qtct_pl); +} + +static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + int err, i; + + /* Setup the elements hierarcy, so that each TC is linked to + * one subgroup, which are all member in the same group. + */ + err = mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HIERARCY_GROUP, 0, 0, false, + 0); + if (err) + return err; + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HIERARCY_SUBGROUP, i, + 0, false, 0); + if (err) + return err; + } + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HIERARCY_TC, i, i, + false, 0); + if (err) + return err; + } + + /* Make sure the max shaper is disabled in all hierarcies that + * support it. + */ + err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HIERARCY_PORT, 0, 0, + MLXSW_REG_QEEC_MAS_DIS); + if (err) + return err; + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HIERARCY_SUBGROUP, + i, 0, + MLXSW_REG_QEEC_MAS_DIS); + if (err) + return err; + } + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HIERARCY_TC, + i, i, + MLXSW_REG_QEEC_MAS_DIS); + if (err) + return err; + } + + /* Map all priorities to traffic class 0. */ + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, 0); + if (err) + return err; + } + + return 0; +} + static int __mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, bool split, u8 module, u8 width) { @@ -1548,6 +1789,21 @@ static int __mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_port_buffers_init; } + err = mlxsw_sp_port_ets_init(mlxsw_sp_port); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to initialize ETS\n", + mlxsw_sp_port->local_port); + goto err_port_ets_init; + } + + /* ETS and buffers must be initialized before DCB. */ + err = mlxsw_sp_port_dcb_init(mlxsw_sp_port); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to initialize DCB\n", + mlxsw_sp_port->local_port); + goto err_port_dcb_init; + } + mlxsw_sp_port_switchdev_init(mlxsw_sp_port); err = register_netdev(dev); if (err) { @@ -1568,6 +1824,8 @@ static int __mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, err_port_vlan_init: unregister_netdev(dev); err_register_netdev: +err_port_dcb_init: +err_port_ets_init: err_port_buffers_init: err_port_admin_status_set: err_port_mtu_set: @@ -1637,6 +1895,7 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) devlink_port = &mlxsw_sp_port->devlink_port; devlink_port_type_clear(devlink_port); unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */ + mlxsw_sp_port_dcb_fini(mlxsw_sp_port); devlink_port_unregister(devlink_port); mlxsw_sp_port_vports_fini(mlxsw_sp_port); mlxsw_sp_port_switchdev_fini(mlxsw_sp_port); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index d58ab0cd9507..47610a5ccd78 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -42,6 +42,7 @@ #include <linux/bitops.h> #include <linux/if_vlan.h> #include <linux/list.h> +#include <linux/dcbnl.h> #include <net/switchdev.h> #include <net/devlink.h> @@ -62,6 +63,23 @@ #define MLXSW_SP_PORT_BASE_SPEED 25000 /* Mb/s */ +#define MLXSW_SP_BYTES_PER_CELL 96 + +#define MLXSW_SP_BYTES_TO_CELLS(b) DIV_ROUND_UP(b, MLXSW_SP_BYTES_PER_CELL) + +/* Maximum delay buffer needed in case of PAUSE frames, in cells. + * Assumes 100m cable and maximum MTU. + */ +#define MLXSW_SP_PAUSE_DELAY 612 + +#define MLXSW_SP_CELL_FACTOR 2 /* 2 * cell_size / (IPG + cell_size + 1) */ + +static inline u16 mlxsw_sp_pfc_delay_get(int mtu, u16 delay) +{ + delay = MLXSW_SP_BYTES_TO_CELLS(DIV_ROUND_UP(delay, BITS_PER_BYTE)); + return MLXSW_SP_CELL_FACTOR * delay + MLXSW_SP_BYTES_TO_CELLS(mtu); +} + struct mlxsw_sp_port; struct mlxsw_sp_upper { @@ -166,6 +184,15 @@ struct mlxsw_sp_port { struct mlxsw_sp_vfid *vfid; u16 vid; } vport; + struct { + u8 tx_pause:1, + rx_pause:1; + } link; + struct { + struct ieee_ets *ets; + struct ieee_maxrate *maxrate; + struct ieee_pfc *pfc; + } dcb; /* 802.1Q bridge VLANs */ unsigned long *active_vlans; unsigned long *untagged_vlans; @@ -174,6 +201,12 @@ struct mlxsw_sp_port { struct devlink_port devlink_port; }; +static inline bool +mlxsw_sp_port_is_pause_en(const struct mlxsw_sp_port *mlxsw_sp_port) +{ + return mlxsw_sp_port->link.tx_pause || mlxsw_sp_port->link.rx_pause; +} + static inline struct mlxsw_sp_port * mlxsw_sp_port_lagged_get(struct mlxsw_sp *mlxsw_sp, u16 lag_id, u8 port_index) { @@ -265,5 +298,33 @@ int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 vfid, bool set, bool only_uc); void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port); int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid); +int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, + enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index, + bool dwrr, u8 dwrr_weight); +int mlxsw_sp_port_prio_tc_set(struct mlxsw_sp_port *mlxsw_sp_port, + u8 switch_prio, u8 tclass); +int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu, + u8 *prio_tc, bool pause_en, + struct ieee_pfc *my_pfc); +int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port, + enum mlxsw_reg_qeec_hr hr, u8 index, + u8 next_index, u32 maxrate); + +#ifdef CONFIG_MLXSW_SPECTRUM_DCB + +int mlxsw_sp_port_dcb_init(struct mlxsw_sp_port *mlxsw_sp_port); +void mlxsw_sp_port_dcb_fini(struct mlxsw_sp_port *mlxsw_sp_port); + +#else + +static inline int mlxsw_sp_port_dcb_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + return 0; +} + +static inline void mlxsw_sp_port_dcb_fini(struct mlxsw_sp_port *mlxsw_sp_port) +{} + +#endif #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index d59195e3f7fb..97c8d537be5b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -34,6 +34,8 @@ #include <linux/kernel.h> #include <linux/types.h> +#include <linux/dcbnl.h> +#include <linux/if_ether.h> #include "spectrum.h" #include "core.h" @@ -52,15 +54,15 @@ struct mlxsw_sp_pb { } static const struct mlxsw_sp_pb mlxsw_sp_pbs[] = { - MLXSW_SP_PB(0, 208), - MLXSW_SP_PB(1, 208), - MLXSW_SP_PB(2, 208), - MLXSW_SP_PB(3, 208), - MLXSW_SP_PB(4, 208), - MLXSW_SP_PB(5, 208), - MLXSW_SP_PB(6, 208), - MLXSW_SP_PB(7, 208), - MLXSW_SP_PB(9, 208), + MLXSW_SP_PB(0, 2 * MLXSW_SP_BYTES_TO_CELLS(ETH_FRAME_LEN)), + MLXSW_SP_PB(1, 0), + MLXSW_SP_PB(2, 0), + MLXSW_SP_PB(3, 0), + MLXSW_SP_PB(4, 0), + MLXSW_SP_PB(5, 0), + MLXSW_SP_PB(6, 0), + MLXSW_SP_PB(7, 0), + MLXSW_SP_PB(9, 2 * MLXSW_SP_BYTES_TO_CELLS(MLXSW_PORT_MAX_MTU)), }; #define MLXSW_SP_PBS_LEN ARRAY_SIZE(mlxsw_sp_pbs) @@ -78,11 +80,33 @@ static int mlxsw_sp_port_pb_init(struct mlxsw_sp_port *mlxsw_sp_port) pb = &mlxsw_sp_pbs[i]; mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, pb->index, pb->size); } + mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, + MLXSW_REG_PBMC_PORT_SHARED_BUF_IDX, 0); return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl); } -#define MLXSW_SP_SB_BYTES_PER_CELL 96 +static int mlxsw_sp_port_pb_prio_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + char pptb_pl[MLXSW_REG_PPTB_LEN]; + int i; + + mlxsw_reg_pptb_pack(pptb_pl, mlxsw_sp_port->local_port); + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + mlxsw_reg_pptb_prio_to_buff_set(pptb_pl, i, 0); + return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pptb), + pptb_pl); +} + +static int mlxsw_sp_port_headroom_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + int err; + + err = mlxsw_sp_port_pb_init(mlxsw_sp_port); + if (err) + return err; + return mlxsw_sp_port_pb_prio_init(mlxsw_sp_port); +} struct mlxsw_sp_sb_pool { u8 pool; @@ -92,11 +116,9 @@ struct mlxsw_sp_sb_pool { }; #define MLXSW_SP_SB_POOL_INGRESS_SIZE \ - ((15000000 - (2 * 20000 * MLXSW_PORT_MAX_PORTS)) / \ - MLXSW_SP_SB_BYTES_PER_CELL) + (15000000 - (2 * 20000 * MLXSW_PORT_MAX_PORTS)) #define MLXSW_SP_SB_POOL_EGRESS_SIZE \ - ((14000000 - (8 * 1500 * MLXSW_PORT_MAX_PORTS)) / \ - MLXSW_SP_SB_BYTES_PER_CELL) + (14000000 - (8 * 1500 * MLXSW_PORT_MAX_PORTS)) #define MLXSW_SP_SB_POOL(_pool, _dir, _mode, _size) \ { \ @@ -115,14 +137,14 @@ struct mlxsw_sp_sb_pool { MLXSW_REG_SBPR_MODE_DYNAMIC, _size) static const struct mlxsw_sp_sb_pool mlxsw_sp_sb_pools[] = { - MLXSW_SP_SB_POOL_INGRESS(0, MLXSW_SP_SB_POOL_INGRESS_SIZE), + MLXSW_SP_SB_POOL_INGRESS(0, MLXSW_SP_BYTES_TO_CELLS(MLXSW_SP_SB_POOL_INGRESS_SIZE)), MLXSW_SP_SB_POOL_INGRESS(1, 0), MLXSW_SP_SB_POOL_INGRESS(2, 0), MLXSW_SP_SB_POOL_INGRESS(3, 0), - MLXSW_SP_SB_POOL_EGRESS(0, MLXSW_SP_SB_POOL_EGRESS_SIZE), + MLXSW_SP_SB_POOL_EGRESS(0, MLXSW_SP_BYTES_TO_CELLS(MLXSW_SP_SB_POOL_EGRESS_SIZE)), MLXSW_SP_SB_POOL_EGRESS(1, 0), MLXSW_SP_SB_POOL_EGRESS(2, 0), - MLXSW_SP_SB_POOL_EGRESS(2, MLXSW_SP_SB_POOL_EGRESS_SIZE), + MLXSW_SP_SB_POOL_EGRESS(2, MLXSW_SP_BYTES_TO_CELLS(MLXSW_SP_SB_POOL_EGRESS_SIZE)), }; #define MLXSW_SP_SB_POOLS_LEN ARRAY_SIZE(mlxsw_sp_sb_pools) @@ -178,7 +200,7 @@ struct mlxsw_sp_sb_cm { MLXSW_SP_SB_CM(_tc, MLXSW_REG_SBCM_DIR_EGRESS, 104, 2, 3) static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms[] = { - MLXSW_SP_SB_CM_INGRESS(0, 10000 / MLXSW_SP_SB_BYTES_PER_CELL, 8), + MLXSW_SP_SB_CM_INGRESS(0, MLXSW_SP_BYTES_TO_CELLS(10000), 8), MLXSW_SP_SB_CM_INGRESS(1, 0, 0), MLXSW_SP_SB_CM_INGRESS(2, 0, 0), MLXSW_SP_SB_CM_INGRESS(3, 0, 0), @@ -186,15 +208,15 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms[] = { MLXSW_SP_SB_CM_INGRESS(5, 0, 0), MLXSW_SP_SB_CM_INGRESS(6, 0, 0), MLXSW_SP_SB_CM_INGRESS(7, 0, 0), - MLXSW_SP_SB_CM_INGRESS(9, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff), - MLXSW_SP_SB_CM_EGRESS(0, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9), - MLXSW_SP_SB_CM_EGRESS(1, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9), - MLXSW_SP_SB_CM_EGRESS(2, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9), - MLXSW_SP_SB_CM_EGRESS(3, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9), - MLXSW_SP_SB_CM_EGRESS(4, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9), - MLXSW_SP_SB_CM_EGRESS(5, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9), - MLXSW_SP_SB_CM_EGRESS(6, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9), - MLXSW_SP_SB_CM_EGRESS(7, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9), + MLXSW_SP_SB_CM_INGRESS(9, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff), + MLXSW_SP_SB_CM_EGRESS(0, MLXSW_SP_BYTES_TO_CELLS(1500), 9), + MLXSW_SP_SB_CM_EGRESS(1, MLXSW_SP_BYTES_TO_CELLS(1500), 9), + MLXSW_SP_SB_CM_EGRESS(2, MLXSW_SP_BYTES_TO_CELLS(1500), 9), + MLXSW_SP_SB_CM_EGRESS(3, MLXSW_SP_BYTES_TO_CELLS(1500), 9), + MLXSW_SP_SB_CM_EGRESS(4, MLXSW_SP_BYTES_TO_CELLS(1500), 9), + MLXSW_SP_SB_CM_EGRESS(5, MLXSW_SP_BYTES_TO_CELLS(1500), 9), + MLXSW_SP_SB_CM_EGRESS(6, MLXSW_SP_BYTES_TO_CELLS(1500), 9), + MLXSW_SP_SB_CM_EGRESS(7, MLXSW_SP_BYTES_TO_CELLS(1500), 9), MLXSW_SP_SB_CM_EGRESS(8, 0, 0), MLXSW_SP_SB_CM_EGRESS(9, 0, 0), MLXSW_SP_SB_CM_EGRESS(10, 0, 0), @@ -353,21 +375,21 @@ struct mlxsw_sp_sb_mm { } static const struct mlxsw_sp_sb_mm mlxsw_sp_sb_mms[] = { - MLXSW_SP_SB_MM(0, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(1, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(2, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(3, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(4, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(5, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(6, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(7, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(8, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(9, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(10, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(11, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(12, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(13, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(14, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), + MLXSW_SP_SB_MM(0, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(1, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(2, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(3, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(4, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(5, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(6, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(7, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(8, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(9, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(10, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(11, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(12, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(13, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(14, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), }; #define MLXSW_SP_SB_MMS_LEN ARRAY_SIZE(mlxsw_sp_sb_mms) @@ -410,7 +432,7 @@ int mlxsw_sp_port_buffers_init(struct mlxsw_sp_port *mlxsw_sp_port) { int err; - err = mlxsw_sp_port_pb_init(mlxsw_sp_port); + err = mlxsw_sp_port_headroom_init(mlxsw_sp_port); if (err) return err; err = mlxsw_sp_port_sb_cms_init(mlxsw_sp_port); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c new file mode 100644 index 000000000000..0b323661c0b6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c @@ -0,0 +1,480 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c + * Copyright (c) 2016 Mellanox Technologies. All rights reserved. + * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/netdevice.h> +#include <linux/string.h> +#include <linux/bitops.h> +#include <net/dcbnl.h> + +#include "spectrum.h" +#include "reg.h" + +static u8 mlxsw_sp_dcbnl_getdcbx(struct net_device __always_unused *dev) +{ + return DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE; +} + +static u8 mlxsw_sp_dcbnl_setdcbx(struct net_device __always_unused *dev, + u8 mode) +{ + return (mode != (DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE)) ? 1 : 0; +} + +static int mlxsw_sp_dcbnl_ieee_getets(struct net_device *dev, + struct ieee_ets *ets) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + + memcpy(ets, mlxsw_sp_port->dcb.ets, sizeof(*ets)); + + return 0; +} + +static int mlxsw_sp_port_ets_validate(struct mlxsw_sp_port *mlxsw_sp_port, + struct ieee_ets *ets) +{ + struct net_device *dev = mlxsw_sp_port->dev; + bool has_ets_tc = false; + int i, tx_bw_sum = 0; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + switch (ets->tc_tsa[i]) { + case IEEE_8021QAZ_TSA_STRICT: + break; + case IEEE_8021QAZ_TSA_ETS: + has_ets_tc = true; + tx_bw_sum += ets->tc_tx_bw[i]; + break; + default: + netdev_err(dev, "Only strict priority and ETS are supported\n"); + return -EINVAL; + } + + if (ets->prio_tc[i] >= IEEE_8021QAZ_MAX_TCS) { + netdev_err(dev, "Invalid TC\n"); + return -EINVAL; + } + } + + if (has_ets_tc && tx_bw_sum != 100) { + netdev_err(dev, "Total ETS bandwidth should equal 100\n"); + return -EINVAL; + } + + return 0; +} + +static int mlxsw_sp_port_pg_prio_map(struct mlxsw_sp_port *mlxsw_sp_port, + u8 *prio_tc) +{ + char pptb_pl[MLXSW_REG_PPTB_LEN]; + int i; + + mlxsw_reg_pptb_pack(pptb_pl, mlxsw_sp_port->local_port); + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + mlxsw_reg_pptb_prio_to_buff_set(pptb_pl, i, prio_tc[i]); + return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pptb), + pptb_pl); +} + +static bool mlxsw_sp_ets_has_pg(u8 *prio_tc, u8 pg) +{ + int i; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + if (prio_tc[i] == pg) + return true; + return false; +} + +static int mlxsw_sp_port_pg_destroy(struct mlxsw_sp_port *mlxsw_sp_port, + u8 *old_prio_tc, u8 *new_prio_tc) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char pbmc_pl[MLXSW_REG_PBMC_LEN]; + int err, i; + + mlxsw_reg_pbmc_pack(pbmc_pl, mlxsw_sp_port->local_port, 0, 0); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl); + if (err) + return err; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + u8 pg = old_prio_tc[i]; + + if (!mlxsw_sp_ets_has_pg(new_prio_tc, pg)) + mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, pg, 0); + } + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl); +} + +static int mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct ieee_ets *ets) +{ + bool pause_en = mlxsw_sp_port_is_pause_en(mlxsw_sp_port); + struct ieee_ets *my_ets = mlxsw_sp_port->dcb.ets; + struct net_device *dev = mlxsw_sp_port->dev; + int err; + + /* Create the required PGs, but don't destroy existing ones, as + * traffic is still directed to them. + */ + err = __mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, + ets->prio_tc, pause_en, + mlxsw_sp_port->dcb.pfc); + if (err) { + netdev_err(dev, "Failed to configure port's headroom\n"); + return err; + } + + err = mlxsw_sp_port_pg_prio_map(mlxsw_sp_port, ets->prio_tc); + if (err) { + netdev_err(dev, "Failed to set PG-priority mapping\n"); + goto err_port_prio_pg_map; + } + + err = mlxsw_sp_port_pg_destroy(mlxsw_sp_port, my_ets->prio_tc, + ets->prio_tc); + if (err) + netdev_warn(dev, "Failed to remove ununsed PGs\n"); + + return 0; + +err_port_prio_pg_map: + mlxsw_sp_port_pg_destroy(mlxsw_sp_port, ets->prio_tc, my_ets->prio_tc); + return err; +} + +static int __mlxsw_sp_dcbnl_ieee_setets(struct mlxsw_sp_port *mlxsw_sp_port, + struct ieee_ets *ets) +{ + struct ieee_ets *my_ets = mlxsw_sp_port->dcb.ets; + struct net_device *dev = mlxsw_sp_port->dev; + int i, err; + + /* Egress configuration. */ + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + bool dwrr = ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS; + u8 weight = ets->tc_tx_bw[i]; + + err = mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HIERARCY_SUBGROUP, i, + 0, dwrr, weight); + if (err) { + netdev_err(dev, "Failed to link subgroup ETS element %d to group\n", + i); + goto err_port_ets_set; + } + } + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, + ets->prio_tc[i]); + if (err) { + netdev_err(dev, "Failed to map prio %d to TC %d\n", i, + ets->prio_tc[i]); + goto err_port_prio_tc_set; + } + } + + /* Ingress configuration. */ + err = mlxsw_sp_port_headroom_set(mlxsw_sp_port, ets); + if (err) + goto err_port_headroom_set; + + return 0; + +err_port_headroom_set: + i = IEEE_8021QAZ_MAX_TCS; +err_port_prio_tc_set: + for (i--; i >= 0; i--) + mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, my_ets->prio_tc[i]); + i = IEEE_8021QAZ_MAX_TCS; +err_port_ets_set: + for (i--; i >= 0; i--) { + bool dwrr = my_ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS; + u8 weight = my_ets->tc_tx_bw[i]; + + err = mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HIERARCY_SUBGROUP, i, + 0, dwrr, weight); + } + return err; +} + +static int mlxsw_sp_dcbnl_ieee_setets(struct net_device *dev, + struct ieee_ets *ets) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + int err; + + err = mlxsw_sp_port_ets_validate(mlxsw_sp_port, ets); + if (err) + return err; + + err = __mlxsw_sp_dcbnl_ieee_setets(mlxsw_sp_port, ets); + if (err) + return err; + + memcpy(mlxsw_sp_port->dcb.ets, ets, sizeof(*ets)); + + return 0; +} + +static int mlxsw_sp_dcbnl_ieee_getmaxrate(struct net_device *dev, + struct ieee_maxrate *maxrate) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + + memcpy(maxrate, mlxsw_sp_port->dcb.maxrate, sizeof(*maxrate)); + + return 0; +} + +static int mlxsw_sp_dcbnl_ieee_setmaxrate(struct net_device *dev, + struct ieee_maxrate *maxrate) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct ieee_maxrate *my_maxrate = mlxsw_sp_port->dcb.maxrate; + int err, i; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HIERARCY_SUBGROUP, + i, 0, + maxrate->tc_maxrate[i]); + if (err) { + netdev_err(dev, "Failed to set maxrate for TC %d\n", i); + goto err_port_ets_maxrate_set; + } + } + + memcpy(mlxsw_sp_port->dcb.maxrate, maxrate, sizeof(*maxrate)); + + return 0; + +err_port_ets_maxrate_set: + for (i--; i >= 0; i--) + mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HIERARCY_SUBGROUP, + i, 0, my_maxrate->tc_maxrate[i]); + return err; +} + +static int mlxsw_sp_port_pfc_cnt_get(struct mlxsw_sp_port *mlxsw_sp_port, + u8 prio) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct ieee_pfc *my_pfc = mlxsw_sp_port->dcb.pfc; + char ppcnt_pl[MLXSW_REG_PPCNT_LEN]; + int err; + + mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sp_port->local_port, + MLXSW_REG_PPCNT_PRIO_CNT, prio); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ppcnt), ppcnt_pl); + if (err) + return err; + + my_pfc->requests[prio] = mlxsw_reg_ppcnt_tx_pause_get(ppcnt_pl); + my_pfc->indications[prio] = mlxsw_reg_ppcnt_rx_pause_get(ppcnt_pl); + + return 0; +} + +static int mlxsw_sp_dcbnl_ieee_getpfc(struct net_device *dev, + struct ieee_pfc *pfc) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + int err, i; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_pfc_cnt_get(mlxsw_sp_port, i); + if (err) { + netdev_err(dev, "Failed to get PFC count for priority %d\n", + i); + return err; + } + } + + memcpy(pfc, mlxsw_sp_port->dcb.pfc, sizeof(*pfc)); + + return 0; +} + +static int mlxsw_sp_port_pfc_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct ieee_pfc *pfc) +{ + char pfcc_pl[MLXSW_REG_PFCC_LEN]; + + mlxsw_reg_pfcc_pack(pfcc_pl, mlxsw_sp_port->local_port); + mlxsw_reg_pfcc_prio_pack(pfcc_pl, pfc->pfc_en); + + return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pfcc), + pfcc_pl); +} + +static int mlxsw_sp_dcbnl_ieee_setpfc(struct net_device *dev, + struct ieee_pfc *pfc) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + int err; + + if (mlxsw_sp_port->link.tx_pause || mlxsw_sp_port->link.rx_pause) { + netdev_err(dev, "PAUSE frames already enabled on port\n"); + return -EINVAL; + } + + err = __mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, + mlxsw_sp_port->dcb.ets->prio_tc, + false, pfc); + if (err) { + netdev_err(dev, "Failed to configure port's headroom for PFC\n"); + return err; + } + + err = mlxsw_sp_port_pfc_set(mlxsw_sp_port, pfc); + if (err) { + netdev_err(dev, "Failed to configure PFC\n"); + goto err_port_pfc_set; + } + + memcpy(mlxsw_sp_port->dcb.pfc, pfc, sizeof(*pfc)); + + return 0; + +err_port_pfc_set: + __mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, + mlxsw_sp_port->dcb.ets->prio_tc, false, + mlxsw_sp_port->dcb.pfc); + return err; +} + +static const struct dcbnl_rtnl_ops mlxsw_sp_dcbnl_ops = { + .ieee_getets = mlxsw_sp_dcbnl_ieee_getets, + .ieee_setets = mlxsw_sp_dcbnl_ieee_setets, + .ieee_getmaxrate = mlxsw_sp_dcbnl_ieee_getmaxrate, + .ieee_setmaxrate = mlxsw_sp_dcbnl_ieee_setmaxrate, + .ieee_getpfc = mlxsw_sp_dcbnl_ieee_getpfc, + .ieee_setpfc = mlxsw_sp_dcbnl_ieee_setpfc, + + .getdcbx = mlxsw_sp_dcbnl_getdcbx, + .setdcbx = mlxsw_sp_dcbnl_setdcbx, +}; + +static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + mlxsw_sp_port->dcb.ets = kzalloc(sizeof(*mlxsw_sp_port->dcb.ets), + GFP_KERNEL); + if (!mlxsw_sp_port->dcb.ets) + return -ENOMEM; + + mlxsw_sp_port->dcb.ets->ets_cap = IEEE_8021QAZ_MAX_TCS; + + return 0; +} + +static void mlxsw_sp_port_ets_fini(struct mlxsw_sp_port *mlxsw_sp_port) +{ + kfree(mlxsw_sp_port->dcb.ets); +} + +static int mlxsw_sp_port_maxrate_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + int i; + + mlxsw_sp_port->dcb.maxrate = kmalloc(sizeof(*mlxsw_sp_port->dcb.maxrate), + GFP_KERNEL); + if (!mlxsw_sp_port->dcb.maxrate) + return -ENOMEM; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + mlxsw_sp_port->dcb.maxrate->tc_maxrate[i] = MLXSW_REG_QEEC_MAS_DIS; + + return 0; +} + +static void mlxsw_sp_port_maxrate_fini(struct mlxsw_sp_port *mlxsw_sp_port) +{ + kfree(mlxsw_sp_port->dcb.maxrate); +} + +static int mlxsw_sp_port_pfc_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + mlxsw_sp_port->dcb.pfc = kzalloc(sizeof(*mlxsw_sp_port->dcb.pfc), + GFP_KERNEL); + if (!mlxsw_sp_port->dcb.pfc) + return -ENOMEM; + + mlxsw_sp_port->dcb.pfc->pfc_cap = IEEE_8021QAZ_MAX_TCS; + + return 0; +} + +static void mlxsw_sp_port_pfc_fini(struct mlxsw_sp_port *mlxsw_sp_port) +{ + kfree(mlxsw_sp_port->dcb.pfc); +} + +int mlxsw_sp_port_dcb_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + int err; + + err = mlxsw_sp_port_ets_init(mlxsw_sp_port); + if (err) + return err; + err = mlxsw_sp_port_maxrate_init(mlxsw_sp_port); + if (err) + goto err_port_maxrate_init; + err = mlxsw_sp_port_pfc_init(mlxsw_sp_port); + if (err) + goto err_port_pfc_init; + + mlxsw_sp_port->dev->dcbnl_ops = &mlxsw_sp_dcbnl_ops; + + return 0; + +err_port_pfc_init: + mlxsw_sp_port_maxrate_fini(mlxsw_sp_port); +err_port_maxrate_init: + mlxsw_sp_port_ets_fini(mlxsw_sp_port); + return err; +} + +void mlxsw_sp_port_dcb_fini(struct mlxsw_sp_port *mlxsw_sp_port) +{ + mlxsw_sp_port_pfc_fini(mlxsw_sp_port); + mlxsw_sp_port_maxrate_fini(mlxsw_sp_port); + mlxsw_sp_port_ets_fini(mlxsw_sp_port); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index 7a60a26759b6..c49447f31acc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -518,7 +518,8 @@ static void mlxsw_sx_port_get_stats(struct net_device *dev, int i; int err; - mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sx_port->local_port); + mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sx_port->local_port, + MLXSW_REG_PPCNT_IEEE_8023_CNT, 0); err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(ppcnt), ppcnt_pl); for (i = 0; i < MLXSW_SX_PORT_HW_STATS_LEN; i++) data[i] = !err ? mlxsw_sx_port_hw_stats[i].getter(ppcnt_pl) : 0; diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 1c0fa364323e..51cccddfe403 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1192,6 +1192,45 @@ out: unparsed->vx_flags &= ~VXLAN_GBP_USED_BITS; } +static bool vxlan_parse_gpe_hdr(struct vxlanhdr *unparsed, + __be32 *protocol, + struct sk_buff *skb, u32 vxflags) +{ + struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)unparsed; + + /* Need to have Next Protocol set for interfaces in GPE mode. */ + if (!gpe->np_applied) + return false; + /* "The initial version is 0. If a receiver does not support the + * version indicated it MUST drop the packet. + */ + if (gpe->version != 0) + return false; + /* "When the O bit is set to 1, the packet is an OAM packet and OAM + * processing MUST occur." However, we don't implement OAM + * processing, thus drop the packet. + */ + if (gpe->oam_flag) + return false; + + switch (gpe->next_protocol) { + case VXLAN_GPE_NP_IPV4: + *protocol = htons(ETH_P_IP); + break; + case VXLAN_GPE_NP_IPV6: + *protocol = htons(ETH_P_IPV6); + break; + case VXLAN_GPE_NP_ETHERNET: + *protocol = htons(ETH_P_TEB); + break; + default: + return false; + } + + unparsed->vx_flags &= ~VXLAN_GPE_USED_BITS; + return true; +} + static bool vxlan_set_mac(struct vxlan_dev *vxlan, struct vxlan_sock *vs, struct sk_buff *skb) @@ -1257,9 +1296,11 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) struct vxlanhdr unparsed; struct vxlan_metadata _md; struct vxlan_metadata *md = &_md; + __be32 protocol = htons(ETH_P_TEB); + bool raw_proto = false; void *oiph; - /* Need Vxlan and inner Ethernet header to be present */ + /* Need UDP and VXLAN header to be present */ if (!pskb_may_pull(skb, VXLAN_HLEN)) return 1; @@ -1283,9 +1324,18 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) if (!vxlan) goto drop; - if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB), - !net_eq(vxlan->net, dev_net(vxlan->dev)))) - goto drop; + /* For backwards compatibility, only allow reserved fields to be + * used by VXLAN extensions if explicitly requested. + */ + if (vs->flags & VXLAN_F_GPE) { + if (!vxlan_parse_gpe_hdr(&unparsed, &protocol, skb, vs->flags)) + goto drop; + raw_proto = true; + } + + if (__iptunnel_pull_header(skb, VXLAN_HLEN, protocol, raw_proto, + !net_eq(vxlan->net, dev_net(vxlan->dev)))) + goto drop; if (vxlan_collect_metadata(vs)) { __be32 vni = vxlan_vni(vxlan_hdr(skb)->vx_vni); @@ -1304,14 +1354,14 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) memset(md, 0, sizeof(*md)); } - /* For backwards compatibility, only allow reserved fields to be - * used by VXLAN extensions if explicitly requested. - */ if (vs->flags & VXLAN_F_REMCSUM_RX) if (!vxlan_remcsum(&unparsed, skb, vs->flags)) goto drop; if (vs->flags & VXLAN_F_GBP) vxlan_parse_gbp_hdr(&unparsed, skb, vs->flags, md); + /* Note that GBP and GPE can never be active together. This is + * ensured in vxlan_dev_configure. + */ if (unparsed.vx_flags || unparsed.vx_vni) { /* If there are any unprocessed flags remaining treat @@ -1325,8 +1375,13 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) goto drop; } - if (!vxlan_set_mac(vxlan, vs, skb)) - goto drop; + if (!raw_proto) { + if (!vxlan_set_mac(vxlan, vs, skb)) + goto drop; + } else { + skb->dev = vxlan->dev; + skb->pkt_type = PACKET_HOST; + } oiph = skb_network_header(skb); skb_reset_network_header(skb); @@ -1685,6 +1740,27 @@ static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags, gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK); } +static int vxlan_build_gpe_hdr(struct vxlanhdr *vxh, u32 vxflags, + __be16 protocol) +{ + struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)vxh; + + gpe->np_applied = 1; + + switch (protocol) { + case htons(ETH_P_IP): + gpe->next_protocol = VXLAN_GPE_NP_IPV4; + return 0; + case htons(ETH_P_IPV6): + gpe->next_protocol = VXLAN_GPE_NP_IPV6; + return 0; + case htons(ETH_P_TEB): + gpe->next_protocol = VXLAN_GPE_NP_ETHERNET; + return 0; + } + return -EPFNOSUPPORT; +} + static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst, int iphdr_len, __be32 vni, struct vxlan_metadata *md, u32 vxflags, @@ -1694,6 +1770,7 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst, int min_headroom; int err; int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; + __be16 inner_protocol = htons(ETH_P_TEB); if ((vxflags & VXLAN_F_REMCSUM_TX) && skb->ip_summed == CHECKSUM_PARTIAL) { @@ -1712,10 +1789,8 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst, /* Need space for new headers (invalidates iph ptr) */ err = skb_cow_head(skb, min_headroom); - if (unlikely(err)) { - kfree_skb(skb); - return err; - } + if (unlikely(err)) + goto out_free; skb = vlan_hwaccel_push_inside(skb); if (WARN_ON(!skb)) @@ -1744,9 +1819,19 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst, if (vxflags & VXLAN_F_GBP) vxlan_build_gbp_hdr(vxh, vxflags, md); + if (vxflags & VXLAN_F_GPE) { + err = vxlan_build_gpe_hdr(vxh, vxflags, skb->protocol); + if (err < 0) + goto out_free; + inner_protocol = skb->protocol; + } - skb_set_inner_protocol(skb, htons(ETH_P_TEB)); + skb_set_inner_protocol(skb, inner_protocol); return 0; + +out_free: + kfree_skb(skb); + return err; } static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, @@ -2106,9 +2191,17 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) info = skb_tunnel_info(skb); skb_reset_mac_header(skb); - eth = eth_hdr(skb); - if ((vxlan->flags & VXLAN_F_PROXY)) { + if (vxlan->flags & VXLAN_F_COLLECT_METADATA) { + if (info && info->mode & IP_TUNNEL_INFO_TX) + vxlan_xmit_one(skb, dev, NULL, false); + else + kfree_skb(skb); + return NETDEV_TX_OK; + } + + if (vxlan->flags & VXLAN_F_PROXY) { + eth = eth_hdr(skb); if (ntohs(eth->h_proto) == ETH_P_ARP) return arp_reduce(dev, skb); #if IS_ENABLED(CONFIG_IPV6) @@ -2123,18 +2216,10 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) msg->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) return neigh_reduce(dev, skb); } - eth = eth_hdr(skb); #endif } - if (vxlan->flags & VXLAN_F_COLLECT_METADATA) { - if (info && info->mode & IP_TUNNEL_INFO_TX) - vxlan_xmit_one(skb, dev, NULL, false); - else - kfree_skb(skb); - return NETDEV_TX_OK; - } - + eth = eth_hdr(skb); f = vxlan_find_mac(vxlan, eth->h_dest); did_rsc = false; @@ -2404,7 +2489,7 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) return 0; } -static const struct net_device_ops vxlan_netdev_ops = { +static const struct net_device_ops vxlan_netdev_ether_ops = { .ndo_init = vxlan_init, .ndo_uninit = vxlan_uninit, .ndo_open = vxlan_open, @@ -2421,6 +2506,17 @@ static const struct net_device_ops vxlan_netdev_ops = { .ndo_fill_metadata_dst = vxlan_fill_metadata_dst, }; +static const struct net_device_ops vxlan_netdev_raw_ops = { + .ndo_init = vxlan_init, + .ndo_uninit = vxlan_uninit, + .ndo_open = vxlan_open, + .ndo_stop = vxlan_stop, + .ndo_start_xmit = vxlan_xmit, + .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_change_mtu = vxlan_change_mtu, + .ndo_fill_metadata_dst = vxlan_fill_metadata_dst, +}; + /* Info for udev, that this is a virtual tunnel endpoint */ static struct device_type vxlan_type = { .name = "vxlan", @@ -2458,10 +2554,6 @@ static void vxlan_setup(struct net_device *dev) struct vxlan_dev *vxlan = netdev_priv(dev); unsigned int h; - eth_hw_addr_random(dev); - ether_setup(dev); - - dev->netdev_ops = &vxlan_netdev_ops; dev->destructor = free_netdev; SET_NETDEV_DEVTYPE(dev, &vxlan_type); @@ -2476,8 +2568,7 @@ static void vxlan_setup(struct net_device *dev) dev->hw_features |= NETIF_F_GSO_SOFTWARE; dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; netif_keep_dst(dev); - dev->priv_flags &= ~IFF_TX_SKB_SHARING; - dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE; + dev->priv_flags |= IFF_NO_QUEUE; INIT_LIST_HEAD(&vxlan->next); spin_lock_init(&vxlan->hash_lock); @@ -2496,6 +2587,26 @@ static void vxlan_setup(struct net_device *dev) INIT_HLIST_HEAD(&vxlan->fdb_head[h]); } +static void vxlan_ether_setup(struct net_device *dev) +{ + eth_hw_addr_random(dev); + ether_setup(dev); + dev->priv_flags &= ~IFF_TX_SKB_SHARING; + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + dev->netdev_ops = &vxlan_netdev_ether_ops; +} + +static void vxlan_raw_setup(struct net_device *dev) +{ + dev->type = ARPHRD_NONE; + dev->hard_header_len = 0; + dev->addr_len = 0; + dev->mtu = ETH_DATA_LEN; + dev->tx_queue_len = 1000; + dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; + dev->netdev_ops = &vxlan_netdev_raw_ops; +} + static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = { [IFLA_VXLAN_ID] = { .type = NLA_U32 }, [IFLA_VXLAN_GROUP] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, @@ -2522,6 +2633,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = { [IFLA_VXLAN_REMCSUM_TX] = { .type = NLA_U8 }, [IFLA_VXLAN_REMCSUM_RX] = { .type = NLA_U8 }, [IFLA_VXLAN_GBP] = { .type = NLA_FLAG, }, + [IFLA_VXLAN_GPE] = { .type = NLA_FLAG, }, [IFLA_VXLAN_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG }, }; @@ -2722,6 +2834,21 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, __be16 default_port = vxlan->cfg.dst_port; struct net_device *lowerdev = NULL; + if (conf->flags & VXLAN_F_GPE) { + if (conf->flags & ~VXLAN_F_ALLOWED_GPE) + return -EINVAL; + /* For now, allow GPE only together with COLLECT_METADATA. + * This can be relaxed later; in such case, the other side + * of the PtP link will have to be provided. + */ + if (!(conf->flags & VXLAN_F_COLLECT_METADATA)) + return -EINVAL; + + vxlan_raw_setup(dev); + } else { + vxlan_ether_setup(dev); + } + vxlan->net = src_net; dst->remote_vni = conf->vni; @@ -2783,8 +2910,12 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, dev->needed_headroom = needed_headroom; memcpy(&vxlan->cfg, conf, sizeof(*conf)); - if (!vxlan->cfg.dst_port) - vxlan->cfg.dst_port = default_port; + if (!vxlan->cfg.dst_port) { + if (conf->flags & VXLAN_F_GPE) + vxlan->cfg.dst_port = 4790; /* IANA assigned VXLAN-GPE port */ + else + vxlan->cfg.dst_port = default_port; + } vxlan->flags |= conf->flags; if (!vxlan->cfg.age_interval) @@ -2955,6 +3086,9 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev, if (data[IFLA_VXLAN_GBP]) conf.flags |= VXLAN_F_GBP; + if (data[IFLA_VXLAN_GPE]) + conf.flags |= VXLAN_F_GPE; + if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL]) conf.flags |= VXLAN_F_REMCSUM_NOPARTIAL; @@ -2971,6 +3105,10 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev, case -EEXIST: pr_info("duplicate VNI %u\n", be32_to_cpu(conf.vni)); break; + + case -EINVAL: + pr_info("unsupported combination of extensions\n"); + break; } return err; @@ -3098,6 +3236,10 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_flag(skb, IFLA_VXLAN_GBP)) goto nla_put_failure; + if (vxlan->flags & VXLAN_F_GPE && + nla_put_flag(skb, IFLA_VXLAN_GPE)) + goto nla_put_failure; + if (vxlan->flags & VXLAN_F_REMCSUM_NOPARTIAL && nla_put_flag(skb, IFLA_VXLAN_REMCSUM_NOPARTIAL)) goto nla_put_failure; diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 56050f913339..16435d8b1f93 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -295,8 +295,15 @@ static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph, return INET_ECN_encapsulate(tos, inner); } -int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto, - bool xnet); +int __iptunnel_pull_header(struct sk_buff *skb, int hdr_len, + __be16 inner_proto, bool raw_proto, bool xnet); + +static inline int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, + __be16 inner_proto, bool xnet) +{ + return __iptunnel_pull_header(skb, hdr_len, inner_proto, false, xnet); +} + void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, u8 proto, u8 tos, u8 ttl, __be16 df, bool xnet); diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 73ed2e951c02..dcc6f4057115 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -119,6 +119,64 @@ struct vxlanhdr_gbp { #define VXLAN_GBP_POLICY_APPLIED (BIT(3) << 16) #define VXLAN_GBP_ID_MASK (0xFFFF) +/* + * VXLAN Generic Protocol Extension (VXLAN_F_GPE): + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |R|R|Ver|I|P|R|O| Reserved |Next Protocol | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | VXLAN Network Identifier (VNI) | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * Ver = Version. Indicates VXLAN GPE protocol version. + * + * P = Next Protocol Bit. The P bit is set to indicate that the + * Next Protocol field is present. + * + * O = OAM Flag Bit. The O bit is set to indicate that the packet + * is an OAM packet. + * + * Next Protocol = This 8 bit field indicates the protocol header + * immediately following the VXLAN GPE header. + * + * https://tools.ietf.org/html/draft-ietf-nvo3-vxlan-gpe-01 + */ + +struct vxlanhdr_gpe { +#if defined(__LITTLE_ENDIAN_BITFIELD) + u8 oam_flag:1, + reserved_flags1:1, + np_applied:1, + instance_applied:1, + version:2, +reserved_flags2:2; +#elif defined(__BIG_ENDIAN_BITFIELD) + u8 reserved_flags2:2, + version:2, + instance_applied:1, + np_applied:1, + reserved_flags1:1, + oam_flag:1; +#endif + u8 reserved_flags3; + u8 reserved_flags4; + u8 next_protocol; + __be32 vx_vni; +}; + +/* VXLAN-GPE header flags. */ +#define VXLAN_HF_VER cpu_to_be32(BIT(29) | BIT(28)) +#define VXLAN_HF_NP cpu_to_be32(BIT(26)) +#define VXLAN_HF_OAM cpu_to_be32(BIT(24)) + +#define VXLAN_GPE_USED_BITS (VXLAN_HF_VER | VXLAN_HF_NP | VXLAN_HF_OAM | \ + cpu_to_be32(0xff)) + +/* VXLAN-GPE header Next Protocol. */ +#define VXLAN_GPE_NP_IPV4 0x01 +#define VXLAN_GPE_NP_IPV6 0x02 +#define VXLAN_GPE_NP_ETHERNET 0x03 +#define VXLAN_GPE_NP_NSH 0x04 + struct vxlan_metadata { u32 gbp; }; @@ -206,16 +264,26 @@ struct vxlan_dev { #define VXLAN_F_GBP 0x800 #define VXLAN_F_REMCSUM_NOPARTIAL 0x1000 #define VXLAN_F_COLLECT_METADATA 0x2000 +#define VXLAN_F_GPE 0x4000 /* Flags that are used in the receive path. These flags must match in * order for a socket to be shareable */ #define VXLAN_F_RCV_FLAGS (VXLAN_F_GBP | \ + VXLAN_F_GPE | \ VXLAN_F_UDP_ZERO_CSUM6_RX | \ VXLAN_F_REMCSUM_RX | \ VXLAN_F_REMCSUM_NOPARTIAL | \ VXLAN_F_COLLECT_METADATA) +/* Flags that can be set together with VXLAN_F_GPE. */ +#define VXLAN_F_ALLOWED_GPE (VXLAN_F_GPE | \ + VXLAN_F_IPV6 | \ + VXLAN_F_UDP_ZERO_CSUM_TX | \ + VXLAN_F_UDP_ZERO_CSUM6_TX | \ + VXLAN_F_UDP_ZERO_CSUM6_RX | \ + VXLAN_F_COLLECT_METADATA) + struct net_device *vxlan_dev_create(struct net *net, const char *name, u8 name_assign_type, struct vxlan_config *conf); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index c488066fb53a..9427f17d06d6 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -488,6 +488,7 @@ enum { IFLA_VXLAN_REMCSUM_NOPARTIAL, IFLA_VXLAN_COLLECT_METADATA, IFLA_VXLAN_LABEL, + IFLA_VXLAN_GPE, __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index b3ab1205dfdf..43445df61efd 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -86,15 +86,15 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, } EXPORT_SYMBOL_GPL(iptunnel_xmit); -int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto, - bool xnet) +int __iptunnel_pull_header(struct sk_buff *skb, int hdr_len, + __be16 inner_proto, bool raw_proto, bool xnet) { if (unlikely(!pskb_may_pull(skb, hdr_len))) return -ENOMEM; skb_pull_rcsum(skb, hdr_len); - if (inner_proto == htons(ETH_P_TEB)) { + if (!raw_proto && inner_proto == htons(ETH_P_TEB)) { struct ethhdr *eh; if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) @@ -117,7 +117,7 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto, return iptunnel_pull_offloads(skb); } -EXPORT_SYMBOL_GPL(iptunnel_pull_header); +EXPORT_SYMBOL_GPL(__iptunnel_pull_header); struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, gfp_t flags) |