aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c3
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c3
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_adminq.c33
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_adminq.h1
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_common.c4
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_debugfs.c5
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_devids.h1
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ethtool.c16
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_fcoe.c12
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c14
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_nvm.c43
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_prototype.h1
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.c28
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.h10
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_type.h34
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_virtchnl.h45
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c90
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h2
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_adminq.h1
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_common.c1
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_devids.h1
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_txrx.c24
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_txrx.h10
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_type.h43
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h45
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf_main.c58
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/Kconfig8
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/Makefile1
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/reg.h507
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c263
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.h61
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c108
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c480
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/switchx2.c3
-rw-r--r--drivers/net/vxlan.c210
-rw-r--r--include/net/ip_tunnels.h11
-rw-r--r--include/net/vxlan.h68
-rw-r--r--include/uapi/linux/if_link.h1
-rw-r--r--net/ipv4/ip_tunnel_core.c8
39 files changed, 2036 insertions, 221 deletions
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index d1e3f0997d6b..a1e329ec24cd 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -168,7 +168,8 @@ MODULE_PARM_DESC(force_init, "Forcibly become Master PF and initialize adapter,"
static int dflt_msg_enable = DFLT_MSG_ENABLE;
module_param(dflt_msg_enable, int, 0644);
-MODULE_PARM_DESC(dflt_msg_enable, "Chelsio T4 default message enable bitmap");
+MODULE_PARM_DESC(dflt_msg_enable, "Chelsio T4 default message enable bitmap, "
+ "deprecated parameter");
/*
* The driver uses the best interrupt scheme available on a platform in the
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index 1cc8a7a69457..730fec73d5a6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -74,7 +74,8 @@ static int dflt_msg_enable = DFLT_MSG_ENABLE;
module_param(dflt_msg_enable, int, 0644);
MODULE_PARM_DESC(dflt_msg_enable,
- "default adapter ethtool message level bitmap");
+ "default adapter ethtool message level bitmap, "
+ "deprecated parameter");
/*
* The driver uses the best interrupt scheme available on a platform in the
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
index df8e2fd6a649..43bb4139d896 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
@@ -33,16 +33,6 @@
static void i40e_resume_aq(struct i40e_hw *hw);
/**
- * i40e_is_nvm_update_op - return true if this is an NVM update operation
- * @desc: API request descriptor
- **/
-static inline bool i40e_is_nvm_update_op(struct i40e_aq_desc *desc)
-{
- return (desc->opcode == cpu_to_le16(i40e_aqc_opc_nvm_erase)) ||
- (desc->opcode == cpu_to_le16(i40e_aqc_opc_nvm_update));
-}
-
-/**
* i40e_adminq_init_regs - Initialize AdminQ registers
* @hw: pointer to the hardware structure
*
@@ -624,7 +614,7 @@ i40e_status i40e_init_adminq(struct i40e_hw *hw)
/* pre-emptive resource lock release */
i40e_aq_release_resource(hw, I40E_NVM_RESOURCE_ID, 0, NULL);
- hw->aq.nvm_release_on_done = false;
+ hw->nvm_release_on_done = false;
hw->nvmupd_state = I40E_NVMUPD_STATE_INIT;
ret_code = i40e_aq_set_hmc_resource_profile(hw,
@@ -1023,26 +1013,7 @@ i40e_status i40e_clean_arq_element(struct i40e_hw *hw,
hw->aq.arq.next_to_clean = ntc;
hw->aq.arq.next_to_use = ntu;
- if (i40e_is_nvm_update_op(&e->desc)) {
- if (hw->aq.nvm_release_on_done) {
- i40e_release_nvm(hw);
- hw->aq.nvm_release_on_done = false;
- }
-
- switch (hw->nvmupd_state) {
- case I40E_NVMUPD_STATE_INIT_WAIT:
- hw->nvmupd_state = I40E_NVMUPD_STATE_INIT;
- break;
-
- case I40E_NVMUPD_STATE_WRITE_WAIT:
- hw->nvmupd_state = I40E_NVMUPD_STATE_WRITING;
- break;
-
- default:
- break;
- }
- }
-
+ i40e_nvmupd_check_wait_event(hw, le16_to_cpu(e->desc.opcode));
clean_arq_element_out:
/* Set pending if needed, unlock and return */
if (pending)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.h b/drivers/net/ethernet/intel/i40e/i40e_adminq.h
index 12fbbddea299..d92aad38afdc 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.h
@@ -97,7 +97,6 @@ struct i40e_adminq_info {
u32 fw_build; /* firmware build number */
u16 api_maj_ver; /* api major version */
u16 api_min_ver; /* api minor version */
- bool nvm_release_on_done;
struct mutex asq_mutex; /* Send queue lock */
struct mutex arq_mutex; /* Receive queue lock */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index 8276a1393e6d..f3c1d8890cbb 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -60,6 +60,7 @@ static i40e_status i40e_set_mac_type(struct i40e_hw *hw)
case I40E_DEV_ID_SFP_X722:
case I40E_DEV_ID_1G_BASE_T_X722:
case I40E_DEV_ID_10G_BASE_T_X722:
+ case I40E_DEV_ID_SFP_I_X722:
hw->mac.type = I40E_MAC_X722;
break;
default:
@@ -3079,6 +3080,9 @@ static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff,
break;
case I40E_AQ_CAP_ID_MSIX:
p->num_msix_vectors = number;
+ i40e_debug(hw, I40E_DEBUG_INIT,
+ "HW Capability: MSIX vector count = %d\n",
+ p->num_msix_vectors);
break;
case I40E_AQ_CAP_ID_VF_MSIX:
p->num_msix_vectors_vf = number;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index 0c97733d253c..83dccf1792e7 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -147,9 +147,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
dev_info(&pf->pdev->dev, " vlan_features = 0x%08lx\n",
(unsigned long int)nd->vlan_features);
}
- if (vsi->active_vlans)
- dev_info(&pf->pdev->dev,
- " vlgrp: & = %p\n", vsi->active_vlans);
+ dev_info(&pf->pdev->dev,
+ " vlgrp: & = %p\n", vsi->active_vlans);
dev_info(&pf->pdev->dev,
" state = %li flags = 0x%08lx, netdev_registered = %i, current_netdev_flags = 0x%04x\n",
vsi->state, vsi->flags,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_devids.h b/drivers/net/ethernet/intel/i40e/i40e_devids.h
index 99257fcd1ef4..dd4457d29e98 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_devids.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_devids.h
@@ -44,6 +44,7 @@
#define I40E_DEV_ID_SFP_X722 0x37D0
#define I40E_DEV_ID_1G_BASE_T_X722 0x37D1
#define I40E_DEV_ID_10G_BASE_T_X722 0x37D2
+#define I40E_DEV_ID_SFP_I_X722 0x37D3
#define i40e_is_40G_device(d) ((d) == I40E_DEV_ID_QSFP_A || \
(d) == I40E_DEV_ID_QSFP_B || \
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 410d237f9137..8a83d4514812 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -313,6 +313,13 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw,
ecmd->advertising |= ADVERTISED_10000baseT_Full;
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
ecmd->advertising |= ADVERTISED_1000baseT_Full;
+ /* adding 100baseT support for 10GBASET_PHY */
+ if (pf->flags & I40E_FLAG_HAVE_10GBASET_PHY) {
+ ecmd->supported |= SUPPORTED_100baseT_Full;
+ ecmd->advertising |= ADVERTISED_100baseT_Full |
+ ADVERTISED_1000baseT_Full |
+ ADVERTISED_10000baseT_Full;
+ }
break;
case I40E_PHY_TYPE_1000BASE_T_OPTICAL:
ecmd->supported = SUPPORTED_Autoneg |
@@ -325,6 +332,15 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw,
SUPPORTED_100baseT_Full;
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_100MB)
ecmd->advertising |= ADVERTISED_100baseT_Full;
+ /* firmware detects 10G phy as 100M phy at 100M speed */
+ if (pf->flags & I40E_FLAG_HAVE_10GBASET_PHY) {
+ ecmd->supported |= SUPPORTED_10000baseT_Full |
+ SUPPORTED_1000baseT_Full;
+ ecmd->advertising |= ADVERTISED_Autoneg |
+ ADVERTISED_100baseT_Full |
+ ADVERTISED_1000baseT_Full |
+ ADVERTISED_10000baseT_Full;
+ }
break;
case I40E_PHY_TYPE_10GBASE_CR1_CU:
case I40E_PHY_TYPE_10GBASE_CR1:
diff --git a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c
index 92d2208d13c7..58e6c1570335 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c
@@ -1,7 +1,7 @@
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2015 Intel Corporation.
+ * Copyright(c) 2013 - 2016 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -38,16 +38,6 @@
#include "i40e_fcoe.h"
/**
- * i40e_rx_is_fcoe - returns true if the rx packet type is FCoE
- * @ptype: the packet type field from rx descriptor write-back
- **/
-static inline bool i40e_rx_is_fcoe(u16 ptype)
-{
- return (ptype >= I40E_RX_PTYPE_L2_FCOE_PAY3) &&
- (ptype <= I40E_RX_PTYPE_L2_FCOE_VFT_FCOTHER);
-}
-
-/**
* i40e_fcoe_sof_is_class2 - returns true if this is a FC Class 2 SOF
* @sof: the FCoE start of frame delimiter
**/
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 297fd39ba255..dc3b3939dd0a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -46,7 +46,7 @@ static const char i40e_driver_string[] =
#define DRV_VERSION_MAJOR 1
#define DRV_VERSION_MINOR 5
-#define DRV_VERSION_BUILD 1
+#define DRV_VERSION_BUILD 5
#define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
__stringify(DRV_VERSION_MINOR) "." \
__stringify(DRV_VERSION_BUILD) DRV_KERN
@@ -90,6 +90,7 @@ static const struct pci_device_id i40e_pci_tbl[] = {
{PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_X722), 0},
{PCI_VDEVICE(INTEL, I40E_DEV_ID_1G_BASE_T_X722), 0},
{PCI_VDEVICE(INTEL, I40E_DEV_ID_10G_BASE_T_X722), 0},
+ {PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_I_X722), 0},
{PCI_VDEVICE(INTEL, I40E_DEV_ID_20G_KR2), 0},
{PCI_VDEVICE(INTEL, I40E_DEV_ID_20G_KR2_A), 0},
/* required last entry */
@@ -6858,6 +6859,7 @@ static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit)
*/
ret = i40e_aq_set_phy_int_mask(&pf->hw,
~(I40E_AQ_EVENT_LINK_UPDOWN |
+ I40E_AQ_EVENT_MEDIA_NA |
I40E_AQ_EVENT_MODULE_QUAL_FAIL), NULL);
if (ret)
dev_info(&pf->pdev->dev, "set phy mask fail, err %s aq_err %s\n",
@@ -8436,7 +8438,6 @@ static int i40e_sw_init(struct i40e_pf *pf)
pf->msg_enable = netif_msg_init(I40E_DEFAULT_MSG_ENABLE,
(NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK));
- pf->hw.debug_mask = pf->msg_enable | I40E_DEBUG_DIAG;
if (debug != -1 && debug != I40E_DEFAULT_MSG_ENABLE) {
if (I40E_DEBUG_USER & debug)
pf->hw.debug_mask = debug;
@@ -8447,7 +8448,6 @@ static int i40e_sw_init(struct i40e_pf *pf)
/* Set default capability flags */
pf->flags = I40E_FLAG_RX_CSUM_ENABLED |
I40E_FLAG_MSI_ENABLED |
- I40E_FLAG_LINK_POLLING_ENABLED |
I40E_FLAG_MSIX_ENABLED;
if (iommu_present(&pci_bus_type))
@@ -9069,6 +9069,7 @@ static const struct net_device_ops i40e_netdev_ops = {
.ndo_get_vf_config = i40e_ndo_get_vf_config,
.ndo_set_vf_link_state = i40e_ndo_set_vf_link_state,
.ndo_set_vf_spoofchk = i40e_ndo_set_vf_spoofchk,
+ .ndo_set_vf_trust = i40e_ndo_set_vf_trust,
#if IS_ENABLED(CONFIG_VXLAN)
.ndo_add_vxlan_port = i40e_add_vxlan_port,
.ndo_del_vxlan_port = i40e_del_vxlan_port,
@@ -9158,6 +9159,12 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
I40E_VLAN_ANY, false, true);
spin_unlock_bh(&vsi->mac_filter_list_lock);
}
+ } else if ((pf->hw.aq.api_maj_ver > 1) ||
+ ((pf->hw.aq.api_maj_ver == 1) &&
+ (pf->hw.aq.api_min_ver > 4))) {
+ /* Supported in FW API version higher than 1.4 */
+ pf->flags |= I40E_FLAG_GENEVE_OFFLOAD_CAPABLE;
+ pf->auto_disable_flags = I40E_FLAG_HW_ATR_EVICT_CAPABLE;
} else {
/* relate the VSI_VMDQ name to the VSI_MAIN name */
snprintf(netdev->name, IFNAMSIZ, "%sv%%d",
@@ -11064,6 +11071,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
*/
err = i40e_aq_set_phy_int_mask(&pf->hw,
~(I40E_AQ_EVENT_LINK_UPDOWN |
+ I40E_AQ_EVENT_MEDIA_NA |
I40E_AQ_EVENT_MODULE_QUAL_FAIL), NULL);
if (err)
dev_info(&pf->pdev->dev, "set phy mask fail, err %s aq_err %s\n",
diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
index 5730f8091e1b..f2cea3d25de3 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
@@ -696,7 +696,7 @@ i40e_status i40e_nvmupd_command(struct i40e_hw *hw,
i40e_debug(hw, I40E_DEBUG_NVM, "%s state %d nvm_release_on_hold %d cmd 0x%08x config 0x%08x offset 0x%08x data_size 0x%08x\n",
i40e_nvm_update_state_str[upd_cmd],
hw->nvmupd_state,
- hw->aq.nvm_release_on_done,
+ hw->nvm_release_on_done,
cmd->command, cmd->config, cmd->offset, cmd->data_size);
if (upd_cmd == I40E_NVMUPD_INVALID) {
@@ -799,7 +799,7 @@ static i40e_status i40e_nvmupd_state_init(struct i40e_hw *hw,
if (status) {
i40e_release_nvm(hw);
} else {
- hw->aq.nvm_release_on_done = true;
+ hw->nvm_release_on_done = true;
hw->nvmupd_state = I40E_NVMUPD_STATE_INIT_WAIT;
}
}
@@ -815,7 +815,7 @@ static i40e_status i40e_nvmupd_state_init(struct i40e_hw *hw,
if (status) {
i40e_release_nvm(hw);
} else {
- hw->aq.nvm_release_on_done = true;
+ hw->nvm_release_on_done = true;
hw->nvmupd_state = I40E_NVMUPD_STATE_INIT_WAIT;
}
}
@@ -849,7 +849,7 @@ static i40e_status i40e_nvmupd_state_init(struct i40e_hw *hw,
-EIO;
i40e_release_nvm(hw);
} else {
- hw->aq.nvm_release_on_done = true;
+ hw->nvm_release_on_done = true;
hw->nvmupd_state = I40E_NVMUPD_STATE_INIT_WAIT;
}
}
@@ -953,7 +953,7 @@ retry:
-EIO;
hw->nvmupd_state = I40E_NVMUPD_STATE_INIT;
} else {
- hw->aq.nvm_release_on_done = true;
+ hw->nvm_release_on_done = true;
hw->nvmupd_state = I40E_NVMUPD_STATE_INIT_WAIT;
}
break;
@@ -980,7 +980,7 @@ retry:
-EIO;
hw->nvmupd_state = I40E_NVMUPD_STATE_INIT;
} else {
- hw->aq.nvm_release_on_done = true;
+ hw->nvm_release_on_done = true;
hw->nvmupd_state = I40E_NVMUPD_STATE_INIT_WAIT;
}
break;
@@ -1030,6 +1030,37 @@ retry:
}
/**
+ * i40e_nvmupd_check_wait_event - handle NVM update operation events
+ * @hw: pointer to the hardware structure
+ * @opcode: the event that just happened
+ **/
+void i40e_nvmupd_check_wait_event(struct i40e_hw *hw, u16 opcode)
+{
+ if (opcode == i40e_aqc_opc_nvm_erase ||
+ opcode == i40e_aqc_opc_nvm_update) {
+ i40e_debug(hw, I40E_DEBUG_NVM,
+ "NVMUPD: clearing wait on opcode 0x%04x\n", opcode);
+ if (hw->nvm_release_on_done) {
+ i40e_release_nvm(hw);
+ hw->nvm_release_on_done = false;
+ }
+
+ switch (hw->nvmupd_state) {
+ case I40E_NVMUPD_STATE_INIT_WAIT:
+ hw->nvmupd_state = I40E_NVMUPD_STATE_INIT;
+ break;
+
+ case I40E_NVMUPD_STATE_WRITE_WAIT:
+ hw->nvmupd_state = I40E_NVMUPD_STATE_WRITING;
+ break;
+
+ default:
+ break;
+ }
+ }
+}
+
+/**
* i40e_nvmupd_validate_command - Validate given command
* @hw: pointer to hardware structure
* @cmd: pointer to nvm update command buffer
diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
index d51eee5bf79a..134035f53f2c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
@@ -308,6 +308,7 @@ i40e_status i40e_validate_nvm_checksum(struct i40e_hw *hw,
i40e_status i40e_nvmupd_command(struct i40e_hw *hw,
struct i40e_nvm_access *cmd,
u8 *bytes, int *);
+void i40e_nvmupd_check_wait_event(struct i40e_hw *hw, u16 opcode);
void i40e_set_pci_config_data(struct i40e_hw *hw, u16 link_status);
extern struct i40e_rx_ptype_decoded i40e_ptype_lookup[];
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 5bef5b0f00d9..29ffed27e5a9 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1703,7 +1703,9 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, const int budget)
? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
: 0;
#ifdef I40E_FCOE
- if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) {
+ if (unlikely(
+ i40e_rx_is_fcoe(rx_ptype) &&
+ !i40e_fcoe_handle_offload(rx_ring, rx_desc, skb))) {
dev_kfree_skb_any(skb);
continue;
}
@@ -1834,7 +1836,9 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
: 0;
#ifdef I40E_FCOE
- if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) {
+ if (unlikely(
+ i40e_rx_is_fcoe(rx_ptype) &&
+ !i40e_fcoe_handle_offload(rx_ring, rx_desc, skb))) {
dev_kfree_skb_any(skb);
continue;
}
@@ -2252,15 +2256,13 @@ out:
/**
* i40e_tso - set up the tso context descriptor
- * @tx_ring: ptr to the ring to send
* @skb: ptr to the skb we're sending
* @hdr_len: ptr to the size of the packet header
* @cd_type_cmd_tso_mss: Quad Word 1
*
* Returns 0 if no TSO can happen, 1 if tso is going, or error
**/
-static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb,
- u8 *hdr_len, u64 *cd_type_cmd_tso_mss)
+static int i40e_tso(struct sk_buff *skb, u8 *hdr_len, u64 *cd_type_cmd_tso_mss)
{
u64 cd_cmd, cd_tso_len, cd_mss;
union {
@@ -2304,10 +2306,8 @@ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb,
l4_offset = l4.hdr - skb->data;
/* remove payload length from outer checksum */
- paylen = (__force u16)l4.udp->check;
- paylen += ntohs((__force __be16)1) *
- (u16)~(skb->len - l4_offset);
- l4.udp->check = ~csum_fold((__force __wsum)paylen);
+ paylen = skb->len - l4_offset;
+ csum_replace_by_diff(&l4.udp->check, htonl(paylen));
}
/* reset pointers to inner headers */
@@ -2327,9 +2327,8 @@ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb,
l4_offset = l4.hdr - skb->data;
/* remove payload length from inner checksum */
- paylen = (__force u16)l4.tcp->check;
- paylen += ntohs((__force __be16)1) * (u16)~(skb->len - l4_offset);
- l4.tcp->check = ~csum_fold((__force __wsum)paylen);
+ paylen = skb->len - l4_offset;
+ csum_replace_by_diff(&l4.tcp->check, htonl(paylen));
/* compute length of segmentation header */
*hdr_len = (l4.tcp->doff * 4) + l4_offset;
@@ -2411,7 +2410,7 @@ static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
unsigned char *hdr;
} l4;
unsigned char *exthdr;
- u32 offset, cmd = 0, tunnel = 0;
+ u32 offset, cmd = 0;
__be16 frag_off;
u8 l4_proto = 0;
@@ -2425,6 +2424,7 @@ static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
offset = ((ip.hdr - skb->data) / 2) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
if (skb->encapsulation) {
+ u32 tunnel = 0;
/* define outer network header type */
if (*tx_flags & I40E_TX_FLAGS_IPV4) {
tunnel |= (*tx_flags & I40E_TX_FLAGS_TSO) ?
@@ -2934,7 +2934,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
else if (protocol == htons(ETH_P_IPV6))
tx_flags |= I40E_TX_FLAGS_IPV6;
- tso = i40e_tso(tx_ring, skb, &hdr_len, &cd_type_cmd_tso_mss);
+ tso = i40e_tso(skb, &hdr_len, &cd_type_cmd_tso_mss);
if (tso < 0)
goto out_drop;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index 9e654e611642..77ccdde56c0c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -448,4 +448,14 @@ static inline bool i40e_chk_linearize(struct sk_buff *skb, int count)
return __i40e_chk_linearize(skb);
}
+
+/**
+ * i40e_rx_is_fcoe - returns true if the Rx packet type is FCoE
+ * @ptype: the packet type field from Rx descriptor write-back
+ **/
+static inline bool i40e_rx_is_fcoe(u16 ptype)
+{
+ return (ptype >= I40E_RX_PTYPE_L2_FCOE_PAY3) &&
+ (ptype <= I40E_RX_PTYPE_L2_FCOE_VFT_FCOTHER);
+}
#endif /* _I40E_TXRX_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h
index 3335f9d13374..793036b259e5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_type.h
@@ -549,6 +549,7 @@ struct i40e_hw {
enum i40e_nvmupd_state nvmupd_state;
struct i40e_aq_desc nvm_wb_desc;
struct i40e_virt_mem nvm_buff;
+ bool nvm_release_on_done;
/* HMC info */
struct i40e_hmc_info hmc; /* HMC info struct */
@@ -1533,4 +1534,37 @@ struct i40e_lldp_variables {
/* RSS Hash Table Size */
#define I40E_PFQF_CTL_0_HASHLUTSIZE_512 0x00010000
+
+/* INPUT SET MASK for RSS, flow director, and flexible payload */
+#define I40E_L3_SRC_SHIFT 47
+#define I40E_L3_SRC_MASK (0x3ULL << I40E_L3_SRC_SHIFT)
+#define I40E_L3_V6_SRC_SHIFT 43
+#define I40E_L3_V6_SRC_MASK (0xFFULL << I40E_L3_V6_SRC_SHIFT)
+#define I40E_L3_DST_SHIFT 35
+#define I40E_L3_DST_MASK (0x3ULL << I40E_L3_DST_SHIFT)
+#define I40E_L3_V6_DST_SHIFT 35
+#define I40E_L3_V6_DST_MASK (0xFFULL << I40E_L3_V6_DST_SHIFT)
+#define I40E_L4_SRC_SHIFT 34
+#define I40E_L4_SRC_MASK (0x1ULL << I40E_L4_SRC_SHIFT)
+#define I40E_L4_DST_SHIFT 33
+#define I40E_L4_DST_MASK (0x1ULL << I40E_L4_DST_SHIFT)
+#define I40E_VERIFY_TAG_SHIFT 31
+#define I40E_VERIFY_TAG_MASK (0x3ULL << I40E_VERIFY_TAG_SHIFT)
+
+#define I40E_FLEX_50_SHIFT 13
+#define I40E_FLEX_50_MASK (0x1ULL << I40E_FLEX_50_SHIFT)
+#define I40E_FLEX_51_SHIFT 12
+#define I40E_FLEX_51_MASK (0x1ULL << I40E_FLEX_51_SHIFT)
+#define I40E_FLEX_52_SHIFT 11
+#define I40E_FLEX_52_MASK (0x1ULL << I40E_FLEX_52_SHIFT)
+#define I40E_FLEX_53_SHIFT 10
+#define I40E_FLEX_53_MASK (0x1ULL << I40E_FLEX_53_SHIFT)
+#define I40E_FLEX_54_SHIFT 9
+#define I40E_FLEX_54_MASK (0x1ULL << I40E_FLEX_54_SHIFT)
+#define I40E_FLEX_55_SHIFT 8
+#define I40E_FLEX_55_MASK (0x1ULL << I40E_FLEX_55_SHIFT)
+#define I40E_FLEX_56_SHIFT 7
+#define I40E_FLEX_56_MASK (0x1ULL << I40E_FLEX_56_SHIFT)
+#define I40E_FLEX_57_SHIFT 6
+#define I40E_FLEX_57_MASK (0x1ULL << I40E_FLEX_57_SHIFT)
#endif /* _I40E_TYPE_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h
index ab866cf3dc18..c92a3bdee229 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h
@@ -80,10 +80,15 @@ enum i40e_virtchnl_ops {
I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE = 14,
I40E_VIRTCHNL_OP_GET_STATS = 15,
I40E_VIRTCHNL_OP_FCOE = 16,
- I40E_VIRTCHNL_OP_EVENT = 17,
+ I40E_VIRTCHNL_OP_EVENT = 17, /* must ALWAYS be 17 */
I40E_VIRTCHNL_OP_IWARP = 20,
I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP = 21,
I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP = 22,
+ I40E_VIRTCHNL_OP_CONFIG_RSS_KEY = 23,
+ I40E_VIRTCHNL_OP_CONFIG_RSS_LUT = 24,
+ I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS = 25,
+ I40E_VIRTCHNL_OP_SET_RSS_HENA = 26,
+
};
/* Virtual channel message descriptor. This overlays the admin queue
@@ -157,6 +162,7 @@ struct i40e_virtchnl_vsi_resource {
#define I40E_VIRTCHNL_VF_OFFLOAD_VLAN 0x00010000
#define I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING 0x00020000
#define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 0x00040000
+#define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF 0X00080000
struct i40e_virtchnl_vf_resource {
u16 num_vsis;
@@ -165,8 +171,8 @@ struct i40e_virtchnl_vf_resource {
u16 max_mtu;
u32 vf_offload_flags;
- u32 max_fcoe_contexts;
- u32 max_fcoe_filters;
+ u32 rss_key_size;
+ u32 rss_lut_size;
struct i40e_virtchnl_vsi_resource vsi_res[1];
};
@@ -325,6 +331,39 @@ struct i40e_virtchnl_promisc_info {
* PF replies with struct i40e_eth_stats in an external buffer.
*/
+/* I40E_VIRTCHNL_OP_CONFIG_RSS_KEY
+ * I40E_VIRTCHNL_OP_CONFIG_RSS_LUT
+ * VF sends these messages to configure RSS. Only supported if both PF
+ * and VF drivers set the I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF bit during
+ * configuration negotiation. If this is the case, then the RSS fields in
+ * the VF resource struct are valid.
+ * Both the key and LUT are initialized to 0 by the PF, meaning that
+ * RSS is effectively disabled until set up by the VF.
+ */
+struct i40e_virtchnl_rss_key {
+ u16 vsi_id;
+ u16 key_len;
+ u8 key[1]; /* RSS hash key, packed bytes */
+};
+
+struct i40e_virtchnl_rss_lut {
+ u16 vsi_id;
+ u16 lut_entries;
+ u8 lut[1]; /* RSS lookup table*/
+};
+
+/* I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS
+ * I40E_VIRTCHNL_OP_SET_RSS_HENA
+ * VF sends these messages to get and set the hash filter enable bits for RSS.
+ * By default, the PF sets these to all possible traffic types that the
+ * hardware supports. The VF can query this value if it wants to change the
+ * traffic types that are hashed by the hardware.
+ * Traffic types are defined in the i40e_filter_pctype enum in i40e_type.h
+ */
+struct i40e_virtchnl_rss_hena {
+ u64 hena;
+};
+
/* I40E_VIRTCHNL_OP_EVENT
* PF sends this message to inform the VF driver of events that may affect it.
* No direct response is expected from the VF, though it may generate other
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 47b9e62473c4..30f8cbe6b54b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -937,6 +937,11 @@ void i40e_reset_vf(struct i40e_vf *vf, bool flr)
wr32(hw, I40E_VPGEN_VFRTRIG(vf->vf_id), reg);
i40e_flush(hw);
}
+ /* clear the VFLR bit in GLGEN_VFLRSTAT */
+ reg_idx = (hw->func_caps.vf_base_id + vf->vf_id) / 32;
+ bit_idx = (hw->func_caps.vf_base_id + vf->vf_id) % 32;
+ wr32(hw, I40E_GLGEN_VFLRSTAT(reg_idx), BIT(bit_idx));
+ i40e_flush(hw);
if (i40e_quiesce_vf_pci(vf))
dev_err(&pf->pdev->dev, "VF %d PCI transactions stuck\n",
@@ -989,10 +994,6 @@ complete_reset:
/* tell the VF the reset is done */
wr32(hw, I40E_VFGEN_RSTAT1(vf->vf_id), I40E_VFR_VFACTIVE);
- /* clear the VFLR bit in GLGEN_VFLRSTAT */
- reg_idx = (hw->func_caps.vf_base_id + vf->vf_id) / 32;
- bit_idx = (hw->func_caps.vf_base_id + vf->vf_id) % 32;
- wr32(hw, I40E_GLGEN_VFLRSTAT(reg_idx), BIT(bit_idx));
i40e_flush(hw);
clear_bit(__I40E_VF_DISABLE, &pf->state);
}
@@ -1232,8 +1233,8 @@ static int i40e_vc_send_msg_to_vf(struct i40e_vf *vf, u32 v_opcode,
/* single place to detect unsuccessful return values */
if (v_retval) {
vf->num_invalid_msgs++;
- dev_err(&pf->pdev->dev, "VF %d failed opcode %d, error: %d\n",
- vf->vf_id, v_opcode, v_retval);
+ dev_info(&pf->pdev->dev, "VF %d failed opcode %d, retval: %d\n",
+ vf->vf_id, v_opcode, v_retval);
if (vf->num_invalid_msgs >
I40E_DEFAULT_NUM_INVALID_MSGS_ALLOWED) {
dev_err(&pf->pdev->dev,
@@ -1251,9 +1252,9 @@ static int i40e_vc_send_msg_to_vf(struct i40e_vf *vf, u32 v_opcode,
aq_ret = i40e_aq_send_msg_to_vf(hw, abs_vf_id, v_opcode, v_retval,
msg, msglen, NULL);
if (aq_ret) {
- dev_err(&pf->pdev->dev,
- "Unable to send the message to VF %d aq_err %d\n",
- vf->vf_id, pf->hw.aq.asq_last_status);
+ dev_info(&pf->pdev->dev,
+ "Unable to send the message to VF %d aq_err %d\n",
+ vf->vf_id, pf->hw.aq.asq_last_status);
return -EIO;
}
@@ -1311,8 +1312,8 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
struct i40e_pf *pf = vf->pf;
i40e_status aq_ret = 0;
struct i40e_vsi *vsi;
- int i = 0, len = 0;
int num_vsis = 1;
+ int len = 0;
int ret;
if (!test_bit(I40E_VF_STAT_INIT, &vf->vf_states)) {
@@ -1361,8 +1362,16 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2;
}
- if (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING)
+ if (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING) {
+ if (pf->flags & I40E_FLAG_MFP_ENABLED) {
+ dev_err(&pf->pdev->dev,
+ "VF %d requested polling mode: this feature is supported only when the device is running in single function per port (SFP) mode\n",
+ vf->vf_id);
+ ret = I40E_ERR_PARAM;
+ goto err;
+ }
vfres->vf_offload_flags |= I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING;
+ }
if (pf->flags & I40E_FLAG_WB_ON_ITR_CAPABLE) {
if (vf->driver_caps & I40E_VIRTCHNL_VF_OFFLOAD_WB_ON_ITR)
@@ -1374,15 +1383,14 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
vfres->num_queue_pairs = vf->num_queue_pairs;
vfres->max_vectors = pf->hw.func_caps.num_msix_vectors_vf;
if (vf->lan_vsi_idx) {
- vfres->vsi_res[i].vsi_id = vf->lan_vsi_id;
- vfres->vsi_res[i].vsi_type = I40E_VSI_SRIOV;
- vfres->vsi_res[i].num_queue_pairs = vsi->alloc_queue_pairs;
+ vfres->vsi_res[0].vsi_id = vf->lan_vsi_id;
+ vfres->vsi_res[0].vsi_type = I40E_VSI_SRIOV;
+ vfres->vsi_res[0].num_queue_pairs = vsi->alloc_queue_pairs;
/* VFs only use TC 0 */
- vfres->vsi_res[i].qset_handle
+ vfres->vsi_res[0].qset_handle
= le16_to_cpu(vsi->info.qs_handle[0]);
- ether_addr_copy(vfres->vsi_res[i].default_mac_addr,
+ ether_addr_copy(vfres->vsi_res[0].default_mac_addr,
vf->default_lan_addr.addr);
- i++;
}
set_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states);
@@ -2297,11 +2305,9 @@ int i40e_vc_process_vflr_event(struct i40e_pf *pf)
/* read GLGEN_VFLRSTAT register to find out the flr VFs */
vf = &pf->vf[vf_id];
reg = rd32(hw, I40E_GLGEN_VFLRSTAT(reg_idx));
- if (reg & BIT(bit_idx)) {
+ if (reg & BIT(bit_idx))
/* i40e_reset_vf will clear the bit in GLGEN_VFLRSTAT */
- if (!test_bit(__I40E_DOWN, &pf->state))
- i40e_reset_vf(vf, true);
- }
+ i40e_reset_vf(vf, true);
}
return 0;
@@ -2765,3 +2771,45 @@ int i40e_ndo_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool enable)
out:
return ret;
}
+
+/**
+ * i40e_ndo_set_vf_trust
+ * @netdev: network interface device structure of the pf
+ * @vf_id: VF identifier
+ * @setting: trust setting
+ *
+ * Enable or disable VF trust setting
+ **/
+int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting)
+{
+ struct i40e_netdev_priv *np = netdev_priv(netdev);
+ struct i40e_pf *pf = np->vsi->back;
+ struct i40e_vf *vf;
+ int ret = 0;
+
+ /* validate the request */
+ if (vf_id >= pf->num_alloc_vfs) {
+ dev_err(&pf->pdev->dev, "Invalid VF Identifier %d\n", vf_id);
+ return -EINVAL;
+ }
+
+ if (pf->flags & I40E_FLAG_MFP_ENABLED) {
+ dev_err(&pf->pdev->dev, "Trusted VF not supported in MFP mode.\n");
+ return -EINVAL;
+ }
+
+ vf = &pf->vf[vf_id];
+
+ if (!vf)
+ return -EINVAL;
+ if (setting == vf->trusted)
+ goto out;
+
+ vf->trusted = setting;
+ i40e_vc_notify_vf_reset(vf);
+ i40e_reset_vf(vf, false);
+ dev_info(&pf->pdev->dev, "VF %u is now %strusted\n",
+ vf_id, setting ? "" : "un");
+out:
+ return ret;
+}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
index e7b2fba0309e..838cbd2299a4 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
@@ -88,6 +88,7 @@ struct i40e_vf {
struct i40e_virtchnl_ether_addr default_fcoe_addr;
u16 port_vlan_id;
bool pf_set_mac; /* The VMM admin set the VF MAC address */
+ bool trusted;
/* VSI indices - actual VSI pointers are maintained in the PF structure
* When assigned, these will be non-zero, because VSI 0 is always
@@ -127,6 +128,7 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev,
int vf_id, u16 vlan_id, u8 qos);
int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate,
int max_tx_rate);
+int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting);
int i40e_ndo_get_vf_config(struct net_device *netdev,
int vf_id, struct ifla_vf_info *ivi);
int i40e_ndo_set_vf_link_state(struct net_device *netdev, int vf_id, int link);
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq.h
index a3eae5d9a2bd..1f9b3b5d946d 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_adminq.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq.h
@@ -97,7 +97,6 @@ struct i40e_adminq_info {
u32 fw_build; /* firmware build number */
u16 api_maj_ver; /* api major version */
u16 api_min_ver; /* api minor version */
- bool nvm_release_on_done;
struct mutex asq_mutex; /* Send queue lock */
struct mutex arq_mutex; /* Receive queue lock */
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_common.c b/drivers/net/ethernet/intel/i40evf/i40e_common.c
index 771ac6ad8cda..4db0c0326185 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_common.c
@@ -58,6 +58,7 @@ i40e_status i40e_set_mac_type(struct i40e_hw *hw)
case I40E_DEV_ID_SFP_X722:
case I40E_DEV_ID_1G_BASE_T_X722:
case I40E_DEV_ID_10G_BASE_T_X722:
+ case I40E_DEV_ID_SFP_I_X722:
hw->mac.type = I40E_MAC_X722;
break;
case I40E_DEV_ID_X722_VF:
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_devids.h b/drivers/net/ethernet/intel/i40evf/i40e_devids.h
index ca8b58c3d1f5..70235706915e 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_devids.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_devids.h
@@ -44,6 +44,7 @@
#define I40E_DEV_ID_SFP_X722 0x37D0
#define I40E_DEV_ID_1G_BASE_T_X722 0x37D1
#define I40E_DEV_ID_10G_BASE_T_X722 0x37D2
+#define I40E_DEV_ID_SFP_I_X722 0x37D3
#define I40E_DEV_ID_X722_VF 0x37CD
#define I40E_DEV_ID_X722_VF_HV 0x37D9
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index 570348d93e5d..0c912a4999db 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -1160,7 +1160,9 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, const int budget)
? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
: 0;
#ifdef I40E_FCOE
- if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) {
+ if (unlikely(
+ i40e_rx_is_fcoe(rx_ptype) &&
+ !i40e_fcoe_handle_offload(rx_ring, rx_desc, skb))) {
dev_kfree_skb_any(skb);
continue;
}
@@ -1519,15 +1521,13 @@ out:
/**
* i40e_tso - set up the tso context descriptor
- * @tx_ring: ptr to the ring to send
* @skb: ptr to the skb we're sending
* @hdr_len: ptr to the size of the packet header
* @cd_type_cmd_tso_mss: Quad Word 1
*
* Returns 0 if no TSO can happen, 1 if tso is going, or error
**/
-static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb,
- u8 *hdr_len, u64 *cd_type_cmd_tso_mss)
+static int i40e_tso(struct sk_buff *skb, u8 *hdr_len, u64 *cd_type_cmd_tso_mss)
{
u64 cd_cmd, cd_tso_len, cd_mss;
union {
@@ -1571,10 +1571,8 @@ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb,
l4_offset = l4.hdr - skb->data;
/* remove payload length from outer checksum */
- paylen = (__force u16)l4.udp->check;
- paylen += ntohs((__force __be16)1) *
- (u16)~(skb->len - l4_offset);
- l4.udp->check = ~csum_fold((__force __wsum)paylen);
+ paylen = skb->len - l4_offset;
+ csum_replace_by_diff(&l4.udp->check, htonl(paylen));
}
/* reset pointers to inner headers */
@@ -1594,9 +1592,8 @@ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb,
l4_offset = l4.hdr - skb->data;
/* remove payload length from inner checksum */
- paylen = (__force u16)l4.tcp->check;
- paylen += ntohs((__force __be16)1) * (u16)~(skb->len - l4_offset);
- l4.tcp->check = ~csum_fold((__force __wsum)paylen);
+ paylen = skb->len - l4_offset;
+ csum_replace_by_diff(&l4.tcp->check, htonl(paylen));
/* compute length of segmentation header */
*hdr_len = (l4.tcp->doff * 4) + l4_offset;
@@ -1636,7 +1633,7 @@ static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
unsigned char *hdr;
} l4;
unsigned char *exthdr;
- u32 offset, cmd = 0, tunnel = 0;
+ u32 offset, cmd = 0;
__be16 frag_off;
u8 l4_proto = 0;
@@ -1650,6 +1647,7 @@ static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
offset = ((ip.hdr - skb->data) / 2) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
if (skb->encapsulation) {
+ u32 tunnel = 0;
/* define outer network header type */
if (*tx_flags & I40E_TX_FLAGS_IPV4) {
tunnel |= (*tx_flags & I40E_TX_FLAGS_TSO) ?
@@ -2152,7 +2150,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
else if (protocol == htons(ETH_P_IPV6))
tx_flags |= I40E_TX_FLAGS_IPV6;
- tso = i40e_tso(tx_ring, skb, &hdr_len, &cd_type_cmd_tso_mss);
+ tso = i40e_tso(skb, &hdr_len, &cd_type_cmd_tso_mss);
if (tso < 0)
goto out_drop;
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
index 3ec0ea5ea3db..84c28aa64fdf 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
@@ -430,4 +430,14 @@ static inline bool i40e_chk_linearize(struct sk_buff *skb, int count)
return __i40evf_chk_linearize(skb);
}
+
+/**
+ * i40e_rx_is_fcoe - returns true if the Rx packet type is FCoE
+ * @ptype: the packet type field from Rx descriptor write-back
+ **/
+static inline bool i40e_rx_is_fcoe(u16 ptype)
+{
+ return (ptype >= I40E_RX_PTYPE_L2_FCOE_PAY3) &&
+ (ptype <= I40E_RX_PTYPE_L2_FCOE_VFT_FCOTHER);
+}
#endif /* _I40E_TXRX_H_ */
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_type.h b/drivers/net/ethernet/intel/i40evf/i40e_type.h
index 301fe2b6dd03..4a78c18e0b7b 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_type.h
@@ -522,6 +522,7 @@ struct i40e_hw {
enum i40e_nvmupd_state nvmupd_state;
struct i40e_aq_desc nvm_wb_desc;
struct i40e_virt_mem nvm_buff;
+ bool nvm_release_on_done;
/* HMC info */
struct i40e_hmc_info hmc; /* HMC info struct */
@@ -1329,4 +1330,46 @@ enum i40e_reset_type {
/* RSS Hash Table Size */
#define I40E_PFQF_CTL_0_HASHLUTSIZE_512 0x00010000
+
+/* INPUT SET MASK for RSS, flow director and flexible payload */
+#define I40E_FD_INSET_L3_SRC_SHIFT 47
+#define I40E_FD_INSET_L3_SRC_WORD_MASK (0x3ULL << \
+ I40E_FD_INSET_L3_SRC_SHIFT)
+#define I40E_FD_INSET_L3_DST_SHIFT 35
+#define I40E_FD_INSET_L3_DST_WORD_MASK (0x3ULL << \
+ I40E_FD_INSET_L3_DST_SHIFT)
+#define I40E_FD_INSET_L4_SRC_SHIFT 34
+#define I40E_FD_INSET_L4_SRC_WORD_MASK (0x1ULL << \
+ I40E_FD_INSET_L4_SRC_SHIFT)
+#define I40E_FD_INSET_L4_DST_SHIFT 33
+#define I40E_FD_INSET_L4_DST_WORD_MASK (0x1ULL << \
+ I40E_FD_INSET_L4_DST_SHIFT)
+#define I40E_FD_INSET_VERIFY_TAG_SHIFT 31
+#define I40E_FD_INSET_VERIFY_TAG_WORD_MASK (0x3ULL << \
+ I40E_FD_INSET_VERIFY_TAG_SHIFT)
+
+#define I40E_FD_INSET_FLEX_WORD50_SHIFT 17
+#define I40E_FD_INSET_FLEX_WORD50_MASK (0x1ULL << \
+ I40E_FD_INSET_FLEX_WORD50_SHIFT)
+#define I40E_FD_INSET_FLEX_WORD51_SHIFT 16
+#define I40E_FD_INSET_FLEX_WORD51_MASK (0x1ULL << \
+ I40E_FD_INSET_FLEX_WORD51_SHIFT)
+#define I40E_FD_INSET_FLEX_WORD52_SHIFT 15
+#define I40E_FD_INSET_FLEX_WORD52_MASK (0x1ULL << \
+ I40E_FD_INSET_FLEX_WORD52_SHIFT)
+#define I40E_FD_INSET_FLEX_WORD53_SHIFT 14
+#define I40E_FD_INSET_FLEX_WORD53_MASK (0x1ULL << \
+ I40E_FD_INSET_FLEX_WORD53_SHIFT)
+#define I40E_FD_INSET_FLEX_WORD54_SHIFT 13
+#define I40E_FD_INSET_FLEX_WORD54_MASK (0x1ULL << \
+ I40E_FD_INSET_FLEX_WORD54_SHIFT)
+#define I40E_FD_INSET_FLEX_WORD55_SHIFT 12
+#define I40E_FD_INSET_FLEX_WORD55_MASK (0x1ULL << \
+ I40E_FD_INSET_FLEX_WORD55_SHIFT)
+#define I40E_FD_INSET_FLEX_WORD56_SHIFT 11
+#define I40E_FD_INSET_FLEX_WORD56_MASK (0x1ULL << \
+ I40E_FD_INSET_FLEX_WORD56_SHIFT)
+#define I40E_FD_INSET_FLEX_WORD57_SHIFT 10
+#define I40E_FD_INSET_FLEX_WORD57_MASK (0x1ULL << \
+ I40E_FD_INSET_FLEX_WORD57_SHIFT)
#endif /* _I40E_TYPE_H_ */
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h b/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h
index 3b9d2037456c..f04ce6cb70dc 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h
@@ -80,7 +80,12 @@ enum i40e_virtchnl_ops {
I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE = 14,
I40E_VIRTCHNL_OP_GET_STATS = 15,
I40E_VIRTCHNL_OP_FCOE = 16,
- I40E_VIRTCHNL_OP_EVENT = 17,
+ I40E_VIRTCHNL_OP_EVENT = 17, /* must ALWAYS be 17 */
+ I40E_VIRTCHNL_OP_CONFIG_RSS_KEY = 23,
+ I40E_VIRTCHNL_OP_CONFIG_RSS_LUT = 24,
+ I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS = 25,
+ I40E_VIRTCHNL_OP_SET_RSS_HENA = 26,
+
};
/* Virtual channel message descriptor. This overlays the admin queue
@@ -154,6 +159,7 @@ struct i40e_virtchnl_vsi_resource {
#define I40E_VIRTCHNL_VF_OFFLOAD_VLAN 0x00010000
#define I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING 0x00020000
#define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 0x00040000
+#define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF 0X00080000
struct i40e_virtchnl_vf_resource {
u16 num_vsis;
@@ -162,8 +168,8 @@ struct i40e_virtchnl_vf_resource {
u16 max_mtu;
u32 vf_offload_flags;
- u32 max_fcoe_contexts;
- u32 max_fcoe_filters;
+ u32 rss_key_size;
+ u32 rss_lut_size;
struct i40e_virtchnl_vsi_resource vsi_res[1];
};
@@ -322,6 +328,39 @@ struct i40e_virtchnl_promisc_info {
* PF replies with struct i40e_eth_stats in an external buffer.
*/
+/* I40E_VIRTCHNL_OP_CONFIG_RSS_KEY
+ * I40E_VIRTCHNL_OP_CONFIG_RSS_LUT
+ * VF sends these messages to configure RSS. Only supported if both PF
+ * and VF drivers set the I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF bit during
+ * configuration negotiation. If this is the case, then the RSS fields in
+ * the VF resource struct are valid.
+ * Both the key and LUT are initialized to 0 by the PF, meaning that
+ * RSS is effectively disabled until set up by the VF.
+ */
+struct i40e_virtchnl_rss_key {
+ u16 vsi_id;
+ u16 key_len;
+ u8 key[1]; /* RSS hash key, packed bytes */
+};
+
+struct i40e_virtchnl_rss_lut {
+ u16 vsi_id;
+ u16 lut_entries;
+ u8 lut[1]; /* RSS lookup table*/
+};
+
+/* I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS
+ * I40E_VIRTCHNL_OP_SET_RSS_HENA
+ * VF sends these messages to get and set the hash filter enable bits for RSS.
+ * By default, the PF sets these to all possible traffic types that the
+ * hardware supports. The VF can query this value if it wants to change the
+ * traffic types that are hashed by the hardware.
+ * Traffic types are defined in the i40e_filter_pctype enum in i40e_type.h
+ */
+struct i40e_virtchnl_rss_hena {
+ u64 hena;
+};
+
/* I40E_VIRTCHNL_OP_EVENT
* PF sends this message to inform the VF driver of events that may affect it.
* No direct response is expected from the VF, though it may generate other
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index e3973684746b..9110319a8f00 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -38,7 +38,7 @@ static const char i40evf_driver_string[] =
#define DRV_VERSION_MAJOR 1
#define DRV_VERSION_MINOR 5
-#define DRV_VERSION_BUILD 1
+#define DRV_VERSION_BUILD 5
#define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
__stringify(DRV_VERSION_MINOR) "." \
__stringify(DRV_VERSION_BUILD) \
@@ -1341,7 +1341,7 @@ static int i40evf_get_rss_aq(struct i40e_vsi *vsi, const u8 *seed,
}
if (lut) {
- ret = i40evf_aq_get_rss_lut(hw, vsi->id, seed, lut, lut_size);
+ ret = i40evf_aq_get_rss_lut(hw, vsi->id, false, lut, lut_size);
if (ret) {
dev_err(&adapter->pdev->dev,
"Cannot get RSS lut, err %s aq_err %s\n",
@@ -2252,6 +2252,28 @@ static int i40evf_change_mtu(struct net_device *netdev, int new_mtu)
return 0;
}
+#define I40EVF_VLAN_FEATURES (NETIF_F_HW_VLAN_CTAG_TX |\
+ NETIF_F_HW_VLAN_CTAG_RX |\
+ NETIF_F_HW_VLAN_CTAG_FILTER)
+
+/**
+ * i40evf_fix_features - fix up the netdev feature bits
+ * @netdev: our net device
+ * @features: desired feature bits
+ *
+ * Returns fixed-up features bits
+ **/
+static netdev_features_t i40evf_fix_features(struct net_device *netdev,
+ netdev_features_t features)
+{
+ struct i40evf_adapter *adapter = netdev_priv(netdev);
+
+ features &= ~I40EVF_VLAN_FEATURES;
+ if (adapter->vf_res->vf_offload_flags & I40E_VIRTCHNL_VF_OFFLOAD_VLAN)
+ features |= I40EVF_VLAN_FEATURES;
+ return features;
+}
+
static const struct net_device_ops i40evf_netdev_ops = {
.ndo_open = i40evf_open,
.ndo_stop = i40evf_close,
@@ -2264,6 +2286,7 @@ static const struct net_device_ops i40evf_netdev_ops = {
.ndo_tx_timeout = i40evf_tx_timeout,
.ndo_vlan_rx_add_vid = i40evf_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = i40evf_vlan_rx_kill_vid,
+ .ndo_fix_features = i40evf_fix_features,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = i40evf_netpoll,
#endif
@@ -2300,29 +2323,20 @@ static int i40evf_check_reset_complete(struct i40e_hw *hw)
**/
int i40evf_process_config(struct i40evf_adapter *adapter)
{
+ struct i40e_virtchnl_vf_resource *vfres = adapter->vf_res;
struct net_device *netdev = adapter->netdev;
int i;
/* got VF config message back from PF, now we can parse it */
- for (i = 0; i < adapter->vf_res->num_vsis; i++) {
- if (adapter->vf_res->vsi_res[i].vsi_type == I40E_VSI_SRIOV)
- adapter->vsi_res = &adapter->vf_res->vsi_res[i];
+ for (i = 0; i < vfres->num_vsis; i++) {
+ if (vfres->vsi_res[i].vsi_type == I40E_VSI_SRIOV)
+ adapter->vsi_res = &vfres->vsi_res[i];
}
if (!adapter->vsi_res) {
dev_err(&adapter->pdev->dev, "No LAN VSI found\n");
return -ENODEV;
}
- if (adapter->vf_res->vf_offload_flags
- & I40E_VIRTCHNL_VF_OFFLOAD_VLAN) {
- netdev->vlan_features = netdev->features &
- ~(NETIF_F_HW_VLAN_CTAG_TX |
- NETIF_F_HW_VLAN_CTAG_RX |
- NETIF_F_HW_VLAN_CTAG_FILTER);
- netdev->features |= NETIF_F_HW_VLAN_CTAG_TX |
- NETIF_F_HW_VLAN_CTAG_RX |
- NETIF_F_HW_VLAN_CTAG_FILTER;
- }
netdev->features |= NETIF_F_HIGHDMA |
NETIF_F_SG |
NETIF_F_IP_CSUM |
@@ -2331,7 +2345,7 @@ int i40evf_process_config(struct i40evf_adapter *adapter)
NETIF_F_TSO |
NETIF_F_TSO6 |
NETIF_F_TSO_ECN |
- NETIF_F_GSO_GRE |
+ NETIF_F_GSO_GRE |
NETIF_F_GSO_UDP_TUNNEL |
NETIF_F_RXCSUM |
NETIF_F_GRO;
@@ -2348,9 +2362,15 @@ int i40evf_process_config(struct i40evf_adapter *adapter)
if (adapter->flags & I40EVF_FLAG_OUTER_UDP_CSUM_CAPABLE)
netdev->features |= NETIF_F_GSO_UDP_TUNNEL_CSUM;
+ /* always clear VLAN features because they can change at every reset */
+ netdev->features &= ~(I40EVF_VLAN_FEATURES);
/* copy netdev features into list of user selectable features */
netdev->hw_features |= netdev->features;
- netdev->hw_features &= ~NETIF_F_RXCSUM;
+
+ if (vfres->vf_offload_flags & I40E_VIRTCHNL_VF_OFFLOAD_VLAN) {
+ netdev->vlan_features = netdev->features;
+ netdev->features |= I40EVF_VLAN_FEATURES;
+ }
adapter->vsi.id = adapter->vsi_res->vsi_id;
@@ -2831,11 +2851,11 @@ static void i40evf_remove(struct pci_dev *pdev)
adapter->state = __I40EVF_REMOVE;
adapter->aq_required = 0;
i40evf_request_reset(adapter);
- msleep(20);
+ msleep(50);
/* If the FW isn't responding, kick it once, but only once. */
if (!i40evf_asq_done(hw)) {
i40evf_request_reset(adapter);
- msleep(20);
+ msleep(50);
}
if (adapter->msix_entries) {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
index 2ad7f67854d5..5989f7cb5462 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
@@ -50,3 +50,11 @@ config MLXSW_SPECTRUM
To compile this driver as a module, choose M here: the
module will be called mlxsw_spectrum.
+
+config MLXSW_SPECTRUM_DCB
+ bool "Data Center Bridging (DCB) support"
+ depends on MLXSW_SPECTRUM && DCB
+ default y
+ ---help---
+ Say Y here if you want to use Data Center Bridging (DCB) in the
+ driver.
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile
index 584cac444852..9b5ebf84c051 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Makefile
+++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile
@@ -8,3 +8,4 @@ mlxsw_switchx2-objs := switchx2.o
obj-$(CONFIG_MLXSW_SPECTRUM) += mlxsw_spectrum.o
mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \
spectrum_switchdev.o
+mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index ffe4c0305733..28f5b99e585a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -1805,6 +1805,184 @@ static inline void mlxsw_reg_spvmlr_pack(char *payload, u8 local_port,
}
}
+/* QTCT - QoS Switch Traffic Class Table
+ * -------------------------------------
+ * Configures the mapping between the packet switch priority and the
+ * traffic class on the transmit port.
+ */
+#define MLXSW_REG_QTCT_ID 0x400A
+#define MLXSW_REG_QTCT_LEN 0x08
+
+static const struct mlxsw_reg_info mlxsw_reg_qtct = {
+ .id = MLXSW_REG_QTCT_ID,
+ .len = MLXSW_REG_QTCT_LEN,
+};
+
+/* reg_qtct_local_port
+ * Local port number.
+ * Access: Index
+ *
+ * Note: CPU port is not supported.
+ */
+MLXSW_ITEM32(reg, qtct, local_port, 0x00, 16, 8);
+
+/* reg_qtct_sub_port
+ * Virtual port within the physical port.
+ * Should be set to 0 when virtual ports are not enabled on the port.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, qtct, sub_port, 0x00, 8, 8);
+
+/* reg_qtct_switch_prio
+ * Switch priority.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, qtct, switch_prio, 0x00, 0, 4);
+
+/* reg_qtct_tclass
+ * Traffic class.
+ * Default values:
+ * switch_prio 0 : tclass 1
+ * switch_prio 1 : tclass 0
+ * switch_prio i : tclass i, for i > 1
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qtct, tclass, 0x04, 0, 4);
+
+static inline void mlxsw_reg_qtct_pack(char *payload, u8 local_port,
+ u8 switch_prio, u8 tclass)
+{
+ MLXSW_REG_ZERO(qtct, payload);
+ mlxsw_reg_qtct_local_port_set(payload, local_port);
+ mlxsw_reg_qtct_switch_prio_set(payload, switch_prio);
+ mlxsw_reg_qtct_tclass_set(payload, tclass);
+}
+
+/* QEEC - QoS ETS Element Configuration Register
+ * ---------------------------------------------
+ * Configures the ETS elements.
+ */
+#define MLXSW_REG_QEEC_ID 0x400D
+#define MLXSW_REG_QEEC_LEN 0x1C
+
+static const struct mlxsw_reg_info mlxsw_reg_qeec = {
+ .id = MLXSW_REG_QEEC_ID,
+ .len = MLXSW_REG_QEEC_LEN,
+};
+
+/* reg_qeec_local_port
+ * Local port number.
+ * Access: Index
+ *
+ * Note: CPU port is supported.
+ */
+MLXSW_ITEM32(reg, qeec, local_port, 0x00, 16, 8);
+
+enum mlxsw_reg_qeec_hr {
+ MLXSW_REG_QEEC_HIERARCY_PORT,
+ MLXSW_REG_QEEC_HIERARCY_GROUP,
+ MLXSW_REG_QEEC_HIERARCY_SUBGROUP,
+ MLXSW_REG_QEEC_HIERARCY_TC,
+};
+
+/* reg_qeec_element_hierarchy
+ * 0 - Port
+ * 1 - Group
+ * 2 - Subgroup
+ * 3 - Traffic Class
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, qeec, element_hierarchy, 0x04, 16, 4);
+
+/* reg_qeec_element_index
+ * The index of the element in the hierarchy.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, qeec, element_index, 0x04, 0, 8);
+
+/* reg_qeec_next_element_index
+ * The index of the next (lower) element in the hierarchy.
+ * Access: RW
+ *
+ * Note: Reserved for element_hierarchy 0.
+ */
+MLXSW_ITEM32(reg, qeec, next_element_index, 0x08, 0, 8);
+
+enum {
+ MLXSW_REG_QEEC_BYTES_MODE,
+ MLXSW_REG_QEEC_PACKETS_MODE,
+};
+
+/* reg_qeec_pb
+ * Packets or bytes mode.
+ * 0 - Bytes mode
+ * 1 - Packets mode
+ * Access: RW
+ *
+ * Note: Used for max shaper configuration. For Spectrum, packets mode
+ * is supported only for traffic classes of CPU port.
+ */
+MLXSW_ITEM32(reg, qeec, pb, 0x0C, 28, 1);
+
+/* reg_qeec_mase
+ * Max shaper configuration enable. Enables configuration of the max
+ * shaper on this ETS element.
+ * 0 - Disable
+ * 1 - Enable
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qeec, mase, 0x10, 31, 1);
+
+/* A large max rate will disable the max shaper. */
+#define MLXSW_REG_QEEC_MAS_DIS 200000000 /* Kbps */
+
+/* reg_qeec_max_shaper_rate
+ * Max shaper information rate.
+ * For CPU port, can only be configured for port hierarchy.
+ * When in bytes mode, value is specified in units of 1000bps.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qeec, max_shaper_rate, 0x10, 0, 28);
+
+/* reg_qeec_de
+ * DWRR configuration enable. Enables configuration of the dwrr and
+ * dwrr_weight.
+ * 0 - Disable
+ * 1 - Enable
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qeec, de, 0x18, 31, 1);
+
+/* reg_qeec_dwrr
+ * Transmission selection algorithm to use on the link going down from
+ * the ETS element.
+ * 0 - Strict priority
+ * 1 - DWRR
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qeec, dwrr, 0x18, 15, 1);
+
+/* reg_qeec_dwrr_weight
+ * DWRR weight on the link going down from the ETS element. The
+ * percentage of bandwidth guaranteed to an ETS element within
+ * its hierarchy. The sum of all weights across all ETS elements
+ * within one hierarchy should be equal to 100. Reserved when
+ * transmission selection algorithm is strict priority.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qeec, dwrr_weight, 0x18, 0, 8);
+
+static inline void mlxsw_reg_qeec_pack(char *payload, u8 local_port,
+ enum mlxsw_reg_qeec_hr hr, u8 index,
+ u8 next_index)
+{
+ MLXSW_REG_ZERO(qeec, payload);
+ mlxsw_reg_qeec_local_port_set(payload, local_port);
+ mlxsw_reg_qeec_element_hierarchy_set(payload, hr);
+ mlxsw_reg_qeec_element_index_set(payload, index);
+ mlxsw_reg_qeec_next_element_index_set(payload, next_index);
+}
+
/* PMLP - Ports Module to Local Port Register
* ------------------------------------------
* Configures the assignment of modules to local ports.
@@ -2141,6 +2319,145 @@ static inline void mlxsw_reg_paos_pack(char *payload, u8 local_port,
mlxsw_reg_paos_e_set(payload, 1);
}
+/* PFCC - Ports Flow Control Configuration Register
+ * ------------------------------------------------
+ * Configures and retrieves the per port flow control configuration.
+ */
+#define MLXSW_REG_PFCC_ID 0x5007
+#define MLXSW_REG_PFCC_LEN 0x20
+
+static const struct mlxsw_reg_info mlxsw_reg_pfcc = {
+ .id = MLXSW_REG_PFCC_ID,
+ .len = MLXSW_REG_PFCC_LEN,
+};
+
+/* reg_pfcc_local_port
+ * Local port number.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pfcc, local_port, 0x00, 16, 8);
+
+/* reg_pfcc_pnat
+ * Port number access type. Determines the way local_port is interpreted:
+ * 0 - Local port number.
+ * 1 - IB / label port number.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pfcc, pnat, 0x00, 14, 2);
+
+/* reg_pfcc_shl_cap
+ * Send to higher layers capabilities:
+ * 0 - No capability of sending Pause and PFC frames to higher layers.
+ * 1 - Device has capability of sending Pause and PFC frames to higher
+ * layers.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, pfcc, shl_cap, 0x00, 1, 1);
+
+/* reg_pfcc_shl_opr
+ * Send to higher layers operation:
+ * 0 - Pause and PFC frames are handled by the port (default).
+ * 1 - Pause and PFC frames are handled by the port and also sent to
+ * higher layers. Only valid if shl_cap = 1.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pfcc, shl_opr, 0x00, 0, 1);
+
+/* reg_pfcc_ppan
+ * Pause policy auto negotiation.
+ * 0 - Disabled. Generate / ignore Pause frames based on pptx / pprtx.
+ * 1 - Enabled. When auto-negotiation is performed, set the Pause policy
+ * based on the auto-negotiation resolution.
+ * Access: RW
+ *
+ * Note: The auto-negotiation advertisement is set according to pptx and
+ * pprtx. When PFC is set on Tx / Rx, ppan must be set to 0.
+ */
+MLXSW_ITEM32(reg, pfcc, ppan, 0x04, 28, 4);
+
+/* reg_pfcc_prio_mask_tx
+ * Bit per priority indicating if Tx flow control policy should be
+ * updated based on bit pfctx.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, pfcc, prio_mask_tx, 0x04, 16, 8);
+
+/* reg_pfcc_prio_mask_rx
+ * Bit per priority indicating if Rx flow control policy should be
+ * updated based on bit pfcrx.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, pfcc, prio_mask_rx, 0x04, 0, 8);
+
+/* reg_pfcc_pptx
+ * Admin Pause policy on Tx.
+ * 0 - Never generate Pause frames (default).
+ * 1 - Generate Pause frames according to Rx buffer threshold.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pfcc, pptx, 0x08, 31, 1);
+
+/* reg_pfcc_aptx
+ * Active (operational) Pause policy on Tx.
+ * 0 - Never generate Pause frames.
+ * 1 - Generate Pause frames according to Rx buffer threshold.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, pfcc, aptx, 0x08, 30, 1);
+
+/* reg_pfcc_pfctx
+ * Priority based flow control policy on Tx[7:0]. Per-priority bit mask:
+ * 0 - Never generate priority Pause frames on the specified priority
+ * (default).
+ * 1 - Generate priority Pause frames according to Rx buffer threshold on
+ * the specified priority.
+ * Access: RW
+ *
+ * Note: pfctx and pptx must be mutually exclusive.
+ */
+MLXSW_ITEM32(reg, pfcc, pfctx, 0x08, 16, 8);
+
+/* reg_pfcc_pprx
+ * Admin Pause policy on Rx.
+ * 0 - Ignore received Pause frames (default).
+ * 1 - Respect received Pause frames.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pfcc, pprx, 0x0C, 31, 1);
+
+/* reg_pfcc_aprx
+ * Active (operational) Pause policy on Rx.
+ * 0 - Ignore received Pause frames.
+ * 1 - Respect received Pause frames.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, pfcc, aprx, 0x0C, 30, 1);
+
+/* reg_pfcc_pfcrx
+ * Priority based flow control policy on Rx[7:0]. Per-priority bit mask:
+ * 0 - Ignore incoming priority Pause frames on the specified priority
+ * (default).
+ * 1 - Respect incoming priority Pause frames on the specified priority.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pfcc, pfcrx, 0x0C, 16, 8);
+
+#define MLXSW_REG_PFCC_ALL_PRIO 0xFF
+
+static inline void mlxsw_reg_pfcc_prio_pack(char *payload, u8 pfc_en)
+{
+ mlxsw_reg_pfcc_prio_mask_tx_set(payload, MLXSW_REG_PFCC_ALL_PRIO);
+ mlxsw_reg_pfcc_prio_mask_rx_set(payload, MLXSW_REG_PFCC_ALL_PRIO);
+ mlxsw_reg_pfcc_pfctx_set(payload, pfc_en);
+ mlxsw_reg_pfcc_pfcrx_set(payload, pfc_en);
+}
+
+static inline void mlxsw_reg_pfcc_pack(char *payload, u8 local_port)
+{
+ MLXSW_REG_ZERO(pfcc, payload);
+ mlxsw_reg_pfcc_local_port_set(payload, local_port);
+}
+
/* PPCNT - Ports Performance Counters Register
* -------------------------------------------
* The PPCNT register retrieves per port performance counters.
@@ -2180,6 +2497,11 @@ MLXSW_ITEM32(reg, ppcnt, local_port, 0x00, 16, 8);
*/
MLXSW_ITEM32(reg, ppcnt, pnat, 0x00, 14, 2);
+enum mlxsw_reg_ppcnt_grp {
+ MLXSW_REG_PPCNT_IEEE_8023_CNT = 0x0,
+ MLXSW_REG_PPCNT_PRIO_CNT = 0x10,
+};
+
/* reg_ppcnt_grp
* Performance counter group.
* Group 63 indicates all groups. Only valid on Set() operation with
@@ -2215,6 +2537,8 @@ MLXSW_ITEM32(reg, ppcnt, clr, 0x04, 31, 1);
*/
MLXSW_ITEM32(reg, ppcnt, prio_tc, 0x04, 0, 5);
+/* Ethernet IEEE 802.3 Counter Group */
+
/* reg_ppcnt_a_frames_transmitted_ok
* Access: RO
*/
@@ -2329,15 +2653,145 @@ MLXSW_ITEM64(reg, ppcnt, a_pause_mac_ctrl_frames_received,
MLXSW_ITEM64(reg, ppcnt, a_pause_mac_ctrl_frames_transmitted,
0x08 + 0x90, 0, 64);
-static inline void mlxsw_reg_ppcnt_pack(char *payload, u8 local_port)
+/* Ethernet Per Priority Group Counters */
+
+/* reg_ppcnt_rx_octets
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, rx_octets, 0x08 + 0x00, 0, 64);
+
+/* reg_ppcnt_rx_frames
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, rx_frames, 0x08 + 0x20, 0, 64);
+
+/* reg_ppcnt_tx_octets
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, tx_octets, 0x08 + 0x28, 0, 64);
+
+/* reg_ppcnt_tx_frames
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, tx_frames, 0x08 + 0x48, 0, 64);
+
+/* reg_ppcnt_rx_pause
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, rx_pause, 0x08 + 0x50, 0, 64);
+
+/* reg_ppcnt_rx_pause_duration
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, rx_pause_duration, 0x08 + 0x58, 0, 64);
+
+/* reg_ppcnt_tx_pause
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, tx_pause, 0x08 + 0x60, 0, 64);
+
+/* reg_ppcnt_tx_pause_duration
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, tx_pause_duration, 0x08 + 0x68, 0, 64);
+
+/* reg_ppcnt_rx_pause_transition
+ * Access: RO
+ */
+MLXSW_ITEM64(reg, ppcnt, tx_pause_transition, 0x08 + 0x70, 0, 64);
+
+static inline void mlxsw_reg_ppcnt_pack(char *payload, u8 local_port,
+ enum mlxsw_reg_ppcnt_grp grp,
+ u8 prio_tc)
{
MLXSW_REG_ZERO(ppcnt, payload);
mlxsw_reg_ppcnt_swid_set(payload, 0);
mlxsw_reg_ppcnt_local_port_set(payload, local_port);
mlxsw_reg_ppcnt_pnat_set(payload, 0);
- mlxsw_reg_ppcnt_grp_set(payload, 0);
+ mlxsw_reg_ppcnt_grp_set(payload, grp);
mlxsw_reg_ppcnt_clr_set(payload, 0);
- mlxsw_reg_ppcnt_prio_tc_set(payload, 0);
+ mlxsw_reg_ppcnt_prio_tc_set(payload, prio_tc);
+}
+
+/* PPTB - Port Prio To Buffer Register
+ * -----------------------------------
+ * Configures the switch priority to buffer table.
+ */
+#define MLXSW_REG_PPTB_ID 0x500B
+#define MLXSW_REG_PPTB_LEN 0x0C
+
+static const struct mlxsw_reg_info mlxsw_reg_pptb = {
+ .id = MLXSW_REG_PPTB_ID,
+ .len = MLXSW_REG_PPTB_LEN,
+};
+
+enum {
+ MLXSW_REG_PPTB_MM_UM,
+ MLXSW_REG_PPTB_MM_UNICAST,
+ MLXSW_REG_PPTB_MM_MULTICAST,
+};
+
+/* reg_pptb_mm
+ * Mapping mode.
+ * 0 - Map both unicast and multicast packets to the same buffer.
+ * 1 - Map only unicast packets.
+ * 2 - Map only multicast packets.
+ * Access: Index
+ *
+ * Note: SwitchX-2 only supports the first option.
+ */
+MLXSW_ITEM32(reg, pptb, mm, 0x00, 28, 2);
+
+/* reg_pptb_local_port
+ * Local port number.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, pptb, local_port, 0x00, 16, 8);
+
+/* reg_pptb_um
+ * Enables the update of the untagged_buf field.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pptb, um, 0x00, 8, 1);
+
+/* reg_pptb_pm
+ * Enables the update of the prio_to_buff field.
+ * Bit <i> is a flag for updating the mapping for switch priority <i>.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pptb, pm, 0x00, 0, 8);
+
+/* reg_pptb_prio_to_buff
+ * Mapping of switch priority <i> to one of the allocated receive port
+ * buffers.
+ * Access: RW
+ */
+MLXSW_ITEM_BIT_ARRAY(reg, pptb, prio_to_buff, 0x04, 0x04, 4);
+
+/* reg_pptb_pm_msb
+ * Enables the update of the prio_to_buff field.
+ * Bit <i> is a flag for updating the mapping for switch priority <i+8>.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, pptb, pm_msb, 0x08, 24, 8);
+
+/* reg_pptb_untagged_buff
+ * Mapping of untagged frames to one of the allocated receive port buffers.
+ * Access: RW
+ *
+ * Note: In SwitchX-2 this field must be mapped to buffer 8. Reserved for
+ * Spectrum, as it maps untagged packets based on the default switch priority.
+ */
+MLXSW_ITEM32(reg, pptb, untagged_buff, 0x08, 0, 4);
+
+#define MLXSW_REG_PPTB_ALL_PRIO 0xFF
+
+static inline void mlxsw_reg_pptb_pack(char *payload, u8 local_port)
+{
+ MLXSW_REG_ZERO(pptb, payload);
+ mlxsw_reg_pptb_mm_set(payload, MLXSW_REG_PPTB_MM_UM);
+ mlxsw_reg_pptb_local_port_set(payload, local_port);
+ mlxsw_reg_pptb_pm_set(payload, MLXSW_REG_PPTB_ALL_PRIO);
}
/* PBMC - Port Buffer Management Control Register
@@ -2346,7 +2800,7 @@ static inline void mlxsw_reg_ppcnt_pack(char *payload, u8 local_port)
* allocation for different Prios, and the Pause threshold management.
*/
#define MLXSW_REG_PBMC_ID 0x500C
-#define MLXSW_REG_PBMC_LEN 0x68
+#define MLXSW_REG_PBMC_LEN 0x6C
static const struct mlxsw_reg_info mlxsw_reg_pbmc = {
.id = MLXSW_REG_PBMC_ID,
@@ -2374,6 +2828,8 @@ MLXSW_ITEM32(reg, pbmc, xoff_timer_value, 0x04, 16, 16);
*/
MLXSW_ITEM32(reg, pbmc, xoff_refresh, 0x04, 0, 16);
+#define MLXSW_REG_PBMC_PORT_SHARED_BUF_IDX 11
+
/* reg_pbmc_buf_lossy
* The field indicates if the buffer is lossy.
* 0 - Lossless
@@ -2398,6 +2854,30 @@ MLXSW_ITEM32_INDEXED(reg, pbmc, buf_epsb, 0x0C, 24, 1, 0x08, 0x00, false);
*/
MLXSW_ITEM32_INDEXED(reg, pbmc, buf_size, 0x0C, 0, 16, 0x08, 0x00, false);
+/* reg_pbmc_buf_xoff_threshold
+ * Once the amount of data in the buffer goes above this value, device
+ * starts sending PFC frames for all priorities associated with the
+ * buffer. Units are represented in cells. Reserved in case of lossy
+ * buffer.
+ * Access: RW
+ *
+ * Note: In Spectrum, reserved for buffer[9].
+ */
+MLXSW_ITEM32_INDEXED(reg, pbmc, buf_xoff_threshold, 0x0C, 16, 16,
+ 0x08, 0x04, false);
+
+/* reg_pbmc_buf_xon_threshold
+ * When the amount of data in the buffer goes below this value, device
+ * stops sending PFC frames for the priorities associated with the
+ * buffer. Units are represented in cells. Reserved in case of lossy
+ * buffer.
+ * Access: RW
+ *
+ * Note: In Spectrum, reserved for buffer[9].
+ */
+MLXSW_ITEM32_INDEXED(reg, pbmc, buf_xon_threshold, 0x0C, 0, 16,
+ 0x08, 0x04, false);
+
static inline void mlxsw_reg_pbmc_pack(char *payload, u8 local_port,
u16 xoff_timer_value, u16 xoff_refresh)
{
@@ -2416,6 +2896,17 @@ static inline void mlxsw_reg_pbmc_lossy_buffer_pack(char *payload,
mlxsw_reg_pbmc_buf_size_set(payload, buf_index, size);
}
+static inline void mlxsw_reg_pbmc_lossless_buffer_pack(char *payload,
+ int buf_index, u16 size,
+ u16 threshold)
+{
+ mlxsw_reg_pbmc_buf_lossy_set(payload, buf_index, 0);
+ mlxsw_reg_pbmc_buf_epsb_set(payload, buf_index, 0);
+ mlxsw_reg_pbmc_buf_size_set(payload, buf_index, size);
+ mlxsw_reg_pbmc_buf_xoff_threshold_set(payload, buf_index, threshold);
+ mlxsw_reg_pbmc_buf_xon_threshold_set(payload, buf_index, threshold);
+}
+
/* PSPA - Port Switch Partition Allocation
* ---------------------------------------
* Controls the association of a port with a switch partition and enables
@@ -3283,6 +3774,10 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id)
return "SFMR";
case MLXSW_REG_SPVMLR_ID:
return "SPVMLR";
+ case MLXSW_REG_QTCT_ID:
+ return "QTCT";
+ case MLXSW_REG_QEEC_ID:
+ return "QEEC";
case MLXSW_REG_PMLP_ID:
return "PMLP";
case MLXSW_REG_PMTU_ID:
@@ -3293,8 +3788,12 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id)
return "PPAD";
case MLXSW_REG_PAOS_ID:
return "PAOS";
+ case MLXSW_REG_PFCC_ID:
+ return "PFCC";
case MLXSW_REG_PPCNT_ID:
return "PPCNT";
+ case MLXSW_REG_PPTB_ID:
+ return "PPTB";
case MLXSW_REG_PBMC_ID:
return "PBMC";
case MLXSW_REG_PSPA_ID:
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index cb5f36e497e9..507263a2d226 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -49,6 +49,7 @@
#include <linux/jiffies.h>
#include <linux/bitops.h>
#include <linux/list.h>
+#include <linux/dcbnl.h>
#include <net/devlink.h>
#include <net/switchdev.h>
#include <generated/utsrelease.h>
@@ -449,16 +450,89 @@ static int mlxsw_sp_port_set_mac_address(struct net_device *dev, void *p)
return 0;
}
+static void mlxsw_sp_pg_buf_pack(char *pbmc_pl, int pg_index, int mtu,
+ bool pause_en, bool pfc_en, u16 delay)
+{
+ u16 pg_size = 2 * MLXSW_SP_BYTES_TO_CELLS(mtu);
+
+ delay = pfc_en ? mlxsw_sp_pfc_delay_get(mtu, delay) :
+ MLXSW_SP_PAUSE_DELAY;
+
+ if (pause_en || pfc_en)
+ mlxsw_reg_pbmc_lossless_buffer_pack(pbmc_pl, pg_index,
+ pg_size + delay, pg_size);
+ else
+ mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, pg_index, pg_size);
+}
+
+int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu,
+ u8 *prio_tc, bool pause_en,
+ struct ieee_pfc *my_pfc)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ u8 pfc_en = !!my_pfc ? my_pfc->pfc_en : 0;
+ u16 delay = !!my_pfc ? my_pfc->delay : 0;
+ char pbmc_pl[MLXSW_REG_PBMC_LEN];
+ int i, j, err;
+
+ mlxsw_reg_pbmc_pack(pbmc_pl, mlxsw_sp_port->local_port, 0, 0);
+ err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl);
+ if (err)
+ return err;
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ bool configure = false;
+ bool pfc = false;
+
+ for (j = 0; j < IEEE_8021QAZ_MAX_TCS; j++) {
+ if (prio_tc[j] == i) {
+ pfc = pfc_en & BIT(j);
+ configure = true;
+ break;
+ }
+ }
+
+ if (!configure)
+ continue;
+ mlxsw_sp_pg_buf_pack(pbmc_pl, i, mtu, pause_en, pfc, delay);
+ }
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl);
+}
+
+static int mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ int mtu, bool pause_en)
+{
+ u8 def_prio_tc[IEEE_8021QAZ_MAX_TCS] = {0};
+ bool dcb_en = !!mlxsw_sp_port->dcb.ets;
+ struct ieee_pfc *my_pfc;
+ u8 *prio_tc;
+
+ prio_tc = dcb_en ? mlxsw_sp_port->dcb.ets->prio_tc : def_prio_tc;
+ my_pfc = dcb_en ? mlxsw_sp_port->dcb.pfc : NULL;
+
+ return __mlxsw_sp_port_headroom_set(mlxsw_sp_port, mtu, prio_tc,
+ pause_en, my_pfc);
+}
+
static int mlxsw_sp_port_change_mtu(struct net_device *dev, int mtu)
{
struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+ bool pause_en = mlxsw_sp_port_is_pause_en(mlxsw_sp_port);
int err;
- err = mlxsw_sp_port_mtu_set(mlxsw_sp_port, mtu);
+ err = mlxsw_sp_port_headroom_set(mlxsw_sp_port, mtu, pause_en);
if (err)
return err;
+ err = mlxsw_sp_port_mtu_set(mlxsw_sp_port, mtu);
+ if (err)
+ goto err_port_mtu_set;
dev->mtu = mtu;
return 0;
+
+err_port_mtu_set:
+ mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, pause_en);
+ return err;
}
static struct rtnl_link_stats64 *
@@ -936,6 +1010,68 @@ static void mlxsw_sp_port_get_drvinfo(struct net_device *dev,
sizeof(drvinfo->bus_info));
}
+static void mlxsw_sp_port_get_pauseparam(struct net_device *dev,
+ struct ethtool_pauseparam *pause)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+
+ pause->rx_pause = mlxsw_sp_port->link.rx_pause;
+ pause->tx_pause = mlxsw_sp_port->link.tx_pause;
+}
+
+static int mlxsw_sp_port_pause_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct ethtool_pauseparam *pause)
+{
+ char pfcc_pl[MLXSW_REG_PFCC_LEN];
+
+ mlxsw_reg_pfcc_pack(pfcc_pl, mlxsw_sp_port->local_port);
+ mlxsw_reg_pfcc_pprx_set(pfcc_pl, pause->rx_pause);
+ mlxsw_reg_pfcc_pptx_set(pfcc_pl, pause->tx_pause);
+
+ return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pfcc),
+ pfcc_pl);
+}
+
+static int mlxsw_sp_port_set_pauseparam(struct net_device *dev,
+ struct ethtool_pauseparam *pause)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+ bool pause_en = pause->tx_pause || pause->rx_pause;
+ int err;
+
+ if (mlxsw_sp_port->dcb.pfc && mlxsw_sp_port->dcb.pfc->pfc_en) {
+ netdev_err(dev, "PFC already enabled on port\n");
+ return -EINVAL;
+ }
+
+ if (pause->autoneg) {
+ netdev_err(dev, "PAUSE frames autonegotiation isn't supported\n");
+ return -EINVAL;
+ }
+
+ err = mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, pause_en);
+ if (err) {
+ netdev_err(dev, "Failed to configure port's headroom\n");
+ return err;
+ }
+
+ err = mlxsw_sp_port_pause_set(mlxsw_sp_port, pause);
+ if (err) {
+ netdev_err(dev, "Failed to set PAUSE parameters\n");
+ goto err_port_pause_configure;
+ }
+
+ mlxsw_sp_port->link.rx_pause = pause->rx_pause;
+ mlxsw_sp_port->link.tx_pause = pause->tx_pause;
+
+ return 0;
+
+err_port_pause_configure:
+ pause_en = mlxsw_sp_port_is_pause_en(mlxsw_sp_port);
+ mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, pause_en);
+ return err;
+}
+
struct mlxsw_sp_port_hw_stats {
char str[ETH_GSTRING_LEN];
u64 (*getter)(char *payload);
@@ -1071,7 +1207,8 @@ static void mlxsw_sp_port_get_stats(struct net_device *dev,
int i;
int err;
- mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sp_port->local_port);
+ mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sp_port->local_port,
+ MLXSW_REG_PPCNT_IEEE_8023_CNT, 0);
err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ppcnt), ppcnt_pl);
for (i = 0; i < MLXSW_SP_PORT_HW_STATS_LEN; i++)
data[i] = !err ? mlxsw_sp_port_hw_stats[i].getter(ppcnt_pl) : 0;
@@ -1419,6 +1556,8 @@ static int mlxsw_sp_port_set_settings(struct net_device *dev,
static const struct ethtool_ops mlxsw_sp_port_ethtool_ops = {
.get_drvinfo = mlxsw_sp_port_get_drvinfo,
.get_link = ethtool_op_get_link,
+ .get_pauseparam = mlxsw_sp_port_get_pauseparam,
+ .set_pauseparam = mlxsw_sp_port_set_pauseparam,
.get_strings = mlxsw_sp_port_get_strings,
.set_phys_id = mlxsw_sp_port_set_phys_id,
.get_ethtool_stats = mlxsw_sp_port_get_stats,
@@ -1441,6 +1580,108 @@ mlxsw_sp_port_speed_by_width_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 width)
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
}
+int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index,
+ bool dwrr, u8 dwrr_weight)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char qeec_pl[MLXSW_REG_QEEC_LEN];
+
+ mlxsw_reg_qeec_pack(qeec_pl, mlxsw_sp_port->local_port, hr, index,
+ next_index);
+ mlxsw_reg_qeec_de_set(qeec_pl, true);
+ mlxsw_reg_qeec_dwrr_set(qeec_pl, dwrr);
+ mlxsw_reg_qeec_dwrr_weight_set(qeec_pl, dwrr_weight);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl);
+}
+
+int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ enum mlxsw_reg_qeec_hr hr, u8 index,
+ u8 next_index, u32 maxrate)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char qeec_pl[MLXSW_REG_QEEC_LEN];
+
+ mlxsw_reg_qeec_pack(qeec_pl, mlxsw_sp_port->local_port, hr, index,
+ next_index);
+ mlxsw_reg_qeec_mase_set(qeec_pl, true);
+ mlxsw_reg_qeec_max_shaper_rate_set(qeec_pl, maxrate);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl);
+}
+
+int mlxsw_sp_port_prio_tc_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ u8 switch_prio, u8 tclass)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char qtct_pl[MLXSW_REG_QTCT_LEN];
+
+ mlxsw_reg_qtct_pack(qtct_pl, mlxsw_sp_port->local_port, switch_prio,
+ tclass);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qtct), qtct_pl);
+}
+
+static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ int err, i;
+
+ /* Setup the elements hierarcy, so that each TC is linked to
+ * one subgroup, which are all member in the same group.
+ */
+ err = mlxsw_sp_port_ets_set(mlxsw_sp_port,
+ MLXSW_REG_QEEC_HIERARCY_GROUP, 0, 0, false,
+ 0);
+ if (err)
+ return err;
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ err = mlxsw_sp_port_ets_set(mlxsw_sp_port,
+ MLXSW_REG_QEEC_HIERARCY_SUBGROUP, i,
+ 0, false, 0);
+ if (err)
+ return err;
+ }
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ err = mlxsw_sp_port_ets_set(mlxsw_sp_port,
+ MLXSW_REG_QEEC_HIERARCY_TC, i, i,
+ false, 0);
+ if (err)
+ return err;
+ }
+
+ /* Make sure the max shaper is disabled in all hierarcies that
+ * support it.
+ */
+ err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
+ MLXSW_REG_QEEC_HIERARCY_PORT, 0, 0,
+ MLXSW_REG_QEEC_MAS_DIS);
+ if (err)
+ return err;
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
+ MLXSW_REG_QEEC_HIERARCY_SUBGROUP,
+ i, 0,
+ MLXSW_REG_QEEC_MAS_DIS);
+ if (err)
+ return err;
+ }
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
+ MLXSW_REG_QEEC_HIERARCY_TC,
+ i, i,
+ MLXSW_REG_QEEC_MAS_DIS);
+ if (err)
+ return err;
+ }
+
+ /* Map all priorities to traffic class 0. */
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, 0);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
static int __mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
bool split, u8 module, u8 width)
{
@@ -1548,6 +1789,21 @@ static int __mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
goto err_port_buffers_init;
}
+ err = mlxsw_sp_port_ets_init(mlxsw_sp_port);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to initialize ETS\n",
+ mlxsw_sp_port->local_port);
+ goto err_port_ets_init;
+ }
+
+ /* ETS and buffers must be initialized before DCB. */
+ err = mlxsw_sp_port_dcb_init(mlxsw_sp_port);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to initialize DCB\n",
+ mlxsw_sp_port->local_port);
+ goto err_port_dcb_init;
+ }
+
mlxsw_sp_port_switchdev_init(mlxsw_sp_port);
err = register_netdev(dev);
if (err) {
@@ -1568,6 +1824,8 @@ static int __mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
err_port_vlan_init:
unregister_netdev(dev);
err_register_netdev:
+err_port_dcb_init:
+err_port_ets_init:
err_port_buffers_init:
err_port_admin_status_set:
err_port_mtu_set:
@@ -1637,6 +1895,7 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port)
devlink_port = &mlxsw_sp_port->devlink_port;
devlink_port_type_clear(devlink_port);
unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */
+ mlxsw_sp_port_dcb_fini(mlxsw_sp_port);
devlink_port_unregister(devlink_port);
mlxsw_sp_port_vports_fini(mlxsw_sp_port);
mlxsw_sp_port_switchdev_fini(mlxsw_sp_port);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index d58ab0cd9507..47610a5ccd78 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -42,6 +42,7 @@
#include <linux/bitops.h>
#include <linux/if_vlan.h>
#include <linux/list.h>
+#include <linux/dcbnl.h>
#include <net/switchdev.h>
#include <net/devlink.h>
@@ -62,6 +63,23 @@
#define MLXSW_SP_PORT_BASE_SPEED 25000 /* Mb/s */
+#define MLXSW_SP_BYTES_PER_CELL 96
+
+#define MLXSW_SP_BYTES_TO_CELLS(b) DIV_ROUND_UP(b, MLXSW_SP_BYTES_PER_CELL)
+
+/* Maximum delay buffer needed in case of PAUSE frames, in cells.
+ * Assumes 100m cable and maximum MTU.
+ */
+#define MLXSW_SP_PAUSE_DELAY 612
+
+#define MLXSW_SP_CELL_FACTOR 2 /* 2 * cell_size / (IPG + cell_size + 1) */
+
+static inline u16 mlxsw_sp_pfc_delay_get(int mtu, u16 delay)
+{
+ delay = MLXSW_SP_BYTES_TO_CELLS(DIV_ROUND_UP(delay, BITS_PER_BYTE));
+ return MLXSW_SP_CELL_FACTOR * delay + MLXSW_SP_BYTES_TO_CELLS(mtu);
+}
+
struct mlxsw_sp_port;
struct mlxsw_sp_upper {
@@ -166,6 +184,15 @@ struct mlxsw_sp_port {
struct mlxsw_sp_vfid *vfid;
u16 vid;
} vport;
+ struct {
+ u8 tx_pause:1,
+ rx_pause:1;
+ } link;
+ struct {
+ struct ieee_ets *ets;
+ struct ieee_maxrate *maxrate;
+ struct ieee_pfc *pfc;
+ } dcb;
/* 802.1Q bridge VLANs */
unsigned long *active_vlans;
unsigned long *untagged_vlans;
@@ -174,6 +201,12 @@ struct mlxsw_sp_port {
struct devlink_port devlink_port;
};
+static inline bool
+mlxsw_sp_port_is_pause_en(const struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ return mlxsw_sp_port->link.tx_pause || mlxsw_sp_port->link.rx_pause;
+}
+
static inline struct mlxsw_sp_port *
mlxsw_sp_port_lagged_get(struct mlxsw_sp *mlxsw_sp, u16 lag_id, u8 port_index)
{
@@ -265,5 +298,33 @@ int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 vfid,
bool set, bool only_uc);
void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port);
int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid);
+int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index,
+ bool dwrr, u8 dwrr_weight);
+int mlxsw_sp_port_prio_tc_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ u8 switch_prio, u8 tclass);
+int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu,
+ u8 *prio_tc, bool pause_en,
+ struct ieee_pfc *my_pfc);
+int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ enum mlxsw_reg_qeec_hr hr, u8 index,
+ u8 next_index, u32 maxrate);
+
+#ifdef CONFIG_MLXSW_SPECTRUM_DCB
+
+int mlxsw_sp_port_dcb_init(struct mlxsw_sp_port *mlxsw_sp_port);
+void mlxsw_sp_port_dcb_fini(struct mlxsw_sp_port *mlxsw_sp_port);
+
+#else
+
+static inline int mlxsw_sp_port_dcb_init(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ return 0;
+}
+
+static inline void mlxsw_sp_port_dcb_fini(struct mlxsw_sp_port *mlxsw_sp_port)
+{}
+
+#endif
#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
index d59195e3f7fb..97c8d537be5b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
@@ -34,6 +34,8 @@
#include <linux/kernel.h>
#include <linux/types.h>
+#include <linux/dcbnl.h>
+#include <linux/if_ether.h>
#include "spectrum.h"
#include "core.h"
@@ -52,15 +54,15 @@ struct mlxsw_sp_pb {
}
static const struct mlxsw_sp_pb mlxsw_sp_pbs[] = {
- MLXSW_SP_PB(0, 208),
- MLXSW_SP_PB(1, 208),
- MLXSW_SP_PB(2, 208),
- MLXSW_SP_PB(3, 208),
- MLXSW_SP_PB(4, 208),
- MLXSW_SP_PB(5, 208),
- MLXSW_SP_PB(6, 208),
- MLXSW_SP_PB(7, 208),
- MLXSW_SP_PB(9, 208),
+ MLXSW_SP_PB(0, 2 * MLXSW_SP_BYTES_TO_CELLS(ETH_FRAME_LEN)),
+ MLXSW_SP_PB(1, 0),
+ MLXSW_SP_PB(2, 0),
+ MLXSW_SP_PB(3, 0),
+ MLXSW_SP_PB(4, 0),
+ MLXSW_SP_PB(5, 0),
+ MLXSW_SP_PB(6, 0),
+ MLXSW_SP_PB(7, 0),
+ MLXSW_SP_PB(9, 2 * MLXSW_SP_BYTES_TO_CELLS(MLXSW_PORT_MAX_MTU)),
};
#define MLXSW_SP_PBS_LEN ARRAY_SIZE(mlxsw_sp_pbs)
@@ -78,11 +80,33 @@ static int mlxsw_sp_port_pb_init(struct mlxsw_sp_port *mlxsw_sp_port)
pb = &mlxsw_sp_pbs[i];
mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, pb->index, pb->size);
}
+ mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl,
+ MLXSW_REG_PBMC_PORT_SHARED_BUF_IDX, 0);
return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core,
MLXSW_REG(pbmc), pbmc_pl);
}
-#define MLXSW_SP_SB_BYTES_PER_CELL 96
+static int mlxsw_sp_port_pb_prio_init(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ char pptb_pl[MLXSW_REG_PPTB_LEN];
+ int i;
+
+ mlxsw_reg_pptb_pack(pptb_pl, mlxsw_sp_port->local_port);
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ mlxsw_reg_pptb_prio_to_buff_set(pptb_pl, i, 0);
+ return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pptb),
+ pptb_pl);
+}
+
+static int mlxsw_sp_port_headroom_init(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ int err;
+
+ err = mlxsw_sp_port_pb_init(mlxsw_sp_port);
+ if (err)
+ return err;
+ return mlxsw_sp_port_pb_prio_init(mlxsw_sp_port);
+}
struct mlxsw_sp_sb_pool {
u8 pool;
@@ -92,11 +116,9 @@ struct mlxsw_sp_sb_pool {
};
#define MLXSW_SP_SB_POOL_INGRESS_SIZE \
- ((15000000 - (2 * 20000 * MLXSW_PORT_MAX_PORTS)) / \
- MLXSW_SP_SB_BYTES_PER_CELL)
+ (15000000 - (2 * 20000 * MLXSW_PORT_MAX_PORTS))
#define MLXSW_SP_SB_POOL_EGRESS_SIZE \
- ((14000000 - (8 * 1500 * MLXSW_PORT_MAX_PORTS)) / \
- MLXSW_SP_SB_BYTES_PER_CELL)
+ (14000000 - (8 * 1500 * MLXSW_PORT_MAX_PORTS))
#define MLXSW_SP_SB_POOL(_pool, _dir, _mode, _size) \
{ \
@@ -115,14 +137,14 @@ struct mlxsw_sp_sb_pool {
MLXSW_REG_SBPR_MODE_DYNAMIC, _size)
static const struct mlxsw_sp_sb_pool mlxsw_sp_sb_pools[] = {
- MLXSW_SP_SB_POOL_INGRESS(0, MLXSW_SP_SB_POOL_INGRESS_SIZE),
+ MLXSW_SP_SB_POOL_INGRESS(0, MLXSW_SP_BYTES_TO_CELLS(MLXSW_SP_SB_POOL_INGRESS_SIZE)),
MLXSW_SP_SB_POOL_INGRESS(1, 0),
MLXSW_SP_SB_POOL_INGRESS(2, 0),
MLXSW_SP_SB_POOL_INGRESS(3, 0),
- MLXSW_SP_SB_POOL_EGRESS(0, MLXSW_SP_SB_POOL_EGRESS_SIZE),
+ MLXSW_SP_SB_POOL_EGRESS(0, MLXSW_SP_BYTES_TO_CELLS(MLXSW_SP_SB_POOL_EGRESS_SIZE)),
MLXSW_SP_SB_POOL_EGRESS(1, 0),
MLXSW_SP_SB_POOL_EGRESS(2, 0),
- MLXSW_SP_SB_POOL_EGRESS(2, MLXSW_SP_SB_POOL_EGRESS_SIZE),
+ MLXSW_SP_SB_POOL_EGRESS(2, MLXSW_SP_BYTES_TO_CELLS(MLXSW_SP_SB_POOL_EGRESS_SIZE)),
};
#define MLXSW_SP_SB_POOLS_LEN ARRAY_SIZE(mlxsw_sp_sb_pools)
@@ -178,7 +200,7 @@ struct mlxsw_sp_sb_cm {
MLXSW_SP_SB_CM(_tc, MLXSW_REG_SBCM_DIR_EGRESS, 104, 2, 3)
static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms[] = {
- MLXSW_SP_SB_CM_INGRESS(0, 10000 / MLXSW_SP_SB_BYTES_PER_CELL, 8),
+ MLXSW_SP_SB_CM_INGRESS(0, MLXSW_SP_BYTES_TO_CELLS(10000), 8),
MLXSW_SP_SB_CM_INGRESS(1, 0, 0),
MLXSW_SP_SB_CM_INGRESS(2, 0, 0),
MLXSW_SP_SB_CM_INGRESS(3, 0, 0),
@@ -186,15 +208,15 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms[] = {
MLXSW_SP_SB_CM_INGRESS(5, 0, 0),
MLXSW_SP_SB_CM_INGRESS(6, 0, 0),
MLXSW_SP_SB_CM_INGRESS(7, 0, 0),
- MLXSW_SP_SB_CM_INGRESS(9, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff),
- MLXSW_SP_SB_CM_EGRESS(0, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9),
- MLXSW_SP_SB_CM_EGRESS(1, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9),
- MLXSW_SP_SB_CM_EGRESS(2, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9),
- MLXSW_SP_SB_CM_EGRESS(3, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9),
- MLXSW_SP_SB_CM_EGRESS(4, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9),
- MLXSW_SP_SB_CM_EGRESS(5, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9),
- MLXSW_SP_SB_CM_EGRESS(6, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9),
- MLXSW_SP_SB_CM_EGRESS(7, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9),
+ MLXSW_SP_SB_CM_INGRESS(9, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff),
+ MLXSW_SP_SB_CM_EGRESS(0, MLXSW_SP_BYTES_TO_CELLS(1500), 9),
+ MLXSW_SP_SB_CM_EGRESS(1, MLXSW_SP_BYTES_TO_CELLS(1500), 9),
+ MLXSW_SP_SB_CM_EGRESS(2, MLXSW_SP_BYTES_TO_CELLS(1500), 9),
+ MLXSW_SP_SB_CM_EGRESS(3, MLXSW_SP_BYTES_TO_CELLS(1500), 9),
+ MLXSW_SP_SB_CM_EGRESS(4, MLXSW_SP_BYTES_TO_CELLS(1500), 9),
+ MLXSW_SP_SB_CM_EGRESS(5, MLXSW_SP_BYTES_TO_CELLS(1500), 9),
+ MLXSW_SP_SB_CM_EGRESS(6, MLXSW_SP_BYTES_TO_CELLS(1500), 9),
+ MLXSW_SP_SB_CM_EGRESS(7, MLXSW_SP_BYTES_TO_CELLS(1500), 9),
MLXSW_SP_SB_CM_EGRESS(8, 0, 0),
MLXSW_SP_SB_CM_EGRESS(9, 0, 0),
MLXSW_SP_SB_CM_EGRESS(10, 0, 0),
@@ -353,21 +375,21 @@ struct mlxsw_sp_sb_mm {
}
static const struct mlxsw_sp_sb_mm mlxsw_sp_sb_mms[] = {
- MLXSW_SP_SB_MM(0, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0),
- MLXSW_SP_SB_MM(1, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0),
- MLXSW_SP_SB_MM(2, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0),
- MLXSW_SP_SB_MM(3, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0),
- MLXSW_SP_SB_MM(4, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0),
- MLXSW_SP_SB_MM(5, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0),
- MLXSW_SP_SB_MM(6, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0),
- MLXSW_SP_SB_MM(7, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0),
- MLXSW_SP_SB_MM(8, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0),
- MLXSW_SP_SB_MM(9, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0),
- MLXSW_SP_SB_MM(10, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0),
- MLXSW_SP_SB_MM(11, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0),
- MLXSW_SP_SB_MM(12, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0),
- MLXSW_SP_SB_MM(13, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0),
- MLXSW_SP_SB_MM(14, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0),
+ MLXSW_SP_SB_MM(0, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0),
+ MLXSW_SP_SB_MM(1, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0),
+ MLXSW_SP_SB_MM(2, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0),
+ MLXSW_SP_SB_MM(3, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0),
+ MLXSW_SP_SB_MM(4, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0),
+ MLXSW_SP_SB_MM(5, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0),
+ MLXSW_SP_SB_MM(6, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0),
+ MLXSW_SP_SB_MM(7, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0),
+ MLXSW_SP_SB_MM(8, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0),
+ MLXSW_SP_SB_MM(9, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0),
+ MLXSW_SP_SB_MM(10, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0),
+ MLXSW_SP_SB_MM(11, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0),
+ MLXSW_SP_SB_MM(12, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0),
+ MLXSW_SP_SB_MM(13, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0),
+ MLXSW_SP_SB_MM(14, MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0),
};
#define MLXSW_SP_SB_MMS_LEN ARRAY_SIZE(mlxsw_sp_sb_mms)
@@ -410,7 +432,7 @@ int mlxsw_sp_port_buffers_init(struct mlxsw_sp_port *mlxsw_sp_port)
{
int err;
- err = mlxsw_sp_port_pb_init(mlxsw_sp_port);
+ err = mlxsw_sp_port_headroom_init(mlxsw_sp_port);
if (err)
return err;
err = mlxsw_sp_port_sb_cms_init(mlxsw_sp_port);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
new file mode 100644
index 000000000000..0b323661c0b6
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
@@ -0,0 +1,480 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/netdevice.h>
+#include <linux/string.h>
+#include <linux/bitops.h>
+#include <net/dcbnl.h>
+
+#include "spectrum.h"
+#include "reg.h"
+
+static u8 mlxsw_sp_dcbnl_getdcbx(struct net_device __always_unused *dev)
+{
+ return DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE;
+}
+
+static u8 mlxsw_sp_dcbnl_setdcbx(struct net_device __always_unused *dev,
+ u8 mode)
+{
+ return (mode != (DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE)) ? 1 : 0;
+}
+
+static int mlxsw_sp_dcbnl_ieee_getets(struct net_device *dev,
+ struct ieee_ets *ets)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+
+ memcpy(ets, mlxsw_sp_port->dcb.ets, sizeof(*ets));
+
+ return 0;
+}
+
+static int mlxsw_sp_port_ets_validate(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct ieee_ets *ets)
+{
+ struct net_device *dev = mlxsw_sp_port->dev;
+ bool has_ets_tc = false;
+ int i, tx_bw_sum = 0;
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ switch (ets->tc_tsa[i]) {
+ case IEEE_8021QAZ_TSA_STRICT:
+ break;
+ case IEEE_8021QAZ_TSA_ETS:
+ has_ets_tc = true;
+ tx_bw_sum += ets->tc_tx_bw[i];
+ break;
+ default:
+ netdev_err(dev, "Only strict priority and ETS are supported\n");
+ return -EINVAL;
+ }
+
+ if (ets->prio_tc[i] >= IEEE_8021QAZ_MAX_TCS) {
+ netdev_err(dev, "Invalid TC\n");
+ return -EINVAL;
+ }
+ }
+
+ if (has_ets_tc && tx_bw_sum != 100) {
+ netdev_err(dev, "Total ETS bandwidth should equal 100\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int mlxsw_sp_port_pg_prio_map(struct mlxsw_sp_port *mlxsw_sp_port,
+ u8 *prio_tc)
+{
+ char pptb_pl[MLXSW_REG_PPTB_LEN];
+ int i;
+
+ mlxsw_reg_pptb_pack(pptb_pl, mlxsw_sp_port->local_port);
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ mlxsw_reg_pptb_prio_to_buff_set(pptb_pl, i, prio_tc[i]);
+ return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pptb),
+ pptb_pl);
+}
+
+static bool mlxsw_sp_ets_has_pg(u8 *prio_tc, u8 pg)
+{
+ int i;
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ if (prio_tc[i] == pg)
+ return true;
+ return false;
+}
+
+static int mlxsw_sp_port_pg_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
+ u8 *old_prio_tc, u8 *new_prio_tc)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char pbmc_pl[MLXSW_REG_PBMC_LEN];
+ int err, i;
+
+ mlxsw_reg_pbmc_pack(pbmc_pl, mlxsw_sp_port->local_port, 0, 0);
+ err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl);
+ if (err)
+ return err;
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ u8 pg = old_prio_tc[i];
+
+ if (!mlxsw_sp_ets_has_pg(new_prio_tc, pg))
+ mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, pg, 0);
+ }
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl);
+}
+
+static int mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct ieee_ets *ets)
+{
+ bool pause_en = mlxsw_sp_port_is_pause_en(mlxsw_sp_port);
+ struct ieee_ets *my_ets = mlxsw_sp_port->dcb.ets;
+ struct net_device *dev = mlxsw_sp_port->dev;
+ int err;
+
+ /* Create the required PGs, but don't destroy existing ones, as
+ * traffic is still directed to them.
+ */
+ err = __mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu,
+ ets->prio_tc, pause_en,
+ mlxsw_sp_port->dcb.pfc);
+ if (err) {
+ netdev_err(dev, "Failed to configure port's headroom\n");
+ return err;
+ }
+
+ err = mlxsw_sp_port_pg_prio_map(mlxsw_sp_port, ets->prio_tc);
+ if (err) {
+ netdev_err(dev, "Failed to set PG-priority mapping\n");
+ goto err_port_prio_pg_map;
+ }
+
+ err = mlxsw_sp_port_pg_destroy(mlxsw_sp_port, my_ets->prio_tc,
+ ets->prio_tc);
+ if (err)
+ netdev_warn(dev, "Failed to remove ununsed PGs\n");
+
+ return 0;
+
+err_port_prio_pg_map:
+ mlxsw_sp_port_pg_destroy(mlxsw_sp_port, ets->prio_tc, my_ets->prio_tc);
+ return err;
+}
+
+static int __mlxsw_sp_dcbnl_ieee_setets(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct ieee_ets *ets)
+{
+ struct ieee_ets *my_ets = mlxsw_sp_port->dcb.ets;
+ struct net_device *dev = mlxsw_sp_port->dev;
+ int i, err;
+
+ /* Egress configuration. */
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ bool dwrr = ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS;
+ u8 weight = ets->tc_tx_bw[i];
+
+ err = mlxsw_sp_port_ets_set(mlxsw_sp_port,
+ MLXSW_REG_QEEC_HIERARCY_SUBGROUP, i,
+ 0, dwrr, weight);
+ if (err) {
+ netdev_err(dev, "Failed to link subgroup ETS element %d to group\n",
+ i);
+ goto err_port_ets_set;
+ }
+ }
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i,
+ ets->prio_tc[i]);
+ if (err) {
+ netdev_err(dev, "Failed to map prio %d to TC %d\n", i,
+ ets->prio_tc[i]);
+ goto err_port_prio_tc_set;
+ }
+ }
+
+ /* Ingress configuration. */
+ err = mlxsw_sp_port_headroom_set(mlxsw_sp_port, ets);
+ if (err)
+ goto err_port_headroom_set;
+
+ return 0;
+
+err_port_headroom_set:
+ i = IEEE_8021QAZ_MAX_TCS;
+err_port_prio_tc_set:
+ for (i--; i >= 0; i--)
+ mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, my_ets->prio_tc[i]);
+ i = IEEE_8021QAZ_MAX_TCS;
+err_port_ets_set:
+ for (i--; i >= 0; i--) {
+ bool dwrr = my_ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS;
+ u8 weight = my_ets->tc_tx_bw[i];
+
+ err = mlxsw_sp_port_ets_set(mlxsw_sp_port,
+ MLXSW_REG_QEEC_HIERARCY_SUBGROUP, i,
+ 0, dwrr, weight);
+ }
+ return err;
+}
+
+static int mlxsw_sp_dcbnl_ieee_setets(struct net_device *dev,
+ struct ieee_ets *ets)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+ int err;
+
+ err = mlxsw_sp_port_ets_validate(mlxsw_sp_port, ets);
+ if (err)
+ return err;
+
+ err = __mlxsw_sp_dcbnl_ieee_setets(mlxsw_sp_port, ets);
+ if (err)
+ return err;
+
+ memcpy(mlxsw_sp_port->dcb.ets, ets, sizeof(*ets));
+
+ return 0;
+}
+
+static int mlxsw_sp_dcbnl_ieee_getmaxrate(struct net_device *dev,
+ struct ieee_maxrate *maxrate)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+
+ memcpy(maxrate, mlxsw_sp_port->dcb.maxrate, sizeof(*maxrate));
+
+ return 0;
+}
+
+static int mlxsw_sp_dcbnl_ieee_setmaxrate(struct net_device *dev,
+ struct ieee_maxrate *maxrate)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+ struct ieee_maxrate *my_maxrate = mlxsw_sp_port->dcb.maxrate;
+ int err, i;
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
+ MLXSW_REG_QEEC_HIERARCY_SUBGROUP,
+ i, 0,
+ maxrate->tc_maxrate[i]);
+ if (err) {
+ netdev_err(dev, "Failed to set maxrate for TC %d\n", i);
+ goto err_port_ets_maxrate_set;
+ }
+ }
+
+ memcpy(mlxsw_sp_port->dcb.maxrate, maxrate, sizeof(*maxrate));
+
+ return 0;
+
+err_port_ets_maxrate_set:
+ for (i--; i >= 0; i--)
+ mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
+ MLXSW_REG_QEEC_HIERARCY_SUBGROUP,
+ i, 0, my_maxrate->tc_maxrate[i]);
+ return err;
+}
+
+static int mlxsw_sp_port_pfc_cnt_get(struct mlxsw_sp_port *mlxsw_sp_port,
+ u8 prio)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ struct ieee_pfc *my_pfc = mlxsw_sp_port->dcb.pfc;
+ char ppcnt_pl[MLXSW_REG_PPCNT_LEN];
+ int err;
+
+ mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sp_port->local_port,
+ MLXSW_REG_PPCNT_PRIO_CNT, prio);
+ err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ppcnt), ppcnt_pl);
+ if (err)
+ return err;
+
+ my_pfc->requests[prio] = mlxsw_reg_ppcnt_tx_pause_get(ppcnt_pl);
+ my_pfc->indications[prio] = mlxsw_reg_ppcnt_rx_pause_get(ppcnt_pl);
+
+ return 0;
+}
+
+static int mlxsw_sp_dcbnl_ieee_getpfc(struct net_device *dev,
+ struct ieee_pfc *pfc)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+ int err, i;
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+ err = mlxsw_sp_port_pfc_cnt_get(mlxsw_sp_port, i);
+ if (err) {
+ netdev_err(dev, "Failed to get PFC count for priority %d\n",
+ i);
+ return err;
+ }
+ }
+
+ memcpy(pfc, mlxsw_sp_port->dcb.pfc, sizeof(*pfc));
+
+ return 0;
+}
+
+static int mlxsw_sp_port_pfc_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct ieee_pfc *pfc)
+{
+ char pfcc_pl[MLXSW_REG_PFCC_LEN];
+
+ mlxsw_reg_pfcc_pack(pfcc_pl, mlxsw_sp_port->local_port);
+ mlxsw_reg_pfcc_prio_pack(pfcc_pl, pfc->pfc_en);
+
+ return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pfcc),
+ pfcc_pl);
+}
+
+static int mlxsw_sp_dcbnl_ieee_setpfc(struct net_device *dev,
+ struct ieee_pfc *pfc)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+ int err;
+
+ if (mlxsw_sp_port->link.tx_pause || mlxsw_sp_port->link.rx_pause) {
+ netdev_err(dev, "PAUSE frames already enabled on port\n");
+ return -EINVAL;
+ }
+
+ err = __mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu,
+ mlxsw_sp_port->dcb.ets->prio_tc,
+ false, pfc);
+ if (err) {
+ netdev_err(dev, "Failed to configure port's headroom for PFC\n");
+ return err;
+ }
+
+ err = mlxsw_sp_port_pfc_set(mlxsw_sp_port, pfc);
+ if (err) {
+ netdev_err(dev, "Failed to configure PFC\n");
+ goto err_port_pfc_set;
+ }
+
+ memcpy(mlxsw_sp_port->dcb.pfc, pfc, sizeof(*pfc));
+
+ return 0;
+
+err_port_pfc_set:
+ __mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu,
+ mlxsw_sp_port->dcb.ets->prio_tc, false,
+ mlxsw_sp_port->dcb.pfc);
+ return err;
+}
+
+static const struct dcbnl_rtnl_ops mlxsw_sp_dcbnl_ops = {
+ .ieee_getets = mlxsw_sp_dcbnl_ieee_getets,
+ .ieee_setets = mlxsw_sp_dcbnl_ieee_setets,
+ .ieee_getmaxrate = mlxsw_sp_dcbnl_ieee_getmaxrate,
+ .ieee_setmaxrate = mlxsw_sp_dcbnl_ieee_setmaxrate,
+ .ieee_getpfc = mlxsw_sp_dcbnl_ieee_getpfc,
+ .ieee_setpfc = mlxsw_sp_dcbnl_ieee_setpfc,
+
+ .getdcbx = mlxsw_sp_dcbnl_getdcbx,
+ .setdcbx = mlxsw_sp_dcbnl_setdcbx,
+};
+
+static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ mlxsw_sp_port->dcb.ets = kzalloc(sizeof(*mlxsw_sp_port->dcb.ets),
+ GFP_KERNEL);
+ if (!mlxsw_sp_port->dcb.ets)
+ return -ENOMEM;
+
+ mlxsw_sp_port->dcb.ets->ets_cap = IEEE_8021QAZ_MAX_TCS;
+
+ return 0;
+}
+
+static void mlxsw_sp_port_ets_fini(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ kfree(mlxsw_sp_port->dcb.ets);
+}
+
+static int mlxsw_sp_port_maxrate_init(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ int i;
+
+ mlxsw_sp_port->dcb.maxrate = kmalloc(sizeof(*mlxsw_sp_port->dcb.maxrate),
+ GFP_KERNEL);
+ if (!mlxsw_sp_port->dcb.maxrate)
+ return -ENOMEM;
+
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ mlxsw_sp_port->dcb.maxrate->tc_maxrate[i] = MLXSW_REG_QEEC_MAS_DIS;
+
+ return 0;
+}
+
+static void mlxsw_sp_port_maxrate_fini(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ kfree(mlxsw_sp_port->dcb.maxrate);
+}
+
+static int mlxsw_sp_port_pfc_init(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ mlxsw_sp_port->dcb.pfc = kzalloc(sizeof(*mlxsw_sp_port->dcb.pfc),
+ GFP_KERNEL);
+ if (!mlxsw_sp_port->dcb.pfc)
+ return -ENOMEM;
+
+ mlxsw_sp_port->dcb.pfc->pfc_cap = IEEE_8021QAZ_MAX_TCS;
+
+ return 0;
+}
+
+static void mlxsw_sp_port_pfc_fini(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ kfree(mlxsw_sp_port->dcb.pfc);
+}
+
+int mlxsw_sp_port_dcb_init(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ int err;
+
+ err = mlxsw_sp_port_ets_init(mlxsw_sp_port);
+ if (err)
+ return err;
+ err = mlxsw_sp_port_maxrate_init(mlxsw_sp_port);
+ if (err)
+ goto err_port_maxrate_init;
+ err = mlxsw_sp_port_pfc_init(mlxsw_sp_port);
+ if (err)
+ goto err_port_pfc_init;
+
+ mlxsw_sp_port->dev->dcbnl_ops = &mlxsw_sp_dcbnl_ops;
+
+ return 0;
+
+err_port_pfc_init:
+ mlxsw_sp_port_maxrate_fini(mlxsw_sp_port);
+err_port_maxrate_init:
+ mlxsw_sp_port_ets_fini(mlxsw_sp_port);
+ return err;
+}
+
+void mlxsw_sp_port_dcb_fini(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ mlxsw_sp_port_pfc_fini(mlxsw_sp_port);
+ mlxsw_sp_port_maxrate_fini(mlxsw_sp_port);
+ mlxsw_sp_port_ets_fini(mlxsw_sp_port);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
index 7a60a26759b6..c49447f31acc 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
@@ -518,7 +518,8 @@ static void mlxsw_sx_port_get_stats(struct net_device *dev,
int i;
int err;
- mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sx_port->local_port);
+ mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sx_port->local_port,
+ MLXSW_REG_PPCNT_IEEE_8023_CNT, 0);
err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(ppcnt), ppcnt_pl);
for (i = 0; i < MLXSW_SX_PORT_HW_STATS_LEN; i++)
data[i] = !err ? mlxsw_sx_port_hw_stats[i].getter(ppcnt_pl) : 0;
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 1c0fa364323e..51cccddfe403 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1192,6 +1192,45 @@ out:
unparsed->vx_flags &= ~VXLAN_GBP_USED_BITS;
}
+static bool vxlan_parse_gpe_hdr(struct vxlanhdr *unparsed,
+ __be32 *protocol,
+ struct sk_buff *skb, u32 vxflags)
+{
+ struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)unparsed;
+
+ /* Need to have Next Protocol set for interfaces in GPE mode. */
+ if (!gpe->np_applied)
+ return false;
+ /* "The initial version is 0. If a receiver does not support the
+ * version indicated it MUST drop the packet.
+ */
+ if (gpe->version != 0)
+ return false;
+ /* "When the O bit is set to 1, the packet is an OAM packet and OAM
+ * processing MUST occur." However, we don't implement OAM
+ * processing, thus drop the packet.
+ */
+ if (gpe->oam_flag)
+ return false;
+
+ switch (gpe->next_protocol) {
+ case VXLAN_GPE_NP_IPV4:
+ *protocol = htons(ETH_P_IP);
+ break;
+ case VXLAN_GPE_NP_IPV6:
+ *protocol = htons(ETH_P_IPV6);
+ break;
+ case VXLAN_GPE_NP_ETHERNET:
+ *protocol = htons(ETH_P_TEB);
+ break;
+ default:
+ return false;
+ }
+
+ unparsed->vx_flags &= ~VXLAN_GPE_USED_BITS;
+ return true;
+}
+
static bool vxlan_set_mac(struct vxlan_dev *vxlan,
struct vxlan_sock *vs,
struct sk_buff *skb)
@@ -1257,9 +1296,11 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
struct vxlanhdr unparsed;
struct vxlan_metadata _md;
struct vxlan_metadata *md = &_md;
+ __be32 protocol = htons(ETH_P_TEB);
+ bool raw_proto = false;
void *oiph;
- /* Need Vxlan and inner Ethernet header to be present */
+ /* Need UDP and VXLAN header to be present */
if (!pskb_may_pull(skb, VXLAN_HLEN))
return 1;
@@ -1283,9 +1324,18 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
if (!vxlan)
goto drop;
- if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB),
- !net_eq(vxlan->net, dev_net(vxlan->dev))))
- goto drop;
+ /* For backwards compatibility, only allow reserved fields to be
+ * used by VXLAN extensions if explicitly requested.
+ */
+ if (vs->flags & VXLAN_F_GPE) {
+ if (!vxlan_parse_gpe_hdr(&unparsed, &protocol, skb, vs->flags))
+ goto drop;
+ raw_proto = true;
+ }
+
+ if (__iptunnel_pull_header(skb, VXLAN_HLEN, protocol, raw_proto,
+ !net_eq(vxlan->net, dev_net(vxlan->dev))))
+ goto drop;
if (vxlan_collect_metadata(vs)) {
__be32 vni = vxlan_vni(vxlan_hdr(skb)->vx_vni);
@@ -1304,14 +1354,14 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
memset(md, 0, sizeof(*md));
}
- /* For backwards compatibility, only allow reserved fields to be
- * used by VXLAN extensions if explicitly requested.
- */
if (vs->flags & VXLAN_F_REMCSUM_RX)
if (!vxlan_remcsum(&unparsed, skb, vs->flags))
goto drop;
if (vs->flags & VXLAN_F_GBP)
vxlan_parse_gbp_hdr(&unparsed, skb, vs->flags, md);
+ /* Note that GBP and GPE can never be active together. This is
+ * ensured in vxlan_dev_configure.
+ */
if (unparsed.vx_flags || unparsed.vx_vni) {
/* If there are any unprocessed flags remaining treat
@@ -1325,8 +1375,13 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
goto drop;
}
- if (!vxlan_set_mac(vxlan, vs, skb))
- goto drop;
+ if (!raw_proto) {
+ if (!vxlan_set_mac(vxlan, vs, skb))
+ goto drop;
+ } else {
+ skb->dev = vxlan->dev;
+ skb->pkt_type = PACKET_HOST;
+ }
oiph = skb_network_header(skb);
skb_reset_network_header(skb);
@@ -1685,6 +1740,27 @@ static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags,
gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK);
}
+static int vxlan_build_gpe_hdr(struct vxlanhdr *vxh, u32 vxflags,
+ __be16 protocol)
+{
+ struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)vxh;
+
+ gpe->np_applied = 1;
+
+ switch (protocol) {
+ case htons(ETH_P_IP):
+ gpe->next_protocol = VXLAN_GPE_NP_IPV4;
+ return 0;
+ case htons(ETH_P_IPV6):
+ gpe->next_protocol = VXLAN_GPE_NP_IPV6;
+ return 0;
+ case htons(ETH_P_TEB):
+ gpe->next_protocol = VXLAN_GPE_NP_ETHERNET;
+ return 0;
+ }
+ return -EPFNOSUPPORT;
+}
+
static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
int iphdr_len, __be32 vni,
struct vxlan_metadata *md, u32 vxflags,
@@ -1694,6 +1770,7 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
int min_headroom;
int err;
int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
+ __be16 inner_protocol = htons(ETH_P_TEB);
if ((vxflags & VXLAN_F_REMCSUM_TX) &&
skb->ip_summed == CHECKSUM_PARTIAL) {
@@ -1712,10 +1789,8 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
/* Need space for new headers (invalidates iph ptr) */
err = skb_cow_head(skb, min_headroom);
- if (unlikely(err)) {
- kfree_skb(skb);
- return err;
- }
+ if (unlikely(err))
+ goto out_free;
skb = vlan_hwaccel_push_inside(skb);
if (WARN_ON(!skb))
@@ -1744,9 +1819,19 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
if (vxflags & VXLAN_F_GBP)
vxlan_build_gbp_hdr(vxh, vxflags, md);
+ if (vxflags & VXLAN_F_GPE) {
+ err = vxlan_build_gpe_hdr(vxh, vxflags, skb->protocol);
+ if (err < 0)
+ goto out_free;
+ inner_protocol = skb->protocol;
+ }
- skb_set_inner_protocol(skb, htons(ETH_P_TEB));
+ skb_set_inner_protocol(skb, inner_protocol);
return 0;
+
+out_free:
+ kfree_skb(skb);
+ return err;
}
static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
@@ -2106,9 +2191,17 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
info = skb_tunnel_info(skb);
skb_reset_mac_header(skb);
- eth = eth_hdr(skb);
- if ((vxlan->flags & VXLAN_F_PROXY)) {
+ if (vxlan->flags & VXLAN_F_COLLECT_METADATA) {
+ if (info && info->mode & IP_TUNNEL_INFO_TX)
+ vxlan_xmit_one(skb, dev, NULL, false);
+ else
+ kfree_skb(skb);
+ return NETDEV_TX_OK;
+ }
+
+ if (vxlan->flags & VXLAN_F_PROXY) {
+ eth = eth_hdr(skb);
if (ntohs(eth->h_proto) == ETH_P_ARP)
return arp_reduce(dev, skb);
#if IS_ENABLED(CONFIG_IPV6)
@@ -2123,18 +2216,10 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
msg->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION)
return neigh_reduce(dev, skb);
}
- eth = eth_hdr(skb);
#endif
}
- if (vxlan->flags & VXLAN_F_COLLECT_METADATA) {
- if (info && info->mode & IP_TUNNEL_INFO_TX)
- vxlan_xmit_one(skb, dev, NULL, false);
- else
- kfree_skb(skb);
- return NETDEV_TX_OK;
- }
-
+ eth = eth_hdr(skb);
f = vxlan_find_mac(vxlan, eth->h_dest);
did_rsc = false;
@@ -2404,7 +2489,7 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
return 0;
}
-static const struct net_device_ops vxlan_netdev_ops = {
+static const struct net_device_ops vxlan_netdev_ether_ops = {
.ndo_init = vxlan_init,
.ndo_uninit = vxlan_uninit,
.ndo_open = vxlan_open,
@@ -2421,6 +2506,17 @@ static const struct net_device_ops vxlan_netdev_ops = {
.ndo_fill_metadata_dst = vxlan_fill_metadata_dst,
};
+static const struct net_device_ops vxlan_netdev_raw_ops = {
+ .ndo_init = vxlan_init,
+ .ndo_uninit = vxlan_uninit,
+ .ndo_open = vxlan_open,
+ .ndo_stop = vxlan_stop,
+ .ndo_start_xmit = vxlan_xmit,
+ .ndo_get_stats64 = ip_tunnel_get_stats64,
+ .ndo_change_mtu = vxlan_change_mtu,
+ .ndo_fill_metadata_dst = vxlan_fill_metadata_dst,
+};
+
/* Info for udev, that this is a virtual tunnel endpoint */
static struct device_type vxlan_type = {
.name = "vxlan",
@@ -2458,10 +2554,6 @@ static void vxlan_setup(struct net_device *dev)
struct vxlan_dev *vxlan = netdev_priv(dev);
unsigned int h;
- eth_hw_addr_random(dev);
- ether_setup(dev);
-
- dev->netdev_ops = &vxlan_netdev_ops;
dev->destructor = free_netdev;
SET_NETDEV_DEVTYPE(dev, &vxlan_type);
@@ -2476,8 +2568,7 @@ static void vxlan_setup(struct net_device *dev)
dev->hw_features |= NETIF_F_GSO_SOFTWARE;
dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
netif_keep_dst(dev);
- dev->priv_flags &= ~IFF_TX_SKB_SHARING;
- dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
+ dev->priv_flags |= IFF_NO_QUEUE;
INIT_LIST_HEAD(&vxlan->next);
spin_lock_init(&vxlan->hash_lock);
@@ -2496,6 +2587,26 @@ static void vxlan_setup(struct net_device *dev)
INIT_HLIST_HEAD(&vxlan->fdb_head[h]);
}
+static void vxlan_ether_setup(struct net_device *dev)
+{
+ eth_hw_addr_random(dev);
+ ether_setup(dev);
+ dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+ dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+ dev->netdev_ops = &vxlan_netdev_ether_ops;
+}
+
+static void vxlan_raw_setup(struct net_device *dev)
+{
+ dev->type = ARPHRD_NONE;
+ dev->hard_header_len = 0;
+ dev->addr_len = 0;
+ dev->mtu = ETH_DATA_LEN;
+ dev->tx_queue_len = 1000;
+ dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
+ dev->netdev_ops = &vxlan_netdev_raw_ops;
+}
+
static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
[IFLA_VXLAN_ID] = { .type = NLA_U32 },
[IFLA_VXLAN_GROUP] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
@@ -2522,6 +2633,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
[IFLA_VXLAN_REMCSUM_TX] = { .type = NLA_U8 },
[IFLA_VXLAN_REMCSUM_RX] = { .type = NLA_U8 },
[IFLA_VXLAN_GBP] = { .type = NLA_FLAG, },
+ [IFLA_VXLAN_GPE] = { .type = NLA_FLAG, },
[IFLA_VXLAN_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG },
};
@@ -2722,6 +2834,21 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
__be16 default_port = vxlan->cfg.dst_port;
struct net_device *lowerdev = NULL;
+ if (conf->flags & VXLAN_F_GPE) {
+ if (conf->flags & ~VXLAN_F_ALLOWED_GPE)
+ return -EINVAL;
+ /* For now, allow GPE only together with COLLECT_METADATA.
+ * This can be relaxed later; in such case, the other side
+ * of the PtP link will have to be provided.
+ */
+ if (!(conf->flags & VXLAN_F_COLLECT_METADATA))
+ return -EINVAL;
+
+ vxlan_raw_setup(dev);
+ } else {
+ vxlan_ether_setup(dev);
+ }
+
vxlan->net = src_net;
dst->remote_vni = conf->vni;
@@ -2783,8 +2910,12 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
dev->needed_headroom = needed_headroom;
memcpy(&vxlan->cfg, conf, sizeof(*conf));
- if (!vxlan->cfg.dst_port)
- vxlan->cfg.dst_port = default_port;
+ if (!vxlan->cfg.dst_port) {
+ if (conf->flags & VXLAN_F_GPE)
+ vxlan->cfg.dst_port = 4790; /* IANA assigned VXLAN-GPE port */
+ else
+ vxlan->cfg.dst_port = default_port;
+ }
vxlan->flags |= conf->flags;
if (!vxlan->cfg.age_interval)
@@ -2955,6 +3086,9 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
if (data[IFLA_VXLAN_GBP])
conf.flags |= VXLAN_F_GBP;
+ if (data[IFLA_VXLAN_GPE])
+ conf.flags |= VXLAN_F_GPE;
+
if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL])
conf.flags |= VXLAN_F_REMCSUM_NOPARTIAL;
@@ -2971,6 +3105,10 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
case -EEXIST:
pr_info("duplicate VNI %u\n", be32_to_cpu(conf.vni));
break;
+
+ case -EINVAL:
+ pr_info("unsupported combination of extensions\n");
+ break;
}
return err;
@@ -3098,6 +3236,10 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_flag(skb, IFLA_VXLAN_GBP))
goto nla_put_failure;
+ if (vxlan->flags & VXLAN_F_GPE &&
+ nla_put_flag(skb, IFLA_VXLAN_GPE))
+ goto nla_put_failure;
+
if (vxlan->flags & VXLAN_F_REMCSUM_NOPARTIAL &&
nla_put_flag(skb, IFLA_VXLAN_REMCSUM_NOPARTIAL))
goto nla_put_failure;
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 56050f913339..16435d8b1f93 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -295,8 +295,15 @@ static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph,
return INET_ECN_encapsulate(tos, inner);
}
-int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto,
- bool xnet);
+int __iptunnel_pull_header(struct sk_buff *skb, int hdr_len,
+ __be16 inner_proto, bool raw_proto, bool xnet);
+
+static inline int iptunnel_pull_header(struct sk_buff *skb, int hdr_len,
+ __be16 inner_proto, bool xnet)
+{
+ return __iptunnel_pull_header(skb, hdr_len, inner_proto, false, xnet);
+}
+
void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, u8 proto,
u8 tos, u8 ttl, __be16 df, bool xnet);
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 73ed2e951c02..dcc6f4057115 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -119,6 +119,64 @@ struct vxlanhdr_gbp {
#define VXLAN_GBP_POLICY_APPLIED (BIT(3) << 16)
#define VXLAN_GBP_ID_MASK (0xFFFF)
+/*
+ * VXLAN Generic Protocol Extension (VXLAN_F_GPE):
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |R|R|Ver|I|P|R|O| Reserved |Next Protocol |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | VXLAN Network Identifier (VNI) | Reserved |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Ver = Version. Indicates VXLAN GPE protocol version.
+ *
+ * P = Next Protocol Bit. The P bit is set to indicate that the
+ * Next Protocol field is present.
+ *
+ * O = OAM Flag Bit. The O bit is set to indicate that the packet
+ * is an OAM packet.
+ *
+ * Next Protocol = This 8 bit field indicates the protocol header
+ * immediately following the VXLAN GPE header.
+ *
+ * https://tools.ietf.org/html/draft-ietf-nvo3-vxlan-gpe-01
+ */
+
+struct vxlanhdr_gpe {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ u8 oam_flag:1,
+ reserved_flags1:1,
+ np_applied:1,
+ instance_applied:1,
+ version:2,
+reserved_flags2:2;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ u8 reserved_flags2:2,
+ version:2,
+ instance_applied:1,
+ np_applied:1,
+ reserved_flags1:1,
+ oam_flag:1;
+#endif
+ u8 reserved_flags3;
+ u8 reserved_flags4;
+ u8 next_protocol;
+ __be32 vx_vni;
+};
+
+/* VXLAN-GPE header flags. */
+#define VXLAN_HF_VER cpu_to_be32(BIT(29) | BIT(28))
+#define VXLAN_HF_NP cpu_to_be32(BIT(26))
+#define VXLAN_HF_OAM cpu_to_be32(BIT(24))
+
+#define VXLAN_GPE_USED_BITS (VXLAN_HF_VER | VXLAN_HF_NP | VXLAN_HF_OAM | \
+ cpu_to_be32(0xff))
+
+/* VXLAN-GPE header Next Protocol. */
+#define VXLAN_GPE_NP_IPV4 0x01
+#define VXLAN_GPE_NP_IPV6 0x02
+#define VXLAN_GPE_NP_ETHERNET 0x03
+#define VXLAN_GPE_NP_NSH 0x04
+
struct vxlan_metadata {
u32 gbp;
};
@@ -206,16 +264,26 @@ struct vxlan_dev {
#define VXLAN_F_GBP 0x800
#define VXLAN_F_REMCSUM_NOPARTIAL 0x1000
#define VXLAN_F_COLLECT_METADATA 0x2000
+#define VXLAN_F_GPE 0x4000
/* Flags that are used in the receive path. These flags must match in
* order for a socket to be shareable
*/
#define VXLAN_F_RCV_FLAGS (VXLAN_F_GBP | \
+ VXLAN_F_GPE | \
VXLAN_F_UDP_ZERO_CSUM6_RX | \
VXLAN_F_REMCSUM_RX | \
VXLAN_F_REMCSUM_NOPARTIAL | \
VXLAN_F_COLLECT_METADATA)
+/* Flags that can be set together with VXLAN_F_GPE. */
+#define VXLAN_F_ALLOWED_GPE (VXLAN_F_GPE | \
+ VXLAN_F_IPV6 | \
+ VXLAN_F_UDP_ZERO_CSUM_TX | \
+ VXLAN_F_UDP_ZERO_CSUM6_TX | \
+ VXLAN_F_UDP_ZERO_CSUM6_RX | \
+ VXLAN_F_COLLECT_METADATA)
+
struct net_device *vxlan_dev_create(struct net *net, const char *name,
u8 name_assign_type, struct vxlan_config *conf);
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index c488066fb53a..9427f17d06d6 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -488,6 +488,7 @@ enum {
IFLA_VXLAN_REMCSUM_NOPARTIAL,
IFLA_VXLAN_COLLECT_METADATA,
IFLA_VXLAN_LABEL,
+ IFLA_VXLAN_GPE,
__IFLA_VXLAN_MAX
};
#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index b3ab1205dfdf..43445df61efd 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -86,15 +86,15 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
}
EXPORT_SYMBOL_GPL(iptunnel_xmit);
-int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto,
- bool xnet)
+int __iptunnel_pull_header(struct sk_buff *skb, int hdr_len,
+ __be16 inner_proto, bool raw_proto, bool xnet)
{
if (unlikely(!pskb_may_pull(skb, hdr_len)))
return -ENOMEM;
skb_pull_rcsum(skb, hdr_len);
- if (inner_proto == htons(ETH_P_TEB)) {
+ if (!raw_proto && inner_proto == htons(ETH_P_TEB)) {
struct ethhdr *eh;
if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
@@ -117,7 +117,7 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto,
return iptunnel_pull_offloads(skb);
}
-EXPORT_SYMBOL_GPL(iptunnel_pull_header);
+EXPORT_SYMBOL_GPL(__iptunnel_pull_header);
struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
gfp_t flags)