diff options
31 files changed, 1087 insertions, 409 deletions
diff --git a/drivers/net/dsa/microchip/ksz8.h b/drivers/net/dsa/microchip/ksz8.h index 03da369675c6..cae76f5e7787 100644 --- a/drivers/net/dsa/microchip/ksz8.h +++ b/drivers/net/dsa/microchip/ksz8.h @@ -7,7 +7,8 @@ #ifndef __KSZ8XXX_H #define __KSZ8XXX_H -#include <linux/kernel.h> + +#include <linux/types.h> enum ksz_regs { REG_IND_CTRL_0, diff --git a/drivers/net/ethernet/altera/altera_utils.h b/drivers/net/ethernet/altera/altera_utils.h index b7d772f2dcbb..3c2e32fb7389 100644 --- a/drivers/net/ethernet/altera/altera_utils.h +++ b/drivers/net/ethernet/altera/altera_utils.h @@ -3,11 +3,12 @@ * Copyright (C) 2014 Altera Corporation. All rights reserved */ -#include <linux/kernel.h> - #ifndef __ALTERA_UTILS_H__ #define __ALTERA_UTILS_H__ +#include <linux/compiler.h> +#include <linux/types.h> + void tse_set_bit(void __iomem *ioaddr, size_t offs, u32 bit_mask); void tse_clear_bit(void __iomem *ioaddr, size_t offs, u32 bit_mask); int tse_bit_is_set(void __iomem *ioaddr, size_t offs, u32 bit_mask); diff --git a/drivers/net/ethernet/cadence/macb_ptp.c b/drivers/net/ethernet/cadence/macb_ptp.c index 9559c16078f9..e6cb20aaa76a 100644 --- a/drivers/net/ethernet/cadence/macb_ptp.c +++ b/drivers/net/ethernet/cadence/macb_ptp.c @@ -434,7 +434,7 @@ int gem_get_hwtst(struct net_device *dev, struct ifreq *rq) return 0; } -static int gem_ptp_set_one_step_sync(struct macb *bp, u8 enable) +static void gem_ptp_set_one_step_sync(struct macb *bp, u8 enable) { u32 reg_val; @@ -444,8 +444,6 @@ static int gem_ptp_set_one_step_sync(struct macb *bp, u8 enable) macb_writel(bp, NCR, reg_val | MACB_BIT(OSSMODE)); else macb_writel(bp, NCR, reg_val & ~MACB_BIT(OSSMODE)); - - return 0; } int gem_set_hwtst(struct net_device *dev, struct ifreq *ifr, int cmd) @@ -468,8 +466,7 @@ int gem_set_hwtst(struct net_device *dev, struct ifreq *ifr, int cmd) case HWTSTAMP_TX_OFF: break; case HWTSTAMP_TX_ONESTEP_SYNC: - if (gem_ptp_set_one_step_sync(bp, 1) != 0) - return -ERANGE; + gem_ptp_set_one_step_sync(bp, 
1); tx_bd_control = TSTAMP_ALL_FRAMES; break; case HWTSTAMP_TX_ON: diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 18558a019353..57f4ec4f8d2f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -565,6 +565,7 @@ struct i40e_pf { #define I40E_FLAG_DISABLE_FW_LLDP BIT(24) #define I40E_FLAG_RS_FEC BIT(25) #define I40E_FLAG_BASE_R_FEC BIT(26) +#define I40E_FLAG_VF_VLAN_PRUNING BIT(27) /* TOTAL_PORT_SHUTDOWN * Allows to physically disable the link on the NIC's port. * If enabled, (after link down request from the OS) diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 610f00cbaff9..c65e9e2dcb42 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -457,6 +457,8 @@ static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = { I40E_PRIV_FLAG("disable-fw-lldp", I40E_FLAG_DISABLE_FW_LLDP, 0), I40E_PRIV_FLAG("rs-fec", I40E_FLAG_RS_FEC, 0), I40E_PRIV_FLAG("base-r-fec", I40E_FLAG_BASE_R_FEC, 0), + I40E_PRIV_FLAG("vf-vlan-pruning", + I40E_FLAG_VF_VLAN_PRUNING, 0), }; #define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_gstrings_priv_flags) @@ -5285,6 +5287,13 @@ flags_complete: return -EOPNOTSUPP; } + if ((changed_flags & I40E_FLAG_VF_VLAN_PRUNING) && + pf->num_alloc_vfs) { + dev_warn(&pf->pdev->dev, + "Changing vf-vlan-pruning flag while VF(s) are active is not supported\n"); + return -EOPNOTSUPP; + } + if ((changed_flags & new_flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED) && (new_flags & I40E_FLAG_MFP_ENABLED)) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 332a608dbaa6..1599ac538e7f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1369,6 +1369,114 @@ static int i40e_correct_mac_vlan_filters(struct i40e_vsi *vsi, } /** + * 
i40e_get_vf_new_vlan - Get new vlan id on a vf + * @vsi: the vsi to configure + * @new_mac: new mac filter to be added + * @f: existing mac filter, replaced with new_mac->f if new_mac is not NULL + * @vlan_filters: the number of active VLAN filters + * @trusted: flag if the VF is trusted + * + * Get new VLAN id based on current VLAN filters, trust, PVID + * and vf-vlan-pruning flag. + * + * Returns the value of the new vlan filter or + * the old value if no new filter is needed. + */ +static s16 i40e_get_vf_new_vlan(struct i40e_vsi *vsi, + struct i40e_new_mac_filter *new_mac, + struct i40e_mac_filter *f, + int vlan_filters, + bool trusted) +{ + s16 pvid = le16_to_cpu(vsi->info.pvid); + struct i40e_pf *pf = vsi->back; + bool is_any; + + if (new_mac) + f = new_mac->f; + + if (pvid && f->vlan != pvid) + return pvid; + + is_any = (trusted || + !(pf->flags & I40E_FLAG_VF_VLAN_PRUNING)); + + if ((vlan_filters && f->vlan == I40E_VLAN_ANY) || + (!is_any && !vlan_filters && f->vlan == I40E_VLAN_ANY) || + (is_any && !vlan_filters && f->vlan == 0)) { + if (is_any) + return I40E_VLAN_ANY; + else + return 0; + } + + return f->vlan; +} + +/** + * i40e_correct_vf_mac_vlan_filters - Correct non-VLAN VF filters if necessary + * @vsi: the vsi to configure + * @tmp_add_list: list of filters ready to be added + * @tmp_del_list: list of filters ready to be deleted + * @vlan_filters: the number of active VLAN filters + * @trusted: flag if the VF is trusted + * + * Correct VF VLAN filters based on current VLAN filters, trust, PVID + * and vf-vlan-pruning flag. + * + * In case of memory allocation failure return -ENOMEM. Otherwise, return 0. + * + * This function is only expected to be called from within + * i40e_sync_vsi_filters.
+ * + * NOTE: This function expects to be called while under the + * mac_filter_hash_lock + */ +static int i40e_correct_vf_mac_vlan_filters(struct i40e_vsi *vsi, + struct hlist_head *tmp_add_list, + struct hlist_head *tmp_del_list, + int vlan_filters, + bool trusted) +{ + struct i40e_mac_filter *f, *add_head; + struct i40e_new_mac_filter *new_mac; + struct hlist_node *h; + int bkt, new_vlan; + + hlist_for_each_entry(new_mac, tmp_add_list, hlist) { + new_mac->f->vlan = i40e_get_vf_new_vlan(vsi, new_mac, NULL, + vlan_filters, trusted); + } + + hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { + new_vlan = i40e_get_vf_new_vlan(vsi, NULL, f, vlan_filters, + trusted); + if (new_vlan != f->vlan) { + add_head = i40e_add_filter(vsi, f->macaddr, new_vlan); + if (!add_head) + return -ENOMEM; + /* Create a temporary i40e_new_mac_filter */ + new_mac = kzalloc(sizeof(*new_mac), GFP_ATOMIC); + if (!new_mac) + return -ENOMEM; + new_mac->f = add_head; + new_mac->state = add_head->state; + + /* Add the new filter to the tmp list */ + hlist_add_head(&new_mac->hlist, tmp_add_list); + + /* Put the original filter into the delete list */ + f->state = I40E_FILTER_REMOVE; + hash_del(&f->hlist); + hlist_add_head(&f->hlist, tmp_del_list); + } + } + + vsi->has_vlan_filter = !!vlan_filters; + return 0; +} + +/** * i40e_rm_default_mac_filter - Remove the default MAC filter set by NVM * @vsi: the PF Main VSI - inappropriate for any other VSI * @macaddr: the MAC address @@ -2423,10 +2531,14 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) vlan_filters++; } - retval = i40e_correct_mac_vlan_filters(vsi, - &tmp_add_list, - &tmp_del_list, - vlan_filters); + if (vsi->type != I40E_VSI_SRIOV) + retval = i40e_correct_mac_vlan_filters + (vsi, &tmp_add_list, &tmp_del_list, + vlan_filters); + else + retval = i40e_correct_vf_mac_vlan_filters + (vsi, &tmp_add_list, &tmp_del_list, + vlan_filters, pf->vf[vsi->vf_id].trusted); hlist_for_each_entry(new, &tmp_add_list, hlist) 
netdev_hw_addr_refcnt(new->f, vsi->netdev, 1); @@ -2855,8 +2967,21 @@ int i40e_add_vlan_all_mac(struct i40e_vsi *vsi, s16 vid) int bkt; hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { - if (f->state == I40E_FILTER_REMOVE) + /* If we're asked to add a filter that has been marked for + * removal, it is safe to simply restore it to active state. + * __i40e_del_filter will have simply deleted any filters which + * were previously marked NEW or FAILED, so if it is currently + * marked REMOVE it must have previously been ACTIVE. Since we + * haven't yet run the sync filters task, just restore this + * filter to the ACTIVE state so that the sync task leaves it + * in place. + */ + if (f->state == I40E_FILTER_REMOVE && f->vlan == vid) { + f->state = I40E_FILTER_ACTIVE; + continue; + } else if (f->state == I40E_FILTER_REMOVE) { continue; + } add_f = i40e_add_filter(vsi, f->macaddr, vid); if (!add_f) { dev_info(&vsi->back->pdev->dev, diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 2606e8f0f19b..9949469333d5 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -4349,6 +4349,7 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, /* duplicate request, so just return success */ goto error_pvid; + i40e_vlan_stripping_enable(vsi); i40e_vc_reset_vf(vf, true); /* During reset the VF got a new VSI, so refresh a pointer. */ vsi = pf->vsi[vf->lan_vsi_idx]; @@ -4364,7 +4365,7 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, * MAC addresses deleted. 
*/ if ((!(vlan_id || qos) || - vlanprio != le16_to_cpu(vsi->info.pvid)) && + vlanprio != le16_to_cpu(vsi->info.pvid)) && vsi->info.pvid) { ret = i40e_add_vlan_all_mac(vsi, I40E_VLAN_ANY); if (ret) { @@ -4727,6 +4728,11 @@ int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting) goto out; vf->trusted = setting; + + /* request PF to sync mac/vlan filters for the VF */ + set_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state); + pf->vsi[vf->lan_vsi_idx]->flags |= I40E_VSI_FLAG_FILTER_CHANGED; + i40e_vc_reset_vf(vf, true); dev_info(&pf->pdev->dev, "VF %u is now %strusted\n", vf_id, setting ? "" : "un"); diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 49aed3e506a6..fda1198d2c00 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -146,7 +146,8 @@ struct iavf_mac_filter { u8 remove:1; /* filter needs to be removed */ u8 add:1; /* filter needs to be added */ u8 is_primary:1; /* filter is a default VF MAC */ - u8 padding:4; + u8 add_handled:1; /* received response for filter add */ + u8 padding:3; }; }; @@ -248,6 +249,7 @@ struct iavf_adapter { struct work_struct adminq_task; struct delayed_work client_task; wait_queue_head_t down_waitqueue; + wait_queue_head_t vc_waitqueue; struct iavf_q_vector *q_vectors; struct list_head vlan_filter_list; struct list_head mac_filter_list; @@ -292,6 +294,7 @@ struct iavf_adapter { #define IAVF_FLAG_QUEUES_DISABLED BIT(17) #define IAVF_FLAG_SETUP_NETDEV_FEATURES BIT(18) #define IAVF_FLAG_REINIT_MSIX_NEEDED BIT(20) +#define IAVF_FLAG_INITIAL_MAC_SET BIT(23) /* duplicates for common code */ #define IAVF_FLAG_DCB_ENABLED 0 /* flags for admin queue service task */ @@ -559,6 +562,8 @@ void iavf_enable_vlan_stripping_v2(struct iavf_adapter *adapter, u16 tpid); void iavf_disable_vlan_stripping_v2(struct iavf_adapter *adapter, u16 tpid); void iavf_enable_vlan_insertion_v2(struct iavf_adapter *adapter, u16 tpid); void 
iavf_disable_vlan_insertion_v2(struct iavf_adapter *adapter, u16 tpid); +int iavf_replace_primary_mac(struct iavf_adapter *adapter, + const u8 *new_mac); void iavf_set_vlan_offload_features(struct iavf_adapter *adapter, netdev_features_t prev_features, diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 7dfcf78b57fb..95772e17e5be 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -983,6 +983,7 @@ struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter, list_add_tail(&f->list, &adapter->mac_filter_list); f->add = true; + f->add_handled = false; f->is_new_mac = true; f->is_primary = false; adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER; @@ -994,47 +995,132 @@ struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter, } /** - * iavf_set_mac - NDO callback to set port mac address - * @netdev: network interface device structure - * @p: pointer to an address structure + * iavf_replace_primary_mac - Replace current primary address + * @adapter: board private structure + * @new_mac: new MAC address to be applied * - * Returns 0 on success, negative on failure + * Replace current dev_addr and send request to PF for removal of previous + * primary MAC address filter and addition of new primary MAC filter. + * Return 0 for success, -ENOMEM for failure. + * + * Do not call this with mac_vlan_list_lock! 
**/ -static int iavf_set_mac(struct net_device *netdev, void *p) +int iavf_replace_primary_mac(struct iavf_adapter *adapter, + const u8 *new_mac) { - struct iavf_adapter *adapter = netdev_priv(netdev); struct iavf_hw *hw = &adapter->hw; struct iavf_mac_filter *f; - struct sockaddr *addr = p; - - if (!is_valid_ether_addr(addr->sa_data)) - return -EADDRNOTAVAIL; - - if (ether_addr_equal(netdev->dev_addr, addr->sa_data)) - return 0; spin_lock_bh(&adapter->mac_vlan_list_lock); + list_for_each_entry(f, &adapter->mac_filter_list, list) { + f->is_primary = false; + } + f = iavf_find_filter(adapter, hw->mac.addr); if (f) { f->remove = true; - f->is_primary = true; adapter->aq_required |= IAVF_FLAG_AQ_DEL_MAC_FILTER; } - f = iavf_add_filter(adapter, addr->sa_data); + f = iavf_add_filter(adapter, new_mac); + if (f) { + /* Always send the request to add if changing primary MAC + * even if filter is already present on the list + */ f->is_primary = true; - ether_addr_copy(hw->mac.addr, addr->sa_data); + f->add = true; + adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER; + ether_addr_copy(hw->mac.addr, new_mac); } spin_unlock_bh(&adapter->mac_vlan_list_lock); /* schedule the watchdog task to immediately process the request */ - if (f) + if (f) { queue_work(iavf_wq, &adapter->watchdog_task.work); + return 0; + } + return -ENOMEM; +} + +/** + * iavf_is_mac_set_handled - wait for a response to set MAC from PF + * @netdev: network interface device structure + * @macaddr: MAC address to set + * + * Returns true on success, false on failure + */ +static bool iavf_is_mac_set_handled(struct net_device *netdev, + const u8 *macaddr) +{ + struct iavf_adapter *adapter = netdev_priv(netdev); + struct iavf_mac_filter *f; + bool ret = false; + + spin_lock_bh(&adapter->mac_vlan_list_lock); + + f = iavf_find_filter(adapter, macaddr); + + if (!f || (!f->add && f->add_handled)) + ret = true; + + spin_unlock_bh(&adapter->mac_vlan_list_lock); + + return ret; +} + +/** + * iavf_set_mac - NDO 
callback to set port MAC address + * @netdev: network interface device structure + * @p: pointer to an address structure + * + * Returns 0 on success, negative on failure + */ +static int iavf_set_mac(struct net_device *netdev, void *p) +{ + struct iavf_adapter *adapter = netdev_priv(netdev); + struct sockaddr *addr = p; + bool handle_mac = iavf_is_mac_set_handled(netdev, addr->sa_data); + int ret; - return (f == NULL) ? -ENOMEM : 0; + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + ret = iavf_replace_primary_mac(adapter, addr->sa_data); + + if (ret) + return ret; + + /* If this is an initial set MAC during VF spawn do not wait */ + if (adapter->flags & IAVF_FLAG_INITIAL_MAC_SET) { + adapter->flags &= ~IAVF_FLAG_INITIAL_MAC_SET; + return 0; + } + + if (handle_mac) + goto done; + + ret = wait_event_interruptible_timeout(adapter->vc_waitqueue, false, msecs_to_jiffies(2500)); + + /* If ret < 0 then it means wait was interrupted. + * If ret == 0 then it means we got a timeout. 
+ * else it means we got response for set MAC from PF, + * check if netdev MAC was updated to requested MAC, + * if yes then set MAC succeeded otherwise it failed return -EACCES + */ + if (ret < 0) + return ret; + + if (!ret) + return -EAGAIN; + +done: + if (!ether_addr_equal(netdev->dev_addr, addr->sa_data)) + return -EACCES; + + return 0; } /** @@ -2451,6 +2537,8 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter) ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr); } + adapter->flags |= IAVF_FLAG_INITIAL_MAC_SET; + adapter->tx_desc_count = IAVF_DEFAULT_TXD; adapter->rx_desc_count = IAVF_DEFAULT_RXD; err = iavf_init_interrupt_scheme(adapter); @@ -4681,6 +4769,9 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* Setup the wait queue for indicating transition to down status */ init_waitqueue_head(&adapter->down_waitqueue); + /* Setup the wait queue for indicating virtchannel events */ + init_waitqueue_head(&adapter->vc_waitqueue); + return 0; err_ioremap: diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 782450d5c12f..e2b4ba98f71e 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -598,6 +598,8 @@ static void iavf_mac_add_ok(struct iavf_adapter *adapter) spin_lock_bh(&adapter->mac_vlan_list_lock); list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) { f->is_new_mac = false; + if (!f->add && !f->add_handled) + f->add_handled = true; } spin_unlock_bh(&adapter->mac_vlan_list_lock); } @@ -618,6 +620,9 @@ static void iavf_mac_add_reject(struct iavf_adapter *adapter) if (f->remove && ether_addr_equal(f->macaddr, netdev->dev_addr)) f->remove = false; + if (!f->add && !f->add_handled) + f->add_handled = true; + if (f->is_new_mac) { list_del(&f->list); kfree(f); @@ -1932,6 +1937,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, iavf_mac_add_reject(adapter); /* 
restore administratively set MAC address */ ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr); + wake_up(&adapter->vc_waitqueue); break; case VIRTCHNL_OP_DEL_VLAN: dev_err(&adapter->pdev->dev, "Failed to delete VLAN filter, error %s\n", @@ -2091,7 +2097,13 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, if (!v_retval) iavf_mac_add_ok(adapter); if (!ether_addr_equal(netdev->dev_addr, adapter->hw.mac.addr)) - eth_hw_addr_set(netdev, adapter->hw.mac.addr); + if (!ether_addr_equal(netdev->dev_addr, + adapter->hw.mac.addr)) { + netif_addr_lock_bh(netdev); + eth_hw_addr_set(netdev, adapter->hw.mac.addr); + netif_addr_unlock_bh(netdev); + } + wake_up(&adapter->vc_waitqueue); break; case VIRTCHNL_OP_GET_STATS: { struct iavf_eth_stats *stats = @@ -2121,10 +2133,11 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, /* restore current mac address */ ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr); } else { + netif_addr_lock_bh(netdev); /* refresh current mac address if changed */ - eth_hw_addr_set(netdev, adapter->hw.mac.addr); ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr); + netif_addr_unlock_bh(netdev); } spin_lock_bh(&adapter->mac_vlan_list_lock); iavf_add_filter(adapter, adapter->hw.mac.addr); @@ -2160,6 +2173,10 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, } fallthrough; case VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS: { + struct iavf_mac_filter *f; + bool was_mac_changed; + u64 aq_required = 0; + if (v_opcode == VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS) memcpy(&adapter->vlan_v2_caps, msg, min_t(u16, msglen, @@ -2167,6 +2184,46 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, iavf_process_config(adapter); adapter->flags |= IAVF_FLAG_SETUP_NETDEV_FEATURES; + was_mac_changed = !ether_addr_equal(netdev->dev_addr, + adapter->hw.mac.addr); + + spin_lock_bh(&adapter->mac_vlan_list_lock); + + /* re-add all MAC filters */ + list_for_each_entry(f, &adapter->mac_filter_list, list) { + if (was_mac_changed && 
+ ether_addr_equal(netdev->dev_addr, f->macaddr)) + ether_addr_copy(f->macaddr, + adapter->hw.mac.addr); + + f->is_new_mac = true; + f->add = true; + f->add_handled = false; + f->remove = false; + } + + /* re-add all VLAN filters */ + if (VLAN_FILTERING_ALLOWED(adapter)) { + struct iavf_vlan_filter *vlf; + + if (!list_empty(&adapter->vlan_filter_list)) { + list_for_each_entry(vlf, + &adapter->vlan_filter_list, + list) + vlf->add = true; + + aq_required |= IAVF_FLAG_AQ_ADD_VLAN_FILTER; + } + } + + spin_unlock_bh(&adapter->mac_vlan_list_lock); + + netif_addr_lock_bh(netdev); + eth_hw_addr_set(netdev, adapter->hw.mac.addr); + netif_addr_unlock_bh(netdev); + + adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER | + aq_required; } break; case VIRTCHNL_OP_ENABLE_QUEUES: diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 59c9a10f83ba..6beb3d4873a3 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1444,8 +1444,8 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, int done = 0, bytes = 0; while (done < budget) { + unsigned int pktlen, *rxdcsum; struct net_device *netdev; - unsigned int pktlen; dma_addr_t dma_addr; u32 hash, reason; int mac = 0; @@ -1512,7 +1512,13 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, pktlen = RX_DMA_GET_PLEN0(trxd.rxd2); skb->dev = netdev; skb_put(skb, pktlen); - if (trxd.rxd4 & eth->soc->txrx.rx_dma_l4_valid) + + if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) + rxdcsum = &trxd.rxd3; + else + rxdcsum = &trxd.rxd4; + + if (*rxdcsum & eth->soc->txrx.rx_dma_l4_valid) skb->ip_summed = CHECKSUM_UNNECESSARY; else skb_checksum_none_assert(skb); @@ -3761,6 +3767,7 @@ static const struct mtk_soc_data mt7986_data = { .txd_size = sizeof(struct mtk_tx_dma_v2), .rxd_size = sizeof(struct mtk_rx_dma_v2), .rx_irq_done_mask = MTK_RX_DONE_INT_V2, + .rx_dma_l4_valid = RX_DMA_L4_VALID_V2, .dma_max_len = 
MTK_TX_DMA_BUF_LEN_V2, .dma_len_offset = 8, }, diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/crc32.h b/drivers/net/ethernet/netronome/nfp/nfpcore/crc32.h index afab6f0fc564..6ad43c7cefe6 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/crc32.h +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/crc32.h @@ -4,7 +4,6 @@ #ifndef NFP_CRC32_H #define NFP_CRC32_H -#include <linux/kernel.h> #include <linux/crc32.h> /** diff --git a/drivers/net/vmxnet3/Makefile b/drivers/net/vmxnet3/Makefile index 7a38925f4165..a666a88ac1ff 100644 --- a/drivers/net/vmxnet3/Makefile +++ b/drivers/net/vmxnet3/Makefile @@ -2,7 +2,7 @@ # # Linux driver for VMware's vmxnet3 ethernet NIC. # -# Copyright (C) 2007-2021, VMware, Inc. All Rights Reserved. +# Copyright (C) 2007-2022, VMware, Inc. All Rights Reserved. # # This program is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the diff --git a/drivers/net/vmxnet3/upt1_defs.h b/drivers/net/vmxnet3/upt1_defs.h index f9f3a23d1698..41c0660a0c54 100644 --- a/drivers/net/vmxnet3/upt1_defs.h +++ b/drivers/net/vmxnet3/upt1_defs.h @@ -1,7 +1,7 @@ /* * Linux driver for VMware's vmxnet3 ethernet NIC. * - * Copyright (C) 2008-2021, VMware, Inc. All Rights Reserved. + * Copyright (C) 2008-2022, VMware, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the diff --git a/drivers/net/vmxnet3/vmxnet3_defs.h b/drivers/net/vmxnet3/vmxnet3_defs.h index 74d4e8bc4abc..41d6767283a6 100644 --- a/drivers/net/vmxnet3/vmxnet3_defs.h +++ b/drivers/net/vmxnet3/vmxnet3_defs.h @@ -1,7 +1,7 @@ /* * Linux driver for VMware's vmxnet3 ethernet NIC. * - * Copyright (C) 2008-2021, VMware, Inc. All Rights Reserved. + * Copyright (C) 2008-2022, VMware, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -40,7 +40,13 @@ enum { VMXNET3_REG_MACL = 0x28, /* MAC Address Low */ VMXNET3_REG_MACH = 0x30, /* MAC Address High */ VMXNET3_REG_ICR = 0x38, /* Interrupt Cause Register */ - VMXNET3_REG_ECR = 0x40 /* Event Cause Register */ + VMXNET3_REG_ECR = 0x40, /* Event Cause Register */ + VMXNET3_REG_DCR = 0x48, /* Device capability register, + * from 0x48 to 0x80 + */ + VMXNET3_REG_PTCR = 0x88, /* Passthru capability register + * from 0x88 to 0xb0 + */ }; /* BAR 0 */ @@ -51,8 +57,18 @@ enum { VMXNET3_REG_RXPROD2 = 0xA00 /* Rx Producer Index for ring 2 */ }; -#define VMXNET3_PT_REG_SIZE 4096 /* BAR 0 */ -#define VMXNET3_VD_REG_SIZE 4096 /* BAR 1 */ +/* For the large PT BAR, the following are the offsets of the DB registers */ +enum { + VMXNET3_REG_LB_TXPROD = 0x1000, /* Tx Producer Index */ + VMXNET3_REG_LB_RXPROD = 0x1400, /* Rx Producer Index for ring 1 */ + VMXNET3_REG_LB_RXPROD2 = 0x1800, /* Rx Producer Index for ring 2 */ +}; + +#define VMXNET3_PT_REG_SIZE 4096 /* BAR 0 */ +#define VMXNET3_LARGE_PT_REG_SIZE 8192 /* large PT pages */ +#define VMXNET3_VD_REG_SIZE 4096 /* BAR 1 */ +#define VMXNET3_LARGE_BAR0_REG_SIZE (4096 * 4096) /* LARGE BAR 0 */ +#define VMXNET3_OOB_REG_SIZE (4094 * 4096) /* OOB pages */ #define VMXNET3_REG_ALIGN 8 /* All registers are 8-byte aligned.
*/ #define VMXNET3_REG_ALIGN_MASK 0x7 @@ -83,6 +99,9 @@ enum { VMXNET3_CMD_SET_COALESCE, VMXNET3_CMD_REGISTER_MEMREGS, VMXNET3_CMD_SET_RSS_FIELDS, + VMXNET3_CMD_RESERVED4, + VMXNET3_CMD_RESERVED5, + VMXNET3_CMD_SET_RING_BUFFER_SIZE, VMXNET3_CMD_FIRST_GET = 0xF00D0000, VMXNET3_CMD_GET_QUEUE_STATUS = VMXNET3_CMD_FIRST_GET, @@ -101,6 +120,9 @@ enum { VMXNET3_CMD_GET_RESERVED2, VMXNET3_CMD_GET_RESERVED3, VMXNET3_CMD_GET_MAX_QUEUES_CONF, + VMXNET3_CMD_GET_RESERVED4, + VMXNET3_CMD_GET_MAX_CAPABILITIES, + VMXNET3_CMD_GET_DCR0_REG, }; /* @@ -126,17 +148,17 @@ struct Vmxnet3_TxDesc { #ifdef __BIG_ENDIAN_BITFIELD u32 msscof:14; /* MSS, checksum offset, flags */ - u32 ext1:1; + u32 ext1:1; /* set to 1 to indicate inner csum/tso, vmxnet3 v7 */ u32 dtype:1; /* descriptor type */ - u32 oco:1; + u32 oco:1; /* Outer csum offload */ u32 gen:1; /* generation bit */ u32 len:14; #else u32 len:14; u32 gen:1; /* generation bit */ - u32 oco:1; + u32 oco:1; /* Outer csum offload */ u32 dtype:1; /* descriptor type */ - u32 ext1:1; + u32 ext1:1; /* set to 1 to indicate inner csum/tso, vmxnet3 v7 */ u32 msscof:14; /* MSS, checksum offset, flags */ #endif /* __BIG_ENDIAN_BITFIELD */ @@ -240,11 +262,13 @@ struct Vmxnet3_RxCompDesc { u32 rqID:10; /* rx queue/ring ID */ u32 sop:1; /* Start of Packet */ u32 eop:1; /* End of Packet */ - u32 ext1:2; + u32 ext1:2; /* bit 0: indicating v4/v6/.. is for inner header */ + /* bit 1: indicating rssType is based on inner header */ u32 rxdIdx:12; /* Index of the RxDesc */ #else u32 rxdIdx:12; /* Index of the RxDesc */ - u32 ext1:2; + u32 ext1:2; /* bit 0: indicating v4/v6/.. 
is for inner header */ + /* bit 1: indicating rssType is based on inner header */ u32 eop:1; /* End of Packet */ u32 sop:1; /* Start of Packet */ u32 rqID:10; /* rx queue/ring ID */ @@ -378,6 +402,8 @@ union Vmxnet3_GenericDesc { /* max # of tx descs for a non-tso pkt */ #define VMXNET3_MAX_TXD_PER_PKT 16 +/* max # of tx descs for a tso pkt */ +#define VMXNET3_MAX_TSO_TXD_PER_PKT 24 /* Max size of a single rx buffer */ #define VMXNET3_MAX_RX_BUF_SIZE ((1 << 14) - 1) @@ -724,6 +750,13 @@ enum Vmxnet3_RSSField { VMXNET3_RSS_FIELDS_ESPIP6 = 0x0020, }; +struct Vmxnet3_RingBufferSize { + __le16 ring1BufSizeType0; + __le16 ring1BufSizeType1; + __le16 ring2BufSizeType1; + __le16 pad; +}; + /* If the command data <= 16 bytes, use the shared memory directly. * otherwise, use variable length configuration descriptor. */ @@ -731,6 +764,7 @@ union Vmxnet3_CmdInfo { struct Vmxnet3_VariableLenConfDesc varConf; struct Vmxnet3_SetPolling setPolling; enum Vmxnet3_RSSField setRssFields; + struct Vmxnet3_RingBufferSize ringBufSize; __le64 data[2]; }; @@ -801,4 +835,30 @@ struct Vmxnet3_DriverShared { #define VMXNET3_LINK_UP (10000 << 16 | 1) /* 10 Gbps, up */ #define VMXNET3_LINK_DOWN 0 +#define VMXNET3_DCR_ERROR 31 /* error when bit 31 of DCR is set */ +#define VMXNET3_CAP_UDP_RSS 0 /* bit 0 of DCR 0 */ +#define VMXNET3_CAP_ESP_RSS_IPV4 1 /* bit 1 of DCR 0 */ +#define VMXNET3_CAP_GENEVE_CHECKSUM_OFFLOAD 2 /* bit 2 of DCR 0 */ +#define VMXNET3_CAP_GENEVE_TSO 3 /* bit 3 of DCR 0 */ +#define VMXNET3_CAP_VXLAN_CHECKSUM_OFFLOAD 4 /* bit 4 of DCR 0 */ +#define VMXNET3_CAP_VXLAN_TSO 5 /* bit 5 of DCR 0 */ +#define VMXNET3_CAP_GENEVE_OUTER_CHECKSUM_OFFLOAD 6 /* bit 6 of DCR 0 */ +#define VMXNET3_CAP_VXLAN_OUTER_CHECKSUM_OFFLOAD 7 /* bit 7 of DCR 0 */ +#define VMXNET3_CAP_PKT_STEERING_IPV4 8 /* bit 8 of DCR 0 */ +#define VMXNET3_CAP_VERSION_4_MAX VMXNET3_CAP_PKT_STEERING_IPV4 +#define VMXNET3_CAP_ESP_RSS_IPV6 9 /* bit 9 of DCR 0 */ +#define VMXNET3_CAP_VERSION_5_MAX VMXNET3_CAP_ESP_RSS_IPV6 
+#define VMXNET3_CAP_ESP_OVER_UDP_RSS 10 /* bit 10 of DCR 0 */ +#define VMXNET3_CAP_INNER_RSS 11 /* bit 11 of DCR 0 */ +#define VMXNET3_CAP_INNER_ESP_RSS 12 /* bit 12 of DCR 0 */ +#define VMXNET3_CAP_CRC32_HASH_FUNC 13 /* bit 13 of DCR 0 */ +#define VMXNET3_CAP_VERSION_6_MAX VMXNET3_CAP_CRC32_HASH_FUNC +#define VMXNET3_CAP_OAM_FILTER 14 /* bit 14 of DCR 0 */ +#define VMXNET3_CAP_ESP_QS 15 /* bit 15 of DCR 0 */ +#define VMXNET3_CAP_LARGE_BAR 16 /* bit 16 of DCR 0 */ +#define VMXNET3_CAP_OOORX_COMP 17 /* bit 17 of DCR 0 */ +#define VMXNET3_CAP_VERSION_7_MAX 18 +/* when new capability is introduced, update VMXNET3_CAP_MAX */ +#define VMXNET3_CAP_MAX VMXNET3_CAP_VERSION_7_MAX + #endif /* _VMXNET3_DEFS_H_ */ diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 93e8d119d45f..1565e1808a19 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1,7 +1,7 @@ /* * Linux driver for VMware's vmxnet3 ethernet NIC. * - * Copyright (C) 2008-2021, VMware, Inc. All Rights Reserved. + * Copyright (C) 2008-2022, VMware, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -130,6 +130,20 @@ vmxnet3_tq_stop(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter) netif_stop_subqueue(adapter->netdev, (tq - adapter->tx_queue)); } +/* Check if capability is supported by UPT device or + * UPT is even requested + */ +bool +vmxnet3_check_ptcapability(u32 cap_supported, u32 cap) +{ + if (cap_supported & (1UL << VMXNET3_DCR_ERROR) || + cap_supported & (1UL << cap)) { + return true; + } + + return false; +} + /* * Check the link state. This may start or stop the tx queue. 
@@ -571,6 +585,7 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx, rbi = rbi_base + ring->next2fill; gd = ring->base + ring->next2fill; + rbi->comp_state = VMXNET3_RXD_COMP_PENDING; if (rbi->buf_type == VMXNET3_RX_BUF_SKB) { if (rbi->skb == NULL) { @@ -630,8 +645,10 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx, /* Fill the last buffer but dont mark it ready, or else the * device will think that the queue is full */ - if (num_allocated == num_to_alloc) + if (num_allocated == num_to_alloc) { + rbi->comp_state = VMXNET3_RXD_COMP_DONE; break; + } gd->dword[2] |= cpu_to_le32(ring->gen << VMXNET3_RXD_GEN_SHIFT); num_allocated++; @@ -1044,6 +1061,23 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, } tq->stats.copy_skb_header++; } + if (unlikely(count > VMXNET3_MAX_TSO_TXD_PER_PKT)) { + /* tso pkts must not use more than + * VMXNET3_MAX_TSO_TXD_PER_PKT entries + */ + if (skb_linearize(skb) != 0) { + tq->stats.drop_too_many_frags++; + goto drop_pkt; + } + tq->stats.linearized++; + + /* recalculate the # of descriptors to use */ + count = VMXNET3_TXD_NEEDED(skb_headlen(skb)) + 1; + if (unlikely(count > VMXNET3_MAX_TSO_TXD_PER_PKT)) { + tq->stats.drop_too_many_frags++; + goto drop_pkt; + } + } if (skb->encapsulation) { vmxnet3_prepare_inner_tso(skb, &ctx); } else { @@ -1127,7 +1161,12 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, if (ctx.mss) { if (VMXNET3_VERSION_GE_4(adapter) && skb->encapsulation) { gdesc->txd.hlen = ctx.l4_offset + ctx.l4_hdr_size; - gdesc->txd.om = VMXNET3_OM_ENCAP; + if (VMXNET3_VERSION_GE_7(adapter)) { + gdesc->txd.om = VMXNET3_OM_TSO; + gdesc->txd.ext1 = 1; + } else { + gdesc->txd.om = VMXNET3_OM_ENCAP; + } gdesc->txd.msscof = ctx.mss; if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM) @@ -1144,8 +1183,15 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, skb->encapsulation) { gdesc->txd.hlen = ctx.l4_offset + ctx.l4_hdr_size; - gdesc->txd.om 
= VMXNET3_OM_ENCAP; - gdesc->txd.msscof = 0; /* Reserved */ + if (VMXNET3_VERSION_GE_7(adapter)) { + gdesc->txd.om = VMXNET3_OM_CSUM; + gdesc->txd.msscof = ctx.l4_offset + + skb->csum_offset; + gdesc->txd.ext1 = 1; + } else { + gdesc->txd.om = VMXNET3_OM_ENCAP; + gdesc->txd.msscof = 0; /* Reserved */ + } } else { gdesc->txd.hlen = ctx.l4_offset; gdesc->txd.om = VMXNET3_OM_CSUM; @@ -1193,7 +1239,7 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, if (tx_num_deferred >= le32_to_cpu(tq->shared->txThreshold)) { tq->shared->txNumDeferred = 0; VMXNET3_WRITE_BAR0_REG(adapter, - VMXNET3_REG_TXPROD + tq->qid * 8, + adapter->tx_prod_offset + tq->qid * 8, tq->tx_ring.next2fill); } @@ -1345,14 +1391,15 @@ static int vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, struct vmxnet3_adapter *adapter, int quota) { - static const u32 rxprod_reg[2] = { - VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2 + u32 rxprod_reg[2] = { + adapter->rx_prod_offset, adapter->rx_prod2_offset }; u32 num_pkts = 0; bool skip_page_frags = false; struct Vmxnet3_RxCompDesc *rcd; struct vmxnet3_rx_ctx *ctx = &rq->rx_ctx; u16 segCnt = 0, mss = 0; + int comp_offset, fill_offset; #ifdef __BIG_ENDIAN_BITFIELD struct Vmxnet3_RxDesc rxCmdDesc; struct Vmxnet3_RxCompDesc rxComp; @@ -1625,9 +1672,15 @@ not_lro: rcd_done: /* device may have skipped some rx descs */ - ring->next2comp = idx; - num_to_alloc = vmxnet3_cmd_ring_desc_avail(ring); ring = rq->rx_ring + ring_idx; + rbi->comp_state = VMXNET3_RXD_COMP_DONE; + + comp_offset = vmxnet3_cmd_ring_desc_avail(ring); + fill_offset = (idx > ring->next2fill ? 0 : ring->size) + + idx - ring->next2fill - 1; + if (!ring->isOutOfOrder || fill_offset >= comp_offset) + ring->next2comp = idx; + num_to_alloc = vmxnet3_cmd_ring_desc_avail(ring); /* Ensure that the writes to rxd->gen bits will be observed * after all other writes to rxd objects. 
@@ -1635,18 +1688,38 @@ rcd_done: dma_wmb(); while (num_to_alloc) { - vmxnet3_getRxDesc(rxd, &ring->base[ring->next2fill].rxd, - &rxCmdDesc); - BUG_ON(!rxd->addr); + rbi = rq->buf_info[ring_idx] + ring->next2fill; + if (!(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_OOORX_COMP))) + goto refill_buf; + if (ring_idx == 0) { + /* ring0 Type1 buffers can get skipped; re-fill them */ + if (rbi->buf_type != VMXNET3_RX_BUF_SKB) + goto refill_buf; + } + if (rbi->comp_state == VMXNET3_RXD_COMP_DONE) { +refill_buf: + vmxnet3_getRxDesc(rxd, &ring->base[ring->next2fill].rxd, + &rxCmdDesc); + WARN_ON(!rxd->addr); + + /* Recv desc is ready to be used by the device */ + rxd->gen = ring->gen; + vmxnet3_cmd_ring_adv_next2fill(ring); + rbi->comp_state = VMXNET3_RXD_COMP_PENDING; + num_to_alloc--; + } else { + /* rx completion hasn't occurred */ + ring->isOutOfOrder = 1; + break; + } + } - /* Recv desc is ready to be used by the device */ - rxd->gen = ring->gen; - vmxnet3_cmd_ring_adv_next2fill(ring); - num_to_alloc--; + if (num_to_alloc == 0) { + ring->isOutOfOrder = 0; } /* if needed, update the register */ - if (unlikely(rq->shared->updateRxProd)) { + if (unlikely(rq->shared->updateRxProd) && (ring->next2fill & 0xf) == 0) { VMXNET3_WRITE_BAR0_REG(adapter, rxprod_reg[ring_idx] + rq->qid * 8, ring->next2fill); @@ -1810,6 +1883,7 @@ vmxnet3_rq_init(struct vmxnet3_rx_queue *rq, memset(rq->rx_ring[i].base, 0, rq->rx_ring[i].size * sizeof(struct Vmxnet3_RxDesc)); rq->rx_ring[i].gen = VMXNET3_INIT_GEN; + rq->rx_ring[i].isOutOfOrder = 0; } if (vmxnet3_rq_alloc_rx_buf(rq, 0, rq->rx_ring[0].size - 1, adapter) == 0) { @@ -2000,8 +2074,17 @@ vmxnet3_poll_rx_only(struct napi_struct *napi, int budget) rxd_done = vmxnet3_rq_rx_complete(rq, adapter, budget); if (rxd_done < budget) { + struct Vmxnet3_RxCompDesc *rcd; +#ifdef __BIG_ENDIAN_BITFIELD + struct Vmxnet3_RxCompDesc rxComp; +#endif napi_complete_done(napi, rxd_done); vmxnet3_enable_intr(adapter, rq->comp_ring.intr_idx); + /* after unmasking 
the interrupt, check if any descriptors were completed */ + vmxnet3_getRxComp(rcd, &rq->comp_ring.base[rq->comp_ring.next2proc].rcd, + &rxComp); + if (rcd->gen == rq->comp_ring.gen && napi_reschedule(napi)) + vmxnet3_disable_intr(adapter, rq->comp_ring.intr_idx); } return rxd_done; } @@ -2627,6 +2710,23 @@ vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter) } static void +vmxnet3_init_bufsize(struct vmxnet3_adapter *adapter) +{ + struct Vmxnet3_DriverShared *shared = adapter->shared; + union Vmxnet3_CmdInfo *cmdInfo = &shared->cu.cmdInfo; + unsigned long flags; + + if (!VMXNET3_VERSION_GE_7(adapter)) + return; + + cmdInfo->ringBufSize = adapter->ringBufSize; + spin_lock_irqsave(&adapter->cmd_lock, flags); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_SET_RING_BUFFER_SIZE); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); +} + +static void vmxnet3_init_coalesce(struct vmxnet3_adapter *adapter) { struct Vmxnet3_DriverShared *shared = adapter->shared; @@ -2671,6 +2771,36 @@ vmxnet3_init_rssfields(struct vmxnet3_adapter *adapter) adapter->rss_fields = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); } else { + if (VMXNET3_VERSION_GE_7(adapter)) { + if ((adapter->rss_fields & VMXNET3_RSS_FIELDS_UDPIP4 || + adapter->rss_fields & VMXNET3_RSS_FIELDS_UDPIP6) && + vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_UDP_RSS)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_UDP_RSS; + } else { + adapter->dev_caps[0] &= ~(1UL << VMXNET3_CAP_UDP_RSS); + } + + if ((adapter->rss_fields & VMXNET3_RSS_FIELDS_ESPIP4) && + vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_ESP_RSS_IPV4)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_ESP_RSS_IPV4; + } else { + adapter->dev_caps[0] &= ~(1UL << VMXNET3_CAP_ESP_RSS_IPV4); + } + + if ((adapter->rss_fields & VMXNET3_RSS_FIELDS_ESPIP6) && + vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_ESP_RSS_IPV6)) { + adapter->dev_caps[0] |= 1UL << 
VMXNET3_CAP_ESP_RSS_IPV6; + } else { + adapter->dev_caps[0] &= ~(1UL << VMXNET3_CAP_ESP_RSS_IPV6); + } + + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DCR, adapter->dev_caps[0]); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_DCR0_REG); + adapter->dev_caps[0] = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + } cmdInfo->setRssFields = adapter->rss_fields; VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_SET_RSS_FIELDS); @@ -2734,14 +2864,15 @@ vmxnet3_activate_dev(struct vmxnet3_adapter *adapter) goto activate_err; } + vmxnet3_init_bufsize(adapter); vmxnet3_init_coalesce(adapter); vmxnet3_init_rssfields(adapter); for (i = 0; i < adapter->num_rx_queues; i++) { VMXNET3_WRITE_BAR0_REG(adapter, - VMXNET3_REG_RXPROD + i * VMXNET3_REG_ALIGN, + adapter->rx_prod_offset + i * VMXNET3_REG_ALIGN, adapter->rx_queue[i].rx_ring[0].next2fill); - VMXNET3_WRITE_BAR0_REG(adapter, (VMXNET3_REG_RXPROD2 + + VMXNET3_WRITE_BAR0_REG(adapter, (adapter->rx_prod2_offset + (i * VMXNET3_REG_ALIGN)), adapter->rx_queue[i].rx_ring[1].next2fill); } @@ -2907,19 +3038,29 @@ static void vmxnet3_adjust_rx_ring_size(struct vmxnet3_adapter *adapter) { size_t sz, i, ring0_size, ring1_size, comp_size; - if (adapter->netdev->mtu <= VMXNET3_MAX_SKB_BUF_SIZE - - VMXNET3_MAX_ETH_HDR_SIZE) { - adapter->skb_buf_size = adapter->netdev->mtu + - VMXNET3_MAX_ETH_HDR_SIZE; - if (adapter->skb_buf_size < VMXNET3_MIN_T0_BUF_SIZE) - adapter->skb_buf_size = VMXNET3_MIN_T0_BUF_SIZE; - - adapter->rx_buf_per_pkt = 1; + /* With version7 ring1 will have only T0 buffers */ + if (!VMXNET3_VERSION_GE_7(adapter)) { + if (adapter->netdev->mtu <= VMXNET3_MAX_SKB_BUF_SIZE - + VMXNET3_MAX_ETH_HDR_SIZE) { + adapter->skb_buf_size = adapter->netdev->mtu + + VMXNET3_MAX_ETH_HDR_SIZE; + if (adapter->skb_buf_size < VMXNET3_MIN_T0_BUF_SIZE) + adapter->skb_buf_size = VMXNET3_MIN_T0_BUF_SIZE; + + adapter->rx_buf_per_pkt = 1; + } else { + adapter->skb_buf_size = VMXNET3_MAX_SKB_BUF_SIZE; + sz = 
adapter->netdev->mtu - VMXNET3_MAX_SKB_BUF_SIZE + + VMXNET3_MAX_ETH_HDR_SIZE; + adapter->rx_buf_per_pkt = 1 + (sz + PAGE_SIZE - 1) / PAGE_SIZE; + } } else { - adapter->skb_buf_size = VMXNET3_MAX_SKB_BUF_SIZE; - sz = adapter->netdev->mtu - VMXNET3_MAX_SKB_BUF_SIZE + - VMXNET3_MAX_ETH_HDR_SIZE; - adapter->rx_buf_per_pkt = 1 + (sz + PAGE_SIZE - 1) / PAGE_SIZE; + adapter->skb_buf_size = min((int)adapter->netdev->mtu + VMXNET3_MAX_ETH_HDR_SIZE, + VMXNET3_MAX_SKB_BUF_SIZE); + adapter->rx_buf_per_pkt = 1; + adapter->ringBufSize.ring1BufSizeType0 = cpu_to_le16(adapter->skb_buf_size); + adapter->ringBufSize.ring1BufSizeType1 = 0; + adapter->ringBufSize.ring2BufSizeType1 = cpu_to_le16(PAGE_SIZE); } /* @@ -2935,6 +3076,11 @@ vmxnet3_adjust_rx_ring_size(struct vmxnet3_adapter *adapter) ring1_size = (ring1_size + sz - 1) / sz * sz; ring1_size = min_t(u32, ring1_size, VMXNET3_RX_RING2_MAX_SIZE / sz * sz); + /* For v7 and later, keep ring size power of 2 for UPT */ + if (VMXNET3_VERSION_GE_7(adapter)) { + ring0_size = rounddown_pow_of_two(ring0_size); + ring1_size = rounddown_pow_of_two(ring1_size); + } comp_size = ring0_size + ring1_size; for (i = 0; i < adapter->num_rx_queues; i++) { @@ -3185,6 +3331,47 @@ vmxnet3_declare_features(struct vmxnet3_adapter *adapter) NETIF_F_GSO_UDP_TUNNEL_CSUM; } + if (VMXNET3_VERSION_GE_7(adapter)) { + unsigned long flags; + + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_GENEVE_CHECKSUM_OFFLOAD)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_GENEVE_CHECKSUM_OFFLOAD; + } + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_VXLAN_CHECKSUM_OFFLOAD)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_VXLAN_CHECKSUM_OFFLOAD; + } + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_GENEVE_TSO)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_GENEVE_TSO; + } + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_VXLAN_TSO)) { + adapter->dev_caps[0] |= 1UL << 
VMXNET3_CAP_VXLAN_TSO; + } + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_GENEVE_OUTER_CHECKSUM_OFFLOAD)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_GENEVE_OUTER_CHECKSUM_OFFLOAD; + } + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_VXLAN_OUTER_CHECKSUM_OFFLOAD)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_VXLAN_OUTER_CHECKSUM_OFFLOAD; + } + + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DCR, adapter->dev_caps[0]); + spin_lock_irqsave(&adapter->cmd_lock, flags); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_DCR0_REG); + adapter->dev_caps[0] = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); + + if (!(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_GENEVE_OUTER_CHECKSUM_OFFLOAD)) && + !(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_VXLAN_OUTER_CHECKSUM_OFFLOAD))) { + netdev->hw_enc_features &= ~NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->features &= ~NETIF_F_GSO_UDP_TUNNEL_CSUM; + } + } + netdev->vlan_features = netdev->hw_features & ~(NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX); @@ -3472,7 +3659,12 @@ vmxnet3_probe_device(struct pci_dev *pdev, goto err_alloc_pci; ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_VRRS); - if (ver & (1 << VMXNET3_REV_6)) { + if (ver & (1 << VMXNET3_REV_7)) { + VMXNET3_WRITE_BAR1_REG(adapter, + VMXNET3_REG_VRRS, + 1 << VMXNET3_REV_7); + adapter->version = VMXNET3_REV_7 + 1; + } else if (ver & (1 << VMXNET3_REV_6)) { VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_VRRS, 1 << VMXNET3_REV_6); @@ -3520,6 +3712,39 @@ vmxnet3_probe_device(struct pci_dev *pdev, goto err_ver; } + if (VMXNET3_VERSION_GE_7(adapter)) { + adapter->devcap_supported[0] = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_DCR); + adapter->ptcap_supported[0] = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_PTCR); + if (adapter->devcap_supported[0] & (1UL << VMXNET3_CAP_LARGE_BAR)) { + adapter->dev_caps[0] = adapter->devcap_supported[0] & + (1UL << 
VMXNET3_CAP_LARGE_BAR); + } + if (!(adapter->ptcap_supported[0] & (1UL << VMXNET3_DCR_ERROR)) && + adapter->ptcap_supported[0] & (1UL << VMXNET3_CAP_OOORX_COMP) && + adapter->devcap_supported[0] & (1UL << VMXNET3_CAP_OOORX_COMP)) { + adapter->dev_caps[0] |= adapter->devcap_supported[0] & + (1UL << VMXNET3_CAP_OOORX_COMP); + } + if (adapter->dev_caps[0]) + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DCR, adapter->dev_caps[0]); + + spin_lock_irqsave(&adapter->cmd_lock, flags); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_DCR0_REG); + adapter->dev_caps[0] = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); + } + + if (VMXNET3_VERSION_GE_7(adapter) && + adapter->dev_caps[0] & (1UL << VMXNET3_CAP_LARGE_BAR)) { + adapter->tx_prod_offset = VMXNET3_REG_LB_TXPROD; + adapter->rx_prod_offset = VMXNET3_REG_LB_RXPROD; + adapter->rx_prod2_offset = VMXNET3_REG_LB_RXPROD2; + } else { + adapter->tx_prod_offset = VMXNET3_REG_TXPROD; + adapter->rx_prod_offset = VMXNET3_REG_RXPROD; + adapter->rx_prod2_offset = VMXNET3_REG_RXPROD2; + } + if (VMXNET3_VERSION_GE_6(adapter)) { spin_lock_irqsave(&adapter->cmd_lock, flags); VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c index 3172d46c0335..ce3993282c0f 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethtool.c +++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c @@ -1,7 +1,7 @@ /* * Linux driver for VMware's vmxnet3 ethernet NIC. * - * Copyright (C) 2008-2021, VMware, Inc. All Rights Reserved. + * Copyright (C) 2008-2022, VMware, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -298,7 +298,7 @@ netdev_features_t vmxnet3_features_check(struct sk_buff *skb, return features; } -static void vmxnet3_enable_encap_offloads(struct net_device *netdev) +static void vmxnet3_enable_encap_offloads(struct net_device *netdev, netdev_features_t features) { struct vmxnet3_adapter *adapter = netdev_priv(netdev); @@ -306,8 +306,50 @@ static void vmxnet3_enable_encap_offloads(struct net_device *netdev) netdev->hw_enc_features |= NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_TSO | NETIF_F_TSO6 | - NETIF_F_LRO | NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_UDP_TUNNEL_CSUM; + NETIF_F_LRO; + if (features & NETIF_F_GSO_UDP_TUNNEL) + netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL; + if (features & NETIF_F_GSO_UDP_TUNNEL_CSUM) + netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM; + } + if (VMXNET3_VERSION_GE_7(adapter)) { + unsigned long flags; + + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_GENEVE_CHECKSUM_OFFLOAD)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_GENEVE_CHECKSUM_OFFLOAD; + } + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_VXLAN_CHECKSUM_OFFLOAD)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_VXLAN_CHECKSUM_OFFLOAD; + } + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_GENEVE_TSO)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_GENEVE_TSO; + } + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_VXLAN_TSO)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_VXLAN_TSO; + } + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_GENEVE_OUTER_CHECKSUM_OFFLOAD)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_GENEVE_OUTER_CHECKSUM_OFFLOAD; + } + if (vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + 
VMXNET3_CAP_VXLAN_OUTER_CHECKSUM_OFFLOAD)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_VXLAN_OUTER_CHECKSUM_OFFLOAD; + } + + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DCR, adapter->dev_caps[0]); + spin_lock_irqsave(&adapter->cmd_lock, flags); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_DCR0_REG); + adapter->dev_caps[0] = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); + + if (!(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_GENEVE_OUTER_CHECKSUM_OFFLOAD)) && + !(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_VXLAN_OUTER_CHECKSUM_OFFLOAD))) { + netdev->hw_enc_features &= ~NETIF_F_GSO_UDP_TUNNEL_CSUM; + } } } @@ -322,6 +364,22 @@ static void vmxnet3_disable_encap_offloads(struct net_device *netdev) NETIF_F_LRO | NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM); } + if (VMXNET3_VERSION_GE_7(adapter)) { + unsigned long flags; + + adapter->dev_caps[0] &= ~(1UL << VMXNET3_CAP_GENEVE_CHECKSUM_OFFLOAD | + 1UL << VMXNET3_CAP_VXLAN_CHECKSUM_OFFLOAD | + 1UL << VMXNET3_CAP_GENEVE_TSO | + 1UL << VMXNET3_CAP_VXLAN_TSO | + 1UL << VMXNET3_CAP_GENEVE_OUTER_CHECKSUM_OFFLOAD | + 1UL << VMXNET3_CAP_VXLAN_OUTER_CHECKSUM_OFFLOAD); + + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DCR, adapter->dev_caps[0]); + spin_lock_irqsave(&adapter->cmd_lock, flags); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_DCR0_REG); + adapter->dev_caps[0] = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); + } } int vmxnet3_set_features(struct net_device *netdev, netdev_features_t features) @@ -357,8 +415,8 @@ int vmxnet3_set_features(struct net_device *netdev, netdev_features_t features) adapter->shared->devRead.misc.uptFeatures &= ~UPT1_F_RXVLAN; - if ((features & tun_offload_mask) != 0 && !udp_tun_enabled) { - vmxnet3_enable_encap_offloads(netdev); + if ((features & tun_offload_mask) != 0) { + vmxnet3_enable_encap_offloads(netdev, features); 
adapter->shared->devRead.misc.uptFeatures |= UPT1_F_RXINNEROFLD; } else if ((features & tun_offload_mask) == 0 && @@ -462,7 +520,7 @@ vmxnet3_get_regs(struct net_device *netdev, struct ethtool_regs *regs, void *p) for (i = 0; i < adapter->num_tx_queues; i++) { struct vmxnet3_tx_queue *tq = &adapter->tx_queue[i]; - buf[j++] = VMXNET3_READ_BAR0_REG(adapter, VMXNET3_REG_TXPROD + + buf[j++] = VMXNET3_READ_BAR0_REG(adapter, adapter->tx_prod_offset + i * VMXNET3_REG_ALIGN); buf[j++] = VMXNET3_GET_ADDR_LO(tq->tx_ring.basePA); @@ -490,9 +548,9 @@ vmxnet3_get_regs(struct net_device *netdev, struct ethtool_regs *regs, void *p) for (i = 0; i < adapter->num_rx_queues; i++) { struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i]; - buf[j++] = VMXNET3_READ_BAR0_REG(adapter, VMXNET3_REG_RXPROD + + buf[j++] = VMXNET3_READ_BAR0_REG(adapter, adapter->rx_prod_offset + i * VMXNET3_REG_ALIGN); - buf[j++] = VMXNET3_READ_BAR0_REG(adapter, VMXNET3_REG_RXPROD2 + + buf[j++] = VMXNET3_READ_BAR0_REG(adapter, adapter->rx_prod2_offset + i * VMXNET3_REG_ALIGN); buf[j++] = VMXNET3_GET_ADDR_LO(rq->rx_ring[0].basePA); @@ -660,6 +718,13 @@ vmxnet3_set_ringparam(struct net_device *netdev, new_rx_ring2_size = min_t(u32, new_rx_ring2_size, VMXNET3_RX_RING2_MAX_SIZE); + /* For v7 and later, keep ring size power of 2 for UPT */ + if (VMXNET3_VERSION_GE_7(adapter)) { + new_tx_ring_size = rounddown_pow_of_two(new_tx_ring_size); + new_rx_ring_size = rounddown_pow_of_two(new_rx_ring_size); + new_rx_ring2_size = rounddown_pow_of_two(new_rx_ring2_size); + } + /* rx data ring buffer size has to be a multiple of * VMXNET3_RXDATA_DESC_SIZE_ALIGN */ @@ -913,6 +978,39 @@ vmxnet3_set_rss_hash_opt(struct net_device *netdev, union Vmxnet3_CmdInfo *cmdInfo = &shared->cu.cmdInfo; unsigned long flags; + if (VMXNET3_VERSION_GE_7(adapter)) { + if ((rss_fields & VMXNET3_RSS_FIELDS_UDPIP4 || + rss_fields & VMXNET3_RSS_FIELDS_UDPIP6) && + vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_UDP_RSS)) { + 
adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_UDP_RSS; + } else { + adapter->dev_caps[0] &= ~(1UL << VMXNET3_CAP_UDP_RSS); + } + if ((rss_fields & VMXNET3_RSS_FIELDS_ESPIP4) && + vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_ESP_RSS_IPV4)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_ESP_RSS_IPV4; + } else { + adapter->dev_caps[0] &= ~(1UL << VMXNET3_CAP_ESP_RSS_IPV4); + } + if ((rss_fields & VMXNET3_RSS_FIELDS_ESPIP6) && + vmxnet3_check_ptcapability(adapter->ptcap_supported[0], + VMXNET3_CAP_ESP_RSS_IPV6)) { + adapter->dev_caps[0] |= 1UL << VMXNET3_CAP_ESP_RSS_IPV6; + } else { + adapter->dev_caps[0] &= ~(1UL << VMXNET3_CAP_ESP_RSS_IPV6); + } + + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DCR, + adapter->dev_caps[0]); + spin_lock_irqsave(&adapter->cmd_lock, flags); + VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, + VMXNET3_CMD_GET_DCR0_REG); + adapter->dev_caps[0] = VMXNET3_READ_BAR1_REG(adapter, + VMXNET3_REG_CMD); + spin_unlock_irqrestore(&adapter->cmd_lock, flags); + } spin_lock_irqsave(&adapter->cmd_lock, flags); cmdInfo->setRssFields = rss_fields; VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h index 7027ff483fa5..3367db23aa13 100644 --- a/drivers/net/vmxnet3/vmxnet3_int.h +++ b/drivers/net/vmxnet3/vmxnet3_int.h @@ -1,7 +1,7 @@ /* * Linux driver for VMware's vmxnet3 ethernet NIC. * - * Copyright (C) 2008-2021, VMware, Inc. All Rights Reserved. + * Copyright (C) 2008-2022, VMware, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -69,18 +69,19 @@ /* * Version numbers */ -#define VMXNET3_DRIVER_VERSION_STRING "1.6.0.0-k" +#define VMXNET3_DRIVER_VERSION_STRING "1.7.0.0-k" /* Each byte of this 32-bit integer encodes a version number in * VMXNET3_DRIVER_VERSION_STRING. 
*/ -#define VMXNET3_DRIVER_VERSION_NUM 0x01060000 +#define VMXNET3_DRIVER_VERSION_NUM 0x01070000 #if defined(CONFIG_PCI_MSI) /* RSS only makes sense if MSI-X is supported. */ #define VMXNET3_RSS #endif +#define VMXNET3_REV_7 6 /* Vmxnet3 Rev. 7 */ #define VMXNET3_REV_6 5 /* Vmxnet3 Rev. 6 */ #define VMXNET3_REV_5 4 /* Vmxnet3 Rev. 5 */ #define VMXNET3_REV_4 3 /* Vmxnet3 Rev. 4 */ @@ -135,6 +136,7 @@ struct vmxnet3_cmd_ring { u32 next2fill; u32 next2comp; u8 gen; + u8 isOutOfOrder; dma_addr_t basePA; }; @@ -259,9 +261,13 @@ enum vmxnet3_rx_buf_type { VMXNET3_RX_BUF_PAGE = 2 }; +#define VMXNET3_RXD_COMP_PENDING 0 +#define VMXNET3_RXD_COMP_DONE 1 + struct vmxnet3_rx_buf_info { enum vmxnet3_rx_buf_type buf_type; u16 len; + u8 comp_state; union { struct sk_buff *skb; struct page *page; @@ -402,6 +408,13 @@ struct vmxnet3_adapter { dma_addr_t pm_conf_pa; dma_addr_t rss_conf_pa; bool queuesExtEnabled; + struct Vmxnet3_RingBufferSize ringBufSize; + u32 devcap_supported[8]; + u32 ptcap_supported[8]; + u32 dev_caps[8]; + u16 tx_prod_offset; + u16 rx_prod_offset; + u16 rx_prod2_offset; }; #define VMXNET3_WRITE_BAR0_REG(adapter, reg, val) \ @@ -431,11 +444,13 @@ struct vmxnet3_adapter { (adapter->version >= VMXNET3_REV_5 + 1) #define VMXNET3_VERSION_GE_6(adapter) \ (adapter->version >= VMXNET3_REV_6 + 1) +#define VMXNET3_VERSION_GE_7(adapter) \ + (adapter->version >= VMXNET3_REV_7 + 1) /* must be a multiple of VMXNET3_RING_SIZE_ALIGN */ #define VMXNET3_DEF_TX_RING_SIZE 512 #define VMXNET3_DEF_RX_RING_SIZE 1024 -#define VMXNET3_DEF_RX_RING2_SIZE 256 +#define VMXNET3_DEF_RX_RING2_SIZE 512 #define VMXNET3_DEF_RXDATA_DESC_SIZE 128 @@ -494,6 +509,7 @@ void vmxnet3_set_ethtool_ops(struct net_device *netdev); void vmxnet3_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats); +bool vmxnet3_check_ptcapability(u32 cap_supported, u32 cap); extern char vmxnet3_driver_name[]; #endif diff --git a/drivers/net/wan/farsync.h b/drivers/net/wan/farsync.h index 
5f43568a9715..63908dbbb02d 100644 --- a/drivers/net/wan/farsync.h +++ b/drivers/net/wan/farsync.h @@ -43,7 +43,7 @@ * This version number is incremented with each official release of the * package and is a simplified number for normal user reference. * Individual files are tracked by the version control system and may - * have individual versions (or IDs) that move much faster than the + * have individual versions (or IDs) that move much faster than * the release version as individual updates are tracked. */ #define FST_USER_VERSION "1.04" diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index d9dea4829c86..8174d7b2966c 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -48,7 +48,6 @@ #include <linux/debugfs.h> typedef unsigned int pending_ring_idx_t; -#define INVALID_PENDING_RING_IDX (~0U) struct pending_tx_info { struct xen_netif_tx_request req; /* tx request */ @@ -82,8 +81,6 @@ struct xenvif_rx_meta { /* Discriminate from any valid pending_idx value. */ #define INVALID_PENDING_IDX 0xFFFF -#define MAX_BUFFER_OFFSET XEN_PAGE_SIZE - #define MAX_PENDING_REQS XEN_NETIF_TX_RING_SIZE /* The maximum number of frags is derived from the size of a grant (same @@ -367,11 +364,6 @@ void xenvif_free(struct xenvif *vif); int xenvif_xenbus_init(void); void xenvif_xenbus_fini(void); -int xenvif_schedulable(struct xenvif *vif); - -int xenvif_queue_stopped(struct xenvif_queue *queue); -void xenvif_wake_queue(struct xenvif_queue *queue); - /* (Un)Map communication rings. 
*/ void xenvif_unmap_frontend_data_rings(struct xenvif_queue *queue); int xenvif_map_frontend_data_rings(struct xenvif_queue *queue, @@ -394,7 +386,6 @@ int xenvif_dealloc_kthread(void *data); irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data); bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread); -void xenvif_rx_action(struct xenvif_queue *queue); void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb); void xenvif_carrier_on(struct xenvif *vif); @@ -403,9 +394,6 @@ void xenvif_carrier_on(struct xenvif *vif); void xenvif_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *ubuf, bool zerocopy_success); -/* Unmap a pending page and release it back to the guest */ -void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx); - static inline pending_ring_idx_t nr_pending_reqs(struct xenvif_queue *queue) { return MAX_PENDING_REQS - diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 8e035374a370..fb32ae82d9b0 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -69,7 +69,7 @@ void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue) wake_up(&queue->dealloc_wq); } -int xenvif_schedulable(struct xenvif *vif) +static int xenvif_schedulable(struct xenvif *vif) { return netif_running(vif->dev) && test_bit(VIF_STATUS_CONNECTED, &vif->status) && @@ -177,20 +177,6 @@ irqreturn_t xenvif_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -int xenvif_queue_stopped(struct xenvif_queue *queue) -{ - struct net_device *dev = queue->vif->dev; - unsigned int id = queue->id; - return netif_tx_queue_stopped(netdev_get_tx_queue(dev, id)); -} - -void xenvif_wake_queue(struct xenvif_queue *queue) -{ - struct net_device *dev = queue->vif->dev; - unsigned int id = queue->id; - netif_tx_wake_queue(netdev_get_tx_queue(dev, id)); -} - static u16 xenvif_select_queue(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev) { diff --git 
a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index d93814c14a23..fc61a4418737 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -112,6 +112,8 @@ static void make_tx_response(struct xenvif_queue *queue, s8 st); static void push_tx_responses(struct xenvif_queue *queue); +static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx); + static inline int tx_work_todo(struct xenvif_queue *queue); static inline unsigned long idx_to_pfn(struct xenvif_queue *queue, @@ -1418,7 +1420,7 @@ static void push_tx_responses(struct xenvif_queue *queue) notify_remote_via_irq(queue->tx_irq); } -void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx) +static void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx) { int ret; struct gnttab_unmap_grant_ref tx_unmap_op; diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index dbac4c03d21a..8df2c736fd23 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -486,7 +486,7 @@ static void xenvif_rx_skb(struct xenvif_queue *queue) #define RX_BATCH_SIZE 64 -void xenvif_rx_action(struct xenvif_queue *queue) +static void xenvif_rx_action(struct xenvif_queue *queue) { struct sk_buff_head completed_skbs; unsigned int work_done = 0; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d3d10556f0fa..82edf0359ab3 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -43,6 +43,7 @@ #include <linux/netfilter/nf_conntrack_common.h> #endif #include <net/net_debug.h> +#include <net/dropreason.h> /** * DOC: skb checksums @@ -337,184 +338,6 @@ struct sk_buff_head { struct sk_buff; -/* The reason of skb drop, which is used in kfree_skb_reason(). - * en...maybe they should be splited by group? - * - * Each item here should also be in 'TRACE_SKB_DROP_REASON', which is - * used to translate the reason to string. 
- */ -enum skb_drop_reason { - SKB_NOT_DROPPED_YET = 0, - SKB_DROP_REASON_NOT_SPECIFIED, /* drop reason is not specified */ - SKB_DROP_REASON_NO_SOCKET, /* socket not found */ - SKB_DROP_REASON_PKT_TOO_SMALL, /* packet size is too small */ - SKB_DROP_REASON_TCP_CSUM, /* TCP checksum error */ - SKB_DROP_REASON_SOCKET_FILTER, /* dropped by socket filter */ - SKB_DROP_REASON_UDP_CSUM, /* UDP checksum error */ - SKB_DROP_REASON_NETFILTER_DROP, /* dropped by netfilter */ - SKB_DROP_REASON_OTHERHOST, /* packet don't belong to current - * host (interface is in promisc - * mode) - */ - SKB_DROP_REASON_IP_CSUM, /* IP checksum error */ - SKB_DROP_REASON_IP_INHDR, /* there is something wrong with - * IP header (see - * IPSTATS_MIB_INHDRERRORS) - */ - SKB_DROP_REASON_IP_RPFILTER, /* IP rpfilter validate failed. - * see the document for rp_filter - * in ip-sysctl.rst for more - * information - */ - SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST, /* destination address of L2 - * is multicast, but L3 is - * unicast. - */ - SKB_DROP_REASON_XFRM_POLICY, /* xfrm policy check failed */ - SKB_DROP_REASON_IP_NOPROTO, /* no support for IP protocol */ - SKB_DROP_REASON_SOCKET_RCVBUFF, /* socket receive buff is full */ - SKB_DROP_REASON_PROTO_MEM, /* proto memory limition, such as - * udp packet drop out of - * udp_memory_allocated. 
- */ - SKB_DROP_REASON_TCP_MD5NOTFOUND, /* no MD5 hash and one - * expected, corresponding - * to LINUX_MIB_TCPMD5NOTFOUND - */ - SKB_DROP_REASON_TCP_MD5UNEXPECTED, /* MD5 hash and we're not - * expecting one, corresponding - * to LINUX_MIB_TCPMD5UNEXPECTED - */ - SKB_DROP_REASON_TCP_MD5FAILURE, /* MD5 hash and its wrong, - * corresponding to - * LINUX_MIB_TCPMD5FAILURE - */ - SKB_DROP_REASON_SOCKET_BACKLOG, /* failed to add skb to socket - * backlog (see - * LINUX_MIB_TCPBACKLOGDROP) - */ - SKB_DROP_REASON_TCP_FLAGS, /* TCP flags invalid */ - SKB_DROP_REASON_TCP_ZEROWINDOW, /* TCP receive window size is zero, - * see LINUX_MIB_TCPZEROWINDOWDROP - */ - SKB_DROP_REASON_TCP_OLD_DATA, /* the TCP data reveived is already - * received before (spurious retrans - * may happened), see - * LINUX_MIB_DELAYEDACKLOST - */ - SKB_DROP_REASON_TCP_OVERWINDOW, /* the TCP data is out of window, - * the seq of the first byte exceed - * the right edges of receive - * window - */ - SKB_DROP_REASON_TCP_OFOMERGE, /* the data of skb is already in - * the ofo queue, corresponding to - * LINUX_MIB_TCPOFOMERGE - */ - SKB_DROP_REASON_TCP_RFC7323_PAWS, /* PAWS check, corresponding to - * LINUX_MIB_PAWSESTABREJECTED - */ - SKB_DROP_REASON_TCP_INVALID_SEQUENCE, /* Not acceptable SEQ field */ - SKB_DROP_REASON_TCP_RESET, /* Invalid RST packet */ - SKB_DROP_REASON_TCP_INVALID_SYN, /* Incoming packet has unexpected SYN flag */ - SKB_DROP_REASON_TCP_CLOSE, /* TCP socket in CLOSE state */ - SKB_DROP_REASON_TCP_FASTOPEN, /* dropped by FASTOPEN request socket */ - SKB_DROP_REASON_TCP_OLD_ACK, /* TCP ACK is old, but in window */ - SKB_DROP_REASON_TCP_TOO_OLD_ACK, /* TCP ACK is too old */ - SKB_DROP_REASON_TCP_ACK_UNSENT_DATA, /* TCP ACK for data we haven't sent yet */ - SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE, /* pruned from TCP OFO queue */ - SKB_DROP_REASON_TCP_OFO_DROP, /* data already in receive queue */ - SKB_DROP_REASON_IP_OUTNOROUTES, /* route lookup failed */ - SKB_DROP_REASON_BPF_CGROUP_EGRESS, /* 
dropped by - * BPF_PROG_TYPE_CGROUP_SKB - * eBPF program - */ - SKB_DROP_REASON_IPV6DISABLED, /* IPv6 is disabled on the device */ - SKB_DROP_REASON_NEIGH_CREATEFAIL, /* failed to create neigh - * entry - */ - SKB_DROP_REASON_NEIGH_FAILED, /* neigh entry in failed state */ - SKB_DROP_REASON_NEIGH_QUEUEFULL, /* arp_queue for neigh - * entry is full - */ - SKB_DROP_REASON_NEIGH_DEAD, /* neigh entry is dead */ - SKB_DROP_REASON_TC_EGRESS, /* dropped in TC egress HOOK */ - SKB_DROP_REASON_QDISC_DROP, /* dropped by qdisc when packet - * outputting (failed to enqueue to - * current qdisc) - */ - SKB_DROP_REASON_CPU_BACKLOG, /* failed to enqueue the skb to - * the per CPU backlog queue. This - * can be caused by backlog queue - * full (see netdev_max_backlog in - * net.rst) or RPS flow limit - */ - SKB_DROP_REASON_XDP, /* dropped by XDP in input path */ - SKB_DROP_REASON_TC_INGRESS, /* dropped in TC ingress HOOK */ - SKB_DROP_REASON_UNHANDLED_PROTO, /* protocol not implemented - * or not supported - */ - SKB_DROP_REASON_SKB_CSUM, /* sk_buff checksum computation - * error - */ - SKB_DROP_REASON_SKB_GSO_SEG, /* gso segmentation error */ - SKB_DROP_REASON_SKB_UCOPY_FAULT, /* failed to copy data from - * user space, e.g., via - * zerocopy_sg_from_iter() - * or skb_orphan_frags_rx() - */ - SKB_DROP_REASON_DEV_HDR, /* device driver specific - * header/metadata is invalid - */ - /* the device is not ready to xmit/recv due to any of its data - * structure that is not up/ready/initialized, e.g., the IFF_UP is - * not set, or driver specific tun->tfiles[txq] is not initialized - */ - SKB_DROP_REASON_DEV_READY, - SKB_DROP_REASON_FULL_RING, /* ring buffer is full */ - SKB_DROP_REASON_NOMEM, /* error due to OOM */ - SKB_DROP_REASON_HDR_TRUNC, /* failed to trunc/extract the header - * from networking data, e.g., failed - * to pull the protocol header from - * frags via pskb_may_pull() - */ - SKB_DROP_REASON_TAP_FILTER, /* dropped by (ebpf) filter directly - * attached to tun/tap, e.g., 
via - * TUNSETFILTEREBPF - */ - SKB_DROP_REASON_TAP_TXFILTER, /* dropped by tx filter implemented - * at tun/tap, e.g., check_filter() - */ - SKB_DROP_REASON_ICMP_CSUM, /* ICMP checksum error */ - SKB_DROP_REASON_INVALID_PROTO, /* the packet doesn't follow RFC - * 2211, such as a broadcasts - * ICMP_TIMESTAMP - */ - SKB_DROP_REASON_IP_INADDRERRORS, /* host unreachable, corresponding - * to IPSTATS_MIB_INADDRERRORS - */ - SKB_DROP_REASON_IP_INNOROUTES, /* network unreachable, corresponding - * to IPSTATS_MIB_INADDRERRORS - */ - SKB_DROP_REASON_PKT_TOO_BIG, /* packet size is too big (maybe exceed - * the MTU) - */ - SKB_DROP_REASON_MAX, -}; - -#define SKB_DR_INIT(name, reason) \ - enum skb_drop_reason name = SKB_DROP_REASON_##reason -#define SKB_DR(name) \ - SKB_DR_INIT(name, NOT_SPECIFIED) -#define SKB_DR_SET(name, reason) \ - (name = SKB_DROP_REASON_##reason) -#define SKB_DR_OR(name, reason) \ - do { \ - if (name == SKB_DROP_REASON_NOT_SPECIFIED || \ - name == SKB_NOT_DROPPED_YET) \ - SKB_DR_SET(name, reason); \ - } while (0) - /* To allow 64K frame to be packed as single skb without frag_list we * require 64K/PAGE_SIZE pages plus 1 additional page to allow for * buffers which do not start on a page boundary. diff --git a/include/net/dropreason.h b/include/net/dropreason.h new file mode 100644 index 000000000000..fae9b40e54fa --- /dev/null +++ b/include/net/dropreason.h @@ -0,0 +1,256 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _LINUX_DROPREASON_H +#define _LINUX_DROPREASON_H + +/** + * enum skb_drop_reason - the reasons of skb drops + * + * The reason of skb drop, which is used in kfree_skb_reason(). 
+ */ +enum skb_drop_reason { + /** + * @SKB_NOT_DROPPED_YET: skb is not dropped yet (used for no-drop case) + */ + SKB_NOT_DROPPED_YET = 0, + /** @SKB_DROP_REASON_NOT_SPECIFIED: drop reason is not specified */ + SKB_DROP_REASON_NOT_SPECIFIED, + /** @SKB_DROP_REASON_NO_SOCKET: socket not found */ + SKB_DROP_REASON_NO_SOCKET, + /** @SKB_DROP_REASON_PKT_TOO_SMALL: packet size is too small */ + SKB_DROP_REASON_PKT_TOO_SMALL, + /** @SKB_DROP_REASON_TCP_CSUM: TCP checksum error */ + SKB_DROP_REASON_TCP_CSUM, + /** @SKB_DROP_REASON_SOCKET_FILTER: dropped by socket filter */ + SKB_DROP_REASON_SOCKET_FILTER, + /** @SKB_DROP_REASON_UDP_CSUM: UDP checksum error */ + SKB_DROP_REASON_UDP_CSUM, + /** @SKB_DROP_REASON_NETFILTER_DROP: dropped by netfilter */ + SKB_DROP_REASON_NETFILTER_DROP, + /** + * @SKB_DROP_REASON_OTHERHOST: packet doesn't belong to current host + * (interface is in promisc mode) + */ + SKB_DROP_REASON_OTHERHOST, + /** @SKB_DROP_REASON_IP_CSUM: IP checksum error */ + SKB_DROP_REASON_IP_CSUM, + /** + * @SKB_DROP_REASON_IP_INHDR: there is something wrong with IP header (see + * IPSTATS_MIB_INHDRERRORS) + */ + SKB_DROP_REASON_IP_INHDR, + /** + * @SKB_DROP_REASON_IP_RPFILTER: IP rpfilter validate failed. see the + * document for rp_filter in ip-sysctl.rst for more information + */ + SKB_DROP_REASON_IP_RPFILTER, + /** + * @SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST: destination address of L2 is + * multicast, but L3 is unicast. + */ + SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST, + /** @SKB_DROP_REASON_XFRM_POLICY: xfrm policy check failed */ + SKB_DROP_REASON_XFRM_POLICY, + /** @SKB_DROP_REASON_IP_NOPROTO: no support for IP protocol */ + SKB_DROP_REASON_IP_NOPROTO, + /** @SKB_DROP_REASON_SOCKET_RCVBUFF: socket receive buff is full */ + SKB_DROP_REASON_SOCKET_RCVBUFF, + /** + * @SKB_DROP_REASON_PROTO_MEM: proto memory limitation, such as udp packet + * drop out of udp_memory_allocated.
+ */ + SKB_DROP_REASON_PROTO_MEM, + /** + * @SKB_DROP_REASON_TCP_MD5NOTFOUND: no MD5 hash and one expected, + * corresponding to LINUX_MIB_TCPMD5NOTFOUND + */ + SKB_DROP_REASON_TCP_MD5NOTFOUND, + /** + * @SKB_DROP_REASON_TCP_MD5UNEXPECTED: MD5 hash and we're not expecting + * one, corresponding to LINUX_MIB_TCPMD5UNEXPECTED + */ + SKB_DROP_REASON_TCP_MD5UNEXPECTED, + /** + * @SKB_DROP_REASON_TCP_MD5FAILURE: MD5 hash and it's wrong, corresponding + * to LINUX_MIB_TCPMD5FAILURE + */ + SKB_DROP_REASON_TCP_MD5FAILURE, + /** + * @SKB_DROP_REASON_SOCKET_BACKLOG: failed to add skb to socket backlog ( + * see LINUX_MIB_TCPBACKLOGDROP) + */ + SKB_DROP_REASON_SOCKET_BACKLOG, + /** @SKB_DROP_REASON_TCP_FLAGS: TCP flags invalid */ + SKB_DROP_REASON_TCP_FLAGS, + /** + * @SKB_DROP_REASON_TCP_ZEROWINDOW: TCP receive window size is zero, + * see LINUX_MIB_TCPZEROWINDOWDROP + */ + SKB_DROP_REASON_TCP_ZEROWINDOW, + /** + * @SKB_DROP_REASON_TCP_OLD_DATA: the TCP data received has already + * been received before (spurious retrans may have happened), see + * LINUX_MIB_DELAYEDACKLOST + */ + SKB_DROP_REASON_TCP_OLD_DATA, + /** + * @SKB_DROP_REASON_TCP_OVERWINDOW: the TCP data is out of window, + * the seq of the first byte exceeds the right edge of the receive + * window + */ + SKB_DROP_REASON_TCP_OVERWINDOW, + /** + * @SKB_DROP_REASON_TCP_OFOMERGE: the data of skb is already in the ofo + * queue, corresponding to LINUX_MIB_TCPOFOMERGE + */ + SKB_DROP_REASON_TCP_OFOMERGE, + /** + * @SKB_DROP_REASON_TCP_RFC7323_PAWS: PAWS check, corresponding to + * LINUX_MIB_PAWSESTABREJECTED + */ + SKB_DROP_REASON_TCP_RFC7323_PAWS, + /** @SKB_DROP_REASON_TCP_INVALID_SEQUENCE: Not acceptable SEQ field */ + SKB_DROP_REASON_TCP_INVALID_SEQUENCE, + /** @SKB_DROP_REASON_TCP_RESET: Invalid RST packet */ + SKB_DROP_REASON_TCP_RESET, + /** + * @SKB_DROP_REASON_TCP_INVALID_SYN: Incoming packet has unexpected + * SYN flag + */ + SKB_DROP_REASON_TCP_INVALID_SYN, + /** @SKB_DROP_REASON_TCP_CLOSE: TCP socket in CLOSE state */ +
SKB_DROP_REASON_TCP_CLOSE, + /** @SKB_DROP_REASON_TCP_FASTOPEN: dropped by FASTOPEN request socket */ + SKB_DROP_REASON_TCP_FASTOPEN, + /** @SKB_DROP_REASON_TCP_OLD_ACK: TCP ACK is old, but in window */ + SKB_DROP_REASON_TCP_OLD_ACK, + /** @SKB_DROP_REASON_TCP_TOO_OLD_ACK: TCP ACK is too old */ + SKB_DROP_REASON_TCP_TOO_OLD_ACK, + /** + * @SKB_DROP_REASON_TCP_ACK_UNSENT_DATA: TCP ACK for data we haven't + * sent yet + */ + SKB_DROP_REASON_TCP_ACK_UNSENT_DATA, + /** @SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE: pruned from TCP OFO queue */ + SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE, + /** @SKB_DROP_REASON_TCP_OFO_DROP: data already in receive queue */ + SKB_DROP_REASON_TCP_OFO_DROP, + /** @SKB_DROP_REASON_IP_OUTNOROUTES: route lookup failed */ + SKB_DROP_REASON_IP_OUTNOROUTES, + /** + * @SKB_DROP_REASON_BPF_CGROUP_EGRESS: dropped by BPF_PROG_TYPE_CGROUP_SKB + * eBPF program + */ + SKB_DROP_REASON_BPF_CGROUP_EGRESS, + /** @SKB_DROP_REASON_IPV6DISABLED: IPv6 is disabled on the device */ + SKB_DROP_REASON_IPV6DISABLED, + /** @SKB_DROP_REASON_NEIGH_CREATEFAIL: failed to create neigh entry */ + SKB_DROP_REASON_NEIGH_CREATEFAIL, + /** @SKB_DROP_REASON_NEIGH_FAILED: neigh entry in failed state */ + SKB_DROP_REASON_NEIGH_FAILED, + /** @SKB_DROP_REASON_NEIGH_QUEUEFULL: arp_queue for neigh entry is full */ + SKB_DROP_REASON_NEIGH_QUEUEFULL, + /** @SKB_DROP_REASON_NEIGH_DEAD: neigh entry is dead */ + SKB_DROP_REASON_NEIGH_DEAD, + /** @SKB_DROP_REASON_TC_EGRESS: dropped in TC egress HOOK */ + SKB_DROP_REASON_TC_EGRESS, + /** + * @SKB_DROP_REASON_QDISC_DROP: dropped by qdisc when packet outputting ( + * failed to enqueue to current qdisc) + */ + SKB_DROP_REASON_QDISC_DROP, + /** + * @SKB_DROP_REASON_CPU_BACKLOG: failed to enqueue the skb to the per CPU + * backlog queue. 
This can be caused by backlog queue full (see + * netdev_max_backlog in net.rst) or RPS flow limit + */ + SKB_DROP_REASON_CPU_BACKLOG, + /** @SKB_DROP_REASON_XDP: dropped by XDP in input path */ + SKB_DROP_REASON_XDP, + /** @SKB_DROP_REASON_TC_INGRESS: dropped in TC ingress HOOK */ + SKB_DROP_REASON_TC_INGRESS, + /** @SKB_DROP_REASON_UNHANDLED_PROTO: protocol not implemented or not supported */ + SKB_DROP_REASON_UNHANDLED_PROTO, + /** @SKB_DROP_REASON_SKB_CSUM: sk_buff checksum computation error */ + SKB_DROP_REASON_SKB_CSUM, + /** @SKB_DROP_REASON_SKB_GSO_SEG: gso segmentation error */ + SKB_DROP_REASON_SKB_GSO_SEG, + /** + * @SKB_DROP_REASON_SKB_UCOPY_FAULT: failed to copy data from user space, + * e.g., via zerocopy_sg_from_iter() or skb_orphan_frags_rx() + */ + SKB_DROP_REASON_SKB_UCOPY_FAULT, + /** @SKB_DROP_REASON_DEV_HDR: device driver specific header/metadata is invalid */ + SKB_DROP_REASON_DEV_HDR, + /** + * @SKB_DROP_REASON_DEV_READY: the device is not ready to xmit/recv due to + * any of its data structure that is not up/ready/initialized, + * e.g., the IFF_UP is not set, or driver specific tun->tfiles[txq] + * is not initialized + */ + SKB_DROP_REASON_DEV_READY, + /** @SKB_DROP_REASON_FULL_RING: ring buffer is full */ + SKB_DROP_REASON_FULL_RING, + /** @SKB_DROP_REASON_NOMEM: error due to OOM */ + SKB_DROP_REASON_NOMEM, + /** + * @SKB_DROP_REASON_HDR_TRUNC: failed to trunc/extract the header from + * networking data, e.g., failed to pull the protocol header from + * frags via pskb_may_pull() + */ + SKB_DROP_REASON_HDR_TRUNC, + /** + * @SKB_DROP_REASON_TAP_FILTER: dropped by (ebpf) filter directly attached + * to tun/tap, e.g., via TUNSETFILTEREBPF + */ + SKB_DROP_REASON_TAP_FILTER, + /** + * @SKB_DROP_REASON_TAP_TXFILTER: dropped by tx filter implemented at + * tun/tap, e.g., check_filter() + */ + SKB_DROP_REASON_TAP_TXFILTER, + /** @SKB_DROP_REASON_ICMP_CSUM: ICMP checksum error */ + SKB_DROP_REASON_ICMP_CSUM, + /** + * @SKB_DROP_REASON_INVALID_PROTO: 
the packet doesn't follow RFC 2211, + * such as a broadcasts ICMP_TIMESTAMP + */ + SKB_DROP_REASON_INVALID_PROTO, + /** + * @SKB_DROP_REASON_IP_INADDRERRORS: host unreachable, corresponding to + * IPSTATS_MIB_INADDRERRORS + */ + SKB_DROP_REASON_IP_INADDRERRORS, + /** + * @SKB_DROP_REASON_IP_INNOROUTES: network unreachable, corresponding to + * IPSTATS_MIB_INADDRERRORS + */ + SKB_DROP_REASON_IP_INNOROUTES, + /** + * @SKB_DROP_REASON_PKT_TOO_BIG: packet size is too big (maybe exceed the + * MTU) + */ + SKB_DROP_REASON_PKT_TOO_BIG, + /** + * @SKB_DROP_REASON_MAX: the maximum of drop reason, which shouldn't be + * used as a real 'reason' + */ + SKB_DROP_REASON_MAX, +}; + +#define SKB_DR_INIT(name, reason) \ + enum skb_drop_reason name = SKB_DROP_REASON_##reason +#define SKB_DR(name) \ + SKB_DR_INIT(name, NOT_SPECIFIED) +#define SKB_DR_SET(name, reason) \ + (name = SKB_DROP_REASON_##reason) +#define SKB_DR_OR(name, reason) \ + do { \ + if (name == SKB_DROP_REASON_NOT_SPECIFIED || \ + name == SKB_NOT_DROPPED_YET) \ + SKB_DR_SET(name, reason); \ + } while (0) + +extern const char * const drop_reasons[]; + +#endif diff --git a/include/net/sock.h b/include/net/sock.h index c585ef6565d9..657873e2d90f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -611,7 +611,7 @@ void sock_net_set(struct sock *sk, struct net *net) int sk_set_peek_off(struct sock *sk, int val); -static inline int sk_peek_offset(struct sock *sk, int flags) +static inline int sk_peek_offset(const struct sock *sk, int flags) { if (unlikely(flags & MSG_PEEK)) { return READ_ONCE(sk->sk_peek_off); @@ -863,7 +863,7 @@ static inline void sk_add_bind2_node(struct sock *sk, struct hlist_head *list) ({ tpos = (typeof(*tpos) *)((void *)pos - offset); 1;}); \ pos = rcu_dereference(hlist_next_rcu(pos))) -static inline struct user_namespace *sk_user_ns(struct sock *sk) +static inline struct user_namespace *sk_user_ns(const struct sock *sk) { /* Careful only use this in a context where these parameters * can not 
change and must all be valid, such as recvmsg from @@ -909,7 +909,7 @@ enum sock_flags { #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) -static inline void sock_copy_flags(struct sock *nsk, struct sock *osk) +static inline void sock_copy_flags(struct sock *nsk, const struct sock *osk) { nsk->sk_flags = osk->sk_flags; } diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index a477bf907498..45264e4bb254 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -9,92 +9,6 @@ #include <linux/netdevice.h> #include <linux/tracepoint.h> -#define TRACE_SKB_DROP_REASON \ - EM(SKB_DROP_REASON_NOT_SPECIFIED, NOT_SPECIFIED) \ - EM(SKB_DROP_REASON_NO_SOCKET, NO_SOCKET) \ - EM(SKB_DROP_REASON_PKT_TOO_SMALL, PKT_TOO_SMALL) \ - EM(SKB_DROP_REASON_TCP_CSUM, TCP_CSUM) \ - EM(SKB_DROP_REASON_SOCKET_FILTER, SOCKET_FILTER) \ - EM(SKB_DROP_REASON_UDP_CSUM, UDP_CSUM) \ - EM(SKB_DROP_REASON_NETFILTER_DROP, NETFILTER_DROP) \ - EM(SKB_DROP_REASON_OTHERHOST, OTHERHOST) \ - EM(SKB_DROP_REASON_IP_CSUM, IP_CSUM) \ - EM(SKB_DROP_REASON_IP_INHDR, IP_INHDR) \ - EM(SKB_DROP_REASON_IP_RPFILTER, IP_RPFILTER) \ - EM(SKB_DROP_REASON_UNICAST_IN_L2_MULTICAST, \ - UNICAST_IN_L2_MULTICAST) \ - EM(SKB_DROP_REASON_XFRM_POLICY, XFRM_POLICY) \ - EM(SKB_DROP_REASON_IP_NOPROTO, IP_NOPROTO) \ - EM(SKB_DROP_REASON_SOCKET_RCVBUFF, SOCKET_RCVBUFF) \ - EM(SKB_DROP_REASON_PROTO_MEM, PROTO_MEM) \ - EM(SKB_DROP_REASON_TCP_MD5NOTFOUND, TCP_MD5NOTFOUND) \ - EM(SKB_DROP_REASON_TCP_MD5UNEXPECTED, \ - TCP_MD5UNEXPECTED) \ - EM(SKB_DROP_REASON_TCP_MD5FAILURE, TCP_MD5FAILURE) \ - EM(SKB_DROP_REASON_SOCKET_BACKLOG, SOCKET_BACKLOG) \ - EM(SKB_DROP_REASON_TCP_FLAGS, TCP_FLAGS) \ - EM(SKB_DROP_REASON_TCP_ZEROWINDOW, TCP_ZEROWINDOW) \ - EM(SKB_DROP_REASON_TCP_OLD_DATA, TCP_OLD_DATA) \ - EM(SKB_DROP_REASON_TCP_OVERWINDOW, TCP_OVERWINDOW) \ - EM(SKB_DROP_REASON_TCP_OFOMERGE, TCP_OFOMERGE) \ - EM(SKB_DROP_REASON_TCP_OFO_DROP, TCP_OFO_DROP) \ - 
EM(SKB_DROP_REASON_TCP_RFC7323_PAWS, TCP_RFC7323_PAWS) \ - EM(SKB_DROP_REASON_TCP_INVALID_SEQUENCE, \ - TCP_INVALID_SEQUENCE) \ - EM(SKB_DROP_REASON_TCP_RESET, TCP_RESET) \ - EM(SKB_DROP_REASON_TCP_INVALID_SYN, TCP_INVALID_SYN) \ - EM(SKB_DROP_REASON_TCP_CLOSE, TCP_CLOSE) \ - EM(SKB_DROP_REASON_TCP_FASTOPEN, TCP_FASTOPEN) \ - EM(SKB_DROP_REASON_TCP_OLD_ACK, TCP_OLD_ACK) \ - EM(SKB_DROP_REASON_TCP_TOO_OLD_ACK, TCP_TOO_OLD_ACK) \ - EM(SKB_DROP_REASON_TCP_ACK_UNSENT_DATA, \ - TCP_ACK_UNSENT_DATA) \ - EM(SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE, \ - TCP_OFO_QUEUE_PRUNE) \ - EM(SKB_DROP_REASON_IP_OUTNOROUTES, IP_OUTNOROUTES) \ - EM(SKB_DROP_REASON_BPF_CGROUP_EGRESS, \ - BPF_CGROUP_EGRESS) \ - EM(SKB_DROP_REASON_IPV6DISABLED, IPV6DISABLED) \ - EM(SKB_DROP_REASON_NEIGH_CREATEFAIL, NEIGH_CREATEFAIL) \ - EM(SKB_DROP_REASON_NEIGH_FAILED, NEIGH_FAILED) \ - EM(SKB_DROP_REASON_NEIGH_QUEUEFULL, NEIGH_QUEUEFULL) \ - EM(SKB_DROP_REASON_NEIGH_DEAD, NEIGH_DEAD) \ - EM(SKB_DROP_REASON_TC_EGRESS, TC_EGRESS) \ - EM(SKB_DROP_REASON_QDISC_DROP, QDISC_DROP) \ - EM(SKB_DROP_REASON_CPU_BACKLOG, CPU_BACKLOG) \ - EM(SKB_DROP_REASON_XDP, XDP) \ - EM(SKB_DROP_REASON_TC_INGRESS, TC_INGRESS) \ - EM(SKB_DROP_REASON_UNHANDLED_PROTO, UNHANDLED_PROTO) \ - EM(SKB_DROP_REASON_SKB_CSUM, SKB_CSUM) \ - EM(SKB_DROP_REASON_SKB_GSO_SEG, SKB_GSO_SEG) \ - EM(SKB_DROP_REASON_SKB_UCOPY_FAULT, SKB_UCOPY_FAULT) \ - EM(SKB_DROP_REASON_DEV_HDR, DEV_HDR) \ - EM(SKB_DROP_REASON_DEV_READY, DEV_READY) \ - EM(SKB_DROP_REASON_FULL_RING, FULL_RING) \ - EM(SKB_DROP_REASON_NOMEM, NOMEM) \ - EM(SKB_DROP_REASON_HDR_TRUNC, HDR_TRUNC) \ - EM(SKB_DROP_REASON_TAP_FILTER, TAP_FILTER) \ - EM(SKB_DROP_REASON_TAP_TXFILTER, TAP_TXFILTER) \ - EM(SKB_DROP_REASON_ICMP_CSUM, ICMP_CSUM) \ - EM(SKB_DROP_REASON_INVALID_PROTO, INVALID_PROTO) \ - EM(SKB_DROP_REASON_IP_INADDRERRORS, IP_INADDRERRORS) \ - EM(SKB_DROP_REASON_IP_INNOROUTES, IP_INNOROUTES) \ - EM(SKB_DROP_REASON_PKT_TOO_BIG, PKT_TOO_BIG) \ - EMe(SKB_DROP_REASON_MAX, MAX) - -#undef EM 
-#undef EMe - -#define EM(a, b) TRACE_DEFINE_ENUM(a); -#define EMe(a, b) TRACE_DEFINE_ENUM(a); - -TRACE_SKB_DROP_REASON - -#undef EM -#undef EMe -#define EM(a, b) { a, #b }, -#define EMe(a, b) { a, #b } - /* * Tracepoint for free an sk_buff: */ @@ -121,8 +35,7 @@ TRACE_EVENT(kfree_skb, TP_printk("skbaddr=%p protocol=%u location=%p reason: %s", __entry->skbaddr, __entry->protocol, __entry->location, - __print_symbolic(__entry->reason, - TRACE_SKB_DROP_REASON)) + drop_reasons[__entry->reason]) ); TRACE_EVENT(consume_skb, diff --git a/net/core/.gitignore b/net/core/.gitignore new file mode 100644 index 000000000000..df1e74372cce --- /dev/null +++ b/net/core/.gitignore @@ -0,0 +1 @@ +dropreason_str.c diff --git a/net/core/Makefile b/net/core/Makefile index a8e4f737692b..e8ce3bd283a6 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -4,7 +4,8 @@ # obj-y := sock.o request_sock.o skbuff.o datagram.o stream.o scm.o \ - gen_stats.o gen_estimator.o net_namespace.o secure_seq.o flow_dissector.o + gen_stats.o gen_estimator.o net_namespace.o secure_seq.o \ + flow_dissector.o dropreason_str.o obj-$(CONFIG_SYSCTL) += sysctl_net_core.o @@ -39,3 +40,23 @@ obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o obj-$(CONFIG_BPF_SYSCALL) += sock_map.o obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o obj-$(CONFIG_OF) += of_net.o + +clean-files := dropreason_str.c + +quiet_cmd_dropreason_str = GEN $@ +cmd_dropreason_str = awk -F ',' 'BEGIN{ print "\#include <net/dropreason.h>\n"; \ + print "const char * const drop_reasons[] = {" }\ + /^enum skb_drop/ { dr=1; }\ + /^\};/ { dr=0; }\ + /^\tSKB_DROP_REASON_/ {\ + if (dr) {\ + sub(/\tSKB_DROP_REASON_/, "", $$1);\ + printf "\t[SKB_DROP_REASON_%s] = \"%s\",\n", $$1, $$1;\ + }\ + }\ + END{ print "};" }' $< > $@ + +$(obj)/dropreason_str.c: $(srctree)/include/net/dropreason.h + $(call cmd,dropreason_str) + +$(obj)/dropreason_str.o: $(obj)/dropreason_str.c diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 41cac0e4834e..4ad1decce724 100644 
--- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -48,19 +48,6 @@ static int trace_state = TRACE_OFF; static bool monitor_hw; -#undef EM -#undef EMe - -#define EM(a, b) [a] = #b, -#define EMe(a, b) [a] = #b - -/* drop_reasons is used to translate 'enum skb_drop_reason' to string, - * which is reported to user space. - */ -static const char * const drop_reasons[] = { - TRACE_SKB_DROP_REASON -}; - /* net_dm_mutex * * An overall lock guarding every operation coming from userspace. diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5b3559cb1d82..b661040c100e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -91,6 +91,9 @@ static struct kmem_cache *skbuff_ext_cache __ro_after_init; int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS; EXPORT_SYMBOL(sysctl_max_skb_frags); +/* The array 'drop_reasons' is auto-generated in dropreason_str.c */ +EXPORT_SYMBOL(drop_reasons); + /** * skb_panic - private function for out-of-line support * @skb: buffer |