From 0754d65bd4be5bb7392aa59339a290c80346a93c Mon Sep 17 00:00:00 2001 From: Kiran Patil Date: Fri, 15 Oct 2021 16:35:15 -0700 Subject: ice: Add infrastructure for mqprio support via ndo_setup_tc Add infrastructure required for "ndo_setup_tc:qdisc_mqprio". ice_vsi_setup is modified to configure traffic classes based on mqprio data received from the stack. This includes low-level functions to configure min, max rate-limit parameters in hardware for traffic classes. Each traffic class gets mapped to a hardware channel (VSI) which can be individually configured with different bandwidth parameters. Co-developed-by: Tarun Singh Signed-off-by: Tarun Singh Signed-off-by: Kiran Patil Signed-off-by: Amritha Nambiar Signed-off-by: Sudheer Mogilappagari Tested-by: Bharathi Sreenivas Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 78 ++++- drivers/net/ethernet/intel/ice/ice_base.c | 34 +- drivers/net/ethernet/intel/ice/ice_dcb_lib.c | 201 ++++++++---- drivers/net/ethernet/intel/ice/ice_dcb_lib.h | 10 +- drivers/net/ethernet/intel/ice/ice_eswitch.c | 2 +- drivers/net/ethernet/intel/ice/ice_lib.c | 382 ++++++++++++++++++++--- drivers/net/ethernet/intel/ice/ice_lib.h | 11 +- drivers/net/ethernet/intel/ice/ice_main.c | 6 +- drivers/net/ethernet/intel/ice/ice_sched.c | 68 +++- drivers/net/ethernet/intel/ice/ice_sched.h | 2 + drivers/net/ethernet/intel/ice/ice_txrx.h | 7 + drivers/net/ethernet/intel/ice/ice_type.h | 3 + drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 4 +- 13 files changed, 671 insertions(+), 137 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 80f14886b5b1..c2814e31c438 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -104,6 +105,10 @@ #define ICE_INVAL_VFID 256 #define ICE_MAX_RXQS_PER_TC 256 /* Used when setting VSI context per TC 
Rx queues */ + +#define ICE_CHNL_START_TC 1 +#define ICE_CHNL_MAX_TC 16 + #define ICE_MAX_RESET_WAIT 20 #define ICE_VSIQF_HKEY_ARRAY_SIZE ((VSIQF_HKEY_MAX_INDEX + 1) * 4) @@ -145,6 +150,9 @@ #define ice_for_each_q_vector(vsi, i) \ for ((i) = 0; (i) < (vsi)->num_q_vectors; (i)++) +#define ice_for_each_chnl_tc(i) \ + for ((i) = ICE_CHNL_START_TC; (i) < ICE_CHNL_MAX_TC; (i)++) + #define ICE_UCAST_PROMISC_BITS (ICE_PROMISC_UCAST_TX | ICE_PROMISC_MCAST_TX | \ ICE_PROMISC_UCAST_RX | ICE_PROMISC_MCAST_RX) @@ -172,6 +180,21 @@ enum ice_feature { DECLARE_STATIC_KEY_FALSE(ice_xdp_locking_key); +struct ice_channel { + struct list_head list; + u8 type; + u16 sw_id; + u16 base_q; + u16 num_rxq; + u16 num_txq; + u16 vsi_num; + u8 ena_tc; + struct ice_aqc_vsi_props info; + u64 max_tx_rate; + u64 min_tx_rate; + struct ice_vsi *ch_vsi; +}; + struct ice_txq_meta { u32 q_teid; /* Tx-scheduler element identifier */ u16 q_id; /* Entry in VSI's txq_map bitmap */ @@ -189,7 +212,7 @@ struct ice_tc_info { struct ice_tc_cfg { u8 numtc; /* Total number of enabled TCs */ - u8 ena_tc; /* Tx map */ + u16 ena_tc; /* Tx map */ struct ice_tc_info tc_info[ICE_MAX_TRAFFIC_CLASS]; }; @@ -361,6 +384,34 @@ struct ice_vsi { struct net_device **target_netdevs; + struct tc_mqprio_qopt_offload mqprio_qopt; /* queue parameters */ + + /* Channel Specific Fields */ + struct ice_vsi *tc_map_vsi[ICE_CHNL_MAX_TC]; + u16 cnt_q_avail; + u16 next_base_q; /* next queue to be used for channel setup */ + struct list_head ch_list; + u16 num_chnl_rxq; + u16 num_chnl_txq; + u16 ch_rss_size; + /* store away rss size info before configuring ADQ channels so that + * it can be used after tc-qdisc delete, to get back RSS settings as + * they were before + */ + u16 orig_rss_size; + /* this keeps track of all enabled TCs with and without DCB + * and inclusive of ADQ, vsi->mqprio_qopt keeps track of queue + * information + */ + u8 all_numtc; + u16 all_enatc; + + /* store away TC info, to be used for rebuild logic */ + u8 
old_numtc; + u16 old_ena_tc; + + struct ice_channel *ch; + /* setup back reference, to which aggregator node this VSI * corresponds to */ @@ -407,6 +458,7 @@ enum ice_pf_flags { ICE_FLAG_PTP, /* PTP is enabled by software */ ICE_FLAG_AUX_ENA, ICE_FLAG_ADV_FEATURES, + ICE_FLAG_TC_MQPRIO, /* support for Multi queue TC */ ICE_FLAG_CLS_FLOWER, ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, ICE_FLAG_TOTAL_PORT_SHUTDOWN_ENA, @@ -704,6 +756,30 @@ static inline void ice_clear_sriov_cap(struct ice_pf *pf) ((base_idx) * ICE_FD_STAT_CTR_BLOCK_COUNT) #define ICE_FD_SB_STAT_IDX(base_idx) ICE_FD_STAT_PF_IDX(base_idx) +/** + * ice_is_adq_active - any active ADQs + * @pf: pointer to PF + * + * This function returns true if there are any ADQs configured (which is + * determined by looking at VSI type (which should be VSI_PF), numtc, and + * TC_MQPRIO flag) otherwise return false + */ +static inline bool ice_is_adq_active(struct ice_pf *pf) +{ + struct ice_vsi *vsi; + + vsi = ice_get_main_vsi(pf); + if (!vsi) + return false; + + /* is ADQ configured */ + if (vsi->tc_cfg.numtc > ICE_CHNL_START_TC && + test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) + return true; + + return false; +} + bool netif_is_ice(struct net_device *dev); int ice_vsi_setup_tx_rings(struct ice_vsi *vsi); int ice_vsi_setup_rx_rings(struct ice_vsi *vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index be625977addf..fa6cd63cbf1f 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -213,6 +213,9 @@ static u16 ice_calc_txq_handle(struct ice_vsi *vsi, struct ice_tx_ring *ring, u8 { WARN_ONCE(ice_ring_is_xdp(ring) && tc, "XDP ring can't belong to TC other than 0\n"); + if (ring->ch) + return ring->q_index - ring->ch->base_q; + /* Idea here for calculation is that we subtract the number of queue * count from TC that ring belongs to from it's absolute queue index * and as a result we get the queue's index within TC. 
@@ -300,7 +303,10 @@ ice_setup_tx_ctx(struct ice_tx_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf case ICE_VSI_LB: case ICE_VSI_CTRL: case ICE_VSI_PF: - tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF; + if (ring->ch) + tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VMQ; + else + tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF; break; case ICE_VSI_VF: /* Firmware expects vmvf_num to be absolute VF ID */ @@ -315,7 +321,10 @@ ice_setup_tx_ctx(struct ice_tx_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf } /* make sure the context is associated with the right VSI */ - tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx); + if (ring->ch) + tlan_ctx->src_vsi = ring->ch->vsi_num; + else + tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx); /* Restrict Tx timestamps to the PF VSI */ switch (vsi->type) { @@ -747,6 +756,7 @@ ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_tx_ring *ring, u8 buf_len = struct_size(qg_buf, txqs, 1); struct ice_tlan_ctx tlan_ctx = { 0 }; struct ice_aqc_add_txqs_perq *txq; + struct ice_channel *ch = ring->ch; struct ice_pf *pf = vsi->back; struct ice_hw *hw = &pf->hw; enum ice_status status; @@ -785,8 +795,14 @@ ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_tx_ring *ring, ring->q_handle = ice_calc_txq_handle(vsi, ring, tc); } - status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc, ring->q_handle, - 1, qg_buf, buf_len, NULL); + if (ch) + status = ice_ena_vsi_txq(vsi->port_info, ch->ch_vsi->idx, 0, + ring->q_handle, 1, qg_buf, buf_len, + NULL); + else + status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc, + ring->q_handle, 1, qg_buf, buf_len, + NULL); if (status) { dev_err(ice_pf_to_dev(pf), "Failed to set LAN Tx queue context, error: %s\n", ice_stat_str(status)); @@ -967,6 +983,7 @@ void ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_tx_ring *ring, struct ice_txq_meta *txq_meta) { + struct ice_channel *ch = ring->ch; u8 tc; if (IS_ENABLED(CONFIG_DCB)) @@ -977,6 +994,11 @@ ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_tx_ring 
*ring, txq_meta->q_id = ring->reg_idx; txq_meta->q_teid = ring->txq_teid; txq_meta->q_handle = ring->q_handle; - txq_meta->vsi_idx = vsi->idx; - txq_meta->tc = tc; + if (ch) { + txq_meta->vsi_idx = ch->ch_vsi->idx; + txq_meta->tc = 0; + } else { + txq_meta->vsi_idx = vsi->idx; + txq_meta->tc = tc; + } } diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c index 4284526e9e24..a72e18320a22 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c @@ -4,53 +4,11 @@ #include "ice_dcb_lib.h" #include "ice_dcb_nl.h" -/** - * ice_vsi_cfg_netdev_tc - Setup the netdev TC configuration - * @vsi: the VSI being configured - * @ena_tc: TC map to be enabled - */ -void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc) -{ - struct net_device *netdev = vsi->netdev; - struct ice_pf *pf = vsi->back; - struct ice_dcbx_cfg *dcbcfg; - u8 netdev_tc; - int i; - - if (!netdev) - return; - - if (!ena_tc) { - netdev_reset_tc(netdev); - return; - } - - if (netdev_set_num_tc(netdev, vsi->tc_cfg.numtc)) - return; - - dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg; - - ice_for_each_traffic_class(i) - if (vsi->tc_cfg.ena_tc & BIT(i)) - netdev_set_tc_queue(netdev, - vsi->tc_cfg.tc_info[i].netdev_tc, - vsi->tc_cfg.tc_info[i].qcount_tx, - vsi->tc_cfg.tc_info[i].qoffset); - - for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) { - u8 ets_tc = dcbcfg->etscfg.prio_table[i]; - - /* Get the mapped netdev TC# for the UP */ - netdev_tc = vsi->tc_cfg.tc_info[ets_tc].netdev_tc; - netdev_set_prio_tc_map(netdev, i, netdev_tc); - } -} - /** * ice_dcb_get_ena_tc - return bitmap of enabled TCs * @dcbcfg: DCB config to evaluate for enabled TCs */ -u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg *dcbcfg) +static u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg *dcbcfg) { u8 i, num_tc, ena_tc = 1; @@ -178,6 +136,67 @@ u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg) return ret; } +/** + * ice_get_first_droptc - returns number 
of first droptc + * @vsi: used to find the first droptc + * + * This function returns the value of first_droptc. + * When DCB is enabled, first droptc information is derived from enabled_tc + * and PFC enabled bits. otherwise this function returns 0 as there is one + * TC without DCB (tc0) + */ +static u8 ice_get_first_droptc(struct ice_vsi *vsi) +{ + struct ice_dcbx_cfg *cfg = &vsi->port_info->qos_cfg.local_dcbx_cfg; + struct device *dev = ice_pf_to_dev(vsi->back); + u8 num_tc, ena_tc_map, pfc_ena_map; + u8 i; + + num_tc = ice_dcb_get_num_tc(cfg); + + /* get bitmap of enabled TCs */ + ena_tc_map = ice_dcb_get_ena_tc(cfg); + + /* get bitmap of PFC enabled TCs */ + pfc_ena_map = cfg->pfc.pfcena; + + /* get first TC that is not PFC enabled */ + for (i = 0; i < num_tc; i++) { + if ((ena_tc_map & BIT(i)) && (!(pfc_ena_map & BIT(i)))) { + dev_dbg(dev, "first drop tc = %d\n", i); + return i; + } + } + + dev_dbg(dev, "first drop tc = 0\n"); + return 0; +} + +/** + * ice_vsi_set_dcb_tc_cfg - Set VSI's TC based on DCB configuration + * @vsi: pointer to the VSI instance + */ +void ice_vsi_set_dcb_tc_cfg(struct ice_vsi *vsi) +{ + struct ice_dcbx_cfg *cfg = &vsi->port_info->qos_cfg.local_dcbx_cfg; + + switch (vsi->type) { + case ICE_VSI_PF: + vsi->tc_cfg.ena_tc = ice_dcb_get_ena_tc(cfg); + vsi->tc_cfg.numtc = ice_dcb_get_num_tc(cfg); + break; + case ICE_VSI_CHNL: + vsi->tc_cfg.ena_tc = BIT(ice_get_first_droptc(vsi)); + vsi->tc_cfg.numtc = 1; + break; + case ICE_VSI_CTRL: + case ICE_VSI_LB: + default: + vsi->tc_cfg.ena_tc = ICE_DFLT_TRAFFIC_CLASS; + vsi->tc_cfg.numtc = 1; + } +} + /** * ice_dcb_get_tc - Get the TC associated with the queue * @vsi: ptr to the VSI @@ -218,11 +237,68 @@ void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi) qoffset = vsi->tc_cfg.tc_info[n].qoffset; qcount = vsi->tc_cfg.tc_info[n].qcount_tx; - for (i = qoffset; i < (qoffset + qcount); i++) { - tx_ring = vsi->tx_rings[i]; - rx_ring = vsi->rx_rings[i]; - tx_ring->dcb_tc = n; - rx_ring->dcb_tc = n; + for (i 
= qoffset; i < (qoffset + qcount); i++) + vsi->tx_rings[i]->dcb_tc = n; + + qcount = vsi->tc_cfg.tc_info[n].qcount_rx; + for (i = qoffset; i < (qoffset + qcount); i++) + vsi->rx_rings[i]->dcb_tc = n; + } + /* applicable only if "all_enatc" is set, which will be set from + * setup_tc method as part of configuring channels + */ + if (vsi->all_enatc) { + u8 first_droptc = ice_get_first_droptc(vsi); + + /* When DCB is configured, TC for ADQ queues (which are really + * PF queues) should be the first drop TC of the main VSI + */ + ice_for_each_chnl_tc(n) { + if (!(vsi->all_enatc & BIT(n))) + break; + + qoffset = vsi->mqprio_qopt.qopt.offset[n]; + qcount = vsi->mqprio_qopt.qopt.count[n]; + for (i = qoffset; i < (qoffset + qcount); i++) { + vsi->tx_rings[i]->dcb_tc = first_droptc; + vsi->rx_rings[i]->dcb_tc = first_droptc; + } + } + } +} + +/** + * ice_dcb_ena_dis_vsi - disable certain VSIs for DCB config/reconfig + * @pf: pointer to the PF instance + * @ena: true to enable VSIs, false to disable + * @locked: true if caller holds RTNL lock, false otherwise + * + * Before a new DCB configuration can be applied, VSIs of type PF, SWITCHDEV + * and CHNL need to be brought down. Following completion of DCB configuration + * the VSIs that were downed need to be brought up again. This helper function + * does both. 
+ */ +static void ice_dcb_ena_dis_vsi(struct ice_pf *pf, bool ena, bool locked) +{ + int i; + + ice_for_each_vsi(pf, i) { + struct ice_vsi *vsi = pf->vsi[i]; + + if (!vsi) + continue; + + switch (vsi->type) { + case ICE_VSI_CHNL: + case ICE_VSI_SWITCHDEV_CTRL: + case ICE_VSI_PF: + if (ena) + ice_ena_vsi(vsi, locked); + else + ice_dis_vsi(vsi, locked); + break; + default: + continue; } } } @@ -331,7 +407,9 @@ int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked) */ if (!locked) rtnl_lock(); - ice_dis_vsi(pf_vsi, true); + + /* disable VSIs affected by DCB changes */ + ice_dcb_ena_dis_vsi(pf, false, true); memcpy(curr_cfg, new_cfg, sizeof(*curr_cfg)); memcpy(&curr_cfg->etsrec, &curr_cfg->etscfg, sizeof(curr_cfg->etsrec)); @@ -359,7 +437,8 @@ int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked) ice_pf_dcb_recfg(pf); out: - ice_ena_vsi(pf_vsi, true); + /* enable previously downed VSIs */ + ice_dcb_ena_dis_vsi(pf, true, true); if (!locked) rtnl_unlock(); free_cfg: @@ -674,6 +753,8 @@ void ice_pf_dcb_recfg(struct ice_pf *pf) tc_map = ICE_DFLT_TRAFFIC_CLASS; ice_dcb_noncontig_cfg(pf); } + } else if (vsi->type == ICE_VSI_CHNL) { + tc_map = BIT(ice_get_first_droptc(vsi)); } else { tc_map = ICE_DFLT_TRAFFIC_CLASS; } @@ -684,10 +765,11 @@ void ice_pf_dcb_recfg(struct ice_pf *pf) vsi->idx); continue; } - /* no need to proceed with remaining cfg if it is switchdev - * VSI + /* no need to proceed with remaining cfg if it is CHNL + * or switchdev VSI */ - if (vsi->type == ICE_VSI_SWITCHDEV_CTRL) + if (vsi->type == ICE_VSI_CHNL || + vsi->type == ICE_VSI_SWITCHDEV_CTRL) continue; ice_vsi_map_rings_to_vectors(vsi); @@ -862,7 +944,6 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, struct ice_dcbx_cfg tmp_dcbx_cfg; bool need_reconfig = false; struct ice_port_info *pi; - struct ice_vsi *pf_vsi; u8 mib_type; int ret; @@ -938,14 +1019,9 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, clear_bit(ICE_FLAG_DCB_ENA, 
pf->flags); } - pf_vsi = ice_get_main_vsi(pf); - if (!pf_vsi) { - dev_dbg(dev, "PF VSI doesn't exist\n"); - goto out; - } - rtnl_lock(); - ice_dis_vsi(pf_vsi, true); + /* disable VSIs affected by DCB changes */ + ice_dcb_ena_dis_vsi(pf, false, true); ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL); if (ret) { @@ -956,7 +1032,8 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, /* changes in configuration update VSI */ ice_pf_dcb_recfg(pf); - ice_ena_vsi(pf_vsi, true); + /* enable previously downed VSIs */ + ice_dcb_ena_dis_vsi(pf, true, true); unlock_rtnl: rtnl_unlock(); out: diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h index 6700e97b3b51..4c421c842a13 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h @@ -16,7 +16,6 @@ void ice_dcb_rebuild(struct ice_pf *pf); int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool ets_willing, bool locked); -u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg *dcbcfg); u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg); void ice_vsi_set_dcb_tc_cfg(struct ice_vsi *vsi); bool ice_is_pfc_causing_hung_q(struct ice_pf *pf, unsigned int txqueue); @@ -34,8 +33,6 @@ ice_tx_prepare_vlan_flags_dcb(struct ice_tx_ring *tx_ring, void ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, struct ice_rq_event_info *event); -void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc); - /** * ice_find_q_in_range * @low: start of queue range for a TC i.e. 
offset of TC @@ -69,6 +66,12 @@ static inline u8 ice_get_pfc_mode(struct ice_pf *pf) #else static inline void ice_dcb_rebuild(struct ice_pf *pf) { } +static inline void ice_vsi_set_dcb_tc_cfg(struct ice_vsi *vsi) +{ + vsi->tc_cfg.ena_tc = ICE_DFLT_TRAFFIC_CLASS; + vsi->tc_cfg.numtc = 1; +} + static inline u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg __always_unused *dcbcfg) { return ICE_DFLT_TRAFFIC_CLASS; @@ -130,7 +133,6 @@ static inline void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi) { } static inline void ice_update_dcb_stats(struct ice_pf *pf) { } static inline void ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, struct ice_rq_event_info *event) { } -static inline void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc) { } static inline void ice_set_cgd_num(struct ice_tlan_ctx *tlan_ctx, u8 dcb_tc) { } #endif /* CONFIG_DCB */ #endif /* _ICE_DCB_LIB_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index a20c38446e76..6cb50653b18d 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -329,7 +329,7 @@ static void ice_eswitch_release_env(struct ice_pf *pf) static struct ice_vsi * ice_eswitch_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) { - return ice_vsi_setup(pf, pi, ICE_VSI_SWITCHDEV_CTRL, ICE_INVAL_VFID); + return ice_vsi_setup(pf, pi, ICE_VSI_SWITCHDEV_CTRL, ICE_INVAL_VFID, NULL); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 231f8bea2519..62adbb39670b 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -22,6 +22,8 @@ const char *ice_vsi_type_str(enum ice_vsi_type vsi_type) return "ICE_VSI_VF"; case ICE_VSI_CTRL: return "ICE_VSI_CTRL"; + case ICE_VSI_CHNL: + return "ICE_VSI_CHNL"; case ICE_VSI_LB: return "ICE_VSI_LB"; case ICE_VSI_SWITCHDEV_CTRL: @@ -73,6 +75,8 @@ static int ice_vsi_alloc_arrays(struct ice_vsi 
*vsi) struct device *dev; dev = ice_pf_to_dev(pf); + if (vsi->type == ICE_VSI_CHNL) + return 0; /* allocate memory for both Tx and Rx ring pointers */ vsi->tx_rings = devm_kcalloc(dev, vsi->alloc_txq, @@ -229,6 +233,10 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id) vsi->alloc_rxq = 1; vsi->num_q_vectors = 1; break; + case ICE_VSI_CHNL: + vsi->alloc_txq = 0; + vsi->alloc_rxq = 0; + break; case ICE_VSI_LB: vsi->alloc_txq = 1; vsi->alloc_rxq = 1; @@ -274,7 +282,7 @@ static int ice_get_free_slot(void *array, int size, int curr) * ice_vsi_delete - delete a VSI from the switch * @vsi: pointer to VSI being removed */ -static void ice_vsi_delete(struct ice_vsi *vsi) +void ice_vsi_delete(struct ice_vsi *vsi) { struct ice_pf *pf = vsi->back; struct ice_vsi_ctx *ctxt; @@ -345,7 +353,7 @@ static void ice_vsi_free_arrays(struct ice_vsi *vsi) * * Returns 0 on success, negative on failure */ -static int ice_vsi_clear(struct ice_vsi *vsi) +int ice_vsi_clear(struct ice_vsi *vsi) { struct ice_pf *pf = NULL; struct device *dev; @@ -438,12 +446,14 @@ static irqreturn_t ice_eswitch_msix_clean_rings(int __always_unused irq, void *d * ice_vsi_alloc - Allocates the next available struct VSI in the PF * @pf: board private structure * @vsi_type: type of VSI + * @ch: ptr to channel * @vf_id: ID of the VF being configured * * returns a pointer to a VSI on success, NULL on failure. 
*/ static struct ice_vsi * -ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, u16 vf_id) +ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, + struct ice_channel *ch, u16 vf_id) { struct device *dev = ice_pf_to_dev(pf); struct ice_vsi *vsi = NULL; @@ -470,7 +480,7 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, u16 vf_id) if (vsi_type == ICE_VSI_VF) ice_vsi_set_num_qs(vsi, vf_id); - else + else if (vsi_type != ICE_VSI_CHNL) ice_vsi_set_num_qs(vsi, ICE_INVAL_VFID); switch (vsi->type) { @@ -499,6 +509,13 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, u16 vf_id) if (ice_vsi_alloc_arrays(vsi)) goto err_rings; break; + case ICE_VSI_CHNL: + if (!ch) + goto err_rings; + vsi->num_rxq = ch->num_rxq; + vsi->num_txq = ch->num_txq; + vsi->next_base_q = ch->base_q; + break; case ICE_VSI_LB: if (ice_vsi_alloc_arrays(vsi)) goto err_rings; @@ -615,6 +632,9 @@ static int ice_vsi_get_qs(struct ice_vsi *vsi) }; int ret; + if (vsi->type == ICE_VSI_CHNL) + return 0; + ret = __ice_vsi_get_qs(&tx_qs_cfg); if (ret) return ret; @@ -733,11 +753,16 @@ static void ice_vsi_set_rss_params(struct ice_vsi *vsi) cap = &pf->hw.func_caps.common_cap; switch (vsi->type) { + case ICE_VSI_CHNL: case ICE_VSI_PF: /* PF VSI will inherit RSS instance of PF */ vsi->rss_table_size = (u16)cap->rss_table_size; - vsi->rss_size = min_t(u16, num_online_cpus(), - BIT(cap->rss_table_entry_width)); + if (vsi->type == ICE_VSI_CHNL) + vsi->rss_size = min_t(u16, vsi->num_rxq, + BIT(cap->rss_table_entry_width)); + else + vsi->rss_size = min_t(u16, num_online_cpus(), + BIT(cap->rss_table_entry_width)); vsi->rss_lut_type = ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF; break; case ICE_VSI_SWITCHDEV_CTRL: @@ -814,21 +839,13 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) u16 num_txq_per_tc, num_rxq_per_tc; u16 qcount_tx = vsi->alloc_txq; u16 qcount_rx = vsi->alloc_rxq; - bool ena_tc0 = false; u8 netdev_tc = 0; int i; - /* at least TC0 should be 
enabled by default */ - if (vsi->tc_cfg.numtc) { - if (!(vsi->tc_cfg.ena_tc & BIT(0))) - ena_tc0 = true; - } else { - ena_tc0 = true; - } - - if (ena_tc0) { - vsi->tc_cfg.numtc++; - vsi->tc_cfg.ena_tc |= 1; + if (!vsi->tc_cfg.numtc) { + /* at least TC0 should be enabled by default */ + vsi->tc_cfg.numtc = 1; + vsi->tc_cfg.ena_tc = 1; } num_rxq_per_tc = min_t(u16, qcount_rx / vsi->tc_cfg.numtc, ICE_MAX_RXQS_PER_TC); @@ -970,6 +987,7 @@ static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi) dev = ice_pf_to_dev(pf); switch (vsi->type) { + case ICE_VSI_CHNL: case ICE_VSI_PF: /* PF VSI will inherit RSS instance of PF */ lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_PF; @@ -992,6 +1010,28 @@ static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi) ICE_AQ_VSI_Q_OPT_RSS_HASH_M); } +static void +ice_chnl_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) +{ + struct ice_pf *pf = vsi->back; + u16 qcount, qmap; + u8 offset = 0; + int pow; + + qcount = min_t(int, vsi->num_rxq, pf->num_lan_msix); + + pow = order_base_2(qcount); + qmap = ((offset << ICE_AQ_VSI_TC_Q_OFFSET_S) & + ICE_AQ_VSI_TC_Q_OFFSET_M) | + ((pow << ICE_AQ_VSI_TC_Q_NUM_S) & + ICE_AQ_VSI_TC_Q_NUM_M); + + ctxt->info.tc_mapping[0] = cpu_to_le16(qmap); + ctxt->info.mapping_flags |= cpu_to_le16(ICE_AQ_VSI_Q_MAP_CONTIG); + ctxt->info.q_mapping[0] = cpu_to_le16(vsi->next_base_q); + ctxt->info.q_mapping[1] = cpu_to_le16(qcount); +} + /** * ice_vsi_init - Create and initialize a VSI * @vsi: the VSI being configured @@ -1020,6 +1060,7 @@ static int ice_vsi_init(struct ice_vsi *vsi, bool init_vsi) ctxt->flags = ICE_AQ_VSI_TYPE_PF; break; case ICE_VSI_SWITCHDEV_CTRL: + case ICE_VSI_CHNL: ctxt->flags = ICE_AQ_VSI_TYPE_VMDQ2; break; case ICE_VSI_VF: @@ -1032,6 +1073,21 @@ static int ice_vsi_init(struct ice_vsi *vsi, bool init_vsi) goto out; } + /* Handle VLAN pruning for channel VSI if main VSI has VLAN + * prune enabled + */ + if (vsi->type == ICE_VSI_CHNL) { + struct ice_vsi 
*main_vsi; + + main_vsi = ice_get_main_vsi(pf); + if (main_vsi && ice_vsi_is_vlan_pruning_ena(main_vsi)) + ctxt->info.sw_flags2 |= + ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA; + else + ctxt->info.sw_flags2 &= + ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA; + } + ice_set_dflt_vsi_ctx(ctxt); if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) ice_set_fd_vsi_ctx(ctxt, vsi); @@ -1052,13 +1108,17 @@ static int ice_vsi_init(struct ice_vsi *vsi, bool init_vsi) } ctxt->info.sw_id = vsi->port_info->sw_id; - ice_vsi_setup_q_map(vsi, ctxt); - if (!init_vsi) /* means VSI being updated */ - /* must to indicate which section of VSI context are - * being modified - */ - ctxt->info.valid_sections |= - cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID); + if (vsi->type == ICE_VSI_CHNL) { + ice_chnl_vsi_setup_q_map(vsi, ctxt); + } else { + ice_vsi_setup_q_map(vsi, ctxt); + if (!init_vsi) /* means VSI being updated */ + /* must to indicate which section of VSI context are + * being modified + */ + ctxt->info.valid_sections |= + cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID); + } /* enable/disable MAC and VLAN anti-spoof when spoofchk is on/off * respectively @@ -1237,6 +1297,8 @@ static int ice_vsi_setup_vector_base(struct ice_vsi *vsi) /* SRIOV doesn't grab irq_tracker entries for each VSI */ if (vsi->type == ICE_VSI_VF) return 0; + if (vsi->type == ICE_VSI_CHNL) + return 0; if (vsi->base_vector) { dev_dbg(dev, "VSI %d has non-zero base vector %d\n", @@ -1403,7 +1465,7 @@ void ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena) * ice_vsi_cfg_rss_lut_key - Configure RSS params for a VSI * @vsi: VSI to be configured */ -static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi) +int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi) { struct ice_pf *pf = vsi->back; struct device *dev; @@ -1411,7 +1473,25 @@ static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi) int err; dev = ice_pf_to_dev(pf); - vsi->rss_size = min_t(u16, vsi->rss_size, vsi->num_rxq); + if (vsi->type == ICE_VSI_PF && vsi->ch_rss_size && + 
(test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))) { + vsi->rss_size = min_t(u16, vsi->rss_size, vsi->ch_rss_size); + } else { + vsi->rss_size = min_t(u16, vsi->rss_size, vsi->num_rxq); + + /* If orig_rss_size is valid and it is less than determined + * main VSI's rss_size, update main VSI's rss_size to be + * orig_rss_size so that when tc-qdisc is deleted, main VSI + * RSS table gets programmed to be correct (whatever it was + * to begin with (prior to setup-tc for ADQ config) + */ + if (vsi->orig_rss_size && vsi->rss_size < vsi->orig_rss_size && + vsi->orig_rss_size <= vsi->num_rxq) { + vsi->rss_size = vsi->orig_rss_size; + /* now orig_rss_size is used, reset it to zero */ + vsi->orig_rss_size = 0; + } + } lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); if (!lut) @@ -2260,10 +2340,14 @@ err_out: static void ice_vsi_set_tc_cfg(struct ice_vsi *vsi) { - struct ice_dcbx_cfg *cfg = &vsi->port_info->qos_cfg.local_dcbx_cfg; + if (!test_bit(ICE_FLAG_DCB_ENA, vsi->back->flags)) { + vsi->tc_cfg.ena_tc = ICE_DFLT_TRAFFIC_CLASS; + vsi->tc_cfg.numtc = 1; + return; + } - vsi->tc_cfg.ena_tc = ice_dcb_get_ena_tc(cfg); - vsi->tc_cfg.numtc = ice_dcb_get_num_tc(cfg); + /* set VSI TC information based on DCB config */ + ice_vsi_set_dcb_tc_cfg(vsi); } /** @@ -2376,6 +2460,7 @@ static void ice_set_agg_vsi(struct ice_vsi *vsi) switch (vsi->type) { case ICE_VSI_CTRL: + case ICE_VSI_CHNL: case ICE_VSI_LB: case ICE_VSI_PF: case ICE_VSI_SWITCHDEV_CTRL: @@ -2475,6 +2560,7 @@ static void ice_set_agg_vsi(struct ice_vsi *vsi) * @vf_id: defines VF ID to which this VSI connects. This field is meant to be * used only for ICE_VSI_VF VSI type. For other VSI types, should * fill-in ICE_INVAL_VFID as input. + * @ch: ptr to channel * * This allocates the sw VSI structure and its queue resources. 
* @@ -2483,7 +2569,7 @@ static void ice_set_agg_vsi(struct ice_vsi *vsi) */ struct ice_vsi * ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, - enum ice_vsi_type vsi_type, u16 vf_id) + enum ice_vsi_type vsi_type, u16 vf_id, struct ice_channel *ch) { u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; struct device *dev = ice_pf_to_dev(pf); @@ -2491,10 +2577,12 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, struct ice_vsi *vsi; int ret, i; - if (vsi_type == ICE_VSI_VF || vsi_type == ICE_VSI_CTRL) - vsi = ice_vsi_alloc(pf, vsi_type, vf_id); + if (vsi_type == ICE_VSI_CHNL) + vsi = ice_vsi_alloc(pf, vsi_type, ch, ICE_INVAL_VFID); + else if (vsi_type == ICE_VSI_VF || vsi_type == ICE_VSI_CTRL) + vsi = ice_vsi_alloc(pf, vsi_type, NULL, vf_id); else - vsi = ice_vsi_alloc(pf, vsi_type, ICE_INVAL_VFID); + vsi = ice_vsi_alloc(pf, vsi_type, NULL, ICE_INVAL_VFID); if (!vsi) { dev_err(dev, "could not allocate VSI\n"); @@ -2511,10 +2599,12 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, ice_alloc_fd_res(vsi); - if (ice_vsi_get_qs(vsi)) { - dev_err(dev, "Failed to allocate queues. vsi->idx = %d\n", - vsi->idx); - goto unroll_vsi_alloc; + if (vsi_type != ICE_VSI_CHNL) { + if (ice_vsi_get_qs(vsi)) { + dev_err(dev, "Failed to allocate queues. 
vsi->idx = %d\n", + vsi->idx); + goto unroll_vsi_alloc; + } } /* set RSS capabilities */ @@ -2573,6 +2663,12 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, } ice_init_arfs(vsi); break; + case ICE_VSI_CHNL: + if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { + ice_vsi_cfg_rss_lut_key(vsi); + ice_vsi_set_rss_flow_fld(vsi); + } + break; case ICE_VSI_VF: /* VF driver will take care of creating netdev for this type and * map queues to vectors through Virtchnl, PF driver only @@ -2611,9 +2707,21 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, } /* configure VSI nodes based on number of queues and TC's */ - for (i = 0; i < vsi->tc_cfg.numtc; i++) - max_txqs[i] = vsi->alloc_txq; + ice_for_each_traffic_class(i) { + if (!(vsi->tc_cfg.ena_tc & BIT(i))) + continue; + + if (vsi->type == ICE_VSI_CHNL) { + if (!vsi->alloc_txq && vsi->num_txq) + max_txqs[i] = vsi->num_txq; + else + max_txqs[i] = pf->num_lan_tx; + } else { + max_txqs[i] = vsi->alloc_txq; + } + } + dev_dbg(dev, "vsi->tc_cfg.ena_tc = %d\n", vsi->tc_cfg.ena_tc); status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc, max_txqs); if (status) { @@ -3269,6 +3377,12 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) if (ret) goto err_vectors; + break; + case ICE_VSI_CHNL: + if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { + ice_vsi_cfg_rss_lut_key(vsi); + ice_vsi_set_rss_flow_fld(vsi); + } break; default: break; @@ -3276,14 +3390,30 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) /* configure VSI nodes based on number of queues and TC's */ for (i = 0; i < vsi->tc_cfg.numtc; i++) { - max_txqs[i] = vsi->alloc_txq; + /* configure VSI nodes based on number of queues and TC's. + * ADQ creates VSIs for each TC/Channel but doesn't + * allocate queues instead it reconfigures the PF queues + * as per the TC command. So max_txqs should point to the + * PF Tx queues. 
+ */ + if (vtype == ICE_VSI_CHNL) + max_txqs[i] = pf->num_lan_tx; + else + max_txqs[i] = vsi->alloc_txq; if (ice_is_xdp_ena_vsi(vsi)) max_txqs[i] += vsi->num_xdp_txq; } - status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc, - max_txqs); + if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) + /* If MQPRIO is set, means channel code path, hence for main + * VSIs, use TC as 1 + */ + status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, 1, max_txqs); + else + status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, + vsi->tc_cfg.ena_tc, max_txqs); + if (status) { dev_err(ice_pf_to_dev(pf), "VSI %d failed lan queue config, error %s\n", vsi->vsi_num, ice_stat_str(status)); @@ -3355,7 +3485,6 @@ int ice_wait_for_reset(struct ice_pf *pf, unsigned long timeout) return 0; } -#ifdef CONFIG_DCB /** * ice_vsi_update_q_map - update our copy of the VSI info with new queue map * @vsi: VSI being configured @@ -3370,6 +3499,146 @@ static void ice_vsi_update_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctx) sizeof(vsi->info.tc_mapping)); } +/** + * ice_vsi_cfg_netdev_tc - Setup the netdev TC configuration + * @vsi: the VSI being configured + * @ena_tc: TC map to be enabled + */ +void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc) +{ + struct net_device *netdev = vsi->netdev; + struct ice_pf *pf = vsi->back; + int numtc = vsi->tc_cfg.numtc; + struct ice_dcbx_cfg *dcbcfg; + u8 netdev_tc; + int i; + + if (!netdev) + return; + + /* CHNL VSI doesn't have its own netdev, hence, no netdev_tc */ + if (vsi->type == ICE_VSI_CHNL) + return; + + if (!ena_tc) { + netdev_reset_tc(netdev); + return; + } + + if (vsi->type == ICE_VSI_PF && ice_is_adq_active(pf)) + numtc = vsi->all_numtc; + + if (netdev_set_num_tc(netdev, numtc)) + return; + + dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg; + + ice_for_each_traffic_class(i) + if (vsi->tc_cfg.ena_tc & BIT(i)) + netdev_set_tc_queue(netdev, + vsi->tc_cfg.tc_info[i].netdev_tc, + vsi->tc_cfg.tc_info[i].qcount_tx, + 
vsi->tc_cfg.tc_info[i].qoffset); + /* setup TC queue map for CHNL TCs */ + ice_for_each_chnl_tc(i) { + if (!(vsi->all_enatc & BIT(i))) + break; + if (!vsi->mqprio_qopt.qopt.count[i]) + break; + netdev_set_tc_queue(netdev, i, + vsi->mqprio_qopt.qopt.count[i], + vsi->mqprio_qopt.qopt.offset[i]); + } + + if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) + return; + + for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) { + u8 ets_tc = dcbcfg->etscfg.prio_table[i]; + + /* Get the mapped netdev TC# for the UP */ + netdev_tc = vsi->tc_cfg.tc_info[ets_tc].netdev_tc; + netdev_set_prio_tc_map(netdev, i, netdev_tc); + } +} + +/** + * ice_vsi_setup_q_map_mqprio - Prepares mqprio based tc_config + * @vsi: the VSI being configured + * @ctxt: VSI context structure + * @ena_tc: TC map to be enabled + * + * Prepares VSI tc_config to have queue configurations based on MQPRIO options. + */ +static void +ice_vsi_setup_q_map_mqprio(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt, + u8 ena_tc) +{ + u16 pow, offset = 0, qcount_tx = 0, qcount_rx = 0, qmap; + u16 tc0_offset = vsi->mqprio_qopt.qopt.offset[0]; + int tc0_qcount = vsi->mqprio_qopt.qopt.count[0]; + u8 netdev_tc = 0; + int i; + + vsi->tc_cfg.ena_tc = ena_tc ? 
ena_tc : 1; + + pow = order_base_2(tc0_qcount); + qmap = ((tc0_offset << ICE_AQ_VSI_TC_Q_OFFSET_S) & + ICE_AQ_VSI_TC_Q_OFFSET_M) | + ((pow << ICE_AQ_VSI_TC_Q_NUM_S) & ICE_AQ_VSI_TC_Q_NUM_M); + + ice_for_each_traffic_class(i) { + if (!(vsi->tc_cfg.ena_tc & BIT(i))) { + /* TC is not enabled */ + vsi->tc_cfg.tc_info[i].qoffset = 0; + vsi->tc_cfg.tc_info[i].qcount_rx = 1; + vsi->tc_cfg.tc_info[i].qcount_tx = 1; + vsi->tc_cfg.tc_info[i].netdev_tc = 0; + ctxt->info.tc_mapping[i] = 0; + continue; + } + + offset = vsi->mqprio_qopt.qopt.offset[i]; + qcount_rx = vsi->mqprio_qopt.qopt.count[i]; + qcount_tx = vsi->mqprio_qopt.qopt.count[i]; + vsi->tc_cfg.tc_info[i].qoffset = offset; + vsi->tc_cfg.tc_info[i].qcount_rx = qcount_rx; + vsi->tc_cfg.tc_info[i].qcount_tx = qcount_tx; + vsi->tc_cfg.tc_info[i].netdev_tc = netdev_tc++; + } + + if (vsi->all_numtc && vsi->all_numtc != vsi->tc_cfg.numtc) { + ice_for_each_chnl_tc(i) { + if (!(vsi->all_enatc & BIT(i))) + continue; + offset = vsi->mqprio_qopt.qopt.offset[i]; + qcount_rx = vsi->mqprio_qopt.qopt.count[i]; + qcount_tx = vsi->mqprio_qopt.qopt.count[i]; + } + } + + /* Set actual Tx/Rx queue pairs */ + vsi->num_txq = offset + qcount_tx; + vsi->num_rxq = offset + qcount_rx; + + /* Setup queue TC[0].qmap for given VSI context */ + ctxt->info.tc_mapping[0] = cpu_to_le16(qmap); + ctxt->info.q_mapping[0] = cpu_to_le16(vsi->rxq_map[0]); + ctxt->info.q_mapping[1] = cpu_to_le16(tc0_qcount); + + /* Find queue count available for channel VSIs and starting offset + * for channel VSIs + */ + if (tc0_qcount && tc0_qcount < vsi->num_rxq) { + vsi->cnt_q_avail = vsi->num_rxq - tc0_qcount; + vsi->next_base_q = tc0_qcount; + } + dev_dbg(ice_pf_to_dev(vsi->back), "vsi->num_txq = %d\n", vsi->num_txq); + dev_dbg(ice_pf_to_dev(vsi->back), "vsi->num_rxq = %d\n", vsi->num_rxq); + dev_dbg(ice_pf_to_dev(vsi->back), "all_numtc %u, all_enatc: 0x%04x, tc_cfg.numtc %u\n", + vsi->all_numtc, vsi->all_enatc, vsi->tc_cfg.numtc); +} + /** * ice_vsi_cfg_tc - 
Configure VSI Tx Sched for given TC map * @vsi: VSI to be configured @@ -3388,6 +3657,9 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc) u8 num_tc = 0; dev = ice_pf_to_dev(pf); + if (vsi->tc_cfg.ena_tc == ena_tc && + vsi->mqprio_qopt.mode != TC_MQPRIO_MODE_CHANNEL) + return ret; ice_for_each_traffic_class(i) { /* build bitmap of enabled TCs */ @@ -3395,6 +3667,12 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc) num_tc++; /* populate max_txqs per TC */ max_txqs[i] = vsi->alloc_txq; + /* Update max_txqs if it is CHNL VSI, because alloc_t[r]xq are + * zero for CHNL VSI, hence use num_txq instead as max_txqs + */ + if (vsi->type == ICE_VSI_CHNL && + test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) + max_txqs[i] = vsi->num_txq; } vsi->tc_cfg.ena_tc = ena_tc; @@ -3407,7 +3685,11 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc) ctx->vf_num = 0; ctx->info = vsi->info; - ice_vsi_setup_q_map(vsi, ctx); + if (vsi->type == ICE_VSI_PF && + test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) + ice_vsi_setup_q_map_mqprio(vsi, ctx, ena_tc); + else + ice_vsi_setup_q_map(vsi, ctx); /* must to indicate which section of VSI context are being modified */ ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID); @@ -3418,8 +3700,13 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc) goto out; } - status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc, - max_txqs); + if (vsi->type == ICE_VSI_PF && + test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) + status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, 1, + max_txqs); + else + status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, + vsi->tc_cfg.ena_tc, max_txqs); if (status) { dev_err(dev, "VSI %d failed TC config, error %s\n", @@ -3435,7 +3722,6 @@ out: kfree(ctx); return ret; } -#endif /* CONFIG_DCB */ /** * ice_update_ring_stats - Update ring statistics diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index c79fcbf82d8f..b44ceffe40bf 100644 --- 
a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -51,13 +51,18 @@ void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create); int ice_set_link(struct ice_vsi *vsi, bool ena); -#ifdef CONFIG_DCB +void ice_vsi_delete(struct ice_vsi *vsi); +int ice_vsi_clear(struct ice_vsi *vsi); + int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc); -#endif /* CONFIG_DCB */ + +int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi); + +void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc); struct ice_vsi * ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, - enum ice_vsi_type vsi_type, u16 vf_id); + enum ice_vsi_type vsi_type, u16 vf_id, struct ice_channel *ch); void ice_napi_del(struct ice_vsi *vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 846623a97723..20c912842ad0 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3240,7 +3240,7 @@ void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size) static struct ice_vsi * ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) { - return ice_vsi_setup(pf, pi, ICE_VSI_PF, ICE_INVAL_VFID); + return ice_vsi_setup(pf, pi, ICE_VSI_PF, ICE_INVAL_VFID, NULL); } /** @@ -3254,7 +3254,7 @@ ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) static struct ice_vsi * ice_ctrl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) { - return ice_vsi_setup(pf, pi, ICE_VSI_CTRL, ICE_INVAL_VFID); + return ice_vsi_setup(pf, pi, ICE_VSI_CTRL, ICE_INVAL_VFID, NULL); } /** @@ -3268,7 +3268,7 @@ ice_ctrl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) struct ice_vsi * ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) { - return ice_vsi_setup(pf, pi, ICE_VSI_LB, ICE_INVAL_VFID); + return ice_vsi_setup(pf, pi, ICE_VSI_LB, ICE_INVAL_VFID, NULL); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c 
index 560e52b99f83..fe36c3b5eb4c 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -2998,6 +2998,43 @@ static void ice_set_clear_shared_bw(struct ice_bw_type_info *bw_t_info, u32 bw) } } +/** + * ice_sched_save_vsi_bw - save VSI node's BW information + * @pi: port information structure + * @vsi_handle: sw VSI handle + * @tc: traffic class + * @rl_type: rate limit type min, max, or shared + * @bw: bandwidth in Kbps - Kilo bits per sec + * + * Save BW information of VSI type node for post replay use. + */ +static int +ice_sched_save_vsi_bw(struct ice_port_info *pi, u16 vsi_handle, u8 tc, + enum ice_rl_type rl_type, u32 bw) +{ + struct ice_vsi_ctx *vsi_ctx; + + if (!ice_is_vsi_valid(pi->hw, vsi_handle)) + return -EINVAL; + vsi_ctx = ice_get_vsi_ctx(pi->hw, vsi_handle); + if (!vsi_ctx) + return -EINVAL; + switch (rl_type) { + case ICE_MIN_BW: + ice_set_clear_cir_bw(&vsi_ctx->sched.bw_t_info[tc], bw); + break; + case ICE_MAX_BW: + ice_set_clear_eir_bw(&vsi_ctx->sched.bw_t_info[tc], bw); + break; + case ICE_SHARED_BW: + ice_set_clear_shared_bw(&vsi_ctx->sched.bw_t_info[tc], bw); + break; + default: + return -EINVAL; + } + return 0; +} + /** * ice_sched_calc_wakeup - calculate RL profile wakeup parameter * @hw: pointer to the HW struct @@ -3875,9 +3912,17 @@ enum ice_status ice_cfg_vsi_bw_lmt_per_tc(struct ice_port_info *pi, u16 vsi_handle, u8 tc, enum ice_rl_type rl_type, u32 bw) { - return ice_sched_set_node_bw_lmt_per_tc(pi, vsi_handle, - ICE_AGG_TYPE_VSI, - tc, rl_type, bw); + int status; + + status = ice_sched_set_node_bw_lmt_per_tc(pi, vsi_handle, + ICE_AGG_TYPE_VSI, + tc, rl_type, bw); + if (!status) { + mutex_lock(&pi->sched_lock); + status = ice_sched_save_vsi_bw(pi, vsi_handle, tc, rl_type, bw); + mutex_unlock(&pi->sched_lock); + } + return status; } /** @@ -3894,10 +3939,19 @@ enum ice_status ice_cfg_vsi_bw_dflt_lmt_per_tc(struct ice_port_info *pi, u16 vsi_handle, u8 tc, enum ice_rl_type rl_type) { - 
return ice_sched_set_node_bw_lmt_per_tc(pi, vsi_handle, - ICE_AGG_TYPE_VSI, - tc, rl_type, - ICE_SCHED_DFLT_BW); + int status; + + status = ice_sched_set_node_bw_lmt_per_tc(pi, vsi_handle, + ICE_AGG_TYPE_VSI, + tc, rl_type, + ICE_SCHED_DFLT_BW); + if (!status) { + mutex_lock(&pi->sched_lock); + status = ice_sched_save_vsi_bw(pi, vsi_handle, tc, rl_type, + ICE_SCHED_DFLT_BW); + mutex_unlock(&pi->sched_lock); + } + return status; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_sched.h b/drivers/net/ethernet/intel/ice/ice_sched.h index f89b80ba3499..551e13e46653 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.h +++ b/drivers/net/ethernet/intel/ice/ice_sched.h @@ -58,6 +58,8 @@ struct ice_sched_agg_info { DECLARE_BITMAP(tc_bitmap, ICE_MAX_TRAFFIC_CLASS); u32 agg_id; enum ice_agg_type agg_type; + /* bw_t_info saves aggregator BW information */ + struct ice_bw_type_info bw_t_info[ICE_MAX_TRAFFIC_CLASS]; /* save aggregator TC bitmap */ DECLARE_BITMAP(replay_tc_bitmap, ICE_MAX_TRAFFIC_CLASS); }; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index c759a02bfce4..c56dd1749903 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -287,6 +287,7 @@ struct ice_rx_ring { struct rcu_head rcu; /* to avoid race on free */ /* CL4 - 3rd cacheline starts here */ + struct ice_channel *ch; struct bpf_prog *xdp_prog; struct ice_tx_ring *xdp_ring; struct xsk_buff_pool *xsk_pool; @@ -328,6 +329,7 @@ struct ice_tx_ring { /* CL3 - 3rd cacheline starts here */ struct rcu_head rcu; /* to avoid race on free */ DECLARE_BITMAP(xps_state, ICE_TX_NBITS); /* XPS Config State */ + struct ice_channel *ch; struct ice_ptp_tx *tx_tstamps; spinlock_t tx_lock; u32 txq_teid; /* Added Tx queue TEID */ @@ -352,6 +354,11 @@ static inline void ice_clear_ring_build_skb_ena(struct ice_rx_ring *ring) ring->flags &= ~ICE_RX_FLAGS_RING_BUILD_SKB; } +static inline bool ice_ring_ch_enabled(struct 
ice_tx_ring *ring) +{ + return !!ring->ch; +} + static inline bool ice_ring_is_xdp(struct ice_tx_ring *ring) { return !!(ring->flags & ICE_TX_FLAGS_RING_XDP); diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index d5cb1c5a89c0..9e0c2923c62e 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -138,6 +138,7 @@ enum ice_vsi_type { ICE_VSI_PF = 0, ICE_VSI_VF = 1, ICE_VSI_CTRL = 3, /* equates to ICE_VSI_PF with 1 queue pair */ + ICE_VSI_CHNL = 4, ICE_VSI_LB = 6, ICE_VSI_SWITCHDEV_CTRL = 7, }; @@ -570,6 +571,8 @@ struct ice_sched_vsi_info { struct list_head list_entry; u16 max_lanq[ICE_MAX_TRAFFIC_CLASS]; u16 max_rdmaq[ICE_MAX_TRAFFIC_CLASS]; + /* bw_t_info saves VSI BW information */ + struct ice_bw_type_info bw_t_info[ICE_MAX_TRAFFIC_CLASS]; }; /* driver defines the policy */ diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index 8e3f9ec4e35b..a42eaf6f942e 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -832,7 +832,7 @@ static struct ice_vsi *ice_vf_vsi_setup(struct ice_vf *vf) struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; - vsi = ice_vsi_setup(pf, pi, ICE_VSI_VF, vf->vf_id); + vsi = ice_vsi_setup(pf, pi, ICE_VSI_VF, vf->vf_id, NULL); if (!vsi) { dev_err(ice_pf_to_dev(pf), "Failed to create VF VSI\n"); @@ -859,7 +859,7 @@ struct ice_vsi *ice_vf_ctrl_vsi_setup(struct ice_vf *vf) struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; - vsi = ice_vsi_setup(pf, pi, ICE_VSI_CTRL, vf->vf_id); + vsi = ice_vsi_setup(pf, pi, ICE_VSI_CTRL, vf->vf_id, NULL); if (!vsi) { dev_err(ice_pf_to_dev(pf), "Failed to create VF control VSI\n"); ice_vf_ctrl_invalidate_vsi(vf); -- cgit v1.2.3-59-g8ed1b From fbc7b27af0f9fb181811424e29caf6825594a841 Mon Sep 17 00:00:00 2001 From: Kiran Patil Date: Fri, 15 Oct 2021 16:35:16 -0700 Subject: ice: enable ndo_setup_tc 
support for mqprio_qdisc Add support in driver for TC_QDISC_SETUP_MQPRIO. This support enables instantiation of channels in HW using existing MQPRIO infrastructure which is extended to be offloadable. This provides a mechanism to configure dedicated set of queues for each TC. Configuring channels using "tc mqprio": -------------------------------------- tc qdisc add dev <ethX> root mqprio num_tc 3 map 0 1 2 \ queues 4@0 4@4 4@8 hw 1 mode channel Above command configures 3 TCs having 4 queues each. "hw 1 mode channel" implies offload of channel configuration to HW. When driver processes configuration received via "ndo_setup_tc: QDISC_SETUP_MQPRIO", each TC maps to HW VSI with specified queues. User can optionally specify bandwidth min and max rate limit per TC (see example below). If shaper params like min and/or max bandwidth rate limit are specified, driver configures VSI specific rate limiter in HW. Configuring channels and bandwidth shaper parameters using "tc mqprio": ---------------------------------------------------------------- tc qdisc add dev <ethX> root mqprio \ num_tc 4 map 0 1 2 3 queues 4@0 4@4 4@8 4@12 hw 1 mode channel \ shaper bw_rlimit min_rate 1Gbit 2Gbit 3Gbit 4Gbit \ max_rate 4Gbit 5Gbit 6Gbit 7Gbit Command to view configured TCs: ----------------------------- tc qdisc show dev <ethX> Deleting TCs: ------------ tc qdisc del dev <ethX> root mqprio Signed-off-by: Kiran Patil Signed-off-by: Amritha Nambiar Signed-off-by: Sudheer Mogilappagari Tested-by: Bharathi Sreenivas Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 21 + drivers/net/ethernet/intel/ice/ice_ethtool.c | 10 + drivers/net/ethernet/intel/ice/ice_lib.c | 2 +- drivers/net/ethernet/intel/ice/ice_lib.h | 1 + drivers/net/ethernet/intel/ice/ice_main.c | 889 ++++++++++++++++++++++++++- 5 files changed, 913 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index c2814e31c438..4f94ecff87e8 100644 --- 
a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -56,6 +56,7 @@ #include "ice_dcb.h" #include "ice_switch.h" #include "ice_common.h" +#include "ice_flow.h" #include "ice_sched.h" #include "ice_idc_int.h" #include "ice_virtchnl_pf.h" @@ -126,6 +127,13 @@ #define ICE_TX_CTX_DESC(R, i) (&(((struct ice_tx_ctx_desc *)((R)->desc))[i])) #define ICE_TX_FDIRDESC(R, i) (&(((struct ice_fltr_desc *)((R)->desc))[i])) +/* Minimum BW limit is 500 Kbps for any scheduler node */ +#define ICE_MIN_BW_LIMIT 500 +/* User can specify BW in either Kbit/Mbit/Gbit and OS converts it in bytes. + * use it to convert user specified BW limit into Kbps + */ +#define ICE_BW_KBPS_DIVISOR 125 + /* Macro for each VSI in a PF */ #define ice_for_each_vsi(pf, i) \ for ((i) = 0; (i) < (pf)->num_alloc_vsi; (i)++) @@ -440,6 +448,8 @@ struct ice_q_vector { cpumask_t affinity_mask; struct irq_affinity_notify affinity_notify; + struct ice_channel *ch; + char name[ICE_INT_NAME_STR_LEN]; u16 total_events; /* net_dim(): number of interrupts processed */ @@ -595,6 +605,17 @@ struct ice_netdev_priv { struct ice_repr *repr; }; +/** + * ice_vector_ch_enabled + * @qv: pointer to q_vector, can be NULL + * + * This function returns true if vector is channel enabled otherwise false + */ +static inline bool ice_vector_ch_enabled(struct ice_q_vector *qv) +{ + return !!qv->ch; /* Enable it to run with TC */ +} + /** * ice_irq_dynamic_ena - Enable default interrupt generation settings * @hw: pointer to HW struct diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 8b3eef6632e9..cfe96a127ed4 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -3194,6 +3194,11 @@ ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key, return -EIO; } + if (ice_is_adq_active(pf)) { + netdev_err(netdev, "Cannot change RSS params with ADQ configured.\n"); + return -EOPNOTSUPP; 
+ } + if (key) { if (!vsi->rss_hkey_user) { vsi->rss_hkey_user = @@ -3404,6 +3409,11 @@ static int ice_set_channels(struct net_device *dev, struct ethtool_channels *ch) if (ch->other_count != (test_bit(ICE_FLAG_FD_ENA, pf->flags) ? 1U : 0U)) return -EINVAL; + if (ice_is_adq_active(pf)) { + netdev_err(dev, "Cannot set channels with ADQ configured.\n"); + return -EOPNOTSUPP; + } + if (test_bit(ICE_FLAG_FD_ENA, pf->flags) && pf->hw.fdir_active_fltr) { netdev_err(dev, "Cannot set channels when Flow Director filters are active\n"); return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 62adbb39670b..4904ae088daa 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -3954,7 +3954,7 @@ int ice_get_link_speed_mbps(struct ice_vsi *vsi) * * Return current VSI link speed and 0 if the speed is unknown. */ -static int ice_get_link_speed_kbps(struct ice_vsi *vsi) +int ice_get_link_speed_kbps(struct ice_vsi *vsi) { int speed_mbps; diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index b44ceffe40bf..e7f4ecbb8549 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -124,6 +124,7 @@ int ice_set_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi); int ice_clear_dflt_vsi(struct ice_sw *sw); int ice_set_min_bw_limit(struct ice_vsi *vsi, u64 min_tx_rate); int ice_set_max_bw_limit(struct ice_vsi *vsi, u64 max_tx_rate); +int ice_get_link_speed_kbps(struct ice_vsi *vsi); int ice_get_link_speed_mbps(struct ice_vsi *vsi); int ice_vsi_update_security(struct ice_vsi *vsi, void (*fill)(struct ice_vsi_ctx *)); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 20c912842ad0..ff2d000bbbab 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -55,6 +55,9 @@ static void 
ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type); static void ice_vsi_release_all(struct ice_pf *pf); +static int ice_rebuild_channels(struct ice_pf *pf); +static void ice_remove_q_channels(struct ice_vsi *vsi, bool rem_adv_fltr); + bool netif_is_ice(struct net_device *dev) { return dev && (dev->netdev_ops == &ice_netdev_ops); @@ -106,7 +109,12 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf) ice_for_each_txq(vsi, i) { struct ice_tx_ring *tx_ring = vsi->tx_rings[i]; - if (tx_ring && tx_ring->desc) { + if (!tx_ring) + continue; + if (ice_ring_ch_enabled(tx_ring)) + continue; + + if (tx_ring->desc) { /* If packet counter has not changed the queue is * likely stalled, so force an interrupt for this * queue. @@ -458,17 +466,21 @@ static void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked) } /** - * ice_prepare_for_reset - prep for the core to reset + * ice_prepare_for_reset - prep for reset * @pf: board private structure + * @reset_type: reset type requested * * Inform or close all dependent features in prep for reset. 
*/ static void -ice_prepare_for_reset(struct ice_pf *pf) +ice_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type) { struct ice_hw *hw = &pf->hw; + struct ice_vsi *vsi; unsigned int i; + dev_dbg(ice_pf_to_dev(pf), "reset_type=%d\n", reset_type); + /* already prepared for reset */ if (test_bit(ICE_PREPARED_FOR_RESET, pf->state)) return; @@ -483,6 +495,38 @@ ice_prepare_for_reset(struct ice_pf *pf) ice_for_each_vf(pf, i) ice_set_vf_state_qs_dis(&pf->vf[i]); + /* release ADQ specific HW and SW resources */ + vsi = ice_get_main_vsi(pf); + if (!vsi) + goto skip; + + /* to be on safe side, reset orig_rss_size so that normal flow + * of deciding rss_size can take precedence + */ + vsi->orig_rss_size = 0; + + if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) { + if (reset_type == ICE_RESET_PFR) { + vsi->old_ena_tc = vsi->all_enatc; + vsi->old_numtc = vsi->all_numtc; + } else { + ice_remove_q_channels(vsi, true); + + /* for other reset type, do not support channel rebuild + * hence reset needed info + */ + vsi->old_ena_tc = 0; + vsi->all_enatc = 0; + vsi->old_numtc = 0; + vsi->all_numtc = 0; + vsi->req_txq = 0; + vsi->req_rxq = 0; + clear_bit(ICE_FLAG_TC_MQPRIO, pf->flags); + memset(&vsi->mqprio_qopt, 0, sizeof(vsi->mqprio_qopt)); + } + } +skip: + /* clear SW filtering DB */ ice_clear_hw_tbls(hw); /* disable the VSIs and their queues that are not already DOWN */ @@ -502,8 +546,7 @@ ice_prepare_for_reset(struct ice_pf *pf) /** * ice_do_reset - Initiate one of many types of resets * @pf: board private structure - * @reset_type: reset type requested - * before this function was called. + * @reset_type: reset type requested before this function was called. 
*/ static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type) { @@ -512,7 +555,7 @@ static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type) dev_dbg(dev, "reset_type 0x%x requested\n", reset_type); - ice_prepare_for_reset(pf); + ice_prepare_for_reset(pf, reset_type); /* trigger the reset */ if (ice_reset(hw, reset_type)) { @@ -570,7 +613,7 @@ static void ice_reset_subtask(struct ice_pf *pf) /* return if no valid reset type requested */ if (reset_type == ICE_RESET_INVAL) return; - ice_prepare_for_reset(pf); + ice_prepare_for_reset(pf, reset_type); /* make sure we are ready to rebuild */ if (ice_check_reset(&pf->hw)) { @@ -3243,6 +3286,13 @@ ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) return ice_vsi_setup(pf, pi, ICE_VSI_PF, ICE_INVAL_VFID, NULL); } +static struct ice_vsi * +ice_chnl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, + struct ice_channel *ch) +{ + return ice_vsi_setup(pf, pi, ICE_VSI_CHNL, ICE_INVAL_VFID, ch); +} + /** * ice_ctrl_vsi_setup - Set up a control VSI * @pf: board private structure @@ -3361,6 +3411,9 @@ static int ice_setup_pf_sw(struct ice_pf *pf) if (!vsi) return -ENOMEM; + /* init channel list */ + INIT_LIST_HEAD(&vsi->ch_list); + status = ice_cfg_netdev(vsi); if (status) { status = -ENODEV; @@ -4956,7 +5009,7 @@ ice_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t err) if (!test_bit(ICE_PREPARED_FOR_RESET, pf->state)) { set_bit(ICE_PFR_REQ, pf->state); - ice_prepare_for_reset(pf); + ice_prepare_for_reset(pf, ICE_RESET_PFR); } } @@ -5048,7 +5101,7 @@ static void ice_pci_err_reset_prepare(struct pci_dev *pdev) if (!test_bit(ICE_PREPARED_FOR_RESET, pf->state)) { set_bit(ICE_PFR_REQ, pf->state); - ice_prepare_for_reset(pf); + ice_prepare_for_reset(pf, ICE_RESET_PFR); } } } @@ -5453,6 +5506,11 @@ ice_set_features(struct net_device *netdev, netdev_features_t features) ice_clear_arfs(vsi); } + /* don't turn off hw_tc_offload when ADQ is already enabled */ + if (!(features & 
NETIF_F_HW_TC) && ice_is_adq_active(pf)) { + dev_err(ice_pf_to_dev(pf), "ADQ is active, can't turn hw_tc_offload off\n"); + return -EACCES; + } return ret; } @@ -6307,6 +6365,9 @@ static void ice_vsi_release_all(struct ice_pf *pf) if (!pf->vsi[i]) continue; + if (pf->vsi[i]->type == ICE_VSI_CHNL) + continue; + err = ice_vsi_release(pf->vsi[i]); if (err) dev_dbg(ice_pf_to_dev(pf), "Failed to release pf->vsi[%d], err %d, vsi_num = %d\n", @@ -6517,6 +6578,15 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) goto err_vsi_rebuild; } + if (reset_type == ICE_RESET_PFR) { + err = ice_rebuild_channels(pf); + if (err) { + dev_err(dev, "failed to rebuild and replay ADQ VSIs, err %d\n", + err); + goto err_vsi_rebuild; + } + } + /* If Flow Director is active */ if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) { err = ice_vsi_rebuild_by_type(pf, ICE_VSI_CTRL); @@ -7183,6 +7253,799 @@ ice_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) } } +/** + * ice_validate_mqprio_qopt - Validate TCF input parameters + * @vsi: Pointer to VSI + * @mqprio_qopt: input parameters for mqprio queue configuration + * + * This function validates MQPRIO params, such as qcount (power of 2 wherever + * needed), and make sure user doesn't specify qcount and BW rate limit + * for TCs, which are more than "num_tc" + */ +static int +ice_validate_mqprio_qopt(struct ice_vsi *vsi, + struct tc_mqprio_qopt_offload *mqprio_qopt) +{ + u64 sum_max_rate = 0, sum_min_rate = 0; + int non_power_of_2_qcount = 0; + struct ice_pf *pf = vsi->back; + int max_rss_q_cnt = 0; + struct device *dev; + int i, speed; + u8 num_tc; + + if (vsi->type != ICE_VSI_PF) + return -EINVAL; + + if (mqprio_qopt->qopt.offset[0] != 0 || + mqprio_qopt->qopt.num_tc < 1 || + mqprio_qopt->qopt.num_tc > ICE_CHNL_MAX_TC) + return -EINVAL; + + dev = ice_pf_to_dev(pf); + vsi->ch_rss_size = 0; + num_tc = mqprio_qopt->qopt.num_tc; + + for (i = 0; num_tc; i++) { + int qcount = mqprio_qopt->qopt.count[i]; + 
u64 max_rate, min_rate, rem; + + if (!qcount) + return -EINVAL; + + if (is_power_of_2(qcount)) { + if (non_power_of_2_qcount && + qcount > non_power_of_2_qcount) { + dev_err(dev, "qcount[%d] cannot be greater than non power of 2 qcount[%d]\n", + qcount, non_power_of_2_qcount); + return -EINVAL; + } + if (qcount > max_rss_q_cnt) + max_rss_q_cnt = qcount; + } else { + if (non_power_of_2_qcount && + qcount != non_power_of_2_qcount) { + dev_err(dev, "Only one non power of 2 qcount allowed[%d,%d]\n", + qcount, non_power_of_2_qcount); + return -EINVAL; + } + if (qcount < max_rss_q_cnt) { + dev_err(dev, "non power of 2 qcount[%d] cannot be less than other qcount[%d]\n", + qcount, max_rss_q_cnt); + return -EINVAL; + } + max_rss_q_cnt = qcount; + non_power_of_2_qcount = qcount; + } + + /* TC command takes input in K/M/Gbps or K/M/Gbit etc but + * converts the bandwidth rate limit into Bytes/s when + * passing it down to the driver. So convert input bandwidth + * from Bytes/s to Kbps + */ + max_rate = mqprio_qopt->max_rate[i]; + max_rate = div_u64(max_rate, ICE_BW_KBPS_DIVISOR); + sum_max_rate += max_rate; + + /* min_rate is minimum guaranteed rate and it can't be zero */ + min_rate = mqprio_qopt->min_rate[i]; + min_rate = div_u64(min_rate, ICE_BW_KBPS_DIVISOR); + sum_min_rate += min_rate; + + if (min_rate && min_rate < ICE_MIN_BW_LIMIT) { + dev_err(dev, "TC%d: min_rate(%llu Kbps) < %u Kbps\n", i, + min_rate, ICE_MIN_BW_LIMIT); + return -EINVAL; + } + + iter_div_u64_rem(min_rate, ICE_MIN_BW_LIMIT, &rem); + if (rem) { + dev_err(dev, "TC%d: Min Rate not multiple of %u Kbps", + i, ICE_MIN_BW_LIMIT); + return -EINVAL; + } + + iter_div_u64_rem(max_rate, ICE_MIN_BW_LIMIT, &rem); + if (rem) { + dev_err(dev, "TC%d: Max Rate not multiple of %u Kbps", + i, ICE_MIN_BW_LIMIT); + return -EINVAL; + } + + /* min_rate can't be more than max_rate, except when max_rate + * is zero (implies max_rate sought is max line rate). In such + * a case min_rate can be more than max. 
+ */ + if (max_rate && min_rate > max_rate) { + dev_err(dev, "min_rate %llu Kbps can't be more than max_rate %llu Kbps\n", + min_rate, max_rate); + return -EINVAL; + } + + if (i >= mqprio_qopt->qopt.num_tc - 1) + break; + if (mqprio_qopt->qopt.offset[i + 1] != + (mqprio_qopt->qopt.offset[i] + qcount)) + return -EINVAL; + } + if (vsi->num_rxq < + (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i])) + return -EINVAL; + if (vsi->num_txq < + (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i])) + return -EINVAL; + + speed = ice_get_link_speed_kbps(vsi); + if (sum_max_rate && sum_max_rate > (u64)speed) { + dev_err(dev, "Invalid max Tx rate(%llu) Kbps > speed(%u) Kbps specified\n", + sum_max_rate, speed); + return -EINVAL; + } + if (sum_min_rate && sum_min_rate > (u64)speed) { + dev_err(dev, "Invalid min Tx rate(%llu) Kbps > speed (%u) Kbps specified\n", + sum_min_rate, speed); + return -EINVAL; + } + + /* make sure vsi->ch_rss_size is set correctly based on TC's qcount */ + vsi->ch_rss_size = max_rss_q_cnt; + + return 0; +} + +/** + * ice_add_channel - add a channel by adding VSI + * @pf: ptr to PF device + * @sw_id: underlying HW switching element ID + * @ch: ptr to channel structure + * + * Add a channel (VSI) using add_vsi and queue_map + */ +static int ice_add_channel(struct ice_pf *pf, u16 sw_id, struct ice_channel *ch) +{ + struct device *dev = ice_pf_to_dev(pf); + struct ice_vsi *vsi; + + if (ch->type != ICE_VSI_CHNL) { + dev_err(dev, "add new VSI failed, ch->type %d\n", ch->type); + return -EINVAL; + } + + vsi = ice_chnl_vsi_setup(pf, pf->hw.port_info, ch); + if (!vsi || vsi->type != ICE_VSI_CHNL) { + dev_err(dev, "create chnl VSI failure\n"); + return -EINVAL; + } + + ch->sw_id = sw_id; + ch->vsi_num = vsi->vsi_num; + ch->info.mapping_flags = vsi->info.mapping_flags; + ch->ch_vsi = vsi; + /* set the back pointer of channel for newly created VSI */ + vsi->ch = ch; + + memcpy(&ch->info.q_mapping, &vsi->info.q_mapping, + sizeof(vsi->info.q_mapping)); 
+ memcpy(&ch->info.tc_mapping, vsi->info.tc_mapping, + sizeof(vsi->info.tc_mapping)); + + return 0; +} + +/** + * ice_chnl_cfg_res + * @vsi: the VSI being setup + * @ch: ptr to channel structure + * + * Configure channel specific resources such as rings, vector. + */ +static void ice_chnl_cfg_res(struct ice_vsi *vsi, struct ice_channel *ch) +{ + int i; + + for (i = 0; i < ch->num_txq; i++) { + struct ice_q_vector *tx_q_vector, *rx_q_vector; + struct ice_ring_container *rc; + struct ice_tx_ring *tx_ring; + struct ice_rx_ring *rx_ring; + + tx_ring = vsi->tx_rings[ch->base_q + i]; + rx_ring = vsi->rx_rings[ch->base_q + i]; + if (!tx_ring || !rx_ring) + continue; + + /* setup ring being channel enabled */ + tx_ring->ch = ch; + rx_ring->ch = ch; + + /* following code block sets up vector specific attributes */ + tx_q_vector = tx_ring->q_vector; + rx_q_vector = rx_ring->q_vector; + if (!tx_q_vector && !rx_q_vector) + continue; + + if (tx_q_vector) { + tx_q_vector->ch = ch; + /* setup Tx and Rx ITR setting if DIM is off */ + rc = &tx_q_vector->tx; + if (!ITR_IS_DYNAMIC(rc)) + ice_write_itr(rc, rc->itr_setting); + } + if (rx_q_vector) { + rx_q_vector->ch = ch; + /* setup Tx and Rx ITR setting if DIM is off */ + rc = &rx_q_vector->rx; + if (!ITR_IS_DYNAMIC(rc)) + ice_write_itr(rc, rc->itr_setting); + } + } + + /* it is safe to assume that, if channel has non-zero num_t[r]xq, then + * GLINT_ITR register would have been written to perform in-context + * update, hence perform flush + */ + if (ch->num_txq || ch->num_rxq) + ice_flush(&vsi->back->hw); +} + +/** + * ice_cfg_chnl_all_res - configure channel resources + * @vsi: ptr to main_vsi + * @ch: ptr to channel structure + * + * This function configures channel specific resources such as flow-director + * counter index, and other resources such as queues, vectors, ITR settings + */ +static void +ice_cfg_chnl_all_res(struct ice_vsi *vsi, struct ice_channel *ch) +{ + /* configure channel (aka ADQ) resources such as queues, vectors, + 
* ITR settings for channel specific vectors and anything else + */ + ice_chnl_cfg_res(vsi, ch); +} + +/** + * ice_setup_hw_channel - setup new channel + * @pf: ptr to PF device + * @vsi: the VSI being setup + * @ch: ptr to channel structure + * @sw_id: underlying HW switching element ID + * @type: type of channel to be created (VMDq2/VF) + * + * Setup new channel (VSI) based on specified type (VMDq2/VF) + * and configures Tx rings accordingly + */ +static int +ice_setup_hw_channel(struct ice_pf *pf, struct ice_vsi *vsi, + struct ice_channel *ch, u16 sw_id, u8 type) +{ + struct device *dev = ice_pf_to_dev(pf); + int ret; + + ch->base_q = vsi->next_base_q; + ch->type = type; + + ret = ice_add_channel(pf, sw_id, ch); + if (ret) { + dev_err(dev, "failed to add_channel using sw_id %u\n", sw_id); + return ret; + } + + /* configure/setup ADQ specific resources */ + ice_cfg_chnl_all_res(vsi, ch); + + /* make sure to update the next_base_q so that subsequent channel's + * (aka ADQ) VSI queue map is correct + */ + vsi->next_base_q = vsi->next_base_q + ch->num_rxq; + dev_dbg(dev, "added channel: vsi_num %u, num_rxq %u\n", ch->vsi_num, + ch->num_rxq); + + return 0; +} + +/** + * ice_setup_channel - setup new channel using uplink element + * @pf: ptr to PF device + * @vsi: the VSI being setup + * @ch: ptr to channel structure + * + * Setup new channel (VSI) based on specified type (VMDq2/VF) + * and uplink switching element + */ +static bool +ice_setup_channel(struct ice_pf *pf, struct ice_vsi *vsi, + struct ice_channel *ch) +{ + struct device *dev = ice_pf_to_dev(pf); + u16 sw_id; + int ret; + + if (vsi->type != ICE_VSI_PF) { + dev_err(dev, "unsupported parent VSI type(%d)\n", vsi->type); + return false; + } + + sw_id = pf->first_sw->sw_id; + + /* create channel (VSI) */ + ret = ice_setup_hw_channel(pf, vsi, ch, sw_id, ICE_VSI_CHNL); + if (ret) { + dev_err(dev, "failed to setup hw_channel\n"); + return false; + } + dev_dbg(dev, "successfully created channel()\n"); + + return 
ch->ch_vsi ? true : false; +} + +/** + * ice_set_bw_limit - setup BW limit for Tx traffic based on max_tx_rate + * @vsi: VSI to be configured + * @max_tx_rate: max Tx rate in Kbps to be configured as maximum BW limit + * @min_tx_rate: min Tx rate in Kbps to be configured as minimum BW limit + */ +static int +ice_set_bw_limit(struct ice_vsi *vsi, u64 max_tx_rate, u64 min_tx_rate) +{ + int err; + + err = ice_set_min_bw_limit(vsi, min_tx_rate); + if (err) + return err; + + return ice_set_max_bw_limit(vsi, max_tx_rate); +} + +/** + * ice_create_q_channel - function to create channel + * @vsi: VSI to be configured + * @ch: ptr to channel (it contains channel specific params) + * + * This function creates channel (VSI) using num_queues specified by user, + * reconfigs RSS if needed. + */ +static int ice_create_q_channel(struct ice_vsi *vsi, struct ice_channel *ch) +{ + struct ice_pf *pf = vsi->back; + struct device *dev; + + if (!ch) + return -EINVAL; + + dev = ice_pf_to_dev(pf); + if (!ch->num_txq || !ch->num_rxq) { + dev_err(dev, "Invalid num_queues requested: %d\n", ch->num_rxq); + return -EINVAL; + } + + if (!vsi->cnt_q_avail || vsi->cnt_q_avail < ch->num_txq) { + dev_err(dev, "cnt_q_avail (%u) less than num_queues %d\n", + vsi->cnt_q_avail, ch->num_txq); + return -EINVAL; + } + + if (!ice_setup_channel(pf, vsi, ch)) { + dev_info(dev, "Failed to setup channel\n"); + return -EINVAL; + } + /* configure BW rate limit */ + if (ch->ch_vsi && (ch->max_tx_rate || ch->min_tx_rate)) { + int ret; + + ret = ice_set_bw_limit(ch->ch_vsi, ch->max_tx_rate, + ch->min_tx_rate); + if (ret) + dev_err(dev, "failed to set Tx rate of %llu Kbps for VSI(%u)\n", + ch->max_tx_rate, ch->ch_vsi->vsi_num); + else + dev_dbg(dev, "set Tx rate of %llu Kbps for VSI(%u)\n", + ch->max_tx_rate, ch->ch_vsi->vsi_num); + } + + vsi->cnt_q_avail -= ch->num_txq; + + return 0; +} + +/** + * ice_remove_q_channels - Remove queue channels for the TCs + * @vsi: VSI to be configured + * @rem_fltr: delete advanced 
switch filter or not + * + * Remove queue channels for the TCs + */ +static void ice_remove_q_channels(struct ice_vsi *vsi, bool __maybe_unused rem_fltr) +{ + struct ice_channel *ch, *ch_tmp; + int i; + + /* perform cleanup for channels if they exist */ + list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) { + struct ice_vsi *ch_vsi; + + list_del(&ch->list); + ch_vsi = ch->ch_vsi; + if (!ch_vsi) { + kfree(ch); + continue; + } + + /* Reset queue contexts */ + for (i = 0; i < ch->num_rxq; i++) { + struct ice_tx_ring *tx_ring; + struct ice_rx_ring *rx_ring; + + tx_ring = vsi->tx_rings[ch->base_q + i]; + rx_ring = vsi->rx_rings[ch->base_q + i]; + if (tx_ring) { + tx_ring->ch = NULL; + if (tx_ring->q_vector) + tx_ring->q_vector->ch = NULL; + } + if (rx_ring) { + rx_ring->ch = NULL; + if (rx_ring->q_vector) + rx_ring->q_vector->ch = NULL; + } + } + + /* clear the VSI from scheduler tree */ + ice_rm_vsi_lan_cfg(ch->ch_vsi->port_info, ch->ch_vsi->idx); + + /* Delete VSI from FW */ + ice_vsi_delete(ch->ch_vsi); + + /* Delete VSI from PF and HW VSI arrays */ + ice_vsi_clear(ch->ch_vsi); + + /* free the channel */ + kfree(ch); + } + + /* clear the channel VSI map which is stored in main VSI */ + ice_for_each_chnl_tc(i) + vsi->tc_map_vsi[i] = NULL; + + /* reset main VSI's all TC information */ + vsi->all_enatc = 0; + vsi->all_numtc = 0; +} + +/** + * ice_rebuild_channels - rebuild channel + * @pf: ptr to PF + * + * Recreate channel VSIs and replay filters + */ +static int ice_rebuild_channels(struct ice_pf *pf) +{ + struct device *dev = ice_pf_to_dev(pf); + struct ice_vsi *main_vsi; + bool rem_adv_fltr = true; + struct ice_channel *ch; + struct ice_vsi *vsi; + int tc_idx = 1; + int i, err; + + main_vsi = ice_get_main_vsi(pf); + if (!main_vsi) + return 0; + + if (!test_bit(ICE_FLAG_TC_MQPRIO, pf->flags) || + main_vsi->old_numtc == 1) + return 0; /* nothing to be done */ + + /* reconfigure main VSI based on old value of TC and cached values + * for MQPRIO opts + */ + err = 
ice_vsi_cfg_tc(main_vsi, main_vsi->old_ena_tc); + if (err) { + dev_err(dev, "failed configuring TC(ena_tc:0x%02x) for HW VSI=%u\n", + main_vsi->old_ena_tc, main_vsi->vsi_num); + return err; + } + + /* rebuild ADQ VSIs */ + ice_for_each_vsi(pf, i) { + enum ice_vsi_type type; + + vsi = pf->vsi[i]; + if (!vsi || vsi->type != ICE_VSI_CHNL) + continue; + + type = vsi->type; + + /* rebuild ADQ VSI */ + err = ice_vsi_rebuild(vsi, true); + if (err) { + dev_err(dev, "VSI (type:%s) at index %d rebuild failed, err %d\n", + ice_vsi_type_str(type), vsi->idx, err); + goto cleanup; + } + + /* Re-map HW VSI number, using VSI handle that has been + * previously validated in ice_replay_vsi() call above + */ + vsi->vsi_num = ice_get_hw_vsi_num(&pf->hw, vsi->idx); + + /* replay filters for the VSI */ + err = ice_replay_vsi(&pf->hw, vsi->idx); + if (err) { + dev_err(dev, "VSI (type:%s) replay failed, err %d, VSI index %d\n", + ice_vsi_type_str(type), err, vsi->idx); + rem_adv_fltr = false; + goto cleanup; + } + dev_info(dev, "VSI (type:%s) at index %d rebuilt successfully\n", + ice_vsi_type_str(type), vsi->idx); + + /* store ADQ VSI at correct TC index in main VSI's + * map of TC to VSI + */ + main_vsi->tc_map_vsi[tc_idx++] = vsi; + } + + /* ADQ VSI(s) has been rebuilt successfully, so setup + * channel for main VSI's Tx and Rx rings + */ + list_for_each_entry(ch, &main_vsi->ch_list, list) { + struct ice_vsi *ch_vsi; + + ch_vsi = ch->ch_vsi; + if (!ch_vsi) + continue; + + /* reconfig channel resources */ + ice_cfg_chnl_all_res(main_vsi, ch); + + /* replay BW rate limit if it is non-zero */ + if (!ch->max_tx_rate && !ch->min_tx_rate) + continue; + + err = ice_set_bw_limit(ch_vsi, ch->max_tx_rate, + ch->min_tx_rate); + if (err) + dev_err(dev, "failed (err:%d) to rebuild BW rate limit, max_tx_rate: %llu Kbps, min_tx_rate: %llu Kbps for VSI(%u)\n", + err, ch->max_tx_rate, ch->min_tx_rate, + ch_vsi->vsi_num); + else + dev_dbg(dev, "successfully rebuild BW rate limit, max_tx_rate: %llu Kbps, 
min_tx_rate: %llu Kbps for VSI(%u)\n", + ch->max_tx_rate, ch->min_tx_rate, + ch_vsi->vsi_num); + } + + /* reconfig RSS for main VSI */ + if (main_vsi->ch_rss_size) + ice_vsi_cfg_rss_lut_key(main_vsi); + + return 0; + +cleanup: + ice_remove_q_channels(main_vsi, rem_adv_fltr); + return err; +} + +/** + * ice_create_q_channels - Add queue channel for the given TCs + * @vsi: VSI to be configured + * + * Configures queue channel mapping to the given TCs + */ +static int ice_create_q_channels(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + struct ice_channel *ch; + int ret = 0, i; + + ice_for_each_chnl_tc(i) { + if (!(vsi->all_enatc & BIT(i))) + continue; + + ch = kzalloc(sizeof(*ch), GFP_KERNEL); + if (!ch) { + ret = -ENOMEM; + goto err_free; + } + INIT_LIST_HEAD(&ch->list); + ch->num_rxq = vsi->mqprio_qopt.qopt.count[i]; + ch->num_txq = vsi->mqprio_qopt.qopt.count[i]; + ch->base_q = vsi->mqprio_qopt.qopt.offset[i]; + ch->max_tx_rate = vsi->mqprio_qopt.max_rate[i]; + ch->min_tx_rate = vsi->mqprio_qopt.min_rate[i]; + + /* convert to Kbits/s */ + if (ch->max_tx_rate) + ch->max_tx_rate = div_u64(ch->max_tx_rate, + ICE_BW_KBPS_DIVISOR); + if (ch->min_tx_rate) + ch->min_tx_rate = div_u64(ch->min_tx_rate, + ICE_BW_KBPS_DIVISOR); + + ret = ice_create_q_channel(vsi, ch); + if (ret) { + dev_err(ice_pf_to_dev(pf), + "failed creating channel TC:%d\n", i); + kfree(ch); + goto err_free; + } + list_add_tail(&ch->list, &vsi->ch_list); + vsi->tc_map_vsi[i] = ch->ch_vsi; + dev_dbg(ice_pf_to_dev(pf), + "successfully created channel: VSI %pK\n", ch->ch_vsi); + } + return 0; + +err_free: + ice_remove_q_channels(vsi, false); + + return ret; +} + +/** + * ice_setup_tc_mqprio_qdisc - configure multiple traffic classes + * @netdev: net device to configure + * @type_data: TC offload data + */ +static int ice_setup_tc_mqprio_qdisc(struct net_device *netdev, void *type_data) +{ + struct tc_mqprio_qopt_offload *mqprio_qopt = type_data; + struct ice_netdev_priv *np = netdev_priv(netdev); 
+ struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + u16 mode, ena_tc_qdisc = 0; + int cur_txq, cur_rxq; + u8 hw = 0, num_tcf; + struct device *dev; + int ret, i; + + dev = ice_pf_to_dev(pf); + num_tcf = mqprio_qopt->qopt.num_tc; + hw = mqprio_qopt->qopt.hw; + mode = mqprio_qopt->mode; + if (!hw) { + clear_bit(ICE_FLAG_TC_MQPRIO, pf->flags); + vsi->ch_rss_size = 0; + memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt)); + goto config_tcf; + } + + /* Generate queue region map for number of TCF requested */ + for (i = 0; i < num_tcf; i++) + ena_tc_qdisc |= BIT(i); + + switch (mode) { + case TC_MQPRIO_MODE_CHANNEL: + + ret = ice_validate_mqprio_qopt(vsi, mqprio_qopt); + if (ret) { + netdev_err(netdev, "failed to validate_mqprio_qopt(), ret %d\n", + ret); + return ret; + } + memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt)); + set_bit(ICE_FLAG_TC_MQPRIO, pf->flags); + break; + default: + return -EINVAL; + } + +config_tcf: + + /* Requesting same TCF configuration as already enabled */ + if (ena_tc_qdisc == vsi->tc_cfg.ena_tc && + mode != TC_MQPRIO_MODE_CHANNEL) + return 0; + + /* Pause VSI queues */ + ice_dis_vsi(vsi, true); + + if (!hw && !test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) + ice_remove_q_channels(vsi, true); + + if (!hw && !test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) { + vsi->req_txq = min_t(int, ice_get_avail_txq_count(pf), + num_online_cpus()); + vsi->req_rxq = min_t(int, ice_get_avail_rxq_count(pf), + num_online_cpus()); + } else { + /* logic to rebuild VSI, same like ethtool -L */ + u16 offset = 0, qcount_tx = 0, qcount_rx = 0; + + for (i = 0; i < num_tcf; i++) { + if (!(ena_tc_qdisc & BIT(i))) + continue; + + offset = vsi->mqprio_qopt.qopt.offset[i]; + qcount_rx = vsi->mqprio_qopt.qopt.count[i]; + qcount_tx = vsi->mqprio_qopt.qopt.count[i]; + } + vsi->req_txq = offset + qcount_tx; + vsi->req_rxq = offset + qcount_rx; + + /* store away original rss_size info, so that it gets reused + * form ice_vsi_rebuild during tc-qdisc 
delete stage - to + * determine, what should be the rss_sizefor main VSI + */ + vsi->orig_rss_size = vsi->rss_size; + } + + /* save current values of Tx and Rx queues before calling VSI rebuild + * for fallback option + */ + cur_txq = vsi->num_txq; + cur_rxq = vsi->num_rxq; + + /* proceed with rebuild main VSI using correct number of queues */ + ret = ice_vsi_rebuild(vsi, false); + if (ret) { + /* fallback to current number of queues */ + dev_info(dev, "Rebuild failed with new queues, try with current number of queues\n"); + vsi->req_txq = cur_txq; + vsi->req_rxq = cur_rxq; + clear_bit(ICE_RESET_FAILED, pf->state); + if (ice_vsi_rebuild(vsi, false)) { + dev_err(dev, "Rebuild of main VSI failed again\n"); + return ret; + } + } + + vsi->all_numtc = num_tcf; + vsi->all_enatc = ena_tc_qdisc; + ret = ice_vsi_cfg_tc(vsi, ena_tc_qdisc); + if (ret) { + netdev_err(netdev, "failed configuring TC for VSI id=%d\n", + vsi->vsi_num); + goto exit; + } + + if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) { + u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0]; + u64 min_tx_rate = vsi->mqprio_qopt.min_rate[0]; + + /* set TC0 rate limit if specified */ + if (max_tx_rate || min_tx_rate) { + /* convert to Kbits/s */ + if (max_tx_rate) + max_tx_rate = div_u64(max_tx_rate, ICE_BW_KBPS_DIVISOR); + if (min_tx_rate) + min_tx_rate = div_u64(min_tx_rate, ICE_BW_KBPS_DIVISOR); + + ret = ice_set_bw_limit(vsi, max_tx_rate, min_tx_rate); + if (!ret) { + dev_dbg(dev, "set Tx rate max %llu min %llu for VSI(%u)\n", + max_tx_rate, min_tx_rate, vsi->vsi_num); + } else { + dev_err(dev, "failed to set Tx rate max %llu min %llu for VSI(%u)\n", + max_tx_rate, min_tx_rate, vsi->vsi_num); + goto exit; + } + } + ret = ice_create_q_channels(vsi); + if (ret) { + netdev_err(netdev, "failed configuring queue channels\n"); + goto exit; + } else { + netdev_dbg(netdev, "successfully configured channels\n"); + } + } + + if (vsi->ch_rss_size) + ice_vsi_cfg_rss_lut_key(vsi); + +exit: + /* if error, reset the all_numtc and 
all_enatc */ + if (ret) { + vsi->all_numtc = 0; + vsi->all_enatc = 0; + } + /* resume VSI */ + ice_ena_vsi(vsi, true); + + return ret; +} + static LIST_HEAD(ice_block_cb_list); static int @@ -7190,6 +8053,8 @@ ice_setup_tc(struct net_device *netdev, enum tc_setup_type type, void *type_data) { struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_pf *pf = np->vsi->back; + int err; switch (type) { case TC_SETUP_BLOCK: @@ -7197,6 +8062,12 @@ ice_setup_tc(struct net_device *netdev, enum tc_setup_type type, &ice_block_cb_list, ice_setup_tc_block_cb, np, np, true); + case TC_SETUP_QDISC_MQPRIO: + /* setup traffic classifier for receive side */ + mutex_lock(&pf->tc_mutex); + err = ice_setup_tc_mqprio_qdisc(netdev, type_data); + mutex_unlock(&pf->tc_mutex); + return err; default: return -EOPNOTSUPP; } -- cgit v1.2.3-59-g8ed1b From 9fea749856d14c4713a2f5dee6f692aeaa2700b9 Mon Sep 17 00:00:00 2001 From: Kiran Patil Date: Fri, 15 Oct 2021 16:35:17 -0700 Subject: ice: Add tc-flower filter support for channel Add support to add/delete channel specific filter using tc-flower. For now, only supported action is "skip_sw hw_tc <tc_num>" Filter criteria is specific to channel and it can be combination of L3, L3+L4, L2+L4. Example: MATCH criteria Action --------------------------- src and/or dest IPv4[6]/mask -> Forward to "hw_tc <tc_num>" dest IPv4[6]/mask + dest L4 port -> Forward to "hw_tc <tc_num>" dest MAC + dest L4 port -> Forward to "hw_tc <tc_num>" src IPv4[6]/mask + src L4 port -> Forward to "hw_tc <tc_num>" src MAC + src L4 port -> Forward to "hw_tc <tc_num>" Adding tc-flower filter for channel using "hw_tc" ------------------------------------------------- tc qdisc add dev <ethX> clsact Above two steps are only needed the first time when adding tc-flower filter.
tc filter add dev <ethX> protocol ip ingress prio 1 flower \ dst_ip 192.168.0.1/32 ip_proto tcp dst_port 5001 \ skip_sw hw_tc 1 tc filter show dev <ethX> ingress filter protocol ip pref 1 flower chain 0 filter protocol ip pref 1 flower chain 0 handle 0x1 hw_tc 1 eth_type ipv4 ip_proto tcp dst_ip 192.168.0.1 dst_port 5001 skip_sw in_hw in_hw_count 1 Delete specific filter: ------------------------- tc filter del dev <ethX> ingress pref 1 handle 0x1 flower Delete All filters: ------------------ tc filter del dev <ethX> ingress Co-developed-by: Amritha Nambiar Signed-off-by: Amritha Nambiar Signed-off-by: Kiran Patil Signed-off-by: Sudheer Mogilappagari Tested-by: Bharathi Sreenivas Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 9 +- drivers/net/ethernet/intel/ice/ice_main.c | 77 +++++++++- drivers/net/ethernet/intel/ice/ice_switch.c | 119 +++++++++++++++ drivers/net/ethernet/intel/ice/ice_switch.h | 2 + drivers/net/ethernet/intel/ice/ice_tc_lib.c | 217 +++++++++++++++++++++++++++- drivers/net/ethernet/intel/ice/ice_tc_lib.h | 22 +++ 6 files changed, 438 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 4f94ecff87e8..967a90efcb11 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -39,6 +39,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -402,6 +405,7 @@ struct ice_vsi { u16 num_chnl_rxq; u16 num_chnl_txq; u16 ch_rss_size; + u16 num_chnl_fltr; /* store away rss size info before configuring ADQ channels so that, * it can be used after tc-qdisc delete, to get back RSS setting as * they were before @@ -581,7 +585,10 @@ struct ice_pf { struct auxiliary_device *adev; int aux_idx; u32 sw_int_count; - + /* count of tc_flower filters specific to channel (aka where filter + * action is "hw_tc <tc_num>") + */ + u16 num_dmac_chnl_fltrs; struct hlist_head tc_flower_fltr_list; __le64 nvm_phy_type_lo; /* NVM PHY
type low */ diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index ff2d000bbbab..cb82abd08a40 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5255,6 +5255,12 @@ static int ice_set_mac_address(struct net_device *netdev, void *pi) return -EBUSY; } + if (ice_chnl_dmac_fltr_cnt(pf)) { + netdev_err(netdev, "can't set mac %pM. Device has tc-flower filters, delete all of them and try again\n", + mac); + return -EAGAIN; + } + netif_addr_lock_bh(netdev); ether_addr_copy(old_mac, netdev->dev_addr); /* change the netdev's MAC address */ @@ -5511,6 +5517,13 @@ ice_set_features(struct net_device *netdev, netdev_features_t features) dev_err(ice_pf_to_dev(pf), "ADQ is active, can't turn hw_tc_offload off\n"); return -EACCES; } + + if ((features & NETIF_F_HW_TC) && + !(netdev->features & NETIF_F_HW_TC)) + set_bit(ICE_FLAG_CLS_FLOWER, pf->flags); + else + clear_bit(ICE_FLAG_CLS_FLOWER, pf->flags); + return ret; } @@ -7649,6 +7662,57 @@ static int ice_create_q_channel(struct ice_vsi *vsi, struct ice_channel *ch) return 0; } +/** + * ice_rem_all_chnl_fltrs - removes all channel filters + * @pf: ptr to PF, TC-flower based filter are tracked at PF level + * + * Remove all advanced switch filters only if they are channel specific + * tc-flower based filter + */ +static void ice_rem_all_chnl_fltrs(struct ice_pf *pf) +{ + struct ice_tc_flower_fltr *fltr; + struct hlist_node *node; + + /* to remove all channel filters, iterate an ordered list of filters */ + hlist_for_each_entry_safe(fltr, node, + &pf->tc_flower_fltr_list, + tc_flower_node) { + struct ice_rule_query_data rule; + int status; + + /* for now process only channel specific filters */ + if (!ice_is_chnl_fltr(fltr)) + continue; + + rule.rid = fltr->rid; + rule.rule_id = fltr->rule_id; + rule.vsi_handle = fltr->dest_id; + status = ice_rem_adv_rule_by_id(&pf->hw, &rule); + if (status) { + if (status == -ENOENT) + 
dev_dbg(ice_pf_to_dev(pf), "TC flower filter (rule_id %u) does not exist\n", + rule.rule_id); + else + dev_err(ice_pf_to_dev(pf), "failed to delete TC flower filter, status %d\n", + status); + } else if (fltr->dest_vsi) { + /* update advanced switch filter count */ + if (fltr->dest_vsi->type == ICE_VSI_CHNL) { + u32 flags = fltr->flags; + + fltr->dest_vsi->num_chnl_fltr--; + if (flags & (ICE_TC_FLWR_FIELD_DST_MAC | + ICE_TC_FLWR_FIELD_ENC_DST_MAC)) + pf->num_dmac_chnl_fltrs--; + } + } + + hlist_del(&fltr->tc_flower_node); + kfree(fltr); + } +} + /** * ice_remove_q_channels - Remove queue channels for the TCs * @vsi: VSI to be configured @@ -7656,11 +7720,16 @@ static int ice_create_q_channel(struct ice_vsi *vsi, struct ice_channel *ch) * * Remove queue channels for the TCs */ -static void ice_remove_q_channels(struct ice_vsi *vsi, bool __maybe_unused rem_fltr) +static void ice_remove_q_channels(struct ice_vsi *vsi, bool rem_fltr) { struct ice_channel *ch, *ch_tmp; + struct ice_pf *pf = vsi->back; int i; + /* remove all tc-flower based filter if they are channel filters only */ + if (rem_fltr) + ice_rem_all_chnl_fltrs(pf); + /* perform cleanup for channels if they exist */ list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) { struct ice_vsi *ch_vsi; @@ -7926,6 +7995,12 @@ static int ice_setup_tc_mqprio_qdisc(struct net_device *netdev, void *type_data) } memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt)); set_bit(ICE_FLAG_TC_MQPRIO, pf->flags); + /* don't assume state of hw_tc_offload during driver load + * and set the flag for TC flower filter if hw_tc_offload + * already ON + */ + if (vsi->netdev->features & NETIF_F_HW_TC) + set_bit(ICE_FLAG_CLS_FLOWER, pf->flags); break; default: return -EINVAL; diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index a4a299012f9f..2742e1c1e337 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -2272,6 
+2272,125 @@ exit: return status; } +/** + * ice_mac_fltr_exist - does this MAC filter exist for given VSI + * @hw: pointer to the hardware structure + * @mac: MAC address to be checked (for MAC filter) + * @vsi_handle: check MAC filter for this VSI + */ +bool ice_mac_fltr_exist(struct ice_hw *hw, u8 *mac, u16 vsi_handle) +{ + struct ice_fltr_mgmt_list_entry *entry; + struct list_head *rule_head; + struct ice_switch_info *sw; + struct mutex *rule_lock; /* Lock to protect filter rule list */ + u16 hw_vsi_id; + + if (!ice_is_vsi_valid(hw, vsi_handle)) + return false; + + hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle); + sw = hw->switch_info; + rule_head = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules; + if (!rule_head) + return false; + + rule_lock = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock; + mutex_lock(rule_lock); + list_for_each_entry(entry, rule_head, list_entry) { + struct ice_fltr_info *f_info = &entry->fltr_info; + u8 *mac_addr = &f_info->l_data.mac.mac_addr[0]; + + if (is_zero_ether_addr(mac_addr)) + continue; + + if (f_info->flag != ICE_FLTR_TX || + f_info->src_id != ICE_SRC_ID_VSI || + f_info->lkup_type != ICE_SW_LKUP_MAC || + f_info->fltr_act != ICE_FWD_TO_VSI || + hw_vsi_id != f_info->fwd_id.hw_vsi_id) + continue; + + if (ether_addr_equal(mac, mac_addr)) { + mutex_unlock(rule_lock); + return true; + } + } + mutex_unlock(rule_lock); + return false; +} + +/** + * ice_vlan_fltr_exist - does this VLAN filter exist for given VSI + * @hw: pointer to the hardware structure + * @vlan_id: VLAN ID + * @vsi_handle: check MAC filter for this VSI + */ +bool ice_vlan_fltr_exist(struct ice_hw *hw, u16 vlan_id, u16 vsi_handle) +{ + struct ice_fltr_mgmt_list_entry *entry; + struct list_head *rule_head; + struct ice_switch_info *sw; + struct mutex *rule_lock; /* Lock to protect filter rule list */ + u16 hw_vsi_id; + + if (vlan_id > ICE_MAX_VLAN_ID) + return false; + + if (!ice_is_vsi_valid(hw, vsi_handle)) + return false; + + hw_vsi_id = ice_get_hw_vsi_num(hw, 
vsi_handle); + sw = hw->switch_info; + rule_head = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rules; + if (!rule_head) + return false; + + rule_lock = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rule_lock; + mutex_lock(rule_lock); + list_for_each_entry(entry, rule_head, list_entry) { + struct ice_fltr_info *f_info = &entry->fltr_info; + u16 entry_vlan_id = f_info->l_data.vlan.vlan_id; + struct ice_vsi_list_map_info *map_info; + + if (entry_vlan_id > ICE_MAX_VLAN_ID) + continue; + + if (f_info->flag != ICE_FLTR_TX || + f_info->src_id != ICE_SRC_ID_VSI || + f_info->lkup_type != ICE_SW_LKUP_VLAN) + continue; + + /* Only allowed filter action are FWD_TO_VSI/_VSI_LIST */ + if (f_info->fltr_act != ICE_FWD_TO_VSI && + f_info->fltr_act != ICE_FWD_TO_VSI_LIST) + continue; + + if (f_info->fltr_act == ICE_FWD_TO_VSI) { + if (hw_vsi_id != f_info->fwd_id.hw_vsi_id) + continue; + } else if (f_info->fltr_act == ICE_FWD_TO_VSI_LIST) { + /* If filter_action is FWD_TO_VSI_LIST, make sure + * that VSI being checked is part of VSI list + */ + if (entry->vsi_count == 1 && + entry->vsi_list_info) { + map_info = entry->vsi_list_info; + if (!test_bit(vsi_handle, map_info->vsi_map)) + continue; + } + } + + if (vlan_id == entry_vlan_id) { + mutex_unlock(rule_lock); + return true; + } + } + mutex_unlock(rule_lock); + + return false; +} + /** * ice_add_mac - Add a MAC address based filter rule * @hw: pointer to the hardware structure diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index d4c0a3b594af..c4dd2062c469 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -335,6 +335,8 @@ enum ice_status ice_remove_eth_mac(struct ice_hw *hw, struct list_head *em_list); int ice_cfg_rdma_fltr(struct ice_hw *hw, u16 vsi_handle, bool enable); +bool ice_mac_fltr_exist(struct ice_hw *hw, u8 *mac, u16 vsi_handle); +bool ice_vlan_fltr_exist(struct ice_hw *hw, u16 vlan_id, u16 vsi_handle); void 
ice_remove_vsi_fltr(struct ice_hw *hw, u16 vsi_handle); enum ice_status ice_add_vlan(struct ice_hw *hw, struct list_head *m_list); diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c index 1dccfd116bc9..725caa160b13 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -303,6 +303,136 @@ exit: return ret; } +/** + * ice_add_tc_flower_adv_fltr - add appropriate filter rules + * @vsi: Pointer to VSI + * @tc_fltr: Pointer to TC flower filter structure + * + * based on filter parameters using Advance recipes supported + * by OS package. + */ +static int +ice_add_tc_flower_adv_fltr(struct ice_vsi *vsi, + struct ice_tc_flower_fltr *tc_fltr) +{ + struct ice_tc_flower_lyr_2_4_hdrs *headers = &tc_fltr->outer_headers; + struct ice_adv_rule_info rule_info = {0}; + struct ice_rule_query_data rule_added; + struct ice_adv_lkup_elem *list; + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + u32 flags = tc_fltr->flags; + struct ice_vsi *ch_vsi; + struct device *dev; + u16 lkups_cnt = 0; + u16 l4_proto = 0; + int ret = 0; + u16 i = 0; + + dev = ice_pf_to_dev(pf); + if (ice_is_safe_mode(pf)) { + NL_SET_ERR_MSG_MOD(tc_fltr->extack, "Unable to add filter because driver is in safe mode"); + return -EOPNOTSUPP; + } + + if (!flags || (flags & (ICE_TC_FLWR_FIELD_ENC_DEST_IPV4 | + ICE_TC_FLWR_FIELD_ENC_SRC_IPV4 | + ICE_TC_FLWR_FIELD_ENC_DEST_IPV6 | + ICE_TC_FLWR_FIELD_ENC_SRC_IPV6 | + ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT))) { + NL_SET_ERR_MSG_MOD(tc_fltr->extack, "Unsupported encap field(s)"); + return -EOPNOTSUPP; + } + + /* get the channel (aka ADQ VSI) */ + if (tc_fltr->dest_vsi) + ch_vsi = tc_fltr->dest_vsi; + else + ch_vsi = vsi->tc_map_vsi[tc_fltr->action.tc_class]; + + lkups_cnt = ice_tc_count_lkups(flags, headers, tc_fltr); + list = kcalloc(lkups_cnt, sizeof(*list), GFP_ATOMIC); + if (!list) + return -ENOMEM; + + i = ice_tc_fill_rules(hw, flags, tc_fltr, list, 
&rule_info, &l4_proto); + if (i != lkups_cnt) { + ret = -EINVAL; + goto exit; + } + + rule_info.sw_act.fltr_act = tc_fltr->action.fltr_act; + if (tc_fltr->action.tc_class >= ICE_CHNL_START_TC) { + if (!ch_vsi) { + NL_SET_ERR_MSG_MOD(tc_fltr->extack, "Unable to add filter because specified destination doesn't exist"); + ret = -EINVAL; + goto exit; + } + + rule_info.sw_act.fltr_act = ICE_FWD_TO_VSI; + rule_info.sw_act.vsi_handle = ch_vsi->idx; + rule_info.priority = 7; + rule_info.sw_act.src = hw->pf_id; + rule_info.rx = true; + dev_dbg(dev, "add switch rule for TC:%u vsi_idx:%u, lkups_cnt:%u\n", + tc_fltr->action.tc_class, + rule_info.sw_act.vsi_handle, lkups_cnt); + } else { + rule_info.sw_act.flag |= ICE_FLTR_TX; + rule_info.sw_act.src = vsi->idx; + rule_info.rx = false; + } + + /* specify the cookie as filter_rule_id */ + rule_info.fltr_rule_id = tc_fltr->cookie; + + ret = ice_add_adv_rule(hw, list, lkups_cnt, &rule_info, &rule_added); + if (ret == -EEXIST) { + NL_SET_ERR_MSG_MOD(tc_fltr->extack, + "Unable to add filter because it already exist"); + ret = -EINVAL; + goto exit; + } else if (ret) { + NL_SET_ERR_MSG_MOD(tc_fltr->extack, + "Unable to add filter due to error"); + ret = -EIO; + goto exit; + } + + /* store the output params, which are needed later for removing + * advanced switch filter + */ + tc_fltr->rid = rule_added.rid; + tc_fltr->rule_id = rule_added.rule_id; + if (tc_fltr->action.tc_class > 0 && ch_vsi) { + /* For PF ADQ, VSI type is set as ICE_VSI_CHNL, and + * for PF ADQ filter, it is not yet set in tc_fltr, + * hence store the dest_vsi ptr in tc_fltr + */ + if (ch_vsi->type == ICE_VSI_CHNL) + tc_fltr->dest_vsi = ch_vsi; + /* keep track of advanced switch filter for + * destination VSI (channel VSI) + */ + ch_vsi->num_chnl_fltr++; + /* in this case, dest_id is VSI handle (sw handle) */ + tc_fltr->dest_id = rule_added.vsi_handle; + + /* keeps track of channel filters for PF VSI */ + if (vsi->type == ICE_VSI_PF && + (flags & 
(ICE_TC_FLWR_FIELD_DST_MAC | + ICE_TC_FLWR_FIELD_ENC_DST_MAC))) + pf->num_dmac_chnl_fltrs++; + } + dev_dbg(dev, "added switch rule (lkups_cnt %u, flags 0x%x) for TC %u, rid %u, rule_id %u, vsi_idx %u\n", + lkups_cnt, flags, + tc_fltr->action.tc_class, rule_added.rid, + rule_added.rule_id, rule_added.vsi_handle); +exit: + kfree(list); + return ret; +} + /** * ice_tc_set_ipv4 - Parse IPv4 addresses from TC flower filter * @match: Pointer to flow match structure @@ -561,10 +691,13 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi, static int ice_add_switch_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) { + if (fltr->action.fltr_act == ICE_FWD_TO_QGRP) + return -EOPNOTSUPP; + if (ice_is_eswitch_mode_switchdev(vsi->back)) return ice_eswitch_add_tc_fltr(vsi, fltr); - return -EOPNOTSUPP; + return ice_add_tc_flower_adv_fltr(vsi, fltr); } /** @@ -581,6 +714,7 @@ ice_handle_tclass_action(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) { int tc = tc_classid_to_hwtc(vsi->netdev, cls_flower->classid); + struct ice_vsi *main_vsi; if (tc < 0) { NL_SET_ERR_MSG_MOD(fltr->extack, "Unable to add filter because specified destination is invalid"); @@ -591,13 +725,69 @@ ice_handle_tclass_action(struct ice_vsi *vsi, return -EINVAL; } - if (!(vsi->tc_cfg.ena_tc & BIT(tc))) { + if (!(vsi->all_enatc & BIT(tc))) { NL_SET_ERR_MSG_MOD(fltr->extack, "Unable to add filter because of non-existence destination"); return -EINVAL; } /* Redirect to a TC class or Queue Group */ - fltr->action.fltr_act = ICE_FWD_TO_QGRP; + main_vsi = ice_get_main_vsi(vsi->back); + if (!main_vsi || !main_vsi->netdev) { + NL_SET_ERR_MSG_MOD(fltr->extack, + "Unable to add filter because of invalid netdevice"); + return -EINVAL; + } + + if ((fltr->flags & ICE_TC_FLWR_FIELD_TENANT_ID) && + (fltr->flags & (ICE_TC_FLWR_FIELD_DST_MAC | + ICE_TC_FLWR_FIELD_SRC_MAC))) { + NL_SET_ERR_MSG_MOD(fltr->extack, + "Unable to add filter because filter using tunnel key and inner MAC is 
unsupported combination"); + return -EOPNOTSUPP; + } + + /* For ADQ, filter must include dest MAC address, otherwise unwanted + * packets with unrelated MAC address get delivered to ADQ VSIs as long + * as remaining filter criteria is satisfied such as dest IP address + * and dest/src L4 port. Following code is trying to handle: + * 1. For non-tunnel, if user specify MAC addresses, use them (means + * this code won't do anything + * 2. For non-tunnel, if user didn't specify MAC address, add implicit + * dest MAC to be lower netdev's active unicast MAC address + */ + if (!(fltr->flags & ICE_TC_FLWR_FIELD_DST_MAC)) { + ether_addr_copy(fltr->outer_headers.l2_key.dst_mac, + main_vsi->netdev->dev_addr); + eth_broadcast_addr(fltr->outer_headers.l2_mask.dst_mac); + fltr->flags |= ICE_TC_FLWR_FIELD_DST_MAC; + } + + /* validate specified dest MAC address, make sure either it belongs to + * lower netdev or any of MACVLAN. MACVLANs MAC address are added as + * unicast MAC filter destined to main VSI. + */ + if (!ice_mac_fltr_exist(&main_vsi->back->hw, + fltr->outer_headers.l2_key.dst_mac, + main_vsi->idx)) { + NL_SET_ERR_MSG_MOD(fltr->extack, + "Unable to add filter because legacy MAC filter for specified destination doesn't exist"); + return -EINVAL; + } + + /* Make sure VLAN is already added to main VSI, before allowing ADQ to + * add a VLAN based filter such as MAC + VLAN + L4 port. 
+ */ + if (fltr->flags & ICE_TC_FLWR_FIELD_VLAN) { + u16 vlan_id = be16_to_cpu(fltr->outer_headers.vlan_hdr.vlan_id); + + if (!ice_vlan_fltr_exist(&main_vsi->back->hw, vlan_id, + main_vsi->idx)) { + NL_SET_ERR_MSG_MOD(fltr->extack, + "Unable to add filter because legacy VLAN filter for specified destination doesn't exist"); + return -EINVAL; + } + } + fltr->action.fltr_act = ICE_FWD_TO_VSI; fltr->action.tc_class = tc; return 0; @@ -639,8 +829,8 @@ ice_parse_tc_flower_actions(struct ice_vsi *vsi, /* Drop action */ if (act->id == FLOW_ACTION_DROP) { - fltr->action.fltr_act = ICE_DROP_PACKET; - return 0; + NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported action DROP"); + return -EINVAL; } fltr->action.fltr_act = ICE_FWD_TO_VSI; } @@ -673,6 +863,20 @@ static int ice_del_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) return -EIO; } + /* update advanced switch filter count for destination + * VSI if filter destination was VSI + */ + if (fltr->dest_vsi) { + if (fltr->dest_vsi->type == ICE_VSI_CHNL) { + fltr->dest_vsi->num_chnl_fltr--; + + /* keeps track of channel filters for PF VSI */ + if (vsi->type == ICE_VSI_PF && + (fltr->flags & (ICE_TC_FLWR_FIELD_DST_MAC | + ICE_TC_FLWR_FIELD_ENC_DST_MAC))) + pf->num_dmac_chnl_fltrs--; + } + } return 0; } @@ -811,7 +1015,8 @@ ice_del_cls_flower(struct ice_vsi *vsi, struct flow_cls_offload *cls_flower) /* find filter */ fltr = ice_find_tc_flower_fltr(pf, cls_flower->cookie); if (!fltr) { - if (hlist_empty(&pf->tc_flower_fltr_list)) + if (!test_bit(ICE_FLAG_TC_MQPRIO, pf->flags) && + hlist_empty(&pf->tc_flower_fltr_list)) return 0; NL_SET_ERR_MSG_MOD(cls_flower->common.extack, "failed to delete TC flower filter because unable to find it"); diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.h b/drivers/net/ethernet/intel/ice/ice_tc_lib.h index d90e9e37ae25..ee9b284fcc02 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.h @@ -120,6 +120,28 @@ struct 
ice_tc_flower_fltr { struct netlink_ext_ack *extack; }; +/** + * ice_is_chnl_fltr - is this a valid channel filter + * @f: Pointer to tc-flower filter + * + * Criteria to determine of given filter is valid channel filter + * or not is based on its "destination". If destination is hw_tc (aka tc_class) + * and it is non-zero, then it is valid channel (aka ADQ) filter + */ +static inline bool ice_is_chnl_fltr(struct ice_tc_flower_fltr *f) +{ + return !!f->action.tc_class; +} + +/** + * ice_chnl_dmac_fltr_cnt - DMAC based CHNL filter count + * @pf: Pointer to PF + */ +static inline int ice_chnl_dmac_fltr_cnt(struct ice_pf *pf) +{ + return pf->num_dmac_chnl_fltrs; +} + int ice_add_cls_flower(struct net_device *netdev, struct ice_vsi *vsi, struct flow_cls_offload *cls_flower); -- cgit v1.2.3-59-g8ed1b