Diffstat (limited to 'drivers/net/ethernet/mellanox')
45 files changed, 2869 insertions, 671 deletions
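The bulk of the mechanical churn below is the migration from the driver-private net_dim API (<linux/net_dim.h>) to the generic kernel DIM library (<linux/dim.h>), which is why the Kconfig hunk adds "select DIMLIB". A minimal sketch of the renamed types and calls as they recur throughout the diff; my_ring and my_ring_poll_done are hypothetical stand-ins for the driver's RQ/SQ structures, not real mlx5 symbols:

#include <linux/dim.h>

struct my_ring {                                /* hypothetical ring context */
        struct dim dim;                         /* was: struct net_dim */
        u16 event_ctr;
        u64 packets, bytes;
};

static void my_ring_poll_done(struct my_ring *ring)
{
        struct dim_sample sample;               /* was: struct net_dim_sample */

        /* was net_dim_sample(): feed one measurement to the algorithm */
        dim_update_sample(ring->event_ctr, ring->packets, ring->bytes, &sample);
        /* net_dim() may schedule dim.work, whose handler applies the new
         * moderation profile and resets dim->state to DIM_START_MEASURE
         * (see mlx5e_complete_dim_work() below)
         */
        net_dim(&ring->dim, sample);
}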
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 2391e3cfb56b..7845aa5bf6be 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -34,6 +34,7 @@ config MLX5_CORE_EN depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE depends on IPV6=y || IPV6=n || MLX5_CORE=m select PAGE_POOL + select DIMLIB default n ---help--- Ethernet support in Mellanox Technologies ConnectX-4 NIC. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 90db891fcc22..8456b19d79cd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -13,7 +13,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o # mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \ - transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ + transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \ fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \ lib/devcom.o lib/pci_vsc.o diag/fs_tracepoint.o \ diag/fw_tracer.o diag/crdump.o devlink.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h index a4cf123e3f17..ddf1b87f1bc0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h @@ -187,6 +187,7 @@ TRACE_EVENT(mlx5_fs_set_fte, __field(u32, index) __field(u32, action) __field(u32, flow_tag) + __field(u32, flow_source) __field(u8, mask_enable) __field(int, new_fte) __array(u32, mask_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) @@ -204,7 +205,8 @@ TRACE_EVENT(mlx5_fs_set_fte, __entry->index = fte->index; __entry->action = fte->action.action; __entry->mask_enable = __entry->fg->mask.match_criteria_enable; - __entry->flow_tag = fte->action.flow_tag; + __entry->flow_tag = fte->flow_context.flow_tag; + __entry->flow_source = fte->flow_context.flow_source; memcpy(__entry->mask_outer, MLX5_ADDR_OF(fte_match_param, &__entry->fg->mask.match_criteria, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index cb00e622c006..f0d77eb66acf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -48,7 +48,7 @@ #include <linux/rhashtable.h> #include <net/switchdev.h> #include <net/xdp.h> -#include <linux/net_dim.h> +#include <linux/dim.h> #include <linux/bits.h> #include "wq.h" #include "mlx5_core.h" @@ -245,9 +245,9 @@ struct mlx5e_params { u16 num_channels; u8 num_tc; bool rx_cqe_compress_def; - struct net_dim_cq_moder rx_cq_moderation; - struct net_dim_cq_moder tx_cq_moderation; bool tunneled_offload_en; + struct dim_cq_moder rx_cq_moderation; + struct dim_cq_moder tx_cq_moderation; bool lro_en; u8 tx_min_inline_mode; bool vlan_strip_disable; @@ -371,7 +371,7 @@ struct mlx5e_txqsq { /* dirtied @completion */ u16 cc; u32 dma_fifo_cc; - struct net_dim dim; /* Adaptive Moderation */ + struct dim dim; /* Adaptive Moderation */ /* dirtied @xmit */ u16 pc ____cacheline_aligned_in_smp; @@ -660,7 +660,7 @@ struct mlx5e_rq { int ix; unsigned int hw_mtu; - struct net_dim dim; /* Dynamic Interrupt Moderation */ + struct dim dim; /* Dynamic Interrupt Moderation */ /* XDP */ struct bpf_prog *xdp_prog; @@ -861,6 +861,7 @@ struct mlx5e_profile { void (*cleanup_tx)(struct mlx5e_priv *priv); 
void (*enable)(struct mlx5e_priv *priv); void (*disable)(struct mlx5e_priv *priv); + int (*update_rx)(struct mlx5e_priv *priv); void (*update_stats)(struct mlx5e_priv *priv); void (*update_carrier)(struct mlx5e_priv *priv); struct { @@ -973,7 +974,7 @@ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq); struct mlx5e_cq_param; -int mlx5e_open_cq(struct mlx5e_channel *c, struct net_dim_cq_moder moder, +int mlx5e_open_cq(struct mlx5e_channel *c, struct dim_cq_moder moder, struct mlx5e_cq_param *param, struct mlx5e_cq *cq); void mlx5e_close_cq(struct mlx5e_cq *cq); @@ -1158,6 +1159,7 @@ int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc, void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn); int mlx5e_create_tises(struct mlx5e_priv *priv); +int mlx5e_update_nic_rx(struct mlx5e_priv *priv); void mlx5e_update_carrier(struct mlx5e_priv *priv); int mlx5e_close(struct net_device *netdev); int mlx5e_open(struct net_device *netdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index f5ad531e1749..3739646b653f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -5,6 +5,7 @@ #include <net/gre.h> #include <net/geneve.h> #include "en/tc_tun.h" +#include "en_tc.h" struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev) { @@ -47,7 +48,8 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv, *route_dev = dev; if (is_vlan_dev(*route_dev)) *out_dev = uplink_dev; - else if (mlx5e_eswitch_rep(dev)) + else if (mlx5e_eswitch_rep(dev) && + mlx5e_is_valid_eswitch_fwd_dev(priv, dev)) *out_dev = *route_dev; else return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index 9b4d47c47c92..aaffa6f68dc0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -61,7 +61,7 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, struct mlx5e_channel *c) { struct mlx5e_channel_param cparam = {}; - struct net_dim_cq_moder icocq_moder = {}; + struct dim_cq_moder icocq_moder = {}; int err; if (!mlx5e_validate_xsk_param(params, xsk, priv->mdev)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c index d67adf70a97b..ca9cfbf57d8f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c @@ -30,22 +30,22 @@ * SOFTWARE. 
*/ -#include <linux/net_dim.h> +#include <linux/dim.h> #include "en.h" static void -mlx5e_complete_dim_work(struct net_dim *dim, struct net_dim_cq_moder moder, +mlx5e_complete_dim_work(struct dim *dim, struct dim_cq_moder moder, struct mlx5_core_dev *mdev, struct mlx5_core_cq *mcq) { mlx5_core_modify_cq_moderation(mdev, mcq, moder.usec, moder.pkts); - dim->state = NET_DIM_START_MEASURE; + dim->state = DIM_START_MEASURE; } void mlx5e_rx_dim_work(struct work_struct *work) { - struct net_dim *dim = container_of(work, struct net_dim, work); + struct dim *dim = container_of(work, struct dim, work); struct mlx5e_rq *rq = container_of(dim, struct mlx5e_rq, dim); - struct net_dim_cq_moder cur_moder = + struct dim_cq_moder cur_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix); mlx5e_complete_dim_work(dim, cur_moder, rq->mdev, &rq->cq.mcq); @@ -53,9 +53,9 @@ void mlx5e_rx_dim_work(struct work_struct *work) void mlx5e_tx_dim_work(struct work_struct *work) { - struct net_dim *dim = container_of(work, struct net_dim, work); + struct dim *dim = container_of(work, struct dim, work); struct mlx5e_txqsq *sq = container_of(dim, struct mlx5e_txqsq, dim); - struct net_dim_cq_moder cur_moder = + struct dim_cq_moder cur_moder = net_dim_get_tx_moderation(dim->mode, dim->profile_ix); mlx5e_complete_dim_work(dim, cur_moder, sq->cq.mdev, &sq->cq.mcq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 74235317d4dc..126ec4181286 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -47,7 +47,7 @@ void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, "%d.%d.%04d (%.16s)", fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev), mdev->board_id); - strlcpy(drvinfo->bus_info, pci_name(mdev->pdev), + strlcpy(drvinfo->bus_info, dev_name(mdev->device), sizeof(drvinfo->bus_info)); } @@ -487,7 +487,7 @@ static int mlx5e_set_channels(struct net_device *dev, int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) { - struct net_dim_cq_moder *rx_moder, *tx_moder; + struct dim_cq_moder *rx_moder, *tx_moder; if (!MLX5_CAP_GEN(priv->mdev, cq_moderation)) return -EOPNOTSUPP; @@ -542,7 +542,7 @@ mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesc int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) { - struct net_dim_cq_moder *rx_moder, *tx_moder; + struct dim_cq_moder *rx_moder, *tx_moder; struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_channels new_channels = {}; int err = 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index ec5392baabc8..ea3a490b569a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -436,7 +436,7 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv, } spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria)); - flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; + spec->flow_context.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; rule = mlx5_add_flow_rules(ft, spec, &flow_act, dst, dst ? 
1 : 0); if (IS_ERR(rule)) { err = PTR_ERR(rule); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 67b562c7f8a1..2f9093ba82aa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -617,11 +617,11 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, switch (params->rx_cq_moderation.cq_period_mode) { case MLX5_CQ_PERIOD_MODE_START_FROM_CQE: - rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE; + rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_CQE; break; case MLX5_CQ_PERIOD_MODE_START_FROM_EQE: default: - rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; + rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; } rq->page_cache.head = 0; @@ -1602,7 +1602,7 @@ static void mlx5e_destroy_cq(struct mlx5e_cq *cq) mlx5_core_destroy_cq(cq->mdev, &cq->mcq); } -int mlx5e_open_cq(struct mlx5e_channel *c, struct net_dim_cq_moder moder, +int mlx5e_open_cq(struct mlx5e_channel *c, struct dim_cq_moder moder, struct mlx5e_cq_param *param, struct mlx5e_cq *cq) { struct mlx5_core_dev *mdev = c->mdev; @@ -1804,7 +1804,7 @@ static int mlx5e_open_queues(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_channel_param *cparam) { - struct net_dim_cq_moder icocq_moder = {0, 0}; + struct dim_cq_moder icocq_moder = {0, 0}; int err; err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->icosq.cq); @@ -2230,7 +2230,7 @@ void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, mlx5e_build_common_cq_param(priv, param); - param->cq_period_mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; + param->cq_period_mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; } void mlx5e_build_icosq_param(struct mlx5e_priv *priv, @@ -2939,7 +2939,7 @@ static void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, if (hw_modify) hw_modify(priv); - mlx5e_refresh_tirs(priv, false); + priv->profile->update_rx(priv); mlx5e_activate_priv_channels(priv); /* return carrier back if needed */ @@ -2989,7 +2989,7 @@ int mlx5e_open_locked(struct net_device *netdev) if (err) goto err_clear_state_opened_flag; - mlx5e_refresh_tirs(priv, false); + priv->profile->update_rx(priv); mlx5e_activate_priv_channels(priv); if (priv->profile->update_carrier) priv->profile->update_carrier(priv); @@ -4612,9 +4612,9 @@ static bool slow_pci_heuristic(struct mlx5_core_dev *mdev) link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw; } -static struct net_dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) +static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) { - struct net_dim_cq_moder moder; + struct dim_cq_moder moder; moder.cq_period_mode = cq_period_mode; moder.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; @@ -4625,9 +4625,9 @@ static struct net_dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) return moder; } -static struct net_dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode) +static struct dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode) { - struct net_dim_cq_moder moder; + struct dim_cq_moder moder; moder.cq_period_mode = cq_period_mode; moder.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS; @@ -4641,8 +4641,8 @@ static struct net_dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode) static u8 mlx5_to_net_dim_cq_period_mode(u8 cq_period_mode) { return cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE ? 
- NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE : - NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; + DIM_CQ_PERIOD_MODE_START_FROM_CQE : + DIM_CQ_PERIOD_MODE_START_FROM_EQE; } void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) @@ -4819,14 +4819,18 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->ethtool_ops = &mlx5e_ethtool_ops; netdev->vlan_features |= NETIF_F_SG; - netdev->vlan_features |= NETIF_F_IP_CSUM; - netdev->vlan_features |= NETIF_F_IPV6_CSUM; + netdev->vlan_features |= NETIF_F_HW_CSUM; netdev->vlan_features |= NETIF_F_GRO; netdev->vlan_features |= NETIF_F_TSO; netdev->vlan_features |= NETIF_F_TSO6; netdev->vlan_features |= NETIF_F_RXCSUM; netdev->vlan_features |= NETIF_F_RXHASH; + netdev->mpls_features |= NETIF_F_SG; + netdev->mpls_features |= NETIF_F_HW_CSUM; + netdev->mpls_features |= NETIF_F_TSO; + netdev->mpls_features |= NETIF_F_TSO6; + netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_TX; netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_RX; @@ -4842,8 +4846,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev) || MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) { - netdev->hw_enc_features |= NETIF_F_IP_CSUM; - netdev->hw_enc_features |= NETIF_F_IPV6_CSUM; + netdev->hw_enc_features |= NETIF_F_HW_CSUM; netdev->hw_enc_features |= NETIF_F_TSO; netdev->hw_enc_features |= NETIF_F_TSO6; netdev->hw_enc_features |= NETIF_F_GSO_PARTIAL; @@ -5141,6 +5144,11 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) mlx5_lag_remove(mdev); } +int mlx5e_update_nic_rx(struct mlx5e_priv *priv) +{ + return mlx5e_refresh_tirs(priv, false); +} + static const struct mlx5e_profile mlx5e_nic_profile = { .init = mlx5e_nic_init, .cleanup = mlx5e_nic_cleanup, @@ -5150,6 +5158,7 @@ static const struct mlx5e_profile mlx5e_nic_profile = { .cleanup_tx = mlx5e_cleanup_nic_tx, .enable = mlx5e_nic_enable, .disable = mlx5e_nic_disable, + .update_rx = mlx5e_update_nic_rx, .update_stats = mlx5e_update_ndo_stats, .update_carrier = mlx5e_update_carrier, .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 8a7f60f30838..6a013a8c1150 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -391,30 +391,19 @@ static const struct ethtool_ops mlx5e_uplink_rep_ethtool_ops = { static int mlx5e_rep_get_port_parent_id(struct net_device *dev, struct netdev_phys_item_id *ppid) { - struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct net_device *uplink_upper = NULL; - struct mlx5e_priv *uplink_priv = NULL; - struct net_device *uplink_dev; + struct mlx5_eswitch *esw; + struct mlx5e_priv *priv; + u64 parent_id; + + priv = netdev_priv(dev); + esw = priv->mdev->priv.eswitch; if (esw->mode == SRIOV_NONE) return -EOPNOTSUPP; - uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); - if (uplink_dev) { - uplink_upper = netdev_master_upper_dev_get(uplink_dev); - uplink_priv = netdev_priv(uplink_dev); - } - - ppid->id_len = ETH_ALEN; - if (uplink_upper && mlx5_lag_is_sriov(uplink_priv->mdev)) { - ether_addr_copy(ppid->id, uplink_upper->dev_addr); - } else { - struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct mlx5_eswitch_rep *rep = rpriv->rep; - - ether_addr_copy(ppid->id, rep->hw_id); - } + parent_id = mlx5_query_nic_system_image_guid(priv->mdev); + ppid->id_len = sizeof(parent_id); + 
memcpy(ppid->id, &parent_id, sizeof(parent_id)); return 0; } @@ -1145,6 +1134,8 @@ static int mlx5e_rep_get_phys_port_name(struct net_device *dev, if (rep->vport == MLX5_VPORT_UPLINK) ret = snprintf(buf, len, "p%d", fn); + else if (rep->vport == MLX5_VPORT_PF) + ret = snprintf(buf, len, "pf%d", fn); else ret = snprintf(buf, len, "pf%dvf%d", fn, rep->vport - 1); @@ -1636,6 +1627,11 @@ static void mlx5e_rep_enable(struct mlx5e_priv *priv) mlx5e_set_netdev_mtu_boundaries(priv); } +static int mlx5e_update_rep_rx(struct mlx5e_priv *priv) +{ + return 0; +} + static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event, void *data) { struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb); @@ -1711,6 +1707,7 @@ static const struct mlx5e_profile mlx5e_rep_profile = { .init_tx = mlx5e_init_rep_tx, .cleanup_tx = mlx5e_cleanup_rep_tx, .enable = mlx5e_rep_enable, + .update_rx = mlx5e_update_rep_rx, .update_stats = mlx5e_rep_update_hw_counters, .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, @@ -1726,6 +1723,7 @@ static const struct mlx5e_profile mlx5e_uplink_rep_profile = { .cleanup_tx = mlx5e_cleanup_rep_tx, .enable = mlx5e_uplink_rep_enable, .disable = mlx5e_uplink_rep_disable, + .update_rx = mlx5e_update_rep_rx, .update_stats = mlx5e_uplink_rep_update_hw_counters, .update_carrier = mlx5e_update_carrier, .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index eb8433cc49a7..e6b199cd68ea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -717,19 +717,22 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { + struct mlx5_flow_context *flow_context = &parse_attr->spec.flow_context; struct mlx5_nic_flow_attr *attr = flow->nic_attr; struct mlx5_core_dev *dev = priv->mdev; struct mlx5_flow_destination dest[2] = {}; struct mlx5_flow_act flow_act = { .action = attr->action, - .flow_tag = attr->flow_tag, .reformat_id = 0, - .flags = FLOW_ACT_HAS_TAG | FLOW_ACT_NO_APPEND, + .flags = FLOW_ACT_NO_APPEND, }; struct mlx5_fc *counter = NULL; bool table_created = false; int err, dest_ix = 0; + flow_context->flags |= FLOW_CONTEXT_HAS_TAG; + flow_context->flow_tag = attr->flow_tag; + if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) { err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack); if (err) { @@ -2799,6 +2802,16 @@ static int add_vlan_pop_action(struct mlx5e_priv *priv, return err; } +bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, + struct net_device *out_dev) +{ + if (is_merged_eswitch_dev(priv, out_dev)) + return true; + + return mlx5e_eswitch_rep(out_dev) && + same_hw_devs(priv, netdev_priv(out_dev)); +} + static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct flow_action *flow_action, struct mlx5e_tc_flow *flow, @@ -2864,9 +2877,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; - if (netdev_port_same_parent_id(priv->netdev, - out_dev) || - is_merged_eswitch_dev(priv, out_dev)) { + if (netdev_port_same_parent_id(priv->netdev, out_dev)) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); struct net_device *uplink_upper = netdev_master_upper_dev_get(uplink_dev); @@ -2883,6 +2894,7 
@@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, if (err) return err; } + if (is_vlan_dev(parse_attr->filter_dev)) { err = add_vlan_pop_action(priv, attr, &action); @@ -2890,8 +2902,13 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, return err; } - if (!mlx5e_eswitch_rep(out_dev)) + if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) { + NL_SET_ERR_MSG_MOD(extack, + "devices are not on same switch HW, can't offload forwarding"); + pr_err("devices %s %s not on same switch HW, can't offload forwarding\n", + priv->netdev->name, out_dev->name); return -EOPNOTSUPP; + } out_priv = netdev_priv(out_dev); rpriv = out_priv->ppriv; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index f62e81902d27..8f288cc53cee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -74,6 +74,9 @@ int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags); void mlx5e_tc_reoffload_flows_work(struct work_struct *work); +bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, + struct net_device *out_dev); + #else /* CONFIG_MLX5_ESWITCH */ static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; } static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 9ae327e80d6d..3c98b7fe2923 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -49,26 +49,24 @@ static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c) static void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq) { struct mlx5e_sq_stats *stats = sq->stats; - struct net_dim_sample dim_sample; + struct dim_sample dim_sample; if (unlikely(!test_bit(MLX5E_SQ_STATE_AM, &sq->state))) return; - net_dim_sample(sq->cq.event_ctr, stats->packets, stats->bytes, - &dim_sample); + dim_update_sample(sq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample); net_dim(&sq->dim, dim_sample); } static void mlx5e_handle_rx_dim(struct mlx5e_rq *rq) { struct mlx5e_rq_stats *stats = rq->stats; - struct net_dim_sample dim_sample; + struct dim_sample dim_sample; if (unlikely(!test_bit(MLX5E_RQ_STATE_AM, &rq->state))) return; - net_dim_sample(rq->cq.event_ctr, stats->packets, stats->bytes, - &dim_sample); + dim_update_sample(rq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample); net_dim(&rq->dim, dim_sample); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 5e9319d3d90c..58fff2f39b38 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -61,17 +61,21 @@ enum { MLX5_EQ_DOORBEL_OFFSET = 0x40, }; -struct mlx5_irq_info { - cpumask_var_t mask; - char name[MLX5_MAX_IRQ_NAME]; - void *context; /* dev_id provided to request_irq */ +/* budget must be smaller than MLX5_NUM_SPARE_EQE to guarantee that we update + * the ci before we polled all the entries in the EQ. MLX5_NUM_SPARE_EQE is + * used to set the EQ size, budget must be smaller than the EQ size. 
+ */ +enum { + MLX5_EQ_POLLING_BUDGET = 128, }; +static_assert(MLX5_EQ_POLLING_BUDGET <= MLX5_NUM_SPARE_EQE); + struct mlx5_eq_table { struct list_head comp_eqs_list; - struct mlx5_eq pages_eq; - struct mlx5_eq cmd_eq; - struct mlx5_eq async_eq; + struct mlx5_eq_async pages_eq; + struct mlx5_eq_async cmd_eq; + struct mlx5_eq_async async_eq; struct atomic_notifier_head nh[MLX5_EVENT_TYPE_MAX]; @@ -79,11 +83,8 @@ struct mlx5_eq_table { struct mlx5_nb cq_err_nb; struct mutex lock; /* sync async eqs creations */ - int num_comp_vectors; - struct mlx5_irq_info *irq_info; -#ifdef CONFIG_RFS_ACCEL - struct cpu_rmap *rmap; -#endif + int num_comp_eqs; + struct mlx5_irq_table *irq_table; }; #define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \ @@ -124,16 +125,24 @@ static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn) return cq; } -static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr) +static int mlx5_eq_comp_int(struct notifier_block *nb, + __always_unused unsigned long action, + __always_unused void *data) { - struct mlx5_eq_comp *eq_comp = eq_ptr; - struct mlx5_eq *eq = eq_ptr; + struct mlx5_eq_comp *eq_comp = + container_of(nb, struct mlx5_eq_comp, irq_nb); + struct mlx5_eq *eq = &eq_comp->core; struct mlx5_eqe *eqe; - int set_ci = 0; + int num_eqes = 0; u32 cqn = -1; - while ((eqe = next_eqe_sw(eq))) { + eqe = next_eqe_sw(eq); + if (!eqe) + goto out; + + do { struct mlx5_core_cq *cq; + /* Make sure we read EQ entry contents after we've * checked the ownership bit. */ @@ -151,26 +160,16 @@ static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr) } ++eq->cons_index; - ++set_ci; - /* The HCA will think the queue has overflowed if we - * don't tell it we've been processing events. We - * create our EQs with MLX5_NUM_SPARE_EQE extra - * entries, so we must update our consumer index at - * least that often. - */ - if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) { - eq_update_ci(eq, 0); - set_ci = 0; - } - } + } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq))); +out: eq_update_ci(eq, 1); if (cqn != -1) tasklet_schedule(&eq_comp->tasklet_ctx.task); - return IRQ_HANDLED; + return 0; } /* Some architectures don't latch interrupts when they are disabled, so using @@ -184,25 +183,32 @@ u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq) disable_irq(eq->core.irqn); count_eqe = eq->core.cons_index; - mlx5_eq_comp_int(eq->core.irqn, eq); + mlx5_eq_comp_int(&eq->irq_nb, 0, NULL); count_eqe = eq->core.cons_index - count_eqe; enable_irq(eq->core.irqn); return count_eqe; } -static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) +static int mlx5_eq_async_int(struct notifier_block *nb, + unsigned long action, void *data) { - struct mlx5_eq *eq = eq_ptr; + struct mlx5_eq_async *eq_async = + container_of(nb, struct mlx5_eq_async, irq_nb); + struct mlx5_eq *eq = &eq_async->core; struct mlx5_eq_table *eqt; struct mlx5_core_dev *dev; struct mlx5_eqe *eqe; - int set_ci = 0; + int num_eqes = 0; dev = eq->dev; eqt = dev->priv.eq_table; - while ((eqe = next_eqe_sw(eq))) { + eqe = next_eqe_sw(eq); + if (!eqe) + goto out; + + do { /* * Make sure we read EQ entry contents after we've * checked the ownership bit. @@ -217,23 +223,13 @@ static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) atomic_notifier_call_chain(&eqt->nh[MLX5_EVENT_TYPE_NOTIFY_ANY], eqe->type, eqe); ++eq->cons_index; - ++set_ci; - /* The HCA will think the queue has overflowed if we - * don't tell it we've been processing events. 
We - * create our EQs with MLX5_NUM_SPARE_EQE extra - * entries, so we must update our consumer index at - * least that often. - */ - if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) { - eq_update_ci(eq, 0); - set_ci = 0; - } - } + } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq))); +out: eq_update_ci(eq, 1); - return IRQ_HANDLED; + return 0; } static void init_eq_buf(struct mlx5_eq *eq) @@ -248,23 +244,19 @@ static void init_eq_buf(struct mlx5_eq *eq) } static int -create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, +create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, struct mlx5_eq_param *param) { - struct mlx5_eq_table *eq_table = dev->priv.eq_table; struct mlx5_cq_table *cq_table = &eq->cq_table; u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; struct mlx5_priv *priv = &dev->priv; - u8 vecidx = param->index; + u8 vecidx = param->irq_index; __be64 *pas; void *eqc; int inlen; u32 *in; int err; - if (eq_table->irq_info[vecidx].context) - return -EEXIST; - /* Init CQ table */ memset(cq_table, 0, sizeof(*cq_table)); spin_lock_init(&cq_table->lock); @@ -307,34 +299,19 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, if (err) goto err_in; - snprintf(eq_table->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s", - name, pci_name(dev->pdev)); - eq_table->irq_info[vecidx].context = param->context; - eq->vecidx = vecidx; eq->eqn = MLX5_GET(create_eq_out, out, eq_number); eq->irqn = pci_irq_vector(dev->pdev, vecidx); eq->dev = dev; eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET; - err = request_irq(eq->irqn, param->handler, 0, - eq_table->irq_info[vecidx].name, param->context); - if (err) - goto err_eq; err = mlx5_debug_eq_add(dev, eq); if (err) - goto err_irq; - - /* EQs are created in ARMED state - */ - eq_update_ci(eq, 1); + goto err_eq; kvfree(in); return 0; -err_irq: - free_irq(eq->irqn, eq); - err_eq: mlx5_cmd_destroy_eq(dev, eq->eqn); @@ -346,18 +323,48 @@ err_buf: return err; } -static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +/** + * mlx5_eq_enable - Enable EQ for receiving EQEs + * @dev - Device which owns the eq + * @eq - EQ to enable + * @nb - notifier call block + * mlx5_eq_enable - must be called after EQ is created in device. + */ +int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + struct notifier_block *nb) { struct mlx5_eq_table *eq_table = dev->priv.eq_table; - struct mlx5_irq_info *irq_info; int err; - irq_info = &eq_table->irq_info[eq->vecidx]; + err = mlx5_irq_attach_nb(eq_table->irq_table, eq->vecidx, nb); + if (!err) + eq_update_ci(eq, 1); - mlx5_debug_eq_remove(dev, eq); + return err; +} +EXPORT_SYMBOL(mlx5_eq_enable); - free_irq(eq->irqn, irq_info->context); - irq_info->context = NULL; +/** + * mlx5_eq_disable - Disable EQ for receiving EQEs + * @dev - Device which owns the eq + * @eq - EQ to disable + * @nb - notifier call block + * mlx5_eq_disable - must be called before EQ is destroyed. 
+ */ +void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + struct notifier_block *nb) +{ + struct mlx5_eq_table *eq_table = dev->priv.eq_table; + + mlx5_irq_detach_nb(eq_table->irq_table, eq->vecidx, nb); +} +EXPORT_SYMBOL(mlx5_eq_disable); + +static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +{ + int err; + + mlx5_debug_eq_remove(dev, eq); err = mlx5_cmd_destroy_eq(dev, eq->eqn); if (err) @@ -423,6 +430,7 @@ int mlx5_eq_table_init(struct mlx5_core_dev *dev) for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++) ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]); + eq_table->irq_table = dev->priv.irq_table; return 0; kvfree_eq_table: @@ -439,19 +447,20 @@ void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev) /* Async EQs */ -static int create_async_eq(struct mlx5_core_dev *dev, const char *name, +static int create_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, struct mlx5_eq_param *param) { struct mlx5_eq_table *eq_table = dev->priv.eq_table; int err; mutex_lock(&eq_table->lock); - if (param->index >= MLX5_EQ_MAX_ASYNC_EQS) { - err = -ENOSPC; + /* Async EQs must share irq index 0 */ + if (param->irq_index != 0) { + err = -EINVAL; goto unlock; } - err = create_map_eq(dev, eq, name, param); + err = create_map_eq(dev, eq, param); unlock: mutex_unlock(&eq_table->lock); return err; @@ -480,7 +489,7 @@ static int cq_err_event_notifier(struct notifier_block *nb, /* type == MLX5_EVENT_TYPE_CQ_ERROR */ eqt = mlx5_nb_cof(nb, struct mlx5_eq_table, cq_err_nb); - eq = &eqt->async_eq; + eq = &eqt->async_eq.core; eqe = data; cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff; @@ -549,55 +558,73 @@ static int create_async_eqs(struct mlx5_core_dev *dev) MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR); mlx5_eq_notifier_register(dev, &table->cq_err_nb); + table->cmd_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { - .index = MLX5_EQ_CMD_IDX, + .irq_index = 0, .mask = 1ull << MLX5_EVENT_TYPE_CMD, .nent = MLX5_NUM_CMD_EQE, - .context = &table->cmd_eq, - .handler = mlx5_eq_async_int, }; - err = create_async_eq(dev, "mlx5_cmd_eq", &table->cmd_eq, &param); + err = create_async_eq(dev, &table->cmd_eq.core, &param); if (err) { mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err); goto err0; } - + err = mlx5_eq_enable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb); + if (err) { + mlx5_core_warn(dev, "failed to enable cmd EQ %d\n", err); + goto err1; + } mlx5_cmd_use_events(dev); + table->async_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { - .index = MLX5_EQ_ASYNC_IDX, + .irq_index = 0, .mask = gather_async_events_mask(dev), .nent = MLX5_NUM_ASYNC_EQE, - .context = &table->async_eq, - .handler = mlx5_eq_async_int, }; - err = create_async_eq(dev, "mlx5_async_eq", &table->async_eq, &param); + err = create_async_eq(dev, &table->async_eq.core, &param); if (err) { mlx5_core_warn(dev, "failed to create async EQ %d\n", err); - goto err1; + goto err2; + } + err = mlx5_eq_enable(dev, &table->async_eq.core, + &table->async_eq.irq_nb); + if (err) { + mlx5_core_warn(dev, "failed to enable async EQ %d\n", err); + goto err3; } + table->pages_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { - .index = MLX5_EQ_PAGEREQ_IDX, + .irq_index = 0, .mask = 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, .nent = /* TODO: sriov max_vf + */ 1, - .context = &table->pages_eq, - .handler = mlx5_eq_async_int, }; - err = create_async_eq(dev, "mlx5_pages_eq", &table->pages_eq, &param); + err = create_async_eq(dev, &table->pages_eq.core, 
&param); if (err) { mlx5_core_warn(dev, "failed to create pages EQ %d\n", err); - goto err2; + goto err4; + } + err = mlx5_eq_enable(dev, &table->pages_eq.core, + &table->pages_eq.irq_nb); + if (err) { + mlx5_core_warn(dev, "failed to enable pages EQ %d\n", err); + goto err5; } return err; +err5: + destroy_async_eq(dev, &table->pages_eq.core); +err4: + mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb); +err3: + destroy_async_eq(dev, &table->async_eq.core); err2: - destroy_async_eq(dev, &table->async_eq); - -err1: mlx5_cmd_use_polling(dev); - destroy_async_eq(dev, &table->cmd_eq); + mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb); +err1: + destroy_async_eq(dev, &table->cmd_eq.core); err0: mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); return err; @@ -608,19 +635,22 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev) struct mlx5_eq_table *table = dev->priv.eq_table; int err; - err = destroy_async_eq(dev, &table->pages_eq); + mlx5_eq_disable(dev, &table->pages_eq.core, &table->pages_eq.irq_nb); + err = destroy_async_eq(dev, &table->pages_eq.core); if (err) mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n", err); - err = destroy_async_eq(dev, &table->async_eq); + mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb); + err = destroy_async_eq(dev, &table->async_eq.core); if (err) mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n", err); mlx5_cmd_use_polling(dev); - err = destroy_async_eq(dev, &table->cmd_eq); + mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb); + err = destroy_async_eq(dev, &table->cmd_eq.core); if (err) mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n", err); @@ -630,24 +660,24 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev) struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev) { - return &dev->priv.eq_table->async_eq; + return &dev->priv.eq_table->async_eq.core; } void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev) { - synchronize_irq(dev->priv.eq_table->async_eq.irqn); + synchronize_irq(dev->priv.eq_table->async_eq.core.irqn); } void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev) { - synchronize_irq(dev->priv.eq_table->cmd_eq.irqn); + synchronize_irq(dev->priv.eq_table->cmd_eq.core.irqn); } /* Generic EQ API for mlx5_core consumers * Needed For RDMA ODP EQ for now */ struct mlx5_eq * -mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name, +mlx5_eq_create_generic(struct mlx5_core_dev *dev, struct mlx5_eq_param *param) { struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL); @@ -656,7 +686,7 @@ mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name, if (!eq) return ERR_PTR(-ENOMEM); - err = create_async_eq(dev, name, eq, param); + err = create_async_eq(dev, eq, param); if (err) { kvfree(eq); eq = ERR_PTR(err); @@ -714,84 +744,14 @@ void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm) } EXPORT_SYMBOL(mlx5_eq_update_ci); -/* Completion EQs */ - -static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) -{ - struct mlx5_priv *priv = &mdev->priv; - int vecidx = MLX5_EQ_VEC_COMP_BASE + i; - int irq = pci_irq_vector(mdev->pdev, vecidx); - struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx]; - - if (!zalloc_cpumask_var(&irq_info->mask, GFP_KERNEL)) { - mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); - return -ENOMEM; - } - - cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node), - irq_info->mask); - - if (IS_ENABLED(CONFIG_SMP) && - irq_set_affinity_hint(irq, irq_info->mask)) 
- mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq); - - return 0; -} - -static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) -{ - int vecidx = MLX5_EQ_VEC_COMP_BASE + i; - struct mlx5_priv *priv = &mdev->priv; - int irq = pci_irq_vector(mdev->pdev, vecidx); - struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx]; - - irq_set_affinity_hint(irq, NULL); - free_cpumask_var(irq_info->mask); -} - -static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev) -{ - int err; - int i; - - for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++) { - err = set_comp_irq_affinity_hint(mdev, i); - if (err) - goto err_out; - } - - return 0; - -err_out: - for (i--; i >= 0; i--) - clear_comp_irq_affinity_hint(mdev, i); - - return err; -} - -static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev) -{ - int i; - - for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++) - clear_comp_irq_affinity_hint(mdev, i); -} - static void destroy_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_eq_comp *eq, *n; - clear_comp_irqs_affinity_hints(dev); - -#ifdef CONFIG_RFS_ACCEL - if (table->rmap) { - free_irq_cpu_rmap(table->rmap); - table->rmap = NULL; - } -#endif list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { list_del(&eq->list); + mlx5_eq_disable(dev, &eq->core, &eq->irq_nb); if (destroy_unmap_eq(dev, &eq->core)) mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n", eq->core.eqn); @@ -803,23 +763,17 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev) static int create_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; - char name[MLX5_MAX_IRQ_NAME]; struct mlx5_eq_comp *eq; - int ncomp_vec; + int ncomp_eqs; int nent; int err; int i; INIT_LIST_HEAD(&table->comp_eqs_list); - ncomp_vec = table->num_comp_vectors; + ncomp_eqs = table->num_comp_eqs; nent = MLX5_COMP_EQ_SIZE; -#ifdef CONFIG_RFS_ACCEL - table->rmap = alloc_irq_cpu_rmap(ncomp_vec); - if (!table->rmap) - return -ENOMEM; -#endif - for (i = 0; i < ncomp_vec; i++) { - int vecidx = i + MLX5_EQ_VEC_COMP_BASE; + for (i = 0; i < ncomp_eqs; i++) { + int vecidx = i + MLX5_IRQ_VEC_COMP_BASE; struct mlx5_eq_param param = {}; eq = kzalloc(sizeof(*eq), GFP_KERNEL); @@ -834,33 +788,29 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb, (unsigned long)&eq->tasklet_ctx); -#ifdef CONFIG_RFS_ACCEL - irq_cpu_rmap_add(table->rmap, pci_irq_vector(dev->pdev, vecidx)); -#endif - snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i); + eq->irq_nb.notifier_call = mlx5_eq_comp_int; param = (struct mlx5_eq_param) { - .index = vecidx, + .irq_index = vecidx, .mask = 0, .nent = nent, - .context = &eq->core, - .handler = mlx5_eq_comp_int }; - err = create_map_eq(dev, &eq->core, name, &param); + err = create_map_eq(dev, &eq->core, &param); + if (err) { + kfree(eq); + goto clean; + } + err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb); if (err) { + destroy_unmap_eq(dev, &eq->core); kfree(eq); goto clean; } + mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn); /* add tail, to keep the list ordered, for mlx5_vector2eqn to work */ list_add_tail(&eq->list, &table->comp_eqs_list); } - err = set_comp_irq_affinity_hints(dev); - if (err) { - mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n"); - goto clean; - } - return 0; clean: @@ -891,22 +841,24 @@ EXPORT_SYMBOL(mlx5_vector2eqn); unsigned int mlx5_comp_vectors_count(struct 
mlx5_core_dev *dev) { - return dev->priv.eq_table->num_comp_vectors; + return dev->priv.eq_table->num_comp_eqs; } EXPORT_SYMBOL(mlx5_comp_vectors_count); struct cpumask * mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector) { - /* TODO: consider irq_get_affinity_mask(irq) */ - return dev->priv.eq_table->irq_info[vector + MLX5_EQ_VEC_COMP_BASE].mask; + int vecidx = vector + MLX5_IRQ_VEC_COMP_BASE; + + return mlx5_irq_get_affinity_mask(dev->priv.eq_table->irq_table, + vecidx); } EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask); #ifdef CONFIG_RFS_ACCEL struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev) { - return dev->priv.eq_table->rmap; + return mlx5_irq_get_rmap(dev->priv.eq_table->irq_table); } #endif @@ -927,82 +879,19 @@ struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn) void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; - int i, max_eqs; - - clear_comp_irqs_affinity_hints(dev); - -#ifdef CONFIG_RFS_ACCEL - if (table->rmap) { - free_irq_cpu_rmap(table->rmap); - table->rmap = NULL; - } -#endif mutex_lock(&table->lock); /* sync with create/destroy_async_eq */ - max_eqs = table->num_comp_vectors + MLX5_EQ_VEC_COMP_BASE; - for (i = max_eqs - 1; i >= 0; i--) { - if (!table->irq_info[i].context) - continue; - free_irq(pci_irq_vector(dev->pdev, i), table->irq_info[i].context); - table->irq_info[i].context = NULL; - } + mlx5_irq_table_destroy(dev); mutex_unlock(&table->lock); - pci_free_irq_vectors(dev->pdev); -} - -static int alloc_irq_vectors(struct mlx5_core_dev *dev) -{ - struct mlx5_priv *priv = &dev->priv; - struct mlx5_eq_table *table = priv->eq_table; - int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? - MLX5_CAP_GEN(dev, max_num_eqs) : - 1 << MLX5_CAP_GEN(dev, log_max_eq); - int nvec; - int err; - - nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + - MLX5_EQ_VEC_COMP_BASE; - nvec = min_t(int, nvec, num_eqs); - if (nvec <= MLX5_EQ_VEC_COMP_BASE) - return -ENOMEM; - - table->irq_info = kcalloc(nvec, sizeof(*table->irq_info), GFP_KERNEL); - if (!table->irq_info) - return -ENOMEM; - - nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_EQ_VEC_COMP_BASE + 1, - nvec, PCI_IRQ_MSIX); - if (nvec < 0) { - err = nvec; - goto err_free_irq_info; - } - - table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE; - - return 0; - -err_free_irq_info: - kfree(table->irq_info); - return err; -} - -static void free_irq_vectors(struct mlx5_core_dev *dev) -{ - struct mlx5_priv *priv = &dev->priv; - - pci_free_irq_vectors(dev->pdev); - kfree(priv->eq_table->irq_info); } int mlx5_eq_table_create(struct mlx5_core_dev *dev) { + struct mlx5_eq_table *eq_table = dev->priv.eq_table; int err; - err = alloc_irq_vectors(dev); - if (err) { - mlx5_core_err(dev, "alloc irq vectors failed\n"); - return err; - } + eq_table->num_comp_eqs = + mlx5_irq_get_num_comp(eq_table->irq_table); err = create_async_eqs(dev); if (err) { @@ -1020,7 +909,6 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev) err_comp_eqs: destroy_async_eqs(dev); err_async_eqs: - free_irq_vectors(dev); return err; } @@ -1028,7 +916,6 @@ void mlx5_eq_table_destroy(struct mlx5_core_dev *dev) { destroy_comp_eqs(dev); destroy_async_eqs(dev); - free_irq_vectors(dev); } int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 5414e8f82d5f..67e76979bb42 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c 
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -134,6 +134,30 @@ static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } +int mlx5_eswitch_modify_esw_vport_context(struct mlx5_eswitch *esw, u16 vport, + void *in, int inlen) +{ + return modify_esw_vport_context_cmd(esw->dev, vport, in, inlen); +} + +static int query_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, + void *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {}; + + MLX5_SET(query_esw_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT); + MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport); + MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); +} + +int mlx5_eswitch_query_esw_vport_context(struct mlx5_eswitch *esw, u16 vport, + void *out, int outlen) +{ + return query_esw_vport_context_cmd(esw->dev, vport, out, outlen); +} + static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport, u16 vlan, u8 qos, u8 set_flags) { @@ -939,7 +963,7 @@ int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, vport->vport, MLX5_CAP_ESW_EGRESS_ACL(dev, log_max_ft_size)); root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_EGRESS, - vport->vport); + mlx5_eswitch_vport_num_to_index(esw, vport->vport)); if (!root_ns) { esw_warn(dev, "Failed to get E-Switch egress flow namespace for vport (%d)\n", vport->vport); return -EOPNOTSUPP; @@ -1057,7 +1081,7 @@ int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, vport->vport, MLX5_CAP_ESW_INGRESS_ACL(dev, log_max_ft_size)); root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS, - vport->vport); + mlx5_eswitch_vport_num_to_index(esw, vport->vport)); if (!root_ns) { esw_warn(dev, "Failed to get E-Switch ingress flow namespace for vport (%d)\n", vport->vport); return -EOPNOTSUPP; @@ -1168,6 +1192,8 @@ void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw, vport->ingress.drop_rule = NULL; vport->ingress.allow_rule = NULL; + + esw_vport_del_ingress_acl_modify_metadata(esw, vport); } void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw, @@ -1686,10 +1712,9 @@ static int eswitch_vport_event(struct notifier_block *nb, return NOTIFY_OK; } -static int query_esw_functions(struct mlx5_core_dev *dev, - u32 *out, int outlen) +int mlx5_esw_query_functions(struct mlx5_core_dev *dev, u32 *out, int outlen) { - u32 in[MLX5_ST_SZ_DW(query_esw_functions_in)] = {0}; + u32 in[MLX5_ST_SZ_DW(query_esw_functions_in)] = {}; MLX5_SET(query_esw_functions_in, in, opcode, MLX5_CMD_OP_QUERY_ESW_FUNCTIONS); @@ -1697,22 +1722,6 @@ static int query_esw_functions(struct mlx5_core_dev *dev, return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); } -int mlx5_esw_query_functions(struct mlx5_core_dev *dev, u16 *num_vfs) -{ - u32 out[MLX5_ST_SZ_DW(query_esw_functions_out)] = {0}; - int err; - - err = query_esw_functions(dev, out, sizeof(out)); - if (err) - return err; - - *num_vfs = MLX5_GET(query_esw_functions_out, out, - host_params_context.host_num_of_vfs); - esw_debug(dev, "host_num_of_vfs=%d\n", *num_vfs); - - return 0; -} - /* Public E-Switch API */ #define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev)) @@ -1720,7 +1729,6 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { struct mlx5_vport *vport; int total_nvports = 0; - u16 vf_nvports = 0; int err; int i, enabled_events; @@ -1739,15 +1747,10 @@ int 
mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode); if (mode == SRIOV_OFFLOADS) { - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - err = mlx5_esw_query_functions(esw->dev, &vf_nvports); - if (err) - return err; + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) total_nvports = esw->total_vports; - } else { - vf_nvports = nvfs; + else total_nvports = nvfs + MLX5_SPECIAL_VPORTS(esw->dev); - } } esw->mode = mode; @@ -1761,7 +1764,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) } else { mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); - err = esw_offloads_init(esw, vf_nvports, total_nvports); + err = esw_offloads_init(esw, nvfs, total_nvports); } if (err) @@ -2480,6 +2483,17 @@ u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw) } EXPORT_SYMBOL_GPL(mlx5_eswitch_mode); +enum devlink_eswitch_encap_mode +mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev) +{ + struct mlx5_eswitch *esw; + + esw = dev->priv.eswitch; + return ESW_ALLOWED(esw) ? esw->offloads.encap : + DEVLINK_ESWITCH_ENCAP_MODE_NONE; +} +EXPORT_SYMBOL(mlx5_eswitch_get_encap_mode); + bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { if ((dev0->priv.eswitch->mode == SRIOV_NONE && diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 849a628f6d17..b6472cf166ec 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -68,6 +68,8 @@ struct vport_ingress { struct mlx5_flow_group *allow_spoofchk_only_grp; struct mlx5_flow_group *allow_untagged_only_grp; struct mlx5_flow_group *drop_grp; + int modify_metadata_id; + struct mlx5_flow_handle *modify_metadata_rule; struct mlx5_flow_handle *allow_rule; struct mlx5_flow_handle *drop_rule; struct mlx5_fc *drop_counter; @@ -178,7 +180,7 @@ struct mlx5_esw_offload { const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES]; u8 inline_mode; u64 num_flows; - u8 encap; + enum devlink_eswitch_encap_mode encap; }; /* E-Switch MC FDB table hash node */ @@ -198,6 +200,10 @@ struct mlx5_esw_functions { u16 num_vfs; }; +enum { + MLX5_ESWITCH_VPORT_MATCH_METADATA = BIT(0), +}; + struct mlx5_eswitch { struct mlx5_core_dev *dev; struct mlx5_nb nb; @@ -205,6 +211,7 @@ struct mlx5_eswitch { struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE]; struct workqueue_struct *work_queue; struct mlx5_vport *vports; + u32 flags; int total_vports; int enabled_vports; /* Synchronize between vport change events @@ -242,6 +249,8 @@ void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw, struct mlx5_vport *vport); void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, + struct mlx5_vport *vport); /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); @@ -269,6 +278,11 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, struct ifla_vf_stats *vf_stats); void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule); +int mlx5_eswitch_modify_esw_vport_context(struct mlx5_eswitch *esw, u16 vport, + void *in, int inlen); +int mlx5_eswitch_query_esw_vport_context(struct mlx5_eswitch *esw, u16 vport, + void *out, int outlen); + struct mlx5_flow_spec; struct mlx5_esw_flow_attr; struct mlx5_termtbl_handle; @@ -379,9 +393,11 @@ int 
mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, struct netlink_ext_ack *extack); int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode); int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode); -int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap, +int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, + enum devlink_eswitch_encap_mode encap, struct netlink_ext_ack *extack); -int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap); +int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, + enum devlink_eswitch_encap_mode *encap); void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type); int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, @@ -409,7 +425,7 @@ bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1); -int mlx5_esw_query_functions(struct mlx5_core_dev *dev, u16 *num_vfs); +int mlx5_esw_query_functions(struct mlx5_core_dev *dev, u32 *out, int outlen); #define MLX5_DEBUG_ESWITCH_MASK BIT(3) @@ -528,6 +544,8 @@ void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw); struct mlx5_vport *__must_check mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num); +bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num); + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } @@ -536,6 +554,11 @@ static inline int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {} static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; } static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; } +static inline int +mlx5_esw_query_functions(struct mlx5_core_dev *dev, u32 *out, int outlen) +{ + return -EOPNOTSUPP; +} #define FDB_MAX_CHAIN 1 #define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 060de01f09b6..af08e06f73ae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -88,6 +88,53 @@ u16 mlx5_eswitch_get_prio_range(struct mlx5_eswitch *esw) return 1; } +static void +mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr) +{ + void *misc2; + void *misc; + + /* Use metadata matching because vport is not represented by single + * VHCA in dual-port RoCE mode, and matching on source vport may fail. 
+ */ + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(attr->in_mdev->priv.eswitch, + attr->in_rep->vport)); + + misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); + MLX5_SET_TO_ONES(fte_match_set_misc2, misc2, metadata_reg_c_0); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + if (memchr_inv(misc, 0, MLX5_ST_SZ_BYTES(fte_match_set_misc))) + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport); + + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + MLX5_SET(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id, + MLX5_CAP_GEN(attr->in_mdev, vhca_id)); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + MLX5_SET_TO_ONES(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + } + + if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source) && + attr->in_rep->vport == MLX5_VPORT_UPLINK) + spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; +} + struct mlx5_flow_handle * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, @@ -99,7 +146,6 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule; struct mlx5_flow_table *fdb; int j, i = 0; - void *misc; if (esw->mode != SRIOV_OFFLOADS) return ERR_PTR(-EOPNOTSUPP); @@ -159,21 +205,8 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, i++; } - misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport); - - if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) - MLX5_SET(fte_match_set_misc, misc, - source_eswitch_owner_vhca_id, - MLX5_CAP_GEN(attr->in_mdev, vhca_id)); - - misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); - if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) - MLX5_SET_TO_ONES(fte_match_set_misc, misc, - source_eswitch_owner_vhca_id); + mlx5_eswitch_set_rule_source_port(esw, spec, attr); - spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) { if (attr->tunnel_match_level != MLX5_MATCH_NONE) spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; @@ -223,7 +256,6 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, struct mlx5_flow_table *fast_fdb; struct mlx5_flow_table *fwd_fdb; struct mlx5_flow_handle *rule; - void *misc; int i; fast_fdb = esw_get_prio_table(esw, attr->chain, attr->prio, 0); @@ -255,25 +287,11 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, dest[i].ft = fwd_fdb, i++; - misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport); - - if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) - MLX5_SET(fte_match_set_misc, misc, - source_eswitch_owner_vhca_id, - MLX5_CAP_GEN(attr->in_mdev, vhca_id)); + mlx5_eswitch_set_rule_source_port(esw, spec, attr); - misc = 
MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); - if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) - MLX5_SET_TO_ONES(fte_match_set_misc, misc, - source_eswitch_owner_vhca_id); - - if (attr->match_level == MLX5_MATCH_NONE) - spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; - else - spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS | - MLX5_MATCH_MISC_PARAMETERS; + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + if (attr->match_level != MLX5_MATCH_NONE) + spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i); @@ -570,23 +588,87 @@ void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule) mlx5_del_flow_rules(rule); } -static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev, +static int mlx5_eswitch_enable_passing_vport_metadata(struct mlx5_eswitch *esw) +{ + u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {}; + u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {}; + u8 fdb_to_vport_reg_c_id; + int err; + + err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport, + out, sizeof(out)); + if (err) + return err; + + fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.fdb_to_vport_reg_c_id); + + fdb_to_vport_reg_c_id |= MLX5_FDB_TO_VPORT_REG_C_0; + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id); + + MLX5_SET(modify_esw_vport_context_in, in, + field_select.fdb_to_vport_reg_c_id, 1); + + return mlx5_eswitch_modify_esw_vport_context(esw, esw->manager_vport, + in, sizeof(in)); +} + +static int mlx5_eswitch_disable_passing_vport_metadata(struct mlx5_eswitch *esw) +{ + u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {}; + u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {}; + u8 fdb_to_vport_reg_c_id; + int err; + + err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport, + out, sizeof(out)); + if (err) + return err; + + fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.fdb_to_vport_reg_c_id); + + fdb_to_vport_reg_c_id &= ~MLX5_FDB_TO_VPORT_REG_C_0; + + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id); + + MLX5_SET(modify_esw_vport_context_in, in, + field_select.fdb_to_vport_reg_c_id, 1); + + return mlx5_eswitch_modify_esw_vport_context(esw, esw->manager_vport, + in, sizeof(in)); +} + +static void peer_miss_rules_setup(struct mlx5_eswitch *esw, + struct mlx5_core_dev *peer_dev, struct mlx5_flow_spec *spec, struct mlx5_flow_destination *dest) { - void *misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, - misc_parameters); + void *misc; - MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, - MLX5_CAP_GEN(peer_dev, vhca_id)); + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters_2); + MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0); - spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); - misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - misc_parameters); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, - source_eswitch_owner_vhca_id); + 
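/*
 * mlx5_eswitch_enable/disable_passing_vport_metadata() above follow a
 * query/modify pattern: read the current fdb_to_vport_reg_c_id bitmap, flip
 * only MLX5_FDB_TO_VPORT_REG_C_0, then write it back with the matching
 * field_select bit so the device updates just that field. Plain-C model of
 * that read-modify-write, with illustrative names; the real flow goes
 * through the query/modify_esw_vport_context commands:
 */
#include <stdbool.h>
#include <stdint.h>

#define REG_C_0	0x01	/* stand-in for MLX5_FDB_TO_VPORT_REG_C_0 */

struct vport_ctx {
	uint8_t fdb_to_vport_reg_c_id;
};

static void set_passing_metadata(struct vport_ctx *ctx, bool enable)
{
	uint8_t bits = ctx->fdb_to_vport_reg_c_id;	/* "query" step */

	if (enable)
		bits |= REG_C_0;
	else
		bits &= ~REG_C_0;

	/* "modify" with field_select.fdb_to_vport_reg_c_id = 1: only this
	 * field is written, the rest of the context is preserved. */
	ctx->fdb_to_vport_reg_c_id = bits;
}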
MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, + MLX5_CAP_GEN(peer_dev, vhca_id)); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id); + } dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest->vport.num = peer_dev->priv.eswitch->manager_vport; @@ -594,6 +676,26 @@ static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev, dest->vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; } +static void esw_set_peer_miss_rule_source_port(struct mlx5_eswitch *esw, + struct mlx5_eswitch *peer_esw, + struct mlx5_flow_spec *spec, + u16 vport) +{ + void *misc; + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(peer_esw, + vport)); + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, vport); + } +} + static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, struct mlx5_core_dev *peer_dev) { @@ -611,7 +713,7 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, if (!spec) return -ENOMEM; - peer_miss_rules_setup(peer_dev, spec, &dest); + peer_miss_rules_setup(esw, peer_dev, spec, &dest); flows = kvzalloc(nvports * sizeof(*flows), GFP_KERNEL); if (!flows) { @@ -624,7 +726,9 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, misc_parameters); if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_PF); + esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch, + spec, MLX5_VPORT_PF); + flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, &flow_act, &dest, 1); if (IS_ERR(flow)) { @@ -646,7 +750,10 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, } mlx5_esw_for_each_vf_vport_num(esw, i, mlx5_core_max_vfs(esw->dev)) { - MLX5_SET(fte_match_set_misc, misc, source_port, i); + esw_set_peer_miss_rule_source_port(esw, + peer_dev->priv.eswitch, + spec, i); + flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, &flow_act, &dest, 1); if (IS_ERR(flow)) { @@ -930,6 +1037,30 @@ static void esw_destroy_offloads_fast_fdb_tables(struct mlx5_eswitch *esw) #define MAX_PF_SQ 256 #define MAX_SQ_NVPORTS 32 +static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw, + u32 *flow_group_in) +{ + void *match_criteria = MLX5_ADDR_OF(create_flow_group_in, + flow_group_in, + match_criteria); + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + MLX5_SET(create_flow_group_in, flow_group_in, + match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS_2); + + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters_2.metadata_reg_c_0); + } else { + MLX5_SET(create_flow_group_in, flow_group_in, + match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS); + + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_port); + } +} + static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); @@ -1027,19 +1158,21 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) /* create peer esw miss group */ memset(flow_group_in, 0, inlen); - 
MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, - MLX5_MATCH_MISC_PARAMETERS); - match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, - match_criteria); + esw_set_flow_group_source_port(esw, flow_group_in); + + if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) { + match_criteria = MLX5_ADDR_OF(create_flow_group_in, + flow_group_in, + match_criteria); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - misc_parameters.source_port); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - misc_parameters.source_eswitch_owner_vhca_id); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_eswitch_owner_vhca_id); + + MLX5_SET(create_flow_group_in, flow_group_in, + source_eswitch_owner_vhca_id_valid, 1); + } - MLX5_SET(create_flow_group_in, flow_group_in, - source_eswitch_owner_vhca_id_valid, 1); MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + esw->total_vports - 1); @@ -1153,7 +1286,6 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw, int nvports) int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_group *g; u32 *flow_group_in; - void *match_criteria, *misc; int err = 0; nvports = nvports + MLX5_ESW_MISS_FLOWS; @@ -1163,12 +1295,8 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw, int nvports) /* create vport rx group */ memset(flow_group_in, 0, inlen); - MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, - MLX5_MATCH_MISC_PARAMETERS); - match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); - misc = MLX5_ADDR_OF(fte_match_param, match_criteria, misc_parameters); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + esw_set_flow_group_source_port(esw, flow_group_in); MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, nvports - 1); @@ -1207,13 +1335,24 @@ mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport, goto out; } - misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - MLX5_SET(fte_match_set_misc, misc, source_port, vport); + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(esw, vport)); - misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); + MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0); - spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, vport); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + } flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec, @@ -1449,34 +1588,13 @@ err_reps: return err; } -static int __load_reps_all_vport(struct mlx5_eswitch *esw, int nvports, - u8 rep_type) -{ - int err; - - /* Special vports must be loaded first. 
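/*
 * A flow group is an index range inside a flow table plus one shared set of
 * match criteria, and every FTE placed in the group must match on exactly
 * the fields the group declares. esw_set_flow_group_source_port() above
 * picks those criteria once (metadata reg_c_0 vs. source_port) so the
 * peer-miss and vport-rx groups stay consistent with the rule-insertion
 * paths. Minimal userspace model; the enum values mirror the
 * MLX5_MATCH_* bits but are illustrative:
 */
#include <stdint.h>

enum match_criteria {
	MATCH_MISC	= 1u << 1,	/* source_port lives here */
	MATCH_MISC_2	= 1u << 3,	/* metadata reg_c_0 lives here */
};

struct flow_group {
	uint32_t start_flow_index;
	uint32_t end_flow_index;
	uint32_t criteria_enable;
};

static int fte_fits_group(const struct flow_group *g, uint32_t index,
			  uint32_t criteria_enable)
{
	return index >= g->start_flow_index &&
	       index <= g->end_flow_index &&
	       criteria_enable == g->criteria_enable;
}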
*/ - err = __load_reps_special_vport(esw, rep_type); - if (err) - return err; - - err = __load_reps_vf_vport(esw, nvports, rep_type); - if (err) - goto err_vfs; - - return 0; - -err_vfs: - __unload_reps_special_vport(esw, rep_type); - return err; -} - -static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw, int nvports) +static int esw_offloads_load_special_vport(struct mlx5_eswitch *esw) { u8 rep_type = 0; int err; for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) { - err = __load_reps_all_vport(esw, nvports, rep_type); + err = __load_reps_special_vport(esw, rep_type); if (err) goto err_reps; } @@ -1485,7 +1603,7 @@ static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw, int nvports) err_reps: while (rep_type-- > 0) - __unload_reps_all_vport(esw, nvports, rep_type); + __unload_reps_special_vport(esw, rep_type); return err; } @@ -1521,6 +1639,10 @@ static int mlx5_esw_offloads_devcom_event(int event, switch (event) { case ESW_OFFLOADS_DEVCOM_PAIR: + if (mlx5_eswitch_vport_match_metadata_enabled(esw) != + mlx5_eswitch_vport_match_metadata_enabled(peer_esw)) + break; + err = mlx5_esw_offloads_pair(esw, peer_esw); if (err) goto err_out; @@ -1589,32 +1711,16 @@ static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { - struct mlx5_core_dev *dev = esw->dev; struct mlx5_flow_act flow_act = {0}; struct mlx5_flow_spec *spec; int err = 0; /* For prio tag mode, there is only 1 FTEs: - * 1) Untagged packets - push prio tag VLAN, allow + * 1) Untagged packets - push prio tag VLAN and modify metadata if + * required, allow * Unmatched traffic is allowed by default */ - if (!MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) - return -EOPNOTSUPP; - - esw_vport_cleanup_ingress_rules(esw, vport); - - err = esw_vport_enable_ingress_acl(esw, vport); - if (err) { - mlx5_core_warn(esw->dev, - "failed to enable prio tag ingress acl (%d) on vport[%d]\n", - err, vport->vport); - return err; - } - - esw_debug(esw->dev, - "vport[%d] configure ingress rules\n", vport->vport); - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) { err = -ENOMEM; @@ -1630,6 +1736,12 @@ static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw, flow_act.vlan[0].ethtype = ETH_P_8021Q; flow_act.vlan[0].vid = 0; flow_act.vlan[0].prio = 0; + + if (vport->ingress.modify_metadata_rule) { + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + flow_act.modify_id = vport->ingress.modify_metadata_id; + } + vport->ingress.allow_rule = mlx5_add_flow_rules(vport->ingress.acl, spec, &flow_act, NULL, 0); @@ -1650,6 +1762,58 @@ out_no_mem: return err; } +static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + u8 action[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)] = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_spec spec = {}; + int err = 0; + + MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_C_0); + MLX5_SET(set_action_in, action, data, + mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport)); + + err = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS, + 1, action, &vport->ingress.modify_metadata_id); + if (err) { + esw_warn(esw->dev, + "failed to alloc modify header for vport %d ingress acl (%d)\n", + vport->vport, err); + return err; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | 
MLX5_FLOW_CONTEXT_ACTION_ALLOW; + flow_act.modify_id = vport->ingress.modify_metadata_id; + vport->ingress.modify_metadata_rule = mlx5_add_flow_rules(vport->ingress.acl, + &spec, &flow_act, NULL, 0); + if (IS_ERR(vport->ingress.modify_metadata_rule)) { + err = PTR_ERR(vport->ingress.modify_metadata_rule); + esw_warn(esw->dev, + "failed to add setting metadata rule for vport %d ingress acl, err(%d)\n", + vport->vport, err); + vport->ingress.modify_metadata_rule = NULL; + goto out; + } + +out: + if (err) + mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata_id); + return err; +} + +void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (vport->ingress.modify_metadata_rule) { + mlx5_del_flow_rules(vport->ingress.modify_metadata_rule); + mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata_id); + + vport->ingress.modify_metadata_rule = NULL; + } +} + static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { @@ -1657,6 +1821,9 @@ static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec; int err = 0; + if (!MLX5_CAP_GEN(esw->dev, prio_tag_required)) + return 0; + /* For prio tag mode, there is only 1 FTEs: * 1) prio tag packets - pop the prio tag VLAN, allow * Unmatched traffic is allowed by default @@ -1710,27 +1877,98 @@ out_no_mem: return err; } -static int esw_prio_tag_acls_config(struct mlx5_eswitch *esw, int nvports) +static int esw_vport_ingress_common_config(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { - struct mlx5_vport *vport = NULL; - int i, j; int err; - mlx5_esw_for_each_vf_vport(esw, i, vport, nvports) { + if (!mlx5_eswitch_vport_match_metadata_enabled(esw) && + !MLX5_CAP_GEN(esw->dev, prio_tag_required)) + return 0; + + esw_vport_cleanup_ingress_rules(esw, vport); + + err = esw_vport_enable_ingress_acl(esw, vport); + if (err) { + esw_warn(esw->dev, + "failed to enable ingress acl (%d) on vport[%d]\n", + err, vport->vport); + return err; + } + + esw_debug(esw->dev, + "vport[%d] configure ingress rules\n", vport->vport); + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + err = esw_vport_add_ingress_acl_modify_metadata(esw, vport); + if (err) + goto out; + } + + if (MLX5_CAP_GEN(esw->dev, prio_tag_required) && + mlx5_eswitch_is_vf_vport(esw, vport->vport)) { err = esw_vport_ingress_prio_tag_config(esw, vport); if (err) - goto err_ingress; - err = esw_vport_egress_prio_tag_config(esw, vport); + goto out; + } + +out: + if (err) + esw_vport_disable_ingress_acl(esw, vport); + return err; +} + +static bool +esw_check_vport_match_metadata_supported(const struct mlx5_eswitch *esw) +{ + if (!MLX5_CAP_ESW(esw->dev, esw_uplink_ingress_acl)) + return false; + + if (!(MLX5_CAP_ESW_FLOWTABLE(esw->dev, fdb_to_vport_reg_c_id) & + MLX5_FDB_TO_VPORT_REG_C_0)) + return false; + + if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source)) + return false; + + if (mlx5_core_is_ecpf_esw_manager(esw->dev) || + mlx5_ecpf_vport_exists(esw->dev)) + return false; + + return true; +} + +static int esw_create_offloads_acl_tables(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + int i, j; + int err; + + if (esw_check_vport_match_metadata_supported(esw)) + esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; + + mlx5_esw_for_all_vports(esw, i, vport) { + err = esw_vport_ingress_common_config(esw, vport); if (err) - goto err_egress; + goto err_ingress; + + if (mlx5_eswitch_is_vf_vport(esw, vport->vport)) { + err = 
esw_vport_egress_prio_tag_config(esw, vport); + if (err) + goto err_egress; + } } + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) + esw_info(esw->dev, "Use metadata reg_c as source vport to match\n"); + return 0; err_egress: esw_vport_disable_ingress_acl(esw, vport); err_ingress: - mlx5_esw_for_each_vf_vport_reverse(esw, j, vport, i - 1) { + for (j = MLX5_VPORT_PF; j < i; j++) { + vport = &esw->vports[j]; esw_vport_disable_egress_acl(esw, vport); esw_vport_disable_ingress_acl(esw, vport); } @@ -1738,15 +1976,17 @@ err_ingress: return err; } -static void esw_prio_tag_acls_cleanup(struct mlx5_eswitch *esw) +static void esw_destroy_offloads_acl_tables(struct mlx5_eswitch *esw) { struct mlx5_vport *vport; int i; - mlx5_esw_for_each_vf_vport(esw, i, vport, esw->dev->priv.sriov.num_vfs) { + mlx5_esw_for_all_vports(esw, i, vport) { esw_vport_disable_egress_acl(esw, vport); esw_vport_disable_ingress_acl(esw, vport); } + + esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; } static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int vf_nvports, @@ -1757,15 +1997,13 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int vf_nvports, memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb)); mutex_init(&esw->fdb_table.offloads.fdb_prio_lock); - if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) { - err = esw_prio_tag_acls_config(esw, vf_nvports); - if (err) - return err; - } + err = esw_create_offloads_acl_tables(esw); + if (err) + return err; err = esw_create_offloads_fdb_tables(esw, nvports); if (err) - return err; + goto create_fdb_err; err = esw_create_offloads_table(esw, nvports); if (err) @@ -1783,6 +2021,9 @@ create_fg_err: create_ft_err: esw_destroy_offloads_fdb_tables(esw); +create_fdb_err: + esw_destroy_offloads_acl_tables(esw); + return err; } @@ -1791,12 +2032,12 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) esw_destroy_vport_rx_group(esw); esw_destroy_offloads_table(esw); esw_destroy_offloads_fdb_tables(esw); - if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) - esw_prio_tag_acls_cleanup(esw); + esw_destroy_offloads_acl_tables(esw); } static void esw_functions_changed_event_handler(struct work_struct *work) { + u32 out[MLX5_ST_SZ_DW(query_esw_functions_out)] = {}; struct mlx5_host_work *host_work; struct mlx5_eswitch *esw; u16 num_vfs = 0; @@ -1805,7 +2046,9 @@ static void esw_functions_changed_event_handler(struct work_struct *work) host_work = container_of(work, struct mlx5_host_work, work); esw = host_work->esw; - err = mlx5_esw_query_functions(esw->dev, &num_vfs); + err = mlx5_esw_query_functions(esw->dev, out, sizeof(out)); + num_vfs = MLX5_GET(query_esw_functions_out, out, + host_params_context.host_num_of_vfs); if (err || num_vfs == esw->esw_funcs.num_vfs) goto out; @@ -1825,6 +2068,21 @@ out: kfree(host_work); } +static void esw_emulate_event_handler(struct work_struct *work) +{ + struct mlx5_host_work *host_work = + container_of(work, struct mlx5_host_work, work); + struct mlx5_eswitch *esw = host_work->esw; + int err; + + if (esw->esw_funcs.num_vfs) { + err = esw_offloads_load_vf_reps(esw, esw->esw_funcs.num_vfs); + if (err) + esw_warn(esw->dev, "Load vf reps err=%d\n", err); + } + kfree(host_work); +} + static int esw_functions_changed_event(struct notifier_block *nb, unsigned long type, void *data) { @@ -1841,7 +2099,11 @@ static int esw_functions_changed_event(struct notifier_block *nb, host_work->esw = esw; - INIT_WORK(&host_work->work, esw_functions_changed_event_handler); + if (mlx5_eswitch_is_funcs_handler(esw->dev)) 
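/*
 * esw_create_offloads_acl_tables() above uses the kernel's partial-unwind
 * idiom: on failure at vport i, undo vports 0..i-1 in reverse order. A
 * condensed userspace model of that control flow, with stub init/fini
 * steps standing in for the per-vport ingress/egress configuration:
 */
static int init_one(int i) { (void)i; return 0; }	/* per-vport setup stub */
static void fini_one(int i) { (void)i; }		/* per-vport teardown stub */

static int init_all(int n)
{
	int i, err = 0;

	for (i = 0; i < n; i++) {
		err = init_one(i);
		if (err)
			goto err_undo;
	}
	return 0;

err_undo:
	while (i--)	/* unwind already-initialized vports, newest first */
		fini_one(i);
	return err;
}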
+ INIT_WORK(&host_work->work, + esw_functions_changed_event_handler); + else + INIT_WORK(&host_work->work, esw_emulate_event_handler); queue_work(esw->work_queue, &host_work->work); return NOTIFY_OK; @@ -1850,13 +2112,14 @@ static int esw_functions_changed_event(struct notifier_block *nb, static void esw_functions_changed_event_init(struct mlx5_eswitch *esw, u16 vf_nvports) { - if (!mlx5_eswitch_is_funcs_handler(esw->dev)) - return; - - MLX5_NB_INIT(&esw->esw_funcs.nb, esw_functions_changed_event, - ESW_FUNCTIONS_CHANGED); - mlx5_eq_notifier_register(esw->dev, &esw->esw_funcs.nb); - esw->esw_funcs.num_vfs = vf_nvports; + if (mlx5_eswitch_is_funcs_handler(esw->dev)) { + esw->esw_funcs.num_vfs = 0; + MLX5_NB_INIT(&esw->esw_funcs.nb, esw_functions_changed_event, + ESW_FUNCTIONS_CHANGED); + mlx5_eq_notifier_register(esw->dev, &esw->esw_funcs.nb); + } else { + esw->esw_funcs.num_vfs = vf_nvports; + } } static void esw_functions_changed_event_cleanup(struct mlx5_eswitch *esw) @@ -1877,7 +2140,17 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, if (err) return err; - err = esw_offloads_load_all_reps(esw, vf_nvports); + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + err = mlx5_eswitch_enable_passing_vport_metadata(esw); + if (err) + goto err_vport_metadata; + } + + /* Only load special vports reps. VF reps will be loaded in + * context of functions_changed event handler through real + * or emulated event. + */ + err = esw_offloads_load_special_vport(esw); if (err) goto err_reps; @@ -1888,9 +2161,22 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, mlx5_rdma_enable_roce(esw->dev); + /* Call esw_functions_changed event to load VF reps: + * 1. HW does not support the event then emulate it + * Or + * 2. The event was already notified when num_vfs changed + * and eswitch was in legacy mode + */ + esw_functions_changed_event(&esw->esw_funcs.nb.nb, + MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED, + NULL); + return 0; err_reps: + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) + mlx5_eswitch_disable_passing_vport_metadata(esw); +err_vport_metadata: esw_offloads_steering_cleanup(esw); return err; } @@ -1916,18 +2202,12 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, void esw_offloads_cleanup(struct mlx5_eswitch *esw) { - u16 num_vfs; - esw_functions_changed_event_cleanup(esw); - - if (mlx5_eswitch_is_funcs_handler(esw->dev)) - num_vfs = esw->esw_funcs.num_vfs; - else - num_vfs = esw->dev->priv.sriov.num_vfs; - mlx5_rdma_disable_roce(esw->dev); esw_offloads_devcom_cleanup(esw); - esw_offloads_unload_all_reps(esw, num_vfs); + esw_offloads_unload_all_reps(esw, esw->esw_funcs.num_vfs); + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) + mlx5_eswitch_disable_passing_vport_metadata(esw); esw_offloads_steering_cleanup(esw); } @@ -2167,7 +2447,8 @@ out: return 0; } -int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap, +int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, + enum devlink_eswitch_encap_mode encap, struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_priv(devlink); @@ -2216,7 +2497,8 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap, return err; } -int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap) +int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, + enum devlink_eswitch_encap_mode *encap) { struct mlx5_core_dev *dev = devlink_priv(devlink); struct mlx5_eswitch *esw = dev->priv.eswitch; @@ -2295,3 +2577,22 @@ struct mlx5_eswitch_rep 
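/*
 * esw_functions_changed_event() above runs in notifier context, so both the
 * real and the emulated handler defer their work to a workqueue and free
 * the work item when done. A stripped-down kernel-style sketch of that
 * pattern; mydev and do_sync_vf_reps are hypothetical stand-ins:
 */
#include <linux/notifier.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct mydev;
static void do_sync_vf_reps(struct mydev *dev);	/* hypothetical, may sleep */

struct vf_work {
	struct work_struct work;
	struct mydev *dev;
};

static void vf_work_handler(struct work_struct *work)
{
	struct vf_work *w = container_of(work, struct vf_work, work);

	do_sync_vf_reps(w->dev);	/* safe to sleep here, not in the notifier */
	kfree(w);
}

static int on_event(struct mydev *dev)
{
	struct vf_work *w = kzalloc(sizeof(*w), GFP_ATOMIC);

	if (!w)
		return NOTIFY_DONE;
	w->dev = dev;
	INIT_WORK(&w->work, vf_work_handler);
	queue_work(system_wq, &w->work);	/* the driver uses its own esw->work_queue */
	return NOTIFY_OK;
}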
*mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw, return mlx5_eswitch_get_rep(esw, vport); } EXPORT_SYMBOL(mlx5_eswitch_vport_rep); + +bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num) +{ + return vport_num >= MLX5_VPORT_FIRST_VF && + vport_num <= esw->dev->priv.sriov.max_vfs; +} + +bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw) +{ + return !!(esw->flags & MLX5_ESWITCH_VPORT_MATCH_METADATA); +} +EXPORT_SYMBOL(mlx5_eswitch_vport_match_metadata_enabled); + +u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw, + u16 vport_num) +{ + return ((MLX5_CAP_GEN(esw->dev, vhca_id) & 0xffff) << 16) | vport_num; +} +EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index cb7d8ebe2c95..1d55a324a17e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -49,8 +49,8 @@ mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev, struct mlx5_termtbl_handle *tt, struct mlx5_flow_act *flow_act) { + static const struct mlx5_flow_spec spec = {}; struct mlx5_flow_namespace *root_ns; - struct mlx5_flow_spec spec = {}; int prio, flags; int err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c index 52c47d3dd5a5..c76da309506b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c @@ -636,7 +636,8 @@ static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev, u8 match_criteria_enable, const u32 *match_c, const u32 *match_v, - struct mlx5_flow_act *flow_act) + struct mlx5_flow_act *flow_act, + struct mlx5_flow_context *flow_context) { const void *outer_c = MLX5_ADDR_OF(fte_match_param, match_c, outer_headers); @@ -655,7 +656,7 @@ static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev, (match_criteria_enable & ~(MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS)) || (flow_act->action & ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | MLX5_FLOW_CONTEXT_ACTION_ALLOW)) || - (flow_act->flags & FLOW_ACT_HAS_TAG)) + (flow_context->flags & FLOW_CONTEXT_HAS_TAG)) return false; return true; @@ -767,7 +768,8 @@ mlx5_fpga_ipsec_fs_create_sa_ctx(struct mlx5_core_dev *mdev, fg->mask.match_criteria_enable, fg->mask.match_criteria, fte->val, - &fte->action)) + &fte->action, + &fte->flow_context)) return ERR_PTR(-EINVAL); else if (!mlx5_is_fpga_ipsec_rule(mdev, fg->mask.match_criteria_enable, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index bb24c3797218..7ac1249eadc3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -396,7 +396,11 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context); MLX5_SET(flow_context, in_flow_context, group_id, group_id); - MLX5_SET(flow_context, in_flow_context, flow_tag, fte->action.flow_tag); + MLX5_SET(flow_context, in_flow_context, flow_tag, + fte->flow_context.flow_tag); + MLX5_SET(flow_context, in_flow_context, flow_source, + fte->flow_context.flow_source); + MLX5_SET(flow_context, in_flow_context, extended_destination, extended_dest); if (extended_dest) { @@ -771,6 +775,10 @@ int mlx5_modify_header_alloc(struct 
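/*
 * The metadata value written to reg_c_0 packs the eswitch owner's vhca_id
 * into the upper 16 bits and the vport number into the lower 16, exactly
 * as mlx5_eswitch_get_vport_metadata_for_match() above computes it. A
 * small standalone check of that encoding:
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t vport_metadata(uint16_t vhca_id, uint16_t vport_num)
{
	return ((uint32_t)vhca_id << 16) | vport_num;
}

int main(void)
{
	/* vhca_id 0x00ab, VF vport 3 -> 0x00ab0003 */
	printf("0x%08x\n", vport_metadata(0x00ab, 3));
	return 0;
}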
mlx5_core_dev *dev, max_actions = MLX5_CAP_FLOWTABLE_NIC_TX(dev, max_modify_header_actions); table_type = FS_FT_NIC_TX; break; + case MLX5_FLOW_NAMESPACE_ESW_INGRESS: + max_actions = MLX5_CAP_ESW_INGRESS_ACL(dev, max_modify_header_actions); + table_type = FS_FT_ESW_INGRESS_ACL; + break; default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index fe76c6fd6d80..a68a51c5011a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -584,7 +584,7 @@ err_ida_remove: } static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft, - u32 *match_value, + const struct mlx5_flow_spec *spec, struct mlx5_flow_act *flow_act) { struct mlx5_flow_steering *steering = get_steering(&ft->node); @@ -594,9 +594,10 @@ static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft, if (!fte) return ERR_PTR(-ENOMEM); - memcpy(fte->val, match_value, sizeof(fte->val)); + memcpy(fte->val, &spec->match_value, sizeof(fte->val)); fte->node.type = FS_TYPE_FLOW_ENTRY; fte->action = *flow_act; + fte->flow_context = spec->flow_context; tree_init_node(&fte->node, NULL, del_sw_fte); @@ -612,7 +613,7 @@ static void dealloc_flow_group(struct mlx5_flow_steering *steering, static struct mlx5_flow_group *alloc_flow_group(struct mlx5_flow_steering *steering, u8 match_criteria_enable, - void *match_criteria, + const void *match_criteria, int start_index, int end_index) { @@ -642,7 +643,7 @@ static struct mlx5_flow_group *alloc_flow_group(struct mlx5_flow_steering *steer static struct mlx5_flow_group *alloc_insert_flow_group(struct mlx5_flow_table *ft, u8 match_criteria_enable, - void *match_criteria, + const void *match_criteria, int start_index, int end_index, struct list_head *prev) @@ -1285,7 +1286,7 @@ free_handle: } static struct mlx5_flow_group *alloc_auto_flow_group(struct mlx5_flow_table *ft, - struct mlx5_flow_spec *spec) + const struct mlx5_flow_spec *spec) { struct list_head *prev = &ft->node.children; struct mlx5_flow_group *fg; @@ -1430,7 +1431,9 @@ static bool check_conflicting_actions(u32 action1, u32 action2) return false; } -static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act *flow_act) +static int check_conflicting_ftes(struct fs_fte *fte, + const struct mlx5_flow_context *flow_context, + const struct mlx5_flow_act *flow_act) { if (check_conflicting_actions(flow_act->action, fte->action.action)) { mlx5_core_warn(get_dev(&fte->node), @@ -1438,12 +1441,12 @@ static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act return -EEXIST; } - if ((flow_act->flags & FLOW_ACT_HAS_TAG) && - fte->action.flow_tag != flow_act->flow_tag) { + if ((flow_context->flags & FLOW_CONTEXT_HAS_TAG) && + fte->flow_context.flow_tag != flow_context->flow_tag) { mlx5_core_warn(get_dev(&fte->node), "FTE flow tag %u already exists with different flow tag %u\n", - fte->action.flow_tag, - flow_act->flow_tag); + fte->flow_context.flow_tag, + flow_context->flow_tag); return -EEXIST; } @@ -1451,7 +1454,7 @@ static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act } static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, - u32 *match_value, + const struct mlx5_flow_spec *spec, struct mlx5_flow_act *flow_act, struct mlx5_flow_destination *dest, int dest_num, @@ -1462,7 +1465,7 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, int i; int ret; - ret = check_conflicting_ftes(fte, flow_act); + 
ret = check_conflicting_ftes(fte, &spec->flow_context, flow_act); if (ret) return ERR_PTR(ret); @@ -1536,7 +1539,7 @@ static void free_match_list(struct match_list_head *head) static int build_match_list(struct match_list_head *match_head, struct mlx5_flow_table *ft, - struct mlx5_flow_spec *spec) + const struct mlx5_flow_spec *spec) { struct rhlist_head *tmp, *list; struct mlx5_flow_group *g; @@ -1589,7 +1592,7 @@ static u64 matched_fgs_get_version(struct list_head *match_head) static struct fs_fte * lookup_fte_locked(struct mlx5_flow_group *g, - u32 *match_value, + const u32 *match_value, bool take_write) { struct fs_fte *fte_tmp; @@ -1622,7 +1625,7 @@ out: static struct mlx5_flow_handle * try_add_to_existing_fg(struct mlx5_flow_table *ft, struct list_head *match_head, - struct mlx5_flow_spec *spec, + const struct mlx5_flow_spec *spec, struct mlx5_flow_act *flow_act, struct mlx5_flow_destination *dest, int dest_num, @@ -1637,7 +1640,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, u64 version; int err; - fte = alloc_fte(ft, spec->match_value, flow_act); + fte = alloc_fte(ft, spec, flow_act); if (IS_ERR(fte)) return ERR_PTR(-ENOMEM); @@ -1653,8 +1656,7 @@ search_again_locked: fte_tmp = lookup_fte_locked(g, spec->match_value, take_write); if (!fte_tmp) continue; - rule = add_rule_fg(g, spec->match_value, - flow_act, dest, dest_num, fte_tmp); + rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte_tmp); up_write_ref_node(&fte_tmp->node, false); tree_put_node(&fte_tmp->node, false); kmem_cache_free(steering->ftes_cache, fte); @@ -1701,8 +1703,7 @@ skip_search: nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); up_write_ref_node(&g->node, false); - rule = add_rule_fg(g, spec->match_value, - flow_act, dest, dest_num, fte); + rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte); up_write_ref_node(&fte->node, false); tree_put_node(&fte->node, false); return rule; @@ -1715,7 +1716,7 @@ out: static struct mlx5_flow_handle * _mlx5_add_flow_rules(struct mlx5_flow_table *ft, - struct mlx5_flow_spec *spec, + const struct mlx5_flow_spec *spec, struct mlx5_flow_act *flow_act, struct mlx5_flow_destination *dest, int dest_num) @@ -1788,7 +1789,7 @@ search_again_locked: if (err) goto err_release_fg; - fte = alloc_fte(ft, spec->match_value, flow_act); + fte = alloc_fte(ft, spec, flow_act); if (IS_ERR(fte)) { err = PTR_ERR(fte); goto err_release_fg; @@ -1802,8 +1803,7 @@ search_again_locked: nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); up_write_ref_node(&g->node, false); - rule = add_rule_fg(g, spec->match_value, flow_act, dest, - dest_num, fte); + rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte); up_write_ref_node(&fte->node, false); tree_put_node(&fte->node, false); tree_put_node(&g->node, false); @@ -1823,7 +1823,7 @@ static bool fwd_next_prio_supported(struct mlx5_flow_table *ft) struct mlx5_flow_handle * mlx5_add_flow_rules(struct mlx5_flow_table *ft, - struct mlx5_flow_spec *spec, + const struct mlx5_flow_spec *spec, struct mlx5_flow_act *flow_act, struct mlx5_flow_destination *dest, int num_dest) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index a08c3d09a50f..c48c382f926f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -170,6 +170,7 @@ struct fs_fte { u32 val[MLX5_ST_SZ_DW_MATCH_PARAM]; u32 dests_size; u32 index; + struct mlx5_flow_context flow_context; struct mlx5_flow_act action; enum fs_fte_status status; struct 
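/*
 * With this patch the flow tag travels in struct mlx5_flow_context rather
 * than in the action, and check_conflicting_ftes() above refuses to reuse
 * an existing FTE when the new spec carries a different explicit tag. A
 * userspace model of that rule; HAS_TAG mirrors FLOW_CONTEXT_HAS_TAG:
 */
#include <errno.h>
#include <stdint.h>

#define HAS_TAG	(1u << 0)

struct flow_context {
	uint32_t flags;
	uint32_t flow_tag;
};

static int check_conflicting(const struct flow_context *existing,
			     const struct flow_context *incoming)
{
	if ((incoming->flags & HAS_TAG) &&
	    existing->flow_tag != incoming->flow_tag)
		return -EEXIST;	/* same match key, different tag: refuse */
	return 0;
}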
mlx5_fc *counter; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index da81a5a7b8e9..00e66c3772cc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -418,6 +418,7 @@ static const struct mlx5e_profile mlx5i_nic_profile = { .cleanup_rx = mlx5i_cleanup_rx, .enable = NULL, /* mlx5i_enable */ .disable = NULL, /* mlx5i_disable */ + .update_rx = mlx5e_update_nic_rx, .update_stats = NULL, /* mlx5i_update_stats */ .update_carrier = NULL, /* no HW update in IB link */ .rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe, @@ -526,7 +527,7 @@ static int mlx5i_open(struct net_device *netdev) if (err) goto err_remove_fs_underlay_qp; - mlx5e_refresh_tirs(epriv, false); + epriv->profile->update_rx(epriv); mlx5e_activate_priv_channels(epriv); mutex_unlock(&epriv->state_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c index b491b8f5fd6b..e05186ada721 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c @@ -221,7 +221,7 @@ static int mlx5i_pkey_open(struct net_device *netdev) mlx5_core_warn(mdev, "opening child channels failed, %d\n", err); goto err_clear_state_opened_flag; } - mlx5e_refresh_tirs(epriv, false); + epriv->profile->update_rx(epriv); mlx5e_activate_priv_channels(epriv); mutex_unlock(&epriv->state_lock); @@ -350,6 +350,7 @@ static const struct mlx5e_profile mlx5i_pkey_nic_profile = { .cleanup_rx = mlx5i_pkey_cleanup_rx, .enable = NULL, .disable = NULL, + .update_rx = mlx5e_update_nic_rx, .update_stats = NULL, .rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe, .rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h index c0fb6d72b695..24bd991a727e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -7,7 +7,6 @@ #include <linux/mlx5/eq.h> #include <linux/mlx5/cq.h> -#define MLX5_MAX_IRQ_NAME (32) #define MLX5_EQE_SIZE (sizeof(struct mlx5_eqe)) struct mlx5_eq_tasklet { @@ -36,8 +35,14 @@ struct mlx5_eq { struct mlx5_rsc_debug *dbg; }; +struct mlx5_eq_async { + struct mlx5_eq core; + struct notifier_block irq_nb; +}; + struct mlx5_eq_comp { - struct mlx5_eq core; /* Must be first */ + struct mlx5_eq core; + struct notifier_block irq_nb; struct mlx5_eq_tasklet tasklet_ctx; struct list_head list; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c index a71d5b9c7ab2..3118e8d66407 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c @@ -67,6 +67,7 @@ static int del_l2table_entry_cmd(struct mlx5_core_dev *dev, u32 index) struct l2table_node { struct l2addr_node node; u32 index; /* index in HW l2 table */ + int ref_count; }; struct mlx5_mpfs { @@ -134,8 +135,8 @@ int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) { struct mlx5_mpfs *mpfs = dev->priv.mpfs; struct l2table_node *l2addr; + int err = 0; u32 index; - int err; if (!MLX5_ESWITCH_MANAGER(dev)) return 0; @@ -144,30 +145,35 @@ int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) l2addr = l2addr_hash_find(mpfs->hash, mac, struct l2table_node); if (l2addr) { - err = -EEXIST; - goto abort; + 
l2addr->ref_count++; + goto out; } err = alloc_l2table_index(mpfs, &index); if (err) - goto abort; + goto out; l2addr = l2addr_hash_add(mpfs->hash, mac, struct l2table_node, GFP_KERNEL); if (!l2addr) { - free_l2table_index(mpfs, index); err = -ENOMEM; - goto abort; + goto hash_add_err; } - l2addr->index = index; err = set_l2table_entry_cmd(dev, index, mac); - if (err) { - l2addr_hash_del(l2addr); - free_l2table_index(mpfs, index); - } + if (err) + goto set_table_entry_err; + + l2addr->index = index; + l2addr->ref_count = 1; mlx5_core_dbg(dev, "MPFS mac added %pM, index (%d)\n", mac, index); -abort: + goto out; + +set_table_entry_err: + l2addr_hash_del(l2addr); +hash_add_err: + free_l2table_index(mpfs, index); +out: mutex_unlock(&mpfs->lock); return err; } @@ -190,6 +196,9 @@ int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) goto unlock; } + if (--l2addr->ref_count > 0) + goto unlock; + index = l2addr->index; del_l2table_entry_cmd(dev, index); l2addr_hash_del(l2addr); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 998eec938d3c..072b56fda27e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -172,18 +172,28 @@ static struct mlx5_profile profile[] = { #define FW_INIT_TIMEOUT_MILI 2000 #define FW_INIT_WAIT_MS 2 -#define FW_PRE_INIT_TIMEOUT_MILI 10000 +#define FW_PRE_INIT_TIMEOUT_MILI 120000 +#define FW_INIT_WARN_MESSAGE_INTERVAL 20000 -static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili) +static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili, + u32 warn_time_mili) { + unsigned long warn = jiffies + msecs_to_jiffies(warn_time_mili); unsigned long end = jiffies + msecs_to_jiffies(max_wait_mili); int err = 0; + BUILD_BUG_ON(FW_PRE_INIT_TIMEOUT_MILI < FW_INIT_WARN_MESSAGE_INTERVAL); + while (fw_initializing(dev)) { if (time_after(jiffies, end)) { err = -EBUSY; break; } + if (warn_time_mili && time_after(jiffies, warn)) { + mlx5_core_warn(dev, "Waiting for FW initialization, timeout abort in %ds\n", + jiffies_to_msecs(end - warn) / 1000); + warn = jiffies + msecs_to_jiffies(warn_time_mili); + } msleep(FW_INIT_WAIT_MS); } @@ -799,10 +809,16 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) goto err_devcom; } + err = mlx5_irq_table_init(dev); + if (err) { + mlx5_core_err(dev, "failed to initialize irq table\n"); + goto err_devcom; + } + err = mlx5_eq_table_init(dev); if (err) { mlx5_core_err(dev, "failed to initialize eq\n"); - goto err_devcom; + goto err_irq_cleanup; } err = mlx5_events_init(dev); @@ -840,32 +856,32 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) goto err_rl_cleanup; } - err = mlx5_eswitch_init(dev); + err = mlx5_sriov_init(dev); if (err) { - mlx5_core_err(dev, "Failed to init eswitch %d\n", err); + mlx5_core_err(dev, "Failed to init sriov %d\n", err); goto err_mpfs_cleanup; } - err = mlx5_sriov_init(dev); + err = mlx5_eswitch_init(dev); if (err) { - mlx5_core_err(dev, "Failed to init sriov %d\n", err); - goto err_eswitch_cleanup; + mlx5_core_err(dev, "Failed to init eswitch %d\n", err); + goto err_sriov_cleanup; } err = mlx5_fpga_init(dev); if (err) { mlx5_core_err(dev, "Failed to init fpga device %d\n", err); - goto err_sriov_cleanup; + goto err_eswitch_cleanup; } dev->tracer = mlx5_fw_tracer_create(dev); return 0; -err_sriov_cleanup: - mlx5_sriov_cleanup(dev); err_eswitch_cleanup: mlx5_eswitch_cleanup(dev->priv.eswitch); +err_sriov_cleanup: + mlx5_sriov_cleanup(dev); err_mpfs_cleanup: 
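/*
 * The MPFS change above turns duplicate-MAC insertion from an -EEXIST error
 * into reference counting: a second add of the same MAC only bumps
 * ref_count, the L2 table entry is programmed once, and it is freed only
 * when the count drops to zero. Condensed userspace model of those
 * semantics, using a single-entry "table" for brevity:
 */
#include <errno.h>
#include <string.h>

struct entry {
	unsigned char mac[6];
	int ref_count;		/* 0 means the slot is free */
};

static struct entry slot;

static int mac_add(const unsigned char *mac)
{
	if (slot.ref_count && !memcmp(slot.mac, mac, 6)) {
		slot.ref_count++;	/* already programmed: just take a ref */
		return 0;
	}
	if (slot.ref_count)
		return -ENOSPC;		/* toy table has one slot */
	memcpy(slot.mac, mac, 6);	/* program the HW entry once */
	slot.ref_count = 1;
	return 0;
}

static int mac_del(const unsigned char *mac)
{
	if (!slot.ref_count || memcmp(slot.mac, mac, 6))
		return -ENOENT;
	if (--slot.ref_count == 0)
		memset(&slot, 0, sizeof(slot));	/* last user: free the entry */
	return 0;
}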
mlx5_mpfs_cleanup(dev); err_rl_cleanup: @@ -880,6 +896,8 @@ err_events_cleanup: mlx5_events_cleanup(dev); err_eq_cleanup: mlx5_eq_table_cleanup(dev); +err_irq_cleanup: + mlx5_irq_table_cleanup(dev); err_devcom: mlx5_devcom_unregister_device(dev->priv.devcom); @@ -890,8 +908,8 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) { mlx5_fw_tracer_destroy(dev->tracer); mlx5_fpga_cleanup(dev); - mlx5_sriov_cleanup(dev); mlx5_eswitch_cleanup(dev->priv.eswitch); + mlx5_sriov_cleanup(dev); mlx5_mpfs_cleanup(dev); mlx5_cleanup_rl_table(dev); mlx5_geneve_destroy(dev->geneve); @@ -903,6 +921,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_cq_debugfs_cleanup(dev); mlx5_events_cleanup(dev); mlx5_eq_table_cleanup(dev); + mlx5_irq_table_cleanup(dev); mlx5_devcom_unregister_device(dev->priv.devcom); } @@ -919,7 +938,7 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot) /* wait for firmware to accept initialization segments configurations */ - err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI); + err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI, FW_INIT_WARN_MESSAGE_INTERVAL); if (err) { mlx5_core_err(dev, "Firmware over %d MS in pre-initializing state, aborting\n", FW_PRE_INIT_TIMEOUT_MILI); @@ -932,7 +951,7 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot) return err; } - err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI); + err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI, 0); if (err) { mlx5_core_err(dev, "Firmware over %d MS in initializing state, aborting\n", FW_INIT_TIMEOUT_MILI); @@ -1036,6 +1055,12 @@ static int mlx5_load(struct mlx5_core_dev *dev) mlx5_events_start(dev); mlx5_pagealloc_start(dev); + err = mlx5_irq_table_create(dev); + if (err) { + mlx5_core_err(dev, "Failed to alloc IRQs\n"); + goto err_irq_table; + } + err = mlx5_eq_table_create(dev); if (err) { mlx5_core_err(dev, "Failed to create EQs\n"); @@ -1107,6 +1132,8 @@ err_fpga_start: err_fw_tracer: mlx5_eq_table_destroy(dev); err_eq_table: + mlx5_irq_table_destroy(dev); +err_irq_table: mlx5_pagealloc_stop(dev); mlx5_events_stop(dev); mlx5_put_uars_page(dev, dev->priv.uar); @@ -1123,6 +1150,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev) mlx5_fpga_device_stop(dev); mlx5_fw_tracer_cleanup(dev->tracer); mlx5_eq_table_destroy(dev); + mlx5_irq_table_destroy(dev); mlx5_pagealloc_stop(dev); mlx5_events_stop(dev); mlx5_put_uars_page(dev, dev->priv.uar); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 29bb61a10289..958769702823 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -159,6 +159,19 @@ int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam, void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev); void mlx5_lag_remove(struct mlx5_core_dev *dev); +int mlx5_irq_table_init(struct mlx5_core_dev *dev); +void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev); +int mlx5_irq_table_create(struct mlx5_core_dev *dev); +void mlx5_irq_table_destroy(struct mlx5_core_dev *dev); +int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx, + struct notifier_block *nb); +int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx, + struct notifier_block *nb); +struct cpumask * +mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx); +struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *table); +int mlx5_irq_get_num_comp(struct mlx5_irq_table *table); + int 
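/*
 * wait_fw_init() above now waits up to FW_PRE_INIT_TIMEOUT_MILI (raised to
 * two minutes) and, instead of staying silent until the abort, prints a
 * countdown warning every FW_INIT_WARN_MESSAGE_INTERVAL. A userspace
 * analogue of that deadline-plus-periodic-warning polling loop;
 * still_initializing() is a stand-in for the fw_initializing() register
 * read:
 */
#include <stdbool.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static int polls_left = 3;

static bool still_initializing(void)
{
	return polls_left-- > 0;	/* toy firmware: ready after 3 polls */
}

static int wait_ready(unsigned int max_wait_ms, unsigned int warn_ms)
{
	time_t end = time(NULL) + max_wait_ms / 1000;
	time_t warn = time(NULL) + warn_ms / 1000;

	while (still_initializing()) {
		if (time(NULL) > end)
			return -1;		/* -EBUSY in the driver */
		if (warn_ms && time(NULL) > warn) {
			fprintf(stderr, "waiting for FW init, abort in %lds\n",
				(long)(end - time(NULL)));
			warn = time(NULL) + warn_ms / 1000;
		}
		usleep(2000);			/* FW_INIT_WAIT_MS */
	}
	return 0;
}

int main(void)
{
	return wait_ready(120000, 20000);
}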
mlx5_events_init(struct mlx5_core_dev *dev); void mlx5_events_cleanup(struct mlx5_core_dev *dev); void mlx5_events_start(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index ea744d8466ea..9231b39d18b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -38,15 +38,12 @@ void mlx5_init_mkey_table(struct mlx5_core_dev *dev) { - struct mlx5_mkey_table *table = &dev->priv.mkey_table; - - memset(table, 0, sizeof(*table)); - rwlock_init(&table->lock); - INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); + xa_init_flags(&dev->priv.mkey_table, XA_FLAGS_LOCK_IRQ); } void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev) { + WARN_ON(!xa_empty(&dev->priv.mkey_table)); } int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, @@ -56,8 +53,8 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, mlx5_async_cbk_t callback, struct mlx5_async_work *context) { - struct mlx5_mkey_table *table = &dev->priv.mkey_table; u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0}; + struct xarray *mkeys = &dev->priv.mkey_table; u32 mkey_index; void *mkc; int err; @@ -88,12 +85,10 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", mkey_index, key, mkey->key); - /* connect to mkey tree */ - write_lock_irq(&table->lock); - err = radix_tree_insert(&table->tree, mlx5_base_mkey(mkey->key), mkey); - write_unlock_irq(&table->lock); + err = xa_err(xa_store_irq(mkeys, mlx5_base_mkey(mkey->key), mkey, + GFP_KERNEL)); if (err) { - mlx5_core_warn(dev, "failed radix tree insert of mkey 0x%x, %d\n", + mlx5_core_warn(dev, "failed xarray insert of mkey 0x%x, %d\n", mlx5_base_mkey(mkey->key), err); mlx5_core_destroy_mkey(dev, mkey); } @@ -114,17 +109,17 @@ EXPORT_SYMBOL(mlx5_core_create_mkey); int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey) { - struct mlx5_mkey_table *table = &dev->priv.mkey_table; u32 out[MLX5_ST_SZ_DW(destroy_mkey_out)] = {0}; u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {0}; + struct xarray *mkeys = &dev->priv.mkey_table; struct mlx5_core_mkey *deleted_mkey; unsigned long flags; - write_lock_irqsave(&table->lock, flags); - deleted_mkey = radix_tree_delete(&table->tree, mlx5_base_mkey(mkey->key)); - write_unlock_irqrestore(&table->lock, flags); + xa_lock_irqsave(mkeys, flags); + deleted_mkey = __xa_erase(mkeys, mlx5_base_mkey(mkey->key)); + xa_unlock_irqrestore(mkeys, flags); if (!deleted_mkey) { - mlx5_core_dbg(dev, "failed radix tree delete of mkey 0x%x\n", + mlx5_core_dbg(dev, "failed xarray delete of mkey 0x%x\n", mlx5_base_mkey(mkey->key)); return -ENOENT; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c new file mode 100644 index 000000000000..373981a659c7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -0,0 +1,334 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. 
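/*
 * The mkey table above moves from a radix tree guarded by an external
 * rwlock to an XArray whose internal spinlock, taken IRQ-safely via
 * XA_FLAGS_LOCK_IRQ, replaces that lock. A kernel-style sketch of the same
 * API shape with an illustrative table:
 */
#include <linux/xarray.h>

static DEFINE_XARRAY_FLAGS(my_table, XA_FLAGS_LOCK_IRQ);

static int my_store(unsigned long id, void *obj)
{
	/* xa_store_irq() returns the old entry or an xa_err() pointer */
	return xa_err(xa_store_irq(&my_table, id, obj, GFP_KERNEL));
}

static void *my_erase(unsigned long id)
{
	return xa_erase_irq(&my_table, id);	/* NULL if nothing was stored */
}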
*/ + +#include <linux/interrupt.h> +#include <linux/notifier.h> +#include <linux/module.h> +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" +#ifdef CONFIG_RFS_ACCEL +#include <linux/cpu_rmap.h> +#endif + +#define MLX5_MAX_IRQ_NAME (32) + +struct mlx5_irq { + struct atomic_notifier_head nh; + cpumask_var_t mask; + char name[MLX5_MAX_IRQ_NAME]; +}; + +struct mlx5_irq_table { + struct mlx5_irq *irq; + int nvec; +#ifdef CONFIG_RFS_ACCEL + struct cpu_rmap *rmap; +#endif +}; + +int mlx5_irq_table_init(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_table *irq_table; + + irq_table = kvzalloc(sizeof(*irq_table), GFP_KERNEL); + if (!irq_table) + return -ENOMEM; + + dev->priv.irq_table = irq_table; + return 0; +} + +void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev) +{ + kvfree(dev->priv.irq_table); +} + +int mlx5_irq_get_num_comp(struct mlx5_irq_table *table) +{ + return table->nvec - MLX5_IRQ_VEC_COMP_BASE; +} + +static struct mlx5_irq *mlx5_irq_get(struct mlx5_core_dev *dev, int vecidx) +{ + struct mlx5_irq_table *irq_table = dev->priv.irq_table; + + return &irq_table->irq[vecidx]; +} + +int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx, + struct notifier_block *nb) +{ + struct mlx5_irq *irq; + + irq = &irq_table->irq[vecidx]; + return atomic_notifier_chain_register(&irq->nh, nb); +} + +int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx, + struct notifier_block *nb) +{ + struct mlx5_irq *irq; + + irq = &irq_table->irq[vecidx]; + return atomic_notifier_chain_unregister(&irq->nh, nb); +} + +static irqreturn_t mlx5_irq_int_handler(int irq, void *nh) +{ + atomic_notifier_call_chain(nh, 0, NULL); + return IRQ_HANDLED; +} + +static void irq_set_name(char *name, int vecidx) +{ + if (vecidx == 0) { + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async"); + return; + } + + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", + vecidx - MLX5_IRQ_VEC_COMP_BASE); + return; +} + +static int request_irqs(struct mlx5_core_dev *dev, int nvec) +{ + char name[MLX5_MAX_IRQ_NAME]; + int err; + int i; + + for (i = 0; i < nvec; i++) { + struct mlx5_irq *irq = mlx5_irq_get(dev, i); + int irqn = pci_irq_vector(dev->pdev, i); + + irq_set_name(name, i); + ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh); + snprintf(irq->name, MLX5_MAX_IRQ_NAME, + "%s@pci:%s", name, pci_name(dev->pdev)); + err = request_irq(irqn, mlx5_irq_int_handler, 0, irq->name, + &irq->nh); + if (err) { + mlx5_core_err(dev, "Failed to request irq\n"); + goto err_request_irq; + } + } + return 0; + +err_request_irq: + for (; i >= 0; i--) { + struct mlx5_irq *irq = mlx5_irq_get(dev, i); + int irqn = pci_irq_vector(dev->pdev, i); + + free_irq(irqn, &irq->nh); + } + return err; +} + +static void irq_clear_rmap(struct mlx5_core_dev *dev) +{ +#ifdef CONFIG_RFS_ACCEL + struct mlx5_irq_table *irq_table = dev->priv.irq_table; + + free_irq_cpu_rmap(irq_table->rmap); +#endif +} + +static int irq_set_rmap(struct mlx5_core_dev *mdev) +{ + int err = 0; +#ifdef CONFIG_RFS_ACCEL + struct mlx5_irq_table *irq_table = mdev->priv.irq_table; + int num_affinity_vec; + int vecidx; + + num_affinity_vec = mlx5_irq_get_num_comp(irq_table); + irq_table->rmap = alloc_irq_cpu_rmap(num_affinity_vec); + if (!irq_table->rmap) { + err = -ENOMEM; + mlx5_core_err(mdev, "Failed to allocate cpu_rmap. err %d", err); + goto err_out; + } + + vecidx = MLX5_IRQ_VEC_COMP_BASE; + for (; vecidx < irq_table->nvec; vecidx++) { + err = irq_cpu_rmap_add(irq_table->rmap, + pci_irq_vector(mdev->pdev, vecidx)); + if (err) { + mlx5_core_err(mdev, "irq_cpu_rmap_add failed. 
err %d", + err); + goto err_irq_cpu_rmap_add; + } + } + return 0; + +err_irq_cpu_rmap_add: + irq_clear_rmap(mdev); +err_out: +#endif + return err; +} + +/* Completion IRQ vectors */ + +static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) +{ + int vecidx = MLX5_IRQ_VEC_COMP_BASE + i; + struct mlx5_irq *irq; + int irqn; + + irq = mlx5_irq_get(mdev, vecidx); + irqn = pci_irq_vector(mdev->pdev, vecidx); + if (!zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) { + mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); + return -ENOMEM; + } + + cpumask_set_cpu(cpumask_local_spread(i, mdev->priv.numa_node), + irq->mask); + if (IS_ENABLED(CONFIG_SMP) && + irq_set_affinity_hint(irqn, irq->mask)) + mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", + irqn); + + return 0; +} + +static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) +{ + int vecidx = MLX5_IRQ_VEC_COMP_BASE + i; + struct mlx5_irq *irq; + int irqn; + + irq = mlx5_irq_get(mdev, vecidx); + irqn = pci_irq_vector(mdev->pdev, vecidx); + irq_set_affinity_hint(irqn, NULL); + free_cpumask_var(irq->mask); +} + +static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev) +{ + int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table); + int err; + int i; + + for (i = 0; i < nvec; i++) { + err = set_comp_irq_affinity_hint(mdev, i); + if (err) + goto err_out; + } + + return 0; + +err_out: + for (i--; i >= 0; i--) + clear_comp_irq_affinity_hint(mdev, i); + + return err; +} + +static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev) +{ + int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table); + int i; + + for (i = 0; i < nvec; i++) + clear_comp_irq_affinity_hint(mdev, i); +} + +struct cpumask * +mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx) +{ + return irq_table->irq[vecidx].mask; +} + +#ifdef CONFIG_RFS_ACCEL +struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *irq_table) +{ + return irq_table->rmap; +} +#endif + +static void unrequest_irqs(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_table *table = dev->priv.irq_table; + int i; + + for (i = 0; i < table->nvec; i++) + free_irq(pci_irq_vector(dev->pdev, i), + &mlx5_irq_get(dev, i)->nh); +} + +int mlx5_irq_table_create(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_irq_table *table = priv->irq_table; + int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? 
+ MLX5_CAP_GEN(dev, max_num_eqs) : + 1 << MLX5_CAP_GEN(dev, log_max_eq); + int nvec; + int err; + + nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + + MLX5_IRQ_VEC_COMP_BASE; + nvec = min_t(int, nvec, num_eqs); + if (nvec <= MLX5_IRQ_VEC_COMP_BASE) + return -ENOMEM; + + table->irq = kcalloc(nvec, sizeof(*table->irq), GFP_KERNEL); + if (!table->irq) + return -ENOMEM; + + nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_IRQ_VEC_COMP_BASE + 1, + nvec, PCI_IRQ_MSIX); + if (nvec < 0) { + err = nvec; + goto err_free_irq; + } + + table->nvec = nvec; + + err = irq_set_rmap(dev); + if (err) + goto err_set_rmap; + + err = request_irqs(dev, nvec); + if (err) + goto err_request_irqs; + + err = set_comp_irq_affinity_hints(dev); + if (err) { + mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n"); + goto err_set_affinity; + } + + return 0; + +err_set_affinity: + unrequest_irqs(dev); +err_request_irqs: + irq_clear_rmap(dev); +err_set_rmap: + pci_free_irq_vectors(dev->pdev); +err_free_irq: + kfree(table->irq); + return err; +} + +void mlx5_irq_table_destroy(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_table *table = dev->priv.irq_table; + int i; + + /* free_irq requires that affinity and rmap will be cleared + * before calling it. This is why there is asymmetry with set_rmap + * which should be called after alloc_irq but before request_irq. + */ + irq_clear_rmap(dev); + clear_comp_irqs_affinity_hints(dev); + for (i = 0; i < table->nvec; i++) + free_irq(pci_irq_vector(dev->pdev, i), + &mlx5_irq_get(dev, i)->nh); + pci_free_irq_vectors(dev->pdev); + kfree(table->irq); +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index a249b3c3843d..2eecb831c499 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -208,6 +208,27 @@ void mlx5_sriov_detach(struct mlx5_core_dev *dev) mlx5_device_disable_sriov(dev); } +static u16 mlx5_get_max_vfs(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(query_esw_functions_out)] = {}; + u16 host_total_vfs; + int err; + + if (mlx5_core_is_ecpf_esw_manager(dev)) { + err = mlx5_esw_query_functions(dev, out, sizeof(out)); + host_total_vfs = MLX5_GET(query_esw_functions_out, out, + host_params_context.host_total_vfs); + + /* Old FW doesn't support getting total_vfs from esw func + * but supports getting it from pci_sriov. 
+ */ + if (!err && host_total_vfs) + return host_total_vfs; + } + + return pci_sriov_get_totalvfs(dev->pdev); +} + int mlx5_sriov_init(struct mlx5_core_dev *dev) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; @@ -218,6 +239,7 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev) return 0; total_vfs = pci_sriov_get_totalvfs(pdev); + sriov->max_vfs = mlx5_get_max_vfs(dev); sriov->num_vfs = pci_num_vf(pdev); sriov->vfs_ctx = kcalloc(total_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL); if (!sriov->vfs_ctx) diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig index b5d64aed259e..06c80343d9ed 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig +++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig @@ -84,6 +84,7 @@ config MLXSW_SPECTRUM select OBJAGG select MLXFW imply PTP_1588_CLOCK + select NET_PTP_CLASSIFY if PTP_1588_CLOCK default m ---help--- This driver supports Mellanox Technologies Spectrum Ethernet diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 30e0526a9cf6..17ceac7505e5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1245,6 +1245,15 @@ int mlxsw_core_skb_transmit(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, } EXPORT_SYMBOL(mlxsw_core_skb_transmit); +void mlxsw_core_ptp_transmitted(struct mlxsw_core *mlxsw_core, + struct sk_buff *skb, u8 local_port) +{ + if (mlxsw_core->driver->ptp_transmitted) + mlxsw_core->driver->ptp_transmitted(mlxsw_core, skb, + local_port); +} +EXPORT_SYMBOL(mlxsw_core_ptp_transmitted); + static bool __is_rx_listener_equal(const struct mlxsw_rx_listener *rxl_a, const struct mlxsw_rx_listener *rxl_b) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 6dbb0ede502e..8efcff4b59cb 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -48,6 +48,8 @@ bool mlxsw_core_skb_transmit_busy(struct mlxsw_core *mlxsw_core, const struct mlxsw_tx_info *tx_info); int mlxsw_core_skb_transmit(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, const struct mlxsw_tx_info *tx_info); +void mlxsw_core_ptp_transmitted(struct mlxsw_core *mlxsw_core, + struct sk_buff *skb, u8 local_port); struct mlxsw_rx_listener { void (*func)(struct sk_buff *skb, u8 local_port, void *priv); @@ -296,6 +298,13 @@ struct mlxsw_driver { u64 *p_linear_size); int (*params_register)(struct mlxsw_core *mlxsw_core); void (*params_unregister)(struct mlxsw_core *mlxsw_core); + + /* Notify a driver that a timestamped packet was transmitted. Driver + * is responsible for freeing the passed-in SKB. 
+ */ + void (*ptp_transmitted)(struct mlxsw_core *mlxsw_core, + struct sk_buff *skb, u8 local_port); + u8 txhdr_len; const struct mlxsw_config_profile *profile; bool res_query_enabled; @@ -418,4 +427,14 @@ enum mlxsw_devlink_param_id { MLXSW_DEVLINK_PARAM_ID_ACL_REGION_REHASH_INTERVAL, }; +struct mlxsw_skb_cb { + struct mlxsw_tx_info tx_info; +}; + +static inline struct mlxsw_skb_cb *mlxsw_skb_cb(struct sk_buff *skb) +{ + BUILD_BUG_ON(sizeof(struct mlxsw_skb_cb) > sizeof(skb->cb)); + return (struct mlxsw_skb_cb *) skb->cb; +} + #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c index 056e3f55ae6c..5b00726c4346 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c @@ -52,8 +52,7 @@ static ssize_t mlxsw_hwmon_temp_show(struct device *dev, container_of(attr, struct mlxsw_hwmon_attr, dev_attr); struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; char mtmp_pl[MLXSW_REG_MTMP_LEN]; - unsigned int temp; - int index; + int temp, index; int err; index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index, @@ -65,7 +64,7 @@ static ssize_t mlxsw_hwmon_temp_show(struct device *dev, return err; } mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); - return sprintf(buf, "%u\n", temp); + return sprintf(buf, "%d\n", temp); } static ssize_t mlxsw_hwmon_temp_max_show(struct device *dev, @@ -76,8 +75,7 @@ static ssize_t mlxsw_hwmon_temp_max_show(struct device *dev, container_of(attr, struct mlxsw_hwmon_attr, dev_attr); struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; char mtmp_pl[MLXSW_REG_MTMP_LEN]; - unsigned int temp_max; - int index; + int temp_max, index; int err; index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index, @@ -89,7 +87,7 @@ static ssize_t mlxsw_hwmon_temp_max_show(struct device *dev, return err; } mlxsw_reg_mtmp_unpack(mtmp_pl, NULL, &temp_max, NULL); - return sprintf(buf, "%u\n", temp_max); + return sprintf(buf, "%d\n", temp_max); } static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev, @@ -215,8 +213,8 @@ static ssize_t mlxsw_hwmon_module_temp_show(struct device *dev, container_of(attr, struct mlxsw_hwmon_attr, dev_attr); struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; char mtmp_pl[MLXSW_REG_MTMP_LEN]; - unsigned int temp; u8 module; + int temp; int err; module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; @@ -227,7 +225,7 @@ static ssize_t mlxsw_hwmon_module_temp_show(struct device *dev, return err; mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); - return sprintf(buf, "%u\n", temp); + return sprintf(buf, "%d\n", temp); } static ssize_t mlxsw_hwmon_module_temp_fault_show(struct device *dev, diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index cfab0e330a47..35a1dc89c28a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -23,6 +23,7 @@ #define MLXSW_THERMAL_HYSTERESIS_TEMP 5000 /* 5C */ #define MLXSW_THERMAL_MODULE_TEMP_SHIFT (MLXSW_THERMAL_HYSTERESIS_TEMP * 2) #define MLXSW_THERMAL_ZONE_MAX_NAME 16 +#define MLXSW_THERMAL_TEMP_SCORE_MAX GENMASK(31, 0) #define MLXSW_THERMAL_MAX_STATE 10 #define MLXSW_THERMAL_MAX_DUTY 255 /* Minimum and maximum fan allowed speed in percent: from 20% to 100%.
Values @@ -98,7 +99,7 @@ struct mlxsw_thermal_module { struct thermal_zone_device *tzdev; struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS]; enum thermal_device_mode mode; - int module; + int module; /* Module or gearbox number */ }; struct mlxsw_thermal { @@ -111,6 +112,10 @@ struct mlxsw_thermal { struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS]; enum thermal_device_mode mode; struct mlxsw_thermal_module *tz_module_arr; + struct mlxsw_thermal_module *tz_gearbox_arr; + u8 tz_gearbox_num; + unsigned int tz_highest_score; + struct thermal_zone_device *tz_highest_dev; }; static inline u8 mlxsw_state_to_duty(int state) @@ -195,6 +200,34 @@ mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core, return 0; } +static void mlxsw_thermal_tz_score_update(struct mlxsw_thermal *thermal, + struct thermal_zone_device *tzdev, + struct mlxsw_thermal_trip *trips, + int temp) +{ + struct mlxsw_thermal_trip *trip = trips; + unsigned int score, delta, i, shift = 1; + + /* Calculate the thermal zone score. If the temperature is above the + * critical threshold, the score is set to MLXSW_THERMAL_TEMP_SCORE_MAX. + * Otherwise the score is DIV_ROUND_CLOSEST(temp, trip_temp - temp) for + * the first trip point still above temp, scaled by 256 for every trip + * point already crossed, so the zone closest to its next trip point + * ranks highest. + */ + score = MLXSW_THERMAL_TEMP_SCORE_MAX; + for (i = MLXSW_THERMAL_TEMP_TRIP_NORM; i < MLXSW_THERMAL_NUM_TRIPS; + i++, trip++) { + if (temp < trip->temp) { + delta = DIV_ROUND_CLOSEST(temp, trip->temp - temp); + score = delta * shift; + break; + } + shift *= 256; + } + + if (score > thermal->tz_highest_score) { + thermal->tz_highest_score = score; + thermal->tz_highest_dev = tzdev; + } +} + static int mlxsw_thermal_bind(struct thermal_zone_device *tzdev, struct thermal_cooling_device *cdev) { @@ -279,7 +312,7 @@ static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev, struct mlxsw_thermal *thermal = tzdev->devdata; struct device *dev = thermal->bus_info->dev; char mtmp_pl[MLXSW_REG_MTMP_LEN]; - unsigned int temp; + int temp; int err; mlxsw_reg_mtmp_pack(mtmp_pl, 0, false, false); @@ -290,8 +323,11 @@ static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev, return err; } mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); + if (temp > 0) + mlxsw_thermal_tz_score_update(thermal, tzdev, thermal->trips, + temp); - *p_temp = (int) temp; + *p_temp = temp; return 0; } @@ -351,6 +387,22 @@ static int mlxsw_thermal_set_trip_hyst(struct thermal_zone_device *tzdev, return 0; } +static int mlxsw_thermal_trend_get(struct thermal_zone_device *tzdev, + int trip, enum thermal_trend *trend) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + struct mlxsw_thermal *thermal = tz->parent; + + if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS) + return -EINVAL; + + if (tzdev == thermal->tz_highest_dev) + return 1; + + *trend = THERMAL_TREND_STABLE; + return 0; +} + static struct thermal_zone_device_ops mlxsw_thermal_ops = { .bind = mlxsw_thermal_bind, .unbind = mlxsw_thermal_unbind, @@ -362,6 +414,7 @@ static struct thermal_zone_device_ops mlxsw_thermal_ops = { .set_trip_temp = mlxsw_thermal_set_trip_temp, .get_trip_hyst = mlxsw_thermal_get_trip_hyst, .set_trip_hyst = mlxsw_thermal_set_trip_hyst, + .get_trend = mlxsw_thermal_trend_get, }; static int mlxsw_thermal_module_bind(struct thermal_zone_device *tzdev, @@ -450,7 +503,7 @@ static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev, struct mlxsw_thermal *thermal = tz->parent; struct device *dev = thermal->bus_info->dev; char mtmp_pl[MLXSW_REG_MTMP_LEN]; - unsigned int temp; + int temp; int err; /* Read module temperature.
*/ @@ -466,13 +519,15 @@ static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev, return 0; } mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); - *p_temp = (int) temp; + *p_temp = temp; if (!temp) return 0; /* Update trip points. */ - mlxsw_thermal_module_trips_update(dev, thermal->core, tz); + err = mlxsw_thermal_module_trips_update(dev, thermal->core, tz); + if (!err && temp > 0) + mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp); return 0; } @@ -537,10 +592,6 @@ mlxsw_thermal_module_trip_hyst_set(struct thermal_zone_device *tzdev, int trip, return 0; } -static struct thermal_zone_params mlxsw_thermal_module_params = { - .governor_name = "user_space", -}; - static struct thermal_zone_device_ops mlxsw_thermal_module_ops = { .bind = mlxsw_thermal_module_bind, .unbind = mlxsw_thermal_module_unbind, @@ -552,6 +603,46 @@ static struct thermal_zone_device_ops mlxsw_thermal_module_ops = { .set_trip_temp = mlxsw_thermal_module_trip_temp_set, .get_trip_hyst = mlxsw_thermal_module_trip_hyst_get, .set_trip_hyst = mlxsw_thermal_module_trip_hyst_set, + .get_trend = mlxsw_thermal_trend_get, +}; + +static int mlxsw_thermal_gearbox_temp_get(struct thermal_zone_device *tzdev, + int *p_temp) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + struct mlxsw_thermal *thermal = tz->parent; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; + u16 index; + int temp; + int err; + + index = MLXSW_REG_MTMP_GBOX_INDEX_MIN + tz->module; + mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false); + + err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl); + if (err) + return err; + + mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); + if (temp > 0) + mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp); + + *p_temp = temp; + return 0; +} + +static struct thermal_zone_device_ops mlxsw_thermal_gearbox_ops = { + .bind = mlxsw_thermal_module_bind, + .unbind = mlxsw_thermal_module_unbind, + .get_mode = mlxsw_thermal_module_mode_get, + .set_mode = mlxsw_thermal_module_mode_set, + .get_temp = mlxsw_thermal_gearbox_temp_get, + .get_trip_type = mlxsw_thermal_module_trip_type_get, + .get_trip_temp = mlxsw_thermal_module_trip_temp_get, + .set_trip_temp = mlxsw_thermal_module_trip_temp_set, + .get_trip_hyst = mlxsw_thermal_module_trip_hyst_get, + .set_trip_hyst = mlxsw_thermal_module_trip_hyst_set, + .get_trend = mlxsw_thermal_trend_get, }; static int mlxsw_thermal_get_max_state(struct thermal_cooling_device *cdev, @@ -667,13 +758,13 @@ mlxsw_thermal_module_tz_init(struct mlxsw_thermal_module *module_tz) MLXSW_THERMAL_TRIP_MASK, module_tz, &mlxsw_thermal_module_ops, - &mlxsw_thermal_module_params, - 0, 0); + NULL, 0, 0); if (IS_ERR(module_tz->tzdev)) { err = PTR_ERR(module_tz->tzdev); return err; } + module_tz->mode = THERMAL_DEVICE_ENABLED; return 0; } @@ -779,6 +870,92 @@ mlxsw_thermal_modules_fini(struct mlxsw_thermal *thermal) kfree(thermal->tz_module_arr); } +static int +mlxsw_thermal_gearbox_tz_init(struct mlxsw_thermal_module *gearbox_tz) +{ + char tz_name[MLXSW_THERMAL_ZONE_MAX_NAME]; + + snprintf(tz_name, sizeof(tz_name), "mlxsw-gearbox%d", + gearbox_tz->module + 1); + gearbox_tz->tzdev = thermal_zone_device_register(tz_name, + MLXSW_THERMAL_NUM_TRIPS, + MLXSW_THERMAL_TRIP_MASK, + gearbox_tz, + &mlxsw_thermal_gearbox_ops, + NULL, 0, 0); + if (IS_ERR(gearbox_tz->tzdev)) + return PTR_ERR(gearbox_tz->tzdev); + + gearbox_tz->mode = THERMAL_DEVICE_ENABLED; + return 0; +} + +static void +mlxsw_thermal_gearbox_tz_fini(struct mlxsw_thermal_module *gearbox_tz) +{ + 
thermal_zone_device_unregister(gearbox_tz->tzdev); +} + +static int +mlxsw_thermal_gearboxes_init(struct device *dev, struct mlxsw_core *core, + struct mlxsw_thermal *thermal) +{ + struct mlxsw_thermal_module *gearbox_tz; + char mgpir_pl[MLXSW_REG_MGPIR_LEN]; + int i; + int err; + + if (!mlxsw_core_res_query_enabled(core)) + return 0; + + mlxsw_reg_mgpir_pack(mgpir_pl); + err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl); + if (err) + return err; + + mlxsw_reg_mgpir_unpack(mgpir_pl, &thermal->tz_gearbox_num, NULL, NULL); + if (!thermal->tz_gearbox_num) + return 0; + + thermal->tz_gearbox_arr = kcalloc(thermal->tz_gearbox_num, + sizeof(*thermal->tz_gearbox_arr), + GFP_KERNEL); + if (!thermal->tz_gearbox_arr) + return -ENOMEM; + + for (i = 0; i < thermal->tz_gearbox_num; i++) { + gearbox_tz = &thermal->tz_gearbox_arr[i]; + memcpy(gearbox_tz->trips, default_thermal_trips, + sizeof(thermal->trips)); + gearbox_tz->module = i; + gearbox_tz->parent = thermal; + err = mlxsw_thermal_gearbox_tz_init(gearbox_tz); + if (err) + goto err_unreg_tz_gearbox; + } + + return 0; + +err_unreg_tz_gearbox: + for (i--; i >= 0; i--) + mlxsw_thermal_gearbox_tz_fini(&thermal->tz_gearbox_arr[i]); + kfree(thermal->tz_gearbox_arr); + return err; +} + +static void +mlxsw_thermal_gearboxes_fini(struct mlxsw_thermal *thermal) +{ + int i; + + if (!mlxsw_core_res_query_enabled(thermal->core)) + return; + + for (i = thermal->tz_gearbox_num - 1; i >= 0; i--) + mlxsw_thermal_gearbox_tz_fini(&thermal->tz_gearbox_arr[i]); + kfree(thermal->tz_gearbox_arr); +} + int mlxsw_thermal_init(struct mlxsw_core *core, const struct mlxsw_bus_info *bus_info, struct mlxsw_thermal **p_thermal) @@ -869,10 +1046,16 @@ int mlxsw_thermal_init(struct mlxsw_core *core, if (err) goto err_unreg_tzdev; + err = mlxsw_thermal_gearboxes_init(dev, core, thermal); + if (err) + goto err_unreg_modules_tzdev; + thermal->mode = THERMAL_DEVICE_ENABLED; *p_thermal = thermal; return 0; +err_unreg_modules_tzdev: + mlxsw_thermal_modules_fini(thermal); err_unreg_tzdev: if (thermal->tzdev) { thermal_zone_device_unregister(thermal->tzdev); @@ -891,6 +1074,7 @@ void mlxsw_thermal_fini(struct mlxsw_thermal *thermal) { int i; + mlxsw_thermal_gearboxes_fini(thermal); mlxsw_thermal_modules_fini(thermal); if (thermal->tzdev) { thermal_zone_device_unregister(thermal->tzdev); diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 6acb9bbfdf89..051b19388a81 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -508,17 +508,28 @@ static void mlxsw_pci_cqe_sdq_handle(struct mlxsw_pci *mlxsw_pci, { struct pci_dev *pdev = mlxsw_pci->pdev; struct mlxsw_pci_queue_elem_info *elem_info; + struct mlxsw_tx_info tx_info; char *wqe; struct sk_buff *skb; int i; spin_lock(&q->lock); elem_info = mlxsw_pci_queue_elem_info_consumer_get(q); + tx_info = mlxsw_skb_cb(elem_info->u.sdq.skb)->tx_info; skb = elem_info->u.sdq.skb; wqe = elem_info->elem; for (i = 0; i < MLXSW_PCI_WQE_SG_ENTRIES; i++) mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, i, DMA_TO_DEVICE); - dev_kfree_skb_any(skb); + + if (unlikely(!tx_info.is_emad && + skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { + mlxsw_core_ptp_transmitted(mlxsw_pci->core, skb, + tx_info.local_port); + skb = NULL; + } + + if (skb) + dev_kfree_skb_any(skb); elem_info->u.sdq.skb = NULL; if (q->consumer_counter++ != consumer_counter_limit) @@ -1548,6 +1559,7 @@ static int mlxsw_pci_skb_transmit(void *bus_priv, struct sk_buff *skb, err = -EAGAIN; goto unlock; 
} + mlxsw_skb_cb(skb)->tx_info = *tx_info; elem_info->u.sdq.skb = skb; wqe = elem_info->elem; @@ -1571,6 +1583,9 @@ static int mlxsw_pci_skb_transmit(void *bus_priv, struct sk_buff *skb, goto unmap_frags; } + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + /* Set unused sq entries byte count to zero. */ for (i++; i < MLXSW_PCI_WQE_SG_ENTRIES; i++) mlxsw_pci_wqe_byte_count_set(wqe, i, 0); diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 452f645fa040..76ff5b217c04 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -5292,6 +5292,8 @@ enum mlxsw_reg_htgt_trap_group { MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD, MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND, MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR, + MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0, + MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1, }; /* reg_htgt_trap_group @@ -8050,7 +8052,10 @@ MLXSW_REG_DEFINE(mtmp, MLXSW_REG_MTMP_ID, MLXSW_REG_MTMP_LEN); MLXSW_ITEM32(reg, mtmp, sensor_index, 0x00, 0, 12); /* Convert to milli degrees Celsius */ -#define MLXSW_REG_MTMP_TEMP_TO_MC(val) (val * 125) +#define MLXSW_REG_MTMP_TEMP_TO_MC(val) ({ typeof(val) v_ = (val); \ + ((v_) >= 0) ? ((v_) * 125) : \ + ((s16)((GENMASK(15, 0) + (v_) + 1) \ + * 125)); }) /* reg_mtmp_temperature * Temperature reading from the sensor. Reading is in 0.125 Celsius @@ -8121,11 +8126,10 @@ static inline void mlxsw_reg_mtmp_pack(char *payload, u16 sensor_index, MLXSW_REG_MTMP_THRESH_HI); } -static inline void mlxsw_reg_mtmp_unpack(char *payload, unsigned int *p_temp, - unsigned int *p_max_temp, - char *sensor_name) +static inline void mlxsw_reg_mtmp_unpack(char *payload, int *p_temp, + int *p_max_temp, char *sensor_name) { - u16 temp; + s16 temp; if (p_temp) { temp = mlxsw_reg_mtmp_temperature_get(payload); @@ -9146,6 +9150,216 @@ static inline void mlxsw_reg_mprs_pack(char *payload, u16 parsing_depth, mlxsw_reg_mprs_vxlan_udp_dport_set(payload, vxlan_udp_dport); } +/* MOGCR - Monitoring Global Configuration Register + * ------------------------------------------------ + */ +#define MLXSW_REG_MOGCR_ID 0x9086 +#define MLXSW_REG_MOGCR_LEN 0x20 + +MLXSW_REG_DEFINE(mogcr, MLXSW_REG_MOGCR_ID, MLXSW_REG_MOGCR_LEN); + +/* reg_mogcr_ptp_iftc + * PTP Ingress FIFO Trap Clear + * The PTP_ING_FIFO trap provides MTPPTR with clr according + * to this value. Default 0. + * Reserved when IB switches and when SwitchX/-2, Spectrum-2 + * Access: RW + */ +MLXSW_ITEM32(reg, mogcr, ptp_iftc, 0x00, 1, 1); + +/* reg_mogcr_ptp_eftc + * PTP Egress FIFO Trap Clear + * The PTP_EGR_FIFO trap provides MTPPTR with clr according + * to this value. Default 0. + * Reserved when IB switches and when SwitchX/-2, Spectrum-2 + * Access: RW + */ +MLXSW_ITEM32(reg, mogcr, ptp_eftc, 0x00, 0, 1); + +/* MTPPPC - Time Precision Packet Port Configuration + * ------------------------------------------------- + * This register serves for configuration of which PTP messages should be + * timestamped. This is a global configuration, despite the register name. + * + * Reserved when Spectrum-2. + */ +#define MLXSW_REG_MTPPPC_ID 0x9090 +#define MLXSW_REG_MTPPPC_LEN 0x28 + +MLXSW_REG_DEFINE(mtpppc, MLXSW_REG_MTPPPC_ID, MLXSW_REG_MTPPPC_LEN); + +/* reg_mtpppc_ing_timestamp_message_type + * Bitwise vector of PTP message types to timestamp at ingress. + * MessageType field as defined by IEEE 1588 + * Each bit corresponds to a value (e.g. 
Bit0: Sync, Bit1: Delay_Req) + * Default all 0 + * Access: RW + */ +MLXSW_ITEM32(reg, mtpppc, ing_timestamp_message_type, 0x08, 0, 16); + +/* reg_mtpppc_egr_timestamp_message_type + * Bitwise vector of PTP message types to timestamp at egress. + * MessageType field as defined by IEEE 1588 + * Each bit corresponds to a value (e.g. Bit0: Sync, Bit1: Delay_Req) + * Default all 0 + * Access: RW + */ +MLXSW_ITEM32(reg, mtpppc, egr_timestamp_message_type, 0x0C, 0, 16); + +static inline void mlxsw_reg_mtpppc_pack(char *payload, u16 ing, u16 egr) +{ + MLXSW_REG_ZERO(mtpppc, payload); + mlxsw_reg_mtpppc_ing_timestamp_message_type_set(payload, ing); + mlxsw_reg_mtpppc_egr_timestamp_message_type_set(payload, egr); +} + +/* MTPPTR - Time Precision Packet Timestamping Reading + * --------------------------------------------------- + * The MTPPTR is used for reading the per port PTP timestamp FIFO. + * There is a trap for packets which are latched to the timestamp FIFO, thus the + * SW knows which FIFO to read. Note that packets enter the FIFO before being + * trapped. The sequence number is used to synchronize the timestamp FIFO + * entries and the trapped packets. + * Reserved when Spectrum-2. + */ + +#define MLXSW_REG_MTPPTR_ID 0x9091 +#define MLXSW_REG_MTPPTR_BASE_LEN 0x10 /* base length, without records */ +#define MLXSW_REG_MTPPTR_REC_LEN 0x10 /* record length */ +#define MLXSW_REG_MTPPTR_REC_MAX_COUNT 4 +#define MLXSW_REG_MTPPTR_LEN (MLXSW_REG_MTPPTR_BASE_LEN + \ + MLXSW_REG_MTPPTR_REC_LEN * MLXSW_REG_MTPPTR_REC_MAX_COUNT) + +MLXSW_REG_DEFINE(mtpptr, MLXSW_REG_MTPPTR_ID, MLXSW_REG_MTPPTR_LEN); + +/* reg_mtpptr_local_port + * Not supported for CPU port. + * Access: Index + */ +MLXSW_ITEM32(reg, mtpptr, local_port, 0x00, 16, 8); + +enum mlxsw_reg_mtpptr_dir { + MLXSW_REG_MTPPTR_DIR_INGRESS, + MLXSW_REG_MTPPTR_DIR_EGRESS, +}; + +/* reg_mtpptr_dir + * Direction. + * Access: Index + */ +MLXSW_ITEM32(reg, mtpptr, dir, 0x00, 0, 1); + +/* reg_mtpptr_clr + * Clear the records. + * Access: OP + */ +MLXSW_ITEM32(reg, mtpptr, clr, 0x04, 31, 1); + +/* reg_mtpptr_num_rec + * Number of valid records in the response + * Range 0..cap_ptp_timestamp_fifo + * Access: RO + */ +MLXSW_ITEM32(reg, mtpptr, num_rec, 0x08, 0, 4); + +/* reg_mtpptr_rec_message_type + * MessageType field as defined by IEEE 1588. Each bit corresponds to a value + * (e.g. Bit0: Sync, Bit1: Delay_Req) + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_message_type, + MLXSW_REG_MTPPTR_BASE_LEN, 8, 4, + MLXSW_REG_MTPPTR_REC_LEN, 0, false); + +/* reg_mtpptr_rec_domain_number + * DomainNumber field as defined by IEEE 1588 + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_domain_number, + MLXSW_REG_MTPPTR_BASE_LEN, 0, 8, + MLXSW_REG_MTPPTR_REC_LEN, 0, false); + +/* reg_mtpptr_rec_sequence_id + * SequenceId field as defined by IEEE 1588 + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_sequence_id, + MLXSW_REG_MTPPTR_BASE_LEN, 0, 16, + MLXSW_REG_MTPPTR_REC_LEN, 0x4, false); + +/* reg_mtpptr_rec_timestamp_high + * Timestamp of when the PTP packet has passed through the port. Units of PLL + * clock time. + * For Spectrum-1 the PLL clock is 156.25MHz and PLL clock time is 6.4nSec. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_timestamp_high, + MLXSW_REG_MTPPTR_BASE_LEN, 0, 32, + MLXSW_REG_MTPPTR_REC_LEN, 0x8, false); + +/* reg_mtpptr_rec_timestamp_low + * See rec_timestamp_high.
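/* Taken together, rec_timestamp_high and rec_timestamp_low form one 64-bit
 * cycle count (combined in mlxsw_reg_mtpptr_unpack() below). A minimal
 * conversion sketch under the Spectrum-1 rate quoted above; this helper is
 * hypothetical, and the driver instead feeds cycles through its timecounter
 * so that PHC adjustments are honored.
 */
static u64 sp1_fifo_cycles_to_ns(u32 ts_high, u32 ts_low)
{
	u64 cycles = (u64)ts_high << 32 | ts_low;

	/* 6.4 ns per cycle at 156.25 MHz, i.e. 32 ns per 5 cycles. */
	return cycles * 32 / 5;
}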
+ * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_timestamp_low, + MLXSW_REG_MTPPTR_BASE_LEN, 0, 32, + MLXSW_REG_MTPPTR_REC_LEN, 0xC, false); + +static inline void mlxsw_reg_mtpptr_unpack(const char *payload, + unsigned int rec, + u8 *p_message_type, + u8 *p_domain_number, + u16 *p_sequence_id, + u64 *p_timestamp) +{ + u32 timestamp_high, timestamp_low; + + *p_message_type = mlxsw_reg_mtpptr_rec_message_type_get(payload, rec); + *p_domain_number = mlxsw_reg_mtpptr_rec_domain_number_get(payload, rec); + *p_sequence_id = mlxsw_reg_mtpptr_rec_sequence_id_get(payload, rec); + timestamp_high = mlxsw_reg_mtpptr_rec_timestamp_high_get(payload, rec); + timestamp_low = mlxsw_reg_mtpptr_rec_timestamp_low_get(payload, rec); + *p_timestamp = (u64)timestamp_high << 32 | timestamp_low; +} + +/* MTPTPT - Monitoring Precision Time Protocol Trap Register + * --------------------------------------------------------- + * This register is used for configuring under which trap to deliver PTP + * packets depending on the type of the packet. + */ +#define MLXSW_REG_MTPTPT_ID 0x9092 +#define MLXSW_REG_MTPTPT_LEN 0x08 + +MLXSW_REG_DEFINE(mtptpt, MLXSW_REG_MTPTPT_ID, MLXSW_REG_MTPTPT_LEN); + +enum mlxsw_reg_mtptpt_trap_id { + MLXSW_REG_MTPTPT_TRAP_ID_PTP0, + MLXSW_REG_MTPTPT_TRAP_ID_PTP1, +}; + +/* reg_mtptpt_trap_id + * Trap id. + * Access: Index + */ +MLXSW_ITEM32(reg, mtptpt, trap_id, 0x00, 0, 4); + +/* reg_mtptpt_message_type + * Bitwise vector of PTP message types to trap. This is a necessary but + * non-sufficient condition, since timestamping must also be enabled per + * port. See MTPPPC. + * Message types are defined by IEEE 1588. Each bit corresponds to a value (e.g. + * Bit0: Sync, Bit1: Delay_Req) + */ +MLXSW_ITEM32(reg, mtptpt, message_type, 0x04, 0, 16); + +static inline void mlxsw_reg_mtptptp_pack(char *payload, + enum mlxsw_reg_mtptpt_trap_id trap_id, + u16 message_type) +{ + MLXSW_REG_ZERO(mtptpt, payload); + mlxsw_reg_mtptpt_trap_id_set(payload, trap_id); + mlxsw_reg_mtptpt_message_type_set(payload, message_type); +} + /* MGPIR - Management General Peripheral Information Register * ---------------------------------------------------------- * MGPIR register allows software to query the hardware and @@ -10214,6 +10428,10 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(mcda), MLXSW_REG(mgpc), MLXSW_REG(mprs), + MLXSW_REG(mogcr), + MLXSW_REG(mtpppc), + MLXSW_REG(mtpptr), + MLXSW_REG(mtptpt), MLXSW_REG(mgpir), MLXSW_REG(tngcr), MLXSW_REG(tnumt), diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 3e8593824b33..755b14b82c8f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -147,6 +147,34 @@ struct mlxsw_sp_mlxfw_dev { struct mlxsw_sp *mlxsw_sp; }; +struct mlxsw_sp_ptp_ops { + struct mlxsw_sp_ptp_clock * + (*clock_init)(struct mlxsw_sp *mlxsw_sp, struct device *dev); + void (*clock_fini)(struct mlxsw_sp_ptp_clock *clock); + + struct mlxsw_sp_ptp_state *(*init)(struct mlxsw_sp *mlxsw_sp); + void (*fini)(struct mlxsw_sp_ptp_state *ptp_state); + + /* Notify a driver that a packet that might be PTP was received. Driver + * is responsible for freeing the passed-in SKB. + */ + void (*receive)(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, + u8 local_port); + + /* Notify a driver that a timestamped packet was transmitted. Driver + * is responsible for freeing the passed-in SKB.
+ */ + void (*transmitted)(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, + u8 local_port); + + int (*hwtstamp_get)(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config); + int (*hwtstamp_set)(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config); + int (*get_ts_info)(struct mlxsw_sp *mlxsw_sp, + struct ethtool_ts_info *info); +}; + static int mlxsw_sp_component_query(struct mlxfw_dev *mlxfw_dev, u16 component_index, u32 *p_max_size, u8 *p_align_bits, u16 *p_max_write_size) @@ -778,6 +806,8 @@ static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb, u64 len; int err; + memset(skb->cb, 0, sizeof(struct mlxsw_skb_cb)); + if (mlxsw_core_skb_transmit_busy(mlxsw_sp->core, &tx_info)) return NETDEV_TX_BUSY; @@ -1785,6 +1815,65 @@ mlxsw_sp_port_get_devlink_port(struct net_device *dev) mlxsw_sp_port->local_port); } +static int mlxsw_sp_port_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct ifreq *ifr) +{ + struct hwtstamp_config config; + int err; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + err = mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_set(mlxsw_sp_port, + &config); + if (err) + return err; + + if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) + return -EFAULT; + + return 0; +} + +static int mlxsw_sp_port_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, + struct ifreq *ifr) +{ + struct hwtstamp_config config; + int err; + + err = mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_get(mlxsw_sp_port, + &config); + if (err) + return err; + + if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) + return -EFAULT; + + return 0; +} + +static inline void mlxsw_sp_port_ptp_clear(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct hwtstamp_config config = {0}; + + mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_set(mlxsw_sp_port, &config); +} + +static int +mlxsw_sp_port_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + + switch (cmd) { + case SIOCSHWTSTAMP: + return mlxsw_sp_port_hwtstamp_set(mlxsw_sp_port, ifr); + case SIOCGHWTSTAMP: + return mlxsw_sp_port_hwtstamp_get(mlxsw_sp_port, ifr); + default: + return -EOPNOTSUPP; + } +} + static const struct net_device_ops mlxsw_sp_port_netdev_ops = { .ndo_open = mlxsw_sp_port_open, .ndo_stop = mlxsw_sp_port_stop, @@ -1800,6 +1889,7 @@ static const struct net_device_ops mlxsw_sp_port_netdev_ops = { .ndo_vlan_rx_kill_vid = mlxsw_sp_port_kill_vid, .ndo_set_features = mlxsw_sp_set_features, .ndo_get_devlink_port = mlxsw_sp_port_get_devlink_port, + .ndo_do_ioctl = mlxsw_sp_port_ioctl, }; static void mlxsw_sp_port_get_drvinfo(struct net_device *dev, @@ -3228,6 +3318,15 @@ static int mlxsw_sp_get_module_eeprom(struct net_device *netdev, return err; } +static int +mlxsw_sp_get_ts_info(struct net_device *netdev, struct ethtool_ts_info *info) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + + return mlxsw_sp->ptp_ops->get_ts_info(mlxsw_sp, info); +} + static const struct ethtool_ops mlxsw_sp_port_ethtool_ops = { .get_drvinfo = mlxsw_sp_port_get_drvinfo, .get_link = ethtool_op_get_link, @@ -3241,6 +3340,7 @@ static const struct ethtool_ops mlxsw_sp_port_ethtool_ops = { .set_link_ksettings = mlxsw_sp_port_set_link_ksettings, .get_module_info = mlxsw_sp_get_module_info, .get_module_eeprom = mlxsw_sp_get_module_eeprom, + .get_ts_info = mlxsw_sp_get_ts_info, }; static int @@ -3657,6 +3757,7 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp 
*mlxsw_sp, u8 local_port) struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port]; cancel_delayed_work_sync(&mlxsw_sp_port->periodic_hw_stats.update_dw); + mlxsw_sp_port_ptp_clear(mlxsw_sp_port); mlxsw_core_port_clear(mlxsw_sp->core, local_port, mlxsw_sp); unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */ mlxsw_sp->ports[local_port] = NULL; @@ -3947,8 +4048,48 @@ static void mlxsw_sp_pude_event_func(const struct mlxsw_reg_info *reg, } } -static void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb, - u8 local_port, void *priv) +static void mlxsw_sp1_ptp_fifo_event_func(struct mlxsw_sp *mlxsw_sp, + char *mtpptr_pl, bool ingress) +{ + u8 local_port; + u8 num_rec; + int i; + + local_port = mlxsw_reg_mtpptr_local_port_get(mtpptr_pl); + num_rec = mlxsw_reg_mtpptr_num_rec_get(mtpptr_pl); + for (i = 0; i < num_rec; i++) { + u8 domain_number; + u8 message_type; + u16 sequence_id; + u64 timestamp; + + mlxsw_reg_mtpptr_unpack(mtpptr_pl, i, &message_type, + &domain_number, &sequence_id, + &timestamp); + mlxsw_sp1_ptp_got_timestamp(mlxsw_sp, ingress, local_port, + message_type, domain_number, + sequence_id, timestamp); + } +} + +static void mlxsw_sp1_ptp_ing_fifo_event_func(const struct mlxsw_reg_info *reg, + char *mtpptr_pl, void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + mlxsw_sp1_ptp_fifo_event_func(mlxsw_sp, mtpptr_pl, true); +} + +static void mlxsw_sp1_ptp_egr_fifo_event_func(const struct mlxsw_reg_info *reg, + char *mtpptr_pl, void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + mlxsw_sp1_ptp_fifo_event_func(mlxsw_sp, mtpptr_pl, false); +} + +void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb, + u8 local_port, void *priv) { struct mlxsw_sp *mlxsw_sp = priv; struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port]; @@ -4022,6 +4163,14 @@ out: consume_skb(skb); } +static void mlxsw_sp_rx_listener_ptp(struct sk_buff *skb, u8 local_port, + void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + mlxsw_sp->ptp_ops->receive(mlxsw_sp, skb, local_port); +} + #define MLXSW_SP_RXL_NO_MARK(_trap_id, _action, _trap_group, _is_ctrl) \ MLXSW_RXL(mlxsw_sp_rx_listener_no_mark_func, _trap_id, _action, \ _is_ctrl, SP_##_trap_group, DISCARD) @@ -4043,7 +4192,8 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { /* L2 traps */ MLXSW_SP_RXL_NO_MARK(STP, TRAP_TO_CPU, STP, true), MLXSW_SP_RXL_NO_MARK(LACP, TRAP_TO_CPU, LACP, true), - MLXSW_SP_RXL_NO_MARK(LLDP, TRAP_TO_CPU, LLDP, true), + MLXSW_RXL(mlxsw_sp_rx_listener_ptp, LLDP, TRAP_TO_CPU, + false, SP_LLDP, DISCARD), MLXSW_SP_RXL_MARK(DHCP, MIRROR_TO_CPU, DHCP, false), MLXSW_SP_RXL_MARK(IGMP_QUERY, MIRROR_TO_CPU, IGMP, false), MLXSW_SP_RXL_NO_MARK(IGMP_V1_REPORT, TRAP_TO_CPU, IGMP, false), @@ -4112,6 +4262,16 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { /* NVE traps */ MLXSW_SP_RXL_MARK(NVE_ENCAP_ARP, TRAP_TO_CPU, ARP, false), MLXSW_SP_RXL_NO_MARK(NVE_DECAP_ARP, TRAP_TO_CPU, ARP, false), + /* PTP traps */ + MLXSW_RXL(mlxsw_sp_rx_listener_ptp, PTP0, TRAP_TO_CPU, + false, SP_PTP0, DISCARD), + MLXSW_SP_RXL_NO_MARK(PTP1, TRAP_TO_CPU, PTP1, false), +}; + +static const struct mlxsw_listener mlxsw_sp1_listener[] = { + /* Events */ + MLXSW_EVENTL(mlxsw_sp1_ptp_egr_fifo_event_func, PTP_EGR_FIFO, SP_PTP0), + MLXSW_EVENTL(mlxsw_sp1_ptp_ing_fifo_event_func, PTP_ING_FIFO, SP_PTP0), }; static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) @@ -4163,6 +4323,14 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) rate = 1024; burst_size = 7; break; + case
MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0: + rate = 24 * 1024; + burst_size = 12; + break; + case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1: + rate = 19 * 1024; + burst_size = 12; + break; default: continue; } @@ -4201,6 +4369,7 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core) case MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF: case MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0: priority = 5; tc = 5; break; @@ -4218,6 +4387,7 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core) case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND: case MLXSW_REG_HTGT_TRAP_GROUP_SP_RPF: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1: priority = 2; tc = 2; break; @@ -4251,22 +4421,16 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core) return 0; } -static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp) +static int mlxsw_sp_traps_register(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_listener listeners[], + size_t listeners_count) { int i; int err; - err = mlxsw_sp_cpu_policers_set(mlxsw_sp->core); - if (err) - return err; - - err = mlxsw_sp_trap_groups_set(mlxsw_sp->core); - if (err) - return err; - - for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener); i++) { + for (i = 0; i < listeners_count; i++) { err = mlxsw_core_trap_register(mlxsw_sp->core, - &mlxsw_sp_listener[i], + &listeners[i], mlxsw_sp); if (err) goto err_listener_register; @@ -4277,23 +4441,63 @@ static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp) err_listener_register: for (i--; i >= 0; i--) { mlxsw_core_trap_unregister(mlxsw_sp->core, - &mlxsw_sp_listener[i], + &listeners[i], mlxsw_sp); } return err; } -static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp) +static void mlxsw_sp_traps_unregister(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_listener listeners[], + size_t listeners_count) { int i; - for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener); i++) { + for (i = 0; i < listeners_count; i++) { mlxsw_core_trap_unregister(mlxsw_sp->core, - &mlxsw_sp_listener[i], + &listeners[i], mlxsw_sp); } } +static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp) +{ + int err; + + err = mlxsw_sp_cpu_policers_set(mlxsw_sp->core); + if (err) + return err; + + err = mlxsw_sp_trap_groups_set(mlxsw_sp->core); + if (err) + return err; + + err = mlxsw_sp_traps_register(mlxsw_sp, mlxsw_sp_listener, + ARRAY_SIZE(mlxsw_sp_listener)); + if (err) + return err; + + err = mlxsw_sp_traps_register(mlxsw_sp, mlxsw_sp->listeners, + mlxsw_sp->listeners_count); + if (err) + goto err_extra_traps_init; + + return 0; + +err_extra_traps_init: + mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp_listener, + ARRAY_SIZE(mlxsw_sp_listener)); + return err; +} + +static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp->listeners, + mlxsw_sp->listeners_count); + mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp_listener, + ARRAY_SIZE(mlxsw_sp_listener)); +} + #define MLXSW_SP_LAG_SEED_INIT 0xcafecafe static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp) @@ -4346,20 +4550,28 @@ static int mlxsw_sp_basic_trap_groups_set(struct mlxsw_core *mlxsw_core) return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl); } -struct mlxsw_sp_ptp_ops { - struct mlxsw_sp_ptp_clock * - (*clock_init)(struct mlxsw_sp *mlxsw_sp, struct device *dev); - void (*clock_fini)(struct mlxsw_sp_ptp_clock *clock); -}; - static const struct mlxsw_sp_ptp_ops mlxsw_sp1_ptp_ops = { .clock_init = mlxsw_sp1_ptp_clock_init, .clock_fini = 
mlxsw_sp1_ptp_clock_fini, + .init = mlxsw_sp1_ptp_init, + .fini = mlxsw_sp1_ptp_fini, + .receive = mlxsw_sp1_ptp_receive, + .transmitted = mlxsw_sp1_ptp_transmitted, + .hwtstamp_get = mlxsw_sp1_ptp_hwtstamp_get, + .hwtstamp_set = mlxsw_sp1_ptp_hwtstamp_set, + .get_ts_info = mlxsw_sp1_ptp_get_ts_info, }; static const struct mlxsw_sp_ptp_ops mlxsw_sp2_ptp_ops = { .clock_init = mlxsw_sp2_ptp_clock_init, .clock_fini = mlxsw_sp2_ptp_clock_fini, + .init = mlxsw_sp2_ptp_init, + .fini = mlxsw_sp2_ptp_fini, + .receive = mlxsw_sp2_ptp_receive, + .transmitted = mlxsw_sp2_ptp_transmitted, + .hwtstamp_get = mlxsw_sp2_ptp_hwtstamp_get, + .hwtstamp_set = mlxsw_sp2_ptp_hwtstamp_set, + .get_ts_info = mlxsw_sp2_ptp_get_ts_info, }; static int mlxsw_sp_netdevice_event(struct notifier_block *unused, @@ -4471,6 +4683,16 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, } } + if (mlxsw_sp->clock) { + /* NULL is a valid return value from ptp_ops->init */ + mlxsw_sp->ptp_state = mlxsw_sp->ptp_ops->init(mlxsw_sp); + if (IS_ERR(mlxsw_sp->ptp_state)) { + err = PTR_ERR(mlxsw_sp->ptp_state); + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize PTP\n"); + goto err_ptp_init; + } + } + /* Initialize netdevice notifier after router and SPAN is initialized, * so that the event handler can use router structures and call SPAN * respin. @@ -4502,6 +4724,9 @@ err_dpipe_init: unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb); err_netdev_notifier: if (mlxsw_sp->clock) + mlxsw_sp->ptp_ops->fini(mlxsw_sp->ptp_state); +err_ptp_init: + if (mlxsw_sp->clock) mlxsw_sp->ptp_ops->clock_fini(mlxsw_sp->clock); err_ptp_clock_init: mlxsw_sp_router_fini(mlxsw_sp); @@ -4548,6 +4773,8 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->sb_vals = &mlxsw_sp1_sb_vals; mlxsw_sp->port_type_speed_ops = &mlxsw_sp1_port_type_speed_ops; mlxsw_sp->ptp_ops = &mlxsw_sp1_ptp_ops; + mlxsw_sp->listeners = mlxsw_sp1_listener; + mlxsw_sp->listeners_count = ARRAY_SIZE(mlxsw_sp1_listener); return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info); } @@ -4579,8 +4806,10 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) mlxsw_sp_ports_remove(mlxsw_sp); mlxsw_sp_dpipe_fini(mlxsw_sp); unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb); - if (mlxsw_sp->clock) + if (mlxsw_sp->clock) { + mlxsw_sp->ptp_ops->fini(mlxsw_sp->ptp_state); mlxsw_sp->ptp_ops->clock_fini(mlxsw_sp->clock); + } mlxsw_sp_router_fini(mlxsw_sp); mlxsw_sp_acl_fini(mlxsw_sp); mlxsw_sp_nve_fini(mlxsw_sp); @@ -4923,6 +5152,15 @@ static void mlxsw_sp2_params_unregister(struct mlxsw_core *mlxsw_core) mlxsw_sp_params_unregister(mlxsw_core); } +static void mlxsw_sp_ptp_transmitted(struct mlxsw_core *mlxsw_core, + struct sk_buff *skb, u8 local_port) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + + skb_pull(skb, MLXSW_TXHDR_LEN); + mlxsw_sp->ptp_ops->transmitted(mlxsw_sp, skb, local_port); +} + static struct mlxsw_driver mlxsw_sp1_driver = { .kind = mlxsw_sp1_driver_name, .priv_size = sizeof(struct mlxsw_sp), @@ -4947,6 +5185,7 @@ static struct mlxsw_driver mlxsw_sp1_driver = { .kvd_sizes_get = mlxsw_sp_kvd_sizes_get, .params_register = mlxsw_sp_params_register, .params_unregister = mlxsw_sp_params_unregister, + .ptp_transmitted = mlxsw_sp_ptp_transmitted, .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sp1_config_profile, .res_query_enabled = true, @@ -4975,6 +5214,7 @@ static struct mlxsw_driver mlxsw_sp2_driver = { .resources_register = mlxsw_sp2_resources_register, .params_register = mlxsw_sp2_params_register, .params_unregister = 
mlxsw_sp2_params_unregister, + .ptp_transmitted = mlxsw_sp_ptp_transmitted, .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sp2_config_profile, .res_query_enabled = true, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 84f4276193b3..7f8427c1a997 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -136,6 +136,7 @@ struct mlxsw_sp_acl_tcam_ops; struct mlxsw_sp_nve_ops; struct mlxsw_sp_sb_vals; struct mlxsw_sp_port_type_speed_ops; +struct mlxsw_sp_ptp_state; struct mlxsw_sp_ptp_ops; struct mlxsw_sp { @@ -157,6 +158,7 @@ struct mlxsw_sp { struct mlxsw_sp_nve *nve; struct notifier_block netdevice_nb; struct mlxsw_sp_ptp_clock *clock; + struct mlxsw_sp_ptp_state *ptp_state; struct mlxsw_sp_counter_pool *counter_pool; struct { @@ -175,6 +177,8 @@ struct mlxsw_sp { const struct mlxsw_sp_sb_vals *sb_vals; const struct mlxsw_sp_port_type_speed_ops *port_type_speed_ops; const struct mlxsw_sp_ptp_ops *ptp_ops; + const struct mlxsw_listener *listeners; + size_t listeners_count; }; static inline struct mlxsw_sp_upper * @@ -262,6 +266,11 @@ struct mlxsw_sp_port { unsigned acl_rule_count; struct mlxsw_sp_acl_block *ing_acl_block; struct mlxsw_sp_acl_block *eg_acl_block; + struct { + struct hwtstamp_config hwtstamp_config; + u16 ing_types; + u16 egr_types; + } ptp; }; struct mlxsw_sp_port_type_speed_ops { @@ -438,6 +447,8 @@ struct mlxsw_sp_fid *mlxsw_sp_bridge_fid_get(struct mlxsw_sp *mlxsw_sp, extern struct notifier_block mlxsw_sp_switchdev_notifier; /* spectrum.c */ +void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb, + u8 local_port, void *priv); int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index, bool dwrr, u8 dwrr_weight); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c index bb6c0cb25771..437023d67a3b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c @@ -6,7 +6,13 @@ #include <linux/timecounter.h> #include <linux/spinlock.h> #include <linux/device.h> +#include <linux/rhashtable.h> +#include <linux/ptp_classify.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <linux/net_tstamp.h> +#include "spectrum.h" #include "spectrum_ptp.h" #include "core.h" @@ -14,6 +20,44 @@ #define MLXSW_SP1_PTP_CLOCK_FREQ_KHZ 156257 /* 6.4nSec */ #define MLXSW_SP1_PTP_CLOCK_MASK 64 +#define MLXSW_SP1_PTP_HT_GC_INTERVAL 500 /* ms */ + +/* How long, approximately, should the unmatched entries stay in the hash table + * before they are collected. Should be evenly divisible by the GC interval. 
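/* Worked example of the two constants above, as implemented further down by
 * mlxsw_sp1_ptp_unmatched_save() and mlxsw_sp1_ptp_ht_gc(): with a 500 ms
 * interval and a 1000 ms timeout, an entry saved while gc_cycle == N is
 * stamped N + 1000/500 == N + 2. The GC work increments gc_cycle every
 * 500 ms and collects entries whose stamp is <= the current cycle, so an
 * unmatched entry lives roughly 1.0-1.5 s depending on where within an
 * interval it was saved.
 */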
+ */ +#define MLXSW_SP1_PTP_HT_GC_TIMEOUT 1000 /* ms */ + +struct mlxsw_sp_ptp_state { + struct mlxsw_sp *mlxsw_sp; + struct rhashtable unmatched_ht; + spinlock_t unmatched_lock; /* protects the HT */ + struct delayed_work ht_gc_dw; + u32 gc_cycle; +}; + +struct mlxsw_sp1_ptp_key { + u8 local_port; + u8 message_type; + u16 sequence_id; + u8 domain_number; + bool ingress; +}; + +struct mlxsw_sp1_ptp_unmatched { + struct mlxsw_sp1_ptp_key key; + struct rhash_head ht_node; + struct rcu_head rcu; + struct sk_buff *skb; + u64 timestamp; + u32 gc_cycle; +}; + +static const struct rhashtable_params mlxsw_sp1_ptp_unmatched_ht_params = { + .key_len = sizeof_field(struct mlxsw_sp1_ptp_unmatched, key), + .key_offset = offsetof(struct mlxsw_sp1_ptp_unmatched, key), + .head_offset = offsetof(struct mlxsw_sp1_ptp_unmatched, ht_node), +}; + struct mlxsw_sp_ptp_clock { struct mlxsw_core *core; spinlock_t lock; /* protect this structure */ @@ -89,9 +133,9 @@ mlxsw_sp1_ptp_phc_settime(struct mlxsw_sp_ptp_clock *clock, u64 nsec) next_sec = div_u64(nsec, NSEC_PER_SEC) + 1; next_sec_in_nsec = next_sec * NSEC_PER_SEC; - spin_lock(&clock->lock); + spin_lock_bh(&clock->lock); cycles = mlxsw_sp1_ptp_ns2cycles(&clock->tc, next_sec_in_nsec); - spin_unlock(&clock->lock); + spin_unlock_bh(&clock->lock); mlxsw_reg_mtpps_vpin_pack(mtpps_pl, cycles); err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtpps), mtpps_pl); @@ -124,11 +168,11 @@ static int mlxsw_sp1_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) adj *= ppb; diff = div_u64(adj, NSEC_PER_SEC); - spin_lock(&clock->lock); + spin_lock_bh(&clock->lock); timecounter_read(&clock->tc); clock->cycles.mult = neg_adj ? clock->nominal_c_mult - diff : clock->nominal_c_mult + diff; - spin_unlock(&clock->lock); + spin_unlock_bh(&clock->lock); return mlxsw_sp1_ptp_phc_adjfreq(clock, neg_adj ? 
-ppb : ppb); } @@ -139,10 +183,10 @@ static int mlxsw_sp1_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) container_of(ptp, struct mlxsw_sp_ptp_clock, ptp_info); u64 nsec; - spin_lock(&clock->lock); + spin_lock_bh(&clock->lock); timecounter_adjtime(&clock->tc, delta); nsec = timecounter_read(&clock->tc); - spin_unlock(&clock->lock); + spin_unlock_bh(&clock->lock); return mlxsw_sp1_ptp_phc_settime(clock, nsec); } @@ -155,10 +199,10 @@ static int mlxsw_sp1_ptp_gettimex(struct ptp_clock_info *ptp, container_of(ptp, struct mlxsw_sp_ptp_clock, ptp_info); u64 cycles, nsec; - spin_lock(&clock->lock); + spin_lock_bh(&clock->lock); cycles = __mlxsw_sp1_ptp_read_frc(clock, sts); nsec = timecounter_cyc2time(&clock->tc, cycles); - spin_unlock(&clock->lock); + spin_unlock_bh(&clock->lock); *ts = ns_to_timespec64(nsec); @@ -172,10 +216,10 @@ static int mlxsw_sp1_ptp_settime(struct ptp_clock_info *ptp, container_of(ptp, struct mlxsw_sp_ptp_clock, ptp_info); u64 nsec = timespec64_to_ns(ts); - spin_lock(&clock->lock); + spin_lock_bh(&clock->lock); timecounter_init(&clock->tc, &clock->cycles, nsec); nsec = timecounter_read(&clock->tc); - spin_unlock(&clock->lock); + spin_unlock_bh(&clock->lock); return mlxsw_sp1_ptp_phc_settime(clock, nsec); } @@ -197,9 +241,9 @@ static void mlxsw_sp1_ptp_clock_overflow(struct work_struct *work) clock = container_of(dwork, struct mlxsw_sp_ptp_clock, overflow_work); - spin_lock(&clock->lock); + spin_lock_bh(&clock->lock); timecounter_read(&clock->tc); - spin_unlock(&clock->lock); + spin_unlock_bh(&clock->lock); mlxsw_core_schedule_dw(&clock->overflow_work, clock->overflow_period); } @@ -264,3 +308,646 @@ void mlxsw_sp1_ptp_clock_fini(struct mlxsw_sp_ptp_clock *clock) cancel_delayed_work_sync(&clock->overflow_work); kfree(clock); } + +static int mlxsw_sp_ptp_parse(struct sk_buff *skb, + u8 *p_domain_number, + u8 *p_message_type, + u16 *p_sequence_id) +{ + unsigned int offset = 0; + unsigned int ptp_class; + u8 *data; + + data = skb_mac_header(skb); + ptp_class = ptp_classify_raw(skb); + + switch (ptp_class & PTP_CLASS_VMASK) { + case PTP_CLASS_V1: + case PTP_CLASS_V2: + break; + default: + return -ERANGE; + } + + if (ptp_class & PTP_CLASS_VLAN) + offset += VLAN_HLEN; + + switch (ptp_class & PTP_CLASS_PMASK) { + case PTP_CLASS_IPV4: + offset += ETH_HLEN + IPV4_HLEN(data + offset) + UDP_HLEN; + break; + case PTP_CLASS_IPV6: + offset += ETH_HLEN + IP6_HLEN + UDP_HLEN; + break; + case PTP_CLASS_L2: + offset += ETH_HLEN; + break; + default: + return -ERANGE; + } + + /* PTP header is 34 bytes. */ + if (skb->len < offset + 34) + return -EINVAL; + + *p_message_type = data[offset] & 0x0f; + *p_domain_number = data[offset + 4]; + *p_sequence_id = (u16)(data[offset + 30]) << 8 | data[offset + 31]; + return 0; +} + +/* Returns NULL on successful insertion, a pointer on conflict, or an ERR_PTR on + * error. 
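/* Worked example of the offsets mlxsw_sp_ptp_parse() computes above, for an
 * untagged L2 PTP frame where the PTP header starts right after the 14-byte
 * Ethernet header (variable names here are illustrative only):
 */
u8 *ptp_hdr = data + ETH_HLEN;
u8 message_type = ptp_hdr[0] & 0x0f;	/* low nibble, e.g. 0x0 for Sync */
u8 domain_number = ptp_hdr[4];
u16 sequence_id = (u16)ptp_hdr[30] << 8 | ptp_hdr[31];	/* big endian */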
+ */ +static struct mlxsw_sp1_ptp_unmatched * +mlxsw_sp1_ptp_unmatched_save(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp1_ptp_key key, + struct sk_buff *skb, + u64 timestamp) +{ + int cycles = MLXSW_SP1_PTP_HT_GC_TIMEOUT / MLXSW_SP1_PTP_HT_GC_INTERVAL; + struct mlxsw_sp_ptp_state *ptp_state = mlxsw_sp->ptp_state; + struct mlxsw_sp1_ptp_unmatched *unmatched; + struct mlxsw_sp1_ptp_unmatched *conflict; + + unmatched = kzalloc(sizeof(*unmatched), GFP_ATOMIC); + if (!unmatched) + return ERR_PTR(-ENOMEM); + + unmatched->key = key; + unmatched->skb = skb; + unmatched->timestamp = timestamp; + unmatched->gc_cycle = mlxsw_sp->ptp_state->gc_cycle + cycles; + + conflict = rhashtable_lookup_get_insert_fast(&ptp_state->unmatched_ht, + &unmatched->ht_node, + mlxsw_sp1_ptp_unmatched_ht_params); + if (conflict) + kfree(unmatched); + + return conflict; +} + +static struct mlxsw_sp1_ptp_unmatched * +mlxsw_sp1_ptp_unmatched_lookup(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp1_ptp_key key) +{ + return rhashtable_lookup(&mlxsw_sp->ptp_state->unmatched_ht, &key, + mlxsw_sp1_ptp_unmatched_ht_params); +} + +static int +mlxsw_sp1_ptp_unmatched_remove(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp1_ptp_unmatched *unmatched) +{ + return rhashtable_remove_fast(&mlxsw_sp->ptp_state->unmatched_ht, + &unmatched->ht_node, + mlxsw_sp1_ptp_unmatched_ht_params); +} + +/* This function is called in the following scenarios: + * + * 1) When a packet is matched with its timestamp. + * 2) In several situations when it is necessary to immediately pass on + * an SKB without a timestamp. + * 3) From GC indirectly through mlxsw_sp1_ptp_unmatched_finish(). + * This case is similar to 2) above. + */ +static void mlxsw_sp1_ptp_packet_finish(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u8 local_port, + bool ingress, + struct skb_shared_hwtstamps *hwtstamps) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + + /* Between capturing the packet and finishing it, there is a window of + * opportunity for the originating port to go away (e.g. due to a + * split). Also make sure the SKB device reference is still valid. + */ + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + if (!(mlxsw_sp_port && (!skb->dev || skb->dev == mlxsw_sp_port->dev))) { + dev_kfree_skb_any(skb); + return; + } + + if (ingress) { + if (hwtstamps) + *skb_hwtstamps(skb) = *hwtstamps; + mlxsw_sp_rx_listener_no_mark_func(skb, local_port, mlxsw_sp); + } else { + /* skb_tstamp_tx() allows hwtstamps to be NULL.
*/ + skb_tstamp_tx(skb, hwtstamps); + dev_kfree_skb_any(skb); + } +} + +static void mlxsw_sp1_packet_timestamp(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp1_ptp_key key, + struct sk_buff *skb, + u64 timestamp) +{ + struct skb_shared_hwtstamps hwtstamps; + u64 nsec; + + spin_lock_bh(&mlxsw_sp->clock->lock); + nsec = timecounter_cyc2time(&mlxsw_sp->clock->tc, timestamp); + spin_unlock_bh(&mlxsw_sp->clock->lock); + + hwtstamps.hwtstamp = ns_to_ktime(nsec); + mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb, + key.local_port, key.ingress, &hwtstamps); +} + +static void +mlxsw_sp1_ptp_unmatched_finish(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp1_ptp_unmatched *unmatched) +{ + if (unmatched->skb && unmatched->timestamp) + mlxsw_sp1_packet_timestamp(mlxsw_sp, unmatched->key, + unmatched->skb, + unmatched->timestamp); + else if (unmatched->skb) + mlxsw_sp1_ptp_packet_finish(mlxsw_sp, unmatched->skb, + unmatched->key.local_port, + unmatched->key.ingress, NULL); + kfree_rcu(unmatched, rcu); +} + +static void mlxsw_sp1_ptp_unmatched_free_fn(void *ptr, void *arg) +{ + struct mlxsw_sp1_ptp_unmatched *unmatched = ptr; + + /* This is invoked at a point where the ports are gone already. Nothing + * to do with whatever is left in the HT but to free it. + */ + if (unmatched->skb) + dev_kfree_skb_any(unmatched->skb); + kfree_rcu(unmatched, rcu); +} + +static void mlxsw_sp1_ptp_got_piece(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp1_ptp_key key, + struct sk_buff *skb, u64 timestamp) +{ + struct mlxsw_sp1_ptp_unmatched *unmatched, *conflict; + int err; + + rcu_read_lock(); + + unmatched = mlxsw_sp1_ptp_unmatched_lookup(mlxsw_sp, key); + + spin_lock(&mlxsw_sp->ptp_state->unmatched_lock); + + if (unmatched) { + /* There was an unmatched entry when we looked, but it may have + * been removed before we took the lock. + */ + err = mlxsw_sp1_ptp_unmatched_remove(mlxsw_sp, unmatched); + if (err) + unmatched = NULL; + } + + if (!unmatched) { + /* We have no unmatched entry, but one may have been added after + * we looked, but before we took the lock. + */ + unmatched = mlxsw_sp1_ptp_unmatched_save(mlxsw_sp, key, + skb, timestamp); + if (IS_ERR(unmatched)) { + if (skb) + mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb, + key.local_port, + key.ingress, NULL); + unmatched = NULL; + } else if (unmatched) { + /* Save just told us, under lock, that the entry is + * there, so this has to work. + */ + err = mlxsw_sp1_ptp_unmatched_remove(mlxsw_sp, + unmatched); + WARN_ON_ONCE(err); + } + } + + /* If unmatched is non-NULL here, it comes either from the lookup, or + * from the save attempt above. In either case the entry was removed + * from the hash table. If unmatched is NULL, a new unmatched entry was + * added to the hash table, and there was no conflict. + */ + + if (skb && unmatched && unmatched->timestamp) { + unmatched->skb = skb; + } else if (timestamp && unmatched && unmatched->skb) { + unmatched->timestamp = timestamp; + } else if (unmatched) { + /* unmatched holds an older entry of the same type: either an + * skb if we are handling skb, or a timestamp if we are handling + * timestamp. We can't match that up, so save what we have. + */ + conflict = mlxsw_sp1_ptp_unmatched_save(mlxsw_sp, key, + skb, timestamp); + if (IS_ERR(conflict)) { + if (skb) + mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb, + key.local_port, + key.ingress, NULL); + } else { + /* Above, we removed an object with this key from the + * hash table, under lock, so conflict can not be a + * valid pointer. 
+ */ + WARN_ON_ONCE(conflict); + } + } + + spin_unlock(&mlxsw_sp->ptp_state->unmatched_lock); + + if (unmatched) + mlxsw_sp1_ptp_unmatched_finish(mlxsw_sp, unmatched); + + rcu_read_unlock(); +} + +static void mlxsw_sp1_ptp_got_packet(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u8 local_port, + bool ingress) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_sp1_ptp_key key; + u8 types; + int err; + + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + if (!mlxsw_sp_port) + goto immediate; + + types = ingress ? mlxsw_sp_port->ptp.ing_types : + mlxsw_sp_port->ptp.egr_types; + if (!types) + goto immediate; + + memset(&key, 0, sizeof(key)); + key.local_port = local_port; + key.ingress = ingress; + + err = mlxsw_sp_ptp_parse(skb, &key.domain_number, &key.message_type, + &key.sequence_id); + if (err) + goto immediate; + + /* For packets whose timestamping was not enabled on this port, don't + * bother trying to match the timestamp. + */ + if (!((1 << key.message_type) & types)) + goto immediate; + + mlxsw_sp1_ptp_got_piece(mlxsw_sp, key, skb, 0); + return; + +immediate: + mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb, local_port, ingress, NULL); +} + +void mlxsw_sp1_ptp_got_timestamp(struct mlxsw_sp *mlxsw_sp, bool ingress, + u8 local_port, u8 message_type, + u8 domain_number, u16 sequence_id, + u64 timestamp) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_sp1_ptp_key key; + u8 types; + + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + if (!mlxsw_sp_port) + return; + + types = ingress ? mlxsw_sp_port->ptp.ing_types : + mlxsw_sp_port->ptp.egr_types; + + /* For message types whose timestamping was not enabled on this port, + * don't bother with the timestamp. + */ + if (!((1 << message_type) & types)) + return; + + memset(&key, 0, sizeof(key)); + key.local_port = local_port; + key.domain_number = domain_number; + key.message_type = message_type; + key.sequence_id = sequence_id; + key.ingress = ingress; + + mlxsw_sp1_ptp_got_piece(mlxsw_sp, key, NULL, timestamp); +} + +void mlxsw_sp1_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, + u8 local_port) +{ + skb_reset_mac_header(skb); + mlxsw_sp1_ptp_got_packet(mlxsw_sp, skb, local_port, true); +} + +void mlxsw_sp1_ptp_transmitted(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u8 local_port) +{ + mlxsw_sp1_ptp_got_packet(mlxsw_sp, skb, local_port, false); +} + +static void +mlxsw_sp1_ptp_ht_gc_collect(struct mlxsw_sp_ptp_state *ptp_state, + struct mlxsw_sp1_ptp_unmatched *unmatched) +{ + int err; + + /* If an unmatched entry has an SKB, it has to be handed over to the + * networking stack. This is usually done from a trap handler, which is + * invoked in a softirq context. Here we are going to do it in process + * context. If that were to be interrupted by a softirq, it could cause + * a deadlock when an attempt is made to take an already-taken lock + * somewhere along the sending path. Disable softirqs to prevent this. + */ + local_bh_disable(); + + spin_lock(&ptp_state->unmatched_lock); + err = rhashtable_remove_fast(&ptp_state->unmatched_ht, + &unmatched->ht_node, + mlxsw_sp1_ptp_unmatched_ht_params); + spin_unlock(&ptp_state->unmatched_lock); + + if (err) + /* The packet was matched with timestamp during the walk. */ + goto out; + + /* mlxsw_sp1_ptp_unmatched_finish() invokes netif_receive_skb(). 
While + * the comment at that function states that it can only be called in + * soft IRQ context, this pattern of local_bh_disable() + + * netif_receive_skb(), in process context, is seen elsewhere in the + * kernel, notably in pktgen. + */ + mlxsw_sp1_ptp_unmatched_finish(ptp_state->mlxsw_sp, unmatched); + +out: + local_bh_enable(); +} + +static void mlxsw_sp1_ptp_ht_gc(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct mlxsw_sp1_ptp_unmatched *unmatched; + struct mlxsw_sp_ptp_state *ptp_state; + struct rhashtable_iter iter; + u32 gc_cycle; + void *obj; + + ptp_state = container_of(dwork, struct mlxsw_sp_ptp_state, ht_gc_dw); + gc_cycle = ptp_state->gc_cycle++; + + rhashtable_walk_enter(&ptp_state->unmatched_ht, &iter); + rhashtable_walk_start(&iter); + while ((obj = rhashtable_walk_next(&iter))) { + if (IS_ERR(obj)) + continue; + + unmatched = obj; + if (unmatched->gc_cycle <= gc_cycle) + mlxsw_sp1_ptp_ht_gc_collect(ptp_state, unmatched); + } + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); + + mlxsw_core_schedule_dw(&ptp_state->ht_gc_dw, + MLXSW_SP1_PTP_HT_GC_INTERVAL); +} + +static int mlxsw_sp_ptp_mtptpt_set(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_reg_mtptpt_trap_id trap_id, + u16 message_type) +{ + char mtptpt_pl[MLXSW_REG_MTPTPT_LEN]; + + mlxsw_reg_mtptptp_pack(mtptpt_pl, trap_id, message_type); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mtptpt), mtptpt_pl); +} + +static int mlxsw_sp1_ptp_set_fifo_clr_on_trap(struct mlxsw_sp *mlxsw_sp, + bool clr) +{ + char mogcr_pl[MLXSW_REG_MOGCR_LEN] = {0}; + int err; + + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mogcr), mogcr_pl); + if (err) + return err; + + mlxsw_reg_mogcr_ptp_iftc_set(mogcr_pl, clr); + mlxsw_reg_mogcr_ptp_eftc_set(mogcr_pl, clr); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mogcr), mogcr_pl); +} + +static int mlxsw_sp1_ptp_mtpppc_set(struct mlxsw_sp *mlxsw_sp, + u16 ing_types, u16 egr_types) +{ + char mtpppc_pl[MLXSW_REG_MTPPPC_LEN]; + + mlxsw_reg_mtpppc_pack(mtpppc_pl, ing_types, egr_types); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mtpppc), mtpppc_pl); +} + +struct mlxsw_sp_ptp_state *mlxsw_sp1_ptp_init(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_ptp_state *ptp_state; + u16 message_type; + int err; + + ptp_state = kzalloc(sizeof(*ptp_state), GFP_KERNEL); + if (!ptp_state) + return ERR_PTR(-ENOMEM); + ptp_state->mlxsw_sp = mlxsw_sp; + + spin_lock_init(&ptp_state->unmatched_lock); + + err = rhashtable_init(&ptp_state->unmatched_ht, + &mlxsw_sp1_ptp_unmatched_ht_params); + if (err) + goto err_hashtable_init; + + /* Deliver these message types as PTP0. */ + message_type = BIT(MLXSW_SP_PTP_MESSAGE_TYPE_SYNC) | + BIT(MLXSW_SP_PTP_MESSAGE_TYPE_DELAY_REQ) | + BIT(MLXSW_SP_PTP_MESSAGE_TYPE_PDELAY_REQ) | + BIT(MLXSW_SP_PTP_MESSAGE_TYPE_PDELAY_RESP); + err = mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0, + message_type); + if (err) + goto err_mtptpt_set; + + /* Everything else is PTP1. 
*/ + message_type = ~message_type; + err = mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP1, + message_type); + if (err) + goto err_mtptpt1_set; + + err = mlxsw_sp1_ptp_set_fifo_clr_on_trap(mlxsw_sp, true); + if (err) + goto err_fifo_clr; + + INIT_DELAYED_WORK(&ptp_state->ht_gc_dw, mlxsw_sp1_ptp_ht_gc); + mlxsw_core_schedule_dw(&ptp_state->ht_gc_dw, + MLXSW_SP1_PTP_HT_GC_INTERVAL); + return ptp_state; + +err_fifo_clr: + mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP1, 0); +err_mtptpt1_set: + mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0, 0); +err_mtptpt_set: + rhashtable_destroy(&ptp_state->unmatched_ht); +err_hashtable_init: + kfree(ptp_state); + return ERR_PTR(err); +} + +void mlxsw_sp1_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state) +{ + struct mlxsw_sp *mlxsw_sp = ptp_state->mlxsw_sp; + + cancel_delayed_work_sync(&ptp_state->ht_gc_dw); + mlxsw_sp1_ptp_mtpppc_set(mlxsw_sp, 0, 0); + mlxsw_sp1_ptp_set_fifo_clr_on_trap(mlxsw_sp, false); + mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP1, 0); + mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0, 0); + rhashtable_free_and_destroy(&ptp_state->unmatched_ht, + &mlxsw_sp1_ptp_unmatched_free_fn, NULL); + kfree(ptp_state); +} + +int mlxsw_sp1_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config) +{ + *config = mlxsw_sp_port->ptp.hwtstamp_config; + return 0; +} + +static int mlxsw_sp_ptp_get_message_types(const struct hwtstamp_config *config, + u16 *p_ing_types, u16 *p_egr_types, + enum hwtstamp_rx_filters *p_rx_filter) +{ + enum hwtstamp_rx_filters rx_filter = config->rx_filter; + enum hwtstamp_tx_types tx_type = config->tx_type; + u16 ing_types = 0x00; + u16 egr_types = 0x00; + + switch (tx_type) { + case HWTSTAMP_TX_OFF: + egr_types = 0x00; + break; + case HWTSTAMP_TX_ON: + egr_types = 0xff; + break; + case HWTSTAMP_TX_ONESTEP_SYNC: + return -ERANGE; + } + + switch (rx_filter) { + case HWTSTAMP_FILTER_NONE: + ing_types = 0x00; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + ing_types = 0x01; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + ing_types = 0x02; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + ing_types = 0x0f; + break; + case HWTSTAMP_FILTER_ALL: + ing_types = 0xff; + break; + case HWTSTAMP_FILTER_SOME: + case HWTSTAMP_FILTER_NTP_ALL: + return -ERANGE; + } + + *p_ing_types = ing_types; + *p_egr_types = egr_types; + *p_rx_filter = rx_filter; + return 0; +} + +static int mlxsw_sp1_ptp_mtpppc_update(struct mlxsw_sp_port *mlxsw_sp_port, + u16 ing_types, u16 egr_types) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_port *tmp; + int i; + + /* MTPPPC configures timestamping globally, not per port. Find the + * configuration that contains all configured timestamping requests. 
+ */ + for (i = 1; i < mlxsw_core_max_ports(mlxsw_sp->core); i++) { + tmp = mlxsw_sp->ports[i]; + if (tmp && tmp != mlxsw_sp_port) { + ing_types |= tmp->ptp.ing_types; + egr_types |= tmp->ptp.egr_types; + } + } + + return mlxsw_sp1_ptp_mtpppc_set(mlxsw_sp_port->mlxsw_sp, + ing_types, egr_types); +} + +int mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config) +{ + enum hwtstamp_rx_filters rx_filter; + u16 ing_types; + u16 egr_types; + int err; + + err = mlxsw_sp_ptp_get_message_types(config, &ing_types, &egr_types, + &rx_filter); + if (err) + return err; + + err = mlxsw_sp1_ptp_mtpppc_update(mlxsw_sp_port, ing_types, egr_types); + if (err) + return err; + + mlxsw_sp_port->ptp.hwtstamp_config = *config; + mlxsw_sp_port->ptp.ing_types = ing_types; + mlxsw_sp_port->ptp.egr_types = egr_types; + + /* Notify the ioctl caller what we are actually timestamping. */ + config->rx_filter = rx_filter; + + return 0; +} + +int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, + struct ethtool_ts_info *info) +{ + info->phc_index = ptp_clock_index(mlxsw_sp->clock->ptp); + + info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + + info->tx_types = BIT(HWTSTAMP_TX_OFF) | + BIT(HWTSTAMP_TX_ON); + + info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | + BIT(HWTSTAMP_FILTER_ALL); + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h index 76fa00a4be75..b23abfc0bd76 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h @@ -5,11 +5,27 @@ #define _MLXSW_SPECTRUM_PTP_H #include <linux/device.h> +#include <linux/rhashtable.h> -#include "spectrum.h" - +struct mlxsw_sp; +struct mlxsw_sp_port; struct mlxsw_sp_ptp_clock; +enum { + MLXSW_SP_PTP_MESSAGE_TYPE_SYNC, + MLXSW_SP_PTP_MESSAGE_TYPE_DELAY_REQ, + MLXSW_SP_PTP_MESSAGE_TYPE_PDELAY_REQ, + MLXSW_SP_PTP_MESSAGE_TYPE_PDELAY_RESP, +}; + +static inline int mlxsw_sp_ptp_get_ts_info_noptp(struct ethtool_ts_info *info) +{ + info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE; + info->phc_index = -1; + return 0; +} + #if IS_REACHABLE(CONFIG_PTP_1588_CLOCK) struct mlxsw_sp_ptp_clock * @@ -17,6 +33,30 @@ mlxsw_sp1_ptp_clock_init(struct mlxsw_sp *mlxsw_sp, struct device *dev); void mlxsw_sp1_ptp_clock_fini(struct mlxsw_sp_ptp_clock *clock); +struct mlxsw_sp_ptp_state *mlxsw_sp1_ptp_init(struct mlxsw_sp *mlxsw_sp); + +void mlxsw_sp1_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state); + +void mlxsw_sp1_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, + u8 local_port); + +void mlxsw_sp1_ptp_transmitted(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u8 local_port); + +void mlxsw_sp1_ptp_got_timestamp(struct mlxsw_sp *mlxsw_sp, bool ingress, + u8 local_port, u8 message_type, + u8 domain_number, u16 sequence_id, + u64 timestamp); + +int mlxsw_sp1_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config); + +int mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config); + +int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, + struct ethtool_ts_info *info); + #else static inline struct mlxsw_sp_ptp_clock * @@ -29,6 +69,56 @@ static inline void mlxsw_sp1_ptp_clock_fini(struct mlxsw_sp_ptp_clock *clock) { } +static inline struct mlxsw_sp_ptp_state * +mlxsw_sp1_ptp_init(struct mlxsw_sp *mlxsw_sp) +{ + return NULL; +} + 
+static inline void mlxsw_sp1_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state) +{ +} + +static inline void mlxsw_sp1_ptp_receive(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u8 local_port) +{ + mlxsw_sp_rx_listener_no_mark_func(skb, local_port, mlxsw_sp); +} + +static inline void mlxsw_sp1_ptp_transmitted(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u8 local_port) +{ + dev_kfree_skb_any(skb); +} + +static inline void +mlxsw_sp1_ptp_got_timestamp(struct mlxsw_sp *mlxsw_sp, bool ingress, + u8 local_port, u8 message_type, + u8 domain_number, + u16 sequence_id, u64 timestamp) +{ +} + +static inline int +mlxsw_sp1_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config) +{ + return -EOPNOTSUPP; +} + +static inline int +mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config) +{ + return -EOPNOTSUPP; +} + +static inline int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, + struct ethtool_ts_info *info) +{ + return mlxsw_sp_ptp_get_ts_info_noptp(info); +} + #endif static inline struct mlxsw_sp_ptp_clock * @@ -41,4 +131,46 @@ static inline void mlxsw_sp2_ptp_clock_fini(struct mlxsw_sp_ptp_clock *clock) { } +static inline struct mlxsw_sp_ptp_state * +mlxsw_sp2_ptp_init(struct mlxsw_sp *mlxsw_sp) +{ + return NULL; +} + +static inline void mlxsw_sp2_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state) +{ +} + +static inline void mlxsw_sp2_ptp_receive(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u8 local_port) +{ + mlxsw_sp_rx_listener_no_mark_func(skb, local_port, mlxsw_sp); +} + +static inline void mlxsw_sp2_ptp_transmitted(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u8 local_port) +{ + dev_kfree_skb_any(skb); +} + +static inline int +mlxsw_sp2_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config) +{ + return -EOPNOTSUPP; +} + +static inline int +mlxsw_sp2_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config) +{ + return -EOPNOTSUPP; +} + +static inline int mlxsw_sp2_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, + struct ethtool_ts_info *info) +{ + return mlxsw_sp_ptp_get_ts_info_noptp(info); +} + #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index fc4f19167262..bdab96f5bc70 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -299,6 +299,8 @@ static netdev_tx_t mlxsw_sx_port_xmit(struct sk_buff *skb, u64 len; int err; + memset(skb->cb, 0, sizeof(struct mlxsw_skb_cb)); + if (mlxsw_core_skb_transmit_busy(mlxsw_sx->core, &tx_info)) return NETDEV_TX_BUSY; diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index 451216dd7f6b..19202bdb5105 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -17,6 +17,8 @@ enum { MLXSW_TRAP_ID_MVRP = 0x15, MLXSW_TRAP_ID_RPVST = 0x16, MLXSW_TRAP_ID_DHCP = 0x19, + MLXSW_TRAP_ID_PTP0 = 0x28, + MLXSW_TRAP_ID_PTP1 = 0x29, MLXSW_TRAP_ID_IGMP_QUERY = 0x30, MLXSW_TRAP_ID_IGMP_V1_REPORT = 0x31, MLXSW_TRAP_ID_IGMP_V2_REPORT = 0x32, @@ -76,6 +78,10 @@ enum { enum mlxsw_event_trap_id { /* Port Up/Down event generated by hardware */ MLXSW_TRAP_ID_PUDE = 0x8, + /* PTP Ingress FIFO has a new entry */ + MLXSW_TRAP_ID_PTP_ING_FIFO = 0x2D, + /* PTP Egress FIFO has a new entry */ + MLXSW_TRAP_ID_PTP_EGR_FIFO = 0x2E, }; #endif /* _MLXSW_TRAP_H */
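The core of this patch is the pairing done in mlxsw_sp1_ptp_got_piece(): a trapped PTP packet and its hardware timestamp (delivered separately via the PTP_ING_FIFO/PTP_EGR_FIFO events) arrive in either order and meet in unmatched_ht under the key (local_port, ingress, domain_number, message_type, sequence_id). The following is a toy userspace model of that pairing only; every name in it is invented for illustration, it is not driver code, and it omits the RCU/locking and GC-timeout handling the driver needs:

/* Toy model of the two-piece matching in mlxsw_sp1_ptp_got_piece():
 * whichever half (packet or timestamp) arrives first is parked under
 * its key; the matching second half completes the pair.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct key {
	uint8_t local_port;
	bool ingress;
	uint8_t domain_number;
	uint8_t message_type;
	uint16_t sequence_id;
};

struct unmatched {
	bool used;
	struct key key;
	bool have_pkt;		/* true: packet parked, awaiting timestamp */
	uint64_t timestamp;	/* valid iff !have_pkt */
};

static struct unmatched table[64];	/* stands in for unmatched_ht */

static struct unmatched *lookup(const struct key *key)
{
	for (size_t i = 0; i < 64; i++)
		if (table[i].used &&
		    table[i].key.local_port == key->local_port &&
		    table[i].key.ingress == key->ingress &&
		    table[i].key.domain_number == key->domain_number &&
		    table[i].key.message_type == key->message_type &&
		    table[i].key.sequence_id == key->sequence_id)
			return &table[i];
	return NULL;
}

static void got_piece(struct key key, bool is_pkt, uint64_t timestamp)
{
	struct unmatched *u = lookup(&key);

	/* Opposite halves met: complete the pair, drop the entry. */
	if (u && u->have_pkt != is_pkt) {
		printf("seq %u matched, ts=%llu\n", key.sequence_id,
		       (unsigned long long)(is_pkt ? u->timestamp : timestamp));
		u->used = false;
		return;
	}
	/* No entry (or a same-type conflict): park what we have. A full
	 * table silently drops; the driver instead ages entries out via
	 * the ht_gc work above.
	 */
	for (size_t i = 0; i < 64; i++) {
		if (!table[i].used) {
			table[i] = (struct unmatched){
				.used = true, .key = key,
				.have_pkt = is_pkt, .timestamp = timestamp,
			};
			return;
		}
	}
}

int main(void)
{
	struct key k = { .local_port = 1, .ingress = true,
			 .message_type = 0, .sequence_id = 42 };

	got_piece(k, true, 0);		/* packet traps first */
	got_piece(k, false, 123456789);	/* FIFO timestamp completes it */
	return 0;
}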
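What mlxsw_sp1_ptp_get_ts_info() advertises is visible to userspace through the standard ETHTOOL_GET_TS_INFO command (the same data `ethtool -T` prints). A minimal sketch of the query; the interface name "swp1" is an assumption for illustration:

/* Query the timestamping capabilities reported by
 * mlxsw_sp1_ptp_get_ts_info() via ETHTOOL_GET_TS_INFO.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/net_tstamp.h>
#include <linux/sockios.h>

int main(void)
{
	struct ethtool_ts_info info;
	struct ifreq ifr;
	int fd;

	memset(&info, 0, sizeof(info));
	info.cmd = ETHTOOL_GET_TS_INFO;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "swp1", IFNAMSIZ - 1);	/* assumed name */
	ifr.ifr_data = (char *)&info;

	fd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd < 0 || ioctl(fd, SIOCETHTOOL, &ifr) < 0) {
		perror("ETHTOOL_GET_TS_INFO");
		return 1;
	}
	printf("phc index: %d\n", info.phc_index);	/* ptp_clock_index() */
	printf("raw hw timestamps: %s\n",
	       info.so_timestamping & SOF_TIMESTAMPING_RAW_HARDWARE ?
	       "yes" : "no");
	printf("HWTSTAMP_TX_ON: %s\n",
	       info.tx_types & (1 << HWTSTAMP_TX_ON) ? "yes" : "no");
	close(fd);
	return 0;
}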
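The mlxsw_sp1_ptp_hwtstamp_set() path is reached through the standard SIOCSHWTSTAMP ioctl. A minimal sketch of a caller, again assuming a port named "swp1"; note how the filter values map onto the ing_types/egr_types bitmaps built by mlxsw_sp_ptp_get_message_types():

/* Enable the hardware timestamping configured by
 * mlxsw_sp1_ptp_hwtstamp_set() from userspace.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/net_tstamp.h>
#include <linux/sockios.h>

int main(void)
{
	struct hwtstamp_config cfg;
	struct ifreq ifr;
	int fd;

	memset(&cfg, 0, sizeof(cfg));
	cfg.tx_type = HWTSTAMP_TX_ON;			/* egr_types = 0xff */
	cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;	/* ing_types = 0x0f */

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "swp1", IFNAMSIZ - 1);	/* assumed name */
	ifr.ifr_data = (char *)&cfg;

	fd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd < 0 || ioctl(fd, SIOCSHWTSTAMP, &ifr) < 0) {
		perror("SIOCSHWTSTAMP");
		return 1;
	}
	/* The driver writes back the rx_filter it actually honors. */
	printf("granted rx_filter: %d\n", cfg.rx_filter);
	close(fd);
	return 0;
}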
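Once enabled, the timestamps attached in mlxsw_sp1_packet_timestamp()/mlxsw_sp1_ptp_packet_finish() reach applications as SO_TIMESTAMPING control messages. A sketch of a receiver; binding to UDP port 319 is an assumption taken from the PTP protocol (the event-message port), not from this driver:

/* Read the RX hardware timestamp the driver attaches to trapped
 * PTP packets, via SO_TIMESTAMPING and recvmsg().
 */
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <netinet/in.h>
#include <linux/net_tstamp.h>
#include <linux/errqueue.h>	/* struct scm_timestamping */

static int enable_rx_tstamp(int fd)
{
	int flags = SOF_TIMESTAMPING_RX_HARDWARE |
		    SOF_TIMESTAMPING_RAW_HARDWARE;

	return setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
			  &flags, sizeof(flags));
}

static void recv_with_tstamp(int fd)
{
	char data[1500], ctrl[256];
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = ctrl, .msg_controllen = sizeof(ctrl),
	};
	struct cmsghdr *cmsg;

	if (recvmsg(fd, &msg, 0) < 0)
		return;

	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
		if (cmsg->cmsg_level == SOL_SOCKET &&
		    cmsg->cmsg_type == SCM_TIMESTAMPING) {
			struct scm_timestamping ts;

			memcpy(&ts, CMSG_DATA(cmsg), sizeof(ts));
			/* ts.ts[2] is the raw hardware timestamp. */
			printf("hw ts: %lld.%09ld\n",
			       (long long)ts.ts[2].tv_sec, ts.ts[2].tv_nsec);
		}
	}
}

int main(void)
{
	struct sockaddr_in addr = {
		.sin_family = AF_INET,
		.sin_port = htons(319),		/* PTP event port */
		.sin_addr.s_addr = htonl(INADDR_ANY),
	};
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0 || bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
	    enable_rx_tstamp(fd) < 0) {
		perror("setup");
		return 1;
	}
	recv_with_tstamp(fd);
	return 0;
}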