diff options
Diffstat (limited to 'drivers/net/ethernet/mellanox')
27 files changed, 1780 insertions, 1064 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 502d1b97855c..b0f79a5151cf 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -684,7 +684,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud xdp_prog = rcu_dereference(ring->xdp_prog); xdp.rxq = &ring->xdp_rxq; xdp.frame_sz = priv->frag_info[0].frag_stride; - doorbell_pending = 0; + doorbell_pending = false; /* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx * descriptor offset can be deduced from the CQE index instead of diff --git a/drivers/net/ethernet/mellanox/mlx4/fw_qos.h b/drivers/net/ethernet/mellanox/mlx4/fw_qos.h index 582997577a04..954b86faac29 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw_qos.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw_qos.h @@ -135,7 +135,7 @@ int mlx4_SET_VPORT_QOS_get(struct mlx4_dev *dev, u8 port, u8 vport, * @dev: mlx4_dev. * @port: Physical port number. * @vport: Vport id. - * @out_param: Array of mlx4_vport_qos_param which holds the requested values. + * @in_param: Array of mlx4_vport_qos_param which holds the requested values. * * Returns 0 on success or a negative mlx4_core errno code. **/ diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 1187ef1375e2..394f43add85c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -300,7 +300,7 @@ static const char *resource_str(enum mlx4_resource rt) case RES_FS_RULE: return "RES_FS_RULE"; case RES_XRCD: return "RES_XRCD"; default: return "Unknown resource type !!!"; - }; + } } static void rem_slave_vlans(struct mlx4_dev *dev, int slave); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 2d477f9a8cb7..83a67ca43a41 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -81,7 +81,7 @@ mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/tls.o en_accel/tls_rxtx.o en_accel/t mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/dr_domain.o steering/dr_table.o \ steering/dr_matcher.o steering/dr_rule.o \ - steering/dr_icm_pool.o \ + steering/dr_icm_pool.o steering/dr_buddy.o \ steering/dr_ste.o steering/dr_send.o \ steering/dr_cmd.o steering/dr_fw.o \ steering/dr_action.o steering/fs_dr.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 38e4f19d69f8..43271a3856ca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -2,6 +2,8 @@ /* Copyright (c) 2019 Mellanox Technologies. */ #include "en/params.h" +#include "en/txrx.h" +#include "en_accel/tls_rxtx.h" static inline bool mlx5e_rx_is_xdp(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) @@ -152,3 +154,35 @@ u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, return is_linear_skb ? mlx5e_get_linear_rq_headroom(params, xsk) : 0; } + +u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params) +{ + bool is_mpwqe = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE); + u16 stop_room; + + stop_room = mlx5e_tls_get_stop_room(mdev, params); + stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS); + if (is_mpwqe) + /* A MPWQE can take up to the maximum-sized WQE + all the normal + * stop room can be taken if a new packet breaks the active + * MPWQE session and allocates its WQEs right away. + */ + stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS); + + return stop_room; +} + +int mlx5e_validate_params(struct mlx5e_priv *priv, struct mlx5e_params *params) +{ + size_t sq_size = 1 << params->log_sq_size; + u16 stop_room; + + stop_room = mlx5e_calc_sq_stop_room(priv->mdev, params); + if (stop_room >= sq_size) { + netdev_err(priv->netdev, "Stop room %hu is bigger than the SQ size %zu\n", + stop_room, sq_size); + return -EINVAL; + } + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index a87273e801b2..187007ad3349 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -30,6 +30,7 @@ struct mlx5e_sq_param { u32 sqc[MLX5_ST_SZ_DW(sqc)]; struct mlx5_wq_param wq; bool is_mpw; + u16 stop_room; }; struct mlx5e_channel_param { @@ -124,4 +125,7 @@ void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, struct mlx5e_params *params, struct mlx5e_sq_param *param); +u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params); +int mlx5e_validate_params(struct mlx5e_priv *priv, struct mlx5e_params *params); + #endif /* __MLX5_EN_PARAMS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index ae90d533a350..2e3e78b0f333 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -366,7 +366,8 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_wqe_info *wi, u32 *xsk_frames, - bool recycle) + bool recycle, + struct xdp_frame_bulk *bq) { struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo; u16 i; @@ -379,7 +380,7 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq, /* XDP_TX from the XSK RQ and XDP_REDIRECT */ dma_unmap_single(sq->pdev, xdpi.frame.dma_addr, xdpi.frame.xdpf->len, DMA_TO_DEVICE); - xdp_return_frame(xdpi.frame.xdpf); + xdp_return_frame_bulk(xdpi.frame.xdpf, bq); break; case MLX5E_XDP_XMIT_MODE_PAGE: /* XDP_TX from the regular RQ */ @@ -397,12 +398,15 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq, bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) { + struct xdp_frame_bulk bq; struct mlx5e_xdpsq *sq; struct mlx5_cqe64 *cqe; u32 xsk_frames = 0; u16 sqcc; int i; + xdp_frame_bulk_init(&bq); + sq = container_of(cq, struct mlx5e_xdpsq, cq); if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) @@ -434,7 +438,7 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) sqcc += wi->num_wqebbs; - mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, true); + mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, true, &bq); } while (!last_wqe); if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) { @@ -447,6 +451,8 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) } } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); + xdp_flush_frame_bulk(&bq); + if (xsk_frames) xsk_tx_completed(sq->xsk_pool, xsk_frames); @@ -463,8 +469,13 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq) { + struct xdp_frame_bulk bq; u32 xsk_frames = 0; + xdp_frame_bulk_init(&bq); + + rcu_read_lock(); /* need for xdp_return_frame_bulk */ + while (sq->cc != sq->pc) { struct mlx5e_xdp_wqe_info *wi; u16 ci; @@ -474,9 +485,12 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq) sq->cc += wi->num_wqebbs; - mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, false); + mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, false, &bq); } + xdp_flush_frame_bulk(&bq); + rcu_read_unlock(); + if (xsk_frames) xsk_tx_completed(sq->xsk_pool, xsk_frames); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index b140e13fdcc8..d16def68ecff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -13,20 +13,20 @@ struct mlx5e_dump_wqe { (DIV_ROUND_UP(sizeof(struct mlx5e_dump_wqe), MLX5_SEND_WQE_BB)) static u8 -mlx5e_ktls_dumps_num_wqes(struct mlx5e_txqsq *sq, unsigned int nfrags, +mlx5e_ktls_dumps_num_wqes(struct mlx5e_params *params, unsigned int nfrags, unsigned int sync_len) { /* Given the MTU and sync_len, calculates an upper bound for the * number of DUMP WQEs needed for the TX resync of a record. */ - return nfrags + DIV_ROUND_UP(sync_len, sq->hw_mtu); + return nfrags + DIV_ROUND_UP(sync_len, MLX5E_SW2HW_MTU(params, params->sw_mtu)); } -u16 mlx5e_ktls_get_stop_room(struct mlx5e_txqsq *sq) +u16 mlx5e_ktls_get_stop_room(struct mlx5e_params *params) { u16 num_dumps, stop_room = 0; - num_dumps = mlx5e_ktls_dumps_num_wqes(sq, MAX_SKB_FRAGS, TLS_MAX_PAYLOAD_SIZE); + num_dumps = mlx5e_ktls_dumps_num_wqes(params, MAX_SKB_FRAGS, TLS_MAX_PAYLOAD_SIZE); stop_room += mlx5e_stop_room_for_wqe(MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS); stop_room += mlx5e_stop_room_for_wqe(MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h index 7521c9be735b..ee04e916fa21 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h @@ -14,7 +14,7 @@ struct mlx5e_accel_tx_tls_state { u32 tls_tisn; }; -u16 mlx5e_ktls_get_stop_room(struct mlx5e_txqsq *sq); +u16 mlx5e_ktls_get_stop_room(struct mlx5e_params *params); bool mlx5e_ktls_handle_tx_skb(struct tls_context *tls_ctx, struct mlx5e_txqsq *sq, struct sk_buff *skb, int datalen, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c index 6982b193ee8a..f51c04284e4d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c @@ -385,15 +385,13 @@ void mlx5e_tls_handle_rx_skb_metadata(struct mlx5e_rq *rq, struct sk_buff *skb, *cqe_bcnt -= MLX5E_METADATA_ETHER_LEN; } -u16 mlx5e_tls_get_stop_room(struct mlx5e_txqsq *sq) +u16 mlx5e_tls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { - struct mlx5_core_dev *mdev = sq->channel->mdev; - if (!mlx5_accel_is_tls_device(mdev)) return 0; if (mlx5_accel_is_ktls_device(mdev)) - return mlx5e_ktls_get_stop_room(sq); + return mlx5e_ktls_get_stop_room(params); /* FPGA */ /* Resync SKB. */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h index 5f162ad2ee8f..9923132c9440 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h @@ -43,7 +43,7 @@ #include "en.h" #include "en/txrx.h" -u16 mlx5e_tls_get_stop_room(struct mlx5e_txqsq *sq); +u16 mlx5e_tls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params); bool mlx5e_tls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5e_accel_tx_tls_state *state); @@ -71,7 +71,7 @@ mlx5e_accel_is_tls(struct mlx5_cqe64 *cqe, struct sk_buff *skb) { return false; static inline void mlx5e_tls_handle_rx_skb(struct mlx5e_rq *rq, struct sk_buff *skb, struct mlx5_cqe64 *cqe, u32 *cqe_bcnt) {} -static inline u16 mlx5e_tls_get_stop_room(struct mlx5e_txqsq *sq) +static inline u16 mlx5e_tls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index d25a56ec6876..42e61dc28ead 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -32,6 +32,7 @@ #include "en.h" #include "en/port.h" +#include "en/params.h" #include "en/xsk/pool.h" #include "lib/clock.h" @@ -369,6 +370,10 @@ int mlx5e_ethtool_set_ringparam(struct mlx5e_priv *priv, new_channels.params.log_rq_mtu_frames = log_rq_size; new_channels.params.log_sq_size = log_sq_size; + err = mlx5e_validate_params(priv, &new_channels.params); + if (err) + goto unlock; + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { priv->channels.params = new_channels.params; goto unlock; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index ebce97921e03..527c5f12c5af 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1121,28 +1121,6 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa) return 0; } -static int mlx5e_calc_sq_stop_room(struct mlx5e_txqsq *sq, u8 log_sq_size) -{ - int sq_size = 1 << log_sq_size; - - sq->stop_room = mlx5e_tls_get_stop_room(sq); - sq->stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS); - if (test_bit(MLX5E_SQ_STATE_MPWQE, &sq->state)) - /* A MPWQE can take up to the maximum-sized WQE + all the normal - * stop room can be taken if a new packet breaks the active - * MPWQE session and allocates its WQEs right away. - */ - sq->stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS); - - if (WARN_ON(sq->stop_room >= sq_size)) { - netdev_err(sq->channel->netdev, "Stop room %hu is bigger than the SQ size %d\n", - sq->stop_room, sq_size); - return -ENOSPC; - } - - return 0; -} - static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work); static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, int txq_ix, @@ -1176,9 +1154,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, set_bit(MLX5E_SQ_STATE_TLS, &sq->state); if (param->is_mpw) set_bit(MLX5E_SQ_STATE_MPWQE, &sq->state); - err = mlx5e_calc_sq_stop_room(sq, params->log_sq_size); - if (err) - return err; + sq->stop_room = param->stop_room; param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); @@ -2225,6 +2201,7 @@ static void mlx5e_build_sq_param(struct mlx5e_priv *priv, MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); MLX5_SET(sqc, sqc, allow_swp, allow_swp); param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE); + param->stop_room = mlx5e_calc_sq_stop_room(priv->mdev, params); mlx5e_build_tx_cq_param(priv, params, ¶m->cqp); } @@ -3999,6 +3976,9 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, new_channels.params = *params; new_channels.params.sw_mtu = new_mtu; + err = mlx5e_validate_params(priv, &new_channels.params); + if (err) + goto out; if (params->xdp_prog && !mlx5e_rx_is_linear_skb(&new_channels.params, NULL)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 8ebfe782f95e..4ea5d6ddf56a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -189,19 +189,21 @@ u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq) return count_eqe; } -static void mlx5_eq_async_int_lock(struct mlx5_eq_async *eq, unsigned long *flags) +static void mlx5_eq_async_int_lock(struct mlx5_eq_async *eq, bool recovery, + unsigned long *flags) __acquires(&eq->lock) { - if (in_irq()) + if (!recovery) spin_lock(&eq->lock); else spin_lock_irqsave(&eq->lock, *flags); } -static void mlx5_eq_async_int_unlock(struct mlx5_eq_async *eq, unsigned long *flags) +static void mlx5_eq_async_int_unlock(struct mlx5_eq_async *eq, bool recovery, + unsigned long *flags) __releases(&eq->lock) { - if (in_irq()) + if (!recovery) spin_unlock(&eq->lock); else spin_unlock_irqrestore(&eq->lock, *flags); @@ -223,11 +225,13 @@ static int mlx5_eq_async_int(struct notifier_block *nb, struct mlx5_eqe *eqe; unsigned long flags; int num_eqes = 0; + bool recovery; dev = eq->dev; eqt = dev->priv.eq_table; - mlx5_eq_async_int_lock(eq_async, &flags); + recovery = action == ASYNC_EQ_RECOVER; + mlx5_eq_async_int_lock(eq_async, recovery, &flags); eqe = next_eqe_sw(eq); if (!eqe) @@ -249,9 +253,9 @@ static int mlx5_eq_async_int(struct notifier_block *nb, out: eq_update_ci(eq, 1); - mlx5_eq_async_int_unlock(eq_async, &flags); + mlx5_eq_async_int_unlock(eq_async, recovery, &flags); - return unlikely(action == ASYNC_EQ_RECOVER) ? num_eqes : 0; + return unlikely(recovery) ? num_eqes : 0; } void mlx5_cmd_eq_recover(struct mlx5_core_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h index 656f96be6e20..89ef592656c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/sdk.h @@ -47,11 +47,12 @@ /** * enum mlx5_fpga_access_type - Enumerated the different methods possible for * accessing the device memory address space + * + * @MLX5_FPGA_ACCESS_TYPE_I2C: Use the slow CX-FPGA I2C bus + * @MLX5_FPGA_ACCESS_TYPE_DONTCARE: Use the fastest available method */ enum mlx5_fpga_access_type { - /** Use the slow CX-FPGA I2C bus */ MLX5_FPGA_ACCESS_TYPE_I2C = 0x0, - /** Use the fastest available method */ MLX5_FPGA_ACCESS_TYPE_DONTCARE = 0x0, }; @@ -113,6 +114,7 @@ struct mlx5_fpga_conn_attr { * subsequent receives. */ void (*recv_cb)(void *cb_arg, struct mlx5_fpga_dma_buf *buf); + /** @cb_arg: A context to be passed to recv_cb callback */ void *cb_arg; }; @@ -145,7 +147,7 @@ void mlx5_fpga_sbu_conn_destroy(struct mlx5_fpga_conn *conn); /** * mlx5_fpga_sbu_conn_sendmsg() - Queue the transmission of a packet - * @fdev: An FPGA SBU connection + * @conn: An FPGA SBU connection * @buf: The packet buffer * * Queues a packet for transmission over an FPGA SBU connection. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_buddy.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_buddy.c new file mode 100644 index 000000000000..7df11a019df9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_buddy.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 - 2008 Mellanox Technologies. All rights reserved. + * Copyright (c) 2006 - 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved. + */ + +#include "dr_types.h" + +int mlx5dr_buddy_init(struct mlx5dr_icm_buddy_mem *buddy, + unsigned int max_order) +{ + int i; + + buddy->max_order = max_order; + + INIT_LIST_HEAD(&buddy->list_node); + INIT_LIST_HEAD(&buddy->used_list); + INIT_LIST_HEAD(&buddy->hot_list); + + buddy->bitmap = kcalloc(buddy->max_order + 1, + sizeof(*buddy->bitmap), + GFP_KERNEL); + buddy->num_free = kcalloc(buddy->max_order + 1, + sizeof(*buddy->num_free), + GFP_KERNEL); + + if (!buddy->bitmap || !buddy->num_free) + goto err_free_all; + + /* Allocating max_order bitmaps, one for each order */ + + for (i = 0; i <= buddy->max_order; ++i) { + unsigned int size = 1 << (buddy->max_order - i); + + buddy->bitmap[i] = bitmap_zalloc(size, GFP_KERNEL); + if (!buddy->bitmap[i]) + goto err_out_free_each_bit_per_order; + } + + /* In the beginning, we have only one order that is available for + * use (the biggest one), so mark the first bit in both bitmaps. + */ + + bitmap_set(buddy->bitmap[buddy->max_order], 0, 1); + + buddy->num_free[buddy->max_order] = 1; + + return 0; + +err_out_free_each_bit_per_order: + for (i = 0; i <= buddy->max_order; ++i) + bitmap_free(buddy->bitmap[i]); + +err_free_all: + kfree(buddy->num_free); + kfree(buddy->bitmap); + return -ENOMEM; +} + +void mlx5dr_buddy_cleanup(struct mlx5dr_icm_buddy_mem *buddy) +{ + int i; + + list_del(&buddy->list_node); + + for (i = 0; i <= buddy->max_order; ++i) + bitmap_free(buddy->bitmap[i]); + + kfree(buddy->num_free); + kfree(buddy->bitmap); +} + +static int dr_buddy_find_free_seg(struct mlx5dr_icm_buddy_mem *buddy, + unsigned int start_order, + unsigned int *segment, + unsigned int *order) +{ + unsigned int seg, order_iter, m; + + for (order_iter = start_order; + order_iter <= buddy->max_order; ++order_iter) { + if (!buddy->num_free[order_iter]) + continue; + + m = 1 << (buddy->max_order - order_iter); + seg = find_first_bit(buddy->bitmap[order_iter], m); + + if (WARN(seg >= m, + "ICM Buddy: failed finding free mem for order %d\n", + order_iter)) + return -ENOMEM; + + break; + } + + if (order_iter > buddy->max_order) + return -ENOMEM; + + *segment = seg; + *order = order_iter; + return 0; +} + +/** + * mlx5dr_buddy_alloc_mem() - Update second level bitmap. + * @buddy: Buddy to update. + * @order: Order of the buddy to update. + * @segment: Segment number. + * + * This function finds the first area of the ICM memory managed by this buddy. + * It uses the data structures of the buddy system in order to find the first + * area of free place, starting from the current order till the maximum order + * in the system. + * + * Return: 0 when segment is set, non-zero error status otherwise. + * + * The function returns the location (segment) in the whole buddy ICM memory + * area - the index of the memory segment that is available for use. + */ +int mlx5dr_buddy_alloc_mem(struct mlx5dr_icm_buddy_mem *buddy, + unsigned int order, + unsigned int *segment) +{ + unsigned int seg, order_iter; + int err; + + err = dr_buddy_find_free_seg(buddy, order, &seg, &order_iter); + if (err) + return err; + + bitmap_clear(buddy->bitmap[order_iter], seg, 1); + --buddy->num_free[order_iter]; + + /* If we found free memory in some order that is bigger than the + * required order, we need to split every order between the required + * order and the order that we found into two parts, and mark accordingly. + */ + while (order_iter > order) { + --order_iter; + seg <<= 1; + bitmap_set(buddy->bitmap[order_iter], seg ^ 1, 1); + ++buddy->num_free[order_iter]; + } + + seg <<= order; + *segment = seg; + + return 0; +} + +void mlx5dr_buddy_free_mem(struct mlx5dr_icm_buddy_mem *buddy, + unsigned int seg, unsigned int order) +{ + seg >>= order; + + /* Whenever a segment is free, + * the mem is added to the buddy that gave it. + */ + while (test_bit(seg ^ 1, buddy->bitmap[order])) { + bitmap_clear(buddy->bitmap[order], seg ^ 1, 1); + --buddy->num_free[order]; + seg >>= 1; + ++order; + } + bitmap_set(buddy->bitmap[order], seg, 1); + + ++buddy->num_free[order]; +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c index 6bd34b293007..ebc879052e42 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c @@ -93,12 +93,12 @@ int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev, caps->gvmi = MLX5_CAP_GEN(mdev, vhca_id); caps->flex_protocols = MLX5_CAP_GEN(mdev, flex_parser_protocols); - if (mlx5dr_matcher_supp_flex_parser_icmp_v4(caps)) { + if (caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V4_ENABLED) { caps->flex_parser_id_icmp_dw0 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw0); caps->flex_parser_id_icmp_dw1 = MLX5_CAP_GEN(mdev, flex_parser_id_icmp_dw1); } - if (mlx5dr_matcher_supp_flex_parser_icmp_v6(caps)) { + if (caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V6_ENABLED) { caps->flex_parser_id_icmpv6_dw0 = MLX5_CAP_GEN(mdev, flex_parser_id_icmpv6_dw0); caps->flex_parser_id_icmpv6_dw1 = diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c index cc33515b9aba..66c24767e3b0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c @@ -4,50 +4,16 @@ #include "dr_types.h" #define DR_ICM_MODIFY_HDR_ALIGN_BASE 64 -#define DR_ICM_SYNC_THRESHOLD (64 * 1024 * 1024) - -struct mlx5dr_icm_pool; - -struct mlx5dr_icm_bucket { - struct mlx5dr_icm_pool *pool; - - /* Chunks that aren't visible to HW not directly and not in cache */ - struct list_head free_list; - unsigned int free_list_count; - - /* Used chunks, HW may be accessing this memory */ - struct list_head used_list; - unsigned int used_list_count; - - /* HW may be accessing this memory but at some future, - * undetermined time, it might cease to do so. Before deciding to call - * sync_ste, this list is moved to sync_list - */ - struct list_head hot_list; - unsigned int hot_list_count; - - /* Pending sync list, entries from the hot list are moved to this list. - * sync_ste is executed and then sync_list is concatenated to the free list - */ - struct list_head sync_list; - unsigned int sync_list_count; - - u32 total_chunks; - u32 num_of_entries; - u32 entry_size; - /* protect the ICM bucket */ - struct mutex mutex; -}; +#define DR_ICM_SYNC_THRESHOLD_POOL (64 * 1024 * 1024) struct mlx5dr_icm_pool { - struct mlx5dr_icm_bucket *buckets; enum mlx5dr_icm_type icm_type; enum mlx5dr_icm_chunk_size max_log_chunk_sz; - enum mlx5dr_icm_chunk_size num_of_buckets; - struct list_head icm_mr_list; - /* protect the ICM MR list */ - struct mutex mr_mutex; struct mlx5dr_domain *dmn; + /* memory management */ + struct mutex mutex; /* protect the ICM pool and ICM buddy */ + struct list_head buddy_mem_list; + u64 hot_memory_size; }; struct mlx5dr_icm_dm { @@ -58,13 +24,11 @@ struct mlx5dr_icm_dm { }; struct mlx5dr_icm_mr { - struct mlx5dr_icm_pool *pool; struct mlx5_core_mkey mkey; struct mlx5dr_icm_dm dm; - size_t used_length; + struct mlx5dr_domain *dmn; size_t length; u64 icm_start_addr; - struct list_head mr_list; }; static int dr_icm_create_dm_mkey(struct mlx5_core_dev *mdev, @@ -107,8 +71,7 @@ dr_icm_pool_mr_create(struct mlx5dr_icm_pool *pool) if (!icm_mr) return NULL; - icm_mr->pool = pool; - INIT_LIST_HEAD(&icm_mr->mr_list); + icm_mr->dmn = pool->dmn; icm_mr->dm.length = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz, pool->icm_type); @@ -150,8 +113,6 @@ dr_icm_pool_mr_create(struct mlx5dr_icm_pool *pool) goto free_mkey; } - list_add_tail(&icm_mr->mr_list, &pool->icm_mr_list); - return icm_mr; free_mkey: @@ -166,10 +127,9 @@ free_icm_mr: static void dr_icm_pool_mr_destroy(struct mlx5dr_icm_mr *icm_mr) { - struct mlx5_core_dev *mdev = icm_mr->pool->dmn->mdev; + struct mlx5_core_dev *mdev = icm_mr->dmn->mdev; struct mlx5dr_icm_dm *dm = &icm_mr->dm; - list_del(&icm_mr->mr_list); mlx5_core_destroy_mkey(mdev, &icm_mr->mkey); mlx5_dm_sw_icm_dealloc(mdev, dm->type, dm->length, 0, dm->addr, dm->obj_id); @@ -178,19 +138,17 @@ static void dr_icm_pool_mr_destroy(struct mlx5dr_icm_mr *icm_mr) static int dr_icm_chunk_ste_init(struct mlx5dr_icm_chunk *chunk) { - struct mlx5dr_icm_bucket *bucket = chunk->bucket; - - chunk->ste_arr = kvzalloc(bucket->num_of_entries * + chunk->ste_arr = kvzalloc(chunk->num_of_entries * sizeof(chunk->ste_arr[0]), GFP_KERNEL); if (!chunk->ste_arr) return -ENOMEM; - chunk->hw_ste_arr = kvzalloc(bucket->num_of_entries * + chunk->hw_ste_arr = kvzalloc(chunk->num_of_entries * DR_STE_SIZE_REDUCED, GFP_KERNEL); if (!chunk->hw_ste_arr) goto out_free_ste_arr; - chunk->miss_list = kvmalloc(bucket->num_of_entries * + chunk->miss_list = kvmalloc(chunk->num_of_entries * sizeof(chunk->miss_list[0]), GFP_KERNEL); if (!chunk->miss_list) goto out_free_hw_ste_arr; @@ -204,72 +162,6 @@ out_free_ste_arr: return -ENOMEM; } -static int dr_icm_chunks_create(struct mlx5dr_icm_bucket *bucket) -{ - size_t mr_free_size, mr_req_size, mr_row_size; - struct mlx5dr_icm_pool *pool = bucket->pool; - struct mlx5dr_icm_mr *icm_mr = NULL; - struct mlx5dr_icm_chunk *chunk; - int i, err = 0; - - mr_req_size = bucket->num_of_entries * bucket->entry_size; - mr_row_size = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz, - pool->icm_type); - mutex_lock(&pool->mr_mutex); - if (!list_empty(&pool->icm_mr_list)) { - icm_mr = list_last_entry(&pool->icm_mr_list, - struct mlx5dr_icm_mr, mr_list); - - if (icm_mr) - mr_free_size = icm_mr->dm.length - icm_mr->used_length; - } - - if (!icm_mr || mr_free_size < mr_row_size) { - icm_mr = dr_icm_pool_mr_create(pool); - if (!icm_mr) { - err = -ENOMEM; - goto out_err; - } - } - - /* Create memory aligned chunks */ - for (i = 0; i < mr_row_size / mr_req_size; i++) { - chunk = kvzalloc(sizeof(*chunk), GFP_KERNEL); - if (!chunk) { - err = -ENOMEM; - goto out_err; - } - - chunk->bucket = bucket; - chunk->rkey = icm_mr->mkey.key; - /* mr start addr is zero based */ - chunk->mr_addr = icm_mr->used_length; - chunk->icm_addr = (uintptr_t)icm_mr->icm_start_addr + icm_mr->used_length; - icm_mr->used_length += mr_req_size; - chunk->num_of_entries = bucket->num_of_entries; - chunk->byte_size = chunk->num_of_entries * bucket->entry_size; - - if (pool->icm_type == DR_ICM_TYPE_STE) { - err = dr_icm_chunk_ste_init(chunk); - if (err) - goto out_free_chunk; - } - - INIT_LIST_HEAD(&chunk->chunk_list); - list_add(&chunk->chunk_list, &bucket->free_list); - bucket->free_list_count++; - bucket->total_chunks++; - } - mutex_unlock(&pool->mr_mutex); - return 0; - -out_free_chunk: - kvfree(chunk); -out_err: - mutex_unlock(&pool->mr_mutex); - return err; -} - static void dr_icm_chunk_ste_cleanup(struct mlx5dr_icm_chunk *chunk) { kvfree(chunk->miss_list); @@ -277,166 +169,199 @@ static void dr_icm_chunk_ste_cleanup(struct mlx5dr_icm_chunk *chunk) kvfree(chunk->ste_arr); } -static void dr_icm_chunk_destroy(struct mlx5dr_icm_chunk *chunk) +static enum mlx5dr_icm_type +get_chunk_icm_type(struct mlx5dr_icm_chunk *chunk) +{ + return chunk->buddy_mem->pool->icm_type; +} + +static void dr_icm_chunk_destroy(struct mlx5dr_icm_chunk *chunk, + struct mlx5dr_icm_buddy_mem *buddy) { - struct mlx5dr_icm_bucket *bucket = chunk->bucket; + enum mlx5dr_icm_type icm_type = get_chunk_icm_type(chunk); + buddy->used_memory -= chunk->byte_size; list_del(&chunk->chunk_list); - bucket->total_chunks--; - if (bucket->pool->icm_type == DR_ICM_TYPE_STE) + if (icm_type == DR_ICM_TYPE_STE) dr_icm_chunk_ste_cleanup(chunk); kvfree(chunk); } -static void dr_icm_bucket_init(struct mlx5dr_icm_pool *pool, - struct mlx5dr_icm_bucket *bucket, - enum mlx5dr_icm_chunk_size chunk_size) +static int dr_icm_buddy_create(struct mlx5dr_icm_pool *pool) { - if (pool->icm_type == DR_ICM_TYPE_STE) - bucket->entry_size = DR_STE_SIZE; - else - bucket->entry_size = DR_MODIFY_ACTION_SIZE; - - bucket->num_of_entries = mlx5dr_icm_pool_chunk_size_to_entries(chunk_size); - bucket->pool = pool; - mutex_init(&bucket->mutex); - INIT_LIST_HEAD(&bucket->free_list); - INIT_LIST_HEAD(&bucket->used_list); - INIT_LIST_HEAD(&bucket->hot_list); - INIT_LIST_HEAD(&bucket->sync_list); + struct mlx5dr_icm_buddy_mem *buddy; + struct mlx5dr_icm_mr *icm_mr; + + icm_mr = dr_icm_pool_mr_create(pool); + if (!icm_mr) + return -ENOMEM; + + buddy = kvzalloc(sizeof(*buddy), GFP_KERNEL); + if (!buddy) + goto free_mr; + + if (mlx5dr_buddy_init(buddy, pool->max_log_chunk_sz)) + goto err_free_buddy; + + buddy->icm_mr = icm_mr; + buddy->pool = pool; + + /* add it to the -start- of the list in order to search in it first */ + list_add(&buddy->list_node, &pool->buddy_mem_list); + + return 0; + +err_free_buddy: + kvfree(buddy); +free_mr: + dr_icm_pool_mr_destroy(icm_mr); + return -ENOMEM; } -static void dr_icm_bucket_cleanup(struct mlx5dr_icm_bucket *bucket) +static void dr_icm_buddy_destroy(struct mlx5dr_icm_buddy_mem *buddy) { struct mlx5dr_icm_chunk *chunk, *next; - mutex_destroy(&bucket->mutex); - list_splice_tail_init(&bucket->sync_list, &bucket->free_list); - list_splice_tail_init(&bucket->hot_list, &bucket->free_list); + list_for_each_entry_safe(chunk, next, &buddy->hot_list, chunk_list) + dr_icm_chunk_destroy(chunk, buddy); + + list_for_each_entry_safe(chunk, next, &buddy->used_list, chunk_list) + dr_icm_chunk_destroy(chunk, buddy); - list_for_each_entry_safe(chunk, next, &bucket->free_list, chunk_list) - dr_icm_chunk_destroy(chunk); + dr_icm_pool_mr_destroy(buddy->icm_mr); - WARN_ON(bucket->total_chunks != 0); + mlx5dr_buddy_cleanup(buddy); - /* Cleanup of unreturned chunks */ - list_for_each_entry_safe(chunk, next, &bucket->used_list, chunk_list) - dr_icm_chunk_destroy(chunk); + kvfree(buddy); } -static u64 dr_icm_hot_mem_size(struct mlx5dr_icm_pool *pool) +static struct mlx5dr_icm_chunk * +dr_icm_chunk_create(struct mlx5dr_icm_pool *pool, + enum mlx5dr_icm_chunk_size chunk_size, + struct mlx5dr_icm_buddy_mem *buddy_mem_pool, + unsigned int seg) { - u64 hot_size = 0; - int chunk_order; + struct mlx5dr_icm_chunk *chunk; + int offset; - for (chunk_order = 0; chunk_order < pool->num_of_buckets; chunk_order++) - hot_size += pool->buckets[chunk_order].hot_list_count * - mlx5dr_icm_pool_chunk_size_to_byte(chunk_order, pool->icm_type); + chunk = kvzalloc(sizeof(*chunk), GFP_KERNEL); + if (!chunk) + return NULL; - return hot_size; -} + offset = mlx5dr_icm_pool_dm_type_to_entry_size(pool->icm_type) * seg; + + chunk->rkey = buddy_mem_pool->icm_mr->mkey.key; + chunk->mr_addr = offset; + chunk->icm_addr = + (uintptr_t)buddy_mem_pool->icm_mr->icm_start_addr + offset; + chunk->num_of_entries = + mlx5dr_icm_pool_chunk_size_to_entries(chunk_size); + chunk->byte_size = + mlx5dr_icm_pool_chunk_size_to_byte(chunk_size, pool->icm_type); + chunk->seg = seg; + + if (pool->icm_type == DR_ICM_TYPE_STE && dr_icm_chunk_ste_init(chunk)) { + mlx5dr_err(pool->dmn, + "Failed to init ste arrays (order: %d)\n", + chunk_size); + goto out_free_chunk; + } -static bool dr_icm_reuse_hot_entries(struct mlx5dr_icm_pool *pool, - struct mlx5dr_icm_bucket *bucket) -{ - u64 bytes_for_sync; + buddy_mem_pool->used_memory += chunk->byte_size; + chunk->buddy_mem = buddy_mem_pool; + INIT_LIST_HEAD(&chunk->chunk_list); - bytes_for_sync = dr_icm_hot_mem_size(pool); - if (bytes_for_sync < DR_ICM_SYNC_THRESHOLD || !bucket->hot_list_count) - return false; + /* chunk now is part of the used_list */ + list_add_tail(&chunk->chunk_list, &buddy_mem_pool->used_list); - return true; -} + return chunk; -static void dr_icm_chill_bucket_start(struct mlx5dr_icm_bucket *bucket) -{ - list_splice_tail_init(&bucket->hot_list, &bucket->sync_list); - bucket->sync_list_count += bucket->hot_list_count; - bucket->hot_list_count = 0; +out_free_chunk: + kvfree(chunk); + return NULL; } -static void dr_icm_chill_bucket_end(struct mlx5dr_icm_bucket *bucket) +static bool dr_icm_pool_is_sync_required(struct mlx5dr_icm_pool *pool) { - list_splice_tail_init(&bucket->sync_list, &bucket->free_list); - bucket->free_list_count += bucket->sync_list_count; - bucket->sync_list_count = 0; -} + if (pool->hot_memory_size > DR_ICM_SYNC_THRESHOLD_POOL) + return true; -static void dr_icm_chill_bucket_abort(struct mlx5dr_icm_bucket *bucket) -{ - list_splice_tail_init(&bucket->sync_list, &bucket->hot_list); - bucket->hot_list_count += bucket->sync_list_count; - bucket->sync_list_count = 0; + return false; } -static void dr_icm_chill_buckets_start(struct mlx5dr_icm_pool *pool, - struct mlx5dr_icm_bucket *cb, - bool buckets[DR_CHUNK_SIZE_MAX]) +static int dr_icm_pool_sync_all_buddy_pools(struct mlx5dr_icm_pool *pool) { - struct mlx5dr_icm_bucket *bucket; - int i; - - for (i = 0; i < pool->num_of_buckets; i++) { - bucket = &pool->buckets[i]; - if (bucket == cb) { - dr_icm_chill_bucket_start(bucket); - continue; - } + struct mlx5dr_icm_buddy_mem *buddy, *tmp_buddy; + int err; - /* Freeing the mutex is done at the end of that process, after - * sync_ste was executed at dr_icm_chill_buckets_end func. - */ - if (mutex_trylock(&bucket->mutex)) { - dr_icm_chill_bucket_start(bucket); - buckets[i] = true; - } + err = mlx5dr_cmd_sync_steering(pool->dmn->mdev); + if (err) { + mlx5dr_err(pool->dmn, "Failed to sync to HW (err: %d)\n", err); + return err; } -} -static void dr_icm_chill_buckets_end(struct mlx5dr_icm_pool *pool, - struct mlx5dr_icm_bucket *cb, - bool buckets[DR_CHUNK_SIZE_MAX]) -{ - struct mlx5dr_icm_bucket *bucket; - int i; - - for (i = 0; i < pool->num_of_buckets; i++) { - bucket = &pool->buckets[i]; - if (bucket == cb) { - dr_icm_chill_bucket_end(bucket); - continue; - } + list_for_each_entry_safe(buddy, tmp_buddy, &pool->buddy_mem_list, list_node) { + struct mlx5dr_icm_chunk *chunk, *tmp_chunk; - if (!buckets[i]) - continue; + list_for_each_entry_safe(chunk, tmp_chunk, &buddy->hot_list, chunk_list) { + mlx5dr_buddy_free_mem(buddy, chunk->seg, + ilog2(chunk->num_of_entries)); + pool->hot_memory_size -= chunk->byte_size; + dr_icm_chunk_destroy(chunk, buddy); + } - dr_icm_chill_bucket_end(bucket); - mutex_unlock(&bucket->mutex); + if (!buddy->used_memory && pool->icm_type == DR_ICM_TYPE_STE) + dr_icm_buddy_destroy(buddy); } + + return 0; } -static void dr_icm_chill_buckets_abort(struct mlx5dr_icm_pool *pool, - struct mlx5dr_icm_bucket *cb, - bool buckets[DR_CHUNK_SIZE_MAX]) +static int dr_icm_handle_buddies_get_mem(struct mlx5dr_icm_pool *pool, + enum mlx5dr_icm_chunk_size chunk_size, + struct mlx5dr_icm_buddy_mem **buddy, + unsigned int *seg) { - struct mlx5dr_icm_bucket *bucket; - int i; - - for (i = 0; i < pool->num_of_buckets; i++) { - bucket = &pool->buckets[i]; - if (bucket == cb) { - dr_icm_chill_bucket_abort(bucket); - continue; - } + struct mlx5dr_icm_buddy_mem *buddy_mem_pool; + bool new_mem = false; + int err; - if (!buckets[i]) - continue; +alloc_buddy_mem: + /* find the next free place from the buddy list */ + list_for_each_entry(buddy_mem_pool, &pool->buddy_mem_list, list_node) { + err = mlx5dr_buddy_alloc_mem(buddy_mem_pool, + chunk_size, seg); + if (!err) + goto found; + + if (WARN_ON(new_mem)) { + /* We have new memory pool, first in the list */ + mlx5dr_err(pool->dmn, + "No memory for order: %d\n", + chunk_size); + goto out; + } + } - dr_icm_chill_bucket_abort(bucket); - mutex_unlock(&bucket->mutex); + /* no more available allocators in that pool, create new */ + err = dr_icm_buddy_create(pool); + if (err) { + mlx5dr_err(pool->dmn, + "Failed creating buddy for order %d\n", + chunk_size); + goto out; } + + /* mark we have new memory, first in list */ + new_mem = true; + goto alloc_buddy_mem; + +found: + *buddy = buddy_mem_pool; +out: + return err; } /* Allocate an ICM chunk, each chunk holds a piece of ICM memory and @@ -446,68 +371,48 @@ struct mlx5dr_icm_chunk * mlx5dr_icm_alloc_chunk(struct mlx5dr_icm_pool *pool, enum mlx5dr_icm_chunk_size chunk_size) { - struct mlx5dr_icm_chunk *chunk = NULL; /* Fix compilation warning */ - bool buckets[DR_CHUNK_SIZE_MAX] = {}; - struct mlx5dr_icm_bucket *bucket; - int err; + struct mlx5dr_icm_chunk *chunk = NULL; + struct mlx5dr_icm_buddy_mem *buddy; + unsigned int seg; + int ret; if (chunk_size > pool->max_log_chunk_sz) return NULL; - bucket = &pool->buckets[chunk_size]; - - mutex_lock(&bucket->mutex); - - /* Take chunk from pool if available, otherwise allocate new chunks */ - if (list_empty(&bucket->free_list)) { - if (dr_icm_reuse_hot_entries(pool, bucket)) { - dr_icm_chill_buckets_start(pool, bucket, buckets); - err = mlx5dr_cmd_sync_steering(pool->dmn->mdev); - if (err) { - dr_icm_chill_buckets_abort(pool, bucket, buckets); - mlx5dr_err(pool->dmn, "Sync_steering failed\n"); - chunk = NULL; - goto out; - } - dr_icm_chill_buckets_end(pool, bucket, buckets); - } else { - dr_icm_chunks_create(bucket); - } - } + mutex_lock(&pool->mutex); + /* find mem, get back the relevant buddy pool and seg in that mem */ + ret = dr_icm_handle_buddies_get_mem(pool, chunk_size, &buddy, &seg); + if (ret) + goto out; - if (!list_empty(&bucket->free_list)) { - chunk = list_last_entry(&bucket->free_list, - struct mlx5dr_icm_chunk, - chunk_list); - if (chunk) { - list_del_init(&chunk->chunk_list); - list_add_tail(&chunk->chunk_list, &bucket->used_list); - bucket->free_list_count--; - bucket->used_list_count++; - } - } + chunk = dr_icm_chunk_create(pool, chunk_size, buddy, seg); + if (!chunk) + goto out_err; + + goto out; + +out_err: + mlx5dr_buddy_free_mem(buddy, seg, chunk_size); out: - mutex_unlock(&bucket->mutex); + mutex_unlock(&pool->mutex); return chunk; } void mlx5dr_icm_free_chunk(struct mlx5dr_icm_chunk *chunk) { - struct mlx5dr_icm_bucket *bucket = chunk->bucket; + struct mlx5dr_icm_buddy_mem *buddy = chunk->buddy_mem; + struct mlx5dr_icm_pool *pool = buddy->pool; - if (bucket->pool->icm_type == DR_ICM_TYPE_STE) { - memset(chunk->ste_arr, 0, - bucket->num_of_entries * sizeof(chunk->ste_arr[0])); - memset(chunk->hw_ste_arr, 0, - bucket->num_of_entries * DR_STE_SIZE_REDUCED); - } + /* move the memory to the waiting list AKA "hot" */ + mutex_lock(&pool->mutex); + list_move_tail(&chunk->chunk_list, &buddy->hot_list); + pool->hot_memory_size += chunk->byte_size; + + /* Check if we have chunks that are waiting for sync-ste */ + if (dr_icm_pool_is_sync_required(pool)) + dr_icm_pool_sync_all_buddy_pools(pool); - mutex_lock(&bucket->mutex); - list_del_init(&chunk->chunk_list); - list_add_tail(&chunk->chunk_list, &bucket->hot_list); - bucket->hot_list_count++; - bucket->used_list_count--; - mutex_unlock(&bucket->mutex); + mutex_unlock(&pool->mutex); } struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn, @@ -515,7 +420,6 @@ struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn, { enum mlx5dr_icm_chunk_size max_log_chunk_sz; struct mlx5dr_icm_pool *pool; - int i; if (icm_type == DR_ICM_TYPE_STE) max_log_chunk_sz = dmn->info.max_log_sw_icm_sz; @@ -526,43 +430,24 @@ struct mlx5dr_icm_pool *mlx5dr_icm_pool_create(struct mlx5dr_domain *dmn, if (!pool) return NULL; - pool->buckets = kcalloc(max_log_chunk_sz + 1, - sizeof(pool->buckets[0]), - GFP_KERNEL); - if (!pool->buckets) - goto free_pool; - pool->dmn = dmn; pool->icm_type = icm_type; pool->max_log_chunk_sz = max_log_chunk_sz; - pool->num_of_buckets = max_log_chunk_sz + 1; - INIT_LIST_HEAD(&pool->icm_mr_list); - for (i = 0; i < pool->num_of_buckets; i++) - dr_icm_bucket_init(pool, &pool->buckets[i], i); + INIT_LIST_HEAD(&pool->buddy_mem_list); - mutex_init(&pool->mr_mutex); + mutex_init(&pool->mutex); return pool; - -free_pool: - kvfree(pool); - return NULL; } void mlx5dr_icm_pool_destroy(struct mlx5dr_icm_pool *pool) { - struct mlx5dr_icm_mr *icm_mr, *next; - int i; - - mutex_destroy(&pool->mr_mutex); - - list_for_each_entry_safe(icm_mr, next, &pool->icm_mr_list, mr_list) - dr_icm_pool_mr_destroy(icm_mr); + struct mlx5dr_icm_buddy_mem *buddy, *tmp_buddy; - for (i = 0; i < pool->num_of_buckets; i++) - dr_icm_bucket_cleanup(&pool->buckets[i]); + list_for_each_entry_safe(buddy, tmp_buddy, &pool->buddy_mem_list, list_node) + dr_icm_buddy_destroy(buddy); - kfree(pool->buckets); + mutex_destroy(&pool->mutex); kvfree(pool); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c index 7df883686d46..cb5202e17856 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c @@ -85,7 +85,7 @@ static bool dr_mask_is_ttl_set(struct mlx5dr_match_spec *spec) (_misc2)._inner_outer##_first_mpls_s_bos || \ (_misc2)._inner_outer##_first_mpls_ttl) -static bool dr_mask_is_gre_set(struct mlx5dr_match_misc *misc) +static bool dr_mask_is_tnl_gre_set(struct mlx5dr_match_misc *misc) { return (misc->gre_key_h || misc->gre_key_l || misc->gre_protocol || misc->gre_c_present || @@ -98,12 +98,12 @@ static bool dr_mask_is_gre_set(struct mlx5dr_match_misc *misc) (_misc2).outer_first_mpls_over_##gre_udp##_s_bos || \ (_misc2).outer_first_mpls_over_##gre_udp##_ttl) -#define DR_MASK_IS_FLEX_PARSER_0_SET(_misc2) ( \ +#define DR_MASK_IS_TNL_MPLS_SET(_misc2) ( \ DR_MASK_IS_OUTER_MPLS_OVER_GRE_UDP_SET((_misc2), gre) || \ DR_MASK_IS_OUTER_MPLS_OVER_GRE_UDP_SET((_misc2), udp)) static bool -dr_mask_is_misc3_vxlan_gpe_set(struct mlx5dr_match_misc3 *misc3) +dr_mask_is_vxlan_gpe_set(struct mlx5dr_match_misc3 *misc3) { return (misc3->outer_vxlan_gpe_vni || misc3->outer_vxlan_gpe_next_protocol || @@ -111,21 +111,20 @@ dr_mask_is_misc3_vxlan_gpe_set(struct mlx5dr_match_misc3 *misc3) } static bool -dr_matcher_supp_flex_parser_vxlan_gpe(struct mlx5dr_cmd_caps *caps) +dr_matcher_supp_vxlan_gpe(struct mlx5dr_cmd_caps *caps) { - return caps->flex_protocols & - MLX5_FLEX_PARSER_VXLAN_GPE_ENABLED; + return caps->flex_protocols & MLX5_FLEX_PARSER_VXLAN_GPE_ENABLED; } static bool -dr_mask_is_flex_parser_tnl_vxlan_gpe_set(struct mlx5dr_match_param *mask, - struct mlx5dr_domain *dmn) +dr_mask_is_tnl_vxlan_gpe(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) { - return dr_mask_is_misc3_vxlan_gpe_set(&mask->misc3) && - dr_matcher_supp_flex_parser_vxlan_gpe(&dmn->info.caps); + return dr_mask_is_vxlan_gpe_set(&mask->misc3) && + dr_matcher_supp_vxlan_gpe(&dmn->info.caps); } -static bool dr_mask_is_misc_geneve_set(struct mlx5dr_match_misc *misc) +static bool dr_mask_is_tnl_geneve_set(struct mlx5dr_match_misc *misc) { return misc->geneve_vni || misc->geneve_oam || @@ -134,26 +133,46 @@ static bool dr_mask_is_misc_geneve_set(struct mlx5dr_match_misc *misc) } static bool -dr_matcher_supp_flex_parser_geneve(struct mlx5dr_cmd_caps *caps) +dr_matcher_supp_tnl_geneve(struct mlx5dr_cmd_caps *caps) { - return caps->flex_protocols & - MLX5_FLEX_PARSER_GENEVE_ENABLED; + return caps->flex_protocols & MLX5_FLEX_PARSER_GENEVE_ENABLED; } static bool -dr_mask_is_flex_parser_tnl_geneve_set(struct mlx5dr_match_param *mask, - struct mlx5dr_domain *dmn) +dr_mask_is_tnl_geneve(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) { - return dr_mask_is_misc_geneve_set(&mask->misc) && - dr_matcher_supp_flex_parser_geneve(&dmn->info.caps); + return dr_mask_is_tnl_geneve_set(&mask->misc) && + dr_matcher_supp_tnl_geneve(&dmn->info.caps); } -static bool dr_mask_is_flex_parser_icmpv6_set(struct mlx5dr_match_misc3 *misc3) +static int dr_matcher_supp_icmp_v4(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V4_ENABLED; +} + +static int dr_matcher_supp_icmp_v6(struct mlx5dr_cmd_caps *caps) +{ + return caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V6_ENABLED; +} + +static bool dr_mask_is_icmpv6_set(struct mlx5dr_match_misc3 *misc3) { return (misc3->icmpv6_type || misc3->icmpv6_code || misc3->icmpv6_header_data); } +static bool dr_mask_is_icmp(struct mlx5dr_match_param *mask, + struct mlx5dr_domain *dmn) +{ + if (DR_MASK_IS_ICMPV4_SET(&mask->misc3)) + return dr_matcher_supp_icmp_v4(&dmn->info.caps); + else if (dr_mask_is_icmpv6_set(&mask->misc3)) + return dr_matcher_supp_icmp_v6(&dmn->info.caps); + + return false; +} + static bool dr_mask_is_wqe_metadata_set(struct mlx5dr_match_misc2 *misc2) { return misc2->metadata_reg_a; @@ -257,7 +276,7 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, if (dr_mask_is_smac_set(&mask.outer) && dr_mask_is_dmac_set(&mask.outer)) { - mlx5dr_ste_build_eth_l2_src_des(&sb[idx++], &mask, + mlx5dr_ste_build_eth_l2_src_dst(&sb[idx++], &mask, inner, rx); } @@ -277,8 +296,8 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, inner, rx); if (DR_MASK_IS_ETH_L4_SET(mask.outer, mask.misc, outer)) - mlx5dr_ste_build_ipv6_l3_l4(&sb[idx++], &mask, - inner, rx); + mlx5dr_ste_build_eth_ipv6_l3_l4(&sb[idx++], &mask, + inner, rx); } else { if (dr_mask_is_ipv4_5_tuple_set(&mask.outer)) mlx5dr_ste_build_eth_l3_ipv4_5_tuple(&sb[idx++], &mask, @@ -289,14 +308,12 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, inner, rx); } - if (dr_mask_is_flex_parser_tnl_vxlan_gpe_set(&mask, dmn)) - mlx5dr_ste_build_flex_parser_tnl_vxlan_gpe(&sb[idx++], - &mask, - inner, rx); - else if (dr_mask_is_flex_parser_tnl_geneve_set(&mask, dmn)) - mlx5dr_ste_build_flex_parser_tnl_geneve(&sb[idx++], - &mask, - inner, rx); + if (dr_mask_is_tnl_vxlan_gpe(&mask, dmn)) + mlx5dr_ste_build_tnl_vxlan_gpe(&sb[idx++], &mask, + inner, rx); + else if (dr_mask_is_tnl_geneve(&mask, dmn)) + mlx5dr_ste_build_tnl_geneve(&sb[idx++], &mask, + inner, rx); if (DR_MASK_IS_ETH_L4_MISC_SET(mask.misc3, outer)) mlx5dr_ste_build_eth_l4_misc(&sb[idx++], &mask, inner, rx); @@ -304,22 +321,18 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, if (DR_MASK_IS_FIRST_MPLS_SET(mask.misc2, outer)) mlx5dr_ste_build_mpls(&sb[idx++], &mask, inner, rx); - if (DR_MASK_IS_FLEX_PARSER_0_SET(mask.misc2)) - mlx5dr_ste_build_flex_parser_0(&sb[idx++], &mask, - inner, rx); + if (DR_MASK_IS_TNL_MPLS_SET(mask.misc2)) + mlx5dr_ste_build_tnl_mpls(&sb[idx++], &mask, inner, rx); - if ((DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(&mask.misc3) && - mlx5dr_matcher_supp_flex_parser_icmp_v4(&dmn->info.caps)) || - (dr_mask_is_flex_parser_icmpv6_set(&mask.misc3) && - mlx5dr_matcher_supp_flex_parser_icmp_v6(&dmn->info.caps))) { - ret = mlx5dr_ste_build_flex_parser_1(&sb[idx++], - &mask, &dmn->info.caps, - inner, rx); + if (dr_mask_is_icmp(&mask, dmn)) { + ret = mlx5dr_ste_build_icmp(&sb[idx++], + &mask, &dmn->info.caps, + inner, rx); if (ret) return ret; } - if (dr_mask_is_gre_set(&mask.misc)) - mlx5dr_ste_build_gre(&sb[idx++], &mask, inner, rx); + if (dr_mask_is_tnl_gre_set(&mask.misc)) + mlx5dr_ste_build_tnl_gre(&sb[idx++], &mask, inner, rx); } /* Inner */ @@ -334,7 +347,7 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, if (dr_mask_is_smac_set(&mask.inner) && dr_mask_is_dmac_set(&mask.inner)) { - mlx5dr_ste_build_eth_l2_src_des(&sb[idx++], + mlx5dr_ste_build_eth_l2_src_dst(&sb[idx++], &mask, inner, rx); } @@ -354,8 +367,8 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, inner, rx); if (DR_MASK_IS_ETH_L4_SET(mask.inner, mask.misc, inner)) - mlx5dr_ste_build_ipv6_l3_l4(&sb[idx++], &mask, - inner, rx); + mlx5dr_ste_build_eth_ipv6_l3_l4(&sb[idx++], &mask, + inner, rx); } else { if (dr_mask_is_ipv4_5_tuple_set(&mask.inner)) mlx5dr_ste_build_eth_l3_ipv4_5_tuple(&sb[idx++], &mask, @@ -372,8 +385,8 @@ static int dr_matcher_set_ste_builders(struct mlx5dr_matcher *matcher, if (DR_MASK_IS_FIRST_MPLS_SET(mask.misc2, inner)) mlx5dr_ste_build_mpls(&sb[idx++], &mask, inner, rx); - if (DR_MASK_IS_FLEX_PARSER_0_SET(mask.misc2)) - mlx5dr_ste_build_flex_parser_0(&sb[idx++], &mask, inner, rx); + if (DR_MASK_IS_TNL_MPLS_SET(mask.misc2)) + mlx5dr_ste_build_tnl_mpls(&sb[idx++], &mask, inner, rx); } /* Empty matcher, takes all */ if (matcher->match_criteria == DR_MATCHER_CRITERIA_EMPTY) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c index b01aaec75622..d275823bff2f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c @@ -1090,7 +1090,7 @@ static int dr_ste_build_eth_l2_src_des_tag(struct mlx5dr_match_param *value, return 0; } -void mlx5dr_ste_build_eth_l2_src_des(struct mlx5dr_ste_build *sb, +void mlx5dr_ste_build_eth_l2_src_dst(struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask, bool inner, bool rx) { @@ -1594,9 +1594,9 @@ static int dr_ste_build_ipv6_l3_l4_tag(struct mlx5dr_match_param *value, return 0; } -void mlx5dr_ste_build_ipv6_l3_l4(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask, - bool inner, bool rx) +void mlx5dr_ste_build_eth_ipv6_l3_l4(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) { dr_ste_build_ipv6_l3_l4_bit_mask(mask, inner, sb->bit_mask); @@ -1693,8 +1693,8 @@ static int dr_ste_build_gre_tag(struct mlx5dr_match_param *value, return 0; } -void mlx5dr_ste_build_gre(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask, bool inner, bool rx) +void mlx5dr_ste_build_tnl_gre(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, bool inner, bool rx) { dr_ste_build_gre_bit_mask(mask, inner, sb->bit_mask); @@ -1771,9 +1771,9 @@ static int dr_ste_build_flex_parser_0_tag(struct mlx5dr_match_param *value, return 0; } -void mlx5dr_ste_build_flex_parser_0(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask, - bool inner, bool rx) +void mlx5dr_ste_build_tnl_mpls(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) { dr_ste_build_flex_parser_0_bit_mask(mask, inner, sb->bit_mask); @@ -1792,8 +1792,8 @@ static int dr_ste_build_flex_parser_1_bit_mask(struct mlx5dr_match_param *mask, struct mlx5dr_cmd_caps *caps, u8 *bit_mask) { + bool is_ipv4_mask = DR_MASK_IS_ICMPV4_SET(&mask->misc3); struct mlx5dr_match_misc3 *misc_3_mask = &mask->misc3; - bool is_ipv4_mask = DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(misc_3_mask); u32 icmp_header_data_mask; u32 icmp_type_mask; u32 icmp_code_mask; @@ -1869,7 +1869,7 @@ static int dr_ste_build_flex_parser_1_tag(struct mlx5dr_match_param *value, u32 icmp_code; bool is_ipv4; - is_ipv4 = DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(misc_3); + is_ipv4 = DR_MASK_IS_ICMPV4_SET(misc_3); if (is_ipv4) { icmp_header_data = misc_3->icmpv4_header_data; icmp_type = misc_3->icmpv4_type; @@ -1928,10 +1928,10 @@ static int dr_ste_build_flex_parser_1_tag(struct mlx5dr_match_param *value, return 0; } -int mlx5dr_ste_build_flex_parser_1(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask, - struct mlx5dr_cmd_caps *caps, - bool inner, bool rx) +int mlx5dr_ste_build_icmp(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx) { int ret; @@ -2069,9 +2069,9 @@ dr_ste_build_flex_parser_tnl_vxlan_gpe_tag(struct mlx5dr_match_param *value, return 0; } -void mlx5dr_ste_build_flex_parser_tnl_vxlan_gpe(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask, - bool inner, bool rx) +void mlx5dr_ste_build_tnl_vxlan_gpe(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) { dr_ste_build_flex_parser_tnl_vxlan_gpe_bit_mask(mask, inner, sb->bit_mask); @@ -2122,9 +2122,9 @@ dr_ste_build_flex_parser_tnl_geneve_tag(struct mlx5dr_match_param *value, return 0; } -void mlx5dr_ste_build_flex_parser_tnl_geneve(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask, - bool inner, bool rx) +void mlx5dr_ste_build_tnl_geneve(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx) { dr_ste_build_flex_parser_tnl_geneve_bit_mask(mask, sb->bit_mask); sb->rx = rx; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h index f50f3b107aa3..3e423c8ed22f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -114,7 +114,7 @@ enum mlx5dr_ipv { struct mlx5dr_icm_pool; struct mlx5dr_icm_chunk; -struct mlx5dr_icm_bucket; +struct mlx5dr_icm_buddy_mem; struct mlx5dr_ste_htbl; struct mlx5dr_match_param; struct mlx5dr_cmd_caps; @@ -288,7 +288,7 @@ int mlx5dr_ste_build_ste_arr(struct mlx5dr_matcher *matcher, struct mlx5dr_matcher_rx_tx *nic_matcher, struct mlx5dr_match_param *value, u8 *ste_arr); -void mlx5dr_ste_build_eth_l2_src_des(struct mlx5dr_ste_build *builder, +void mlx5dr_ste_build_eth_l2_src_dst(struct mlx5dr_ste_build *builder, struct mlx5dr_match_param *mask, bool inner, bool rx); void mlx5dr_ste_build_eth_l3_ipv4_5_tuple(struct mlx5dr_ste_build *sb, @@ -312,31 +312,31 @@ void mlx5dr_ste_build_eth_l2_dst(struct mlx5dr_ste_build *sb, void mlx5dr_ste_build_eth_l2_tnl(struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask, bool inner, bool rx); -void mlx5dr_ste_build_ipv6_l3_l4(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask, - bool inner, bool rx); +void mlx5dr_ste_build_eth_ipv6_l3_l4(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); void mlx5dr_ste_build_eth_l4_misc(struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask, bool inner, bool rx); -void mlx5dr_ste_build_gre(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask, - bool inner, bool rx); +void mlx5dr_ste_build_tnl_gre(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); void mlx5dr_ste_build_mpls(struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask, bool inner, bool rx); -void mlx5dr_ste_build_flex_parser_0(struct mlx5dr_ste_build *sb, +void mlx5dr_ste_build_tnl_mpls(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); +int mlx5dr_ste_build_icmp(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + struct mlx5dr_cmd_caps *caps, + bool inner, bool rx); +void mlx5dr_ste_build_tnl_vxlan_gpe(struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask, bool inner, bool rx); -int mlx5dr_ste_build_flex_parser_1(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask, - struct mlx5dr_cmd_caps *caps, - bool inner, bool rx); -void mlx5dr_ste_build_flex_parser_tnl_vxlan_gpe(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask, - bool inner, bool rx); -void mlx5dr_ste_build_flex_parser_tnl_geneve(struct mlx5dr_ste_build *sb, - struct mlx5dr_match_param *mask, - bool inner, bool rx); +void mlx5dr_ste_build_tnl_geneve(struct mlx5dr_ste_build *sb, + struct mlx5dr_match_param *mask, + bool inner, bool rx); void mlx5dr_ste_build_general_purpose(struct mlx5dr_ste_build *sb, struct mlx5dr_match_param *mask, bool inner, bool rx); @@ -588,9 +588,9 @@ struct mlx5dr_match_param { struct mlx5dr_match_misc3 misc3; }; -#define DR_MASK_IS_FLEX_PARSER_ICMPV4_SET(_misc3) ((_misc3)->icmpv4_type || \ - (_misc3)->icmpv4_code || \ - (_misc3)->icmpv4_header_data) +#define DR_MASK_IS_ICMPV4_SET(_misc3) ((_misc3)->icmpv4_type || \ + (_misc3)->icmpv4_code || \ + (_misc3)->icmpv4_header_data) struct mlx5dr_esw_caps { u64 drop_icm_address_rx; @@ -731,7 +731,6 @@ struct mlx5dr_action { struct mlx5dr_domain *dmn; struct mlx5dr_icm_chunk *chunk; u8 *data; - u32 data_size; u16 num_of_actions; u32 index; u8 allow_rx:1; @@ -804,7 +803,7 @@ void mlx5dr_rule_update_rule_member(struct mlx5dr_ste *new_ste, struct mlx5dr_ste *ste); struct mlx5dr_icm_chunk { - struct mlx5dr_icm_bucket *bucket; + struct mlx5dr_icm_buddy_mem *buddy_mem; struct list_head chunk_list; u32 rkey; u32 num_of_entries; @@ -812,6 +811,11 @@ struct mlx5dr_icm_chunk { u64 icm_addr; u64 mr_addr; + /* indicates the index of this chunk in the whole memory, + * used for deleting the chunk from the buddy + */ + unsigned int seg; + /* Memory optimisation */ struct mlx5dr_ste *ste_arr; u8 *hw_ste_arr; @@ -840,23 +844,20 @@ static inline void mlx5dr_domain_unlock(struct mlx5dr_domain *dmn) mlx5dr_domain_nic_unlock(&dmn->info.rx); } -static inline int -mlx5dr_matcher_supp_flex_parser_icmp_v4(struct mlx5dr_cmd_caps *caps) -{ - return caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V4_ENABLED; -} - -static inline int -mlx5dr_matcher_supp_flex_parser_icmp_v6(struct mlx5dr_cmd_caps *caps) -{ - return caps->flex_protocols & MLX5_FLEX_PARSER_ICMP_V6_ENABLED; -} - int mlx5dr_matcher_select_builders(struct mlx5dr_matcher *matcher, struct mlx5dr_matcher_rx_tx *nic_matcher, enum mlx5dr_ipv outer_ipv, enum mlx5dr_ipv inner_ipv); +static inline int +mlx5dr_icm_pool_dm_type_to_entry_size(enum mlx5dr_icm_type icm_type) +{ + if (icm_type == DR_ICM_TYPE_STE) + return DR_STE_SIZE; + + return DR_MODIFY_ACTION_SIZE; +} + static inline u32 mlx5dr_icm_pool_chunk_size_to_entries(enum mlx5dr_icm_chunk_size chunk_size) { @@ -870,11 +871,7 @@ mlx5dr_icm_pool_chunk_size_to_byte(enum mlx5dr_icm_chunk_size chunk_size, int num_of_entries; int entry_size; - if (icm_type == DR_ICM_TYPE_STE) - entry_size = DR_STE_SIZE; - else - entry_size = DR_MODIFY_ACTION_SIZE; - + entry_size = mlx5dr_icm_pool_dm_type_to_entry_size(icm_type); num_of_entries = mlx5dr_icm_pool_chunk_size_to_entries(chunk_size); return entry_size * num_of_entries; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h index 7914fe3fc68d..4177786b8eaf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h @@ -127,4 +127,36 @@ mlx5dr_is_supported(struct mlx5_core_dev *dev) return MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner); } +/* buddy functions & structure */ + +struct mlx5dr_icm_mr; + +struct mlx5dr_icm_buddy_mem { + unsigned long **bitmap; + unsigned int *num_free; + u32 max_order; + struct list_head list_node; + struct mlx5dr_icm_mr *icm_mr; + struct mlx5dr_icm_pool *pool; + + /* This is the list of used chunks. HW may be accessing this memory */ + struct list_head used_list; + u64 used_memory; + + /* Hardware may be accessing this memory but at some future, + * undetermined time, it might cease to do so. + * sync_ste command sets them free. + */ + struct list_head hot_list; +}; + +int mlx5dr_buddy_init(struct mlx5dr_icm_buddy_mem *buddy, + unsigned int max_order); +void mlx5dr_buddy_cleanup(struct mlx5dr_icm_buddy_mem *buddy); +int mlx5dr_buddy_alloc_mem(struct mlx5dr_icm_buddy_mem *buddy, + unsigned int order, + unsigned int *segment); +void mlx5dr_buddy_free_mem(struct mlx5dr_icm_buddy_mem *buddy, + unsigned int seg, unsigned int order); + #endif /* _MLX5DR_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 39eff6a57ba2..fcf9095b3f55 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -7279,10 +7279,11 @@ static inline void mlxsw_reg_ralue_pack4(char *payload, enum mlxsw_reg_ralxx_protocol protocol, enum mlxsw_reg_ralue_op op, u16 virtual_router, u8 prefix_len, - u32 dip) + u32 *dip) { mlxsw_reg_ralue_pack(payload, protocol, op, virtual_router, prefix_len); - mlxsw_reg_ralue_dip4_set(payload, dip); + if (dip) + mlxsw_reg_ralue_dip4_set(payload, *dip); } static inline void mlxsw_reg_ralue_pack6(char *payload, @@ -7292,7 +7293,8 @@ static inline void mlxsw_reg_ralue_pack6(char *payload, const void *dip) { mlxsw_reg_ralue_pack(payload, protocol, op, virtual_router, prefix_len); - mlxsw_reg_ralue_dip6_memcpy_to(payload, dip); + if (dip) + mlxsw_reg_ralue_dip6_memcpy_to(payload, dip); } static inline void @@ -8245,6 +8247,86 @@ mlxsw_reg_rmft2_ipv6_pack(char *payload, bool v, u16 offset, u16 virtual_router, mlxsw_reg_rmft2_sip6_mask_memcpy_to(payload, (void *)&sip6_mask); } +/* Note that XRALXX register position violates the rule of ordering register + * definition by the ID. However, XRALXX pack helpers are using RALXX pack + * helpers, RALXX registers have higher IDs. + */ + +/* XRALTA - XM Router Algorithmic LPM Tree Allocation Register + * ----------------------------------------------------------- + * The XRALTA is used to allocate the XLT LPM trees. + * + * This register embeds original RALTA register. + */ +#define MLXSW_REG_XRALTA_ID 0x7811 +#define MLXSW_REG_XRALTA_LEN 0x08 +#define MLXSW_REG_XRALTA_RALTA_OFFSET 0x04 + +MLXSW_REG_DEFINE(xralta, MLXSW_REG_XRALTA_ID, MLXSW_REG_XRALTA_LEN); + +static inline void mlxsw_reg_xralta_pack(char *payload, bool alloc, + enum mlxsw_reg_ralxx_protocol protocol, + u8 tree_id) +{ + char *ralta_payload = payload + MLXSW_REG_XRALTA_RALTA_OFFSET; + + MLXSW_REG_ZERO(xralta, payload); + mlxsw_reg_ralta_pack(ralta_payload, alloc, protocol, tree_id); +} + +/* XRALST - XM Router Algorithmic LPM Structure Tree Register + * ---------------------------------------------------------- + * The XRALST is used to set and query the structure of an XLT LPM tree. + * + * This register embeds original RALST register. + */ +#define MLXSW_REG_XRALST_ID 0x7812 +#define MLXSW_REG_XRALST_LEN 0x108 +#define MLXSW_REG_XRALST_RALST_OFFSET 0x04 + +MLXSW_REG_DEFINE(xralst, MLXSW_REG_XRALST_ID, MLXSW_REG_XRALST_LEN); + +static inline void mlxsw_reg_xralst_pack(char *payload, u8 root_bin, u8 tree_id) +{ + char *ralst_payload = payload + MLXSW_REG_XRALST_RALST_OFFSET; + + MLXSW_REG_ZERO(xralst, payload); + mlxsw_reg_ralst_pack(ralst_payload, root_bin, tree_id); +} + +static inline void mlxsw_reg_xralst_bin_pack(char *payload, u8 bin_number, + u8 left_child_bin, + u8 right_child_bin) +{ + char *ralst_payload = payload + MLXSW_REG_XRALST_RALST_OFFSET; + + mlxsw_reg_ralst_bin_pack(ralst_payload, bin_number, left_child_bin, + right_child_bin); +} + +/* XRALTB - XM Router Algorithmic LPM Tree Binding Register + * -------------------------------------------------------- + * The XRALTB register is used to bind virtual router and protocol + * to an allocated LPM tree. + * + * This register embeds original RALTB register. + */ +#define MLXSW_REG_XRALTB_ID 0x7813 +#define MLXSW_REG_XRALTB_LEN 0x08 +#define MLXSW_REG_XRALTB_RALTB_OFFSET 0x04 + +MLXSW_REG_DEFINE(xraltb, MLXSW_REG_XRALTB_ID, MLXSW_REG_XRALTB_LEN); + +static inline void mlxsw_reg_xraltb_pack(char *payload, u16 virtual_router, + enum mlxsw_reg_ralxx_protocol protocol, + u8 tree_id) +{ + char *raltb_payload = payload + MLXSW_REG_XRALTB_RALTB_OFFSET; + + MLXSW_REG_ZERO(xraltb, payload); + mlxsw_reg_raltb_pack(raltb_payload, virtual_router, protocol, tree_id); +} + /* MFCR - Management Fan Control Register * -------------------------------------- * This register controls the settings of the Fan Speed PWM mechanism. @@ -11195,6 +11277,9 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(rigr2), MLXSW_REG(recr2), MLXSW_REG(rmft2), + MLXSW_REG(xralta), + MLXSW_REG(xralst), + MLXSW_REG(xraltb), MLXSW_REG(mfcr), MLXSW_REG(mfsc), MLXSW_REG(mfsm), diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c index a8525992528f..089d99535f9e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c @@ -181,23 +181,26 @@ mlxsw_sp_ipip_fib_entry_op_gre4_rtdp(struct mlxsw_sp *mlxsw_sp, } static int -mlxsw_sp_ipip_fib_entry_op_gre4_ralue(struct mlxsw_sp *mlxsw_sp, - u32 dip, u8 prefix_len, u16 ul_vr_id, - enum mlxsw_reg_ralue_op op, - u32 tunnel_index) +mlxsw_sp_ipip_fib_entry_op_gre4_do(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_router_ll_ops *ll_ops, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + u32 dip, u8 prefix_len, u16 ul_vr_id, + enum mlxsw_sp_fib_entry_op op, + u32 tunnel_index, + struct mlxsw_sp_fib_entry_priv *priv) { - char ralue_pl[MLXSW_REG_RALUE_LEN]; - - mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_REG_RALXX_PROTOCOL_IPV4, op, - ul_vr_id, prefix_len, dip); - mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl, tunnel_index); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); + ll_ops->fib_entry_pack(op_ctx, MLXSW_SP_L3_PROTO_IPV4, op, ul_vr_id, + prefix_len, (unsigned char *) &dip, priv); + ll_ops->fib_entry_act_ip2me_tun_pack(op_ctx, tunnel_index); + return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops); } static int mlxsw_sp_ipip_fib_entry_op_gre4(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_ipip_entry *ipip_entry, - enum mlxsw_reg_ralue_op op, - u32 tunnel_index) + const struct mlxsw_sp_router_ll_ops *ll_ops, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + struct mlxsw_sp_ipip_entry *ipip_entry, + enum mlxsw_sp_fib_entry_op op, u32 tunnel_index, + struct mlxsw_sp_fib_entry_priv *priv) { u16 ul_vr_id = mlxsw_sp_ipip_lb_ul_vr_id(ipip_entry->ol_lb); __be32 dip; @@ -210,9 +213,8 @@ static int mlxsw_sp_ipip_fib_entry_op_gre4(struct mlxsw_sp *mlxsw_sp, dip = mlxsw_sp_ipip_netdev_saddr(MLXSW_SP_L3_PROTO_IPV4, ipip_entry->ol_dev).addr4; - return mlxsw_sp_ipip_fib_entry_op_gre4_ralue(mlxsw_sp, be32_to_cpu(dip), - 32, ul_vr_id, op, - tunnel_index); + return mlxsw_sp_ipip_fib_entry_op_gre4_do(mlxsw_sp, ll_ops, op_ctx, be32_to_cpu(dip), + 32, ul_vr_id, op, tunnel_index, priv); } static bool mlxsw_sp_ipip_tunnel_complete(enum mlxsw_sp_l3proto proto, @@ -231,8 +233,7 @@ static bool mlxsw_sp_ipip_tunnel_complete(enum mlxsw_sp_l3proto proto, } static bool mlxsw_sp_ipip_can_offload_gre4(const struct mlxsw_sp *mlxsw_sp, - const struct net_device *ol_dev, - enum mlxsw_sp_l3proto ol_proto) + const struct net_device *ol_dev) { struct ip_tunnel *tunnel = netdev_priv(ol_dev); __be16 okflags = TUNNEL_KEY; /* We can't offload any other features. */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h index bb5c4d4a5872..d32702cb6ab4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h @@ -43,8 +43,7 @@ struct mlxsw_sp_ipip_ops { struct mlxsw_sp_ipip_entry *ipip_entry); bool (*can_offload)(const struct mlxsw_sp *mlxsw_sp, - const struct net_device *ol_dev, - enum mlxsw_sp_l3proto ol_proto); + const struct net_device *ol_dev); /* Return a configuration for creating an overlay loopback RIF. */ struct mlxsw_sp_rif_ipip_lb_config @@ -52,9 +51,12 @@ struct mlxsw_sp_ipip_ops { const struct net_device *ol_dev); int (*fib_entry_op)(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_router_ll_ops *ll_ops, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_ipip_entry *ipip_entry, - enum mlxsw_reg_ralue_op op, - u32 tunnel_index); + enum mlxsw_sp_fib_entry_op op, + u32 tunnel_index, + struct mlxsw_sp_fib_entry_priv *priv); int (*ol_netdev_change)(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_ipip_entry *ipip_entry, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 4381f8c6c3fb..147dd8fab2af 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -352,6 +352,7 @@ enum mlxsw_sp_fib_entry_type { MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP, }; +struct mlxsw_sp_nexthop_group_info; struct mlxsw_sp_nexthop_group; struct mlxsw_sp_fib_entry; @@ -368,18 +369,71 @@ struct mlxsw_sp_fib_entry_decap { u32 tunnel_index; }; +static struct mlxsw_sp_fib_entry_priv * +mlxsw_sp_fib_entry_priv_create(const struct mlxsw_sp_router_ll_ops *ll_ops) +{ + struct mlxsw_sp_fib_entry_priv *priv; + + if (!ll_ops->fib_entry_priv_size) + /* No need to have priv */ + return NULL; + + priv = kzalloc(sizeof(*priv) + ll_ops->fib_entry_priv_size, GFP_KERNEL); + if (!priv) + return ERR_PTR(-ENOMEM); + refcount_set(&priv->refcnt, 1); + return priv; +} + +static void +mlxsw_sp_fib_entry_priv_destroy(struct mlxsw_sp_fib_entry_priv *priv) +{ + kfree(priv); +} + +static void mlxsw_sp_fib_entry_priv_hold(struct mlxsw_sp_fib_entry_priv *priv) +{ + refcount_inc(&priv->refcnt); +} + +static void mlxsw_sp_fib_entry_priv_put(struct mlxsw_sp_fib_entry_priv *priv) +{ + if (!priv || !refcount_dec_and_test(&priv->refcnt)) + return; + mlxsw_sp_fib_entry_priv_destroy(priv); +} + +static void mlxsw_sp_fib_entry_op_ctx_priv_hold(struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + struct mlxsw_sp_fib_entry_priv *priv) +{ + if (!priv) + return; + mlxsw_sp_fib_entry_priv_hold(priv); + list_add(&priv->list, &op_ctx->fib_entry_priv_list); +} + +static void mlxsw_sp_fib_entry_op_ctx_priv_put_all(struct mlxsw_sp_fib_entry_op_ctx *op_ctx) +{ + struct mlxsw_sp_fib_entry_priv *priv, *tmp; + + list_for_each_entry_safe(priv, tmp, &op_ctx->fib_entry_priv_list, list) + mlxsw_sp_fib_entry_priv_put(priv); + INIT_LIST_HEAD(&op_ctx->fib_entry_priv_list); +} + struct mlxsw_sp_fib_entry { struct mlxsw_sp_fib_node *fib_node; enum mlxsw_sp_fib_entry_type type; struct list_head nexthop_group_node; struct mlxsw_sp_nexthop_group *nh_group; struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */ + struct mlxsw_sp_fib_entry_priv *priv; }; struct mlxsw_sp_fib4_entry { struct mlxsw_sp_fib_entry common; + struct fib_info *fi; u32 tb_id; - u32 prio; u8 tos; u8 type; }; @@ -409,6 +463,7 @@ struct mlxsw_sp_fib { struct mlxsw_sp_vr *vr; struct mlxsw_sp_lpm_tree *lpm_tree; enum mlxsw_sp_l3proto proto; + const struct mlxsw_sp_router_ll_ops *ll_ops; }; struct mlxsw_sp_vr { @@ -422,12 +477,31 @@ struct mlxsw_sp_vr { refcount_t ul_rif_refcnt; }; +static int mlxsw_sp_router_ll_basic_ralta_write(struct mlxsw_sp *mlxsw_sp, char *xralta_pl) +{ + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), + xralta_pl + MLXSW_REG_XRALTA_RALTA_OFFSET); +} + +static int mlxsw_sp_router_ll_basic_ralst_write(struct mlxsw_sp *mlxsw_sp, char *xralst_pl) +{ + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), + xralst_pl + MLXSW_REG_XRALST_RALST_OFFSET); +} + +static int mlxsw_sp_router_ll_basic_raltb_write(struct mlxsw_sp *mlxsw_sp, char *xraltb_pl) +{ + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), + xraltb_pl + MLXSW_REG_XRALTB_RALTB_OFFSET); +} + static const struct rhashtable_params mlxsw_sp_fib_ht_params; static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr, enum mlxsw_sp_l3proto proto) { + const struct mlxsw_sp_router_ll_ops *ll_ops = mlxsw_sp->router->proto_ll_ops[proto]; struct mlxsw_sp_lpm_tree *lpm_tree; struct mlxsw_sp_fib *fib; int err; @@ -443,6 +517,7 @@ static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp, fib->proto = proto; fib->vr = vr; fib->lpm_tree = lpm_tree; + fib->ll_ops = ll_ops; mlxsw_sp_lpm_tree_hold(lpm_tree); err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id); if (err) @@ -481,33 +556,36 @@ mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp) } static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_router_ll_ops *ll_ops, struct mlxsw_sp_lpm_tree *lpm_tree) { - char ralta_pl[MLXSW_REG_RALTA_LEN]; + char xralta_pl[MLXSW_REG_XRALTA_LEN]; - mlxsw_reg_ralta_pack(ralta_pl, true, - (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto, - lpm_tree->id); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); + mlxsw_reg_xralta_pack(xralta_pl, true, + (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto, + lpm_tree->id); + return ll_ops->ralta_write(mlxsw_sp, xralta_pl); } static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_router_ll_ops *ll_ops, struct mlxsw_sp_lpm_tree *lpm_tree) { - char ralta_pl[MLXSW_REG_RALTA_LEN]; + char xralta_pl[MLXSW_REG_XRALTA_LEN]; - mlxsw_reg_ralta_pack(ralta_pl, false, - (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto, - lpm_tree->id); - mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); + mlxsw_reg_xralta_pack(xralta_pl, false, + (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto, + lpm_tree->id); + ll_ops->ralta_write(mlxsw_sp, xralta_pl); } static int mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_router_ll_ops *ll_ops, struct mlxsw_sp_prefix_usage *prefix_usage, struct mlxsw_sp_lpm_tree *lpm_tree) { - char ralst_pl[MLXSW_REG_RALST_LEN]; + char xralst_pl[MLXSW_REG_XRALST_LEN]; u8 root_bin = 0; u8 prefix; u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD; @@ -515,19 +593,20 @@ mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) root_bin = prefix; - mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id); + mlxsw_reg_xralst_pack(xralst_pl, root_bin, lpm_tree->id); mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) { if (prefix == 0) continue; - mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix, - MLXSW_REG_RALST_BIN_NO_CHILD); + mlxsw_reg_xralst_bin_pack(xralst_pl, prefix, last_prefix, + MLXSW_REG_RALST_BIN_NO_CHILD); last_prefix = prefix; } - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl); + return ll_ops->ralst_write(mlxsw_sp, xralst_pl); } static struct mlxsw_sp_lpm_tree * mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_router_ll_ops *ll_ops, struct mlxsw_sp_prefix_usage *prefix_usage, enum mlxsw_sp_l3proto proto) { @@ -538,12 +617,11 @@ mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp, if (!lpm_tree) return ERR_PTR(-EBUSY); lpm_tree->proto = proto; - err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree); + err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, ll_ops, lpm_tree); if (err) return ERR_PTR(err); - err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage, - lpm_tree); + err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, ll_ops, prefix_usage, lpm_tree); if (err) goto err_left_struct_set; memcpy(&lpm_tree->prefix_usage, prefix_usage, @@ -554,14 +632,15 @@ mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp, return lpm_tree; err_left_struct_set: - mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree); + mlxsw_sp_lpm_tree_free(mlxsw_sp, ll_ops, lpm_tree); return ERR_PTR(err); } static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_router_ll_ops *ll_ops, struct mlxsw_sp_lpm_tree *lpm_tree) { - mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree); + mlxsw_sp_lpm_tree_free(mlxsw_sp, ll_ops, lpm_tree); } static struct mlxsw_sp_lpm_tree * @@ -569,6 +648,7 @@ mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_prefix_usage *prefix_usage, enum mlxsw_sp_l3proto proto) { + const struct mlxsw_sp_router_ll_ops *ll_ops = mlxsw_sp->router->proto_ll_ops[proto]; struct mlxsw_sp_lpm_tree *lpm_tree; int i; @@ -582,7 +662,7 @@ mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp, return lpm_tree; } } - return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto); + return mlxsw_sp_lpm_tree_create(mlxsw_sp, ll_ops, prefix_usage, proto); } static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree) @@ -593,8 +673,11 @@ static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree) static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_lpm_tree *lpm_tree) { + const struct mlxsw_sp_router_ll_ops *ll_ops = + mlxsw_sp->router->proto_ll_ops[lpm_tree->proto]; + if (--lpm_tree->ref_count == 0) - mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree); + mlxsw_sp_lpm_tree_destroy(mlxsw_sp, ll_ops, lpm_tree); } #define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */ @@ -684,23 +767,23 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp) static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_fib *fib, u8 tree_id) { - char raltb_pl[MLXSW_REG_RALTB_LEN]; + char xraltb_pl[MLXSW_REG_XRALTB_LEN]; - mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id, - (enum mlxsw_reg_ralxx_protocol) fib->proto, - tree_id); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); + mlxsw_reg_xraltb_pack(xraltb_pl, fib->vr->id, + (enum mlxsw_reg_ralxx_protocol) fib->proto, + tree_id); + return fib->ll_ops->raltb_write(mlxsw_sp, xraltb_pl); } static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_fib *fib) { - char raltb_pl[MLXSW_REG_RALTB_LEN]; + char xraltb_pl[MLXSW_REG_XRALTB_LEN]; /* Bind to tree 0 which is default */ - mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id, - (enum mlxsw_reg_ralxx_protocol) fib->proto, 0); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); + mlxsw_reg_xraltb_pack(xraltb_pl, fib->vr->id, + (enum mlxsw_reg_ralxx_protocol) fib->proto, 0); + return fib->ll_ops->raltb_write(mlxsw_sp, xraltb_pl); } static u32 mlxsw_sp_fix_tb_id(u32 tb_id) @@ -1270,21 +1353,33 @@ mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp, /* Given decap parameters, find the corresponding IPIP entry. */ static struct mlxsw_sp_ipip_entry * -mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp, - const struct net_device *ul_dev, +mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp, int ul_dev_ifindex, enum mlxsw_sp_l3proto ul_proto, union mlxsw_sp_l3addr ul_dip) { - struct mlxsw_sp_ipip_entry *ipip_entry; + struct mlxsw_sp_ipip_entry *ipip_entry = NULL; + struct net_device *ul_dev; + + rcu_read_lock(); + + ul_dev = dev_get_by_index_rcu(mlxsw_sp_net(mlxsw_sp), ul_dev_ifindex); + if (!ul_dev) + goto out_unlock; list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list, ipip_list_node) if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev, ul_proto, ul_dip, ipip_entry)) - return ipip_entry; + goto out_unlock; + + rcu_read_unlock(); return NULL; + +out_unlock: + rcu_read_unlock(); + return ipip_entry; } static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp, @@ -1370,11 +1465,7 @@ static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_ipip_ops *ops = mlxsw_sp->router->ipip_ops_arr[ipipt]; - /* For deciding whether decap should be offloaded, we don't care about - * overlay protocol, so ask whether either one is supported. - */ - return ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV4) || - ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6); + return ops->can_offload(mlxsw_sp, ol_dev); } static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp, @@ -2749,10 +2840,11 @@ struct mlxsw_sp_nexthop { struct list_head neigh_list_node; /* member of neigh entry list */ struct list_head rif_list_node; struct list_head router_list_node; - struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group - * this belongs to - */ + struct mlxsw_sp_nexthop_group_info *nhgi; /* pointer back to the group + * this nexthop belongs to + */ struct rhash_head ht_node; + struct neigh_table *neigh_tbl; struct mlxsw_sp_nexthop_key key; unsigned char gw_addr[sizeof(struct in6_addr)]; int ifindex; @@ -2778,21 +2870,36 @@ struct mlxsw_sp_nexthop { bool counter_valid; }; -struct mlxsw_sp_nexthop_group { - void *priv; - struct rhash_head ht_node; - struct list_head fib_list; /* list of fib entries that use this group */ - struct neigh_table *neigh_tbl; - u8 adj_index_valid:1, - gateway:1; /* routes using the group use a gateway */ +enum mlxsw_sp_nexthop_group_type { + MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4, + MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6, +}; + +struct mlxsw_sp_nexthop_group_info { + struct mlxsw_sp_nexthop_group *nh_grp; u32 adj_index; u16 ecmp_size; u16 count; int sum_norm_weight; + u8 adj_index_valid:1, + gateway:1; /* routes using the group use a gateway */ struct mlxsw_sp_nexthop nexthops[0]; #define nh_rif nexthops[0].rif }; +struct mlxsw_sp_nexthop_group { + struct rhash_head ht_node; + struct list_head fib_list; /* list of fib entries that use this group */ + union { + struct { + struct fib_info *fi; + } ipv4; + }; + struct mlxsw_sp_nexthop_group_info *nhgi; + enum mlxsw_sp_nexthop_group_type type; + bool can_destroy; +}; + void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh) { @@ -2858,18 +2965,18 @@ unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh) int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index, u32 *p_adj_size, u32 *p_adj_hash_index) { - struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp; + struct mlxsw_sp_nexthop_group_info *nhgi = nh->nhgi; u32 adj_hash_index = 0; int i; - if (!nh->offloaded || !nh_grp->adj_index_valid) + if (!nh->offloaded || !nhgi->adj_index_valid) return -EINVAL; - *p_adj_index = nh_grp->adj_index; - *p_adj_size = nh_grp->ecmp_size; + *p_adj_index = nhgi->adj_index; + *p_adj_size = nhgi->ecmp_size; - for (i = 0; i < nh_grp->count; i++) { - struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i]; + for (i = 0; i < nhgi->count; i++) { + struct mlxsw_sp_nexthop *nh_iter = &nhgi->nexthops[i]; if (nh_iter == nh) break; @@ -2888,11 +2995,11 @@ struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh) bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh) { - struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp; + struct mlxsw_sp_nexthop_group_info *nhgi = nh->nhgi; int i; - for (i = 0; i < nh_grp->count; i++) { - struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i]; + for (i = 0; i < nhgi->count; i++) { + struct mlxsw_sp_nexthop *nh_iter = &nhgi->nexthops[i]; if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP) return true; @@ -2900,14 +3007,8 @@ bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh) return false; } -static struct fib_info * -mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp) -{ - return nh_grp->priv; -} - struct mlxsw_sp_nexthop_group_cmp_arg { - enum mlxsw_sp_l3proto proto; + enum mlxsw_sp_nexthop_group_type type; union { struct fib_info *fi; struct mlxsw_sp_fib6_entry *fib6_entry; @@ -2921,10 +3022,10 @@ mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp, { int i; - for (i = 0; i < nh_grp->count; i++) { + for (i = 0; i < nh_grp->nhgi->count; i++) { const struct mlxsw_sp_nexthop *nh; - nh = &nh_grp->nexthops[i]; + nh = &nh_grp->nhgi->nexthops[i]; if (nh->ifindex == ifindex && nh->nh_weight == weight && ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr)) return true; @@ -2939,7 +3040,7 @@ mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp, { struct mlxsw_sp_rt6 *mlxsw_sp_rt6; - if (nh_grp->count != fib6_entry->nrt6) + if (nh_grp->nhgi->count != fib6_entry->nrt6) return false; list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { @@ -2964,10 +3065,13 @@ mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr) const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key; const struct mlxsw_sp_nexthop_group *nh_grp = ptr; - switch (cmp_arg->proto) { - case MLXSW_SP_L3_PROTO_IPV4: - return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp); - case MLXSW_SP_L3_PROTO_IPV6: + if (nh_grp->type != cmp_arg->type) + return 1; + + switch (cmp_arg->type) { + case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4: + return cmp_arg->fi != nh_grp->ipv4.fi; + case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6: return !mlxsw_sp_nexthop6_group_cmp(nh_grp, cmp_arg->fib6_entry); default: @@ -2976,12 +3080,6 @@ mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr) } } -static int -mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp) -{ - return nh_grp->neigh_tbl->family; -} - static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed) { const struct mlxsw_sp_nexthop_group *nh_grp = data; @@ -2990,14 +3088,14 @@ static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed) unsigned int val; int i; - switch (mlxsw_sp_nexthop_group_type(nh_grp)) { - case AF_INET: - fi = mlxsw_sp_nexthop4_group_fi(nh_grp); + switch (nh_grp->type) { + case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4: + fi = nh_grp->ipv4.fi; return jhash(&fi, sizeof(fi), seed); - case AF_INET6: - val = nh_grp->count; - for (i = 0; i < nh_grp->count; i++) { - nh = &nh_grp->nexthops[i]; + case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6: + val = nh_grp->nhgi->count; + for (i = 0; i < nh_grp->nhgi->count; i++) { + nh = &nh_grp->nhgi->nexthops[i]; val ^= jhash(&nh->ifindex, sizeof(nh->ifindex), seed); val ^= jhash(&nh->gw_addr, sizeof(nh->gw_addr), seed); } @@ -3031,10 +3129,10 @@ mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed) { const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data; - switch (cmp_arg->proto) { - case MLXSW_SP_L3_PROTO_IPV4: + switch (cmp_arg->type) { + case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4: return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed); - case MLXSW_SP_L3_PROTO_IPV6: + case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6: return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed); default: WARN_ON(1); @@ -3052,8 +3150,8 @@ static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = { static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp) { - if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 && - !nh_grp->gateway) + if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6 && + !nh_grp->nhgi->gateway) return 0; return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht, @@ -3064,8 +3162,8 @@ static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp) { - if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 && - !nh_grp->gateway) + if (nh_grp->type == MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6 && + !nh_grp->nhgi->gateway) return; rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht, @@ -3079,7 +3177,7 @@ mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg; - cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4; + cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4; cmp_arg.fi = fi; return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht, &cmp_arg, @@ -3092,7 +3190,7 @@ mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg; - cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6; + cmp_arg.type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6; cmp_arg.fib6_entry = fib6_entry; return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht, &cmp_arg, @@ -3151,14 +3249,16 @@ static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp, int err; list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) { + struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi; + if (fib == fib_entry->fib_node->fib) continue; fib = fib_entry->fib_node->fib; err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib, old_adj_index, old_ecmp_size, - nh_grp->adj_index, - nh_grp->ecmp_size); + nhgi->adj_index, + nhgi->ecmp_size); if (err) return err; } @@ -3229,15 +3329,15 @@ static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop_group *nh_grp, + struct mlxsw_sp_nexthop_group_info *nhgi, bool reallocate) { - u32 adj_index = nh_grp->adj_index; /* base */ + u32 adj_index = nhgi->adj_index; /* base */ struct mlxsw_sp_nexthop *nh; int i; - for (i = 0; i < nh_grp->count; i++) { - nh = &nh_grp->nexthops[i]; + for (i = 0; i < nhgi->count; i++) { + nh = &nhgi->nexthops[i]; if (!nh->should_offload) { nh->offloaded = 0; @@ -3337,13 +3437,13 @@ static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp, } static void -mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp) +mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group_info *nhgi) { int i, g = 0, sum_norm_weight = 0; struct mlxsw_sp_nexthop *nh; - for (i = 0; i < nh_grp->count; i++) { - nh = &nh_grp->nexthops[i]; + for (i = 0; i < nhgi->count; i++) { + nh = &nhgi->nexthops[i]; if (!nh->should_offload) continue; @@ -3353,8 +3453,8 @@ mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp) g = nh->nh_weight; } - for (i = 0; i < nh_grp->count; i++) { - nh = &nh_grp->nexthops[i]; + for (i = 0; i < nhgi->count; i++) { + nh = &nhgi->nexthops[i]; if (!nh->should_offload) continue; @@ -3362,18 +3462,18 @@ mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp) sum_norm_weight += nh->norm_nh_weight; } - nh_grp->sum_norm_weight = sum_norm_weight; + nhgi->sum_norm_weight = sum_norm_weight; } static void -mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp) +mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group_info *nhgi) { - int total = nh_grp->sum_norm_weight; - u16 ecmp_size = nh_grp->ecmp_size; int i, weight = 0, lower_bound = 0; + int total = nhgi->sum_norm_weight; + u16 ecmp_size = nhgi->ecmp_size; - for (i = 0; i < nh_grp->count; i++) { - struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; + for (i = 0; i < nhgi->count; i++) { + struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i]; int upper_bound; if (!nh->should_offload) @@ -3395,8 +3495,8 @@ mlxsw_sp_nexthop4_group_offload_refresh(struct mlxsw_sp *mlxsw_sp, { int i; - for (i = 0; i < nh_grp->count; i++) { - struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; + for (i = 0; i < nh_grp->nhgi->count; i++) { + struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i]; if (nh->offloaded) nh->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD; @@ -3442,36 +3542,36 @@ static void mlxsw_sp_nexthop_group_offload_refresh(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp) { - switch (mlxsw_sp_nexthop_group_type(nh_grp)) { - case AF_INET: + switch (nh_grp->type) { + case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4: mlxsw_sp_nexthop4_group_offload_refresh(mlxsw_sp, nh_grp); break; - case AF_INET6: + case MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6: mlxsw_sp_nexthop6_group_offload_refresh(mlxsw_sp, nh_grp); break; } } -static void +static int mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp) { + struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi; u16 ecmp_size, old_ecmp_size; struct mlxsw_sp_nexthop *nh; bool offload_change = false; u32 adj_index; bool old_adj_index_valid; + int i, err2, err = 0; u32 old_adj_index; - int i; - int err; - if (!nh_grp->gateway) { + if (!nhgi->gateway) { mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp); - return; + return 0; } - for (i = 0; i < nh_grp->count; i++) { - nh = &nh_grp->nexthops[i]; + for (i = 0; i < nhgi->count; i++) { + nh = &nhgi->nexthops[i]; if (nh->should_offload != nh->offloaded) { offload_change = true; @@ -3483,21 +3583,21 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, /* Nothing was added or removed, so no need to reallocate. Just * update MAC on existing adjacency indexes. */ - err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false); + err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nhgi, false); if (err) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n"); goto set_trap; } - return; + return 0; } - mlxsw_sp_nexthop_group_normalize(nh_grp); - if (!nh_grp->sum_norm_weight) + mlxsw_sp_nexthop_group_normalize(nhgi); + if (!nhgi->sum_norm_weight) /* No neigh of this group is connected so we just set * the trap and let everthing flow through kernel. */ goto set_trap; - ecmp_size = nh_grp->sum_norm_weight; + ecmp_size = nhgi->sum_norm_weight; err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size); if (err) /* No valid allocation size available. */ @@ -3512,14 +3612,14 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n"); goto set_trap; } - old_adj_index_valid = nh_grp->adj_index_valid; - old_adj_index = nh_grp->adj_index; - old_ecmp_size = nh_grp->ecmp_size; - nh_grp->adj_index_valid = 1; - nh_grp->adj_index = adj_index; - nh_grp->ecmp_size = ecmp_size; - mlxsw_sp_nexthop_group_rebalance(nh_grp); - err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true); + old_adj_index_valid = nhgi->adj_index_valid; + old_adj_index = nhgi->adj_index; + old_ecmp_size = nhgi->ecmp_size; + nhgi->adj_index_valid = 1; + nhgi->adj_index = adj_index; + nhgi->ecmp_size = ecmp_size; + mlxsw_sp_nexthop_group_rebalance(nhgi); + err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nhgi, true); if (err) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n"); goto set_trap; @@ -3536,7 +3636,7 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n"); goto set_trap; } - return; + return 0; } err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp, @@ -3548,22 +3648,23 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, goto set_trap; } - return; + return 0; set_trap: - old_adj_index_valid = nh_grp->adj_index_valid; - nh_grp->adj_index_valid = 0; - for (i = 0; i < nh_grp->count; i++) { - nh = &nh_grp->nexthops[i]; + old_adj_index_valid = nhgi->adj_index_valid; + nhgi->adj_index_valid = 0; + for (i = 0; i < nhgi->count; i++) { + nh = &nhgi->nexthops[i]; nh->offloaded = 0; } - err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp); - if (err) + err2 = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp); + if (err2) dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n"); mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp); if (old_adj_index_valid) mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, - nh_grp->ecmp_size, nh_grp->adj_index); + nhgi->ecmp_size, nhgi->adj_index); + return err; } static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh, @@ -3589,10 +3690,9 @@ mlxsw_sp_nexthop_dead_neigh_replace(struct mlxsw_sp *mlxsw_sp, nh = list_first_entry(&neigh_entry->nexthop_list, struct mlxsw_sp_nexthop, neigh_list_node); - n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev); + n = neigh_lookup(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev); if (!n) { - n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr, - nh->rif->dev); + n = neigh_create(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev); if (IS_ERR(n)) return PTR_ERR(n); neigh_event_send(n, NULL); @@ -3615,7 +3715,7 @@ mlxsw_sp_nexthop_dead_neigh_replace(struct mlxsw_sp *mlxsw_sp, neigh_release(old_n); neigh_clone(n); __mlxsw_sp_nexthop_neigh_update(nh, !entry_connected); - mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp); + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp); } neigh_release(n); @@ -3652,7 +3752,7 @@ mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp, list_for_each_entry(nh, &neigh_entry->nexthop_list, neigh_list_node) { __mlxsw_sp_nexthop_neigh_update(nh, removing); - mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp); + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp); } } @@ -3683,7 +3783,7 @@ static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp, u8 nud_state, dead; int err; - if (!nh->nh_grp->gateway || nh->neigh_entry) + if (!nh->nhgi->gateway || nh->neigh_entry) return 0; /* Take a reference of neigh here ensuring that neigh would @@ -3691,10 +3791,9 @@ static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp, * The reference is taken either in neigh_lookup() or * in neigh_create() in case n is not found. */ - n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev); + n = neigh_lookup(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev); if (!n) { - n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr, - nh->rif->dev); + n = neigh_create(nh->neigh_tbl, &nh->gw_addr, nh->rif->dev); if (IS_ERR(n)) return PTR_ERR(n); neigh_event_send(n, NULL); @@ -3775,7 +3874,7 @@ static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp, { bool removing; - if (!nh->nh_grp->gateway || nh->ipip_entry) + if (!nh->nhgi->gateway || nh->ipip_entry) return; nh->ipip_entry = ipip_entry; @@ -3807,27 +3906,11 @@ static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp, mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt); } -static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop *nh) -{ - switch (nh->type) { - case MLXSW_SP_NEXTHOP_TYPE_ETH: - mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh); - mlxsw_sp_nexthop_rif_fini(nh); - break; - case MLXSW_SP_NEXTHOP_TYPE_IPIP: - mlxsw_sp_nexthop_rif_fini(nh); - mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh); - break; - } -} - -static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop *nh, - struct fib_nh *fib_nh) +static int mlxsw_sp_nexthop_type_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh, + const struct net_device *dev) { const struct mlxsw_sp_ipip_ops *ipip_ops; - struct net_device *dev = fib_nh->fib_nh_dev; struct mlxsw_sp_ipip_entry *ipip_entry; struct mlxsw_sp_rif *rif; int err; @@ -3835,8 +3918,7 @@ static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp, ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev); if (ipip_entry) { ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]; - if (ipip_ops->can_offload(mlxsw_sp, dev, - MLXSW_SP_L3_PROTO_IPV4)) { + if (ipip_ops->can_offload(mlxsw_sp, dev)) { nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP; mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry); return 0; @@ -3860,10 +3942,19 @@ err_neigh_init: return err; } -static void mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop *nh) +static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) { - mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh); + switch (nh->type) { + case MLXSW_SP_NEXTHOP_TYPE_ETH: + mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh); + mlxsw_sp_nexthop_rif_fini(nh); + break; + case MLXSW_SP_NEXTHOP_TYPE_IPIP: + mlxsw_sp_nexthop_rif_fini(nh); + mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh); + break; + } } static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp, @@ -3875,7 +3966,7 @@ static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp, struct in_device *in_dev; int err; - nh->nh_grp = nh_grp; + nh->nhgi = nh_grp->nhgi; nh->key.fib_nh = fib_nh; #ifdef CONFIG_IP_ROUTE_MULTIPATH nh->nh_weight = fib_nh->fib_nh_weight; @@ -3883,6 +3974,7 @@ static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp, nh->nh_weight = 1; #endif memcpy(&nh->gw_addr, &fib_nh->fib_nh_gw4, sizeof(fib_nh->fib_nh_gw4)); + nh->neigh_tbl = &arp_tbl; err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh); if (err) return err; @@ -3892,6 +3984,7 @@ static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp, if (!dev) return 0; + nh->ifindex = dev->ifindex; rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); @@ -3902,7 +3995,7 @@ static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp, } rcu_read_unlock(); - err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh); + err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev); if (err) goto err_nexthop_neigh_init; @@ -3916,7 +4009,7 @@ err_nexthop_neigh_init: static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh) { - mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh); + mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh); list_del(&nh->router_list_node); mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh); mlxsw_sp_nexthop_remove(mlxsw_sp, nh); @@ -3938,14 +4031,14 @@ static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp, switch (event) { case FIB_EVENT_NH_ADD: - mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh); + mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, fib_nh->fib_nh_dev); break; case FIB_EVENT_NH_DEL: - mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh); + mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh); break; } - mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp); + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp); } static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, @@ -3968,7 +4061,7 @@ static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp, } __mlxsw_sp_nexthop_neigh_update(nh, removing); - mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp); + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp); } } @@ -3991,7 +4084,7 @@ static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) { mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh); - mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp); + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nhgi->nh_grp); } } @@ -4004,45 +4097,94 @@ static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp, mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL); } +static int +mlxsw_sp_nexthop4_group_info_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + unsigned int nhs = fib_info_num_path(nh_grp->ipv4.fi); + struct mlxsw_sp_nexthop_group_info *nhgi; + struct mlxsw_sp_nexthop *nh; + int err, i; + + nhgi = kzalloc(struct_size(nhgi, nexthops, nhs), GFP_KERNEL); + if (!nhgi) + return -ENOMEM; + nh_grp->nhgi = nhgi; + nhgi->nh_grp = nh_grp; + nhgi->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, nh_grp->ipv4.fi); + nhgi->count = nhs; + for (i = 0; i < nhgi->count; i++) { + struct fib_nh *fib_nh; + + nh = &nhgi->nexthops[i]; + fib_nh = fib_info_nh(nh_grp->ipv4.fi, i); + err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh); + if (err) + goto err_nexthop4_init; + } + err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); + if (err) + goto err_group_refresh; + + return 0; + +err_group_refresh: + i = nhgi->count; +err_nexthop4_init: + for (i--; i >= 0; i--) { + nh = &nhgi->nexthops[i]; + mlxsw_sp_nexthop4_fini(mlxsw_sp, nh); + } + kfree(nhgi); + return err; +} + +static void +mlxsw_sp_nexthop4_group_info_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi; + int i; + + for (i = nhgi->count - 1; i >= 0; i--) { + struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i]; + + mlxsw_sp_nexthop4_fini(mlxsw_sp, nh); + } + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); + WARN_ON_ONCE(nhgi->adj_index_valid); + kfree(nhgi); +} + static struct mlxsw_sp_nexthop_group * mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) { - unsigned int nhs = fib_info_num_path(fi); struct mlxsw_sp_nexthop_group *nh_grp; - struct mlxsw_sp_nexthop *nh; - struct fib_nh *fib_nh; - int i; int err; - nh_grp = kzalloc(struct_size(nh_grp, nexthops, nhs), GFP_KERNEL); + nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL); if (!nh_grp) return ERR_PTR(-ENOMEM); - nh_grp->priv = fi; INIT_LIST_HEAD(&nh_grp->fib_list); - nh_grp->neigh_tbl = &arp_tbl; - - nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi); - nh_grp->count = nhs; + nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV4; + nh_grp->ipv4.fi = fi; fib_info_hold(fi); - for (i = 0; i < nh_grp->count; i++) { - nh = &nh_grp->nexthops[i]; - fib_nh = fib_info_nh(fi, i); - err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh); - if (err) - goto err_nexthop4_init; - } + + err = mlxsw_sp_nexthop4_group_info_init(mlxsw_sp, nh_grp); + if (err) + goto err_nexthop_group_info_init; + err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp); if (err) goto err_nexthop_group_insert; - mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); + + nh_grp->can_destroy = true; + return nh_grp; err_nexthop_group_insert: -err_nexthop4_init: - for (i--; i >= 0; i--) { - nh = &nh_grp->nexthops[i]; - mlxsw_sp_nexthop4_fini(mlxsw_sp, nh); - } + mlxsw_sp_nexthop4_group_info_fini(mlxsw_sp, nh_grp); +err_nexthop_group_info_init: fib_info_put(fi); kfree(nh_grp); return ERR_PTR(err); @@ -4052,17 +4194,11 @@ static void mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp) { - struct mlxsw_sp_nexthop *nh; - int i; - + if (!nh_grp->can_destroy) + return; mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp); - for (i = 0; i < nh_grp->count; i++) { - nh = &nh_grp->nexthops[i]; - mlxsw_sp_nexthop4_fini(mlxsw_sp, nh); - } - mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); - WARN_ON_ONCE(nh_grp->adj_index_valid); - fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp)); + mlxsw_sp_nexthop4_group_info_fini(mlxsw_sp, nh_grp); + fib_info_put(nh_grp->ipv4.fi); kfree(nh_grp); } @@ -4120,9 +4256,9 @@ mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry) switch (fib_entry->type) { case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE: - return !!nh_group->adj_index_valid; + return !!nh_group->nhgi->adj_index_valid; case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL: - return !!nh_group->nh_rif; + return !!nh_group->nhgi->nh_rif; case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE: case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP: case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP: @@ -4138,8 +4274,8 @@ mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp, { int i; - for (i = 0; i < nh_grp->count; i++) { - struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; + for (i = 0; i < nh_grp->nhgi->count; i++) { + struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i]; struct fib6_info *rt = mlxsw_sp_rt6->rt; if (nh->rif && nh->rif->dev == rt->fib6_nh->fib_nh_dev && @@ -4156,7 +4292,6 @@ static void mlxsw_sp_fib4_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry) { - struct fib_info *fi = mlxsw_sp_nexthop4_group_fi(fib_entry->nh_group); u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr; int dst_len = fib_entry->fib_node->key.prefix_len; struct mlxsw_sp_fib4_entry *fib4_entry; @@ -4166,7 +4301,7 @@ mlxsw_sp_fib4_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp, should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry); fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry, common); - fri.fi = fi; + fri.fi = fib4_entry->fi; fri.tb_id = fib4_entry->tb_id; fri.dst = cpu_to_be32(*p_dst); fri.dst_len = dst_len; @@ -4181,7 +4316,6 @@ static void mlxsw_sp_fib4_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry) { - struct fib_info *fi = mlxsw_sp_nexthop4_group_fi(fib_entry->nh_group); u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr; int dst_len = fib_entry->fib_node->key.prefix_len; struct mlxsw_sp_fib4_entry *fib4_entry; @@ -4189,7 +4323,7 @@ mlxsw_sp_fib4_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp, fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry, common); - fri.fi = fi; + fri.fi = fib4_entry->fi; fri.tb_id = fib4_entry->tb_id; fri.dst = cpu_to_be32(*p_dst); fri.dst_len = dst_len; @@ -4264,13 +4398,14 @@ mlxsw_sp_fib_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_fib_entry_hw_flags_refresh(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) + enum mlxsw_sp_fib_entry_op op) { switch (op) { - case MLXSW_REG_RALUE_OP_WRITE_WRITE: + case MLXSW_SP_FIB_ENTRY_OP_WRITE: + case MLXSW_SP_FIB_ENTRY_OP_UPDATE: mlxsw_sp_fib_entry_hw_flags_set(mlxsw_sp, fib_entry); break; - case MLXSW_REG_RALUE_OP_WRITE_DELETE: + case MLXSW_SP_FIB_ENTRY_OP_DELETE: mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, fib_entry); break; default: @@ -4278,32 +4413,132 @@ mlxsw_sp_fib_entry_hw_flags_refresh(struct mlxsw_sp *mlxsw_sp, } } +struct mlxsw_sp_fib_entry_op_ctx_basic { + char ralue_pl[MLXSW_REG_RALUE_LEN]; +}; + static void -mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl, - const struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) +mlxsw_sp_router_ll_basic_fib_entry_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + enum mlxsw_sp_l3proto proto, + enum mlxsw_sp_fib_entry_op op, + u16 virtual_router, u8 prefix_len, + unsigned char *addr, + struct mlxsw_sp_fib_entry_priv *priv) { - struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib; - enum mlxsw_reg_ralxx_protocol proto; - u32 *p_dip; + struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv; + enum mlxsw_reg_ralxx_protocol ralxx_proto; + char *ralue_pl = op_ctx_basic->ralue_pl; + enum mlxsw_reg_ralue_op ralue_op; - proto = (enum mlxsw_reg_ralxx_protocol) fib->proto; + ralxx_proto = (enum mlxsw_reg_ralxx_protocol) proto; - switch (fib->proto) { + switch (op) { + case MLXSW_SP_FIB_ENTRY_OP_WRITE: + case MLXSW_SP_FIB_ENTRY_OP_UPDATE: + ralue_op = MLXSW_REG_RALUE_OP_WRITE_WRITE; + break; + case MLXSW_SP_FIB_ENTRY_OP_DELETE: + ralue_op = MLXSW_REG_RALUE_OP_WRITE_DELETE; + break; + default: + WARN_ON_ONCE(1); + return; + } + + switch (proto) { case MLXSW_SP_L3_PROTO_IPV4: - p_dip = (u32 *) fib_entry->fib_node->key.addr; - mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id, - fib_entry->fib_node->key.prefix_len, - *p_dip); + mlxsw_reg_ralue_pack4(ralue_pl, ralxx_proto, ralue_op, + virtual_router, prefix_len, (u32 *) addr); break; case MLXSW_SP_L3_PROTO_IPV6: - mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id, - fib_entry->fib_node->key.prefix_len, - fib_entry->fib_node->key.addr); + mlxsw_reg_ralue_pack6(ralue_pl, ralxx_proto, ralue_op, + virtual_router, prefix_len, addr); break; } } +static void +mlxsw_sp_router_ll_basic_fib_entry_act_remote_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + enum mlxsw_reg_ralue_trap_action trap_action, + u16 trap_id, u32 adjacency_index, u16 ecmp_size) +{ + struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv; + + mlxsw_reg_ralue_act_remote_pack(op_ctx_basic->ralue_pl, trap_action, + trap_id, adjacency_index, ecmp_size); +} + +static void +mlxsw_sp_router_ll_basic_fib_entry_act_local_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + enum mlxsw_reg_ralue_trap_action trap_action, + u16 trap_id, u16 local_erif) +{ + struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv; + + mlxsw_reg_ralue_act_local_pack(op_ctx_basic->ralue_pl, trap_action, + trap_id, local_erif); +} + +static void +mlxsw_sp_router_ll_basic_fib_entry_act_ip2me_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx) +{ + struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv; + + mlxsw_reg_ralue_act_ip2me_pack(op_ctx_basic->ralue_pl); +} + +static void +mlxsw_sp_router_ll_basic_fib_entry_act_ip2me_tun_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + u32 tunnel_ptr) +{ + struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv; + + mlxsw_reg_ralue_act_ip2me_tun_pack(op_ctx_basic->ralue_pl, tunnel_ptr); +} + +static int +mlxsw_sp_router_ll_basic_fib_entry_commit(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + bool *postponed_for_bulk) +{ + struct mlxsw_sp_fib_entry_op_ctx_basic *op_ctx_basic = (void *) op_ctx->ll_priv; + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), + op_ctx_basic->ralue_pl); +} + +static bool +mlxsw_sp_router_ll_basic_fib_entry_is_committed(struct mlxsw_sp_fib_entry_priv *priv) +{ + return true; +} + +static void mlxsw_sp_fib_entry_pack(struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_sp_fib_entry_op op) +{ + struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib; + + mlxsw_sp_fib_entry_op_ctx_priv_hold(op_ctx, fib_entry->priv); + fib->ll_ops->fib_entry_pack(op_ctx, fib->proto, op, fib->vr->id, + fib_entry->fib_node->key.prefix_len, + fib_entry->fib_node->key.addr, + fib_entry->priv); +} + +int mlxsw_sp_fib_entry_commit(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + const struct mlxsw_sp_router_ll_ops *ll_ops) +{ + bool postponed_for_bulk = false; + int err; + + err = ll_ops->fib_entry_commit(mlxsw_sp, op_ctx, &postponed_for_bulk); + if (!postponed_for_bulk) + mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx); + return err; +} + static int mlxsw_sp_adj_discard_write(struct mlxsw_sp *mlxsw_sp, u16 rif_index) { enum mlxsw_reg_ratr_trap_action trap_action; @@ -4338,11 +4573,13 @@ err_ratr_write: } static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) + enum mlxsw_sp_fib_entry_op op) { + const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops; struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group; - char ralue_pl[MLXSW_REG_RALUE_LEN]; + struct mlxsw_sp_nexthop_group_info *nhgi = nh_group->nhgi; enum mlxsw_reg_ralue_trap_action trap_action; u16 trap_id = 0; u32 adjacency_index = 0; @@ -4355,12 +4592,11 @@ static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp, */ if (mlxsw_sp_fib_entry_should_offload(fib_entry)) { trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP; - adjacency_index = fib_entry->nh_group->adj_index; - ecmp_size = fib_entry->nh_group->ecmp_size; - } else if (!nh_group->adj_index_valid && nh_group->count && - nh_group->nh_rif) { + adjacency_index = nhgi->adj_index; + ecmp_size = nhgi->ecmp_size; + } else if (!nhgi->adj_index_valid && nhgi->count && nhgi->nh_rif) { err = mlxsw_sp_adj_discard_write(mlxsw_sp, - nh_group->nh_rif->rif_index); + nhgi->nh_rif->rif_index); if (err) return err; trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP; @@ -4371,19 +4607,20 @@ static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp, trap_id = MLXSW_TRAP_ID_RTR_INGRESS0; } - mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); - mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id, - adjacency_index, ecmp_size); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); + mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op); + ll_ops->fib_entry_act_remote_pack(op_ctx, trap_action, trap_id, + adjacency_index, ecmp_size); + return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops); } static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) + enum mlxsw_sp_fib_entry_op op) { - struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif; + const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops; + struct mlxsw_sp_rif *rif = fib_entry->nh_group->nhgi->nh_rif; enum mlxsw_reg_ralue_trap_action trap_action; - char ralue_pl[MLXSW_REG_RALUE_LEN]; u16 trap_id = 0; u16 rif_index = 0; @@ -4395,58 +4632,62 @@ static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp, trap_id = MLXSW_TRAP_ID_RTR_INGRESS0; } - mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); - mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, - rif_index); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); + mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op); + ll_ops->fib_entry_act_local_pack(op_ctx, trap_action, trap_id, rif_index); + return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops); } static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) + enum mlxsw_sp_fib_entry_op op) { - char ralue_pl[MLXSW_REG_RALUE_LEN]; + const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops; - mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); - mlxsw_reg_ralue_act_ip2me_pack(ralue_pl); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); + mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op); + ll_ops->fib_entry_act_ip2me_pack(op_ctx); + return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops); } static int mlxsw_sp_fib_entry_op_blackhole(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) + enum mlxsw_sp_fib_entry_op op) { + const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops; enum mlxsw_reg_ralue_trap_action trap_action; - char ralue_pl[MLXSW_REG_RALUE_LEN]; trap_action = MLXSW_REG_RALUE_TRAP_ACTION_DISCARD_ERROR; - mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); - mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, 0, 0); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); + mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op); + ll_ops->fib_entry_act_local_pack(op_ctx, trap_action, 0, 0); + return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops); } static int mlxsw_sp_fib_entry_op_unreachable(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) + enum mlxsw_sp_fib_entry_op op) { + const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops; enum mlxsw_reg_ralue_trap_action trap_action; - char ralue_pl[MLXSW_REG_RALUE_LEN]; u16 trap_id; trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP; trap_id = MLXSW_TRAP_ID_RTR_INGRESS1; - mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); - mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, 0); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); + mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op); + ll_ops->fib_entry_act_local_pack(op_ctx, trap_action, trap_id, 0); + return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops); } static int mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) + enum mlxsw_sp_fib_entry_op op) { + const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops; struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry; const struct mlxsw_sp_ipip_ops *ipip_ops; @@ -4454,52 +4695,53 @@ mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp, return -EINVAL; ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]; - return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op, - fib_entry->decap.tunnel_index); + return ipip_ops->fib_entry_op(mlxsw_sp, ll_ops, op_ctx, ipip_entry, op, + fib_entry->decap.tunnel_index, fib_entry->priv); } static int mlxsw_sp_fib_entry_op_nve_decap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) + enum mlxsw_sp_fib_entry_op op) { - char ralue_pl[MLXSW_REG_RALUE_LEN]; + const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops; - mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); - mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl, - fib_entry->decap.tunnel_index); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); + mlxsw_sp_fib_entry_pack(op_ctx, fib_entry, op); + ll_ops->fib_entry_act_ip2me_tun_pack(op_ctx, + fib_entry->decap.tunnel_index); + return mlxsw_sp_fib_entry_commit(mlxsw_sp, op_ctx, ll_ops); } static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) + enum mlxsw_sp_fib_entry_op op) { switch (fib_entry->type) { case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE: - return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op); + return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, op_ctx, fib_entry, op); case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL: - return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op); + return mlxsw_sp_fib_entry_op_local(mlxsw_sp, op_ctx, fib_entry, op); case MLXSW_SP_FIB_ENTRY_TYPE_TRAP: - return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op); + return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, op_ctx, fib_entry, op); case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE: - return mlxsw_sp_fib_entry_op_blackhole(mlxsw_sp, fib_entry, op); + return mlxsw_sp_fib_entry_op_blackhole(mlxsw_sp, op_ctx, fib_entry, op); case MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE: - return mlxsw_sp_fib_entry_op_unreachable(mlxsw_sp, fib_entry, - op); + return mlxsw_sp_fib_entry_op_unreachable(mlxsw_sp, op_ctx, fib_entry, op); case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP: - return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp, - fib_entry, op); + return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp, op_ctx, fib_entry, op); case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP: - return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, fib_entry, op); + return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, op_ctx, fib_entry, op); } return -EINVAL; } static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) + enum mlxsw_sp_fib_entry_op op) { - int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op); + int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, op_ctx, fib_entry, op); if (err) return err; @@ -4509,18 +4751,35 @@ static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, return err; } +static int __mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + struct mlxsw_sp_fib_entry *fib_entry, + bool is_new) +{ + return mlxsw_sp_fib_entry_op(mlxsw_sp, op_ctx, fib_entry, + is_new ? MLXSW_SP_FIB_ENTRY_OP_WRITE : + MLXSW_SP_FIB_ENTRY_OP_UPDATE); +} + static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry) { - return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, - MLXSW_REG_RALUE_OP_WRITE_WRITE); + struct mlxsw_sp_fib_entry_op_ctx *op_ctx = mlxsw_sp->router->ll_op_ctx; + + mlxsw_sp_fib_entry_op_ctx_clear(op_ctx); + return __mlxsw_sp_fib_entry_update(mlxsw_sp, op_ctx, fib_entry, false); } static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib_entry *fib_entry) { - return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, - MLXSW_REG_RALUE_OP_WRITE_DELETE); + const struct mlxsw_sp_router_ll_ops *ll_ops = fib_entry->fib_node->fib->ll_ops; + + if (!ll_ops->fib_entry_is_committed(fib_entry->priv)) + return 0; + return mlxsw_sp_fib_entry_op(mlxsw_sp, op_ctx, fib_entry, + MLXSW_SP_FIB_ENTRY_OP_DELETE); } static int @@ -4528,17 +4787,17 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, const struct fib_entry_notifier_info *fen_info, struct mlxsw_sp_fib_entry *fib_entry) { - struct net_device *dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev; + struct mlxsw_sp_nexthop_group_info *nhgi = fib_entry->nh_group->nhgi; union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) }; struct mlxsw_sp_router *router = mlxsw_sp->router; u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id); + int ifindex = nhgi->nexthops[0].ifindex; struct mlxsw_sp_ipip_entry *ipip_entry; - struct fib_info *fi = fen_info->fi; switch (fen_info->type) { case RTN_LOCAL: - ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev, - MLXSW_SP_L3_PROTO_IPV4, dip); + ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, ifindex, + MLXSW_SP_L3_PROTO_IPV4, dip); if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) { fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP; return mlxsw_sp_fib_entry_decap_init(mlxsw_sp, @@ -4571,7 +4830,7 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE; return 0; case RTN_UNICAST: - if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi)) + if (nhgi->gateway) fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE; else fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; @@ -4608,15 +4867,22 @@ mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp, return ERR_PTR(-ENOMEM); fib_entry = &fib4_entry->common; - err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry); - if (err) - goto err_fib4_entry_type_set; + fib_entry->priv = mlxsw_sp_fib_entry_priv_create(fib_node->fib->ll_ops); + if (IS_ERR(fib_entry->priv)) { + err = PTR_ERR(fib_entry->priv); + goto err_fib_entry_priv_create; + } err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi); if (err) goto err_nexthop4_group_get; - fib4_entry->prio = fen_info->fi->fib_priority; + err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry); + if (err) + goto err_fib4_entry_type_set; + + fib4_entry->fi = fen_info->fi; + fib_info_hold(fib4_entry->fi); fib4_entry->tb_id = fen_info->tb_id; fib4_entry->type = fen_info->type; fib4_entry->tos = fen_info->tos; @@ -4625,9 +4891,11 @@ mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp, return fib4_entry; -err_nexthop4_group_get: - mlxsw_sp_fib4_entry_type_unset(mlxsw_sp, fib_entry); err_fib4_entry_type_set: + mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common); +err_nexthop4_group_get: + mlxsw_sp_fib_entry_priv_put(fib_entry->priv); +err_fib_entry_priv_create: kfree(fib4_entry); return ERR_PTR(err); } @@ -4635,8 +4903,10 @@ err_fib4_entry_type_set: static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib4_entry *fib4_entry) { - mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common); + fib_info_put(fib4_entry->fi); mlxsw_sp_fib4_entry_type_unset(mlxsw_sp, &fib4_entry->common); + mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common); + mlxsw_sp_fib_entry_priv_put(fib4_entry->common.priv); kfree(fib4_entry); } @@ -4665,8 +4935,7 @@ mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp, if (fib4_entry->tb_id == fen_info->tb_id && fib4_entry->tos == fen_info->tos && fib4_entry->type == fen_info->type && - mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) == - fen_info->fi) + fib4_entry->fi == fen_info->fi) return fib4_entry; return NULL; @@ -4875,14 +5144,16 @@ static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp, } static int mlxsw_sp_fib_node_entry_link(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib_entry *fib_entry) { struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; + bool is_new = !fib_node->fib_entry; int err; fib_node->fib_entry = fib_entry; - err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); + err = __mlxsw_sp_fib_entry_update(mlxsw_sp, op_ctx, fib_entry, is_new); if (err) goto err_fib_entry_update; @@ -4893,14 +5164,25 @@ err_fib_entry_update: return err; } -static void -mlxsw_sp_fib_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry) +static int __mlxsw_sp_fib_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + struct mlxsw_sp_fib_entry *fib_entry) { struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; + int err; - mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry); + err = mlxsw_sp_fib_entry_del(mlxsw_sp, op_ctx, fib_entry); fib_node->fib_entry = NULL; + return err; +} + +static void mlxsw_sp_fib_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_fib_entry_op_ctx *op_ctx = mlxsw_sp->router->ll_op_ctx; + + mlxsw_sp_fib_entry_op_ctx_clear(op_ctx); + __mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, op_ctx, fib_entry); } static bool mlxsw_sp_fib4_allow_replace(struct mlxsw_sp_fib4_entry *fib4_entry) @@ -4922,6 +5204,7 @@ static bool mlxsw_sp_fib4_allow_replace(struct mlxsw_sp_fib4_entry *fib4_entry) static int mlxsw_sp_router_fib4_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, const struct fib_entry_notifier_info *fen_info) { struct mlxsw_sp_fib4_entry *fib4_entry, *fib4_replaced; @@ -4955,7 +5238,7 @@ mlxsw_sp_router_fib4_replace(struct mlxsw_sp *mlxsw_sp, } replaced = fib_node->fib_entry; - err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib4_entry->common); + err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, op_ctx, &fib4_entry->common); if (err) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n"); goto err_fib_node_entry_link; @@ -4980,23 +5263,26 @@ err_fib4_entry_create: return err; } -static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp, - struct fib_entry_notifier_info *fen_info) +static int mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + struct fib_entry_notifier_info *fen_info) { struct mlxsw_sp_fib4_entry *fib4_entry; struct mlxsw_sp_fib_node *fib_node; + int err; if (mlxsw_sp->router->aborted) - return; + return 0; fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info); if (!fib4_entry) - return; + return 0; fib_node = fib4_entry->common.fib_node; - mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib4_entry->common); + err = __mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, op_ctx, &fib4_entry->common); mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); + return err; } static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt) @@ -5047,7 +5333,8 @@ static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6) { struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh; - fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; + if (!mlxsw_sp_rt6->rt->nh) + fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt); kfree(mlxsw_sp_rt6); } @@ -5081,51 +5368,6 @@ static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp, mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh->fib_nh_dev, ret); } -static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop_group *nh_grp, - struct mlxsw_sp_nexthop *nh, - const struct fib6_info *rt) -{ - const struct mlxsw_sp_ipip_ops *ipip_ops; - struct mlxsw_sp_ipip_entry *ipip_entry; - struct net_device *dev = rt->fib6_nh->fib_nh_dev; - struct mlxsw_sp_rif *rif; - int err; - - ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev); - if (ipip_entry) { - ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]; - if (ipip_ops->can_offload(mlxsw_sp, dev, - MLXSW_SP_L3_PROTO_IPV6)) { - nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP; - mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry); - return 0; - } - } - - nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH; - rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); - if (!rif) - return 0; - mlxsw_sp_nexthop_rif_init(nh, rif); - - err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh); - if (err) - goto err_nexthop_neigh_init; - - return 0; - -err_nexthop_neigh_init: - mlxsw_sp_nexthop_rif_fini(nh); - return err; -} - -static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop *nh) -{ - mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh); -} - static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp, struct mlxsw_sp_nexthop *nh, @@ -5133,9 +5375,12 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp, { struct net_device *dev = rt->fib6_nh->fib_nh_dev; - nh->nh_grp = nh_grp; + nh->nhgi = nh_grp->nhgi; nh->nh_weight = rt->fib6_nh->fib_nh_weight; memcpy(&nh->gw_addr, &rt->fib6_nh->fib_nh_gw6, sizeof(nh->gw_addr)); +#if IS_ENABLED(CONFIG_IPV6) + nh->neigh_tbl = &nd_tbl; +#endif mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh); list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list); @@ -5144,13 +5389,13 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp, return 0; nh->ifindex = dev->ifindex; - return mlxsw_sp_nexthop6_type_init(mlxsw_sp, nh_grp, nh, rt); + return mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev); } static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh) { - mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh); + mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh); list_del(&nh->router_list_node); mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh); } @@ -5162,51 +5407,98 @@ static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp, mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL); } -static struct mlxsw_sp_nexthop_group * -mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib6_entry *fib6_entry) +static int +mlxsw_sp_nexthop6_group_info_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + struct mlxsw_sp_fib6_entry *fib6_entry) { - struct mlxsw_sp_nexthop_group *nh_grp; + struct mlxsw_sp_nexthop_group_info *nhgi; struct mlxsw_sp_rt6 *mlxsw_sp_rt6; struct mlxsw_sp_nexthop *nh; - int i = 0; - int err; + int err, i; - nh_grp = kzalloc(struct_size(nh_grp, nexthops, fib6_entry->nrt6), - GFP_KERNEL); - if (!nh_grp) - return ERR_PTR(-ENOMEM); - INIT_LIST_HEAD(&nh_grp->fib_list); -#if IS_ENABLED(CONFIG_IPV6) - nh_grp->neigh_tbl = &nd_tbl; -#endif + nhgi = kzalloc(struct_size(nhgi, nexthops, fib6_entry->nrt6), + GFP_KERNEL); + if (!nhgi) + return -ENOMEM; + nh_grp->nhgi = nhgi; + nhgi->nh_grp = nh_grp; mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6, list); - nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt); - nh_grp->count = fib6_entry->nrt6; - for (i = 0; i < nh_grp->count; i++) { + nhgi->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt); + nhgi->count = fib6_entry->nrt6; + for (i = 0; i < nhgi->count; i++) { struct fib6_info *rt = mlxsw_sp_rt6->rt; - nh = &nh_grp->nexthops[i]; + nh = &nhgi->nexthops[i]; err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt); if (err) goto err_nexthop6_init; mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list); } + nh_grp->nhgi = nhgi; + err = mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); + if (err) + goto err_group_refresh; + + return 0; + +err_group_refresh: + i = nhgi->count; +err_nexthop6_init: + for (i--; i >= 0; i--) { + nh = &nhgi->nexthops[i]; + mlxsw_sp_nexthop6_fini(mlxsw_sp, nh); + } + kfree(nhgi); + return err; +} + +static void +mlxsw_sp_nexthop6_group_info_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi; + int i; + + for (i = nhgi->count - 1; i >= 0; i--) { + struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i]; + + mlxsw_sp_nexthop6_fini(mlxsw_sp, nh); + } + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); + WARN_ON_ONCE(nhgi->adj_index_valid); + kfree(nhgi); +} + +static struct mlxsw_sp_nexthop_group * +mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry) +{ + struct mlxsw_sp_nexthop_group *nh_grp; + int err; + + nh_grp = kzalloc(sizeof(*nh_grp), GFP_KERNEL); + if (!nh_grp) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&nh_grp->fib_list); + nh_grp->type = MLXSW_SP_NEXTHOP_GROUP_TYPE_IPV6; + + err = mlxsw_sp_nexthop6_group_info_init(mlxsw_sp, nh_grp, fib6_entry); + if (err) + goto err_nexthop_group_info_init; err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp); if (err) goto err_nexthop_group_insert; - mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); + nh_grp->can_destroy = true; + return nh_grp; err_nexthop_group_insert: -err_nexthop6_init: - for (i--; i >= 0; i--) { - nh = &nh_grp->nexthops[i]; - mlxsw_sp_nexthop6_fini(mlxsw_sp, nh); - } + mlxsw_sp_nexthop6_group_info_fini(mlxsw_sp, nh_grp); +err_nexthop_group_info_init: kfree(nh_grp); return ERR_PTR(err); } @@ -5215,16 +5507,10 @@ static void mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp) { - struct mlxsw_sp_nexthop *nh; - int i = nh_grp->count; - + if (!nh_grp->can_destroy) + return; mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp); - for (i--; i >= 0; i--) { - nh = &nh_grp->nexthops[i]; - mlxsw_sp_nexthop6_fini(mlxsw_sp, nh); - } - mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); - WARN_ON(nh_grp->adj_index_valid); + mlxsw_sp_nexthop6_group_info_fini(mlxsw_sp, nh_grp); kfree(nh_grp); } @@ -5240,15 +5526,15 @@ static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp, return PTR_ERR(nh_grp); } - list_add_tail(&fib6_entry->common.nexthop_group_node, - &nh_grp->fib_list); - fib6_entry->common.nh_group = nh_grp; - /* The route and the nexthop are described by the same struct, so we * need to the update the nexthop offload indication for the new route. */ __mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry); + list_add_tail(&fib6_entry->common.nexthop_group_node, + &nh_grp->fib_list); + fib6_entry->common.nh_group = nh_grp; + return 0; } @@ -5263,9 +5549,9 @@ static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp); } -static int -mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib6_entry *fib6_entry) +static int mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + struct mlxsw_sp_fib6_entry *fib6_entry) { struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group; int err; @@ -5281,7 +5567,8 @@ mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp, * currently associated with it in the device's table is that * of the old group. Start using the new one instead. */ - err = mlxsw_sp_fib_entry_update(mlxsw_sp, &fib6_entry->common); + err = __mlxsw_sp_fib_entry_update(mlxsw_sp, op_ctx, + &fib6_entry->common, false); if (err) goto err_fib_entry_update; @@ -5301,6 +5588,7 @@ err_nexthop6_group_get: static int mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib6_entry *fib6_entry, struct fib6_info **rt_arr, unsigned int nrt6) { @@ -5318,7 +5606,7 @@ mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp, fib6_entry->nrt6++; } - err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry); + err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, op_ctx, fib6_entry); if (err) goto err_nexthop6_group_update; @@ -5339,6 +5627,7 @@ err_rt6_create: static void mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, struct mlxsw_sp_fib6_entry *fib6_entry, struct fib6_info **rt_arr, unsigned int nrt6) { @@ -5356,26 +5645,20 @@ mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); } - mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry); + mlxsw_sp_nexthop6_group_update(mlxsw_sp, op_ctx, fib6_entry); } static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry, const struct fib6_info *rt) { - /* Packets hitting RTF_REJECT routes need to be discarded by the - * stack. We can rely on their destination device not having a - * RIF (it's the loopback device) and can thus use action type - * local, which will cause them to be trapped with a lower - * priority than packets that need to be locally received. - */ if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; else if (rt->fib6_type == RTN_BLACKHOLE) fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE; else if (rt->fib6_flags & RTF_REJECT) fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE; - else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt)) + else if (fib_entry->nh_group->nhgi->gateway) fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE; else fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; @@ -5409,6 +5692,12 @@ mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp, return ERR_PTR(-ENOMEM); fib_entry = &fib6_entry->common; + fib_entry->priv = mlxsw_sp_fib_entry_priv_create(fib_node->fib->ll_ops); + if (IS_ERR(fib_entry->priv)) { + err = PTR_ERR(fib_entry->priv); + goto err_fib_entry_priv_create; + } + INIT_LIST_HEAD(&fib6_entry->rt6_list); for (i = 0; i < nrt6; i++) { @@ -5421,12 +5710,12 @@ mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp, fib6_entry->nrt6++; } - mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, rt_arr[0]); - err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry); if (err) goto err_nexthop6_group_get; + mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, rt_arr[0]); + fib_entry->fib_node = fib_node; return fib6_entry; @@ -5441,6 +5730,8 @@ err_rt6_create: list_del(&mlxsw_sp_rt6->list); mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); } + mlxsw_sp_fib_entry_priv_put(fib_entry->priv); +err_fib_entry_priv_create: kfree(fib6_entry); return ERR_PTR(err); } @@ -5451,6 +5742,7 @@ static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common); mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry); WARN_ON(fib6_entry->nrt6); + mlxsw_sp_fib_entry_priv_put(fib6_entry->common.priv); kfree(fib6_entry); } @@ -5508,8 +5800,8 @@ static bool mlxsw_sp_fib6_allow_replace(struct mlxsw_sp_fib6_entry *fib6_entry) } static int mlxsw_sp_router_fib6_replace(struct mlxsw_sp *mlxsw_sp, - struct fib6_info **rt_arr, - unsigned int nrt6) + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + struct fib6_info **rt_arr, unsigned int nrt6) { struct mlxsw_sp_fib6_entry *fib6_entry, *fib6_replaced; struct mlxsw_sp_fib_entry *replaced; @@ -5548,7 +5840,7 @@ static int mlxsw_sp_router_fib6_replace(struct mlxsw_sp *mlxsw_sp, } replaced = fib_node->fib_entry; - err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib6_entry->common); + err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, op_ctx, &fib6_entry->common); if (err) goto err_fib_node_entry_link; @@ -5572,8 +5864,8 @@ err_fib6_entry_create: } static int mlxsw_sp_router_fib6_append(struct mlxsw_sp *mlxsw_sp, - struct fib6_info **rt_arr, - unsigned int nrt6) + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + struct fib6_info **rt_arr, unsigned int nrt6) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_node *fib_node; @@ -5604,8 +5896,7 @@ static int mlxsw_sp_router_fib6_append(struct mlxsw_sp *mlxsw_sp, fib6_entry = container_of(fib_node->fib_entry, struct mlxsw_sp_fib6_entry, common); - err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt_arr, - nrt6); + err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, op_ctx, fib6_entry, rt_arr, nrt6); if (err) goto err_fib6_entry_nexthop_add; @@ -5616,19 +5907,20 @@ err_fib6_entry_nexthop_add: return err; } -static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp, - struct fib6_info **rt_arr, - unsigned int nrt6) +static int mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + struct fib6_info **rt_arr, unsigned int nrt6) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_node *fib_node; struct fib6_info *rt = rt_arr[0]; + int err; if (mlxsw_sp->router->aborted) - return; + return 0; if (mlxsw_sp_fib6_rt_should_ignore(rt)) - return; + return 0; /* Multipath routes are first added to the FIB trie and only then * notified. If we vetoed the addition, we will get a delete @@ -5637,58 +5929,66 @@ static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp, */ fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt); if (!fib6_entry) - return; + return 0; /* If not all the nexthops are deleted, then only reduce the nexthop * group. */ if (nrt6 != fib6_entry->nrt6) { - mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt_arr, - nrt6); - return; + mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, op_ctx, fib6_entry, rt_arr, nrt6); + return 0; } fib_node = fib6_entry->common.fib_node; - mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib6_entry->common); + err = __mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, op_ctx, &fib6_entry->common); mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); + return err; } static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp, - enum mlxsw_reg_ralxx_protocol proto, + enum mlxsw_sp_l3proto proto, u8 tree_id) { - char ralta_pl[MLXSW_REG_RALTA_LEN]; - char ralst_pl[MLXSW_REG_RALST_LEN]; + const struct mlxsw_sp_router_ll_ops *ll_ops = mlxsw_sp->router->proto_ll_ops[proto]; + enum mlxsw_reg_ralxx_protocol ralxx_proto = + (enum mlxsw_reg_ralxx_protocol) proto; + struct mlxsw_sp_fib_entry_priv *priv; + char xralta_pl[MLXSW_REG_XRALTA_LEN]; + char xralst_pl[MLXSW_REG_XRALST_LEN]; int i, err; - mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); + mlxsw_reg_xralta_pack(xralta_pl, true, ralxx_proto, tree_id); + err = ll_ops->ralta_write(mlxsw_sp, xralta_pl); if (err) return err; - mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl); + mlxsw_reg_xralst_pack(xralst_pl, 0xff, tree_id); + err = ll_ops->ralst_write(mlxsw_sp, xralst_pl); if (err) return err; for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { + struct mlxsw_sp_fib_entry_op_ctx *op_ctx = mlxsw_sp->router->ll_op_ctx; struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i]; - char raltb_pl[MLXSW_REG_RALTB_LEN]; - char ralue_pl[MLXSW_REG_RALUE_LEN]; + char xraltb_pl[MLXSW_REG_XRALTB_LEN]; - mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), - raltb_pl); + mlxsw_sp_fib_entry_op_ctx_clear(op_ctx); + mlxsw_reg_xraltb_pack(xraltb_pl, vr->id, ralxx_proto, tree_id); + err = ll_ops->raltb_write(mlxsw_sp, xraltb_pl); if (err) return err; - mlxsw_reg_ralue_pack(ralue_pl, proto, - MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0); - mlxsw_reg_ralue_act_ip2me_pack(ralue_pl); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), - ralue_pl); + priv = mlxsw_sp_fib_entry_priv_create(ll_ops); + if (IS_ERR(priv)) + return PTR_ERR(priv); + + ll_ops->fib_entry_pack(op_ctx, proto, MLXSW_SP_FIB_ENTRY_OP_WRITE, + vr->id, 0, NULL, priv); + ll_ops->fib_entry_act_ip2me_pack(op_ctx); + err = ll_ops->fib_entry_commit(mlxsw_sp, op_ctx, NULL); + mlxsw_sp_fib_entry_priv_put(priv); if (err) return err; } @@ -5784,7 +6084,7 @@ mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) { - enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4; + enum mlxsw_sp_l3proto proto = MLXSW_SP_L3_PROTO_IPV4; int err; err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto, @@ -5796,7 +6096,7 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) * packets that don't match any routes are trapped to the CPU. */ - proto = MLXSW_REG_RALXX_PROTOCOL_IPV6; + proto = MLXSW_SP_L3_PROTO_IPV6; return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto, MLXSW_SP_LPM_TREE_MIN + 1); } @@ -5901,15 +6201,15 @@ static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp) dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n"); } -struct mlxsw_sp_fib6_event_work { +struct mlxsw_sp_fib6_event { struct fib6_info **rt_arr; unsigned int nrt6; }; -struct mlxsw_sp_fib_event_work { - struct work_struct work; +struct mlxsw_sp_fib_event { + struct list_head list; /* node in fib queue */ union { - struct mlxsw_sp_fib6_event_work fib6_work; + struct mlxsw_sp_fib6_event fib6_event; struct fib_entry_notifier_info fen_info; struct fib_rule_notifier_info fr_info; struct fib_nh_notifier_info fnh_info; @@ -5918,11 +6218,12 @@ struct mlxsw_sp_fib_event_work { }; struct mlxsw_sp *mlxsw_sp; unsigned long event; + int family; }; static int -mlxsw_sp_router_fib6_work_init(struct mlxsw_sp_fib6_event_work *fib6_work, - struct fib6_entry_notifier_info *fen6_info) +mlxsw_sp_router_fib6_event_init(struct mlxsw_sp_fib6_event *fib6_event, + struct fib6_entry_notifier_info *fen6_info) { struct fib6_info *rt = fen6_info->rt; struct fib6_info **rt_arr; @@ -5936,8 +6237,8 @@ mlxsw_sp_router_fib6_work_init(struct mlxsw_sp_fib6_event_work *fib6_work, if (!rt_arr) return -ENOMEM; - fib6_work->rt_arr = rt_arr; - fib6_work->nrt6 = nrt6; + fib6_event->rt_arr = rt_arr; + fib6_event->nrt6 = nrt6; rt_arr[0] = rt; fib6_info_hold(rt); @@ -5959,170 +6260,232 @@ mlxsw_sp_router_fib6_work_init(struct mlxsw_sp_fib6_event_work *fib6_work, } static void -mlxsw_sp_router_fib6_work_fini(struct mlxsw_sp_fib6_event_work *fib6_work) +mlxsw_sp_router_fib6_event_fini(struct mlxsw_sp_fib6_event *fib6_event) { int i; - for (i = 0; i < fib6_work->nrt6; i++) - mlxsw_sp_rt6_release(fib6_work->rt_arr[i]); - kfree(fib6_work->rt_arr); + for (i = 0; i < fib6_event->nrt6; i++) + mlxsw_sp_rt6_release(fib6_event->rt_arr[i]); + kfree(fib6_event->rt_arr); } -static void mlxsw_sp_router_fib4_event_work(struct work_struct *work) +static void mlxsw_sp_router_fib4_event_process(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + struct mlxsw_sp_fib_event *fib_event) { - struct mlxsw_sp_fib_event_work *fib_work = - container_of(work, struct mlxsw_sp_fib_event_work, work); - struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; int err; - mutex_lock(&mlxsw_sp->router->lock); mlxsw_sp_span_respin(mlxsw_sp); - switch (fib_work->event) { + switch (fib_event->event) { case FIB_EVENT_ENTRY_REPLACE: - err = mlxsw_sp_router_fib4_replace(mlxsw_sp, - &fib_work->fen_info); - if (err) + err = mlxsw_sp_router_fib4_replace(mlxsw_sp, op_ctx, &fib_event->fen_info); + if (err) { + mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx); mlxsw_sp_router_fib_abort(mlxsw_sp); - fib_info_put(fib_work->fen_info.fi); + } + fib_info_put(fib_event->fen_info.fi); break; case FIB_EVENT_ENTRY_DEL: - mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info); - fib_info_put(fib_work->fen_info.fi); + err = mlxsw_sp_router_fib4_del(mlxsw_sp, op_ctx, &fib_event->fen_info); + if (err) + mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx); + fib_info_put(fib_event->fen_info.fi); break; case FIB_EVENT_NH_ADD: case FIB_EVENT_NH_DEL: - mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event, - fib_work->fnh_info.fib_nh); - fib_info_put(fib_work->fnh_info.fib_nh->nh_parent); + mlxsw_sp_nexthop4_event(mlxsw_sp, fib_event->event, fib_event->fnh_info.fib_nh); + fib_info_put(fib_event->fnh_info.fib_nh->nh_parent); break; } - mutex_unlock(&mlxsw_sp->router->lock); - kfree(fib_work); } -static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) +static void mlxsw_sp_router_fib6_event_process(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + struct mlxsw_sp_fib_event *fib_event) { - struct mlxsw_sp_fib_event_work *fib_work = - container_of(work, struct mlxsw_sp_fib_event_work, work); - struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; int err; - mutex_lock(&mlxsw_sp->router->lock); mlxsw_sp_span_respin(mlxsw_sp); - switch (fib_work->event) { + switch (fib_event->event) { case FIB_EVENT_ENTRY_REPLACE: - err = mlxsw_sp_router_fib6_replace(mlxsw_sp, - fib_work->fib6_work.rt_arr, - fib_work->fib6_work.nrt6); - if (err) + err = mlxsw_sp_router_fib6_replace(mlxsw_sp, op_ctx, fib_event->fib6_event.rt_arr, + fib_event->fib6_event.nrt6); + if (err) { + mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx); mlxsw_sp_router_fib_abort(mlxsw_sp); - mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work); + } + mlxsw_sp_router_fib6_event_fini(&fib_event->fib6_event); break; case FIB_EVENT_ENTRY_APPEND: - err = mlxsw_sp_router_fib6_append(mlxsw_sp, - fib_work->fib6_work.rt_arr, - fib_work->fib6_work.nrt6); - if (err) + err = mlxsw_sp_router_fib6_append(mlxsw_sp, op_ctx, fib_event->fib6_event.rt_arr, + fib_event->fib6_event.nrt6); + if (err) { + mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx); mlxsw_sp_router_fib_abort(mlxsw_sp); - mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work); + } + mlxsw_sp_router_fib6_event_fini(&fib_event->fib6_event); break; case FIB_EVENT_ENTRY_DEL: - mlxsw_sp_router_fib6_del(mlxsw_sp, - fib_work->fib6_work.rt_arr, - fib_work->fib6_work.nrt6); - mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work); + err = mlxsw_sp_router_fib6_del(mlxsw_sp, op_ctx, fib_event->fib6_event.rt_arr, + fib_event->fib6_event.nrt6); + if (err) + mlxsw_sp_fib_entry_op_ctx_priv_put_all(op_ctx); + mlxsw_sp_router_fib6_event_fini(&fib_event->fib6_event); break; } - mutex_unlock(&mlxsw_sp->router->lock); - kfree(fib_work); } -static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work) +static void mlxsw_sp_router_fibmr_event_process(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_event *fib_event) { - struct mlxsw_sp_fib_event_work *fib_work = - container_of(work, struct mlxsw_sp_fib_event_work, work); - struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; bool replace; int err; rtnl_lock(); mutex_lock(&mlxsw_sp->router->lock); - switch (fib_work->event) { + switch (fib_event->event) { case FIB_EVENT_ENTRY_REPLACE: case FIB_EVENT_ENTRY_ADD: - replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE; + replace = fib_event->event == FIB_EVENT_ENTRY_REPLACE; - err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info, - replace); + err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_event->men_info, replace); if (err) mlxsw_sp_router_fib_abort(mlxsw_sp); - mr_cache_put(fib_work->men_info.mfc); + mr_cache_put(fib_event->men_info.mfc); break; case FIB_EVENT_ENTRY_DEL: - mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info); - mr_cache_put(fib_work->men_info.mfc); + mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_event->men_info); + mr_cache_put(fib_event->men_info.mfc); break; case FIB_EVENT_VIF_ADD: err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp, - &fib_work->ven_info); + &fib_event->ven_info); if (err) mlxsw_sp_router_fib_abort(mlxsw_sp); - dev_put(fib_work->ven_info.dev); + dev_put(fib_event->ven_info.dev); break; case FIB_EVENT_VIF_DEL: - mlxsw_sp_router_fibmr_vif_del(mlxsw_sp, - &fib_work->ven_info); - dev_put(fib_work->ven_info.dev); + mlxsw_sp_router_fibmr_vif_del(mlxsw_sp, &fib_event->ven_info); + dev_put(fib_event->ven_info.dev); break; } mutex_unlock(&mlxsw_sp->router->lock); rtnl_unlock(); - kfree(fib_work); } -static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work, +static void mlxsw_sp_router_fib_event_work(struct work_struct *work) +{ + struct mlxsw_sp_router *router = container_of(work, struct mlxsw_sp_router, fib_event_work); + struct mlxsw_sp_fib_entry_op_ctx *op_ctx = router->ll_op_ctx; + struct mlxsw_sp *mlxsw_sp = router->mlxsw_sp; + struct mlxsw_sp_fib_event *next_fib_event; + struct mlxsw_sp_fib_event *fib_event; + int last_family = AF_UNSPEC; + LIST_HEAD(fib_event_queue); + + spin_lock_bh(&router->fib_event_queue_lock); + list_splice_init(&router->fib_event_queue, &fib_event_queue); + spin_unlock_bh(&router->fib_event_queue_lock); + + /* Router lock is held here to make sure per-instance + * operation context is not used in between FIB4/6 events + * processing. + */ + mutex_lock(&router->lock); + mlxsw_sp_fib_entry_op_ctx_clear(op_ctx); + list_for_each_entry_safe(fib_event, next_fib_event, + &fib_event_queue, list) { + /* Check if the next entry in the queue exists and it is + * of the same type (family and event) as the currect one. + * In that case it is permitted to do the bulking + * of multiple FIB entries to a single register write. + */ + op_ctx->bulk_ok = !list_is_last(&fib_event->list, &fib_event_queue) && + fib_event->family == next_fib_event->family && + fib_event->event == next_fib_event->event; + + /* In case family of this and the previous entry are different, context + * reinitialization is going to be needed now, indicate that. + * Note that since last_family is initialized to AF_UNSPEC, this is always + * going to happen for the first entry processed in the work. + */ + if (fib_event->family != last_family) + op_ctx->initialized = false; + + switch (fib_event->family) { + case AF_INET: + mlxsw_sp_router_fib4_event_process(mlxsw_sp, op_ctx, + fib_event); + break; + case AF_INET6: + mlxsw_sp_router_fib6_event_process(mlxsw_sp, op_ctx, + fib_event); + break; + case RTNL_FAMILY_IP6MR: + case RTNL_FAMILY_IPMR: + /* Unlock here as inside FIBMR the lock is taken again + * under RTNL. The per-instance operation context + * is not used by FIBMR. + */ + mutex_unlock(&router->lock); + mlxsw_sp_router_fibmr_event_process(mlxsw_sp, + fib_event); + mutex_lock(&router->lock); + break; + default: + WARN_ON_ONCE(1); + } + last_family = fib_event->family; + kfree(fib_event); + cond_resched(); + } + WARN_ON_ONCE(!list_empty(&router->ll_op_ctx->fib_entry_priv_list)); + mutex_unlock(&router->lock); +} + +static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event *fib_event, struct fib_notifier_info *info) { struct fib_entry_notifier_info *fen_info; struct fib_nh_notifier_info *fnh_info; - switch (fib_work->event) { + switch (fib_event->event) { case FIB_EVENT_ENTRY_REPLACE: case FIB_EVENT_ENTRY_DEL: fen_info = container_of(info, struct fib_entry_notifier_info, info); - fib_work->fen_info = *fen_info; + fib_event->fen_info = *fen_info; /* Take reference on fib_info to prevent it from being - * freed while work is queued. Release it afterwards. + * freed while event is queued. Release it afterwards. */ - fib_info_hold(fib_work->fen_info.fi); + fib_info_hold(fib_event->fen_info.fi); break; case FIB_EVENT_NH_ADD: case FIB_EVENT_NH_DEL: fnh_info = container_of(info, struct fib_nh_notifier_info, info); - fib_work->fnh_info = *fnh_info; - fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent); + fib_event->fnh_info = *fnh_info; + fib_info_hold(fib_event->fnh_info.fib_nh->nh_parent); break; } } -static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, +static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event *fib_event, struct fib_notifier_info *info) { struct fib6_entry_notifier_info *fen6_info; int err; - switch (fib_work->event) { + switch (fib_event->event) { case FIB_EVENT_ENTRY_REPLACE: case FIB_EVENT_ENTRY_APPEND: case FIB_EVENT_ENTRY_DEL: fen6_info = container_of(info, struct fib6_entry_notifier_info, info); - err = mlxsw_sp_router_fib6_work_init(&fib_work->fib6_work, - fen6_info); + err = mlxsw_sp_router_fib6_event_init(&fib_event->fib6_event, + fen6_info); if (err) return err; break; @@ -6132,20 +6495,20 @@ static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, } static void -mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work, +mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event *fib_event, struct fib_notifier_info *info) { - switch (fib_work->event) { + switch (fib_event->event) { case FIB_EVENT_ENTRY_REPLACE: case FIB_EVENT_ENTRY_ADD: case FIB_EVENT_ENTRY_DEL: - memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info)); - mr_cache_hold(fib_work->men_info.mfc); + memcpy(&fib_event->men_info, info, sizeof(fib_event->men_info)); + mr_cache_hold(fib_event->men_info.mfc); break; case FIB_EVENT_VIF_ADD: case FIB_EVENT_VIF_DEL: - memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info)); - dev_hold(fib_work->ven_info.dev); + memcpy(&fib_event->ven_info, info, sizeof(fib_event->ven_info)); + dev_hold(fib_event->ven_info.dev); break; } } @@ -6202,7 +6565,7 @@ static int mlxsw_sp_router_fib_rule_event(unsigned long event, static int mlxsw_sp_router_fib_event(struct notifier_block *nb, unsigned long event, void *ptr) { - struct mlxsw_sp_fib_event_work *fib_work; + struct mlxsw_sp_fib_event *fib_event; struct fib_notifier_info *info = ptr; struct mlxsw_sp_router *router; int err; @@ -6252,37 +6615,39 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, break; } - fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC); - if (!fib_work) + fib_event = kzalloc(sizeof(*fib_event), GFP_ATOMIC); + if (!fib_event) return NOTIFY_BAD; - fib_work->mlxsw_sp = router->mlxsw_sp; - fib_work->event = event; + fib_event->mlxsw_sp = router->mlxsw_sp; + fib_event->event = event; + fib_event->family = info->family; switch (info->family) { case AF_INET: - INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work); - mlxsw_sp_router_fib4_event(fib_work, info); + mlxsw_sp_router_fib4_event(fib_event, info); break; case AF_INET6: - INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work); - err = mlxsw_sp_router_fib6_event(fib_work, info); + err = mlxsw_sp_router_fib6_event(fib_event, info); if (err) goto err_fib_event; break; case RTNL_FAMILY_IP6MR: case RTNL_FAMILY_IPMR: - INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work); - mlxsw_sp_router_fibmr_event(fib_work, info); + mlxsw_sp_router_fibmr_event(fib_event, info); break; } - mlxsw_core_schedule_work(&fib_work->work); + /* Enqueue the event and trigger the work */ + spin_lock_bh(&router->fib_event_queue_lock); + list_add_tail(&fib_event->list, &router->fib_event_queue); + spin_unlock_bh(&router->fib_event_queue_lock); + mlxsw_core_schedule_work(&router->fib_event_work); return NOTIFY_DONE; err_fib_event: - kfree(fib_work); + kfree(fib_event); return NOTIFY_BAD; } @@ -8057,6 +8422,45 @@ static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); } +static const struct mlxsw_sp_router_ll_ops mlxsw_sp_router_ll_basic_ops = { + .ralta_write = mlxsw_sp_router_ll_basic_ralta_write, + .ralst_write = mlxsw_sp_router_ll_basic_ralst_write, + .raltb_write = mlxsw_sp_router_ll_basic_raltb_write, + .fib_entry_op_ctx_size = sizeof(struct mlxsw_sp_fib_entry_op_ctx_basic), + .fib_entry_pack = mlxsw_sp_router_ll_basic_fib_entry_pack, + .fib_entry_act_remote_pack = mlxsw_sp_router_ll_basic_fib_entry_act_remote_pack, + .fib_entry_act_local_pack = mlxsw_sp_router_ll_basic_fib_entry_act_local_pack, + .fib_entry_act_ip2me_pack = mlxsw_sp_router_ll_basic_fib_entry_act_ip2me_pack, + .fib_entry_act_ip2me_tun_pack = mlxsw_sp_router_ll_basic_fib_entry_act_ip2me_tun_pack, + .fib_entry_commit = mlxsw_sp_router_ll_basic_fib_entry_commit, + .fib_entry_is_committed = mlxsw_sp_router_ll_basic_fib_entry_is_committed, +}; + +static int mlxsw_sp_router_ll_op_ctx_init(struct mlxsw_sp_router *router) +{ + size_t max_size = 0; + int i; + + for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) { + size_t size = router->proto_ll_ops[i]->fib_entry_op_ctx_size; + + if (size > max_size) + max_size = size; + } + router->ll_op_ctx = kzalloc(sizeof(*router->ll_op_ctx) + max_size, + GFP_KERNEL); + if (!router->ll_op_ctx) + return -ENOMEM; + INIT_LIST_HEAD(&router->ll_op_ctx->fib_entry_priv_list); + return 0; +} + +static void mlxsw_sp_router_ll_op_ctx_fini(struct mlxsw_sp_router *router) +{ + WARN_ON(!list_empty(&router->ll_op_ctx->fib_entry_priv_list)); + kfree(router->ll_op_ctx); +} + int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, struct netlink_ext_ack *extack) { @@ -8070,6 +8474,13 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, mlxsw_sp->router = router; router->mlxsw_sp = mlxsw_sp; + router->proto_ll_ops[MLXSW_SP_L3_PROTO_IPV4] = &mlxsw_sp_router_ll_basic_ops; + router->proto_ll_ops[MLXSW_SP_L3_PROTO_IPV6] = &mlxsw_sp_router_ll_basic_ops; + + err = mlxsw_sp_router_ll_op_ctx_init(router); + if (err) + goto err_ll_op_ctx_init; + INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list); err = __mlxsw_sp_router_init(mlxsw_sp); if (err) @@ -8118,6 +8529,10 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, if (err) goto err_dscp_init; + INIT_WORK(&router->fib_event_work, mlxsw_sp_router_fib_event_work); + INIT_LIST_HEAD(&router->fib_event_queue); + spin_lock_init(&router->fib_event_queue_lock); + router->inetaddr_nb.notifier_call = mlxsw_sp_inetaddr_event; err = register_inetaddr_notifier(&router->inetaddr_nb); if (err) @@ -8151,6 +8566,7 @@ err_register_inet6addr_notifier: unregister_inetaddr_notifier(&router->inetaddr_nb); err_register_inetaddr_notifier: mlxsw_core_flush_owq(); + WARN_ON(!list_empty(&router->fib_event_queue)); err_dscp_init: err_mp_hash_init: mlxsw_sp_neigh_fini(mlxsw_sp); @@ -8171,6 +8587,8 @@ err_ipips_init: err_rifs_init: __mlxsw_sp_router_fini(mlxsw_sp); err_router_init: + mlxsw_sp_router_ll_op_ctx_fini(router); +err_ll_op_ctx_init: mutex_destroy(&mlxsw_sp->router->lock); kfree(mlxsw_sp->router); return err; @@ -8184,6 +8602,7 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) unregister_inet6addr_notifier(&mlxsw_sp->router->inet6addr_nb); unregister_inetaddr_notifier(&mlxsw_sp->router->inetaddr_nb); mlxsw_core_flush_owq(); + WARN_ON(!list_empty(&mlxsw_sp->router->fib_event_queue)); mlxsw_sp_neigh_fini(mlxsw_sp); mlxsw_sp_vrs_fini(mlxsw_sp); mlxsw_sp_mr_fini(mlxsw_sp); @@ -8193,6 +8612,7 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) mlxsw_sp_ipips_fini(mlxsw_sp); mlxsw_sp_rifs_fini(mlxsw_sp); __mlxsw_sp_router_fini(mlxsw_sp); + mlxsw_sp_router_ll_op_ctx_fini(mlxsw_sp->router); mutex_destroy(&mlxsw_sp->router->lock); kfree(mlxsw_sp->router); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index 8418dc3ae967..8230f6ff02ed 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -15,6 +15,26 @@ struct mlxsw_sp_router_nve_decap { u8 valid:1; }; +struct mlxsw_sp_fib_entry_op_ctx { + u8 bulk_ok:1, /* Indicate to the low-level op it is ok to bulk + * the actual entry with the one that is the next + * in queue. + */ + initialized:1; /* Bit that the low-level op sets in case + * the context priv is initialized. + */ + struct list_head fib_entry_priv_list; + unsigned long ll_priv[]; +}; + +static inline void +mlxsw_sp_fib_entry_op_ctx_clear(struct mlxsw_sp_fib_entry_op_ctx *op_ctx) +{ + WARN_ON_ONCE(!list_empty(&op_ctx->fib_entry_priv_list)); + memset(op_ctx, 0, sizeof(*op_ctx)); + INIT_LIST_HEAD(&op_ctx->fib_entry_priv_list); +} + struct mlxsw_sp_router { struct mlxsw_sp *mlxsw_sp; struct mlxsw_sp_rif **rifs; @@ -48,8 +68,58 @@ struct mlxsw_sp_router { bool adj_discard_index_valid; struct mlxsw_sp_router_nve_decap nve_decap_config; struct mutex lock; /* Protects shared router resources */ + struct work_struct fib_event_work; + struct list_head fib_event_queue; + spinlock_t fib_event_queue_lock; /* Protects fib event queue list */ + /* One set of ops for each protocol: IPv4 and IPv6 */ + const struct mlxsw_sp_router_ll_ops *proto_ll_ops[MLXSW_SP_L3_PROTO_MAX]; + struct mlxsw_sp_fib_entry_op_ctx *ll_op_ctx; +}; + +struct mlxsw_sp_fib_entry_priv { + refcount_t refcnt; + struct list_head list; /* Member in op_ctx->fib_entry_priv_list */ + unsigned long priv[]; +}; + +enum mlxsw_sp_fib_entry_op { + MLXSW_SP_FIB_ENTRY_OP_WRITE, + MLXSW_SP_FIB_ENTRY_OP_UPDATE, + MLXSW_SP_FIB_ENTRY_OP_DELETE, }; +/* Low-level router ops. Basically this is to handle the different + * register sets to work with ordinary and XM trees and FIB entries. + */ +struct mlxsw_sp_router_ll_ops { + int (*ralta_write)(struct mlxsw_sp *mlxsw_sp, char *xralta_pl); + int (*ralst_write)(struct mlxsw_sp *mlxsw_sp, char *xralst_pl); + int (*raltb_write)(struct mlxsw_sp *mlxsw_sp, char *xraltb_pl); + size_t fib_entry_op_ctx_size; + size_t fib_entry_priv_size; + void (*fib_entry_pack)(struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + enum mlxsw_sp_l3proto proto, enum mlxsw_sp_fib_entry_op op, + u16 virtual_router, u8 prefix_len, unsigned char *addr, + struct mlxsw_sp_fib_entry_priv *priv); + void (*fib_entry_act_remote_pack)(struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + enum mlxsw_reg_ralue_trap_action trap_action, + u16 trap_id, u32 adjacency_index, u16 ecmp_size); + void (*fib_entry_act_local_pack)(struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + enum mlxsw_reg_ralue_trap_action trap_action, + u16 trap_id, u16 local_erif); + void (*fib_entry_act_ip2me_pack)(struct mlxsw_sp_fib_entry_op_ctx *op_ctx); + void (*fib_entry_act_ip2me_tun_pack)(struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + u32 tunnel_ptr); + int (*fib_entry_commit)(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + bool *postponed_for_bulk); + bool (*fib_entry_is_committed)(struct mlxsw_sp_fib_entry_priv *priv); +}; + +int mlxsw_sp_fib_entry_commit(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry_op_ctx *op_ctx, + const struct mlxsw_sp_router_ll_ops *ll_ops); + struct mlxsw_sp_rif_ipip_lb; struct mlxsw_sp_rif_ipip_lb_config { enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt; |