From 54c62e13ad765a346d220b2566f84c6092cf3564 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 10 Mar 2020 10:22:27 +0200 Subject: {IB,net}/mlx5: Setup mkey variant before mr create command invocation On reg_mr_callback() mlx5_ib is recalculating the mkey variant which is wrong and will lead to using a different key variant than the one submitted to firmware on create mkey command invocation. To fix this, we store the mkey variant before invoking the firmware command and use it later on completion (reg_mr_callback). Signed-off-by: Saeed Mahameed Reviewed-by: Eli Cohen Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mr.c | 7 ++----- drivers/net/ethernet/mellanox/mlx5/core/mr.c | 3 ++- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 6fa0a83c19de..45c3282dd5e1 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -87,7 +87,6 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context) struct mlx5_mr_cache *cache = &dev->cache; int c = order2idx(dev, mr->order); struct mlx5_cache_ent *ent = &cache->ent[c]; - u8 key; unsigned long flags; spin_lock_irqsave(&ent->lock, flags); @@ -102,10 +101,8 @@ static void reg_mr_callback(int status, struct mlx5_async_work *context) } mr->mmkey.type = MLX5_MKEY_MR; - spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags); - key = dev->mdev->priv.mkey_key++; - spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags); - mr->mmkey.key = mlx5_idx_to_mkey(MLX5_GET(create_mkey_out, mr->out, mkey_index)) | key; + mr->mmkey.key |= mlx5_idx_to_mkey( + MLX5_GET(create_mkey_out, mr->out, mkey_index)); cache->last_add = jiffies; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 42cc3c7ac5b6..770d13bb4f20 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -56,6 +56,7 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY); MLX5_SET(mkc, mkc, mkey_7_0, key); + mkey->key = key; if (callback) return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen, @@ -68,7 +69,7 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index); mkey->iova = MLX5_GET64(mkc, mkc, start_addr); mkey->size = MLX5_GET64(mkc, mkc, len); - mkey->key = mlx5_idx_to_mkey(mkey_index) | key; + mkey->key |= mlx5_idx_to_mkey(mkey_index); mkey->pd = MLX5_GET(mkc, mkc, pd); mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", -- cgit v1.3-14-g43fede From fc6a9f86f08acd3665f788619afae0d2b2d5a480 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 10 Mar 2020 10:22:28 +0200 Subject: {IB,net}/mlx5: Assign mkey variant in mlx5_ib only mkey variant is not required for mlx5_core use, move the mkey variant counter to mlx5_ib. Signed-off-by: Saeed Mahameed Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/main.c | 1 + drivers/infiniband/hw/mlx5/mlx5_ib.h | 5 +++ drivers/infiniband/hw/mlx5/mr.c | 58 +++++++++++++++++++++----- drivers/net/ethernet/mellanox/mlx5/core/main.c | 1 - drivers/net/ethernet/mellanox/mlx5/core/mr.c | 8 +--- include/linux/mlx5/driver.h | 4 -- 6 files changed, 55 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index e4bcfa81b70a..fce863621414 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -6390,6 +6390,7 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) spin_lock_init(&dev->reset_flow_resource_lock); xa_init(&dev->odp_mkeys); xa_init(&dev->sig_mrs); + spin_lock_init(&dev->mkey_lock); spin_lock_init(&dev->dm.lock); dev->dm.dev = mdev; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index d9bffcc93587..89a050e516a8 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -992,6 +992,11 @@ struct mlx5_ib_dev { /* sync used page count stats */ struct mlx5_ib_resources devr; + + /* protect mkey key part */ + spinlock_t mkey_lock; + u8 mkey_key; + struct mlx5_mr_cache cache; struct timer_list delay_timer; /* Prevents soft lock on massive reg MRs */ diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 45c3282dd5e1..1b83d00e8ecd 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -47,6 +47,46 @@ enum { #define MLX5_UMR_ALIGN 2048 +static void +create_mkey_callback(int status, struct mlx5_async_work *context); + +static void +assign_mkey_variant(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey, + u32 *in) +{ + void *mkc; + u8 key; + + spin_lock_irq(&dev->mkey_lock); + key = dev->mkey_key++; + spin_unlock_irq(&dev->mkey_lock); + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, mkey_7_0, key); + mkey->key = key; +} + +static int +mlx5_ib_create_mkey(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey, + u32 *in, int inlen) +{ + assign_mkey_variant(dev, mkey, in); + return mlx5_core_create_mkey(dev->mdev, mkey, in, inlen); +} + +static int +mlx5_ib_create_mkey_cb(struct mlx5_ib_dev *dev, + struct mlx5_core_mkey *mkey, + struct mlx5_async_ctx *async_ctx, + u32 *in, int inlen, u32 *out, int outlen, + struct mlx5_async_work *context) +{ + assign_mkey_variant(dev, mkey, in); + return mlx5_core_create_mkey_cb(dev->mdev, mkey, async_ctx, + in, inlen, out, outlen, + create_mkey_callback, context); +} + static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); static int mr_cache_max_order(struct mlx5_ib_dev *dev); @@ -79,7 +119,7 @@ static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length) length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1)); } -static void reg_mr_callback(int status, struct mlx5_async_work *context) +static void create_mkey_callback(int status, struct mlx5_async_work *context) { struct mlx5_ib_mr *mr = container_of(context, struct mlx5_ib_mr, cb_work); @@ -160,10 +200,10 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) spin_lock_irq(&ent->lock); ent->pending++; spin_unlock_irq(&ent->lock); - err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey, + err = mlx5_ib_create_mkey_cb(dev, &mr->mmkey, &dev->async_ctx, in, inlen, mr->out, sizeof(mr->out), - reg_mr_callback, &mr->cb_work); + &mr->cb_work); if (err) { spin_lock_irq(&ent->lock); ent->pending--; @@ -682,7 +722,6 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) { struct mlx5_ib_dev *dev = to_mdev(pd->device); int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); - struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_ib_mr *mr; void *mkc; u32 *in; @@ -704,7 +743,7 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) MLX5_SET(mkc, mkc, length64, 1); set_mkc_access_pd_addr_fields(mkc, acc, 0, pd); - err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen); + err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); if (err) goto err_in; @@ -1094,7 +1133,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, get_octo_len(virt_addr, length, page_shift)); } - err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); + err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); if (err) { mlx5_ib_warn(dev, "create mkey failed\n"); goto err_2; @@ -1134,7 +1173,6 @@ static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr, { struct mlx5_ib_dev *dev = to_mdev(pd->device); int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); - struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_ib_mr *mr; void *mkc; u32 *in; @@ -1157,7 +1195,7 @@ static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr, MLX5_SET64(mkc, mkc, len, length); set_mkc_access_pd_addr_fields(mkc, acc, start_addr, pd); - err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen); + err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); if (err) goto err_in; @@ -1635,7 +1673,7 @@ static int _mlx5_alloc_mkey_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, mlx5_set_umr_free_mkey(pd, in, ndescs, access_mode, page_shift); - err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); + err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen); if (err) goto err_free_descs; @@ -1902,7 +1940,7 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, MLX5_SET(mkc, mkc, en_rinval, !!((type == IB_MW_TYPE_2))); MLX5_SET(mkc, mkc, qpn, 0xffffff); - err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, inlen); + err = mlx5_ib_create_mkey(dev, &mw->mmkey, in, inlen); if (err) goto free; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index f554cfddcf4e..6b38ec72215a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1282,7 +1282,6 @@ static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) mutex_init(&priv->alloc_mutex); mutex_init(&priv->pgdir_mutex); INIT_LIST_HEAD(&priv->pgdir_list); - spin_lock_init(&priv->mkey_lock); priv->dbg_root = debugfs_create_dir(dev_name(dev->device), mlx5_debugfs_root); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 770d13bb4f20..51814d023efb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -49,14 +49,7 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, int err; u8 key; - spin_lock_irq(&dev->priv.mkey_lock); - key = dev->priv.mkey_key++; - spin_unlock_irq(&dev->priv.mkey_lock); - mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); - MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY); - MLX5_SET(mkc, mkc, mkey_7_0, key); - mkey->key = key; if (callback) return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen, @@ -66,6 +59,7 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, if (err) return err; + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index); mkey->iova = MLX5_GET64(mkc, mkc, start_addr); mkey->size = MLX5_GET64(mkc, mkc, len); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f2b4225ed650..e044703c056b 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -575,10 +575,6 @@ struct mlx5_priv { /* end: alloc staff */ struct dentry *dbg_root; - /* protect mkey key part */ - spinlock_t mkey_lock; - u8 mkey_key; - struct list_head dev_list; struct list_head ctx_list; spinlock_t ctx_lock; -- cgit v1.3-14-g43fede From a3cfdd3928113012d0f2c5353277f4e27878a663 Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Tue, 10 Mar 2020 10:22:30 +0200 Subject: {IB,net}/mlx5: Move asynchronous mkey creation to mlx5_ib As mlx5_ib is the only user of the mlx5_core_create_mkey_cb, move the logic inside mlx5_ib and cleanup the code in mlx5_core. Signed-off-by: Michael Guralnik Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mr.c | 6 +++--- drivers/net/ethernet/mellanox/mlx5/core/mr.c | 22 +++------------------- include/linux/mlx5/driver.h | 6 ------ 3 files changed, 6 insertions(+), 28 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 1b83d00e8ecd..8508af500972 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -81,10 +81,10 @@ mlx5_ib_create_mkey_cb(struct mlx5_ib_dev *dev, u32 *in, int inlen, u32 *out, int outlen, struct mlx5_async_work *context) { + MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY); assign_mkey_variant(dev, mkey, in); - return mlx5_core_create_mkey_cb(dev->mdev, mkey, async_ctx, - in, inlen, out, outlen, - create_mkey_callback, context); + return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen, + create_mkey_callback, context); } static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 51814d023efb..fd3e6d217c3b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -36,12 +36,9 @@ #include #include "mlx5_core.h" -int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, - struct mlx5_core_mkey *mkey, - struct mlx5_async_ctx *async_ctx, u32 *in, - int inlen, u32 *out, int outlen, - mlx5_async_cbk_t callback, - struct mlx5_async_work *context) +int mlx5_core_create_mkey(struct mlx5_core_dev *dev, + struct mlx5_core_mkey *mkey, + u32 *in, int inlen) { u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0}; u32 mkey_index; @@ -51,10 +48,6 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY); - if (callback) - return mlx5_cmd_exec_cb(async_ctx, in, inlen, out, outlen, - callback, context); - err = mlx5_cmd_exec(dev, in, inlen, lout, sizeof(lout)); if (err) return err; @@ -70,15 +63,6 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, mkey_index, key, mkey->key); return 0; } -EXPORT_SYMBOL(mlx5_core_create_mkey_cb); - -int mlx5_core_create_mkey(struct mlx5_core_dev *dev, - struct mlx5_core_mkey *mkey, - u32 *in, int inlen) -{ - return mlx5_core_create_mkey_cb(dev, mkey, NULL, in, inlen, - NULL, 0, NULL, NULL); -} EXPORT_SYMBOL(mlx5_core_create_mkey); int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index e044703c056b..1de78f001d26 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -943,12 +943,6 @@ struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev, gfp_t flags, int npages); void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev, struct mlx5_cmd_mailbox *head); -int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, - struct mlx5_core_mkey *mkey, - struct mlx5_async_ctx *async_ctx, u32 *in, - int inlen, u32 *out, int outlen, - mlx5_async_cbk_t callback, - struct mlx5_async_work *context); int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, u32 *in, int inlen); -- cgit v1.3-14-g43fede From 826096d84f509d95ee8f72728fe19c44fbb9df6b Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 16 Mar 2020 13:34:52 -0700 Subject: mlx5: Remove uninitialized use of key in mlx5_core_create_mkey Clang warns: ../drivers/net/ethernet/mellanox/mlx5/core/mr.c:63:21: warning: variable 'key' is uninitialized when used here [-Wuninitialized] mkey_index, key, mkey->key); ^~~ ../drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h:54:6: note: expanded from macro 'mlx5_core_dbg' ##__VA_ARGS__) ^~~~~~~~~~~ ../include/linux/dev_printk.h:114:39: note: expanded from macro 'dev_dbg' dynamic_dev_dbg(dev, dev_fmt(fmt), ##__VA_ARGS__) ^~~~~~~~~~~ ../include/linux/dynamic_debug.h:158:19: note: expanded from macro 'dynamic_dev_dbg' dev, fmt, ##__VA_ARGS__) ^~~~~~~~~~~ ../include/linux/dynamic_debug.h:143:56: note: expanded from macro '_dynamic_func_call' __dynamic_func_call(__UNIQUE_ID(ddebug), fmt, func, ##__VA_ARGS__) ^~~~~~~~~~~ ../include/linux/dynamic_debug.h:125:15: note: expanded from macro '__dynamic_func_call' func(&id, ##__VA_ARGS__); \ ^~~~~~~~~~~ ../drivers/net/ethernet/mellanox/mlx5/core/mr.c:47:8: note: initialize the variable 'key' to silence this warning u8 key; ^ = '\0' 1 warning generated. key's initialization was removed in commit fc6a9f86f08a ("{IB,net}/mlx5: Assign mkey variant in mlx5_ib only") but its use was not fully removed. Remove it now so that there is no more warning. Fixes: fc6a9f86f08a ("{IB,net}/mlx5: Assign mkey variant in mlx5_ib only") Link: https://github.com/ClangBuiltLinux/linux/issues/932 Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/mr.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index fd3e6d217c3b..366f2cbfc6db 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -44,7 +44,6 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, u32 mkey_index; void *mkc; int err; - u8 key; MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY); @@ -59,8 +58,7 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, mkey->key |= mlx5_idx_to_mkey(mkey_index); mkey->pd = MLX5_GET(mkc, mkc, pd); - mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", - mkey_index, key, mkey->key); + mlx5_core_dbg(dev, "out 0x%x, mkey 0x%x\n", mkey_index, mkey->key); return 0; } EXPORT_SYMBOL(mlx5_core_create_mkey); -- cgit v1.3-14-g43fede From 6838a35a4567de7ddefd5c2d09ccfa41d754e4ee Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Tue, 17 Mar 2020 11:28:28 +0200 Subject: net/mlx5: Use a separate work queue for fib event handling In VF lag mode when remove the bonding module without bring down the bond device first, we could potentially have circular dependency when we unload IB devices and also handle fib events: 1. The bond work starts first; 2. The "modprobe -rv bonding" process tries to release the bond device, with the "pernet_ops_rwsem" lock hold; 3. The bond work blocks in unregister_netdevice_notifier() and waits for the lock because fib event came right before; 4. The kernel fib module tries to free all the fib entries by broadcasting the "FIB_EVENT_NH_DEL" event; 5. Upon the fib event this lag_mp module holds the fib lock and queue a fib work. So: bond work -> modprobe task -> kernel fib module -> lag_mp -> bond work Today we either reload IB devices in roce lag in nic mode or either handle fib events in switchdev mode, but a new feature could change that we'll need to reload IB devices also in switchdev mode so this is a future proof fix as one may not notice this later. Signed-off-by: Mark Zhang Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c | 14 ++++++++++---- drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h | 1 + 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c index 416676c35b1f..e9089a793632 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c @@ -93,9 +93,8 @@ static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev, static void mlx5_lag_fib_event_flush(struct notifier_block *nb) { struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb); - struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp); - flush_workqueue(ldev->wq); + flush_workqueue(mp->wq); } struct mlx5_fib_event_work { @@ -293,7 +292,7 @@ static int mlx5_lag_fib_event(struct notifier_block *nb, return NOTIFY_DONE; } - queue_work(ldev->wq, &fib_work->work); + queue_work(mp->wq, &fib_work->work); return NOTIFY_DONE; } @@ -306,11 +305,17 @@ int mlx5_lag_mp_init(struct mlx5_lag *ldev) if (mp->fib_nb.notifier_call) return 0; + mp->wq = create_singlethread_workqueue("mlx5_lag_mp"); + if (!mp->wq) + return -ENOMEM; + mp->fib_nb.notifier_call = mlx5_lag_fib_event; err = register_fib_notifier(&init_net, &mp->fib_nb, mlx5_lag_fib_event_flush, NULL); - if (err) + if (err) { + destroy_workqueue(mp->wq); mp->fib_nb.notifier_call = NULL; + } return err; } @@ -323,5 +328,6 @@ void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev) return; unregister_fib_notifier(&init_net, &mp->fib_nb); + destroy_workqueue(mp->wq); mp->fib_nb.notifier_call = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h index 79be89e9c7a4..258ac7b2964e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h @@ -16,6 +16,7 @@ enum mlx5_lag_port_affinity { struct lag_mp { struct notifier_block fib_nb; struct fib_info *mfi; /* used in tracking fib events */ + struct workqueue_struct *wq; }; #ifdef CONFIG_MLX5_ESWITCH -- cgit v1.3-14-g43fede From 49964352ca279acd2ae01321e86a55e50d48bf1d Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Fri, 13 Mar 2020 18:27:35 -0700 Subject: net/mlx5: E-Switch: Move eswitch chains to a new directory eswitch_offloads_chains.{c,h} were just introduced this kernel release cycle, eswitch is in high development demand right now and many features are planned to be added to it. eswitch deserves its own directory and here we move these new files to there, in preparation for upcoming eswitch features and new files. Signed-off-by: Saeed Mahameed Reviewed-by: Mark Bloch Reviewed-by: Roi Dayan --- drivers/net/ethernet/mellanox/mlx5/core/Makefile | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +- .../net/ethernet/mellanox/mlx5/core/esw/Makefile | 2 + .../net/ethernet/mellanox/mlx5/core/esw/chains.c | 944 +++++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/esw/chains.h | 49 ++ .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 2 +- .../mellanox/mlx5/core/eswitch_offloads_chains.c | 944 --------------------- .../mellanox/mlx5/core/eswitch_offloads_chains.h | 47 - 10 files changed, 1000 insertions(+), 996 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/Makefile create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/chains.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/chains.h delete mode 100644 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c delete mode 100644 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.h diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 7408ae380d23..6d32915000fc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -43,7 +43,7 @@ mlx5_core-$(CONFIG_MLX5_TC_CT) += en/tc_ct.o # Core extra # mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \ - ecpf.o rdma.o eswitch_offloads_chains.o + ecpf.o rdma.o esw/chains.o mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index a22ad6b90847..f4b28eb9d943 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -13,11 +13,11 @@ #include #include +#include "esw/chains.h" #include "en/tc_ct.h" #include "en.h" #include "en_tc.h" #include "en_rep.h" -#include "eswitch_offloads_chains.h" #define MLX5_CT_ZONE_BITS (mlx5e_tc_attr_to_reg_mappings[ZONE_TO_REG].mlen * 8) #define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index d7fa89276ea3..559453b4c6b6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -41,7 +41,7 @@ #include #include "eswitch.h" -#include "eswitch_offloads_chains.h" +#include "esw/chains.h" #include "en.h" #include "en_rep.h" #include "en_tc.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 901b5fa5568f..6474e0a01a54 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -51,7 +51,7 @@ #include "en_rep.h" #include "en_tc.h" #include "eswitch.h" -#include "eswitch_offloads_chains.h" +#include "esw/chains.h" #include "fs_core.h" #include "en/port.h" #include "en/tc_tun.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/esw/Makefile new file mode 100644 index 000000000000..c78512eed8d7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +subdir-ccflags-y += -I$(src)/.. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.c new file mode 100644 index 000000000000..029001040737 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.c @@ -0,0 +1,944 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2020 Mellanox Technologies. + +#include +#include +#include + +#include "esw/chains.h" +#include "en/mapping.h" +#include "mlx5_core.h" +#include "fs_core.h" +#include "eswitch.h" +#include "en.h" +#include "en_tc.h" + +#define esw_chains_priv(esw) ((esw)->fdb_table.offloads.esw_chains_priv) +#define esw_chains_lock(esw) (esw_chains_priv(esw)->lock) +#define esw_chains_ht(esw) (esw_chains_priv(esw)->chains_ht) +#define esw_chains_mapping(esw) (esw_chains_priv(esw)->chains_mapping) +#define esw_prios_ht(esw) (esw_chains_priv(esw)->prios_ht) +#define fdb_pool_left(esw) (esw_chains_priv(esw)->fdb_left) +#define tc_slow_fdb(esw) ((esw)->fdb_table.offloads.slow_fdb) +#define tc_end_fdb(esw) (esw_chains_priv(esw)->tc_end_fdb) +#define fdb_ignore_flow_level_supported(esw) \ + (MLX5_CAP_ESW_FLOWTABLE_FDB((esw)->dev, ignore_flow_level)) +#define fdb_modify_header_fwd_to_table_supported(esw) \ + (MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_modify_header_fwd_to_table)) + +/* Firmware currently has 4 pool of 4 sizes that it supports (ESW_POOLS), + * and a virtual memory region of 16M (ESW_SIZE), this region is duplicated + * for each flow table pool. We can allocate up to 16M of each pool, + * and we keep track of how much we used via get_next_avail_sz_from_pool. + * Firmware doesn't report any of this for now. + * ESW_POOL is expected to be sorted from large to small and match firmware + * pools. + */ +#define ESW_SIZE (16 * 1024 * 1024) +static const unsigned int ESW_POOLS[] = { 4 * 1024 * 1024, + 1 * 1024 * 1024, + 64 * 1024, + 128 }; +#define ESW_FT_TBL_SZ (64 * 1024) + +struct mlx5_esw_chains_priv { + struct rhashtable chains_ht; + struct rhashtable prios_ht; + /* Protects above chains_ht and prios_ht */ + struct mutex lock; + + struct mlx5_flow_table *tc_end_fdb; + struct mapping_ctx *chains_mapping; + + int fdb_left[ARRAY_SIZE(ESW_POOLS)]; +}; + +struct fdb_chain { + struct rhash_head node; + + u32 chain; + + int ref; + int id; + + struct mlx5_eswitch *esw; + struct list_head prios_list; + struct mlx5_flow_handle *restore_rule; + struct mlx5_modify_hdr *miss_modify_hdr; +}; + +struct fdb_prio_key { + u32 chain; + u32 prio; + u32 level; +}; + +struct fdb_prio { + struct rhash_head node; + struct list_head list; + + struct fdb_prio_key key; + + int ref; + + struct fdb_chain *fdb_chain; + struct mlx5_flow_table *fdb; + struct mlx5_flow_table *next_fdb; + struct mlx5_flow_group *miss_group; + struct mlx5_flow_handle *miss_rule; +}; + +static const struct rhashtable_params chain_params = { + .head_offset = offsetof(struct fdb_chain, node), + .key_offset = offsetof(struct fdb_chain, chain), + .key_len = sizeof_field(struct fdb_chain, chain), + .automatic_shrinking = true, +}; + +static const struct rhashtable_params prio_params = { + .head_offset = offsetof(struct fdb_prio, node), + .key_offset = offsetof(struct fdb_prio, key), + .key_len = sizeof_field(struct fdb_prio, key), + .automatic_shrinking = true, +}; + +bool mlx5_esw_chains_prios_supported(struct mlx5_eswitch *esw) +{ + return esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; +} + +bool mlx5_esw_chains_backwards_supported(struct mlx5_eswitch *esw) +{ + return mlx5_esw_chains_prios_supported(esw) && + fdb_ignore_flow_level_supported(esw); +} + +u32 mlx5_esw_chains_get_chain_range(struct mlx5_eswitch *esw) +{ + if (!mlx5_esw_chains_prios_supported(esw)) + return 1; + + if (fdb_ignore_flow_level_supported(esw)) + return UINT_MAX - 1; + + return FDB_TC_MAX_CHAIN; +} + +u32 mlx5_esw_chains_get_ft_chain(struct mlx5_eswitch *esw) +{ + return mlx5_esw_chains_get_chain_range(esw) + 1; +} + +u32 mlx5_esw_chains_get_prio_range(struct mlx5_eswitch *esw) +{ + if (!mlx5_esw_chains_prios_supported(esw)) + return 1; + + if (fdb_ignore_flow_level_supported(esw)) + return UINT_MAX; + + return FDB_TC_MAX_PRIO; +} + +static unsigned int mlx5_esw_chains_get_level_range(struct mlx5_eswitch *esw) +{ + if (fdb_ignore_flow_level_supported(esw)) + return UINT_MAX; + + return FDB_TC_LEVELS_PER_PRIO; +} + +#define POOL_NEXT_SIZE 0 +static int +mlx5_esw_chains_get_avail_sz_from_pool(struct mlx5_eswitch *esw, + int desired_size) +{ + int i, found_i = -1; + + for (i = ARRAY_SIZE(ESW_POOLS) - 1; i >= 0; i--) { + if (fdb_pool_left(esw)[i] && ESW_POOLS[i] > desired_size) { + found_i = i; + if (desired_size != POOL_NEXT_SIZE) + break; + } + } + + if (found_i != -1) { + --fdb_pool_left(esw)[found_i]; + return ESW_POOLS[found_i]; + } + + return 0; +} + +static void +mlx5_esw_chains_put_sz_to_pool(struct mlx5_eswitch *esw, int sz) +{ + int i; + + for (i = ARRAY_SIZE(ESW_POOLS) - 1; i >= 0; i--) { + if (sz == ESW_POOLS[i]) { + ++fdb_pool_left(esw)[i]; + return; + } + } + + WARN_ONCE(1, "Couldn't find size %d in fdb size pool", sz); +} + +static void +mlx5_esw_chains_init_sz_pool(struct mlx5_eswitch *esw) +{ + u32 fdb_max; + int i; + + fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, log_max_ft_size); + + for (i = ARRAY_SIZE(ESW_POOLS) - 1; i >= 0; i--) + fdb_pool_left(esw)[i] = + ESW_POOLS[i] <= fdb_max ? ESW_SIZE / ESW_POOLS[i] : 0; +} + +static struct mlx5_flow_table * +mlx5_esw_chains_create_fdb_table(struct mlx5_eswitch *esw, + u32 chain, u32 prio, u32 level) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *fdb; + int sz; + + if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) + ft_attr.flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | + MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); + + sz = (chain == mlx5_esw_chains_get_ft_chain(esw)) ? + mlx5_esw_chains_get_avail_sz_from_pool(esw, ESW_FT_TBL_SZ) : + mlx5_esw_chains_get_avail_sz_from_pool(esw, POOL_NEXT_SIZE); + if (!sz) + return ERR_PTR(-ENOSPC); + ft_attr.max_fte = sz; + + /* We use tc_slow_fdb(esw) as the table's next_ft till + * ignore_flow_level is allowed on FT creation and not just for FTEs. + * Instead caller should add an explicit miss rule if needed. + */ + ft_attr.next_ft = tc_slow_fdb(esw); + + /* The root table(chain 0, prio 1, level 0) is required to be + * connected to the previous prio (FDB_BYPASS_PATH if exists). + * We always create it, as a managed table, in order to align with + * fs_core logic. + */ + if (!fdb_ignore_flow_level_supported(esw) || + (chain == 0 && prio == 1 && level == 0)) { + ft_attr.level = level; + ft_attr.prio = prio - 1; + ns = mlx5_get_fdb_sub_ns(esw->dev, chain); + } else { + ft_attr.flags |= MLX5_FLOW_TABLE_UNMANAGED; + ft_attr.prio = FDB_TC_OFFLOAD; + /* Firmware doesn't allow us to create another level 0 table, + * so we create all unmanaged tables as level 1. + * + * To connect them, we use explicit miss rules with + * ignore_flow_level. Caller is responsible to create + * these rules (if needed). + */ + ft_attr.level = 1; + ns = mlx5_get_flow_namespace(esw->dev, MLX5_FLOW_NAMESPACE_FDB); + } + + ft_attr.autogroup.num_reserved_entries = 2; + ft_attr.autogroup.max_num_groups = esw->params.large_group_num; + fdb = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); + if (IS_ERR(fdb)) { + esw_warn(esw->dev, + "Failed to create FDB table err %d (chain: %d, prio: %d, level: %d, size: %d)\n", + (int)PTR_ERR(fdb), chain, prio, level, sz); + mlx5_esw_chains_put_sz_to_pool(esw, sz); + return fdb; + } + + return fdb; +} + +static void +mlx5_esw_chains_destroy_fdb_table(struct mlx5_eswitch *esw, + struct mlx5_flow_table *fdb) +{ + mlx5_esw_chains_put_sz_to_pool(esw, fdb->max_fte); + mlx5_destroy_flow_table(fdb); +} + +static int +create_fdb_chain_restore(struct fdb_chain *fdb_chain) +{ + char modact[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)]; + struct mlx5_eswitch *esw = fdb_chain->esw; + struct mlx5_modify_hdr *mod_hdr; + u32 index; + int err; + + if (fdb_chain->chain == mlx5_esw_chains_get_ft_chain(esw) || + !mlx5_esw_chains_prios_supported(esw)) + return 0; + + err = mapping_add(esw_chains_mapping(esw), &fdb_chain->chain, &index); + if (err) + return err; + if (index == MLX5_FS_DEFAULT_FLOW_TAG) { + /* we got the special default flow tag id, so we won't know + * if we actually marked the packet with the restore rule + * we create. + * + * This case isn't possible with MLX5_FS_DEFAULT_FLOW_TAG = 0. + */ + err = mapping_add(esw_chains_mapping(esw), + &fdb_chain->chain, &index); + mapping_remove(esw_chains_mapping(esw), + MLX5_FS_DEFAULT_FLOW_TAG); + if (err) + return err; + } + + fdb_chain->id = index; + + MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, modact, field, + mlx5e_tc_attr_to_reg_mappings[CHAIN_TO_REG].mfield); + MLX5_SET(set_action_in, modact, offset, + mlx5e_tc_attr_to_reg_mappings[CHAIN_TO_REG].moffset * 8); + MLX5_SET(set_action_in, modact, length, + mlx5e_tc_attr_to_reg_mappings[CHAIN_TO_REG].mlen * 8); + MLX5_SET(set_action_in, modact, data, fdb_chain->id); + mod_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB, + 1, modact); + if (IS_ERR(mod_hdr)) { + err = PTR_ERR(mod_hdr); + goto err_mod_hdr; + } + fdb_chain->miss_modify_hdr = mod_hdr; + + fdb_chain->restore_rule = esw_add_restore_rule(esw, fdb_chain->id); + if (IS_ERR(fdb_chain->restore_rule)) { + err = PTR_ERR(fdb_chain->restore_rule); + goto err_rule; + } + + return 0; + +err_rule: + mlx5_modify_header_dealloc(esw->dev, fdb_chain->miss_modify_hdr); +err_mod_hdr: + /* Datapath can't find this mapping, so we can safely remove it */ + mapping_remove(esw_chains_mapping(esw), fdb_chain->id); + return err; +} + +static void destroy_fdb_chain_restore(struct fdb_chain *fdb_chain) +{ + struct mlx5_eswitch *esw = fdb_chain->esw; + + if (!fdb_chain->miss_modify_hdr) + return; + + mlx5_del_flow_rules(fdb_chain->restore_rule); + mlx5_modify_header_dealloc(esw->dev, fdb_chain->miss_modify_hdr); + mapping_remove(esw_chains_mapping(esw), fdb_chain->id); +} + +static struct fdb_chain * +mlx5_esw_chains_create_fdb_chain(struct mlx5_eswitch *esw, u32 chain) +{ + struct fdb_chain *fdb_chain = NULL; + int err; + + fdb_chain = kvzalloc(sizeof(*fdb_chain), GFP_KERNEL); + if (!fdb_chain) + return ERR_PTR(-ENOMEM); + + fdb_chain->esw = esw; + fdb_chain->chain = chain; + INIT_LIST_HEAD(&fdb_chain->prios_list); + + err = create_fdb_chain_restore(fdb_chain); + if (err) + goto err_restore; + + err = rhashtable_insert_fast(&esw_chains_ht(esw), &fdb_chain->node, + chain_params); + if (err) + goto err_insert; + + return fdb_chain; + +err_insert: + destroy_fdb_chain_restore(fdb_chain); +err_restore: + kvfree(fdb_chain); + return ERR_PTR(err); +} + +static void +mlx5_esw_chains_destroy_fdb_chain(struct fdb_chain *fdb_chain) +{ + struct mlx5_eswitch *esw = fdb_chain->esw; + + rhashtable_remove_fast(&esw_chains_ht(esw), &fdb_chain->node, + chain_params); + + destroy_fdb_chain_restore(fdb_chain); + kvfree(fdb_chain); +} + +static struct fdb_chain * +mlx5_esw_chains_get_fdb_chain(struct mlx5_eswitch *esw, u32 chain) +{ + struct fdb_chain *fdb_chain; + + fdb_chain = rhashtable_lookup_fast(&esw_chains_ht(esw), &chain, + chain_params); + if (!fdb_chain) { + fdb_chain = mlx5_esw_chains_create_fdb_chain(esw, chain); + if (IS_ERR(fdb_chain)) + return fdb_chain; + } + + fdb_chain->ref++; + + return fdb_chain; +} + +static struct mlx5_flow_handle * +mlx5_esw_chains_add_miss_rule(struct fdb_chain *fdb_chain, + struct mlx5_flow_table *fdb, + struct mlx5_flow_table *next_fdb) +{ + struct mlx5_eswitch *esw = fdb_chain->esw; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act act = {}; + + act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL | FLOW_ACT_NO_APPEND; + act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = next_fdb; + + if (next_fdb == tc_end_fdb(esw) && + mlx5_esw_chains_prios_supported(esw)) { + act.modify_hdr = fdb_chain->miss_modify_hdr; + act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + } + + return mlx5_add_flow_rules(fdb, NULL, &act, &dest, 1); +} + +static int +mlx5_esw_chains_update_prio_prevs(struct fdb_prio *fdb_prio, + struct mlx5_flow_table *next_fdb) +{ + struct mlx5_flow_handle *miss_rules[FDB_TC_LEVELS_PER_PRIO + 1] = {}; + struct fdb_chain *fdb_chain = fdb_prio->fdb_chain; + struct fdb_prio *pos; + int n = 0, err; + + if (fdb_prio->key.level) + return 0; + + /* Iterate in reverse order until reaching the level 0 rule of + * the previous priority, adding all the miss rules first, so we can + * revert them if any of them fails. + */ + pos = fdb_prio; + list_for_each_entry_continue_reverse(pos, + &fdb_chain->prios_list, + list) { + miss_rules[n] = mlx5_esw_chains_add_miss_rule(fdb_chain, + pos->fdb, + next_fdb); + if (IS_ERR(miss_rules[n])) { + err = PTR_ERR(miss_rules[n]); + goto err_prev_rule; + } + + n++; + if (!pos->key.level) + break; + } + + /* Success, delete old miss rules, and update the pointers. */ + n = 0; + pos = fdb_prio; + list_for_each_entry_continue_reverse(pos, + &fdb_chain->prios_list, + list) { + mlx5_del_flow_rules(pos->miss_rule); + + pos->miss_rule = miss_rules[n]; + pos->next_fdb = next_fdb; + + n++; + if (!pos->key.level) + break; + } + + return 0; + +err_prev_rule: + while (--n >= 0) + mlx5_del_flow_rules(miss_rules[n]); + + return err; +} + +static void +mlx5_esw_chains_put_fdb_chain(struct fdb_chain *fdb_chain) +{ + if (--fdb_chain->ref == 0) + mlx5_esw_chains_destroy_fdb_chain(fdb_chain); +} + +static struct fdb_prio * +mlx5_esw_chains_create_fdb_prio(struct mlx5_eswitch *esw, + u32 chain, u32 prio, u32 level) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_handle *miss_rule = NULL; + struct mlx5_flow_group *miss_group; + struct fdb_prio *fdb_prio = NULL; + struct mlx5_flow_table *next_fdb; + struct fdb_chain *fdb_chain; + struct mlx5_flow_table *fdb; + struct list_head *pos; + u32 *flow_group_in; + int err; + + fdb_chain = mlx5_esw_chains_get_fdb_chain(esw, chain); + if (IS_ERR(fdb_chain)) + return ERR_CAST(fdb_chain); + + fdb_prio = kvzalloc(sizeof(*fdb_prio), GFP_KERNEL); + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!fdb_prio || !flow_group_in) { + err = -ENOMEM; + goto err_alloc; + } + + /* Chain's prio list is sorted by prio and level. + * And all levels of some prio point to the next prio's level 0. + * Example list (prio, level): + * (3,0)->(3,1)->(5,0)->(5,1)->(6,1)->(7,0) + * In hardware, we will we have the following pointers: + * (3,0) -> (5,0) -> (7,0) -> Slow path + * (3,1) -> (5,0) + * (5,1) -> (7,0) + * (6,1) -> (7,0) + */ + + /* Default miss for each chain: */ + next_fdb = (chain == mlx5_esw_chains_get_ft_chain(esw)) ? + tc_slow_fdb(esw) : + tc_end_fdb(esw); + list_for_each(pos, &fdb_chain->prios_list) { + struct fdb_prio *p = list_entry(pos, struct fdb_prio, list); + + /* exit on first pos that is larger */ + if (prio < p->key.prio || (prio == p->key.prio && + level < p->key.level)) { + /* Get next level 0 table */ + next_fdb = p->key.level == 0 ? p->fdb : p->next_fdb; + break; + } + } + + fdb = mlx5_esw_chains_create_fdb_table(esw, chain, prio, level); + if (IS_ERR(fdb)) { + err = PTR_ERR(fdb); + goto err_create; + } + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, + fdb->max_fte - 2); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + fdb->max_fte - 1); + miss_group = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(miss_group)) { + err = PTR_ERR(miss_group); + goto err_group; + } + + /* Add miss rule to next_fdb */ + miss_rule = mlx5_esw_chains_add_miss_rule(fdb_chain, fdb, next_fdb); + if (IS_ERR(miss_rule)) { + err = PTR_ERR(miss_rule); + goto err_miss_rule; + } + + fdb_prio->miss_group = miss_group; + fdb_prio->miss_rule = miss_rule; + fdb_prio->next_fdb = next_fdb; + fdb_prio->fdb_chain = fdb_chain; + fdb_prio->key.chain = chain; + fdb_prio->key.prio = prio; + fdb_prio->key.level = level; + fdb_prio->fdb = fdb; + + err = rhashtable_insert_fast(&esw_prios_ht(esw), &fdb_prio->node, + prio_params); + if (err) + goto err_insert; + + list_add(&fdb_prio->list, pos->prev); + + /* Table is ready, connect it */ + err = mlx5_esw_chains_update_prio_prevs(fdb_prio, fdb); + if (err) + goto err_update; + + kvfree(flow_group_in); + return fdb_prio; + +err_update: + list_del(&fdb_prio->list); + rhashtable_remove_fast(&esw_prios_ht(esw), &fdb_prio->node, + prio_params); +err_insert: + mlx5_del_flow_rules(miss_rule); +err_miss_rule: + mlx5_destroy_flow_group(miss_group); +err_group: + mlx5_esw_chains_destroy_fdb_table(esw, fdb); +err_create: +err_alloc: + kvfree(fdb_prio); + kvfree(flow_group_in); + mlx5_esw_chains_put_fdb_chain(fdb_chain); + return ERR_PTR(err); +} + +static void +mlx5_esw_chains_destroy_fdb_prio(struct mlx5_eswitch *esw, + struct fdb_prio *fdb_prio) +{ + struct fdb_chain *fdb_chain = fdb_prio->fdb_chain; + + WARN_ON(mlx5_esw_chains_update_prio_prevs(fdb_prio, + fdb_prio->next_fdb)); + + list_del(&fdb_prio->list); + rhashtable_remove_fast(&esw_prios_ht(esw), &fdb_prio->node, + prio_params); + mlx5_del_flow_rules(fdb_prio->miss_rule); + mlx5_destroy_flow_group(fdb_prio->miss_group); + mlx5_esw_chains_destroy_fdb_table(esw, fdb_prio->fdb); + mlx5_esw_chains_put_fdb_chain(fdb_chain); + kvfree(fdb_prio); +} + +struct mlx5_flow_table * +mlx5_esw_chains_get_table(struct mlx5_eswitch *esw, u32 chain, u32 prio, + u32 level) +{ + struct mlx5_flow_table *prev_fts; + struct fdb_prio *fdb_prio; + struct fdb_prio_key key; + int l = 0; + + if ((chain > mlx5_esw_chains_get_chain_range(esw) && + chain != mlx5_esw_chains_get_ft_chain(esw)) || + prio > mlx5_esw_chains_get_prio_range(esw) || + level > mlx5_esw_chains_get_level_range(esw)) + return ERR_PTR(-EOPNOTSUPP); + + /* create earlier levels for correct fs_core lookup when + * connecting tables. + */ + for (l = 0; l < level; l++) { + prev_fts = mlx5_esw_chains_get_table(esw, chain, prio, l); + if (IS_ERR(prev_fts)) { + fdb_prio = ERR_CAST(prev_fts); + goto err_get_prevs; + } + } + + key.chain = chain; + key.prio = prio; + key.level = level; + + mutex_lock(&esw_chains_lock(esw)); + fdb_prio = rhashtable_lookup_fast(&esw_prios_ht(esw), &key, + prio_params); + if (!fdb_prio) { + fdb_prio = mlx5_esw_chains_create_fdb_prio(esw, chain, + prio, level); + if (IS_ERR(fdb_prio)) + goto err_create_prio; + } + + ++fdb_prio->ref; + mutex_unlock(&esw_chains_lock(esw)); + + return fdb_prio->fdb; + +err_create_prio: + mutex_unlock(&esw_chains_lock(esw)); +err_get_prevs: + while (--l >= 0) + mlx5_esw_chains_put_table(esw, chain, prio, l); + return ERR_CAST(fdb_prio); +} + +void +mlx5_esw_chains_put_table(struct mlx5_eswitch *esw, u32 chain, u32 prio, + u32 level) +{ + struct fdb_prio *fdb_prio; + struct fdb_prio_key key; + + key.chain = chain; + key.prio = prio; + key.level = level; + + mutex_lock(&esw_chains_lock(esw)); + fdb_prio = rhashtable_lookup_fast(&esw_prios_ht(esw), &key, + prio_params); + if (!fdb_prio) + goto err_get_prio; + + if (--fdb_prio->ref == 0) + mlx5_esw_chains_destroy_fdb_prio(esw, fdb_prio); + mutex_unlock(&esw_chains_lock(esw)); + + while (level-- > 0) + mlx5_esw_chains_put_table(esw, chain, prio, level); + + return; + +err_get_prio: + mutex_unlock(&esw_chains_lock(esw)); + WARN_ONCE(1, + "Couldn't find table: (chain: %d prio: %d level: %d)", + chain, prio, level); +} + +struct mlx5_flow_table * +mlx5_esw_chains_get_tc_end_ft(struct mlx5_eswitch *esw) +{ + return tc_end_fdb(esw); +} + +struct mlx5_flow_table * +mlx5_esw_chains_create_global_table(struct mlx5_eswitch *esw) +{ + u32 chain, prio, level; + int err; + + if (!fdb_ignore_flow_level_supported(esw)) { + err = -EOPNOTSUPP; + + esw_warn(esw->dev, + "Couldn't create global flow table, ignore_flow_level not supported."); + goto err_ignore; + } + + chain = mlx5_esw_chains_get_chain_range(esw), + prio = mlx5_esw_chains_get_prio_range(esw); + level = mlx5_esw_chains_get_level_range(esw); + + return mlx5_esw_chains_create_fdb_table(esw, chain, prio, level); + +err_ignore: + return ERR_PTR(err); +} + +void +mlx5_esw_chains_destroy_global_table(struct mlx5_eswitch *esw, + struct mlx5_flow_table *ft) +{ + mlx5_esw_chains_destroy_fdb_table(esw, ft); +} + +static int +mlx5_esw_chains_init(struct mlx5_eswitch *esw) +{ + struct mlx5_esw_chains_priv *chains_priv; + struct mlx5_core_dev *dev = esw->dev; + u32 max_flow_counter, fdb_max; + struct mapping_ctx *mapping; + int err; + + chains_priv = kzalloc(sizeof(*chains_priv), GFP_KERNEL); + if (!chains_priv) + return -ENOMEM; + esw_chains_priv(esw) = chains_priv; + + max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | + MLX5_CAP_GEN(dev, max_flow_counter_15_0); + fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size); + + esw_debug(dev, + "Init esw offloads chains, max counters(%d), groups(%d), max flow table size(%d)\n", + max_flow_counter, esw->params.large_group_num, fdb_max); + + mlx5_esw_chains_init_sz_pool(esw); + + if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, multi_fdb_encap) && + esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) { + esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; + esw_warn(dev, "Tc chains and priorities offload aren't supported, update firmware if needed\n"); + } else if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) { + esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; + esw_warn(dev, "Tc chains and priorities offload aren't supported\n"); + } else if (!fdb_modify_header_fwd_to_table_supported(esw)) { + /* Disabled when ttl workaround is needed, e.g + * when ESWITCH_IPV4_TTL_MODIFY_ENABLE = true in mlxconfig + */ + esw_warn(dev, + "Tc chains and priorities offload aren't supported, check firmware version, or mlxconfig settings\n"); + esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; + } else { + esw->fdb_table.flags |= ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; + esw_info(dev, "Supported tc offload range - chains: %u, prios: %u\n", + mlx5_esw_chains_get_chain_range(esw), + mlx5_esw_chains_get_prio_range(esw)); + } + + err = rhashtable_init(&esw_chains_ht(esw), &chain_params); + if (err) + goto init_chains_ht_err; + + err = rhashtable_init(&esw_prios_ht(esw), &prio_params); + if (err) + goto init_prios_ht_err; + + mapping = mapping_create(sizeof(u32), esw_get_max_restore_tag(esw), + true); + if (IS_ERR(mapping)) { + err = PTR_ERR(mapping); + goto mapping_err; + } + esw_chains_mapping(esw) = mapping; + + mutex_init(&esw_chains_lock(esw)); + + return 0; + +mapping_err: + rhashtable_destroy(&esw_prios_ht(esw)); +init_prios_ht_err: + rhashtable_destroy(&esw_chains_ht(esw)); +init_chains_ht_err: + kfree(chains_priv); + return err; +} + +static void +mlx5_esw_chains_cleanup(struct mlx5_eswitch *esw) +{ + mutex_destroy(&esw_chains_lock(esw)); + mapping_destroy(esw_chains_mapping(esw)); + rhashtable_destroy(&esw_prios_ht(esw)); + rhashtable_destroy(&esw_chains_ht(esw)); + + kfree(esw_chains_priv(esw)); +} + +static int +mlx5_esw_chains_open(struct mlx5_eswitch *esw) +{ + struct mlx5_flow_table *ft; + int err; + + /* Create tc_end_fdb(esw) which is the always created ft chain */ + ft = mlx5_esw_chains_get_table(esw, mlx5_esw_chains_get_ft_chain(esw), + 1, 0); + if (IS_ERR(ft)) + return PTR_ERR(ft); + + tc_end_fdb(esw) = ft; + + /* Always open the root for fast path */ + ft = mlx5_esw_chains_get_table(esw, 0, 1, 0); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto level_0_err; + } + + /* Open level 1 for split rules now if prios isn't supported */ + if (!mlx5_esw_chains_prios_supported(esw)) { + err = mlx5_esw_vport_tbl_get(esw); + if (err) + goto level_1_err; + } + + return 0; + +level_1_err: + mlx5_esw_chains_put_table(esw, 0, 1, 0); +level_0_err: + mlx5_esw_chains_put_table(esw, mlx5_esw_chains_get_ft_chain(esw), 1, 0); + return err; +} + +static void +mlx5_esw_chains_close(struct mlx5_eswitch *esw) +{ + if (!mlx5_esw_chains_prios_supported(esw)) + mlx5_esw_vport_tbl_put(esw); + mlx5_esw_chains_put_table(esw, 0, 1, 0); + mlx5_esw_chains_put_table(esw, mlx5_esw_chains_get_ft_chain(esw), 1, 0); +} + +int +mlx5_esw_chains_create(struct mlx5_eswitch *esw) +{ + int err; + + err = mlx5_esw_chains_init(esw); + if (err) + return err; + + err = mlx5_esw_chains_open(esw); + if (err) + goto err_open; + + return 0; + +err_open: + mlx5_esw_chains_cleanup(esw); + return err; +} + +void +mlx5_esw_chains_destroy(struct mlx5_eswitch *esw) +{ + mlx5_esw_chains_close(esw); + mlx5_esw_chains_cleanup(esw); +} + +int +mlx5_esw_chains_get_chain_mapping(struct mlx5_eswitch *esw, u32 chain, + u32 *chain_mapping) +{ + return mapping_add(esw_chains_mapping(esw), &chain, chain_mapping); +} + +int +mlx5_esw_chains_put_chain_mapping(struct mlx5_eswitch *esw, u32 chain_mapping) +{ + return mapping_remove(esw_chains_mapping(esw), chain_mapping); +} + +int mlx5_eswitch_get_chain_for_tag(struct mlx5_eswitch *esw, u32 tag, + u32 *chain) +{ + int err; + + err = mapping_find(esw_chains_mapping(esw), tag, chain); + if (err) { + esw_warn(esw->dev, "Can't find chain for tag: %d\n", tag); + return -ENOENT; + } + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.h new file mode 100644 index 000000000000..f8c4239846ea --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies. */ + +#ifndef __ML5_ESW_CHAINS_H__ +#define __ML5_ESW_CHAINS_H__ + +#include "eswitch.h" + +bool +mlx5_esw_chains_prios_supported(struct mlx5_eswitch *esw); +bool +mlx5_esw_chains_backwards_supported(struct mlx5_eswitch *esw); +u32 +mlx5_esw_chains_get_prio_range(struct mlx5_eswitch *esw); +u32 +mlx5_esw_chains_get_chain_range(struct mlx5_eswitch *esw); +u32 +mlx5_esw_chains_get_ft_chain(struct mlx5_eswitch *esw); + +struct mlx5_flow_table * +mlx5_esw_chains_get_table(struct mlx5_eswitch *esw, u32 chain, u32 prio, + u32 level); +void +mlx5_esw_chains_put_table(struct mlx5_eswitch *esw, u32 chain, u32 prio, + u32 level); + +struct mlx5_flow_table * +mlx5_esw_chains_get_tc_end_ft(struct mlx5_eswitch *esw); + +struct mlx5_flow_table * +mlx5_esw_chains_create_global_table(struct mlx5_eswitch *esw); +void +mlx5_esw_chains_destroy_global_table(struct mlx5_eswitch *esw, + struct mlx5_flow_table *ft); + +int +mlx5_esw_chains_get_chain_mapping(struct mlx5_eswitch *esw, u32 chain, + u32 *chain_mapping); +int +mlx5_esw_chains_put_chain_mapping(struct mlx5_eswitch *esw, + u32 chain_mapping); + +int mlx5_esw_chains_create(struct mlx5_eswitch *esw); +void mlx5_esw_chains_destroy(struct mlx5_eswitch *esw); + +int +mlx5_eswitch_get_chain_for_tag(struct mlx5_eswitch *esw, u32 tag, u32 *chain); + +#endif /* __ML5_ESW_CHAINS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 612bc7d1cdcd..f171eb2234b0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -37,7 +37,7 @@ #include #include "mlx5_core.h" #include "eswitch.h" -#include "eswitch_offloads_chains.h" +#include "esw/chains.h" #include "rdma.h" #include "en.h" #include "fs_core.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c deleted file mode 100644 index 184cea62254f..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.c +++ /dev/null @@ -1,944 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB -// Copyright (c) 2020 Mellanox Technologies. - -#include -#include -#include - -#include "eswitch_offloads_chains.h" -#include "en/mapping.h" -#include "mlx5_core.h" -#include "fs_core.h" -#include "eswitch.h" -#include "en.h" -#include "en_tc.h" - -#define esw_chains_priv(esw) ((esw)->fdb_table.offloads.esw_chains_priv) -#define esw_chains_lock(esw) (esw_chains_priv(esw)->lock) -#define esw_chains_ht(esw) (esw_chains_priv(esw)->chains_ht) -#define esw_chains_mapping(esw) (esw_chains_priv(esw)->chains_mapping) -#define esw_prios_ht(esw) (esw_chains_priv(esw)->prios_ht) -#define fdb_pool_left(esw) (esw_chains_priv(esw)->fdb_left) -#define tc_slow_fdb(esw) ((esw)->fdb_table.offloads.slow_fdb) -#define tc_end_fdb(esw) (esw_chains_priv(esw)->tc_end_fdb) -#define fdb_ignore_flow_level_supported(esw) \ - (MLX5_CAP_ESW_FLOWTABLE_FDB((esw)->dev, ignore_flow_level)) -#define fdb_modify_header_fwd_to_table_supported(esw) \ - (MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_modify_header_fwd_to_table)) - -/* Firmware currently has 4 pool of 4 sizes that it supports (ESW_POOLS), - * and a virtual memory region of 16M (ESW_SIZE), this region is duplicated - * for each flow table pool. We can allocate up to 16M of each pool, - * and we keep track of how much we used via get_next_avail_sz_from_pool. - * Firmware doesn't report any of this for now. - * ESW_POOL is expected to be sorted from large to small and match firmware - * pools. - */ -#define ESW_SIZE (16 * 1024 * 1024) -static const unsigned int ESW_POOLS[] = { 4 * 1024 * 1024, - 1 * 1024 * 1024, - 64 * 1024, - 128 }; -#define ESW_FT_TBL_SZ (64 * 1024) - -struct mlx5_esw_chains_priv { - struct rhashtable chains_ht; - struct rhashtable prios_ht; - /* Protects above chains_ht and prios_ht */ - struct mutex lock; - - struct mlx5_flow_table *tc_end_fdb; - struct mapping_ctx *chains_mapping; - - int fdb_left[ARRAY_SIZE(ESW_POOLS)]; -}; - -struct fdb_chain { - struct rhash_head node; - - u32 chain; - - int ref; - int id; - - struct mlx5_eswitch *esw; - struct list_head prios_list; - struct mlx5_flow_handle *restore_rule; - struct mlx5_modify_hdr *miss_modify_hdr; -}; - -struct fdb_prio_key { - u32 chain; - u32 prio; - u32 level; -}; - -struct fdb_prio { - struct rhash_head node; - struct list_head list; - - struct fdb_prio_key key; - - int ref; - - struct fdb_chain *fdb_chain; - struct mlx5_flow_table *fdb; - struct mlx5_flow_table *next_fdb; - struct mlx5_flow_group *miss_group; - struct mlx5_flow_handle *miss_rule; -}; - -static const struct rhashtable_params chain_params = { - .head_offset = offsetof(struct fdb_chain, node), - .key_offset = offsetof(struct fdb_chain, chain), - .key_len = sizeof_field(struct fdb_chain, chain), - .automatic_shrinking = true, -}; - -static const struct rhashtable_params prio_params = { - .head_offset = offsetof(struct fdb_prio, node), - .key_offset = offsetof(struct fdb_prio, key), - .key_len = sizeof_field(struct fdb_prio, key), - .automatic_shrinking = true, -}; - -bool mlx5_esw_chains_prios_supported(struct mlx5_eswitch *esw) -{ - return esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; -} - -bool mlx5_esw_chains_backwards_supported(struct mlx5_eswitch *esw) -{ - return mlx5_esw_chains_prios_supported(esw) && - fdb_ignore_flow_level_supported(esw); -} - -u32 mlx5_esw_chains_get_chain_range(struct mlx5_eswitch *esw) -{ - if (!mlx5_esw_chains_prios_supported(esw)) - return 1; - - if (fdb_ignore_flow_level_supported(esw)) - return UINT_MAX - 1; - - return FDB_TC_MAX_CHAIN; -} - -u32 mlx5_esw_chains_get_ft_chain(struct mlx5_eswitch *esw) -{ - return mlx5_esw_chains_get_chain_range(esw) + 1; -} - -u32 mlx5_esw_chains_get_prio_range(struct mlx5_eswitch *esw) -{ - if (!mlx5_esw_chains_prios_supported(esw)) - return 1; - - if (fdb_ignore_flow_level_supported(esw)) - return UINT_MAX; - - return FDB_TC_MAX_PRIO; -} - -static unsigned int mlx5_esw_chains_get_level_range(struct mlx5_eswitch *esw) -{ - if (fdb_ignore_flow_level_supported(esw)) - return UINT_MAX; - - return FDB_TC_LEVELS_PER_PRIO; -} - -#define POOL_NEXT_SIZE 0 -static int -mlx5_esw_chains_get_avail_sz_from_pool(struct mlx5_eswitch *esw, - int desired_size) -{ - int i, found_i = -1; - - for (i = ARRAY_SIZE(ESW_POOLS) - 1; i >= 0; i--) { - if (fdb_pool_left(esw)[i] && ESW_POOLS[i] > desired_size) { - found_i = i; - if (desired_size != POOL_NEXT_SIZE) - break; - } - } - - if (found_i != -1) { - --fdb_pool_left(esw)[found_i]; - return ESW_POOLS[found_i]; - } - - return 0; -} - -static void -mlx5_esw_chains_put_sz_to_pool(struct mlx5_eswitch *esw, int sz) -{ - int i; - - for (i = ARRAY_SIZE(ESW_POOLS) - 1; i >= 0; i--) { - if (sz == ESW_POOLS[i]) { - ++fdb_pool_left(esw)[i]; - return; - } - } - - WARN_ONCE(1, "Couldn't find size %d in fdb size pool", sz); -} - -static void -mlx5_esw_chains_init_sz_pool(struct mlx5_eswitch *esw) -{ - u32 fdb_max; - int i; - - fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, log_max_ft_size); - - for (i = ARRAY_SIZE(ESW_POOLS) - 1; i >= 0; i--) - fdb_pool_left(esw)[i] = - ESW_POOLS[i] <= fdb_max ? ESW_SIZE / ESW_POOLS[i] : 0; -} - -static struct mlx5_flow_table * -mlx5_esw_chains_create_fdb_table(struct mlx5_eswitch *esw, - u32 chain, u32 prio, u32 level) -{ - struct mlx5_flow_table_attr ft_attr = {}; - struct mlx5_flow_namespace *ns; - struct mlx5_flow_table *fdb; - int sz; - - if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) - ft_attr.flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | - MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); - - sz = (chain == mlx5_esw_chains_get_ft_chain(esw)) ? - mlx5_esw_chains_get_avail_sz_from_pool(esw, ESW_FT_TBL_SZ) : - mlx5_esw_chains_get_avail_sz_from_pool(esw, POOL_NEXT_SIZE); - if (!sz) - return ERR_PTR(-ENOSPC); - ft_attr.max_fte = sz; - - /* We use tc_slow_fdb(esw) as the table's next_ft till - * ignore_flow_level is allowed on FT creation and not just for FTEs. - * Instead caller should add an explicit miss rule if needed. - */ - ft_attr.next_ft = tc_slow_fdb(esw); - - /* The root table(chain 0, prio 1, level 0) is required to be - * connected to the previous prio (FDB_BYPASS_PATH if exists). - * We always create it, as a managed table, in order to align with - * fs_core logic. - */ - if (!fdb_ignore_flow_level_supported(esw) || - (chain == 0 && prio == 1 && level == 0)) { - ft_attr.level = level; - ft_attr.prio = prio - 1; - ns = mlx5_get_fdb_sub_ns(esw->dev, chain); - } else { - ft_attr.flags |= MLX5_FLOW_TABLE_UNMANAGED; - ft_attr.prio = FDB_TC_OFFLOAD; - /* Firmware doesn't allow us to create another level 0 table, - * so we create all unmanaged tables as level 1. - * - * To connect them, we use explicit miss rules with - * ignore_flow_level. Caller is responsible to create - * these rules (if needed). - */ - ft_attr.level = 1; - ns = mlx5_get_flow_namespace(esw->dev, MLX5_FLOW_NAMESPACE_FDB); - } - - ft_attr.autogroup.num_reserved_entries = 2; - ft_attr.autogroup.max_num_groups = esw->params.large_group_num; - fdb = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); - if (IS_ERR(fdb)) { - esw_warn(esw->dev, - "Failed to create FDB table err %d (chain: %d, prio: %d, level: %d, size: %d)\n", - (int)PTR_ERR(fdb), chain, prio, level, sz); - mlx5_esw_chains_put_sz_to_pool(esw, sz); - return fdb; - } - - return fdb; -} - -static void -mlx5_esw_chains_destroy_fdb_table(struct mlx5_eswitch *esw, - struct mlx5_flow_table *fdb) -{ - mlx5_esw_chains_put_sz_to_pool(esw, fdb->max_fte); - mlx5_destroy_flow_table(fdb); -} - -static int -create_fdb_chain_restore(struct fdb_chain *fdb_chain) -{ - char modact[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)]; - struct mlx5_eswitch *esw = fdb_chain->esw; - struct mlx5_modify_hdr *mod_hdr; - u32 index; - int err; - - if (fdb_chain->chain == mlx5_esw_chains_get_ft_chain(esw) || - !mlx5_esw_chains_prios_supported(esw)) - return 0; - - err = mapping_add(esw_chains_mapping(esw), &fdb_chain->chain, &index); - if (err) - return err; - if (index == MLX5_FS_DEFAULT_FLOW_TAG) { - /* we got the special default flow tag id, so we won't know - * if we actually marked the packet with the restore rule - * we create. - * - * This case isn't possible with MLX5_FS_DEFAULT_FLOW_TAG = 0. - */ - err = mapping_add(esw_chains_mapping(esw), - &fdb_chain->chain, &index); - mapping_remove(esw_chains_mapping(esw), - MLX5_FS_DEFAULT_FLOW_TAG); - if (err) - return err; - } - - fdb_chain->id = index; - - MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); - MLX5_SET(set_action_in, modact, field, - mlx5e_tc_attr_to_reg_mappings[CHAIN_TO_REG].mfield); - MLX5_SET(set_action_in, modact, offset, - mlx5e_tc_attr_to_reg_mappings[CHAIN_TO_REG].moffset * 8); - MLX5_SET(set_action_in, modact, length, - mlx5e_tc_attr_to_reg_mappings[CHAIN_TO_REG].mlen * 8); - MLX5_SET(set_action_in, modact, data, fdb_chain->id); - mod_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB, - 1, modact); - if (IS_ERR(mod_hdr)) { - err = PTR_ERR(mod_hdr); - goto err_mod_hdr; - } - fdb_chain->miss_modify_hdr = mod_hdr; - - fdb_chain->restore_rule = esw_add_restore_rule(esw, fdb_chain->id); - if (IS_ERR(fdb_chain->restore_rule)) { - err = PTR_ERR(fdb_chain->restore_rule); - goto err_rule; - } - - return 0; - -err_rule: - mlx5_modify_header_dealloc(esw->dev, fdb_chain->miss_modify_hdr); -err_mod_hdr: - /* Datapath can't find this mapping, so we can safely remove it */ - mapping_remove(esw_chains_mapping(esw), fdb_chain->id); - return err; -} - -static void destroy_fdb_chain_restore(struct fdb_chain *fdb_chain) -{ - struct mlx5_eswitch *esw = fdb_chain->esw; - - if (!fdb_chain->miss_modify_hdr) - return; - - mlx5_del_flow_rules(fdb_chain->restore_rule); - mlx5_modify_header_dealloc(esw->dev, fdb_chain->miss_modify_hdr); - mapping_remove(esw_chains_mapping(esw), fdb_chain->id); -} - -static struct fdb_chain * -mlx5_esw_chains_create_fdb_chain(struct mlx5_eswitch *esw, u32 chain) -{ - struct fdb_chain *fdb_chain = NULL; - int err; - - fdb_chain = kvzalloc(sizeof(*fdb_chain), GFP_KERNEL); - if (!fdb_chain) - return ERR_PTR(-ENOMEM); - - fdb_chain->esw = esw; - fdb_chain->chain = chain; - INIT_LIST_HEAD(&fdb_chain->prios_list); - - err = create_fdb_chain_restore(fdb_chain); - if (err) - goto err_restore; - - err = rhashtable_insert_fast(&esw_chains_ht(esw), &fdb_chain->node, - chain_params); - if (err) - goto err_insert; - - return fdb_chain; - -err_insert: - destroy_fdb_chain_restore(fdb_chain); -err_restore: - kvfree(fdb_chain); - return ERR_PTR(err); -} - -static void -mlx5_esw_chains_destroy_fdb_chain(struct fdb_chain *fdb_chain) -{ - struct mlx5_eswitch *esw = fdb_chain->esw; - - rhashtable_remove_fast(&esw_chains_ht(esw), &fdb_chain->node, - chain_params); - - destroy_fdb_chain_restore(fdb_chain); - kvfree(fdb_chain); -} - -static struct fdb_chain * -mlx5_esw_chains_get_fdb_chain(struct mlx5_eswitch *esw, u32 chain) -{ - struct fdb_chain *fdb_chain; - - fdb_chain = rhashtable_lookup_fast(&esw_chains_ht(esw), &chain, - chain_params); - if (!fdb_chain) { - fdb_chain = mlx5_esw_chains_create_fdb_chain(esw, chain); - if (IS_ERR(fdb_chain)) - return fdb_chain; - } - - fdb_chain->ref++; - - return fdb_chain; -} - -static struct mlx5_flow_handle * -mlx5_esw_chains_add_miss_rule(struct fdb_chain *fdb_chain, - struct mlx5_flow_table *fdb, - struct mlx5_flow_table *next_fdb) -{ - struct mlx5_eswitch *esw = fdb_chain->esw; - struct mlx5_flow_destination dest = {}; - struct mlx5_flow_act act = {}; - - act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL | FLOW_ACT_NO_APPEND; - act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest.ft = next_fdb; - - if (next_fdb == tc_end_fdb(esw) && - mlx5_esw_chains_prios_supported(esw)) { - act.modify_hdr = fdb_chain->miss_modify_hdr; - act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - } - - return mlx5_add_flow_rules(fdb, NULL, &act, &dest, 1); -} - -static int -mlx5_esw_chains_update_prio_prevs(struct fdb_prio *fdb_prio, - struct mlx5_flow_table *next_fdb) -{ - struct mlx5_flow_handle *miss_rules[FDB_TC_LEVELS_PER_PRIO + 1] = {}; - struct fdb_chain *fdb_chain = fdb_prio->fdb_chain; - struct fdb_prio *pos; - int n = 0, err; - - if (fdb_prio->key.level) - return 0; - - /* Iterate in reverse order until reaching the level 0 rule of - * the previous priority, adding all the miss rules first, so we can - * revert them if any of them fails. - */ - pos = fdb_prio; - list_for_each_entry_continue_reverse(pos, - &fdb_chain->prios_list, - list) { - miss_rules[n] = mlx5_esw_chains_add_miss_rule(fdb_chain, - pos->fdb, - next_fdb); - if (IS_ERR(miss_rules[n])) { - err = PTR_ERR(miss_rules[n]); - goto err_prev_rule; - } - - n++; - if (!pos->key.level) - break; - } - - /* Success, delete old miss rules, and update the pointers. */ - n = 0; - pos = fdb_prio; - list_for_each_entry_continue_reverse(pos, - &fdb_chain->prios_list, - list) { - mlx5_del_flow_rules(pos->miss_rule); - - pos->miss_rule = miss_rules[n]; - pos->next_fdb = next_fdb; - - n++; - if (!pos->key.level) - break; - } - - return 0; - -err_prev_rule: - while (--n >= 0) - mlx5_del_flow_rules(miss_rules[n]); - - return err; -} - -static void -mlx5_esw_chains_put_fdb_chain(struct fdb_chain *fdb_chain) -{ - if (--fdb_chain->ref == 0) - mlx5_esw_chains_destroy_fdb_chain(fdb_chain); -} - -static struct fdb_prio * -mlx5_esw_chains_create_fdb_prio(struct mlx5_eswitch *esw, - u32 chain, u32 prio, u32 level) -{ - int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - struct mlx5_flow_handle *miss_rule = NULL; - struct mlx5_flow_group *miss_group; - struct fdb_prio *fdb_prio = NULL; - struct mlx5_flow_table *next_fdb; - struct fdb_chain *fdb_chain; - struct mlx5_flow_table *fdb; - struct list_head *pos; - u32 *flow_group_in; - int err; - - fdb_chain = mlx5_esw_chains_get_fdb_chain(esw, chain); - if (IS_ERR(fdb_chain)) - return ERR_CAST(fdb_chain); - - fdb_prio = kvzalloc(sizeof(*fdb_prio), GFP_KERNEL); - flow_group_in = kvzalloc(inlen, GFP_KERNEL); - if (!fdb_prio || !flow_group_in) { - err = -ENOMEM; - goto err_alloc; - } - - /* Chain's prio list is sorted by prio and level. - * And all levels of some prio point to the next prio's level 0. - * Example list (prio, level): - * (3,0)->(3,1)->(5,0)->(5,1)->(6,1)->(7,0) - * In hardware, we will we have the following pointers: - * (3,0) -> (5,0) -> (7,0) -> Slow path - * (3,1) -> (5,0) - * (5,1) -> (7,0) - * (6,1) -> (7,0) - */ - - /* Default miss for each chain: */ - next_fdb = (chain == mlx5_esw_chains_get_ft_chain(esw)) ? - tc_slow_fdb(esw) : - tc_end_fdb(esw); - list_for_each(pos, &fdb_chain->prios_list) { - struct fdb_prio *p = list_entry(pos, struct fdb_prio, list); - - /* exit on first pos that is larger */ - if (prio < p->key.prio || (prio == p->key.prio && - level < p->key.level)) { - /* Get next level 0 table */ - next_fdb = p->key.level == 0 ? p->fdb : p->next_fdb; - break; - } - } - - fdb = mlx5_esw_chains_create_fdb_table(esw, chain, prio, level); - if (IS_ERR(fdb)) { - err = PTR_ERR(fdb); - goto err_create; - } - - MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, - fdb->max_fte - 2); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, - fdb->max_fte - 1); - miss_group = mlx5_create_flow_group(fdb, flow_group_in); - if (IS_ERR(miss_group)) { - err = PTR_ERR(miss_group); - goto err_group; - } - - /* Add miss rule to next_fdb */ - miss_rule = mlx5_esw_chains_add_miss_rule(fdb_chain, fdb, next_fdb); - if (IS_ERR(miss_rule)) { - err = PTR_ERR(miss_rule); - goto err_miss_rule; - } - - fdb_prio->miss_group = miss_group; - fdb_prio->miss_rule = miss_rule; - fdb_prio->next_fdb = next_fdb; - fdb_prio->fdb_chain = fdb_chain; - fdb_prio->key.chain = chain; - fdb_prio->key.prio = prio; - fdb_prio->key.level = level; - fdb_prio->fdb = fdb; - - err = rhashtable_insert_fast(&esw_prios_ht(esw), &fdb_prio->node, - prio_params); - if (err) - goto err_insert; - - list_add(&fdb_prio->list, pos->prev); - - /* Table is ready, connect it */ - err = mlx5_esw_chains_update_prio_prevs(fdb_prio, fdb); - if (err) - goto err_update; - - kvfree(flow_group_in); - return fdb_prio; - -err_update: - list_del(&fdb_prio->list); - rhashtable_remove_fast(&esw_prios_ht(esw), &fdb_prio->node, - prio_params); -err_insert: - mlx5_del_flow_rules(miss_rule); -err_miss_rule: - mlx5_destroy_flow_group(miss_group); -err_group: - mlx5_esw_chains_destroy_fdb_table(esw, fdb); -err_create: -err_alloc: - kvfree(fdb_prio); - kvfree(flow_group_in); - mlx5_esw_chains_put_fdb_chain(fdb_chain); - return ERR_PTR(err); -} - -static void -mlx5_esw_chains_destroy_fdb_prio(struct mlx5_eswitch *esw, - struct fdb_prio *fdb_prio) -{ - struct fdb_chain *fdb_chain = fdb_prio->fdb_chain; - - WARN_ON(mlx5_esw_chains_update_prio_prevs(fdb_prio, - fdb_prio->next_fdb)); - - list_del(&fdb_prio->list); - rhashtable_remove_fast(&esw_prios_ht(esw), &fdb_prio->node, - prio_params); - mlx5_del_flow_rules(fdb_prio->miss_rule); - mlx5_destroy_flow_group(fdb_prio->miss_group); - mlx5_esw_chains_destroy_fdb_table(esw, fdb_prio->fdb); - mlx5_esw_chains_put_fdb_chain(fdb_chain); - kvfree(fdb_prio); -} - -struct mlx5_flow_table * -mlx5_esw_chains_get_table(struct mlx5_eswitch *esw, u32 chain, u32 prio, - u32 level) -{ - struct mlx5_flow_table *prev_fts; - struct fdb_prio *fdb_prio; - struct fdb_prio_key key; - int l = 0; - - if ((chain > mlx5_esw_chains_get_chain_range(esw) && - chain != mlx5_esw_chains_get_ft_chain(esw)) || - prio > mlx5_esw_chains_get_prio_range(esw) || - level > mlx5_esw_chains_get_level_range(esw)) - return ERR_PTR(-EOPNOTSUPP); - - /* create earlier levels for correct fs_core lookup when - * connecting tables. - */ - for (l = 0; l < level; l++) { - prev_fts = mlx5_esw_chains_get_table(esw, chain, prio, l); - if (IS_ERR(prev_fts)) { - fdb_prio = ERR_CAST(prev_fts); - goto err_get_prevs; - } - } - - key.chain = chain; - key.prio = prio; - key.level = level; - - mutex_lock(&esw_chains_lock(esw)); - fdb_prio = rhashtable_lookup_fast(&esw_prios_ht(esw), &key, - prio_params); - if (!fdb_prio) { - fdb_prio = mlx5_esw_chains_create_fdb_prio(esw, chain, - prio, level); - if (IS_ERR(fdb_prio)) - goto err_create_prio; - } - - ++fdb_prio->ref; - mutex_unlock(&esw_chains_lock(esw)); - - return fdb_prio->fdb; - -err_create_prio: - mutex_unlock(&esw_chains_lock(esw)); -err_get_prevs: - while (--l >= 0) - mlx5_esw_chains_put_table(esw, chain, prio, l); - return ERR_CAST(fdb_prio); -} - -void -mlx5_esw_chains_put_table(struct mlx5_eswitch *esw, u32 chain, u32 prio, - u32 level) -{ - struct fdb_prio *fdb_prio; - struct fdb_prio_key key; - - key.chain = chain; - key.prio = prio; - key.level = level; - - mutex_lock(&esw_chains_lock(esw)); - fdb_prio = rhashtable_lookup_fast(&esw_prios_ht(esw), &key, - prio_params); - if (!fdb_prio) - goto err_get_prio; - - if (--fdb_prio->ref == 0) - mlx5_esw_chains_destroy_fdb_prio(esw, fdb_prio); - mutex_unlock(&esw_chains_lock(esw)); - - while (level-- > 0) - mlx5_esw_chains_put_table(esw, chain, prio, level); - - return; - -err_get_prio: - mutex_unlock(&esw_chains_lock(esw)); - WARN_ONCE(1, - "Couldn't find table: (chain: %d prio: %d level: %d)", - chain, prio, level); -} - -struct mlx5_flow_table * -mlx5_esw_chains_get_tc_end_ft(struct mlx5_eswitch *esw) -{ - return tc_end_fdb(esw); -} - -struct mlx5_flow_table * -mlx5_esw_chains_create_global_table(struct mlx5_eswitch *esw) -{ - u32 chain, prio, level; - int err; - - if (!fdb_ignore_flow_level_supported(esw)) { - err = -EOPNOTSUPP; - - esw_warn(esw->dev, - "Couldn't create global flow table, ignore_flow_level not supported."); - goto err_ignore; - } - - chain = mlx5_esw_chains_get_chain_range(esw), - prio = mlx5_esw_chains_get_prio_range(esw); - level = mlx5_esw_chains_get_level_range(esw); - - return mlx5_esw_chains_create_fdb_table(esw, chain, prio, level); - -err_ignore: - return ERR_PTR(err); -} - -void -mlx5_esw_chains_destroy_global_table(struct mlx5_eswitch *esw, - struct mlx5_flow_table *ft) -{ - mlx5_esw_chains_destroy_fdb_table(esw, ft); -} - -static int -mlx5_esw_chains_init(struct mlx5_eswitch *esw) -{ - struct mlx5_esw_chains_priv *chains_priv; - struct mlx5_core_dev *dev = esw->dev; - u32 max_flow_counter, fdb_max; - struct mapping_ctx *mapping; - int err; - - chains_priv = kzalloc(sizeof(*chains_priv), GFP_KERNEL); - if (!chains_priv) - return -ENOMEM; - esw_chains_priv(esw) = chains_priv; - - max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | - MLX5_CAP_GEN(dev, max_flow_counter_15_0); - fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size); - - esw_debug(dev, - "Init esw offloads chains, max counters(%d), groups(%d), max flow table size(%d)\n", - max_flow_counter, esw->params.large_group_num, fdb_max); - - mlx5_esw_chains_init_sz_pool(esw); - - if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, multi_fdb_encap) && - esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) { - esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; - esw_warn(dev, "Tc chains and priorities offload aren't supported, update firmware if needed\n"); - } else if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) { - esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; - esw_warn(dev, "Tc chains and priorities offload aren't supported\n"); - } else if (!fdb_modify_header_fwd_to_table_supported(esw)) { - /* Disabled when ttl workaround is needed, e.g - * when ESWITCH_IPV4_TTL_MODIFY_ENABLE = true in mlxconfig - */ - esw_warn(dev, - "Tc chains and priorities offload aren't supported, check firmware version, or mlxconfig settings\n"); - esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; - } else { - esw->fdb_table.flags |= ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; - esw_info(dev, "Supported tc offload range - chains: %u, prios: %u\n", - mlx5_esw_chains_get_chain_range(esw), - mlx5_esw_chains_get_prio_range(esw)); - } - - err = rhashtable_init(&esw_chains_ht(esw), &chain_params); - if (err) - goto init_chains_ht_err; - - err = rhashtable_init(&esw_prios_ht(esw), &prio_params); - if (err) - goto init_prios_ht_err; - - mapping = mapping_create(sizeof(u32), esw_get_max_restore_tag(esw), - true); - if (IS_ERR(mapping)) { - err = PTR_ERR(mapping); - goto mapping_err; - } - esw_chains_mapping(esw) = mapping; - - mutex_init(&esw_chains_lock(esw)); - - return 0; - -mapping_err: - rhashtable_destroy(&esw_prios_ht(esw)); -init_prios_ht_err: - rhashtable_destroy(&esw_chains_ht(esw)); -init_chains_ht_err: - kfree(chains_priv); - return err; -} - -static void -mlx5_esw_chains_cleanup(struct mlx5_eswitch *esw) -{ - mutex_destroy(&esw_chains_lock(esw)); - mapping_destroy(esw_chains_mapping(esw)); - rhashtable_destroy(&esw_prios_ht(esw)); - rhashtable_destroy(&esw_chains_ht(esw)); - - kfree(esw_chains_priv(esw)); -} - -static int -mlx5_esw_chains_open(struct mlx5_eswitch *esw) -{ - struct mlx5_flow_table *ft; - int err; - - /* Create tc_end_fdb(esw) which is the always created ft chain */ - ft = mlx5_esw_chains_get_table(esw, mlx5_esw_chains_get_ft_chain(esw), - 1, 0); - if (IS_ERR(ft)) - return PTR_ERR(ft); - - tc_end_fdb(esw) = ft; - - /* Always open the root for fast path */ - ft = mlx5_esw_chains_get_table(esw, 0, 1, 0); - if (IS_ERR(ft)) { - err = PTR_ERR(ft); - goto level_0_err; - } - - /* Open level 1 for split rules now if prios isn't supported */ - if (!mlx5_esw_chains_prios_supported(esw)) { - err = mlx5_esw_vport_tbl_get(esw); - if (err) - goto level_1_err; - } - - return 0; - -level_1_err: - mlx5_esw_chains_put_table(esw, 0, 1, 0); -level_0_err: - mlx5_esw_chains_put_table(esw, mlx5_esw_chains_get_ft_chain(esw), 1, 0); - return err; -} - -static void -mlx5_esw_chains_close(struct mlx5_eswitch *esw) -{ - if (!mlx5_esw_chains_prios_supported(esw)) - mlx5_esw_vport_tbl_put(esw); - mlx5_esw_chains_put_table(esw, 0, 1, 0); - mlx5_esw_chains_put_table(esw, mlx5_esw_chains_get_ft_chain(esw), 1, 0); -} - -int -mlx5_esw_chains_create(struct mlx5_eswitch *esw) -{ - int err; - - err = mlx5_esw_chains_init(esw); - if (err) - return err; - - err = mlx5_esw_chains_open(esw); - if (err) - goto err_open; - - return 0; - -err_open: - mlx5_esw_chains_cleanup(esw); - return err; -} - -void -mlx5_esw_chains_destroy(struct mlx5_eswitch *esw) -{ - mlx5_esw_chains_close(esw); - mlx5_esw_chains_cleanup(esw); -} - -int -mlx5_esw_chains_get_chain_mapping(struct mlx5_eswitch *esw, u32 chain, - u32 *chain_mapping) -{ - return mapping_add(esw_chains_mapping(esw), &chain, chain_mapping); -} - -int -mlx5_esw_chains_put_chain_mapping(struct mlx5_eswitch *esw, u32 chain_mapping) -{ - return mapping_remove(esw_chains_mapping(esw), chain_mapping); -} - -int mlx5_eswitch_get_chain_for_tag(struct mlx5_eswitch *esw, u32 tag, - u32 *chain) -{ - int err; - - err = mapping_find(esw_chains_mapping(esw), tag, chain); - if (err) { - esw_warn(esw->dev, "Can't find chain for tag: %d\n", tag); - return -ENOENT; - } - - return 0; -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.h deleted file mode 100644 index f3b9ae6798f3..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_chains.h +++ /dev/null @@ -1,47 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ -/* Copyright (c) 2020 Mellanox Technologies. */ - -#ifndef __ML5_ESW_CHAINS_H__ -#define __ML5_ESW_CHAINS_H__ - -bool -mlx5_esw_chains_prios_supported(struct mlx5_eswitch *esw); -bool -mlx5_esw_chains_backwards_supported(struct mlx5_eswitch *esw); -u32 -mlx5_esw_chains_get_prio_range(struct mlx5_eswitch *esw); -u32 -mlx5_esw_chains_get_chain_range(struct mlx5_eswitch *esw); -u32 -mlx5_esw_chains_get_ft_chain(struct mlx5_eswitch *esw); - -struct mlx5_flow_table * -mlx5_esw_chains_get_table(struct mlx5_eswitch *esw, u32 chain, u32 prio, - u32 level); -void -mlx5_esw_chains_put_table(struct mlx5_eswitch *esw, u32 chain, u32 prio, - u32 level); - -struct mlx5_flow_table * -mlx5_esw_chains_get_tc_end_ft(struct mlx5_eswitch *esw); - -struct mlx5_flow_table * -mlx5_esw_chains_create_global_table(struct mlx5_eswitch *esw); -void -mlx5_esw_chains_destroy_global_table(struct mlx5_eswitch *esw, - struct mlx5_flow_table *ft); - -int -mlx5_esw_chains_get_chain_mapping(struct mlx5_eswitch *esw, u32 chain, - u32 *chain_mapping); -int -mlx5_esw_chains_put_chain_mapping(struct mlx5_eswitch *esw, - u32 chain_mapping); - -int mlx5_esw_chains_create(struct mlx5_eswitch *esw); -void mlx5_esw_chains_destroy(struct mlx5_eswitch *esw); - -int -mlx5_eswitch_get_chain_for_tag(struct mlx5_eswitch *esw, u32 tag, u32 *chain); - -#endif /* __ML5_ESW_CHAINS_H__ */ -- cgit v1.3-14-g43fede From 5a37a8df809b14f682f7b5c863b4672132e2ffc9 Mon Sep 17 00:00:00 2001 From: wenxu Date: Wed, 25 Mar 2020 20:18:58 +0800 Subject: net/mlx5e: refactor indr setup block Refactor indr setup block for support ft indr setup in the next patch. The function mlx5e_rep_indr_offload exposes 'flags' in order set additional flag for FT in next patch. Rename mlx5e_rep_indr_setup_tc_block to mlx5e_rep_indr_setup_block and add flow_setup_cb_t callback parameters in order set the specific callback for FT in next patch. Signed-off-by: wenxu Reviewed-by: Vlad Buslov Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 42 ++++++++++++------------ 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 559453b4c6b6..4c947b14b56d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -694,9 +694,9 @@ static void mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv) static int mlx5e_rep_indr_offload(struct net_device *netdev, struct flow_cls_offload *flower, - struct mlx5e_rep_indr_block_priv *indr_priv) + struct mlx5e_rep_indr_block_priv *indr_priv, + unsigned long flags) { - unsigned long flags = MLX5_TC_FLAG(EGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD); struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev); int err = 0; @@ -717,20 +717,22 @@ mlx5e_rep_indr_offload(struct net_device *netdev, return err; } -static int mlx5e_rep_indr_setup_block_cb(enum tc_setup_type type, - void *type_data, void *indr_priv) +static int mlx5e_rep_indr_setup_tc_cb(enum tc_setup_type type, + void *type_data, void *indr_priv) { + unsigned long flags = MLX5_TC_FLAG(EGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD); struct mlx5e_rep_indr_block_priv *priv = indr_priv; switch (type) { case TC_SETUP_CLSFLOWER: - return mlx5e_rep_indr_offload(priv->netdev, type_data, priv); + return mlx5e_rep_indr_offload(priv->netdev, type_data, priv, + flags); default: return -EOPNOTSUPP; } } -static void mlx5e_rep_indr_tc_block_unbind(void *cb_priv) +static void mlx5e_rep_indr_block_unbind(void *cb_priv) { struct mlx5e_rep_indr_block_priv *indr_priv = cb_priv; @@ -741,9 +743,10 @@ static void mlx5e_rep_indr_tc_block_unbind(void *cb_priv) static LIST_HEAD(mlx5e_block_cb_list); static int -mlx5e_rep_indr_setup_tc_block(struct net_device *netdev, - struct mlx5e_rep_priv *rpriv, - struct flow_block_offload *f) +mlx5e_rep_indr_setup_block(struct net_device *netdev, + struct mlx5e_rep_priv *rpriv, + struct flow_block_offload *f, + flow_setup_cb_t *setup_cb) { struct mlx5e_rep_indr_block_priv *indr_priv; struct flow_block_cb *block_cb; @@ -769,9 +772,8 @@ mlx5e_rep_indr_setup_tc_block(struct net_device *netdev, list_add(&indr_priv->list, &rpriv->uplink_priv.tc_indr_block_priv_list); - block_cb = flow_block_cb_alloc(mlx5e_rep_indr_setup_block_cb, - indr_priv, indr_priv, - mlx5e_rep_indr_tc_block_unbind); + block_cb = flow_block_cb_alloc(setup_cb, indr_priv, indr_priv, + mlx5e_rep_indr_block_unbind); if (IS_ERR(block_cb)) { list_del(&indr_priv->list); kfree(indr_priv); @@ -786,9 +788,7 @@ mlx5e_rep_indr_setup_tc_block(struct net_device *netdev, if (!indr_priv) return -ENOENT; - block_cb = flow_block_cb_lookup(f->block, - mlx5e_rep_indr_setup_block_cb, - indr_priv); + block_cb = flow_block_cb_lookup(f->block, setup_cb, indr_priv); if (!block_cb) return -ENOENT; @@ -802,13 +802,13 @@ mlx5e_rep_indr_setup_tc_block(struct net_device *netdev, } static -int mlx5e_rep_indr_setup_tc_cb(struct net_device *netdev, void *cb_priv, - enum tc_setup_type type, void *type_data) +int mlx5e_rep_indr_setup_cb(struct net_device *netdev, void *cb_priv, + enum tc_setup_type type, void *type_data) { switch (type) { case TC_SETUP_BLOCK: - return mlx5e_rep_indr_setup_tc_block(netdev, cb_priv, - type_data); + return mlx5e_rep_indr_setup_block(netdev, cb_priv, type_data, + mlx5e_rep_indr_setup_tc_cb); default: return -EOPNOTSUPP; } @@ -820,7 +820,7 @@ static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv, int err; err = __flow_indr_block_cb_register(netdev, rpriv, - mlx5e_rep_indr_setup_tc_cb, + mlx5e_rep_indr_setup_cb, rpriv); if (err) { struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); @@ -834,7 +834,7 @@ static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv, static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv, struct net_device *netdev) { - __flow_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_tc_cb, + __flow_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_cb, rpriv); } -- cgit v1.3-14-g43fede From 07c264ab8e6c76efbc2eb06839f8dcd9fd0c3ebd Mon Sep 17 00:00:00 2001 From: wenxu Date: Wed, 25 Mar 2020 20:18:59 +0800 Subject: net/mlx5e: add mlx5e_rep_indr_setup_ft_cb support Add mlx5e_rep_indr_setup_ft_cb to support indr block setup in FT mode. Both tc rules and flow table rules are of the same format, It can re-use tc parsing for that, and move the flow table rules to their steering domain(the specific chain_index), the indr block offload in FT also follow this scenario. Signed-off-by: wenxu Reviewed-by: Vlad Buslov Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 49 ++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 4c947b14b56d..2a0243e4af75 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -732,6 +732,52 @@ static int mlx5e_rep_indr_setup_tc_cb(enum tc_setup_type type, } } +static int mlx5e_rep_indr_setup_ft_cb(enum tc_setup_type type, + void *type_data, void *indr_priv) +{ + struct mlx5e_rep_indr_block_priv *priv = indr_priv; + struct flow_cls_offload *f = type_data; + struct flow_cls_offload tmp; + struct mlx5e_priv *mpriv; + struct mlx5_eswitch *esw; + unsigned long flags; + int err; + + mpriv = netdev_priv(priv->rpriv->netdev); + esw = mpriv->mdev->priv.eswitch; + + flags = MLX5_TC_FLAG(EGRESS) | + MLX5_TC_FLAG(ESW_OFFLOAD) | + MLX5_TC_FLAG(FT_OFFLOAD); + + switch (type) { + case TC_SETUP_CLSFLOWER: + memcpy(&tmp, f, sizeof(*f)); + + /* Re-use tc offload path by moving the ft flow to the + * reserved ft chain. + * + * FT offload can use prio range [0, INT_MAX], so we normalize + * it to range [1, mlx5_esw_chains_get_prio_range(esw)] + * as with tc, where prio 0 isn't supported. + * + * We only support chain 0 of FT offload. + */ + if (!mlx5_esw_chains_prios_supported(esw) || + tmp.common.prio >= mlx5_esw_chains_get_prio_range(esw) || + tmp.common.chain_index) + return -EOPNOTSUPP; + + tmp.common.chain_index = mlx5_esw_chains_get_ft_chain(esw); + tmp.common.prio++; + err = mlx5e_rep_indr_offload(priv->netdev, &tmp, priv, flags); + memcpy(&f->stats, &tmp.stats, sizeof(f->stats)); + return err; + default: + return -EOPNOTSUPP; + } +} + static void mlx5e_rep_indr_block_unbind(void *cb_priv) { struct mlx5e_rep_indr_block_priv *indr_priv = cb_priv; @@ -809,6 +855,9 @@ int mlx5e_rep_indr_setup_cb(struct net_device *netdev, void *cb_priv, case TC_SETUP_BLOCK: return mlx5e_rep_indr_setup_block(netdev, cb_priv, type_data, mlx5e_rep_indr_setup_tc_cb); + case TC_SETUP_FT: + return mlx5e_rep_indr_setup_block(netdev, cb_priv, type_data, + mlx5e_rep_indr_setup_ft_cb); default: return -EOPNOTSUPP; } -- cgit v1.3-14-g43fede