aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
diff options
context:
space:
mode:
authorRaed Salem <raeds@mellanox.com>2020-06-16 17:53:06 +0300
committerSaeed Mahameed <saeedm@nvidia.com>2020-10-12 15:37:45 -0700
commit5be019040cb7bab4caf152cacadffee91a78b506 (patch)
tree18f576addf5f410c0384024b25cbc6036a8cb198 /drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
parentnet/mlx5e: IPsec: Add TX steering rule per IPsec state (diff)
downloadlinux-dev-5be019040cb7bab4caf152cacadffee91a78b506.tar.xz
linux-dev-5be019040cb7bab4caf152cacadffee91a78b506.zip
net/mlx5e: IPsec: Add Connect-X IPsec Tx data path offload
In the TX data path, spot packets with xfrm stack IPsec offload indication. Fill Software-Parser segment in TX descriptor so that the hardware may parse the ESP protocol, and perform TX checksum offload on the inner payload. Support GSO, by providing the trailer data and ICV placeholder so HW can fill it post encryption operation. Padding alignment cannot be performed in HW (ConnectX-6Dx) due to a bug. Software can overcome this limitation by adding NETIF_F_HW_ESP to the gso_partial_features field in netdev so the packets being aligned by the stack. l4_inner_checksum cannot be offloaded by HW for IPsec tunnel type packet. Note that for GSO SKBs, the stack does not include an ESP trailer, unlike the non-GSO case. Below is the iperf3 performance report on two server of 24 cores Intel(R) Xeon(R) CPU E5-2620 v3 @ 2.40GHz with ConnectX6-DX. All the bandwidth test uses iperf3 TCP traffic with packet size 128KB. Each tunnel uses one iperf3 stream with one thread (option -P1). TX crypto offload shows improvements on both bandwidth and CPU utilization. ---------------------------------------------------------------------- Mode | Num tunnel | BW | Send CPU util | Recv CPU util | | (Gbps) | (Average %) | (Average %) ---------------------------------------------------------------------- Cryto offload | | | | (RX only) | 1 | 4.7 | 4.2 | 3.5 ---------------------------------------------------------------------- Cryto offload | | | | (RX only) | 24 | 15.6 | 20 | 10 ---------------------------------------------------------------------- Non-offload | 1 | 4.6 | 4 | 5 ---------------------------------------------------------------------- Non-offload | 24 | 11.9 | 16 | 12 ---------------------------------------------------------------------- Cryto offload | | | | (TX & RX) | 1 | 11.9 | 2.1 | 5.9 ---------------------------------------------------------------------- Cryto offload | | | | (TX & RX) | 24 | 38 | 9.5 | 27.5 ---------------------------------------------------------------------- Cryto offload | | | | (TX only) | 1 | 4.7 | 0.7 | 5 ---------------------------------------------------------------------- Cryto offload | | | | (TX only) | 24 | 14.5 | 6 | 20 Regression tests show no degradation on non-ipsec and non-offload-ipsec traffics. The packet rate test uses pktgen UDP to transmit on single CPU, the instructions and cycles are measured on the transmit CPU. before: ---------------------------------------------------------------------- Non-offload | 1 | 4.7 | 4.2 | 5.1 ---------------------------------------------------------------------- Non-offload | 24 | 11.2 | 14 | 15 ---------------------------------------------------------------------- Non-ipsec | 1 | 28 | 4 | 5.7 ---------------------------------------------------------------------- Non-ipsec | 24 | 68.3 | 17.8 | 39.7 ---------------------------------------------------------------------- Non-ipsec packet rate(BURST=1000 BC=5 NCPUS=1 SIZE=60) 13.56Mpps, 456 instructions/pkt, 191 cycles/pkt after: ---------------------------------------------------------------------- Non-offload | 1 | 4.69 | 4.2 | 5 ---------------------------------------------------------------------- Non-offload | 24 | 11.9 | 13.5 | 15.1 ---------------------------------------------------------------------- Non-ipsec | 1 | 29 | 3.2 | 5.5 ---------------------------------------------------------------------- Non-ipsec | 24 | 68.2 | 18.5 | 39.8 ---------------------------------------------------------------------- Non-ipsec packet rate: 13.56Mpps, 472 instructions/pkt, 191 cycles/pkt Signed-off-by: Raed Salem <raeds@mellanox.com> Signed-off-by: Huy Nguyen <huyn@mellanox.com> Reviewed-by: Maxim Mikityanskiy <maximmi@mellanox.com> Reviewed-by: Tariq Toukan <tariqt@nvidia.com> Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en_tx.c')
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tx.c43
1 files changed, 37 insertions, 6 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 13bd4f254ed7..82b4419af9d4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -144,9 +144,29 @@ static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs)
memcpy(&vhdr->h_vlan_encapsulated_proto, skb->data + cpy1_sz, cpy2_sz);
}
+/* RM 2311217: no L4 inner checksum for IPsec tunnel type packet */
+static void
+ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+ struct mlx5_wqe_eth_seg *eseg)
+{
+ eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM;
+ if (skb->encapsulation) {
+ eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM;
+ sq->stats->csum_partial_inner++;
+ } else {
+ eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM;
+ sq->stats->csum_partial++;
+ }
+}
+
static inline void
mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg)
{
+ if (unlikely(eseg->flow_table_metadata & cpu_to_be32(MLX5_ETH_WQE_FT_META_IPSEC))) {
+ ipsec_txwqe_build_eseg_csum(sq, skb, eseg);
+ return;
+ }
+
if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM;
if (skb->encapsulation) {
@@ -237,12 +257,14 @@ struct mlx5e_tx_attr {
u16 headlen;
u16 ihs;
__be16 mss;
+ u16 insz;
u8 opcode;
};
struct mlx5e_tx_wqe_attr {
u16 ds_cnt;
u16 ds_cnt_inl;
+ u16 ds_cnt_ids;
u8 num_wqebbs;
};
@@ -299,6 +321,7 @@ static void mlx5e_sq_xmit_prepare(struct mlx5e_txqsq *sq, struct sk_buff *skb,
stats->packets++;
}
+ attr->insz = mlx5e_accel_tx_ids_len(sq, accel);
stats->bytes += attr->num_bytes;
}
@@ -307,9 +330,13 @@ static void mlx5e_sq_calc_wqe_attr(struct sk_buff *skb, const struct mlx5e_tx_at
{
u16 ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT;
u16 ds_cnt_inl = 0;
+ u16 ds_cnt_ids = 0;
- ds_cnt += !!attr->headlen + skb_shinfo(skb)->nr_frags;
+ if (attr->insz)
+ ds_cnt_ids = DIV_ROUND_UP(sizeof(struct mlx5_wqe_inline_seg) + attr->insz,
+ MLX5_SEND_WQE_DS);
+ ds_cnt += !!attr->headlen + skb_shinfo(skb)->nr_frags + ds_cnt_ids;
if (attr->ihs) {
u16 inl = attr->ihs - INL_HDR_START_SZ;
@@ -323,6 +350,7 @@ static void mlx5e_sq_calc_wqe_attr(struct sk_buff *skb, const struct mlx5e_tx_at
*wqe_attr = (struct mlx5e_tx_wqe_attr) {
.ds_cnt = ds_cnt,
.ds_cnt_inl = ds_cnt_inl,
+ .ds_cnt_ids = ds_cnt_ids,
.num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS),
};
}
@@ -398,11 +426,11 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
if (attr->ihs) {
if (skb_vlan_tag_present(skb)) {
- eseg->inline_hdr.sz = cpu_to_be16(attr->ihs + VLAN_HLEN);
+ eseg->inline_hdr.sz |= cpu_to_be16(attr->ihs + VLAN_HLEN);
mlx5e_insert_vlan(eseg->inline_hdr.start, skb, attr->ihs);
stats->added_vlan_packets++;
} else {
- eseg->inline_hdr.sz = cpu_to_be16(attr->ihs);
+ eseg->inline_hdr.sz |= cpu_to_be16(attr->ihs);
memcpy(eseg->inline_hdr.start, skb->data, attr->ihs);
}
dseg += wqe_attr->ds_cnt_inl;
@@ -414,6 +442,7 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
stats->added_vlan_packets++;
}
+ dseg += wqe_attr->ds_cnt_ids;
num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb->data + attr->ihs,
attr->headlen, dseg);
if (unlikely(num_dma < 0))
@@ -430,7 +459,8 @@ err_drop:
static bool mlx5e_tx_skb_supports_mpwqe(struct sk_buff *skb, struct mlx5e_tx_attr *attr)
{
- return !skb_is_nonlinear(skb) && !skb_vlan_tag_present(skb) && !attr->ihs;
+ return !skb_is_nonlinear(skb) && !skb_vlan_tag_present(skb) && !attr->ihs &&
+ !attr->insz;
}
static bool mlx5e_tx_mpwqe_same_eseg(struct mlx5e_txqsq *sq, struct mlx5_wqe_eth_seg *eseg)
@@ -580,7 +610,7 @@ void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq)
static bool mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq,
struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg)
{
- if (unlikely(!mlx5e_accel_tx_eseg(priv, sq, skb, eseg)))
+ if (unlikely(!mlx5e_accel_tx_eseg(priv, skb, eseg)))
return false;
mlx5e_txwqe_build_eseg_csum(sq, skb, eseg);
@@ -625,7 +655,8 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
wqe = MLX5E_TX_FETCH_WQE(sq, pi);
/* May update the WQE, but may not post other WQEs. */
- mlx5e_accel_tx_finish(sq, wqe, &accel);
+ mlx5e_accel_tx_finish(sq, wqe, &accel,
+ (struct mlx5_wqe_inline_seg *)(wqe->data + wqe_attr.ds_cnt_inl));
if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &wqe->eth)))
return NETDEV_TX_OK;