From 90f906243bf633f07757467506dfab3422b43ca2 Mon Sep 17 00:00:00 2001 From: Venkat Duvvuru Date: Thu, 31 Oct 2019 01:07:45 -0400 Subject: bnxt_en: Add support for L2 rewrite This patch adds support for packet edit offload of L2 fields (src mac & dst mac, also referred as L2 rewrite). Only when the mask is fully exact match for a field, the command is sent down to the adapter to offload such a flow. Otherwise, an error is returned. v2: Fix pointer alignment issue in bnxt_fill_l2_rewrite_fields() [MChan] Signed-off-by: Venkat Duvvuru Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 132 +++++++++++++++++++++++++++ drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h | 11 +++ 2 files changed, 143 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index c8062d020d1e..80b39ffa4ca2 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "bnxt_hsi.h" @@ -36,6 +37,8 @@ #define is_vid_exactmatch(vlan_tci_mask) \ ((ntohs(vlan_tci_mask) & VLAN_VID_MASK) == VLAN_VID_MASK) +static bool is_wildcard(void *mask, int len); +static bool is_exactmatch(void *mask, int len); /* Return the dst fid of the func for flow forwarding * For PFs: src_fid is the fid of the PF * For VF-reps: src_fid the fid of the VF @@ -111,10 +114,115 @@ static int bnxt_tc_parse_tunnel_set(struct bnxt *bp, return 0; } +/* Key & Mask from the stack comes unaligned in multiple iterations. + * This routine consolidates such multiple unaligned values into one + * field each for Key & Mask (for src and dst macs separately) + * For example, + * Mask/Key Offset Iteration + * ========== ====== ========= + * dst mac 0xffffffff 0 1 + * dst mac 0x0000ffff 4 2 + * + * src mac 0xffff0000 4 1 + * src mac 0xffffffff 8 2 + * + * The above combination coming from the stack will be consolidated as + * Mask/Key + * ============== + * src mac: 0xffffffffffff + * dst mac: 0xffffffffffff + */ +static void bnxt_set_l2_key_mask(u32 part_key, u32 part_mask, + u8 *actual_key, u8 *actual_mask) +{ + u32 key = get_unaligned((u32 *)actual_key); + u32 mask = get_unaligned((u32 *)actual_mask); + + part_key &= part_mask; + part_key |= key & ~part_mask; + + put_unaligned(mask | part_mask, (u32 *)actual_mask); + put_unaligned(part_key, (u32 *)actual_key); +} + +static int +bnxt_fill_l2_rewrite_fields(struct bnxt_tc_actions *actions, + u16 *eth_addr, u16 *eth_addr_mask) +{ + u16 *p; + int j; + + if (unlikely(bnxt_eth_addr_key_mask_invalid(eth_addr, eth_addr_mask))) + return -EINVAL; + + if (!is_wildcard(ð_addr_mask[0], ETH_ALEN)) { + if (!is_exactmatch(ð_addr_mask[0], ETH_ALEN)) + return -EINVAL; + /* FW expects dmac to be in u16 array format */ + p = eth_addr; + for (j = 0; j < 3; j++) + actions->l2_rewrite_dmac[j] = cpu_to_be16(*(p + j)); + } + + if (!is_wildcard(ð_addr_mask[ETH_ALEN], ETH_ALEN)) { + if (!is_exactmatch(ð_addr_mask[ETH_ALEN], ETH_ALEN)) + return -EINVAL; + /* FW expects smac to be in u16 array format */ + p = ð_addr[ETH_ALEN / 2]; + for (j = 0; j < 3; j++) + actions->l2_rewrite_smac[j] = cpu_to_be16(*(p + j)); + } + + return 0; +} + +static int +bnxt_tc_parse_pedit(struct bnxt *bp, struct bnxt_tc_actions *actions, + struct flow_action_entry *act, u8 *eth_addr, + u8 *eth_addr_mask) +{ + u32 mask, val, offset; + u8 htype; + + offset = act->mangle.offset; + htype = act->mangle.htype; + switch (htype) { + case FLOW_ACT_MANGLE_HDR_TYPE_ETH: + if (offset > PEDIT_OFFSET_SMAC_LAST_4_BYTES) { + netdev_err(bp->dev, + "%s: eth_hdr: Invalid pedit field\n", + __func__); + return -EINVAL; + } + actions->flags |= BNXT_TC_ACTION_FLAG_L2_REWRITE; + mask = ~act->mangle.mask; + val = act->mangle.val; + + bnxt_set_l2_key_mask(val, mask, ð_addr[offset], + ð_addr_mask[offset]); + break; + default: + netdev_err(bp->dev, "%s: Unsupported pedit hdr type\n", + __func__); + return -EINVAL; + } + return 0; +} + static int bnxt_tc_parse_actions(struct bnxt *bp, struct bnxt_tc_actions *actions, struct flow_action *flow_action) { + /* Used to store the L2 rewrite mask for dmac (6 bytes) followed by + * smac (6 bytes) if rewrite of both is specified, otherwise either + * dmac or smac + */ + u16 eth_addr_mask[ETH_ALEN] = { 0 }; + /* Used to store the L2 rewrite key for dmac (6 bytes) followed by + * smac (6 bytes) if rewrite of both is specified, otherwise either + * dmac or smac + */ + u16 eth_addr[ETH_ALEN] = { 0 }; struct flow_action_entry *act; int i, rc; @@ -148,11 +256,26 @@ static int bnxt_tc_parse_actions(struct bnxt *bp, case FLOW_ACTION_TUNNEL_DECAP: actions->flags |= BNXT_TC_ACTION_FLAG_TUNNEL_DECAP; break; + /* Packet edit: L2 rewrite, NAT, NAPT */ + case FLOW_ACTION_MANGLE: + rc = bnxt_tc_parse_pedit(bp, actions, act, + (u8 *)eth_addr, + (u8 *)eth_addr_mask); + if (rc) + return rc; + break; default: break; } } + if (actions->flags & BNXT_TC_ACTION_FLAG_L2_REWRITE) { + rc = bnxt_fill_l2_rewrite_fields(actions, eth_addr, + eth_addr_mask); + if (rc) + return rc; + } + if (actions->flags & BNXT_TC_ACTION_FLAG_FWD) { if (actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) { /* dst_fid is PF's fid */ @@ -401,6 +524,15 @@ static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow, req.src_fid = cpu_to_le16(flow->src_fid); req.ref_flow_handle = ref_flow_handle; + if (actions->flags & BNXT_TC_ACTION_FLAG_L2_REWRITE) { + memcpy(req.l2_rewrite_dmac, actions->l2_rewrite_dmac, + ETH_ALEN); + memcpy(req.l2_rewrite_smac, actions->l2_rewrite_smac, + ETH_ALEN); + action_flags |= + CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE; + } + if (actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP || actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) { req.tunnel_handle = tunnel_handle; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h index 4f05305052f2..6d0d4857992b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h @@ -62,6 +62,12 @@ struct bnxt_tc_tunnel_key { __be32 id; }; +#define bnxt_eth_addr_key_mask_invalid(eth_addr, eth_addr_mask) \ + ((is_wildcard(&(eth_addr)[0], ETH_ALEN) && \ + is_wildcard(&(eth_addr)[ETH_ALEN], ETH_ALEN)) || \ + (is_wildcard(&(eth_addr_mask)[0], ETH_ALEN) && \ + is_wildcard(&(eth_addr_mask)[ETH_ALEN], ETH_ALEN))) + struct bnxt_tc_actions { u32 flags; #define BNXT_TC_ACTION_FLAG_FWD BIT(0) @@ -71,6 +77,7 @@ struct bnxt_tc_actions { #define BNXT_TC_ACTION_FLAG_DROP BIT(5) #define BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP BIT(6) #define BNXT_TC_ACTION_FLAG_TUNNEL_DECAP BIT(7) +#define BNXT_TC_ACTION_FLAG_L2_REWRITE BIT(8) u16 dst_fid; struct net_device *dst_dev; @@ -79,6 +86,10 @@ struct bnxt_tc_actions { /* tunnel encap */ struct ip_tunnel_key tun_encap_key; +#define PEDIT_OFFSET_SMAC_LAST_4_BYTES 0x8 + __be16 l2_rewrite_dmac[3]; + __be16 l2_rewrite_smac[3]; + }; struct bnxt_tc_flow { -- cgit v1.2.3-59-g8ed1b From 08f8280e8788202a67a359952cd436707f8789bd Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Thu, 31 Oct 2019 01:07:46 -0400 Subject: bnxt: Avoid logging an unnecessary message when a flow can't be offloaded For every single case where bnxt_tc_can_offload() can fail, we are logging a user friendly descriptive message anyway, but because of the path it would take in case of failure, another redundant error message would get logged. Just freeing the node and returning from the point of failure should suffice. Signed-off-by: Somnath Kotur Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 80b39ffa4ca2..c35cde8a5b29 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -1406,7 +1406,8 @@ static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid, if (!bnxt_tc_can_offload(bp, flow)) { rc = -EOPNOTSUPP; - goto free_node; + kfree_rcu(new_node, rcu); + return rc; } /* If a flow exists with the same cookie, delete it */ -- cgit v1.2.3-59-g8ed1b From 9b9eb518e3383d94c8b81ff403d524f2cee5b6b9 Mon Sep 17 00:00:00 2001 From: Somnath Kotur Date: Thu, 31 Oct 2019 01:07:47 -0400 Subject: bnxt_en: Add support for NAT(L3/L4 rewrite) Provides support for modifying L3/L4 Header parameters to support NAT. Sets the appropriate fields/bits in cfa_flow_alloc cmd. Sample cmd for offloading an IPv4 flow with SNAT: ovs-ofctl add-flow ovsbr0 "ip,nw_src=192.168.201.44 \ actions=mod_nw_src:203.31.220.144,output:p7p1" Replace 'nw_src' with 'nw_dst' in above cmd for DNAT with IPv4 Sample cmd for offloading an IPv4 flow with SNAPT: ovs-ofctl add-flow ovsbr0 "ip,nw_src=192.168.201.44 \ actions=mod_nw_src:203.31.220.144, mod_tp_src:6789,output:p7p1" Similar to DNAT, replace 'tp_src' with 'tp_dst' for offloading flow with DNAPT Sample cmd for offloading an IPv6 flow with SNAT: ovs-ofctl add-flow ovsbr0 "ipv6, ipv6_src=2001:5c0:9168::2/64 \ actions=load:0x1->NXM_NX_IPV6_SRC[0..63], \ load:0x20010db801920000->NXM_NX_IPV6_SRC[64..127],output:p7p1" Replace 'SRC' with DST' above for IPv6 DNAT Signed-off-by: Somnath Kotur Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 140 +++++++++++++++++++++++++-- drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h | 11 ++- 2 files changed, 144 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index c35cde8a5b29..c801666a85fd 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -114,7 +114,8 @@ static int bnxt_tc_parse_tunnel_set(struct bnxt *bp, return 0; } -/* Key & Mask from the stack comes unaligned in multiple iterations. +/* Key & Mask from the stack comes unaligned in multiple iterations of 4 bytes + * each(u32). * This routine consolidates such multiple unaligned values into one * field each for Key & Mask (for src and dst macs separately) * For example, @@ -178,14 +179,19 @@ bnxt_fill_l2_rewrite_fields(struct bnxt_tc_actions *actions, static int bnxt_tc_parse_pedit(struct bnxt *bp, struct bnxt_tc_actions *actions, - struct flow_action_entry *act, u8 *eth_addr, + struct flow_action_entry *act, int act_idx, u8 *eth_addr, u8 *eth_addr_mask) { - u32 mask, val, offset; + size_t offset_of_ip6_daddr = offsetof(struct ipv6hdr, daddr); + size_t offset_of_ip6_saddr = offsetof(struct ipv6hdr, saddr); + u32 mask, val, offset, idx; u8 htype; offset = act->mangle.offset; htype = act->mangle.htype; + mask = ~act->mangle.mask; + val = act->mangle.val; + switch (htype) { case FLOW_ACT_MANGLE_HDR_TYPE_ETH: if (offset > PEDIT_OFFSET_SMAC_LAST_4_BYTES) { @@ -195,12 +201,73 @@ bnxt_tc_parse_pedit(struct bnxt *bp, struct bnxt_tc_actions *actions, return -EINVAL; } actions->flags |= BNXT_TC_ACTION_FLAG_L2_REWRITE; - mask = ~act->mangle.mask; - val = act->mangle.val; bnxt_set_l2_key_mask(val, mask, ð_addr[offset], ð_addr_mask[offset]); break; + case FLOW_ACT_MANGLE_HDR_TYPE_IP4: + actions->flags |= BNXT_TC_ACTION_FLAG_NAT_XLATE; + actions->nat.l3_is_ipv4 = true; + if (offset == offsetof(struct iphdr, saddr)) { + actions->nat.src_xlate = true; + actions->nat.l3.ipv4.saddr.s_addr = htonl(val); + } else if (offset == offsetof(struct iphdr, daddr)) { + actions->nat.src_xlate = false; + actions->nat.l3.ipv4.daddr.s_addr = htonl(val); + } else { + netdev_err(bp->dev, + "%s: IPv4_hdr: Invalid pedit field\n", + __func__); + return -EINVAL; + } + + netdev_dbg(bp->dev, "nat.src_xlate = %d src IP: %pI4 dst ip : %pI4\n", + actions->nat.src_xlate, &actions->nat.l3.ipv4.saddr, + &actions->nat.l3.ipv4.daddr); + break; + + case FLOW_ACT_MANGLE_HDR_TYPE_IP6: + actions->flags |= BNXT_TC_ACTION_FLAG_NAT_XLATE; + actions->nat.l3_is_ipv4 = false; + if (offset >= offsetof(struct ipv6hdr, saddr) && + offset < offset_of_ip6_daddr) { + /* 16 byte IPv6 address comes in 4 iterations of + * 4byte chunks each + */ + actions->nat.src_xlate = true; + idx = (offset - offset_of_ip6_saddr) / 4; + /* First 4bytes will be copied to idx 0 and so on */ + actions->nat.l3.ipv6.saddr.s6_addr32[idx] = htonl(val); + } else if (offset >= offset_of_ip6_daddr && + offset < offset_of_ip6_daddr + 16) { + actions->nat.src_xlate = false; + idx = (offset - offset_of_ip6_daddr) / 4; + actions->nat.l3.ipv6.saddr.s6_addr32[idx] = htonl(val); + } else { + netdev_err(bp->dev, + "%s: IPv6_hdr: Invalid pedit field\n", + __func__); + return -EINVAL; + } + break; + case FLOW_ACT_MANGLE_HDR_TYPE_TCP: + case FLOW_ACT_MANGLE_HDR_TYPE_UDP: + /* HW does not support L4 rewrite alone without L3 + * rewrite + */ + if (!(actions->flags & BNXT_TC_ACTION_FLAG_NAT_XLATE)) { + netdev_err(bp->dev, + "Need to specify L3 rewrite as well\n"); + return -EINVAL; + } + if (actions->nat.src_xlate) + actions->nat.l4.ports.sport = htons(val); + else + actions->nat.l4.ports.dport = htons(val); + netdev_dbg(bp->dev, "actions->nat.sport = %d dport = %d\n", + actions->nat.l4.ports.sport, + actions->nat.l4.ports.dport); + break; default: netdev_err(bp->dev, "%s: Unsupported pedit hdr type\n", __func__); @@ -258,7 +325,7 @@ static int bnxt_tc_parse_actions(struct bnxt *bp, break; /* Packet edit: L2 rewrite, NAT, NAPT */ case FLOW_ACTION_MANGLE: - rc = bnxt_tc_parse_pedit(bp, actions, act, + rc = bnxt_tc_parse_pedit(bp, actions, act, i, (u8 *)eth_addr, (u8 *)eth_addr_mask); if (rc) @@ -533,6 +600,67 @@ static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow, CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE; } + if (actions->flags & BNXT_TC_ACTION_FLAG_NAT_XLATE) { + if (actions->nat.l3_is_ipv4) { + action_flags |= + CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_IPV4_ADDRESS; + + if (actions->nat.src_xlate) { + action_flags |= + CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_SRC; + /* L3 source rewrite */ + req.nat_ip_address[0] = + actions->nat.l3.ipv4.saddr.s_addr; + /* L4 source port */ + if (actions->nat.l4.ports.sport) + req.nat_port = + actions->nat.l4.ports.sport; + } else { + action_flags |= + CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_DEST; + /* L3 destination rewrite */ + req.nat_ip_address[0] = + actions->nat.l3.ipv4.daddr.s_addr; + /* L4 destination port */ + if (actions->nat.l4.ports.dport) + req.nat_port = + actions->nat.l4.ports.dport; + } + netdev_dbg(bp->dev, + "req.nat_ip_address: %pI4 src_xlate: %d req.nat_port: %x\n", + req.nat_ip_address, actions->nat.src_xlate, + req.nat_port); + } else { + if (actions->nat.src_xlate) { + action_flags |= + CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_SRC; + /* L3 source rewrite */ + memcpy(req.nat_ip_address, + actions->nat.l3.ipv6.saddr.s6_addr32, + sizeof(req.nat_ip_address)); + /* L4 source port */ + if (actions->nat.l4.ports.sport) + req.nat_port = + actions->nat.l4.ports.sport; + } else { + action_flags |= + CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_DEST; + /* L3 destination rewrite */ + memcpy(req.nat_ip_address, + actions->nat.l3.ipv6.daddr.s6_addr32, + sizeof(req.nat_ip_address)); + /* L4 destination port */ + if (actions->nat.l4.ports.dport) + req.nat_port = + actions->nat.l4.ports.dport; + } + netdev_dbg(bp->dev, + "req.nat_ip_address: %pI6 src_xlate: %d req.nat_port: %x\n", + req.nat_ip_address, actions->nat.src_xlate, + req.nat_port); + } + } + if (actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP || actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) { req.tunnel_handle = tunnel_handle; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h index 6d0d4857992b..286754903543 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h @@ -78,6 +78,7 @@ struct bnxt_tc_actions { #define BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP BIT(6) #define BNXT_TC_ACTION_FLAG_TUNNEL_DECAP BIT(7) #define BNXT_TC_ACTION_FLAG_L2_REWRITE BIT(8) +#define BNXT_TC_ACTION_FLAG_NAT_XLATE BIT(9) u16 dst_fid; struct net_device *dst_dev; @@ -89,7 +90,15 @@ struct bnxt_tc_actions { #define PEDIT_OFFSET_SMAC_LAST_4_BYTES 0x8 __be16 l2_rewrite_dmac[3]; __be16 l2_rewrite_smac[3]; - + struct { + bool src_xlate; /* true => translate src, + * false => translate dst + * Mutually exclusive, i.e cannot set both + */ + bool l3_is_ipv4; /* false means L3 is ipv6 */ + struct bnxt_tc_l3_key l3; + struct bnxt_tc_l4_key l4; + } nat; }; struct bnxt_tc_flow { -- cgit v1.2.3-59-g8ed1b From 627c89d00fb969f9b3b4f3156716149631d2796c Mon Sep 17 00:00:00 2001 From: Sriharsha Basavapatna Date: Thu, 31 Oct 2019 01:07:48 -0400 Subject: bnxt_en: flow_offload: offload tunnel decap rules via indirect callbacks The decap (VXLAN tunnel) flow rules are not getting offloaded with upstream kernel. This is because TC block callback infrastructure has been updated to use indirect callbacks to get offloaded rules from other higher level devices (such as tunnels), instead of ndo_setup_tc(). Since the decap rules are applied to the tunnel devices (e.g, vxlan_sys), the driver should register for indirect TC callback with tunnel devices to get the rules for offloading. This patch updates the driver to register and process indirect TC block callbacks from VXLAN tunnels. Signed-off-by: Sriharsha Basavapatna Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 12 +++ drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 153 ++++++++++++++++++++++++++- 3 files changed, 165 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c24caaaf05ca..71f9c9c53301 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -10945,7 +10945,7 @@ static int bnxt_setup_tc_block_cb(enum tc_setup_type type, void *type_data, } } -static LIST_HEAD(bnxt_block_cb_list); +LIST_HEAD(bnxt_block_cb_list); static int bnxt_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 3e7d1fb1b0b1..a3545c846bfb 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -29,6 +29,8 @@ #include #endif +extern struct list_head bnxt_block_cb_list; + struct page_pool; struct tx_bd { @@ -1244,6 +1246,14 @@ struct bnxt_tc_flow_stats { u64 bytes; }; +#ifdef CONFIG_BNXT_FLOWER_OFFLOAD +struct bnxt_flower_indr_block_cb_priv { + struct net_device *tunnel_netdev; + struct bnxt *bp; + struct list_head list; +}; +#endif + struct bnxt_tc_info { bool enabled; @@ -1821,6 +1831,8 @@ struct bnxt { u16 *cfa_code_map; /* cfa_code -> vf_idx map */ u8 switch_id[8]; struct bnxt_tc_info *tc_info; + struct list_head tc_indr_block_list; + struct notifier_block tc_netdev_nb; struct dentry *debugfs_pdev; struct device *hwmon_dev; }; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index c801666a85fd..174412a55e53 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "bnxt_hsi.h" #include "bnxt.h" @@ -1841,6 +1842,147 @@ int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid, } } +static int bnxt_tc_setup_indr_block_cb(enum tc_setup_type type, + void *type_data, void *cb_priv) +{ + struct bnxt_flower_indr_block_cb_priv *priv = cb_priv; + struct flow_cls_offload *flower = type_data; + struct bnxt *bp = priv->bp; + + if (flower->common.chain_index) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return bnxt_tc_setup_flower(bp, bp->pf.fw_fid, flower); + default: + return -EOPNOTSUPP; + } +} + +static struct bnxt_flower_indr_block_cb_priv * +bnxt_tc_indr_block_cb_lookup(struct bnxt *bp, struct net_device *netdev) +{ + struct bnxt_flower_indr_block_cb_priv *cb_priv; + + /* All callback list access should be protected by RTNL. */ + ASSERT_RTNL(); + + list_for_each_entry(cb_priv, &bp->tc_indr_block_list, list) + if (cb_priv->tunnel_netdev == netdev) + return cb_priv; + + return NULL; +} + +static void bnxt_tc_setup_indr_rel(void *cb_priv) +{ + struct bnxt_flower_indr_block_cb_priv *priv = cb_priv; + + list_del(&priv->list); + kfree(priv); +} + +static int bnxt_tc_setup_indr_block(struct net_device *netdev, struct bnxt *bp, + struct flow_block_offload *f) +{ + struct bnxt_flower_indr_block_cb_priv *cb_priv; + struct flow_block_cb *block_cb; + + if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + switch (f->command) { + case FLOW_BLOCK_BIND: + cb_priv = kmalloc(sizeof(*cb_priv), GFP_KERNEL); + if (!cb_priv) + return -ENOMEM; + + cb_priv->tunnel_netdev = netdev; + cb_priv->bp = bp; + list_add(&cb_priv->list, &bp->tc_indr_block_list); + + block_cb = flow_block_cb_alloc(bnxt_tc_setup_indr_block_cb, + cb_priv, cb_priv, + bnxt_tc_setup_indr_rel); + if (IS_ERR(block_cb)) { + list_del(&cb_priv->list); + kfree(cb_priv); + return PTR_ERR(block_cb); + } + + flow_block_cb_add(block_cb, f); + list_add_tail(&block_cb->driver_list, &bnxt_block_cb_list); + break; + case FLOW_BLOCK_UNBIND: + cb_priv = bnxt_tc_indr_block_cb_lookup(bp, netdev); + if (!cb_priv) + return -ENOENT; + + block_cb = flow_block_cb_lookup(f->block, + bnxt_tc_setup_indr_block_cb, + cb_priv); + if (!block_cb) + return -ENOENT; + + flow_block_cb_remove(block_cb, f); + list_del(&block_cb->driver_list); + break; + default: + return -EOPNOTSUPP; + } + return 0; +} + +static int bnxt_tc_setup_indr_cb(struct net_device *netdev, void *cb_priv, + enum tc_setup_type type, void *type_data) +{ + switch (type) { + case TC_SETUP_BLOCK: + return bnxt_tc_setup_indr_block(netdev, cb_priv, type_data); + default: + return -EOPNOTSUPP; + } +} + +static bool bnxt_is_netdev_indr_offload(struct net_device *netdev) +{ + return netif_is_vxlan(netdev); +} + +static int bnxt_tc_indr_block_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct net_device *netdev; + struct bnxt *bp; + int rc; + + netdev = netdev_notifier_info_to_dev(ptr); + if (!bnxt_is_netdev_indr_offload(netdev)) + return NOTIFY_OK; + + bp = container_of(nb, struct bnxt, tc_netdev_nb); + + switch (event) { + case NETDEV_REGISTER: + rc = __flow_indr_block_cb_register(netdev, bp, + bnxt_tc_setup_indr_cb, + bp); + if (rc) + netdev_info(bp->dev, + "Failed to register indirect blk: dev: %s", + netdev->name); + break; + case NETDEV_UNREGISTER: + __flow_indr_block_cb_unregister(netdev, + bnxt_tc_setup_indr_cb, + bp); + break; + } + + return NOTIFY_DONE; +} + static const struct rhashtable_params bnxt_tc_flow_ht_params = { .head_offset = offsetof(struct bnxt_tc_flow_node, node), .key_offset = offsetof(struct bnxt_tc_flow_node, cookie), @@ -1924,7 +2066,15 @@ int bnxt_init_tc(struct bnxt *bp) bp->dev->hw_features |= NETIF_F_HW_TC; bp->dev->features |= NETIF_F_HW_TC; bp->tc_info = tc_info; - return 0; + + /* init indirect block notifications */ + INIT_LIST_HEAD(&bp->tc_indr_block_list); + bp->tc_netdev_nb.notifier_call = bnxt_tc_indr_block_event; + rc = register_netdevice_notifier(&bp->tc_netdev_nb); + if (!rc) + return 0; + + rhashtable_destroy(&tc_info->encap_table); destroy_decap_table: rhashtable_destroy(&tc_info->decap_table); @@ -1946,6 +2096,7 @@ void bnxt_shutdown_tc(struct bnxt *bp) if (!bnxt_tc_flower_enabled(bp)) return; + unregister_netdevice_notifier(&bp->tc_netdev_nb); rhashtable_destroy(&tc_info->flow_table); rhashtable_destroy(&tc_info->l2_table); rhashtable_destroy(&tc_info->decap_l2_table); -- cgit v1.2.3-59-g8ed1b From aa46dffff452f7c6d907c4e6a0062e2c53a87fc0 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Thu, 31 Oct 2019 01:07:49 -0400 Subject: bnxt_en: Improve bnxt_ulp_stop()/bnxt_ulp_start() call sequence. We call bnxt_ulp_stop() to notify the RDMA driver that some error or imminent reset is about to happen. After that we always call some variants of bnxt_close(). In the next patch, we will integrate the recently added error recovery with the RDMA driver. In response to ulp_stop, the RDMA driver may free MSIX vectors and that will also trigger bnxt_close(). To avoid bnxt_close() from being called twice, we set a new flag after ulp_stop is called. If the RDMA driver frees MSIX vectors while the new flag is set, we will not call bnxt_close(), knowing that it will happen in due course. With this change, we must make sure that the bnxt_close() call after ulp_stop will reset IRQ. Modify bnxt_reset_task() accordingly if we call ulp_stop. Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 18 ++++++++++-------- drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 10 ++++++++-- drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h | 3 ++- 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 71f9c9c53301..237b22057913 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -9927,12 +9927,15 @@ static void bnxt_reset_task(struct bnxt *bp, bool silent) if (netif_running(bp->dev)) { int rc; - if (!silent) + if (silent) { + bnxt_close_nic(bp, false, false); + bnxt_open_nic(bp, false, false); + } else { bnxt_ulp_stop(bp); - bnxt_close_nic(bp, false, false); - rc = bnxt_open_nic(bp, false, false); - if (!silent && !rc) - bnxt_ulp_start(bp); + bnxt_close_nic(bp, true, false); + rc = bnxt_open_nic(bp, true, false); + bnxt_ulp_start(bp, rc); + } } } @@ -12005,10 +12008,9 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev) if (!err && netif_running(netdev)) err = bnxt_open(netdev); - if (!err) { + if (!err) result = PCI_ERS_RESULT_RECOVERED; - bnxt_ulp_start(bp); - } + bnxt_ulp_start(bp, err); } if (result != PCI_ERS_RESULT_RECOVERED && netif_running(netdev)) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index b2c160947fc8..077fd101be60 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -182,7 +182,7 @@ static int bnxt_free_msix_vecs(struct bnxt_en_dev *edev, int ulp_id) edev->ulp_tbl[ulp_id].msix_requested = 0; edev->flags &= ~BNXT_EN_FLAG_MSIX_REQUESTED; - if (netif_running(dev)) { + if (netif_running(dev) && !(edev->flags & BNXT_EN_FLAG_ULP_STOPPED)) { bnxt_close_nic(bp, true, false); bnxt_open_nic(bp, true, false); } @@ -266,6 +266,7 @@ void bnxt_ulp_stop(struct bnxt *bp) if (!edev) return; + edev->flags |= BNXT_EN_FLAG_ULP_STOPPED; for (i = 0; i < BNXT_MAX_ULP; i++) { struct bnxt_ulp *ulp = &edev->ulp_tbl[i]; @@ -276,7 +277,7 @@ void bnxt_ulp_stop(struct bnxt *bp) } } -void bnxt_ulp_start(struct bnxt *bp) +void bnxt_ulp_start(struct bnxt *bp, int err) { struct bnxt_en_dev *edev = bp->edev; struct bnxt_ulp_ops *ops; @@ -285,6 +286,11 @@ void bnxt_ulp_start(struct bnxt *bp) if (!edev) return; + edev->flags &= ~BNXT_EN_FLAG_ULP_STOPPED; + + if (err) + return; + for (i = 0; i < BNXT_MAX_ULP; i++) { struct bnxt_ulp *ulp = &edev->ulp_tbl[i]; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h index cd78453d0bf0..9895406b9830 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h @@ -64,6 +64,7 @@ struct bnxt_en_dev { #define BNXT_EN_FLAG_ROCE_CAP (BNXT_EN_FLAG_ROCEV1_CAP | \ BNXT_EN_FLAG_ROCEV2_CAP) #define BNXT_EN_FLAG_MSIX_REQUESTED 0x4 + #define BNXT_EN_FLAG_ULP_STOPPED 0x8 const struct bnxt_en_ops *en_ops; struct bnxt_ulp ulp_tbl[BNXT_MAX_ULP]; }; @@ -92,7 +93,7 @@ int bnxt_get_ulp_msix_num(struct bnxt *bp); int bnxt_get_ulp_msix_base(struct bnxt *bp); int bnxt_get_ulp_stat_ctxs(struct bnxt *bp); void bnxt_ulp_stop(struct bnxt *bp); -void bnxt_ulp_start(struct bnxt *bp); +void bnxt_ulp_start(struct bnxt *bp, int err); void bnxt_ulp_sriov_cfg(struct bnxt *bp, int num_vfs); void bnxt_ulp_shutdown(struct bnxt *bp); void bnxt_ulp_irq_stop(struct bnxt *bp); -- cgit v1.2.3-59-g8ed1b From f3a6d206c25ad9490f3a3c6d62baba9504227a75 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Thu, 31 Oct 2019 01:07:50 -0400 Subject: bnxt_en: Call bnxt_ulp_stop()/bnxt_ulp_start() during error recovery. Notify the RDMA driver by calling the bnxt_ulp_stop()/bnxt_ulp_start() hooks during error recovery. The current ULP IRQ start/stop sequence in error recovery (which is insufficient) is replaced with the full reset sequence when we call bnxt_ulp_stop()/bnxt_ulp_start(). Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 237b22057913..a8a2a6f79b6f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -8762,6 +8762,8 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) } if (resc_reinit || fw_reset) { if (fw_reset) { + if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) + bnxt_ulp_stop(bp); rc = bnxt_fw_init_one(bp); if (rc) { set_bit(BNXT_STATE_ABORT_ERR, &bp->state); @@ -9224,13 +9226,16 @@ static int bnxt_open(struct net_device *dev) if (rc) { bnxt_hwrm_if_change(bp, false); } else { - if (test_and_clear_bit(BNXT_STATE_FW_RESET_DET, &bp->state) && - BNXT_PF(bp)) { - struct bnxt_pf_info *pf = &bp->pf; - int n = pf->active_vfs; + if (test_and_clear_bit(BNXT_STATE_FW_RESET_DET, &bp->state)) { + if (BNXT_PF(bp)) { + struct bnxt_pf_info *pf = &bp->pf; + int n = pf->active_vfs; - if (n) - bnxt_cfg_hw_sriov(bp, &n, true); + if (n) + bnxt_cfg_hw_sriov(bp, &n, true); + } + if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)) + bnxt_ulp_start(bp, 0); } bnxt_hwmon_open(bp); } @@ -10051,8 +10056,8 @@ static void bnxt_reset(struct bnxt *bp, bool silent) static void bnxt_fw_reset_close(struct bnxt *bp) { + bnxt_ulp_stop(bp); __bnxt_close_nic(bp, true, false); - bnxt_ulp_irq_stop(bp); bnxt_clear_int_mode(bp); bnxt_hwrm_func_drv_unrgtr(bp); bnxt_free_ctx_mem(bp); @@ -10734,13 +10739,13 @@ static void bnxt_fw_reset_task(struct work_struct *work) clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); dev_close(bp->dev); } - bnxt_ulp_irq_restart(bp, rc); - rtnl_unlock(); bp->fw_reset_state = 0; /* Make sure fw_reset_state is 0 before clearing the flag */ smp_mb__before_atomic(); clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); + bnxt_ulp_start(bp, rc); + rtnl_unlock(); break; } return; -- cgit v1.2.3-59-g8ed1b From 6a68749dbd777b832e1d84265bd6d8b39d1843ac Mon Sep 17 00:00:00 2001 From: Pavan Chebbi Date: Thu, 31 Oct 2019 01:07:51 -0400 Subject: bnxt_en: Call bnxt_ulp_stop()/bnxt_ulp_start() during suspend/resume. Inform the RDMA driver to stop/start during suspend/resume. The RDMA driver needs to stop and start just like error recovery. Signed-off-by: Pavan Chebbi Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index a8a2a6f79b6f..477b8d7d94d1 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -11901,6 +11901,7 @@ static int bnxt_suspend(struct device *device) int rc = 0; rtnl_lock(); + bnxt_ulp_stop(bp); if (netif_running(dev)) { netif_device_detach(dev); rc = bnxt_close(dev); @@ -11934,6 +11935,7 @@ static int bnxt_resume(struct device *device) } resume_exit: + bnxt_ulp_start(bp, rc); rtnl_unlock(); return rc; } -- cgit v1.2.3-59-g8ed1b