diff options
Diffstat (limited to 'drivers/net/ethernet/netronome')
54 files changed, 8098 insertions, 2965 deletions
diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile index 9cff3d48acbc..9c0861d03634 100644 --- a/drivers/net/ethernet/netronome/nfp/Makefile +++ b/drivers/net/ethernet/netronome/nfp/Makefile @@ -5,6 +5,7 @@ nfp-objs := \ nfpcore/nfp6000_pcie.o \ nfpcore/nfp_cppcore.o \ nfpcore/nfp_cpplib.o \ + nfpcore/nfp_dev.o \ nfpcore/nfp_hwinfo.o \ nfpcore/nfp_mip.o \ nfpcore/nfp_mutex.o \ @@ -19,18 +20,25 @@ nfp-objs := \ ccm_mbox.o \ devlink_param.o \ nfp_asm.o \ + nfd3/dp.o \ + nfd3/rings.o \ + nfd3/xsk.o \ + nfdk/dp.o \ + nfdk/rings.o \ nfp_app.o \ nfp_app_nic.o \ nfp_devlink.o \ nfp_hwmon.o \ nfp_main.o \ nfp_net_common.o \ + nfp_net_dp.o \ nfp_net_ctrl.o \ nfp_net_debugdump.o \ nfp_net_ethtool.o \ nfp_net_main.o \ nfp_net_repr.o \ nfp_net_sriov.o \ + nfp_net_xsk.o \ nfp_netvf_main.o \ nfp_port.o \ nfp_shared_buf.o \ diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index e31f8fbbc696..df2ab5cbd49b 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -4233,7 +4233,7 @@ static void nfp_bpf_opt_ldst_gather(struct nfp_prog *nfp_prog) } /* If the chain is ended by an load/store pair then this - * could serve as the new head of the the next chain. + * could serve as the new head of the next chain. 
*/ if (curr_pair_is_memcpy(meta1, meta2)) { head_ld_meta = meta1; diff --git a/drivers/net/ethernet/netronome/nfp/crypto/tls.c b/drivers/net/ethernet/netronome/nfp/crypto/tls.c index 84d66d138c3d..f80f1a6953fa 100644 --- a/drivers/net/ethernet/netronome/nfp/crypto/tls.c +++ b/drivers/net/ethernet/netronome/nfp/crypto/tls.c @@ -289,7 +289,7 @@ nfp_net_tls_add(struct net_device *netdev, struct sock *sk, switch (sk->sk_family) { #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: - if (sk->sk_ipv6only || + if (ipv6_only_sock(sk) || ipv6_addr_type(&sk->sk_v6_daddr) != IPV6_ADDR_MAPPED) { req_sz = sizeof(struct nfp_crypto_req_add_v6); ipv6 = true; @@ -474,6 +474,7 @@ int nfp_net_tls_rx_resync_req(struct net_device *netdev, { struct nfp_net *nn = netdev_priv(netdev); struct nfp_net_tls_offload_ctx *ntls; + struct net *net = dev_net(netdev); struct ipv6hdr *ipv6h; struct tcphdr *th; struct iphdr *iph; @@ -494,13 +495,13 @@ int nfp_net_tls_rx_resync_req(struct net_device *netdev, switch (ipv6h->version) { case 4: - sk = inet_lookup_established(dev_net(netdev), &tcp_hashinfo, + sk = inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, iph->saddr, th->source, iph->daddr, th->dest, netdev->ifindex); break; #if IS_ENABLED(CONFIG_IPV6) case 6: - sk = __inet6_lookup_established(dev_net(netdev), &tcp_hashinfo, + sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, &ipv6h->saddr, th->source, &ipv6h->daddr, ntohs(th->dest), netdev->ifindex, 0); diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index a3242b36e216..2b383d92d7f5 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -149,7 +149,7 @@ nfp_fl_pre_lag(struct nfp_app *app, const struct flow_action_entry *act, } /* Pre_lag action must be first on action list. - * If other actions already exist they need pushed forward. 
+ * If other actions already exist they need to be pushed forward. */ if (act_len) memmove(nfp_flow->action_data + act_size, @@ -220,7 +220,8 @@ nfp_fl_output(struct nfp_app *app, struct nfp_fl_output *output, } output->port = cpu_to_be32(NFP_FL_LAG_OUT | gid); } else if (nfp_flower_internal_port_can_offload(app, out_dev)) { - if (!(priv->flower_ext_feats & NFP_FL_FEATS_PRE_TUN_RULES)) { + if (!(priv->flower_ext_feats & NFP_FL_FEATS_PRE_TUN_RULES) && + !(priv->flower_ext_feats & NFP_FL_FEATS_DECAP_V2)) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: pre-tunnel rules not supported in loaded firmware"); return -EOPNOTSUPP; } @@ -426,6 +427,12 @@ nfp_fl_set_tun(struct nfp_app *app, struct nfp_fl_set_tun *set_tun, return -EOPNOTSUPP; } + if (ip_tun->key.tun_flags & ~NFP_FL_SUPPORTED_UDP_TUN_FLAGS) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: loaded firmware does not support tunnel flag offload"); + return -EOPNOTSUPP; + } + set_tun->head.jump_id = NFP_FL_ACTION_OPCODE_SET_TUNNEL; set_tun->head.len_lw = act_size >> NFP_FL_LW_SIZ; @@ -435,7 +442,8 @@ nfp_fl_set_tun(struct nfp_app *app, struct nfp_fl_set_tun *set_tun, FIELD_PREP(NFP_FL_PRE_TUN_INDEX, pretun_idx); set_tun->tun_type_index = cpu_to_be32(tmp_set_ip_tun_type_index); - set_tun->tun_id = ip_tun->key.tun_id; + if (ip_tun->key.tun_flags & NFP_FL_TUNNEL_KEY) + set_tun->tun_id = ip_tun->key.tun_id; if (ip_tun->key.ttl) { set_tun->ttl = ip_tun->key.ttl; @@ -473,17 +481,11 @@ nfp_fl_set_tun(struct nfp_app *app, struct nfp_fl_set_tun *set_tun, set_tun->ttl = ip4_dst_hoplimit(&rt->dst); ip_rt_put(rt); } else { - set_tun->ttl = net->ipv4.sysctl_ip_default_ttl; + set_tun->ttl = READ_ONCE(net->ipv4.sysctl_ip_default_ttl); } } set_tun->tos = ip_tun->key.tos; - - if (!(ip_tun->key.tun_flags & NFP_FL_TUNNEL_KEY) || - ip_tun->key.tun_flags & ~NFP_FL_SUPPORTED_UDP_TUN_FLAGS) { - NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support tunnel flag offload"); - return -EOPNOTSUPP; - } 
set_tun->tun_flags = ip_tun->key.tun_flags; if (tun_type == NFP_FL_TUNNEL_GENEVE) { @@ -673,9 +675,9 @@ nfp_fl_set_ip6_hop_limit_flow_label(u32 off, __be32 exact, __be32 mask, fl_hl_mask->hop_limit; break; case round_down(offsetof(struct ipv6hdr, flow_lbl), 4): - if (mask & ~IPV6_FLOW_LABEL_MASK || - exact & ~IPV6_FLOW_LABEL_MASK) { - NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit IPv6 flow label action"); + if (mask & ~IPV6_FLOWINFO_MASK || + exact & ~IPV6_FLOWINFO_MASK) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit IPv6 flow info action"); return -EOPNOTSUPP; } @@ -922,6 +924,51 @@ nfp_fl_pedit(const struct flow_action_entry *act, } } +static struct nfp_fl_meter *nfp_fl_meter(char *act_data) +{ + size_t act_size = sizeof(struct nfp_fl_meter); + struct nfp_fl_meter *meter_act; + + meter_act = (struct nfp_fl_meter *)act_data; + + memset(meter_act, 0, act_size); + + meter_act->head.jump_id = NFP_FL_ACTION_OPCODE_METER; + meter_act->head.len_lw = act_size >> NFP_FL_LW_SIZ; + + return meter_act; +} + +static int +nfp_flower_meter_action(struct nfp_app *app, + const struct flow_action_entry *action, + struct nfp_fl_payload *nfp_fl, int *a_len, + struct net_device *netdev, + struct netlink_ext_ack *extack) +{ + struct nfp_fl_meter *fl_meter; + u32 meter_id; + + if (*a_len + sizeof(struct nfp_fl_meter) > NFP_FL_MAX_A_SIZ) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload:meter action size beyond the allowed maximum"); + return -EOPNOTSUPP; + } + + meter_id = action->hw_index; + if (!nfp_flower_search_meter_entry(app, meter_id)) { + NL_SET_ERR_MSG_MOD(extack, + "can not offload flow table with unsupported police action."); + return -EOPNOTSUPP; + } + + fl_meter = nfp_fl_meter(&nfp_fl->action_data[*a_len]); + *a_len += sizeof(struct nfp_fl_meter); + fl_meter->meter_id = cpu_to_be32(meter_id); + + return 0; +} + static int nfp_flower_output_action(struct nfp_app *app, const struct flow_action_entry *act, @@ -985,6 +1032,7 @@ 
nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act, struct nfp_flower_pedit_acts *set_act, bool *pkt_host, struct netlink_ext_ack *extack, int act_idx) { + struct nfp_flower_priv *fl_priv = app->priv; struct nfp_fl_pre_tunnel *pre_tun; struct nfp_fl_set_tun *set_tun; struct nfp_fl_push_vlan *psh_v; @@ -1149,6 +1197,18 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act, *pkt_host = true; break; + case FLOW_ACTION_POLICE: + if (!(fl_priv->flower_ext_feats & NFP_FL_FEATS_QOS_METER)) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: unsupported police action in action list"); + return -EOPNOTSUPP; + } + + err = nfp_flower_meter_action(app, act, nfp_fl, a_len, netdev, + extack); + if (err) + return err; + break; default: /* Currently we do not handle any other actions. */ NL_SET_ERR_MSG_MOD(extack, "unsupported offload: unsupported action in action list"); diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index 784292b16290..2df2af1da716 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -85,6 +85,7 @@ #define NFP_FL_ACTION_OPCODE_SET_TCP 15 #define NFP_FL_ACTION_OPCODE_PRE_LAG 16 #define NFP_FL_ACTION_OPCODE_PRE_TUNNEL 17 +#define NFP_FL_ACTION_OPCODE_METER 24 #define NFP_FL_ACTION_OPCODE_PUSH_GENEVE 26 #define NFP_FL_ACTION_OPCODE_NUM 32 @@ -95,8 +96,6 @@ #define NFP_FL_PUSH_VLAN_PRIO GENMASK(15, 13) #define NFP_FL_PUSH_VLAN_VID GENMASK(11, 0) -#define IPV6_FLOW_LABEL_MASK cpu_to_be32(0x000fffff) - /* LAG ports */ #define NFP_FL_LAG_OUT 0xC0DE0000 @@ -260,6 +259,12 @@ struct nfp_fl_set_mpls { __be32 lse; }; +struct nfp_fl_meter { + struct nfp_fl_act_head head; + __be16 reserved; + __be32 meter_id; +}; + /* Metadata with L2 (1W/4B) * ---------------------------------------------------------------- * 3 2 1 @@ -723,6 +728,8 @@ static inline bool 
nfp_fl_is_netdev_to_offload(struct net_device *netdev) return true; if (netif_is_gretap(netdev)) return true; + if (netif_is_ip6gretap(netdev)) + return true; return false; } diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c index bfd7d1c35076..f693119541d5 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.c +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.c @@ -1,6 +1,9 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* Copyright (C) 2021 Corigine, Inc. */ +#include <net/tc_act/tc_csum.h> +#include <net/tc_act/tc_ct.h> + #include "conntrack.h" #include "../nfp_port.h" @@ -56,9 +59,17 @@ bool is_pre_ct_flow(struct flow_cls_offload *flow) int i; flow_action_for_each(i, act, &flow->rule->action) { - if (act->id == FLOW_ACTION_CT && !act->ct.action) - return true; + if (act->id == FLOW_ACTION_CT) { + /* The pre_ct rule only have the ct or ct nat action, cannot + * contains other ct action e.g ct commit and so on. + */ + if ((!act->ct.action || act->ct.action == TCA_CT_ACT_NAT)) + return true; + else + return false; + } } + return false; } @@ -66,24 +77,173 @@ bool is_post_ct_flow(struct flow_cls_offload *flow) { struct flow_rule *rule = flow_cls_offload_flow_rule(flow); struct flow_dissector *dissector = rule->match.dissector; + struct flow_action_entry *act; + bool exist_ct_clear = false; struct flow_match_ct ct; + int i; + + /* post ct entry cannot contains any ct action except ct_clear. */ + flow_action_for_each(i, act, &flow->rule->action) { + if (act->id == FLOW_ACTION_CT) { + /* ignore ct clear action. 
*/ + if (act->ct.action == TCA_CT_ACT_CLEAR) { + exist_ct_clear = true; + continue; + } + + return false; + } + } if (dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_CT)) { flow_rule_match_ct(rule, &ct); if (ct.key->ct_state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED) return true; + } else { + /* when do nat with ct, the post ct entry ignore the ct status, + * will match the nat field(sip/dip) instead. In this situation, + * the flow chain index is not zero and contains ct clear action. + */ + if (flow->common.chain_index && exist_ct_clear) + return true; } + return false; } +/** + * get_mangled_key() - Mangle the key if mangle act exists + * @rule: rule that carries the actions + * @buf: pointer to key to be mangled + * @offset: used to adjust mangled offset in L2/L3/L4 header + * @key_sz: key size + * @htype: mangling type + * + * Returns buf where the mangled key stores. + */ +static void *get_mangled_key(struct flow_rule *rule, void *buf, + u32 offset, size_t key_sz, + enum flow_action_mangle_base htype) +{ + struct flow_action_entry *act; + u32 *val = (u32 *)buf; + u32 off, msk, key; + int i; + + flow_action_for_each(i, act, &rule->action) { + if (act->id == FLOW_ACTION_MANGLE && + act->mangle.htype == htype) { + off = act->mangle.offset - offset; + msk = act->mangle.mask; + key = act->mangle.val; + + /* Mangling is supposed to be u32 aligned */ + if (off % 4 || off >= key_sz) + continue; + + val[off >> 2] &= msk; + val[off >> 2] |= key; + } + } + + return buf; +} + +/* Only tos and ttl are involved in flow_match_ip structure, which + * doesn't conform to the layout of ip/ipv6 header definition. So + * they need particular process here: fill them into the ip/ipv6 + * header, so that mangling actions can work directly. 
+ */ +#define NFP_IPV4_TOS_MASK GENMASK(23, 16) +#define NFP_IPV4_TTL_MASK GENMASK(31, 24) +#define NFP_IPV6_TCLASS_MASK GENMASK(27, 20) +#define NFP_IPV6_HLIMIT_MASK GENMASK(7, 0) +static void *get_mangled_tos_ttl(struct flow_rule *rule, void *buf, + bool is_v6) +{ + struct flow_match_ip match; + /* IPv4's ttl field is in third dword. */ + __be32 ip_hdr[3]; + u32 tmp, hdr_len; + + flow_rule_match_ip(rule, &match); + + if (is_v6) { + tmp = FIELD_PREP(NFP_IPV6_TCLASS_MASK, match.key->tos); + ip_hdr[0] = cpu_to_be32(tmp); + tmp = FIELD_PREP(NFP_IPV6_HLIMIT_MASK, match.key->ttl); + ip_hdr[1] = cpu_to_be32(tmp); + hdr_len = 2 * sizeof(__be32); + } else { + tmp = FIELD_PREP(NFP_IPV4_TOS_MASK, match.key->tos); + ip_hdr[0] = cpu_to_be32(tmp); + tmp = FIELD_PREP(NFP_IPV4_TTL_MASK, match.key->ttl); + ip_hdr[2] = cpu_to_be32(tmp); + hdr_len = 3 * sizeof(__be32); + } + + get_mangled_key(rule, ip_hdr, 0, hdr_len, + is_v6 ? FLOW_ACT_MANGLE_HDR_TYPE_IP6 : + FLOW_ACT_MANGLE_HDR_TYPE_IP4); + + match.key = buf; + + if (is_v6) { + tmp = be32_to_cpu(ip_hdr[0]); + match.key->tos = FIELD_GET(NFP_IPV6_TCLASS_MASK, tmp); + tmp = be32_to_cpu(ip_hdr[1]); + match.key->ttl = FIELD_GET(NFP_IPV6_HLIMIT_MASK, tmp); + } else { + tmp = be32_to_cpu(ip_hdr[0]); + match.key->tos = FIELD_GET(NFP_IPV4_TOS_MASK, tmp); + tmp = be32_to_cpu(ip_hdr[2]); + match.key->ttl = FIELD_GET(NFP_IPV4_TTL_MASK, tmp); + } + + return buf; +} + +/* Note entry1 and entry2 are not swappable. only skip ip and + * tport merge check for pre_ct and post_ct when pre_ct do nat. + */ +static bool nfp_ct_merge_check_cannot_skip(struct nfp_fl_ct_flow_entry *entry1, + struct nfp_fl_ct_flow_entry *entry2) +{ + /* only pre_ct have NFP_FL_ACTION_DO_NAT flag. 
*/ + if ((entry1->flags & NFP_FL_ACTION_DO_NAT) && + entry2->type == CT_TYPE_POST_CT) + return false; + + return true; +} + +/* Note entry1 and entry2 are not swappable, entry1 should be + * the former flow whose mangle action need be taken into account + * if existed, and entry2 should be the latter flow whose action + * we don't care. + */ static int nfp_ct_merge_check(struct nfp_fl_ct_flow_entry *entry1, struct nfp_fl_ct_flow_entry *entry2) { unsigned int ovlp_keys = entry1->rule->match.dissector->used_keys & entry2->rule->match.dissector->used_keys; - bool out; + bool out, is_v6 = false; + u8 ip_proto = 0; + /* Temporary buffer for mangling keys, 64 is enough to cover max + * struct size of key in various fields that may be mangled. + * Supported fields to mangle: + * mac_src/mac_dst(struct flow_match_eth_addrs, 12B) + * nw_tos/nw_ttl(struct flow_match_ip, 2B) + * nw_src/nw_dst(struct flow_match_ipv4/6_addrs, 32B) + * tp_src/tp_dst(struct flow_match_ports, 4B) + */ + char buf[64]; + + if (entry1->netdev && entry2->netdev && + entry1->netdev != entry2->netdev) + return -EINVAL; - /* check the overlapped fields one by one, the unmasked part + /* Check the overlapped fields one by one, the unmasked part * should not conflict with each other. */ if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_CONTROL)) { @@ -101,36 +261,84 @@ static int nfp_ct_merge_check(struct nfp_fl_ct_flow_entry *entry1, flow_rule_match_basic(entry1->rule, &match1); flow_rule_match_basic(entry2->rule, &match2); + + /* n_proto field is a must in ct-related flows, + * it should be either ipv4 or ipv6. 
+ */ + is_v6 = match1.key->n_proto == htons(ETH_P_IPV6); + /* ip_proto field is a must when port field is cared */ + ip_proto = match1.key->ip_proto; + COMPARE_UNMASKED_FIELDS(match1, match2, &out); if (out) goto check_failed; } - if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS)) { + /* if pre ct entry do nat, the nat ip exists in nft entry, + * will be do merge check when do nft and post ct merge, + * so skip this ip merge check here. + */ + if ((ovlp_keys & BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS)) && + nfp_ct_merge_check_cannot_skip(entry1, entry2)) { struct flow_match_ipv4_addrs match1, match2; flow_rule_match_ipv4_addrs(entry1->rule, &match1); flow_rule_match_ipv4_addrs(entry2->rule, &match2); + + memcpy(buf, match1.key, sizeof(*match1.key)); + match1.key = get_mangled_key(entry1->rule, buf, + offsetof(struct iphdr, saddr), + sizeof(*match1.key), + FLOW_ACT_MANGLE_HDR_TYPE_IP4); + COMPARE_UNMASKED_FIELDS(match1, match2, &out); if (out) goto check_failed; } - if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS)) { + /* if pre ct entry do nat, the nat ip exists in nft entry, + * will be do merge check when do nft and post ct merge, + * so skip this ip merge check here. + */ + if ((ovlp_keys & BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS)) && + nfp_ct_merge_check_cannot_skip(entry1, entry2)) { struct flow_match_ipv6_addrs match1, match2; flow_rule_match_ipv6_addrs(entry1->rule, &match1); flow_rule_match_ipv6_addrs(entry2->rule, &match2); + + memcpy(buf, match1.key, sizeof(*match1.key)); + match1.key = get_mangled_key(entry1->rule, buf, + offsetof(struct ipv6hdr, saddr), + sizeof(*match1.key), + FLOW_ACT_MANGLE_HDR_TYPE_IP6); + COMPARE_UNMASKED_FIELDS(match1, match2, &out); if (out) goto check_failed; } - if (ovlp_keys & BIT(FLOW_DISSECTOR_KEY_PORTS)) { + /* if pre ct entry do nat, the nat tport exists in nft entry, + * will be do merge check when do nft and post ct merge, + * so skip this tport merge check here. 
+ */ + if ((ovlp_keys & BIT(FLOW_DISSECTOR_KEY_PORTS)) && + nfp_ct_merge_check_cannot_skip(entry1, entry2)) { + enum flow_action_mangle_base htype = FLOW_ACT_MANGLE_UNSPEC; struct flow_match_ports match1, match2; flow_rule_match_ports(entry1->rule, &match1); flow_rule_match_ports(entry2->rule, &match2); + + if (ip_proto == IPPROTO_UDP) + htype = FLOW_ACT_MANGLE_HDR_TYPE_UDP; + else if (ip_proto == IPPROTO_TCP) + htype = FLOW_ACT_MANGLE_HDR_TYPE_TCP; + + memcpy(buf, match1.key, sizeof(*match1.key)); + match1.key = get_mangled_key(entry1->rule, buf, 0, + sizeof(*match1.key), htype); + COMPARE_UNMASKED_FIELDS(match1, match2, &out); if (out) goto check_failed; @@ -141,6 +349,12 @@ static int nfp_ct_merge_check(struct nfp_fl_ct_flow_entry *entry1, flow_rule_match_eth_addrs(entry1->rule, &match1); flow_rule_match_eth_addrs(entry2->rule, &match2); + + memcpy(buf, match1.key, sizeof(*match1.key)); + match1.key = get_mangled_key(entry1->rule, buf, 0, + sizeof(*match1.key), + FLOW_ACT_MANGLE_HDR_TYPE_ETH); + COMPARE_UNMASKED_FIELDS(match1, match2, &out); if (out) goto check_failed; @@ -181,6 +395,8 @@ static int nfp_ct_merge_check(struct nfp_fl_ct_flow_entry *entry1, flow_rule_match_ip(entry1->rule, &match1); flow_rule_match_ip(entry2->rule, &match2); + + match1.key = get_mangled_tos_ttl(entry1->rule, buf, is_v6); COMPARE_UNMASKED_FIELDS(match1, match2, &out); if (out) goto check_failed; @@ -252,77 +468,46 @@ check_failed: return -EINVAL; } -static int nfp_ct_check_mangle_merge(struct flow_action_entry *a_in, - struct flow_rule *rule) +static int nfp_ct_check_vlan_merge(struct flow_action_entry *a_in, + struct flow_rule *rule) { - enum flow_action_mangle_base htype = a_in->mangle.htype; - u32 offset = a_in->mangle.offset; + struct flow_match_vlan match; + + if (unlikely(flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN))) + return -EOPNOTSUPP; + + /* post_ct does not match VLAN KEY, can be merged. 
*/ + if (likely(!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN))) + return 0; + + switch (a_in->id) { + /* pre_ct has pop vlan, post_ct cannot match VLAN KEY, cannot be merged. */ + case FLOW_ACTION_VLAN_POP: + return -EOPNOTSUPP; - switch (htype) { - case FLOW_ACT_MANGLE_HDR_TYPE_ETH: - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) + case FLOW_ACTION_VLAN_PUSH: + case FLOW_ACTION_VLAN_MANGLE: + flow_rule_match_vlan(rule, &match); + /* different vlan id, cannot be merged. */ + if ((match.key->vlan_id & match.mask->vlan_id) ^ + (a_in->vlan.vid & match.mask->vlan_id)) return -EOPNOTSUPP; - break; - case FLOW_ACT_MANGLE_HDR_TYPE_IP4: - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) { - struct flow_match_ip match; - - flow_rule_match_ip(rule, &match); - if (offset == offsetof(struct iphdr, ttl) && - match.mask->ttl) - return -EOPNOTSUPP; - if (offset == round_down(offsetof(struct iphdr, tos), 4) && - match.mask->tos) - return -EOPNOTSUPP; - } - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) { - struct flow_match_ipv4_addrs match; - - flow_rule_match_ipv4_addrs(rule, &match); - if (offset == offsetof(struct iphdr, saddr) && - match.mask->src) - return -EOPNOTSUPP; - if (offset == offsetof(struct iphdr, daddr) && - match.mask->dst) - return -EOPNOTSUPP; - } - break; - case FLOW_ACT_MANGLE_HDR_TYPE_IP6: - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) { - struct flow_match_ip match; - - flow_rule_match_ip(rule, &match); - if (offset == round_down(offsetof(struct ipv6hdr, hop_limit), 4) && - match.mask->ttl) - return -EOPNOTSUPP; - /* for ipv6, tos and flow_lbl are in the same word */ - if (offset == round_down(offsetof(struct ipv6hdr, flow_lbl), 4) && - match.mask->tos) - return -EOPNOTSUPP; - } - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) { - struct flow_match_ipv6_addrs match; - - flow_rule_match_ipv6_addrs(rule, &match); - if (offset >= offsetof(struct ipv6hdr, saddr) && - offset < offsetof(struct 
ipv6hdr, daddr) && - memchr_inv(&match.mask->src, 0, sizeof(match.mask->src))) - return -EOPNOTSUPP; - if (offset >= offsetof(struct ipv6hdr, daddr) && - offset < sizeof(struct ipv6hdr) && - memchr_inv(&match.mask->dst, 0, sizeof(match.mask->dst))) - return -EOPNOTSUPP; - } - break; - case FLOW_ACT_MANGLE_HDR_TYPE_TCP: - case FLOW_ACT_MANGLE_HDR_TYPE_UDP: - /* currently only can modify ports */ - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) + + /* different tpid, cannot be merged. */ + if ((match.key->vlan_tpid & match.mask->vlan_tpid) ^ + (a_in->vlan.proto & match.mask->vlan_tpid)) return -EOPNOTSUPP; + + /* different priority, cannot be merged. */ + if ((match.key->vlan_priority & match.mask->vlan_priority) ^ + (a_in->vlan.prio & match.mask->vlan_priority)) + return -EOPNOTSUPP; + break; default: - break; + return -EOPNOTSUPP; } + return 0; } @@ -331,22 +516,18 @@ static int nfp_ct_merge_act_check(struct nfp_fl_ct_flow_entry *pre_ct_entry, struct nfp_fl_ct_flow_entry *nft_entry) { struct flow_action_entry *act; - int err, i; + int i, err; /* Check for pre_ct->action conflicts */ flow_action_for_each(i, act, &pre_ct_entry->rule->action) { switch (act->id) { - case FLOW_ACTION_MANGLE: - err = nfp_ct_check_mangle_merge(act, nft_entry->rule); - if (err) - return err; - err = nfp_ct_check_mangle_merge(act, post_ct_entry->rule); - if (err) - return err; - break; case FLOW_ACTION_VLAN_PUSH: case FLOW_ACTION_VLAN_POP: case FLOW_ACTION_VLAN_MANGLE: + err = nfp_ct_check_vlan_merge(act, post_ct_entry->rule); + if (err) + return err; + break; case FLOW_ACTION_MPLS_PUSH: case FLOW_ACTION_MPLS_POP: case FLOW_ACTION_MPLS_MANGLE: @@ -359,11 +540,6 @@ static int nfp_ct_merge_act_check(struct nfp_fl_ct_flow_entry *pre_ct_entry, /* Check for nft->action conflicts */ flow_action_for_each(i, act, &nft_entry->rule->action) { switch (act->id) { - case FLOW_ACTION_MANGLE: - err = nfp_ct_check_mangle_merge(act, post_ct_entry->rule); - if (err) - return err; - break; case 
FLOW_ACTION_VLAN_PUSH: case FLOW_ACTION_VLAN_POP: case FLOW_ACTION_VLAN_MANGLE: @@ -403,6 +579,12 @@ static int nfp_ct_check_meta(struct nfp_fl_ct_flow_entry *post_ct_entry, return -EINVAL; return 0; + } else { + /* post_ct with ct clear action will not match the + * ct status when nft is nat entry. + */ + if (nft_entry->flags & NFP_FL_ACTION_DO_MANGLE) + return 0; } return -EINVAL; @@ -442,6 +624,11 @@ nfp_fl_calc_key_layers_sz(struct nfp_fl_key_ls in_key_ls, uint16_t *map) key_size += sizeof(struct nfp_flower_ipv6); } + if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_QINQ) { + map[FLOW_PAY_QINQ] = key_size; + key_size += sizeof(struct nfp_flower_vlan); + } + if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_GRE) { map[FLOW_PAY_GRE] = key_size; if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) @@ -450,11 +637,6 @@ nfp_fl_calc_key_layers_sz(struct nfp_fl_key_ls in_key_ls, uint16_t *map) key_size += sizeof(struct nfp_flower_ipv4_gre_tun); } - if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_QINQ) { - map[FLOW_PAY_QINQ] = key_size; - key_size += sizeof(struct nfp_flower_vlan); - } - if ((in_key_ls.key_layer & NFP_FLOWER_LAYER_VXLAN) || (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_GENEVE)) { map[FLOW_PAY_UDP_TUN] = key_size; @@ -472,11 +654,37 @@ nfp_fl_calc_key_layers_sz(struct nfp_fl_key_ls in_key_ls, uint16_t *map) return key_size; } +/* get the csum flag according the ip proto and mangle action. 
*/ +static void nfp_fl_get_csum_flag(struct flow_action_entry *a_in, u8 ip_proto, u32 *csum) +{ + if (a_in->id != FLOW_ACTION_MANGLE) + return; + + switch (a_in->mangle.htype) { + case FLOW_ACT_MANGLE_HDR_TYPE_IP4: + *csum |= TCA_CSUM_UPDATE_FLAG_IPV4HDR; + if (ip_proto == IPPROTO_TCP) + *csum |= TCA_CSUM_UPDATE_FLAG_TCP; + else if (ip_proto == IPPROTO_UDP) + *csum |= TCA_CSUM_UPDATE_FLAG_UDP; + break; + case FLOW_ACT_MANGLE_HDR_TYPE_TCP: + *csum |= TCA_CSUM_UPDATE_FLAG_TCP; + break; + case FLOW_ACT_MANGLE_HDR_TYPE_UDP: + *csum |= TCA_CSUM_UPDATE_FLAG_UDP; + break; + default: + break; + } +} + static int nfp_fl_merge_actions_offload(struct flow_rule **rules, struct nfp_flower_priv *priv, struct net_device *netdev, struct nfp_fl_payload *flow_pay) { + enum flow_action_hw_stats tmp_stats = FLOW_ACTION_HW_STATS_DONT_CARE; struct flow_action_entry *a_in; int i, j, num_actions, id; struct flow_rule *a_rule; @@ -486,19 +694,29 @@ static int nfp_fl_merge_actions_offload(struct flow_rule **rules, rules[CT_TYPE_NFT]->action.num_entries + rules[CT_TYPE_POST_CT]->action.num_entries; - a_rule = flow_rule_alloc(num_actions); + /* Add one action to make sure there is enough room to add an checksum action + * when do nat. + */ + a_rule = flow_rule_alloc(num_actions + 1); if (!a_rule) return -ENOMEM; /* Actions need a BASIC dissector. */ a_rule->match = rules[CT_TYPE_PRE_CT]->match; + /* post_ct entry have one action at least. */ + if (rules[CT_TYPE_POST_CT]->action.num_entries != 0) { + tmp_stats = rules[CT_TYPE_POST_CT]->action.entries[0].hw_stats; + } /* Copy actions */ for (j = 0; j < _CT_TYPE_MAX; j++) { + u32 csum_updated = 0; + u8 ip_proto = 0; + if (flow_rule_match_key(rules[j], FLOW_DISSECTOR_KEY_BASIC)) { struct flow_match_basic match; - /* ip_proto is the only field that needed in later compile_action, + /* ip_proto is the only field that is needed in later compile_action, * needed to set the correct checksum flags. 
It doesn't really matter * which input rule's ip_proto field we take as the earlier merge checks * would have made sure that they don't conflict. We do not know which @@ -506,8 +724,10 @@ static int nfp_fl_merge_actions_offload(struct flow_rule **rules, * through the subflows and assign the proper subflow to a_rule */ flow_rule_match_basic(rules[j], &match); - if (match.mask->ip_proto) + if (match.mask->ip_proto) { a_rule->match = rules[j]->match; + ip_proto = match.key->ip_proto; + } } for (i = 0; i < rules[j]->action.num_entries; i++) { @@ -524,11 +744,32 @@ static int nfp_fl_merge_actions_offload(struct flow_rule **rules, case FLOW_ACTION_CT_METADATA: continue; default: + /* nft entry is generated by tc ct, which mangle action do not care + * the stats, inherit the post entry stats to meet the + * flow_action_hw_stats_check. + */ + if (j == CT_TYPE_NFT) { + if (a_in->hw_stats == FLOW_ACTION_HW_STATS_DONT_CARE) + a_in->hw_stats = tmp_stats; + nfp_fl_get_csum_flag(a_in, ip_proto, &csum_updated); + } memcpy(&a_rule->action.entries[offset++], a_in, sizeof(struct flow_action_entry)); break; } } + /* nft entry have mangle action, but do not have checksum action when do NAT, + * hardware will automatically fix IPv4 and TCP/UDP checksum. so add an csum action + * to meet csum action check. 
+ */ + if (csum_updated) { + struct flow_action_entry *csum_action; + + csum_action = &a_rule->action.entries[offset++]; + csum_action->id = FLOW_ACTION_CSUM; + csum_action->csum_flags = csum_updated; + csum_action->hw_stats = tmp_stats; + } } /* Some actions would have been ignored, so update the num_entries field */ @@ -693,6 +934,17 @@ static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry) } } + if (NFP_FLOWER_LAYER2_QINQ & key_layer.key_layer_two) { + offset = key_map[FLOW_PAY_QINQ]; + key = kdata + offset; + msk = mdata + offset; + for (i = 0; i < _CT_TYPE_MAX; i++) { + nfp_flower_compile_vlan((struct nfp_flower_vlan *)key, + (struct nfp_flower_vlan *)msk, + rules[i]); + } + } + if (key_layer.key_layer_two & NFP_FLOWER_LAYER2_GRE) { offset = key_map[FLOW_PAY_GRE]; key = kdata + offset; @@ -733,17 +985,6 @@ static int nfp_fl_ct_add_offload(struct nfp_fl_nft_tc_merge *m_entry) } } - if (NFP_FLOWER_LAYER2_QINQ & key_layer.key_layer_two) { - offset = key_map[FLOW_PAY_QINQ]; - key = kdata + offset; - msk = mdata + offset; - for (i = 0; i < _CT_TYPE_MAX; i++) { - nfp_flower_compile_vlan((struct nfp_flower_vlan *)key, - (struct nfp_flower_vlan *)msk, - rules[i]); - } - } - if (key_layer.key_layer & NFP_FLOWER_LAYER_VXLAN || key_layer.key_layer_two & NFP_FLOWER_LAYER2_GENEVE) { offset = key_map[FLOW_PAY_UDP_TUN]; @@ -914,13 +1155,13 @@ static int nfp_ct_do_nft_merge(struct nfp_fl_ct_zone_entry *zt, /* Check that the two tc flows are also compatible with * the nft entry. No need to check the pre_ct and post_ct * entries as that was already done during pre_merge. - * The nft entry does not have a netdev or chain populated, so + * The nft entry does not have a chain populated, so * skip this check. 
*/ err = nfp_ct_merge_check(pre_ct_entry, nft_entry); if (err) return err; - err = nfp_ct_merge_check(post_ct_entry, nft_entry); + err = nfp_ct_merge_check(nft_entry, post_ct_entry); if (err) return err; err = nfp_ct_check_meta(post_ct_entry, nft_entry); @@ -948,7 +1189,7 @@ static int nfp_ct_do_nft_merge(struct nfp_fl_ct_zone_entry *zt, nft_m_entry->tc_m_parent = tc_m_entry; nft_m_entry->nft_parent = nft_entry; nft_m_entry->tc_flower_cookie = 0; - /* Copy the netdev from one the pre_ct entry. When the tc_m_entry was created + /* Copy the netdev from the pre_ct entry. When the tc_m_entry was created * it only combined them if the netdevs were the same, so can use any of them. */ nft_m_entry->netdev = pre_ct_entry->netdev; @@ -999,15 +1240,13 @@ static int nfp_ct_do_tc_merge(struct nfp_fl_ct_zone_entry *zt, pre_ct_entry = ct_entry2; } - if (post_ct_entry->netdev != pre_ct_entry->netdev) - return -EINVAL; /* Checks that the chain_index of the filter matches the * chain_index of the GOTO action. 
*/ if (post_ct_entry->chain_index != pre_ct_entry->chain_index) return -EINVAL; - err = nfp_ct_merge_check(post_ct_entry, pre_ct_entry); + err = nfp_ct_merge_check(pre_ct_entry, post_ct_entry); if (err) return err; @@ -1080,7 +1319,7 @@ nfp_fl_ct_zone_entry *get_nfp_zone_entry(struct nfp_flower_priv *priv, zt->priv = priv; zt->nft = NULL; - /* init the various hash tables and lists*/ + /* init the various hash tables and lists */ INIT_LIST_HEAD(&zt->pre_ct_list); INIT_LIST_HEAD(&zt->post_ct_list); INIT_LIST_HEAD(&zt->nft_flows_list); @@ -1114,6 +1353,63 @@ err_tc_merge_tb_init: return ERR_PTR(err); } +static struct net_device *get_netdev_from_rule(struct flow_rule *rule) +{ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) { + struct flow_match_meta match; + + flow_rule_match_meta(rule, &match); + if (match.key->ingress_ifindex & match.mask->ingress_ifindex) + return __dev_get_by_index(&init_net, + match.key->ingress_ifindex); + } + + return NULL; +} + +static void nfp_nft_ct_translate_mangle_action(struct flow_action_entry *mangle_action) +{ + if (mangle_action->id != FLOW_ACTION_MANGLE) + return; + + switch (mangle_action->mangle.htype) { + case FLOW_ACT_MANGLE_HDR_TYPE_IP4: + case FLOW_ACT_MANGLE_HDR_TYPE_IP6: + mangle_action->mangle.val = (__force u32)cpu_to_be32(mangle_action->mangle.val); + mangle_action->mangle.mask = (__force u32)cpu_to_be32(mangle_action->mangle.mask); + return; + + case FLOW_ACT_MANGLE_HDR_TYPE_TCP: + case FLOW_ACT_MANGLE_HDR_TYPE_UDP: + mangle_action->mangle.val = (__force u16)cpu_to_be16(mangle_action->mangle.val); + mangle_action->mangle.mask = (__force u16)cpu_to_be16(mangle_action->mangle.mask); + return; + + default: + return; + } +} + +static int nfp_nft_ct_set_flow_flag(struct flow_action_entry *act, + struct nfp_fl_ct_flow_entry *entry) +{ + switch (act->id) { + case FLOW_ACTION_CT: + if (act->ct.action == TCA_CT_ACT_NAT) + entry->flags |= NFP_FL_ACTION_DO_NAT; + break; + + case FLOW_ACTION_MANGLE: + entry->flags |= 
NFP_FL_ACTION_DO_MANGLE; + break; + + default: + break; + } + + return 0; +} + static struct nfp_fl_ct_flow_entry *nfp_fl_ct_add_flow(struct nfp_fl_ct_zone_entry *zt, struct net_device *netdev, @@ -1154,6 +1450,9 @@ nfp_fl_ct_flow_entry *nfp_fl_ct_add_flow(struct nfp_fl_ct_zone_entry *zt, entry->rule->match.dissector = &nft_match->dissector; entry->rule->match.mask = &nft_match->mask; entry->rule->match.key = &nft_match->key; + + if (!netdev) + netdev = get_netdev_from_rule(entry->rule); } else { entry->rule->match.dissector = flow->rule->match.dissector; entry->rule->match.mask = flow->rule->match.mask; @@ -1177,6 +1476,13 @@ nfp_fl_ct_flow_entry *nfp_fl_ct_add_flow(struct nfp_fl_ct_zone_entry *zt, new_act = &entry->rule->action.entries[i]; memcpy(new_act, act, sizeof(struct flow_action_entry)); + /* nft entry mangle field is host byte order, need translate to + * network byte order. + */ + if (is_nft) + nfp_nft_ct_translate_mangle_action(new_act); + + nfp_nft_ct_set_flow_flag(new_act, entry); /* Entunnel is a special case, need to allocate and copy * tunnel info. 
*/ @@ -1266,7 +1572,7 @@ static void nfp_free_nft_merge_children(void *entry, bool is_nft_flow) */ if (is_nft_flow) { - /* Need to iterate through list of nft_flow entries*/ + /* Need to iterate through list of nft_flow entries */ struct nfp_fl_ct_flow_entry *ct_entry = entry; list_for_each_entry_safe(m_entry, tmp, &ct_entry->children, @@ -1274,7 +1580,7 @@ static void nfp_free_nft_merge_children(void *entry, bool is_nft_flow) cleanup_nft_merge_entry(m_entry); } } else { - /* Need to iterate through list of tc_merged_flow entries*/ + /* Need to iterate through list of tc_merged_flow entries */ struct nfp_fl_ct_tc_merge *ct_entry = entry; list_for_each_entry_safe(m_entry, tmp, &ct_entry->children, diff --git a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h index beb6cceff9d8..762c0b36e269 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/conntrack.h +++ b/drivers/net/ethernet/netronome/nfp/flower/conntrack.h @@ -103,6 +103,10 @@ enum nfp_nfp_layer_name { _FLOW_PAY_LAYERS_MAX }; +/* NFP flow entry flags. */ +#define NFP_FL_ACTION_DO_NAT BIT(0) +#define NFP_FL_ACTION_DO_MANGLE BIT(1) + /** * struct nfp_fl_ct_flow_entry - Flow entry containing conntrack flow information * @cookie: Flow cookie, same as original TC flow, used as key @@ -115,6 +119,7 @@ enum nfp_nfp_layer_name { * @rule: Reference to the original TC flow rule * @stats: Used to cache stats for updating * @tun_offset: Used to indicate tunnel action offset in action list + * @flags: Used to indicate flow flag like NAT which used by merge. 
*/ struct nfp_fl_ct_flow_entry { unsigned long cookie; @@ -127,6 +132,7 @@ struct nfp_fl_ct_flow_entry { struct flow_rule *rule; struct flow_stats stats; u8 tun_offset; // Set to NFP_FL_CT_NO_TUN if no tun + u8 flags; }; /** diff --git a/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c b/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c index 63907aeb3884..e92860e20a24 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c @@ -234,7 +234,7 @@ nfp_fl_lag_config_group(struct nfp_fl_lag *lag, struct nfp_fl_lag_group *group, } /* To signal the end of a batch, both the switch and last flags are set - * and the the reserved SYNC group ID is used. + * and the reserved SYNC group ID is used. */ if (*batch == NFP_FL_LAG_BATCH_FINISHED) { flags |= NFP_FL_LAG_SWITCH | NFP_FL_LAG_LAST; @@ -576,7 +576,7 @@ nfp_fl_lag_changeupper_event(struct nfp_fl_lag *lag, group->dirty = true; group->slave_cnt = slave_count; - /* Group may have been on queue for removal but is now offfloable. */ + /* Group may have been on queue for removal but is now offloadable. 
*/ group->to_remove = false; mutex_unlock(&lag->lock); diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c index ac1dcfa1d179..4d960a9641b3 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.c +++ b/drivers/net/ethernet/netronome/nfp/flower/main.c @@ -266,7 +266,7 @@ nfp_flower_reprs_reify(struct nfp_app *app, enum nfp_repr_type type, int i, err, count = 0; reprs = rcu_dereference_protected(app->reprs[type], - lockdep_is_held(&app->pf->lock)); + nfp_app_is_locked(app)); if (!reprs) return 0; @@ -295,7 +295,7 @@ nfp_flower_wait_repr_reify(struct nfp_app *app, atomic_t *replies, int tot_repl) if (!tot_repl) return 0; - lockdep_assert_held(&app->pf->lock); + assert_nfp_app_locked(app); if (!wait_event_timeout(priv->reify_wait_queue, atomic_read(replies) >= tot_repl, NFP_FL_REPLY_TIMEOUT)) { diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index 917c450a7aad..cb799d18682d 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -12,7 +12,9 @@ #include <linux/rhashtable.h> #include <linux/time64.h> #include <linux/types.h> +#include <net/flow_offload.h> #include <net/pkt_cls.h> +#include <net/pkt_sched.h> #include <net/tcp.h> #include <linux/workqueue.h> #include <linux/idr.h> @@ -48,6 +50,8 @@ struct nfp_app; #define NFP_FL_FEATS_IPV6_TUN BIT(7) #define NFP_FL_FEATS_VLAN_QINQ BIT(8) #define NFP_FL_FEATS_QOS_PPS BIT(9) +#define NFP_FL_FEATS_QOS_METER BIT(10) +#define NFP_FL_FEATS_DECAP_V2 BIT(11) #define NFP_FL_FEATS_HOST_ACK BIT(31) #define NFP_FL_ENABLE_FLOW_MERGE BIT(0) @@ -63,7 +67,9 @@ struct nfp_app; NFP_FL_FEATS_PRE_TUN_RULES | \ NFP_FL_FEATS_IPV6_TUN | \ NFP_FL_FEATS_VLAN_QINQ | \ - NFP_FL_FEATS_QOS_PPS) + NFP_FL_FEATS_QOS_PPS | \ + NFP_FL_FEATS_QOS_METER | \ + NFP_FL_FEATS_DECAP_V2) struct nfp_fl_mask_id { struct circ_buf mask_id_free_list; @@ -82,12 +88,8 @@ struct 
nfp_fl_stats_id { * @offloaded_macs: Hashtable of the offloaded MAC addresses * @ipv4_off_list: List of IPv4 addresses to offload * @ipv6_off_list: List of IPv6 addresses to offload - * @neigh_off_list_v4: List of IPv4 neighbour offloads - * @neigh_off_list_v6: List of IPv6 neighbour offloads * @ipv4_off_lock: Lock for the IPv4 address list * @ipv6_off_lock: Lock for the IPv6 address list - * @neigh_off_lock_v4: Lock for the IPv4 neighbour address list - * @neigh_off_lock_v6: Lock for the IPv6 neighbour address list * @mac_off_ids: IDA to manage id assignment for offloaded MACs * @neigh_nb: Notifier to monitor neighbour state */ @@ -95,17 +97,95 @@ struct nfp_fl_tunnel_offloads { struct rhashtable offloaded_macs; struct list_head ipv4_off_list; struct list_head ipv6_off_list; - struct list_head neigh_off_list_v4; - struct list_head neigh_off_list_v6; struct mutex ipv4_off_lock; struct mutex ipv6_off_lock; - spinlock_t neigh_off_lock_v4; - spinlock_t neigh_off_lock_v6; struct ida mac_off_ids; struct notifier_block neigh_nb; }; /** + * struct nfp_tun_neigh - basic neighbour data + * @dst_addr: Destination MAC address + * @src_addr: Source MAC address + * @port_id: NFP port to output packet on - associated with source IPv4 + */ +struct nfp_tun_neigh { + u8 dst_addr[ETH_ALEN]; + u8 src_addr[ETH_ALEN]; + __be32 port_id; +}; + +/** + * struct nfp_tun_neigh_ext - extended neighbour data + * @vlan_tpid: VLAN_TPID match field + * @vlan_tci: VLAN_TCI match field + * @host_ctx: Host context ID to be saved here + */ +struct nfp_tun_neigh_ext { + __be16 vlan_tpid; + __be16 vlan_tci; + __be32 host_ctx; +}; + +/** + * struct nfp_tun_neigh_v4 - neighbour/route entry on the NFP for IPv4 + * @dst_ipv4: Destination IPv4 address + * @src_ipv4: Source IPv4 address + * @common: Neighbour/route common info + * @ext: Neighbour/route extended info + */ +struct nfp_tun_neigh_v4 { + __be32 dst_ipv4; + __be32 src_ipv4; + struct nfp_tun_neigh common; + struct nfp_tun_neigh_ext ext; +}; + +/** 
+ * struct nfp_tun_neigh_v6 - neighbour/route entry on the NFP for IPv6 + * @dst_ipv6: Destination IPv6 address + * @src_ipv6: Source IPv6 address + * @common: Neighbour/route common info + * @ext: Neighbour/route extended info + */ +struct nfp_tun_neigh_v6 { + struct in6_addr dst_ipv6; + struct in6_addr src_ipv6; + struct nfp_tun_neigh common; + struct nfp_tun_neigh_ext ext; +}; + +/** + * struct nfp_neigh_entry + * @neigh_cookie: Cookie for hashtable lookup + * @ht_node: rhash_head entry for hashtable + * @list_head: Needed as member of linked_nn_entries list + * @payload: The neighbour info payload + * @flow: Linked flow rule + * @is_ipv6: Flag to indicate if payload is ipv6 or ipv4 + */ +struct nfp_neigh_entry { + unsigned long neigh_cookie; + struct rhash_head ht_node; + struct list_head list_head; + char *payload; + struct nfp_predt_entry *flow; + bool is_ipv6; +}; + +/** + * struct nfp_predt_entry + * @list_head: List head to attach to predt_list + * @flow_pay: Direct link to flow_payload + * @nn_list: List of linked nfp_neigh_entries + */ +struct nfp_predt_entry { + struct list_head list_head; + struct nfp_fl_payload *flow_pay; + struct list_head nn_list; +}; + +/** * struct nfp_mtu_conf - manage MTU setting * @portnum: NFP port number of repr with requested MTU change * @requested_val: MTU value requested for repr @@ -191,11 +271,16 @@ struct nfp_fl_internal_ports { * @qos_stats_work: Workqueue for qos stats processing * @qos_rate_limiters: Current active qos rate limiters * @qos_stats_lock: Lock on qos stats updates + * @meter_stats_lock: Lock on meter stats updates + * @meter_table: Hash table used to store the meter table * @pre_tun_rule_cnt: Number of pre-tunnel rules offloaded * @merge_table: Hash table to store merged flows * @ct_zone_table: Hash table used to store the different zones * @ct_zone_wc: Special zone entry for wildcarded zone matches * @ct_map_table: Hash table used to referennce ct flows + * @predt_list: List to keep track of decap 
pretun flows + * @neigh_table: Table to keep track of neighbor entries + * @predt_lock: Lock to serialise predt/neigh table updates */ struct nfp_flower_priv { struct nfp_app *app; @@ -228,11 +313,16 @@ struct nfp_flower_priv { struct delayed_work qos_stats_work; unsigned int qos_rate_limiters; spinlock_t qos_stats_lock; /* Protect the qos stats */ + struct mutex meter_stats_lock; /* Protect the meter stats */ + struct rhashtable meter_table; int pre_tun_rule_cnt; struct rhashtable merge_table; struct rhashtable ct_zone_table; struct nfp_fl_ct_zone_entry *ct_zone_wc; struct rhashtable ct_map_table; + struct list_head predt_list; + struct rhashtable neigh_table; + spinlock_t predt_lock; /* Lock to serialise predt/neigh table updates */ }; /** @@ -336,9 +426,14 @@ struct nfp_fl_payload { struct list_head linked_flows; bool in_hw; struct { + struct nfp_predt_entry *predt; struct net_device *dev; + __be16 vlan_tpid; __be16 vlan_tci; __be16 port_idx; + u8 loc_mac[ETH_ALEN]; + u8 rem_mac[ETH_ALEN]; + bool is_ipv6; } pre_tun_rule; }; @@ -361,6 +456,7 @@ struct nfp_fl_payload_link { extern const struct rhashtable_params nfp_flower_table_params; extern const struct rhashtable_params merge_table_params; +extern const struct rhashtable_params neigh_table_params; struct nfp_merge_info { u64 parent_ctx; @@ -374,6 +470,31 @@ struct nfp_fl_stats_frame { __be64 stats_cookie; }; +struct nfp_meter_stats_entry { + u64 pkts; + u64 bytes; + u64 drops; +}; + +struct nfp_meter_entry { + struct rhash_head ht_node; + u32 meter_id; + bool bps; + u32 rate; + u32 burst; + u64 used; + struct nfp_meter_stats { + u64 update; + struct nfp_meter_stats_entry curr; + struct nfp_meter_stats_entry prev; + } stats; +}; + +enum nfp_meter_op { + NFP_METER_ADD, + NFP_METER_DEL, +}; + static inline bool nfp_flower_internal_port_can_offload(struct nfp_app *app, struct net_device *netdev) @@ -547,6 +668,10 @@ void nfp_flower_non_repr_priv_put(struct nfp_app *app, struct net_device *netdev); u32 
nfp_flower_get_port_id_from_netdev(struct nfp_app *app, struct net_device *netdev); +void nfp_tun_link_and_update_nn_entries(struct nfp_app *app, + struct nfp_predt_entry *predt); +void nfp_tun_unlink_and_update_nn_entries(struct nfp_app *app, + struct nfp_predt_entry *predt); int nfp_flower_xmit_pre_tun_flow(struct nfp_app *app, struct nfp_fl_payload *flow); int nfp_flower_xmit_pre_tun_del_flow(struct nfp_app *app, @@ -569,4 +694,18 @@ nfp_flower_xmit_flow(struct nfp_app *app, struct nfp_fl_payload *nfp_flow, void nfp_flower_update_merge_stats(struct nfp_app *app, struct nfp_fl_payload *sub_flow); + +int nfp_setup_tc_act_offload(struct nfp_app *app, + struct flow_offload_action *fl_act); +int nfp_init_meter_table(struct nfp_app *app); +void nfp_flower_stats_meter_request_all(struct nfp_flower_priv *fl_priv); +void nfp_act_stats_reply(struct nfp_app *app, void *pmsg); +int nfp_flower_offload_one_police(struct nfp_app *app, bool ingress, + bool pps, u32 id, u32 rate, u32 burst); +int nfp_flower_setup_meter_entry(struct nfp_app *app, + const struct flow_action_entry *action, + enum nfp_meter_op op, + u32 meter_id); +struct nfp_meter_entry * +nfp_flower_search_meter_entry(struct nfp_app *app, u32 meter_id); #endif diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c index 9d86eea4dc16..e01430139b6d 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/match.c +++ b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -98,16 +98,18 @@ nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext, { if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { struct flow_match_eth_addrs match; + u8 tmp; int i; flow_rule_match_eth_addrs(rule, &match); /* Populate mac frame. 
*/ for (i = 0; i < ETH_ALEN; i++) { - ext->mac_dst[i] |= match.key->dst[i] & - match.mask->dst[i]; + tmp = match.key->dst[i] & match.mask->dst[i]; + ext->mac_dst[i] |= tmp & (~msk->mac_dst[i]); msk->mac_dst[i] |= match.mask->dst[i]; - ext->mac_src[i] |= match.key->src[i] & - match.mask->src[i]; + + tmp = match.key->src[i] & match.mask->src[i]; + ext->mac_src[i] |= tmp & (~msk->mac_src[i]); msk->mac_src[i] |= match.mask->src[i]; } } @@ -189,11 +191,16 @@ nfp_flower_compile_tport(struct nfp_flower_tp_ports *ext, { if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { struct flow_match_ports match; + __be16 tmp; flow_rule_match_ports(rule, &match); - ext->port_src |= match.key->src & match.mask->src; - ext->port_dst |= match.key->dst & match.mask->dst; + + tmp = match.key->src & match.mask->src; + ext->port_src |= tmp & (~msk->port_src); msk->port_src |= match.mask->src; + + tmp = match.key->dst & match.mask->dst; + ext->port_dst |= tmp & (~msk->port_dst); msk->port_dst |= match.mask->dst; } } @@ -212,11 +219,16 @@ nfp_flower_compile_ip_ext(struct nfp_flower_ip_ext *ext, if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) { struct flow_match_ip match; + u8 tmp; flow_rule_match_ip(rule, &match); - ext->tos |= match.key->tos & match.mask->tos; - ext->ttl |= match.key->ttl & match.mask->ttl; + + tmp = match.key->tos & match.mask->tos; + ext->tos |= tmp & (~msk->tos); msk->tos |= match.mask->tos; + + tmp = match.key->ttl & match.mask->ttl; + ext->ttl |= tmp & (~msk->ttl); msk->ttl |= match.mask->ttl; } @@ -325,11 +337,16 @@ nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *ext, { if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) { struct flow_match_ipv4_addrs match; + __be32 tmp; flow_rule_match_ipv4_addrs(rule, &match); - ext->ipv4_src |= match.key->src & match.mask->src; - ext->ipv4_dst |= match.key->dst & match.mask->dst; + + tmp = match.key->src & match.mask->src; + ext->ipv4_src |= tmp & (~msk->ipv4_src); msk->ipv4_src |= match.mask->src; + + tmp = 
match.key->dst & match.mask->dst; + ext->ipv4_dst |= tmp & (~msk->ipv4_dst); msk->ipv4_dst |= match.mask->dst; } @@ -342,15 +359,21 @@ nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *ext, { if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) { struct flow_match_ipv6_addrs match; + u8 tmp; int i; flow_rule_match_ipv6_addrs(rule, &match); for (i = 0; i < sizeof(ext->ipv6_src); i++) { - ext->ipv6_src.s6_addr[i] |= match.key->src.s6_addr[i] & - match.mask->src.s6_addr[i]; - ext->ipv6_dst.s6_addr[i] |= match.key->dst.s6_addr[i] & - match.mask->dst.s6_addr[i]; + tmp = match.key->src.s6_addr[i] & + match.mask->src.s6_addr[i]; + ext->ipv6_src.s6_addr[i] |= tmp & + (~msk->ipv6_src.s6_addr[i]); msk->ipv6_src.s6_addr[i] |= match.mask->src.s6_addr[i]; + + tmp = match.key->dst.s6_addr[i] & + match.mask->dst.s6_addr[i]; + ext->ipv6_dst.s6_addr[i] |= tmp & + (~msk->ipv6_dst.s6_addr[i]); msk->ipv6_dst.s6_addr[i] |= match.mask->dst.s6_addr[i]; } } @@ -602,6 +625,14 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, msk += sizeof(struct nfp_flower_ipv6); } + if (NFP_FLOWER_LAYER2_QINQ & key_ls->key_layer_two) { + nfp_flower_compile_vlan((struct nfp_flower_vlan *)ext, + (struct nfp_flower_vlan *)msk, + rule); + ext += sizeof(struct nfp_flower_vlan); + msk += sizeof(struct nfp_flower_vlan); + } + if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_GRE) { if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) { struct nfp_flower_ipv6_gre_tun *gre_match; @@ -637,14 +668,6 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, } } - if (NFP_FLOWER_LAYER2_QINQ & key_ls->key_layer_two) { - nfp_flower_compile_vlan((struct nfp_flower_vlan *)ext, - (struct nfp_flower_vlan *)msk, - rule); - ext += sizeof(struct nfp_flower_vlan); - msk += sizeof(struct nfp_flower_vlan); - } - if (key_ls->key_layer & NFP_FLOWER_LAYER_VXLAN || key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE) { if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) { diff --git 
a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c index 2af9faee96c5..0f06ef6e24bf 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c +++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c @@ -43,15 +43,14 @@ static int nfp_release_stats_entry(struct nfp_app *app, u32 stats_context_id) struct circ_buf *ring; ring = &priv->stats_ids.free_list; - /* Check if buffer is full. */ - if (!CIRC_SPACE(ring->head, ring->tail, - priv->stats_ring_size * NFP_FL_STATS_ELEM_RS - - NFP_FL_STATS_ELEM_RS + 1)) + /* Check if buffer is full, stats_ring_size must be power of 2 */ + if (!CIRC_SPACE(ring->head, ring->tail, priv->stats_ring_size)) return -ENOBUFS; - memcpy(&ring->buf[ring->head], &stats_context_id, NFP_FL_STATS_ELEM_RS); - ring->head = (ring->head + NFP_FL_STATS_ELEM_RS) % - (priv->stats_ring_size * NFP_FL_STATS_ELEM_RS); + /* Each increment of head represents size of NFP_FL_STATS_ELEM_RS */ + memcpy(&ring->buf[ring->head * NFP_FL_STATS_ELEM_RS], + &stats_context_id, NFP_FL_STATS_ELEM_RS); + ring->head = (ring->head + 1) & (priv->stats_ring_size - 1); return 0; } @@ -86,11 +85,14 @@ static int nfp_get_stats_entry(struct nfp_app *app, u32 *stats_context_id) return -ENOENT; } - memcpy(&temp_stats_id, &ring->buf[ring->tail], NFP_FL_STATS_ELEM_RS); + /* Each increment of tail represents size of NFP_FL_STATS_ELEM_RS */ + memcpy(&temp_stats_id, &ring->buf[ring->tail * NFP_FL_STATS_ELEM_RS], + NFP_FL_STATS_ELEM_RS); *stats_context_id = temp_stats_id; - memcpy(&ring->buf[ring->tail], &freed_stats_id, NFP_FL_STATS_ELEM_RS); - ring->tail = (ring->tail + NFP_FL_STATS_ELEM_RS) % - (priv->stats_ring_size * NFP_FL_STATS_ELEM_RS); + memcpy(&ring->buf[ring->tail * NFP_FL_STATS_ELEM_RS], &freed_stats_id, + NFP_FL_STATS_ELEM_RS); + /* stats_ring_size must be power of 2 */ + ring->tail = (ring->tail + 1) & (priv->stats_ring_size - 1); return 0; } @@ -138,13 +140,18 @@ static int nfp_release_mask_id(struct nfp_app 
*app, u8 mask_id) struct circ_buf *ring; ring = &priv->mask_ids.mask_id_free_list; - /* Checking if buffer is full. */ + /* Checking if buffer is full, + * NFP_FLOWER_MASK_ENTRY_RS must be power of 2 + */ if (CIRC_SPACE(ring->head, ring->tail, NFP_FLOWER_MASK_ENTRY_RS) == 0) return -ENOBUFS; - memcpy(&ring->buf[ring->head], &mask_id, NFP_FLOWER_MASK_ELEMENT_RS); - ring->head = (ring->head + NFP_FLOWER_MASK_ELEMENT_RS) % - (NFP_FLOWER_MASK_ENTRY_RS * NFP_FLOWER_MASK_ELEMENT_RS); + /* Each increment of head represents size of + * NFP_FLOWER_MASK_ELEMENT_RS + */ + memcpy(&ring->buf[ring->head * NFP_FLOWER_MASK_ELEMENT_RS], &mask_id, + NFP_FLOWER_MASK_ELEMENT_RS); + ring->head = (ring->head + 1) & (NFP_FLOWER_MASK_ENTRY_RS - 1); priv->mask_ids.last_used[mask_id] = ktime_get(); @@ -171,7 +178,11 @@ static int nfp_mask_alloc(struct nfp_app *app, u8 *mask_id) if (ring->head == ring->tail) goto err_not_found; - memcpy(&temp_id, &ring->buf[ring->tail], NFP_FLOWER_MASK_ELEMENT_RS); + /* Each increment of tail represents size of + * NFP_FLOWER_MASK_ELEMENT_RS + */ + memcpy(&temp_id, &ring->buf[ring->tail * NFP_FLOWER_MASK_ELEMENT_RS], + NFP_FLOWER_MASK_ELEMENT_RS); *mask_id = temp_id; reuse_timeout = ktime_add_ns(priv->mask_ids.last_used[*mask_id], @@ -180,9 +191,10 @@ static int nfp_mask_alloc(struct nfp_app *app, u8 *mask_id) if (ktime_before(ktime_get(), reuse_timeout)) goto err_not_found; - memcpy(&ring->buf[ring->tail], &freed_id, NFP_FLOWER_MASK_ELEMENT_RS); - ring->tail = (ring->tail + NFP_FLOWER_MASK_ELEMENT_RS) % - (NFP_FLOWER_MASK_ENTRY_RS * NFP_FLOWER_MASK_ELEMENT_RS); + memcpy(&ring->buf[ring->tail * NFP_FLOWER_MASK_ELEMENT_RS], &freed_id, + NFP_FLOWER_MASK_ELEMENT_RS); + /* NFP_FLOWER_MASK_ENTRY_RS must be power of 2 */ + ring->tail = (ring->tail + 1) & (NFP_FLOWER_MASK_ENTRY_RS - 1); return 0; @@ -327,7 +339,7 @@ int nfp_compile_flow_metadata(struct nfp_app *app, u32 cookie, goto err_free_ctx_entry; } - /* Do net allocate a mask-id for pre_tun_rules. 
These flows are used to + /* Do not allocate a mask-id for pre_tun_rules. These flows are used to * configure the pre_tun table and are never actually send to the * firmware as an add-flow message. This causes the mask-id allocation * on the firmware to get out of sync if allocated here. @@ -338,11 +350,6 @@ int nfp_compile_flow_metadata(struct nfp_app *app, u32 cookie, nfp_flow->meta.mask_len, &nfp_flow->meta.flags, &new_mask_id)) { NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot allocate a new mask id"); - if (nfp_release_stats_entry(app, stats_cxt)) { - NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot release stats context"); - err = -EINVAL; - goto err_remove_rhash; - } err = -ENOENT; goto err_remove_rhash; } @@ -359,21 +366,6 @@ int nfp_compile_flow_metadata(struct nfp_app *app, u32 cookie, check_entry = nfp_flower_search_fl_table(app, cookie, netdev); if (check_entry) { NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot offload duplicate flow entry"); - if (nfp_release_stats_entry(app, stats_cxt)) { - NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot release stats context"); - err = -EINVAL; - goto err_remove_mask; - } - - if (!nfp_flow->pre_tun_rule.dev && - !nfp_check_mask_remove(app, nfp_flow->mask_data, - nfp_flow->meta.mask_len, - NULL, &new_mask_id)) { - NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot release mask id"); - err = -EINVAL; - goto err_remove_mask; - } - err = -EEXIST; goto err_remove_mask; } @@ -510,6 +502,12 @@ const struct rhashtable_params nfp_ct_map_params = { .automatic_shrinking = true, }; +const struct rhashtable_params neigh_table_params = { + .key_offset = offsetof(struct nfp_neigh_entry, neigh_cookie), + .head_offset = offsetof(struct nfp_neigh_entry, ht_node), + .key_len = sizeof(unsigned long), +}; + int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count, unsigned int host_num_mems) { @@ -538,6 +536,12 @@ int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count, if (err) goto err_free_ct_zone_table; 
+ err = rhashtable_init(&priv->neigh_table, &neigh_table_params); + if (err) + goto err_free_ct_map_table; + + INIT_LIST_HEAD(&priv->predt_list); + get_random_bytes(&priv->mask_id_seed, sizeof(priv->mask_id_seed)); /* Init ring buffer and unallocated mask_ids. */ @@ -545,7 +549,7 @@ int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count, kmalloc_array(NFP_FLOWER_MASK_ENTRY_RS, NFP_FLOWER_MASK_ELEMENT_RS, GFP_KERNEL); if (!priv->mask_ids.mask_id_free_list.buf) - goto err_free_ct_map_table; + goto err_free_neigh_table; priv->mask_ids.init_unallocated = NFP_FLOWER_MASK_ENTRY_RS - 1; @@ -573,6 +577,7 @@ int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count, goto err_free_ring_buf; spin_lock_init(&priv->stats_lock); + spin_lock_init(&priv->predt_lock); return 0; @@ -582,6 +587,8 @@ err_free_last_used: kfree(priv->mask_ids.last_used); err_free_mask_id: kfree(priv->mask_ids.mask_id_free_list.buf); +err_free_neigh_table: + rhashtable_destroy(&priv->neigh_table); err_free_ct_map_table: rhashtable_destroy(&priv->ct_map_table); err_free_ct_zone_table: @@ -708,6 +715,8 @@ void nfp_flower_metadata_cleanup(struct nfp_app *app) rhashtable_free_and_destroy(&priv->ct_map_table, nfp_free_map_table_entry, NULL); + rhashtable_free_and_destroy(&priv->neigh_table, + nfp_check_rhashtable_empty, NULL); kvfree(priv->stats); kfree(priv->mask_ids.mask_id_free_list.buf); kfree(priv->mask_ids.last_used); diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 224089d04d98..8593cafa6368 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -359,7 +359,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, flow_rule_match_enc_opts(rule, &enc_op); if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) { - /* check if GRE, which has no enc_ports */ + /* Check if GRE, which has no enc_ports */ if (!netif_is_gretap(netdev) 
&& !netif_is_ip6gretap(netdev)) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: an exact match on L4 destination port is required for non-GRE tunnels"); return -EOPNOTSUPP; @@ -373,10 +373,10 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, if (ipv6_tun) { key_layer_two |= NFP_FLOWER_LAYER2_TUN_IPV6; key_size += - sizeof(struct nfp_flower_ipv6_udp_tun); + sizeof(struct nfp_flower_ipv6_gre_tun); } else { key_size += - sizeof(struct nfp_flower_ipv4_udp_tun); + sizeof(struct nfp_flower_ipv4_gre_tun); } if (enc_op.key) { @@ -1016,7 +1016,7 @@ int nfp_flower_merge_offloaded_flows(struct nfp_app *app, nfp_flower_is_merge_flow(sub_flow2)) return -EINVAL; - /* check if the two flows are already merged */ + /* Check if the two flows are already merged */ parent_ctx = (u64)(be32_to_cpu(sub_flow1->meta.host_ctx_id)) << 32; parent_ctx |= (u64)(be32_to_cpu(sub_flow2->meta.host_ctx_id)); if (rhashtable_lookup_fast(&priv->merge_table, @@ -1170,6 +1170,11 @@ nfp_flower_validate_pre_tun_rule(struct nfp_app *app, return -EOPNOTSUPP; } + if (key_layer & NFP_FLOWER_LAYER_IPV6) + flow->pre_tun_rule.is_ipv6 = true; + else + flow->pre_tun_rule.is_ipv6 = false; + /* Skip fields known to exist. */ mask += sizeof(struct nfp_flower_meta_tci); ext += sizeof(struct nfp_flower_meta_tci); @@ -1180,13 +1185,6 @@ nfp_flower_validate_pre_tun_rule(struct nfp_app *app, mask += sizeof(struct nfp_flower_in_port); ext += sizeof(struct nfp_flower_in_port); - /* Ensure destination MAC address matches pre_tun_dev. */ - mac = (struct nfp_flower_mac_mpls *)ext; - if (memcmp(&mac->mac_dst[0], flow->pre_tun_rule.dev->dev_addr, 6)) { - NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: dest MAC must match output dev MAC"); - return -EOPNOTSUPP; - } - /* Ensure destination MAC address is fully matched. 
*/ mac = (struct nfp_flower_mac_mpls *)mask; if (!is_broadcast_ether_addr(&mac->mac_dst[0])) { @@ -1194,11 +1192,36 @@ nfp_flower_validate_pre_tun_rule(struct nfp_app *app, return -EOPNOTSUPP; } + /* Ensure source MAC address is fully matched. This is only needed + * for firmware with the DECAP_V2 feature enabled. Don't do this + * for firmware without this feature to keep old behaviour. + */ + if (priv->flower_ext_feats & NFP_FL_FEATS_DECAP_V2) { + mac = (struct nfp_flower_mac_mpls *)mask; + if (!is_broadcast_ether_addr(&mac->mac_src[0])) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported pre-tunnel rule: source MAC field must not be masked"); + return -EOPNOTSUPP; + } + } + if (mac->mpls_lse) { NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: MPLS not supported"); return -EOPNOTSUPP; } + /* Ensure destination MAC address matches pre_tun_dev. */ + mac = (struct nfp_flower_mac_mpls *)ext; + if (memcmp(&mac->mac_dst[0], flow->pre_tun_rule.dev->dev_addr, 6)) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported pre-tunnel rule: dest MAC must match output dev MAC"); + return -EOPNOTSUPP; + } + + /* Save mac addresses in pre_tun_rule entry for later use */ + memcpy(&flow->pre_tun_rule.loc_mac, &mac->mac_dst[0], ETH_ALEN); + memcpy(&flow->pre_tun_rule.rem_mac, &mac->mac_src[0], ETH_ALEN); + mask += sizeof(struct nfp_flower_mac_mpls); ext += sizeof(struct nfp_flower_mac_mpls); if (key_layer & NFP_FLOWER_LAYER_IPV4 || @@ -1227,17 +1250,21 @@ nfp_flower_validate_pre_tun_rule(struct nfp_app *app, if ((priv->flower_ext_feats & NFP_FL_FEATS_VLAN_QINQ)) { if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_QINQ) { struct nfp_flower_vlan *vlan_tags; + u16 vlan_tpid; u16 vlan_tci; vlan_tags = (struct nfp_flower_vlan *)ext; vlan_tci = be16_to_cpu(vlan_tags->outer_tci); + vlan_tpid = be16_to_cpu(vlan_tags->outer_tpid); vlan_tci &= ~NFP_FLOWER_MASK_VLAN_PRESENT; flow->pre_tun_rule.vlan_tci = cpu_to_be16(vlan_tci); + flow->pre_tun_rule.vlan_tpid = cpu_to_be16(vlan_tpid); vlan = true; } else { 
flow->pre_tun_rule.vlan_tci = cpu_to_be16(0xffff); + flow->pre_tun_rule.vlan_tpid = cpu_to_be16(0xffff); } } @@ -1274,9 +1301,14 @@ static bool offload_pre_check(struct flow_cls_offload *flow) { struct flow_rule *rule = flow_cls_offload_flow_rule(flow); struct flow_dissector *dissector = rule->match.dissector; + struct flow_match_ct ct; - if (dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_CT)) - return false; + if (dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_CT)) { + flow_rule_match_ct(rule, &ct); + /* Allow special case where CT match is all 0 */ + if (memchr_inv(ct.key, 0, sizeof(*ct.key))) + return false; + } if (flow->common.chain_index) return false; @@ -1362,11 +1394,30 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev, goto err_release_metadata; } - if (flow_pay->pre_tun_rule.dev) - err = nfp_flower_xmit_pre_tun_flow(app, flow_pay); - else + if (flow_pay->pre_tun_rule.dev) { + if (priv->flower_ext_feats & NFP_FL_FEATS_DECAP_V2) { + struct nfp_predt_entry *predt; + + predt = kzalloc(sizeof(*predt), GFP_KERNEL); + if (!predt) { + err = -ENOMEM; + goto err_remove_rhash; + } + predt->flow_pay = flow_pay; + INIT_LIST_HEAD(&predt->nn_list); + spin_lock_bh(&priv->predt_lock); + list_add(&predt->list_head, &priv->predt_list); + flow_pay->pre_tun_rule.predt = predt; + nfp_tun_link_and_update_nn_entries(app, predt); + spin_unlock_bh(&priv->predt_lock); + } else { + err = nfp_flower_xmit_pre_tun_flow(app, flow_pay); + } + } else { err = nfp_flower_xmit_flow(app, flow_pay, NFP_FLOWER_CMSG_TYPE_FLOW_ADD); + } + if (err) goto err_remove_rhash; @@ -1538,11 +1589,25 @@ nfp_flower_del_offload(struct nfp_app *app, struct net_device *netdev, goto err_free_merge_flow; } - if (nfp_flow->pre_tun_rule.dev) - err = nfp_flower_xmit_pre_tun_del_flow(app, nfp_flow); - else + if (nfp_flow->pre_tun_rule.dev) { + if (priv->flower_ext_feats & NFP_FL_FEATS_DECAP_V2) { + struct nfp_predt_entry *predt; + + predt = nfp_flow->pre_tun_rule.predt; + if (predt) { + 
spin_lock_bh(&priv->predt_lock); + nfp_tun_unlink_and_update_nn_entries(app, predt); + list_del(&predt->list_head); + spin_unlock_bh(&priv->predt_lock); + kfree(predt); + } + } else { + err = nfp_flower_xmit_pre_tun_del_flow(app, nfp_flow); + } + } else { err = nfp_flower_xmit_flow(app, nfp_flow, NFP_FLOWER_CMSG_TYPE_FLOW_DEL); + } /* Fall through on error. */ err_free_merge_flow: @@ -1861,12 +1926,29 @@ nfp_flower_setup_indr_tc_block(struct net_device *netdev, struct Qdisc *sch, str return 0; } +static int +nfp_setup_tc_no_dev(struct nfp_app *app, enum tc_setup_type type, void *data) +{ + if (!data) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_ACT: + return nfp_setup_tc_act_offload(app, data); + default: + return -EOPNOTSUPP; + } +} + int nfp_flower_indr_setup_tc_cb(struct net_device *netdev, struct Qdisc *sch, void *cb_priv, enum tc_setup_type type, void *type_data, void *data, void (*cleanup)(struct flow_block_cb *block_cb)) { + if (!netdev) + return nfp_setup_tc_no_dev(cb_priv, type, data); + if (!nfp_fl_is_netdev_to_offload(netdev)) return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c b/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c index 784c6dbf8bc4..99052a925d9e 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/qos_conf.c @@ -1,7 +1,11 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* Copyright (C) 2019 Netronome Systems, Inc. 
*/ +#include <linux/hash.h> +#include <linux/hashtable.h> +#include <linux/jhash.h> #include <linux/math64.h> +#include <linux/vmalloc.h> #include <net/pkt_cls.h> #include <net/pkt_sched.h> @@ -11,10 +15,14 @@ #define NFP_FL_QOS_UPDATE msecs_to_jiffies(1000) #define NFP_FL_QOS_PPS BIT(15) +#define NFP_FL_QOS_METER BIT(10) struct nfp_police_cfg_head { __be32 flags_opts; - __be32 port; + union { + __be32 meter_id; + __be32 port; + }; }; enum NFP_FL_QOS_TYPES { @@ -46,7 +54,15 @@ enum NFP_FL_QOS_TYPES { * | Committed Information Rate | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * Word[0](FLag options): - * [15] p(pps) 1 for pps ,0 for bps + * [15] p(pps) 1 for pps, 0 for bps + * + * Meter control message + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-------------------------------+-+---+-----+-+---------+-+---+-+ + * | Reserved |p| Y |TYPE |E|TSHFV |P| PC|R| + * +-------------------------------+-+---+-----+-+---------+-+---+-+ + * | meter ID | + * +-------------------------------+-------------------------------+ * */ struct nfp_police_config { @@ -67,6 +83,84 @@ struct nfp_police_stats_reply { __be64 drop_pkts; }; +int nfp_flower_offload_one_police(struct nfp_app *app, bool ingress, + bool pps, u32 id, u32 rate, u32 burst) +{ + struct nfp_police_config *config; + struct sk_buff *skb; + + skb = nfp_flower_cmsg_alloc(app, sizeof(struct nfp_police_config), + NFP_FLOWER_CMSG_TYPE_QOS_MOD, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + config = nfp_flower_cmsg_get_data(skb); + memset(config, 0, sizeof(struct nfp_police_config)); + if (pps) + config->head.flags_opts |= cpu_to_be32(NFP_FL_QOS_PPS); + if (!ingress) + config->head.flags_opts |= cpu_to_be32(NFP_FL_QOS_METER); + + if (ingress) + config->head.port = cpu_to_be32(id); + else + config->head.meter_id = cpu_to_be32(id); + + config->bkt_tkn_p = cpu_to_be32(burst); + config->bkt_tkn_c = cpu_to_be32(burst); + config->pbs = cpu_to_be32(burst); + config->cbs = 
cpu_to_be32(burst); + config->pir = cpu_to_be32(rate); + config->cir = cpu_to_be32(rate); + nfp_ctrl_tx(app->ctrl, skb); + + return 0; +} + +static int nfp_policer_validate(const struct flow_action *action, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack, + bool ingress) +{ + if (act->police.exceed.act_id != FLOW_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when exceed action is not drop"); + return -EOPNOTSUPP; + } + + if (ingress) { + if (act->police.notexceed.act_id != FLOW_ACTION_CONTINUE && + act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is not continue or ok"); + return -EOPNOTSUPP; + } + } else { + if (act->police.notexceed.act_id != FLOW_ACTION_PIPE && + act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is not pipe or ok"); + return -EOPNOTSUPP; + } + } + + if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT && + !flow_action_is_last_entry(action, act)) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is ok, but action is not last"); + return -EOPNOTSUPP; + } + + if (act->police.peakrate_bytes_ps || + act->police.avrate || act->police.overhead) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when peakrate/avrate/overhead is configured"); + return -EOPNOTSUPP; + } + + return 0; +} + static int nfp_flower_install_rate_limiter(struct nfp_app *app, struct net_device *netdev, struct tc_cls_matchall_offload *flow, @@ -77,15 +171,15 @@ nfp_flower_install_rate_limiter(struct nfp_app *app, struct net_device *netdev, struct nfp_flower_priv *fl_priv = app->priv; struct flow_action_entry *action = NULL; struct nfp_flower_repr_priv *repr_priv; - struct nfp_police_config *config; u32 netdev_port_id, i; struct nfp_repr *repr; - struct sk_buff *skb; bool pps_support; u32 bps_num = 0; u32 pps_num = 0; u32 burst; + bool pps; u64 
rate; + int err; if (!nfp_netdev_is_nfp_repr(netdev)) { NL_SET_ERR_MSG_MOD(extack, "unsupported offload: qos rate limit offload not supported on higher level port"); @@ -132,6 +226,11 @@ nfp_flower_install_rate_limiter(struct nfp_app *app, struct net_device *netdev, "unsupported offload: qos rate limit offload requires police action"); return -EOPNOTSUPP; } + + err = nfp_policer_validate(&flow->rule->action, action, extack, true); + if (err) + return err; + if (action->police.rate_bytes_ps > 0) { if (bps_num++) { NL_SET_ERR_MSG_MOD(extack, @@ -169,23 +268,12 @@ nfp_flower_install_rate_limiter(struct nfp_app *app, struct net_device *netdev, } if (rate != 0) { - skb = nfp_flower_cmsg_alloc(repr->app, sizeof(struct nfp_police_config), - NFP_FLOWER_CMSG_TYPE_QOS_MOD, GFP_KERNEL); - if (!skb) - return -ENOMEM; - - config = nfp_flower_cmsg_get_data(skb); - memset(config, 0, sizeof(struct nfp_police_config)); + pps = false; if (action->police.rate_pkt_ps > 0) - config->head.flags_opts = cpu_to_be32(NFP_FL_QOS_PPS); - config->head.port = cpu_to_be32(netdev_port_id); - config->bkt_tkn_p = cpu_to_be32(burst); - config->bkt_tkn_c = cpu_to_be32(burst); - config->pbs = cpu_to_be32(burst); - config->cbs = cpu_to_be32(burst); - config->pir = cpu_to_be32(rate); - config->cir = cpu_to_be32(rate); - nfp_ctrl_tx(repr->app->ctrl, skb); + pps = true; + nfp_flower_offload_one_police(repr->app, true, + pps, netdev_port_id, + rate, burst); } } repr_priv->qos_table.netdev_port_id = netdev_port_id; @@ -266,6 +354,9 @@ void nfp_flower_stats_rlim_reply(struct nfp_app *app, struct sk_buff *skb) u32 netdev_port_id; msg = nfp_flower_cmsg_get_data(skb); + if (be32_to_cpu(msg->head.flags_opts) & NFP_FL_QOS_METER) + return nfp_act_stats_reply(app, msg); + netdev_port_id = be32_to_cpu(msg->head.port); rcu_read_lock(); netdev = nfp_app_dev_get(app, netdev_port_id, NULL); @@ -297,7 +388,7 @@ exit_unlock_rcu: static void nfp_flower_stats_rlim_request(struct nfp_flower_priv *fl_priv, - u32 
netdev_port_id) + u32 id, bool ingress) { struct nfp_police_cfg_head *head; struct sk_buff *skb; @@ -308,10 +399,15 @@ nfp_flower_stats_rlim_request(struct nfp_flower_priv *fl_priv, GFP_ATOMIC); if (!skb) return; - head = nfp_flower_cmsg_get_data(skb); + memset(head, 0, sizeof(struct nfp_police_cfg_head)); - head->port = cpu_to_be32(netdev_port_id); + if (ingress) { + head->port = cpu_to_be32(id); + } else { + head->flags_opts = cpu_to_be32(NFP_FL_QOS_METER); + head->meter_id = cpu_to_be32(id); + } nfp_ctrl_tx(fl_priv->app->ctrl, skb); } @@ -341,7 +437,8 @@ nfp_flower_stats_rlim_request_all(struct nfp_flower_priv *fl_priv) if (!netdev_port_id) continue; - nfp_flower_stats_rlim_request(fl_priv, netdev_port_id); + nfp_flower_stats_rlim_request(fl_priv, + netdev_port_id, true); } } @@ -359,6 +456,8 @@ static void update_stats_cache(struct work_struct *work) qos_stats_work); nfp_flower_stats_rlim_request_all(fl_priv); + nfp_flower_stats_meter_request_all(fl_priv); + schedule_delayed_work(&fl_priv->qos_stats_work, NFP_FL_QOS_UPDATE); } @@ -406,6 +505,9 @@ void nfp_flower_qos_init(struct nfp_app *app) struct nfp_flower_priv *fl_priv = app->priv; spin_lock_init(&fl_priv->qos_stats_lock); + mutex_init(&fl_priv->meter_stats_lock); + nfp_init_meter_table(app); + INIT_DELAYED_WORK(&fl_priv->qos_stats_work, &update_stats_cache); } @@ -441,3 +543,339 @@ int nfp_flower_setup_qos_offload(struct nfp_app *app, struct net_device *netdev, return -EOPNOTSUPP; } } + +/* Offload tc action, currently only for tc police */ + +static const struct rhashtable_params stats_meter_table_params = { + .key_offset = offsetof(struct nfp_meter_entry, meter_id), + .head_offset = offsetof(struct nfp_meter_entry, ht_node), + .key_len = sizeof(u32), +}; + +struct nfp_meter_entry * +nfp_flower_search_meter_entry(struct nfp_app *app, u32 meter_id) +{ + struct nfp_flower_priv *priv = app->priv; + + return rhashtable_lookup_fast(&priv->meter_table, &meter_id, + stats_meter_table_params); +} + +static struct 
nfp_meter_entry * +nfp_flower_add_meter_entry(struct nfp_app *app, u32 meter_id) +{ + struct nfp_meter_entry *meter_entry = NULL; + struct nfp_flower_priv *priv = app->priv; + + meter_entry = rhashtable_lookup_fast(&priv->meter_table, + &meter_id, + stats_meter_table_params); + if (meter_entry) + return meter_entry; + + meter_entry = kzalloc(sizeof(*meter_entry), GFP_KERNEL); + if (!meter_entry) + return NULL; + + meter_entry->meter_id = meter_id; + meter_entry->used = jiffies; + if (rhashtable_insert_fast(&priv->meter_table, &meter_entry->ht_node, + stats_meter_table_params)) { + kfree(meter_entry); + return NULL; + } + + priv->qos_rate_limiters++; + if (priv->qos_rate_limiters == 1) + schedule_delayed_work(&priv->qos_stats_work, + NFP_FL_QOS_UPDATE); + + return meter_entry; +} + +static void nfp_flower_del_meter_entry(struct nfp_app *app, u32 meter_id) +{ + struct nfp_meter_entry *meter_entry = NULL; + struct nfp_flower_priv *priv = app->priv; + + meter_entry = rhashtable_lookup_fast(&priv->meter_table, &meter_id, + stats_meter_table_params); + if (!meter_entry) + return; + + rhashtable_remove_fast(&priv->meter_table, + &meter_entry->ht_node, + stats_meter_table_params); + kfree(meter_entry); + priv->qos_rate_limiters--; + if (!priv->qos_rate_limiters) + cancel_delayed_work_sync(&priv->qos_stats_work); +} + +int nfp_flower_setup_meter_entry(struct nfp_app *app, + const struct flow_action_entry *action, + enum nfp_meter_op op, + u32 meter_id) +{ + struct nfp_flower_priv *fl_priv = app->priv; + struct nfp_meter_entry *meter_entry = NULL; + int err = 0; + + mutex_lock(&fl_priv->meter_stats_lock); + + switch (op) { + case NFP_METER_DEL: + nfp_flower_del_meter_entry(app, meter_id); + goto exit_unlock; + case NFP_METER_ADD: + meter_entry = nfp_flower_add_meter_entry(app, meter_id); + break; + default: + err = -EOPNOTSUPP; + goto exit_unlock; + } + + if (!meter_entry) { + err = -ENOMEM; + goto exit_unlock; + } + + if (action->police.rate_bytes_ps > 0) { + 
meter_entry->bps = true; + meter_entry->rate = action->police.rate_bytes_ps; + meter_entry->burst = action->police.burst; + } else { + meter_entry->bps = false; + meter_entry->rate = action->police.rate_pkt_ps; + meter_entry->burst = action->police.burst_pkt; + } + +exit_unlock: + mutex_unlock(&fl_priv->meter_stats_lock); + return err; +} + +int nfp_init_meter_table(struct nfp_app *app) +{ + struct nfp_flower_priv *priv = app->priv; + + return rhashtable_init(&priv->meter_table, &stats_meter_table_params); +} + +void +nfp_flower_stats_meter_request_all(struct nfp_flower_priv *fl_priv) +{ + struct nfp_meter_entry *meter_entry = NULL; + struct rhashtable_iter iter; + + mutex_lock(&fl_priv->meter_stats_lock); + rhashtable_walk_enter(&fl_priv->meter_table, &iter); + rhashtable_walk_start(&iter); + + while ((meter_entry = rhashtable_walk_next(&iter)) != NULL) { + if (IS_ERR(meter_entry)) + continue; + nfp_flower_stats_rlim_request(fl_priv, + meter_entry->meter_id, false); + } + + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); + mutex_unlock(&fl_priv->meter_stats_lock); +} + +static int +nfp_act_install_actions(struct nfp_app *app, struct flow_offload_action *fl_act, + struct netlink_ext_ack *extack) +{ + struct flow_action_entry *paction = &fl_act->action.entries[0]; + u32 action_num = fl_act->action.num_entries; + struct nfp_flower_priv *fl_priv = app->priv; + struct flow_action_entry *action = NULL; + u32 burst, i, meter_id; + bool pps_support, pps; + bool add = false; + u64 rate; + int err; + + pps_support = !!(fl_priv->flower_ext_feats & NFP_FL_FEATS_QOS_PPS); + + for (i = 0 ; i < action_num; i++) { + /* Set qos associate data for this interface */ + action = paction + i; + if (action->id != FLOW_ACTION_POLICE) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: qos rate limit offload requires police action"); + continue; + } + + err = nfp_policer_validate(&fl_act->action, action, extack, false); + if (err) + return err; + + if 
(action->police.rate_bytes_ps > 0) { + rate = action->police.rate_bytes_ps; + burst = action->police.burst; + } else if (action->police.rate_pkt_ps > 0 && pps_support) { + rate = action->police.rate_pkt_ps; + burst = action->police.burst_pkt; + } else { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: unsupported qos rate limit"); + continue; + } + + if (rate != 0) { + meter_id = action->hw_index; + if (nfp_flower_setup_meter_entry(app, action, NFP_METER_ADD, meter_id)) + continue; + + pps = false; + if (action->police.rate_pkt_ps > 0) + pps = true; + nfp_flower_offload_one_police(app, false, pps, meter_id, + rate, burst); + add = true; + } + } + + return add ? 0 : -EOPNOTSUPP; +} + +static int +nfp_act_remove_actions(struct nfp_app *app, struct flow_offload_action *fl_act, + struct netlink_ext_ack *extack) +{ + struct nfp_meter_entry *meter_entry = NULL; + struct nfp_police_config *config; + struct sk_buff *skb; + u32 meter_id; + bool pps; + + /* Delete qos associate data for this interface */ + if (fl_act->id != FLOW_ACTION_POLICE) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: qos rate limit offload requires police action"); + return -EOPNOTSUPP; + } + + meter_id = fl_act->index; + meter_entry = nfp_flower_search_meter_entry(app, meter_id); + if (!meter_entry) { + NL_SET_ERR_MSG_MOD(extack, + "no meter entry when delete the action index."); + return -ENOENT; + } + pps = !meter_entry->bps; + + skb = nfp_flower_cmsg_alloc(app, sizeof(struct nfp_police_config), + NFP_FLOWER_CMSG_TYPE_QOS_DEL, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + config = nfp_flower_cmsg_get_data(skb); + memset(config, 0, sizeof(struct nfp_police_config)); + config->head.flags_opts = cpu_to_be32(NFP_FL_QOS_METER); + config->head.meter_id = cpu_to_be32(meter_id); + if (pps) + config->head.flags_opts |= cpu_to_be32(NFP_FL_QOS_PPS); + + nfp_ctrl_tx(app->ctrl, skb); + nfp_flower_setup_meter_entry(app, NULL, NFP_METER_DEL, meter_id); + + return 0; +} + +void 
+nfp_act_stats_reply(struct nfp_app *app, void *pmsg) +{ + struct nfp_flower_priv *fl_priv = app->priv; + struct nfp_meter_entry *meter_entry = NULL; + struct nfp_police_stats_reply *msg = pmsg; + u32 meter_id; + + meter_id = be32_to_cpu(msg->head.meter_id); + mutex_lock(&fl_priv->meter_stats_lock); + + meter_entry = nfp_flower_search_meter_entry(app, meter_id); + if (!meter_entry) + goto exit_unlock; + + meter_entry->stats.curr.pkts = be64_to_cpu(msg->pass_pkts) + + be64_to_cpu(msg->drop_pkts); + meter_entry->stats.curr.bytes = be64_to_cpu(msg->pass_bytes) + + be64_to_cpu(msg->drop_bytes); + meter_entry->stats.curr.drops = be64_to_cpu(msg->drop_pkts); + if (!meter_entry->stats.update) { + meter_entry->stats.prev.pkts = meter_entry->stats.curr.pkts; + meter_entry->stats.prev.bytes = meter_entry->stats.curr.bytes; + meter_entry->stats.prev.drops = meter_entry->stats.curr.drops; + } + + meter_entry->stats.update = jiffies; + +exit_unlock: + mutex_unlock(&fl_priv->meter_stats_lock); +} + +static int +nfp_act_stats_actions(struct nfp_app *app, struct flow_offload_action *fl_act, + struct netlink_ext_ack *extack) +{ + struct nfp_flower_priv *fl_priv = app->priv; + struct nfp_meter_entry *meter_entry = NULL; + u64 diff_bytes, diff_pkts, diff_drops; + int err = 0; + + if (fl_act->id != FLOW_ACTION_POLICE) { + NL_SET_ERR_MSG_MOD(extack, + "unsupported offload: qos rate limit offload requires police action"); + return -EOPNOTSUPP; + } + + mutex_lock(&fl_priv->meter_stats_lock); + meter_entry = nfp_flower_search_meter_entry(app, fl_act->index); + if (!meter_entry) { + err = -ENOENT; + goto exit_unlock; + } + diff_pkts = meter_entry->stats.curr.pkts > meter_entry->stats.prev.pkts ? + meter_entry->stats.curr.pkts - meter_entry->stats.prev.pkts : 0; + diff_bytes = meter_entry->stats.curr.bytes > meter_entry->stats.prev.bytes ? + meter_entry->stats.curr.bytes - meter_entry->stats.prev.bytes : 0; + diff_drops = meter_entry->stats.curr.drops > meter_entry->stats.prev.drops ? 
+ meter_entry->stats.curr.drops - meter_entry->stats.prev.drops : 0; + + flow_stats_update(&fl_act->stats, diff_bytes, diff_pkts, diff_drops, + meter_entry->stats.update, + FLOW_ACTION_HW_STATS_DELAYED); + + meter_entry->stats.prev.pkts = meter_entry->stats.curr.pkts; + meter_entry->stats.prev.bytes = meter_entry->stats.curr.bytes; + meter_entry->stats.prev.drops = meter_entry->stats.curr.drops; + +exit_unlock: + mutex_unlock(&fl_priv->meter_stats_lock); + return err; +} + +int nfp_setup_tc_act_offload(struct nfp_app *app, + struct flow_offload_action *fl_act) +{ + struct netlink_ext_ack *extack = fl_act->extack; + struct nfp_flower_priv *fl_priv = app->priv; + + if (!(fl_priv->flower_ext_feats & NFP_FL_FEATS_QOS_METER)) + return -EOPNOTSUPP; + + switch (fl_act->command) { + case FLOW_ACT_REPLACE: + return nfp_act_install_actions(app, fl_act, extack); + case FLOW_ACT_DESTROY: + return nfp_act_remove_actions(app, fl_act, extack); + case FLOW_ACT_STATS: + return nfp_act_stats_actions(app, fl_act, extack); + default: + return -EOPNOTSUPP; + } +} diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index dfb4468fe287..52f67157bd0f 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -77,38 +77,6 @@ struct nfp_tun_active_tuns_v6 { }; /** - * struct nfp_tun_neigh - neighbour/route entry on the NFP - * @dst_ipv4: destination IPv4 address - * @src_ipv4: source IPv4 address - * @dst_addr: destination MAC address - * @src_addr: source MAC address - * @port_id: NFP port to output packet on - associated with source IPv4 - */ -struct nfp_tun_neigh { - __be32 dst_ipv4; - __be32 src_ipv4; - u8 dst_addr[ETH_ALEN]; - u8 src_addr[ETH_ALEN]; - __be32 port_id; -}; - -/** - * struct nfp_tun_neigh_v6 - neighbour/route entry on the NFP - * @dst_ipv6: destination IPv6 address - * @src_ipv6: source IPv6 address - * @dst_addr: destination 
MAC address - * @src_addr: source MAC address - * @port_id: NFP port to output packet on - associated with source IPv6 - */ -struct nfp_tun_neigh_v6 { - struct in6_addr dst_ipv6; - struct in6_addr src_ipv6; - u8 dst_addr[ETH_ALEN]; - u8 src_addr[ETH_ALEN]; - __be32 port_id; -}; - -/** * struct nfp_tun_req_route_ipv4 - NFP requests a route/neighbour lookup * @ingress_port: ingress port of packet that signalled request * @ipv4_addr: destination ipv4 address for route @@ -313,9 +281,15 @@ static int nfp_flower_xmit_tun_conf(struct nfp_app *app, u8 mtype, u16 plen, void *pdata, gfp_t flag) { + struct nfp_flower_priv *priv = app->priv; struct sk_buff *skb; unsigned char *msg; + if (!(priv->flower_ext_feats & NFP_FL_FEATS_DECAP_V2) && + (mtype == NFP_FLOWER_CMSG_TYPE_TUN_NEIGH || + mtype == NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6)) + plen -= sizeof(struct nfp_tun_neigh_ext); + skb = nfp_flower_cmsg_alloc(app, plen, mtype, flag); if (!skb) return -ENOMEM; @@ -327,193 +301,268 @@ nfp_flower_xmit_tun_conf(struct nfp_app *app, u8 mtype, u16 plen, void *pdata, return 0; } -static bool -__nfp_tun_has_route(struct list_head *route_list, spinlock_t *list_lock, - void *add, int add_len) -{ - struct nfp_offloaded_route *entry; - - spin_lock_bh(list_lock); - list_for_each_entry(entry, route_list, list) - if (!memcmp(entry->ip_add, add, add_len)) { - spin_unlock_bh(list_lock); - return true; - } - spin_unlock_bh(list_lock); - return false; -} - -static int -__nfp_tun_add_route_to_cache(struct list_head *route_list, - spinlock_t *list_lock, void *add, int add_len) -{ - struct nfp_offloaded_route *entry; - - spin_lock_bh(list_lock); - list_for_each_entry(entry, route_list, list) - if (!memcmp(entry->ip_add, add, add_len)) { - spin_unlock_bh(list_lock); - return 0; - } - - entry = kmalloc(sizeof(*entry) + add_len, GFP_ATOMIC); - if (!entry) { - spin_unlock_bh(list_lock); - return -ENOMEM; - } - - memcpy(entry->ip_add, add, add_len); - list_add_tail(&entry->list, route_list); - 
spin_unlock_bh(list_lock); - - return 0; -} - static void -__nfp_tun_del_route_from_cache(struct list_head *route_list, - spinlock_t *list_lock, void *add, int add_len) +nfp_tun_mutual_link(struct nfp_predt_entry *predt, + struct nfp_neigh_entry *neigh) { - struct nfp_offloaded_route *entry; - - spin_lock_bh(list_lock); - list_for_each_entry(entry, route_list, list) - if (!memcmp(entry->ip_add, add, add_len)) { - list_del(&entry->list); - kfree(entry); - break; - } - spin_unlock_bh(list_lock); -} + struct nfp_fl_payload *flow_pay = predt->flow_pay; + struct nfp_tun_neigh_ext *ext; + struct nfp_tun_neigh *common; -static bool nfp_tun_has_route_v4(struct nfp_app *app, __be32 *ipv4_addr) -{ - struct nfp_flower_priv *priv = app->priv; + if (flow_pay->pre_tun_rule.is_ipv6 != neigh->is_ipv6) + return; - return __nfp_tun_has_route(&priv->tun.neigh_off_list_v4, - &priv->tun.neigh_off_lock_v4, ipv4_addr, - sizeof(*ipv4_addr)); -} + /* In the case of bonding it is possible that there might already + * be a flow linked (as the MAC address gets shared). If a flow + * is already linked just return. + */ + if (neigh->flow) + return; -static bool -nfp_tun_has_route_v6(struct nfp_app *app, struct in6_addr *ipv6_addr) -{ - struct nfp_flower_priv *priv = app->priv; + common = neigh->is_ipv6 ? + &((struct nfp_tun_neigh_v6 *)neigh->payload)->common : + &((struct nfp_tun_neigh_v4 *)neigh->payload)->common; + ext = neigh->is_ipv6 ? 
+ &((struct nfp_tun_neigh_v6 *)neigh->payload)->ext : + &((struct nfp_tun_neigh_v4 *)neigh->payload)->ext; + + if (memcmp(flow_pay->pre_tun_rule.loc_mac, + common->src_addr, ETH_ALEN) || + memcmp(flow_pay->pre_tun_rule.rem_mac, + common->dst_addr, ETH_ALEN)) + return; - return __nfp_tun_has_route(&priv->tun.neigh_off_list_v6, - &priv->tun.neigh_off_lock_v6, ipv6_addr, - sizeof(*ipv6_addr)); + list_add(&neigh->list_head, &predt->nn_list); + neigh->flow = predt; + ext->host_ctx = flow_pay->meta.host_ctx_id; + ext->vlan_tci = flow_pay->pre_tun_rule.vlan_tci; + ext->vlan_tpid = flow_pay->pre_tun_rule.vlan_tpid; } static void -nfp_tun_add_route_to_cache_v4(struct nfp_app *app, __be32 *ipv4_addr) +nfp_tun_link_predt_entries(struct nfp_app *app, + struct nfp_neigh_entry *nn_entry) { struct nfp_flower_priv *priv = app->priv; + struct nfp_predt_entry *predt, *tmp; - __nfp_tun_add_route_to_cache(&priv->tun.neigh_off_list_v4, - &priv->tun.neigh_off_lock_v4, ipv4_addr, - sizeof(*ipv4_addr)); + list_for_each_entry_safe(predt, tmp, &priv->predt_list, list_head) { + nfp_tun_mutual_link(predt, nn_entry); + } } -static void -nfp_tun_add_route_to_cache_v6(struct nfp_app *app, struct in6_addr *ipv6_addr) +void nfp_tun_link_and_update_nn_entries(struct nfp_app *app, + struct nfp_predt_entry *predt) { struct nfp_flower_priv *priv = app->priv; - - __nfp_tun_add_route_to_cache(&priv->tun.neigh_off_list_v6, - &priv->tun.neigh_off_lock_v6, ipv6_addr, - sizeof(*ipv6_addr)); + struct nfp_neigh_entry *nn_entry; + struct rhashtable_iter iter; + size_t neigh_size; + u8 type; + + rhashtable_walk_enter(&priv->neigh_table, &iter); + rhashtable_walk_start(&iter); + while ((nn_entry = rhashtable_walk_next(&iter)) != NULL) { + if (IS_ERR(nn_entry)) + continue; + nfp_tun_mutual_link(predt, nn_entry); + neigh_size = nn_entry->is_ipv6 ? + sizeof(struct nfp_tun_neigh_v6) : + sizeof(struct nfp_tun_neigh_v4); + type = nn_entry->is_ipv6 ? 
NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6 : + NFP_FLOWER_CMSG_TYPE_TUN_NEIGH; + nfp_flower_xmit_tun_conf(app, type, neigh_size, + nn_entry->payload, + GFP_ATOMIC); + } + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); } -static void -nfp_tun_del_route_from_cache_v4(struct nfp_app *app, __be32 *ipv4_addr) +static void nfp_tun_cleanup_nn_entries(struct nfp_app *app) { struct nfp_flower_priv *priv = app->priv; - - __nfp_tun_del_route_from_cache(&priv->tun.neigh_off_list_v4, - &priv->tun.neigh_off_lock_v4, ipv4_addr, - sizeof(*ipv4_addr)); + struct nfp_neigh_entry *neigh; + struct nfp_tun_neigh_ext *ext; + struct rhashtable_iter iter; + size_t neigh_size; + u8 type; + + rhashtable_walk_enter(&priv->neigh_table, &iter); + rhashtable_walk_start(&iter); + while ((neigh = rhashtable_walk_next(&iter)) != NULL) { + if (IS_ERR(neigh)) + continue; + ext = neigh->is_ipv6 ? + &((struct nfp_tun_neigh_v6 *)neigh->payload)->ext : + &((struct nfp_tun_neigh_v4 *)neigh->payload)->ext; + ext->host_ctx = cpu_to_be32(U32_MAX); + ext->vlan_tpid = cpu_to_be16(U16_MAX); + ext->vlan_tci = cpu_to_be16(U16_MAX); + + neigh_size = neigh->is_ipv6 ? + sizeof(struct nfp_tun_neigh_v6) : + sizeof(struct nfp_tun_neigh_v4); + type = neigh->is_ipv6 ? 
NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6 : + NFP_FLOWER_CMSG_TYPE_TUN_NEIGH; + nfp_flower_xmit_tun_conf(app, type, neigh_size, neigh->payload, + GFP_ATOMIC); + + rhashtable_remove_fast(&priv->neigh_table, &neigh->ht_node, + neigh_table_params); + if (neigh->flow) + list_del(&neigh->list_head); + kfree(neigh); + } + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); } -static void -nfp_tun_del_route_from_cache_v6(struct nfp_app *app, struct in6_addr *ipv6_addr) +void nfp_tun_unlink_and_update_nn_entries(struct nfp_app *app, + struct nfp_predt_entry *predt) { - struct nfp_flower_priv *priv = app->priv; - - __nfp_tun_del_route_from_cache(&priv->tun.neigh_off_list_v6, - &priv->tun.neigh_off_lock_v6, ipv6_addr, - sizeof(*ipv6_addr)); + struct nfp_neigh_entry *neigh, *tmp; + struct nfp_tun_neigh_ext *ext; + size_t neigh_size; + u8 type; + + list_for_each_entry_safe(neigh, tmp, &predt->nn_list, list_head) { + ext = neigh->is_ipv6 ? + &((struct nfp_tun_neigh_v6 *)neigh->payload)->ext : + &((struct nfp_tun_neigh_v4 *)neigh->payload)->ext; + neigh->flow = NULL; + ext->host_ctx = cpu_to_be32(U32_MAX); + ext->vlan_tpid = cpu_to_be16(U16_MAX); + ext->vlan_tci = cpu_to_be16(U16_MAX); + list_del(&neigh->list_head); + neigh_size = neigh->is_ipv6 ? + sizeof(struct nfp_tun_neigh_v6) : + sizeof(struct nfp_tun_neigh_v4); + type = neigh->is_ipv6 ? NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6 : + NFP_FLOWER_CMSG_TYPE_TUN_NEIGH; + nfp_flower_xmit_tun_conf(app, type, neigh_size, neigh->payload, + GFP_ATOMIC); + } } static void -nfp_tun_write_neigh_v4(struct net_device *netdev, struct nfp_app *app, - struct flowi4 *flow, struct neighbour *neigh, gfp_t flag) +nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app, + void *flow, struct neighbour *neigh, bool is_ipv6, + bool override) { - struct nfp_tun_neigh payload; + bool neigh_invalid = !(neigh->nud_state & NUD_VALID) || neigh->dead; + size_t neigh_size = is_ipv6 ? 
sizeof(struct nfp_tun_neigh_v6) : + sizeof(struct nfp_tun_neigh_v4); + unsigned long cookie = (unsigned long)neigh; + struct nfp_flower_priv *priv = app->priv; + struct nfp_neigh_entry *nn_entry; u32 port_id; + u8 mtype; port_id = nfp_flower_get_port_id_from_netdev(app, netdev); if (!port_id) return; - memset(&payload, 0, sizeof(struct nfp_tun_neigh)); - payload.dst_ipv4 = flow->daddr; + spin_lock_bh(&priv->predt_lock); + nn_entry = rhashtable_lookup_fast(&priv->neigh_table, &cookie, + neigh_table_params); + if (!nn_entry && !neigh_invalid) { + struct nfp_tun_neigh_ext *ext; + struct nfp_tun_neigh *common; + + nn_entry = kzalloc(sizeof(*nn_entry) + neigh_size, + GFP_ATOMIC); + if (!nn_entry) + goto err; + + nn_entry->payload = (char *)&nn_entry[1]; + nn_entry->neigh_cookie = cookie; + nn_entry->is_ipv6 = is_ipv6; + nn_entry->flow = NULL; + if (is_ipv6) { + struct flowi6 *flowi6 = (struct flowi6 *)flow; + struct nfp_tun_neigh_v6 *payload; + + payload = (struct nfp_tun_neigh_v6 *)nn_entry->payload; + payload->src_ipv6 = flowi6->saddr; + payload->dst_ipv6 = flowi6->daddr; + common = &payload->common; + ext = &payload->ext; + mtype = NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6; + } else { + struct flowi4 *flowi4 = (struct flowi4 *)flow; + struct nfp_tun_neigh_v4 *payload; + + payload = (struct nfp_tun_neigh_v4 *)nn_entry->payload; + payload->src_ipv4 = flowi4->saddr; + payload->dst_ipv4 = flowi4->daddr; + common = &payload->common; + ext = &payload->ext; + mtype = NFP_FLOWER_CMSG_TYPE_TUN_NEIGH; + } + ext->host_ctx = cpu_to_be32(U32_MAX); + ext->vlan_tpid = cpu_to_be16(U16_MAX); + ext->vlan_tci = cpu_to_be16(U16_MAX); + ether_addr_copy(common->src_addr, netdev->dev_addr); + neigh_ha_snapshot(common->dst_addr, neigh, netdev); + common->port_id = cpu_to_be32(port_id); + + if (rhashtable_insert_fast(&priv->neigh_table, + &nn_entry->ht_node, + neigh_table_params)) + goto err; + + nfp_tun_link_predt_entries(app, nn_entry); + nfp_flower_xmit_tun_conf(app, mtype, neigh_size, + 
nn_entry->payload, + GFP_ATOMIC); + } else if (nn_entry && neigh_invalid) { + if (is_ipv6) { + struct flowi6 *flowi6 = (struct flowi6 *)flow; + struct nfp_tun_neigh_v6 *payload; + + payload = (struct nfp_tun_neigh_v6 *)nn_entry->payload; + memset(payload, 0, sizeof(struct nfp_tun_neigh_v6)); + payload->dst_ipv6 = flowi6->daddr; + mtype = NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6; + } else { + struct flowi4 *flowi4 = (struct flowi4 *)flow; + struct nfp_tun_neigh_v4 *payload; - /* If entry has expired send dst IP with all other fields 0. */ - if (!(neigh->nud_state & NUD_VALID) || neigh->dead) { - nfp_tun_del_route_from_cache_v4(app, &payload.dst_ipv4); + payload = (struct nfp_tun_neigh_v4 *)nn_entry->payload; + memset(payload, 0, sizeof(struct nfp_tun_neigh_v4)); + payload->dst_ipv4 = flowi4->daddr; + mtype = NFP_FLOWER_CMSG_TYPE_TUN_NEIGH; + } /* Trigger ARP to verify invalid neighbour state. */ neigh_event_send(neigh, NULL); - goto send_msg; + rhashtable_remove_fast(&priv->neigh_table, + &nn_entry->ht_node, + neigh_table_params); + + nfp_flower_xmit_tun_conf(app, mtype, neigh_size, + nn_entry->payload, + GFP_ATOMIC); + + if (nn_entry->flow) + list_del(&nn_entry->list_head); + kfree(nn_entry); + } else if (nn_entry && !neigh_invalid && override) { + mtype = is_ipv6 ? NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6 : + NFP_FLOWER_CMSG_TYPE_TUN_NEIGH; + nfp_tun_link_predt_entries(app, nn_entry); + nfp_flower_xmit_tun_conf(app, mtype, neigh_size, + nn_entry->payload, + GFP_ATOMIC); } - /* Have a valid neighbour so populate rest of entry. */ - payload.src_ipv4 = flow->saddr; - ether_addr_copy(payload.src_addr, netdev->dev_addr); - neigh_ha_snapshot(payload.dst_addr, neigh, netdev); - payload.port_id = cpu_to_be32(port_id); - /* Add destination of new route to NFP cache. 
*/ - nfp_tun_add_route_to_cache_v4(app, &payload.dst_ipv4); - -send_msg: - nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_NEIGH, - sizeof(struct nfp_tun_neigh), - (unsigned char *)&payload, flag); -} - -static void -nfp_tun_write_neigh_v6(struct net_device *netdev, struct nfp_app *app, - struct flowi6 *flow, struct neighbour *neigh, gfp_t flag) -{ - struct nfp_tun_neigh_v6 payload; - u32 port_id; - - port_id = nfp_flower_get_port_id_from_netdev(app, netdev); - if (!port_id) - return; - - memset(&payload, 0, sizeof(struct nfp_tun_neigh_v6)); - payload.dst_ipv6 = flow->daddr; - - /* If entry has expired send dst IP with all other fields 0. */ - if (!(neigh->nud_state & NUD_VALID) || neigh->dead) { - nfp_tun_del_route_from_cache_v6(app, &payload.dst_ipv6); - /* Trigger probe to verify invalid neighbour state. */ - neigh_event_send(neigh, NULL); - goto send_msg; - } + spin_unlock_bh(&priv->predt_lock); + return; - /* Have a valid neighbour so populate rest of entry. */ - payload.src_ipv6 = flow->saddr; - ether_addr_copy(payload.src_addr, netdev->dev_addr); - neigh_ha_snapshot(payload.dst_addr, neigh, netdev); - payload.port_id = cpu_to_be32(port_id); - /* Add destination of new route to NFP cache. 
*/ - nfp_tun_add_route_to_cache_v6(app, &payload.dst_ipv6); - -send_msg: - nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6, - sizeof(struct nfp_tun_neigh_v6), - (unsigned char *)&payload, flag); +err: + kfree(nn_entry); + spin_unlock_bh(&priv->predt_lock); + nfp_flower_cmsg_warn(app, "Neighbour configuration failed.\n"); } static int @@ -522,12 +571,9 @@ nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, { struct nfp_flower_priv *app_priv; struct netevent_redirect *redir; - struct flowi4 flow4 = {}; - struct flowi6 flow6 = {}; struct neighbour *n; struct nfp_app *app; - struct rtable *rt; - bool ipv6 = false; + bool neigh_invalid; int err; switch (event) { @@ -542,13 +588,7 @@ nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, return NOTIFY_DONE; } - if (n->tbl->family == AF_INET6) - ipv6 = true; - - if (ipv6) - flow6.daddr = *(struct in6_addr *)n->primary_key; - else - flow4.daddr = *(__be32 *)n->primary_key; + neigh_invalid = !(n->nud_state & NUD_VALID) || n->dead; app_priv = container_of(nb, struct nfp_flower_priv, tun.neigh_nb); app = app_priv->app; @@ -557,38 +597,51 @@ nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event, !nfp_flower_internal_port_can_offload(app, n->dev)) return NOTIFY_DONE; - /* Only concerned with changes to routes already added to NFP. 
*/ - if ((ipv6 && !nfp_tun_has_route_v6(app, &flow6.daddr)) || - (!ipv6 && !nfp_tun_has_route_v4(app, &flow4.daddr))) - return NOTIFY_DONE; - #if IS_ENABLED(CONFIG_INET) - if (ipv6) { + if (n->tbl->family == AF_INET6) { #if IS_ENABLED(CONFIG_IPV6) - struct dst_entry *dst; - - dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(n->dev), NULL, - &flow6, NULL); - if (IS_ERR(dst)) - return NOTIFY_DONE; + struct flowi6 flow6 = {}; - dst_release(dst); - flow6.flowi6_proto = IPPROTO_UDP; - nfp_tun_write_neigh_v6(n->dev, app, &flow6, n, GFP_ATOMIC); + flow6.daddr = *(struct in6_addr *)n->primary_key; + if (!neigh_invalid) { + struct dst_entry *dst; + /* Use ipv6_dst_lookup_flow to populate flow6->saddr + * and other fields. This information is only needed + * for new entries, lookup can be skipped when an entry + * gets invalidated - as only the daddr is needed for + * deleting. + */ + dst = ip6_dst_lookup_flow(dev_net(n->dev), NULL, + &flow6, NULL); + if (IS_ERR(dst)) + return NOTIFY_DONE; + + dst_release(dst); + } + nfp_tun_write_neigh(n->dev, app, &flow6, n, true, false); #else return NOTIFY_DONE; #endif /* CONFIG_IPV6 */ } else { - /* Do a route lookup to populate flow data. */ - rt = ip_route_output_key(dev_net(n->dev), &flow4); - err = PTR_ERR_OR_ZERO(rt); - if (err) - return NOTIFY_DONE; + struct flowi4 flow4 = {}; - ip_rt_put(rt); - - flow4.flowi4_proto = IPPROTO_UDP; - nfp_tun_write_neigh_v4(n->dev, app, &flow4, n, GFP_ATOMIC); + flow4.daddr = *(__be32 *)n->primary_key; + if (!neigh_invalid) { + struct rtable *rt; + /* Use ip_route_output_key to populate flow4->saddr and + * other fields. This information is only needed for + * new entries, lookup can be skipped when an entry + * gets invalidated - as only the daddr is needed for + * deleting. 
+ */ + rt = ip_route_output_key(dev_net(n->dev), &flow4); + err = PTR_ERR_OR_ZERO(rt); + if (err) + return NOTIFY_DONE; + + ip_rt_put(rt); + } + nfp_tun_write_neigh(n->dev, app, &flow4, n, false, false); } #else return NOTIFY_DONE; @@ -631,7 +684,7 @@ void nfp_tunnel_request_route_v4(struct nfp_app *app, struct sk_buff *skb) ip_rt_put(rt); if (!n) goto fail_rcu_unlock; - nfp_tun_write_neigh_v4(n->dev, app, &flow, n, GFP_ATOMIC); + nfp_tun_write_neigh(n->dev, app, &flow, n, false, true); neigh_release(n); rcu_read_unlock(); return; @@ -673,7 +726,7 @@ void nfp_tunnel_request_route_v6(struct nfp_app *app, struct sk_buff *skb) if (!n) goto fail_rcu_unlock; - nfp_tun_write_neigh_v6(n->dev, app, &flow, n, GFP_ATOMIC); + nfp_tun_write_neigh(n->dev, app, &flow, n, true, true); neigh_release(n); rcu_read_unlock(); return; @@ -922,8 +975,8 @@ nfp_tunnel_add_shared_mac(struct nfp_app *app, struct net_device *netdev, int port, bool mod) { struct nfp_flower_priv *priv = app->priv; - int ida_idx = NFP_MAX_MAC_INDEX, err; struct nfp_tun_offloaded_mac *entry; + int ida_idx = -1, err; u16 nfp_mac_idx = 0; entry = nfp_tunnel_lookup_offloaded_macs(app, netdev->dev_addr); @@ -942,8 +995,8 @@ nfp_tunnel_add_shared_mac(struct nfp_app *app, struct net_device *netdev, if (!nfp_mac_idx) { /* Assign a global index if non-repr or MAC is now shared. 
*/ if (entry || !port) { - ida_idx = ida_simple_get(&priv->tun.mac_off_ids, 0, - NFP_MAX_MAC_INDEX, GFP_KERNEL); + ida_idx = ida_alloc_max(&priv->tun.mac_off_ids, + NFP_MAX_MAC_INDEX, GFP_KERNEL); if (ida_idx < 0) return ida_idx; @@ -997,8 +1050,8 @@ err_remove_hash: err_free_entry: kfree(entry); err_free_ida: - if (ida_idx != NFP_MAX_MAC_INDEX) - ida_simple_remove(&priv->tun.mac_off_ids, ida_idx); + if (ida_idx != -1) + ida_free(&priv->tun.mac_off_ids, ida_idx); return err; } @@ -1011,6 +1064,7 @@ nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev, struct nfp_flower_repr_priv *repr_priv; struct nfp_tun_offloaded_mac *entry; struct nfp_repr *repr; + u16 nfp_mac_idx; int ida_idx; entry = nfp_tunnel_lookup_offloaded_macs(app, mac); @@ -1018,7 +1072,7 @@ nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev, return 0; entry->ref_count--; - /* If del is part of a mod then mac_list is still in use elsewheree. */ + /* If del is part of a mod then mac_list is still in use elsewhere. */ if (nfp_netdev_is_nfp_repr(netdev) && !mod) { repr = netdev_priv(netdev); repr_priv = repr->app_priv; @@ -1029,8 +1083,6 @@ nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev, entry->bridge_count--; if (!entry->bridge_count && entry->ref_count) { - u16 nfp_mac_idx; - nfp_mac_idx = entry->index & ~NFP_TUN_PRE_TUN_IDX_BIT; if (__nfp_tunnel_offload_mac(app, mac, nfp_mac_idx, false)) { @@ -1046,7 +1098,6 @@ nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev, /* If MAC is now used by 1 repr set the offloaded MAC index to port. 
*/ if (entry->ref_count == 1 && list_is_singular(&entry->repr_list)) { - u16 nfp_mac_idx; int port, err; repr_priv = list_first_entry(&entry->repr_list, @@ -1063,7 +1114,7 @@ nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev, } ida_idx = nfp_tunnel_get_ida_from_global_mac_idx(entry->index); - ida_simple_remove(&priv->tun.mac_off_ids, ida_idx); + ida_free(&priv->tun.mac_off_ids, ida_idx); entry->index = nfp_mac_idx; return 0; } @@ -1074,10 +1125,16 @@ nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev, WARN_ON_ONCE(rhashtable_remove_fast(&priv->tun.offloaded_macs, &entry->ht_node, offloaded_macs_params)); + + if (nfp_flower_is_supported_bridge(netdev)) + nfp_mac_idx = entry->index & ~NFP_TUN_PRE_TUN_IDX_BIT; + else + nfp_mac_idx = entry->index; + /* If MAC has global ID then extract and free the ida entry. */ - if (nfp_tunnel_is_mac_idx_global(entry->index)) { + if (nfp_tunnel_is_mac_idx_global(nfp_mac_idx)) { ida_idx = nfp_tunnel_get_ida_from_global_mac_idx(entry->index); - ida_simple_remove(&priv->tun.mac_off_ids, ida_idx); + ida_free(&priv->tun.mac_off_ids, ida_idx); } kfree(entry); @@ -1364,10 +1421,6 @@ int nfp_tunnel_config_start(struct nfp_app *app) INIT_LIST_HEAD(&priv->tun.ipv6_off_list); /* Initialise priv data for neighbour offloading. 
*/ - spin_lock_init(&priv->tun.neigh_off_lock_v4); - INIT_LIST_HEAD(&priv->tun.neigh_off_list_v4); - spin_lock_init(&priv->tun.neigh_off_lock_v6); - INIT_LIST_HEAD(&priv->tun.neigh_off_list_v6); priv->tun.neigh_nb.notifier_call = nfp_tun_neigh_event_handler; err = register_netevent_notifier(&priv->tun.neigh_nb); @@ -1382,11 +1435,8 @@ int nfp_tunnel_config_start(struct nfp_app *app) void nfp_tunnel_config_stop(struct nfp_app *app) { - struct nfp_offloaded_route *route_entry, *temp; struct nfp_flower_priv *priv = app->priv; struct nfp_ipv4_addr_entry *ip_entry; - struct nfp_tun_neigh_v6 ipv6_route; - struct nfp_tun_neigh ipv4_route; struct list_head *ptr, *storage; unregister_netevent_notifier(&priv->tun.neigh_nb); @@ -1402,36 +1452,9 @@ void nfp_tunnel_config_stop(struct nfp_app *app) mutex_destroy(&priv->tun.ipv6_off_lock); - /* Free memory in the route list and remove entries from fw cache. */ - list_for_each_entry_safe(route_entry, temp, - &priv->tun.neigh_off_list_v4, list) { - memset(&ipv4_route, 0, sizeof(ipv4_route)); - memcpy(&ipv4_route.dst_ipv4, &route_entry->ip_add, - sizeof(ipv4_route.dst_ipv4)); - list_del(&route_entry->list); - kfree(route_entry); - - nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_NEIGH, - sizeof(struct nfp_tun_neigh), - (unsigned char *)&ipv4_route, - GFP_KERNEL); - } - - list_for_each_entry_safe(route_entry, temp, - &priv->tun.neigh_off_list_v6, list) { - memset(&ipv6_route, 0, sizeof(ipv6_route)); - memcpy(&ipv6_route.dst_ipv6, &route_entry->ip_add, - sizeof(ipv6_route.dst_ipv6)); - list_del(&route_entry->list); - kfree(route_entry); - - nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6, - sizeof(struct nfp_tun_neigh), - (unsigned char *)&ipv6_route, - GFP_KERNEL); - } - /* Destroy rhash. Entries should be cleaned on netdev notifier unreg. 
*/ rhashtable_free_and_destroy(&priv->tun.offloaded_macs, nfp_check_rhashtable_empty, NULL); + + nfp_tun_cleanup_nn_entries(app); } diff --git a/drivers/net/ethernet/netronome/nfp/nfd3/dp.c b/drivers/net/ethernet/netronome/nfp/nfd3/dp.c new file mode 100644 index 000000000000..448c1c1afaee --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfd3/dp.c @@ -0,0 +1,1382 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2015-2019 Netronome Systems, Inc. */ + +#include <linux/bpf_trace.h> +#include <linux/netdevice.h> +#include <linux/bitfield.h> + +#include "../nfp_app.h" +#include "../nfp_net.h" +#include "../nfp_net_dp.h" +#include "../nfp_net_xsk.h" +#include "../crypto/crypto.h" +#include "../crypto/fw.h" +#include "nfd3.h" + +/* Transmit processing + * + * One queue controller peripheral queue is used for transmit. The + * driver en-queues packets for transmit by advancing the write + * pointer. The device indicates that packets have transmitted by + * advancing the read pointer. The driver maintains a local copy of + * the read and write pointer in @struct nfp_net_tx_ring. The driver + * keeps @wr_p in sync with the queue controller write pointer and can + * determine how many packets have been transmitted by comparing its + * copy of the read pointer @rd_p with the read pointer maintained by + * the queue controller peripheral. + */ + +/* Wrappers for deciding when to stop and restart TX queues */ +static int nfp_nfd3_tx_ring_should_wake(struct nfp_net_tx_ring *tx_ring) +{ + return !nfp_net_tx_full(tx_ring, MAX_SKB_FRAGS * 4); +} + +static int nfp_nfd3_tx_ring_should_stop(struct nfp_net_tx_ring *tx_ring) +{ + return nfp_net_tx_full(tx_ring, MAX_SKB_FRAGS + 1); +} + +/** + * nfp_nfd3_tx_ring_stop() - stop tx ring + * @nd_q: netdev queue + * @tx_ring: driver tx queue structure + * + * Safely stop TX ring. 
Remember that while we are running .start_xmit() + * someone else may be cleaning the TX ring completions so we need to be + * extra careful here. + */ +static void +nfp_nfd3_tx_ring_stop(struct netdev_queue *nd_q, + struct nfp_net_tx_ring *tx_ring) +{ + netif_tx_stop_queue(nd_q); + + /* We can race with the TX completion out of NAPI so recheck */ + smp_mb(); + if (unlikely(nfp_nfd3_tx_ring_should_wake(tx_ring))) + netif_tx_start_queue(nd_q); +} + +/** + * nfp_nfd3_tx_tso() - Set up Tx descriptor for LSO + * @r_vec: per-ring structure + * @txbuf: Pointer to driver soft TX descriptor + * @txd: Pointer to HW TX descriptor + * @skb: Pointer to SKB + * @md_bytes: Prepend length + * + * Set up Tx descriptor for LSO, do nothing for non-LSO skbs. + * Return error on packet header greater than maximum supported LSO header size. + */ +static void +nfp_nfd3_tx_tso(struct nfp_net_r_vector *r_vec, struct nfp_nfd3_tx_buf *txbuf, + struct nfp_nfd3_tx_desc *txd, struct sk_buff *skb, u32 md_bytes) +{ + u32 l3_offset, l4_offset, hdrlen; + u16 mss; + + if (!skb_is_gso(skb)) + return; + + if (!skb->encapsulation) { + l3_offset = skb_network_offset(skb); + l4_offset = skb_transport_offset(skb); + hdrlen = skb_tcp_all_headers(skb); + } else { + l3_offset = skb_inner_network_offset(skb); + l4_offset = skb_inner_transport_offset(skb); + hdrlen = skb_inner_tcp_all_headers(skb); + } + + txbuf->pkt_cnt = skb_shinfo(skb)->gso_segs; + txbuf->real_len += hdrlen * (txbuf->pkt_cnt - 1); + + mss = skb_shinfo(skb)->gso_size & NFD3_DESC_TX_MSS_MASK; + txd->l3_offset = l3_offset - md_bytes; + txd->l4_offset = l4_offset - md_bytes; + txd->lso_hdrlen = hdrlen - md_bytes; + txd->mss = cpu_to_le16(mss); + txd->flags |= NFD3_DESC_TX_LSO; + + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_lso++; + u64_stats_update_end(&r_vec->tx_sync); +} + +/** + * nfp_nfd3_tx_csum() - Set TX CSUM offload flags in TX descriptor + * @dp: NFP Net data path struct + * @r_vec: per-ring structure + * @txbuf: Pointer to 
driver soft TX descriptor + * @txd: Pointer to TX descriptor + * @skb: Pointer to SKB + * + * This function sets the TX checksum flags in the TX descriptor based + * on the configuration and the protocol of the packet to be transmitted. + */ +static void +nfp_nfd3_tx_csum(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, + struct nfp_nfd3_tx_buf *txbuf, struct nfp_nfd3_tx_desc *txd, + struct sk_buff *skb) +{ + struct ipv6hdr *ipv6h; + struct iphdr *iph; + u8 l4_hdr; + + if (!(dp->ctrl & NFP_NET_CFG_CTRL_TXCSUM)) + return; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return; + + txd->flags |= NFD3_DESC_TX_CSUM; + if (skb->encapsulation) + txd->flags |= NFD3_DESC_TX_ENCAP; + + iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb); + ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb); + + if (iph->version == 4) { + txd->flags |= NFD3_DESC_TX_IP4_CSUM; + l4_hdr = iph->protocol; + } else if (ipv6h->version == 6) { + l4_hdr = ipv6h->nexthdr; + } else { + nn_dp_warn(dp, "partial checksum but ipv=%x!\n", iph->version); + return; + } + + switch (l4_hdr) { + case IPPROTO_TCP: + txd->flags |= NFD3_DESC_TX_TCP_CSUM; + break; + case IPPROTO_UDP: + txd->flags |= NFD3_DESC_TX_UDP_CSUM; + break; + default: + nn_dp_warn(dp, "partial checksum but l4 proto=%x!\n", l4_hdr); + return; + } + + u64_stats_update_begin(&r_vec->tx_sync); + if (skb->encapsulation) + r_vec->hw_csum_tx_inner += txbuf->pkt_cnt; + else + r_vec->hw_csum_tx += txbuf->pkt_cnt; + u64_stats_update_end(&r_vec->tx_sync); +} + +static int nfp_nfd3_prep_tx_meta(struct nfp_net_dp *dp, struct sk_buff *skb, u64 tls_handle) +{ + struct metadata_dst *md_dst = skb_metadata_dst(skb); + unsigned char *data; + bool vlan_insert; + u32 meta_id = 0; + int md_bytes; + + if (unlikely(md_dst || tls_handle)) { + if (unlikely(md_dst && md_dst->type != METADATA_HW_PORT_MUX)) + md_dst = NULL; + } + + vlan_insert = skb_vlan_tag_present(skb) && (dp->ctrl & NFP_NET_CFG_CTRL_TXVLAN_V2); + + if (!(md_dst || tls_handle 
|| vlan_insert)) + return 0; + + md_bytes = sizeof(meta_id) + + !!md_dst * NFP_NET_META_PORTID_SIZE + + !!tls_handle * NFP_NET_META_CONN_HANDLE_SIZE + + vlan_insert * NFP_NET_META_VLAN_SIZE; + + if (unlikely(skb_cow_head(skb, md_bytes))) + return -ENOMEM; + + data = skb_push(skb, md_bytes) + md_bytes; + if (md_dst) { + data -= NFP_NET_META_PORTID_SIZE; + put_unaligned_be32(md_dst->u.port_info.port_id, data); + meta_id = NFP_NET_META_PORTID; + } + if (tls_handle) { + /* conn handle is opaque, we just use u64 to be able to quickly + * compare it to zero + */ + data -= NFP_NET_META_CONN_HANDLE_SIZE; + memcpy(data, &tls_handle, sizeof(tls_handle)); + meta_id <<= NFP_NET_META_FIELD_SIZE; + meta_id |= NFP_NET_META_CONN_HANDLE; + } + if (vlan_insert) { + data -= NFP_NET_META_VLAN_SIZE; + /* data type of skb->vlan_proto is __be16 + * so it fills metadata without calling put_unaligned_be16 + */ + memcpy(data, &skb->vlan_proto, sizeof(skb->vlan_proto)); + put_unaligned_be16(skb_vlan_tag_get(skb), data + sizeof(skb->vlan_proto)); + meta_id <<= NFP_NET_META_FIELD_SIZE; + meta_id |= NFP_NET_META_VLAN; + } + + data -= sizeof(meta_id); + put_unaligned_be32(meta_id, data); + + return md_bytes; +} + +/** + * nfp_nfd3_tx() - Main transmit entry point + * @skb: SKB to transmit + * @netdev: netdev structure + * + * Return: NETDEV_TX_OK on success. 
+ */ +netdev_tx_t nfp_nfd3_tx(struct sk_buff *skb, struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + int f, nr_frags, wr_idx, md_bytes; + struct nfp_net_tx_ring *tx_ring; + struct nfp_net_r_vector *r_vec; + struct nfp_nfd3_tx_buf *txbuf; + struct nfp_nfd3_tx_desc *txd; + struct netdev_queue *nd_q; + const skb_frag_t *frag; + struct nfp_net_dp *dp; + dma_addr_t dma_addr; + unsigned int fsize; + u64 tls_handle = 0; + u16 qidx; + + dp = &nn->dp; + qidx = skb_get_queue_mapping(skb); + tx_ring = &dp->tx_rings[qidx]; + r_vec = tx_ring->r_vec; + + nr_frags = skb_shinfo(skb)->nr_frags; + + if (unlikely(nfp_net_tx_full(tx_ring, nr_frags + 1))) { + nn_dp_warn(dp, "TX ring %d busy. wrp=%u rdp=%u\n", + qidx, tx_ring->wr_p, tx_ring->rd_p); + nd_q = netdev_get_tx_queue(dp->netdev, qidx); + netif_tx_stop_queue(nd_q); + nfp_net_tx_xmit_more_flush(tx_ring); + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_busy++; + u64_stats_update_end(&r_vec->tx_sync); + return NETDEV_TX_BUSY; + } + + skb = nfp_net_tls_tx(dp, r_vec, skb, &tls_handle, &nr_frags); + if (unlikely(!skb)) { + nfp_net_tx_xmit_more_flush(tx_ring); + return NETDEV_TX_OK; + } + + md_bytes = nfp_nfd3_prep_tx_meta(dp, skb, tls_handle); + if (unlikely(md_bytes < 0)) + goto err_flush; + + /* Start with the head skbuf */ + dma_addr = dma_map_single(dp->dev, skb->data, skb_headlen(skb), + DMA_TO_DEVICE); + if (dma_mapping_error(dp->dev, dma_addr)) + goto err_dma_err; + + wr_idx = D_IDX(tx_ring, tx_ring->wr_p); + + /* Stash the soft descriptor of the head then initialize it */ + txbuf = &tx_ring->txbufs[wr_idx]; + txbuf->skb = skb; + txbuf->dma_addr = dma_addr; + txbuf->fidx = -1; + txbuf->pkt_cnt = 1; + txbuf->real_len = skb->len; + + /* Build TX descriptor */ + txd = &tx_ring->txds[wr_idx]; + txd->offset_eop = (nr_frags ? 
0 : NFD3_DESC_TX_EOP) | md_bytes; + txd->dma_len = cpu_to_le16(skb_headlen(skb)); + nfp_desc_set_dma_addr_40b(txd, dma_addr); + txd->data_len = cpu_to_le16(skb->len); + + txd->flags = 0; + txd->mss = 0; + txd->lso_hdrlen = 0; + + /* Do not reorder - tso may adjust pkt cnt, vlan may override fields */ + nfp_nfd3_tx_tso(r_vec, txbuf, txd, skb, md_bytes); + nfp_nfd3_tx_csum(dp, r_vec, txbuf, txd, skb); + if (skb_vlan_tag_present(skb) && dp->ctrl & NFP_NET_CFG_CTRL_TXVLAN) { + txd->flags |= NFD3_DESC_TX_VLAN; + txd->vlan = cpu_to_le16(skb_vlan_tag_get(skb)); + } + + /* Gather DMA */ + if (nr_frags > 0) { + __le64 second_half; + + /* all descs must match except for in addr, length and eop */ + second_half = txd->vals8[1]; + + for (f = 0; f < nr_frags; f++) { + frag = &skb_shinfo(skb)->frags[f]; + fsize = skb_frag_size(frag); + + dma_addr = skb_frag_dma_map(dp->dev, frag, 0, + fsize, DMA_TO_DEVICE); + if (dma_mapping_error(dp->dev, dma_addr)) + goto err_unmap; + + wr_idx = D_IDX(tx_ring, wr_idx + 1); + tx_ring->txbufs[wr_idx].skb = skb; + tx_ring->txbufs[wr_idx].dma_addr = dma_addr; + tx_ring->txbufs[wr_idx].fidx = f; + + txd = &tx_ring->txds[wr_idx]; + txd->dma_len = cpu_to_le16(fsize); + nfp_desc_set_dma_addr_40b(txd, dma_addr); + txd->offset_eop = md_bytes | + ((f == nr_frags - 1) ? 
NFD3_DESC_TX_EOP : 0); + txd->vals8[1] = second_half; + } + + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_gather++; + u64_stats_update_end(&r_vec->tx_sync); + } + + skb_tx_timestamp(skb); + + nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx); + + tx_ring->wr_p += nr_frags + 1; + if (nfp_nfd3_tx_ring_should_stop(tx_ring)) + nfp_nfd3_tx_ring_stop(nd_q, tx_ring); + + tx_ring->wr_ptr_add += nr_frags + 1; + if (__netdev_tx_sent_queue(nd_q, txbuf->real_len, netdev_xmit_more())) + nfp_net_tx_xmit_more_flush(tx_ring); + + return NETDEV_TX_OK; + +err_unmap: + while (--f >= 0) { + frag = &skb_shinfo(skb)->frags[f]; + dma_unmap_page(dp->dev, tx_ring->txbufs[wr_idx].dma_addr, + skb_frag_size(frag), DMA_TO_DEVICE); + tx_ring->txbufs[wr_idx].skb = NULL; + tx_ring->txbufs[wr_idx].dma_addr = 0; + tx_ring->txbufs[wr_idx].fidx = -2; + wr_idx = wr_idx - 1; + if (wr_idx < 0) + wr_idx += tx_ring->cnt; + } + dma_unmap_single(dp->dev, tx_ring->txbufs[wr_idx].dma_addr, + skb_headlen(skb), DMA_TO_DEVICE); + tx_ring->txbufs[wr_idx].skb = NULL; + tx_ring->txbufs[wr_idx].dma_addr = 0; + tx_ring->txbufs[wr_idx].fidx = -2; +err_dma_err: + nn_dp_warn(dp, "Failed to map DMA TX buffer\n"); +err_flush: + nfp_net_tx_xmit_more_flush(tx_ring); + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_errors++; + u64_stats_update_end(&r_vec->tx_sync); + nfp_net_tls_tx_undo(skb, tls_handle); + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; +} + +/** + * nfp_nfd3_tx_complete() - Handled completed TX packets + * @tx_ring: TX ring structure + * @budget: NAPI budget (only used as bool to determine if in NAPI context) + */ +void nfp_nfd3_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + struct nfp_net_dp *dp = &r_vec->nfp_net->dp; + u32 done_pkts = 0, done_bytes = 0; + struct netdev_queue *nd_q; + u32 qcp_rd_p; + int todo; + + if (tx_ring->wr_p == tx_ring->rd_p) + return; + + /* Work out how many descriptors have been transmitted */ + 
qcp_rd_p = nfp_net_read_tx_cmpl(tx_ring, dp); + + if (qcp_rd_p == tx_ring->qcp_rd_p) + return; + + todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p); + + while (todo--) { + const skb_frag_t *frag; + struct nfp_nfd3_tx_buf *tx_buf; + struct sk_buff *skb; + int fidx, nr_frags; + int idx; + + idx = D_IDX(tx_ring, tx_ring->rd_p++); + tx_buf = &tx_ring->txbufs[idx]; + + skb = tx_buf->skb; + if (!skb) + continue; + + nr_frags = skb_shinfo(skb)->nr_frags; + fidx = tx_buf->fidx; + + if (fidx == -1) { + /* unmap head */ + dma_unmap_single(dp->dev, tx_buf->dma_addr, + skb_headlen(skb), DMA_TO_DEVICE); + + done_pkts += tx_buf->pkt_cnt; + done_bytes += tx_buf->real_len; + } else { + /* unmap fragment */ + frag = &skb_shinfo(skb)->frags[fidx]; + dma_unmap_page(dp->dev, tx_buf->dma_addr, + skb_frag_size(frag), DMA_TO_DEVICE); + } + + /* check for last gather fragment */ + if (fidx == nr_frags - 1) + napi_consume_skb(skb, budget); + + tx_buf->dma_addr = 0; + tx_buf->skb = NULL; + tx_buf->fidx = -2; + } + + tx_ring->qcp_rd_p = qcp_rd_p; + + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_bytes += done_bytes; + r_vec->tx_pkts += done_pkts; + u64_stats_update_end(&r_vec->tx_sync); + + if (!dp->netdev) + return; + + nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx); + netdev_tx_completed_queue(nd_q, done_pkts, done_bytes); + if (nfp_nfd3_tx_ring_should_wake(tx_ring)) { + /* Make sure TX thread will see updated tx_ring->rd_p */ + smp_mb(); + + if (unlikely(netif_tx_queue_stopped(nd_q))) + netif_tx_wake_queue(nd_q); + } + + WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt, + "TX ring corruption rd_p=%u wr_p=%u cnt=%u\n", + tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt); +} + +static bool nfp_nfd3_xdp_complete(struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + struct nfp_net_dp *dp = &r_vec->nfp_net->dp; + u32 done_pkts = 0, done_bytes = 0; + bool done_all; + int idx, todo; + u32 qcp_rd_p; + + /* Work out how many descriptors have been 
transmitted */ + qcp_rd_p = nfp_net_read_tx_cmpl(tx_ring, dp); + + if (qcp_rd_p == tx_ring->qcp_rd_p) + return true; + + todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p); + + done_all = todo <= NFP_NET_XDP_MAX_COMPLETE; + todo = min(todo, NFP_NET_XDP_MAX_COMPLETE); + + tx_ring->qcp_rd_p = D_IDX(tx_ring, tx_ring->qcp_rd_p + todo); + + done_pkts = todo; + while (todo--) { + idx = D_IDX(tx_ring, tx_ring->rd_p); + tx_ring->rd_p++; + + done_bytes += tx_ring->txbufs[idx].real_len; + } + + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_bytes += done_bytes; + r_vec->tx_pkts += done_pkts; + u64_stats_update_end(&r_vec->tx_sync); + + WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt, + "XDP TX ring corruption rd_p=%u wr_p=%u cnt=%u\n", + tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt); + + return done_all; +} + +/* Receive processing + */ + +static void * +nfp_nfd3_napi_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr) +{ + void *frag; + + if (!dp->xdp_prog) { + frag = napi_alloc_frag(dp->fl_bufsz); + if (unlikely(!frag)) + return NULL; + } else { + struct page *page; + + page = dev_alloc_page(); + if (unlikely(!page)) + return NULL; + frag = page_address(page); + } + + *dma_addr = nfp_net_dma_map_rx(dp, frag); + if (dma_mapping_error(dp->dev, *dma_addr)) { + nfp_net_free_frag(frag, dp->xdp_prog); + nn_dp_warn(dp, "Failed to map DMA RX buffer\n"); + return NULL; + } + + return frag; +} + +/** + * nfp_nfd3_rx_give_one() - Put mapped skb on the software and hardware rings + * @dp: NFP Net data path struct + * @rx_ring: RX ring structure + * @frag: page fragment buffer + * @dma_addr: DMA address of skb mapping + */ +static void +nfp_nfd3_rx_give_one(const struct nfp_net_dp *dp, + struct nfp_net_rx_ring *rx_ring, + void *frag, dma_addr_t dma_addr) +{ + unsigned int wr_idx; + + wr_idx = D_IDX(rx_ring, rx_ring->wr_p); + + nfp_net_dma_sync_dev_rx(dp, dma_addr); + + /* Stash SKB and DMA address away */ + rx_ring->rxbufs[wr_idx].frag = frag; + 
rx_ring->rxbufs[wr_idx].dma_addr = dma_addr; + + /* Fill freelist descriptor */ + rx_ring->rxds[wr_idx].fld.reserved = 0; + rx_ring->rxds[wr_idx].fld.meta_len_dd = 0; + /* DMA address is expanded to 48-bit width in freelist for NFP3800, + * so the *_48b macro is used accordingly, it's also OK to fill + * a 40-bit address since the top 8 bits are get set to 0. + */ + nfp_desc_set_dma_addr_48b(&rx_ring->rxds[wr_idx].fld, + dma_addr + dp->rx_dma_off); + + rx_ring->wr_p++; + if (!(rx_ring->wr_p % NFP_NET_FL_BATCH)) { + /* Update write pointer of the freelist queue. Make + * sure all writes are flushed before telling the hardware. + */ + wmb(); + nfp_qcp_wr_ptr_add(rx_ring->qcp_fl, NFP_NET_FL_BATCH); + } +} + +/** + * nfp_nfd3_rx_ring_fill_freelist() - Give buffers from the ring to FW + * @dp: NFP Net data path struct + * @rx_ring: RX ring to fill + */ +void nfp_nfd3_rx_ring_fill_freelist(struct nfp_net_dp *dp, + struct nfp_net_rx_ring *rx_ring) +{ + unsigned int i; + + if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx)) + return nfp_net_xsk_rx_ring_fill_freelist(rx_ring); + + for (i = 0; i < rx_ring->cnt - 1; i++) + nfp_nfd3_rx_give_one(dp, rx_ring, rx_ring->rxbufs[i].frag, + rx_ring->rxbufs[i].dma_addr); +} + +/** + * nfp_nfd3_rx_csum_has_errors() - group check if rxd has any csum errors + * @flags: RX descriptor flags field in CPU byte order + */ +static int nfp_nfd3_rx_csum_has_errors(u16 flags) +{ + u16 csum_all_checked, csum_all_ok; + + csum_all_checked = flags & __PCIE_DESC_RX_CSUM_ALL; + csum_all_ok = flags & __PCIE_DESC_RX_CSUM_ALL_OK; + + return csum_all_checked != (csum_all_ok << PCIE_DESC_RX_CSUM_OK_SHIFT); +} + +/** + * nfp_nfd3_rx_csum() - set SKB checksum field based on RX descriptor flags + * @dp: NFP Net data path struct + * @r_vec: per-ring structure + * @rxd: Pointer to RX descriptor + * @meta: Parsed metadata prepend + * @skb: Pointer to SKB + */ +void +nfp_nfd3_rx_csum(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, + const struct 
nfp_net_rx_desc *rxd, + const struct nfp_meta_parsed *meta, struct sk_buff *skb) +{ + skb_checksum_none_assert(skb); + + if (!(dp->netdev->features & NETIF_F_RXCSUM)) + return; + + if (meta->csum_type) { + skb->ip_summed = meta->csum_type; + skb->csum = meta->csum; + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->hw_csum_rx_complete++; + u64_stats_update_end(&r_vec->rx_sync); + return; + } + + if (nfp_nfd3_rx_csum_has_errors(le16_to_cpu(rxd->rxd.flags))) { + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->hw_csum_rx_error++; + u64_stats_update_end(&r_vec->rx_sync); + return; + } + + /* Assume that the firmware will never report inner CSUM_OK unless outer + * L4 headers were successfully parsed. FW will always report zero UDP + * checksum as CSUM_OK. + */ + if (rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM_OK || + rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM_OK) { + __skb_incr_checksum_unnecessary(skb); + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->hw_csum_rx_ok++; + u64_stats_update_end(&r_vec->rx_sync); + } + + if (rxd->rxd.flags & PCIE_DESC_RX_I_TCP_CSUM_OK || + rxd->rxd.flags & PCIE_DESC_RX_I_UDP_CSUM_OK) { + __skb_incr_checksum_unnecessary(skb); + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->hw_csum_rx_inner_ok++; + u64_stats_update_end(&r_vec->rx_sync); + } +} + +static void +nfp_nfd3_set_hash(struct net_device *netdev, struct nfp_meta_parsed *meta, + unsigned int type, __be32 *hash) +{ + if (!(netdev->features & NETIF_F_RXHASH)) + return; + + switch (type) { + case NFP_NET_RSS_IPV4: + case NFP_NET_RSS_IPV6: + case NFP_NET_RSS_IPV6_EX: + meta->hash_type = PKT_HASH_TYPE_L3; + break; + default: + meta->hash_type = PKT_HASH_TYPE_L4; + break; + } + + meta->hash = get_unaligned_be32(hash); +} + +static void +nfp_nfd3_set_hash_desc(struct net_device *netdev, struct nfp_meta_parsed *meta, + void *data, struct nfp_net_rx_desc *rxd) +{ + struct nfp_net_rx_hash *rx_hash = data; + + if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS)) + return; + + nfp_nfd3_set_hash(netdev, 
meta, get_unaligned_be32(&rx_hash->hash_type), + &rx_hash->hash); +} + +bool +nfp_nfd3_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta, + void *data, void *pkt, unsigned int pkt_len, int meta_len) +{ + u32 meta_info, vlan_info; + + meta_info = get_unaligned_be32(data); + data += 4; + + while (meta_info) { + switch (meta_info & NFP_NET_META_FIELD_MASK) { + case NFP_NET_META_HASH: + meta_info >>= NFP_NET_META_FIELD_SIZE; + nfp_nfd3_set_hash(netdev, meta, + meta_info & NFP_NET_META_FIELD_MASK, + (__be32 *)data); + data += 4; + break; + case NFP_NET_META_MARK: + meta->mark = get_unaligned_be32(data); + data += 4; + break; + case NFP_NET_META_VLAN: + vlan_info = get_unaligned_be32(data); + if (FIELD_GET(NFP_NET_META_VLAN_STRIP, vlan_info)) { + meta->vlan.stripped = true; + meta->vlan.tpid = FIELD_GET(NFP_NET_META_VLAN_TPID_MASK, + vlan_info); + meta->vlan.tci = FIELD_GET(NFP_NET_META_VLAN_TCI_MASK, + vlan_info); + } + data += 4; + break; + case NFP_NET_META_PORTID: + meta->portid = get_unaligned_be32(data); + data += 4; + break; + case NFP_NET_META_CSUM: + meta->csum_type = CHECKSUM_COMPLETE; + meta->csum = + (__force __wsum)__get_unaligned_cpu32(data); + data += 4; + break; + case NFP_NET_META_RESYNC_INFO: + if (nfp_net_tls_rx_resync_req(netdev, data, pkt, + pkt_len)) + return false; + data += sizeof(struct nfp_net_tls_resync_req); + break; + default: + return true; + } + + meta_info >>= NFP_NET_META_FIELD_SIZE; + } + + return data != pkt; +} + +static void +nfp_nfd3_rx_drop(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, + struct nfp_net_rx_ring *rx_ring, struct nfp_net_rx_buf *rxbuf, + struct sk_buff *skb) +{ + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->rx_drops++; + /* If we have both skb and rxbuf the replacement buffer allocation + * must have failed, count this as an alloc failure. 
+ */ + if (skb && rxbuf) + r_vec->rx_replace_buf_alloc_fail++; + u64_stats_update_end(&r_vec->rx_sync); + + /* skb is build based on the frag, free_skb() would free the frag + * so to be able to reuse it we need an extra ref. + */ + if (skb && rxbuf && skb->head == rxbuf->frag) + page_ref_inc(virt_to_head_page(rxbuf->frag)); + if (rxbuf) + nfp_nfd3_rx_give_one(dp, rx_ring, rxbuf->frag, rxbuf->dma_addr); + if (skb) + dev_kfree_skb_any(skb); +} + +static bool +nfp_nfd3_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring, + struct nfp_net_tx_ring *tx_ring, + struct nfp_net_rx_buf *rxbuf, unsigned int dma_off, + unsigned int pkt_len, bool *completed) +{ + unsigned int dma_map_sz = dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA; + struct nfp_nfd3_tx_buf *txbuf; + struct nfp_nfd3_tx_desc *txd; + int wr_idx; + + /* Reject if xdp_adjust_tail grow packet beyond DMA area */ + if (pkt_len + dma_off > dma_map_sz) + return false; + + if (unlikely(nfp_net_tx_full(tx_ring, 1))) { + if (!*completed) { + nfp_nfd3_xdp_complete(tx_ring); + *completed = true; + } + + if (unlikely(nfp_net_tx_full(tx_ring, 1))) { + nfp_nfd3_rx_drop(dp, rx_ring->r_vec, rx_ring, rxbuf, + NULL); + return false; + } + } + + wr_idx = D_IDX(tx_ring, tx_ring->wr_p); + + /* Stash the soft descriptor of the head then initialize it */ + txbuf = &tx_ring->txbufs[wr_idx]; + + nfp_nfd3_rx_give_one(dp, rx_ring, txbuf->frag, txbuf->dma_addr); + + txbuf->frag = rxbuf->frag; + txbuf->dma_addr = rxbuf->dma_addr; + txbuf->fidx = -1; + txbuf->pkt_cnt = 1; + txbuf->real_len = pkt_len; + + dma_sync_single_for_device(dp->dev, rxbuf->dma_addr + dma_off, + pkt_len, DMA_BIDIRECTIONAL); + + /* Build TX descriptor */ + txd = &tx_ring->txds[wr_idx]; + txd->offset_eop = NFD3_DESC_TX_EOP; + txd->dma_len = cpu_to_le16(pkt_len); + nfp_desc_set_dma_addr_40b(txd, rxbuf->dma_addr + dma_off); + txd->data_len = cpu_to_le16(pkt_len); + + txd->flags = 0; + txd->mss = 0; + txd->lso_hdrlen = 0; + + tx_ring->wr_p++; + 
tx_ring->wr_ptr_add++; + return true; +} + +/** + * nfp_nfd3_rx() - receive up to @budget packets on @rx_ring + * @rx_ring: RX ring to receive from + * @budget: NAPI budget + * + * Note, this function is separated out from the napi poll function to + * more cleanly separate packet receive code from other bookkeeping + * functions performed in the napi poll function. + * + * Return: Number of packets received. + */ +static int nfp_nfd3_rx(struct nfp_net_rx_ring *rx_ring, int budget) +{ + struct nfp_net_r_vector *r_vec = rx_ring->r_vec; + struct nfp_net_dp *dp = &r_vec->nfp_net->dp; + struct nfp_net_tx_ring *tx_ring; + struct bpf_prog *xdp_prog; + bool xdp_tx_cmpl = false; + unsigned int true_bufsz; + struct sk_buff *skb; + int pkts_polled = 0; + struct xdp_buff xdp; + int idx; + + xdp_prog = READ_ONCE(dp->xdp_prog); + true_bufsz = xdp_prog ? PAGE_SIZE : dp->fl_bufsz; + xdp_init_buff(&xdp, PAGE_SIZE - NFP_NET_RX_BUF_HEADROOM, + &rx_ring->xdp_rxq); + tx_ring = r_vec->xdp_ring; + + while (pkts_polled < budget) { + unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off; + struct nfp_net_rx_buf *rxbuf; + struct nfp_net_rx_desc *rxd; + struct nfp_meta_parsed meta; + bool redir_egress = false; + struct net_device *netdev; + dma_addr_t new_dma_addr; + u32 meta_len_xdp = 0; + void *new_frag; + + idx = D_IDX(rx_ring, rx_ring->rd_p); + + rxd = &rx_ring->rxds[idx]; + if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD)) + break; + + /* Memory barrier to ensure that we won't do other reads + * before the DD bit. 
+ */ + dma_rmb(); + + memset(&meta, 0, sizeof(meta)); + + rx_ring->rd_p++; + pkts_polled++; + + rxbuf = &rx_ring->rxbufs[idx]; + /* < meta_len > + * <-- [rx_offset] --> + * --------------------------------------------------------- + * | [XX] | metadata | packet | XXXX | + * --------------------------------------------------------- + * <---------------- data_len ---------------> + * + * The rx_offset is fixed for all packets, the meta_len can vary + * on a packet by packet basis. If rx_offset is set to zero + * (_RX_OFFSET_DYNAMIC) metadata starts at the beginning of the + * buffer and is immediately followed by the packet (no [XX]). + */ + meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK; + data_len = le16_to_cpu(rxd->rxd.data_len); + pkt_len = data_len - meta_len; + + pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off; + if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC) + pkt_off += meta_len; + else + pkt_off += dp->rx_offset; + meta_off = pkt_off - meta_len; + + /* Stats update */ + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->rx_pkts++; + r_vec->rx_bytes += pkt_len; + u64_stats_update_end(&r_vec->rx_sync); + + if (unlikely(meta_len > NFP_NET_MAX_PREPEND || + (dp->rx_offset && meta_len > dp->rx_offset))) { + nn_dp_warn(dp, "oversized RX packet metadata %u\n", + meta_len); + nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL); + continue; + } + + nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off, + data_len); + + if (!dp->chained_metadata_format) { + nfp_nfd3_set_hash_desc(dp->netdev, &meta, + rxbuf->frag + meta_off, rxd); + } else if (meta_len) { + if (unlikely(nfp_nfd3_parse_meta(dp->netdev, &meta, + rxbuf->frag + meta_off, + rxbuf->frag + pkt_off, + pkt_len, meta_len))) { + nn_dp_warn(dp, "invalid RX packet metadata\n"); + nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf, + NULL); + continue; + } + } + + if (xdp_prog && !meta.portid) { + void *orig_data = rxbuf->frag + pkt_off; + unsigned int dma_off; + int act; + + xdp_prepare_buff(&xdp, + 
rxbuf->frag + NFP_NET_RX_BUF_HEADROOM, + pkt_off - NFP_NET_RX_BUF_HEADROOM, + pkt_len, true); + + act = bpf_prog_run_xdp(xdp_prog, &xdp); + + pkt_len = xdp.data_end - xdp.data; + pkt_off += xdp.data - orig_data; + + switch (act) { + case XDP_PASS: + meta_len_xdp = xdp.data - xdp.data_meta; + break; + case XDP_TX: + dma_off = pkt_off - NFP_NET_RX_BUF_HEADROOM; + if (unlikely(!nfp_nfd3_tx_xdp_buf(dp, rx_ring, + tx_ring, + rxbuf, + dma_off, + pkt_len, + &xdp_tx_cmpl))) + trace_xdp_exception(dp->netdev, + xdp_prog, act); + continue; + default: + bpf_warn_invalid_xdp_action(dp->netdev, xdp_prog, act); + fallthrough; + case XDP_ABORTED: + trace_xdp_exception(dp->netdev, xdp_prog, act); + fallthrough; + case XDP_DROP: + nfp_nfd3_rx_give_one(dp, rx_ring, rxbuf->frag, + rxbuf->dma_addr); + continue; + } + } + + if (likely(!meta.portid)) { + netdev = dp->netdev; + } else if (meta.portid == NFP_META_PORT_ID_CTRL) { + struct nfp_net *nn = netdev_priv(dp->netdev); + + nfp_app_ctrl_rx_raw(nn->app, rxbuf->frag + pkt_off, + pkt_len); + nfp_nfd3_rx_give_one(dp, rx_ring, rxbuf->frag, + rxbuf->dma_addr); + continue; + } else { + struct nfp_net *nn; + + nn = netdev_priv(dp->netdev); + netdev = nfp_app_dev_get(nn->app, meta.portid, + &redir_egress); + if (unlikely(!netdev)) { + nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf, + NULL); + continue; + } + + if (nfp_netdev_is_nfp_repr(netdev)) + nfp_repr_inc_rx_stats(netdev, pkt_len); + } + + skb = build_skb(rxbuf->frag, true_bufsz); + if (unlikely(!skb)) { + nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL); + continue; + } + new_frag = nfp_nfd3_napi_alloc_one(dp, &new_dma_addr); + if (unlikely(!new_frag)) { + nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf, skb); + continue; + } + + nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr); + + nfp_nfd3_rx_give_one(dp, rx_ring, new_frag, new_dma_addr); + + skb_reserve(skb, pkt_off); + skb_put(skb, pkt_len); + + skb->mark = meta.mark; + skb_set_hash(skb, meta.hash, meta.hash_type); + + skb_record_rx_queue(skb, 
rx_ring->idx); + skb->protocol = eth_type_trans(skb, netdev); + + nfp_nfd3_rx_csum(dp, r_vec, rxd, &meta, skb); + +#ifdef CONFIG_TLS_DEVICE + if (rxd->rxd.flags & PCIE_DESC_RX_DECRYPTED) { + skb->decrypted = true; + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->hw_tls_rx++; + u64_stats_update_end(&r_vec->rx_sync); + } +#endif + + if (unlikely(!nfp_net_vlan_strip(skb, rxd, &meta))) { + nfp_nfd3_rx_drop(dp, r_vec, rx_ring, NULL, skb); + continue; + } + + if (meta_len_xdp) + skb_metadata_set(skb, meta_len_xdp); + + if (likely(!redir_egress)) { + napi_gro_receive(&rx_ring->r_vec->napi, skb); + } else { + skb->dev = netdev; + skb_reset_network_header(skb); + __skb_push(skb, ETH_HLEN); + dev_queue_xmit(skb); + } + } + + if (xdp_prog) { + if (tx_ring->wr_ptr_add) + nfp_net_tx_xmit_more_flush(tx_ring); + else if (unlikely(tx_ring->wr_p != tx_ring->rd_p) && + !xdp_tx_cmpl) + if (!nfp_nfd3_xdp_complete(tx_ring)) + pkts_polled = budget; + } + + return pkts_polled; +} + +/** + * nfp_nfd3_poll() - napi poll function + * @napi: NAPI structure + * @budget: NAPI budget + * + * Return: number of packets polled. 
+ */ +int nfp_nfd3_poll(struct napi_struct *napi, int budget) +{ + struct nfp_net_r_vector *r_vec = + container_of(napi, struct nfp_net_r_vector, napi); + unsigned int pkts_polled = 0; + + if (r_vec->tx_ring) + nfp_nfd3_tx_complete(r_vec->tx_ring, budget); + if (r_vec->rx_ring) + pkts_polled = nfp_nfd3_rx(r_vec->rx_ring, budget); + + if (pkts_polled < budget) + if (napi_complete_done(napi, pkts_polled)) + nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry); + + if (r_vec->nfp_net->rx_coalesce_adapt_on && r_vec->rx_ring) { + struct dim_sample dim_sample = {}; + unsigned int start; + u64 pkts, bytes; + + do { + start = u64_stats_fetch_begin(&r_vec->rx_sync); + pkts = r_vec->rx_pkts; + bytes = r_vec->rx_bytes; + } while (u64_stats_fetch_retry(&r_vec->rx_sync, start)); + + dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample); + net_dim(&r_vec->rx_dim, dim_sample); + } + + if (r_vec->nfp_net->tx_coalesce_adapt_on && r_vec->tx_ring) { + struct dim_sample dim_sample = {}; + unsigned int start; + u64 pkts, bytes; + + do { + start = u64_stats_fetch_begin(&r_vec->tx_sync); + pkts = r_vec->tx_pkts; + bytes = r_vec->tx_bytes; + } while (u64_stats_fetch_retry(&r_vec->tx_sync, start)); + + dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample); + net_dim(&r_vec->tx_dim, dim_sample); + } + + return pkts_polled; +} + +/* Control device data path + */ + +bool +nfp_nfd3_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, + struct sk_buff *skb, bool old) +{ + unsigned int real_len = skb->len, meta_len = 0; + struct nfp_net_tx_ring *tx_ring; + struct nfp_nfd3_tx_buf *txbuf; + struct nfp_nfd3_tx_desc *txd; + struct nfp_net_dp *dp; + dma_addr_t dma_addr; + int wr_idx; + + dp = &r_vec->nfp_net->dp; + tx_ring = r_vec->tx_ring; + + if (WARN_ON_ONCE(skb_shinfo(skb)->nr_frags)) { + nn_dp_warn(dp, "Driver's CTRL TX does not implement gather\n"); + goto err_free; + } + + if (unlikely(nfp_net_tx_full(tx_ring, 1))) { + u64_stats_update_begin(&r_vec->tx_sync); + 
r_vec->tx_busy++; + u64_stats_update_end(&r_vec->tx_sync); + if (!old) + __skb_queue_tail(&r_vec->queue, skb); + else + __skb_queue_head(&r_vec->queue, skb); + return true; + } + + if (nfp_app_ctrl_has_meta(nn->app)) { + if (unlikely(skb_headroom(skb) < 8)) { + nn_dp_warn(dp, "CTRL TX on skb without headroom\n"); + goto err_free; + } + meta_len = 8; + put_unaligned_be32(NFP_META_PORT_ID_CTRL, skb_push(skb, 4)); + put_unaligned_be32(NFP_NET_META_PORTID, skb_push(skb, 4)); + } + + /* Start with the head skbuf */ + dma_addr = dma_map_single(dp->dev, skb->data, skb_headlen(skb), + DMA_TO_DEVICE); + if (dma_mapping_error(dp->dev, dma_addr)) + goto err_dma_warn; + + wr_idx = D_IDX(tx_ring, tx_ring->wr_p); + + /* Stash the soft descriptor of the head then initialize it */ + txbuf = &tx_ring->txbufs[wr_idx]; + txbuf->skb = skb; + txbuf->dma_addr = dma_addr; + txbuf->fidx = -1; + txbuf->pkt_cnt = 1; + txbuf->real_len = real_len; + + /* Build TX descriptor */ + txd = &tx_ring->txds[wr_idx]; + txd->offset_eop = meta_len | NFD3_DESC_TX_EOP; + txd->dma_len = cpu_to_le16(skb_headlen(skb)); + nfp_desc_set_dma_addr_40b(txd, dma_addr); + txd->data_len = cpu_to_le16(skb->len); + + txd->flags = 0; + txd->mss = 0; + txd->lso_hdrlen = 0; + + tx_ring->wr_p++; + tx_ring->wr_ptr_add++; + nfp_net_tx_xmit_more_flush(tx_ring); + + return false; + +err_dma_warn: + nn_dp_warn(dp, "Failed to DMA map TX CTRL buffer\n"); +err_free: + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_errors++; + u64_stats_update_end(&r_vec->tx_sync); + dev_kfree_skb_any(skb); + return false; +} + +static void __nfp_ctrl_tx_queued(struct nfp_net_r_vector *r_vec) +{ + struct sk_buff *skb; + + while ((skb = __skb_dequeue(&r_vec->queue))) + if (nfp_nfd3_ctrl_tx_one(r_vec->nfp_net, r_vec, skb, true)) + return; +} + +static bool +nfp_ctrl_meta_ok(struct nfp_net *nn, void *data, unsigned int meta_len) +{ + u32 meta_type, meta_tag; + + if (!nfp_app_ctrl_has_meta(nn->app)) + return !meta_len; + + if (meta_len != 8) + 
return false; + + meta_type = get_unaligned_be32(data); + meta_tag = get_unaligned_be32(data + 4); + + return (meta_type == NFP_NET_META_PORTID && + meta_tag == NFP_META_PORT_ID_CTRL); +} + +static bool +nfp_ctrl_rx_one(struct nfp_net *nn, struct nfp_net_dp *dp, + struct nfp_net_r_vector *r_vec, struct nfp_net_rx_ring *rx_ring) +{ + unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off; + struct nfp_net_rx_buf *rxbuf; + struct nfp_net_rx_desc *rxd; + dma_addr_t new_dma_addr; + struct sk_buff *skb; + void *new_frag; + int idx; + + idx = D_IDX(rx_ring, rx_ring->rd_p); + + rxd = &rx_ring->rxds[idx]; + if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD)) + return false; + + /* Memory barrier to ensure that we won't do other reads + * before the DD bit. + */ + dma_rmb(); + + rx_ring->rd_p++; + + rxbuf = &rx_ring->rxbufs[idx]; + meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK; + data_len = le16_to_cpu(rxd->rxd.data_len); + pkt_len = data_len - meta_len; + + pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off; + if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC) + pkt_off += meta_len; + else + pkt_off += dp->rx_offset; + meta_off = pkt_off - meta_len; + + /* Stats update */ + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->rx_pkts++; + r_vec->rx_bytes += pkt_len; + u64_stats_update_end(&r_vec->rx_sync); + + nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off, data_len); + + if (unlikely(!nfp_ctrl_meta_ok(nn, rxbuf->frag + meta_off, meta_len))) { + nn_dp_warn(dp, "incorrect metadata for ctrl packet (%d)\n", + meta_len); + nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL); + return true; + } + + skb = build_skb(rxbuf->frag, dp->fl_bufsz); + if (unlikely(!skb)) { + nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL); + return true; + } + new_frag = nfp_nfd3_napi_alloc_one(dp, &new_dma_addr); + if (unlikely(!new_frag)) { + nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf, skb); + return true; + } + + nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr); + + 
nfp_nfd3_rx_give_one(dp, rx_ring, new_frag, new_dma_addr); + + skb_reserve(skb, pkt_off); + skb_put(skb, pkt_len); + + nfp_app_ctrl_rx(nn->app, skb); + + return true; +} + +/** + * nfp_ctrl_rx() - receive control messages, up to a fixed budget + * @r_vec: ring vector whose RX ring is polled + * + * Return: true if the RX ring ran out of completed descriptors before the + * budget was used up, false if the budget was exhausted. + */ +static bool nfp_ctrl_rx(struct nfp_net_r_vector *r_vec) +{ + struct nfp_net_rx_ring *rx_ring = r_vec->rx_ring; + struct nfp_net *nn = r_vec->nfp_net; + struct nfp_net_dp *dp = &nn->dp; + unsigned int budget = 512; + + /* Check the budget before polling and only charge it for a consumed + * descriptor. Post-decrementing inside the loop condition wrapped the + * unsigned counter to UINT_MAX once it reached zero, which made an + * exhausted budget look like success to the caller. + */ + while (budget && nfp_ctrl_rx_one(nn, dp, r_vec, rx_ring)) + budget--; + + return budget; +} + +void nfp_nfd3_ctrl_poll(struct tasklet_struct *t) +{ + struct nfp_net_r_vector *r_vec = from_tasklet(r_vec, t, tasklet); + + spin_lock(&r_vec->lock); + nfp_nfd3_tx_complete(r_vec->tx_ring, 0); + __nfp_ctrl_tx_queued(r_vec); + spin_unlock(&r_vec->lock); + + if (nfp_ctrl_rx(r_vec)) { + nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry); + } else { + tasklet_schedule(&r_vec->tasklet); + nn_dp_warn(&r_vec->nfp_net->dp, + "control message budget exceeded!\n"); + } +} diff --git a/drivers/net/ethernet/netronome/nfp/nfd3/nfd3.h b/drivers/net/ethernet/netronome/nfp/nfd3/nfd3.h new file mode 100644 index 000000000000..7a0df9e6c3c4 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfd3/nfd3.h @@ -0,0 +1,106 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2015-2019 Netronome Systems, Inc. 
*/ + +#ifndef _NFP_DP_NFD3_H_ +#define _NFP_DP_NFD3_H_ + +struct sk_buff; +struct net_device; + +/* TX descriptor format */ + +#define NFD3_DESC_TX_EOP BIT(7) +#define NFD3_DESC_TX_OFFSET_MASK GENMASK(6, 0) +#define NFD3_DESC_TX_MSS_MASK GENMASK(13, 0) + +/* Flags in the host TX descriptor */ +#define NFD3_DESC_TX_CSUM BIT(7) +#define NFD3_DESC_TX_IP4_CSUM BIT(6) +#define NFD3_DESC_TX_TCP_CSUM BIT(5) +#define NFD3_DESC_TX_UDP_CSUM BIT(4) +#define NFD3_DESC_TX_VLAN BIT(3) +#define NFD3_DESC_TX_LSO BIT(2) +#define NFD3_DESC_TX_ENCAP BIT(1) +#define NFD3_DESC_TX_O_IP4_CSUM BIT(0) + +struct nfp_nfd3_tx_desc { + union { + struct { + u8 dma_addr_hi; /* High bits of host buf address */ + __le16 dma_len; /* Length to DMA for this desc */ + u8 offset_eop; /* Offset in buf where pkt starts + + * highest bit is eop flag. + */ + __le32 dma_addr_lo; /* Low 32bit of host buf addr */ + + __le16 mss; /* MSS to be used for LSO */ + u8 lso_hdrlen; /* LSO, TCP payload offset */ + u8 flags; /* TX Flags, see @NFD3_DESC_TX_* */ + union { + struct { + u8 l3_offset; /* L3 header offset */ + u8 l4_offset; /* L4 header offset */ + }; + __le16 vlan; /* VLAN tag to add if indicated */ + }; + __le16 data_len; /* Length of frame + meta data */ + } __packed; + __le32 vals[4]; + __le64 vals8[2]; + }; +}; + +/** + * struct nfp_nfd3_tx_buf - software TX buffer descriptor + * @skb: normal ring, sk_buff associated with this buffer + * @frag: XDP ring, page frag associated with this buffer + * @xdp: XSK buffer pool handle (for AF_XDP) + * @dma_addr: DMA mapping address of the buffer + * @fidx: Fragment index (-1 for the head and [0..nr_frags-1] for frags) + * @pkt_cnt: Number of packets to be produced out of the skb associated + * with this buffer (valid only on the head's buffer). + * Will be 1 for all non-TSO packets. + * @is_xsk_tx: Flag if buffer is a RX buffer after a XDP_TX action and not a + * buffer from the TX queue (for AF_XDP). 
+ * @real_len: Number of bytes to be produced out of the skb (valid only + * on the head's buffer). Equal to skb->len for non-TSO packets. + */ +struct nfp_nfd3_tx_buf { + union { + struct sk_buff *skb; + void *frag; + struct xdp_buff *xdp; + }; + dma_addr_t dma_addr; + union { + struct { + short int fidx; + u16 pkt_cnt; + }; + struct { + bool is_xsk_tx; + }; + }; + u32 real_len; +}; + +void +nfp_nfd3_rx_csum(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, + const struct nfp_net_rx_desc *rxd, + const struct nfp_meta_parsed *meta, struct sk_buff *skb); +bool +nfp_nfd3_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta, + void *data, void *pkt, unsigned int pkt_len, int meta_len); +void nfp_nfd3_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget); +int nfp_nfd3_poll(struct napi_struct *napi, int budget); +netdev_tx_t nfp_nfd3_tx(struct sk_buff *skb, struct net_device *netdev); +bool +nfp_nfd3_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, + struct sk_buff *skb, bool old); +void nfp_nfd3_ctrl_poll(struct tasklet_struct *t); +void nfp_nfd3_rx_ring_fill_freelist(struct nfp_net_dp *dp, + struct nfp_net_rx_ring *rx_ring); +void nfp_nfd3_xsk_tx_free(struct nfp_nfd3_tx_buf *txbuf); +int nfp_nfd3_xsk_poll(struct napi_struct *napi, int budget); + +#endif diff --git a/drivers/net/ethernet/netronome/nfp/nfd3/rings.c b/drivers/net/ethernet/netronome/nfp/nfd3/rings.c new file mode 100644 index 000000000000..a03190c9313c --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfd3/rings.c @@ -0,0 +1,279 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2015-2019 Netronome Systems, Inc. 
*/ + +#include <linux/seq_file.h> + +#include "../nfp_net.h" +#include "../nfp_net_dp.h" +#include "../nfp_net_xsk.h" +#include "nfd3.h" + +static void nfp_nfd3_xsk_tx_bufs_free(struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_nfd3_tx_buf *txbuf; + unsigned int idx; + + while (tx_ring->rd_p != tx_ring->wr_p) { + idx = D_IDX(tx_ring, tx_ring->rd_p); + txbuf = &tx_ring->txbufs[idx]; + + txbuf->real_len = 0; + + tx_ring->qcp_rd_p++; + tx_ring->rd_p++; + + if (tx_ring->r_vec->xsk_pool) { + if (txbuf->is_xsk_tx) + nfp_nfd3_xsk_tx_free(txbuf); + + xsk_tx_completed(tx_ring->r_vec->xsk_pool, 1); + } + } +} + +/** + * nfp_nfd3_tx_ring_reset() - Free any untransmitted buffers and reset pointers + * @dp: NFP Net data path struct + * @tx_ring: TX ring structure + * + * Assumes that the device is stopped, must be idempotent. + */ +static void +nfp_nfd3_tx_ring_reset(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring) +{ + struct netdev_queue *nd_q; + const skb_frag_t *frag; + + while (!tx_ring->is_xdp && tx_ring->rd_p != tx_ring->wr_p) { + struct nfp_nfd3_tx_buf *tx_buf; + struct sk_buff *skb; + int idx, nr_frags; + + idx = D_IDX(tx_ring, tx_ring->rd_p); + tx_buf = &tx_ring->txbufs[idx]; + + skb = tx_ring->txbufs[idx].skb; + nr_frags = skb_shinfo(skb)->nr_frags; + + if (tx_buf->fidx == -1) { + /* unmap head */ + dma_unmap_single(dp->dev, tx_buf->dma_addr, + skb_headlen(skb), DMA_TO_DEVICE); + } else { + /* unmap fragment */ + frag = &skb_shinfo(skb)->frags[tx_buf->fidx]; + dma_unmap_page(dp->dev, tx_buf->dma_addr, + skb_frag_size(frag), DMA_TO_DEVICE); + } + + /* check for last gather fragment */ + if (tx_buf->fidx == nr_frags - 1) + dev_kfree_skb_any(skb); + + tx_buf->dma_addr = 0; + tx_buf->skb = NULL; + tx_buf->fidx = -2; + + tx_ring->qcp_rd_p++; + tx_ring->rd_p++; + } + + if (tx_ring->is_xdp) + nfp_nfd3_xsk_tx_bufs_free(tx_ring); + + memset(tx_ring->txds, 0, tx_ring->size); + tx_ring->wr_p = 0; + tx_ring->rd_p = 0; + tx_ring->qcp_rd_p = 0; + tx_ring->wr_ptr_add = 0; + + 
if (tx_ring->is_xdp || !dp->netdev) + return; + + nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx); + netdev_tx_reset_queue(nd_q); +} + +/** + * nfp_nfd3_tx_ring_free() - Free resources allocated to a TX ring + * @tx_ring: TX ring to free + */ +static void nfp_nfd3_tx_ring_free(struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + struct nfp_net_dp *dp = &r_vec->nfp_net->dp; + + kvfree(tx_ring->txbufs); + + if (tx_ring->txds) + dma_free_coherent(dp->dev, tx_ring->size, + tx_ring->txds, tx_ring->dma); + + tx_ring->cnt = 0; + tx_ring->txbufs = NULL; + tx_ring->txds = NULL; + tx_ring->dma = 0; + tx_ring->size = 0; +} + +/** + * nfp_nfd3_tx_ring_alloc() - Allocate resource for a TX ring + * @dp: NFP Net data path struct + * @tx_ring: TX Ring structure to allocate + * + * Return: 0 on success, negative errno otherwise. + */ +static int +nfp_nfd3_tx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + + tx_ring->cnt = dp->txd_cnt; + + tx_ring->size = array_size(tx_ring->cnt, sizeof(*tx_ring->txds)); + tx_ring->txds = dma_alloc_coherent(dp->dev, tx_ring->size, + &tx_ring->dma, + GFP_KERNEL | __GFP_NOWARN); + if (!tx_ring->txds) { + netdev_warn(dp->netdev, "failed to allocate TX descriptor ring memory, requested descriptor count: %d, consider lowering descriptor count\n", + tx_ring->cnt); + goto err_alloc; + } + + tx_ring->txbufs = kvcalloc(tx_ring->cnt, sizeof(*tx_ring->txbufs), + GFP_KERNEL); + if (!tx_ring->txbufs) + goto err_alloc; + + if (!tx_ring->is_xdp && dp->netdev) + netif_set_xps_queue(dp->netdev, &r_vec->affinity_mask, + tx_ring->idx); + + return 0; + +err_alloc: + nfp_nfd3_tx_ring_free(tx_ring); + return -ENOMEM; +} + +static void +nfp_nfd3_tx_ring_bufs_free(struct nfp_net_dp *dp, + struct nfp_net_tx_ring *tx_ring) +{ + unsigned int i; + + if (!tx_ring->is_xdp) + return; + + for (i = 0; i < tx_ring->cnt; i++) { + if (!tx_ring->txbufs[i].frag) + return; 
+ + nfp_net_dma_unmap_rx(dp, tx_ring->txbufs[i].dma_addr); + __free_page(virt_to_page(tx_ring->txbufs[i].frag)); + } +} + +static int +nfp_nfd3_tx_ring_bufs_alloc(struct nfp_net_dp *dp, + struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_nfd3_tx_buf *txbufs = tx_ring->txbufs; + unsigned int i; + + if (!tx_ring->is_xdp) + return 0; + + for (i = 0; i < tx_ring->cnt; i++) { + txbufs[i].frag = nfp_net_rx_alloc_one(dp, &txbufs[i].dma_addr); + if (!txbufs[i].frag) { + nfp_nfd3_tx_ring_bufs_free(dp, tx_ring); + return -ENOMEM; + } + } + + return 0; +} + +static void +nfp_nfd3_print_tx_descs(struct seq_file *file, + struct nfp_net_r_vector *r_vec, + struct nfp_net_tx_ring *tx_ring, + u32 d_rd_p, u32 d_wr_p) +{ + struct nfp_nfd3_tx_desc *txd; + u32 txd_cnt = tx_ring->cnt; + int i; + + for (i = 0; i < txd_cnt; i++) { + struct xdp_buff *xdp; + struct sk_buff *skb; + + txd = &tx_ring->txds[i]; + seq_printf(file, "%04d: 0x%08x 0x%08x 0x%08x 0x%08x", i, + txd->vals[0], txd->vals[1], + txd->vals[2], txd->vals[3]); + + if (!tx_ring->is_xdp) { + skb = READ_ONCE(tx_ring->txbufs[i].skb); + if (skb) + seq_printf(file, " skb->head=%p skb->data=%p", + skb->head, skb->data); + } else { + xdp = READ_ONCE(tx_ring->txbufs[i].xdp); + if (xdp) + seq_printf(file, " xdp->data=%p", xdp->data); + } + + if (tx_ring->txbufs[i].dma_addr) + seq_printf(file, " dma_addr=%pad", + &tx_ring->txbufs[i].dma_addr); + + if (i == tx_ring->rd_p % txd_cnt) + seq_puts(file, " H_RD"); + if (i == tx_ring->wr_p % txd_cnt) + seq_puts(file, " H_WR"); + if (i == d_rd_p % txd_cnt) + seq_puts(file, " D_RD"); + if (i == d_wr_p % txd_cnt) + seq_puts(file, " D_WR"); + + seq_putc(file, '\n'); + } +} + +#define NFP_NFD3_CFG_CTRL_SUPPORTED \ + (NFP_NET_CFG_CTRL_ENABLE | NFP_NET_CFG_CTRL_PROMISC | \ + NFP_NET_CFG_CTRL_L2BC | NFP_NET_CFG_CTRL_L2MC | \ + NFP_NET_CFG_CTRL_RXCSUM | NFP_NET_CFG_CTRL_TXCSUM | \ + NFP_NET_CFG_CTRL_RXVLAN | NFP_NET_CFG_CTRL_TXVLAN | \ + NFP_NET_CFG_CTRL_RXVLAN_V2 | NFP_NET_CFG_CTRL_RXQINQ | \ + 
NFP_NET_CFG_CTRL_TXVLAN_V2 | \ + NFP_NET_CFG_CTRL_GATHER | NFP_NET_CFG_CTRL_LSO | \ + NFP_NET_CFG_CTRL_CTAG_FILTER | NFP_NET_CFG_CTRL_CMSG_DATA | \ + NFP_NET_CFG_CTRL_RINGCFG | NFP_NET_CFG_CTRL_RSS | \ + NFP_NET_CFG_CTRL_IRQMOD | NFP_NET_CFG_CTRL_TXRWB | \ + NFP_NET_CFG_CTRL_VEPA | \ + NFP_NET_CFG_CTRL_VXLAN | NFP_NET_CFG_CTRL_NVGRE | \ + NFP_NET_CFG_CTRL_BPF | NFP_NET_CFG_CTRL_LSO2 | \ + NFP_NET_CFG_CTRL_RSS2 | NFP_NET_CFG_CTRL_CSUM_COMPLETE | \ + NFP_NET_CFG_CTRL_LIVE_ADDR) + +const struct nfp_dp_ops nfp_nfd3_ops = { + .version = NFP_NFD_VER_NFD3, + .tx_min_desc_per_pkt = 1, + .cap_mask = NFP_NFD3_CFG_CTRL_SUPPORTED, + .dma_mask = DMA_BIT_MASK(40), + .poll = nfp_nfd3_poll, + .xsk_poll = nfp_nfd3_xsk_poll, + .ctrl_poll = nfp_nfd3_ctrl_poll, + .xmit = nfp_nfd3_tx, + .ctrl_tx_one = nfp_nfd3_ctrl_tx_one, + .rx_ring_fill_freelist = nfp_nfd3_rx_ring_fill_freelist, + .tx_ring_alloc = nfp_nfd3_tx_ring_alloc, + .tx_ring_reset = nfp_nfd3_tx_ring_reset, + .tx_ring_free = nfp_nfd3_tx_ring_free, + .tx_ring_bufs_alloc = nfp_nfd3_tx_ring_bufs_alloc, + .tx_ring_bufs_free = nfp_nfd3_tx_ring_bufs_free, + .print_tx_descs = nfp_nfd3_print_tx_descs +}; diff --git a/drivers/net/ethernet/netronome/nfp/nfd3/xsk.c b/drivers/net/ethernet/netronome/nfp/nfd3/xsk.c new file mode 100644 index 000000000000..5d9db8c2a5b4 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfd3/xsk.c @@ -0,0 +1,409 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2018 Netronome Systems, Inc */ +/* Copyright (C) 2021 Corigine, Inc */ + +#include <linux/bpf_trace.h> +#include <linux/netdevice.h> + +#include "../nfp_app.h" +#include "../nfp_net.h" +#include "../nfp_net_dp.h" +#include "../nfp_net_xsk.h" +#include "nfd3.h" + +static bool +nfp_nfd3_xsk_tx_xdp(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, + struct nfp_net_rx_ring *rx_ring, + struct nfp_net_tx_ring *tx_ring, + struct nfp_net_xsk_rx_buf *xrxbuf, unsigned int pkt_len, + int pkt_off) +{ + struct 
xsk_buff_pool *pool = r_vec->xsk_pool; + struct nfp_nfd3_tx_buf *txbuf; + struct nfp_nfd3_tx_desc *txd; + unsigned int wr_idx; + + if (nfp_net_tx_space(tx_ring) < 1) + return false; + + xsk_buff_raw_dma_sync_for_device(pool, xrxbuf->dma_addr + pkt_off, + pkt_len); + + wr_idx = D_IDX(tx_ring, tx_ring->wr_p); + + txbuf = &tx_ring->txbufs[wr_idx]; + txbuf->xdp = xrxbuf->xdp; + txbuf->real_len = pkt_len; + txbuf->is_xsk_tx = true; + + /* Build TX descriptor */ + txd = &tx_ring->txds[wr_idx]; + txd->offset_eop = NFD3_DESC_TX_EOP; + txd->dma_len = cpu_to_le16(pkt_len); + nfp_desc_set_dma_addr_40b(txd, xrxbuf->dma_addr + pkt_off); + txd->data_len = cpu_to_le16(pkt_len); + + txd->flags = 0; + txd->mss = 0; + txd->lso_hdrlen = 0; + + tx_ring->wr_ptr_add++; + tx_ring->wr_p++; + + return true; +} + +static void nfp_nfd3_xsk_rx_skb(struct nfp_net_rx_ring *rx_ring, + const struct nfp_net_rx_desc *rxd, + struct nfp_net_xsk_rx_buf *xrxbuf, + const struct nfp_meta_parsed *meta, + unsigned int pkt_len, + bool meta_xdp, + unsigned int *skbs_polled) +{ + struct nfp_net_r_vector *r_vec = rx_ring->r_vec; + struct nfp_net_dp *dp = &r_vec->nfp_net->dp; + struct net_device *netdev; + struct sk_buff *skb; + + if (likely(!meta->portid)) { + netdev = dp->netdev; + } else { + struct nfp_net *nn = netdev_priv(dp->netdev); + + netdev = nfp_app_dev_get(nn->app, meta->portid, NULL); + if (unlikely(!netdev)) { + nfp_net_xsk_rx_drop(r_vec, xrxbuf); + return; + } + nfp_repr_inc_rx_stats(netdev, pkt_len); + } + + skb = napi_alloc_skb(&r_vec->napi, pkt_len); + if (!skb) { + nfp_net_xsk_rx_drop(r_vec, xrxbuf); + return; + } + skb_put_data(skb, xrxbuf->xdp->data, pkt_len); + + skb->mark = meta->mark; + skb_set_hash(skb, meta->hash, meta->hash_type); + + skb_record_rx_queue(skb, rx_ring->idx); + skb->protocol = eth_type_trans(skb, netdev); + + nfp_nfd3_rx_csum(dp, r_vec, rxd, meta, skb); + + if (unlikely(!nfp_net_vlan_strip(skb, rxd, meta))) { + dev_kfree_skb_any(skb); + nfp_net_xsk_rx_drop(r_vec, 
xrxbuf); + return; + } + + if (meta_xdp) + skb_metadata_set(skb, + xrxbuf->xdp->data - xrxbuf->xdp->data_meta); + + napi_gro_receive(&rx_ring->r_vec->napi, skb); + + nfp_net_xsk_rx_free(xrxbuf); + + (*skbs_polled)++; +} + +static unsigned int +nfp_nfd3_xsk_rx(struct nfp_net_rx_ring *rx_ring, int budget, + unsigned int *skbs_polled) +{ + struct nfp_net_r_vector *r_vec = rx_ring->r_vec; + struct nfp_net_dp *dp = &r_vec->nfp_net->dp; + struct nfp_net_tx_ring *tx_ring; + struct bpf_prog *xdp_prog; + bool xdp_redir = false; + int pkts_polled = 0; + + xdp_prog = READ_ONCE(dp->xdp_prog); + tx_ring = r_vec->xdp_ring; + + while (pkts_polled < budget) { + unsigned int meta_len, data_len, pkt_len, pkt_off; + struct nfp_net_xsk_rx_buf *xrxbuf; + struct nfp_net_rx_desc *rxd; + struct nfp_meta_parsed meta; + int idx, act; + + idx = D_IDX(rx_ring, rx_ring->rd_p); + + rxd = &rx_ring->rxds[idx]; + if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD)) + break; + + rx_ring->rd_p++; + pkts_polled++; + + xrxbuf = &rx_ring->xsk_rxbufs[idx]; + + /* If starved of buffers "drop" it and scream. */ + if (rx_ring->rd_p >= rx_ring->wr_p) { + nn_dp_warn(dp, "Starved of RX buffers\n"); + nfp_net_xsk_rx_drop(r_vec, xrxbuf); + break; + } + + /* Memory barrier to ensure that we won't do other reads + * before the DD bit. + */ + dma_rmb(); + + memset(&meta, 0, sizeof(meta)); + + /* Only supporting AF_XDP with dynamic metadata so buffer layout + * is always: + * + * --------------------------------------------------------- + * | off | metadata | packet | XXXX | + * --------------------------------------------------------- + */ + meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK; + data_len = le16_to_cpu(rxd->rxd.data_len); + pkt_len = data_len - meta_len; + + if (unlikely(meta_len > NFP_NET_MAX_PREPEND)) { + nn_dp_warn(dp, "Oversized RX packet metadata %u\n", + meta_len); + nfp_net_xsk_rx_drop(r_vec, xrxbuf); + continue; + } + + /* Stats update. 
*/ + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->rx_pkts++; + r_vec->rx_bytes += pkt_len; + u64_stats_update_end(&r_vec->rx_sync); + + xrxbuf->xdp->data += meta_len; + xrxbuf->xdp->data_end = xrxbuf->xdp->data + pkt_len; + xdp_set_data_meta_invalid(xrxbuf->xdp); + xsk_buff_dma_sync_for_cpu(xrxbuf->xdp, r_vec->xsk_pool); + net_prefetch(xrxbuf->xdp->data); + + if (meta_len) { + if (unlikely(nfp_nfd3_parse_meta(dp->netdev, &meta, + xrxbuf->xdp->data - + meta_len, + xrxbuf->xdp->data, + pkt_len, meta_len))) { + nn_dp_warn(dp, "Invalid RX packet metadata\n"); + nfp_net_xsk_rx_drop(r_vec, xrxbuf); + continue; + } + + if (unlikely(meta.portid)) { + struct nfp_net *nn = netdev_priv(dp->netdev); + + if (meta.portid != NFP_META_PORT_ID_CTRL) { + nfp_nfd3_xsk_rx_skb(rx_ring, rxd, + xrxbuf, &meta, + pkt_len, false, + skbs_polled); + continue; + } + + nfp_app_ctrl_rx_raw(nn->app, xrxbuf->xdp->data, + pkt_len); + nfp_net_xsk_rx_free(xrxbuf); + continue; + } + } + + act = bpf_prog_run_xdp(xdp_prog, xrxbuf->xdp); + + pkt_len = xrxbuf->xdp->data_end - xrxbuf->xdp->data; + pkt_off = xrxbuf->xdp->data - xrxbuf->xdp->data_hard_start; + + switch (act) { + case XDP_PASS: + nfp_nfd3_xsk_rx_skb(rx_ring, rxd, xrxbuf, &meta, pkt_len, + true, skbs_polled); + break; + case XDP_TX: + if (!nfp_nfd3_xsk_tx_xdp(dp, r_vec, rx_ring, tx_ring, + xrxbuf, pkt_len, pkt_off)) + nfp_net_xsk_rx_drop(r_vec, xrxbuf); + else + nfp_net_xsk_rx_unstash(xrxbuf); + break; + case XDP_REDIRECT: + if (xdp_do_redirect(dp->netdev, xrxbuf->xdp, xdp_prog)) { + nfp_net_xsk_rx_drop(r_vec, xrxbuf); + } else { + nfp_net_xsk_rx_unstash(xrxbuf); + xdp_redir = true; + } + break; + default: + bpf_warn_invalid_xdp_action(dp->netdev, xdp_prog, act); + fallthrough; + case XDP_ABORTED: + trace_xdp_exception(dp->netdev, xdp_prog, act); + fallthrough; + case XDP_DROP: + nfp_net_xsk_rx_drop(r_vec, xrxbuf); + break; + } + } + + nfp_net_xsk_rx_ring_fill_freelist(r_vec->rx_ring); + + if (xdp_redir) + xdp_do_flush_map(); + + if 
(tx_ring->wr_ptr_add) + nfp_net_tx_xmit_more_flush(tx_ring); + + return pkts_polled; +} + +void nfp_nfd3_xsk_tx_free(struct nfp_nfd3_tx_buf *txbuf) +{ + xsk_buff_free(txbuf->xdp); + + txbuf->dma_addr = 0; + txbuf->xdp = NULL; +} + +static bool nfp_nfd3_xsk_complete(struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + u32 done_pkts = 0, done_bytes = 0, reused = 0; + bool done_all; + int idx, todo; + u32 qcp_rd_p; + + if (tx_ring->wr_p == tx_ring->rd_p) + return true; + + /* Work out how many descriptors have been transmitted. */ + qcp_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q); + + if (qcp_rd_p == tx_ring->qcp_rd_p) + return true; + + todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p); + + done_all = todo <= NFP_NET_XDP_MAX_COMPLETE; + todo = min(todo, NFP_NET_XDP_MAX_COMPLETE); + + tx_ring->qcp_rd_p = D_IDX(tx_ring, tx_ring->qcp_rd_p + todo); + + done_pkts = todo; + while (todo--) { + struct nfp_nfd3_tx_buf *txbuf; + + idx = D_IDX(tx_ring, tx_ring->rd_p); + tx_ring->rd_p++; + + txbuf = &tx_ring->txbufs[idx]; + if (unlikely(!txbuf->real_len)) + continue; + + done_bytes += txbuf->real_len; + txbuf->real_len = 0; + + if (txbuf->is_xsk_tx) { + nfp_nfd3_xsk_tx_free(txbuf); + reused++; + } + } + + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_bytes += done_bytes; + r_vec->tx_pkts += done_pkts; + u64_stats_update_end(&r_vec->tx_sync); + + xsk_tx_completed(r_vec->xsk_pool, done_pkts - reused); + + WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt, + "XDP TX ring corruption rd_p=%u wr_p=%u cnt=%u\n", + tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt); + + return done_all; +} + +static void nfp_nfd3_xsk_tx(struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + struct xdp_desc desc[NFP_NET_XSK_TX_BATCH]; + struct xsk_buff_pool *xsk_pool; + struct nfp_nfd3_tx_desc *txd; + u32 pkts = 0, wr_idx; + u32 i, got; + + xsk_pool = r_vec->xsk_pool; + + while (nfp_net_tx_space(tx_ring) >= NFP_NET_XSK_TX_BATCH) 
{ + for (i = 0; i < NFP_NET_XSK_TX_BATCH; i++) + if (!xsk_tx_peek_desc(xsk_pool, &desc[i])) + break; + got = i; + if (!got) + break; + + wr_idx = D_IDX(tx_ring, tx_ring->wr_p + i); + prefetchw(&tx_ring->txds[wr_idx]); + + for (i = 0; i < got; i++) + xsk_buff_raw_dma_sync_for_device(xsk_pool, desc[i].addr, + desc[i].len); + + for (i = 0; i < got; i++) { + wr_idx = D_IDX(tx_ring, tx_ring->wr_p + i); + + tx_ring->txbufs[wr_idx].real_len = desc[i].len; + tx_ring->txbufs[wr_idx].is_xsk_tx = false; + + /* Build TX descriptor. */ + txd = &tx_ring->txds[wr_idx]; + nfp_desc_set_dma_addr_40b(txd, + xsk_buff_raw_get_dma(xsk_pool, desc[i].addr)); + txd->offset_eop = NFD3_DESC_TX_EOP; + txd->dma_len = cpu_to_le16(desc[i].len); + txd->data_len = cpu_to_le16(desc[i].len); + } + + tx_ring->wr_p += got; + pkts += got; + } + + if (!pkts) + return; + + xsk_tx_release(xsk_pool); + /* Ensure all records are visible before incrementing write counter. */ + wmb(); + nfp_qcp_wr_ptr_add(tx_ring->qcp_q, pkts); +} + +int nfp_nfd3_xsk_poll(struct napi_struct *napi, int budget) +{ + struct nfp_net_r_vector *r_vec = + container_of(napi, struct nfp_net_r_vector, napi); + unsigned int pkts_polled, skbs = 0; + + pkts_polled = nfp_nfd3_xsk_rx(r_vec->rx_ring, budget, &skbs); + + if (pkts_polled < budget) { + if (r_vec->tx_ring) + nfp_nfd3_tx_complete(r_vec->tx_ring, budget); + + if (!nfp_nfd3_xsk_complete(r_vec->xdp_ring)) + pkts_polled = budget; + + nfp_nfd3_xsk_tx(r_vec->xdp_ring); + + if (pkts_polled < budget && napi_complete_done(napi, skbs)) + nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry); + } + + return pkts_polled; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c new file mode 100644 index 000000000000..2b427d8ccb2f --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c @@ -0,0 +1,1539 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2015-2019 Netronome Systems, Inc. 
*/ + +#include <linux/bpf_trace.h> +#include <linux/netdevice.h> +#include <linux/overflow.h> +#include <linux/sizes.h> +#include <linux/bitfield.h> + +#include "../nfp_app.h" +#include "../nfp_net.h" +#include "../nfp_net_dp.h" +#include "../crypto/crypto.h" +#include "../crypto/fw.h" +#include "nfdk.h" + +static int nfp_nfdk_tx_ring_should_wake(struct nfp_net_tx_ring *tx_ring) +{ + return !nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT * 2); +} + +static int nfp_nfdk_tx_ring_should_stop(struct nfp_net_tx_ring *tx_ring) +{ + return nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT); +} + +static void nfp_nfdk_tx_ring_stop(struct netdev_queue *nd_q, + struct nfp_net_tx_ring *tx_ring) +{ + netif_tx_stop_queue(nd_q); + + /* We can race with the TX completion out of NAPI so recheck */ + smp_mb(); + if (unlikely(nfp_nfdk_tx_ring_should_wake(tx_ring))) + netif_tx_start_queue(nd_q); +} + +static __le64 +nfp_nfdk_tx_tso(struct nfp_net_r_vector *r_vec, struct nfp_nfdk_tx_buf *txbuf, + struct sk_buff *skb) +{ + u32 segs, hdrlen, l3_offset, l4_offset; + struct nfp_nfdk_tx_desc txd; + u16 mss; + + if (!skb->encapsulation) { + l3_offset = skb_network_offset(skb); + l4_offset = skb_transport_offset(skb); + hdrlen = skb_tcp_all_headers(skb); + } else { + l3_offset = skb_inner_network_offset(skb); + l4_offset = skb_inner_transport_offset(skb); + hdrlen = skb_inner_tcp_all_headers(skb); + } + + segs = skb_shinfo(skb)->gso_segs; + mss = skb_shinfo(skb)->gso_size & NFDK_DESC_TX_MSS_MASK; + + txd.l3_offset = l3_offset; + txd.l4_offset = l4_offset; + txd.lso_meta_res = 0; + txd.mss = cpu_to_le16(mss); + txd.lso_hdrlen = hdrlen; + txd.lso_totsegs = segs; + + txbuf->pkt_cnt = segs; + txbuf->real_len = skb->len + hdrlen * (txbuf->pkt_cnt - 1); + + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_lso++; + u64_stats_update_end(&r_vec->tx_sync); + + return txd.raw; +} + +static u8 +nfp_nfdk_tx_csum(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, + unsigned int pkt_cnt, struct 
sk_buff *skb, u64 flags) +{ + struct ipv6hdr *ipv6h; + struct iphdr *iph; + + if (!(dp->ctrl & NFP_NET_CFG_CTRL_TXCSUM)) + return flags; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return flags; + + flags |= NFDK_DESC_TX_L4_CSUM; + + iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb); + ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb); + + /* L3 checksum offloading flag is not required for ipv6 */ + if (iph->version == 4) { + flags |= NFDK_DESC_TX_L3_CSUM; + } else if (ipv6h->version != 6) { + nn_dp_warn(dp, "partial checksum but ipv=%x!\n", iph->version); + return flags; + } + + u64_stats_update_begin(&r_vec->tx_sync); + if (!skb->encapsulation) { + r_vec->hw_csum_tx += pkt_cnt; + } else { + flags |= NFDK_DESC_TX_ENCAP; + r_vec->hw_csum_tx_inner += pkt_cnt; + } + u64_stats_update_end(&r_vec->tx_sync); + + return flags; +} + +static int +nfp_nfdk_tx_maybe_close_block(struct nfp_net_tx_ring *tx_ring, + struct sk_buff *skb) +{ + unsigned int n_descs, wr_p, nop_slots; + const skb_frag_t *frag, *fend; + struct nfp_nfdk_tx_desc *txd; + unsigned int nr_frags; + unsigned int wr_idx; + int err; + +recount_descs: + n_descs = nfp_nfdk_headlen_to_segs(skb_headlen(skb)); + nr_frags = skb_shinfo(skb)->nr_frags; + frag = skb_shinfo(skb)->frags; + fend = frag + nr_frags; + for (; frag < fend; frag++) + n_descs += DIV_ROUND_UP(skb_frag_size(frag), + NFDK_TX_MAX_DATA_PER_DESC); + + if (unlikely(n_descs > NFDK_TX_DESC_GATHER_MAX)) { + if (skb_is_nonlinear(skb)) { + err = skb_linearize(skb); + if (err) + return err; + goto recount_descs; + } + return -EINVAL; + } + + /* Under count by 1 (don't count meta) for the round down to work out */ + n_descs += !!skb_is_gso(skb); + + if (round_down(tx_ring->wr_p, NFDK_TX_DESC_BLOCK_CNT) != + round_down(tx_ring->wr_p + n_descs, NFDK_TX_DESC_BLOCK_CNT)) + goto close_block; + + if ((u32)tx_ring->data_pending + skb->len > NFDK_TX_MAX_DATA_PER_BLOCK) + goto close_block; + + return 0; + +close_block: + wr_p = tx_ring->wr_p; 
+ nop_slots = D_BLOCK_CPL(wr_p); + + wr_idx = D_IDX(tx_ring, wr_p); + tx_ring->ktxbufs[wr_idx].skb = NULL; + txd = &tx_ring->ktxds[wr_idx]; + + memset(txd, 0, array_size(nop_slots, sizeof(struct nfp_nfdk_tx_desc))); + + tx_ring->data_pending = 0; + tx_ring->wr_p += nop_slots; + tx_ring->wr_ptr_add += nop_slots; + + return 0; +} + +static int +nfp_nfdk_prep_tx_meta(struct nfp_net_dp *dp, struct nfp_app *app, + struct sk_buff *skb) +{ + struct metadata_dst *md_dst = skb_metadata_dst(skb); + unsigned char *data; + bool vlan_insert; + u32 meta_id = 0; + int md_bytes; + + if (unlikely(md_dst && md_dst->type != METADATA_HW_PORT_MUX)) + md_dst = NULL; + + vlan_insert = skb_vlan_tag_present(skb) && (dp->ctrl & NFP_NET_CFG_CTRL_TXVLAN_V2); + + if (!(md_dst || vlan_insert)) + return 0; + + md_bytes = sizeof(meta_id) + + !!md_dst * NFP_NET_META_PORTID_SIZE + + vlan_insert * NFP_NET_META_VLAN_SIZE; + + if (unlikely(skb_cow_head(skb, md_bytes))) + return -ENOMEM; + + data = skb_push(skb, md_bytes) + md_bytes; + if (md_dst) { + data -= NFP_NET_META_PORTID_SIZE; + put_unaligned_be32(md_dst->u.port_info.port_id, data); + meta_id = NFP_NET_META_PORTID; + } + if (vlan_insert) { + data -= NFP_NET_META_VLAN_SIZE; + /* data type of skb->vlan_proto is __be16 + * so it fills metadata without calling put_unaligned_be16 + */ + memcpy(data, &skb->vlan_proto, sizeof(skb->vlan_proto)); + put_unaligned_be16(skb_vlan_tag_get(skb), data + sizeof(skb->vlan_proto)); + meta_id <<= NFP_NET_META_FIELD_SIZE; + meta_id |= NFP_NET_META_VLAN; + } + + meta_id = FIELD_PREP(NFDK_META_LEN, md_bytes) | + FIELD_PREP(NFDK_META_FIELDS, meta_id); + + data -= sizeof(meta_id); + put_unaligned_be32(meta_id, data); + + return NFDK_DESC_TX_CHAIN_META; +} + +/** + * nfp_nfdk_tx() - Main transmit entry point + * @skb: SKB to transmit + * @netdev: netdev structure + * + * Return: NETDEV_TX_OK on success. 
+ */ +netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + struct nfp_nfdk_tx_buf *txbuf, *etxbuf; + u32 cnt, tmp_dlen, dlen_type = 0; + struct nfp_net_tx_ring *tx_ring; + struct nfp_net_r_vector *r_vec; + const skb_frag_t *frag, *fend; + struct nfp_nfdk_tx_desc *txd; + unsigned int real_len, qidx; + unsigned int dma_len, type; + struct netdev_queue *nd_q; + struct nfp_net_dp *dp; + int nr_frags, wr_idx; + dma_addr_t dma_addr; + u64 metadata; + + dp = &nn->dp; + qidx = skb_get_queue_mapping(skb); + tx_ring = &dp->tx_rings[qidx]; + r_vec = tx_ring->r_vec; + nd_q = netdev_get_tx_queue(dp->netdev, qidx); + + /* Don't bother counting frags, assume the worst */ + if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) { + nn_dp_warn(dp, "TX ring %d busy. wrp=%u rdp=%u\n", + qidx, tx_ring->wr_p, tx_ring->rd_p); + netif_tx_stop_queue(nd_q); + nfp_net_tx_xmit_more_flush(tx_ring); + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_busy++; + u64_stats_update_end(&r_vec->tx_sync); + return NETDEV_TX_BUSY; + } + + metadata = nfp_nfdk_prep_tx_meta(dp, nn->app, skb); + if (unlikely((int)metadata < 0)) + goto err_flush; + + if (nfp_nfdk_tx_maybe_close_block(tx_ring, skb)) + goto err_flush; + + /* nr_frags will change after skb_linearize so we get nr_frags after + * nfp_nfdk_tx_maybe_close_block function + */ + nr_frags = skb_shinfo(skb)->nr_frags; + /* DMA map all */ + wr_idx = D_IDX(tx_ring, tx_ring->wr_p); + txd = &tx_ring->ktxds[wr_idx]; + txbuf = &tx_ring->ktxbufs[wr_idx]; + + dma_len = skb_headlen(skb); + if (skb_is_gso(skb)) + type = NFDK_DESC_TX_TYPE_TSO; + else if (!nr_frags && dma_len < NFDK_TX_MAX_DATA_PER_HEAD) + type = NFDK_DESC_TX_TYPE_SIMPLE; + else + type = NFDK_DESC_TX_TYPE_GATHER; + + dma_addr = dma_map_single(dp->dev, skb->data, dma_len, DMA_TO_DEVICE); + if (dma_mapping_error(dp->dev, dma_addr)) + goto err_warn_dma; + + txbuf->skb = skb; + txbuf++; + + txbuf->dma_addr = dma_addr; + 
txbuf++; + + /* FIELD_PREP() implicitly truncates to chunk */ + dma_len -= 1; + + /* We will do our best to pass as much data as we can in descriptor + * and we need to make sure the first descriptor includes whole head + * since there is limitation in firmware side. Sometimes the value of + * dma_len bitwise and NFDK_DESC_TX_DMA_LEN_HEAD will less than + * headlen. + */ + dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, + dma_len > NFDK_DESC_TX_DMA_LEN_HEAD ? + NFDK_DESC_TX_DMA_LEN_HEAD : dma_len) | + FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type); + + txd->dma_len_type = cpu_to_le16(dlen_type); + nfp_desc_set_dma_addr_48b(txd, dma_addr); + + /* starts at bit 0 */ + BUILD_BUG_ON(!(NFDK_DESC_TX_DMA_LEN_HEAD & 1)); + + /* Preserve the original dlen_type, this way below the EOP logic + * can use dlen_type. + */ + tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD; + dma_len -= tmp_dlen; + dma_addr += tmp_dlen + 1; + txd++; + + /* The rest of the data (if any) will be in larger dma descritors + * and is handled with the fragment loop. 
+ */ + frag = skb_shinfo(skb)->frags; + fend = frag + nr_frags; + + while (true) { + while (dma_len > 0) { + dma_len -= 1; + dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len); + + txd->dma_len_type = cpu_to_le16(dlen_type); + nfp_desc_set_dma_addr_48b(txd, dma_addr); + + dma_len -= dlen_type; + dma_addr += dlen_type + 1; + txd++; + } + + if (frag >= fend) + break; + + dma_len = skb_frag_size(frag); + dma_addr = skb_frag_dma_map(dp->dev, frag, 0, dma_len, + DMA_TO_DEVICE); + if (dma_mapping_error(dp->dev, dma_addr)) + goto err_unmap; + + txbuf->dma_addr = dma_addr; + txbuf++; + + frag++; + } + + (txd - 1)->dma_len_type = cpu_to_le16(dlen_type | NFDK_DESC_TX_EOP); + + if (!skb_is_gso(skb)) { + real_len = skb->len; + /* Metadata desc */ + metadata = nfp_nfdk_tx_csum(dp, r_vec, 1, skb, metadata); + txd->raw = cpu_to_le64(metadata); + txd++; + } else { + /* lso desc should be placed after metadata desc */ + (txd + 1)->raw = nfp_nfdk_tx_tso(r_vec, txbuf, skb); + real_len = txbuf->real_len; + /* Metadata desc */ + metadata = nfp_nfdk_tx_csum(dp, r_vec, txbuf->pkt_cnt, skb, metadata); + txd->raw = cpu_to_le64(metadata); + txd += 2; + txbuf++; + } + + cnt = txd - tx_ring->ktxds - wr_idx; + if (unlikely(round_down(wr_idx, NFDK_TX_DESC_BLOCK_CNT) != + round_down(wr_idx + cnt - 1, NFDK_TX_DESC_BLOCK_CNT))) + goto err_warn_overflow; + + skb_tx_timestamp(skb); + + tx_ring->wr_p += cnt; + if (tx_ring->wr_p % NFDK_TX_DESC_BLOCK_CNT) + tx_ring->data_pending += skb->len; + else + tx_ring->data_pending = 0; + + if (nfp_nfdk_tx_ring_should_stop(tx_ring)) + nfp_nfdk_tx_ring_stop(nd_q, tx_ring); + + tx_ring->wr_ptr_add += cnt; + if (__netdev_tx_sent_queue(nd_q, real_len, netdev_xmit_more())) + nfp_net_tx_xmit_more_flush(tx_ring); + + return NETDEV_TX_OK; + +err_warn_overflow: + WARN_ONCE(1, "unable to fit packet into a descriptor wr_idx:%d head:%d frags:%d cnt:%d", + wr_idx, skb_headlen(skb), nr_frags, cnt); + if (skb_is_gso(skb)) + txbuf--; +err_unmap: + /* txbuf pointed to the 
next-to-use */ + etxbuf = txbuf; + /* first txbuf holds the skb */ + txbuf = &tx_ring->ktxbufs[wr_idx + 1]; + if (txbuf < etxbuf) { + dma_unmap_single(dp->dev, txbuf->dma_addr, + skb_headlen(skb), DMA_TO_DEVICE); + txbuf->raw = 0; + txbuf++; + } + frag = skb_shinfo(skb)->frags; + while (etxbuf < txbuf) { + dma_unmap_page(dp->dev, txbuf->dma_addr, + skb_frag_size(frag), DMA_TO_DEVICE); + txbuf->raw = 0; + frag++; + txbuf++; + } +err_warn_dma: + nn_dp_warn(dp, "Failed to map DMA TX buffer\n"); +err_flush: + nfp_net_tx_xmit_more_flush(tx_ring); + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_errors++; + u64_stats_update_end(&r_vec->tx_sync); + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; +} + +/** + * nfp_nfdk_tx_complete() - Handled completed TX packets + * @tx_ring: TX ring structure + * @budget: NAPI budget (only used as bool to determine if in NAPI context) + */ +static void nfp_nfdk_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + struct nfp_net_dp *dp = &r_vec->nfp_net->dp; + u32 done_pkts = 0, done_bytes = 0; + struct nfp_nfdk_tx_buf *ktxbufs; + struct device *dev = dp->dev; + struct netdev_queue *nd_q; + u32 rd_p, qcp_rd_p; + int todo; + + rd_p = tx_ring->rd_p; + if (tx_ring->wr_p == rd_p) + return; + + /* Work out how many descriptors have been transmitted */ + qcp_rd_p = nfp_net_read_tx_cmpl(tx_ring, dp); + + if (qcp_rd_p == tx_ring->qcp_rd_p) + return; + + todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p); + ktxbufs = tx_ring->ktxbufs; + + while (todo > 0) { + const skb_frag_t *frag, *fend; + unsigned int size, n_descs = 1; + struct nfp_nfdk_tx_buf *txbuf; + struct sk_buff *skb; + + txbuf = &ktxbufs[D_IDX(tx_ring, rd_p)]; + skb = txbuf->skb; + txbuf++; + + /* Closed block */ + if (!skb) { + n_descs = D_BLOCK_CPL(rd_p); + goto next; + } + + /* Unmap head */ + size = skb_headlen(skb); + n_descs += nfp_nfdk_headlen_to_segs(size); + dma_unmap_single(dev, txbuf->dma_addr, size, 
DMA_TO_DEVICE); + txbuf++; + + /* Unmap frags */ + frag = skb_shinfo(skb)->frags; + fend = frag + skb_shinfo(skb)->nr_frags; + for (; frag < fend; frag++) { + size = skb_frag_size(frag); + n_descs += DIV_ROUND_UP(size, + NFDK_TX_MAX_DATA_PER_DESC); + dma_unmap_page(dev, txbuf->dma_addr, + skb_frag_size(frag), DMA_TO_DEVICE); + txbuf++; + } + + if (!skb_is_gso(skb)) { + done_bytes += skb->len; + done_pkts++; + } else { + done_bytes += txbuf->real_len; + done_pkts += txbuf->pkt_cnt; + n_descs++; + } + + napi_consume_skb(skb, budget); +next: + rd_p += n_descs; + todo -= n_descs; + } + + tx_ring->rd_p = rd_p; + tx_ring->qcp_rd_p = qcp_rd_p; + + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_bytes += done_bytes; + r_vec->tx_pkts += done_pkts; + u64_stats_update_end(&r_vec->tx_sync); + + if (!dp->netdev) + return; + + nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx); + netdev_tx_completed_queue(nd_q, done_pkts, done_bytes); + if (nfp_nfdk_tx_ring_should_wake(tx_ring)) { + /* Make sure TX thread will see updated tx_ring->rd_p */ + smp_mb(); + + if (unlikely(netif_tx_queue_stopped(nd_q))) + netif_tx_wake_queue(nd_q); + } + + WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt, + "TX ring corruption rd_p=%u wr_p=%u cnt=%u\n", + tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt); +} + +/* Receive processing */ +static void * +nfp_nfdk_napi_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr) +{ + void *frag; + + if (!dp->xdp_prog) { + frag = napi_alloc_frag(dp->fl_bufsz); + if (unlikely(!frag)) + return NULL; + } else { + struct page *page; + + page = dev_alloc_page(); + if (unlikely(!page)) + return NULL; + frag = page_address(page); + } + + *dma_addr = nfp_net_dma_map_rx(dp, frag); + if (dma_mapping_error(dp->dev, *dma_addr)) { + nfp_net_free_frag(frag, dp->xdp_prog); + nn_dp_warn(dp, "Failed to map DMA RX buffer\n"); + return NULL; + } + + return frag; +} + +/** + * nfp_nfdk_rx_give_one() - Put mapped skb on the software and hardware rings + * @dp: NFP Net data 
path struct + * @rx_ring: RX ring structure + * @frag: page fragment buffer + * @dma_addr: DMA address of skb mapping + */ +static void +nfp_nfdk_rx_give_one(const struct nfp_net_dp *dp, + struct nfp_net_rx_ring *rx_ring, + void *frag, dma_addr_t dma_addr) +{ + unsigned int wr_idx; + + wr_idx = D_IDX(rx_ring, rx_ring->wr_p); + + nfp_net_dma_sync_dev_rx(dp, dma_addr); + + /* Stash SKB and DMA address away */ + rx_ring->rxbufs[wr_idx].frag = frag; + rx_ring->rxbufs[wr_idx].dma_addr = dma_addr; + + /* Fill freelist descriptor */ + rx_ring->rxds[wr_idx].fld.reserved = 0; + rx_ring->rxds[wr_idx].fld.meta_len_dd = 0; + nfp_desc_set_dma_addr_48b(&rx_ring->rxds[wr_idx].fld, + dma_addr + dp->rx_dma_off); + + rx_ring->wr_p++; + if (!(rx_ring->wr_p % NFP_NET_FL_BATCH)) { + /* Update write pointer of the freelist queue. Make + * sure all writes are flushed before telling the hardware. + */ + wmb(); + nfp_qcp_wr_ptr_add(rx_ring->qcp_fl, NFP_NET_FL_BATCH); + } +} + +/** + * nfp_nfdk_rx_ring_fill_freelist() - Give buffers from the ring to FW + * @dp: NFP Net data path struct + * @rx_ring: RX ring to fill + */ +void nfp_nfdk_rx_ring_fill_freelist(struct nfp_net_dp *dp, + struct nfp_net_rx_ring *rx_ring) +{ + unsigned int i; + + for (i = 0; i < rx_ring->cnt - 1; i++) + nfp_nfdk_rx_give_one(dp, rx_ring, rx_ring->rxbufs[i].frag, + rx_ring->rxbufs[i].dma_addr); +} + +/** + * nfp_nfdk_rx_csum_has_errors() - group check if rxd has any csum errors + * @flags: RX descriptor flags field in CPU byte order + */ +static int nfp_nfdk_rx_csum_has_errors(u16 flags) +{ + u16 csum_all_checked, csum_all_ok; + + csum_all_checked = flags & __PCIE_DESC_RX_CSUM_ALL; + csum_all_ok = flags & __PCIE_DESC_RX_CSUM_ALL_OK; + + return csum_all_checked != (csum_all_ok << PCIE_DESC_RX_CSUM_OK_SHIFT); +} + +/** + * nfp_nfdk_rx_csum() - set SKB checksum field based on RX descriptor flags + * @dp: NFP Net data path struct + * @r_vec: per-ring structure + * @rxd: Pointer to RX descriptor + * @meta: Parsed metadata 
prepend + * @skb: Pointer to SKB + */ +static void +nfp_nfdk_rx_csum(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, + struct nfp_net_rx_desc *rxd, struct nfp_meta_parsed *meta, + struct sk_buff *skb) +{ + skb_checksum_none_assert(skb); + + if (!(dp->netdev->features & NETIF_F_RXCSUM)) + return; + + if (meta->csum_type) { + skb->ip_summed = meta->csum_type; + skb->csum = meta->csum; + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->hw_csum_rx_complete++; + u64_stats_update_end(&r_vec->rx_sync); + return; + } + + if (nfp_nfdk_rx_csum_has_errors(le16_to_cpu(rxd->rxd.flags))) { + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->hw_csum_rx_error++; + u64_stats_update_end(&r_vec->rx_sync); + return; + } + + /* Assume that the firmware will never report inner CSUM_OK unless outer + * L4 headers were successfully parsed. FW will always report zero UDP + * checksum as CSUM_OK. + */ + if (rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM_OK || + rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM_OK) { + __skb_incr_checksum_unnecessary(skb); + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->hw_csum_rx_ok++; + u64_stats_update_end(&r_vec->rx_sync); + } + + if (rxd->rxd.flags & PCIE_DESC_RX_I_TCP_CSUM_OK || + rxd->rxd.flags & PCIE_DESC_RX_I_UDP_CSUM_OK) { + __skb_incr_checksum_unnecessary(skb); + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->hw_csum_rx_inner_ok++; + u64_stats_update_end(&r_vec->rx_sync); + } +} + +static void +nfp_nfdk_set_hash(struct net_device *netdev, struct nfp_meta_parsed *meta, + unsigned int type, __be32 *hash) +{ + if (!(netdev->features & NETIF_F_RXHASH)) + return; + + switch (type) { + case NFP_NET_RSS_IPV4: + case NFP_NET_RSS_IPV6: + case NFP_NET_RSS_IPV6_EX: + meta->hash_type = PKT_HASH_TYPE_L3; + break; + default: + meta->hash_type = PKT_HASH_TYPE_L4; + break; + } + + meta->hash = get_unaligned_be32(hash); +} + +static bool +nfp_nfdk_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta, + void *data, void *pkt, unsigned int pkt_len, int 
meta_len) +{ + u32 meta_info, vlan_info; + + meta_info = get_unaligned_be32(data); + data += 4; + + while (meta_info) { + switch (meta_info & NFP_NET_META_FIELD_MASK) { + case NFP_NET_META_HASH: + meta_info >>= NFP_NET_META_FIELD_SIZE; + nfp_nfdk_set_hash(netdev, meta, + meta_info & NFP_NET_META_FIELD_MASK, + (__be32 *)data); + data += 4; + break; + case NFP_NET_META_MARK: + meta->mark = get_unaligned_be32(data); + data += 4; + break; + case NFP_NET_META_VLAN: + vlan_info = get_unaligned_be32(data); + if (FIELD_GET(NFP_NET_META_VLAN_STRIP, vlan_info)) { + meta->vlan.stripped = true; + meta->vlan.tpid = FIELD_GET(NFP_NET_META_VLAN_TPID_MASK, + vlan_info); + meta->vlan.tci = FIELD_GET(NFP_NET_META_VLAN_TCI_MASK, + vlan_info); + } + data += 4; + break; + case NFP_NET_META_PORTID: + meta->portid = get_unaligned_be32(data); + data += 4; + break; + case NFP_NET_META_CSUM: + meta->csum_type = CHECKSUM_COMPLETE; + meta->csum = + (__force __wsum)__get_unaligned_cpu32(data); + data += 4; + break; + case NFP_NET_META_RESYNC_INFO: + if (nfp_net_tls_rx_resync_req(netdev, data, pkt, + pkt_len)) + return false; + data += sizeof(struct nfp_net_tls_resync_req); + break; + default: + return true; + } + + meta_info >>= NFP_NET_META_FIELD_SIZE; + } + + return data != pkt; +} + +static void +nfp_nfdk_rx_drop(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, + struct nfp_net_rx_ring *rx_ring, struct nfp_net_rx_buf *rxbuf, + struct sk_buff *skb) +{ + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->rx_drops++; + /* If we have both skb and rxbuf the replacement buffer allocation + * must have failed, count this as an alloc failure. + */ + if (skb && rxbuf) + r_vec->rx_replace_buf_alloc_fail++; + u64_stats_update_end(&r_vec->rx_sync); + + /* skb is build based on the frag, free_skb() would free the frag + * so to be able to reuse it we need an extra ref. 
+ */ + if (skb && rxbuf && skb->head == rxbuf->frag) + page_ref_inc(virt_to_head_page(rxbuf->frag)); + if (rxbuf) + nfp_nfdk_rx_give_one(dp, rx_ring, rxbuf->frag, rxbuf->dma_addr); + if (skb) + dev_kfree_skb_any(skb); +} + +static bool nfp_nfdk_xdp_complete(struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + struct nfp_net_dp *dp = &r_vec->nfp_net->dp; + struct nfp_net_rx_ring *rx_ring; + u32 qcp_rd_p, done = 0; + bool done_all; + int todo; + + /* Work out how many descriptors have been transmitted */ + qcp_rd_p = nfp_net_read_tx_cmpl(tx_ring, dp); + if (qcp_rd_p == tx_ring->qcp_rd_p) + return true; + + todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p); + + done_all = todo <= NFP_NET_XDP_MAX_COMPLETE; + todo = min(todo, NFP_NET_XDP_MAX_COMPLETE); + + rx_ring = r_vec->rx_ring; + while (todo > 0) { + int idx = D_IDX(tx_ring, tx_ring->rd_p + done); + struct nfp_nfdk_tx_buf *txbuf; + unsigned int step = 1; + + txbuf = &tx_ring->ktxbufs[idx]; + if (!txbuf->raw) + goto next; + + if (NFDK_TX_BUF_INFO(txbuf->val) != NFDK_TX_BUF_INFO_SOP) { + WARN_ONCE(1, "Unexpected TX buffer in XDP TX ring\n"); + goto next; + } + + /* Two successive txbufs are used to stash virtual and dma + * address respectively, recycle and clean them here. + */ + nfp_nfdk_rx_give_one(dp, rx_ring, + (void *)NFDK_TX_BUF_PTR(txbuf[0].val), + txbuf[1].dma_addr); + txbuf[0].raw = 0; + txbuf[1].raw = 0; + step = 2; + + u64_stats_update_begin(&r_vec->tx_sync); + /* Note: tx_bytes not accumulated. 
*/ + r_vec->tx_pkts++; + u64_stats_update_end(&r_vec->tx_sync); +next: + todo -= step; + done += step; + } + + tx_ring->qcp_rd_p = D_IDX(tx_ring, tx_ring->qcp_rd_p + done); + tx_ring->rd_p += done; + + WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt, + "XDP TX ring corruption rd_p=%u wr_p=%u cnt=%u\n", + tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt); + + return done_all; +} + +static bool +nfp_nfdk_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring, + struct nfp_net_tx_ring *tx_ring, + struct nfp_net_rx_buf *rxbuf, unsigned int dma_off, + unsigned int pkt_len, bool *completed) +{ + unsigned int dma_map_sz = dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA; + unsigned int dma_len, type, cnt, dlen_type, tmp_dlen; + struct nfp_nfdk_tx_buf *txbuf; + struct nfp_nfdk_tx_desc *txd; + unsigned int n_descs; + dma_addr_t dma_addr; + int wr_idx; + + /* Reject if xdp_adjust_tail grow packet beyond DMA area */ + if (pkt_len + dma_off > dma_map_sz) + return false; + + /* Make sure there's still at least one block available after + * aligning to block boundary, so that the txds used below + * won't wrap around the tx_ring. 
+ */ + if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) { + if (!*completed) { + nfp_nfdk_xdp_complete(tx_ring); + *completed = true; + } + + if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) { + nfp_nfdk_rx_drop(dp, rx_ring->r_vec, rx_ring, rxbuf, + NULL); + return false; + } + } + + /* Check if cross block boundary */ + n_descs = nfp_nfdk_headlen_to_segs(pkt_len); + if ((round_down(tx_ring->wr_p, NFDK_TX_DESC_BLOCK_CNT) != + round_down(tx_ring->wr_p + n_descs, NFDK_TX_DESC_BLOCK_CNT)) || + ((u32)tx_ring->data_pending + pkt_len > + NFDK_TX_MAX_DATA_PER_BLOCK)) { + unsigned int nop_slots = D_BLOCK_CPL(tx_ring->wr_p); + + wr_idx = D_IDX(tx_ring, tx_ring->wr_p); + txd = &tx_ring->ktxds[wr_idx]; + memset(txd, 0, + array_size(nop_slots, sizeof(struct nfp_nfdk_tx_desc))); + + tx_ring->data_pending = 0; + tx_ring->wr_p += nop_slots; + tx_ring->wr_ptr_add += nop_slots; + } + + wr_idx = D_IDX(tx_ring, tx_ring->wr_p); + + txbuf = &tx_ring->ktxbufs[wr_idx]; + + txbuf[0].val = (unsigned long)rxbuf->frag | NFDK_TX_BUF_INFO_SOP; + txbuf[1].dma_addr = rxbuf->dma_addr; + /* Note: pkt len not stored */ + + dma_sync_single_for_device(dp->dev, rxbuf->dma_addr + dma_off, + pkt_len, DMA_BIDIRECTIONAL); + + /* Build TX descriptor */ + txd = &tx_ring->ktxds[wr_idx]; + dma_len = pkt_len; + dma_addr = rxbuf->dma_addr + dma_off; + + if (dma_len < NFDK_TX_MAX_DATA_PER_HEAD) + type = NFDK_DESC_TX_TYPE_SIMPLE; + else + type = NFDK_DESC_TX_TYPE_GATHER; + + /* FIELD_PREP() implicitly truncates to chunk */ + dma_len -= 1; + dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, + dma_len > NFDK_DESC_TX_DMA_LEN_HEAD ? 
+ NFDK_DESC_TX_DMA_LEN_HEAD : dma_len) | + FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type); + + txd->dma_len_type = cpu_to_le16(dlen_type); + nfp_desc_set_dma_addr_48b(txd, dma_addr); + + tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD; + dma_len -= tmp_dlen; + dma_addr += tmp_dlen + 1; + txd++; + + while (dma_len > 0) { + dma_len -= 1; + dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len); + txd->dma_len_type = cpu_to_le16(dlen_type); + nfp_desc_set_dma_addr_48b(txd, dma_addr); + + dlen_type &= NFDK_DESC_TX_DMA_LEN; + dma_len -= dlen_type; + dma_addr += dlen_type + 1; + txd++; + } + + (txd - 1)->dma_len_type = cpu_to_le16(dlen_type | NFDK_DESC_TX_EOP); + + /* Metadata desc */ + txd->raw = 0; + txd++; + + cnt = txd - tx_ring->ktxds - wr_idx; + tx_ring->wr_p += cnt; + if (tx_ring->wr_p % NFDK_TX_DESC_BLOCK_CNT) + tx_ring->data_pending += pkt_len; + else + tx_ring->data_pending = 0; + + tx_ring->wr_ptr_add += cnt; + return true; +} + +/** + * nfp_nfdk_rx() - receive up to @budget packets on @rx_ring + * @rx_ring: RX ring to receive from + * @budget: NAPI budget + * + * Note, this function is separated out from the napi poll function to + * more cleanly separate packet receive code from other bookkeeping + * functions performed in the napi poll function. + * + * Return: Number of packets received. + */ +static int nfp_nfdk_rx(struct nfp_net_rx_ring *rx_ring, int budget) +{ + struct nfp_net_r_vector *r_vec = rx_ring->r_vec; + struct nfp_net_dp *dp = &r_vec->nfp_net->dp; + struct nfp_net_tx_ring *tx_ring; + struct bpf_prog *xdp_prog; + bool xdp_tx_cmpl = false; + unsigned int true_bufsz; + struct sk_buff *skb; + int pkts_polled = 0; + struct xdp_buff xdp; + int idx; + + xdp_prog = READ_ONCE(dp->xdp_prog); + true_bufsz = xdp_prog ? 
PAGE_SIZE : dp->fl_bufsz; + xdp_init_buff(&xdp, PAGE_SIZE - NFP_NET_RX_BUF_HEADROOM, + &rx_ring->xdp_rxq); + tx_ring = r_vec->xdp_ring; + + while (pkts_polled < budget) { + unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off; + struct nfp_net_rx_buf *rxbuf; + struct nfp_net_rx_desc *rxd; + struct nfp_meta_parsed meta; + bool redir_egress = false; + struct net_device *netdev; + dma_addr_t new_dma_addr; + u32 meta_len_xdp = 0; + void *new_frag; + + idx = D_IDX(rx_ring, rx_ring->rd_p); + + rxd = &rx_ring->rxds[idx]; + if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD)) + break; + + /* Memory barrier to ensure that we won't do other reads + * before the DD bit. + */ + dma_rmb(); + + memset(&meta, 0, sizeof(meta)); + + rx_ring->rd_p++; + pkts_polled++; + + rxbuf = &rx_ring->rxbufs[idx]; + /* < meta_len > + * <-- [rx_offset] --> + * --------------------------------------------------------- + * | [XX] | metadata | packet | XXXX | + * --------------------------------------------------------- + * <---------------- data_len ---------------> + * + * The rx_offset is fixed for all packets, the meta_len can vary + * on a packet by packet basis. If rx_offset is set to zero + * (_RX_OFFSET_DYNAMIC) metadata starts at the beginning of the + * buffer and is immediately followed by the packet (no [XX]). 
+ */ + meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK; + data_len = le16_to_cpu(rxd->rxd.data_len); + pkt_len = data_len - meta_len; + + pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off; + if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC) + pkt_off += meta_len; + else + pkt_off += dp->rx_offset; + meta_off = pkt_off - meta_len; + + /* Stats update */ + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->rx_pkts++; + r_vec->rx_bytes += pkt_len; + u64_stats_update_end(&r_vec->rx_sync); + + if (unlikely(meta_len > NFP_NET_MAX_PREPEND || + (dp->rx_offset && meta_len > dp->rx_offset))) { + nn_dp_warn(dp, "oversized RX packet metadata %u\n", + meta_len); + nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL); + continue; + } + + nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off, + data_len); + + if (meta_len) { + if (unlikely(nfp_nfdk_parse_meta(dp->netdev, &meta, + rxbuf->frag + meta_off, + rxbuf->frag + pkt_off, + pkt_len, meta_len))) { + nn_dp_warn(dp, "invalid RX packet metadata\n"); + nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, + NULL); + continue; + } + } + + if (xdp_prog && !meta.portid) { + void *orig_data = rxbuf->frag + pkt_off; + unsigned int dma_off; + int act; + + xdp_prepare_buff(&xdp, + rxbuf->frag + NFP_NET_RX_BUF_HEADROOM, + pkt_off - NFP_NET_RX_BUF_HEADROOM, + pkt_len, true); + + act = bpf_prog_run_xdp(xdp_prog, &xdp); + + pkt_len = xdp.data_end - xdp.data; + pkt_off += xdp.data - orig_data; + + switch (act) { + case XDP_PASS: + meta_len_xdp = xdp.data - xdp.data_meta; + break; + case XDP_TX: + dma_off = pkt_off - NFP_NET_RX_BUF_HEADROOM; + if (unlikely(!nfp_nfdk_tx_xdp_buf(dp, rx_ring, + tx_ring, + rxbuf, + dma_off, + pkt_len, + &xdp_tx_cmpl))) + trace_xdp_exception(dp->netdev, + xdp_prog, act); + continue; + default: + bpf_warn_invalid_xdp_action(dp->netdev, xdp_prog, act); + fallthrough; + case XDP_ABORTED: + trace_xdp_exception(dp->netdev, xdp_prog, act); + fallthrough; + case XDP_DROP: + nfp_nfdk_rx_give_one(dp, rx_ring, 
rxbuf->frag, + rxbuf->dma_addr); + continue; + } + } + + if (likely(!meta.portid)) { + netdev = dp->netdev; + } else if (meta.portid == NFP_META_PORT_ID_CTRL) { + struct nfp_net *nn = netdev_priv(dp->netdev); + + nfp_app_ctrl_rx_raw(nn->app, rxbuf->frag + pkt_off, + pkt_len); + nfp_nfdk_rx_give_one(dp, rx_ring, rxbuf->frag, + rxbuf->dma_addr); + continue; + } else { + struct nfp_net *nn; + + nn = netdev_priv(dp->netdev); + netdev = nfp_app_dev_get(nn->app, meta.portid, + &redir_egress); + if (unlikely(!netdev)) { + nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, + NULL); + continue; + } + + if (nfp_netdev_is_nfp_repr(netdev)) + nfp_repr_inc_rx_stats(netdev, pkt_len); + } + + skb = build_skb(rxbuf->frag, true_bufsz); + if (unlikely(!skb)) { + nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL); + continue; + } + new_frag = nfp_nfdk_napi_alloc_one(dp, &new_dma_addr); + if (unlikely(!new_frag)) { + nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, skb); + continue; + } + + nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr); + + nfp_nfdk_rx_give_one(dp, rx_ring, new_frag, new_dma_addr); + + skb_reserve(skb, pkt_off); + skb_put(skb, pkt_len); + + skb->mark = meta.mark; + skb_set_hash(skb, meta.hash, meta.hash_type); + + skb_record_rx_queue(skb, rx_ring->idx); + skb->protocol = eth_type_trans(skb, netdev); + + nfp_nfdk_rx_csum(dp, r_vec, rxd, &meta, skb); + + if (unlikely(!nfp_net_vlan_strip(skb, rxd, &meta))) { + nfp_nfdk_rx_drop(dp, r_vec, rx_ring, NULL, skb); + continue; + } + + if (meta_len_xdp) + skb_metadata_set(skb, meta_len_xdp); + + if (likely(!redir_egress)) { + napi_gro_receive(&rx_ring->r_vec->napi, skb); + } else { + skb->dev = netdev; + skb_reset_network_header(skb); + __skb_push(skb, ETH_HLEN); + dev_queue_xmit(skb); + } + } + + if (xdp_prog) { + if (tx_ring->wr_ptr_add) + nfp_net_tx_xmit_more_flush(tx_ring); + else if (unlikely(tx_ring->wr_p != tx_ring->rd_p) && + !xdp_tx_cmpl) + if (!nfp_nfdk_xdp_complete(tx_ring)) + pkts_polled = budget; + } + + return pkts_polled; +} + 
+/** + * nfp_nfdk_poll() - napi poll function + * @napi: NAPI structure + * @budget: NAPI budget + * + * Return: number of packets polled. + */ +int nfp_nfdk_poll(struct napi_struct *napi, int budget) +{ + struct nfp_net_r_vector *r_vec = + container_of(napi, struct nfp_net_r_vector, napi); + unsigned int pkts_polled = 0; + + if (r_vec->tx_ring) + nfp_nfdk_tx_complete(r_vec->tx_ring, budget); + if (r_vec->rx_ring) + pkts_polled = nfp_nfdk_rx(r_vec->rx_ring, budget); + + if (pkts_polled < budget) + if (napi_complete_done(napi, pkts_polled)) + nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry); + + if (r_vec->nfp_net->rx_coalesce_adapt_on && r_vec->rx_ring) { + struct dim_sample dim_sample = {}; + unsigned int start; + u64 pkts, bytes; + + do { + start = u64_stats_fetch_begin(&r_vec->rx_sync); + pkts = r_vec->rx_pkts; + bytes = r_vec->rx_bytes; + } while (u64_stats_fetch_retry(&r_vec->rx_sync, start)); + + dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample); + net_dim(&r_vec->rx_dim, dim_sample); + } + + if (r_vec->nfp_net->tx_coalesce_adapt_on && r_vec->tx_ring) { + struct dim_sample dim_sample = {}; + unsigned int start; + u64 pkts, bytes; + + do { + start = u64_stats_fetch_begin(&r_vec->tx_sync); + pkts = r_vec->tx_pkts; + bytes = r_vec->tx_bytes; + } while (u64_stats_fetch_retry(&r_vec->tx_sync, start)); + + dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample); + net_dim(&r_vec->tx_dim, dim_sample); + } + + return pkts_polled; +} + +/* Control device data path + */ + +bool +nfp_nfdk_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, + struct sk_buff *skb, bool old) +{ + u32 cnt, tmp_dlen, dlen_type = 0; + struct nfp_net_tx_ring *tx_ring; + struct nfp_nfdk_tx_buf *txbuf; + struct nfp_nfdk_tx_desc *txd; + unsigned int dma_len, type; + struct nfp_net_dp *dp; + dma_addr_t dma_addr; + u64 metadata = 0; + int wr_idx; + + dp = &r_vec->nfp_net->dp; + tx_ring = r_vec->tx_ring; + + if (WARN_ON_ONCE(skb_shinfo(skb)->nr_frags)) { + 
nn_dp_warn(dp, "Driver's CTRL TX does not implement gather\n"); + goto err_free; + } + + /* Don't bother counting frags, assume the worst */ + if (unlikely(nfp_net_tx_full(tx_ring, NFDK_TX_DESC_STOP_CNT))) { + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_busy++; + u64_stats_update_end(&r_vec->tx_sync); + if (!old) + __skb_queue_tail(&r_vec->queue, skb); + else + __skb_queue_head(&r_vec->queue, skb); + return NETDEV_TX_BUSY; + } + + if (nfp_app_ctrl_has_meta(nn->app)) { + if (unlikely(skb_headroom(skb) < 8)) { + nn_dp_warn(dp, "CTRL TX on skb without headroom\n"); + goto err_free; + } + metadata = NFDK_DESC_TX_CHAIN_META; + put_unaligned_be32(NFP_META_PORT_ID_CTRL, skb_push(skb, 4)); + put_unaligned_be32(FIELD_PREP(NFDK_META_LEN, 8) | + FIELD_PREP(NFDK_META_FIELDS, + NFP_NET_META_PORTID), + skb_push(skb, 4)); + } + + if (nfp_nfdk_tx_maybe_close_block(tx_ring, skb)) + goto err_free; + + /* DMA map all */ + wr_idx = D_IDX(tx_ring, tx_ring->wr_p); + txd = &tx_ring->ktxds[wr_idx]; + txbuf = &tx_ring->ktxbufs[wr_idx]; + + dma_len = skb_headlen(skb); + if (dma_len < NFDK_TX_MAX_DATA_PER_HEAD) + type = NFDK_DESC_TX_TYPE_SIMPLE; + else + type = NFDK_DESC_TX_TYPE_GATHER; + + dma_addr = dma_map_single(dp->dev, skb->data, dma_len, DMA_TO_DEVICE); + if (dma_mapping_error(dp->dev, dma_addr)) + goto err_warn_dma; + + txbuf->skb = skb; + txbuf++; + + txbuf->dma_addr = dma_addr; + txbuf++; + + dma_len -= 1; + dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, + dma_len > NFDK_DESC_TX_DMA_LEN_HEAD ? 
+ NFDK_DESC_TX_DMA_LEN_HEAD : dma_len) | + FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type); + + txd->dma_len_type = cpu_to_le16(dlen_type); + nfp_desc_set_dma_addr_48b(txd, dma_addr); + + tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD; + dma_len -= tmp_dlen; + dma_addr += tmp_dlen + 1; + txd++; + + while (dma_len > 0) { + dma_len -= 1; + dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len); + txd->dma_len_type = cpu_to_le16(dlen_type); + nfp_desc_set_dma_addr_48b(txd, dma_addr); + + dlen_type &= NFDK_DESC_TX_DMA_LEN; + dma_len -= dlen_type; + dma_addr += dlen_type + 1; + txd++; + } + + (txd - 1)->dma_len_type = cpu_to_le16(dlen_type | NFDK_DESC_TX_EOP); + + /* Metadata desc */ + txd->raw = cpu_to_le64(metadata); + txd++; + + cnt = txd - tx_ring->ktxds - wr_idx; + if (unlikely(round_down(wr_idx, NFDK_TX_DESC_BLOCK_CNT) != + round_down(wr_idx + cnt - 1, NFDK_TX_DESC_BLOCK_CNT))) + goto err_warn_overflow; + + tx_ring->wr_p += cnt; + if (tx_ring->wr_p % NFDK_TX_DESC_BLOCK_CNT) + tx_ring->data_pending += skb->len; + else + tx_ring->data_pending = 0; + + tx_ring->wr_ptr_add += cnt; + nfp_net_tx_xmit_more_flush(tx_ring); + + return NETDEV_TX_OK; + +err_warn_overflow: + WARN_ONCE(1, "unable to fit packet into a descriptor wr_idx:%d head:%d frags:%d cnt:%d", + wr_idx, skb_headlen(skb), 0, cnt); + txbuf--; + dma_unmap_single(dp->dev, txbuf->dma_addr, + skb_headlen(skb), DMA_TO_DEVICE); + txbuf->raw = 0; +err_warn_dma: + nn_dp_warn(dp, "Failed to map DMA TX buffer\n"); +err_free: + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_errors++; + u64_stats_update_end(&r_vec->tx_sync); + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; +} + +static void __nfp_ctrl_tx_queued(struct nfp_net_r_vector *r_vec) +{ + struct sk_buff *skb; + + while ((skb = __skb_dequeue(&r_vec->queue))) + if (nfp_nfdk_ctrl_tx_one(r_vec->nfp_net, r_vec, skb, true)) + return; +} + +static bool +nfp_ctrl_meta_ok(struct nfp_net *nn, void *data, unsigned int meta_len) +{ + u32 meta_type, meta_tag; + + if 
(!nfp_app_ctrl_has_meta(nn->app)) + return !meta_len; + + if (meta_len != 8) + return false; + + meta_type = get_unaligned_be32(data); + meta_tag = get_unaligned_be32(data + 4); + + return (meta_type == NFP_NET_META_PORTID && + meta_tag == NFP_META_PORT_ID_CTRL); +} + +static bool +nfp_ctrl_rx_one(struct nfp_net *nn, struct nfp_net_dp *dp, + struct nfp_net_r_vector *r_vec, struct nfp_net_rx_ring *rx_ring) +{ + unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off; + struct nfp_net_rx_buf *rxbuf; + struct nfp_net_rx_desc *rxd; + dma_addr_t new_dma_addr; + struct sk_buff *skb; + void *new_frag; + int idx; + + idx = D_IDX(rx_ring, rx_ring->rd_p); + + rxd = &rx_ring->rxds[idx]; + if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD)) + return false; + + /* Memory barrier to ensure that we won't do other reads + * before the DD bit. + */ + dma_rmb(); + + rx_ring->rd_p++; + + rxbuf = &rx_ring->rxbufs[idx]; + meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK; + data_len = le16_to_cpu(rxd->rxd.data_len); + pkt_len = data_len - meta_len; + + pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off; + if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC) + pkt_off += meta_len; + else + pkt_off += dp->rx_offset; + meta_off = pkt_off - meta_len; + + /* Stats update */ + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->rx_pkts++; + r_vec->rx_bytes += pkt_len; + u64_stats_update_end(&r_vec->rx_sync); + + nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off, data_len); + + if (unlikely(!nfp_ctrl_meta_ok(nn, rxbuf->frag + meta_off, meta_len))) { + nn_dp_warn(dp, "incorrect metadata for ctrl packet (%d)\n", + meta_len); + nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL); + return true; + } + + skb = build_skb(rxbuf->frag, dp->fl_bufsz); + if (unlikely(!skb)) { + nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL); + return true; + } + new_frag = nfp_nfdk_napi_alloc_one(dp, &new_dma_addr); + if (unlikely(!new_frag)) { + nfp_nfdk_rx_drop(dp, r_vec, rx_ring, rxbuf, skb); + return 
true; + } + + nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr); + + nfp_nfdk_rx_give_one(dp, rx_ring, new_frag, new_dma_addr); + + skb_reserve(skb, pkt_off); + skb_put(skb, pkt_len); + + nfp_app_ctrl_rx(nn->app, skb); + + return true; +} + +static bool nfp_ctrl_rx(struct nfp_net_r_vector *r_vec) +{ + struct nfp_net_rx_ring *rx_ring = r_vec->rx_ring; + struct nfp_net *nn = r_vec->nfp_net; + struct nfp_net_dp *dp = &nn->dp; + unsigned int budget = 512; + + while (nfp_ctrl_rx_one(nn, dp, r_vec, rx_ring) && budget--) + continue; + + return budget; +} + +void nfp_nfdk_ctrl_poll(struct tasklet_struct *t) +{ + struct nfp_net_r_vector *r_vec = from_tasklet(r_vec, t, tasklet); + + spin_lock(&r_vec->lock); + nfp_nfdk_tx_complete(r_vec->tx_ring, 0); + __nfp_ctrl_tx_queued(r_vec); + spin_unlock(&r_vec->lock); + + if (nfp_ctrl_rx(r_vec)) { + nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry); + } else { + tasklet_schedule(&r_vec->tasklet); + nn_dp_warn(&r_vec->nfp_net->dp, + "control message budget exceeded!\n"); + } +} diff --git a/drivers/net/ethernet/netronome/nfp/nfdk/nfdk.h b/drivers/net/ethernet/netronome/nfp/nfdk/nfdk.h new file mode 100644 index 000000000000..0ea51d9f2325 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfdk/nfdk.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2019 Netronome Systems, Inc. 
*/ + +#ifndef _NFP_DP_NFDK_H_ +#define _NFP_DP_NFDK_H_ + +#include <linux/bitops.h> +#include <linux/types.h> + +#define NFDK_TX_DESC_PER_SIMPLE_PKT 2 + +#define NFDK_TX_MAX_DATA_PER_HEAD SZ_4K +#define NFDK_TX_MAX_DATA_PER_DESC SZ_16K +#define NFDK_TX_DESC_BLOCK_SZ 256 +#define NFDK_TX_DESC_BLOCK_CNT (NFDK_TX_DESC_BLOCK_SZ / \ + sizeof(struct nfp_nfdk_tx_desc)) +#define NFDK_TX_DESC_STOP_CNT (NFDK_TX_DESC_BLOCK_CNT * \ + NFDK_TX_DESC_PER_SIMPLE_PKT) +#define NFDK_TX_MAX_DATA_PER_BLOCK SZ_64K +#define NFDK_TX_DESC_GATHER_MAX 17 + +/* TX descriptor format */ + +#define NFDK_DESC_TX_MSS_MASK GENMASK(13, 0) + +#define NFDK_DESC_TX_CHAIN_META BIT(3) +#define NFDK_DESC_TX_ENCAP BIT(2) +#define NFDK_DESC_TX_L4_CSUM BIT(1) +#define NFDK_DESC_TX_L3_CSUM BIT(0) + +#define NFDK_DESC_TX_DMA_LEN_HEAD GENMASK(11, 0) +#define NFDK_DESC_TX_TYPE_HEAD GENMASK(15, 12) +#define NFDK_DESC_TX_DMA_LEN GENMASK(13, 0) +#define NFDK_DESC_TX_TYPE_NOP 0 +#define NFDK_DESC_TX_TYPE_GATHER 1 +#define NFDK_DESC_TX_TYPE_TSO 2 +#define NFDK_DESC_TX_TYPE_SIMPLE 8 +#define NFDK_DESC_TX_EOP BIT(14) + +#define NFDK_META_LEN GENMASK(7, 0) +#define NFDK_META_FIELDS GENMASK(31, 8) + +#define D_BLOCK_CPL(idx) (NFDK_TX_DESC_BLOCK_CNT - \ + (idx) % NFDK_TX_DESC_BLOCK_CNT) + +struct nfp_nfdk_tx_desc { + union { + struct { + __le16 dma_addr_hi; /* High bits of host buf address */ + __le16 dma_len_type; /* Length to DMA for this desc */ + __le32 dma_addr_lo; /* Low 32bit of host buf addr */ + }; + + struct { + __le16 mss; /* MSS to be used for LSO */ + u8 lso_hdrlen; /* LSO, TCP payload offset */ + u8 lso_totsegs; /* LSO, total segments */ + u8 l3_offset; /* L3 header offset */ + u8 l4_offset; /* L4 header offset */ + __le16 lso_meta_res; /* Rsvd bits in TSO metadata */ + }; + + struct { + u8 flags; /* TX Flags, see @NFDK_DESC_TX_* */ + u8 reserved[7]; /* meta byte placeholder */ + }; + + __le32 vals[2]; + __le64 raw; + }; +}; + +/* The device don't make use of the 2 or 3 least significant bits of the address 
+ * due to alignment constraints. The driver can make use of those bits to carry + * information about the buffer before giving it to the device. + * + * NOTE: The driver must clear the lower bits before handing the buffer to the + * device. + * + * - NFDK_TX_BUF_INFO_SOP - Start of a packet + * Mark the buffer as a start of a packet. This is used in the XDP TX process + * to stash virtual and DMA address so that they can be recycled when the TX + * operation is completed. + */ +#define NFDK_TX_BUF_PTR(val) ((val) & ~(sizeof(void *) - 1)) +#define NFDK_TX_BUF_INFO(val) ((val) & (sizeof(void *) - 1)) +#define NFDK_TX_BUF_INFO_SOP BIT(0) + +struct nfp_nfdk_tx_buf { + union { + /* First slot */ + union { + struct sk_buff *skb; + void *frag; + unsigned long val; + }; + + /* 1 + nr_frags next slots */ + dma_addr_t dma_addr; + + /* TSO (optional) */ + struct { + u32 pkt_cnt; + u32 real_len; + }; + + u64 raw; + }; +}; + +static inline int nfp_nfdk_headlen_to_segs(unsigned int headlen) +{ + /* First descriptor fits less data, so adjust for that */ + return DIV_ROUND_UP(headlen + + NFDK_TX_MAX_DATA_PER_DESC - + NFDK_TX_MAX_DATA_PER_HEAD, + NFDK_TX_MAX_DATA_PER_DESC); +} + +int nfp_nfdk_poll(struct napi_struct *napi, int budget); +netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev); +bool +nfp_nfdk_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, + struct sk_buff *skb, bool old); +void nfp_nfdk_ctrl_poll(struct tasklet_struct *t); +void nfp_nfdk_rx_ring_fill_freelist(struct nfp_net_dp *dp, + struct nfp_net_rx_ring *rx_ring); +#endif diff --git a/drivers/net/ethernet/netronome/nfp/nfdk/rings.c b/drivers/net/ethernet/netronome/nfp/nfdk/rings.c new file mode 100644 index 000000000000..fdb8144a63e0 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfdk/rings.c @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2019 Netronome Systems, Inc. 
*/ + +#include <linux/seq_file.h> + +#include "../nfp_net.h" +#include "../nfp_net_dp.h" +#include "nfdk.h" + +static void +nfp_nfdk_tx_ring_reset(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring) +{ + struct device *dev = dp->dev; + struct netdev_queue *nd_q; + + while (!tx_ring->is_xdp && tx_ring->rd_p != tx_ring->wr_p) { + const skb_frag_t *frag, *fend; + unsigned int size, n_descs = 1; + struct nfp_nfdk_tx_buf *txbuf; + int nr_frags, rd_idx; + struct sk_buff *skb; + + rd_idx = D_IDX(tx_ring, tx_ring->rd_p); + txbuf = &tx_ring->ktxbufs[rd_idx]; + + skb = txbuf->skb; + if (!skb) { + n_descs = D_BLOCK_CPL(tx_ring->rd_p); + goto next; + } + + nr_frags = skb_shinfo(skb)->nr_frags; + txbuf++; + + /* Unmap head */ + size = skb_headlen(skb); + dma_unmap_single(dev, txbuf->dma_addr, size, DMA_TO_DEVICE); + n_descs += nfp_nfdk_headlen_to_segs(size); + txbuf++; + + frag = skb_shinfo(skb)->frags; + fend = frag + nr_frags; + for (; frag < fend; frag++) { + size = skb_frag_size(frag); + dma_unmap_page(dev, txbuf->dma_addr, + skb_frag_size(frag), DMA_TO_DEVICE); + n_descs += DIV_ROUND_UP(size, + NFDK_TX_MAX_DATA_PER_DESC); + txbuf++; + } + + if (skb_is_gso(skb)) + n_descs++; + + dev_kfree_skb_any(skb); +next: + tx_ring->rd_p += n_descs; + } + + memset(tx_ring->txds, 0, tx_ring->size); + tx_ring->data_pending = 0; + tx_ring->wr_p = 0; + tx_ring->rd_p = 0; + tx_ring->qcp_rd_p = 0; + tx_ring->wr_ptr_add = 0; + + if (tx_ring->is_xdp || !dp->netdev) + return; + + nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx); + netdev_tx_reset_queue(nd_q); +} + +static void nfp_nfdk_tx_ring_free(struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + struct nfp_net_dp *dp = &r_vec->nfp_net->dp; + + kvfree(tx_ring->ktxbufs); + + if (tx_ring->ktxds) + dma_free_coherent(dp->dev, tx_ring->size, + tx_ring->ktxds, tx_ring->dma); + + tx_ring->cnt = 0; + tx_ring->txbufs = NULL; + tx_ring->txds = NULL; + tx_ring->dma = 0; + tx_ring->size = 0; +} + +static int 
+nfp_nfdk_tx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring) +{ + struct nfp_net_r_vector *r_vec = tx_ring->r_vec; + + tx_ring->cnt = dp->txd_cnt * NFDK_TX_DESC_PER_SIMPLE_PKT; + tx_ring->size = array_size(tx_ring->cnt, sizeof(*tx_ring->ktxds)); + tx_ring->ktxds = dma_alloc_coherent(dp->dev, tx_ring->size, + &tx_ring->dma, + GFP_KERNEL | __GFP_NOWARN); + if (!tx_ring->ktxds) { + netdev_warn(dp->netdev, "failed to allocate TX descriptor ring memory, requested descriptor count: %d, consider lowering descriptor count\n", + tx_ring->cnt); + goto err_alloc; + } + + tx_ring->ktxbufs = kvcalloc(tx_ring->cnt, sizeof(*tx_ring->ktxbufs), + GFP_KERNEL); + if (!tx_ring->ktxbufs) + goto err_alloc; + + if (!tx_ring->is_xdp && dp->netdev) + netif_set_xps_queue(dp->netdev, &r_vec->affinity_mask, + tx_ring->idx); + + return 0; + +err_alloc: + nfp_nfdk_tx_ring_free(tx_ring); + return -ENOMEM; +} + +static void +nfp_nfdk_tx_ring_bufs_free(struct nfp_net_dp *dp, + struct nfp_net_tx_ring *tx_ring) +{ +} + +static int +nfp_nfdk_tx_ring_bufs_alloc(struct nfp_net_dp *dp, + struct nfp_net_tx_ring *tx_ring) +{ + return 0; +} + +static void +nfp_nfdk_print_tx_descs(struct seq_file *file, + struct nfp_net_r_vector *r_vec, + struct nfp_net_tx_ring *tx_ring, + u32 d_rd_p, u32 d_wr_p) +{ + struct nfp_nfdk_tx_desc *txd; + u32 txd_cnt = tx_ring->cnt; + int i; + + for (i = 0; i < txd_cnt; i++) { + txd = &tx_ring->ktxds[i]; + + seq_printf(file, "%04d: 0x%08x 0x%08x 0x%016llx", i, + txd->vals[0], txd->vals[1], tx_ring->ktxbufs[i].raw); + + if (i == tx_ring->rd_p % txd_cnt) + seq_puts(file, " H_RD"); + if (i == tx_ring->wr_p % txd_cnt) + seq_puts(file, " H_WR"); + if (i == d_rd_p % txd_cnt) + seq_puts(file, " D_RD"); + if (i == d_wr_p % txd_cnt) + seq_puts(file, " D_WR"); + + seq_putc(file, '\n'); + } +} + +#define NFP_NFDK_CFG_CTRL_SUPPORTED \ + (NFP_NET_CFG_CTRL_ENABLE | NFP_NET_CFG_CTRL_PROMISC | \ + NFP_NET_CFG_CTRL_L2BC | NFP_NET_CFG_CTRL_L2MC | \ + NFP_NET_CFG_CTRL_RXCSUM | 
NFP_NET_CFG_CTRL_TXCSUM | \ + NFP_NET_CFG_CTRL_RXVLAN | \ + NFP_NET_CFG_CTRL_RXVLAN_V2 | NFP_NET_CFG_CTRL_RXQINQ | \ + NFP_NET_CFG_CTRL_TXVLAN_V2 | \ + NFP_NET_CFG_CTRL_GATHER | NFP_NET_CFG_CTRL_LSO | \ + NFP_NET_CFG_CTRL_CTAG_FILTER | NFP_NET_CFG_CTRL_CMSG_DATA | \ + NFP_NET_CFG_CTRL_RINGCFG | NFP_NET_CFG_CTRL_IRQMOD | \ + NFP_NET_CFG_CTRL_TXRWB | NFP_NET_CFG_CTRL_VEPA | \ + NFP_NET_CFG_CTRL_VXLAN | NFP_NET_CFG_CTRL_NVGRE | \ + NFP_NET_CFG_CTRL_BPF | NFP_NET_CFG_CTRL_LSO2 | \ + NFP_NET_CFG_CTRL_RSS2 | NFP_NET_CFG_CTRL_CSUM_COMPLETE | \ + NFP_NET_CFG_CTRL_LIVE_ADDR) + +const struct nfp_dp_ops nfp_nfdk_ops = { + .version = NFP_NFD_VER_NFDK, + .tx_min_desc_per_pkt = NFDK_TX_DESC_PER_SIMPLE_PKT, + .cap_mask = NFP_NFDK_CFG_CTRL_SUPPORTED, + .dma_mask = DMA_BIT_MASK(48), + .poll = nfp_nfdk_poll, + .ctrl_poll = nfp_nfdk_ctrl_poll, + .xmit = nfp_nfdk_tx, + .ctrl_tx_one = nfp_nfdk_ctrl_tx_one, + .rx_ring_fill_freelist = nfp_nfdk_rx_ring_fill_freelist, + .tx_ring_alloc = nfp_nfdk_tx_ring_alloc, + .tx_ring_reset = nfp_nfdk_tx_ring_reset, + .tx_ring_free = nfp_nfdk_tx_ring_free, + .tx_ring_bufs_alloc = nfp_nfdk_tx_ring_bufs_alloc, + .tx_ring_bufs_free = nfp_nfdk_tx_ring_bufs_free, + .print_tx_descs = nfp_nfdk_print_tx_descs +}; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.c b/drivers/net/ethernet/netronome/nfp/nfp_app.c index 3a973282b2bb..bb3f46c74f77 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.c @@ -121,7 +121,7 @@ struct nfp_reprs * nfp_reprs_get_locked(struct nfp_app *app, enum nfp_repr_type type) { return rcu_dereference_protected(app->reprs[type], - lockdep_is_held(&app->pf->lock)); + nfp_app_is_locked(app)); } struct nfp_reprs * @@ -230,7 +230,7 @@ struct nfp_app *nfp_app_alloc(struct nfp_pf *pf, enum nfp_app_id id) struct nfp_app *app; if (id >= ARRAY_SIZE(apps) || !apps[id]) { - nfp_err(pf->cpp, "unknown FW app ID 0x%02hhx, driver too old or support for FW not built in\n", id); + 
nfp_err(pf->cpp, "unknown FW app ID 0x%02x, driver too old or support for FW not built in\n", id); return ERR_PTR(-EINVAL); } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h index 3e9baff07100..dd56207df246 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h @@ -75,7 +75,7 @@ extern const struct nfp_app_type app_abm; * @bpf: BPF ndo offload-related calls * @xdp_offload: offload an XDP program * @eswitch_mode_get: get SR-IOV eswitch mode - * @eswitch_mode_set: set SR-IOV eswitch mode (under pf->lock) + * @eswitch_mode_set: set SR-IOV eswitch mode * @sriov_enable: app-specific sriov initialisation * @sriov_disable: app-specific sriov clean-up * @dev_get: get representor or internal port representing netdev @@ -174,6 +174,16 @@ struct nfp_app { void *priv; }; +static inline void assert_nfp_app_locked(struct nfp_app *app) +{ + devl_assert_locked(priv_to_devlink(app->pf)); +} + +static inline bool nfp_app_is_locked(struct nfp_app *app) +{ + return devl_lock_is_held(priv_to_devlink(app->pf)); +} + void nfp_check_rhashtable_empty(void *ptr, void *arg); bool __nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb); bool nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c index bea978df7713..405786c00334 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_devlink.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_devlink.c @@ -26,12 +26,11 @@ nfp_devlink_fill_eth_port(struct nfp_port *port, } static int -nfp_devlink_fill_eth_port_from_id(struct nfp_pf *pf, unsigned int port_index, +nfp_devlink_fill_eth_port_from_id(struct nfp_pf *pf, + struct devlink_port *dl_port, struct nfp_eth_table_port *copy) { - struct nfp_port *port; - - port = nfp_port_from_id(pf, NFP_PORT_PHYS_PORT, port_index); + struct nfp_port *port = container_of(dl_port, struct nfp_port, 
dl_port); return nfp_devlink_fill_eth_port(port, copy); } @@ -62,7 +61,7 @@ nfp_devlink_set_lanes(struct nfp_pf *pf, unsigned int idx, unsigned int lanes) } static int -nfp_devlink_port_split(struct devlink *devlink, unsigned int port_index, +nfp_devlink_port_split(struct devlink *devlink, struct devlink_port *port, unsigned int count, struct netlink_ext_ack *extack) { struct nfp_pf *pf = devlink_priv(devlink); @@ -70,33 +69,25 @@ nfp_devlink_port_split(struct devlink *devlink, unsigned int port_index, unsigned int lanes; int ret; - mutex_lock(&pf->lock); - rtnl_lock(); - ret = nfp_devlink_fill_eth_port_from_id(pf, port_index, &eth_port); + ret = nfp_devlink_fill_eth_port_from_id(pf, port, &eth_port); rtnl_unlock(); if (ret) - goto out; + return ret; - if (eth_port.port_lanes % count) { - ret = -EINVAL; - goto out; - } + if (eth_port.port_lanes % count) + return -EINVAL; /* Special case the 100G CXP -> 2x40G split */ lanes = eth_port.port_lanes / count; if (eth_port.lanes == 10 && count == 2) lanes = 8 / count; - ret = nfp_devlink_set_lanes(pf, eth_port.index, lanes); -out: - mutex_unlock(&pf->lock); - - return ret; + return nfp_devlink_set_lanes(pf, eth_port.index, lanes); } static int -nfp_devlink_port_unsplit(struct devlink *devlink, unsigned int port_index, +nfp_devlink_port_unsplit(struct devlink *devlink, struct devlink_port *port, struct netlink_ext_ack *extack) { struct nfp_pf *pf = devlink_priv(devlink); @@ -104,29 +95,21 @@ nfp_devlink_port_unsplit(struct devlink *devlink, unsigned int port_index, unsigned int lanes; int ret; - mutex_lock(&pf->lock); - rtnl_lock(); - ret = nfp_devlink_fill_eth_port_from_id(pf, port_index, &eth_port); + ret = nfp_devlink_fill_eth_port_from_id(pf, port, &eth_port); rtnl_unlock(); if (ret) - goto out; + return ret; - if (!eth_port.is_split) { - ret = -EINVAL; - goto out; - } + if (!eth_port.is_split) + return -EINVAL; /* Special case the 100G CXP -> 2x40G unsplit */ lanes = eth_port.port_lanes; if (eth_port.port_lanes == 8) lanes = 10; 
- ret = nfp_devlink_set_lanes(pf, eth_port.index, lanes); -out: - mutex_unlock(&pf->lock); - - return ret; + return nfp_devlink_set_lanes(pf, eth_port.index, lanes); } static int @@ -161,13 +144,8 @@ static int nfp_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, struct netlink_ext_ack *extack) { struct nfp_pf *pf = devlink_priv(devlink); - int ret; - - mutex_lock(&pf->lock); - ret = nfp_app_eswitch_mode_set(pf->app, mode); - mutex_unlock(&pf->lock); - return ret; + return nfp_app_eswitch_mode_set(pf->app, mode); } static const struct nfp_devlink_versions_simple { @@ -375,12 +353,12 @@ int nfp_devlink_port_register(struct nfp_app *app, struct nfp_port *port) devlink = priv_to_devlink(app->pf); - return devlink_port_register(devlink, &port->dl_port, port->eth_id); + return devl_port_register(devlink, &port->dl_port, port->eth_id); } void nfp_devlink_port_unregister(struct nfp_port *port) { - devlink_port_unregister(&port->dl_port); + devl_port_unregister(&port->dl_port); } void nfp_devlink_port_type_eth_set(struct nfp_port *port) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c index bb3b8a7f6c5d..71301dbd8fb5 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c @@ -19,6 +19,7 @@ #include "nfpcore/nfp.h" #include "nfpcore/nfp_cpp.h" +#include "nfpcore/nfp_dev.h" #include "nfpcore/nfp_nffw.h" #include "nfpcore/nfp_nsp.h" @@ -32,17 +33,37 @@ static const char nfp_driver_name[] = "nfp"; static const struct pci_device_id nfp_pci_device_ids[] = { - { PCI_VENDOR_ID_NETRONOME, PCI_DEVICE_ID_NETRONOME_NFP6000, + { PCI_VENDOR_ID_NETRONOME, PCI_DEVICE_ID_NFP3800, PCI_VENDOR_ID_NETRONOME, PCI_ANY_ID, - PCI_ANY_ID, 0, + PCI_ANY_ID, 0, NFP_DEV_NFP3800, }, - { PCI_VENDOR_ID_NETRONOME, PCI_DEVICE_ID_NETRONOME_NFP5000, + { PCI_VENDOR_ID_NETRONOME, PCI_DEVICE_ID_NFP4000, PCI_VENDOR_ID_NETRONOME, PCI_ANY_ID, - PCI_ANY_ID, 0, + PCI_ANY_ID, 0, NFP_DEV_NFP6000, 
}, - { PCI_VENDOR_ID_NETRONOME, PCI_DEVICE_ID_NETRONOME_NFP4000, + { PCI_VENDOR_ID_NETRONOME, PCI_DEVICE_ID_NFP5000, PCI_VENDOR_ID_NETRONOME, PCI_ANY_ID, - PCI_ANY_ID, 0, + PCI_ANY_ID, 0, NFP_DEV_NFP6000, + }, + { PCI_VENDOR_ID_NETRONOME, PCI_DEVICE_ID_NFP6000, + PCI_VENDOR_ID_NETRONOME, PCI_ANY_ID, + PCI_ANY_ID, 0, NFP_DEV_NFP6000, + }, + { PCI_VENDOR_ID_CORIGINE, PCI_DEVICE_ID_NFP3800, + PCI_VENDOR_ID_CORIGINE, PCI_ANY_ID, + PCI_ANY_ID, 0, NFP_DEV_NFP3800, + }, + { PCI_VENDOR_ID_CORIGINE, PCI_DEVICE_ID_NFP4000, + PCI_VENDOR_ID_CORIGINE, PCI_ANY_ID, + PCI_ANY_ID, 0, NFP_DEV_NFP6000, + }, + { PCI_VENDOR_ID_CORIGINE, PCI_DEVICE_ID_NFP5000, + PCI_VENDOR_ID_CORIGINE, PCI_ANY_ID, + PCI_ANY_ID, 0, NFP_DEV_NFP6000, + }, + { PCI_VENDOR_ID_CORIGINE, PCI_DEVICE_ID_NFP6000, + PCI_VENDOR_ID_CORIGINE, PCI_ANY_ID, + PCI_ANY_ID, 0, NFP_DEV_NFP6000, }, { 0, } /* Required last entry. */ }; @@ -222,6 +243,7 @@ static int nfp_pcie_sriov_enable(struct pci_dev *pdev, int num_vfs) { #ifdef CONFIG_PCI_IOV struct nfp_pf *pf = pci_get_drvdata(pdev); + struct devlink *devlink; int err; if (num_vfs > pf->limit_vfs) { @@ -236,7 +258,8 @@ static int nfp_pcie_sriov_enable(struct pci_dev *pdev, int num_vfs) return err; } - mutex_lock(&pf->lock); + devlink = priv_to_devlink(pf); + devl_lock(devlink); err = nfp_app_sriov_enable(pf->app, num_vfs); if (err) { @@ -250,11 +273,11 @@ static int nfp_pcie_sriov_enable(struct pci_dev *pdev, int num_vfs) dev_dbg(&pdev->dev, "Created %d VFs.\n", pf->num_vfs); - mutex_unlock(&pf->lock); + devl_unlock(devlink); return num_vfs; err_sriov_disable: - mutex_unlock(&pf->lock); + devl_unlock(devlink); pci_disable_sriov(pdev); return err; #endif @@ -265,8 +288,10 @@ static int nfp_pcie_sriov_disable(struct pci_dev *pdev) { #ifdef CONFIG_PCI_IOV struct nfp_pf *pf = pci_get_drvdata(pdev); + struct devlink *devlink; - mutex_lock(&pf->lock); + devlink = priv_to_devlink(pf); + devl_lock(devlink); /* If the VFs are assigned we cannot shut down SR-IOV without * causing 
issues, so just leave the hardware available but @@ -274,7 +299,7 @@ static int nfp_pcie_sriov_disable(struct pci_dev *pdev) */ if (pci_vfs_assigned(pdev)) { dev_warn(&pdev->dev, "Disabling while VFs assigned - VFs will not be deallocated\n"); - mutex_unlock(&pf->lock); + devl_unlock(devlink); return -EPERM; } @@ -282,7 +307,7 @@ static int nfp_pcie_sriov_disable(struct pci_dev *pdev) pf->num_vfs = 0; - mutex_unlock(&pf->lock); + devl_unlock(devlink); pci_disable_sriov(pdev); dev_dbg(&pdev->dev, "Removed VFs.\n"); @@ -367,7 +392,7 @@ nfp_net_fw_find(struct pci_dev *pdev, struct nfp_pf *pf) /* First try to find a firmware image specific for this device */ interface = nfp_cpp_interface(pf->cpp); nfp_cpp_serial(pf->cpp, &serial); - sprintf(fw_name, "netronome/serial-%pMF-%02hhx-%02hhx.nffw", + sprintf(fw_name, "netronome/serial-%pMF-%02x-%02x.nffw", serial, interface >> 8, interface & 0xff); fw = nfp_net_fw_request(pdev, pf, fw_name); if (fw) @@ -385,7 +410,9 @@ nfp_net_fw_find(struct pci_dev *pdev, struct nfp_pf *pf) return NULL; } - fw_model = nfp_hwinfo_lookup(pf->hwinfo, "assembly.partno"); + fw_model = nfp_hwinfo_lookup(pf->hwinfo, "nffw.partno"); + if (!fw_model) + fw_model = nfp_hwinfo_lookup(pf->hwinfo, "assembly.partno"); if (!fw_model) { dev_err(&pdev->dev, "Error: can't read part number\n"); return NULL; @@ -664,25 +691,91 @@ static int nfp_pf_find_rtsyms(struct nfp_pf *pf) return 0; } +int nfp_net_pf_get_app_id(struct nfp_pf *pf) +{ + return nfp_pf_rtsym_read_optional(pf, "_pf%u_net_app_id", + NFP_APP_CORE_NIC); +} + +static u64 nfp_net_pf_get_app_cap(struct nfp_pf *pf) +{ + char name[32]; + int err = 0; + u64 val; + + snprintf(name, sizeof(name), "_pf%u_net_app_cap", nfp_cppcore_pcie_unit(pf->cpp)); + + val = nfp_rtsym_read_le(pf->rtbl, name, &err); + if (err) { + if (err != -ENOENT) + nfp_err(pf->cpp, "Unable to read symbol %s\n", name); + + return 0; + } + + return val; +} + +static void nfp_pf_cfg_hwinfo(struct nfp_pf *pf) +{ + struct nfp_nsp *nsp; + 
char hwinfo[32]; + bool sp_indiff; + int err; + + nsp = nfp_nsp_open(pf->cpp); + if (IS_ERR(nsp)) + return; + + if (!nfp_nsp_has_hwinfo_set(nsp)) + goto end; + + sp_indiff = (nfp_net_pf_get_app_id(pf) == NFP_APP_FLOWER_NIC) || + (nfp_net_pf_get_app_cap(pf) & NFP_NET_APP_CAP_SP_INDIFF); + + /* No need to clean `sp_indiff` in driver, management firmware + * will do it when application firmware is unloaded. + */ + snprintf(hwinfo, sizeof(hwinfo), "sp_indiff=%d", sp_indiff); + err = nfp_nsp_hwinfo_set(nsp, hwinfo, sizeof(hwinfo)); + /* Not a fatal error, no need to return error to stop driver from loading */ + if (err) { + nfp_warn(pf->cpp, "HWinfo(sp_indiff=%d) set failed: %d\n", sp_indiff, err); + } else { + /* Need reinit eth_tbl since the eth table state may change + * after sp_indiff is configured. + */ + kfree(pf->eth_tbl); + pf->eth_tbl = __nfp_eth_read_ports(pf->cpp, nsp); + } + +end: + nfp_nsp_close(nsp); +} + static int nfp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id) { + const struct nfp_dev_info *dev_info; struct devlink *devlink; struct nfp_pf *pf; int err; - if (pdev->vendor == PCI_VENDOR_ID_NETRONOME && - pdev->device == PCI_DEVICE_ID_NETRONOME_NFP6000_VF) + if ((pdev->vendor == PCI_VENDOR_ID_NETRONOME || + pdev->vendor == PCI_VENDOR_ID_CORIGINE) && + (pdev->device == PCI_DEVICE_ID_NFP3800_VF || + pdev->device == PCI_DEVICE_ID_NFP6000_VF)) dev_warn(&pdev->dev, "Binding NFP VF device to the NFP PF driver, the VF driver is called 'nfp_netvf'\n"); + dev_info = &nfp_dev_info[pci_id->driver_data]; + err = pci_enable_device(pdev); if (err < 0) return err; pci_set_master(pdev); - err = dma_set_mask_and_coherent(&pdev->dev, - DMA_BIT_MASK(NFP_NET_MAX_DMA_BITS)); + err = dma_set_mask_and_coherent(&pdev->dev, dev_info->dma_mask); if (err) goto err_pci_disable; @@ -700,9 +793,9 @@ static int nfp_pci_probe(struct pci_dev *pdev, pf = devlink_priv(devlink); INIT_LIST_HEAD(&pf->vnics); INIT_LIST_HEAD(&pf->ports); - mutex_init(&pf->lock); 
pci_set_drvdata(pdev, pf); pf->pdev = pdev; + pf->dev_info = dev_info; pf->wq = alloc_workqueue("nfp-%s", 0, 2, pci_name(pdev)); if (!pf->wq) { @@ -710,7 +803,7 @@ static int nfp_pci_probe(struct pci_dev *pdev, goto err_pci_priv_unset; } - pf->cpp = nfp_cpp_from_nfp6000_pcie(pdev); + pf->cpp = nfp_cpp_from_nfp6000_pcie(pdev, dev_info); if (IS_ERR(pf->cpp)) { err = PTR_ERR(pf->cpp); goto err_disable_msix; @@ -760,6 +853,8 @@ static int nfp_pci_probe(struct pci_dev *pdev, goto err_fw_unload; } + nfp_pf_cfg_hwinfo(pf); + err = nfp_net_pci_probe(pf); if (err) goto err_fw_unload; @@ -790,7 +885,6 @@ err_disable_msix: destroy_workqueue(pf->wq); err_pci_priv_unset: pci_set_drvdata(pdev, NULL); - mutex_destroy(&pf->lock); devlink_free(devlink); err_rel_regions: pci_release_regions(pdev); @@ -827,7 +921,6 @@ static void __nfp_pci_shutdown(struct pci_dev *pdev, bool unload_fw) kfree(pf->eth_tbl); kfree(pf->nspi); - mutex_destroy(&pf->lock); devlink_free(priv_to_devlink(pf)); pci_release_regions(pdev); pci_disable_device(pdev); @@ -856,7 +949,9 @@ static int __init nfp_main_init(void) { int err; - pr_info("%s: NFP PCIe Driver, Copyright (C) 2014-2017 Netronome Systems\n", + pr_info("%s: NFP PCIe Driver, Copyright (C) 2014-2020 Netronome Systems\n", + nfp_driver_name); + pr_info("%s: NFP PCIe Driver, Copyright (C) 2021-2022 Corigine Inc.\n", nfp_driver_name); nfp_net_debugfs_create(); @@ -900,6 +995,6 @@ MODULE_FIRMWARE("netronome/nic_AMDA0099-0001_2x10.nffw"); MODULE_FIRMWARE("netronome/nic_AMDA0099-0001_2x25.nffw"); MODULE_FIRMWARE("netronome/nic_AMDA0099-0001_1x10_1x25.nffw"); -MODULE_AUTHOR("Netronome Systems <oss-drivers@netronome.com>"); +MODULE_AUTHOR("Corigine, Inc. 
<oss-drivers@corigine.com>"); MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("The Netronome Flow Processor (NFP) driver."); +MODULE_DESCRIPTION("The Network Flow Processor (NFP) driver."); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.h b/drivers/net/ethernet/netronome/nfp/nfp_main.h index a7dede946a33..afd3edfa2428 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_main.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_main.h @@ -13,7 +13,6 @@ #include <linux/list.h> #include <linux/types.h> #include <linux/msi.h> -#include <linux/mutex.h> #include <linux/pci.h> #include <linux/workqueue.h> #include <net/devlink.h> @@ -48,6 +47,7 @@ struct nfp_dumpspec { /** * struct nfp_pf - NFP PF-specific device structure * @pdev: Backpointer to PCI device + * @dev_info: NFP ASIC params * @cpp: Pointer to the CPP handle * @app: Pointer to the APP handle * @data_vnic_bar: Pointer to the CPP area for the data vNICs' BARs @@ -84,10 +84,12 @@ struct nfp_dumpspec { * @port_refresh_work: Work entry for taking netdevs out * @shared_bufs: Array of shared buffer structures if FW has any SBs * @num_shared_bufs: Number of elements in @shared_bufs - * @lock: Protects all fields which may change after probe + * + * Fields which may change after proble are protected by devlink instance lock. 
*/ struct nfp_pf { struct pci_dev *pdev; + const struct nfp_dev_info *dev_info; struct nfp_cpp *cpp; @@ -139,8 +141,6 @@ struct nfp_pf { struct nfp_shared_buf *shared_bufs; unsigned int num_shared_bufs; - - struct mutex lock; }; extern struct pci_driver nfp_netvf_pci_driver; @@ -161,6 +161,7 @@ bool nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb); int nfp_pf_rtsym_read_optional(struct nfp_pf *pf, const char *format, unsigned int default_val); +int nfp_net_pf_get_app_id(struct nfp_pf *pf); u8 __iomem * nfp_pf_map_rtsym(struct nfp_pf *pf, const char *name, const char *sym_fmt, unsigned int min_size, struct nfp_cpp_area **area); @@ -190,4 +191,7 @@ int nfp_shared_buf_pool_set(struct nfp_pf *pf, unsigned int sb, int nfp_devlink_params_register(struct nfp_pf *pf); void nfp_devlink_params_unregister(struct nfp_pf *pf); + +unsigned int nfp_net_lr2speed(unsigned int linkrate); +unsigned int nfp_net_speed2lr(unsigned int speed); #endif /* NFP_MAIN_H */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index 0b1865e9f0b5..a101ff30a1ae 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -63,9 +63,6 @@ #define NFP_NET_Q0_BAR 2 #define NFP_NET_Q1_BAR 4 /* OBSOLETE */ -/* Max bits in DMA address */ -#define NFP_NET_MAX_DMA_BITS 40 - /* Default size for MTU and freelist buffer sizes */ #define NFP_NET_DEFAULT_MTU 1500U @@ -85,11 +82,6 @@ NFP_NET_MAX_TX_RINGS : NFP_NET_MAX_RX_RINGS) #define NFP_NET_MAX_IRQS (NFP_NET_NON_Q_VECTORS + NFP_NET_MAX_R_VECS) -#define NFP_NET_MIN_TX_DESCS 256 /* Min. # of Tx descs per ring */ -#define NFP_NET_MIN_RX_DESCS 256 /* Min. # of Rx descs per ring */ -#define NFP_NET_MAX_TX_DESCS (256 * 1024) /* Max. # of Tx descs per ring */ -#define NFP_NET_MAX_RX_DESCS (256 * 1024) /* Max. 
# of Rx descs per ring */ - #define NFP_NET_TX_DESCS_DEFAULT 4096 /* Default # of Tx descs per ring */ #define NFP_NET_RX_DESCS_DEFAULT 4096 /* Default # of Rx descs per ring */ @@ -105,104 +97,61 @@ /* Forward declarations */ struct nfp_cpp; +struct nfp_dev_info; +struct nfp_dp_ops; struct nfp_eth_table_port; struct nfp_net; struct nfp_net_r_vector; struct nfp_port; +struct xsk_buff_pool; + +struct nfp_nfd3_tx_desc; +struct nfp_nfd3_tx_buf; + +struct nfp_nfdk_tx_desc; +struct nfp_nfdk_tx_buf; /* Convenience macro for wrapping descriptor index on ring size */ #define D_IDX(ring, idx) ((idx) & ((ring)->cnt - 1)) /* Convenience macro for writing dma address into RX/TX descriptors */ -#define nfp_desc_set_dma_addr(desc, dma_addr) \ +#define nfp_desc_set_dma_addr_40b(desc, dma_addr) \ do { \ - __typeof(desc) __d = (desc); \ + __typeof__(desc) __d = (desc); \ dma_addr_t __addr = (dma_addr); \ \ __d->dma_addr_lo = cpu_to_le32(lower_32_bits(__addr)); \ __d->dma_addr_hi = upper_32_bits(__addr) & 0xff; \ } while (0) -/* TX descriptor format */ - -#define PCIE_DESC_TX_EOP BIT(7) -#define PCIE_DESC_TX_OFFSET_MASK GENMASK(6, 0) -#define PCIE_DESC_TX_MSS_MASK GENMASK(13, 0) - -/* Flags in the host TX descriptor */ -#define PCIE_DESC_TX_CSUM BIT(7) -#define PCIE_DESC_TX_IP4_CSUM BIT(6) -#define PCIE_DESC_TX_TCP_CSUM BIT(5) -#define PCIE_DESC_TX_UDP_CSUM BIT(4) -#define PCIE_DESC_TX_VLAN BIT(3) -#define PCIE_DESC_TX_LSO BIT(2) -#define PCIE_DESC_TX_ENCAP BIT(1) -#define PCIE_DESC_TX_O_IP4_CSUM BIT(0) - -struct nfp_net_tx_desc { - union { - struct { - u8 dma_addr_hi; /* High bits of host buf address */ - __le16 dma_len; /* Length to DMA for this desc */ - u8 offset_eop; /* Offset in buf where pkt starts + - * highest bit is eop flag. 
- */ - __le32 dma_addr_lo; /* Low 32bit of host buf addr */ - - __le16 mss; /* MSS to be used for LSO */ - u8 lso_hdrlen; /* LSO, TCP payload offset */ - u8 flags; /* TX Flags, see @PCIE_DESC_TX_* */ - union { - struct { - u8 l3_offset; /* L3 header offset */ - u8 l4_offset; /* L4 header offset */ - }; - __le16 vlan; /* VLAN tag to add if indicated */ - }; - __le16 data_len; /* Length of frame + meta data */ - } __packed; - __le32 vals[4]; - __le64 vals8[2]; - }; -}; - -/** - * struct nfp_net_tx_buf - software TX buffer descriptor - * @skb: normal ring, sk_buff associated with this buffer - * @frag: XDP ring, page frag associated with this buffer - * @dma_addr: DMA mapping address of the buffer - * @fidx: Fragment index (-1 for the head and [0..nr_frags-1] for frags) - * @pkt_cnt: Number of packets to be produced out of the skb associated - * with this buffer (valid only on the head's buffer). - * Will be 1 for all non-TSO packets. - * @real_len: Number of bytes which to be produced out of the skb (valid only - * on the head's buffer). Equal to skb->len for non-TSO packets. 
- */ -struct nfp_net_tx_buf { - union { - struct sk_buff *skb; - void *frag; - }; - dma_addr_t dma_addr; - short int fidx; - u16 pkt_cnt; - u32 real_len; -}; +#define nfp_desc_set_dma_addr_48b(desc, dma_addr) \ + do { \ + __typeof__(desc) __d = (desc); \ + dma_addr_t __addr = (dma_addr); \ + \ + __d->dma_addr_hi = cpu_to_le16(upper_32_bits(__addr)); \ + __d->dma_addr_lo = cpu_to_le32(lower_32_bits(__addr)); \ + } while (0) /** * struct nfp_net_tx_ring - TX ring structure * @r_vec: Back pointer to ring vector structure * @idx: Ring index from Linux's perspective - * @qcidx: Queue Controller Peripheral (QCP) queue index for the TX queue + * @data_pending: number of bytes added to current block (NFDK only) * @qcp_q: Pointer to base of the QCP TX queue + * @txrwb: TX pointer write back area * @cnt: Size of the queue in number of descriptors * @wr_p: TX ring write pointer (free running) * @rd_p: TX ring read pointer (free running) * @qcp_rd_p: Local copy of QCP TX queue read pointer * @wr_ptr_add: Accumulated number of buffers to add to QCP write pointer * (used for .xmit_more delayed kick) - * @txbufs: Array of transmitted TX buffers, to free on transmit - * @txds: Virtual address of TX ring in host memory + * @txbufs: Array of transmitted TX buffers, to free on transmit (NFD3) + * @ktxbufs: Array of transmitted TX buffers, to free on transmit (NFDK) + * @txds: Virtual address of TX ring in host memory (NFD3) + * @ktxds: Virtual address of TX ring in host memory (NFDK) + * + * @qcidx: Queue Controller Peripheral (QCP) queue index for the TX queue * @dma: DMA address of the TX ring * @size: Size, in bytes, of the TX ring (needed to free) * @is_xdp: Is this a XDP TX ring? 
@@ -210,9 +159,10 @@ struct nfp_net_tx_buf { struct nfp_net_tx_ring { struct nfp_net_r_vector *r_vec; - u32 idx; - int qcidx; + u16 idx; + u16 data_pending; u8 __iomem *qcp_q; + u64 *txrwb; u32 cnt; u32 wr_p; @@ -221,8 +171,17 @@ struct nfp_net_tx_ring { u32 wr_ptr_add; - struct nfp_net_tx_buf *txbufs; - struct nfp_net_tx_desc *txds; + union { + struct nfp_nfd3_tx_buf *txbufs; + struct nfp_nfdk_tx_buf *ktxbufs; + }; + union { + struct nfp_nfd3_tx_desc *txds; + struct nfp_nfdk_tx_desc *ktxds; + }; + + /* Cold data follows */ + int qcidx; dma_addr_t dma; size_t size; @@ -266,8 +225,8 @@ struct nfp_net_tx_ring { struct nfp_net_rx_desc { union { struct { - u8 dma_addr_hi; /* High bits of the buf address */ - __le16 reserved; /* Must be zero */ + __le16 dma_addr_hi; /* High bits of the buf address */ + u8 reserved; /* Must be zero */ u8 meta_len_dd; /* Must be zero */ __le32 dma_addr_lo; /* Low bits of the buffer address */ @@ -289,6 +248,8 @@ struct nfp_net_rx_desc { }; #define NFP_NET_META_FIELD_MASK GENMASK(NFP_NET_META_FIELD_SIZE - 1, 0) +#define NFP_NET_VLAN_CTAG 0 +#define NFP_NET_VLAN_STAG 1 struct nfp_meta_parsed { u8 hash_type; @@ -297,6 +258,11 @@ struct nfp_meta_parsed { u32 mark; u32 portid; __wsum csum; + struct { + bool stripped; + u8 tpid; + u16 tci; + } vlan; }; struct nfp_net_rx_hash { @@ -315,6 +281,16 @@ struct nfp_net_rx_buf { }; /** + * struct nfp_net_xsk_rx_buf - software RX XSK buffer descriptor + * @dma_addr: DMA mapping address of the buffer + * @xdp: XSK buffer pool handle (for AF_XDP) + */ +struct nfp_net_xsk_rx_buf { + dma_addr_t dma_addr; + struct xdp_buff *xdp; +}; + +/** * struct nfp_net_rx_ring - RX ring structure * @r_vec: Back pointer to ring vector structure * @cnt: Size of the queue in number of descriptors @@ -324,6 +300,7 @@ struct nfp_net_rx_buf { * @fl_qcidx: Queue Controller Peripheral (QCP) queue index for the freelist * @qcp_fl: Pointer to base of the QCP freelist queue * @rxbufs: Array of transmitted FL/RX buffers + * 
@xsk_rxbufs: Array of transmitted FL/RX buffers (for AF_XDP) * @rxds: Virtual address of FL/RX ring in host memory * @xdp_rxq: RX-ring info avail for XDP * @dma: DMA address of the FL/RX ring @@ -342,6 +319,7 @@ struct nfp_net_rx_ring { u8 __iomem *qcp_fl; struct nfp_net_rx_buf *rxbufs; + struct nfp_net_xsk_rx_buf *xsk_rxbufs; struct nfp_net_rx_desc *rxds; struct xdp_rxq_info xdp_rxq; @@ -360,6 +338,7 @@ struct nfp_net_rx_ring { * @tx_ring: Pointer to TX ring * @rx_ring: Pointer to RX ring * @xdp_ring: Pointer to an extra TX ring for XDP + * @xsk_pool: XSK buffer pool active on vector queue pair (for AF_XDP) * @irq_entry: MSI-X table entry (use for talking to the device) * @event_ctr: Number of interrupt * @rx_dim: Dynamic interrupt moderation structure for RX @@ -431,6 +410,7 @@ struct nfp_net_r_vector { u64 rx_replace_buf_alloc_fail; struct nfp_net_tx_ring *xdp_ring; + struct xsk_buff_pool *xsk_pool; struct u64_stats_sync tx_sync; u64 tx_pkts; @@ -460,13 +440,17 @@ struct nfp_net_fw_version { u8 minor; u8 major; u8 class; - u8 resv; + + /* This byte can be exploited for more use, currently, + * BIT0: dp type, BIT[7:1]: reserved + */ + u8 extend; } __packed; static inline bool nfp_net_fw_ver_eq(struct nfp_net_fw_version *fw_ver, - u8 resv, u8 class, u8 major, u8 minor) + u8 extend, u8 class, u8 major, u8 minor) { - return fw_ver->resv == resv && + return fw_ver->extend == extend && fw_ver->class == class && fw_ver->major == major && fw_ver->minor == minor; @@ -494,13 +478,17 @@ struct nfp_stat_pair { * @rx_rings: Array of pre-allocated RX ring structures * @ctrl_bar: Pointer to mapped control BAR * - * @txd_cnt: Size of the TX ring in number of descriptors - * @rxd_cnt: Size of the RX ring in number of descriptors + * @ops: Callbacks and parameters for this vNIC's NFD version + * @txrwb: TX pointer write back area (indexed by queue id) + * @txrwb_dma: TX pointer write back area DMA address + * @txd_cnt: Size of the TX ring in number of min size packets + * 
@rxd_cnt: Size of the RX ring in number of min size packets * @num_r_vecs: Number of used ring vectors * @num_tx_rings: Currently configured number of TX rings * @num_stack_tx_rings: Number of TX rings used by the stack (not XDP) * @num_rx_rings: Currently configured number of RX rings * @mtu: Device MTU + * @xsk_pools: XSK buffer pools, @max_r_vecs in size (for AF_XDP). */ struct nfp_net_dp { struct device *dev; @@ -527,6 +515,11 @@ struct nfp_net_dp { /* Cold data follows */ + const struct nfp_dp_ops *ops; + + u64 *txrwb; + dma_addr_t txrwb_dma; + unsigned int txd_cnt; unsigned int rxd_cnt; @@ -537,11 +530,14 @@ struct nfp_net_dp { unsigned int num_rx_rings; unsigned int mtu; + + struct xsk_buff_pool **xsk_pools; }; /** * struct nfp_net - NFP network device structure * @dp: Datapath structure + * @dev_info: NFP ASIC params * @id: vNIC id within the PF (0 for VFs) * @fw_ver: Firmware version * @cap: Capabilities advertised by the Firmware @@ -615,6 +611,7 @@ struct nfp_net_dp { struct nfp_net { struct nfp_net_dp dp; + const struct nfp_dev_info *dev_info; struct nfp_net_fw_version fw_ver; u32 id; @@ -767,7 +764,6 @@ static inline void nn_pci_flush(struct nfp_net *nn) * either add to a pointer or to read the pointer value. 
*/ #define NFP_QCP_QUEUE_ADDR_SZ 0x800 -#define NFP_QCP_QUEUE_AREA_SZ 0x80000 #define NFP_QCP_QUEUE_OFF(_x) ((_x) * NFP_QCP_QUEUE_ADDR_SZ) #define NFP_QCP_QUEUE_ADD_RPTR 0x0000 #define NFP_QCP_QUEUE_ADD_WPTR 0x0004 @@ -776,50 +772,21 @@ static inline void nn_pci_flush(struct nfp_net *nn) #define NFP_QCP_QUEUE_STS_HI 0x000c #define NFP_QCP_QUEUE_STS_HI_WRITEPTR_mask 0x3ffff -/* The offset of a QCP queues in the PCIe Target */ -#define NFP_PCIE_QUEUE(_q) (0x80000 + (NFP_QCP_QUEUE_ADDR_SZ * ((_q) & 0xff))) - /* nfp_qcp_ptr - Read or Write Pointer of a queue */ enum nfp_qcp_ptr { NFP_QCP_READ_PTR = 0, NFP_QCP_WRITE_PTR }; -/* There appear to be an *undocumented* upper limit on the value which - * one can add to a queue and that value is either 0x3f or 0x7f. We - * go with 0x3f as a conservative measure. - */ -#define NFP_QCP_MAX_ADD 0x3f - -static inline void _nfp_qcp_ptr_add(u8 __iomem *q, - enum nfp_qcp_ptr ptr, u32 val) -{ - u32 off; - - if (ptr == NFP_QCP_READ_PTR) - off = NFP_QCP_QUEUE_ADD_RPTR; - else - off = NFP_QCP_QUEUE_ADD_WPTR; - - while (val > NFP_QCP_MAX_ADD) { - writel(NFP_QCP_MAX_ADD, q + off); - val -= NFP_QCP_MAX_ADD; - } - - writel(val, q + off); -} - /** * nfp_qcp_rd_ptr_add() - Add the value to the read pointer of a queue * * @q: Base address for queue structure * @val: Value to add to the queue pointer - * - * If @val is greater than @NFP_QCP_MAX_ADD multiple writes are performed. */ static inline void nfp_qcp_rd_ptr_add(u8 __iomem *q, u32 val) { - _nfp_qcp_ptr_add(q, NFP_QCP_READ_PTR, val); + writel(val, q + NFP_QCP_QUEUE_ADD_RPTR); } /** @@ -827,12 +794,10 @@ static inline void nfp_qcp_rd_ptr_add(u8 __iomem *q, u32 val) * * @q: Base address for queue structure * @val: Value to add to the queue pointer - * - * If @val is greater than @NFP_QCP_MAX_ADD multiple writes are performed. 
*/ static inline void nfp_qcp_wr_ptr_add(u8 __iomem *q, u32 val) { - _nfp_qcp_ptr_add(q, NFP_QCP_WRITE_PTR, val); + writel(val, q + NFP_QCP_QUEUE_ADD_WPTR); } static inline u32 _nfp_qcp_read(u8 __iomem *q, enum nfp_qcp_ptr ptr) @@ -875,6 +840,8 @@ static inline u32 nfp_qcp_wr_ptr_read(u8 __iomem *q) return _nfp_qcp_read(q, NFP_QCP_WRITE_PTR); } +u32 nfp_qcp_queue_offset(const struct nfp_dev_info *dev_info, u16 queue); + static inline bool nfp_net_is_data_vnic(struct nfp_net *nn) { WARN_ON_ONCE(!nn->dp.netdev && nn->port); @@ -921,11 +888,13 @@ static inline void nn_ctrl_bar_unlock(struct nfp_net *nn) /* Globals */ extern const char nfp_driver_version[]; -extern const struct net_device_ops nfp_net_netdev_ops; +extern const struct net_device_ops nfp_nfd3_netdev_ops; +extern const struct net_device_ops nfp_nfdk_netdev_ops; static inline bool nfp_netdev_is_nfp_net(struct net_device *netdev) { - return netdev->netdev_ops == &nfp_net_netdev_ops; + return netdev->netdev_ops == &nfp_nfd3_netdev_ops || + netdev->netdev_ops == &nfp_nfdk_netdev_ops; } static inline int nfp_net_coalesce_para_check(u32 usecs, u32 pkts) @@ -941,7 +910,8 @@ void nfp_net_get_fw_version(struct nfp_net_fw_version *fw_ver, void __iomem *ctrl_bar); struct nfp_net * -nfp_net_alloc(struct pci_dev *pdev, void __iomem *ctrl_bar, bool needs_netdev, +nfp_net_alloc(struct pci_dev *pdev, const struct nfp_dev_info *dev_info, + void __iomem *ctrl_bar, bool needs_netdev, unsigned int max_tx_rings, unsigned int max_rx_rings); void nfp_net_free(struct nfp_net *nn); @@ -972,6 +942,10 @@ void nfp_net_irqs_disable(struct pci_dev *pdev); void nfp_net_irqs_assign(struct nfp_net *nn, struct msix_entry *irq_entries, unsigned int n); +struct sk_buff * +nfp_net_tls_tx(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, + struct sk_buff *skb, u64 *tls_handle, int *nr_frags); +void nfp_net_tls_tx_undo(struct sk_buff *skb, u64 tls_handle); struct nfp_net_dp *nfp_net_clone_dp(struct nfp_net *nn); int 
nfp_net_ring_reconfig(struct nfp_net *nn, struct nfp_net_dp *new, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 850bfdf83d0a..27f4786ace4f 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -/* Copyright (C) 2015-2018 Netronome Systems, Inc. */ +/* Copyright (C) 2015-2019 Netronome Systems, Inc. */ /* * nfp_net_common.c @@ -13,7 +13,6 @@ #include <linux/bitfield.h> #include <linux/bpf.h> -#include <linux/bpf_trace.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> @@ -32,19 +31,24 @@ #include <linux/ethtool.h> #include <linux/log2.h> #include <linux/if_vlan.h> +#include <linux/if_bridge.h> #include <linux/random.h> #include <linux/vmalloc.h> #include <linux/ktime.h> #include <net/tls.h> #include <net/vxlan.h> +#include <net/xdp_sock_drv.h> +#include "nfpcore/nfp_dev.h" #include "nfpcore/nfp_nsp.h" #include "ccm.h" #include "nfp_app.h" #include "nfp_net_ctrl.h" #include "nfp_net.h" +#include "nfp_net_dp.h" #include "nfp_net_sriov.h" +#include "nfp_net_xsk.h" #include "nfp_port.h" #include "crypto/crypto.h" #include "crypto/fw.h" @@ -63,33 +67,10 @@ void nfp_net_get_fw_version(struct nfp_net_fw_version *fw_ver, put_unaligned_le32(reg, fw_ver); } -static dma_addr_t nfp_net_dma_map_rx(struct nfp_net_dp *dp, void *frag) +u32 nfp_qcp_queue_offset(const struct nfp_dev_info *dev_info, u16 queue) { - return dma_map_single_attrs(dp->dev, frag + NFP_NET_RX_BUF_HEADROOM, - dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA, - dp->rx_dma_dir, DMA_ATTR_SKIP_CPU_SYNC); -} - -static void -nfp_net_dma_sync_dev_rx(const struct nfp_net_dp *dp, dma_addr_t dma_addr) -{ - dma_sync_single_for_device(dp->dev, dma_addr, - dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA, - dp->rx_dma_dir); -} - -static void nfp_net_dma_unmap_rx(struct nfp_net_dp *dp, dma_addr_t 
dma_addr) -{ - dma_unmap_single_attrs(dp->dev, dma_addr, - dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA, - dp->rx_dma_dir, DMA_ATTR_SKIP_CPU_SYNC); -} - -static void nfp_net_dma_sync_cpu_rx(struct nfp_net_dp *dp, dma_addr_t dma_addr, - unsigned int len) -{ - dma_sync_single_for_cpu(dp->dev, dma_addr - NFP_NET_RX_BUF_HEADROOM, - len, dp->rx_dma_dir); + queue &= dev_info->qc_idx_mask; + return dev_info->qc_addr_offset + NFP_QCP_QUEUE_ADDR_SZ * queue; } /* Firmware reconfig @@ -375,19 +356,6 @@ int nfp_net_mbox_reconfig_and_unlock(struct nfp_net *nn, u32 mbox_cmd) */ /** - * nfp_net_irq_unmask() - Unmask automasked interrupt - * @nn: NFP Network structure - * @entry_nr: MSI-X table entry - * - * Clear the ICR for the IRQ entry. - */ -static void nfp_net_irq_unmask(struct nfp_net *nn, unsigned int entry_nr) -{ - nn_writeb(nn, NFP_NET_CFG_ICR(entry_nr), NFP_NET_CFG_ICR_UNMASKED); - nn_pci_flush(nn); -} - -/** * nfp_net_irqs_alloc() - allocates MSI-X irqs * @pdev: PCI device structure * @irq_entries: Array to be initialized and used to hold the irq entries @@ -506,19 +474,22 @@ static void nfp_net_read_link_status(struct nfp_net *nn) { unsigned long flags; bool link_up; - u32 sts; + u16 sts; spin_lock_irqsave(&nn->link_status_lock, flags); - sts = nn_readl(nn, NFP_NET_CFG_STS); + sts = nn_readw(nn, NFP_NET_CFG_STS); link_up = !!(sts & NFP_NET_CFG_STS_LINK); if (nn->link_up == link_up) goto out; nn->link_up = link_up; - if (nn->port) + if (nn->port) { set_bit(NFP_PORT_CHANGED, &nn->port->flags); + if (nn->port->link_cb) + nn->port->link_cb(nn->port); + } if (nn->link_up) { netif_carrier_on(nn->dp.netdev); @@ -569,49 +540,6 @@ static irqreturn_t nfp_net_irq_exn(int irq, void *data) } /** - * nfp_net_tx_ring_init() - Fill in the boilerplate for a TX ring - * @tx_ring: TX ring structure - * @r_vec: IRQ vector servicing this ring - * @idx: Ring index - * @is_xdp: Is this an XDP TX ring? 
- */ -static void -nfp_net_tx_ring_init(struct nfp_net_tx_ring *tx_ring, - struct nfp_net_r_vector *r_vec, unsigned int idx, - bool is_xdp) -{ - struct nfp_net *nn = r_vec->nfp_net; - - tx_ring->idx = idx; - tx_ring->r_vec = r_vec; - tx_ring->is_xdp = is_xdp; - u64_stats_init(&tx_ring->r_vec->tx_sync); - - tx_ring->qcidx = tx_ring->idx * nn->stride_tx; - tx_ring->qcp_q = nn->tx_bar + NFP_QCP_QUEUE_OFF(tx_ring->qcidx); -} - -/** - * nfp_net_rx_ring_init() - Fill in the boilerplate for a RX ring - * @rx_ring: RX ring structure - * @r_vec: IRQ vector servicing this ring - * @idx: Ring index - */ -static void -nfp_net_rx_ring_init(struct nfp_net_rx_ring *rx_ring, - struct nfp_net_r_vector *r_vec, unsigned int idx) -{ - struct nfp_net *nn = r_vec->nfp_net; - - rx_ring->idx = idx; - rx_ring->r_vec = r_vec; - u64_stats_init(&rx_ring->r_vec->rx_sync); - - rx_ring->fl_qcidx = rx_ring->idx * nn->stride_rx; - rx_ring->qcp_fl = nn->rx_bar + NFP_QCP_QUEUE_OFF(rx_ring->fl_qcidx); -} - -/** * nfp_net_aux_irq_request() - Request an auxiliary interrupt (LSC or EXN) * @nn: NFP Network structure * @ctrl_offset: Control BAR offset where IRQ configuration should be written @@ -658,178 +586,7 @@ static void nfp_net_aux_irq_free(struct nfp_net *nn, u32 ctrl_offset, free_irq(nn->irq_entries[vector_idx].vector, nn); } -/* Transmit - * - * One queue controller peripheral queue is used for transmit. The - * driver en-queues packets for transmit by advancing the write - * pointer. The device indicates that packets have transmitted by - * advancing the read pointer. The driver maintains a local copy of - * the read and write pointer in @struct nfp_net_tx_ring. The driver - * keeps @wr_p in sync with the queue controller write pointer and can - * determine how many packets have been transmitted by comparing its - * copy of the read pointer @rd_p with the read pointer maintained by - * the queue controller peripheral. 
- */ - -/** - * nfp_net_tx_full() - Check if the TX ring is full - * @tx_ring: TX ring to check - * @dcnt: Number of descriptors that need to be enqueued (must be >= 1) - * - * This function checks, based on the *host copy* of read/write - * pointer if a given TX ring is full. The real TX queue may have - * some newly made available slots. - * - * Return: True if the ring is full. - */ -static int nfp_net_tx_full(struct nfp_net_tx_ring *tx_ring, int dcnt) -{ - return (tx_ring->wr_p - tx_ring->rd_p) >= (tx_ring->cnt - dcnt); -} - -/* Wrappers for deciding when to stop and restart TX queues */ -static int nfp_net_tx_ring_should_wake(struct nfp_net_tx_ring *tx_ring) -{ - return !nfp_net_tx_full(tx_ring, MAX_SKB_FRAGS * 4); -} - -static int nfp_net_tx_ring_should_stop(struct nfp_net_tx_ring *tx_ring) -{ - return nfp_net_tx_full(tx_ring, MAX_SKB_FRAGS + 1); -} - -/** - * nfp_net_tx_ring_stop() - stop tx ring - * @nd_q: netdev queue - * @tx_ring: driver tx queue structure - * - * Safely stop TX ring. Remember that while we are running .start_xmit() - * someone else may be cleaning the TX ring completions so we need to be - * extra careful here. - */ -static void nfp_net_tx_ring_stop(struct netdev_queue *nd_q, - struct nfp_net_tx_ring *tx_ring) -{ - netif_tx_stop_queue(nd_q); - - /* We can race with the TX completion out of NAPI so recheck */ - smp_mb(); - if (unlikely(nfp_net_tx_ring_should_wake(tx_ring))) - netif_tx_start_queue(nd_q); -} - -/** - * nfp_net_tx_tso() - Set up Tx descriptor for LSO - * @r_vec: per-ring structure - * @txbuf: Pointer to driver soft TX descriptor - * @txd: Pointer to HW TX descriptor - * @skb: Pointer to SKB - * @md_bytes: Prepend length - * - * Set up Tx descriptor for LSO, do nothing for non-LSO skbs. - * Return error on packet header greater than maximum supported LSO header size. 
- */ -static void nfp_net_tx_tso(struct nfp_net_r_vector *r_vec, - struct nfp_net_tx_buf *txbuf, - struct nfp_net_tx_desc *txd, struct sk_buff *skb, - u32 md_bytes) -{ - u32 l3_offset, l4_offset, hdrlen; - u16 mss; - - if (!skb_is_gso(skb)) - return; - - if (!skb->encapsulation) { - l3_offset = skb_network_offset(skb); - l4_offset = skb_transport_offset(skb); - hdrlen = skb_transport_offset(skb) + tcp_hdrlen(skb); - } else { - l3_offset = skb_inner_network_offset(skb); - l4_offset = skb_inner_transport_offset(skb); - hdrlen = skb_inner_transport_header(skb) - skb->data + - inner_tcp_hdrlen(skb); - } - - txbuf->pkt_cnt = skb_shinfo(skb)->gso_segs; - txbuf->real_len += hdrlen * (txbuf->pkt_cnt - 1); - - mss = skb_shinfo(skb)->gso_size & PCIE_DESC_TX_MSS_MASK; - txd->l3_offset = l3_offset - md_bytes; - txd->l4_offset = l4_offset - md_bytes; - txd->lso_hdrlen = hdrlen - md_bytes; - txd->mss = cpu_to_le16(mss); - txd->flags |= PCIE_DESC_TX_LSO; - - u64_stats_update_begin(&r_vec->tx_sync); - r_vec->tx_lso++; - u64_stats_update_end(&r_vec->tx_sync); -} - -/** - * nfp_net_tx_csum() - Set TX CSUM offload flags in TX descriptor - * @dp: NFP Net data path struct - * @r_vec: per-ring structure - * @txbuf: Pointer to driver soft TX descriptor - * @txd: Pointer to TX descriptor - * @skb: Pointer to SKB - * - * This function sets the TX checksum flags in the TX descriptor based - * on the configuration and the protocol of the packet to be transmitted. - */ -static void nfp_net_tx_csum(struct nfp_net_dp *dp, - struct nfp_net_r_vector *r_vec, - struct nfp_net_tx_buf *txbuf, - struct nfp_net_tx_desc *txd, struct sk_buff *skb) -{ - struct ipv6hdr *ipv6h; - struct iphdr *iph; - u8 l4_hdr; - - if (!(dp->ctrl & NFP_NET_CFG_CTRL_TXCSUM)) - return; - - if (skb->ip_summed != CHECKSUM_PARTIAL) - return; - - txd->flags |= PCIE_DESC_TX_CSUM; - if (skb->encapsulation) - txd->flags |= PCIE_DESC_TX_ENCAP; - - iph = skb->encapsulation ? 
inner_ip_hdr(skb) : ip_hdr(skb); - ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb); - - if (iph->version == 4) { - txd->flags |= PCIE_DESC_TX_IP4_CSUM; - l4_hdr = iph->protocol; - } else if (ipv6h->version == 6) { - l4_hdr = ipv6h->nexthdr; - } else { - nn_dp_warn(dp, "partial checksum but ipv=%x!\n", iph->version); - return; - } - - switch (l4_hdr) { - case IPPROTO_TCP: - txd->flags |= PCIE_DESC_TX_TCP_CSUM; - break; - case IPPROTO_UDP: - txd->flags |= PCIE_DESC_TX_UDP_CSUM; - break; - default: - nn_dp_warn(dp, "partial checksum but l4 proto=%x!\n", l4_hdr); - return; - } - - u64_stats_update_begin(&r_vec->tx_sync); - if (skb->encapsulation) - r_vec->hw_csum_tx_inner += txbuf->pkt_cnt; - else - r_vec->hw_csum_tx += txbuf->pkt_cnt; - u64_stats_update_end(&r_vec->tx_sync); -} - -static struct sk_buff * +struct sk_buff * nfp_net_tls_tx(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, struct sk_buff *skb, u64 *tls_handle, int *nr_frags) { @@ -844,7 +601,7 @@ nfp_net_tls_tx(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk)) return skb; - datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb)); + datalen = skb->len - skb_tcp_all_headers(skb); seq = ntohl(tcp_hdr(skb)->seq); ntls = tls_driver_ctx(skb->sk, TLS_OFFLOAD_CTX_DIR_TX); resync_pending = tls_offload_tx_resync_pending(skb->sk); @@ -901,7 +658,7 @@ nfp_net_tls_tx(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, return skb; } -static void nfp_net_tls_tx_undo(struct sk_buff *skb, u64 tls_handle) +void nfp_net_tls_tx_undo(struct sk_buff *skb, u64 tls_handle) { #ifdef CONFIG_TLS_DEVICE struct nfp_net_tls_offload_ctx *ntls; @@ -912,7 +669,7 @@ static void nfp_net_tls_tx_undo(struct sk_buff *skb, u64 tls_handle) if (WARN_ON_ONCE(!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk))) return; - datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb)); + datalen = skb->len - skb_tcp_all_headers(skb); seq = 
ntohl(tcp_hdr(skb)->seq); ntls = tls_driver_ctx(skb->sk, TLS_OFFLOAD_CTX_DIR_TX); @@ -923,411 +680,6 @@ static void nfp_net_tls_tx_undo(struct sk_buff *skb, u64 tls_handle) #endif } -static void nfp_net_tx_xmit_more_flush(struct nfp_net_tx_ring *tx_ring) -{ - wmb(); - nfp_qcp_wr_ptr_add(tx_ring->qcp_q, tx_ring->wr_ptr_add); - tx_ring->wr_ptr_add = 0; -} - -static int nfp_net_prep_tx_meta(struct sk_buff *skb, u64 tls_handle) -{ - struct metadata_dst *md_dst = skb_metadata_dst(skb); - unsigned char *data; - u32 meta_id = 0; - int md_bytes; - - if (likely(!md_dst && !tls_handle)) - return 0; - if (unlikely(md_dst && md_dst->type != METADATA_HW_PORT_MUX)) { - if (!tls_handle) - return 0; - md_dst = NULL; - } - - md_bytes = 4 + !!md_dst * 4 + !!tls_handle * 8; - - if (unlikely(skb_cow_head(skb, md_bytes))) - return -ENOMEM; - - meta_id = 0; - data = skb_push(skb, md_bytes) + md_bytes; - if (md_dst) { - data -= 4; - put_unaligned_be32(md_dst->u.port_info.port_id, data); - meta_id = NFP_NET_META_PORTID; - } - if (tls_handle) { - /* conn handle is opaque, we just use u64 to be able to quickly - * compare it to zero - */ - data -= 8; - memcpy(data, &tls_handle, sizeof(tls_handle)); - meta_id <<= NFP_NET_META_FIELD_SIZE; - meta_id |= NFP_NET_META_CONN_HANDLE; - } - - data -= 4; - put_unaligned_be32(meta_id, data); - - return md_bytes; -} - -/** - * nfp_net_tx() - Main transmit entry point - * @skb: SKB to transmit - * @netdev: netdev structure - * - * Return: NETDEV_TX_OK on success. 
- */ -static netdev_tx_t nfp_net_tx(struct sk_buff *skb, struct net_device *netdev) -{ - struct nfp_net *nn = netdev_priv(netdev); - const skb_frag_t *frag; - int f, nr_frags, wr_idx, md_bytes; - struct nfp_net_tx_ring *tx_ring; - struct nfp_net_r_vector *r_vec; - struct nfp_net_tx_buf *txbuf; - struct nfp_net_tx_desc *txd; - struct netdev_queue *nd_q; - struct nfp_net_dp *dp; - dma_addr_t dma_addr; - unsigned int fsize; - u64 tls_handle = 0; - u16 qidx; - - dp = &nn->dp; - qidx = skb_get_queue_mapping(skb); - tx_ring = &dp->tx_rings[qidx]; - r_vec = tx_ring->r_vec; - - nr_frags = skb_shinfo(skb)->nr_frags; - - if (unlikely(nfp_net_tx_full(tx_ring, nr_frags + 1))) { - nn_dp_warn(dp, "TX ring %d busy. wrp=%u rdp=%u\n", - qidx, tx_ring->wr_p, tx_ring->rd_p); - nd_q = netdev_get_tx_queue(dp->netdev, qidx); - netif_tx_stop_queue(nd_q); - nfp_net_tx_xmit_more_flush(tx_ring); - u64_stats_update_begin(&r_vec->tx_sync); - r_vec->tx_busy++; - u64_stats_update_end(&r_vec->tx_sync); - return NETDEV_TX_BUSY; - } - - skb = nfp_net_tls_tx(dp, r_vec, skb, &tls_handle, &nr_frags); - if (unlikely(!skb)) { - nfp_net_tx_xmit_more_flush(tx_ring); - return NETDEV_TX_OK; - } - - md_bytes = nfp_net_prep_tx_meta(skb, tls_handle); - if (unlikely(md_bytes < 0)) - goto err_flush; - - /* Start with the head skbuf */ - dma_addr = dma_map_single(dp->dev, skb->data, skb_headlen(skb), - DMA_TO_DEVICE); - if (dma_mapping_error(dp->dev, dma_addr)) - goto err_dma_err; - - wr_idx = D_IDX(tx_ring, tx_ring->wr_p); - - /* Stash the soft descriptor of the head then initialize it */ - txbuf = &tx_ring->txbufs[wr_idx]; - txbuf->skb = skb; - txbuf->dma_addr = dma_addr; - txbuf->fidx = -1; - txbuf->pkt_cnt = 1; - txbuf->real_len = skb->len; - - /* Build TX descriptor */ - txd = &tx_ring->txds[wr_idx]; - txd->offset_eop = (nr_frags ? 
0 : PCIE_DESC_TX_EOP) | md_bytes; - txd->dma_len = cpu_to_le16(skb_headlen(skb)); - nfp_desc_set_dma_addr(txd, dma_addr); - txd->data_len = cpu_to_le16(skb->len); - - txd->flags = 0; - txd->mss = 0; - txd->lso_hdrlen = 0; - - /* Do not reorder - tso may adjust pkt cnt, vlan may override fields */ - nfp_net_tx_tso(r_vec, txbuf, txd, skb, md_bytes); - nfp_net_tx_csum(dp, r_vec, txbuf, txd, skb); - if (skb_vlan_tag_present(skb) && dp->ctrl & NFP_NET_CFG_CTRL_TXVLAN) { - txd->flags |= PCIE_DESC_TX_VLAN; - txd->vlan = cpu_to_le16(skb_vlan_tag_get(skb)); - } - - /* Gather DMA */ - if (nr_frags > 0) { - __le64 second_half; - - /* all descs must match except for in addr, length and eop */ - second_half = txd->vals8[1]; - - for (f = 0; f < nr_frags; f++) { - frag = &skb_shinfo(skb)->frags[f]; - fsize = skb_frag_size(frag); - - dma_addr = skb_frag_dma_map(dp->dev, frag, 0, - fsize, DMA_TO_DEVICE); - if (dma_mapping_error(dp->dev, dma_addr)) - goto err_unmap; - - wr_idx = D_IDX(tx_ring, wr_idx + 1); - tx_ring->txbufs[wr_idx].skb = skb; - tx_ring->txbufs[wr_idx].dma_addr = dma_addr; - tx_ring->txbufs[wr_idx].fidx = f; - - txd = &tx_ring->txds[wr_idx]; - txd->dma_len = cpu_to_le16(fsize); - nfp_desc_set_dma_addr(txd, dma_addr); - txd->offset_eop = md_bytes | - ((f == nr_frags - 1) ? 
PCIE_DESC_TX_EOP : 0); - txd->vals8[1] = second_half; - } - - u64_stats_update_begin(&r_vec->tx_sync); - r_vec->tx_gather++; - u64_stats_update_end(&r_vec->tx_sync); - } - - skb_tx_timestamp(skb); - - nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx); - - tx_ring->wr_p += nr_frags + 1; - if (nfp_net_tx_ring_should_stop(tx_ring)) - nfp_net_tx_ring_stop(nd_q, tx_ring); - - tx_ring->wr_ptr_add += nr_frags + 1; - if (__netdev_tx_sent_queue(nd_q, txbuf->real_len, netdev_xmit_more())) - nfp_net_tx_xmit_more_flush(tx_ring); - - return NETDEV_TX_OK; - -err_unmap: - while (--f >= 0) { - frag = &skb_shinfo(skb)->frags[f]; - dma_unmap_page(dp->dev, tx_ring->txbufs[wr_idx].dma_addr, - skb_frag_size(frag), DMA_TO_DEVICE); - tx_ring->txbufs[wr_idx].skb = NULL; - tx_ring->txbufs[wr_idx].dma_addr = 0; - tx_ring->txbufs[wr_idx].fidx = -2; - wr_idx = wr_idx - 1; - if (wr_idx < 0) - wr_idx += tx_ring->cnt; - } - dma_unmap_single(dp->dev, tx_ring->txbufs[wr_idx].dma_addr, - skb_headlen(skb), DMA_TO_DEVICE); - tx_ring->txbufs[wr_idx].skb = NULL; - tx_ring->txbufs[wr_idx].dma_addr = 0; - tx_ring->txbufs[wr_idx].fidx = -2; -err_dma_err: - nn_dp_warn(dp, "Failed to map DMA TX buffer\n"); -err_flush: - nfp_net_tx_xmit_more_flush(tx_ring); - u64_stats_update_begin(&r_vec->tx_sync); - r_vec->tx_errors++; - u64_stats_update_end(&r_vec->tx_sync); - nfp_net_tls_tx_undo(skb, tls_handle); - dev_kfree_skb_any(skb); - return NETDEV_TX_OK; -} - -/** - * nfp_net_tx_complete() - Handled completed TX packets - * @tx_ring: TX ring structure - * @budget: NAPI budget (only used as bool to determine if in NAPI context) - */ -static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget) -{ - struct nfp_net_r_vector *r_vec = tx_ring->r_vec; - struct nfp_net_dp *dp = &r_vec->nfp_net->dp; - struct netdev_queue *nd_q; - u32 done_pkts = 0, done_bytes = 0; - u32 qcp_rd_p; - int todo; - - if (tx_ring->wr_p == tx_ring->rd_p) - return; - - /* Work out how many descriptors have been transmitted */ - 
qcp_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q); - - if (qcp_rd_p == tx_ring->qcp_rd_p) - return; - - todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p); - - while (todo--) { - const skb_frag_t *frag; - struct nfp_net_tx_buf *tx_buf; - struct sk_buff *skb; - int fidx, nr_frags; - int idx; - - idx = D_IDX(tx_ring, tx_ring->rd_p++); - tx_buf = &tx_ring->txbufs[idx]; - - skb = tx_buf->skb; - if (!skb) - continue; - - nr_frags = skb_shinfo(skb)->nr_frags; - fidx = tx_buf->fidx; - - if (fidx == -1) { - /* unmap head */ - dma_unmap_single(dp->dev, tx_buf->dma_addr, - skb_headlen(skb), DMA_TO_DEVICE); - - done_pkts += tx_buf->pkt_cnt; - done_bytes += tx_buf->real_len; - } else { - /* unmap fragment */ - frag = &skb_shinfo(skb)->frags[fidx]; - dma_unmap_page(dp->dev, tx_buf->dma_addr, - skb_frag_size(frag), DMA_TO_DEVICE); - } - - /* check for last gather fragment */ - if (fidx == nr_frags - 1) - napi_consume_skb(skb, budget); - - tx_buf->dma_addr = 0; - tx_buf->skb = NULL; - tx_buf->fidx = -2; - } - - tx_ring->qcp_rd_p = qcp_rd_p; - - u64_stats_update_begin(&r_vec->tx_sync); - r_vec->tx_bytes += done_bytes; - r_vec->tx_pkts += done_pkts; - u64_stats_update_end(&r_vec->tx_sync); - - if (!dp->netdev) - return; - - nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx); - netdev_tx_completed_queue(nd_q, done_pkts, done_bytes); - if (nfp_net_tx_ring_should_wake(tx_ring)) { - /* Make sure TX thread will see updated tx_ring->rd_p */ - smp_mb(); - - if (unlikely(netif_tx_queue_stopped(nd_q))) - netif_tx_wake_queue(nd_q); - } - - WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt, - "TX ring corruption rd_p=%u wr_p=%u cnt=%u\n", - tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt); -} - -static bool nfp_net_xdp_complete(struct nfp_net_tx_ring *tx_ring) -{ - struct nfp_net_r_vector *r_vec = tx_ring->r_vec; - u32 done_pkts = 0, done_bytes = 0; - bool done_all; - int idx, todo; - u32 qcp_rd_p; - - /* Work out how many descriptors have been transmitted */ - qcp_rd_p = 
nfp_qcp_rd_ptr_read(tx_ring->qcp_q); - - if (qcp_rd_p == tx_ring->qcp_rd_p) - return true; - - todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p); - - done_all = todo <= NFP_NET_XDP_MAX_COMPLETE; - todo = min(todo, NFP_NET_XDP_MAX_COMPLETE); - - tx_ring->qcp_rd_p = D_IDX(tx_ring, tx_ring->qcp_rd_p + todo); - - done_pkts = todo; - while (todo--) { - idx = D_IDX(tx_ring, tx_ring->rd_p); - tx_ring->rd_p++; - - done_bytes += tx_ring->txbufs[idx].real_len; - } - - u64_stats_update_begin(&r_vec->tx_sync); - r_vec->tx_bytes += done_bytes; - r_vec->tx_pkts += done_pkts; - u64_stats_update_end(&r_vec->tx_sync); - - WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt, - "XDP TX ring corruption rd_p=%u wr_p=%u cnt=%u\n", - tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt); - - return done_all; -} - -/** - * nfp_net_tx_ring_reset() - Free any untransmitted buffers and reset pointers - * @dp: NFP Net data path struct - * @tx_ring: TX ring structure - * - * Assumes that the device is stopped, must be idempotent. 
- */ -static void -nfp_net_tx_ring_reset(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring) -{ - const skb_frag_t *frag; - struct netdev_queue *nd_q; - - while (!tx_ring->is_xdp && tx_ring->rd_p != tx_ring->wr_p) { - struct nfp_net_tx_buf *tx_buf; - struct sk_buff *skb; - int idx, nr_frags; - - idx = D_IDX(tx_ring, tx_ring->rd_p); - tx_buf = &tx_ring->txbufs[idx]; - - skb = tx_ring->txbufs[idx].skb; - nr_frags = skb_shinfo(skb)->nr_frags; - - if (tx_buf->fidx == -1) { - /* unmap head */ - dma_unmap_single(dp->dev, tx_buf->dma_addr, - skb_headlen(skb), DMA_TO_DEVICE); - } else { - /* unmap fragment */ - frag = &skb_shinfo(skb)->frags[tx_buf->fidx]; - dma_unmap_page(dp->dev, tx_buf->dma_addr, - skb_frag_size(frag), DMA_TO_DEVICE); - } - - /* check for last gather fragment */ - if (tx_buf->fidx == nr_frags - 1) - dev_kfree_skb_any(skb); - - tx_buf->dma_addr = 0; - tx_buf->skb = NULL; - tx_buf->fidx = -2; - - tx_ring->qcp_rd_p++; - tx_ring->rd_p++; - } - - memset(tx_ring->txds, 0, tx_ring->size); - tx_ring->wr_p = 0; - tx_ring->rd_p = 0; - tx_ring->qcp_rd_p = 0; - tx_ring->wr_ptr_add = 0; - - if (tx_ring->is_xdp || !dp->netdev) - return; - - nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx); - netdev_tx_reset_queue(nd_q); -} - static void nfp_net_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct nfp_net *nn = netdev_priv(netdev); @@ -1335,1008 +687,43 @@ static void nfp_net_tx_timeout(struct net_device *netdev, unsigned int txqueue) nn_warn(nn, "TX watchdog timeout on ring: %u\n", txqueue); } -/* Receive processing - */ +/* Receive processing */ static unsigned int -nfp_net_calc_fl_bufsz(struct nfp_net_dp *dp) +nfp_net_calc_fl_bufsz_data(struct nfp_net_dp *dp) { - unsigned int fl_bufsz; + unsigned int fl_bufsz = 0; - fl_bufsz = NFP_NET_RX_BUF_HEADROOM; - fl_bufsz += dp->rx_dma_off; if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC) fl_bufsz += NFP_NET_MAX_PREPEND; else fl_bufsz += dp->rx_offset; fl_bufsz += ETH_HLEN + VLAN_HLEN * 2 + 
dp->mtu; - fl_bufsz = SKB_DATA_ALIGN(fl_bufsz); - fl_bufsz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - return fl_bufsz; } -static void -nfp_net_free_frag(void *frag, bool xdp) -{ - if (!xdp) - skb_free_frag(frag); - else - __free_page(virt_to_page(frag)); -} - -/** - * nfp_net_rx_alloc_one() - Allocate and map page frag for RX - * @dp: NFP Net data path struct - * @dma_addr: Pointer to storage for DMA address (output param) - * - * This function will allcate a new page frag, map it for DMA. - * - * Return: allocated page frag or NULL on failure. - */ -static void *nfp_net_rx_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr) -{ - void *frag; - - if (!dp->xdp_prog) { - frag = netdev_alloc_frag(dp->fl_bufsz); - } else { - struct page *page; - - page = alloc_page(GFP_KERNEL); - frag = page ? page_address(page) : NULL; - } - if (!frag) { - nn_dp_warn(dp, "Failed to alloc receive page frag\n"); - return NULL; - } - - *dma_addr = nfp_net_dma_map_rx(dp, frag); - if (dma_mapping_error(dp->dev, *dma_addr)) { - nfp_net_free_frag(frag, dp->xdp_prog); - nn_dp_warn(dp, "Failed to map DMA RX buffer\n"); - return NULL; - } - - return frag; -} - -static void *nfp_net_napi_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr) -{ - void *frag; - - if (!dp->xdp_prog) { - frag = napi_alloc_frag(dp->fl_bufsz); - if (unlikely(!frag)) - return NULL; - } else { - struct page *page; - - page = dev_alloc_page(); - if (unlikely(!page)) - return NULL; - frag = page_address(page); - } - - *dma_addr = nfp_net_dma_map_rx(dp, frag); - if (dma_mapping_error(dp->dev, *dma_addr)) { - nfp_net_free_frag(frag, dp->xdp_prog); - nn_dp_warn(dp, "Failed to map DMA RX buffer\n"); - return NULL; - } - - return frag; -} - -/** - * nfp_net_rx_give_one() - Put mapped skb on the software and hardware rings - * @dp: NFP Net data path struct - * @rx_ring: RX ring structure - * @frag: page fragment buffer - * @dma_addr: DMA address of skb mapping - */ -static void nfp_net_rx_give_one(const struct 
nfp_net_dp *dp, - struct nfp_net_rx_ring *rx_ring, - void *frag, dma_addr_t dma_addr) -{ - unsigned int wr_idx; - - wr_idx = D_IDX(rx_ring, rx_ring->wr_p); - - nfp_net_dma_sync_dev_rx(dp, dma_addr); - - /* Stash SKB and DMA address away */ - rx_ring->rxbufs[wr_idx].frag = frag; - rx_ring->rxbufs[wr_idx].dma_addr = dma_addr; - - /* Fill freelist descriptor */ - rx_ring->rxds[wr_idx].fld.reserved = 0; - rx_ring->rxds[wr_idx].fld.meta_len_dd = 0; - nfp_desc_set_dma_addr(&rx_ring->rxds[wr_idx].fld, - dma_addr + dp->rx_dma_off); - - rx_ring->wr_p++; - if (!(rx_ring->wr_p % NFP_NET_FL_BATCH)) { - /* Update write pointer of the freelist queue. Make - * sure all writes are flushed before telling the hardware. - */ - wmb(); - nfp_qcp_wr_ptr_add(rx_ring->qcp_fl, NFP_NET_FL_BATCH); - } -} - -/** - * nfp_net_rx_ring_reset() - Reflect in SW state of freelist after disable - * @rx_ring: RX ring structure - * - * Assumes that the device is stopped, must be idempotent. - */ -static void nfp_net_rx_ring_reset(struct nfp_net_rx_ring *rx_ring) -{ - unsigned int wr_idx, last_idx; - - /* wr_p == rd_p means ring was never fed FL bufs. RX rings are always - * kept at cnt - 1 FL bufs. - */ - if (rx_ring->wr_p == 0 && rx_ring->rd_p == 0) - return; - - /* Move the empty entry to the end of the list */ - wr_idx = D_IDX(rx_ring, rx_ring->wr_p); - last_idx = rx_ring->cnt - 1; - rx_ring->rxbufs[wr_idx].dma_addr = rx_ring->rxbufs[last_idx].dma_addr; - rx_ring->rxbufs[wr_idx].frag = rx_ring->rxbufs[last_idx].frag; - rx_ring->rxbufs[last_idx].dma_addr = 0; - rx_ring->rxbufs[last_idx].frag = NULL; - - memset(rx_ring->rxds, 0, rx_ring->size); - rx_ring->wr_p = 0; - rx_ring->rd_p = 0; -} - -/** - * nfp_net_rx_ring_bufs_free() - Free any buffers currently on the RX ring - * @dp: NFP Net data path struct - * @rx_ring: RX ring to remove buffers from - * - * Assumes that the device is stopped and buffers are in [0, ring->cnt - 1) - * entries. 
After device is disabled nfp_net_rx_ring_reset() must be called - * to restore required ring geometry. - */ -static void -nfp_net_rx_ring_bufs_free(struct nfp_net_dp *dp, - struct nfp_net_rx_ring *rx_ring) -{ - unsigned int i; - - for (i = 0; i < rx_ring->cnt - 1; i++) { - /* NULL skb can only happen when initial filling of the ring - * fails to allocate enough buffers and calls here to free - * already allocated ones. - */ - if (!rx_ring->rxbufs[i].frag) - continue; - - nfp_net_dma_unmap_rx(dp, rx_ring->rxbufs[i].dma_addr); - nfp_net_free_frag(rx_ring->rxbufs[i].frag, dp->xdp_prog); - rx_ring->rxbufs[i].dma_addr = 0; - rx_ring->rxbufs[i].frag = NULL; - } -} - -/** - * nfp_net_rx_ring_bufs_alloc() - Fill RX ring with buffers (don't give to FW) - * @dp: NFP Net data path struct - * @rx_ring: RX ring to remove buffers from - */ -static int -nfp_net_rx_ring_bufs_alloc(struct nfp_net_dp *dp, - struct nfp_net_rx_ring *rx_ring) -{ - struct nfp_net_rx_buf *rxbufs; - unsigned int i; - - rxbufs = rx_ring->rxbufs; - - for (i = 0; i < rx_ring->cnt - 1; i++) { - rxbufs[i].frag = nfp_net_rx_alloc_one(dp, &rxbufs[i].dma_addr); - if (!rxbufs[i].frag) { - nfp_net_rx_ring_bufs_free(dp, rx_ring); - return -ENOMEM; - } - } - - return 0; -} - -/** - * nfp_net_rx_ring_fill_freelist() - Give buffers from the ring to FW - * @dp: NFP Net data path struct - * @rx_ring: RX ring to fill - */ -static void -nfp_net_rx_ring_fill_freelist(struct nfp_net_dp *dp, - struct nfp_net_rx_ring *rx_ring) -{ - unsigned int i; - - for (i = 0; i < rx_ring->cnt - 1; i++) - nfp_net_rx_give_one(dp, rx_ring, rx_ring->rxbufs[i].frag, - rx_ring->rxbufs[i].dma_addr); -} - -/** - * nfp_net_rx_csum_has_errors() - group check if rxd has any csum errors - * @flags: RX descriptor flags field in CPU byte order - */ -static int nfp_net_rx_csum_has_errors(u16 flags) -{ - u16 csum_all_checked, csum_all_ok; - - csum_all_checked = flags & __PCIE_DESC_RX_CSUM_ALL; - csum_all_ok = flags & __PCIE_DESC_RX_CSUM_ALL_OK; - - return 
csum_all_checked != (csum_all_ok << PCIE_DESC_RX_CSUM_OK_SHIFT); -} - -/** - * nfp_net_rx_csum() - set SKB checksum field based on RX descriptor flags - * @dp: NFP Net data path struct - * @r_vec: per-ring structure - * @rxd: Pointer to RX descriptor - * @meta: Parsed metadata prepend - * @skb: Pointer to SKB - */ -static void nfp_net_rx_csum(struct nfp_net_dp *dp, - struct nfp_net_r_vector *r_vec, - struct nfp_net_rx_desc *rxd, - struct nfp_meta_parsed *meta, struct sk_buff *skb) -{ - skb_checksum_none_assert(skb); - - if (!(dp->netdev->features & NETIF_F_RXCSUM)) - return; - - if (meta->csum_type) { - skb->ip_summed = meta->csum_type; - skb->csum = meta->csum; - u64_stats_update_begin(&r_vec->rx_sync); - r_vec->hw_csum_rx_complete++; - u64_stats_update_end(&r_vec->rx_sync); - return; - } - - if (nfp_net_rx_csum_has_errors(le16_to_cpu(rxd->rxd.flags))) { - u64_stats_update_begin(&r_vec->rx_sync); - r_vec->hw_csum_rx_error++; - u64_stats_update_end(&r_vec->rx_sync); - return; - } - - /* Assume that the firmware will never report inner CSUM_OK unless outer - * L4 headers were successfully parsed. FW will always report zero UDP - * checksum as CSUM_OK. 
- */ - if (rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM_OK || - rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM_OK) { - __skb_incr_checksum_unnecessary(skb); - u64_stats_update_begin(&r_vec->rx_sync); - r_vec->hw_csum_rx_ok++; - u64_stats_update_end(&r_vec->rx_sync); - } - - if (rxd->rxd.flags & PCIE_DESC_RX_I_TCP_CSUM_OK || - rxd->rxd.flags & PCIE_DESC_RX_I_UDP_CSUM_OK) { - __skb_incr_checksum_unnecessary(skb); - u64_stats_update_begin(&r_vec->rx_sync); - r_vec->hw_csum_rx_inner_ok++; - u64_stats_update_end(&r_vec->rx_sync); - } -} - -static void -nfp_net_set_hash(struct net_device *netdev, struct nfp_meta_parsed *meta, - unsigned int type, __be32 *hash) -{ - if (!(netdev->features & NETIF_F_RXHASH)) - return; - - switch (type) { - case NFP_NET_RSS_IPV4: - case NFP_NET_RSS_IPV6: - case NFP_NET_RSS_IPV6_EX: - meta->hash_type = PKT_HASH_TYPE_L3; - break; - default: - meta->hash_type = PKT_HASH_TYPE_L4; - break; - } - - meta->hash = get_unaligned_be32(hash); -} - -static void -nfp_net_set_hash_desc(struct net_device *netdev, struct nfp_meta_parsed *meta, - void *data, struct nfp_net_rx_desc *rxd) -{ - struct nfp_net_rx_hash *rx_hash = data; - - if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS)) - return; - - nfp_net_set_hash(netdev, meta, get_unaligned_be32(&rx_hash->hash_type), - &rx_hash->hash); -} - -static bool -nfp_net_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta, - void *data, void *pkt, unsigned int pkt_len, int meta_len) -{ - u32 meta_info; - - meta_info = get_unaligned_be32(data); - data += 4; - - while (meta_info) { - switch (meta_info & NFP_NET_META_FIELD_MASK) { - case NFP_NET_META_HASH: - meta_info >>= NFP_NET_META_FIELD_SIZE; - nfp_net_set_hash(netdev, meta, - meta_info & NFP_NET_META_FIELD_MASK, - (__be32 *)data); - data += 4; - break; - case NFP_NET_META_MARK: - meta->mark = get_unaligned_be32(data); - data += 4; - break; - case NFP_NET_META_PORTID: - meta->portid = get_unaligned_be32(data); - data += 4; - break; - case NFP_NET_META_CSUM: - 
meta->csum_type = CHECKSUM_COMPLETE; - meta->csum = - (__force __wsum)__get_unaligned_cpu32(data); - data += 4; - break; - case NFP_NET_META_RESYNC_INFO: - if (nfp_net_tls_rx_resync_req(netdev, data, pkt, - pkt_len)) - return false; - data += sizeof(struct nfp_net_tls_resync_req); - break; - default: - return true; - } - - meta_info >>= NFP_NET_META_FIELD_SIZE; - } - - return data != pkt; -} - -static void -nfp_net_rx_drop(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, - struct nfp_net_rx_ring *rx_ring, struct nfp_net_rx_buf *rxbuf, - struct sk_buff *skb) -{ - u64_stats_update_begin(&r_vec->rx_sync); - r_vec->rx_drops++; - /* If we have both skb and rxbuf the replacement buffer allocation - * must have failed, count this as an alloc failure. - */ - if (skb && rxbuf) - r_vec->rx_replace_buf_alloc_fail++; - u64_stats_update_end(&r_vec->rx_sync); - - /* skb is build based on the frag, free_skb() would free the frag - * so to be able to reuse it we need an extra ref. - */ - if (skb && rxbuf && skb->head == rxbuf->frag) - page_ref_inc(virt_to_head_page(rxbuf->frag)); - if (rxbuf) - nfp_net_rx_give_one(dp, rx_ring, rxbuf->frag, rxbuf->dma_addr); - if (skb) - dev_kfree_skb_any(skb); -} - -static bool -nfp_net_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring, - struct nfp_net_tx_ring *tx_ring, - struct nfp_net_rx_buf *rxbuf, unsigned int dma_off, - unsigned int pkt_len, bool *completed) -{ - unsigned int dma_map_sz = dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA; - struct nfp_net_tx_buf *txbuf; - struct nfp_net_tx_desc *txd; - int wr_idx; - - /* Reject if xdp_adjust_tail grow packet beyond DMA area */ - if (pkt_len + dma_off > dma_map_sz) - return false; - - if (unlikely(nfp_net_tx_full(tx_ring, 1))) { - if (!*completed) { - nfp_net_xdp_complete(tx_ring); - *completed = true; - } - - if (unlikely(nfp_net_tx_full(tx_ring, 1))) { - nfp_net_rx_drop(dp, rx_ring->r_vec, rx_ring, rxbuf, - NULL); - return false; - } - } - - wr_idx = D_IDX(tx_ring, 
tx_ring->wr_p); - - /* Stash the soft descriptor of the head then initialize it */ - txbuf = &tx_ring->txbufs[wr_idx]; - - nfp_net_rx_give_one(dp, rx_ring, txbuf->frag, txbuf->dma_addr); - - txbuf->frag = rxbuf->frag; - txbuf->dma_addr = rxbuf->dma_addr; - txbuf->fidx = -1; - txbuf->pkt_cnt = 1; - txbuf->real_len = pkt_len; - - dma_sync_single_for_device(dp->dev, rxbuf->dma_addr + dma_off, - pkt_len, DMA_BIDIRECTIONAL); - - /* Build TX descriptor */ - txd = &tx_ring->txds[wr_idx]; - txd->offset_eop = PCIE_DESC_TX_EOP; - txd->dma_len = cpu_to_le16(pkt_len); - nfp_desc_set_dma_addr(txd, rxbuf->dma_addr + dma_off); - txd->data_len = cpu_to_le16(pkt_len); - - txd->flags = 0; - txd->mss = 0; - txd->lso_hdrlen = 0; - - tx_ring->wr_p++; - tx_ring->wr_ptr_add++; - return true; -} - -/** - * nfp_net_rx() - receive up to @budget packets on @rx_ring - * @rx_ring: RX ring to receive from - * @budget: NAPI budget - * - * Note, this function is separated out from the napi poll function to - * more cleanly separate packet receive code from other bookkeeping - * functions performed in the napi poll function. - * - * Return: Number of packets received. - */ -static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) -{ - struct nfp_net_r_vector *r_vec = rx_ring->r_vec; - struct nfp_net_dp *dp = &r_vec->nfp_net->dp; - struct nfp_net_tx_ring *tx_ring; - struct bpf_prog *xdp_prog; - bool xdp_tx_cmpl = false; - unsigned int true_bufsz; - struct sk_buff *skb; - int pkts_polled = 0; - struct xdp_buff xdp; - int idx; - - xdp_prog = READ_ONCE(dp->xdp_prog); - true_bufsz = xdp_prog ? 
PAGE_SIZE : dp->fl_bufsz; - xdp_init_buff(&xdp, PAGE_SIZE - NFP_NET_RX_BUF_HEADROOM, - &rx_ring->xdp_rxq); - tx_ring = r_vec->xdp_ring; - - while (pkts_polled < budget) { - unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off; - struct nfp_net_rx_buf *rxbuf; - struct nfp_net_rx_desc *rxd; - struct nfp_meta_parsed meta; - bool redir_egress = false; - struct net_device *netdev; - dma_addr_t new_dma_addr; - u32 meta_len_xdp = 0; - void *new_frag; - - idx = D_IDX(rx_ring, rx_ring->rd_p); - - rxd = &rx_ring->rxds[idx]; - if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD)) - break; - - /* Memory barrier to ensure that we won't do other reads - * before the DD bit. - */ - dma_rmb(); - - memset(&meta, 0, sizeof(meta)); - - rx_ring->rd_p++; - pkts_polled++; - - rxbuf = &rx_ring->rxbufs[idx]; - /* < meta_len > - * <-- [rx_offset] --> - * --------------------------------------------------------- - * | [XX] | metadata | packet | XXXX | - * --------------------------------------------------------- - * <---------------- data_len ---------------> - * - * The rx_offset is fixed for all packets, the meta_len can vary - * on a packet by packet basis. If rx_offset is set to zero - * (_RX_OFFSET_DYNAMIC) metadata starts at the beginning of the - * buffer and is immediately followed by the packet (no [XX]). 
- */ - meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK; - data_len = le16_to_cpu(rxd->rxd.data_len); - pkt_len = data_len - meta_len; - - pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off; - if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC) - pkt_off += meta_len; - else - pkt_off += dp->rx_offset; - meta_off = pkt_off - meta_len; - - /* Stats update */ - u64_stats_update_begin(&r_vec->rx_sync); - r_vec->rx_pkts++; - r_vec->rx_bytes += pkt_len; - u64_stats_update_end(&r_vec->rx_sync); - - if (unlikely(meta_len > NFP_NET_MAX_PREPEND || - (dp->rx_offset && meta_len > dp->rx_offset))) { - nn_dp_warn(dp, "oversized RX packet metadata %u\n", - meta_len); - nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL); - continue; - } - - nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off, - data_len); - - if (!dp->chained_metadata_format) { - nfp_net_set_hash_desc(dp->netdev, &meta, - rxbuf->frag + meta_off, rxd); - } else if (meta_len) { - if (unlikely(nfp_net_parse_meta(dp->netdev, &meta, - rxbuf->frag + meta_off, - rxbuf->frag + pkt_off, - pkt_len, meta_len))) { - nn_dp_warn(dp, "invalid RX packet metadata\n"); - nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, - NULL); - continue; - } - } - - if (xdp_prog && !meta.portid) { - void *orig_data = rxbuf->frag + pkt_off; - unsigned int dma_off; - int act; - - xdp_prepare_buff(&xdp, - rxbuf->frag + NFP_NET_RX_BUF_HEADROOM, - pkt_off - NFP_NET_RX_BUF_HEADROOM, - pkt_len, true); - - act = bpf_prog_run_xdp(xdp_prog, &xdp); - - pkt_len = xdp.data_end - xdp.data; - pkt_off += xdp.data - orig_data; - - switch (act) { - case XDP_PASS: - meta_len_xdp = xdp.data - xdp.data_meta; - break; - case XDP_TX: - dma_off = pkt_off - NFP_NET_RX_BUF_HEADROOM; - if (unlikely(!nfp_net_tx_xdp_buf(dp, rx_ring, - tx_ring, rxbuf, - dma_off, - pkt_len, - &xdp_tx_cmpl))) - trace_xdp_exception(dp->netdev, - xdp_prog, act); - continue; - default: - bpf_warn_invalid_xdp_action(act); - fallthrough; - case XDP_ABORTED: - trace_xdp_exception(dp->netdev, 
xdp_prog, act); - fallthrough; - case XDP_DROP: - nfp_net_rx_give_one(dp, rx_ring, rxbuf->frag, - rxbuf->dma_addr); - continue; - } - } - - if (likely(!meta.portid)) { - netdev = dp->netdev; - } else if (meta.portid == NFP_META_PORT_ID_CTRL) { - struct nfp_net *nn = netdev_priv(dp->netdev); - - nfp_app_ctrl_rx_raw(nn->app, rxbuf->frag + pkt_off, - pkt_len); - nfp_net_rx_give_one(dp, rx_ring, rxbuf->frag, - rxbuf->dma_addr); - continue; - } else { - struct nfp_net *nn; - - nn = netdev_priv(dp->netdev); - netdev = nfp_app_dev_get(nn->app, meta.portid, - &redir_egress); - if (unlikely(!netdev)) { - nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, - NULL); - continue; - } - - if (nfp_netdev_is_nfp_repr(netdev)) - nfp_repr_inc_rx_stats(netdev, pkt_len); - } - - skb = build_skb(rxbuf->frag, true_bufsz); - if (unlikely(!skb)) { - nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL); - continue; - } - new_frag = nfp_net_napi_alloc_one(dp, &new_dma_addr); - if (unlikely(!new_frag)) { - nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, skb); - continue; - } - - nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr); - - nfp_net_rx_give_one(dp, rx_ring, new_frag, new_dma_addr); - - skb_reserve(skb, pkt_off); - skb_put(skb, pkt_len); - - skb->mark = meta.mark; - skb_set_hash(skb, meta.hash, meta.hash_type); - - skb_record_rx_queue(skb, rx_ring->idx); - skb->protocol = eth_type_trans(skb, netdev); - - nfp_net_rx_csum(dp, r_vec, rxd, &meta, skb); - -#ifdef CONFIG_TLS_DEVICE - if (rxd->rxd.flags & PCIE_DESC_RX_DECRYPTED) { - skb->decrypted = true; - u64_stats_update_begin(&r_vec->rx_sync); - r_vec->hw_tls_rx++; - u64_stats_update_end(&r_vec->rx_sync); - } -#endif - - if (rxd->rxd.flags & PCIE_DESC_RX_VLAN) - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), - le16_to_cpu(rxd->rxd.vlan)); - if (meta_len_xdp) - skb_metadata_set(skb, meta_len_xdp); - - if (likely(!redir_egress)) { - napi_gro_receive(&rx_ring->r_vec->napi, skb); - } else { - skb->dev = netdev; - skb_reset_network_header(skb); - __skb_push(skb, 
ETH_HLEN); - dev_queue_xmit(skb); - } - } - - if (xdp_prog) { - if (tx_ring->wr_ptr_add) - nfp_net_tx_xmit_more_flush(tx_ring); - else if (unlikely(tx_ring->wr_p != tx_ring->rd_p) && - !xdp_tx_cmpl) - if (!nfp_net_xdp_complete(tx_ring)) - pkts_polled = budget; - } - - return pkts_polled; -} - -/** - * nfp_net_poll() - napi poll function - * @napi: NAPI structure - * @budget: NAPI budget - * - * Return: number of packets polled. - */ -static int nfp_net_poll(struct napi_struct *napi, int budget) -{ - struct nfp_net_r_vector *r_vec = - container_of(napi, struct nfp_net_r_vector, napi); - unsigned int pkts_polled = 0; - - if (r_vec->tx_ring) - nfp_net_tx_complete(r_vec->tx_ring, budget); - if (r_vec->rx_ring) - pkts_polled = nfp_net_rx(r_vec->rx_ring, budget); - - if (pkts_polled < budget) - if (napi_complete_done(napi, pkts_polled)) - nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry); - - if (r_vec->nfp_net->rx_coalesce_adapt_on && r_vec->rx_ring) { - struct dim_sample dim_sample = {}; - unsigned int start; - u64 pkts, bytes; - - do { - start = u64_stats_fetch_begin(&r_vec->rx_sync); - pkts = r_vec->rx_pkts; - bytes = r_vec->rx_bytes; - } while (u64_stats_fetch_retry(&r_vec->rx_sync, start)); - - dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample); - net_dim(&r_vec->rx_dim, dim_sample); - } - - if (r_vec->nfp_net->tx_coalesce_adapt_on && r_vec->tx_ring) { - struct dim_sample dim_sample = {}; - unsigned int start; - u64 pkts, bytes; - - do { - start = u64_stats_fetch_begin(&r_vec->tx_sync); - pkts = r_vec->tx_pkts; - bytes = r_vec->tx_bytes; - } while (u64_stats_fetch_retry(&r_vec->tx_sync, start)); - - dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample); - net_dim(&r_vec->tx_dim, dim_sample); - } - - return pkts_polled; -} - -/* Control device data path - */ - -static bool -nfp_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, - struct sk_buff *skb, bool old) -{ - unsigned int real_len = skb->len, meta_len = 0; - struct 
nfp_net_tx_ring *tx_ring; - struct nfp_net_tx_buf *txbuf; - struct nfp_net_tx_desc *txd; - struct nfp_net_dp *dp; - dma_addr_t dma_addr; - int wr_idx; - - dp = &r_vec->nfp_net->dp; - tx_ring = r_vec->tx_ring; - - if (WARN_ON_ONCE(skb_shinfo(skb)->nr_frags)) { - nn_dp_warn(dp, "Driver's CTRL TX does not implement gather\n"); - goto err_free; - } - - if (unlikely(nfp_net_tx_full(tx_ring, 1))) { - u64_stats_update_begin(&r_vec->tx_sync); - r_vec->tx_busy++; - u64_stats_update_end(&r_vec->tx_sync); - if (!old) - __skb_queue_tail(&r_vec->queue, skb); - else - __skb_queue_head(&r_vec->queue, skb); - return true; - } - - if (nfp_app_ctrl_has_meta(nn->app)) { - if (unlikely(skb_headroom(skb) < 8)) { - nn_dp_warn(dp, "CTRL TX on skb without headroom\n"); - goto err_free; - } - meta_len = 8; - put_unaligned_be32(NFP_META_PORT_ID_CTRL, skb_push(skb, 4)); - put_unaligned_be32(NFP_NET_META_PORTID, skb_push(skb, 4)); - } - - /* Start with the head skbuf */ - dma_addr = dma_map_single(dp->dev, skb->data, skb_headlen(skb), - DMA_TO_DEVICE); - if (dma_mapping_error(dp->dev, dma_addr)) - goto err_dma_warn; - - wr_idx = D_IDX(tx_ring, tx_ring->wr_p); - - /* Stash the soft descriptor of the head then initialize it */ - txbuf = &tx_ring->txbufs[wr_idx]; - txbuf->skb = skb; - txbuf->dma_addr = dma_addr; - txbuf->fidx = -1; - txbuf->pkt_cnt = 1; - txbuf->real_len = real_len; - - /* Build TX descriptor */ - txd = &tx_ring->txds[wr_idx]; - txd->offset_eop = meta_len | PCIE_DESC_TX_EOP; - txd->dma_len = cpu_to_le16(skb_headlen(skb)); - nfp_desc_set_dma_addr(txd, dma_addr); - txd->data_len = cpu_to_le16(skb->len); - - txd->flags = 0; - txd->mss = 0; - txd->lso_hdrlen = 0; - - tx_ring->wr_p++; - tx_ring->wr_ptr_add++; - nfp_net_tx_xmit_more_flush(tx_ring); - - return false; - -err_dma_warn: - nn_dp_warn(dp, "Failed to DMA map TX CTRL buffer\n"); -err_free: - u64_stats_update_begin(&r_vec->tx_sync); - r_vec->tx_errors++; - u64_stats_update_end(&r_vec->tx_sync); - dev_kfree_skb_any(skb); - 
return false; -} - -bool __nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb) -{ - struct nfp_net_r_vector *r_vec = &nn->r_vecs[0]; - - return nfp_ctrl_tx_one(nn, r_vec, skb, false); -} - -bool nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb) -{ - struct nfp_net_r_vector *r_vec = &nn->r_vecs[0]; - bool ret; - - spin_lock_bh(&r_vec->lock); - ret = nfp_ctrl_tx_one(nn, r_vec, skb, false); - spin_unlock_bh(&r_vec->lock); - - return ret; -} - -static void __nfp_ctrl_tx_queued(struct nfp_net_r_vector *r_vec) +static unsigned int nfp_net_calc_fl_bufsz(struct nfp_net_dp *dp) { - struct sk_buff *skb; - - while ((skb = __skb_dequeue(&r_vec->queue))) - if (nfp_ctrl_tx_one(r_vec->nfp_net, r_vec, skb, true)) - return; -} - -static bool -nfp_ctrl_meta_ok(struct nfp_net *nn, void *data, unsigned int meta_len) -{ - u32 meta_type, meta_tag; - - if (!nfp_app_ctrl_has_meta(nn->app)) - return !meta_len; - - if (meta_len != 8) - return false; - - meta_type = get_unaligned_be32(data); - meta_tag = get_unaligned_be32(data + 4); - - return (meta_type == NFP_NET_META_PORTID && - meta_tag == NFP_META_PORT_ID_CTRL); -} - -static bool -nfp_ctrl_rx_one(struct nfp_net *nn, struct nfp_net_dp *dp, - struct nfp_net_r_vector *r_vec, struct nfp_net_rx_ring *rx_ring) -{ - unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off; - struct nfp_net_rx_buf *rxbuf; - struct nfp_net_rx_desc *rxd; - dma_addr_t new_dma_addr; - struct sk_buff *skb; - void *new_frag; - int idx; - - idx = D_IDX(rx_ring, rx_ring->rd_p); - - rxd = &rx_ring->rxds[idx]; - if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD)) - return false; - - /* Memory barrier to ensure that we won't do other reads - * before the DD bit. 
- */ - dma_rmb(); - - rx_ring->rd_p++; - - rxbuf = &rx_ring->rxbufs[idx]; - meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK; - data_len = le16_to_cpu(rxd->rxd.data_len); - pkt_len = data_len - meta_len; - - pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off; - if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC) - pkt_off += meta_len; - else - pkt_off += dp->rx_offset; - meta_off = pkt_off - meta_len; - - /* Stats update */ - u64_stats_update_begin(&r_vec->rx_sync); - r_vec->rx_pkts++; - r_vec->rx_bytes += pkt_len; - u64_stats_update_end(&r_vec->rx_sync); - - nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off, data_len); - - if (unlikely(!nfp_ctrl_meta_ok(nn, rxbuf->frag + meta_off, meta_len))) { - nn_dp_warn(dp, "incorrect metadata for ctrl packet (%d)\n", - meta_len); - nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL); - return true; - } - - skb = build_skb(rxbuf->frag, dp->fl_bufsz); - if (unlikely(!skb)) { - nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL); - return true; - } - new_frag = nfp_net_napi_alloc_one(dp, &new_dma_addr); - if (unlikely(!new_frag)) { - nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, skb); - return true; - } - - nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr); - - nfp_net_rx_give_one(dp, rx_ring, new_frag, new_dma_addr); - - skb_reserve(skb, pkt_off); - skb_put(skb, pkt_len); - - nfp_app_ctrl_rx(nn->app, skb); - - return true; -} + unsigned int fl_bufsz; -static bool nfp_ctrl_rx(struct nfp_net_r_vector *r_vec) -{ - struct nfp_net_rx_ring *rx_ring = r_vec->rx_ring; - struct nfp_net *nn = r_vec->nfp_net; - struct nfp_net_dp *dp = &nn->dp; - unsigned int budget = 512; + fl_bufsz = NFP_NET_RX_BUF_HEADROOM; + fl_bufsz += dp->rx_dma_off; + fl_bufsz += nfp_net_calc_fl_bufsz_data(dp); - while (nfp_ctrl_rx_one(nn, dp, r_vec, rx_ring) && budget--) - continue; + fl_bufsz = SKB_DATA_ALIGN(fl_bufsz); + fl_bufsz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - return budget; + return fl_bufsz; } -static void nfp_ctrl_poll(struct 
tasklet_struct *t) +static unsigned int nfp_net_calc_fl_bufsz_xsk(struct nfp_net_dp *dp) { - struct nfp_net_r_vector *r_vec = from_tasklet(r_vec, t, tasklet); + unsigned int fl_bufsz; - spin_lock(&r_vec->lock); - nfp_net_tx_complete(r_vec->tx_ring, 0); - __nfp_ctrl_tx_queued(r_vec); - spin_unlock(&r_vec->lock); + fl_bufsz = XDP_PACKET_HEADROOM; + fl_bufsz += nfp_net_calc_fl_bufsz_data(dp); - if (nfp_ctrl_rx(r_vec)) { - nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry); - } else { - tasklet_schedule(&r_vec->tasklet); - nn_dp_warn(&r_vec->nfp_net->dp, - "control message budget exceeded!\n"); - } + return fl_bufsz; } /* Setup and Configuration @@ -2371,7 +758,7 @@ static void nfp_net_vecs_init(struct nfp_net *nn) __skb_queue_head_init(&r_vec->queue); spin_lock_init(&r_vec->lock); - tasklet_setup(&r_vec->tasklet, nfp_ctrl_poll); + tasklet_setup(&r_vec->tasklet, nn->dp.ops->ctrl_poll); tasklet_disable(&r_vec->tasklet); } @@ -2379,263 +766,23 @@ static void nfp_net_vecs_init(struct nfp_net *nn) } } -/** - * nfp_net_tx_ring_free() - Free resources allocated to a TX ring - * @tx_ring: TX ring to free - */ -static void nfp_net_tx_ring_free(struct nfp_net_tx_ring *tx_ring) -{ - struct nfp_net_r_vector *r_vec = tx_ring->r_vec; - struct nfp_net_dp *dp = &r_vec->nfp_net->dp; - - kvfree(tx_ring->txbufs); - - if (tx_ring->txds) - dma_free_coherent(dp->dev, tx_ring->size, - tx_ring->txds, tx_ring->dma); - - tx_ring->cnt = 0; - tx_ring->txbufs = NULL; - tx_ring->txds = NULL; - tx_ring->dma = 0; - tx_ring->size = 0; -} - -/** - * nfp_net_tx_ring_alloc() - Allocate resource for a TX ring - * @dp: NFP Net data path struct - * @tx_ring: TX Ring structure to allocate - * - * Return: 0 on success, negative errno otherwise. 
- */ -static int -nfp_net_tx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring) -{ - struct nfp_net_r_vector *r_vec = tx_ring->r_vec; - - tx_ring->cnt = dp->txd_cnt; - - tx_ring->size = array_size(tx_ring->cnt, sizeof(*tx_ring->txds)); - tx_ring->txds = dma_alloc_coherent(dp->dev, tx_ring->size, - &tx_ring->dma, - GFP_KERNEL | __GFP_NOWARN); - if (!tx_ring->txds) { - netdev_warn(dp->netdev, "failed to allocate TX descriptor ring memory, requested descriptor count: %d, consider lowering descriptor count\n", - tx_ring->cnt); - goto err_alloc; - } - - tx_ring->txbufs = kvcalloc(tx_ring->cnt, sizeof(*tx_ring->txbufs), - GFP_KERNEL); - if (!tx_ring->txbufs) - goto err_alloc; - - if (!tx_ring->is_xdp && dp->netdev) - netif_set_xps_queue(dp->netdev, &r_vec->affinity_mask, - tx_ring->idx); - - return 0; - -err_alloc: - nfp_net_tx_ring_free(tx_ring); - return -ENOMEM; -} - static void -nfp_net_tx_ring_bufs_free(struct nfp_net_dp *dp, - struct nfp_net_tx_ring *tx_ring) -{ - unsigned int i; - - if (!tx_ring->is_xdp) - return; - - for (i = 0; i < tx_ring->cnt; i++) { - if (!tx_ring->txbufs[i].frag) - return; - - nfp_net_dma_unmap_rx(dp, tx_ring->txbufs[i].dma_addr); - __free_page(virt_to_page(tx_ring->txbufs[i].frag)); - } -} - -static int -nfp_net_tx_ring_bufs_alloc(struct nfp_net_dp *dp, - struct nfp_net_tx_ring *tx_ring) -{ - struct nfp_net_tx_buf *txbufs = tx_ring->txbufs; - unsigned int i; - - if (!tx_ring->is_xdp) - return 0; - - for (i = 0; i < tx_ring->cnt; i++) { - txbufs[i].frag = nfp_net_rx_alloc_one(dp, &txbufs[i].dma_addr); - if (!txbufs[i].frag) { - nfp_net_tx_ring_bufs_free(dp, tx_ring); - return -ENOMEM; - } - } - - return 0; -} - -static int nfp_net_tx_rings_prepare(struct nfp_net *nn, struct nfp_net_dp *dp) -{ - unsigned int r; - - dp->tx_rings = kcalloc(dp->num_tx_rings, sizeof(*dp->tx_rings), - GFP_KERNEL); - if (!dp->tx_rings) - return -ENOMEM; - - for (r = 0; r < dp->num_tx_rings; r++) { - int bias = 0; - - if (r >= dp->num_stack_tx_rings) 
- bias = dp->num_stack_tx_rings; - - nfp_net_tx_ring_init(&dp->tx_rings[r], &nn->r_vecs[r - bias], - r, bias); - - if (nfp_net_tx_ring_alloc(dp, &dp->tx_rings[r])) - goto err_free_prev; - - if (nfp_net_tx_ring_bufs_alloc(dp, &dp->tx_rings[r])) - goto err_free_ring; - } - - return 0; - -err_free_prev: - while (r--) { - nfp_net_tx_ring_bufs_free(dp, &dp->tx_rings[r]); -err_free_ring: - nfp_net_tx_ring_free(&dp->tx_rings[r]); - } - kfree(dp->tx_rings); - return -ENOMEM; -} - -static void nfp_net_tx_rings_free(struct nfp_net_dp *dp) +nfp_net_napi_add(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, int idx) { - unsigned int r; - - for (r = 0; r < dp->num_tx_rings; r++) { - nfp_net_tx_ring_bufs_free(dp, &dp->tx_rings[r]); - nfp_net_tx_ring_free(&dp->tx_rings[r]); - } - - kfree(dp->tx_rings); -} - -/** - * nfp_net_rx_ring_free() - Free resources allocated to a RX ring - * @rx_ring: RX ring to free - */ -static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring) -{ - struct nfp_net_r_vector *r_vec = rx_ring->r_vec; - struct nfp_net_dp *dp = &r_vec->nfp_net->dp; - if (dp->netdev) - xdp_rxq_info_unreg(&rx_ring->xdp_rxq); - kvfree(rx_ring->rxbufs); - - if (rx_ring->rxds) - dma_free_coherent(dp->dev, rx_ring->size, - rx_ring->rxds, rx_ring->dma); - - rx_ring->cnt = 0; - rx_ring->rxbufs = NULL; - rx_ring->rxds = NULL; - rx_ring->dma = 0; - rx_ring->size = 0; -} - -/** - * nfp_net_rx_ring_alloc() - Allocate resource for a RX ring - * @dp: NFP Net data path struct - * @rx_ring: RX ring to allocate - * - * Return: 0 on success, negative errno otherwise. 
- */ -static int -nfp_net_rx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring) -{ - int err; - - if (dp->netdev) { - err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, dp->netdev, - rx_ring->idx, rx_ring->r_vec->napi.napi_id); - if (err < 0) - return err; - } - - rx_ring->cnt = dp->rxd_cnt; - rx_ring->size = array_size(rx_ring->cnt, sizeof(*rx_ring->rxds)); - rx_ring->rxds = dma_alloc_coherent(dp->dev, rx_ring->size, - &rx_ring->dma, - GFP_KERNEL | __GFP_NOWARN); - if (!rx_ring->rxds) { - netdev_warn(dp->netdev, "failed to allocate RX descriptor ring memory, requested descriptor count: %d, consider lowering descriptor count\n", - rx_ring->cnt); - goto err_alloc; - } - - rx_ring->rxbufs = kvcalloc(rx_ring->cnt, sizeof(*rx_ring->rxbufs), - GFP_KERNEL); - if (!rx_ring->rxbufs) - goto err_alloc; - - return 0; - -err_alloc: - nfp_net_rx_ring_free(rx_ring); - return -ENOMEM; -} - -static int nfp_net_rx_rings_prepare(struct nfp_net *nn, struct nfp_net_dp *dp) -{ - unsigned int r; - - dp->rx_rings = kcalloc(dp->num_rx_rings, sizeof(*dp->rx_rings), - GFP_KERNEL); - if (!dp->rx_rings) - return -ENOMEM; - - for (r = 0; r < dp->num_rx_rings; r++) { - nfp_net_rx_ring_init(&dp->rx_rings[r], &nn->r_vecs[r], r); - - if (nfp_net_rx_ring_alloc(dp, &dp->rx_rings[r])) - goto err_free_prev; - - if (nfp_net_rx_ring_bufs_alloc(dp, &dp->rx_rings[r])) - goto err_free_ring; - } - - return 0; - -err_free_prev: - while (r--) { - nfp_net_rx_ring_bufs_free(dp, &dp->rx_rings[r]); -err_free_ring: - nfp_net_rx_ring_free(&dp->rx_rings[r]); - } - kfree(dp->rx_rings); - return -ENOMEM; + netif_napi_add(dp->netdev, &r_vec->napi, + nfp_net_has_xsk_pool_slow(dp, idx) ? 
dp->ops->xsk_poll : dp->ops->poll); + else + tasklet_enable(&r_vec->tasklet); } -static void nfp_net_rx_rings_free(struct nfp_net_dp *dp) +static void +nfp_net_napi_del(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec) { - unsigned int r; - - for (r = 0; r < dp->num_rx_rings; r++) { - nfp_net_rx_ring_bufs_free(dp, &dp->rx_rings[r]); - nfp_net_rx_ring_free(&dp->rx_rings[r]); - } - - kfree(dp->rx_rings); + if (dp->netdev) + netif_napi_del(&r_vec->napi); + else + tasklet_disable(&r_vec->tasklet); } static void @@ -2648,6 +795,17 @@ nfp_net_vector_assign_rings(struct nfp_net_dp *dp, r_vec->xdp_ring = idx < dp->num_tx_rings - dp->num_stack_tx_rings ? &dp->tx_rings[dp->num_stack_tx_rings + idx] : NULL; + + if (nfp_net_has_xsk_pool_slow(dp, idx) || r_vec->xsk_pool) { + r_vec->xsk_pool = dp->xdp_prog ? dp->xsk_pools[idx] : NULL; + + if (r_vec->xsk_pool) + xsk_pool_set_rxq_info(r_vec->xsk_pool, + &r_vec->rx_ring->xdp_rxq); + + nfp_net_napi_del(dp, r_vec); + nfp_net_napi_add(dp, r_vec, idx); + } } static int @@ -2656,23 +814,14 @@ nfp_net_prepare_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, { int err; - /* Setup NAPI */ - if (nn->dp.netdev) - netif_napi_add(nn->dp.netdev, &r_vec->napi, - nfp_net_poll, NAPI_POLL_WEIGHT); - else - tasklet_enable(&r_vec->tasklet); + nfp_net_napi_add(&nn->dp, r_vec, idx); snprintf(r_vec->name, sizeof(r_vec->name), "%s-rxtx-%d", nfp_net_name(nn), idx); err = request_irq(r_vec->irq_vector, r_vec->handler, 0, r_vec->name, r_vec); if (err) { - if (nn->dp.netdev) - netif_napi_del(&r_vec->napi); - else - tasklet_disable(&r_vec->tasklet); - + nfp_net_napi_del(&nn->dp, r_vec); nn_err(nn, "Error requesting IRQ %d\n", r_vec->irq_vector); return err; } @@ -2690,11 +839,7 @@ static void nfp_net_cleanup_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec) { irq_set_affinity_hint(r_vec->irq_vector, NULL); - if (nn->dp.netdev) - netif_napi_del(&r_vec->napi); - else - tasklet_disable(&r_vec->tasklet); - + nfp_net_napi_del(&nn->dp, 
r_vec); free_irq(r_vec->irq_vector, r_vec); } @@ -2768,17 +913,6 @@ static void nfp_net_write_mac_addr(struct nfp_net *nn, const u8 *addr) nn_writew(nn, NFP_NET_CFG_MACADDR + 6, get_unaligned_be16(addr + 4)); } -static void nfp_net_vec_clear_ring_data(struct nfp_net *nn, unsigned int idx) -{ - nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(idx), 0); - nn_writeb(nn, NFP_NET_CFG_RXR_SZ(idx), 0); - nn_writeb(nn, NFP_NET_CFG_RXR_VEC(idx), 0); - - nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(idx), 0); - nn_writeb(nn, NFP_NET_CFG_TXR_SZ(idx), 0); - nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), 0); -} - /** * nfp_net_clear_config_and_disable() - Clear control BAR and disable NFP * @nn: NFP Net device to reconfigure @@ -2808,8 +942,11 @@ static void nfp_net_clear_config_and_disable(struct nfp_net *nn) if (err) nn_err(nn, "Could not disable device: %d\n", err); - for (r = 0; r < nn->dp.num_rx_rings; r++) + for (r = 0; r < nn->dp.num_rx_rings; r++) { nfp_net_rx_ring_reset(&nn->dp.rx_rings[r]); + if (nfp_net_has_xsk_pool_slow(&nn->dp, nn->dp.rx_rings[r].idx)) + nfp_net_xsk_rx_bufs_free(&nn->dp.rx_rings[r]); + } for (r = 0; r < nn->dp.num_tx_rings; r++) nfp_net_tx_ring_reset(&nn->dp, &nn->dp.tx_rings[r]); for (r = 0; r < nn->dp.num_r_vecs; r++) @@ -2818,25 +955,6 @@ static void nfp_net_clear_config_and_disable(struct nfp_net *nn) nn->dp.ctrl = new_ctrl; } -static void -nfp_net_rx_ring_hw_cfg_write(struct nfp_net *nn, - struct nfp_net_rx_ring *rx_ring, unsigned int idx) -{ - /* Write the DMA address, size and MSI-X info to the device */ - nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(idx), rx_ring->dma); - nn_writeb(nn, NFP_NET_CFG_RXR_SZ(idx), ilog2(rx_ring->cnt)); - nn_writeb(nn, NFP_NET_CFG_RXR_VEC(idx), rx_ring->r_vec->irq_entry); -} - -static void -nfp_net_tx_ring_hw_cfg_write(struct nfp_net *nn, - struct nfp_net_tx_ring *tx_ring, unsigned int idx) -{ - nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(idx), tx_ring->dma); - nn_writeb(nn, NFP_NET_CFG_TXR_SZ(idx), ilog2(tx_ring->cnt)); - nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), 
tx_ring->r_vec->irq_entry); -} - /** * nfp_net_set_config_and_enable() - Write control BAR and enable NFP * @nn: NFP Net device to reconfigure @@ -2866,11 +984,11 @@ static int nfp_net_set_config_and_enable(struct nfp_net *nn) for (r = 0; r < nn->dp.num_rx_rings; r++) nfp_net_rx_ring_hw_cfg_write(nn, &nn->dp.rx_rings[r], r); - nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, nn->dp.num_tx_rings == 64 ? - 0xffffffffffffffffULL : ((u64)1 << nn->dp.num_tx_rings) - 1); + nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, + U64_MAX >> (64 - nn->dp.num_tx_rings)); - nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, nn->dp.num_rx_rings == 64 ? - 0xffffffffffffffffULL : ((u64)1 << nn->dp.num_rx_rings) - 1); + nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, + U64_MAX >> (64 - nn->dp.num_rx_rings)); if (nn->dp.netdev) nfp_net_write_mac_addr(nn, nn->dp.netdev->dev_addr); @@ -3296,20 +1414,39 @@ struct nfp_net_dp *nfp_net_clone_dp(struct nfp_net *nn) *new = nn->dp; + new->xsk_pools = kmemdup(new->xsk_pools, + array_size(nn->max_r_vecs, + sizeof(new->xsk_pools)), + GFP_KERNEL); + if (!new->xsk_pools) { + kfree(new); + return NULL; + } + /* Clear things which need to be recomputed */ new->fl_bufsz = 0; new->tx_rings = NULL; new->rx_rings = NULL; new->num_r_vecs = 0; new->num_stack_tx_rings = 0; + new->txrwb = NULL; + new->txrwb_dma = 0; return new; } +static void nfp_net_free_dp(struct nfp_net_dp *dp) +{ + kfree(dp->xsk_pools); + kfree(dp); +} + static int nfp_net_check_config(struct nfp_net *nn, struct nfp_net_dp *dp, struct netlink_ext_ack *extack) { + unsigned int r, xsk_min_fl_bufsz; + /* XDP-enabled tests */ if (!dp->xdp_prog) return 0; @@ -3322,6 +1459,18 @@ nfp_net_check_config(struct nfp_net *nn, struct nfp_net_dp *dp, return -EINVAL; } + xsk_min_fl_bufsz = nfp_net_calc_fl_bufsz_xsk(dp); + for (r = 0; r < nn->max_r_vecs; r++) { + if (!dp->xsk_pools[r]) + continue; + + if (xsk_pool_get_rx_frame_size(dp->xsk_pools[r]) < xsk_min_fl_bufsz) { + NL_SET_ERR_MSG_MOD(extack, + "XSK buffer pool chunk size too small"); + 
return -EINVAL; + } + } + return 0; } @@ -3389,7 +1538,7 @@ int nfp_net_ring_reconfig(struct nfp_net *nn, struct nfp_net_dp *dp, nfp_net_open_stack(nn); exit_free_dp: - kfree(dp); + nfp_net_free_dp(dp); return err; @@ -3398,7 +1547,7 @@ err_free_rx: err_cleanup_vecs: for (r = dp->num_r_vecs - 1; r >= nn->dp.num_r_vecs; r--) nfp_net_cleanup_vector(nn, &nn->r_vecs[r]); - kfree(dp); + nfp_net_free_dp(dp); return err; } @@ -3482,21 +1631,21 @@ static void nfp_net_stat64(struct net_device *netdev, unsigned int start; do { - start = u64_stats_fetch_begin(&r_vec->rx_sync); + start = u64_stats_fetch_begin_irq(&r_vec->rx_sync); data[0] = r_vec->rx_pkts; data[1] = r_vec->rx_bytes; data[2] = r_vec->rx_drops; - } while (u64_stats_fetch_retry(&r_vec->rx_sync, start)); + } while (u64_stats_fetch_retry_irq(&r_vec->rx_sync, start)); stats->rx_packets += data[0]; stats->rx_bytes += data[1]; stats->rx_dropped += data[2]; do { - start = u64_stats_fetch_begin(&r_vec->tx_sync); + start = u64_stats_fetch_begin_irq(&r_vec->tx_sync); data[0] = r_vec->tx_pkts; data[1] = r_vec->tx_bytes; data[2] = r_vec->tx_errors; - } while (u64_stats_fetch_retry(&r_vec->tx_sync, start)); + } while (u64_stats_fetch_retry_irq(&r_vec->tx_sync, start)); stats->tx_packets += data[0]; stats->tx_bytes += data[1]; stats->tx_errors += data[2]; @@ -3547,16 +1696,18 @@ static int nfp_net_set_features(struct net_device *netdev, if (changed & NETIF_F_HW_VLAN_CTAG_RX) { if (features & NETIF_F_HW_VLAN_CTAG_RX) - new_ctrl |= NFP_NET_CFG_CTRL_RXVLAN; + new_ctrl |= nn->cap & NFP_NET_CFG_CTRL_RXVLAN_V2 ?: + NFP_NET_CFG_CTRL_RXVLAN; else - new_ctrl &= ~NFP_NET_CFG_CTRL_RXVLAN; + new_ctrl &= ~NFP_NET_CFG_CTRL_RXVLAN_ANY; } if (changed & NETIF_F_HW_VLAN_CTAG_TX) { if (features & NETIF_F_HW_VLAN_CTAG_TX) - new_ctrl |= NFP_NET_CFG_CTRL_TXVLAN; + new_ctrl |= nn->cap & NFP_NET_CFG_CTRL_TXVLAN_V2 ?: + NFP_NET_CFG_CTRL_TXVLAN; else - new_ctrl &= ~NFP_NET_CFG_CTRL_TXVLAN; + new_ctrl &= ~NFP_NET_CFG_CTRL_TXVLAN_ANY; } if (changed & 
NETIF_F_HW_VLAN_CTAG_FILTER) { @@ -3566,6 +1717,13 @@ static int nfp_net_set_features(struct net_device *netdev, new_ctrl &= ~NFP_NET_CFG_CTRL_CTAG_FILTER; } + if (changed & NETIF_F_HW_VLAN_STAG_RX) { + if (features & NETIF_F_HW_VLAN_STAG_RX) + new_ctrl |= NFP_NET_CFG_CTRL_RXQINQ; + else + new_ctrl &= ~NFP_NET_CFG_CTRL_RXQINQ; + } + if (changed & NETIF_F_SG) { if (features & NETIF_F_SG) new_ctrl |= NFP_NET_CFG_CTRL_GATHER; @@ -3595,6 +1753,27 @@ static int nfp_net_set_features(struct net_device *netdev, } static netdev_features_t +nfp_net_fix_features(struct net_device *netdev, + netdev_features_t features) +{ + if ((features & NETIF_F_HW_VLAN_CTAG_RX) && + (features & NETIF_F_HW_VLAN_STAG_RX)) { + if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) { + features &= ~NETIF_F_HW_VLAN_CTAG_RX; + netdev->wanted_features &= ~NETIF_F_HW_VLAN_CTAG_RX; + netdev_warn(netdev, + "S-tag and C-tag stripping can't be enabled at the same time. Enabling S-tag stripping and disabling C-tag stripping\n"); + } else if (netdev->features & NETIF_F_HW_VLAN_STAG_RX) { + features &= ~NETIF_F_HW_VLAN_STAG_RX; + netdev->wanted_features &= ~NETIF_F_HW_VLAN_STAG_RX; + netdev_warn(netdev, + "S-tag and C-tag stripping can't be enabled at the same time. 
Enabling C-tag stripping and disabling S-tag stripping\n"); + } + } + return features; +} + +static netdev_features_t nfp_net_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features) { @@ -3610,8 +1789,7 @@ nfp_net_features_check(struct sk_buff *skb, struct net_device *dev, if (skb_is_gso(skb)) { u32 hdrlen; - hdrlen = skb_inner_transport_header(skb) - skb->data + - inner_tcp_hdrlen(skb); + hdrlen = skb_inner_tcp_all_headers(skb); /* Assume worst case scenario of having longest possible * metadata prepend - 8B @@ -3716,6 +1894,9 @@ static int nfp_net_xdp(struct net_device *netdev, struct netdev_bpf *xdp) return nfp_net_xdp_setup_drv(nn, xdp); case XDP_SETUP_PROG_HW: return nfp_net_xdp_setup_hw(nn, xdp); + case XDP_SETUP_XSK_POOL: + return nfp_net_xsk_setup_pool(netdev, xdp->xsk.pool, + xdp->xsk.queue_id); default: return nfp_app_bpf(nn->app, nn, xdp); } @@ -3742,7 +1923,70 @@ static int nfp_net_set_mac_address(struct net_device *netdev, void *addr) return 0; } -const struct net_device_ops nfp_net_netdev_ops = { +static int nfp_net_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct net_device *dev, u32 filter_mask, + int nlflags) +{ + struct nfp_net *nn = netdev_priv(dev); + u16 mode; + + if (!(nn->cap & NFP_NET_CFG_CTRL_VEPA)) + return -EOPNOTSUPP; + + mode = (nn->dp.ctrl & NFP_NET_CFG_CTRL_VEPA) ? 
+ BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB; + + return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, 0, 0, + nlflags, filter_mask, NULL); +} + +static int nfp_net_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, + u16 flags, struct netlink_ext_ack *extack) +{ + struct nfp_net *nn = netdev_priv(dev); + struct nlattr *attr, *br_spec; + int rem, err; + u32 new_ctrl; + u16 mode; + + if (!(nn->cap & NFP_NET_CFG_CTRL_VEPA)) + return -EOPNOTSUPP; + + br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (!br_spec) + return -EINVAL; + + nla_for_each_nested(attr, br_spec, rem) { + if (nla_type(attr) != IFLA_BRIDGE_MODE) + continue; + + if (nla_len(attr) < sizeof(mode)) + return -EINVAL; + + new_ctrl = nn->dp.ctrl; + mode = nla_get_u16(attr); + if (mode == BRIDGE_MODE_VEPA) + new_ctrl |= NFP_NET_CFG_CTRL_VEPA; + else if (mode == BRIDGE_MODE_VEB) + new_ctrl &= ~NFP_NET_CFG_CTRL_VEPA; + else + return -EOPNOTSUPP; + + if (new_ctrl == nn->dp.ctrl) + return 0; + + nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl); + err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN); + if (!err) + nn->dp.ctrl = new_ctrl; + + return err; + } + + return -EINVAL; +} + +const struct net_device_ops nfp_nfd3_netdev_ops = { .ndo_init = nfp_app_ndo_init, .ndo_uninit = nfp_app_ndo_uninit, .ndo_open = nfp_net_netdev_open, @@ -3753,6 +1997,7 @@ const struct net_device_ops nfp_net_netdev_ops = { .ndo_vlan_rx_kill_vid = nfp_net_vlan_rx_kill_vid, .ndo_set_vf_mac = nfp_app_set_vf_mac, .ndo_set_vf_vlan = nfp_app_set_vf_vlan, + .ndo_set_vf_rate = nfp_app_set_vf_rate, .ndo_set_vf_spoofchk = nfp_app_set_vf_spoofchk, .ndo_set_vf_trust = nfp_app_set_vf_trust, .ndo_get_vf_config = nfp_app_get_vf_config, @@ -3763,10 +2008,45 @@ const struct net_device_ops nfp_net_netdev_ops = { .ndo_change_mtu = nfp_net_change_mtu, .ndo_set_mac_address = nfp_net_set_mac_address, .ndo_set_features = nfp_net_set_features, + .ndo_fix_features = nfp_net_fix_features, .ndo_features_check = nfp_net_features_check, 
.ndo_get_phys_port_name = nfp_net_get_phys_port_name, .ndo_bpf = nfp_net_xdp, + .ndo_xsk_wakeup = nfp_net_xsk_wakeup, .ndo_get_devlink_port = nfp_devlink_get_devlink_port, + .ndo_bridge_getlink = nfp_net_bridge_getlink, + .ndo_bridge_setlink = nfp_net_bridge_setlink, +}; + +const struct net_device_ops nfp_nfdk_netdev_ops = { + .ndo_init = nfp_app_ndo_init, + .ndo_uninit = nfp_app_ndo_uninit, + .ndo_open = nfp_net_netdev_open, + .ndo_stop = nfp_net_netdev_close, + .ndo_start_xmit = nfp_net_tx, + .ndo_get_stats64 = nfp_net_stat64, + .ndo_vlan_rx_add_vid = nfp_net_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = nfp_net_vlan_rx_kill_vid, + .ndo_set_vf_mac = nfp_app_set_vf_mac, + .ndo_set_vf_vlan = nfp_app_set_vf_vlan, + .ndo_set_vf_rate = nfp_app_set_vf_rate, + .ndo_set_vf_spoofchk = nfp_app_set_vf_spoofchk, + .ndo_set_vf_trust = nfp_app_set_vf_trust, + .ndo_get_vf_config = nfp_app_get_vf_config, + .ndo_set_vf_link_state = nfp_app_set_vf_link_state, + .ndo_setup_tc = nfp_port_setup_tc, + .ndo_tx_timeout = nfp_net_tx_timeout, + .ndo_set_rx_mode = nfp_net_set_rx_mode, + .ndo_change_mtu = nfp_net_change_mtu, + .ndo_set_mac_address = nfp_net_set_mac_address, + .ndo_set_features = nfp_net_set_features, + .ndo_fix_features = nfp_net_fix_features, + .ndo_features_check = nfp_net_features_check, + .ndo_get_phys_port_name = nfp_net_get_phys_port_name, + .ndo_bpf = nfp_net_xdp, + .ndo_get_devlink_port = nfp_devlink_get_devlink_port, + .ndo_bridge_getlink = nfp_net_bridge_getlink, + .ndo_bridge_setlink = nfp_net_bridge_setlink, }; static int nfp_udp_tunnel_sync(struct net_device *netdev, unsigned int table) @@ -3806,15 +2086,15 @@ static const struct udp_tunnel_nic_info nfp_udp_tunnels = { */ void nfp_net_info(struct nfp_net *nn) { - nn_info(nn, "Netronome NFP-6xxx %sNetdev: TxQs=%d/%d RxQs=%d/%d\n", + nn_info(nn, "NFP-6xxx %sNetdev: TxQs=%d/%d RxQs=%d/%d\n", nn->dp.is_vf ? 
"VF " : "", nn->dp.num_tx_rings, nn->max_tx_rings, nn->dp.num_rx_rings, nn->max_rx_rings); nn_info(nn, "VER: %d.%d.%d.%d, Maximum supported MTU: %d\n", - nn->fw_ver.resv, nn->fw_ver.class, + nn->fw_ver.extend, nn->fw_ver.class, nn->fw_ver.major, nn->fw_ver.minor, nn->max_mtu); - nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", nn->cap, nn->cap & NFP_NET_CFG_CTRL_PROMISC ? "PROMISC " : "", nn->cap & NFP_NET_CFG_CTRL_L2BC ? "L2BCFILT " : "", @@ -3823,6 +2103,9 @@ void nfp_net_info(struct nfp_net *nn) nn->cap & NFP_NET_CFG_CTRL_TXCSUM ? "TXCSUM " : "", nn->cap & NFP_NET_CFG_CTRL_RXVLAN ? "RXVLAN " : "", nn->cap & NFP_NET_CFG_CTRL_TXVLAN ? "TXVLAN " : "", + nn->cap & NFP_NET_CFG_CTRL_RXQINQ ? "RXQINQ " : "", + nn->cap & NFP_NET_CFG_CTRL_RXVLAN_V2 ? "RXVLANv2 " : "", + nn->cap & NFP_NET_CFG_CTRL_TXVLAN_V2 ? "TXVLANv2 " : "", nn->cap & NFP_NET_CFG_CTRL_SCATTER ? "SCATTER " : "", nn->cap & NFP_NET_CFG_CTRL_GATHER ? "GATHER " : "", nn->cap & NFP_NET_CFG_CTRL_LSO ? "TSO1 " : "", @@ -3832,6 +2115,8 @@ void nfp_net_info(struct nfp_net *nn) nn->cap & NFP_NET_CFG_CTRL_CTAG_FILTER ? "CTAG_FILTER " : "", nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO ? "AUTOMASK " : "", nn->cap & NFP_NET_CFG_CTRL_IRQMOD ? "IRQMOD " : "", + nn->cap & NFP_NET_CFG_CTRL_TXRWB ? "TXRWB " : "", + nn->cap & NFP_NET_CFG_CTRL_VEPA ? "VEPA " : "", nn->cap & NFP_NET_CFG_CTRL_VXLAN ? "VXLAN " : "", nn->cap & NFP_NET_CFG_CTRL_NVGRE ? "NVGRE " : "", nn->cap & NFP_NET_CFG_CTRL_CSUM_COMPLETE ? 
@@ -3843,6 +2128,7 @@ void nfp_net_info(struct nfp_net *nn) /** * nfp_net_alloc() - Allocate netdev and related structure * @pdev: PCI device + * @dev_info: NFP ASIC params * @ctrl_bar: PCI IOMEM with vNIC config memory * @needs_netdev: Whether to allocate a netdev for this vNIC * @max_tx_rings: Maximum number of TX rings supported by device @@ -3855,9 +2141,11 @@ void nfp_net_info(struct nfp_net *nn) * Return: NFP Net device structure, or ERR_PTR on error. */ struct nfp_net * -nfp_net_alloc(struct pci_dev *pdev, void __iomem *ctrl_bar, bool needs_netdev, +nfp_net_alloc(struct pci_dev *pdev, const struct nfp_dev_info *dev_info, + void __iomem *ctrl_bar, bool needs_netdev, unsigned int max_tx_rings, unsigned int max_rx_rings) { + u64 dma_mask = dma_get_mask(&pdev->dev); struct nfp_net *nn; int err; @@ -3880,7 +2168,36 @@ nfp_net_alloc(struct pci_dev *pdev, void __iomem *ctrl_bar, bool needs_netdev, nn->dp.dev = &pdev->dev; nn->dp.ctrl_bar = ctrl_bar; + nn->dev_info = dev_info; nn->pdev = pdev; + nfp_net_get_fw_version(&nn->fw_ver, ctrl_bar); + + switch (FIELD_GET(NFP_NET_CFG_VERSION_DP_MASK, nn->fw_ver.extend)) { + case NFP_NET_CFG_VERSION_DP_NFD3: + nn->dp.ops = &nfp_nfd3_ops; + break; + case NFP_NET_CFG_VERSION_DP_NFDK: + if (nn->fw_ver.major < 5) { + dev_err(&pdev->dev, + "NFDK must use ABI 5 or newer, found: %d\n", + nn->fw_ver.major); + err = -EINVAL; + goto err_free_nn; + } + nn->dp.ops = &nfp_nfdk_ops; + break; + default: + err = -EINVAL; + goto err_free_nn; + } + + if ((dma_mask & nn->dp.ops->dma_mask) != dma_mask) { + dev_err(&pdev->dev, + "DMA mask of loaded firmware: %llx, required DMA mask: %llx\n", + nn->dp.ops->dma_mask, dma_mask); + err = -EINVAL; + goto err_free_nn; + } nn->max_tx_rings = max_tx_rings; nn->max_rx_rings = max_rx_rings; @@ -3893,6 +2210,14 @@ nfp_net_alloc(struct pci_dev *pdev, void __iomem *ctrl_bar, bool needs_netdev, nn->dp.num_r_vecs = max(nn->dp.num_tx_rings, nn->dp.num_rx_rings); nn->dp.num_r_vecs = min_t(unsigned int, 
nn->dp.num_r_vecs, num_online_cpus()); + nn->max_r_vecs = nn->dp.num_r_vecs; + + nn->dp.xsk_pools = kcalloc(nn->max_r_vecs, sizeof(nn->dp.xsk_pools), + GFP_KERNEL); + if (!nn->dp.xsk_pools) { + err = -ENOMEM; + goto err_free_nn; + } nn->dp.txd_cnt = NFP_NET_TX_DESCS_DEFAULT; nn->dp.rxd_cnt = NFP_NET_RX_DESCS_DEFAULT; @@ -3932,6 +2257,7 @@ void nfp_net_free(struct nfp_net *nn) WARN_ON(timer_pending(&nn->reconfig_timer) || nn->reconfig_posted); nfp_ccm_mbox_free(nn); + kfree(nn->dp.xsk_pools); if (nn->dp.netdev) free_netdev(nn->dp.netdev); else @@ -4048,8 +2374,12 @@ static void nfp_net_netdev_init(struct nfp_net *nn) if (nn->cap & NFP_NET_CFG_CTRL_RSS_ANY) netdev->hw_features |= NETIF_F_RXHASH; if (nn->cap & NFP_NET_CFG_CTRL_VXLAN) { - if (nn->cap & NFP_NET_CFG_CTRL_LSO) - netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; + if (nn->cap & NFP_NET_CFG_CTRL_LSO) { + netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_PARTIAL; + netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM; + } netdev->udp_tunnel_nic_info = &nfp_udp_tunnels; nn->dp.ctrl |= NFP_NET_CFG_CTRL_VXLAN; } @@ -4063,41 +2393,57 @@ static void nfp_net_netdev_init(struct nfp_net *nn) netdev->vlan_features = netdev->hw_features; - if (nn->cap & NFP_NET_CFG_CTRL_RXVLAN) { + if (nn->cap & NFP_NET_CFG_CTRL_RXVLAN_ANY) { netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; - nn->dp.ctrl |= NFP_NET_CFG_CTRL_RXVLAN; + nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_RXVLAN_V2 ?: + NFP_NET_CFG_CTRL_RXVLAN; } - if (nn->cap & NFP_NET_CFG_CTRL_TXVLAN) { + if (nn->cap & NFP_NET_CFG_CTRL_TXVLAN_ANY) { if (nn->cap & NFP_NET_CFG_CTRL_LSO2) { nn_warn(nn, "Device advertises both TSO2 and TXVLAN. 
Refusing to enable TXVLAN.\n"); } else { netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX; - nn->dp.ctrl |= NFP_NET_CFG_CTRL_TXVLAN; + nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_TXVLAN_V2 ?: + NFP_NET_CFG_CTRL_TXVLAN; } } if (nn->cap & NFP_NET_CFG_CTRL_CTAG_FILTER) { netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; nn->dp.ctrl |= NFP_NET_CFG_CTRL_CTAG_FILTER; } + if (nn->cap & NFP_NET_CFG_CTRL_RXQINQ) { + netdev->hw_features |= NETIF_F_HW_VLAN_STAG_RX; + nn->dp.ctrl |= NFP_NET_CFG_CTRL_RXQINQ; + } netdev->features = netdev->hw_features; if (nfp_app_has_tc(nn->app) && nn->port) netdev->hw_features |= NETIF_F_HW_TC; - /* Advertise but disable TSO by default. */ - netdev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6); - nn->dp.ctrl &= ~NFP_NET_CFG_CTRL_LSO_ANY; + /* C-Tag strip and S-Tag strip can't be supported simultaneously, + * so enable C-Tag strip and disable S-Tag strip by default. + */ + netdev->features &= ~NETIF_F_HW_VLAN_STAG_RX; + nn->dp.ctrl &= ~NFP_NET_CFG_CTRL_RXQINQ; /* Finalise the netdev setup */ - netdev->netdev_ops = &nfp_net_netdev_ops; + switch (nn->dp.ops->version) { + case NFP_NFD_VER_NFD3: + netdev->netdev_ops = &nfp_nfd3_netdev_ops; + break; + case NFP_NFD_VER_NFDK: + netdev->netdev_ops = &nfp_nfdk_netdev_ops; + break; + } + netdev->watchdog_timeo = msecs_to_jiffies(5 * 1000); /* MTU range: 68 - hw-specific max */ netdev->min_mtu = ETH_MIN_MTU; netdev->max_mtu = nn->max_mtu; - netdev->gso_max_segs = NFP_NET_LSO_MAX_SEGS; + netif_set_tso_max_segs(netdev, NFP_NET_LSO_MAX_SEGS); netif_carrier_off(netdev); @@ -4138,6 +2484,9 @@ static int nfp_net_read_caps(struct nfp_net *nn) nn->dp.rx_offset = NFP_NET_RX_OFFSET; } + /* Mask out NFD-version-specific features */ + nn->cap &= nn->dp.ops->cap_mask; + /* For control vNICs mask out the capabilities app doesn't want. 
*/ if (!nn->dp.netdev) nn->cap &= nn->app->type->ctrl_cap_mask; @@ -4190,6 +2539,10 @@ int nfp_net_init(struct nfp_net *nn) nn->dp.ctrl |= NFP_NET_CFG_CTRL_IRQMOD; } + /* Enable TX pointer writeback, if supported */ + if (nn->cap & NFP_NET_CFG_CTRL_TXRWB) + nn->dp.ctrl |= NFP_NET_CFG_CTRL_TXRWB; + /* Stash the re-configuration queue away. First odd queue in TX Bar */ nn->qcp_cfg = nn->tx_bar + NFP_QCP_QUEUE_ADDR_SZ; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h index 3d61a8cb60b0..6714d5e8fdab 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -1,8 +1,7 @@ /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ /* Copyright (C) 2015-2018 Netronome Systems, Inc. */ -/* - * nfp_net_ctrl.h +/* nfp_net_ctrl.h * Netronome network device driver: Control BAR layout * Authors: Jakub Kicinski <jakub.kicinski@netronome.com> * Jason McMullan <jason.mcmullan@netronome.com> @@ -15,33 +14,36 @@ #include <linux/types.h> -/** - * Configuration BAR size. +/* 64-bit per app capabilities */ +#define NFP_NET_APP_CAP_SP_INDIFF BIT_ULL(0) /* indifferent to port speed */ + +/* Configuration BAR size. * * The configuration BAR is 8K in size, but due to * THB-350, 32k needs to be reserved. 
*/ #define NFP_NET_CFG_BAR_SZ (32 * 1024) -/** - * Offset in Freelist buffer where packet starts on RX - */ +/* Offset in Freelist buffer where packet starts on RX */ #define NFP_NET_RX_OFFSET 32 -/** - * LSO parameters +/* LSO parameters * %NFP_NET_LSO_MAX_HDR_SZ: Maximum header size supported for LSO frames * %NFP_NET_LSO_MAX_SEGS: Maximum number of segments LSO frame can produce */ #define NFP_NET_LSO_MAX_HDR_SZ 255 #define NFP_NET_LSO_MAX_SEGS 64 -/** - * Prepend field types - */ +/* working with metadata vlan api (NFD version >= 2.0) */ +#define NFP_NET_META_VLAN_STRIP BIT(31) +#define NFP_NET_META_VLAN_TPID_MASK GENMASK(19, 16) +#define NFP_NET_META_VLAN_TCI_MASK GENMASK(15, 0) + +/* Prepend field types */ #define NFP_NET_META_FIELD_SIZE 4 #define NFP_NET_META_HASH 1 /* next field carries hash type */ #define NFP_NET_META_MARK 2 +#define NFP_NET_META_VLAN 4 /* ctag or stag type */ #define NFP_NET_META_PORTID 5 #define NFP_NET_META_CSUM 6 /* checksum complete type */ #define NFP_NET_META_CONN_HANDLE 7 @@ -49,9 +51,11 @@ #define NFP_META_PORT_ID_CTRL ~0U -/** - * Hash type pre-pended when a RSS hash was computed - */ +/* Prepend field sizes */ +#define NFP_NET_META_VLAN_SIZE 4 +#define NFP_NET_META_PORTID_SIZE 4 +#define NFP_NET_META_CONN_HANDLE_SIZE 8 +/* Hash type pre-pended when a RSS hash was computed */ #define NFP_NET_RSS_NONE 0 #define NFP_NET_RSS_IPV4 1 #define NFP_NET_RSS_IPV6 2 @@ -63,16 +67,14 @@ #define NFP_NET_RSS_IPV6_UDP 8 #define NFP_NET_RSS_IPV6_EX_UDP 9 -/** - * Ring counts +/* Ring counts * %NFP_NET_TXR_MAX: Maximum number of TX rings * %NFP_NET_RXR_MAX: Maximum number of RX rings */ #define NFP_NET_TXR_MAX 64 #define NFP_NET_RXR_MAX 64 -/** - * Read/Write config words (0x0000 - 0x002c) +/* Read/Write config words (0x0000 - 0x002c) * %NFP_NET_CFG_CTRL: Global control * %NFP_NET_CFG_UPDATE: Indicate which fields are updated * %NFP_NET_CFG_TXRS_ENABLE: Bitmask of enabled TX rings @@ -100,12 +102,15 @@ #define NFP_NET_CFG_CTRL_LSO (0x1 << 10) /* 
LSO/TSO (version 1) */ #define NFP_NET_CFG_CTRL_CTAG_FILTER (0x1 << 11) /* VLAN CTAG filtering */ #define NFP_NET_CFG_CTRL_CMSG_DATA (0x1 << 12) /* RX cmsgs on data Qs */ +#define NFP_NET_CFG_CTRL_RXQINQ (0x1 << 13) /* Enable S-tag strip */ +#define NFP_NET_CFG_CTRL_RXVLAN_V2 (0x1 << 15) /* Enable C-tag strip */ #define NFP_NET_CFG_CTRL_RINGCFG (0x1 << 16) /* Ring runtime changes */ #define NFP_NET_CFG_CTRL_RSS (0x1 << 17) /* RSS (version 1) */ #define NFP_NET_CFG_CTRL_IRQMOD (0x1 << 18) /* Interrupt moderation */ -#define NFP_NET_CFG_CTRL_RINGPRIO (0x1 << 19) /* Ring priorities */ #define NFP_NET_CFG_CTRL_MSIXAUTO (0x1 << 20) /* MSI-X auto-masking */ #define NFP_NET_CFG_CTRL_TXRWB (0x1 << 21) /* Write-back of TX ring*/ +#define NFP_NET_CFG_CTRL_VEPA (0x1 << 22) /* Enable VEPA mode */ +#define NFP_NET_CFG_CTRL_TXVLAN_V2 (0x1 << 23) /* Enable VLAN C-tag insert*/ #define NFP_NET_CFG_CTRL_VXLAN (0x1 << 24) /* VXLAN tunnel support */ #define NFP_NET_CFG_CTRL_NVGRE (0x1 << 25) /* NVGRE tunnel support */ #define NFP_NET_CFG_CTRL_BPF (0x1 << 27) /* BPF offload capable */ @@ -122,6 +127,10 @@ NFP_NET_CFG_CTRL_CSUM_COMPLETE) #define NFP_NET_CFG_CTRL_CHAIN_META (NFP_NET_CFG_CTRL_RSS2 | \ NFP_NET_CFG_CTRL_CSUM_COMPLETE) +#define NFP_NET_CFG_CTRL_RXVLAN_ANY (NFP_NET_CFG_CTRL_RXVLAN | \ + NFP_NET_CFG_CTRL_RXVLAN_V2) +#define NFP_NET_CFG_CTRL_TXVLAN_ANY (NFP_NET_CFG_CTRL_TXVLAN | \ + NFP_NET_CFG_CTRL_TXVLAN_V2) #define NFP_NET_CFG_UPDATE 0x0004 #define NFP_NET_CFG_UPDATE_GEN (0x1 << 0) /* General update */ @@ -147,8 +156,7 @@ #define NFP_NET_CFG_LSC 0x0020 #define NFP_NET_CFG_MACADDR 0x0024 -/** - * Read-only words (0x0030 - 0x0050): +/* Read-only words (0x0030 - 0x0050): * %NFP_NET_CFG_VERSION: Firmware version number * %NFP_NET_CFG_STS: Status * %NFP_NET_CFG_CAP: Capabilities (same bits as %NFP_NET_CFG_CTRL) @@ -162,7 +170,10 @@ * - define more STS bits */ #define NFP_NET_CFG_VERSION 0x0030 -#define NFP_NET_CFG_VERSION_RESERVED_MASK (0xff << 24) +#define 
NFP_NET_CFG_VERSION_RESERVED_MASK (0xfe << 24) +#define NFP_NET_CFG_VERSION_DP_NFD3 0 +#define NFP_NET_CFG_VERSION_DP_NFDK 1 +#define NFP_NET_CFG_VERSION_DP_MASK 1 #define NFP_NET_CFG_VERSION_CLASS_MASK (0xff << 16) #define NFP_NET_CFG_VERSION_CLASS(x) (((x) & 0xff) << 16) #define NFP_NET_CFG_VERSION_CLASS_GENERIC 0 @@ -185,6 +196,10 @@ #define NFP_NET_CFG_STS_LINK_RATE_40G 5 #define NFP_NET_CFG_STS_LINK_RATE_50G 6 #define NFP_NET_CFG_STS_LINK_RATE_100G 7 +/* NSP Link rate is a 16-bit word. It's determined by NSP and + * written to CFG BAR by NFP driver. + */ +#define NFP_NET_CFG_STS_NSP_LINK_RATE 0x0036 #define NFP_NET_CFG_CAP 0x0038 #define NFP_NET_CFG_MAX_TXRINGS 0x003c #define NFP_NET_CFG_MAX_RXRINGS 0x0040 @@ -193,36 +208,31 @@ #define NFP_NET_CFG_START_TXQ 0x0048 #define NFP_NET_CFG_START_RXQ 0x004c -/** - * Prepend configuration +/* Prepend configuration */ #define NFP_NET_CFG_RX_OFFSET 0x0050 #define NFP_NET_CFG_RX_OFFSET_DYNAMIC 0 /* Prepend mode */ -/** - * RSS capabilities +/* RSS capabilities * %NFP_NET_CFG_RSS_CAP_HFUNC: supported hash functions (same bits as * %NFP_NET_CFG_RSS_HFUNC) */ #define NFP_NET_CFG_RSS_CAP 0x0054 #define NFP_NET_CFG_RSS_CAP_HFUNC 0xff000000 -/** - * TLV area start +/* TLV area start * %NFP_NET_CFG_TLV_BASE: start anchor of the TLV area */ #define NFP_NET_CFG_TLV_BASE 0x0058 -/** - * VXLAN/UDP encap configuration +/* VXLAN/UDP encap configuration * %NFP_NET_CFG_VXLAN_PORT: Base address of table of tunnels' UDP dst ports * %NFP_NET_CFG_VXLAN_SZ: Size of the UDP port table in bytes */ #define NFP_NET_CFG_VXLAN_PORT 0x0060 #define NFP_NET_CFG_VXLAN_SZ 0x0008 -/** - * BPF section +/* BPF section * %NFP_NET_CFG_BPF_ABI: BPF ABI version * %NFP_NET_CFG_BPF_CAP: BPF capabilities * %NFP_NET_CFG_BPF_MAX_LEN: Maximum size of JITed BPF code in bytes @@ -247,14 +257,12 @@ #define NFP_NET_CFG_BPF_CFG_MASK 7ULL #define NFP_NET_CFG_BPF_ADDR_MASK (~NFP_NET_CFG_BPF_CFG_MASK) -/** - * 40B reserved for future use (0x0098 - 0x00c0) +/* 40B reserved 
for future use (0x0098 - 0x00c0) */ #define NFP_NET_CFG_RESERVED 0x0098 #define NFP_NET_CFG_RESERVED_SZ 0x0028 -/** - * RSS configuration (0x0100 - 0x01ac): +/* RSS configuration (0x0100 - 0x01ac): * Used only when NFP_NET_CFG_CTRL_RSS is enabled * %NFP_NET_CFG_RSS_CFG: RSS configuration word * %NFP_NET_CFG_RSS_KEY: RSS "secret" key @@ -281,8 +289,7 @@ NFP_NET_CFG_RSS_KEY_SZ) #define NFP_NET_CFG_RSS_ITBL_SZ 0x80 -/** - * TX ring configuration (0x200 - 0x800) +/* TX ring configuration (0x200 - 0x800) * %NFP_NET_CFG_TXR_BASE: Base offset for TX ring configuration * %NFP_NET_CFG_TXR_ADDR: Per TX ring DMA address (8B entries) * %NFP_NET_CFG_TXR_WB_ADDR: Per TX ring write back DMA address (8B entries) @@ -301,8 +308,7 @@ #define NFP_NET_CFG_TXR_IRQ_MOD(_x) (NFP_NET_CFG_TXR_BASE + 0x500 + \ ((_x) * 0x4)) -/** - * RX ring configuration (0x0800 - 0x0c00) +/* RX ring configuration (0x0800 - 0x0c00) * %NFP_NET_CFG_RXR_BASE: Base offset for RX ring configuration * %NFP_NET_CFG_RXR_ADDR: Per RX ring DMA address (8B entries) * %NFP_NET_CFG_RXR_SZ: Per RX ring ring size (1B entries) @@ -318,8 +324,7 @@ #define NFP_NET_CFG_RXR_IRQ_MOD(_x) (NFP_NET_CFG_RXR_BASE + 0x300 + \ ((_x) * 0x4)) -/** - * Interrupt Control/Cause registers (0x0c00 - 0x0d00) +/* Interrupt Control/Cause registers (0x0c00 - 0x0d00) * These registers are only used when MSI-X auto-masking is not * enabled (%NFP_NET_CFG_CTRL_MSIXAUTO not set). The array is index * by MSI-X entry and are 1B in size. If an entry is zero, the @@ -334,8 +339,7 @@ #define NFP_NET_CFG_ICR_RXTX 0x1 #define NFP_NET_CFG_ICR_LSC 0x2 -/** - * General device stats (0x0d00 - 0x0d90) +/* General device stats (0x0d00 - 0x0d90) * all counters are 64bit. 
*/ #define NFP_NET_CFG_STATS_BASE 0x0d00 @@ -368,8 +372,7 @@ #define NFP_NET_CFG_STATS_APP3_FRAMES (NFP_NET_CFG_STATS_BASE + 0xc0) #define NFP_NET_CFG_STATS_APP3_BYTES (NFP_NET_CFG_STATS_BASE + 0xc8) -/** - * Per ring stats (0x1000 - 0x1800) +/* Per ring stats (0x1000 - 0x1800) * options, 64bit per entry * %NFP_NET_CFG_TXR_STATS: TX ring statistics (Packet and Byte count) * %NFP_NET_CFG_RXR_STATS: RX ring statistics (Packet and Byte count) @@ -381,8 +384,7 @@ #define NFP_NET_CFG_RXR_STATS(_x) (NFP_NET_CFG_RXR_STATS_BASE + \ ((_x) * 0x10)) -/** - * General use mailbox area (0x1800 - 0x19ff) +/* General use mailbox area (0x1800 - 0x19ff) * 4B used for update command and 4B return code * followed by a max of 504B of variable length value */ @@ -399,8 +401,7 @@ #define NFP_NET_CFG_MBOX_CMD_PCI_DSCP_PRIOMAP_SET 5 #define NFP_NET_CFG_MBOX_CMD_TLV_CMSG 6 -/** - * VLAN filtering using general use mailbox +/* VLAN filtering using general use mailbox * %NFP_NET_CFG_VLAN_FILTER: Base address of VLAN filter mailbox * %NFP_NET_CFG_VLAN_FILTER_VID: VLAN ID to filter * %NFP_NET_CFG_VLAN_FILTER_PROTO: VLAN proto to filter @@ -411,8 +412,7 @@ #define NFP_NET_CFG_VLAN_FILTER_PROTO (NFP_NET_CFG_VLAN_FILTER + 2) #define NFP_NET_CFG_VLAN_FILTER_SZ 0x0004 -/** - * TLV capabilities +/* TLV capabilities * %NFP_NET_CFG_TLV_TYPE: Offset of type within the TLV * %NFP_NET_CFG_TLV_TYPE_REQUIRED: Driver must be able to parse the TLV * %NFP_NET_CFG_TLV_LENGTH: Offset of length within the TLV @@ -438,8 +438,7 @@ #define NFP_NET_CFG_TLV_HEADER_TYPE 0x7fff0000 #define NFP_NET_CFG_TLV_HEADER_LENGTH 0x0000ffff -/** - * Capability TLV types +/* Capability TLV types * * %NFP_NET_CFG_TLV_TYPE_UNKNOWN: * Special TLV type to catch bugs, should never be encountered. 
Drivers should @@ -512,8 +511,7 @@ struct device; -/** - * struct nfp_net_tlv_caps - parsed control BAR TLV capabilities +/* struct nfp_net_tlv_caps - parsed control BAR TLV capabilities * @me_freq_mhz: ME clock_freq (MHz) * @mbox_off: vNIC mailbox area offset * @mbox_len: vNIC mailbox area length diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c index 553c708694e8..d8b735ccf899 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c @@ -1,10 +1,11 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -/* Copyright (C) 2015-2018 Netronome Systems, Inc. */ +/* Copyright (C) 2015-2019 Netronome Systems, Inc. */ #include <linux/debugfs.h> #include <linux/module.h> #include <linux/rtnetlink.h> #include "nfp_net.h" +#include "nfp_net_dp.h" static struct dentry *nfp_dir; @@ -42,13 +43,19 @@ static int nfp_rx_q_show(struct seq_file *file, void *data) seq_printf(file, "%04d: 0x%08x 0x%08x", i, rxd->vals[0], rxd->vals[1]); - frag = READ_ONCE(rx_ring->rxbufs[i].frag); - if (frag) - seq_printf(file, " frag=%p", frag); + if (!r_vec->xsk_pool) { + frag = READ_ONCE(rx_ring->rxbufs[i].frag); + if (frag) + seq_printf(file, " frag=%p", frag); - if (rx_ring->rxbufs[i].dma_addr) - seq_printf(file, " dma_addr=%pad", - &rx_ring->rxbufs[i].dma_addr); + if (rx_ring->rxbufs[i].dma_addr) + seq_printf(file, " dma_addr=%pad", + &rx_ring->rxbufs[i].dma_addr); + } else { + if (rx_ring->xsk_rxbufs[i].dma_addr) + seq_printf(file, " dma_addr=%pad", + &rx_ring->xsk_rxbufs[i].dma_addr); + } if (i == rx_ring->rd_p % rxd_cnt) seq_puts(file, " H_RD "); @@ -74,10 +81,8 @@ static int nfp_tx_q_show(struct seq_file *file, void *data) { struct nfp_net_r_vector *r_vec = file->private; struct nfp_net_tx_ring *tx_ring; - struct nfp_net_tx_desc *txd; - int d_rd_p, d_wr_p, txd_cnt; struct nfp_net *nn; - int i; + int d_rd_p, d_wr_p; rtnl_lock(); @@ -91,49 +96,20 @@ 
static int nfp_tx_q_show(struct seq_file *file, void *data) if (!nfp_net_running(nn)) goto out; - txd_cnt = tx_ring->cnt; - d_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q); d_wr_p = nfp_qcp_wr_ptr_read(tx_ring->qcp_q); - seq_printf(file, "TX[%02d,%02d%s]: cnt=%u dma=%pad host=%p H_RD=%u H_WR=%u D_RD=%u D_WR=%u\n", + seq_printf(file, "TX[%02d,%02d%s]: cnt=%u dma=%pad host=%p H_RD=%u H_WR=%u D_RD=%u D_WR=%u", tx_ring->idx, tx_ring->qcidx, tx_ring == r_vec->tx_ring ? "" : "xdp", tx_ring->cnt, &tx_ring->dma, tx_ring->txds, tx_ring->rd_p, tx_ring->wr_p, d_rd_p, d_wr_p); + if (tx_ring->txrwb) + seq_printf(file, " TXRWB=%llu", *tx_ring->txrwb); + seq_putc(file, '\n'); - for (i = 0; i < txd_cnt; i++) { - txd = &tx_ring->txds[i]; - seq_printf(file, "%04d: 0x%08x 0x%08x 0x%08x 0x%08x", i, - txd->vals[0], txd->vals[1], - txd->vals[2], txd->vals[3]); - - if (tx_ring == r_vec->tx_ring) { - struct sk_buff *skb = READ_ONCE(tx_ring->txbufs[i].skb); - - if (skb) - seq_printf(file, " skb->head=%p skb->data=%p", - skb->head, skb->data); - } else { - seq_printf(file, " frag=%p", - READ_ONCE(tx_ring->txbufs[i].frag)); - } - - if (tx_ring->txbufs[i].dma_addr) - seq_printf(file, " dma_addr=%pad", - &tx_ring->txbufs[i].dma_addr); - - if (i == tx_ring->rd_p % txd_cnt) - seq_puts(file, " H_RD"); - if (i == tx_ring->wr_p % txd_cnt) - seq_puts(file, " H_WR"); - if (i == d_rd_p % txd_cnt) - seq_puts(file, " D_RD"); - if (i == d_wr_p % txd_cnt) - seq_puts(file, " D_WR"); - - seq_putc(file, '\n'); - } + nfp_net_debugfs_print_tx_descs(file, &nn->dp, r_vec, tx_ring, + d_rd_p, d_wr_p); out: rtnl_unlock(); return 0; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_dp.c b/drivers/net/ethernet/netronome/nfp/nfp_net_dp.c new file mode 100644 index 000000000000..550df83b798c --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_dp.c @@ -0,0 +1,466 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2015-2019 Netronome Systems, Inc. 
*/ + +#include "nfp_app.h" +#include "nfp_net_dp.h" +#include "nfp_net_xsk.h" + +/** + * nfp_net_rx_alloc_one() - Allocate and map page frag for RX + * @dp: NFP Net data path struct + * @dma_addr: Pointer to storage for DMA address (output param) + * + * This function will allcate a new page frag, map it for DMA. + * + * Return: allocated page frag or NULL on failure. + */ +void *nfp_net_rx_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr) +{ + void *frag; + + if (!dp->xdp_prog) { + frag = netdev_alloc_frag(dp->fl_bufsz); + } else { + struct page *page; + + page = alloc_page(GFP_KERNEL); + frag = page ? page_address(page) : NULL; + } + if (!frag) { + nn_dp_warn(dp, "Failed to alloc receive page frag\n"); + return NULL; + } + + *dma_addr = nfp_net_dma_map_rx(dp, frag); + if (dma_mapping_error(dp->dev, *dma_addr)) { + nfp_net_free_frag(frag, dp->xdp_prog); + nn_dp_warn(dp, "Failed to map DMA RX buffer\n"); + return NULL; + } + + return frag; +} + +/** + * nfp_net_tx_ring_init() - Fill in the boilerplate for a TX ring + * @tx_ring: TX ring structure + * @dp: NFP Net data path struct + * @r_vec: IRQ vector servicing this ring + * @idx: Ring index + * @is_xdp: Is this an XDP TX ring? + */ +static void +nfp_net_tx_ring_init(struct nfp_net_tx_ring *tx_ring, struct nfp_net_dp *dp, + struct nfp_net_r_vector *r_vec, unsigned int idx, + bool is_xdp) +{ + struct nfp_net *nn = r_vec->nfp_net; + + tx_ring->idx = idx; + tx_ring->r_vec = r_vec; + tx_ring->is_xdp = is_xdp; + u64_stats_init(&tx_ring->r_vec->tx_sync); + + tx_ring->qcidx = tx_ring->idx * nn->stride_tx; + tx_ring->txrwb = dp->txrwb ? 
&dp->txrwb[idx] : NULL; + tx_ring->qcp_q = nn->tx_bar + NFP_QCP_QUEUE_OFF(tx_ring->qcidx); +} + +/** + * nfp_net_rx_ring_init() - Fill in the boilerplate for a RX ring + * @rx_ring: RX ring structure + * @r_vec: IRQ vector servicing this ring + * @idx: Ring index + */ +static void +nfp_net_rx_ring_init(struct nfp_net_rx_ring *rx_ring, + struct nfp_net_r_vector *r_vec, unsigned int idx) +{ + struct nfp_net *nn = r_vec->nfp_net; + + rx_ring->idx = idx; + rx_ring->r_vec = r_vec; + u64_stats_init(&rx_ring->r_vec->rx_sync); + + rx_ring->fl_qcidx = rx_ring->idx * nn->stride_rx; + rx_ring->qcp_fl = nn->rx_bar + NFP_QCP_QUEUE_OFF(rx_ring->fl_qcidx); +} + +/** + * nfp_net_rx_ring_reset() - Reflect in SW state of freelist after disable + * @rx_ring: RX ring structure + * + * Assumes that the device is stopped, must be idempotent. + */ +void nfp_net_rx_ring_reset(struct nfp_net_rx_ring *rx_ring) +{ + unsigned int wr_idx, last_idx; + + /* wr_p == rd_p means ring was never fed FL bufs. RX rings are always + * kept at cnt - 1 FL bufs. + */ + if (rx_ring->wr_p == 0 && rx_ring->rd_p == 0) + return; + + /* Move the empty entry to the end of the list */ + wr_idx = D_IDX(rx_ring, rx_ring->wr_p); + last_idx = rx_ring->cnt - 1; + if (rx_ring->r_vec->xsk_pool) { + rx_ring->xsk_rxbufs[wr_idx] = rx_ring->xsk_rxbufs[last_idx]; + memset(&rx_ring->xsk_rxbufs[last_idx], 0, + sizeof(*rx_ring->xsk_rxbufs)); + } else { + rx_ring->rxbufs[wr_idx] = rx_ring->rxbufs[last_idx]; + memset(&rx_ring->rxbufs[last_idx], 0, sizeof(*rx_ring->rxbufs)); + } + + memset(rx_ring->rxds, 0, rx_ring->size); + rx_ring->wr_p = 0; + rx_ring->rd_p = 0; +} + +/** + * nfp_net_rx_ring_bufs_free() - Free any buffers currently on the RX ring + * @dp: NFP Net data path struct + * @rx_ring: RX ring to remove buffers from + * + * Assumes that the device is stopped and buffers are in [0, ring->cnt - 1) + * entries. After device is disabled nfp_net_rx_ring_reset() must be called + * to restore required ring geometry. 
+ */ +static void +nfp_net_rx_ring_bufs_free(struct nfp_net_dp *dp, + struct nfp_net_rx_ring *rx_ring) +{ + unsigned int i; + + if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx)) + return; + + for (i = 0; i < rx_ring->cnt - 1; i++) { + /* NULL skb can only happen when initial filling of the ring + * fails to allocate enough buffers and calls here to free + * already allocated ones. + */ + if (!rx_ring->rxbufs[i].frag) + continue; + + nfp_net_dma_unmap_rx(dp, rx_ring->rxbufs[i].dma_addr); + nfp_net_free_frag(rx_ring->rxbufs[i].frag, dp->xdp_prog); + rx_ring->rxbufs[i].dma_addr = 0; + rx_ring->rxbufs[i].frag = NULL; + } +} + +/** + * nfp_net_rx_ring_bufs_alloc() - Fill RX ring with buffers (don't give to FW) + * @dp: NFP Net data path struct + * @rx_ring: RX ring to remove buffers from + */ +static int +nfp_net_rx_ring_bufs_alloc(struct nfp_net_dp *dp, + struct nfp_net_rx_ring *rx_ring) +{ + struct nfp_net_rx_buf *rxbufs; + unsigned int i; + + if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx)) + return 0; + + rxbufs = rx_ring->rxbufs; + + for (i = 0; i < rx_ring->cnt - 1; i++) { + rxbufs[i].frag = nfp_net_rx_alloc_one(dp, &rxbufs[i].dma_addr); + if (!rxbufs[i].frag) { + nfp_net_rx_ring_bufs_free(dp, rx_ring); + return -ENOMEM; + } + } + + return 0; +} + +int nfp_net_tx_rings_prepare(struct nfp_net *nn, struct nfp_net_dp *dp) +{ + unsigned int r; + + dp->tx_rings = kcalloc(dp->num_tx_rings, sizeof(*dp->tx_rings), + GFP_KERNEL); + if (!dp->tx_rings) + return -ENOMEM; + + if (dp->ctrl & NFP_NET_CFG_CTRL_TXRWB) { + dp->txrwb = dma_alloc_coherent(dp->dev, + dp->num_tx_rings * sizeof(u64), + &dp->txrwb_dma, GFP_KERNEL); + if (!dp->txrwb) + goto err_free_rings; + } + + for (r = 0; r < dp->num_tx_rings; r++) { + int bias = 0; + + if (r >= dp->num_stack_tx_rings) + bias = dp->num_stack_tx_rings; + + nfp_net_tx_ring_init(&dp->tx_rings[r], dp, + &nn->r_vecs[r - bias], r, bias); + + if (nfp_net_tx_ring_alloc(dp, &dp->tx_rings[r])) + goto err_free_prev; + + if 
(nfp_net_tx_ring_bufs_alloc(dp, &dp->tx_rings[r])) + goto err_free_ring; + } + + return 0; + +err_free_prev: + while (r--) { + nfp_net_tx_ring_bufs_free(dp, &dp->tx_rings[r]); +err_free_ring: + nfp_net_tx_ring_free(dp, &dp->tx_rings[r]); + } + if (dp->txrwb) + dma_free_coherent(dp->dev, dp->num_tx_rings * sizeof(u64), + dp->txrwb, dp->txrwb_dma); +err_free_rings: + kfree(dp->tx_rings); + return -ENOMEM; +} + +void nfp_net_tx_rings_free(struct nfp_net_dp *dp) +{ + unsigned int r; + + for (r = 0; r < dp->num_tx_rings; r++) { + nfp_net_tx_ring_bufs_free(dp, &dp->tx_rings[r]); + nfp_net_tx_ring_free(dp, &dp->tx_rings[r]); + } + + if (dp->txrwb) + dma_free_coherent(dp->dev, dp->num_tx_rings * sizeof(u64), + dp->txrwb, dp->txrwb_dma); + kfree(dp->tx_rings); +} + +/** + * nfp_net_rx_ring_free() - Free resources allocated to a RX ring + * @rx_ring: RX ring to free + */ +static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring) +{ + struct nfp_net_r_vector *r_vec = rx_ring->r_vec; + struct nfp_net_dp *dp = &r_vec->nfp_net->dp; + + if (dp->netdev) + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); + + if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx)) + kvfree(rx_ring->xsk_rxbufs); + else + kvfree(rx_ring->rxbufs); + + if (rx_ring->rxds) + dma_free_coherent(dp->dev, rx_ring->size, + rx_ring->rxds, rx_ring->dma); + + rx_ring->cnt = 0; + rx_ring->rxbufs = NULL; + rx_ring->xsk_rxbufs = NULL; + rx_ring->rxds = NULL; + rx_ring->dma = 0; + rx_ring->size = 0; +} + +/** + * nfp_net_rx_ring_alloc() - Allocate resource for a RX ring + * @dp: NFP Net data path struct + * @rx_ring: RX ring to allocate + * + * Return: 0 on success, negative errno otherwise. 
+ */ +static int +nfp_net_rx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring) +{ + enum xdp_mem_type mem_type; + size_t rxbuf_sw_desc_sz; + int err; + + if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx)) { + mem_type = MEM_TYPE_XSK_BUFF_POOL; + rxbuf_sw_desc_sz = sizeof(*rx_ring->xsk_rxbufs); + } else { + mem_type = MEM_TYPE_PAGE_ORDER0; + rxbuf_sw_desc_sz = sizeof(*rx_ring->rxbufs); + } + + if (dp->netdev) { + err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, dp->netdev, + rx_ring->idx, rx_ring->r_vec->napi.napi_id); + if (err < 0) + return err; + + err = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq, mem_type, NULL); + if (err) + goto err_alloc; + } + + rx_ring->cnt = dp->rxd_cnt; + rx_ring->size = array_size(rx_ring->cnt, sizeof(*rx_ring->rxds)); + rx_ring->rxds = dma_alloc_coherent(dp->dev, rx_ring->size, + &rx_ring->dma, + GFP_KERNEL | __GFP_NOWARN); + if (!rx_ring->rxds) { + netdev_warn(dp->netdev, "failed to allocate RX descriptor ring memory, requested descriptor count: %d, consider lowering descriptor count\n", + rx_ring->cnt); + goto err_alloc; + } + + if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx)) { + rx_ring->xsk_rxbufs = kvcalloc(rx_ring->cnt, rxbuf_sw_desc_sz, + GFP_KERNEL); + if (!rx_ring->xsk_rxbufs) + goto err_alloc; + } else { + rx_ring->rxbufs = kvcalloc(rx_ring->cnt, rxbuf_sw_desc_sz, + GFP_KERNEL); + if (!rx_ring->rxbufs) + goto err_alloc; + } + + return 0; + +err_alloc: + nfp_net_rx_ring_free(rx_ring); + return -ENOMEM; +} + +int nfp_net_rx_rings_prepare(struct nfp_net *nn, struct nfp_net_dp *dp) +{ + unsigned int r; + + dp->rx_rings = kcalloc(dp->num_rx_rings, sizeof(*dp->rx_rings), + GFP_KERNEL); + if (!dp->rx_rings) + return -ENOMEM; + + for (r = 0; r < dp->num_rx_rings; r++) { + nfp_net_rx_ring_init(&dp->rx_rings[r], &nn->r_vecs[r], r); + + if (nfp_net_rx_ring_alloc(dp, &dp->rx_rings[r])) + goto err_free_prev; + + if (nfp_net_rx_ring_bufs_alloc(dp, &dp->rx_rings[r])) + goto err_free_ring; + } + + return 0; + +err_free_prev: + 
while (r--) { + nfp_net_rx_ring_bufs_free(dp, &dp->rx_rings[r]); +err_free_ring: + nfp_net_rx_ring_free(&dp->rx_rings[r]); + } + kfree(dp->rx_rings); + return -ENOMEM; +} + +void nfp_net_rx_rings_free(struct nfp_net_dp *dp) +{ + unsigned int r; + + for (r = 0; r < dp->num_rx_rings; r++) { + nfp_net_rx_ring_bufs_free(dp, &dp->rx_rings[r]); + nfp_net_rx_ring_free(&dp->rx_rings[r]); + } + + kfree(dp->rx_rings); +} + +void +nfp_net_rx_ring_hw_cfg_write(struct nfp_net *nn, + struct nfp_net_rx_ring *rx_ring, unsigned int idx) +{ + /* Write the DMA address, size and MSI-X info to the device */ + nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(idx), rx_ring->dma); + nn_writeb(nn, NFP_NET_CFG_RXR_SZ(idx), ilog2(rx_ring->cnt)); + nn_writeb(nn, NFP_NET_CFG_RXR_VEC(idx), rx_ring->r_vec->irq_entry); +} + +void +nfp_net_tx_ring_hw_cfg_write(struct nfp_net *nn, + struct nfp_net_tx_ring *tx_ring, unsigned int idx) +{ + nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(idx), tx_ring->dma); + if (tx_ring->txrwb) { + *tx_ring->txrwb = 0; + nn_writeq(nn, NFP_NET_CFG_TXR_WB_ADDR(idx), + nn->dp.txrwb_dma + idx * sizeof(u64)); + } + nn_writeb(nn, NFP_NET_CFG_TXR_SZ(idx), ilog2(tx_ring->cnt)); + nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), tx_ring->r_vec->irq_entry); +} + +void nfp_net_vec_clear_ring_data(struct nfp_net *nn, unsigned int idx) +{ + nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(idx), 0); + nn_writeb(nn, NFP_NET_CFG_RXR_SZ(idx), 0); + nn_writeb(nn, NFP_NET_CFG_RXR_VEC(idx), 0); + + nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(idx), 0); + nn_writeq(nn, NFP_NET_CFG_TXR_WB_ADDR(idx), 0); + nn_writeb(nn, NFP_NET_CFG_TXR_SZ(idx), 0); + nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), 0); +} + +netdev_tx_t nfp_net_tx(struct sk_buff *skb, struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + + return nn->dp.ops->xmit(skb, netdev); +} + +bool __nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb) +{ + struct nfp_net_r_vector *r_vec = &nn->r_vecs[0]; + + return nn->dp.ops->ctrl_tx_one(nn, r_vec, skb, false); +} + +bool 
nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb) +{ + struct nfp_net_r_vector *r_vec = &nn->r_vecs[0]; + bool ret; + + spin_lock_bh(&r_vec->lock); + ret = nn->dp.ops->ctrl_tx_one(nn, r_vec, skb, false); + spin_unlock_bh(&r_vec->lock); + + return ret; +} + +bool nfp_net_vlan_strip(struct sk_buff *skb, const struct nfp_net_rx_desc *rxd, + const struct nfp_meta_parsed *meta) +{ + u16 tpid = 0, tci = 0; + + if (rxd->rxd.flags & PCIE_DESC_RX_VLAN) { + tpid = ETH_P_8021Q; + tci = le16_to_cpu(rxd->rxd.vlan); + } else if (meta->vlan.stripped) { + if (meta->vlan.tpid == NFP_NET_VLAN_CTAG) + tpid = ETH_P_8021Q; + else if (meta->vlan.tpid == NFP_NET_VLAN_STAG) + tpid = ETH_P_8021AD; + else + return false; + + tci = meta->vlan.tci; + } + if (tpid) + __vlan_hwaccel_put_tag(skb, htons(tpid), tci); + + return true; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_dp.h b/drivers/net/ethernet/netronome/nfp/nfp_net_dp.h new file mode 100644 index 000000000000..831c83ce0d3d --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_dp.h @@ -0,0 +1,219 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2019 Netronome Systems, Inc. 
*/ + +#ifndef _NFP_NET_DP_ +#define _NFP_NET_DP_ + +#include "nfp_net.h" + +static inline dma_addr_t nfp_net_dma_map_rx(struct nfp_net_dp *dp, void *frag) +{ + return dma_map_single_attrs(dp->dev, frag + NFP_NET_RX_BUF_HEADROOM, + dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA, + dp->rx_dma_dir, DMA_ATTR_SKIP_CPU_SYNC); +} + +static inline void +nfp_net_dma_sync_dev_rx(const struct nfp_net_dp *dp, dma_addr_t dma_addr) +{ + dma_sync_single_for_device(dp->dev, dma_addr, + dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA, + dp->rx_dma_dir); +} + +static inline void nfp_net_dma_unmap_rx(struct nfp_net_dp *dp, + dma_addr_t dma_addr) +{ + dma_unmap_single_attrs(dp->dev, dma_addr, + dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA, + dp->rx_dma_dir, DMA_ATTR_SKIP_CPU_SYNC); +} + +static inline void nfp_net_dma_sync_cpu_rx(struct nfp_net_dp *dp, + dma_addr_t dma_addr, + unsigned int len) +{ + dma_sync_single_for_cpu(dp->dev, dma_addr - NFP_NET_RX_BUF_HEADROOM, + len, dp->rx_dma_dir); +} + +/** + * nfp_net_tx_full() - check if the TX ring is full + * @tx_ring: TX ring to check + * @dcnt: Number of descriptors that need to be enqueued (must be >= 1) + * + * This function checks, based on the *host copy* of read/write + * pointer if a given TX ring is full. The real TX queue may have + * some newly made available slots. + * + * Return: True if the ring is full. 
+ */ +static inline int nfp_net_tx_full(struct nfp_net_tx_ring *tx_ring, int dcnt) +{ + return (tx_ring->wr_p - tx_ring->rd_p) >= (tx_ring->cnt - dcnt); +} + +static inline void nfp_net_tx_xmit_more_flush(struct nfp_net_tx_ring *tx_ring) +{ + wmb(); /* drain writebuffer */ + nfp_qcp_wr_ptr_add(tx_ring->qcp_q, tx_ring->wr_ptr_add); + tx_ring->wr_ptr_add = 0; +} + +static inline u32 +nfp_net_read_tx_cmpl(struct nfp_net_tx_ring *tx_ring, struct nfp_net_dp *dp) +{ + if (tx_ring->txrwb) + return *tx_ring->txrwb; + return nfp_qcp_rd_ptr_read(tx_ring->qcp_q); +} + +static inline void nfp_net_free_frag(void *frag, bool xdp) +{ + if (!xdp) + skb_free_frag(frag); + else + __free_page(virt_to_page(frag)); +} + +/** + * nfp_net_irq_unmask() - Unmask automasked interrupt + * @nn: NFP Network structure + * @entry_nr: MSI-X table entry + * + * Clear the ICR for the IRQ entry. + */ +static inline void nfp_net_irq_unmask(struct nfp_net *nn, unsigned int entry_nr) +{ + nn_writeb(nn, NFP_NET_CFG_ICR(entry_nr), NFP_NET_CFG_ICR_UNMASKED); + nn_pci_flush(nn); +} + +struct seq_file; + +/* Common */ +void +nfp_net_rx_ring_hw_cfg_write(struct nfp_net *nn, + struct nfp_net_rx_ring *rx_ring, unsigned int idx); +void +nfp_net_tx_ring_hw_cfg_write(struct nfp_net *nn, + struct nfp_net_tx_ring *tx_ring, unsigned int idx); +void nfp_net_vec_clear_ring_data(struct nfp_net *nn, unsigned int idx); + +void *nfp_net_rx_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr); +int nfp_net_rx_rings_prepare(struct nfp_net *nn, struct nfp_net_dp *dp); +int nfp_net_tx_rings_prepare(struct nfp_net *nn, struct nfp_net_dp *dp); +void nfp_net_rx_rings_free(struct nfp_net_dp *dp); +void nfp_net_tx_rings_free(struct nfp_net_dp *dp); +void nfp_net_rx_ring_reset(struct nfp_net_rx_ring *rx_ring); +bool nfp_net_vlan_strip(struct sk_buff *skb, const struct nfp_net_rx_desc *rxd, + const struct nfp_meta_parsed *meta); + +enum nfp_nfd_version { + NFP_NFD_VER_NFD3, + NFP_NFD_VER_NFDK, +}; + +/** + * struct nfp_dp_ops - 
Hooks to wrap different implementation of different dp + * @version: Indicate dp type + * @tx_min_desc_per_pkt: Minimal TX descs needed for each packet + * @cap_mask: Mask of supported features + * @dma_mask: DMA addressing capability + * @poll: Napi poll for normal rx/tx + * @xsk_poll: Napi poll when xsk is enabled + * @ctrl_poll: Tasklet poll for ctrl rx/tx + * @xmit: Xmit for normal path + * @ctrl_tx_one: Xmit for ctrl path + * @rx_ring_fill_freelist: Give buffers from the ring to FW + * @tx_ring_alloc: Allocate resource for a TX ring + * @tx_ring_reset: Free any untransmitted buffers and reset pointers + * @tx_ring_free: Free resources allocated to a TX ring + * @tx_ring_bufs_alloc: Allocate resource for each TX buffer + * @tx_ring_bufs_free: Free resources allocated to each TX buffer + * @print_tx_descs: Show TX ring's info for debug purpose + */ +struct nfp_dp_ops { + enum nfp_nfd_version version; + unsigned int tx_min_desc_per_pkt; + u32 cap_mask; + u64 dma_mask; + + int (*poll)(struct napi_struct *napi, int budget); + int (*xsk_poll)(struct napi_struct *napi, int budget); + void (*ctrl_poll)(struct tasklet_struct *t); + netdev_tx_t (*xmit)(struct sk_buff *skb, struct net_device *netdev); + bool (*ctrl_tx_one)(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, + struct sk_buff *skb, bool old); + void (*rx_ring_fill_freelist)(struct nfp_net_dp *dp, + struct nfp_net_rx_ring *rx_ring); + int (*tx_ring_alloc)(struct nfp_net_dp *dp, + struct nfp_net_tx_ring *tx_ring); + void (*tx_ring_reset)(struct nfp_net_dp *dp, + struct nfp_net_tx_ring *tx_ring); + void (*tx_ring_free)(struct nfp_net_tx_ring *tx_ring); + int (*tx_ring_bufs_alloc)(struct nfp_net_dp *dp, + struct nfp_net_tx_ring *tx_ring); + void (*tx_ring_bufs_free)(struct nfp_net_dp *dp, + struct nfp_net_tx_ring *tx_ring); + + void (*print_tx_descs)(struct seq_file *file, + struct nfp_net_r_vector *r_vec, + struct nfp_net_tx_ring *tx_ring, + u32 d_rd_p, u32 d_wr_p); +}; + +static inline void 
+nfp_net_tx_ring_reset(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring) +{ + return dp->ops->tx_ring_reset(dp, tx_ring); +} + +static inline void +nfp_net_rx_ring_fill_freelist(struct nfp_net_dp *dp, + struct nfp_net_rx_ring *rx_ring) +{ + dp->ops->rx_ring_fill_freelist(dp, rx_ring); +} + +static inline int +nfp_net_tx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring) +{ + return dp->ops->tx_ring_alloc(dp, tx_ring); +} + +static inline void +nfp_net_tx_ring_free(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring) +{ + dp->ops->tx_ring_free(tx_ring); +} + +static inline int +nfp_net_tx_ring_bufs_alloc(struct nfp_net_dp *dp, + struct nfp_net_tx_ring *tx_ring) +{ + return dp->ops->tx_ring_bufs_alloc(dp, tx_ring); +} + +static inline void +nfp_net_tx_ring_bufs_free(struct nfp_net_dp *dp, + struct nfp_net_tx_ring *tx_ring) +{ + dp->ops->tx_ring_bufs_free(dp, tx_ring); +} + +static inline void +nfp_net_debugfs_print_tx_descs(struct seq_file *file, struct nfp_net_dp *dp, + struct nfp_net_r_vector *r_vec, + struct nfp_net_tx_ring *tx_ring, + u32 d_rd_p, u32 d_wr_p) +{ + dp->ops->print_tx_descs(file, r_vec, tx_ring, d_rd_p, d_wr_p); +} + +extern const struct nfp_dp_ops nfp_nfd3_ops; +extern const struct nfp_dp_ops nfp_nfdk_ops; + +netdev_tx_t nfp_net_tx(struct sk_buff *skb, struct net_device *netdev); + +#endif /* _NFP_NET_DP_ */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index cf7882933993..22a5d2419084 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -21,12 +21,15 @@ #include <linux/sfp.h> #include "nfpcore/nfp.h" +#include "nfpcore/nfp_dev.h" #include "nfpcore/nfp_nsp.h" #include "nfp_app.h" #include "nfp_main.h" #include "nfp_net_ctrl.h" +#include "nfp_net_dp.h" #include "nfp_net.h" #include "nfp_port.h" +#include "nfpcore/nfp_cpp.h" struct nfp_et_stat { char name[ETH_GSTRING_LEN]; @@ 
-202,7 +205,7 @@ nfp_get_drvinfo(struct nfp_app *app, struct pci_dev *pdev, { char nsp_version[ETHTOOL_FWVERS_LEN] = {}; - strlcpy(drvinfo->driver, dev_driver_string(&pdev->dev), + strscpy(drvinfo->driver, dev_driver_string(&pdev->dev), sizeof(drvinfo->driver)); nfp_net_get_nspinfo(app, nsp_version); snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), @@ -217,20 +220,51 @@ nfp_net_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) struct nfp_net *nn = netdev_priv(netdev); snprintf(vnic_version, sizeof(vnic_version), "%d.%d.%d.%d", - nn->fw_ver.resv, nn->fw_ver.class, + nn->fw_ver.extend, nn->fw_ver.class, nn->fw_ver.major, nn->fw_ver.minor); - strlcpy(drvinfo->bus_info, pci_name(nn->pdev), + strscpy(drvinfo->bus_info, pci_name(nn->pdev), sizeof(drvinfo->bus_info)); nfp_get_drvinfo(nn->app, nn->pdev, vnic_version, drvinfo); } +static int +nfp_net_nway_reset(struct net_device *netdev) +{ + struct nfp_eth_table_port *eth_port; + struct nfp_port *port; + int err; + + port = nfp_port_from_netdev(netdev); + eth_port = nfp_port_get_eth_port(port); + if (!eth_port) + return -EOPNOTSUPP; + + if (!netif_running(netdev)) + return 0; + + err = nfp_eth_set_configured(port->app->cpp, eth_port->index, false); + if (err) { + netdev_info(netdev, "Link down failed: %d\n", err); + return err; + } + + err = nfp_eth_set_configured(port->app->cpp, eth_port->index, true); + if (err) { + netdev_info(netdev, "Link up failed: %d\n", err); + return err; + } + + netdev_info(netdev, "Link reset succeeded\n"); + return 0; +} + static void nfp_app_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) { struct nfp_app *app = nfp_app_from_netdev(netdev); - strlcpy(drvinfo->bus_info, pci_name(app->pdev), + strscpy(drvinfo->bus_info, pci_name(app->pdev), sizeof(drvinfo->bus_info)); nfp_get_drvinfo(app, app->pdev, "*", drvinfo); } @@ -270,25 +304,14 @@ static int nfp_net_get_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *cmd) { - 
static const u32 ls_to_ethtool[] = { - [NFP_NET_CFG_STS_LINK_RATE_UNSUPPORTED] = 0, - [NFP_NET_CFG_STS_LINK_RATE_UNKNOWN] = SPEED_UNKNOWN, - [NFP_NET_CFG_STS_LINK_RATE_1G] = SPEED_1000, - [NFP_NET_CFG_STS_LINK_RATE_10G] = SPEED_10000, - [NFP_NET_CFG_STS_LINK_RATE_25G] = SPEED_25000, - [NFP_NET_CFG_STS_LINK_RATE_40G] = SPEED_40000, - [NFP_NET_CFG_STS_LINK_RATE_50G] = SPEED_50000, - [NFP_NET_CFG_STS_LINK_RATE_100G] = SPEED_100000, - }; struct nfp_eth_table_port *eth_port; struct nfp_port *port; struct nfp_net *nn; - u32 sts, ls; + unsigned int speed; + u16 sts; /* Init to unknowns */ ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE); - ethtool_link_ksettings_add_link_mode(cmd, supported, Pause); - ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause); cmd->base.port = PORT_OTHER; cmd->base.speed = SPEED_UNKNOWN; cmd->base.duplex = DUPLEX_UNKNOWN; @@ -296,8 +319,15 @@ nfp_net_get_link_ksettings(struct net_device *netdev, port = nfp_port_from_netdev(netdev); eth_port = nfp_port_get_eth_port(port); if (eth_port) { - cmd->base.autoneg = eth_port->aneg != NFP_ANEG_DISABLED ? 
- AUTONEG_ENABLE : AUTONEG_DISABLE; + ethtool_link_ksettings_add_link_mode(cmd, supported, Pause); + ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause); + if (eth_port->supp_aneg) { + ethtool_link_ksettings_add_link_mode(cmd, supported, Autoneg); + if (eth_port->aneg == NFP_ANEG_AUTO) { + ethtool_link_ksettings_add_link_mode(cmd, advertising, Autoneg); + cmd->base.autoneg = AUTONEG_ENABLE; + } + } nfp_net_set_fec_link_mode(eth_port, cmd); } @@ -316,18 +346,15 @@ nfp_net_get_link_ksettings(struct net_device *netdev, return -EOPNOTSUPP; nn = netdev_priv(netdev); - sts = nn_readl(nn, NFP_NET_CFG_STS); - - ls = FIELD_GET(NFP_NET_CFG_STS_LINK_RATE, sts); - if (ls == NFP_NET_CFG_STS_LINK_RATE_UNSUPPORTED) + sts = nn_readw(nn, NFP_NET_CFG_STS); + speed = nfp_net_lr2speed(FIELD_GET(NFP_NET_CFG_STS_LINK_RATE, sts)); + if (!speed) return -EOPNOTSUPP; - if (ls == NFP_NET_CFG_STS_LINK_RATE_UNKNOWN || - ls >= ARRAY_SIZE(ls_to_ethtool)) - return 0; - - cmd->base.speed = ls_to_ethtool[ls]; - cmd->base.duplex = DUPLEX_FULL; + if (speed != SPEED_UNKNOWN) { + cmd->base.speed = speed; + cmd->base.duplex = DUPLEX_FULL; + } return 0; } @@ -336,6 +363,7 @@ static int nfp_net_set_link_ksettings(struct net_device *netdev, const struct ethtool_link_ksettings *cmd) { + bool req_aneg = (cmd->base.autoneg == AUTONEG_ENABLE); struct nfp_eth_table_port *eth_port; struct nfp_port *port; struct nfp_nsp *nsp; @@ -355,13 +383,25 @@ nfp_net_set_link_ksettings(struct net_device *netdev, if (IS_ERR(nsp)) return PTR_ERR(nsp); - err = __nfp_eth_set_aneg(nsp, cmd->base.autoneg == AUTONEG_ENABLE ? - NFP_ANEG_AUTO : NFP_ANEG_DISABLED); + if (req_aneg && !eth_port->supp_aneg) { + netdev_warn(netdev, "Autoneg is not supported.\n"); + err = -EOPNOTSUPP; + goto err_bad_set; + } + + err = __nfp_eth_set_aneg(nsp, req_aneg ? 
NFP_ANEG_AUTO : NFP_ANEG_DISABLED); if (err) goto err_bad_set; + if (cmd->base.speed != SPEED_UNKNOWN) { u32 speed = cmd->base.speed / eth_port->lanes; + if (req_aneg) { + netdev_err(netdev, "Speed changing is not allowed when working on autoneg mode.\n"); + err = -EINVAL; + goto err_bad_set; + } + err = __nfp_eth_set_speed(nsp, speed); if (err) goto err_bad_set; @@ -381,12 +421,15 @@ err_bad_set: } static void nfp_net_get_ringparam(struct net_device *netdev, - struct ethtool_ringparam *ring) + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kernel_ring, + struct netlink_ext_ack *extack) { struct nfp_net *nn = netdev_priv(netdev); + u32 qc_max = nn->dev_info->max_qc_size; - ring->rx_max_pending = NFP_NET_MAX_RX_DESCS; - ring->tx_max_pending = NFP_NET_MAX_TX_DESCS; + ring->rx_max_pending = qc_max; + ring->tx_max_pending = qc_max / nn->dp.ops->tx_min_desc_per_pkt; ring->rx_pending = nn->dp.rxd_cnt; ring->tx_pending = nn->dp.txd_cnt; } @@ -406,21 +449,26 @@ static int nfp_net_set_ring_size(struct nfp_net *nn, u32 rxd_cnt, u32 txd_cnt) } static int nfp_net_set_ringparam(struct net_device *netdev, - struct ethtool_ringparam *ring) + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kernel_ring, + struct netlink_ext_ack *extack) { + u32 tx_dpp, qc_min, qc_max, rxd_cnt, txd_cnt; struct nfp_net *nn = netdev_priv(netdev); - u32 rxd_cnt, txd_cnt; /* We don't have separate queues/rings for small/large frames. 
*/ if (ring->rx_mini_pending || ring->rx_jumbo_pending) return -EINVAL; + qc_min = nn->dev_info->min_qc_size; + qc_max = nn->dev_info->max_qc_size; + tx_dpp = nn->dp.ops->tx_min_desc_per_pkt; /* Round up to supported values */ rxd_cnt = roundup_pow_of_two(ring->rx_pending); txd_cnt = roundup_pow_of_two(ring->tx_pending); - if (rxd_cnt < NFP_NET_MIN_RX_DESCS || rxd_cnt > NFP_NET_MAX_RX_DESCS || - txd_cnt < NFP_NET_MIN_TX_DESCS || txd_cnt > NFP_NET_MAX_TX_DESCS) + if (rxd_cnt < qc_min || rxd_cnt > qc_max || + txd_cnt < qc_min / tx_dpp || txd_cnt > qc_max / tx_dpp) return -EINVAL; if (nn->dp.rxd_cnt == rxd_cnt && nn->dp.txd_cnt == txd_cnt) @@ -432,6 +480,160 @@ static int nfp_net_set_ringparam(struct net_device *netdev, return nfp_net_set_ring_size(nn, rxd_cnt, txd_cnt); } +static int nfp_test_link(struct net_device *netdev) +{ + if (!netif_carrier_ok(netdev) || !(netdev->flags & IFF_UP)) + return 1; + + return 0; +} + +static int nfp_test_nsp(struct net_device *netdev) +{ + struct nfp_app *app = nfp_app_from_netdev(netdev); + struct nfp_nsp_identify *nspi; + struct nfp_nsp *nsp; + int err; + + nsp = nfp_nsp_open(app->cpp); + if (IS_ERR(nsp)) { + err = PTR_ERR(nsp); + netdev_info(netdev, "NSP Test: failed to access the NSP: %d\n", err); + goto exit; + } + + if (nfp_nsp_get_abi_ver_minor(nsp) < 15) { + err = -EOPNOTSUPP; + goto exit_close_nsp; + } + + nspi = kzalloc(sizeof(*nspi), GFP_KERNEL); + if (!nspi) { + err = -ENOMEM; + goto exit_close_nsp; + } + + err = nfp_nsp_read_identify(nsp, nspi, sizeof(*nspi)); + if (err < 0) + netdev_info(netdev, "NSP Test: reading bsp version failed %d\n", err); + + kfree(nspi); +exit_close_nsp: + nfp_nsp_close(nsp); +exit: + return err; +} + +static int nfp_test_fw(struct net_device *netdev) +{ + struct nfp_net *nn = netdev_priv(netdev); + int err; + + err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN); + if (err) + netdev_info(netdev, "FW Test: update failed %d\n", err); + + return err; +} + +static int nfp_test_reg(struct net_device 
*netdev) +{ + struct nfp_app *app = nfp_app_from_netdev(netdev); + struct nfp_cpp *cpp = app->cpp; + u32 model = nfp_cpp_model(cpp); + u32 value; + int err; + + err = nfp_cpp_model_autodetect(cpp, &value); + if (err < 0) { + netdev_info(netdev, "REG Test: NFP model detection failed %d\n", err); + return err; + } + + return (value == model) ? 0 : 1; +} + +static bool link_test_supported(struct net_device *netdev) +{ + return true; +} + +static bool nsp_test_supported(struct net_device *netdev) +{ + if (nfp_app_from_netdev(netdev)) + return true; + + return false; +} + +static bool fw_test_supported(struct net_device *netdev) +{ + if (nfp_netdev_is_nfp_net(netdev)) + return true; + + return false; +} + +static bool reg_test_supported(struct net_device *netdev) +{ + if (nfp_app_from_netdev(netdev)) + return true; + + return false; +} + +static struct nfp_self_test_item { + char name[ETH_GSTRING_LEN]; + bool (*is_supported)(struct net_device *dev); + int (*func)(struct net_device *dev); +} nfp_self_test[] = { + {"Link Test", link_test_supported, nfp_test_link}, + {"NSP Test", nsp_test_supported, nfp_test_nsp}, + {"Firmware Test", fw_test_supported, nfp_test_fw}, + {"Register Test", reg_test_supported, nfp_test_reg} +}; + +#define NFP_TEST_TOTAL_NUM ARRAY_SIZE(nfp_self_test) + +static void nfp_get_self_test_strings(struct net_device *netdev, u8 *data) +{ + int i; + + for (i = 0; i < NFP_TEST_TOTAL_NUM; i++) + if (nfp_self_test[i].is_supported(netdev)) + ethtool_sprintf(&data, nfp_self_test[i].name); +} + +static int nfp_get_self_test_count(struct net_device *netdev) +{ + int i, count = 0; + + for (i = 0; i < NFP_TEST_TOTAL_NUM; i++) + if (nfp_self_test[i].is_supported(netdev)) + count++; + + return count; +} + +static void nfp_net_self_test(struct net_device *netdev, struct ethtool_test *eth_test, + u64 *data) +{ + int i, ret, count = 0; + + netdev_info(netdev, "Start self test\n"); + + for (i = 0; i < NFP_TEST_TOTAL_NUM; i++) { + if 
(nfp_self_test[i].is_supported(netdev)) { + ret = nfp_self_test[i].func(netdev); + if (ret) + eth_test->flags |= ETH_TEST_FL_FAILED; + data[count++] = ret; + } + } + + netdev_info(netdev, "Test end\n"); +} + static unsigned int nfp_vnic_get_sw_stats_count(struct net_device *netdev) { struct nfp_net *nn = netdev_priv(netdev); @@ -484,7 +686,7 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data) unsigned int start; do { - start = u64_stats_fetch_begin(&nn->r_vecs[i].rx_sync); + start = u64_stats_fetch_begin_irq(&nn->r_vecs[i].rx_sync); data[0] = nn->r_vecs[i].rx_pkts; tmp[0] = nn->r_vecs[i].hw_csum_rx_ok; tmp[1] = nn->r_vecs[i].hw_csum_rx_inner_ok; @@ -492,10 +694,10 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data) tmp[3] = nn->r_vecs[i].hw_csum_rx_error; tmp[4] = nn->r_vecs[i].rx_replace_buf_alloc_fail; tmp[5] = nn->r_vecs[i].hw_tls_rx; - } while (u64_stats_fetch_retry(&nn->r_vecs[i].rx_sync, start)); + } while (u64_stats_fetch_retry_irq(&nn->r_vecs[i].rx_sync, start)); do { - start = u64_stats_fetch_begin(&nn->r_vecs[i].tx_sync); + start = u64_stats_fetch_begin_irq(&nn->r_vecs[i].tx_sync); data[1] = nn->r_vecs[i].tx_pkts; data[2] = nn->r_vecs[i].tx_busy; tmp[6] = nn->r_vecs[i].hw_csum_tx; @@ -505,7 +707,7 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data) tmp[10] = nn->r_vecs[i].hw_tls_tx; tmp[11] = nn->r_vecs[i].tls_tx_fallback; tmp[12] = nn->r_vecs[i].tls_tx_no_fallback; - } while (u64_stats_fetch_retry(&nn->r_vecs[i].tx_sync, start)); + } while (u64_stats_fetch_retry_irq(&nn->r_vecs[i].tx_sync, start)); data += NN_RVEC_PER_Q_STATS; @@ -695,6 +897,9 @@ static void nfp_net_get_strings(struct net_device *netdev, data = nfp_mac_get_stats_strings(netdev, data); data = nfp_app_port_get_stats_strings(nn->port, data); break; + case ETH_SS_TEST: + nfp_get_self_test_strings(netdev, data); + break; } } @@ -729,6 +934,8 @@ static int nfp_net_get_sset_count(struct net_device *netdev, int sset) cnt += 
nfp_mac_get_stats_count(netdev); cnt += nfp_app_port_get_stats_count(nn->port); return cnt; + case ETH_SS_TEST: + return nfp_get_self_test_count(netdev); default: return -EOPNOTSUPP; } @@ -747,6 +954,9 @@ static void nfp_port_get_strings(struct net_device *netdev, data = nfp_mac_get_stats_strings(netdev, data); data = nfp_app_port_get_stats_strings(port, data); break; + case ETH_SS_TEST: + nfp_get_self_test_strings(netdev, data); + break; } } @@ -776,6 +986,8 @@ static int nfp_port_get_sset_count(struct net_device *netdev, int sset) count = nfp_mac_get_stats_count(netdev); count += nfp_app_port_get_stats_count(port); return count; + case ETH_SS_TEST: + return nfp_get_self_test_count(netdev); default: return -EOPNOTSUPP; } @@ -833,7 +1045,7 @@ nfp_port_get_fecparam(struct net_device *netdev, return 0; param->fec = nfp_port_fec_nsp_to_ethtool(eth_port->fec_modes_supported); - param->active_fec = nfp_port_fec_nsp_to_ethtool(eth_port->fec); + param->active_fec = nfp_port_fec_nsp_to_ethtool(BIT(eth_port->act_fec)); return 0; } @@ -1220,6 +1432,8 @@ nfp_port_get_module_info(struct net_device *netdev, u8 data; port = nfp_port_from_netdev(netdev); + /* update port state to get latest interface */ + set_bit(NFP_PORT_CHANGED, &port->flags); eth_port = nfp_port_get_eth_port(port); if (!eth_port) return -EOPNOTSUPP; @@ -1450,14 +1664,219 @@ static int nfp_net_set_channels(struct net_device *netdev, return nfp_net_set_num_rings(nn, total_rx, total_tx); } +static void nfp_port_get_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pause) +{ + struct nfp_eth_table_port *eth_port; + struct nfp_port *port; + + port = nfp_port_from_netdev(netdev); + eth_port = nfp_port_get_eth_port(port); + if (!eth_port) + return; + + /* Currently pause frame support is fixed */ + pause->autoneg = AUTONEG_DISABLE; + pause->rx_pause = 1; + pause->tx_pause = 1; +} + +static int nfp_net_set_phys_id(struct net_device *netdev, + enum ethtool_phys_id_state state) +{ + struct 
nfp_eth_table_port *eth_port; + struct nfp_port *port; + int err; + + port = nfp_port_from_netdev(netdev); + eth_port = __nfp_port_get_eth_port(port); + if (!eth_port) + return -EOPNOTSUPP; + + switch (state) { + case ETHTOOL_ID_ACTIVE: + /* Control LED to blink */ + err = nfp_eth_set_idmode(port->app->cpp, eth_port->index, 1); + break; + + case ETHTOOL_ID_INACTIVE: + /* Control LED to normal mode */ + err = nfp_eth_set_idmode(port->app->cpp, eth_port->index, 0); + break; + + case ETHTOOL_ID_ON: + case ETHTOOL_ID_OFF: + default: + return -EOPNOTSUPP; + } + + return err; +} + +#define NFP_EEPROM_LEN ETH_ALEN + +static int +nfp_net_get_eeprom_len(struct net_device *netdev) +{ + struct nfp_eth_table_port *eth_port; + struct nfp_port *port; + + port = nfp_port_from_netdev(netdev); + eth_port = __nfp_port_get_eth_port(port); + if (!eth_port) + return 0; + + return NFP_EEPROM_LEN; +} + +static int +nfp_net_get_nsp_hwindex(struct net_device *netdev, + struct nfp_nsp **nspptr, + u32 *index) +{ + struct nfp_eth_table_port *eth_port; + struct nfp_port *port; + struct nfp_nsp *nsp; + int err; + + port = nfp_port_from_netdev(netdev); + eth_port = __nfp_port_get_eth_port(port); + if (!eth_port) + return -EOPNOTSUPP; + + nsp = nfp_nsp_open(port->app->cpp); + if (IS_ERR(nsp)) { + err = PTR_ERR(nsp); + netdev_err(netdev, "Failed to access the NSP: %d\n", err); + return err; + } + + if (!nfp_nsp_has_hwinfo_lookup(nsp)) { + netdev_err(netdev, "NSP doesn't support PF MAC generation\n"); + nfp_nsp_close(nsp); + return -EOPNOTSUPP; + } + + *nspptr = nsp; + *index = eth_port->eth_index; + + return 0; +} + +static int +nfp_net_get_port_mac_by_hwinfo(struct net_device *netdev, + u8 *mac_addr) +{ + char hwinfo[32] = {}; + struct nfp_nsp *nsp; + u32 index; + int err; + + err = nfp_net_get_nsp_hwindex(netdev, &nsp, &index); + if (err) + return err; + + snprintf(hwinfo, sizeof(hwinfo), "eth%u.mac", index); + err = nfp_nsp_hwinfo_lookup(nsp, hwinfo, sizeof(hwinfo)); + nfp_nsp_close(nsp); + if 
(err) { + netdev_err(netdev, "Reading persistent MAC address failed: %d\n", + err); + return -EOPNOTSUPP; + } + + if (sscanf(hwinfo, "%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx", + &mac_addr[0], &mac_addr[1], &mac_addr[2], + &mac_addr[3], &mac_addr[4], &mac_addr[5]) != 6) { + netdev_err(netdev, "Can't parse persistent MAC address (%s)\n", + hwinfo); + return -EOPNOTSUPP; + } + + return 0; +} + +static int +nfp_net_set_port_mac_by_hwinfo(struct net_device *netdev, + u8 *mac_addr) +{ + char hwinfo[32] = {}; + struct nfp_nsp *nsp; + u32 index; + int err; + + err = nfp_net_get_nsp_hwindex(netdev, &nsp, &index); + if (err) + return err; + + snprintf(hwinfo, sizeof(hwinfo), + "eth%u.mac=%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx", + index, mac_addr[0], mac_addr[1], mac_addr[2], mac_addr[3], + mac_addr[4], mac_addr[5]); + + err = nfp_nsp_hwinfo_set(nsp, hwinfo, sizeof(hwinfo)); + nfp_nsp_close(nsp); + if (err) { + netdev_err(netdev, "HWinfo set failed: %d, hwinfo: %s\n", + err, hwinfo); + return -EOPNOTSUPP; + } + + return 0; +} + +static int +nfp_net_get_eeprom(struct net_device *netdev, + struct ethtool_eeprom *eeprom, u8 *bytes) +{ + struct nfp_net *nn = netdev_priv(netdev); + u8 buf[NFP_EEPROM_LEN] = {}; + + if (eeprom->len == 0) + return -EINVAL; + + if (nfp_net_get_port_mac_by_hwinfo(netdev, buf)) + return -EOPNOTSUPP; + + eeprom->magic = nn->pdev->vendor | (nn->pdev->device << 16); + memcpy(bytes, buf + eeprom->offset, eeprom->len); + + return 0; +} + +static int +nfp_net_set_eeprom(struct net_device *netdev, + struct ethtool_eeprom *eeprom, u8 *bytes) +{ + struct nfp_net *nn = netdev_priv(netdev); + u8 buf[NFP_EEPROM_LEN] = {}; + + if (eeprom->len == 0) + return -EINVAL; + + if (eeprom->magic != (nn->pdev->vendor | nn->pdev->device << 16)) + return -EINVAL; + + if (nfp_net_get_port_mac_by_hwinfo(netdev, buf)) + return -EOPNOTSUPP; + + memcpy(buf + eeprom->offset, bytes, eeprom->len); + if (nfp_net_set_port_mac_by_hwinfo(netdev, buf)) + return -EOPNOTSUPP; + + return 
0; +} + static const struct ethtool_ops nfp_net_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES | ETHTOOL_COALESCE_USE_ADAPTIVE, .get_drvinfo = nfp_net_get_drvinfo, + .nway_reset = nfp_net_nway_reset, .get_link = ethtool_op_get_link, .get_ringparam = nfp_net_get_ringparam, .set_ringparam = nfp_net_set_ringparam, + .self_test = nfp_net_self_test, .get_strings = nfp_net_get_strings, .get_ethtool_stats = nfp_net_get_stats, .get_sset_count = nfp_net_get_sset_count, @@ -1472,6 +1891,9 @@ static const struct ethtool_ops nfp_net_ethtool_ops = { .set_dump = nfp_app_set_dump, .get_dump_flag = nfp_app_get_dump_flag, .get_dump_data = nfp_app_get_dump_data, + .get_eeprom_len = nfp_net_get_eeprom_len, + .get_eeprom = nfp_net_get_eeprom, + .set_eeprom = nfp_net_set_eeprom, .get_module_info = nfp_port_get_module_info, .get_module_eeprom = nfp_port_get_module_eeprom, .get_coalesce = nfp_net_get_coalesce, @@ -1482,13 +1904,17 @@ static const struct ethtool_ops nfp_net_ethtool_ops = { .set_link_ksettings = nfp_net_set_link_ksettings, .get_fecparam = nfp_port_get_fecparam, .set_fecparam = nfp_port_set_fecparam, + .get_pauseparam = nfp_port_get_pauseparam, + .set_phys_id = nfp_net_set_phys_id, }; const struct ethtool_ops nfp_port_ethtool_ops = { .get_drvinfo = nfp_app_get_drvinfo, + .nway_reset = nfp_net_nway_reset, .get_link = ethtool_op_get_link, .get_strings = nfp_port_get_strings, .get_ethtool_stats = nfp_port_get_stats, + .self_test = nfp_net_self_test, .get_sset_count = nfp_port_get_sset_count, .set_dump = nfp_app_set_dump, .get_dump_flag = nfp_app_get_dump_flag, @@ -1499,6 +1925,8 @@ const struct ethtool_ops nfp_port_ethtool_ops = { .set_link_ksettings = nfp_net_set_link_ksettings, .get_fecparam = nfp_port_get_fecparam, .set_fecparam = nfp_port_set_fecparam, + .get_pauseparam = nfp_port_get_pauseparam, + .set_phys_id = nfp_net_set_phys_id, }; void nfp_net_set_ethtool_ops(struct net_device *netdev) diff --git 
a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c index 751f76cd4f79..3bae92dc899e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c @@ -22,6 +22,7 @@ #include "nfpcore/nfp.h" #include "nfpcore/nfp_cpp.h" +#include "nfpcore/nfp_dev.h" #include "nfpcore/nfp_nffw.h" #include "nfpcore/nfp_nsp.h" #include "nfpcore/nfp6000_pcie.h" @@ -76,12 +77,6 @@ static int nfp_net_pf_get_num_ports(struct nfp_pf *pf) return nfp_pf_rtsym_read_optional(pf, "nfd_cfg_pf%u_num_ports", 1); } -static int nfp_net_pf_get_app_id(struct nfp_pf *pf) -{ - return nfp_pf_rtsym_read_optional(pf, "_pf%u_net_app_id", - NFP_APP_CORE_NIC); -} - static void nfp_net_pf_free_vnic(struct nfp_pf *pf, struct nfp_net *nn) { if (nfp_net_is_data_vnic(nn)) @@ -116,13 +111,12 @@ nfp_net_pf_alloc_vnic(struct nfp_pf *pf, bool needs_netdev, n_rx_rings = readl(ctrl_bar + NFP_NET_CFG_MAX_RXRINGS); /* Allocate and initialise the vNIC */ - nn = nfp_net_alloc(pf->pdev, ctrl_bar, needs_netdev, + nn = nfp_net_alloc(pf->pdev, pf->dev_info, ctrl_bar, needs_netdev, n_tx_rings, n_rx_rings); if (IS_ERR(nn)) return nn; nn->app = pf->app; - nfp_net_get_fw_version(&nn->fw_ver, ctrl_bar); nn->tx_bar = qc_bar + tx_base * NFP_QCP_QUEUE_ADDR_SZ; nn->rx_bar = qc_bar + rx_base * NFP_QCP_QUEUE_ADDR_SZ; nn->dp.is_vf = 0; @@ -202,6 +196,9 @@ nfp_net_pf_alloc_vnics(struct nfp_pf *pf, void __iomem *ctrl_bar, goto err_free_prev; } + if (nn->port) + nn->port->link_cb = nfp_net_refresh_port_table; + ctrl_bar += NFP_PF_CSR_SLICE_SIZE; /* Kill the vNIC if app init marked it as invalid */ @@ -307,6 +304,7 @@ err_prev_deinit: static int nfp_net_pf_app_init(struct nfp_pf *pf, u8 __iomem *qc_bar, unsigned int stride) { + struct devlink *devlink = priv_to_devlink(pf); u8 __iomem *ctrl_bar; int err; @@ -314,9 +312,9 @@ nfp_net_pf_app_init(struct nfp_pf *pf, u8 __iomem *qc_bar, unsigned int stride) if (IS_ERR(pf->app)) return 
PTR_ERR(pf->app); - mutex_lock(&pf->lock); + devl_lock(devlink); err = nfp_app_init(pf->app); - mutex_unlock(&pf->lock); + devl_unlock(devlink); if (err) goto err_free; @@ -343,9 +341,9 @@ nfp_net_pf_app_init(struct nfp_pf *pf, u8 __iomem *qc_bar, unsigned int stride) err_unmap: nfp_cpp_area_release_free(pf->ctrl_vnic_bar); err_app_clean: - mutex_lock(&pf->lock); + devl_lock(devlink); nfp_app_clean(pf->app); - mutex_unlock(&pf->lock); + devl_unlock(devlink); err_free: nfp_app_free(pf->app); pf->app = NULL; @@ -354,14 +352,16 @@ err_free: static void nfp_net_pf_app_clean(struct nfp_pf *pf) { + struct devlink *devlink = priv_to_devlink(pf); + if (pf->ctrl_vnic) { nfp_net_pf_free_vnic(pf, pf->ctrl_vnic); nfp_cpp_area_release_free(pf->ctrl_vnic_bar); } - mutex_lock(&pf->lock); + devl_lock(devlink); nfp_app_clean(pf->app); - mutex_unlock(&pf->lock); + devl_unlock(devlink); nfp_app_free(pf->app); pf->app = NULL; @@ -495,8 +495,9 @@ static int nfp_net_pci_map_mem(struct nfp_pf *pf) } cpp_id = NFP_CPP_ISLAND_ID(0, NFP_CPP_ACTION_RW, 0, 0); - mem = nfp_cpp_map_area(pf->cpp, "net.qc", cpp_id, NFP_PCIE_QUEUE(0), - NFP_QCP_QUEUE_AREA_SZ, &pf->qc_area); + mem = nfp_cpp_map_area(pf->cpp, "net.qc", cpp_id, + nfp_qcp_queue_offset(pf->dev_info, 0), + pf->dev_info->qc_area_sz, &pf->qc_area); if (IS_ERR(mem)) { nfp_err(pf->cpp, "Failed to map Queue Controller area.\n"); err = PTR_ERR(mem); @@ -519,6 +520,57 @@ err_unmap_ctrl: return err; } +static const unsigned int lr_to_speed[] = { + [NFP_NET_CFG_STS_LINK_RATE_UNSUPPORTED] = 0, + [NFP_NET_CFG_STS_LINK_RATE_UNKNOWN] = SPEED_UNKNOWN, + [NFP_NET_CFG_STS_LINK_RATE_1G] = SPEED_1000, + [NFP_NET_CFG_STS_LINK_RATE_10G] = SPEED_10000, + [NFP_NET_CFG_STS_LINK_RATE_25G] = SPEED_25000, + [NFP_NET_CFG_STS_LINK_RATE_40G] = SPEED_40000, + [NFP_NET_CFG_STS_LINK_RATE_50G] = SPEED_50000, + [NFP_NET_CFG_STS_LINK_RATE_100G] = SPEED_100000, +}; + +unsigned int nfp_net_lr2speed(unsigned int linkrate) +{ + if (linkrate < ARRAY_SIZE(lr_to_speed)) + return 
lr_to_speed[linkrate]; + + return SPEED_UNKNOWN; +} + +unsigned int nfp_net_speed2lr(unsigned int speed) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(lr_to_speed); i++) { + if (speed == lr_to_speed[i]) + return i; + } + + return NFP_NET_CFG_STS_LINK_RATE_UNKNOWN; +} + +static void nfp_net_notify_port_speed(struct nfp_port *port) +{ + struct net_device *netdev = port->netdev; + struct nfp_net *nn; + u16 sts; + + if (!nfp_netdev_is_nfp_net(netdev)) + return; + + nn = netdev_priv(netdev); + sts = nn_readw(nn, NFP_NET_CFG_STS); + + if (!(sts & NFP_NET_CFG_STS_LINK)) { + nn_writew(nn, NFP_NET_CFG_STS_NSP_LINK_RATE, NFP_NET_CFG_STS_LINK_RATE_UNKNOWN); + return; + } + + nn_writew(nn, NFP_NET_CFG_STS_NSP_LINK_RATE, nfp_net_speed2lr(port->eth_port->speed)); +} + static int nfp_net_eth_port_update(struct nfp_cpp *cpp, struct nfp_port *port, struct nfp_eth_table *eth_table) @@ -540,18 +592,20 @@ nfp_net_eth_port_update(struct nfp_cpp *cpp, struct nfp_port *port, } memcpy(port->eth_port, eth_port, sizeof(*eth_port)); + nfp_net_notify_port_speed(port); return 0; } int nfp_net_refresh_port_table_sync(struct nfp_pf *pf) { + struct devlink *devlink = priv_to_devlink(pf); struct nfp_eth_table *eth_table; struct nfp_net *nn, *next; struct nfp_port *port; int err; - lockdep_assert_held(&pf->lock); + devl_assert_locked(devlink); /* Check for nfp_net_pci_remove() racing against us */ if (list_empty(&pf->vnics)) @@ -600,10 +654,11 @@ static void nfp_net_refresh_vnics(struct work_struct *work) { struct nfp_pf *pf = container_of(work, struct nfp_pf, port_refresh_work); + struct devlink *devlink = priv_to_devlink(pf); - mutex_lock(&pf->lock); + devl_lock(devlink); nfp_net_refresh_port_table_sync(pf); - mutex_unlock(&pf->lock); + devl_unlock(devlink); } void nfp_net_refresh_port_table(struct nfp_port *port) @@ -672,9 +727,11 @@ int nfp_net_pci_probe(struct nfp_pf *pf) } nfp_net_get_fw_version(&fw_ver, ctrl_bar); - if (fw_ver.resv || fw_ver.class != NFP_NET_CFG_VERSION_CLASS_GENERIC) { + if 
(fw_ver.extend & NFP_NET_CFG_VERSION_RESERVED_MASK || + fw_ver.class != NFP_NET_CFG_VERSION_CLASS_GENERIC) { nfp_err(pf->cpp, "Unknown Firmware ABI %d.%d.%d.%d\n", - fw_ver.resv, fw_ver.class, fw_ver.major, fw_ver.minor); + fw_ver.extend, fw_ver.class, + fw_ver.major, fw_ver.minor); err = -EINVAL; goto err_unmap; } @@ -690,7 +747,7 @@ int nfp_net_pci_probe(struct nfp_pf *pf) break; default: nfp_err(pf->cpp, "Unsupported Firmware ABI %d.%d.%d.%d\n", - fw_ver.resv, fw_ver.class, + fw_ver.extend, fw_ver.class, fw_ver.major, fw_ver.minor); err = -EINVAL; goto err_unmap; @@ -709,7 +766,7 @@ int nfp_net_pci_probe(struct nfp_pf *pf) if (err) goto err_shared_buf_unreg; - mutex_lock(&pf->lock); + devl_lock(devlink); pf->ddir = nfp_net_debugfs_device_add(pf->pdev); /* Allocate the vnics and do basic init */ @@ -729,7 +786,7 @@ int nfp_net_pci_probe(struct nfp_pf *pf) if (err) goto err_stop_app; - mutex_unlock(&pf->lock); + devl_unlock(devlink); devlink_register(devlink); return 0; @@ -742,7 +799,7 @@ err_free_vnics: nfp_net_pf_free_vnics(pf); err_clean_ddir: nfp_net_debugfs_dir_clean(&pf->ddir); - mutex_unlock(&pf->lock); + devl_unlock(devlink); nfp_devlink_params_unregister(pf); err_shared_buf_unreg: nfp_shared_buf_unregister(pf); @@ -756,10 +813,11 @@ err_unmap: void nfp_net_pci_remove(struct nfp_pf *pf) { + struct devlink *devlink = priv_to_devlink(pf); struct nfp_net *nn, *next; devlink_unregister(priv_to_devlink(pf)); - mutex_lock(&pf->lock); + devl_lock(devlink); list_for_each_entry_safe(nn, next, &pf->vnics, vnic_list) { if (!nfp_net_is_data_vnic(nn)) continue; @@ -771,7 +829,7 @@ void nfp_net_pci_remove(struct nfp_pf *pf) /* stop app first, to avoid double free of ctrl vNIC's ddir */ nfp_net_debugfs_dir_clean(&pf->ddir); - mutex_unlock(&pf->lock); + devl_unlock(devlink); nfp_devlink_params_unregister(pf); nfp_shared_buf_unregister(pf); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c index 
369f6ae700c7..8b77582bdfa0 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c @@ -20,7 +20,7 @@ struct net_device * nfp_repr_get_locked(struct nfp_app *app, struct nfp_reprs *set, unsigned int id) { return rcu_dereference_protected(set->reprs[id], - lockdep_is_held(&app->pf->lock)); + nfp_app_is_locked(app)); } static void @@ -286,8 +286,7 @@ nfp_repr_transfer_features(struct net_device *netdev, struct net_device *lower) if (repr->dst->u.port_info.lower_dev != lower) return; - netdev->gso_max_size = lower->gso_max_size; - netdev->gso_max_segs = lower->gso_max_segs; + netif_inherit_tso_max(netdev, lower); netdev_update_features(netdev); } @@ -366,9 +365,9 @@ int nfp_repr_init(struct nfp_app *app, struct net_device *netdev, netdev->vlan_features = netdev->hw_features; - if (repr_cap & NFP_NET_CFG_CTRL_RXVLAN) + if (repr_cap & NFP_NET_CFG_CTRL_RXVLAN_ANY) netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; - if (repr_cap & NFP_NET_CFG_CTRL_TXVLAN) { + if (repr_cap & NFP_NET_CFG_CTRL_TXVLAN_ANY) { if (repr_cap & NFP_NET_CFG_CTRL_LSO2) netdev_warn(netdev, "Device advertises both TSO2 and TXVLAN. Refusing to enable TXVLAN.\n"); else @@ -376,12 +375,16 @@ int nfp_repr_init(struct nfp_app *app, struct net_device *netdev, } if (repr_cap & NFP_NET_CFG_CTRL_CTAG_FILTER) netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; + if (repr_cap & NFP_NET_CFG_CTRL_RXQINQ) + netdev->hw_features |= NETIF_F_HW_VLAN_STAG_RX; netdev->features = netdev->hw_features; - /* Advertise but disable TSO by default. */ - netdev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6); - netdev->gso_max_segs = NFP_NET_LSO_MAX_SEGS; + /* C-Tag strip and S-Tag strip can't be supported simultaneously, + * so enable C-Tag strip and disable S-Tag strip by default. 
+ */ + netdev->features &= ~NETIF_F_HW_VLAN_STAG_RX; + netif_set_tso_max_segs(netdev, NFP_NET_LSO_MAX_SEGS); netdev->priv_flags |= IFF_NO_QUEUE | IFF_DISABLE_NETPOLL; netdev->features |= NETIF_F_LLTX; @@ -476,7 +479,7 @@ nfp_reprs_clean_and_free_by_type(struct nfp_app *app, enum nfp_repr_type type) int i; reprs = rcu_dereference_protected(app->reprs[type], - lockdep_is_held(&app->pf->lock)); + nfp_app_is_locked(app)); if (!reprs) return; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c b/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c index 3fdaaf8ed2ba..6eeeb0fda91f 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.c @@ -15,7 +15,7 @@ #include "nfp_net_sriov.h" static int -nfp_net_sriov_check(struct nfp_app *app, int vf, u16 cap, const char *msg) +nfp_net_sriov_check(struct nfp_app *app, int vf, u16 cap, const char *msg, bool warn) { u16 cap_vf; @@ -24,12 +24,14 @@ nfp_net_sriov_check(struct nfp_app *app, int vf, u16 cap, const char *msg) cap_vf = readw(app->pf->vfcfg_tbl2 + NFP_NET_VF_CFG_MB_CAP); if ((cap_vf & cap) != cap) { - nfp_warn(app->pf->cpp, "ndo_set_vf_%s not supported\n", msg); + if (warn) + nfp_warn(app->pf->cpp, "ndo_set_vf_%s not supported\n", msg); return -EOPNOTSUPP; } if (vf < 0 || vf >= app->pf->num_vfs) { - nfp_warn(app->pf->cpp, "invalid VF id %d\n", vf); + if (warn) + nfp_warn(app->pf->cpp, "invalid VF id %d\n", vf); return -EINVAL; } @@ -65,7 +67,7 @@ int nfp_app_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) unsigned int vf_offset; int err; - err = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_MAC, "mac"); + err = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_MAC, "mac", true); if (err) return err; @@ -95,15 +97,17 @@ int nfp_app_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos, __be16 vlan_proto) { struct nfp_app *app = nfp_app_from_netdev(netdev); + u16 update = NFP_NET_VF_CFG_MB_UPD_VLAN; + bool is_proto_sup = true; 
unsigned int vf_offset; - u16 vlan_tci; + u32 vlan_tag; int err; - err = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_VLAN, "vlan"); + err = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_VLAN, "vlan", true); if (err) return err; - if (vlan_proto != htons(ETH_P_8021Q)) + if (!eth_type_vlan(vlan_proto)) return -EOPNOTSUPP; if (vlan > 4095 || qos > 7) { @@ -112,14 +116,63 @@ int nfp_app_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos, return -EINVAL; } + /* Check whether the firmware supports configuring the VLAN protocol */ + err = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_VLAN_PROTO, "vlan_proto", true); + if (err) + is_proto_sup = false; + + if (vlan_proto != htons(ETH_P_8021Q)) { + if (!is_proto_sup) + return -EOPNOTSUPP; + update |= NFP_NET_VF_CFG_MB_UPD_VLAN_PROTO; + } + /* Write VLAN tag to VF entry in VF config symbol */ - vlan_tci = FIELD_PREP(NFP_NET_VF_CFG_VLAN_VID, vlan) | + vlan_tag = FIELD_PREP(NFP_NET_VF_CFG_VLAN_VID, vlan) | FIELD_PREP(NFP_NET_VF_CFG_VLAN_QOS, qos); + + /* vlan_tag of 0 means that the configuration should be cleared and in + * such circumstances setting the TPID has no meaning when + * configuring firmware.
+ */ + if (vlan_tag && is_proto_sup) + vlan_tag |= FIELD_PREP(NFP_NET_VF_CFG_VLAN_PROT, ntohs(vlan_proto)); + vf_offset = NFP_NET_VF_CFG_MB_SZ + vf * NFP_NET_VF_CFG_SZ; - writew(vlan_tci, app->pf->vfcfg_tbl2 + vf_offset + NFP_NET_VF_CFG_VLAN); + writel(vlan_tag, app->pf->vfcfg_tbl2 + vf_offset + NFP_NET_VF_CFG_VLAN); - return nfp_net_sriov_update(app, vf, NFP_NET_VF_CFG_MB_UPD_VLAN, - "vlan"); + return nfp_net_sriov_update(app, vf, update, "vlan"); +} + +int nfp_app_set_vf_rate(struct net_device *netdev, int vf, + int min_tx_rate, int max_tx_rate) +{ + struct nfp_app *app = nfp_app_from_netdev(netdev); + u32 vf_offset, ratevalue; + int err; + + err = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_RATE, "rate", true); + if (err) + return err; + + if (max_tx_rate >= NFP_NET_VF_RATE_MAX || + min_tx_rate >= NFP_NET_VF_RATE_MAX) { + nfp_warn(app->cpp, "tx-rate exceeds %d.\n", + NFP_NET_VF_RATE_MAX); + return -EINVAL; + } + + vf_offset = NFP_NET_VF_CFG_MB_SZ + vf * NFP_NET_VF_CFG_SZ; + ratevalue = FIELD_PREP(NFP_NET_VF_CFG_MAX_RATE, + max_tx_rate ? 
max_tx_rate : + NFP_NET_VF_RATE_MAX) | + FIELD_PREP(NFP_NET_VF_CFG_MIN_RATE, min_tx_rate); + + writel(ratevalue, + app->pf->vfcfg_tbl2 + vf_offset + NFP_NET_VF_CFG_RATE); + + return nfp_net_sriov_update(app, vf, NFP_NET_VF_CFG_MB_UPD_RATE, + "rate"); } int nfp_app_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable) @@ -130,7 +183,7 @@ int nfp_app_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable) int err; err = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_SPOOF, - "spoofchk"); + "spoofchk", true); if (err) return err; @@ -154,7 +207,7 @@ int nfp_app_set_vf_trust(struct net_device *netdev, int vf, bool enable) int err; err = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_TRUST, - "trust"); + "trust", true); if (err) return err; @@ -179,7 +232,7 @@ int nfp_app_set_vf_link_state(struct net_device *netdev, int vf, int err; err = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_LINK_STATE, - "link_state"); + "link_state", true); if (err) return err; @@ -208,14 +261,13 @@ int nfp_app_get_vf_config(struct net_device *netdev, int vf, struct ifla_vf_info *ivi) { struct nfp_app *app = nfp_app_from_netdev(netdev); - unsigned int vf_offset; - u16 vlan_tci; - u32 mac_hi; + u32 vf_offset, mac_hi, rate; + u32 vlan_tag; u16 mac_lo; u8 flags; int err; - err = nfp_net_sriov_check(app, vf, 0, ""); + err = nfp_net_sriov_check(app, vf, 0, "", true); if (err) return err; @@ -225,7 +277,7 @@ int nfp_app_get_vf_config(struct net_device *netdev, int vf, mac_lo = readw(app->pf->vfcfg_tbl2 + vf_offset + NFP_NET_VF_CFG_MAC_LO); flags = readb(app->pf->vfcfg_tbl2 + vf_offset + NFP_NET_VF_CFG_CTRL); - vlan_tci = readw(app->pf->vfcfg_tbl2 + vf_offset + NFP_NET_VF_CFG_VLAN); + vlan_tag = readl(app->pf->vfcfg_tbl2 + vf_offset + NFP_NET_VF_CFG_VLAN); memset(ivi, 0, sizeof(*ivi)); ivi->vf = vf; @@ -233,12 +285,27 @@ int nfp_app_get_vf_config(struct net_device *netdev, int vf, put_unaligned_be32(mac_hi, &ivi->mac[0]); put_unaligned_be16(mac_lo, &ivi->mac[4]); - 
ivi->vlan = FIELD_GET(NFP_NET_VF_CFG_VLAN_VID, vlan_tci); - ivi->qos = FIELD_GET(NFP_NET_VF_CFG_VLAN_QOS, vlan_tci); - + ivi->vlan = FIELD_GET(NFP_NET_VF_CFG_VLAN_VID, vlan_tag); + ivi->qos = FIELD_GET(NFP_NET_VF_CFG_VLAN_QOS, vlan_tag); + if (!nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_VLAN_PROTO, "vlan_proto", false)) + ivi->vlan_proto = htons(FIELD_GET(NFP_NET_VF_CFG_VLAN_PROT, vlan_tag)); ivi->spoofchk = FIELD_GET(NFP_NET_VF_CFG_CTRL_SPOOF, flags); ivi->trusted = FIELD_GET(NFP_NET_VF_CFG_CTRL_TRUST, flags); ivi->linkstate = FIELD_GET(NFP_NET_VF_CFG_CTRL_LINK_STATE, flags); + err = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_RATE, "rate", false); + if (!err) { + rate = readl(app->pf->vfcfg_tbl2 + vf_offset + + NFP_NET_VF_CFG_RATE); + + ivi->max_tx_rate = FIELD_GET(NFP_NET_VF_CFG_MAX_RATE, rate); + ivi->min_tx_rate = FIELD_GET(NFP_NET_VF_CFG_MIN_RATE, rate); + + if (ivi->max_tx_rate == NFP_NET_VF_RATE_MAX) + ivi->max_tx_rate = 0; + if (ivi->min_tx_rate == NFP_NET_VF_RATE_MAX) + ivi->min_tx_rate = 0; + } + return 0; } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.h b/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.h index a3db0cbf6425..2d445fa199dc 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_sriov.h @@ -4,8 +4,7 @@ #ifndef _NFP_NET_SRIOV_H_ #define _NFP_NET_SRIOV_H_ -/** - * SRIOV VF configuration. +/* SRIOV VF configuration. * The configuration memory begins with a mailbox region for communication with * the firmware followed by individual VF entries. 
*/ @@ -20,6 +19,8 @@ #define NFP_NET_VF_CFG_MB_CAP_SPOOF (0x1 << 2) #define NFP_NET_VF_CFG_MB_CAP_LINK_STATE (0x1 << 3) #define NFP_NET_VF_CFG_MB_CAP_TRUST (0x1 << 4) +#define NFP_NET_VF_CFG_MB_CAP_VLAN_PROTO (0x1 << 5) +#define NFP_NET_VF_CFG_MB_CAP_RATE (0x1 << 6) #define NFP_NET_VF_CFG_MB_RET 0x2 #define NFP_NET_VF_CFG_MB_UPD 0x4 #define NFP_NET_VF_CFG_MB_UPD_MAC (0x1 << 0) @@ -27,6 +28,8 @@ #define NFP_NET_VF_CFG_MB_UPD_SPOOF (0x1 << 2) #define NFP_NET_VF_CFG_MB_UPD_LINK_STATE (0x1 << 3) #define NFP_NET_VF_CFG_MB_UPD_TRUST (0x1 << 4) +#define NFP_NET_VF_CFG_MB_UPD_VLAN_PROTO (0x1 << 5) +#define NFP_NET_VF_CFG_MB_UPD_RATE (0x1 << 6) #define NFP_NET_VF_CFG_MB_VF_NUM 0x7 /* VF config entry @@ -44,12 +47,20 @@ #define NFP_NET_VF_CFG_LS_MODE_ENABLE 1 #define NFP_NET_VF_CFG_LS_MODE_DISABLE 2 #define NFP_NET_VF_CFG_VLAN 0x8 +#define NFP_NET_VF_CFG_VLAN_PROT 0xffff0000 #define NFP_NET_VF_CFG_VLAN_QOS 0xe000 #define NFP_NET_VF_CFG_VLAN_VID 0x0fff +#define NFP_NET_VF_CFG_RATE 0xc +#define NFP_NET_VF_CFG_MIN_RATE 0x0000ffff +#define NFP_NET_VF_CFG_MAX_RATE 0xffff0000 + +#define NFP_NET_VF_RATE_MAX 0xffff int nfp_app_set_vf_mac(struct net_device *netdev, int vf, u8 *mac); int nfp_app_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos, __be16 vlan_proto); +int nfp_app_set_vf_rate(struct net_device *netdev, int vf, int min_tx_rate, + int max_tx_rate); int nfp_app_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting); int nfp_app_set_vf_trust(struct net_device *netdev, int vf, bool setting); int nfp_app_set_vf_link_state(struct net_device *netdev, int vf, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_xsk.c b/drivers/net/ethernet/netronome/nfp/nfp_net_xsk.c new file mode 100644 index 000000000000..aea507aed49d --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_xsk.c @@ -0,0 +1,174 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2018 Netronome Systems, Inc */ +/* Copyright (C) 2021 Corigine, Inc */ + 
+#include <linux/dma-direction.h> +#include <linux/dma-mapping.h> +#include <linux/slab.h> +#include <net/xdp_sock_drv.h> +#include <trace/events/xdp.h> + +#include "nfp_app.h" +#include "nfp_net.h" +#include "nfp_net_dp.h" +#include "nfp_net_xsk.h" + +static void +nfp_net_xsk_rx_bufs_stash(struct nfp_net_rx_ring *rx_ring, unsigned int idx, + struct xdp_buff *xdp) +{ + unsigned int headroom; + + headroom = xsk_pool_get_headroom(rx_ring->r_vec->xsk_pool); + + rx_ring->rxds[idx].fld.reserved = 0; + rx_ring->rxds[idx].fld.meta_len_dd = 0; + + rx_ring->xsk_rxbufs[idx].xdp = xdp; + rx_ring->xsk_rxbufs[idx].dma_addr = + xsk_buff_xdp_get_frame_dma(xdp) + headroom; +} + +void nfp_net_xsk_rx_unstash(struct nfp_net_xsk_rx_buf *rxbuf) +{ + rxbuf->dma_addr = 0; + rxbuf->xdp = NULL; +} + +void nfp_net_xsk_rx_free(struct nfp_net_xsk_rx_buf *rxbuf) +{ + if (rxbuf->xdp) + xsk_buff_free(rxbuf->xdp); + + nfp_net_xsk_rx_unstash(rxbuf); +} + +void nfp_net_xsk_rx_bufs_free(struct nfp_net_rx_ring *rx_ring) +{ + unsigned int i; + + if (!rx_ring->cnt) + return; + + for (i = 0; i < rx_ring->cnt - 1; i++) + nfp_net_xsk_rx_free(&rx_ring->xsk_rxbufs[i]); +} + +void nfp_net_xsk_rx_ring_fill_freelist(struct nfp_net_rx_ring *rx_ring) +{ + struct nfp_net_r_vector *r_vec = rx_ring->r_vec; + struct xsk_buff_pool *pool = r_vec->xsk_pool; + unsigned int wr_idx, wr_ptr_add = 0; + struct xdp_buff *xdp; + + while (nfp_net_rx_space(rx_ring)) { + wr_idx = D_IDX(rx_ring, rx_ring->wr_p); + + xdp = xsk_buff_alloc(pool); + if (!xdp) + break; + + nfp_net_xsk_rx_bufs_stash(rx_ring, wr_idx, xdp); + + /* DMA address is expanded to 48-bit width in freelist for NFP3800, + * so the *_48b macro is used accordingly. It's also OK to fill + * a 40-bit address since the top 8 bits are set to 0. + */ + nfp_desc_set_dma_addr_48b(&rx_ring->rxds[wr_idx].fld, + rx_ring->xsk_rxbufs[wr_idx].dma_addr); + + rx_ring->wr_p++; + wr_ptr_add++; + } + + /* Ensure all records are visible before incrementing write counter.
*/ + wmb(); + nfp_qcp_wr_ptr_add(rx_ring->qcp_fl, wr_ptr_add); +} + +void nfp_net_xsk_rx_drop(struct nfp_net_r_vector *r_vec, + struct nfp_net_xsk_rx_buf *xrxbuf) +{ + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->rx_drops++; + u64_stats_update_end(&r_vec->rx_sync); + + nfp_net_xsk_rx_free(xrxbuf); +} + +static void nfp_net_xsk_pool_unmap(struct device *dev, + struct xsk_buff_pool *pool) +{ + return xsk_pool_dma_unmap(pool, 0); +} + +static int nfp_net_xsk_pool_map(struct device *dev, struct xsk_buff_pool *pool) +{ + return xsk_pool_dma_map(pool, dev, 0); +} + +int nfp_net_xsk_setup_pool(struct net_device *netdev, + struct xsk_buff_pool *pool, u16 queue_id) +{ + struct nfp_net *nn = netdev_priv(netdev); + + struct xsk_buff_pool *prev_pool; + struct nfp_net_dp *dp; + int err; + + /* NFDK doesn't implement xsk yet. */ + if (nn->dp.ops->version == NFP_NFD_VER_NFDK) + return -EOPNOTSUPP; + + /* Reject on old FWs so we can drop some checks on datapath. */ + if (nn->dp.rx_offset != NFP_NET_CFG_RX_OFFSET_DYNAMIC) + return -EOPNOTSUPP; + if (!nn->dp.chained_metadata_format) + return -EOPNOTSUPP; + + /* Install */ + if (pool) { + err = nfp_net_xsk_pool_map(nn->dp.dev, pool); + if (err) + return err; + } + + /* Reconfig/swap */ + dp = nfp_net_clone_dp(nn); + if (!dp) { + err = -ENOMEM; + goto err_unmap; + } + + prev_pool = dp->xsk_pools[queue_id]; + dp->xsk_pools[queue_id] = pool; + + err = nfp_net_ring_reconfig(nn, dp, NULL); + if (err) + goto err_unmap; + + /* Uninstall */ + if (prev_pool) + nfp_net_xsk_pool_unmap(nn->dp.dev, prev_pool); + + return 0; +err_unmap: + if (pool) + nfp_net_xsk_pool_unmap(nn->dp.dev, pool); + + return err; +} + +int nfp_net_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags) +{ + struct nfp_net *nn = netdev_priv(netdev); + + /* queue_id comes from a zero-copy socket, installed with XDP_SETUP_XSK_POOL, + * so it must be within our vector range. 
Moreover, our napi structs + * are statically allocated, so we can always kick them without worrying + * if reconfig is in progress or interface down. + */ + napi_schedule(&nn->r_vecs[queue_id].napi); + + return 0; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_xsk.h b/drivers/net/ethernet/netronome/nfp/nfp_net_xsk.h new file mode 100644 index 000000000000..6d281eb2fc1c --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_xsk.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2018 Netronome Systems, Inc */ +/* Copyright (C) 2021 Corigine, Inc */ + +#ifndef _NFP_XSK_H_ +#define _NFP_XSK_H_ + +#include <net/xdp_sock_drv.h> + +#define NFP_NET_XSK_TX_BATCH 16 /* XSK TX transmission batch size. */ + +static inline bool nfp_net_has_xsk_pool_slow(struct nfp_net_dp *dp, + unsigned int qid) +{ + return dp->xdp_prog && dp->xsk_pools[qid]; +} + +static inline int nfp_net_rx_space(struct nfp_net_rx_ring *rx_ring) +{ + return rx_ring->cnt - rx_ring->wr_p + rx_ring->rd_p - 1; +} + +static inline int nfp_net_tx_space(struct nfp_net_tx_ring *tx_ring) +{ + return tx_ring->cnt - tx_ring->wr_p + tx_ring->rd_p - 1; +} + +void nfp_net_xsk_rx_unstash(struct nfp_net_xsk_rx_buf *rxbuf); +void nfp_net_xsk_rx_free(struct nfp_net_xsk_rx_buf *rxbuf); +void nfp_net_xsk_rx_drop(struct nfp_net_r_vector *r_vec, + struct nfp_net_xsk_rx_buf *xrxbuf); +int nfp_net_xsk_setup_pool(struct net_device *netdev, struct xsk_buff_pool *pool, + u16 queue_id); + +void nfp_net_xsk_rx_bufs_free(struct nfp_net_rx_ring *rx_ring); + +void nfp_net_xsk_rx_ring_fill_freelist(struct nfp_net_rx_ring *rx_ring); + +int nfp_net_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags); + +#endif /* _NFP_XSK_H_ */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c index 87f2268b16d6..e19bb0150cb5 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c +++ 
b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c @@ -13,6 +13,7 @@ #include <linux/init.h> #include <linux/etherdevice.h> +#include "nfpcore/nfp_dev.h" #include "nfp_net_ctrl.h" #include "nfp_net.h" #include "nfp_main.h" @@ -36,11 +37,22 @@ struct nfp_net_vf { static const char nfp_net_driver_name[] = "nfp_netvf"; -#define PCI_DEVICE_NFP6000VF 0x6003 static const struct pci_device_id nfp_netvf_pci_device_ids[] = { - { PCI_VENDOR_ID_NETRONOME, PCI_DEVICE_NFP6000VF, + { PCI_VENDOR_ID_NETRONOME, PCI_DEVICE_ID_NFP3800_VF, PCI_VENDOR_ID_NETRONOME, PCI_ANY_ID, - PCI_ANY_ID, 0, + PCI_ANY_ID, 0, NFP_DEV_NFP3800_VF, + }, + { PCI_VENDOR_ID_NETRONOME, PCI_DEVICE_ID_NFP6000_VF, + PCI_VENDOR_ID_NETRONOME, PCI_ANY_ID, + PCI_ANY_ID, 0, NFP_DEV_NFP6000_VF, + }, + { PCI_VENDOR_ID_CORIGINE, PCI_DEVICE_ID_NFP3800_VF, + PCI_VENDOR_ID_CORIGINE, PCI_ANY_ID, + PCI_ANY_ID, 0, NFP_DEV_NFP3800_VF, + }, + { PCI_VENDOR_ID_CORIGINE, PCI_DEVICE_ID_NFP6000_VF, + PCI_VENDOR_ID_CORIGINE, PCI_ANY_ID, + PCI_ANY_ID, 0, NFP_DEV_NFP6000_VF, }, { 0, } /* Required last entry. 
*/ }; @@ -65,6 +77,7 @@ static void nfp_netvf_get_mac_addr(struct nfp_net *nn) static int nfp_netvf_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id) { + const struct nfp_dev_info *dev_info; struct nfp_net_fw_version fw_ver; int max_tx_rings, max_rx_rings; u32 tx_bar_off, rx_bar_off; @@ -78,6 +91,8 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev, int stride; int err; + dev_info = &nfp_dev_info[pci_id->driver_data]; + vf = kzalloc(sizeof(*vf), GFP_KERNEL); if (!vf) return -ENOMEM; @@ -95,8 +110,7 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev, pci_set_master(pdev); - err = dma_set_mask_and_coherent(&pdev->dev, - DMA_BIT_MASK(NFP_NET_MAX_DMA_BITS)); + err = dma_set_mask_and_coherent(&pdev->dev, dev_info->dma_mask); if (err) goto err_pci_regions; @@ -116,9 +130,11 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev, } nfp_net_get_fw_version(&fw_ver, ctrl_bar); - if (fw_ver.resv || fw_ver.class != NFP_NET_CFG_VERSION_CLASS_GENERIC) { + if (fw_ver.extend & NFP_NET_CFG_VERSION_RESERVED_MASK || + fw_ver.class != NFP_NET_CFG_VERSION_CLASS_GENERIC) { dev_err(&pdev->dev, "Unknown Firmware ABI %d.%d.%d.%d\n", - fw_ver.resv, fw_ver.class, fw_ver.major, fw_ver.minor); + fw_ver.extend, fw_ver.class, + fw_ver.major, fw_ver.minor); err = -EINVAL; goto err_ctrl_unmap; } @@ -138,7 +154,7 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev, break; default: dev_err(&pdev->dev, "Unsupported Firmware ABI %d.%d.%d.%d\n", - fw_ver.resv, fw_ver.class, + fw_ver.extend, fw_ver.class, fw_ver.major, fw_ver.minor); err = -EINVAL; goto err_ctrl_unmap; @@ -167,19 +183,19 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev, } startq = readl(ctrl_bar + NFP_NET_CFG_START_TXQ); - tx_bar_off = NFP_PCIE_QUEUE(startq); + tx_bar_off = nfp_qcp_queue_offset(dev_info, startq); startq = readl(ctrl_bar + NFP_NET_CFG_START_RXQ); - rx_bar_off = NFP_PCIE_QUEUE(startq); + rx_bar_off = nfp_qcp_queue_offset(dev_info, startq); /* Allocate and initialise the netdev */ - nn = 
nfp_net_alloc(pdev, ctrl_bar, true, max_tx_rings, max_rx_rings); + nn = nfp_net_alloc(pdev, dev_info, ctrl_bar, true, + max_tx_rings, max_rx_rings); if (IS_ERR(nn)) { err = PTR_ERR(nn); goto err_ctrl_unmap; } vf->nn = nn; - nn->fw_ver = fw_ver; nn->dp.is_vf = 1; nn->stride_tx = stride; nn->stride_rx = stride; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.c b/drivers/net/ethernet/netronome/nfp/nfp_port.c index 93c5bfc0510b..4f2308570dcf 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_port.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_port.c @@ -75,23 +75,6 @@ int nfp_port_set_features(struct net_device *netdev, netdev_features_t features) return 0; } -struct nfp_port * -nfp_port_from_id(struct nfp_pf *pf, enum nfp_port_type type, unsigned int id) -{ - struct nfp_port *port; - - lockdep_assert_held(&pf->lock); - - if (type != NFP_PORT_PHYS_PORT) - return NULL; - - list_for_each_entry(port, &pf->ports, port_list) - if (port->eth_id == id) - return port; - - return NULL; -} - struct nfp_eth_table_port *__nfp_port_get_eth_port(struct nfp_port *port) { if (!port) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.h b/drivers/net/ethernet/netronome/nfp/nfp_port.h index ae4da189d955..6793cdf9ff11 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_port.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_port.h @@ -46,6 +46,7 @@ enum nfp_port_flags { * @tc_offload_cnt: number of active TC offloads, how offloads are counted * is not defined, use as a boolean * @app: backpointer to the app structure + * @link_cb: callback when link status changed * @dl_port: devlink port structure * @eth_id: for %NFP_PORT_PHYS_PORT port ID in NFP enumeration scheme * @eth_forced: for %NFP_PORT_PHYS_PORT port is forced UP or DOWN, don't change @@ -66,6 +67,7 @@ struct nfp_port { unsigned long tc_offload_cnt; struct nfp_app *app; + void (*link_cb)(struct nfp_port *port); struct devlink_port dl_port; @@ -106,8 +108,6 @@ nfp_port_set_features(struct net_device *netdev, 
netdev_features_t features); struct nfp_port *nfp_port_from_netdev(struct net_device *netdev); int nfp_port_get_port_parent_id(struct net_device *netdev, struct netdev_phys_item_id *ppid); -struct nfp_port * -nfp_port_from_id(struct nfp_pf *pf, enum nfp_port_type type, unsigned int id); struct nfp_eth_table_port *__nfp_port_get_eth_port(struct nfp_port *port); struct nfp_eth_table_port *nfp_port_get_eth_port(struct nfp_port *port); @@ -132,8 +132,7 @@ void nfp_devlink_port_unregister(struct nfp_port *port); void nfp_devlink_port_type_eth_set(struct nfp_port *port); void nfp_devlink_port_type_clear(struct nfp_port *port); -/** - * Mac stats (0x0000 - 0x0200) +/* Mac stats (0x0000 - 0x0200) * all counters are 64bit. */ #define NFP_MAC_STATS_BASE 0x0000 diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/crc32.h b/drivers/net/ethernet/netronome/nfp/nfpcore/crc32.h index afab6f0fc564..6ad43c7cefe6 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/crc32.h +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/crc32.h @@ -4,7 +4,6 @@ #ifndef NFP_CRC32_H #define NFP_CRC32_H -#include <linux/kernel.h> #include <linux/crc32.h> /** diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c index 252fe06f58aa..33b4c2856316 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c @@ -28,6 +28,7 @@ #include <linux/pci.h> #include "nfp_cpp.h" +#include "nfp_dev.h" #include "nfp6000/nfp6000.h" @@ -100,11 +101,7 @@ #define NFP_PCIE_P2C_GENERAL_TOKEN_OFFSET(bar, x) ((x) << ((bar)->bitsize - 4)) #define NFP_PCIE_P2C_GENERAL_SIZE(bar) (1 << ((bar)->bitsize - 4)) -#define NFP_PCIE_CFG_BAR_PCIETOCPPEXPANSIONBAR(bar, slot) \ - (0x400 + ((bar) * 8 + (slot)) * 4) - -#define NFP_PCIE_CPP_BAR_PCIETOCPPEXPANSIONBAR(bar, slot) \ - (((bar) * 8 + (slot)) * 4) +#define NFP_PCIE_P2C_EXPBAR_OFFSET(bar_index) ((bar_index) * 4) /* The number of 
explicit BARs to reserve. * Minimum is 0, maximum is 4 on the NFP6000. @@ -145,6 +142,7 @@ struct nfp_bar { struct nfp6000_pcie { struct pci_dev *pdev; struct device *dev; + const struct nfp_dev_info *dev_info; /* PCI BAR management */ spinlock_t bar_lock; /* Protect the PCI2CPP BAR cache */ @@ -269,19 +267,16 @@ compute_bar(const struct nfp6000_pcie *nfp, const struct nfp_bar *bar, static int nfp6000_bar_write(struct nfp6000_pcie *nfp, struct nfp_bar *bar, u32 newcfg) { - int base, slot; - int xbar; + unsigned int xbar; - base = bar->index >> 3; - slot = bar->index & 7; + xbar = NFP_PCIE_P2C_EXPBAR_OFFSET(bar->index); if (nfp->iomem.csr) { - xbar = NFP_PCIE_CPP_BAR_PCIETOCPPEXPANSIONBAR(base, slot); writel(newcfg, nfp->iomem.csr + xbar); /* Readback to ensure BAR is flushed */ readl(nfp->iomem.csr + xbar); } else { - xbar = NFP_PCIE_CFG_BAR_PCIETOCPPEXPANSIONBAR(base, slot); + xbar += nfp->dev_info->pcie_cfg_expbar_offset; pci_write_config_dword(nfp->pdev, xbar, newcfg); } @@ -622,16 +617,17 @@ static int enable_bars(struct nfp6000_pcie *nfp, u16 interface) nfp6000_bar_write(nfp, bar, barcfg_msix_general); - nfp->expl.data = bar->iomem + NFP_PCIE_SRAM + 0x1000; + nfp->expl.data = bar->iomem + NFP_PCIE_SRAM + + nfp->dev_info->pcie_expl_offset; switch (nfp->pdev->device) { - case PCI_DEVICE_ID_NETRONOME_NFP3800: + case PCI_DEVICE_ID_NFP3800: pf = nfp->pdev->devfn & 7; nfp->iomem.csr = bar->iomem + NFP_PCIE_BAR(pf); break; - case PCI_DEVICE_ID_NETRONOME_NFP4000: - case PCI_DEVICE_ID_NETRONOME_NFP5000: - case PCI_DEVICE_ID_NETRONOME_NFP6000: + case PCI_DEVICE_ID_NFP4000: + case PCI_DEVICE_ID_NFP5000: + case PCI_DEVICE_ID_NFP6000: nfp->iomem.csr = bar->iomem + NFP_PCIE_BAR(0); break; default: @@ -644,12 +640,12 @@ static int enable_bars(struct nfp6000_pcie *nfp, u16 interface) } switch (nfp->pdev->device) { - case PCI_DEVICE_ID_NETRONOME_NFP3800: + case PCI_DEVICE_ID_NFP3800: expl_groups = 1; break; - case PCI_DEVICE_ID_NETRONOME_NFP4000: - case 
PCI_DEVICE_ID_NETRONOME_NFP5000: - case PCI_DEVICE_ID_NETRONOME_NFP6000: + case PCI_DEVICE_ID_NFP4000: + case PCI_DEVICE_ID_NFP5000: + case PCI_DEVICE_ID_NFP6000: expl_groups = 4; break; default: @@ -1306,18 +1302,20 @@ static const struct nfp_cpp_operations nfp6000_pcie_ops = { /** * nfp_cpp_from_nfp6000_pcie() - Build a NFP CPP bus from a NFP6000 PCI device * @pdev: NFP6000 PCI device + * @dev_info: NFP ASIC params * * Return: NFP CPP handle */ -struct nfp_cpp *nfp_cpp_from_nfp6000_pcie(struct pci_dev *pdev) +struct nfp_cpp * +nfp_cpp_from_nfp6000_pcie(struct pci_dev *pdev, const struct nfp_dev_info *dev_info) { struct nfp6000_pcie *nfp; u16 interface; int err; /* Finished with card initialization. */ - dev_info(&pdev->dev, - "Netronome Flow Processor NFP4000/NFP5000/NFP6000 PCIe Card Probe\n"); + dev_info(&pdev->dev, "Network Flow Processor %s PCIe Card Probe\n", + dev_info->chip_names); pcie_print_link_status(pdev); nfp = kzalloc(sizeof(*nfp), GFP_KERNEL); @@ -1328,6 +1326,7 @@ struct nfp_cpp *nfp_cpp_from_nfp6000_pcie(struct pci_dev *pdev) nfp->dev = &pdev->dev; nfp->pdev = pdev; + nfp->dev_info = dev_info; init_waitqueue_head(&nfp->bar_waiters); spin_lock_init(&nfp->bar_lock); diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.h index 6d1bffa6eac6..097660b673db 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.h +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.h @@ -11,6 +11,7 @@ #include "nfp_cpp.h" -struct nfp_cpp *nfp_cpp_from_nfp6000_pcie(struct pci_dev *pdev); +struct nfp_cpp * +nfp_cpp_from_nfp6000_pcie(struct pci_dev *pdev, const struct nfp_dev_info *dev_info); #endif /* NFP6000_PCIE_H */ diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h index 2dd0f5842873..3d379e937184 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h +++ 
b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpp.h @@ -32,10 +32,6 @@ #define PCI_64BIT_BAR_COUNT 3 -/* NFP hardware vendor/device ids. - */ -#define PCI_DEVICE_ID_NETRONOME_NFP3800 0x3800 - #define NFP_CPP_NUM_TARGETS 16 /* Max size of area it should be safe to request */ #define NFP_CPP_SAFE_AREA_SIZE SZ_2M diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c index 34c0d2ddf9ef..a8286d0032d1 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c @@ -874,7 +874,6 @@ area_cache_get(struct nfp_cpp *cpp, u32 id, } /* Adjust the start address to be cache size aligned */ - cache->id = id; cache->addr = addr & ~(u64)(cache->size - 1); /* Re-init to the new ID and address */ @@ -894,6 +893,8 @@ area_cache_get(struct nfp_cpp *cpp, u32 id, return NULL; } + cache->id = id; + exit: /* Adjust offset */ *offset = addr - cache->addr; diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c index 85734c6badf5..508ae6b571ca 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cpplib.c @@ -22,6 +22,7 @@ #include "nfp6000/nfp_xpb.h" /* NFP6000 PL */ +#define NFP_PL_DEVICE_PART_NFP6000 0x6200 #define NFP_PL_DEVICE_ID 0x00000004 #define NFP_PL_DEVICE_ID_MASK GENMASK(7, 0) #define NFP_PL_DEVICE_PART_MASK GENMASK(31, 16) @@ -130,8 +131,12 @@ int nfp_cpp_model_autodetect(struct nfp_cpp *cpp, u32 *model) return err; *model = reg & NFP_PL_DEVICE_MODEL_MASK; - if (*model & NFP_PL_DEVICE_ID_MASK) - *model -= 0x10; + /* Disambiguate the NFP4000/NFP5000/NFP6000 chips */ + if (FIELD_GET(NFP_PL_DEVICE_PART_MASK, reg) == + NFP_PL_DEVICE_PART_NFP6000) { + if (*model & NFP_PL_DEVICE_ID_MASK) + *model -= 0x10; + } return 0; } diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_dev.c 
b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_dev.c new file mode 100644 index 000000000000..0725b51c2a95 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_dev.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2019 Netronome Systems, Inc. */ + +#include <linux/dma-mapping.h> +#include <linux/kernel.h> +#include <linux/sizes.h> + +#include "nfp_dev.h" + +const struct nfp_dev_info nfp_dev_info[NFP_DEV_CNT] = { + [NFP_DEV_NFP3800] = { + .dma_mask = DMA_BIT_MASK(48), + .qc_idx_mask = GENMASK(8, 0), + .qc_addr_offset = 0x400000, + .min_qc_size = 512, + .max_qc_size = SZ_64K, + + .chip_names = "NFP3800", + .pcie_cfg_expbar_offset = 0x0a00, + .pcie_expl_offset = 0xd000, + .qc_area_sz = 0x100000, + }, + [NFP_DEV_NFP3800_VF] = { + .dma_mask = DMA_BIT_MASK(48), + .qc_idx_mask = GENMASK(8, 0), + .qc_addr_offset = 0, + .min_qc_size = 512, + .max_qc_size = SZ_64K, + }, + [NFP_DEV_NFP6000] = { + .dma_mask = DMA_BIT_MASK(40), + .qc_idx_mask = GENMASK(7, 0), + .qc_addr_offset = 0x80000, + .min_qc_size = 256, + .max_qc_size = SZ_256K, + + .chip_names = "NFP4000/NFP5000/NFP6000", + .pcie_cfg_expbar_offset = 0x0400, + .pcie_expl_offset = 0x1000, + .qc_area_sz = 0x80000, + }, + [NFP_DEV_NFP6000_VF] = { + .dma_mask = DMA_BIT_MASK(40), + .qc_idx_mask = GENMASK(7, 0), + .qc_addr_offset = 0, + .min_qc_size = 256, + .max_qc_size = SZ_256K, + }, +}; diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_dev.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_dev.h new file mode 100644 index 000000000000..e4d38178de0f --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_dev.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2019 Netronome Systems, Inc. 
*/ + +#ifndef _NFP_DEV_H_ +#define _NFP_DEV_H_ + +#include <linux/types.h> + +#define PCI_VENDOR_ID_CORIGINE 0x1da8 +#define PCI_DEVICE_ID_NFP3800 0x3800 +#define PCI_DEVICE_ID_NFP4000 0x4000 +#define PCI_DEVICE_ID_NFP5000 0x5000 +#define PCI_DEVICE_ID_NFP6000 0x6000 +#define PCI_DEVICE_ID_NFP3800_VF 0x3803 +#define PCI_DEVICE_ID_NFP6000_VF 0x6003 + +enum nfp_dev_id { + NFP_DEV_NFP3800, + NFP_DEV_NFP3800_VF, + NFP_DEV_NFP6000, + NFP_DEV_NFP6000_VF, + NFP_DEV_CNT, +}; + +struct nfp_dev_info { + /* Required fields */ + u64 dma_mask; + u32 qc_idx_mask; + u32 qc_addr_offset; + u32 min_qc_size; + u32 max_qc_size; + + /* PF-only fields */ + const char *chip_names; + u32 pcie_cfg_expbar_offset; + u32 pcie_expl_offset; + u32 qc_area_sz; +}; + +extern const struct nfp_dev_info nfp_dev_info[NFP_DEV_CNT]; + +#endif diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c index 10e7d8b21c46..730fea214b8a 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c @@ -513,7 +513,7 @@ nfp_nsp_command_buf_dma_sg(struct nfp_nsp *nsp, dma_size = BIT_ULL(dma_order); nseg = DIV_ROUND_UP(max_size, chunk_size); - chunks = kzalloc(array_size(sizeof(*chunks), nseg), GFP_KERNEL); + chunks = kcalloc(nseg, sizeof(*chunks), GFP_KERNEL); if (!chunks) return -ENOMEM; diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h index f5360bae6f75..992d72ac98d3 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.h @@ -132,6 +132,7 @@ enum nfp_eth_fec { * @ports.interface: interface (module) plugged in * @ports.media: media type of the @interface * @ports.fec: forward error correction mode + * @ports.act_fec: active forward error correction mode * @ports.aneg: auto negotiation mode * @ports.mac_addr: interface MAC address * 
@ports.label_port: port id @@ -162,6 +163,7 @@ struct nfp_eth_table { enum nfp_eth_media media; enum nfp_eth_fec fec; + enum nfp_eth_fec act_fec; enum nfp_eth_aneg aneg; u8 mac_addr[ETH_ALEN]; @@ -172,6 +174,7 @@ struct nfp_eth_table { bool enabled; bool tx_enabled; bool rx_enabled; + bool supp_aneg; bool override_changed; @@ -196,6 +199,8 @@ int nfp_eth_set_configured(struct nfp_cpp *cpp, unsigned int idx, int nfp_eth_set_fec(struct nfp_cpp *cpp, unsigned int idx, enum nfp_eth_fec mode); +int nfp_eth_set_idmode(struct nfp_cpp *cpp, unsigned int idx, bool state); + static inline bool nfp_eth_can_support_fec(struct nfp_eth_table_port *eth_port) { return !!eth_port->fec_modes_supported; diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c index 311a5be25acb..bb64efec4c46 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp_eth.c @@ -27,6 +27,7 @@ #define NSP_ETH_PORT_PHYLABEL GENMASK_ULL(59, 54) #define NSP_ETH_PORT_FEC_SUPP_BASER BIT_ULL(60) #define NSP_ETH_PORT_FEC_SUPP_RS BIT_ULL(61) +#define NSP_ETH_PORT_SUPP_ANEG BIT_ULL(63) #define NSP_ETH_PORT_LANES_MASK cpu_to_le64(NSP_ETH_PORT_LANES) @@ -40,6 +41,7 @@ #define NSP_ETH_STATE_OVRD_CHNG BIT_ULL(22) #define NSP_ETH_STATE_ANEG GENMASK_ULL(25, 23) #define NSP_ETH_STATE_FEC GENMASK_ULL(27, 26) +#define NSP_ETH_STATE_ACT_FEC GENMASK_ULL(29, 28) #define NSP_ETH_CTRL_CONFIGURED BIT_ULL(0) #define NSP_ETH_CTRL_ENABLED BIT_ULL(1) @@ -49,6 +51,7 @@ #define NSP_ETH_CTRL_SET_LANES BIT_ULL(5) #define NSP_ETH_CTRL_SET_ANEG BIT_ULL(6) #define NSP_ETH_CTRL_SET_FEC BIT_ULL(7) +#define NSP_ETH_CTRL_SET_IDMODE BIT_ULL(8) enum nfp_eth_raw { NSP_ETH_RAW_PORT = 0, @@ -169,7 +172,14 @@ nfp_eth_port_translate(struct nfp_nsp *nsp, const union eth_table_entry *src, if (dst->fec_modes_supported) dst->fec_modes_supported |= NFP_FEC_AUTO | NFP_FEC_DISABLED; - dst->fec = 1 << 
FIELD_GET(NSP_ETH_STATE_FEC, state); + dst->fec = FIELD_GET(NSP_ETH_STATE_FEC, state); + dst->act_fec = dst->fec; + + if (nfp_nsp_get_abi_ver_minor(nsp) < 33) + return; + + dst->act_fec = FIELD_GET(NSP_ETH_STATE_ACT_FEC, state); + dst->supp_aneg = FIELD_GET(NSP_ETH_PORT_SUPP_ANEG, port); } static void @@ -492,6 +502,36 @@ nfp_eth_set_bit_config(struct nfp_nsp *nsp, unsigned int raw_idx, return 0; } +int nfp_eth_set_idmode(struct nfp_cpp *cpp, unsigned int idx, bool state) +{ + union eth_table_entry *entries; + struct nfp_nsp *nsp; + u64 reg; + + nsp = nfp_eth_config_start(cpp, idx); + if (IS_ERR(nsp)) + return PTR_ERR(nsp); + + /* Set this features were added in ABI 0.32 */ + if (nfp_nsp_get_abi_ver_minor(nsp) < 32) { + nfp_err(nfp_nsp_cpp(nsp), + "set id mode operation not supported, please update flash\n"); + nfp_eth_config_cleanup_end(nsp); + return -EOPNOTSUPP; + } + + entries = nfp_nsp_config_entries(nsp); + + reg = le64_to_cpu(entries[idx].control); + reg &= ~NSP_ETH_CTRL_SET_IDMODE; + reg |= FIELD_PREP(NSP_ETH_CTRL_SET_IDMODE, state); + entries[idx].control = cpu_to_le64(reg); + + nfp_nsp_config_set_modified(nsp, true); + + return nfp_eth_config_commit_end(nsp); +} + #define NFP_ETH_SET_BIT_CONFIG(nsp, raw_idx, mask, val, ctrl_bit) \ ({ \ __BF_FIELD_CHECK(mask, 0ULL, val, "NFP_ETH_SET_BIT_CONFIG: "); \ |