aboutsummaryrefslogtreecommitdiffstats
path: root/net/openvswitch
diff options
context:
space:
mode:
Diffstat (limited to 'net/openvswitch')
-rw-r--r--net/openvswitch/actions.c2
-rw-r--r--net/openvswitch/conntrack.c21
-rw-r--r--net/openvswitch/datapath.c133
-rw-r--r--net/openvswitch/datapath.h12
-rw-r--r--net/openvswitch/flow.c20
-rw-r--r--net/openvswitch/flow.h10
-rw-r--r--net/openvswitch/flow_netlink.c87
-rw-r--r--net/openvswitch/flow_table.c381
-rw-r--r--net/openvswitch/flow_table.h19
-rw-r--r--net/openvswitch/vport-internal_dev.c11
-rw-r--r--net/openvswitch/vport.c5
11 files changed, 522 insertions, 179 deletions
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 1c77f520f474..12936c151cc0 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -200,7 +200,7 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,
if (err)
return err;
- flow_key->mpls.top_lse = lse;
+ flow_key->mpls.lse[0] = lse;
return 0;
}
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 05249eb45082..df9c80bf621d 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -971,6 +971,8 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
ct = nf_ct_get(skb, &ctinfo);
if (ct) {
+ bool add_helper = false;
+
/* Packets starting a new connection must be NATted before the
* helper, so that the helper knows about the NAT. We enforce
* this by delaying both NAT and helper calls for unconfirmed
@@ -988,16 +990,17 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
}
/* Userspace may decide to perform a ct lookup without a helper
- * specified followed by a (recirculate and) commit with one.
- * Therefore, for unconfirmed connections which we will commit,
- * we need to attach the helper here.
+ * specified followed by a (recirculate and) commit with one,
+ * or attach a helper in a later commit. Therefore, for
+ * connections which we will commit, we may need to attach
+ * the helper here.
*/
- if (!nf_ct_is_confirmed(ct) && info->commit &&
- info->helper && !nfct_help(ct)) {
+ if (info->commit && info->helper && !nfct_help(ct)) {
int err = __nf_ct_try_assign_helper(ct, info->ct,
GFP_ATOMIC);
if (err)
return err;
+ add_helper = true;
/* helper installed, add seqadj if NAT is required */
if (info->nat && !nfct_seqadj(ct)) {
@@ -1007,11 +1010,13 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
}
/* Call the helper only if:
- * - nf_conntrack_in() was executed above ("!cached") for a
- * confirmed connection, or
+ * - nf_conntrack_in() was executed above ("!cached") or a
+ * helper was just attached ("add_helper") for a confirmed
+ * connection, or
* - When committing an unconfirmed connection.
*/
- if ((nf_ct_is_confirmed(ct) ? !cached : info->commit) &&
+ if ((nf_ct_is_confirmed(ct) ? !cached || add_helper :
+ info->commit) &&
ovs_ct_helper(skb, info->family) != NF_ACCEPT) {
return -EINVAL;
}
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index f30e406fbec5..93d4991ddc1f 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -227,7 +227,8 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
stats = this_cpu_ptr(dp->stats_percpu);
/* Look up flow. */
- flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit);
+ flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
+ &n_mask_hit);
if (unlikely(!flow)) {
struct dp_upcall_info upcall;
@@ -349,7 +350,8 @@ static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
+ nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
+ nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */
- + nla_total_size(sizeof(unsigned int)); /* OVS_PACKET_ATTR_LEN */
+ + nla_total_size(sizeof(unsigned int)) /* OVS_PACKET_ATTR_LEN */
+ + nla_total_size(sizeof(u64)); /* OVS_PACKET_ATTR_HASH */
/* OVS_PACKET_ATTR_USERDATA */
if (upcall_info->userdata)
@@ -392,6 +394,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
size_t len;
unsigned int hlen;
int err, dp_ifindex;
+ u64 hash;
dp_ifindex = get_dpifindex(dp);
if (!dp_ifindex)
@@ -484,23 +487,30 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
}
/* Add OVS_PACKET_ATTR_MRU */
- if (upcall_info->mru) {
- if (nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU,
- upcall_info->mru)) {
- err = -ENOBUFS;
- goto out;
- }
- pad_packet(dp, user_skb);
+ if (upcall_info->mru &&
+ nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU, upcall_info->mru)) {
+ err = -ENOBUFS;
+ goto out;
}
/* Add OVS_PACKET_ATTR_LEN when packet is truncated */
- if (cutlen > 0) {
- if (nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN,
- skb->len)) {
- err = -ENOBUFS;
- goto out;
- }
- pad_packet(dp, user_skb);
+ if (cutlen > 0 &&
+ nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN, skb->len)) {
+ err = -ENOBUFS;
+ goto out;
+ }
+
+ /* Add OVS_PACKET_ATTR_HASH */
+ hash = skb_get_hash_raw(skb);
+ if (skb->sw_hash)
+ hash |= OVS_PACKET_HASH_SW_BIT;
+
+ if (skb->l4_hash)
+ hash |= OVS_PACKET_HASH_L4_BIT;
+
+ if (nla_put(user_skb, OVS_PACKET_ATTR_HASH, sizeof (u64), &hash)) {
+ err = -ENOBUFS;
+ goto out;
}
/* Only reserve room for attribute header, packet data is added
@@ -542,6 +552,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
struct vport *input_vport;
u16 mru = 0;
+ u64 hash;
int len;
int err;
bool log = !a[OVS_PACKET_ATTR_PROBE];
@@ -567,6 +578,14 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
}
OVS_CB(packet)->mru = mru;
+ if (a[OVS_PACKET_ATTR_HASH]) {
+ hash = nla_get_u64(a[OVS_PACKET_ATTR_HASH]);
+
+ __skb_set_hash(packet, hash & 0xFFFFFFFFULL,
+ !!(hash & OVS_PACKET_HASH_SW_BIT),
+ !!(hash & OVS_PACKET_HASH_L4_BIT));
+ }
+
/* Build an sw_flow for sending this packet. */
flow = ovs_flow_alloc();
err = PTR_ERR(flow);
@@ -1575,6 +1594,31 @@ static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
return 0;
}
+static int ovs_dp_stats_init(struct datapath *dp)
+{
+ dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
+ if (!dp->stats_percpu)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int ovs_dp_vport_init(struct datapath *dp)
+{
+ int i;
+
+ dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
+ sizeof(struct hlist_head),
+ GFP_KERNEL);
+ if (!dp->ports)
+ return -ENOMEM;
+
+ for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
+ INIT_HLIST_HEAD(&dp->ports[i]);
+
+ return 0;
+}
+
static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr **a = info->attrs;
@@ -1583,7 +1627,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
struct vport *vport;
struct ovs_net *ovs_net;
- int err, i;
+ int err;
err = -EINVAL;
if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
@@ -1596,35 +1640,26 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
err = -ENOMEM;
dp = kzalloc(sizeof(*dp), GFP_KERNEL);
if (dp == NULL)
- goto err_free_reply;
+ goto err_destroy_reply;
ovs_dp_set_net(dp, sock_net(skb->sk));
/* Allocate table. */
err = ovs_flow_tbl_init(&dp->table);
if (err)
- goto err_free_dp;
+ goto err_destroy_dp;
- dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
- if (!dp->stats_percpu) {
- err = -ENOMEM;
+ err = ovs_dp_stats_init(dp);
+ if (err)
goto err_destroy_table;
- }
- dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
- sizeof(struct hlist_head),
- GFP_KERNEL);
- if (!dp->ports) {
- err = -ENOMEM;
- goto err_destroy_percpu;
- }
-
- for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
- INIT_HLIST_HEAD(&dp->ports[i]);
+ err = ovs_dp_vport_init(dp);
+ if (err)
+ goto err_destroy_stats;
err = ovs_meters_init(dp);
if (err)
- goto err_destroy_ports_array;
+ goto err_destroy_ports;
/* Set up our datapath device. */
parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
@@ -1656,6 +1691,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
ovs_dp_reset_user_features(skb, info);
}
+ ovs_unlock();
goto err_destroy_meters;
}
@@ -1672,17 +1708,16 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
return 0;
err_destroy_meters:
- ovs_unlock();
ovs_meters_exit(dp);
-err_destroy_ports_array:
+err_destroy_ports:
kfree(dp->ports);
-err_destroy_percpu:
+err_destroy_stats:
free_percpu(dp->stats_percpu);
err_destroy_table:
ovs_flow_tbl_destroy(&dp->table);
-err_free_dp:
+err_destroy_dp:
kfree(dp);
-err_free_reply:
+err_destroy_reply:
kfree_skb(reply);
err:
return err;
@@ -1881,7 +1916,7 @@ static struct genl_family dp_datapath_genl_family __ro_after_init = {
/* Called with ovs_mutex or RCU read lock. */
static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
struct net *net, u32 portid, u32 seq,
- u32 flags, u8 cmd)
+ u32 flags, u8 cmd, gfp_t gfp)
{
struct ovs_header *ovs_header;
struct ovs_vport_stats vport_stats;
@@ -1902,7 +1937,7 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
goto nla_put_failure;
if (!net_eq(net, dev_net(vport->dev))) {
- int id = peernet2id_alloc(net, dev_net(vport->dev));
+ int id = peernet2id_alloc(net, dev_net(vport->dev), gfp);
if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
goto nla_put_failure;
@@ -1943,11 +1978,12 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
struct sk_buff *skb;
int retval;
- skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+ skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!skb)
return ERR_PTR(-ENOMEM);
- retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd);
+ retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd,
+ GFP_KERNEL);
BUG_ON(retval < 0);
return skb;
@@ -2089,7 +2125,7 @@ restart:
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
info->snd_portid, info->snd_seq, 0,
- OVS_VPORT_CMD_NEW);
+ OVS_VPORT_CMD_NEW, GFP_KERNEL);
new_headroom = netdev_get_fwd_headroom(vport->dev);
@@ -2150,7 +2186,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
info->snd_portid, info->snd_seq, 0,
- OVS_VPORT_CMD_SET);
+ OVS_VPORT_CMD_SET, GFP_KERNEL);
BUG_ON(err < 0);
ovs_unlock();
@@ -2190,7 +2226,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
info->snd_portid, info->snd_seq, 0,
- OVS_VPORT_CMD_DEL);
+ OVS_VPORT_CMD_DEL, GFP_KERNEL);
BUG_ON(err < 0);
/* the vport deletion may trigger dp headroom update */
@@ -2237,7 +2273,7 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
goto exit_unlock_free;
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
info->snd_portid, info->snd_seq, 0,
- OVS_VPORT_CMD_GET);
+ OVS_VPORT_CMD_GET, GFP_ATOMIC);
BUG_ON(err < 0);
rcu_read_unlock();
@@ -2273,7 +2309,8 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NLM_F_MULTI,
- OVS_VPORT_CMD_GET) < 0)
+ OVS_VPORT_CMD_GET,
+ GFP_ATOMIC) < 0)
goto out;
j++;
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 81e85dde8217..e239a46c2f94 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -139,6 +139,18 @@ struct ovs_net {
bool xt_label;
};
+/**
+ * enum ovs_pkt_hash_types - hash info to include with a packet
+ * to send to userspace.
+ * @OVS_PACKET_HASH_SW_BIT: indicates hash was computed in software stack.
+ * @OVS_PACKET_HASH_L4_BIT: indicates hash is a canonical 4-tuple hash
+ * over transport ports.
+ */
+enum ovs_pkt_hash_types {
+ OVS_PACKET_HASH_SW_BIT = (1ULL << 32),
+ OVS_PACKET_HASH_L4_BIT = (1ULL << 33),
+};
+
extern unsigned int ovs_net_id;
void ovs_lock(void);
void ovs_unlock(void);
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 38147e6a20f5..9d375e74b607 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -637,27 +637,35 @@ static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key)
memset(&key->ipv4, 0, sizeof(key->ipv4));
}
} else if (eth_p_mpls(key->eth.type)) {
- size_t stack_len = MPLS_HLEN;
+ u8 label_count = 1;
+ memset(&key->mpls, 0, sizeof(key->mpls));
skb_set_inner_network_header(skb, skb->mac_len);
while (1) {
__be32 lse;
- error = check_header(skb, skb->mac_len + stack_len);
+ error = check_header(skb, skb->mac_len +
+ label_count * MPLS_HLEN);
if (unlikely(error))
return 0;
memcpy(&lse, skb_inner_network_header(skb), MPLS_HLEN);
- if (stack_len == MPLS_HLEN)
- memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN);
+ if (label_count <= MPLS_LABEL_DEPTH)
+ memcpy(&key->mpls.lse[label_count - 1], &lse,
+ MPLS_HLEN);
- skb_set_inner_network_header(skb, skb->mac_len + stack_len);
+ skb_set_inner_network_header(skb, skb->mac_len +
+ label_count * MPLS_HLEN);
if (lse & htonl(MPLS_LS_S_MASK))
break;
- stack_len += MPLS_HLEN;
+ label_count++;
}
+ if (label_count > MPLS_LABEL_DEPTH)
+ label_count = MPLS_LABEL_DEPTH;
+
+ key->mpls.num_labels_mask = GENMASK(label_count - 1, 0);
} else if (key->eth.type == htons(ETH_P_IPV6)) {
int nh_len; /* IPv6 Header + Extensions */
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index b830d5ff7af4..fd8ed766bdd1 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -30,6 +30,7 @@ enum sw_flow_mac_proto {
MAC_PROTO_ETHERNET,
};
#define SW_FLOW_KEY_INVALID 0x80
+#define MPLS_LABEL_DEPTH 3
/* Store options at the end of the array if they are less than the
* maximum size. This allows us to get the benefits of variable length
@@ -85,9 +86,6 @@ struct sw_flow_key {
*/
union {
struct {
- __be32 top_lse; /* top label stack entry */
- } mpls;
- struct {
u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */
u8 tos; /* IP ToS. */
u8 ttl; /* IP TTL/hop limit. */
@@ -135,6 +133,11 @@ struct sw_flow_key {
} nd;
};
} ipv6;
+ struct {
+ u32 num_labels_mask; /* labels present bitmap of effective length MPLS_LABEL_DEPTH */
+ __be32 lse[MPLS_LABEL_DEPTH]; /* label stack entry */
+ } mpls;
+
struct ovs_key_nsh nsh; /* network service header */
};
struct {
@@ -166,7 +169,6 @@ struct sw_flow_key_range {
struct sw_flow_mask {
int ref_count;
struct rcu_head rcu;
- struct list_head list;
struct sw_flow_key_range range;
struct sw_flow_key key;
};
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index d7559c64795d..65c2e3458ff5 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -424,7 +424,7 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
[OVS_KEY_ATTR_DP_HASH] = { .len = sizeof(u32) },
[OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED,
.next = ovs_tunnel_key_lens, },
- [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) },
+ [OVS_KEY_ATTR_MPLS] = { .len = OVS_ATTR_VARIABLE },
[OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u32) },
[OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) },
[OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) },
@@ -1628,10 +1628,25 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
const struct ovs_key_mpls *mpls_key;
+ u32 hdr_len;
+ u32 label_count, label_count_mask, i;
mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
- SW_FLOW_KEY_PUT(match, mpls.top_lse,
- mpls_key->mpls_lse, is_mask);
+ hdr_len = nla_len(a[OVS_KEY_ATTR_MPLS]);
+ label_count = hdr_len / sizeof(struct ovs_key_mpls);
+
+ if (label_count == 0 || label_count > MPLS_LABEL_DEPTH ||
+ hdr_len % sizeof(struct ovs_key_mpls))
+ return -EINVAL;
+
+ label_count_mask = GENMASK(label_count - 1, 0);
+
+ for (i = 0 ; i < label_count; i++)
+ SW_FLOW_KEY_PUT(match, mpls.lse[i],
+ mpls_key[i].mpls_lse, is_mask);
+
+ SW_FLOW_KEY_PUT(match, mpls.num_labels_mask,
+ label_count_mask, is_mask);
attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
}
@@ -2114,13 +2129,18 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
} else if (eth_p_mpls(swkey->eth.type)) {
+ u8 i, num_labels;
struct ovs_key_mpls *mpls_key;
- nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
+ num_labels = hweight_long(output->mpls.num_labels_mask);
+ nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS,
+ num_labels * sizeof(*mpls_key));
if (!nla)
goto nla_put_failure;
+
mpls_key = nla_data(nla);
- mpls_key->mpls_lse = output->mpls.top_lse;
+ for (i = 0; i < num_labels; i++)
+ mpls_key[i].mpls_lse = output->mpls.lse[i];
}
if ((swkey->eth.type == htons(ETH_P_IP) ||
@@ -2406,13 +2426,14 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa,
static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
- __be16 eth_type, __be16 vlan_tci, bool log);
+ __be16 eth_type, __be16 vlan_tci,
+ u32 mpls_label_count, bool log);
static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
- bool log, bool last)
+ u32 mpls_label_count, bool log, bool last)
{
const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
const struct nlattr *probability, *actions;
@@ -2463,7 +2484,7 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
return err;
err = __ovs_nla_copy_actions(net, actions, key, sfa,
- eth_type, vlan_tci, log);
+ eth_type, vlan_tci, mpls_label_count, log);
if (err)
return err;
@@ -2478,7 +2499,7 @@ static int validate_and_copy_clone(struct net *net,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
- bool log, bool last)
+ u32 mpls_label_count, bool log, bool last)
{
int start, err;
u32 exec;
@@ -2498,7 +2519,7 @@ static int validate_and_copy_clone(struct net *net,
return err;
err = __ovs_nla_copy_actions(net, attr, key, sfa,
- eth_type, vlan_tci, log);
+ eth_type, vlan_tci, mpls_label_count, log);
if (err)
return err;
@@ -2864,6 +2885,7 @@ static int validate_and_copy_check_pkt_len(struct net *net,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
+ u32 mpls_label_count,
bool log, bool last)
{
const struct nlattr *acts_if_greater, *acts_if_lesser_eq;
@@ -2912,7 +2934,7 @@ static int validate_and_copy_check_pkt_len(struct net *net,
return nested_acts_start;
err = __ovs_nla_copy_actions(net, acts_if_lesser_eq, key, sfa,
- eth_type, vlan_tci, log);
+ eth_type, vlan_tci, mpls_label_count, log);
if (err)
return err;
@@ -2925,7 +2947,7 @@ static int validate_and_copy_check_pkt_len(struct net *net,
return nested_acts_start;
err = __ovs_nla_copy_actions(net, acts_if_greater, key, sfa,
- eth_type, vlan_tci, log);
+ eth_type, vlan_tci, mpls_label_count, log);
if (err)
return err;
@@ -2952,7 +2974,8 @@ static int copy_action(const struct nlattr *from,
static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
- __be16 eth_type, __be16 vlan_tci, bool log)
+ __be16 eth_type, __be16 vlan_tci,
+ u32 mpls_label_count, bool log)
{
u8 mac_proto = ovs_key_mac_proto(key);
const struct nlattr *a;
@@ -3065,25 +3088,36 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
!eth_p_mpls(eth_type)))
return -EINVAL;
eth_type = mpls->mpls_ethertype;
+ mpls_label_count++;
break;
}
- case OVS_ACTION_ATTR_POP_MPLS:
+ case OVS_ACTION_ATTR_POP_MPLS: {
+ __be16 proto;
if (vlan_tci & htons(VLAN_CFI_MASK) ||
!eth_p_mpls(eth_type))
return -EINVAL;
- /* Disallow subsequent L2.5+ set and mpls_pop actions
- * as there is no check here to ensure that the new
- * eth_type is valid and thus set actions could
- * write off the end of the packet or otherwise
- * corrupt it.
+ /* Disallow subsequent L2.5+ set actions and mpls_pop
+ * actions once the last MPLS label in the packet is
+ * is popped as there is no check here to ensure that
+ * the new eth type is valid and thus set actions could
+ * write off the end of the packet or otherwise corrupt
+ * it.
*
* Support for these actions is planned using packet
* recirculation.
*/
- eth_type = htons(0);
+ proto = nla_get_be16(a);
+ mpls_label_count--;
+
+ if (!eth_p_mpls(proto) || !mpls_label_count)
+ eth_type = htons(0);
+ else
+ eth_type = proto;
+
break;
+ }
case OVS_ACTION_ATTR_SET:
err = validate_set(a, key, sfa,
@@ -3106,6 +3140,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
err = validate_and_copy_sample(net, a, key, sfa,
eth_type, vlan_tci,
+ mpls_label_count,
log, last);
if (err)
return err;
@@ -3176,6 +3211,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
err = validate_and_copy_clone(net, a, key, sfa,
eth_type, vlan_tci,
+ mpls_label_count,
log, last);
if (err)
return err;
@@ -3188,8 +3224,9 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
err = validate_and_copy_check_pkt_len(net, a, key, sfa,
eth_type,
- vlan_tci, log,
- last);
+ vlan_tci,
+ mpls_label_count,
+ log, last);
if (err)
return err;
skip_copy = true;
@@ -3219,14 +3256,18 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
struct sw_flow_actions **sfa, bool log)
{
int err;
+ u32 mpls_label_count = 0;
*sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE));
if (IS_ERR(*sfa))
return PTR_ERR(*sfa);
+ if (eth_p_mpls(key->eth.type))
+ mpls_label_count = hweight_long(key->mpls.num_labels_mask);
+
(*sfa)->orig_len = nla_len(attr);
err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type,
- key->eth.vlan.tci, log);
+ key->eth.vlan.tci, mpls_label_count, log);
if (err)
ovs_nla_free_flow_actions(*sfa);
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index cf3582c5ed70..5904e93e5765 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -34,8 +34,13 @@
#include <net/ndisc.h>
#define TBL_MIN_BUCKETS 1024
+#define MASK_ARRAY_SIZE_MIN 16
#define REHASH_INTERVAL (10 * 60 * HZ)
+#define MC_HASH_SHIFT 8
+#define MC_HASH_ENTRIES (1u << MC_HASH_SHIFT)
+#define MC_HASH_SEGS ((sizeof(uint32_t) * 8) / MC_HASH_SHIFT)
+
static struct kmem_cache *flow_cache;
struct kmem_cache *flow_stats_cache __read_mostly;
@@ -164,14 +169,133 @@ static struct table_instance *table_instance_alloc(int new_size)
return ti;
}
+static struct mask_array *tbl_mask_array_alloc(int size)
+{
+ struct mask_array *new;
+
+ size = max(MASK_ARRAY_SIZE_MIN, size);
+ new = kzalloc(sizeof(struct mask_array) +
+ sizeof(struct sw_flow_mask *) * size, GFP_KERNEL);
+ if (!new)
+ return NULL;
+
+ new->count = 0;
+ new->max = size;
+
+ return new;
+}
+
+static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
+{
+ struct mask_array *old;
+ struct mask_array *new;
+
+ new = tbl_mask_array_alloc(size);
+ if (!new)
+ return -ENOMEM;
+
+ old = ovsl_dereference(tbl->mask_array);
+ if (old) {
+ int i;
+
+ for (i = 0; i < old->max; i++) {
+ if (ovsl_dereference(old->masks[i]))
+ new->masks[new->count++] = old->masks[i];
+ }
+ }
+
+ rcu_assign_pointer(tbl->mask_array, new);
+ kfree_rcu(old, rcu);
+
+ return 0;
+}
+
+static int tbl_mask_array_add_mask(struct flow_table *tbl,
+ struct sw_flow_mask *new)
+{
+ struct mask_array *ma = ovsl_dereference(tbl->mask_array);
+ int err, ma_count = READ_ONCE(ma->count);
+
+ if (ma_count >= ma->max) {
+ err = tbl_mask_array_realloc(tbl, ma->max +
+ MASK_ARRAY_SIZE_MIN);
+ if (err)
+ return err;
+
+ ma = ovsl_dereference(tbl->mask_array);
+ }
+
+ BUG_ON(ovsl_dereference(ma->masks[ma_count]));
+
+ rcu_assign_pointer(ma->masks[ma_count], new);
+ WRITE_ONCE(ma->count, ma_count +1);
+
+ return 0;
+}
+
+static void tbl_mask_array_del_mask(struct flow_table *tbl,
+ struct sw_flow_mask *mask)
+{
+ struct mask_array *ma = ovsl_dereference(tbl->mask_array);
+ int i, ma_count = READ_ONCE(ma->count);
+
+ /* Remove the deleted mask pointers from the array */
+ for (i = 0; i < ma_count; i++) {
+ if (mask == ovsl_dereference(ma->masks[i]))
+ goto found;
+ }
+
+ BUG();
+ return;
+
+found:
+ WRITE_ONCE(ma->count, ma_count -1);
+
+ rcu_assign_pointer(ma->masks[i], ma->masks[ma_count -1]);
+ RCU_INIT_POINTER(ma->masks[ma_count -1], NULL);
+
+ kfree_rcu(mask, rcu);
+
+ /* Shrink the mask array if necessary. */
+ if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) &&
+ ma_count <= (ma->max / 3))
+ tbl_mask_array_realloc(tbl, ma->max / 2);
+}
+
+/* Remove 'mask' from the mask list, if it is not needed any more. */
+static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
+{
+ if (mask) {
+ /* ovs-lock is required to protect mask-refcount and
+ * mask list.
+ */
+ ASSERT_OVSL();
+ BUG_ON(!mask->ref_count);
+ mask->ref_count--;
+
+ if (!mask->ref_count)
+ tbl_mask_array_del_mask(tbl, mask);
+ }
+}
+
int ovs_flow_tbl_init(struct flow_table *table)
{
struct table_instance *ti, *ufid_ti;
+ struct mask_array *ma;
- ti = table_instance_alloc(TBL_MIN_BUCKETS);
+ table->mask_cache = __alloc_percpu(sizeof(struct mask_cache_entry) *
+ MC_HASH_ENTRIES,
+ __alignof__(struct mask_cache_entry));
+ if (!table->mask_cache)
+ return -ENOMEM;
+ ma = tbl_mask_array_alloc(MASK_ARRAY_SIZE_MIN);
+ if (!ma)
+ goto free_mask_cache;
+
+ ti = table_instance_alloc(TBL_MIN_BUCKETS);
if (!ti)
- return -ENOMEM;
+ goto free_mask_array;
ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS);
if (!ufid_ti)
@@ -179,7 +303,7 @@ int ovs_flow_tbl_init(struct flow_table *table)
rcu_assign_pointer(table->ti, ti);
rcu_assign_pointer(table->ufid_ti, ufid_ti);
- INIT_LIST_HEAD(&table->mask_list);
+ rcu_assign_pointer(table->mask_array, ma);
table->last_rehash = jiffies;
table->count = 0;
table->ufid_count = 0;
@@ -187,6 +311,10 @@ int ovs_flow_tbl_init(struct flow_table *table)
free_ti:
__table_instance_destroy(ti);
+free_mask_array:
+ kfree(ma);
+free_mask_cache:
+ free_percpu(table->mask_cache);
return -ENOMEM;
}
@@ -197,7 +325,28 @@ static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
__table_instance_destroy(ti);
}
-static void table_instance_destroy(struct table_instance *ti,
+static void table_instance_flow_free(struct flow_table *table,
+ struct table_instance *ti,
+ struct table_instance *ufid_ti,
+ struct sw_flow *flow,
+ bool count)
+{
+ hlist_del_rcu(&flow->flow_table.node[ti->node_ver]);
+ if (count)
+ table->count--;
+
+ if (ovs_identifier_is_ufid(&flow->id)) {
+ hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]);
+
+ if (count)
+ table->ufid_count--;
+ }
+
+ flow_mask_remove(table, flow->mask);
+}
+
+static void table_instance_destroy(struct flow_table *table,
+ struct table_instance *ti,
struct table_instance *ufid_ti,
bool deferred)
{
@@ -214,13 +363,12 @@ static void table_instance_destroy(struct table_instance *ti,
struct sw_flow *flow;
struct hlist_head *head = &ti->buckets[i];
struct hlist_node *n;
- int ver = ti->node_ver;
- int ufid_ver = ufid_ti->node_ver;
- hlist_for_each_entry_safe(flow, n, head, flow_table.node[ver]) {
- hlist_del_rcu(&flow->flow_table.node[ver]);
- if (ovs_identifier_is_ufid(&flow->id))
- hlist_del_rcu(&flow->ufid_table.node[ufid_ver]);
+ hlist_for_each_entry_safe(flow, n, head,
+ flow_table.node[ti->node_ver]) {
+
+ table_instance_flow_free(table, ti, ufid_ti,
+ flow, false);
ovs_flow_free(flow, deferred);
}
}
@@ -243,7 +391,9 @@ void ovs_flow_tbl_destroy(struct flow_table *table)
struct table_instance *ti = rcu_dereference_raw(table->ti);
struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti);
- table_instance_destroy(ti, ufid_ti, false);
+ free_percpu(table->mask_cache);
+ kfree_rcu(rcu_dereference_raw(table->mask_array), rcu);
+ table_instance_destroy(table, ti, ufid_ti, false);
}
struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
@@ -359,7 +509,7 @@ int ovs_flow_tbl_flush(struct flow_table *flow_table)
flow_table->count = 0;
flow_table->ufid_count = 0;
- table_instance_destroy(old_ti, old_ufid_ti, true);
+ table_instance_destroy(flow_table, old_ti, old_ufid_ti, true);
return 0;
err_free_ti:
@@ -370,13 +520,10 @@ err_free_ti:
static u32 flow_hash(const struct sw_flow_key *key,
const struct sw_flow_key_range *range)
{
- int key_start = range->start;
- int key_end = range->end;
- const u32 *hash_key = (const u32 *)((const u8 *)key + key_start);
- int hash_u32s = (key_end - key_start) >> 2;
+ const u32 *hash_key = (const u32 *)((const u8 *)key + range->start);
/* Make sure number of hash bytes are multiple of u32. */
- BUILD_BUG_ON(sizeof(long) % sizeof(u32));
+ int hash_u32s = range_n_bytes(range) >> 2;
return jhash2(hash_key, hash_u32s, 0);
}
@@ -425,7 +572,8 @@ static bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
const struct sw_flow_key *unmasked,
- const struct sw_flow_mask *mask)
+ const struct sw_flow_mask *mask,
+ u32 *n_mask_hit)
{
struct sw_flow *flow;
struct hlist_head *head;
@@ -435,6 +583,8 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
ovs_flow_mask_key(&masked_key, unmasked, false, mask);
hash = flow_hash(&masked_key, &mask->range);
head = find_bucket(ti, hash);
+ (*n_mask_hit)++;
+
hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) {
if (flow->mask == mask && flow->flow_table.hash == hash &&
flow_cmp_masked_key(flow, &masked_key, &mask->range))
@@ -443,46 +593,147 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
return NULL;
}
-struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
- const struct sw_flow_key *key,
- u32 *n_mask_hit)
+/* Flow lookup does full lookup on flow table. It starts with
+ * mask from index passed in *index.
+ */
+static struct sw_flow *flow_lookup(struct flow_table *tbl,
+ struct table_instance *ti,
+ struct mask_array *ma,
+ const struct sw_flow_key *key,
+ u32 *n_mask_hit,
+ u32 *index)
{
- struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
+ struct sw_flow *flow;
struct sw_flow_mask *mask;
+ int i;
+
+ if (likely(*index < ma->max)) {
+ mask = rcu_dereference_ovsl(ma->masks[*index]);
+ if (mask) {
+ flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
+ if (flow)
+ return flow;
+ }
+ }
+
+ for (i = 0; i < ma->max; i++) {
+
+ if (i == *index)
+ continue;
+
+ mask = rcu_dereference_ovsl(ma->masks[i]);
+ if (unlikely(!mask))
+ break;
+
+ flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
+ if (flow) { /* Found */
+ *index = i;
+ return flow;
+ }
+ }
+
+ return NULL;
+}
+
+/*
+ * mask_cache maps flow to probable mask. This cache is not tightly
+ * coupled cache, It means updates to mask list can result in inconsistent
+ * cache entry in mask cache.
+ * This is per cpu cache and is divided in MC_HASH_SEGS segments.
+ * In case of a hash collision the entry is hashed in next segment.
+ * */
+struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
+ const struct sw_flow_key *key,
+ u32 skb_hash,
+ u32 *n_mask_hit)
+{
+ struct mask_array *ma = rcu_dereference(tbl->mask_array);
+ struct table_instance *ti = rcu_dereference(tbl->ti);
+ struct mask_cache_entry *entries, *ce;
struct sw_flow *flow;
+ u32 hash;
+ int seg;
*n_mask_hit = 0;
- list_for_each_entry_rcu(mask, &tbl->mask_list, list) {
- (*n_mask_hit)++;
- flow = masked_flow_lookup(ti, key, mask);
- if (flow) /* Found */
+ if (unlikely(!skb_hash)) {
+ u32 mask_index = 0;
+
+ return flow_lookup(tbl, ti, ma, key, n_mask_hit, &mask_index);
+ }
+
+ /* Pre and post recirulation flows usually have the same skb_hash
+ * value. To avoid hash collisions, rehash the 'skb_hash' with
+ * 'recirc_id'. */
+ if (key->recirc_id)
+ skb_hash = jhash_1word(skb_hash, key->recirc_id);
+
+ ce = NULL;
+ hash = skb_hash;
+ entries = this_cpu_ptr(tbl->mask_cache);
+
+ /* Find the cache entry 'ce' to operate on. */
+ for (seg = 0; seg < MC_HASH_SEGS; seg++) {
+ int index = hash & (MC_HASH_ENTRIES - 1);
+ struct mask_cache_entry *e;
+
+ e = &entries[index];
+ if (e->skb_hash == skb_hash) {
+ flow = flow_lookup(tbl, ti, ma, key, n_mask_hit,
+ &e->mask_index);
+ if (!flow)
+ e->skb_hash = 0;
return flow;
+ }
+
+ if (!ce || e->skb_hash < ce->skb_hash)
+ ce = e; /* A better replacement cache candidate. */
+
+ hash >>= MC_HASH_SHIFT;
}
- return NULL;
+
+ /* Cache miss, do full lookup. */
+ flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, &ce->mask_index);
+ if (flow)
+ ce->skb_hash = skb_hash;
+
+ return flow;
}
struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
const struct sw_flow_key *key)
{
+ struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
+ struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array);
u32 __always_unused n_mask_hit;
+ u32 index = 0;
- return ovs_flow_tbl_lookup_stats(tbl, key, &n_mask_hit);
+ return flow_lookup(tbl, ti, ma, key, &n_mask_hit, &index);
}
struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
const struct sw_flow_match *match)
{
- struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
- struct sw_flow_mask *mask;
- struct sw_flow *flow;
+ struct mask_array *ma = ovsl_dereference(tbl->mask_array);
+ int i;
/* Always called under ovs-mutex. */
- list_for_each_entry(mask, &tbl->mask_list, list) {
- flow = masked_flow_lookup(ti, match->key, mask);
+ for (i = 0; i < ma->max; i++) {
+ struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
+ u32 __always_unused n_mask_hit;
+ struct sw_flow_mask *mask;
+ struct sw_flow *flow;
+
+ mask = ovsl_dereference(ma->masks[i]);
+ if (!mask)
+ continue;
+
+ flow = masked_flow_lookup(ti, match->key, mask, &n_mask_hit);
if (flow && ovs_identifier_is_key(&flow->id) &&
- ovs_flow_cmp_unmasked_key(flow, match))
+ ovs_flow_cmp_unmasked_key(flow, match)) {
return flow;
+ }
}
+
return NULL;
}
@@ -528,13 +779,8 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl,
int ovs_flow_tbl_num_masks(const struct flow_table *table)
{
- struct sw_flow_mask *mask;
- int num = 0;
-
- list_for_each_entry(mask, &table->mask_list, list)
- num++;
-
- return num;
+ struct mask_array *ma = rcu_dereference_ovsl(table->mask_array);
+ return READ_ONCE(ma->count);
}
static struct table_instance *table_instance_expand(struct table_instance *ti,
@@ -543,24 +789,6 @@ static struct table_instance *table_instance_expand(struct table_instance *ti,
return table_instance_rehash(ti, ti->n_buckets * 2, ufid);
}
-/* Remove 'mask' from the mask list, if it is not needed any more. */
-static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
-{
- if (mask) {
- /* ovs-lock is required to protect mask-refcount and
- * mask list.
- */
- ASSERT_OVSL();
- BUG_ON(!mask->ref_count);
- mask->ref_count--;
-
- if (!mask->ref_count) {
- list_del_rcu(&mask->list);
- kfree_rcu(mask, rcu);
- }
- }
-}
-
/* Must be called with OVS mutex held. */
void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
{
@@ -568,17 +796,7 @@ void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti);
BUG_ON(table->count == 0);
- hlist_del_rcu(&flow->flow_table.node[ti->node_ver]);
- table->count--;
- if (ovs_identifier_is_ufid(&flow->id)) {
- hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]);
- table->ufid_count--;
- }
-
- /* RCU delete the mask. 'flow->mask' is not NULLed, as it should be
- * accessible as long as the RCU read lock is held.
- */
- flow_mask_remove(table, flow->mask);
+ table_instance_flow_free(table, ti, ufid_ti, flow, true);
}
static struct sw_flow_mask *mask_alloc(void)
@@ -606,13 +824,16 @@ static bool mask_equal(const struct sw_flow_mask *a,
static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl,
const struct sw_flow_mask *mask)
{
- struct list_head *ml;
+ struct mask_array *ma;
+ int i;
+
+ ma = ovsl_dereference(tbl->mask_array);
+ for (i = 0; i < ma->max; i++) {
+ struct sw_flow_mask *t;
+ t = ovsl_dereference(ma->masks[i]);
- list_for_each(ml, &tbl->mask_list) {
- struct sw_flow_mask *m;
- m = container_of(ml, struct sw_flow_mask, list);
- if (mask_equal(mask, m))
- return m;
+ if (t && mask_equal(mask, t))
+ return t;
}
return NULL;
@@ -623,6 +844,7 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
const struct sw_flow_mask *new)
{
struct sw_flow_mask *mask;
+
mask = flow_mask_find(tbl, new);
if (!mask) {
/* Allocate a new mask if none exsits. */
@@ -631,7 +853,12 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
return -ENOMEM;
mask->key = new->key;
mask->range = new->range;
- list_add_rcu(&mask->list, &tbl->mask_list);
+
+ /* Add mask to mask-list. */
+ if (tbl_mask_array_add_mask(tbl, mask)) {
+ kfree(mask);
+ return -ENOMEM;
+ }
} else {
BUG_ON(!mask->ref_count);
mask->ref_count++;
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
index bc52045b63ff..8a5cea6ae111 100644
--- a/net/openvswitch/flow_table.h
+++ b/net/openvswitch/flow_table.h
@@ -22,6 +22,17 @@
#include "flow.h"
+struct mask_cache_entry {
+ u32 skb_hash;
+ u32 mask_index;
+};
+
+struct mask_array {
+ struct rcu_head rcu;
+ int count, max;
+ struct sw_flow_mask __rcu *masks[];
+};
+
struct table_instance {
struct hlist_head *buckets;
unsigned int n_buckets;
@@ -34,7 +45,8 @@ struct table_instance {
struct flow_table {
struct table_instance __rcu *ti;
struct table_instance __rcu *ufid_ti;
- struct list_head mask_list;
+ struct mask_cache_entry __percpu *mask_cache;
+ struct mask_array __rcu *mask_array;
unsigned long last_rehash;
unsigned int count;
unsigned int ufid_count;
@@ -60,8 +72,9 @@ int ovs_flow_tbl_num_masks(const struct flow_table *table);
struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table,
u32 *bucket, u32 *idx);
struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *,
- const struct sw_flow_key *,
- u32 *n_mask_hit);
+ const struct sw_flow_key *,
+ u32 skb_hash,
+ u32 *n_mask_hit);
struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *,
const struct sw_flow_key *);
struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 21c90d3a7ebf..58a7b8312c28 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -137,7 +137,7 @@ static void do_setup(struct net_device *netdev)
netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH |
IFF_NO_QUEUE;
netdev->needs_free_netdev = true;
- netdev->priv_destructor = internal_dev_destructor;
+ netdev->priv_destructor = NULL;
netdev->ethtool_ops = &internal_dev_ethtool_ops;
netdev->rtnl_link_ops = &internal_dev_link_ops;
@@ -159,7 +159,6 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
struct internal_dev *internal_dev;
struct net_device *dev;
int err;
- bool free_vport = true;
vport = ovs_vport_alloc(0, &ovs_internal_vport_ops, parms);
if (IS_ERR(vport)) {
@@ -190,10 +189,9 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
rtnl_lock();
err = register_netdevice(vport->dev);
- if (err) {
- free_vport = false;
+ if (err)
goto error_unlock;
- }
+ vport->dev->priv_destructor = internal_dev_destructor;
dev_set_promiscuity(vport->dev, 1);
rtnl_unlock();
@@ -207,8 +205,7 @@ error_unlock:
error_free_netdev:
free_netdev(dev);
error_free_vport:
- if (free_vport)
- ovs_vport_free(vport);
+ ovs_vport_free(vport);
error:
return ERR_PTR(err);
}
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 3fc38d16c456..5da9392b03d6 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -403,8 +403,9 @@ u32 ovs_vport_find_upcall_portid(const struct vport *vport, struct sk_buff *skb)
ids = rcu_dereference(vport->upcall_portids);
- if (ids->n_ids == 1 && ids->ids[0] == 0)
- return 0;
+ /* If there is only one portid, select it in the fast-path. */
+ if (ids->n_ids == 1)
+ return ids->ids[0];
hash = skb_get_hash(skb);
ids_index = hash - ids->n_ids * reciprocal_divide(hash, ids->rn_ids);