aboutsummaryrefslogtreecommitdiffstats
path: root/net/openvswitch
diff options
context:
space:
mode:
Diffstat (limited to 'net/openvswitch')
-rw-r--r--net/openvswitch/Kconfig13
-rw-r--r--net/openvswitch/Makefile2
-rw-r--r--net/openvswitch/actions.c230
-rw-r--r--net/openvswitch/conntrack.c744
-rw-r--r--net/openvswitch/conntrack.h86
-rw-r--r--net/openvswitch/datapath.c86
-rw-r--r--net/openvswitch/datapath.h13
-rw-r--r--net/openvswitch/flow.c6
-rw-r--r--net/openvswitch/flow.h11
-rw-r--r--net/openvswitch/flow_netlink.c147
-rw-r--r--net/openvswitch/flow_netlink.h13
-rw-r--r--net/openvswitch/flow_table.c2
-rw-r--r--net/openvswitch/vport-geneve.c179
-rw-r--r--net/openvswitch/vport-internal_dev.c2
-rw-r--r--net/openvswitch/vport-netdev.c2
-rw-r--r--net/openvswitch/vport.c7
-rw-r--r--net/openvswitch/vport.h6
17 files changed, 1287 insertions, 262 deletions
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index 422dc0567de9..af7cdef42066 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -31,6 +31,17 @@ config OPENVSWITCH
If unsure, say N.
+config OPENVSWITCH_CONNTRACK
+ bool "Open vSwitch conntrack action support"
+ depends on OPENVSWITCH
+ depends on NF_CONNTRACK
+ default OPENVSWITCH
+ ---help---
+ If you say Y here, then Open vSwitch module will be able to pass
+ packets through conntrack.
+
+ Say N to exclude this support and reduce the binary size.
+
config OPENVSWITCH_GRE
tristate "Open vSwitch GRE tunneling support"
depends on OPENVSWITCH
@@ -59,7 +70,7 @@ config OPENVSWITCH_VXLAN
config OPENVSWITCH_GENEVE
tristate "Open vSwitch Geneve tunneling support"
depends on OPENVSWITCH
- depends on GENEVE_CORE
+ depends on GENEVE
default OPENVSWITCH
---help---
If you say Y here, then the Open vSwitch will be able create geneve vport.
diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile
index 6e1701de04d8..5b5913b06f54 100644
--- a/net/openvswitch/Makefile
+++ b/net/openvswitch/Makefile
@@ -15,6 +15,8 @@ openvswitch-y := \
vport-internal_dev.o \
vport-netdev.o
+openvswitch-$(CONFIG_OPENVSWITCH_CONNTRACK) += conntrack.o
+
obj-$(CONFIG_OPENVSWITCH_VXLAN)+= vport-vxlan.o
obj-$(CONFIG_OPENVSWITCH_GENEVE)+= vport-geneve.o
obj-$(CONFIG_OPENVSWITCH_GRE) += vport-gre.o
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 4f4200717bef..4487543806bb 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -22,6 +22,7 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/openvswitch.h>
+#include <linux/netfilter_ipv6.h>
#include <linux/sctp.h>
#include <linux/tcp.h>
#include <linux/udp.h>
@@ -29,8 +30,10 @@
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
+#include <net/dst.h>
#include <net/ip.h>
#include <net/ipv6.h>
+#include <net/ip6_fib.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/mpls.h>
@@ -38,6 +41,7 @@
#include "datapath.h"
#include "flow.h"
+#include "conntrack.h"
#include "vport.h"
static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
@@ -52,6 +56,20 @@ struct deferred_action {
struct sw_flow_key pkt_key;
};
+#define MAX_L2_LEN (VLAN_ETH_HLEN + 3 * MPLS_HLEN)
+struct ovs_frag_data {
+ unsigned long dst;
+ struct vport *vport;
+ struct ovs_skb_cb cb;
+ __be16 inner_protocol;
+ __u16 vlan_tci;
+ __be16 vlan_proto;
+ unsigned int l2_len;
+ u8 l2_data[MAX_L2_LEN];
+};
+
+static DEFINE_PER_CPU(struct ovs_frag_data, ovs_frag_data_storage);
+
#define DEFERRED_ACTION_FIFO_SIZE 10
struct action_fifo {
int head;
@@ -185,10 +203,6 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
return 0;
}
-/* 'KEY' must not have any bits set outside of the 'MASK' */
-#define MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK)))
-#define SET_MASKED(OLD, KEY, MASK) ((OLD) = MASKED(OLD, KEY, MASK))
-
static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,
const __be32 *mpls_lse, const __be32 *mask)
{
@@ -201,7 +215,7 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,
return err;
stack = (__be32 *)skb_mpls_header(skb);
- lse = MASKED(*stack, *mpls_lse, *mask);
+ lse = OVS_MASKED(*stack, *mpls_lse, *mask);
if (skb->ip_summed == CHECKSUM_COMPLETE) {
__be32 diff[] = { ~(*stack), lse };
@@ -244,9 +258,9 @@ static void ether_addr_copy_masked(u8 *dst_, const u8 *src_, const u8 *mask_)
const u16 *src = (const u16 *)src_;
const u16 *mask = (const u16 *)mask_;
- SET_MASKED(dst[0], src[0], mask[0]);
- SET_MASKED(dst[1], src[1], mask[1]);
- SET_MASKED(dst[2], src[2], mask[2]);
+ OVS_SET_MASKED(dst[0], src[0], mask[0]);
+ OVS_SET_MASKED(dst[1], src[1], mask[1]);
+ OVS_SET_MASKED(dst[2], src[2], mask[2]);
}
static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key,
@@ -338,10 +352,10 @@ static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto,
static void mask_ipv6_addr(const __be32 old[4], const __be32 addr[4],
const __be32 mask[4], __be32 masked[4])
{
- masked[0] = MASKED(old[0], addr[0], mask[0]);
- masked[1] = MASKED(old[1], addr[1], mask[1]);
- masked[2] = MASKED(old[2], addr[2], mask[2]);
- masked[3] = MASKED(old[3], addr[3], mask[3]);
+ masked[0] = OVS_MASKED(old[0], addr[0], mask[0]);
+ masked[1] = OVS_MASKED(old[1], addr[1], mask[1]);
+ masked[2] = OVS_MASKED(old[2], addr[2], mask[2]);
+ masked[3] = OVS_MASKED(old[3], addr[3], mask[3]);
}
static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
@@ -358,15 +372,15 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask)
{
/* Bits 21-24 are always unmasked, so this retains their values. */
- SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16));
- SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8));
- SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask);
+ OVS_SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16));
+ OVS_SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8));
+ OVS_SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask);
}
static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl,
u8 mask)
{
- new_ttl = MASKED(nh->ttl, new_ttl, mask);
+ new_ttl = OVS_MASKED(nh->ttl, new_ttl, mask);
csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8));
nh->ttl = new_ttl;
@@ -392,7 +406,7 @@ static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *flow_key,
* makes sense to check if the value actually changed.
*/
if (mask->ipv4_src) {
- new_addr = MASKED(nh->saddr, key->ipv4_src, mask->ipv4_src);
+ new_addr = OVS_MASKED(nh->saddr, key->ipv4_src, mask->ipv4_src);
if (unlikely(new_addr != nh->saddr)) {
set_ip_addr(skb, nh, &nh->saddr, new_addr);
@@ -400,7 +414,7 @@ static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *flow_key,
}
}
if (mask->ipv4_dst) {
- new_addr = MASKED(nh->daddr, key->ipv4_dst, mask->ipv4_dst);
+ new_addr = OVS_MASKED(nh->daddr, key->ipv4_dst, mask->ipv4_dst);
if (unlikely(new_addr != nh->daddr)) {
set_ip_addr(skb, nh, &nh->daddr, new_addr);
@@ -488,7 +502,8 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
*(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
}
if (mask->ipv6_hlimit) {
- SET_MASKED(nh->hop_limit, key->ipv6_hlimit, mask->ipv6_hlimit);
+ OVS_SET_MASKED(nh->hop_limit, key->ipv6_hlimit,
+ mask->ipv6_hlimit);
flow_key->ip.ttl = nh->hop_limit;
}
return 0;
@@ -517,8 +532,8 @@ static int set_udp(struct sk_buff *skb, struct sw_flow_key *flow_key,
uh = udp_hdr(skb);
/* Either of the masks is non-zero, so do not bother checking them. */
- src = MASKED(uh->source, key->udp_src, mask->udp_src);
- dst = MASKED(uh->dest, key->udp_dst, mask->udp_dst);
+ src = OVS_MASKED(uh->source, key->udp_src, mask->udp_src);
+ dst = OVS_MASKED(uh->dest, key->udp_dst, mask->udp_dst);
if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) {
if (likely(src != uh->source)) {
@@ -558,12 +573,12 @@ static int set_tcp(struct sk_buff *skb, struct sw_flow_key *flow_key,
return err;
th = tcp_hdr(skb);
- src = MASKED(th->source, key->tcp_src, mask->tcp_src);
+ src = OVS_MASKED(th->source, key->tcp_src, mask->tcp_src);
if (likely(src != th->source)) {
set_tp_port(skb, &th->source, src, &th->check);
flow_key->tp.src = src;
}
- dst = MASKED(th->dest, key->tcp_dst, mask->tcp_dst);
+ dst = OVS_MASKED(th->dest, key->tcp_dst, mask->tcp_dst);
if (likely(dst != th->dest)) {
set_tp_port(skb, &th->dest, dst, &th->check);
flow_key->tp.dst = dst;
@@ -590,8 +605,8 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
old_csum = sh->checksum;
old_correct_csum = sctp_compute_cksum(skb, sctphoff);
- sh->source = MASKED(sh->source, key->sctp_src, mask->sctp_src);
- sh->dest = MASKED(sh->dest, key->sctp_dst, mask->sctp_dst);
+ sh->source = OVS_MASKED(sh->source, key->sctp_src, mask->sctp_src);
+ sh->dest = OVS_MASKED(sh->dest, key->sctp_dst, mask->sctp_dst);
new_csum = sctp_compute_cksum(skb, sctphoff);
@@ -605,14 +620,145 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
return 0;
}
-static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
+static int ovs_vport_output(struct sock *sock, struct sk_buff *skb)
+{
+ struct ovs_frag_data *data = this_cpu_ptr(&ovs_frag_data_storage);
+ struct vport *vport = data->vport;
+
+ if (skb_cow_head(skb, data->l2_len) < 0) {
+ kfree_skb(skb);
+ return -ENOMEM;
+ }
+
+ __skb_dst_copy(skb, data->dst);
+ *OVS_CB(skb) = data->cb;
+ skb->inner_protocol = data->inner_protocol;
+ skb->vlan_tci = data->vlan_tci;
+ skb->vlan_proto = data->vlan_proto;
+
+ /* Reconstruct the MAC header. */
+ skb_push(skb, data->l2_len);
+ memcpy(skb->data, &data->l2_data, data->l2_len);
+ ovs_skb_postpush_rcsum(skb, skb->data, data->l2_len);
+ skb_reset_mac_header(skb);
+
+ ovs_vport_send(vport, skb);
+ return 0;
+}
+
+static unsigned int
+ovs_dst_get_mtu(const struct dst_entry *dst)
+{
+ return dst->dev->mtu;
+}
+
+static struct dst_ops ovs_dst_ops = {
+ .family = AF_UNSPEC,
+ .mtu = ovs_dst_get_mtu,
+};
+
+/* prepare_frag() is called once per (larger-than-MTU) frame; its inverse is
+ * ovs_vport_output(), which is called once per fragmented packet.
+ */
+static void prepare_frag(struct vport *vport, struct sk_buff *skb)
+{
+ unsigned int hlen = skb_network_offset(skb);
+ struct ovs_frag_data *data;
+
+ data = this_cpu_ptr(&ovs_frag_data_storage);
+ data->dst = skb->_skb_refdst;
+ data->vport = vport;
+ data->cb = *OVS_CB(skb);
+ data->inner_protocol = skb->inner_protocol;
+ data->vlan_tci = skb->vlan_tci;
+ data->vlan_proto = skb->vlan_proto;
+ data->l2_len = hlen;
+ memcpy(&data->l2_data, skb->data, hlen);
+
+ memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
+ skb_pull(skb, hlen);
+}
+
+static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru,
+ __be16 ethertype)
+{
+ if (skb_network_offset(skb) > MAX_L2_LEN) {
+ OVS_NLERR(1, "L2 header too long to fragment");
+ return;
+ }
+
+ if (ethertype == htons(ETH_P_IP)) {
+ struct dst_entry ovs_dst;
+ unsigned long orig_dst;
+
+ prepare_frag(vport, skb);
+ dst_init(&ovs_dst, &ovs_dst_ops, NULL, 1,
+ DST_OBSOLETE_NONE, DST_NOCOUNT);
+ ovs_dst.dev = vport->dev;
+
+ orig_dst = skb->_skb_refdst;
+ skb_dst_set_noref(skb, &ovs_dst);
+ IPCB(skb)->frag_max_size = mru;
+
+ ip_do_fragment(skb->sk, skb, ovs_vport_output);
+ refdst_drop(orig_dst);
+ } else if (ethertype == htons(ETH_P_IPV6)) {
+ const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
+ unsigned long orig_dst;
+ struct rt6_info ovs_rt;
+
+ if (!v6ops) {
+ kfree_skb(skb);
+ return;
+ }
+
+ prepare_frag(vport, skb);
+ memset(&ovs_rt, 0, sizeof(ovs_rt));
+ dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1,
+ DST_OBSOLETE_NONE, DST_NOCOUNT);
+ ovs_rt.dst.dev = vport->dev;
+
+ orig_dst = skb->_skb_refdst;
+ skb_dst_set_noref(skb, &ovs_rt.dst);
+ IP6CB(skb)->frag_max_size = mru;
+
+ v6ops->fragment(skb->sk, skb, ovs_vport_output);
+ refdst_drop(orig_dst);
+ } else {
+ WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.",
+ ovs_vport_name(vport), ntohs(ethertype), mru,
+ vport->dev->mtu);
+ kfree_skb(skb);
+ }
+}
+
+static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
+ struct sw_flow_key *key)
{
struct vport *vport = ovs_vport_rcu(dp, out_port);
- if (likely(vport))
- ovs_vport_send(vport, skb);
- else
+ if (likely(vport)) {
+ u16 mru = OVS_CB(skb)->mru;
+
+ if (likely(!mru || (skb->len <= mru + ETH_HLEN))) {
+ ovs_vport_send(vport, skb);
+ } else if (mru <= vport->dev->mtu) {
+ __be16 ethertype = key->eth.type;
+
+ if (!is_flow_key_valid(key)) {
+ if (eth_p_mpls(skb->protocol))
+ ethertype = skb->inner_protocol;
+ else
+ ethertype = vlan_get_protocol(skb);
+ }
+
+ ovs_fragment(vport, skb, mru, ethertype);
+ } else {
+ kfree_skb(skb);
+ }
+ } else {
kfree_skb(skb);
+ }
}
static int output_userspace(struct datapath *dp, struct sk_buff *skb,
@@ -626,6 +772,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
memset(&upcall, 0, sizeof(upcall));
upcall.cmd = OVS_PACKET_CMD_ACTION;
+ upcall.mru = OVS_CB(skb)->mru;
for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
a = nla_next(a, &rem)) {
@@ -770,12 +917,13 @@ static int execute_masked_set_action(struct sk_buff *skb,
switch (nla_type(a)) {
case OVS_KEY_ATTR_PRIORITY:
- SET_MASKED(skb->priority, nla_get_u32(a), *get_mask(a, u32 *));
+ OVS_SET_MASKED(skb->priority, nla_get_u32(a),
+ *get_mask(a, u32 *));
flow_key->phy.priority = skb->priority;
break;
case OVS_KEY_ATTR_SKB_MARK:
- SET_MASKED(skb->mark, nla_get_u32(a), *get_mask(a, u32 *));
+ OVS_SET_MASKED(skb->mark, nla_get_u32(a), *get_mask(a, u32 *));
flow_key->phy.skb_mark = skb->mark;
break;
@@ -818,6 +966,13 @@ static int execute_masked_set_action(struct sk_buff *skb,
err = set_mpls(skb, flow_key, nla_data(a), get_mask(a,
__be32 *));
break;
+
+ case OVS_KEY_ATTR_CT_STATE:
+ case OVS_KEY_ATTR_CT_ZONE:
+ case OVS_KEY_ATTR_CT_MARK:
+ case OVS_KEY_ATTR_CT_LABEL:
+ err = -EINVAL;
+ break;
}
return err;
@@ -887,7 +1042,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
struct sk_buff *out_skb = skb_clone(skb, GFP_ATOMIC);
if (out_skb)
- do_output(dp, out_skb, prev_port);
+ do_output(dp, out_skb, prev_port, key);
prev_port = -1;
}
@@ -944,6 +1099,15 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
case OVS_ACTION_ATTR_SAMPLE:
err = sample(dp, skb, key, a, attr, len);
break;
+
+ case OVS_ACTION_ATTR_CT:
+ err = ovs_ct_execute(ovs_dp_get_net(dp), skb, key,
+ nla_data(a));
+
+ /* Hide stolen IP fragments from user space. */
+ if (err == -EINPROGRESS)
+ return 0;
+ break;
}
if (unlikely(err)) {
@@ -953,7 +1117,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
}
if (prev_port != -1)
- do_output(dp, skb, prev_port);
+ do_output(dp, skb, prev_port, key);
else
consume_skb(skb);
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
new file mode 100644
index 000000000000..886bd2758502
--- /dev/null
+++ b/net/openvswitch/conntrack.c
@@ -0,0 +1,744 @@
+/*
+ * Copyright (c) 2015 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/openvswitch.h>
+#include <net/ip.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_labels.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+
+#include "datapath.h"
+#include "conntrack.h"
+#include "flow.h"
+#include "flow_netlink.h"
+
+struct ovs_ct_len_tbl {
+ size_t maxlen;
+ size_t minlen;
+};
+
+/* Metadata mark for masked write to conntrack mark */
+struct md_mark {
+ u32 value;
+ u32 mask;
+};
+
+/* Metadata label for masked write to conntrack label. */
+struct md_label {
+ struct ovs_key_ct_label value;
+ struct ovs_key_ct_label mask;
+};
+
+/* Conntrack action context for execution. */
+struct ovs_conntrack_info {
+ struct nf_conntrack_helper *helper;
+ struct nf_conntrack_zone zone;
+ struct nf_conn *ct;
+ u32 flags;
+ u16 family;
+ struct md_mark mark;
+ struct md_label label;
+};
+
+static u16 key_to_nfproto(const struct sw_flow_key *key)
+{
+ switch (ntohs(key->eth.type)) {
+ case ETH_P_IP:
+ return NFPROTO_IPV4;
+ case ETH_P_IPV6:
+ return NFPROTO_IPV6;
+ default:
+ return NFPROTO_UNSPEC;
+ }
+}
+
+/* Map SKB connection state into the values used by flow definition. */
+static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo)
+{
+ u8 ct_state = OVS_CS_F_TRACKED;
+
+ switch (ctinfo) {
+ case IP_CT_ESTABLISHED_REPLY:
+ case IP_CT_RELATED_REPLY:
+ case IP_CT_NEW_REPLY:
+ ct_state |= OVS_CS_F_REPLY_DIR;
+ break;
+ default:
+ break;
+ }
+
+ switch (ctinfo) {
+ case IP_CT_ESTABLISHED:
+ case IP_CT_ESTABLISHED_REPLY:
+ ct_state |= OVS_CS_F_ESTABLISHED;
+ break;
+ case IP_CT_RELATED:
+ case IP_CT_RELATED_REPLY:
+ ct_state |= OVS_CS_F_RELATED;
+ break;
+ case IP_CT_NEW:
+ case IP_CT_NEW_REPLY:
+ ct_state |= OVS_CS_F_NEW;
+ break;
+ default:
+ break;
+ }
+
+ return ct_state;
+}
+
+static void ovs_ct_get_label(const struct nf_conn *ct,
+ struct ovs_key_ct_label *label)
+{
+ struct nf_conn_labels *cl = ct ? nf_ct_labels_find(ct) : NULL;
+
+ if (cl) {
+ size_t len = cl->words * sizeof(long);
+
+ if (len > OVS_CT_LABEL_LEN)
+ len = OVS_CT_LABEL_LEN;
+ else if (len < OVS_CT_LABEL_LEN)
+ memset(label, 0, OVS_CT_LABEL_LEN);
+ memcpy(label, cl->bits, len);
+ } else {
+ memset(label, 0, OVS_CT_LABEL_LEN);
+ }
+}
+
+static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state,
+ const struct nf_conntrack_zone *zone,
+ const struct nf_conn *ct)
+{
+ key->ct.state = state;
+ key->ct.zone = zone->id;
+ key->ct.mark = ct ? ct->mark : 0;
+ ovs_ct_get_label(ct, &key->ct.label);
+}
+
+/* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has
+ * previously sent the packet to conntrack via the ct action.
+ */
+static void ovs_ct_update_key(const struct sk_buff *skb,
+ struct sw_flow_key *key, bool post_ct)
+{
+ const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+ u8 state = 0;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct) {
+ state = ovs_ct_get_state(ctinfo);
+ if (ct->master)
+ state |= OVS_CS_F_RELATED;
+ zone = nf_ct_zone(ct);
+ } else if (post_ct) {
+ state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID;
+ }
+ __ovs_ct_update_key(key, state, zone, ct);
+}
+
+void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key)
+{
+ ovs_ct_update_key(skb, key, false);
+}
+
+int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb)
+{
+ if (nla_put_u8(skb, OVS_KEY_ATTR_CT_STATE, key->ct.state))
+ return -EMSGSIZE;
+
+ if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
+ nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, key->ct.zone))
+ return -EMSGSIZE;
+
+ if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
+ nla_put_u32(skb, OVS_KEY_ATTR_CT_MARK, key->ct.mark))
+ return -EMSGSIZE;
+
+ if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
+ nla_put(skb, OVS_KEY_ATTR_CT_LABEL, sizeof(key->ct.label),
+ &key->ct.label))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int ovs_ct_set_mark(struct sk_buff *skb, struct sw_flow_key *key,
+ u32 ct_mark, u32 mask)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+ u32 new_mark;
+
+ if (!IS_ENABLED(CONFIG_NF_CONNTRACK_MARK))
+ return -ENOTSUPP;
+
+ /* The connection could be invalid, in which case set_mark is no-op. */
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct)
+ return 0;
+
+ new_mark = ct_mark | (ct->mark & ~(mask));
+ if (ct->mark != new_mark) {
+ ct->mark = new_mark;
+ nf_conntrack_event_cache(IPCT_MARK, ct);
+ key->ct.mark = new_mark;
+ }
+
+ return 0;
+}
+
+static int ovs_ct_set_label(struct sk_buff *skb, struct sw_flow_key *key,
+ const struct ovs_key_ct_label *label,
+ const struct ovs_key_ct_label *mask)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn_labels *cl;
+ struct nf_conn *ct;
+ int err;
+
+ if (!IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS))
+ return -ENOTSUPP;
+
+ /* The connection could be invalid, in which case set_label is no-op.*/
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct)
+ return 0;
+
+ cl = nf_ct_labels_find(ct);
+ if (!cl) {
+ nf_ct_labels_ext_add(ct);
+ cl = nf_ct_labels_find(ct);
+ }
+ if (!cl || cl->words * sizeof(long) < OVS_CT_LABEL_LEN)
+ return -ENOSPC;
+
+ err = nf_connlabels_replace(ct, (u32 *)label, (u32 *)mask,
+ OVS_CT_LABEL_LEN / sizeof(u32));
+ if (err)
+ return err;
+
+ ovs_ct_get_label(ct, &key->ct.label);
+ return 0;
+}
+
+/* 'skb' should already be pulled to nh_ofs. */
+static int ovs_ct_helper(struct sk_buff *skb, u16 proto)
+{
+ const struct nf_conntrack_helper *helper;
+ const struct nf_conn_help *help;
+ enum ip_conntrack_info ctinfo;
+ unsigned int protoff;
+ struct nf_conn *ct;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct || ctinfo == IP_CT_RELATED_REPLY)
+ return NF_ACCEPT;
+
+ help = nfct_help(ct);
+ if (!help)
+ return NF_ACCEPT;
+
+ helper = rcu_dereference(help->helper);
+ if (!helper)
+ return NF_ACCEPT;
+
+ switch (proto) {
+ case NFPROTO_IPV4:
+ protoff = ip_hdrlen(skb);
+ break;
+ case NFPROTO_IPV6: {
+ u8 nexthdr = ipv6_hdr(skb)->nexthdr;
+ __be16 frag_off;
+
+ protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
+ &nexthdr, &frag_off);
+ if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
+ pr_debug("proto header not found\n");
+ return NF_ACCEPT;
+ }
+ break;
+ }
+ default:
+ WARN_ONCE(1, "helper invoked on non-IP family!");
+ return NF_DROP;
+ }
+
+ return helper->help(skb, protoff, ct, ctinfo);
+}
+
+static int handle_fragments(struct net *net, struct sw_flow_key *key,
+ u16 zone, struct sk_buff *skb)
+{
+ struct ovs_skb_cb ovs_cb = *OVS_CB(skb);
+
+ if (key->eth.type == htons(ETH_P_IP)) {
+ enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
+ int err;
+
+ memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
+ err = ip_defrag(skb, user);
+ if (err)
+ return err;
+
+ ovs_cb.mru = IPCB(skb)->frag_max_size;
+ } else if (key->eth.type == htons(ETH_P_IPV6)) {
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
+ enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
+ struct sk_buff *reasm;
+
+ memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
+ reasm = nf_ct_frag6_gather(skb, user);
+ if (!reasm)
+ return -EINPROGRESS;
+
+ if (skb == reasm)
+ return -EINVAL;
+
+ key->ip.proto = ipv6_hdr(reasm)->nexthdr;
+ skb_morph(skb, reasm);
+ consume_skb(reasm);
+ ovs_cb.mru = IP6CB(skb)->frag_max_size;
+#else
+ return -EPFNOSUPPORT;
+#endif
+ } else {
+ return -EPFNOSUPPORT;
+ }
+
+ key->ip.frag = OVS_FRAG_TYPE_NONE;
+ skb_clear_hash(skb);
+ skb->ignore_df = 1;
+ *OVS_CB(skb) = ovs_cb;
+
+ return 0;
+}
+
+static struct nf_conntrack_expect *
+ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone,
+ u16 proto, const struct sk_buff *skb)
+{
+ struct nf_conntrack_tuple tuple;
+
+ if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, &tuple))
+ return NULL;
+ return __nf_ct_expect_find(net, zone, &tuple);
+}
+
+/* Determine whether skb->nfct is equal to the result of conntrack lookup. */
+static bool skb_nfct_cached(const struct net *net, const struct sk_buff *skb,
+ const struct ovs_conntrack_info *info)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct)
+ return false;
+ if (!net_eq(net, read_pnet(&ct->ct_net)))
+ return false;
+ if (!nf_ct_zone_equal_any(info->ct, nf_ct_zone(ct)))
+ return false;
+ if (info->helper) {
+ struct nf_conn_help *help;
+
+ help = nf_ct_ext_find(ct, NF_CT_EXT_HELPER);
+ if (help && rcu_access_pointer(help->helper) != info->helper)
+ return false;
+ }
+
+ return true;
+}
+
+static int __ovs_ct_lookup(struct net *net, const struct sw_flow_key *key,
+ const struct ovs_conntrack_info *info,
+ struct sk_buff *skb)
+{
+ /* If we are recirculating packets to match on conntrack fields and
+ * committing with a separate conntrack action, then we don't need to
+ * actually run the packet through conntrack twice unless it's for a
+ * different zone.
+ */
+ if (!skb_nfct_cached(net, skb, info)) {
+ struct nf_conn *tmpl = info->ct;
+
+ /* Associate skb with specified zone. */
+ if (tmpl) {
+ if (skb->nfct)
+ nf_conntrack_put(skb->nfct);
+ nf_conntrack_get(&tmpl->ct_general);
+ skb->nfct = &tmpl->ct_general;
+ skb->nfctinfo = IP_CT_NEW;
+ }
+
+ if (nf_conntrack_in(net, info->family, NF_INET_PRE_ROUTING,
+ skb) != NF_ACCEPT)
+ return -ENOENT;
+
+ if (ovs_ct_helper(skb, info->family) != NF_ACCEPT) {
+ WARN_ONCE(1, "helper rejected packet");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+/* Lookup connection and read fields into key. */
+static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
+ const struct ovs_conntrack_info *info,
+ struct sk_buff *skb)
+{
+ struct nf_conntrack_expect *exp;
+
+ exp = ovs_ct_expect_find(net, &info->zone, info->family, skb);
+ if (exp) {
+ u8 state;
+
+ state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED;
+ __ovs_ct_update_key(key, state, &info->zone, exp->master);
+ } else {
+ int err;
+
+ err = __ovs_ct_lookup(net, key, info, skb);
+ if (err)
+ return err;
+
+ ovs_ct_update_key(skb, key, true);
+ }
+
+ return 0;
+}
+
+/* Lookup connection and confirm if unconfirmed. */
+static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
+ const struct ovs_conntrack_info *info,
+ struct sk_buff *skb)
+{
+ u8 state;
+ int err;
+
+ state = key->ct.state;
+ if (key->ct.zone == info->zone.id &&
+ ((state & OVS_CS_F_TRACKED) && !(state & OVS_CS_F_NEW))) {
+ /* Previous lookup has shown that this connection is already
+ * tracked and committed. Skip committing.
+ */
+ return 0;
+ }
+
+ err = __ovs_ct_lookup(net, key, info, skb);
+ if (err)
+ return err;
+ if (nf_conntrack_confirm(skb) != NF_ACCEPT)
+ return -EINVAL;
+
+ ovs_ct_update_key(skb, key, true);
+
+ return 0;
+}
+
+static bool label_nonzero(const struct ovs_key_ct_label *label)
+{
+ size_t i;
+
+ for (i = 0; i < sizeof(*label); i++)
+ if (label->ct_label[i])
+ return true;
+
+ return false;
+}
+
+int ovs_ct_execute(struct net *net, struct sk_buff *skb,
+ struct sw_flow_key *key,
+ const struct ovs_conntrack_info *info)
+{
+ int nh_ofs;
+ int err;
+
+ /* The conntrack module expects to be working at L3. */
+ nh_ofs = skb_network_offset(skb);
+ skb_pull(skb, nh_ofs);
+
+ if (key->ip.frag != OVS_FRAG_TYPE_NONE) {
+ err = handle_fragments(net, key, info->zone.id, skb);
+ if (err)
+ return err;
+ }
+
+ if (info->flags & OVS_CT_F_COMMIT)
+ err = ovs_ct_commit(net, key, info, skb);
+ else
+ err = ovs_ct_lookup(net, key, info, skb);
+ if (err)
+ goto err;
+
+ if (info->mark.mask) {
+ err = ovs_ct_set_mark(skb, key, info->mark.value,
+ info->mark.mask);
+ if (err)
+ goto err;
+ }
+ if (label_nonzero(&info->label.mask))
+ err = ovs_ct_set_label(skb, key, &info->label.value,
+ &info->label.mask);
+err:
+ skb_push(skb, nh_ofs);
+ return err;
+}
+
+static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name,
+ const struct sw_flow_key *key, bool log)
+{
+ struct nf_conntrack_helper *helper;
+ struct nf_conn_help *help;
+
+ helper = nf_conntrack_helper_try_module_get(name, info->family,
+ key->ip.proto);
+ if (!helper) {
+ OVS_NLERR(log, "Unknown helper \"%s\"", name);
+ return -EINVAL;
+ }
+
+ help = nf_ct_helper_ext_add(info->ct, helper, GFP_KERNEL);
+ if (!help) {
+ module_put(helper->me);
+ return -ENOMEM;
+ }
+
+ rcu_assign_pointer(help->helper, helper);
+ info->helper = helper;
+ return 0;
+}
+
+static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = {
+ [OVS_CT_ATTR_FLAGS] = { .minlen = sizeof(u32),
+ .maxlen = sizeof(u32) },
+ [OVS_CT_ATTR_ZONE] = { .minlen = sizeof(u16),
+ .maxlen = sizeof(u16) },
+ [OVS_CT_ATTR_MARK] = { .minlen = sizeof(struct md_mark),
+ .maxlen = sizeof(struct md_mark) },
+ [OVS_CT_ATTR_LABEL] = { .minlen = sizeof(struct md_label),
+ .maxlen = sizeof(struct md_label) },
+ [OVS_CT_ATTR_HELPER] = { .minlen = 1,
+ .maxlen = NF_CT_HELPER_NAME_LEN }
+};
+
+static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
+ const char **helper, bool log)
+{
+ struct nlattr *a;
+ int rem;
+
+ nla_for_each_nested(a, attr, rem) {
+ int type = nla_type(a);
+ int maxlen = ovs_ct_attr_lens[type].maxlen;
+ int minlen = ovs_ct_attr_lens[type].minlen;
+
+ if (type > OVS_CT_ATTR_MAX) {
+ OVS_NLERR(log,
+ "Unknown conntrack attr (type=%d, max=%d)",
+ type, OVS_CT_ATTR_MAX);
+ return -EINVAL;
+ }
+ if (nla_len(a) < minlen || nla_len(a) > maxlen) {
+ OVS_NLERR(log,
+ "Conntrack attr type has unexpected length (type=%d, length=%d, expected=%d)",
+ type, nla_len(a), maxlen);
+ return -EINVAL;
+ }
+
+ switch (type) {
+ case OVS_CT_ATTR_FLAGS:
+ info->flags = nla_get_u32(a);
+ break;
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ case OVS_CT_ATTR_ZONE:
+ info->zone.id = nla_get_u16(a);
+ break;
+#endif
+#ifdef CONFIG_NF_CONNTRACK_MARK
+ case OVS_CT_ATTR_MARK: {
+ struct md_mark *mark = nla_data(a);
+
+ info->mark = *mark;
+ break;
+ }
+#endif
+#ifdef CONFIG_NF_CONNTRACK_LABELS
+ case OVS_CT_ATTR_LABEL: {
+ struct md_label *label = nla_data(a);
+
+ info->label = *label;
+ break;
+ }
+#endif
+ case OVS_CT_ATTR_HELPER:
+ *helper = nla_data(a);
+ if (!memchr(*helper, '\0', nla_len(a))) {
+ OVS_NLERR(log, "Invalid conntrack helper");
+ return -EINVAL;
+ }
+ break;
+ default:
+ OVS_NLERR(log, "Unknown conntrack attr (%d)",
+ type);
+ return -EINVAL;
+ }
+ }
+
+ if (rem > 0) {
+ OVS_NLERR(log, "Conntrack attr has %d unknown bytes", rem);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr)
+{
+ if (attr == OVS_KEY_ATTR_CT_STATE)
+ return true;
+ if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
+ attr == OVS_KEY_ATTR_CT_ZONE)
+ return true;
+ if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
+ attr == OVS_KEY_ATTR_CT_MARK)
+ return true;
+ if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
+ attr == OVS_KEY_ATTR_CT_LABEL) {
+ struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+
+ return ovs_net->xt_label;
+ }
+
+ return false;
+}
+
+int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
+ const struct sw_flow_key *key,
+ struct sw_flow_actions **sfa, bool log)
+{
+ struct ovs_conntrack_info ct_info;
+ const char *helper = NULL;
+ u16 family;
+ int err;
+
+ family = key_to_nfproto(key);
+ if (family == NFPROTO_UNSPEC) {
+ OVS_NLERR(log, "ct family unspecified");
+ return -EINVAL;
+ }
+
+ memset(&ct_info, 0, sizeof(ct_info));
+ ct_info.family = family;
+
+ nf_ct_zone_init(&ct_info.zone, NF_CT_DEFAULT_ZONE_ID,
+ NF_CT_DEFAULT_ZONE_DIR, 0);
+
+ err = parse_ct(attr, &ct_info, &helper, log);
+ if (err)
+ return err;
+
+ /* Set up template for tracking connections in specific zones. */
+ ct_info.ct = nf_ct_tmpl_alloc(net, &ct_info.zone, GFP_KERNEL);
+ if (!ct_info.ct) {
+ OVS_NLERR(log, "Failed to allocate conntrack template");
+ return -ENOMEM;
+ }
+ if (helper) {
+ err = ovs_ct_add_helper(&ct_info, helper, key, log);
+ if (err)
+ goto err_free_ct;
+ }
+
+ err = ovs_nla_add_action(sfa, OVS_ACTION_ATTR_CT, &ct_info,
+ sizeof(ct_info), log);
+ if (err)
+ goto err_free_ct;
+
+ __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status);
+ nf_conntrack_get(&ct_info.ct->ct_general);
+ return 0;
+err_free_ct:
+ nf_conntrack_free(ct_info.ct);
+ return err;
+}
+
+int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info,
+ struct sk_buff *skb)
+{
+ struct nlattr *start;
+
+ start = nla_nest_start(skb, OVS_ACTION_ATTR_CT);
+ if (!start)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, OVS_CT_ATTR_FLAGS, ct_info->flags))
+ return -EMSGSIZE;
+ if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
+ nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id))
+ return -EMSGSIZE;
+ if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
+ nla_put(skb, OVS_CT_ATTR_MARK, sizeof(ct_info->mark),
+ &ct_info->mark))
+ return -EMSGSIZE;
+ if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
+ nla_put(skb, OVS_CT_ATTR_LABEL, sizeof(ct_info->label),
+ &ct_info->label))
+ return -EMSGSIZE;
+ if (ct_info->helper) {
+ if (nla_put_string(skb, OVS_CT_ATTR_HELPER,
+ ct_info->helper->name))
+ return -EMSGSIZE;
+ }
+
+ nla_nest_end(skb, start);
+
+ return 0;
+}
+
+void ovs_ct_free_action(const struct nlattr *a)
+{
+ struct ovs_conntrack_info *ct_info = nla_data(a);
+
+ if (ct_info->helper)
+ module_put(ct_info->helper->me);
+ if (ct_info->ct)
+ nf_ct_put(ct_info->ct);
+}
+
+void ovs_ct_init(struct net *net)
+{
+ unsigned int n_bits = sizeof(struct ovs_key_ct_label) * BITS_PER_BYTE;
+ struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+
+ if (nf_connlabels_get(net, n_bits)) {
+ ovs_net->xt_label = false;
+ OVS_NLERR(true, "Failed to set connlabel length");
+ } else {
+ ovs_net->xt_label = true;
+ }
+}
+
+void ovs_ct_exit(struct net *net)
+{
+ struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
+
+ if (ovs_net->xt_label)
+ nf_connlabels_put(net);
+}
diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h
new file mode 100644
index 000000000000..3cb30667a7dc
--- /dev/null
+++ b/net/openvswitch/conntrack.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2015 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef OVS_CONNTRACK_H
+#define OVS_CONNTRACK_H 1
+
+#include "flow.h"
+
+struct ovs_conntrack_info;
+enum ovs_key_attr;
+
+#if defined(CONFIG_OPENVSWITCH_CONNTRACK)
+void ovs_ct_init(struct net *);
+void ovs_ct_exit(struct net *);
+bool ovs_ct_verify(struct net *, enum ovs_key_attr attr);
+int ovs_ct_copy_action(struct net *, const struct nlattr *,
+ const struct sw_flow_key *, struct sw_flow_actions **,
+ bool log);
+int ovs_ct_action_to_attr(const struct ovs_conntrack_info *, struct sk_buff *);
+
+int ovs_ct_execute(struct net *, struct sk_buff *, struct sw_flow_key *,
+ const struct ovs_conntrack_info *);
+
+void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key);
+int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb);
+void ovs_ct_free_action(const struct nlattr *a);
+#else
+#include <linux/errno.h>
+
+static inline void ovs_ct_init(struct net *net) { }
+
+static inline void ovs_ct_exit(struct net *net) { }
+
+static inline bool ovs_ct_verify(struct net *net, int attr)
+{
+ return false;
+}
+
+static inline int ovs_ct_copy_action(struct net *net, const struct nlattr *nla,
+ const struct sw_flow_key *key,
+ struct sw_flow_actions **acts, bool log)
+{
+ return -ENOTSUPP;
+}
+
+static inline int ovs_ct_action_to_attr(const struct ovs_conntrack_info *info,
+ struct sk_buff *skb)
+{
+ return -ENOTSUPP;
+}
+
+static inline int ovs_ct_execute(struct net *net, struct sk_buff *skb,
+ struct sw_flow_key *key,
+ const struct ovs_conntrack_info *info)
+{
+ return -ENOTSUPP;
+}
+
+static inline void ovs_ct_fill_key(const struct sk_buff *skb,
+ struct sw_flow_key *key)
+{
+ key->ct.state = 0;
+ key->ct.zone = 0;
+ key->ct.mark = 0;
+ memset(&key->ct.label, 0, sizeof(key->ct.label));
+}
+
+static inline int ovs_ct_put_key(const struct sw_flow_key *key,
+ struct sk_buff *skb)
+{
+ return 0;
+}
+
+static inline void ovs_ct_free_action(const struct nlattr *a) { }
+#endif
+#endif /* ovs_conntrack.h */
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index ffe984f5b95c..ec0f8d9cee73 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -275,6 +275,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
memset(&upcall, 0, sizeof(upcall));
upcall.cmd = OVS_PACKET_CMD_MISS;
upcall.portid = ovs_vport_find_upcall_portid(p, skb);
+ upcall.mru = OVS_CB(skb)->mru;
error = ovs_dp_upcall(dp, skb, key, &upcall);
if (unlikely(error))
kfree_skb(skb);
@@ -400,9 +401,23 @@ static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
if (upcall_info->actions_len)
size += nla_total_size(upcall_info->actions_len);
+ /* OVS_PACKET_ATTR_MRU */
+ if (upcall_info->mru)
+ size += nla_total_size(sizeof(upcall_info->mru));
+
return size;
}
+static void pad_packet(struct datapath *dp, struct sk_buff *skb)
+{
+ if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
+ size_t plen = NLA_ALIGN(skb->len) - skb->len;
+
+ if (plen > 0)
+ memset(skb_put(skb, plen), 0, plen);
+ }
+}
+
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
const struct sw_flow_key *key,
const struct dp_upcall_info *upcall_info)
@@ -492,6 +507,16 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
nla_nest_cancel(user_skb, nla);
}
+ /* Add OVS_PACKET_ATTR_MRU */
+ if (upcall_info->mru) {
+ if (nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU,
+ upcall_info->mru)) {
+ err = -ENOBUFS;
+ goto out;
+ }
+ pad_packet(dp, user_skb);
+ }
+
/* Only reserve room for attribute header, packet data is added
* in skb_zerocopy() */
if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
@@ -505,12 +530,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
goto out;
/* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
- if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
- size_t plen = NLA_ALIGN(user_skb->len) - user_skb->len;
-
- if (plen > 0)
- memset(skb_put(user_skb, plen), 0, plen);
- }
+ pad_packet(dp, user_skb);
((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;
@@ -527,6 +547,7 @@ out:
static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
struct ovs_header *ovs_header = info->userhdr;
+ struct net *net = sock_net(skb->sk);
struct nlattr **a = info->attrs;
struct sw_flow_actions *acts;
struct sk_buff *packet;
@@ -535,6 +556,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
struct ethhdr *eth;
struct vport *input_vport;
+ u16 mru = 0;
int len;
int err;
bool log = !a[OVS_PACKET_ATTR_PROBE];
@@ -564,18 +586,25 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
else
packet->protocol = htons(ETH_P_802_2);
+ /* Set packet's mru */
+ if (a[OVS_PACKET_ATTR_MRU]) {
+ mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
+ packet->ignore_df = 1;
+ }
+ OVS_CB(packet)->mru = mru;
+
/* Build an sw_flow for sending this packet. */
flow = ovs_flow_alloc();
err = PTR_ERR(flow);
if (IS_ERR(flow))
goto err_kfree_skb;
- err = ovs_flow_key_extract_userspace(a[OVS_PACKET_ATTR_KEY], packet,
- &flow->key, log);
+ err = ovs_flow_key_extract_userspace(net, a[OVS_PACKET_ATTR_KEY],
+ packet, &flow->key, log);
if (err)
goto err_flow_free;
- err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS],
+ err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS],
&flow->key, &acts, log);
if (err)
goto err_flow_free;
@@ -586,7 +615,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
packet->mark = flow->key.phy.skb_mark;
rcu_read_lock();
- dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
+ dp = get_dp_rcu(net, ovs_header->dp_ifindex);
err = -ENODEV;
if (!dp)
goto err_unlock;
@@ -598,6 +627,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
if (!input_vport)
goto err_unlock;
+ packet->dev = input_vport->dev;
OVS_CB(packet)->input_vport = input_vport;
sf_acts = rcu_dereference(flow->sf_acts);
@@ -624,6 +654,7 @@ static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
[OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
[OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
[OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
+ [OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
};
static const struct genl_ops dp_packet_genl_ops[] = {
@@ -713,7 +744,7 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
/* OVS_FLOW_ATTR_ACTIONS */
if (should_fill_actions(ufid_flags))
- len += nla_total_size(acts->actions_len);
+ len += nla_total_size(acts->orig_len);
return len
+ nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
@@ -880,6 +911,7 @@ static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
+ struct net *net = sock_net(skb->sk);
struct nlattr **a = info->attrs;
struct ovs_header *ovs_header = info->userhdr;
struct sw_flow *flow = NULL, *new_flow;
@@ -915,7 +947,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
/* Extract key. */
ovs_match_init(&match, &key, &mask);
- error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY],
+ error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
a[OVS_FLOW_ATTR_MASK], log);
if (error)
goto err_kfree_flow;
@@ -929,8 +961,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
goto err_kfree_flow;
/* Validate actions. */
- error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key,
- &acts, log);
+ error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
+ &new_flow->key, &acts, log);
if (error) {
OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
goto err_kfree_flow;
@@ -944,7 +976,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
}
ovs_lock();
- dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+ dp = get_dp(net, ovs_header->dp_ifindex);
if (unlikely(!dp)) {
error = -ENODEV;
goto err_unlock_ovs;
@@ -1038,7 +1070,8 @@ error:
}
/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
-static struct sw_flow_actions *get_flow_actions(const struct nlattr *a,
+static struct sw_flow_actions *get_flow_actions(struct net *net,
+ const struct nlattr *a,
const struct sw_flow_key *key,
const struct sw_flow_mask *mask,
bool log)
@@ -1048,7 +1081,7 @@ static struct sw_flow_actions *get_flow_actions(const struct nlattr *a,
int error;
ovs_flow_mask_key(&masked_key, key, mask);
- error = ovs_nla_copy_actions(a, &masked_key, &acts, log);
+ error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log);
if (error) {
OVS_NLERR(log,
"Actions may not be safe on all matching packets");
@@ -1060,6 +1093,7 @@ static struct sw_flow_actions *get_flow_actions(const struct nlattr *a,
static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
+ struct net *net = sock_net(skb->sk);
struct nlattr **a = info->attrs;
struct ovs_header *ovs_header = info->userhdr;
struct sw_flow_key key;
@@ -1084,15 +1118,15 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
ovs_match_init(&match, &key, &mask);
- error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY],
+ error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
a[OVS_FLOW_ATTR_MASK], log);
if (error)
goto error;
/* Validate actions. */
if (a[OVS_FLOW_ATTR_ACTIONS]) {
- acts = get_flow_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, &mask,
- log);
+ acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], &key,
+ &mask, log);
if (IS_ERR(acts)) {
error = PTR_ERR(acts);
goto error;
@@ -1108,7 +1142,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
}
ovs_lock();
- dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+ dp = get_dp(net, ovs_header->dp_ifindex);
if (unlikely(!dp)) {
error = -ENODEV;
goto err_unlock_ovs;
@@ -1174,6 +1208,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr **a = info->attrs;
struct ovs_header *ovs_header = info->userhdr;
+ struct net *net = sock_net(skb->sk);
struct sw_flow_key key;
struct sk_buff *reply;
struct sw_flow *flow;
@@ -1188,7 +1223,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
if (a[OVS_FLOW_ATTR_KEY]) {
ovs_match_init(&match, &key, NULL);
- err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL,
+ err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL,
log);
} else if (!ufid_present) {
OVS_NLERR(log,
@@ -1232,6 +1267,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr **a = info->attrs;
struct ovs_header *ovs_header = info->userhdr;
+ struct net *net = sock_net(skb->sk);
struct sw_flow_key key;
struct sk_buff *reply;
struct sw_flow *flow = NULL;
@@ -1246,8 +1282,8 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
if (a[OVS_FLOW_ATTR_KEY]) {
ovs_match_init(&match, &key, NULL);
- err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL,
- log);
+ err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
+ NULL, log);
if (unlikely(err))
return err;
}
@@ -2203,6 +2239,7 @@ static int __net_init ovs_init_net(struct net *net)
INIT_LIST_HEAD(&ovs_net->dps);
INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
+ ovs_ct_init(net);
return 0;
}
@@ -2237,6 +2274,7 @@ static void __net_exit ovs_exit_net(struct net *dnet)
struct net *net;
LIST_HEAD(head);
+ ovs_ct_exit(dnet);
ovs_lock();
list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
__dp_destroy(dp);
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 6b28c5cedb23..4e785ab88973 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -27,6 +27,7 @@
#include <linux/u64_stats_sync.h>
#include <net/ip_tunnels.h>
+#include "conntrack.h"
#include "flow.h"
#include "flow_table.h"
#include "vport.h"
@@ -97,10 +98,13 @@ struct datapath {
* NULL if the packet is not being tunneled.
* @input_vport: The original vport packet came in on. This value is cached
* when a packet is received by OVS.
+ * @mru: The maximum received fragement size; 0 if the packet is not
+ * fragmented.
*/
struct ovs_skb_cb {
struct ip_tunnel_info *egress_tun_info;
struct vport *input_vport;
+ u16 mru;
};
#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
@@ -113,6 +117,7 @@ struct ovs_skb_cb {
* then no packet is sent and the packet is accounted in the datapath's @n_lost
* counter.
* @egress_tun_info: If nonnull, becomes %OVS_PACKET_ATTR_EGRESS_TUN_KEY.
+ * @mru: If not zero, Maximum received IP fragment size.
*/
struct dp_upcall_info {
const struct ip_tunnel_info *egress_tun_info;
@@ -121,6 +126,7 @@ struct dp_upcall_info {
int actions_len;
u32 portid;
u8 cmd;
+ u16 mru;
};
/**
@@ -132,6 +138,9 @@ struct ovs_net {
struct list_head dps;
struct work_struct dp_notify_work;
struct vport_net vport_net;
+
+ /* Module reference for configuring conntrack. */
+ bool xt_label;
};
extern int ovs_net_id;
@@ -200,6 +209,10 @@ void ovs_dp_notify_wq(struct work_struct *work);
int action_fifos_init(void);
void action_fifos_exit(void);
+/* 'KEY' must not have any bits set outside of the 'MASK' */
+#define OVS_MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK)))
+#define OVS_SET_MASKED(OLD, KEY, MASK) ((OLD) = OVS_MASKED(OLD, KEY, MASK))
+
#define OVS_NLERR(logging_allowed, fmt, ...) \
do { \
if (logging_allowed && net_ratelimit()) \
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 8db22ef73626..5a3195e538ce 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -49,6 +49,7 @@
#include "datapath.h"
#include "flow.h"
#include "flow_netlink.h"
+#include "conntrack.h"
u64 ovs_flow_used_time(unsigned long flow_jiffies)
{
@@ -707,13 +708,14 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
key->phy.priority = skb->priority;
key->phy.in_port = OVS_CB(skb)->input_vport->port_no;
key->phy.skb_mark = skb->mark;
+ ovs_ct_fill_key(skb, key);
key->ovs_flow_hash = 0;
key->recirc_id = 0;
return key_extract(skb, key);
}
-int ovs_flow_key_extract_userspace(const struct nlattr *attr,
+int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr,
struct sk_buff *skb,
struct sw_flow_key *key, bool log)
{
@@ -722,7 +724,7 @@ int ovs_flow_key_extract_userspace(const struct nlattr *attr,
memset(key, 0, OVS_SW_FLOW_KEY_METADATA_SIZE);
/* Extract metadata from netlink attributes. */
- err = ovs_nla_get_flow_metadata(attr, key, log);
+ err = ovs_nla_get_flow_metadata(net, attr, key, log);
if (err)
return err;
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index b62cdb3e3589..fe527d2dd4b7 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -111,6 +111,14 @@ struct sw_flow_key {
} nd;
} ipv6;
};
+ struct {
+ /* Connection tracking fields. */
+ u16 zone;
+ u32 mark;
+ u8 state;
+ struct ovs_key_ct_label label;
+ } ct;
+
} __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */
struct sw_flow_key_range {
@@ -144,6 +152,7 @@ struct sw_flow_id {
struct sw_flow_actions {
struct rcu_head rcu;
+ size_t orig_len; /* From flow_cmd_new netlink actions size */
u32 actions_len;
struct nlattr actions[];
};
@@ -212,7 +221,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
struct sk_buff *skb,
struct sw_flow_key *key);
/* Extract key from packet coming from userspace. */
-int ovs_flow_key_extract_userspace(const struct nlattr *attr,
+int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr,
struct sk_buff *skb,
struct sw_flow_key *key, bool log);
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index a6eb77ab1a64..e22c5bfe8575 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -281,7 +281,7 @@ size_t ovs_key_attr_size(void)
/* Whenever adding new OVS_KEY_ FIELDS, we should consider
* updating this function.
*/
- BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 22);
+ BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 26);
return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
+ nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */
@@ -290,6 +290,10 @@ size_t ovs_key_attr_size(void)
+ nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */
+ nla_total_size(4) /* OVS_KEY_ATTR_DP_HASH */
+ nla_total_size(4) /* OVS_KEY_ATTR_RECIRC_ID */
+ + nla_total_size(1) /* OVS_KEY_ATTR_CT_STATE */
+ + nla_total_size(2) /* OVS_KEY_ATTR_CT_ZONE */
+ + nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */
+ + nla_total_size(16) /* OVS_KEY_ATTR_CT_LABEL */
+ nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
+ nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
+ nla_total_size(4) /* OVS_KEY_ATTR_VLAN */
@@ -339,6 +343,10 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
[OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED,
.next = ovs_tunnel_key_lens, },
[OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) },
+ [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u8) },
+ [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) },
+ [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) },
+ [OVS_KEY_ATTR_CT_LABEL] = { .len = sizeof(struct ovs_key_ct_label) },
};
static bool is_all_zero(const u8 *fp, size_t size)
@@ -534,19 +542,19 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
tun_flags |= TUNNEL_KEY;
break;
case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
- SW_FLOW_KEY_PUT(match, tun_key.ipv4_src,
+ SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.src,
nla_get_in_addr(a), is_mask);
break;
case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
- SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst,
+ SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.dst,
nla_get_in_addr(a), is_mask);
break;
case OVS_TUNNEL_KEY_ATTR_TOS:
- SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos,
+ SW_FLOW_KEY_PUT(match, tun_key.tos,
nla_get_u8(a), is_mask);
break;
case OVS_TUNNEL_KEY_ATTR_TTL:
- SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl,
+ SW_FLOW_KEY_PUT(match, tun_key.ttl,
nla_get_u8(a), is_mask);
ttl = true;
break;
@@ -609,7 +617,7 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
}
if (!is_mask) {
- if (!match->key->tun_key.ipv4_dst) {
+ if (!match->key->tun_key.u.ipv4.dst) {
OVS_NLERR(log, "IPv4 tunnel dst address is zero");
return -EINVAL;
}
@@ -647,18 +655,18 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
if (output->tun_flags & TUNNEL_KEY &&
nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
return -EMSGSIZE;
- if (output->ipv4_src &&
+ if (output->u.ipv4.src &&
nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC,
- output->ipv4_src))
+ output->u.ipv4.src))
return -EMSGSIZE;
- if (output->ipv4_dst &&
+ if (output->u.ipv4.dst &&
nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST,
- output->ipv4_dst))
+ output->u.ipv4.dst))
return -EMSGSIZE;
- if (output->ipv4_tos &&
- nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos))
+ if (output->tos &&
+ nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->tos))
return -EMSGSIZE;
- if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl))
+ if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ttl))
return -EMSGSIZE;
if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
@@ -715,9 +723,9 @@ int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb,
egress_tun_info->options_len);
}
-static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
- const struct nlattr **a, bool is_mask,
- bool log)
+static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
+ u64 *attrs, const struct nlattr **a,
+ bool is_mask, bool log)
{
if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) {
u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]);
@@ -768,16 +776,47 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
return -EINVAL;
*attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
}
+
+ if (*attrs & (1 << OVS_KEY_ATTR_CT_STATE) &&
+ ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) {
+ u8 ct_state = nla_get_u8(a[OVS_KEY_ATTR_CT_STATE]);
+
+ SW_FLOW_KEY_PUT(match, ct.state, ct_state, is_mask);
+ *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_STATE);
+ }
+ if (*attrs & (1 << OVS_KEY_ATTR_CT_ZONE) &&
+ ovs_ct_verify(net, OVS_KEY_ATTR_CT_ZONE)) {
+ u16 ct_zone = nla_get_u16(a[OVS_KEY_ATTR_CT_ZONE]);
+
+ SW_FLOW_KEY_PUT(match, ct.zone, ct_zone, is_mask);
+ *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ZONE);
+ }
+ if (*attrs & (1 << OVS_KEY_ATTR_CT_MARK) &&
+ ovs_ct_verify(net, OVS_KEY_ATTR_CT_MARK)) {
+ u32 mark = nla_get_u32(a[OVS_KEY_ATTR_CT_MARK]);
+
+ SW_FLOW_KEY_PUT(match, ct.mark, mark, is_mask);
+ *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_MARK);
+ }
+ if (*attrs & (1 << OVS_KEY_ATTR_CT_LABEL) &&
+ ovs_ct_verify(net, OVS_KEY_ATTR_CT_LABEL)) {
+ const struct ovs_key_ct_label *cl;
+
+ cl = nla_data(a[OVS_KEY_ATTR_CT_LABEL]);
+ SW_FLOW_KEY_MEMCPY(match, ct.label, cl->ct_label,
+ sizeof(*cl), is_mask);
+ *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABEL);
+ }
return 0;
}
-static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
- const struct nlattr **a, bool is_mask,
- bool log)
+static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
+ u64 attrs, const struct nlattr **a,
+ bool is_mask, bool log)
{
int err;
- err = metadata_from_nlattrs(match, &attrs, a, is_mask, log);
+ err = metadata_from_nlattrs(net, match, &attrs, a, is_mask, log);
if (err)
return err;
@@ -1029,6 +1068,7 @@ static void mask_set_nlattr(struct nlattr *attr, u8 val)
* mask. In case the 'mask' is NULL, the flow is treated as exact match
* flow. Otherwise, it is treated as a wildcarded flow, except the mask
* does not include any don't care bit.
+ * @net: Used to determine per-namespace field support.
* @match: receives the extracted flow match information.
* @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
* sequence. The fields should of the packet that triggered the creation
@@ -1039,7 +1079,7 @@ static void mask_set_nlattr(struct nlattr *attr, u8 val)
* probing for feature compatibility this should be passed in as false to
* suppress unnecessary error logging.
*/
-int ovs_nla_get_match(struct sw_flow_match *match,
+int ovs_nla_get_match(struct net *net, struct sw_flow_match *match,
const struct nlattr *nla_key,
const struct nlattr *nla_mask,
bool log)
@@ -1089,7 +1129,7 @@ int ovs_nla_get_match(struct sw_flow_match *match,
}
}
- err = ovs_key_from_nlattrs(match, key_attrs, a, false, log);
+ err = ovs_key_from_nlattrs(net, match, key_attrs, a, false, log);
if (err)
return err;
@@ -1116,7 +1156,7 @@ int ovs_nla_get_match(struct sw_flow_match *match,
/* The userspace does not send tunnel attributes that
* are 0, but we should not wildcard them nonetheless.
*/
- if (match->key->tun_key.ipv4_dst)
+ if (match->key->tun_key.u.ipv4.dst)
SW_FLOW_KEY_MEMSET_FIELD(match, tun_key,
0xff, true);
@@ -1169,7 +1209,8 @@ int ovs_nla_get_match(struct sw_flow_match *match,
}
}
- err = ovs_key_from_nlattrs(match, mask_attrs, a, true, log);
+ err = ovs_key_from_nlattrs(net, match, mask_attrs, a, true,
+ log);
if (err)
goto free_newmask;
}
@@ -1250,7 +1291,7 @@ u32 ovs_nla_get_ufid_flags(const struct nlattr *attr)
* extracted from the packet itself.
*/
-int ovs_nla_get_flow_metadata(const struct nlattr *attr,
+int ovs_nla_get_flow_metadata(struct net *net, const struct nlattr *attr,
struct sw_flow_key *key,
bool log)
{
@@ -1266,9 +1307,10 @@ int ovs_nla_get_flow_metadata(const struct nlattr *attr,
memset(&match, 0, sizeof(match));
match.key = key;
+ memset(&key->ct, 0, sizeof(key->ct));
key->phy.in_port = DP_MAX_PORTS;
- return metadata_from_nlattrs(&match, &attrs, a, false, log);
+ return metadata_from_nlattrs(net, &match, &attrs, a, false, log);
}
static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
@@ -1287,7 +1329,7 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
goto nla_put_failure;
- if ((swkey->tun_key.ipv4_dst || is_mask)) {
+ if ((swkey->tun_key.u.ipv4.dst || is_mask)) {
const void *opts = NULL;
if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
@@ -1314,6 +1356,9 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
goto nla_put_failure;
+ if (ovs_ct_put_key(output, skb))
+ goto nla_put_failure;
+
nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
if (!nla)
goto nla_put_failure;
@@ -1574,6 +1619,9 @@ void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
case OVS_ACTION_ATTR_SET:
ovs_nla_free_set_action(a);
break;
+ case OVS_ACTION_ATTR_CT:
+ ovs_ct_free_action(a);
+ break;
}
}
@@ -1619,6 +1667,7 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
acts->actions_len = (*sfa)->actions_len;
+ acts->orig_len = (*sfa)->orig_len;
kfree(*sfa);
*sfa = acts;
@@ -1646,8 +1695,8 @@ static struct nlattr *__add_action(struct sw_flow_actions **sfa,
return a;
}
-static int add_action(struct sw_flow_actions **sfa, int attrtype,
- void *data, int len, bool log)
+int ovs_nla_add_action(struct sw_flow_actions **sfa, int attrtype, void *data,
+ int len, bool log)
{
struct nlattr *a;
@@ -1662,7 +1711,7 @@ static inline int add_nested_action_start(struct sw_flow_actions **sfa,
int used = (*sfa)->actions_len;
int err;
- err = add_action(sfa, attrtype, NULL, 0, log);
+ err = ovs_nla_add_action(sfa, attrtype, NULL, 0, log);
if (err)
return err;
@@ -1678,12 +1727,12 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa,
a->nla_len = sfa->actions_len - st_offset;
}
-static int __ovs_nla_copy_actions(const struct nlattr *attr,
+static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
int depth, struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci, bool log);
-static int validate_and_copy_sample(const struct nlattr *attr,
+static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key, int depth,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci, bool log)
@@ -1715,15 +1764,15 @@ static int validate_and_copy_sample(const struct nlattr *attr,
start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log);
if (start < 0)
return start;
- err = add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY,
- nla_data(probability), sizeof(u32), log);
+ err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_PROBABILITY,
+ nla_data(probability), sizeof(u32), log);
if (err)
return err;
st_acts = add_nested_action_start(sfa, OVS_SAMPLE_ATTR_ACTIONS, log);
if (st_acts < 0)
return st_acts;
- err = __ovs_nla_copy_actions(actions, key, depth + 1, sfa,
+ err = __ovs_nla_copy_actions(net, actions, key, depth + 1, sfa,
eth_type, vlan_tci, log);
if (err)
return err;
@@ -1892,6 +1941,8 @@ static int validate_set(const struct nlattr *a,
case OVS_KEY_ATTR_PRIORITY:
case OVS_KEY_ATTR_SKB_MARK:
+ case OVS_KEY_ATTR_CT_MARK:
+ case OVS_KEY_ATTR_CT_LABEL:
case OVS_KEY_ATTR_ETHERNET:
break;
@@ -2057,7 +2108,7 @@ static int copy_action(const struct nlattr *from,
return 0;
}
-static int __ovs_nla_copy_actions(const struct nlattr *attr,
+static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
int depth, struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci, bool log)
@@ -2081,7 +2132,8 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
[OVS_ACTION_ATTR_SET] = (u32)-1,
[OVS_ACTION_ATTR_SET_MASKED] = (u32)-1,
[OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
- [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash)
+ [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash),
+ [OVS_ACTION_ATTR_CT] = (u32)-1,
};
const struct ovs_action_push_vlan *vlan;
int type = nla_type(a);
@@ -2188,13 +2240,20 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
break;
case OVS_ACTION_ATTR_SAMPLE:
- err = validate_and_copy_sample(a, key, depth, sfa,
+ err = validate_and_copy_sample(net, a, key, depth, sfa,
eth_type, vlan_tci, log);
if (err)
return err;
skip_copy = true;
break;
+ case OVS_ACTION_ATTR_CT:
+ err = ovs_ct_copy_action(net, a, key, sfa, log);
+ if (err)
+ return err;
+ skip_copy = true;
+ break;
+
default:
OVS_NLERR(log, "Unknown Action type %d", type);
return -EINVAL;
@@ -2213,7 +2272,7 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
}
/* 'key' must be the masked key. */
-int ovs_nla_copy_actions(const struct nlattr *attr,
+int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa, bool log)
{
@@ -2223,7 +2282,8 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
if (IS_ERR(*sfa))
return PTR_ERR(*sfa);
- err = __ovs_nla_copy_actions(attr, key, 0, sfa, key->eth.type,
+ (*sfa)->orig_len = nla_len(attr);
+ err = __ovs_nla_copy_actions(net, attr, key, 0, sfa, key->eth.type,
key->eth.tci, log);
if (err)
ovs_nla_free_flow_actions(*sfa);
@@ -2348,6 +2408,13 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
if (err)
return err;
break;
+
+ case OVS_ACTION_ATTR_CT:
+ err = ovs_ct_action_to_attr(nla_data(a), skb);
+ if (err)
+ return err;
+ break;
+
default:
if (nla_put(skb, type, nla_len(a), nla_data(a)))
return -EMSGSIZE;
diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h
index acd074408f0a..07878e22e783 100644
--- a/net/openvswitch/flow_netlink.h
+++ b/net/openvswitch/flow_netlink.h
@@ -45,15 +45,16 @@ void ovs_match_init(struct sw_flow_match *match,
int ovs_nla_put_key(const struct sw_flow_key *, const struct sw_flow_key *,
int attr, bool is_mask, struct sk_buff *);
-int ovs_nla_get_flow_metadata(const struct nlattr *, struct sw_flow_key *,
- bool log);
+int ovs_nla_get_flow_metadata(struct net *, const struct nlattr *,
+ struct sw_flow_key *, bool log);
int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb);
int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb);
int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb);
-int ovs_nla_get_match(struct sw_flow_match *, const struct nlattr *key,
- const struct nlattr *mask, bool log);
+int ovs_nla_get_match(struct net *, struct sw_flow_match *,
+ const struct nlattr *key, const struct nlattr *mask,
+ bool log);
int ovs_nla_put_egress_tunnel_key(struct sk_buff *,
const struct ip_tunnel_info *);
@@ -62,9 +63,11 @@ int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
const struct sw_flow_key *key, bool log);
u32 ovs_nla_get_ufid_flags(const struct nlattr *attr);
-int ovs_nla_copy_actions(const struct nlattr *attr,
+int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa, bool log);
+int ovs_nla_add_action(struct sw_flow_actions **sfa, int attrtype,
+ void *data, int len, bool log);
int ovs_nla_put_actions(const struct nlattr *attr,
int len, struct sk_buff *skb);
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 3a9d1dde76ed..d22d8e948d0f 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -426,7 +426,7 @@ static u32 flow_hash(const struct sw_flow_key *key,
static int flow_key_start(const struct sw_flow_key *key)
{
- if (key->tun_key.ipv4_dst)
+ if (key->tun_key.u.ipv4.dst)
return 0;
else
return rounddown(offsetof(struct sw_flow_key, phy),
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
index 1da3a14d1010..fa37c95f7339 100644
--- a/net/openvswitch/vport-geneve.c
+++ b/net/openvswitch/vport-geneve.c
@@ -26,95 +26,44 @@
#include "datapath.h"
#include "vport.h"
+#include "vport-netdev.h"
static struct vport_ops ovs_geneve_vport_ops;
-
/**
* struct geneve_port - Keeps track of open UDP ports
- * @gs: The socket created for this port number.
- * @name: vport name.
+ * @dst_port: destination port.
*/
struct geneve_port {
- struct geneve_sock *gs;
- char name[IFNAMSIZ];
+ u16 port_no;
};
-static LIST_HEAD(geneve_ports);
-
static inline struct geneve_port *geneve_vport(const struct vport *vport)
{
return vport_priv(vport);
}
-/* Convert 64 bit tunnel ID to 24 bit VNI. */
-static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni)
-{
-#ifdef __BIG_ENDIAN
- vni[0] = (__force __u8)(tun_id >> 16);
- vni[1] = (__force __u8)(tun_id >> 8);
- vni[2] = (__force __u8)tun_id;
-#else
- vni[0] = (__force __u8)((__force u64)tun_id >> 40);
- vni[1] = (__force __u8)((__force u64)tun_id >> 48);
- vni[2] = (__force __u8)((__force u64)tun_id >> 56);
-#endif
-}
-
-/* Convert 24 bit VNI to 64 bit tunnel ID. */
-static __be64 vni_to_tunnel_id(const __u8 *vni)
-{
-#ifdef __BIG_ENDIAN
- return (vni[0] << 16) | (vni[1] << 8) | vni[2];
-#else
- return (__force __be64)(((__force u64)vni[0] << 40) |
- ((__force u64)vni[1] << 48) |
- ((__force u64)vni[2] << 56));
-#endif
-}
-
-static void geneve_rcv(struct geneve_sock *gs, struct sk_buff *skb)
-{
- struct vport *vport = gs->rcv_data;
- struct genevehdr *geneveh = geneve_hdr(skb);
- int opts_len;
- struct ip_tunnel_info tun_info;
- __be64 key;
- __be16 flags;
-
- opts_len = geneveh->opt_len * 4;
-
- flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT |
- (udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0) |
- (geneveh->oam ? TUNNEL_OAM : 0) |
- (geneveh->critical ? TUNNEL_CRIT_OPT : 0);
-
- key = vni_to_tunnel_id(geneveh->vni);
-
- ip_tunnel_info_init(&tun_info, ip_hdr(skb),
- udp_hdr(skb)->source, udp_hdr(skb)->dest,
- key, flags, geneveh->options, opts_len);
-
- ovs_vport_receive(vport, skb, &tun_info);
-}
-
static int geneve_get_options(const struct vport *vport,
struct sk_buff *skb)
{
struct geneve_port *geneve_port = geneve_vport(vport);
- struct inet_sock *sk = inet_sk(geneve_port->gs->sock->sk);
- if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(sk->inet_sport)))
+ if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, geneve_port->port_no))
return -EMSGSIZE;
return 0;
}
-static void geneve_tnl_destroy(struct vport *vport)
+static int geneve_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
+ struct ip_tunnel_info *egress_tun_info)
{
struct geneve_port *geneve_port = geneve_vport(vport);
+ struct net *net = ovs_dp_get_net(vport->dp);
+ __be16 dport = htons(geneve_port->port_no);
+ __be16 sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true);
- geneve_sock_release(geneve_port->gs);
-
- ovs_vport_deferred_free(vport);
+ return ovs_tunnel_get_egress_info(egress_tun_info,
+ ovs_dp_get_net(vport->dp),
+ OVS_CB(skb)->egress_tun_info,
+ IPPROTO_UDP, skb->mark, sport, dport);
}
static struct vport *geneve_tnl_create(const struct vport_parms *parms)
@@ -122,11 +71,11 @@ static struct vport *geneve_tnl_create(const struct vport_parms *parms)
struct net *net = ovs_dp_get_net(parms->dp);
struct nlattr *options = parms->options;
struct geneve_port *geneve_port;
- struct geneve_sock *gs;
+ struct net_device *dev;
struct vport *vport;
struct nlattr *a;
- int err;
u16 dst_port;
+ int err;
if (!options) {
err = -EINVAL;
@@ -148,104 +97,40 @@ static struct vport *geneve_tnl_create(const struct vport_parms *parms)
return vport;
geneve_port = geneve_vport(vport);
- strncpy(geneve_port->name, parms->name, IFNAMSIZ);
+ geneve_port->port_no = dst_port;
- gs = geneve_sock_add(net, htons(dst_port), geneve_rcv, vport, true, 0);
- if (IS_ERR(gs)) {
+ rtnl_lock();
+ dev = geneve_dev_create_fb(net, parms->name, NET_NAME_USER, dst_port);
+ if (IS_ERR(dev)) {
+ rtnl_unlock();
ovs_vport_free(vport);
- return (void *)gs;
+ return ERR_CAST(dev);
}
- geneve_port->gs = gs;
+ dev_change_flags(dev, dev->flags | IFF_UP);
+ rtnl_unlock();
return vport;
error:
return ERR_PTR(err);
}
-static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb)
+static struct vport *geneve_create(const struct vport_parms *parms)
{
- const struct ip_tunnel_key *tun_key;
- struct ip_tunnel_info *tun_info;
- struct net *net = ovs_dp_get_net(vport->dp);
- struct geneve_port *geneve_port = geneve_vport(vport);
- __be16 dport = inet_sk(geneve_port->gs->sock->sk)->inet_sport;
- __be16 sport;
- struct rtable *rt;
- struct flowi4 fl;
- u8 vni[3], opts_len, *opts;
- __be16 df;
- int err;
-
- tun_info = OVS_CB(skb)->egress_tun_info;
- if (unlikely(!tun_info)) {
- err = -EINVAL;
- goto error;
- }
-
- tun_key = &tun_info->key;
- rt = ovs_tunnel_route_lookup(net, tun_key, skb->mark, &fl, IPPROTO_UDP);
- if (IS_ERR(rt)) {
- err = PTR_ERR(rt);
- goto error;
- }
-
- df = tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
- sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true);
- tunnel_id_to_vni(tun_key->tun_id, vni);
- skb->ignore_df = 1;
-
- if (tun_key->tun_flags & TUNNEL_GENEVE_OPT) {
- opts = (u8 *)tun_info->options;
- opts_len = tun_info->options_len;
- } else {
- opts = NULL;
- opts_len = 0;
- }
-
- err = geneve_xmit_skb(geneve_port->gs, rt, skb, fl.saddr,
- tun_key->ipv4_dst, tun_key->ipv4_tos,
- tun_key->ipv4_ttl, df, sport, dport,
- tun_key->tun_flags, vni, opts_len, opts,
- !!(tun_key->tun_flags & TUNNEL_CSUM), false);
- if (err < 0)
- ip_rt_put(rt);
- return err;
-
-error:
- kfree_skb(skb);
- return err;
-}
-
-static const char *geneve_get_name(const struct vport *vport)
-{
- struct geneve_port *geneve_port = geneve_vport(vport);
-
- return geneve_port->name;
-}
+ struct vport *vport;
-static int geneve_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
- struct ip_tunnel_info *egress_tun_info)
-{
- struct geneve_port *geneve_port = geneve_vport(vport);
- struct net *net = ovs_dp_get_net(vport->dp);
- __be16 dport = inet_sk(geneve_port->gs->sock->sk)->inet_sport;
- __be16 sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true);
+ vport = geneve_tnl_create(parms);
+ if (IS_ERR(vport))
+ return vport;
- /* Get tp_src and tp_dst, refert to geneve_build_header().
- */
- return ovs_tunnel_get_egress_info(egress_tun_info,
- ovs_dp_get_net(vport->dp),
- OVS_CB(skb)->egress_tun_info,
- IPPROTO_UDP, skb->mark, sport, dport);
+ return ovs_netdev_link(vport, parms->name);
}
static struct vport_ops ovs_geneve_vport_ops = {
.type = OVS_VPORT_TYPE_GENEVE,
- .create = geneve_tnl_create,
- .destroy = geneve_tnl_destroy,
- .get_name = geneve_get_name,
+ .create = geneve_create,
+ .destroy = ovs_netdev_tunnel_destroy,
.get_options = geneve_get_options,
- .send = geneve_tnl_send,
+ .send = ovs_netdev_send,
.owner = THIS_MODULE,
.get_egress_tun_info = geneve_get_egress_tun_info,
};
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index c058bbf876c3..80b3e12ec882 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -135,7 +135,7 @@ static void do_setup(struct net_device *netdev)
netdev->netdev_ops = &internal_dev_netdev_ops;
netdev->priv_flags &= ~IFF_TX_SKB_SHARING;
- netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+ netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH;
netdev->destructor = internal_dev_destructor;
netdev->ethtool_ops = &internal_dev_ethtool_ops;
netdev->rtnl_link_ops = &internal_dev_link_ops;
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 4b70aaa4a746..a75011505039 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -57,7 +57,7 @@ static void netdev_port_receive(struct vport *vport, struct sk_buff *skb)
skb_push(skb, ETH_HLEN);
ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
- ovs_vport_receive(vport, skb, skb_tunnel_info(skb, AF_INET));
+ ovs_vport_receive(vport, skb, skb_tunnel_info(skb));
return;
error:
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index d14f59403c5e..e2dc9dac59e6 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -484,6 +484,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
OVS_CB(skb)->input_vport = vport;
OVS_CB(skb)->egress_tun_info = NULL;
+ OVS_CB(skb)->mru = 0;
/* Extract flow from 'skb' into 'key'. */
error = ovs_flow_key_extract(tun_info, skb, &key);
if (unlikely(error)) {
@@ -603,9 +604,9 @@ int ovs_tunnel_get_egress_info(struct ip_tunnel_info *egress_tun_info,
* saddr, tp_src and tp_dst
*/
__ip_tunnel_info_init(egress_tun_info,
- fl.saddr, tun_key->ipv4_dst,
- tun_key->ipv4_tos,
- tun_key->ipv4_ttl,
+ fl.saddr, tun_key->u.ipv4.dst,
+ tun_key->tos,
+ tun_key->ttl,
tp_src, tp_dst,
tun_key->tun_id,
tun_key->tun_flags,
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index 1a689c28b5a6..b88b3ee86f07 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -254,9 +254,9 @@ static inline struct rtable *ovs_tunnel_route_lookup(struct net *net,
struct rtable *rt;
memset(fl, 0, sizeof(*fl));
- fl->daddr = key->ipv4_dst;
- fl->saddr = key->ipv4_src;
- fl->flowi4_tos = RT_TOS(key->ipv4_tos);
+ fl->daddr = key->u.ipv4.dst;
+ fl->saddr = key->u.ipv4.src;
+ fl->flowi4_tos = RT_TOS(key->tos);
fl->flowi4_mark = mark;
fl->flowi4_proto = protocol;