aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/appletalk/ddp.c2
-rw-r--r--net/atm/common.c2
-rw-r--r--net/ax25/af_ax25.c2
-rw-r--r--net/bluetooth/af_bluetooth.c4
-rw-r--r--net/bluetooth/hci_sock.c2
-rw-r--r--net/bridge/br_forward.c5
-rw-r--r--net/bridge/br_input.c60
-rw-r--r--net/bridge/br_netlink.c4
-rw-r--r--net/bridge/br_private.h1
-rw-r--r--net/bridge/br_sysfs_if.c2
-rw-r--r--net/caif/caif_socket.c2
-rw-r--r--net/core/dev.c86
-rw-r--r--net/core/ethtool.c1
-rw-r--r--net/core/neighbour.c32
-rw-r--r--net/core/skbuff.c14
-rw-r--r--net/core/sock.c2
-rw-r--r--net/dccp/proto.c2
-rw-r--r--net/dsa/Kconfig11
-rw-r--r--net/dsa/dsa.c135
-rw-r--r--net/dsa/slave.c64
-rw-r--r--net/dsa/tag_dsa.c2
-rw-r--r--net/dsa/tag_edsa.c2
-rw-r--r--net/dsa/tag_trailer.c2
-rw-r--r--net/ieee802154/dgram.c2
-rw-r--r--net/ieee802154/raw.c2
-rw-r--r--net/ipv4/Kconfig9
-rw-r--r--net/ipv4/af_inet.c2
-rw-r--r--net/ipv4/cipso_ipv4.c8
-rw-r--r--net/ipv4/esp4.c14
-rw-r--r--net/ipv4/fou.c388
-rw-r--r--net/ipv4/geneve.c2
-rw-r--r--net/ipv4/igmp.c45
-rw-r--r--net/ipv4/ip_fragment.c16
-rw-r--r--net/ipv4/ip_gre.c2
-rw-r--r--net/ipv4/ip_output.c8
-rw-r--r--net/ipv4/ip_sockglue.c2
-rw-r--r--net/ipv4/ip_tunnel.c61
-rw-r--r--net/ipv4/ipconfig.c19
-rw-r--r--net/ipv4/ipip.c2
-rw-r--r--net/ipv4/ping.c2
-rw-r--r--net/ipv4/proc.c6
-rw-r--r--net/ipv4/raw.c2
-rw-r--r--net/ipv4/syncookies.c86
-rw-r--r--net/ipv4/sysctl_net_ipv4.c7
-rw-r--r--net/ipv4/tcp.c5
-rw-r--r--net/ipv4/tcp_cong.c2
-rw-r--r--net/ipv4/tcp_input.c32
-rw-r--r--net/ipv4/tcp_offload.c2
-rw-r--r--net/ipv4/tcp_output.c13
-rw-r--r--net/ipv4/udp.c15
-rw-r--r--net/ipv4/udp_offload.c69
-rw-r--r--net/ipv6/addrconf.c50
-rw-r--r--net/ipv6/datagram.c4
-rw-r--r--net/ipv6/esp6.c10
-rw-r--r--net/ipv6/exthdrs.c2
-rw-r--r--net/ipv6/icmp.c1
-rw-r--r--net/ipv6/ip6_flowlabel.c7
-rw-r--r--net/ipv6/ip6_gre.c2
-rw-r--r--net/ipv6/ip6_offload.c2
-rw-r--r--net/ipv6/ip6_output.c10
-rw-r--r--net/ipv6/ip6_tunnel.c94
-rw-r--r--net/ipv6/ip6_vti.c10
-rw-r--r--net/ipv6/ip6mr.c2
-rw-r--r--net/ipv6/mcast.c15
-rw-r--r--net/ipv6/raw.c7
-rw-r--r--net/ipv6/reassembly.c12
-rw-r--r--net/ipv6/route.c94
-rw-r--r--net/ipv6/sit.c2
-rw-r--r--net/ipv6/syncookies.c13
-rw-r--r--net/ipv6/udp.c4
-rw-r--r--net/ipv6/udp_offload.c4
-rw-r--r--net/ipx/af_ipx.c5
-rw-r--r--net/ipx/ipx_proc.c18
-rw-r--r--net/ipx/sysctl_net_ipx.c4
-rw-r--r--net/irda/af_irda.c2
-rw-r--r--net/iucv/af_iucv.c2
-rw-r--r--net/key/af_key.c2
-rw-r--r--net/l2tp/l2tp_ip.c2
-rw-r--r--net/l2tp/l2tp_ip6.c2
-rw-r--r--net/l2tp/l2tp_ppp.c2
-rw-r--r--net/lapb/lapb_iface.c17
-rw-r--r--net/llc/af_llc.c3
-rw-r--r--net/llc/llc_if.c2
-rw-r--r--net/mpls/mpls_gso.c3
-rw-r--r--net/netlink/af_netlink.c2
-rw-r--r--net/netrom/af_netrom.c2
-rw-r--r--net/nfc/llcp_sock.c2
-rw-r--r--net/nfc/rawsock.c2
-rw-r--r--net/openvswitch/Kconfig19
-rw-r--r--net/openvswitch/Makefile14
-rw-r--r--net/openvswitch/actions.c147
-rw-r--r--net/openvswitch/datapath.c232
-rw-r--r--net/openvswitch/datapath.h4
-rw-r--r--net/openvswitch/flow.c30
-rw-r--r--net/openvswitch/flow.h17
-rw-r--r--net/openvswitch/flow_netlink.c322
-rw-r--r--net/openvswitch/flow_netlink.h5
-rw-r--r--net/openvswitch/flow_table.c11
-rw-r--r--net/openvswitch/flow_table.h2
-rw-r--r--net/openvswitch/vport-geneve.c23
-rw-r--r--net/openvswitch/vport-gre.c23
-rw-r--r--net/openvswitch/vport-internal_dev.c22
-rw-r--r--net/openvswitch/vport-netdev.c14
-rw-r--r--net/openvswitch/vport-netdev.h3
-rw-r--r--net/openvswitch/vport-vxlan.c23
-rw-r--r--net/openvswitch/vport.c102
-rw-r--r--net/openvswitch/vport.h14
-rw-r--r--net/packet/af_packet.c2
-rw-r--r--net/phonet/datagram.c2
-rw-r--r--net/phonet/pep.c2
-rw-r--r--net/rose/af_rose.c2
-rw-r--r--net/rxrpc/ar-recvmsg.c2
-rw-r--r--net/sched/act_gact.c2
-rw-r--r--net/sched/act_ipt.c2
-rw-r--r--net/sched/act_mirred.c2
-rw-r--r--net/sched/act_pedit.c2
-rw-r--r--net/sched/act_police.c2
-rw-r--r--net/sched/act_simple.c2
-rw-r--r--net/sched/sch_netem.c27
-rw-r--r--net/sctp/proc.c12
-rw-r--r--net/sctp/socket.c2
-rw-r--r--net/tipc/msg.c4
-rw-r--r--net/tipc/socket.c9
-rw-r--r--net/unix/af_unix.c6
-rw-r--r--net/vmw_vsock/vmci_transport.c3
-rw-r--r--net/x25/af_x25.c2
126 files changed, 1995 insertions, 798 deletions
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index c00897f65a31..425942db17f6 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1758,7 +1758,7 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
copied = size;
msg->msg_flags |= MSG_TRUNC;
}
- err = skb_copy_datagram_iovec(skb, offset, msg->msg_iov, copied);
+ err = skb_copy_datagram_msg(skb, offset, msg, copied);
if (!err && msg->msg_name) {
DECLARE_SOCKADDR(struct sockaddr_at *, sat, msg->msg_name);
diff --git a/net/atm/common.c b/net/atm/common.c
index 6a765156a3f6..9cd1ccae9a11 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -554,7 +554,7 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
msg->msg_flags |= MSG_TRUNC;
}
- error = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ error = skb_copy_datagram_msg(skb, 0, msg, copied);
if (error)
return error;
sock_recv_ts_and_drops(msg, sk, skb);
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index c35c3f48fc0f..f4f835e19378 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1634,7 +1634,7 @@ static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock,
msg->msg_flags |= MSG_TRUNC;
}
- skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ skb_copy_datagram_msg(skb, 0, msg, copied);
if (msg->msg_name) {
ax25_digi digi;
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 339c74ad4553..0a7cc565f93e 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -237,7 +237,7 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
}
skb_reset_transport_header(skb);
- err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ err = skb_copy_datagram_msg(skb, 0, msg, copied);
if (err == 0) {
sock_recv_ts_and_drops(msg, sk, skb);
@@ -328,7 +328,7 @@ int bt_sock_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
}
chunk = min_t(unsigned int, skb->len, size);
- if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, chunk)) {
+ if (skb_copy_datagram_msg(skb, 0, msg, chunk)) {
skb_queue_head(&sk->sk_receive_queue, skb);
if (!copied)
copied = -EFAULT;
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 115f149362ba..29e1ec7189bd 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -878,7 +878,7 @@ static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
}
skb_reset_transport_header(skb);
- err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ err = skb_copy_datagram_msg(skb, 0, msg, copied);
switch (hci_pi(sk)->channel) {
case HCI_CHANNEL_RAW:
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 44cb786b925a..f96933a823e3 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -184,6 +184,11 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb,
/* Do not flood unicast traffic to ports that turn it off */
if (unicast && !(p->flags & BR_FLOOD))
continue;
+
+ /* Do not flood to ports that enable proxy ARP */
+ if (p->flags & BR_PROXYARP)
+ continue;
+
prev = maybe_deliver(prev, p, skb, __packet_hook);
if (IS_ERR(prev))
goto out;
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 6fd5522df696..1f1de715197c 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -16,6 +16,8 @@
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/netfilter_bridge.h>
+#include <linux/neighbour.h>
+#include <net/arp.h>
#include <linux/export.h>
#include <linux/rculist.h>
#include "br_private.h"
@@ -57,6 +59,60 @@ static int br_pass_frame_up(struct sk_buff *skb)
netif_receive_skb);
}
+static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br,
+ u16 vid)
+{
+ struct net_device *dev = br->dev;
+ struct neighbour *n;
+ struct arphdr *parp;
+ u8 *arpptr, *sha;
+ __be32 sip, tip;
+
+ if (dev->flags & IFF_NOARP)
+ return;
+
+ if (!pskb_may_pull(skb, arp_hdr_len(dev))) {
+ dev->stats.tx_dropped++;
+ return;
+ }
+ parp = arp_hdr(skb);
+
+ if (parp->ar_pro != htons(ETH_P_IP) ||
+ parp->ar_op != htons(ARPOP_REQUEST) ||
+ parp->ar_hln != dev->addr_len ||
+ parp->ar_pln != 4)
+ return;
+
+ arpptr = (u8 *)parp + sizeof(struct arphdr);
+ sha = arpptr;
+ arpptr += dev->addr_len; /* sha */
+ memcpy(&sip, arpptr, sizeof(sip));
+ arpptr += sizeof(sip);
+ arpptr += dev->addr_len; /* tha */
+ memcpy(&tip, arpptr, sizeof(tip));
+
+ if (ipv4_is_loopback(tip) ||
+ ipv4_is_multicast(tip))
+ return;
+
+ n = neigh_lookup(&arp_tbl, &tip, dev);
+ if (n) {
+ struct net_bridge_fdb_entry *f;
+
+ if (!(n->nud_state & NUD_VALID)) {
+ neigh_release(n);
+ return;
+ }
+
+ f = __br_fdb_get(br, n->ha, vid);
+ if (f)
+ arp_send(ARPOP_REPLY, ETH_P_ARP, sip, skb->dev, tip,
+ sha, n->ha, sha);
+
+ neigh_release(n);
+ }
+}
+
/* note: already called with rcu_read_lock */
int br_handle_frame_finish(struct sk_buff *skb)
{
@@ -98,6 +154,10 @@ int br_handle_frame_finish(struct sk_buff *skb)
dst = NULL;
if (is_broadcast_ether_addr(dest)) {
+ if (p->flags & BR_PROXYARP &&
+ skb->protocol == htons(ETH_P_ARP))
+ br_do_proxy_arp(skb, br, vid);
+
skb2 = skb;
unicast = false;
} else if (is_multicast_ether_addr(dest)) {
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 2ff9706647f2..86c239b06f6e 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -60,7 +60,8 @@ static int br_port_fill_attrs(struct sk_buff *skb,
nla_put_u8(skb, IFLA_BRPORT_PROTECT, !!(p->flags & BR_ROOT_BLOCK)) ||
nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, !!(p->flags & BR_MULTICAST_FAST_LEAVE)) ||
nla_put_u8(skb, IFLA_BRPORT_LEARNING, !!(p->flags & BR_LEARNING)) ||
- nla_put_u8(skb, IFLA_BRPORT_UNICAST_FLOOD, !!(p->flags & BR_FLOOD)))
+ nla_put_u8(skb, IFLA_BRPORT_UNICAST_FLOOD, !!(p->flags & BR_FLOOD)) ||
+ nla_put_u8(skb, IFLA_BRPORT_PROXYARP, !!(p->flags & BR_PROXYARP)))
return -EMSGSIZE;
return 0;
@@ -332,6 +333,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
br_set_port_flag(p, tb, IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK);
br_set_port_flag(p, tb, IFLA_BRPORT_LEARNING, BR_LEARNING);
br_set_port_flag(p, tb, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD);
+ br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP, BR_PROXYARP);
if (tb[IFLA_BRPORT_COST]) {
err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST]));
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 4d783d071305..8f3f08140258 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -172,6 +172,7 @@ struct net_bridge_port
#define BR_FLOOD 0x00000040
#define BR_AUTO_MASK (BR_FLOOD | BR_LEARNING)
#define BR_PROMISC 0x00000080
+#define BR_PROXYARP 0x00000100
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
struct bridge_mcast_own_query ip4_own_query;
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index e561cd59b8a6..2de5d91199e8 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -170,6 +170,7 @@ BRPORT_ATTR_FLAG(bpdu_guard, BR_BPDU_GUARD);
BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK);
BRPORT_ATTR_FLAG(learning, BR_LEARNING);
BRPORT_ATTR_FLAG(unicast_flood, BR_FLOOD);
+BRPORT_ATTR_FLAG(proxyarp, BR_PROXYARP);
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf)
@@ -213,6 +214,7 @@ static const struct brport_attribute *brport_attrs[] = {
&brport_attr_multicast_router,
&brport_attr_multicast_fast_leave,
#endif
+ &brport_attr_proxyarp,
NULL
};
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 43f750e88e19..fbcd156099fb 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -293,7 +293,7 @@ static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock,
copylen = len;
}
- ret = skb_copy_datagram_iovec(skb, 0, m->msg_iov, copylen);
+ ret = skb_copy_datagram_msg(skb, 0, m, copylen);
if (ret)
goto out_free;
diff --git a/net/core/dev.c b/net/core/dev.c
index 945bbd001359..70bb609c283d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -118,6 +118,7 @@
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <net/ip.h>
+#include <net/mpls.h>
#include <linux/ipv6.h>
#include <linux/in.h>
#include <linux/jhash.h>
@@ -2530,7 +2531,7 @@ static netdev_features_t net_mpls_features(struct sk_buff *skb,
netdev_features_t features,
__be16 type)
{
- if (type == htons(ETH_P_MPLS_UC) || type == htons(ETH_P_MPLS_MC))
+ if (eth_p_mpls(type))
features &= skb->dev->mpls_features;
return features;
@@ -4316,20 +4317,28 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd)
local_irq_enable();
}
+static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
+{
+#ifdef CONFIG_RPS
+ return sd->rps_ipi_list != NULL;
+#else
+ return false;
+#endif
+}
+
static int process_backlog(struct napi_struct *napi, int quota)
{
int work = 0;
struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
-#ifdef CONFIG_RPS
/* Check if we have pending ipi, its better to send them now,
* not waiting net_rx_action() end.
*/
- if (sd->rps_ipi_list) {
+ if (sd_has_rps_ipi_waiting(sd)) {
local_irq_disable();
net_rps_action_and_irq_enable(sd);
}
-#endif
+
napi->weight = weight_p;
local_irq_disable();
while (1) {
@@ -4356,7 +4365,6 @@ static int process_backlog(struct napi_struct *napi, int quota)
* We can use a plain write instead of clear_bit(),
* and we dont need an smp_mb() memory barrier.
*/
- list_del(&napi->poll_list);
napi->state = 0;
rps_unlock(sd);
@@ -4376,7 +4384,8 @@ static int process_backlog(struct napi_struct *napi, int quota)
* __napi_schedule - schedule for receive
* @n: entry to schedule
*
- * The entry's receive function will be scheduled to run
+ * The entry's receive function will be scheduled to run.
+ * Consider using __napi_schedule_irqoff() if hard irqs are masked.
*/
void __napi_schedule(struct napi_struct *n)
{
@@ -4388,12 +4397,24 @@ void __napi_schedule(struct napi_struct *n)
}
EXPORT_SYMBOL(__napi_schedule);
+/**
+ * __napi_schedule_irqoff - schedule for receive
+ * @n: entry to schedule
+ *
+ * Variant of __napi_schedule() assuming hard irqs are masked
+ */
+void __napi_schedule_irqoff(struct napi_struct *n)
+{
+ ____napi_schedule(this_cpu_ptr(&softnet_data), n);
+}
+EXPORT_SYMBOL(__napi_schedule_irqoff);
+
void __napi_complete(struct napi_struct *n)
{
BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
BUG_ON(n->gro_list);
- list_del(&n->poll_list);
+ list_del_init(&n->poll_list);
smp_mb__before_atomic();
clear_bit(NAPI_STATE_SCHED, &n->state);
}
@@ -4411,9 +4432,15 @@ void napi_complete(struct napi_struct *n)
return;
napi_gro_flush(n, false);
- local_irq_save(flags);
- __napi_complete(n);
- local_irq_restore(flags);
+
+ if (likely(list_empty(&n->poll_list))) {
+ WARN_ON_ONCE(!test_and_clear_bit(NAPI_STATE_SCHED, &n->state));
+ } else {
+ /* If n->poll_list is not empty, we need to mask irqs */
+ local_irq_save(flags);
+ __napi_complete(n);
+ local_irq_restore(flags);
+ }
}
EXPORT_SYMBOL(napi_complete);
@@ -4507,29 +4534,28 @@ static void net_rx_action(struct softirq_action *h)
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
unsigned long time_limit = jiffies + 2;
int budget = netdev_budget;
+ LIST_HEAD(list);
+ LIST_HEAD(repoll);
void *have;
local_irq_disable();
+ list_splice_init(&sd->poll_list, &list);
+ local_irq_enable();
- while (!list_empty(&sd->poll_list)) {
+ while (!list_empty(&list)) {
struct napi_struct *n;
int work, weight;
- /* If softirq window is exhuasted then punt.
+ /* If softirq window is exhausted then punt.
* Allow this to run for 2 jiffies since which will allow
* an average latency of 1.5/HZ.
*/
if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit)))
goto softnet_break;
- local_irq_enable();
- /* Even though interrupts have been re-enabled, this
- * access is safe because interrupts can only add new
- * entries to the tail of this list, and only ->poll()
- * calls can remove this head entry from the list.
- */
- n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);
+ n = list_first_entry(&list, struct napi_struct, poll_list);
+ list_del_init(&n->poll_list);
have = netpoll_poll_lock(n);
@@ -4551,8 +4577,6 @@ static void net_rx_action(struct softirq_action *h)
budget -= work;
- local_irq_disable();
-
/* Drivers must not modify the NAPI state if they
* consume the entire weight. In such cases this code
* still "owns" the NAPI instance and therefore can
@@ -4560,32 +4584,40 @@ static void net_rx_action(struct softirq_action *h)
*/
if (unlikely(work == weight)) {
if (unlikely(napi_disable_pending(n))) {
- local_irq_enable();
napi_complete(n);
- local_irq_disable();
} else {
if (n->gro_list) {
/* flush too old packets
* If HZ < 1000, flush all packets.
*/
- local_irq_enable();
napi_gro_flush(n, HZ >= 1000);
- local_irq_disable();
}
- list_move_tail(&n->poll_list, &sd->poll_list);
+ list_add_tail(&n->poll_list, &repoll);
}
}
netpoll_poll_unlock(have);
}
+
+ if (!sd_has_rps_ipi_waiting(sd) &&
+ list_empty(&list) &&
+ list_empty(&repoll))
+ return;
out:
+ local_irq_disable();
+
+ list_splice_tail_init(&sd->poll_list, &list);
+ list_splice_tail(&repoll, &list);
+ list_splice(&list, &sd->poll_list);
+ if (!list_empty(&sd->poll_list))
+ __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+
net_rps_action_and_irq_enable(sd);
return;
softnet_break:
sd->time_squeeze++;
- __raise_softirq_irqoff(NET_RX_SOFTIRQ);
goto out;
}
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 06dfb293e5aa..b0f84f5ddda8 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -84,7 +84,6 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]
[NETIF_F_GSO_IPIP_BIT] = "tx-ipip-segmentation",
[NETIF_F_GSO_SIT_BIT] = "tx-sit-segmentation",
[NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation",
- [NETIF_F_GSO_MPLS_BIT] = "tx-mpls-segmentation",
[NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
[NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp",
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index ef31fef25e5a..edd04116ecb7 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -773,7 +773,7 @@ static void neigh_periodic_work(struct work_struct *work)
if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
struct neigh_parms *p;
tbl->last_rand = jiffies;
- for (p = &tbl->parms; p; p = p->next)
+ list_for_each_entry(p, &tbl->parms_list, list)
p->reachable_time =
neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
}
@@ -1446,7 +1446,7 @@ static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
{
struct neigh_parms *p;
- for (p = &tbl->parms; p; p = p->next) {
+ list_for_each_entry(p, &tbl->parms_list, list) {
if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
(!p->dev && !ifindex && net_eq(net, &init_net)))
return p;
@@ -1481,8 +1481,7 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
}
write_lock_bh(&tbl->lock);
- p->next = tbl->parms.next;
- tbl->parms.next = p;
+ list_add(&p->list, &tbl->parms.list);
write_unlock_bh(&tbl->lock);
neigh_parms_data_state_cleanall(p);
@@ -1501,24 +1500,15 @@ static void neigh_rcu_free_parms(struct rcu_head *head)
void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
- struct neigh_parms **p;
-
if (!parms || parms == &tbl->parms)
return;
write_lock_bh(&tbl->lock);
- for (p = &tbl->parms.next; *p; p = &(*p)->next) {
- if (*p == parms) {
- *p = parms->next;
- parms->dead = 1;
- write_unlock_bh(&tbl->lock);
- if (parms->dev)
- dev_put(parms->dev);
- call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
- return;
- }
- }
+ list_del(&parms->list);
+ parms->dead = 1;
write_unlock_bh(&tbl->lock);
- neigh_dbg(1, "%s: not found\n", __func__);
+ if (parms->dev)
+ dev_put(parms->dev);
+ call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
EXPORT_SYMBOL(neigh_parms_release);
@@ -1535,6 +1525,8 @@ static void neigh_table_init_no_netlink(struct neigh_table *tbl)
unsigned long now = jiffies;
unsigned long phsize;
+ INIT_LIST_HEAD(&tbl->parms_list);
+ list_add(&tbl->parms.list, &tbl->parms_list);
write_pnet(&tbl->parms.net, &init_net);
atomic_set(&tbl->parms.refcnt, 1);
tbl->parms.reachable_time =
@@ -2154,7 +2146,9 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
NLM_F_MULTI) <= 0)
break;
- for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
+ nidx = 0;
+ p = list_next_entry(&tbl->parms, list);
+ list_for_each_entry_from(p, &tbl->parms_list, list) {
if (!net_eq(neigh_parms_net(p), net))
continue;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c16615bfb61e..700189604f3d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3013,7 +3013,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
if (nskb->len == len + doffset)
goto perform_csum_check;
- if (!sg) {
+ if (!sg && !nskb->remcsum_offload) {
nskb->ip_summed = CHECKSUM_NONE;
nskb->csum = skb_copy_and_csum_bits(head_skb, offset,
skb_put(nskb, len),
@@ -3085,7 +3085,7 @@ skip_fraglist:
nskb->truesize += nskb->data_len;
perform_csum_check:
- if (!csum) {
+ if (!csum && !nskb->remcsum_offload) {
nskb->csum = skb_checksum(nskb, doffset,
nskb->len - doffset, 0);
nskb->ip_summed = CHECKSUM_NONE;
@@ -3099,6 +3099,16 @@ perform_csum_check:
* (see validate_xmit_skb_list() for example)
*/
segs->prev = tail;
+
+ /* Following permits correct backpressure, for protocols
+ * using skb_set_owner_w().
+ * Idea is to tranfert ownership from head_skb to last segment.
+ */
+ if (head_skb->destructor == sock_wfree) {
+ swap(tail->truesize, head_skb->truesize);
+ swap(tail->destructor, head_skb->destructor);
+ swap(tail->sk, head_skb->sk);
+ }
return segs;
err:
diff --git a/net/core/sock.c b/net/core/sock.c
index 15e0c67b1069..ac56dd06c306 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2457,7 +2457,7 @@ int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len,
msg->msg_flags |= MSG_TRUNC;
copied = len;
}
- err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ err = skb_copy_datagram_msg(skb, 0, msg, copied);
if (err)
goto out_free_skb;
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 5ab6627cf370..8e6ae9422a7b 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -896,7 +896,7 @@ verify_sock_status:
else if (len < skb->len)
msg->msg_flags |= MSG_TRUNC;
- if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
+ if (skb_copy_datagram_msg(skb, 0, msg, len)) {
/* Exception. Bailout! */
len = -EFAULT;
break;
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index a585fd6352eb..5f8ac404535b 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -11,6 +11,17 @@ config NET_DSA
if NET_DSA
+config NET_DSA_HWMON
+ bool "Distributed Switch Architecture HWMON support"
+ default y
+ depends on HWMON && !(NET_DSA=y && HWMON=m)
+ ---help---
+ Say Y if you want to expose thermal sensor data on switches supported
+ by the Distributed Switch Architecture.
+
+ Some of those switches contain thermal sensors. This data is available
+ via the hwmon sysfs interface and exposes the onboard sensors.
+
# tagging formats
config NET_DSA_TAG_BRCM
bool
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 6317b41c99b0..dd646a8025cb 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -9,6 +9,9 @@
* (at your option) any later version.
*/
+#include <linux/ctype.h>
+#include <linux/device.h>
+#include <linux/hwmon.h>
#include <linux/list.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
@@ -17,6 +20,7 @@
#include <linux/of.h>
#include <linux/of_mdio.h>
#include <linux/of_platform.h>
+#include <linux/sysfs.h>
#include "dsa_priv.h"
char dsa_driver_version[] = "0.1";
@@ -71,6 +75,104 @@ dsa_switch_probe(struct device *host_dev, int sw_addr, char **_name)
return ret;
}
+/* hwmon support ************************************************************/
+
+#ifdef CONFIG_NET_DSA_HWMON
+
+static ssize_t temp1_input_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct dsa_switch *ds = dev_get_drvdata(dev);
+ int temp, ret;
+
+ ret = ds->drv->get_temp(ds, &temp);
+ if (ret < 0)
+ return ret;
+
+ return sprintf(buf, "%d\n", temp * 1000);
+}
+static DEVICE_ATTR_RO(temp1_input);
+
+static ssize_t temp1_max_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct dsa_switch *ds = dev_get_drvdata(dev);
+ int temp, ret;
+
+ ret = ds->drv->get_temp_limit(ds, &temp);
+ if (ret < 0)
+ return ret;
+
+ return sprintf(buf, "%d\n", temp * 1000);
+}
+
+static ssize_t temp1_max_store(struct device *dev,
+ struct device_attribute *attr, const char *buf,
+ size_t count)
+{
+ struct dsa_switch *ds = dev_get_drvdata(dev);
+ int temp, ret;
+
+ ret = kstrtoint(buf, 0, &temp);
+ if (ret < 0)
+ return ret;
+
+ ret = ds->drv->set_temp_limit(ds, DIV_ROUND_CLOSEST(temp, 1000));
+ if (ret < 0)
+ return ret;
+
+ return count;
+}
+static DEVICE_ATTR(temp1_max, S_IRUGO, temp1_max_show, temp1_max_store);
+
+static ssize_t temp1_max_alarm_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct dsa_switch *ds = dev_get_drvdata(dev);
+ bool alarm;
+ int ret;
+
+ ret = ds->drv->get_temp_alarm(ds, &alarm);
+ if (ret < 0)
+ return ret;
+
+ return sprintf(buf, "%d\n", alarm);
+}
+static DEVICE_ATTR_RO(temp1_max_alarm);
+
+static struct attribute *dsa_hwmon_attrs[] = {
+ &dev_attr_temp1_input.attr, /* 0 */
+ &dev_attr_temp1_max.attr, /* 1 */
+ &dev_attr_temp1_max_alarm.attr, /* 2 */
+ NULL
+};
+
+static umode_t dsa_hwmon_attrs_visible(struct kobject *kobj,
+ struct attribute *attr, int index)
+{
+ struct device *dev = container_of(kobj, struct device, kobj);
+ struct dsa_switch *ds = dev_get_drvdata(dev);
+ struct dsa_switch_driver *drv = ds->drv;
+ umode_t mode = attr->mode;
+
+ if (index == 1) {
+ if (!drv->get_temp_limit)
+ mode = 0;
+ else if (drv->set_temp_limit)
+ mode |= S_IWUSR;
+ } else if (index == 2 && !drv->get_temp_alarm) {
+ mode = 0;
+ }
+ return mode;
+}
+
+static const struct attribute_group dsa_hwmon_group = {
+ .attrs = dsa_hwmon_attrs,
+ .is_visible = dsa_hwmon_attrs_visible,
+};
+__ATTRIBUTE_GROUPS(dsa_hwmon);
+
+#endif /* CONFIG_NET_DSA_HWMON */
/* basic switch operations **************************************************/
static struct dsa_switch *
@@ -228,6 +330,31 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index,
ds->ports[i] = slave_dev;
}
+#ifdef CONFIG_NET_DSA_HWMON
+ /* If the switch provides a temperature sensor,
+ * register with hardware monitoring subsystem.
+ * Treat registration error as non-fatal and ignore it.
+ */
+ if (drv->get_temp) {
+ const char *netname = netdev_name(dst->master_netdev);
+ char hname[IFNAMSIZ + 1];
+ int i, j;
+
+ /* Create valid hwmon 'name' attribute */
+ for (i = j = 0; i < IFNAMSIZ && netname[i]; i++) {
+ if (isalnum(netname[i]))
+ hname[j++] = netname[i];
+ }
+ hname[j] = '\0';
+ scnprintf(ds->hwmon_name, sizeof(ds->hwmon_name), "%s_dsa%d",
+ hname, index);
+ ds->hwmon_dev = hwmon_device_register_with_groups(NULL,
+ ds->hwmon_name, ds, dsa_hwmon_groups);
+ if (IS_ERR(ds->hwmon_dev))
+ ds->hwmon_dev = NULL;
+ }
+#endif /* CONFIG_NET_DSA_HWMON */
+
return ds;
out_free:
@@ -239,6 +366,10 @@ out:
static void dsa_switch_destroy(struct dsa_switch *ds)
{
+#ifdef CONFIG_NET_DSA_HWMON
+ if (ds->hwmon_dev)
+ hwmon_device_unregister(ds->hwmon_dev);
+#endif
}
#ifdef CONFIG_PM_SLEEP
@@ -447,6 +578,7 @@ static int dsa_of_probe(struct platform_device *pdev)
const char *port_name;
int chip_index, port_index;
const unsigned int *sw_addr, *port_reg;
+ u32 eeprom_len;
int ret;
mdio = of_parse_phandle(np, "dsa,mii-bus", 0);
@@ -498,6 +630,9 @@ static int dsa_of_probe(struct platform_device *pdev)
if (cd->sw_addr > PHY_MAX_ADDR)
continue;
+ if (!of_property_read_u32(np, "eeprom-length", &eeprom_len))
+ cd->eeprom_len = eeprom_len;
+
for_each_available_child_of_node(child, port) {
port_reg = of_get_property(port, "reg", NULL);
if (!port_reg)
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index ab03e00ffe8f..0ea466dad818 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -249,6 +249,27 @@ static void dsa_slave_get_drvinfo(struct net_device *dev,
strlcpy(drvinfo->bus_info, "platform", sizeof(drvinfo->bus_info));
}
+static int dsa_slave_get_regs_len(struct net_device *dev)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->parent;
+
+ if (ds->drv->get_regs_len)
+ return ds->drv->get_regs_len(ds, p->port);
+
+ return -EOPNOTSUPP;
+}
+
+static void
+dsa_slave_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->parent;
+
+ if (ds->drv->get_regs)
+ ds->drv->get_regs(ds, p->port, regs, _p);
+}
+
static int dsa_slave_nway_reset(struct net_device *dev)
{
struct dsa_slave_priv *p = netdev_priv(dev);
@@ -271,6 +292,44 @@ static u32 dsa_slave_get_link(struct net_device *dev)
return -EOPNOTSUPP;
}
+static int dsa_slave_get_eeprom_len(struct net_device *dev)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->parent;
+
+ if (ds->pd->eeprom_len)
+ return ds->pd->eeprom_len;
+
+ if (ds->drv->get_eeprom_len)
+ return ds->drv->get_eeprom_len(ds);
+
+ return 0;
+}
+
+static int dsa_slave_get_eeprom(struct net_device *dev,
+ struct ethtool_eeprom *eeprom, u8 *data)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->parent;
+
+ if (ds->drv->get_eeprom)
+ return ds->drv->get_eeprom(ds, eeprom, data);
+
+ return -EOPNOTSUPP;
+}
+
+static int dsa_slave_set_eeprom(struct net_device *dev,
+ struct ethtool_eeprom *eeprom, u8 *data)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->parent;
+
+ if (ds->drv->set_eeprom)
+ return ds->drv->set_eeprom(ds, eeprom, data);
+
+ return -EOPNOTSUPP;
+}
+
static void dsa_slave_get_strings(struct net_device *dev,
uint32_t stringset, uint8_t *data)
{
@@ -385,8 +444,13 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
.get_settings = dsa_slave_get_settings,
.set_settings = dsa_slave_set_settings,
.get_drvinfo = dsa_slave_get_drvinfo,
+ .get_regs_len = dsa_slave_get_regs_len,
+ .get_regs = dsa_slave_get_regs,
.nway_reset = dsa_slave_nway_reset,
.get_link = dsa_slave_get_link,
+ .get_eeprom_len = dsa_slave_get_eeprom_len,
+ .get_eeprom = dsa_slave_get_eeprom,
+ .set_eeprom = dsa_slave_set_eeprom,
.get_strings = dsa_slave_get_strings,
.get_ethtool_stats = dsa_slave_get_ethtool_stats,
.get_sset_count = dsa_slave_get_sset_count,
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index ce90c8bdc658..2dab27063273 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -63,8 +63,6 @@ static netdev_tx_t dsa_xmit(struct sk_buff *skb, struct net_device *dev)
dsa_header[3] = 0x00;
}
- skb->protocol = htons(ETH_P_DSA);
-
skb->dev = p->parent->dst->master_netdev;
dev_queue_xmit(skb);
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index 94fcce778679..9aeda596f7ec 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -76,8 +76,6 @@ static netdev_tx_t edsa_xmit(struct sk_buff *skb, struct net_device *dev)
edsa_header[7] = 0x00;
}
- skb->protocol = htons(ETH_P_EDSA);
-
skb->dev = p->parent->dst->master_netdev;
dev_queue_xmit(skb);
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index 115fdca34077..e268f9db8893 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -57,8 +57,6 @@ static netdev_tx_t trailer_xmit(struct sk_buff *skb, struct net_device *dev)
trailer[2] = 0x10;
trailer[3] = 0x00;
- nskb->protocol = htons(ETH_P_TRAILER);
-
nskb->dev = p->parent->dst->master_netdev;
dev_queue_xmit(nskb);
diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c
index ef2ad8aaef13..fc9193eabd41 100644
--- a/net/ieee802154/dgram.c
+++ b/net/ieee802154/dgram.c
@@ -324,7 +324,7 @@ static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk,
}
/* FIXME: skip headers if necessary ?! */
- err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ err = skb_copy_datagram_msg(skb, 0, msg, copied);
if (err)
goto done;
diff --git a/net/ieee802154/raw.c b/net/ieee802154/raw.c
index 9d1f64806f02..73a4d53463de 100644
--- a/net/ieee802154/raw.c
+++ b/net/ieee802154/raw.c
@@ -195,7 +195,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
copied = len;
}
- err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ err = skb_copy_datagram_msg(skb, 0, msg, copied);
if (err)
goto done;
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index e682b48e0709..bd2901604842 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -322,6 +322,15 @@ config NET_FOU
network mechanisms and optimizations for UDP (such as ECMP
and RSS) can be leveraged to provide better service.
+config NET_FOU_IP_TUNNELS
+ bool "IP: FOU encapsulation of IP tunnels"
+ depends on NET_IPIP || NET_IPGRE || IPV6_SIT
+ select NET_FOU
+ ---help---
+ Allow configuration of FOU or GUE encapsulation for IP tunnels.
+ When this option is enabled IP tunnels can be configured to use
+ FOU or GUE encapsulation.
+
config GENEVE
tristate "Generic Network Virtualization Encapsulation (Geneve)"
depends on INET
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 8b7fe5b03906..3a096bb2d596 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1222,7 +1222,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
SKB_GSO_TCPV6 |
SKB_GSO_UDP_TUNNEL |
SKB_GSO_UDP_TUNNEL_CSUM |
- SKB_GSO_MPLS |
+ SKB_GSO_TUNNEL_REMCSUM |
0)))
goto out;
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 4715f25dfe03..5160c710f2eb 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -50,7 +50,7 @@
#include <net/netlabel.h>
#include <net/cipso_ipv4.h>
#include <linux/atomic.h>
-#include <asm/bug.h>
+#include <linux/bug.h>
#include <asm/unaligned.h>
/* List of available DOI definitions */
@@ -72,6 +72,7 @@ struct cipso_v4_map_cache_bkt {
u32 size;
struct list_head list;
};
+
struct cipso_v4_map_cache_entry {
u32 hash;
unsigned char *key;
@@ -82,7 +83,8 @@ struct cipso_v4_map_cache_entry {
u32 activity;
struct list_head list;
};
-static struct cipso_v4_map_cache_bkt *cipso_v4_cache = NULL;
+
+static struct cipso_v4_map_cache_bkt *cipso_v4_cache;
/* Restricted bitmap (tag #1) flags */
int cipso_v4_rbm_optfmt = 0;
@@ -539,7 +541,7 @@ doi_add_return:
/**
* cipso_v4_doi_free - Frees a DOI definition
- * @entry: the entry's RCU field
+ * @doi_def: the DOI definition
*
* Description:
* This function frees all of the memory associated with a DOI definition.
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 360b565918c4..60173d4d3a0e 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -392,8 +392,10 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
if (elen <= 0)
goto out;
- if ((err = skb_cow_data(skb, 0, &trailer)) < 0)
+ err = skb_cow_data(skb, 0, &trailer);
+ if (err < 0)
goto out;
+
nfrags = err;
assoclen = sizeof(*esph);
@@ -601,12 +603,12 @@ static int esp_init_authenc(struct xfrm_state *x)
BUG_ON(!aalg_desc);
err = -EINVAL;
- if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
+ if (aalg_desc->uinfo.auth.icv_fullbits / 8 !=
crypto_aead_authsize(aead)) {
- NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n",
- x->aalg->alg_name,
- crypto_aead_authsize(aead),
- aalg_desc->uinfo.auth.icv_fullbits/8);
+ pr_info("ESP: %s digestsize %u != %hu\n",
+ x->aalg->alg_name,
+ crypto_aead_authsize(aead),
+ aalg_desc->uinfo.auth.icv_fullbits / 8);
goto free_key;
}
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 32e78924e246..740ae099a0d9 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -38,21 +38,17 @@ static inline struct fou *fou_from_sock(struct sock *sk)
return sk->sk_user_data;
}
-static int fou_udp_encap_recv_deliver(struct sk_buff *skb,
- u8 protocol, size_t len)
+static void fou_recv_pull(struct sk_buff *skb, size_t len)
{
struct iphdr *iph = ip_hdr(skb);
/* Remove 'len' bytes from the packet (UDP header and
- * FOU header if present), modify the protocol to the one
- * we found, and then call rcv_encap.
+ * FOU header if present).
*/
iph->tot_len = htons(ntohs(iph->tot_len) - len);
__skb_pull(skb, len);
skb_postpull_rcsum(skb, udp_hdr(skb), len);
skb_reset_transport_header(skb);
-
- return -protocol;
}
static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
@@ -62,16 +58,78 @@ static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
if (!fou)
return 1;
- return fou_udp_encap_recv_deliver(skb, fou->protocol,
- sizeof(struct udphdr));
+ fou_recv_pull(skb, sizeof(struct udphdr));
+
+ return -fou->protocol;
+}
+
+static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr,
+ void *data, int hdrlen, u8 ipproto)
+{
+ __be16 *pd = data;
+ u16 start = ntohs(pd[0]);
+ u16 offset = ntohs(pd[1]);
+ u16 poffset = 0;
+ u16 plen;
+ __wsum csum, delta;
+ __sum16 *psum;
+
+ if (skb->remcsum_offload) {
+ /* Already processed in GRO path */
+ skb->remcsum_offload = 0;
+ return guehdr;
+ }
+
+ if (start > skb->len - hdrlen ||
+ offset > skb->len - hdrlen - sizeof(u16))
+ return NULL;
+
+ if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE))
+ __skb_checksum_complete(skb);
+
+ plen = hdrlen + offset + sizeof(u16);
+ if (!pskb_may_pull(skb, plen))
+ return NULL;
+ guehdr = (struct guehdr *)&udp_hdr(skb)[1];
+
+ if (ipproto == IPPROTO_IP && sizeof(struct iphdr) < plen) {
+ struct iphdr *ip = (struct iphdr *)(skb->data + hdrlen);
+
+ /* If next header happens to be IP we can skip that for the
+ * checksum calculation since the IP header checksum is zero
+ * if correct.
+ */
+ poffset = ip->ihl * 4;
+ }
+
+ csum = csum_sub(skb->csum, skb_checksum(skb, poffset + hdrlen,
+ start - poffset - hdrlen, 0));
+
+ /* Set derived checksum in packet */
+ psum = (__sum16 *)(skb->data + hdrlen + offset);
+ delta = csum_sub(csum_fold(csum), *psum);
+ *psum = csum_fold(csum);
+
+ /* Adjust skb->csum since we changed the packet */
+ skb->csum = csum_add(skb->csum, delta);
+
+ return guehdr;
+}
+
+static int gue_control_message(struct sk_buff *skb, struct guehdr *guehdr)
+{
+ /* No support yet */
+ kfree_skb(skb);
+ return 0;
}
static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
{
struct fou *fou = fou_from_sock(sk);
- size_t len;
+ size_t len, optlen, hdrlen;
struct guehdr *guehdr;
- struct udphdr *uh;
+ void *data;
+ u16 doffset = 0;
if (!fou)
return 1;
@@ -80,25 +138,61 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
if (!pskb_may_pull(skb, len))
goto drop;
- uh = udp_hdr(skb);
- guehdr = (struct guehdr *)&uh[1];
+ guehdr = (struct guehdr *)&udp_hdr(skb)[1];
+
+ optlen = guehdr->hlen << 2;
+ len += optlen;
- len += guehdr->hlen << 2;
if (!pskb_may_pull(skb, len))
goto drop;
- uh = udp_hdr(skb);
- guehdr = (struct guehdr *)&uh[1];
+ /* guehdr may change after pull */
+ guehdr = (struct guehdr *)&udp_hdr(skb)[1];
- if (guehdr->version != 0)
- goto drop;
+ hdrlen = sizeof(struct guehdr) + optlen;
- if (guehdr->flags) {
- /* No support yet */
+ if (guehdr->version != 0 || validate_gue_flags(guehdr, optlen))
goto drop;
+
+ hdrlen = sizeof(struct guehdr) + optlen;
+
+ ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(skb)->tot_len) - len);
+
+ /* Pull UDP header now, skb->data points to guehdr */
+ __skb_pull(skb, sizeof(struct udphdr));
+
+ /* Pull csum through the guehdr now . This can be used if
+ * there is a remote checksum offload.
+ */
+ skb_postpull_rcsum(skb, udp_hdr(skb), len);
+
+ data = &guehdr[1];
+
+ if (guehdr->flags & GUE_FLAG_PRIV) {
+ __be32 flags = *(__be32 *)(data + doffset);
+
+ doffset += GUE_LEN_PRIV;
+
+ if (flags & GUE_PFLAG_REMCSUM) {
+ guehdr = gue_remcsum(skb, guehdr, data + doffset,
+ hdrlen, guehdr->proto_ctype);
+ if (!guehdr)
+ goto drop;
+
+ data = &guehdr[1];
+
+ doffset += GUE_PLEN_REMCSUM;
+ }
}
- return fou_udp_encap_recv_deliver(skb, guehdr->next_hdr, len);
+ if (unlikely(guehdr->control))
+ return gue_control_message(skb, guehdr);
+
+ __skb_pull(skb, hdrlen);
+ skb_reset_transport_header(skb);
+
+ return -guehdr->proto_ctype;
+
drop:
kfree_skb(skb);
return 0;
@@ -147,6 +241,66 @@ out_unlock:
return err;
}
+static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off,
+ struct guehdr *guehdr, void *data,
+ size_t hdrlen, u8 ipproto)
+{
+ __be16 *pd = data;
+ u16 start = ntohs(pd[0]);
+ u16 offset = ntohs(pd[1]);
+ u16 poffset = 0;
+ u16 plen;
+ void *ptr;
+ __wsum csum, delta;
+ __sum16 *psum;
+
+ if (skb->remcsum_offload)
+ return guehdr;
+
+ if (start > skb_gro_len(skb) - hdrlen ||
+ offset > skb_gro_len(skb) - hdrlen - sizeof(u16) ||
+ !NAPI_GRO_CB(skb)->csum_valid || skb->remcsum_offload)
+ return NULL;
+
+ plen = hdrlen + offset + sizeof(u16);
+
+ /* Pull checksum that will be written */
+ if (skb_gro_header_hard(skb, off + plen)) {
+ guehdr = skb_gro_header_slow(skb, off + plen, off);
+ if (!guehdr)
+ return NULL;
+ }
+
+ ptr = (void *)guehdr + hdrlen;
+
+ if (ipproto == IPPROTO_IP &&
+ (hdrlen + sizeof(struct iphdr) < plen)) {
+ struct iphdr *ip = (struct iphdr *)(ptr + hdrlen);
+
+ /* If next header happens to be IP we can skip
+ * that for the checksum calculation since the
+ * IP header checksum is zero if correct.
+ */
+ poffset = ip->ihl * 4;
+ }
+
+ csum = csum_sub(NAPI_GRO_CB(skb)->csum,
+ csum_partial(ptr + poffset, start - poffset, 0));
+
+ /* Set derived checksum in packet */
+ psum = (__sum16 *)(ptr + offset);
+ delta = csum_sub(csum_fold(csum), *psum);
+ *psum = csum_fold(csum);
+
+ /* Adjust skb->csum since we changed the packet */
+ skb->csum = csum_add(skb->csum, delta);
+ NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta);
+
+ skb->remcsum_offload = 1;
+
+ return guehdr;
+}
+
static struct sk_buff **gue_gro_receive(struct sk_buff **head,
struct sk_buff *skb)
{
@@ -154,38 +308,64 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head,
const struct net_offload *ops;
struct sk_buff **pp = NULL;
struct sk_buff *p;
- u8 proto;
struct guehdr *guehdr;
- unsigned int hlen, guehlen;
- unsigned int off;
+ size_t len, optlen, hdrlen, off;
+ void *data;
+ u16 doffset = 0;
int flush = 1;
off = skb_gro_offset(skb);
- hlen = off + sizeof(*guehdr);
+ len = off + sizeof(*guehdr);
+
guehdr = skb_gro_header_fast(skb, off);
- if (skb_gro_header_hard(skb, hlen)) {
- guehdr = skb_gro_header_slow(skb, hlen, off);
+ if (skb_gro_header_hard(skb, len)) {
+ guehdr = skb_gro_header_slow(skb, len, off);
if (unlikely(!guehdr))
goto out;
}
- proto = guehdr->next_hdr;
+ optlen = guehdr->hlen << 2;
+ len += optlen;
- rcu_read_lock();
- offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
- ops = rcu_dereference(offloads[proto]);
- if (WARN_ON(!ops || !ops->callbacks.gro_receive))
- goto out_unlock;
+ if (skb_gro_header_hard(skb, len)) {
+ guehdr = skb_gro_header_slow(skb, len, off);
+ if (unlikely(!guehdr))
+ goto out;
+ }
- guehlen = sizeof(*guehdr) + (guehdr->hlen << 2);
+ if (unlikely(guehdr->control) || guehdr->version != 0 ||
+ validate_gue_flags(guehdr, optlen))
+ goto out;
- hlen = off + guehlen;
- if (skb_gro_header_hard(skb, hlen)) {
- guehdr = skb_gro_header_slow(skb, hlen, off);
- if (unlikely(!guehdr))
- goto out_unlock;
+ hdrlen = sizeof(*guehdr) + optlen;
+
+ /* Adjust NAPI_GRO_CB(skb)->csum to account for guehdr,
+ * this is needed if there is a remote checkcsum offload.
+ */
+ skb_gro_postpull_rcsum(skb, guehdr, hdrlen);
+
+ data = &guehdr[1];
+
+ if (guehdr->flags & GUE_FLAG_PRIV) {
+ __be32 flags = *(__be32 *)(data + doffset);
+
+ doffset += GUE_LEN_PRIV;
+
+ if (flags & GUE_PFLAG_REMCSUM) {
+ guehdr = gue_gro_remcsum(skb, off, guehdr,
+ data + doffset, hdrlen,
+ guehdr->proto_ctype);
+ if (!guehdr)
+ goto out;
+
+ data = &guehdr[1];
+
+ doffset += GUE_PLEN_REMCSUM;
+ }
}
+ skb_gro_pull(skb, hdrlen);
+
flush = 0;
for (p = *head; p; p = p->next) {
@@ -197,7 +377,7 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head,
guehdr2 = (struct guehdr *)(p->data + off);
/* Compare base GUE header to be equal (covers
- * hlen, version, next_hdr, and flags.
+ * hlen, version, proto_ctype, and flags.
*/
if (guehdr->word != guehdr2->word) {
NAPI_GRO_CB(p)->same_flow = 0;
@@ -212,10 +392,11 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head,
}
}
- skb_gro_pull(skb, guehlen);
-
- /* Adjusted NAPI_GRO_CB(skb)->csum after skb_gro_pull()*/
- skb_gro_postpull_rcsum(skb, guehdr, guehlen);
+ rcu_read_lock();
+ offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
+ ops = rcu_dereference(offloads[guehdr->proto_ctype]);
+ if (WARN_ON(!ops || !ops->callbacks.gro_receive))
+ goto out_unlock;
pp = ops->callbacks.gro_receive(head, skb);
@@ -236,7 +417,7 @@ static int gue_gro_complete(struct sk_buff *skb, int nhoff)
u8 proto;
int err = -ENOENT;
- proto = guehdr->next_hdr;
+ proto = guehdr->proto_ctype;
guehlen = sizeof(*guehdr) + (guehdr->hlen << 2);
@@ -487,6 +668,125 @@ static const struct genl_ops fou_nl_ops[] = {
},
};
+static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e,
+ struct flowi4 *fl4, u8 *protocol, __be16 sport)
+{
+ struct udphdr *uh;
+
+ skb_push(skb, sizeof(struct udphdr));
+ skb_reset_transport_header(skb);
+
+ uh = udp_hdr(skb);
+
+ uh->dest = e->dport;
+ uh->source = sport;
+ uh->len = htons(skb->len);
+ uh->check = 0;
+ udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
+ fl4->saddr, fl4->daddr, skb->len);
+
+ *protocol = IPPROTO_UDP;
+}
+
+int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
+ u8 *protocol, struct flowi4 *fl4)
+{
+ bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM);
+ int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
+ __be16 sport;
+
+ skb = iptunnel_handle_offloads(skb, csum, type);
+
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
+ sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
+ skb, 0, 0, false);
+ fou_build_udp(skb, e, fl4, protocol, sport);
+
+ return 0;
+}
+EXPORT_SYMBOL(fou_build_header);
+
+int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
+ u8 *protocol, struct flowi4 *fl4)
+{
+ bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM);
+ int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
+ struct guehdr *guehdr;
+ size_t hdrlen, optlen = 0;
+ __be16 sport;
+ void *data;
+ bool need_priv = false;
+
+ if ((e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) &&
+ skb->ip_summed == CHECKSUM_PARTIAL) {
+ csum = false;
+ optlen += GUE_PLEN_REMCSUM;
+ type |= SKB_GSO_TUNNEL_REMCSUM;
+ need_priv = true;
+ }
+
+ optlen += need_priv ? GUE_LEN_PRIV : 0;
+
+ skb = iptunnel_handle_offloads(skb, csum, type);
+
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
+ /* Get source port (based on flow hash) before skb_push */
+ sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
+ skb, 0, 0, false);
+
+ hdrlen = sizeof(struct guehdr) + optlen;
+
+ skb_push(skb, hdrlen);
+
+ guehdr = (struct guehdr *)skb->data;
+
+ guehdr->control = 0;
+ guehdr->version = 0;
+ guehdr->hlen = optlen >> 2;
+ guehdr->flags = 0;
+ guehdr->proto_ctype = *protocol;
+
+ data = &guehdr[1];
+
+ if (need_priv) {
+ __be32 *flags = data;
+
+ guehdr->flags |= GUE_FLAG_PRIV;
+ *flags = 0;
+ data += GUE_LEN_PRIV;
+
+ if (type & SKB_GSO_TUNNEL_REMCSUM) {
+ u16 csum_start = skb_checksum_start_offset(skb);
+ __be16 *pd = data;
+
+ if (csum_start < hdrlen)
+ return -EINVAL;
+
+ csum_start -= hdrlen;
+ pd[0] = htons(csum_start);
+ pd[1] = htons(csum_start + skb->csum_offset);
+
+ if (!skb_is_gso(skb)) {
+ skb->ip_summed = CHECKSUM_NONE;
+ skb->encapsulation = 0;
+ }
+
+ *flags |= GUE_PFLAG_REMCSUM;
+ data += GUE_PLEN_REMCSUM;
+ }
+
+ }
+
+ fou_build_udp(skb, e, fl4, protocol, sport);
+
+ return 0;
+}
+EXPORT_SYMBOL(gue_build_header);
+
static int __init fou_init(void)
{
int ret;
diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c
index dedb21e99914..31802afce34f 100644
--- a/net/ipv4/geneve.c
+++ b/net/ipv4/geneve.c
@@ -104,7 +104,7 @@ static void geneve_build_header(struct genevehdr *geneveh,
memcpy(geneveh->options, options, options_len);
}
-/* Transmit a fully formated Geneve frame.
+/* Transmit a fully formatted Geneve frame.
*
* When calling this function. The skb->data should point
* to the geneve header which is fully formed.
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index fb70e3ecc3e4..666cf364df86 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -112,17 +112,17 @@
#ifdef CONFIG_IP_MULTICAST
/* Parameter names and values are taken from igmp-v2-06 draft */
-#define IGMP_V1_Router_Present_Timeout (400*HZ)
-#define IGMP_V2_Router_Present_Timeout (400*HZ)
-#define IGMP_V2_Unsolicited_Report_Interval (10*HZ)
-#define IGMP_V3_Unsolicited_Report_Interval (1*HZ)
-#define IGMP_Query_Response_Interval (10*HZ)
-#define IGMP_Query_Robustness_Variable 2
+#define IGMP_V1_ROUTER_PRESENT_TIMEOUT (400*HZ)
+#define IGMP_V2_ROUTER_PRESENT_TIMEOUT (400*HZ)
+#define IGMP_V2_UNSOLICITED_REPORT_INTERVAL (10*HZ)
+#define IGMP_V3_UNSOLICITED_REPORT_INTERVAL (1*HZ)
+#define IGMP_QUERY_RESPONSE_INTERVAL (10*HZ)
+#define IGMP_QUERY_ROBUSTNESS_VARIABLE 2
-#define IGMP_Initial_Report_Delay (1)
+#define IGMP_INITIAL_REPORT_DELAY (1)
-/* IGMP_Initial_Report_Delay is not from IGMP specs!
+/* IGMP_INITIAL_REPORT_DELAY is not from IGMP specs!
* IGMP specs require to report membership immediately after
* joining a group, but we delay the first report by a
* small interval. It seems more natural and still does not
@@ -318,9 +318,7 @@ igmp_scount(struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted)
return scount;
}
-#define igmp_skb_size(skb) (*(unsigned int *)((skb)->cb))
-
-static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
+static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
{
struct sk_buff *skb;
struct rtable *rt;
@@ -330,6 +328,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
struct flowi4 fl4;
int hlen = LL_RESERVED_SPACE(dev);
int tlen = dev->needed_tailroom;
+ unsigned int size = mtu;
while (1) {
skb = alloc_skb(size + hlen + tlen,
@@ -341,7 +340,6 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
return NULL;
}
skb->priority = TC_PRIO_CONTROL;
- igmp_skb_size(skb) = size;
rt = ip_route_output_ports(net, &fl4, NULL, IGMPV3_ALL_MCR, 0,
0, 0,
@@ -354,6 +352,8 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
skb_dst_set(skb, &rt->dst);
skb->dev = dev;
+ skb->reserved_tailroom = skb_end_offset(skb) -
+ min(mtu, skb_end_offset(skb));
skb_reserve(skb, hlen);
skb_reset_network_header(skb);
@@ -423,8 +423,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc,
return skb;
}
-#define AVAILABLE(skb) ((skb) ? ((skb)->dev ? igmp_skb_size(skb) - (skb)->len : \
- skb_tailroom(skb)) : 0)
+#define AVAILABLE(skb) ((skb) ? skb_availroom(skb) : 0)
static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
int type, int gdeleted, int sdeleted)
@@ -879,15 +878,15 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
if (ih->code == 0) {
/* Alas, old v1 router presents here. */
- max_delay = IGMP_Query_Response_Interval;
+ max_delay = IGMP_QUERY_RESPONSE_INTERVAL;
in_dev->mr_v1_seen = jiffies +
- IGMP_V1_Router_Present_Timeout;
+ IGMP_V1_ROUTER_PRESENT_TIMEOUT;
group = 0;
} else {
/* v2 router present */
max_delay = ih->code*(HZ/IGMP_TIMER_SCALE);
in_dev->mr_v2_seen = jiffies +
- IGMP_V2_Router_Present_Timeout;
+ IGMP_V2_ROUTER_PRESENT_TIMEOUT;
}
/* cancel the interface change timer */
in_dev->mr_ifc_count = 0;
@@ -899,7 +898,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
return true; /* ignore bogus packet; freed by caller */
} else if (IGMP_V1_SEEN(in_dev)) {
/* This is a v3 query with v1 queriers present */
- max_delay = IGMP_Query_Response_Interval;
+ max_delay = IGMP_QUERY_RESPONSE_INTERVAL;
group = 0;
} else if (IGMP_V2_SEEN(in_dev)) {
/* this is a v3 query with v2 queriers present;
@@ -1218,7 +1217,7 @@ static void igmp_group_added(struct ip_mc_list *im)
return;
if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) {
spin_lock_bh(&im->lock);
- igmp_start_timer(im, IGMP_Initial_Report_Delay);
+ igmp_start_timer(im, IGMP_INITIAL_REPORT_DELAY);
spin_unlock_bh(&im->lock);
return;
}
@@ -1541,7 +1540,7 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr)
int sysctl_igmp_max_memberships __read_mostly = IP_MAX_MEMBERSHIPS;
int sysctl_igmp_max_msf __read_mostly = IP_MAX_MSF;
#ifdef CONFIG_IP_MULTICAST
-int sysctl_igmp_qrv __read_mostly = IGMP_Query_Robustness_Variable;
+int sysctl_igmp_qrv __read_mostly = IGMP_QUERY_ROBUSTNESS_VARIABLE;
#endif
static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
@@ -2687,11 +2686,7 @@ static int igmp_mcf_seq_show(struct seq_file *seq, void *v)
struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq);
if (v == SEQ_START_TOKEN) {
- seq_printf(seq,
- "%3s %6s "
- "%10s %10s %6s %6s\n", "Idx",
- "Device", "MCA",
- "SRC", "INC", "EXC");
+ seq_puts(seq, "Idx Device MCA SRC INC EXC\n");
} else {
seq_printf(seq,
"%3d %6.6s 0x%08x "
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 2811cc18701a..4d964dadd655 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -80,7 +80,7 @@ struct ipq {
struct inet_peer *peer;
};
-static inline u8 ip4_frag_ecn(u8 tos)
+static u8 ip4_frag_ecn(u8 tos)
{
return 1 << (tos & INET_ECN_MASK);
}
@@ -148,7 +148,7 @@ static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
inet_getpeer_v4(net->ipv4.peers, arg->iph->saddr, 1) : NULL;
}
-static __inline__ void ip4_frag_free(struct inet_frag_queue *q)
+static void ip4_frag_free(struct inet_frag_queue *q)
{
struct ipq *qp;
@@ -160,7 +160,7 @@ static __inline__ void ip4_frag_free(struct inet_frag_queue *q)
/* Destruction primitives. */
-static __inline__ void ipq_put(struct ipq *ipq)
+static void ipq_put(struct ipq *ipq)
{
inet_frag_put(&ipq->q, &ip4_frags);
}
@@ -236,7 +236,7 @@ out:
/* Find the correct entry in the "incomplete datagrams" queue for
* this IP datagram, and create new one, if nothing is found.
*/
-static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
+static struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
{
struct inet_frag_queue *q;
struct ip4_create_arg arg;
@@ -256,7 +256,7 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
}
/* Is the fragment too far ahead to be part of ipq? */
-static inline int ip_frag_too_far(struct ipq *qp)
+static int ip_frag_too_far(struct ipq *qp)
{
struct inet_peer *peer = qp->peer;
unsigned int max = sysctl_ipfrag_max_dist;
@@ -795,16 +795,16 @@ static void __init ip4_frags_ctl_register(void)
register_net_sysctl(&init_net, "net/ipv4", ip4_frags_ctl_table);
}
#else
-static inline int ip4_frags_ns_ctl_register(struct net *net)
+static int ip4_frags_ns_ctl_register(struct net *net)
{
return 0;
}
-static inline void ip4_frags_ns_ctl_unregister(struct net *net)
+static void ip4_frags_ns_ctl_unregister(struct net *net)
{
}
-static inline void __init ip4_frags_ctl_register(void)
+static void __init ip4_frags_ctl_register(void)
{
}
#endif
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 12055fdbe716..ac8491245e5b 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -789,7 +789,7 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u16(skb, IFLA_GRE_ENCAP_DPORT,
t->encap.dport) ||
nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
- t->encap.dport))
+ t->encap.flags))
goto nla_put_failure;
return 0;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index bc6471d4abcd..4a929adf2ab7 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -662,12 +662,10 @@ slow_path:
if (len < left) {
len &= ~7;
}
- /*
- * Allocate buffer.
- */
- if ((skb2 = alloc_skb(len+hlen+ll_rs, GFP_ATOMIC)) == NULL) {
- NETDEBUG(KERN_INFO "IP: frag: no memory for new fragment!\n");
+ /* Allocate buffer */
+ skb2 = alloc_skb(len + hlen + ll_rs, GFP_ATOMIC);
+ if (!skb2) {
err = -ENOMEM;
goto fail;
}
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index c373a9ad4555..21894df66262 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -424,7 +424,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
msg->msg_flags |= MSG_TRUNC;
copied = len;
}
- err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ err = skb_copy_datagram_msg(skb, 0, msg, copied);
if (err)
goto out_free_skb;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 0bb8e141eacc..c3587e1c8b82 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -56,7 +56,10 @@
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/udp.h>
-#include <net/gue.h>
+
+#if IS_ENABLED(CONFIG_NET_FOU)
+#include <net/fou.h>
+#endif
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
@@ -494,10 +497,12 @@ static int ip_encap_hlen(struct ip_tunnel_encap *e)
switch (e->type) {
case TUNNEL_ENCAP_NONE:
return 0;
+#if IS_ENABLED(CONFIG_NET_FOU)
case TUNNEL_ENCAP_FOU:
- return sizeof(struct udphdr);
+ return fou_encap_hlen(e);
case TUNNEL_ENCAP_GUE:
- return sizeof(struct udphdr) + sizeof(struct guehdr);
+ return gue_encap_hlen(e);
+#endif
default:
return -EINVAL;
}
@@ -526,60 +531,18 @@ int ip_tunnel_encap_setup(struct ip_tunnel *t,
}
EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
-static int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
- size_t hdr_len, u8 *protocol, struct flowi4 *fl4)
-{
- struct udphdr *uh;
- __be16 sport;
- bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM);
- int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
-
- skb = iptunnel_handle_offloads(skb, csum, type);
-
- if (IS_ERR(skb))
- return PTR_ERR(skb);
-
- /* Get length and hash before making space in skb */
-
- sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
- skb, 0, 0, false);
-
- skb_push(skb, hdr_len);
-
- skb_reset_transport_header(skb);
- uh = udp_hdr(skb);
-
- if (e->type == TUNNEL_ENCAP_GUE) {
- struct guehdr *guehdr = (struct guehdr *)&uh[1];
-
- guehdr->version = 0;
- guehdr->hlen = 0;
- guehdr->flags = 0;
- guehdr->next_hdr = *protocol;
- }
-
- uh->dest = e->dport;
- uh->source = sport;
- uh->len = htons(skb->len);
- uh->check = 0;
- udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
- fl4->saddr, fl4->daddr, skb->len);
-
- *protocol = IPPROTO_UDP;
-
- return 0;
-}
-
int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
u8 *protocol, struct flowi4 *fl4)
{
switch (t->encap.type) {
case TUNNEL_ENCAP_NONE:
return 0;
+#if IS_ENABLED(CONFIG_NET_FOU)
case TUNNEL_ENCAP_FOU:
+ return fou_build_header(skb, &t->encap, protocol, fl4);
case TUNNEL_ENCAP_GUE:
- return fou_build_header(skb, &t->encap, t->encap_hlen,
- protocol, fl4);
+ return gue_build_header(skb, &t->encap, protocol, fl4);
+#endif
default:
return -EINVAL;
}
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 648fa1490ea7..7fa18bc7e47f 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -115,7 +115,7 @@
*/
int ic_set_manually __initdata = 0; /* IPconfig parameters set manually */
-static int ic_enable __initdata = 0; /* IP config enabled? */
+static int ic_enable __initdata; /* IP config enabled? */
/* Protocol choice */
int ic_proto_enabled __initdata = 0
@@ -130,7 +130,7 @@ int ic_proto_enabled __initdata = 0
#endif
;
-static int ic_host_name_set __initdata = 0; /* Host name set by us? */
+static int ic_host_name_set __initdata; /* Host name set by us? */
__be32 ic_myaddr = NONE; /* My IP address */
static __be32 ic_netmask = NONE; /* Netmask for local subnet */
@@ -160,17 +160,17 @@ static u8 ic_domain[64]; /* DNS (not NIS) domain name */
static char user_dev_name[IFNAMSIZ] __initdata = { 0, };
/* Protocols supported by available interfaces */
-static int ic_proto_have_if __initdata = 0;
+static int ic_proto_have_if __initdata;
/* MTU for boot device */
-static int ic_dev_mtu __initdata = 0;
+static int ic_dev_mtu __initdata;
#ifdef IPCONFIG_DYNAMIC
static DEFINE_SPINLOCK(ic_recv_lock);
-static volatile int ic_got_reply __initdata = 0; /* Proto(s) that replied */
+static volatile int ic_got_reply __initdata; /* Proto(s) that replied */
#endif
#ifdef IPCONFIG_DHCP
-static int ic_dhcp_msgtype __initdata = 0; /* DHCP msg type received */
+static int ic_dhcp_msgtype __initdata; /* DHCP msg type received */
#endif
@@ -186,8 +186,8 @@ struct ic_device {
__be32 xid;
};
-static struct ic_device *ic_first_dev __initdata = NULL;/* List of open device */
-static struct net_device *ic_dev __initdata = NULL; /* Selected device */
+static struct ic_device *ic_first_dev __initdata; /* List of open device */
+static struct net_device *ic_dev __initdata; /* Selected device */
static bool __init ic_is_init_dev(struct net_device *dev)
{
@@ -498,7 +498,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
struct arphdr *rarp;
unsigned char *rarp_ptr;
__be32 sip, tip;
- unsigned char *sha, *tha; /* s for "source", t for "target" */
+ unsigned char *tha; /* t for "target" */
struct ic_device *d;
if (!net_eq(dev_net(dev), &init_net))
@@ -549,7 +549,6 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
goto drop_unlock; /* should never happen */
/* Extract variable-width fields */
- sha = rarp_ptr;
rarp_ptr += dev->addr_len;
memcpy(&sip, rarp_ptr, 4);
rarp_ptr += 4;
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 37096d64730e..40403114f00a 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -465,7 +465,7 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u16(skb, IFLA_IPTUN_ENCAP_DPORT,
tunnel->encap.dport) ||
nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS,
- tunnel->encap.dport))
+ tunnel->encap.flags))
goto nla_put_failure;
return 0;
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 57f7c9804139..736236c3e554 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -875,7 +875,7 @@ int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
}
/* Don't bother checking the checksum */
- err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ err = skb_copy_datagram_msg(skb, 0, msg, copied);
if (err)
goto done;
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 8e3eb39f84e7..f0d4eb8b99b9 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -296,12 +296,12 @@ static void icmpmsg_put_line(struct seq_file *seq, unsigned long *vals,
int j;
if (count) {
- seq_printf(seq, "\nIcmpMsg:");
+ seq_puts(seq, "\nIcmpMsg:");
for (j = 0; j < count; ++j)
seq_printf(seq, " %sType%u",
type[j] & 0x100 ? "Out" : "In",
type[j] & 0xff);
- seq_printf(seq, "\nIcmpMsg:");
+ seq_puts(seq, "\nIcmpMsg:");
for (j = 0; j < count; ++j)
seq_printf(seq, " %lu", vals[j]);
}
@@ -342,7 +342,7 @@ static void icmp_put(struct seq_file *seq)
seq_puts(seq, "\nIcmp: InMsgs InErrors InCsumErrors");
for (i = 0; icmpmibmap[i].name != NULL; i++)
seq_printf(seq, " In%s", icmpmibmap[i].name);
- seq_printf(seq, " OutMsgs OutErrors");
+ seq_puts(seq, " OutMsgs OutErrors");
for (i = 0; icmpmibmap[i].name != NULL; i++)
seq_printf(seq, " Out%s", icmpmibmap[i].name);
seq_printf(seq, "\nIcmp: %lu %lu %lu",
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 739db3100c23..ee8fa4bf3b73 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -718,7 +718,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
copied = len;
}
- err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ err = skb_copy_datagram_msg(skb, 0, msg, copied);
if (err)
goto done;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 32b98d0207b4..45fe60c5238e 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -19,10 +19,6 @@
#include <net/tcp.h>
#include <net/route.h>
-/* Timestamps: lowest bits store TCP options */
-#define TSBITS 6
-#define TSMASK (((__u32)1 << TSBITS) - 1)
-
extern int sysctl_tcp_syncookies;
static u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS] __read_mostly;
@@ -30,6 +26,30 @@ static u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS] __read_mostly;
#define COOKIEBITS 24 /* Upper bits store count */
#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)
+/* TCP Timestamp: 6 lowest bits of timestamp sent in the cookie SYN-ACK
+ * stores TCP options:
+ *
+ * MSB LSB
+ * | 31 ... 6 | 5 | 4 | 3 2 1 0 |
+ * | Timestamp | ECN | SACK | WScale |
+ *
+ * When we receive a valid cookie-ACK, we look at the echoed tsval (if
+ * any) to figure out which TCP options we should use for the rebuilt
+ * connection.
+ *
+ * A WScale setting of '0xf' (which is an invalid scaling value)
+ * means that original syn did not include the TCP window scaling option.
+ */
+#define TS_OPT_WSCALE_MASK 0xf
+#define TS_OPT_SACK BIT(4)
+#define TS_OPT_ECN BIT(5)
+/* There is no TS_OPT_TIMESTAMP:
+ * if ACK contains timestamp option, we already know it was
+ * requested/supported by the syn/synack exchange.
+ */
+#define TSBITS 6
+#define TSMASK (((__u32)1 << TSBITS) - 1)
+
static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS],
ipv4_cookie_scratch);
@@ -67,9 +87,11 @@ __u32 cookie_init_timestamp(struct request_sock *req)
ireq = inet_rsk(req);
- options = ireq->wscale_ok ? ireq->snd_wscale : 0xf;
- options |= ireq->sack_ok << 4;
- options |= ireq->ecn_ok << 5;
+ options = ireq->wscale_ok ? ireq->snd_wscale : TS_OPT_WSCALE_MASK;
+ if (ireq->sack_ok)
+ options |= TS_OPT_SACK;
+ if (ireq->ecn_ok)
+ options |= TS_OPT_ECN;
ts = ts_now & ~TSMASK;
ts |= options;
@@ -219,16 +241,13 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
* additional tcp options in the timestamp.
* This extracts these options from the timestamp echo.
*
- * The lowest 4 bits store snd_wscale.
- * next 2 bits indicate SACK and ECN support.
- *
- * return false if we decode an option that should not be.
+ * return false if we decode a tcp option that is disabled
+ * on the host.
*/
-bool cookie_check_timestamp(struct tcp_options_received *tcp_opt,
- struct net *net, bool *ecn_ok)
+bool cookie_timestamp_decode(struct tcp_options_received *tcp_opt)
{
/* echoed timestamp, lowest bits contain options */
- u32 options = tcp_opt->rcv_tsecr & TSMASK;
+ u32 options = tcp_opt->rcv_tsecr;
if (!tcp_opt->saw_tstamp) {
tcp_clear_options(tcp_opt);
@@ -238,22 +257,35 @@ bool cookie_check_timestamp(struct tcp_options_received *tcp_opt,
if (!sysctl_tcp_timestamps)
return false;
- tcp_opt->sack_ok = (options & (1 << 4)) ? TCP_SACK_SEEN : 0;
- *ecn_ok = (options >> 5) & 1;
- if (*ecn_ok && !net->ipv4.sysctl_tcp_ecn)
- return false;
+ tcp_opt->sack_ok = (options & TS_OPT_SACK) ? TCP_SACK_SEEN : 0;
if (tcp_opt->sack_ok && !sysctl_tcp_sack)
return false;
- if ((options & 0xf) == 0xf)
+ if ((options & TS_OPT_WSCALE_MASK) == TS_OPT_WSCALE_MASK)
return true; /* no window scaling */
tcp_opt->wscale_ok = 1;
- tcp_opt->snd_wscale = options & 0xf;
+ tcp_opt->snd_wscale = options & TS_OPT_WSCALE_MASK;
+
return sysctl_tcp_window_scaling != 0;
}
-EXPORT_SYMBOL(cookie_check_timestamp);
+EXPORT_SYMBOL(cookie_timestamp_decode);
+
+bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt,
+ const struct net *net, const struct dst_entry *dst)
+{
+ bool ecn_ok = tcp_opt->rcv_tsecr & TS_OPT_ECN;
+
+ if (!ecn_ok)
+ return false;
+
+ if (net->ipv4.sysctl_tcp_ecn)
+ return true;
+
+ return dst_feature(dst, RTAX_FEATURE_ECN);
+}
+EXPORT_SYMBOL(cookie_ecn_ok);
struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
{
@@ -269,14 +301,16 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
int mss;
struct rtable *rt;
__u8 rcv_wscale;
- bool ecn_ok = false;
struct flowi4 fl4;
if (!sysctl_tcp_syncookies || !th->ack || th->rst)
goto out;
- if (tcp_synq_no_recent_overflow(sk) ||
- (mss = __cookie_v4_check(ip_hdr(skb), th, cookie)) == 0) {
+ if (tcp_synq_no_recent_overflow(sk))
+ goto out;
+
+ mss = __cookie_v4_check(ip_hdr(skb), th, cookie);
+ if (mss == 0) {
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
goto out;
}
@@ -287,7 +321,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
memset(&tcp_opt, 0, sizeof(tcp_opt));
tcp_parse_options(skb, &tcp_opt, 0, NULL);
- if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok))
+ if (!cookie_timestamp_decode(&tcp_opt))
goto out;
ret = NULL;
@@ -305,7 +339,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
ireq->ir_loc_addr = ip_hdr(skb)->daddr;
ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
ireq->ir_mark = inet_request_mark(sk, skb);
- ireq->ecn_ok = ecn_ok;
ireq->snd_wscale = tcp_opt.snd_wscale;
ireq->sack_ok = tcp_opt.sack_ok;
ireq->wscale_ok = tcp_opt.wscale_ok;
@@ -354,6 +387,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
dst_metric(&rt->dst, RTAX_INITRWND));
ireq->rcv_wscale = rcv_wscale;
+ ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), &rt->dst);
ret = get_cookie_sock(sk, skb, req, &rt->dst);
/* ip_queue_xmit() depends on our flow being setup
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index b3c53c8b331e..e0ee384a448f 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -496,6 +496,13 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_dointvec
},
{
+ .procname = "tcp_max_reordering",
+ .data = &sysctl_tcp_max_reordering,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
.procname = "tcp_dsack",
.data = &sysctl_tcp_dsack,
.maxlen = sizeof(int),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 39ec0c379545..c239f4740d10 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1377,7 +1377,7 @@ static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len)
/* XXX -- need to support SO_PEEK_OFF */
skb_queue_walk(&sk->sk_write_queue, skb) {
- err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, skb->len);
+ err = skb_copy_datagram_msg(skb, 0, msg, skb->len);
if (err)
break;
@@ -1833,8 +1833,7 @@ do_prequeue:
}
if (!(flags & MSG_TRUNC)) {
- err = skb_copy_datagram_iovec(skb, offset,
- msg->msg_iov, used);
+ err = skb_copy_datagram_msg(skb, offset, msg, used);
if (err) {
/* Exception. Bailout! */
if (!copied)
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index b1c5970d47a1..27ead0dd16bc 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -1,5 +1,5 @@
/*
- * Plugable TCP congestion control support and newReno
+ * Pluggable TCP congestion control support and newReno
* congestion control.
* Based on ideas from I/O scheduler support and Web100.
*
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 88fa2d160685..5f979c7f5135 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -81,6 +81,7 @@ int sysctl_tcp_window_scaling __read_mostly = 1;
int sysctl_tcp_sack __read_mostly = 1;
int sysctl_tcp_fack __read_mostly = 1;
int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
+int sysctl_tcp_max_reordering __read_mostly = 300;
EXPORT_SYMBOL(sysctl_tcp_reordering);
int sysctl_tcp_dsack __read_mostly = 1;
int sysctl_tcp_app_win __read_mostly = 31;
@@ -833,7 +834,7 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
if (metric > tp->reordering) {
int mib_idx;
- tp->reordering = min(TCP_MAX_REORDERING, metric);
+ tp->reordering = min(sysctl_tcp_max_reordering, metric);
/* This exciting event is worth to be remembered. 8) */
if (ts)
@@ -5030,7 +5031,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
/* step 3: check security and precedence [ignored] */
/* step 4: Check for a SYN
- * RFC 5691 4.2 : Send a challenge ack
+ * RFC 5961 4.2 : Send a challenge ack
*/
if (th->syn) {
syn_challenge:
@@ -5867,7 +5868,7 @@ static inline void pr_drop_req(struct request_sock *req, __u16 port, int family)
* If we receive a SYN packet with these bits set, it means a
* network is playing bad games with TOS bits. In order to
* avoid possible false congestion notifications, we disable
- * TCP ECN negociation.
+ * TCP ECN negotiation.
*
* Exception: tcp_ca wants ECN. This is required for DCTCP
* congestion control; it requires setting ECT on all packets,
@@ -5877,20 +5878,22 @@ static inline void pr_drop_req(struct request_sock *req, __u16 port, int family)
*/
static void tcp_ecn_create_request(struct request_sock *req,
const struct sk_buff *skb,
- const struct sock *listen_sk)
+ const struct sock *listen_sk,
+ const struct dst_entry *dst)
{
const struct tcphdr *th = tcp_hdr(skb);
const struct net *net = sock_net(listen_sk);
bool th_ecn = th->ece && th->cwr;
- bool ect, need_ecn;
+ bool ect, need_ecn, ecn_ok;
if (!th_ecn)
return;
ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield);
need_ecn = tcp_ca_needs_ecn(listen_sk);
+ ecn_ok = net->ipv4.sysctl_tcp_ecn || dst_feature(dst, RTAX_FEATURE_ECN);
- if (!ect && !need_ecn && net->ipv4.sysctl_tcp_ecn)
+ if (!ect && !need_ecn && ecn_ok)
inet_rsk(req)->ecn_ok = 1;
else if (ect && need_ecn)
inet_rsk(req)->ecn_ok = 1;
@@ -5955,13 +5958,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
if (security_inet_conn_request(sk, skb, req))
goto drop_and_free;
- if (!want_cookie || tmp_opt.tstamp_ok)
- tcp_ecn_create_request(req, skb, sk);
-
- if (want_cookie) {
- isn = cookie_init_sequence(af_ops, sk, skb, &req->mss);
- req->cookie_ts = tmp_opt.tstamp_ok;
- } else if (!isn) {
+ if (!want_cookie && !isn) {
/* VJ's idea. We save last timestamp seen
* from the destination in peer table, when entering
* state TIME-WAIT, and check against it before
@@ -6009,6 +6006,15 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
goto drop_and_free;
}
+ tcp_ecn_create_request(req, skb, sk, dst);
+
+ if (want_cookie) {
+ isn = cookie_init_sequence(af_ops, sk, skb, &req->mss);
+ req->cookie_ts = tmp_opt.tstamp_ok;
+ if (!tmp_opt.tstamp_ok)
+ inet_rsk(req)->ecn_ok = 0;
+ }
+
tcp_rsk(req)->snt_isn = isn;
tcp_openreq_init_rwin(req, sk, dst);
fastopen = !want_cookie &&
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 5b90f2f447a5..9d7930ba8e0f 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -94,9 +94,9 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
SKB_GSO_GRE_CSUM |
SKB_GSO_IPIP |
SKB_GSO_SIT |
- SKB_GSO_MPLS |
SKB_GSO_UDP_TUNNEL |
SKB_GSO_UDP_TUNNEL_CSUM |
+ SKB_GSO_TUNNEL_REMCSUM |
0) ||
!(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
goto out;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index a3d453b94747..0b88158dd4a7 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -333,10 +333,19 @@ static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
+ bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 ||
+ tcp_ca_needs_ecn(sk);
+
+ if (!use_ecn) {
+ const struct dst_entry *dst = __sk_dst_get(sk);
+
+ if (dst && dst_feature(dst, RTAX_FEATURE_ECN))
+ use_ecn = true;
+ }
tp->ecn_flags = 0;
- if (sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 ||
- tcp_ca_needs_ecn(sk)) {
+
+ if (use_ecn) {
TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
tp->ecn_flags = TCP_ECN_OK;
if (tcp_ca_needs_ecn(sk))
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index cd0db5471bb5..df19027f44f3 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1281,8 +1281,8 @@ try_again:
}
if (skb_csum_unnecessary(skb))
- err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
- msg->msg_iov, copied);
+ err = skb_copy_datagram_msg(skb, sizeof(struct udphdr),
+ msg, copied);
else {
err = skb_copy_and_csum_datagram_iovec(skb,
sizeof(struct udphdr),
@@ -1777,14 +1777,13 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
if (ret > 0)
return -ret;
return 0;
- } else {
- if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
- return __udp4_lib_mcast_deliver(net, skb, uh,
- saddr, daddr, udptable);
-
- sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
}
+ if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
+ return __udp4_lib_mcast_deliver(net, skb, uh,
+ saddr, daddr, udptable);
+
+ sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
if (sk != NULL) {
int ret;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 6480cea7aa53..d3e537ef6b7f 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -29,7 +29,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
netdev_features_t features,
struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
netdev_features_t features),
- __be16 new_protocol)
+ __be16 new_protocol, bool is_ipv6)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
u16 mac_offset = skb->mac_header;
@@ -39,7 +39,10 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
netdev_features_t enc_features;
int udp_offset, outer_hlen;
unsigned int oldlen;
- bool need_csum;
+ bool need_csum = !!(skb_shinfo(skb)->gso_type &
+ SKB_GSO_UDP_TUNNEL_CSUM);
+ bool remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM);
+ bool offload_csum = false, dont_encap = (need_csum || remcsum);
oldlen = (u16)~skb->len;
@@ -52,10 +55,13 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
skb_set_network_header(skb, skb_inner_network_offset(skb));
skb->mac_len = skb_inner_network_offset(skb);
skb->protocol = new_protocol;
+ skb->encap_hdr_csum = need_csum;
+ skb->remcsum_offload = remcsum;
- need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
- if (need_csum)
- skb->encap_hdr_csum = 1;
+ /* Try to offload checksum if possible */
+ offload_csum = !!(need_csum &&
+ (skb->dev->features &
+ (is_ipv6 ? NETIF_F_V6_CSUM : NETIF_F_V4_CSUM)));
/* segment inner packet. */
enc_features = skb->dev->hw_enc_features & features;
@@ -72,11 +78,21 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
do {
struct udphdr *uh;
int len;
-
- skb_reset_inner_headers(skb);
- skb->encapsulation = 1;
+ __be32 delta;
+
+ if (dont_encap) {
+ skb->encapsulation = 0;
+ skb->ip_summed = CHECKSUM_NONE;
+ } else {
+ /* Only set up inner headers if we might be offloading
+ * inner checksum.
+ */
+ skb_reset_inner_headers(skb);
+ skb->encapsulation = 1;
+ }
skb->mac_len = mac_len;
+ skb->protocol = protocol;
skb_push(skb, outer_hlen);
skb_reset_mac_header(skb);
@@ -86,19 +102,36 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
uh = udp_hdr(skb);
uh->len = htons(len);
- if (need_csum) {
- __be32 delta = htonl(oldlen + len);
+ if (!need_csum)
+ continue;
- uh->check = ~csum_fold((__force __wsum)
- ((__force u32)uh->check +
- (__force u32)delta));
+ delta = htonl(oldlen + len);
+
+ uh->check = ~csum_fold((__force __wsum)
+ ((__force u32)uh->check +
+ (__force u32)delta));
+ if (offload_csum) {
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = offsetof(struct udphdr, check);
+ } else if (remcsum) {
+ /* Need to calculate checksum from scratch,
+ * inner checksums are never when doing
+ * remote_checksum_offload.
+ */
+
+ skb->csum = skb_checksum(skb, udp_offset,
+ skb->len - udp_offset,
+ 0);
+ uh->check = csum_fold(skb->csum);
+ if (uh->check == 0)
+ uh->check = CSUM_MANGLED_0;
+ } else {
uh->check = gso_make_checksum(skb, ~uh->check);
if (uh->check == 0)
uh->check = CSUM_MANGLED_0;
}
-
- skb->protocol = protocol;
} while ((skb = skb->next));
out:
return segs;
@@ -134,7 +167,7 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
}
segs = __skb_udp_tunnel_segment(skb, features, gso_inner_segment,
- protocol);
+ protocol, is_ipv6);
out_unlock:
rcu_read_unlock();
@@ -172,9 +205,9 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
SKB_GSO_UDP_TUNNEL |
SKB_GSO_UDP_TUNNEL_CSUM |
+ SKB_GSO_TUNNEL_REMCSUM |
SKB_GSO_IPIP |
- SKB_GSO_GRE | SKB_GSO_GRE_CSUM |
- SKB_GSO_MPLS) ||
+ SKB_GSO_GRE | SKB_GSO_GRE_CSUM) ||
!(type & (SKB_GSO_UDP))))
goto out;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 0169ccf5aa4f..06e897832a7a 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1170,6 +1170,9 @@ enum {
IPV6_SADDR_RULE_PRIVACY,
IPV6_SADDR_RULE_ORCHID,
IPV6_SADDR_RULE_PREFIX,
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ IPV6_SADDR_RULE_NOT_OPTIMISTIC,
+#endif
IPV6_SADDR_RULE_MAX
};
@@ -1197,6 +1200,15 @@ static inline int ipv6_saddr_preferred(int type)
return 0;
}
+static inline bool ipv6_use_optimistic_addr(struct inet6_dev *idev)
+{
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ return idev && idev->cnf.optimistic_dad && idev->cnf.use_optimistic;
+#else
+ return false;
+#endif
+}
+
static int ipv6_get_saddr_eval(struct net *net,
struct ipv6_saddr_score *score,
struct ipv6_saddr_dst *dst,
@@ -1257,10 +1269,16 @@ static int ipv6_get_saddr_eval(struct net *net,
score->scopedist = ret;
break;
case IPV6_SADDR_RULE_PREFERRED:
+ {
/* Rule 3: Avoid deprecated and optimistic addresses */
+ u8 avoid = IFA_F_DEPRECATED;
+
+ if (!ipv6_use_optimistic_addr(score->ifa->idev))
+ avoid |= IFA_F_OPTIMISTIC;
ret = ipv6_saddr_preferred(score->addr_type) ||
- !(score->ifa->flags & (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC));
+ !(score->ifa->flags & avoid);
break;
+ }
#ifdef CONFIG_IPV6_MIP6
case IPV6_SADDR_RULE_HOA:
{
@@ -1306,6 +1324,14 @@ static int ipv6_get_saddr_eval(struct net *net,
ret = score->ifa->prefix_len;
score->matchlen = ret;
break;
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ case IPV6_SADDR_RULE_NOT_OPTIMISTIC:
+ /* Optimistic addresses still have lower precedence than other
+ * preferred addresses.
+ */
+ ret = !(score->ifa->flags & IFA_F_OPTIMISTIC);
+ break;
+#endif
default:
ret = 0;
}
@@ -2315,8 +2341,8 @@ ok:
else
stored_lft = 0;
if (!update_lft && !create && stored_lft) {
- const u32 minimum_lft = min(
- stored_lft, (u32)MIN_VALID_LIFETIME);
+ const u32 minimum_lft = min_t(u32,
+ stored_lft, MIN_VALID_LIFETIME);
valid_lft = max(valid_lft, minimum_lft);
/* RFC4862 Section 5.5.3e:
@@ -3222,8 +3248,15 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
* Optimistic nodes can start receiving
* Frames right away
*/
- if (ifp->flags & IFA_F_OPTIMISTIC)
+ if (ifp->flags & IFA_F_OPTIMISTIC) {
ip6_ins_rt(ifp->rt);
+ if (ipv6_use_optimistic_addr(idev)) {
+ /* Because optimistic nodes can use this address,
+ * notify listeners. If DAD fails, RTM_DELADDR is sent.
+ */
+ ipv6_ifa_notify(RTM_NEWADDR, ifp);
+ }
+ }
addrconf_dad_kick(ifp);
out:
@@ -4330,6 +4363,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route;
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
array[DEVCONF_OPTIMISTIC_DAD] = cnf->optimistic_dad;
+ array[DEVCONF_USE_OPTIMISTIC] = cnf->use_optimistic;
#endif
#ifdef CONFIG_IPV6_MROUTE
array[DEVCONF_MC_FORWARDING] = cnf->mc_forwarding;
@@ -5156,6 +5190,14 @@ static struct addrconf_sysctl_table
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "use_optimistic",
+ .data = &ipv6_devconf.use_optimistic,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+
+ },
#endif
#ifdef CONFIG_IPV6_MROUTE
{
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 2cdc38338be3..5c6996e44b14 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -351,7 +351,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
msg->msg_flags |= MSG_TRUNC;
copied = len;
}
- err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ err = skb_copy_datagram_msg(skb, 0, msg, copied);
if (err)
goto out_free_skb;
@@ -445,7 +445,7 @@ int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len,
msg->msg_flags |= MSG_TRUNC;
copied = len;
}
- err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ err = skb_copy_datagram_msg(skb, 0, msg, copied);
if (err)
goto out_free_skb;
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 83fc3a385a26..d21d7b22eebc 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -544,12 +544,12 @@ static int esp_init_authenc(struct xfrm_state *x)
BUG_ON(!aalg_desc);
err = -EINVAL;
- if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
+ if (aalg_desc->uinfo.auth.icv_fullbits / 8 !=
crypto_aead_authsize(aead)) {
- NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n",
- x->aalg->alg_name,
- crypto_aead_authsize(aead),
- aalg_desc->uinfo.auth.icv_fullbits/8);
+ pr_info("ESP: %s digestsize %u != %hu\n",
+ x->aalg->alg_name,
+ crypto_aead_authsize(aead),
+ aalg_desc->uinfo.auth.icv_fullbits / 8);
goto free_key;
}
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index bfde361b6134..601d896f22d0 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -47,7 +47,7 @@
#include <net/xfrm.h>
#endif
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
/*
* Parsing tlv encoded headers.
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 97ae70077a4f..62c1037d9e83 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -1009,4 +1009,3 @@ struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
return table;
}
#endif
-
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 3dd7d4ebd7cd..7221021b2d97 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -769,10 +769,9 @@ static void ip6fl_seq_stop(struct seq_file *seq, void *v)
static int ip6fl_seq_show(struct seq_file *seq, void *v)
{
struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
- if (v == SEQ_START_TOKEN)
- seq_printf(seq, "%-5s %-1s %-6s %-6s %-6s %-8s %-32s %s\n",
- "Label", "S", "Owner", "Users", "Linger", "Expires", "Dst", "Opt");
- else {
+ if (v == SEQ_START_TOKEN) {
+ seq_puts(seq, "Label S Owner Users Linger Expires Dst Opt\n");
+ } else {
struct ip6_flowlabel *fl = v;
seq_printf(seq,
"%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n",
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 4564e1fca3eb..f6e2533c1145 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -902,7 +902,7 @@ static netdev_tx_t ip6gre_tunnel_xmit(struct sk_buff *skb,
struct net_device_stats *stats = &t->dev->stats;
int ret;
- if (!ip6_tnl_xmit_ctl(t))
+ if (!ip6_tnl_xmit_ctl(t, &t->parms.laddr, &t->parms.raddr))
goto tx_err;
switch (skb->protocol) {
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index a071563a7e6e..fd76ce938c32 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -78,7 +78,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
SKB_GSO_SIT |
SKB_GSO_UDP_TUNNEL |
SKB_GSO_UDP_TUNNEL_CSUM |
- SKB_GSO_MPLS |
+ SKB_GSO_TUNNEL_REMCSUM |
SKB_GSO_TCPV6 |
0)))
goto out;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 8e950c250ada..916d2a166a9b 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -747,13 +747,11 @@ slow_path:
if (len < left) {
len &= ~7;
}
- /*
- * Allocate buffer.
- */
- if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
- hroom + troom, GFP_ATOMIC)) == NULL) {
- NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
+ /* Allocate buffer */
+ frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
+ hroom + troom, GFP_ATOMIC);
+ if (!frag) {
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_FRAGFAILS);
err = -ENOMEM;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 9cb94cfa0ae7..e2b6cfba873c 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -183,6 +183,7 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_
unsigned int hash = HASH(remote, local);
struct ip6_tnl *t;
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+ struct in6_addr any;
for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
if (ipv6_addr_equal(local, &t->parms.laddr) &&
@@ -190,6 +191,22 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_
(t->dev->flags & IFF_UP))
return t;
}
+
+ memset(&any, 0, sizeof(any));
+ hash = HASH(&any, local);
+ for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
+ if (ipv6_addr_equal(local, &t->parms.laddr) &&
+ (t->dev->flags & IFF_UP))
+ return t;
+ }
+
+ hash = HASH(remote, &any);
+ for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
+ if (ipv6_addr_equal(remote, &t->parms.raddr) &&
+ (t->dev->flags & IFF_UP))
+ return t;
+ }
+
t = rcu_dereference(ip6n->tnls_wc[0]);
if (t && (t->dev->flags & IFF_UP))
return t;
@@ -474,6 +491,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
int rel_msg = 0;
u8 rel_type = ICMPV6_DEST_UNREACH;
u8 rel_code = ICMPV6_ADDR_UNREACH;
+ u8 tproto;
__u32 rel_info = 0;
__u16 len;
int err = -ENOENT;
@@ -487,7 +505,8 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
&ipv6h->saddr)) == NULL)
goto out;
- if (t->parms.proto != ipproto && t->parms.proto != 0)
+ tproto = ACCESS_ONCE(t->parms.proto);
+ if (tproto != ipproto && tproto != 0)
goto out;
err = 0;
@@ -788,6 +807,7 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
{
struct ip6_tnl *t;
const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ u8 tproto;
int err;
rcu_read_lock();
@@ -796,7 +816,8 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
&ipv6h->daddr)) != NULL) {
struct pcpu_sw_netstats *tstats;
- if (t->parms.proto != ipproto && t->parms.proto != 0) {
+ tproto = ACCESS_ONCE(t->parms.proto);
+ if (tproto != ipproto && tproto != 0) {
rcu_read_unlock();
goto discard;
}
@@ -902,24 +923,28 @@ ip6_tnl_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr)
return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
}
-int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
+int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
+ const struct in6_addr *laddr,
+ const struct in6_addr *raddr)
{
struct __ip6_tnl_parm *p = &t->parms;
int ret = 0;
struct net *net = t->net;
- if (p->flags & IP6_TNL_F_CAP_XMIT) {
+ if ((p->flags & IP6_TNL_F_CAP_XMIT) ||
+ ((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
+ (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_XMIT))) {
struct net_device *ldev = NULL;
rcu_read_lock();
if (p->link)
ldev = dev_get_by_index_rcu(net, p->link);
- if (unlikely(!ipv6_chk_addr(net, &p->laddr, ldev, 0)))
+ if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0)))
pr_warn("%s xmit: Local address not yet configured!\n",
p->name);
- else if (!ipv6_addr_is_multicast(&p->raddr) &&
- unlikely(ipv6_chk_addr(net, &p->raddr, NULL, 0)))
+ else if (!ipv6_addr_is_multicast(raddr) &&
+ unlikely(ipv6_chk_addr(net, raddr, NULL, 0)))
pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
p->name);
else
@@ -968,8 +993,34 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
u8 proto;
int err = -1;
- if (!fl6->flowi6_mark)
+ /* NBMA tunnel */
+ if (ipv6_addr_any(&t->parms.raddr)) {
+ struct in6_addr *addr6;
+ struct neighbour *neigh;
+ int addr_type;
+
+ if (!skb_dst(skb))
+ goto tx_err_link_failure;
+
+ neigh = dst_neigh_lookup(skb_dst(skb),
+ &ipv6_hdr(skb)->daddr);
+ if (!neigh)
+ goto tx_err_link_failure;
+
+ addr6 = (struct in6_addr *)&neigh->primary_key;
+ addr_type = ipv6_addr_type(addr6);
+
+ if (addr_type == IPV6_ADDR_ANY)
+ addr6 = &ipv6_hdr(skb)->daddr;
+
+ memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
+ neigh_release(neigh);
+ } else if (!fl6->flowi6_mark)
dst = ip6_tnl_dst_check(t);
+
+ if (!ip6_tnl_xmit_ctl(t, &fl6->saddr, &fl6->daddr))
+ goto tx_err_link_failure;
+
if (!dst) {
ndst = ip6_route_output(net, NULL, fl6);
@@ -1075,10 +1126,11 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
struct flowi6 fl6;
__u8 dsfield;
__u32 mtu;
+ u8 tproto;
int err;
- if ((t->parms.proto != IPPROTO_IPIP && t->parms.proto != 0) ||
- !ip6_tnl_xmit_ctl(t))
+ tproto = ACCESS_ONCE(t->parms.proto);
+ if (tproto != IPPROTO_IPIP && tproto != 0)
return -1;
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
@@ -1117,10 +1169,12 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
struct flowi6 fl6;
__u8 dsfield;
__u32 mtu;
+ u8 tproto;
int err;
- if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) ||
- !ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h))
+ tproto = ACCESS_ONCE(t->parms.proto);
+ if ((tproto != IPPROTO_IPV6 && tproto != 0) ||
+ ip6_tnl_addr_conflict(t, ipv6h))
return -1;
offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
@@ -1282,6 +1336,14 @@ static int ip6_tnl_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
return err;
}
+static int ip6_tnl0_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p)
+{
+ /* for default tnl0 device allow to change only the proto */
+ t->parms.proto = p->proto;
+ netdev_state_change(t->dev);
+ return 0;
+}
+
static void
ip6_tnl_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm *u)
{
@@ -1381,7 +1443,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
break;
ip6_tnl_parm_from_user(&p1, &p);
t = ip6_tnl_locate(net, &p1, cmd == SIOCADDTUNNEL);
- if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) {
+ if (cmd == SIOCCHGTUNNEL) {
if (t != NULL) {
if (t->dev != dev) {
err = -EEXIST;
@@ -1389,8 +1451,10 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
}
} else
t = netdev_priv(dev);
-
- err = ip6_tnl_update(t, &p1);
+ if (dev == ip6n->fb_tnl_dev)
+ err = ip6_tnl0_update(t, &p1);
+ else
+ err = ip6_tnl_update(t, &p1);
}
if (t) {
err = 0;
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 31089d153fd3..ec84d03491c7 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -412,6 +412,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
struct net_device_stats *stats = &t->dev->stats;
struct dst_entry *dst = skb_dst(skb);
struct net_device *tdev;
+ struct xfrm_state *x;
int err = -1;
if (!dst)
@@ -425,7 +426,12 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
goto tx_err_link_failure;
}
- if (!vti6_state_check(dst->xfrm, &t->parms.raddr, &t->parms.laddr))
+ x = dst->xfrm;
+ if (!vti6_state_check(x, &t->parms.raddr, &t->parms.laddr))
+ goto tx_err_link_failure;
+
+ if (!ip6_tnl_xmit_ctl(t, (const struct in6_addr *)&x->props.saddr,
+ (const struct in6_addr *)&x->id.daddr))
goto tx_err_link_failure;
tdev = dst->dev;
@@ -480,7 +486,7 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
ipv6h = ipv6_hdr(skb);
if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) ||
- !ip6_tnl_xmit_ctl(t) || vti6_addr_conflict(t, ipv6h))
+ vti6_addr_conflict(t, ipv6h))
goto tx_err;
xfrm_decode_session(skb, &fl, AF_INET6);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 0171f08325c3..467f310dbbb3 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -2090,7 +2090,7 @@ static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
struct mfc6_cache *cache_proxy;
- /* For an (*,G) entry, we only check that the incomming
+ /* For an (*,G) entry, we only check that the incoming
* interface is part of the static tree.
*/
cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 9648de2b6745..5ce107c8aab3 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1550,7 +1550,7 @@ static void ip6_mc_hdr(struct sock *sk, struct sk_buff *skb,
hdr->daddr = *daddr;
}
-static struct sk_buff *mld_newpack(struct inet6_dev *idev, int size)
+static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
{
struct net_device *dev = idev->dev;
struct net *net = dev_net(dev);
@@ -1561,13 +1561,13 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, int size)
const struct in6_addr *saddr;
int hlen = LL_RESERVED_SPACE(dev);
int tlen = dev->needed_tailroom;
+ unsigned int size = mtu + hlen + tlen;
int err;
u8 ra[8] = { IPPROTO_ICMPV6, 0,
IPV6_TLV_ROUTERALERT, 2, 0, 0,
IPV6_TLV_PADN, 0 };
/* we assume size > sizeof(ra) here */
- size += hlen + tlen;
/* limit our allocations to order-0 page */
size = min_t(int, size, SKB_MAX_ORDER(0, 0));
skb = sock_alloc_send_skb(sk, size, 1, &err);
@@ -1576,6 +1576,8 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, int size)
return NULL;
skb->priority = TC_PRIO_CONTROL;
+ skb->reserved_tailroom = skb_end_offset(skb) -
+ min(mtu, skb_end_offset(skb));
skb_reserve(skb, hlen);
if (__ipv6_get_lladdr(idev, &addr_buf, IFA_F_TENTATIVE)) {
@@ -1690,8 +1692,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
return skb;
}
-#define AVAILABLE(skb) ((skb) ? ((skb)->dev ? (skb)->dev->mtu - (skb)->len : \
- skb_tailroom(skb)) : 0)
+#define AVAILABLE(skb) ((skb) ? skb_availroom(skb) : 0)
static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
int type, int gdeleted, int sdeleted, int crsend)
@@ -2823,11 +2824,7 @@ static int igmp6_mcf_seq_show(struct seq_file *seq, void *v)
struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq);
if (v == SEQ_START_TOKEN) {
- seq_printf(seq,
- "%3s %6s "
- "%32s %32s %6s %6s\n", "Idx",
- "Device", "Multicast Address",
- "Source Address", "INC", "EXC");
+ seq_puts(seq, "Idx Device Multicast Address Source Address INC EXC\n");
} else {
seq_printf(seq,
"%3d %6.6s %pi6 %pi6 %6lu %6lu\n",
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 896af8807979..0cbcf98f2cab 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -486,11 +486,11 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
}
if (skb_csum_unnecessary(skb)) {
- err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ err = skb_copy_datagram_msg(skb, 0, msg, copied);
} else if (msg->msg_flags&MSG_TRUNC) {
if (__skb_checksum_complete(skb))
goto csum_copy_err;
- err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ err = skb_copy_datagram_msg(skb, 0, msg, copied);
} else {
err = skb_copy_and_csum_datagram_iovec(skb, 0, msg->msg_iov);
if (err == -EINVAL)
@@ -548,7 +548,8 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
if (!rp->checksum)
goto send;
- if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
+ skb = skb_peek(&sk->sk_write_queue);
+ if (!skb)
goto out;
offset = rp->offset;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 1a157ca2ebc1..51ab096ae574 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -69,7 +69,7 @@ struct ip6frag_skb_cb {
#define FRAG6_CB(skb) ((struct ip6frag_skb_cb *)((skb)->cb))
-static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
+static u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
{
return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
}
@@ -178,7 +178,7 @@ static void ip6_frag_expire(unsigned long data)
ip6_expire_frag_queue(net, fq, &ip6_frags);
}
-static __inline__ struct frag_queue *
+static struct frag_queue *
fq_find(struct net *net, __be32 id, const struct in6_addr *src,
const struct in6_addr *dst, u8 ecn)
{
@@ -684,21 +684,21 @@ static void ip6_frags_sysctl_unregister(void)
unregister_net_sysctl_table(ip6_ctl_header);
}
#else
-static inline int ip6_frags_ns_sysctl_register(struct net *net)
+static int ip6_frags_ns_sysctl_register(struct net *net)
{
return 0;
}
-static inline void ip6_frags_ns_sysctl_unregister(struct net *net)
+static void ip6_frags_ns_sysctl_unregister(struct net *net)
{
}
-static inline int ip6_frags_sysctl_register(void)
+static int ip6_frags_sysctl_register(void)
{
return 0;
}
-static inline void ip6_frags_sysctl_unregister(void)
+static void ip6_frags_sysctl_unregister(void)
{
}
#endif
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a318dd89b6d9..c91083156edb 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -772,23 +772,22 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
}
#endif
-#define BACKTRACK(__net, saddr) \
-do { \
- if (rt == __net->ipv6.ip6_null_entry) { \
- struct fib6_node *pn; \
- while (1) { \
- if (fn->fn_flags & RTN_TL_ROOT) \
- goto out; \
- pn = fn->parent; \
- if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
- fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
- else \
- fn = pn; \
- if (fn->fn_flags & RTN_RTINFO) \
- goto restart; \
- } \
- } \
-} while (0)
+static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
+ struct in6_addr *saddr)
+{
+ struct fib6_node *pn;
+ while (1) {
+ if (fn->fn_flags & RTN_TL_ROOT)
+ return NULL;
+ pn = fn->parent;
+ if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
+ fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
+ else
+ fn = pn;
+ if (fn->fn_flags & RTN_RTINFO)
+ return fn;
+ }
+}
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
struct fib6_table *table,
@@ -804,8 +803,11 @@ restart:
rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
- BACKTRACK(net, &fl6->saddr);
-out:
+ if (rt == net->ipv6.ip6_null_entry) {
+ fn = fib6_backtrack(fn, &fl6->saddr);
+ if (fn)
+ goto restart;
+ }
dst_use(&rt->dst, jiffies);
read_unlock_bh(&table->tb6_lock);
return rt;
@@ -915,33 +917,48 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
struct flowi6 *fl6, int flags)
{
- struct fib6_node *fn;
+ struct fib6_node *fn, *saved_fn;
struct rt6_info *rt, *nrt;
int strict = 0;
int attempts = 3;
int err;
- int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
strict |= flags & RT6_LOOKUP_F_IFACE;
+ if (net->ipv6.devconf_all->forwarding == 0)
+ strict |= RT6_LOOKUP_F_REACHABLE;
-relookup:
+redo_fib6_lookup_lock:
read_lock_bh(&table->tb6_lock);
-restart_2:
fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
+ saved_fn = fn;
-restart:
- rt = rt6_select(fn, oif, strict | reachable);
+redo_rt6_select:
+ rt = rt6_select(fn, oif, strict);
if (rt->rt6i_nsiblings)
- rt = rt6_multipath_select(rt, fl6, oif, strict | reachable);
- BACKTRACK(net, &fl6->saddr);
- if (rt == net->ipv6.ip6_null_entry ||
- rt->rt6i_flags & RTF_CACHE)
- goto out;
+ rt = rt6_multipath_select(rt, fl6, oif, strict);
+ if (rt == net->ipv6.ip6_null_entry) {
+ fn = fib6_backtrack(fn, &fl6->saddr);
+ if (fn)
+ goto redo_rt6_select;
+ else if (strict & RT6_LOOKUP_F_REACHABLE) {
+ /* also consider unreachable route */
+ strict &= ~RT6_LOOKUP_F_REACHABLE;
+ fn = saved_fn;
+ goto redo_rt6_select;
+ } else {
+ dst_hold(&rt->dst);
+ read_unlock_bh(&table->tb6_lock);
+ goto out2;
+ }
+ }
dst_hold(&rt->dst);
read_unlock_bh(&table->tb6_lock);
+ if (rt->rt6i_flags & RTF_CACHE)
+ goto out2;
+
if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
else if (!(rt->dst.flags & DST_HOST))
@@ -967,15 +984,8 @@ restart:
* released someone could insert this route. Relookup.
*/
ip6_rt_put(rt);
- goto relookup;
+ goto redo_fib6_lookup_lock;
-out:
- if (reachable) {
- reachable = 0;
- goto restart_2;
- }
- dst_hold(&rt->dst);
- read_unlock_bh(&table->tb6_lock);
out2:
rt->dst.lastuse = jiffies;
rt->dst.__use++;
@@ -1235,10 +1245,12 @@ restart:
rt = net->ipv6.ip6_null_entry;
else if (rt->dst.error) {
rt = net->ipv6.ip6_null_entry;
- goto out;
+ } else if (rt == net->ipv6.ip6_null_entry) {
+ fn = fib6_backtrack(fn, &fl6->saddr);
+ if (fn)
+ goto restart;
}
- BACKTRACK(net, &fl6->saddr);
-out:
+
dst_hold(&rt->dst);
read_unlock_bh(&table->tb6_lock);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index a24557a1c1d8..660496de6125 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1711,7 +1711,7 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u16(skb, IFLA_IPTUN_ENCAP_DPORT,
tunnel->encap.dport) ||
nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS,
- tunnel->encap.dport))
+ tunnel->encap.flags))
goto nla_put_failure;
return 0;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 2f25cb6347ca..7337fc7947e2 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -166,13 +166,15 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
int mss;
struct dst_entry *dst;
__u8 rcv_wscale;
- bool ecn_ok = false;
if (!sysctl_tcp_syncookies || !th->ack || th->rst)
goto out;
- if (tcp_synq_no_recent_overflow(sk) ||
- (mss = __cookie_v6_check(ipv6_hdr(skb), th, cookie)) == 0) {
+ if (tcp_synq_no_recent_overflow(sk))
+ goto out;
+
+ mss = __cookie_v6_check(ipv6_hdr(skb), th, cookie);
+ if (mss == 0) {
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
goto out;
}
@@ -183,7 +185,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
memset(&tcp_opt, 0, sizeof(tcp_opt));
tcp_parse_options(skb, &tcp_opt, 0, NULL);
- if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok))
+ if (!cookie_timestamp_decode(&tcp_opt))
goto out;
ret = NULL;
@@ -220,7 +222,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
req->expires = 0UL;
req->num_retrans = 0;
- ireq->ecn_ok = ecn_ok;
ireq->snd_wscale = tcp_opt.snd_wscale;
ireq->sack_ok = tcp_opt.sack_ok;
ireq->wscale_ok = tcp_opt.wscale_ok;
@@ -261,6 +262,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
dst_metric(dst, RTAX_INITRWND));
ireq->rcv_wscale = rcv_wscale;
+ ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), dst);
ret = get_cookie_sock(sk, skb, req, dst);
out:
@@ -269,4 +271,3 @@ out_free:
reqsk_free(req);
return NULL;
}
-
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index f6ba535b6feb..9b6809232b17 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -424,8 +424,8 @@ try_again:
}
if (skb_csum_unnecessary(skb))
- err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
- msg->msg_iov, copied);
+ err = skb_copy_datagram_msg(skb, sizeof(struct udphdr),
+ msg, copied);
else {
err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
if (err == -EINVAL)
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 6b8f543f6ac6..b6aa8ed18257 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -42,11 +42,11 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
SKB_GSO_DODGY |
SKB_GSO_UDP_TUNNEL |
SKB_GSO_UDP_TUNNEL_CSUM |
+ SKB_GSO_TUNNEL_REMCSUM |
SKB_GSO_GRE |
SKB_GSO_GRE_CSUM |
SKB_GSO_IPIP |
- SKB_GSO_SIT |
- SKB_GSO_MPLS) ||
+ SKB_GSO_SIT) ||
!(type & (SKB_GSO_UDP))))
goto out;
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index 91729b807c7d..a0c75366c93b 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -306,7 +306,7 @@ void ipxitf_down(struct ipx_interface *intrfc)
spin_unlock_bh(&ipx_interfaces_lock);
}
-static __inline__ void __ipxitf_put(struct ipx_interface *intrfc)
+static void __ipxitf_put(struct ipx_interface *intrfc)
{
if (atomic_dec_and_test(&intrfc->refcnt))
__ipxitf_down(intrfc);
@@ -1805,8 +1805,7 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock,
msg->msg_flags |= MSG_TRUNC;
}
- rc = skb_copy_datagram_iovec(skb, sizeof(struct ipxhdr), msg->msg_iov,
- copied);
+ rc = skb_copy_datagram_msg(skb, sizeof(struct ipxhdr), msg, copied);
if (rc)
goto out_free;
if (skb->tstamp.tv64)
diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c
index e15c16a517e7..c1d247ebe916 100644
--- a/net/ipx/ipx_proc.c
+++ b/net/ipx/ipx_proc.c
@@ -45,7 +45,7 @@ static int ipx_seq_interface_show(struct seq_file *seq, void *v)
}
i = list_entry(v, struct ipx_interface, node);
- seq_printf(seq, "%08lX ", (unsigned long int)ntohl(i->if_netnum));
+ seq_printf(seq, "%08X ", ntohl(i->if_netnum));
seq_printf(seq, "%02X%02X%02X%02X%02X%02X ",
i->if_node[0], i->if_node[1], i->if_node[2],
i->if_node[3], i->if_node[4], i->if_node[5]);
@@ -87,10 +87,10 @@ static int ipx_seq_route_show(struct seq_file *seq, void *v)
rt = list_entry(v, struct ipx_route, node);
- seq_printf(seq, "%08lX ", (unsigned long int)ntohl(rt->ir_net));
+ seq_printf(seq, "%08X ", ntohl(rt->ir_net));
if (rt->ir_routed)
- seq_printf(seq, "%08lX %02X%02X%02X%02X%02X%02X\n",
- (long unsigned int)ntohl(rt->ir_intrfc->if_netnum),
+ seq_printf(seq, "%08X %02X%02X%02X%02X%02X%02X\n",
+ ntohl(rt->ir_intrfc->if_netnum),
rt->ir_router_node[0], rt->ir_router_node[1],
rt->ir_router_node[2], rt->ir_router_node[3],
rt->ir_router_node[4], rt->ir_router_node[5]);
@@ -194,19 +194,19 @@ static int ipx_seq_socket_show(struct seq_file *seq, void *v)
s = v;
ipxs = ipx_sk(s);
#ifdef CONFIG_IPX_INTERN
- seq_printf(seq, "%08lX:%02X%02X%02X%02X%02X%02X:%04X ",
- (unsigned long)ntohl(ipxs->intrfc->if_netnum),
+ seq_printf(seq, "%08X:%02X%02X%02X%02X%02X%02X:%04X ",
+ ntohl(ipxs->intrfc->if_netnum),
ipxs->node[0], ipxs->node[1], ipxs->node[2], ipxs->node[3],
ipxs->node[4], ipxs->node[5], ntohs(ipxs->port));
#else
- seq_printf(seq, "%08lX:%04X ", (unsigned long) ntohl(ipxs->intrfc->if_netnum),
+ seq_printf(seq, "%08X:%04X ", ntohl(ipxs->intrfc->if_netnum),
ntohs(ipxs->port));
#endif /* CONFIG_IPX_INTERN */
if (s->sk_state != TCP_ESTABLISHED)
seq_printf(seq, "%-28s", "Not_Connected");
else {
- seq_printf(seq, "%08lX:%02X%02X%02X%02X%02X%02X:%04X ",
- (unsigned long)ntohl(ipxs->dest_addr.net),
+ seq_printf(seq, "%08X:%02X%02X%02X%02X%02X%02X:%04X ",
+ ntohl(ipxs->dest_addr.net),
ipxs->dest_addr.node[0], ipxs->dest_addr.node[1],
ipxs->dest_addr.node[2], ipxs->dest_addr.node[3],
ipxs->dest_addr.node[4], ipxs->dest_addr.node[5],
diff --git a/net/ipx/sysctl_net_ipx.c b/net/ipx/sysctl_net_ipx.c
index ad7c03dedaab..0dafcc561ed6 100644
--- a/net/ipx/sysctl_net_ipx.c
+++ b/net/ipx/sysctl_net_ipx.c
@@ -9,14 +9,12 @@
#include <linux/mm.h>
#include <linux/sysctl.h>
#include <net/net_namespace.h>
+#include <net/ipx.h>
#ifndef CONFIG_SYSCTL
#error This file should not be compiled without CONFIG_SYSCTL defined
#endif
-/* From af_ipx.c */
-extern int sysctl_ipx_pprop_broadcasting;
-
static struct ctl_table ipx_table[] = {
{
.procname = "ipx_pprop_broadcasting",
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 92fafd485deb..980bc2670a13 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -1396,7 +1396,7 @@ static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock,
copied = size;
msg->msg_flags |= MSG_TRUNC;
}
- skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ skb_copy_datagram_msg(skb, 0, msg, copied);
skb_free_datagram(sk, skb);
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index a089b6b91650..057b5647ef92 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1355,7 +1355,7 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
sk->sk_shutdown = sk->sk_shutdown | RCV_SHUTDOWN;
cskb = skb;
- if (skb_copy_datagram_iovec(cskb, offset, msg->msg_iov, copied)) {
+ if (skb_copy_datagram_msg(cskb, offset, msg, copied)) {
if (!(flags & MSG_PEEK))
skb_queue_head(&sk->sk_receive_queue, skb);
return -EFAULT;
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 1847ec4e3930..e5883091a8c6 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3654,7 +3654,7 @@ static int pfkey_recvmsg(struct kiocb *kiocb,
}
skb_reset_transport_header(skb);
- err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ err = skb_copy_datagram_msg(skb, 0, msg, copied);
if (err)
goto out_free;
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 369a9822488c..a6cc1fed2b52 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -528,7 +528,7 @@ static int l2tp_ip_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m
copied = len;
}
- err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ err = skb_copy_datagram_msg(skb, 0, msg, copied);
if (err)
goto done;
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 0edb263cc002..2177b960da87 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -672,7 +672,7 @@ static int l2tp_ip6_recvmsg(struct kiocb *iocb, struct sock *sk,
copied = len;
}
- err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ err = skb_copy_datagram_msg(skb, 0, msg, copied);
if (err)
goto done;
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index b704a9356208..c559bcdf4679 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -208,7 +208,7 @@ static int pppol2tp_recvmsg(struct kiocb *iocb, struct socket *sock,
else if (len < skb->len)
msg->msg_flags |= MSG_TRUNC;
- err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len);
+ err = skb_copy_datagram_msg(skb, 0, msg, len);
if (likely(err == 0))
err = len;
diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c
index 3cdaa046c1bc..fc60d9d738b5 100644
--- a/net/lapb/lapb_iface.c
+++ b/net/lapb/lapb_iface.c
@@ -73,6 +73,7 @@ static void __lapb_remove_cb(struct lapb_cb *lapb)
lapb_put(lapb);
}
}
+EXPORT_SYMBOL(lapb_register);
/*
* Add a socket to the bound sockets list.
@@ -195,6 +196,7 @@ out:
write_unlock_bh(&lapb_list_lock);
return rc;
}
+EXPORT_SYMBOL(lapb_unregister);
int lapb_getparms(struct net_device *dev, struct lapb_parms_struct *parms)
{
@@ -227,6 +229,7 @@ int lapb_getparms(struct net_device *dev, struct lapb_parms_struct *parms)
out:
return rc;
}
+EXPORT_SYMBOL(lapb_getparms);
int lapb_setparms(struct net_device *dev, struct lapb_parms_struct *parms)
{
@@ -262,6 +265,7 @@ out_put:
out:
return rc;
}
+EXPORT_SYMBOL(lapb_setparms);
int lapb_connect_request(struct net_device *dev)
{
@@ -290,6 +294,7 @@ out_put:
out:
return rc;
}
+EXPORT_SYMBOL(lapb_connect_request);
int lapb_disconnect_request(struct net_device *dev)
{
@@ -334,6 +339,7 @@ out_put:
out:
return rc;
}
+EXPORT_SYMBOL(lapb_disconnect_request);
int lapb_data_request(struct net_device *dev, struct sk_buff *skb)
{
@@ -355,6 +361,7 @@ out_put:
out:
return rc;
}
+EXPORT_SYMBOL(lapb_data_request);
int lapb_data_received(struct net_device *dev, struct sk_buff *skb)
{
@@ -369,6 +376,7 @@ int lapb_data_received(struct net_device *dev, struct sk_buff *skb)
return rc;
}
+EXPORT_SYMBOL(lapb_data_received);
void lapb_connect_confirmation(struct lapb_cb *lapb, int reason)
{
@@ -415,15 +423,6 @@ int lapb_data_transmit(struct lapb_cb *lapb, struct sk_buff *skb)
return used;
}
-EXPORT_SYMBOL(lapb_register);
-EXPORT_SYMBOL(lapb_unregister);
-EXPORT_SYMBOL(lapb_getparms);
-EXPORT_SYMBOL(lapb_setparms);
-EXPORT_SYMBOL(lapb_connect_request);
-EXPORT_SYMBOL(lapb_disconnect_request);
-EXPORT_SYMBOL(lapb_data_request);
-EXPORT_SYMBOL(lapb_data_received);
-
static int __init lapb_init(void)
{
return 0;
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index bb9cbc17d926..af662669f951 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -819,8 +819,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock,
used = len;
if (!(flags & MSG_TRUNC)) {
- int rc = skb_copy_datagram_iovec(skb, offset,
- msg->msg_iov, used);
+ int rc = skb_copy_datagram_msg(skb, offset, msg, used);
if (rc) {
/* Exception. Bailout! */
if (!copied)
diff --git a/net/llc/llc_if.c b/net/llc/llc_if.c
index 25c31c0a3fdb..6daf391b3e84 100644
--- a/net/llc/llc_if.c
+++ b/net/llc/llc_if.c
@@ -15,7 +15,7 @@
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
-#include <asm/errno.h>
+#include <linux/errno.h>
#include <net/llc_if.h>
#include <net/llc_sap.h>
#include <net/llc_s_ev.h>
diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c
index e3545f21a099..ca27837974fe 100644
--- a/net/mpls/mpls_gso.c
+++ b/net/mpls/mpls_gso.c
@@ -34,8 +34,7 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb,
SKB_GSO_TCP_ECN |
SKB_GSO_GRE |
SKB_GSO_GRE_CSUM |
- SKB_GSO_IPIP |
- SKB_GSO_MPLS)))
+ SKB_GSO_IPIP)))
goto out;
/* Setup inner SKB. */
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index f1de72de273e..580b79452bec 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2401,7 +2401,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
}
skb_reset_transport_header(data_skb);
- err = skb_copy_datagram_iovec(data_skb, 0, msg->msg_iov, copied);
+ err = skb_copy_datagram_msg(data_skb, 0, msg, copied);
if (msg->msg_name) {
DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name);
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 1b06a1fcf3e8..7e13f6afcd1f 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -1167,7 +1167,7 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock,
msg->msg_flags |= MSG_TRUNC;
}
- er = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ er = skb_copy_datagram_msg(skb, 0, msg, copied);
if (er < 0) {
skb_free_datagram(sk, skb);
release_sock(sk);
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 51f077a92fa9..83bc785d5855 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -832,7 +832,7 @@ static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
copied = min_t(unsigned int, rlen, len);
cskb = skb;
- if (skb_copy_datagram_iovec(cskb, 0, msg->msg_iov, copied)) {
+ if (skb_copy_datagram_msg(cskb, 0, msg, copied)) {
if (!(flags & MSG_PEEK))
skb_queue_head(&sk->sk_receive_queue, skb);
return -EFAULT;
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index 11c3544ea546..9d7d2b7ba5e4 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -269,7 +269,7 @@ static int rawsock_recvmsg(struct kiocb *iocb, struct socket *sock,
copied = len;
}
- rc = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ rc = skb_copy_datagram_msg(skb, 0, msg, copied);
skb_free_datagram(sk, skb);
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index ba3bb8203b99..454ce12efbbf 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -29,11 +29,12 @@ config OPENVSWITCH
If unsure, say N.
config OPENVSWITCH_GRE
- bool "Open vSwitch GRE tunneling support"
+ tristate "Open vSwitch GRE tunneling support"
+ select NET_MPLS_GSO
depends on INET
depends on OPENVSWITCH
- depends on NET_IPGRE_DEMUX && !(OPENVSWITCH=y && NET_IPGRE_DEMUX=m)
- default y
+ depends on NET_IPGRE_DEMUX
+ default OPENVSWITCH
---help---
If you say Y here, then the Open vSwitch will be able create GRE
vport.
@@ -43,11 +44,11 @@ config OPENVSWITCH_GRE
If unsure, say Y.
config OPENVSWITCH_VXLAN
- bool "Open vSwitch VXLAN tunneling support"
+ tristate "Open vSwitch VXLAN tunneling support"
depends on INET
depends on OPENVSWITCH
- depends on VXLAN && !(OPENVSWITCH=y && VXLAN=m)
- default y
+ depends on VXLAN
+ default OPENVSWITCH
---help---
If you say Y here, then the Open vSwitch will be able create vxlan vport.
@@ -56,11 +57,11 @@ config OPENVSWITCH_VXLAN
If unsure, say Y.
config OPENVSWITCH_GENEVE
- bool "Open vSwitch Geneve tunneling support"
+ tristate "Open vSwitch Geneve tunneling support"
depends on INET
depends on OPENVSWITCH
- depends on GENEVE && !(OPENVSWITCH=y && GENEVE=m)
- default y
+ depends on GENEVE
+ default OPENVSWITCH
---help---
If you say Y here, then the Open vSwitch will be able create geneve vport.
diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile
index 9a33a273c375..91b9478413ef 100644
--- a/net/openvswitch/Makefile
+++ b/net/openvswitch/Makefile
@@ -15,14 +15,6 @@ openvswitch-y := \
vport-internal_dev.o \
vport-netdev.o
-ifneq ($(CONFIG_OPENVSWITCH_GENEVE),)
-openvswitch-y += vport-geneve.o
-endif
-
-ifneq ($(CONFIG_OPENVSWITCH_VXLAN),)
-openvswitch-y += vport-vxlan.o
-endif
-
-ifneq ($(CONFIG_OPENVSWITCH_GRE),)
-openvswitch-y += vport-gre.o
-endif
+obj-$(CONFIG_OPENVSWITCH_GENEVE)+= vport-geneve.o
+obj-$(CONFIG_OPENVSWITCH_VXLAN) += vport-vxlan.o
+obj-$(CONFIG_OPENVSWITCH_GRE) += vport-gre.o
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 006886dbee36..f7e589159e4a 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -28,10 +28,12 @@
#include <linux/in6.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
+
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/checksum.h>
#include <net/dsfield.h>
+#include <net/mpls.h>
#include <net/sctp/checksum.h>
#include "datapath.h"
@@ -118,6 +120,92 @@ static int make_writable(struct sk_buff *skb, int write_len)
return pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
}
+static int push_mpls(struct sk_buff *skb,
+ const struct ovs_action_push_mpls *mpls)
+{
+ __be32 *new_mpls_lse;
+ struct ethhdr *hdr;
+
+ /* Networking stack do not allow simultaneous Tunnel and MPLS GSO. */
+ if (skb->encapsulation)
+ return -ENOTSUPP;
+
+ if (skb_cow_head(skb, MPLS_HLEN) < 0)
+ return -ENOMEM;
+
+ skb_push(skb, MPLS_HLEN);
+ memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
+ skb->mac_len);
+ skb_reset_mac_header(skb);
+
+ new_mpls_lse = (__be32 *)skb_mpls_header(skb);
+ *new_mpls_lse = mpls->mpls_lse;
+
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->csum = csum_add(skb->csum, csum_partial(new_mpls_lse,
+ MPLS_HLEN, 0));
+
+ hdr = eth_hdr(skb);
+ hdr->h_proto = mpls->mpls_ethertype;
+
+ skb_set_inner_protocol(skb, skb->protocol);
+ skb->protocol = mpls->mpls_ethertype;
+
+ return 0;
+}
+
+static int pop_mpls(struct sk_buff *skb, const __be16 ethertype)
+{
+ struct ethhdr *hdr;
+ int err;
+
+ err = make_writable(skb, skb->mac_len + MPLS_HLEN);
+ if (unlikely(err))
+ return err;
+
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->csum = csum_sub(skb->csum,
+ csum_partial(skb_mpls_header(skb),
+ MPLS_HLEN, 0));
+
+ memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
+ skb->mac_len);
+
+ __skb_pull(skb, MPLS_HLEN);
+ skb_reset_mac_header(skb);
+
+ /* skb_mpls_header() is used to locate the ethertype
+ * field correctly in the presence of VLAN tags.
+ */
+ hdr = (struct ethhdr *)(skb_mpls_header(skb) - ETH_HLEN);
+ hdr->h_proto = ethertype;
+ if (eth_p_mpls(skb->protocol))
+ skb->protocol = ethertype;
+ return 0;
+}
+
+static int set_mpls(struct sk_buff *skb, const __be32 *mpls_lse)
+{
+ __be32 *stack;
+ int err;
+
+ err = make_writable(skb, skb->mac_len + MPLS_HLEN);
+ if (unlikely(err))
+ return err;
+
+ stack = (__be32 *)skb_mpls_header(skb);
+ if (skb->ip_summed == CHECKSUM_COMPLETE) {
+ __be32 diff[] = { ~(*stack), *mpls_lse };
+
+ skb->csum = ~csum_partial((char *)diff, sizeof(diff),
+ ~skb->csum);
+ }
+
+ *stack = *mpls_lse;
+
+ return 0;
+}
+
/* remove VLAN header from packet and update csum accordingly. */
static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
{
@@ -140,10 +228,12 @@ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
vlan_set_encap_proto(skb, vhdr);
skb->mac_header += VLAN_HLEN;
+
if (skb_network_offset(skb) < ETH_HLEN)
skb_set_network_header(skb, ETH_HLEN);
- skb_reset_mac_len(skb);
+ /* Update mac_len for subsequent MPLS actions */
+ skb_reset_mac_len(skb);
return 0;
}
@@ -186,6 +276,8 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla
if (!__vlan_put_tag(skb, skb->vlan_proto, current_tag))
return -ENOMEM;
+ /* Update mac_len for subsequent MPLS actions */
+ skb->mac_len += VLAN_HLEN;
if (skb->ip_summed == CHECKSUM_COMPLETE)
skb->csum = csum_add(skb->csum, csum_partial(skb->data
@@ -459,21 +551,14 @@ static int set_sctp(struct sk_buff *skb,
return 0;
}
-static int do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
+static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
{
- struct vport *vport;
-
- if (unlikely(!skb))
- return -ENOMEM;
+ struct vport *vport = ovs_vport_rcu(dp, out_port);
- vport = ovs_vport_rcu(dp, out_port);
- if (unlikely(!vport)) {
+ if (likely(vport))
+ ovs_vport_send(vport, skb);
+ else
kfree_skb(skb);
- return -ENODEV;
- }
-
- ovs_vport_send(vport, skb);
- return 0;
}
static int output_userspace(struct datapath *dp, struct sk_buff *skb,
@@ -504,11 +589,6 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
return ovs_dp_upcall(dp, skb, &upcall);
}
-static bool last_action(const struct nlattr *a, int rem)
-{
- return a->nla_len == rem;
-}
-
static int sample(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *key, const struct nlattr *attr)
{
@@ -543,7 +623,7 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
* user space. This skb will be consumed by its caller.
*/
if (likely(nla_type(a) == OVS_ACTION_ATTR_USERSPACE &&
- last_action(a, rem)))
+ nla_is_last(a, rem)))
return output_userspace(dp, skb, key, a);
skb = skb_clone(skb, GFP_ATOMIC);
@@ -617,6 +697,10 @@ static int execute_set_action(struct sk_buff *skb,
case OVS_KEY_ATTR_SCTP:
err = set_sctp(skb, nla_data(nested_attr));
break;
+
+ case OVS_KEY_ATTR_MPLS:
+ err = set_mpls(skb, nla_data(nested_attr));
+ break;
}
return err;
@@ -633,7 +717,7 @@ static int execute_recirc(struct datapath *dp, struct sk_buff *skb,
if (err)
return err;
- if (!last_action(a, rem)) {
+ if (!nla_is_last(a, rem)) {
/* Recirc action is the not the last action
* of the action list, need to clone the skb.
*/
@@ -677,8 +761,12 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
a = nla_next(a, &rem)) {
int err = 0;
- if (prev_port != -1) {
- do_output(dp, skb_clone(skb, GFP_ATOMIC), prev_port);
+ if (unlikely(prev_port != -1)) {
+ struct sk_buff *out_skb = skb_clone(skb, GFP_ATOMIC);
+
+ if (out_skb)
+ do_output(dp, out_skb, prev_port);
+
prev_port = -1;
}
@@ -695,6 +783,14 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
execute_hash(skb, key, a);
break;
+ case OVS_ACTION_ATTR_PUSH_MPLS:
+ err = push_mpls(skb, nla_data(a));
+ break;
+
+ case OVS_ACTION_ATTR_POP_MPLS:
+ err = pop_mpls(skb, nla_get_be16(a));
+ break;
+
case OVS_ACTION_ATTR_PUSH_VLAN:
err = push_vlan(skb, nla_data(a));
if (unlikely(err)) /* skb already freed. */
@@ -707,7 +803,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
case OVS_ACTION_ATTR_RECIRC:
err = execute_recirc(dp, skb, key, a, rem);
- if (last_action(a, rem)) {
+ if (nla_is_last(a, rem)) {
/* If this is the last action, the skb has
* been consumed or freed.
* Return immediately.
@@ -769,14 +865,11 @@ static void process_deferred_actions(struct datapath *dp)
/* Execute a list of actions against 'skb'. */
int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
- struct sw_flow_key *key)
+ struct sw_flow_actions *acts, struct sw_flow_key *key)
{
int level = this_cpu_read(exec_actions_level);
- struct sw_flow_actions *acts;
int err;
- acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
-
this_cpu_inc(exec_actions_level);
OVS_CB(skb)->egress_tun_info = NULL;
err = do_execute_actions(dp, skb, key,
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index e6d7255183eb..014485ec4b0d 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -59,6 +59,7 @@
#include "vport-netdev.h"
int ovs_net_id __read_mostly;
+EXPORT_SYMBOL(ovs_net_id);
static struct genl_family dp_packet_genl_family;
static struct genl_family dp_flow_genl_family;
@@ -130,6 +131,7 @@ int lockdep_ovsl_is_held(void)
else
return 1;
}
+EXPORT_SYMBOL(lockdep_ovsl_is_held);
#endif
static struct vport *new_vport(const struct vport_parms *);
@@ -138,19 +140,30 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
const struct dp_upcall_info *);
-/* Must be called with rcu_read_lock or ovs_mutex. */
-static struct datapath *get_dp(struct net *net, int dp_ifindex)
+/* Must be called with rcu_read_lock. */
+static struct datapath *get_dp_rcu(struct net *net, int dp_ifindex)
{
- struct datapath *dp = NULL;
- struct net_device *dev;
+ struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex);
- rcu_read_lock();
- dev = dev_get_by_index_rcu(net, dp_ifindex);
if (dev) {
struct vport *vport = ovs_internal_dev_get_vport(dev);
if (vport)
- dp = vport->dp;
+ return vport->dp;
}
+
+ return NULL;
+}
+
+/* The caller must hold either ovs_mutex or rcu_read_lock to keep the
+ * returned dp pointer valid.
+ */
+static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
+{
+ struct datapath *dp;
+
+ WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
+ rcu_read_lock();
+ dp = get_dp_rcu(net, dp_ifindex);
rcu_read_unlock();
return dp;
@@ -185,6 +198,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu)
{
struct datapath *dp = container_of(rcu, struct datapath, rcu);
+ ovs_flow_tbl_destroy(&dp->table);
free_percpu(dp->stats_percpu);
release_net(ovs_dp_get_net(dp));
kfree(dp->ports);
@@ -243,6 +257,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
const struct vport *p = OVS_CB(skb)->input_vport;
struct datapath *dp = p->dp;
struct sw_flow *flow;
+ struct sw_flow_actions *sf_acts;
struct dp_stats_percpu *stats;
u64 *stats_counter;
u32 n_mask_hit;
@@ -268,10 +283,10 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
goto out;
}
- OVS_CB(skb)->flow = flow;
+ ovs_flow_stats_update(flow, key->tp.flags, skb);
+ sf_acts = rcu_dereference(flow->sf_acts);
+ ovs_execute_actions(dp, skb, sf_acts, key);
- ovs_flow_stats_update(OVS_CB(skb)->flow, key->tp.flags, skb);
- ovs_execute_actions(dp, skb, key);
stats_counter = &stats->n_hit;
out:
@@ -360,37 +375,12 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
return err;
}
-static size_t key_attr_size(void)
-{
- return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
- + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */
- + nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */
- + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */
- + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */
- + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */
- + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */
- + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
- + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */
- + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */
- + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
- + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */
- + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */
- + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
- + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
- + nla_total_size(4) /* OVS_KEY_ATTR_8021Q */
- + nla_total_size(0) /* OVS_KEY_ATTR_ENCAP */
- + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
- + nla_total_size(40) /* OVS_KEY_ATTR_IPV6 */
- + nla_total_size(2) /* OVS_KEY_ATTR_ICMPV6 */
- + nla_total_size(28); /* OVS_KEY_ATTR_ND */
-}
-
static size_t upcall_msg_size(const struct nlattr *userdata,
unsigned int hdrlen)
{
size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
+ nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
- + nla_total_size(key_attr_size()); /* OVS_PACKET_ATTR_KEY */
+ + nla_total_size(ovs_key_attr_size()); /* OVS_PACKET_ATTR_KEY */
/* OVS_PACKET_ATTR_USERDATA */
if (userdata)
@@ -510,6 +500,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
struct sw_flow_actions *acts;
struct sk_buff *packet;
struct sw_flow *flow;
+ struct sw_flow_actions *sf_acts;
struct datapath *dp;
struct ethhdr *eth;
struct vport *input_vport;
@@ -552,25 +543,18 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
if (err)
goto err_flow_free;
- acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_PACKET_ATTR_ACTIONS]));
- err = PTR_ERR(acts);
- if (IS_ERR(acts))
- goto err_flow_free;
-
err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS],
- &flow->key, 0, &acts);
+ &flow->key, &acts);
if (err)
goto err_flow_free;
rcu_assign_pointer(flow->sf_acts, acts);
-
OVS_CB(packet)->egress_tun_info = NULL;
- OVS_CB(packet)->flow = flow;
packet->priority = flow->key.phy.priority;
packet->mark = flow->key.phy.skb_mark;
rcu_read_lock();
- dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+ dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
err = -ENODEV;
if (!dp)
goto err_unlock;
@@ -583,9 +567,10 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
goto err_unlock;
OVS_CB(packet)->input_vport = input_vport;
+ sf_acts = rcu_dereference(flow->sf_acts);
local_bh_disable();
- err = ovs_execute_actions(dp, packet, &flow->key);
+ err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
local_bh_enable();
rcu_read_unlock();
@@ -662,8 +647,8 @@ static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats,
static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
{
return NLMSG_ALIGN(sizeof(struct ovs_header))
- + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */
- + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_MASK */
+ + nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_KEY */
+ + nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_MASK */
+ nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
+ nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
+ nla_total_size(8) /* OVS_FLOW_ATTR_USED */
@@ -671,58 +656,67 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
}
/* Called with ovs_mutex or RCU read lock. */
-static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
- struct sk_buff *skb, u32 portid,
- u32 seq, u32 flags, u8 cmd)
+static int ovs_flow_cmd_fill_match(const struct sw_flow *flow,
+ struct sk_buff *skb)
{
- const int skb_orig_len = skb->len;
- struct nlattr *start;
- struct ovs_flow_stats stats;
- __be16 tcp_flags;
- unsigned long used;
- struct ovs_header *ovs_header;
struct nlattr *nla;
int err;
- ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
- if (!ovs_header)
- return -EMSGSIZE;
-
- ovs_header->dp_ifindex = dp_ifindex;
-
/* Fill flow key. */
nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
if (!nla)
- goto nla_put_failure;
+ return -EMSGSIZE;
err = ovs_nla_put_flow(&flow->unmasked_key, &flow->unmasked_key, skb);
if (err)
- goto error;
+ return err;
+
nla_nest_end(skb, nla);
+ /* Fill flow mask. */
nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK);
if (!nla)
- goto nla_put_failure;
+ return -EMSGSIZE;
err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb);
if (err)
- goto error;
+ return err;
nla_nest_end(skb, nla);
+ return 0;
+}
+
+/* Called with ovs_mutex or RCU read lock. */
+static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
+ struct sk_buff *skb)
+{
+ struct ovs_flow_stats stats;
+ __be16 tcp_flags;
+ unsigned long used;
ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);
if (used &&
nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
- goto nla_put_failure;
+ return -EMSGSIZE;
if (stats.n_packets &&
nla_put(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats))
- goto nla_put_failure;
+ return -EMSGSIZE;
if ((u8)ntohs(tcp_flags) &&
nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
- goto nla_put_failure;
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+/* Called with ovs_mutex or RCU read lock. */
+static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
+ struct sk_buff *skb, int skb_orig_len)
+{
+ struct nlattr *start;
+ int err;
/* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
* this is the first flow to be dumped into 'skb'. This is unusual for
@@ -746,17 +740,47 @@ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
nla_nest_end(skb, start);
else {
if (skb_orig_len)
- goto error;
+ return err;
nla_nest_cancel(skb, start);
}
- } else if (skb_orig_len)
- goto nla_put_failure;
+ } else if (skb_orig_len) {
+ return -EMSGSIZE;
+ }
+
+ return 0;
+}
+
+/* Called with ovs_mutex or RCU read lock. */
+static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
+ struct sk_buff *skb, u32 portid,
+ u32 seq, u32 flags, u8 cmd)
+{
+ const int skb_orig_len = skb->len;
+ struct ovs_header *ovs_header;
+ int err;
+
+ ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
+ flags, cmd);
+ if (!ovs_header)
+ return -EMSGSIZE;
+
+ ovs_header->dp_ifindex = dp_ifindex;
+
+ err = ovs_flow_cmd_fill_match(flow, skb);
+ if (err)
+ goto error;
+
+ err = ovs_flow_cmd_fill_stats(flow, skb);
+ if (err)
+ goto error;
+
+ err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
+ if (err)
+ goto error;
return genlmsg_end(skb, ovs_header);
-nla_put_failure:
- err = -EMSGSIZE;
error:
genlmsg_cancel(skb, ovs_header);
return err;
@@ -814,10 +838,14 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
/* Must have key and actions. */
error = -EINVAL;
- if (!a[OVS_FLOW_ATTR_KEY])
+ if (!a[OVS_FLOW_ATTR_KEY]) {
+ OVS_NLERR("Flow key attribute not present in new flow.\n");
goto error;
- if (!a[OVS_FLOW_ATTR_ACTIONS])
+ }
+ if (!a[OVS_FLOW_ATTR_ACTIONS]) {
+ OVS_NLERR("Flow actions attribute not present in new flow.\n");
goto error;
+ }
/* Most of the time we need to allocate a new flow, do it before
* locking.
@@ -838,16 +866,11 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask);
/* Validate actions. */
- acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
- error = PTR_ERR(acts);
- if (IS_ERR(acts))
- goto err_kfree_flow;
-
error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key,
- 0, &acts);
+ &acts);
if (error) {
OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
- goto err_kfree_acts;
+ goto err_kfree_flow;
}
reply = ovs_flow_cmd_alloc_info(acts, info, false);
@@ -938,6 +961,7 @@ error:
return error;
}
+/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
static struct sw_flow_actions *get_flow_actions(const struct nlattr *a,
const struct sw_flow_key *key,
const struct sw_flow_mask *mask)
@@ -946,15 +970,10 @@ static struct sw_flow_actions *get_flow_actions(const struct nlattr *a,
struct sw_flow_key masked_key;
int error;
- acts = ovs_nla_alloc_flow_actions(nla_len(a));
- if (IS_ERR(acts))
- return acts;
-
ovs_flow_mask_key(&masked_key, key, mask);
- error = ovs_nla_copy_actions(a, &masked_key, 0, &acts);
+ error = ovs_nla_copy_actions(a, &masked_key, &acts);
if (error) {
- OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
- kfree(acts);
+ OVS_NLERR("Actions may not be safe on all matching packets.\n");
return ERR_PTR(error);
}
@@ -976,8 +995,10 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
/* Extract key. */
error = -EINVAL;
- if (!a[OVS_FLOW_ATTR_KEY])
+ if (!a[OVS_FLOW_ATTR_KEY]) {
+ OVS_NLERR("Flow key attribute not present in set flow.\n");
goto error;
+ }
ovs_match_init(&match, &key, &mask);
error = ovs_nla_get_match(&match,
@@ -992,10 +1013,8 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
error = PTR_ERR(acts);
goto error;
}
- }
- /* Can allocate before locking if have acts. */
- if (acts) {
+ /* Can allocate before locking if have acts. */
reply = ovs_flow_cmd_alloc_info(acts, info, false);
if (IS_ERR(reply)) {
error = PTR_ERR(reply);
@@ -1179,7 +1198,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
struct datapath *dp;
rcu_read_lock();
- dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+ dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
if (!dp) {
rcu_read_unlock();
return -ENODEV;
@@ -1442,7 +1461,7 @@ err_destroy_ports_array:
err_destroy_percpu:
free_percpu(dp->stats_percpu);
err_destroy_table:
- ovs_flow_tbl_destroy(&dp->table, false);
+ ovs_flow_tbl_destroy(&dp->table);
err_free_dp:
release_net(ovs_dp_get_net(dp));
kfree(dp);
@@ -1474,8 +1493,6 @@ static void __dp_destroy(struct datapath *dp)
ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
/* RCU destroy the flow table */
- ovs_flow_tbl_destroy(&dp->table, true);
-
call_rcu(&dp->rcu, destroy_dp_rcu);
}
@@ -1764,6 +1781,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
return -ENOMEM;
ovs_lock();
+restart:
dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
err = -ENODEV;
if (!dp)
@@ -1795,8 +1813,11 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
vport = new_vport(&parms);
err = PTR_ERR(vport);
- if (IS_ERR(vport))
+ if (IS_ERR(vport)) {
+ if (err == -EAGAIN)
+ goto restart;
goto exit_unlock_free;
+ }
err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
info->snd_seq, 0, OVS_VPORT_CMD_NEW);
@@ -1939,7 +1960,7 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
int i, j = 0;
rcu_read_lock();
- dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+ dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
if (!dp) {
rcu_read_unlock();
return -ENODEV;
@@ -2112,12 +2133,18 @@ static int __init dp_init(void)
if (err)
goto error_netns_exit;
+ err = ovs_netdev_init();
+ if (err)
+ goto error_unreg_notifier;
+
err = dp_register_genl();
if (err < 0)
- goto error_unreg_notifier;
+ goto error_unreg_netdev;
return 0;
+error_unreg_netdev:
+ ovs_netdev_exit();
error_unreg_notifier:
unregister_netdevice_notifier(&ovs_dp_device_notifier);
error_netns_exit:
@@ -2137,6 +2164,7 @@ error:
static void dp_cleanup(void)
{
dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
+ ovs_netdev_exit();
unregister_netdevice_notifier(&ovs_dp_device_notifier);
unregister_pernet_device(&ovs_net_ops);
rcu_barrier();
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 974135439c5c..1c56a80d6677 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -94,14 +94,12 @@ struct datapath {
/**
* struct ovs_skb_cb - OVS data in skb CB
- * @flow: The flow associated with this packet. May be %NULL if no flow.
* @egress_tun_key: Tunnel information about this packet on egress path.
* NULL if the packet is not being tunneled.
* @input_vport: The original vport packet came in on. This value is cached
* when a packet is received by OVS.
*/
struct ovs_skb_cb {
- struct sw_flow *flow;
struct ovs_tunnel_info *egress_tun_info;
struct vport *input_vport;
};
@@ -194,7 +192,7 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,
u8 cmd);
int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
- struct sw_flow_key *);
+ struct sw_flow_actions *acts, struct sw_flow_key *);
void ovs_dp_notify_wq(struct work_struct *work);
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 2b78789ea7c5..90a21010fc8f 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -32,6 +32,7 @@
#include <linux/if_arp.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
+#include <linux/mpls.h>
#include <linux/sctp.h>
#include <linux/smp.h>
#include <linux/tcp.h>
@@ -42,6 +43,7 @@
#include <net/ip.h>
#include <net/ip_tunnels.h>
#include <net/ipv6.h>
+#include <net/mpls.h>
#include <net/ndisc.h>
#include "datapath.h"
@@ -480,6 +482,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
return -ENOMEM;
skb_reset_network_header(skb);
+ skb_reset_mac_len(skb);
__skb_push(skb, skb->data - skb_mac_header(skb));
/* Network layer. */
@@ -584,6 +587,33 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
memset(&key->ip, 0, sizeof(key->ip));
memset(&key->ipv4, 0, sizeof(key->ipv4));
}
+ } else if (eth_p_mpls(key->eth.type)) {
+ size_t stack_len = MPLS_HLEN;
+
+ /* In the presence of an MPLS label stack the end of the L2
+ * header and the beginning of the L3 header differ.
+ *
+ * Advance network_header to the beginning of the L3
+ * header. mac_len corresponds to the end of the L2 header.
+ */
+ while (1) {
+ __be32 lse;
+
+ error = check_header(skb, skb->mac_len + stack_len);
+ if (unlikely(error))
+ return 0;
+
+ memcpy(&lse, skb_network_header(skb), MPLS_HLEN);
+
+ if (stack_len == MPLS_HLEN)
+ memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN);
+
+ skb_set_network_header(skb, skb->mac_len + stack_len);
+ if (lse & htonl(MPLS_LS_S_MASK))
+ break;
+
+ stack_len += MPLS_HLEN;
+ }
} else if (key->eth.type == htons(ETH_P_IPV6)) {
int nh_len; /* IPv6 Header + Extensions */
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 71813318c8c7..4962bee81a11 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -102,12 +102,17 @@ struct sw_flow_key {
__be16 tci; /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
__be16 type; /* Ethernet frame type. */
} eth;
- struct {
- u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */
- u8 tos; /* IP ToS. */
- u8 ttl; /* IP TTL/hop limit. */
- u8 frag; /* One of OVS_FRAG_TYPE_*. */
- } ip;
+ union {
+ struct {
+ __be32 top_lse; /* top label stack entry */
+ } mpls;
+ struct {
+ u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */
+ u8 tos; /* IP ToS. */
+ u8 ttl; /* IP TTL/hop limit. */
+ u8 frag; /* One of OVS_FRAG_TYPE_*. */
+ } ip;
+ };
struct {
__be16 src; /* TCP/UDP/SCTP source port. */
__be16 dst; /* TCP/UDP/SCTP destination port. */
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 939bcb32100f..ed3109761827 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -46,24 +46,22 @@
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ndisc.h>
+#include <net/mpls.h>
#include "flow_netlink.h"
-static void update_range__(struct sw_flow_match *match,
- size_t offset, size_t size, bool is_mask)
+static void update_range(struct sw_flow_match *match,
+ size_t offset, size_t size, bool is_mask)
{
- struct sw_flow_key_range *range = NULL;
+ struct sw_flow_key_range *range;
size_t start = rounddown(offset, sizeof(long));
size_t end = roundup(offset + size, sizeof(long));
if (!is_mask)
range = &match->range;
- else if (match->mask)
+ else
range = &match->mask->range;
- if (!range)
- return;
-
if (range->start == range->end) {
range->start = start;
range->end = end;
@@ -79,22 +77,20 @@ static void update_range__(struct sw_flow_match *match,
#define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
do { \
- update_range__(match, offsetof(struct sw_flow_key, field), \
- sizeof((match)->key->field), is_mask); \
- if (is_mask) { \
- if ((match)->mask) \
- (match)->mask->key.field = value; \
- } else { \
+ update_range(match, offsetof(struct sw_flow_key, field), \
+ sizeof((match)->key->field), is_mask); \
+ if (is_mask) \
+ (match)->mask->key.field = value; \
+ else \
(match)->key->field = value; \
- } \
} while (0)
#define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask) \
do { \
- update_range__(match, offset, len, is_mask); \
+ update_range(match, offset, len, is_mask); \
if (is_mask) \
memcpy((u8 *)&(match)->mask->key + offset, value_p, \
- len); \
+ len); \
else \
memcpy((u8 *)(match)->key + offset, value_p, len); \
} while (0)
@@ -103,18 +99,16 @@ static void update_range__(struct sw_flow_match *match,
SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \
value_p, len, is_mask)
-#define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask) \
- do { \
- update_range__(match, offsetof(struct sw_flow_key, field), \
- sizeof((match)->key->field), is_mask); \
- if (is_mask) { \
- if ((match)->mask) \
- memset((u8 *)&(match)->mask->key.field, value,\
- sizeof((match)->mask->key.field)); \
- } else { \
+#define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask) \
+ do { \
+ update_range(match, offsetof(struct sw_flow_key, field), \
+ sizeof((match)->key->field), is_mask); \
+ if (is_mask) \
+ memset((u8 *)&(match)->mask->key.field, value, \
+ sizeof((match)->mask->key.field)); \
+ else \
memset((u8 *)&(match)->key->field, value, \
sizeof((match)->key->field)); \
- } \
} while (0)
static bool match_validate(const struct sw_flow_match *match,
@@ -134,7 +128,8 @@ static bool match_validate(const struct sw_flow_match *match,
| (1 << OVS_KEY_ATTR_ICMP)
| (1 << OVS_KEY_ATTR_ICMPV6)
| (1 << OVS_KEY_ATTR_ARP)
- | (1 << OVS_KEY_ATTR_ND));
+ | (1 << OVS_KEY_ATTR_ND)
+ | (1 << OVS_KEY_ATTR_MPLS));
/* Always allowed mask fields. */
mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL)
@@ -149,6 +144,12 @@ static bool match_validate(const struct sw_flow_match *match,
mask_allowed |= 1 << OVS_KEY_ATTR_ARP;
}
+ if (eth_p_mpls(match->key->eth.type)) {
+ key_expected |= 1 << OVS_KEY_ATTR_MPLS;
+ if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
+ mask_allowed |= 1 << OVS_KEY_ATTR_MPLS;
+ }
+
if (match->key->eth.type == htons(ETH_P_IP)) {
key_expected |= 1 << OVS_KEY_ATTR_IPV4;
if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
@@ -244,6 +245,38 @@ static bool match_validate(const struct sw_flow_match *match,
return true;
}
+size_t ovs_key_attr_size(void)
+{
+ /* Whenever adding new OVS_KEY_ FIELDS, we should consider
+ * updating this function.
+ */
+ BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 22);
+
+ return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */
+ + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */
+ + nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */
+ + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */
+ + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */
+ + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */
+ + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */
+ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
+ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */
+ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */
+ + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
+ + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */
+ + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */
+ + nla_total_size(4) /* OVS_KEY_ATTR_DP_HASH */
+ + nla_total_size(4) /* OVS_KEY_ATTR_RECIRC_ID */
+ + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */
+ + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
+ + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */
+ + nla_total_size(0) /* OVS_KEY_ATTR_ENCAP */
+ + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */
+ + nla_total_size(40) /* OVS_KEY_ATTR_IPV6 */
+ + nla_total_size(2) /* OVS_KEY_ATTR_ICMPV6 */
+ + nla_total_size(28); /* OVS_KEY_ATTR_ND */
+}
+
/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */
static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
[OVS_KEY_ATTR_ENCAP] = -1,
@@ -266,6 +299,7 @@ static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
[OVS_KEY_ATTR_RECIRC_ID] = sizeof(u32),
[OVS_KEY_ATTR_DP_HASH] = sizeof(u32),
[OVS_KEY_ATTR_TUNNEL] = -1,
+ [OVS_KEY_ATTR_MPLS] = sizeof(struct ovs_key_mpls),
};
static bool is_all_zero(const u8 *fp, size_t size)
@@ -572,10 +606,13 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) {
u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
- if (is_mask)
+ if (is_mask) {
in_port = 0xffffffff; /* Always exact match in_port. */
- else if (in_port >= DP_MAX_PORTS)
+ } else if (in_port >= DP_MAX_PORTS) {
+ OVS_NLERR("Port (%d) exceeds maximum allowable (%d).\n",
+ in_port, DP_MAX_PORTS);
return -EINVAL;
+ }
SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
*attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
@@ -602,7 +639,6 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
const struct nlattr **a, bool is_mask)
{
int err;
- u64 orig_attrs = attrs;
err = metadata_from_nlattrs(match, &attrs, a, is_mask);
if (err)
@@ -634,8 +670,7 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask);
attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
- } else if (!is_mask)
- SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true);
+ }
if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
__be16 eth_type;
@@ -735,6 +770,16 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
attrs &= ~(1 << OVS_KEY_ATTR_ARP);
}
+ if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
+ const struct ovs_key_mpls *mpls_key;
+
+ mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
+ SW_FLOW_KEY_PUT(match, mpls.top_lse,
+ mpls_key->mpls_lse, is_mask);
+
+ attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
+ }
+
if (attrs & (1 << OVS_KEY_ATTR_TCP)) {
const struct ovs_key_tcp *tcp_key;
@@ -745,15 +790,9 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
}
if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) {
- if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) {
- SW_FLOW_KEY_PUT(match, tp.flags,
- nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
- is_mask);
- } else {
- SW_FLOW_KEY_PUT(match, tp.flags,
- nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
- is_mask);
- }
+ SW_FLOW_KEY_PUT(match, tp.flags,
+ nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
+ is_mask);
attrs &= ~(1 << OVS_KEY_ATTR_TCP_FLAGS);
}
@@ -812,8 +851,11 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
attrs &= ~(1 << OVS_KEY_ATTR_ND);
}
- if (attrs != 0)
+ if (attrs != 0) {
+ OVS_NLERR("Unknown key attributes (%llx).\n",
+ (unsigned long long)attrs);
return -EINVAL;
+ }
return 0;
}
@@ -853,8 +895,8 @@ static void mask_set_nlattr(struct nlattr *attr, u8 val)
* attribute specifies the mask field of the wildcarded flow.
*/
int ovs_nla_get_match(struct sw_flow_match *match,
- const struct nlattr *key,
- const struct nlattr *mask)
+ const struct nlattr *nla_key,
+ const struct nlattr *nla_mask)
{
const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
const struct nlattr *encap;
@@ -864,7 +906,7 @@ int ovs_nla_get_match(struct sw_flow_match *match,
bool encap_valid = false;
int err;
- err = parse_flow_nlattrs(key, a, &key_attrs);
+ err = parse_flow_nlattrs(nla_key, a, &key_attrs);
if (err)
return err;
@@ -905,36 +947,43 @@ int ovs_nla_get_match(struct sw_flow_match *match,
if (err)
return err;
- if (match->mask && !mask) {
- /* Create an exact match mask. We need to set to 0xff all the
- * 'match->mask' fields that have been touched in 'match->key'.
- * We cannot simply memset 'match->mask', because padding bytes
- * and fields not specified in 'match->key' should be left to 0.
- * Instead, we use a stream of netlink attributes, copied from
- * 'key' and set to 0xff: ovs_key_from_nlattrs() will take care
- * of filling 'match->mask' appropriately.
- */
- newmask = kmemdup(key, nla_total_size(nla_len(key)),
- GFP_KERNEL);
- if (!newmask)
- return -ENOMEM;
+ if (match->mask) {
+ if (!nla_mask) {
+ /* Create an exact match mask. We need to set to 0xff
+ * all the 'match->mask' fields that have been touched
+ * in 'match->key'. We cannot simply memset
+ * 'match->mask', because padding bytes and fields not
+ * specified in 'match->key' should be left to 0.
+ * Instead, we use a stream of netlink attributes,
+ * copied from 'key' and set to 0xff.
+ * ovs_key_from_nlattrs() will take care of filling
+ * 'match->mask' appropriately.
+ */
+ newmask = kmemdup(nla_key,
+ nla_total_size(nla_len(nla_key)),
+ GFP_KERNEL);
+ if (!newmask)
+ return -ENOMEM;
- mask_set_nlattr(newmask, 0xff);
+ mask_set_nlattr(newmask, 0xff);
- /* The userspace does not send tunnel attributes that are 0,
- * but we should not wildcard them nonetheless.
- */
- if (match->key->tun_key.ipv4_dst)
- SW_FLOW_KEY_MEMSET_FIELD(match, tun_key, 0xff, true);
+ /* The userspace does not send tunnel attributes that
+ * are 0, but we should not wildcard them nonetheless.
+ */
+ if (match->key->tun_key.ipv4_dst)
+ SW_FLOW_KEY_MEMSET_FIELD(match, tun_key,
+ 0xff, true);
- mask = newmask;
- }
+ nla_mask = newmask;
+ }
- if (mask) {
- err = parse_flow_mask_nlattrs(mask, a, &mask_attrs);
+ err = parse_flow_mask_nlattrs(nla_mask, a, &mask_attrs);
if (err)
goto free_newmask;
+ /* Always match on tci. */
+ SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true);
+
if (mask_attrs & 1 << OVS_KEY_ATTR_ENCAP) {
__be16 eth_type = 0;
__be16 tci = 0;
@@ -1140,6 +1189,14 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
arp_key->arp_op = htons(output->ip.proto);
ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
+ } else if (eth_p_mpls(swkey->eth.type)) {
+ struct ovs_key_mpls *mpls_key;
+
+ nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
+ if (!nla)
+ goto nla_put_failure;
+ mpls_key = nla_data(nla);
+ mpls_key->mpls_lse = output->mpls.top_lse;
}
if ((swkey->eth.type == htons(ETH_P_IP) ||
@@ -1226,12 +1283,14 @@ nla_put_failure:
#define MAX_ACTIONS_BUFSIZE (32 * 1024)
-struct sw_flow_actions *ovs_nla_alloc_flow_actions(int size)
+static struct sw_flow_actions *nla_alloc_flow_actions(int size)
{
struct sw_flow_actions *sfa;
- if (size > MAX_ACTIONS_BUFSIZE)
+ if (size > MAX_ACTIONS_BUFSIZE) {
+ OVS_NLERR("Flow action size (%u bytes) exceeds maximum", size);
return ERR_PTR(-EINVAL);
+ }
sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL);
if (!sfa)
@@ -1269,7 +1328,7 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
new_acts_size = MAX_ACTIONS_BUFSIZE;
}
- acts = ovs_nla_alloc_flow_actions(new_acts_size);
+ acts = nla_alloc_flow_actions(new_acts_size);
if (IS_ERR(acts))
return (void *)acts;
@@ -1336,9 +1395,15 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa,
a->nla_len = sfa->actions_len - st_offset;
}
+static int __ovs_nla_copy_actions(const struct nlattr *attr,
+ const struct sw_flow_key *key,
+ int depth, struct sw_flow_actions **sfa,
+ __be16 eth_type, __be16 vlan_tci);
+
static int validate_and_copy_sample(const struct nlattr *attr,
const struct sw_flow_key *key, int depth,
- struct sw_flow_actions **sfa)
+ struct sw_flow_actions **sfa,
+ __be16 eth_type, __be16 vlan_tci)
{
const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
const struct nlattr *probability, *actions;
@@ -1375,7 +1440,8 @@ static int validate_and_copy_sample(const struct nlattr *attr,
if (st_acts < 0)
return st_acts;
- err = ovs_nla_copy_actions(actions, key, depth + 1, sfa);
+ err = __ovs_nla_copy_actions(actions, key, depth + 1, sfa,
+ eth_type, vlan_tci);
if (err)
return err;
@@ -1385,10 +1451,10 @@ static int validate_and_copy_sample(const struct nlattr *attr,
return 0;
}
-static int validate_tp_port(const struct sw_flow_key *flow_key)
+static int validate_tp_port(const struct sw_flow_key *flow_key,
+ __be16 eth_type)
{
- if ((flow_key->eth.type == htons(ETH_P_IP) ||
- flow_key->eth.type == htons(ETH_P_IPV6)) &&
+ if ((eth_type == htons(ETH_P_IP) || eth_type == htons(ETH_P_IPV6)) &&
(flow_key->tp.src || flow_key->tp.dst))
return 0;
@@ -1483,7 +1549,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
static int validate_set(const struct nlattr *a,
const struct sw_flow_key *flow_key,
struct sw_flow_actions **sfa,
- bool *set_tun)
+ bool *set_tun, __be16 eth_type)
{
const struct nlattr *ovs_key = nla_data(a);
int key_type = nla_type(ovs_key);
@@ -1508,6 +1574,9 @@ static int validate_set(const struct nlattr *a,
break;
case OVS_KEY_ATTR_TUNNEL:
+ if (eth_p_mpls(eth_type))
+ return -EINVAL;
+
*set_tun = true;
err = validate_and_copy_set_tun(a, sfa);
if (err)
@@ -1515,7 +1584,7 @@ static int validate_set(const struct nlattr *a,
break;
case OVS_KEY_ATTR_IPV4:
- if (flow_key->eth.type != htons(ETH_P_IP))
+ if (eth_type != htons(ETH_P_IP))
return -EINVAL;
if (!flow_key->ip.proto)
@@ -1531,7 +1600,7 @@ static int validate_set(const struct nlattr *a,
break;
case OVS_KEY_ATTR_IPV6:
- if (flow_key->eth.type != htons(ETH_P_IPV6))
+ if (eth_type != htons(ETH_P_IPV6))
return -EINVAL;
if (!flow_key->ip.proto)
@@ -1553,19 +1622,24 @@ static int validate_set(const struct nlattr *a,
if (flow_key->ip.proto != IPPROTO_TCP)
return -EINVAL;
- return validate_tp_port(flow_key);
+ return validate_tp_port(flow_key, eth_type);
case OVS_KEY_ATTR_UDP:
if (flow_key->ip.proto != IPPROTO_UDP)
return -EINVAL;
- return validate_tp_port(flow_key);
+ return validate_tp_port(flow_key, eth_type);
+
+ case OVS_KEY_ATTR_MPLS:
+ if (!eth_p_mpls(eth_type))
+ return -EINVAL;
+ break;
case OVS_KEY_ATTR_SCTP:
if (flow_key->ip.proto != IPPROTO_SCTP)
return -EINVAL;
- return validate_tp_port(flow_key);
+ return validate_tp_port(flow_key, eth_type);
default:
return -EINVAL;
@@ -1609,12 +1683,13 @@ static int copy_action(const struct nlattr *from,
return 0;
}
-int ovs_nla_copy_actions(const struct nlattr *attr,
- const struct sw_flow_key *key,
- int depth,
- struct sw_flow_actions **sfa)
+static int __ovs_nla_copy_actions(const struct nlattr *attr,
+ const struct sw_flow_key *key,
+ int depth, struct sw_flow_actions **sfa,
+ __be16 eth_type, __be16 vlan_tci)
{
const struct nlattr *a;
+ bool out_tnl_port = false;
int rem, err;
if (depth >= SAMPLE_ACTION_DEPTH)
@@ -1626,6 +1701,8 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
[OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
[OVS_ACTION_ATTR_RECIRC] = sizeof(u32),
[OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
+ [OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls),
+ [OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16),
[OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
[OVS_ACTION_ATTR_POP_VLAN] = 0,
[OVS_ACTION_ATTR_SET] = (u32)-1,
@@ -1655,6 +1732,8 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
case OVS_ACTION_ATTR_OUTPUT:
if (nla_get_u32(a) >= DP_MAX_PORTS)
return -EINVAL;
+ out_tnl_port = false;
+
break;
case OVS_ACTION_ATTR_HASH: {
@@ -1671,6 +1750,7 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
}
case OVS_ACTION_ATTR_POP_VLAN:
+ vlan_tci = htons(0);
break;
case OVS_ACTION_ATTR_PUSH_VLAN:
@@ -1679,25 +1759,73 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
return -EINVAL;
if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
return -EINVAL;
+ vlan_tci = vlan->vlan_tci;
break;
case OVS_ACTION_ATTR_RECIRC:
break;
+ case OVS_ACTION_ATTR_PUSH_MPLS: {
+ const struct ovs_action_push_mpls *mpls = nla_data(a);
+
+ /* Networking stack do not allow simultaneous Tunnel
+ * and MPLS GSO.
+ */
+ if (out_tnl_port)
+ return -EINVAL;
+
+ if (!eth_p_mpls(mpls->mpls_ethertype))
+ return -EINVAL;
+ /* Prohibit push MPLS other than to a white list
+ * for packets that have a known tag order.
+ */
+ if (vlan_tci & htons(VLAN_TAG_PRESENT) ||
+ (eth_type != htons(ETH_P_IP) &&
+ eth_type != htons(ETH_P_IPV6) &&
+ eth_type != htons(ETH_P_ARP) &&
+ eth_type != htons(ETH_P_RARP) &&
+ !eth_p_mpls(eth_type)))
+ return -EINVAL;
+ eth_type = mpls->mpls_ethertype;
+ break;
+ }
+
+ case OVS_ACTION_ATTR_POP_MPLS:
+ if (vlan_tci & htons(VLAN_TAG_PRESENT) ||
+ !eth_p_mpls(eth_type))
+ return -EINVAL;
+
+ /* Disallow subsequent L2.5+ set and mpls_pop actions
+ * as there is no check here to ensure that the new
+ * eth_type is valid and thus set actions could
+ * write off the end of the packet or otherwise
+ * corrupt it.
+ *
+ * Support for these actions is planned using packet
+ * recirculation.
+ */
+ eth_type = htons(0);
+ break;
+
case OVS_ACTION_ATTR_SET:
- err = validate_set(a, key, sfa, &skip_copy);
+ err = validate_set(a, key, sfa,
+ &out_tnl_port, eth_type);
if (err)
return err;
+
+ skip_copy = out_tnl_port;
break;
case OVS_ACTION_ATTR_SAMPLE:
- err = validate_and_copy_sample(a, key, depth, sfa);
+ err = validate_and_copy_sample(a, key, depth, sfa,
+ eth_type, vlan_tci);
if (err)
return err;
skip_copy = true;
break;
default:
+ OVS_NLERR("Unknown tunnel attribute (%d).\n", type);
return -EINVAL;
}
if (!skip_copy) {
@@ -1713,6 +1841,24 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
return 0;
}
+int ovs_nla_copy_actions(const struct nlattr *attr,
+ const struct sw_flow_key *key,
+ struct sw_flow_actions **sfa)
+{
+ int err;
+
+ *sfa = nla_alloc_flow_actions(nla_len(attr));
+ if (IS_ERR(*sfa))
+ return PTR_ERR(*sfa);
+
+ err = __ovs_nla_copy_actions(attr, key, 0, sfa, key->eth.type,
+ key->eth.tci);
+ if (err)
+ kfree(*sfa);
+
+ return err;
+}
+
static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
{
const struct nlattr *a;
diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h
index 206e45add888..eb0b177300ad 100644
--- a/net/openvswitch/flow_netlink.h
+++ b/net/openvswitch/flow_netlink.h
@@ -37,6 +37,8 @@
#include "flow.h"
+size_t ovs_key_attr_size(void);
+
void ovs_match_init(struct sw_flow_match *match,
struct sw_flow_key *key, struct sw_flow_mask *mask);
@@ -49,12 +51,11 @@ int ovs_nla_get_match(struct sw_flow_match *match,
const struct nlattr *);
int ovs_nla_copy_actions(const struct nlattr *attr,
- const struct sw_flow_key *key, int depth,
+ const struct sw_flow_key *key,
struct sw_flow_actions **sfa);
int ovs_nla_put_actions(const struct nlattr *attr,
int len, struct sk_buff *skb);
-struct sw_flow_actions *ovs_nla_alloc_flow_actions(int actions_len);
void ovs_nla_free_flow_actions(struct sw_flow_actions *);
#endif /* flow_netlink.h */
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index cf2d853646f0..90f8b40a350b 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007-2013 Nicira, Inc.
+ * Copyright (c) 2007-2014 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -250,11 +250,14 @@ skip_flows:
__table_instance_destroy(ti);
}
-void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred)
+/* No need for locking this function is called from RCU callback or
+ * error path.
+ */
+void ovs_flow_tbl_destroy(struct flow_table *table)
{
- struct table_instance *ti = ovsl_dereference(table->ti);
+ struct table_instance *ti = rcu_dereference_raw(table->ti);
- table_instance_destroy(ti, deferred);
+ table_instance_destroy(ti, false);
}
struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
index 5918bff7f3f6..f682c8c07f44 100644
--- a/net/openvswitch/flow_table.h
+++ b/net/openvswitch/flow_table.h
@@ -62,7 +62,7 @@ void ovs_flow_free(struct sw_flow *, bool deferred);
int ovs_flow_tbl_init(struct flow_table *);
int ovs_flow_tbl_count(struct flow_table *table);
-void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred);
+void ovs_flow_tbl_destroy(struct flow_table *table);
int ovs_flow_tbl_flush(struct flow_table *flow_table);
int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
index 106a9d80b663..70c9765011f4 100644
--- a/net/openvswitch/vport-geneve.c
+++ b/net/openvswitch/vport-geneve.c
@@ -17,6 +17,7 @@
#include <linux/rculist.h>
#include <linux/udp.h>
#include <linux/if_vlan.h>
+#include <linux/module.h>
#include <net/geneve.h>
#include <net/icmp.h>
@@ -28,6 +29,8 @@
#include "datapath.h"
#include "vport.h"
+static struct vport_ops ovs_geneve_vport_ops;
+
/**
* struct geneve_port - Keeps track of open UDP ports
* @gs: The socket created for this port number.
@@ -225,11 +228,29 @@ static const char *geneve_get_name(const struct vport *vport)
return geneve_port->name;
}
-const struct vport_ops ovs_geneve_vport_ops = {
+static struct vport_ops ovs_geneve_vport_ops = {
.type = OVS_VPORT_TYPE_GENEVE,
.create = geneve_tnl_create,
.destroy = geneve_tnl_destroy,
.get_name = geneve_get_name,
.get_options = geneve_get_options,
.send = geneve_tnl_send,
+ .owner = THIS_MODULE,
};
+
+static int __init ovs_geneve_tnl_init(void)
+{
+ return ovs_vport_ops_register(&ovs_geneve_vport_ops);
+}
+
+static void __exit ovs_geneve_tnl_exit(void)
+{
+ ovs_vport_ops_unregister(&ovs_geneve_vport_ops);
+}
+
+module_init(ovs_geneve_tnl_init);
+module_exit(ovs_geneve_tnl_exit);
+
+MODULE_DESCRIPTION("OVS: Geneve swiching port");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("vport-type-5");
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index 108b82da2fd9..00270b608844 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -29,6 +29,7 @@
#include <linux/jhash.h>
#include <linux/list.h>
#include <linux/kernel.h>
+#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/rculist.h>
#include <net/route.h>
@@ -45,6 +46,8 @@
#include "datapath.h"
#include "vport.h"
+static struct vport_ops ovs_gre_vport_ops;
+
/* Returns the least-significant 32 bits of a __be64. */
static __be32 be64_get_low32(__be64 x)
{
@@ -281,10 +284,28 @@ static void gre_tnl_destroy(struct vport *vport)
gre_exit();
}
-const struct vport_ops ovs_gre_vport_ops = {
+static struct vport_ops ovs_gre_vport_ops = {
.type = OVS_VPORT_TYPE_GRE,
.create = gre_create,
.destroy = gre_tnl_destroy,
.get_name = gre_get_name,
.send = gre_tnl_send,
+ .owner = THIS_MODULE,
};
+
+static int __init ovs_gre_tnl_init(void)
+{
+ return ovs_vport_ops_register(&ovs_gre_vport_ops);
+}
+
+static void __exit ovs_gre_tnl_exit(void)
+{
+ ovs_vport_ops_unregister(&ovs_gre_vport_ops);
+}
+
+module_init(ovs_gre_tnl_init);
+module_exit(ovs_gre_tnl_exit);
+
+MODULE_DESCRIPTION("OVS: GRE switching port");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("vport-type-3");
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 84516126e5f3..6a55f7105505 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -36,6 +36,8 @@ struct internal_dev {
struct vport *vport;
};
+static struct vport_ops ovs_internal_vport_ops;
+
static struct internal_dev *internal_dev_priv(struct net_device *netdev)
{
return netdev_priv(netdev);
@@ -222,6 +224,11 @@ static int internal_dev_recv(struct vport *vport, struct sk_buff *skb)
struct net_device *netdev = netdev_vport_priv(vport)->dev;
int len;
+ if (unlikely(!(netdev->flags & IFF_UP))) {
+ kfree_skb(skb);
+ return 0;
+ }
+
len = skb->len;
skb_dst_drop(skb);
@@ -238,7 +245,7 @@ static int internal_dev_recv(struct vport *vport, struct sk_buff *skb)
return len;
}
-const struct vport_ops ovs_internal_vport_ops = {
+static struct vport_ops ovs_internal_vport_ops = {
.type = OVS_VPORT_TYPE_INTERNAL,
.create = internal_dev_create,
.destroy = internal_dev_destroy,
@@ -261,10 +268,21 @@ struct vport *ovs_internal_dev_get_vport(struct net_device *netdev)
int ovs_internal_dev_rtnl_link_register(void)
{
- return rtnl_link_register(&internal_dev_link_ops);
+ int err;
+
+ err = rtnl_link_register(&internal_dev_link_ops);
+ if (err < 0)
+ return err;
+
+ err = ovs_vport_ops_register(&ovs_internal_vport_ops);
+ if (err < 0)
+ rtnl_link_unregister(&internal_dev_link_ops);
+
+ return err;
}
void ovs_internal_dev_rtnl_link_unregister(void)
{
+ ovs_vport_ops_unregister(&ovs_internal_vport_ops);
rtnl_link_unregister(&internal_dev_link_ops);
}
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index d21f77d875ba..877ee74b4f08 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -33,6 +33,8 @@
#include "vport-internal_dev.h"
#include "vport-netdev.h"
+static struct vport_ops ovs_netdev_vport_ops;
+
/* Must be called with rcu_read_lock. */
static void netdev_port_receive(struct vport *vport, struct sk_buff *skb)
{
@@ -224,10 +226,20 @@ struct vport *ovs_netdev_get_vport(struct net_device *dev)
return NULL;
}
-const struct vport_ops ovs_netdev_vport_ops = {
+static struct vport_ops ovs_netdev_vport_ops = {
.type = OVS_VPORT_TYPE_NETDEV,
.create = netdev_create,
.destroy = netdev_destroy,
.get_name = ovs_netdev_get_name,
.send = netdev_send,
};
+
+int __init ovs_netdev_init(void)
+{
+ return ovs_vport_ops_register(&ovs_netdev_vport_ops);
+}
+
+void ovs_netdev_exit(void)
+{
+ ovs_vport_ops_unregister(&ovs_netdev_vport_ops);
+}
diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h
index 8df01c1127e5..6f7038e79c52 100644
--- a/net/openvswitch/vport-netdev.h
+++ b/net/openvswitch/vport-netdev.h
@@ -41,4 +41,7 @@ netdev_vport_priv(const struct vport *vport)
const char *ovs_netdev_get_name(const struct vport *);
void ovs_netdev_detach_dev(struct vport *);
+int __init ovs_netdev_init(void);
+void ovs_netdev_exit(void);
+
#endif /* vport_netdev.h */
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index 2735e01dca73..965e7500c5a6 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -24,6 +24,7 @@
#include <linux/net.h>
#include <linux/rculist.h>
#include <linux/udp.h>
+#include <linux/module.h>
#include <net/icmp.h>
#include <net/ip.h>
@@ -50,6 +51,8 @@ struct vxlan_port {
char name[IFNAMSIZ];
};
+static struct vport_ops ovs_vxlan_vport_ops;
+
static inline struct vxlan_port *vxlan_vport(const struct vport *vport)
{
return vport_priv(vport);
@@ -192,11 +195,29 @@ static const char *vxlan_get_name(const struct vport *vport)
return vxlan_port->name;
}
-const struct vport_ops ovs_vxlan_vport_ops = {
+static struct vport_ops ovs_vxlan_vport_ops = {
.type = OVS_VPORT_TYPE_VXLAN,
.create = vxlan_tnl_create,
.destroy = vxlan_tnl_destroy,
.get_name = vxlan_get_name,
.get_options = vxlan_get_options,
.send = vxlan_tnl_send,
+ .owner = THIS_MODULE,
};
+
+static int __init ovs_vxlan_tnl_init(void)
+{
+ return ovs_vport_ops_register(&ovs_vxlan_vport_ops);
+}
+
+static void __exit ovs_vxlan_tnl_exit(void)
+{
+ ovs_vport_ops_unregister(&ovs_vxlan_vport_ops);
+}
+
+module_init(ovs_vxlan_tnl_init);
+module_exit(ovs_vxlan_tnl_exit);
+
+MODULE_DESCRIPTION("OVS: VXLAN switching port");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("vport-type-4");
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 6015802ebe6f..8168ef021337 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -28,6 +28,7 @@
#include <linux/rtnetlink.h>
#include <linux/compat.h>
#include <net/net_namespace.h>
+#include <linux/module.h>
#include "datapath.h"
#include "vport.h"
@@ -36,22 +37,7 @@
static void ovs_vport_record_error(struct vport *,
enum vport_err_type err_type);
-/* List of statically compiled vport implementations. Don't forget to also
- * add yours to the list at the bottom of vport.h. */
-static const struct vport_ops *vport_ops_list[] = {
- &ovs_netdev_vport_ops,
- &ovs_internal_vport_ops,
-
-#ifdef CONFIG_OPENVSWITCH_GRE
- &ovs_gre_vport_ops,
-#endif
-#ifdef CONFIG_OPENVSWITCH_VXLAN
- &ovs_vxlan_vport_ops,
-#endif
-#ifdef CONFIG_OPENVSWITCH_GENEVE
- &ovs_geneve_vport_ops,
-#endif
-};
+static LIST_HEAD(vport_ops_list);
/* Protected by RCU read lock for reading, ovs_mutex for writing. */
static struct hlist_head *dev_table;
@@ -88,6 +74,32 @@ static struct hlist_head *hash_bucket(struct net *net, const char *name)
return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)];
}
+int ovs_vport_ops_register(struct vport_ops *ops)
+{
+ int err = -EEXIST;
+ struct vport_ops *o;
+
+ ovs_lock();
+ list_for_each_entry(o, &vport_ops_list, list)
+ if (ops->type == o->type)
+ goto errout;
+
+ list_add_tail(&ops->list, &vport_ops_list);
+ err = 0;
+errout:
+ ovs_unlock();
+ return err;
+}
+EXPORT_SYMBOL(ovs_vport_ops_register);
+
+void ovs_vport_ops_unregister(struct vport_ops *ops)
+{
+ ovs_lock();
+ list_del(&ops->list);
+ ovs_unlock();
+}
+EXPORT_SYMBOL(ovs_vport_ops_unregister);
+
/**
* ovs_vport_locate - find a port that has already been created
*
@@ -153,6 +165,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
return vport;
}
+EXPORT_SYMBOL(ovs_vport_alloc);
/**
* ovs_vport_free - uninitialize and free vport
@@ -173,6 +186,18 @@ void ovs_vport_free(struct vport *vport)
free_percpu(vport->percpu_stats);
kfree(vport);
}
+EXPORT_SYMBOL(ovs_vport_free);
+
+static struct vport_ops *ovs_vport_lookup(const struct vport_parms *parms)
+{
+ struct vport_ops *ops;
+
+ list_for_each_entry(ops, &vport_ops_list, list)
+ if (ops->type == parms->type)
+ return ops;
+
+ return NULL;
+}
/**
* ovs_vport_add - add vport device (for kernel callers)
@@ -184,31 +209,40 @@ void ovs_vport_free(struct vport *vport)
*/
struct vport *ovs_vport_add(const struct vport_parms *parms)
{
+ struct vport_ops *ops;
struct vport *vport;
- int err = 0;
- int i;
- for (i = 0; i < ARRAY_SIZE(vport_ops_list); i++) {
- if (vport_ops_list[i]->type == parms->type) {
- struct hlist_head *bucket;
+ ops = ovs_vport_lookup(parms);
+ if (ops) {
+ struct hlist_head *bucket;
- vport = vport_ops_list[i]->create(parms);
- if (IS_ERR(vport)) {
- err = PTR_ERR(vport);
- goto out;
- }
+ if (!try_module_get(ops->owner))
+ return ERR_PTR(-EAFNOSUPPORT);
- bucket = hash_bucket(ovs_dp_get_net(vport->dp),
- vport->ops->get_name(vport));
- hlist_add_head_rcu(&vport->hash_node, bucket);
+ vport = ops->create(parms);
+ if (IS_ERR(vport)) {
+ module_put(ops->owner);
return vport;
}
+
+ bucket = hash_bucket(ovs_dp_get_net(vport->dp),
+ vport->ops->get_name(vport));
+ hlist_add_head_rcu(&vport->hash_node, bucket);
+ return vport;
}
- err = -EAFNOSUPPORT;
+ /* Unlock to attempt module load and return -EAGAIN if load
+ * was successful as we need to restart the port addition
+ * workflow.
+ */
+ ovs_unlock();
+ request_module("vport-type-%d", parms->type);
+ ovs_lock();
-out:
- return ERR_PTR(err);
+ if (!ovs_vport_lookup(parms))
+ return ERR_PTR(-EAFNOSUPPORT);
+ else
+ return ERR_PTR(-EAGAIN);
}
/**
@@ -242,6 +276,8 @@ void ovs_vport_del(struct vport *vport)
hlist_del_rcu(&vport->hash_node);
vport->ops->destroy(vport);
+
+ module_put(vport->ops->owner);
}
/**
@@ -457,6 +493,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
}
ovs_dp_process_packet(skb, &key);
}
+EXPORT_SYMBOL(ovs_vport_receive);
/**
* ovs_vport_send - send a packet on a device
@@ -535,3 +572,4 @@ void ovs_vport_deferred_free(struct vport *vport)
call_rcu(&vport->rcu, free_vport_rcu);
}
+EXPORT_SYMBOL(ovs_vport_deferred_free);
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index 8942125de3a6..e41c3facf799 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -161,6 +161,9 @@ struct vport_ops {
const char *(*get_name)(const struct vport *);
int (*send)(struct vport *, struct sk_buff *);
+
+ struct module *owner;
+ struct list_head list;
};
enum vport_err_type {
@@ -209,14 +212,6 @@ static inline struct vport *vport_from_priv(void *priv)
void ovs_vport_receive(struct vport *, struct sk_buff *,
struct ovs_tunnel_info *);
-/* List of statically compiled vport implementations. Don't forget to also
- * add yours to the list at the top of vport.c. */
-extern const struct vport_ops ovs_netdev_vport_ops;
-extern const struct vport_ops ovs_internal_vport_ops;
-extern const struct vport_ops ovs_gre_vport_ops;
-extern const struct vport_ops ovs_vxlan_vport_ops;
-extern const struct vport_ops ovs_geneve_vport_ops;
-
static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb,
const void *start, unsigned int len)
{
@@ -224,4 +219,7 @@ static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb,
skb->csum = csum_add(skb->csum, csum_partial(start, len, 0));
}
+int ovs_vport_ops_register(struct vport_ops *ops);
+void ovs_vport_ops_unregister(struct vport_ops *ops);
+
#endif /* vport.h */
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 87d20f48ff06..4cd13d8de44b 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2953,7 +2953,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
msg->msg_flags |= MSG_TRUNC;
}
- err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ err = skb_copy_datagram_msg(skb, 0, msg, copied);
if (err)
goto out_free;
diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c
index 290352c0e6b4..0918bc21eae6 100644
--- a/net/phonet/datagram.c
+++ b/net/phonet/datagram.c
@@ -150,7 +150,7 @@ static int pn_recvmsg(struct kiocb *iocb, struct sock *sk,
copylen = len;
}
- rval = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copylen);
+ rval = skb_copy_datagram_msg(skb, 0, msg, copylen);
if (rval) {
rval = -EFAULT;
goto out;
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 70a547ea5177..44b2123e22b8 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -1296,7 +1296,7 @@ copy:
else
len = skb->len;
- err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len);
+ err = skb_copy_datagram_msg(skb, 0, msg, len);
if (!err)
err = (flags & MSG_TRUNC) ? skb->len : len;
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index a85c1a086ae4..9b600c20a7a3 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -1249,7 +1249,7 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock,
msg->msg_flags |= MSG_TRUNC;
}
- skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ skb_copy_datagram_msg(skb, 0, msg, copied);
if (msg->msg_name) {
struct sockaddr_rose *srose;
diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c
index e9aaa65c0778..4575485ad1b4 100644
--- a/net/rxrpc/ar-recvmsg.c
+++ b/net/rxrpc/ar-recvmsg.c
@@ -180,7 +180,7 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock,
if (copy > len - copied)
copy = len - copied;
- ret = skb_copy_datagram_iovec(skb, offset, msg->msg_iov, copy);
+ ret = skb_copy_datagram_msg(skb, offset, msg, copy);
if (ret < 0)
goto copy_error;
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index d6bcbd9f7791..7fffc2272701 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -1,5 +1,5 @@
/*
- * net/sched/gact.c Generic actions
+ * net/sched/act_gact.c Generic actions
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 8a64a0734aee..cbc8dd7dd48a 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -1,5 +1,5 @@
/*
- * net/sched/ipt.c iptables target interface
+ * net/sched/act_ipt.c iptables target interface
*
*TODO: Add other tables. For now we only support the ipv4 table targets
*
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index eb48306033d9..5953517ec059 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -1,5 +1,5 @@
/*
- * net/sched/mirred.c packet mirroring and redirect actions
+ * net/sched/act_mirred.c packet mirroring and redirect actions
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 5f9bcb2e080b..59649d588d79 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -1,5 +1,5 @@
/*
- * net/sched/pedit.c Generic packet editor
+ * net/sched/act_pedit.c Generic packet editor
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 69791ca77a05..9a1c42a43f92 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -1,5 +1,5 @@
/*
- * net/sched/police.c Input police filter.
+ * net/sched/act_police.c Input police filter
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 992c2317ce88..6a8d9488613a 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -1,5 +1,5 @@
/*
- * net/sched/simp.c Simple example of an action
+ * net/sched/act_simple.c Simple example of an action
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index b34331967e02..179f1c8c0d8b 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -139,33 +139,20 @@ struct netem_sched_data {
/* Time stamp put into socket buffer control block
* Only valid when skbs are in our internal t(ime)fifo queue.
+ *
+ * As skb->rbnode uses same storage than skb->next, skb->prev and skb->tstamp,
+ * and skb->next & skb->prev are scratch space for a qdisc,
+ * we save skb->tstamp value in skb->cb[] before destroying it.
*/
struct netem_skb_cb {
psched_time_t time_to_send;
ktime_t tstamp_save;
};
-/* Because space in skb->cb[] is tight, netem overloads skb->next/prev/tstamp
- * to hold a rb_node structure.
- *
- * If struct sk_buff layout is changed, the following checks will complain.
- */
-static struct rb_node *netem_rb_node(struct sk_buff *skb)
-{
- BUILD_BUG_ON(offsetof(struct sk_buff, next) != 0);
- BUILD_BUG_ON(offsetof(struct sk_buff, prev) !=
- offsetof(struct sk_buff, next) + sizeof(skb->next));
- BUILD_BUG_ON(offsetof(struct sk_buff, tstamp) !=
- offsetof(struct sk_buff, prev) + sizeof(skb->prev));
- BUILD_BUG_ON(sizeof(struct rb_node) > sizeof(skb->next) +
- sizeof(skb->prev) +
- sizeof(skb->tstamp));
- return (struct rb_node *)&skb->next;
-}
static struct sk_buff *netem_rb_to_skb(struct rb_node *rb)
{
- return (struct sk_buff *)rb;
+ return container_of(rb, struct sk_buff, rbnode);
}
static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
@@ -403,8 +390,8 @@ static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
else
p = &parent->rb_left;
}
- rb_link_node(netem_rb_node(nskb), parent, p);
- rb_insert_color(netem_rb_node(nskb), &q->t_root);
+ rb_link_node(&nskb->rbnode, parent, p);
+ rb_insert_color(&nskb->rbnode, &q->t_root);
sch->q.qlen++;
}
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 34229ee7f379..0697eda5aed8 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -417,7 +417,7 @@ static void *sctp_remaddr_seq_start(struct seq_file *seq, loff_t *pos)
if (*pos == 0)
seq_printf(seq, "ADDR ASSOC_ID HB_ACT RTO MAX_PATH_RTX "
- "REM_ADDR_RTX START\n");
+ "REM_ADDR_RTX START STATE\n");
return (void *)pos;
}
@@ -490,14 +490,20 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
* Note: We don't have a way to tally this at the moment
* so lets just leave it as zero for the moment
*/
- seq_printf(seq, "0 ");
+ seq_puts(seq, "0 ");
/*
* remote address start time (START). This is also not
* currently implemented, but we can record it with a
* jiffies marker in a subsequent patch
*/
- seq_printf(seq, "0");
+ seq_puts(seq, "0 ");
+
+ /*
+ * The current state of this destination. I.e.
+ * SCTP_ACTIVE, SCTP_INACTIVE, ...
+ */
+ seq_printf(seq, "%d", tsp->state);
seq_printf(seq, "\n");
}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 634a2abb5f3a..2120292c842d 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2095,7 +2095,7 @@ static int sctp_recvmsg(struct kiocb *iocb, struct sock *sk,
if (copied > len)
copied = len;
- err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ err = skb_copy_datagram_msg(skb, 0, msg, copied);
event = sctp_skb2event(skb);
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 74745a47d72a..ec18076e81ec 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -91,7 +91,7 @@ struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz,
* @*headbuf: in: NULL for first frag, otherwise value returned from prev call
* out: set when successful non-complete reassembly, otherwise NULL
* @*buf: in: the buffer to append. Always defined
- * out: head buf after sucessful complete reassembly, otherwise NULL
+ * out: head buf after successful complete reassembly, otherwise NULL
* Returns 1 when reassembly complete, otherwise 0
*/
int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
@@ -311,7 +311,7 @@ bool tipc_msg_bundle(struct sk_buff *bbuf, struct sk_buff *buf, u32 mtu)
* @mtu: max allowable size for the bundle buffer, inclusive header
* @dnode: destination node for message. (Not always present in header)
* Replaces buffer if successful
- * Returns true if sucess, otherwise false
+ * Returns true if success, otherwise false
*/
bool tipc_msg_make_bundle(struct sk_buff **buf, u32 mtu, u32 dnode)
{
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 51bddc236a15..591bbfa082a0 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1372,8 +1372,7 @@ restart:
sz = buf_len;
m->msg_flags |= MSG_TRUNC;
}
- res = skb_copy_datagram_iovec(buf, msg_hdr_sz(msg),
- m->msg_iov, sz);
+ res = skb_copy_datagram_msg(buf, msg_hdr_sz(msg), m, sz);
if (res)
goto exit;
res = sz;
@@ -1473,8 +1472,8 @@ restart:
needed = (buf_len - sz_copied);
sz_to_copy = (sz <= needed) ? sz : needed;
- res = skb_copy_datagram_iovec(buf, msg_hdr_sz(msg) + offset,
- m->msg_iov, sz_to_copy);
+ res = skb_copy_datagram_msg(buf, msg_hdr_sz(msg) + offset,
+ m, sz_to_copy);
if (res)
goto exit;
@@ -1556,7 +1555,7 @@ static void tipc_data_ready(struct sock *sk)
* @tsk: TIPC socket
* @msg: message
*
- * Returns 0 (TIPC_OK) if everyting ok, -TIPC_ERR_NO_PORT otherwise
+ * Returns 0 (TIPC_OK) if everything ok, -TIPC_ERR_NO_PORT otherwise
*/
static int filter_connect(struct tipc_sock *tsk, struct sk_buff **buf)
{
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index e96884380732..5eee625d113f 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1825,7 +1825,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
else if (size < skb->len - skip)
msg->msg_flags |= MSG_TRUNC;
- err = skb_copy_datagram_iovec(skb, skip, msg->msg_iov, size);
+ err = skb_copy_datagram_msg(skb, skip, msg, size);
if (err)
goto out_free;
@@ -2030,8 +2030,8 @@ again:
}
chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
- if (skb_copy_datagram_iovec(skb, UNIXCB(skb).consumed + skip,
- msg->msg_iov, chunk)) {
+ if (skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
+ msg, chunk)) {
if (copied == 0)
copied = -EFAULT;
break;
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 9bb63ffec4f2..a57ddef7d5af 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -1773,8 +1773,7 @@ static int vmci_transport_dgram_dequeue(struct kiocb *kiocb,
}
/* Place the datagram payload in the user's iovec. */
- err = skb_copy_datagram_iovec(skb, sizeof(*dg), msg->msg_iov,
- payload_len);
+ err = skb_copy_datagram_msg(skb, sizeof(*dg), msg, payload_len);
if (err)
goto out;
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 5ad4418ef093..59e785bfde65 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1335,7 +1335,7 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock,
/* Currently, each datagram always contains a complete record */
msg->msg_flags |= MSG_EOR;
- rc = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ rc = skb_copy_datagram_msg(skb, 0, msg, copied);
if (rc)
goto out_free_dgram;