aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2020-06-30 12:29:39 -0700
committerDavid S. Miller <davem@davemloft.net>2020-06-30 12:29:39 -0700
commitb9fcf0a0d826a3a556ae368a42461c920928c6c5 (patch)
treeb208f21f9d66e179bf519fbd98b70af99a9760d1
parentgenetlink: get rid of family->attrbuf (diff)
parentnet: xfrmi: implement header_ops->parse_protocol for AF_PACKET (diff)
downloadwireguard-linux-b9fcf0a0d826a3a556ae368a42461c920928c6c5.tar.xz
wireguard-linux-b9fcf0a0d826a3a556ae368a42461c920928c6c5.zip
Merge branch 'support-AF_PACKET-for-layer-3-devices'
Jason A. Donenfeld says: ==================== support AF_PACKET for layer 3 devices Hans reported that packets injected by a correct-looking and trivial libpcap-based program were not being accepted by wireguard. In investigating that, I noticed that a few devices weren't properly handling AF_PACKET-injected packets, and so this series introduces a bit of shared infrastructure to support that. The basic problem begins with socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)) sockets. When sendto is called, AF_PACKET examines the headers of the packet with this logic: static void packet_parse_headers(struct sk_buff *skb, struct socket *sock) { if ((!skb->protocol || skb->protocol == htons(ETH_P_ALL)) && sock->type == SOCK_RAW) { skb_reset_mac_header(skb); skb->protocol = dev_parse_header_protocol(skb); } skb_probe_transport_header(skb); } The middle condition there triggers, and we jump to dev_parse_header_protocol. Note that this is the only caller of dev_parse_header_protocol in the kernel, and I assume it was designed for this purpose: static inline __be16 dev_parse_header_protocol(const struct sk_buff *skb) { const struct net_device *dev = skb->dev; if (!dev->header_ops || !dev->header_ops->parse_protocol) return 0; return dev->header_ops->parse_protocol(skb); } Since AF_PACKET already knows which netdev the packet is going to, the dev_parse_header_protocol function can see if that netdev has a way it prefers to figure out the protocol from the header. This, again, is the only use of parse_protocol in the kernel. At the moment, it's only used with ethernet devices, via eth_header_parse_protocol. This makes sense, as mostly people are used to AF_PACKET-injecting ethernet frames rather than layer 3 frames. But with nothing in place for layer 3 netdevs, this function winds up returning 0, and skb->protocol then is set to 0, and then by the time it hits the netdev's ndo_start_xmit, the driver doesn't know what to do with it. This is a problem because drivers very much rely on skb->protocol being correct, and routinely reject packets where it's incorrect. That's why having this parsing happen for injected packets is quite important. In wireguard, ipip, and ipip6, for example, packets from AF_PACKET are just dropped entirely. For tun devices, it's sort of uglier, with the tun "packet information" header being passed to userspace containing a bogus protocol value. Some userspace programs are ill-equipped to deal with that. (But of course, that doesn't happen with tap devices, which benefit from the similar shared infrastructure for layer 2 netdevs, further motiviating this patchset for layer 3 netdevs.) This patchset addresses the issue by first adding a layer 3 header parse function, much akin to the existing one for layer 2 packets, and then adds a shared header_ops structure that, also much akin to the existing one for layer 2 packets. Then it wires it up to a few immediate places that stuck out as requiring it, and does a bit of cleanup. This patchset seems like it's fixing real bugs, so it might be appropriate for stable. But they're also very old bugs, so if you'd rather not backport to stable, that'd make sense to me too. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/tun.c2
-rw-r--r--drivers/net/wireguard/device.c1
-rw-r--r--drivers/net/wireguard/queueing.h19
-rw-r--r--drivers/net/wireguard/receive.c2
-rw-r--r--include/net/ip_tunnels.h3
-rw-r--r--net/ipv4/ip_tunnel_core.c18
-rw-r--r--net/ipv4/ip_vti.c1
-rw-r--r--net/ipv4/ipip.c1
-rw-r--r--net/ipv6/ip6_tunnel.c1
-rw-r--r--net/ipv6/ip6_vti.c1
-rw-r--r--net/ipv6/sit.c1
-rw-r--r--net/xfrm/xfrm_interface.c2
12 files changed, 34 insertions, 18 deletions
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 858b012074bd..7adeb91bd368 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -62,6 +62,7 @@
#include <net/rtnetlink.h>
#include <net/sock.h>
#include <net/xdp.h>
+#include <net/ip_tunnels.h>
#include <linux/seq_file.h>
#include <linux/uio.h>
#include <linux/skb_array.h>
@@ -1351,6 +1352,7 @@ static void tun_net_init(struct net_device *dev)
switch (tun->flags & TUN_TYPE_MASK) {
case IFF_TUN:
dev->netdev_ops = &tun_netdev_ops;
+ dev->header_ops = &ip_tunnel_header_ops;
/* Point-to-Point TUN Device */
dev->hard_header_len = 0;
diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c
index a8f151b1b5fa..c9f65e96ccb0 100644
--- a/drivers/net/wireguard/device.c
+++ b/drivers/net/wireguard/device.c
@@ -262,6 +262,7 @@ static void wg_setup(struct net_device *dev)
max(sizeof(struct ipv6hdr), sizeof(struct iphdr));
dev->netdev_ops = &netdev_ops;
+ dev->header_ops = &ip_tunnel_header_ops;
dev->hard_header_len = 0;
dev->addr_len = 0;
dev->needed_headroom = DATA_PACKET_HEAD_ROOM;
diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h
index c58df439dbbe..dfb674e03076 100644
--- a/drivers/net/wireguard/queueing.h
+++ b/drivers/net/wireguard/queueing.h
@@ -11,6 +11,7 @@
#include <linux/skbuff.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
+#include <net/ip_tunnels.h>
struct wg_device;
struct wg_peer;
@@ -65,25 +66,9 @@ struct packet_cb {
#define PACKET_CB(skb) ((struct packet_cb *)((skb)->cb))
#define PACKET_PEER(skb) (PACKET_CB(skb)->keypair->entry.peer)
-/* Returns either the correct skb->protocol value, or 0 if invalid. */
-static inline __be16 wg_examine_packet_protocol(struct sk_buff *skb)
-{
- if (skb_network_header(skb) >= skb->head &&
- (skb_network_header(skb) + sizeof(struct iphdr)) <=
- skb_tail_pointer(skb) &&
- ip_hdr(skb)->version == 4)
- return htons(ETH_P_IP);
- if (skb_network_header(skb) >= skb->head &&
- (skb_network_header(skb) + sizeof(struct ipv6hdr)) <=
- skb_tail_pointer(skb) &&
- ipv6_hdr(skb)->version == 6)
- return htons(ETH_P_IPV6);
- return 0;
-}
-
static inline bool wg_check_packet_protocol(struct sk_buff *skb)
{
- __be16 real_protocol = wg_examine_packet_protocol(skb);
+ __be16 real_protocol = ip_tunnel_parse_protocol(skb);
return real_protocol && skb->protocol == real_protocol;
}
diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c
index 9b2ab6fc91cd..2c9551ea6dc7 100644
--- a/drivers/net/wireguard/receive.c
+++ b/drivers/net/wireguard/receive.c
@@ -387,7 +387,7 @@ static void wg_packet_consume_data_done(struct wg_peer *peer,
*/
skb->ip_summed = CHECKSUM_UNNECESSARY;
skb->csum_level = ~0; /* All levels */
- skb->protocol = wg_examine_packet_protocol(skb);
+ skb->protocol = ip_tunnel_parse_protocol(skb);
if (skb->protocol == htons(ETH_P_IP)) {
len = ntohs(ip_hdr(skb)->tot_len);
if (unlikely(len < sizeof(struct iphdr)))
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 076e5d7db7d3..36025dea7612 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -290,6 +290,9 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
struct ip_tunnel_parm *p, __u32 fwmark);
void ip_tunnel_setup(struct net_device *dev, unsigned int net_id);
+extern const struct header_ops ip_tunnel_header_ops;
+__be16 ip_tunnel_parse_protocol(const struct sk_buff *skb);
+
struct ip_tunnel_encap_ops {
size_t (*encap_hlen)(struct ip_tunnel_encap *e);
int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e,
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 181b7a2a0247..f8b419e2475c 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -844,3 +844,21 @@ void ip_tunnel_unneed_metadata(void)
static_branch_dec(&ip_tunnel_metadata_cnt);
}
EXPORT_SYMBOL_GPL(ip_tunnel_unneed_metadata);
+
+/* Returns either the correct skb->protocol value, or 0 if invalid. */
+__be16 ip_tunnel_parse_protocol(const struct sk_buff *skb)
+{
+ if (skb_network_header(skb) >= skb->head &&
+ (skb_network_header(skb) + sizeof(struct iphdr)) <= skb_tail_pointer(skb) &&
+ ip_hdr(skb)->version == 4)
+ return htons(ETH_P_IP);
+ if (skb_network_header(skb) >= skb->head &&
+ (skb_network_header(skb) + sizeof(struct ipv6hdr)) <= skb_tail_pointer(skb) &&
+ ipv6_hdr(skb)->version == 6)
+ return htons(ETH_P_IPV6);
+ return 0;
+}
+EXPORT_SYMBOL(ip_tunnel_parse_protocol);
+
+const struct header_ops ip_tunnel_header_ops = { .parse_protocol = ip_tunnel_parse_protocol };
+EXPORT_SYMBOL(ip_tunnel_header_ops);
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 1d9c8cff5ac3..460ca1099e8a 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -441,6 +441,7 @@ static const struct net_device_ops vti_netdev_ops = {
static void vti_tunnel_setup(struct net_device *dev)
{
dev->netdev_ops = &vti_netdev_ops;
+ dev->header_ops = &ip_tunnel_header_ops;
dev->type = ARPHRD_TUNNEL;
ip_tunnel_setup(dev, vti_net_id);
}
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 40fea52c8277..75d35e76bec2 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -361,6 +361,7 @@ static const struct net_device_ops ipip_netdev_ops = {
static void ipip_tunnel_setup(struct net_device *dev)
{
dev->netdev_ops = &ipip_netdev_ops;
+ dev->header_ops = &ip_tunnel_header_ops;
dev->type = ARPHRD_TUNNEL;
dev->flags = IFF_NOARP;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 821d96c720b9..a18c378ca5f4 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1846,6 +1846,7 @@ static const struct net_device_ops ip6_tnl_netdev_ops = {
static void ip6_tnl_dev_setup(struct net_device *dev)
{
dev->netdev_ops = &ip6_tnl_netdev_ops;
+ dev->header_ops = &ip_tunnel_header_ops;
dev->needs_free_netdev = true;
dev->priv_destructor = ip6_dev_free;
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 1147f647b9a0..0d964160a9dd 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -905,6 +905,7 @@ static const struct net_device_ops vti6_netdev_ops = {
static void vti6_dev_setup(struct net_device *dev)
{
dev->netdev_ops = &vti6_netdev_ops;
+ dev->header_ops = &ip_tunnel_header_ops;
dev->needs_free_netdev = true;
dev->priv_destructor = vti6_dev_free;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 1fbb4dfbb191..5e2c34c0ac97 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1421,6 +1421,7 @@ static void ipip6_tunnel_setup(struct net_device *dev)
int t_hlen = tunnel->hlen + sizeof(struct iphdr);
dev->netdev_ops = &ipip6_netdev_ops;
+ dev->header_ops = &ip_tunnel_header_ops;
dev->needs_free_netdev = true;
dev->priv_destructor = ipip6_dev_free;
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
index c407ecbc5d46..b615729812e5 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@ -37,6 +37,7 @@
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
+#include <net/ip_tunnels.h>
#include <net/addrconf.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
@@ -581,6 +582,7 @@ static const struct net_device_ops xfrmi_netdev_ops = {
static void xfrmi_dev_setup(struct net_device *dev)
{
dev->netdev_ops = &xfrmi_netdev_ops;
+ dev->header_ops = &ip_tunnel_header_ops;
dev->type = ARPHRD_NONE;
dev->mtu = ETH_DATA_LEN;
dev->min_mtu = ETH_MIN_MTU;